1 /* Subroutines used for code generation on IBM RS/6000.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
33 #include "stringpool.h"
41 #include "diagnostic-core.h"
42 #include "insn-attr.h"
45 #include "fold-const.h"
46 #include "stor-layout.h"
48 #include "print-tree.h"
54 #include "common/common-target.h"
55 #include "langhooks.h"
57 #include "sched-int.h"
59 #include "gimple-fold.h"
60 #include "gimple-iterator.h"
61 #include "gimple-ssa.h"
62 #include "gimple-walk.h"
65 #include "tm-constrs.h"
66 #include "tree-vectorizer.h"
67 #include "target-globals.h"
70 #include "tree-pass.h"
73 #include "xcoffout.h" /* get declarations of xcoff_*_section_name */
76 #include "gstab.h" /* for N_SLINE */
78 #include "case-cfn-macros.h"
81 /* This file should be included last. */
82 #include "target-def.h"
/* Default: calls to unprototyped functions are not rejected.  */
#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

/* NOTE: classic function-like macros; each argument is evaluated twice,
   so do not pass expressions with side effects.  */
#define min(A,B)	((A) < (B) ? (A) : (B))
#define max(A,B)	((A) > (B) ? (A) : (B))
91 /* Structure used to define the rs6000 stack */
92 typedef struct rs6000_stack
{
93 int reload_completed
; /* stack info won't change from here on */
94 int first_gp_reg_save
; /* first callee saved GP register used */
95 int first_fp_reg_save
; /* first callee saved FP register used */
96 int first_altivec_reg_save
; /* first callee saved AltiVec register used */
97 int lr_save_p
; /* true if the link reg needs to be saved */
98 int cr_save_p
; /* true if the CR reg needs to be saved */
99 unsigned int vrsave_mask
; /* mask of vec registers to save */
100 int push_p
; /* true if we need to allocate stack space */
101 int calls_p
; /* true if the function makes any calls */
102 int world_save_p
; /* true if we're saving *everything*:
103 r13-r31, cr, f14-f31, vrsave, v20-v31 */
104 enum rs6000_abi abi
; /* which ABI to use */
105 int gp_save_offset
; /* offset to save GP regs from initial SP */
106 int fp_save_offset
; /* offset to save FP regs from initial SP */
107 int altivec_save_offset
; /* offset to save AltiVec regs from initial SP */
108 int lr_save_offset
; /* offset to save LR from initial SP */
109 int cr_save_offset
; /* offset to save CR from initial SP */
110 int vrsave_save_offset
; /* offset to save VRSAVE from initial SP */
111 int spe_gp_save_offset
; /* offset to save spe 64-bit gprs */
112 int varargs_save_offset
; /* offset to save the varargs registers */
113 int ehrd_offset
; /* offset to EH return data */
114 int ehcr_offset
; /* offset to EH CR field data */
115 int reg_size
; /* register size (4 or 8) */
116 HOST_WIDE_INT vars_size
; /* variable save area size */
117 int parm_size
; /* outgoing parameter size */
118 int save_size
; /* save area size */
119 int fixed_size
; /* fixed size of stack frame */
120 int gp_size
; /* size of saved GP registers */
121 int fp_size
; /* size of saved FP registers */
122 int altivec_size
; /* size of saved AltiVec registers */
123 int cr_size
; /* size to hold CR if not in fixed area */
124 int vrsave_size
; /* size to hold VRSAVE */
125 int altivec_padding_size
; /* size of altivec alignment padding */
126 int spe_gp_size
; /* size of 64-bit GPR save size for SPE */
127 int spe_padding_size
;
128 HOST_WIDE_INT total_size
; /* total bytes allocated for stack */
129 int spe_64bit_regs_used
;
133 /* A C structure for machine-specific, per-function data.
134 This is added to the cfun structure. */
135 typedef struct GTY(()) machine_function
137 /* Whether the instruction chain has been scanned already. */
138 int spe_insn_chain_scanned_p
;
139 /* Flags if __builtin_return_address (n) with n >= 1 was used. */
140 int ra_needs_full_frame
;
141 /* Flags if __builtin_return_address (0) was used. */
143 /* Cache lr_save_p after expansion of builtin_eh_return. */
145 /* Whether we need to save the TOC to the reserved stack location in the
146 function prologue. */
147 bool save_toc_in_prologue
;
148 /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
149 varargs save area. */
150 HOST_WIDE_INT varargs_save_offset
;
151 /* Temporary stack slot to use for SDmode copies. This slot is
152 64-bits wide and is allocated early enough so that the offset
153 does not overflow the 16-bit load/store offset field. */
154 rtx sdmode_stack_slot
;
155 /* Alternative internal arg pointer for -fsplit-stack. */
156 rtx split_stack_arg_pointer
;
157 bool split_stack_argp_used
;
158 /* Flag if r2 setup is needed with ELFv2 ABI. */
159 bool r2_setup_needed
;
160 /* The number of components we use for separate shrink-wrapping. */
162 /* The components already handled by separate shrink-wrapping, which should
163 not be considered by the prologue and epilogue. */
164 bool gpr_is_wrapped_separately
[32];
165 bool fpr_is_wrapped_separately
[32];
166 bool lr_is_wrapped_separately
;
169 /* Support targetm.vectorize.builtin_mask_for_load. */
170 static GTY(()) tree altivec_builtin_mask_for_load
;
172 /* Set to nonzero once AIX common-mode calls have been defined. */
173 static GTY(()) int common_mode_defined
;
175 /* Label number of label created for -mrelocatable, to call to so we can
176 get the address of the GOT section */
177 static int rs6000_pic_labelno
;
180 /* Counter for labels which are to be placed in .fixup. */
181 int fixuplabelno
= 0;
184 /* Whether to use variant of AIX ABI for PowerPC64 Linux. */
187 /* Specify the machine mode that pointers have. After generation of rtl, the
188 compiler makes no further distinction between pointers and any other objects
189 of this machine mode. */
190 scalar_int_mode rs6000_pmode
;
192 /* Width in bits of a pointer. */
193 unsigned rs6000_pointer_size
;
#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
static bool rs6000_passes_float;
static bool rs6000_passes_long_double;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif
212 /* Value is TRUE if register/mode pair is acceptable. */
213 static bool rs6000_hard_regno_mode_ok_p
214 [NUM_MACHINE_MODES
][FIRST_PSEUDO_REGISTER
];
216 /* Maximum number of registers needed for a given register class and mode. */
217 unsigned char rs6000_class_max_nregs
[NUM_MACHINE_MODES
][LIM_REG_CLASSES
];
219 /* How many registers are needed for a given register and mode. */
220 unsigned char rs6000_hard_regno_nregs
[NUM_MACHINE_MODES
][FIRST_PSEUDO_REGISTER
];
222 /* Map register number to register class. */
223 enum reg_class rs6000_regno_regclass
[FIRST_PSEUDO_REGISTER
];
225 static int dbg_cost_ctrl
;
227 /* Built in types. */
228 tree rs6000_builtin_types
[RS6000_BTI_MAX
];
229 tree rs6000_builtin_decls
[RS6000_BUILTIN_COUNT
];
231 /* Flag to say the TOC is initialized */
232 int toc_initialized
, need_toc_init
;
233 char toc_label_name
[10];
235 /* Cached value of rs6000_variable_issue. This is cached in
236 rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */
237 static short cached_can_issue_more
;
239 static GTY(()) section
*read_only_data_section
;
240 static GTY(()) section
*private_data_section
;
241 static GTY(()) section
*tls_data_section
;
242 static GTY(()) section
*tls_private_data_section
;
243 static GTY(()) section
*read_only_private_data_section
;
244 static GTY(()) section
*sdata2_section
;
245 static GTY(()) section
*toc_section
;
247 struct builtin_description
249 const HOST_WIDE_INT mask
;
250 const enum insn_code icode
;
251 const char *const name
;
252 const enum rs6000_builtins code
;
255 /* Describe the vector unit used for modes. */
256 enum rs6000_vector rs6000_vector_unit
[NUM_MACHINE_MODES
];
257 enum rs6000_vector rs6000_vector_mem
[NUM_MACHINE_MODES
];
259 /* Register classes for various constraints that are based on the target
261 enum reg_class rs6000_constraints
[RS6000_CONSTRAINT_MAX
];
263 /* Describe the alignment of a vector. */
264 int rs6000_vector_align
[NUM_MACHINE_MODES
];
266 /* Map selected modes to types for builtins. */
267 static GTY(()) tree builtin_mode_to_type
[MAX_MACHINE_MODE
][2];
269 /* What modes to automatically generate reciprocal divide estimate (fre) and
270 reciprocal sqrt (frsqrte) for. */
271 unsigned char rs6000_recip_bits
[MAX_MACHINE_MODE
];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combination of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
299 /* -mrecip options. */
302 const char *string
; /* option name */
303 unsigned int mask
; /* mask bits to set */
304 } recip_options
[] = {
305 { "all", RECIP_ALL
},
306 { "none", RECIP_NONE
},
307 { "div", (RECIP_SF_DIV
| RECIP_DF_DIV
| RECIP_V4SF_DIV
309 { "divf", (RECIP_SF_DIV
| RECIP_V4SF_DIV
) },
310 { "divd", (RECIP_DF_DIV
| RECIP_V2DF_DIV
) },
311 { "rsqrt", (RECIP_SF_RSQRT
| RECIP_DF_RSQRT
| RECIP_V4SF_RSQRT
312 | RECIP_V2DF_RSQRT
) },
313 { "rsqrtf", (RECIP_SF_RSQRT
| RECIP_V4SF_RSQRT
) },
314 { "rsqrtd", (RECIP_DF_RSQRT
| RECIP_V2DF_RSQRT
) },
317 /* Used by __builtin_cpu_is(), mapping from PLATFORM names to values. */
323 { "power9", PPC_PLATFORM_POWER9
},
324 { "power8", PPC_PLATFORM_POWER8
},
325 { "power7", PPC_PLATFORM_POWER7
},
326 { "power6x", PPC_PLATFORM_POWER6X
},
327 { "power6", PPC_PLATFORM_POWER6
},
328 { "power5+", PPC_PLATFORM_POWER5_PLUS
},
329 { "power5", PPC_PLATFORM_POWER5
},
330 { "ppc970", PPC_PLATFORM_PPC970
},
331 { "power4", PPC_PLATFORM_POWER4
},
332 { "ppca2", PPC_PLATFORM_PPCA2
},
333 { "ppc476", PPC_PLATFORM_PPC476
},
334 { "ppc464", PPC_PLATFORM_PPC464
},
335 { "ppc440", PPC_PLATFORM_PPC440
},
336 { "ppc405", PPC_PLATFORM_PPC405
},
337 { "ppc-cell-be", PPC_PLATFORM_CELL_BE
}
340 /* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks. */
346 } cpu_supports_info
[] = {
347 /* AT_HWCAP masks. */
348 { "4xxmac", PPC_FEATURE_HAS_4xxMAC
, 0 },
349 { "altivec", PPC_FEATURE_HAS_ALTIVEC
, 0 },
350 { "arch_2_05", PPC_FEATURE_ARCH_2_05
, 0 },
351 { "arch_2_06", PPC_FEATURE_ARCH_2_06
, 0 },
352 { "archpmu", PPC_FEATURE_PERFMON_COMPAT
, 0 },
353 { "booke", PPC_FEATURE_BOOKE
, 0 },
354 { "cellbe", PPC_FEATURE_CELL_BE
, 0 },
355 { "dfp", PPC_FEATURE_HAS_DFP
, 0 },
356 { "efpdouble", PPC_FEATURE_HAS_EFP_DOUBLE
, 0 },
357 { "efpsingle", PPC_FEATURE_HAS_EFP_SINGLE
, 0 },
358 { "fpu", PPC_FEATURE_HAS_FPU
, 0 },
359 { "ic_snoop", PPC_FEATURE_ICACHE_SNOOP
, 0 },
360 { "mmu", PPC_FEATURE_HAS_MMU
, 0 },
361 { "notb", PPC_FEATURE_NO_TB
, 0 },
362 { "pa6t", PPC_FEATURE_PA6T
, 0 },
363 { "power4", PPC_FEATURE_POWER4
, 0 },
364 { "power5", PPC_FEATURE_POWER5
, 0 },
365 { "power5+", PPC_FEATURE_POWER5_PLUS
, 0 },
366 { "power6x", PPC_FEATURE_POWER6_EXT
, 0 },
367 { "ppc32", PPC_FEATURE_32
, 0 },
368 { "ppc601", PPC_FEATURE_601_INSTR
, 0 },
369 { "ppc64", PPC_FEATURE_64
, 0 },
370 { "ppcle", PPC_FEATURE_PPC_LE
, 0 },
371 { "smt", PPC_FEATURE_SMT
, 0 },
372 { "spe", PPC_FEATURE_HAS_SPE
, 0 },
373 { "true_le", PPC_FEATURE_TRUE_LE
, 0 },
374 { "ucache", PPC_FEATURE_UNIFIED_CACHE
, 0 },
375 { "vsx", PPC_FEATURE_HAS_VSX
, 0 },
377 /* AT_HWCAP2 masks. */
378 { "arch_2_07", PPC_FEATURE2_ARCH_2_07
, 1 },
379 { "dscr", PPC_FEATURE2_HAS_DSCR
, 1 },
380 { "ebb", PPC_FEATURE2_HAS_EBB
, 1 },
381 { "htm", PPC_FEATURE2_HAS_HTM
, 1 },
382 { "htm-nosc", PPC_FEATURE2_HTM_NOSC
, 1 },
383 { "isel", PPC_FEATURE2_HAS_ISEL
, 1 },
384 { "tar", PPC_FEATURE2_HAS_TAR
, 1 },
385 { "vcrypto", PPC_FEATURE2_HAS_VEC_CRYPTO
, 1 },
386 { "arch_3_00", PPC_FEATURE2_ARCH_3_00
, 1 },
387 { "ieee128", PPC_FEATURE2_HAS_IEEE128
, 1 }
390 /* Newer LIBCs explicitly export this symbol to declare that they provide
391 the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. We emit a
392 reference to this symbol whenever we expand a CPU builtin, so that
393 we never link against an old LIBC. */
394 const char *tcb_verification_symbol
= "__parse_hwcap_and_convert_at_platform";
396 /* True if we have expanded a CPU builtin. */
399 /* Pointer to function (in powerpcspe-c.c) that can define or undefine target
400 macros that have changed. Languages that don't support the preprocessor
401 don't link in powerpcspe-c.c, so we can't call it directly. */
402 void (*rs6000_target_modify_macros_ptr
) (bool, HOST_WIDE_INT
, HOST_WIDE_INT
);
404 /* Simplfy register classes into simpler classifications. We assume
405 GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
406 check for standard register classes (gpr/floating/altivec/vsx) and
407 floating/vector classes (float/altivec/vsx). */
409 enum rs6000_reg_type
{
422 /* Map register class to register type. */
423 static enum rs6000_reg_type reg_class_to_reg_type
[N_REG_CLASSES
];
425 /* First/last register type for the 'normal' register types (i.e. general
426 purpose, floating point, altivec, and VSX registers). */
427 #define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
429 #define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
/* Register classes we care about in secondary reload or go if legitimate
   address.  We only need to worry about GPR, FPR, and Altivec registers here,
   along an ANY field that is the OR of the 3 register classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG			/* Number of entries, used for array sizes.  */
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   3 classes.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX
450 /* Map reload register type to a register in the register class. */
451 struct reload_reg_map_type
{
452 const char *name
; /* Register class name. */
453 int reg
; /* Register in the register class. */
456 static const struct reload_reg_map_type reload_reg_map
[N_RELOAD_REG
] = {
457 { "Gpr", FIRST_GPR_REGNO
}, /* RELOAD_REG_GPR. */
458 { "Fpr", FIRST_FPR_REGNO
}, /* RELOAD_REG_FPR. */
459 { "VMX", FIRST_ALTIVEC_REGNO
}, /* RELOAD_REG_VMX. */
460 { "Any", -1 }, /* RELOAD_REG_ANY. */
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive which types can do PRE_MODIFY instead of
   PRE_INC and PRE_DEC, so keep track of separate bits for these two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */
477 /* Register type masks based on the type, of valid addressing modes. */
478 struct rs6000_reg_addr
{
479 enum insn_code reload_load
; /* INSN to reload for loading. */
480 enum insn_code reload_store
; /* INSN to reload for storing. */
481 enum insn_code reload_fpr_gpr
; /* INSN to move from FPR to GPR. */
482 enum insn_code reload_gpr_vsx
; /* INSN to move from GPR to VSX. */
483 enum insn_code reload_vsx_gpr
; /* INSN to move from VSX to GPR. */
484 enum insn_code fusion_gpr_ld
; /* INSN for fusing gpr ADDIS/loads. */
485 /* INSNs for fusing addi with loads
486 or stores for each reg. class. */
487 enum insn_code fusion_addi_ld
[(int)N_RELOAD_REG
];
488 enum insn_code fusion_addi_st
[(int)N_RELOAD_REG
];
489 /* INSNs for fusing addis with loads
490 or stores for each reg. class. */
491 enum insn_code fusion_addis_ld
[(int)N_RELOAD_REG
];
492 enum insn_code fusion_addis_st
[(int)N_RELOAD_REG
];
493 addr_mask_type addr_mask
[(int)N_RELOAD_REG
]; /* Valid address masks. */
494 bool scalar_in_vmx_p
; /* Scalar value can go in VMX. */
495 bool fused_toc
; /* Mode supports TOC fusion. */
498 static struct rs6000_reg_addr reg_addr
[NUM_MACHINE_MODES
];
500 /* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
502 mode_supports_pre_incdec_p (machine_mode mode
)
504 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
] & RELOAD_REG_PRE_INCDEC
)
508 /* Helper function to say whether a mode supports PRE_MODIFY. */
510 mode_supports_pre_modify_p (machine_mode mode
)
512 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
] & RELOAD_REG_PRE_MODIFY
)
516 /* Given that there exists at least one variable that is set (produced)
517 by OUT_INSN and read (consumed) by IN_INSN, return true iff
518 IN_INSN represents one or more memory store operations and none of
519 the variables set by OUT_INSN is used by IN_INSN as the address of a
520 store operation. If either IN_INSN or OUT_INSN does not represent
521 a "single" RTL SET expression (as loosely defined by the
522 implementation of the single_set function) or a PARALLEL with only
523 SETs, CLOBBERs, and USEs inside, this function returns false.
525 This rs6000-specific version of store_data_bypass_p checks for
526 certain conditions that result in assertion failures (and internal
527 compiler errors) in the generic store_data_bypass_p function and
528 returns false rather than calling store_data_bypass_p if one of the
529 problematic conditions is detected. */
532 rs6000_store_data_bypass_p (rtx_insn
*out_insn
, rtx_insn
*in_insn
)
539 in_set
= single_set (in_insn
);
542 if (MEM_P (SET_DEST (in_set
)))
544 out_set
= single_set (out_insn
);
547 out_pat
= PATTERN (out_insn
);
548 if (GET_CODE (out_pat
) == PARALLEL
)
550 for (i
= 0; i
< XVECLEN (out_pat
, 0); i
++)
552 out_exp
= XVECEXP (out_pat
, 0, i
);
553 if ((GET_CODE (out_exp
) == CLOBBER
)
554 || (GET_CODE (out_exp
) == USE
))
556 else if (GET_CODE (out_exp
) != SET
)
565 in_pat
= PATTERN (in_insn
);
566 if (GET_CODE (in_pat
) != PARALLEL
)
569 for (i
= 0; i
< XVECLEN (in_pat
, 0); i
++)
571 in_exp
= XVECEXP (in_pat
, 0, i
);
572 if ((GET_CODE (in_exp
) == CLOBBER
) || (GET_CODE (in_exp
) == USE
))
574 else if (GET_CODE (in_exp
) != SET
)
577 if (MEM_P (SET_DEST (in_exp
)))
579 out_set
= single_set (out_insn
);
582 out_pat
= PATTERN (out_insn
);
583 if (GET_CODE (out_pat
) != PARALLEL
)
585 for (j
= 0; j
< XVECLEN (out_pat
, 0); j
++)
587 out_exp
= XVECEXP (out_pat
, 0, j
);
588 if ((GET_CODE (out_exp
) == CLOBBER
)
589 || (GET_CODE (out_exp
) == USE
))
591 else if (GET_CODE (out_exp
) != SET
)
598 return store_data_bypass_p (out_insn
, in_insn
);
601 /* Return true if we have D-form addressing in altivec registers. */
603 mode_supports_vmx_dform (machine_mode mode
)
605 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
] & RELOAD_REG_OFFSET
) != 0);
608 /* Return true if we have D-form addressing in VSX registers. This addressing
609 is more limited than normal d-form addressing in that the offset must be
610 aligned on a 16-byte boundary. */
612 mode_supports_vsx_dform_quad (machine_mode mode
)
614 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
] & RELOAD_REG_QUAD_OFFSET
)
/* Target cpu costs.  */

struct processor_costs {
  const int mulsi;	  /* cost of SImode multiplication.  */
  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
  const int mulsi_const9; /* cost of SImode mult by short constant.  */
  const int muldi;	  /* cost of DImode multiplication.  */
  const int divsi;	  /* cost of SImode division.  */
  const int divdi;	  /* cost of DImode division.  */
  const int fp;		  /* cost of simple SFmode and DFmode insns.  */
  const int dmul;	  /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;	  /* cost of SFmode division (fdivs).  */
  const int ddiv;	  /* cost of DFmode division (fdiv).  */
  const int cache_line_size;	/* cache line size in bytes.  */
  const int l1_cache_size;	/* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;	/* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
					operations.  */
  const int sfdf_convert;	/* cost of SF->DF conversion.  */
};

const struct processor_costs *rs6000_cost;
642 /* Processor costs (relative to an add) */
644 /* Instruction size costs on 32bit processors. */
646 struct processor_costs size32_cost
= {
647 COSTS_N_INSNS (1), /* mulsi */
648 COSTS_N_INSNS (1), /* mulsi_const */
649 COSTS_N_INSNS (1), /* mulsi_const9 */
650 COSTS_N_INSNS (1), /* muldi */
651 COSTS_N_INSNS (1), /* divsi */
652 COSTS_N_INSNS (1), /* divdi */
653 COSTS_N_INSNS (1), /* fp */
654 COSTS_N_INSNS (1), /* dmul */
655 COSTS_N_INSNS (1), /* sdiv */
656 COSTS_N_INSNS (1), /* ddiv */
657 32, /* cache line size */
661 0, /* SF->DF convert */
664 /* Instruction size costs on 64bit processors. */
666 struct processor_costs size64_cost
= {
667 COSTS_N_INSNS (1), /* mulsi */
668 COSTS_N_INSNS (1), /* mulsi_const */
669 COSTS_N_INSNS (1), /* mulsi_const9 */
670 COSTS_N_INSNS (1), /* muldi */
671 COSTS_N_INSNS (1), /* divsi */
672 COSTS_N_INSNS (1), /* divdi */
673 COSTS_N_INSNS (1), /* fp */
674 COSTS_N_INSNS (1), /* dmul */
675 COSTS_N_INSNS (1), /* sdiv */
676 COSTS_N_INSNS (1), /* ddiv */
677 128, /* cache line size */
681 0, /* SF->DF convert */
684 /* Instruction costs on RS64A processors. */
686 struct processor_costs rs64a_cost
= {
687 COSTS_N_INSNS (20), /* mulsi */
688 COSTS_N_INSNS (12), /* mulsi_const */
689 COSTS_N_INSNS (8), /* mulsi_const9 */
690 COSTS_N_INSNS (34), /* muldi */
691 COSTS_N_INSNS (65), /* divsi */
692 COSTS_N_INSNS (67), /* divdi */
693 COSTS_N_INSNS (4), /* fp */
694 COSTS_N_INSNS (4), /* dmul */
695 COSTS_N_INSNS (31), /* sdiv */
696 COSTS_N_INSNS (31), /* ddiv */
697 128, /* cache line size */
701 0, /* SF->DF convert */
704 /* Instruction costs on MPCCORE processors. */
706 struct processor_costs mpccore_cost
= {
707 COSTS_N_INSNS (2), /* mulsi */
708 COSTS_N_INSNS (2), /* mulsi_const */
709 COSTS_N_INSNS (2), /* mulsi_const9 */
710 COSTS_N_INSNS (2), /* muldi */
711 COSTS_N_INSNS (6), /* divsi */
712 COSTS_N_INSNS (6), /* divdi */
713 COSTS_N_INSNS (4), /* fp */
714 COSTS_N_INSNS (5), /* dmul */
715 COSTS_N_INSNS (10), /* sdiv */
716 COSTS_N_INSNS (17), /* ddiv */
717 32, /* cache line size */
721 0, /* SF->DF convert */
724 /* Instruction costs on PPC403 processors. */
726 struct processor_costs ppc403_cost
= {
727 COSTS_N_INSNS (4), /* mulsi */
728 COSTS_N_INSNS (4), /* mulsi_const */
729 COSTS_N_INSNS (4), /* mulsi_const9 */
730 COSTS_N_INSNS (4), /* muldi */
731 COSTS_N_INSNS (33), /* divsi */
732 COSTS_N_INSNS (33), /* divdi */
733 COSTS_N_INSNS (11), /* fp */
734 COSTS_N_INSNS (11), /* dmul */
735 COSTS_N_INSNS (11), /* sdiv */
736 COSTS_N_INSNS (11), /* ddiv */
737 32, /* cache line size */
741 0, /* SF->DF convert */
744 /* Instruction costs on PPC405 processors. */
746 struct processor_costs ppc405_cost
= {
747 COSTS_N_INSNS (5), /* mulsi */
748 COSTS_N_INSNS (4), /* mulsi_const */
749 COSTS_N_INSNS (3), /* mulsi_const9 */
750 COSTS_N_INSNS (5), /* muldi */
751 COSTS_N_INSNS (35), /* divsi */
752 COSTS_N_INSNS (35), /* divdi */
753 COSTS_N_INSNS (11), /* fp */
754 COSTS_N_INSNS (11), /* dmul */
755 COSTS_N_INSNS (11), /* sdiv */
756 COSTS_N_INSNS (11), /* ddiv */
757 32, /* cache line size */
761 0, /* SF->DF convert */
764 /* Instruction costs on PPC440 processors. */
766 struct processor_costs ppc440_cost
= {
767 COSTS_N_INSNS (3), /* mulsi */
768 COSTS_N_INSNS (2), /* mulsi_const */
769 COSTS_N_INSNS (2), /* mulsi_const9 */
770 COSTS_N_INSNS (3), /* muldi */
771 COSTS_N_INSNS (34), /* divsi */
772 COSTS_N_INSNS (34), /* divdi */
773 COSTS_N_INSNS (5), /* fp */
774 COSTS_N_INSNS (5), /* dmul */
775 COSTS_N_INSNS (19), /* sdiv */
776 COSTS_N_INSNS (33), /* ddiv */
777 32, /* cache line size */
781 0, /* SF->DF convert */
784 /* Instruction costs on PPC476 processors. */
786 struct processor_costs ppc476_cost
= {
787 COSTS_N_INSNS (4), /* mulsi */
788 COSTS_N_INSNS (4), /* mulsi_const */
789 COSTS_N_INSNS (4), /* mulsi_const9 */
790 COSTS_N_INSNS (4), /* muldi */
791 COSTS_N_INSNS (11), /* divsi */
792 COSTS_N_INSNS (11), /* divdi */
793 COSTS_N_INSNS (6), /* fp */
794 COSTS_N_INSNS (6), /* dmul */
795 COSTS_N_INSNS (19), /* sdiv */
796 COSTS_N_INSNS (33), /* ddiv */
797 32, /* l1 cache line size */
801 0, /* SF->DF convert */
804 /* Instruction costs on PPC601 processors. */
806 struct processor_costs ppc601_cost
= {
807 COSTS_N_INSNS (5), /* mulsi */
808 COSTS_N_INSNS (5), /* mulsi_const */
809 COSTS_N_INSNS (5), /* mulsi_const9 */
810 COSTS_N_INSNS (5), /* muldi */
811 COSTS_N_INSNS (36), /* divsi */
812 COSTS_N_INSNS (36), /* divdi */
813 COSTS_N_INSNS (4), /* fp */
814 COSTS_N_INSNS (5), /* dmul */
815 COSTS_N_INSNS (17), /* sdiv */
816 COSTS_N_INSNS (31), /* ddiv */
817 32, /* cache line size */
821 0, /* SF->DF convert */
824 /* Instruction costs on PPC603 processors. */
826 struct processor_costs ppc603_cost
= {
827 COSTS_N_INSNS (5), /* mulsi */
828 COSTS_N_INSNS (3), /* mulsi_const */
829 COSTS_N_INSNS (2), /* mulsi_const9 */
830 COSTS_N_INSNS (5), /* muldi */
831 COSTS_N_INSNS (37), /* divsi */
832 COSTS_N_INSNS (37), /* divdi */
833 COSTS_N_INSNS (3), /* fp */
834 COSTS_N_INSNS (4), /* dmul */
835 COSTS_N_INSNS (18), /* sdiv */
836 COSTS_N_INSNS (33), /* ddiv */
837 32, /* cache line size */
841 0, /* SF->DF convert */
844 /* Instruction costs on PPC604 processors. */
846 struct processor_costs ppc604_cost
= {
847 COSTS_N_INSNS (4), /* mulsi */
848 COSTS_N_INSNS (4), /* mulsi_const */
849 COSTS_N_INSNS (4), /* mulsi_const9 */
850 COSTS_N_INSNS (4), /* muldi */
851 COSTS_N_INSNS (20), /* divsi */
852 COSTS_N_INSNS (20), /* divdi */
853 COSTS_N_INSNS (3), /* fp */
854 COSTS_N_INSNS (3), /* dmul */
855 COSTS_N_INSNS (18), /* sdiv */
856 COSTS_N_INSNS (32), /* ddiv */
857 32, /* cache line size */
861 0, /* SF->DF convert */
864 /* Instruction costs on PPC604e processors. */
866 struct processor_costs ppc604e_cost
= {
867 COSTS_N_INSNS (2), /* mulsi */
868 COSTS_N_INSNS (2), /* mulsi_const */
869 COSTS_N_INSNS (2), /* mulsi_const9 */
870 COSTS_N_INSNS (2), /* muldi */
871 COSTS_N_INSNS (20), /* divsi */
872 COSTS_N_INSNS (20), /* divdi */
873 COSTS_N_INSNS (3), /* fp */
874 COSTS_N_INSNS (3), /* dmul */
875 COSTS_N_INSNS (18), /* sdiv */
876 COSTS_N_INSNS (32), /* ddiv */
877 32, /* cache line size */
881 0, /* SF->DF convert */
884 /* Instruction costs on PPC620 processors. */
886 struct processor_costs ppc620_cost
= {
887 COSTS_N_INSNS (5), /* mulsi */
888 COSTS_N_INSNS (4), /* mulsi_const */
889 COSTS_N_INSNS (3), /* mulsi_const9 */
890 COSTS_N_INSNS (7), /* muldi */
891 COSTS_N_INSNS (21), /* divsi */
892 COSTS_N_INSNS (37), /* divdi */
893 COSTS_N_INSNS (3), /* fp */
894 COSTS_N_INSNS (3), /* dmul */
895 COSTS_N_INSNS (18), /* sdiv */
896 COSTS_N_INSNS (32), /* ddiv */
897 128, /* cache line size */
901 0, /* SF->DF convert */
904 /* Instruction costs on PPC630 processors. */
906 struct processor_costs ppc630_cost
= {
907 COSTS_N_INSNS (5), /* mulsi */
908 COSTS_N_INSNS (4), /* mulsi_const */
909 COSTS_N_INSNS (3), /* mulsi_const9 */
910 COSTS_N_INSNS (7), /* muldi */
911 COSTS_N_INSNS (21), /* divsi */
912 COSTS_N_INSNS (37), /* divdi */
913 COSTS_N_INSNS (3), /* fp */
914 COSTS_N_INSNS (3), /* dmul */
915 COSTS_N_INSNS (17), /* sdiv */
916 COSTS_N_INSNS (21), /* ddiv */
917 128, /* cache line size */
921 0, /* SF->DF convert */
924 /* Instruction costs on Cell processor. */
925 /* COSTS_N_INSNS (1) ~ one add. */
927 struct processor_costs ppccell_cost
= {
928 COSTS_N_INSNS (9/2)+2, /* mulsi */
929 COSTS_N_INSNS (6/2), /* mulsi_const */
930 COSTS_N_INSNS (6/2), /* mulsi_const9 */
931 COSTS_N_INSNS (15/2)+2, /* muldi */
932 COSTS_N_INSNS (38/2), /* divsi */
933 COSTS_N_INSNS (70/2), /* divdi */
934 COSTS_N_INSNS (10/2), /* fp */
935 COSTS_N_INSNS (10/2), /* dmul */
936 COSTS_N_INSNS (74/2), /* sdiv */
937 COSTS_N_INSNS (74/2), /* ddiv */
938 128, /* cache line size */
942 0, /* SF->DF convert */
945 /* Instruction costs on PPC750 and PPC7400 processors. */
947 struct processor_costs ppc750_cost
= {
948 COSTS_N_INSNS (5), /* mulsi */
949 COSTS_N_INSNS (3), /* mulsi_const */
950 COSTS_N_INSNS (2), /* mulsi_const9 */
951 COSTS_N_INSNS (5), /* muldi */
952 COSTS_N_INSNS (17), /* divsi */
953 COSTS_N_INSNS (17), /* divdi */
954 COSTS_N_INSNS (3), /* fp */
955 COSTS_N_INSNS (3), /* dmul */
956 COSTS_N_INSNS (17), /* sdiv */
957 COSTS_N_INSNS (31), /* ddiv */
958 32, /* cache line size */
962 0, /* SF->DF convert */
965 /* Instruction costs on PPC7450 processors. */
967 struct processor_costs ppc7450_cost
= {
968 COSTS_N_INSNS (4), /* mulsi */
969 COSTS_N_INSNS (3), /* mulsi_const */
970 COSTS_N_INSNS (3), /* mulsi_const9 */
971 COSTS_N_INSNS (4), /* muldi */
972 COSTS_N_INSNS (23), /* divsi */
973 COSTS_N_INSNS (23), /* divdi */
974 COSTS_N_INSNS (5), /* fp */
975 COSTS_N_INSNS (5), /* dmul */
976 COSTS_N_INSNS (21), /* sdiv */
977 COSTS_N_INSNS (35), /* ddiv */
978 32, /* cache line size */
982 0, /* SF->DF convert */
985 /* Instruction costs on PPC8540 processors. */
987 struct processor_costs ppc8540_cost
= {
988 COSTS_N_INSNS (4), /* mulsi */
989 COSTS_N_INSNS (4), /* mulsi_const */
990 COSTS_N_INSNS (4), /* mulsi_const9 */
991 COSTS_N_INSNS (4), /* muldi */
992 COSTS_N_INSNS (19), /* divsi */
993 COSTS_N_INSNS (19), /* divdi */
994 COSTS_N_INSNS (4), /* fp */
995 COSTS_N_INSNS (4), /* dmul */
996 COSTS_N_INSNS (29), /* sdiv */
997 COSTS_N_INSNS (29), /* ddiv */
998 32, /* cache line size */
1001 1, /* prefetch streams /*/
1002 0, /* SF->DF convert */
1005 /* Instruction costs on E300C2 and E300C3 cores. */
1007 struct processor_costs ppce300c2c3_cost
= {
1008 COSTS_N_INSNS (4), /* mulsi */
1009 COSTS_N_INSNS (4), /* mulsi_const */
1010 COSTS_N_INSNS (4), /* mulsi_const9 */
1011 COSTS_N_INSNS (4), /* muldi */
1012 COSTS_N_INSNS (19), /* divsi */
1013 COSTS_N_INSNS (19), /* divdi */
1014 COSTS_N_INSNS (3), /* fp */
1015 COSTS_N_INSNS (4), /* dmul */
1016 COSTS_N_INSNS (18), /* sdiv */
1017 COSTS_N_INSNS (33), /* ddiv */
1021 1, /* prefetch streams /*/
1022 0, /* SF->DF convert */
1025 /* Instruction costs on PPCE500MC processors. */
1027 struct processor_costs ppce500mc_cost
= {
1028 COSTS_N_INSNS (4), /* mulsi */
1029 COSTS_N_INSNS (4), /* mulsi_const */
1030 COSTS_N_INSNS (4), /* mulsi_const9 */
1031 COSTS_N_INSNS (4), /* muldi */
1032 COSTS_N_INSNS (14), /* divsi */
1033 COSTS_N_INSNS (14), /* divdi */
1034 COSTS_N_INSNS (8), /* fp */
1035 COSTS_N_INSNS (10), /* dmul */
1036 COSTS_N_INSNS (36), /* sdiv */
1037 COSTS_N_INSNS (66), /* ddiv */
1038 64, /* cache line size */
1041 1, /* prefetch streams */
1042 0, /* SF->DF convert */
1045 /* Instruction costs on PPCE500MC64 processors. */
1047 struct processor_costs ppce500mc64_cost
= {
1048 COSTS_N_INSNS (4), /* mulsi */
1049 COSTS_N_INSNS (4), /* mulsi_const */
1050 COSTS_N_INSNS (4), /* mulsi_const9 */
1051 COSTS_N_INSNS (4), /* muldi */
1052 COSTS_N_INSNS (14), /* divsi */
1053 COSTS_N_INSNS (14), /* divdi */
1054 COSTS_N_INSNS (4), /* fp */
1055 COSTS_N_INSNS (10), /* dmul */
1056 COSTS_N_INSNS (36), /* sdiv */
1057 COSTS_N_INSNS (66), /* ddiv */
1058 64, /* cache line size */
1061 1, /* prefetch streams */
1062 0, /* SF->DF convert */
1065 /* Instruction costs on PPCE5500 processors. */
1067 struct processor_costs ppce5500_cost
= {
1068 COSTS_N_INSNS (5), /* mulsi */
1069 COSTS_N_INSNS (5), /* mulsi_const */
1070 COSTS_N_INSNS (4), /* mulsi_const9 */
1071 COSTS_N_INSNS (5), /* muldi */
1072 COSTS_N_INSNS (14), /* divsi */
1073 COSTS_N_INSNS (14), /* divdi */
1074 COSTS_N_INSNS (7), /* fp */
1075 COSTS_N_INSNS (10), /* dmul */
1076 COSTS_N_INSNS (36), /* sdiv */
1077 COSTS_N_INSNS (66), /* ddiv */
1078 64, /* cache line size */
1081 1, /* prefetch streams */
1082 0, /* SF->DF convert */
1085 /* Instruction costs on PPCE6500 processors. */
1087 struct processor_costs ppce6500_cost
= {
1088 COSTS_N_INSNS (5), /* mulsi */
1089 COSTS_N_INSNS (5), /* mulsi_const */
1090 COSTS_N_INSNS (4), /* mulsi_const9 */
1091 COSTS_N_INSNS (5), /* muldi */
1092 COSTS_N_INSNS (14), /* divsi */
1093 COSTS_N_INSNS (14), /* divdi */
1094 COSTS_N_INSNS (7), /* fp */
1095 COSTS_N_INSNS (10), /* dmul */
1096 COSTS_N_INSNS (36), /* sdiv */
1097 COSTS_N_INSNS (66), /* ddiv */
1098 64, /* cache line size */
1101 1, /* prefetch streams */
1102 0, /* SF->DF convert */
1105 /* Instruction costs on AppliedMicro Titan processors. */
1107 struct processor_costs titan_cost
= {
1108 COSTS_N_INSNS (5), /* mulsi */
1109 COSTS_N_INSNS (5), /* mulsi_const */
1110 COSTS_N_INSNS (5), /* mulsi_const9 */
1111 COSTS_N_INSNS (5), /* muldi */
1112 COSTS_N_INSNS (18), /* divsi */
1113 COSTS_N_INSNS (18), /* divdi */
1114 COSTS_N_INSNS (10), /* fp */
1115 COSTS_N_INSNS (10), /* dmul */
1116 COSTS_N_INSNS (46), /* sdiv */
1117 COSTS_N_INSNS (72), /* ddiv */
1118 32, /* cache line size */
1121 1, /* prefetch streams */
1122 0, /* SF->DF convert */
1125 /* Instruction costs on POWER4 and POWER5 processors. */
1127 struct processor_costs power4_cost
= {
1128 COSTS_N_INSNS (3), /* mulsi */
1129 COSTS_N_INSNS (2), /* mulsi_const */
1130 COSTS_N_INSNS (2), /* mulsi_const9 */
1131 COSTS_N_INSNS (4), /* muldi */
1132 COSTS_N_INSNS (18), /* divsi */
1133 COSTS_N_INSNS (34), /* divdi */
1134 COSTS_N_INSNS (3), /* fp */
1135 COSTS_N_INSNS (3), /* dmul */
1136 COSTS_N_INSNS (17), /* sdiv */
1137 COSTS_N_INSNS (17), /* ddiv */
1138 128, /* cache line size */
1140 1024, /* l2 cache */
1141 8, /* prefetch streams */
1142 0, /* SF->DF convert */
1145 /* Instruction costs on POWER6 processors. */
1147 struct processor_costs power6_cost
= {
1148 COSTS_N_INSNS (8), /* mulsi */
1149 COSTS_N_INSNS (8), /* mulsi_const */
1150 COSTS_N_INSNS (8), /* mulsi_const9 */
1151 COSTS_N_INSNS (8), /* muldi */
1152 COSTS_N_INSNS (22), /* divsi */
1153 COSTS_N_INSNS (28), /* divdi */
1154 COSTS_N_INSNS (3), /* fp */
1155 COSTS_N_INSNS (3), /* dmul */
1156 COSTS_N_INSNS (13), /* sdiv */
1157 COSTS_N_INSNS (16), /* ddiv */
1158 128, /* cache line size */
1160 2048, /* l2 cache */
1161 16, /* prefetch streams */
1162 0, /* SF->DF convert */
1165 /* Instruction costs on POWER7 processors. */
1167 struct processor_costs power7_cost
= {
1168 COSTS_N_INSNS (2), /* mulsi */
1169 COSTS_N_INSNS (2), /* mulsi_const */
1170 COSTS_N_INSNS (2), /* mulsi_const9 */
1171 COSTS_N_INSNS (2), /* muldi */
1172 COSTS_N_INSNS (18), /* divsi */
1173 COSTS_N_INSNS (34), /* divdi */
1174 COSTS_N_INSNS (3), /* fp */
1175 COSTS_N_INSNS (3), /* dmul */
1176 COSTS_N_INSNS (13), /* sdiv */
1177 COSTS_N_INSNS (16), /* ddiv */
1178 128, /* cache line size */
1181 12, /* prefetch streams */
1182 COSTS_N_INSNS (3), /* SF->DF convert */
1185 /* Instruction costs on POWER8 processors. */
1187 struct processor_costs power8_cost
= {
1188 COSTS_N_INSNS (3), /* mulsi */
1189 COSTS_N_INSNS (3), /* mulsi_const */
1190 COSTS_N_INSNS (3), /* mulsi_const9 */
1191 COSTS_N_INSNS (3), /* muldi */
1192 COSTS_N_INSNS (19), /* divsi */
1193 COSTS_N_INSNS (35), /* divdi */
1194 COSTS_N_INSNS (3), /* fp */
1195 COSTS_N_INSNS (3), /* dmul */
1196 COSTS_N_INSNS (14), /* sdiv */
1197 COSTS_N_INSNS (17), /* ddiv */
1198 128, /* cache line size */
1201 12, /* prefetch streams */
1202 COSTS_N_INSNS (3), /* SF->DF convert */
1205 /* Instruction costs on POWER9 processors. */
1207 struct processor_costs power9_cost
= {
1208 COSTS_N_INSNS (3), /* mulsi */
1209 COSTS_N_INSNS (3), /* mulsi_const */
1210 COSTS_N_INSNS (3), /* mulsi_const9 */
1211 COSTS_N_INSNS (3), /* muldi */
1212 COSTS_N_INSNS (8), /* divsi */
1213 COSTS_N_INSNS (12), /* divdi */
1214 COSTS_N_INSNS (3), /* fp */
1215 COSTS_N_INSNS (3), /* dmul */
1216 COSTS_N_INSNS (13), /* sdiv */
1217 COSTS_N_INSNS (18), /* ddiv */
1218 128, /* cache line size */
1221 8, /* prefetch streams */
1222 COSTS_N_INSNS (3), /* SF->DF convert */
1225 /* Instruction costs on POWER A2 processors. */
1227 struct processor_costs ppca2_cost
= {
1228 COSTS_N_INSNS (16), /* mulsi */
1229 COSTS_N_INSNS (16), /* mulsi_const */
1230 COSTS_N_INSNS (16), /* mulsi_const9 */
1231 COSTS_N_INSNS (16), /* muldi */
1232 COSTS_N_INSNS (22), /* divsi */
1233 COSTS_N_INSNS (28), /* divdi */
1234 COSTS_N_INSNS (3), /* fp */
1235 COSTS_N_INSNS (3), /* dmul */
1236 COSTS_N_INSNS (59), /* sdiv */
1237 COSTS_N_INSNS (72), /* ddiv */
1240 2048, /* l2 cache */
1241 16, /* prefetch streams */
1242 0, /* SF->DF convert */
1246 /* Table that classifies rs6000 builtin functions (pure, const, etc.). */
1247 #undef RS6000_BUILTIN_0
1248 #undef RS6000_BUILTIN_1
1249 #undef RS6000_BUILTIN_2
1250 #undef RS6000_BUILTIN_3
1251 #undef RS6000_BUILTIN_A
1252 #undef RS6000_BUILTIN_D
1253 #undef RS6000_BUILTIN_E
1254 #undef RS6000_BUILTIN_H
1255 #undef RS6000_BUILTIN_P
1256 #undef RS6000_BUILTIN_Q
1257 #undef RS6000_BUILTIN_S
1258 #undef RS6000_BUILTIN_X
1260 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
1261 { NAME, ICODE, MASK, ATTR },
1263 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
1264 { NAME, ICODE, MASK, ATTR },
1266 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
1267 { NAME, ICODE, MASK, ATTR },
1269 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
1270 { NAME, ICODE, MASK, ATTR },
1272 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
1273 { NAME, ICODE, MASK, ATTR },
1275 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
1276 { NAME, ICODE, MASK, ATTR },
1278 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
1279 { NAME, ICODE, MASK, ATTR },
1281 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
1282 { NAME, ICODE, MASK, ATTR },
1284 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
1285 { NAME, ICODE, MASK, ATTR },
1287 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
1288 { NAME, ICODE, MASK, ATTR },
1290 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
1291 { NAME, ICODE, MASK, ATTR },
1293 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
1294 { NAME, ICODE, MASK, ATTR },
1296 struct rs6000_builtin_info_type
{
1298 const enum insn_code icode
;
1299 const HOST_WIDE_INT mask
;
1300 const unsigned attr
;
1303 static const struct rs6000_builtin_info_type rs6000_builtin_info
[] =
1305 #include "powerpcspe-builtin.def"
1308 #undef RS6000_BUILTIN_0
1309 #undef RS6000_BUILTIN_1
1310 #undef RS6000_BUILTIN_2
1311 #undef RS6000_BUILTIN_3
1312 #undef RS6000_BUILTIN_A
1313 #undef RS6000_BUILTIN_D
1314 #undef RS6000_BUILTIN_E
1315 #undef RS6000_BUILTIN_H
1316 #undef RS6000_BUILTIN_P
1317 #undef RS6000_BUILTIN_Q
1318 #undef RS6000_BUILTIN_S
1319 #undef RS6000_BUILTIN_X
1321 /* Support for -mveclibabi=<xxx> to control which vector library to use. */
1322 static tree (*rs6000_veclib_handler
) (combined_fn
, tree
, tree
);
1325 static bool rs6000_debug_legitimate_address_p (machine_mode
, rtx
, bool);
1326 static bool spe_func_has_64bit_regs_p (void);
1327 static struct machine_function
* rs6000_init_machine_status (void);
1328 static int rs6000_ra_ever_killed (void);
1329 static tree
rs6000_handle_longcall_attribute (tree
*, tree
, tree
, int, bool *);
1330 static tree
rs6000_handle_altivec_attribute (tree
*, tree
, tree
, int, bool *);
1331 static tree
rs6000_handle_struct_attribute (tree
*, tree
, tree
, int, bool *);
1332 static tree
rs6000_builtin_vectorized_libmass (combined_fn
, tree
, tree
);
1333 static void rs6000_emit_set_long_const (rtx
, HOST_WIDE_INT
);
1334 static int rs6000_memory_move_cost (machine_mode
, reg_class_t
, bool);
1335 static bool rs6000_debug_rtx_costs (rtx
, machine_mode
, int, int, int *, bool);
1336 static int rs6000_debug_address_cost (rtx
, machine_mode
, addr_space_t
,
1338 static int rs6000_debug_adjust_cost (rtx_insn
*, int, rtx_insn
*, int,
1340 static bool is_microcoded_insn (rtx_insn
*);
1341 static bool is_nonpipeline_insn (rtx_insn
*);
1342 static bool is_cracked_insn (rtx_insn
*);
1343 static bool is_load_insn (rtx
, rtx
*);
1344 static bool is_store_insn (rtx
, rtx
*);
1345 static bool set_to_load_agen (rtx_insn
*,rtx_insn
*);
1346 static bool insn_terminates_group_p (rtx_insn
*, enum group_termination
);
1347 static bool insn_must_be_first_in_group (rtx_insn
*);
1348 static bool insn_must_be_last_in_group (rtx_insn
*);
1349 static void altivec_init_builtins (void);
1350 static tree
builtin_function_type (machine_mode
, machine_mode
,
1351 machine_mode
, machine_mode
,
1352 enum rs6000_builtins
, const char *name
);
1353 static void rs6000_common_init_builtins (void);
1354 static void paired_init_builtins (void);
1355 static rtx
paired_expand_predicate_builtin (enum insn_code
, tree
, rtx
);
1356 static void spe_init_builtins (void);
1357 static void htm_init_builtins (void);
1358 static rtx
spe_expand_predicate_builtin (enum insn_code
, tree
, rtx
);
1359 static rtx
spe_expand_evsel_builtin (enum insn_code
, tree
, rtx
);
1360 static int rs6000_emit_int_cmove (rtx
, rtx
, rtx
, rtx
);
1361 static rs6000_stack_t
*rs6000_stack_info (void);
1362 static void is_altivec_return_reg (rtx
, void *);
1363 int easy_vector_constant (rtx
, machine_mode
);
1364 static rtx
rs6000_debug_legitimize_address (rtx
, rtx
, machine_mode
);
1365 static rtx
rs6000_legitimize_tls_address (rtx
, enum tls_model
);
1366 static rtx
rs6000_darwin64_record_arg (CUMULATIVE_ARGS
*, const_tree
,
1369 static void macho_branch_islands (void);
1371 static rtx
rs6000_legitimize_reload_address (rtx
, machine_mode
, int, int,
1373 static rtx
rs6000_debug_legitimize_reload_address (rtx
, machine_mode
, int,
1375 static bool rs6000_mode_dependent_address (const_rtx
);
1376 static bool rs6000_debug_mode_dependent_address (const_rtx
);
1377 static enum reg_class
rs6000_secondary_reload_class (enum reg_class
,
1379 static enum reg_class
rs6000_debug_secondary_reload_class (enum reg_class
,
1382 static enum reg_class
rs6000_preferred_reload_class (rtx
, enum reg_class
);
1383 static enum reg_class
rs6000_debug_preferred_reload_class (rtx
,
1385 static bool rs6000_secondary_memory_needed (enum reg_class
, enum reg_class
,
1387 static bool rs6000_debug_secondary_memory_needed (enum reg_class
,
1390 static bool rs6000_cannot_change_mode_class (machine_mode
,
1393 static bool rs6000_debug_cannot_change_mode_class (machine_mode
,
1396 static bool rs6000_save_toc_in_prologue_p (void);
1397 static rtx
rs6000_internal_arg_pointer (void);
1399 rtx (*rs6000_legitimize_reload_address_ptr
) (rtx
, machine_mode
, int, int,
1401 = rs6000_legitimize_reload_address
;
1403 static bool (*rs6000_mode_dependent_address_ptr
) (const_rtx
)
1404 = rs6000_mode_dependent_address
;
1406 enum reg_class (*rs6000_secondary_reload_class_ptr
) (enum reg_class
,
1408 = rs6000_secondary_reload_class
;
1410 enum reg_class (*rs6000_preferred_reload_class_ptr
) (rtx
, enum reg_class
)
1411 = rs6000_preferred_reload_class
;
1413 bool (*rs6000_secondary_memory_needed_ptr
) (enum reg_class
, enum reg_class
,
1415 = rs6000_secondary_memory_needed
;
1417 bool (*rs6000_cannot_change_mode_class_ptr
) (machine_mode
,
1420 = rs6000_cannot_change_mode_class
;
1422 const int INSN_NOT_AVAILABLE
= -1;
1424 static void rs6000_print_isa_options (FILE *, int, const char *,
1426 static void rs6000_print_builtin_options (FILE *, int, const char *,
1428 static HOST_WIDE_INT
rs6000_disable_incompatible_switches (void);
1430 static enum rs6000_reg_type
register_to_reg_type (rtx
, bool *);
1431 static bool rs6000_secondary_reload_move (enum rs6000_reg_type
,
1432 enum rs6000_reg_type
,
1434 secondary_reload_info
*,
1436 rtl_opt_pass
*make_pass_analyze_swaps (gcc::context
*);
1437 static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused
));
1438 static tree
rs6000_fold_builtin (tree
, int, tree
*, bool);
1440 /* Hash table stuff for keeping track of TOC entries. */
1442 struct GTY((for_user
)) toc_hash_struct
1444 /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
1445 ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
1447 machine_mode key_mode
;
1451 struct toc_hasher
: ggc_ptr_hash
<toc_hash_struct
>
1453 static hashval_t
hash (toc_hash_struct
*);
1454 static bool equal (toc_hash_struct
*, toc_hash_struct
*);
1457 static GTY (()) hash_table
<toc_hasher
> *toc_hash_table
;
1459 /* Hash table to keep track of the argument types for builtin functions. */
1461 struct GTY((for_user
)) builtin_hash_struct
1464 machine_mode mode
[4]; /* return value + 3 arguments. */
1465 unsigned char uns_p
[4]; /* and whether the types are unsigned. */
1468 struct builtin_hasher
: ggc_ptr_hash
<builtin_hash_struct
>
1470 static hashval_t
hash (builtin_hash_struct
*);
1471 static bool equal (builtin_hash_struct
*, builtin_hash_struct
*);
1474 static GTY (()) hash_table
<builtin_hasher
> *builtin_hash_table
;
1477 /* Default register names. */
1478 char rs6000_reg_names
[][8] =
1480 "0", "1", "2", "3", "4", "5", "6", "7",
1481 "8", "9", "10", "11", "12", "13", "14", "15",
1482 "16", "17", "18", "19", "20", "21", "22", "23",
1483 "24", "25", "26", "27", "28", "29", "30", "31",
1484 "0", "1", "2", "3", "4", "5", "6", "7",
1485 "8", "9", "10", "11", "12", "13", "14", "15",
1486 "16", "17", "18", "19", "20", "21", "22", "23",
1487 "24", "25", "26", "27", "28", "29", "30", "31",
1488 "mq", "lr", "ctr","ap",
1489 "0", "1", "2", "3", "4", "5", "6", "7",
1491 /* AltiVec registers. */
1492 "0", "1", "2", "3", "4", "5", "6", "7",
1493 "8", "9", "10", "11", "12", "13", "14", "15",
1494 "16", "17", "18", "19", "20", "21", "22", "23",
1495 "24", "25", "26", "27", "28", "29", "30", "31",
1497 /* SPE registers. */
1498 "spe_acc", "spefscr",
1499 /* Soft frame pointer. */
1501 /* HTM SPR registers. */
1502 "tfhar", "tfiar", "texasr",
1503 /* SPE High registers. */
1504 "0", "1", "2", "3", "4", "5", "6", "7",
1505 "8", "9", "10", "11", "12", "13", "14", "15",
1506 "16", "17", "18", "19", "20", "21", "22", "23",
1507 "24", "25", "26", "27", "28", "29", "30", "31"
1510 #ifdef TARGET_REGNAMES
1511 static const char alt_reg_names
[][8] =
1513 "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
1514 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
1515 "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
1516 "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
1517 "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
1518 "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
1519 "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
1520 "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
1521 "mq", "lr", "ctr", "ap",
1522 "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
1524 /* AltiVec registers. */
1525 "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
1526 "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
1527 "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
1528 "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
1530 /* SPE registers. */
1531 "spe_acc", "spefscr",
1532 /* Soft frame pointer. */
1534 /* HTM SPR registers. */
1535 "tfhar", "tfiar", "texasr",
1536 /* SPE High registers. */
1537 "%rh0", "%rh1", "%rh2", "%rh3", "%rh4", "%rh5", "%rh6", "%rh7",
1538 "%rh8", "%rh9", "%rh10", "%r11", "%rh12", "%rh13", "%rh14", "%rh15",
1539 "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
1540 "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
1544 /* Table of valid machine attributes. */
1546 static const struct attribute_spec rs6000_attribute_table
[] =
1548 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
1549 affects_type_identity } */
1550 { "altivec", 1, 1, false, true, false, rs6000_handle_altivec_attribute
,
1552 { "longcall", 0, 0, false, true, true, rs6000_handle_longcall_attribute
,
1554 { "shortcall", 0, 0, false, true, true, rs6000_handle_longcall_attribute
,
1556 { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute
,
1558 { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute
,
1560 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1561 SUBTARGET_ATTRIBUTE_TABLE
,
1563 { NULL
, 0, 0, false, false, false, NULL
, false }
1566 #ifndef TARGET_PROFILE_KERNEL
1567 #define TARGET_PROFILE_KERNEL 0
1570 /* The VRSAVE bitmask puts bit %v0 as the most significant bit. */
1571 #define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
1573 /* Initialize the GCC target structure. */
1574 #undef TARGET_ATTRIBUTE_TABLE
1575 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1576 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1577 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1578 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1579 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1581 #undef TARGET_ASM_ALIGNED_DI_OP
1582 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1584 /* Default unaligned ops are only provided for ELF. Find the ops needed
1585 for non-ELF systems. */
1586 #ifndef OBJECT_FORMAT_ELF
1588 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1590 #undef TARGET_ASM_UNALIGNED_HI_OP
1591 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1592 #undef TARGET_ASM_UNALIGNED_SI_OP
1593 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1594 #undef TARGET_ASM_UNALIGNED_DI_OP
1595 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1598 #undef TARGET_ASM_UNALIGNED_HI_OP
1599 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1600 #undef TARGET_ASM_UNALIGNED_SI_OP
1601 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1602 #undef TARGET_ASM_UNALIGNED_DI_OP
1603 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1604 #undef TARGET_ASM_ALIGNED_DI_OP
1605 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1609 /* This hook deals with fixups for relocatable code and DI-mode objects
1611 #undef TARGET_ASM_INTEGER
1612 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1614 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1615 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1616 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1619 #undef TARGET_SET_UP_BY_PROLOGUE
1620 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1622 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
1623 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
1624 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
1625 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
1626 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
1627 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
1628 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
1629 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
1630 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
1631 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
1632 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
1633 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components
1635 #undef TARGET_EXTRA_LIVE_ON_ENTRY
1636 #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry
1638 #undef TARGET_INTERNAL_ARG_POINTER
1639 #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer
1641 #undef TARGET_HAVE_TLS
1642 #define TARGET_HAVE_TLS HAVE_AS_TLS
1644 #undef TARGET_CANNOT_FORCE_CONST_MEM
1645 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1647 #undef TARGET_DELEGITIMIZE_ADDRESS
1648 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1650 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1651 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1653 #undef TARGET_LEGITIMATE_COMBINED_INSN
1654 #define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn
1656 #undef TARGET_ASM_FUNCTION_PROLOGUE
1657 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1658 #undef TARGET_ASM_FUNCTION_EPILOGUE
1659 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1661 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1662 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1664 #undef TARGET_LEGITIMIZE_ADDRESS
1665 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1667 #undef TARGET_SCHED_VARIABLE_ISSUE
1668 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1670 #undef TARGET_SCHED_ISSUE_RATE
1671 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1672 #undef TARGET_SCHED_ADJUST_COST
1673 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1674 #undef TARGET_SCHED_ADJUST_PRIORITY
1675 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1676 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1677 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1678 #undef TARGET_SCHED_INIT
1679 #define TARGET_SCHED_INIT rs6000_sched_init
1680 #undef TARGET_SCHED_FINISH
1681 #define TARGET_SCHED_FINISH rs6000_sched_finish
1682 #undef TARGET_SCHED_REORDER
1683 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1684 #undef TARGET_SCHED_REORDER2
1685 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1687 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1688 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1690 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1691 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1693 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1694 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1695 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1696 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1697 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1698 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1699 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1700 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1702 #undef TARGET_SCHED_CAN_SPECULATE_INSN
1703 #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn
1705 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1706 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1707 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1708 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1709 rs6000_builtin_support_vector_misalignment
1710 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1711 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1712 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1713 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1714 rs6000_builtin_vectorization_cost
1715 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1716 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1717 rs6000_preferred_simd_mode
1718 #undef TARGET_VECTORIZE_INIT_COST
1719 #define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
1720 #undef TARGET_VECTORIZE_ADD_STMT_COST
1721 #define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
1722 #undef TARGET_VECTORIZE_FINISH_COST
1723 #define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
1724 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
1725 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
1727 #undef TARGET_INIT_BUILTINS
1728 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1729 #undef TARGET_BUILTIN_DECL
1730 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1732 #undef TARGET_FOLD_BUILTIN
1733 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1734 #undef TARGET_GIMPLE_FOLD_BUILTIN
1735 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1737 #undef TARGET_EXPAND_BUILTIN
1738 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1740 #undef TARGET_MANGLE_TYPE
1741 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1743 #undef TARGET_INIT_LIBFUNCS
1744 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1747 #undef TARGET_BINDS_LOCAL_P
1748 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1751 #undef TARGET_MS_BITFIELD_LAYOUT_P
1752 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1754 #undef TARGET_ASM_OUTPUT_MI_THUNK
1755 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1757 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1758 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1760 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1761 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1763 #undef TARGET_REGISTER_MOVE_COST
1764 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1765 #undef TARGET_MEMORY_MOVE_COST
1766 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1767 #undef TARGET_CANNOT_COPY_INSN_P
1768 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1769 #undef TARGET_RTX_COSTS
1770 #define TARGET_RTX_COSTS rs6000_rtx_costs
1771 #undef TARGET_ADDRESS_COST
1772 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1774 #undef TARGET_DWARF_REGISTER_SPAN
1775 #define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span
1777 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1778 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1780 #undef TARGET_MEMBER_TYPE_FORCES_BLK
1781 #define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk
1783 #undef TARGET_PROMOTE_FUNCTION_MODE
1784 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1786 #undef TARGET_RETURN_IN_MEMORY
1787 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1789 #undef TARGET_RETURN_IN_MSB
1790 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1792 #undef TARGET_SETUP_INCOMING_VARARGS
1793 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1795 /* Always strict argument naming on rs6000. */
1796 #undef TARGET_STRICT_ARGUMENT_NAMING
1797 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1798 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1799 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1800 #undef TARGET_SPLIT_COMPLEX_ARG
1801 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1802 #undef TARGET_MUST_PASS_IN_STACK
1803 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1804 #undef TARGET_PASS_BY_REFERENCE
1805 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1806 #undef TARGET_ARG_PARTIAL_BYTES
1807 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1808 #undef TARGET_FUNCTION_ARG_ADVANCE
1809 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1810 #undef TARGET_FUNCTION_ARG
1811 #define TARGET_FUNCTION_ARG rs6000_function_arg
1812 #undef TARGET_FUNCTION_ARG_BOUNDARY
1813 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1815 #undef TARGET_BUILD_BUILTIN_VA_LIST
1816 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1818 #undef TARGET_EXPAND_BUILTIN_VA_START
1819 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1821 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1822 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1824 #undef TARGET_EH_RETURN_FILTER_MODE
1825 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1827 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1828 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1830 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1831 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1833 #undef TARGET_FLOATN_MODE
1834 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1836 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1837 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1839 #undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
1840 #define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
1842 #undef TARGET_MD_ASM_ADJUST
1843 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1845 #undef TARGET_OPTION_OVERRIDE
1846 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1848 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1849 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1850 rs6000_builtin_vectorized_function
1852 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1853 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1854 rs6000_builtin_md_vectorized_function
1856 #undef TARGET_STACK_PROTECT_GUARD
1857 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1860 #undef TARGET_STACK_PROTECT_FAIL
1861 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1865 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1866 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1869 /* Use a 32-bit anchor range. This leads to sequences like:
1871 addis tmp,anchor,high
1874 where tmp itself acts as an anchor, and can be shared between
1875 accesses to the same 64k page. */
1876 #undef TARGET_MIN_ANCHOR_OFFSET
1877 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1878 #undef TARGET_MAX_ANCHOR_OFFSET
1879 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1880 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1881 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1882 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1883 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1885 #undef TARGET_BUILTIN_RECIPROCAL
1886 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1888 #undef TARGET_EXPAND_TO_RTL_HOOK
1889 #define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
1891 #undef TARGET_INSTANTIATE_DECLS
1892 #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
1894 #undef TARGET_SECONDARY_RELOAD
1895 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1897 #undef TARGET_LEGITIMATE_ADDRESS_P
1898 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1900 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1901 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1904 #define TARGET_LRA_P rs6000_lra_p
1906 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1907 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1909 #undef TARGET_CAN_ELIMINATE
1910 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1912 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1913 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1915 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1916 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1918 #undef TARGET_TRAMPOLINE_INIT
1919 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1921 #undef TARGET_FUNCTION_VALUE
1922 #define TARGET_FUNCTION_VALUE rs6000_function_value
1924 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1925 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1927 #undef TARGET_OPTION_SAVE
1928 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1930 #undef TARGET_OPTION_RESTORE
1931 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1933 #undef TARGET_OPTION_PRINT
1934 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1936 #undef TARGET_CAN_INLINE_P
1937 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1939 #undef TARGET_SET_CURRENT_FUNCTION
1940 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1942 #undef TARGET_LEGITIMATE_CONSTANT_P
1943 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1945 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
1946 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
1948 #undef TARGET_CAN_USE_DOLOOP_P
1949 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1951 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1952 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1954 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1955 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1956 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1957 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1958 #undef TARGET_UNWIND_WORD_MODE
1959 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1961 #undef TARGET_OFFLOAD_OPTIONS
1962 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1964 #undef TARGET_C_MODE_FOR_SUFFIX
1965 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1967 #undef TARGET_INVALID_BINARY_OP
1968 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1970 #undef TARGET_OPTAB_SUPPORTED_P
1971 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1973 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1974 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1976 #undef TARGET_HARD_REGNO_MODE_OK
1977 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1979 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1980 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1981 rs6000_hard_regno_call_part_clobbered
1984 /* Processor table. */
1987 const char *const name
; /* Canonical processor name. */
1988 const enum processor_type processor
; /* Processor type enum value. */
1989 const HOST_WIDE_INT target_enable
; /* Target flags to enable. */
1992 static struct rs6000_ptt
const processor_target_table
[] =
1994 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1995 #include "powerpcspe-cpus.def"
1999 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
2003 rs6000_cpu_name_lookup (const char *name
)
2009 for (i
= 0; i
< ARRAY_SIZE (processor_target_table
); i
++)
2010 if (! strcmp (name
, processor_target_table
[i
].name
))
2018 /* Return number of consecutive hard regs needed starting at reg REGNO
2019 to hold something of mode MODE.
2020 This is ordinarily the length in words of a value of mode MODE
2021 but can be less for certain modes in special long registers.
2023 For the SPE, GPRs are 64 bits but only 32 bits are visible in
2024 scalar instructions. The upper 32 bits are only available to the
2027 POWER and PowerPC GPRs hold 32 bits worth;
2028 PowerPC64 GPRs and FPRs point register holds 64 bits worth. */
2031 rs6000_hard_regno_nregs_internal (int regno
, machine_mode mode
)
2033 unsigned HOST_WIDE_INT reg_size
;
2035 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
2036 128-bit floating point that can go in vector registers, which has VSX
2037 memory addressing. */
2038 if (FP_REGNO_P (regno
))
2039 reg_size
= (VECTOR_MEM_VSX_P (mode
) || FLOAT128_VECTOR_P (mode
)
2040 ? UNITS_PER_VSX_WORD
2041 : UNITS_PER_FP_WORD
);
2043 else if (SPE_SIMD_REGNO_P (regno
) && TARGET_SPE
&& SPE_VECTOR_MODE (mode
))
2044 reg_size
= UNITS_PER_SPE_WORD
;
2046 else if (ALTIVEC_REGNO_P (regno
))
2047 reg_size
= UNITS_PER_ALTIVEC_WORD
;
2049 /* The value returned for SCmode in the E500 double case is 2 for
2050 ABI compatibility; storing an SCmode value in a single register
2051 would require function_arg and rs6000_spe_function_arg to handle
2052 SCmode so as to pass the value correctly in a pair of
2054 else if (TARGET_E500_DOUBLE
&& FLOAT_MODE_P (mode
) && mode
!= SCmode
2055 && !DECIMAL_FLOAT_MODE_P (mode
) && SPE_SIMD_REGNO_P (regno
))
2056 reg_size
= UNITS_PER_FP_WORD
;
2059 reg_size
= UNITS_PER_WORD
;
2061 return (GET_MODE_SIZE (mode
) + reg_size
- 1) / reg_size
;
2064 /* Value is 1 if hard register REGNO can hold a value of machine-mode
2067 rs6000_hard_regno_mode_ok_uncached (int regno
, machine_mode mode
)
2069 int last_regno
= regno
+ rs6000_hard_regno_nregs
[mode
][regno
] - 1;
2071 if (COMPLEX_MODE_P (mode
))
2072 mode
= GET_MODE_INNER (mode
);
2074 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
2075 register combinations, and use PTImode where we need to deal with quad
2076 word memory operations. Don't allow quad words in the argument or frame
2077 pointer registers, just registers 0..31. */
2078 if (mode
== PTImode
)
2079 return (IN_RANGE (regno
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
)
2080 && IN_RANGE (last_regno
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
)
2081 && ((regno
& 1) == 0));
2083 /* VSX registers that overlap the FPR registers are larger than for non-VSX
2084 implementations. Don't allow an item to be split between a FP register
2085 and an Altivec register. Allow TImode in all VSX registers if the user
2087 if (TARGET_VSX
&& VSX_REGNO_P (regno
)
2088 && (VECTOR_MEM_VSX_P (mode
)
2089 || FLOAT128_VECTOR_P (mode
)
2090 || reg_addr
[mode
].scalar_in_vmx_p
2091 || (TARGET_VSX_TIMODE
&& mode
== TImode
)
2092 || (TARGET_VADDUQM
&& mode
== V1TImode
)))
2094 if (FP_REGNO_P (regno
))
2095 return FP_REGNO_P (last_regno
);
2097 if (ALTIVEC_REGNO_P (regno
))
2099 if (GET_MODE_SIZE (mode
) != 16 && !reg_addr
[mode
].scalar_in_vmx_p
)
2102 return ALTIVEC_REGNO_P (last_regno
);
2106 /* The GPRs can hold any mode, but values bigger than one register
2107 cannot go past R31. */
2108 if (INT_REGNO_P (regno
))
2109 return INT_REGNO_P (last_regno
);
2111 /* The float registers (except for VSX vector modes) can only hold floating
2112 modes and DImode. */
2113 if (FP_REGNO_P (regno
))
2115 if (FLOAT128_VECTOR_P (mode
))
2118 if (SCALAR_FLOAT_MODE_P (mode
)
2119 && (mode
!= TDmode
|| (regno
% 2) == 0)
2120 && FP_REGNO_P (last_regno
))
2123 if (GET_MODE_CLASS (mode
) == MODE_INT
)
2125 if(GET_MODE_SIZE (mode
) == UNITS_PER_FP_WORD
)
2128 if (TARGET_VSX_SMALL_INTEGER
)
2133 if (TARGET_P9_VECTOR
&& (mode
== HImode
|| mode
== QImode
))
2138 if (PAIRED_SIMD_REGNO_P (regno
) && TARGET_PAIRED_FLOAT
2139 && PAIRED_VECTOR_MODE (mode
))
2145 /* The CR register can only hold CC modes. */
2146 if (CR_REGNO_P (regno
))
2147 return GET_MODE_CLASS (mode
) == MODE_CC
;
2149 if (CA_REGNO_P (regno
))
2150 return mode
== Pmode
|| mode
== SImode
;
2152 /* AltiVec only in AldyVec registers. */
2153 if (ALTIVEC_REGNO_P (regno
))
2154 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
)
2155 || mode
== V1TImode
);
2157 /* ...but GPRs can hold SIMD data on the SPE in one register. */
2158 if (SPE_SIMD_REGNO_P (regno
) && TARGET_SPE
&& SPE_VECTOR_MODE (mode
))
2161 /* We cannot put non-VSX TImode or PTImode anywhere except general register
2162 and it must be able to fit within the register set. */
2164 return GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
;
2167 /* Implement TARGET_HARD_REGNO_MODE_OK. */
2170 rs6000_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
2172 return rs6000_hard_regno_mode_ok_p
[mode
][regno
];
2175 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
2178 rs6000_hard_regno_call_part_clobbered (unsigned int regno
, machine_mode mode
)
2182 && GET_MODE_SIZE (mode
) > 4
2183 && INT_REGNO_P (regno
))
2187 && FP_REGNO_P (regno
)
2188 && GET_MODE_SIZE (mode
) > 8
2189 && !FLOAT128_2REG_P (mode
))
2195 /* Print interesting facts about registers. */
2197 rs6000_debug_reg_print (int first_regno
, int last_regno
, const char *reg_name
)
2201 for (r
= first_regno
; r
<= last_regno
; ++r
)
2203 const char *comma
= "";
2206 if (first_regno
== last_regno
)
2207 fprintf (stderr
, "%s:\t", reg_name
);
2209 fprintf (stderr
, "%s%d:\t", reg_name
, r
- first_regno
);
2212 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2213 if (rs6000_hard_regno_mode_ok_p
[m
][r
] && rs6000_hard_regno_nregs
[m
][r
])
2217 fprintf (stderr
, ",\n\t");
2222 if (rs6000_hard_regno_nregs
[m
][r
] > 1)
2223 len
+= fprintf (stderr
, "%s%s/%d", comma
, GET_MODE_NAME (m
),
2224 rs6000_hard_regno_nregs
[m
][r
]);
2226 len
+= fprintf (stderr
, "%s%s", comma
, GET_MODE_NAME (m
));
2231 if (call_used_regs
[r
])
2235 fprintf (stderr
, ",\n\t");
2240 len
+= fprintf (stderr
, "%s%s", comma
, "call-used");
2248 fprintf (stderr
, ",\n\t");
2253 len
+= fprintf (stderr
, "%s%s", comma
, "fixed");
2259 fprintf (stderr
, ",\n\t");
2263 len
+= fprintf (stderr
, "%sreg-class = %s", comma
,
2264 reg_class_names
[(int)rs6000_regno_regclass
[r
]]);
2269 fprintf (stderr
, ",\n\t");
2273 fprintf (stderr
, "%sregno = %d\n", comma
, r
);
2278 rs6000_debug_vector_unit (enum rs6000_vector v
)
2284 case VECTOR_NONE
: ret
= "none"; break;
2285 case VECTOR_ALTIVEC
: ret
= "altivec"; break;
2286 case VECTOR_VSX
: ret
= "vsx"; break;
2287 case VECTOR_P8_VECTOR
: ret
= "p8_vector"; break;
2288 case VECTOR_PAIRED
: ret
= "paired"; break;
2289 case VECTOR_SPE
: ret
= "spe"; break;
2290 case VECTOR_OTHER
: ret
= "other"; break;
2291 default: ret
= "unknown"; break;
2297 /* Inner function printing just the address mask for a particular reload
2299 DEBUG_FUNCTION
char *
2300 rs6000_debug_addr_mask (addr_mask_type mask
, bool keep_spaces
)
2305 if ((mask
& RELOAD_REG_VALID
) != 0)
2307 else if (keep_spaces
)
2310 if ((mask
& RELOAD_REG_MULTIPLE
) != 0)
2312 else if (keep_spaces
)
2315 if ((mask
& RELOAD_REG_INDEXED
) != 0)
2317 else if (keep_spaces
)
2320 if ((mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
2322 else if ((mask
& RELOAD_REG_OFFSET
) != 0)
2324 else if (keep_spaces
)
2327 if ((mask
& RELOAD_REG_PRE_INCDEC
) != 0)
2329 else if (keep_spaces
)
2332 if ((mask
& RELOAD_REG_PRE_MODIFY
) != 0)
2334 else if (keep_spaces
)
2337 if ((mask
& RELOAD_REG_AND_M16
) != 0)
2339 else if (keep_spaces
)
2347 /* Print the address masks in a human readble fashion. */
2349 rs6000_debug_print_mode (ssize_t m
)
2355 fprintf (stderr
, "Mode: %-5s", GET_MODE_NAME (m
));
2356 for (rc
= 0; rc
< N_RELOAD_REG
; rc
++)
2357 fprintf (stderr
, " %s: %s", reload_reg_map
[rc
].name
,
2358 rs6000_debug_addr_mask (reg_addr
[m
].addr_mask
[rc
], true));
2360 if ((reg_addr
[m
].reload_store
!= CODE_FOR_nothing
)
2361 || (reg_addr
[m
].reload_load
!= CODE_FOR_nothing
))
2362 fprintf (stderr
, " Reload=%c%c",
2363 (reg_addr
[m
].reload_store
!= CODE_FOR_nothing
) ? 's' : '*',
2364 (reg_addr
[m
].reload_load
!= CODE_FOR_nothing
) ? 'l' : '*');
2366 spaces
+= sizeof (" Reload=sl") - 1;
2368 if (reg_addr
[m
].scalar_in_vmx_p
)
2370 fprintf (stderr
, "%*s Upper=y", spaces
, "");
2374 spaces
+= sizeof (" Upper=y") - 1;
2376 fuse_extra_p
= ((reg_addr
[m
].fusion_gpr_ld
!= CODE_FOR_nothing
)
2377 || reg_addr
[m
].fused_toc
);
2380 for (rc
= 0; rc
< N_RELOAD_REG
; rc
++)
2382 if (rc
!= RELOAD_REG_ANY
)
2384 if (reg_addr
[m
].fusion_addi_ld
[rc
] != CODE_FOR_nothing
2385 || reg_addr
[m
].fusion_addi_ld
[rc
] != CODE_FOR_nothing
2386 || reg_addr
[m
].fusion_addi_st
[rc
] != CODE_FOR_nothing
2387 || reg_addr
[m
].fusion_addis_ld
[rc
] != CODE_FOR_nothing
2388 || reg_addr
[m
].fusion_addis_st
[rc
] != CODE_FOR_nothing
)
2390 fuse_extra_p
= true;
2399 fprintf (stderr
, "%*s Fuse:", spaces
, "");
2402 for (rc
= 0; rc
< N_RELOAD_REG
; rc
++)
2404 if (rc
!= RELOAD_REG_ANY
)
2408 if (reg_addr
[m
].fusion_addis_ld
[rc
] != CODE_FOR_nothing
)
2410 else if (reg_addr
[m
].fusion_addi_ld
[rc
] != CODE_FOR_nothing
)
2415 if (reg_addr
[m
].fusion_addis_st
[rc
] != CODE_FOR_nothing
)
2417 else if (reg_addr
[m
].fusion_addi_st
[rc
] != CODE_FOR_nothing
)
2422 if (load
== '-' && store
== '-')
2426 fprintf (stderr
, "%*s%c=%c%c", (spaces
+ 1), "",
2427 reload_reg_map
[rc
].name
[0], load
, store
);
2433 if (reg_addr
[m
].fusion_gpr_ld
!= CODE_FOR_nothing
)
2435 fprintf (stderr
, "%*sP8gpr", (spaces
+ 1), "");
2439 spaces
+= sizeof (" P8gpr") - 1;
2441 if (reg_addr
[m
].fused_toc
)
2443 fprintf (stderr
, "%*sToc", (spaces
+ 1), "");
2447 spaces
+= sizeof (" Toc") - 1;
2450 spaces
+= sizeof (" Fuse: G=ls F=ls v=ls P8gpr Toc") - 1;
2452 if (rs6000_vector_unit
[m
] != VECTOR_NONE
2453 || rs6000_vector_mem
[m
] != VECTOR_NONE
)
2455 fprintf (stderr
, "%*s vector: arith=%-10s mem=%s",
2457 rs6000_debug_vector_unit (rs6000_vector_unit
[m
]),
2458 rs6000_debug_vector_unit (rs6000_vector_mem
[m
]));
2461 fputs ("\n", stderr
);
2464 #define DEBUG_FMT_ID "%-32s= "
2465 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2466 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2467 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
2469 /* Print various interesting information with -mdebug=reg. */
2471 rs6000_debug_reg_global (void)
2473 static const char *const tf
[2] = { "false", "true" };
2474 const char *nl
= (const char *)0;
2477 char costly_num
[20];
2479 char flags_buffer
[40];
2480 const char *costly_str
;
2481 const char *nop_str
;
2482 const char *trace_str
;
2483 const char *abi_str
;
2484 const char *cmodel_str
;
2485 struct cl_target_option cl_opts
;
2487 /* Modes we want tieable information on. */
2488 static const machine_mode print_tieable_modes
[] = {
2526 /* Virtual regs we are interested in. */
2527 const static struct {
2528 int regno
; /* register number. */
2529 const char *name
; /* register name. */
2530 } virtual_regs
[] = {
2531 { STACK_POINTER_REGNUM
, "stack pointer:" },
2532 { TOC_REGNUM
, "toc: " },
2533 { STATIC_CHAIN_REGNUM
, "static chain: " },
2534 { RS6000_PIC_OFFSET_TABLE_REGNUM
, "pic offset: " },
2535 { HARD_FRAME_POINTER_REGNUM
, "hard frame: " },
2536 { ARG_POINTER_REGNUM
, "arg pointer: " },
2537 { FRAME_POINTER_REGNUM
, "frame pointer:" },
2538 { FIRST_PSEUDO_REGISTER
, "first pseudo: " },
2539 { FIRST_VIRTUAL_REGISTER
, "first virtual:" },
2540 { VIRTUAL_INCOMING_ARGS_REGNUM
, "incoming_args:" },
2541 { VIRTUAL_STACK_VARS_REGNUM
, "stack_vars: " },
2542 { VIRTUAL_STACK_DYNAMIC_REGNUM
, "stack_dynamic:" },
2543 { VIRTUAL_OUTGOING_ARGS_REGNUM
, "outgoing_args:" },
2544 { VIRTUAL_CFA_REGNUM
, "cfa (frame): " },
2545 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM
, "stack boundry:" },
2546 { LAST_VIRTUAL_REGISTER
, "last virtual: " },
2549 fputs ("\nHard register information:\n", stderr
);
2550 rs6000_debug_reg_print (FIRST_GPR_REGNO
, LAST_GPR_REGNO
, "gr");
2551 rs6000_debug_reg_print (FIRST_FPR_REGNO
, LAST_FPR_REGNO
, "fp");
2552 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO
,
2555 rs6000_debug_reg_print (LR_REGNO
, LR_REGNO
, "lr");
2556 rs6000_debug_reg_print (CTR_REGNO
, CTR_REGNO
, "ctr");
2557 rs6000_debug_reg_print (CR0_REGNO
, CR7_REGNO
, "cr");
2558 rs6000_debug_reg_print (CA_REGNO
, CA_REGNO
, "ca");
2559 rs6000_debug_reg_print (VRSAVE_REGNO
, VRSAVE_REGNO
, "vrsave");
2560 rs6000_debug_reg_print (VSCR_REGNO
, VSCR_REGNO
, "vscr");
2561 rs6000_debug_reg_print (SPE_ACC_REGNO
, SPE_ACC_REGNO
, "spe_a");
2562 rs6000_debug_reg_print (SPEFSCR_REGNO
, SPEFSCR_REGNO
, "spe_f");
2564 fputs ("\nVirtual/stack/frame registers:\n", stderr
);
2565 for (v
= 0; v
< ARRAY_SIZE (virtual_regs
); v
++)
2566 fprintf (stderr
, "%s regno = %3d\n", virtual_regs
[v
].name
, virtual_regs
[v
].regno
);
2570 "d reg_class = %s\n"
2571 "f reg_class = %s\n"
2572 "v reg_class = %s\n"
2573 "wa reg_class = %s\n"
2574 "wb reg_class = %s\n"
2575 "wd reg_class = %s\n"
2576 "we reg_class = %s\n"
2577 "wf reg_class = %s\n"
2578 "wg reg_class = %s\n"
2579 "wh reg_class = %s\n"
2580 "wi reg_class = %s\n"
2581 "wj reg_class = %s\n"
2582 "wk reg_class = %s\n"
2583 "wl reg_class = %s\n"
2584 "wm reg_class = %s\n"
2585 "wo reg_class = %s\n"
2586 "wp reg_class = %s\n"
2587 "wq reg_class = %s\n"
2588 "wr reg_class = %s\n"
2589 "ws reg_class = %s\n"
2590 "wt reg_class = %s\n"
2591 "wu reg_class = %s\n"
2592 "wv reg_class = %s\n"
2593 "ww reg_class = %s\n"
2594 "wx reg_class = %s\n"
2595 "wy reg_class = %s\n"
2596 "wz reg_class = %s\n"
2597 "wA reg_class = %s\n"
2598 "wH reg_class = %s\n"
2599 "wI reg_class = %s\n"
2600 "wJ reg_class = %s\n"
2601 "wK reg_class = %s\n"
2603 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_d
]],
2604 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_f
]],
2605 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_v
]],
2606 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wa
]],
2607 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wb
]],
2608 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wd
]],
2609 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_we
]],
2610 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wf
]],
2611 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wg
]],
2612 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wh
]],
2613 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wi
]],
2614 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wj
]],
2615 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wk
]],
2616 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wl
]],
2617 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wm
]],
2618 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wo
]],
2619 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wp
]],
2620 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wq
]],
2621 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wr
]],
2622 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_ws
]],
2623 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wt
]],
2624 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wu
]],
2625 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wv
]],
2626 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_ww
]],
2627 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wx
]],
2628 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wy
]],
2629 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wz
]],
2630 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wA
]],
2631 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wH
]],
2632 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wI
]],
2633 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wJ
]],
2634 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wK
]]);
2637 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2638 rs6000_debug_print_mode (m
);
2640 fputs ("\n", stderr
);
2642 for (m1
= 0; m1
< ARRAY_SIZE (print_tieable_modes
); m1
++)
2644 machine_mode mode1
= print_tieable_modes
[m1
];
2645 bool first_time
= true;
2647 nl
= (const char *)0;
2648 for (m2
= 0; m2
< ARRAY_SIZE (print_tieable_modes
); m2
++)
2650 machine_mode mode2
= print_tieable_modes
[m2
];
2651 if (mode1
!= mode2
&& MODES_TIEABLE_P (mode1
, mode2
))
2655 fprintf (stderr
, "Tieable modes %s:", GET_MODE_NAME (mode1
));
2660 fprintf (stderr
, " %s", GET_MODE_NAME (mode2
));
2665 fputs ("\n", stderr
);
2671 if (rs6000_recip_control
)
2673 fprintf (stderr
, "\nReciprocal mask = 0x%x\n", rs6000_recip_control
);
2675 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2676 if (rs6000_recip_bits
[m
])
2679 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2681 (RS6000_RECIP_AUTO_RE_P (m
)
2683 : (RS6000_RECIP_HAVE_RE_P (m
) ? "have" : "none")),
2684 (RS6000_RECIP_AUTO_RSQRTE_P (m
)
2686 : (RS6000_RECIP_HAVE_RSQRTE_P (m
) ? "have" : "none")));
2689 fputs ("\n", stderr
);
2692 if (rs6000_cpu_index
>= 0)
2694 const char *name
= processor_target_table
[rs6000_cpu_index
].name
;
2696 = processor_target_table
[rs6000_cpu_index
].target_enable
;
2698 sprintf (flags_buffer
, "-mcpu=%s flags", name
);
2699 rs6000_print_isa_options (stderr
, 0, flags_buffer
, flags
);
2702 fprintf (stderr
, DEBUG_FMT_S
, "cpu", "<none>");
2704 if (rs6000_tune_index
>= 0)
2706 const char *name
= processor_target_table
[rs6000_tune_index
].name
;
2708 = processor_target_table
[rs6000_tune_index
].target_enable
;
2710 sprintf (flags_buffer
, "-mtune=%s flags", name
);
2711 rs6000_print_isa_options (stderr
, 0, flags_buffer
, flags
);
2714 fprintf (stderr
, DEBUG_FMT_S
, "tune", "<none>");
2716 cl_target_option_save (&cl_opts
, &global_options
);
2717 rs6000_print_isa_options (stderr
, 0, "rs6000_isa_flags",
2720 rs6000_print_isa_options (stderr
, 0, "rs6000_isa_flags_explicit",
2721 rs6000_isa_flags_explicit
);
2723 rs6000_print_builtin_options (stderr
, 0, "rs6000_builtin_mask",
2724 rs6000_builtin_mask
);
2726 rs6000_print_isa_options (stderr
, 0, "TARGET_DEFAULT", TARGET_DEFAULT
);
2728 fprintf (stderr
, DEBUG_FMT_S
, "--with-cpu default",
2729 OPTION_TARGET_CPU_DEFAULT
? OPTION_TARGET_CPU_DEFAULT
: "<none>");
2731 switch (rs6000_sched_costly_dep
)
2733 case max_dep_latency
:
2734 costly_str
= "max_dep_latency";
2738 costly_str
= "no_dep_costly";
2741 case all_deps_costly
:
2742 costly_str
= "all_deps_costly";
2745 case true_store_to_load_dep_costly
:
2746 costly_str
= "true_store_to_load_dep_costly";
2749 case store_to_load_dep_costly
:
2750 costly_str
= "store_to_load_dep_costly";
2754 costly_str
= costly_num
;
2755 sprintf (costly_num
, "%d", (int)rs6000_sched_costly_dep
);
2759 fprintf (stderr
, DEBUG_FMT_S
, "sched_costly_dep", costly_str
);
2761 switch (rs6000_sched_insert_nops
)
2763 case sched_finish_regroup_exact
:
2764 nop_str
= "sched_finish_regroup_exact";
2767 case sched_finish_pad_groups
:
2768 nop_str
= "sched_finish_pad_groups";
2771 case sched_finish_none
:
2772 nop_str
= "sched_finish_none";
2777 sprintf (nop_num
, "%d", (int)rs6000_sched_insert_nops
);
2781 fprintf (stderr
, DEBUG_FMT_S
, "sched_insert_nops", nop_str
);
2783 switch (rs6000_sdata
)
2790 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "data");
2794 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "sysv");
2798 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "eabi");
2803 switch (rs6000_traceback
)
2805 case traceback_default
: trace_str
= "default"; break;
2806 case traceback_none
: trace_str
= "none"; break;
2807 case traceback_part
: trace_str
= "part"; break;
2808 case traceback_full
: trace_str
= "full"; break;
2809 default: trace_str
= "unknown"; break;
2812 fprintf (stderr
, DEBUG_FMT_S
, "traceback", trace_str
);
2814 switch (rs6000_current_cmodel
)
2816 case CMODEL_SMALL
: cmodel_str
= "small"; break;
2817 case CMODEL_MEDIUM
: cmodel_str
= "medium"; break;
2818 case CMODEL_LARGE
: cmodel_str
= "large"; break;
2819 default: cmodel_str
= "unknown"; break;
2822 fprintf (stderr
, DEBUG_FMT_S
, "cmodel", cmodel_str
);
2824 switch (rs6000_current_abi
)
2826 case ABI_NONE
: abi_str
= "none"; break;
2827 case ABI_AIX
: abi_str
= "aix"; break;
2828 case ABI_ELFv2
: abi_str
= "ELFv2"; break;
2829 case ABI_V4
: abi_str
= "V4"; break;
2830 case ABI_DARWIN
: abi_str
= "darwin"; break;
2831 default: abi_str
= "unknown"; break;
2834 fprintf (stderr
, DEBUG_FMT_S
, "abi", abi_str
);
2836 if (rs6000_altivec_abi
)
2837 fprintf (stderr
, DEBUG_FMT_S
, "altivec_abi", "true");
2840 fprintf (stderr
, DEBUG_FMT_S
, "spe_abi", "true");
2842 if (rs6000_darwin64_abi
)
2843 fprintf (stderr
, DEBUG_FMT_S
, "darwin64_abi", "true");
2845 if (rs6000_float_gprs
)
2846 fprintf (stderr
, DEBUG_FMT_S
, "float_gprs", "true");
2848 fprintf (stderr
, DEBUG_FMT_S
, "fprs",
2849 (TARGET_FPRS
? "true" : "false"));
2851 fprintf (stderr
, DEBUG_FMT_S
, "single_float",
2852 (TARGET_SINGLE_FLOAT
? "true" : "false"));
2854 fprintf (stderr
, DEBUG_FMT_S
, "double_float",
2855 (TARGET_DOUBLE_FLOAT
? "true" : "false"));
2857 fprintf (stderr
, DEBUG_FMT_S
, "soft_float",
2858 (TARGET_SOFT_FLOAT
? "true" : "false"));
2860 fprintf (stderr
, DEBUG_FMT_S
, "e500_single",
2861 (TARGET_E500_SINGLE
? "true" : "false"));
2863 fprintf (stderr
, DEBUG_FMT_S
, "e500_double",
2864 (TARGET_E500_DOUBLE
? "true" : "false"));
2866 if (TARGET_LINK_STACK
)
2867 fprintf (stderr
, DEBUG_FMT_S
, "link_stack", "true");
2869 fprintf (stderr
, DEBUG_FMT_S
, "lra", TARGET_LRA
? "true" : "false");
2871 if (TARGET_P8_FUSION
)
2875 strcpy (options
, (TARGET_P9_FUSION
) ? "power9" : "power8");
2876 if (TARGET_TOC_FUSION
)
2877 strcat (options
, ", toc");
2879 if (TARGET_P8_FUSION_SIGN
)
2880 strcat (options
, ", sign");
2882 fprintf (stderr
, DEBUG_FMT_S
, "fusion", options
);
2885 fprintf (stderr
, DEBUG_FMT_S
, "plt-format",
2886 TARGET_SECURE_PLT
? "secure" : "bss");
2887 fprintf (stderr
, DEBUG_FMT_S
, "struct-return",
2888 aix_struct_return
? "aix" : "sysv");
2889 fprintf (stderr
, DEBUG_FMT_S
, "always_hint", tf
[!!rs6000_always_hint
]);
2890 fprintf (stderr
, DEBUG_FMT_S
, "sched_groups", tf
[!!rs6000_sched_groups
]);
2891 fprintf (stderr
, DEBUG_FMT_S
, "align_branch",
2892 tf
[!!rs6000_align_branch_targets
]);
2893 fprintf (stderr
, DEBUG_FMT_D
, "tls_size", rs6000_tls_size
);
2894 fprintf (stderr
, DEBUG_FMT_D
, "long_double_size",
2895 rs6000_long_double_type_size
);
2896 fprintf (stderr
, DEBUG_FMT_D
, "sched_restricted_insns_priority",
2897 (int)rs6000_sched_restricted_insns_priority
);
2898 fprintf (stderr
, DEBUG_FMT_D
, "Number of standard builtins",
2900 fprintf (stderr
, DEBUG_FMT_D
, "Number of rs6000 builtins",
2901 (int)RS6000_BUILTIN_COUNT
);
2903 fprintf (stderr
, DEBUG_FMT_D
, "Enable float128 on VSX",
2904 (int)TARGET_FLOAT128_ENABLE_TYPE
);
2907 fprintf (stderr
, DEBUG_FMT_D
, "VSX easy 64-bit scalar element",
2908 (int)VECTOR_ELEMENT_SCALAR_64BIT
);
2910 if (TARGET_DIRECT_MOVE_128
)
2911 fprintf (stderr
, DEBUG_FMT_D
, "VSX easy 64-bit mfvsrld element",
2912 (int)VECTOR_ELEMENT_MFVSRLD_64BIT
);
2916 /* Update the addr mask bits in reg_addr to help secondary reload and go if
2917 legitimate address support to figure out the appropriate addressing to
2921 rs6000_setup_reg_addr_masks (void)
2923 ssize_t rc
, reg
, m
, nregs
;
2924 addr_mask_type any_addr_mask
, addr_mask
;
2926 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2928 machine_mode m2
= (machine_mode
) m
;
2929 bool complex_p
= false;
2930 bool small_int_p
= (m2
== QImode
|| m2
== HImode
|| m2
== SImode
);
2933 if (COMPLEX_MODE_P (m2
))
2936 m2
= GET_MODE_INNER (m2
);
2939 msize
= GET_MODE_SIZE (m2
);
2941 /* SDmode is special in that we want to access it only via REG+REG
2942 addressing on power7 and above, since we want to use the LFIWZX and
2943 STFIWZX instructions to load it. */
2944 bool indexed_only_p
= (m
== SDmode
&& TARGET_NO_SDMODE_STACK
);
2947 for (rc
= FIRST_RELOAD_REG_CLASS
; rc
<= LAST_RELOAD_REG_CLASS
; rc
++)
2950 reg
= reload_reg_map
[rc
].reg
;
2952 /* Can mode values go in the GPR/FPR/Altivec registers? */
2953 if (reg
>= 0 && rs6000_hard_regno_mode_ok_p
[m
][reg
])
2955 bool small_int_vsx_p
= (small_int_p
2956 && (rc
== RELOAD_REG_FPR
2957 || rc
== RELOAD_REG_VMX
));
2959 nregs
= rs6000_hard_regno_nregs
[m
][reg
];
2960 addr_mask
|= RELOAD_REG_VALID
;
2962 /* Indicate if the mode takes more than 1 physical register. If
2963 it takes a single register, indicate it can do REG+REG
2964 addressing. Small integers in VSX registers can only do
2965 REG+REG addressing. */
2966 if (small_int_vsx_p
)
2967 addr_mask
|= RELOAD_REG_INDEXED
;
2968 else if (nregs
> 1 || m
== BLKmode
|| complex_p
)
2969 addr_mask
|= RELOAD_REG_MULTIPLE
;
2971 addr_mask
|= RELOAD_REG_INDEXED
;
2973 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2974 addressing. Restrict addressing on SPE for 64-bit types
2975 because of the SUBREG hackery used to address 64-bit floats in
2976 '32-bit' GPRs. If we allow scalars into Altivec registers,
2977 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY. */
2980 && (rc
== RELOAD_REG_GPR
|| rc
== RELOAD_REG_FPR
)
2982 && !VECTOR_MODE_P (m2
)
2983 && !FLOAT128_VECTOR_P (m2
)
2986 && (m2
!= DFmode
|| !TARGET_UPPER_REGS_DF
)
2987 && (m2
!= SFmode
|| !TARGET_UPPER_REGS_SF
)
2988 && !(TARGET_E500_DOUBLE
&& msize
== 8))
2990 addr_mask
|= RELOAD_REG_PRE_INCDEC
;
2992 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2993 we don't allow PRE_MODIFY for some multi-register
2998 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
3002 if (TARGET_POWERPC64
)
3003 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
3009 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
3015 /* GPR and FPR registers can do REG+OFFSET addressing, except
3016 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
3017 for 64-bit scalars and 32-bit SFmode to altivec registers. */
3018 if ((addr_mask
!= 0) && !indexed_only_p
3020 && (rc
== RELOAD_REG_GPR
3021 || ((msize
== 8 || m2
== SFmode
)
3022 && (rc
== RELOAD_REG_FPR
3023 || (rc
== RELOAD_REG_VMX
3024 && TARGET_P9_DFORM_SCALAR
)))))
3025 addr_mask
|= RELOAD_REG_OFFSET
;
3027 /* VSX registers can do REG+OFFSET addresssing if ISA 3.0
3028 instructions are enabled. The offset for 128-bit VSX registers is
3029 only 12-bits. While GPRs can handle the full offset range, VSX
3030 registers can only handle the restricted range. */
3031 else if ((addr_mask
!= 0) && !indexed_only_p
3032 && msize
== 16 && TARGET_P9_DFORM_VECTOR
3033 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2
)
3034 || (m2
== TImode
&& TARGET_VSX_TIMODE
)))
3036 addr_mask
|= RELOAD_REG_OFFSET
;
3037 if (rc
== RELOAD_REG_FPR
|| rc
== RELOAD_REG_VMX
)
3038 addr_mask
|= RELOAD_REG_QUAD_OFFSET
;
3041 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
3042 addressing on 128-bit types. */
3043 if (rc
== RELOAD_REG_VMX
&& msize
== 16
3044 && (addr_mask
& RELOAD_REG_VALID
) != 0)
3045 addr_mask
|= RELOAD_REG_AND_M16
;
3047 reg_addr
[m
].addr_mask
[rc
] = addr_mask
;
3048 any_addr_mask
|= addr_mask
;
3051 reg_addr
[m
].addr_mask
[RELOAD_REG_ANY
] = any_addr_mask
;
3056 /* Initialize the various global tables that are based on register size. */
3058 rs6000_init_hard_regno_mode_ok (bool global_init_p
)
3064 /* Precalculate REGNO_REG_CLASS. */
3065 rs6000_regno_regclass
[0] = GENERAL_REGS
;
3066 for (r
= 1; r
< 32; ++r
)
3067 rs6000_regno_regclass
[r
] = BASE_REGS
;
3069 for (r
= 32; r
< 64; ++r
)
3070 rs6000_regno_regclass
[r
] = FLOAT_REGS
;
3072 for (r
= 64; r
< FIRST_PSEUDO_REGISTER
; ++r
)
3073 rs6000_regno_regclass
[r
] = NO_REGS
;
3075 for (r
= FIRST_ALTIVEC_REGNO
; r
<= LAST_ALTIVEC_REGNO
; ++r
)
3076 rs6000_regno_regclass
[r
] = ALTIVEC_REGS
;
3078 rs6000_regno_regclass
[CR0_REGNO
] = CR0_REGS
;
3079 for (r
= CR1_REGNO
; r
<= CR7_REGNO
; ++r
)
3080 rs6000_regno_regclass
[r
] = CR_REGS
;
3082 rs6000_regno_regclass
[LR_REGNO
] = LINK_REGS
;
3083 rs6000_regno_regclass
[CTR_REGNO
] = CTR_REGS
;
3084 rs6000_regno_regclass
[CA_REGNO
] = NO_REGS
;
3085 rs6000_regno_regclass
[VRSAVE_REGNO
] = VRSAVE_REGS
;
3086 rs6000_regno_regclass
[VSCR_REGNO
] = VRSAVE_REGS
;
3087 rs6000_regno_regclass
[SPE_ACC_REGNO
] = SPE_ACC_REGS
;
3088 rs6000_regno_regclass
[SPEFSCR_REGNO
] = SPEFSCR_REGS
;
3089 rs6000_regno_regclass
[TFHAR_REGNO
] = SPR_REGS
;
3090 rs6000_regno_regclass
[TFIAR_REGNO
] = SPR_REGS
;
3091 rs6000_regno_regclass
[TEXASR_REGNO
] = SPR_REGS
;
3092 rs6000_regno_regclass
[ARG_POINTER_REGNUM
] = BASE_REGS
;
3093 rs6000_regno_regclass
[FRAME_POINTER_REGNUM
] = BASE_REGS
;
3095 /* Precalculate register class to simpler reload register class. We don't
3096 need all of the register classes that are combinations of different
3097 classes, just the simple ones that have constraint letters. */
3098 for (c
= 0; c
< N_REG_CLASSES
; c
++)
3099 reg_class_to_reg_type
[c
] = NO_REG_TYPE
;
3101 reg_class_to_reg_type
[(int)GENERAL_REGS
] = GPR_REG_TYPE
;
3102 reg_class_to_reg_type
[(int)BASE_REGS
] = GPR_REG_TYPE
;
3103 reg_class_to_reg_type
[(int)VSX_REGS
] = VSX_REG_TYPE
;
3104 reg_class_to_reg_type
[(int)VRSAVE_REGS
] = SPR_REG_TYPE
;
3105 reg_class_to_reg_type
[(int)VSCR_REGS
] = SPR_REG_TYPE
;
3106 reg_class_to_reg_type
[(int)LINK_REGS
] = SPR_REG_TYPE
;
3107 reg_class_to_reg_type
[(int)CTR_REGS
] = SPR_REG_TYPE
;
3108 reg_class_to_reg_type
[(int)LINK_OR_CTR_REGS
] = SPR_REG_TYPE
;
3109 reg_class_to_reg_type
[(int)CR_REGS
] = CR_REG_TYPE
;
3110 reg_class_to_reg_type
[(int)CR0_REGS
] = CR_REG_TYPE
;
3111 reg_class_to_reg_type
[(int)SPE_ACC_REGS
] = SPE_ACC_TYPE
;
3112 reg_class_to_reg_type
[(int)SPEFSCR_REGS
] = SPEFSCR_REG_TYPE
;
3116 reg_class_to_reg_type
[(int)FLOAT_REGS
] = VSX_REG_TYPE
;
3117 reg_class_to_reg_type
[(int)ALTIVEC_REGS
] = VSX_REG_TYPE
;
3121 reg_class_to_reg_type
[(int)FLOAT_REGS
] = FPR_REG_TYPE
;
3122 reg_class_to_reg_type
[(int)ALTIVEC_REGS
] = ALTIVEC_REG_TYPE
;
3125 /* Precalculate the valid memory formats as well as the vector information,
3126 this must be set up before the rs6000_hard_regno_nregs_internal calls
3128 gcc_assert ((int)VECTOR_NONE
== 0);
3129 memset ((void *) &rs6000_vector_unit
[0], '\0', sizeof (rs6000_vector_unit
));
3130 memset ((void *) &rs6000_vector_mem
[0], '\0', sizeof (rs6000_vector_unit
));
3132 gcc_assert ((int)CODE_FOR_nothing
== 0);
3133 memset ((void *) ®_addr
[0], '\0', sizeof (reg_addr
));
3135 gcc_assert ((int)NO_REGS
== 0);
3136 memset ((void *) &rs6000_constraints
[0], '\0', sizeof (rs6000_constraints
));
3138 /* The VSX hardware allows native alignment for vectors, but control whether the compiler
3139 believes it can use native alignment or still uses 128-bit alignment. */
3140 if (TARGET_VSX
&& !TARGET_VSX_ALIGN_128
)
3151 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
3152 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
3153 if (TARGET_FLOAT128_TYPE
)
3155 rs6000_vector_mem
[KFmode
] = VECTOR_VSX
;
3156 rs6000_vector_align
[KFmode
] = 128;
3158 if (FLOAT128_IEEE_P (TFmode
))
3160 rs6000_vector_mem
[TFmode
] = VECTOR_VSX
;
3161 rs6000_vector_align
[TFmode
] = 128;
3165 /* V2DF mode, VSX only. */
3168 rs6000_vector_unit
[V2DFmode
] = VECTOR_VSX
;
3169 rs6000_vector_mem
[V2DFmode
] = VECTOR_VSX
;
3170 rs6000_vector_align
[V2DFmode
] = align64
;
3173 /* V4SF mode, either VSX or Altivec. */
3176 rs6000_vector_unit
[V4SFmode
] = VECTOR_VSX
;
3177 rs6000_vector_mem
[V4SFmode
] = VECTOR_VSX
;
3178 rs6000_vector_align
[V4SFmode
] = align32
;
3180 else if (TARGET_ALTIVEC
)
3182 rs6000_vector_unit
[V4SFmode
] = VECTOR_ALTIVEC
;
3183 rs6000_vector_mem
[V4SFmode
] = VECTOR_ALTIVEC
;
3184 rs6000_vector_align
[V4SFmode
] = align32
;
3187 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
3191 rs6000_vector_unit
[V4SImode
] = VECTOR_ALTIVEC
;
3192 rs6000_vector_unit
[V8HImode
] = VECTOR_ALTIVEC
;
3193 rs6000_vector_unit
[V16QImode
] = VECTOR_ALTIVEC
;
3194 rs6000_vector_align
[V4SImode
] = align32
;
3195 rs6000_vector_align
[V8HImode
] = align32
;
3196 rs6000_vector_align
[V16QImode
] = align32
;
3200 rs6000_vector_mem
[V4SImode
] = VECTOR_VSX
;
3201 rs6000_vector_mem
[V8HImode
] = VECTOR_VSX
;
3202 rs6000_vector_mem
[V16QImode
] = VECTOR_VSX
;
3206 rs6000_vector_mem
[V4SImode
] = VECTOR_ALTIVEC
;
3207 rs6000_vector_mem
[V8HImode
] = VECTOR_ALTIVEC
;
3208 rs6000_vector_mem
[V16QImode
] = VECTOR_ALTIVEC
;
3212 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
3213 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
3216 rs6000_vector_mem
[V2DImode
] = VECTOR_VSX
;
3217 rs6000_vector_unit
[V2DImode
]
3218 = (TARGET_P8_VECTOR
) ? VECTOR_P8_VECTOR
: VECTOR_NONE
;
3219 rs6000_vector_align
[V2DImode
] = align64
;
3221 rs6000_vector_mem
[V1TImode
] = VECTOR_VSX
;
3222 rs6000_vector_unit
[V1TImode
]
3223 = (TARGET_P8_VECTOR
) ? VECTOR_P8_VECTOR
: VECTOR_NONE
;
3224 rs6000_vector_align
[V1TImode
] = 128;
3227 /* DFmode, see if we want to use the VSX unit. Memory is handled
3228 differently, so don't set rs6000_vector_mem. */
3229 if (TARGET_VSX
&& TARGET_VSX_SCALAR_DOUBLE
)
3231 rs6000_vector_unit
[DFmode
] = VECTOR_VSX
;
3232 rs6000_vector_align
[DFmode
] = 64;
3235 /* SFmode, see if we want to use the VSX unit. */
3236 if (TARGET_P8_VECTOR
&& TARGET_VSX_SCALAR_FLOAT
)
3238 rs6000_vector_unit
[SFmode
] = VECTOR_VSX
;
3239 rs6000_vector_align
[SFmode
] = 32;
3242 /* Allow TImode in VSX register and set the VSX memory macros. */
3243 if (TARGET_VSX
&& TARGET_VSX_TIMODE
)
3245 rs6000_vector_mem
[TImode
] = VECTOR_VSX
;
3246 rs6000_vector_align
[TImode
] = align64
;
3249 /* TODO add SPE and paired floating point vector support. */
3251 /* Register class constraints for the constraints that depend on compile
3252 switches. When the VSX code was added, different constraints were added
3253 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
3254 of the VSX registers are used. The register classes for scalar floating
3255 point types is set, based on whether we allow that type into the upper
3256 (Altivec) registers. GCC has register classes to target the Altivec
3257 registers for load/store operations, to select using a VSX memory
3258 operation instead of the traditional floating point operation. The
3261 d - Register class to use with traditional DFmode instructions.
3262 f - Register class to use with traditional SFmode instructions.
3263 v - Altivec register.
3264 wa - Any VSX register.
3265 wc - Reserved to represent individual CR bits (used in LLVM).
3266 wd - Preferred register class for V2DFmode.
3267 wf - Preferred register class for V4SFmode.
3268 wg - Float register for power6x move insns.
3269 wh - FP register for direct move instructions.
3270 wi - FP or VSX register to hold 64-bit integers for VSX insns.
3271 wj - FP or VSX register to hold 64-bit integers for direct moves.
3272 wk - FP or VSX register to hold 64-bit doubles for direct moves.
3273 wl - Float register if we can do 32-bit signed int loads.
3274 wm - VSX register for ISA 2.07 direct move operations.
3275 wn - always NO_REGS.
3276 wr - GPR if 64-bit mode is permitted.
3277 ws - Register class to do ISA 2.06 DF operations.
3278 wt - VSX register for TImode in VSX registers.
3279 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
3280 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
3281 ww - Register class to do SF conversions in with VSX operations.
3282 wx - Float register if we can do 32-bit int stores.
3283 wy - Register class to do ISA 2.07 SF operations.
3284 wz - Float register if we can do 32-bit unsigned int loads.
3285 wH - Altivec register if SImode is allowed in VSX registers.
3286 wI - VSX register if SImode is allowed in VSX registers.
3287 wJ - VSX register if QImode/HImode are allowed in VSX registers.
3288 wK - Altivec register if QImode/HImode are allowed in VSX registers. */
3290 if (TARGET_HARD_FLOAT
&& TARGET_FPRS
)
3291 rs6000_constraints
[RS6000_CONSTRAINT_f
] = FLOAT_REGS
; /* SFmode */
3293 if (TARGET_HARD_FLOAT
&& TARGET_FPRS
&& TARGET_DOUBLE_FLOAT
)
3294 rs6000_constraints
[RS6000_CONSTRAINT_d
] = FLOAT_REGS
; /* DFmode */
3298 rs6000_constraints
[RS6000_CONSTRAINT_wa
] = VSX_REGS
;
3299 rs6000_constraints
[RS6000_CONSTRAINT_wd
] = VSX_REGS
; /* V2DFmode */
3300 rs6000_constraints
[RS6000_CONSTRAINT_wf
] = VSX_REGS
; /* V4SFmode */
3302 if (TARGET_VSX_TIMODE
)
3303 rs6000_constraints
[RS6000_CONSTRAINT_wt
] = VSX_REGS
; /* TImode */
3305 if (TARGET_UPPER_REGS_DF
) /* DFmode */
3307 rs6000_constraints
[RS6000_CONSTRAINT_ws
] = VSX_REGS
;
3308 rs6000_constraints
[RS6000_CONSTRAINT_wv
] = ALTIVEC_REGS
;
3311 rs6000_constraints
[RS6000_CONSTRAINT_ws
] = FLOAT_REGS
;
3313 if (TARGET_UPPER_REGS_DI
) /* DImode */
3314 rs6000_constraints
[RS6000_CONSTRAINT_wi
] = VSX_REGS
;
3316 rs6000_constraints
[RS6000_CONSTRAINT_wi
] = FLOAT_REGS
;
3319 /* Add conditional constraints based on various options, to allow us to
3320 collapse multiple insn patterns. */
3322 rs6000_constraints
[RS6000_CONSTRAINT_v
] = ALTIVEC_REGS
;
3324 if (TARGET_MFPGPR
) /* DFmode */
3325 rs6000_constraints
[RS6000_CONSTRAINT_wg
] = FLOAT_REGS
;
3328 rs6000_constraints
[RS6000_CONSTRAINT_wl
] = FLOAT_REGS
; /* DImode */
3330 if (TARGET_DIRECT_MOVE
)
3332 rs6000_constraints
[RS6000_CONSTRAINT_wh
] = FLOAT_REGS
;
3333 rs6000_constraints
[RS6000_CONSTRAINT_wj
] /* DImode */
3334 = rs6000_constraints
[RS6000_CONSTRAINT_wi
];
3335 rs6000_constraints
[RS6000_CONSTRAINT_wk
] /* DFmode */
3336 = rs6000_constraints
[RS6000_CONSTRAINT_ws
];
3337 rs6000_constraints
[RS6000_CONSTRAINT_wm
] = VSX_REGS
;
3340 if (TARGET_POWERPC64
)
3342 rs6000_constraints
[RS6000_CONSTRAINT_wr
] = GENERAL_REGS
;
3343 rs6000_constraints
[RS6000_CONSTRAINT_wA
] = BASE_REGS
;
3346 if (TARGET_P8_VECTOR
&& TARGET_UPPER_REGS_SF
) /* SFmode */
3348 rs6000_constraints
[RS6000_CONSTRAINT_wu
] = ALTIVEC_REGS
;
3349 rs6000_constraints
[RS6000_CONSTRAINT_wy
] = VSX_REGS
;
3350 rs6000_constraints
[RS6000_CONSTRAINT_ww
] = VSX_REGS
;
3352 else if (TARGET_P8_VECTOR
)
3354 rs6000_constraints
[RS6000_CONSTRAINT_wy
] = FLOAT_REGS
;
3355 rs6000_constraints
[RS6000_CONSTRAINT_ww
] = FLOAT_REGS
;
3357 else if (TARGET_VSX
)
3358 rs6000_constraints
[RS6000_CONSTRAINT_ww
] = FLOAT_REGS
;
3361 rs6000_constraints
[RS6000_CONSTRAINT_wx
] = FLOAT_REGS
; /* DImode */
3364 rs6000_constraints
[RS6000_CONSTRAINT_wz
] = FLOAT_REGS
; /* DImode */
3366 if (TARGET_FLOAT128_TYPE
)
3368 rs6000_constraints
[RS6000_CONSTRAINT_wq
] = VSX_REGS
; /* KFmode */
3369 if (FLOAT128_IEEE_P (TFmode
))
3370 rs6000_constraints
[RS6000_CONSTRAINT_wp
] = VSX_REGS
; /* TFmode */
3373 /* Support for new D-form instructions. */
3374 if (TARGET_P9_DFORM_SCALAR
)
3375 rs6000_constraints
[RS6000_CONSTRAINT_wb
] = ALTIVEC_REGS
;
3377 /* Support for ISA 3.0 (power9) vectors. */
3378 if (TARGET_P9_VECTOR
)
3379 rs6000_constraints
[RS6000_CONSTRAINT_wo
] = VSX_REGS
;
3381 /* Support for new direct moves (ISA 3.0 + 64bit). */
3382 if (TARGET_DIRECT_MOVE_128
)
3383 rs6000_constraints
[RS6000_CONSTRAINT_we
] = VSX_REGS
;
3385 /* Support small integers in VSX registers. */
3386 if (TARGET_VSX_SMALL_INTEGER
)
3388 rs6000_constraints
[RS6000_CONSTRAINT_wH
] = ALTIVEC_REGS
;
3389 rs6000_constraints
[RS6000_CONSTRAINT_wI
] = FLOAT_REGS
;
3390 if (TARGET_P9_VECTOR
)
3392 rs6000_constraints
[RS6000_CONSTRAINT_wJ
] = FLOAT_REGS
;
3393 rs6000_constraints
[RS6000_CONSTRAINT_wK
] = ALTIVEC_REGS
;
3397 /* Set up the reload helper and direct move functions. */
3398 if (TARGET_VSX
|| TARGET_ALTIVEC
)
3402 reg_addr
[V16QImode
].reload_store
= CODE_FOR_reload_v16qi_di_store
;
3403 reg_addr
[V16QImode
].reload_load
= CODE_FOR_reload_v16qi_di_load
;
3404 reg_addr
[V8HImode
].reload_store
= CODE_FOR_reload_v8hi_di_store
;
3405 reg_addr
[V8HImode
].reload_load
= CODE_FOR_reload_v8hi_di_load
;
3406 reg_addr
[V4SImode
].reload_store
= CODE_FOR_reload_v4si_di_store
;
3407 reg_addr
[V4SImode
].reload_load
= CODE_FOR_reload_v4si_di_load
;
3408 reg_addr
[V2DImode
].reload_store
= CODE_FOR_reload_v2di_di_store
;
3409 reg_addr
[V2DImode
].reload_load
= CODE_FOR_reload_v2di_di_load
;
3410 reg_addr
[V1TImode
].reload_store
= CODE_FOR_reload_v1ti_di_store
;
3411 reg_addr
[V1TImode
].reload_load
= CODE_FOR_reload_v1ti_di_load
;
3412 reg_addr
[V4SFmode
].reload_store
= CODE_FOR_reload_v4sf_di_store
;
3413 reg_addr
[V4SFmode
].reload_load
= CODE_FOR_reload_v4sf_di_load
;
3414 reg_addr
[V2DFmode
].reload_store
= CODE_FOR_reload_v2df_di_store
;
3415 reg_addr
[V2DFmode
].reload_load
= CODE_FOR_reload_v2df_di_load
;
3416 reg_addr
[DFmode
].reload_store
= CODE_FOR_reload_df_di_store
;
3417 reg_addr
[DFmode
].reload_load
= CODE_FOR_reload_df_di_load
;
3418 reg_addr
[DDmode
].reload_store
= CODE_FOR_reload_dd_di_store
;
3419 reg_addr
[DDmode
].reload_load
= CODE_FOR_reload_dd_di_load
;
3420 reg_addr
[SFmode
].reload_store
= CODE_FOR_reload_sf_di_store
;
3421 reg_addr
[SFmode
].reload_load
= CODE_FOR_reload_sf_di_load
;
3423 if (FLOAT128_VECTOR_P (KFmode
))
3425 reg_addr
[KFmode
].reload_store
= CODE_FOR_reload_kf_di_store
;
3426 reg_addr
[KFmode
].reload_load
= CODE_FOR_reload_kf_di_load
;
3429 if (FLOAT128_VECTOR_P (TFmode
))
3431 reg_addr
[TFmode
].reload_store
= CODE_FOR_reload_tf_di_store
;
3432 reg_addr
[TFmode
].reload_load
= CODE_FOR_reload_tf_di_load
;
3435 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3437 if (TARGET_NO_SDMODE_STACK
)
3439 reg_addr
[SDmode
].reload_store
= CODE_FOR_reload_sd_di_store
;
3440 reg_addr
[SDmode
].reload_load
= CODE_FOR_reload_sd_di_load
;
3443 if (TARGET_VSX_TIMODE
)
3445 reg_addr
[TImode
].reload_store
= CODE_FOR_reload_ti_di_store
;
3446 reg_addr
[TImode
].reload_load
= CODE_FOR_reload_ti_di_load
;
3449 if (TARGET_DIRECT_MOVE
&& !TARGET_DIRECT_MOVE_128
)
3451 reg_addr
[TImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxti
;
3452 reg_addr
[V1TImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv1ti
;
3453 reg_addr
[V2DFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv2df
;
3454 reg_addr
[V2DImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv2di
;
3455 reg_addr
[V4SFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv4sf
;
3456 reg_addr
[V4SImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv4si
;
3457 reg_addr
[V8HImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv8hi
;
3458 reg_addr
[V16QImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv16qi
;
3459 reg_addr
[SFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxsf
;
3461 reg_addr
[TImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprti
;
3462 reg_addr
[V1TImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv1ti
;
3463 reg_addr
[V2DFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv2df
;
3464 reg_addr
[V2DImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv2di
;
3465 reg_addr
[V4SFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv4sf
;
3466 reg_addr
[V4SImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv4si
;
3467 reg_addr
[V8HImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv8hi
;
3468 reg_addr
[V16QImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv16qi
;
3469 reg_addr
[SFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprsf
;
3471 if (FLOAT128_VECTOR_P (KFmode
))
3473 reg_addr
[KFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxkf
;
3474 reg_addr
[KFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprkf
;
3477 if (FLOAT128_VECTOR_P (TFmode
))
3479 reg_addr
[TFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxtf
;
3480 reg_addr
[TFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprtf
;
3486 reg_addr
[V16QImode
].reload_store
= CODE_FOR_reload_v16qi_si_store
;
3487 reg_addr
[V16QImode
].reload_load
= CODE_FOR_reload_v16qi_si_load
;
3488 reg_addr
[V8HImode
].reload_store
= CODE_FOR_reload_v8hi_si_store
;
3489 reg_addr
[V8HImode
].reload_load
= CODE_FOR_reload_v8hi_si_load
;
3490 reg_addr
[V4SImode
].reload_store
= CODE_FOR_reload_v4si_si_store
;
3491 reg_addr
[V4SImode
].reload_load
= CODE_FOR_reload_v4si_si_load
;
3492 reg_addr
[V2DImode
].reload_store
= CODE_FOR_reload_v2di_si_store
;
3493 reg_addr
[V2DImode
].reload_load
= CODE_FOR_reload_v2di_si_load
;
3494 reg_addr
[V1TImode
].reload_store
= CODE_FOR_reload_v1ti_si_store
;
3495 reg_addr
[V1TImode
].reload_load
= CODE_FOR_reload_v1ti_si_load
;
3496 reg_addr
[V4SFmode
].reload_store
= CODE_FOR_reload_v4sf_si_store
;
3497 reg_addr
[V4SFmode
].reload_load
= CODE_FOR_reload_v4sf_si_load
;
3498 reg_addr
[V2DFmode
].reload_store
= CODE_FOR_reload_v2df_si_store
;
3499 reg_addr
[V2DFmode
].reload_load
= CODE_FOR_reload_v2df_si_load
;
3500 reg_addr
[DFmode
].reload_store
= CODE_FOR_reload_df_si_store
;
3501 reg_addr
[DFmode
].reload_load
= CODE_FOR_reload_df_si_load
;
3502 reg_addr
[DDmode
].reload_store
= CODE_FOR_reload_dd_si_store
;
3503 reg_addr
[DDmode
].reload_load
= CODE_FOR_reload_dd_si_load
;
3504 reg_addr
[SFmode
].reload_store
= CODE_FOR_reload_sf_si_store
;
3505 reg_addr
[SFmode
].reload_load
= CODE_FOR_reload_sf_si_load
;
3507 if (FLOAT128_VECTOR_P (KFmode
))
3509 reg_addr
[KFmode
].reload_store
= CODE_FOR_reload_kf_si_store
;
3510 reg_addr
[KFmode
].reload_load
= CODE_FOR_reload_kf_si_load
;
3513 if (FLOAT128_IEEE_P (TFmode
))
3515 reg_addr
[TFmode
].reload_store
= CODE_FOR_reload_tf_si_store
;
3516 reg_addr
[TFmode
].reload_load
= CODE_FOR_reload_tf_si_load
;
3519 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3521 if (TARGET_NO_SDMODE_STACK
)
3523 reg_addr
[SDmode
].reload_store
= CODE_FOR_reload_sd_si_store
;
3524 reg_addr
[SDmode
].reload_load
= CODE_FOR_reload_sd_si_load
;
3527 if (TARGET_VSX_TIMODE
)
3529 reg_addr
[TImode
].reload_store
= CODE_FOR_reload_ti_si_store
;
3530 reg_addr
[TImode
].reload_load
= CODE_FOR_reload_ti_si_load
;
3533 if (TARGET_DIRECT_MOVE
)
3535 reg_addr
[DImode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdi
;
3536 reg_addr
[DDmode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdd
;
3537 reg_addr
[DFmode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdf
;
3541 if (TARGET_UPPER_REGS_DF
)
3542 reg_addr
[DFmode
].scalar_in_vmx_p
= true;
3544 if (TARGET_UPPER_REGS_DI
)
3545 reg_addr
[DImode
].scalar_in_vmx_p
= true;
3547 if (TARGET_UPPER_REGS_SF
)
3548 reg_addr
[SFmode
].scalar_in_vmx_p
= true;
3550 if (TARGET_VSX_SMALL_INTEGER
)
3552 reg_addr
[SImode
].scalar_in_vmx_p
= true;
3553 if (TARGET_P9_VECTOR
)
3555 reg_addr
[HImode
].scalar_in_vmx_p
= true;
3556 reg_addr
[QImode
].scalar_in_vmx_p
= true;
3561 /* Setup the fusion operations. */
3562 if (TARGET_P8_FUSION
)
3564 reg_addr
[QImode
].fusion_gpr_ld
= CODE_FOR_fusion_gpr_load_qi
;
3565 reg_addr
[HImode
].fusion_gpr_ld
= CODE_FOR_fusion_gpr_load_hi
;
3566 reg_addr
[SImode
].fusion_gpr_ld
= CODE_FOR_fusion_gpr_load_si
;
3568 reg_addr
[DImode
].fusion_gpr_ld
= CODE_FOR_fusion_gpr_load_di
;
3571 if (TARGET_P9_FUSION
)
3574 enum machine_mode mode
; /* mode of the fused type. */
3575 enum machine_mode pmode
; /* pointer mode. */
3576 enum rs6000_reload_reg_type rtype
; /* register type. */
3577 enum insn_code load
; /* load insn. */
3578 enum insn_code store
; /* store insn. */
3581 static const struct fuse_insns addis_insns
[] = {
3582 { E_SFmode
, E_DImode
, RELOAD_REG_FPR
,
3583 CODE_FOR_fusion_vsx_di_sf_load
,
3584 CODE_FOR_fusion_vsx_di_sf_store
},
3586 { E_SFmode
, E_SImode
, RELOAD_REG_FPR
,
3587 CODE_FOR_fusion_vsx_si_sf_load
,
3588 CODE_FOR_fusion_vsx_si_sf_store
},
3590 { E_DFmode
, E_DImode
, RELOAD_REG_FPR
,
3591 CODE_FOR_fusion_vsx_di_df_load
,
3592 CODE_FOR_fusion_vsx_di_df_store
},
3594 { E_DFmode
, E_SImode
, RELOAD_REG_FPR
,
3595 CODE_FOR_fusion_vsx_si_df_load
,
3596 CODE_FOR_fusion_vsx_si_df_store
},
3598 { E_DImode
, E_DImode
, RELOAD_REG_FPR
,
3599 CODE_FOR_fusion_vsx_di_di_load
,
3600 CODE_FOR_fusion_vsx_di_di_store
},
3602 { E_DImode
, E_SImode
, RELOAD_REG_FPR
,
3603 CODE_FOR_fusion_vsx_si_di_load
,
3604 CODE_FOR_fusion_vsx_si_di_store
},
3606 { E_QImode
, E_DImode
, RELOAD_REG_GPR
,
3607 CODE_FOR_fusion_gpr_di_qi_load
,
3608 CODE_FOR_fusion_gpr_di_qi_store
},
3610 { E_QImode
, E_SImode
, RELOAD_REG_GPR
,
3611 CODE_FOR_fusion_gpr_si_qi_load
,
3612 CODE_FOR_fusion_gpr_si_qi_store
},
3614 { E_HImode
, E_DImode
, RELOAD_REG_GPR
,
3615 CODE_FOR_fusion_gpr_di_hi_load
,
3616 CODE_FOR_fusion_gpr_di_hi_store
},
3618 { E_HImode
, E_SImode
, RELOAD_REG_GPR
,
3619 CODE_FOR_fusion_gpr_si_hi_load
,
3620 CODE_FOR_fusion_gpr_si_hi_store
},
3622 { E_SImode
, E_DImode
, RELOAD_REG_GPR
,
3623 CODE_FOR_fusion_gpr_di_si_load
,
3624 CODE_FOR_fusion_gpr_di_si_store
},
3626 { E_SImode
, E_SImode
, RELOAD_REG_GPR
,
3627 CODE_FOR_fusion_gpr_si_si_load
,
3628 CODE_FOR_fusion_gpr_si_si_store
},
3630 { E_SFmode
, E_DImode
, RELOAD_REG_GPR
,
3631 CODE_FOR_fusion_gpr_di_sf_load
,
3632 CODE_FOR_fusion_gpr_di_sf_store
},
3634 { E_SFmode
, E_SImode
, RELOAD_REG_GPR
,
3635 CODE_FOR_fusion_gpr_si_sf_load
,
3636 CODE_FOR_fusion_gpr_si_sf_store
},
3638 { E_DImode
, E_DImode
, RELOAD_REG_GPR
,
3639 CODE_FOR_fusion_gpr_di_di_load
,
3640 CODE_FOR_fusion_gpr_di_di_store
},
3642 { E_DFmode
, E_DImode
, RELOAD_REG_GPR
,
3643 CODE_FOR_fusion_gpr_di_df_load
,
3644 CODE_FOR_fusion_gpr_di_df_store
},
3647 machine_mode cur_pmode
= Pmode
;
3650 for (i
= 0; i
< ARRAY_SIZE (addis_insns
); i
++)
3652 machine_mode xmode
= addis_insns
[i
].mode
;
3653 enum rs6000_reload_reg_type rtype
= addis_insns
[i
].rtype
;
3655 if (addis_insns
[i
].pmode
!= cur_pmode
)
3658 if (rtype
== RELOAD_REG_FPR
3659 && (!TARGET_HARD_FLOAT
|| !TARGET_FPRS
))
3662 reg_addr
[xmode
].fusion_addis_ld
[rtype
] = addis_insns
[i
].load
;
3663 reg_addr
[xmode
].fusion_addis_st
[rtype
] = addis_insns
[i
].store
;
3665 if (rtype
== RELOAD_REG_FPR
&& TARGET_P9_DFORM_SCALAR
)
3667 reg_addr
[xmode
].fusion_addis_ld
[RELOAD_REG_VMX
]
3668 = addis_insns
[i
].load
;
3669 reg_addr
[xmode
].fusion_addis_st
[RELOAD_REG_VMX
]
3670 = addis_insns
[i
].store
;
3675 /* Note which types we support fusing TOC setup plus memory insn. We only do
3676 fused TOCs for medium/large code models. */
3677 if (TARGET_P8_FUSION
&& TARGET_TOC_FUSION
&& TARGET_POWERPC64
3678 && (TARGET_CMODEL
!= CMODEL_SMALL
))
3680 reg_addr
[QImode
].fused_toc
= true;
3681 reg_addr
[HImode
].fused_toc
= true;
3682 reg_addr
[SImode
].fused_toc
= true;
3683 reg_addr
[DImode
].fused_toc
= true;
3684 if (TARGET_HARD_FLOAT
&& TARGET_FPRS
)
3686 if (TARGET_SINGLE_FLOAT
)
3687 reg_addr
[SFmode
].fused_toc
= true;
3688 if (TARGET_DOUBLE_FLOAT
)
3689 reg_addr
[DFmode
].fused_toc
= true;
3693 /* Precalculate HARD_REGNO_NREGS. */
3694 for (r
= 0; r
< FIRST_PSEUDO_REGISTER
; ++r
)
3695 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3696 rs6000_hard_regno_nregs
[m
][r
]
3697 = rs6000_hard_regno_nregs_internal (r
, (machine_mode
)m
);
3699 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3700 for (r
= 0; r
< FIRST_PSEUDO_REGISTER
; ++r
)
3701 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3702 if (rs6000_hard_regno_mode_ok_uncached (r
, (machine_mode
)m
))
3703 rs6000_hard_regno_mode_ok_p
[m
][r
] = true;
3705 /* Precalculate CLASS_MAX_NREGS sizes. */
3706 for (c
= 0; c
< LIM_REG_CLASSES
; ++c
)
3710 if (TARGET_VSX
&& VSX_REG_CLASS_P (c
))
3711 reg_size
= UNITS_PER_VSX_WORD
;
3713 else if (c
== ALTIVEC_REGS
)
3714 reg_size
= UNITS_PER_ALTIVEC_WORD
;
3716 else if (c
== FLOAT_REGS
)
3717 reg_size
= UNITS_PER_FP_WORD
;
3720 reg_size
= UNITS_PER_WORD
;
3722 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3724 machine_mode m2
= (machine_mode
)m
;
3725 int reg_size2
= reg_size
;
3727 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3729 if (TARGET_VSX
&& VSX_REG_CLASS_P (c
) && FLOAT128_2REG_P (m
))
3730 reg_size2
= UNITS_PER_FP_WORD
;
3732 rs6000_class_max_nregs
[m
][c
]
3733 = (GET_MODE_SIZE (m2
) + reg_size2
- 1) / reg_size2
;
3737 if (TARGET_E500_DOUBLE
)
3738 rs6000_class_max_nregs
[DFmode
][GENERAL_REGS
] = 1;
3740 /* Calculate which modes to automatically generate code to use a the
3741 reciprocal divide and square root instructions. In the future, possibly
3742 automatically generate the instructions even if the user did not specify
3743 -mrecip. The older machines double precision reciprocal sqrt estimate is
3744 not accurate enough. */
3745 memset (rs6000_recip_bits
, 0, sizeof (rs6000_recip_bits
));
3747 rs6000_recip_bits
[SFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3749 rs6000_recip_bits
[DFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3750 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
))
3751 rs6000_recip_bits
[V4SFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3752 if (VECTOR_UNIT_VSX_P (V2DFmode
))
3753 rs6000_recip_bits
[V2DFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3755 if (TARGET_FRSQRTES
)
3756 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3758 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3759 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
))
3760 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3761 if (VECTOR_UNIT_VSX_P (V2DFmode
))
3762 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3764 if (rs6000_recip_control
)
3766 if (!flag_finite_math_only
)
3767 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
3768 if (flag_trapping_math
)
3769 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
3770 if (!flag_reciprocal_math
)
3771 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
3772 if (flag_finite_math_only
&& !flag_trapping_math
&& flag_reciprocal_math
)
3774 if (RS6000_RECIP_HAVE_RE_P (SFmode
)
3775 && (rs6000_recip_control
& RECIP_SF_DIV
) != 0)
3776 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3778 if (RS6000_RECIP_HAVE_RE_P (DFmode
)
3779 && (rs6000_recip_control
& RECIP_DF_DIV
) != 0)
3780 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3782 if (RS6000_RECIP_HAVE_RE_P (V4SFmode
)
3783 && (rs6000_recip_control
& RECIP_V4SF_DIV
) != 0)
3784 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3786 if (RS6000_RECIP_HAVE_RE_P (V2DFmode
)
3787 && (rs6000_recip_control
& RECIP_V2DF_DIV
) != 0)
3788 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3790 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode
)
3791 && (rs6000_recip_control
& RECIP_SF_RSQRT
) != 0)
3792 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3794 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode
)
3795 && (rs6000_recip_control
& RECIP_DF_RSQRT
) != 0)
3796 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3798 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode
)
3799 && (rs6000_recip_control
& RECIP_V4SF_RSQRT
) != 0)
3800 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3802 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode
)
3803 && (rs6000_recip_control
& RECIP_V2DF_RSQRT
) != 0)
3804 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3808 /* Update the addr mask bits in reg_addr to help secondary reload and go if
3809 legitimate address support to figure out the appropriate addressing to
3811 rs6000_setup_reg_addr_masks ();
3813 if (global_init_p
|| TARGET_DEBUG_TARGET
)
3815 if (TARGET_DEBUG_REG
)
3816 rs6000_debug_reg_global ();
3818 if (TARGET_DEBUG_COST
|| TARGET_DEBUG_REG
)
3820 "SImode variable mult cost = %d\n"
3821 "SImode constant mult cost = %d\n"
3822 "SImode short constant mult cost = %d\n"
3823 "DImode multipliciation cost = %d\n"
3824 "SImode division cost = %d\n"
3825 "DImode division cost = %d\n"
3826 "Simple fp operation cost = %d\n"
3827 "DFmode multiplication cost = %d\n"
3828 "SFmode division cost = %d\n"
3829 "DFmode division cost = %d\n"
3830 "cache line size = %d\n"
3831 "l1 cache size = %d\n"
3832 "l2 cache size = %d\n"
3833 "simultaneous prefetches = %d\n"
3836 rs6000_cost
->mulsi_const
,
3837 rs6000_cost
->mulsi_const9
,
3845 rs6000_cost
->cache_line_size
,
3846 rs6000_cost
->l1_cache_size
,
3847 rs6000_cost
->l2_cache_size
,
3848 rs6000_cost
->simultaneous_prefetches
);
/* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS.

   Adjusts the rs6000 option flags for the Darwin (Mac OS X) target:
   forces the AltiVec ABI on, widens -m64 to PowerPC64, applies kernel
   (-mkernel) restrictions, and defaults AltiVec on for 10.5+ userland.

   NOTE(review): the extraction this was recovered from dropped several
   guard lines (the TARGET_64BIT conjunct and the flag_mkernel guard);
   they are reconstructed here and should be confirmed against the
   upstream rs6000.c.  */
static void
darwin_rs6000_override_options (void)
{
  /* The Darwin ABI always includes AltiVec, can't be (validly) turned
     off.  */
  rs6000_altivec_abi = 1;
  TARGET_ALTIVEC_VRSAVE = 1;
  rs6000_current_abi = ABI_DARWIN;

  /* Darwin's 64-bit ABI uses one-byte bools.  */
  if (DEFAULT_ABI == ABI_DARWIN
      && TARGET_64BIT)
    darwin_one_byte_bool = 1;

  /* -m64 only makes sense on a PowerPC64 architecture; enable it and
     warn rather than erroring out.  */
  if (TARGET_64BIT && ! TARGET_POWERPC64)
    {
      rs6000_isa_flags |= OPTION_MASK_POWERPC64;
      warning (0, "-m64 requires PowerPC64 architecture, enabling");
    }

  /* Kernel code uses long calls and must not touch the FPU.  */
  if (flag_mkernel)
    {
      rs6000_default_long_calls = 1;
      rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
    }

  /* Make -m64 imply -maltivec.  Darwin's 64-bit ABI includes
     Altivec.  */
  if (!flag_mkernel && !flag_apple_kext
      && TARGET_64BIT
      && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
    rs6000_isa_flags |= OPTION_MASK_ALTIVEC;

  /* Unless the user (not the configurer) has explicitly overridden
     it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to
     G4 unless targeting the kernel.  */
  if (!flag_mkernel
      && !flag_apple_kext
      && strverscmp (darwin_macosx_version_min, "10.5") >= 0
      && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
      && ! global_options_set.x_rs6000_cpu_index)
    {
      rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
    }
}
3900 /* If not otherwise specified by a target, make 'long double' equivalent to
3903 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3904 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
/* Return the builtin mask of the various options used that could affect which
   builtins were used.  In the past we used target_flags, but we've run out of
   bits, and some options like SPE and PAIRED are no longer in
   target_flags.

   Each RS6000_BTM_* bit is set iff the corresponding TARGET_* feature test
   is true at this point in compilation; the result gates which built-in
   functions are made available.  Pure function of the current target
   state — no side effects.  */
HOST_WIDE_INT
rs6000_builtin_mask_calculate (void)
{
  return (((TARGET_ALTIVEC)           ? RS6000_BTM_ALTIVEC   : 0)
          | ((TARGET_CMPB)            ? RS6000_BTM_CMPB      : 0)
          | ((TARGET_VSX)             ? RS6000_BTM_VSX       : 0)
          | ((TARGET_SPE)             ? RS6000_BTM_SPE       : 0)
          | ((TARGET_PAIRED_FLOAT)    ? RS6000_BTM_PAIRED    : 0)
          | ((TARGET_FRE)             ? RS6000_BTM_FRE       : 0)
          | ((TARGET_FRES)            ? RS6000_BTM_FRES      : 0)
          | ((TARGET_FRSQRTE)         ? RS6000_BTM_FRSQRTE   : 0)
          | ((TARGET_FRSQRTES)        ? RS6000_BTM_FRSQRTES  : 0)
          | ((TARGET_POPCNTD)         ? RS6000_BTM_POPCNTD   : 0)
          /* Cell is keyed off the processor, not an ISA flag.  */
          | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
          | ((TARGET_P8_VECTOR)       ? RS6000_BTM_P8_VECTOR : 0)
          | ((TARGET_P9_VECTOR)       ? RS6000_BTM_P9_VECTOR : 0)
          | ((TARGET_P9_MISC)         ? RS6000_BTM_P9_MISC   : 0)
          | ((TARGET_MODULO)          ? RS6000_BTM_MODULO    : 0)
          | ((TARGET_64BIT)           ? RS6000_BTM_64BIT     : 0)
          | ((TARGET_CRYPTO)          ? RS6000_BTM_CRYPTO    : 0)
          | ((TARGET_HTM)             ? RS6000_BTM_HTM       : 0)
          | ((TARGET_DFP)             ? RS6000_BTM_DFP       : 0)
          | ((TARGET_HARD_FLOAT)      ? RS6000_BTM_HARD_FLOAT : 0)
          | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128   : 0)
          | ((TARGET_FLOAT128_TYPE)   ? RS6000_BTM_FLOAT128  : 0));
}
/* Implement TARGET_MD_ASM_ADJUST.  All asm statements are considered
   to clobber the XER[CA] bit because clobbering that bit without telling
   the compiler worked just fine with versions of GCC before GCC 5, and
   breaking a lot of older code in ways that are hard to track down is
   not such a great idea.

   Appends a CA_REGNO clobber to CLOBBERS and marks it in CLOBBERED_REGS;
   the outputs/inputs/constraints vectors are not modified (their
   parameters are deliberately unnamed).  */
static rtx_insn *
rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
                      vec<const char *> &/*constraints*/,
                      vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
{
  clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
  SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
  /* NOTE(review): the hook may return a sequence of insns to emit after
     the asm; this implementation emits none.  The return statement was
     dropped by the extraction and is reconstructed — confirm upstream.  */
  return NULL;
}
3955 /* Override command line options.
3957 Combine build-specific configuration information with options
3958 specified on the command line to set various state variables which
3959 influence code generation, optimization, and expansion of built-in
3960 functions. Assure that command-line configuration preferences are
3961 compatible with each other and with the build configuration; issue
3962 warnings while adjusting configuration or error messages while
3963 rejecting configuration.
3965 Upon entry to this function:
3967 This function is called once at the beginning of
3968 compilation, and then again at the start and end of compiling
3969 each section of code that has a different configuration, as
3970 indicated, for example, by adding the
3972 __attribute__((__target__("cpu=power9")))
3974 qualifier to a function definition or, for example, by bracketing
3977 #pragma GCC target("altivec")
3981 #pragma GCC reset_options
3983 directives. Parameter global_init_p is true for the initial
3984 invocation, which initializes global variables, and false for all
3985 subsequent invocations.
3988 Various global state information is assumed to be valid. This
3989 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3990 default CPU specified at build configure time, TARGET_DEFAULT,
3991 representing the default set of option flags for the default
3992 target, and global_options_set.x_rs6000_isa_flags, representing
3993 which options were requested on the command line.
3995 Upon return from this function:
3997 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3998 was set by name on the command line. Additionally, if certain
3999 attributes are automatically enabled or disabled by this function
4000 in order to assure compatibility between options and
4001 configuration, the flags associated with those attributes are
4002 also set. By setting these "explicit bits", we avoid the risk
4003 that other code might accidentally overwrite these particular
4004 attributes with "default values".
4006 The various bits of rs6000_isa_flags are set to indicate the
4007 target options that have been selected for the most current
4008 compilation efforts. This has the effect of also turning on the
4009 associated TARGET_XXX values since these are macros which are
4010 generally defined to test the corresponding bit of the
4011 rs6000_isa_flags variable.
4013 The variable rs6000_builtin_mask is set to represent the target
4014 options for the most current compilation efforts, consistent with
4015 the current contents of rs6000_isa_flags. This variable controls
4016 expansion of built-in functions.
4018 Various other global variables and fields of global structures
4019 (over 50 in all) are initialized to reflect the desired options
4020 for the most current compilation efforts. */
4023 rs6000_option_override_internal (bool global_init_p
)
4026 bool have_cpu
= false;
4028 /* The default cpu requested at configure time, if any. */
4029 const char *implicit_cpu
= OPTION_TARGET_CPU_DEFAULT
;
4031 HOST_WIDE_INT set_masks
;
4032 HOST_WIDE_INT ignore_masks
;
4035 struct cl_target_option
*main_target_opt
4036 = ((global_init_p
|| target_option_default_node
== NULL
)
4037 ? NULL
: TREE_TARGET_OPTION (target_option_default_node
));
4039 /* Print defaults. */
4040 if ((TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
) && global_init_p
)
4041 rs6000_print_isa_options (stderr
, 0, "TARGET_DEFAULT", TARGET_DEFAULT
);
4043 /* Remember the explicit arguments. */
4045 rs6000_isa_flags_explicit
= global_options_set
.x_rs6000_isa_flags
;
4047 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
4048 library functions, so warn about it. The flag may be useful for
4049 performance studies from time to time though, so don't disable it
4051 if (global_options_set
.x_rs6000_alignment_flags
4052 && rs6000_alignment_flags
== MASK_ALIGN_POWER
4053 && DEFAULT_ABI
== ABI_DARWIN
4055 warning (0, "-malign-power is not supported for 64-bit Darwin;"
4056 " it is incompatible with the installed C and C++ libraries");
4058 /* Numerous experiment shows that IRA based loop pressure
4059 calculation works better for RTL loop invariant motion on targets
4060 with enough (>= 32) registers. It is an expensive optimization.
4061 So it is on only for peak performance. */
4062 if (optimize
>= 3 && global_init_p
4063 && !global_options_set
.x_flag_ira_loop_pressure
)
4064 flag_ira_loop_pressure
= 1;
4066 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
4067 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
4068 options were already specified. */
4069 if (flag_sanitize
& SANITIZE_USER_ADDRESS
4070 && !global_options_set
.x_flag_asynchronous_unwind_tables
)
4071 flag_asynchronous_unwind_tables
= 1;
4073 /* Set the pointer size. */
4076 rs6000_pmode
= DImode
;
4077 rs6000_pointer_size
= 64;
4081 rs6000_pmode
= SImode
;
4082 rs6000_pointer_size
= 32;
4085 /* Some OSs don't support saving the high part of 64-bit registers on context
4086 switch. Other OSs don't support saving Altivec registers. On those OSs,
4087 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
4088 if the user wants either, the user must explicitly specify them and we
4089 won't interfere with the user's specification. */
4091 set_masks
= POWERPC_MASKS
;
4092 #ifdef OS_MISSING_POWERPC64
4093 if (OS_MISSING_POWERPC64
)
4094 set_masks
&= ~OPTION_MASK_POWERPC64
;
4096 #ifdef OS_MISSING_ALTIVEC
4097 if (OS_MISSING_ALTIVEC
)
4098 set_masks
&= ~(OPTION_MASK_ALTIVEC
| OPTION_MASK_VSX
4099 | OTHER_VSX_VECTOR_MASKS
);
4102 /* Don't override by the processor default if given explicitly. */
4103 set_masks
&= ~rs6000_isa_flags_explicit
;
4105 /* Process the -mcpu=<xxx> and -mtune=<xxx> argument. If the user changed
4106 the cpu in a target attribute or pragma, but did not specify a tuning
4107 option, use the cpu for the tuning option rather than the option specified
4108 with -mtune on the command line. Process a '--with-cpu' configuration
4109 request as an implicit --cpu. */
4110 if (rs6000_cpu_index
>= 0)
4112 cpu_index
= rs6000_cpu_index
;
4115 else if (main_target_opt
!= NULL
&& main_target_opt
->x_rs6000_cpu_index
>= 0)
4117 rs6000_cpu_index
= cpu_index
= main_target_opt
->x_rs6000_cpu_index
;
4120 else if (implicit_cpu
)
4122 rs6000_cpu_index
= cpu_index
= rs6000_cpu_name_lookup (implicit_cpu
);
4127 /* PowerPC 64-bit LE requires at least ISA 2.07. */
4128 const char *default_cpu
= ((!TARGET_POWERPC64
)
4130 : ((BYTES_BIG_ENDIAN
)
4134 rs6000_cpu_index
= cpu_index
= rs6000_cpu_name_lookup (default_cpu
);
4138 gcc_assert (cpu_index
>= 0);
4142 #ifndef HAVE_AS_POWER9
4143 if (processor_target_table
[rs6000_cpu_index
].processor
4144 == PROCESSOR_POWER9
)
4147 warning (0, "will not generate power9 instructions because "
4148 "assembler lacks power9 support");
4151 #ifndef HAVE_AS_POWER8
4152 if (processor_target_table
[rs6000_cpu_index
].processor
4153 == PROCESSOR_POWER8
)
4156 warning (0, "will not generate power8 instructions because "
4157 "assembler lacks power8 support");
4160 #ifndef HAVE_AS_POPCNTD
4161 if (processor_target_table
[rs6000_cpu_index
].processor
4162 == PROCESSOR_POWER7
)
4165 warning (0, "will not generate power7 instructions because "
4166 "assembler lacks power7 support");
4170 if (processor_target_table
[rs6000_cpu_index
].processor
4171 == PROCESSOR_POWER6
)
4174 warning (0, "will not generate power6 instructions because "
4175 "assembler lacks power6 support");
4178 #ifndef HAVE_AS_POPCNTB
4179 if (processor_target_table
[rs6000_cpu_index
].processor
4180 == PROCESSOR_POWER5
)
4183 warning (0, "will not generate power5 instructions because "
4184 "assembler lacks power5 support");
4190 /* PowerPC 64-bit LE requires at least ISA 2.07. */
4191 const char *default_cpu
= (!TARGET_POWERPC64
4197 rs6000_cpu_index
= cpu_index
= rs6000_cpu_name_lookup (default_cpu
);
4201 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
4202 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
4203 with those from the cpu, except for options that were explicitly set. If
4204 we don't have a cpu, do not override the target bits set in
4208 rs6000_isa_flags
&= ~set_masks
;
4209 rs6000_isa_flags
|= (processor_target_table
[cpu_index
].target_enable
4214 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
4215 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
4216 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
4217 to using rs6000_isa_flags, we need to do the initialization here.
4219 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
4220 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
4221 HOST_WIDE_INT flags
= ((TARGET_DEFAULT
) ? TARGET_DEFAULT
4222 : processor_target_table
[cpu_index
].target_enable
);
4223 rs6000_isa_flags
|= (flags
& ~rs6000_isa_flags_explicit
);
4226 if (rs6000_tune_index
>= 0)
4227 tune_index
= rs6000_tune_index
;
4229 rs6000_tune_index
= tune_index
= cpu_index
;
4233 enum processor_type tune_proc
4234 = (TARGET_POWERPC64
? PROCESSOR_DEFAULT64
: PROCESSOR_DEFAULT
);
4237 for (i
= 0; i
< ARRAY_SIZE (processor_target_table
); i
++)
4238 if (processor_target_table
[i
].processor
== tune_proc
)
4240 rs6000_tune_index
= tune_index
= i
;
4245 gcc_assert (tune_index
>= 0);
4246 rs6000_cpu
= processor_target_table
[tune_index
].processor
;
4248 /* Pick defaults for SPE related control flags. Do this early to make sure
4249 that the TARGET_ macros are representative ASAP. */
4251 int spe_capable_cpu
=
4252 (rs6000_cpu
== PROCESSOR_PPC8540
4253 || rs6000_cpu
== PROCESSOR_PPC8548
);
4255 if (!global_options_set
.x_rs6000_spe_abi
)
4256 rs6000_spe_abi
= spe_capable_cpu
;
4258 if (!global_options_set
.x_rs6000_spe
)
4259 rs6000_spe
= spe_capable_cpu
;
4261 if (!global_options_set
.x_rs6000_float_gprs
)
4263 (rs6000_cpu
== PROCESSOR_PPC8540
? 1
4264 : rs6000_cpu
== PROCESSOR_PPC8548
? 2
4268 if (global_options_set
.x_rs6000_spe_abi
4271 error ("not configured for SPE ABI");
4273 if (global_options_set
.x_rs6000_spe
4276 error ("not configured for SPE instruction set");
4278 if (main_target_opt
!= NULL
4279 && ((main_target_opt
->x_rs6000_spe_abi
!= rs6000_spe_abi
)
4280 || (main_target_opt
->x_rs6000_spe
!= rs6000_spe
)
4281 || (main_target_opt
->x_rs6000_float_gprs
!= rs6000_float_gprs
)))
4282 error ("target attribute or pragma changes SPE ABI");
4284 if (rs6000_cpu
== PROCESSOR_PPCE300C2
|| rs6000_cpu
== PROCESSOR_PPCE300C3
4285 || rs6000_cpu
== PROCESSOR_PPCE500MC
|| rs6000_cpu
== PROCESSOR_PPCE500MC64
4286 || rs6000_cpu
== PROCESSOR_PPCE5500
)
4289 error ("AltiVec not supported in this target");
4291 error ("SPE not supported in this target");
4293 if (rs6000_cpu
== PROCESSOR_PPCE6500
)
4296 error ("SPE not supported in this target");
4299 /* Disable Cell microcode if we are optimizing for the Cell
4300 and not optimizing for size. */
4301 if (rs6000_gen_cell_microcode
== -1)
4302 rs6000_gen_cell_microcode
= !(rs6000_cpu
== PROCESSOR_CELL
4305 /* If we are optimizing big endian systems for space and it's OK to
4306 use instructions that would be microcoded on the Cell, use the
4307 load/store multiple and string instructions. */
4308 if (BYTES_BIG_ENDIAN
&& optimize_size
&& rs6000_gen_cell_microcode
)
4309 rs6000_isa_flags
|= ~rs6000_isa_flags_explicit
& (OPTION_MASK_MULTIPLE
4310 | OPTION_MASK_STRING
);
4312 /* Don't allow -mmultiple or -mstring on little endian systems
4313 unless the cpu is a 750, because the hardware doesn't support the
4314 instructions used in little endian mode, and causes an alignment
4315 trap. The 750 does not cause an alignment trap (except when the
4316 target is unaligned). */
4318 if (!BYTES_BIG_ENDIAN
&& rs6000_cpu
!= PROCESSOR_PPC750
)
4320 if (TARGET_MULTIPLE
)
4322 rs6000_isa_flags
&= ~OPTION_MASK_MULTIPLE
;
4323 if ((rs6000_isa_flags_explicit
& OPTION_MASK_MULTIPLE
) != 0)
4324 warning (0, "-mmultiple is not supported on little endian systems");
4329 rs6000_isa_flags
&= ~OPTION_MASK_STRING
;
4330 if ((rs6000_isa_flags_explicit
& OPTION_MASK_STRING
) != 0)
4331 warning (0, "-mstring is not supported on little endian systems");
4335 /* If little-endian, default to -mstrict-align on older processors.
4336 Testing for htm matches power8 and later. */
4337 if (!BYTES_BIG_ENDIAN
4338 && !(processor_target_table
[tune_index
].target_enable
& OPTION_MASK_HTM
))
4339 rs6000_isa_flags
|= ~rs6000_isa_flags_explicit
& OPTION_MASK_STRICT_ALIGN
;
4341 /* -maltivec={le,be} implies -maltivec. */
4342 if (rs6000_altivec_element_order
!= 0)
4343 rs6000_isa_flags
|= OPTION_MASK_ALTIVEC
;
4345 /* Disallow -maltivec=le in big endian mode for now. This is not
4346 known to be useful for anyone. */
4347 if (BYTES_BIG_ENDIAN
&& rs6000_altivec_element_order
== 1)
4349 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
4350 rs6000_altivec_element_order
= 0;
4353 /* Add some warnings for VSX. */
4356 const char *msg
= NULL
;
4357 if (!TARGET_HARD_FLOAT
|| !TARGET_FPRS
4358 || !TARGET_SINGLE_FLOAT
|| !TARGET_DOUBLE_FLOAT
)
4360 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
)
4361 msg
= N_("-mvsx requires hardware floating point");
4364 rs6000_isa_flags
&= ~ OPTION_MASK_VSX
;
4365 rs6000_isa_flags_explicit
|= OPTION_MASK_VSX
;
4368 else if (TARGET_PAIRED_FLOAT
)
4369 msg
= N_("-mvsx and -mpaired are incompatible");
4370 else if (TARGET_AVOID_XFORM
> 0)
4371 msg
= N_("-mvsx needs indexed addressing");
4372 else if (!TARGET_ALTIVEC
&& (rs6000_isa_flags_explicit
4373 & OPTION_MASK_ALTIVEC
))
4375 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
)
4376 msg
= N_("-mvsx and -mno-altivec are incompatible");
4378 msg
= N_("-mno-altivec disables vsx");
4384 rs6000_isa_flags
&= ~ OPTION_MASK_VSX
;
4385 rs6000_isa_flags_explicit
|= OPTION_MASK_VSX
;
4389 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
4390 the -mcpu setting to enable options that conflict. */
4391 if ((!TARGET_HARD_FLOAT
|| !TARGET_ALTIVEC
|| !TARGET_VSX
)
4392 && (rs6000_isa_flags_explicit
& (OPTION_MASK_SOFT_FLOAT
4393 | OPTION_MASK_ALTIVEC
4394 | OPTION_MASK_VSX
)) != 0)
4395 rs6000_isa_flags
&= ~((OPTION_MASK_P8_VECTOR
| OPTION_MASK_CRYPTO
4396 | OPTION_MASK_DIRECT_MOVE
)
4397 & ~rs6000_isa_flags_explicit
);
4399 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
4400 rs6000_print_isa_options (stderr
, 0, "before defaults", rs6000_isa_flags
);
4402 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
4403 off all of the options that depend on those flags. */
4404 ignore_masks
= rs6000_disable_incompatible_switches ();
4406 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
4407 unless the user explicitly used the -mno-<option> to disable the code. */
4408 if (TARGET_P9_VECTOR
|| TARGET_MODULO
|| TARGET_P9_DFORM_SCALAR
4409 || TARGET_P9_DFORM_VECTOR
|| TARGET_P9_DFORM_BOTH
> 0)
4410 rs6000_isa_flags
|= (ISA_3_0_MASKS_SERVER
& ~ignore_masks
);
4411 else if (TARGET_P9_MINMAX
)
4415 if (cpu_index
== PROCESSOR_POWER9
)
4417 /* legacy behavior: allow -mcpu-power9 with certain
4418 capabilities explicitly disabled. */
4419 rs6000_isa_flags
|= (ISA_3_0_MASKS_SERVER
& ~ignore_masks
);
4420 /* However, reject this automatic fix if certain
4421 capabilities required for TARGET_P9_MINMAX support
4422 have been explicitly disabled. */
4423 if (((OPTION_MASK_VSX
| OPTION_MASK_UPPER_REGS_SF
4424 | OPTION_MASK_UPPER_REGS_DF
) & rs6000_isa_flags
)
4425 != (OPTION_MASK_VSX
| OPTION_MASK_UPPER_REGS_SF
4426 | OPTION_MASK_UPPER_REGS_DF
))
4427 error ("-mpower9-minmax incompatible with explicitly disabled options");
4430 error ("Power9 target option is incompatible with -mcpu=<xxx> for "
4431 "<xxx> less than power9");
4433 else if ((ISA_3_0_MASKS_SERVER
& rs6000_isa_flags_explicit
)
4434 != (ISA_3_0_MASKS_SERVER
& rs6000_isa_flags
4435 & rs6000_isa_flags_explicit
))
4436 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
4437 were explicitly cleared. */
4438 error ("-mpower9-minmax incompatible with explicitly disabled options");
4440 rs6000_isa_flags
|= ISA_3_0_MASKS_SERVER
;
4442 else if (TARGET_P8_VECTOR
|| TARGET_DIRECT_MOVE
|| TARGET_CRYPTO
)
4443 rs6000_isa_flags
|= (ISA_2_7_MASKS_SERVER
& ~ignore_masks
);
4444 else if (TARGET_VSX
)
4445 rs6000_isa_flags
|= (ISA_2_6_MASKS_SERVER
& ~ignore_masks
);
4446 else if (TARGET_POPCNTD
)
4447 rs6000_isa_flags
|= (ISA_2_6_MASKS_EMBEDDED
& ~ignore_masks
);
4448 else if (TARGET_DFP
)
4449 rs6000_isa_flags
|= (ISA_2_5_MASKS_SERVER
& ~ignore_masks
);
4450 else if (TARGET_CMPB
)
4451 rs6000_isa_flags
|= (ISA_2_5_MASKS_EMBEDDED
& ~ignore_masks
);
4452 else if (TARGET_FPRND
)
4453 rs6000_isa_flags
|= (ISA_2_4_MASKS
& ~ignore_masks
);
4454 else if (TARGET_POPCNTB
)
4455 rs6000_isa_flags
|= (ISA_2_2_MASKS
& ~ignore_masks
);
4456 else if (TARGET_ALTIVEC
)
4457 rs6000_isa_flags
|= (OPTION_MASK_PPC_GFXOPT
& ~ignore_masks
);
4459 if (TARGET_CRYPTO
&& !TARGET_ALTIVEC
)
4461 if (rs6000_isa_flags_explicit
& OPTION_MASK_CRYPTO
)
4462 error ("-mcrypto requires -maltivec");
4463 rs6000_isa_flags
&= ~OPTION_MASK_CRYPTO
;
4466 if (TARGET_DIRECT_MOVE
&& !TARGET_VSX
)
4468 if (rs6000_isa_flags_explicit
& OPTION_MASK_DIRECT_MOVE
)
4469 error ("-mdirect-move requires -mvsx");
4470 rs6000_isa_flags
&= ~OPTION_MASK_DIRECT_MOVE
;
4473 if (TARGET_P8_VECTOR
&& !TARGET_ALTIVEC
)
4475 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
)
4476 error ("-mpower8-vector requires -maltivec");
4477 rs6000_isa_flags
&= ~OPTION_MASK_P8_VECTOR
;
4480 if (TARGET_P8_VECTOR
&& !TARGET_VSX
)
4482 if ((rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
)
4483 && (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
))
4484 error ("-mpower8-vector requires -mvsx");
4485 else if ((rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
) == 0)
4487 rs6000_isa_flags
&= ~OPTION_MASK_P8_VECTOR
;
4488 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
)
4489 rs6000_isa_flags_explicit
|= OPTION_MASK_P8_VECTOR
;
4493 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
4495 rs6000_isa_flags
|= OPTION_MASK_VSX
;
4496 rs6000_isa_flags_explicit
|= OPTION_MASK_VSX
;
4500 if (TARGET_VSX_TIMODE
&& !TARGET_VSX
)
4502 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX_TIMODE
)
4503 error ("-mvsx-timode requires -mvsx");
4504 rs6000_isa_flags
&= ~OPTION_MASK_VSX_TIMODE
;
4507 if (TARGET_DFP
&& !TARGET_HARD_FLOAT
)
4509 if (rs6000_isa_flags_explicit
& OPTION_MASK_DFP
)
4510 error ("-mhard-dfp requires -mhard-float");
4511 rs6000_isa_flags
&= ~OPTION_MASK_DFP
;
4514 /* Allow an explicit -mupper-regs to set -mupper-regs-df, -mupper-regs-di,
4515 and -mupper-regs-sf, depending on the cpu, unless the user explicitly also
4516 set the individual option. */
4517 if (TARGET_UPPER_REGS
> 0)
4520 && !(rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_DF
))
4522 rs6000_isa_flags
|= OPTION_MASK_UPPER_REGS_DF
;
4523 rs6000_isa_flags_explicit
|= OPTION_MASK_UPPER_REGS_DF
;
4526 && !(rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_DI
))
4528 rs6000_isa_flags
|= OPTION_MASK_UPPER_REGS_DI
;
4529 rs6000_isa_flags_explicit
|= OPTION_MASK_UPPER_REGS_DI
;
4531 if (TARGET_P8_VECTOR
4532 && !(rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_SF
))
4534 rs6000_isa_flags
|= OPTION_MASK_UPPER_REGS_SF
;
4535 rs6000_isa_flags_explicit
|= OPTION_MASK_UPPER_REGS_SF
;
4538 else if (TARGET_UPPER_REGS
== 0)
4541 && !(rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_DF
))
4543 rs6000_isa_flags
&= ~OPTION_MASK_UPPER_REGS_DF
;
4544 rs6000_isa_flags_explicit
|= OPTION_MASK_UPPER_REGS_DF
;
4547 && !(rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_DI
))
4549 rs6000_isa_flags
&= ~OPTION_MASK_UPPER_REGS_DI
;
4550 rs6000_isa_flags_explicit
|= OPTION_MASK_UPPER_REGS_DI
;
4552 if (TARGET_P8_VECTOR
4553 && !(rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_SF
))
4555 rs6000_isa_flags
&= ~OPTION_MASK_UPPER_REGS_SF
;
4556 rs6000_isa_flags_explicit
|= OPTION_MASK_UPPER_REGS_SF
;
4560 if (TARGET_UPPER_REGS_DF
&& !TARGET_VSX
)
4562 if (rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_DF
)
4563 error ("-mupper-regs-df requires -mvsx");
4564 rs6000_isa_flags
&= ~OPTION_MASK_UPPER_REGS_DF
;
4567 if (TARGET_UPPER_REGS_DI
&& !TARGET_VSX
)
4569 if (rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_DI
)
4570 error ("-mupper-regs-di requires -mvsx");
4571 rs6000_isa_flags
&= ~OPTION_MASK_UPPER_REGS_DI
;
4574 if (TARGET_UPPER_REGS_SF
&& !TARGET_P8_VECTOR
)
4576 if (rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_SF
)
4577 error ("-mupper-regs-sf requires -mpower8-vector");
4578 rs6000_isa_flags
&= ~OPTION_MASK_UPPER_REGS_SF
;
4581 /* The quad memory instructions only works in 64-bit mode. In 32-bit mode,
4582 silently turn off quad memory mode. */
4583 if ((TARGET_QUAD_MEMORY
|| TARGET_QUAD_MEMORY_ATOMIC
) && !TARGET_POWERPC64
)
4585 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY
) != 0)
4586 warning (0, N_("-mquad-memory requires 64-bit mode"));
4588 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY_ATOMIC
) != 0)
4589 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
4591 rs6000_isa_flags
&= ~(OPTION_MASK_QUAD_MEMORY
4592 | OPTION_MASK_QUAD_MEMORY_ATOMIC
);
4595 /* Non-atomic quad memory load/store are disabled for little endian, since
4596 the words are reversed, but atomic operations can still be done by
4597 swapping the words. */
4598 if (TARGET_QUAD_MEMORY
&& !WORDS_BIG_ENDIAN
)
4600 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY
) != 0)
4601 warning (0, N_("-mquad-memory is not available in little endian mode"));
4603 rs6000_isa_flags
&= ~OPTION_MASK_QUAD_MEMORY
;
4606 /* Assume if the user asked for normal quad memory instructions, they want
4607 the atomic versions as well, unless they explicitly told us not to use quad
4608 word atomic instructions. */
4609 if (TARGET_QUAD_MEMORY
4610 && !TARGET_QUAD_MEMORY_ATOMIC
4611 && ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY_ATOMIC
) == 0))
4612 rs6000_isa_flags
|= OPTION_MASK_QUAD_MEMORY_ATOMIC
;
4614 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4615 generating power8 instructions. */
4616 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION
))
4617 rs6000_isa_flags
|= (processor_target_table
[tune_index
].target_enable
4618 & OPTION_MASK_P8_FUSION
);
4620 /* Setting additional fusion flags turns on base fusion. */
4621 if (!TARGET_P8_FUSION
&& (TARGET_P8_FUSION_SIGN
|| TARGET_TOC_FUSION
))
4623 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION
)
4625 if (TARGET_P8_FUSION_SIGN
)
4626 error ("-mpower8-fusion-sign requires -mpower8-fusion");
4628 if (TARGET_TOC_FUSION
)
4629 error ("-mtoc-fusion requires -mpower8-fusion");
4631 rs6000_isa_flags
&= ~OPTION_MASK_P8_FUSION
;
4634 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION
;
4637 /* Power9 fusion is a superset over power8 fusion. */
4638 if (TARGET_P9_FUSION
&& !TARGET_P8_FUSION
)
4640 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION
)
4642 /* We prefer to not mention undocumented options in
4643 error messages. However, if users have managed to select
4644 power9-fusion without selecting power8-fusion, they
4645 already know about undocumented flags. */
4646 error ("-mpower9-fusion requires -mpower8-fusion");
4647 rs6000_isa_flags
&= ~OPTION_MASK_P9_FUSION
;
4650 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION
;
4653 /* Enable power9 fusion if we are tuning for power9, even if we aren't
4654 generating power9 instructions. */
4655 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_P9_FUSION
))
4656 rs6000_isa_flags
|= (processor_target_table
[tune_index
].target_enable
4657 & OPTION_MASK_P9_FUSION
);
4659 /* Power8 does not fuse sign extended loads with the addis. If we are
4660 optimizing at high levels for speed, convert a sign extended load into a
4661 zero extending load, and an explicit sign extension. */
4662 if (TARGET_P8_FUSION
4663 && !(rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION_SIGN
)
4664 && optimize_function_for_speed_p (cfun
)
4666 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION_SIGN
;
4668 /* TOC fusion requires 64-bit and medium/large code model. */
4669 if (TARGET_TOC_FUSION
&& !TARGET_POWERPC64
)
4671 rs6000_isa_flags
&= ~OPTION_MASK_TOC_FUSION
;
4672 if ((rs6000_isa_flags_explicit
& OPTION_MASK_TOC_FUSION
) != 0)
4673 warning (0, N_("-mtoc-fusion requires 64-bit"));
4676 if (TARGET_TOC_FUSION
&& (TARGET_CMODEL
== CMODEL_SMALL
))
4678 rs6000_isa_flags
&= ~OPTION_MASK_TOC_FUSION
;
4679 if ((rs6000_isa_flags_explicit
& OPTION_MASK_TOC_FUSION
) != 0)
4680 warning (0, N_("-mtoc-fusion requires medium/large code model"));
4683 /* Turn on -mtoc-fusion by default if p8-fusion and 64-bit medium/large code
4685 if (TARGET_P8_FUSION
&& !TARGET_TOC_FUSION
&& TARGET_POWERPC64
4686 && (TARGET_CMODEL
!= CMODEL_SMALL
)
4687 && !(rs6000_isa_flags_explicit
& OPTION_MASK_TOC_FUSION
))
4688 rs6000_isa_flags
|= OPTION_MASK_TOC_FUSION
;
4690 /* ISA 3.0 vector instructions include ISA 2.07. */
4691 if (TARGET_P9_VECTOR
&& !TARGET_P8_VECTOR
)
4693 /* We prefer to not mention undocumented options in
4694 error messages. However, if users have managed to select
4695 power9-vector without selecting power8-vector, they
4696 already know about undocumented flags. */
4697 if ((rs6000_isa_flags_explicit
& OPTION_MASK_P9_VECTOR
) &&
4698 (rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
))
4699 error ("-mpower9-vector requires -mpower8-vector");
4700 else if ((rs6000_isa_flags_explicit
& OPTION_MASK_P9_VECTOR
) == 0)
4702 rs6000_isa_flags
&= ~OPTION_MASK_P9_VECTOR
;
4703 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
)
4704 rs6000_isa_flags_explicit
|= OPTION_MASK_P9_VECTOR
;
4708 /* OPTION_MASK_P9_VECTOR is explicit and
4709 OPTION_MASK_P8_VECTOR is not explicit. */
4710 rs6000_isa_flags
|= OPTION_MASK_P8_VECTOR
;
4711 rs6000_isa_flags_explicit
|= OPTION_MASK_P8_VECTOR
;
4715 /* -mpower9-dform turns on both -mpower9-dform-scalar and
4716 -mpower9-dform-vector. */
4717 if (TARGET_P9_DFORM_BOTH
> 0)
4719 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_P9_DFORM_VECTOR
))
4720 rs6000_isa_flags
|= OPTION_MASK_P9_DFORM_VECTOR
;
4722 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_P9_DFORM_SCALAR
))
4723 rs6000_isa_flags
|= OPTION_MASK_P9_DFORM_SCALAR
;
4725 else if (TARGET_P9_DFORM_BOTH
== 0)
4727 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_P9_DFORM_VECTOR
))
4728 rs6000_isa_flags
&= ~OPTION_MASK_P9_DFORM_VECTOR
;
4730 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_P9_DFORM_SCALAR
))
4731 rs6000_isa_flags
&= ~OPTION_MASK_P9_DFORM_SCALAR
;
4734 /* ISA 3.0 D-form instructions require p9-vector and upper-regs. */
4735 if ((TARGET_P9_DFORM_SCALAR
|| TARGET_P9_DFORM_VECTOR
) && !TARGET_P9_VECTOR
)
4737 /* We prefer to not mention undocumented options in
4738 error messages. However, if users have managed to select
4739 power9-dform without selecting power9-vector, they
4740 already know about undocumented flags. */
4741 if ((rs6000_isa_flags_explicit
& OPTION_MASK_P9_VECTOR
)
4742 && (rs6000_isa_flags_explicit
& (OPTION_MASK_P9_DFORM_SCALAR
4743 | OPTION_MASK_P9_DFORM_VECTOR
)))
4744 error ("-mpower9-dform requires -mpower9-vector");
4745 else if (rs6000_isa_flags_explicit
& OPTION_MASK_P9_VECTOR
)
4748 ~(OPTION_MASK_P9_DFORM_SCALAR
| OPTION_MASK_P9_DFORM_VECTOR
);
4749 rs6000_isa_flags_explicit
|=
4750 (OPTION_MASK_P9_DFORM_SCALAR
| OPTION_MASK_P9_DFORM_VECTOR
);
4754 /* We know that OPTION_MASK_P9_VECTOR is not explicit and
4755 OPTION_MASK_P9_DFORM_SCALAR or OPTION_MASK_P9_DORM_VECTOR
4757 rs6000_isa_flags
|= OPTION_MASK_P9_VECTOR
;
4758 rs6000_isa_flags_explicit
|= OPTION_MASK_P9_VECTOR
;
4762 if ((TARGET_P9_DFORM_SCALAR
|| TARGET_P9_DFORM_VECTOR
)
4763 && !TARGET_DIRECT_MOVE
)
4765 /* We prefer to not mention undocumented options in
4766 error messages. However, if users have managed to select
4767 power9-dform without selecting direct-move, they
4768 already know about undocumented flags. */
4769 if ((rs6000_isa_flags_explicit
& OPTION_MASK_DIRECT_MOVE
)
4770 && ((rs6000_isa_flags_explicit
& OPTION_MASK_P9_DFORM_VECTOR
) ||
4771 (rs6000_isa_flags_explicit
& OPTION_MASK_P9_DFORM_SCALAR
) ||
4772 (TARGET_P9_DFORM_BOTH
== 1)))
4773 error ("-mpower9-dform, -mpower9-dform-vector, -mpower9-dform-scalar"
4774 " require -mdirect-move");
4775 else if ((rs6000_isa_flags_explicit
& OPTION_MASK_DIRECT_MOVE
) == 0)
4777 rs6000_isa_flags
|= OPTION_MASK_DIRECT_MOVE
;
4778 rs6000_isa_flags_explicit
|= OPTION_MASK_DIRECT_MOVE
;
4783 ~(OPTION_MASK_P9_DFORM_SCALAR
| OPTION_MASK_P9_DFORM_VECTOR
);
4784 rs6000_isa_flags_explicit
|=
4785 (OPTION_MASK_P9_DFORM_SCALAR
| OPTION_MASK_P9_DFORM_VECTOR
);
4789 if (TARGET_P9_DFORM_SCALAR
&& !TARGET_UPPER_REGS_DF
)
4791 /* We prefer to not mention undocumented options in
4792 error messages. However, if users have managed to select
4793 power9-dform without selecting upper-regs-df, they
4794 already know about undocumented flags. */
4795 if (rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_DF
)
4796 error ("-mpower9-dform requires -mupper-regs-df");
4797 rs6000_isa_flags
&= ~OPTION_MASK_P9_DFORM_SCALAR
;
4800 if (TARGET_P9_DFORM_SCALAR
&& !TARGET_UPPER_REGS_SF
)
4802 if (rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_SF
)
4803 error ("-mpower9-dform requires -mupper-regs-sf");
4804 rs6000_isa_flags
&= ~OPTION_MASK_P9_DFORM_SCALAR
;
4807 /* Enable LRA by default. */
4808 if ((rs6000_isa_flags_explicit
& OPTION_MASK_LRA
) == 0)
4809 rs6000_isa_flags
|= OPTION_MASK_LRA
;
4811 /* There have been bugs with -mvsx-timode that don't show up with -mlra,
4812 but do show up with -mno-lra. Given -mlra will become the default once
4813 PR 69847 is fixed, turn off the options with problems by default if
4814 -mno-lra was used, and warn if the user explicitly asked for the option.
4816 Enable -mpower9-dform-vector by default if LRA and other power9 options.
4817 Enable -mvsx-timode by default if LRA and VSX. */
4820 if (TARGET_VSX_TIMODE
)
4822 if ((rs6000_isa_flags_explicit
& OPTION_MASK_VSX_TIMODE
) != 0)
4823 warning (0, "-mvsx-timode might need -mlra");
4826 rs6000_isa_flags
&= ~OPTION_MASK_VSX_TIMODE
;
4832 if (TARGET_VSX
&& !TARGET_VSX_TIMODE
4833 && (rs6000_isa_flags_explicit
& OPTION_MASK_VSX_TIMODE
) == 0)
4834 rs6000_isa_flags
|= OPTION_MASK_VSX_TIMODE
;
4837 /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07
4838 support. If we only have ISA 2.06 support, and the user did not specify
4839 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4840 but we don't enable the full vectorization support */
4841 if (TARGET_ALLOW_MOVMISALIGN
== -1 && TARGET_P8_VECTOR
&& TARGET_DIRECT_MOVE
)
4842 TARGET_ALLOW_MOVMISALIGN
= 1;
4844 else if (TARGET_ALLOW_MOVMISALIGN
&& !TARGET_VSX
)
4846 if (TARGET_ALLOW_MOVMISALIGN
> 0
4847 && global_options_set
.x_TARGET_ALLOW_MOVMISALIGN
)
4848 error ("-mallow-movmisalign requires -mvsx");
4850 TARGET_ALLOW_MOVMISALIGN
= 0;
4853 /* Determine when unaligned vector accesses are permitted, and when
4854 they are preferred over masked Altivec loads. Note that if
4855 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4856 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4858 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
4862 if (rs6000_isa_flags_explicit
& OPTION_MASK_EFFICIENT_UNALIGNED_VSX
)
4863 error ("-mefficient-unaligned-vsx requires -mvsx");
4865 rs6000_isa_flags
&= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX
;
4868 else if (!TARGET_ALLOW_MOVMISALIGN
)
4870 if (rs6000_isa_flags_explicit
& OPTION_MASK_EFFICIENT_UNALIGNED_VSX
)
4871 error ("-mefficient-unaligned-vsx requires -mallow-movmisalign");
4873 rs6000_isa_flags
&= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX
;
4877 /* Check whether we should allow small integers into VSX registers. We
4878 require direct move to prevent the register allocator from having to move
4879 variables through memory to do moves. SImode can be used on ISA 2.07,
4880 while HImode and QImode require ISA 3.0. */
4881 if (TARGET_VSX_SMALL_INTEGER
4882 && (!TARGET_DIRECT_MOVE
|| !TARGET_P8_VECTOR
|| !TARGET_UPPER_REGS_DI
))
4884 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX_SMALL_INTEGER
)
4885 error ("-mvsx-small-integer requires -mpower8-vector, "
4886 "-mupper-regs-di, and -mdirect-move");
4888 rs6000_isa_flags
&= ~OPTION_MASK_VSX_SMALL_INTEGER
;
4891 /* Set long double size before the IEEE 128-bit tests. */
4892 if (!global_options_set
.x_rs6000_long_double_type_size
)
4894 if (main_target_opt
!= NULL
4895 && (main_target_opt
->x_rs6000_long_double_type_size
4896 != RS6000_DEFAULT_LONG_DOUBLE_SIZE
))
4897 error ("target attribute or pragma changes long double size");
4899 rs6000_long_double_type_size
= RS6000_DEFAULT_LONG_DOUBLE_SIZE
;
4902 /* Set -mabi=ieeelongdouble on some old targets. Note, AIX and Darwin
4903 explicitly redefine TARGET_IEEEQUAD to 0, so those systems will not
4904 pick up this default. */
4905 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
4906 if (!global_options_set
.x_rs6000_ieeequad
)
4907 rs6000_ieeequad
= 1;
4910 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4911 systems, but don't enable the __float128 keyword. */
4912 if (TARGET_VSX
&& TARGET_LONG_DOUBLE_128
4913 && (TARGET_FLOAT128_ENABLE_TYPE
|| TARGET_IEEEQUAD
)
4914 && ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_TYPE
) == 0))
4915 rs6000_isa_flags
|= OPTION_MASK_FLOAT128_TYPE
;
4917 /* IEEE 128-bit floating point requires VSX support. */
4920 if (TARGET_FLOAT128_KEYWORD
)
4922 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_KEYWORD
) != 0)
4923 error ("-mfloat128 requires VSX support");
4925 rs6000_isa_flags
&= ~(OPTION_MASK_FLOAT128_TYPE
4926 | OPTION_MASK_FLOAT128_KEYWORD
4927 | OPTION_MASK_FLOAT128_HW
);
4930 else if (TARGET_FLOAT128_TYPE
)
4932 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_TYPE
) != 0)
4933 error ("-mfloat128-type requires VSX support");
4935 rs6000_isa_flags
&= ~(OPTION_MASK_FLOAT128_TYPE
4936 | OPTION_MASK_FLOAT128_KEYWORD
4937 | OPTION_MASK_FLOAT128_HW
);
4941 /* -mfloat128 and -mfloat128-hardware internally require the underlying IEEE
4942 128-bit floating point support to be enabled. */
4943 if (!TARGET_FLOAT128_TYPE
)
4945 if (TARGET_FLOAT128_KEYWORD
)
4947 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_KEYWORD
) != 0)
4949 error ("-mfloat128 requires -mfloat128-type");
4950 rs6000_isa_flags
&= ~(OPTION_MASK_FLOAT128_TYPE
4951 | OPTION_MASK_FLOAT128_KEYWORD
4952 | OPTION_MASK_FLOAT128_HW
);
4955 rs6000_isa_flags
|= OPTION_MASK_FLOAT128_TYPE
;
4958 if (TARGET_FLOAT128_HW
)
4960 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
) != 0)
4962 error ("-mfloat128-hardware requires -mfloat128-type");
4963 rs6000_isa_flags
&= ~OPTION_MASK_FLOAT128_HW
;
4966 rs6000_isa_flags
&= ~(OPTION_MASK_FLOAT128_TYPE
4967 | OPTION_MASK_FLOAT128_KEYWORD
4968 | OPTION_MASK_FLOAT128_HW
);
4972 /* If we have -mfloat128-type and full ISA 3.0 support, enable
4973 -mfloat128-hardware by default. However, don't enable the __float128
4974 keyword. If the user explicitly turned on -mfloat128-hardware, enable the
4975 -mfloat128 option as well if it was not already set. */
4976 if (TARGET_FLOAT128_TYPE
&& !TARGET_FLOAT128_HW
4977 && (rs6000_isa_flags
& ISA_3_0_MASKS_IEEE
) == ISA_3_0_MASKS_IEEE
4978 && !(rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
))
4979 rs6000_isa_flags
|= OPTION_MASK_FLOAT128_HW
;
4981 if (TARGET_FLOAT128_HW
4982 && (rs6000_isa_flags
& ISA_3_0_MASKS_IEEE
) != ISA_3_0_MASKS_IEEE
)
4984 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
) != 0)
4985 error ("-mfloat128-hardware requires full ISA 3.0 support");
4987 rs6000_isa_flags
&= ~OPTION_MASK_FLOAT128_HW
;
4990 if (TARGET_FLOAT128_HW
&& !TARGET_64BIT
)
4992 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
) != 0)
4993 error ("-mfloat128-hardware requires -m64");
4995 rs6000_isa_flags
&= ~OPTION_MASK_FLOAT128_HW
;
4998 if (TARGET_FLOAT128_HW
&& !TARGET_FLOAT128_KEYWORD
4999 && (rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
) != 0
5000 && (rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_KEYWORD
) == 0)
5001 rs6000_isa_flags
|= OPTION_MASK_FLOAT128_KEYWORD
;
5003 /* Print the options after updating the defaults. */
5004 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
5005 rs6000_print_isa_options (stderr
, 0, "after defaults", rs6000_isa_flags
);
5007 /* E500mc does "better" if we inline more aggressively. Respect the
5008 user's opinion, though. */
5009 if (rs6000_block_move_inline_limit
== 0
5010 && (rs6000_cpu
== PROCESSOR_PPCE500MC
5011 || rs6000_cpu
== PROCESSOR_PPCE500MC64
5012 || rs6000_cpu
== PROCESSOR_PPCE5500
5013 || rs6000_cpu
== PROCESSOR_PPCE6500
))
5014 rs6000_block_move_inline_limit
= 128;
5016 /* store_one_arg depends on expand_block_move to handle at least the
5017 size of reg_parm_stack_space. */
5018 if (rs6000_block_move_inline_limit
< (TARGET_POWERPC64
? 64 : 32))
5019 rs6000_block_move_inline_limit
= (TARGET_POWERPC64
? 64 : 32);
5023 /* If the appropriate debug option is enabled, replace the target hooks
5024 with debug versions that call the real version and then prints
5025 debugging information. */
5026 if (TARGET_DEBUG_COST
)
5028 targetm
.rtx_costs
= rs6000_debug_rtx_costs
;
5029 targetm
.address_cost
= rs6000_debug_address_cost
;
5030 targetm
.sched
.adjust_cost
= rs6000_debug_adjust_cost
;
5033 if (TARGET_DEBUG_ADDR
)
5035 targetm
.legitimate_address_p
= rs6000_debug_legitimate_address_p
;
5036 targetm
.legitimize_address
= rs6000_debug_legitimize_address
;
5037 rs6000_secondary_reload_class_ptr
5038 = rs6000_debug_secondary_reload_class
;
5039 rs6000_secondary_memory_needed_ptr
5040 = rs6000_debug_secondary_memory_needed
;
5041 rs6000_cannot_change_mode_class_ptr
5042 = rs6000_debug_cannot_change_mode_class
;
5043 rs6000_preferred_reload_class_ptr
5044 = rs6000_debug_preferred_reload_class
;
5045 rs6000_legitimize_reload_address_ptr
5046 = rs6000_debug_legitimize_reload_address
;
5047 rs6000_mode_dependent_address_ptr
5048 = rs6000_debug_mode_dependent_address
;
5051 if (rs6000_veclibabi_name
)
5053 if (strcmp (rs6000_veclibabi_name
, "mass") == 0)
5054 rs6000_veclib_handler
= rs6000_builtin_vectorized_libmass
;
5057 error ("unknown vectorization library ABI type (%s) for "
5058 "-mveclibabi= switch", rs6000_veclibabi_name
);
5064 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
5065 target attribute or pragma which automatically enables both options,
5066 unless the altivec ABI was set. This is set by default for 64-bit, but
5068 if (main_target_opt
!= NULL
&& !main_target_opt
->x_rs6000_altivec_abi
)
5069 rs6000_isa_flags
&= ~((OPTION_MASK_VSX
| OPTION_MASK_ALTIVEC
5070 | OPTION_MASK_FLOAT128_TYPE
5071 | OPTION_MASK_FLOAT128_KEYWORD
)
5072 & ~rs6000_isa_flags_explicit
);
5074 /* Enable Altivec ABI for AIX -maltivec. */
5075 if (TARGET_XCOFF
&& (TARGET_ALTIVEC
|| TARGET_VSX
))
5077 if (main_target_opt
!= NULL
&& !main_target_opt
->x_rs6000_altivec_abi
)
5078 error ("target attribute or pragma changes AltiVec ABI");
5080 rs6000_altivec_abi
= 1;
5083 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
5084 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
5085 be explicitly overridden in either case. */
5088 if (!global_options_set
.x_rs6000_altivec_abi
5089 && (TARGET_64BIT
|| TARGET_ALTIVEC
|| TARGET_VSX
))
5091 if (main_target_opt
!= NULL
&&
5092 !main_target_opt
->x_rs6000_altivec_abi
)
5093 error ("target attribute or pragma changes AltiVec ABI");
5095 rs6000_altivec_abi
= 1;
5099 /* Set the Darwin64 ABI as default for 64-bit Darwin.
5100 So far, the only darwin64 targets are also MACH-O. */
5102 && DEFAULT_ABI
== ABI_DARWIN
5105 if (main_target_opt
!= NULL
&& !main_target_opt
->x_rs6000_darwin64_abi
)
5106 error ("target attribute or pragma changes darwin64 ABI");
5109 rs6000_darwin64_abi
= 1;
5110 /* Default to natural alignment, for better performance. */
5111 rs6000_alignment_flags
= MASK_ALIGN_NATURAL
;
5115 /* Place FP constants in the constant pool instead of TOC
5116 if section anchors enabled. */
5117 if (flag_section_anchors
5118 && !global_options_set
.x_TARGET_NO_FP_IN_TOC
)
5119 TARGET_NO_FP_IN_TOC
= 1;
5121 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
5122 rs6000_print_isa_options (stderr
, 0, "before subtarget", rs6000_isa_flags
);
5124 #ifdef SUBTARGET_OVERRIDE_OPTIONS
5125 SUBTARGET_OVERRIDE_OPTIONS
;
5127 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
5128 SUBSUBTARGET_OVERRIDE_OPTIONS
;
5130 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
5131 SUB3TARGET_OVERRIDE_OPTIONS
;
5134 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
5135 rs6000_print_isa_options (stderr
, 0, "after subtarget", rs6000_isa_flags
);
5137 /* For the E500 family of cores, reset the single/double FP flags to let us
5138 check that they remain constant across attributes or pragmas. Also,
5139 clear a possible request for string instructions, not supported and which
5140 we might have silently queried above for -Os.
5142 For other families, clear ISEL in case it was set implicitly.
5147 case PROCESSOR_PPC8540
:
5148 case PROCESSOR_PPC8548
:
5149 case PROCESSOR_PPCE500MC
:
5150 case PROCESSOR_PPCE500MC64
:
5151 case PROCESSOR_PPCE5500
:
5152 case PROCESSOR_PPCE6500
:
5154 rs6000_single_float
= TARGET_E500_SINGLE
|| TARGET_E500_DOUBLE
;
5155 rs6000_double_float
= TARGET_E500_DOUBLE
;
5157 rs6000_isa_flags
&= ~OPTION_MASK_STRING
;
5163 if (have_cpu
&& !(rs6000_isa_flags_explicit
& OPTION_MASK_ISEL
))
5164 rs6000_isa_flags
&= ~OPTION_MASK_ISEL
;
5169 if (main_target_opt
)
5171 if (main_target_opt
->x_rs6000_single_float
!= rs6000_single_float
)
5172 error ("target attribute or pragma changes single precision floating "
5174 if (main_target_opt
->x_rs6000_double_float
!= rs6000_double_float
)
5175 error ("target attribute or pragma changes double precision floating "
5179 /* Detect invalid option combinations with E500. */
5182 rs6000_always_hint
= (rs6000_cpu
!= PROCESSOR_POWER4
5183 && rs6000_cpu
!= PROCESSOR_POWER5
5184 && rs6000_cpu
!= PROCESSOR_POWER6
5185 && rs6000_cpu
!= PROCESSOR_POWER7
5186 && rs6000_cpu
!= PROCESSOR_POWER8
5187 && rs6000_cpu
!= PROCESSOR_POWER9
5188 && rs6000_cpu
!= PROCESSOR_PPCA2
5189 && rs6000_cpu
!= PROCESSOR_CELL
5190 && rs6000_cpu
!= PROCESSOR_PPC476
);
5191 rs6000_sched_groups
= (rs6000_cpu
== PROCESSOR_POWER4
5192 || rs6000_cpu
== PROCESSOR_POWER5
5193 || rs6000_cpu
== PROCESSOR_POWER7
5194 || rs6000_cpu
== PROCESSOR_POWER8
);
5195 rs6000_align_branch_targets
= (rs6000_cpu
== PROCESSOR_POWER4
5196 || rs6000_cpu
== PROCESSOR_POWER5
5197 || rs6000_cpu
== PROCESSOR_POWER6
5198 || rs6000_cpu
== PROCESSOR_POWER7
5199 || rs6000_cpu
== PROCESSOR_POWER8
5200 || rs6000_cpu
== PROCESSOR_POWER9
5201 || rs6000_cpu
== PROCESSOR_PPCE500MC
5202 || rs6000_cpu
== PROCESSOR_PPCE500MC64
5203 || rs6000_cpu
== PROCESSOR_PPCE5500
5204 || rs6000_cpu
== PROCESSOR_PPCE6500
);
5206 /* Allow debug switches to override the above settings. These are set to -1
5207 in powerpcspe.opt to indicate the user hasn't directly set the switch. */
5208 if (TARGET_ALWAYS_HINT
>= 0)
5209 rs6000_always_hint
= TARGET_ALWAYS_HINT
;
5211 if (TARGET_SCHED_GROUPS
>= 0)
5212 rs6000_sched_groups
= TARGET_SCHED_GROUPS
;
5214 if (TARGET_ALIGN_BRANCH_TARGETS
>= 0)
5215 rs6000_align_branch_targets
= TARGET_ALIGN_BRANCH_TARGETS
;
5217 rs6000_sched_restricted_insns_priority
5218 = (rs6000_sched_groups
? 1 : 0);
5220 /* Handle -msched-costly-dep option. */
5221 rs6000_sched_costly_dep
5222 = (rs6000_sched_groups
? true_store_to_load_dep_costly
: no_dep_costly
);
5224 if (rs6000_sched_costly_dep_str
)
5226 if (! strcmp (rs6000_sched_costly_dep_str
, "no"))
5227 rs6000_sched_costly_dep
= no_dep_costly
;
5228 else if (! strcmp (rs6000_sched_costly_dep_str
, "all"))
5229 rs6000_sched_costly_dep
= all_deps_costly
;
5230 else if (! strcmp (rs6000_sched_costly_dep_str
, "true_store_to_load"))
5231 rs6000_sched_costly_dep
= true_store_to_load_dep_costly
;
5232 else if (! strcmp (rs6000_sched_costly_dep_str
, "store_to_load"))
5233 rs6000_sched_costly_dep
= store_to_load_dep_costly
;
5235 rs6000_sched_costly_dep
= ((enum rs6000_dependence_cost
)
5236 atoi (rs6000_sched_costly_dep_str
));
5239 /* Handle -minsert-sched-nops option. */
5240 rs6000_sched_insert_nops
5241 = (rs6000_sched_groups
? sched_finish_regroup_exact
: sched_finish_none
);
5243 if (rs6000_sched_insert_nops_str
)
5245 if (! strcmp (rs6000_sched_insert_nops_str
, "no"))
5246 rs6000_sched_insert_nops
= sched_finish_none
;
5247 else if (! strcmp (rs6000_sched_insert_nops_str
, "pad"))
5248 rs6000_sched_insert_nops
= sched_finish_pad_groups
;
5249 else if (! strcmp (rs6000_sched_insert_nops_str
, "regroup_exact"))
5250 rs6000_sched_insert_nops
= sched_finish_regroup_exact
;
5252 rs6000_sched_insert_nops
= ((enum rs6000_nop_insertion
)
5253 atoi (rs6000_sched_insert_nops_str
));
5256 /* Handle stack protector */
5257 if (!global_options_set
.x_rs6000_stack_protector_guard
)
5258 #ifdef TARGET_THREAD_SSP_OFFSET
5259 rs6000_stack_protector_guard
= SSP_TLS
;
5261 rs6000_stack_protector_guard
= SSP_GLOBAL
;
5264 #ifdef TARGET_THREAD_SSP_OFFSET
5265 rs6000_stack_protector_guard_offset
= TARGET_THREAD_SSP_OFFSET
;
5266 rs6000_stack_protector_guard_reg
= TARGET_64BIT
? 13 : 2;
5269 if (global_options_set
.x_rs6000_stack_protector_guard_offset_str
)
5272 const char *str
= rs6000_stack_protector_guard_offset_str
;
5275 long offset
= strtol (str
, &endp
, 0);
5276 if (!*str
|| *endp
|| errno
)
5277 error ("%qs is not a valid number "
5278 "in -mstack-protector-guard-offset=", str
);
5280 if (!IN_RANGE (offset
, -0x8000, 0x7fff)
5281 || (TARGET_64BIT
&& (offset
& 3)))
5282 error ("%qs is not a valid offset "
5283 "in -mstack-protector-guard-offset=", str
);
5285 rs6000_stack_protector_guard_offset
= offset
;
5288 if (global_options_set
.x_rs6000_stack_protector_guard_reg_str
)
5290 const char *str
= rs6000_stack_protector_guard_reg_str
;
5291 int reg
= decode_reg_name (str
);
5293 if (!IN_RANGE (reg
, 1, 31))
5294 error ("%qs is not a valid base register "
5295 "in -mstack-protector-guard-reg=", str
);
5297 rs6000_stack_protector_guard_reg
= reg
;
5300 if (rs6000_stack_protector_guard
== SSP_TLS
5301 && !IN_RANGE (rs6000_stack_protector_guard_reg
, 1, 31))
5302 error ("-mstack-protector-guard=tls needs a valid base register");
5306 #ifdef TARGET_REGNAMES
5307 /* If the user desires alternate register names, copy in the
5308 alternate names now. */
5309 if (TARGET_REGNAMES
)
5310 memcpy (rs6000_reg_names
, alt_reg_names
, sizeof (rs6000_reg_names
));
5313 /* Set aix_struct_return last, after the ABI is determined.
5314 If -maix-struct-return or -msvr4-struct-return was explicitly
5315 used, don't override with the ABI default. */
5316 if (!global_options_set
.x_aix_struct_return
)
5317 aix_struct_return
= (DEFAULT_ABI
!= ABI_V4
|| DRAFT_V4_STRUCT_RET
);
5320 /* IBM XL compiler defaults to unsigned bitfields. */
5321 if (TARGET_XL_COMPAT
)
5322 flag_signed_bitfields
= 0;
5325 if (TARGET_LONG_DOUBLE_128
&& !TARGET_IEEEQUAD
)
5326 REAL_MODE_FORMAT (TFmode
) = &ibm_extended_format
;
5328 ASM_GENERATE_INTERNAL_LABEL (toc_label_name
, "LCTOC", 1);
5330 /* We can only guarantee the availability of DI pseudo-ops when
5331 assembling for 64-bit targets. */
5334 targetm
.asm_out
.aligned_op
.di
= NULL
;
5335 targetm
.asm_out
.unaligned_op
.di
= NULL
;
5339 /* Set branch target alignment, if not optimizing for size. */
5342 /* Cell wants to be aligned 8byte for dual issue. Titan wants to be
5343 aligned 8byte to avoid misprediction by the branch predictor. */
5344 if (rs6000_cpu
== PROCESSOR_TITAN
5345 || rs6000_cpu
== PROCESSOR_CELL
)
5347 if (align_functions
<= 0)
5348 align_functions
= 8;
5349 if (align_jumps
<= 0)
5351 if (align_loops
<= 0)
5354 if (rs6000_align_branch_targets
)
5356 if (align_functions
<= 0)
5357 align_functions
= 16;
5358 if (align_jumps
<= 0)
5360 if (align_loops
<= 0)
5362 can_override_loop_align
= 1;
5366 if (align_jumps_max_skip
<= 0)
5367 align_jumps_max_skip
= 15;
5368 if (align_loops_max_skip
<= 0)
5369 align_loops_max_skip
= 15;
5372 /* Arrange to save and restore machine status around nested functions. */
5373 init_machine_status
= rs6000_init_machine_status
;
5375 /* We should always be splitting complex arguments, but we can't break
5376 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
5377 if (DEFAULT_ABI
== ABI_V4
|| DEFAULT_ABI
== ABI_DARWIN
)
5378 targetm
.calls
.split_complex_arg
= NULL
;
5380 /* The AIX and ELFv1 ABIs define standard function descriptors. */
5381 if (DEFAULT_ABI
== ABI_AIX
)
5382 targetm
.calls
.custom_function_descriptors
= 0;
5385 /* Initialize rs6000_cost with the appropriate target costs. */
5387 rs6000_cost
= TARGET_POWERPC64
? &size64_cost
: &size32_cost
;
5391 case PROCESSOR_RS64A
:
5392 rs6000_cost
= &rs64a_cost
;
5395 case PROCESSOR_MPCCORE
:
5396 rs6000_cost
= &mpccore_cost
;
5399 case PROCESSOR_PPC403
:
5400 rs6000_cost
= &ppc403_cost
;
5403 case PROCESSOR_PPC405
:
5404 rs6000_cost
= &ppc405_cost
;
5407 case PROCESSOR_PPC440
:
5408 rs6000_cost
= &ppc440_cost
;
5411 case PROCESSOR_PPC476
:
5412 rs6000_cost
= &ppc476_cost
;
5415 case PROCESSOR_PPC601
:
5416 rs6000_cost
= &ppc601_cost
;
5419 case PROCESSOR_PPC603
:
5420 rs6000_cost
= &ppc603_cost
;
5423 case PROCESSOR_PPC604
:
5424 rs6000_cost
= &ppc604_cost
;
5427 case PROCESSOR_PPC604e
:
5428 rs6000_cost
= &ppc604e_cost
;
5431 case PROCESSOR_PPC620
:
5432 rs6000_cost
= &ppc620_cost
;
5435 case PROCESSOR_PPC630
:
5436 rs6000_cost
= &ppc630_cost
;
5439 case PROCESSOR_CELL
:
5440 rs6000_cost
= &ppccell_cost
;
5443 case PROCESSOR_PPC750
:
5444 case PROCESSOR_PPC7400
:
5445 rs6000_cost
= &ppc750_cost
;
5448 case PROCESSOR_PPC7450
:
5449 rs6000_cost
= &ppc7450_cost
;
5452 case PROCESSOR_PPC8540
:
5453 case PROCESSOR_PPC8548
:
5454 rs6000_cost
= &ppc8540_cost
;
5457 case PROCESSOR_PPCE300C2
:
5458 case PROCESSOR_PPCE300C3
:
5459 rs6000_cost
= &ppce300c2c3_cost
;
5462 case PROCESSOR_PPCE500MC
:
5463 rs6000_cost
= &ppce500mc_cost
;
5466 case PROCESSOR_PPCE500MC64
:
5467 rs6000_cost
= &ppce500mc64_cost
;
5470 case PROCESSOR_PPCE5500
:
5471 rs6000_cost
= &ppce5500_cost
;
5474 case PROCESSOR_PPCE6500
:
5475 rs6000_cost
= &ppce6500_cost
;
5478 case PROCESSOR_TITAN
:
5479 rs6000_cost
= &titan_cost
;
5482 case PROCESSOR_POWER4
:
5483 case PROCESSOR_POWER5
:
5484 rs6000_cost
= &power4_cost
;
5487 case PROCESSOR_POWER6
:
5488 rs6000_cost
= &power6_cost
;
5491 case PROCESSOR_POWER7
:
5492 rs6000_cost
= &power7_cost
;
5495 case PROCESSOR_POWER8
:
5496 rs6000_cost
= &power8_cost
;
5499 case PROCESSOR_POWER9
:
5500 rs6000_cost
= &power9_cost
;
5503 case PROCESSOR_PPCA2
:
5504 rs6000_cost
= &ppca2_cost
;
5513 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
5514 rs6000_cost
->simultaneous_prefetches
,
5515 global_options
.x_param_values
,
5516 global_options_set
.x_param_values
);
5517 maybe_set_param_value (PARAM_L1_CACHE_SIZE
, rs6000_cost
->l1_cache_size
,
5518 global_options
.x_param_values
,
5519 global_options_set
.x_param_values
);
5520 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
5521 rs6000_cost
->cache_line_size
,
5522 global_options
.x_param_values
,
5523 global_options_set
.x_param_values
);
5524 maybe_set_param_value (PARAM_L2_CACHE_SIZE
, rs6000_cost
->l2_cache_size
,
5525 global_options
.x_param_values
,
5526 global_options_set
.x_param_values
);
5528 /* Increase loop peeling limits based on performance analysis. */
5529 maybe_set_param_value (PARAM_MAX_PEELED_INSNS
, 400,
5530 global_options
.x_param_values
,
5531 global_options_set
.x_param_values
);
5532 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS
, 400,
5533 global_options
.x_param_values
,
5534 global_options_set
.x_param_values
);
5536 /* Use the 'model' -fsched-pressure algorithm by default. */
5537 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM
,
5538 SCHED_PRESSURE_MODEL
,
5539 global_options
.x_param_values
,
5540 global_options_set
.x_param_values
);
5542 /* If using typedef char *va_list, signal that
5543 __builtin_va_start (&ap, 0) can be optimized to
5544 ap = __builtin_next_arg (0). */
5545 if (DEFAULT_ABI
!= ABI_V4
)
5546 targetm
.expand_builtin_va_start
= NULL
;
5549 /* Set up single/double float flags.
5550 If TARGET_HARD_FLOAT is set, but neither single or double is set,
5551 then set both flags. */
5552 if (TARGET_HARD_FLOAT
&& TARGET_FPRS
5553 && rs6000_single_float
== 0 && rs6000_double_float
== 0)
5554 rs6000_single_float
= rs6000_double_float
= 1;
5556 /* If not explicitly specified via option, decide whether to generate indexed
5557 load/store instructions. A value of -1 indicates that the
5558 initial value of this variable has not been overwritten. During
5559 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
5560 if (TARGET_AVOID_XFORM
== -1)
5561 /* Avoid indexed addressing when targeting Power6 in order to avoid the
5562 DERAT mispredict penalty. However the LVE and STVE altivec instructions
5563 need indexed accesses and the type used is the scalar type of the element
5564 being loaded or stored. */
5565 TARGET_AVOID_XFORM
= (rs6000_cpu
== PROCESSOR_POWER6
&& TARGET_CMPB
5566 && !TARGET_ALTIVEC
);
5568 /* Set the -mrecip options. */
5569 if (rs6000_recip_name
)
5571 char *p
= ASTRDUP (rs6000_recip_name
);
5573 unsigned int mask
, i
;
5576 while ((q
= strtok (p
, ",")) != NULL
)
5587 if (!strcmp (q
, "default"))
5588 mask
= ((TARGET_RECIP_PRECISION
)
5589 ? RECIP_HIGH_PRECISION
: RECIP_LOW_PRECISION
);
5592 for (i
= 0; i
< ARRAY_SIZE (recip_options
); i
++)
5593 if (!strcmp (q
, recip_options
[i
].string
))
5595 mask
= recip_options
[i
].mask
;
5599 if (i
== ARRAY_SIZE (recip_options
))
5601 error ("unknown option for -mrecip=%s", q
);
5609 rs6000_recip_control
&= ~mask
;
5611 rs6000_recip_control
|= mask
;
5615 /* Set the builtin mask of the various options used that could affect which
5616 builtins were used. In the past we used target_flags, but we've run out
5617 of bits, and some options like SPE and PAIRED are no longer in
5619 rs6000_builtin_mask
= rs6000_builtin_mask_calculate ();
5620 if (TARGET_DEBUG_BUILTIN
|| TARGET_DEBUG_TARGET
)
5621 rs6000_print_builtin_options (stderr
, 0, "builtin mask",
5622 rs6000_builtin_mask
);
5624 /* Initialize all of the registers. */
5625 rs6000_init_hard_regno_mode_ok (global_init_p
);
5627 /* Save the initial options in case the user does function specific options */
5629 target_option_default_node
= target_option_current_node
5630 = build_target_option_node (&global_options
);
5632 /* If not explicitly specified via option, decide whether to generate the
5633 extra blr's required to preserve the link stack on some cpus (eg, 476). */
5634 if (TARGET_LINK_STACK
== -1)
5635 SET_TARGET_LINK_STACK (rs6000_cpu
== PROCESSOR_PPC476
&& flag_pic
);
5640 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
5641 define the target cpu type. */
5644 rs6000_option_override (void)
5646 (void) rs6000_option_override_internal (true);
5650 /* Implement targetm.vectorize.builtin_mask_for_load. */
5652 rs6000_builtin_mask_for_load (void)
5654 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
5655 if ((TARGET_ALTIVEC
&& !TARGET_VSX
)
5656 || (TARGET_VSX
&& !TARGET_EFFICIENT_UNALIGNED_VSX
))
5657 return altivec_builtin_mask_for_load
;
5662 /* Implement LOOP_ALIGN. */
5664 rs6000_loop_align (rtx label
)
5669 /* Don't override loop alignment if -falign-loops was specified. */
5670 if (!can_override_loop_align
)
5671 return align_loops_log
;
5673 bb
= BLOCK_FOR_INSN (label
);
5674 ninsns
= num_loop_insns(bb
->loop_father
);
5676 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
5677 if (ninsns
> 4 && ninsns
<= 8
5678 && (rs6000_cpu
== PROCESSOR_POWER4
5679 || rs6000_cpu
== PROCESSOR_POWER5
5680 || rs6000_cpu
== PROCESSOR_POWER6
5681 || rs6000_cpu
== PROCESSOR_POWER7
5682 || rs6000_cpu
== PROCESSOR_POWER8
5683 || rs6000_cpu
== PROCESSOR_POWER9
))
5686 return align_loops_log
;
5689 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
5691 rs6000_loop_align_max_skip (rtx_insn
*label
)
5693 return (1 << rs6000_loop_align (label
)) - 1;
5696 /* Return true iff, data reference of TYPE can reach vector alignment (16)
5697 after applying N number of iterations. This routine does not determine
5698 how may iterations are required to reach desired alignment. */
5701 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED
, bool is_packed
)
5708 if (rs6000_alignment_flags
== MASK_ALIGN_NATURAL
)
5711 if (rs6000_alignment_flags
== MASK_ALIGN_POWER
)
5721 /* Assuming that all other types are naturally aligned. CHECKME! */
5726 /* Return true if the vector misalignment factor is supported by the
5729 rs6000_builtin_support_vector_misalignment (machine_mode mode
,
5736 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
5739 /* Return if movmisalign pattern is not supported for this mode. */
5740 if (optab_handler (movmisalign_optab
, mode
) == CODE_FOR_nothing
)
5743 if (misalignment
== -1)
5745 /* Misalignment factor is unknown at compile time but we know
5746 it's word aligned. */
5747 if (rs6000_vector_alignment_reachable (type
, is_packed
))
5749 int element_size
= TREE_INT_CST_LOW (TYPE_SIZE (type
));
5751 if (element_size
== 64 || element_size
== 32)
5758 /* VSX supports word-aligned vector. */
5759 if (misalignment
% 4 == 0)
5765 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5767 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
5768 tree vectype
, int misalign
)
5773 switch (type_of_cost
)
5783 case cond_branch_not_taken
:
5792 case vec_promote_demote
:
5798 case cond_branch_taken
:
5801 case unaligned_load
:
5802 if (TARGET_P9_VECTOR
)
5805 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
5808 if (TARGET_VSX
&& TARGET_ALLOW_MOVMISALIGN
)
5810 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
5812 /* Double word aligned. */
5820 /* Double word aligned. */
5824 /* Unknown misalignment. */
5837 /* Misaligned loads are not supported. */
5842 case unaligned_store
:
5843 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
5846 if (TARGET_VSX
&& TARGET_ALLOW_MOVMISALIGN
)
5848 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
5850 /* Double word aligned. */
5858 /* Double word aligned. */
5862 /* Unknown misalignment. */
5875 /* Misaligned stores are not supported. */
5881 /* This is a rough approximation assuming non-constant elements
5882 constructed into a vector via element insertion. FIXME:
5883 vec_construct is not granular enough for uniformly good
5884 decisions. If the initialization is a splat, this is
5885 cheaper than we estimate. Improve this someday. */
5886 elem_type
= TREE_TYPE (vectype
);
5887 /* 32-bit vectors loaded into registers are stored as double
5888 precision, so we need 2 permutes, 2 converts, and 1 merge
5889 to construct a vector of short floats from them. */
5890 if (SCALAR_FLOAT_TYPE_P (elem_type
)
5891 && TYPE_PRECISION (elem_type
) == 32)
5893 /* On POWER9, integer vector types are built up in GPRs and then
5894 use a direct move (2 cycles). For POWER8 this is even worse,
5895 as we need two direct moves and a merge, and the direct moves
5897 else if (INTEGRAL_TYPE_P (elem_type
))
5899 if (TARGET_P9_VECTOR
)
5900 return TYPE_VECTOR_SUBPARTS (vectype
) - 1 + 2;
5902 return TYPE_VECTOR_SUBPARTS (vectype
) - 1 + 11;
5905 /* V2DFmode doesn't need a direct move. */
5913 /* Implement targetm.vectorize.preferred_simd_mode. */
5916 rs6000_preferred_simd_mode (scalar_mode mode
)
5925 if (TARGET_ALTIVEC
|| TARGET_VSX
)
5951 if (TARGET_PAIRED_FLOAT
5957 typedef struct _rs6000_cost_data
5959 struct loop
*loop_info
;
5963 /* Test for likely overcommitment of vector hardware resources. If a
5964 loop iteration is relatively large, and too large a percentage of
5965 instructions in the loop are vectorized, the cost model may not
5966 adequately reflect delays from unavailable vector resources.
5967 Penalize the loop body cost for this case. */
5970 rs6000_density_test (rs6000_cost_data
*data
)
5972 const int DENSITY_PCT_THRESHOLD
= 85;
5973 const int DENSITY_SIZE_THRESHOLD
= 70;
5974 const int DENSITY_PENALTY
= 10;
5975 struct loop
*loop
= data
->loop_info
;
5976 basic_block
*bbs
= get_loop_body (loop
);
5977 int nbbs
= loop
->num_nodes
;
5978 int vec_cost
= data
->cost
[vect_body
], not_vec_cost
= 0;
5981 for (i
= 0; i
< nbbs
; i
++)
5983 basic_block bb
= bbs
[i
];
5984 gimple_stmt_iterator gsi
;
5986 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
5988 gimple
*stmt
= gsi_stmt (gsi
);
5989 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5991 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
5992 && !STMT_VINFO_IN_PATTERN_P (stmt_info
))
5998 density_pct
= (vec_cost
* 100) / (vec_cost
+ not_vec_cost
);
6000 if (density_pct
> DENSITY_PCT_THRESHOLD
6001 && vec_cost
+ not_vec_cost
> DENSITY_SIZE_THRESHOLD
)
6003 data
->cost
[vect_body
] = vec_cost
* (100 + DENSITY_PENALTY
) / 100;
6004 if (dump_enabled_p ())
6005 dump_printf_loc (MSG_NOTE
, vect_location
,
6006 "density %d%%, cost %d exceeds threshold, penalizing "
6007 "loop body cost by %d%%", density_pct
,
6008 vec_cost
+ not_vec_cost
, DENSITY_PENALTY
);
6012 /* Implement targetm.vectorize.init_cost. */
6014 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
6015 instruction is needed by the vectorization. */
6016 static bool rs6000_vect_nonmem
;
6019 rs6000_init_cost (struct loop
*loop_info
)
6021 rs6000_cost_data
*data
= XNEW (struct _rs6000_cost_data
);
6022 data
->loop_info
= loop_info
;
6023 data
->cost
[vect_prologue
] = 0;
6024 data
->cost
[vect_body
] = 0;
6025 data
->cost
[vect_epilogue
] = 0;
6026 rs6000_vect_nonmem
= false;
6030 /* Implement targetm.vectorize.add_stmt_cost. */
6033 rs6000_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
6034 struct _stmt_vec_info
*stmt_info
, int misalign
,
6035 enum vect_cost_model_location where
)
6037 rs6000_cost_data
*cost_data
= (rs6000_cost_data
*) data
;
6038 unsigned retval
= 0;
6040 if (flag_vect_cost_model
)
6042 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
6043 int stmt_cost
= rs6000_builtin_vectorization_cost (kind
, vectype
,
6045 /* Statements in an inner loop relative to the loop being
6046 vectorized are weighted more heavily. The value here is
6047 arbitrary and could potentially be improved with analysis. */
6048 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
6049 count
*= 50; /* FIXME. */
6051 retval
= (unsigned) (count
* stmt_cost
);
6052 cost_data
->cost
[where
] += retval
;
6054 /* Check whether we're doing something other than just a copy loop.
6055 Not all such loops may be profitably vectorized; see
6056 rs6000_finish_cost. */
6057 if ((kind
== vec_to_scalar
|| kind
== vec_perm
6058 || kind
== vec_promote_demote
|| kind
== vec_construct
6059 || kind
== scalar_to_vec
)
6060 || (where
== vect_body
&& kind
== vector_stmt
))
6061 rs6000_vect_nonmem
= true;
6067 /* Implement targetm.vectorize.finish_cost. */
6070 rs6000_finish_cost (void *data
, unsigned *prologue_cost
,
6071 unsigned *body_cost
, unsigned *epilogue_cost
)
6073 rs6000_cost_data
*cost_data
= (rs6000_cost_data
*) data
;
6075 if (cost_data
->loop_info
)
6076 rs6000_density_test (cost_data
);
6078 /* Don't vectorize minimum-vectorization-factor, simple copy loops
6079 that require versioning for any reason. The vectorization is at
6080 best a wash inside the loop, and the versioning checks make
6081 profitability highly unlikely and potentially quite harmful. */
6082 if (cost_data
->loop_info
)
6084 loop_vec_info vec_info
= loop_vec_info_for_loop (cost_data
->loop_info
);
6085 if (!rs6000_vect_nonmem
6086 && LOOP_VINFO_VECT_FACTOR (vec_info
) == 2
6087 && LOOP_REQUIRES_VERSIONING (vec_info
))
6088 cost_data
->cost
[vect_body
] += 10000;
6091 *prologue_cost
= cost_data
->cost
[vect_prologue
];
6092 *body_cost
= cost_data
->cost
[vect_body
];
6093 *epilogue_cost
= cost_data
->cost
[vect_epilogue
];
/* Implement targetm.vectorize.destroy_cost_data.  Release the cost
   accumulator allocated by rs6000_init_cost.  */
static void
rs6000_destroy_cost_data (void *data)
{
  free (data);
}
6104 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
6105 library with vectorized intrinsics. */
6108 rs6000_builtin_vectorized_libmass (combined_fn fn
, tree type_out
,
6112 const char *suffix
= NULL
;
6113 tree fntype
, new_fndecl
, bdecl
= NULL_TREE
;
6116 machine_mode el_mode
, in_mode
;
6119 /* Libmass is suitable for unsafe math only as it does not correctly support
6120 parts of IEEE with the required precision such as denormals. Only support
6121 it if we have VSX to use the simd d2 or f4 functions.
6122 XXX: Add variable length support. */
6123 if (!flag_unsafe_math_optimizations
|| !TARGET_VSX
)
6126 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
6127 n
= TYPE_VECTOR_SUBPARTS (type_out
);
6128 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
6129 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
6130 if (el_mode
!= in_mode
6166 if (el_mode
== DFmode
&& n
== 2)
6168 bdecl
= mathfn_built_in (double_type_node
, fn
);
6169 suffix
= "d2"; /* pow -> powd2 */
6171 else if (el_mode
== SFmode
&& n
== 4)
6173 bdecl
= mathfn_built_in (float_type_node
, fn
);
6174 suffix
= "4"; /* powf -> powf4 */
6186 gcc_assert (suffix
!= NULL
);
6187 bname
= IDENTIFIER_POINTER (DECL_NAME (bdecl
));
6191 strcpy (name
, bname
+ sizeof ("__builtin_") - 1);
6192 strcat (name
, suffix
);
6195 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
6196 else if (n_args
== 2)
6197 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
6201 /* Build a function declaration for the vectorized function. */
6202 new_fndecl
= build_decl (BUILTINS_LOCATION
,
6203 FUNCTION_DECL
, get_identifier (name
), fntype
);
6204 TREE_PUBLIC (new_fndecl
) = 1;
6205 DECL_EXTERNAL (new_fndecl
) = 1;
6206 DECL_IS_NOVOPS (new_fndecl
) = 1;
6207 TREE_READONLY (new_fndecl
) = 1;
6212 /* Returns a function decl for a vectorized version of the builtin function
6213 with builtin function code FN and the result vector type TYPE, or NULL_TREE
6214 if it is not available. */
6217 rs6000_builtin_vectorized_function (unsigned int fn
, tree type_out
,
6220 machine_mode in_mode
, out_mode
;
6223 if (TARGET_DEBUG_BUILTIN
)
6224 fprintf (stderr
, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
6225 combined_fn_name (combined_fn (fn
)),
6226 GET_MODE_NAME (TYPE_MODE (type_out
)),
6227 GET_MODE_NAME (TYPE_MODE (type_in
)));
6229 if (TREE_CODE (type_out
) != VECTOR_TYPE
6230 || TREE_CODE (type_in
) != VECTOR_TYPE
6231 || !TARGET_VECTORIZE_BUILTINS
)
6234 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
6235 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
6236 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
6237 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
6242 if (VECTOR_UNIT_VSX_P (V2DFmode
)
6243 && out_mode
== DFmode
&& out_n
== 2
6244 && in_mode
== DFmode
&& in_n
== 2)
6245 return rs6000_builtin_decls
[VSX_BUILTIN_CPSGNDP
];
6246 if (VECTOR_UNIT_VSX_P (V4SFmode
)
6247 && out_mode
== SFmode
&& out_n
== 4
6248 && in_mode
== SFmode
&& in_n
== 4)
6249 return rs6000_builtin_decls
[VSX_BUILTIN_CPSGNSP
];
6250 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
6251 && out_mode
== SFmode
&& out_n
== 4
6252 && in_mode
== SFmode
&& in_n
== 4)
6253 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_COPYSIGN_V4SF
];
6256 if (VECTOR_UNIT_VSX_P (V2DFmode
)
6257 && out_mode
== DFmode
&& out_n
== 2
6258 && in_mode
== DFmode
&& in_n
== 2)
6259 return rs6000_builtin_decls
[VSX_BUILTIN_XVRDPIP
];
6260 if (VECTOR_UNIT_VSX_P (V4SFmode
)
6261 && out_mode
== SFmode
&& out_n
== 4
6262 && in_mode
== SFmode
&& in_n
== 4)
6263 return rs6000_builtin_decls
[VSX_BUILTIN_XVRSPIP
];
6264 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
6265 && out_mode
== SFmode
&& out_n
== 4
6266 && in_mode
== SFmode
&& in_n
== 4)
6267 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_VRFIP
];
6270 if (VECTOR_UNIT_VSX_P (V2DFmode
)
6271 && out_mode
== DFmode
&& out_n
== 2
6272 && in_mode
== DFmode
&& in_n
== 2)
6273 return rs6000_builtin_decls
[VSX_BUILTIN_XVRDPIM
];
6274 if (VECTOR_UNIT_VSX_P (V4SFmode
)
6275 && out_mode
== SFmode
&& out_n
== 4
6276 && in_mode
== SFmode
&& in_n
== 4)
6277 return rs6000_builtin_decls
[VSX_BUILTIN_XVRSPIM
];
6278 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
6279 && out_mode
== SFmode
&& out_n
== 4
6280 && in_mode
== SFmode
&& in_n
== 4)
6281 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_VRFIM
];
6284 if (VECTOR_UNIT_VSX_P (V2DFmode
)
6285 && out_mode
== DFmode
&& out_n
== 2
6286 && in_mode
== DFmode
&& in_n
== 2)
6287 return rs6000_builtin_decls
[VSX_BUILTIN_XVMADDDP
];
6288 if (VECTOR_UNIT_VSX_P (V4SFmode
)
6289 && out_mode
== SFmode
&& out_n
== 4
6290 && in_mode
== SFmode
&& in_n
== 4)
6291 return rs6000_builtin_decls
[VSX_BUILTIN_XVMADDSP
];
6292 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
6293 && out_mode
== SFmode
&& out_n
== 4
6294 && in_mode
== SFmode
&& in_n
== 4)
6295 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_VMADDFP
];
6298 if (VECTOR_UNIT_VSX_P (V2DFmode
)
6299 && out_mode
== DFmode
&& out_n
== 2
6300 && in_mode
== DFmode
&& in_n
== 2)
6301 return rs6000_builtin_decls
[VSX_BUILTIN_XVRDPIZ
];
6302 if (VECTOR_UNIT_VSX_P (V4SFmode
)
6303 && out_mode
== SFmode
&& out_n
== 4
6304 && in_mode
== SFmode
&& in_n
== 4)
6305 return rs6000_builtin_decls
[VSX_BUILTIN_XVRSPIZ
];
6306 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
6307 && out_mode
== SFmode
&& out_n
== 4
6308 && in_mode
== SFmode
&& in_n
== 4)
6309 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_VRFIZ
];
6312 if (VECTOR_UNIT_VSX_P (V2DFmode
)
6313 && flag_unsafe_math_optimizations
6314 && out_mode
== DFmode
&& out_n
== 2
6315 && in_mode
== DFmode
&& in_n
== 2)
6316 return rs6000_builtin_decls
[VSX_BUILTIN_XVRDPI
];
6317 if (VECTOR_UNIT_VSX_P (V4SFmode
)
6318 && flag_unsafe_math_optimizations
6319 && out_mode
== SFmode
&& out_n
== 4
6320 && in_mode
== SFmode
&& in_n
== 4)
6321 return rs6000_builtin_decls
[VSX_BUILTIN_XVRSPI
];
6324 if (VECTOR_UNIT_VSX_P (V2DFmode
)
6325 && !flag_trapping_math
6326 && out_mode
== DFmode
&& out_n
== 2
6327 && in_mode
== DFmode
&& in_n
== 2)
6328 return rs6000_builtin_decls
[VSX_BUILTIN_XVRDPIC
];
6329 if (VECTOR_UNIT_VSX_P (V4SFmode
)
6330 && !flag_trapping_math
6331 && out_mode
== SFmode
&& out_n
== 4
6332 && in_mode
== SFmode
&& in_n
== 4)
6333 return rs6000_builtin_decls
[VSX_BUILTIN_XVRSPIC
];
6339 /* Generate calls to libmass if appropriate. */
6340 if (rs6000_veclib_handler
)
6341 return rs6000_veclib_handler (combined_fn (fn
), type_out
, type_in
);
6346 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
6349 rs6000_builtin_md_vectorized_function (tree fndecl
, tree type_out
,
6352 machine_mode in_mode
, out_mode
;
6355 if (TARGET_DEBUG_BUILTIN
)
6356 fprintf (stderr
, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
6357 IDENTIFIER_POINTER (DECL_NAME (fndecl
)),
6358 GET_MODE_NAME (TYPE_MODE (type_out
)),
6359 GET_MODE_NAME (TYPE_MODE (type_in
)));
6361 if (TREE_CODE (type_out
) != VECTOR_TYPE
6362 || TREE_CODE (type_in
) != VECTOR_TYPE
6363 || !TARGET_VECTORIZE_BUILTINS
)
6366 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
6367 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
6368 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
6369 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
6371 enum rs6000_builtins fn
6372 = (enum rs6000_builtins
) DECL_FUNCTION_CODE (fndecl
);
6375 case RS6000_BUILTIN_RSQRTF
:
6376 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
)
6377 && out_mode
== SFmode
&& out_n
== 4
6378 && in_mode
== SFmode
&& in_n
== 4)
6379 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_VRSQRTFP
];
6381 case RS6000_BUILTIN_RSQRT
:
6382 if (VECTOR_UNIT_VSX_P (V2DFmode
)
6383 && out_mode
== DFmode
&& out_n
== 2
6384 && in_mode
== DFmode
&& in_n
== 2)
6385 return rs6000_builtin_decls
[VSX_BUILTIN_RSQRT_2DF
];
6387 case RS6000_BUILTIN_RECIPF
:
6388 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
)
6389 && out_mode
== SFmode
&& out_n
== 4
6390 && in_mode
== SFmode
&& in_n
== 4)
6391 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_VRECIPFP
];
6393 case RS6000_BUILTIN_RECIP
:
6394 if (VECTOR_UNIT_VSX_P (V2DFmode
)
6395 && out_mode
== DFmode
&& out_n
== 2
6396 && in_mode
== DFmode
&& in_n
== 2)
6397 return rs6000_builtin_decls
[VSX_BUILTIN_RECIP_V2DF
];
/* Default CPU string for rs6000*_file_start functions.  Set from
   TARGET_CPU_DEFAULT in rs6000_file_start.  */
static const char *rs6000_default_cpu;
6408 /* Do anything needed at the start of the asm file. */
6411 rs6000_file_start (void)
6414 const char *start
= buffer
;
6415 FILE *file
= asm_out_file
;
6417 rs6000_default_cpu
= TARGET_CPU_DEFAULT
;
6419 default_file_start ();
6421 if (flag_verbose_asm
)
6423 sprintf (buffer
, "\n%s rs6000/powerpc options:", ASM_COMMENT_START
);
6425 if (rs6000_default_cpu
!= 0 && rs6000_default_cpu
[0] != '\0')
6427 fprintf (file
, "%s --with-cpu=%s", start
, rs6000_default_cpu
);
6431 if (global_options_set
.x_rs6000_cpu_index
)
6433 fprintf (file
, "%s -mcpu=%s", start
,
6434 processor_target_table
[rs6000_cpu_index
].name
);
6438 if (global_options_set
.x_rs6000_tune_index
)
6440 fprintf (file
, "%s -mtune=%s", start
,
6441 processor_target_table
[rs6000_tune_index
].name
);
6445 if (PPC405_ERRATUM77
)
6447 fprintf (file
, "%s PPC405CR_ERRATUM77", start
);
6451 #ifdef USING_ELFOS_H
6452 switch (rs6000_sdata
)
6454 case SDATA_NONE
: fprintf (file
, "%s -msdata=none", start
); start
= ""; break;
6455 case SDATA_DATA
: fprintf (file
, "%s -msdata=data", start
); start
= ""; break;
6456 case SDATA_SYSV
: fprintf (file
, "%s -msdata=sysv", start
); start
= ""; break;
6457 case SDATA_EABI
: fprintf (file
, "%s -msdata=eabi", start
); start
= ""; break;
6460 if (rs6000_sdata
&& g_switch_value
)
6462 fprintf (file
, "%s -G %d", start
,
6472 #ifdef USING_ELFOS_H
6473 if (!(rs6000_default_cpu
&& rs6000_default_cpu
[0])
6474 && !global_options_set
.x_rs6000_cpu_index
)
6476 fputs ("\t.machine ", asm_out_file
);
6477 if ((rs6000_isa_flags
& OPTION_MASK_MODULO
) != 0)
6478 fputs ("power9\n", asm_out_file
);
6479 else if ((rs6000_isa_flags
& OPTION_MASK_DIRECT_MOVE
) != 0)
6480 fputs ("power8\n", asm_out_file
);
6481 else if ((rs6000_isa_flags
& OPTION_MASK_POPCNTD
) != 0)
6482 fputs ("power7\n", asm_out_file
);
6483 else if ((rs6000_isa_flags
& OPTION_MASK_CMPB
) != 0)
6484 fputs ("power6\n", asm_out_file
);
6485 else if ((rs6000_isa_flags
& OPTION_MASK_POPCNTB
) != 0)
6486 fputs ("power5\n", asm_out_file
);
6487 else if ((rs6000_isa_flags
& OPTION_MASK_MFCRF
) != 0)
6488 fputs ("power4\n", asm_out_file
);
6489 else if ((rs6000_isa_flags
& OPTION_MASK_POWERPC64
) != 0)
6490 fputs ("ppc64\n", asm_out_file
);
6492 fputs ("ppc\n", asm_out_file
);
6496 if (DEFAULT_ABI
== ABI_ELFv2
)
6497 fprintf (file
, "\t.abiversion 2\n");
6501 /* Return nonzero if this function is known to have a null epilogue. */
6504 direct_return (void)
6506 if (reload_completed
)
6508 rs6000_stack_t
*info
= rs6000_stack_info ();
6510 if (info
->first_gp_reg_save
== 32
6511 && info
->first_fp_reg_save
== 64
6512 && info
->first_altivec_reg_save
== LAST_ALTIVEC_REGNO
+ 1
6513 && ! info
->lr_save_p
6514 && ! info
->cr_save_p
6515 && info
->vrsave_size
== 0
6523 /* Return the number of instructions it takes to form a constant in an
6524 integer register. */
6527 num_insns_constant_wide (HOST_WIDE_INT value
)
6529 /* signed constant loadable with addi */
6530 if (((unsigned HOST_WIDE_INT
) value
+ 0x8000) < 0x10000)
6533 /* constant loadable with addis */
6534 else if ((value
& 0xffff) == 0
6535 && (value
>> 31 == -1 || value
>> 31 == 0))
6538 else if (TARGET_POWERPC64
)
6540 HOST_WIDE_INT low
= ((value
& 0xffffffff) ^ 0x80000000) - 0x80000000;
6541 HOST_WIDE_INT high
= value
>> 31;
6543 if (high
== 0 || high
== -1)
6549 return num_insns_constant_wide (high
) + 1;
6551 return num_insns_constant_wide (low
) + 1;
6553 return (num_insns_constant_wide (high
)
6554 + num_insns_constant_wide (low
) + 1);
6562 num_insns_constant (rtx op
, machine_mode mode
)
6564 HOST_WIDE_INT low
, high
;
6566 switch (GET_CODE (op
))
6569 if ((INTVAL (op
) >> 31) != 0 && (INTVAL (op
) >> 31) != -1
6570 && rs6000_is_valid_and_mask (op
, mode
))
6573 return num_insns_constant_wide (INTVAL (op
));
6575 case CONST_WIDE_INT
:
6578 int ins
= CONST_WIDE_INT_NUNITS (op
) - 1;
6579 for (i
= 0; i
< CONST_WIDE_INT_NUNITS (op
); i
++)
6580 ins
+= num_insns_constant_wide (CONST_WIDE_INT_ELT (op
, i
));
6585 if (mode
== SFmode
|| mode
== SDmode
)
6589 if (DECIMAL_FLOAT_MODE_P (mode
))
6590 REAL_VALUE_TO_TARGET_DECIMAL32
6591 (*CONST_DOUBLE_REAL_VALUE (op
), l
);
6593 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op
), l
);
6594 return num_insns_constant_wide ((HOST_WIDE_INT
) l
);
6598 if (DECIMAL_FLOAT_MODE_P (mode
))
6599 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (op
), l
);
6601 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op
), l
);
6602 high
= l
[WORDS_BIG_ENDIAN
== 0];
6603 low
= l
[WORDS_BIG_ENDIAN
!= 0];
6606 return (num_insns_constant_wide (low
)
6607 + num_insns_constant_wide (high
));
6610 if ((high
== 0 && low
>= 0)
6611 || (high
== -1 && low
< 0))
6612 return num_insns_constant_wide (low
);
6614 else if (rs6000_is_valid_and_mask (op
, mode
))
6618 return num_insns_constant_wide (high
) + 1;
6621 return (num_insns_constant_wide (high
)
6622 + num_insns_constant_wide (low
) + 1);
6630 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6631 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6632 corresponding element of the vector, but for V4SFmode and V2SFmode,
6633 the corresponding "float" is interpreted as an SImode integer. */
6636 const_vector_elt_as_int (rtx op
, unsigned int elt
)
6640 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6641 gcc_assert (GET_MODE (op
) != V2DImode
6642 && GET_MODE (op
) != V2DFmode
);
6644 tmp
= CONST_VECTOR_ELT (op
, elt
);
6645 if (GET_MODE (op
) == V4SFmode
6646 || GET_MODE (op
) == V2SFmode
)
6647 tmp
= gen_lowpart (SImode
, tmp
);
6648 return INTVAL (tmp
);
6651 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6652 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6653 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6654 all items are set to the same value and contain COPIES replicas of the
6655 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6656 operand and the others are set to the value of the operand's msb. */
6659 vspltis_constant (rtx op
, unsigned step
, unsigned copies
)
6661 machine_mode mode
= GET_MODE (op
);
6662 machine_mode inner
= GET_MODE_INNER (mode
);
6670 HOST_WIDE_INT splat_val
;
6671 HOST_WIDE_INT msb_val
;
6673 if (mode
== V2DImode
|| mode
== V2DFmode
|| mode
== V1TImode
)
6676 nunits
= GET_MODE_NUNITS (mode
);
6677 bitsize
= GET_MODE_BITSIZE (inner
);
6678 mask
= GET_MODE_MASK (inner
);
6680 val
= const_vector_elt_as_int (op
, BYTES_BIG_ENDIAN
? nunits
- 1 : 0);
6682 msb_val
= val
>= 0 ? 0 : -1;
6684 /* Construct the value to be splatted, if possible. If not, return 0. */
6685 for (i
= 2; i
<= copies
; i
*= 2)
6687 HOST_WIDE_INT small_val
;
6689 small_val
= splat_val
>> bitsize
;
6691 if (splat_val
!= ((HOST_WIDE_INT
)
6692 ((unsigned HOST_WIDE_INT
) small_val
<< bitsize
)
6693 | (small_val
& mask
)))
6695 splat_val
= small_val
;
6698 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6699 if (EASY_VECTOR_15 (splat_val
))
6702 /* Also check if we can splat, and then add the result to itself. Do so if
6703 the value is positive, of if the splat instruction is using OP's mode;
6704 for splat_val < 0, the splat and the add should use the same mode. */
6705 else if (EASY_VECTOR_15_ADD_SELF (splat_val
)
6706 && (splat_val
>= 0 || (step
== 1 && copies
== 1)))
6709 /* Also check if are loading up the most significant bit which can be done by
6710 loading up -1 and shifting the value left by -1. */
6711 else if (EASY_VECTOR_MSB (splat_val
, inner
))
6717 /* Check if VAL is present in every STEP-th element, and the
6718 other elements are filled with its most significant bit. */
6719 for (i
= 1; i
< nunits
; ++i
)
6721 HOST_WIDE_INT desired_val
;
6722 unsigned elt
= BYTES_BIG_ENDIAN
? nunits
- 1 - i
: i
;
6723 if ((i
& (step
- 1)) == 0)
6726 desired_val
= msb_val
;
6728 if (desired_val
!= const_vector_elt_as_int (op
, elt
))
6735 /* Like vsplitis_constant, but allow the value to be shifted left with a VSLDOI
6736 instruction, filling in the bottom elements with 0 or -1.
6738 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6739 for the number of zeroes to shift in, or negative for the number of 0xff
6742 OP is a CONST_VECTOR. */
6745 vspltis_shifted (rtx op
)
6747 machine_mode mode
= GET_MODE (op
);
6748 machine_mode inner
= GET_MODE_INNER (mode
);
6756 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
)
6759 /* We need to create pseudo registers to do the shift, so don't recognize
6760 shift vector constants after reload. */
6761 if (!can_create_pseudo_p ())
6764 nunits
= GET_MODE_NUNITS (mode
);
6765 mask
= GET_MODE_MASK (inner
);
6767 val
= const_vector_elt_as_int (op
, BYTES_BIG_ENDIAN
? 0 : nunits
- 1);
6769 /* Check if the value can really be the operand of a vspltis[bhw]. */
6770 if (EASY_VECTOR_15 (val
))
6773 /* Also check if we are loading up the most significant bit which can be done
6774 by loading up -1 and shifting the value left by -1. */
6775 else if (EASY_VECTOR_MSB (val
, inner
))
6781 /* Check if VAL is present in every STEP-th element until we find elements
6782 that are 0 or all 1 bits. */
6783 for (i
= 1; i
< nunits
; ++i
)
6785 unsigned elt
= BYTES_BIG_ENDIAN
? i
: nunits
- 1 - i
;
6786 HOST_WIDE_INT elt_val
= const_vector_elt_as_int (op
, elt
);
6788 /* If the value isn't the splat value, check for the remaining elements
6794 for (j
= i
+1; j
< nunits
; ++j
)
6796 unsigned elt2
= BYTES_BIG_ENDIAN
? j
: nunits
- 1 - j
;
6797 if (const_vector_elt_as_int (op
, elt2
) != 0)
6801 return (nunits
- i
) * GET_MODE_SIZE (inner
);
6804 else if ((elt_val
& mask
) == mask
)
6806 for (j
= i
+1; j
< nunits
; ++j
)
6808 unsigned elt2
= BYTES_BIG_ENDIAN
? j
: nunits
- 1 - j
;
6809 if ((const_vector_elt_as_int (op
, elt2
) & mask
) != mask
)
6813 return -((nunits
- i
) * GET_MODE_SIZE (inner
));
6821 /* If all elements are equal, we don't need to do VLSDOI. */
6826 /* Return true if OP is of the given MODE and can be synthesized
6827 with a vspltisb, vspltish or vspltisw. */
6830 easy_altivec_constant (rtx op
, machine_mode mode
)
6832 unsigned step
, copies
;
6834 if (mode
== VOIDmode
)
6835 mode
= GET_MODE (op
);
6836 else if (mode
!= GET_MODE (op
))
6839 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6841 if (mode
== V2DFmode
)
6842 return zero_constant (op
, mode
);
6844 else if (mode
== V2DImode
)
6846 if (GET_CODE (CONST_VECTOR_ELT (op
, 0)) != CONST_INT
6847 || GET_CODE (CONST_VECTOR_ELT (op
, 1)) != CONST_INT
)
6850 if (zero_constant (op
, mode
))
6853 if (INTVAL (CONST_VECTOR_ELT (op
, 0)) == -1
6854 && INTVAL (CONST_VECTOR_ELT (op
, 1)) == -1)
6860 /* V1TImode is a special container for TImode. Ignore for now. */
6861 else if (mode
== V1TImode
)
6864 /* Start with a vspltisw. */
6865 step
= GET_MODE_NUNITS (mode
) / 4;
6868 if (vspltis_constant (op
, step
, copies
))
6871 /* Then try with a vspltish. */
6877 if (vspltis_constant (op
, step
, copies
))
6880 /* And finally a vspltisb. */
6886 if (vspltis_constant (op
, step
, copies
))
6889 if (vspltis_shifted (op
) != 0)
6895 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6896 result is OP. Abort if it is not possible. */
6899 gen_easy_altivec_constant (rtx op
)
6901 machine_mode mode
= GET_MODE (op
);
6902 int nunits
= GET_MODE_NUNITS (mode
);
6903 rtx val
= CONST_VECTOR_ELT (op
, BYTES_BIG_ENDIAN
? nunits
- 1 : 0);
6904 unsigned step
= nunits
/ 4;
6905 unsigned copies
= 1;
6907 /* Start with a vspltisw. */
6908 if (vspltis_constant (op
, step
, copies
))
6909 return gen_rtx_VEC_DUPLICATE (V4SImode
, gen_lowpart (SImode
, val
));
6911 /* Then try with a vspltish. */
6917 if (vspltis_constant (op
, step
, copies
))
6918 return gen_rtx_VEC_DUPLICATE (V8HImode
, gen_lowpart (HImode
, val
));
6920 /* And finally a vspltisb. */
6926 if (vspltis_constant (op
, step
, copies
))
6927 return gen_rtx_VEC_DUPLICATE (V16QImode
, gen_lowpart (QImode
, val
));
6932 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6933 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6935 Return the number of instructions needed (1 or 2) into the address pointed
6938 Return the constant that is being split via CONSTANT_PTR. */
6941 xxspltib_constant_p (rtx op
,
6946 size_t nunits
= GET_MODE_NUNITS (mode
);
6948 HOST_WIDE_INT value
;
6951 /* Set the returned values to out of bound values. */
6952 *num_insns_ptr
= -1;
6953 *constant_ptr
= 256;
6955 if (!TARGET_P9_VECTOR
)
6958 if (mode
== VOIDmode
)
6959 mode
= GET_MODE (op
);
6961 else if (mode
!= GET_MODE (op
) && GET_MODE (op
) != VOIDmode
)
6964 /* Handle (vec_duplicate <constant>). */
6965 if (GET_CODE (op
) == VEC_DUPLICATE
)
6967 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
6968 && mode
!= V2DImode
)
6971 element
= XEXP (op
, 0);
6972 if (!CONST_INT_P (element
))
6975 value
= INTVAL (element
);
6976 if (!IN_RANGE (value
, -128, 127))
6980 /* Handle (const_vector [...]). */
6981 else if (GET_CODE (op
) == CONST_VECTOR
)
6983 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
6984 && mode
!= V2DImode
)
6987 element
= CONST_VECTOR_ELT (op
, 0);
6988 if (!CONST_INT_P (element
))
6991 value
= INTVAL (element
);
6992 if (!IN_RANGE (value
, -128, 127))
6995 for (i
= 1; i
< nunits
; i
++)
6997 element
= CONST_VECTOR_ELT (op
, i
);
6998 if (!CONST_INT_P (element
))
7001 if (value
!= INTVAL (element
))
7006 /* Handle integer constants being loaded into the upper part of the VSX
7007 register as a scalar. If the value isn't 0/-1, only allow it if the mode
7008 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLITIB. */
7009 else if (CONST_INT_P (op
))
7011 if (!SCALAR_INT_MODE_P (mode
))
7014 value
= INTVAL (op
);
7015 if (!IN_RANGE (value
, -128, 127))
7018 if (!IN_RANGE (value
, -1, 0))
7020 if (!(reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
] & RELOAD_REG_VALID
))
7023 if (EASY_VECTOR_15 (value
))
7031 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
7032 sign extend. Special case 0/-1 to allow getting any VSX register instead
7033 of an Altivec register. */
7034 if ((mode
== V4SImode
|| mode
== V8HImode
) && !IN_RANGE (value
, -1, 0)
7035 && EASY_VECTOR_15 (value
))
7038 /* Return # of instructions and the constant byte for XXSPLTIB. */
7039 if (mode
== V16QImode
)
7042 else if (IN_RANGE (value
, -1, 0))
7048 *constant_ptr
= (int) value
;
7053 output_vec_const_move (rtx
*operands
)
7055 int cst
, cst2
, shift
;
7061 mode
= GET_MODE (dest
);
7065 bool dest_vmx_p
= ALTIVEC_REGNO_P (REGNO (dest
));
7066 int xxspltib_value
= 256;
7069 if (zero_constant (vec
, mode
))
7071 if (TARGET_P9_VECTOR
)
7072 return "xxspltib %x0,0";
7074 else if (dest_vmx_p
)
7075 return "vspltisw %0,0";
7078 return "xxlxor %x0,%x0,%x0";
7081 if (all_ones_constant (vec
, mode
))
7083 if (TARGET_P9_VECTOR
)
7084 return "xxspltib %x0,255";
7086 else if (dest_vmx_p
)
7087 return "vspltisw %0,-1";
7089 else if (TARGET_P8_VECTOR
)
7090 return "xxlorc %x0,%x0,%x0";
7096 if (TARGET_P9_VECTOR
7097 && xxspltib_constant_p (vec
, mode
, &num_insns
, &xxspltib_value
))
7101 operands
[2] = GEN_INT (xxspltib_value
& 0xff);
7102 return "xxspltib %x0,%2";
7113 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest
)));
7114 if (zero_constant (vec
, mode
))
7115 return "vspltisw %0,0";
7117 if (all_ones_constant (vec
, mode
))
7118 return "vspltisw %0,-1";
7120 /* Do we need to construct a value using VSLDOI? */
7121 shift
= vspltis_shifted (vec
);
7125 splat_vec
= gen_easy_altivec_constant (vec
);
7126 gcc_assert (GET_CODE (splat_vec
) == VEC_DUPLICATE
);
7127 operands
[1] = XEXP (splat_vec
, 0);
7128 if (!EASY_VECTOR_15 (INTVAL (operands
[1])))
7131 switch (GET_MODE (splat_vec
))
7134 return "vspltisw %0,%1";
7137 return "vspltish %0,%1";
7140 return "vspltisb %0,%1";
7147 gcc_assert (TARGET_SPE
);
7149 /* Vector constant 0 is handled as a splitter of V2SI, and in the
7150 pattern of V1DI, V4HI, and V2SF.
7152 FIXME: We should probably return # and add post reload
7153 splitters for these, but this way is so easy ;-). */
7154 cst
= INTVAL (CONST_VECTOR_ELT (vec
, 0));
7155 cst2
= INTVAL (CONST_VECTOR_ELT (vec
, 1));
7156 operands
[1] = CONST_VECTOR_ELT (vec
, 0);
7157 operands
[2] = CONST_VECTOR_ELT (vec
, 1);
7159 return "li %0,%1\n\tevmergelo %0,%0,%0";
7160 else if (WORDS_BIG_ENDIAN
)
7161 return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
7163 return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
7166 /* Initialize TARGET of vector PAIRED to VALS. */
7169 paired_expand_vector_init (rtx target
, rtx vals
)
7171 machine_mode mode
= GET_MODE (target
);
7172 int n_elts
= GET_MODE_NUNITS (mode
);
7174 rtx x
, new_rtx
, tmp
, constant_op
, op1
, op2
;
7177 for (i
= 0; i
< n_elts
; ++i
)
7179 x
= XVECEXP (vals
, 0, i
);
7180 if (!(CONST_SCALAR_INT_P (x
) || CONST_DOUBLE_P (x
) || CONST_FIXED_P (x
)))
7185 /* Load from constant pool. */
7186 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
7192 /* The vector is initialized only with non-constants. */
7193 new_rtx
= gen_rtx_VEC_CONCAT (V2SFmode
, XVECEXP (vals
, 0, 0),
7194 XVECEXP (vals
, 0, 1));
7196 emit_move_insn (target
, new_rtx
);
7200 /* One field is non-constant and the other one is a constant. Load the
7201 constant from the constant pool and use ps_merge instruction to
7202 construct the whole vector. */
7203 op1
= XVECEXP (vals
, 0, 0);
7204 op2
= XVECEXP (vals
, 0, 1);
7206 constant_op
= (CONSTANT_P (op1
)) ? op1
: op2
;
7208 tmp
= gen_reg_rtx (GET_MODE (constant_op
));
7209 emit_move_insn (tmp
, constant_op
);
7211 if (CONSTANT_P (op1
))
7212 new_rtx
= gen_rtx_VEC_CONCAT (V2SFmode
, tmp
, op2
);
7214 new_rtx
= gen_rtx_VEC_CONCAT (V2SFmode
, op1
, tmp
);
7216 emit_move_insn (target
, new_rtx
);
7220 paired_expand_vector_move (rtx operands
[])
7222 rtx op0
= operands
[0], op1
= operands
[1];
7224 emit_move_insn (op0
, op1
);
7227 /* Emit vector compare for code RCODE. DEST is destination, OP1 and
7228 OP2 are two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two
7229 operands for the relation operation COND. This is a recursive
7233 paired_emit_vector_compare (enum rtx_code rcode
,
7234 rtx dest
, rtx op0
, rtx op1
,
7235 rtx cc_op0
, rtx cc_op1
)
7237 rtx tmp
= gen_reg_rtx (V2SFmode
);
7240 gcc_assert (TARGET_PAIRED_FLOAT
);
7241 gcc_assert (GET_MODE (op0
) == GET_MODE (op1
));
7247 paired_emit_vector_compare (GE
, dest
, op1
, op0
, cc_op0
, cc_op1
);
7251 emit_insn (gen_subv2sf3 (tmp
, cc_op0
, cc_op1
));
7252 emit_insn (gen_selv2sf4 (dest
, tmp
, op0
, op1
, CONST0_RTX (SFmode
)));
7256 paired_emit_vector_compare (GE
, dest
, op0
, op1
, cc_op1
, cc_op0
);
7259 paired_emit_vector_compare (LE
, dest
, op1
, op0
, cc_op0
, cc_op1
);
7262 tmp1
= gen_reg_rtx (V2SFmode
);
7263 max
= gen_reg_rtx (V2SFmode
);
7264 min
= gen_reg_rtx (V2SFmode
);
7265 gen_reg_rtx (V2SFmode
);
7267 emit_insn (gen_subv2sf3 (tmp
, cc_op0
, cc_op1
));
7268 emit_insn (gen_selv2sf4
7269 (max
, tmp
, cc_op0
, cc_op1
, CONST0_RTX (SFmode
)));
7270 emit_insn (gen_subv2sf3 (tmp
, cc_op1
, cc_op0
));
7271 emit_insn (gen_selv2sf4
7272 (min
, tmp
, cc_op0
, cc_op1
, CONST0_RTX (SFmode
)));
7273 emit_insn (gen_subv2sf3 (tmp1
, min
, max
));
7274 emit_insn (gen_selv2sf4 (dest
, tmp1
, op0
, op1
, CONST0_RTX (SFmode
)));
7277 paired_emit_vector_compare (EQ
, dest
, op1
, op0
, cc_op0
, cc_op1
);
7280 paired_emit_vector_compare (LE
, dest
, op1
, op0
, cc_op0
, cc_op1
);
7283 paired_emit_vector_compare (LT
, dest
, op1
, op0
, cc_op0
, cc_op1
);
7286 paired_emit_vector_compare (GE
, dest
, op1
, op0
, cc_op0
, cc_op1
);
7289 paired_emit_vector_compare (GT
, dest
, op1
, op0
, cc_op0
, cc_op1
);
7298 /* Emit vector conditional expression.
7299 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
7300 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
7303 paired_emit_vector_cond_expr (rtx dest
, rtx op1
, rtx op2
,
7304 rtx cond
, rtx cc_op0
, rtx cc_op1
)
7306 enum rtx_code rcode
= GET_CODE (cond
);
7308 if (!TARGET_PAIRED_FLOAT
)
7311 paired_emit_vector_compare (rcode
, dest
, op1
, op2
, cc_op0
, cc_op1
);
7316 /* Initialize vector TARGET to VALS. */
7319 rs6000_expand_vector_init (rtx target
, rtx vals
)
7321 machine_mode mode
= GET_MODE (target
);
7322 machine_mode inner_mode
= GET_MODE_INNER (mode
);
7323 int n_elts
= GET_MODE_NUNITS (mode
);
7324 int n_var
= 0, one_var
= -1;
7325 bool all_same
= true, all_const_zero
= true;
7329 for (i
= 0; i
< n_elts
; ++i
)
7331 x
= XVECEXP (vals
, 0, i
);
7332 if (!(CONST_SCALAR_INT_P (x
) || CONST_DOUBLE_P (x
) || CONST_FIXED_P (x
)))
7333 ++n_var
, one_var
= i
;
7334 else if (x
!= CONST0_RTX (inner_mode
))
7335 all_const_zero
= false;
7337 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
7343 rtx const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
7344 bool int_vector_p
= (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
);
7345 if ((int_vector_p
|| TARGET_VSX
) && all_const_zero
)
7347 /* Zero register. */
7348 emit_move_insn (target
, CONST0_RTX (mode
));
7351 else if (int_vector_p
&& easy_vector_constant (const_vec
, mode
))
7353 /* Splat immediate. */
7354 emit_insn (gen_rtx_SET (target
, const_vec
));
7359 /* Load from constant pool. */
7360 emit_move_insn (target
, const_vec
);
7365 /* Double word values on VSX can use xxpermdi or lxvdsx. */
7366 if (VECTOR_MEM_VSX_P (mode
) && (mode
== V2DFmode
|| mode
== V2DImode
))
7370 size_t num_elements
= all_same
? 1 : 2;
7371 for (i
= 0; i
< num_elements
; i
++)
7373 op
[i
] = XVECEXP (vals
, 0, i
);
7374 /* Just in case there is a SUBREG with a smaller mode, do a
7376 if (GET_MODE (op
[i
]) != inner_mode
)
7378 rtx tmp
= gen_reg_rtx (inner_mode
);
7379 convert_move (tmp
, op
[i
], 0);
7382 /* Allow load with splat double word. */
7383 else if (MEM_P (op
[i
]))
7386 op
[i
] = force_reg (inner_mode
, op
[i
]);
7388 else if (!REG_P (op
[i
]))
7389 op
[i
] = force_reg (inner_mode
, op
[i
]);
7394 if (mode
== V2DFmode
)
7395 emit_insn (gen_vsx_splat_v2df (target
, op
[0]));
7397 emit_insn (gen_vsx_splat_v2di (target
, op
[0]));
7401 if (mode
== V2DFmode
)
7402 emit_insn (gen_vsx_concat_v2df (target
, op
[0], op
[1]));
7404 emit_insn (gen_vsx_concat_v2di (target
, op
[0], op
[1]));
7409 /* Special case initializing vector int if we are on 64-bit systems with
7410 direct move or we have the ISA 3.0 instructions. */
7411 if (mode
== V4SImode
&& VECTOR_MEM_VSX_P (V4SImode
)
7412 && TARGET_DIRECT_MOVE_64BIT
)
7416 rtx element0
= XVECEXP (vals
, 0, 0);
7417 if (MEM_P (element0
))
7418 element0
= rs6000_address_for_fpconvert (element0
);
7420 element0
= force_reg (SImode
, element0
);
7422 if (TARGET_P9_VECTOR
)
7423 emit_insn (gen_vsx_splat_v4si (target
, element0
));
7426 rtx tmp
= gen_reg_rtx (DImode
);
7427 emit_insn (gen_zero_extendsidi2 (tmp
, element0
));
7428 emit_insn (gen_vsx_splat_v4si_di (target
, tmp
));
7437 for (i
= 0; i
< 4; i
++)
7439 elements
[i
] = XVECEXP (vals
, 0, i
);
7440 if (!CONST_INT_P (elements
[i
]) && !REG_P (elements
[i
]))
7441 elements
[i
] = copy_to_mode_reg (SImode
, elements
[i
]);
7444 emit_insn (gen_vsx_init_v4si (target
, elements
[0], elements
[1],
7445 elements
[2], elements
[3]));
7450 /* With single precision floating point on VSX, know that internally single
7451 precision is actually represented as a double, and either make 2 V2DF
7452 vectors, and convert these vectors to single precision, or do one
7453 conversion, and splat the result to the other elements. */
7454 if (mode
== V4SFmode
&& VECTOR_MEM_VSX_P (V4SFmode
))
7458 rtx element0
= XVECEXP (vals
, 0, 0);
7460 if (TARGET_P9_VECTOR
)
7462 if (MEM_P (element0
))
7463 element0
= rs6000_address_for_fpconvert (element0
);
7465 emit_insn (gen_vsx_splat_v4sf (target
, element0
));
7470 rtx freg
= gen_reg_rtx (V4SFmode
);
7471 rtx sreg
= force_reg (SFmode
, element0
);
7472 rtx cvt
= (TARGET_XSCVDPSPN
7473 ? gen_vsx_xscvdpspn_scalar (freg
, sreg
)
7474 : gen_vsx_xscvdpsp_scalar (freg
, sreg
));
7477 emit_insn (gen_vsx_xxspltw_v4sf_direct (target
, freg
,
7483 rtx dbl_even
= gen_reg_rtx (V2DFmode
);
7484 rtx dbl_odd
= gen_reg_rtx (V2DFmode
);
7485 rtx flt_even
= gen_reg_rtx (V4SFmode
);
7486 rtx flt_odd
= gen_reg_rtx (V4SFmode
);
7487 rtx op0
= force_reg (SFmode
, XVECEXP (vals
, 0, 0));
7488 rtx op1
= force_reg (SFmode
, XVECEXP (vals
, 0, 1));
7489 rtx op2
= force_reg (SFmode
, XVECEXP (vals
, 0, 2));
7490 rtx op3
= force_reg (SFmode
, XVECEXP (vals
, 0, 3));
7492 /* Use VMRGEW if we can instead of doing a permute. */
7493 if (TARGET_P8_VECTOR
)
7495 emit_insn (gen_vsx_concat_v2sf (dbl_even
, op0
, op2
));
7496 emit_insn (gen_vsx_concat_v2sf (dbl_odd
, op1
, op3
));
7497 emit_insn (gen_vsx_xvcvdpsp (flt_even
, dbl_even
));
7498 emit_insn (gen_vsx_xvcvdpsp (flt_odd
, dbl_odd
));
7499 if (BYTES_BIG_ENDIAN
)
7500 emit_insn (gen_p8_vmrgew_v4sf_direct (target
, flt_even
, flt_odd
));
7502 emit_insn (gen_p8_vmrgew_v4sf_direct (target
, flt_odd
, flt_even
));
7506 emit_insn (gen_vsx_concat_v2sf (dbl_even
, op0
, op1
));
7507 emit_insn (gen_vsx_concat_v2sf (dbl_odd
, op2
, op3
));
7508 emit_insn (gen_vsx_xvcvdpsp (flt_even
, dbl_even
));
7509 emit_insn (gen_vsx_xvcvdpsp (flt_odd
, dbl_odd
));
7510 rs6000_expand_extract_even (target
, flt_even
, flt_odd
);
7516 /* Special case initializing vector short/char that are splats if we are on
7517 64-bit systems with direct move. */
7518 if (all_same
&& TARGET_DIRECT_MOVE_64BIT
7519 && (mode
== V16QImode
|| mode
== V8HImode
))
7521 rtx op0
= XVECEXP (vals
, 0, 0);
7522 rtx di_tmp
= gen_reg_rtx (DImode
);
7525 op0
= force_reg (GET_MODE_INNER (mode
), op0
);
7527 if (mode
== V16QImode
)
7529 emit_insn (gen_zero_extendqidi2 (di_tmp
, op0
));
7530 emit_insn (gen_vsx_vspltb_di (target
, di_tmp
));
7534 if (mode
== V8HImode
)
7536 emit_insn (gen_zero_extendhidi2 (di_tmp
, op0
));
7537 emit_insn (gen_vsx_vsplth_di (target
, di_tmp
));
7542 /* Store value to stack temp. Load vector element. Splat. However, splat
7543 of 64-bit items is not supported on Altivec. */
7544 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
7546 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (inner_mode
));
7547 emit_move_insn (adjust_address_nv (mem
, inner_mode
, 0),
7548 XVECEXP (vals
, 0, 0));
7549 x
= gen_rtx_UNSPEC (VOIDmode
,
7550 gen_rtvec (1, const0_rtx
), UNSPEC_LVE
);
7551 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
7553 gen_rtx_SET (target
, mem
),
7555 x
= gen_rtx_VEC_SELECT (inner_mode
, target
,
7556 gen_rtx_PARALLEL (VOIDmode
,
7557 gen_rtvec (1, const0_rtx
)));
7558 emit_insn (gen_rtx_SET (target
, gen_rtx_VEC_DUPLICATE (mode
, x
)));
7562 /* One field is non-constant. Load constant then overwrite
7566 rtx copy
= copy_rtx (vals
);
7568 /* Load constant part of vector, substitute neighboring value for
7570 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
7571 rs6000_expand_vector_init (target
, copy
);
7573 /* Insert variable. */
7574 rs6000_expand_vector_set (target
, XVECEXP (vals
, 0, one_var
), one_var
);
7578 /* Construct the vector in memory one field at a time
7579 and load the whole vector. */
7580 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
7581 for (i
= 0; i
< n_elts
; i
++)
7582 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
7583 i
* GET_MODE_SIZE (inner_mode
)),
7584 XVECEXP (vals
, 0, i
));
7585 emit_move_insn (target
, mem
);
7588 /* Set field ELT of TARGET to VAL. */
7591 rs6000_expand_vector_set (rtx target
, rtx val
, int elt
)
7593 machine_mode mode
= GET_MODE (target
);
7594 machine_mode inner_mode
= GET_MODE_INNER (mode
);
7595 rtx reg
= gen_reg_rtx (mode
);
7597 int width
= GET_MODE_SIZE (inner_mode
);
7600 val
= force_reg (GET_MODE (val
), val
);
7602 if (VECTOR_MEM_VSX_P (mode
))
7604 rtx insn
= NULL_RTX
;
7605 rtx elt_rtx
= GEN_INT (elt
);
7607 if (mode
== V2DFmode
)
7608 insn
= gen_vsx_set_v2df (target
, target
, val
, elt_rtx
);
7610 else if (mode
== V2DImode
)
7611 insn
= gen_vsx_set_v2di (target
, target
, val
, elt_rtx
);
7613 else if (TARGET_P9_VECTOR
&& TARGET_VSX_SMALL_INTEGER
7614 && TARGET_UPPER_REGS_DI
&& TARGET_POWERPC64
)
7616 if (mode
== V4SImode
)
7617 insn
= gen_vsx_set_v4si_p9 (target
, target
, val
, elt_rtx
);
7618 else if (mode
== V8HImode
)
7619 insn
= gen_vsx_set_v8hi_p9 (target
, target
, val
, elt_rtx
);
7620 else if (mode
== V16QImode
)
7621 insn
= gen_vsx_set_v16qi_p9 (target
, target
, val
, elt_rtx
);
7631 /* Simplify setting single element vectors like V1TImode. */
7632 if (GET_MODE_SIZE (mode
) == GET_MODE_SIZE (inner_mode
) && elt
== 0)
7634 emit_move_insn (target
, gen_lowpart (mode
, val
));
7638 /* Load single variable value. */
7639 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (inner_mode
));
7640 emit_move_insn (adjust_address_nv (mem
, inner_mode
, 0), val
);
7641 x
= gen_rtx_UNSPEC (VOIDmode
,
7642 gen_rtvec (1, const0_rtx
), UNSPEC_LVE
);
7643 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
7645 gen_rtx_SET (reg
, mem
),
7648 /* Linear sequence. */
7649 mask
= gen_rtx_PARALLEL (V16QImode
, rtvec_alloc (16));
7650 for (i
= 0; i
< 16; ++i
)
7651 XVECEXP (mask
, 0, i
) = GEN_INT (i
);
7653 /* Set permute mask to insert element into target. */
7654 for (i
= 0; i
< width
; ++i
)
7655 XVECEXP (mask
, 0, elt
*width
+ i
)
7656 = GEN_INT (i
+ 0x10);
7657 x
= gen_rtx_CONST_VECTOR (V16QImode
, XVEC (mask
, 0));
7659 if (BYTES_BIG_ENDIAN
)
7660 x
= gen_rtx_UNSPEC (mode
,
7661 gen_rtvec (3, target
, reg
,
7662 force_reg (V16QImode
, x
)),
7666 if (TARGET_P9_VECTOR
)
7667 x
= gen_rtx_UNSPEC (mode
,
7668 gen_rtvec (3, target
, reg
,
7669 force_reg (V16QImode
, x
)),
7673 /* Invert selector. We prefer to generate VNAND on P8 so
7674 that future fusion opportunities can kick in, but must
7675 generate VNOR elsewhere. */
7676 rtx notx
= gen_rtx_NOT (V16QImode
, force_reg (V16QImode
, x
));
7677 rtx iorx
= (TARGET_P8_VECTOR
7678 ? gen_rtx_IOR (V16QImode
, notx
, notx
)
7679 : gen_rtx_AND (V16QImode
, notx
, notx
));
7680 rtx tmp
= gen_reg_rtx (V16QImode
);
7681 emit_insn (gen_rtx_SET (tmp
, iorx
));
7683 /* Permute with operands reversed and adjusted selector. */
7684 x
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, reg
, target
, tmp
),
7689 emit_insn (gen_rtx_SET (target
, x
));
7692 /* Extract field ELT from VEC into TARGET. */
7695 rs6000_expand_vector_extract (rtx target
, rtx vec
, rtx elt
)
7697 machine_mode mode
= GET_MODE (vec
);
7698 machine_mode inner_mode
= GET_MODE_INNER (mode
);
7701 if (VECTOR_MEM_VSX_P (mode
) && CONST_INT_P (elt
))
7708 gcc_assert (INTVAL (elt
) == 0 && inner_mode
== TImode
);
7709 emit_move_insn (target
, gen_lowpart (TImode
, vec
));
7712 emit_insn (gen_vsx_extract_v2df (target
, vec
, elt
));
7715 emit_insn (gen_vsx_extract_v2di (target
, vec
, elt
));
7718 emit_insn (gen_vsx_extract_v4sf (target
, vec
, elt
));
7721 if (TARGET_DIRECT_MOVE_64BIT
)
7723 emit_insn (gen_vsx_extract_v16qi (target
, vec
, elt
));
7729 if (TARGET_DIRECT_MOVE_64BIT
)
7731 emit_insn (gen_vsx_extract_v8hi (target
, vec
, elt
));
7737 if (TARGET_DIRECT_MOVE_64BIT
)
7739 emit_insn (gen_vsx_extract_v4si (target
, vec
, elt
));
7745 else if (VECTOR_MEM_VSX_P (mode
) && !CONST_INT_P (elt
)
7746 && TARGET_DIRECT_MOVE_64BIT
)
7748 if (GET_MODE (elt
) != DImode
)
7750 rtx tmp
= gen_reg_rtx (DImode
);
7751 convert_move (tmp
, elt
, 0);
7754 else if (!REG_P (elt
))
7755 elt
= force_reg (DImode
, elt
);
7760 emit_insn (gen_vsx_extract_v2df_var (target
, vec
, elt
));
7764 emit_insn (gen_vsx_extract_v2di_var (target
, vec
, elt
));
7768 emit_insn (gen_vsx_extract_v4sf_var (target
, vec
, elt
));
7772 emit_insn (gen_vsx_extract_v4si_var (target
, vec
, elt
));
7776 emit_insn (gen_vsx_extract_v8hi_var (target
, vec
, elt
));
7780 emit_insn (gen_vsx_extract_v16qi_var (target
, vec
, elt
));
7788 gcc_assert (CONST_INT_P (elt
));
7790 /* Allocate mode-sized buffer. */
7791 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
7793 emit_move_insn (mem
, vec
);
7795 /* Add offset to field within buffer matching vector element. */
7796 mem
= adjust_address_nv (mem
, inner_mode
,
7797 INTVAL (elt
) * GET_MODE_SIZE (inner_mode
));
7799 emit_move_insn (target
, adjust_address_nv (mem
, inner_mode
, 0));
7802 /* Helper function to return the register number of a RTX. */
7804 regno_or_subregno (rtx op
)
7808 else if (SUBREG_P (op
))
7809 return subreg_regno (op
);
7814 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7815 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7816 temporary (BASE_TMP) to fixup the address. Return the new memory address
7817 that is valid for reads or writes to a given register (SCALAR_REG). */
7820 rs6000_adjust_vec_address (rtx scalar_reg
,
7824 machine_mode scalar_mode
)
7826 unsigned scalar_size
= GET_MODE_SIZE (scalar_mode
);
7827 rtx addr
= XEXP (mem
, 0);
7832 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7833 gcc_assert (GET_RTX_CLASS (GET_CODE (addr
)) != RTX_AUTOINC
);
7835 /* Calculate what we need to add to the address to get the element
7837 if (CONST_INT_P (element
))
7838 element_offset
= GEN_INT (INTVAL (element
) * scalar_size
);
7841 int byte_shift
= exact_log2 (scalar_size
);
7842 gcc_assert (byte_shift
>= 0);
7844 if (byte_shift
== 0)
7845 element_offset
= element
;
7849 if (TARGET_POWERPC64
)
7850 emit_insn (gen_ashldi3 (base_tmp
, element
, GEN_INT (byte_shift
)));
7852 emit_insn (gen_ashlsi3 (base_tmp
, element
, GEN_INT (byte_shift
)));
7854 element_offset
= base_tmp
;
7858 /* Create the new address pointing to the element within the vector. If we
7859 are adding 0, we don't have to change the address. */
7860 if (element_offset
== const0_rtx
)
7863 /* A simple indirect address can be converted into a reg + offset
7865 else if (REG_P (addr
) || SUBREG_P (addr
))
7866 new_addr
= gen_rtx_PLUS (Pmode
, addr
, element_offset
);
7868 /* Optimize D-FORM addresses with constant offset with a constant element, to
7869 include the element offset in the address directly. */
7870 else if (GET_CODE (addr
) == PLUS
)
7872 rtx op0
= XEXP (addr
, 0);
7873 rtx op1
= XEXP (addr
, 1);
7876 gcc_assert (REG_P (op0
) || SUBREG_P (op0
));
7877 if (CONST_INT_P (op1
) && CONST_INT_P (element_offset
))
7879 HOST_WIDE_INT offset
= INTVAL (op1
) + INTVAL (element_offset
);
7880 rtx offset_rtx
= GEN_INT (offset
);
7882 if (IN_RANGE (offset
, -32768, 32767)
7883 && (scalar_size
< 8 || (offset
& 0x3) == 0))
7884 new_addr
= gen_rtx_PLUS (Pmode
, op0
, offset_rtx
);
7887 emit_move_insn (base_tmp
, offset_rtx
);
7888 new_addr
= gen_rtx_PLUS (Pmode
, op0
, base_tmp
);
7893 bool op1_reg_p
= (REG_P (op1
) || SUBREG_P (op1
));
7894 bool ele_reg_p
= (REG_P (element_offset
) || SUBREG_P (element_offset
));
7896 /* Note, ADDI requires the register being added to be a base
7897 register. If the register was R0, load it up into the temporary
7900 && (ele_reg_p
|| reg_or_subregno (op1
) != FIRST_GPR_REGNO
))
7902 insn
= gen_add3_insn (base_tmp
, op1
, element_offset
);
7903 gcc_assert (insn
!= NULL_RTX
);
7908 && reg_or_subregno (element_offset
) != FIRST_GPR_REGNO
)
7910 insn
= gen_add3_insn (base_tmp
, element_offset
, op1
);
7911 gcc_assert (insn
!= NULL_RTX
);
7917 emit_move_insn (base_tmp
, op1
);
7918 emit_insn (gen_add2_insn (base_tmp
, element_offset
));
7921 new_addr
= gen_rtx_PLUS (Pmode
, op0
, base_tmp
);
7927 emit_move_insn (base_tmp
, addr
);
7928 new_addr
= gen_rtx_PLUS (Pmode
, base_tmp
, element_offset
);
7931 /* If we have a PLUS, we need to see whether the particular register class
7932 allows for D-FORM or X-FORM addressing. */
7933 if (GET_CODE (new_addr
) == PLUS
)
7935 rtx op1
= XEXP (new_addr
, 1);
7936 addr_mask_type addr_mask
;
7937 int scalar_regno
= regno_or_subregno (scalar_reg
);
7939 gcc_assert (scalar_regno
< FIRST_PSEUDO_REGISTER
);
7940 if (INT_REGNO_P (scalar_regno
))
7941 addr_mask
= reg_addr
[scalar_mode
].addr_mask
[RELOAD_REG_GPR
];
7943 else if (FP_REGNO_P (scalar_regno
))
7944 addr_mask
= reg_addr
[scalar_mode
].addr_mask
[RELOAD_REG_FPR
];
7946 else if (ALTIVEC_REGNO_P (scalar_regno
))
7947 addr_mask
= reg_addr
[scalar_mode
].addr_mask
[RELOAD_REG_VMX
];
7952 if (REG_P (op1
) || SUBREG_P (op1
))
7953 valid_addr_p
= (addr_mask
& RELOAD_REG_INDEXED
) != 0;
7955 valid_addr_p
= (addr_mask
& RELOAD_REG_OFFSET
) != 0;
7958 else if (REG_P (new_addr
) || SUBREG_P (new_addr
))
7959 valid_addr_p
= true;
7962 valid_addr_p
= false;
7966 emit_move_insn (base_tmp
, new_addr
);
7967 new_addr
= base_tmp
;
7970 return change_address (mem
, scalar_mode
, new_addr
);
7973 /* Split a variable vec_extract operation into the component instructions. */
7976 rs6000_split_vec_extract_var (rtx dest
, rtx src
, rtx element
, rtx tmp_gpr
,
7979 machine_mode mode
= GET_MODE (src
);
7980 machine_mode scalar_mode
= GET_MODE (dest
);
7981 unsigned scalar_size
= GET_MODE_SIZE (scalar_mode
);
7982 int byte_shift
= exact_log2 (scalar_size
);
7984 gcc_assert (byte_shift
>= 0);
7986 /* If we are given a memory address, optimize to load just the element. We
7987 don't have to adjust the vector element number on little endian
7991 gcc_assert (REG_P (tmp_gpr
));
7992 emit_move_insn (dest
, rs6000_adjust_vec_address (dest
, src
, element
,
7993 tmp_gpr
, scalar_mode
));
7997 else if (REG_P (src
) || SUBREG_P (src
))
7999 int bit_shift
= byte_shift
+ 3;
8001 int dest_regno
= regno_or_subregno (dest
);
8002 int src_regno
= regno_or_subregno (src
);
8003 int element_regno
= regno_or_subregno (element
);
8005 gcc_assert (REG_P (tmp_gpr
));
8007 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
8008 a general purpose register. */
8009 if (TARGET_P9_VECTOR
8010 && (mode
== V16QImode
|| mode
== V8HImode
|| mode
== V4SImode
)
8011 && INT_REGNO_P (dest_regno
)
8012 && ALTIVEC_REGNO_P (src_regno
)
8013 && INT_REGNO_P (element_regno
))
8015 rtx dest_si
= gen_rtx_REG (SImode
, dest_regno
);
8016 rtx element_si
= gen_rtx_REG (SImode
, element_regno
);
8018 if (mode
== V16QImode
)
8019 emit_insn (VECTOR_ELT_ORDER_BIG
8020 ? gen_vextublx (dest_si
, element_si
, src
)
8021 : gen_vextubrx (dest_si
, element_si
, src
));
8023 else if (mode
== V8HImode
)
8025 rtx tmp_gpr_si
= gen_rtx_REG (SImode
, REGNO (tmp_gpr
));
8026 emit_insn (gen_ashlsi3 (tmp_gpr_si
, element_si
, const1_rtx
));
8027 emit_insn (VECTOR_ELT_ORDER_BIG
8028 ? gen_vextuhlx (dest_si
, tmp_gpr_si
, src
)
8029 : gen_vextuhrx (dest_si
, tmp_gpr_si
, src
));
8035 rtx tmp_gpr_si
= gen_rtx_REG (SImode
, REGNO (tmp_gpr
));
8036 emit_insn (gen_ashlsi3 (tmp_gpr_si
, element_si
, const2_rtx
));
8037 emit_insn (VECTOR_ELT_ORDER_BIG
8038 ? gen_vextuwlx (dest_si
, tmp_gpr_si
, src
)
8039 : gen_vextuwrx (dest_si
, tmp_gpr_si
, src
));
8046 gcc_assert (REG_P (tmp_altivec
));
8048 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
8049 an XOR, otherwise we need to subtract. The shift amount is so VSLO
8050 will shift the element into the upper position (adding 3 to convert a
8051 byte shift into a bit shift). */
8052 if (scalar_size
== 8)
8054 if (!VECTOR_ELT_ORDER_BIG
)
8056 emit_insn (gen_xordi3 (tmp_gpr
, element
, const1_rtx
));
8062 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
8064 emit_insn (gen_rtx_SET (tmp_gpr
,
8065 gen_rtx_AND (DImode
,
8066 gen_rtx_ASHIFT (DImode
,
8073 if (!VECTOR_ELT_ORDER_BIG
)
8075 rtx num_ele_m1
= GEN_INT (GET_MODE_NUNITS (mode
) - 1);
8077 emit_insn (gen_anddi3 (tmp_gpr
, element
, num_ele_m1
));
8078 emit_insn (gen_subdi3 (tmp_gpr
, num_ele_m1
, tmp_gpr
));
8084 emit_insn (gen_ashldi3 (tmp_gpr
, element2
, GEN_INT (bit_shift
)));
8087 /* Get the value into the lower byte of the Altivec register where VSLO
8089 if (TARGET_P9_VECTOR
)
8090 emit_insn (gen_vsx_splat_v2di (tmp_altivec
, tmp_gpr
));
8091 else if (can_create_pseudo_p ())
8092 emit_insn (gen_vsx_concat_v2di (tmp_altivec
, tmp_gpr
, tmp_gpr
));
8095 rtx tmp_di
= gen_rtx_REG (DImode
, REGNO (tmp_altivec
));
8096 emit_move_insn (tmp_di
, tmp_gpr
);
8097 emit_insn (gen_vsx_concat_v2di (tmp_altivec
, tmp_di
, tmp_di
));
8100 /* Do the VSLO to get the value into the final location. */
8104 emit_insn (gen_vsx_vslo_v2df (dest
, src
, tmp_altivec
));
8108 emit_insn (gen_vsx_vslo_v2di (dest
, src
, tmp_altivec
));
8113 rtx tmp_altivec_di
= gen_rtx_REG (DImode
, REGNO (tmp_altivec
));
8114 rtx tmp_altivec_v4sf
= gen_rtx_REG (V4SFmode
, REGNO (tmp_altivec
));
8115 rtx src_v2di
= gen_rtx_REG (V2DImode
, REGNO (src
));
8116 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di
, src_v2di
,
8119 emit_insn (gen_vsx_xscvspdp_scalar2 (dest
, tmp_altivec_v4sf
));
8127 rtx tmp_altivec_di
= gen_rtx_REG (DImode
, REGNO (tmp_altivec
));
8128 rtx src_v2di
= gen_rtx_REG (V2DImode
, REGNO (src
));
8129 rtx tmp_gpr_di
= gen_rtx_REG (DImode
, REGNO (dest
));
8130 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di
, src_v2di
,
8132 emit_move_insn (tmp_gpr_di
, tmp_altivec_di
);
8133 emit_insn (gen_ashrdi3 (tmp_gpr_di
, tmp_gpr_di
,
8134 GEN_INT (64 - (8 * scalar_size
))));
8148 /* Helper function for rs6000_split_v4si_init to build up a DImode value from
8149 two SImode values. */
8152 rs6000_split_v4si_init_di_reg (rtx dest
, rtx si1
, rtx si2
, rtx tmp
)
8154 const unsigned HOST_WIDE_INT mask_32bit
= HOST_WIDE_INT_C (0xffffffff);
8156 if (CONST_INT_P (si1
) && CONST_INT_P (si2
))
8158 unsigned HOST_WIDE_INT const1
= (UINTVAL (si1
) & mask_32bit
) << 32;
8159 unsigned HOST_WIDE_INT const2
= UINTVAL (si2
) & mask_32bit
;
8161 emit_move_insn (dest
, GEN_INT (const1
| const2
));
8165 /* Put si1 into upper 32-bits of dest. */
8166 if (CONST_INT_P (si1
))
8167 emit_move_insn (dest
, GEN_INT ((UINTVAL (si1
) & mask_32bit
) << 32));
8170 /* Generate RLDIC. */
8171 rtx si1_di
= gen_rtx_REG (DImode
, regno_or_subregno (si1
));
8172 rtx shift_rtx
= gen_rtx_ASHIFT (DImode
, si1_di
, GEN_INT (32));
8173 rtx mask_rtx
= GEN_INT (mask_32bit
<< 32);
8174 rtx and_rtx
= gen_rtx_AND (DImode
, shift_rtx
, mask_rtx
);
8175 gcc_assert (!reg_overlap_mentioned_p (dest
, si1
));
8176 emit_insn (gen_rtx_SET (dest
, and_rtx
));
8179 /* Put si2 into the temporary. */
8180 gcc_assert (!reg_overlap_mentioned_p (dest
, tmp
));
8181 if (CONST_INT_P (si2
))
8182 emit_move_insn (tmp
, GEN_INT (UINTVAL (si2
) & mask_32bit
));
8184 emit_insn (gen_zero_extendsidi2 (tmp
, si2
));
8186 /* Combine the two parts. */
8187 emit_insn (gen_iordi3 (dest
, dest
, tmp
));
8191 /* Split a V4SI initialization. */
8194 rs6000_split_v4si_init (rtx operands
[])
8196 rtx dest
= operands
[0];
8198 /* Destination is a GPR, build up the two DImode parts in place. */
8199 if (REG_P (dest
) || SUBREG_P (dest
))
8201 int d_regno
= regno_or_subregno (dest
);
8202 rtx scalar1
= operands
[1];
8203 rtx scalar2
= operands
[2];
8204 rtx scalar3
= operands
[3];
8205 rtx scalar4
= operands
[4];
8206 rtx tmp1
= operands
[5];
8207 rtx tmp2
= operands
[6];
8209 /* Even though we only need one temporary (plus the destination, which
8210 has an early clobber constraint, try to use two temporaries, one for
8211 each double word created. That way the 2nd insn scheduling pass can
8212 rearrange things so the two parts are done in parallel. */
8213 if (BYTES_BIG_ENDIAN
)
8215 rtx di_lo
= gen_rtx_REG (DImode
, d_regno
);
8216 rtx di_hi
= gen_rtx_REG (DImode
, d_regno
+ 1);
8217 rs6000_split_v4si_init_di_reg (di_lo
, scalar1
, scalar2
, tmp1
);
8218 rs6000_split_v4si_init_di_reg (di_hi
, scalar3
, scalar4
, tmp2
);
8222 rtx di_lo
= gen_rtx_REG (DImode
, d_regno
+ 1);
8223 rtx di_hi
= gen_rtx_REG (DImode
, d_regno
);
8224 gcc_assert (!VECTOR_ELT_ORDER_BIG
);
8225 rs6000_split_v4si_init_di_reg (di_lo
, scalar4
, scalar3
, tmp1
);
8226 rs6000_split_v4si_init_di_reg (di_hi
, scalar2
, scalar1
, tmp2
);
8235 /* Return TRUE if OP is an invalid SUBREG operation on the e500. */
8238 invalid_e500_subreg (rtx op
, machine_mode mode
)
8240 if (TARGET_E500_DOUBLE
)
8242 /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
8243 subreg:TI and reg:TF. Decimal float modes are like integer
8244 modes (only low part of each register used) for this
8246 if (GET_CODE (op
) == SUBREG
8247 && (mode
== SImode
|| mode
== DImode
|| mode
== TImode
8248 || mode
== DDmode
|| mode
== TDmode
|| mode
== PTImode
)
8249 && REG_P (SUBREG_REG (op
))
8250 && (GET_MODE (SUBREG_REG (op
)) == DFmode
8251 || GET_MODE (SUBREG_REG (op
)) == TFmode
8252 || GET_MODE (SUBREG_REG (op
)) == IFmode
8253 || GET_MODE (SUBREG_REG (op
)) == KFmode
))
8256 /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
8258 if (GET_CODE (op
) == SUBREG
8259 && (mode
== DFmode
|| mode
== TFmode
|| mode
== IFmode
8261 && REG_P (SUBREG_REG (op
))
8262 && (GET_MODE (SUBREG_REG (op
)) == DImode
8263 || GET_MODE (SUBREG_REG (op
)) == TImode
8264 || GET_MODE (SUBREG_REG (op
)) == PTImode
8265 || GET_MODE (SUBREG_REG (op
)) == DDmode
8266 || GET_MODE (SUBREG_REG (op
)) == TDmode
))
8271 && GET_CODE (op
) == SUBREG
8273 && REG_P (SUBREG_REG (op
))
8274 && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op
))))
8280 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
8281 selects whether the alignment is abi mandated, optional, or
8282 both abi and optional alignment. */
8285 rs6000_data_alignment (tree type
, unsigned int align
, enum data_align how
)
8287 if (how
!= align_opt
)
8289 if (TREE_CODE (type
) == VECTOR_TYPE
)
8291 if ((TARGET_SPE
&& SPE_VECTOR_MODE (TYPE_MODE (type
)))
8292 || (TARGET_PAIRED_FLOAT
&& PAIRED_VECTOR_MODE (TYPE_MODE (type
))))
8297 else if (align
< 128)
8300 else if (TARGET_E500_DOUBLE
8301 && TREE_CODE (type
) == REAL_TYPE
8302 && TYPE_MODE (type
) == DFmode
)
8309 if (how
!= align_abi
)
8311 if (TREE_CODE (type
) == ARRAY_TYPE
8312 && TYPE_MODE (TREE_TYPE (type
)) == QImode
)
8314 if (align
< BITS_PER_WORD
)
8315 align
= BITS_PER_WORD
;
8322 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
8325 rs6000_special_adjust_field_align_p (tree type
, unsigned int computed
)
8327 if (TARGET_ALTIVEC
&& TREE_CODE (type
) == VECTOR_TYPE
)
8329 if (computed
!= 128)
8332 if (!warned
&& warn_psabi
)
8335 inform (input_location
,
8336 "the layout of aggregates containing vectors with"
8337 " %d-byte alignment has changed in GCC 5",
8338 computed
/ BITS_PER_UNIT
);
8341 /* In current GCC there is no special case. */
8348 /* AIX increases natural record alignment to doubleword if the first
8349 field is an FP double while the FP fields remain word aligned. */
8352 rs6000_special_round_type_align (tree type
, unsigned int computed
,
8353 unsigned int specified
)
8355 unsigned int align
= MAX (computed
, specified
);
8356 tree field
= TYPE_FIELDS (type
);
8358 /* Skip all non field decls */
8359 while (field
!= NULL
&& TREE_CODE (field
) != FIELD_DECL
)
8360 field
= DECL_CHAIN (field
);
8362 if (field
!= NULL
&& field
!= type
)
8364 type
= TREE_TYPE (field
);
8365 while (TREE_CODE (type
) == ARRAY_TYPE
)
8366 type
= TREE_TYPE (type
);
8368 if (type
!= error_mark_node
&& TYPE_MODE (type
) == DFmode
)
8369 align
= MAX (align
, 64);
8375 /* Darwin increases record alignment to the natural alignment of
8379 darwin_rs6000_special_round_type_align (tree type
, unsigned int computed
,
8380 unsigned int specified
)
8382 unsigned int align
= MAX (computed
, specified
);
8384 if (TYPE_PACKED (type
))
8387 /* Find the first field, looking down into aggregates. */
8389 tree field
= TYPE_FIELDS (type
);
8390 /* Skip all non field decls */
8391 while (field
!= NULL
&& TREE_CODE (field
) != FIELD_DECL
)
8392 field
= DECL_CHAIN (field
);
8395 /* A packed field does not contribute any extra alignment. */
8396 if (DECL_PACKED (field
))
8398 type
= TREE_TYPE (field
);
8399 while (TREE_CODE (type
) == ARRAY_TYPE
)
8400 type
= TREE_TYPE (type
);
8401 } while (AGGREGATE_TYPE_P (type
));
8403 if (! AGGREGATE_TYPE_P (type
) && type
!= error_mark_node
)
8404 align
= MAX (align
, TYPE_ALIGN (type
));
8409 /* Return 1 for an operand in small memory on V.4/eabi. */
8412 small_data_operand (rtx op ATTRIBUTE_UNUSED
,
8413 machine_mode mode ATTRIBUTE_UNUSED
)
8418 if (rs6000_sdata
== SDATA_NONE
|| rs6000_sdata
== SDATA_DATA
)
8421 if (DEFAULT_ABI
!= ABI_V4
)
8424 /* Vector and float memory instructions have a limited offset on the
8425 SPE, so using a vector or float variable directly as an operand is
8428 && (SPE_VECTOR_MODE (mode
) || FLOAT_MODE_P (mode
)))
8431 if (GET_CODE (op
) == SYMBOL_REF
)
8434 else if (GET_CODE (op
) != CONST
8435 || GET_CODE (XEXP (op
, 0)) != PLUS
8436 || GET_CODE (XEXP (XEXP (op
, 0), 0)) != SYMBOL_REF
8437 || GET_CODE (XEXP (XEXP (op
, 0), 1)) != CONST_INT
)
8442 rtx sum
= XEXP (op
, 0);
8443 HOST_WIDE_INT summand
;
8445 /* We have to be careful here, because it is the referenced address
8446 that must be 32k from _SDA_BASE_, not just the symbol. */
8447 summand
= INTVAL (XEXP (sum
, 1));
8448 if (summand
< 0 || summand
> g_switch_value
)
8451 sym_ref
= XEXP (sum
, 0);
8454 return SYMBOL_REF_SMALL_P (sym_ref
);
8460 /* Return true if either operand is a general purpose register. */
8463 gpr_or_gpr_p (rtx op0
, rtx op1
)
8465 return ((REG_P (op0
) && INT_REGNO_P (REGNO (op0
)))
8466 || (REG_P (op1
) && INT_REGNO_P (REGNO (op1
))));
8469 /* Return true if this is a move direct operation between GPR registers and
8470 floating point/VSX registers. */
8473 direct_move_p (rtx op0
, rtx op1
)
8477 if (!REG_P (op0
) || !REG_P (op1
))
8480 if (!TARGET_DIRECT_MOVE
&& !TARGET_MFPGPR
)
8483 regno0
= REGNO (op0
);
8484 regno1
= REGNO (op1
);
8485 if (regno0
>= FIRST_PSEUDO_REGISTER
|| regno1
>= FIRST_PSEUDO_REGISTER
)
8488 if (INT_REGNO_P (regno0
))
8489 return (TARGET_DIRECT_MOVE
) ? VSX_REGNO_P (regno1
) : FP_REGNO_P (regno1
);
8491 else if (INT_REGNO_P (regno1
))
8493 if (TARGET_MFPGPR
&& FP_REGNO_P (regno0
))
8496 else if (TARGET_DIRECT_MOVE
&& VSX_REGNO_P (regno0
))
8503 /* Return true if the OFFSET is valid for the quad address instructions that
8504 use d-form (register + offset) addressing. */
8507 quad_address_offset_p (HOST_WIDE_INT offset
)
8509 return (IN_RANGE (offset
, -32768, 32767) && ((offset
) & 0xf) == 0);
8512 /* Return true if the ADDR is an acceptable address for a quad memory
8513 operation of mode MODE (either LQ/STQ for general purpose registers, or
8514 LXV/STXV for vector registers under ISA 3.0. GPR_P is true if this address
8515 is intended for LQ/STQ. If it is false, the address is intended for the ISA
8516 3.0 LXV/STXV instruction. */
8519 quad_address_p (rtx addr
, machine_mode mode
, bool strict
)
8523 if (GET_MODE_SIZE (mode
) != 16)
8526 if (legitimate_indirect_address_p (addr
, strict
))
8529 if (VECTOR_MODE_P (mode
) && !mode_supports_vsx_dform_quad (mode
))
8532 if (GET_CODE (addr
) != PLUS
)
8535 op0
= XEXP (addr
, 0);
8536 if (!REG_P (op0
) || !INT_REG_OK_FOR_BASE_P (op0
, strict
))
8539 op1
= XEXP (addr
, 1);
8540 if (!CONST_INT_P (op1
))
8543 return quad_address_offset_p (INTVAL (op1
));
8546 /* Return true if this is a load or store quad operation. This function does
8547 not handle the atomic quad memory instructions. */
8550 quad_load_store_p (rtx op0
, rtx op1
)
8554 if (!TARGET_QUAD_MEMORY
)
8557 else if (REG_P (op0
) && MEM_P (op1
))
8558 ret
= (quad_int_reg_operand (op0
, GET_MODE (op0
))
8559 && quad_memory_operand (op1
, GET_MODE (op1
))
8560 && !reg_overlap_mentioned_p (op0
, op1
));
8562 else if (MEM_P (op0
) && REG_P (op1
))
8563 ret
= (quad_memory_operand (op0
, GET_MODE (op0
))
8564 && quad_int_reg_operand (op1
, GET_MODE (op1
)));
8569 if (TARGET_DEBUG_ADDR
)
8571 fprintf (stderr
, "\n========== quad_load_store, return %s\n",
8572 ret
? "true" : "false");
8573 debug_rtx (gen_rtx_SET (op0
, op1
));
8579 /* Given an address, return a constant offset term if one exists. */
8582 address_offset (rtx op
)
8584 if (GET_CODE (op
) == PRE_INC
8585 || GET_CODE (op
) == PRE_DEC
)
8587 else if (GET_CODE (op
) == PRE_MODIFY
8588 || GET_CODE (op
) == LO_SUM
)
8591 if (GET_CODE (op
) == CONST
)
8594 if (GET_CODE (op
) == PLUS
)
8597 if (CONST_INT_P (op
))
8603 /* Return true if the MEM operand is a memory operand suitable for use
8604 with a (full width, possibly multiple) gpr load/store. On
8605 powerpc64 this means the offset must be divisible by 4.
8606 Implements 'Y' constraint.
8608 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8609 a constraint function we know the operand has satisfied a suitable
8610 memory predicate. Also accept some odd rtl generated by reload
8611 (see rs6000_legitimize_reload_address for various forms). It is
8612 important that reload rtl be accepted by appropriate constraints
8613 but not by the operand predicate.
8615 Offsetting a lo_sum should not be allowed, except where we know by
8616 alignment that a 32k boundary is not crossed, but see the ???
8617 comment in rs6000_legitimize_reload_address. Note that by
8618 "offsetting" here we mean a further offset to access parts of the
8619 MEM. It's fine to have a lo_sum where the inner address is offset
8620 from a sym, since the same sym+offset will appear in the high part
8621 of the address calculation. */
8624 mem_operand_gpr (rtx op
, machine_mode mode
)
8626 unsigned HOST_WIDE_INT offset
;
8628 rtx addr
= XEXP (op
, 0);
8630 op
= address_offset (addr
);
8634 offset
= INTVAL (op
);
8635 if (TARGET_POWERPC64
&& (offset
& 3) != 0)
8638 extra
= GET_MODE_SIZE (mode
) - UNITS_PER_WORD
;
8642 if (GET_CODE (addr
) == LO_SUM
)
8643 /* For lo_sum addresses, we must allow any offset except one that
8644 causes a wrap, so test only the low 16 bits. */
8645 offset
= ((offset
& 0xffff) ^ 0x8000) - 0x8000;
8647 return offset
+ 0x8000 < 0x10000u
- extra
;
8650 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8651 enforce an offset divisible by 4 even for 32-bit. */
8654 mem_operand_ds_form (rtx op
, machine_mode mode
)
8656 unsigned HOST_WIDE_INT offset
;
8658 rtx addr
= XEXP (op
, 0);
8660 if (!offsettable_address_p (false, mode
, addr
))
8663 op
= address_offset (addr
);
8667 offset
= INTVAL (op
);
8668 if ((offset
& 3) != 0)
8671 extra
= GET_MODE_SIZE (mode
) - UNITS_PER_WORD
;
8675 if (GET_CODE (addr
) == LO_SUM
)
8676 /* For lo_sum addresses, we must allow any offset except one that
8677 causes a wrap, so test only the low 16 bits. */
8678 offset
= ((offset
& 0xffff) ^ 0x8000) - 0x8000;
8680 return offset
+ 0x8000 < 0x10000u
- extra
;
8683 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8686 reg_offset_addressing_ok_p (machine_mode mode
)
8700 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8701 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8702 a vector mode, if we want to use the VSX registers to move it around,
8703 we need to restrict ourselves to reg+reg addressing. Similarly for
8704 IEEE 128-bit floating point that is passed in a single vector
8706 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
))
8707 return mode_supports_vsx_dform_quad (mode
);
8714 /* Paired vector modes. Only reg+reg addressing is valid. */
8715 if (TARGET_PAIRED_FLOAT
)
8720 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8721 addressing for the LFIWZX and STFIWX instructions. */
8722 if (TARGET_NO_SDMODE_STACK
)
8734 virtual_stack_registers_memory_p (rtx op
)
8738 if (GET_CODE (op
) == REG
)
8739 regnum
= REGNO (op
);
8741 else if (GET_CODE (op
) == PLUS
8742 && GET_CODE (XEXP (op
, 0)) == REG
8743 && GET_CODE (XEXP (op
, 1)) == CONST_INT
)
8744 regnum
= REGNO (XEXP (op
, 0));
8749 return (regnum
>= FIRST_VIRTUAL_REGISTER
8750 && regnum
<= LAST_VIRTUAL_POINTER_REGISTER
);
8753 /* Return true if a MODE sized memory accesses to OP plus OFFSET
8754 is known to not straddle a 32k boundary. This function is used
8755 to determine whether -mcmodel=medium code can use TOC pointer
8756 relative addressing for OP. This means the alignment of the TOC
8757 pointer must also be taken into account, and unfortunately that is
8760 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8761 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8765 offsettable_ok_by_alignment (rtx op
, HOST_WIDE_INT offset
,
8769 unsigned HOST_WIDE_INT dsize
, dalign
, lsb
, mask
;
8771 if (GET_CODE (op
) != SYMBOL_REF
)
8774 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8776 if (mode_supports_vsx_dform_quad (mode
))
8779 dsize
= GET_MODE_SIZE (mode
);
8780 decl
= SYMBOL_REF_DECL (op
);
8786 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8787 replacing memory addresses with an anchor plus offset. We
8788 could find the decl by rummaging around in the block->objects
8789 VEC for the given offset but that seems like too much work. */
8790 dalign
= BITS_PER_UNIT
;
8791 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op
)
8792 && SYMBOL_REF_ANCHOR_P (op
)
8793 && SYMBOL_REF_BLOCK (op
) != NULL
)
8795 struct object_block
*block
= SYMBOL_REF_BLOCK (op
);
8797 dalign
= block
->alignment
;
8798 offset
+= SYMBOL_REF_BLOCK_OFFSET (op
);
8800 else if (CONSTANT_POOL_ADDRESS_P (op
))
8802 /* It would be nice to have get_pool_align().. */
8803 machine_mode cmode
= get_pool_mode (op
);
8805 dalign
= GET_MODE_ALIGNMENT (cmode
);
8808 else if (DECL_P (decl
))
8810 dalign
= DECL_ALIGN (decl
);
8814 /* Allow BLKmode when the entire object is known to not
8815 cross a 32k boundary. */
8816 if (!DECL_SIZE_UNIT (decl
))
8819 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl
)))
8822 dsize
= tree_to_uhwi (DECL_SIZE_UNIT (decl
));
8826 dalign
/= BITS_PER_UNIT
;
8827 if (dalign
> POWERPC64_TOC_POINTER_ALIGNMENT
)
8828 dalign
= POWERPC64_TOC_POINTER_ALIGNMENT
;
8829 return dalign
>= dsize
;
8835 /* Find how many bits of the alignment we know for this access. */
8836 dalign
/= BITS_PER_UNIT
;
8837 if (dalign
> POWERPC64_TOC_POINTER_ALIGNMENT
)
8838 dalign
= POWERPC64_TOC_POINTER_ALIGNMENT
;
8840 lsb
= offset
& -offset
;
8844 return dalign
>= dsize
;
8848 constant_pool_expr_p (rtx op
)
8852 split_const (op
, &base
, &offset
);
8853 return (GET_CODE (base
) == SYMBOL_REF
8854 && CONSTANT_POOL_ADDRESS_P (base
)
8855 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base
), Pmode
));
8858 static const_rtx tocrel_base
, tocrel_offset
;
8860 /* Return true if OP is a toc pointer relative address (the output
8861 of create_TOC_reference). If STRICT, do not match non-split
8862 -mcmodel=large/medium toc pointer relative addresses. */
8865 toc_relative_expr_p (const_rtx op
, bool strict
)
8870 if (TARGET_CMODEL
!= CMODEL_SMALL
)
8872 /* When strict ensure we have everything tidy. */
8874 && !(GET_CODE (op
) == LO_SUM
8875 && REG_P (XEXP (op
, 0))
8876 && INT_REG_OK_FOR_BASE_P (XEXP (op
, 0), strict
)))
8879 /* When not strict, allow non-split TOC addresses and also allow
8880 (lo_sum (high ..)) TOC addresses created during reload. */
8881 if (GET_CODE (op
) == LO_SUM
)
8886 tocrel_offset
= const0_rtx
;
8887 if (GET_CODE (op
) == PLUS
&& add_cint_operand (XEXP (op
, 1), GET_MODE (op
)))
8889 tocrel_base
= XEXP (op
, 0);
8890 tocrel_offset
= XEXP (op
, 1);
8893 return (GET_CODE (tocrel_base
) == UNSPEC
8894 && XINT (tocrel_base
, 1) == UNSPEC_TOCREL
);
8897 /* Return true if X is a constant pool address, and also for cmodel=medium
8898 if X is a toc-relative address known to be offsettable within MODE. */
8901 legitimate_constant_pool_address_p (const_rtx x
, machine_mode mode
,
8904 return (toc_relative_expr_p (x
, strict
)
8905 && (TARGET_CMODEL
!= CMODEL_MEDIUM
8906 || constant_pool_expr_p (XVECEXP (tocrel_base
, 0, 0))
8908 || offsettable_ok_by_alignment (XVECEXP (tocrel_base
, 0, 0),
8909 INTVAL (tocrel_offset
), mode
)));
8913 legitimate_small_data_p (machine_mode mode
, rtx x
)
8915 return (DEFAULT_ABI
== ABI_V4
8916 && !flag_pic
&& !TARGET_TOC
8917 && (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == CONST
)
8918 && small_data_operand (x
, mode
));
8921 /* SPE offset addressing is limited to 5-bits worth of double words. */
8922 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
8925 rs6000_legitimate_offset_address_p (machine_mode mode
, rtx x
,
8926 bool strict
, bool worst_case
)
8928 unsigned HOST_WIDE_INT offset
;
8931 if (GET_CODE (x
) != PLUS
)
8933 if (!REG_P (XEXP (x
, 0)))
8935 if (!INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), strict
))
8937 if (mode_supports_vsx_dform_quad (mode
))
8938 return quad_address_p (x
, mode
, strict
);
8939 if (!reg_offset_addressing_ok_p (mode
))
8940 return virtual_stack_registers_memory_p (x
);
8941 if (legitimate_constant_pool_address_p (x
, mode
, strict
|| lra_in_progress
))
8943 if (GET_CODE (XEXP (x
, 1)) != CONST_INT
)
8946 offset
= INTVAL (XEXP (x
, 1));
8954 /* SPE vector modes. */
8955 return SPE_CONST_OFFSET_OK (offset
);
8960 /* On e500v2, we may have:
8962 (subreg:DF (mem:DI (plus (reg) (const_int))) 0).
8964 Which gets addressed with evldd instructions. */
8965 if (TARGET_E500_DOUBLE
)
8966 return SPE_CONST_OFFSET_OK (offset
);
8968 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8970 if (VECTOR_MEM_VSX_P (mode
))
8975 if (!TARGET_POWERPC64
)
8977 else if (offset
& 3)
8987 if (TARGET_E500_DOUBLE
)
8988 return (SPE_CONST_OFFSET_OK (offset
)
8989 && SPE_CONST_OFFSET_OK (offset
+ 8));
8994 if (!TARGET_POWERPC64
)
8996 else if (offset
& 3)
9005 return offset
< 0x10000 - extra
;
9009 legitimate_indexed_address_p (rtx x
, int strict
)
9013 if (GET_CODE (x
) != PLUS
)
9019 /* Recognize the rtl generated by reload which we know will later be
9020 replaced with proper base and index regs. */
9022 && reload_in_progress
9023 && (REG_P (op0
) || GET_CODE (op0
) == PLUS
)
9027 return (REG_P (op0
) && REG_P (op1
)
9028 && ((INT_REG_OK_FOR_BASE_P (op0
, strict
)
9029 && INT_REG_OK_FOR_INDEX_P (op1
, strict
))
9030 || (INT_REG_OK_FOR_BASE_P (op1
, strict
)
9031 && INT_REG_OK_FOR_INDEX_P (op0
, strict
))));
9035 avoiding_indexed_address_p (machine_mode mode
)
9037 /* Avoid indexed addressing for modes that have non-indexed
9038 load/store instruction forms. */
9039 return (TARGET_AVOID_XFORM
&& VECTOR_MEM_NONE_P (mode
));
9043 legitimate_indirect_address_p (rtx x
, int strict
)
9045 return GET_CODE (x
) == REG
&& INT_REG_OK_FOR_BASE_P (x
, strict
);
9049 macho_lo_sum_memory_operand (rtx x
, machine_mode mode
)
9051 if (!TARGET_MACHO
|| !flag_pic
9052 || mode
!= SImode
|| GET_CODE (x
) != MEM
)
9056 if (GET_CODE (x
) != LO_SUM
)
9058 if (GET_CODE (XEXP (x
, 0)) != REG
)
9060 if (!INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), 0))
9064 return CONSTANT_P (x
);
9068 legitimate_lo_sum_address_p (machine_mode mode
, rtx x
, int strict
)
9070 if (GET_CODE (x
) != LO_SUM
)
9072 if (GET_CODE (XEXP (x
, 0)) != REG
)
9074 if (!INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), strict
))
9076 /* quad word addresses are restricted, and we can't use LO_SUM. */
9077 if (mode_supports_vsx_dform_quad (mode
))
9079 /* Restrict addressing for DI because of our SUBREG hackery. */
9080 if (TARGET_E500_DOUBLE
&& GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
9084 if (TARGET_ELF
|| TARGET_MACHO
)
9088 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
)
9090 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
9091 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
9092 recognizes some LO_SUM addresses as valid although this
9093 function says opposite. In most cases, LRA through different
9094 transformations can generate correct code for address reloads.
9095 It can not manage only some LO_SUM cases. So we need to add
9096 code analogous to one in rs6000_legitimize_reload_address for
9097 LOW_SUM here saying that some addresses are still valid. */
9098 large_toc_ok
= (lra_in_progress
&& TARGET_CMODEL
!= CMODEL_SMALL
9099 && small_toc_ref (x
, VOIDmode
));
9100 if (TARGET_TOC
&& ! large_toc_ok
)
9102 if (GET_MODE_NUNITS (mode
) != 1)
9104 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
9105 && !(/* ??? Assume floating point reg based on mode? */
9106 TARGET_HARD_FLOAT
&& TARGET_FPRS
&& TARGET_DOUBLE_FLOAT
9107 && (mode
== DFmode
|| mode
== DDmode
)))
9110 return CONSTANT_P (x
) || large_toc_ok
;
9117 /* Try machine-dependent ways of modifying an illegitimate address
9118 to be legitimate. If we find one, return the new, valid address.
9119 This is used from only one place: `memory_address' in explow.c.
9121 OLDX is the address as it was before break_out_memory_refs was
9122 called. In some cases it is useful to look at this to decide what
9125 It is always safe for this function to do nothing. It exists to
9126 recognize opportunities to optimize the output.
9128 On RS/6000, first check for the sum of a register with a constant
9129 integer that is out of range. If so, generate code to add the
9130 constant with the low-order 16 bits masked to the register and force
9131 this result into another register (this can be done with `cau').
9132 Then generate an address of REG+(CONST&0xffff), allowing for the
9133 possibility of bit 16 being a one.
9135 Then check for the sum of a register and something not constant, try to
9136 load the other things into a register and return the sum. */
9139 rs6000_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
9144 if (!reg_offset_addressing_ok_p (mode
)
9145 || mode_supports_vsx_dform_quad (mode
))
9147 if (virtual_stack_registers_memory_p (x
))
9150 /* In theory we should not be seeing addresses of the form reg+0,
9151 but just in case it is generated, optimize it away. */
9152 if (GET_CODE (x
) == PLUS
&& XEXP (x
, 1) == const0_rtx
)
9153 return force_reg (Pmode
, XEXP (x
, 0));
9155 /* For TImode with load/store quad, restrict addresses to just a single
9156 pointer, so it works with both GPRs and VSX registers. */
9157 /* Make sure both operands are registers. */
9158 else if (GET_CODE (x
) == PLUS
9159 && (mode
!= TImode
|| !TARGET_VSX_TIMODE
))
9160 return gen_rtx_PLUS (Pmode
,
9161 force_reg (Pmode
, XEXP (x
, 0)),
9162 force_reg (Pmode
, XEXP (x
, 1)));
9164 return force_reg (Pmode
, x
);
9166 if (GET_CODE (x
) == SYMBOL_REF
)
9168 enum tls_model model
= SYMBOL_REF_TLS_MODEL (x
);
9170 return rs6000_legitimize_tls_address (x
, model
);
9182 /* As in legitimate_offset_address_p we do not assume
9183 worst-case. The mode here is just a hint as to the registers
9184 used. A TImode is usually in gprs, but may actually be in
9185 fprs. Leave worst-case scenario for reload to handle via
9186 insn constraints. PTImode is only GPRs. */
9193 if (GET_CODE (x
) == PLUS
9194 && GET_CODE (XEXP (x
, 0)) == REG
9195 && GET_CODE (XEXP (x
, 1)) == CONST_INT
9196 && ((unsigned HOST_WIDE_INT
) (INTVAL (XEXP (x
, 1)) + 0x8000)
9198 && !(SPE_VECTOR_MODE (mode
)
9199 || (TARGET_E500_DOUBLE
&& GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)))
9201 HOST_WIDE_INT high_int
, low_int
;
9203 low_int
= ((INTVAL (XEXP (x
, 1)) & 0xffff) ^ 0x8000) - 0x8000;
9204 if (low_int
>= 0x8000 - extra
)
9206 high_int
= INTVAL (XEXP (x
, 1)) - low_int
;
9207 sum
= force_operand (gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
9208 GEN_INT (high_int
)), 0);
9209 return plus_constant (Pmode
, sum
, low_int
);
9211 else if (GET_CODE (x
) == PLUS
9212 && GET_CODE (XEXP (x
, 0)) == REG
9213 && GET_CODE (XEXP (x
, 1)) != CONST_INT
9214 && GET_MODE_NUNITS (mode
) == 1
9215 && (GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
9216 || (/* ??? Assume floating point reg based on mode? */
9217 (TARGET_HARD_FLOAT
&& TARGET_FPRS
&& TARGET_DOUBLE_FLOAT
)
9218 && (mode
== DFmode
|| mode
== DDmode
)))
9219 && !avoiding_indexed_address_p (mode
))
9221 return gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
9222 force_reg (Pmode
, force_operand (XEXP (x
, 1), 0)));
9224 else if (SPE_VECTOR_MODE (mode
)
9225 || (TARGET_E500_DOUBLE
&& GET_MODE_SIZE (mode
) > UNITS_PER_WORD
))
9229 /* We accept [reg + reg] and [reg + OFFSET]. */
9231 if (GET_CODE (x
) == PLUS
)
9233 rtx op1
= XEXP (x
, 0);
9234 rtx op2
= XEXP (x
, 1);
9237 op1
= force_reg (Pmode
, op1
);
9239 if (GET_CODE (op2
) != REG
9240 && (GET_CODE (op2
) != CONST_INT
9241 || !SPE_CONST_OFFSET_OK (INTVAL (op2
))
9242 || (GET_MODE_SIZE (mode
) > 8
9243 && !SPE_CONST_OFFSET_OK (INTVAL (op2
) + 8))))
9244 op2
= force_reg (Pmode
, op2
);
9246 /* We can't always do [reg + reg] for these, because [reg +
9247 reg + offset] is not a legitimate addressing mode. */
9248 y
= gen_rtx_PLUS (Pmode
, op1
, op2
);
9250 if ((GET_MODE_SIZE (mode
) > 8 || mode
== DDmode
) && REG_P (op2
))
9251 return force_reg (Pmode
, y
);
9256 return force_reg (Pmode
, x
);
9258 else if ((TARGET_ELF
9260 || !MACHO_DYNAMIC_NO_PIC_P
9266 && GET_CODE (x
) != CONST_INT
9267 && GET_CODE (x
) != CONST_WIDE_INT
9268 && GET_CODE (x
) != CONST_DOUBLE
9270 && GET_MODE_NUNITS (mode
) == 1
9271 && (GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
9272 || (/* ??? Assume floating point reg based on mode? */
9273 (TARGET_HARD_FLOAT
&& TARGET_FPRS
&& TARGET_DOUBLE_FLOAT
)
9274 && (mode
== DFmode
|| mode
== DDmode
))))
9276 rtx reg
= gen_reg_rtx (Pmode
);
9278 emit_insn (gen_elf_high (reg
, x
));
9280 emit_insn (gen_macho_high (reg
, x
));
9281 return gen_rtx_LO_SUM (Pmode
, reg
, x
);
9284 && GET_CODE (x
) == SYMBOL_REF
9285 && constant_pool_expr_p (x
)
9286 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x
), Pmode
))
9287 return create_TOC_reference (x
, NULL_RTX
);
9292 /* Debug version of rs6000_legitimize_address. */
9294 rs6000_debug_legitimize_address (rtx x
, rtx oldx
, machine_mode mode
)
9300 ret
= rs6000_legitimize_address (x
, oldx
, mode
);
9301 insns
= get_insns ();
9307 "\nrs6000_legitimize_address: mode %s, old code %s, "
9308 "new code %s, modified\n",
9309 GET_MODE_NAME (mode
), GET_RTX_NAME (GET_CODE (x
)),
9310 GET_RTX_NAME (GET_CODE (ret
)));
9312 fprintf (stderr
, "Original address:\n");
9315 fprintf (stderr
, "oldx:\n");
9318 fprintf (stderr
, "New address:\n");
9323 fprintf (stderr
, "Insns added:\n");
9324 debug_rtx_list (insns
, 20);
9330 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
9331 GET_MODE_NAME (mode
), GET_RTX_NAME (GET_CODE (x
)));
9342 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9343 We need to emit DTP-relative relocations. */
9345 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
9347 rs6000_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
9352 fputs ("\t.long\t", file
);
9355 fputs (DOUBLE_INT_ASM_OP
, file
);
9360 output_addr_const (file
, x
);
9362 fputs ("@dtprel+0x8000", file
);
9363 else if (TARGET_XCOFF
&& GET_CODE (x
) == SYMBOL_REF
)
9365 switch (SYMBOL_REF_TLS_MODEL (x
))
9369 case TLS_MODEL_LOCAL_EXEC
:
9370 fputs ("@le", file
);
9372 case TLS_MODEL_INITIAL_EXEC
:
9373 fputs ("@ie", file
);
9375 case TLS_MODEL_GLOBAL_DYNAMIC
:
9376 case TLS_MODEL_LOCAL_DYNAMIC
:
9385 /* Return true if X is a symbol that refers to real (rather than emulated)
9389 rs6000_real_tls_symbol_ref_p (rtx x
)
9391 return (GET_CODE (x
) == SYMBOL_REF
9392 && SYMBOL_REF_TLS_MODEL (x
) >= TLS_MODEL_REAL
);
9395 /* In the name of slightly smaller debug output, and to cater to
9396 general assembler lossage, recognize various UNSPEC sequences
9397 and turn them back into a direct symbol reference. */
9400 rs6000_delegitimize_address (rtx orig_x
)
9404 orig_x
= delegitimize_mem_from_attrs (orig_x
);
9410 if (TARGET_CMODEL
!= CMODEL_SMALL
9411 && GET_CODE (y
) == LO_SUM
)
9415 if (GET_CODE (y
) == PLUS
9416 && GET_MODE (y
) == Pmode
9417 && CONST_INT_P (XEXP (y
, 1)))
9419 offset
= XEXP (y
, 1);
9423 if (GET_CODE (y
) == UNSPEC
9424 && XINT (y
, 1) == UNSPEC_TOCREL
)
9426 y
= XVECEXP (y
, 0, 0);
9429 /* Do not associate thread-local symbols with the original
9430 constant pool symbol. */
9432 && GET_CODE (y
) == SYMBOL_REF
9433 && CONSTANT_POOL_ADDRESS_P (y
)
9434 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y
)))
9438 if (offset
!= NULL_RTX
)
9439 y
= gen_rtx_PLUS (Pmode
, y
, offset
);
9440 if (!MEM_P (orig_x
))
9443 return replace_equiv_address_nv (orig_x
, y
);
9447 && GET_CODE (orig_x
) == LO_SUM
9448 && GET_CODE (XEXP (orig_x
, 1)) == CONST
)
9450 y
= XEXP (XEXP (orig_x
, 1), 0);
9451 if (GET_CODE (y
) == UNSPEC
9452 && XINT (y
, 1) == UNSPEC_MACHOPIC_OFFSET
)
9453 return XVECEXP (y
, 0, 0);
9459 /* Return true if X shouldn't be emitted into the debug info.
9460 The linker doesn't like .toc section references from
9461 .debug_* sections, so reject .toc section symbols. */
9464 rs6000_const_not_ok_for_debug_p (rtx x
)
9466 if (GET_CODE (x
) == SYMBOL_REF
9467 && CONSTANT_POOL_ADDRESS_P (x
))
9469 rtx c
= get_pool_constant (x
);
9470 machine_mode cmode
= get_pool_mode (x
);
9471 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c
, cmode
))
9479 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
9482 rs6000_legitimate_combined_insn (rtx_insn
*insn
)
9484 int icode
= INSN_CODE (insn
);
9486 /* Reject creating doloop insns. Combine should not be allowed
9487 to create these for a number of reasons:
9488 1) In a nested loop, if combine creates one of these in an
9489 outer loop and the register allocator happens to allocate ctr
9490 to the outer loop insn, then the inner loop can't use ctr.
9491 Inner loops ought to be more highly optimized.
9492 2) Combine often wants to create one of these from what was
9493 originally a three insn sequence, first combining the three
9494 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
9495 allocated ctr, the splitter takes use back to the three insn
9496 sequence. It's better to stop combine at the two insn
9498 3) Faced with not being able to allocate ctr for ctrsi/crtdi
9499 insns, the register allocator sometimes uses floating point
9500 or vector registers for the pseudo. Since ctrsi/ctrdi is a
9501 jump insn and output reloads are not implemented for jumps,
9502 the ctrsi/ctrdi splitters need to handle all possible cases.
9503 That's a pain, and it gets to be seriously difficult when a
9504 splitter that runs after reload needs memory to transfer from
9505 a gpr to fpr. See PR70098 and PR71763 which are not fixed
9506 for the difficult case. It's better to not create problems
9507 in the first place. */
9508 if (icode
!= CODE_FOR_nothing
9509 && (icode
== CODE_FOR_ctrsi_internal1
9510 || icode
== CODE_FOR_ctrdi_internal1
9511 || icode
== CODE_FOR_ctrsi_internal2
9512 || icode
== CODE_FOR_ctrdi_internal2
9513 || icode
== CODE_FOR_ctrsi_internal3
9514 || icode
== CODE_FOR_ctrdi_internal3
9515 || icode
== CODE_FOR_ctrsi_internal4
9516 || icode
== CODE_FOR_ctrdi_internal4
))
9522 /* Construct the SYMBOL_REF for the tls_get_addr function. */
9524 static GTY(()) rtx rs6000_tls_symbol
;
9526 rs6000_tls_get_addr (void)
9528 if (!rs6000_tls_symbol
)
9529 rs6000_tls_symbol
= init_one_libfunc ("__tls_get_addr");
9531 return rs6000_tls_symbol
;
9534 /* Construct the SYMBOL_REF for TLS GOT references. */
9536 static GTY(()) rtx rs6000_got_symbol
;
9538 rs6000_got_sym (void)
9540 if (!rs6000_got_symbol
)
9542 rs6000_got_symbol
= gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
9543 SYMBOL_REF_FLAGS (rs6000_got_symbol
) |= SYMBOL_FLAG_LOCAL
;
9544 SYMBOL_REF_FLAGS (rs6000_got_symbol
) |= SYMBOL_FLAG_EXTERNAL
;
9547 return rs6000_got_symbol
;
9550 /* AIX Thread-Local Address support. */
9553 rs6000_legitimize_tls_address_aix (rtx addr
, enum tls_model model
)
9555 rtx sym
, mem
, tocref
, tlsreg
, tmpreg
, dest
, tlsaddr
;
9559 name
= XSTR (addr
, 0);
9560 /* Append TLS CSECT qualifier, unless the symbol already is qualified
9561 or the symbol will be in TLS private data section. */
9562 if (name
[strlen (name
) - 1] != ']'
9563 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr
))
9564 || bss_initializer_p (SYMBOL_REF_DECL (addr
))))
9566 tlsname
= XALLOCAVEC (char, strlen (name
) + 4);
9567 strcpy (tlsname
, name
);
9569 bss_initializer_p (SYMBOL_REF_DECL (addr
)) ? "[UL]" : "[TL]");
9570 tlsaddr
= copy_rtx (addr
);
9571 XSTR (tlsaddr
, 0) = ggc_strdup (tlsname
);
9576 /* Place addr into TOC constant pool. */
9577 sym
= force_const_mem (GET_MODE (tlsaddr
), tlsaddr
);
9579 /* Output the TOC entry and create the MEM referencing the value. */
9580 if (constant_pool_expr_p (XEXP (sym
, 0))
9581 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym
, 0)), Pmode
))
9583 tocref
= create_TOC_reference (XEXP (sym
, 0), NULL_RTX
);
9584 mem
= gen_const_mem (Pmode
, tocref
);
9585 set_mem_alias_set (mem
, get_TOC_alias_set ());
9590 /* Use global-dynamic for local-dynamic. */
9591 if (model
== TLS_MODEL_GLOBAL_DYNAMIC
9592 || model
== TLS_MODEL_LOCAL_DYNAMIC
)
9594 /* Create new TOC reference for @m symbol. */
9595 name
= XSTR (XVECEXP (XEXP (mem
, 0), 0, 0), 0);
9596 tlsname
= XALLOCAVEC (char, strlen (name
) + 1);
9597 strcpy (tlsname
, "*LCM");
9598 strcat (tlsname
, name
+ 3);
9599 rtx modaddr
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (tlsname
));
9600 SYMBOL_REF_FLAGS (modaddr
) |= SYMBOL_FLAG_LOCAL
;
9601 tocref
= create_TOC_reference (modaddr
, NULL_RTX
);
9602 rtx modmem
= gen_const_mem (Pmode
, tocref
);
9603 set_mem_alias_set (modmem
, get_TOC_alias_set ());
9605 rtx modreg
= gen_reg_rtx (Pmode
);
9606 emit_insn (gen_rtx_SET (modreg
, modmem
));
9608 tmpreg
= gen_reg_rtx (Pmode
);
9609 emit_insn (gen_rtx_SET (tmpreg
, mem
));
9611 dest
= gen_reg_rtx (Pmode
);
9613 emit_insn (gen_tls_get_addrsi (dest
, modreg
, tmpreg
));
9615 emit_insn (gen_tls_get_addrdi (dest
, modreg
, tmpreg
));
9618 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
9619 else if (TARGET_32BIT
)
9621 tlsreg
= gen_reg_rtx (SImode
);
9622 emit_insn (gen_tls_get_tpointer (tlsreg
));
9625 tlsreg
= gen_rtx_REG (DImode
, 13);
9627 /* Load the TOC value into temporary register. */
9628 tmpreg
= gen_reg_rtx (Pmode
);
9629 emit_insn (gen_rtx_SET (tmpreg
, mem
));
9630 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
9631 gen_rtx_MINUS (Pmode
, addr
, tlsreg
));
9633 /* Add TOC symbol value to TLS pointer. */
9634 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tmpreg
, tlsreg
));
9639 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9640 this (thread-local) address. */
9643 rs6000_legitimize_tls_address (rtx addr
, enum tls_model model
)
9648 return rs6000_legitimize_tls_address_aix (addr
, model
);
9650 dest
= gen_reg_rtx (Pmode
);
9651 if (model
== TLS_MODEL_LOCAL_EXEC
&& rs6000_tls_size
== 16)
9657 tlsreg
= gen_rtx_REG (Pmode
, 13);
9658 insn
= gen_tls_tprel_64 (dest
, tlsreg
, addr
);
9662 tlsreg
= gen_rtx_REG (Pmode
, 2);
9663 insn
= gen_tls_tprel_32 (dest
, tlsreg
, addr
);
9667 else if (model
== TLS_MODEL_LOCAL_EXEC
&& rs6000_tls_size
== 32)
9671 tmp
= gen_reg_rtx (Pmode
);
9674 tlsreg
= gen_rtx_REG (Pmode
, 13);
9675 insn
= gen_tls_tprel_ha_64 (tmp
, tlsreg
, addr
);
9679 tlsreg
= gen_rtx_REG (Pmode
, 2);
9680 insn
= gen_tls_tprel_ha_32 (tmp
, tlsreg
, addr
);
9684 insn
= gen_tls_tprel_lo_64 (dest
, tmp
, addr
);
9686 insn
= gen_tls_tprel_lo_32 (dest
, tmp
, addr
);
9691 rtx r3
, got
, tga
, tmp1
, tmp2
, call_insn
;
9693 /* We currently use relocations like @got@tlsgd for tls, which
9694 means the linker will handle allocation of tls entries, placing
9695 them in the .got section. So use a pointer to the .got section,
9696 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9697 or to secondary GOT sections used by 32-bit -fPIC. */
9699 got
= gen_rtx_REG (Pmode
, 2);
9703 got
= gen_rtx_REG (Pmode
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
9706 rtx gsym
= rs6000_got_sym ();
9707 got
= gen_reg_rtx (Pmode
);
9709 rs6000_emit_move (got
, gsym
, Pmode
);
9714 tmp1
= gen_reg_rtx (Pmode
);
9715 tmp2
= gen_reg_rtx (Pmode
);
9716 mem
= gen_const_mem (Pmode
, tmp1
);
9717 lab
= gen_label_rtx ();
9718 emit_insn (gen_load_toc_v4_PIC_1b (gsym
, lab
));
9719 emit_move_insn (tmp1
, gen_rtx_REG (Pmode
, LR_REGNO
));
9720 if (TARGET_LINK_STACK
)
9721 emit_insn (gen_addsi3 (tmp1
, tmp1
, GEN_INT (4)));
9722 emit_move_insn (tmp2
, mem
);
9723 rtx_insn
*last
= emit_insn (gen_addsi3 (got
, tmp1
, tmp2
));
9724 set_unique_reg_note (last
, REG_EQUAL
, gsym
);
9729 if (model
== TLS_MODEL_GLOBAL_DYNAMIC
)
9731 tga
= rs6000_tls_get_addr ();
9732 emit_library_call_value (tga
, dest
, LCT_CONST
, Pmode
,
9735 r3
= gen_rtx_REG (Pmode
, 3);
9736 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
9739 insn
= gen_tls_gd_aix64 (r3
, got
, addr
, tga
, const0_rtx
);
9741 insn
= gen_tls_gd_aix32 (r3
, got
, addr
, tga
, const0_rtx
);
9743 else if (DEFAULT_ABI
== ABI_V4
)
9744 insn
= gen_tls_gd_sysvsi (r3
, got
, addr
, tga
, const0_rtx
);
9747 call_insn
= last_call_insn ();
9748 PATTERN (call_insn
) = insn
;
9749 if (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
&& flag_pic
)
9750 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn
),
9751 pic_offset_table_rtx
);
9753 else if (model
== TLS_MODEL_LOCAL_DYNAMIC
)
9755 tga
= rs6000_tls_get_addr ();
9756 tmp1
= gen_reg_rtx (Pmode
);
9757 emit_library_call_value (tga
, tmp1
, LCT_CONST
, Pmode
,
9760 r3
= gen_rtx_REG (Pmode
, 3);
9761 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
9764 insn
= gen_tls_ld_aix64 (r3
, got
, tga
, const0_rtx
);
9766 insn
= gen_tls_ld_aix32 (r3
, got
, tga
, const0_rtx
);
9768 else if (DEFAULT_ABI
== ABI_V4
)
9769 insn
= gen_tls_ld_sysvsi (r3
, got
, tga
, const0_rtx
);
9772 call_insn
= last_call_insn ();
9773 PATTERN (call_insn
) = insn
;
9774 if (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
&& flag_pic
)
9775 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn
),
9776 pic_offset_table_rtx
);
9778 if (rs6000_tls_size
== 16)
9781 insn
= gen_tls_dtprel_64 (dest
, tmp1
, addr
);
9783 insn
= gen_tls_dtprel_32 (dest
, tmp1
, addr
);
9785 else if (rs6000_tls_size
== 32)
9787 tmp2
= gen_reg_rtx (Pmode
);
9789 insn
= gen_tls_dtprel_ha_64 (tmp2
, tmp1
, addr
);
9791 insn
= gen_tls_dtprel_ha_32 (tmp2
, tmp1
, addr
);
9794 insn
= gen_tls_dtprel_lo_64 (dest
, tmp2
, addr
);
9796 insn
= gen_tls_dtprel_lo_32 (dest
, tmp2
, addr
);
9800 tmp2
= gen_reg_rtx (Pmode
);
9802 insn
= gen_tls_got_dtprel_64 (tmp2
, got
, addr
);
9804 insn
= gen_tls_got_dtprel_32 (tmp2
, got
, addr
);
9806 insn
= gen_rtx_SET (dest
, gen_rtx_PLUS (Pmode
, tmp2
, tmp1
));
9812 /* IE, or 64-bit offset LE. */
9813 tmp2
= gen_reg_rtx (Pmode
);
9815 insn
= gen_tls_got_tprel_64 (tmp2
, got
, addr
);
9817 insn
= gen_tls_got_tprel_32 (tmp2
, got
, addr
);
9820 insn
= gen_tls_tls_64 (dest
, tmp2
, addr
);
9822 insn
= gen_tls_tls_32 (dest
, tmp2
, addr
);
9830 /* Only create the global variable for the stack protect guard if we are using
9831 the global flavor of that guard. */
9833 rs6000_init_stack_protect_guard (void)
9835 if (rs6000_stack_protector_guard
== SSP_GLOBAL
)
9836 return default_stack_protect_guard ();
9841 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9844 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
9846 if (GET_CODE (x
) == HIGH
9847 && GET_CODE (XEXP (x
, 0)) == UNSPEC
)
9850 /* A TLS symbol in the TOC cannot contain a sum. */
9851 if (GET_CODE (x
) == CONST
9852 && GET_CODE (XEXP (x
, 0)) == PLUS
9853 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
9854 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0)) != 0)
9857 /* Do not place an ELF TLS symbol in the constant pool. */
9858 return TARGET_ELF
&& tls_referenced_p (x
);
9861 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9862 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9863 can be addressed relative to the toc pointer. */
9866 use_toc_relative_ref (rtx sym
, machine_mode mode
)
9868 return ((constant_pool_expr_p (sym
)
9869 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym
),
9870 get_pool_mode (sym
)))
9871 || (TARGET_CMODEL
== CMODEL_MEDIUM
9872 && SYMBOL_REF_LOCAL_P (sym
)
9873 && GET_MODE_SIZE (mode
) <= POWERPC64_TOC_POINTER_ALIGNMENT
));
9876 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
9877 replace the input X, or the original X if no replacement is called for.
9878 The output parameter *WIN is 1 if the calling macro should goto WIN,
9881 For RS/6000, we wish to handle large displacements off a base
9882 register by splitting the addend across an addiu/addis and the mem insn.
9883 This cuts number of extra insns needed from 3 to 1.
9885 On Darwin, we use this to generate code for floating point constants.
9886 A movsf_low is generated so we wind up with 2 instructions rather than 3.
9887 The Darwin code is inside #if TARGET_MACHO because only then are the
9888 machopic_* functions defined. */
9890 rs6000_legitimize_reload_address (rtx x
, machine_mode mode
,
9891 int opnum
, int type
,
9892 int ind_levels ATTRIBUTE_UNUSED
, int *win
)
9894 bool reg_offset_p
= reg_offset_addressing_ok_p (mode
);
9895 bool quad_offset_p
= mode_supports_vsx_dform_quad (mode
);
9897 /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a
9898 DFmode/DImode MEM. Ditto for ISA 3.0 vsx_splat_v4sf/v4si. */
9901 && ((mode
== DFmode
&& recog_data
.operand_mode
[0] == V2DFmode
)
9902 || (mode
== DImode
&& recog_data
.operand_mode
[0] == V2DImode
)
9903 || (mode
== SFmode
&& recog_data
.operand_mode
[0] == V4SFmode
9904 && TARGET_P9_VECTOR
)
9905 || (mode
== SImode
&& recog_data
.operand_mode
[0] == V4SImode
9906 && TARGET_P9_VECTOR
)))
9907 reg_offset_p
= false;
9909 /* We must recognize output that we have already generated ourselves. */
9910 if (GET_CODE (x
) == PLUS
9911 && GET_CODE (XEXP (x
, 0)) == PLUS
9912 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == REG
9913 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
9914 && GET_CODE (XEXP (x
, 1)) == CONST_INT
)
9916 if (TARGET_DEBUG_ADDR
)
9918 fprintf (stderr
, "\nlegitimize_reload_address push_reload #1:\n");
9921 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
9922 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
9923 opnum
, (enum reload_type
) type
);
9928 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
9929 if (GET_CODE (x
) == LO_SUM
9930 && GET_CODE (XEXP (x
, 0)) == HIGH
)
9932 if (TARGET_DEBUG_ADDR
)
9934 fprintf (stderr
, "\nlegitimize_reload_address push_reload #2:\n");
9937 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
9938 BASE_REG_CLASS
, Pmode
, VOIDmode
, 0, 0,
9939 opnum
, (enum reload_type
) type
);
9945 if (DEFAULT_ABI
== ABI_DARWIN
&& flag_pic
9946 && GET_CODE (x
) == LO_SUM
9947 && GET_CODE (XEXP (x
, 0)) == PLUS
9948 && XEXP (XEXP (x
, 0), 0) == pic_offset_table_rtx
9949 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == HIGH
9950 && XEXP (XEXP (XEXP (x
, 0), 1), 0) == XEXP (x
, 1)
9951 && machopic_operand_p (XEXP (x
, 1)))
9953 /* Result of previous invocation of this function on Darwin
9954 floating point constant. */
9955 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
9956 BASE_REG_CLASS
, Pmode
, VOIDmode
, 0, 0,
9957 opnum
, (enum reload_type
) type
);
9963 if (TARGET_CMODEL
!= CMODEL_SMALL
9966 && small_toc_ref (x
, VOIDmode
))
9968 rtx hi
= gen_rtx_HIGH (Pmode
, copy_rtx (x
));
9969 x
= gen_rtx_LO_SUM (Pmode
, hi
, x
);
9970 if (TARGET_DEBUG_ADDR
)
9972 fprintf (stderr
, "\nlegitimize_reload_address push_reload #3:\n");
9975 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
9976 BASE_REG_CLASS
, Pmode
, VOIDmode
, 0, 0,
9977 opnum
, (enum reload_type
) type
);
9982 if (GET_CODE (x
) == PLUS
9983 && REG_P (XEXP (x
, 0))
9984 && REGNO (XEXP (x
, 0)) < FIRST_PSEUDO_REGISTER
9985 && INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), 1)
9986 && CONST_INT_P (XEXP (x
, 1))
9988 && !SPE_VECTOR_MODE (mode
)
9989 && !(TARGET_E500_DOUBLE
&& GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
9990 && (quad_offset_p
|| !VECTOR_MODE_P (mode
) || VECTOR_MEM_NONE_P (mode
)))
9992 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
9993 HOST_WIDE_INT low
= ((val
& 0xffff) ^ 0x8000) - 0x8000;
9995 = (((val
- low
) & 0xffffffff) ^ 0x80000000) - 0x80000000;
9997 /* Check for 32-bit overflow or quad addresses with one of the
9998 four least significant bits set. */
9999 if (high
+ low
!= val
10000 || (quad_offset_p
&& (low
& 0xf)))
10006 /* Reload the high part into a base reg; leave the low part
10007 in the mem directly. */
10009 x
= gen_rtx_PLUS (GET_MODE (x
),
10010 gen_rtx_PLUS (GET_MODE (x
), XEXP (x
, 0),
10014 if (TARGET_DEBUG_ADDR
)
10016 fprintf (stderr
, "\nlegitimize_reload_address push_reload #4:\n");
10019 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
10020 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
10021 opnum
, (enum reload_type
) type
);
10026 if (GET_CODE (x
) == SYMBOL_REF
10029 && (!VECTOR_MODE_P (mode
) || VECTOR_MEM_NONE_P (mode
))
10030 && !SPE_VECTOR_MODE (mode
)
10032 && DEFAULT_ABI
== ABI_DARWIN
10033 && (flag_pic
|| MACHO_DYNAMIC_NO_PIC_P
)
10034 && machopic_symbol_defined_p (x
)
10036 && DEFAULT_ABI
== ABI_V4
10039 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
10040 The same goes for DImode without 64-bit gprs and DFmode and DDmode
10042 ??? Assume floating point reg based on mode? This assumption is
10043 violated by eg. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
10044 where reload ends up doing a DFmode load of a constant from
10045 mem using two gprs. Unfortunately, at this point reload
10046 hasn't yet selected regs so poking around in reload data
10047 won't help and even if we could figure out the regs reliably,
10048 we'd still want to allow this transformation when the mem is
10049 naturally aligned. Since we say the address is good here, we
10050 can't disable offsets from LO_SUMs in mem_operand_gpr.
10051 FIXME: Allow offset from lo_sum for other modes too, when
10052 mem is sufficiently aligned.
10054 Also disallow this if the type can go in VMX/Altivec registers, since
10055 those registers do not have d-form (reg+offset) address modes. */
10056 && !reg_addr
[mode
].scalar_in_vmx_p
10061 && (mode
!= TImode
|| !TARGET_VSX_TIMODE
)
10063 && (mode
!= DImode
|| TARGET_POWERPC64
)
10064 && ((mode
!= DFmode
&& mode
!= DDmode
) || TARGET_POWERPC64
10065 || (TARGET_HARD_FLOAT
&& TARGET_FPRS
&& TARGET_DOUBLE_FLOAT
)))
10070 rtx offset
= machopic_gen_offset (x
);
10071 x
= gen_rtx_LO_SUM (GET_MODE (x
),
10072 gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
,
10073 gen_rtx_HIGH (Pmode
, offset
)), offset
);
10077 x
= gen_rtx_LO_SUM (GET_MODE (x
),
10078 gen_rtx_HIGH (Pmode
, x
), x
);
10080 if (TARGET_DEBUG_ADDR
)
10082 fprintf (stderr
, "\nlegitimize_reload_address push_reload #5:\n");
10085 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
10086 BASE_REG_CLASS
, Pmode
, VOIDmode
, 0, 0,
10087 opnum
, (enum reload_type
) type
);
10092 /* Reload an offset address wrapped by an AND that represents the
10093 masking of the lower bits. Strip the outer AND and let reload
10094 convert the offset address into an indirect address. For VSX,
10095 force reload to create the address with an AND in a separate
10096 register, because we can't guarantee an altivec register will
10098 if (VECTOR_MEM_ALTIVEC_P (mode
)
10099 && GET_CODE (x
) == AND
10100 && GET_CODE (XEXP (x
, 0)) == PLUS
10101 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == REG
10102 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
10103 && GET_CODE (XEXP (x
, 1)) == CONST_INT
10104 && INTVAL (XEXP (x
, 1)) == -16)
10114 && GET_CODE (x
) == SYMBOL_REF
10115 && use_toc_relative_ref (x
, mode
))
10117 x
= create_TOC_reference (x
, NULL_RTX
);
10118 if (TARGET_CMODEL
!= CMODEL_SMALL
)
10120 if (TARGET_DEBUG_ADDR
)
10122 fprintf (stderr
, "\nlegitimize_reload_address push_reload #6:\n");
10125 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
10126 BASE_REG_CLASS
, Pmode
, VOIDmode
, 0, 0,
10127 opnum
, (enum reload_type
) type
);
10136 /* Debug version of rs6000_legitimize_reload_address. */
10138 rs6000_debug_legitimize_reload_address (rtx x
, machine_mode mode
,
10139 int opnum
, int type
,
10140 int ind_levels
, int *win
)
10142 rtx ret
= rs6000_legitimize_reload_address (x
, mode
, opnum
, type
,
10145 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
10146 "type = %d, ind_levels = %d, win = %d, original addr:\n",
10147 GET_MODE_NAME (mode
), opnum
, type
, ind_levels
, *win
);
10151 fprintf (stderr
, "Same address returned\n");
10153 fprintf (stderr
, "NULL returned\n");
10156 fprintf (stderr
, "New address:\n");
10163 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
10164 that is a valid memory address for an instruction.
10165 The MODE argument is the machine mode for the MEM expression
10166 that wants to use this address.
10168 On the RS/6000, there are four valid address: a SYMBOL_REF that
10169 refers to a constant pool entry of an address (or the sum of it
10170 plus a constant), a short (16-bit signed) constant plus a register,
10171 the sum of two registers, or a register indirect, possibly with an
10172 auto-increment. For DFmode, DDmode and DImode with a constant plus
10173 register, we must ensure that both words are addressable or PowerPC64
10174 with offset word aligned.
10176 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
10177 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
10178 because adjacent memory cells are accessed by adding word-sized offsets
10179 during assembly output. */
10181 rs6000_legitimate_address_p (machine_mode mode
, rtx x
, bool reg_ok_strict
)
10183 bool reg_offset_p
= reg_offset_addressing_ok_p (mode
);
10184 bool quad_offset_p
= mode_supports_vsx_dform_quad (mode
);
10186 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
10187 if (VECTOR_MEM_ALTIVEC_P (mode
)
10188 && GET_CODE (x
) == AND
10189 && GET_CODE (XEXP (x
, 1)) == CONST_INT
10190 && INTVAL (XEXP (x
, 1)) == -16)
10193 if (TARGET_ELF
&& RS6000_SYMBOL_REF_TLS_P (x
))
10195 if (legitimate_indirect_address_p (x
, reg_ok_strict
))
10198 && (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == PRE_DEC
)
10199 && mode_supports_pre_incdec_p (mode
)
10200 && legitimate_indirect_address_p (XEXP (x
, 0), reg_ok_strict
))
10202 /* Handle restricted vector d-form offsets in ISA 3.0. */
10205 if (quad_address_p (x
, mode
, reg_ok_strict
))
10208 else if (virtual_stack_registers_memory_p (x
))
10211 else if (reg_offset_p
)
10213 if (legitimate_small_data_p (mode
, x
))
10215 if (legitimate_constant_pool_address_p (x
, mode
,
10216 reg_ok_strict
|| lra_in_progress
))
10218 if (reg_addr
[mode
].fused_toc
&& GET_CODE (x
) == UNSPEC
10219 && XINT (x
, 1) == UNSPEC_FUSION_ADDIS
)
10223 /* For TImode, if we have TImode in VSX registers, only allow register
10224 indirect addresses. This will allow the values to go in either GPRs
10225 or VSX registers without reloading. The vector types would tend to
10226 go into VSX registers, so we allow REG+REG, while TImode seems
10227 somewhat split, in that some uses are GPR based, and some VSX based. */
10228 /* FIXME: We could loosen this by changing the following to
10229 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
10230 but currently we cannot allow REG+REG addressing for TImode. See
10231 PR72827 for complete details on how this ends up hoodwinking DSE. */
10232 if (mode
== TImode
&& TARGET_VSX_TIMODE
)
10234 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
10235 if (! reg_ok_strict
10237 && GET_CODE (x
) == PLUS
10238 && GET_CODE (XEXP (x
, 0)) == REG
10239 && (XEXP (x
, 0) == virtual_stack_vars_rtx
10240 || XEXP (x
, 0) == arg_pointer_rtx
)
10241 && GET_CODE (XEXP (x
, 1)) == CONST_INT
)
10243 if (rs6000_legitimate_offset_address_p (mode
, x
, reg_ok_strict
, false))
10245 if (!FLOAT128_2REG_P (mode
)
10246 && ((TARGET_HARD_FLOAT
&& TARGET_FPRS
&& TARGET_DOUBLE_FLOAT
)
10247 || TARGET_POWERPC64
10248 || (mode
!= DFmode
&& mode
!= DDmode
)
10249 || (TARGET_E500_DOUBLE
&& mode
!= DDmode
))
10250 && (TARGET_POWERPC64
|| mode
!= DImode
)
10251 && (mode
!= TImode
|| VECTOR_MEM_VSX_P (TImode
))
10253 && !avoiding_indexed_address_p (mode
)
10254 && legitimate_indexed_address_p (x
, reg_ok_strict
))
10256 if (TARGET_UPDATE
&& GET_CODE (x
) == PRE_MODIFY
10257 && mode_supports_pre_modify_p (mode
)
10258 && legitimate_indirect_address_p (XEXP (x
, 0), reg_ok_strict
)
10259 && (rs6000_legitimate_offset_address_p (mode
, XEXP (x
, 1),
10260 reg_ok_strict
, false)
10261 || (!avoiding_indexed_address_p (mode
)
10262 && legitimate_indexed_address_p (XEXP (x
, 1), reg_ok_strict
)))
10263 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
10265 if (reg_offset_p
&& !quad_offset_p
10266 && legitimate_lo_sum_address_p (mode
, x
, reg_ok_strict
))
10271 /* Debug version of rs6000_legitimate_address_p. */
10273 rs6000_debug_legitimate_address_p (machine_mode mode
, rtx x
,
10274 bool reg_ok_strict
)
10276 bool ret
= rs6000_legitimate_address_p (mode
, x
, reg_ok_strict
);
10278 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
10279 "strict = %d, reload = %s, code = %s\n",
10280 ret
? "true" : "false",
10281 GET_MODE_NAME (mode
),
10285 : (reload_in_progress
? "progress" : "before")),
10286 GET_RTX_NAME (GET_CODE (x
)));
10292 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
10295 rs6000_mode_dependent_address_p (const_rtx addr
,
10296 addr_space_t as ATTRIBUTE_UNUSED
)
10298 return rs6000_mode_dependent_address_ptr (addr
);
10301 /* Go to LABEL if ADDR (a legitimate address expression)
10302 has an effect that depends on the machine mode it is used for.
10304 On the RS/6000 this is true of all integral offsets (since AltiVec
10305 and VSX modes don't allow them) or is a pre-increment or decrement.
10307 ??? Except that due to conceptual problems in offsettable_address_p
10308 we can't really report the problems of integral offsets. So leave
10309 this assuming that the adjustable offset must be valid for the
10310 sub-words of a TFmode operand, which is what we had before. */
10313 rs6000_mode_dependent_address (const_rtx addr
)
10315 switch (GET_CODE (addr
))
10318 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
10319 is considered a legitimate address before reload, so there
10320 are no offset restrictions in that case. Note that this
10321 condition is safe in strict mode because any address involving
10322 virtual_stack_vars_rtx or arg_pointer_rtx would already have
10323 been rejected as illegitimate. */
10324 if (XEXP (addr
, 0) != virtual_stack_vars_rtx
10325 && XEXP (addr
, 0) != arg_pointer_rtx
10326 && GET_CODE (XEXP (addr
, 1)) == CONST_INT
)
10328 unsigned HOST_WIDE_INT val
= INTVAL (XEXP (addr
, 1));
10329 return val
+ 0x8000 >= 0x10000 - (TARGET_POWERPC64
? 8 : 12);
10334 /* Anything in the constant pool is sufficiently aligned that
10335 all bytes have the same high part address. */
10336 return !legitimate_constant_pool_address_p (addr
, QImode
, false);
10338 /* Auto-increment cases are now treated generically in recog.c. */
10340 return TARGET_UPDATE
;
10342 /* AND is only allowed in Altivec loads. */
10353 /* Debug version of rs6000_mode_dependent_address. */
10355 rs6000_debug_mode_dependent_address (const_rtx addr
)
10357 bool ret
= rs6000_mode_dependent_address (addr
);
10359 fprintf (stderr
, "\nrs6000_mode_dependent_address: ret = %s\n",
10360 ret
? "true" : "false");
10366 /* Implement FIND_BASE_TERM. */
10369 rs6000_find_base_term (rtx op
)
10374 if (GET_CODE (base
) == CONST
)
10375 base
= XEXP (base
, 0);
10376 if (GET_CODE (base
) == PLUS
)
10377 base
= XEXP (base
, 0);
10378 if (GET_CODE (base
) == UNSPEC
)
10379 switch (XINT (base
, 1))
10381 case UNSPEC_TOCREL
:
10382 case UNSPEC_MACHOPIC_OFFSET
:
10383 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
10384 for aliasing purposes. */
10385 return XVECEXP (base
, 0, 0);
10391 /* More elaborate version of recog's offsettable_memref_p predicate
10392 that works around the ??? note of rs6000_mode_dependent_address.
10393 In particular it accepts
10395 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
10397 in 32-bit mode, that the recog predicate rejects. */
10400 rs6000_offsettable_memref_p (rtx op
, machine_mode reg_mode
)
10407 /* First mimic offsettable_memref_p. */
10408 if (offsettable_address_p (true, GET_MODE (op
), XEXP (op
, 0)))
10411 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
10412 the latter predicate knows nothing about the mode of the memory
10413 reference and, therefore, assumes that it is the largest supported
10414 mode (TFmode). As a consequence, legitimate offsettable memory
10415 references are rejected. rs6000_legitimate_offset_address_p contains
10416 the correct logic for the PLUS case of rs6000_mode_dependent_address,
10417 at least with a little bit of help here given that we know the
10418 actual registers used. */
10419 worst_case
= ((TARGET_POWERPC64
&& GET_MODE_CLASS (reg_mode
) == MODE_INT
)
10420 || GET_MODE_SIZE (reg_mode
) == 4);
10421 return rs6000_legitimate_offset_address_p (GET_MODE (op
), XEXP (op
, 0),
10425 /* Determine the reassociation width to be used in reassociate_bb.
10426 This takes into account how many parallel operations we
10427 can actually do of a given type, and also the latency.
10429 int add/sub 6/cycle
10431 vect add/sub/mul 2/cycle
10432 fp add/sub/mul 2/cycle
10437 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED
,
10440 switch (rs6000_cpu
)
10442 case PROCESSOR_POWER8
:
10443 case PROCESSOR_POWER9
:
10444 if (DECIMAL_FLOAT_MODE_P (mode
))
10446 if (VECTOR_MODE_P (mode
))
10448 if (INTEGRAL_MODE_P (mode
))
10449 return opc
== MULT_EXPR
? 4 : 6;
10450 if (FLOAT_MODE_P (mode
))
10459 /* Change register usage conditional on target flags. */
10461 rs6000_conditional_register_usage (void)
10465 if (TARGET_DEBUG_TARGET
)
10466 fprintf (stderr
, "rs6000_conditional_register_usage called\n");
10468 /* Set MQ register fixed (already call_used) so that it will not be
10470 fixed_regs
[64] = 1;
10472 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
10474 fixed_regs
[13] = call_used_regs
[13]
10475 = call_really_used_regs
[13] = 1;
10477 /* Conditionally disable FPRs. */
10478 if (TARGET_SOFT_FLOAT
|| !TARGET_FPRS
)
10479 for (i
= 32; i
< 64; i
++)
10480 fixed_regs
[i
] = call_used_regs
[i
]
10481 = call_really_used_regs
[i
] = 1;
10483 /* The TOC register is not killed across calls in a way that is
10484 visible to the compiler. */
10485 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
10486 call_really_used_regs
[2] = 0;
10488 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
== 2)
10489 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10491 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
== 1)
10492 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
]
10493 = call_used_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
]
10494 = call_really_used_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10496 if (DEFAULT_ABI
== ABI_DARWIN
&& flag_pic
)
10497 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
]
10498 = call_used_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
]
10499 = call_really_used_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10501 if (TARGET_TOC
&& TARGET_MINIMAL_TOC
)
10502 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
]
10503 = call_used_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10507 global_regs
[SPEFSCR_REGNO
] = 1;
10508 /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
10509 registers in prologues and epilogues. We no longer use r14
10510 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
10511 pool for link-compatibility with older versions of GCC. Once
10512 "old" code has died out, we can return r14 to the allocation
10515 = call_used_regs
[14]
10516 = call_really_used_regs
[14] = 1;
10519 if (!TARGET_ALTIVEC
&& !TARGET_VSX
)
10521 for (i
= FIRST_ALTIVEC_REGNO
; i
<= LAST_ALTIVEC_REGNO
; ++i
)
10522 fixed_regs
[i
] = call_used_regs
[i
] = call_really_used_regs
[i
] = 1;
10523 call_really_used_regs
[VRSAVE_REGNO
] = 1;
10526 if (TARGET_ALTIVEC
|| TARGET_VSX
)
10527 global_regs
[VSCR_REGNO
] = 1;
10529 if (TARGET_ALTIVEC_ABI
)
10531 for (i
= FIRST_ALTIVEC_REGNO
; i
< FIRST_ALTIVEC_REGNO
+ 20; ++i
)
10532 call_used_regs
[i
] = call_really_used_regs
[i
] = 1;
10534 /* AIX reserves VR20:31 in non-extended ABI mode. */
10536 for (i
= FIRST_ALTIVEC_REGNO
+ 20; i
< FIRST_ALTIVEC_REGNO
+ 32; ++i
)
10537 fixed_regs
[i
] = call_used_regs
[i
] = call_really_used_regs
[i
] = 1;
10542 /* Output insns to set DEST equal to the constant SOURCE as a series of
10543 lis, ori and shl instructions and return TRUE. */
10546 rs6000_emit_set_const (rtx dest
, rtx source
)
10548 machine_mode mode
= GET_MODE (dest
);
10553 gcc_checking_assert (CONST_INT_P (source
));
10554 c
= INTVAL (source
);
10559 emit_insn (gen_rtx_SET (dest
, source
));
10563 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (SImode
);
10565 emit_insn (gen_rtx_SET (copy_rtx (temp
),
10566 GEN_INT (c
& ~(HOST_WIDE_INT
) 0xffff)));
10567 emit_insn (gen_rtx_SET (dest
,
10568 gen_rtx_IOR (SImode
, copy_rtx (temp
),
10569 GEN_INT (c
& 0xffff))));
10573 if (!TARGET_POWERPC64
)
10577 hi
= operand_subword_force (copy_rtx (dest
), WORDS_BIG_ENDIAN
== 0,
10579 lo
= operand_subword_force (dest
, WORDS_BIG_ENDIAN
!= 0,
10581 emit_move_insn (hi
, GEN_INT (c
>> 32));
10582 c
= ((c
& 0xffffffff) ^ 0x80000000) - 0x80000000;
10583 emit_move_insn (lo
, GEN_INT (c
));
10586 rs6000_emit_set_long_const (dest
, c
);
10590 gcc_unreachable ();
10593 insn
= get_last_insn ();
10594 set
= single_set (insn
);
10595 if (! CONSTANT_P (SET_SRC (set
)))
10596 set_unique_reg_note (insn
, REG_EQUAL
, GEN_INT (c
));
10601 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10602 Output insns to set DEST equal to the constant C as a series of
10603 lis, ori and shl instructions. */
10606 rs6000_emit_set_long_const (rtx dest
, HOST_WIDE_INT c
)
10609 HOST_WIDE_INT ud1
, ud2
, ud3
, ud4
;
10619 if ((ud4
== 0xffff && ud3
== 0xffff && ud2
== 0xffff && (ud1
& 0x8000))
10620 || (ud4
== 0 && ud3
== 0 && ud2
== 0 && ! (ud1
& 0x8000)))
10621 emit_move_insn (dest
, GEN_INT ((ud1
^ 0x8000) - 0x8000));
10623 else if ((ud4
== 0xffff && ud3
== 0xffff && (ud2
& 0x8000))
10624 || (ud4
== 0 && ud3
== 0 && ! (ud2
& 0x8000)))
10626 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10628 emit_move_insn (ud1
!= 0 ? copy_rtx (temp
) : dest
,
10629 GEN_INT (((ud2
<< 16) ^ 0x80000000) - 0x80000000));
10631 emit_move_insn (dest
,
10632 gen_rtx_IOR (DImode
, copy_rtx (temp
),
10635 else if (ud3
== 0 && ud4
== 0)
10637 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10639 gcc_assert (ud2
& 0x8000);
10640 emit_move_insn (copy_rtx (temp
),
10641 GEN_INT (((ud2
<< 16) ^ 0x80000000) - 0x80000000));
10643 emit_move_insn (copy_rtx (temp
),
10644 gen_rtx_IOR (DImode
, copy_rtx (temp
),
10646 emit_move_insn (dest
,
10647 gen_rtx_ZERO_EXTEND (DImode
,
10648 gen_lowpart (SImode
,
10649 copy_rtx (temp
))));
10651 else if ((ud4
== 0xffff && (ud3
& 0x8000))
10652 || (ud4
== 0 && ! (ud3
& 0x8000)))
10654 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10656 emit_move_insn (copy_rtx (temp
),
10657 GEN_INT (((ud3
<< 16) ^ 0x80000000) - 0x80000000));
10659 emit_move_insn (copy_rtx (temp
),
10660 gen_rtx_IOR (DImode
, copy_rtx (temp
),
10662 emit_move_insn (ud1
!= 0 ? copy_rtx (temp
) : dest
,
10663 gen_rtx_ASHIFT (DImode
, copy_rtx (temp
),
10666 emit_move_insn (dest
,
10667 gen_rtx_IOR (DImode
, copy_rtx (temp
),
10672 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10674 emit_move_insn (copy_rtx (temp
),
10675 GEN_INT (((ud4
<< 16) ^ 0x80000000) - 0x80000000));
10677 emit_move_insn (copy_rtx (temp
),
10678 gen_rtx_IOR (DImode
, copy_rtx (temp
),
10681 emit_move_insn (ud2
!= 0 || ud1
!= 0 ? copy_rtx (temp
) : dest
,
10682 gen_rtx_ASHIFT (DImode
, copy_rtx (temp
),
10685 emit_move_insn (ud1
!= 0 ? copy_rtx (temp
) : dest
,
10686 gen_rtx_IOR (DImode
, copy_rtx (temp
),
10687 GEN_INT (ud2
<< 16)));
10689 emit_move_insn (dest
,
10690 gen_rtx_IOR (DImode
, copy_rtx (temp
),
10695 /* Helper for the following. Get rid of [r+r] memory refs
10696 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
10699 rs6000_eliminate_indexed_memrefs (rtx operands
[2])
10701 if (reload_in_progress
)
10704 if (GET_CODE (operands
[0]) == MEM
10705 && GET_CODE (XEXP (operands
[0], 0)) != REG
10706 && ! legitimate_constant_pool_address_p (XEXP (operands
[0], 0),
10707 GET_MODE (operands
[0]), false))
10709 = replace_equiv_address (operands
[0],
10710 copy_addr_to_reg (XEXP (operands
[0], 0)));
10712 if (GET_CODE (operands
[1]) == MEM
10713 && GET_CODE (XEXP (operands
[1], 0)) != REG
10714 && ! legitimate_constant_pool_address_p (XEXP (operands
[1], 0),
10715 GET_MODE (operands
[1]), false))
10717 = replace_equiv_address (operands
[1],
10718 copy_addr_to_reg (XEXP (operands
[1], 0)));
10721 /* Generate a vector of constants to permute MODE for a little-endian
10722 storage operation by swapping the two halves of a vector. */
10724 rs6000_const_vec (machine_mode mode
)
10752 v
= rtvec_alloc (subparts
);
10754 for (i
= 0; i
< subparts
/ 2; ++i
)
10755 RTVEC_ELT (v
, i
) = gen_rtx_CONST_INT (DImode
, i
+ subparts
/ 2);
10756 for (i
= subparts
/ 2; i
< subparts
; ++i
)
10757 RTVEC_ELT (v
, i
) = gen_rtx_CONST_INT (DImode
, i
- subparts
/ 2);
10762 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
10763 for a VSX load or store operation. */
10765 rs6000_gen_le_vsx_permute (rtx source
, machine_mode mode
)
10767 /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and
10768 128-bit integers if they are allowed in VSX registers. */
10769 if (FLOAT128_VECTOR_P (mode
) || mode
== TImode
|| mode
== V1TImode
)
10770 return gen_rtx_ROTATE (mode
, source
, GEN_INT (64));
10773 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rs6000_const_vec (mode
));
10774 return gen_rtx_VEC_SELECT (mode
, source
, par
);
10778 /* Emit a little-endian load from vector memory location SOURCE to VSX
10779 register DEST in mode MODE. The load is done with two permuting
10780 insn's that represent an lxvd2x and xxpermdi. */
10782 rs6000_emit_le_vsx_load (rtx dest
, rtx source
, machine_mode mode
)
10784 rtx tmp
, permute_mem
, permute_reg
;
10786 /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode,
10788 if (mode
== TImode
|| mode
== V1TImode
)
10791 dest
= gen_lowpart (V2DImode
, dest
);
10792 source
= adjust_address (source
, V2DImode
, 0);
10795 tmp
= can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest
) : dest
;
10796 permute_mem
= rs6000_gen_le_vsx_permute (source
, mode
);
10797 permute_reg
= rs6000_gen_le_vsx_permute (tmp
, mode
);
10798 emit_insn (gen_rtx_SET (tmp
, permute_mem
));
10799 emit_insn (gen_rtx_SET (dest
, permute_reg
));
10802 /* Emit a little-endian store to vector memory location DEST from VSX
10803 register SOURCE in mode MODE. The store is done with two permuting
10804 insn's that represent an xxpermdi and an stxvd2x. */
10806 rs6000_emit_le_vsx_store (rtx dest
, rtx source
, machine_mode mode
)
10808 rtx tmp
, permute_src
, permute_tmp
;
10810 /* This should never be called during or after reload, because it does
10811 not re-permute the source register. It is intended only for use
10813 gcc_assert (!reload_in_progress
&& !lra_in_progress
&& !reload_completed
);
10815 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10817 if (mode
== TImode
|| mode
== V1TImode
)
10820 dest
= adjust_address (dest
, V2DImode
, 0);
10821 source
= gen_lowpart (V2DImode
, source
);
10824 tmp
= can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source
) : source
;
10825 permute_src
= rs6000_gen_le_vsx_permute (source
, mode
);
10826 permute_tmp
= rs6000_gen_le_vsx_permute (tmp
, mode
);
10827 emit_insn (gen_rtx_SET (tmp
, permute_src
));
10828 emit_insn (gen_rtx_SET (dest
, permute_tmp
));
10831 /* Emit a sequence representing a little-endian VSX load or store,
10832 moving data from SOURCE to DEST in mode MODE. This is done
10833 separately from rs6000_emit_move to ensure it is called only
10834 during expand. LE VSX loads and stores introduced later are
10835 handled with a split. The expand-time RTL generation allows
10836 us to optimize away redundant pairs of register-permutes. */
10838 rs6000_emit_le_vsx_move (rtx dest
, rtx source
, machine_mode mode
)
10840 gcc_assert (!BYTES_BIG_ENDIAN
10841 && VECTOR_MEM_VSX_P (mode
)
10842 && !TARGET_P9_VECTOR
10843 && !gpr_or_gpr_p (dest
, source
)
10844 && (MEM_P (source
) ^ MEM_P (dest
)));
10846 if (MEM_P (source
))
10848 gcc_assert (REG_P (dest
) || GET_CODE (dest
) == SUBREG
);
10849 rs6000_emit_le_vsx_load (dest
, source
, mode
);
10853 if (!REG_P (source
))
10854 source
= force_reg (mode
, source
);
10855 rs6000_emit_le_vsx_store (dest
, source
, mode
);
10859 /* Return whether a SFmode or SImode move can be done without converting one
10860 mode to another. This arrises when we have:
10862 (SUBREG:SF (REG:SI ...))
10863 (SUBREG:SI (REG:SF ...))
10865 and one of the values is in a floating point/vector register, where SFmode
10866 scalars are stored in DFmode format. */
10869 valid_sf_si_move (rtx dest
, rtx src
, machine_mode mode
)
10871 if (TARGET_ALLOW_SF_SUBREG
)
10874 if (mode
!= SFmode
&& GET_MODE_CLASS (mode
) != MODE_INT
)
10877 if (!SUBREG_P (src
) || !sf_subreg_operand (src
, mode
))
10880 /*. Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10881 if (SUBREG_P (dest
))
10883 rtx dest_subreg
= SUBREG_REG (dest
);
10884 rtx src_subreg
= SUBREG_REG (src
);
10885 return GET_MODE (dest_subreg
) == GET_MODE (src_subreg
);
10892 /* Helper function to change moves with:
10894 (SUBREG:SF (REG:SI)) and
10895 (SUBREG:SI (REG:SF))
10897 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10898 values are stored as DFmode values in the VSX registers. We need to convert
10899 the bits before we can use a direct move or operate on the bits in the
10900 vector register as an integer type.
10902 Skip things like (set (SUBREG:SI (...) (SUBREG:SI (...)). */
10905 rs6000_emit_move_si_sf_subreg (rtx dest
, rtx source
, machine_mode mode
)
10907 if (TARGET_DIRECT_MOVE_64BIT
&& !reload_in_progress
&& !reload_completed
10908 && !lra_in_progress
10909 && (!SUBREG_P (dest
) || !sf_subreg_operand (dest
, mode
))
10910 && SUBREG_P (source
) && sf_subreg_operand (source
, mode
))
10912 rtx inner_source
= SUBREG_REG (source
);
10913 machine_mode inner_mode
= GET_MODE (inner_source
);
10915 if (mode
== SImode
&& inner_mode
== SFmode
)
10917 emit_insn (gen_movsi_from_sf (dest
, inner_source
));
10921 if (mode
== SFmode
&& inner_mode
== SImode
)
10923 emit_insn (gen_movsf_from_si (dest
, inner_source
));
10931 /* Emit a move from SOURCE to DEST in mode MODE. */
10933 rs6000_emit_move (rtx dest
, rtx source
, machine_mode mode
)
10936 operands
[0] = dest
;
10937 operands
[1] = source
;
10939 if (TARGET_DEBUG_ADDR
)
10942 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
10943 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10944 GET_MODE_NAME (mode
),
10945 reload_in_progress
,
10947 can_create_pseudo_p ());
10949 fprintf (stderr
, "source:\n");
10950 debug_rtx (source
);
10953 /* Sanity checks. Check that we get CONST_DOUBLE only when we should. */
10954 if (CONST_WIDE_INT_P (operands
[1])
10955 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
)
10957 /* This should be fixed with the introduction of CONST_WIDE_INT. */
10958 gcc_unreachable ();
10961 /* See if we need to special case SImode/SFmode SUBREG moves. */
10962 if ((mode
== SImode
|| mode
== SFmode
) && SUBREG_P (source
)
10963 && rs6000_emit_move_si_sf_subreg (dest
, source
, mode
))
10966 /* Check if GCC is setting up a block move that will end up using FP
10967 registers as temporaries. We must make sure this is acceptable. */
10968 if (GET_CODE (operands
[0]) == MEM
10969 && GET_CODE (operands
[1]) == MEM
10971 && (SLOW_UNALIGNED_ACCESS (DImode
, MEM_ALIGN (operands
[0]))
10972 || SLOW_UNALIGNED_ACCESS (DImode
, MEM_ALIGN (operands
[1])))
10973 && ! (SLOW_UNALIGNED_ACCESS (SImode
, (MEM_ALIGN (operands
[0]) > 32
10974 ? 32 : MEM_ALIGN (operands
[0])))
10975 || SLOW_UNALIGNED_ACCESS (SImode
, (MEM_ALIGN (operands
[1]) > 32
10977 : MEM_ALIGN (operands
[1]))))
10978 && ! MEM_VOLATILE_P (operands
[0])
10979 && ! MEM_VOLATILE_P (operands
[1]))
10981 emit_move_insn (adjust_address (operands
[0], SImode
, 0),
10982 adjust_address (operands
[1], SImode
, 0));
10983 emit_move_insn (adjust_address (copy_rtx (operands
[0]), SImode
, 4),
10984 adjust_address (copy_rtx (operands
[1]), SImode
, 4));
10988 if (can_create_pseudo_p () && GET_CODE (operands
[0]) == MEM
10989 && !gpc_reg_operand (operands
[1], mode
))
10990 operands
[1] = force_reg (mode
, operands
[1]);
10992 /* Recognize the case where operand[1] is a reference to thread-local
10993 data and load its address to a register. */
10994 if (tls_referenced_p (operands
[1]))
10996 enum tls_model model
;
10997 rtx tmp
= operands
[1];
11000 if (GET_CODE (tmp
) == CONST
&& GET_CODE (XEXP (tmp
, 0)) == PLUS
)
11002 addend
= XEXP (XEXP (tmp
, 0), 1);
11003 tmp
= XEXP (XEXP (tmp
, 0), 0);
11006 gcc_assert (GET_CODE (tmp
) == SYMBOL_REF
);
11007 model
= SYMBOL_REF_TLS_MODEL (tmp
);
11008 gcc_assert (model
!= 0);
11010 tmp
= rs6000_legitimize_tls_address (tmp
, model
);
11013 tmp
= gen_rtx_PLUS (mode
, tmp
, addend
);
11014 tmp
= force_operand (tmp
, operands
[0]);
11019 /* Handle the case where reload calls us with an invalid address. */
11020 if (reload_in_progress
&& mode
== Pmode
11021 && (! general_operand (operands
[1], mode
)
11022 || ! nonimmediate_operand (operands
[0], mode
)))
11025 /* 128-bit constant floating-point values on Darwin should really be loaded
11026 as two parts. However, this premature splitting is a problem when DFmode
11027 values can go into Altivec registers. */
11028 if (FLOAT128_IBM_P (mode
) && !reg_addr
[DFmode
].scalar_in_vmx_p
11029 && GET_CODE (operands
[1]) == CONST_DOUBLE
)
11031 rs6000_emit_move (simplify_gen_subreg (DFmode
, operands
[0], mode
, 0),
11032 simplify_gen_subreg (DFmode
, operands
[1], mode
, 0),
11034 rs6000_emit_move (simplify_gen_subreg (DFmode
, operands
[0], mode
,
11035 GET_MODE_SIZE (DFmode
)),
11036 simplify_gen_subreg (DFmode
, operands
[1], mode
,
11037 GET_MODE_SIZE (DFmode
)),
11042 if (reload_in_progress
&& cfun
->machine
->sdmode_stack_slot
!= NULL_RTX
)
11043 cfun
->machine
->sdmode_stack_slot
=
11044 eliminate_regs (cfun
->machine
->sdmode_stack_slot
, VOIDmode
, NULL_RTX
);
11047 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
11048 p1:SD) if p1 is not of floating point class and p0 is spilled as
11049 we can have no analogous movsd_store for this. */
11050 if (lra_in_progress
&& mode
== DDmode
11051 && REG_P (operands
[0]) && REGNO (operands
[0]) >= FIRST_PSEUDO_REGISTER
11052 && reg_preferred_class (REGNO (operands
[0])) == NO_REGS
11053 && GET_CODE (operands
[1]) == SUBREG
&& REG_P (SUBREG_REG (operands
[1]))
11054 && GET_MODE (SUBREG_REG (operands
[1])) == SDmode
)
11057 int regno
= REGNO (SUBREG_REG (operands
[1]));
11059 if (regno
>= FIRST_PSEUDO_REGISTER
)
11061 cl
= reg_preferred_class (regno
);
11062 regno
= cl
== NO_REGS
? -1 : ira_class_hard_regs
[cl
][1];
11064 if (regno
>= 0 && ! FP_REGNO_P (regno
))
11067 operands
[0] = gen_lowpart_SUBREG (SDmode
, operands
[0]);
11068 operands
[1] = SUBREG_REG (operands
[1]);
11071 if (lra_in_progress
11073 && REG_P (operands
[0]) && REGNO (operands
[0]) >= FIRST_PSEUDO_REGISTER
11074 && reg_preferred_class (REGNO (operands
[0])) == NO_REGS
11075 && (REG_P (operands
[1])
11076 || (GET_CODE (operands
[1]) == SUBREG
11077 && REG_P (SUBREG_REG (operands
[1])))))
11079 int regno
= REGNO (GET_CODE (operands
[1]) == SUBREG
11080 ? SUBREG_REG (operands
[1]) : operands
[1]);
11083 if (regno
>= FIRST_PSEUDO_REGISTER
)
11085 cl
= reg_preferred_class (regno
);
11086 gcc_assert (cl
!= NO_REGS
);
11087 regno
= ira_class_hard_regs
[cl
][0];
11089 if (FP_REGNO_P (regno
))
11091 if (GET_MODE (operands
[0]) != DDmode
)
11092 operands
[0] = gen_rtx_SUBREG (DDmode
, operands
[0], 0);
11093 emit_insn (gen_movsd_store (operands
[0], operands
[1]));
11095 else if (INT_REGNO_P (regno
))
11096 emit_insn (gen_movsd_hardfloat (operands
[0], operands
[1]));
11101 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
11102 p:DD)) if p0 is not of floating point class and p1 is spilled as
11103 we can have no analogous movsd_load for this. */
11104 if (lra_in_progress
&& mode
== DDmode
11105 && GET_CODE (operands
[0]) == SUBREG
&& REG_P (SUBREG_REG (operands
[0]))
11106 && GET_MODE (SUBREG_REG (operands
[0])) == SDmode
11107 && REG_P (operands
[1]) && REGNO (operands
[1]) >= FIRST_PSEUDO_REGISTER
11108 && reg_preferred_class (REGNO (operands
[1])) == NO_REGS
)
11111 int regno
= REGNO (SUBREG_REG (operands
[0]));
11113 if (regno
>= FIRST_PSEUDO_REGISTER
)
11115 cl
= reg_preferred_class (regno
);
11116 regno
= cl
== NO_REGS
? -1 : ira_class_hard_regs
[cl
][0];
11118 if (regno
>= 0 && ! FP_REGNO_P (regno
))
11121 operands
[0] = SUBREG_REG (operands
[0]);
11122 operands
[1] = gen_lowpart_SUBREG (SDmode
, operands
[1]);
11125 if (lra_in_progress
11127 && (REG_P (operands
[0])
11128 || (GET_CODE (operands
[0]) == SUBREG
11129 && REG_P (SUBREG_REG (operands
[0]))))
11130 && REG_P (operands
[1]) && REGNO (operands
[1]) >= FIRST_PSEUDO_REGISTER
11131 && reg_preferred_class (REGNO (operands
[1])) == NO_REGS
)
11133 int regno
= REGNO (GET_CODE (operands
[0]) == SUBREG
11134 ? SUBREG_REG (operands
[0]) : operands
[0]);
11137 if (regno
>= FIRST_PSEUDO_REGISTER
)
11139 cl
= reg_preferred_class (regno
);
11140 gcc_assert (cl
!= NO_REGS
);
11141 regno
= ira_class_hard_regs
[cl
][0];
11143 if (FP_REGNO_P (regno
))
11145 if (GET_MODE (operands
[1]) != DDmode
)
11146 operands
[1] = gen_rtx_SUBREG (DDmode
, operands
[1], 0);
11147 emit_insn (gen_movsd_load (operands
[0], operands
[1]));
11149 else if (INT_REGNO_P (regno
))
11150 emit_insn (gen_movsd_hardfloat (operands
[0], operands
[1]));
11156 if (reload_in_progress
11158 && cfun
->machine
->sdmode_stack_slot
!= NULL_RTX
11159 && MEM_P (operands
[0])
11160 && rtx_equal_p (operands
[0], cfun
->machine
->sdmode_stack_slot
)
11161 && REG_P (operands
[1]))
11163 if (FP_REGNO_P (REGNO (operands
[1])))
11165 rtx mem
= adjust_address_nv (operands
[0], DDmode
, 0);
11166 mem
= eliminate_regs (mem
, VOIDmode
, NULL_RTX
);
11167 emit_insn (gen_movsd_store (mem
, operands
[1]));
11169 else if (INT_REGNO_P (REGNO (operands
[1])))
11171 rtx mem
= operands
[0];
11172 if (BYTES_BIG_ENDIAN
)
11173 mem
= adjust_address_nv (mem
, mode
, 4);
11174 mem
= eliminate_regs (mem
, VOIDmode
, NULL_RTX
);
11175 emit_insn (gen_movsd_hardfloat (mem
, operands
[1]));
11181 if (reload_in_progress
11183 && REG_P (operands
[0])
11184 && MEM_P (operands
[1])
11185 && cfun
->machine
->sdmode_stack_slot
!= NULL_RTX
11186 && rtx_equal_p (operands
[1], cfun
->machine
->sdmode_stack_slot
))
11188 if (FP_REGNO_P (REGNO (operands
[0])))
11190 rtx mem
= adjust_address_nv (operands
[1], DDmode
, 0);
11191 mem
= eliminate_regs (mem
, VOIDmode
, NULL_RTX
);
11192 emit_insn (gen_movsd_load (operands
[0], mem
));
11194 else if (INT_REGNO_P (REGNO (operands
[0])))
11196 rtx mem
= operands
[1];
11197 if (BYTES_BIG_ENDIAN
)
11198 mem
= adjust_address_nv (mem
, mode
, 4);
11199 mem
= eliminate_regs (mem
, VOIDmode
, NULL_RTX
);
11200 emit_insn (gen_movsd_hardfloat (operands
[0], mem
));
11207 /* FIXME: In the long term, this switch statement should go away
11208 and be replaced by a sequence of tests based on things like
11214 if (CONSTANT_P (operands
[1])
11215 && GET_CODE (operands
[1]) != CONST_INT
)
11216 operands
[1] = force_const_mem (mode
, operands
[1]);
11223 if (FLOAT128_2REG_P (mode
))
11224 rs6000_eliminate_indexed_memrefs (operands
);
11231 if (CONSTANT_P (operands
[1])
11232 && ! easy_fp_constant (operands
[1], mode
))
11233 operands
[1] = force_const_mem (mode
, operands
[1]);
11247 if (CONSTANT_P (operands
[1])
11248 && !easy_vector_constant (operands
[1], mode
))
11249 operands
[1] = force_const_mem (mode
, operands
[1]);
11254 /* Use default pattern for address of ELF small data */
11257 && DEFAULT_ABI
== ABI_V4
11258 && (GET_CODE (operands
[1]) == SYMBOL_REF
11259 || GET_CODE (operands
[1]) == CONST
)
11260 && small_data_operand (operands
[1], mode
))
11262 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11266 if (DEFAULT_ABI
== ABI_V4
11267 && mode
== Pmode
&& mode
== SImode
11268 && flag_pic
== 1 && got_operand (operands
[1], mode
))
11270 emit_insn (gen_movsi_got (operands
[0], operands
[1]));
11274 if ((TARGET_ELF
|| DEFAULT_ABI
== ABI_DARWIN
)
11278 && CONSTANT_P (operands
[1])
11279 && GET_CODE (operands
[1]) != HIGH
11280 && GET_CODE (operands
[1]) != CONST_INT
)
11282 rtx target
= (!can_create_pseudo_p ()
11284 : gen_reg_rtx (mode
));
11286 /* If this is a function address on -mcall-aixdesc,
11287 convert it to the address of the descriptor. */
11288 if (DEFAULT_ABI
== ABI_AIX
11289 && GET_CODE (operands
[1]) == SYMBOL_REF
11290 && XSTR (operands
[1], 0)[0] == '.')
11292 const char *name
= XSTR (operands
[1], 0);
11294 while (*name
== '.')
11296 new_ref
= gen_rtx_SYMBOL_REF (Pmode
, name
);
11297 CONSTANT_POOL_ADDRESS_P (new_ref
)
11298 = CONSTANT_POOL_ADDRESS_P (operands
[1]);
11299 SYMBOL_REF_FLAGS (new_ref
) = SYMBOL_REF_FLAGS (operands
[1]);
11300 SYMBOL_REF_USED (new_ref
) = SYMBOL_REF_USED (operands
[1]);
11301 SYMBOL_REF_DATA (new_ref
) = SYMBOL_REF_DATA (operands
[1]);
11302 operands
[1] = new_ref
;
11305 if (DEFAULT_ABI
== ABI_DARWIN
)
11308 if (MACHO_DYNAMIC_NO_PIC_P
)
11310 /* Take care of any required data indirection. */
11311 operands
[1] = rs6000_machopic_legitimize_pic_address (
11312 operands
[1], mode
, operands
[0]);
11313 if (operands
[0] != operands
[1])
11314 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11318 emit_insn (gen_macho_high (target
, operands
[1]));
11319 emit_insn (gen_macho_low (operands
[0], target
, operands
[1]));
11323 emit_insn (gen_elf_high (target
, operands
[1]));
11324 emit_insn (gen_elf_low (operands
[0], target
, operands
[1]));
11328 /* If this is a SYMBOL_REF that refers to a constant pool entry,
11329 and we have put it in the TOC, we just need to make a TOC-relative
11330 reference to it. */
11332 && GET_CODE (operands
[1]) == SYMBOL_REF
11333 && use_toc_relative_ref (operands
[1], mode
))
11334 operands
[1] = create_TOC_reference (operands
[1], operands
[0]);
11335 else if (mode
== Pmode
11336 && CONSTANT_P (operands
[1])
11337 && GET_CODE (operands
[1]) != HIGH
11338 && ((GET_CODE (operands
[1]) != CONST_INT
11339 && ! easy_fp_constant (operands
[1], mode
))
11340 || (GET_CODE (operands
[1]) == CONST_INT
11341 && (num_insns_constant (operands
[1], mode
)
11342 > (TARGET_CMODEL
!= CMODEL_SMALL
? 3 : 2)))
11343 || (GET_CODE (operands
[0]) == REG
11344 && FP_REGNO_P (REGNO (operands
[0]))))
11345 && !toc_relative_expr_p (operands
[1], false)
11346 && (TARGET_CMODEL
== CMODEL_SMALL
11347 || can_create_pseudo_p ()
11348 || (REG_P (operands
[0])
11349 && INT_REG_OK_FOR_BASE_P (operands
[0], true))))
11353 /* Darwin uses a special PIC legitimizer. */
11354 if (DEFAULT_ABI
== ABI_DARWIN
&& MACHOPIC_INDIRECT
)
11357 rs6000_machopic_legitimize_pic_address (operands
[1], mode
,
11359 if (operands
[0] != operands
[1])
11360 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11365 /* If we are to limit the number of things we put in the TOC and
11366 this is a symbol plus a constant we can add in one insn,
11367 just put the symbol in the TOC and add the constant. Don't do
11368 this if reload is in progress. */
11369 if (GET_CODE (operands
[1]) == CONST
11370 && TARGET_NO_SUM_IN_TOC
&& ! reload_in_progress
11371 && GET_CODE (XEXP (operands
[1], 0)) == PLUS
11372 && add_operand (XEXP (XEXP (operands
[1], 0), 1), mode
)
11373 && (GET_CODE (XEXP (XEXP (operands
[1], 0), 0)) == LABEL_REF
11374 || GET_CODE (XEXP (XEXP (operands
[1], 0), 0)) == SYMBOL_REF
)
11375 && ! side_effects_p (operands
[0]))
11378 force_const_mem (mode
, XEXP (XEXP (operands
[1], 0), 0));
11379 rtx other
= XEXP (XEXP (operands
[1], 0), 1);
11381 sym
= force_reg (mode
, sym
);
11382 emit_insn (gen_add3_insn (operands
[0], sym
, other
));
11386 operands
[1] = force_const_mem (mode
, operands
[1]);
11389 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
11390 && use_toc_relative_ref (XEXP (operands
[1], 0), mode
))
11392 rtx tocref
= create_TOC_reference (XEXP (operands
[1], 0),
11394 operands
[1] = gen_const_mem (mode
, tocref
);
11395 set_mem_alias_set (operands
[1], get_TOC_alias_set ());
11401 if (!VECTOR_MEM_VSX_P (TImode
))
11402 rs6000_eliminate_indexed_memrefs (operands
);
11406 rs6000_eliminate_indexed_memrefs (operands
);
11410 fatal_insn ("bad move", gen_rtx_SET (dest
, source
));
11413 /* Above, we may have called force_const_mem which may have returned
11414 an invalid address. If we can, fix this up; otherwise, reload will
11415 have to deal with it. */
11416 if (GET_CODE (operands
[1]) == MEM
&& ! reload_in_progress
)
11417 operands
[1] = validize_mem (operands
[1]);
11420 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11423 /* Return true if a structure, union or array containing FIELD should be
11424 accessed using `BLKMODE'.
11426 For the SPE, simd types are V2SI, and gcc can be tempted to put the
11427 entire thing in a DI and use subregs to access the internals.
11428 store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
11429 back-end. Because a single GPR can hold a V2SI, but not a DI, the
11430 best thing to do is set structs to BLKmode and avoid Severe Tire
11433 On e500 v2, DF and DI modes suffer from the same anomaly. DF can
11434 fit into 1, whereas DI still needs two. */
11437 rs6000_member_type_forces_blk (const_tree field
, machine_mode mode
)
11439 return ((TARGET_SPE
&& TREE_CODE (TREE_TYPE (field
)) == VECTOR_TYPE
)
11440 || (TARGET_E500_DOUBLE
&& mode
== DFmode
));
/* Nonzero if we can use a floating-point register to pass this arg.  */
#define USE_FP_FOR_ARG_P(CUM,MODE)		\
  (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE)	\
   && (CUM)->fregno <= FP_ARG_MAX_REG		\
   && TARGET_HARD_FLOAT && TARGET_FPRS)

/* Nonzero if we can use an AltiVec register to pass this arg.  */
#define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED)		\
  (ALTIVEC_OR_VSX_VECTOR_MODE (MODE)			\
   && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG		\
   && TARGET_ALTIVEC_ABI				\
   && (NAMED))
11456 /* Walk down the type tree of TYPE counting consecutive base elements.
11457 If *MODEP is VOIDmode, then set it to the first valid floating point
11458 or vector type. If a non-floating point or vector type is found, or
11459 if a floating point or vector type that doesn't match a non-VOIDmode
11460 *MODEP is found, then return -1, otherwise return the count in the
11464 rs6000_aggregate_candidate (const_tree type
, machine_mode
*modep
)
11467 HOST_WIDE_INT size
;
11469 switch (TREE_CODE (type
))
11472 mode
= TYPE_MODE (type
);
11473 if (!SCALAR_FLOAT_MODE_P (mode
))
11476 if (*modep
== VOIDmode
)
11479 if (*modep
== mode
)
11485 mode
= TYPE_MODE (TREE_TYPE (type
));
11486 if (!SCALAR_FLOAT_MODE_P (mode
))
11489 if (*modep
== VOIDmode
)
11492 if (*modep
== mode
)
11498 if (!TARGET_ALTIVEC_ABI
|| !TARGET_ALTIVEC
)
11501 /* Use V4SImode as representative of all 128-bit vector types. */
11502 size
= int_size_in_bytes (type
);
11512 if (*modep
== VOIDmode
)
11515 /* Vector modes are considered to be opaque: two vectors are
11516 equivalent for the purposes of being homogeneous aggregates
11517 if they are the same size. */
11518 if (*modep
== mode
)
11526 tree index
= TYPE_DOMAIN (type
);
11528 /* Can't handle incomplete types nor sizes that are not
11530 if (!COMPLETE_TYPE_P (type
)
11531 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
11534 count
= rs6000_aggregate_candidate (TREE_TYPE (type
), modep
);
11537 || !TYPE_MAX_VALUE (index
)
11538 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
11539 || !TYPE_MIN_VALUE (index
)
11540 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
11544 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
11545 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
11547 /* There must be no padding. */
11548 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
11560 /* Can't handle incomplete types nor sizes that are not
11562 if (!COMPLETE_TYPE_P (type
)
11563 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
11566 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
11568 if (TREE_CODE (field
) != FIELD_DECL
)
11571 sub_count
= rs6000_aggregate_candidate (TREE_TYPE (field
), modep
);
11574 count
+= sub_count
;
11577 /* There must be no padding. */
11578 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
11585 case QUAL_UNION_TYPE
:
11587 /* These aren't very interesting except in a degenerate case. */
11592 /* Can't handle incomplete types nor sizes that are not
11594 if (!COMPLETE_TYPE_P (type
)
11595 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
11598 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
11600 if (TREE_CODE (field
) != FIELD_DECL
)
11603 sub_count
= rs6000_aggregate_candidate (TREE_TYPE (field
), modep
);
11606 count
= count
> sub_count
? count
: sub_count
;
11609 /* There must be no padding. */
11610 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
11623 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
11624 float or vector aggregate that shall be passed in FP/vector registers
11625 according to the ELFv2 ABI, return the homogeneous element mode in
11626 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
11628 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
11631 rs6000_discover_homogeneous_aggregate (machine_mode mode
, const_tree type
,
11632 machine_mode
*elt_mode
,
11635 /* Note that we do not accept complex types at the top level as
11636 homogeneous aggregates; these types are handled via the
11637 targetm.calls.split_complex_arg mechanism. Complex types
11638 can be elements of homogeneous aggregates, however. */
11639 if (DEFAULT_ABI
== ABI_ELFv2
&& type
&& AGGREGATE_TYPE_P (type
))
11641 machine_mode field_mode
= VOIDmode
;
11642 int field_count
= rs6000_aggregate_candidate (type
, &field_mode
);
11644 if (field_count
> 0)
11646 int n_regs
= (SCALAR_FLOAT_MODE_P (field_mode
) ?
11647 (GET_MODE_SIZE (field_mode
) + 7) >> 3 : 1);
11649 /* The ELFv2 ABI allows homogeneous aggregates to occupy
11650 up to AGGR_ARG_NUM_REG registers. */
11651 if (field_count
* n_regs
<= AGGR_ARG_NUM_REG
)
11654 *elt_mode
= field_mode
;
11656 *n_elts
= field_count
;
11669 /* Return a nonzero value to say to return the function value in
11670 memory, just as large structures are always returned. TYPE will be
11671 the data type of the value, and FNTYPE will be the type of the
11672 function doing the returning, or @code{NULL} for libcalls.
11674 The AIX ABI for the RS/6000 specifies that all structures are
11675 returned in memory. The Darwin ABI does the same.
11677 For the Darwin 64 Bit ABI, a function result can be returned in
11678 registers or in memory, depending on the size of the return data
11679 type. If it is returned in registers, the value occupies the same
11680 registers as it would if it were the first and only function
11681 argument. Otherwise, the function places its result in memory at
11682 the location pointed to by GPR3.
11684 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
11685 but a draft put them in memory, and GCC used to implement the draft
11686 instead of the final standard. Therefore, aix_struct_return
11687 controls this instead of DEFAULT_ABI; V.4 targets needing backward
11688 compatibility can change DRAFT_V4_STRUCT_RET to override the
11689 default, and -m switches get the final word. See
11690 rs6000_option_override_internal for more details.
11692 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
11693 long double support is enabled. These values are returned in memory.
11695 int_size_in_bytes returns -1 for variable size objects, which go in
11696 memory always. The cast to unsigned makes -1 > 8. */
11699 rs6000_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
11701 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
11703 && rs6000_darwin64_abi
11704 && TREE_CODE (type
) == RECORD_TYPE
11705 && int_size_in_bytes (type
) > 0)
11707 CUMULATIVE_ARGS valcum
;
11711 valcum
.fregno
= FP_ARG_MIN_REG
;
11712 valcum
.vregno
= ALTIVEC_ARG_MIN_REG
;
11713 /* Do a trial code generation as if this were going to be passed
11714 as an argument; if any part goes in memory, we return NULL. */
11715 valret
= rs6000_darwin64_record_arg (&valcum
, type
, true, true);
11718 /* Otherwise fall through to more conventional ABI rules. */
11721 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers */
11722 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type
), type
,
11726 /* The ELFv2 ABI returns aggregates up to 16B in registers */
11727 if (DEFAULT_ABI
== ABI_ELFv2
&& AGGREGATE_TYPE_P (type
)
11728 && (unsigned HOST_WIDE_INT
) int_size_in_bytes (type
) <= 16)
11731 if (AGGREGATE_TYPE_P (type
)
11732 && (aix_struct_return
11733 || (unsigned HOST_WIDE_INT
) int_size_in_bytes (type
) > 8))
11736 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
11737 modes only exist for GCC vector types if -maltivec. */
11738 if (TARGET_32BIT
&& !TARGET_ALTIVEC_ABI
11739 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type
)))
11742 /* Return synthetic vectors in memory. */
11743 if (TREE_CODE (type
) == VECTOR_TYPE
11744 && int_size_in_bytes (type
) > (TARGET_ALTIVEC_ABI
? 16 : 8))
11746 static bool warned_for_return_big_vectors
= false;
11747 if (!warned_for_return_big_vectors
)
11749 warning (OPT_Wpsabi
, "GCC vector returned by reference: "
11750 "non-standard ABI extension with no compatibility guarantee");
11751 warned_for_return_big_vectors
= true;
11756 if (DEFAULT_ABI
== ABI_V4
&& TARGET_IEEEQUAD
11757 && FLOAT128_IEEE_P (TYPE_MODE (type
)))
11763 /* Specify whether values returned in registers should be at the most
11764 significant end of a register. We want aggregates returned by
11765 value to match the way aggregates are passed to functions. */
11768 rs6000_return_in_msb (const_tree valtype
)
11770 return (DEFAULT_ABI
== ABI_ELFv2
11771 && BYTES_BIG_ENDIAN
11772 && AGGREGATE_TYPE_P (valtype
)
11773 && FUNCTION_ARG_PADDING (TYPE_MODE (valtype
), valtype
) == upward
);
#ifdef HAVE_AS_GNU_ATTRIBUTE
/* Return TRUE if a call to function FNDECL may be one that
   potentially affects the function calling ABI of the object file.  */

static bool
call_ABI_of_interest (tree fndecl)
{
  if (rs6000_gnu_attr && symtab->state == EXPANSION)
    {
      struct cgraph_node *c_node;

      /* Libcalls are always interesting.  */
      if (fndecl == NULL_TREE)
	return true;

      /* Any call to an external function is interesting.  */
      if (DECL_EXTERNAL (fndecl))
	return true;

      /* Interesting functions that we are emitting in this object file.  */
      c_node = cgraph_node::get (fndecl);
      c_node = c_node->ultimate_alias_target ();
      return !c_node->only_called_directly_p ();
    }
  return false;
}
#endif
11804 /* Initialize a variable CUM of type CUMULATIVE_ARGS
11805 for a call to a function whose data type is FNTYPE.
11806 For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
11808 For incoming args we set the number of arguments in the prototype large
11809 so we never return a PARALLEL. */
11812 init_cumulative_args (CUMULATIVE_ARGS
*cum
, tree fntype
,
11813 rtx libname ATTRIBUTE_UNUSED
, int incoming
,
11814 int libcall
, int n_named_args
,
11815 tree fndecl ATTRIBUTE_UNUSED
,
11816 machine_mode return_mode ATTRIBUTE_UNUSED
)
11818 static CUMULATIVE_ARGS zero_cumulative
;
11820 *cum
= zero_cumulative
;
11822 cum
->fregno
= FP_ARG_MIN_REG
;
11823 cum
->vregno
= ALTIVEC_ARG_MIN_REG
;
11824 cum
->prototype
= (fntype
&& prototype_p (fntype
));
11825 cum
->call_cookie
= ((DEFAULT_ABI
== ABI_V4
&& libcall
)
11826 ? CALL_LIBCALL
: CALL_NORMAL
);
11827 cum
->sysv_gregno
= GP_ARG_MIN_REG
;
11828 cum
->stdarg
= stdarg_p (fntype
);
11829 cum
->libcall
= libcall
;
11831 cum
->nargs_prototype
= 0;
11832 if (incoming
|| cum
->prototype
)
11833 cum
->nargs_prototype
= n_named_args
;
11835 /* Check for a longcall attribute. */
11836 if ((!fntype
&& rs6000_default_long_calls
)
11838 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype
))
11839 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype
))))
11840 cum
->call_cookie
|= CALL_LONG
;
11842 if (TARGET_DEBUG_ARG
)
11844 fprintf (stderr
, "\ninit_cumulative_args:");
11847 tree ret_type
= TREE_TYPE (fntype
);
11848 fprintf (stderr
, " ret code = %s,",
11849 get_tree_code_name (TREE_CODE (ret_type
)));
11852 if (cum
->call_cookie
& CALL_LONG
)
11853 fprintf (stderr
, " longcall,");
11855 fprintf (stderr
, " proto = %d, nargs = %d\n",
11856 cum
->prototype
, cum
->nargs_prototype
);
11859 #ifdef HAVE_AS_GNU_ATTRIBUTE
11860 if (TARGET_ELF
&& (TARGET_64BIT
|| DEFAULT_ABI
== ABI_V4
))
11862 cum
->escapes
= call_ABI_of_interest (fndecl
);
11869 return_type
= TREE_TYPE (fntype
);
11870 return_mode
= TYPE_MODE (return_type
);
11873 return_type
= lang_hooks
.types
.type_for_mode (return_mode
, 0);
11875 if (return_type
!= NULL
)
11877 if (TREE_CODE (return_type
) == RECORD_TYPE
11878 && TYPE_TRANSPARENT_AGGR (return_type
))
11880 return_type
= TREE_TYPE (first_field (return_type
));
11881 return_mode
= TYPE_MODE (return_type
);
11883 if (AGGREGATE_TYPE_P (return_type
)
11884 && ((unsigned HOST_WIDE_INT
) int_size_in_bytes (return_type
)
11886 rs6000_returns_struct
= true;
11888 if (SCALAR_FLOAT_MODE_P (return_mode
))
11890 rs6000_passes_float
= true;
11891 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
|| TARGET_64BIT
)
11892 && (FLOAT128_IBM_P (return_mode
)
11893 || FLOAT128_IEEE_P (return_mode
)
11894 || (return_type
!= NULL
11895 && (TYPE_MAIN_VARIANT (return_type
)
11896 == long_double_type_node
))))
11897 rs6000_passes_long_double
= true;
11899 if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode
)
11900 || SPE_VECTOR_MODE (return_mode
))
11901 rs6000_passes_vector
= true;
11908 && TARGET_ALTIVEC_ABI
11909 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype
))))
11911 error ("cannot return value in vector register because"
11912 " altivec instructions are disabled, use -maltivec"
11913 " to enable them");
11917 /* The mode the ABI uses for a word. This is not the same as word_mode
11918 for -m32 -mpowerpc64. This is used to implement various target hooks. */
11920 static scalar_int_mode
11921 rs6000_abi_word_mode (void)
11923 return TARGET_32BIT
? SImode
: DImode
;
11926 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
11928 rs6000_offload_options (void)
11931 return xstrdup ("-foffload-abi=lp64");
11933 return xstrdup ("-foffload-abi=ilp32");
11936 /* On rs6000, function arguments are promoted, as are function return
11939 static machine_mode
11940 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
11942 int *punsignedp ATTRIBUTE_UNUSED
,
11945 PROMOTE_MODE (mode
, *punsignedp
, type
);
11950 /* Return true if TYPE must be passed on the stack and not in registers. */
11953 rs6000_must_pass_in_stack (machine_mode mode
, const_tree type
)
11955 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
|| TARGET_64BIT
)
11956 return must_pass_in_stack_var_size (mode
, type
);
11958 return must_pass_in_stack_var_size_or_pad (mode
, type
);
11962 is_complex_IBM_long_double (machine_mode mode
)
11964 return mode
== ICmode
|| (!TARGET_IEEEQUAD
&& mode
== TCmode
);
11967 /* Whether ABI_V4 passes MODE args to a function in floating point
11971 abi_v4_pass_in_fpr (machine_mode mode
)
11973 if (!TARGET_FPRS
|| !TARGET_HARD_FLOAT
)
11975 if (TARGET_SINGLE_FLOAT
&& mode
== SFmode
)
11977 if (TARGET_DOUBLE_FLOAT
&& mode
== DFmode
)
11979 /* ABI_V4 passes complex IBM long double in 8 gprs.
11980 Stupid, but we can't change the ABI now. */
11981 if (is_complex_IBM_long_double (mode
))
11983 if (FLOAT128_2REG_P (mode
))
11985 if (DECIMAL_FLOAT_MODE_P (mode
))
11990 /* If defined, a C expression which determines whether, and in which
11991 direction, to pad out an argument with extra space. The value
11992 should be of type `enum direction': either `upward' to pad above
11993 the argument, `downward' to pad below, or `none' to inhibit
11996 For the AIX ABI structs are always stored left shifted in their
12000 function_arg_padding (machine_mode mode
, const_tree type
)
12002 #ifndef AGGREGATE_PADDING_FIXED
12003 #define AGGREGATE_PADDING_FIXED 0
12005 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
12006 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
12009 if (!AGGREGATE_PADDING_FIXED
)
12011 /* GCC used to pass structures of the same size as integer types as
12012 if they were in fact integers, ignoring FUNCTION_ARG_PADDING.
12013 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
12014 passed padded downward, except that -mstrict-align further
12015 muddied the water in that multi-component structures of 2 and 4
12016 bytes in size were passed padded upward.
12018 The following arranges for best compatibility with previous
12019 versions of gcc, but removes the -mstrict-align dependency. */
12020 if (BYTES_BIG_ENDIAN
)
12022 HOST_WIDE_INT size
= 0;
12024 if (mode
== BLKmode
)
12026 if (type
&& TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
)
12027 size
= int_size_in_bytes (type
);
12030 size
= GET_MODE_SIZE (mode
);
12032 if (size
== 1 || size
== 2 || size
== 4)
12038 if (AGGREGATES_PAD_UPWARD_ALWAYS
)
12040 if (type
!= 0 && AGGREGATE_TYPE_P (type
))
12044 /* Fall back to the default. */
12045 return DEFAULT_FUNCTION_ARG_PADDING (mode
, type
);
12048 /* If defined, a C expression that gives the alignment boundary, in bits,
12049 of an argument with the specified mode and type. If it is not defined,
12050 PARM_BOUNDARY is used for all arguments.
12052 V.4 wants long longs and doubles to be double word aligned. Just
12053 testing the mode size is a boneheaded way to do this as it means
12054 that other types such as complex int are also double word aligned.
12055 However, we're stuck with this because changing the ABI might break
12056 existing library interfaces.
12058 Doubleword align SPE vectors.
12059 Quadword align Altivec/VSX vectors.
12060 Quadword align large synthetic vector types. */
12062 static unsigned int
12063 rs6000_function_arg_boundary (machine_mode mode
, const_tree type
)
12065 machine_mode elt_mode
;
12068 rs6000_discover_homogeneous_aggregate (mode
, type
, &elt_mode
, &n_elts
);
12070 if (DEFAULT_ABI
== ABI_V4
12071 && (GET_MODE_SIZE (mode
) == 8
12072 || (TARGET_HARD_FLOAT
12074 && !is_complex_IBM_long_double (mode
)
12075 && FLOAT128_2REG_P (mode
))))
12077 else if (FLOAT128_VECTOR_P (mode
))
12079 else if (SPE_VECTOR_MODE (mode
)
12080 || (type
&& TREE_CODE (type
) == VECTOR_TYPE
12081 && int_size_in_bytes (type
) >= 8
12082 && int_size_in_bytes (type
) < 16))
12084 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode
)
12085 || (type
&& TREE_CODE (type
) == VECTOR_TYPE
12086 && int_size_in_bytes (type
) >= 16))
12089 /* Aggregate types that need > 8 byte alignment are quadword-aligned
12090 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
12091 -mcompat-align-parm is used. */
12092 if (((DEFAULT_ABI
== ABI_AIX
&& !rs6000_compat_align_parm
)
12093 || DEFAULT_ABI
== ABI_ELFv2
)
12094 && type
&& TYPE_ALIGN (type
) > 64)
12096 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
12097 or homogeneous float/vector aggregates here. We already handled
12098 vector aggregates above, but still need to check for float here. */
12099 bool aggregate_p
= (AGGREGATE_TYPE_P (type
)
12100 && !SCALAR_FLOAT_MODE_P (elt_mode
));
12102 /* We used to check for BLKmode instead of the above aggregate type
12103 check. Warn when this results in any difference to the ABI. */
12104 if (aggregate_p
!= (mode
== BLKmode
))
12106 static bool warned
;
12107 if (!warned
&& warn_psabi
)
12110 inform (input_location
,
12111 "the ABI of passing aggregates with %d-byte alignment"
12112 " has changed in GCC 5",
12113 (int) TYPE_ALIGN (type
) / BITS_PER_UNIT
);
12121 /* Similar for the Darwin64 ABI. Note that for historical reasons we
12122 implement the "aggregate type" check as a BLKmode check here; this
12123 means certain aggregate types are in fact not aligned. */
12124 if (TARGET_MACHO
&& rs6000_darwin64_abi
12126 && type
&& TYPE_ALIGN (type
) > 64)
12129 return PARM_BOUNDARY
;
12132 /* The offset in words to the start of the parameter save area. */
12134 static unsigned int
12135 rs6000_parm_offset (void)
12137 return (DEFAULT_ABI
== ABI_V4
? 2
12138 : DEFAULT_ABI
== ABI_ELFv2
? 4
12142 /* For a function parm of MODE and TYPE, return the starting word in
12143 the parameter area. NWORDS of the parameter area are already used. */
12145 static unsigned int
12146 rs6000_parm_start (machine_mode mode
, const_tree type
,
12147 unsigned int nwords
)
12149 unsigned int align
;
12151 align
= rs6000_function_arg_boundary (mode
, type
) / PARM_BOUNDARY
- 1;
12152 return nwords
+ (-(rs6000_parm_offset () + nwords
) & align
);
12155 /* Compute the size (in words) of a function argument. */
12157 static unsigned long
12158 rs6000_arg_size (machine_mode mode
, const_tree type
)
12160 unsigned long size
;
12162 if (mode
!= BLKmode
)
12163 size
= GET_MODE_SIZE (mode
);
12165 size
= int_size_in_bytes (type
);
12168 return (size
+ 3) >> 2;
12170 return (size
+ 7) >> 3;
12173 /* Use this to flush pending int fields. */
12176 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS
*cum
,
12177 HOST_WIDE_INT bitpos
, int final
)
12179 unsigned int startbit
, endbit
;
12180 int intregs
, intoffset
;
12183 /* Handle the situations where a float is taking up the first half
12184 of the GPR, and the other half is empty (typically due to
12185 alignment restrictions). We can detect this by a 8-byte-aligned
12186 int field, or by seeing that this is the final flush for this
12187 argument. Count the word and continue on. */
12188 if (cum
->floats_in_gpr
== 1
12189 && (cum
->intoffset
% 64 == 0
12190 || (cum
->intoffset
== -1 && final
)))
12193 cum
->floats_in_gpr
= 0;
12196 if (cum
->intoffset
== -1)
12199 intoffset
= cum
->intoffset
;
12200 cum
->intoffset
= -1;
12201 cum
->floats_in_gpr
= 0;
12203 if (intoffset
% BITS_PER_WORD
!= 0)
12205 mode
= mode_for_size (BITS_PER_WORD
- intoffset
% BITS_PER_WORD
,
12207 if (mode
== BLKmode
)
12209 /* We couldn't find an appropriate mode, which happens,
12210 e.g., in packed structs when there are 3 bytes to load.
12211 Back intoffset back to the beginning of the word in this
12213 intoffset
= ROUND_DOWN (intoffset
, BITS_PER_WORD
);
12217 startbit
= ROUND_DOWN (intoffset
, BITS_PER_WORD
);
12218 endbit
= ROUND_UP (bitpos
, BITS_PER_WORD
);
12219 intregs
= (endbit
- startbit
) / BITS_PER_WORD
;
12220 cum
->words
+= intregs
;
12221 /* words should be unsigned. */
12222 if ((unsigned)cum
->words
< (endbit
/BITS_PER_WORD
))
12224 int pad
= (endbit
/BITS_PER_WORD
) - cum
->words
;
12229 /* The darwin64 ABI calls for us to recurse down through structs,
12230 looking for elements passed in registers. Unfortunately, we have
12231 to track int register count here also because of misalignments
12232 in powerpc alignment mode. */
12235 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS
*cum
,
12237 HOST_WIDE_INT startbitpos
)
12241 for (f
= TYPE_FIELDS (type
); f
; f
= DECL_CHAIN (f
))
12242 if (TREE_CODE (f
) == FIELD_DECL
)
12244 HOST_WIDE_INT bitpos
= startbitpos
;
12245 tree ftype
= TREE_TYPE (f
);
12247 if (ftype
== error_mark_node
)
12249 mode
= TYPE_MODE (ftype
);
12251 if (DECL_SIZE (f
) != 0
12252 && tree_fits_uhwi_p (bit_position (f
)))
12253 bitpos
+= int_bit_position (f
);
12255 /* ??? FIXME: else assume zero offset. */
12257 if (TREE_CODE (ftype
) == RECORD_TYPE
)
12258 rs6000_darwin64_record_arg_advance_recurse (cum
, ftype
, bitpos
);
12259 else if (USE_FP_FOR_ARG_P (cum
, mode
))
12261 unsigned n_fpregs
= (GET_MODE_SIZE (mode
) + 7) >> 3;
12262 rs6000_darwin64_record_arg_advance_flush (cum
, bitpos
, 0);
12263 cum
->fregno
+= n_fpregs
;
12264 /* Single-precision floats present a special problem for
12265 us, because they are smaller than an 8-byte GPR, and so
12266 the structure-packing rules combined with the standard
12267 varargs behavior mean that we want to pack float/float
12268 and float/int combinations into a single register's
12269 space. This is complicated by the arg advance flushing,
12270 which works on arbitrarily large groups of int-type
12272 if (mode
== SFmode
)
12274 if (cum
->floats_in_gpr
== 1)
12276 /* Two floats in a word; count the word and reset
12277 the float count. */
12279 cum
->floats_in_gpr
= 0;
12281 else if (bitpos
% 64 == 0)
12283 /* A float at the beginning of an 8-byte word;
12284 count it and put off adjusting cum->words until
12285 we see if a arg advance flush is going to do it
12287 cum
->floats_in_gpr
++;
12291 /* The float is at the end of a word, preceded
12292 by integer fields, so the arg advance flush
12293 just above has already set cum->words and
12294 everything is taken care of. */
12298 cum
->words
+= n_fpregs
;
12300 else if (USE_ALTIVEC_FOR_ARG_P (cum
, mode
, 1))
12302 rs6000_darwin64_record_arg_advance_flush (cum
, bitpos
, 0);
12306 else if (cum
->intoffset
== -1)
12307 cum
->intoffset
= bitpos
;
12311 /* Check for an item that needs to be considered specially under the darwin 64
12312 bit ABI. These are record types where the mode is BLK or the structure is
12313 8 bytes in size. */
12315 rs6000_darwin64_struct_check_p (machine_mode mode
, const_tree type
)
12317 return rs6000_darwin64_abi
12318 && ((mode
== BLKmode
12319 && TREE_CODE (type
) == RECORD_TYPE
12320 && int_size_in_bytes (type
) > 0)
12321 || (type
&& TREE_CODE (type
) == RECORD_TYPE
12322 && int_size_in_bytes (type
) == 8)) ? 1 : 0;
12325 /* Update the data in CUM to advance over an argument
12326 of mode MODE and data type TYPE.
12327 (TYPE is null for libcalls where that information may not be available.)
12329 Note that for args passed by reference, function_arg will be called
12330 with MODE and TYPE set to that of the pointer to the arg, not the arg
12334 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS
*cum
, machine_mode mode
,
12335 const_tree type
, bool named
, int depth
)
12337 machine_mode elt_mode
;
12340 rs6000_discover_homogeneous_aggregate (mode
, type
, &elt_mode
, &n_elts
);
12342 /* Only tick off an argument if we're not recursing. */
12344 cum
->nargs_prototype
--;
12346 #ifdef HAVE_AS_GNU_ATTRIBUTE
12347 if (TARGET_ELF
&& (TARGET_64BIT
|| DEFAULT_ABI
== ABI_V4
)
12350 if (SCALAR_FLOAT_MODE_P (mode
))
12352 rs6000_passes_float
= true;
12353 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
|| TARGET_64BIT
)
12354 && (FLOAT128_IBM_P (mode
)
12355 || FLOAT128_IEEE_P (mode
)
12357 && TYPE_MAIN_VARIANT (type
) == long_double_type_node
)))
12358 rs6000_passes_long_double
= true;
12360 if ((named
&& ALTIVEC_OR_VSX_VECTOR_MODE (mode
))
12361 || (SPE_VECTOR_MODE (mode
)
12363 && cum
->sysv_gregno
<= GP_ARG_MAX_REG
))
12364 rs6000_passes_vector
= true;
12368 if (TARGET_ALTIVEC_ABI
12369 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode
)
12370 || (type
&& TREE_CODE (type
) == VECTOR_TYPE
12371 && int_size_in_bytes (type
) == 16)))
12373 bool stack
= false;
12375 if (USE_ALTIVEC_FOR_ARG_P (cum
, elt_mode
, named
))
12377 cum
->vregno
+= n_elts
;
12379 if (!TARGET_ALTIVEC
)
12380 error ("cannot pass argument in vector register because"
12381 " altivec instructions are disabled, use -maltivec"
12382 " to enable them");
12384 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
12385 even if it is going to be passed in a vector register.
12386 Darwin does the same for variable-argument functions. */
12387 if (((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
12389 || (cum
->stdarg
&& DEFAULT_ABI
!= ABI_V4
))
12399 /* Vector parameters must be 16-byte aligned. In 32-bit
12400 mode this means we need to take into account the offset
12401 to the parameter save area. In 64-bit mode, they just
12402 have to start on an even word, since the parameter save
12403 area is 16-byte aligned. */
12405 align
= -(rs6000_parm_offset () + cum
->words
) & 3;
12407 align
= cum
->words
& 1;
12408 cum
->words
+= align
+ rs6000_arg_size (mode
, type
);
12410 if (TARGET_DEBUG_ARG
)
12412 fprintf (stderr
, "function_adv: words = %2d, align=%d, ",
12413 cum
->words
, align
);
12414 fprintf (stderr
, "nargs = %4d, proto = %d, mode = %4s\n",
12415 cum
->nargs_prototype
, cum
->prototype
,
12416 GET_MODE_NAME (mode
));
12420 else if (TARGET_SPE_ABI
&& TARGET_SPE
&& SPE_VECTOR_MODE (mode
)
12422 && cum
->sysv_gregno
<= GP_ARG_MAX_REG
)
12423 cum
->sysv_gregno
++;
12425 else if (TARGET_MACHO
&& rs6000_darwin64_struct_check_p (mode
, type
))
12427 int size
= int_size_in_bytes (type
);
12428 /* Variable sized types have size == -1 and are
12429 treated as if consisting entirely of ints.
12430 Pad to 16 byte boundary if needed. */
12431 if (TYPE_ALIGN (type
) >= 2 * BITS_PER_WORD
12432 && (cum
->words
% 2) != 0)
12434 /* For varargs, we can just go up by the size of the struct. */
12436 cum
->words
+= (size
+ 7) / 8;
12439 /* It is tempting to say int register count just goes up by
12440 sizeof(type)/8, but this is wrong in a case such as
12441 { int; double; int; } [powerpc alignment]. We have to
12442 grovel through the fields for these too. */
12443 cum
->intoffset
= 0;
12444 cum
->floats_in_gpr
= 0;
12445 rs6000_darwin64_record_arg_advance_recurse (cum
, type
, 0);
12446 rs6000_darwin64_record_arg_advance_flush (cum
,
12447 size
* BITS_PER_UNIT
, 1);
12449 if (TARGET_DEBUG_ARG
)
12451 fprintf (stderr
, "function_adv: words = %2d, align=%d, size=%d",
12452 cum
->words
, TYPE_ALIGN (type
), size
);
12454 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
12455 cum
->nargs_prototype
, cum
->prototype
,
12456 GET_MODE_NAME (mode
));
12459 else if (DEFAULT_ABI
== ABI_V4
)
12461 if (abi_v4_pass_in_fpr (mode
))
12463 /* _Decimal128 must use an even/odd register pair. This assumes
12464 that the register number is odd when fregno is odd. */
12465 if (mode
== TDmode
&& (cum
->fregno
% 2) == 1)
12468 if (cum
->fregno
+ (FLOAT128_2REG_P (mode
) ? 1 : 0)
12469 <= FP_ARG_V4_MAX_REG
)
12470 cum
->fregno
+= (GET_MODE_SIZE (mode
) + 7) >> 3;
12473 cum
->fregno
= FP_ARG_V4_MAX_REG
+ 1;
12474 if (mode
== DFmode
|| FLOAT128_IBM_P (mode
)
12475 || mode
== DDmode
|| mode
== TDmode
)
12476 cum
->words
+= cum
->words
& 1;
12477 cum
->words
+= rs6000_arg_size (mode
, type
);
12482 int n_words
= rs6000_arg_size (mode
, type
);
12483 int gregno
= cum
->sysv_gregno
;
12485 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
12486 (r7,r8) or (r9,r10). As does any other 2 word item such
12487 as complex int due to a historical mistake. */
12489 gregno
+= (1 - gregno
) & 1;
12491 /* Multi-reg args are not split between registers and stack. */
12492 if (gregno
+ n_words
- 1 > GP_ARG_MAX_REG
)
12494 /* Long long and SPE vectors are aligned on the stack.
12495 So are other 2 word items such as complex int due to
12496 a historical mistake. */
12498 cum
->words
+= cum
->words
& 1;
12499 cum
->words
+= n_words
;
12502 /* Note: continuing to accumulate gregno past when we've started
12503 spilling to the stack indicates the fact that we've started
12504 spilling to the stack to expand_builtin_saveregs. */
12505 cum
->sysv_gregno
= gregno
+ n_words
;
12508 if (TARGET_DEBUG_ARG
)
12510 fprintf (stderr
, "function_adv: words = %2d, fregno = %2d, ",
12511 cum
->words
, cum
->fregno
);
12512 fprintf (stderr
, "gregno = %2d, nargs = %4d, proto = %d, ",
12513 cum
->sysv_gregno
, cum
->nargs_prototype
, cum
->prototype
);
12514 fprintf (stderr
, "mode = %4s, named = %d\n",
12515 GET_MODE_NAME (mode
), named
);
12520 int n_words
= rs6000_arg_size (mode
, type
);
12521 int start_words
= cum
->words
;
12522 int align_words
= rs6000_parm_start (mode
, type
, start_words
);
12524 cum
->words
= align_words
+ n_words
;
12526 if (SCALAR_FLOAT_MODE_P (elt_mode
) && TARGET_HARD_FLOAT
&& TARGET_FPRS
)
12528 /* _Decimal128 must be passed in an even/odd float register pair.
12529 This assumes that the register number is odd when fregno is
12531 if (elt_mode
== TDmode
&& (cum
->fregno
% 2) == 1)
12533 cum
->fregno
+= n_elts
* ((GET_MODE_SIZE (elt_mode
) + 7) >> 3);
12536 if (TARGET_DEBUG_ARG
)
12538 fprintf (stderr
, "function_adv: words = %2d, fregno = %2d, ",
12539 cum
->words
, cum
->fregno
);
12540 fprintf (stderr
, "nargs = %4d, proto = %d, mode = %4s, ",
12541 cum
->nargs_prototype
, cum
->prototype
, GET_MODE_NAME (mode
));
12542 fprintf (stderr
, "named = %d, align = %d, depth = %d\n",
12543 named
, align_words
- start_words
, depth
);
12549 rs6000_function_arg_advance (cumulative_args_t cum
, machine_mode mode
,
12550 const_tree type
, bool named
)
12552 rs6000_function_arg_advance_1 (get_cumulative_args (cum
), mode
, type
, named
,
12557 spe_build_register_parallel (machine_mode mode
, int gregno
)
12559 rtx r1
, r3
, r5
, r7
;
12564 r1
= gen_rtx_REG (DImode
, gregno
);
12565 r1
= gen_rtx_EXPR_LIST (VOIDmode
, r1
, const0_rtx
);
12566 return gen_rtx_PARALLEL (mode
, gen_rtvec (1, r1
));
12570 r1
= gen_rtx_REG (DImode
, gregno
);
12571 r1
= gen_rtx_EXPR_LIST (VOIDmode
, r1
, const0_rtx
);
12572 r3
= gen_rtx_REG (DImode
, gregno
+ 2);
12573 r3
= gen_rtx_EXPR_LIST (VOIDmode
, r3
, GEN_INT (8));
12574 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, r1
, r3
));
12577 r1
= gen_rtx_REG (DImode
, gregno
);
12578 r1
= gen_rtx_EXPR_LIST (VOIDmode
, r1
, const0_rtx
);
12579 r3
= gen_rtx_REG (DImode
, gregno
+ 2);
12580 r3
= gen_rtx_EXPR_LIST (VOIDmode
, r3
, GEN_INT (8));
12581 r5
= gen_rtx_REG (DImode
, gregno
+ 4);
12582 r5
= gen_rtx_EXPR_LIST (VOIDmode
, r5
, GEN_INT (16));
12583 r7
= gen_rtx_REG (DImode
, gregno
+ 6);
12584 r7
= gen_rtx_EXPR_LIST (VOIDmode
, r7
, GEN_INT (24));
12585 return gen_rtx_PARALLEL (mode
, gen_rtvec (4, r1
, r3
, r5
, r7
));
12588 gcc_unreachable ();
12592 /* Determine where to put a SIMD argument on the SPE. */
12594 rs6000_spe_function_arg (const CUMULATIVE_ARGS
*cum
, machine_mode mode
,
12597 int gregno
= cum
->sysv_gregno
;
12599 /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
12600 are passed and returned in a pair of GPRs for ABI compatibility. */
12601 if (TARGET_E500_DOUBLE
&& (mode
== DFmode
|| mode
== TFmode
12602 || mode
== DCmode
|| mode
== TCmode
))
12604 int n_words
= rs6000_arg_size (mode
, type
);
12606 /* Doubles go in an odd/even register pair (r5/r6, etc). */
12607 if (mode
== DFmode
)
12608 gregno
+= (1 - gregno
) & 1;
12610 /* Multi-reg args are not split between registers and stack. */
12611 if (gregno
+ n_words
- 1 > GP_ARG_MAX_REG
)
12614 return spe_build_register_parallel (mode
, gregno
);
12618 int n_words
= rs6000_arg_size (mode
, type
);
12620 /* SPE vectors are put in odd registers. */
12621 if (n_words
== 2 && (gregno
& 1) == 0)
12624 if (gregno
+ n_words
- 1 <= GP_ARG_MAX_REG
)
12627 machine_mode m
= SImode
;
12629 r1
= gen_rtx_REG (m
, gregno
);
12630 r1
= gen_rtx_EXPR_LIST (m
, r1
, const0_rtx
);
12631 r2
= gen_rtx_REG (m
, gregno
+ 1);
12632 r2
= gen_rtx_EXPR_LIST (m
, r2
, GEN_INT (4));
12633 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, r1
, r2
));
12640 if (gregno
<= GP_ARG_MAX_REG
)
12641 return gen_rtx_REG (mode
, gregno
);
12647 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
12648 structure between cum->intoffset and bitpos to integer registers. */
12651 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS
*cum
,
12652 HOST_WIDE_INT bitpos
, rtx rvec
[], int *k
)
12655 unsigned int regno
;
12656 unsigned int startbit
, endbit
;
12657 int this_regno
, intregs
, intoffset
;
12660 if (cum
->intoffset
== -1)
12663 intoffset
= cum
->intoffset
;
12664 cum
->intoffset
= -1;
12666 /* If this is the trailing part of a word, try to only load that
12667 much into the register. Otherwise load the whole register. Note
12668 that in the latter case we may pick up unwanted bits. It's not a
12669 problem at the moment but may wish to revisit. */
12671 if (intoffset
% BITS_PER_WORD
!= 0)
12673 mode
= mode_for_size (BITS_PER_WORD
- intoffset
% BITS_PER_WORD
,
12675 if (mode
== BLKmode
)
12677 /* We couldn't find an appropriate mode, which happens,
12678 e.g., in packed structs when there are 3 bytes to load.
12679 Back intoffset back to the beginning of the word in this
12681 intoffset
= ROUND_DOWN (intoffset
, BITS_PER_WORD
);
12688 startbit
= ROUND_DOWN (intoffset
, BITS_PER_WORD
);
12689 endbit
= ROUND_UP (bitpos
, BITS_PER_WORD
);
12690 intregs
= (endbit
- startbit
) / BITS_PER_WORD
;
12691 this_regno
= cum
->words
+ intoffset
/ BITS_PER_WORD
;
12693 if (intregs
> 0 && intregs
> GP_ARG_NUM_REG
- this_regno
)
12694 cum
->use_stack
= 1;
12696 intregs
= MIN (intregs
, GP_ARG_NUM_REG
- this_regno
);
12700 intoffset
/= BITS_PER_UNIT
;
12703 regno
= GP_ARG_MIN_REG
+ this_regno
;
12704 reg
= gen_rtx_REG (mode
, regno
);
12706 gen_rtx_EXPR_LIST (VOIDmode
, reg
, GEN_INT (intoffset
));
12709 intoffset
= (intoffset
| (UNITS_PER_WORD
-1)) + 1;
12713 while (intregs
> 0);
12716 /* Recursive workhorse for the following. */
12719 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS
*cum
, const_tree type
,
12720 HOST_WIDE_INT startbitpos
, rtx rvec
[],
12725 for (f
= TYPE_FIELDS (type
); f
; f
= DECL_CHAIN (f
))
12726 if (TREE_CODE (f
) == FIELD_DECL
)
12728 HOST_WIDE_INT bitpos
= startbitpos
;
12729 tree ftype
= TREE_TYPE (f
);
12731 if (ftype
== error_mark_node
)
12733 mode
= TYPE_MODE (ftype
);
12735 if (DECL_SIZE (f
) != 0
12736 && tree_fits_uhwi_p (bit_position (f
)))
12737 bitpos
+= int_bit_position (f
);
12739 /* ??? FIXME: else assume zero offset. */
12741 if (TREE_CODE (ftype
) == RECORD_TYPE
)
12742 rs6000_darwin64_record_arg_recurse (cum
, ftype
, bitpos
, rvec
, k
);
12743 else if (cum
->named
&& USE_FP_FOR_ARG_P (cum
, mode
))
12745 unsigned n_fpreg
= (GET_MODE_SIZE (mode
) + 7) >> 3;
12749 case E_SCmode
: mode
= SFmode
; break;
12750 case E_DCmode
: mode
= DFmode
; break;
12751 case E_TCmode
: mode
= TFmode
; break;
12755 rs6000_darwin64_record_arg_flush (cum
, bitpos
, rvec
, k
);
12756 if (cum
->fregno
+ n_fpreg
> FP_ARG_MAX_REG
+ 1)
12758 gcc_assert (cum
->fregno
== FP_ARG_MAX_REG
12759 && (mode
== TFmode
|| mode
== TDmode
));
12760 /* Long double or _Decimal128 split over regs and memory. */
12761 mode
= DECIMAL_FLOAT_MODE_P (mode
) ? DDmode
: DFmode
;
12765 = gen_rtx_EXPR_LIST (VOIDmode
,
12766 gen_rtx_REG (mode
, cum
->fregno
++),
12767 GEN_INT (bitpos
/ BITS_PER_UNIT
));
12768 if (FLOAT128_2REG_P (mode
))
12771 else if (cum
->named
&& USE_ALTIVEC_FOR_ARG_P (cum
, mode
, 1))
12773 rs6000_darwin64_record_arg_flush (cum
, bitpos
, rvec
, k
);
12775 = gen_rtx_EXPR_LIST (VOIDmode
,
12776 gen_rtx_REG (mode
, cum
->vregno
++),
12777 GEN_INT (bitpos
/ BITS_PER_UNIT
));
12779 else if (cum
->intoffset
== -1)
12780 cum
->intoffset
= bitpos
;
12784 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
12785 the register(s) to be used for each field and subfield of a struct
12786 being passed by value, along with the offset of where the
12787 register's value may be found in the block. FP fields go in FP
12788 register, vector fields go in vector registers, and everything
12789 else goes in int registers, packed as in memory.
12791 This code is also used for function return values. RETVAL indicates
12792 whether this is the case.
12794 Much of this is taken from the SPARC V9 port, which has a similar
12795 calling convention. */
12798 rs6000_darwin64_record_arg (CUMULATIVE_ARGS
*orig_cum
, const_tree type
,
12799 bool named
, bool retval
)
12801 rtx rvec
[FIRST_PSEUDO_REGISTER
];
12802 int k
= 1, kbase
= 1;
12803 HOST_WIDE_INT typesize
= int_size_in_bytes (type
);
12804 /* This is a copy; modifications are not visible to our caller. */
12805 CUMULATIVE_ARGS copy_cum
= *orig_cum
;
12806 CUMULATIVE_ARGS
*cum
= ©_cum
;
12808 /* Pad to 16 byte boundary if needed. */
12809 if (!retval
&& TYPE_ALIGN (type
) >= 2 * BITS_PER_WORD
12810 && (cum
->words
% 2) != 0)
12813 cum
->intoffset
= 0;
12814 cum
->use_stack
= 0;
12815 cum
->named
= named
;
12817 /* Put entries into rvec[] for individual FP and vector fields, and
12818 for the chunks of memory that go in int regs. Note we start at
12819 element 1; 0 is reserved for an indication of using memory, and
12820 may or may not be filled in below. */
12821 rs6000_darwin64_record_arg_recurse (cum
, type
, /* startbit pos= */ 0, rvec
, &k
);
12822 rs6000_darwin64_record_arg_flush (cum
, typesize
* BITS_PER_UNIT
, rvec
, &k
);
12824 /* If any part of the struct went on the stack put all of it there.
12825 This hack is because the generic code for
12826 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
12827 parts of the struct are not at the beginning. */
12828 if (cum
->use_stack
)
12831 return NULL_RTX
; /* doesn't go in registers at all */
12833 rvec
[0] = gen_rtx_EXPR_LIST (VOIDmode
, NULL_RTX
, const0_rtx
);
12835 if (k
> 1 || cum
->use_stack
)
12836 return gen_rtx_PARALLEL (BLKmode
, gen_rtvec_v (k
- kbase
, &rvec
[kbase
]));
12841 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
12844 rs6000_mixed_function_arg (machine_mode mode
, const_tree type
,
12849 rtx rvec
[GP_ARG_NUM_REG
+ 1];
12851 if (align_words
>= GP_ARG_NUM_REG
)
12854 n_units
= rs6000_arg_size (mode
, type
);
12856 /* Optimize the simple case where the arg fits in one gpr, except in
12857 the case of BLKmode due to assign_parms assuming that registers are
12858 BITS_PER_WORD wide. */
12860 || (n_units
== 1 && mode
!= BLKmode
))
12861 return gen_rtx_REG (mode
, GP_ARG_MIN_REG
+ align_words
);
12864 if (align_words
+ n_units
> GP_ARG_NUM_REG
)
12865 /* Not all of the arg fits in gprs. Say that it goes in memory too,
12866 using a magic NULL_RTX component.
12867 This is not strictly correct. Only some of the arg belongs in
12868 memory, not all of it. However, the normal scheme using
12869 function_arg_partial_nregs can result in unusual subregs, eg.
12870 (subreg:SI (reg:DF) 4), which are not handled well. The code to
12871 store the whole arg to memory is often more efficient than code
12872 to store pieces, and we know that space is available in the right
12873 place for the whole arg. */
12874 rvec
[k
++] = gen_rtx_EXPR_LIST (VOIDmode
, NULL_RTX
, const0_rtx
);
12879 rtx r
= gen_rtx_REG (SImode
, GP_ARG_MIN_REG
+ align_words
);
12880 rtx off
= GEN_INT (i
++ * 4);
12881 rvec
[k
++] = gen_rtx_EXPR_LIST (VOIDmode
, r
, off
);
12883 while (++align_words
< GP_ARG_NUM_REG
&& --n_units
!= 0);
12885 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (k
, rvec
));
12888 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
12889 but must also be copied into the parameter save area starting at
12890 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
12891 to the GPRs and/or memory. Return the number of elements used. */
12894 rs6000_psave_function_arg (machine_mode mode
, const_tree type
,
12895 int align_words
, rtx
*rvec
)
12899 if (align_words
< GP_ARG_NUM_REG
)
12901 int n_words
= rs6000_arg_size (mode
, type
);
12903 if (align_words
+ n_words
> GP_ARG_NUM_REG
12905 || (TARGET_32BIT
&& TARGET_POWERPC64
))
12907 /* If this is partially on the stack, then we only
12908 include the portion actually in registers here. */
12909 machine_mode rmode
= TARGET_32BIT
? SImode
: DImode
;
12912 if (align_words
+ n_words
> GP_ARG_NUM_REG
)
12914 /* Not all of the arg fits in gprs. Say that it goes in memory
12915 too, using a magic NULL_RTX component. Also see comment in
12916 rs6000_mixed_function_arg for why the normal
12917 function_arg_partial_nregs scheme doesn't work in this case. */
12918 rvec
[k
++] = gen_rtx_EXPR_LIST (VOIDmode
, NULL_RTX
, const0_rtx
);
12923 rtx r
= gen_rtx_REG (rmode
, GP_ARG_MIN_REG
+ align_words
);
12924 rtx off
= GEN_INT (i
++ * GET_MODE_SIZE (rmode
));
12925 rvec
[k
++] = gen_rtx_EXPR_LIST (VOIDmode
, r
, off
);
12927 while (++align_words
< GP_ARG_NUM_REG
&& --n_words
!= 0);
12931 /* The whole arg fits in gprs. */
12932 rtx r
= gen_rtx_REG (mode
, GP_ARG_MIN_REG
+ align_words
);
12933 rvec
[k
++] = gen_rtx_EXPR_LIST (VOIDmode
, r
, const0_rtx
);
12938 /* It's entirely in memory. */
12939 rvec
[k
++] = gen_rtx_EXPR_LIST (VOIDmode
, NULL_RTX
, const0_rtx
);
12945 /* RVEC is a vector of K components of an argument of mode MODE.
12946 Construct the final function_arg return value from it. */
12949 rs6000_finish_function_arg (machine_mode mode
, rtx
*rvec
, int k
)
12951 gcc_assert (k
>= 1);
12953 /* Avoid returning a PARALLEL in the trivial cases. */
12956 if (XEXP (rvec
[0], 0) == NULL_RTX
)
12959 if (GET_MODE (XEXP (rvec
[0], 0)) == mode
)
12960 return XEXP (rvec
[0], 0);
12963 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (k
, rvec
));
12966 /* Determine where to put an argument to a function.
12967 Value is zero to push the argument on the stack,
12968 or a hard register in which to store the argument.
12970 MODE is the argument's machine mode.
12971 TYPE is the data type of the argument (as a tree).
12972 This is null for libcalls where that information may
12974 CUM is a variable of type CUMULATIVE_ARGS which gives info about
12975 the preceding args and about the function being called. It is
12976 not modified in this routine.
12977 NAMED is nonzero if this argument is a named parameter
12978 (otherwise it is an extra parameter matching an ellipsis).
12980 On RS/6000 the first eight words of non-FP are normally in registers
12981 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
12982 Under V.4, the first 8 FP args are in registers.
12984 If this is floating-point and no prototype is specified, we use
12985 both an FP and integer register (or possibly FP reg and stack). Library
12986 functions (when CALL_LIBCALL is set) always have the proper types for args,
12987 so we can pass the FP value just in one register. emit_library_function
12988 doesn't support PARALLEL anyway.
12990 Note that for args passed by reference, function_arg will be called
12991 with MODE and TYPE set to that of the pointer to the arg, not the arg
12995 rs6000_function_arg (cumulative_args_t cum_v
, machine_mode mode
,
12996 const_tree type
, bool named
)
12998 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
12999 enum rs6000_abi abi
= DEFAULT_ABI
;
13000 machine_mode elt_mode
;
13003 /* Return a marker to indicate whether CR1 needs to set or clear the
13004 bit that V.4 uses to say fp args were passed in registers.
13005 Assume that we don't need the marker for software floating point,
13006 or compiler generated library calls. */
13007 if (mode
== VOIDmode
)
13010 && (cum
->call_cookie
& CALL_LIBCALL
) == 0
13012 || (cum
->nargs_prototype
< 0
13013 && (cum
->prototype
|| TARGET_NO_PROTOTYPE
))))
13015 /* For the SPE, we need to crxor CR6 always. */
13016 if (TARGET_SPE_ABI
)
13017 return GEN_INT (cum
->call_cookie
| CALL_V4_SET_FP_ARGS
);
13018 else if (TARGET_HARD_FLOAT
&& TARGET_FPRS
)
13019 return GEN_INT (cum
->call_cookie
13020 | ((cum
->fregno
== FP_ARG_MIN_REG
)
13021 ? CALL_V4_SET_FP_ARGS
13022 : CALL_V4_CLEAR_FP_ARGS
));
13025 return GEN_INT (cum
->call_cookie
& ~CALL_LIBCALL
);
13028 rs6000_discover_homogeneous_aggregate (mode
, type
, &elt_mode
, &n_elts
);
13030 if (TARGET_MACHO
&& rs6000_darwin64_struct_check_p (mode
, type
))
13032 rtx rslt
= rs6000_darwin64_record_arg (cum
, type
, named
, /*retval= */false);
13033 if (rslt
!= NULL_RTX
)
13035 /* Else fall through to usual handling. */
13038 if (USE_ALTIVEC_FOR_ARG_P (cum
, elt_mode
, named
))
13040 rtx rvec
[GP_ARG_NUM_REG
+ AGGR_ARG_NUM_REG
+ 1];
13044 /* Do we also need to pass this argument in the parameter save area?
13045 Library support functions for IEEE 128-bit are assumed to not need the
13046 value passed both in GPRs and in vector registers. */
13047 if (TARGET_64BIT
&& !cum
->prototype
13048 && (!cum
->libcall
|| !FLOAT128_VECTOR_P (elt_mode
)))
13050 int align_words
= ROUND_UP (cum
->words
, 2);
13051 k
= rs6000_psave_function_arg (mode
, type
, align_words
, rvec
);
13054 /* Describe where this argument goes in the vector registers. */
13055 for (i
= 0; i
< n_elts
&& cum
->vregno
+ i
<= ALTIVEC_ARG_MAX_REG
; i
++)
13057 r
= gen_rtx_REG (elt_mode
, cum
->vregno
+ i
);
13058 off
= GEN_INT (i
* GET_MODE_SIZE (elt_mode
));
13059 rvec
[k
++] = gen_rtx_EXPR_LIST (VOIDmode
, r
, off
);
13062 return rs6000_finish_function_arg (mode
, rvec
, k
);
13064 else if (TARGET_ALTIVEC_ABI
13065 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode
)
13066 || (type
&& TREE_CODE (type
) == VECTOR_TYPE
13067 && int_size_in_bytes (type
) == 16)))
13069 if (named
|| abi
== ABI_V4
)
13073 /* Vector parameters to varargs functions under AIX or Darwin
13074 get passed in memory and possibly also in GPRs. */
13075 int align
, align_words
, n_words
;
13076 machine_mode part_mode
;
13078 /* Vector parameters must be 16-byte aligned. In 32-bit
13079 mode this means we need to take into account the offset
13080 to the parameter save area. In 64-bit mode, they just
13081 have to start on an even word, since the parameter save
13082 area is 16-byte aligned. */
13084 align
= -(rs6000_parm_offset () + cum
->words
) & 3;
13086 align
= cum
->words
& 1;
13087 align_words
= cum
->words
+ align
;
13089 /* Out of registers? Memory, then. */
13090 if (align_words
>= GP_ARG_NUM_REG
)
13093 if (TARGET_32BIT
&& TARGET_POWERPC64
)
13094 return rs6000_mixed_function_arg (mode
, type
, align_words
);
13096 /* The vector value goes in GPRs. Only the part of the
13097 value in GPRs is reported here. */
13099 n_words
= rs6000_arg_size (mode
, type
);
13100 if (align_words
+ n_words
> GP_ARG_NUM_REG
)
13101 /* Fortunately, there are only two possibilities, the value
13102 is either wholly in GPRs or half in GPRs and half not. */
13103 part_mode
= DImode
;
13105 return gen_rtx_REG (part_mode
, GP_ARG_MIN_REG
+ align_words
);
13108 else if (TARGET_SPE_ABI
&& TARGET_SPE
13109 && (SPE_VECTOR_MODE (mode
)
13110 || (TARGET_E500_DOUBLE
&& (mode
== DFmode
13113 || mode
== TCmode
))))
13114 return rs6000_spe_function_arg (cum
, mode
, type
);
13116 else if (abi
== ABI_V4
)
13118 if (abi_v4_pass_in_fpr (mode
))
13120 /* _Decimal128 must use an even/odd register pair. This assumes
13121 that the register number is odd when fregno is odd. */
13122 if (mode
== TDmode
&& (cum
->fregno
% 2) == 1)
13125 if (cum
->fregno
+ (FLOAT128_2REG_P (mode
) ? 1 : 0)
13126 <= FP_ARG_V4_MAX_REG
)
13127 return gen_rtx_REG (mode
, cum
->fregno
);
13133 int n_words
= rs6000_arg_size (mode
, type
);
13134 int gregno
= cum
->sysv_gregno
;
13136 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
13137 (r7,r8) or (r9,r10). As does any other 2 word item such
13138 as complex int due to a historical mistake. */
13140 gregno
+= (1 - gregno
) & 1;
13142 /* Multi-reg args are not split between registers and stack. */
13143 if (gregno
+ n_words
- 1 > GP_ARG_MAX_REG
)
13146 if (TARGET_32BIT
&& TARGET_POWERPC64
)
13147 return rs6000_mixed_function_arg (mode
, type
,
13148 gregno
- GP_ARG_MIN_REG
);
13149 return gen_rtx_REG (mode
, gregno
);
13154 int align_words
= rs6000_parm_start (mode
, type
, cum
->words
);
13156 /* _Decimal128 must be passed in an even/odd float register pair.
13157 This assumes that the register number is odd when fregno is odd. */
13158 if (elt_mode
== TDmode
&& (cum
->fregno
% 2) == 1)
13161 if (USE_FP_FOR_ARG_P (cum
, elt_mode
))
13163 rtx rvec
[GP_ARG_NUM_REG
+ AGGR_ARG_NUM_REG
+ 1];
13166 unsigned long n_fpreg
= (GET_MODE_SIZE (elt_mode
) + 7) >> 3;
13169 /* Do we also need to pass this argument in the parameter
13171 if (type
&& (cum
->nargs_prototype
<= 0
13172 || ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
13173 && TARGET_XL_COMPAT
13174 && align_words
>= GP_ARG_NUM_REG
)))
13175 k
= rs6000_psave_function_arg (mode
, type
, align_words
, rvec
);
13177 /* Describe where this argument goes in the fprs. */
13178 for (i
= 0; i
< n_elts
13179 && cum
->fregno
+ i
* n_fpreg
<= FP_ARG_MAX_REG
; i
++)
13181 /* Check if the argument is split over registers and memory.
13182 This can only ever happen for long double or _Decimal128;
13183 complex types are handled via split_complex_arg. */
13184 machine_mode fmode
= elt_mode
;
13185 if (cum
->fregno
+ (i
+ 1) * n_fpreg
> FP_ARG_MAX_REG
+ 1)
13187 gcc_assert (FLOAT128_2REG_P (fmode
));
13188 fmode
= DECIMAL_FLOAT_MODE_P (fmode
) ? DDmode
: DFmode
;
13191 r
= gen_rtx_REG (fmode
, cum
->fregno
+ i
* n_fpreg
);
13192 off
= GEN_INT (i
* GET_MODE_SIZE (elt_mode
));
13193 rvec
[k
++] = gen_rtx_EXPR_LIST (VOIDmode
, r
, off
);
13196 /* If there were not enough FPRs to hold the argument, the rest
13197 usually goes into memory. However, if the current position
13198 is still within the register parameter area, a portion may
13199 actually have to go into GPRs.
13201 Note that it may happen that the portion of the argument
13202 passed in the first "half" of the first GPR was already
13203 passed in the last FPR as well.
13205 For unnamed arguments, we already set up GPRs to cover the
13206 whole argument in rs6000_psave_function_arg, so there is
13207 nothing further to do at this point. */
13208 fpr_words
= (i
* GET_MODE_SIZE (elt_mode
)) / (TARGET_32BIT
? 4 : 8);
13209 if (i
< n_elts
&& align_words
+ fpr_words
< GP_ARG_NUM_REG
13210 && cum
->nargs_prototype
> 0)
13212 static bool warned
;
13214 machine_mode rmode
= TARGET_32BIT
? SImode
: DImode
;
13215 int n_words
= rs6000_arg_size (mode
, type
);
13217 align_words
+= fpr_words
;
13218 n_words
-= fpr_words
;
13222 r
= gen_rtx_REG (rmode
, GP_ARG_MIN_REG
+ align_words
);
13223 off
= GEN_INT (fpr_words
++ * GET_MODE_SIZE (rmode
));
13224 rvec
[k
++] = gen_rtx_EXPR_LIST (VOIDmode
, r
, off
);
13226 while (++align_words
< GP_ARG_NUM_REG
&& --n_words
!= 0);
13228 if (!warned
&& warn_psabi
)
13231 inform (input_location
,
13232 "the ABI of passing homogeneous float aggregates"
13233 " has changed in GCC 5");
13237 return rs6000_finish_function_arg (mode
, rvec
, k
);
13239 else if (align_words
< GP_ARG_NUM_REG
)
13241 if (TARGET_32BIT
&& TARGET_POWERPC64
)
13242 return rs6000_mixed_function_arg (mode
, type
, align_words
);
13244 return gen_rtx_REG (mode
, GP_ARG_MIN_REG
+ align_words
);
13251 /* For an arg passed partly in registers and partly in memory, this is
13252 the number of bytes passed in registers. For args passed entirely in
13253 registers or entirely in memory, zero. When an arg is described by a
13254 PARALLEL, perhaps using more than one register type, this function
13255 returns the number of bytes used by the first element of the PARALLEL. */
13258 rs6000_arg_partial_bytes (cumulative_args_t cum_v
, machine_mode mode
,
13259 tree type
, bool named
)
13261 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
13262 bool passed_in_gprs
= true;
13265 machine_mode elt_mode
;
13268 rs6000_discover_homogeneous_aggregate (mode
, type
, &elt_mode
, &n_elts
);
13270 if (DEFAULT_ABI
== ABI_V4
)
13273 if (USE_ALTIVEC_FOR_ARG_P (cum
, elt_mode
, named
))
13275 /* If we are passing this arg in the fixed parameter save area (gprs or
13276 memory) as well as VRs, we do not use the partial bytes mechanism;
13277 instead, rs6000_function_arg will return a PARALLEL including a memory
13278 element as necessary. Library support functions for IEEE 128-bit are
13279 assumed to not need the value passed both in GPRs and in vector
13281 if (TARGET_64BIT
&& !cum
->prototype
13282 && (!cum
->libcall
|| !FLOAT128_VECTOR_P (elt_mode
)))
13285 /* Otherwise, we pass in VRs only. Check for partial copies. */
13286 passed_in_gprs
= false;
13287 if (cum
->vregno
+ n_elts
> ALTIVEC_ARG_MAX_REG
+ 1)
13288 ret
= (ALTIVEC_ARG_MAX_REG
+ 1 - cum
->vregno
) * 16;
13291 /* In this complicated case we just disable the partial_nregs code. */
13292 if (TARGET_MACHO
&& rs6000_darwin64_struct_check_p (mode
, type
))
13295 align_words
= rs6000_parm_start (mode
, type
, cum
->words
);
13297 if (USE_FP_FOR_ARG_P (cum
, elt_mode
))
13299 unsigned long n_fpreg
= (GET_MODE_SIZE (elt_mode
) + 7) >> 3;
13301 /* If we are passing this arg in the fixed parameter save area
13302 (gprs or memory) as well as FPRs, we do not use the partial
13303 bytes mechanism; instead, rs6000_function_arg will return a
13304 PARALLEL including a memory element as necessary. */
13306 && (cum
->nargs_prototype
<= 0
13307 || ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
13308 && TARGET_XL_COMPAT
13309 && align_words
>= GP_ARG_NUM_REG
)))
13312 /* Otherwise, we pass in FPRs only. Check for partial copies. */
13313 passed_in_gprs
= false;
13314 if (cum
->fregno
+ n_elts
* n_fpreg
> FP_ARG_MAX_REG
+ 1)
13316 /* Compute number of bytes / words passed in FPRs. If there
13317 is still space available in the register parameter area
13318 *after* that amount, a part of the argument will be passed
13319 in GPRs. In that case, the total amount passed in any
13320 registers is equal to the amount that would have been passed
13321 in GPRs if everything were passed there, so we fall back to
13322 the GPR code below to compute the appropriate value. */
13323 int fpr
= ((FP_ARG_MAX_REG
+ 1 - cum
->fregno
)
13324 * MIN (8, GET_MODE_SIZE (elt_mode
)));
13325 int fpr_words
= fpr
/ (TARGET_32BIT
? 4 : 8);
13327 if (align_words
+ fpr_words
< GP_ARG_NUM_REG
)
13328 passed_in_gprs
= true;
13335 && align_words
< GP_ARG_NUM_REG
13336 && GP_ARG_NUM_REG
< align_words
+ rs6000_arg_size (mode
, type
))
13337 ret
= (GP_ARG_NUM_REG
- align_words
) * (TARGET_32BIT
? 4 : 8);
13339 if (ret
!= 0 && TARGET_DEBUG_ARG
)
13340 fprintf (stderr
, "rs6000_arg_partial_bytes: %d\n", ret
);
13345 /* A C expression that indicates when an argument must be passed by
13346 reference. If nonzero for an argument, a copy of that argument is
13347 made in memory and a pointer to the argument is passed instead of
13348 the argument itself. The pointer is passed in whatever way is
13349 appropriate for passing a pointer to that type.
13351 Under V.4, aggregates and long double are passed by reference.
13353 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
13354 reference unless the AltiVec vector extension ABI is in force.
13356 As an extension to all ABIs, variable sized types are passed by
13360 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
13361 machine_mode mode
, const_tree type
,
13362 bool named ATTRIBUTE_UNUSED
)
13367 if (DEFAULT_ABI
== ABI_V4
&& TARGET_IEEEQUAD
13368 && FLOAT128_IEEE_P (TYPE_MODE (type
)))
13370 if (TARGET_DEBUG_ARG
)
13371 fprintf (stderr
, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
13375 if (DEFAULT_ABI
== ABI_V4
&& AGGREGATE_TYPE_P (type
))
13377 if (TARGET_DEBUG_ARG
)
13378 fprintf (stderr
, "function_arg_pass_by_reference: V4 aggregate\n");
13382 if (int_size_in_bytes (type
) < 0)
13384 if (TARGET_DEBUG_ARG
)
13385 fprintf (stderr
, "function_arg_pass_by_reference: variable size\n");
13389 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
13390 modes only exist for GCC vector types if -maltivec. */
13391 if (TARGET_32BIT
&& !TARGET_ALTIVEC_ABI
&& ALTIVEC_VECTOR_MODE (mode
))
13393 if (TARGET_DEBUG_ARG
)
13394 fprintf (stderr
, "function_arg_pass_by_reference: AltiVec\n");
13398 /* Pass synthetic vectors in memory. */
13399 if (TREE_CODE (type
) == VECTOR_TYPE
13400 && int_size_in_bytes (type
) > (TARGET_ALTIVEC_ABI
? 16 : 8))
13402 static bool warned_for_pass_big_vectors
= false;
13403 if (TARGET_DEBUG_ARG
)
13404 fprintf (stderr
, "function_arg_pass_by_reference: synthetic vector\n");
13405 if (!warned_for_pass_big_vectors
)
13407 warning (OPT_Wpsabi
, "GCC vector passed by reference: "
13408 "non-standard ABI extension with no compatibility guarantee");
13409 warned_for_pass_big_vectors
= true;
13417 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
13418 already processes. Return true if the parameter must be passed
13419 (fully or partially) on the stack. */
13422 rs6000_parm_needs_stack (cumulative_args_t args_so_far
, tree type
)
13428 /* Catch errors. */
13429 if (type
== NULL
|| type
== error_mark_node
)
13432 /* Handle types with no storage requirement. */
13433 if (TYPE_MODE (type
) == VOIDmode
)
13436 /* Handle complex types. */
13437 if (TREE_CODE (type
) == COMPLEX_TYPE
)
13438 return (rs6000_parm_needs_stack (args_so_far
, TREE_TYPE (type
))
13439 || rs6000_parm_needs_stack (args_so_far
, TREE_TYPE (type
)));
13441 /* Handle transparent aggregates. */
13442 if ((TREE_CODE (type
) == UNION_TYPE
|| TREE_CODE (type
) == RECORD_TYPE
)
13443 && TYPE_TRANSPARENT_AGGR (type
))
13444 type
= TREE_TYPE (first_field (type
));
13446 /* See if this arg was passed by invisible reference. */
13447 if (pass_by_reference (get_cumulative_args (args_so_far
),
13448 TYPE_MODE (type
), type
, true))
13449 type
= build_pointer_type (type
);
13451 /* Find mode as it is passed by the ABI. */
13452 unsignedp
= TYPE_UNSIGNED (type
);
13453 mode
= promote_mode (type
, TYPE_MODE (type
), &unsignedp
);
13455 /* If we must pass in stack, we need a stack. */
13456 if (rs6000_must_pass_in_stack (mode
, type
))
13459 /* If there is no incoming register, we need a stack. */
13460 entry_parm
= rs6000_function_arg (args_so_far
, mode
, type
, true);
13461 if (entry_parm
== NULL
)
13464 /* Likewise if we need to pass both in registers and on the stack. */
13465 if (GET_CODE (entry_parm
) == PARALLEL
13466 && XEXP (XVECEXP (entry_parm
, 0, 0), 0) == NULL_RTX
)
13469 /* Also true if we're partially in registers and partially not. */
13470 if (rs6000_arg_partial_bytes (args_so_far
, mode
, type
, true) != 0)
13473 /* Update info on where next arg arrives in registers. */
13474 rs6000_function_arg_advance (args_so_far
, mode
, type
, true);
13478 /* Return true if FUN has no prototype, has a variable argument
13479 list, or passes any parameter in memory. */
13482 rs6000_function_parms_need_stack (tree fun
, bool incoming
)
13484 tree fntype
, result
;
13485 CUMULATIVE_ARGS args_so_far_v
;
13486 cumulative_args_t args_so_far
;
13489 /* Must be a libcall, all of which only use reg parms. */
13494 fntype
= TREE_TYPE (fun
);
13496 /* Varargs functions need the parameter save area. */
13497 if ((!incoming
&& !prototype_p (fntype
)) || stdarg_p (fntype
))
13500 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v
, fntype
, NULL_RTX
);
13501 args_so_far
= pack_cumulative_args (&args_so_far_v
);
13503 /* When incoming, we will have been passed the function decl.
13504 It is necessary to use the decl to handle K&R style functions,
13505 where TYPE_ARG_TYPES may not be available. */
13508 gcc_assert (DECL_P (fun
));
13509 result
= DECL_RESULT (fun
);
13512 result
= TREE_TYPE (fntype
);
13514 if (result
&& aggregate_value_p (result
, fntype
))
13516 if (!TYPE_P (result
))
13517 result
= TREE_TYPE (result
);
13518 result
= build_pointer_type (result
);
13519 rs6000_parm_needs_stack (args_so_far
, result
);
13526 for (parm
= DECL_ARGUMENTS (fun
);
13527 parm
&& parm
!= void_list_node
;
13528 parm
= TREE_CHAIN (parm
))
13529 if (rs6000_parm_needs_stack (args_so_far
, TREE_TYPE (parm
)))
13534 function_args_iterator args_iter
;
13537 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
13538 if (rs6000_parm_needs_stack (args_so_far
, arg_type
))
13545 /* Return the size of the REG_PARM_STACK_SPACE are for FUN. This is
13546 usually a constant depending on the ABI. However, in the ELFv2 ABI
13547 the register parameter area is optional when calling a function that
13548 has a prototype is scope, has no variable argument list, and passes
13549 all parameters in registers. */
13552 rs6000_reg_parm_stack_space (tree fun
, bool incoming
)
13554 int reg_parm_stack_space
;
13556 switch (DEFAULT_ABI
)
13559 reg_parm_stack_space
= 0;
13564 reg_parm_stack_space
= TARGET_64BIT
? 64 : 32;
13568 /* ??? Recomputing this every time is a bit expensive. Is there
13569 a place to cache this information? */
13570 if (rs6000_function_parms_need_stack (fun
, incoming
))
13571 reg_parm_stack_space
= TARGET_64BIT
? 64 : 32;
13573 reg_parm_stack_space
= 0;
13577 return reg_parm_stack_space
;
13581 rs6000_move_block_from_reg (int regno
, rtx x
, int nregs
)
13584 machine_mode reg_mode
= TARGET_32BIT
? SImode
: DImode
;
13589 for (i
= 0; i
< nregs
; i
++)
13591 rtx tem
= adjust_address_nv (x
, reg_mode
, i
* GET_MODE_SIZE (reg_mode
));
13592 if (reload_completed
)
13594 if (! strict_memory_address_p (reg_mode
, XEXP (tem
, 0)))
13597 tem
= simplify_gen_subreg (reg_mode
, x
, BLKmode
,
13598 i
* GET_MODE_SIZE (reg_mode
));
13601 tem
= replace_equiv_address (tem
, XEXP (tem
, 0));
13605 emit_move_insn (tem
, gen_rtx_REG (reg_mode
, regno
+ i
));
13609 /* Perform any needed actions needed for a function that is receiving a
13610 variable number of arguments.
13614 MODE and TYPE are the mode and type of the current parameter.
13616 PRETEND_SIZE is a variable that should be set to the amount of stack
13617 that must be pushed by the prolog to pretend that our caller pushed
13620 Normally, this macro will push all remaining incoming registers on the
13621 stack and set PRETEND_SIZE to the length of the registers pushed. */
13624 setup_incoming_varargs (cumulative_args_t cum
, machine_mode mode
,
13625 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
13628 CUMULATIVE_ARGS next_cum
;
13629 int reg_size
= TARGET_32BIT
? 4 : 8;
13630 rtx save_area
= NULL_RTX
, mem
;
13631 int first_reg_offset
;
13632 alias_set_type set
;
13634 /* Skip the last named argument. */
13635 next_cum
= *get_cumulative_args (cum
);
13636 rs6000_function_arg_advance_1 (&next_cum
, mode
, type
, true, 0);
13638 if (DEFAULT_ABI
== ABI_V4
)
13640 first_reg_offset
= next_cum
.sysv_gregno
- GP_ARG_MIN_REG
;
13644 int gpr_reg_num
= 0, gpr_size
= 0, fpr_size
= 0;
13645 HOST_WIDE_INT offset
= 0;
13647 /* Try to optimize the size of the varargs save area.
13648 The ABI requires that ap.reg_save_area is doubleword
13649 aligned, but we don't need to allocate space for all
13650 the bytes, only those to which we actually will save
13652 if (cfun
->va_list_gpr_size
&& first_reg_offset
< GP_ARG_NUM_REG
)
13653 gpr_reg_num
= GP_ARG_NUM_REG
- first_reg_offset
;
13654 if (TARGET_HARD_FLOAT
&& TARGET_FPRS
13655 && next_cum
.fregno
<= FP_ARG_V4_MAX_REG
13656 && cfun
->va_list_fpr_size
)
13659 fpr_size
= (next_cum
.fregno
- FP_ARG_MIN_REG
)
13660 * UNITS_PER_FP_WORD
;
13661 if (cfun
->va_list_fpr_size
13662 < FP_ARG_V4_MAX_REG
+ 1 - next_cum
.fregno
)
13663 fpr_size
+= cfun
->va_list_fpr_size
* UNITS_PER_FP_WORD
;
13665 fpr_size
+= (FP_ARG_V4_MAX_REG
+ 1 - next_cum
.fregno
)
13666 * UNITS_PER_FP_WORD
;
13670 offset
= -((first_reg_offset
* reg_size
) & ~7);
13671 if (!fpr_size
&& gpr_reg_num
> cfun
->va_list_gpr_size
)
13673 gpr_reg_num
= cfun
->va_list_gpr_size
;
13674 if (reg_size
== 4 && (first_reg_offset
& 1))
13677 gpr_size
= (gpr_reg_num
* reg_size
+ 7) & ~7;
13680 offset
= - (int) (next_cum
.fregno
- FP_ARG_MIN_REG
)
13681 * UNITS_PER_FP_WORD
13682 - (int) (GP_ARG_NUM_REG
* reg_size
);
13684 if (gpr_size
+ fpr_size
)
13687 = assign_stack_local (BLKmode
, gpr_size
+ fpr_size
, 64);
13688 gcc_assert (GET_CODE (reg_save_area
) == MEM
);
13689 reg_save_area
= XEXP (reg_save_area
, 0);
13690 if (GET_CODE (reg_save_area
) == PLUS
)
13692 gcc_assert (XEXP (reg_save_area
, 0)
13693 == virtual_stack_vars_rtx
);
13694 gcc_assert (GET_CODE (XEXP (reg_save_area
, 1)) == CONST_INT
);
13695 offset
+= INTVAL (XEXP (reg_save_area
, 1));
13698 gcc_assert (reg_save_area
== virtual_stack_vars_rtx
);
13701 cfun
->machine
->varargs_save_offset
= offset
;
13702 save_area
= plus_constant (Pmode
, virtual_stack_vars_rtx
, offset
);
13707 first_reg_offset
= next_cum
.words
;
13708 save_area
= crtl
->args
.internal_arg_pointer
;
13710 if (targetm
.calls
.must_pass_in_stack (mode
, type
))
13711 first_reg_offset
+= rs6000_arg_size (TYPE_MODE (type
), type
);
13714 set
= get_varargs_alias_set ();
13715 if (! no_rtl
&& first_reg_offset
< GP_ARG_NUM_REG
13716 && cfun
->va_list_gpr_size
)
13718 int n_gpr
, nregs
= GP_ARG_NUM_REG
- first_reg_offset
;
13720 if (va_list_gpr_counter_field
)
13721 /* V4 va_list_gpr_size counts number of registers needed. */
13722 n_gpr
= cfun
->va_list_gpr_size
;
13724 /* char * va_list instead counts number of bytes needed. */
13725 n_gpr
= (cfun
->va_list_gpr_size
+ reg_size
- 1) / reg_size
;
13730 mem
= gen_rtx_MEM (BLKmode
,
13731 plus_constant (Pmode
, save_area
,
13732 first_reg_offset
* reg_size
));
13733 MEM_NOTRAP_P (mem
) = 1;
13734 set_mem_alias_set (mem
, set
);
13735 set_mem_align (mem
, BITS_PER_WORD
);
13737 rs6000_move_block_from_reg (GP_ARG_MIN_REG
+ first_reg_offset
, mem
,
13741 /* Save FP registers if needed. */
13742 if (DEFAULT_ABI
== ABI_V4
13743 && TARGET_HARD_FLOAT
&& TARGET_FPRS
13745 && next_cum
.fregno
<= FP_ARG_V4_MAX_REG
13746 && cfun
->va_list_fpr_size
)
13748 int fregno
= next_cum
.fregno
, nregs
;
13749 rtx cr1
= gen_rtx_REG (CCmode
, CR1_REGNO
);
13750 rtx lab
= gen_label_rtx ();
13751 int off
= (GP_ARG_NUM_REG
* reg_size
) + ((fregno
- FP_ARG_MIN_REG
)
13752 * UNITS_PER_FP_WORD
);
13755 (gen_rtx_SET (pc_rtx
,
13756 gen_rtx_IF_THEN_ELSE (VOIDmode
,
13757 gen_rtx_NE (VOIDmode
, cr1
,
13759 gen_rtx_LABEL_REF (VOIDmode
, lab
),
13763 fregno
<= FP_ARG_V4_MAX_REG
&& nregs
< cfun
->va_list_fpr_size
;
13764 fregno
++, off
+= UNITS_PER_FP_WORD
, nregs
++)
13766 mem
= gen_rtx_MEM ((TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
)
13768 plus_constant (Pmode
, save_area
, off
));
13769 MEM_NOTRAP_P (mem
) = 1;
13770 set_mem_alias_set (mem
, set
);
13771 set_mem_align (mem
, GET_MODE_ALIGNMENT (
13772 (TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
)
13773 ? DFmode
: SFmode
));
13774 emit_move_insn (mem
, gen_rtx_REG (
13775 (TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
)
13776 ? DFmode
: SFmode
, fregno
));
13783 /* Create the va_list data type. */
13786 rs6000_build_builtin_va_list (void)
13788 tree f_gpr
, f_fpr
, f_res
, f_ovf
, f_sav
, record
, type_decl
;
13790 /* For AIX, prefer 'char *' because that's what the system
13791 header files like. */
13792 if (DEFAULT_ABI
!= ABI_V4
)
13793 return build_pointer_type (char_type_node
);
13795 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
13796 type_decl
= build_decl (BUILTINS_LOCATION
, TYPE_DECL
,
13797 get_identifier ("__va_list_tag"), record
);
13799 f_gpr
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
, get_identifier ("gpr"),
13800 unsigned_char_type_node
);
13801 f_fpr
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
, get_identifier ("fpr"),
13802 unsigned_char_type_node
);
13803 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
13804 every user file. */
13805 f_res
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
13806 get_identifier ("reserved"), short_unsigned_type_node
);
13807 f_ovf
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
13808 get_identifier ("overflow_arg_area"),
13810 f_sav
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
13811 get_identifier ("reg_save_area"),
13814 va_list_gpr_counter_field
= f_gpr
;
13815 va_list_fpr_counter_field
= f_fpr
;
13817 DECL_FIELD_CONTEXT (f_gpr
) = record
;
13818 DECL_FIELD_CONTEXT (f_fpr
) = record
;
13819 DECL_FIELD_CONTEXT (f_res
) = record
;
13820 DECL_FIELD_CONTEXT (f_ovf
) = record
;
13821 DECL_FIELD_CONTEXT (f_sav
) = record
;
13823 TYPE_STUB_DECL (record
) = type_decl
;
13824 TYPE_NAME (record
) = type_decl
;
13825 TYPE_FIELDS (record
) = f_gpr
;
13826 DECL_CHAIN (f_gpr
) = f_fpr
;
13827 DECL_CHAIN (f_fpr
) = f_res
;
13828 DECL_CHAIN (f_res
) = f_ovf
;
13829 DECL_CHAIN (f_ovf
) = f_sav
;
13831 layout_type (record
);
13833 /* The correct type is an array type of one element. */
13834 return build_array_type (record
, build_index_type (size_zero_node
));
13837 /* Implement va_start. */
13840 rs6000_va_start (tree valist
, rtx nextarg
)
13842 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
13843 tree f_gpr
, f_fpr
, f_res
, f_ovf
, f_sav
;
13844 tree gpr
, fpr
, ovf
, sav
, t
;
13846 /* Only SVR4 needs something special. */
13847 if (DEFAULT_ABI
!= ABI_V4
)
13849 std_expand_builtin_va_start (valist
, nextarg
);
13853 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
13854 f_fpr
= DECL_CHAIN (f_gpr
);
13855 f_res
= DECL_CHAIN (f_fpr
);
13856 f_ovf
= DECL_CHAIN (f_res
);
13857 f_sav
= DECL_CHAIN (f_ovf
);
13859 valist
= build_simple_mem_ref (valist
);
13860 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
13861 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), unshare_expr (valist
),
13863 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), unshare_expr (valist
),
13865 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), unshare_expr (valist
),
13868 /* Count number of gp and fp argument registers used. */
13869 words
= crtl
->args
.info
.words
;
13870 n_gpr
= MIN (crtl
->args
.info
.sysv_gregno
- GP_ARG_MIN_REG
,
13872 n_fpr
= MIN (crtl
->args
.info
.fregno
- FP_ARG_MIN_REG
,
13875 if (TARGET_DEBUG_ARG
)
13876 fprintf (stderr
, "va_start: words = " HOST_WIDE_INT_PRINT_DEC
", n_gpr = "
13877 HOST_WIDE_INT_PRINT_DEC
", n_fpr = " HOST_WIDE_INT_PRINT_DEC
"\n",
13878 words
, n_gpr
, n_fpr
);
13880 if (cfun
->va_list_gpr_size
)
13882 t
= build2 (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
,
13883 build_int_cst (NULL_TREE
, n_gpr
));
13884 TREE_SIDE_EFFECTS (t
) = 1;
13885 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
13888 if (cfun
->va_list_fpr_size
)
13890 t
= build2 (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
,
13891 build_int_cst (NULL_TREE
, n_fpr
));
13892 TREE_SIDE_EFFECTS (t
) = 1;
13893 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
13895 #ifdef HAVE_AS_GNU_ATTRIBUTE
13896 if (call_ABI_of_interest (cfun
->decl
))
13897 rs6000_passes_float
= true;
13901 /* Find the overflow area. */
13902 t
= make_tree (TREE_TYPE (ovf
), crtl
->args
.internal_arg_pointer
);
13904 t
= fold_build_pointer_plus_hwi (t
, words
* MIN_UNITS_PER_WORD
);
13905 t
= build2 (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
13906 TREE_SIDE_EFFECTS (t
) = 1;
13907 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
13909 /* If there were no va_arg invocations, don't set up the register
13911 if (!cfun
->va_list_gpr_size
13912 && !cfun
->va_list_fpr_size
13913 && n_gpr
< GP_ARG_NUM_REG
13914 && n_fpr
< FP_ARG_V4_MAX_REG
)
13917 /* Find the register save area. */
13918 t
= make_tree (TREE_TYPE (sav
), virtual_stack_vars_rtx
);
13919 if (cfun
->machine
->varargs_save_offset
)
13920 t
= fold_build_pointer_plus_hwi (t
, cfun
->machine
->varargs_save_offset
);
13921 t
= build2 (MODIFY_EXPR
, TREE_TYPE (sav
), sav
, t
);
13922 TREE_SIDE_EFFECTS (t
) = 1;
13923 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
13926 /* Implement va_arg. */
13929 rs6000_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
13930 gimple_seq
*post_p
)
13932 tree f_gpr
, f_fpr
, f_res
, f_ovf
, f_sav
;
13933 tree gpr
, fpr
, ovf
, sav
, reg
, t
, u
;
13934 int size
, rsize
, n_reg
, sav_ofs
, sav_scale
;
13935 tree lab_false
, lab_over
, addr
;
13937 tree ptrtype
= build_pointer_type_for_mode (type
, ptr_mode
, true);
13941 if (pass_by_reference (NULL
, TYPE_MODE (type
), type
, false))
13943 t
= rs6000_gimplify_va_arg (valist
, ptrtype
, pre_p
, post_p
);
13944 return build_va_arg_indirect_ref (t
);
13947 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
13948 earlier version of gcc, with the property that it always applied alignment
13949 adjustments to the va-args (even for zero-sized types). The cheapest way
13950 to deal with this is to replicate the effect of the part of
13951 std_gimplify_va_arg_expr that carries out the align adjust, for the case
13953 We don't need to check for pass-by-reference because of the test above.
13954 We can return a simplifed answer, since we know there's no offset to add. */
13957 && rs6000_darwin64_abi
)
13958 || DEFAULT_ABI
== ABI_ELFv2
13959 || (DEFAULT_ABI
== ABI_AIX
&& !rs6000_compat_align_parm
))
13960 && integer_zerop (TYPE_SIZE (type
)))
13962 unsigned HOST_WIDE_INT align
, boundary
;
13963 tree valist_tmp
= get_initialized_tmp_var (valist
, pre_p
, NULL
);
13964 align
= PARM_BOUNDARY
/ BITS_PER_UNIT
;
13965 boundary
= rs6000_function_arg_boundary (TYPE_MODE (type
), type
);
13966 if (boundary
> MAX_SUPPORTED_STACK_ALIGNMENT
)
13967 boundary
= MAX_SUPPORTED_STACK_ALIGNMENT
;
13968 boundary
/= BITS_PER_UNIT
;
13969 if (boundary
> align
)
13972 /* This updates arg ptr by the amount that would be necessary
13973 to align the zero-sized (but not zero-alignment) item. */
13974 t
= build2 (MODIFY_EXPR
, TREE_TYPE (valist
), valist_tmp
,
13975 fold_build_pointer_plus_hwi (valist_tmp
, boundary
- 1));
13976 gimplify_and_add (t
, pre_p
);
13978 t
= fold_convert (sizetype
, valist_tmp
);
13979 t
= build2 (MODIFY_EXPR
, TREE_TYPE (valist
), valist_tmp
,
13980 fold_convert (TREE_TYPE (valist
),
13981 fold_build2 (BIT_AND_EXPR
, sizetype
, t
,
13982 size_int (-boundary
))));
13983 t
= build2 (MODIFY_EXPR
, TREE_TYPE (valist
), valist
, t
);
13984 gimplify_and_add (t
, pre_p
);
13986 /* Since it is zero-sized there's no increment for the item itself. */
13987 valist_tmp
= fold_convert (build_pointer_type (type
), valist_tmp
);
13988 return build_va_arg_indirect_ref (valist_tmp
);
13991 if (DEFAULT_ABI
!= ABI_V4
)
13993 if (targetm
.calls
.split_complex_arg
&& TREE_CODE (type
) == COMPLEX_TYPE
)
13995 tree elem_type
= TREE_TYPE (type
);
13996 machine_mode elem_mode
= TYPE_MODE (elem_type
);
13997 int elem_size
= GET_MODE_SIZE (elem_mode
);
13999 if (elem_size
< UNITS_PER_WORD
)
14001 tree real_part
, imag_part
;
14002 gimple_seq post
= NULL
;
14004 real_part
= rs6000_gimplify_va_arg (valist
, elem_type
, pre_p
,
14006 /* Copy the value into a temporary, lest the formal temporary
14007 be reused out from under us. */
14008 real_part
= get_initialized_tmp_var (real_part
, pre_p
, &post
);
14009 gimple_seq_add_seq (pre_p
, post
);
14011 imag_part
= rs6000_gimplify_va_arg (valist
, elem_type
, pre_p
,
14014 return build2 (COMPLEX_EXPR
, type
, real_part
, imag_part
);
14018 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
14021 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
14022 f_fpr
= DECL_CHAIN (f_gpr
);
14023 f_res
= DECL_CHAIN (f_fpr
);
14024 f_ovf
= DECL_CHAIN (f_res
);
14025 f_sav
= DECL_CHAIN (f_ovf
);
14027 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
14028 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), unshare_expr (valist
),
14030 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), unshare_expr (valist
),
14032 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), unshare_expr (valist
),
14035 size
= int_size_in_bytes (type
);
14036 rsize
= (size
+ 3) / 4;
14037 int pad
= 4 * rsize
- size
;
14040 machine_mode mode
= TYPE_MODE (type
);
14041 if (abi_v4_pass_in_fpr (mode
))
14043 /* FP args go in FP registers, if present. */
14045 n_reg
= (size
+ 7) / 8;
14046 sav_ofs
= ((TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
) ? 8 : 4) * 4;
14047 sav_scale
= ((TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
) ? 8 : 4);
14048 if (mode
!= SFmode
&& mode
!= SDmode
)
14053 /* Otherwise into GP registers. */
14062 /* Pull the value out of the saved registers.... */
14065 addr
= create_tmp_var (ptr_type_node
, "addr");
14067 /* AltiVec vectors never go in registers when -mabi=altivec. */
14068 if (TARGET_ALTIVEC_ABI
&& ALTIVEC_VECTOR_MODE (mode
))
14072 lab_false
= create_artificial_label (input_location
);
14073 lab_over
= create_artificial_label (input_location
);
14075 /* Long long and SPE vectors are aligned in the registers.
14076 As are any other 2 gpr item such as complex int due to a
14077 historical mistake. */
14079 if (n_reg
== 2 && reg
== gpr
)
14082 u
= build2 (BIT_AND_EXPR
, TREE_TYPE (reg
), unshare_expr (reg
),
14083 build_int_cst (TREE_TYPE (reg
), n_reg
- 1));
14084 u
= build2 (POSTINCREMENT_EXPR
, TREE_TYPE (reg
),
14085 unshare_expr (reg
), u
);
14087 /* _Decimal128 is passed in even/odd fpr pairs; the stored
14088 reg number is 0 for f1, so we want to make it odd. */
14089 else if (reg
== fpr
&& mode
== TDmode
)
14091 t
= build2 (BIT_IOR_EXPR
, TREE_TYPE (reg
), unshare_expr (reg
),
14092 build_int_cst (TREE_TYPE (reg
), 1));
14093 u
= build2 (MODIFY_EXPR
, void_type_node
, unshare_expr (reg
), t
);
14096 t
= fold_convert (TREE_TYPE (reg
), size_int (8 - n_reg
+ 1));
14097 t
= build2 (GE_EXPR
, boolean_type_node
, u
, t
);
14098 u
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
14099 t
= build3 (COND_EXPR
, void_type_node
, t
, u
, NULL_TREE
);
14100 gimplify_and_add (t
, pre_p
);
14104 t
= fold_build_pointer_plus_hwi (sav
, sav_ofs
);
14106 u
= build2 (POSTINCREMENT_EXPR
, TREE_TYPE (reg
), unshare_expr (reg
),
14107 build_int_cst (TREE_TYPE (reg
), n_reg
));
14108 u
= fold_convert (sizetype
, u
);
14109 u
= build2 (MULT_EXPR
, sizetype
, u
, size_int (sav_scale
));
14110 t
= fold_build_pointer_plus (t
, u
);
14112 /* _Decimal32 varargs are located in the second word of the 64-bit
14113 FP register for 32-bit binaries. */
14115 && TARGET_HARD_FLOAT
&& TARGET_FPRS
14117 t
= fold_build_pointer_plus_hwi (t
, size
);
14119 /* Args are passed right-aligned. */
14120 if (BYTES_BIG_ENDIAN
)
14121 t
= fold_build_pointer_plus_hwi (t
, pad
);
14123 gimplify_assign (addr
, t
, pre_p
);
14125 gimple_seq_add_stmt (pre_p
, gimple_build_goto (lab_over
));
14127 stmt
= gimple_build_label (lab_false
);
14128 gimple_seq_add_stmt (pre_p
, stmt
);
14130 if ((n_reg
== 2 && !regalign
) || n_reg
> 2)
14132 /* Ensure that we don't find any more args in regs.
14133 Alignment has taken care of for special cases. */
14134 gimplify_assign (reg
, build_int_cst (TREE_TYPE (reg
), 8), pre_p
);
14138 /* ... otherwise out of the overflow area. */
14140 /* Care for on-stack alignment if needed. */
14144 t
= fold_build_pointer_plus_hwi (t
, align
- 1);
14145 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
14146 build_int_cst (TREE_TYPE (t
), -align
));
14149 /* Args are passed right-aligned. */
14150 if (BYTES_BIG_ENDIAN
)
14151 t
= fold_build_pointer_plus_hwi (t
, pad
);
14153 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
14155 gimplify_assign (unshare_expr (addr
), t
, pre_p
);
14157 t
= fold_build_pointer_plus_hwi (t
, size
);
14158 gimplify_assign (unshare_expr (ovf
), t
, pre_p
);
14162 stmt
= gimple_build_label (lab_over
);
14163 gimple_seq_add_stmt (pre_p
, stmt
);
14166 if (STRICT_ALIGNMENT
14167 && (TYPE_ALIGN (type
)
14168 > (unsigned) BITS_PER_UNIT
* (align
< 4 ? 4 : align
)))
14170 /* The value (of type complex double, for example) may not be
14171 aligned in memory in the saved registers, so copy via a
14172 temporary. (This is the same code as used for SPARC.) */
14173 tree tmp
= create_tmp_var (type
, "va_arg_tmp");
14174 tree dest_addr
= build_fold_addr_expr (tmp
);
14176 tree copy
= build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY
),
14177 3, dest_addr
, addr
, size_int (rsize
* 4));
14179 gimplify_and_add (copy
, pre_p
);
14183 addr
= fold_convert (ptrtype
, addr
);
14184 return build_va_arg_indirect_ref (addr
);
14190 def_builtin (const char *name
, tree type
, enum rs6000_builtins code
)
14193 unsigned classify
= rs6000_builtin_info
[(int)code
].attr
;
14194 const char *attr_string
= "";
14196 gcc_assert (name
!= NULL
);
14197 gcc_assert (IN_RANGE ((int)code
, 0, (int)RS6000_BUILTIN_COUNT
));
14199 if (rs6000_builtin_decls
[(int)code
])
14200 fatal_error (input_location
,
14201 "internal error: builtin function %s already processed", name
);
14203 rs6000_builtin_decls
[(int)code
] = t
=
14204 add_builtin_function (name
, type
, (int)code
, BUILT_IN_MD
, NULL
, NULL_TREE
);
14206 /* Set any special attributes. */
14207 if ((classify
& RS6000_BTC_CONST
) != 0)
14209 /* const function, function only depends on the inputs. */
14210 TREE_READONLY (t
) = 1;
14211 TREE_NOTHROW (t
) = 1;
14212 attr_string
= ", const";
14214 else if ((classify
& RS6000_BTC_PURE
) != 0)
14216 /* pure function, function can read global memory, but does not set any
14218 DECL_PURE_P (t
) = 1;
14219 TREE_NOTHROW (t
) = 1;
14220 attr_string
= ", pure";
14222 else if ((classify
& RS6000_BTC_FP
) != 0)
14224 /* Function is a math function. If rounding mode is on, then treat the
14225 function as not reading global memory, but it can have arbitrary side
14226 effects. If it is off, then assume the function is a const function.
14227 This mimics the ATTR_MATHFN_FPROUNDING attribute in
14228 builtin-attribute.def that is used for the math functions. */
14229 TREE_NOTHROW (t
) = 1;
14230 if (flag_rounding_math
)
14232 DECL_PURE_P (t
) = 1;
14233 DECL_IS_NOVOPS (t
) = 1;
14234 attr_string
= ", fp, pure";
14238 TREE_READONLY (t
) = 1;
14239 attr_string
= ", fp, const";
14242 else if ((classify
& RS6000_BTC_ATTR_MASK
) != 0)
14243 gcc_unreachable ();
14245 if (TARGET_DEBUG_BUILTIN
)
14246 fprintf (stderr
, "rs6000_builtin, code = %4d, %s%s\n",
14247 (int)code
, name
, attr_string
);
14250 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
14252 #undef RS6000_BUILTIN_0
14253 #undef RS6000_BUILTIN_1
14254 #undef RS6000_BUILTIN_2
14255 #undef RS6000_BUILTIN_3
14256 #undef RS6000_BUILTIN_A
14257 #undef RS6000_BUILTIN_D
14258 #undef RS6000_BUILTIN_E
14259 #undef RS6000_BUILTIN_H
14260 #undef RS6000_BUILTIN_P
14261 #undef RS6000_BUILTIN_Q
14262 #undef RS6000_BUILTIN_S
14263 #undef RS6000_BUILTIN_X
14265 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14266 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14267 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14268 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
14269 { MASK, ICODE, NAME, ENUM },
14271 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14272 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14273 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14274 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14275 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14276 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14277 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14278 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14280 static const struct builtin_description bdesc_3arg
[] =
14282 #include "powerpcspe-builtin.def"
14285 /* DST operations: void foo (void *, const int, const char). */
14287 #undef RS6000_BUILTIN_0
14288 #undef RS6000_BUILTIN_1
14289 #undef RS6000_BUILTIN_2
14290 #undef RS6000_BUILTIN_3
14291 #undef RS6000_BUILTIN_A
14292 #undef RS6000_BUILTIN_D
14293 #undef RS6000_BUILTIN_E
14294 #undef RS6000_BUILTIN_H
14295 #undef RS6000_BUILTIN_P
14296 #undef RS6000_BUILTIN_Q
14297 #undef RS6000_BUILTIN_S
14298 #undef RS6000_BUILTIN_X
14300 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14301 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14302 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14303 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14304 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14305 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
14306 { MASK, ICODE, NAME, ENUM },
14308 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14309 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14310 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14311 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14312 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14313 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14315 static const struct builtin_description bdesc_dst
[] =
14317 #include "powerpcspe-builtin.def"
14320 /* Simple binary operations: VECc = foo (VECa, VECb). */
14322 #undef RS6000_BUILTIN_0
14323 #undef RS6000_BUILTIN_1
14324 #undef RS6000_BUILTIN_2
14325 #undef RS6000_BUILTIN_3
14326 #undef RS6000_BUILTIN_A
14327 #undef RS6000_BUILTIN_D
14328 #undef RS6000_BUILTIN_E
14329 #undef RS6000_BUILTIN_H
14330 #undef RS6000_BUILTIN_P
14331 #undef RS6000_BUILTIN_Q
14332 #undef RS6000_BUILTIN_S
14333 #undef RS6000_BUILTIN_X
14335 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14336 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14337 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
14338 { MASK, ICODE, NAME, ENUM },
14340 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14341 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14342 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14343 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14344 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14345 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14346 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14347 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14348 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14350 static const struct builtin_description bdesc_2arg
[] =
14352 #include "powerpcspe-builtin.def"
14355 #undef RS6000_BUILTIN_0
14356 #undef RS6000_BUILTIN_1
14357 #undef RS6000_BUILTIN_2
14358 #undef RS6000_BUILTIN_3
14359 #undef RS6000_BUILTIN_A
14360 #undef RS6000_BUILTIN_D
14361 #undef RS6000_BUILTIN_E
14362 #undef RS6000_BUILTIN_H
14363 #undef RS6000_BUILTIN_P
14364 #undef RS6000_BUILTIN_Q
14365 #undef RS6000_BUILTIN_S
14366 #undef RS6000_BUILTIN_X
14368 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14369 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14370 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14371 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14372 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14373 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14374 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14375 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14376 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
14377 { MASK, ICODE, NAME, ENUM },
14379 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14380 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14381 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14383 /* AltiVec predicates. */
14385 static const struct builtin_description bdesc_altivec_preds
[] =
14387 #include "powerpcspe-builtin.def"
14390 /* SPE predicates. */
14391 #undef RS6000_BUILTIN_0
14392 #undef RS6000_BUILTIN_1
14393 #undef RS6000_BUILTIN_2
14394 #undef RS6000_BUILTIN_3
14395 #undef RS6000_BUILTIN_A
14396 #undef RS6000_BUILTIN_D
14397 #undef RS6000_BUILTIN_E
14398 #undef RS6000_BUILTIN_H
14399 #undef RS6000_BUILTIN_P
14400 #undef RS6000_BUILTIN_Q
14401 #undef RS6000_BUILTIN_S
14402 #undef RS6000_BUILTIN_X
14404 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14405 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14406 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14407 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14408 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14409 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14410 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14411 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14412 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14413 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14414 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
14415 { MASK, ICODE, NAME, ENUM },
14417 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14419 static const struct builtin_description bdesc_spe_predicates
[] =
14421 #include "powerpcspe-builtin.def"
14424 /* SPE evsel predicates. */
14425 #undef RS6000_BUILTIN_0
14426 #undef RS6000_BUILTIN_1
14427 #undef RS6000_BUILTIN_2
14428 #undef RS6000_BUILTIN_3
14429 #undef RS6000_BUILTIN_A
14430 #undef RS6000_BUILTIN_D
14431 #undef RS6000_BUILTIN_E
14432 #undef RS6000_BUILTIN_H
14433 #undef RS6000_BUILTIN_P
14434 #undef RS6000_BUILTIN_Q
14435 #undef RS6000_BUILTIN_S
14436 #undef RS6000_BUILTIN_X
14438 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14439 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14440 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14441 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14442 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14443 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14444 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
14445 { MASK, ICODE, NAME, ENUM },
14447 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14448 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14449 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14450 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14451 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14453 static const struct builtin_description bdesc_spe_evsel
[] =
14455 #include "powerpcspe-builtin.def"
14458 /* PAIRED predicates. */
14459 #undef RS6000_BUILTIN_0
14460 #undef RS6000_BUILTIN_1
14461 #undef RS6000_BUILTIN_2
14462 #undef RS6000_BUILTIN_3
14463 #undef RS6000_BUILTIN_A
14464 #undef RS6000_BUILTIN_D
14465 #undef RS6000_BUILTIN_E
14466 #undef RS6000_BUILTIN_H
14467 #undef RS6000_BUILTIN_P
14468 #undef RS6000_BUILTIN_Q
14469 #undef RS6000_BUILTIN_S
14470 #undef RS6000_BUILTIN_X
14472 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14473 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14474 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14475 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14476 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14477 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14478 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14479 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14480 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14481 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
14482 { MASK, ICODE, NAME, ENUM },
14484 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14485 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14487 static const struct builtin_description bdesc_paired_preds
[] =
14489 #include "powerpcspe-builtin.def"
14492 /* ABS* operations. */
14494 #undef RS6000_BUILTIN_0
14495 #undef RS6000_BUILTIN_1
14496 #undef RS6000_BUILTIN_2
14497 #undef RS6000_BUILTIN_3
14498 #undef RS6000_BUILTIN_A
14499 #undef RS6000_BUILTIN_D
14500 #undef RS6000_BUILTIN_E
14501 #undef RS6000_BUILTIN_H
14502 #undef RS6000_BUILTIN_P
14503 #undef RS6000_BUILTIN_Q
14504 #undef RS6000_BUILTIN_S
14505 #undef RS6000_BUILTIN_X
14507 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14508 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14509 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14510 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14511 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
14512 { MASK, ICODE, NAME, ENUM },
14514 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14515 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14516 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14517 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14518 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14519 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14520 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14522 static const struct builtin_description bdesc_abs
[] =
14524 #include "powerpcspe-builtin.def"
14527 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
14530 #undef RS6000_BUILTIN_0
14531 #undef RS6000_BUILTIN_1
14532 #undef RS6000_BUILTIN_2
14533 #undef RS6000_BUILTIN_3
14534 #undef RS6000_BUILTIN_A
14535 #undef RS6000_BUILTIN_D
14536 #undef RS6000_BUILTIN_E
14537 #undef RS6000_BUILTIN_H
14538 #undef RS6000_BUILTIN_P
14539 #undef RS6000_BUILTIN_Q
14540 #undef RS6000_BUILTIN_S
14541 #undef RS6000_BUILTIN_X
14543 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14544 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
14545 { MASK, ICODE, NAME, ENUM },
14547 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14548 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14549 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14550 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14551 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14552 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14553 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14554 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14555 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14556 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14558 static const struct builtin_description bdesc_1arg
[] =
14560 #include "powerpcspe-builtin.def"
14563 /* Simple no-argument operations: result = __builtin_darn_32 () */
14565 #undef RS6000_BUILTIN_0
14566 #undef RS6000_BUILTIN_1
14567 #undef RS6000_BUILTIN_2
14568 #undef RS6000_BUILTIN_3
14569 #undef RS6000_BUILTIN_A
14570 #undef RS6000_BUILTIN_D
14571 #undef RS6000_BUILTIN_E
14572 #undef RS6000_BUILTIN_H
14573 #undef RS6000_BUILTIN_P
14574 #undef RS6000_BUILTIN_Q
14575 #undef RS6000_BUILTIN_S
14576 #undef RS6000_BUILTIN_X
14578 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
14579 { MASK, ICODE, NAME, ENUM },
14581 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14582 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14583 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14584 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14585 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14586 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14587 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14588 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14589 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14590 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14591 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14593 static const struct builtin_description bdesc_0arg
[] =
14595 #include "powerpcspe-builtin.def"
14598 /* HTM builtins. */
14599 #undef RS6000_BUILTIN_0
14600 #undef RS6000_BUILTIN_1
14601 #undef RS6000_BUILTIN_2
14602 #undef RS6000_BUILTIN_3
14603 #undef RS6000_BUILTIN_A
14604 #undef RS6000_BUILTIN_D
14605 #undef RS6000_BUILTIN_E
14606 #undef RS6000_BUILTIN_H
14607 #undef RS6000_BUILTIN_P
14608 #undef RS6000_BUILTIN_Q
14609 #undef RS6000_BUILTIN_S
14610 #undef RS6000_BUILTIN_X
14612 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14613 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14614 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14615 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14616 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14617 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14618 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14619 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
14620 { MASK, ICODE, NAME, ENUM },
14622 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14623 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14624 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14625 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14627 static const struct builtin_description bdesc_htm
[] =
14629 #include "powerpcspe-builtin.def"
14632 #undef RS6000_BUILTIN_0
14633 #undef RS6000_BUILTIN_1
14634 #undef RS6000_BUILTIN_2
14635 #undef RS6000_BUILTIN_3
14636 #undef RS6000_BUILTIN_A
14637 #undef RS6000_BUILTIN_D
14638 #undef RS6000_BUILTIN_E
14639 #undef RS6000_BUILTIN_H
14640 #undef RS6000_BUILTIN_P
14641 #undef RS6000_BUILTIN_Q
14642 #undef RS6000_BUILTIN_S
14644 /* Return true if a builtin function is overloaded. */
14646 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode
)
14648 return (rs6000_builtin_info
[(int)fncode
].attr
& RS6000_BTC_OVERLOADED
) != 0;
14652 rs6000_overloaded_builtin_name (enum rs6000_builtins fncode
)
14654 return rs6000_builtin_info
[(int)fncode
].name
;
14657 /* Expand an expression EXP that calls a builtin without arguments. */
14659 rs6000_expand_zeroop_builtin (enum insn_code icode
, rtx target
)
14662 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
14664 if (icode
== CODE_FOR_nothing
)
14665 /* Builtin not supported on this processor. */
14669 || GET_MODE (target
) != tmode
14670 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14671 target
= gen_reg_rtx (tmode
);
14673 pat
= GEN_FCN (icode
) (target
);
14683 rs6000_expand_mtfsf_builtin (enum insn_code icode
, tree exp
)
14686 tree arg0
= CALL_EXPR_ARG (exp
, 0);
14687 tree arg1
= CALL_EXPR_ARG (exp
, 1);
14688 rtx op0
= expand_normal (arg0
);
14689 rtx op1
= expand_normal (arg1
);
14690 machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
14691 machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
14693 if (icode
== CODE_FOR_nothing
)
14694 /* Builtin not supported on this processor. */
14697 /* If we got invalid arguments bail out before generating bad rtl. */
14698 if (arg0
== error_mark_node
|| arg1
== error_mark_node
)
14701 if (GET_CODE (op0
) != CONST_INT
14702 || INTVAL (op0
) > 255
14703 || INTVAL (op0
) < 0)
14705 error ("argument 1 must be an 8-bit field value");
14709 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
14710 op0
= copy_to_mode_reg (mode0
, op0
);
14712 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
14713 op1
= copy_to_mode_reg (mode1
, op1
);
14715 pat
= GEN_FCN (icode
) (op0
, op1
);
14724 rs6000_expand_unop_builtin (enum insn_code icode
, tree exp
, rtx target
)
14727 tree arg0
= CALL_EXPR_ARG (exp
, 0);
14728 rtx op0
= expand_normal (arg0
);
14729 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
14730 machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
14732 if (icode
== CODE_FOR_nothing
)
14733 /* Builtin not supported on this processor. */
14736 /* If we got invalid arguments bail out before generating bad rtl. */
14737 if (arg0
== error_mark_node
)
14740 if (icode
== CODE_FOR_altivec_vspltisb
14741 || icode
== CODE_FOR_altivec_vspltish
14742 || icode
== CODE_FOR_altivec_vspltisw
14743 || icode
== CODE_FOR_spe_evsplatfi
14744 || icode
== CODE_FOR_spe_evsplati
)
14746 /* Only allow 5-bit *signed* literals. */
14747 if (GET_CODE (op0
) != CONST_INT
14748 || INTVAL (op0
) > 15
14749 || INTVAL (op0
) < -16)
14751 error ("argument 1 must be a 5-bit signed literal");
14752 return CONST0_RTX (tmode
);
14757 || GET_MODE (target
) != tmode
14758 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14759 target
= gen_reg_rtx (tmode
);
14761 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
14762 op0
= copy_to_mode_reg (mode0
, op0
);
14764 pat
= GEN_FCN (icode
) (target
, op0
);
14773 altivec_expand_abs_builtin (enum insn_code icode
, tree exp
, rtx target
)
14775 rtx pat
, scratch1
, scratch2
;
14776 tree arg0
= CALL_EXPR_ARG (exp
, 0);
14777 rtx op0
= expand_normal (arg0
);
14778 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
14779 machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
14781 /* If we have invalid arguments, bail out before generating bad rtl. */
14782 if (arg0
== error_mark_node
)
14786 || GET_MODE (target
) != tmode
14787 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14788 target
= gen_reg_rtx (tmode
);
14790 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
14791 op0
= copy_to_mode_reg (mode0
, op0
);
14793 scratch1
= gen_reg_rtx (mode0
);
14794 scratch2
= gen_reg_rtx (mode0
);
14796 pat
= GEN_FCN (icode
) (target
, op0
, scratch1
, scratch2
);
14805 rs6000_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
14808 tree arg0
= CALL_EXPR_ARG (exp
, 0);
14809 tree arg1
= CALL_EXPR_ARG (exp
, 1);
14810 rtx op0
= expand_normal (arg0
);
14811 rtx op1
= expand_normal (arg1
);
14812 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
14813 machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
14814 machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
14816 if (icode
== CODE_FOR_nothing
)
14817 /* Builtin not supported on this processor. */
14820 /* If we got invalid arguments bail out before generating bad rtl. */
14821 if (arg0
== error_mark_node
|| arg1
== error_mark_node
)
14824 if (icode
== CODE_FOR_altivec_vcfux
14825 || icode
== CODE_FOR_altivec_vcfsx
14826 || icode
== CODE_FOR_altivec_vctsxs
14827 || icode
== CODE_FOR_altivec_vctuxs
14828 || icode
== CODE_FOR_altivec_vspltb
14829 || icode
== CODE_FOR_altivec_vsplth
14830 || icode
== CODE_FOR_altivec_vspltw
14831 || icode
== CODE_FOR_spe_evaddiw
14832 || icode
== CODE_FOR_spe_evldd
14833 || icode
== CODE_FOR_spe_evldh
14834 || icode
== CODE_FOR_spe_evldw
14835 || icode
== CODE_FOR_spe_evlhhesplat
14836 || icode
== CODE_FOR_spe_evlhhossplat
14837 || icode
== CODE_FOR_spe_evlhhousplat
14838 || icode
== CODE_FOR_spe_evlwhe
14839 || icode
== CODE_FOR_spe_evlwhos
14840 || icode
== CODE_FOR_spe_evlwhou
14841 || icode
== CODE_FOR_spe_evlwhsplat
14842 || icode
== CODE_FOR_spe_evlwwsplat
14843 || icode
== CODE_FOR_spe_evrlwi
14844 || icode
== CODE_FOR_spe_evslwi
14845 || icode
== CODE_FOR_spe_evsrwis
14846 || icode
== CODE_FOR_spe_evsubifw
14847 || icode
== CODE_FOR_spe_evsrwiu
)
14849 /* Only allow 5-bit unsigned literals. */
14851 if (TREE_CODE (arg1
) != INTEGER_CST
14852 || TREE_INT_CST_LOW (arg1
) & ~0x1f)
14854 error ("argument 2 must be a 5-bit unsigned literal");
14855 return CONST0_RTX (tmode
);
14858 else if (icode
== CODE_FOR_dfptstsfi_eq_dd
14859 || icode
== CODE_FOR_dfptstsfi_lt_dd
14860 || icode
== CODE_FOR_dfptstsfi_gt_dd
14861 || icode
== CODE_FOR_dfptstsfi_unordered_dd
14862 || icode
== CODE_FOR_dfptstsfi_eq_td
14863 || icode
== CODE_FOR_dfptstsfi_lt_td
14864 || icode
== CODE_FOR_dfptstsfi_gt_td
14865 || icode
== CODE_FOR_dfptstsfi_unordered_td
)
14867 /* Only allow 6-bit unsigned literals. */
14869 if (TREE_CODE (arg0
) != INTEGER_CST
14870 || !IN_RANGE (TREE_INT_CST_LOW (arg0
), 0, 63))
14872 error ("argument 1 must be a 6-bit unsigned literal");
14873 return CONST0_RTX (tmode
);
14876 else if (icode
== CODE_FOR_xststdcdp
14877 || icode
== CODE_FOR_xststdcsp
14878 || icode
== CODE_FOR_xvtstdcdp
14879 || icode
== CODE_FOR_xvtstdcsp
)
14881 /* Only allow 7-bit unsigned literals. */
14883 if (TREE_CODE (arg1
) != INTEGER_CST
14884 || !IN_RANGE (TREE_INT_CST_LOW (arg1
), 0, 127))
14886 error ("argument 2 must be a 7-bit unsigned literal");
14887 return CONST0_RTX (tmode
);
14892 || GET_MODE (target
) != tmode
14893 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14894 target
= gen_reg_rtx (tmode
);
14896 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
14897 op0
= copy_to_mode_reg (mode0
, op0
);
14898 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
14899 op1
= copy_to_mode_reg (mode1
, op1
);
14901 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
14910 altivec_expand_predicate_builtin (enum insn_code icode
, tree exp
, rtx target
)
14913 tree cr6_form
= CALL_EXPR_ARG (exp
, 0);
14914 tree arg0
= CALL_EXPR_ARG (exp
, 1);
14915 tree arg1
= CALL_EXPR_ARG (exp
, 2);
14916 rtx op0
= expand_normal (arg0
);
14917 rtx op1
= expand_normal (arg1
);
14918 machine_mode tmode
= SImode
;
14919 machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
14920 machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
14923 if (TREE_CODE (cr6_form
) != INTEGER_CST
)
14925 error ("argument 1 of __builtin_altivec_predicate must be a constant");
14929 cr6_form_int
= TREE_INT_CST_LOW (cr6_form
);
14931 gcc_assert (mode0
== mode1
);
14933 /* If we have invalid arguments, bail out before generating bad rtl. */
14934 if (arg0
== error_mark_node
|| arg1
== error_mark_node
)
14938 || GET_MODE (target
) != tmode
14939 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14940 target
= gen_reg_rtx (tmode
);
14942 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
14943 op0
= copy_to_mode_reg (mode0
, op0
);
14944 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
14945 op1
= copy_to_mode_reg (mode1
, op1
);
14947 /* Note that for many of the relevant operations (e.g. cmpne or
14948 cmpeq) with float or double operands, it makes more sense for the
14949 mode of the allocated scratch register to select a vector of
14950 integer. But the choice to copy the mode of operand 0 was made
14951 long ago and there are no plans to change it. */
14952 scratch
= gen_reg_rtx (mode0
);
14954 pat
= GEN_FCN (icode
) (scratch
, op0
, op1
);
14959 /* The vec_any* and vec_all* predicates use the same opcodes for two
14960 different operations, but the bits in CR6 will be different
14961 depending on what information we want. So we have to play tricks
14962 with CR6 to get the right bits out.
14964 If you think this is disgusting, look at the specs for the
14965 AltiVec predicates. */
14967 switch (cr6_form_int
)
14970 emit_insn (gen_cr6_test_for_zero (target
));
14973 emit_insn (gen_cr6_test_for_zero_reverse (target
));
14976 emit_insn (gen_cr6_test_for_lt (target
));
14979 emit_insn (gen_cr6_test_for_lt_reverse (target
));
14982 error ("argument 1 of __builtin_altivec_predicate is out of range");
14990 paired_expand_lv_builtin (enum insn_code icode
, tree exp
, rtx target
)
14993 tree arg0
= CALL_EXPR_ARG (exp
, 0);
14994 tree arg1
= CALL_EXPR_ARG (exp
, 1);
14995 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
14996 machine_mode mode0
= Pmode
;
14997 machine_mode mode1
= Pmode
;
14998 rtx op0
= expand_normal (arg0
);
14999 rtx op1
= expand_normal (arg1
);
15001 if (icode
== CODE_FOR_nothing
)
15002 /* Builtin not supported on this processor. */
15005 /* If we got invalid arguments bail out before generating bad rtl. */
15006 if (arg0
== error_mark_node
|| arg1
== error_mark_node
)
15010 || GET_MODE (target
) != tmode
15011 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
15012 target
= gen_reg_rtx (tmode
);
15014 op1
= copy_to_mode_reg (mode1
, op1
);
15016 if (op0
== const0_rtx
)
15018 addr
= gen_rtx_MEM (tmode
, op1
);
15022 op0
= copy_to_mode_reg (mode0
, op0
);
15023 addr
= gen_rtx_MEM (tmode
, gen_rtx_PLUS (Pmode
, op0
, op1
));
15026 pat
= GEN_FCN (icode
) (target
, addr
);
15035 /* Return a constant vector for use as a little-endian permute control vector
15036 to reverse the order of elements of the given vector mode. */
15038 swap_selector_for_mode (machine_mode mode
)
15040 /* These are little endian vectors, so their elements are reversed
15041 from what you would normally expect for a permute control vector. */
15042 unsigned int swap2
[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
15043 unsigned int swap4
[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
15044 unsigned int swap8
[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
15045 unsigned int swap16
[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
15046 unsigned int *swaparray
, i
;
15063 swaparray
= swap16
;
15066 gcc_unreachable ();
15069 for (i
= 0; i
< 16; ++i
)
15070 perm
[i
] = GEN_INT (swaparray
[i
]);
15072 return force_reg (V16QImode
, gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, perm
)));
15075 /* Generate code for an "lvxl", or "lve*x" built-in for a little endian target
15076 with -maltivec=be specified. Issue the load followed by an element-
15077 reversing permute. */
15079 altivec_expand_lvx_be (rtx op0
, rtx op1
, machine_mode mode
, unsigned unspec
)
15081 rtx tmp
= gen_reg_rtx (mode
);
15082 rtx load
= gen_rtx_SET (tmp
, op1
);
15083 rtx lvx
= gen_rtx_UNSPEC (mode
, gen_rtvec (1, const0_rtx
), unspec
);
15084 rtx par
= gen_rtx_PARALLEL (mode
, gen_rtvec (2, load
, lvx
));
15085 rtx sel
= swap_selector_for_mode (mode
);
15086 rtx vperm
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, tmp
, tmp
, sel
), UNSPEC_VPERM
);
15088 gcc_assert (REG_P (op0
));
15090 emit_insn (gen_rtx_SET (op0
, vperm
));
15093 /* Generate code for a "stvxl" built-in for a little endian target with
15094 -maltivec=be specified. Issue the store preceded by an element-reversing
15097 altivec_expand_stvx_be (rtx op0
, rtx op1
, machine_mode mode
, unsigned unspec
)
15099 rtx tmp
= gen_reg_rtx (mode
);
15100 rtx store
= gen_rtx_SET (op0
, tmp
);
15101 rtx stvx
= gen_rtx_UNSPEC (mode
, gen_rtvec (1, const0_rtx
), unspec
);
15102 rtx par
= gen_rtx_PARALLEL (mode
, gen_rtvec (2, store
, stvx
));
15103 rtx sel
= swap_selector_for_mode (mode
);
15106 gcc_assert (REG_P (op1
));
15107 vperm
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, op1
, op1
, sel
), UNSPEC_VPERM
);
15108 emit_insn (gen_rtx_SET (tmp
, vperm
));
15112 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
15113 specified. Issue the store preceded by an element-reversing permute. */
15115 altivec_expand_stvex_be (rtx op0
, rtx op1
, machine_mode mode
, unsigned unspec
)
15117 machine_mode inner_mode
= GET_MODE_INNER (mode
);
15118 rtx tmp
= gen_reg_rtx (mode
);
15119 rtx stvx
= gen_rtx_UNSPEC (inner_mode
, gen_rtvec (1, tmp
), unspec
);
15120 rtx sel
= swap_selector_for_mode (mode
);
15123 gcc_assert (REG_P (op1
));
15124 vperm
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, op1
, op1
, sel
), UNSPEC_VPERM
);
15125 emit_insn (gen_rtx_SET (tmp
, vperm
));
15126 emit_insn (gen_rtx_SET (op0
, stvx
));
15130 altivec_expand_lv_builtin (enum insn_code icode
, tree exp
, rtx target
, bool blk
)
15133 tree arg0
= CALL_EXPR_ARG (exp
, 0);
15134 tree arg1
= CALL_EXPR_ARG (exp
, 1);
15135 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
15136 machine_mode mode0
= Pmode
;
15137 machine_mode mode1
= Pmode
;
15138 rtx op0
= expand_normal (arg0
);
15139 rtx op1
= expand_normal (arg1
);
15141 if (icode
== CODE_FOR_nothing
)
15142 /* Builtin not supported on this processor. */
15145 /* If we got invalid arguments bail out before generating bad rtl. */
15146 if (arg0
== error_mark_node
|| arg1
== error_mark_node
)
15150 || GET_MODE (target
) != tmode
15151 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
15152 target
= gen_reg_rtx (tmode
);
15154 op1
= copy_to_mode_reg (mode1
, op1
);
15156 /* For LVX, express the RTL accurately by ANDing the address with -16.
15157 LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
15158 so the raw address is fine. */
15159 if (icode
== CODE_FOR_altivec_lvx_v2df_2op
15160 || icode
== CODE_FOR_altivec_lvx_v2di_2op
15161 || icode
== CODE_FOR_altivec_lvx_v4sf_2op
15162 || icode
== CODE_FOR_altivec_lvx_v4si_2op
15163 || icode
== CODE_FOR_altivec_lvx_v8hi_2op
15164 || icode
== CODE_FOR_altivec_lvx_v16qi_2op
)
15167 if (op0
== const0_rtx
)
15171 op0
= copy_to_mode_reg (mode0
, op0
);
15172 rawaddr
= gen_rtx_PLUS (Pmode
, op1
, op0
);
15174 addr
= gen_rtx_AND (Pmode
, rawaddr
, gen_rtx_CONST_INT (Pmode
, -16));
15175 addr
= gen_rtx_MEM (blk
? BLKmode
: tmode
, addr
);
15177 /* For -maltivec=be, emit the load and follow it up with a
15178 permute to swap the elements. */
15179 if (!BYTES_BIG_ENDIAN
&& VECTOR_ELT_ORDER_BIG
)
15181 rtx temp
= gen_reg_rtx (tmode
);
15182 emit_insn (gen_rtx_SET (temp
, addr
));
15184 rtx sel
= swap_selector_for_mode (tmode
);
15185 rtx vperm
= gen_rtx_UNSPEC (tmode
, gen_rtvec (3, temp
, temp
, sel
),
15187 emit_insn (gen_rtx_SET (target
, vperm
));
15190 emit_insn (gen_rtx_SET (target
, addr
));
15194 if (op0
== const0_rtx
)
15195 addr
= gen_rtx_MEM (blk
? BLKmode
: tmode
, op1
);
15198 op0
= copy_to_mode_reg (mode0
, op0
);
15199 addr
= gen_rtx_MEM (blk
? BLKmode
: tmode
,
15200 gen_rtx_PLUS (Pmode
, op1
, op0
));
15203 pat
= GEN_FCN (icode
) (target
, addr
);
15213 spe_expand_stv_builtin (enum insn_code icode
, tree exp
)
15215 tree arg0
= CALL_EXPR_ARG (exp
, 0);
15216 tree arg1
= CALL_EXPR_ARG (exp
, 1);
15217 tree arg2
= CALL_EXPR_ARG (exp
, 2);
15218 rtx op0
= expand_normal (arg0
);
15219 rtx op1
= expand_normal (arg1
);
15220 rtx op2
= expand_normal (arg2
);
15222 machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
15223 machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
15224 machine_mode mode2
= insn_data
[icode
].operand
[2].mode
;
15226 /* Invalid arguments. Bail before doing anything stoopid! */
15227 if (arg0
== error_mark_node
15228 || arg1
== error_mark_node
15229 || arg2
== error_mark_node
)
15232 if (! (*insn_data
[icode
].operand
[2].predicate
) (op0
, mode2
))
15233 op0
= copy_to_mode_reg (mode2
, op0
);
15234 if (! (*insn_data
[icode
].operand
[0].predicate
) (op1
, mode0
))
15235 op1
= copy_to_mode_reg (mode0
, op1
);
15236 if (! (*insn_data
[icode
].operand
[1].predicate
) (op2
, mode1
))
15237 op2
= copy_to_mode_reg (mode1
, op2
);
15239 pat
= GEN_FCN (icode
) (op1
, op2
, op0
);
15246 paired_expand_stv_builtin (enum insn_code icode
, tree exp
)
15248 tree arg0
= CALL_EXPR_ARG (exp
, 0);
15249 tree arg1
= CALL_EXPR_ARG (exp
, 1);
15250 tree arg2
= CALL_EXPR_ARG (exp
, 2);
15251 rtx op0
= expand_normal (arg0
);
15252 rtx op1
= expand_normal (arg1
);
15253 rtx op2
= expand_normal (arg2
);
15255 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
15256 machine_mode mode1
= Pmode
;
15257 machine_mode mode2
= Pmode
;
15259 /* Invalid arguments. Bail before doing anything stoopid! */
15260 if (arg0
== error_mark_node
15261 || arg1
== error_mark_node
15262 || arg2
== error_mark_node
)
15265 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, tmode
))
15266 op0
= copy_to_mode_reg (tmode
, op0
);
15268 op2
= copy_to_mode_reg (mode2
, op2
);
15270 if (op1
== const0_rtx
)
15272 addr
= gen_rtx_MEM (tmode
, op2
);
15276 op1
= copy_to_mode_reg (mode1
, op1
);
15277 addr
= gen_rtx_MEM (tmode
, gen_rtx_PLUS (Pmode
, op1
, op2
));
15280 pat
= GEN_FCN (icode
) (addr
, op0
);
15287 altivec_expand_stxvl_builtin (enum insn_code icode
, tree exp
)
15290 tree arg0
= CALL_EXPR_ARG (exp
, 0);
15291 tree arg1
= CALL_EXPR_ARG (exp
, 1);
15292 tree arg2
= CALL_EXPR_ARG (exp
, 2);
15293 rtx op0
= expand_normal (arg0
);
15294 rtx op1
= expand_normal (arg1
);
15295 rtx op2
= expand_normal (arg2
);
15296 machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
15297 machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
15298 machine_mode mode2
= insn_data
[icode
].operand
[2].mode
;
15300 if (icode
== CODE_FOR_nothing
)
15301 /* Builtin not supported on this processor. */
15304 /* If we got invalid arguments bail out before generating bad rtl. */
15305 if (arg0
== error_mark_node
15306 || arg1
== error_mark_node
15307 || arg2
== error_mark_node
)
15310 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
15311 op0
= copy_to_mode_reg (mode0
, op0
);
15312 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
15313 op1
= copy_to_mode_reg (mode1
, op1
);
15314 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
15315 op2
= copy_to_mode_reg (mode2
, op2
);
15317 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
15325 altivec_expand_stv_builtin (enum insn_code icode
, tree exp
)
15327 tree arg0
= CALL_EXPR_ARG (exp
, 0);
15328 tree arg1
= CALL_EXPR_ARG (exp
, 1);
15329 tree arg2
= CALL_EXPR_ARG (exp
, 2);
15330 rtx op0
= expand_normal (arg0
);
15331 rtx op1
= expand_normal (arg1
);
15332 rtx op2
= expand_normal (arg2
);
15333 rtx pat
, addr
, rawaddr
;
15334 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
15335 machine_mode smode
= insn_data
[icode
].operand
[1].mode
;
15336 machine_mode mode1
= Pmode
;
15337 machine_mode mode2
= Pmode
;
15339 /* Invalid arguments. Bail before doing anything stoopid! */
15340 if (arg0
== error_mark_node
15341 || arg1
== error_mark_node
15342 || arg2
== error_mark_node
)
15345 op2
= copy_to_mode_reg (mode2
, op2
);
15347 /* For STVX, express the RTL accurately by ANDing the address with -16.
15348 STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
15349 so the raw address is fine. */
15350 if (icode
== CODE_FOR_altivec_stvx_v2df_2op
15351 || icode
== CODE_FOR_altivec_stvx_v2di_2op
15352 || icode
== CODE_FOR_altivec_stvx_v4sf_2op
15353 || icode
== CODE_FOR_altivec_stvx_v4si_2op
15354 || icode
== CODE_FOR_altivec_stvx_v8hi_2op
15355 || icode
== CODE_FOR_altivec_stvx_v16qi_2op
)
15357 if (op1
== const0_rtx
)
15361 op1
= copy_to_mode_reg (mode1
, op1
);
15362 rawaddr
= gen_rtx_PLUS (Pmode
, op2
, op1
);
15365 addr
= gen_rtx_AND (Pmode
, rawaddr
, gen_rtx_CONST_INT (Pmode
, -16));
15366 addr
= gen_rtx_MEM (tmode
, addr
);
15368 op0
= copy_to_mode_reg (tmode
, op0
);
15370 /* For -maltivec=be, emit a permute to swap the elements, followed
15372 if (!BYTES_BIG_ENDIAN
&& VECTOR_ELT_ORDER_BIG
)
15374 rtx temp
= gen_reg_rtx (tmode
);
15375 rtx sel
= swap_selector_for_mode (tmode
);
15376 rtx vperm
= gen_rtx_UNSPEC (tmode
, gen_rtvec (3, op0
, op0
, sel
),
15378 emit_insn (gen_rtx_SET (temp
, vperm
));
15379 emit_insn (gen_rtx_SET (addr
, temp
));
15382 emit_insn (gen_rtx_SET (addr
, op0
));
15386 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, smode
))
15387 op0
= copy_to_mode_reg (smode
, op0
);
15389 if (op1
== const0_rtx
)
15390 addr
= gen_rtx_MEM (tmode
, op2
);
15393 op1
= copy_to_mode_reg (mode1
, op1
);
15394 addr
= gen_rtx_MEM (tmode
, gen_rtx_PLUS (Pmode
, op2
, op1
));
15397 pat
= GEN_FCN (icode
) (addr
, op0
);
15405 /* Return the appropriate SPR number associated with the given builtin. */
15406 static inline HOST_WIDE_INT
15407 htm_spr_num (enum rs6000_builtins code
)
15409 if (code
== HTM_BUILTIN_GET_TFHAR
15410 || code
== HTM_BUILTIN_SET_TFHAR
)
15412 else if (code
== HTM_BUILTIN_GET_TFIAR
15413 || code
== HTM_BUILTIN_SET_TFIAR
)
15415 else if (code
== HTM_BUILTIN_GET_TEXASR
15416 || code
== HTM_BUILTIN_SET_TEXASR
)
15418 gcc_assert (code
== HTM_BUILTIN_GET_TEXASRU
15419 || code
== HTM_BUILTIN_SET_TEXASRU
);
15420 return TEXASRU_SPR
;
15423 /* Return the appropriate SPR regno associated with the given builtin. */
15424 static inline HOST_WIDE_INT
15425 htm_spr_regno (enum rs6000_builtins code
)
15427 if (code
== HTM_BUILTIN_GET_TFHAR
15428 || code
== HTM_BUILTIN_SET_TFHAR
)
15429 return TFHAR_REGNO
;
15430 else if (code
== HTM_BUILTIN_GET_TFIAR
15431 || code
== HTM_BUILTIN_SET_TFIAR
)
15432 return TFIAR_REGNO
;
15433 gcc_assert (code
== HTM_BUILTIN_GET_TEXASR
15434 || code
== HTM_BUILTIN_SET_TEXASR
15435 || code
== HTM_BUILTIN_GET_TEXASRU
15436 || code
== HTM_BUILTIN_SET_TEXASRU
);
15437 return TEXASR_REGNO
;
15440 /* Return the correct ICODE value depending on whether we are
15441 setting or reading the HTM SPRs. */
15442 static inline enum insn_code
15443 rs6000_htm_spr_icode (bool nonvoid
)
15446 return (TARGET_POWERPC64
) ? CODE_FOR_htm_mfspr_di
: CODE_FOR_htm_mfspr_si
;
15448 return (TARGET_POWERPC64
) ? CODE_FOR_htm_mtspr_di
: CODE_FOR_htm_mtspr_si
;
15451 /* Expand the HTM builtin in EXP and store the result in TARGET.
15452 Store true in *EXPANDEDP if we found a builtin to expand. */
15454 htm_expand_builtin (tree exp
, rtx target
, bool * expandedp
)
15456 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
15457 bool nonvoid
= TREE_TYPE (TREE_TYPE (fndecl
)) != void_type_node
;
15458 enum rs6000_builtins fcode
= (enum rs6000_builtins
) DECL_FUNCTION_CODE (fndecl
);
15459 const struct builtin_description
*d
;
15464 if (!TARGET_POWERPC64
15465 && (fcode
== HTM_BUILTIN_TABORTDC
15466 || fcode
== HTM_BUILTIN_TABORTDCI
))
15468 size_t uns_fcode
= (size_t)fcode
;
15469 const char *name
= rs6000_builtin_info
[uns_fcode
].name
;
15470 error ("builtin %s is only valid in 64-bit mode", name
);
15474 /* Expand the HTM builtins. */
15476 for (i
= 0; i
< ARRAY_SIZE (bdesc_htm
); i
++, d
++)
15477 if (d
->code
== fcode
)
15479 rtx op
[MAX_HTM_OPERANDS
], pat
;
15482 call_expr_arg_iterator iter
;
15483 unsigned attr
= rs6000_builtin_info
[fcode
].attr
;
15484 enum insn_code icode
= d
->icode
;
15485 const struct insn_operand_data
*insn_op
;
15486 bool uses_spr
= (attr
& RS6000_BTC_SPR
);
15490 icode
= rs6000_htm_spr_icode (nonvoid
);
15491 insn_op
= &insn_data
[icode
].operand
[0];
15495 machine_mode tmode
= (uses_spr
) ? insn_op
->mode
: E_SImode
;
15497 || GET_MODE (target
) != tmode
15498 || (uses_spr
&& !(*insn_op
->predicate
) (target
, tmode
)))
15499 target
= gen_reg_rtx (tmode
);
15501 op
[nopnds
++] = target
;
15504 FOR_EACH_CALL_EXPR_ARG (arg
, iter
, exp
)
15506 if (arg
== error_mark_node
|| nopnds
>= MAX_HTM_OPERANDS
)
15509 insn_op
= &insn_data
[icode
].operand
[nopnds
];
15511 op
[nopnds
] = expand_normal (arg
);
15513 if (!(*insn_op
->predicate
) (op
[nopnds
], insn_op
->mode
))
15515 if (!strcmp (insn_op
->constraint
, "n"))
15517 int arg_num
= (nonvoid
) ? nopnds
: nopnds
+ 1;
15518 if (!CONST_INT_P (op
[nopnds
]))
15519 error ("argument %d must be an unsigned literal", arg_num
);
15521 error ("argument %d is an unsigned literal that is "
15522 "out of range", arg_num
);
15525 op
[nopnds
] = copy_to_mode_reg (insn_op
->mode
, op
[nopnds
]);
15531 /* Handle the builtins for extended mnemonics. These accept
15532 no arguments, but map to builtins that take arguments. */
15535 case HTM_BUILTIN_TENDALL
: /* Alias for: tend. 1 */
15536 case HTM_BUILTIN_TRESUME
: /* Alias for: tsr. 1 */
15537 op
[nopnds
++] = GEN_INT (1);
15539 attr
|= RS6000_BTC_UNARY
;
15541 case HTM_BUILTIN_TSUSPEND
: /* Alias for: tsr. 0 */
15542 op
[nopnds
++] = GEN_INT (0);
15544 attr
|= RS6000_BTC_UNARY
;
15550 /* If this builtin accesses SPRs, then pass in the appropriate
15551 SPR number and SPR regno as the last two operands. */
15554 machine_mode mode
= (TARGET_POWERPC64
) ? DImode
: SImode
;
15555 op
[nopnds
++] = gen_rtx_CONST_INT (mode
, htm_spr_num (fcode
));
15556 op
[nopnds
++] = gen_rtx_REG (mode
, htm_spr_regno (fcode
));
15558 /* If this builtin accesses a CR, then pass in a scratch
15559 CR as the last operand. */
15560 else if (attr
& RS6000_BTC_CR
)
15561 { cr
= gen_reg_rtx (CCmode
);
15567 int expected_nopnds
= 0;
15568 if ((attr
& RS6000_BTC_TYPE_MASK
) == RS6000_BTC_UNARY
)
15569 expected_nopnds
= 1;
15570 else if ((attr
& RS6000_BTC_TYPE_MASK
) == RS6000_BTC_BINARY
)
15571 expected_nopnds
= 2;
15572 else if ((attr
& RS6000_BTC_TYPE_MASK
) == RS6000_BTC_TERNARY
)
15573 expected_nopnds
= 3;
15574 if (!(attr
& RS6000_BTC_VOID
))
15575 expected_nopnds
+= 1;
15577 expected_nopnds
+= 2;
15579 gcc_assert (nopnds
== expected_nopnds
15580 && nopnds
<= MAX_HTM_OPERANDS
);
15586 pat
= GEN_FCN (icode
) (op
[0]);
15589 pat
= GEN_FCN (icode
) (op
[0], op
[1]);
15592 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2]);
15595 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3]);
15598 gcc_unreachable ();
15604 if (attr
& RS6000_BTC_CR
)
15606 if (fcode
== HTM_BUILTIN_TBEGIN
)
15608 /* Emit code to set TARGET to true or false depending on
15609 whether the tbegin. instruction successfully or failed
15610 to start a transaction. We do this by placing the 1's
15611 complement of CR's EQ bit into TARGET. */
15612 rtx scratch
= gen_reg_rtx (SImode
);
15613 emit_insn (gen_rtx_SET (scratch
,
15614 gen_rtx_EQ (SImode
, cr
,
15616 emit_insn (gen_rtx_SET (target
,
15617 gen_rtx_XOR (SImode
, scratch
,
15622 /* Emit code to copy the 4-bit condition register field
15623 CR into the least significant end of register TARGET. */
15624 rtx scratch1
= gen_reg_rtx (SImode
);
15625 rtx scratch2
= gen_reg_rtx (SImode
);
15626 rtx subreg
= simplify_gen_subreg (CCmode
, scratch1
, SImode
, 0);
15627 emit_insn (gen_movcc (subreg
, cr
));
15628 emit_insn (gen_lshrsi3 (scratch2
, scratch1
, GEN_INT (28)));
15629 emit_insn (gen_andsi3 (target
, scratch2
, GEN_INT (0xf)));
15638 *expandedp
= false;
15642 /* Expand the CPU builtin in FCODE and store the result in TARGET. */
15645 cpu_expand_builtin (enum rs6000_builtins fcode
, tree exp ATTRIBUTE_UNUSED
,
15648 /* __builtin_cpu_init () is a nop, so expand to nothing. */
15649 if (fcode
== RS6000_BUILTIN_CPU_INIT
)
15652 if (target
== 0 || GET_MODE (target
) != SImode
)
15653 target
= gen_reg_rtx (SImode
);
15655 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
15656 tree arg
= TREE_OPERAND (CALL_EXPR_ARG (exp
, 0), 0);
15657 if (TREE_CODE (arg
) != STRING_CST
)
15659 error ("builtin %s only accepts a string argument",
15660 rs6000_builtin_info
[(size_t) fcode
].name
);
15664 if (fcode
== RS6000_BUILTIN_CPU_IS
)
15666 const char *cpu
= TREE_STRING_POINTER (arg
);
15667 rtx cpuid
= NULL_RTX
;
15668 for (size_t i
= 0; i
< ARRAY_SIZE (cpu_is_info
); i
++)
15669 if (strcmp (cpu
, cpu_is_info
[i
].cpu
) == 0)
15671 /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */
15672 cpuid
= GEN_INT (cpu_is_info
[i
].cpuid
+ _DL_FIRST_PLATFORM
);
15675 if (cpuid
== NULL_RTX
)
15677 /* Invalid CPU argument. */
15678 error ("cpu %s is an invalid argument to builtin %s",
15679 cpu
, rs6000_builtin_info
[(size_t) fcode
].name
);
15683 rtx platform
= gen_reg_rtx (SImode
);
15684 rtx tcbmem
= gen_const_mem (SImode
,
15685 gen_rtx_PLUS (Pmode
,
15686 gen_rtx_REG (Pmode
, TLS_REGNUM
),
15687 GEN_INT (TCB_PLATFORM_OFFSET
)));
15688 emit_move_insn (platform
, tcbmem
);
15689 emit_insn (gen_eqsi3 (target
, platform
, cpuid
));
15691 else if (fcode
== RS6000_BUILTIN_CPU_SUPPORTS
)
15693 const char *hwcap
= TREE_STRING_POINTER (arg
);
15694 rtx mask
= NULL_RTX
;
15696 for (size_t i
= 0; i
< ARRAY_SIZE (cpu_supports_info
); i
++)
15697 if (strcmp (hwcap
, cpu_supports_info
[i
].hwcap
) == 0)
15699 mask
= GEN_INT (cpu_supports_info
[i
].mask
);
15700 hwcap_offset
= TCB_HWCAP_OFFSET (cpu_supports_info
[i
].id
);
15703 if (mask
== NULL_RTX
)
15705 /* Invalid HWCAP argument. */
15706 error ("hwcap %s is an invalid argument to builtin %s",
15707 hwcap
, rs6000_builtin_info
[(size_t) fcode
].name
);
15711 rtx tcb_hwcap
= gen_reg_rtx (SImode
);
15712 rtx tcbmem
= gen_const_mem (SImode
,
15713 gen_rtx_PLUS (Pmode
,
15714 gen_rtx_REG (Pmode
, TLS_REGNUM
),
15715 GEN_INT (hwcap_offset
)));
15716 emit_move_insn (tcb_hwcap
, tcbmem
);
15717 rtx scratch1
= gen_reg_rtx (SImode
);
15718 emit_insn (gen_rtx_SET (scratch1
, gen_rtx_AND (SImode
, tcb_hwcap
, mask
)));
15719 rtx scratch2
= gen_reg_rtx (SImode
);
15720 emit_insn (gen_eqsi3 (scratch2
, scratch1
, const0_rtx
));
15721 emit_insn (gen_rtx_SET (target
, gen_rtx_XOR (SImode
, scratch2
, const1_rtx
)));
15724 /* Record that we have expanded a CPU builtin, so that we can later
15725 emit a reference to the special symbol exported by LIBC to ensure we
15726 do not link against an old LIBC that doesn't support this feature. */
15727 cpu_builtin_p
= true;
15730 /* For old LIBCs, always return FALSE. */
15731 emit_move_insn (target
, GEN_INT (0));
15732 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
15738 rs6000_expand_ternop_builtin (enum insn_code icode
, tree exp
, rtx target
)
15741 tree arg0
= CALL_EXPR_ARG (exp
, 0);
15742 tree arg1
= CALL_EXPR_ARG (exp
, 1);
15743 tree arg2
= CALL_EXPR_ARG (exp
, 2);
15744 rtx op0
= expand_normal (arg0
);
15745 rtx op1
= expand_normal (arg1
);
15746 rtx op2
= expand_normal (arg2
);
15747 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
15748 machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
15749 machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
15750 machine_mode mode2
= insn_data
[icode
].operand
[3].mode
;
15752 if (icode
== CODE_FOR_nothing
)
15753 /* Builtin not supported on this processor. */
15756 /* If we got invalid arguments bail out before generating bad rtl. */
15757 if (arg0
== error_mark_node
15758 || arg1
== error_mark_node
15759 || arg2
== error_mark_node
)
15762 /* Check and prepare argument depending on the instruction code.
15764 Note that a switch statement instead of the sequence of tests
15765 would be incorrect as many of the CODE_FOR values could be
15766 CODE_FOR_nothing and that would yield multiple alternatives
15767 with identical values. We'd never reach here at runtime in
15769 if (icode
== CODE_FOR_altivec_vsldoi_v4sf
15770 || icode
== CODE_FOR_altivec_vsldoi_v2df
15771 || icode
== CODE_FOR_altivec_vsldoi_v4si
15772 || icode
== CODE_FOR_altivec_vsldoi_v8hi
15773 || icode
== CODE_FOR_altivec_vsldoi_v16qi
)
15775 /* Only allow 4-bit unsigned literals. */
15777 if (TREE_CODE (arg2
) != INTEGER_CST
15778 || TREE_INT_CST_LOW (arg2
) & ~0xf)
15780 error ("argument 3 must be a 4-bit unsigned literal");
15781 return CONST0_RTX (tmode
);
15784 else if (icode
== CODE_FOR_vsx_xxpermdi_v2df
15785 || icode
== CODE_FOR_vsx_xxpermdi_v2di
15786 || icode
== CODE_FOR_vsx_xxpermdi_v2df_be
15787 || icode
== CODE_FOR_vsx_xxpermdi_v2di_be
15788 || icode
== CODE_FOR_vsx_xxpermdi_v1ti
15789 || icode
== CODE_FOR_vsx_xxpermdi_v4sf
15790 || icode
== CODE_FOR_vsx_xxpermdi_v4si
15791 || icode
== CODE_FOR_vsx_xxpermdi_v8hi
15792 || icode
== CODE_FOR_vsx_xxpermdi_v16qi
15793 || icode
== CODE_FOR_vsx_xxsldwi_v16qi
15794 || icode
== CODE_FOR_vsx_xxsldwi_v8hi
15795 || icode
== CODE_FOR_vsx_xxsldwi_v4si
15796 || icode
== CODE_FOR_vsx_xxsldwi_v4sf
15797 || icode
== CODE_FOR_vsx_xxsldwi_v2di
15798 || icode
== CODE_FOR_vsx_xxsldwi_v2df
)
15800 /* Only allow 2-bit unsigned literals. */
15802 if (TREE_CODE (arg2
) != INTEGER_CST
15803 || TREE_INT_CST_LOW (arg2
) & ~0x3)
15805 error ("argument 3 must be a 2-bit unsigned literal");
15806 return CONST0_RTX (tmode
);
15809 else if (icode
== CODE_FOR_vsx_set_v2df
15810 || icode
== CODE_FOR_vsx_set_v2di
15811 || icode
== CODE_FOR_bcdadd
15812 || icode
== CODE_FOR_bcdadd_lt
15813 || icode
== CODE_FOR_bcdadd_eq
15814 || icode
== CODE_FOR_bcdadd_gt
15815 || icode
== CODE_FOR_bcdsub
15816 || icode
== CODE_FOR_bcdsub_lt
15817 || icode
== CODE_FOR_bcdsub_eq
15818 || icode
== CODE_FOR_bcdsub_gt
)
15820 /* Only allow 1-bit unsigned literals. */
15822 if (TREE_CODE (arg2
) != INTEGER_CST
15823 || TREE_INT_CST_LOW (arg2
) & ~0x1)
15825 error ("argument 3 must be a 1-bit unsigned literal");
15826 return CONST0_RTX (tmode
);
15829 else if (icode
== CODE_FOR_dfp_ddedpd_dd
15830 || icode
== CODE_FOR_dfp_ddedpd_td
)
15832 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
15834 if (TREE_CODE (arg0
) != INTEGER_CST
15835 || TREE_INT_CST_LOW (arg2
) & ~0x3)
15837 error ("argument 1 must be 0 or 2");
15838 return CONST0_RTX (tmode
);
15841 else if (icode
== CODE_FOR_dfp_denbcd_dd
15842 || icode
== CODE_FOR_dfp_denbcd_td
)
15844 /* Only allow 1-bit unsigned literals. */
15846 if (TREE_CODE (arg0
) != INTEGER_CST
15847 || TREE_INT_CST_LOW (arg0
) & ~0x1)
15849 error ("argument 1 must be a 1-bit unsigned literal");
15850 return CONST0_RTX (tmode
);
15853 else if (icode
== CODE_FOR_dfp_dscli_dd
15854 || icode
== CODE_FOR_dfp_dscli_td
15855 || icode
== CODE_FOR_dfp_dscri_dd
15856 || icode
== CODE_FOR_dfp_dscri_td
)
15858 /* Only allow 6-bit unsigned literals. */
15860 if (TREE_CODE (arg1
) != INTEGER_CST
15861 || TREE_INT_CST_LOW (arg1
) & ~0x3f)
15863 error ("argument 2 must be a 6-bit unsigned literal");
15864 return CONST0_RTX (tmode
);
15867 else if (icode
== CODE_FOR_crypto_vshasigmaw
15868 || icode
== CODE_FOR_crypto_vshasigmad
)
15870 /* Check whether the 2nd and 3rd arguments are integer constants and in
15871 range and prepare arguments. */
15873 if (TREE_CODE (arg1
) != INTEGER_CST
|| wi::geu_p (arg1
, 2))
15875 error ("argument 2 must be 0 or 1");
15876 return CONST0_RTX (tmode
);
15880 if (TREE_CODE (arg2
) != INTEGER_CST
|| wi::geu_p (arg2
, 16))
15882 error ("argument 3 must be in the range 0..15");
15883 return CONST0_RTX (tmode
);
15888 || GET_MODE (target
) != tmode
15889 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
15890 target
= gen_reg_rtx (tmode
);
15892 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
15893 op0
= copy_to_mode_reg (mode0
, op0
);
15894 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
15895 op1
= copy_to_mode_reg (mode1
, op1
);
15896 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
15897 op2
= copy_to_mode_reg (mode2
, op2
);
15899 if (TARGET_PAIRED_FLOAT
&& icode
== CODE_FOR_selv2sf4
)
15900 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
, CONST0_RTX (SFmode
));
15902 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
15910 /* Expand the lvx builtins. */
15912 altivec_expand_ld_builtin (tree exp
, rtx target
, bool *expandedp
)
15914 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
15915 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
15917 machine_mode tmode
, mode0
;
15919 enum insn_code icode
;
15923 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi
:
15924 icode
= CODE_FOR_vector_altivec_load_v16qi
;
15926 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi
:
15927 icode
= CODE_FOR_vector_altivec_load_v8hi
;
15929 case ALTIVEC_BUILTIN_LD_INTERNAL_4si
:
15930 icode
= CODE_FOR_vector_altivec_load_v4si
;
15932 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf
:
15933 icode
= CODE_FOR_vector_altivec_load_v4sf
;
15935 case ALTIVEC_BUILTIN_LD_INTERNAL_2df
:
15936 icode
= CODE_FOR_vector_altivec_load_v2df
;
15938 case ALTIVEC_BUILTIN_LD_INTERNAL_2di
:
15939 icode
= CODE_FOR_vector_altivec_load_v2di
;
15941 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti
:
15942 icode
= CODE_FOR_vector_altivec_load_v1ti
;
15945 *expandedp
= false;
15951 arg0
= CALL_EXPR_ARG (exp
, 0);
15952 op0
= expand_normal (arg0
);
15953 tmode
= insn_data
[icode
].operand
[0].mode
;
15954 mode0
= insn_data
[icode
].operand
[1].mode
;
15957 || GET_MODE (target
) != tmode
15958 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
15959 target
= gen_reg_rtx (tmode
);
15961 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
15962 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
15964 pat
= GEN_FCN (icode
) (target
, op0
);
15971 /* Expand the stvx builtins. */
15973 altivec_expand_st_builtin (tree exp
, rtx target ATTRIBUTE_UNUSED
,
15976 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
15977 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
15979 machine_mode mode0
, mode1
;
15981 enum insn_code icode
;
15985 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi
:
15986 icode
= CODE_FOR_vector_altivec_store_v16qi
;
15988 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi
:
15989 icode
= CODE_FOR_vector_altivec_store_v8hi
;
15991 case ALTIVEC_BUILTIN_ST_INTERNAL_4si
:
15992 icode
= CODE_FOR_vector_altivec_store_v4si
;
15994 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf
:
15995 icode
= CODE_FOR_vector_altivec_store_v4sf
;
15997 case ALTIVEC_BUILTIN_ST_INTERNAL_2df
:
15998 icode
= CODE_FOR_vector_altivec_store_v2df
;
16000 case ALTIVEC_BUILTIN_ST_INTERNAL_2di
:
16001 icode
= CODE_FOR_vector_altivec_store_v2di
;
16003 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti
:
16004 icode
= CODE_FOR_vector_altivec_store_v1ti
;
16007 *expandedp
= false;
16011 arg0
= CALL_EXPR_ARG (exp
, 0);
16012 arg1
= CALL_EXPR_ARG (exp
, 1);
16013 op0
= expand_normal (arg0
);
16014 op1
= expand_normal (arg1
);
16015 mode0
= insn_data
[icode
].operand
[0].mode
;
16016 mode1
= insn_data
[icode
].operand
[1].mode
;
16018 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
16019 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
16020 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
16021 op1
= copy_to_mode_reg (mode1
, op1
);
16023 pat
= GEN_FCN (icode
) (op0
, op1
);
16031 /* Expand the dst builtins. */
16033 altivec_expand_dst_builtin (tree exp
, rtx target ATTRIBUTE_UNUSED
,
16036 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
16037 enum rs6000_builtins fcode
= (enum rs6000_builtins
) DECL_FUNCTION_CODE (fndecl
);
16038 tree arg0
, arg1
, arg2
;
16039 machine_mode mode0
, mode1
;
16040 rtx pat
, op0
, op1
, op2
;
16041 const struct builtin_description
*d
;
16044 *expandedp
= false;
16046 /* Handle DST variants. */
16048 for (i
= 0; i
< ARRAY_SIZE (bdesc_dst
); i
++, d
++)
16049 if (d
->code
== fcode
)
16051 arg0
= CALL_EXPR_ARG (exp
, 0);
16052 arg1
= CALL_EXPR_ARG (exp
, 1);
16053 arg2
= CALL_EXPR_ARG (exp
, 2);
16054 op0
= expand_normal (arg0
);
16055 op1
= expand_normal (arg1
);
16056 op2
= expand_normal (arg2
);
16057 mode0
= insn_data
[d
->icode
].operand
[0].mode
;
16058 mode1
= insn_data
[d
->icode
].operand
[1].mode
;
16060 /* Invalid arguments, bail out before generating bad rtl. */
16061 if (arg0
== error_mark_node
16062 || arg1
== error_mark_node
16063 || arg2
== error_mark_node
)
16068 if (TREE_CODE (arg2
) != INTEGER_CST
16069 || TREE_INT_CST_LOW (arg2
) & ~0x3)
16071 error ("argument to %qs must be a 2-bit unsigned literal", d
->name
);
16075 if (! (*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
16076 op0
= copy_to_mode_reg (Pmode
, op0
);
16077 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
16078 op1
= copy_to_mode_reg (mode1
, op1
);
16080 pat
= GEN_FCN (d
->icode
) (op0
, op1
, op2
);
16090 /* Expand vec_init builtin. */
16092 altivec_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
16094 machine_mode tmode
= TYPE_MODE (type
);
16095 machine_mode inner_mode
= GET_MODE_INNER (tmode
);
16096 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
16098 gcc_assert (VECTOR_MODE_P (tmode
));
16099 gcc_assert (n_elt
== call_expr_nargs (exp
));
16101 if (!target
|| !register_operand (target
, tmode
))
16102 target
= gen_reg_rtx (tmode
);
16104 /* If we have a vector compromised of a single element, such as V1TImode, do
16105 the initialization directly. */
16106 if (n_elt
== 1 && GET_MODE_SIZE (tmode
) == GET_MODE_SIZE (inner_mode
))
16108 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, 0));
16109 emit_move_insn (target
, gen_lowpart (tmode
, x
));
16113 rtvec v
= rtvec_alloc (n_elt
);
16115 for (i
= 0; i
< n_elt
; ++i
)
16117 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
16118 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
16121 rs6000_expand_vector_init (target
, gen_rtx_PARALLEL (tmode
, v
));
16127 /* Return the integer constant in ARG. Constrain it to be in the range
16128 of the subparts of VEC_TYPE; issue an error if not. */
16131 get_element_number (tree vec_type
, tree arg
)
16133 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
16135 if (!tree_fits_uhwi_p (arg
)
16136 || (elt
= tree_to_uhwi (arg
), elt
> max
))
16138 error ("selector must be an integer constant in the range 0..%wi", max
);
16145 /* Expand vec_set builtin. */
16147 altivec_expand_vec_set_builtin (tree exp
)
16149 machine_mode tmode
, mode1
;
16150 tree arg0
, arg1
, arg2
;
16154 arg0
= CALL_EXPR_ARG (exp
, 0);
16155 arg1
= CALL_EXPR_ARG (exp
, 1);
16156 arg2
= CALL_EXPR_ARG (exp
, 2);
16158 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
16159 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
16160 gcc_assert (VECTOR_MODE_P (tmode
));
16162 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, EXPAND_NORMAL
);
16163 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, EXPAND_NORMAL
);
16164 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
16166 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
16167 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
16169 op0
= force_reg (tmode
, op0
);
16170 op1
= force_reg (mode1
, op1
);
16172 rs6000_expand_vector_set (op0
, op1
, elt
);
16177 /* Expand vec_ext builtin. */
16179 altivec_expand_vec_ext_builtin (tree exp
, rtx target
)
16181 machine_mode tmode
, mode0
;
16186 arg0
= CALL_EXPR_ARG (exp
, 0);
16187 arg1
= CALL_EXPR_ARG (exp
, 1);
16189 op0
= expand_normal (arg0
);
16190 op1
= expand_normal (arg1
);
16192 /* Call get_element_number to validate arg1 if it is a constant. */
16193 if (TREE_CODE (arg1
) == INTEGER_CST
)
16194 (void) get_element_number (TREE_TYPE (arg0
), arg1
);
16196 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
16197 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
16198 gcc_assert (VECTOR_MODE_P (mode0
));
16200 op0
= force_reg (mode0
, op0
);
16202 if (optimize
|| !target
|| !register_operand (target
, tmode
))
16203 target
= gen_reg_rtx (tmode
);
16205 rs6000_expand_vector_extract (target
, op0
, op1
);
16210 /* Expand the builtin in EXP and store the result in TARGET. Store
16211 true in *EXPANDEDP if we found a builtin to expand. */
16213 altivec_expand_builtin (tree exp
, rtx target
, bool *expandedp
)
16215 const struct builtin_description
*d
;
16217 enum insn_code icode
;
16218 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
16219 tree arg0
, arg1
, arg2
;
16221 machine_mode tmode
, mode0
;
16222 enum rs6000_builtins fcode
16223 = (enum rs6000_builtins
) DECL_FUNCTION_CODE (fndecl
);
16225 if (rs6000_overloaded_builtin_p (fcode
))
16228 error ("unresolved overload for Altivec builtin %qF", fndecl
);
16230 /* Given it is invalid, just generate a normal call. */
16231 return expand_call (exp
, target
, false);
16234 target
= altivec_expand_ld_builtin (exp
, target
, expandedp
);
16238 target
= altivec_expand_st_builtin (exp
, target
, expandedp
);
16242 target
= altivec_expand_dst_builtin (exp
, target
, expandedp
);
16250 case ALTIVEC_BUILTIN_STVX_V2DF
:
16251 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df_2op
, exp
);
16252 case ALTIVEC_BUILTIN_STVX_V2DI
:
16253 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di_2op
, exp
);
16254 case ALTIVEC_BUILTIN_STVX_V4SF
:
16255 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf_2op
, exp
);
16256 case ALTIVEC_BUILTIN_STVX
:
16257 case ALTIVEC_BUILTIN_STVX_V4SI
:
16258 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si_2op
, exp
);
16259 case ALTIVEC_BUILTIN_STVX_V8HI
:
16260 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi_2op
, exp
);
16261 case ALTIVEC_BUILTIN_STVX_V16QI
:
16262 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi_2op
, exp
);
16263 case ALTIVEC_BUILTIN_STVEBX
:
16264 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx
, exp
);
16265 case ALTIVEC_BUILTIN_STVEHX
:
16266 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx
, exp
);
16267 case ALTIVEC_BUILTIN_STVEWX
:
16268 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx
, exp
);
16269 case ALTIVEC_BUILTIN_STVXL_V2DF
:
16270 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df
, exp
);
16271 case ALTIVEC_BUILTIN_STVXL_V2DI
:
16272 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di
, exp
);
16273 case ALTIVEC_BUILTIN_STVXL_V4SF
:
16274 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf
, exp
);
16275 case ALTIVEC_BUILTIN_STVXL
:
16276 case ALTIVEC_BUILTIN_STVXL_V4SI
:
16277 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si
, exp
);
16278 case ALTIVEC_BUILTIN_STVXL_V8HI
:
16279 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi
, exp
);
16280 case ALTIVEC_BUILTIN_STVXL_V16QI
:
16281 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi
, exp
);
16283 case ALTIVEC_BUILTIN_STVLX
:
16284 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx
, exp
);
16285 case ALTIVEC_BUILTIN_STVLXL
:
16286 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl
, exp
);
16287 case ALTIVEC_BUILTIN_STVRX
:
16288 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx
, exp
);
16289 case ALTIVEC_BUILTIN_STVRXL
:
16290 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl
, exp
);
16292 case P9V_BUILTIN_STXVL
:
16293 return altivec_expand_stxvl_builtin (CODE_FOR_stxvl
, exp
);
16295 case VSX_BUILTIN_STXVD2X_V1TI
:
16296 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti
, exp
);
16297 case VSX_BUILTIN_STXVD2X_V2DF
:
16298 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df
, exp
);
16299 case VSX_BUILTIN_STXVD2X_V2DI
:
16300 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di
, exp
);
16301 case VSX_BUILTIN_STXVW4X_V4SF
:
16302 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf
, exp
);
16303 case VSX_BUILTIN_STXVW4X_V4SI
:
16304 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si
, exp
);
16305 case VSX_BUILTIN_STXVW4X_V8HI
:
16306 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi
, exp
);
16307 case VSX_BUILTIN_STXVW4X_V16QI
:
16308 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi
, exp
);
16310 /* For the following on big endian, it's ok to use any appropriate
16311 unaligned-supporting store, so use a generic expander. For
16312 little-endian, the exact element-reversing instruction must
16314 case VSX_BUILTIN_ST_ELEMREV_V2DF
:
16316 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_store_v2df
16317 : CODE_FOR_vsx_st_elemrev_v2df
);
16318 return altivec_expand_stv_builtin (code
, exp
);
16320 case VSX_BUILTIN_ST_ELEMREV_V2DI
:
16322 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_store_v2di
16323 : CODE_FOR_vsx_st_elemrev_v2di
);
16324 return altivec_expand_stv_builtin (code
, exp
);
16326 case VSX_BUILTIN_ST_ELEMREV_V4SF
:
16328 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_store_v4sf
16329 : CODE_FOR_vsx_st_elemrev_v4sf
);
16330 return altivec_expand_stv_builtin (code
, exp
);
16332 case VSX_BUILTIN_ST_ELEMREV_V4SI
:
16334 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_store_v4si
16335 : CODE_FOR_vsx_st_elemrev_v4si
);
16336 return altivec_expand_stv_builtin (code
, exp
);
16338 case VSX_BUILTIN_ST_ELEMREV_V8HI
:
16340 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_store_v8hi
16341 : CODE_FOR_vsx_st_elemrev_v8hi
);
16342 return altivec_expand_stv_builtin (code
, exp
);
16344 case VSX_BUILTIN_ST_ELEMREV_V16QI
:
16346 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_store_v16qi
16347 : CODE_FOR_vsx_st_elemrev_v16qi
);
16348 return altivec_expand_stv_builtin (code
, exp
);
16351 case ALTIVEC_BUILTIN_MFVSCR
:
16352 icode
= CODE_FOR_altivec_mfvscr
;
16353 tmode
= insn_data
[icode
].operand
[0].mode
;
16356 || GET_MODE (target
) != tmode
16357 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
16358 target
= gen_reg_rtx (tmode
);
16360 pat
= GEN_FCN (icode
) (target
);
16366 case ALTIVEC_BUILTIN_MTVSCR
:
16367 icode
= CODE_FOR_altivec_mtvscr
;
16368 arg0
= CALL_EXPR_ARG (exp
, 0);
16369 op0
= expand_normal (arg0
);
16370 mode0
= insn_data
[icode
].operand
[0].mode
;
16372 /* If we got invalid arguments bail out before generating bad rtl. */
16373 if (arg0
== error_mark_node
)
16376 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
16377 op0
= copy_to_mode_reg (mode0
, op0
);
16379 pat
= GEN_FCN (icode
) (op0
);
16384 case ALTIVEC_BUILTIN_DSSALL
:
16385 emit_insn (gen_altivec_dssall ());
16388 case ALTIVEC_BUILTIN_DSS
:
16389 icode
= CODE_FOR_altivec_dss
;
16390 arg0
= CALL_EXPR_ARG (exp
, 0);
16392 op0
= expand_normal (arg0
);
16393 mode0
= insn_data
[icode
].operand
[0].mode
;
16395 /* If we got invalid arguments bail out before generating bad rtl. */
16396 if (arg0
== error_mark_node
)
16399 if (TREE_CODE (arg0
) != INTEGER_CST
16400 || TREE_INT_CST_LOW (arg0
) & ~0x3)
16402 error ("argument to dss must be a 2-bit unsigned literal");
16406 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
16407 op0
= copy_to_mode_reg (mode0
, op0
);
16409 emit_insn (gen_altivec_dss (op0
));
16412 case ALTIVEC_BUILTIN_VEC_INIT_V4SI
:
16413 case ALTIVEC_BUILTIN_VEC_INIT_V8HI
:
16414 case ALTIVEC_BUILTIN_VEC_INIT_V16QI
:
16415 case ALTIVEC_BUILTIN_VEC_INIT_V4SF
:
16416 case VSX_BUILTIN_VEC_INIT_V2DF
:
16417 case VSX_BUILTIN_VEC_INIT_V2DI
:
16418 case VSX_BUILTIN_VEC_INIT_V1TI
:
16419 return altivec_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
16421 case ALTIVEC_BUILTIN_VEC_SET_V4SI
:
16422 case ALTIVEC_BUILTIN_VEC_SET_V8HI
:
16423 case ALTIVEC_BUILTIN_VEC_SET_V16QI
:
16424 case ALTIVEC_BUILTIN_VEC_SET_V4SF
:
16425 case VSX_BUILTIN_VEC_SET_V2DF
:
16426 case VSX_BUILTIN_VEC_SET_V2DI
:
16427 case VSX_BUILTIN_VEC_SET_V1TI
:
16428 return altivec_expand_vec_set_builtin (exp
);
16430 case ALTIVEC_BUILTIN_VEC_EXT_V4SI
:
16431 case ALTIVEC_BUILTIN_VEC_EXT_V8HI
:
16432 case ALTIVEC_BUILTIN_VEC_EXT_V16QI
:
16433 case ALTIVEC_BUILTIN_VEC_EXT_V4SF
:
16434 case VSX_BUILTIN_VEC_EXT_V2DF
:
16435 case VSX_BUILTIN_VEC_EXT_V2DI
:
16436 case VSX_BUILTIN_VEC_EXT_V1TI
:
16437 return altivec_expand_vec_ext_builtin (exp
, target
);
16439 case P9V_BUILTIN_VEXTRACT4B
:
16440 case P9V_BUILTIN_VEC_VEXTRACT4B
:
16441 arg1
= CALL_EXPR_ARG (exp
, 1);
16444 /* Generate a normal call if it is invalid. */
16445 if (arg1
== error_mark_node
)
16446 return expand_call (exp
, target
, false);
16448 if (TREE_CODE (arg1
) != INTEGER_CST
|| TREE_INT_CST_LOW (arg1
) > 12)
16450 error ("second argument to vec_vextract4b must be 0..12");
16451 return expand_call (exp
, target
, false);
16455 case P9V_BUILTIN_VINSERT4B
:
16456 case P9V_BUILTIN_VINSERT4B_DI
:
16457 case P9V_BUILTIN_VEC_VINSERT4B
:
16458 arg2
= CALL_EXPR_ARG (exp
, 2);
16461 /* Generate a normal call if it is invalid. */
16462 if (arg2
== error_mark_node
)
16463 return expand_call (exp
, target
, false);
16465 if (TREE_CODE (arg2
) != INTEGER_CST
|| TREE_INT_CST_LOW (arg2
) > 12)
16467 error ("third argument to vec_vinsert4b must be 0..12");
16468 return expand_call (exp
, target
, false);
16474 /* Fall through. */
16477 /* Expand abs* operations. */
16479 for (i
= 0; i
< ARRAY_SIZE (bdesc_abs
); i
++, d
++)
16480 if (d
->code
== fcode
)
16481 return altivec_expand_abs_builtin (d
->icode
, exp
, target
);
16483 /* Expand the AltiVec predicates. */
16484 d
= bdesc_altivec_preds
;
16485 for (i
= 0; i
< ARRAY_SIZE (bdesc_altivec_preds
); i
++, d
++)
16486 if (d
->code
== fcode
)
16487 return altivec_expand_predicate_builtin (d
->icode
, exp
, target
);
16489 /* LV* are funky. We initialized them differently. */
16492 case ALTIVEC_BUILTIN_LVSL
:
16493 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl
,
16494 exp
, target
, false);
16495 case ALTIVEC_BUILTIN_LVSR
:
16496 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr
,
16497 exp
, target
, false);
16498 case ALTIVEC_BUILTIN_LVEBX
:
16499 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx
,
16500 exp
, target
, false);
16501 case ALTIVEC_BUILTIN_LVEHX
:
16502 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx
,
16503 exp
, target
, false);
16504 case ALTIVEC_BUILTIN_LVEWX
:
16505 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx
,
16506 exp
, target
, false);
16507 case ALTIVEC_BUILTIN_LVXL_V2DF
:
16508 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df
,
16509 exp
, target
, false);
16510 case ALTIVEC_BUILTIN_LVXL_V2DI
:
16511 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di
,
16512 exp
, target
, false);
16513 case ALTIVEC_BUILTIN_LVXL_V4SF
:
16514 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf
,
16515 exp
, target
, false);
16516 case ALTIVEC_BUILTIN_LVXL
:
16517 case ALTIVEC_BUILTIN_LVXL_V4SI
:
16518 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si
,
16519 exp
, target
, false);
16520 case ALTIVEC_BUILTIN_LVXL_V8HI
:
16521 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi
,
16522 exp
, target
, false);
16523 case ALTIVEC_BUILTIN_LVXL_V16QI
:
16524 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi
,
16525 exp
, target
, false);
16526 case ALTIVEC_BUILTIN_LVX_V2DF
:
16527 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df_2op
,
16528 exp
, target
, false);
16529 case ALTIVEC_BUILTIN_LVX_V2DI
:
16530 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di_2op
,
16531 exp
, target
, false);
16532 case ALTIVEC_BUILTIN_LVX_V4SF
:
16533 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf_2op
,
16534 exp
, target
, false);
16535 case ALTIVEC_BUILTIN_LVX
:
16536 case ALTIVEC_BUILTIN_LVX_V4SI
:
16537 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si_2op
,
16538 exp
, target
, false);
16539 case ALTIVEC_BUILTIN_LVX_V8HI
:
16540 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi_2op
,
16541 exp
, target
, false);
16542 case ALTIVEC_BUILTIN_LVX_V16QI
:
16543 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi_2op
,
16544 exp
, target
, false);
16545 case ALTIVEC_BUILTIN_LVLX
:
16546 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx
,
16547 exp
, target
, true);
16548 case ALTIVEC_BUILTIN_LVLXL
:
16549 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl
,
16550 exp
, target
, true);
16551 case ALTIVEC_BUILTIN_LVRX
:
16552 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx
,
16553 exp
, target
, true);
16554 case ALTIVEC_BUILTIN_LVRXL
:
16555 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl
,
16556 exp
, target
, true);
16557 case VSX_BUILTIN_LXVD2X_V1TI
:
16558 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti
,
16559 exp
, target
, false);
16560 case VSX_BUILTIN_LXVD2X_V2DF
:
16561 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df
,
16562 exp
, target
, false);
16563 case VSX_BUILTIN_LXVD2X_V2DI
:
16564 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di
,
16565 exp
, target
, false);
16566 case VSX_BUILTIN_LXVW4X_V4SF
:
16567 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf
,
16568 exp
, target
, false);
16569 case VSX_BUILTIN_LXVW4X_V4SI
:
16570 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si
,
16571 exp
, target
, false);
16572 case VSX_BUILTIN_LXVW4X_V8HI
:
16573 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi
,
16574 exp
, target
, false);
16575 case VSX_BUILTIN_LXVW4X_V16QI
:
16576 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi
,
16577 exp
, target
, false);
16578 /* For the following on big endian, it's ok to use any appropriate
16579 unaligned-supporting load, so use a generic expander. For
16580 little-endian, the exact element-reversing instruction must
16582 case VSX_BUILTIN_LD_ELEMREV_V2DF
:
16584 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_load_v2df
16585 : CODE_FOR_vsx_ld_elemrev_v2df
);
16586 return altivec_expand_lv_builtin (code
, exp
, target
, false);
16588 case VSX_BUILTIN_LD_ELEMREV_V2DI
:
16590 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_load_v2di
16591 : CODE_FOR_vsx_ld_elemrev_v2di
);
16592 return altivec_expand_lv_builtin (code
, exp
, target
, false);
16594 case VSX_BUILTIN_LD_ELEMREV_V4SF
:
16596 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_load_v4sf
16597 : CODE_FOR_vsx_ld_elemrev_v4sf
);
16598 return altivec_expand_lv_builtin (code
, exp
, target
, false);
16600 case VSX_BUILTIN_LD_ELEMREV_V4SI
:
16602 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_load_v4si
16603 : CODE_FOR_vsx_ld_elemrev_v4si
);
16604 return altivec_expand_lv_builtin (code
, exp
, target
, false);
16606 case VSX_BUILTIN_LD_ELEMREV_V8HI
:
16608 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_load_v8hi
16609 : CODE_FOR_vsx_ld_elemrev_v8hi
);
16610 return altivec_expand_lv_builtin (code
, exp
, target
, false);
16612 case VSX_BUILTIN_LD_ELEMREV_V16QI
:
16614 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_load_v16qi
16615 : CODE_FOR_vsx_ld_elemrev_v16qi
);
16616 return altivec_expand_lv_builtin (code
, exp
, target
, false);
16621 /* Fall through. */
16624 *expandedp
= false;
16628 /* Expand the builtin in EXP and store the result in TARGET. Store
16629 true in *EXPANDEDP if we found a builtin to expand. */
16631 paired_expand_builtin (tree exp
, rtx target
, bool * expandedp
)
16633 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
16634 enum rs6000_builtins fcode
= (enum rs6000_builtins
) DECL_FUNCTION_CODE (fndecl
);
16635 const struct builtin_description
*d
;
16642 case PAIRED_BUILTIN_STX
:
16643 return paired_expand_stv_builtin (CODE_FOR_paired_stx
, exp
);
16644 case PAIRED_BUILTIN_LX
:
16645 return paired_expand_lv_builtin (CODE_FOR_paired_lx
, exp
, target
);
16648 /* Fall through. */
16651 /* Expand the paired predicates. */
16652 d
= bdesc_paired_preds
;
16653 for (i
= 0; i
< ARRAY_SIZE (bdesc_paired_preds
); i
++, d
++)
16654 if (d
->code
== fcode
)
16655 return paired_expand_predicate_builtin (d
->icode
, exp
, target
);
16657 *expandedp
= false;
16661 /* Binops that need to be initialized manually, but can be expanded
16662 automagically by rs6000_expand_binop_builtin. */
16663 static const struct builtin_description bdesc_2arg_spe
[] =
16665 { RS6000_BTM_SPE
, CODE_FOR_spe_evlddx
, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX
},
16666 { RS6000_BTM_SPE
, CODE_FOR_spe_evldwx
, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX
},
16667 { RS6000_BTM_SPE
, CODE_FOR_spe_evldhx
, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX
},
16668 { RS6000_BTM_SPE
, CODE_FOR_spe_evlwhex
, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX
},
16669 { RS6000_BTM_SPE
, CODE_FOR_spe_evlwhoux
, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX
},
16670 { RS6000_BTM_SPE
, CODE_FOR_spe_evlwhosx
, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX
},
16671 { RS6000_BTM_SPE
, CODE_FOR_spe_evlwwsplatx
, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX
},
16672 { RS6000_BTM_SPE
, CODE_FOR_spe_evlwhsplatx
, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX
},
16673 { RS6000_BTM_SPE
, CODE_FOR_spe_evlhhesplatx
, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX
},
16674 { RS6000_BTM_SPE
, CODE_FOR_spe_evlhhousplatx
, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX
},
16675 { RS6000_BTM_SPE
, CODE_FOR_spe_evlhhossplatx
, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX
},
16676 { RS6000_BTM_SPE
, CODE_FOR_spe_evldd
, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD
},
16677 { RS6000_BTM_SPE
, CODE_FOR_spe_evldw
, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW
},
16678 { RS6000_BTM_SPE
, CODE_FOR_spe_evldh
, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH
},
16679 { RS6000_BTM_SPE
, CODE_FOR_spe_evlwhe
, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE
},
16680 { RS6000_BTM_SPE
, CODE_FOR_spe_evlwhou
, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU
},
16681 { RS6000_BTM_SPE
, CODE_FOR_spe_evlwhos
, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS
},
16682 { RS6000_BTM_SPE
, CODE_FOR_spe_evlwwsplat
, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT
},
16683 { RS6000_BTM_SPE
, CODE_FOR_spe_evlwhsplat
, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT
},
16684 { RS6000_BTM_SPE
, CODE_FOR_spe_evlhhesplat
, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT
},
16685 { RS6000_BTM_SPE
, CODE_FOR_spe_evlhhousplat
, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT
},
16686 { RS6000_BTM_SPE
, CODE_FOR_spe_evlhhossplat
, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT
}
16689 /* Expand the builtin in EXP and store the result in TARGET. Store
16690 true in *EXPANDEDP if we found a builtin to expand.
16692 This expands the SPE builtins that are not simple unary and binary
16695 spe_expand_builtin (tree exp
, rtx target
, bool *expandedp
)
16697 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
16699 enum rs6000_builtins fcode
= (enum rs6000_builtins
) DECL_FUNCTION_CODE (fndecl
);
16700 enum insn_code icode
;
16701 machine_mode tmode
, mode0
;
16703 const struct builtin_description
*d
;
16708 /* Syntax check for a 5-bit unsigned immediate. */
16711 case SPE_BUILTIN_EVSTDD
:
16712 case SPE_BUILTIN_EVSTDH
:
16713 case SPE_BUILTIN_EVSTDW
:
16714 case SPE_BUILTIN_EVSTWHE
:
16715 case SPE_BUILTIN_EVSTWHO
:
16716 case SPE_BUILTIN_EVSTWWE
:
16717 case SPE_BUILTIN_EVSTWWO
:
16718 arg1
= CALL_EXPR_ARG (exp
, 2);
16719 if (TREE_CODE (arg1
) != INTEGER_CST
16720 || TREE_INT_CST_LOW (arg1
) & ~0x1f)
16722 error ("argument 2 must be a 5-bit unsigned literal");
16730 /* The evsplat*i instructions are not quite generic. */
16733 case SPE_BUILTIN_EVSPLATFI
:
16734 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi
,
16736 case SPE_BUILTIN_EVSPLATI
:
16737 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati
,
16743 d
= bdesc_2arg_spe
;
16744 for (i
= 0; i
< ARRAY_SIZE (bdesc_2arg_spe
); ++i
, ++d
)
16745 if (d
->code
== fcode
)
16746 return rs6000_expand_binop_builtin (d
->icode
, exp
, target
);
16748 d
= bdesc_spe_predicates
;
16749 for (i
= 0; i
< ARRAY_SIZE (bdesc_spe_predicates
); ++i
, ++d
)
16750 if (d
->code
== fcode
)
16751 return spe_expand_predicate_builtin (d
->icode
, exp
, target
);
16753 d
= bdesc_spe_evsel
;
16754 for (i
= 0; i
< ARRAY_SIZE (bdesc_spe_evsel
); ++i
, ++d
)
16755 if (d
->code
== fcode
)
16756 return spe_expand_evsel_builtin (d
->icode
, exp
, target
);
16760 case SPE_BUILTIN_EVSTDDX
:
16761 return spe_expand_stv_builtin (CODE_FOR_spe_evstddx
, exp
);
16762 case SPE_BUILTIN_EVSTDHX
:
16763 return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx
, exp
);
16764 case SPE_BUILTIN_EVSTDWX
:
16765 return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx
, exp
);
16766 case SPE_BUILTIN_EVSTWHEX
:
16767 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex
, exp
);
16768 case SPE_BUILTIN_EVSTWHOX
:
16769 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox
, exp
);
16770 case SPE_BUILTIN_EVSTWWEX
:
16771 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex
, exp
);
16772 case SPE_BUILTIN_EVSTWWOX
:
16773 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox
, exp
);
16774 case SPE_BUILTIN_EVSTDD
:
16775 return spe_expand_stv_builtin (CODE_FOR_spe_evstdd
, exp
);
16776 case SPE_BUILTIN_EVSTDH
:
16777 return spe_expand_stv_builtin (CODE_FOR_spe_evstdh
, exp
);
16778 case SPE_BUILTIN_EVSTDW
:
16779 return spe_expand_stv_builtin (CODE_FOR_spe_evstdw
, exp
);
16780 case SPE_BUILTIN_EVSTWHE
:
16781 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe
, exp
);
16782 case SPE_BUILTIN_EVSTWHO
:
16783 return spe_expand_stv_builtin (CODE_FOR_spe_evstwho
, exp
);
16784 case SPE_BUILTIN_EVSTWWE
:
16785 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe
, exp
);
16786 case SPE_BUILTIN_EVSTWWO
:
16787 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo
, exp
);
16788 case SPE_BUILTIN_MFSPEFSCR
:
16789 icode
= CODE_FOR_spe_mfspefscr
;
16790 tmode
= insn_data
[icode
].operand
[0].mode
;
16793 || GET_MODE (target
) != tmode
16794 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
16795 target
= gen_reg_rtx (tmode
);
16797 pat
= GEN_FCN (icode
) (target
);
16802 case SPE_BUILTIN_MTSPEFSCR
:
16803 icode
= CODE_FOR_spe_mtspefscr
;
16804 arg0
= CALL_EXPR_ARG (exp
, 0);
16805 op0
= expand_normal (arg0
);
16806 mode0
= insn_data
[icode
].operand
[0].mode
;
16808 if (arg0
== error_mark_node
)
16811 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
16812 op0
= copy_to_mode_reg (mode0
, op0
);
16814 pat
= GEN_FCN (icode
) (op0
);
16822 *expandedp
= false;
16827 paired_expand_predicate_builtin (enum insn_code icode
, tree exp
, rtx target
)
16829 rtx pat
, scratch
, tmp
;
16830 tree form
= CALL_EXPR_ARG (exp
, 0);
16831 tree arg0
= CALL_EXPR_ARG (exp
, 1);
16832 tree arg1
= CALL_EXPR_ARG (exp
, 2);
16833 rtx op0
= expand_normal (arg0
);
16834 rtx op1
= expand_normal (arg1
);
16835 machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
16836 machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
16838 enum rtx_code code
;
16840 if (TREE_CODE (form
) != INTEGER_CST
)
16842 error ("argument 1 of __builtin_paired_predicate must be a constant");
16846 form_int
= TREE_INT_CST_LOW (form
);
16848 gcc_assert (mode0
== mode1
);
16850 if (arg0
== error_mark_node
|| arg1
== error_mark_node
)
16854 || GET_MODE (target
) != SImode
16855 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, SImode
))
16856 target
= gen_reg_rtx (SImode
);
16857 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
16858 op0
= copy_to_mode_reg (mode0
, op0
);
16859 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
16860 op1
= copy_to_mode_reg (mode1
, op1
);
16862 scratch
= gen_reg_rtx (CCFPmode
);
16864 pat
= GEN_FCN (icode
) (scratch
, op0
, op1
);
16886 emit_insn (gen_move_from_CR_ov_bit (target
, scratch
));
16889 error ("argument 1 of __builtin_paired_predicate is out of range");
16893 tmp
= gen_rtx_fmt_ee (code
, SImode
, scratch
, const0_rtx
);
16894 emit_move_insn (target
, tmp
);
16899 spe_expand_predicate_builtin (enum insn_code icode
, tree exp
, rtx target
)
16901 rtx pat
, scratch
, tmp
;
16902 tree form
= CALL_EXPR_ARG (exp
, 0);
16903 tree arg0
= CALL_EXPR_ARG (exp
, 1);
16904 tree arg1
= CALL_EXPR_ARG (exp
, 2);
16905 rtx op0
= expand_normal (arg0
);
16906 rtx op1
= expand_normal (arg1
);
16907 machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
16908 machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
16910 enum rtx_code code
;
16912 if (TREE_CODE (form
) != INTEGER_CST
)
16914 error ("argument 1 of __builtin_spe_predicate must be a constant");
16918 form_int
= TREE_INT_CST_LOW (form
);
16920 gcc_assert (mode0
== mode1
);
16922 if (arg0
== error_mark_node
|| arg1
== error_mark_node
)
16926 || GET_MODE (target
) != SImode
16927 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, SImode
))
16928 target
= gen_reg_rtx (SImode
);
16930 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
16931 op0
= copy_to_mode_reg (mode0
, op0
);
16932 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
16933 op1
= copy_to_mode_reg (mode1
, op1
);
16935 scratch
= gen_reg_rtx (CCmode
);
16937 pat
= GEN_FCN (icode
) (scratch
, op0
, op1
);
16942 /* There are 4 variants for each predicate: _any_, _all_, _upper_,
16943 _lower_. We use one compare, but look in different bits of the
16944 CR for each variant.
16946 There are 2 elements in each SPE simd type (upper/lower). The CR
16947 bits are set as follows:
16949 BIT0 | BIT 1 | BIT 2 | BIT 3
16950 U | L | (U | L) | (U & L)
16952 So, for an "all" relationship, BIT 3 would be set.
16953 For an "any" relationship, BIT 2 would be set. Etc.
16955 Following traditional nomenclature, these bits map to:
16957 BIT0 | BIT 1 | BIT 2 | BIT 3
16960 Later, we will generate rtl to look in the LT/EQ/EQ/OV bits.
16965 /* All variant. OV bit. */
16967 /* We need to get to the OV bit, which is the ORDERED bit. We
16968 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
16969 that's ugly and will make validate_condition_mode die.
16970 So let's just use another pattern. */
16971 emit_insn (gen_move_from_CR_ov_bit (target
, scratch
));
16973 /* Any variant. EQ bit. */
16977 /* Upper variant. LT bit. */
16981 /* Lower variant. GT bit. */
16986 error ("argument 1 of __builtin_spe_predicate is out of range");
16990 tmp
= gen_rtx_fmt_ee (code
, SImode
, scratch
, const0_rtx
);
16991 emit_move_insn (target
, tmp
);
16996 /* The evsel builtins look like this:
16998 e = __builtin_spe_evsel_OP (a, b, c, d);
17000 and work like this:
17002 e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
17003 e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];
17007 spe_expand_evsel_builtin (enum insn_code icode
, tree exp
, rtx target
)
17010 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17011 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17012 tree arg2
= CALL_EXPR_ARG (exp
, 2);
17013 tree arg3
= CALL_EXPR_ARG (exp
, 3);
17014 rtx op0
= expand_normal (arg0
);
17015 rtx op1
= expand_normal (arg1
);
17016 rtx op2
= expand_normal (arg2
);
17017 rtx op3
= expand_normal (arg3
);
17018 machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17019 machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
17021 gcc_assert (mode0
== mode1
);
17023 if (arg0
== error_mark_node
|| arg1
== error_mark_node
17024 || arg2
== error_mark_node
|| arg3
== error_mark_node
)
17028 || GET_MODE (target
) != mode0
17029 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, mode0
))
17030 target
= gen_reg_rtx (mode0
);
17032 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17033 op0
= copy_to_mode_reg (mode0
, op0
);
17034 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
17035 op1
= copy_to_mode_reg (mode0
, op1
);
17036 if (! (*insn_data
[icode
].operand
[1].predicate
) (op2
, mode1
))
17037 op2
= copy_to_mode_reg (mode0
, op2
);
17038 if (! (*insn_data
[icode
].operand
[1].predicate
) (op3
, mode1
))
17039 op3
= copy_to_mode_reg (mode0
, op3
);
17041 /* Generate the compare. */
17042 scratch
= gen_reg_rtx (CCmode
);
17043 pat
= GEN_FCN (icode
) (scratch
, op0
, op1
);
17048 if (mode0
== V2SImode
)
17049 emit_insn (gen_spe_evsel (target
, op2
, op3
, scratch
));
17051 emit_insn (gen_spe_evsel_fs (target
, op2
, op3
, scratch
));
17056 /* Raise an error message for a builtin function that is called without the
17057 appropriate target options being set. */
17060 rs6000_invalid_builtin (enum rs6000_builtins fncode
)
17062 size_t uns_fncode
= (size_t)fncode
;
17063 const char *name
= rs6000_builtin_info
[uns_fncode
].name
;
17064 HOST_WIDE_INT fnmask
= rs6000_builtin_info
[uns_fncode
].mask
;
17066 gcc_assert (name
!= NULL
);
17067 if ((fnmask
& RS6000_BTM_CELL
) != 0)
17068 error ("Builtin function %s is only valid for the cell processor", name
);
17069 else if ((fnmask
& RS6000_BTM_VSX
) != 0)
17070 error ("Builtin function %s requires the -mvsx option", name
);
17071 else if ((fnmask
& RS6000_BTM_HTM
) != 0)
17072 error ("Builtin function %s requires the -mhtm option", name
);
17073 else if ((fnmask
& RS6000_BTM_ALTIVEC
) != 0)
17074 error ("Builtin function %s requires the -maltivec option", name
);
17075 else if ((fnmask
& RS6000_BTM_PAIRED
) != 0)
17076 error ("Builtin function %s requires the -mpaired option", name
);
17077 else if ((fnmask
& RS6000_BTM_SPE
) != 0)
17078 error ("Builtin function %s requires the -mspe option", name
);
17079 else if ((fnmask
& (RS6000_BTM_DFP
| RS6000_BTM_P8_VECTOR
))
17080 == (RS6000_BTM_DFP
| RS6000_BTM_P8_VECTOR
))
17081 error ("Builtin function %s requires the -mhard-dfp and"
17082 " -mpower8-vector options", name
);
17083 else if ((fnmask
& RS6000_BTM_DFP
) != 0)
17084 error ("Builtin function %s requires the -mhard-dfp option", name
);
17085 else if ((fnmask
& RS6000_BTM_P8_VECTOR
) != 0)
17086 error ("Builtin function %s requires the -mpower8-vector option", name
);
17087 else if ((fnmask
& (RS6000_BTM_P9_VECTOR
| RS6000_BTM_64BIT
))
17088 == (RS6000_BTM_P9_VECTOR
| RS6000_BTM_64BIT
))
17089 error ("Builtin function %s requires the -mcpu=power9 and"
17090 " -m64 options", name
);
17091 else if ((fnmask
& RS6000_BTM_P9_VECTOR
) != 0)
17092 error ("Builtin function %s requires the -mcpu=power9 option", name
);
17093 else if ((fnmask
& (RS6000_BTM_P9_MISC
| RS6000_BTM_64BIT
))
17094 == (RS6000_BTM_P9_MISC
| RS6000_BTM_64BIT
))
17095 error ("Builtin function %s requires the -mcpu=power9 and"
17096 " -m64 options", name
);
17097 else if ((fnmask
& RS6000_BTM_P9_MISC
) == RS6000_BTM_P9_MISC
)
17098 error ("Builtin function %s requires the -mcpu=power9 option", name
);
17099 else if ((fnmask
& (RS6000_BTM_HARD_FLOAT
| RS6000_BTM_LDBL128
))
17100 == (RS6000_BTM_HARD_FLOAT
| RS6000_BTM_LDBL128
))
17101 error ("Builtin function %s requires the -mhard-float and"
17102 " -mlong-double-128 options", name
);
17103 else if ((fnmask
& RS6000_BTM_HARD_FLOAT
) != 0)
17104 error ("Builtin function %s requires the -mhard-float option", name
);
17105 else if ((fnmask
& RS6000_BTM_FLOAT128
) != 0)
17106 error ("Builtin function %s requires the -mfloat128 option", name
);
17108 error ("Builtin function %s is not supported with the current options",
17112 /* Target hook for early folding of built-ins, shamelessly stolen
17116 rs6000_fold_builtin (tree fndecl
, int n_args ATTRIBUTE_UNUSED
,
17117 tree
*args
, bool ignore ATTRIBUTE_UNUSED
)
17119 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
17121 enum rs6000_builtins fn_code
17122 = (enum rs6000_builtins
) DECL_FUNCTION_CODE (fndecl
);
17125 case RS6000_BUILTIN_NANQ
:
17126 case RS6000_BUILTIN_NANSQ
:
17128 tree type
= TREE_TYPE (TREE_TYPE (fndecl
));
17129 const char *str
= c_getstr (*args
);
17130 int quiet
= fn_code
== RS6000_BUILTIN_NANQ
;
17131 REAL_VALUE_TYPE real
;
17133 if (str
&& real_nan (&real
, str
, quiet
, TYPE_MODE (type
)))
17134 return build_real (type
, real
);
17137 case RS6000_BUILTIN_INFQ
:
17138 case RS6000_BUILTIN_HUGE_VALQ
:
17140 tree type
= TREE_TYPE (TREE_TYPE (fndecl
));
17141 REAL_VALUE_TYPE inf
;
17143 return build_real (type
, inf
);
17149 #ifdef SUBTARGET_FOLD_BUILTIN
17150 return SUBTARGET_FOLD_BUILTIN (fndecl
, n_args
, args
, ignore
);
17156 /* Fold a machine-dependent built-in in GIMPLE. (For folding into
17157 a constant, use rs6000_fold_builtin.) */
17160 rs6000_gimple_fold_builtin (gimple_stmt_iterator
*gsi
)
17162 gimple
*stmt
= gsi_stmt (*gsi
);
17163 tree fndecl
= gimple_call_fndecl (stmt
);
17164 gcc_checking_assert (fndecl
&& DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
);
17165 enum rs6000_builtins fn_code
17166 = (enum rs6000_builtins
) DECL_FUNCTION_CODE (fndecl
);
17167 tree arg0
, arg1
, lhs
;
17171 /* Flavors of vec_add. We deliberately don't expand
17172 P8V_BUILTIN_VADDUQM as it gets lowered from V1TImode to
17173 TImode, resulting in much poorer code generation. */
17174 case ALTIVEC_BUILTIN_VADDUBM
:
17175 case ALTIVEC_BUILTIN_VADDUHM
:
17176 case ALTIVEC_BUILTIN_VADDUWM
:
17177 case P8V_BUILTIN_VADDUDM
:
17178 case ALTIVEC_BUILTIN_VADDFP
:
17179 case VSX_BUILTIN_XVADDDP
:
17181 arg0
= gimple_call_arg (stmt
, 0);
17182 arg1
= gimple_call_arg (stmt
, 1);
17183 lhs
= gimple_call_lhs (stmt
);
17184 gimple
*g
= gimple_build_assign (lhs
, PLUS_EXPR
, arg0
, arg1
);
17185 gimple_set_location (g
, gimple_location (stmt
));
17186 gsi_replace (gsi
, g
, true);
17189 /* Flavors of vec_sub. We deliberately don't expand
17190 P8V_BUILTIN_VSUBUQM. */
17191 case ALTIVEC_BUILTIN_VSUBUBM
:
17192 case ALTIVEC_BUILTIN_VSUBUHM
:
17193 case ALTIVEC_BUILTIN_VSUBUWM
:
17194 case P8V_BUILTIN_VSUBUDM
:
17195 case ALTIVEC_BUILTIN_VSUBFP
:
17196 case VSX_BUILTIN_XVSUBDP
:
17198 arg0
= gimple_call_arg (stmt
, 0);
17199 arg1
= gimple_call_arg (stmt
, 1);
17200 lhs
= gimple_call_lhs (stmt
);
17201 gimple
*g
= gimple_build_assign (lhs
, MINUS_EXPR
, arg0
, arg1
);
17202 gimple_set_location (g
, gimple_location (stmt
));
17203 gsi_replace (gsi
, g
, true);
17206 case VSX_BUILTIN_XVMULSP
:
17207 case VSX_BUILTIN_XVMULDP
:
17209 arg0
= gimple_call_arg (stmt
, 0);
17210 arg1
= gimple_call_arg (stmt
, 1);
17211 lhs
= gimple_call_lhs (stmt
);
17212 gimple
*g
= gimple_build_assign (lhs
, MULT_EXPR
, arg0
, arg1
);
17213 gimple_set_location (g
, gimple_location (stmt
));
17214 gsi_replace (gsi
, g
, true);
17217 /* Even element flavors of vec_mul (signed). */
17218 case ALTIVEC_BUILTIN_VMULESB
:
17219 case ALTIVEC_BUILTIN_VMULESH
:
17220 /* Even element flavors of vec_mul (unsigned). */
17221 case ALTIVEC_BUILTIN_VMULEUB
:
17222 case ALTIVEC_BUILTIN_VMULEUH
:
17224 arg0
= gimple_call_arg (stmt
, 0);
17225 arg1
= gimple_call_arg (stmt
, 1);
17226 lhs
= gimple_call_lhs (stmt
);
17227 gimple
*g
= gimple_build_assign (lhs
, VEC_WIDEN_MULT_EVEN_EXPR
, arg0
, arg1
);
17228 gimple_set_location (g
, gimple_location (stmt
));
17229 gsi_replace (gsi
, g
, true);
17232 /* Odd element flavors of vec_mul (signed). */
17233 case ALTIVEC_BUILTIN_VMULOSB
:
17234 case ALTIVEC_BUILTIN_VMULOSH
:
17235 /* Odd element flavors of vec_mul (unsigned). */
17236 case ALTIVEC_BUILTIN_VMULOUB
:
17237 case ALTIVEC_BUILTIN_VMULOUH
:
17239 arg0
= gimple_call_arg (stmt
, 0);
17240 arg1
= gimple_call_arg (stmt
, 1);
17241 lhs
= gimple_call_lhs (stmt
);
17242 gimple
*g
= gimple_build_assign (lhs
, VEC_WIDEN_MULT_ODD_EXPR
, arg0
, arg1
);
17243 gimple_set_location (g
, gimple_location (stmt
));
17244 gsi_replace (gsi
, g
, true);
17247 /* Flavors of vec_div (Integer). */
17248 case VSX_BUILTIN_DIV_V2DI
:
17249 case VSX_BUILTIN_UDIV_V2DI
:
17251 arg0
= gimple_call_arg (stmt
, 0);
17252 arg1
= gimple_call_arg (stmt
, 1);
17253 lhs
= gimple_call_lhs (stmt
);
17254 gimple
*g
= gimple_build_assign (lhs
, TRUNC_DIV_EXPR
, arg0
, arg1
);
17255 gimple_set_location (g
, gimple_location (stmt
));
17256 gsi_replace (gsi
, g
, true);
17259 /* Flavors of vec_div (Float). */
17260 case VSX_BUILTIN_XVDIVSP
:
17261 case VSX_BUILTIN_XVDIVDP
:
17263 arg0
= gimple_call_arg (stmt
, 0);
17264 arg1
= gimple_call_arg (stmt
, 1);
17265 lhs
= gimple_call_lhs (stmt
);
17266 gimple
*g
= gimple_build_assign (lhs
, RDIV_EXPR
, arg0
, arg1
);
17267 gimple_set_location (g
, gimple_location (stmt
));
17268 gsi_replace (gsi
, g
, true);
17271 /* Flavors of vec_and. */
17272 case ALTIVEC_BUILTIN_VAND
:
17274 arg0
= gimple_call_arg (stmt
, 0);
17275 arg1
= gimple_call_arg (stmt
, 1);
17276 lhs
= gimple_call_lhs (stmt
);
17277 gimple
*g
= gimple_build_assign (lhs
, BIT_AND_EXPR
, arg0
, arg1
);
17278 gimple_set_location (g
, gimple_location (stmt
));
17279 gsi_replace (gsi
, g
, true);
17282 /* Flavors of vec_andc. */
17283 case ALTIVEC_BUILTIN_VANDC
:
17285 arg0
= gimple_call_arg (stmt
, 0);
17286 arg1
= gimple_call_arg (stmt
, 1);
17287 lhs
= gimple_call_lhs (stmt
);
17288 tree temp
= create_tmp_reg_or_ssa_name (TREE_TYPE (arg1
));
17289 gimple
*g
= gimple_build_assign(temp
, BIT_NOT_EXPR
, arg1
);
17290 gimple_set_location (g
, gimple_location (stmt
));
17291 gsi_insert_before(gsi
, g
, GSI_SAME_STMT
);
17292 g
= gimple_build_assign (lhs
, BIT_AND_EXPR
, arg0
, temp
);
17293 gimple_set_location (g
, gimple_location (stmt
));
17294 gsi_replace (gsi
, g
, true);
17297 /* Flavors of vec_nand. */
17298 case P8V_BUILTIN_VEC_NAND
:
17299 case P8V_BUILTIN_NAND_V16QI
:
17300 case P8V_BUILTIN_NAND_V8HI
:
17301 case P8V_BUILTIN_NAND_V4SI
:
17302 case P8V_BUILTIN_NAND_V4SF
:
17303 case P8V_BUILTIN_NAND_V2DF
:
17304 case P8V_BUILTIN_NAND_V2DI
:
17306 arg0
= gimple_call_arg (stmt
, 0);
17307 arg1
= gimple_call_arg (stmt
, 1);
17308 lhs
= gimple_call_lhs (stmt
);
17309 tree temp
= create_tmp_reg_or_ssa_name (TREE_TYPE (arg1
));
17310 gimple
*g
= gimple_build_assign(temp
, BIT_AND_EXPR
, arg0
, arg1
);
17311 gimple_set_location (g
, gimple_location (stmt
));
17312 gsi_insert_before(gsi
, g
, GSI_SAME_STMT
);
17313 g
= gimple_build_assign (lhs
, BIT_NOT_EXPR
, temp
);
17314 gimple_set_location (g
, gimple_location (stmt
));
17315 gsi_replace (gsi
, g
, true);
17318 /* Flavors of vec_or. */
17319 case ALTIVEC_BUILTIN_VOR
:
17321 arg0
= gimple_call_arg (stmt
, 0);
17322 arg1
= gimple_call_arg (stmt
, 1);
17323 lhs
= gimple_call_lhs (stmt
);
17324 gimple
*g
= gimple_build_assign (lhs
, BIT_IOR_EXPR
, arg0
, arg1
);
17325 gimple_set_location (g
, gimple_location (stmt
));
17326 gsi_replace (gsi
, g
, true);
17329 /* flavors of vec_orc. */
17330 case P8V_BUILTIN_ORC_V16QI
:
17331 case P8V_BUILTIN_ORC_V8HI
:
17332 case P8V_BUILTIN_ORC_V4SI
:
17333 case P8V_BUILTIN_ORC_V4SF
:
17334 case P8V_BUILTIN_ORC_V2DF
:
17335 case P8V_BUILTIN_ORC_V2DI
:
17337 arg0
= gimple_call_arg (stmt
, 0);
17338 arg1
= gimple_call_arg (stmt
, 1);
17339 lhs
= gimple_call_lhs (stmt
);
17340 tree temp
= create_tmp_reg_or_ssa_name (TREE_TYPE (arg1
));
17341 gimple
*g
= gimple_build_assign(temp
, BIT_NOT_EXPR
, arg1
);
17342 gimple_set_location (g
, gimple_location (stmt
));
17343 gsi_insert_before(gsi
, g
, GSI_SAME_STMT
);
17344 g
= gimple_build_assign (lhs
, BIT_IOR_EXPR
, arg0
, temp
);
17345 gimple_set_location (g
, gimple_location (stmt
));
17346 gsi_replace (gsi
, g
, true);
17349 /* Flavors of vec_xor. */
17350 case ALTIVEC_BUILTIN_VXOR
:
17352 arg0
= gimple_call_arg (stmt
, 0);
17353 arg1
= gimple_call_arg (stmt
, 1);
17354 lhs
= gimple_call_lhs (stmt
);
17355 gimple
*g
= gimple_build_assign (lhs
, BIT_XOR_EXPR
, arg0
, arg1
);
17356 gimple_set_location (g
, gimple_location (stmt
));
17357 gsi_replace (gsi
, g
, true);
17360 /* Flavors of vec_nor. */
17361 case ALTIVEC_BUILTIN_VNOR
:
17363 arg0
= gimple_call_arg (stmt
, 0);
17364 arg1
= gimple_call_arg (stmt
, 1);
17365 lhs
= gimple_call_lhs (stmt
);
17366 tree temp
= create_tmp_reg_or_ssa_name (TREE_TYPE (arg1
));
17367 gimple
*g
= gimple_build_assign (temp
, BIT_IOR_EXPR
, arg0
, arg1
);
17368 gimple_set_location (g
, gimple_location (stmt
));
17369 gsi_insert_before(gsi
, g
, GSI_SAME_STMT
);
17370 g
= gimple_build_assign (lhs
, BIT_NOT_EXPR
, temp
);
17371 gimple_set_location (g
, gimple_location (stmt
));
17372 gsi_replace (gsi
, g
, true);
17382 /* Expand an expression EXP that calls a built-in function,
17383 with result going to TARGET if that's convenient
17384 (and in mode MODE if that's convenient).
17385 SUBTARGET may be used as the target for computing one of EXP's operands.
17386 IGNORE is nonzero if the value is to be ignored. */
17389 rs6000_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
17390 machine_mode mode ATTRIBUTE_UNUSED
,
17391 int ignore ATTRIBUTE_UNUSED
)
17393 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
17394 enum rs6000_builtins fcode
17395 = (enum rs6000_builtins
)DECL_FUNCTION_CODE (fndecl
);
17396 size_t uns_fcode
= (size_t)fcode
;
17397 const struct builtin_description
*d
;
17401 HOST_WIDE_INT mask
= rs6000_builtin_info
[uns_fcode
].mask
;
17402 bool func_valid_p
= ((rs6000_builtin_mask
& mask
) == mask
);
17404 if (TARGET_DEBUG_BUILTIN
)
17406 enum insn_code icode
= rs6000_builtin_info
[uns_fcode
].icode
;
17407 const char *name1
= rs6000_builtin_info
[uns_fcode
].name
;
17408 const char *name2
= ((icode
!= CODE_FOR_nothing
)
17409 ? get_insn_name ((int)icode
)
17413 switch (rs6000_builtin_info
[uns_fcode
].attr
& RS6000_BTC_TYPE_MASK
)
17415 default: name3
= "unknown"; break;
17416 case RS6000_BTC_SPECIAL
: name3
= "special"; break;
17417 case RS6000_BTC_UNARY
: name3
= "unary"; break;
17418 case RS6000_BTC_BINARY
: name3
= "binary"; break;
17419 case RS6000_BTC_TERNARY
: name3
= "ternary"; break;
17420 case RS6000_BTC_PREDICATE
: name3
= "predicate"; break;
17421 case RS6000_BTC_ABS
: name3
= "abs"; break;
17422 case RS6000_BTC_EVSEL
: name3
= "evsel"; break;
17423 case RS6000_BTC_DST
: name3
= "dst"; break;
17428 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
17429 (name1
) ? name1
: "---", fcode
,
17430 (name2
) ? name2
: "---", (int)icode
,
17432 func_valid_p
? "" : ", not valid");
17437 rs6000_invalid_builtin (fcode
);
17439 /* Given it is invalid, just generate a normal call. */
17440 return expand_call (exp
, target
, ignore
);
17445 case RS6000_BUILTIN_RECIP
:
17446 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3
, exp
, target
);
17448 case RS6000_BUILTIN_RECIPF
:
17449 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3
, exp
, target
);
17451 case RS6000_BUILTIN_RSQRTF
:
17452 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2
, exp
, target
);
17454 case RS6000_BUILTIN_RSQRT
:
17455 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2
, exp
, target
);
17457 case POWER7_BUILTIN_BPERMD
:
17458 return rs6000_expand_binop_builtin (((TARGET_64BIT
)
17459 ? CODE_FOR_bpermd_di
17460 : CODE_FOR_bpermd_si
), exp
, target
);
17462 case RS6000_BUILTIN_GET_TB
:
17463 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase
,
17466 case RS6000_BUILTIN_MFTB
:
17467 return rs6000_expand_zeroop_builtin (((TARGET_64BIT
)
17468 ? CODE_FOR_rs6000_mftb_di
17469 : CODE_FOR_rs6000_mftb_si
),
17472 case RS6000_BUILTIN_MFFS
:
17473 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs
, target
);
17475 case RS6000_BUILTIN_MTFSF
:
17476 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf
, exp
);
17478 case RS6000_BUILTIN_CPU_INIT
:
17479 case RS6000_BUILTIN_CPU_IS
:
17480 case RS6000_BUILTIN_CPU_SUPPORTS
:
17481 return cpu_expand_builtin (fcode
, exp
, target
);
17483 case ALTIVEC_BUILTIN_MASK_FOR_LOAD
:
17484 case ALTIVEC_BUILTIN_MASK_FOR_STORE
:
17486 int icode
= (BYTES_BIG_ENDIAN
? (int) CODE_FOR_altivec_lvsr_direct
17487 : (int) CODE_FOR_altivec_lvsl_direct
);
17488 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17489 machine_mode mode
= insn_data
[icode
].operand
[1].mode
;
17493 gcc_assert (TARGET_ALTIVEC
);
17495 arg
= CALL_EXPR_ARG (exp
, 0);
17496 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg
)));
17497 op
= expand_expr (arg
, NULL_RTX
, Pmode
, EXPAND_NORMAL
);
17498 addr
= memory_address (mode
, op
);
17499 if (fcode
== ALTIVEC_BUILTIN_MASK_FOR_STORE
)
17503 /* For the load case need to negate the address. */
17504 op
= gen_reg_rtx (GET_MODE (addr
));
17505 emit_insn (gen_rtx_SET (op
, gen_rtx_NEG (GET_MODE (addr
), addr
)));
17507 op
= gen_rtx_MEM (mode
, op
);
17510 || GET_MODE (target
) != tmode
17511 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17512 target
= gen_reg_rtx (tmode
);
17514 pat
= GEN_FCN (icode
) (target
, op
);
17522 case ALTIVEC_BUILTIN_VCFUX
:
17523 case ALTIVEC_BUILTIN_VCFSX
:
17524 case ALTIVEC_BUILTIN_VCTUXS
:
17525 case ALTIVEC_BUILTIN_VCTSXS
:
17526 /* FIXME: There's got to be a nicer way to handle this case than
17527 constructing a new CALL_EXPR. */
17528 if (call_expr_nargs (exp
) == 1)
17530 exp
= build_call_nary (TREE_TYPE (exp
), CALL_EXPR_FN (exp
),
17531 2, CALL_EXPR_ARG (exp
, 0), integer_zero_node
);
17539 if (TARGET_ALTIVEC
)
17541 ret
= altivec_expand_builtin (exp
, target
, &success
);
17548 ret
= spe_expand_builtin (exp
, target
, &success
);
17553 if (TARGET_PAIRED_FLOAT
)
17555 ret
= paired_expand_builtin (exp
, target
, &success
);
17562 ret
= htm_expand_builtin (exp
, target
, &success
);
17568 unsigned attr
= rs6000_builtin_info
[uns_fcode
].attr
& RS6000_BTC_TYPE_MASK
;
17569 /* RS6000_BTC_SPECIAL represents no-operand operators. */
17570 gcc_assert (attr
== RS6000_BTC_UNARY
17571 || attr
== RS6000_BTC_BINARY
17572 || attr
== RS6000_BTC_TERNARY
17573 || attr
== RS6000_BTC_SPECIAL
);
17575 /* Handle simple unary operations. */
17577 for (i
= 0; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
17578 if (d
->code
== fcode
)
17579 return rs6000_expand_unop_builtin (d
->icode
, exp
, target
);
17581 /* Handle simple binary operations. */
17583 for (i
= 0; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
17584 if (d
->code
== fcode
)
17585 return rs6000_expand_binop_builtin (d
->icode
, exp
, target
);
17587 /* Handle simple ternary operations. */
17589 for (i
= 0; i
< ARRAY_SIZE (bdesc_3arg
); i
++, d
++)
17590 if (d
->code
== fcode
)
17591 return rs6000_expand_ternop_builtin (d
->icode
, exp
, target
);
17593 /* Handle simple no-argument operations. */
17595 for (i
= 0; i
< ARRAY_SIZE (bdesc_0arg
); i
++, d
++)
17596 if (d
->code
== fcode
)
17597 return rs6000_expand_zeroop_builtin (d
->icode
, target
);
17599 gcc_unreachable ();
17602 /* Create a builtin vector type with a name. Taking care not to give
17603 the canonical type a name. */
17606 rs6000_vector_type (const char *name
, tree elt_type
, unsigned num_elts
)
17608 tree result
= build_vector_type (elt_type
, num_elts
);
17610 /* Copy so we don't give the canonical type a name. */
17611 result
= build_variant_type_copy (result
);
17613 add_builtin_type (name
, result
);
17619 rs6000_init_builtins (void)
17625 if (TARGET_DEBUG_BUILTIN
)
17626 fprintf (stderr
, "rs6000_init_builtins%s%s%s%s\n",
17627 (TARGET_PAIRED_FLOAT
) ? ", paired" : "",
17628 (TARGET_SPE
) ? ", spe" : "",
17629 (TARGET_ALTIVEC
) ? ", altivec" : "",
17630 (TARGET_VSX
) ? ", vsx" : "");
17632 V2SI_type_node
= build_vector_type (intSI_type_node
, 2);
17633 V2SF_type_node
= build_vector_type (float_type_node
, 2);
17634 V2DI_type_node
= rs6000_vector_type (TARGET_POWERPC64
? "__vector long"
17635 : "__vector long long",
17636 intDI_type_node
, 2);
17637 V2DF_type_node
= rs6000_vector_type ("__vector double", double_type_node
, 2);
17638 V4HI_type_node
= build_vector_type (intHI_type_node
, 4);
17639 V4SI_type_node
= rs6000_vector_type ("__vector signed int",
17640 intSI_type_node
, 4);
17641 V4SF_type_node
= rs6000_vector_type ("__vector float", float_type_node
, 4);
17642 V8HI_type_node
= rs6000_vector_type ("__vector signed short",
17643 intHI_type_node
, 8);
17644 V16QI_type_node
= rs6000_vector_type ("__vector signed char",
17645 intQI_type_node
, 16);
17647 unsigned_V16QI_type_node
= rs6000_vector_type ("__vector unsigned char",
17648 unsigned_intQI_type_node
, 16);
17649 unsigned_V8HI_type_node
= rs6000_vector_type ("__vector unsigned short",
17650 unsigned_intHI_type_node
, 8);
17651 unsigned_V4SI_type_node
= rs6000_vector_type ("__vector unsigned int",
17652 unsigned_intSI_type_node
, 4);
17653 unsigned_V2DI_type_node
= rs6000_vector_type (TARGET_POWERPC64
17654 ? "__vector unsigned long"
17655 : "__vector unsigned long long",
17656 unsigned_intDI_type_node
, 2);
17658 opaque_V2SF_type_node
= build_opaque_vector_type (float_type_node
, 2);
17659 opaque_V2SI_type_node
= build_opaque_vector_type (intSI_type_node
, 2);
17660 opaque_p_V2SI_type_node
= build_pointer_type (opaque_V2SI_type_node
);
17661 opaque_V4SI_type_node
= build_opaque_vector_type (intSI_type_node
, 4);
17663 const_str_type_node
17664 = build_pointer_type (build_qualified_type (char_type_node
,
17667 /* We use V1TI mode as a special container to hold __int128_t items that
17668 must live in VSX registers. */
17669 if (intTI_type_node
)
17671 V1TI_type_node
= rs6000_vector_type ("__vector __int128",
17672 intTI_type_node
, 1);
17673 unsigned_V1TI_type_node
17674 = rs6000_vector_type ("__vector unsigned __int128",
17675 unsigned_intTI_type_node
, 1);
17678 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
17679 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
17680 'vector unsigned short'. */
17682 bool_char_type_node
= build_distinct_type_copy (unsigned_intQI_type_node
);
17683 bool_short_type_node
= build_distinct_type_copy (unsigned_intHI_type_node
);
17684 bool_int_type_node
= build_distinct_type_copy (unsigned_intSI_type_node
);
17685 bool_long_type_node
= build_distinct_type_copy (unsigned_intDI_type_node
);
17686 pixel_type_node
= build_distinct_type_copy (unsigned_intHI_type_node
);
17688 long_integer_type_internal_node
= long_integer_type_node
;
17689 long_unsigned_type_internal_node
= long_unsigned_type_node
;
17690 long_long_integer_type_internal_node
= long_long_integer_type_node
;
17691 long_long_unsigned_type_internal_node
= long_long_unsigned_type_node
;
17692 intQI_type_internal_node
= intQI_type_node
;
17693 uintQI_type_internal_node
= unsigned_intQI_type_node
;
17694 intHI_type_internal_node
= intHI_type_node
;
17695 uintHI_type_internal_node
= unsigned_intHI_type_node
;
17696 intSI_type_internal_node
= intSI_type_node
;
17697 uintSI_type_internal_node
= unsigned_intSI_type_node
;
17698 intDI_type_internal_node
= intDI_type_node
;
17699 uintDI_type_internal_node
= unsigned_intDI_type_node
;
17700 intTI_type_internal_node
= intTI_type_node
;
17701 uintTI_type_internal_node
= unsigned_intTI_type_node
;
17702 float_type_internal_node
= float_type_node
;
17703 double_type_internal_node
= double_type_node
;
17704 long_double_type_internal_node
= long_double_type_node
;
17705 dfloat64_type_internal_node
= dfloat64_type_node
;
17706 dfloat128_type_internal_node
= dfloat128_type_node
;
17707 void_type_internal_node
= void_type_node
;
17709 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
17710 IFmode is the IBM extended 128-bit format that is a pair of doubles.
17711 TFmode will be either IEEE 128-bit floating point or the IBM double-double
17712 format that uses a pair of doubles, depending on the switches and
17715 We do not enable the actual __float128 keyword unless the user explicitly
17716 asks for it, because the library support is not yet complete.
17718 If we don't support for either 128-bit IBM double double or IEEE 128-bit
17719 floating point, we need make sure the type is non-zero or else self-test
17720 fails during bootstrap.
17722 We don't register a built-in type for __ibm128 if the type is the same as
17723 long double. Instead we add a #define for __ibm128 in
17724 rs6000_cpu_cpp_builtins to long double. */
17725 if (TARGET_LONG_DOUBLE_128
&& FLOAT128_IEEE_P (TFmode
))
17727 ibm128_float_type_node
= make_node (REAL_TYPE
);
17728 TYPE_PRECISION (ibm128_float_type_node
) = 128;
17729 SET_TYPE_MODE (ibm128_float_type_node
, IFmode
);
17730 layout_type (ibm128_float_type_node
);
17732 lang_hooks
.types
.register_builtin_type (ibm128_float_type_node
,
17736 ibm128_float_type_node
= long_double_type_node
;
17738 if (TARGET_FLOAT128_KEYWORD
)
17740 ieee128_float_type_node
= float128_type_node
;
17741 lang_hooks
.types
.register_builtin_type (ieee128_float_type_node
,
17745 else if (TARGET_FLOAT128_TYPE
)
17747 ieee128_float_type_node
= make_node (REAL_TYPE
);
17748 TYPE_PRECISION (ibm128_float_type_node
) = 128;
17749 SET_TYPE_MODE (ieee128_float_type_node
, KFmode
);
17750 layout_type (ieee128_float_type_node
);
17752 /* If we are not exporting the __float128/_Float128 keywords, we need a
17753 keyword to get the types created. Use __ieee128 as the dummy
17755 lang_hooks
.types
.register_builtin_type (ieee128_float_type_node
,
17760 ieee128_float_type_node
= long_double_type_node
;
17762 /* Initialize the modes for builtin_function_type, mapping a machine mode to
17764 builtin_mode_to_type
[QImode
][0] = integer_type_node
;
17765 builtin_mode_to_type
[HImode
][0] = integer_type_node
;
17766 builtin_mode_to_type
[SImode
][0] = intSI_type_node
;
17767 builtin_mode_to_type
[SImode
][1] = unsigned_intSI_type_node
;
17768 builtin_mode_to_type
[DImode
][0] = intDI_type_node
;
17769 builtin_mode_to_type
[DImode
][1] = unsigned_intDI_type_node
;
17770 builtin_mode_to_type
[TImode
][0] = intTI_type_node
;
17771 builtin_mode_to_type
[TImode
][1] = unsigned_intTI_type_node
;
17772 builtin_mode_to_type
[SFmode
][0] = float_type_node
;
17773 builtin_mode_to_type
[DFmode
][0] = double_type_node
;
17774 builtin_mode_to_type
[IFmode
][0] = ibm128_float_type_node
;
17775 builtin_mode_to_type
[KFmode
][0] = ieee128_float_type_node
;
17776 builtin_mode_to_type
[TFmode
][0] = long_double_type_node
;
17777 builtin_mode_to_type
[DDmode
][0] = dfloat64_type_node
;
17778 builtin_mode_to_type
[TDmode
][0] = dfloat128_type_node
;
17779 builtin_mode_to_type
[V1TImode
][0] = V1TI_type_node
;
17780 builtin_mode_to_type
[V1TImode
][1] = unsigned_V1TI_type_node
;
17781 builtin_mode_to_type
[V2SImode
][0] = V2SI_type_node
;
17782 builtin_mode_to_type
[V2SFmode
][0] = V2SF_type_node
;
17783 builtin_mode_to_type
[V2DImode
][0] = V2DI_type_node
;
17784 builtin_mode_to_type
[V2DImode
][1] = unsigned_V2DI_type_node
;
17785 builtin_mode_to_type
[V2DFmode
][0] = V2DF_type_node
;
17786 builtin_mode_to_type
[V4HImode
][0] = V4HI_type_node
;
17787 builtin_mode_to_type
[V4SImode
][0] = V4SI_type_node
;
17788 builtin_mode_to_type
[V4SImode
][1] = unsigned_V4SI_type_node
;
17789 builtin_mode_to_type
[V4SFmode
][0] = V4SF_type_node
;
17790 builtin_mode_to_type
[V8HImode
][0] = V8HI_type_node
;
17791 builtin_mode_to_type
[V8HImode
][1] = unsigned_V8HI_type_node
;
17792 builtin_mode_to_type
[V16QImode
][0] = V16QI_type_node
;
17793 builtin_mode_to_type
[V16QImode
][1] = unsigned_V16QI_type_node
;
17795 tdecl
= add_builtin_type ("__bool char", bool_char_type_node
);
17796 TYPE_NAME (bool_char_type_node
) = tdecl
;
17798 tdecl
= add_builtin_type ("__bool short", bool_short_type_node
);
17799 TYPE_NAME (bool_short_type_node
) = tdecl
;
17801 tdecl
= add_builtin_type ("__bool int", bool_int_type_node
);
17802 TYPE_NAME (bool_int_type_node
) = tdecl
;
17804 tdecl
= add_builtin_type ("__pixel", pixel_type_node
);
17805 TYPE_NAME (pixel_type_node
) = tdecl
;
17807 bool_V16QI_type_node
= rs6000_vector_type ("__vector __bool char",
17808 bool_char_type_node
, 16);
17809 bool_V8HI_type_node
= rs6000_vector_type ("__vector __bool short",
17810 bool_short_type_node
, 8);
17811 bool_V4SI_type_node
= rs6000_vector_type ("__vector __bool int",
17812 bool_int_type_node
, 4);
17813 bool_V2DI_type_node
= rs6000_vector_type (TARGET_POWERPC64
17814 ? "__vector __bool long"
17815 : "__vector __bool long long",
17816 bool_long_type_node
, 2);
17817 pixel_V8HI_type_node
= rs6000_vector_type ("__vector __pixel",
17818 pixel_type_node
, 8);
17820 /* Paired and SPE builtins are only available if you build a compiler with
17821 the appropriate options, so only create those builtins with the
17822 appropriate compiler option. Create Altivec and VSX builtins on machines
17823 with at least the general purpose extensions (970 and newer) to allow the
17824 use of the target attribute. */
17825 if (TARGET_PAIRED_FLOAT
)
17826 paired_init_builtins ();
17828 spe_init_builtins ();
17829 if (TARGET_EXTRA_BUILTINS
)
17830 altivec_init_builtins ();
17832 htm_init_builtins ();
17834 if (TARGET_EXTRA_BUILTINS
|| TARGET_SPE
|| TARGET_PAIRED_FLOAT
)
17835 rs6000_common_init_builtins ();
17837 ftype
= build_function_type_list (ieee128_float_type_node
,
17838 const_str_type_node
, NULL_TREE
);
17839 def_builtin ("__builtin_nanq", ftype
, RS6000_BUILTIN_NANQ
);
17840 def_builtin ("__builtin_nansq", ftype
, RS6000_BUILTIN_NANSQ
);
17842 ftype
= build_function_type_list (ieee128_float_type_node
, NULL_TREE
);
17843 def_builtin ("__builtin_infq", ftype
, RS6000_BUILTIN_INFQ
);
17844 def_builtin ("__builtin_huge_valq", ftype
, RS6000_BUILTIN_HUGE_VALQ
);
17846 ftype
= builtin_function_type (DFmode
, DFmode
, DFmode
, VOIDmode
,
17847 RS6000_BUILTIN_RECIP
, "__builtin_recipdiv");
17848 def_builtin ("__builtin_recipdiv", ftype
, RS6000_BUILTIN_RECIP
);
17850 ftype
= builtin_function_type (SFmode
, SFmode
, SFmode
, VOIDmode
,
17851 RS6000_BUILTIN_RECIPF
, "__builtin_recipdivf");
17852 def_builtin ("__builtin_recipdivf", ftype
, RS6000_BUILTIN_RECIPF
);
17854 ftype
= builtin_function_type (DFmode
, DFmode
, VOIDmode
, VOIDmode
,
17855 RS6000_BUILTIN_RSQRT
, "__builtin_rsqrt");
17856 def_builtin ("__builtin_rsqrt", ftype
, RS6000_BUILTIN_RSQRT
);
17858 ftype
= builtin_function_type (SFmode
, SFmode
, VOIDmode
, VOIDmode
,
17859 RS6000_BUILTIN_RSQRTF
, "__builtin_rsqrtf");
17860 def_builtin ("__builtin_rsqrtf", ftype
, RS6000_BUILTIN_RSQRTF
);
17862 mode
= (TARGET_64BIT
) ? DImode
: SImode
;
17863 ftype
= builtin_function_type (mode
, mode
, mode
, VOIDmode
,
17864 POWER7_BUILTIN_BPERMD
, "__builtin_bpermd");
17865 def_builtin ("__builtin_bpermd", ftype
, POWER7_BUILTIN_BPERMD
);
17867 ftype
= build_function_type_list (unsigned_intDI_type_node
,
17869 def_builtin ("__builtin_ppc_get_timebase", ftype
, RS6000_BUILTIN_GET_TB
);
17872 ftype
= build_function_type_list (unsigned_intDI_type_node
,
17875 ftype
= build_function_type_list (unsigned_intSI_type_node
,
17877 def_builtin ("__builtin_ppc_mftb", ftype
, RS6000_BUILTIN_MFTB
);
17879 ftype
= build_function_type_list (double_type_node
, NULL_TREE
);
17880 def_builtin ("__builtin_mffs", ftype
, RS6000_BUILTIN_MFFS
);
17882 ftype
= build_function_type_list (void_type_node
,
17883 intSI_type_node
, double_type_node
,
17885 def_builtin ("__builtin_mtfsf", ftype
, RS6000_BUILTIN_MTFSF
);
17887 ftype
= build_function_type_list (void_type_node
, NULL_TREE
);
17888 def_builtin ("__builtin_cpu_init", ftype
, RS6000_BUILTIN_CPU_INIT
);
17890 ftype
= build_function_type_list (bool_int_type_node
, const_ptr_type_node
,
17892 def_builtin ("__builtin_cpu_is", ftype
, RS6000_BUILTIN_CPU_IS
);
17893 def_builtin ("__builtin_cpu_supports", ftype
, RS6000_BUILTIN_CPU_SUPPORTS
);
17895 /* AIX libm provides clog as __clog. */
17896 if (TARGET_XCOFF
&&
17897 (tdecl
= builtin_decl_explicit (BUILT_IN_CLOG
)) != NULL_TREE
)
17898 set_user_assembler_name (tdecl
, "__clog");
17900 #ifdef SUBTARGET_INIT_BUILTINS
17901 SUBTARGET_INIT_BUILTINS
;
17905 /* Returns the rs6000 builtin decl for CODE. */
17908 rs6000_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
17910 HOST_WIDE_INT fnmask
;
17912 if (code
>= RS6000_BUILTIN_COUNT
)
17913 return error_mark_node
;
17915 fnmask
= rs6000_builtin_info
[code
].mask
;
17916 if ((fnmask
& rs6000_builtin_mask
) != fnmask
)
17918 rs6000_invalid_builtin ((enum rs6000_builtins
)code
);
17919 return error_mark_node
;
17922 return rs6000_builtin_decls
[code
];
17926 spe_init_builtins (void)
17928 tree puint_type_node
= build_pointer_type (unsigned_type_node
);
17929 tree pushort_type_node
= build_pointer_type (short_unsigned_type_node
);
17930 const struct builtin_description
*d
;
17932 HOST_WIDE_INT builtin_mask
= rs6000_builtin_mask
;
17934 tree v2si_ftype_4_v2si
17935 = build_function_type_list (opaque_V2SI_type_node
,
17936 opaque_V2SI_type_node
,
17937 opaque_V2SI_type_node
,
17938 opaque_V2SI_type_node
,
17939 opaque_V2SI_type_node
,
17942 tree v2sf_ftype_4_v2sf
17943 = build_function_type_list (opaque_V2SF_type_node
,
17944 opaque_V2SF_type_node
,
17945 opaque_V2SF_type_node
,
17946 opaque_V2SF_type_node
,
17947 opaque_V2SF_type_node
,
17950 tree int_ftype_int_v2si_v2si
17951 = build_function_type_list (integer_type_node
,
17953 opaque_V2SI_type_node
,
17954 opaque_V2SI_type_node
,
17957 tree int_ftype_int_v2sf_v2sf
17958 = build_function_type_list (integer_type_node
,
17960 opaque_V2SF_type_node
,
17961 opaque_V2SF_type_node
,
17964 tree void_ftype_v2si_puint_int
17965 = build_function_type_list (void_type_node
,
17966 opaque_V2SI_type_node
,
17971 tree void_ftype_v2si_puint_char
17972 = build_function_type_list (void_type_node
,
17973 opaque_V2SI_type_node
,
17978 tree void_ftype_v2si_pv2si_int
17979 = build_function_type_list (void_type_node
,
17980 opaque_V2SI_type_node
,
17981 opaque_p_V2SI_type_node
,
17985 tree void_ftype_v2si_pv2si_char
17986 = build_function_type_list (void_type_node
,
17987 opaque_V2SI_type_node
,
17988 opaque_p_V2SI_type_node
,
17992 tree void_ftype_int
17993 = build_function_type_list (void_type_node
, integer_type_node
, NULL_TREE
);
17995 tree int_ftype_void
17996 = build_function_type_list (integer_type_node
, NULL_TREE
);
17998 tree v2si_ftype_pv2si_int
17999 = build_function_type_list (opaque_V2SI_type_node
,
18000 opaque_p_V2SI_type_node
,
18004 tree v2si_ftype_puint_int
18005 = build_function_type_list (opaque_V2SI_type_node
,
18010 tree v2si_ftype_pushort_int
18011 = build_function_type_list (opaque_V2SI_type_node
,
18016 tree v2si_ftype_signed_char
18017 = build_function_type_list (opaque_V2SI_type_node
,
18018 signed_char_type_node
,
18021 add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node
);
18023 /* Initialize irregular SPE builtins. */
18025 def_builtin ("__builtin_spe_mtspefscr", void_ftype_int
, SPE_BUILTIN_MTSPEFSCR
);
18026 def_builtin ("__builtin_spe_mfspefscr", int_ftype_void
, SPE_BUILTIN_MFSPEFSCR
);
18027 def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int
, SPE_BUILTIN_EVSTDDX
);
18028 def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int
, SPE_BUILTIN_EVSTDHX
);
18029 def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int
, SPE_BUILTIN_EVSTDWX
);
18030 def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int
, SPE_BUILTIN_EVSTWHEX
);
18031 def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int
, SPE_BUILTIN_EVSTWHOX
);
18032 def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int
, SPE_BUILTIN_EVSTWWEX
);
18033 def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int
, SPE_BUILTIN_EVSTWWOX
);
18034 def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char
, SPE_BUILTIN_EVSTDD
);
18035 def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char
, SPE_BUILTIN_EVSTDH
);
18036 def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char
, SPE_BUILTIN_EVSTDW
);
18037 def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char
, SPE_BUILTIN_EVSTWHE
);
18038 def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char
, SPE_BUILTIN_EVSTWHO
);
18039 def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char
, SPE_BUILTIN_EVSTWWE
);
18040 def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char
, SPE_BUILTIN_EVSTWWO
);
18041 def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char
, SPE_BUILTIN_EVSPLATFI
);
18042 def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char
, SPE_BUILTIN_EVSPLATI
);
18045 def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int
, SPE_BUILTIN_EVLDDX
);
18046 def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int
, SPE_BUILTIN_EVLDWX
);
18047 def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int
, SPE_BUILTIN_EVLDHX
);
18048 def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int
, SPE_BUILTIN_EVLWHEX
);
18049 def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int
, SPE_BUILTIN_EVLWHOUX
);
18050 def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int
, SPE_BUILTIN_EVLWHOSX
);
18051 def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int
, SPE_BUILTIN_EVLWWSPLATX
);
18052 def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int
, SPE_BUILTIN_EVLWHSPLATX
);
18053 def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int
, SPE_BUILTIN_EVLHHESPLATX
);
18054 def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int
, SPE_BUILTIN_EVLHHOUSPLATX
);
18055 def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int
, SPE_BUILTIN_EVLHHOSSPLATX
);
18056 def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int
, SPE_BUILTIN_EVLDD
);
18057 def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int
, SPE_BUILTIN_EVLDW
);
18058 def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int
, SPE_BUILTIN_EVLDH
);
18059 def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int
, SPE_BUILTIN_EVLHHESPLAT
);
18060 def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int
, SPE_BUILTIN_EVLHHOSSPLAT
);
18061 def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int
, SPE_BUILTIN_EVLHHOUSPLAT
);
18062 def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int
, SPE_BUILTIN_EVLWHE
);
18063 def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int
, SPE_BUILTIN_EVLWHOS
);
18064 def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int
, SPE_BUILTIN_EVLWHOU
);
18065 def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int
, SPE_BUILTIN_EVLWHSPLAT
);
18066 def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int
, SPE_BUILTIN_EVLWWSPLAT
);
18069 d
= bdesc_spe_predicates
;
18070 for (i
= 0; i
< ARRAY_SIZE (bdesc_spe_predicates
); ++i
, d
++)
18073 HOST_WIDE_INT mask
= d
->mask
;
18075 if ((mask
& builtin_mask
) != mask
)
18077 if (TARGET_DEBUG_BUILTIN
)
18078 fprintf (stderr
, "spe_init_builtins, skip predicate %s\n",
18083 /* Cannot define builtin if the instruction is disabled. */
18084 gcc_assert (d
->icode
!= CODE_FOR_nothing
);
18085 switch (insn_data
[d
->icode
].operand
[1].mode
)
18088 type
= int_ftype_int_v2si_v2si
;
18091 type
= int_ftype_int_v2sf_v2sf
;
18094 gcc_unreachable ();
18097 def_builtin (d
->name
, type
, d
->code
);
18100 /* Evsel predicates. */
18101 d
= bdesc_spe_evsel
;
18102 for (i
= 0; i
< ARRAY_SIZE (bdesc_spe_evsel
); ++i
, d
++)
18105 HOST_WIDE_INT mask
= d
->mask
;
18107 if ((mask
& builtin_mask
) != mask
)
18109 if (TARGET_DEBUG_BUILTIN
)
18110 fprintf (stderr
, "spe_init_builtins, skip evsel %s\n",
18115 /* Cannot define builtin if the instruction is disabled. */
18116 gcc_assert (d
->icode
!= CODE_FOR_nothing
);
18117 switch (insn_data
[d
->icode
].operand
[1].mode
)
18120 type
= v2si_ftype_4_v2si
;
18123 type
= v2sf_ftype_4_v2sf
;
18126 gcc_unreachable ();
18129 def_builtin (d
->name
, type
, d
->code
);
18134 paired_init_builtins (void)
18136 const struct builtin_description
*d
;
18138 HOST_WIDE_INT builtin_mask
= rs6000_builtin_mask
;
18140 tree int_ftype_int_v2sf_v2sf
18141 = build_function_type_list (integer_type_node
,
18146 tree pcfloat_type_node
=
18147 build_pointer_type (build_qualified_type
18148 (float_type_node
, TYPE_QUAL_CONST
));
18150 tree v2sf_ftype_long_pcfloat
= build_function_type_list (V2SF_type_node
,
18151 long_integer_type_node
,
18154 tree void_ftype_v2sf_long_pcfloat
=
18155 build_function_type_list (void_type_node
,
18157 long_integer_type_node
,
18162 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat
,
18163 PAIRED_BUILTIN_LX
);
18166 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat
,
18167 PAIRED_BUILTIN_STX
);
18170 d
= bdesc_paired_preds
;
18171 for (i
= 0; i
< ARRAY_SIZE (bdesc_paired_preds
); ++i
, d
++)
18174 HOST_WIDE_INT mask
= d
->mask
;
18176 if ((mask
& builtin_mask
) != mask
)
18178 if (TARGET_DEBUG_BUILTIN
)
18179 fprintf (stderr
, "paired_init_builtins, skip predicate %s\n",
18184 /* Cannot define builtin if the instruction is disabled. */
18185 gcc_assert (d
->icode
!= CODE_FOR_nothing
);
18187 if (TARGET_DEBUG_BUILTIN
)
18188 fprintf (stderr
, "paired pred #%d, insn = %s [%d], mode = %s\n",
18189 (int)i
, get_insn_name (d
->icode
), (int)d
->icode
,
18190 GET_MODE_NAME (insn_data
[d
->icode
].operand
[1].mode
));
18192 switch (insn_data
[d
->icode
].operand
[1].mode
)
18195 type
= int_ftype_int_v2sf_v2sf
;
18198 gcc_unreachable ();
18201 def_builtin (d
->name
, type
, d
->code
);
18206 altivec_init_builtins (void)
18208 const struct builtin_description
*d
;
18212 HOST_WIDE_INT builtin_mask
= rs6000_builtin_mask
;
18214 tree pvoid_type_node
= build_pointer_type (void_type_node
);
18216 tree pcvoid_type_node
18217 = build_pointer_type (build_qualified_type (void_type_node
,
18220 tree int_ftype_opaque
18221 = build_function_type_list (integer_type_node
,
18222 opaque_V4SI_type_node
, NULL_TREE
);
18223 tree opaque_ftype_opaque
18224 = build_function_type_list (integer_type_node
, NULL_TREE
);
18225 tree opaque_ftype_opaque_int
18226 = build_function_type_list (opaque_V4SI_type_node
,
18227 opaque_V4SI_type_node
, integer_type_node
, NULL_TREE
);
18228 tree opaque_ftype_opaque_opaque_int
18229 = build_function_type_list (opaque_V4SI_type_node
,
18230 opaque_V4SI_type_node
, opaque_V4SI_type_node
,
18231 integer_type_node
, NULL_TREE
);
18232 tree opaque_ftype_opaque_opaque_opaque
18233 = build_function_type_list (opaque_V4SI_type_node
,
18234 opaque_V4SI_type_node
, opaque_V4SI_type_node
,
18235 opaque_V4SI_type_node
, NULL_TREE
);
18236 tree opaque_ftype_opaque_opaque
18237 = build_function_type_list (opaque_V4SI_type_node
,
18238 opaque_V4SI_type_node
, opaque_V4SI_type_node
,
18240 tree int_ftype_int_opaque_opaque
18241 = build_function_type_list (integer_type_node
,
18242 integer_type_node
, opaque_V4SI_type_node
,
18243 opaque_V4SI_type_node
, NULL_TREE
);
18244 tree int_ftype_int_v4si_v4si
18245 = build_function_type_list (integer_type_node
,
18246 integer_type_node
, V4SI_type_node
,
18247 V4SI_type_node
, NULL_TREE
);
18248 tree int_ftype_int_v2di_v2di
18249 = build_function_type_list (integer_type_node
,
18250 integer_type_node
, V2DI_type_node
,
18251 V2DI_type_node
, NULL_TREE
);
18252 tree void_ftype_v4si
18253 = build_function_type_list (void_type_node
, V4SI_type_node
, NULL_TREE
);
18254 tree v8hi_ftype_void
18255 = build_function_type_list (V8HI_type_node
, NULL_TREE
);
18256 tree void_ftype_void
18257 = build_function_type_list (void_type_node
, NULL_TREE
);
18258 tree void_ftype_int
18259 = build_function_type_list (void_type_node
, integer_type_node
, NULL_TREE
);
18261 tree opaque_ftype_long_pcvoid
18262 = build_function_type_list (opaque_V4SI_type_node
,
18263 long_integer_type_node
, pcvoid_type_node
,
18265 tree v16qi_ftype_long_pcvoid
18266 = build_function_type_list (V16QI_type_node
,
18267 long_integer_type_node
, pcvoid_type_node
,
18269 tree v8hi_ftype_long_pcvoid
18270 = build_function_type_list (V8HI_type_node
,
18271 long_integer_type_node
, pcvoid_type_node
,
18273 tree v4si_ftype_long_pcvoid
18274 = build_function_type_list (V4SI_type_node
,
18275 long_integer_type_node
, pcvoid_type_node
,
18277 tree v4sf_ftype_long_pcvoid
18278 = build_function_type_list (V4SF_type_node
,
18279 long_integer_type_node
, pcvoid_type_node
,
18281 tree v2df_ftype_long_pcvoid
18282 = build_function_type_list (V2DF_type_node
,
18283 long_integer_type_node
, pcvoid_type_node
,
18285 tree v2di_ftype_long_pcvoid
18286 = build_function_type_list (V2DI_type_node
,
18287 long_integer_type_node
, pcvoid_type_node
,
18290 tree void_ftype_opaque_long_pvoid
18291 = build_function_type_list (void_type_node
,
18292 opaque_V4SI_type_node
, long_integer_type_node
,
18293 pvoid_type_node
, NULL_TREE
);
18294 tree void_ftype_v4si_long_pvoid
18295 = build_function_type_list (void_type_node
,
18296 V4SI_type_node
, long_integer_type_node
,
18297 pvoid_type_node
, NULL_TREE
);
18298 tree void_ftype_v16qi_long_pvoid
18299 = build_function_type_list (void_type_node
,
18300 V16QI_type_node
, long_integer_type_node
,
18301 pvoid_type_node
, NULL_TREE
);
18303 tree void_ftype_v16qi_pvoid_long
18304 = build_function_type_list (void_type_node
,
18305 V16QI_type_node
, pvoid_type_node
,
18306 long_integer_type_node
, NULL_TREE
);
18308 tree void_ftype_v8hi_long_pvoid
18309 = build_function_type_list (void_type_node
,
18310 V8HI_type_node
, long_integer_type_node
,
18311 pvoid_type_node
, NULL_TREE
);
18312 tree void_ftype_v4sf_long_pvoid
18313 = build_function_type_list (void_type_node
,
18314 V4SF_type_node
, long_integer_type_node
,
18315 pvoid_type_node
, NULL_TREE
);
18316 tree void_ftype_v2df_long_pvoid
18317 = build_function_type_list (void_type_node
,
18318 V2DF_type_node
, long_integer_type_node
,
18319 pvoid_type_node
, NULL_TREE
);
18320 tree void_ftype_v2di_long_pvoid
18321 = build_function_type_list (void_type_node
,
18322 V2DI_type_node
, long_integer_type_node
,
18323 pvoid_type_node
, NULL_TREE
);
18324 tree int_ftype_int_v8hi_v8hi
18325 = build_function_type_list (integer_type_node
,
18326 integer_type_node
, V8HI_type_node
,
18327 V8HI_type_node
, NULL_TREE
);
18328 tree int_ftype_int_v16qi_v16qi
18329 = build_function_type_list (integer_type_node
,
18330 integer_type_node
, V16QI_type_node
,
18331 V16QI_type_node
, NULL_TREE
);
18332 tree int_ftype_int_v4sf_v4sf
18333 = build_function_type_list (integer_type_node
,
18334 integer_type_node
, V4SF_type_node
,
18335 V4SF_type_node
, NULL_TREE
);
18336 tree int_ftype_int_v2df_v2df
18337 = build_function_type_list (integer_type_node
,
18338 integer_type_node
, V2DF_type_node
,
18339 V2DF_type_node
, NULL_TREE
);
18340 tree v2di_ftype_v2di
18341 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
18342 tree v4si_ftype_v4si
18343 = build_function_type_list (V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
18344 tree v8hi_ftype_v8hi
18345 = build_function_type_list (V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
18346 tree v16qi_ftype_v16qi
18347 = build_function_type_list (V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
18348 tree v4sf_ftype_v4sf
18349 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
18350 tree v2df_ftype_v2df
18351 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
18352 tree void_ftype_pcvoid_int_int
18353 = build_function_type_list (void_type_node
,
18354 pcvoid_type_node
, integer_type_node
,
18355 integer_type_node
, NULL_TREE
);
18357 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si
, ALTIVEC_BUILTIN_MTVSCR
);
18358 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void
, ALTIVEC_BUILTIN_MFVSCR
);
18359 def_builtin ("__builtin_altivec_dssall", void_ftype_void
, ALTIVEC_BUILTIN_DSSALL
);
18360 def_builtin ("__builtin_altivec_dss", void_ftype_int
, ALTIVEC_BUILTIN_DSS
);
18361 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVSL
);
18362 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVSR
);
18363 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVEBX
);
18364 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVEHX
);
18365 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVEWX
);
18366 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVXL
);
18367 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid
,
18368 ALTIVEC_BUILTIN_LVXL_V2DF
);
18369 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid
,
18370 ALTIVEC_BUILTIN_LVXL_V2DI
);
18371 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid
,
18372 ALTIVEC_BUILTIN_LVXL_V4SF
);
18373 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid
,
18374 ALTIVEC_BUILTIN_LVXL_V4SI
);
18375 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid
,
18376 ALTIVEC_BUILTIN_LVXL_V8HI
);
18377 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid
,
18378 ALTIVEC_BUILTIN_LVXL_V16QI
);
18379 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVX
);
18380 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid
,
18381 ALTIVEC_BUILTIN_LVX_V2DF
);
18382 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid
,
18383 ALTIVEC_BUILTIN_LVX_V2DI
);
18384 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid
,
18385 ALTIVEC_BUILTIN_LVX_V4SF
);
18386 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid
,
18387 ALTIVEC_BUILTIN_LVX_V4SI
);
18388 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid
,
18389 ALTIVEC_BUILTIN_LVX_V8HI
);
18390 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid
,
18391 ALTIVEC_BUILTIN_LVX_V16QI
);
18392 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid
, ALTIVEC_BUILTIN_STVX
);
18393 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid
,
18394 ALTIVEC_BUILTIN_STVX_V2DF
);
18395 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid
,
18396 ALTIVEC_BUILTIN_STVX_V2DI
);
18397 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid
,
18398 ALTIVEC_BUILTIN_STVX_V4SF
);
18399 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid
,
18400 ALTIVEC_BUILTIN_STVX_V4SI
);
18401 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid
,
18402 ALTIVEC_BUILTIN_STVX_V8HI
);
18403 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid
,
18404 ALTIVEC_BUILTIN_STVX_V16QI
);
18405 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid
, ALTIVEC_BUILTIN_STVEWX
);
18406 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid
, ALTIVEC_BUILTIN_STVXL
);
18407 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid
,
18408 ALTIVEC_BUILTIN_STVXL_V2DF
);
18409 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid
,
18410 ALTIVEC_BUILTIN_STVXL_V2DI
);
18411 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid
,
18412 ALTIVEC_BUILTIN_STVXL_V4SF
);
18413 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid
,
18414 ALTIVEC_BUILTIN_STVXL_V4SI
);
18415 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid
,
18416 ALTIVEC_BUILTIN_STVXL_V8HI
);
18417 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid
,
18418 ALTIVEC_BUILTIN_STVXL_V16QI
);
18419 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid
, ALTIVEC_BUILTIN_STVEBX
);
18420 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid
, ALTIVEC_BUILTIN_STVEHX
);
18421 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LD
);
18422 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LDE
);
18423 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LDL
);
18424 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LVSL
);
18425 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LVSR
);
18426 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LVEBX
);
18427 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LVEHX
);
18428 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LVEWX
);
18429 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid
, ALTIVEC_BUILTIN_VEC_ST
);
18430 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid
, ALTIVEC_BUILTIN_VEC_STE
);
18431 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid
, ALTIVEC_BUILTIN_VEC_STL
);
18432 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid
, ALTIVEC_BUILTIN_VEC_STVEWX
);
18433 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid
, ALTIVEC_BUILTIN_VEC_STVEBX
);
18434 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid
, ALTIVEC_BUILTIN_VEC_STVEHX
);
18436 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid
,
18437 VSX_BUILTIN_LXVD2X_V2DF
);
18438 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid
,
18439 VSX_BUILTIN_LXVD2X_V2DI
);
18440 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid
,
18441 VSX_BUILTIN_LXVW4X_V4SF
);
18442 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid
,
18443 VSX_BUILTIN_LXVW4X_V4SI
);
18444 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid
,
18445 VSX_BUILTIN_LXVW4X_V8HI
);
18446 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid
,
18447 VSX_BUILTIN_LXVW4X_V16QI
);
18448 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid
,
18449 VSX_BUILTIN_STXVD2X_V2DF
);
18450 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid
,
18451 VSX_BUILTIN_STXVD2X_V2DI
);
18452 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid
,
18453 VSX_BUILTIN_STXVW4X_V4SF
);
18454 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid
,
18455 VSX_BUILTIN_STXVW4X_V4SI
);
18456 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid
,
18457 VSX_BUILTIN_STXVW4X_V8HI
);
18458 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid
,
18459 VSX_BUILTIN_STXVW4X_V16QI
);
18461 def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid
,
18462 VSX_BUILTIN_LD_ELEMREV_V2DF
);
18463 def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid
,
18464 VSX_BUILTIN_LD_ELEMREV_V2DI
);
18465 def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid
,
18466 VSX_BUILTIN_LD_ELEMREV_V4SF
);
18467 def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid
,
18468 VSX_BUILTIN_LD_ELEMREV_V4SI
);
18469 def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid
,
18470 VSX_BUILTIN_ST_ELEMREV_V2DF
);
18471 def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid
,
18472 VSX_BUILTIN_ST_ELEMREV_V2DI
);
18473 def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid
,
18474 VSX_BUILTIN_ST_ELEMREV_V4SF
);
18475 def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid
,
18476 VSX_BUILTIN_ST_ELEMREV_V4SI
);
18478 if (TARGET_P9_VECTOR
)
18480 def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid
,
18481 VSX_BUILTIN_LD_ELEMREV_V8HI
);
18482 def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid
,
18483 VSX_BUILTIN_LD_ELEMREV_V16QI
);
18484 def_builtin ("__builtin_vsx_st_elemrev_v8hi",
18485 void_ftype_v8hi_long_pvoid
, VSX_BUILTIN_ST_ELEMREV_V8HI
);
18486 def_builtin ("__builtin_vsx_st_elemrev_v16qi",
18487 void_ftype_v16qi_long_pvoid
, VSX_BUILTIN_ST_ELEMREV_V16QI
);
18491 rs6000_builtin_decls
[(int) VSX_BUILTIN_LD_ELEMREV_V8HI
]
18492 = rs6000_builtin_decls
[(int) VSX_BUILTIN_LXVW4X_V8HI
];
18493 rs6000_builtin_decls
[(int) VSX_BUILTIN_LD_ELEMREV_V16QI
]
18494 = rs6000_builtin_decls
[(int) VSX_BUILTIN_LXVW4X_V16QI
];
18495 rs6000_builtin_decls
[(int) VSX_BUILTIN_ST_ELEMREV_V8HI
]
18496 = rs6000_builtin_decls
[(int) VSX_BUILTIN_STXVW4X_V8HI
];
18497 rs6000_builtin_decls
[(int) VSX_BUILTIN_ST_ELEMREV_V16QI
]
18498 = rs6000_builtin_decls
[(int) VSX_BUILTIN_STXVW4X_V16QI
];
18501 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid
,
18502 VSX_BUILTIN_VEC_LD
);
18503 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid
,
18504 VSX_BUILTIN_VEC_ST
);
18505 def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid
,
18506 VSX_BUILTIN_VEC_XL
);
18507 def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid
,
18508 VSX_BUILTIN_VEC_XST
);
18510 def_builtin ("__builtin_vec_step", int_ftype_opaque
, ALTIVEC_BUILTIN_VEC_STEP
);
18511 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque
, ALTIVEC_BUILTIN_VEC_SPLATS
);
18512 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque
, ALTIVEC_BUILTIN_VEC_PROMOTE
);
18514 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int
, ALTIVEC_BUILTIN_VEC_SLD
);
18515 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int
, ALTIVEC_BUILTIN_VEC_SPLAT
);
18516 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int
, ALTIVEC_BUILTIN_VEC_EXTRACT
);
18517 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int
, ALTIVEC_BUILTIN_VEC_INSERT
);
18518 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int
, ALTIVEC_BUILTIN_VEC_VSPLTW
);
18519 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int
, ALTIVEC_BUILTIN_VEC_VSPLTH
);
18520 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int
, ALTIVEC_BUILTIN_VEC_VSPLTB
);
18521 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int
, ALTIVEC_BUILTIN_VEC_CTF
);
18522 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int
, ALTIVEC_BUILTIN_VEC_VCFSX
);
18523 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int
, ALTIVEC_BUILTIN_VEC_VCFUX
);
18524 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int
, ALTIVEC_BUILTIN_VEC_CTS
);
18525 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int
, ALTIVEC_BUILTIN_VEC_CTU
);
18527 def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque
,
18528 ALTIVEC_BUILTIN_VEC_ADDE
);
18529 def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque
,
18530 ALTIVEC_BUILTIN_VEC_ADDEC
);
18531 def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque
,
18532 ALTIVEC_BUILTIN_VEC_CMPNE
);
18533 def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque
,
18534 ALTIVEC_BUILTIN_VEC_MUL
);
18536 /* Cell builtins. */
18537 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVLX
);
18538 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVLXL
);
18539 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVRX
);
18540 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVRXL
);
18542 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LVLX
);
18543 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LVLXL
);
18544 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LVRX
);
18545 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LVRXL
);
18547 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid
, ALTIVEC_BUILTIN_STVLX
);
18548 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid
, ALTIVEC_BUILTIN_STVLXL
);
18549 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid
, ALTIVEC_BUILTIN_STVRX
);
18550 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid
, ALTIVEC_BUILTIN_STVRXL
);
18552 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid
, ALTIVEC_BUILTIN_VEC_STVLX
);
18553 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid
, ALTIVEC_BUILTIN_VEC_STVLXL
);
18554 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid
, ALTIVEC_BUILTIN_VEC_STVRX
);
18555 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid
, ALTIVEC_BUILTIN_VEC_STVRXL
);
18557 if (TARGET_P9_VECTOR
)
18558 def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long
,
18559 P9V_BUILTIN_STXVL
);
18561 /* Add the DST variants. */
18563 for (i
= 0; i
< ARRAY_SIZE (bdesc_dst
); i
++, d
++)
18565 HOST_WIDE_INT mask
= d
->mask
;
18567 /* It is expected that these dst built-in functions may have
18568 d->icode equal to CODE_FOR_nothing. */
18569 if ((mask
& builtin_mask
) != mask
)
18571 if (TARGET_DEBUG_BUILTIN
)
18572 fprintf (stderr
, "altivec_init_builtins, skip dst %s\n",
18576 def_builtin (d
->name
, void_ftype_pcvoid_int_int
, d
->code
);
18579 /* Initialize the predicates. */
18580 d
= bdesc_altivec_preds
;
18581 for (i
= 0; i
< ARRAY_SIZE (bdesc_altivec_preds
); i
++, d
++)
18583 machine_mode mode1
;
18585 HOST_WIDE_INT mask
= d
->mask
;
18587 if ((mask
& builtin_mask
) != mask
)
18589 if (TARGET_DEBUG_BUILTIN
)
18590 fprintf (stderr
, "altivec_init_builtins, skip predicate %s\n",
18595 if (rs6000_overloaded_builtin_p (d
->code
))
18599 /* Cannot define builtin if the instruction is disabled. */
18600 gcc_assert (d
->icode
!= CODE_FOR_nothing
);
18601 mode1
= insn_data
[d
->icode
].operand
[1].mode
;
18607 type
= int_ftype_int_opaque_opaque
;
18610 type
= int_ftype_int_v2di_v2di
;
18613 type
= int_ftype_int_v4si_v4si
;
18616 type
= int_ftype_int_v8hi_v8hi
;
18619 type
= int_ftype_int_v16qi_v16qi
;
18622 type
= int_ftype_int_v4sf_v4sf
;
18625 type
= int_ftype_int_v2df_v2df
;
18628 gcc_unreachable ();
18631 def_builtin (d
->name
, type
, d
->code
);
18634 /* Initialize the abs* operators. */
18636 for (i
= 0; i
< ARRAY_SIZE (bdesc_abs
); i
++, d
++)
18638 machine_mode mode0
;
18640 HOST_WIDE_INT mask
= d
->mask
;
18642 if ((mask
& builtin_mask
) != mask
)
18644 if (TARGET_DEBUG_BUILTIN
)
18645 fprintf (stderr
, "altivec_init_builtins, skip abs %s\n",
18650 /* Cannot define builtin if the instruction is disabled. */
18651 gcc_assert (d
->icode
!= CODE_FOR_nothing
);
18652 mode0
= insn_data
[d
->icode
].operand
[0].mode
;
18657 type
= v2di_ftype_v2di
;
18660 type
= v4si_ftype_v4si
;
18663 type
= v8hi_ftype_v8hi
;
18666 type
= v16qi_ftype_v16qi
;
18669 type
= v4sf_ftype_v4sf
;
18672 type
= v2df_ftype_v2df
;
18675 gcc_unreachable ();
18678 def_builtin (d
->name
, type
, d
->code
);
18681 /* Initialize target builtin that implements
18682 targetm.vectorize.builtin_mask_for_load. */
18684 decl
= add_builtin_function ("__builtin_altivec_mask_for_load",
18685 v16qi_ftype_long_pcvoid
,
18686 ALTIVEC_BUILTIN_MASK_FOR_LOAD
,
18687 BUILT_IN_MD
, NULL
, NULL_TREE
);
18688 TREE_READONLY (decl
) = 1;
18689 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
18690 altivec_builtin_mask_for_load
= decl
;
18692 /* Access to the vec_init patterns. */
18693 ftype
= build_function_type_list (V4SI_type_node
, integer_type_node
,
18694 integer_type_node
, integer_type_node
,
18695 integer_type_node
, NULL_TREE
);
18696 def_builtin ("__builtin_vec_init_v4si", ftype
, ALTIVEC_BUILTIN_VEC_INIT_V4SI
);
18698 ftype
= build_function_type_list (V8HI_type_node
, short_integer_type_node
,
18699 short_integer_type_node
,
18700 short_integer_type_node
,
18701 short_integer_type_node
,
18702 short_integer_type_node
,
18703 short_integer_type_node
,
18704 short_integer_type_node
,
18705 short_integer_type_node
, NULL_TREE
);
18706 def_builtin ("__builtin_vec_init_v8hi", ftype
, ALTIVEC_BUILTIN_VEC_INIT_V8HI
);
18708 ftype
= build_function_type_list (V16QI_type_node
, char_type_node
,
18709 char_type_node
, char_type_node
,
18710 char_type_node
, char_type_node
,
18711 char_type_node
, char_type_node
,
18712 char_type_node
, char_type_node
,
18713 char_type_node
, char_type_node
,
18714 char_type_node
, char_type_node
,
18715 char_type_node
, char_type_node
,
18716 char_type_node
, NULL_TREE
);
18717 def_builtin ("__builtin_vec_init_v16qi", ftype
,
18718 ALTIVEC_BUILTIN_VEC_INIT_V16QI
);
18720 ftype
= build_function_type_list (V4SF_type_node
, float_type_node
,
18721 float_type_node
, float_type_node
,
18722 float_type_node
, NULL_TREE
);
18723 def_builtin ("__builtin_vec_init_v4sf", ftype
, ALTIVEC_BUILTIN_VEC_INIT_V4SF
);
18725 /* VSX builtins. */
18726 ftype
= build_function_type_list (V2DF_type_node
, double_type_node
,
18727 double_type_node
, NULL_TREE
);
18728 def_builtin ("__builtin_vec_init_v2df", ftype
, VSX_BUILTIN_VEC_INIT_V2DF
);
18730 ftype
= build_function_type_list (V2DI_type_node
, intDI_type_node
,
18731 intDI_type_node
, NULL_TREE
);
18732 def_builtin ("__builtin_vec_init_v2di", ftype
, VSX_BUILTIN_VEC_INIT_V2DI
);
18734 /* Access to the vec_set patterns. */
18735 ftype
= build_function_type_list (V4SI_type_node
, V4SI_type_node
,
18737 integer_type_node
, NULL_TREE
);
18738 def_builtin ("__builtin_vec_set_v4si", ftype
, ALTIVEC_BUILTIN_VEC_SET_V4SI
);
18740 ftype
= build_function_type_list (V8HI_type_node
, V8HI_type_node
,
18742 integer_type_node
, NULL_TREE
);
18743 def_builtin ("__builtin_vec_set_v8hi", ftype
, ALTIVEC_BUILTIN_VEC_SET_V8HI
);
18745 ftype
= build_function_type_list (V16QI_type_node
, V16QI_type_node
,
18747 integer_type_node
, NULL_TREE
);
18748 def_builtin ("__builtin_vec_set_v16qi", ftype
, ALTIVEC_BUILTIN_VEC_SET_V16QI
);
18750 ftype
= build_function_type_list (V4SF_type_node
, V4SF_type_node
,
18752 integer_type_node
, NULL_TREE
);
18753 def_builtin ("__builtin_vec_set_v4sf", ftype
, ALTIVEC_BUILTIN_VEC_SET_V4SF
);
18755 ftype
= build_function_type_list (V2DF_type_node
, V2DF_type_node
,
18757 integer_type_node
, NULL_TREE
);
18758 def_builtin ("__builtin_vec_set_v2df", ftype
, VSX_BUILTIN_VEC_SET_V2DF
);
18760 ftype
= build_function_type_list (V2DI_type_node
, V2DI_type_node
,
18762 integer_type_node
, NULL_TREE
);
18763 def_builtin ("__builtin_vec_set_v2di", ftype
, VSX_BUILTIN_VEC_SET_V2DI
);
18765 /* Access to the vec_extract patterns. */
18766 ftype
= build_function_type_list (intSI_type_node
, V4SI_type_node
,
18767 integer_type_node
, NULL_TREE
);
18768 def_builtin ("__builtin_vec_ext_v4si", ftype
, ALTIVEC_BUILTIN_VEC_EXT_V4SI
);
18770 ftype
= build_function_type_list (intHI_type_node
, V8HI_type_node
,
18771 integer_type_node
, NULL_TREE
);
18772 def_builtin ("__builtin_vec_ext_v8hi", ftype
, ALTIVEC_BUILTIN_VEC_EXT_V8HI
);
18774 ftype
= build_function_type_list (intQI_type_node
, V16QI_type_node
,
18775 integer_type_node
, NULL_TREE
);
18776 def_builtin ("__builtin_vec_ext_v16qi", ftype
, ALTIVEC_BUILTIN_VEC_EXT_V16QI
);
18778 ftype
= build_function_type_list (float_type_node
, V4SF_type_node
,
18779 integer_type_node
, NULL_TREE
);
18780 def_builtin ("__builtin_vec_ext_v4sf", ftype
, ALTIVEC_BUILTIN_VEC_EXT_V4SF
);
18782 ftype
= build_function_type_list (double_type_node
, V2DF_type_node
,
18783 integer_type_node
, NULL_TREE
);
18784 def_builtin ("__builtin_vec_ext_v2df", ftype
, VSX_BUILTIN_VEC_EXT_V2DF
);
18786 ftype
= build_function_type_list (intDI_type_node
, V2DI_type_node
,
18787 integer_type_node
, NULL_TREE
);
18788 def_builtin ("__builtin_vec_ext_v2di", ftype
, VSX_BUILTIN_VEC_EXT_V2DI
);
18791 if (V1TI_type_node
)
18793 tree v1ti_ftype_long_pcvoid
18794 = build_function_type_list (V1TI_type_node
,
18795 long_integer_type_node
, pcvoid_type_node
,
18797 tree void_ftype_v1ti_long_pvoid
18798 = build_function_type_list (void_type_node
,
18799 V1TI_type_node
, long_integer_type_node
,
18800 pvoid_type_node
, NULL_TREE
);
18801 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid
,
18802 VSX_BUILTIN_LXVD2X_V1TI
);
18803 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid
,
18804 VSX_BUILTIN_STXVD2X_V1TI
);
18805 ftype
= build_function_type_list (V1TI_type_node
, intTI_type_node
,
18806 NULL_TREE
, NULL_TREE
);
18807 def_builtin ("__builtin_vec_init_v1ti", ftype
, VSX_BUILTIN_VEC_INIT_V1TI
);
18808 ftype
= build_function_type_list (V1TI_type_node
, V1TI_type_node
,
18810 integer_type_node
, NULL_TREE
);
18811 def_builtin ("__builtin_vec_set_v1ti", ftype
, VSX_BUILTIN_VEC_SET_V1TI
);
18812 ftype
= build_function_type_list (intTI_type_node
, V1TI_type_node
,
18813 integer_type_node
, NULL_TREE
);
18814 def_builtin ("__builtin_vec_ext_v1ti", ftype
, VSX_BUILTIN_VEC_EXT_V1TI
);
18820 htm_init_builtins (void)
18822 HOST_WIDE_INT builtin_mask
= rs6000_builtin_mask
;
18823 const struct builtin_description
*d
;
18827 for (i
= 0; i
< ARRAY_SIZE (bdesc_htm
); i
++, d
++)
18829 tree op
[MAX_HTM_OPERANDS
], type
;
18830 HOST_WIDE_INT mask
= d
->mask
;
18831 unsigned attr
= rs6000_builtin_info
[d
->code
].attr
;
18832 bool void_func
= (attr
& RS6000_BTC_VOID
);
18833 int attr_args
= (attr
& RS6000_BTC_TYPE_MASK
);
18835 tree gpr_type_node
;
18839 /* It is expected that these htm built-in functions may have
18840 d->icode equal to CODE_FOR_nothing. */
18842 if (TARGET_32BIT
&& TARGET_POWERPC64
)
18843 gpr_type_node
= long_long_unsigned_type_node
;
18845 gpr_type_node
= long_unsigned_type_node
;
18847 if (attr
& RS6000_BTC_SPR
)
18849 rettype
= gpr_type_node
;
18850 argtype
= gpr_type_node
;
18852 else if (d
->code
== HTM_BUILTIN_TABORTDC
18853 || d
->code
== HTM_BUILTIN_TABORTDCI
)
18855 rettype
= unsigned_type_node
;
18856 argtype
= gpr_type_node
;
18860 rettype
= unsigned_type_node
;
18861 argtype
= unsigned_type_node
;
18864 if ((mask
& builtin_mask
) != mask
)
18866 if (TARGET_DEBUG_BUILTIN
)
18867 fprintf (stderr
, "htm_builtin, skip binary %s\n", d
->name
);
18873 if (TARGET_DEBUG_BUILTIN
)
18874 fprintf (stderr
, "htm_builtin, bdesc_htm[%ld] no name\n",
18875 (long unsigned) i
);
18879 op
[nopnds
++] = (void_func
) ? void_type_node
: rettype
;
18881 if (attr_args
== RS6000_BTC_UNARY
)
18882 op
[nopnds
++] = argtype
;
18883 else if (attr_args
== RS6000_BTC_BINARY
)
18885 op
[nopnds
++] = argtype
;
18886 op
[nopnds
++] = argtype
;
18888 else if (attr_args
== RS6000_BTC_TERNARY
)
18890 op
[nopnds
++] = argtype
;
18891 op
[nopnds
++] = argtype
;
18892 op
[nopnds
++] = argtype
;
18898 type
= build_function_type_list (op
[0], NULL_TREE
);
18901 type
= build_function_type_list (op
[0], op
[1], NULL_TREE
);
18904 type
= build_function_type_list (op
[0], op
[1], op
[2], NULL_TREE
);
18907 type
= build_function_type_list (op
[0], op
[1], op
[2], op
[3],
18911 gcc_unreachable ();
18914 def_builtin (d
->name
, type
, d
->code
);
18918 /* Hash function for builtin functions with up to 3 arguments and a return
18921 builtin_hasher::hash (builtin_hash_struct
*bh
)
18926 for (i
= 0; i
< 4; i
++)
18928 ret
= (ret
* (unsigned)MAX_MACHINE_MODE
) + ((unsigned)bh
->mode
[i
]);
18929 ret
= (ret
* 2) + bh
->uns_p
[i
];
18935 /* Compare builtin hash entries H1 and H2 for equivalence. */
18937 builtin_hasher::equal (builtin_hash_struct
*p1
, builtin_hash_struct
*p2
)
18939 return ((p1
->mode
[0] == p2
->mode
[0])
18940 && (p1
->mode
[1] == p2
->mode
[1])
18941 && (p1
->mode
[2] == p2
->mode
[2])
18942 && (p1
->mode
[3] == p2
->mode
[3])
18943 && (p1
->uns_p
[0] == p2
->uns_p
[0])
18944 && (p1
->uns_p
[1] == p2
->uns_p
[1])
18945 && (p1
->uns_p
[2] == p2
->uns_p
[2])
18946 && (p1
->uns_p
[3] == p2
->uns_p
[3]));
18949 /* Map types for builtin functions with an explicit return type and up to 3
18950 arguments. Functions with fewer than 3 arguments use VOIDmode as the type
18951 of the argument. */
18953 builtin_function_type (machine_mode mode_ret
, machine_mode mode_arg0
,
18954 machine_mode mode_arg1
, machine_mode mode_arg2
,
18955 enum rs6000_builtins builtin
, const char *name
)
18957 struct builtin_hash_struct h
;
18958 struct builtin_hash_struct
*h2
;
18961 tree ret_type
= NULL_TREE
;
18962 tree arg_type
[3] = { NULL_TREE
, NULL_TREE
, NULL_TREE
};
18964 /* Create builtin_hash_table. */
18965 if (builtin_hash_table
== NULL
)
18966 builtin_hash_table
= hash_table
<builtin_hasher
>::create_ggc (1500);
18968 h
.type
= NULL_TREE
;
18969 h
.mode
[0] = mode_ret
;
18970 h
.mode
[1] = mode_arg0
;
18971 h
.mode
[2] = mode_arg1
;
18972 h
.mode
[3] = mode_arg2
;
18978 /* If the builtin is a type that produces unsigned results or takes unsigned
18979 arguments, and it is returned as a decl for the vectorizer (such as
18980 widening multiplies, permute), make sure the arguments and return value
18981 are type correct. */
18984 /* unsigned 1 argument functions. */
18985 case CRYPTO_BUILTIN_VSBOX
:
18986 case P8V_BUILTIN_VGBBD
:
18987 case MISC_BUILTIN_CDTBCD
:
18988 case MISC_BUILTIN_CBCDTD
:
18993 /* unsigned 2 argument functions. */
18994 case ALTIVEC_BUILTIN_VMULEUB
:
18995 case ALTIVEC_BUILTIN_VMULEUH
:
18996 case ALTIVEC_BUILTIN_VMULOUB
:
18997 case ALTIVEC_BUILTIN_VMULOUH
:
18998 case CRYPTO_BUILTIN_VCIPHER
:
18999 case CRYPTO_BUILTIN_VCIPHERLAST
:
19000 case CRYPTO_BUILTIN_VNCIPHER
:
19001 case CRYPTO_BUILTIN_VNCIPHERLAST
:
19002 case CRYPTO_BUILTIN_VPMSUMB
:
19003 case CRYPTO_BUILTIN_VPMSUMH
:
19004 case CRYPTO_BUILTIN_VPMSUMW
:
19005 case CRYPTO_BUILTIN_VPMSUMD
:
19006 case CRYPTO_BUILTIN_VPMSUM
:
19007 case MISC_BUILTIN_ADDG6S
:
19008 case MISC_BUILTIN_DIVWEU
:
19009 case MISC_BUILTIN_DIVWEUO
:
19010 case MISC_BUILTIN_DIVDEU
:
19011 case MISC_BUILTIN_DIVDEUO
:
19012 case VSX_BUILTIN_UDIV_V2DI
:
19018 /* unsigned 3 argument functions. */
19019 case ALTIVEC_BUILTIN_VPERM_16QI_UNS
:
19020 case ALTIVEC_BUILTIN_VPERM_8HI_UNS
:
19021 case ALTIVEC_BUILTIN_VPERM_4SI_UNS
:
19022 case ALTIVEC_BUILTIN_VPERM_2DI_UNS
:
19023 case ALTIVEC_BUILTIN_VSEL_16QI_UNS
:
19024 case ALTIVEC_BUILTIN_VSEL_8HI_UNS
:
19025 case ALTIVEC_BUILTIN_VSEL_4SI_UNS
:
19026 case ALTIVEC_BUILTIN_VSEL_2DI_UNS
:
19027 case VSX_BUILTIN_VPERM_16QI_UNS
:
19028 case VSX_BUILTIN_VPERM_8HI_UNS
:
19029 case VSX_BUILTIN_VPERM_4SI_UNS
:
19030 case VSX_BUILTIN_VPERM_2DI_UNS
:
19031 case VSX_BUILTIN_XXSEL_16QI_UNS
:
19032 case VSX_BUILTIN_XXSEL_8HI_UNS
:
19033 case VSX_BUILTIN_XXSEL_4SI_UNS
:
19034 case VSX_BUILTIN_XXSEL_2DI_UNS
:
19035 case CRYPTO_BUILTIN_VPERMXOR
:
19036 case CRYPTO_BUILTIN_VPERMXOR_V2DI
:
19037 case CRYPTO_BUILTIN_VPERMXOR_V4SI
:
19038 case CRYPTO_BUILTIN_VPERMXOR_V8HI
:
19039 case CRYPTO_BUILTIN_VPERMXOR_V16QI
:
19040 case CRYPTO_BUILTIN_VSHASIGMAW
:
19041 case CRYPTO_BUILTIN_VSHASIGMAD
:
19042 case CRYPTO_BUILTIN_VSHASIGMA
:
19049 /* signed permute functions with unsigned char mask. */
19050 case ALTIVEC_BUILTIN_VPERM_16QI
:
19051 case ALTIVEC_BUILTIN_VPERM_8HI
:
19052 case ALTIVEC_BUILTIN_VPERM_4SI
:
19053 case ALTIVEC_BUILTIN_VPERM_4SF
:
19054 case ALTIVEC_BUILTIN_VPERM_2DI
:
19055 case ALTIVEC_BUILTIN_VPERM_2DF
:
19056 case VSX_BUILTIN_VPERM_16QI
:
19057 case VSX_BUILTIN_VPERM_8HI
:
19058 case VSX_BUILTIN_VPERM_4SI
:
19059 case VSX_BUILTIN_VPERM_4SF
:
19060 case VSX_BUILTIN_VPERM_2DI
:
19061 case VSX_BUILTIN_VPERM_2DF
:
19065 /* unsigned args, signed return. */
19066 case VSX_BUILTIN_XVCVUXDSP
:
19067 case VSX_BUILTIN_XVCVUXDDP_UNS
:
19068 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF
:
19072 /* signed args, unsigned return. */
19073 case VSX_BUILTIN_XVCVDPUXDS_UNS
:
19074 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI
:
19075 case MISC_BUILTIN_UNPACK_TD
:
19076 case MISC_BUILTIN_UNPACK_V1TI
:
19080 /* unsigned arguments for 128-bit pack instructions. */
19081 case MISC_BUILTIN_PACK_TD
:
19082 case MISC_BUILTIN_PACK_V1TI
:
19091 /* Figure out how many args are present. */
19092 while (num_args
> 0 && h
.mode
[num_args
] == VOIDmode
)
19095 ret_type
= builtin_mode_to_type
[h
.mode
[0]][h
.uns_p
[0]];
19096 if (!ret_type
&& h
.uns_p
[0])
19097 ret_type
= builtin_mode_to_type
[h
.mode
[0]][0];
19100 fatal_error (input_location
,
19101 "internal error: builtin function %s had an unexpected "
19102 "return type %s", name
, GET_MODE_NAME (h
.mode
[0]));
19104 for (i
= 0; i
< (int) ARRAY_SIZE (arg_type
); i
++)
19105 arg_type
[i
] = NULL_TREE
;
19107 for (i
= 0; i
< num_args
; i
++)
19109 int m
= (int) h
.mode
[i
+1];
19110 int uns_p
= h
.uns_p
[i
+1];
19112 arg_type
[i
] = builtin_mode_to_type
[m
][uns_p
];
19113 if (!arg_type
[i
] && uns_p
)
19114 arg_type
[i
] = builtin_mode_to_type
[m
][0];
19117 fatal_error (input_location
,
19118 "internal error: builtin function %s, argument %d "
19119 "had unexpected argument type %s", name
, i
,
19120 GET_MODE_NAME (m
));
19123 builtin_hash_struct
**found
= builtin_hash_table
->find_slot (&h
, INSERT
);
19124 if (*found
== NULL
)
19126 h2
= ggc_alloc
<builtin_hash_struct
> ();
19130 h2
->type
= build_function_type_list (ret_type
, arg_type
[0], arg_type
[1],
19131 arg_type
[2], NULL_TREE
);
19134 return (*found
)->type
;
19138 rs6000_common_init_builtins (void)
19140 const struct builtin_description
*d
;
19143 tree opaque_ftype_opaque
= NULL_TREE
;
19144 tree opaque_ftype_opaque_opaque
= NULL_TREE
;
19145 tree opaque_ftype_opaque_opaque_opaque
= NULL_TREE
;
19146 tree v2si_ftype
= NULL_TREE
;
19147 tree v2si_ftype_qi
= NULL_TREE
;
19148 tree v2si_ftype_v2si_qi
= NULL_TREE
;
19149 tree v2si_ftype_int_qi
= NULL_TREE
;
19150 HOST_WIDE_INT builtin_mask
= rs6000_builtin_mask
;
19152 if (!TARGET_PAIRED_FLOAT
)
19154 builtin_mode_to_type
[V2SImode
][0] = opaque_V2SI_type_node
;
19155 builtin_mode_to_type
[V2SFmode
][0] = opaque_V2SF_type_node
;
19158 /* Paired and SPE builtins are only available if you build a compiler with
19159 the appropriate options, so only create those builtins with the
19160 appropriate compiler option. Create Altivec and VSX builtins on machines
19161 with at least the general purpose extensions (970 and newer) to allow the
19162 use of the target attribute.. */
19164 if (TARGET_EXTRA_BUILTINS
)
19165 builtin_mask
|= RS6000_BTM_COMMON
;
19167 /* Add the ternary operators. */
19169 for (i
= 0; i
< ARRAY_SIZE (bdesc_3arg
); i
++, d
++)
19172 HOST_WIDE_INT mask
= d
->mask
;
19174 if ((mask
& builtin_mask
) != mask
)
19176 if (TARGET_DEBUG_BUILTIN
)
19177 fprintf (stderr
, "rs6000_builtin, skip ternary %s\n", d
->name
);
19181 if (rs6000_overloaded_builtin_p (d
->code
))
19183 if (! (type
= opaque_ftype_opaque_opaque_opaque
))
19184 type
= opaque_ftype_opaque_opaque_opaque
19185 = build_function_type_list (opaque_V4SI_type_node
,
19186 opaque_V4SI_type_node
,
19187 opaque_V4SI_type_node
,
19188 opaque_V4SI_type_node
,
19193 enum insn_code icode
= d
->icode
;
19196 if (TARGET_DEBUG_BUILTIN
)
19197 fprintf (stderr
, "rs6000_builtin, bdesc_3arg[%ld] no name\n",
19203 if (icode
== CODE_FOR_nothing
)
19205 if (TARGET_DEBUG_BUILTIN
)
19206 fprintf (stderr
, "rs6000_builtin, skip ternary %s (no code)\n",
19212 type
= builtin_function_type (insn_data
[icode
].operand
[0].mode
,
19213 insn_data
[icode
].operand
[1].mode
,
19214 insn_data
[icode
].operand
[2].mode
,
19215 insn_data
[icode
].operand
[3].mode
,
19219 def_builtin (d
->name
, type
, d
->code
);
19222 /* Add the binary operators. */
19224 for (i
= 0; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
19226 machine_mode mode0
, mode1
, mode2
;
19228 HOST_WIDE_INT mask
= d
->mask
;
19230 if ((mask
& builtin_mask
) != mask
)
19232 if (TARGET_DEBUG_BUILTIN
)
19233 fprintf (stderr
, "rs6000_builtin, skip binary %s\n", d
->name
);
19237 if (rs6000_overloaded_builtin_p (d
->code
))
19239 if (! (type
= opaque_ftype_opaque_opaque
))
19240 type
= opaque_ftype_opaque_opaque
19241 = build_function_type_list (opaque_V4SI_type_node
,
19242 opaque_V4SI_type_node
,
19243 opaque_V4SI_type_node
,
19248 enum insn_code icode
= d
->icode
;
19251 if (TARGET_DEBUG_BUILTIN
)
19252 fprintf (stderr
, "rs6000_builtin, bdesc_2arg[%ld] no name\n",
19258 if (icode
== CODE_FOR_nothing
)
19260 if (TARGET_DEBUG_BUILTIN
)
19261 fprintf (stderr
, "rs6000_builtin, skip binary %s (no code)\n",
19267 mode0
= insn_data
[icode
].operand
[0].mode
;
19268 mode1
= insn_data
[icode
].operand
[1].mode
;
19269 mode2
= insn_data
[icode
].operand
[2].mode
;
19271 if (mode0
== V2SImode
&& mode1
== V2SImode
&& mode2
== QImode
)
19273 if (! (type
= v2si_ftype_v2si_qi
))
19274 type
= v2si_ftype_v2si_qi
19275 = build_function_type_list (opaque_V2SI_type_node
,
19276 opaque_V2SI_type_node
,
19281 else if (mode0
== V2SImode
&& GET_MODE_CLASS (mode1
) == MODE_INT
19282 && mode2
== QImode
)
19284 if (! (type
= v2si_ftype_int_qi
))
19285 type
= v2si_ftype_int_qi
19286 = build_function_type_list (opaque_V2SI_type_node
,
19293 type
= builtin_function_type (mode0
, mode1
, mode2
, VOIDmode
,
19297 def_builtin (d
->name
, type
, d
->code
);
19300 /* Add the simple unary operators. */
19302 for (i
= 0; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
19304 machine_mode mode0
, mode1
;
19306 HOST_WIDE_INT mask
= d
->mask
;
19308 if ((mask
& builtin_mask
) != mask
)
19310 if (TARGET_DEBUG_BUILTIN
)
19311 fprintf (stderr
, "rs6000_builtin, skip unary %s\n", d
->name
);
19315 if (rs6000_overloaded_builtin_p (d
->code
))
19317 if (! (type
= opaque_ftype_opaque
))
19318 type
= opaque_ftype_opaque
19319 = build_function_type_list (opaque_V4SI_type_node
,
19320 opaque_V4SI_type_node
,
19325 enum insn_code icode
= d
->icode
;
19328 if (TARGET_DEBUG_BUILTIN
)
19329 fprintf (stderr
, "rs6000_builtin, bdesc_1arg[%ld] no name\n",
19335 if (icode
== CODE_FOR_nothing
)
19337 if (TARGET_DEBUG_BUILTIN
)
19338 fprintf (stderr
, "rs6000_builtin, skip unary %s (no code)\n",
19344 mode0
= insn_data
[icode
].operand
[0].mode
;
19345 mode1
= insn_data
[icode
].operand
[1].mode
;
19347 if (mode0
== V2SImode
&& mode1
== QImode
)
19349 if (! (type
= v2si_ftype_qi
))
19350 type
= v2si_ftype_qi
19351 = build_function_type_list (opaque_V2SI_type_node
,
19357 type
= builtin_function_type (mode0
, mode1
, VOIDmode
, VOIDmode
,
19361 def_builtin (d
->name
, type
, d
->code
);
19364 /* Add the simple no-argument operators. */
19366 for (i
= 0; i
< ARRAY_SIZE (bdesc_0arg
); i
++, d
++)
19368 machine_mode mode0
;
19370 HOST_WIDE_INT mask
= d
->mask
;
19372 if ((mask
& builtin_mask
) != mask
)
19374 if (TARGET_DEBUG_BUILTIN
)
19375 fprintf (stderr
, "rs6000_builtin, skip no-argument %s\n", d
->name
);
19378 if (rs6000_overloaded_builtin_p (d
->code
))
19380 if (!opaque_ftype_opaque
)
19381 opaque_ftype_opaque
19382 = build_function_type_list (opaque_V4SI_type_node
, NULL_TREE
);
19383 type
= opaque_ftype_opaque
;
19387 enum insn_code icode
= d
->icode
;
19390 if (TARGET_DEBUG_BUILTIN
)
19391 fprintf (stderr
, "rs6000_builtin, bdesc_0arg[%lu] no name\n",
19392 (long unsigned) i
);
19395 if (icode
== CODE_FOR_nothing
)
19397 if (TARGET_DEBUG_BUILTIN
)
19399 "rs6000_builtin, skip no-argument %s (no code)\n",
19403 mode0
= insn_data
[icode
].operand
[0].mode
;
19404 if (mode0
== V2SImode
)
19407 if (! (type
= v2si_ftype
))
19410 = build_function_type_list (opaque_V2SI_type_node
,
19416 type
= builtin_function_type (mode0
, VOIDmode
, VOIDmode
, VOIDmode
,
19419 def_builtin (d
->name
, type
, d
->code
);
/* Set up AIX/Darwin/64-bit Linux quad floating point routines.

   MODE is the 128-bit IBM extended-double mode being initialized
   (IFmode, or TFmode when long double is the IBM format).  Registers
   the soft-float library entry points (optab and conversion libfuncs)
   for that mode.  */

static void
init_float128_ibm (machine_mode mode)
{
  if (!TARGET_XL_COMPAT)
    {
      /* GCC's own __gcc_q* routines from libgcc.  */
      set_optab_libfunc (add_optab, mode, "__gcc_qadd");
      set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
      set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
      set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");

      /* Only register the full soft-float set when hardware FP is not
	 available for these operations.  */
      if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
	{
	  set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
	  set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
	  set_optab_libfunc (ne_optab, mode, "__gcc_qne");
	  set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
	  set_optab_libfunc (ge_optab, mode, "__gcc_qge");
	  set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
	  set_optab_libfunc (le_optab, mode, "__gcc_qle");

	  set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
	  set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
	  set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
	  set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
	  set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
	  set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
	  set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
	  set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
	}

      if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
	set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
    }
  else
    {
      /* -mxl-compat: use the IBM XL compiler's _xlq* names instead.  */
      set_optab_libfunc (add_optab, mode, "_xlqadd");
      set_optab_libfunc (sub_optab, mode, "_xlqsub");
      set_optab_libfunc (smul_optab, mode, "_xlqmul");
      set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
    }

  /* Add various conversions for IFmode to use the traditional TFmode
     names.  Decimal float <-> IBM long double go through the __dpd_*tf*
     entry points even when the mode is spelled IFmode.  */
  if (mode == IFmode)
    {
      set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf2");
      set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf2");
      set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctftd2");
      set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd2");
      set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd2");
      set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdtf2");

      /* 128-bit integer conversions are only available on 64-bit.  */
      if (TARGET_POWERPC64)
	{
	  set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
	  set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
	  set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
	  set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
	}
    }
}
/* Set up IEEE 128-bit floating point routines.  Use different names if the
   arguments can be passed in a vector register.  The historical PowerPC
   implementation of IEEE 128-bit floating point used _q_<op> for the names, so
   continue to use that if we aren't using vector registers to pass IEEE
   128-bit floating point.

   MODE is the IEEE 128-bit mode being initialized (KFmode, or TFmode when
   long double is IEEE quad).  */

static void
init_float128_ieee (machine_mode mode)
{
  if (FLOAT128_VECTOR_P (mode))
    {
      /* Values passed in vector registers: use the libgcc __*kf* names.  */
      set_optab_libfunc (add_optab, mode, "__addkf3");
      set_optab_libfunc (sub_optab, mode, "__subkf3");
      set_optab_libfunc (neg_optab, mode, "__negkf2");
      set_optab_libfunc (smul_optab, mode, "__mulkf3");
      set_optab_libfunc (sdiv_optab, mode, "__divkf3");
      set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
      set_optab_libfunc (abs_optab, mode, "__abstkf2");

      set_optab_libfunc (eq_optab, mode, "__eqkf2");
      set_optab_libfunc (ne_optab, mode, "__nekf2");
      set_optab_libfunc (gt_optab, mode, "__gtkf2");
      set_optab_libfunc (ge_optab, mode, "__gekf2");
      set_optab_libfunc (lt_optab, mode, "__ltkf2");
      set_optab_libfunc (le_optab, mode, "__lekf2");
      set_optab_libfunc (unord_optab, mode, "__unordkf2");

      set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
      set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
      set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
      set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");

      /* IBM extended double <-> IEEE quad.  Both IFmode and (when it is
	 the IBM format) TFmode route through the same __extendtfkf2 /
	 __trunckftf2 entry points.  */
      set_conv_libfunc (sext_optab, mode, IFmode, "__extendtfkf2");
      if (mode != TFmode && FLOAT128_IBM_P (TFmode))
	set_conv_libfunc (sext_optab, mode, TFmode, "__extendtfkf2");

      set_conv_libfunc (trunc_optab, IFmode, mode, "__trunckftf2");
      if (mode != TFmode && FLOAT128_IBM_P (TFmode))
	set_conv_libfunc (trunc_optab, TFmode, mode, "__trunckftf2");

      /* Decimal float conversions.  */
      set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf2");
      set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf2");
      set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunckftd2");
      set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd2");
      set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd2");
      set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdkf2");

      /* Integer <-> IEEE quad conversions.  */
      set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
      set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
      set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
      set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");

      set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
      set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
      set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
      set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");

      /* 128-bit integer conversions are only available on 64-bit.  */
      if (TARGET_POWERPC64)
	{
	  set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
	  set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
	  set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
	  set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
	}
    }
  else
    {
      /* Historical 32-bit SVR4 names (_q_<op>).  */
      set_optab_libfunc (add_optab, mode, "_q_add");
      set_optab_libfunc (sub_optab, mode, "_q_sub");
      set_optab_libfunc (neg_optab, mode, "_q_neg");
      set_optab_libfunc (smul_optab, mode, "_q_mul");
      set_optab_libfunc (sdiv_optab, mode, "_q_div");
      if (TARGET_PPC_GPOPT)
	set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");

      set_optab_libfunc (eq_optab, mode, "_q_feq");
      set_optab_libfunc (ne_optab, mode, "_q_fne");
      set_optab_libfunc (gt_optab, mode, "_q_fgt");
      set_optab_libfunc (ge_optab, mode, "_q_fge");
      set_optab_libfunc (lt_optab, mode, "_q_flt");
      set_optab_libfunc (le_optab, mode, "_q_fle");

      set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
      set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
      set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
      set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
      set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
      set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
      set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
      set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
    }
}
/* Initialize the 128-bit floating point library functions for this target.
   Implements TARGET_INIT_LIBFUNCS: picks the IBM extended-double and/or
   IEEE quad helpers depending on which 128-bit formats are enabled.  */

static void
rs6000_init_libfuncs (void)
{
  /* __float128 support.  IFmode is always the IBM format, KFmode always
     the IEEE format, regardless of what long double is.  */
  if (TARGET_FLOAT128_TYPE)
    {
      init_float128_ibm (IFmode);
      init_float128_ieee (KFmode);
    }

  /* AIX/Darwin/64-bit Linux quad floating point routines.  */
  if (TARGET_LONG_DOUBLE_128)
    {
      if (!TARGET_IEEEQUAD)
	init_float128_ibm (TFmode);

      /* IEEE 128-bit including 32-bit SVR4 quad floating point routines.  */
      else
	init_float128_ieee (TFmode);
    }
}
/* Expand a block clear operation, and return 1 if successful.  Return 0
   if we should let the compiler generate normal code.

   operands[0] is the destination
   operands[1] is the length
   operands[3] is the alignment */

int
expand_block_clear (rtx operands[])
{
  rtx orig_dest = operands[0];
  rtx bytes_rtx	= operands[1];
  rtx align_rtx = operands[3];
  bool constp	= (GET_CODE (bytes_rtx) == CONST_INT);
  HOST_WIDE_INT align;
  HOST_WIDE_INT bytes;
  int offset;
  int clear_bytes;
  int clear_step;

  /* If this is not a fixed size move, just call memcpy */
  if (! constp)
    return 0;

  /* This must be a fixed size alignment  */
  gcc_assert (GET_CODE (align_rtx) == CONST_INT);
  /* Alignment operand arrives in units; convert to bits.  */
  align = INTVAL (align_rtx) * BITS_PER_UNIT;

  /* Anything to clear? */
  bytes = INTVAL (bytes_rtx);
  if (bytes <= 0)
    return 1;

  /* Use the builtin memset after a point, to avoid huge code bloat.
     When optimize_size, avoid any significant code bloat; calling
     memset is about 4 instructions, so allow for one instruction to
     load zero and three to do clearing.  */
  /* clear_step is the widest store (in bytes) usable per iteration.  */
  if (TARGET_ALTIVEC && align >= 128)
    clear_step = 16;
  else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
    clear_step = 8;
  else if (TARGET_SPE && align >= 64)
    clear_step = 8;
  else
    clear_step = 4;

  if (optimize_size && bytes > 3 * clear_step)
    return 0;
  if (! optimize_size && bytes > 8 * clear_step)
    return 0;

  /* Emit one zero store per iteration, using the widest mode permitted
     by remaining length, target features, and alignment.  */
  for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
    {
      machine_mode mode = BLKmode;
      rtx dest;

      if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
	{
	  clear_bytes = 16;
	  mode = V4SImode;
	}
      else if (bytes >= 8 && TARGET_SPE && align >= 64)
	{
	  clear_bytes = 8;
	  mode = V2SImode;
	}
      else if (bytes >= 8 && TARGET_POWERPC64
	       /* 64-bit doubleword stores need word-aligned displacements.  */
	       && (align >= 64 || !STRICT_ALIGNMENT))
	{
	  clear_bytes = 8;
	  mode = DImode;
	  if (offset == 0 && align < 64)
	    {
	      rtx addr;

	      /* If the address form is reg+offset with offset not a
		 multiple of four, reload into reg indirect form here
		 rather than waiting for reload.  This way we get one
		 reload, not one per store.  */
	      addr = XEXP (orig_dest, 0);
	      if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
		  && GET_CODE (XEXP (addr, 1)) == CONST_INT
		  && (INTVAL (XEXP (addr, 1)) & 3) != 0)
		{
		  addr = copy_addr_to_reg (addr);
		  orig_dest = replace_equiv_address (orig_dest, addr);
		}
	    }
	}
      else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
	{			/* move 4 bytes */
	  clear_bytes = 4;
	  mode = SImode;
	}
      else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
	{			/* move 2 bytes */
	  clear_bytes = 2;
	  mode = HImode;
	}
      else /* move 1 byte at a time */
	{
	  clear_bytes = 1;
	  mode = QImode;
	}

      dest = adjust_address (orig_dest, mode, offset);

      emit_move_insn (dest, CONST0_RTX (mode));
    }

  return 1;
}
/* Emit a potentially record-form instruction, setting DST from SRC.
   If DOT is 0, that is all; otherwise, set CCREG to the result of the
   signed comparison of DST with zero.  If DOT is 1, the generated RTL
   doesn't care about the DST result; if DOT is 2, it does.  If CCREG
   is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
   a separate COMPARE.  */

void
rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
{
  if (dot == 0)
    {
      /* Plain move, no condition-register side effect wanted.  */
      emit_move_insn (dst, src);
      return;
    }

  if (cc_reg_not_cr0_operand (ccreg, CCmode))
    {
      /* Record-form insns can only set CR0; for any other CR field,
	 emit the move and an explicit compare of DST against zero.  */
      emit_move_insn (dst, src);
      emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
      return;
    }

  /* CR0: express the dot insn as a PARALLEL of the CR0 compare plus
     either a clobber (DOT == 1, result unused) or the real SET of DST
     (DOT == 2, result used).  */
  rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
  if (dot == 1)
    {
      rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
    }
  else
    {
      rtx set = gen_rtx_SET (dst, src);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
    }
}
/* Figure out the correct instructions to generate to load data for
   block compare.  MODE is used for the read from memory, and
   data is zero extended if REG is wider than MODE.  If LE code
   is being generated, bswap loads are used.

   REG is the destination register to move the data into.
   MEM is the memory block being read.
   MODE is the mode of memory to use for the read.  */

static void
do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
{
  /* Dispatch on destination width (outer) then load width (inner).  */
  switch (GET_MODE (reg))
    {
    case DImode:
      switch (mode)
	{
	case QImode:
	  /* Single byte: no endianness issue, just zero-extend.  */
	  emit_insn (gen_zero_extendqidi2 (reg, mem));
	  break;
	case HImode:
	  {
	    rtx src = mem;
	    if (!BYTES_BIG_ENDIAN)
	      {
		/* Little endian: byte-swap into a temp before extending
		   so the comparison sees big-endian byte order.  */
		src = gen_reg_rtx (HImode);
		emit_insn (gen_bswaphi2 (src, mem));
	      }
	    emit_insn (gen_zero_extendhidi2 (reg, src));
	    break;
	  }
	case SImode:
	  {
	    rtx src = mem;
	    if (!BYTES_BIG_ENDIAN)
	      {
		src = gen_reg_rtx (SImode);
		emit_insn (gen_bswapsi2 (src, mem));
	      }
	    emit_insn (gen_zero_extendsidi2 (reg, src));
	  }
	  break;
	case DImode:
	  /* Full-width load: bswap load on LE, plain move on BE.  */
	  if (!BYTES_BIG_ENDIAN)
	    emit_insn (gen_bswapdi2 (reg, mem));
	  else
	    emit_insn (gen_movdi (reg, mem));
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case SImode:
      switch (mode)
	{
	case QImode:
	  emit_insn (gen_zero_extendqisi2 (reg, mem));
	  break;
	case HImode:
	  {
	    rtx src = mem;
	    if (!BYTES_BIG_ENDIAN)
	      {
		src = gen_reg_rtx (HImode);
		emit_insn (gen_bswaphi2 (src, mem));
	      }
	    emit_insn (gen_zero_extendhisi2 (reg, src));
	  }
	  break;
	case SImode:
	  if (!BYTES_BIG_ENDIAN)
	    emit_insn (gen_bswapsi2 (reg, mem));
	  else
	    emit_insn (gen_movsi (reg, mem));
	  break;
	case DImode:
	  /* DImode is larger than the destination reg so is not expected.  */
	  gcc_unreachable ();
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
      break;
    }
}
/* Select the mode to be used for reading the next chunk of bytes
   in the block compare.

   OFFSET is the current read offset from the beginning of the block.
   BYTES is the number of bytes remaining to be read.
   ALIGN is the minimum alignment of the memory blocks being compared in bytes.
   WORD_MODE_OK indicates using WORD_MODE is allowed, else SImode is
   the largest allowable mode.  */

static machine_mode
select_block_compare_mode (unsigned HOST_WIDE_INT offset,
			   unsigned HOST_WIDE_INT bytes,
			   unsigned HOST_WIDE_INT align, bool word_mode_ok)
{
  /* First see if we can do a whole load unit
     as that will be more efficient than a larger load + shift.  */

  /* If big, use biggest chunk.
     If exactly chunk size, use that size.
     If remainder can be done in one piece with shifting, do that.
     Do largest chunk possible without violating alignment rules.  */

  /* The most we can read without potential page crossing.  */
  unsigned HOST_WIDE_INT maxread = ROUND_UP (bytes, align);

  if (word_mode_ok && bytes >= UNITS_PER_WORD)
    return word_mode;
  else if (bytes == GET_MODE_SIZE (SImode))
    return SImode;
  else if (bytes == GET_MODE_SIZE (HImode))
    return HImode;
  else if (bytes == GET_MODE_SIZE (QImode))
    return QImode;
  else if (bytes < GET_MODE_SIZE (SImode)
	   && offset >= GET_MODE_SIZE (SImode) - bytes)
    /* This matches the case were we have SImode and 3 bytes
       and offset >= 1 and permits us to move back one and overlap
       with the previous read, thus avoiding having to shift
       unwanted bytes off of the input.  */
    return SImode;
  else if (word_mode_ok && bytes < UNITS_PER_WORD
	   && offset >= UNITS_PER_WORD - bytes)
    /* Similarly, if we can use DImode it will get matched here and
       can do an overlapping read that ends at the end of the block.  */
    return word_mode;
  else if (word_mode_ok && maxread >= UNITS_PER_WORD)
    /* It is safe to do all remaining in one load of largest size,
       possibly with a shift to get rid of unwanted bytes.  */
    return word_mode;
  else if (maxread >= GET_MODE_SIZE (SImode))
    /* It is safe to do all remaining in one SImode load,
       possibly with a shift to get rid of unwanted bytes.  */
    return SImode;
  else if (bytes > GET_MODE_SIZE (SImode))
    return SImode;
  else if (bytes > GET_MODE_SIZE (HImode))
    return HImode;

  /* final fallback is do one byte */
  return QImode;
}
19902 /* Compute the alignment of pointer+OFFSET where the original alignment
19903 of pointer was BASE_ALIGN. */
19904 static unsigned HOST_WIDE_INT
19905 compute_current_alignment (unsigned HOST_WIDE_INT base_align
,
19906 unsigned HOST_WIDE_INT offset
)
19910 return min (base_align
, offset
& -offset
);
/* Expand a block compare operation, and return true if successful.
   Return false if we should let the compiler generate normal code,
   probably a memcmp call.

   OPERANDS[0] is the target (result).
   OPERANDS[1] is the first source.
   OPERANDS[2] is the second source.
   OPERANDS[3] is the length.
   OPERANDS[4] is the alignment.  */

bool
expand_block_compare (rtx operands[])
{
  rtx target = operands[0];
  rtx orig_src1 = operands[1];
  rtx orig_src2 = operands[2];
  rtx bytes_rtx = operands[3];
  rtx align_rtx = operands[4];
  HOST_WIDE_INT cmp_bytes = 0;
  rtx src1 = orig_src1;
  rtx src2 = orig_src2;

  /* This case is complicated to handle because the subtract
     with carry instructions do not generate the 64-bit
     carry and so we must emit code to calculate it ourselves.
     We choose not to implement this yet.  */
  if (TARGET_32BIT && TARGET_POWERPC64)
    return false;

  /* If this is not a fixed size compare, just call memcmp.  */
  if (!CONST_INT_P (bytes_rtx))
    return false;

  /* This must be a fixed size alignment.  */
  if (!CONST_INT_P (align_rtx))
    return false;

  unsigned int base_align = UINTVAL (align_rtx) / BITS_PER_UNIT;

  /* SLOW_UNALIGNED_ACCESS -- don't do unaligned stuff.  */
  if (SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src1))
      || SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src2)))
    return false;

  gcc_assert (GET_MODE (target) == SImode);

  /* Anything to move?  */
  unsigned HOST_WIDE_INT bytes = UINTVAL (bytes_rtx);
  if (bytes == 0)
    return true;

  /* The code generated for p7 and older is not faster than glibc
     memcmp if alignment is small and length is not short, so bail
     out to avoid those conditions.  */
  if (!TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
      && ((base_align == 1 && bytes > 16)
	  || (base_align == 2 && bytes > 32)))
    return false;

  rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
  rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
  /* P7/P8 code uses cond for subfc. but P9 uses
     it for cmpld which needs CCUNSmode.  */
  rtx cond;
  if (TARGET_P9_MISC)
    cond = gen_reg_rtx (CCUNSmode);
  else
    cond = gen_reg_rtx (CCmode);

  /* If we have an LE target without ldbrx and word_mode is DImode,
     then we must avoid using word_mode.  */
  int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
		       && word_mode == DImode);

  /* Strategy phase.  How many ops will this take and should we expand it?  */

  unsigned HOST_WIDE_INT offset = 0;
  machine_mode load_mode =
    select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
  unsigned int load_mode_size = GET_MODE_SIZE (load_mode);

  /* We don't want to generate too much code.  */
  unsigned HOST_WIDE_INT max_bytes =
    load_mode_size * (unsigned HOST_WIDE_INT) rs6000_block_compare_inline_limit;
  if (!IN_RANGE (bytes, 1, max_bytes))
    return false;

  bool generate_6432_conversion = false;
  rtx convert_label = NULL;
  rtx final_label = NULL;

  /* Example of generated code for 18 bytes aligned 1 byte.
     Compiled with -fno-reorder-blocks for clarity.
	.L6487: #convert_label
	.L6488: #final_label

     We start off with DImode for two blocks that jump to the DI->SI conversion
     if the difference is found there, then a final block of HImode that skips
     the DI->SI conversion.  */

  while (bytes > 0)
    {
      unsigned int align = compute_current_alignment (base_align, offset);
      if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
	load_mode = select_block_compare_mode (offset, bytes, align,
					       word_mode_ok);
      else
	load_mode = select_block_compare_mode (0, bytes, align, word_mode_ok);
      load_mode_size = GET_MODE_SIZE (load_mode);
      if (bytes >= load_mode_size)
	cmp_bytes = load_mode_size;
      else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
	{
	  /* Move this load back so it doesn't go past the end.
	     P8/P9 can do this efficiently.  */
	  unsigned int extra_bytes = load_mode_size - bytes;
	  cmp_bytes = bytes;
	  if (extra_bytes < offset)
	    {
	      offset -= extra_bytes;
	      cmp_bytes = load_mode_size;
	      bytes = cmp_bytes;
	    }
	}
      else
	/* P7 and earlier can't do the overlapping load trick fast,
	   so this forces a non-overlapping load and a shift to get
	   rid of the extra bytes.  */
	cmp_bytes = bytes;

      src1 = adjust_address (orig_src1, load_mode, offset);
      src2 = adjust_address (orig_src2, load_mode, offset);

      if (!REG_P (XEXP (src1, 0)))
	{
	  rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
	  src1 = replace_equiv_address (src1, src1_reg);
	}
      set_mem_size (src1, cmp_bytes);

      if (!REG_P (XEXP (src2, 0)))
	{
	  rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
	  src2 = replace_equiv_address (src2, src2_reg);
	}
      set_mem_size (src2, cmp_bytes);

      do_load_for_compare (tmp_reg_src1, src1, load_mode);
      do_load_for_compare (tmp_reg_src2, src2, load_mode);

      if (cmp_bytes < load_mode_size)
	{
	  /* Shift unneeded bytes off.  */
	  rtx sh = GEN_INT (BITS_PER_UNIT * (load_mode_size - cmp_bytes));
	  if (word_mode == DImode)
	    {
	      emit_insn (gen_lshrdi3 (tmp_reg_src1, tmp_reg_src1, sh));
	      emit_insn (gen_lshrdi3 (tmp_reg_src2, tmp_reg_src2, sh));
	    }
	  else
	    {
	      emit_insn (gen_lshrsi3 (tmp_reg_src1, tmp_reg_src1, sh));
	      emit_insn (gen_lshrsi3 (tmp_reg_src2, tmp_reg_src2, sh));
	    }
	}

      int remain = bytes - cmp_bytes;
      if (GET_MODE_SIZE (GET_MODE (target)) > GET_MODE_SIZE (load_mode))
	{
	  /* Target is larger than load size so we don't need to
	     reduce result size.  */

	  /* We previously did a block that need 64->32 conversion but
	     the current block does not, so a label is needed to jump
	     to the end.  */
	  if (generate_6432_conversion && !final_label)
	    final_label = gen_label_rtx ();

	  if (remain > 0)
	    {
	      /* This is not the last block, branch to the end if the result
		 of this subtract is not zero.  */
	      if (!final_label)
		final_label = gen_label_rtx ();
	      rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
	      rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
	      rtx cr = gen_reg_rtx (CCmode);
	      rs6000_emit_dot_insn (tmp_reg_src2, tmp, 2, cr);
	      emit_insn (gen_movsi (target,
				    gen_lowpart (SImode, tmp_reg_src2)));
	      rtx ne_rtx = gen_rtx_NE (VOIDmode, cr, const0_rtx);
	      rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
						 fin_ref, pc_rtx);
	      rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
	      JUMP_LABEL (j) = final_label;
	      LABEL_NUSES (final_label) += 1;
	    }
	  else
	    {
	      /* Last block: the subtract result is the final answer.  */
	      if (word_mode == DImode)
		{
		  emit_insn (gen_subdi3 (tmp_reg_src2, tmp_reg_src1,
					 tmp_reg_src2));
		  emit_insn (gen_movsi (target,
					gen_lowpart (SImode, tmp_reg_src2)));
		}
	      else
		emit_insn (gen_subsi3 (target, tmp_reg_src1, tmp_reg_src2));

	      /* NOTE(review): jump over the 64->32 conversion block that an
		 earlier iteration scheduled, if any — confirm against the
		 upstream rs6000.c; the extraction lost these lines.  */
	      if (final_label)
		{
		  rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
		  rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
		  JUMP_LABEL (j) = final_label;
		  LABEL_NUSES (final_label) += 1;
		  emit_barrier ();
		}
	    }
	}
      else
	{
	  /* Do we need a 64->32 conversion block?  We need the 64->32
	     conversion even if target size == load_mode size because
	     the subtract generates one extra bit.  */
	  generate_6432_conversion = true;

	  if (remain > 0)
	    {
	      if (!convert_label)
		convert_label = gen_label_rtx ();

	      /* Compare to zero and branch to convert_label if not zero.  */
	      rtx cvt_ref = gen_rtx_LABEL_REF (VOIDmode, convert_label);
	      if (TARGET_P9_MISC)
		{
		  /* Generate a compare, and convert with a setb later.  */
		  rtx cmp = gen_rtx_COMPARE (CCUNSmode, tmp_reg_src1,
					     tmp_reg_src2);
		  emit_insn (gen_rtx_SET (cond, cmp));
		}
	      else
		/* Generate a subfc. and use the longer
		   sequence for conversion.  */
		if (TARGET_64BIT)
		  emit_insn (gen_subfdi3_carry_dot2 (tmp_reg_src2, tmp_reg_src2,
						     tmp_reg_src1, cond));
		else
		  emit_insn (gen_subfsi3_carry_dot2 (tmp_reg_src2, tmp_reg_src2,
						     tmp_reg_src1, cond));
	      rtx ne_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
	      rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
						 cvt_ref, pc_rtx);
	      rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
	      JUMP_LABEL (j) = convert_label;
	      LABEL_NUSES (convert_label) += 1;
	    }
	  else
	    {
	      /* Just do the subtract/compare.  Since this is the last block
		 the convert code will be generated immediately following.  */
	      if (TARGET_P9_MISC)
		{
		  rtx cmp = gen_rtx_COMPARE (CCUNSmode, tmp_reg_src1,
					     tmp_reg_src2);
		  emit_insn (gen_rtx_SET (cond, cmp));
		}
	      else
		if (TARGET_64BIT)
		  emit_insn (gen_subfdi3_carry (tmp_reg_src2, tmp_reg_src2,
						tmp_reg_src1));
		else
		  emit_insn (gen_subfsi3_carry (tmp_reg_src2, tmp_reg_src2,
						tmp_reg_src1));
	    }
	}

      offset += cmp_bytes;
      bytes -= cmp_bytes;
    }

  if (generate_6432_conversion)
    {
      if (convert_label)
	emit_label (convert_label);

      /* We need to produce DI result from sub, then convert to target SI
	 while maintaining <0 / ==0 / >0 properties.  This sequence works:
	 subfc L,A,B
	 subfe H,H,H
	 popcntd L,L
	 rldimi L,H,6,0

	 This is an alternate one Segher cooked up if somebody
	 wants to expand this for something that doesn't have popcntd:
	 subfc L,a,b
	 subfe H,x,x
	 addic t,L,-1
	 subfe v,t,L
	 or z,v,H

	 And finally, p9 can just do this:
	 cmpld A,B
	 setb r */

      if (TARGET_P9_MISC)
	emit_insn (gen_setb_unsigned (target, cond));
      else
	{
	  if (TARGET_64BIT)
	    {
	      rtx tmp_reg_ca = gen_reg_rtx (DImode);
	      emit_insn (gen_subfdi3_carry_in_xx (tmp_reg_ca));
	      emit_insn (gen_popcntddi2 (tmp_reg_src2, tmp_reg_src2));
	      emit_insn (gen_iordi3 (tmp_reg_src2, tmp_reg_src2, tmp_reg_ca));
	      emit_insn (gen_movsi (target, gen_lowpart (SImode, tmp_reg_src2)));
	    }
	  else
	    {
	      rtx tmp_reg_ca = gen_reg_rtx (SImode);
	      emit_insn (gen_subfsi3_carry_in_xx (tmp_reg_ca));
	      emit_insn (gen_popcntdsi2 (tmp_reg_src2, tmp_reg_src2));
	      emit_insn (gen_iorsi3 (target, tmp_reg_src2, tmp_reg_ca));
	    }
	}
    }

  if (final_label)
    emit_label (final_label);

  gcc_assert (bytes == 0);
  return true;
}
/* Generate alignment check and branch code to set up for
   strncmp when we don't have DI alignment.
   STRNCMP_LABEL is the label to branch if there is a page crossing.
   SRC is the string pointer to be examined.
   BYTES is the max number of bytes to compare.  */

static void
expand_strncmp_align_check (rtx strncmp_label, rtx src, HOST_WIDE_INT bytes)
{
  rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, strncmp_label);
  /* Isolate the low 12 bits (position within a 4KiB page) of the
     source address.  */
  rtx src_check = copy_addr_to_reg (XEXP (src, 0));
  if (GET_MODE (src_check) == SImode)
    emit_insn (gen_andsi3 (src_check, src_check, GEN_INT (0xfff)));
  else
    emit_insn (gen_anddi3 (src_check, src_check, GEN_INT (0xfff)));
  /* If (addr & 0xfff) >= 4096 - bytes, reading BYTES bytes could cross
     a page boundary: branch to the out-of-line strncmp path.  */
  rtx cond = gen_reg_rtx (CCmode);
  emit_move_insn (cond, gen_rtx_COMPARE (CCmode, src_check,
					 GEN_INT (4096 - bytes)));

  rtx cmp_rtx = gen_rtx_LT (VOIDmode, cond, const0_rtx);

  /* Fall through (pc_rtx) when strictly below the threshold, otherwise
     take the branch to STRNCMP_LABEL.  */
  rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
				     pc_rtx, lab_ref);
  rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
  JUMP_LABEL (j) = strncmp_label;
  LABEL_NUSES (strncmp_label) += 1;
}
20295 /* Expand a string compare operation with length, and return
20296 true if successful. Return false if we should let the
20297 compiler generate normal code, probably a strncmp call.
20299 OPERANDS[0] is the target (result).
20300 OPERANDS[1] is the first source.
20301 OPERANDS[2] is the second source.
20302 If NO_LENGTH is zero, then:
20303 OPERANDS[3] is the length.
20304 OPERANDS[4] is the alignment in bytes.
20305 If NO_LENGTH is nonzero, then:
20306 OPERANDS[3] is the alignment in bytes. */
20308 expand_strn_compare (rtx operands
[], int no_length
)
20310 rtx target
= operands
[0];
20311 rtx orig_src1
= operands
[1];
20312 rtx orig_src2
= operands
[2];
20313 rtx bytes_rtx
, align_rtx
;
20317 align_rtx
= operands
[3];
20321 bytes_rtx
= operands
[3];
20322 align_rtx
= operands
[4];
20324 unsigned HOST_WIDE_INT cmp_bytes
= 0;
20325 rtx src1
= orig_src1
;
20326 rtx src2
= orig_src2
;
20328 /* If we have a length, it must be constant. This simplifies things
20329 a bit as we don't have to generate code to check if we've exceeded
20330 the length. Later this could be expanded to handle this case. */
20331 if (!no_length
&& !CONST_INT_P (bytes_rtx
))
20334 /* This must be a fixed size alignment. */
20335 if (!CONST_INT_P (align_rtx
))
20338 unsigned int base_align
= UINTVAL (align_rtx
);
20339 int align1
= MEM_ALIGN (orig_src1
) / BITS_PER_UNIT
;
20340 int align2
= MEM_ALIGN (orig_src2
) / BITS_PER_UNIT
;
20342 /* SLOW_UNALIGNED_ACCESS -- don't do unaligned stuff. */
20343 if (SLOW_UNALIGNED_ACCESS (word_mode
, align1
)
20344 || SLOW_UNALIGNED_ACCESS (word_mode
, align2
))
20347 gcc_assert (GET_MODE (target
) == SImode
);
20349 /* If we have an LE target without ldbrx and word_mode is DImode,
20350 then we must avoid using word_mode. */
20351 int word_mode_ok
= !(!BYTES_BIG_ENDIAN
&& !TARGET_LDBRX
20352 && word_mode
== DImode
);
20354 unsigned int word_mode_size
= GET_MODE_SIZE (word_mode
);
20356 unsigned HOST_WIDE_INT offset
= 0;
20357 unsigned HOST_WIDE_INT bytes
; /* N from the strncmp args if available. */
20358 unsigned HOST_WIDE_INT compare_length
; /* How much to compare inline. */
20360 /* Use this as a standin to determine the mode to use. */
20361 bytes
= rs6000_string_compare_inline_limit
* word_mode_size
;
20363 bytes
= UINTVAL (bytes_rtx
);
20365 machine_mode load_mode
=
20366 select_block_compare_mode (offset
, bytes
, base_align
, word_mode_ok
);
20367 unsigned int load_mode_size
= GET_MODE_SIZE (load_mode
);
20368 compare_length
= rs6000_string_compare_inline_limit
* load_mode_size
;
20370 /* If we have equality at the end of the last compare and we have not
20371 found the end of the string, we need to call strcmp/strncmp to
20372 compare the remainder. */
20373 bool equality_compare_rest
= false;
20377 bytes
= compare_length
;
20378 equality_compare_rest
= true;
20382 if (bytes
<= compare_length
)
20383 compare_length
= bytes
;
20385 equality_compare_rest
= true;
20388 rtx result_reg
= gen_reg_rtx (word_mode
);
20389 rtx final_move_label
= gen_label_rtx ();
20390 rtx final_label
= gen_label_rtx ();
20391 rtx begin_compare_label
= NULL
;
20393 if (base_align
< 8)
20395 /* Generate code that checks distance to 4k boundary for this case. */
20396 begin_compare_label
= gen_label_rtx ();
20397 rtx strncmp_label
= gen_label_rtx ();
20400 /* Strncmp for power8 in glibc does this:
20402 cmpldi cr7,r8,4096-16
20403 bgt cr7,L(pagecross) */
20405 /* Make sure that the length we use for the alignment test and
20406 the subsequent code generation are in agreement so we do not
20407 go past the length we tested for a 4k boundary crossing. */
20408 unsigned HOST_WIDE_INT align_test
= compare_length
;
20409 if (align_test
< 8)
20411 align_test
= HOST_WIDE_INT_1U
<< ceil_log2 (align_test
);
20412 base_align
= align_test
;
20416 align_test
= ROUND_UP (align_test
, 8);
20421 expand_strncmp_align_check (strncmp_label
, src1
, align_test
);
20423 expand_strncmp_align_check (strncmp_label
, src2
, align_test
);
20425 /* Now generate the following sequence:
20426 - branch to begin_compare
20429 - branch to final_label
20430 - begin_compare_label */
20432 rtx cmp_ref
= gen_rtx_LABEL_REF (VOIDmode
, begin_compare_label
);
20433 jmp
= emit_jump_insn (gen_rtx_SET (pc_rtx
, cmp_ref
));
20434 JUMP_LABEL (jmp
) = begin_compare_label
;
20435 LABEL_NUSES (begin_compare_label
) += 1;
20438 emit_label (strncmp_label
);
20440 if (!REG_P (XEXP (src1
, 0)))
20442 rtx src1_reg
= copy_addr_to_reg (XEXP (src1
, 0));
20443 src1
= replace_equiv_address (src1
, src1_reg
);
20446 if (!REG_P (XEXP (src2
, 0)))
20448 rtx src2_reg
= copy_addr_to_reg (XEXP (src2
, 0));
20449 src2
= replace_equiv_address (src2
, src2_reg
);
20454 tree fun
= builtin_decl_explicit (BUILT_IN_STRCMP
);
20455 emit_library_call_value (XEXP (DECL_RTL (fun
), 0),
20456 target
, LCT_NORMAL
, GET_MODE (target
),
20457 force_reg (Pmode
, XEXP (src1
, 0)), Pmode
,
20458 force_reg (Pmode
, XEXP (src2
, 0)), Pmode
);
20462 /* -m32 -mpowerpc64 results in word_mode being DImode even
20463 though otherwise it is 32-bit. The length arg to strncmp
20464 is a size_t which will be the same size as pointers. */
20467 len_rtx
= gen_reg_rtx (DImode
);
20469 len_rtx
= gen_reg_rtx (SImode
);
20471 emit_move_insn (len_rtx
, bytes_rtx
);
20473 tree fun
= builtin_decl_explicit (BUILT_IN_STRNCMP
);
20474 emit_library_call_value (XEXP (DECL_RTL (fun
), 0),
20475 target
, LCT_NORMAL
, GET_MODE (target
),
20476 force_reg (Pmode
, XEXP (src1
, 0)), Pmode
,
20477 force_reg (Pmode
, XEXP (src2
, 0)), Pmode
,
20478 len_rtx
, GET_MODE (len_rtx
));
20481 rtx fin_ref
= gen_rtx_LABEL_REF (VOIDmode
, final_label
);
20482 jmp
= emit_jump_insn (gen_rtx_SET (pc_rtx
, fin_ref
));
20483 JUMP_LABEL (jmp
) = final_label
;
20484 LABEL_NUSES (final_label
) += 1;
20486 emit_label (begin_compare_label
);
20489 rtx cleanup_label
= NULL
;
20490 rtx tmp_reg_src1
= gen_reg_rtx (word_mode
);
20491 rtx tmp_reg_src2
= gen_reg_rtx (word_mode
);
20493 /* Generate sequence of ld/ldbrx, cmpb to compare out
20494 to the length specified. */
20495 unsigned HOST_WIDE_INT bytes_to_compare
= compare_length
;
20496 while (bytes_to_compare
> 0)
20498 /* Compare sequence:
20499 check each 8B with: ld/ld cmpd bne
20500 If equal, use rldicr/cmpb to check for zero byte.
20501 cleanup code at end:
20502 cmpb get byte that differs
20503 cmpb look for zero byte
20505 cntlzd get bit of first zero/diff byte
20506 subfic convert for rldcl use
20507 rldcl rldcl extract diff/zero byte
20508 subf subtract for final result
20510 The last compare can branch around the cleanup code if the
20511 result is zero because the strings are exactly equal. */
20512 unsigned int align
= compute_current_alignment (base_align
, offset
);
20513 if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
)
20514 load_mode
= select_block_compare_mode (offset
, bytes_to_compare
, align
,
20517 load_mode
= select_block_compare_mode (0, bytes_to_compare
, align
,
20519 load_mode_size
= GET_MODE_SIZE (load_mode
);
20520 if (bytes_to_compare
>= load_mode_size
)
20521 cmp_bytes
= load_mode_size
;
20522 else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
)
20524 /* Move this load back so it doesn't go past the end.
20525 P8/P9 can do this efficiently. */
20526 unsigned int extra_bytes
= load_mode_size
- bytes_to_compare
;
20527 cmp_bytes
= bytes_to_compare
;
20528 if (extra_bytes
< offset
)
20530 offset
-= extra_bytes
;
20531 cmp_bytes
= load_mode_size
;
20532 bytes_to_compare
= cmp_bytes
;
20536 /* P7 and earlier can't do the overlapping load trick fast,
20537 so this forces a non-overlapping load and a shift to get
20538 rid of the extra bytes. */
20539 cmp_bytes
= bytes_to_compare
;
20541 src1
= adjust_address (orig_src1
, load_mode
, offset
);
20542 src2
= adjust_address (orig_src2
, load_mode
, offset
);
20544 if (!REG_P (XEXP (src1
, 0)))
20546 rtx src1_reg
= copy_addr_to_reg (XEXP (src1
, 0));
20547 src1
= replace_equiv_address (src1
, src1_reg
);
20549 set_mem_size (src1
, cmp_bytes
);
20551 if (!REG_P (XEXP (src2
, 0)))
20553 rtx src2_reg
= copy_addr_to_reg (XEXP (src2
, 0));
20554 src2
= replace_equiv_address (src2
, src2_reg
);
20556 set_mem_size (src2
, cmp_bytes
);
20558 do_load_for_compare (tmp_reg_src1
, src1
, load_mode
);
20559 do_load_for_compare (tmp_reg_src2
, src2
, load_mode
);
20561 /* We must always left-align the data we read, and
20562 clear any bytes to the right that are beyond the string.
20563 Otherwise the cmpb sequence won't produce the correct
20564 results. The beginning of the compare will be done
20565 with word_mode so will not have any extra shifts or
20568 if (load_mode_size
< word_mode_size
)
20570 /* Rotate left first. */
20571 rtx sh
= GEN_INT (BITS_PER_UNIT
* (word_mode_size
- load_mode_size
));
20572 if (word_mode
== DImode
)
20574 emit_insn (gen_rotldi3 (tmp_reg_src1
, tmp_reg_src1
, sh
));
20575 emit_insn (gen_rotldi3 (tmp_reg_src2
, tmp_reg_src2
, sh
));
20579 emit_insn (gen_rotlsi3 (tmp_reg_src1
, tmp_reg_src1
, sh
));
20580 emit_insn (gen_rotlsi3 (tmp_reg_src2
, tmp_reg_src2
, sh
));
20584 if (cmp_bytes
< word_mode_size
)
20586 /* Now clear right. This plus the rotate can be
20587 turned into a rldicr instruction. */
20588 HOST_WIDE_INT mb
= BITS_PER_UNIT
* (word_mode_size
- cmp_bytes
);
20589 rtx mask
= GEN_INT (HOST_WIDE_INT_M1U
<< mb
);
20590 if (word_mode
== DImode
)
20592 emit_insn (gen_anddi3_mask (tmp_reg_src1
, tmp_reg_src1
, mask
));
20593 emit_insn (gen_anddi3_mask (tmp_reg_src2
, tmp_reg_src2
, mask
));
20597 emit_insn (gen_andsi3_mask (tmp_reg_src1
, tmp_reg_src1
, mask
));
20598 emit_insn (gen_andsi3_mask (tmp_reg_src2
, tmp_reg_src2
, mask
));
20602 /* Cases to handle. A and B are chunks of the two strings.
20603 1: Not end of comparison:
20604 A != B: branch to cleanup code to compute result.
20605 A == B: check for 0 byte, next block if not found.
20606 2: End of the inline comparison:
20607 A != B: branch to cleanup code to compute result.
20608 A == B: check for 0 byte, call strcmp/strncmp
20609 3: compared requested N bytes:
20610 A == B: branch to result 0.
20611 A != B: cleanup code to compute result. */
20613 unsigned HOST_WIDE_INT remain
= bytes_to_compare
- cmp_bytes
;
20616 if (remain
> 0 || equality_compare_rest
)
20618 /* Branch to cleanup code, otherwise fall through to do
20620 if (!cleanup_label
)
20621 cleanup_label
= gen_label_rtx ();
20622 dst_label
= cleanup_label
;
20625 /* Branch to end and produce result of 0. */
20626 dst_label
= final_move_label
;
20628 rtx lab_ref
= gen_rtx_LABEL_REF (VOIDmode
, dst_label
);
20629 rtx cond
= gen_reg_rtx (CCmode
);
20631 /* Always produce the 0 result, it is needed if
20632 cmpb finds a 0 byte in this chunk. */
20633 rtx tmp
= gen_rtx_MINUS (word_mode
, tmp_reg_src1
, tmp_reg_src2
);
20634 rs6000_emit_dot_insn (result_reg
, tmp
, 1, cond
);
20637 if (remain
== 0 && !equality_compare_rest
)
20638 cmp_rtx
= gen_rtx_EQ (VOIDmode
, cond
, const0_rtx
);
20640 cmp_rtx
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
20642 rtx ifelse
= gen_rtx_IF_THEN_ELSE (VOIDmode
, cmp_rtx
,
20644 rtx j
= emit_jump_insn (gen_rtx_SET (pc_rtx
, ifelse
));
20645 JUMP_LABEL (j
) = dst_label
;
20646 LABEL_NUSES (dst_label
) += 1;
20648 if (remain
> 0 || equality_compare_rest
)
20650 /* Generate a cmpb to test for a 0 byte and branch
20651 to final result if found. */
20652 rtx cmpb_zero
= gen_reg_rtx (word_mode
);
20653 rtx lab_ref_fin
= gen_rtx_LABEL_REF (VOIDmode
, final_move_label
);
20654 rtx condz
= gen_reg_rtx (CCmode
);
20655 rtx zero_reg
= gen_reg_rtx (word_mode
);
20656 if (word_mode
== SImode
)
20658 emit_insn (gen_movsi (zero_reg
, GEN_INT (0)));
20659 emit_insn (gen_cmpbsi3 (cmpb_zero
, tmp_reg_src1
, zero_reg
));
20660 if (cmp_bytes
< word_mode_size
)
20662 /* Don't want to look at zero bytes past end. */
20664 BITS_PER_UNIT
* (word_mode_size
- cmp_bytes
);
20665 rtx mask
= GEN_INT (HOST_WIDE_INT_M1U
<< mb
);
20666 emit_insn (gen_andsi3_mask (cmpb_zero
, cmpb_zero
, mask
));
20671 emit_insn (gen_movdi (zero_reg
, GEN_INT (0)));
20672 emit_insn (gen_cmpbdi3 (cmpb_zero
, tmp_reg_src1
, zero_reg
));
20673 if (cmp_bytes
< word_mode_size
)
20675 /* Don't want to look at zero bytes past end. */
20677 BITS_PER_UNIT
* (word_mode_size
- cmp_bytes
);
20678 rtx mask
= GEN_INT (HOST_WIDE_INT_M1U
<< mb
);
20679 emit_insn (gen_anddi3_mask (cmpb_zero
, cmpb_zero
, mask
));
20683 emit_move_insn (condz
, gen_rtx_COMPARE (CCmode
, cmpb_zero
, zero_reg
));
20684 rtx cmpnz_rtx
= gen_rtx_NE (VOIDmode
, condz
, const0_rtx
);
20685 rtx ifelse
= gen_rtx_IF_THEN_ELSE (VOIDmode
, cmpnz_rtx
,
20686 lab_ref_fin
, pc_rtx
);
20687 rtx j2
= emit_jump_insn (gen_rtx_SET (pc_rtx
, ifelse
));
20688 JUMP_LABEL (j2
) = final_move_label
;
20689 LABEL_NUSES (final_move_label
) += 1;
20693 offset
+= cmp_bytes
;
20694 bytes_to_compare
-= cmp_bytes
;
20697 if (equality_compare_rest
)
20699 /* Update pointers past what has been compared already. */
20700 src1
= adjust_address (orig_src1
, load_mode
, offset
);
20701 src2
= adjust_address (orig_src2
, load_mode
, offset
);
20703 if (!REG_P (XEXP (src1
, 0)))
20705 rtx src1_reg
= copy_addr_to_reg (XEXP (src1
, 0));
20706 src1
= replace_equiv_address (src1
, src1_reg
);
20708 set_mem_size (src1
, cmp_bytes
);
20710 if (!REG_P (XEXP (src2
, 0)))
20712 rtx src2_reg
= copy_addr_to_reg (XEXP (src2
, 0));
20713 src2
= replace_equiv_address (src2
, src2_reg
);
20715 set_mem_size (src2
, cmp_bytes
);
20717 /* Construct call to strcmp/strncmp to compare the rest of the string. */
20720 tree fun
= builtin_decl_explicit (BUILT_IN_STRCMP
);
20721 emit_library_call_value (XEXP (DECL_RTL (fun
), 0),
20722 target
, LCT_NORMAL
, GET_MODE (target
),
20723 force_reg (Pmode
, XEXP (src1
, 0)), Pmode
,
20724 force_reg (Pmode
, XEXP (src2
, 0)), Pmode
);
20730 len_rtx
= gen_reg_rtx (DImode
);
20732 len_rtx
= gen_reg_rtx (SImode
);
20734 emit_move_insn (len_rtx
, GEN_INT (bytes
- compare_length
));
20735 tree fun
= builtin_decl_explicit (BUILT_IN_STRNCMP
);
20736 emit_library_call_value (XEXP (DECL_RTL (fun
), 0),
20737 target
, LCT_NORMAL
, GET_MODE (target
),
20738 force_reg (Pmode
, XEXP (src1
, 0)), Pmode
,
20739 force_reg (Pmode
, XEXP (src2
, 0)), Pmode
,
20740 len_rtx
, GET_MODE (len_rtx
));
20743 rtx fin_ref
= gen_rtx_LABEL_REF (VOIDmode
, final_label
);
20744 rtx jmp
= emit_jump_insn (gen_rtx_SET (pc_rtx
, fin_ref
));
20745 JUMP_LABEL (jmp
) = final_label
;
20746 LABEL_NUSES (final_label
) += 1;
20751 emit_label (cleanup_label
);
20753 /* Generate the final sequence that identifies the differing
20754 byte and generates the final result, taking into account
20757 cmpb cmpb_result1, src1, src2
20758 cmpb cmpb_result2, src1, zero
20759 orc cmpb_result1, cmp_result1, cmpb_result2
20760 cntlzd get bit of first zero/diff byte
20761 addi convert for rldcl use
20762 rldcl rldcl extract diff/zero byte
20763 subf subtract for final result
20766 rtx cmpb_diff
= gen_reg_rtx (word_mode
);
20767 rtx cmpb_zero
= gen_reg_rtx (word_mode
);
20768 rtx rot_amt
= gen_reg_rtx (word_mode
);
20769 rtx zero_reg
= gen_reg_rtx (word_mode
);
20771 rtx rot1_1
= gen_reg_rtx (word_mode
);
20772 rtx rot1_2
= gen_reg_rtx (word_mode
);
20773 rtx rot2_1
= gen_reg_rtx (word_mode
);
20774 rtx rot2_2
= gen_reg_rtx (word_mode
);
20776 if (word_mode
== SImode
)
20778 emit_insn (gen_cmpbsi3 (cmpb_diff
, tmp_reg_src1
, tmp_reg_src2
));
20779 emit_insn (gen_movsi (zero_reg
, GEN_INT (0)));
20780 emit_insn (gen_cmpbsi3 (cmpb_zero
, tmp_reg_src1
, zero_reg
));
20781 emit_insn (gen_one_cmplsi2 (cmpb_diff
,cmpb_diff
));
20782 emit_insn (gen_iorsi3 (cmpb_diff
, cmpb_diff
, cmpb_zero
));
20783 emit_insn (gen_clzsi2 (rot_amt
, cmpb_diff
));
20784 emit_insn (gen_addsi3 (rot_amt
, rot_amt
, GEN_INT (8)));
20785 emit_insn (gen_rotlsi3 (rot1_1
, tmp_reg_src1
,
20786 gen_lowpart (SImode
, rot_amt
)));
20787 emit_insn (gen_andsi3_mask (rot1_2
, rot1_1
, GEN_INT (0xff)));
20788 emit_insn (gen_rotlsi3 (rot2_1
, tmp_reg_src2
,
20789 gen_lowpart (SImode
, rot_amt
)));
20790 emit_insn (gen_andsi3_mask (rot2_2
, rot2_1
, GEN_INT (0xff)));
20791 emit_insn (gen_subsi3 (result_reg
, rot1_2
, rot2_2
));
20795 emit_insn (gen_cmpbdi3 (cmpb_diff
, tmp_reg_src1
, tmp_reg_src2
));
20796 emit_insn (gen_movdi (zero_reg
, GEN_INT (0)));
20797 emit_insn (gen_cmpbdi3 (cmpb_zero
, tmp_reg_src1
, zero_reg
));
20798 emit_insn (gen_one_cmpldi2 (cmpb_diff
,cmpb_diff
));
20799 emit_insn (gen_iordi3 (cmpb_diff
, cmpb_diff
, cmpb_zero
));
20800 emit_insn (gen_clzdi2 (rot_amt
, cmpb_diff
));
20801 emit_insn (gen_adddi3 (rot_amt
, rot_amt
, GEN_INT (8)));
20802 emit_insn (gen_rotldi3 (rot1_1
, tmp_reg_src1
,
20803 gen_lowpart (SImode
, rot_amt
)));
20804 emit_insn (gen_anddi3_mask (rot1_2
, rot1_1
, GEN_INT (0xff)));
20805 emit_insn (gen_rotldi3 (rot2_1
, tmp_reg_src2
,
20806 gen_lowpart (SImode
, rot_amt
)));
20807 emit_insn (gen_anddi3_mask (rot2_2
, rot2_1
, GEN_INT (0xff)));
20808 emit_insn (gen_subdi3 (result_reg
, rot1_2
, rot2_2
));
20811 emit_label (final_move_label
);
20812 emit_insn (gen_movsi (target
,
20813 gen_lowpart (SImode
, result_reg
)));
20814 emit_label (final_label
);
20818 /* Expand a block move operation, and return 1 if successful. Return 0
20819 if we should let the compiler generate normal code.
20821 operands[0] is the destination
20822 operands[1] is the source
20823 operands[2] is the length
20824 operands[3] is the alignment */
20826 #define MAX_MOVE_REG 4
20829 expand_block_move (rtx operands
[])
20831 rtx orig_dest
= operands
[0];
20832 rtx orig_src
= operands
[1];
20833 rtx bytes_rtx
= operands
[2];
20834 rtx align_rtx
= operands
[3];
20835 int constp
= (GET_CODE (bytes_rtx
) == CONST_INT
);
20840 rtx stores
[MAX_MOVE_REG
];
20843 /* If this is not a fixed size move, just call memcpy */
20847 /* This must be a fixed size alignment */
20848 gcc_assert (GET_CODE (align_rtx
) == CONST_INT
);
20849 align
= INTVAL (align_rtx
) * BITS_PER_UNIT
;
20851 /* Anything to move? */
20852 bytes
= INTVAL (bytes_rtx
);
20856 if (bytes
> rs6000_block_move_inline_limit
)
20859 for (offset
= 0; bytes
> 0; offset
+= move_bytes
, bytes
-= move_bytes
)
20862 rtx (*movmemsi
) (rtx
, rtx
, rtx
, rtx
);
20863 rtx (*mov
) (rtx
, rtx
);
20865 machine_mode mode
= BLKmode
;
20868 /* Altivec first, since it will be faster than a string move
20869 when it applies, and usually not significantly larger. */
20870 if (TARGET_ALTIVEC
&& bytes
>= 16 && align
>= 128)
20874 gen_func
.mov
= gen_movv4si
;
20876 else if (TARGET_SPE
&& bytes
>= 8 && align
>= 64)
20880 gen_func
.mov
= gen_movv2si
;
20882 else if (TARGET_STRING
20883 && bytes
> 24 /* move up to 32 bytes at a time */
20889 && ! fixed_regs
[10]
20890 && ! fixed_regs
[11]
20891 && ! fixed_regs
[12])
20893 move_bytes
= (bytes
> 32) ? 32 : bytes
;
20894 gen_func
.movmemsi
= gen_movmemsi_8reg
;
20896 else if (TARGET_STRING
20897 && bytes
> 16 /* move up to 24 bytes at a time */
20903 && ! fixed_regs
[10])
20905 move_bytes
= (bytes
> 24) ? 24 : bytes
;
20906 gen_func
.movmemsi
= gen_movmemsi_6reg
;
20908 else if (TARGET_STRING
20909 && bytes
> 8 /* move up to 16 bytes at a time */
20913 && ! fixed_regs
[8])
20915 move_bytes
= (bytes
> 16) ? 16 : bytes
;
20916 gen_func
.movmemsi
= gen_movmemsi_4reg
;
20918 else if (bytes
>= 8 && TARGET_POWERPC64
20919 && (align
>= 64 || !STRICT_ALIGNMENT
))
20923 gen_func
.mov
= gen_movdi
;
20924 if (offset
== 0 && align
< 64)
20928 /* If the address form is reg+offset with offset not a
20929 multiple of four, reload into reg indirect form here
20930 rather than waiting for reload. This way we get one
20931 reload, not one per load and/or store. */
20932 addr
= XEXP (orig_dest
, 0);
20933 if ((GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == LO_SUM
)
20934 && GET_CODE (XEXP (addr
, 1)) == CONST_INT
20935 && (INTVAL (XEXP (addr
, 1)) & 3) != 0)
20937 addr
= copy_addr_to_reg (addr
);
20938 orig_dest
= replace_equiv_address (orig_dest
, addr
);
20940 addr
= XEXP (orig_src
, 0);
20941 if ((GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == LO_SUM
)
20942 && GET_CODE (XEXP (addr
, 1)) == CONST_INT
20943 && (INTVAL (XEXP (addr
, 1)) & 3) != 0)
20945 addr
= copy_addr_to_reg (addr
);
20946 orig_src
= replace_equiv_address (orig_src
, addr
);
20950 else if (TARGET_STRING
&& bytes
> 4 && !TARGET_POWERPC64
)
20951 { /* move up to 8 bytes at a time */
20952 move_bytes
= (bytes
> 8) ? 8 : bytes
;
20953 gen_func
.movmemsi
= gen_movmemsi_2reg
;
20955 else if (bytes
>= 4 && (align
>= 32 || !STRICT_ALIGNMENT
))
20956 { /* move 4 bytes */
20959 gen_func
.mov
= gen_movsi
;
20961 else if (bytes
>= 2 && (align
>= 16 || !STRICT_ALIGNMENT
))
20962 { /* move 2 bytes */
20965 gen_func
.mov
= gen_movhi
;
20967 else if (TARGET_STRING
&& bytes
> 1)
20968 { /* move up to 4 bytes at a time */
20969 move_bytes
= (bytes
> 4) ? 4 : bytes
;
20970 gen_func
.movmemsi
= gen_movmemsi_1reg
;
20972 else /* move 1 byte at a time */
20976 gen_func
.mov
= gen_movqi
;
20979 src
= adjust_address (orig_src
, mode
, offset
);
20980 dest
= adjust_address (orig_dest
, mode
, offset
);
20982 if (mode
!= BLKmode
)
20984 rtx tmp_reg
= gen_reg_rtx (mode
);
20986 emit_insn ((*gen_func
.mov
) (tmp_reg
, src
));
20987 stores
[num_reg
++] = (*gen_func
.mov
) (dest
, tmp_reg
);
20990 if (mode
== BLKmode
|| num_reg
>= MAX_MOVE_REG
|| bytes
== move_bytes
)
20993 for (i
= 0; i
< num_reg
; i
++)
20994 emit_insn (stores
[i
]);
20998 if (mode
== BLKmode
)
21000 /* Move the address into scratch registers. The movmemsi
21001 patterns require zero offset. */
21002 if (!REG_P (XEXP (src
, 0)))
21004 rtx src_reg
= copy_addr_to_reg (XEXP (src
, 0));
21005 src
= replace_equiv_address (src
, src_reg
);
21007 set_mem_size (src
, move_bytes
);
21009 if (!REG_P (XEXP (dest
, 0)))
21011 rtx dest_reg
= copy_addr_to_reg (XEXP (dest
, 0));
21012 dest
= replace_equiv_address (dest
, dest_reg
);
21014 set_mem_size (dest
, move_bytes
);
21016 emit_insn ((*gen_func
.movmemsi
) (dest
, src
,
21017 GEN_INT (move_bytes
& 31),
21026 /* Return a string to perform a load_multiple operation.
21027 operands[0] is the vector.
21028 operands[1] is the source address.
21029 operands[2] is the first destination register. */
21032 rs6000_output_load_multiple (rtx operands
[3])
21034 /* We have to handle the case where the pseudo used to contain the address
21035 is assigned to one of the output registers. */
21037 int words
= XVECLEN (operands
[0], 0);
21040 if (XVECLEN (operands
[0], 0) == 1)
21041 return "lwz %2,0(%1)";
21043 for (i
= 0; i
< words
; i
++)
21044 if (refers_to_regno_p (REGNO (operands
[2]) + i
, operands
[1]))
21048 xop
[0] = GEN_INT (4 * (words
-1));
21049 xop
[1] = operands
[1];
21050 xop
[2] = operands
[2];
21051 output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop
);
21056 xop
[0] = GEN_INT (4 * (words
-1));
21057 xop
[1] = operands
[1];
21058 xop
[2] = gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
21059 output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop
);
21064 for (j
= 0; j
< words
; j
++)
21067 xop
[0] = GEN_INT (j
* 4);
21068 xop
[1] = operands
[1];
21069 xop
[2] = gen_rtx_REG (SImode
, REGNO (operands
[2]) + j
);
21070 output_asm_insn ("lwz %2,%0(%1)", xop
);
21072 xop
[0] = GEN_INT (i
* 4);
21073 xop
[1] = operands
[1];
21074 output_asm_insn ("lwz %1,%0(%1)", xop
);
21079 return "lswi %2,%1,%N0";
21083 /* A validation routine: say whether CODE, a condition code, and MODE
21084 match. The other alternatives either don't make sense or should
21085 never be generated. */
21088 validate_condition_mode (enum rtx_code code
, machine_mode mode
)
21090 gcc_assert ((GET_RTX_CLASS (code
) == RTX_COMPARE
21091 || GET_RTX_CLASS (code
) == RTX_COMM_COMPARE
)
21092 && GET_MODE_CLASS (mode
) == MODE_CC
);
21094 /* These don't make sense. */
21095 gcc_assert ((code
!= GT
&& code
!= LT
&& code
!= GE
&& code
!= LE
)
21096 || mode
!= CCUNSmode
);
21098 gcc_assert ((code
!= GTU
&& code
!= LTU
&& code
!= GEU
&& code
!= LEU
)
21099 || mode
== CCUNSmode
);
21101 gcc_assert (mode
== CCFPmode
21102 || (code
!= ORDERED
&& code
!= UNORDERED
21103 && code
!= UNEQ
&& code
!= LTGT
21104 && code
!= UNGT
&& code
!= UNLT
21105 && code
!= UNGE
&& code
!= UNLE
));
21107 /* These should never be generated except for
21108 flag_finite_math_only. */
21109 gcc_assert (mode
!= CCFPmode
21110 || flag_finite_math_only
21111 || (code
!= LE
&& code
!= GE
21112 && code
!= UNEQ
&& code
!= LTGT
21113 && code
!= UNGT
&& code
!= UNLT
));
21115 /* These are invalid; the information is not there. */
21116 gcc_assert (mode
!= CCEQmode
|| code
== EQ
|| code
== NE
);
21120 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
21121 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
21122 not zero, store there the bit offset (counted from the right) where
21123 the single stretch of 1 bits begins; and similarly for B, the bit
21124 offset where it ends. */
21127 rs6000_is_valid_mask (rtx mask
, int *b
, int *e
, machine_mode mode
)
21129 unsigned HOST_WIDE_INT val
= INTVAL (mask
);
21130 unsigned HOST_WIDE_INT bit
;
21132 int n
= GET_MODE_PRECISION (mode
);
21134 if (mode
!= DImode
&& mode
!= SImode
)
21137 if (INTVAL (mask
) >= 0)
21140 ne
= exact_log2 (bit
);
21141 nb
= exact_log2 (val
+ bit
);
21143 else if (val
+ 1 == 0)
21152 nb
= exact_log2 (bit
);
21153 ne
= exact_log2 (val
+ bit
);
21158 ne
= exact_log2 (bit
);
21159 if (val
+ bit
== 0)
21167 if (nb
< 0 || ne
< 0 || nb
>= n
|| ne
>= n
)
21178 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
21179 or rldicr instruction, to implement an AND with it in mode MODE. */
21182 rs6000_is_valid_and_mask (rtx mask
, machine_mode mode
)
21186 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
21189 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
21191 if (mode
== DImode
)
21192 return (ne
== 0 || nb
== 63 || (nb
< 32 && ne
<= nb
));
21194 /* For SImode, rlwinm can do everything. */
21195 if (mode
== SImode
)
21196 return (nb
< 32 && ne
< 32);
21201 /* Return the instruction template for an AND with mask in mode MODE, with
21202 operands OPERANDS. If DOT is true, make it a record-form instruction. */
21205 rs6000_insn_for_and_mask (machine_mode mode
, rtx
*operands
, bool dot
)
21209 if (!rs6000_is_valid_mask (operands
[2], &nb
, &ne
, mode
))
21210 gcc_unreachable ();
21212 if (mode
== DImode
&& ne
== 0)
21214 operands
[3] = GEN_INT (63 - nb
);
21216 return "rldicl. %0,%1,0,%3";
21217 return "rldicl %0,%1,0,%3";
21220 if (mode
== DImode
&& nb
== 63)
21222 operands
[3] = GEN_INT (63 - ne
);
21224 return "rldicr. %0,%1,0,%3";
21225 return "rldicr %0,%1,0,%3";
21228 if (nb
< 32 && ne
< 32)
21230 operands
[3] = GEN_INT (31 - nb
);
21231 operands
[4] = GEN_INT (31 - ne
);
21233 return "rlwinm. %0,%1,0,%3,%4";
21234 return "rlwinm %0,%1,0,%3,%4";
21237 gcc_unreachable ();
21240 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
21241 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
21242 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
21245 rs6000_is_valid_shift_mask (rtx mask
, rtx shift
, machine_mode mode
)
21249 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
21252 int n
= GET_MODE_PRECISION (mode
);
21255 if (CONST_INT_P (XEXP (shift
, 1)))
21257 sh
= INTVAL (XEXP (shift
, 1));
21258 if (sh
< 0 || sh
>= n
)
21262 rtx_code code
= GET_CODE (shift
);
21264 /* Convert any shift by 0 to a rotate, to simplify below code. */
21268 /* Convert rotate to simple shift if we can, to make analysis simpler. */
21269 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& ne
>= sh
)
21271 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& nb
< sh
)
21277 /* DImode rotates need rld*. */
21278 if (mode
== DImode
&& code
== ROTATE
)
21279 return (nb
== 63 || ne
== 0 || ne
== sh
);
21281 /* SImode rotates need rlw*. */
21282 if (mode
== SImode
&& code
== ROTATE
)
21283 return (nb
< 32 && ne
< 32 && sh
< 32);
21285 /* Wrap-around masks are only okay for rotates. */
21289 /* Variable shifts are only okay for rotates. */
21293 /* Don't allow ASHIFT if the mask is wrong for that. */
21294 if (code
== ASHIFT
&& ne
< sh
)
21297 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
21298 if the mask is wrong for that. */
21299 if (nb
< 32 && ne
< 32 && sh
< 32
21300 && !(code
== LSHIFTRT
&& nb
>= 32 - sh
))
21303 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
21304 if the mask is wrong for that. */
21305 if (code
== LSHIFTRT
)
21307 if (nb
== 63 || ne
== 0 || ne
== sh
)
21308 return !(code
== LSHIFTRT
&& nb
>= sh
);
21313 /* Return the instruction template for a shift with mask in mode MODE, with
21314 operands OPERANDS. If DOT is true, make it a record-form instruction. */
21317 rs6000_insn_for_shift_mask (machine_mode mode
, rtx
*operands
, bool dot
)
21321 if (!rs6000_is_valid_mask (operands
[3], &nb
, &ne
, mode
))
21322 gcc_unreachable ();
21324 if (mode
== DImode
&& ne
== 0)
21326 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
21327 operands
[2] = GEN_INT (64 - INTVAL (operands
[2]));
21328 operands
[3] = GEN_INT (63 - nb
);
21330 return "rld%I2cl. %0,%1,%2,%3";
21331 return "rld%I2cl %0,%1,%2,%3";
21334 if (mode
== DImode
&& nb
== 63)
21336 operands
[3] = GEN_INT (63 - ne
);
21338 return "rld%I2cr. %0,%1,%2,%3";
21339 return "rld%I2cr %0,%1,%2,%3";
21343 && GET_CODE (operands
[4]) != LSHIFTRT
21344 && CONST_INT_P (operands
[2])
21345 && ne
== INTVAL (operands
[2]))
21347 operands
[3] = GEN_INT (63 - nb
);
21349 return "rld%I2c. %0,%1,%2,%3";
21350 return "rld%I2c %0,%1,%2,%3";
21353 if (nb
< 32 && ne
< 32)
21355 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
21356 operands
[2] = GEN_INT (32 - INTVAL (operands
[2]));
21357 operands
[3] = GEN_INT (31 - nb
);
21358 operands
[4] = GEN_INT (31 - ne
);
21359 /* This insn can also be a 64-bit rotate with mask that really makes
21360 it just a shift right (with mask); the %h below are to adjust for
21361 that situation (shift count is >= 32 in that case). */
21363 return "rlw%I2nm. %0,%1,%h2,%3,%4";
21364 return "rlw%I2nm %0,%1,%h2,%3,%4";
21367 gcc_unreachable ();
21370 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
21371 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
21372 ASHIFT, or LSHIFTRT) in mode MODE. */
21375 rs6000_is_valid_insert_mask (rtx mask
, rtx shift
, machine_mode mode
)
21379 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
21382 int n
= GET_MODE_PRECISION (mode
);
21384 int sh
= INTVAL (XEXP (shift
, 1));
21385 if (sh
< 0 || sh
>= n
)
21388 rtx_code code
= GET_CODE (shift
);
21390 /* Convert any shift by 0 to a rotate, to simplify below code. */
21394 /* Convert rotate to simple shift if we can, to make analysis simpler. */
21395 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& ne
>= sh
)
21397 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& nb
< sh
)
21403 /* DImode rotates need rldimi. */
21404 if (mode
== DImode
&& code
== ROTATE
)
21407 /* SImode rotates need rlwimi. */
21408 if (mode
== SImode
&& code
== ROTATE
)
21409 return (nb
< 32 && ne
< 32 && sh
< 32);
21411 /* Wrap-around masks are only okay for rotates. */
21415 /* Don't allow ASHIFT if the mask is wrong for that. */
21416 if (code
== ASHIFT
&& ne
< sh
)
21419 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
21420 if the mask is wrong for that. */
21421 if (nb
< 32 && ne
< 32 && sh
< 32
21422 && !(code
== LSHIFTRT
&& nb
>= 32 - sh
))
21425 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
21426 if the mask is wrong for that. */
21427 if (code
== LSHIFTRT
)
21430 return !(code
== LSHIFTRT
&& nb
>= sh
);
21435 /* Return the instruction template for an insert with mask in mode MODE, with
21436 operands OPERANDS. If DOT is true, make it a record-form instruction. */
21439 rs6000_insn_for_insert_mask (machine_mode mode
, rtx
*operands
, bool dot
)
21443 if (!rs6000_is_valid_mask (operands
[3], &nb
, &ne
, mode
))
21444 gcc_unreachable ();
21446 /* Prefer rldimi because rlwimi is cracked. */
21447 if (TARGET_POWERPC64
21448 && (!dot
|| mode
== DImode
)
21449 && GET_CODE (operands
[4]) != LSHIFTRT
21450 && ne
== INTVAL (operands
[2]))
21452 operands
[3] = GEN_INT (63 - nb
);
21454 return "rldimi. %0,%1,%2,%3";
21455 return "rldimi %0,%1,%2,%3";
21458 if (nb
< 32 && ne
< 32)
21460 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
21461 operands
[2] = GEN_INT (32 - INTVAL (operands
[2]));
21462 operands
[3] = GEN_INT (31 - nb
);
21463 operands
[4] = GEN_INT (31 - ne
);
21465 return "rlwimi. %0,%1,%2,%3,%4";
21466 return "rlwimi %0,%1,%2,%3,%4";
21469 gcc_unreachable ();
21472 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
21473 using two machine instructions. */
21476 rs6000_is_valid_2insn_and (rtx c
, machine_mode mode
)
21478 /* There are two kinds of AND we can handle with two insns:
21479 1) those we can do with two rl* insn;
21482 We do not handle that last case yet. */
21484 /* If there is just one stretch of ones, we can do it. */
21485 if (rs6000_is_valid_mask (c
, NULL
, NULL
, mode
))
21488 /* Otherwise, fill in the lowest "hole"; if we can do the result with
21489 one insn, we can do the whole thing with two. */
21490 unsigned HOST_WIDE_INT val
= INTVAL (c
);
21491 unsigned HOST_WIDE_INT bit1
= val
& -val
;
21492 unsigned HOST_WIDE_INT bit2
= (val
+ bit1
) & ~val
;
21493 unsigned HOST_WIDE_INT val1
= (val
+ bit1
) & val
;
21494 unsigned HOST_WIDE_INT bit3
= val1
& -val1
;
21495 return rs6000_is_valid_and_mask (GEN_INT (val
+ bit3
- bit2
), mode
);
21498 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
21499 If EXPAND is true, split rotate-and-mask instructions we generate to
21500 their constituent parts as well (this is used during expand); if DOT
21501 is 1, make the last insn a record-form instruction clobbering the
21502 destination GPR and setting the CC reg (from operands[3]); if 2, set
21503 that GPR as well as the CC reg. */
21506 rs6000_emit_2insn_and (machine_mode mode
, rtx
*operands
, bool expand
, int dot
)
21508 gcc_assert (!(expand
&& dot
));
21510 unsigned HOST_WIDE_INT val
= INTVAL (operands
[2]);
21512 /* If it is one stretch of ones, it is DImode; shift left, mask, then
21513 shift right. This generates better code than doing the masks without
21514 shifts, or shifting first right and then left. */
21516 if (rs6000_is_valid_mask (operands
[2], &nb
, &ne
, mode
) && nb
>= ne
)
21518 gcc_assert (mode
== DImode
);
21520 int shift
= 63 - nb
;
21523 rtx tmp1
= gen_reg_rtx (DImode
);
21524 rtx tmp2
= gen_reg_rtx (DImode
);
21525 emit_insn (gen_ashldi3 (tmp1
, operands
[1], GEN_INT (shift
)));
21526 emit_insn (gen_anddi3 (tmp2
, tmp1
, GEN_INT (val
<< shift
)));
21527 emit_insn (gen_lshrdi3 (operands
[0], tmp2
, GEN_INT (shift
)));
21531 rtx tmp
= gen_rtx_ASHIFT (mode
, operands
[1], GEN_INT (shift
));
21532 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (val
<< shift
));
21533 emit_move_insn (operands
[0], tmp
);
21534 tmp
= gen_rtx_LSHIFTRT (mode
, operands
[0], GEN_INT (shift
));
21535 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
21540 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
21541 that does the rest. */
21542 unsigned HOST_WIDE_INT bit1
= val
& -val
;
21543 unsigned HOST_WIDE_INT bit2
= (val
+ bit1
) & ~val
;
21544 unsigned HOST_WIDE_INT val1
= (val
+ bit1
) & val
;
21545 unsigned HOST_WIDE_INT bit3
= val1
& -val1
;
21547 unsigned HOST_WIDE_INT mask1
= -bit3
+ bit2
- 1;
21548 unsigned HOST_WIDE_INT mask2
= val
+ bit3
- bit2
;
21550 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2
), mode
));
21552 /* Two "no-rotate"-and-mask instructions, for SImode. */
21553 if (rs6000_is_valid_and_mask (GEN_INT (mask1
), mode
))
21555 gcc_assert (mode
== SImode
);
21557 rtx reg
= expand
? gen_reg_rtx (mode
) : operands
[0];
21558 rtx tmp
= gen_rtx_AND (mode
, operands
[1], GEN_INT (mask1
));
21559 emit_move_insn (reg
, tmp
);
21560 tmp
= gen_rtx_AND (mode
, reg
, GEN_INT (mask2
));
21561 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
21565 gcc_assert (mode
== DImode
);
21567 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
21568 insns; we have to do the first in SImode, because it wraps. */
21569 if (mask2
<= 0xffffffff
21570 && rs6000_is_valid_and_mask (GEN_INT (mask1
), SImode
))
21572 rtx reg
= expand
? gen_reg_rtx (mode
) : operands
[0];
21573 rtx tmp
= gen_rtx_AND (SImode
, gen_lowpart (SImode
, operands
[1]),
21575 rtx reg_low
= gen_lowpart (SImode
, reg
);
21576 emit_move_insn (reg_low
, tmp
);
21577 tmp
= gen_rtx_AND (mode
, reg
, GEN_INT (mask2
));
21578 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
21582 /* Two rld* insns: rotate, clear the hole in the middle (which now is
21583 at the top end), rotate back and clear the other hole. */
21584 int right
= exact_log2 (bit3
);
21585 int left
= 64 - right
;
21587 /* Rotate the mask too. */
21588 mask1
= (mask1
>> right
) | ((bit2
- 1) << left
);
21592 rtx tmp1
= gen_reg_rtx (DImode
);
21593 rtx tmp2
= gen_reg_rtx (DImode
);
21594 rtx tmp3
= gen_reg_rtx (DImode
);
21595 emit_insn (gen_rotldi3 (tmp1
, operands
[1], GEN_INT (left
)));
21596 emit_insn (gen_anddi3 (tmp2
, tmp1
, GEN_INT (mask1
)));
21597 emit_insn (gen_rotldi3 (tmp3
, tmp2
, GEN_INT (right
)));
21598 emit_insn (gen_anddi3 (operands
[0], tmp3
, GEN_INT (mask2
)));
21602 rtx tmp
= gen_rtx_ROTATE (mode
, operands
[1], GEN_INT (left
));
21603 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (mask1
));
21604 emit_move_insn (operands
[0], tmp
);
21605 tmp
= gen_rtx_ROTATE (mode
, operands
[0], GEN_INT (right
));
21606 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (mask2
));
21607 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
21611 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
21612 for lfq and stfq insns iff the registers are hard registers. */
21615 registers_ok_for_quad_peep (rtx reg1
, rtx reg2
)
21617 /* We might have been passed a SUBREG. */
21618 if (GET_CODE (reg1
) != REG
|| GET_CODE (reg2
) != REG
)
21621 /* We might have been passed non floating point registers. */
21622 if (!FP_REGNO_P (REGNO (reg1
))
21623 || !FP_REGNO_P (REGNO (reg2
)))
21626 return (REGNO (reg1
) == REGNO (reg2
) - 1);
21629 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
21630 addr1 and addr2 must be in consecutive memory locations
21631 (addr2 == addr1 + 8). */
21634 mems_ok_for_quad_peep (rtx mem1
, rtx mem2
)
21637 unsigned int reg1
, reg2
;
21638 int offset1
, offset2
;
21640 /* The mems cannot be volatile. */
21641 if (MEM_VOLATILE_P (mem1
) || MEM_VOLATILE_P (mem2
))
21644 addr1
= XEXP (mem1
, 0);
21645 addr2
= XEXP (mem2
, 0);
21647 /* Extract an offset (if used) from the first addr. */
21648 if (GET_CODE (addr1
) == PLUS
)
21650 /* If not a REG, return zero. */
21651 if (GET_CODE (XEXP (addr1
, 0)) != REG
)
21655 reg1
= REGNO (XEXP (addr1
, 0));
21656 /* The offset must be constant! */
21657 if (GET_CODE (XEXP (addr1
, 1)) != CONST_INT
)
21659 offset1
= INTVAL (XEXP (addr1
, 1));
21662 else if (GET_CODE (addr1
) != REG
)
21666 reg1
= REGNO (addr1
);
21667 /* This was a simple (mem (reg)) expression. Offset is 0. */
21671 /* And now for the second addr. */
21672 if (GET_CODE (addr2
) == PLUS
)
21674 /* If not a REG, return zero. */
21675 if (GET_CODE (XEXP (addr2
, 0)) != REG
)
21679 reg2
= REGNO (XEXP (addr2
, 0));
21680 /* The offset must be constant. */
21681 if (GET_CODE (XEXP (addr2
, 1)) != CONST_INT
)
21683 offset2
= INTVAL (XEXP (addr2
, 1));
21686 else if (GET_CODE (addr2
) != REG
)
21690 reg2
= REGNO (addr2
);
21691 /* This was a simple (mem (reg)) expression. Offset is 0. */
21695 /* Both of these must have the same base register. */
21699 /* The offset for the second addr must be 8 more than the first addr. */
21700 if (offset2
!= offset1
+ 8)
21703 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
21710 rs6000_secondary_memory_needed_rtx (machine_mode mode
)
21712 static bool eliminated
= false;
21715 if (mode
!= SDmode
|| TARGET_NO_SDMODE_STACK
)
21716 ret
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
21719 rtx mem
= cfun
->machine
->sdmode_stack_slot
;
21720 gcc_assert (mem
!= NULL_RTX
);
21724 mem
= eliminate_regs (mem
, VOIDmode
, NULL_RTX
);
21725 cfun
->machine
->sdmode_stack_slot
= mem
;
21731 if (TARGET_DEBUG_ADDR
)
21733 fprintf (stderr
, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
21734 GET_MODE_NAME (mode
));
21736 fprintf (stderr
, "\tNULL_RTX\n");
21744 /* Return the mode to be used for memory when a secondary memory
21745 location is needed. For SDmode values we need to use DDmode, in
21746 all other cases we can use the same mode. */
21748 rs6000_secondary_memory_needed_mode (machine_mode mode
)
21750 if (lra_in_progress
&& mode
== SDmode
)
21756 rs6000_check_sdmode (tree
*tp
, int *walk_subtrees
, void *data ATTRIBUTE_UNUSED
)
21758 /* Don't walk into types. */
21759 if (*tp
== NULL_TREE
|| *tp
== error_mark_node
|| TYPE_P (*tp
))
21761 *walk_subtrees
= 0;
21765 switch (TREE_CODE (*tp
))
21774 case VIEW_CONVERT_EXPR
:
21775 if (TYPE_MODE (TREE_TYPE (*tp
)) == SDmode
)
21785 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
21786 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
21787 only work on the traditional altivec registers, note if an altivec register
21790 static enum rs6000_reg_type
21791 register_to_reg_type (rtx reg
, bool *is_altivec
)
21793 HOST_WIDE_INT regno
;
21794 enum reg_class rclass
;
21796 if (GET_CODE (reg
) == SUBREG
)
21797 reg
= SUBREG_REG (reg
);
21800 return NO_REG_TYPE
;
21802 regno
= REGNO (reg
);
21803 if (regno
>= FIRST_PSEUDO_REGISTER
)
21805 if (!lra_in_progress
&& !reload_in_progress
&& !reload_completed
)
21806 return PSEUDO_REG_TYPE
;
21808 regno
= true_regnum (reg
);
21809 if (regno
< 0 || regno
>= FIRST_PSEUDO_REGISTER
)
21810 return PSEUDO_REG_TYPE
;
21813 gcc_assert (regno
>= 0);
21815 if (is_altivec
&& ALTIVEC_REGNO_P (regno
))
21816 *is_altivec
= true;
21818 rclass
= rs6000_regno_regclass
[regno
];
21819 return reg_class_to_reg_type
[(int)rclass
];
21822 /* Helper function to return the cost of adding a TOC entry address. */
21825 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask
)
21829 if (TARGET_CMODEL
!= CMODEL_SMALL
)
21830 ret
= ((addr_mask
& RELOAD_REG_OFFSET
) == 0) ? 1 : 2;
21833 ret
= (TARGET_MINIMAL_TOC
) ? 6 : 3;
/* NOTE(review): this region was garbled in transcription -- statements are
   split across lines and the embedded numeric prefixes are original-file
   line numbers; gaps in them (e.g. 21842 -> 21846) indicate dropped lines
   (braces, returns, case labels).  Restore from upstream rs6000.c before
   editing -- TODO confirm against the real source.  */
21838 /* Helper function for rs6000_secondary_reload to determine whether the memory
21839 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
21840 needs reloading. Return negative if the memory is not handled by the memory
21841 helper functions and to try a different reload method, 0 if no additional
21842 instructions are need, and positive to give the extra cost for the
21846 rs6000_secondary_reload_memory (rtx addr
,
21847 enum reg_class rclass
,
21850 int extra_cost
= 0;
21851 rtx reg
, and_arg
, plus_arg0
, plus_arg1
;
21852 addr_mask_type addr_mask
;
21853 const char *type
= NULL
;
21854 const char *fail_msg
= NULL
;
/* Select the addressing mask that matches the register bank (GPR, FPR,
   Altivec/VSX) the reload will use.  */
21856 if (GPR_REG_CLASS_P (rclass
))
21857 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_GPR
];
21859 else if (rclass
== FLOAT_REGS
)
21860 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
];
21862 else if (rclass
== ALTIVEC_REGS
)
21863 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
];
21865 /* For the combined VSX_REGS, turn off Altivec AND -16. */
21866 else if (rclass
== VSX_REGS
)
21867 addr_mask
= (reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
]
21868 & ~RELOAD_REG_AND_M16
);
21870 /* If the register allocator hasn't made up its mind yet on the register
21871 class to use, settle on defaults to use. */
21872 else if (rclass
== NO_REGS
)
21874 addr_mask
= (reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
]
21875 & ~RELOAD_REG_AND_M16
);
/* When several banks could be used, drop the addressing forms that are
   not common to all of them.  */
21877 if ((addr_mask
& RELOAD_REG_MULTIPLE
) != 0)
21878 addr_mask
&= ~(RELOAD_REG_INDEXED
21879 | RELOAD_REG_PRE_INCDEC
21880 | RELOAD_REG_PRE_MODIFY
);
21886 /* If the register isn't valid in this register class, just return now. */
21887 if ((addr_mask
& RELOAD_REG_VALID
) == 0)
21889 if (TARGET_DEBUG_ADDR
)
21892 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
21893 "not valid in class\n",
21894 GET_MODE_NAME (mode
), reg_class_names
[rclass
]);
/* Dispatch on the outermost code of the address to decide whether this
   class can handle it directly, needs extra insns, or fails.  */
21901 switch (GET_CODE (addr
))
21903 /* Does the register class supports auto update forms for this mode? We
21904 don't need a scratch register, since the powerpc only supports
21905 PRE_INC, PRE_DEC, and PRE_MODIFY. */
21908 reg
= XEXP (addr
, 0);
21909 if (!base_reg_operand (addr
, GET_MODE (reg
)))
21911 fail_msg
= "no base register #1";
21915 else if ((addr_mask
& RELOAD_REG_PRE_INCDEC
) == 0)
21923 reg
= XEXP (addr
, 0);
21924 plus_arg1
= XEXP (addr
, 1);
21925 if (!base_reg_operand (reg
, GET_MODE (reg
))
21926 || GET_CODE (plus_arg1
) != PLUS
21927 || !rtx_equal_p (reg
, XEXP (plus_arg1
, 0)))
21929 fail_msg
= "bad PRE_MODIFY";
21933 else if ((addr_mask
& RELOAD_REG_PRE_MODIFY
) == 0)
21940 /* Do we need to simulate AND -16 to clear the bottom address bits used
21941 in VMX load/stores? Only allow the AND for vector sizes. */
21943 and_arg
= XEXP (addr
, 0);
21944 if (GET_MODE_SIZE (mode
) != 16
21945 || GET_CODE (XEXP (addr
, 1)) != CONST_INT
21946 || INTVAL (XEXP (addr
, 1)) != -16)
21948 fail_msg
= "bad Altivec AND #1";
21952 if (rclass
!= ALTIVEC_REGS
)
21954 if (legitimate_indirect_address_p (and_arg
, false))
21957 else if (legitimate_indexed_address_p (and_arg
, false))
21962 fail_msg
= "bad Altivec AND #2";
21970 /* If this is an indirect address, make sure it is a base register. */
21973 if (!legitimate_indirect_address_p (addr
, false))
21980 /* If this is an indexed address, make sure the register class can handle
21981 indexed addresses for this mode. */
21983 plus_arg0
= XEXP (addr
, 0);
21984 plus_arg1
= XEXP (addr
, 1);
21986 /* (plus (plus (reg) (constant)) (constant)) is generated during
21987 push_reload processing, so handle it now. */
21988 if (GET_CODE (plus_arg0
) == PLUS
&& CONST_INT_P (plus_arg1
))
21990 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
21997 /* (plus (plus (reg) (constant)) (reg)) is also generated during
21998 push_reload processing, so handle it now. */
21999 else if (GET_CODE (plus_arg0
) == PLUS
&& REG_P (plus_arg1
))
22001 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
22004 type
= "indexed #2";
22008 else if (!base_reg_operand (plus_arg0
, GET_MODE (plus_arg0
)))
22010 fail_msg
= "no base register #2";
22014 else if (int_reg_operand (plus_arg1
, GET_MODE (plus_arg1
)))
22016 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0
22017 || !legitimate_indexed_address_p (addr
, false))
22024 else if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0
22025 && CONST_INT_P (plus_arg1
))
22027 if (!quad_address_offset_p (INTVAL (plus_arg1
)))
22030 type
= "vector d-form offset";
22034 /* Make sure the register class can handle offset addresses. */
22035 else if (rs6000_legitimate_offset_address_p (mode
, addr
, false, true))
22037 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
22040 type
= "offset #2";
22046 fail_msg
= "bad PLUS";
22053 /* Quad offsets are restricted and can't handle normal addresses. */
22054 if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
22057 type
= "vector d-form lo_sum";
22060 else if (!legitimate_lo_sum_address_p (mode
, addr
, false))
22062 fail_msg
= "bad LO_SUM";
22066 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
22073 /* Static addresses need to create a TOC entry. */
22077 if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
22080 type
= "vector d-form lo_sum #2";
22086 extra_cost
= rs6000_secondary_reload_toc_costs (addr_mask
);
22090 /* TOC references look like offsetable memory. */
22092 if (TARGET_CMODEL
== CMODEL_SMALL
|| XINT (addr
, 1) != UNSPEC_TOCREL
)
22094 fail_msg
= "bad UNSPEC";
22098 else if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
22101 type
= "vector d-form lo_sum #3";
22104 else if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
22107 type
= "toc reference";
22113 fail_msg
= "bad address";
/* Debug trace: a negative extra_cost means "not handled here" (the error
   branch prints fail_msg), otherwise the chosen fixup type is printed.  */
22118 if (TARGET_DEBUG_ADDR
/* && extra_cost != 0 */)
22120 if (extra_cost
< 0)
22122 "rs6000_secondary_reload_memory error: mode = %s, "
22123 "class = %s, addr_mask = '%s', %s\n",
22124 GET_MODE_NAME (mode
),
22125 reg_class_names
[rclass
],
22126 rs6000_debug_addr_mask (addr_mask
, false),
22127 (fail_msg
!= NULL
) ? fail_msg
: "<bad address>");
22131 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
22132 "addr_mask = '%s', extra cost = %d, %s\n",
22133 GET_MODE_NAME (mode
),
22134 reg_class_names
[rclass
],
22135 rs6000_debug_addr_mask (addr_mask
, false),
22137 (type
) ? type
: "<none>");
22145 /* Helper function for rs6000_secondary_reload to return true if a move to a
22146 different register classe is really a simple move. */
22149 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type
,
22150 enum rs6000_reg_type from_type
,
22153 int size
= GET_MODE_SIZE (mode
);
22155 /* Add support for various direct moves available. In this function, we only
22156 look at cases where we don't need any extra registers, and one or more
22157 simple move insns are issued. Originally small integers are not allowed
22158 in FPR/VSX registers. Single precision binary floating is not a simple
22159 move because we need to convert to the single precision memory layout.
22160 The 4-byte SDmode can be moved. TDmode values are disallowed since they
22161 need special direct move handling, which we do not support yet. */
22162 if (TARGET_DIRECT_MOVE
22163 && ((to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
22164 || (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)))
22166 if (TARGET_POWERPC64
)
22168 /* ISA 2.07: MTVSRD or MVFVSRD. */
22172 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
22173 if (size
== 16 && TARGET_P9_VECTOR
&& mode
!= TDmode
)
22177 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
22178 if (TARGET_VSX_SMALL_INTEGER
)
22180 if (mode
== SImode
)
22183 if (TARGET_P9_VECTOR
&& (mode
== HImode
|| mode
== QImode
))
22187 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
22188 if (mode
== SDmode
)
22192 /* Power6+: MFTGPR or MFFGPR. */
22193 else if (TARGET_MFPGPR
&& TARGET_POWERPC64
&& size
== 8
22194 && ((to_type
== GPR_REG_TYPE
&& from_type
== FPR_REG_TYPE
)
22195 || (to_type
== FPR_REG_TYPE
&& from_type
== GPR_REG_TYPE
)))
22198 /* Move to/from SPR. */
22199 else if ((size
== 4 || (TARGET_POWERPC64
&& size
== 8))
22200 && ((to_type
== GPR_REG_TYPE
&& from_type
== SPR_REG_TYPE
)
22201 || (to_type
== SPR_REG_TYPE
&& from_type
== GPR_REG_TYPE
)))
22207 /* Direct move helper function for rs6000_secondary_reload, handle all of the
22208 special direct moves that involve allocating an extra register, return the
22209 insn code of the helper function if there is such a function or
22210 CODE_FOR_nothing if not. */
22213 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type
,
22214 enum rs6000_reg_type from_type
,
22216 secondary_reload_info
*sri
,
22220 enum insn_code icode
= CODE_FOR_nothing
;
22222 int size
= GET_MODE_SIZE (mode
);
22224 if (TARGET_POWERPC64
&& size
== 16)
22226 /* Handle moving 128-bit values from GPRs to VSX point registers on
22227 ISA 2.07 (power8, power9) when running in 64-bit mode using
22228 XXPERMDI to glue the two 64-bit values back together. */
22229 if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)
22231 cost
= 3; /* 2 mtvsrd's, 1 xxpermdi. */
22232 icode
= reg_addr
[mode
].reload_vsx_gpr
;
22235 /* Handle moving 128-bit values from VSX point registers to GPRs on
22236 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
22237 bottom 64-bit value. */
22238 else if (to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
22240 cost
= 3; /* 2 mfvsrd's, 1 xxpermdi. */
22241 icode
= reg_addr
[mode
].reload_gpr_vsx
;
22245 else if (TARGET_POWERPC64
&& mode
== SFmode
)
22247 if (to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
22249 cost
= 3; /* xscvdpspn, mfvsrd, and. */
22250 icode
= reg_addr
[mode
].reload_gpr_vsx
;
22253 else if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)
22255 cost
= 2; /* mtvsrz, xscvspdpn. */
22256 icode
= reg_addr
[mode
].reload_vsx_gpr
;
22260 else if (!TARGET_POWERPC64
&& size
== 8)
22262 /* Handle moving 64-bit values from GPRs to floating point registers on
22263 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
22264 32-bit values back together. Altivec register classes must be handled
22265 specially since a different instruction is used, and the secondary
22266 reload support requires a single instruction class in the scratch
22267 register constraint. However, right now TFmode is not allowed in
22268 Altivec registers, so the pattern will never match. */
22269 if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
&& !altivec_p
)
22271 cost
= 3; /* 2 mtvsrwz's, 1 fmrgow. */
22272 icode
= reg_addr
[mode
].reload_fpr_gpr
;
22276 if (icode
!= CODE_FOR_nothing
)
22281 sri
->icode
= icode
;
22282 sri
->extra_cost
= cost
;
22289 /* Return whether a move between two register classes can be done either
22290 directly (simple move) or via a pattern that uses a single extra temporary
22291 (using ISA 2.07's direct move in this case. */
22294 rs6000_secondary_reload_move (enum rs6000_reg_type to_type
,
22295 enum rs6000_reg_type from_type
,
22297 secondary_reload_info
*sri
,
22300 /* Fall back to load/store reloads if either type is not a register. */
22301 if (to_type
== NO_REG_TYPE
|| from_type
== NO_REG_TYPE
)
22304 /* If we haven't allocated registers yet, assume the move can be done for the
22305 standard register types. */
22306 if ((to_type
== PSEUDO_REG_TYPE
&& from_type
== PSEUDO_REG_TYPE
)
22307 || (to_type
== PSEUDO_REG_TYPE
&& IS_STD_REG_TYPE (from_type
))
22308 || (from_type
== PSEUDO_REG_TYPE
&& IS_STD_REG_TYPE (to_type
)))
22311 /* Moves to the same set of registers is a simple move for non-specialized
22313 if (to_type
== from_type
&& IS_STD_REG_TYPE (to_type
))
22316 /* Check whether a simple move can be done directly. */
22317 if (rs6000_secondary_reload_simple_move (to_type
, from_type
, mode
))
22321 sri
->icode
= CODE_FOR_nothing
;
22322 sri
->extra_cost
= 0;
22327 /* Now check if we can do it in a few steps. */
22328 return rs6000_secondary_reload_direct_move (to_type
, from_type
, mode
, sri
,
/* NOTE(review): this region was garbled in transcription -- statements are
   split across lines and gaps in the embedded original line numbers
   (e.g. 22362 -> 22364, 22377 -> 22380) indicate dropped lines (the icode
   initialization head, braces, done_p/default_p guards, returns).  Restore
   from upstream rs6000.c before editing -- TODO confirm.  */
22332 /* Inform reload about cases where moving X with a mode MODE to a register in
22333 RCLASS requires an extra scratch or immediate register. Return the class
22334 needed for the immediate register.
22336 For VSX and Altivec, we may need a register to convert sp+offset into
22339 For misaligned 64-bit gpr loads and stores we need a register to
22340 convert an offset address to indirect. */
22343 rs6000_secondary_reload (bool in_p
,
22345 reg_class_t rclass_i
,
22347 secondary_reload_info
*sri
)
22349 enum reg_class rclass
= (enum reg_class
) rclass_i
;
22350 reg_class_t ret
= ALL_REGS
;
22351 enum insn_code icode
;
22352 bool default_p
= false;
22353 bool done_p
= false;
22355 /* Allow subreg of memory before/during reload. */
22356 bool memory_p
= (MEM_P (x
)
22357 || (!reload_completed
&& GET_CODE (x
) == SUBREG
22358 && MEM_P (SUBREG_REG (x
))));
22360 sri
->icode
= CODE_FOR_nothing
;
22361 sri
->t_icode
= CODE_FOR_nothing
;
22362 sri
->extra_cost
= 0;
/* Pick the per-mode reload helper for a load or a store (the head of this
   assignment to icode was lost in transcription).  */
22364 ? reg_addr
[mode
].reload_load
22365 : reg_addr
[mode
].reload_store
);
/* Register-to-register case: classify both sides and see whether a direct
   move (possibly with one scratch) handles it.  */
22367 if (REG_P (x
) || register_operand (x
, mode
))
22369 enum rs6000_reg_type to_type
= reg_class_to_reg_type
[(int)rclass
];
22370 bool altivec_p
= (rclass
== ALTIVEC_REGS
);
22371 enum rs6000_reg_type from_type
= register_to_reg_type (x
, &altivec_p
);
22374 std::swap (to_type
, from_type
);
22376 /* Can we do a direct move of some sort? */
22377 if (rs6000_secondary_reload_move (to_type
, from_type
, mode
, sri
,
22380 icode
= (enum insn_code
)sri
->icode
;
22387 /* Make sure 0.0 is not reloaded or forced into memory. */
22388 if (x
== CONST0_RTX (mode
) && VSX_REG_CLASS_P (rclass
))
22395 /* If this is a scalar floating point value and we want to load it into the
22396 traditional Altivec registers, do it via a move via a traditional floating
22397 point register, unless we have D-form addressing. Also make sure that
22398 non-zero constants use a FPR. */
22399 if (!done_p
&& reg_addr
[mode
].scalar_in_vmx_p
22400 && !mode_supports_vmx_dform (mode
)
22401 && (rclass
== VSX_REGS
|| rclass
== ALTIVEC_REGS
)
22402 && (memory_p
|| (GET_CODE (x
) == CONST_DOUBLE
)))
22409 /* Handle reload of load/stores if we have reload helper functions. */
22410 if (!done_p
&& icode
!= CODE_FOR_nothing
&& memory_p
)
22412 int extra_cost
= rs6000_secondary_reload_memory (XEXP (x
, 0), rclass
,
22415 if (extra_cost
>= 0)
22419 if (extra_cost
> 0)
22421 sri
->extra_cost
= extra_cost
;
22422 sri
->icode
= icode
;
22427 /* Handle unaligned loads and stores of integer registers. */
22428 if (!done_p
&& TARGET_POWERPC64
22429 && reg_class_to_reg_type
[(int)rclass
] == GPR_REG_TYPE
22431 && GET_MODE_SIZE (GET_MODE (x
)) >= UNITS_PER_WORD
)
22433 rtx addr
= XEXP (x
, 0);
22434 rtx off
= address_offset (addr
);
22436 if (off
!= NULL_RTX
)
22438 unsigned int extra
= GET_MODE_SIZE (GET_MODE (x
)) - UNITS_PER_WORD
;
22439 unsigned HOST_WIDE_INT offset
= INTVAL (off
);
22441 /* We need a secondary reload when our legitimate_address_p
22442 says the address is good (as otherwise the entire address
22443 will be reloaded), and the offset is not a multiple of
22444 four or we have an address wrap. Address wrap will only
22445 occur for LO_SUMs since legitimate_offset_address_p
22446 rejects addresses for 16-byte mems that will wrap. */
22447 if (GET_CODE (addr
) == LO_SUM
22448 ? (1 /* legitimate_address_p allows any offset for lo_sum */
22449 && ((offset
& 3) != 0
22450 || ((offset
& 0xffff) ^ 0x8000) >= 0x10000 - extra
))
22451 : (offset
+ 0x8000 < 0x10000 - extra
/* legitimate_address_p */
22452 && (offset
& 3) != 0))
22454 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
22456 sri
->icode
= ((TARGET_32BIT
) ? CODE_FOR_reload_si_load
22457 : CODE_FOR_reload_di_load
);
22459 sri
->icode
= ((TARGET_32BIT
) ? CODE_FOR_reload_si_store
22460 : CODE_FOR_reload_di_store
);
22461 sri
->extra_cost
= 2;
22472 if (!done_p
&& !TARGET_POWERPC64
22473 && reg_class_to_reg_type
[(int)rclass
] == GPR_REG_TYPE
22475 && GET_MODE_SIZE (GET_MODE (x
)) > UNITS_PER_WORD
)
22477 rtx addr
= XEXP (x
, 0);
22478 rtx off
= address_offset (addr
);
22480 if (off
!= NULL_RTX
)
22482 unsigned int extra
= GET_MODE_SIZE (GET_MODE (x
)) - UNITS_PER_WORD
;
22483 unsigned HOST_WIDE_INT offset
= INTVAL (off
);
22485 /* We need a secondary reload when our legitimate_address_p
22486 says the address is good (as otherwise the entire address
22487 will be reloaded), and we have a wrap.
22489 legitimate_lo_sum_address_p allows LO_SUM addresses to
22490 have any offset so test for wrap in the low 16 bits.
22492 legitimate_offset_address_p checks for the range
22493 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
22494 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
22495 [0x7ff4,0x7fff] respectively, so test for the
22496 intersection of these ranges, [0x7ffc,0x7fff] and
22497 [0x7ff4,0x7ff7] respectively.
22499 Note that the address we see here may have been
22500 manipulated by legitimize_reload_address. */
22501 if (GET_CODE (addr
) == LO_SUM
22502 ? ((offset
& 0xffff) ^ 0x8000) >= 0x10000 - extra
22503 : offset
- (0x8000 - extra
) < UNITS_PER_WORD
)
22506 sri
->icode
= CODE_FOR_reload_si_load
;
22508 sri
->icode
= CODE_FOR_reload_si_store
;
22509 sri
->extra_cost
= 2;
/* Presumably reached only when none of the cases above finished
   (default_p) -- the guard lines are missing in this transcription.  */
22524 ret
= default_secondary_reload (in_p
, x
, rclass
, mode
, sri
);
22526 gcc_assert (ret
!= ALL_REGS
);
/* Optional debug trace of the decision that was made.  */
22528 if (TARGET_DEBUG_ADDR
)
22531 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
22533 reg_class_names
[ret
],
22534 in_p
? "true" : "false",
22535 reg_class_names
[rclass
],
22536 GET_MODE_NAME (mode
));
22538 if (reload_completed
)
22539 fputs (", after reload", stderr
);
22542 fputs (", done_p not set", stderr
);
22545 fputs (", default secondary reload", stderr
);
22547 if (sri
->icode
!= CODE_FOR_nothing
)
22548 fprintf (stderr
, ", reload func = %s, extra cost = %d",
22549 insn_data
[sri
->icode
].name
, sri
->extra_cost
);
22551 else if (sri
->extra_cost
> 0)
22552 fprintf (stderr
, ", extra cost = %d", sri
->extra_cost
);
22554 fputs ("\n", stderr
);
22561 /* Better tracing for rs6000_secondary_reload_inner. */
22564 rs6000_secondary_reload_trace (int line
, rtx reg
, rtx mem
, rtx scratch
,
22569 gcc_assert (reg
!= NULL_RTX
&& mem
!= NULL_RTX
&& scratch
!= NULL_RTX
);
22571 fprintf (stderr
, "rs6000_secondary_reload_inner:%d, type = %s\n", line
,
22572 store_p
? "store" : "load");
22575 set
= gen_rtx_SET (mem
, reg
);
22577 set
= gen_rtx_SET (reg
, mem
);
22579 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
22580 debug_rtx (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
22583 static void rs6000_secondary_reload_fail (int, rtx
, rtx
, rtx
, bool)
22584 ATTRIBUTE_NORETURN
;
22587 rs6000_secondary_reload_fail (int line
, rtx reg
, rtx mem
, rtx scratch
,
22590 rs6000_secondary_reload_trace (line
, reg
, mem
, scratch
, store_p
);
22591 gcc_unreachable ();
/* NOTE(review): this region was garbled in transcription -- statements are
   split across lines and gaps in the embedded original line numbers
   (e.g. 22641 -> 22643, missing case labels, braces, and the final break/
   return lines) indicate dropped lines.  Restore from upstream rs6000.c
   before editing -- TODO confirm.  */
22594 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
22595 reload helper functions. These were identified in
22596 rs6000_secondary_reload_memory, and if reload decided to use the secondary
22597 reload, it calls the insns:
22598 reload_<RELOAD:mode>_<P:mptrsize>_store
22599 reload_<RELOAD:mode>_<P:mptrsize>_load
22601 which in turn calls this function, to do whatever is necessary to create
22602 valid addresses. */
22605 rs6000_secondary_reload_inner (rtx reg
, rtx mem
, rtx scratch
, bool store_p
)
22607 int regno
= true_regnum (reg
);
22608 machine_mode mode
= GET_MODE (reg
);
22609 addr_mask_type addr_mask
;
22612 rtx op_reg
, op0
, op1
;
/* Sanity-check the operands handed to us by the reload patterns.  */
22617 if (regno
< 0 || regno
>= FIRST_PSEUDO_REGISTER
|| !MEM_P (mem
)
22618 || !base_reg_operand (scratch
, GET_MODE (scratch
)))
22619 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
/* Select the addressing mask for the register bank REG lives in.  */
22621 if (IN_RANGE (regno
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
))
22622 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_GPR
];
22624 else if (IN_RANGE (regno
, FIRST_FPR_REGNO
, LAST_FPR_REGNO
))
22625 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
];
22627 else if (IN_RANGE (regno
, FIRST_ALTIVEC_REGNO
, LAST_ALTIVEC_REGNO
))
22628 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
];
22631 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
22633 /* Make sure the mode is valid in this register class. */
22634 if ((addr_mask
& RELOAD_REG_VALID
) == 0)
22635 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
22637 if (TARGET_DEBUG_ADDR
)
22638 rs6000_secondary_reload_trace (__LINE__
, reg
, mem
, scratch
, store_p
);
/* Rewrite the address into a form this register bank accepts; new_addr is
   replaced by the scratch register whenever fixup insns are emitted.  */
22640 new_addr
= addr
= XEXP (mem
, 0);
22641 switch (GET_CODE (addr
))
22643 /* Does the register class support auto update forms for this mode? If
22644 not, do the update now. We don't need a scratch register, since the
22645 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
22648 op_reg
= XEXP (addr
, 0);
22649 if (!base_reg_operand (op_reg
, Pmode
))
22650 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
22652 if ((addr_mask
& RELOAD_REG_PRE_INCDEC
) == 0)
22654 emit_insn (gen_add2_insn (op_reg
, GEN_INT (GET_MODE_SIZE (mode
))));
22660 op0
= XEXP (addr
, 0);
22661 op1
= XEXP (addr
, 1);
22662 if (!base_reg_operand (op0
, Pmode
)
22663 || GET_CODE (op1
) != PLUS
22664 || !rtx_equal_p (op0
, XEXP (op1
, 0)))
22665 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
22667 if ((addr_mask
& RELOAD_REG_PRE_MODIFY
) == 0)
22669 emit_insn (gen_rtx_SET (op0
, op1
));
22674 /* Do we need to simulate AND -16 to clear the bottom address bits used
22675 in VMX load/stores? */
22677 op0
= XEXP (addr
, 0);
22678 op1
= XEXP (addr
, 1);
22679 if ((addr_mask
& RELOAD_REG_AND_M16
) == 0)
22681 if (REG_P (op0
) || GET_CODE (op0
) == SUBREG
)
22684 else if (GET_CODE (op1
) == PLUS
)
22686 emit_insn (gen_rtx_SET (scratch
, op1
));
22691 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
/* Emit the AND -16 explicitly, clobbering a condition-code scratch.  */
22693 and_op
= gen_rtx_AND (GET_MODE (scratch
), op_reg
, op1
);
22694 cc_clobber
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (CCmode
));
22695 rv
= gen_rtvec (2, gen_rtx_SET (scratch
, and_op
), cc_clobber
);
22696 emit_insn (gen_rtx_PARALLEL (VOIDmode
, rv
));
22697 new_addr
= scratch
;
22701 /* If this is an indirect address, make sure it is a base register. */
22704 if (!base_reg_operand (addr
, GET_MODE (addr
)))
22706 emit_insn (gen_rtx_SET (scratch
, addr
));
22707 new_addr
= scratch
;
22711 /* If this is an indexed address, make sure the register class can handle
22712 indexed addresses for this mode. */
22714 op0
= XEXP (addr
, 0);
22715 op1
= XEXP (addr
, 1);
22716 if (!base_reg_operand (op0
, Pmode
))
22717 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
22719 else if (int_reg_operand (op1
, Pmode
))
22721 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
22723 emit_insn (gen_rtx_SET (scratch
, addr
));
22724 new_addr
= scratch
;
22728 else if (mode_supports_vsx_dform_quad (mode
) && CONST_INT_P (op1
))
22730 if (((addr_mask
& RELOAD_REG_QUAD_OFFSET
) == 0)
22731 || !quad_address_p (addr
, mode
, false))
22733 emit_insn (gen_rtx_SET (scratch
, addr
));
22734 new_addr
= scratch
;
22738 /* Make sure the register class can handle offset addresses. */
22739 else if (rs6000_legitimate_offset_address_p (mode
, addr
, false, true))
22741 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
22743 emit_insn (gen_rtx_SET (scratch
, addr
));
22744 new_addr
= scratch
;
22749 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
22754 op0
= XEXP (addr
, 0);
22755 op1
= XEXP (addr
, 1);
22756 if (!base_reg_operand (op0
, Pmode
))
22757 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
22759 else if (int_reg_operand (op1
, Pmode
))
22761 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
22763 emit_insn (gen_rtx_SET (scratch
, addr
));
22764 new_addr
= scratch
;
22768 /* Quad offsets are restricted and can't handle normal addresses. */
22769 else if (mode_supports_vsx_dform_quad (mode
))
22771 emit_insn (gen_rtx_SET (scratch
, addr
));
22772 new_addr
= scratch
;
22775 /* Make sure the register class can handle offset addresses. */
22776 else if (legitimate_lo_sum_address_p (mode
, addr
, false))
22778 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
22780 emit_insn (gen_rtx_SET (scratch
, addr
));
22781 new_addr
= scratch
;
22786 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
/* Constant/symbolic address: force it into the scratch register.  */
22793 rs6000_emit_move (scratch
, addr
, Pmode
);
22794 new_addr
= scratch
;
22798 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
22801 /* Adjust the address if it changed. */
22802 if (addr
!= new_addr
)
22804 mem
= replace_equiv_address_nv (mem
, new_addr
);
22805 if (TARGET_DEBUG_ADDR
)
22806 fprintf (stderr
, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
22809 /* Now create the move. */
22811 emit_insn (gen_rtx_SET (mem
, reg
));
22813 emit_insn (gen_rtx_SET (reg
, mem
));
22818 /* Convert reloads involving 64-bit gprs and misaligned offset
22819 addressing, or multiple 32-bit gprs and offsets that are too large,
22820 to use indirect addressing. */
22823 rs6000_secondary_reload_gpr (rtx reg
, rtx mem
, rtx scratch
, bool store_p
)
22825 int regno
= true_regnum (reg
);
22826 enum reg_class rclass
;
22828 rtx scratch_or_premodify
= scratch
;
22830 if (TARGET_DEBUG_ADDR
)
22832 fprintf (stderr
, "\nrs6000_secondary_reload_gpr, type = %s\n",
22833 store_p
? "store" : "load");
22834 fprintf (stderr
, "reg:\n");
22836 fprintf (stderr
, "mem:\n");
22838 fprintf (stderr
, "scratch:\n");
22839 debug_rtx (scratch
);
22842 gcc_assert (regno
>= 0 && regno
< FIRST_PSEUDO_REGISTER
);
22843 gcc_assert (GET_CODE (mem
) == MEM
);
22844 rclass
= REGNO_REG_CLASS (regno
);
22845 gcc_assert (rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
);
22846 addr
= XEXP (mem
, 0);
22848 if (GET_CODE (addr
) == PRE_MODIFY
)
22850 gcc_assert (REG_P (XEXP (addr
, 0))
22851 && GET_CODE (XEXP (addr
, 1)) == PLUS
22852 && XEXP (XEXP (addr
, 1), 0) == XEXP (addr
, 0));
22853 scratch_or_premodify
= XEXP (addr
, 0);
22854 if (!HARD_REGISTER_P (scratch_or_premodify
))
22855 /* If we have a pseudo here then reload will have arranged
22856 to have it replaced, but only in the original insn.
22857 Use the replacement here too. */
22858 scratch_or_premodify
= find_replacement (&XEXP (addr
, 0));
22860 /* RTL emitted by rs6000_secondary_reload_gpr uses RTL
22861 expressions from the original insn, without unsharing them.
22862 Any RTL that points into the original insn will of course
22863 have register replacements applied. That is why we don't
22864 need to look for replacements under the PLUS. */
22865 addr
= XEXP (addr
, 1);
22867 gcc_assert (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == LO_SUM
);
22869 rs6000_emit_move (scratch_or_premodify
, addr
, Pmode
);
22871 mem
= replace_equiv_address_nv (mem
, scratch_or_premodify
);
22873 /* Now create the move. */
22875 emit_insn (gen_rtx_SET (mem
, reg
));
22877 emit_insn (gen_rtx_SET (reg
, mem
));
22882 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
22883 this function has any SDmode references. If we are on a power7 or later, we
22884 don't need the 64-bit stack slot since the LFIWZX and STIFWX instructions
22885 can load/store the value. */
22888 rs6000_alloc_sdmode_stack_slot (void)
22892 gimple_stmt_iterator gsi
;
22894 gcc_assert (cfun
->machine
->sdmode_stack_slot
== NULL_RTX
);
22895 /* We use a different approach for dealing with the secondary
22900 if (TARGET_NO_SDMODE_STACK
)
22903 FOR_EACH_BB_FN (bb
, cfun
)
22904 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
22906 tree ret
= walk_gimple_op (gsi_stmt (gsi
), rs6000_check_sdmode
, NULL
);
22909 rtx stack
= assign_stack_local (DDmode
, GET_MODE_SIZE (DDmode
), 0);
22910 cfun
->machine
->sdmode_stack_slot
= adjust_address_nv (stack
,
22916 /* Check for any SDmode parameters of the function. */
22917 for (t
= DECL_ARGUMENTS (cfun
->decl
); t
; t
= DECL_CHAIN (t
))
22919 if (TREE_TYPE (t
) == error_mark_node
)
22922 if (TYPE_MODE (TREE_TYPE (t
)) == SDmode
22923 || TYPE_MODE (DECL_ARG_TYPE (t
)) == SDmode
)
22925 rtx stack
= assign_stack_local (DDmode
, GET_MODE_SIZE (DDmode
), 0);
22926 cfun
->machine
->sdmode_stack_slot
= adjust_address_nv (stack
,
22934 rs6000_instantiate_decls (void)
22936 if (cfun
->machine
->sdmode_stack_slot
!= NULL_RTX
)
22937 instantiate_decl_rtl (cfun
->machine
->sdmode_stack_slot
);
/* Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS; but on some machines
   in some cases it is preferable to use a more restrictive class.

   On the RS/6000, we have to return NO_REGS when we want to reload a
   floating-point CONST_DOUBLE to force it to be copied to memory.

   We also don't want to reload integer values into floating-point
   registers if we can at all help it.  In fact, this can
   cause reload to die, if it tries to generate a reload of CTR
   into a FP register and discovers it doesn't have the memory location

   ??? Would it be a good idea to have reload do the converse, that is
   try to reload floating modes into FP registers if possible?

   NOTE(review): extraction garbled — the `return` statements for many of
   the conditions below, and the function's braces, appear to have been
   dropped; compare against upstream rs6000.c.  */
static enum reg_class
rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
  machine_mode mode = GET_MODE (x);
  bool is_constant = CONSTANT_P (x);

  /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
     reload class for it.  */
  if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
      && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)

  if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
      && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)

  /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS.  Do not allow
     the reloading of address expressions using PLUS into floating point
  if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)

      /* Zero is always allowed in all VSX registers.  */
      if (x == CONST0_RTX (mode))

      /* If this is a vector constant that can be formed with a few Altivec
	 instructions, we want altivec registers.  */
      if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
	return ALTIVEC_REGS;

      /* If this is an integer constant that can easily be loaded into
	 vector registers, allow it.  */
      if (CONST_INT_P (x))
	  HOST_WIDE_INT value = INTVAL (x);

	  /* ISA 2.07 can generate -1 in all registers with XXLORC.  ISA
	     2.06 can generate it in the Altivec registers with
	  if (TARGET_P8_VECTOR)
	  else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
	    return ALTIVEC_REGS;

	  /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
	     a sign extend in the Altivec registers.  */
	  if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
	      && TARGET_VSX_SMALL_INTEGER
	      && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
	    return ALTIVEC_REGS;

      /* Force constant to memory.  */

      /* D-form addressing can easily reload the value.  */
      if (mode_supports_vmx_dform (mode)
	  || mode_supports_vsx_dform_quad (mode))

      /* If this is a scalar floating point value and we don't have D-form
	 addressing, prefer the traditional floating point registers so that we
	 can use D-form (register+offset) addressing.  */
      if (rclass == VSX_REGS
	  && (mode == SFmode || GET_MODE_SIZE (mode) == 8))

      /* Prefer the Altivec registers if Altivec is handling the vector
	 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
      if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
	  || mode == V1TImode)
	return ALTIVEC_REGS;

  /* Constants and PLUS addresses: prefer the widest GPR-containing subset
     of RCLASS that is available.  */
  if (is_constant || GET_CODE (x) == PLUS)
      if (reg_class_subset_p (GENERAL_REGS, rclass))
	return GENERAL_REGS;
      if (reg_class_subset_p (BASE_REGS, rclass))

  if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
    return GENERAL_REGS;
/* Debug version of rs6000_preferred_reload_class.  Delegates to the real
   implementation and traces arguments and result (the fprintf/stderr lines
   of the original appear to have been dropped by this extraction).  */
static enum reg_class
rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
  enum reg_class ret = rs6000_preferred_reload_class (x, rclass);

  /* NOTE(review): the `fprintf (stderr,` opening and `return ret;` of the
     original are missing here.  */
  "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
  reg_class_names[ret], reg_class_names[rclass],
  GET_MODE_NAME (GET_MODE (x)));
/* If we are copying between FP or AltiVec registers and anything else, we need
   a memory location.  The exception is when we are targeting ppc64 and the
   move to/from fpr to gpr instructions are available.  Also, under VSX, you
   can copy vector registers from the FP register set to the Altivec register
   set and vice versa.

   NOTE(review): extraction garbled — the return type, the MODE parameter
   line, braces, and the `return false;`/`return true;` statements appear
   to have been dropped.  */
rs6000_secondary_memory_needed (enum reg_class from_class,
				enum reg_class to_class,
  enum rs6000_reg_type from_type, to_type;
  bool altivec_p = ((from_class == ALTIVEC_REGS)
		    || (to_class == ALTIVEC_REGS));

  /* If a simple/direct move is available, we don't need secondary memory */
  from_type = reg_class_to_reg_type[(int)from_class];
  to_type = reg_class_to_reg_type[(int)to_class];

  if (rs6000_secondary_reload_move (to_type, from_type, mode,
				    (secondary_reload_info *)0, altivec_p))

  /* If we have a floating point or vector register class, we need to use
     memory to transfer the data.  */
  if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
/* Debug version of rs6000_secondary_memory_needed.  Delegates to the real
   implementation and traces arguments and result.
   NOTE(review): the `fprintf (stderr,` opening and `return ret;` of the
   original appear to have been dropped by this extraction.  */
rs6000_debug_secondary_memory_needed (enum reg_class from_class,
				      enum reg_class to_class,
  bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);

  "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
  "to_class = %s, mode = %s\n",
  ret ? "true" : "false",
  reg_class_names[from_class],
  reg_class_names[to_class],
  GET_MODE_NAME (mode));
/* Return the register class of a scratch register needed to copy IN into
   or out of a register in RCLASS in MODE.  If it can be done directly,
   NO_REGS is returned.

   NOTE(review): extraction garbled — the `rtx in` parameter line, the
   declaration of `regno`, braces, and several `return`/assignment lines
   appear to have been dropped; compare against upstream rs6000.c.  */
static enum reg_class
rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
  if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
      && MACHOPIC_INDIRECT
      /* We cannot copy a symbolic operand directly into anything
	 other than BASE_REGS for TARGET_ELF.  So indicate that a
	 register from BASE_REGS is needed as an intermediate

	 On Darwin, pic addresses require a load from memory, which
	 needs a base register.  */
      if (rclass != BASE_REGS
	  && (GET_CODE (in) == SYMBOL_REF
	      || GET_CODE (in) == HIGH
	      || GET_CODE (in) == LABEL_REF
	      || GET_CODE (in) == CONST))

  /* Determine the hard register number behind IN: for a REG, look through
     pseudos via true_regnum; likewise for a SUBREG.  */
  if (GET_CODE (in) == REG)
      regno = REGNO (in);
      if (regno >= FIRST_PSEUDO_REGISTER)
	  regno = true_regnum (in);
	  if (regno >= FIRST_PSEUDO_REGISTER)
  else if (GET_CODE (in) == SUBREG)
      regno = true_regnum (in);
      if (regno >= FIRST_PSEUDO_REGISTER)

  /* If we have VSX register moves, prefer moving scalar values between
     Altivec registers and GPR by going via an FPR (and then via memory)
     instead of reloading the secondary memory address for Altivec moves.  */
      && GET_MODE_SIZE (mode) < 16
      && !mode_supports_vmx_dform (mode)
      && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
	   && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
	  || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
	      && (regno >= 0 && INT_REGNO_P (regno)))))

  /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
  if (rclass == GENERAL_REGS || rclass == BASE_REGS
      || (regno >= 0 && INT_REGNO_P (regno)))

  /* Constants, memory, and VSX registers can go into VSX registers (both the
     traditional floating point and the altivec registers).  */
  if (rclass == VSX_REGS
      && (regno == -1 || VSX_REGNO_P (regno)))

  /* Constants, memory, and FP registers can go into FP registers.  */
  if ((regno == -1 || FP_REGNO_P (regno))
      && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
    /* SDmode needs a GPR scratch unless LRA is in progress.  */
    return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;

  /* Memory, and AltiVec registers can go into AltiVec registers.  */
  if ((regno == -1 || ALTIVEC_REGNO_P (regno))
      && rclass == ALTIVEC_REGS)

  /* We can copy among the CR registers.  */
  if ((rclass == CR_REGS || rclass == CR0_REGS)
      && regno >= 0 && CR_REGNO_P (regno))

  /* Otherwise, we need GENERAL_REGS.  */
  return GENERAL_REGS;
/* Debug version of rs6000_secondary_reload_class.  Delegates to the real
   implementation and traces arguments and result.
   NOTE(review): the `fprintf (stderr,` opening, the debug_rtx call, and
   `return ret;` of the original appear to have been dropped.  */
static enum reg_class
rs6000_debug_secondary_reload_class (enum reg_class rclass,
				     machine_mode mode, rtx in)
  enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);

  "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
  "mode = %s, input rtx:\n",
  reg_class_names[ret], reg_class_names[rclass],
  GET_MODE_NAME (mode));
/* Return nonzero if for CLASS a mode change from FROM to TO is invalid.

   NOTE(review): extraction garbled — the `machine_mode to` parameter
   line, braces, and the `return true;`/`return false;` statements for
   several of the conditions below appear to have been dropped.  */
rs6000_cannot_change_mode_class (machine_mode from,
				 enum reg_class rclass)
  unsigned from_size = GET_MODE_SIZE (from);
  unsigned to_size = GET_MODE_SIZE (to);

  if (from_size != to_size)
      enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;

      if (reg_classes_intersect_p (xclass, rclass))
	  unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
	  unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
	  bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
	  bool from_float128_vector_p = FLOAT128_VECTOR_P (from);

	  /* Don't allow 64-bit types to overlap with 128-bit types that take a
	     single register under VSX because the scalar part of the register
	     is in the upper 64-bits, and not the lower 64-bits.  Types like
	     TFmode/TDmode that take 2 scalar register can overlap.  128-bit
	     IEEE floating point can't overlap, and neither can small
	  if (to_float128_vector_p && from_float128_vector_p)
	  else if (to_float128_vector_p || from_float128_vector_p)

	  /* TDmode in floating-mode registers must always go into a register
	     pair with the most significant word in the even-numbered register
	     to match ISA requirements.  In little-endian mode, this does not
	     match subreg numbering, so we cannot allow subregs.  */
	  if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))

	  if (from_size < 8 || to_size < 8)
	  if (from_size == 8 && (8 * to_nregs) != to_size)
	  if (to_size == 8 && (8 * from_nregs) != from_size)

  /* Exactly one of TO/FROM in each of these mode tests: a change into or
     out of the listed mode (but not both) is rejected for E500 double.  */
  if (TARGET_E500_DOUBLE
      && ((((to) == DFmode) + ((from) == DFmode)) == 1
	  || (((to) == TFmode) + ((from) == TFmode)) == 1
	  || (((to) == IFmode) + ((from) == IFmode)) == 1
	  || (((to) == KFmode) + ((from) == KFmode)) == 1
	  || (((to) == DDmode) + ((from) == DDmode)) == 1
	  || (((to) == TDmode) + ((from) == TDmode)) == 1
	  || (((to) == DImode) + ((from) == DImode)) == 1))

  /* Since the VSX register set includes traditional floating point registers
     and altivec registers, just check for the size being different instead of
     trying to check whether the modes are vector modes.  Otherwise it won't
     allow say DF and DI to change classes.  For types like TFmode and TDmode
     that take 2 64-bit registers, rather than a single 128-bit register, don't
     allow subregs of those types to other 128 bit types.  */
  if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
      unsigned num_regs = (from_size + 15) / 16;
      if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
	  || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)

      return (from_size != 8 && from_size != 16);

  if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
      && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)

  if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
      && reg_classes_intersect_p (GENERAL_REGS, rclass))
/* Debug version of rs6000_cannot_change_mode_class.  Delegates to the real
   implementation and traces arguments and result.
   NOTE(review): the return type, `machine_mode to` parameter line, the
   `fprintf (stderr,` opening, and `return ret;` of the original appear to
   have been dropped by this extraction.  */
rs6000_debug_cannot_change_mode_class (machine_mode from,
				       enum reg_class rclass)
  bool ret = rs6000_cannot_change_mode_class (from, to, rclass);

  "rs6000_cannot_change_mode_class, return %s, from = %s, "
  "to = %s, rclass = %s\n",
  ret ? "true" : "false",
  GET_MODE_NAME (from), GET_MODE_NAME (to),
  reg_class_names[rclass]);
/* Return a string to do a move operation of 128 bits of data.

   Classifies destination and source (GPR / FPR / Altivec / VSX register,
   memory, or constant) and returns the matching assembler template.

   NOTE(review): extraction garbled — the return type, the declarations of
   dest_regno/src_regno, braces, and several `return`/else branches of the
   original appear to have been dropped; compare against upstream
   rs6000.c.  */
rs6000_output_move_128bit (rtx operands[])
  rtx dest = operands[0];
  rtx src = operands[1];
  machine_mode mode = GET_MODE (dest);
  bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
  bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;

      /* Destination is a hard register: classify it.  A register is VSX if
	 it is either an FPR or an Altivec register.  */
      dest_regno = REGNO (dest);
      dest_gpr_p = INT_REGNO_P (dest_regno);
      dest_fp_p = FP_REGNO_P (dest_regno);
      dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
      dest_vsx_p = dest_fp_p | dest_vmx_p;

      dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;

      /* Same classification for the source.  */
      src_regno = REGNO (src);
      src_gpr_p = INT_REGNO_P (src_regno);
      src_fp_p = FP_REGNO_P (src_regno);
      src_vmx_p = ALTIVEC_REGNO_P (src_regno);
      src_vsx_p = src_fp_p | src_vmx_p;

      src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;

  /* Register moves.  */
  if (dest_regno >= 0 && src_regno >= 0)
      if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
	return (WORDS_BIG_ENDIAN
		? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
		: "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");

      else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)

      else if (TARGET_VSX && dest_vsx_p)
	return "xxlor %x0,%x1,%x1";

      else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
	return (WORDS_BIG_ENDIAN
		? "mtvsrdd %x0,%1,%L1"
		: "mtvsrdd %x0,%L1,%1");

      else if (TARGET_DIRECT_MOVE && src_gpr_p)

      else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
	return "vor %0,%1,%1";

      else if (dest_fp_p && src_fp_p)

  /* Loads (register destination, memory source).  */
  else if (dest_regno >= 0 && MEM_P (src))
      if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))

      else if (TARGET_ALTIVEC && dest_vmx_p
	       && altivec_indexed_or_indirect_operand (src, mode))
	return "lvx %0,%y1";

      else if (TARGET_VSX && dest_vsx_p)
	  if (mode_supports_vsx_dform_quad (mode)
	      && quad_address_p (XEXP (src, 0), mode, true))
	    return "lxv %x0,%1";

	  else if (TARGET_P9_VECTOR)
	    return "lxvx %x0,%y1";

	  else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
	    return "lxvw4x %x0,%y1";

	    return "lxvd2x %x0,%y1";

      else if (TARGET_ALTIVEC && dest_vmx_p)
	return "lvx %0,%y1";

      else if (dest_fp_p)

  /* Stores (memory destination, register source).  */
  else if (src_regno >= 0 && MEM_P (dest))
      if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
	return "stq %1,%0";

      else if (TARGET_ALTIVEC && src_vmx_p
	       && altivec_indexed_or_indirect_operand (src, mode))
	return "stvx %1,%y0";

      else if (TARGET_VSX && src_vsx_p)
	  if (mode_supports_vsx_dform_quad (mode)
	      && quad_address_p (XEXP (dest, 0), mode, true))
	    return "stxv %x1,%0";

	  else if (TARGET_P9_VECTOR)
	    return "stxvx %x1,%y0";

	  else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
	    return "stxvw4x %x1,%y0";

	    return "stxvd2x %x1,%y0";

      else if (TARGET_ALTIVEC && src_vmx_p)
	return "stvx %1,%y0";

  /* Constants (register destination, constant source).  */
  else if (dest_regno >= 0
	   && (GET_CODE (src) == CONST_INT
	       || GET_CODE (src) == CONST_WIDE_INT
	       || GET_CODE (src) == CONST_DOUBLE
	       || GET_CODE (src) == CONST_VECTOR))

      else if ((dest_vmx_p && TARGET_ALTIVEC)
	       || (dest_vsx_p && TARGET_VSX))
	return output_vec_const_move (operands);

  /* No template matched — this operand combination is a bug.  */
  fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
/* Validate a 128-bit move: at least one of the two operands must be a
   general-purpose-class register operand for the move to be representable.
   NOTE(review): the return type line of the original appears to have been
   dropped by this extraction.  */
rs6000_move_128bit_ok_p (rtx operands[])
  machine_mode mode = GET_MODE (operands[0]);
  return (gpc_reg_operand (operands[0], mode)
	  || gpc_reg_operand (operands[1], mode));
/* Return true if a 128-bit move needs to be split.
   NOTE(review): extraction garbled — the return type, braces, and the
   `return` statements following each condition appear to have been
   dropped.  The visible logic gates on reload being complete, the
   operands involving a GPR, and the move not being a quad load/store.  */
rs6000_split_128bit_ok_p (rtx operands[])
  if (!reload_completed)

  if (!gpr_or_gpr_p (operands[0], operands[1]))

  if (quad_load_store_p (operands[0], operands[1]))
/* Given a comparison operation, return the bit number in CCR to test.  We
   know this is a valid comparison.

   SCC_P is 1 if this is for an scc.  That means that %D will have been
   used instead of %C, so the bits will be in different places.

   Return -1 if OP isn't a valid comparison for some reason.

   NOTE(review): extraction garbled — the return type, the declarations of
   reg/cc_regnum/base_bit, the `switch (code)` header, several `case`
   labels, and braces appear to have been dropped; compare against
   upstream rs6000.c.  */
ccr_bit (rtx op, int scc_p)
  enum rtx_code code = GET_CODE (op);
  machine_mode cc_mode;

  if (!COMPARISON_P (op))

  reg = XEXP (op, 0);

  /* The first operand of the comparison must be a CR hard register.  */
  gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));

  cc_mode = GET_MODE (reg);
  cc_regnum = REGNO (reg);
  /* Each CR field is 4 bits wide; base_bit is the first bit of this field.  */
  base_bit = 4 * (cc_regnum - CR0_REGNO);

  validate_condition_mode (code, cc_mode);

  /* When generating a sCOND operation, only positive conditions are
	      || code == EQ || code == GT || code == LT || code == UNORDERED
	      || code == GTU || code == LTU);

      return scc_p ? base_bit + 3 : base_bit + 2;

      return base_bit + 2;

    case GT: case GTU: case UNLE:
      return base_bit + 1;

    case LT: case LTU: case UNGE:

    case ORDERED: case UNORDERED:
      return base_bit + 3;

      /* If scc, we will have done a cror to put the bit in the
	 unordered position.  So test that bit.  For integer, this is ! LT
	 unless this is an scc insn.  */
      return scc_p ? base_bit + 3 : base_bit;

      return scc_p ? base_bit + 3 : base_bit + 1;

      gcc_unreachable ();
/* Return the GOT register.
   NOTE(review): the return type line of the original appears to have been
   dropped by this extraction.  */
rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
  /* The second flow pass currently (June 1999) can't update
     regs_ever_live without disturbing other parts of the compiler, so
     update it here to make the prolog/epilogue code happy.  */
  if (!can_create_pseudo_p ()
      && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
    df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);

  /* Record that this function uses the PIC offset table so the
     prologue/epilogue code sets it up.  */
  crtl->uses_pic_offset_table = 1;

  return pic_offset_table_rtx;
/* Cached stack-frame layout for the current function; reload_completed == 0
   marks it as not yet computed.  */
static rs6000_stack_t stack_info;

/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */
static struct machine_function *
rs6000_init_machine_status (void)
  /* Invalidate any previously cached stack layout.  */
  stack_info.reload_completed = 0;
  return ggc_cleared_alloc<machine_function> ();
/* True iff X is a plain CONST_INT (VOIDmode excludes CONST_DOUBLE etc.).  */
#define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)

/* Write out a function code label.
   NOTE(review): extraction garbled — the return type, braces, `case`
   labels of the DEFAULT_ABI switch, and some branches appear to have
   been dropped.  */
rs6000_output_function_entry (FILE *file, const char *fname)
  if (fname[0] != '.')
      switch (DEFAULT_ABI)
	  gcc_unreachable ();

	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");

  RS6000_OUTPUT_BASENAME (file, fname);
/* Print an operand.  Recognize special options, documented below.

   NOTE(review): extraction garbled — the `switch (code)` header, all of
   the `case 'X':` labels, `break;` statements, braces, and many
   `fprintf (stderr`/else lines of the original appear to have been
   dropped, and the two SMALL_DATA_* definitions below were originally
   separated by an #if/#else on rs6000_sdata availability; compare
   against upstream rs6000.c before relying on this text.  */

#define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
#define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
#define SMALL_DATA_RELOC "sda21"
#define SMALL_DATA_REG 0

print_operand (FILE *file, rtx x, int code)
  unsigned HOST_WIDE_INT uval;

      /* %a is output_address.  */

      /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise

      /* Like 'J' but get to the GT bit only.  */
      gcc_assert (REG_P (x));

      /* Bit 1 is GT bit.  */
      i = 4 * (REGNO (x) - CR0_REGNO) + 1;

      /* Add one for shift count in rlinm for scc.  */
      fprintf (file, "%d", i + 1);

      /* If the low 16 bits are 0, but some other bit is set, write 's'.  */
	output_operand_lossage ("invalid %%e value");

      if ((uval & 0xffff) == 0 && uval != 0)

      /* X is a CR register.  Print the number of the EQ bit of the CR */
      if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
	output_operand_lossage ("invalid %%E value");
	fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);

      /* X is a CR register.  Print the shift count needed to move it
	 to the high-order four bits.  */
      if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
	output_operand_lossage ("invalid %%f value");
	fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));

      /* Similar, but print the count for the rotate in the opposite
      if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
	output_operand_lossage ("invalid %%F value");
	fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));

      /* X is a constant integer.  If it is negative, print "m",
	 otherwise print "z".  This is to make an aze or ame insn.  */
      if (GET_CODE (x) != CONST_INT)
	output_operand_lossage ("invalid %%G value");
      else if (INTVAL (x) >= 0)

      /* If constant, output low-order five bits.  Otherwise, write
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
	print_operand (file, x, 0);

      /* If constant, output low-order six bits.  Otherwise, write
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
	print_operand (file, x, 0);

      /* Print `i' if this is a constant, else nothing.  */

      /* Write the bit number in CCR for jump.  */
      i = ccr_bit (x, 0);
	output_operand_lossage ("invalid %%j code");
	fprintf (file, "%d", i);

      /* Similar, but add one for shift count in rlinm for scc and pass
	 scc flag to `ccr_bit'.  */
      i = ccr_bit (x, 1);
	output_operand_lossage ("invalid %%J code");
	/* If we want bit 31, write a shift count of zero, not 32.  */
	fprintf (file, "%d", i == 31 ? 0 : i + 1);

      /* X must be a constant.  Write the 1's complement of the
	output_operand_lossage ("invalid %%k value");
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));

      /* X must be a symbolic constant on ELF.  Write an
	 expression suitable for an 'addi' that adds in the low 16
	 bits of the MEM.  */
      if (GET_CODE (x) == CONST)
	  if (GET_CODE (XEXP (x, 0)) != PLUS
	      || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
		  && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
	      || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
	    output_operand_lossage ("invalid %%K value");
      print_operand_address (file, x);
      fputs ("@l", file);

      /* %l is output_asm_label.  */

      /* Write second word of DImode or DFmode reference.  Works on register
	 or non-indexed memory only.  */
	fputs (reg_names[REGNO (x) + 1], file);
      else if (MEM_P (x))
	  machine_mode mode = GET_MODE (x);
	  /* Handle possible auto-increment.  Since it is pre-increment and
	     we have already done it, we can just use an offset of word.  */
	  if (GET_CODE (XEXP (x, 0)) == PRE_INC
	      || GET_CODE (XEXP (x, 0)) == PRE_DEC)
	    output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
	  else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
	    output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
	    output_address (mode, XEXP (adjust_address_nv (x, SImode,

	  if (small_data_operand (x, GET_MODE (x)))
	    fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
		     reg_names[SMALL_DATA_REG]);

      /* Write the number of elements in the vector times 4.  */
      if (GET_CODE (x) != PARALLEL)
	output_operand_lossage ("invalid %%N value");
	fprintf (file, "%d", XVECLEN (x, 0) * 4);

      /* Similar, but subtract 1 first.  */
      if (GET_CODE (x) != PARALLEL)
	output_operand_lossage ("invalid %%O value");
	fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);

      /* X is a CONST_INT that is a power of two.  Output the logarithm.  */
	  || (i = exact_log2 (INTVAL (x))) < 0)
	output_operand_lossage ("invalid %%p value");
	fprintf (file, "%d", i);

      /* The operand must be an indirect memory reference.  The result
	 is the register name.  */
      if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
	  || REGNO (XEXP (x, 0)) >= 32)
	output_operand_lossage ("invalid %%P value");
	fputs (reg_names[REGNO (XEXP (x, 0))], file);

      /* This outputs the logical code corresponding to a boolean
	 expression.  The expression may have one or both operands
	 negated (if one, only the first one).  For condition register
	 logical operations, it will also treat the negated
	 CR codes as NOTs, but not handle NOTs of them.  */
	const char *const *t = 0;

	enum rtx_code code = GET_CODE (x);
	/* Rows select AND/OR/XOR; columns select plain, first-operand
	   complemented, and fully complemented forms.  */
	static const char * const tbl[3][3] = {
	  { "and", "andc", "nor" },
	  { "or", "orc", "nand" },
	  { "xor", "eqv", "xor" } };

	else if (code == IOR)
	else if (code == XOR)
	  output_operand_lossage ("invalid %%q value");

	if (GET_CODE (XEXP (x, 0)) != NOT)
	    if (GET_CODE (XEXP (x, 1)) == NOT)

      if (! TARGET_MFCRF)

      /* X is a CR register.  Print the mask for `mtcrf'.  */
      if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
	output_operand_lossage ("invalid %%R value");
	fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));

      /* Low 5 bits of 32 - value */
	output_operand_lossage ("invalid %%s value");
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);

      /* Like 'J' but get to the OVERFLOW/UNORDERED bit.  */
      gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);

      /* Bit 3 is OV bit.  */
      i = 4 * (REGNO (x) - CR0_REGNO) + 3;

      /* If we want bit 31, write a shift count of zero, not 32.  */
      fprintf (file, "%d", i == 31 ? 0 : i + 1);

      /* Print the symbolic name of a branch target register.  */
      if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
				  && REGNO (x) != CTR_REGNO))
	output_operand_lossage ("invalid %%T value");
      else if (REGNO (x) == LR_REGNO)
	fputs ("lr", file);
	fputs ("ctr", file);

      /* High-order or low-order 16 bits of constant, whichever is non-zero,
	 for use in unsigned operand.  */
	output_operand_lossage ("invalid %%u value");

      if ((uval & 0xffff) == 0)

      fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);

      /* High-order 16 bits of constant for use in signed operand.  */
	output_operand_lossage ("invalid %%v value");
	fprintf (file, HOST_WIDE_INT_PRINT_HEX,
		 (INTVAL (x) >> 16) & 0xffff);

      /* Print `u' if this has an auto-increment or auto-decrement.  */
	  && (GET_CODE (XEXP (x, 0)) == PRE_INC
	      || GET_CODE (XEXP (x, 0)) == PRE_DEC
	      || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))

      /* Print the trap code for this operand.  */
      switch (GET_CODE (x))
	  fputs ("eq", file);   /* 4 */
	  fputs ("ne", file);   /* 24 */
	  fputs ("lt", file);   /* 16 */
	  fputs ("le", file);   /* 20 */
	  fputs ("gt", file);   /* 8 */
	  fputs ("ge", file);   /* 12 */
	  fputs ("llt", file);  /* 2 */
	  fputs ("lle", file);  /* 6 */
	  fputs ("lgt", file);  /* 1 */
	  fputs ("lge", file);  /* 5 */
	  gcc_unreachable ();

      /* If constant, low-order 16 bits of constant, signed.  Otherwise, write
	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
		 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
	print_operand (file, x, 0);

      /* X is a FPR or Altivec register used in a VSX context.  */
      if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
	output_operand_lossage ("invalid %%x value");
	  int reg = REGNO (x);
	  /* VSX register numbers: FPRs map to 0..31, Altivec to 32..63.  */
	  int vsx_reg = (FP_REGNO_P (reg)
			 : reg - FIRST_ALTIVEC_REGNO + 32);

#ifdef TARGET_REGNAMES
	  if (TARGET_REGNAMES)
	    fprintf (file, "%%vs%d", vsx_reg);
	    fprintf (file, "%d", vsx_reg);

	  && (legitimate_indexed_address_p (XEXP (x, 0), 0)
	      || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
		  && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))

      /* Like 'L', for third word of TImode/PTImode */
	fputs (reg_names[REGNO (x) + 2], file);
      else if (MEM_P (x))
	  machine_mode mode = GET_MODE (x);
	  if (GET_CODE (XEXP (x, 0)) == PRE_INC
	      || GET_CODE (XEXP (x, 0)) == PRE_DEC)
	    output_address (mode, plus_constant (Pmode,
						 XEXP (XEXP (x, 0), 0), 8));
	  else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
	    output_address (mode, plus_constant (Pmode,
						 XEXP (XEXP (x, 0), 0), 8));
	    output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
	  if (small_data_operand (x, GET_MODE (x)))
	    fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
		     reg_names[SMALL_DATA_REG]);

      /* X is a SYMBOL_REF.  Write out the name preceded by a
	 period and without any trailing data in brackets.  Used for function
	 names.  If we are configured for System V (or the embedded ABI) on
	 the PowerPC, do not emit the period, since those systems do not use
	 TOCs and the like.  */
      gcc_assert (GET_CODE (x) == SYMBOL_REF);

      /* For macho, check to see if we need a stub.  */
	  const char *name = XSTR (x, 0);

	  if (darwin_emit_branch_islands
	      && MACHOPIC_INDIRECT
	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
	    name = machopic_indirection_name (x, /*stub_p=*/true);

	  assemble_name (file, name);
      else if (!DOT_SYMBOLS)
	assemble_name (file, XSTR (x, 0));
	rs6000_output_function_entry (file, XSTR (x, 0));

      /* Like 'L', for last word of TImode/PTImode.  */
	fputs (reg_names[REGNO (x) + 3], file);
      else if (MEM_P (x))
	  machine_mode mode = GET_MODE (x);
	  if (GET_CODE (XEXP (x, 0)) == PRE_INC
	      || GET_CODE (XEXP (x, 0)) == PRE_DEC)
	    output_address (mode, plus_constant (Pmode,
						 XEXP (XEXP (x, 0), 0), 12));
	  else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
	    output_address (mode, plus_constant (Pmode,
						 XEXP (XEXP (x, 0), 0), 12));
	    output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
	  if (small_data_operand (x, GET_MODE (x)))
	    fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
		     reg_names[SMALL_DATA_REG]);

      /* Print AltiVec or SPE memory operand.  */
	gcc_assert (MEM_P (x));

	/* Ugly hack because %y is overloaded.  */
	if ((TARGET_SPE || TARGET_E500_DOUBLE)
	    && (GET_MODE_SIZE (GET_MODE (x)) == 8
		|| FLOAT128_2REG_P (GET_MODE (x))
		|| GET_MODE (x) == TImode
		|| GET_MODE (x) == PTImode))
	    /* Handle [reg].  */
		fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
	    /* Handle [reg+UIMM].  */
	    else if (GET_CODE (tmp) == PLUS &&
		     GET_CODE (XEXP (tmp, 1)) == CONST_INT)
		gcc_assert (REG_P (XEXP (tmp, 0)));

		x = INTVAL (XEXP (tmp, 1));
		fprintf (file, "%d(%s)", x, reg_names[REGNO (XEXP (tmp, 0))]);

	    /* Fall through.  Must be [reg+reg].  */
	/* Strip the AND -16 alignment wrapper (Altivec) or look through a
	   PRE_MODIFY (VSX) to get at the actual address.  */
	if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
	    && GET_CODE (tmp) == AND
	    && GET_CODE (XEXP (tmp, 1)) == CONST_INT
	    && INTVAL (XEXP (tmp, 1)) == -16)
	  tmp = XEXP (tmp, 0);
	else if (VECTOR_MEM_VSX_P (GET_MODE (x))
		 && GET_CODE (tmp) == PRE_MODIFY)
	  tmp = XEXP (tmp, 1);
	  fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
	    if (GET_CODE (tmp) != PLUS
		|| !REG_P (XEXP (tmp, 0))
		|| !REG_P (XEXP (tmp, 1)))
		output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");

	    /* r0 as an index reads as literal zero, so put it second.  */
	    if (REGNO (XEXP (tmp, 0)) == 0)
	      fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
		       reg_names[ REGNO (XEXP (tmp, 0)) ]);
	      fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
		       reg_names[ REGNO (XEXP (tmp, 1)) ]);

	fprintf (file, "%s", reg_names[REGNO (x)]);
      else if (MEM_P (x))
	  /* We need to handle PRE_INC and PRE_DEC here, since we need to
	     know the width from the mode.  */
	  if (GET_CODE (XEXP (x, 0)) == PRE_INC)
	    fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
		     reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
	  else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
	    fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
		     reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
	  else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
	    output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
	    output_address (GET_MODE (x), XEXP (x, 0));

      if (toc_relative_expr_p (x, false))
	/* This hack along with a corresponding hack in
	   rs6000_output_addr_const_extra arranges to output addends
	   where the assembler expects to find them.  eg.
	   (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
	   without this hack would be output as "x@toc+4".  We
	output_addr_const (file, CONST_CAST_RTX (tocrel_base));
	output_addr_const (file, x);

      if (const char *name = get_some_local_dynamic_name ())
	assemble_name (file, name);
	output_operand_lossage ("'%%&' used without any "
				"local dynamic TLS references");

      output_operand_lossage ("invalid %%xn code");
24262 /* Print the address of an operand. */
/* Print the address of operand X to FILE in assembler syntax: handles a
   bare register ("0(rN)"), symbolic/const/label addresses (with small-data
   "@sdarel(r13)"-style suffixes), reg+reg, reg+const, LO_SUM (Mach-O
   "lo16(...)(rN)" vs ELF "...@l(rN)"), and TOC-relative expressions.
   NOTE(review): this chunk is a line-garbled extraction; the embedded
   numbering skips (24265 -> 24268, 24275 -> 24277, ...) show that braces,
   else-branches and some statements are missing from this view, so the
   text below is left byte-identical and only comments are added.  */
24265 print_operand_address (FILE *file
, rtx x
)
/* Bare register: presumably guarded by a missing `if (REG_P (x))` — TODO
   confirm against the full source.  */
24268 fprintf (file
, "0(%s)", reg_names
[ REGNO (x
) ]);
24269 else if (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == CONST
24270 || GET_CODE (x
) == LABEL_REF
)
24272 output_addr_const (file
, x
);
/* Small-data operands get "@<reloc>(<sdata reg>)" appended.  */
24273 if (small_data_operand (x
, GET_MODE (x
)))
24274 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
24275 reg_names
[SMALL_DATA_REG
]);
24277 gcc_assert (!TARGET_TOC
);
24279 else if (GET_CODE (x
) == PLUS
&& REG_P (XEXP (x
, 0))
24280 && REG_P (XEXP (x
, 1)))
/* reg+reg: r0 in the first slot means literal zero to the hardware, so
   swap the operands when REGNO == 0.  */
24282 if (REGNO (XEXP (x
, 0)) == 0)
24283 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (x
, 1)) ],
24284 reg_names
[ REGNO (XEXP (x
, 0)) ]);
24286 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (x
, 0)) ],
24287 reg_names
[ REGNO (XEXP (x
, 1)) ]);
24289 else if (GET_CODE (x
) == PLUS
&& REG_P (XEXP (x
, 0))
24290 && GET_CODE (XEXP (x
, 1)) == CONST_INT
)
/* reg + constant displacement: "<d>(rN)".  */
24291 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
"(%s)",
24292 INTVAL (XEXP (x
, 1)), reg_names
[ REGNO (XEXP (x
, 0)) ]);
/* LO_SUM, Mach-O spelling — presumably under a TARGET_MACHO guard lost in
   extraction (two identical LO_SUM arms below) — TODO confirm.  */
24294 else if (GET_CODE (x
) == LO_SUM
&& REG_P (XEXP (x
, 0))
24295 && CONSTANT_P (XEXP (x
, 1)))
24297 fprintf (file
, "lo16(");
24298 output_addr_const (file
, XEXP (x
, 1));
24299 fprintf (file
, ")(%s)", reg_names
[ REGNO (XEXP (x
, 0)) ]);
/* LO_SUM, ELF spelling: "...@l(rN)".  */
24303 else if (GET_CODE (x
) == LO_SUM
&& REG_P (XEXP (x
, 0))
24304 && CONSTANT_P (XEXP (x
, 1)))
24306 output_addr_const (file
, XEXP (x
, 1));
24307 fprintf (file
, "@l(%s)", reg_names
[ REGNO (XEXP (x
, 0)) ]);
24310 else if (toc_relative_expr_p (x
, false))
24312 /* This hack along with a corresponding hack in
24313 rs6000_output_addr_const_extra arranges to output addends
24314 where the assembler expects to find them. eg.
24316 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
24317 without this hack would be output as "x@toc+8@l(9)". We
24318 want "x+8@toc@l(9)". */
24319 output_addr_const (file
, CONST_CAST_RTX (tocrel_base
));
24320 if (GET_CODE (x
) == LO_SUM
)
24321 fprintf (file
, "@l(%s)", reg_names
[REGNO (XEXP (x
, 0))]);
24323 fprintf (file
, "(%s)", reg_names
[REGNO (XVECEXP (tocrel_base
, 0, 1))]);
/* Any other address form is a bug in the caller.  */
24326 gcc_unreachable ();
24329 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
/* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA: print target-specific UNSPEC
   address constants — UNSPEC_TOCREL (symbol plus optional addend, then a
   "-.LCTOC1" or "@toc" suffix depending on ABI) and, for Darwin,
   UNSPEC_MACHOPIC_OFFSET.
   NOTE(review): line-garbled extraction; braces, `break`/`return` lines and
   the function's `{`/`}` are missing from this view (embedded numbering
   skips).  Text left byte-identical; only comments added.  */
24332 rs6000_output_addr_const_extra (FILE *file
, rtx x
)
24334 if (GET_CODE (x
) == UNSPEC
)
24335 switch (XINT (x
, 1))
24337 case UNSPEC_TOCREL
:
/* A tocrel UNSPEC must wrap a SYMBOL_REF and the TOC register.  */
24338 gcc_checking_assert (GET_CODE (XVECEXP (x
, 0, 0)) == SYMBOL_REF
24339 && REG_P (XVECEXP (x
, 0, 1))
24340 && REGNO (XVECEXP (x
, 0, 1)) == TOC_REGISTER
);
24341 output_addr_const (file
, XVECEXP (x
, 0, 0));
/* Emit the addend here (see the matching hack in print_operand_address)
   so the assembler sees "x+8@toc" rather than "x@toc+8".  */
24342 if (x
== tocrel_base
&& tocrel_offset
!= const0_rtx
)
24344 if (INTVAL (tocrel_offset
) >= 0)
24345 fprintf (file
, "+");
24346 output_addr_const (file
, CONST_CAST_RTX (tocrel_offset
));
24348 if (!TARGET_AIX
|| (TARGET_ELF
&& TARGET_MINIMAL_TOC
))
24351 assemble_name (file
, toc_label_name
);
24354 else if (TARGET_ELF
)
24355 fputs ("@toc", file
);
24359 case UNSPEC_MACHOPIC_OFFSET
:
24360 output_addr_const (file
, XVECEXP (x
, 0, 0));
24362 machopic_output_function_base_name (file
);
24369 /* Target hook for assembling integer objects. The PowerPC version has
24370 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
24371 is defined. It also needs to handle DI-mode objects on 64-bit
/* Target hook for assembling an integer object X of SIZE bytes.  When
   RELOCATABLE_NEEDS_FIXUP is defined it emits .fixup entries for
   -mrelocatable V4 addresses and strips leading '.' from -mcall-aixdesc
   function symbols; otherwise it defers to default_assemble_integer.
   NOTE(review): line-garbled extraction with interior lines missing
   (embedded numbering skips, e.g. 24392 -> 24398); text left byte-identical,
   comments only.  */
24375 rs6000_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
24377 #ifdef RELOCATABLE_NEEDS_FIXUP
24378 /* Special handling for SI values. */
24379 if (RELOCATABLE_NEEDS_FIXUP
&& size
== 4 && aligned_p
)
/* `recurse` presumably guards re-entry through output_addr_const — the
   lines using it are not visible here; TODO confirm.  */
24381 static int recurse
= 0;
24383 /* For -mrelocatable, we mark all addresses that need to be fixed up in
24384 the .fixup section. Since the TOC section is already relocated, we
24385 don't need to mark it here. We used to skip the text section, but it
24386 should never be valid for relocated addresses to be placed in the text
24388 if (DEFAULT_ABI
== ABI_V4
24389 && (TARGET_RELOCATABLE
|| flag_pic
> 1)
24390 && in_section
!= toc_section
24392 && !CONST_SCALAR_INT_P (x
)
/* Emit "<label>: .long (<x>)@fixup" and record <label> in .fixup.  */
24398 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCP", fixuplabelno
);
24400 ASM_OUTPUT_LABEL (asm_out_file
, buf
);
24401 fprintf (asm_out_file
, "\t.long\t(");
24402 output_addr_const (asm_out_file
, x
);
24403 fprintf (asm_out_file
, ")@fixup\n");
24404 fprintf (asm_out_file
, "\t.section\t\".fixup\",\"aw\"\n");
24405 ASM_OUTPUT_ALIGN (asm_out_file
, 2);
24406 fprintf (asm_out_file
, "\t.long\t");
24407 assemble_name (asm_out_file
, buf
);
24408 fprintf (asm_out_file
, "\n\t.previous\n");
24412 /* Remove initial .'s to turn a -mcall-aixdesc function
24413 address into the address of the descriptor, not the function
24415 else if (GET_CODE (x
) == SYMBOL_REF
24416 && XSTR (x
, 0)[0] == '.'
24417 && DEFAULT_ABI
== ABI_AIX
)
24419 const char *name
= XSTR (x
, 0);
24420 while (*name
== '.')
24423 fprintf (asm_out_file
, "\t.long\t%s\n", name
);
24427 #endif /* RELOCATABLE_NEEDS_FIXUP */
24428 return default_assemble_integer (x
, size
, aligned_p
);
24431 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
24432 /* Emit an assembler directive to set symbol visibility for DECL to
24433 VISIBILITY_TYPE. */
/* Emit assembler directives setting the ELF visibility VIS for DECL.
   On AIX-style ABIs a FUNCTION_DECL needs the directive twice: once for
   the descriptor symbol NAME and once for the entry-point ".NAME";
   everything else goes through default_assemble_visibility.
   NOTE(review): line-garbled extraction — closing brace of the
   visibility_types initializer and surrounding braces are missing from this
   view.  Text left byte-identical; comments only.  */
24436 rs6000_assemble_visibility (tree decl
, int vis
)
24441 /* Functions need to have their entry point symbol visibility set as
24442 well as their descriptor symbol visibility. */
24443 if (DEFAULT_ABI
== ABI_AIX
24445 && TREE_CODE (decl
) == FUNCTION_DECL
)
/* Index 0 (VISIBILITY_DEFAULT) is NULL: no directive needed.  */
24447 static const char * const visibility_types
[] = {
24448 NULL
, "protected", "hidden", "internal"
24451 const char *name
, *type
;
24453 name
= ((* targetm
.strip_name_encoding
)
24454 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
))));
24455 type
= visibility_types
[vis
];
/* ".hidden name" for the descriptor, ".hidden .name" for the code.  */
24457 fprintf (asm_out_file
, "\t.%s\t%s\n", type
, name
);
24458 fprintf (asm_out_file
, "\t.%s\t.%s\n", type
, name
);
24461 default_assemble_visibility (decl
, vis
);
/* Reverse comparison CODE for condition-register MODE.  CCFPmode compares
   (unless -ffinite-math-only makes NaNs impossible, except for codes that
   are inherently unordered) must use reverse_condition_maybe_unordered so
   ordered <-> unordered is preserved; everything else uses plain
   reverse_condition.
   NOTE(review): line-garbled extraction; text left byte-identical.  */
24466 rs6000_reverse_condition (machine_mode mode
, enum rtx_code code
)
24468 /* Reversal of FP compares takes care -- an ordered compare
24469 becomes an unordered compare and vice versa. */
24470 if (mode
== CCFPmode
24471 && (!flag_finite_math_only
24472 || code
== UNLT
|| code
== UNLE
|| code
== UNGT
|| code
== UNGE
24473 || code
== UNEQ
|| code
== LTGT
))
24474 return reverse_condition_maybe_unordered (code
);
24476 return reverse_condition (code
);
24479 /* Generate a compare for CODE. Return a brand-new rtx that
24480 represents the result of the compare. */
/* Generate a compare for CMP (a comparison rtx) in MODE and return a fresh
   rtx (CODE cc-reg 0) representing the result.  Chooses the CC mode
   (CCmode / CCFPmode / CCUNSmode), handles E500 GPR float compares,
   IEEE-128 software compares via libcalls, XL-compatible IBM-128 compares,
   stack-protector tests, and synthesizes LE/GE/UNEQ/LTGT/UNGT/UNLT from
   simpler compares OR'd together.
   FIX(review): in the IEEE-128 soft-float UNEQ/LTGT arm the original read
   `code = (code = UNEQ) ? EQ : NE;` — an assignment, not a comparison, so
   the condition was always true, `code` was clobbered, and LTGT wrongly
   mapped to EQ.  Changed to `==`, matching the parallel UNORDERED/UNGE/UNLE
   arms above it and upstream GCC.
   NOTE(review): otherwise this is a line-garbled extraction with many
   interior lines missing (embedded numbering skips); all other text is
   left byte-identical.  */
24483 rs6000_generate_compare (rtx cmp
, machine_mode mode
)
24485 machine_mode comp_mode
;
24486 rtx compare_result
;
24487 enum rtx_code code
= GET_CODE (cmp
);
24488 rtx op0
= XEXP (cmp
, 0);
24489 rtx op1
= XEXP (cmp
, 1);
/* Pick the condition-code mode for the compare.  */
24491 if (!TARGET_FLOAT128_HW
&& FLOAT128_VECTOR_P (mode
))
24492 comp_mode
= CCmode
;
24493 else if (FLOAT_MODE_P (mode
))
24494 comp_mode
= CCFPmode
;
24495 else if (code
== GTU
|| code
== LTU
24496 || code
== GEU
|| code
== LEU
)
24497 comp_mode
= CCUNSmode
;
24498 else if ((code
== EQ
|| code
== NE
)
24499 && unsigned_reg_p (op0
)
24500 && (unsigned_reg_p (op1
)
24501 || (CONST_INT_P (op1
) && INTVAL (op1
) != 0)))
24502 /* These are unsigned values, perhaps there will be a later
24503 ordering compare that can be shared with this one. */
24504 comp_mode
= CCUNSmode
;
24506 comp_mode
= CCmode
;
24508 /* If we have an unsigned compare, make sure we don't have a signed value as
24510 if (comp_mode
== CCUNSmode
&& GET_CODE (op1
) == CONST_INT
24511 && INTVAL (op1
) < 0)
24513 op0
= copy_rtx_if_shared (op0
);
24514 op1
= force_reg (GET_MODE (op0
), op1
);
24515 cmp
= gen_rtx_fmt_ee (code
, GET_MODE (cmp
), op0
, op1
);
24518 /* First, the compare. */
24519 compare_result
= gen_reg_rtx (comp_mode
);
24521 /* E500 FP compare instructions on the GPRs. Yuck! */
24522 if ((!TARGET_FPRS
&& TARGET_HARD_FLOAT
)
24523 && FLOAT_MODE_P (mode
))
24525 rtx cmp
, or_result
, compare_result2
;
24526 machine_mode op_mode
= GET_MODE (op0
);
24529 if (op_mode
== VOIDmode
)
24530 op_mode
= GET_MODE (op1
);
24532 /* First reverse the condition codes that aren't directly supported. */
24540 code
= reverse_condition_maybe_unordered (code
);
24553 gcc_unreachable ();
24556 /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
24557 This explains the following mess. */
/* EQ-style compares, per operand mode (SF/DF/TF); tst* when no traps.  */
24565 cmp
= (flag_finite_math_only
&& !flag_trapping_math
)
24566 ? gen_tstsfeq_gpr (compare_result
, op0
, op1
)
24567 : gen_cmpsfeq_gpr (compare_result
, op0
, op1
);
24571 cmp
= (flag_finite_math_only
&& !flag_trapping_math
)
24572 ? gen_tstdfeq_gpr (compare_result
, op0
, op1
)
24573 : gen_cmpdfeq_gpr (compare_result
, op0
, op1
);
24579 cmp
= (flag_finite_math_only
&& !flag_trapping_math
)
24580 ? gen_tsttfeq_gpr (compare_result
, op0
, op1
)
24581 : gen_cmptfeq_gpr (compare_result
, op0
, op1
);
24585 gcc_unreachable ();
/* GT-style compares.  */
24594 cmp
= (flag_finite_math_only
&& !flag_trapping_math
)
24595 ? gen_tstsfgt_gpr (compare_result
, op0
, op1
)
24596 : gen_cmpsfgt_gpr (compare_result
, op0
, op1
);
24600 cmp
= (flag_finite_math_only
&& !flag_trapping_math
)
24601 ? gen_tstdfgt_gpr (compare_result
, op0
, op1
)
24602 : gen_cmpdfgt_gpr (compare_result
, op0
, op1
);
24608 cmp
= (flag_finite_math_only
&& !flag_trapping_math
)
24609 ? gen_tsttfgt_gpr (compare_result
, op0
, op1
)
24610 : gen_cmptfgt_gpr (compare_result
, op0
, op1
);
24614 gcc_unreachable ();
/* LT-style compares.  */
24623 cmp
= (flag_finite_math_only
&& !flag_trapping_math
)
24624 ? gen_tstsflt_gpr (compare_result
, op0
, op1
)
24625 : gen_cmpsflt_gpr (compare_result
, op0
, op1
);
24629 cmp
= (flag_finite_math_only
&& !flag_trapping_math
)
24630 ? gen_tstdflt_gpr (compare_result
, op0
, op1
)
24631 : gen_cmpdflt_gpr (compare_result
, op0
, op1
);
24637 cmp
= (flag_finite_math_only
&& !flag_trapping_math
)
24638 ? gen_tsttflt_gpr (compare_result
, op0
, op1
)
24639 : gen_cmptflt_gpr (compare_result
, op0
, op1
);
24643 gcc_unreachable ();
24648 gcc_unreachable ();
24651 /* Synthesize LE and GE from LT/GT || EQ. */
24652 if (code
== LE
|| code
== GE
)
24656 compare_result2
= gen_reg_rtx (CCFPmode
);
/* Second compare: the EQ half of the disjunction.  */
24662 cmp
= (flag_finite_math_only
&& !flag_trapping_math
)
24663 ? gen_tstsfeq_gpr (compare_result2
, op0
, op1
)
24664 : gen_cmpsfeq_gpr (compare_result2
, op0
, op1
);
24668 cmp
= (flag_finite_math_only
&& !flag_trapping_math
)
24669 ? gen_tstdfeq_gpr (compare_result2
, op0
, op1
)
24670 : gen_cmpdfeq_gpr (compare_result2
, op0
, op1
);
24676 cmp
= (flag_finite_math_only
&& !flag_trapping_math
)
24677 ? gen_tsttfeq_gpr (compare_result2
, op0
, op1
)
24678 : gen_cmptfeq_gpr (compare_result2
, op0
, op1
);
24682 gcc_unreachable ();
24687 /* OR them together. */
24688 or_result
= gen_reg_rtx (CCFPmode
);
24689 cmp
= gen_e500_cr_ior_compare (or_result
, compare_result
,
24691 compare_result
= or_result
;
/* The caller tests only EQ/NE of the GT bit; reverse_p set earlier
   (not visible in this extraction) flips the polarity.  */
24694 code
= reverse_p
? NE
: EQ
;
24699 /* IEEE 128-bit support in VSX registers when we do not have hardware
24701 else if (!TARGET_FLOAT128_HW
&& FLOAT128_VECTOR_P (mode
))
24703 rtx libfunc
= NULL_RTX
;
24704 bool check_nan
= false;
/* Map the rtx code to a soft-float comparison libcall and the integer
   condition to apply to its result.  */
24711 libfunc
= optab_libfunc (eq_optab
, mode
);
24716 libfunc
= optab_libfunc (ge_optab
, mode
);
24721 libfunc
= optab_libfunc (le_optab
, mode
);
24726 libfunc
= optab_libfunc (unord_optab
, mode
);
24727 code
= (code
== UNORDERED
) ? NE
: EQ
;
24733 libfunc
= optab_libfunc (ge_optab
, mode
);
24734 code
= (code
== UNGE
) ? GE
: GT
;
24740 libfunc
= optab_libfunc (le_optab
, mode
);
24741 code
= (code
== UNLE
) ? LE
: LT
;
24747 libfunc
= optab_libfunc (eq_optab
, mode
);
/* FIX(review): was `= UNEQ` (assignment); must be equality so LTGT
   maps to NE and `code` is not clobbered.  */
24748 code
= (code
== UNEQ
) ? EQ
: NE
;
24752 gcc_unreachable ();
24755 gcc_assert (libfunc
);
24758 dest
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
24759 SImode
, op0
, mode
, op1
, mode
);
24761 /* The library signals an exception for signalling NaNs, so we need to
24762 handle isgreater, etc. by first checking isordered. */
24765 rtx ne_rtx
, normal_dest
, unord_dest
;
24766 rtx unord_func
= optab_libfunc (unord_optab
, mode
);
24767 rtx join_label
= gen_label_rtx ();
24768 rtx join_ref
= gen_rtx_LABEL_REF (VOIDmode
, join_label
);
24769 rtx unord_cmp
= gen_reg_rtx (comp_mode
);
24772 /* Test for either value being a NaN. */
24773 gcc_assert (unord_func
);
24774 unord_dest
= emit_library_call_value (unord_func
, NULL_RTX
, LCT_CONST
,
24775 SImode
, op0
, mode
, op1
, mode
);
24777 /* Set value (0) if either value is a NaN, and jump to the join
24779 dest
= gen_reg_rtx (SImode
);
24780 emit_move_insn (dest
, const1_rtx
);
24781 emit_insn (gen_rtx_SET (unord_cmp
,
24782 gen_rtx_COMPARE (comp_mode
, unord_dest
,
24785 ne_rtx
= gen_rtx_NE (comp_mode
, unord_cmp
, const0_rtx
);
24786 emit_jump_insn (gen_rtx_SET (pc_rtx
,
24787 gen_rtx_IF_THEN_ELSE (VOIDmode
, ne_rtx
,
24791 /* Do the normal comparison, knowing that the values are not
24793 normal_dest
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
24794 SImode
, op0
, mode
, op1
, mode
);
24796 emit_insn (gen_cstoresi4 (dest
,
24797 gen_rtx_fmt_ee (code
, SImode
, normal_dest
,
24799 normal_dest
, const0_rtx
));
24801 /* Join NaN and non-Nan paths. Compare dest against 0. */
24802 emit_label (join_label
);
24806 emit_insn (gen_rtx_SET (compare_result
,
24807 gen_rtx_COMPARE (comp_mode
, dest
, const0_rtx
)));
24812 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
24813 CLOBBERs to match cmptf_internal2 pattern. */
24814 if (comp_mode
== CCFPmode
&& TARGET_XL_COMPAT
24815 && FLOAT128_IBM_P (GET_MODE (op0
))
24816 && TARGET_HARD_FLOAT
&& TARGET_FPRS
)
24817 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
24819 gen_rtx_SET (compare_result
,
24820 gen_rtx_COMPARE (comp_mode
, op0
, op1
)),
24821 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
24822 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
24823 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
24824 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
24825 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
24826 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
24827 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
24828 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
24829 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (Pmode
)))));
24830 else if (GET_CODE (op1
) == UNSPEC
24831 && XINT (op1
, 1) == UNSPEC_SP_TEST
)
/* Stack-protector canary test: expand via the dedicated patterns.  */
24833 rtx op1b
= XVECEXP (op1
, 0, 0);
24834 comp_mode
= CCEQmode
;
24835 compare_result
= gen_reg_rtx (CCEQmode
);
24837 emit_insn (gen_stack_protect_testdi (compare_result
, op0
, op1b
));
24839 emit_insn (gen_stack_protect_testsi (compare_result
, op0
, op1b
));
24842 emit_insn (gen_rtx_SET (compare_result
,
24843 gen_rtx_COMPARE (comp_mode
, op0
, op1
)));
24846 /* Some kinds of FP comparisons need an OR operation;
24847 under flag_finite_math_only we don't bother. */
24848 if (FLOAT_MODE_P (mode
)
24849 && (!FLOAT128_IEEE_P (mode
) || TARGET_FLOAT128_HW
)
24850 && !flag_finite_math_only
24851 && !(TARGET_HARD_FLOAT
&& !TARGET_FPRS
)
24852 && (code
== LE
|| code
== GE
24853 || code
== UNEQ
|| code
== LTGT
24854 || code
== UNGT
|| code
== UNLT
))
24856 enum rtx_code or1
, or2
;
24857 rtx or1_rtx
, or2_rtx
, compare2_rtx
;
24858 rtx or_result
= gen_reg_rtx (CCEQmode
);
/* Decompose the compound FP condition into two CR-bit tests.  */
24862 case LE
: or1
= LT
; or2
= EQ
; break;
24863 case GE
: or1
= GT
; or2
= EQ
; break;
24864 case UNEQ
: or1
= UNORDERED
; or2
= EQ
; break;
24865 case LTGT
: or1
= LT
; or2
= GT
; break;
24866 case UNGT
: or1
= UNORDERED
; or2
= GT
; break;
24867 case UNLT
: or1
= UNORDERED
; or2
= LT
; break;
24868 default: gcc_unreachable ();
24870 validate_condition_mode (or1
, comp_mode
);
24871 validate_condition_mode (or2
, comp_mode
);
24872 or1_rtx
= gen_rtx_fmt_ee (or1
, SImode
, compare_result
, const0_rtx
);
24873 or2_rtx
= gen_rtx_fmt_ee (or2
, SImode
, compare_result
, const0_rtx
);
24874 compare2_rtx
= gen_rtx_COMPARE (CCEQmode
,
24875 gen_rtx_IOR (SImode
, or1_rtx
, or2_rtx
),
24877 emit_insn (gen_rtx_SET (or_result
, compare2_rtx
));
24879 compare_result
= or_result
;
24883 validate_condition_mode (code
, GET_MODE (compare_result
));
24885 return gen_rtx_fmt_ee (code
, VOIDmode
, compare_result
, const0_rtx
);
24889 /* Return the diagnostic message string if the binary operation OP is
24890 not permitted on TYPE1 and TYPE2, NULL otherwise. */
/* Return a diagnostic string if binary operation OP is not permitted on
   TYPE1/TYPE2, NULL otherwise: rejects mixing __float128 (KFmode) with
   __ibm128 (IFmode) without -mfloat128-convert, and mixing either with
   `long double` when TFmode means the other format.
   NOTE(review): line-garbled extraction — the second parameter line, the
   string-literal continuations and the trailing `return NULL;` are missing
   from this view.  Text left byte-identical; comments only.  */
24893 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED
,
24897 machine_mode mode1
= TYPE_MODE (type1
);
24898 machine_mode mode2
= TYPE_MODE (type2
);
24900 /* For complex modes, use the inner type. */
24901 if (COMPLEX_MODE_P (mode1
))
24902 mode1
= GET_MODE_INNER (mode1
);
24904 if (COMPLEX_MODE_P (mode2
))
24905 mode2
= GET_MODE_INNER (mode2
);
24907 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
24908 double to intermix unless -mfloat128-convert. */
/* Identical modes always mix — presumably returns NULL here (line
   missing from extraction); TODO confirm.  */
24909 if (mode1
== mode2
)
24912 if (!TARGET_FLOAT128_CVT
)
24914 if ((mode1
== KFmode
&& mode2
== IFmode
)
24915 || (mode1
== IFmode
&& mode2
== KFmode
))
24916 return N_("__float128 and __ibm128 cannot be used in the same "
/* -mabi=ieeelongdouble: long double (TFmode) is IEEE, so it clashes
   with __ibm128.  */
24919 if (TARGET_IEEEQUAD
24920 && ((mode1
== IFmode
&& mode2
== TFmode
)
24921 || (mode1
== TFmode
&& mode2
== IFmode
)))
24922 return N_("__ibm128 and long double cannot be used in the same "
/* Default ABI: long double is IBM format, so it clashes with
   __float128.  */
24925 if (!TARGET_IEEEQUAD
24926 && ((mode1
== KFmode
&& mode2
== TFmode
)
24927 || (mode1
== TFmode
&& mode2
== KFmode
)))
24928 return N_("__float128 and long double cannot be used in the same "
24936 /* Expand floating point conversion to/from __float128 and __ibm128. */
/* Expand a floating-point conversion between __float128/__ibm128/long
   double and narrower float or integer modes, moving DEST <- SRC
   (UNSIGNED_P selects unsigned integer conversions).  Prefers ISA 3.0
   hardware patterns from the hw_conversions table, falls back to optab
   libcalls, and treats same-representation TFmode/KFmode pairs as moves.
   NOTE(review): line-garbled extraction — struct/array braces, switch
   heads, case labels and several guard lines are missing from this view
   (embedded numbering skips).  Text left byte-identical; comments only.  */
24939 rs6000_expand_float128_convert (rtx dest
, rtx src
, bool unsigned_p
)
24941 machine_mode dest_mode
= GET_MODE (dest
);
24942 machine_mode src_mode
= GET_MODE (src
);
24943 convert_optab cvt
= unknown_optab
;
24944 bool do_move
= false;
24945 rtx libfunc
= NULL_RTX
;
/* Table of hardware-conversion generator functions, indexed by
   KFmode-vs-TFmode (kf_or_tf) and direction.  */
24947 typedef rtx (*rtx_2func_t
) (rtx
, rtx
);
24948 rtx_2func_t hw_convert
= (rtx_2func_t
)0;
24952 rtx_2func_t from_df
;
24953 rtx_2func_t from_sf
;
24954 rtx_2func_t from_si_sign
;
24955 rtx_2func_t from_si_uns
;
24956 rtx_2func_t from_di_sign
;
24957 rtx_2func_t from_di_uns
;
24960 rtx_2func_t to_si_sign
;
24961 rtx_2func_t to_si_uns
;
24962 rtx_2func_t to_di_sign
;
24963 rtx_2func_t to_di_uns
;
24964 } hw_conversions
[2] = {
24965 /* convertions to/from KFmode */
24967 gen_extenddfkf2_hw
, /* KFmode <- DFmode. */
24968 gen_extendsfkf2_hw
, /* KFmode <- SFmode. */
24969 gen_float_kfsi2_hw
, /* KFmode <- SImode (signed). */
24970 gen_floatuns_kfsi2_hw
, /* KFmode <- SImode (unsigned). */
24971 gen_float_kfdi2_hw
, /* KFmode <- DImode (signed). */
24972 gen_floatuns_kfdi2_hw
, /* KFmode <- DImode (unsigned). */
24973 gen_trunckfdf2_hw
, /* DFmode <- KFmode. */
24974 gen_trunckfsf2_hw
, /* SFmode <- KFmode. */
24975 gen_fix_kfsi2_hw
, /* SImode <- KFmode (signed). */
24976 gen_fixuns_kfsi2_hw
, /* SImode <- KFmode (unsigned). */
24977 gen_fix_kfdi2_hw
, /* DImode <- KFmode (signed). */
24978 gen_fixuns_kfdi2_hw
, /* DImode <- KFmode (unsigned). */
24981 /* convertions to/from TFmode */
24983 gen_extenddftf2_hw
, /* TFmode <- DFmode. */
24984 gen_extendsftf2_hw
, /* TFmode <- SFmode. */
24985 gen_float_tfsi2_hw
, /* TFmode <- SImode (signed). */
24986 gen_floatuns_tfsi2_hw
, /* TFmode <- SImode (unsigned). */
24987 gen_float_tfdi2_hw
, /* TFmode <- DImode (signed). */
24988 gen_floatuns_tfdi2_hw
, /* TFmode <- DImode (unsigned). */
24989 gen_trunctfdf2_hw
, /* DFmode <- TFmode. */
24990 gen_trunctfsf2_hw
, /* SFmode <- TFmode. */
24991 gen_fix_tfsi2_hw
, /* SImode <- TFmode (signed). */
24992 gen_fixuns_tfsi2_hw
, /* SImode <- TFmode (unsigned). */
24993 gen_fix_tfdi2_hw
, /* DImode <- TFmode (signed). */
24994 gen_fixuns_tfdi2_hw
, /* DImode <- TFmode (unsigned). */
/* Identical modes should have been handled by the caller.  */
24998 if (dest_mode
== src_mode
)
24999 gcc_unreachable ();
25001 /* Eliminate memory operations. */
25003 src
= force_reg (src_mode
, src
);
/* Memory destination (guard missing in extraction): convert into a
   temporary, then store — presumably under `if (MEM_P (dest))`; TODO
   confirm.  */
25007 rtx tmp
= gen_reg_rtx (dest_mode
);
25008 rs6000_expand_float128_convert (tmp
, src
, unsigned_p
);
25009 rs6000_emit_move (dest
, tmp
, dest_mode
);
25013 /* Convert to IEEE 128-bit floating point. */
25014 if (FLOAT128_IEEE_P (dest_mode
))
25016 if (dest_mode
== KFmode
)
25018 else if (dest_mode
== TFmode
)
25021 gcc_unreachable ();
/* Dispatch on src_mode (switch head lost in extraction).  */
25027 hw_convert
= hw_conversions
[kf_or_tf
].from_df
;
25032 hw_convert
= hw_conversions
[kf_or_tf
].from_sf
;
25038 if (FLOAT128_IBM_P (src_mode
))
25047 cvt
= ufloat_optab
;
25048 hw_convert
= hw_conversions
[kf_or_tf
].from_si_uns
;
25052 cvt
= sfloat_optab
;
25053 hw_convert
= hw_conversions
[kf_or_tf
].from_si_sign
;
25060 cvt
= ufloat_optab
;
25061 hw_convert
= hw_conversions
[kf_or_tf
].from_di_uns
;
25065 cvt
= sfloat_optab
;
25066 hw_convert
= hw_conversions
[kf_or_tf
].from_di_sign
;
25071 gcc_unreachable ();
25075 /* Convert from IEEE 128-bit floating point. */
25076 else if (FLOAT128_IEEE_P (src_mode
))
25078 if (src_mode
== KFmode
)
25080 else if (src_mode
== TFmode
)
25083 gcc_unreachable ();
25089 hw_convert
= hw_conversions
[kf_or_tf
].to_df
;
25094 hw_convert
= hw_conversions
[kf_or_tf
].to_sf
;
25100 if (FLOAT128_IBM_P (dest_mode
))
25110 hw_convert
= hw_conversions
[kf_or_tf
].to_si_uns
;
25115 hw_convert
= hw_conversions
[kf_or_tf
].to_si_sign
;
25123 hw_convert
= hw_conversions
[kf_or_tf
].to_di_uns
;
25128 hw_convert
= hw_conversions
[kf_or_tf
].to_di_sign
;
25133 gcc_unreachable ();
25137 /* Both IBM format. */
25138 else if (FLOAT128_IBM_P (dest_mode
) && FLOAT128_IBM_P (src_mode
))
25142 gcc_unreachable ();
25144 /* Handle conversion between TFmode/KFmode. */
/* Same underlying representation: a plain subreg move suffices.  */
25146 emit_move_insn (dest
, gen_lowpart (dest_mode
, src
));
25148 /* Handle conversion if we have hardware support. */
25149 else if (TARGET_FLOAT128_HW
&& hw_convert
)
25150 emit_insn ((hw_convert
) (dest
, src
));
25152 /* Call an external function to do the conversion. */
25153 else if (cvt
!= unknown_optab
)
25155 libfunc
= convert_optab_libfunc (cvt
, dest_mode
, src_mode
);
25156 gcc_assert (libfunc
!= NULL_RTX
);
25158 dest2
= emit_library_call_value (libfunc
, dest
, LCT_CONST
, dest_mode
,
25161 gcc_assert (dest2
!= NULL_RTX
);
25162 if (!rtx_equal_p (dest
, dest2
))
25163 emit_move_insn (dest
, dest2
);
25167 gcc_unreachable ();
25173 /* Emit the RTL for an sISEL pattern. */
/* Emit the RTL for an sISEL pattern: store-condition via isel, setting
   operands[0] to 1/0 from comparison operands[1] through
   rs6000_emit_int_cmove.
   NOTE(review): line-garbled extraction; the function's braces are not
   visible.  Text left byte-identical.  */
25176 rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED
, rtx operands
[])
25178 rs6000_emit_int_cmove (operands
[0], operands
[1], const1_rtx
, const0_rtx
);
25181 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
25182 can be used as that dest register. Return the dest register. */
/* Emit RTL that leaves SCRATCH zero iff OP1 == OP2 (XOR when OP2 is a
   logical_operand, otherwise OP1 + (-OP2)); SCRATCH may be a SCRATCH rtx,
   in which case a fresh pseudo is used.  Returns the register used —
   the return statements are outside this garbled view.
   NOTE(review): line-garbled extraction; text left byte-identical.  */
25185 rs6000_emit_eqne (machine_mode mode
, rtx op1
, rtx op2
, rtx scratch
)
/* OP2 == 0: OP1 itself is already the wanted value (body lines lost in
   extraction — presumably returns op1; TODO confirm).  */
25187 if (op2
== const0_rtx
)
25190 if (GET_CODE (scratch
) == SCRATCH
)
25191 scratch
= gen_reg_rtx (mode
);
25193 if (logical_operand (op2
, mode
))
25194 emit_insn (gen_rtx_SET (scratch
, gen_rtx_XOR (mode
, op1
, op2
)));
25196 emit_insn (gen_rtx_SET (scratch
,
25197 gen_rtx_PLUS (mode
, op1
, negate_rtx (mode
, op2
))));
/* Emit a store-condition sequence: set operands[0] to the 0/1 result of
   comparison operands[1] in MODE.  Handles the E500 GPR-float special
   case (move the CR GT bit, flipping it for NE), inverts conditions the
   ISA cannot store directly by compare-against-zero of the reversed
   condition, then converts/moves the condition into the result register.
   NOTE(review): line-garbled extraction with missing braces/else lines;
   text left byte-identical, comments only.  */
25203 rs6000_emit_sCOND (machine_mode mode
, rtx operands
[])
25206 machine_mode op_mode
;
25207 enum rtx_code cond_code
;
25208 rtx result
= operands
[0];
25210 condition_rtx
= rs6000_generate_compare (operands
[1], mode
);
25211 cond_code
= GET_CODE (condition_rtx
);
/* E500: FP compares live in the GPRs; only the CR GT bit is
   meaningful.  */
25213 if (FLOAT_MODE_P (mode
)
25214 && !TARGET_FPRS
&& TARGET_HARD_FLOAT
)
25218 PUT_MODE (condition_rtx
, SImode
);
25219 t
= XEXP (condition_rtx
, 0);
25221 gcc_assert (cond_code
== NE
|| cond_code
== EQ
)
;
25223 if (cond_code
== NE
)
25224 emit_insn (gen_e500_flip_gt_bit (t
, t
));
25226 emit_insn (gen_move_from_CR_gt_bit (result
, t
));
/* Conditions with no direct mfcr/isel encoding: materialize the
   reversed condition compared against zero, then test EQ.  */
25230 if (cond_code
== NE
25231 || cond_code
== GE
|| cond_code
== LE
25232 || cond_code
== GEU
|| cond_code
== LEU
25233 || cond_code
== ORDERED
|| cond_code
== UNGE
|| cond_code
== UNLE
)
25235 rtx not_result
= gen_reg_rtx (CCEQmode
);
25236 rtx not_op
, rev_cond_rtx
;
25237 machine_mode cc_mode
;
25239 cc_mode
= GET_MODE (XEXP (condition_rtx
, 0));
25241 rev_cond_rtx
= gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode
, cond_code
),
25242 SImode
, XEXP (condition_rtx
, 0), const0_rtx
);
25243 not_op
= gen_rtx_COMPARE (CCEQmode
, rev_cond_rtx
, const0_rtx
);
25244 emit_insn (gen_rtx_SET (not_result
, not_op
));
25245 condition_rtx
= gen_rtx_EQ (VOIDmode
, not_result
, const0_rtx
);
25248 op_mode
= GET_MODE (XEXP (operands
[1], 0));
25249 if (op_mode
== VOIDmode
)
25250 op_mode
= GET_MODE (XEXP (operands
[1], 1));
/* 64-bit results need a DImode condition then a convert_move.  */
25252 if (TARGET_POWERPC64
&& (op_mode
== DImode
|| FLOAT_MODE_P (mode
)))
25254 PUT_MODE (condition_rtx
, DImode
);
25255 convert_move (result
, condition_rtx
, 0);
25259 PUT_MODE (condition_rtx
, SImode
);
25260 emit_insn (gen_rtx_SET (result
, condition_rtx
));
25264 /* Emit a branch of kind CODE to location LOC. */
/* Emit a conditional branch: compare operands[0] in MODE via
   rs6000_generate_compare, then jump to the label in operands[3] when the
   condition holds (pc otherwise).
   NOTE(review): line-garbled extraction; braces not visible.  Text left
   byte-identical.  */
25267 rs6000_emit_cbranch (machine_mode mode
, rtx operands
[])
25269 rtx condition_rtx
, loc_ref
;
25271 condition_rtx
= rs6000_generate_compare (operands
[0], mode
);
25272 loc_ref
= gen_rtx_LABEL_REF (VOIDmode
, operands
[3]);
25273 emit_jump_insn (gen_rtx_SET (pc_rtx
,
25274 gen_rtx_IF_THEN_ELSE (VOIDmode
, condition_rtx
,
25275 loc_ref
, pc_rtx
)));
25278 /* Return the string to output a conditional branch to LABEL, which is
25279 the operand template of the label, or NULL if the branch is really a
25280 conditional return.
25282 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
25283 condition code register and its mode specifies what kind of
25284 comparison we made.
25286 REVERSED is nonzero if we should reverse the sense of the comparison.
25288 INSN is the insn. */
/* Build (in a static buffer) the assembler template for a conditional
   branch to LABEL (or a conditional return when LABEL is NULL) for
   comparison OP on a CR register.  REVERSED flips the sense; a long
   branch (length 8) additionally reverses the condition and appends an
   unconditional `b` over the distance.  Adds +/- prediction hints from
   REG_BR_PROB notes.
   NOTE(review): line-garbled extraction — switch heads, several case
   labels, buffer setup (`s = string`) and the return are missing from this
   view.  Text left byte-identical; comments only.  The static buffer makes
   this non-reentrant (true of the original too).  */
25291 output_cbranch (rtx op
, const char *label
, int reversed
, rtx_insn
*insn
)
25293 static char string
[64];
25294 enum rtx_code code
= GET_CODE (op
);
25295 rtx cc_reg
= XEXP (op
, 0);
25296 machine_mode mode
= GET_MODE (cc_reg
);
25297 int cc_regno
= REGNO (cc_reg
) - CR0_REGNO
;
/* get_attr_length == 8 means the target is out of 16-bit displacement
   range, so we branch around an unconditional branch.  */
25298 int need_longbranch
= label
!= NULL
&& get_attr_length (insn
) == 8;
25299 int really_reversed
= reversed
^ need_longbranch
;
25305 validate_condition_mode (code
, mode
);
25307 /* Work out which way this really branches. We could use
25308 reverse_condition_maybe_unordered here always but this
25309 makes the resulting assembler clearer. */
25310 if (really_reversed
)
25312 /* Reversal of FP compares takes care -- an ordered compare
25313 becomes an unordered compare and vice versa. */
25314 if (mode
== CCFPmode
)
25315 code
= reverse_condition_maybe_unordered (code
);
25317 code
= reverse_condition (code
);
/* E500 GPR float compares: only the GT bit is meaningful.  */
25320 if ((!TARGET_FPRS
&& TARGET_HARD_FLOAT
) && mode
== CCFPmode
)
25322 /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
25327 /* Opposite of GT. */
25336 gcc_unreachable ();
25342 /* Not all of these are actually distinct opcodes, but
25343 we distinguish them for clarity of the resulting assembler. */
25344 case NE
: case LTGT
:
25345 ccode
= "ne"; break;
25346 case EQ
: case UNEQ
:
25347 ccode
= "eq"; break;
25349 ccode
= "ge"; break;
25350 case GT
: case GTU
: case UNGT
:
25351 ccode
= "gt"; break;
25353 ccode
= "le"; break;
25354 case LT
: case LTU
: case UNLT
:
25355 ccode
= "lt"; break;
25356 case UNORDERED
: ccode
= "un"; break;
25357 case ORDERED
: ccode
= "nu"; break;
25358 case UNGE
: ccode
= "nl"; break;
25359 case UNLE
: ccode
= "ng"; break;
25361 gcc_unreachable ();
25364 /* Maybe we have a guess as to how likely the branch is. */
25366 note
= find_reg_note (insn
, REG_BR_PROB
, NULL_RTX
);
25367 if (note
!= NULL_RTX
)
25369 /* PROB is the difference from 50%. */
25370 int prob
= profile_probability::from_reg_br_prob_note (XINT (note
, 0))
25371 .to_reg_br_prob_base () - REG_BR_PROB_BASE
/ 2;
25373 /* Only hint for highly probable/improbable branches on newer cpus when
25374 we have real profile data, as static prediction overrides processor
25375 dynamic prediction. For older cpus we may as well always hint, but
25376 assume not taken for branches that are very close to 50% as a
25377 mispredicted taken branch is more expensive than a
25378 mispredicted not-taken branch. */
25379 if (rs6000_always_hint
25380 || (abs (prob
) > REG_BR_PROB_BASE
/ 100 * 48
25381 && (profile_status_for_fn (cfun
) != PROFILE_GUESSED
)
25382 && br_prob_note_reliable_p (note
)))
/* XOR with need_longbranch: hints apply to the (reversed) short
   branch that is actually emitted.  */
25384 if (abs (prob
) > REG_BR_PROB_BASE
/ 20
25385 && ((prob
> 0) ^ need_longbranch
))
/* LABEL == NULL: conditional return ("b<cc>lr").  */
25393 s
+= sprintf (s
, "b%slr%s ", ccode
, pred
);
25395 s
+= sprintf (s
, "b%s%s ", ccode
, pred
);
25397 /* We need to escape any '%' characters in the reg_names string.
25398 Assume they'd only be the first character.... */
25399 if (reg_names
[cc_regno
+ CR0_REGNO
][0] == '%')
25401 s
+= sprintf (s
, "%s", reg_names
[cc_regno
+ CR0_REGNO
]);
25405 /* If the branch distance was too far, we may have to use an
25406 unconditional branch to go the distance. */
25407 if (need_longbranch
)
25408 s
+= sprintf (s
, ",$+8\n\tb %s", label
);
25410 s
+= sprintf (s
, ",%s", label
);
25416 /* Return the string to flip the GT bit on a CR. */
/* Return (in a static buffer) the "crnot" instruction that flips the GT
   bit of CR field SRC into the GT bit of CR field DST; bit index is
   4 * field + 1 because GT is bit 1 of each 4-bit CR field.
   NOTE(review): line-garbled extraction — declarations of a/b and the
   `return string;` are outside this view.  Text left byte-identical.  */
25418 output_e500_flip_gt_bit (rtx dst
, rtx src
)
25420 static char string
[64];
25423 gcc_assert (GET_CODE (dst
) == REG
&& CR_REGNO_P (REGNO (dst
))
25424 && GET_CODE (src
) == REG
&& CR_REGNO_P (REGNO (src
)));
25427 a
= 4 * (REGNO (dst
) - CR0_REGNO
) + 1;
25428 b
= 4 * (REGNO (src
) - CR0_REGNO
) + 1;
25430 sprintf (string
, "crnot %d,%d", a
, b
);
25434 /* Return insn for VSX or Altivec comparisons. */
/* Emit a single VSX/Altivec vector compare of OP0 and OP1 with CODE when
   the target supports that code directly, returning the mask register;
   the unsupported-code path (returning NULL) is outside this garbled view.
   NOTE(review): line-garbled extraction — the switch validating CODE and
   `return mask;` are missing here.  Text left byte-identical.  */
25437 rs6000_emit_vector_compare_inner (enum rtx_code code
, rtx op0
, rtx op1
)
25440 machine_mode mode
= GET_MODE (op0
);
25448 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
25459 mask
= gen_reg_rtx (mode
);
25460 emit_insn (gen_rtx_SET (mask
, gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
25467 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
25468 DMODE is expected destination mode. This is a recursive function. */
/* Emit a vector compare for RCODE on OP0/OP1 producing a mask in DMODE.
   Recursive strategy: try the compare directly; otherwise swap operands,
   invert (NOT of the reversed compare), or synthesize GE/LE-style codes
   as (GT|EQ etc.) OR'd with EQ; gives up after the retry.
   NOTE(review): line-garbled extraction — the op0/op1 parameter line,
   switch heads, case labels, `if (mask) return mask;` checks and the
   try_again loop structure are missing from this view.  Text left
   byte-identical; comments only.  */
25471 rs6000_emit_vector_compare (enum rtx_code rcode
,
25473 machine_mode dmode
)
25476 bool swap_operands
= false;
25477 bool try_again
= false;
25479 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode
));
25480 gcc_assert (GET_MODE (op0
) == GET_MODE (op1
));
25482 /* See if the comparison works as is. */
25483 mask
= rs6000_emit_vector_compare_inner (rcode
, op0
, op1
)
;
25491 swap_operands
= true;
25496 swap_operands
= true;
25504 /* Invert condition and try again.
25505 e.g., A != B becomes ~(A==B). */
25507 enum rtx_code rev_code
;
25508 enum insn_code nor_code
;
25511 rev_code
= reverse_condition_maybe_unordered (rcode
);
25512 if (rev_code
== UNKNOWN
)
25515 nor_code
= optab_handler (one_cmpl_optab
, dmode
);
25516 if (nor_code
== CODE_FOR_nothing
)
/* Recurse for the reversed compare, then NOT the result.  */
25519 mask2
= rs6000_emit_vector_compare (rev_code
, op0
, op1
, dmode
);
25523 mask
= gen_reg_rtx (dmode
);
25524 emit_insn (GEN_FCN (nor_code
) (mask
, mask2
));
25532 /* Try GT/GTU/LT/LTU OR EQ */
25535 enum insn_code ior_code
;
25536 enum rtx_code new_code
;
25557 gcc_unreachable ();
25560 ior_code
= optab_handler (ior_optab
, dmode
);
25561 if (ior_code
== CODE_FOR_nothing
)
25564 c_rtx
= rs6000_emit_vector_compare (new_code
, op0
, op1
, dmode
);
25568 eq_rtx
= rs6000_emit_vector_compare (EQ
, op0
, op1
, dmode
);
25572 mask
= gen_reg_rtx (dmode
);
25573 emit_insn (GEN_FCN (ior_code
) (mask
, c_rtx
, eq_rtx
));
/* Retry path: swap the operands and try the inner compare again.  */
25584 std::swap (op0
, op1
);
25586 mask
= rs6000_emit_vector_compare_inner (rcode
, op0
, op1
);
25591 /* You only get two chances. */
25595 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
25596 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
25597 operands for the relation operation COND. */
/* NOTE(review): garbled extraction of rs6000_emit_vector_cond_expr;
   interior lines are missing, so only comments are added.  Visible
   flow: normalize the condition (possibly inverting it and swapping
   OP_TRUE/OP_FALSE via invert_move), build the comparison mask with
   rs6000_emit_vector_compare, special-case -1/0 constant arms so the
   mask itself (or its NOT) is the result, then emit an IF_THEN_ELSE
   on mask != 0 for the general case.  */
25600 rs6000_emit_vector_cond_expr (rtx dest
, rtx op_true
, rtx op_false
,
25601 rtx cond
, rtx cc_op0
, rtx cc_op1
)
25603 machine_mode dest_mode
= GET_MODE (dest
);
25604 machine_mode mask_mode
= GET_MODE (cc_op0
);
25605 enum rtx_code rcode
= GET_CODE (cond
);
25606 machine_mode cc_mode
= CCmode
;
25609 bool invert_move
= false;
25611 if (VECTOR_UNIT_NONE_P (dest_mode
))
25614 gcc_assert (GET_MODE_SIZE (dest_mode
) == GET_MODE_SIZE (mask_mode
)
25615 && GET_MODE_NUNITS (dest_mode
) == GET_MODE_NUNITS (mask_mode
));
25619 /* Swap operands if we can, and fall back to doing the operation as
25620 specified, and doing a NOR to invert the test. */
25626 /* Invert condition and try again.
25627 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
25628 invert_move
= true;
25629 rcode
= reverse_condition_maybe_unordered (rcode
);
25630 if (rcode
== UNKNOWN
)
25636 if (GET_MODE_CLASS (mask_mode
) == MODE_VECTOR_INT
)
25638 /* Invert condition to avoid compound test. */
25639 invert_move
= true;
25640 rcode
= reverse_condition (rcode
);
25648 /* Mark unsigned tests with CCUNSmode. */
25649 cc_mode
= CCUNSmode
;
25651 /* Invert condition to avoid compound test if necessary. */
25652 if (rcode
== GEU
|| rcode
== LEU
)
25654 invert_move
= true;
25655 rcode
= reverse_condition (rcode
);
25663 /* Get the vector mask for the given relational operations. */
25664 mask
= rs6000_emit_vector_compare (rcode
, cc_op0
, cc_op1
, mask_mode
);
25670 std::swap (op_true
, op_false
);
25672 /* Optimize vec1 == vec2, to know the mask generates -1/0. */
25673 if (GET_MODE_CLASS (dest_mode
) == MODE_VECTOR_INT
25674 && (GET_CODE (op_true
) == CONST_VECTOR
25675 || GET_CODE (op_false
) == CONST_VECTOR
))
25677 rtx constant_0
= CONST0_RTX (dest_mode
);
25678 rtx constant_m1
= CONSTM1_RTX (dest_mode
);
25680 if (op_true
== constant_m1
&& op_false
== constant_0
)
25682 emit_move_insn (dest
, mask
);
25686 else if (op_true
== constant_0
&& op_false
== constant_m1
)
25688 emit_insn (gen_rtx_SET (dest
, gen_rtx_NOT (dest_mode
, mask
)));
25692 /* If we can't use the vector comparison directly, perhaps we can use
25693 the mask for the true or false fields, instead of loading up a
25695 if (op_true
== constant_m1
)
25698 if (op_false
== constant_0
)
25702 if (!REG_P (op_true
) && !SUBREG_P (op_true
))
25703 op_true
= force_reg (dest_mode
, op_true
);
25705 if (!REG_P (op_false
) && !SUBREG_P (op_false
))
25706 op_false
= force_reg (dest_mode
, op_false
);
25708 cond2
= gen_rtx_fmt_ee (NE
, cc_mode
, gen_lowpart (dest_mode
, mask
),
25709 CONST0_RTX (dest_mode
));
25710 emit_insn (gen_rtx_SET (dest
,
25711 gen_rtx_IF_THEN_ELSE (dest_mode
,
25718 /* ISA 3.0 (power9) minmax subcase to emit a XSMAXCDP or XSMINCDP instruction
25719 for SF/DF scalars. Move TRUE_COND to DEST if OP of the operands of the last
25720 comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the
25721 hardware has no such operation. */
/* NOTE(review): garbled extraction; missing lines include the early
   returns and the code paths that flip max_p, so only comments are
   added.  Visible logic: recognize (a OP b ? a : b) / (a OP b ? b : a)
   shapes for GE/GT/LE/LT and lower them to rs6000_emit_minmax with
   SMAX or SMIN.  Requires result and compare modes to match.  */
25724 rs6000_emit_p9_fp_minmax (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
25726 enum rtx_code code
= GET_CODE (op
);
25727 rtx op0
= XEXP (op
, 0);
25728 rtx op1
= XEXP (op
, 1);
25729 machine_mode compare_mode
= GET_MODE (op0
);
25730 machine_mode result_mode
= GET_MODE (dest
);
25731 bool max_p
= false;
25733 if (result_mode
!= compare_mode
)
25736 if (code
== GE
|| code
== GT
)
25738 else if (code
== LE
|| code
== LT
)
25743 if (rtx_equal_p (op0
, true_cond
) && rtx_equal_p (op1
, false_cond
))
25746 else if (rtx_equal_p (op1
, true_cond
) && rtx_equal_p (op0
, false_cond
))
25752 rs6000_emit_minmax (dest
, max_p
? SMAX
: SMIN
, op0
, op1
);
25756 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
25757 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP of the
25758 operands of the last comparison is nonzero/true, FALSE_COND if it is
25759 zero/false. Return 0 if the hardware has no such operation. */
/* NOTE(review): garbled extraction; the condition-normalization cases
   between the swap and the RTL generation are missing, so only
   comments are added.  Visible logic: possibly swap/reverse the
   comparison, then emit a PARALLEL of a conditional-move SET with a
   V2DImode scratch CLOBBER, using a CCFPmode comparison.  */
25762 rs6000_emit_p9_fp_cmove (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
25764 enum rtx_code code
= GET_CODE (op
);
25765 rtx op0
= XEXP (op
, 0);
25766 rtx op1
= XEXP (op
, 1);
25767 machine_mode result_mode
= GET_MODE (dest
);
25772 if (!can_create_pseudo_p ())
25785 code
= swap_condition (code
);
25786 std::swap (op0
, op1
);
25793 /* Generate: [(parallel [(set (dest)
25794 (if_then_else (op (cmp1) (cmp2))
25797 (clobber (scratch))])]. */
25799 compare_rtx
= gen_rtx_fmt_ee (code
, CCFPmode
, op0
, op1
);
25800 cmove_rtx
= gen_rtx_SET (dest
,
25801 gen_rtx_IF_THEN_ELSE (result_mode
,
25806 clobber_rtx
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (V2DImode
));
25807 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
25808 gen_rtvec (2, cmove_rtx
, clobber_rtx
)));
25813 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
25814 operands of the last comparison is nonzero/true, FALSE_COND if it
25815 is zero/false. Return 0 if the hardware has no such operation. */
/* NOTE(review): garbled extraction of rs6000_emit_cmove; the switch
   over comparison codes and most returns are missing, so only
   comments are added.  Visible flow: validate operand modes; try the
   power9 min/max and cmove helpers; dispatch integer cases to
   rs6000_emit_int_cmove; otherwise reduce the FP comparison to a
   compare-against-zero (via a MINUS when safe w.r.t. trapping math
   and infinities) and lower everything to GE-based fsel-style
   IF_THEN_ELSE sequences, using NEG/ABS transformations.  */
25818 rs6000_emit_cmove (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
25820 enum rtx_code code
= GET_CODE (op
);
25821 rtx op0
= XEXP (op
, 0);
25822 rtx op1
= XEXP (op
, 1);
25823 machine_mode compare_mode
= GET_MODE (op0
);
25824 machine_mode result_mode
= GET_MODE (dest
);
25826 bool is_against_zero
;
25828 /* These modes should always match. */
25829 if (GET_MODE (op1
) != compare_mode
25830 /* In the isel case however, we can use a compare immediate, so
25831 op1 may be a small constant. */
25832 && (!TARGET_ISEL
|| !short_cint_operand (op1
, VOIDmode
)))
25834 if (GET_MODE (true_cond
) != result_mode
)
25836 if (GET_MODE (false_cond
) != result_mode
)
25839 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
25840 if (TARGET_P9_MINMAX
25841 && (compare_mode
== SFmode
|| compare_mode
== DFmode
)
25842 && (result_mode
== SFmode
|| result_mode
== DFmode
))
25844 if (rs6000_emit_p9_fp_minmax (dest
, op
, true_cond
, false_cond
))
25847 if (rs6000_emit_p9_fp_cmove (dest
, op
, true_cond
, false_cond
))
25851 /* Don't allow using floating point comparisons for integer results for
25853 if (FLOAT_MODE_P (compare_mode
) && !FLOAT_MODE_P (result_mode
))
25856 /* First, work out if the hardware can do this at all, or
25857 if it's too slow.... */
25858 if (!FLOAT_MODE_P (compare_mode
))
25861 return rs6000_emit_int_cmove (dest
, op
, true_cond
, false_cond
);
25864 else if (TARGET_HARD_FLOAT
&& !TARGET_FPRS
25865 && SCALAR_FLOAT_MODE_P (compare_mode
))
25868 is_against_zero
= op1
== CONST0_RTX (compare_mode
);
25870 /* A floating-point subtract might overflow, underflow, or produce
25871 an inexact result, thus changing the floating-point flags, so it
25872 can't be generated if we care about that. It's safe if one side
25873 of the construct is zero, since then no subtract will be
25875 if (SCALAR_FLOAT_MODE_P (compare_mode
)
25876 && flag_trapping_math
&& ! is_against_zero
)
25879 /* Eliminate half of the comparisons by switching operands, this
25880 makes the remaining code simpler. */
25881 if (code
== UNLT
|| code
== UNGT
|| code
== UNORDERED
|| code
== NE
25882 || code
== LTGT
|| code
== LT
|| code
== UNLE
)
25884 code
= reverse_condition_maybe_unordered (code
);
25886 true_cond
= false_cond
;
25890 /* UNEQ and LTGT take four instructions for a comparison with zero,
25891 it'll probably be faster to use a branch here too. */
25892 if (code
== UNEQ
&& HONOR_NANS (compare_mode
))
25895 /* We're going to try to implement comparisons by performing
25896 a subtract, then comparing against zero. Unfortunately,
25897 Inf - Inf is NaN which is not zero, and so if we don't
25898 know that the operand is finite and the comparison
25899 would treat EQ different to UNORDERED, we can't do it. */
25900 if (HONOR_INFINITIES (compare_mode
)
25901 && code
!= GT
&& code
!= UNGE
25902 && (GET_CODE (op1
) != CONST_DOUBLE
25903 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1
)))
25904 /* Constructs of the form (a OP b ? a : b) are safe. */
25905 && ((! rtx_equal_p (op0
, false_cond
) && ! rtx_equal_p (op1
, false_cond
))
25906 || (! rtx_equal_p (op0
, true_cond
)
25907 && ! rtx_equal_p (op1
, true_cond
))))
25910 /* At this point we know we can use fsel. */
25912 /* Reduce the comparison to a comparison against zero. */
25913 if (! is_against_zero
)
25915 temp
= gen_reg_rtx (compare_mode
);
25916 emit_insn (gen_rtx_SET (temp
, gen_rtx_MINUS (compare_mode
, op0
, op1
)));
25918 op1
= CONST0_RTX (compare_mode
);
25921 /* If we don't care about NaNs we can reduce some of the comparisons
25922 down to faster ones. */
25923 if (! HONOR_NANS (compare_mode
))
25929 true_cond
= false_cond
;
25942 /* Now, reduce everything down to a GE. */
25949 temp
= gen_reg_rtx (compare_mode
);
25950 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
25955 temp
= gen_reg_rtx (compare_mode
);
25956 emit_insn (gen_rtx_SET (temp
, gen_rtx_ABS (compare_mode
, op0
)));
25961 temp
= gen_reg_rtx (compare_mode
);
25962 emit_insn (gen_rtx_SET (temp
,
25963 gen_rtx_NEG (compare_mode
,
25964 gen_rtx_ABS (compare_mode
, op0
))));
25969 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
25970 temp
= gen_reg_rtx (result_mode
);
25971 emit_insn (gen_rtx_SET (temp
,
25972 gen_rtx_IF_THEN_ELSE (result_mode
,
25973 gen_rtx_GE (VOIDmode
,
25975 true_cond
, false_cond
)));
25976 false_cond
= true_cond
;
25979 temp
= gen_reg_rtx (compare_mode
);
25980 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
25985 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
25986 temp
= gen_reg_rtx (result_mode
);
25987 emit_insn (gen_rtx_SET (temp
,
25988 gen_rtx_IF_THEN_ELSE (result_mode
,
25989 gen_rtx_GE (VOIDmode
,
25991 true_cond
, false_cond
)));
25992 true_cond
= false_cond
;
25995 temp
= gen_reg_rtx (compare_mode
);
25996 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
26001 gcc_unreachable ();
26004 emit_insn (gen_rtx_SET (dest
,
26005 gen_rtx_IF_THEN_ELSE (result_mode
,
26006 gen_rtx_GE (VOIDmode
,
26008 true_cond
, false_cond
)));
26012 /* Same as above, but for ints (isel). */
/* NOTE(review): garbled extraction; the switch header, returns and
   braces are missing, so only comments are added.  Visible logic:
   generate the compare, pick the signed/unsigned SI/DI isel
   generator from the CR mode, reverse the condition for codes isel
   cannot handle directly, force operands into registers (const0 is
   allowed for true_cond), and emit the isel insn.  */
26015 rs6000_emit_int_cmove (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
26017 rtx condition_rtx
, cr
;
26018 machine_mode mode
= GET_MODE (dest
);
26019 enum rtx_code cond_code
;
26020 rtx (*isel_func
) (rtx
, rtx
, rtx
, rtx
, rtx
);
26023 if (mode
!= SImode
&& (!TARGET_POWERPC64
|| mode
!= DImode
))
26026 /* We still have to do the compare, because isel doesn't do a
26027 compare, it just looks at the CRx bits set by a previous compare
26029 condition_rtx
= rs6000_generate_compare (op
, mode
);
26030 cond_code
= GET_CODE (condition_rtx
);
26031 cr
= XEXP (condition_rtx
, 0);
26032 signedp
= GET_MODE (cr
) == CCmode
;
26034 isel_func
= (mode
== SImode
26035 ? (signedp
? gen_isel_signed_si
: gen_isel_unsigned_si
)
26036 : (signedp
? gen_isel_signed_di
: gen_isel_unsigned_di
));
26040 case LT
: case GT
: case LTU
: case GTU
: case EQ
:
26041 /* isel handles these directly. */
26045 /* We need to swap the sense of the comparison. */
26047 std::swap (false_cond
, true_cond
);
26048 PUT_CODE (condition_rtx
, reverse_condition (cond_code
));
26053 false_cond
= force_reg (mode
, false_cond
);
26054 if (true_cond
!= const0_rtx
)
26055 true_cond
= force_reg (mode
, true_cond
);
26057 emit_insn (isel_func (dest
, condition_rtx
, true_cond
, false_cond
, cr
));
/* NOTE(review): garbled extraction of output_isel; the return type and
   braces are missing, so only comments are added.  Returns the isel
   assembler template, reversing the condition (and swapping the %2/%3
   operand order) for codes the instruction cannot encode directly.  */
26063 output_isel (rtx
*operands
)
26065 enum rtx_code code
;
26067 code
= GET_CODE (operands
[1]);
26069 if (code
== GE
|| code
== GEU
|| code
== LE
|| code
== LEU
|| code
== NE
)
26071 gcc_assert (GET_CODE (operands
[2]) == REG
26072 && GET_CODE (operands
[3]) == REG
);
26073 PUT_CODE (operands
[1], reverse_condition (code
));
26074 return "isel %0,%3,%2,%j1";
26077 return "isel %0,%2,%3,%j1";
/* NOTE(review): garbled extraction of rs6000_emit_minmax; comparison
   code selection between the two branches is missing, so only
   comments are added.  Visible logic: use the direct VSX/Altivec
   min/max SET when available, otherwise fall back to
   emit_conditional_move with the operand order chosen by max vs min.  */
26081 rs6000_emit_minmax (rtx dest
, enum rtx_code code
, rtx op0
, rtx op1
)
26083 machine_mode mode
= GET_MODE (op0
);
26087 /* VSX/altivec have direct min/max insns. */
26088 if ((code
== SMAX
|| code
== SMIN
)
26089 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode
)
26090 || (mode
== SFmode
&& VECTOR_UNIT_VSX_P (DFmode
))))
26092 emit_insn (gen_rtx_SET (dest
, gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
26096 if (code
== SMAX
|| code
== SMIN
)
26101 if (code
== SMAX
|| code
== UMAX
)
26102 target
= emit_conditional_move (dest
, c
, op0
, op1
, mode
,
26103 op0
, op1
, mode
, 0);
26105 target
= emit_conditional_move (dest
, c
, op0
, op1
, mode
,
26106 op1
, op0
, mode
, 0);
26107 gcc_assert (target
);
26108 if (target
!= dest
)
26109 emit_move_insn (dest
, target
);
26112 /* Split a signbit operation on 64-bit machines with direct move. Also allow
26113 for the value to come from memory or if it is already loaded into a GPR. */
/* NOTE(review): garbled extraction; the MEM_P/REG_P branch structure
   is missing, so only comments are added.  Visible logic: get the
   high (sign-carrying) DImode word of an IEEE-128 value -- from the
   endian-correct memory half, from the GPR pair, or via the
   signbit*2_dm2 (mfvsrd) patterns for VSX registers -- then shift
   right by 63 to isolate the sign bit.  */
26116 rs6000_split_signbit (rtx dest
, rtx src
)
26118 machine_mode d_mode
= GET_MODE (dest
);
26119 machine_mode s_mode
= GET_MODE (src
);
26120 rtx dest_di
= (d_mode
== DImode
) ? dest
: gen_lowpart (DImode
, dest
);
26121 rtx shift_reg
= dest_di
;
26123 gcc_assert (FLOAT128_IEEE_P (s_mode
) && TARGET_POWERPC64
);
26127 rtx mem
= (WORDS_BIG_ENDIAN
26128 ? adjust_address (src
, DImode
, 0)
26129 : adjust_address (src
, DImode
, 8));
26130 emit_insn (gen_rtx_SET (dest_di
, mem
));
26135 unsigned int r
= reg_or_subregno (src
);
26137 if (INT_REGNO_P (r
))
26138 shift_reg
= gen_rtx_REG (DImode
, r
+ (BYTES_BIG_ENDIAN
== 0));
26142 /* Generate the special mfvsrd instruction to get it in a GPR. */
26143 gcc_assert (VSX_REGNO_P (r
));
26144 if (s_mode
== KFmode
)
26145 emit_insn (gen_signbitkf2_dm2 (dest_di
, src
));
26147 emit_insn (gen_signbittf2_dm2 (dest_di
, src
));
26151 emit_insn (gen_lshrdi3 (dest_di
, shift_reg
, GEN_INT (63)));
26155 /* A subroutine of the atomic operation splitters. Jump to LABEL if
26156 COND is true. Mark the jump as unlikely to be taken. */
/* NOTE(review): garbled extraction (braces missing); comments only.
   Emits (set pc (if_then_else COND LABEL pc)) and attaches a
   very-unlikely branch-probability note.  */
26159 emit_unlikely_jump (rtx cond
, rtx label
)
26161 rtx x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, cond
, label
, pc_rtx
);
26162 rtx_insn
*insn
= emit_jump_insn (gen_rtx_SET (pc_rtx
, x
));
26163 add_reg_br_prob_note (insn
, profile_probability::very_unlikely ());
26166 /* A subroutine of the atomic operation splitters. Emit a load-locked
26167 instruction in MODE. For QI/HImode, possibly use a pattern that includes
26168 the zero_extend operation. */
/* NOTE(review): garbled extraction; the switch header and case labels
   for MODE are missing, so only comments are added.  Selects the
   gen_load_locked* generator for QI/HI/SI/DI/TI (larx family) and
   emits it.  */
26171 emit_load_locked (machine_mode mode
, rtx reg
, rtx mem
)
26173 rtx (*fn
) (rtx
, rtx
) = NULL
;
26178 fn
= gen_load_lockedqi
;
26181 fn
= gen_load_lockedhi
;
26184 if (GET_MODE (mem
) == QImode
)
26185 fn
= gen_load_lockedqi_si
;
26186 else if (GET_MODE (mem
) == HImode
)
26187 fn
= gen_load_lockedhi_si
;
26189 fn
= gen_load_lockedsi
;
26192 fn
= gen_load_lockeddi
;
26195 fn
= gen_load_lockedti
;
26198 gcc_unreachable ();
26200 emit_insn (fn (reg
, mem
));
26203 /* A subroutine of the atomic operation splitters. Emit a store-conditional
26204 instruction in MODE. */
/* NOTE(review): garbled extraction; switch header/case labels missing,
   comments only.  Selects the gen_store_conditional* generator (stcx.
   family) for MODE, emits a hwsync first on PPC405 (Erratum 77
   workaround), then emits the store-conditional.  */
26207 emit_store_conditional (machine_mode mode
, rtx res
, rtx mem
, rtx val
)
26209 rtx (*fn
) (rtx
, rtx
, rtx
) = NULL
;
26214 fn
= gen_store_conditionalqi
;
26217 fn
= gen_store_conditionalhi
;
26220 fn
= gen_store_conditionalsi
;
26223 fn
= gen_store_conditionaldi
;
26226 fn
= gen_store_conditionalti
;
26229 gcc_unreachable ();
26232 /* Emit sync before stwcx. to address PPC405 Erratum. */
26233 if (PPC405_ERRATUM77
)
26234 emit_insn (gen_hwsync ());
26236 emit_insn (fn (res
, mem
, val
));
26239 /* Expand barriers before and after a load_locked/store_cond sequence. */
/* NOTE(review): garbled extraction (switch header, returns, break
   statements missing); comments only.  Legitimizes the address for
   larx/stcx addressing, then emits the pre-barrier required by the
   memory model: nothing for relaxed/consume/acquire, lwsync for
   release/acq_rel, hwsync for seq_cst.  */
26242 rs6000_pre_atomic_barrier (rtx mem
, enum memmodel model
)
26244 rtx addr
= XEXP (mem
, 0);
26245 int strict_p
= (reload_in_progress
|| reload_completed
);
26247 if (!legitimate_indirect_address_p (addr
, strict_p
)
26248 && !legitimate_indexed_address_p (addr
, strict_p
))
26250 addr
= force_reg (Pmode
, addr
);
26251 mem
= replace_equiv_address_nv (mem
, addr
);
26256 case MEMMODEL_RELAXED
:
26257 case MEMMODEL_CONSUME
:
26258 case MEMMODEL_ACQUIRE
:
26260 case MEMMODEL_RELEASE
:
26261 case MEMMODEL_ACQ_REL
:
26262 emit_insn (gen_lwsync ());
26264 case MEMMODEL_SEQ_CST
:
26265 emit_insn (gen_hwsync ());
26268 gcc_unreachable ();
/* NOTE(review): garbled extraction (switch header and breaks missing);
   comments only.  Emits the post-barrier for the memory model:
   nothing for relaxed/consume/release, isync for
   acquire/acq_rel/seq_cst.  */
26274 rs6000_post_atomic_barrier (enum memmodel model
)
26278 case MEMMODEL_RELAXED
:
26279 case MEMMODEL_CONSUME
:
26280 case MEMMODEL_RELEASE
:
26282 case MEMMODEL_ACQUIRE
:
26283 case MEMMODEL_ACQ_REL
:
26284 case MEMMODEL_SEQ_CST
:
26285 emit_insn (gen_isync ());
26288 gcc_unreachable ();
26292 /* A subroutine of the various atomic expanders. For sub-word operations,
26293 we must adjust things to operate on SImode. Given the original MEM,
26294 return a new aligned memory. Also build and return the quantities by
26295 which to shift and mask. */
/* NOTE(review): garbled extraction (stores through *pshift/*pmask and
   the return are missing); comments only.  Visible logic: align the
   address to a 4-byte word, build a fresh SImode MEM (preserving
   volatility and the memory-barrier alias set), compute the bit shift
   of the subword within the word (XORed for big-endian), and the
   insertion mask = GET_MODE_MASK(mode) << shift.  */
26298 rs6000_adjust_atomic_subword (rtx orig_mem
, rtx
*pshift
, rtx
*pmask
)
26300 rtx addr
, align
, shift
, mask
, mem
;
26301 HOST_WIDE_INT shift_mask
;
26302 machine_mode mode
= GET_MODE (orig_mem
);
26304 /* For smaller modes, we have to implement this via SImode. */
26305 shift_mask
= (mode
== QImode
? 0x18 : 0x10);
26307 addr
= XEXP (orig_mem
, 0);
26308 addr
= force_reg (GET_MODE (addr
), addr
);
26310 /* Aligned memory containing subword. Generate a new memory. We
26311 do not want any of the existing MEM_ATTR data, as we're now
26312 accessing memory outside the original object. */
26313 align
= expand_simple_binop (Pmode
, AND
, addr
, GEN_INT (-4),
26314 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
26315 mem
= gen_rtx_MEM (SImode
, align
);
26316 MEM_VOLATILE_P (mem
) = MEM_VOLATILE_P (orig_mem
);
26317 if (MEM_ALIAS_SET (orig_mem
) == ALIAS_SET_MEMORY_BARRIER
)
26318 set_mem_alias_set (mem
, ALIAS_SET_MEMORY_BARRIER
);
26320 /* Shift amount for subword relative to aligned word. */
26321 shift
= gen_reg_rtx (SImode
);
26322 addr
= gen_lowpart (SImode
, addr
);
26323 rtx tmp
= gen_reg_rtx (SImode
);
26324 emit_insn (gen_ashlsi3 (tmp
, addr
, GEN_INT (3)));
26325 emit_insn (gen_andsi3 (shift
, tmp
, GEN_INT (shift_mask
)));
26326 if (BYTES_BIG_ENDIAN
)
26327 shift
= expand_simple_binop (SImode
, XOR
, shift
, GEN_INT (shift_mask
),
26328 shift
, 1, OPTAB_LIB_WIDEN
);
26331 /* Mask for insertion. */
26332 mask
= expand_simple_binop (SImode
, ASHIFT
, GEN_INT (GET_MODE_MASK (mode
)),
26333 shift
, NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
26339 /* A subroutine of the various atomic expanders. For sub-word operands,
26340 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
/* NOTE(review): garbled extraction (third AND operand and the return
   are missing); comments only.  Computes (OLDVAL & ~MASK) | NEWVAL
   in a fresh SImode pseudo.  */
26343 rs6000_mask_atomic_subword (rtx oldval
, rtx newval
, rtx mask
)
26347 x
= gen_reg_rtx (SImode
);
26348 emit_insn (gen_rtx_SET (x
, gen_rtx_AND (SImode
,
26349 gen_rtx_NOT (SImode
, mask
),
26352 x
= expand_simple_binop (SImode
, IOR
, newval
, x
, x
, 1, OPTAB_LIB_WIDEN
);
26357 /* A subroutine of the various atomic expanders. For sub-word operands,
26358 extract WIDE to NARROW via SHIFT. */
/* NOTE(review): garbled extraction (braces missing); comments only.
   Logical-right-shifts WIDE by SHIFT and moves the low part into
   NARROW.  */
26361 rs6000_finish_atomic_subword (rtx narrow
, rtx wide
, rtx shift
)
26363 wide
= expand_simple_binop (SImode
, LSHIFTRT
, wide
, shift
,
26364 wide
, 1, OPTAB_LIB_WIDEN
);
26365 emit_move_insn (narrow
, gen_lowpart (GET_MODE (narrow
), wide
));
26368 /* Expand an atomic compare and swap operation. */
/* NOTE(review): garbled extraction of the CAS expander; declarations
   of is_weak/mem, several conditionals and braces are missing, so
   only comments are added.  Visible flow: unpack the 8-element
   operands array; for QI/HI without lbarx/lharx, realign to SImode
   and shift/mask OLDVAL/NEWVAL into place; emit the
   pre-barrier / load-locked / compare (synthesized by XOR-IOR for
   TImode) / store-conditional retry loop with unlikely-branch exits;
   emit the post-barrier; narrow the result back, and set BOOLVAL
   from CR0 EQ (success) / NE (failure).  */
26371 rs6000_expand_atomic_compare_and_swap (rtx operands
[])
26373 rtx boolval
, retval
, mem
, oldval
, newval
, cond
;
26374 rtx label1
, label2
, x
, mask
, shift
;
26375 machine_mode mode
, orig_mode
;
26376 enum memmodel mod_s
, mod_f
;
26379 boolval
= operands
[0];
26380 retval
= operands
[1];
26382 oldval
= operands
[3];
26383 newval
= operands
[4];
26384 is_weak
= (INTVAL (operands
[5]) != 0);
26385 mod_s
= memmodel_base (INTVAL (operands
[6]));
26386 mod_f
= memmodel_base (INTVAL (operands
[7]));
26387 orig_mode
= mode
= GET_MODE (mem
);
26389 mask
= shift
= NULL_RTX
;
26390 if (mode
== QImode
|| mode
== HImode
)
26392 /* Before power8, we didn't have access to lbarx/lharx, so generate a
26393 lwarx and shift/mask operations. With power8, we need to do the
26394 comparison in SImode, but the store is still done in QI/HImode. */
26395 oldval
= convert_modes (SImode
, mode
, oldval
, 1);
26397 if (!TARGET_SYNC_HI_QI
)
26399 mem
= rs6000_adjust_atomic_subword (mem
, &shift
, &mask
);
26401 /* Shift and mask OLDVAL into position with the word. */
26402 oldval
= expand_simple_binop (SImode
, ASHIFT
, oldval
, shift
,
26403 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
26405 /* Shift and mask NEWVAL into position within the word. */
26406 newval
= convert_modes (SImode
, mode
, newval
, 1);
26407 newval
= expand_simple_binop (SImode
, ASHIFT
, newval
, shift
,
26408 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
26411 /* Prepare to adjust the return value. */
26412 retval
= gen_reg_rtx (SImode
);
26415 else if (reg_overlap_mentioned_p (retval
, oldval
))
26416 oldval
= copy_to_reg (oldval
);
26418 if (mode
!= TImode
&& !reg_or_short_operand (oldval
, mode
))
26419 oldval
= copy_to_mode_reg (mode
, oldval
);
26421 if (reg_overlap_mentioned_p (retval
, newval
))
26422 newval
= copy_to_reg (newval
);
26424 mem
= rs6000_pre_atomic_barrier (mem
, mod_s
);
26429 label1
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
26430 emit_label (XEXP (label1
, 0));
26432 label2
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
26434 emit_load_locked (mode
, retval
, mem
);
26438 x
= expand_simple_binop (SImode
, AND
, retval
, mask
,
26439 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
26441 cond
= gen_reg_rtx (CCmode
);
26442 /* If we have TImode, synthesize a comparison. */
26443 if (mode
!= TImode
)
26444 x
= gen_rtx_COMPARE (CCmode
, x
, oldval
);
26447 rtx xor1_result
= gen_reg_rtx (DImode
);
26448 rtx xor2_result
= gen_reg_rtx (DImode
);
26449 rtx or_result
= gen_reg_rtx (DImode
);
26450 rtx new_word0
= simplify_gen_subreg (DImode
, x
, TImode
, 0);
26451 rtx new_word1
= simplify_gen_subreg (DImode
, x
, TImode
, 8);
26452 rtx old_word0
= simplify_gen_subreg (DImode
, oldval
, TImode
, 0);
26453 rtx old_word1
= simplify_gen_subreg (DImode
, oldval
, TImode
, 8);
26455 emit_insn (gen_xordi3 (xor1_result
, new_word0
, old_word0
));
26456 emit_insn (gen_xordi3 (xor2_result
, new_word1
, old_word1
));
26457 emit_insn (gen_iordi3 (or_result
, xor1_result
, xor2_result
));
26458 x
= gen_rtx_COMPARE (CCmode
, or_result
, const0_rtx
);
26461 emit_insn (gen_rtx_SET (cond
, x
));
26463 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
26464 emit_unlikely_jump (x
, label2
);
26468 x
= rs6000_mask_atomic_subword (retval
, newval
, mask
);
26470 emit_store_conditional (orig_mode
, cond
, mem
, x
);
26474 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
26475 emit_unlikely_jump (x
, label1
);
26478 if (!is_mm_relaxed (mod_f
))
26479 emit_label (XEXP (label2
, 0));
26481 rs6000_post_atomic_barrier (mod_s
);
26483 if (is_mm_relaxed (mod_f
))
26484 emit_label (XEXP (label2
, 0));
26487 rs6000_finish_atomic_subword (operands
[1], retval
, shift
);
26488 else if (mode
!= GET_MODE (operands
[1]))
26489 convert_move (operands
[1], retval
, 1);
26491 /* In all cases, CR0 contains EQ on success, and NE on failure. */
26492 x
= gen_rtx_EQ (SImode
, cond
, const0_rtx
);
26493 emit_insn (gen_rtx_SET (boolval
, x
));
26496 /* Expand an atomic exchange operation. */
/* NOTE(review): garbled extraction (mem/val assignments, mode
   declaration, braces missing); comments only.  Visible flow: for
   QI/HI without lbarx/lharx, realign to SImode and shift VAL into
   place; then pre-barrier, load-locked, store-conditional retry loop
   with an unlikely back-branch, post-barrier, and narrowing of the
   returned old value.  */
26499 rs6000_expand_atomic_exchange (rtx operands
[])
26501 rtx retval
, mem
, val
, cond
;
26503 enum memmodel model
;
26504 rtx label
, x
, mask
, shift
;
26506 retval
= operands
[0];
26509 model
= memmodel_base (INTVAL (operands
[3]));
26510 mode
= GET_MODE (mem
);
26512 mask
= shift
= NULL_RTX
;
26513 if (!TARGET_SYNC_HI_QI
&& (mode
== QImode
|| mode
== HImode
))
26515 mem
= rs6000_adjust_atomic_subword (mem
, &shift
, &mask
);
26517 /* Shift and mask VAL into position with the word. */
26518 val
= convert_modes (SImode
, mode
, val
, 1);
26519 val
= expand_simple_binop (SImode
, ASHIFT
, val
, shift
,
26520 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
26522 /* Prepare to adjust the return value. */
26523 retval
= gen_reg_rtx (SImode
);
26527 mem
= rs6000_pre_atomic_barrier (mem
, model
);
26529 label
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
26530 emit_label (XEXP (label
, 0));
26532 emit_load_locked (mode
, retval
, mem
);
26536 x
= rs6000_mask_atomic_subword (retval
, val
, mask
);
26538 cond
= gen_reg_rtx (CCmode
);
26539 emit_store_conditional (mode
, cond
, mem
, x
);
26541 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
26542 emit_unlikely_jump (x
, label
);
26544 rs6000_post_atomic_barrier (model
);
26547 rs6000_finish_atomic_subword (operands
[0], retval
, shift
);
26550 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
26551 to perform. MEM is the memory on which to operate. VAL is the second
26552 operand of the binary operator. BEFORE and AFTER are optional locations to
26553 return the value of MEM either before or after the operation. MODEL_RTX
26554 is a CONST_INT containing the memory model to use. */
/* NOTE(review): garbled extraction (switch headers over CODE, several
   conditionals and braces missing); comments only.  Visible flow: for
   subword modes either use native lbarx/lharx (TARGET_SYNC_HI_QI) or
   realign to SImode, adjusting VAL per operation (IOR of ~MASK for
   AND-like ops) so untouched bits are preserved; then pre-barrier,
   load-locked, apply CODE (NAND handled as AND+NOT), re-insert the
   subword, store-conditional retry loop, post-barrier, and narrowing
   or converting of the BEFORE/AFTER results.  */
26557 rs6000_expand_atomic_op (enum rtx_code code
, rtx mem
, rtx val
,
26558 rtx orig_before
, rtx orig_after
, rtx model_rtx
)
26560 enum memmodel model
= memmodel_base (INTVAL (model_rtx
));
26561 machine_mode mode
= GET_MODE (mem
);
26562 machine_mode store_mode
= mode
;
26563 rtx label
, x
, cond
, mask
, shift
;
26564 rtx before
= orig_before
, after
= orig_after
;
26566 mask
= shift
= NULL_RTX
;
26567 /* On power8, we want to use SImode for the operation. On previous systems,
26568 use the operation in a subword and shift/mask to get the proper byte or
26570 if (mode
== QImode
|| mode
== HImode
)
26572 if (TARGET_SYNC_HI_QI
)
26574 val
= convert_modes (SImode
, mode
, val
, 1);
26576 /* Prepare to adjust the return value. */
26577 before
= gen_reg_rtx (SImode
);
26579 after
= gen_reg_rtx (SImode
);
26584 mem
= rs6000_adjust_atomic_subword (mem
, &shift
, &mask
);
26586 /* Shift and mask VAL into position with the word. */
26587 val
= convert_modes (SImode
, mode
, val
, 1);
26588 val
= expand_simple_binop (SImode
, ASHIFT
, val
, shift
,
26589 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
26595 /* We've already zero-extended VAL. That is sufficient to
26596 make certain that it does not affect other bits. */
26601 /* If we make certain that all of the other bits in VAL are
26602 set, that will be sufficient to not affect other bits. */
26603 x
= gen_rtx_NOT (SImode
, mask
);
26604 x
= gen_rtx_IOR (SImode
, x
, val
);
26605 emit_insn (gen_rtx_SET (val
, x
));
26612 /* These will all affect bits outside the field and need
26613 adjustment via MASK within the loop. */
26617 gcc_unreachable ();
26620 /* Prepare to adjust the return value. */
26621 before
= gen_reg_rtx (SImode
);
26623 after
= gen_reg_rtx (SImode
);
26624 store_mode
= mode
= SImode
;
26628 mem
= rs6000_pre_atomic_barrier (mem
, model
);
26630 label
= gen_label_rtx ();
26631 emit_label (label
);
26632 label
= gen_rtx_LABEL_REF (VOIDmode
, label
);
26634 if (before
== NULL_RTX
)
26635 before
= gen_reg_rtx (mode
);
26637 emit_load_locked (mode
, before
, mem
);
26641 x
= expand_simple_binop (mode
, AND
, before
, val
,
26642 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
26643 after
= expand_simple_unop (mode
, NOT
, x
, after
, 1);
26647 after
= expand_simple_binop (mode
, code
, before
, val
,
26648 after
, 1, OPTAB_LIB_WIDEN
);
26654 x
= expand_simple_binop (SImode
, AND
, after
, mask
,
26655 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
26656 x
= rs6000_mask_atomic_subword (before
, x
, mask
);
26658 else if (store_mode
!= mode
)
26659 x
= convert_modes (store_mode
, mode
, x
, 1);
26661 cond
= gen_reg_rtx (CCmode
);
26662 emit_store_conditional (store_mode
, cond
, mem
, x
);
26664 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
26665 emit_unlikely_jump (x
, label
);
26667 rs6000_post_atomic_barrier (model
);
26671 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
26672 then do the calculations in a SImode register. */
26674 rs6000_finish_atomic_subword (orig_before
, before
, shift
);
26676 rs6000_finish_atomic_subword (orig_after
, after
, shift
);
26678 else if (store_mode
!= mode
)
26680 /* QImode/HImode on machines with lbarx/lharx where we do the native
26681 operation and then do the calculations in a SImode register. */
26683 convert_move (orig_before
, before
, 1);
26685 convert_move (orig_after
, after
, 1);
26687 else if (orig_after
&& after
!= orig_after
)
26688 emit_move_insn (orig_after
, after
);
26691 /* Emit instructions to move SRC to DST. Called by splitters for
26692 multi-register moves. It will emit at most one instruction for
26693 each register that is accessed; that is, it won't emit li/lis pairs
26694 (or equivalent for 64-bit code). One of SRC or DST must be a hard
26698 rs6000_split_multireg_move (rtx dst
, rtx src
)
26700 /* The register number of the first register being moved. */
26702 /* The mode that is to be moved. */
26704 /* The mode that the move is being done in, and its size. */
26705 machine_mode reg_mode
;
26707 /* The number of registers that will be moved. */
26710 reg
= REG_P (dst
) ? REGNO (dst
) : REGNO (src
);
26711 mode
= GET_MODE (dst
);
26712 nregs
= hard_regno_nregs
[reg
][mode
];
26713 if (FP_REGNO_P (reg
))
26714 reg_mode
= DECIMAL_FLOAT_MODE_P (mode
) ? DDmode
:
26715 ((TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
) ? DFmode
: SFmode
);
26716 else if (ALTIVEC_REGNO_P (reg
))
26717 reg_mode
= V16QImode
;
26718 else if (TARGET_E500_DOUBLE
&& FLOAT128_2REG_P (mode
))
26721 reg_mode
= word_mode
;
26722 reg_mode_size
= GET_MODE_SIZE (reg_mode
);
26724 gcc_assert (reg_mode_size
* nregs
== GET_MODE_SIZE (mode
));
26726 /* TDmode residing in FP registers is special, since the ISA requires that
26727 the lower-numbered word of a register pair is always the most significant
26728 word, even in little-endian mode. This does not match the usual subreg
26729 semantics, so we cannnot use simplify_gen_subreg in those cases. Access
26730 the appropriate constituent registers "by hand" in little-endian mode.
26732 Note we do not need to check for destructive overlap here since TDmode
26733 can only reside in even/odd register pairs. */
26734 if (FP_REGNO_P (reg
) && DECIMAL_FLOAT_MODE_P (mode
) && !BYTES_BIG_ENDIAN
)
26739 for (i
= 0; i
< nregs
; i
++)
26741 if (REG_P (src
) && FP_REGNO_P (REGNO (src
)))
26742 p_src
= gen_rtx_REG (reg_mode
, REGNO (src
) + nregs
- 1 - i
);
26744 p_src
= simplify_gen_subreg (reg_mode
, src
, mode
,
26745 i
* reg_mode_size
);
26747 if (REG_P (dst
) && FP_REGNO_P (REGNO (dst
)))
26748 p_dst
= gen_rtx_REG (reg_mode
, REGNO (dst
) + nregs
- 1 - i
);
26750 p_dst
= simplify_gen_subreg (reg_mode
, dst
, mode
,
26751 i
* reg_mode_size
);
26753 emit_insn (gen_rtx_SET (p_dst
, p_src
));
26759 if (REG_P (src
) && REG_P (dst
) && (REGNO (src
) < REGNO (dst
)))
26761 /* Move register range backwards, if we might have destructive
26764 for (i
= nregs
- 1; i
>= 0; i
--)
26765 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode
, dst
, mode
,
26766 i
* reg_mode_size
),
26767 simplify_gen_subreg (reg_mode
, src
, mode
,
26768 i
* reg_mode_size
)));
26774 bool used_update
= false;
26775 rtx restore_basereg
= NULL_RTX
;
26777 if (MEM_P (src
) && INT_REGNO_P (reg
))
26781 if (GET_CODE (XEXP (src
, 0)) == PRE_INC
26782 || GET_CODE (XEXP (src
, 0)) == PRE_DEC
)
26785 breg
= XEXP (XEXP (src
, 0), 0);
26786 delta_rtx
= (GET_CODE (XEXP (src
, 0)) == PRE_INC
26787 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src
)))
26788 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src
))));
26789 emit_insn (gen_add3_insn (breg
, breg
, delta_rtx
));
26790 src
= replace_equiv_address (src
, breg
);
26792 else if (! rs6000_offsettable_memref_p (src
, reg_mode
))
26794 if (GET_CODE (XEXP (src
, 0)) == PRE_MODIFY
)
26796 rtx basereg
= XEXP (XEXP (src
, 0), 0);
26799 rtx ndst
= simplify_gen_subreg (reg_mode
, dst
, mode
, 0);
26800 emit_insn (gen_rtx_SET (ndst
,
26801 gen_rtx_MEM (reg_mode
,
26803 used_update
= true;
26806 emit_insn (gen_rtx_SET (basereg
,
26807 XEXP (XEXP (src
, 0), 1)));
26808 src
= replace_equiv_address (src
, basereg
);
26812 rtx basereg
= gen_rtx_REG (Pmode
, reg
);
26813 emit_insn (gen_rtx_SET (basereg
, XEXP (src
, 0)));
26814 src
= replace_equiv_address (src
, basereg
);
26818 breg
= XEXP (src
, 0);
26819 if (GET_CODE (breg
) == PLUS
|| GET_CODE (breg
) == LO_SUM
)
26820 breg
= XEXP (breg
, 0);
26822 /* If the base register we are using to address memory is
26823 also a destination reg, then change that register last. */
26825 && REGNO (breg
) >= REGNO (dst
)
26826 && REGNO (breg
) < REGNO (dst
) + nregs
)
26827 j
= REGNO (breg
) - REGNO (dst
);
26829 else if (MEM_P (dst
) && INT_REGNO_P (reg
))
26833 if (GET_CODE (XEXP (dst
, 0)) == PRE_INC
26834 || GET_CODE (XEXP (dst
, 0)) == PRE_DEC
)
26837 breg
= XEXP (XEXP (dst
, 0), 0);
26838 delta_rtx
= (GET_CODE (XEXP (dst
, 0)) == PRE_INC
26839 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst
)))
26840 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst
))));
26842 /* We have to update the breg before doing the store.
26843 Use store with update, if available. */
26847 rtx nsrc
= simplify_gen_subreg (reg_mode
, src
, mode
, 0);
26848 emit_insn (TARGET_32BIT
26849 ? (TARGET_POWERPC64
26850 ? gen_movdi_si_update (breg
, breg
, delta_rtx
, nsrc
)
26851 : gen_movsi_update (breg
, breg
, delta_rtx
, nsrc
))
26852 : gen_movdi_di_update (breg
, breg
, delta_rtx
, nsrc
));
26853 used_update
= true;
26856 emit_insn (gen_add3_insn (breg
, breg
, delta_rtx
));
26857 dst
= replace_equiv_address (dst
, breg
);
26859 else if (!rs6000_offsettable_memref_p (dst
, reg_mode
)
26860 && GET_CODE (XEXP (dst
, 0)) != LO_SUM
)
26862 if (GET_CODE (XEXP (dst
, 0)) == PRE_MODIFY
)
26864 rtx basereg
= XEXP (XEXP (dst
, 0), 0);
26867 rtx nsrc
= simplify_gen_subreg (reg_mode
, src
, mode
, 0);
26868 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode
,
26871 used_update
= true;
26874 emit_insn (gen_rtx_SET (basereg
,
26875 XEXP (XEXP (dst
, 0), 1)));
26876 dst
= replace_equiv_address (dst
, basereg
);
26880 rtx basereg
= XEXP (XEXP (dst
, 0), 0);
26881 rtx offsetreg
= XEXP (XEXP (dst
, 0), 1);
26882 gcc_assert (GET_CODE (XEXP (dst
, 0)) == PLUS
26884 && REG_P (offsetreg
)
26885 && REGNO (basereg
) != REGNO (offsetreg
));
26886 if (REGNO (basereg
) == 0)
26888 rtx tmp
= offsetreg
;
26889 offsetreg
= basereg
;
26892 emit_insn (gen_add3_insn (basereg
, basereg
, offsetreg
));
26893 restore_basereg
= gen_sub3_insn (basereg
, basereg
, offsetreg
);
26894 dst
= replace_equiv_address (dst
, basereg
);
26897 else if (GET_CODE (XEXP (dst
, 0)) != LO_SUM
)
26898 gcc_assert (rs6000_offsettable_memref_p (dst
, reg_mode
));
26901 for (i
= 0; i
< nregs
; i
++)
26903 /* Calculate index to next subword. */
26908 /* If compiler already emitted move of first word by
26909 store with update, no need to do anything. */
26910 if (j
== 0 && used_update
)
26913 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode
, dst
, mode
,
26914 j
* reg_mode_size
),
26915 simplify_gen_subreg (reg_mode
, src
, mode
,
26916 j
* reg_mode_size
)));
26918 if (restore_basereg
!= NULL_RTX
)
26919 emit_insn (restore_basereg
);
26924 /* This page contains routines that are used to determine what the
26925 function prologue and epilogue code will do and write them out. */
26930 return !call_used_regs
[r
] && df_regs_ever_live_p (r
);
26933 /* Determine whether the gp REG is really used. */
26936 rs6000_reg_live_or_pic_offset_p (int reg
)
26938 /* We need to mark the PIC offset register live for the same conditions
26939 as it is set up, or otherwise it won't be saved before we clobber it. */
26941 if (reg
== RS6000_PIC_OFFSET_TABLE_REGNUM
&& !TARGET_SINGLE_PIC_BASE
)
26943 if (TARGET_TOC
&& TARGET_MINIMAL_TOC
26944 && (crtl
->calls_eh_return
26945 || df_regs_ever_live_p (reg
)
26946 || !constant_pool_empty_p ()))
26949 if ((DEFAULT_ABI
== ABI_V4
|| DEFAULT_ABI
== ABI_DARWIN
)
26954 /* If the function calls eh_return, claim used all the registers that would
26955 be checked for liveness otherwise. */
26957 return ((crtl
->calls_eh_return
|| df_regs_ever_live_p (reg
))
26958 && !call_used_regs
[reg
]);
26961 /* Return the first fixed-point register that is required to be
26962 saved. 32 if none. */
26965 first_reg_to_save (void)
26969 /* Find lowest numbered live register. */
26970 for (first_reg
= 13; first_reg
<= 31; first_reg
++)
26971 if (save_reg_p (first_reg
))
26974 if (first_reg
> RS6000_PIC_OFFSET_TABLE_REGNUM
26975 && ((DEFAULT_ABI
== ABI_V4
&& flag_pic
!= 0)
26976 || (DEFAULT_ABI
== ABI_DARWIN
&& flag_pic
)
26977 || (TARGET_TOC
&& TARGET_MINIMAL_TOC
))
26978 && rs6000_reg_live_or_pic_offset_p (RS6000_PIC_OFFSET_TABLE_REGNUM
))
26979 first_reg
= RS6000_PIC_OFFSET_TABLE_REGNUM
;
26983 && crtl
->uses_pic_offset_table
26984 && first_reg
> RS6000_PIC_OFFSET_TABLE_REGNUM
)
26985 return RS6000_PIC_OFFSET_TABLE_REGNUM
;
/* Similar, for FP regs.  Returns 64 if no FP register needs saving.
   FP registers occupy hard-register numbers 32..63 in this backend,
   and F14 (regno 46) is the first call-saved one.  */

static int
first_fp_reg_to_save (void)
{
  int first_reg;

  /* Find lowest numbered live register.  */
  for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
    if (save_reg_p (first_reg))
      break;

  return first_reg;
}
27006 /* Similar, for AltiVec regs. */
27009 first_altivec_reg_to_save (void)
27013 /* Stack frame remains as is unless we are in AltiVec ABI. */
27014 if (! TARGET_ALTIVEC_ABI
)
27015 return LAST_ALTIVEC_REGNO
+ 1;
27017 /* On Darwin, the unwind routines are compiled without
27018 TARGET_ALTIVEC, and use save_world to save/restore the
27019 altivec registers when necessary. */
27020 if (DEFAULT_ABI
== ABI_DARWIN
&& crtl
->calls_eh_return
27021 && ! TARGET_ALTIVEC
)
27022 return FIRST_ALTIVEC_REGNO
+ 20;
27024 /* Find lowest numbered live register. */
27025 for (i
= FIRST_ALTIVEC_REGNO
+ 20; i
<= LAST_ALTIVEC_REGNO
; ++i
)
27026 if (save_reg_p (i
))
27032 /* Return a 32-bit mask of the AltiVec registers we need to set in
27033 VRSAVE. Bit n of the return value is 1 if Vn is live. The MSB in
27034 the 32-bit word is 0. */
27036 static unsigned int
27037 compute_vrsave_mask (void)
27039 unsigned int i
, mask
= 0;
27041 /* On Darwin, the unwind routines are compiled without
27042 TARGET_ALTIVEC, and use save_world to save/restore the
27043 call-saved altivec registers when necessary. */
27044 if (DEFAULT_ABI
== ABI_DARWIN
&& crtl
->calls_eh_return
27045 && ! TARGET_ALTIVEC
)
27048 /* First, find out if we use _any_ altivec registers. */
27049 for (i
= FIRST_ALTIVEC_REGNO
; i
<= LAST_ALTIVEC_REGNO
; ++i
)
27050 if (df_regs_ever_live_p (i
))
27051 mask
|= ALTIVEC_REG_BIT (i
);
27056 /* Next, remove the argument registers from the set. These must
27057 be in the VRSAVE mask set by the caller, so we don't need to add
27058 them in again. More importantly, the mask we compute here is
27059 used to generate CLOBBERs in the set_vrsave insn, and we do not
27060 wish the argument registers to die. */
27061 for (i
= ALTIVEC_ARG_MIN_REG
; i
< (unsigned) crtl
->args
.info
.vregno
; i
++)
27062 mask
&= ~ALTIVEC_REG_BIT (i
);
27064 /* Similarly, remove the return value from the set. */
27067 diddle_return_value (is_altivec_return_reg
, &yes
);
27069 mask
&= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN
);
27075 /* For a very restricted set of circumstances, we can cut down the
27076 size of prologues/epilogues by calling our own save/restore-the-world
27080 compute_save_world_info (rs6000_stack_t
*info
)
27082 info
->world_save_p
= 1;
27084 = (WORLD_SAVE_P (info
)
27085 && DEFAULT_ABI
== ABI_DARWIN
27086 && !cfun
->has_nonlocal_label
27087 && info
->first_fp_reg_save
== FIRST_SAVED_FP_REGNO
27088 && info
->first_gp_reg_save
== FIRST_SAVED_GP_REGNO
27089 && info
->first_altivec_reg_save
== FIRST_SAVED_ALTIVEC_REGNO
27090 && info
->cr_save_p
);
27092 /* This will not work in conjunction with sibcalls. Make sure there
27093 are none. (This check is expensive, but seldom executed.) */
27094 if (WORLD_SAVE_P (info
))
27097 for (insn
= get_last_insn_anywhere (); insn
; insn
= PREV_INSN (insn
))
27098 if (CALL_P (insn
) && SIBLING_CALL_P (insn
))
27100 info
->world_save_p
= 0;
27105 if (WORLD_SAVE_P (info
))
27107 /* Even if we're not touching VRsave, make sure there's room on the
27108 stack for it, if it looks like we're calling SAVE_WORLD, which
27109 will attempt to save it. */
27110 info
->vrsave_size
= 4;
27112 /* If we are going to save the world, we need to save the link register too. */
27113 info
->lr_save_p
= 1;
27115 /* "Save" the VRsave register too if we're saving the world. */
27116 if (info
->vrsave_mask
== 0)
27117 info
->vrsave_mask
= compute_vrsave_mask ();
27119 /* Because the Darwin register save/restore routines only handle
27120 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
27122 gcc_assert (info
->first_fp_reg_save
>= FIRST_SAVED_FP_REGNO
27123 && (info
->first_altivec_reg_save
27124 >= FIRST_SAVED_ALTIVEC_REGNO
));
27132 is_altivec_return_reg (rtx reg
, void *xyes
)
27134 bool *yes
= (bool *) xyes
;
27135 if (REGNO (reg
) == ALTIVEC_ARG_RETURN
)
27140 /* Return whether REG is a global user reg or has been specifed by
27141 -ffixed-REG. We should not restore these, and so cannot use
27142 lmw or out-of-line restore functions if there are any. We also
27143 can't save them (well, emit frame notes for them), because frame
27144 unwinding during exception handling will restore saved registers. */
27147 fixed_reg_p (int reg
)
27149 /* Ignore fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] when the
27150 backend sets it, overriding anything the user might have given. */
27151 if (reg
== RS6000_PIC_OFFSET_TABLE_REGNUM
27152 && ((DEFAULT_ABI
== ABI_V4
&& flag_pic
)
27153 || (DEFAULT_ABI
== ABI_DARWIN
&& flag_pic
)
27154 || (TARGET_TOC
&& TARGET_MINIMAL_TOC
)))
27157 return fixed_regs
[reg
];
/* Determine the strategy for savings/restoring registers.
   SAVE_* flags govern the prologue, REST_* flags the epilogue;
   *_MULTIPLE selects stmw/lmw, *_INLINE selects inline code over
   the out-of-line save/restore helper routines.  */

enum {
  SAVE_MULTIPLE = 0x1,
  SAVE_INLINE_GPRS = 0x2,
  SAVE_INLINE_FPRS = 0x4,
  SAVE_NOINLINE_GPRS_SAVES_LR = 0x8,
  SAVE_NOINLINE_FPRS_SAVES_LR = 0x10,
  SAVE_INLINE_VRS = 0x20,
  REST_MULTIPLE = 0x100,
  REST_INLINE_GPRS = 0x200,
  REST_INLINE_FPRS = 0x400,
  REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x800,
  REST_INLINE_VRS = 0x1000
};
27177 rs6000_savres_strategy (rs6000_stack_t
*info
,
27178 bool using_static_chain_p
)
27182 /* Select between in-line and out-of-line save and restore of regs.
27183 First, all the obvious cases where we don't use out-of-line. */
27184 if (crtl
->calls_eh_return
27185 || cfun
->machine
->ra_need_lr
)
27186 strategy
|= (SAVE_INLINE_FPRS
| REST_INLINE_FPRS
27187 | SAVE_INLINE_GPRS
| REST_INLINE_GPRS
27188 | SAVE_INLINE_VRS
| REST_INLINE_VRS
);
27190 if (info
->first_gp_reg_save
== 32)
27191 strategy
|= SAVE_INLINE_GPRS
| REST_INLINE_GPRS
;
27193 if (info
->first_fp_reg_save
== 64
27194 /* The out-of-line FP routines use double-precision stores;
27195 we can't use those routines if we don't have such stores. */
27196 || (TARGET_HARD_FLOAT
&& !TARGET_DOUBLE_FLOAT
))
27197 strategy
|= SAVE_INLINE_FPRS
| REST_INLINE_FPRS
;
27199 if (info
->first_altivec_reg_save
== LAST_ALTIVEC_REGNO
+ 1)
27200 strategy
|= SAVE_INLINE_VRS
| REST_INLINE_VRS
;
27202 /* Define cutoff for using out-of-line functions to save registers. */
27203 if (DEFAULT_ABI
== ABI_V4
|| TARGET_ELF
)
27205 if (!optimize_size
)
27207 strategy
|= SAVE_INLINE_FPRS
| REST_INLINE_FPRS
;
27208 strategy
|= SAVE_INLINE_GPRS
| REST_INLINE_GPRS
;
27209 strategy
|= SAVE_INLINE_VRS
| REST_INLINE_VRS
;
27213 /* Prefer out-of-line restore if it will exit. */
27214 if (info
->first_fp_reg_save
> 61)
27215 strategy
|= SAVE_INLINE_FPRS
;
27216 if (info
->first_gp_reg_save
> 29)
27218 if (info
->first_fp_reg_save
== 64)
27219 strategy
|= SAVE_INLINE_GPRS
;
27221 strategy
|= SAVE_INLINE_GPRS
| REST_INLINE_GPRS
;
27223 if (info
->first_altivec_reg_save
== LAST_ALTIVEC_REGNO
)
27224 strategy
|= SAVE_INLINE_VRS
| REST_INLINE_VRS
;
27227 else if (DEFAULT_ABI
== ABI_DARWIN
)
27229 if (info
->first_fp_reg_save
> 60)
27230 strategy
|= SAVE_INLINE_FPRS
| REST_INLINE_FPRS
;
27231 if (info
->first_gp_reg_save
> 29)
27232 strategy
|= SAVE_INLINE_GPRS
| REST_INLINE_GPRS
;
27233 strategy
|= SAVE_INLINE_VRS
| REST_INLINE_VRS
;
27237 gcc_checking_assert (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
);
27238 if ((flag_shrink_wrap_separate
&& optimize_function_for_speed_p (cfun
))
27239 || info
->first_fp_reg_save
> 61)
27240 strategy
|= SAVE_INLINE_FPRS
| REST_INLINE_FPRS
;
27241 strategy
|= SAVE_INLINE_GPRS
| REST_INLINE_GPRS
;
27242 strategy
|= SAVE_INLINE_VRS
| REST_INLINE_VRS
;
27245 /* Don't bother to try to save things out-of-line if r11 is occupied
27246 by the static chain. It would require too much fiddling and the
27247 static chain is rarely used anyway. FPRs are saved w.r.t the stack
27248 pointer on Darwin, and AIX uses r1 or r12. */
27249 if (using_static_chain_p
27250 && (DEFAULT_ABI
== ABI_V4
|| DEFAULT_ABI
== ABI_DARWIN
))
27251 strategy
|= ((DEFAULT_ABI
== ABI_DARWIN
? 0 : SAVE_INLINE_FPRS
)
27253 | SAVE_INLINE_VRS
);
27255 /* Saving CR interferes with the exit routines used on the SPE, so
27258 && info
->spe_64bit_regs_used
27259 && info
->cr_save_p
)
27260 strategy
|= REST_INLINE_GPRS
;
27262 /* We can only use the out-of-line routines to restore fprs if we've
27263 saved all the registers from first_fp_reg_save in the prologue.
27264 Otherwise, we risk loading garbage. Of course, if we have saved
27265 out-of-line then we know we haven't skipped any fprs. */
27266 if ((strategy
& SAVE_INLINE_FPRS
)
27267 && !(strategy
& REST_INLINE_FPRS
))
27271 for (i
= info
->first_fp_reg_save
; i
< 64; i
++)
27272 if (fixed_regs
[i
] || !save_reg_p (i
))
27274 strategy
|= REST_INLINE_FPRS
;
27279 /* Similarly, for altivec regs. */
27280 if ((strategy
& SAVE_INLINE_VRS
)
27281 && !(strategy
& REST_INLINE_VRS
))
27285 for (i
= info
->first_altivec_reg_save
; i
< LAST_ALTIVEC_REGNO
+ 1; i
++)
27286 if (fixed_regs
[i
] || !save_reg_p (i
))
27288 strategy
|= REST_INLINE_VRS
;
27293 /* info->lr_save_p isn't yet set if the only reason lr needs to be
27294 saved is an out-of-line save or restore. Set up the value for
27295 the next test (excluding out-of-line gprs). */
27296 bool lr_save_p
= (info
->lr_save_p
27297 || !(strategy
& SAVE_INLINE_FPRS
)
27298 || !(strategy
& SAVE_INLINE_VRS
)
27299 || !(strategy
& REST_INLINE_FPRS
)
27300 || !(strategy
& REST_INLINE_VRS
));
27302 if (TARGET_MULTIPLE
27303 && !TARGET_POWERPC64
27304 && !(TARGET_SPE_ABI
&& info
->spe_64bit_regs_used
)
27305 && info
->first_gp_reg_save
< 31
27306 && !(flag_shrink_wrap
27307 && flag_shrink_wrap_separate
27308 && optimize_function_for_speed_p (cfun
)))
27310 /* Prefer store multiple for saves over out-of-line routines,
27311 since the store-multiple instruction will always be smaller. */
27312 strategy
|= SAVE_INLINE_GPRS
| SAVE_MULTIPLE
;
27314 /* The situation is more complicated with load multiple. We'd
27315 prefer to use the out-of-line routines for restores, since the
27316 "exit" out-of-line routines can handle the restore of LR and the
27317 frame teardown. However if doesn't make sense to use the
27318 out-of-line routine if that is the only reason we'd need to save
27319 LR, and we can't use the "exit" out-of-line gpr restore if we
27320 have saved some fprs; In those cases it is advantageous to use
27321 load multiple when available. */
27322 if (info
->first_fp_reg_save
!= 64 || !lr_save_p
)
27323 strategy
|= REST_INLINE_GPRS
| REST_MULTIPLE
;
27326 /* Using the "exit" out-of-line routine does not improve code size
27327 if using it would require lr to be saved and if only saving one
27329 else if (!lr_save_p
&& info
->first_gp_reg_save
> 29)
27330 strategy
|= SAVE_INLINE_GPRS
| REST_INLINE_GPRS
;
27332 /* We can only use load multiple or the out-of-line routines to
27333 restore gprs if we've saved all the registers from
27334 first_gp_reg_save. Otherwise, we risk loading garbage.
27335 Of course, if we have saved out-of-line or used stmw then we know
27336 we haven't skipped any gprs. */
27337 if ((strategy
& (SAVE_INLINE_GPRS
| SAVE_MULTIPLE
)) == SAVE_INLINE_GPRS
27338 && (strategy
& (REST_INLINE_GPRS
| REST_MULTIPLE
)) != REST_INLINE_GPRS
)
27342 for (i
= info
->first_gp_reg_save
; i
< 32; i
++)
27343 if (fixed_reg_p (i
) || !save_reg_p (i
))
27345 strategy
|= REST_INLINE_GPRS
;
27346 strategy
&= ~REST_MULTIPLE
;
27351 if (TARGET_ELF
&& TARGET_64BIT
)
27353 if (!(strategy
& SAVE_INLINE_FPRS
))
27354 strategy
|= SAVE_NOINLINE_FPRS_SAVES_LR
;
27355 else if (!(strategy
& SAVE_INLINE_GPRS
)
27356 && info
->first_fp_reg_save
== 64)
27357 strategy
|= SAVE_NOINLINE_GPRS_SAVES_LR
;
27359 else if (TARGET_AIX
&& !(strategy
& REST_INLINE_FPRS
))
27360 strategy
|= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR
;
27362 if (TARGET_MACHO
&& !(strategy
& SAVE_INLINE_FPRS
))
27363 strategy
|= SAVE_NOINLINE_FPRS_SAVES_LR
;
27368 /* Calculate the stack information for the current function. This is
27369 complicated by having two separate calling sequences, the AIX calling
27370 sequence and the V.4 calling sequence.
27372 AIX (and Darwin/Mac OS X) stack frames look like:
27374 SP----> +---------------------------------------+
27375 | back chain to caller | 0 0
27376 +---------------------------------------+
27377 | saved CR | 4 8 (8-11)
27378 +---------------------------------------+
27380 +---------------------------------------+
27381 | reserved for compilers | 12 24
27382 +---------------------------------------+
27383 | reserved for binders | 16 32
27384 +---------------------------------------+
27385 | saved TOC pointer | 20 40
27386 +---------------------------------------+
27387 | Parameter save area (+padding*) (P) | 24 48
27388 +---------------------------------------+
27389 | Alloca space (A) | 24+P etc.
27390 +---------------------------------------+
27391 | Local variable space (L) | 24+P+A
27392 +---------------------------------------+
27393 | Float/int conversion temporary (X) | 24+P+A+L
27394 +---------------------------------------+
27395 | Save area for AltiVec registers (W) | 24+P+A+L+X
27396 +---------------------------------------+
27397 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
27398 +---------------------------------------+
27399 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
27400 +---------------------------------------+
27401 | Save area for GP registers (G) | 24+P+A+X+L+X+W+Y+Z
27402 +---------------------------------------+
27403 | Save area for FP registers (F) | 24+P+A+X+L+X+W+Y+Z+G
27404 +---------------------------------------+
27405 old SP->| back chain to caller's caller |
27406 +---------------------------------------+
27408 * If the alloca area is present, the parameter save area is
27409 padded so that the former starts 16-byte aligned.
27411 The required alignment for AIX configurations is two words (i.e., 8
27414 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
27416 SP----> +---------------------------------------+
27417 | Back chain to caller | 0
27418 +---------------------------------------+
27419 | Save area for CR | 8
27420 +---------------------------------------+
27422 +---------------------------------------+
27423 | Saved TOC pointer | 24
27424 +---------------------------------------+
27425 | Parameter save area (+padding*) (P) | 32
27426 +---------------------------------------+
27427 | Alloca space (A) | 32+P
27428 +---------------------------------------+
27429 | Local variable space (L) | 32+P+A
27430 +---------------------------------------+
27431 | Save area for AltiVec registers (W) | 32+P+A+L
27432 +---------------------------------------+
27433 | AltiVec alignment padding (Y) | 32+P+A+L+W
27434 +---------------------------------------+
27435 | Save area for GP registers (G) | 32+P+A+L+W+Y
27436 +---------------------------------------+
27437 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
27438 +---------------------------------------+
27439 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
27440 +---------------------------------------+
27442 * If the alloca area is present, the parameter save area is
27443 padded so that the former starts 16-byte aligned.
27445 V.4 stack frames look like:
27447 SP----> +---------------------------------------+
27448 | back chain to caller | 0
27449 +---------------------------------------+
27450 | caller's saved LR | 4
27451 +---------------------------------------+
27452 | Parameter save area (+padding*) (P) | 8
27453 +---------------------------------------+
27454 | Alloca space (A) | 8+P
27455 +---------------------------------------+
27456 | Varargs save area (V) | 8+P+A
27457 +---------------------------------------+
27458 | Local variable space (L) | 8+P+A+V
27459 +---------------------------------------+
27460 | Float/int conversion temporary (X) | 8+P+A+V+L
27461 +---------------------------------------+
27462 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
27463 +---------------------------------------+
27464 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
27465 +---------------------------------------+
27466 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
27467 +---------------------------------------+
27468 | SPE: area for 64-bit GP registers |
27469 +---------------------------------------+
27470 | SPE alignment padding |
27471 +---------------------------------------+
27472 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
27473 +---------------------------------------+
27474 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
27475 +---------------------------------------+
27476 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
27477 +---------------------------------------+
27478 old SP->| back chain to caller's caller |
27479 +---------------------------------------+
27481 * If the alloca area is present and the required alignment is
27482 16 bytes, the parameter save area is padded so that the
27483 alloca area starts 16-byte aligned.
27485 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
27486 given. (But note below and in sysv4.h that we require only 8 and
27487 may round up the size of our stack frame anyways. The historical
27488 reason is early versions of powerpc-linux which didn't properly
27489 align the stack at program startup. A happy side-effect is that
27490 -mno-eabi libraries can be used with -meabi programs.)
27492 The EABI configuration defaults to the V.4 layout. However,
27493 the stack alignment requirements may differ. If -mno-eabi is not
27494 given, the required stack alignment is 8 bytes; if -mno-eabi is
27495 given, the required alignment is 16 bytes. (But see V.4 comment
/* Default the ABI-mandated stack alignment to the generic stack
   boundary unless a subtarget header has already defined it.  */
#ifndef ABI_STACK_BOUNDARY
#define ABI_STACK_BOUNDARY STACK_BOUNDARY
#endif
27502 static rs6000_stack_t
*
27503 rs6000_stack_info (void)
27505 /* We should never be called for thunks, we are not set up for that. */
27506 gcc_assert (!cfun
->is_thunk
);
27508 rs6000_stack_t
*info
= &stack_info
;
27509 int reg_size
= TARGET_32BIT
? 4 : 8;
27514 HOST_WIDE_INT non_fixed_size
;
27515 bool using_static_chain_p
;
27517 if (reload_completed
&& info
->reload_completed
)
27520 memset (info
, 0, sizeof (*info
));
27521 info
->reload_completed
= reload_completed
;
27525 /* Cache value so we don't rescan instruction chain over and over. */
27526 if (cfun
->machine
->spe_insn_chain_scanned_p
== 0)
27527 cfun
->machine
->spe_insn_chain_scanned_p
27528 = spe_func_has_64bit_regs_p () + 1;
27529 info
->spe_64bit_regs_used
= cfun
->machine
->spe_insn_chain_scanned_p
- 1;
27532 /* Select which calling sequence. */
27533 info
->abi
= DEFAULT_ABI
;
27535 /* Calculate which registers need to be saved & save area size. */
27536 info
->first_gp_reg_save
= first_reg_to_save ();
27537 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
27538 even if it currently looks like we won't. Reload may need it to
27539 get at a constant; if so, it will have already created a constant
27540 pool entry for it. */
27541 if (((TARGET_TOC
&& TARGET_MINIMAL_TOC
)
27542 || (flag_pic
== 1 && DEFAULT_ABI
== ABI_V4
)
27543 || (flag_pic
&& DEFAULT_ABI
== ABI_DARWIN
))
27544 && crtl
->uses_const_pool
27545 && info
->first_gp_reg_save
> RS6000_PIC_OFFSET_TABLE_REGNUM
)
27546 first_gp
= RS6000_PIC_OFFSET_TABLE_REGNUM
;
27548 first_gp
= info
->first_gp_reg_save
;
27550 info
->gp_size
= reg_size
* (32 - first_gp
);
27552 /* For the SPE, we have an additional upper 32-bits on each GPR.
27553 Ideally we should save the entire 64-bits only when the upper
27554 half is used in SIMD instructions. Since we only record
27555 registers live (not the size they are used in), this proves
27556 difficult because we'd have to traverse the instruction chain at
27557 the right time, taking reload into account. This is a real pain,
27558 so we opt to save the GPRs in 64-bits always if but one register
27559 gets used in 64-bits. Otherwise, all the registers in the frame
27560 get saved in 32-bits.
27562 So... since when we save all GPRs (except the SP) in 64-bits, the
27563 traditional GP save area will be empty. */
27564 if (TARGET_SPE_ABI
&& info
->spe_64bit_regs_used
!= 0)
27567 info
->first_fp_reg_save
= first_fp_reg_to_save ();
27568 info
->fp_size
= 8 * (64 - info
->first_fp_reg_save
);
27570 info
->first_altivec_reg_save
= first_altivec_reg_to_save ();
27571 info
->altivec_size
= 16 * (LAST_ALTIVEC_REGNO
+ 1
27572 - info
->first_altivec_reg_save
);
27574 /* Does this function call anything? */
27575 info
->calls_p
= (!crtl
->is_leaf
|| cfun
->machine
->ra_needs_full_frame
);
27577 /* Determine if we need to save the condition code registers. */
27578 if (save_reg_p (CR2_REGNO
)
27579 || save_reg_p (CR3_REGNO
)
27580 || save_reg_p (CR4_REGNO
))
27582 info
->cr_save_p
= 1;
27583 if (DEFAULT_ABI
== ABI_V4
)
27584 info
->cr_size
= reg_size
;
27587 /* If the current function calls __builtin_eh_return, then we need
27588 to allocate stack space for registers that will hold data for
27589 the exception handler. */
27590 if (crtl
->calls_eh_return
)
27593 for (i
= 0; EH_RETURN_DATA_REGNO (i
) != INVALID_REGNUM
; ++i
)
27596 /* SPE saves EH registers in 64-bits. */
27597 ehrd_size
= i
* (TARGET_SPE_ABI
&& info
->spe_64bit_regs_used
!= 0
27598 ? UNITS_PER_SPE_WORD
: UNITS_PER_WORD
);
27603 /* In the ELFv2 ABI, we also need to allocate space for separate
27604 CR field save areas if the function calls __builtin_eh_return. */
27605 if (DEFAULT_ABI
== ABI_ELFv2
&& crtl
->calls_eh_return
)
27607 /* This hard-codes that we have three call-saved CR fields. */
27608 ehcr_size
= 3 * reg_size
;
27609 /* We do *not* use the regular CR save mechanism. */
27610 info
->cr_save_p
= 0;
27615 /* Determine various sizes. */
27616 info
->reg_size
= reg_size
;
27617 info
->fixed_size
= RS6000_SAVE_AREA
;
27618 info
->vars_size
= RS6000_ALIGN (get_frame_size (), 8);
27619 if (cfun
->calls_alloca
)
27621 RS6000_ALIGN (crtl
->outgoing_args_size
+ info
->fixed_size
,
27622 STACK_BOUNDARY
/ BITS_PER_UNIT
) - info
->fixed_size
;
27624 info
->parm_size
= RS6000_ALIGN (crtl
->outgoing_args_size
,
27625 TARGET_ALTIVEC
? 16 : 8);
27626 if (FRAME_GROWS_DOWNWARD
)
27628 += RS6000_ALIGN (info
->fixed_size
+ info
->vars_size
+ info
->parm_size
,
27629 ABI_STACK_BOUNDARY
/ BITS_PER_UNIT
)
27630 - (info
->fixed_size
+ info
->vars_size
+ info
->parm_size
);
27632 if (TARGET_SPE_ABI
&& info
->spe_64bit_regs_used
!= 0)
27633 info
->spe_gp_size
= 8 * (32 - first_gp
);
27635 if (TARGET_ALTIVEC_ABI
)
27636 info
->vrsave_mask
= compute_vrsave_mask ();
27638 if (TARGET_ALTIVEC_VRSAVE
&& info
->vrsave_mask
)
27639 info
->vrsave_size
= 4;
27641 compute_save_world_info (info
);
27643 /* Calculate the offsets. */
27644 switch (DEFAULT_ABI
)
27648 gcc_unreachable ();
27653 info
->fp_save_offset
= -info
->fp_size
;
27654 info
->gp_save_offset
= info
->fp_save_offset
- info
->gp_size
;
27656 if (TARGET_ALTIVEC_ABI
)
27658 info
->vrsave_save_offset
= info
->gp_save_offset
- info
->vrsave_size
;
27660 /* Align stack so vector save area is on a quadword boundary.
27661 The padding goes above the vectors. */
27662 if (info
->altivec_size
!= 0)
27663 info
->altivec_padding_size
= info
->vrsave_save_offset
& 0xF;
27665 info
->altivec_save_offset
= info
->vrsave_save_offset
27666 - info
->altivec_padding_size
27667 - info
->altivec_size
;
27668 gcc_assert (info
->altivec_size
== 0
27669 || info
->altivec_save_offset
% 16 == 0);
27671 /* Adjust for AltiVec case. */
27672 info
->ehrd_offset
= info
->altivec_save_offset
- ehrd_size
;
27675 info
->ehrd_offset
= info
->gp_save_offset
- ehrd_size
;
27677 info
->ehcr_offset
= info
->ehrd_offset
- ehcr_size
;
27678 info
->cr_save_offset
= reg_size
; /* first word when 64-bit. */
27679 info
->lr_save_offset
= 2*reg_size
;
27683 info
->fp_save_offset
= -info
->fp_size
;
27684 info
->gp_save_offset
= info
->fp_save_offset
- info
->gp_size
;
27685 info
->cr_save_offset
= info
->gp_save_offset
- info
->cr_size
;
27687 if (TARGET_SPE_ABI
&& info
->spe_64bit_regs_used
!= 0)
27689 /* Align stack so SPE GPR save area is aligned on a
27690 double-word boundary. */
27691 if (info
->spe_gp_size
!= 0 && info
->cr_save_offset
!= 0)
27692 info
->spe_padding_size
= 8 - (-info
->cr_save_offset
% 8);
27694 info
->spe_padding_size
= 0;
27696 info
->spe_gp_save_offset
= info
->cr_save_offset
27697 - info
->spe_padding_size
27698 - info
->spe_gp_size
;
27700 /* Adjust for SPE case. */
27701 info
->ehrd_offset
= info
->spe_gp_save_offset
;
27703 else if (TARGET_ALTIVEC_ABI
)
27705 info
->vrsave_save_offset
= info
->cr_save_offset
- info
->vrsave_size
;
27707 /* Align stack so vector save area is on a quadword boundary. */
27708 if (info
->altivec_size
!= 0)
27709 info
->altivec_padding_size
= 16 - (-info
->vrsave_save_offset
% 16);
27711 info
->altivec_save_offset
= info
->vrsave_save_offset
27712 - info
->altivec_padding_size
27713 - info
->altivec_size
;
27715 /* Adjust for AltiVec case. */
27716 info
->ehrd_offset
= info
->altivec_save_offset
;
27719 info
->ehrd_offset
= info
->cr_save_offset
;
27721 info
->ehrd_offset
-= ehrd_size
;
27722 info
->lr_save_offset
= reg_size
;
27725 save_align
= (TARGET_ALTIVEC_ABI
|| DEFAULT_ABI
== ABI_DARWIN
) ? 16 : 8;
27726 info
->save_size
= RS6000_ALIGN (info
->fp_size
27728 + info
->altivec_size
27729 + info
->altivec_padding_size
27730 + info
->spe_gp_size
27731 + info
->spe_padding_size
27735 + info
->vrsave_size
,
27738 non_fixed_size
= info
->vars_size
+ info
->parm_size
+ info
->save_size
;
27740 info
->total_size
= RS6000_ALIGN (non_fixed_size
+ info
->fixed_size
,
27741 ABI_STACK_BOUNDARY
/ BITS_PER_UNIT
);
27743 /* Determine if we need to save the link register. */
27745 || ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
27747 && !TARGET_PROFILE_KERNEL
)
27748 || (DEFAULT_ABI
== ABI_V4
&& cfun
->calls_alloca
)
27749 #ifdef TARGET_RELOCATABLE
27750 || (DEFAULT_ABI
== ABI_V4
27751 && (TARGET_RELOCATABLE
|| flag_pic
> 1)
27752 && !constant_pool_empty_p ())
27754 || rs6000_ra_ever_killed ())
27755 info
->lr_save_p
= 1;
27757 using_static_chain_p
= (cfun
->static_chain_decl
!= NULL_TREE
27758 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM
)
27759 && call_used_regs
[STATIC_CHAIN_REGNUM
]);
27760 info
->savres_strategy
= rs6000_savres_strategy (info
, using_static_chain_p
);
27762 if (!(info
->savres_strategy
& SAVE_INLINE_GPRS
)
27763 || !(info
->savres_strategy
& SAVE_INLINE_FPRS
)
27764 || !(info
->savres_strategy
& SAVE_INLINE_VRS
)
27765 || !(info
->savres_strategy
& REST_INLINE_GPRS
)
27766 || !(info
->savres_strategy
& REST_INLINE_FPRS
)
27767 || !(info
->savres_strategy
& REST_INLINE_VRS
))
27768 info
->lr_save_p
= 1;
27770 if (info
->lr_save_p
)
27771 df_set_regs_ever_live (LR_REGNO
, true);
27773 /* Determine if we need to allocate any stack frame:
27775 For AIX we need to push the stack if a frame pointer is needed
27776 (because the stack might be dynamically adjusted), if we are
27777 debugging, if we make calls, or if the sum of fp_save, gp_save,
27778 and local variables are more than the space needed to save all
27779 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
27780 + 18*8 = 288 (GPR13 reserved).
27782 For V.4 we don't have the stack cushion that AIX uses, but assume
27783 that the debugger can handle stackless frames. */
27788 else if (DEFAULT_ABI
== ABI_V4
)
27789 info
->push_p
= non_fixed_size
!= 0;
27791 else if (frame_pointer_needed
)
27794 else if (TARGET_XCOFF
&& write_symbols
!= NO_DEBUG
)
27798 info
->push_p
= non_fixed_size
> (TARGET_32BIT
? 220 : 288);
27803 /* Return true if the current function uses any GPRs in 64-bit SIMD
27807 spe_func_has_64bit_regs_p (void)
27809 rtx_insn
*insns
, *insn
;
27811 /* Functions that save and restore all the call-saved registers will
27812 need to save/restore the registers in 64-bits. */
27813 if (crtl
->calls_eh_return
27814 || cfun
->calls_setjmp
27815 || crtl
->has_nonlocal_goto
)
27818 insns
= get_insns ();
27820 for (insn
= NEXT_INSN (insns
); insn
!= NULL_RTX
; insn
= NEXT_INSN (insn
))
27826 /* FIXME: This should be implemented with attributes...
27828 (set_attr "spe64" "true")....then,
27829 if (get_spe64(insn)) return true;
27831 It's the only reliable way to do the stuff below. */
27833 i
= PATTERN (insn
);
27834 if (GET_CODE (i
) == SET
)
27836 machine_mode mode
= GET_MODE (SET_SRC (i
));
27838 if (SPE_VECTOR_MODE (mode
))
27840 if (TARGET_E500_DOUBLE
27841 && (mode
== DFmode
|| FLOAT128_2REG_P (mode
)))
27851 debug_stack_info (rs6000_stack_t
*info
)
27853 const char *abi_string
;
27856 info
= rs6000_stack_info ();
27858 fprintf (stderr
, "\nStack information for function %s:\n",
27859 ((current_function_decl
&& DECL_NAME (current_function_decl
))
27860 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl
))
27865 default: abi_string
= "Unknown"; break;
27866 case ABI_NONE
: abi_string
= "NONE"; break;
27867 case ABI_AIX
: abi_string
= "AIX"; break;
27868 case ABI_ELFv2
: abi_string
= "ELFv2"; break;
27869 case ABI_DARWIN
: abi_string
= "Darwin"; break;
27870 case ABI_V4
: abi_string
= "V.4"; break;
27873 fprintf (stderr
, "\tABI = %5s\n", abi_string
);
27875 if (TARGET_ALTIVEC_ABI
)
27876 fprintf (stderr
, "\tALTIVEC ABI extensions enabled.\n");
27878 if (TARGET_SPE_ABI
)
27879 fprintf (stderr
, "\tSPE ABI extensions enabled.\n");
27881 if (info
->first_gp_reg_save
!= 32)
27882 fprintf (stderr
, "\tfirst_gp_reg_save = %5d\n", info
->first_gp_reg_save
);
27884 if (info
->first_fp_reg_save
!= 64)
27885 fprintf (stderr
, "\tfirst_fp_reg_save = %5d\n", info
->first_fp_reg_save
);
27887 if (info
->first_altivec_reg_save
<= LAST_ALTIVEC_REGNO
)
27888 fprintf (stderr
, "\tfirst_altivec_reg_save = %5d\n",
27889 info
->first_altivec_reg_save
);
27891 if (info
->lr_save_p
)
27892 fprintf (stderr
, "\tlr_save_p = %5d\n", info
->lr_save_p
);
27894 if (info
->cr_save_p
)
27895 fprintf (stderr
, "\tcr_save_p = %5d\n", info
->cr_save_p
);
27897 if (info
->vrsave_mask
)
27898 fprintf (stderr
, "\tvrsave_mask = 0x%x\n", info
->vrsave_mask
);
27901 fprintf (stderr
, "\tpush_p = %5d\n", info
->push_p
);
27904 fprintf (stderr
, "\tcalls_p = %5d\n", info
->calls_p
);
27907 fprintf (stderr
, "\tgp_save_offset = %5d\n", info
->gp_save_offset
);
27910 fprintf (stderr
, "\tfp_save_offset = %5d\n", info
->fp_save_offset
);
27912 if (info
->altivec_size
)
27913 fprintf (stderr
, "\taltivec_save_offset = %5d\n",
27914 info
->altivec_save_offset
);
27916 if (info
->spe_gp_size
)
27917 fprintf (stderr
, "\tspe_gp_save_offset = %5d\n",
27918 info
->spe_gp_save_offset
);
27920 if (info
->vrsave_size
)
27921 fprintf (stderr
, "\tvrsave_save_offset = %5d\n",
27922 info
->vrsave_save_offset
);
27924 if (info
->lr_save_p
)
27925 fprintf (stderr
, "\tlr_save_offset = %5d\n", info
->lr_save_offset
);
27927 if (info
->cr_save_p
)
27928 fprintf (stderr
, "\tcr_save_offset = %5d\n", info
->cr_save_offset
);
27930 if (info
->varargs_save_offset
)
27931 fprintf (stderr
, "\tvarargs_save_offset = %5d\n", info
->varargs_save_offset
);
27933 if (info
->total_size
)
27934 fprintf (stderr
, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC
"\n",
27937 if (info
->vars_size
)
27938 fprintf (stderr
, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC
"\n",
27941 if (info
->parm_size
)
27942 fprintf (stderr
, "\tparm_size = %5d\n", info
->parm_size
);
27944 if (info
->fixed_size
)
27945 fprintf (stderr
, "\tfixed_size = %5d\n", info
->fixed_size
);
27948 fprintf (stderr
, "\tgp_size = %5d\n", info
->gp_size
);
27950 if (info
->spe_gp_size
)
27951 fprintf (stderr
, "\tspe_gp_size = %5d\n", info
->spe_gp_size
);
27954 fprintf (stderr
, "\tfp_size = %5d\n", info
->fp_size
);
27956 if (info
->altivec_size
)
27957 fprintf (stderr
, "\taltivec_size = %5d\n", info
->altivec_size
);
27959 if (info
->vrsave_size
)
27960 fprintf (stderr
, "\tvrsave_size = %5d\n", info
->vrsave_size
);
27962 if (info
->altivec_padding_size
)
27963 fprintf (stderr
, "\taltivec_padding_size= %5d\n",
27964 info
->altivec_padding_size
);
27966 if (info
->spe_padding_size
)
27967 fprintf (stderr
, "\tspe_padding_size = %5d\n",
27968 info
->spe_padding_size
);
27971 fprintf (stderr
, "\tcr_size = %5d\n", info
->cr_size
);
27973 if (info
->save_size
)
27974 fprintf (stderr
, "\tsave_size = %5d\n", info
->save_size
);
27976 if (info
->reg_size
!= 4)
27977 fprintf (stderr
, "\treg_size = %5d\n", info
->reg_size
);
27979 fprintf (stderr
, "\tsave-strategy = %04x\n", info
->savres_strategy
);
27981 fprintf (stderr
, "\n");
27985 rs6000_return_addr (int count
, rtx frame
)
27987 /* Currently we don't optimize very well between prolog and body
27988 code and for PIC code the code can be actually quite bad, so
27989 don't try to be too clever here. */
27991 || ((DEFAULT_ABI
== ABI_V4
|| DEFAULT_ABI
== ABI_DARWIN
) && flag_pic
))
27993 cfun
->machine
->ra_needs_full_frame
= 1;
28000 plus_constant (Pmode
,
28002 (gen_rtx_MEM (Pmode
,
28003 memory_address (Pmode
, frame
))),
28004 RETURN_ADDRESS_OFFSET
)));
28007 cfun
->machine
->ra_need_lr
= 1;
28008 return get_hard_reg_initial_val (Pmode
, LR_REGNO
);
28011 /* Say whether a function is a candidate for sibcall handling or not. */
28014 rs6000_function_ok_for_sibcall (tree decl
, tree exp
)
28019 fntype
= TREE_TYPE (decl
);
28021 fntype
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
28023 /* We can't do it if the called function has more vector parameters
28024 than the current function; there's nowhere to put the VRsave code. */
28025 if (TARGET_ALTIVEC_ABI
28026 && TARGET_ALTIVEC_VRSAVE
28027 && !(decl
&& decl
== current_function_decl
))
28029 function_args_iterator args_iter
;
28033 /* Functions with vector parameters are required to have a
28034 prototype, so the argument type info must be available
28036 FOREACH_FUNCTION_ARGS(fntype
, type
, args_iter
)
28037 if (TREE_CODE (type
) == VECTOR_TYPE
28038 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type
)))
28041 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl
), type
, args_iter
)
28042 if (TREE_CODE (type
) == VECTOR_TYPE
28043 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type
)))
28050 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
28051 functions, because the callee may have a different TOC pointer to
28052 the caller and there's no way to ensure we restore the TOC when
28053 we return. With the secure-plt SYSV ABI we can't make non-local
28054 calls when -fpic/PIC because the plt call stubs use r30. */
28055 if (DEFAULT_ABI
== ABI_DARWIN
28056 || ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
28058 && !DECL_EXTERNAL (decl
)
28059 && !DECL_WEAK (decl
)
28060 && (*targetm
.binds_local_p
) (decl
))
28061 || (DEFAULT_ABI
== ABI_V4
28062 && (!TARGET_SECURE_PLT
28065 && (*targetm
.binds_local_p
) (decl
)))))
28067 tree attr_list
= TYPE_ATTRIBUTES (fntype
);
28069 if (!lookup_attribute ("longcall", attr_list
)
28070 || lookup_attribute ("shortcall", attr_list
))
28078 rs6000_ra_ever_killed (void)
28084 if (cfun
->is_thunk
)
28087 if (cfun
->machine
->lr_save_state
)
28088 return cfun
->machine
->lr_save_state
- 1;
28090 /* regs_ever_live has LR marked as used if any sibcalls are present,
28091 but this should not force saving and restoring in the
28092 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
28093 clobbers LR, so that is inappropriate. */
28095 /* Also, the prologue can generate a store into LR that
28096 doesn't really count, like this:
28099 bcl to set PIC register
28103 When we're called from the epilogue, we need to avoid counting
28104 this as a store. */
28106 push_topmost_sequence ();
28107 top
= get_insns ();
28108 pop_topmost_sequence ();
28109 reg
= gen_rtx_REG (Pmode
, LR_REGNO
);
28111 for (insn
= NEXT_INSN (top
); insn
!= NULL_RTX
; insn
= NEXT_INSN (insn
))
28117 if (!SIBLING_CALL_P (insn
))
28120 else if (find_regno_note (insn
, REG_INC
, LR_REGNO
))
28122 else if (set_of (reg
, insn
) != NULL_RTX
28123 && !prologue_epilogue_contains (insn
))
28130 /* Emit instructions needed to load the TOC register.
28131 This is only needed when TARGET_TOC, TARGET_MINIMAL_TOC, and there is
28132 a constant pool; or for SVR4 -fpic. */
28135 rs6000_emit_load_toc_table (int fromprolog
)
28138 dest
= gen_rtx_REG (Pmode
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
28140 if (TARGET_ELF
&& TARGET_SECURE_PLT
&& DEFAULT_ABI
== ABI_V4
&& flag_pic
)
28143 rtx lab
, tmp1
, tmp2
, got
;
28145 lab
= gen_label_rtx ();
28146 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (lab
));
28147 lab
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (buf
));
28150 got
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (toc_label_name
));
28154 got
= rs6000_got_sym ();
28155 tmp1
= tmp2
= dest
;
28158 tmp1
= gen_reg_rtx (Pmode
);
28159 tmp2
= gen_reg_rtx (Pmode
);
28161 emit_insn (gen_load_toc_v4_PIC_1 (lab
));
28162 emit_move_insn (tmp1
, gen_rtx_REG (Pmode
, LR_REGNO
));
28163 emit_insn (gen_load_toc_v4_PIC_3b (tmp2
, tmp1
, got
, lab
));
28164 emit_insn (gen_load_toc_v4_PIC_3c (dest
, tmp2
, got
, lab
));
28166 else if (TARGET_ELF
&& DEFAULT_ABI
== ABI_V4
&& flag_pic
== 1)
28168 emit_insn (gen_load_toc_v4_pic_si ());
28169 emit_move_insn (dest
, gen_rtx_REG (Pmode
, LR_REGNO
));
28171 else if (TARGET_ELF
&& DEFAULT_ABI
== ABI_V4
&& flag_pic
== 2)
28174 rtx temp0
= (fromprolog
28175 ? gen_rtx_REG (Pmode
, 0)
28176 : gen_reg_rtx (Pmode
));
28182 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCF", rs6000_pic_labelno
);
28183 symF
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (buf
));
28185 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCL", rs6000_pic_labelno
);
28186 symL
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (buf
));
28188 emit_insn (gen_load_toc_v4_PIC_1 (symF
));
28189 emit_move_insn (dest
, gen_rtx_REG (Pmode
, LR_REGNO
));
28190 emit_insn (gen_load_toc_v4_PIC_2 (temp0
, dest
, symL
, symF
));
28196 tocsym
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (toc_label_name
));
28198 lab
= gen_label_rtx ();
28199 emit_insn (gen_load_toc_v4_PIC_1b (tocsym
, lab
));
28200 emit_move_insn (dest
, gen_rtx_REG (Pmode
, LR_REGNO
));
28201 if (TARGET_LINK_STACK
)
28202 emit_insn (gen_addsi3 (dest
, dest
, GEN_INT (4)));
28203 emit_move_insn (temp0
, gen_rtx_MEM (Pmode
, dest
));
28205 emit_insn (gen_addsi3 (dest
, temp0
, dest
));
28207 else if (TARGET_ELF
&& !TARGET_AIX
&& flag_pic
== 0 && TARGET_MINIMAL_TOC
)
28209 /* This is for AIX code running in non-PIC ELF32. */
28210 rtx realsym
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (toc_label_name
));
28213 emit_insn (gen_elf_high (dest
, realsym
));
28214 emit_insn (gen_elf_low (dest
, dest
, realsym
));
28218 gcc_assert (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
);
28221 emit_insn (gen_load_toc_aix_si (dest
));
28223 emit_insn (gen_load_toc_aix_di (dest
));
28227 /* Emit instructions to restore the link register after determining where
28228 its value has been stored. */
28231 rs6000_emit_eh_reg_restore (rtx source
, rtx scratch
)
28233 rs6000_stack_t
*info
= rs6000_stack_info ();
28236 operands
[0] = source
;
28237 operands
[1] = scratch
;
28239 if (info
->lr_save_p
)
28241 rtx frame_rtx
= stack_pointer_rtx
;
28242 HOST_WIDE_INT sp_offset
= 0;
28245 if (frame_pointer_needed
28246 || cfun
->calls_alloca
28247 || info
->total_size
> 32767)
28249 tmp
= gen_frame_mem (Pmode
, frame_rtx
);
28250 emit_move_insn (operands
[1], tmp
);
28251 frame_rtx
= operands
[1];
28253 else if (info
->push_p
)
28254 sp_offset
= info
->total_size
;
28256 tmp
= plus_constant (Pmode
, frame_rtx
,
28257 info
->lr_save_offset
+ sp_offset
);
28258 tmp
= gen_frame_mem (Pmode
, tmp
);
28259 emit_move_insn (tmp
, operands
[0]);
28262 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNO
), operands
[0]);
28264 /* Freeze lr_save_p. We've just emitted rtl that depends on the
28265 state of lr_save_p so any change from here on would be a bug. In
28266 particular, stop rs6000_ra_ever_killed from considering the SET
28267 of lr we may have added just above. */
28268 cfun
->machine
->lr_save_state
= info
->lr_save_p
+ 1;
28271 static GTY(()) alias_set_type set
= -1;
28274 get_TOC_alias_set (void)
28277 set
= new_alias_set ();
28281 /* This returns nonzero if the current function uses the TOC. This is
28282 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
28283 is generated by the ABI_V4 load_toc_* patterns. */
28290 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
28293 rtx pat
= PATTERN (insn
);
28296 if (GET_CODE (pat
) == PARALLEL
)
28297 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
28299 rtx sub
= XVECEXP (pat
, 0, i
);
28300 if (GET_CODE (sub
) == USE
)
28302 sub
= XEXP (sub
, 0);
28303 if (GET_CODE (sub
) == UNSPEC
28304 && XINT (sub
, 1) == UNSPEC_TOC
)
28314 create_TOC_reference (rtx symbol
, rtx largetoc_reg
)
28316 rtx tocrel
, tocreg
, hi
;
28318 if (TARGET_DEBUG_ADDR
)
28320 if (GET_CODE (symbol
) == SYMBOL_REF
)
28321 fprintf (stderr
, "\ncreate_TOC_reference, (symbol_ref %s)\n",
28325 fprintf (stderr
, "\ncreate_TOC_reference, code %s:\n",
28326 GET_RTX_NAME (GET_CODE (symbol
)));
28327 debug_rtx (symbol
);
28331 if (!can_create_pseudo_p ())
28332 df_set_regs_ever_live (TOC_REGISTER
, true);
28334 tocreg
= gen_rtx_REG (Pmode
, TOC_REGISTER
);
28335 tocrel
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, symbol
, tocreg
), UNSPEC_TOCREL
);
28336 if (TARGET_CMODEL
== CMODEL_SMALL
|| can_create_pseudo_p ())
28339 hi
= gen_rtx_HIGH (Pmode
, copy_rtx (tocrel
));
28340 if (largetoc_reg
!= NULL
)
28342 emit_move_insn (largetoc_reg
, hi
);
28345 return gen_rtx_LO_SUM (Pmode
, hi
, tocrel
);
28348 /* Issue assembly directives that create a reference to the given DWARF
28349 FRAME_TABLE_LABEL from the current function section. */
28351 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label
)
28353 fprintf (asm_out_file
, "\t.ref %s\n",
28354 (* targetm
.strip_name_encoding
) (frame_table_label
));
28357 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
28358 and the change to the stack pointer. */
28361 rs6000_emit_stack_tie (rtx fp
, bool hard_frame_needed
)
28368 regs
[i
++] = gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
28369 if (hard_frame_needed
)
28370 regs
[i
++] = gen_rtx_REG (Pmode
, HARD_FRAME_POINTER_REGNUM
);
28371 if (!(REGNO (fp
) == STACK_POINTER_REGNUM
28372 || (hard_frame_needed
28373 && REGNO (fp
) == HARD_FRAME_POINTER_REGNUM
)))
28376 p
= rtvec_alloc (i
);
28379 rtx mem
= gen_frame_mem (BLKmode
, regs
[i
]);
28380 RTVEC_ELT (p
, i
) = gen_rtx_SET (mem
, const0_rtx
);
28383 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode
, p
)));
28386 /* Emit the correct code for allocating stack space, as insns.
28387 If COPY_REG, make sure a copy of the old frame is left there.
28388 The generated code may use hard register 0 as a temporary. */
28391 rs6000_emit_allocate_stack (HOST_WIDE_INT size
, rtx copy_reg
, int copy_off
)
28394 rtx stack_reg
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
28395 rtx tmp_reg
= gen_rtx_REG (Pmode
, 0);
28396 rtx todec
= gen_int_mode (-size
, Pmode
);
28399 if (INTVAL (todec
) != -size
)
28401 warning (0, "stack frame too large");
28402 emit_insn (gen_trap ());
28406 if (crtl
->limit_stack
)
28408 if (REG_P (stack_limit_rtx
)
28409 && REGNO (stack_limit_rtx
) > 1
28410 && REGNO (stack_limit_rtx
) <= 31)
28412 emit_insn (gen_add3_insn (tmp_reg
, stack_limit_rtx
, GEN_INT (size
)));
28413 emit_insn (gen_cond_trap (LTU
, stack_reg
, tmp_reg
,
28416 else if (GET_CODE (stack_limit_rtx
) == SYMBOL_REF
28418 && DEFAULT_ABI
== ABI_V4
28421 rtx toload
= gen_rtx_CONST (VOIDmode
,
28422 gen_rtx_PLUS (Pmode
,
28426 emit_insn (gen_elf_high (tmp_reg
, toload
));
28427 emit_insn (gen_elf_low (tmp_reg
, tmp_reg
, toload
));
28428 emit_insn (gen_cond_trap (LTU
, stack_reg
, tmp_reg
,
28432 warning (0, "stack limit expression is not supported");
28438 emit_insn (gen_add3_insn (copy_reg
, stack_reg
, GEN_INT (copy_off
)));
28440 emit_move_insn (copy_reg
, stack_reg
);
28445 /* Need a note here so that try_split doesn't get confused. */
28446 if (get_last_insn () == NULL_RTX
)
28447 emit_note (NOTE_INSN_DELETED
);
28448 insn
= emit_move_insn (tmp_reg
, todec
);
28449 try_split (PATTERN (insn
), insn
, 0);
28453 insn
= emit_insn (TARGET_32BIT
28454 ? gen_movsi_update_stack (stack_reg
, stack_reg
,
28456 : gen_movdi_di_update_stack (stack_reg
, stack_reg
,
28457 todec
, stack_reg
));
28458 /* Since we didn't use gen_frame_mem to generate the MEM, grab
28459 it now and set the alias set/attributes. The above gen_*_update
28460 calls will generate a PARALLEL with the MEM set being the first
28462 par
= PATTERN (insn
);
28463 gcc_assert (GET_CODE (par
) == PARALLEL
);
28464 set
= XVECEXP (par
, 0, 0);
28465 gcc_assert (GET_CODE (set
) == SET
);
28466 mem
= SET_DEST (set
);
28467 gcc_assert (MEM_P (mem
));
28468 MEM_NOTRAP_P (mem
) = 1;
28469 set_mem_alias_set (mem
, get_frame_alias_set ());
28471 RTX_FRAME_RELATED_P (insn
) = 1;
28472 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
28473 gen_rtx_SET (stack_reg
, gen_rtx_PLUS (Pmode
, stack_reg
,
28474 GEN_INT (-size
))));
28478 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
28480 #if PROBE_INTERVAL > 32768
28481 #error Cannot use indexed addressing mode for stack probing
28484 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
28485 inclusive. These are offsets from the current stack pointer. */
28488 rs6000_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
)
28490 /* See if we have a constant small number of probes to generate. If so,
28491 that's the easy case. */
28492 if (first
+ size
<= 32768)
28496 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
28497 it exceeds SIZE. If only one probe is needed, this will not
28498 generate any code. Then probe at FIRST + SIZE. */
28499 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
28500 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
28503 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
28507 /* Otherwise, do the same as above, but in a loop. Note that we must be
28508 extra careful with variables wrapping around because we might be at
28509 the very top (or the very bottom) of the address space and we have
28510 to be able to handle this case properly; in particular, we use an
28511 equality test for the loop condition. */
28514 HOST_WIDE_INT rounded_size
;
28515 rtx r12
= gen_rtx_REG (Pmode
, 12);
28516 rtx r0
= gen_rtx_REG (Pmode
, 0);
28518 /* Sanity check for the addressing mode we're going to use. */
28519 gcc_assert (first
<= 32768);
28521 /* Step 1: round SIZE to the previous multiple of the interval. */
28523 rounded_size
= ROUND_DOWN (size
, PROBE_INTERVAL
);
28526 /* Step 2: compute initial and final value of the loop counter. */
28528 /* TEST_ADDR = SP + FIRST. */
28529 emit_insn (gen_rtx_SET (r12
, plus_constant (Pmode
, stack_pointer_rtx
,
28532 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
28533 if (rounded_size
> 32768)
28535 emit_move_insn (r0
, GEN_INT (-rounded_size
));
28536 emit_insn (gen_rtx_SET (r0
, gen_rtx_PLUS (Pmode
, r12
, r0
)));
28539 emit_insn (gen_rtx_SET (r0
, plus_constant (Pmode
, r12
,
28543 /* Step 3: the loop
28547 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
28550 while (TEST_ADDR != LAST_ADDR)
28552 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
28553 until it is equal to ROUNDED_SIZE. */
28556 emit_insn (gen_probe_stack_rangedi (r12
, r12
, r0
));
28558 emit_insn (gen_probe_stack_rangesi (r12
, r12
, r0
));
28561 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
28562 that SIZE is equal to ROUNDED_SIZE. */
28564 if (size
!= rounded_size
)
28565 emit_stack_probe (plus_constant (Pmode
, r12
, rounded_size
- size
));
28569 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
28570 absolute addresses. */
28573 output_probe_stack_range (rtx reg1
, rtx reg2
)
28575 static int labelno
= 0;
28579 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
28582 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
28584 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
28586 xops
[1] = GEN_INT (-PROBE_INTERVAL
);
28587 output_asm_insn ("addi %0,%0,%1", xops
);
28589 /* Probe at TEST_ADDR. */
28590 xops
[1] = gen_rtx_REG (Pmode
, 0);
28591 output_asm_insn ("stw %1,0(%0)", xops
);
28593 /* Test if TEST_ADDR == LAST_ADDR. */
28596 output_asm_insn ("cmpd 0,%0,%1", xops
);
28598 output_asm_insn ("cmpw 0,%0,%1", xops
);
28601 fputs ("\tbne 0,", asm_out_file
);
28602 assemble_name_raw (asm_out_file
, loop_lab
);
28603 fputc ('\n', asm_out_file
);
28608 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
28609 with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2
28610 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
28611 deduce these equivalences by itself so it wasn't necessary to hold
28612 its hand so much. Don't be tempted to always supply d2_f_d_e with
28613 the actual cfa register, ie. r31 when we are using a hard frame
28614 pointer. That fails when saving regs off r1, and sched moves the
28615 r31 setup past the reg saves. */
28618 rs6000_frame_related (rtx_insn
*insn
, rtx reg
, HOST_WIDE_INT val
,
28619 rtx reg2
, rtx repl2
)
28623 if (REGNO (reg
) == STACK_POINTER_REGNUM
)
28625 gcc_checking_assert (val
== 0);
28629 repl
= gen_rtx_PLUS (Pmode
, gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
),
28632 rtx pat
= PATTERN (insn
);
28633 if (!repl
&& !reg2
)
28635 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
28636 if (GET_CODE (pat
) == PARALLEL
)
28637 for (int i
= 0; i
< XVECLEN (pat
, 0); i
++)
28638 if (GET_CODE (XVECEXP (pat
, 0, i
)) == SET
)
28640 rtx set
= XVECEXP (pat
, 0, i
);
28642 /* If this PARALLEL has been emitted for out-of-line
28643 register save functions, or store multiple, then omit
28644 eh_frame info for any user-defined global regs. If
28645 eh_frame info is supplied, frame unwinding will
28646 restore a user reg. */
28647 if (!REG_P (SET_SRC (set
))
28648 || !fixed_reg_p (REGNO (SET_SRC (set
))))
28649 RTX_FRAME_RELATED_P (set
) = 1;
28651 RTX_FRAME_RELATED_P (insn
) = 1;
28655 /* We expect that 'pat' is either a SET or a PARALLEL containing
28656 SETs (and possibly other stuff). In a PARALLEL, all the SETs
28657 are important so they all have to be marked RTX_FRAME_RELATED_P.
28658 Call simplify_replace_rtx on the SETs rather than the whole insn
28659 so as to leave the other stuff alone (for example USE of r12). */
28661 set_used_flags (pat
);
28662 if (GET_CODE (pat
) == SET
)
28665 pat
= simplify_replace_rtx (pat
, reg
, repl
);
28667 pat
= simplify_replace_rtx (pat
, reg2
, repl2
);
28669 else if (GET_CODE (pat
) == PARALLEL
)
28671 pat
= shallow_copy_rtx (pat
);
28672 XVEC (pat
, 0) = shallow_copy_rtvec (XVEC (pat
, 0));
28674 for (int i
= 0; i
< XVECLEN (pat
, 0); i
++)
28675 if (GET_CODE (XVECEXP (pat
, 0, i
)) == SET
)
28677 rtx set
= XVECEXP (pat
, 0, i
);
28680 set
= simplify_replace_rtx (set
, reg
, repl
);
28682 set
= simplify_replace_rtx (set
, reg2
, repl2
);
28683 XVECEXP (pat
, 0, i
) = set
;
28685 /* Omit eh_frame info for any user-defined global regs. */
28686 if (!REG_P (SET_SRC (set
))
28687 || !fixed_reg_p (REGNO (SET_SRC (set
))))
28688 RTX_FRAME_RELATED_P (set
) = 1;
28692 gcc_unreachable ();
28694 RTX_FRAME_RELATED_P (insn
) = 1;
28695 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, copy_rtx_if_shared (pat
));
28700 /* Returns an insn that has a vrsave set operation with the
28701 appropriate CLOBBERs. */
28704 generate_set_vrsave (rtx reg
, rs6000_stack_t
*info
, int epiloguep
)
28707 rtx insn
, clobs
[TOTAL_ALTIVEC_REGS
+ 1];
28708 rtx vrsave
= gen_rtx_REG (SImode
, VRSAVE_REGNO
);
28711 = gen_rtx_SET (vrsave
,
28712 gen_rtx_UNSPEC_VOLATILE (SImode
,
28713 gen_rtvec (2, reg
, vrsave
),
28714 UNSPECV_SET_VRSAVE
));
28718 /* We need to clobber the registers in the mask so the scheduler
28719 does not move sets to VRSAVE before sets of AltiVec registers.
28721 However, if the function receives nonlocal gotos, reload will set
28722 all call saved registers live. We will end up with:
28724 (set (reg 999) (mem))
28725 (parallel [ (set (reg vrsave) (unspec blah))
28726 (clobber (reg 999))])
28728 The clobber will cause the store into reg 999 to be dead, and
28729 flow will attempt to delete an epilogue insn. In this case, we
28730 need an unspec use/set of the register. */
28732 for (i
= FIRST_ALTIVEC_REGNO
; i
<= LAST_ALTIVEC_REGNO
; ++i
)
28733 if (info
->vrsave_mask
& ALTIVEC_REG_BIT (i
))
28735 if (!epiloguep
|| call_used_regs
[i
])
28736 clobs
[nclobs
++] = gen_rtx_CLOBBER (VOIDmode
,
28737 gen_rtx_REG (V4SImode
, i
));
28740 rtx reg
= gen_rtx_REG (V4SImode
, i
);
28743 = gen_rtx_SET (reg
,
28744 gen_rtx_UNSPEC (V4SImode
,
28745 gen_rtvec (1, reg
), 27));
28749 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (nclobs
));
28751 for (i
= 0; i
< nclobs
; ++i
)
28752 XVECEXP (insn
, 0, i
) = clobs
[i
];
28758 gen_frame_set (rtx reg
, rtx frame_reg
, int offset
, bool store
)
28762 addr
= gen_rtx_PLUS (Pmode
, frame_reg
, GEN_INT (offset
));
28763 mem
= gen_frame_mem (GET_MODE (reg
), addr
);
28764 return gen_rtx_SET (store
? mem
: reg
, store
? reg
: mem
);
28768 gen_frame_load (rtx reg
, rtx frame_reg
, int offset
)
28770 return gen_frame_set (reg
, frame_reg
, offset
, false);
28774 gen_frame_store (rtx reg
, rtx frame_reg
, int offset
)
28776 return gen_frame_set (reg
, frame_reg
, offset
, true);
28779 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
28780 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
28783 emit_frame_save (rtx frame_reg
, machine_mode mode
,
28784 unsigned int regno
, int offset
, HOST_WIDE_INT frame_reg_to_sp
)
28788 /* Some cases that need register indexed addressing. */
28789 gcc_checking_assert (!((TARGET_ALTIVEC_ABI
&& ALTIVEC_VECTOR_MODE (mode
))
28790 || (TARGET_VSX
&& ALTIVEC_OR_VSX_VECTOR_MODE (mode
))
28791 || (TARGET_E500_DOUBLE
&& mode
== DFmode
)
28793 && SPE_VECTOR_MODE (mode
)
28794 && !SPE_CONST_OFFSET_OK (offset
))));
28796 reg
= gen_rtx_REG (mode
, regno
);
28797 rtx_insn
*insn
= emit_insn (gen_frame_store (reg
, frame_reg
, offset
));
28798 return rs6000_frame_related (insn
, frame_reg
, frame_reg_to_sp
,
28799 NULL_RTX
, NULL_RTX
);
28802 /* Emit an offset memory reference suitable for a frame store, while
28803 converting to a valid addressing mode. */
28806 gen_frame_mem_offset (machine_mode mode
, rtx reg
, int offset
)
28808 rtx int_rtx
, offset_rtx
;
28810 int_rtx
= GEN_INT (offset
);
28812 if ((TARGET_SPE_ABI
&& SPE_VECTOR_MODE (mode
) && !SPE_CONST_OFFSET_OK (offset
))
28813 || (TARGET_E500_DOUBLE
&& mode
== DFmode
))
28815 offset_rtx
= gen_rtx_REG (Pmode
, FIXED_SCRATCH
);
28816 emit_move_insn (offset_rtx
, int_rtx
);
28819 offset_rtx
= int_rtx
;
28821 return gen_frame_mem (mode
, gen_rtx_PLUS (Pmode
, reg
, offset_rtx
));
28824 #ifndef TARGET_FIX_AND_CONTINUE
28825 #define TARGET_FIX_AND_CONTINUE 0
28828 /* It's really GPR 13 or 14, FPR 14 and VR 20. We need the smallest. */
28829 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
28830 #define LAST_SAVRES_REGISTER 31
28831 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
28842 static GTY(()) rtx savres_routine_syms
[N_SAVRES_REGISTERS
][12];
28844 /* Temporary holding space for an out-of-line register save/restore
28846 static char savres_routine_name
[30];
28848 /* Return the name for an out-of-line register save/restore routine.
28849 We are saving/restoring GPRs if GPR is true. */
28852 rs6000_savres_routine_name (rs6000_stack_t
*info
, int regno
, int sel
)
28854 const char *prefix
= "";
28855 const char *suffix
= "";
28857 /* Different targets are supposed to define
28858 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
28859 routine name could be defined with:
28861 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
28863 This is a nice idea in practice, but in reality, things are
28864 complicated in several ways:
28866 - ELF targets have save/restore routines for GPRs.
28868 - SPE targets use different prefixes for 32/64-bit registers, and
28869 neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen.
28871 - PPC64 ELF targets have routines for save/restore of GPRs that
28872 differ in what they do with the link register, so having a set
28873 prefix doesn't work. (We only use one of the save routines at
28874 the moment, though.)
28876 - PPC32 elf targets have "exit" versions of the restore routines
28877 that restore the link register and can save some extra space.
28878 These require an extra suffix. (There are also "tail" versions
28879 of the restore routines and "GOT" versions of the save routines,
28880 but we don't generate those at present. Same problems apply,
28883 We deal with all this by synthesizing our own prefix/suffix and
28884 using that for the simple sprintf call shown above. */
28887 /* No floating point saves on the SPE. */
28888 gcc_assert ((sel
& SAVRES_REG
) == SAVRES_GPR
);
28890 if ((sel
& SAVRES_SAVE
))
28891 prefix
= info
->spe_64bit_regs_used
? "_save64gpr_" : "_save32gpr_";
28893 prefix
= info
->spe_64bit_regs_used
? "_rest64gpr_" : "_rest32gpr_";
28895 if ((sel
& SAVRES_LR
))
28898 else if (DEFAULT_ABI
== ABI_V4
)
28903 if ((sel
& SAVRES_REG
) == SAVRES_GPR
)
28904 prefix
= (sel
& SAVRES_SAVE
) ? "_savegpr_" : "_restgpr_";
28905 else if ((sel
& SAVRES_REG
) == SAVRES_FPR
)
28906 prefix
= (sel
& SAVRES_SAVE
) ? "_savefpr_" : "_restfpr_";
28907 else if ((sel
& SAVRES_REG
) == SAVRES_VR
)
28908 prefix
= (sel
& SAVRES_SAVE
) ? "_savevr_" : "_restvr_";
28912 if ((sel
& SAVRES_LR
))
28915 else if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
28917 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
28918 /* No out-of-line save/restore routines for GPRs on AIX. */
28919 gcc_assert (!TARGET_AIX
|| (sel
& SAVRES_REG
) != SAVRES_GPR
);
28923 if ((sel
& SAVRES_REG
) == SAVRES_GPR
)
28924 prefix
= ((sel
& SAVRES_SAVE
)
28925 ? ((sel
& SAVRES_LR
) ? "_savegpr0_" : "_savegpr1_")
28926 : ((sel
& SAVRES_LR
) ? "_restgpr0_" : "_restgpr1_"));
28927 else if ((sel
& SAVRES_REG
) == SAVRES_FPR
)
28929 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
28930 if ((sel
& SAVRES_LR
))
28931 prefix
= ((sel
& SAVRES_SAVE
) ? "_savefpr_" : "_restfpr_");
28935 prefix
= (sel
& SAVRES_SAVE
) ? SAVE_FP_PREFIX
: RESTORE_FP_PREFIX
;
28936 suffix
= (sel
& SAVRES_SAVE
) ? SAVE_FP_SUFFIX
: RESTORE_FP_SUFFIX
;
28939 else if ((sel
& SAVRES_REG
) == SAVRES_VR
)
28940 prefix
= (sel
& SAVRES_SAVE
) ? "_savevr_" : "_restvr_";
28945 if (DEFAULT_ABI
== ABI_DARWIN
)
28947 /* The Darwin approach is (slightly) different, in order to be
28948 compatible with code generated by the system toolchain. There is a
28949 single symbol for the start of save sequence, and the code here
28950 embeds an offset into that code on the basis of the first register
28952 prefix
= (sel
& SAVRES_SAVE
) ? "save" : "rest" ;
28953 if ((sel
& SAVRES_REG
) == SAVRES_GPR
)
28954 sprintf (savres_routine_name
, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix
,
28955 ((sel
& SAVRES_LR
) ? "x" : ""), (regno
== 13 ? "" : "+"),
28956 (regno
- 13) * 4, prefix
, regno
);
28957 else if ((sel
& SAVRES_REG
) == SAVRES_FPR
)
28958 sprintf (savres_routine_name
, "*%sFP%s%.0d ; %s f%d-f31", prefix
,
28959 (regno
== 14 ? "" : "+"), (regno
- 14) * 4, prefix
, regno
);
28960 else if ((sel
& SAVRES_REG
) == SAVRES_VR
)
28961 sprintf (savres_routine_name
, "*%sVEC%s%.0d ; %s v%d-v31", prefix
,
28962 (regno
== 20 ? "" : "+"), (regno
- 20) * 8, prefix
, regno
);
28967 sprintf (savres_routine_name
, "%s%d%s", prefix
, regno
, suffix
);
28969 return savres_routine_name
;
28972 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
28973 We are saving/restoring GPRs if GPR is true. */
28976 rs6000_savres_routine_sym (rs6000_stack_t
*info
, int sel
)
28978 int regno
= ((sel
& SAVRES_REG
) == SAVRES_GPR
28979 ? info
->first_gp_reg_save
28980 : (sel
& SAVRES_REG
) == SAVRES_FPR
28981 ? info
->first_fp_reg_save
- 32
28982 : (sel
& SAVRES_REG
) == SAVRES_VR
28983 ? info
->first_altivec_reg_save
- FIRST_ALTIVEC_REGNO
28988 /* On the SPE, we never have any FPRs, but we do have 32/64-bit
28989 versions of the gpr routines. */
28990 if (TARGET_SPE_ABI
&& (sel
& SAVRES_REG
) == SAVRES_GPR
28991 && info
->spe_64bit_regs_used
)
28992 select
^= SAVRES_FPR
^ SAVRES_GPR
;
28994 /* Don't generate bogus routine names. */
28995 gcc_assert (FIRST_SAVRES_REGISTER
<= regno
28996 && regno
<= LAST_SAVRES_REGISTER
28997 && select
>= 0 && select
<= 12);
28999 sym
= savres_routine_syms
[regno
-FIRST_SAVRES_REGISTER
][select
];
29005 name
= rs6000_savres_routine_name (info
, regno
, sel
);
29007 sym
= savres_routine_syms
[regno
-FIRST_SAVRES_REGISTER
][select
]
29008 = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
29009 SYMBOL_REF_FLAGS (sym
) |= SYMBOL_FLAG_FUNCTION
;
29015 /* Emit a sequence of insns, including a stack tie if needed, for
29016 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
29017 reset the stack pointer, but move the base of the frame into
29018 reg UPDT_REGNO for use by out-of-line register restore routines. */
29021 rs6000_emit_stack_reset (rs6000_stack_t
*info
,
29022 rtx frame_reg_rtx
, HOST_WIDE_INT frame_off
,
29023 unsigned updt_regno
)
29025 /* If there is nothing to do, don't do anything. */
29026 if (frame_off
== 0 && REGNO (frame_reg_rtx
) == updt_regno
)
29029 rtx updt_reg_rtx
= gen_rtx_REG (Pmode
, updt_regno
);
29031 /* This blockage is needed so that sched doesn't decide to move
29032 the sp change before the register restores. */
29033 if (DEFAULT_ABI
== ABI_V4
29035 && info
->spe_64bit_regs_used
!= 0
29036 && info
->first_gp_reg_save
!= 32))
29037 return emit_insn (gen_stack_restore_tie (updt_reg_rtx
, frame_reg_rtx
,
29038 GEN_INT (frame_off
)));
29040 /* If we are restoring registers out-of-line, we will be using the
29041 "exit" variants of the restore routines, which will reset the
29042 stack for us. But we do need to point updt_reg into the
29043 right place for those routines. */
29044 if (frame_off
!= 0)
29045 return emit_insn (gen_add3_insn (updt_reg_rtx
,
29046 frame_reg_rtx
, GEN_INT (frame_off
)));
29048 return emit_move_insn (updt_reg_rtx
, frame_reg_rtx
);
29053 /* Return the register number used as a pointer by out-of-line
29054 save/restore functions. */
29056 static inline unsigned
29057 ptr_regno_for_savres (int sel
)
29059 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
29060 return (sel
& SAVRES_REG
) == SAVRES_FPR
|| (sel
& SAVRES_LR
) ? 1 : 12;
29061 return DEFAULT_ABI
== ABI_DARWIN
&& (sel
& SAVRES_REG
) == SAVRES_FPR
? 1 : 11;
29064 /* Construct a parallel rtx describing the effect of a call to an
29065 out-of-line register save/restore routine, and emit the insn
29066 or jump_insn as appropriate. */
29069 rs6000_emit_savres_rtx (rs6000_stack_t
*info
,
29070 rtx frame_reg_rtx
, int save_area_offset
, int lr_offset
,
29071 machine_mode reg_mode
, int sel
)
29074 int offset
, start_reg
, end_reg
, n_regs
, use_reg
;
29075 int reg_size
= GET_MODE_SIZE (reg_mode
);
29082 start_reg
= ((sel
& SAVRES_REG
) == SAVRES_GPR
29083 ? info
->first_gp_reg_save
29084 : (sel
& SAVRES_REG
) == SAVRES_FPR
29085 ? info
->first_fp_reg_save
29086 : (sel
& SAVRES_REG
) == SAVRES_VR
29087 ? info
->first_altivec_reg_save
29089 end_reg
= ((sel
& SAVRES_REG
) == SAVRES_GPR
29091 : (sel
& SAVRES_REG
) == SAVRES_FPR
29093 : (sel
& SAVRES_REG
) == SAVRES_VR
29094 ? LAST_ALTIVEC_REGNO
+ 1
29096 n_regs
= end_reg
- start_reg
;
29097 p
= rtvec_alloc (3 + ((sel
& SAVRES_LR
) ? 1 : 0)
29098 + ((sel
& SAVRES_REG
) == SAVRES_VR
? 1 : 0)
29101 if (!(sel
& SAVRES_SAVE
) && (sel
& SAVRES_LR
))
29102 RTVEC_ELT (p
, offset
++) = ret_rtx
;
29104 RTVEC_ELT (p
, offset
++)
29105 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (Pmode
, LR_REGNO
));
29107 sym
= rs6000_savres_routine_sym (info
, sel
);
29108 RTVEC_ELT (p
, offset
++) = gen_rtx_USE (VOIDmode
, sym
);
29110 use_reg
= ptr_regno_for_savres (sel
);
29111 if ((sel
& SAVRES_REG
) == SAVRES_VR
)
29113 /* Vector regs are saved/restored using [reg+reg] addressing. */
29114 RTVEC_ELT (p
, offset
++)
29115 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (Pmode
, use_reg
));
29116 RTVEC_ELT (p
, offset
++)
29117 = gen_rtx_USE (VOIDmode
, gen_rtx_REG (Pmode
, 0));
29120 RTVEC_ELT (p
, offset
++)
29121 = gen_rtx_USE (VOIDmode
, gen_rtx_REG (Pmode
, use_reg
));
29123 for (i
= 0; i
< end_reg
- start_reg
; i
++)
29124 RTVEC_ELT (p
, i
+ offset
)
29125 = gen_frame_set (gen_rtx_REG (reg_mode
, start_reg
+ i
),
29126 frame_reg_rtx
, save_area_offset
+ reg_size
* i
,
29127 (sel
& SAVRES_SAVE
) != 0);
29129 if ((sel
& SAVRES_SAVE
) && (sel
& SAVRES_LR
))
29130 RTVEC_ELT (p
, i
+ offset
)
29131 = gen_frame_store (gen_rtx_REG (Pmode
, 0), frame_reg_rtx
, lr_offset
);
29133 par
= gen_rtx_PARALLEL (VOIDmode
, p
);
29135 if (!(sel
& SAVRES_SAVE
) && (sel
& SAVRES_LR
))
29137 insn
= emit_jump_insn (par
);
29138 JUMP_LABEL (insn
) = ret_rtx
;
29141 insn
= emit_insn (par
);
29145 /* Emit code to store CR fields that need to be saved into REG. */
29148 rs6000_emit_move_from_cr (rtx reg
)
29150 /* Only the ELFv2 ABI allows storing only selected fields. */
29151 if (DEFAULT_ABI
== ABI_ELFv2
&& TARGET_MFCRF
)
29153 int i
, cr_reg
[8], count
= 0;
29155 /* Collect CR fields that must be saved. */
29156 for (i
= 0; i
< 8; i
++)
29157 if (save_reg_p (CR0_REGNO
+ i
))
29158 cr_reg
[count
++] = i
;
29160 /* If it's just a single one, use mfcrf. */
29163 rtvec p
= rtvec_alloc (1);
29164 rtvec r
= rtvec_alloc (2);
29165 RTVEC_ELT (r
, 0) = gen_rtx_REG (CCmode
, CR0_REGNO
+ cr_reg
[0]);
29166 RTVEC_ELT (r
, 1) = GEN_INT (1 << (7 - cr_reg
[0]));
29168 = gen_rtx_SET (reg
,
29169 gen_rtx_UNSPEC (SImode
, r
, UNSPEC_MOVESI_FROM_CR
));
29171 emit_insn (gen_rtx_PARALLEL (VOIDmode
, p
));
29175 /* ??? It might be better to handle count == 2 / 3 cases here
29176 as well, using logical operations to combine the values. */
29179 emit_insn (gen_movesi_from_cr (reg
));
29182 /* Return whether the split-stack arg pointer (r12) is used. */
29185 split_stack_arg_pointer_used_p (void)
29187 /* If the pseudo holding the arg pointer is no longer a pseudo,
29188 then the arg pointer is used. */
29189 if (cfun
->machine
->split_stack_arg_pointer
!= NULL_RTX
29190 && (!REG_P (cfun
->machine
->split_stack_arg_pointer
)
29191 || (REGNO (cfun
->machine
->split_stack_arg_pointer
)
29192 < FIRST_PSEUDO_REGISTER
)))
29195 /* Unfortunately we also need to do some code scanning, since
29196 r12 may have been substituted for the pseudo. */
29198 basic_block bb
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->next_bb
;
29199 FOR_BB_INSNS (bb
, insn
)
29200 if (NONDEBUG_INSN_P (insn
))
29202 /* A call destroys r12. */
29207 FOR_EACH_INSN_USE (use
, insn
)
29209 rtx x
= DF_REF_REG (use
);
29210 if (REG_P (x
) && REGNO (x
) == 12)
29214 FOR_EACH_INSN_DEF (def
, insn
)
29216 rtx x
= DF_REF_REG (def
);
29217 if (REG_P (x
) && REGNO (x
) == 12)
29221 return bitmap_bit_p (DF_LR_OUT (bb
), 12);
29224 /* Return whether we need to emit an ELFv2 global entry point prologue. */
29227 rs6000_global_entry_point_needed_p (void)
29229 /* Only needed for the ELFv2 ABI. */
29230 if (DEFAULT_ABI
!= ABI_ELFv2
)
29233 /* With -msingle-pic-base, we assume the whole program shares the same
29234 TOC, so no global entry point prologues are needed anywhere. */
29235 if (TARGET_SINGLE_PIC_BASE
)
29238 /* Ensure we have a global entry point for thunks. ??? We could
29239 avoid that if the target routine doesn't need a global entry point,
29240 but we do not know whether this is the case at this point. */
29241 if (cfun
->is_thunk
)
29244 /* For regular functions, rs6000_emit_prologue sets this flag if the
29245 routine ever uses the TOC pointer. */
29246 return cfun
->machine
->r2_setup_needed
;
29249 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
29251 rs6000_get_separate_components (void)
29253 rs6000_stack_t
*info
= rs6000_stack_info ();
29255 if (WORLD_SAVE_P (info
))
29258 if (TARGET_SPE_ABI
)
29261 gcc_assert (!(info
->savres_strategy
& SAVE_MULTIPLE
)
29262 && !(info
->savres_strategy
& REST_MULTIPLE
));
29264 /* Component 0 is the save/restore of LR (done via GPR0).
29265 Components 13..31 are the save/restore of GPR13..GPR31.
29266 Components 46..63 are the save/restore of FPR14..FPR31. */
29268 cfun
->machine
->n_components
= 64;
29270 sbitmap components
= sbitmap_alloc (cfun
->machine
->n_components
);
29271 bitmap_clear (components
);
29273 int reg_size
= TARGET_32BIT
? 4 : 8;
29274 int fp_reg_size
= 8;
29276 /* The GPRs we need saved to the frame. */
29277 if ((info
->savres_strategy
& SAVE_INLINE_GPRS
)
29278 && (info
->savres_strategy
& REST_INLINE_GPRS
))
29280 int offset
= info
->gp_save_offset
;
29282 offset
+= info
->total_size
;
29284 for (unsigned regno
= info
->first_gp_reg_save
; regno
< 32; regno
++)
29286 if (IN_RANGE (offset
, -0x8000, 0x7fff)
29287 && rs6000_reg_live_or_pic_offset_p (regno
))
29288 bitmap_set_bit (components
, regno
);
29290 offset
+= reg_size
;
29294 /* Don't mess with the hard frame pointer. */
29295 if (frame_pointer_needed
)
29296 bitmap_clear_bit (components
, HARD_FRAME_POINTER_REGNUM
);
29298 /* Don't mess with the fixed TOC register. */
29299 if ((TARGET_TOC
&& TARGET_MINIMAL_TOC
)
29300 || (flag_pic
== 1 && DEFAULT_ABI
== ABI_V4
)
29301 || (flag_pic
&& DEFAULT_ABI
== ABI_DARWIN
))
29302 bitmap_clear_bit (components
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
29304 /* The FPRs we need saved to the frame. */
29305 if ((info
->savres_strategy
& SAVE_INLINE_FPRS
)
29306 && (info
->savres_strategy
& REST_INLINE_FPRS
))
29308 int offset
= info
->fp_save_offset
;
29310 offset
+= info
->total_size
;
29312 for (unsigned regno
= info
->first_fp_reg_save
; regno
< 64; regno
++)
29314 if (IN_RANGE (offset
, -0x8000, 0x7fff) && save_reg_p (regno
))
29315 bitmap_set_bit (components
, regno
);
29317 offset
+= fp_reg_size
;
29321 /* Optimize LR save and restore if we can. This is component 0. Any
29322 out-of-line register save/restore routines need LR. */
29323 if (info
->lr_save_p
29324 && !(flag_pic
&& (DEFAULT_ABI
== ABI_V4
|| DEFAULT_ABI
== ABI_DARWIN
))
29325 && (info
->savres_strategy
& SAVE_INLINE_GPRS
)
29326 && (info
->savres_strategy
& REST_INLINE_GPRS
)
29327 && (info
->savres_strategy
& SAVE_INLINE_FPRS
)
29328 && (info
->savres_strategy
& REST_INLINE_FPRS
)
29329 && (info
->savres_strategy
& SAVE_INLINE_VRS
)
29330 && (info
->savres_strategy
& REST_INLINE_VRS
))
29332 int offset
= info
->lr_save_offset
;
29334 offset
+= info
->total_size
;
29335 if (IN_RANGE (offset
, -0x8000, 0x7fff))
29336 bitmap_set_bit (components
, 0);
29342 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
29344 rs6000_components_for_bb (basic_block bb
)
29346 rs6000_stack_t
*info
= rs6000_stack_info ();
29348 bitmap in
= DF_LIVE_IN (bb
);
29349 bitmap gen
= &DF_LIVE_BB_INFO (bb
)->gen
;
29350 bitmap kill
= &DF_LIVE_BB_INFO (bb
)->kill
;
29352 sbitmap components
= sbitmap_alloc (cfun
->machine
->n_components
);
29353 bitmap_clear (components
);
29355 /* A register is used in a bb if it is in the IN, GEN, or KILL sets. */
29358 for (unsigned regno
= info
->first_gp_reg_save
; regno
< 32; regno
++)
29359 if (bitmap_bit_p (in
, regno
)
29360 || bitmap_bit_p (gen
, regno
)
29361 || bitmap_bit_p (kill
, regno
))
29362 bitmap_set_bit (components
, regno
);
29365 for (unsigned regno
= info
->first_fp_reg_save
; regno
< 64; regno
++)
29366 if (bitmap_bit_p (in
, regno
)
29367 || bitmap_bit_p (gen
, regno
)
29368 || bitmap_bit_p (kill
, regno
))
29369 bitmap_set_bit (components
, regno
);
29371 /* The link register. */
29372 if (bitmap_bit_p (in
, LR_REGNO
)
29373 || bitmap_bit_p (gen
, LR_REGNO
)
29374 || bitmap_bit_p (kill
, LR_REGNO
))
29375 bitmap_set_bit (components
, 0);
29380 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
29382 rs6000_disqualify_components (sbitmap components
, edge e
,
29383 sbitmap edge_components
, bool /*is_prologue*/)
29385 /* Our LR pro/epilogue code moves LR via R0, so R0 had better not be
29386 live where we want to place that code. */
29387 if (bitmap_bit_p (edge_components
, 0)
29388 && bitmap_bit_p (DF_LIVE_IN (e
->dest
), 0))
29391 fprintf (dump_file
, "Disqualifying LR because GPR0 is live "
29392 "on entry to bb %d\n", e
->dest
->index
);
29393 bitmap_clear_bit (components
, 0);
29397 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
29399 rs6000_emit_prologue_components (sbitmap components
)
29401 rs6000_stack_t
*info
= rs6000_stack_info ();
29402 rtx ptr_reg
= gen_rtx_REG (Pmode
, frame_pointer_needed
29403 ? HARD_FRAME_POINTER_REGNUM
29404 : STACK_POINTER_REGNUM
);
29406 machine_mode reg_mode
= Pmode
;
29407 int reg_size
= TARGET_32BIT
? 4 : 8;
29408 machine_mode fp_reg_mode
= (TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
)
29410 int fp_reg_size
= 8;
29412 /* Prologue for LR. */
29413 if (bitmap_bit_p (components
, 0))
29415 rtx reg
= gen_rtx_REG (reg_mode
, 0);
29416 rtx_insn
*insn
= emit_move_insn (reg
, gen_rtx_REG (reg_mode
, LR_REGNO
));
29417 RTX_FRAME_RELATED_P (insn
) = 1;
29418 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
29420 int offset
= info
->lr_save_offset
;
29422 offset
+= info
->total_size
;
29424 insn
= emit_insn (gen_frame_store (reg
, ptr_reg
, offset
));
29425 RTX_FRAME_RELATED_P (insn
) = 1;
29426 rtx lr
= gen_rtx_REG (reg_mode
, LR_REGNO
);
29427 rtx mem
= copy_rtx (SET_DEST (single_set (insn
)));
29428 add_reg_note (insn
, REG_CFA_OFFSET
, gen_rtx_SET (mem
, lr
));
29431 /* Prologue for the GPRs. */
29432 int offset
= info
->gp_save_offset
;
29434 offset
+= info
->total_size
;
29436 for (int i
= info
->first_gp_reg_save
; i
< 32; i
++)
29438 if (bitmap_bit_p (components
, i
))
29440 rtx reg
= gen_rtx_REG (reg_mode
, i
);
29441 rtx_insn
*insn
= emit_insn (gen_frame_store (reg
, ptr_reg
, offset
));
29442 RTX_FRAME_RELATED_P (insn
) = 1;
29443 rtx set
= copy_rtx (single_set (insn
));
29444 add_reg_note (insn
, REG_CFA_OFFSET
, set
);
29447 offset
+= reg_size
;
29450 /* Prologue for the FPRs. */
29451 offset
= info
->fp_save_offset
;
29453 offset
+= info
->total_size
;
29455 for (int i
= info
->first_fp_reg_save
; i
< 64; i
++)
29457 if (bitmap_bit_p (components
, i
))
29459 rtx reg
= gen_rtx_REG (fp_reg_mode
, i
);
29460 rtx_insn
*insn
= emit_insn (gen_frame_store (reg
, ptr_reg
, offset
));
29461 RTX_FRAME_RELATED_P (insn
) = 1;
29462 rtx set
= copy_rtx (single_set (insn
));
29463 add_reg_note (insn
, REG_CFA_OFFSET
, set
);
29466 offset
+= fp_reg_size
;
29470 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
29472 rs6000_emit_epilogue_components (sbitmap components
)
29474 rs6000_stack_t
*info
= rs6000_stack_info ();
29475 rtx ptr_reg
= gen_rtx_REG (Pmode
, frame_pointer_needed
29476 ? HARD_FRAME_POINTER_REGNUM
29477 : STACK_POINTER_REGNUM
);
29479 machine_mode reg_mode
= Pmode
;
29480 int reg_size
= TARGET_32BIT
? 4 : 8;
29482 machine_mode fp_reg_mode
= (TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
)
29484 int fp_reg_size
= 8;
29486 /* Epilogue for the FPRs. */
29487 int offset
= info
->fp_save_offset
;
29489 offset
+= info
->total_size
;
29491 for (int i
= info
->first_fp_reg_save
; i
< 64; i
++)
29493 if (bitmap_bit_p (components
, i
))
29495 rtx reg
= gen_rtx_REG (fp_reg_mode
, i
);
29496 rtx_insn
*insn
= emit_insn (gen_frame_load (reg
, ptr_reg
, offset
));
29497 RTX_FRAME_RELATED_P (insn
) = 1;
29498 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
29501 offset
+= fp_reg_size
;
29504 /* Epilogue for the GPRs. */
29505 offset
= info
->gp_save_offset
;
29507 offset
+= info
->total_size
;
29509 for (int i
= info
->first_gp_reg_save
; i
< 32; i
++)
29511 if (bitmap_bit_p (components
, i
))
29513 rtx reg
= gen_rtx_REG (reg_mode
, i
);
29514 rtx_insn
*insn
= emit_insn (gen_frame_load (reg
, ptr_reg
, offset
));
29515 RTX_FRAME_RELATED_P (insn
) = 1;
29516 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
29519 offset
+= reg_size
;
29522 /* Epilogue for LR. */
29523 if (bitmap_bit_p (components
, 0))
29525 int offset
= info
->lr_save_offset
;
29527 offset
+= info
->total_size
;
29529 rtx reg
= gen_rtx_REG (reg_mode
, 0);
29530 rtx_insn
*insn
= emit_insn (gen_frame_load (reg
, ptr_reg
, offset
));
29532 rtx lr
= gen_rtx_REG (Pmode
, LR_REGNO
);
29533 insn
= emit_move_insn (lr
, reg
);
29534 RTX_FRAME_RELATED_P (insn
) = 1;
29535 add_reg_note (insn
, REG_CFA_RESTORE
, lr
);
29539 /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
29541 rs6000_set_handled_components (sbitmap components
)
29543 rs6000_stack_t
*info
= rs6000_stack_info ();
29545 for (int i
= info
->first_gp_reg_save
; i
< 32; i
++)
29546 if (bitmap_bit_p (components
, i
))
29547 cfun
->machine
->gpr_is_wrapped_separately
[i
] = true;
29549 for (int i
= info
->first_fp_reg_save
; i
< 64; i
++)
29550 if (bitmap_bit_p (components
, i
))
29551 cfun
->machine
->fpr_is_wrapped_separately
[i
- 32] = true;
29553 if (bitmap_bit_p (components
, 0))
29554 cfun
->machine
->lr_is_wrapped_separately
= true;
29557 /* Emit function prologue as insns. */
29560 rs6000_emit_prologue (void)
29562 rs6000_stack_t
*info
= rs6000_stack_info ();
29563 machine_mode reg_mode
= Pmode
;
29564 int reg_size
= TARGET_32BIT
? 4 : 8;
29565 machine_mode fp_reg_mode
= (TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
)
29567 int fp_reg_size
= 8;
29568 rtx sp_reg_rtx
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
29569 rtx frame_reg_rtx
= sp_reg_rtx
;
29570 unsigned int cr_save_regno
;
29571 rtx cr_save_rtx
= NULL_RTX
;
29574 int using_static_chain_p
= (cfun
->static_chain_decl
!= NULL_TREE
29575 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM
)
29576 && call_used_regs
[STATIC_CHAIN_REGNUM
]);
29577 int using_split_stack
= (flag_split_stack
29578 && (lookup_attribute ("no_split_stack",
29579 DECL_ATTRIBUTES (cfun
->decl
))
29582 /* Offset to top of frame for frame_reg and sp respectively. */
29583 HOST_WIDE_INT frame_off
= 0;
29584 HOST_WIDE_INT sp_off
= 0;
29585 /* sp_adjust is the stack adjusting instruction, tracked so that the
29586 insn setting up the split-stack arg pointer can be emitted just
29587 prior to it, when r12 is not used here for other purposes. */
29588 rtx_insn
*sp_adjust
= 0;
29591 /* Track and check usage of r0, r11, r12. */
29592 int reg_inuse
= using_static_chain_p
? 1 << 11 : 0;
29593 #define START_USE(R) do \
29595 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
29596 reg_inuse |= 1 << (R); \
29598 #define END_USE(R) do \
29600 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
29601 reg_inuse &= ~(1 << (R)); \
29603 #define NOT_INUSE(R) do \
29605 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
29608 #define START_USE(R) do {} while (0)
29609 #define END_USE(R) do {} while (0)
29610 #define NOT_INUSE(R) do {} while (0)
29613 if (DEFAULT_ABI
== ABI_ELFv2
29614 && !TARGET_SINGLE_PIC_BASE
)
29616 cfun
->machine
->r2_setup_needed
= df_regs_ever_live_p (TOC_REGNUM
);
29618 /* With -mminimal-toc we may generate an extra use of r2 below. */
29619 if (TARGET_TOC
&& TARGET_MINIMAL_TOC
29620 && !constant_pool_empty_p ())
29621 cfun
->machine
->r2_setup_needed
= true;
29625 if (flag_stack_usage_info
)
29626 current_function_static_stack_size
= info
->total_size
;
29628 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
29630 HOST_WIDE_INT size
= info
->total_size
;
29632 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
29634 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
29635 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT
,
29636 size
- STACK_CHECK_PROTECT
);
29639 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
);
29642 if (TARGET_FIX_AND_CONTINUE
)
29644 /* gdb on darwin arranges to forward a function from the old
29645 address by modifying the first 5 instructions of the function
29646 to branch to the overriding function. This is necessary to
29647 permit function pointers that point to the old function to
29648 actually forward to the new function. */
29649 emit_insn (gen_nop ());
29650 emit_insn (gen_nop ());
29651 emit_insn (gen_nop ());
29652 emit_insn (gen_nop ());
29653 emit_insn (gen_nop ());
29656 if (TARGET_SPE_ABI
&& info
->spe_64bit_regs_used
!= 0)
29658 reg_mode
= V2SImode
;
29662 /* Handle world saves specially here. */
29663 if (WORLD_SAVE_P (info
))
29670 /* save_world expects lr in r0. */
29671 reg0
= gen_rtx_REG (Pmode
, 0);
29672 if (info
->lr_save_p
)
29674 insn
= emit_move_insn (reg0
,
29675 gen_rtx_REG (Pmode
, LR_REGNO
));
29676 RTX_FRAME_RELATED_P (insn
) = 1;
29679 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
29680 assumptions about the offsets of various bits of the stack
29682 gcc_assert (info
->gp_save_offset
== -220
29683 && info
->fp_save_offset
== -144
29684 && info
->lr_save_offset
== 8
29685 && info
->cr_save_offset
== 4
29688 && (!crtl
->calls_eh_return
29689 || info
->ehrd_offset
== -432)
29690 && info
->vrsave_save_offset
== -224
29691 && info
->altivec_save_offset
== -416);
29693 treg
= gen_rtx_REG (SImode
, 11);
29694 emit_move_insn (treg
, GEN_INT (-info
->total_size
));
29696 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
29697 in R11. It also clobbers R12, so beware! */
29699 /* Preserve CR2 for save_world prologues */
29701 sz
+= 32 - info
->first_gp_reg_save
;
29702 sz
+= 64 - info
->first_fp_reg_save
;
29703 sz
+= LAST_ALTIVEC_REGNO
- info
->first_altivec_reg_save
+ 1;
29704 p
= rtvec_alloc (sz
);
29706 RTVEC_ELT (p
, j
++) = gen_rtx_CLOBBER (VOIDmode
,
29707 gen_rtx_REG (SImode
,
29709 RTVEC_ELT (p
, j
++) = gen_rtx_USE (VOIDmode
,
29710 gen_rtx_SYMBOL_REF (Pmode
,
29712 /* We do floats first so that the instruction pattern matches
29714 for (i
= 0; i
< 64 - info
->first_fp_reg_save
; i
++)
29716 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
29718 info
->first_fp_reg_save
+ i
),
29720 info
->fp_save_offset
+ frame_off
+ 8 * i
);
29721 for (i
= 0; info
->first_altivec_reg_save
+ i
<= LAST_ALTIVEC_REGNO
; i
++)
29723 = gen_frame_store (gen_rtx_REG (V4SImode
,
29724 info
->first_altivec_reg_save
+ i
),
29726 info
->altivec_save_offset
+ frame_off
+ 16 * i
);
29727 for (i
= 0; i
< 32 - info
->first_gp_reg_save
; i
++)
29729 = gen_frame_store (gen_rtx_REG (reg_mode
, info
->first_gp_reg_save
+ i
),
29731 info
->gp_save_offset
+ frame_off
+ reg_size
* i
);
29733 /* CR register traditionally saved as CR2. */
29735 = gen_frame_store (gen_rtx_REG (SImode
, CR2_REGNO
),
29736 frame_reg_rtx
, info
->cr_save_offset
+ frame_off
);
29737 /* Explain about use of R0. */
29738 if (info
->lr_save_p
)
29740 = gen_frame_store (reg0
,
29741 frame_reg_rtx
, info
->lr_save_offset
+ frame_off
);
29742 /* Explain what happens to the stack pointer. */
29744 rtx newval
= gen_rtx_PLUS (Pmode
, sp_reg_rtx
, treg
);
29745 RTVEC_ELT (p
, j
++) = gen_rtx_SET (sp_reg_rtx
, newval
);
29748 insn
= emit_insn (gen_rtx_PARALLEL (VOIDmode
, p
));
29749 rs6000_frame_related (insn
, frame_reg_rtx
, sp_off
- frame_off
,
29750 treg
, GEN_INT (-info
->total_size
));
29751 sp_off
= frame_off
= info
->total_size
;
29754 strategy
= info
->savres_strategy
;
29756 /* For V.4, update stack before we do any saving and set back pointer. */
29757 if (! WORLD_SAVE_P (info
)
29759 && (DEFAULT_ABI
== ABI_V4
29760 || crtl
->calls_eh_return
))
29762 bool need_r11
= (TARGET_SPE
29763 ? (!(strategy
& SAVE_INLINE_GPRS
)
29764 && info
->spe_64bit_regs_used
== 0)
29765 : (!(strategy
& SAVE_INLINE_FPRS
)
29766 || !(strategy
& SAVE_INLINE_GPRS
)
29767 || !(strategy
& SAVE_INLINE_VRS
)));
29768 int ptr_regno
= -1;
29769 rtx ptr_reg
= NULL_RTX
;
29772 if (info
->total_size
< 32767)
29773 frame_off
= info
->total_size
;
29776 else if (info
->cr_save_p
29778 || info
->first_fp_reg_save
< 64
29779 || info
->first_gp_reg_save
< 32
29780 || info
->altivec_size
!= 0
29781 || info
->vrsave_size
!= 0
29782 || crtl
->calls_eh_return
)
29786 /* The prologue won't be saving any regs so there is no need
29787 to set up a frame register to access any frame save area.
29788 We also won't be using frame_off anywhere below, but set
29789 the correct value anyway to protect against future
29790 changes to this function. */
29791 frame_off
= info
->total_size
;
29793 if (ptr_regno
!= -1)
29795 /* Set up the frame offset to that needed by the first
29796 out-of-line save function. */
29797 START_USE (ptr_regno
);
29798 ptr_reg
= gen_rtx_REG (Pmode
, ptr_regno
);
29799 frame_reg_rtx
= ptr_reg
;
29800 if (!(strategy
& SAVE_INLINE_FPRS
) && info
->fp_size
!= 0)
29801 gcc_checking_assert (info
->fp_save_offset
+ info
->fp_size
== 0);
29802 else if (!(strategy
& SAVE_INLINE_GPRS
) && info
->first_gp_reg_save
< 32)
29803 ptr_off
= info
->gp_save_offset
+ info
->gp_size
;
29804 else if (!(strategy
& SAVE_INLINE_VRS
) && info
->altivec_size
!= 0)
29805 ptr_off
= info
->altivec_save_offset
+ info
->altivec_size
;
29806 frame_off
= -ptr_off
;
29808 sp_adjust
= rs6000_emit_allocate_stack (info
->total_size
,
29810 if (REGNO (frame_reg_rtx
) == 12)
29812 sp_off
= info
->total_size
;
29813 if (frame_reg_rtx
!= sp_reg_rtx
)
29814 rs6000_emit_stack_tie (frame_reg_rtx
, false);
29817 /* If we use the link register, get it into r0. */
29818 if (!WORLD_SAVE_P (info
) && info
->lr_save_p
29819 && !cfun
->machine
->lr_is_wrapped_separately
)
29821 rtx addr
, reg
, mem
;
29823 reg
= gen_rtx_REG (Pmode
, 0);
29825 insn
= emit_move_insn (reg
, gen_rtx_REG (Pmode
, LR_REGNO
));
29826 RTX_FRAME_RELATED_P (insn
) = 1;
29828 if (!(strategy
& (SAVE_NOINLINE_GPRS_SAVES_LR
29829 | SAVE_NOINLINE_FPRS_SAVES_LR
)))
29831 addr
= gen_rtx_PLUS (Pmode
, frame_reg_rtx
,
29832 GEN_INT (info
->lr_save_offset
+ frame_off
));
29833 mem
= gen_rtx_MEM (Pmode
, addr
);
29834 /* This should not be of rs6000_sr_alias_set, because of
29835 __builtin_return_address. */
29837 insn
= emit_move_insn (mem
, reg
);
29838 rs6000_frame_related (insn
, frame_reg_rtx
, sp_off
- frame_off
,
29839 NULL_RTX
, NULL_RTX
);
29844 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
29845 r12 will be needed by out-of-line gpr restore. */
29846 cr_save_regno
= ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
29847 && !(strategy
& (SAVE_INLINE_GPRS
29848 | SAVE_NOINLINE_GPRS_SAVES_LR
))
29850 if (!WORLD_SAVE_P (info
)
29852 && REGNO (frame_reg_rtx
) != cr_save_regno
29853 && !(using_static_chain_p
&& cr_save_regno
== 11)
29854 && !(using_split_stack
&& cr_save_regno
== 12 && sp_adjust
))
29856 cr_save_rtx
= gen_rtx_REG (SImode
, cr_save_regno
);
29857 START_USE (cr_save_regno
);
29858 rs6000_emit_move_from_cr (cr_save_rtx
);
29861 /* Do any required saving of fpr's. If only one or two to save, do
29862 it ourselves. Otherwise, call function. */
29863 if (!WORLD_SAVE_P (info
) && (strategy
& SAVE_INLINE_FPRS
))
29865 int offset
= info
->fp_save_offset
+ frame_off
;
29866 for (int i
= info
->first_fp_reg_save
; i
< 64; i
++)
29869 && !cfun
->machine
->fpr_is_wrapped_separately
[i
- 32])
29870 emit_frame_save (frame_reg_rtx
, fp_reg_mode
, i
, offset
,
29871 sp_off
- frame_off
);
29873 offset
+= fp_reg_size
;
29876 else if (!WORLD_SAVE_P (info
) && info
->first_fp_reg_save
!= 64)
29878 bool lr
= (strategy
& SAVE_NOINLINE_FPRS_SAVES_LR
) != 0;
29879 int sel
= SAVRES_SAVE
| SAVRES_FPR
| (lr
? SAVRES_LR
: 0);
29880 unsigned ptr_regno
= ptr_regno_for_savres (sel
);
29881 rtx ptr_reg
= frame_reg_rtx
;
29883 if (REGNO (frame_reg_rtx
) == ptr_regno
)
29884 gcc_checking_assert (frame_off
== 0);
29887 ptr_reg
= gen_rtx_REG (Pmode
, ptr_regno
);
29888 NOT_INUSE (ptr_regno
);
29889 emit_insn (gen_add3_insn (ptr_reg
,
29890 frame_reg_rtx
, GEN_INT (frame_off
)));
29892 insn
= rs6000_emit_savres_rtx (info
, ptr_reg
,
29893 info
->fp_save_offset
,
29894 info
->lr_save_offset
,
29896 rs6000_frame_related (insn
, ptr_reg
, sp_off
,
29897 NULL_RTX
, NULL_RTX
);
29902 /* Save GPRs. This is done as a PARALLEL if we are using
29903 the store-multiple instructions. */
29904 if (!WORLD_SAVE_P (info
)
29906 && info
->spe_64bit_regs_used
!= 0
29907 && info
->first_gp_reg_save
!= 32)
29910 rtx spe_save_area_ptr
;
29911 HOST_WIDE_INT save_off
;
29912 int ool_adjust
= 0;
29914 /* Determine whether we can address all of the registers that need
29915 to be saved with an offset from frame_reg_rtx that fits in
29916 the small const field for SPE memory instructions. */
29917 int spe_regs_addressable
29918 = (SPE_CONST_OFFSET_OK (info
->spe_gp_save_offset
+ frame_off
29919 + reg_size
* (32 - info
->first_gp_reg_save
- 1))
29920 && (strategy
& SAVE_INLINE_GPRS
));
29922 if (spe_regs_addressable
)
29924 spe_save_area_ptr
= frame_reg_rtx
;
29925 save_off
= frame_off
;
29929 /* Make r11 point to the start of the SPE save area. We need
29930 to be careful here if r11 is holding the static chain. If
29931 it is, then temporarily save it in r0. */
29932 HOST_WIDE_INT offset
;
29934 if (!(strategy
& SAVE_INLINE_GPRS
))
29935 ool_adjust
= 8 * (info
->first_gp_reg_save
- FIRST_SAVED_GP_REGNO
);
29936 offset
= info
->spe_gp_save_offset
+ frame_off
- ool_adjust
;
29937 spe_save_area_ptr
= gen_rtx_REG (Pmode
, 11);
29938 save_off
= frame_off
- offset
;
29940 if (using_static_chain_p
)
29942 rtx r0
= gen_rtx_REG (Pmode
, 0);
29945 gcc_assert (info
->first_gp_reg_save
> 11);
29947 emit_move_insn (r0
, spe_save_area_ptr
);
29949 else if (REGNO (frame_reg_rtx
) != 11)
29952 emit_insn (gen_addsi3 (spe_save_area_ptr
,
29953 frame_reg_rtx
, GEN_INT (offset
)));
29954 if (!using_static_chain_p
&& REGNO (frame_reg_rtx
) == 11)
29955 frame_off
= -info
->spe_gp_save_offset
+ ool_adjust
;
29958 if ((strategy
& SAVE_INLINE_GPRS
))
29960 for (i
= 0; i
< 32 - info
->first_gp_reg_save
; i
++)
29961 if (rs6000_reg_live_or_pic_offset_p (info
->first_gp_reg_save
+ i
))
29962 emit_frame_save (spe_save_area_ptr
, reg_mode
,
29963 info
->first_gp_reg_save
+ i
,
29964 (info
->spe_gp_save_offset
+ save_off
29966 sp_off
- save_off
);
29970 insn
= rs6000_emit_savres_rtx (info
, spe_save_area_ptr
,
29971 info
->spe_gp_save_offset
+ save_off
,
29973 SAVRES_SAVE
| SAVRES_GPR
);
29975 rs6000_frame_related (insn
, spe_save_area_ptr
, sp_off
- save_off
,
29976 NULL_RTX
, NULL_RTX
);
29979 /* Move the static chain pointer back. */
29980 if (!spe_regs_addressable
)
29982 if (using_static_chain_p
)
29984 emit_move_insn (spe_save_area_ptr
, gen_rtx_REG (Pmode
, 0));
29987 else if (REGNO (frame_reg_rtx
) != 11)
29991 else if (!WORLD_SAVE_P (info
) && !(strategy
& SAVE_INLINE_GPRS
))
29993 bool lr
= (strategy
& SAVE_NOINLINE_GPRS_SAVES_LR
) != 0;
29994 int sel
= SAVRES_SAVE
| SAVRES_GPR
| (lr
? SAVRES_LR
: 0);
29995 unsigned ptr_regno
= ptr_regno_for_savres (sel
);
29996 rtx ptr_reg
= frame_reg_rtx
;
29997 bool ptr_set_up
= REGNO (ptr_reg
) == ptr_regno
;
29998 int end_save
= info
->gp_save_offset
+ info
->gp_size
;
30001 if (ptr_regno
== 12)
30004 ptr_reg
= gen_rtx_REG (Pmode
, ptr_regno
);
30006 /* Need to adjust r11 (r12) if we saved any FPRs. */
30007 if (end_save
+ frame_off
!= 0)
30009 rtx offset
= GEN_INT (end_save
+ frame_off
);
30012 frame_off
= -end_save
;
30014 NOT_INUSE (ptr_regno
);
30015 emit_insn (gen_add3_insn (ptr_reg
, frame_reg_rtx
, offset
));
30017 else if (!ptr_set_up
)
30019 NOT_INUSE (ptr_regno
);
30020 emit_move_insn (ptr_reg
, frame_reg_rtx
);
30022 ptr_off
= -end_save
;
30023 insn
= rs6000_emit_savres_rtx (info
, ptr_reg
,
30024 info
->gp_save_offset
+ ptr_off
,
30025 info
->lr_save_offset
+ ptr_off
,
30027 rs6000_frame_related (insn
, ptr_reg
, sp_off
- ptr_off
,
30028 NULL_RTX
, NULL_RTX
);
30032 else if (!WORLD_SAVE_P (info
) && (strategy
& SAVE_MULTIPLE
))
30036 p
= rtvec_alloc (32 - info
->first_gp_reg_save
);
30037 for (i
= 0; i
< 32 - info
->first_gp_reg_save
; i
++)
30039 = gen_frame_store (gen_rtx_REG (reg_mode
, info
->first_gp_reg_save
+ i
),
30041 info
->gp_save_offset
+ frame_off
+ reg_size
* i
);
30042 insn
= emit_insn (gen_rtx_PARALLEL (VOIDmode
, p
));
30043 rs6000_frame_related (insn
, frame_reg_rtx
, sp_off
- frame_off
,
30044 NULL_RTX
, NULL_RTX
);
30046 else if (!WORLD_SAVE_P (info
))
30048 int offset
= info
->gp_save_offset
+ frame_off
;
30049 for (int i
= info
->first_gp_reg_save
; i
< 32; i
++)
30051 if (rs6000_reg_live_or_pic_offset_p (i
)
30052 && !cfun
->machine
->gpr_is_wrapped_separately
[i
])
30053 emit_frame_save (frame_reg_rtx
, reg_mode
, i
, offset
,
30054 sp_off
- frame_off
);
30056 offset
+= reg_size
;
30060 if (crtl
->calls_eh_return
)
30067 unsigned int regno
= EH_RETURN_DATA_REGNO (i
);
30068 if (regno
== INVALID_REGNUM
)
30072 p
= rtvec_alloc (i
);
30076 unsigned int regno
= EH_RETURN_DATA_REGNO (i
);
30077 if (regno
== INVALID_REGNUM
)
30081 = gen_frame_store (gen_rtx_REG (reg_mode
, regno
),
30083 info
->ehrd_offset
+ sp_off
+ reg_size
* (int) i
);
30084 RTVEC_ELT (p
, i
) = set
;
30085 RTX_FRAME_RELATED_P (set
) = 1;
30088 insn
= emit_insn (gen_blockage ());
30089 RTX_FRAME_RELATED_P (insn
) = 1;
30090 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, gen_rtx_PARALLEL (VOIDmode
, p
));
30093 /* In AIX ABI we need to make sure r2 is really saved. */
30094 if (TARGET_AIX
&& crtl
->calls_eh_return
)
30096 rtx tmp_reg
, tmp_reg_si
, hi
, lo
, compare_result
, toc_save_done
, jump
;
30097 rtx join_insn
, note
;
30098 rtx_insn
*save_insn
;
30099 long toc_restore_insn
;
30101 tmp_reg
= gen_rtx_REG (Pmode
, 11);
30102 tmp_reg_si
= gen_rtx_REG (SImode
, 11);
30103 if (using_static_chain_p
)
30106 emit_move_insn (gen_rtx_REG (Pmode
, 0), tmp_reg
);
30110 emit_move_insn (tmp_reg
, gen_rtx_REG (Pmode
, LR_REGNO
));
30111 /* Peek at instruction to which this function returns. If it's
30112 restoring r2, then we know we've already saved r2. We can't
30113 unconditionally save r2 because the value we have will already
30114 be updated if we arrived at this function via a plt call or
30115 toc adjusting stub. */
30116 emit_move_insn (tmp_reg_si
, gen_rtx_MEM (SImode
, tmp_reg
));
30117 toc_restore_insn
= ((TARGET_32BIT
? 0x80410000 : 0xE8410000)
30118 + RS6000_TOC_SAVE_SLOT
);
30119 hi
= gen_int_mode (toc_restore_insn
& ~0xffff, SImode
);
30120 emit_insn (gen_xorsi3 (tmp_reg_si
, tmp_reg_si
, hi
));
30121 compare_result
= gen_rtx_REG (CCUNSmode
, CR0_REGNO
);
30122 validate_condition_mode (EQ
, CCUNSmode
);
30123 lo
= gen_int_mode (toc_restore_insn
& 0xffff, SImode
);
30124 emit_insn (gen_rtx_SET (compare_result
,
30125 gen_rtx_COMPARE (CCUNSmode
, tmp_reg_si
, lo
)));
30126 toc_save_done
= gen_label_rtx ();
30127 jump
= gen_rtx_IF_THEN_ELSE (VOIDmode
,
30128 gen_rtx_EQ (VOIDmode
, compare_result
,
30130 gen_rtx_LABEL_REF (VOIDmode
, toc_save_done
),
30132 jump
= emit_jump_insn (gen_rtx_SET (pc_rtx
, jump
));
30133 JUMP_LABEL (jump
) = toc_save_done
;
30134 LABEL_NUSES (toc_save_done
) += 1;
30136 save_insn
= emit_frame_save (frame_reg_rtx
, reg_mode
,
30137 TOC_REGNUM
, frame_off
+ RS6000_TOC_SAVE_SLOT
,
30138 sp_off
- frame_off
);
30140 emit_label (toc_save_done
);
30142 /* ??? If we leave SAVE_INSN as marked as saving R2, then we'll
30143 have a CFG that has different saves along different paths.
30144 Move the note to a dummy blockage insn, which describes that
30145 R2 is unconditionally saved after the label. */
30146 /* ??? An alternate representation might be a special insn pattern
30147 containing both the branch and the store. That might let the
30148 code that minimizes the number of DW_CFA_advance opcodes better
30149 freedom in placing the annotations. */
30150 note
= find_reg_note (save_insn
, REG_FRAME_RELATED_EXPR
, NULL
);
30152 remove_note (save_insn
, note
);
30154 note
= alloc_reg_note (REG_FRAME_RELATED_EXPR
,
30155 copy_rtx (PATTERN (save_insn
)), NULL_RTX
);
30156 RTX_FRAME_RELATED_P (save_insn
) = 0;
30158 join_insn
= emit_insn (gen_blockage ());
30159 REG_NOTES (join_insn
) = note
;
30160 RTX_FRAME_RELATED_P (join_insn
) = 1;
30162 if (using_static_chain_p
)
30164 emit_move_insn (tmp_reg
, gen_rtx_REG (Pmode
, 0));
30171 /* Save CR if we use any that must be preserved. */
30172 if (!WORLD_SAVE_P (info
) && info
->cr_save_p
)
30174 rtx addr
= gen_rtx_PLUS (Pmode
, frame_reg_rtx
,
30175 GEN_INT (info
->cr_save_offset
+ frame_off
));
30176 rtx mem
= gen_frame_mem (SImode
, addr
);
30178 /* If we didn't copy cr before, do so now using r0. */
30179 if (cr_save_rtx
== NULL_RTX
)
30182 cr_save_rtx
= gen_rtx_REG (SImode
, 0);
30183 rs6000_emit_move_from_cr (cr_save_rtx
);
30186 /* Saving CR requires a two-instruction sequence: one instruction
30187 to move the CR to a general-purpose register, and a second
30188 instruction that stores the GPR to memory.
30190 We do not emit any DWARF CFI records for the first of these,
30191 because we cannot properly represent the fact that CR is saved in
30192 a register. One reason is that we cannot express that multiple
30193 CR fields are saved; another reason is that on 64-bit, the size
30194 of the CR register in DWARF (4 bytes) differs from the size of
30195 a general-purpose register.
30197 This means if any intervening instruction were to clobber one of
30198 the call-saved CR fields, we'd have incorrect CFI. To prevent
30199 this from happening, we mark the store to memory as a use of
30200 those CR fields, which prevents any such instruction from being
30201 scheduled in between the two instructions. */
30206 crsave_v
[n_crsave
++] = gen_rtx_SET (mem
, cr_save_rtx
);
30207 for (i
= 0; i
< 8; i
++)
30208 if (save_reg_p (CR0_REGNO
+ i
))
30209 crsave_v
[n_crsave
++]
30210 = gen_rtx_USE (VOIDmode
, gen_rtx_REG (CCmode
, CR0_REGNO
+ i
));
30212 insn
= emit_insn (gen_rtx_PARALLEL (VOIDmode
,
30213 gen_rtvec_v (n_crsave
, crsave_v
)));
30214 END_USE (REGNO (cr_save_rtx
));
30216 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
30217 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
30218 so we need to construct a frame expression manually. */
30219 RTX_FRAME_RELATED_P (insn
) = 1;
30221 /* Update address to be stack-pointer relative, like
30222 rs6000_frame_related would do. */
30223 addr
= gen_rtx_PLUS (Pmode
, gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
),
30224 GEN_INT (info
->cr_save_offset
+ sp_off
));
30225 mem
= gen_frame_mem (SImode
, addr
);
30227 if (DEFAULT_ABI
== ABI_ELFv2
)
30229 /* In the ELFv2 ABI we generate separate CFI records for each
30230 CR field that was actually saved. They all point to the
30231 same 32-bit stack slot. */
30235 for (i
= 0; i
< 8; i
++)
30236 if (save_reg_p (CR0_REGNO
+ i
))
30239 = gen_rtx_SET (mem
, gen_rtx_REG (SImode
, CR0_REGNO
+ i
));
30241 RTX_FRAME_RELATED_P (crframe
[n_crframe
]) = 1;
30245 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
30246 gen_rtx_PARALLEL (VOIDmode
,
30247 gen_rtvec_v (n_crframe
, crframe
)));
30251 /* In other ABIs, by convention, we use a single CR regnum to
30252 represent the fact that all call-saved CR fields are saved.
30253 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
30254 rtx set
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, CR2_REGNO
));
30255 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, set
);
30259 /* In the ELFv2 ABI we need to save all call-saved CR fields into
30260 *separate* slots if the routine calls __builtin_eh_return, so
30261 that they can be independently restored by the unwinder. */
30262 if (DEFAULT_ABI
== ABI_ELFv2
&& crtl
->calls_eh_return
)
30264 int i
, cr_off
= info
->ehcr_offset
;
30267 /* ??? We might get better performance by using multiple mfocrf
30269 crsave
= gen_rtx_REG (SImode
, 0);
30270 emit_insn (gen_movesi_from_cr (crsave
));
30272 for (i
= 0; i
< 8; i
++)
30273 if (!call_used_regs
[CR0_REGNO
+ i
])
30275 rtvec p
= rtvec_alloc (2);
30277 = gen_frame_store (crsave
, frame_reg_rtx
, cr_off
+ frame_off
);
30279 = gen_rtx_USE (VOIDmode
, gen_rtx_REG (CCmode
, CR0_REGNO
+ i
));
30281 insn
= emit_insn (gen_rtx_PARALLEL (VOIDmode
, p
));
30283 RTX_FRAME_RELATED_P (insn
) = 1;
30284 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
30285 gen_frame_store (gen_rtx_REG (SImode
, CR0_REGNO
+ i
),
30286 sp_reg_rtx
, cr_off
+ sp_off
));
30288 cr_off
+= reg_size
;
30292 /* Update stack and set back pointer unless this is V.4,
30293 for which it was done previously. */
30294 if (!WORLD_SAVE_P (info
) && info
->push_p
30295 && !(DEFAULT_ABI
== ABI_V4
|| crtl
->calls_eh_return
))
30297 rtx ptr_reg
= NULL
;
30300 /* If saving altivec regs we need to be able to address all save
30301 locations using a 16-bit offset. */
30302 if ((strategy
& SAVE_INLINE_VRS
) == 0
30303 || (info
->altivec_size
!= 0
30304 && (info
->altivec_save_offset
+ info
->altivec_size
- 16
30305 + info
->total_size
- frame_off
) > 32767)
30306 || (info
->vrsave_size
!= 0
30307 && (info
->vrsave_save_offset
30308 + info
->total_size
- frame_off
) > 32767))
30310 int sel
= SAVRES_SAVE
| SAVRES_VR
;
30311 unsigned ptr_regno
= ptr_regno_for_savres (sel
);
30313 if (using_static_chain_p
30314 && ptr_regno
== STATIC_CHAIN_REGNUM
)
30316 if (REGNO (frame_reg_rtx
) != ptr_regno
)
30317 START_USE (ptr_regno
);
30318 ptr_reg
= gen_rtx_REG (Pmode
, ptr_regno
);
30319 frame_reg_rtx
= ptr_reg
;
30320 ptr_off
= info
->altivec_save_offset
+ info
->altivec_size
;
30321 frame_off
= -ptr_off
;
30323 else if (REGNO (frame_reg_rtx
) == 1)
30324 frame_off
= info
->total_size
;
30325 sp_adjust
= rs6000_emit_allocate_stack (info
->total_size
,
30327 if (REGNO (frame_reg_rtx
) == 12)
30329 sp_off
= info
->total_size
;
30330 if (frame_reg_rtx
!= sp_reg_rtx
)
30331 rs6000_emit_stack_tie (frame_reg_rtx
, false);
30334 /* Set frame pointer, if needed. */
30335 if (frame_pointer_needed
)
30337 insn
= emit_move_insn (gen_rtx_REG (Pmode
, HARD_FRAME_POINTER_REGNUM
),
30339 RTX_FRAME_RELATED_P (insn
) = 1;
30342 /* Save AltiVec registers if needed. Save here because the red zone does
30343 not always include AltiVec registers. */
30344 if (!WORLD_SAVE_P (info
)
30345 && info
->altivec_size
!= 0 && (strategy
& SAVE_INLINE_VRS
) == 0)
30347 int end_save
= info
->altivec_save_offset
+ info
->altivec_size
;
30349 /* Oddly, the vector save/restore functions point r0 at the end
30350 of the save area, then use r11 or r12 to load offsets for
30351 [reg+reg] addressing. */
30352 rtx ptr_reg
= gen_rtx_REG (Pmode
, 0);
30353 int scratch_regno
= ptr_regno_for_savres (SAVRES_SAVE
| SAVRES_VR
);
30354 rtx scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
30356 gcc_checking_assert (scratch_regno
== 11 || scratch_regno
== 12);
30358 if (scratch_regno
== 12)
30360 if (end_save
+ frame_off
!= 0)
30362 rtx offset
= GEN_INT (end_save
+ frame_off
);
30364 emit_insn (gen_add3_insn (ptr_reg
, frame_reg_rtx
, offset
));
30367 emit_move_insn (ptr_reg
, frame_reg_rtx
);
30369 ptr_off
= -end_save
;
30370 insn
= rs6000_emit_savres_rtx (info
, scratch_reg
,
30371 info
->altivec_save_offset
+ ptr_off
,
30372 0, V4SImode
, SAVRES_SAVE
| SAVRES_VR
);
30373 rs6000_frame_related (insn
, scratch_reg
, sp_off
- ptr_off
,
30374 NULL_RTX
, NULL_RTX
);
30375 if (REGNO (frame_reg_rtx
) == REGNO (scratch_reg
))
30377 /* The oddity mentioned above clobbered our frame reg. */
30378 emit_move_insn (frame_reg_rtx
, ptr_reg
);
30379 frame_off
= ptr_off
;
30382 else if (!WORLD_SAVE_P (info
)
30383 && info
->altivec_size
!= 0)
30387 for (i
= info
->first_altivec_reg_save
; i
<= LAST_ALTIVEC_REGNO
; ++i
)
30388 if (info
->vrsave_mask
& ALTIVEC_REG_BIT (i
))
30390 rtx areg
, savereg
, mem
;
30391 HOST_WIDE_INT offset
;
30393 offset
= (info
->altivec_save_offset
+ frame_off
30394 + 16 * (i
- info
->first_altivec_reg_save
));
30396 savereg
= gen_rtx_REG (V4SImode
, i
);
30398 if (TARGET_P9_DFORM_VECTOR
&& quad_address_offset_p (offset
))
30400 mem
= gen_frame_mem (V4SImode
,
30401 gen_rtx_PLUS (Pmode
, frame_reg_rtx
,
30402 GEN_INT (offset
)));
30403 insn
= emit_insn (gen_rtx_SET (mem
, savereg
));
30409 areg
= gen_rtx_REG (Pmode
, 0);
30410 emit_move_insn (areg
, GEN_INT (offset
));
30412 /* AltiVec addressing mode is [reg+reg]. */
30413 mem
= gen_frame_mem (V4SImode
,
30414 gen_rtx_PLUS (Pmode
, frame_reg_rtx
, areg
));
30416 /* Rather than emitting a generic move, force use of the stvx
30417 instruction, which we always want on ISA 2.07 (power8) systems.
30418 In particular we don't want xxpermdi/stxvd2x for little
30420 insn
= emit_insn (gen_altivec_stvx_v4si_internal (mem
, savereg
));
30423 rs6000_frame_related (insn
, frame_reg_rtx
, sp_off
- frame_off
,
30424 areg
, GEN_INT (offset
));
30428 /* VRSAVE is a bit vector representing which AltiVec registers
30429 are used. The OS uses this to determine which vector
30430 registers to save on a context switch. We need to save
30431 VRSAVE on the stack frame, add whatever AltiVec registers we
30432 used in this function, and do the corresponding magic in the
30435 if (!WORLD_SAVE_P (info
)
30436 && info
->vrsave_size
!= 0)
30442 /* Get VRSAVE onto a GPR. Note that ABI_V4 and ABI_DARWIN might
30443 be using r12 as frame_reg_rtx and r11 as the static chain
30444 pointer for nested functions. */
30446 if ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
30447 && !using_static_chain_p
)
30449 else if (using_split_stack
|| REGNO (frame_reg_rtx
) == 12)
30452 if (using_static_chain_p
)
30456 NOT_INUSE (save_regno
);
30457 reg
= gen_rtx_REG (SImode
, save_regno
);
30458 vrsave
= gen_rtx_REG (SImode
, VRSAVE_REGNO
);
30460 emit_insn (gen_get_vrsave_internal (reg
));
30462 emit_insn (gen_rtx_SET (reg
, vrsave
));
30465 offset
= info
->vrsave_save_offset
+ frame_off
;
30466 insn
= emit_insn (gen_frame_store (reg
, frame_reg_rtx
, offset
));
30468 /* Include the registers in the mask. */
30469 emit_insn (gen_iorsi3 (reg
, reg
, GEN_INT ((int) info
->vrsave_mask
)));
30471 insn
= emit_insn (generate_set_vrsave (reg
, info
, 0));
30474 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
30475 if (!TARGET_SINGLE_PIC_BASE
30476 && ((TARGET_TOC
&& TARGET_MINIMAL_TOC
30477 && !constant_pool_empty_p ())
30478 || (DEFAULT_ABI
== ABI_V4
30479 && (flag_pic
== 1 || (flag_pic
&& TARGET_SECURE_PLT
))
30480 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM
))))
30482 /* If emit_load_toc_table will use the link register, we need to save
30483 it. We use R12 for this purpose because emit_load_toc_table
30484 can use register 0. This allows us to use a plain 'blr' to return
30485 from the procedure more often. */
30486 int save_LR_around_toc_setup
= (TARGET_ELF
30487 && DEFAULT_ABI
== ABI_V4
30489 && ! info
->lr_save_p
30490 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
) > 0);
30491 if (save_LR_around_toc_setup
)
30493 rtx lr
= gen_rtx_REG (Pmode
, LR_REGNO
);
30494 rtx tmp
= gen_rtx_REG (Pmode
, 12);
30497 insn
= emit_move_insn (tmp
, lr
);
30498 RTX_FRAME_RELATED_P (insn
) = 1;
30500 rs6000_emit_load_toc_table (TRUE
);
30502 insn
= emit_move_insn (lr
, tmp
);
30503 add_reg_note (insn
, REG_CFA_RESTORE
, lr
);
30504 RTX_FRAME_RELATED_P (insn
) = 1;
30507 rs6000_emit_load_toc_table (TRUE
);
30511 if (!TARGET_SINGLE_PIC_BASE
30512 && DEFAULT_ABI
== ABI_DARWIN
30513 && flag_pic
&& crtl
->uses_pic_offset_table
)
30515 rtx lr
= gen_rtx_REG (Pmode
, LR_REGNO
);
30516 rtx src
= gen_rtx_SYMBOL_REF (Pmode
, MACHOPIC_FUNCTION_BASE_NAME
);
30518 /* Save and restore LR locally around this call (in R0). */
30519 if (!info
->lr_save_p
)
30520 emit_move_insn (gen_rtx_REG (Pmode
, 0), lr
);
30522 emit_insn (gen_load_macho_picbase (src
));
30524 emit_move_insn (gen_rtx_REG (Pmode
,
30525 RS6000_PIC_OFFSET_TABLE_REGNUM
),
30528 if (!info
->lr_save_p
)
30529 emit_move_insn (lr
, gen_rtx_REG (Pmode
, 0));
30533 /* If we need to, save the TOC register after doing the stack setup.
30534 Do not emit eh frame info for this save. The unwinder wants info,
30535 conceptually attached to instructions in this function, about
30536 register values in the caller of this function. This R2 may have
30537 already been changed from the value in the caller.
30538 We don't attempt to write accurate DWARF EH frame info for R2
30539 because code emitted by gcc for a (non-pointer) function call
30540 doesn't save and restore R2. Instead, R2 is managed out-of-line
30541 by a linker generated plt call stub when the function resides in
30542 a shared library. This behavior is costly to describe in DWARF,
30543 both in terms of the size of DWARF info and the time taken in the
30544 unwinder to interpret it. R2 changes, apart from the
30545 calls_eh_return case earlier in this function, are handled by
30546 linux-unwind.h frob_update_context. */
30547 if (rs6000_save_toc_in_prologue_p ())
30549 rtx reg
= gen_rtx_REG (reg_mode
, TOC_REGNUM
);
30550 emit_insn (gen_frame_store (reg
, sp_reg_rtx
, RS6000_TOC_SAVE_SLOT
));
30553 if (using_split_stack
&& split_stack_arg_pointer_used_p ())
30555 /* Set up the arg pointer (r12) for -fsplit-stack code. If
30556 __morestack was called, it left the arg pointer to the old
30557 stack in r29. Otherwise, the arg pointer is the top of the
30559 cfun
->machine
->split_stack_argp_used
= true;
30562 rtx r12
= gen_rtx_REG (Pmode
, 12);
30563 rtx set_r12
= gen_rtx_SET (r12
, sp_reg_rtx
);
30564 emit_insn_before (set_r12
, sp_adjust
);
30566 else if (frame_off
!= 0 || REGNO (frame_reg_rtx
) != 12)
30568 rtx r12
= gen_rtx_REG (Pmode
, 12);
30569 if (frame_off
== 0)
30570 emit_move_insn (r12
, frame_reg_rtx
);
30572 emit_insn (gen_add3_insn (r12
, frame_reg_rtx
, GEN_INT (frame_off
)));
30576 rtx r12
= gen_rtx_REG (Pmode
, 12);
30577 rtx r29
= gen_rtx_REG (Pmode
, 29);
30578 rtx cr7
= gen_rtx_REG (CCUNSmode
, CR7_REGNO
);
30579 rtx not_more
= gen_label_rtx ();
30582 jump
= gen_rtx_IF_THEN_ELSE (VOIDmode
,
30583 gen_rtx_GEU (VOIDmode
, cr7
, const0_rtx
),
30584 gen_rtx_LABEL_REF (VOIDmode
, not_more
),
30586 jump
= emit_jump_insn (gen_rtx_SET (pc_rtx
, jump
));
30587 JUMP_LABEL (jump
) = not_more
;
30588 LABEL_NUSES (not_more
) += 1;
30589 emit_move_insn (r12
, r29
);
30590 emit_label (not_more
);
30595 /* Output .extern statements for the save/restore routines we use. */
30598 rs6000_output_savres_externs (FILE *file
)
30600 rs6000_stack_t
*info
= rs6000_stack_info ();
30602 if (TARGET_DEBUG_STACK
)
30603 debug_stack_info (info
);
30605 /* Write .extern for any function we will call to save and restore
30607 if (info
->first_fp_reg_save
< 64
30612 int regno
= info
->first_fp_reg_save
- 32;
30614 if ((info
->savres_strategy
& SAVE_INLINE_FPRS
) == 0)
30616 bool lr
= (info
->savres_strategy
& SAVE_NOINLINE_FPRS_SAVES_LR
) != 0;
30617 int sel
= SAVRES_SAVE
| SAVRES_FPR
| (lr
? SAVRES_LR
: 0);
30618 name
= rs6000_savres_routine_name (info
, regno
, sel
);
30619 fprintf (file
, "\t.extern %s\n", name
);
30621 if ((info
->savres_strategy
& REST_INLINE_FPRS
) == 0)
30623 bool lr
= (info
->savres_strategy
30624 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR
) == 0;
30625 int sel
= SAVRES_FPR
| (lr
? SAVRES_LR
: 0);
30626 name
= rs6000_savres_routine_name (info
, regno
, sel
);
30627 fprintf (file
, "\t.extern %s\n", name
);
30632 /* Write function prologue. */
30635 rs6000_output_function_prologue (FILE *file
)
30637 if (!cfun
->is_thunk
)
30638 rs6000_output_savres_externs (file
);
30640 /* ELFv2 ABI r2 setup code and local entry point. This must follow
30641 immediately after the global entry point label. */
30642 if (rs6000_global_entry_point_needed_p ())
30644 const char *name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
30646 (*targetm
.asm_out
.internal_label
) (file
, "LCF", rs6000_pic_labelno
);
30648 if (TARGET_CMODEL
!= CMODEL_LARGE
)
30650 /* In the small and medium code models, we assume the TOC is less
30651 2 GB away from the text section, so it can be computed via the
30652 following two-instruction sequence. */
30655 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCF", rs6000_pic_labelno
);
30656 fprintf (file
, "0:\taddis 2,12,.TOC.-");
30657 assemble_name (file
, buf
);
30658 fprintf (file
, "@ha\n");
30659 fprintf (file
, "\taddi 2,2,.TOC.-");
30660 assemble_name (file
, buf
);
30661 fprintf (file
, "@l\n");
30665 /* In the large code model, we allow arbitrary offsets between the
30666 TOC and the text section, so we have to load the offset from
30667 memory. The data field is emitted directly before the global
30668 entry point in rs6000_elf_declare_function_name. */
30671 #ifdef HAVE_AS_ENTRY_MARKERS
30672 /* If supported by the linker, emit a marker relocation. If the
30673 total code size of the final executable or shared library
30674 happens to fit into 2 GB after all, the linker will replace
30675 this code sequence with the sequence for the small or medium
30677 fprintf (file
, "\t.reloc .,R_PPC64_ENTRY\n");
30679 fprintf (file
, "\tld 2,");
30680 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCL", rs6000_pic_labelno
);
30681 assemble_name (file
, buf
);
30682 fprintf (file
, "-");
30683 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCF", rs6000_pic_labelno
);
30684 assemble_name (file
, buf
);
30685 fprintf (file
, "(12)\n");
30686 fprintf (file
, "\tadd 2,2,12\n");
30689 fputs ("\t.localentry\t", file
);
30690 assemble_name (file
, name
);
30691 fputs (",.-", file
);
30692 assemble_name (file
, name
);
30693 fputs ("\n", file
);
30696 /* Output -mprofile-kernel code. This needs to be done here instead of
30697 in output_function_profile since it must go after the ELFv2 ABI
30698 local entry point. */
30699 if (TARGET_PROFILE_KERNEL
&& crtl
->profile
)
30701 gcc_assert (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
);
30702 gcc_assert (!TARGET_32BIT
);
30704 asm_fprintf (file
, "\tmflr %s\n", reg_names
[0]);
30706 /* In the ELFv2 ABI we have no compiler stack word. It must be
30707 the resposibility of _mcount to preserve the static chain
30708 register if required. */
30709 if (DEFAULT_ABI
!= ABI_ELFv2
30710 && cfun
->static_chain_decl
!= NULL
)
30712 asm_fprintf (file
, "\tstd %s,24(%s)\n",
30713 reg_names
[STATIC_CHAIN_REGNUM
], reg_names
[1]);
30714 fprintf (file
, "\tbl %s\n", RS6000_MCOUNT
);
30715 asm_fprintf (file
, "\tld %s,24(%s)\n",
30716 reg_names
[STATIC_CHAIN_REGNUM
], reg_names
[1]);
30719 fprintf (file
, "\tbl %s\n", RS6000_MCOUNT
);
30722 rs6000_pic_labelno
++;
30725 /* -mprofile-kernel code calls mcount before the function prolog,
30726 so a profiled leaf function should stay a leaf function. */
30728 rs6000_keep_leaf_when_profiled ()
30730 return TARGET_PROFILE_KERNEL
;
30733 /* Non-zero if vmx regs are restored before the frame pop, zero if
30734 we restore after the pop when possible. */
30735 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
30737 /* Restoring cr is a two step process: loading a reg from the frame
30738 save, then moving the reg to cr. For ABI_V4 we must let the
30739 unwinder know that the stack location is no longer valid at or
30740 before the stack deallocation, but we can't emit a cfa_restore for
30741 cr at the stack deallocation like we do for other registers.
30742 The trouble is that it is possible for the move to cr to be
30743 scheduled after the stack deallocation. So say exactly where cr
30744 is located on each of the two insns. */
30747 load_cr_save (int regno
, rtx frame_reg_rtx
, int offset
, bool exit_func
)
30749 rtx mem
= gen_frame_mem_offset (SImode
, frame_reg_rtx
, offset
);
30750 rtx reg
= gen_rtx_REG (SImode
, regno
);
30751 rtx_insn
*insn
= emit_move_insn (reg
, mem
);
30753 if (!exit_func
&& DEFAULT_ABI
== ABI_V4
)
30755 rtx cr
= gen_rtx_REG (SImode
, CR2_REGNO
);
30756 rtx set
= gen_rtx_SET (reg
, cr
);
30758 add_reg_note (insn
, REG_CFA_REGISTER
, set
);
30759 RTX_FRAME_RELATED_P (insn
) = 1;
30764 /* Reload CR from REG. */
30767 restore_saved_cr (rtx reg
, int using_mfcr_multiple
, bool exit_func
)
30772 if (using_mfcr_multiple
)
30774 for (i
= 0; i
< 8; i
++)
30775 if (save_reg_p (CR0_REGNO
+ i
))
30777 gcc_assert (count
);
30780 if (using_mfcr_multiple
&& count
> 1)
30786 p
= rtvec_alloc (count
);
30789 for (i
= 0; i
< 8; i
++)
30790 if (save_reg_p (CR0_REGNO
+ i
))
30792 rtvec r
= rtvec_alloc (2);
30793 RTVEC_ELT (r
, 0) = reg
;
30794 RTVEC_ELT (r
, 1) = GEN_INT (1 << (7-i
));
30795 RTVEC_ELT (p
, ndx
) =
30796 gen_rtx_SET (gen_rtx_REG (CCmode
, CR0_REGNO
+ i
),
30797 gen_rtx_UNSPEC (CCmode
, r
, UNSPEC_MOVESI_TO_CR
));
30800 insn
= emit_insn (gen_rtx_PARALLEL (VOIDmode
, p
));
30801 gcc_assert (ndx
== count
);
30803 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
30804 CR field separately. */
30805 if (!exit_func
&& DEFAULT_ABI
== ABI_ELFv2
&& flag_shrink_wrap
)
30807 for (i
= 0; i
< 8; i
++)
30808 if (save_reg_p (CR0_REGNO
+ i
))
30809 add_reg_note (insn
, REG_CFA_RESTORE
,
30810 gen_rtx_REG (SImode
, CR0_REGNO
+ i
));
30812 RTX_FRAME_RELATED_P (insn
) = 1;
30816 for (i
= 0; i
< 8; i
++)
30817 if (save_reg_p (CR0_REGNO
+ i
))
30819 rtx insn
= emit_insn (gen_movsi_to_cr_one
30820 (gen_rtx_REG (CCmode
, CR0_REGNO
+ i
), reg
));
30822 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
30823 CR field separately, attached to the insn that in fact
30824 restores this particular CR field. */
30825 if (!exit_func
&& DEFAULT_ABI
== ABI_ELFv2
&& flag_shrink_wrap
)
30827 add_reg_note (insn
, REG_CFA_RESTORE
,
30828 gen_rtx_REG (SImode
, CR0_REGNO
+ i
));
30830 RTX_FRAME_RELATED_P (insn
) = 1;
30834 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
30835 if (!exit_func
&& DEFAULT_ABI
!= ABI_ELFv2
30836 && (DEFAULT_ABI
== ABI_V4
|| flag_shrink_wrap
))
30838 rtx_insn
*insn
= get_last_insn ();
30839 rtx cr
= gen_rtx_REG (SImode
, CR2_REGNO
);
30841 add_reg_note (insn
, REG_CFA_RESTORE
, cr
);
30842 RTX_FRAME_RELATED_P (insn
) = 1;
30846 /* Like cr, the move to lr instruction can be scheduled after the
30847 stack deallocation, but unlike cr, its stack frame save is still
30848 valid. So we only need to emit the cfa_restore on the correct
30852 load_lr_save (int regno
, rtx frame_reg_rtx
, int offset
)
30854 rtx mem
= gen_frame_mem_offset (Pmode
, frame_reg_rtx
, offset
);
30855 rtx reg
= gen_rtx_REG (Pmode
, regno
);
30857 emit_move_insn (reg
, mem
);
30861 restore_saved_lr (int regno
, bool exit_func
)
30863 rtx reg
= gen_rtx_REG (Pmode
, regno
);
30864 rtx lr
= gen_rtx_REG (Pmode
, LR_REGNO
);
30865 rtx_insn
*insn
= emit_move_insn (lr
, reg
);
30867 if (!exit_func
&& flag_shrink_wrap
)
30869 add_reg_note (insn
, REG_CFA_RESTORE
, lr
);
30870 RTX_FRAME_RELATED_P (insn
) = 1;
30875 add_crlr_cfa_restore (const rs6000_stack_t
*info
, rtx cfa_restores
)
30877 if (DEFAULT_ABI
== ABI_ELFv2
)
30880 for (i
= 0; i
< 8; i
++)
30881 if (save_reg_p (CR0_REGNO
+ i
))
30883 rtx cr
= gen_rtx_REG (SImode
, CR0_REGNO
+ i
);
30884 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
, cr
,
30888 else if (info
->cr_save_p
)
30889 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
,
30890 gen_rtx_REG (SImode
, CR2_REGNO
),
30893 if (info
->lr_save_p
)
30894 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
,
30895 gen_rtx_REG (Pmode
, LR_REGNO
),
30897 return cfa_restores
;
30900 /* Return true if OFFSET from stack pointer can be clobbered by signals.
30901 V.4 doesn't have any stack cushion, AIX ABIs have 220 or 288 bytes
30902 below stack pointer not cloberred by signals. */
30905 offset_below_red_zone_p (HOST_WIDE_INT offset
)
30907 return offset
< (DEFAULT_ABI
== ABI_V4
30909 : TARGET_32BIT
? -220 : -288);
30912 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
30915 emit_cfa_restores (rtx cfa_restores
)
30917 rtx_insn
*insn
= get_last_insn ();
30918 rtx
*loc
= ®_NOTES (insn
);
30921 loc
= &XEXP (*loc
, 1);
30922 *loc
= cfa_restores
;
30923 RTX_FRAME_RELATED_P (insn
) = 1;
30926 /* Emit function epilogue as insns. */
30929 rs6000_emit_epilogue (int sibcall
)
30931 rs6000_stack_t
*info
;
30932 int restoring_GPRs_inline
;
30933 int restoring_FPRs_inline
;
30934 int using_load_multiple
;
30935 int using_mtcr_multiple
;
30936 int use_backchain_to_restore_sp
;
30939 HOST_WIDE_INT frame_off
= 0;
30940 rtx sp_reg_rtx
= gen_rtx_REG (Pmode
, 1);
30941 rtx frame_reg_rtx
= sp_reg_rtx
;
30942 rtx cfa_restores
= NULL_RTX
;
30944 rtx cr_save_reg
= NULL_RTX
;
30945 machine_mode reg_mode
= Pmode
;
30946 int reg_size
= TARGET_32BIT
? 4 : 8;
30947 machine_mode fp_reg_mode
= (TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
)
30949 int fp_reg_size
= 8;
30952 unsigned ptr_regno
;
30954 info
= rs6000_stack_info ();
30956 if (TARGET_SPE_ABI
&& info
->spe_64bit_regs_used
!= 0)
30958 reg_mode
= V2SImode
;
30962 strategy
= info
->savres_strategy
;
30963 using_load_multiple
= strategy
& REST_MULTIPLE
;
30964 restoring_FPRs_inline
= sibcall
|| (strategy
& REST_INLINE_FPRS
);
30965 restoring_GPRs_inline
= sibcall
|| (strategy
& REST_INLINE_GPRS
);
30966 using_mtcr_multiple
= (rs6000_cpu
== PROCESSOR_PPC601
30967 || rs6000_cpu
== PROCESSOR_PPC603
30968 || rs6000_cpu
== PROCESSOR_PPC750
30970 /* Restore via the backchain when we have a large frame, since this
30971 is more efficient than an addis, addi pair. The second condition
30972 here will not trigger at the moment; We don't actually need a
30973 frame pointer for alloca, but the generic parts of the compiler
30974 give us one anyway. */
30975 use_backchain_to_restore_sp
= (info
->total_size
+ (info
->lr_save_p
30976 ? info
->lr_save_offset
30978 || (cfun
->calls_alloca
30979 && !frame_pointer_needed
));
30980 restore_lr
= (info
->lr_save_p
30981 && (restoring_FPRs_inline
30982 || (strategy
& REST_NOINLINE_FPRS_DOESNT_RESTORE_LR
))
30983 && (restoring_GPRs_inline
30984 || info
->first_fp_reg_save
< 64)
30985 && !cfun
->machine
->lr_is_wrapped_separately
);
30988 if (WORLD_SAVE_P (info
))
30992 const char *alloc_rname
;
30995 /* eh_rest_world_r10 will return to the location saved in the LR
30996 stack slot (which is not likely to be our caller.)
30997 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
30998 rest_world is similar, except any R10 parameter is ignored.
30999 The exception-handling stuff that was here in 2.95 is no
31000 longer necessary. */
31003 + 32 - info
->first_gp_reg_save
31004 + LAST_ALTIVEC_REGNO
+ 1 - info
->first_altivec_reg_save
31005 + 63 + 1 - info
->first_fp_reg_save
);
31007 strcpy (rname
, ((crtl
->calls_eh_return
) ?
31008 "*eh_rest_world_r10" : "*rest_world"));
31009 alloc_rname
= ggc_strdup (rname
);
31012 RTVEC_ELT (p
, j
++) = ret_rtx
;
31014 = gen_rtx_USE (VOIDmode
, gen_rtx_SYMBOL_REF (Pmode
, alloc_rname
));
31015 /* The instruction pattern requires a clobber here;
31016 it is shared with the restVEC helper. */
31018 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (Pmode
, 11));
31021 /* CR register traditionally saved as CR2. */
31022 rtx reg
= gen_rtx_REG (SImode
, CR2_REGNO
);
31024 = gen_frame_load (reg
, frame_reg_rtx
, info
->cr_save_offset
);
31025 if (flag_shrink_wrap
)
31027 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
,
31028 gen_rtx_REG (Pmode
, LR_REGNO
),
31030 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
, reg
, cfa_restores
);
31034 for (i
= 0; i
< 32 - info
->first_gp_reg_save
; i
++)
31036 rtx reg
= gen_rtx_REG (reg_mode
, info
->first_gp_reg_save
+ i
);
31038 = gen_frame_load (reg
,
31039 frame_reg_rtx
, info
->gp_save_offset
+ reg_size
* i
);
31040 if (flag_shrink_wrap
)
31041 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
, reg
, cfa_restores
);
31043 for (i
= 0; info
->first_altivec_reg_save
+ i
<= LAST_ALTIVEC_REGNO
; i
++)
31045 rtx reg
= gen_rtx_REG (V4SImode
, info
->first_altivec_reg_save
+ i
);
31047 = gen_frame_load (reg
,
31048 frame_reg_rtx
, info
->altivec_save_offset
+ 16 * i
);
31049 if (flag_shrink_wrap
)
31050 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
, reg
, cfa_restores
);
31052 for (i
= 0; info
->first_fp_reg_save
+ i
<= 63; i
++)
31054 rtx reg
= gen_rtx_REG ((TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
31055 ? DFmode
: SFmode
),
31056 info
->first_fp_reg_save
+ i
);
31058 = gen_frame_load (reg
, frame_reg_rtx
, info
->fp_save_offset
+ 8 * i
);
31059 if (flag_shrink_wrap
)
31060 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
, reg
, cfa_restores
);
31063 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (Pmode
, 0));
31065 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (SImode
, 12));
31067 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (SImode
, 7));
31069 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (SImode
, 8));
31071 = gen_rtx_USE (VOIDmode
, gen_rtx_REG (SImode
, 10));
31072 insn
= emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, p
));
31074 if (flag_shrink_wrap
)
31076 REG_NOTES (insn
) = cfa_restores
;
31077 add_reg_note (insn
, REG_CFA_DEF_CFA
, sp_reg_rtx
);
31078 RTX_FRAME_RELATED_P (insn
) = 1;
31083 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
31085 frame_off
= info
->total_size
;
31087 /* Restore AltiVec registers if we must do so before adjusting the
31089 if (info
->altivec_size
!= 0
31090 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31091 || (DEFAULT_ABI
!= ABI_V4
31092 && offset_below_red_zone_p (info
->altivec_save_offset
))))
31095 int scratch_regno
= ptr_regno_for_savres (SAVRES_VR
);
31097 gcc_checking_assert (scratch_regno
== 11 || scratch_regno
== 12);
31098 if (use_backchain_to_restore_sp
)
31100 int frame_regno
= 11;
31102 if ((strategy
& REST_INLINE_VRS
) == 0)
31104 /* Of r11 and r12, select the one not clobbered by an
31105 out-of-line restore function for the frame register. */
31106 frame_regno
= 11 + 12 - scratch_regno
;
31108 frame_reg_rtx
= gen_rtx_REG (Pmode
, frame_regno
);
31109 emit_move_insn (frame_reg_rtx
,
31110 gen_rtx_MEM (Pmode
, sp_reg_rtx
));
31113 else if (frame_pointer_needed
)
31114 frame_reg_rtx
= hard_frame_pointer_rtx
;
31116 if ((strategy
& REST_INLINE_VRS
) == 0)
31118 int end_save
= info
->altivec_save_offset
+ info
->altivec_size
;
31120 rtx ptr_reg
= gen_rtx_REG (Pmode
, 0);
31121 rtx scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
31123 if (end_save
+ frame_off
!= 0)
31125 rtx offset
= GEN_INT (end_save
+ frame_off
);
31127 emit_insn (gen_add3_insn (ptr_reg
, frame_reg_rtx
, offset
));
31130 emit_move_insn (ptr_reg
, frame_reg_rtx
);
31132 ptr_off
= -end_save
;
31133 insn
= rs6000_emit_savres_rtx (info
, scratch_reg
,
31134 info
->altivec_save_offset
+ ptr_off
,
31135 0, V4SImode
, SAVRES_VR
);
31139 for (i
= info
->first_altivec_reg_save
; i
<= LAST_ALTIVEC_REGNO
; ++i
)
31140 if (info
->vrsave_mask
& ALTIVEC_REG_BIT (i
))
31142 rtx addr
, areg
, mem
, insn
;
31143 rtx reg
= gen_rtx_REG (V4SImode
, i
);
31144 HOST_WIDE_INT offset
31145 = (info
->altivec_save_offset
+ frame_off
31146 + 16 * (i
- info
->first_altivec_reg_save
));
31148 if (TARGET_P9_DFORM_VECTOR
&& quad_address_offset_p (offset
))
31150 mem
= gen_frame_mem (V4SImode
,
31151 gen_rtx_PLUS (Pmode
, frame_reg_rtx
,
31152 GEN_INT (offset
)));
31153 insn
= gen_rtx_SET (reg
, mem
);
31157 areg
= gen_rtx_REG (Pmode
, 0);
31158 emit_move_insn (areg
, GEN_INT (offset
));
31160 /* AltiVec addressing mode is [reg+reg]. */
31161 addr
= gen_rtx_PLUS (Pmode
, frame_reg_rtx
, areg
);
31162 mem
= gen_frame_mem (V4SImode
, addr
);
31164 /* Rather than emitting a generic move, force use of the
31165 lvx instruction, which we always want. In particular we
31166 don't want lxvd2x/xxpermdi for little endian. */
31167 insn
= gen_altivec_lvx_v4si_internal (reg
, mem
);
31170 (void) emit_insn (insn
);
31174 for (i
= info
->first_altivec_reg_save
; i
<= LAST_ALTIVEC_REGNO
; ++i
)
31175 if (((strategy
& REST_INLINE_VRS
) == 0
31176 || (info
->vrsave_mask
& ALTIVEC_REG_BIT (i
)) != 0)
31177 && (flag_shrink_wrap
31178 || (offset_below_red_zone_p
31179 (info
->altivec_save_offset
31180 + 16 * (i
- info
->first_altivec_reg_save
)))))
31182 rtx reg
= gen_rtx_REG (V4SImode
, i
);
31183 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
, reg
, cfa_restores
);
31187 /* Restore VRSAVE if we must do so before adjusting the stack. */
31188 if (info
->vrsave_size
!= 0
31189 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31190 || (DEFAULT_ABI
!= ABI_V4
31191 && offset_below_red_zone_p (info
->vrsave_save_offset
))))
31195 if (frame_reg_rtx
== sp_reg_rtx
)
31197 if (use_backchain_to_restore_sp
)
31199 frame_reg_rtx
= gen_rtx_REG (Pmode
, 11);
31200 emit_move_insn (frame_reg_rtx
,
31201 gen_rtx_MEM (Pmode
, sp_reg_rtx
));
31204 else if (frame_pointer_needed
)
31205 frame_reg_rtx
= hard_frame_pointer_rtx
;
31208 reg
= gen_rtx_REG (SImode
, 12);
31209 emit_insn (gen_frame_load (reg
, frame_reg_rtx
,
31210 info
->vrsave_save_offset
+ frame_off
));
31212 emit_insn (generate_set_vrsave (reg
, info
, 1));
31216 /* If we have a large stack frame, restore the old stack pointer
31217 using the backchain. */
31218 if (use_backchain_to_restore_sp
)
31220 if (frame_reg_rtx
== sp_reg_rtx
)
31222 /* Under V.4, don't reset the stack pointer until after we're done
31223 loading the saved registers. */
31224 if (DEFAULT_ABI
== ABI_V4
)
31225 frame_reg_rtx
= gen_rtx_REG (Pmode
, 11);
31227 insn
= emit_move_insn (frame_reg_rtx
,
31228 gen_rtx_MEM (Pmode
, sp_reg_rtx
));
31231 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31232 && DEFAULT_ABI
== ABI_V4
)
31233 /* frame_reg_rtx has been set up by the altivec restore. */
31237 insn
= emit_move_insn (sp_reg_rtx
, frame_reg_rtx
);
31238 frame_reg_rtx
= sp_reg_rtx
;
31241 /* If we have a frame pointer, we can restore the old stack pointer
31243 else if (frame_pointer_needed
)
31245 frame_reg_rtx
= sp_reg_rtx
;
31246 if (DEFAULT_ABI
== ABI_V4
)
31247 frame_reg_rtx
= gen_rtx_REG (Pmode
, 11);
31248 /* Prevent reordering memory accesses against stack pointer restore. */
31249 else if (cfun
->calls_alloca
31250 || offset_below_red_zone_p (-info
->total_size
))
31251 rs6000_emit_stack_tie (frame_reg_rtx
, true);
31253 insn
= emit_insn (gen_add3_insn (frame_reg_rtx
, hard_frame_pointer_rtx
,
31254 GEN_INT (info
->total_size
)));
31257 else if (info
->push_p
31258 && DEFAULT_ABI
!= ABI_V4
31259 && !crtl
->calls_eh_return
)
31261 /* Prevent reordering memory accesses against stack pointer restore. */
31262 if (cfun
->calls_alloca
31263 || offset_below_red_zone_p (-info
->total_size
))
31264 rs6000_emit_stack_tie (frame_reg_rtx
, false);
31265 insn
= emit_insn (gen_add3_insn (sp_reg_rtx
, sp_reg_rtx
,
31266 GEN_INT (info
->total_size
)));
31269 if (insn
&& frame_reg_rtx
== sp_reg_rtx
)
31273 REG_NOTES (insn
) = cfa_restores
;
31274 cfa_restores
= NULL_RTX
;
31276 add_reg_note (insn
, REG_CFA_DEF_CFA
, sp_reg_rtx
);
31277 RTX_FRAME_RELATED_P (insn
) = 1;
31280 /* Restore AltiVec registers if we have not done so already. */
31281 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31282 && info
->altivec_size
!= 0
31283 && (DEFAULT_ABI
== ABI_V4
31284 || !offset_below_red_zone_p (info
->altivec_save_offset
)))
31288 if ((strategy
& REST_INLINE_VRS
) == 0)
31290 int end_save
= info
->altivec_save_offset
+ info
->altivec_size
;
31292 rtx ptr_reg
= gen_rtx_REG (Pmode
, 0);
31293 int scratch_regno
= ptr_regno_for_savres (SAVRES_VR
);
31294 rtx scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
31296 if (end_save
+ frame_off
!= 0)
31298 rtx offset
= GEN_INT (end_save
+ frame_off
);
31300 emit_insn (gen_add3_insn (ptr_reg
, frame_reg_rtx
, offset
));
31303 emit_move_insn (ptr_reg
, frame_reg_rtx
);
31305 ptr_off
= -end_save
;
31306 insn
= rs6000_emit_savres_rtx (info
, scratch_reg
,
31307 info
->altivec_save_offset
+ ptr_off
,
31308 0, V4SImode
, SAVRES_VR
);
31309 if (REGNO (frame_reg_rtx
) == REGNO (scratch_reg
))
31311 /* Frame reg was clobbered by out-of-line save. Restore it
31312 from ptr_reg, and if we are calling out-of-line gpr or
31313 fpr restore set up the correct pointer and offset. */
31314 unsigned newptr_regno
= 1;
31315 if (!restoring_GPRs_inline
)
31317 bool lr
= info
->gp_save_offset
+ info
->gp_size
== 0;
31318 int sel
= SAVRES_GPR
| (lr
? SAVRES_LR
: 0);
31319 newptr_regno
= ptr_regno_for_savres (sel
);
31320 end_save
= info
->gp_save_offset
+ info
->gp_size
;
31322 else if (!restoring_FPRs_inline
)
31324 bool lr
= !(strategy
& REST_NOINLINE_FPRS_DOESNT_RESTORE_LR
);
31325 int sel
= SAVRES_FPR
| (lr
? SAVRES_LR
: 0);
31326 newptr_regno
= ptr_regno_for_savres (sel
);
31327 end_save
= info
->fp_save_offset
+ info
->fp_size
;
31330 if (newptr_regno
!= 1 && REGNO (frame_reg_rtx
) != newptr_regno
)
31331 frame_reg_rtx
= gen_rtx_REG (Pmode
, newptr_regno
);
31333 if (end_save
+ ptr_off
!= 0)
31335 rtx offset
= GEN_INT (end_save
+ ptr_off
);
31337 frame_off
= -end_save
;
31339 emit_insn (gen_addsi3_carry (frame_reg_rtx
,
31342 emit_insn (gen_adddi3_carry (frame_reg_rtx
,
31347 frame_off
= ptr_off
;
31348 emit_move_insn (frame_reg_rtx
, ptr_reg
);
31354 for (i
= info
->first_altivec_reg_save
; i
<= LAST_ALTIVEC_REGNO
; ++i
)
31355 if (info
->vrsave_mask
& ALTIVEC_REG_BIT (i
))
31357 rtx addr
, areg
, mem
, insn
;
31358 rtx reg
= gen_rtx_REG (V4SImode
, i
);
31359 HOST_WIDE_INT offset
31360 = (info
->altivec_save_offset
+ frame_off
31361 + 16 * (i
- info
->first_altivec_reg_save
));
31363 if (TARGET_P9_DFORM_VECTOR
&& quad_address_offset_p (offset
))
31365 mem
= gen_frame_mem (V4SImode
,
31366 gen_rtx_PLUS (Pmode
, frame_reg_rtx
,
31367 GEN_INT (offset
)));
31368 insn
= gen_rtx_SET (reg
, mem
);
31372 areg
= gen_rtx_REG (Pmode
, 0);
31373 emit_move_insn (areg
, GEN_INT (offset
));
31375 /* AltiVec addressing mode is [reg+reg]. */
31376 addr
= gen_rtx_PLUS (Pmode
, frame_reg_rtx
, areg
);
31377 mem
= gen_frame_mem (V4SImode
, addr
);
31379 /* Rather than emitting a generic move, force use of the
31380 lvx instruction, which we always want. In particular we
31381 don't want lxvd2x/xxpermdi for little endian. */
31382 insn
= gen_altivec_lvx_v4si_internal (reg
, mem
);
31385 (void) emit_insn (insn
);
31389 for (i
= info
->first_altivec_reg_save
; i
<= LAST_ALTIVEC_REGNO
; ++i
)
31390 if (((strategy
& REST_INLINE_VRS
) == 0
31391 || (info
->vrsave_mask
& ALTIVEC_REG_BIT (i
)) != 0)
31392 && (DEFAULT_ABI
== ABI_V4
|| flag_shrink_wrap
))
31394 rtx reg
= gen_rtx_REG (V4SImode
, i
);
31395 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
, reg
, cfa_restores
);
31399 /* Restore VRSAVE if we have not done so already. */
31400 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31401 && info
->vrsave_size
!= 0
31402 && (DEFAULT_ABI
== ABI_V4
31403 || !offset_below_red_zone_p (info
->vrsave_save_offset
)))
31407 reg
= gen_rtx_REG (SImode
, 12);
31408 emit_insn (gen_frame_load (reg
, frame_reg_rtx
,
31409 info
->vrsave_save_offset
+ frame_off
));
31411 emit_insn (generate_set_vrsave (reg
, info
, 1));
31414 /* If we exit by an out-of-line restore function on ABI_V4 then that
31415 function will deallocate the stack, so we don't need to worry
31416 about the unwinder restoring cr from an invalid stack frame
31418 exit_func
= (!restoring_FPRs_inline
31419 || (!restoring_GPRs_inline
31420 && info
->first_fp_reg_save
== 64));
31422 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
31423 *separate* slots if the routine calls __builtin_eh_return, so
31424 that they can be independently restored by the unwinder. */
31425 if (DEFAULT_ABI
== ABI_ELFv2
&& crtl
->calls_eh_return
)
31427 int i
, cr_off
= info
->ehcr_offset
;
31429 for (i
= 0; i
< 8; i
++)
31430 if (!call_used_regs
[CR0_REGNO
+ i
])
31432 rtx reg
= gen_rtx_REG (SImode
, 0);
31433 emit_insn (gen_frame_load (reg
, frame_reg_rtx
,
31434 cr_off
+ frame_off
));
31436 insn
= emit_insn (gen_movsi_to_cr_one
31437 (gen_rtx_REG (CCmode
, CR0_REGNO
+ i
), reg
));
31439 if (!exit_func
&& flag_shrink_wrap
)
31441 add_reg_note (insn
, REG_CFA_RESTORE
,
31442 gen_rtx_REG (SImode
, CR0_REGNO
+ i
));
31444 RTX_FRAME_RELATED_P (insn
) = 1;
31447 cr_off
+= reg_size
;
31451 /* Get the old lr if we saved it. If we are restoring registers
31452 out-of-line, then the out-of-line routines can do this for us. */
31453 if (restore_lr
&& restoring_GPRs_inline
)
31454 load_lr_save (0, frame_reg_rtx
, info
->lr_save_offset
+ frame_off
);
31456 /* Get the old cr if we saved it. */
31457 if (info
->cr_save_p
)
31459 unsigned cr_save_regno
= 12;
31461 if (!restoring_GPRs_inline
)
31463 /* Ensure we don't use the register used by the out-of-line
31464 gpr register restore below. */
31465 bool lr
= info
->gp_save_offset
+ info
->gp_size
== 0;
31466 int sel
= SAVRES_GPR
| (lr
? SAVRES_LR
: 0);
31467 int gpr_ptr_regno
= ptr_regno_for_savres (sel
);
31469 if (gpr_ptr_regno
== 12)
31470 cr_save_regno
= 11;
31471 gcc_checking_assert (REGNO (frame_reg_rtx
) != cr_save_regno
);
31473 else if (REGNO (frame_reg_rtx
) == 12)
31474 cr_save_regno
= 11;
31476 cr_save_reg
= load_cr_save (cr_save_regno
, frame_reg_rtx
,
31477 info
->cr_save_offset
+ frame_off
,
31481 /* Set LR here to try to overlap restores below. */
31482 if (restore_lr
&& restoring_GPRs_inline
)
31483 restore_saved_lr (0, exit_func
);
31485 /* Load exception handler data registers, if needed. */
31486 if (crtl
->calls_eh_return
)
31488 unsigned int i
, regno
;
31492 rtx reg
= gen_rtx_REG (reg_mode
, 2);
31493 emit_insn (gen_frame_load (reg
, frame_reg_rtx
,
31494 frame_off
+ RS6000_TOC_SAVE_SLOT
));
31501 regno
= EH_RETURN_DATA_REGNO (i
);
31502 if (regno
== INVALID_REGNUM
)
31505 /* Note: possible use of r0 here to address SPE regs. */
31506 mem
= gen_frame_mem_offset (reg_mode
, frame_reg_rtx
,
31507 info
->ehrd_offset
+ frame_off
31508 + reg_size
* (int) i
);
31510 emit_move_insn (gen_rtx_REG (reg_mode
, regno
), mem
);
31514 /* Restore GPRs. This is done as a PARALLEL if we are using
31515 the load-multiple instructions. */
31517 && info
->spe_64bit_regs_used
31518 && info
->first_gp_reg_save
!= 32)
31520 /* Determine whether we can address all of the registers that need
31521 to be saved with an offset from frame_reg_rtx that fits in
31522 the small const field for SPE memory instructions. */
31523 int spe_regs_addressable
31524 = (SPE_CONST_OFFSET_OK (info
->spe_gp_save_offset
+ frame_off
31525 + reg_size
* (32 - info
->first_gp_reg_save
- 1))
31526 && restoring_GPRs_inline
);
31528 if (!spe_regs_addressable
)
31530 int ool_adjust
= 0;
31531 rtx old_frame_reg_rtx
= frame_reg_rtx
;
31532 /* Make r11 point to the start of the SPE save area. We worried about
31533 not clobbering it when we were saving registers in the prologue.
31534 There's no need to worry here because the static chain is passed
31535 anew to every function. */
31537 if (!restoring_GPRs_inline
)
31538 ool_adjust
= 8 * (info
->first_gp_reg_save
- FIRST_SAVED_GP_REGNO
);
31539 frame_reg_rtx
= gen_rtx_REG (Pmode
, 11);
31540 emit_insn (gen_addsi3 (frame_reg_rtx
, old_frame_reg_rtx
,
31541 GEN_INT (info
->spe_gp_save_offset
31544 /* Keep the invariant that frame_reg_rtx + frame_off points
31545 at the top of the stack frame. */
31546 frame_off
= -info
->spe_gp_save_offset
+ ool_adjust
;
31549 if (restoring_GPRs_inline
)
31551 HOST_WIDE_INT spe_offset
= info
->spe_gp_save_offset
+ frame_off
;
31553 for (i
= 0; i
< 32 - info
->first_gp_reg_save
; i
++)
31554 if (rs6000_reg_live_or_pic_offset_p (info
->first_gp_reg_save
+ i
))
31556 rtx offset
, addr
, mem
, reg
;
31558 /* We're doing all this to ensure that the immediate offset
31559 fits into the immediate field of 'evldd'. */
31560 gcc_assert (SPE_CONST_OFFSET_OK (spe_offset
+ reg_size
* i
));
31562 offset
= GEN_INT (spe_offset
+ reg_size
* i
);
31563 addr
= gen_rtx_PLUS (Pmode
, frame_reg_rtx
, offset
);
31564 mem
= gen_rtx_MEM (V2SImode
, addr
);
31565 reg
= gen_rtx_REG (reg_mode
, info
->first_gp_reg_save
+ i
);
31567 emit_move_insn (reg
, mem
);
31571 rs6000_emit_savres_rtx (info
, frame_reg_rtx
,
31572 info
->spe_gp_save_offset
+ frame_off
,
31573 info
->lr_save_offset
+ frame_off
,
31575 SAVRES_GPR
| SAVRES_LR
);
31577 else if (!restoring_GPRs_inline
)
31579 /* We are jumping to an out-of-line function. */
31581 int end_save
= info
->gp_save_offset
+ info
->gp_size
;
31582 bool can_use_exit
= end_save
== 0;
31583 int sel
= SAVRES_GPR
| (can_use_exit
? SAVRES_LR
: 0);
31586 /* Emit stack reset code if we need it. */
31587 ptr_regno
= ptr_regno_for_savres (sel
);
31588 ptr_reg
= gen_rtx_REG (Pmode
, ptr_regno
);
31590 rs6000_emit_stack_reset (info
, frame_reg_rtx
, frame_off
, ptr_regno
);
31591 else if (end_save
+ frame_off
!= 0)
31592 emit_insn (gen_add3_insn (ptr_reg
, frame_reg_rtx
,
31593 GEN_INT (end_save
+ frame_off
)));
31594 else if (REGNO (frame_reg_rtx
) != ptr_regno
)
31595 emit_move_insn (ptr_reg
, frame_reg_rtx
);
31596 if (REGNO (frame_reg_rtx
) == ptr_regno
)
31597 frame_off
= -end_save
;
31599 if (can_use_exit
&& info
->cr_save_p
)
31600 restore_saved_cr (cr_save_reg
, using_mtcr_multiple
, true);
31602 ptr_off
= -end_save
;
31603 rs6000_emit_savres_rtx (info
, ptr_reg
,
31604 info
->gp_save_offset
+ ptr_off
,
31605 info
->lr_save_offset
+ ptr_off
,
31608 else if (using_load_multiple
)
31611 p
= rtvec_alloc (32 - info
->first_gp_reg_save
);
31612 for (i
= 0; i
< 32 - info
->first_gp_reg_save
; i
++)
31614 = gen_frame_load (gen_rtx_REG (reg_mode
, info
->first_gp_reg_save
+ i
),
31616 info
->gp_save_offset
+ frame_off
+ reg_size
* i
);
31617 emit_insn (gen_rtx_PARALLEL (VOIDmode
, p
));
31621 int offset
= info
->gp_save_offset
+ frame_off
;
31622 for (i
= info
->first_gp_reg_save
; i
< 32; i
++)
31624 if (rs6000_reg_live_or_pic_offset_p (i
)
31625 && !cfun
->machine
->gpr_is_wrapped_separately
[i
])
31627 rtx reg
= gen_rtx_REG (reg_mode
, i
);
31628 emit_insn (gen_frame_load (reg
, frame_reg_rtx
, offset
));
31631 offset
+= reg_size
;
31635 if (DEFAULT_ABI
== ABI_V4
|| flag_shrink_wrap
)
31637 /* If the frame pointer was used then we can't delay emitting
31638 a REG_CFA_DEF_CFA note. This must happen on the insn that
31639 restores the frame pointer, r31. We may have already emitted
31640 a REG_CFA_DEF_CFA note, but that's OK; A duplicate is
31641 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
31642 be harmless if emitted. */
31643 if (frame_pointer_needed
)
31645 insn
= get_last_insn ();
31646 add_reg_note (insn
, REG_CFA_DEF_CFA
,
31647 plus_constant (Pmode
, frame_reg_rtx
, frame_off
));
31648 RTX_FRAME_RELATED_P (insn
) = 1;
31651 /* Set up cfa_restores. We always need these when
31652 shrink-wrapping. If not shrink-wrapping then we only need
31653 the cfa_restore when the stack location is no longer valid.
31654 The cfa_restores must be emitted on or before the insn that
31655 invalidates the stack, and of course must not be emitted
31656 before the insn that actually does the restore. The latter
31657 is why it is a bad idea to emit the cfa_restores as a group
31658 on the last instruction here that actually does a restore:
31659 That insn may be reordered with respect to others doing
31661 if (flag_shrink_wrap
31662 && !restoring_GPRs_inline
31663 && info
->first_fp_reg_save
== 64)
31664 cfa_restores
= add_crlr_cfa_restore (info
, cfa_restores
);
31666 for (i
= info
->first_gp_reg_save
; i
< 32; i
++)
31667 if (!restoring_GPRs_inline
31668 || using_load_multiple
31669 || rs6000_reg_live_or_pic_offset_p (i
))
31671 if (cfun
->machine
->gpr_is_wrapped_separately
[i
])
31674 rtx reg
= gen_rtx_REG (reg_mode
, i
);
31675 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
, reg
, cfa_restores
);
31679 if (!restoring_GPRs_inline
31680 && info
->first_fp_reg_save
== 64)
31682 /* We are jumping to an out-of-line function. */
31684 emit_cfa_restores (cfa_restores
);
31688 if (restore_lr
&& !restoring_GPRs_inline
)
31690 load_lr_save (0, frame_reg_rtx
, info
->lr_save_offset
+ frame_off
);
31691 restore_saved_lr (0, exit_func
);
31694 /* Restore fpr's if we need to do it without calling a function. */
31695 if (restoring_FPRs_inline
)
31697 int offset
= info
->fp_save_offset
+ frame_off
;
31698 for (i
= info
->first_fp_reg_save
; i
< 64; i
++)
31701 && !cfun
->machine
->fpr_is_wrapped_separately
[i
- 32])
31703 rtx reg
= gen_rtx_REG (fp_reg_mode
, i
);
31704 emit_insn (gen_frame_load (reg
, frame_reg_rtx
, offset
));
31705 if (DEFAULT_ABI
== ABI_V4
|| flag_shrink_wrap
)
31706 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
, reg
,
31710 offset
+= fp_reg_size
;
31714 /* If we saved cr, restore it here. Just those that were used. */
31715 if (info
->cr_save_p
)
31716 restore_saved_cr (cr_save_reg
, using_mtcr_multiple
, exit_func
);
31718 /* If this is V.4, unwind the stack pointer after all of the loads
31719 have been done, or set up r11 if we are restoring fp out of line. */
31721 if (!restoring_FPRs_inline
)
31723 bool lr
= (strategy
& REST_NOINLINE_FPRS_DOESNT_RESTORE_LR
) == 0;
31724 int sel
= SAVRES_FPR
| (lr
? SAVRES_LR
: 0);
31725 ptr_regno
= ptr_regno_for_savres (sel
);
31728 insn
= rs6000_emit_stack_reset (info
, frame_reg_rtx
, frame_off
, ptr_regno
);
31729 if (REGNO (frame_reg_rtx
) == ptr_regno
)
31732 if (insn
&& restoring_FPRs_inline
)
31736 REG_NOTES (insn
) = cfa_restores
;
31737 cfa_restores
= NULL_RTX
;
31739 add_reg_note (insn
, REG_CFA_DEF_CFA
, sp_reg_rtx
);
31740 RTX_FRAME_RELATED_P (insn
) = 1;
31743 if (crtl
->calls_eh_return
)
31745 rtx sa
= EH_RETURN_STACKADJ_RTX
;
31746 emit_insn (gen_add3_insn (sp_reg_rtx
, sp_reg_rtx
, sa
));
31749 if (!sibcall
&& restoring_FPRs_inline
)
31753 /* We can't hang the cfa_restores off a simple return,
31754 since the shrink-wrap code sometimes uses an existing
31755 return. This means there might be a path from
31756 pre-prologue code to this return, and dwarf2cfi code
31757 wants the eh_frame unwinder state to be the same on
31758 all paths to any point. So we need to emit the
31759 cfa_restores before the return. For -m64 we really
31760 don't need epilogue cfa_restores at all, except for
31761 this irritating dwarf2cfi with shrink-wrap
31762 requirement; The stack red-zone means eh_frame info
31763 from the prologue telling the unwinder to restore
31764 from the stack is perfectly good right to the end of
31766 emit_insn (gen_blockage ());
31767 emit_cfa_restores (cfa_restores
);
31768 cfa_restores
= NULL_RTX
;
31771 emit_jump_insn (targetm
.gen_simple_return ());
31774 if (!sibcall
&& !restoring_FPRs_inline
)
31776 bool lr
= (strategy
& REST_NOINLINE_FPRS_DOESNT_RESTORE_LR
) == 0;
31777 rtvec p
= rtvec_alloc (3 + !!lr
+ 64 - info
->first_fp_reg_save
);
31779 RTVEC_ELT (p
, elt
++) = ret_rtx
;
31781 RTVEC_ELT (p
, elt
++)
31782 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (Pmode
, LR_REGNO
));
31784 /* We have to restore more than two FP registers, so branch to the
31785 restore function. It will return to our caller. */
31790 if (flag_shrink_wrap
)
31791 cfa_restores
= add_crlr_cfa_restore (info
, cfa_restores
);
31793 sym
= rs6000_savres_routine_sym (info
, SAVRES_FPR
| (lr
? SAVRES_LR
: 0));
31794 RTVEC_ELT (p
, elt
++) = gen_rtx_USE (VOIDmode
, sym
);
31795 reg
= (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)? 1 : 11;
31796 RTVEC_ELT (p
, elt
++) = gen_rtx_USE (VOIDmode
, gen_rtx_REG (Pmode
, reg
));
31798 for (i
= 0; i
< 64 - info
->first_fp_reg_save
; i
++)
31800 rtx reg
= gen_rtx_REG (DFmode
, info
->first_fp_reg_save
+ i
);
31802 RTVEC_ELT (p
, elt
++)
31803 = gen_frame_load (reg
, sp_reg_rtx
, info
->fp_save_offset
+ 8 * i
);
31804 if (flag_shrink_wrap
)
31805 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
, reg
, cfa_restores
);
31808 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, p
));
31814 /* Ensure the cfa_restores are hung off an insn that won't
31815 be reordered above other restores. */
31816 emit_insn (gen_blockage ());
31818 emit_cfa_restores (cfa_restores
);
31822 /* Write function epilogue. */
31825 rs6000_output_function_epilogue (FILE *file
)
31828 macho_branch_islands ();
31831 rtx_insn
*insn
= get_last_insn ();
31832 rtx_insn
*deleted_debug_label
= NULL
;
31834 /* Mach-O doesn't support labels at the end of objects, so if
31835 it looks like we might want one, take special action.
31837 First, collect any sequence of deleted debug labels. */
31840 && NOTE_KIND (insn
) != NOTE_INSN_DELETED_LABEL
)
31842 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
31843 notes only, instead set their CODE_LABEL_NUMBER to -1,
31844 otherwise there would be code generation differences
31845 in between -g and -g0. */
31846 if (NOTE_P (insn
) && NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
31847 deleted_debug_label
= insn
;
31848 insn
= PREV_INSN (insn
);
31851 /* Second, if we have:
31854 then this needs to be detected, so skip past the barrier. */
31856 if (insn
&& BARRIER_P (insn
))
31857 insn
= PREV_INSN (insn
);
31859 /* Up to now we've only seen notes or barriers. */
31864 && NOTE_KIND (insn
) == NOTE_INSN_DELETED_LABEL
))
31865 /* Trailing label: <barrier>. */
31866 fputs ("\tnop\n", file
);
31869 /* Lastly, see if we have a completely empty function body. */
31870 while (insn
&& ! INSN_P (insn
))
31871 insn
= PREV_INSN (insn
);
31872 /* If we don't find any insns, we've got an empty function body;
31873 I.e. completely empty - without a return or branch. This is
31874 taken as the case where a function body has been removed
31875 because it contains an inline __builtin_unreachable(). GCC
31876 states that reaching __builtin_unreachable() means UB so we're
31877 not obliged to do anything special; however, we want
31878 non-zero-sized function bodies. To meet this, and help the
31879 user out, let's trap the case. */
31881 fputs ("\ttrap\n", file
);
31884 else if (deleted_debug_label
)
31885 for (insn
= deleted_debug_label
; insn
; insn
= NEXT_INSN (insn
))
31886 if (NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
31887 CODE_LABEL_NUMBER (insn
) = -1;
31891 /* Output a traceback table here. See /usr/include/sys/debug.h for info
31894 We don't output a traceback table if -finhibit-size-directive was
31895 used. The documentation for -finhibit-size-directive reads
31896 ``don't output a @code{.size} assembler directive, or anything
31897 else that would cause trouble if the function is split in the
31898 middle, and the two halves are placed at locations far apart in
31899 memory.'' The traceback table has this property, since it
31900 includes the offset from the start of the function to the
31901 traceback table itself.
31903 System V.4 Powerpc's (and the embedded ABI derived from it) use a
31904 different traceback table. */
31905 if ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
31906 && ! flag_inhibit_size_directive
31907 && rs6000_traceback
!= traceback_none
&& !cfun
->is_thunk
)
31909 const char *fname
= NULL
;
31910 const char *language_string
= lang_hooks
.name
;
31911 int fixed_parms
= 0, float_parms
= 0, parm_info
= 0;
31913 int optional_tbtab
;
31914 rs6000_stack_t
*info
= rs6000_stack_info ();
31916 if (rs6000_traceback
== traceback_full
)
31917 optional_tbtab
= 1;
31918 else if (rs6000_traceback
== traceback_part
)
31919 optional_tbtab
= 0;
31921 optional_tbtab
= !optimize_size
&& !TARGET_ELF
;
31923 if (optional_tbtab
)
31925 fname
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
31926 while (*fname
== '.') /* V.4 encodes . in the name */
31929 /* Need label immediately before tbtab, so we can compute
31930 its offset from the function start. */
31931 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LT");
31932 ASM_OUTPUT_LABEL (file
, fname
);
31935 /* The .tbtab pseudo-op can only be used for the first eight
31936 expressions, since it can't handle the possibly variable
31937 length fields that follow. However, if you omit the optional
31938 fields, the assembler outputs zeros for all optional fields
31939 anyways, giving each variable length field is minimum length
31940 (as defined in sys/debug.h). Thus we can not use the .tbtab
31941 pseudo-op at all. */
31943 /* An all-zero word flags the start of the tbtab, for debuggers
31944 that have to find it by searching forward from the entry
31945 point or from the current pc. */
31946 fputs ("\t.long 0\n", file
);
31948 /* Tbtab format type. Use format type 0. */
31949 fputs ("\t.byte 0,", file
);
31951 /* Language type. Unfortunately, there does not seem to be any
31952 official way to discover the language being compiled, so we
31953 use language_string.
31954 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
31955 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
31956 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
31957 either, so for now use 0. */
31959 || ! strcmp (language_string
, "GNU GIMPLE")
31960 || ! strcmp (language_string
, "GNU Go")
31961 || ! strcmp (language_string
, "libgccjit"))
31963 else if (! strcmp (language_string
, "GNU F77")
31964 || lang_GNU_Fortran ())
31966 else if (! strcmp (language_string
, "GNU Pascal"))
31968 else if (! strcmp (language_string
, "GNU Ada"))
31970 else if (lang_GNU_CXX ()
31971 || ! strcmp (language_string
, "GNU Objective-C++"))
31973 else if (! strcmp (language_string
, "GNU Java"))
31975 else if (! strcmp (language_string
, "GNU Objective-C"))
31978 gcc_unreachable ();
31979 fprintf (file
, "%d,", i
);
31981 /* 8 single bit fields: global linkage (not set for C extern linkage,
31982 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
31983 from start of procedure stored in tbtab, internal function, function
31984 has controlled storage, function has no toc, function uses fp,
31985 function logs/aborts fp operations. */
31986 /* Assume that fp operations are used if any fp reg must be saved. */
31987 fprintf (file
, "%d,",
31988 (optional_tbtab
<< 5) | ((info
->first_fp_reg_save
!= 64) << 1));
31990 /* 6 bitfields: function is interrupt handler, name present in
31991 proc table, function calls alloca, on condition directives
31992 (controls stack walks, 3 bits), saves condition reg, saves
31994 /* The `function calls alloca' bit seems to be set whenever reg 31 is
31995 set up as a frame pointer, even when there is no alloca call. */
31996 fprintf (file
, "%d,",
31997 ((optional_tbtab
<< 6)
31998 | ((optional_tbtab
& frame_pointer_needed
) << 5)
31999 | (info
->cr_save_p
<< 1)
32000 | (info
->lr_save_p
)));
32002 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
32004 fprintf (file
, "%d,",
32005 (info
->push_p
<< 7) | (64 - info
->first_fp_reg_save
));
32007 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
32008 fprintf (file
, "%d,", (32 - first_reg_to_save ()));
32010 if (optional_tbtab
)
32012 /* Compute the parameter info from the function decl argument
32015 int next_parm_info_bit
= 31;
32017 for (decl
= DECL_ARGUMENTS (current_function_decl
);
32018 decl
; decl
= DECL_CHAIN (decl
))
32020 rtx parameter
= DECL_INCOMING_RTL (decl
);
32021 machine_mode mode
= GET_MODE (parameter
);
32023 if (GET_CODE (parameter
) == REG
)
32025 if (SCALAR_FLOAT_MODE_P (mode
))
32048 gcc_unreachable ();
32051 /* If only one bit will fit, don't or in this entry. */
32052 if (next_parm_info_bit
> 0)
32053 parm_info
|= (bits
<< (next_parm_info_bit
- 1));
32054 next_parm_info_bit
-= 2;
32058 fixed_parms
+= ((GET_MODE_SIZE (mode
)
32059 + (UNITS_PER_WORD
- 1))
32061 next_parm_info_bit
-= 1;
32067 /* Number of fixed point parameters. */
32068 /* This is actually the number of words of fixed point parameters; thus
32069 an 8 byte struct counts as 2; and thus the maximum value is 8. */
32070 fprintf (file
, "%d,", fixed_parms
);
32072 /* 2 bitfields: number of floating point parameters (7 bits), parameters
32074 /* This is actually the number of fp registers that hold parameters;
32075 and thus the maximum value is 13. */
32076 /* Set parameters on stack bit if parameters are not in their original
32077 registers, regardless of whether they are on the stack? Xlc
32078 seems to set the bit when not optimizing. */
32079 fprintf (file
, "%d\n", ((float_parms
<< 1) | (! optimize
)));
32081 if (optional_tbtab
)
32083 /* Optional fields follow. Some are variable length. */
32085 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single
32086 float, 11 double float. */
32087 /* There is an entry for each parameter in a register, in the order
32088 that they occur in the parameter list. Any intervening arguments
32089 on the stack are ignored. If the list overflows a long (max
32090 possible length 34 bits) then completely leave off all elements
32092 /* Only emit this long if there was at least one parameter. */
32093 if (fixed_parms
|| float_parms
)
32094 fprintf (file
, "\t.long %d\n", parm_info
);
32096 /* Offset from start of code to tb table. */
32097 fputs ("\t.long ", file
);
32098 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LT");
32099 RS6000_OUTPUT_BASENAME (file
, fname
);
32101 rs6000_output_function_entry (file
, fname
);
32104 /* Interrupt handler mask. */
32105 /* Omit this long, since we never set the interrupt handler bit
32108 /* Number of CTL (controlled storage) anchors. */
32109 /* Omit this long, since the has_ctl bit is never set above. */
32111 /* Displacement into stack of each CTL anchor. */
32112 /* Omit this list of longs, because there are no CTL anchors. */
32114 /* Length of function name. */
32117 fprintf (file
, "\t.short %d\n", (int) strlen (fname
));
32119 /* Function name. */
32120 assemble_string (fname
, strlen (fname
));
32122 /* Register for alloca automatic storage; this is always reg 31.
32123 Only emit this if the alloca bit was set above. */
32124 if (frame_pointer_needed
)
32125 fputs ("\t.byte 31\n", file
);
32127 fputs ("\t.align 2\n", file
);
32131 /* Arrange to define .LCTOC1 label, if not already done. */
32135 if (!toc_initialized
)
32137 switch_to_section (toc_section
);
32138 switch_to_section (current_function_section ());
32143 /* -fsplit-stack support. */
32145 /* A SYMBOL_REF for __morestack. */
32146 static GTY(()) rtx morestack_ref
;
32149 gen_add3_const (rtx rt
, rtx ra
, long c
)
32152 return gen_adddi3 (rt
, ra
, GEN_INT (c
));
32154 return gen_addsi3 (rt
, ra
, GEN_INT (c
));
32157 /* Emit -fsplit-stack prologue, which goes before the regular function
32158 prologue (at local entry point in the case of ELFv2). */
32161 rs6000_expand_split_stack_prologue (void)
32163 rs6000_stack_t
*info
= rs6000_stack_info ();
32164 unsigned HOST_WIDE_INT allocate
;
32165 long alloc_hi
, alloc_lo
;
32166 rtx r0
, r1
, r12
, lr
, ok_label
, compare
, jump
, call_fusage
;
32169 gcc_assert (flag_split_stack
&& reload_completed
);
32174 if (global_regs
[29])
32176 error ("-fsplit-stack uses register r29");
32177 inform (DECL_SOURCE_LOCATION (global_regs_decl
[29]),
32178 "conflicts with %qD", global_regs_decl
[29]);
32181 allocate
= info
->total_size
;
32182 if (allocate
> (unsigned HOST_WIDE_INT
) 1 << 31)
32184 sorry ("Stack frame larger than 2G is not supported for -fsplit-stack");
32187 if (morestack_ref
== NULL_RTX
)
32189 morestack_ref
= gen_rtx_SYMBOL_REF (Pmode
, "__morestack");
32190 SYMBOL_REF_FLAGS (morestack_ref
) |= (SYMBOL_FLAG_LOCAL
32191 | SYMBOL_FLAG_FUNCTION
);
32194 r0
= gen_rtx_REG (Pmode
, 0);
32195 r1
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
32196 r12
= gen_rtx_REG (Pmode
, 12);
32197 emit_insn (gen_load_split_stack_limit (r0
));
32198 /* Always emit two insns here to calculate the requested stack,
32199 so that the linker can edit them when adjusting size for calling
32200 non-split-stack code. */
32201 alloc_hi
= (-allocate
+ 0x8000) & ~0xffffL
;
32202 alloc_lo
= -allocate
- alloc_hi
;
32205 emit_insn (gen_add3_const (r12
, r1
, alloc_hi
));
32207 emit_insn (gen_add3_const (r12
, r12
, alloc_lo
));
32209 emit_insn (gen_nop ());
32213 emit_insn (gen_add3_const (r12
, r1
, alloc_lo
));
32214 emit_insn (gen_nop ());
32217 compare
= gen_rtx_REG (CCUNSmode
, CR7_REGNO
);
32218 emit_insn (gen_rtx_SET (compare
, gen_rtx_COMPARE (CCUNSmode
, r12
, r0
)));
32219 ok_label
= gen_label_rtx ();
32220 jump
= gen_rtx_IF_THEN_ELSE (VOIDmode
,
32221 gen_rtx_GEU (VOIDmode
, compare
, const0_rtx
),
32222 gen_rtx_LABEL_REF (VOIDmode
, ok_label
),
32224 insn
= emit_jump_insn (gen_rtx_SET (pc_rtx
, jump
));
32225 JUMP_LABEL (insn
) = ok_label
;
32226 /* Mark the jump as very likely to be taken. */
32227 add_reg_br_prob_note (insn
, profile_probability::very_likely ());
32229 lr
= gen_rtx_REG (Pmode
, LR_REGNO
);
32230 insn
= emit_move_insn (r0
, lr
);
32231 RTX_FRAME_RELATED_P (insn
) = 1;
32232 insn
= emit_insn (gen_frame_store (r0
, r1
, info
->lr_save_offset
));
32233 RTX_FRAME_RELATED_P (insn
) = 1;
32235 insn
= emit_call_insn (gen_call (gen_rtx_MEM (SImode
, morestack_ref
),
32236 const0_rtx
, const0_rtx
));
32237 call_fusage
= NULL_RTX
;
32238 use_reg (&call_fusage
, r12
);
32239 /* Say the call uses r0, even though it doesn't, to stop regrename
32240 from twiddling with the insns saving lr, trashing args for cfun.
32241 The insns restoring lr are similarly protected by making
32242 split_stack_return use r0. */
32243 use_reg (&call_fusage
, r0
);
32244 add_function_usage_to (insn
, call_fusage
);
32245 /* Indicate that this function can't jump to non-local gotos. */
32246 make_reg_eh_region_note_nothrow_nononlocal (insn
);
32247 emit_insn (gen_frame_load (r0
, r1
, info
->lr_save_offset
));
32248 insn
= emit_move_insn (lr
, r0
);
32249 add_reg_note (insn
, REG_CFA_RESTORE
, lr
);
32250 RTX_FRAME_RELATED_P (insn
) = 1;
32251 emit_insn (gen_split_stack_return ());
32253 emit_label (ok_label
);
32254 LABEL_NUSES (ok_label
) = 1;
32257 /* Return the internal arg pointer used for function incoming
32258 arguments. When -fsplit-stack, the arg pointer is r12 so we need
32259 to copy it to a pseudo in order for it to be preserved over calls
32260 and suchlike. We'd really like to use a pseudo here for the
32261 internal arg pointer but data-flow analysis is not prepared to
32262 accept pseudos as live at the beginning of a function. */
32265 rs6000_internal_arg_pointer (void)
32267 if (flag_split_stack
32268 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun
->decl
))
32272 if (cfun
->machine
->split_stack_arg_pointer
== NULL_RTX
)
32276 cfun
->machine
->split_stack_arg_pointer
= gen_reg_rtx (Pmode
);
32277 REG_POINTER (cfun
->machine
->split_stack_arg_pointer
) = 1;
32279 /* Put the pseudo initialization right after the note at the
32280 beginning of the function. */
32281 pat
= gen_rtx_SET (cfun
->machine
->split_stack_arg_pointer
,
32282 gen_rtx_REG (Pmode
, 12));
32283 push_topmost_sequence ();
32284 emit_insn_after (pat
, get_insns ());
32285 pop_topmost_sequence ();
32287 return plus_constant (Pmode
, cfun
->machine
->split_stack_arg_pointer
,
32288 FIRST_PARM_OFFSET (current_function_decl
));
32290 return virtual_incoming_args_rtx
;
32293 /* We may have to tell the dataflow pass that the split stack prologue
32294 is initializing a register. */
32297 rs6000_live_on_entry (bitmap regs
)
32299 if (flag_split_stack
)
32300 bitmap_set_bit (regs
, 12);
32303 /* Emit -fsplit-stack dynamic stack allocation space check. */
32306 rs6000_split_stack_space_check (rtx size
, rtx label
)
32308 rtx sp
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
32309 rtx limit
= gen_reg_rtx (Pmode
);
32310 rtx requested
= gen_reg_rtx (Pmode
);
32311 rtx cmp
= gen_reg_rtx (CCUNSmode
);
32314 emit_insn (gen_load_split_stack_limit (limit
));
32315 if (CONST_INT_P (size
))
32316 emit_insn (gen_add3_insn (requested
, sp
, GEN_INT (-INTVAL (size
))));
32319 size
= force_reg (Pmode
, size
);
32320 emit_move_insn (requested
, gen_rtx_MINUS (Pmode
, sp
, size
));
32322 emit_insn (gen_rtx_SET (cmp
, gen_rtx_COMPARE (CCUNSmode
, requested
, limit
)));
32323 jump
= gen_rtx_IF_THEN_ELSE (VOIDmode
,
32324 gen_rtx_GEU (VOIDmode
, cmp
, const0_rtx
),
32325 gen_rtx_LABEL_REF (VOIDmode
, label
),
32327 jump
= emit_jump_insn (gen_rtx_SET (pc_rtx
, jump
));
32328 JUMP_LABEL (jump
) = label
;
32331 /* A C compound statement that outputs the assembler code for a thunk
32332 function, used to implement C++ virtual function calls with
32333 multiple inheritance. The thunk acts as a wrapper around a virtual
32334 function, adjusting the implicit object parameter before handing
32335 control off to the real function.
32337 First, emit code to add the integer DELTA to the location that
32338 contains the incoming first argument. Assume that this argument
32339 contains a pointer, and is the one used to pass the `this' pointer
32340 in C++. This is the incoming argument *before* the function
32341 prologue, e.g. `%o0' on a sparc. The addition must preserve the
32342 values of all other incoming arguments.
32344 After the addition, emit code to jump to FUNCTION, which is a
32345 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
32346 not touch the return address. Hence returning from FUNCTION will
32347 return to whoever called the current `thunk'.
32349 The effect must be as if FUNCTION had been called directly with the
32350 adjusted first argument. This macro is responsible for emitting
32351 all of the code for a thunk function; output_function_prologue()
32352 and output_function_epilogue() are not invoked.
32354 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
32355 been extracted from it.) It might possibly be useful on some
32356 targets, but probably not.
32358 If you do not define this macro, the target-independent code in the
32359 C++ frontend will generate a less efficient heavyweight thunk that
32360 calls FUNCTION instead of jumping to it. The generic approach does
32361 not support varargs. */
32364 rs6000_output_mi_thunk (FILE *file
, tree thunk_fndecl ATTRIBUTE_UNUSED
,
32365 HOST_WIDE_INT delta
, HOST_WIDE_INT vcall_offset
,
32368 rtx this_rtx
, funexp
;
32371 reload_completed
= 1;
32372 epilogue_completed
= 1;
32374 /* Mark the end of the (empty) prologue. */
32375 emit_note (NOTE_INSN_PROLOGUE_END
);
32377 /* Find the "this" pointer. If the function returns a structure,
32378 the structure return pointer is in r3. */
32379 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
))
32380 this_rtx
= gen_rtx_REG (Pmode
, 4);
32382 this_rtx
= gen_rtx_REG (Pmode
, 3);
32384 /* Apply the constant offset, if required. */
32386 emit_insn (gen_add3_insn (this_rtx
, this_rtx
, GEN_INT (delta
)));
32388 /* Apply the offset from the vtable, if required. */
32391 rtx vcall_offset_rtx
= GEN_INT (vcall_offset
);
32392 rtx tmp
= gen_rtx_REG (Pmode
, 12);
32394 emit_move_insn (tmp
, gen_rtx_MEM (Pmode
, this_rtx
));
32395 if (((unsigned HOST_WIDE_INT
) vcall_offset
) + 0x8000 >= 0x10000)
32397 emit_insn (gen_add3_insn (tmp
, tmp
, vcall_offset_rtx
));
32398 emit_move_insn (tmp
, gen_rtx_MEM (Pmode
, tmp
));
32402 rtx loc
= gen_rtx_PLUS (Pmode
, tmp
, vcall_offset_rtx
);
32404 emit_move_insn (tmp
, gen_rtx_MEM (Pmode
, loc
));
32406 emit_insn (gen_add3_insn (this_rtx
, this_rtx
, tmp
));
32409 /* Generate a tail call to the target function. */
32410 if (!TREE_USED (function
))
32412 assemble_external (function
);
32413 TREE_USED (function
) = 1;
32415 funexp
= XEXP (DECL_RTL (function
), 0);
32416 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
32419 if (MACHOPIC_INDIRECT
)
32420 funexp
= machopic_indirect_call_target (funexp
);
32423 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
32424 generate sibcall RTL explicitly. */
32425 insn
= emit_call_insn (
32426 gen_rtx_PARALLEL (VOIDmode
,
32428 gen_rtx_CALL (VOIDmode
,
32429 funexp
, const0_rtx
),
32430 gen_rtx_USE (VOIDmode
, const0_rtx
),
32431 simple_return_rtx
)));
32432 SIBLING_CALL_P (insn
) = 1;
32435 /* Run just enough of rest_of_compilation to get the insns emitted.
32436 There's not really enough bulk here to make other passes such as
32437 instruction scheduling worth while. Note that use_thunk calls
32438 assemble_start_function and assemble_end_function. */
32439 insn
= get_insns ();
32440 shorten_branches (insn
);
32441 final_start_function (insn
, file
, 1);
32442 final (insn
, file
, 1);
32443 final_end_function ();
32445 reload_completed
= 0;
32446 epilogue_completed
= 0;
32449 /* A quick summary of the various types of 'constant-pool tables'
32452 Target Flags Name One table per
32453 AIX (none) AIX TOC object file
32454 AIX -mfull-toc AIX TOC object file
32455 AIX -mminimal-toc AIX minimal TOC translation unit
32456 SVR4/EABI (none) SVR4 SDATA object file
32457 SVR4/EABI -fpic SVR4 pic object file
32458 SVR4/EABI -fPIC SVR4 PIC translation unit
32459 SVR4/EABI -mrelocatable EABI TOC function
32460 SVR4/EABI -maix AIX TOC object file
32461 SVR4/EABI -maix -mminimal-toc
32462 AIX minimal TOC translation unit
32464 Name Reg. Set by entries contains:
32465 made by addrs? fp? sum?
32467 AIX TOC 2 crt0 as Y option option
32468 AIX minimal TOC 30 prolog gcc Y Y option
32469 SVR4 SDATA 13 crt0 gcc N Y N
32470 SVR4 pic 30 prolog ld Y not yet N
32471 SVR4 PIC 30 prolog gcc Y option option
32472 EABI TOC 30 prolog gcc Y option option
32476 /* Hash functions for the hash table. */
32479 rs6000_hash_constant (rtx k
)
32481 enum rtx_code code
= GET_CODE (k
);
32482 machine_mode mode
= GET_MODE (k
);
32483 unsigned result
= (code
<< 3) ^ mode
;
32484 const char *format
;
32487 format
= GET_RTX_FORMAT (code
);
32488 flen
= strlen (format
);
32494 return result
* 1231 + (unsigned) INSN_UID (XEXP (k
, 0));
32496 case CONST_WIDE_INT
:
32499 flen
= CONST_WIDE_INT_NUNITS (k
);
32500 for (i
= 0; i
< flen
; i
++)
32501 result
= result
* 613 + CONST_WIDE_INT_ELT (k
, i
);
32506 if (mode
!= VOIDmode
)
32507 return real_hash (CONST_DOUBLE_REAL_VALUE (k
)) * result
;
32519 for (; fidx
< flen
; fidx
++)
32520 switch (format
[fidx
])
32525 const char *str
= XSTR (k
, fidx
);
32526 len
= strlen (str
);
32527 result
= result
* 613 + len
;
32528 for (i
= 0; i
< len
; i
++)
32529 result
= result
* 613 + (unsigned) str
[i
];
32534 result
= result
* 1231 + rs6000_hash_constant (XEXP (k
, fidx
));
32538 result
= result
* 613 + (unsigned) XINT (k
, fidx
);
32541 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT
))
32542 result
= result
* 613 + (unsigned) XWINT (k
, fidx
);
32546 for (i
= 0; i
< sizeof (HOST_WIDE_INT
) / sizeof (unsigned); i
++)
32547 result
= result
* 613 + (unsigned) (XWINT (k
, fidx
)
32554 gcc_unreachable ();
32561 toc_hasher::hash (toc_hash_struct
*thc
)
32563 return rs6000_hash_constant (thc
->key
) ^ thc
->key_mode
;
32566 /* Compare H1 and H2 for equivalence. */
32569 toc_hasher::equal (toc_hash_struct
*h1
, toc_hash_struct
*h2
)
32574 if (h1
->key_mode
!= h2
->key_mode
)
32577 return rtx_equal_p (r1
, r2
);
32580 /* These are the names given by the C++ front-end to vtables, and
32581 vtable-like objects. Ideally, this logic should not be here;
32582 instead, there should be some programmatic way of inquiring as
32583 to whether or not an object is a vtable. */
32585 #define VTABLE_NAME_P(NAME) \
32586 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
32587 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
32588 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
32589 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
32590 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
32592 #ifdef NO_DOLLAR_IN_LABEL
32593 /* Return a GGC-allocated character string translating dollar signs in
32594 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
32597 rs6000_xcoff_strip_dollar (const char *name
)
32603 q
= (const char *) strchr (name
, '$');
32605 if (q
== 0 || q
== name
)
32608 len
= strlen (name
);
32609 strip
= XALLOCAVEC (char, len
+ 1);
32610 strcpy (strip
, name
);
32611 p
= strip
+ (q
- name
);
32615 p
= strchr (p
+ 1, '$');
32618 return ggc_alloc_string (strip
, len
);
32623 rs6000_output_symbol_ref (FILE *file
, rtx x
)
32625 const char *name
= XSTR (x
, 0);
32627 /* Currently C++ toc references to vtables can be emitted before it
32628 is decided whether the vtable is public or private. If this is
32629 the case, then the linker will eventually complain that there is
32630 a reference to an unknown section. Thus, for vtables only,
32631 we emit the TOC reference to reference the identifier and not the
32633 if (VTABLE_NAME_P (name
))
32635 RS6000_OUTPUT_BASENAME (file
, name
);
32638 assemble_name (file
, name
);
32641 /* Output a TOC entry. We derive the entry name from what is being
32645 output_toc (FILE *file
, rtx x
, int labelno
, machine_mode mode
)
32648 const char *name
= buf
;
32650 HOST_WIDE_INT offset
= 0;
32652 gcc_assert (!TARGET_NO_TOC
);
32654 /* When the linker won't eliminate them, don't output duplicate
32655 TOC entries (this happens on AIX if there is any kind of TOC,
32656 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
32658 if (TARGET_TOC
&& GET_CODE (x
) != LABEL_REF
)
32660 struct toc_hash_struct
*h
;
32662 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
32663 time because GGC is not initialized at that point. */
32664 if (toc_hash_table
== NULL
)
32665 toc_hash_table
= hash_table
<toc_hasher
>::create_ggc (1021);
32667 h
= ggc_alloc
<toc_hash_struct
> ();
32669 h
->key_mode
= mode
;
32670 h
->labelno
= labelno
;
32672 toc_hash_struct
**found
= toc_hash_table
->find_slot (h
, INSERT
);
32673 if (*found
== NULL
)
32675 else /* This is indeed a duplicate.
32676 Set this label equal to that label. */
32678 fputs ("\t.set ", file
);
32679 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LC");
32680 fprintf (file
, "%d,", labelno
);
32681 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LC");
32682 fprintf (file
, "%d\n", ((*found
)->labelno
));
32685 if (TARGET_XCOFF
&& GET_CODE (x
) == SYMBOL_REF
32686 && (SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_GLOBAL_DYNAMIC
32687 || SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
))
32689 fputs ("\t.set ", file
);
32690 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LCM");
32691 fprintf (file
, "%d,", labelno
);
32692 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LCM");
32693 fprintf (file
, "%d\n", ((*found
)->labelno
));
32700 /* If we're going to put a double constant in the TOC, make sure it's
32701 aligned properly when strict alignment is on. */
32702 if ((CONST_DOUBLE_P (x
) || CONST_WIDE_INT_P (x
))
32703 && STRICT_ALIGNMENT
32704 && GET_MODE_BITSIZE (mode
) >= 64
32705 && ! (TARGET_NO_FP_IN_TOC
&& ! TARGET_MINIMAL_TOC
)) {
32706 ASM_OUTPUT_ALIGN (file
, 3);
32709 (*targetm
.asm_out
.internal_label
) (file
, "LC", labelno
);
32711 /* Handle FP constants specially. Note that if we have a minimal
32712 TOC, things we put here aren't actually in the TOC, so we can allow
32714 if (GET_CODE (x
) == CONST_DOUBLE
&&
32715 (GET_MODE (x
) == TFmode
|| GET_MODE (x
) == TDmode
32716 || GET_MODE (x
) == IFmode
|| GET_MODE (x
) == KFmode
))
32720 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
32721 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x
), k
);
32723 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x
), k
);
32727 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
32728 fputs (DOUBLE_INT_ASM_OP
, file
);
32730 fprintf (file
, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
32731 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
32732 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
32733 fprintf (file
, "0x%lx%08lx,0x%lx%08lx\n",
32734 k
[WORDS_BIG_ENDIAN
? 0 : 1] & 0xffffffff,
32735 k
[WORDS_BIG_ENDIAN
? 1 : 0] & 0xffffffff,
32736 k
[WORDS_BIG_ENDIAN
? 2 : 3] & 0xffffffff,
32737 k
[WORDS_BIG_ENDIAN
? 3 : 2] & 0xffffffff);
32742 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
32743 fputs ("\t.long ", file
);
32745 fprintf (file
, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
32746 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
32747 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
32748 fprintf (file
, "0x%lx,0x%lx,0x%lx,0x%lx\n",
32749 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
32750 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
32754 else if (GET_CODE (x
) == CONST_DOUBLE
&&
32755 (GET_MODE (x
) == DFmode
|| GET_MODE (x
) == DDmode
))
32759 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
32760 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x
), k
);
32762 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x
), k
);
32766 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
32767 fputs (DOUBLE_INT_ASM_OP
, file
);
32769 fprintf (file
, "\t.tc FD_%lx_%lx[TC],",
32770 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
32771 fprintf (file
, "0x%lx%08lx\n",
32772 k
[WORDS_BIG_ENDIAN
? 0 : 1] & 0xffffffff,
32773 k
[WORDS_BIG_ENDIAN
? 1 : 0] & 0xffffffff);
32778 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
32779 fputs ("\t.long ", file
);
32781 fprintf (file
, "\t.tc FD_%lx_%lx[TC],",
32782 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
32783 fprintf (file
, "0x%lx,0x%lx\n",
32784 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
32788 else if (GET_CODE (x
) == CONST_DOUBLE
&&
32789 (GET_MODE (x
) == SFmode
|| GET_MODE (x
) == SDmode
))
32793 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
32794 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x
), l
);
32796 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x
), l
);
32800 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
32801 fputs (DOUBLE_INT_ASM_OP
, file
);
32803 fprintf (file
, "\t.tc FS_%lx[TC],", l
& 0xffffffff);
32804 if (WORDS_BIG_ENDIAN
)
32805 fprintf (file
, "0x%lx00000000\n", l
& 0xffffffff);
32807 fprintf (file
, "0x%lx\n", l
& 0xffffffff);
32812 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
32813 fputs ("\t.long ", file
);
32815 fprintf (file
, "\t.tc FS_%lx[TC],", l
& 0xffffffff);
32816 fprintf (file
, "0x%lx\n", l
& 0xffffffff);
32820 else if (GET_MODE (x
) == VOIDmode
&& GET_CODE (x
) == CONST_INT
)
32822 unsigned HOST_WIDE_INT low
;
32823 HOST_WIDE_INT high
;
32825 low
= INTVAL (x
) & 0xffffffff;
32826 high
= (HOST_WIDE_INT
) INTVAL (x
) >> 32;
32828 /* TOC entries are always Pmode-sized, so when big-endian
32829 smaller integer constants in the TOC need to be padded.
32830 (This is still a win over putting the constants in
32831 a separate constant pool, because then we'd have
32832 to have both a TOC entry _and_ the actual constant.)
32834 For a 32-bit target, CONST_INT values are loaded and shifted
32835 entirely within `low' and can be stored in one TOC entry. */
32837 /* It would be easy to make this work, but it doesn't now. */
32838 gcc_assert (!TARGET_64BIT
|| POINTER_SIZE
>= GET_MODE_BITSIZE (mode
));
32840 if (WORDS_BIG_ENDIAN
&& POINTER_SIZE
> GET_MODE_BITSIZE (mode
))
32843 low
<<= POINTER_SIZE
- GET_MODE_BITSIZE (mode
);
32844 high
= (HOST_WIDE_INT
) low
>> 32;
32850 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
32851 fputs (DOUBLE_INT_ASM_OP
, file
);
32853 fprintf (file
, "\t.tc ID_%lx_%lx[TC],",
32854 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
32855 fprintf (file
, "0x%lx%08lx\n",
32856 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
32861 if (POINTER_SIZE
< GET_MODE_BITSIZE (mode
))
32863 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
32864 fputs ("\t.long ", file
);
32866 fprintf (file
, "\t.tc ID_%lx_%lx[TC],",
32867 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
32868 fprintf (file
, "0x%lx,0x%lx\n",
32869 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
32873 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
32874 fputs ("\t.long ", file
);
32876 fprintf (file
, "\t.tc IS_%lx[TC],", (long) low
& 0xffffffff);
32877 fprintf (file
, "0x%lx\n", (long) low
& 0xffffffff);
32883 if (GET_CODE (x
) == CONST
)
32885 gcc_assert (GET_CODE (XEXP (x
, 0)) == PLUS
32886 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
);
32888 base
= XEXP (XEXP (x
, 0), 0);
32889 offset
= INTVAL (XEXP (XEXP (x
, 0), 1));
32892 switch (GET_CODE (base
))
32895 name
= XSTR (base
, 0);
32899 ASM_GENERATE_INTERNAL_LABEL (buf
, "L",
32900 CODE_LABEL_NUMBER (XEXP (base
, 0)));
32904 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (base
));
32908 gcc_unreachable ();
32911 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
32912 fputs (TARGET_32BIT
? "\t.long " : DOUBLE_INT_ASM_OP
, file
);
32915 fputs ("\t.tc ", file
);
32916 RS6000_OUTPUT_BASENAME (file
, name
);
32919 fprintf (file
, ".N" HOST_WIDE_INT_PRINT_UNSIGNED
, - offset
);
32921 fprintf (file
, ".P" HOST_WIDE_INT_PRINT_UNSIGNED
, offset
);
32923 /* Mark large TOC symbols on AIX with [TE] so they are mapped
32924 after other TOC symbols, reducing overflow of small TOC access
32925 to [TC] symbols. */
32926 fputs (TARGET_XCOFF
&& TARGET_CMODEL
!= CMODEL_SMALL
32927 ? "[TE]," : "[TC],", file
);
32930 /* Currently C++ toc references to vtables can be emitted before it
32931 is decided whether the vtable is public or private. If this is
32932 the case, then the linker will eventually complain that there is
32933 a TOC reference to an unknown section. Thus, for vtables only,
32934 we emit the TOC reference to reference the symbol and not the
32936 if (VTABLE_NAME_P (name
))
32938 RS6000_OUTPUT_BASENAME (file
, name
);
32940 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, offset
);
32941 else if (offset
> 0)
32942 fprintf (file
, "+" HOST_WIDE_INT_PRINT_DEC
, offset
);
32945 output_addr_const (file
, x
);
32948 if (TARGET_XCOFF
&& GET_CODE (base
) == SYMBOL_REF
)
32950 switch (SYMBOL_REF_TLS_MODEL (base
))
32954 case TLS_MODEL_LOCAL_EXEC
:
32955 fputs ("@le", file
);
32957 case TLS_MODEL_INITIAL_EXEC
:
32958 fputs ("@ie", file
);
32960 /* Use global-dynamic for local-dynamic. */
32961 case TLS_MODEL_GLOBAL_DYNAMIC
:
32962 case TLS_MODEL_LOCAL_DYNAMIC
:
32964 (*targetm
.asm_out
.internal_label
) (file
, "LCM", labelno
);
32965 fputs ("\t.tc .", file
);
32966 RS6000_OUTPUT_BASENAME (file
, name
);
32967 fputs ("[TC],", file
);
32968 output_addr_const (file
, x
);
32969 fputs ("@m", file
);
32972 gcc_unreachable ();
32980 /* Output an assembler pseudo-op to write an ASCII string of N characters
32981 starting at P to FILE.
32983 On the RS/6000, we have to do this using the .byte operation and
32984 write out special characters outside the quoted string.
32985 Also, the assembler is broken; very long strings are truncated,
32986 so we must artificially break them up early. */
32989 output_ascii (FILE *file
, const char *p
, int n
)
32992 int i
, count_string
;
32993 const char *for_string
= "\t.byte \"";
32994 const char *for_decimal
= "\t.byte ";
32995 const char *to_close
= NULL
;
32998 for (i
= 0; i
< n
; i
++)
33001 if (c
>= ' ' && c
< 0177)
33004 fputs (for_string
, file
);
33007 /* Write two quotes to get one. */
33015 for_decimal
= "\"\n\t.byte ";
33019 if (count_string
>= 512)
33021 fputs (to_close
, file
);
33023 for_string
= "\t.byte \"";
33024 for_decimal
= "\t.byte ";
33032 fputs (for_decimal
, file
);
33033 fprintf (file
, "%d", c
);
33035 for_string
= "\n\t.byte \"";
33036 for_decimal
= ", ";
33042 /* Now close the string if we have written one. Then end the line. */
33044 fputs (to_close
, file
);
33047 /* Generate a unique section name for FILENAME for a section type
33048 represented by SECTION_DESC. Output goes into BUF.
33050 SECTION_DESC can be any string, as long as it is different for each
33051 possible section type.
33053 We name the section in the same manner as xlc. The name begins with an
33054 underscore followed by the filename (after stripping any leading directory
33055 names) with the last period replaced by the string SECTION_DESC. If
33056 FILENAME does not contain a period, SECTION_DESC is appended to the end of
33060 rs6000_gen_section_name (char **buf
, const char *filename
,
33061 const char *section_desc
)
33063 const char *q
, *after_last_slash
, *last_period
= 0;
33067 after_last_slash
= filename
;
33068 for (q
= filename
; *q
; q
++)
33071 after_last_slash
= q
+ 1;
33072 else if (*q
== '.')
33076 len
= strlen (after_last_slash
) + strlen (section_desc
) + 2;
33077 *buf
= (char *) xmalloc (len
);
33082 for (q
= after_last_slash
; *q
; q
++)
33084 if (q
== last_period
)
33086 strcpy (p
, section_desc
);
33087 p
+= strlen (section_desc
);
33091 else if (ISALNUM (*q
))
33095 if (last_period
== 0)
33096 strcpy (p
, section_desc
);
33101 /* Emit profile function. */
33104 output_profile_hook (int labelno ATTRIBUTE_UNUSED
)
33106 /* Non-standard profiling for kernels, which just saves LR then calls
33107 _mcount without worrying about arg saves. The idea is to change
33108 the function prologue as little as possible as it isn't easy to
33109 account for arg save/restore code added just for _mcount. */
33110 if (TARGET_PROFILE_KERNEL
)
33113 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
33115 #ifndef NO_PROFILE_COUNTERS
33116 # define NO_PROFILE_COUNTERS 0
33118 if (NO_PROFILE_COUNTERS
)
33119 emit_library_call (init_one_libfunc (RS6000_MCOUNT
),
33120 LCT_NORMAL
, VOIDmode
);
33124 const char *label_name
;
33127 ASM_GENERATE_INTERNAL_LABEL (buf
, "LP", labelno
);
33128 label_name
= ggc_strdup ((*targetm
.strip_name_encoding
) (buf
));
33129 fun
= gen_rtx_SYMBOL_REF (Pmode
, label_name
);
33131 emit_library_call (init_one_libfunc (RS6000_MCOUNT
),
33132 LCT_NORMAL
, VOIDmode
, fun
, Pmode
);
33135 else if (DEFAULT_ABI
== ABI_DARWIN
)
33137 const char *mcount_name
= RS6000_MCOUNT
;
33138 int caller_addr_regno
= LR_REGNO
;
33140 /* Be conservative and always set this, at least for now. */
33141 crtl
->uses_pic_offset_table
= 1;
33144 /* For PIC code, set up a stub and collect the caller's address
33145 from r0, which is where the prologue puts it. */
33146 if (MACHOPIC_INDIRECT
33147 && crtl
->uses_pic_offset_table
)
33148 caller_addr_regno
= 0;
33150 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, mcount_name
),
33151 LCT_NORMAL
, VOIDmode
,
33152 gen_rtx_REG (Pmode
, caller_addr_regno
), Pmode
);
33156 /* Write function profiler code. */
33159 output_function_profiler (FILE *file
, int labelno
)
33163 switch (DEFAULT_ABI
)
33166 gcc_unreachable ();
33171 warning (0, "no profiling of 64-bit code for this ABI");
33174 ASM_GENERATE_INTERNAL_LABEL (buf
, "LP", labelno
);
33175 fprintf (file
, "\tmflr %s\n", reg_names
[0]);
33176 if (NO_PROFILE_COUNTERS
)
33178 asm_fprintf (file
, "\tstw %s,4(%s)\n",
33179 reg_names
[0], reg_names
[1]);
33181 else if (TARGET_SECURE_PLT
&& flag_pic
)
33183 if (TARGET_LINK_STACK
)
33186 get_ppc476_thunk_name (name
);
33187 asm_fprintf (file
, "\tbl %s\n", name
);
33190 asm_fprintf (file
, "\tbcl 20,31,1f\n1:\n");
33191 asm_fprintf (file
, "\tstw %s,4(%s)\n",
33192 reg_names
[0], reg_names
[1]);
33193 asm_fprintf (file
, "\tmflr %s\n", reg_names
[12]);
33194 asm_fprintf (file
, "\taddis %s,%s,",
33195 reg_names
[12], reg_names
[12]);
33196 assemble_name (file
, buf
);
33197 asm_fprintf (file
, "-1b@ha\n\tla %s,", reg_names
[0]);
33198 assemble_name (file
, buf
);
33199 asm_fprintf (file
, "-1b@l(%s)\n", reg_names
[12]);
33201 else if (flag_pic
== 1)
33203 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file
);
33204 asm_fprintf (file
, "\tstw %s,4(%s)\n",
33205 reg_names
[0], reg_names
[1]);
33206 asm_fprintf (file
, "\tmflr %s\n", reg_names
[12]);
33207 asm_fprintf (file
, "\tlwz %s,", reg_names
[0]);
33208 assemble_name (file
, buf
);
33209 asm_fprintf (file
, "@got(%s)\n", reg_names
[12]);
33211 else if (flag_pic
> 1)
33213 asm_fprintf (file
, "\tstw %s,4(%s)\n",
33214 reg_names
[0], reg_names
[1]);
33215 /* Now, we need to get the address of the label. */
33216 if (TARGET_LINK_STACK
)
33219 get_ppc476_thunk_name (name
);
33220 asm_fprintf (file
, "\tbl %s\n\tb 1f\n\t.long ", name
);
33221 assemble_name (file
, buf
);
33222 fputs ("-.\n1:", file
);
33223 asm_fprintf (file
, "\tmflr %s\n", reg_names
[11]);
33224 asm_fprintf (file
, "\taddi %s,%s,4\n",
33225 reg_names
[11], reg_names
[11]);
33229 fputs ("\tbcl 20,31,1f\n\t.long ", file
);
33230 assemble_name (file
, buf
);
33231 fputs ("-.\n1:", file
);
33232 asm_fprintf (file
, "\tmflr %s\n", reg_names
[11]);
33234 asm_fprintf (file
, "\tlwz %s,0(%s)\n",
33235 reg_names
[0], reg_names
[11]);
33236 asm_fprintf (file
, "\tadd %s,%s,%s\n",
33237 reg_names
[0], reg_names
[0], reg_names
[11]);
33241 asm_fprintf (file
, "\tlis %s,", reg_names
[12]);
33242 assemble_name (file
, buf
);
33243 fputs ("@ha\n", file
);
33244 asm_fprintf (file
, "\tstw %s,4(%s)\n",
33245 reg_names
[0], reg_names
[1]);
33246 asm_fprintf (file
, "\tla %s,", reg_names
[0]);
33247 assemble_name (file
, buf
);
33248 asm_fprintf (file
, "@l(%s)\n", reg_names
[12]);
33251 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
33252 fprintf (file
, "\tbl %s%s\n",
33253 RS6000_MCOUNT
, flag_pic
? "@plt" : "");
33259 /* Don't do anything, done in output_profile_hook (). */
33266 /* The following variable value is the last issued insn. */
33268 static rtx_insn
*last_scheduled_insn
;
33270 /* The following variable helps to balance issuing of load and
33271 store instructions */
33273 static int load_store_pendulum
;
33275 /* The following variable helps pair divide insns during scheduling. */
33276 static int divide_cnt
;
33277 /* The following variable helps pair and alternate vector and vector load
33278 insns during scheduling. */
33279 static int vec_pairing
;
33282 /* Power4 load update and store update instructions are cracked into a
33283 load or store and an integer insn which are executed in the same cycle.
33284 Branches have their own dispatch slot which does not count against the
33285 GCC issue rate, but it changes the program flow so there are no other
33286 instructions to issue in this cycle. */
33289 rs6000_variable_issue_1 (rtx_insn
*insn
, int more
)
33291 last_scheduled_insn
= insn
;
33292 if (GET_CODE (PATTERN (insn
)) == USE
33293 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
33295 cached_can_issue_more
= more
;
33296 return cached_can_issue_more
;
33299 if (insn_terminates_group_p (insn
, current_group
))
33301 cached_can_issue_more
= 0;
33302 return cached_can_issue_more
;
33305 /* If no reservation, but reach here */
33306 if (recog_memoized (insn
) < 0)
33309 if (rs6000_sched_groups
)
33311 if (is_microcoded_insn (insn
))
33312 cached_can_issue_more
= 0;
33313 else if (is_cracked_insn (insn
))
33314 cached_can_issue_more
= more
> 2 ? more
- 2 : 0;
33316 cached_can_issue_more
= more
- 1;
33318 return cached_can_issue_more
;
33321 if (rs6000_cpu_attr
== CPU_CELL
&& is_nonpipeline_insn (insn
))
33324 cached_can_issue_more
= more
- 1;
33325 return cached_can_issue_more
;
33329 rs6000_variable_issue (FILE *stream
, int verbose
, rtx_insn
*insn
, int more
)
33331 int r
= rs6000_variable_issue_1 (insn
, more
);
33333 fprintf (stream
, "// rs6000_variable_issue (more = %d) = %d\n", more
, r
);
33337 /* Adjust the cost of a scheduling dependency. Return the new cost of
33338 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
33341 rs6000_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep_insn
, int cost
,
33344 enum attr_type attr_type
;
33346 if (recog_memoized (insn
) < 0 || recog_memoized (dep_insn
) < 0)
33353 /* Data dependency; DEP_INSN writes a register that INSN reads
33354 some cycles later. */
33356 /* Separate a load from a narrower, dependent store. */
33357 if ((rs6000_sched_groups
|| rs6000_cpu_attr
== CPU_POWER9
)
33358 && GET_CODE (PATTERN (insn
)) == SET
33359 && GET_CODE (PATTERN (dep_insn
)) == SET
33360 && GET_CODE (XEXP (PATTERN (insn
), 1)) == MEM
33361 && GET_CODE (XEXP (PATTERN (dep_insn
), 0)) == MEM
33362 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn
), 1)))
33363 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn
), 0)))))
33366 attr_type
= get_attr_type (insn
);
33371 /* Tell the first scheduling pass about the latency between
33372 a mtctr and bctr (and mtlr and br/blr). The first
33373 scheduling pass will not know about this latency since
33374 the mtctr instruction, which has the latency associated
33375 to it, will be generated by reload. */
33378 /* Leave some extra cycles between a compare and its
33379 dependent branch, to inhibit expensive mispredicts. */
33380 if ((rs6000_cpu_attr
== CPU_PPC603
33381 || rs6000_cpu_attr
== CPU_PPC604
33382 || rs6000_cpu_attr
== CPU_PPC604E
33383 || rs6000_cpu_attr
== CPU_PPC620
33384 || rs6000_cpu_attr
== CPU_PPC630
33385 || rs6000_cpu_attr
== CPU_PPC750
33386 || rs6000_cpu_attr
== CPU_PPC7400
33387 || rs6000_cpu_attr
== CPU_PPC7450
33388 || rs6000_cpu_attr
== CPU_PPCE5500
33389 || rs6000_cpu_attr
== CPU_PPCE6500
33390 || rs6000_cpu_attr
== CPU_POWER4
33391 || rs6000_cpu_attr
== CPU_POWER5
33392 || rs6000_cpu_attr
== CPU_POWER7
33393 || rs6000_cpu_attr
== CPU_POWER8
33394 || rs6000_cpu_attr
== CPU_POWER9
33395 || rs6000_cpu_attr
== CPU_CELL
)
33396 && recog_memoized (dep_insn
)
33397 && (INSN_CODE (dep_insn
) >= 0))
33399 switch (get_attr_type (dep_insn
))
33402 case TYPE_FPCOMPARE
:
33403 case TYPE_CR_LOGICAL
:
33404 case TYPE_DELAYED_CR
:
33408 if (get_attr_dot (dep_insn
) == DOT_YES
)
33413 if (get_attr_dot (dep_insn
) == DOT_YES
33414 && get_attr_var_shift (dep_insn
) == VAR_SHIFT_NO
)
33425 if ((rs6000_cpu
== PROCESSOR_POWER6
)
33426 && recog_memoized (dep_insn
)
33427 && (INSN_CODE (dep_insn
) >= 0))
33430 if (GET_CODE (PATTERN (insn
)) != SET
)
33431 /* If this happens, we have to extend this to schedule
33432 optimally. Return default for now. */
33435 /* Adjust the cost for the case where the value written
33436 by a fixed point operation is used as the address
33437 gen value on a store. */
33438 switch (get_attr_type (dep_insn
))
33443 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
33444 return get_attr_sign_extend (dep_insn
)
33445 == SIGN_EXTEND_YES
? 6 : 4;
33450 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
33451 return get_attr_var_shift (dep_insn
) == VAR_SHIFT_YES
?
33461 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
33469 if (get_attr_update (dep_insn
) == UPDATE_YES
33470 && ! rs6000_store_data_bypass_p (dep_insn
, insn
))
33476 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
33482 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
33483 return get_attr_size (dep_insn
) == SIZE_32
? 45 : 57;
33493 if ((rs6000_cpu
== PROCESSOR_POWER6
)
33494 && recog_memoized (dep_insn
)
33495 && (INSN_CODE (dep_insn
) >= 0))
33498 /* Adjust the cost for the case where the value written
33499 by a fixed point instruction is used within the address
33500 gen portion of a subsequent load(u)(x) */
33501 switch (get_attr_type (dep_insn
))
33506 if (set_to_load_agen (dep_insn
, insn
))
33507 return get_attr_sign_extend (dep_insn
)
33508 == SIGN_EXTEND_YES
? 6 : 4;
33513 if (set_to_load_agen (dep_insn
, insn
))
33514 return get_attr_var_shift (dep_insn
) == VAR_SHIFT_YES
?
33524 if (set_to_load_agen (dep_insn
, insn
))
33532 if (get_attr_update (dep_insn
) == UPDATE_YES
33533 && set_to_load_agen (dep_insn
, insn
))
33539 if (set_to_load_agen (dep_insn
, insn
))
33545 if (set_to_load_agen (dep_insn
, insn
))
33546 return get_attr_size (dep_insn
) == SIZE_32
? 45 : 57;
33556 if ((rs6000_cpu
== PROCESSOR_POWER6
)
33557 && get_attr_update (insn
) == UPDATE_NO
33558 && recog_memoized (dep_insn
)
33559 && (INSN_CODE (dep_insn
) >= 0)
33560 && (get_attr_type (dep_insn
) == TYPE_MFFGPR
))
33567 /* Fall out to return default cost. */
33571 case REG_DEP_OUTPUT
:
33572 /* Output dependency; DEP_INSN writes a register that INSN writes some
33574 if ((rs6000_cpu
== PROCESSOR_POWER6
)
33575 && recog_memoized (dep_insn
)
33576 && (INSN_CODE (dep_insn
) >= 0))
33578 attr_type
= get_attr_type (insn
);
33583 case TYPE_FPSIMPLE
:
33584 if (get_attr_type (dep_insn
) == TYPE_FP
33585 || get_attr_type (dep_insn
) == TYPE_FPSIMPLE
)
33589 if (get_attr_update (insn
) == UPDATE_NO
33590 && get_attr_type (dep_insn
) == TYPE_MFFGPR
)
33597 /* Fall through, no cost for output dependency. */
33601 /* Anti dependency; DEP_INSN reads a register that INSN writes some
33606 gcc_unreachable ();
33612 /* Debug version of rs6000_adjust_cost. */
33615 rs6000_debug_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep_insn
,
33616 int cost
, unsigned int dw
)
33618 int ret
= rs6000_adjust_cost (insn
, dep_type
, dep_insn
, cost
, dw
);
33626 default: dep
= "unknown depencency"; break;
33627 case REG_DEP_TRUE
: dep
= "data dependency"; break;
33628 case REG_DEP_OUTPUT
: dep
= "output dependency"; break;
33629 case REG_DEP_ANTI
: dep
= "anti depencency"; break;
33633 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
33634 "%s, insn:\n", ret
, cost
, dep
);
33642 /* The function returns a true if INSN is microcoded.
33643 Return false otherwise. */
33646 is_microcoded_insn (rtx_insn
*insn
)
33648 if (!insn
|| !NONDEBUG_INSN_P (insn
)
33649 || GET_CODE (PATTERN (insn
)) == USE
33650 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
33653 if (rs6000_cpu_attr
== CPU_CELL
)
33654 return get_attr_cell_micro (insn
) == CELL_MICRO_ALWAYS
;
33656 if (rs6000_sched_groups
33657 && (rs6000_cpu
== PROCESSOR_POWER4
|| rs6000_cpu
== PROCESSOR_POWER5
))
33659 enum attr_type type
= get_attr_type (insn
);
33660 if ((type
== TYPE_LOAD
33661 && get_attr_update (insn
) == UPDATE_YES
33662 && get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
)
33663 || ((type
== TYPE_LOAD
|| type
== TYPE_STORE
)
33664 && get_attr_update (insn
) == UPDATE_YES
33665 && get_attr_indexed (insn
) == INDEXED_YES
)
33666 || type
== TYPE_MFCR
)
33673 /* The function returns true if INSN is cracked into 2 instructions
33674 by the processor (and therefore occupies 2 issue slots). */
33677 is_cracked_insn (rtx_insn
*insn
)
33679 if (!insn
|| !NONDEBUG_INSN_P (insn
)
33680 || GET_CODE (PATTERN (insn
)) == USE
33681 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
33684 if (rs6000_sched_groups
33685 && (rs6000_cpu
== PROCESSOR_POWER4
|| rs6000_cpu
== PROCESSOR_POWER5
))
33687 enum attr_type type
= get_attr_type (insn
);
33688 if ((type
== TYPE_LOAD
33689 && get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
33690 && get_attr_update (insn
) == UPDATE_NO
)
33691 || (type
== TYPE_LOAD
33692 && get_attr_sign_extend (insn
) == SIGN_EXTEND_NO
33693 && get_attr_update (insn
) == UPDATE_YES
33694 && get_attr_indexed (insn
) == INDEXED_NO
)
33695 || (type
== TYPE_STORE
33696 && get_attr_update (insn
) == UPDATE_YES
33697 && get_attr_indexed (insn
) == INDEXED_NO
)
33698 || ((type
== TYPE_FPLOAD
|| type
== TYPE_FPSTORE
)
33699 && get_attr_update (insn
) == UPDATE_YES
)
33700 || type
== TYPE_DELAYED_CR
33701 || (type
== TYPE_EXTS
33702 && get_attr_dot (insn
) == DOT_YES
)
33703 || (type
== TYPE_SHIFT
33704 && get_attr_dot (insn
) == DOT_YES
33705 && get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
33706 || (type
== TYPE_MUL
33707 && get_attr_dot (insn
) == DOT_YES
)
33708 || type
== TYPE_DIV
33709 || (type
== TYPE_INSERT
33710 && get_attr_size (insn
) == SIZE_32
))
33717 /* The function returns true if INSN can be issued only from
33718 the branch slot. */
33721 is_branch_slot_insn (rtx_insn
*insn
)
33723 if (!insn
|| !NONDEBUG_INSN_P (insn
)
33724 || GET_CODE (PATTERN (insn
)) == USE
33725 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
33728 if (rs6000_sched_groups
)
33730 enum attr_type type
= get_attr_type (insn
);
33731 if (type
== TYPE_BRANCH
|| type
== TYPE_JMPREG
)
33739 /* The function returns true if out_inst sets a value that is
33740 used in the address generation computation of in_insn */
33742 set_to_load_agen (rtx_insn
*out_insn
, rtx_insn
*in_insn
)
33744 rtx out_set
, in_set
;
33746 /* For performance reasons, only handle the simple case where
33747 both loads are a single_set. */
33748 out_set
= single_set (out_insn
);
33751 in_set
= single_set (in_insn
);
33753 return reg_mentioned_p (SET_DEST (out_set
), SET_SRC (in_set
));
33759 /* Try to determine base/offset/size parts of the given MEM.
33760 Return true if successful, false if all the values couldn't
33763 This function only looks for REG or REG+CONST address forms.
33764 REG+REG address form will return false. */
33767 get_memref_parts (rtx mem
, rtx
*base
, HOST_WIDE_INT
*offset
,
33768 HOST_WIDE_INT
*size
)
33771 if MEM_SIZE_KNOWN_P (mem
)
33772 *size
= MEM_SIZE (mem
);
33776 addr_rtx
= (XEXP (mem
, 0));
33777 if (GET_CODE (addr_rtx
) == PRE_MODIFY
)
33778 addr_rtx
= XEXP (addr_rtx
, 1);
33781 while (GET_CODE (addr_rtx
) == PLUS
33782 && CONST_INT_P (XEXP (addr_rtx
, 1)))
33784 *offset
+= INTVAL (XEXP (addr_rtx
, 1));
33785 addr_rtx
= XEXP (addr_rtx
, 0);
33787 if (!REG_P (addr_rtx
))
33794 /* The function returns true if the target storage location of
33795 mem1 is adjacent to the target storage location of mem2 */
33796 /* Return 1 if memory locations are adjacent. */
33799 adjacent_mem_locations (rtx mem1
, rtx mem2
)
33802 HOST_WIDE_INT off1
, size1
, off2
, size2
;
33804 if (get_memref_parts (mem1
, ®1
, &off1
, &size1
)
33805 && get_memref_parts (mem2
, ®2
, &off2
, &size2
))
33806 return ((REGNO (reg1
) == REGNO (reg2
))
33807 && ((off1
+ size1
== off2
)
33808 || (off2
+ size2
== off1
)));
33813 /* This function returns true if it can be determined that the two MEM
33814 locations overlap by at least 1 byte based on base reg/offset/size. */
33817 mem_locations_overlap (rtx mem1
, rtx mem2
)
33820 HOST_WIDE_INT off1
, size1
, off2
, size2
;
33822 if (get_memref_parts (mem1
, ®1
, &off1
, &size1
)
33823 && get_memref_parts (mem2
, ®2
, &off2
, &size2
))
33824 return ((REGNO (reg1
) == REGNO (reg2
))
33825 && (((off1
<= off2
) && (off1
+ size1
> off2
))
33826 || ((off2
<= off1
) && (off2
+ size2
> off1
))));
33831 /* A C statement (sans semicolon) to update the integer scheduling
33832 priority INSN_PRIORITY (INSN). Increase the priority to execute the
33833 INSN earlier, reduce the priority to execute INSN later. Do not
33834 define this macro if you do not need to adjust the scheduling
33835 priorities of insns. */
33838 rs6000_adjust_priority (rtx_insn
*insn ATTRIBUTE_UNUSED
, int priority
)
33840 rtx load_mem
, str_mem
;
33841 /* On machines (like the 750) which have asymmetric integer units,
33842 where one integer unit can do multiply and divides and the other
33843 can't, reduce the priority of multiply/divide so it is scheduled
33844 before other integer operations. */
33847 if (! INSN_P (insn
))
33850 if (GET_CODE (PATTERN (insn
)) == USE
)
33853 switch (rs6000_cpu_attr
) {
33855 switch (get_attr_type (insn
))
33862 fprintf (stderr
, "priority was %#x (%d) before adjustment\n",
33863 priority
, priority
);
33864 if (priority
>= 0 && priority
< 0x01000000)
33871 if (insn_must_be_first_in_group (insn
)
33872 && reload_completed
33873 && current_sched_info
->sched_max_insns_priority
33874 && rs6000_sched_restricted_insns_priority
)
33877 /* Prioritize insns that can be dispatched only in the first
33879 if (rs6000_sched_restricted_insns_priority
== 1)
33880 /* Attach highest priority to insn. This means that in
33881 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
33882 precede 'priority' (critical path) considerations. */
33883 return current_sched_info
->sched_max_insns_priority
;
33884 else if (rs6000_sched_restricted_insns_priority
== 2)
33885 /* Increase priority of insn by a minimal amount. This means that in
33886 haifa-sched.c:ready_sort(), only 'priority' (critical path)
33887 considerations precede dispatch-slot restriction considerations. */
33888 return (priority
+ 1);
33891 if (rs6000_cpu
== PROCESSOR_POWER6
33892 && ((load_store_pendulum
== -2 && is_load_insn (insn
, &load_mem
))
33893 || (load_store_pendulum
== 2 && is_store_insn (insn
, &str_mem
))))
33894 /* Attach highest priority to insn if the scheduler has just issued two
33895 stores and this instruction is a load, or two loads and this instruction
33896 is a store. Power6 wants loads and stores scheduled alternately
33898 return current_sched_info
->sched_max_insns_priority
;
33903 /* Return true if the instruction is nonpipelined on the Cell. */
33905 is_nonpipeline_insn (rtx_insn
*insn
)
33907 enum attr_type type
;
33908 if (!insn
|| !NONDEBUG_INSN_P (insn
)
33909 || GET_CODE (PATTERN (insn
)) == USE
33910 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
33913 type
= get_attr_type (insn
);
33914 if (type
== TYPE_MUL
33915 || type
== TYPE_DIV
33916 || type
== TYPE_SDIV
33917 || type
== TYPE_DDIV
33918 || type
== TYPE_SSQRT
33919 || type
== TYPE_DSQRT
33920 || type
== TYPE_MFCR
33921 || type
== TYPE_MFCRF
33922 || type
== TYPE_MFJMPR
)
33930 /* Return how many instructions the machine can issue per cycle. */
33933 rs6000_issue_rate (void)
33935 /* Unless scheduling for register pressure, use issue rate of 1 for
33936 first scheduling pass to decrease degradation. */
33937 if (!reload_completed
&& !flag_sched_pressure
)
33940 switch (rs6000_cpu_attr
) {
33942 case CPU_PPC601
: /* ? */
33952 case CPU_PPCE300C2
:
33953 case CPU_PPCE300C3
:
33954 case CPU_PPCE500MC
:
33955 case CPU_PPCE500MC64
:
33980 /* Return how many instructions to look ahead for better insn
33984 rs6000_use_sched_lookahead (void)
33986 switch (rs6000_cpu_attr
)
33993 return (reload_completed
? 8 : 0);
34000 /* We are choosing insn from the ready queue. Return zero if INSN can be
34003 rs6000_use_sched_lookahead_guard (rtx_insn
*insn
, int ready_index
)
34005 if (ready_index
== 0)
34008 if (rs6000_cpu_attr
!= CPU_CELL
)
34011 gcc_assert (insn
!= NULL_RTX
&& INSN_P (insn
));
34013 if (!reload_completed
34014 || is_nonpipeline_insn (insn
)
34015 || is_microcoded_insn (insn
))
34021 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
34022 and return true. */
34025 find_mem_ref (rtx pat
, rtx
*mem_ref
)
34030 /* stack_tie does not produce any real memory traffic. */
34031 if (tie_operand (pat
, VOIDmode
))
34034 if (GET_CODE (pat
) == MEM
)
34040 /* Recursively process the pattern. */
34041 fmt
= GET_RTX_FORMAT (GET_CODE (pat
));
34043 for (i
= GET_RTX_LENGTH (GET_CODE (pat
)) - 1; i
>= 0; i
--)
34047 if (find_mem_ref (XEXP (pat
, i
), mem_ref
))
34050 else if (fmt
[i
] == 'E')
34051 for (j
= XVECLEN (pat
, i
) - 1; j
>= 0; j
--)
34053 if (find_mem_ref (XVECEXP (pat
, i
, j
), mem_ref
))
34061 /* Determine if PAT is a PATTERN of a load insn. */
34064 is_load_insn1 (rtx pat
, rtx
*load_mem
)
34066 if (!pat
|| pat
== NULL_RTX
)
34069 if (GET_CODE (pat
) == SET
)
34070 return find_mem_ref (SET_SRC (pat
), load_mem
);
34072 if (GET_CODE (pat
) == PARALLEL
)
34076 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
34077 if (is_load_insn1 (XVECEXP (pat
, 0, i
), load_mem
))
34084 /* Determine if INSN loads from memory. */
34087 is_load_insn (rtx insn
, rtx
*load_mem
)
34089 if (!insn
|| !INSN_P (insn
))
34095 return is_load_insn1 (PATTERN (insn
), load_mem
);
34098 /* Determine if PAT is a PATTERN of a store insn. */
34101 is_store_insn1 (rtx pat
, rtx
*str_mem
)
34103 if (!pat
|| pat
== NULL_RTX
)
34106 if (GET_CODE (pat
) == SET
)
34107 return find_mem_ref (SET_DEST (pat
), str_mem
);
34109 if (GET_CODE (pat
) == PARALLEL
)
34113 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
34114 if (is_store_insn1 (XVECEXP (pat
, 0, i
), str_mem
))
34121 /* Determine if INSN stores to memory. */
34124 is_store_insn (rtx insn
, rtx
*str_mem
)
34126 if (!insn
|| !INSN_P (insn
))
34129 return is_store_insn1 (PATTERN (insn
), str_mem
);
34132 /* Return whether TYPE is a Power9 pairable vector instruction type. */
34135 is_power9_pairable_vec_type (enum attr_type type
)
34139 case TYPE_VECSIMPLE
:
34140 case TYPE_VECCOMPLEX
:
34144 case TYPE_VECFLOAT
:
34146 case TYPE_VECDOUBLE
:
34154 /* Returns whether the dependence between INSN and NEXT is considered
34155 costly by the given target. */
34158 rs6000_is_costly_dependence (dep_t dep
, int cost
, int distance
)
34162 rtx load_mem
, str_mem
;
34164 /* If the flag is not enabled - no dependence is considered costly;
34165 allow all dependent insns in the same group.
34166 This is the most aggressive option. */
34167 if (rs6000_sched_costly_dep
== no_dep_costly
)
34170 /* If the flag is set to 1 - a dependence is always considered costly;
34171 do not allow dependent instructions in the same group.
34172 This is the most conservative option. */
34173 if (rs6000_sched_costly_dep
== all_deps_costly
)
34176 insn
= DEP_PRO (dep
);
34177 next
= DEP_CON (dep
);
34179 if (rs6000_sched_costly_dep
== store_to_load_dep_costly
34180 && is_load_insn (next
, &load_mem
)
34181 && is_store_insn (insn
, &str_mem
))
34182 /* Prevent load after store in the same group. */
34185 if (rs6000_sched_costly_dep
== true_store_to_load_dep_costly
34186 && is_load_insn (next
, &load_mem
)
34187 && is_store_insn (insn
, &str_mem
)
34188 && DEP_TYPE (dep
) == REG_DEP_TRUE
34189 && mem_locations_overlap(str_mem
, load_mem
))
34190 /* Prevent load after store in the same group if it is a true
34194 /* The flag is set to X; dependences with latency >= X are considered costly,
34195 and will not be scheduled in the same group. */
34196 if (rs6000_sched_costly_dep
<= max_dep_latency
34197 && ((cost
- distance
) >= (int)rs6000_sched_costly_dep
))
34203 /* Return the next insn after INSN that is found before TAIL is reached,
34204 skipping any "non-active" insns - insns that will not actually occupy
34205 an issue slot. Return NULL_RTX if such an insn is not found. */
34208 get_next_active_insn (rtx_insn
*insn
, rtx_insn
*tail
)
34210 if (insn
== NULL_RTX
|| insn
== tail
)
34215 insn
= NEXT_INSN (insn
);
34216 if (insn
== NULL_RTX
|| insn
== tail
)
34220 || JUMP_P (insn
) || JUMP_TABLE_DATA_P (insn
)
34221 || (NONJUMP_INSN_P (insn
)
34222 && GET_CODE (PATTERN (insn
)) != USE
34223 && GET_CODE (PATTERN (insn
)) != CLOBBER
34224 && INSN_CODE (insn
) != CODE_FOR_stack_tie
))
34230 /* Do Power9 specific sched_reorder2 reordering of ready list. */
34233 power9_sched_reorder2 (rtx_insn
**ready
, int lastpos
)
34238 enum attr_type type
, type2
;
34240 type
= get_attr_type (last_scheduled_insn
);
34242 /* Try to issue fixed point divides back-to-back in pairs so they will be
34243 routed to separate execution units and execute in parallel. */
34244 if (type
== TYPE_DIV
&& divide_cnt
== 0)
34246 /* First divide has been scheduled. */
34249 /* Scan the ready list looking for another divide, if found move it
34250 to the end of the list so it is chosen next. */
34254 if (recog_memoized (ready
[pos
]) >= 0
34255 && get_attr_type (ready
[pos
]) == TYPE_DIV
)
34258 for (i
= pos
; i
< lastpos
; i
++)
34259 ready
[i
] = ready
[i
+ 1];
34260 ready
[lastpos
] = tmp
;
34268 /* Last insn was the 2nd divide or not a divide, reset the counter. */
34271 /* The best dispatch throughput for vector and vector load insns can be
34272 achieved by interleaving a vector and vector load such that they'll
34273 dispatch to the same superslice. If this pairing cannot be achieved
34274 then it is best to pair vector insns together and vector load insns
34277 To aid in this pairing, vec_pairing maintains the current state with
34278 the following values:
34280 0 : Initial state, no vecload/vector pairing has been started.
34282 1 : A vecload or vector insn has been issued and a candidate for
34283 pairing has been found and moved to the end of the ready
34285 if (type
== TYPE_VECLOAD
)
34287 /* Issued a vecload. */
34288 if (vec_pairing
== 0)
34290 int vecload_pos
= -1;
34291 /* We issued a single vecload, look for a vector insn to pair it
34292 with. If one isn't found, try to pair another vecload. */
34296 if (recog_memoized (ready
[pos
]) >= 0)
34298 type2
= get_attr_type (ready
[pos
]);
34299 if (is_power9_pairable_vec_type (type2
))
34301 /* Found a vector insn to pair with, move it to the
34302 end of the ready list so it is scheduled next. */
34304 for (i
= pos
; i
< lastpos
; i
++)
34305 ready
[i
] = ready
[i
+ 1];
34306 ready
[lastpos
] = tmp
;
34308 return cached_can_issue_more
;
34310 else if (type2
== TYPE_VECLOAD
&& vecload_pos
== -1)
34311 /* Remember position of first vecload seen. */
34316 if (vecload_pos
>= 0)
34318 /* Didn't find a vector to pair with but did find a vecload,
34319 move it to the end of the ready list. */
34320 tmp
= ready
[vecload_pos
];
34321 for (i
= vecload_pos
; i
< lastpos
; i
++)
34322 ready
[i
] = ready
[i
+ 1];
34323 ready
[lastpos
] = tmp
;
34325 return cached_can_issue_more
;
34329 else if (is_power9_pairable_vec_type (type
))
34331 /* Issued a vector operation. */
34332 if (vec_pairing
== 0)
34335 /* We issued a single vector insn, look for a vecload to pair it
34336 with. If one isn't found, try to pair another vector. */
34340 if (recog_memoized (ready
[pos
]) >= 0)
34342 type2
= get_attr_type (ready
[pos
]);
34343 if (type2
== TYPE_VECLOAD
)
34345 /* Found a vecload insn to pair with, move it to the
34346 end of the ready list so it is scheduled next. */
34348 for (i
= pos
; i
< lastpos
; i
++)
34349 ready
[i
] = ready
[i
+ 1];
34350 ready
[lastpos
] = tmp
;
34352 return cached_can_issue_more
;
34354 else if (is_power9_pairable_vec_type (type2
)
34356 /* Remember position of first vector insn seen. */
34363 /* Didn't find a vecload to pair with but did find a vector
34364 insn, move it to the end of the ready list. */
34365 tmp
= ready
[vec_pos
];
34366 for (i
= vec_pos
; i
< lastpos
; i
++)
34367 ready
[i
] = ready
[i
+ 1];
34368 ready
[lastpos
] = tmp
;
34370 return cached_can_issue_more
;
34375 /* We've either finished a vec/vecload pair, couldn't find an insn to
34376 continue the current pair, or the last insn had nothing to do with
34377 with pairing. In any case, reset the state. */
34381 return cached_can_issue_more
;
34384 /* We are about to begin issuing insns for this clock cycle. */
34387 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED
, int sched_verbose
,
34388 rtx_insn
**ready ATTRIBUTE_UNUSED
,
34389 int *pn_ready ATTRIBUTE_UNUSED
,
34390 int clock_var ATTRIBUTE_UNUSED
)
34392 int n_ready
= *pn_ready
;
34395 fprintf (dump
, "// rs6000_sched_reorder :\n");
34397 /* Reorder the ready list, if the second to last ready insn
34398 is a nonepipeline insn. */
34399 if (rs6000_cpu_attr
== CPU_CELL
&& n_ready
> 1)
34401 if (is_nonpipeline_insn (ready
[n_ready
- 1])
34402 && (recog_memoized (ready
[n_ready
- 2]) > 0))
34403 /* Simply swap first two insns. */
34404 std::swap (ready
[n_ready
- 1], ready
[n_ready
- 2]);
34407 if (rs6000_cpu
== PROCESSOR_POWER6
)
34408 load_store_pendulum
= 0;
34410 return rs6000_issue_rate ();
34413 /* Like rs6000_sched_reorder, but called after issuing each insn. */
34416 rs6000_sched_reorder2 (FILE *dump
, int sched_verbose
, rtx_insn
**ready
,
34417 int *pn_ready
, int clock_var ATTRIBUTE_UNUSED
)
34420 fprintf (dump
, "// rs6000_sched_reorder2 :\n");
34422 /* For Power6, we need to handle some special cases to try and keep the
34423 store queue from overflowing and triggering expensive flushes.
34425 This code monitors how load and store instructions are being issued
34426 and skews the ready list one way or the other to increase the likelihood
34427 that a desired instruction is issued at the proper time.
34429 A couple of things are done. First, we maintain a "load_store_pendulum"
34430 to track the current state of load/store issue.
34432 - If the pendulum is at zero, then no loads or stores have been
34433 issued in the current cycle so we do nothing.
34435 - If the pendulum is 1, then a single load has been issued in this
34436 cycle and we attempt to locate another load in the ready list to
34439 - If the pendulum is -2, then two stores have already been
34440 issued in this cycle, so we increase the priority of the first load
34441 in the ready list to increase it's likelihood of being chosen first
34444 - If the pendulum is -1, then a single store has been issued in this
34445 cycle and we attempt to locate another store in the ready list to
34446 issue with it, preferring a store to an adjacent memory location to
34447 facilitate store pairing in the store queue.
34449 - If the pendulum is 2, then two loads have already been
34450 issued in this cycle, so we increase the priority of the first store
34451 in the ready list to increase it's likelihood of being chosen first
34454 - If the pendulum < -2 or > 2, then do nothing.
34456 Note: This code covers the most common scenarios. There exist non
34457 load/store instructions which make use of the LSU and which
34458 would need to be accounted for to strictly model the behavior
34459 of the machine. Those instructions are currently unaccounted
34460 for to help minimize compile time overhead of this code.
34462 if (rs6000_cpu
== PROCESSOR_POWER6
&& last_scheduled_insn
)
34467 rtx load_mem
, str_mem
;
34469 if (is_store_insn (last_scheduled_insn
, &str_mem
))
34470 /* Issuing a store, swing the load_store_pendulum to the left */
34471 load_store_pendulum
--;
34472 else if (is_load_insn (last_scheduled_insn
, &load_mem
))
34473 /* Issuing a load, swing the load_store_pendulum to the right */
34474 load_store_pendulum
++;
34476 return cached_can_issue_more
;
34478 /* If the pendulum is balanced, or there is only one instruction on
34479 the ready list, then all is well, so return. */
34480 if ((load_store_pendulum
== 0) || (*pn_ready
<= 1))
34481 return cached_can_issue_more
;
34483 if (load_store_pendulum
== 1)
34485 /* A load has been issued in this cycle. Scan the ready list
34486 for another load to issue with it */
34491 if (is_load_insn (ready
[pos
], &load_mem
))
34493 /* Found a load. Move it to the head of the ready list,
34494 and adjust it's priority so that it is more likely to
34497 for (i
=pos
; i
<*pn_ready
-1; i
++)
34498 ready
[i
] = ready
[i
+ 1];
34499 ready
[*pn_ready
-1] = tmp
;
34501 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp
))
34502 INSN_PRIORITY (tmp
)++;
34508 else if (load_store_pendulum
== -2)
34510 /* Two stores have been issued in this cycle. Increase the
34511 priority of the first load in the ready list to favor it for
34512 issuing in the next cycle. */
34517 if (is_load_insn (ready
[pos
], &load_mem
)
34519 && INSN_PRIORITY_KNOWN (ready
[pos
]))
34521 INSN_PRIORITY (ready
[pos
])++;
34523 /* Adjust the pendulum to account for the fact that a load
34524 was found and increased in priority. This is to prevent
34525 increasing the priority of multiple loads */
34526 load_store_pendulum
--;
34533 else if (load_store_pendulum
== -1)
34535 /* A store has been issued in this cycle. Scan the ready list for
34536 another store to issue with it, preferring a store to an adjacent
34538 int first_store_pos
= -1;
34544 if (is_store_insn (ready
[pos
], &str_mem
))
34547 /* Maintain the index of the first store found on the
34549 if (first_store_pos
== -1)
34550 first_store_pos
= pos
;
34552 if (is_store_insn (last_scheduled_insn
, &str_mem2
)
34553 && adjacent_mem_locations (str_mem
, str_mem2
))
34555 /* Found an adjacent store. Move it to the head of the
34556 ready list, and adjust it's priority so that it is
34557 more likely to stay there */
34559 for (i
=pos
; i
<*pn_ready
-1; i
++)
34560 ready
[i
] = ready
[i
+ 1];
34561 ready
[*pn_ready
-1] = tmp
;
34563 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp
))
34564 INSN_PRIORITY (tmp
)++;
34566 first_store_pos
= -1;
34574 if (first_store_pos
>= 0)
34576 /* An adjacent store wasn't found, but a non-adjacent store was,
34577 so move the non-adjacent store to the front of the ready
34578 list, and adjust its priority so that it is more likely to
34580 tmp
= ready
[first_store_pos
];
34581 for (i
=first_store_pos
; i
<*pn_ready
-1; i
++)
34582 ready
[i
] = ready
[i
+ 1];
34583 ready
[*pn_ready
-1] = tmp
;
34584 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp
))
34585 INSN_PRIORITY (tmp
)++;
34588 else if (load_store_pendulum
== 2)
34590 /* Two loads have been issued in this cycle. Increase the priority
34591 of the first store in the ready list to favor it for issuing in
34597 if (is_store_insn (ready
[pos
], &str_mem
)
34599 && INSN_PRIORITY_KNOWN (ready
[pos
]))
34601 INSN_PRIORITY (ready
[pos
])++;
34603 /* Adjust the pendulum to account for the fact that a store
34604 was found and increased in priority. This is to prevent
34605 increasing the priority of multiple stores */
34606 load_store_pendulum
++;
34615 /* Do Power9 dependent reordering if necessary. */
34616 if (rs6000_cpu
== PROCESSOR_POWER9
&& last_scheduled_insn
34617 && recog_memoized (last_scheduled_insn
) >= 0)
34618 return power9_sched_reorder2 (ready
, *pn_ready
- 1);
34620 return cached_can_issue_more
;
34623 /* Return whether the presence of INSN causes a dispatch group termination
34624 of group WHICH_GROUP.
34626 If WHICH_GROUP == current_group, this function will return true if INSN
34627 causes the termination of the current group (i.e, the dispatch group to
34628 which INSN belongs). This means that INSN will be the last insn in the
34629 group it belongs to.
34631 If WHICH_GROUP == previous_group, this function will return true if INSN
34632 causes the termination of the previous group (i.e, the dispatch group that
34633 precedes the group to which INSN belongs). This means that INSN will be
34634 the first insn in the group it belongs to). */
34637 insn_terminates_group_p (rtx_insn
*insn
, enum group_termination which_group
)
34644 first
= insn_must_be_first_in_group (insn
);
34645 last
= insn_must_be_last_in_group (insn
);
34650 if (which_group
== current_group
)
34652 else if (which_group
== previous_group
)
34660 insn_must_be_first_in_group (rtx_insn
*insn
)
34662 enum attr_type type
;
34666 || DEBUG_INSN_P (insn
)
34667 || GET_CODE (PATTERN (insn
)) == USE
34668 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
34671 switch (rs6000_cpu
)
34673 case PROCESSOR_POWER5
:
34674 if (is_cracked_insn (insn
))
34677 case PROCESSOR_POWER4
:
34678 if (is_microcoded_insn (insn
))
34681 if (!rs6000_sched_groups
)
34684 type
= get_attr_type (insn
);
34691 case TYPE_DELAYED_CR
:
34692 case TYPE_CR_LOGICAL
:
34705 case PROCESSOR_POWER6
:
34706 type
= get_attr_type (insn
);
34715 case TYPE_FPCOMPARE
:
34726 if (get_attr_dot (insn
) == DOT_NO
34727 || get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
34732 if (get_attr_size (insn
) == SIZE_32
)
34740 if (get_attr_update (insn
) == UPDATE_YES
)
34748 case PROCESSOR_POWER7
:
34749 type
= get_attr_type (insn
);
34753 case TYPE_CR_LOGICAL
:
34767 if (get_attr_dot (insn
) == DOT_YES
)
34772 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
34773 || get_attr_update (insn
) == UPDATE_YES
)
34780 if (get_attr_update (insn
) == UPDATE_YES
)
34788 case PROCESSOR_POWER8
:
34789 type
= get_attr_type (insn
);
34793 case TYPE_CR_LOGICAL
:
34794 case TYPE_DELAYED_CR
:
34802 case TYPE_VECSTORE
:
34809 if (get_attr_dot (insn
) == DOT_YES
)
34814 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
34815 || get_attr_update (insn
) == UPDATE_YES
)
34820 if (get_attr_update (insn
) == UPDATE_YES
34821 && get_attr_indexed (insn
) == INDEXED_YES
)
34837 insn_must_be_last_in_group (rtx_insn
*insn
)
34839 enum attr_type type
;
34843 || DEBUG_INSN_P (insn
)
34844 || GET_CODE (PATTERN (insn
)) == USE
34845 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
34848 switch (rs6000_cpu
) {
34849 case PROCESSOR_POWER4
:
34850 case PROCESSOR_POWER5
:
34851 if (is_microcoded_insn (insn
))
34854 if (is_branch_slot_insn (insn
))
34858 case PROCESSOR_POWER6
:
34859 type
= get_attr_type (insn
);
34867 case TYPE_FPCOMPARE
:
34878 if (get_attr_dot (insn
) == DOT_NO
34879 || get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
34884 if (get_attr_size (insn
) == SIZE_32
)
34892 case PROCESSOR_POWER7
:
34893 type
= get_attr_type (insn
);
34903 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
34904 && get_attr_update (insn
) == UPDATE_YES
)
34909 if (get_attr_update (insn
) == UPDATE_YES
34910 && get_attr_indexed (insn
) == INDEXED_YES
)
34918 case PROCESSOR_POWER8
:
34919 type
= get_attr_type (insn
);
34931 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
34932 && get_attr_update (insn
) == UPDATE_YES
)
34937 if (get_attr_update (insn
) == UPDATE_YES
34938 && get_attr_indexed (insn
) == INDEXED_YES
)
34953 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
34954 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
34957 is_costly_group (rtx
*group_insns
, rtx next_insn
)
34960 int issue_rate
= rs6000_issue_rate ();
34962 for (i
= 0; i
< issue_rate
; i
++)
34964 sd_iterator_def sd_it
;
34966 rtx insn
= group_insns
[i
];
34971 FOR_EACH_DEP (insn
, SD_LIST_RES_FORW
, sd_it
, dep
)
34973 rtx next
= DEP_CON (dep
);
34975 if (next
== next_insn
34976 && rs6000_is_costly_dependence (dep
, dep_cost (dep
), 0))
34984 /* Utility of the function redefine_groups.
34985 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
34986 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
34987 to keep it "far" (in a separate group) from GROUP_INSNS, following
34988 one of the following schemes, depending on the value of the flag
34989 -minsert_sched_nops = X:
34990 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
34991 in order to force NEXT_INSN into a separate group.
34992 (2) X < sched_finish_regroup_exact: insert exactly X nops.
34993 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
34994 insertion (has a group just ended, how many vacant issue slots remain in the
34995 last group, and how many dispatch groups were encountered so far). */
34998 force_new_group (int sched_verbose
, FILE *dump
, rtx
*group_insns
,
34999 rtx_insn
*next_insn
, bool *group_end
, int can_issue_more
,
35004 int issue_rate
= rs6000_issue_rate ();
35005 bool end
= *group_end
;
35008 if (next_insn
== NULL_RTX
|| DEBUG_INSN_P (next_insn
))
35009 return can_issue_more
;
35011 if (rs6000_sched_insert_nops
> sched_finish_regroup_exact
)
35012 return can_issue_more
;
35014 force
= is_costly_group (group_insns
, next_insn
);
35016 return can_issue_more
;
35018 if (sched_verbose
> 6)
35019 fprintf (dump
,"force: group count = %d, can_issue_more = %d\n",
35020 *group_count
,can_issue_more
);
35022 if (rs6000_sched_insert_nops
== sched_finish_regroup_exact
)
35025 can_issue_more
= 0;
35027 /* Since only a branch can be issued in the last issue_slot, it is
35028 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
35029 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
35030 in this case the last nop will start a new group and the branch
35031 will be forced to the new group. */
35032 if (can_issue_more
&& !is_branch_slot_insn (next_insn
))
35035 /* Do we have a special group ending nop? */
35036 if (rs6000_cpu_attr
== CPU_POWER6
|| rs6000_cpu_attr
== CPU_POWER7
35037 || rs6000_cpu_attr
== CPU_POWER8
)
35039 nop
= gen_group_ending_nop ();
35040 emit_insn_before (nop
, next_insn
);
35041 can_issue_more
= 0;
35044 while (can_issue_more
> 0)
35047 emit_insn_before (nop
, next_insn
);
35055 if (rs6000_sched_insert_nops
< sched_finish_regroup_exact
)
35057 int n_nops
= rs6000_sched_insert_nops
;
35059 /* Nops can't be issued from the branch slot, so the effective
35060 issue_rate for nops is 'issue_rate - 1'. */
35061 if (can_issue_more
== 0)
35062 can_issue_more
= issue_rate
;
35064 if (can_issue_more
== 0)
35066 can_issue_more
= issue_rate
- 1;
35069 for (i
= 0; i
< issue_rate
; i
++)
35071 group_insns
[i
] = 0;
35078 emit_insn_before (nop
, next_insn
);
35079 if (can_issue_more
== issue_rate
- 1) /* new group begins */
35082 if (can_issue_more
== 0)
35084 can_issue_more
= issue_rate
- 1;
35087 for (i
= 0; i
< issue_rate
; i
++)
35089 group_insns
[i
] = 0;
35095 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
35098 /* Is next_insn going to start a new group? */
35101 || (can_issue_more
== 1 && !is_branch_slot_insn (next_insn
))
35102 || (can_issue_more
<= 2 && is_cracked_insn (next_insn
))
35103 || (can_issue_more
< issue_rate
&&
35104 insn_terminates_group_p (next_insn
, previous_group
)));
35105 if (*group_end
&& end
)
35108 if (sched_verbose
> 6)
35109 fprintf (dump
, "done force: group count = %d, can_issue_more = %d\n",
35110 *group_count
, can_issue_more
);
35111 return can_issue_more
;
35114 return can_issue_more
;
35117 /* This function tries to synch the dispatch groups that the compiler "sees"
35118 with the dispatch groups that the processor dispatcher is expected to
35119 form in practice. It tries to achieve this synchronization by forcing the
35120 estimated processor grouping on the compiler (as opposed to the function
35121 'pad_goups' which tries to force the scheduler's grouping on the processor).
35123 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
35124 examines the (estimated) dispatch groups that will be formed by the processor
35125 dispatcher. It marks these group boundaries to reflect the estimated
35126 processor grouping, overriding the grouping that the scheduler had marked.
35127 Depending on the value of the flag '-minsert-sched-nops' this function can
35128 force certain insns into separate groups or force a certain distance between
35129 them by inserting nops, for example, if there exists a "costly dependence"
35132 The function estimates the group boundaries that the processor will form as
35133 follows: It keeps track of how many vacant issue slots are available after
35134 each insn. A subsequent insn will start a new group if one of the following
35136 - no more vacant issue slots remain in the current dispatch group.
35137 - only the last issue slot, which is the branch slot, is vacant, but the next
35138 insn is not a branch.
35139 - only the last 2 or less issue slots, including the branch slot, are vacant,
35140 which means that a cracked insn (which occupies two issue slots) can't be
35141 issued in this group.
35142 - less than 'issue_rate' slots are vacant, and the next insn always needs to
35143 start a new group. */
35146 redefine_groups (FILE *dump
, int sched_verbose
, rtx_insn
*prev_head_insn
,
35149 rtx_insn
*insn
, *next_insn
;
35151 int can_issue_more
;
35154 int group_count
= 0;
35158 issue_rate
= rs6000_issue_rate ();
35159 group_insns
= XALLOCAVEC (rtx
, issue_rate
);
35160 for (i
= 0; i
< issue_rate
; i
++)
35162 group_insns
[i
] = 0;
35164 can_issue_more
= issue_rate
;
35166 insn
= get_next_active_insn (prev_head_insn
, tail
);
35169 while (insn
!= NULL_RTX
)
35171 slot
= (issue_rate
- can_issue_more
);
35172 group_insns
[slot
] = insn
;
35174 rs6000_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
);
35175 if (insn_terminates_group_p (insn
, current_group
))
35176 can_issue_more
= 0;
35178 next_insn
= get_next_active_insn (insn
, tail
);
35179 if (next_insn
== NULL_RTX
)
35180 return group_count
+ 1;
35182 /* Is next_insn going to start a new group? */
35184 = (can_issue_more
== 0
35185 || (can_issue_more
== 1 && !is_branch_slot_insn (next_insn
))
35186 || (can_issue_more
<= 2 && is_cracked_insn (next_insn
))
35187 || (can_issue_more
< issue_rate
&&
35188 insn_terminates_group_p (next_insn
, previous_group
)));
35190 can_issue_more
= force_new_group (sched_verbose
, dump
, group_insns
,
35191 next_insn
, &group_end
, can_issue_more
,
35197 can_issue_more
= 0;
35198 for (i
= 0; i
< issue_rate
; i
++)
35200 group_insns
[i
] = 0;
35204 if (GET_MODE (next_insn
) == TImode
&& can_issue_more
)
35205 PUT_MODE (next_insn
, VOIDmode
);
35206 else if (!can_issue_more
&& GET_MODE (next_insn
) != TImode
)
35207 PUT_MODE (next_insn
, TImode
);
35210 if (can_issue_more
== 0)
35211 can_issue_more
= issue_rate
;
35214 return group_count
;
35217 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
35218 dispatch group boundaries that the scheduler had marked. Pad with nops
35219 any dispatch groups which have vacant issue slots, in order to force the
35220 scheduler's grouping on the processor dispatcher. The function
35221 returns the number of dispatch groups found. */
35224 pad_groups (FILE *dump
, int sched_verbose
, rtx_insn
*prev_head_insn
,
35227 rtx_insn
*insn
, *next_insn
;
35230 int can_issue_more
;
35232 int group_count
= 0;
35234 /* Initialize issue_rate. */
35235 issue_rate
= rs6000_issue_rate ();
35236 can_issue_more
= issue_rate
;
35238 insn
= get_next_active_insn (prev_head_insn
, tail
);
35239 next_insn
= get_next_active_insn (insn
, tail
);
35241 while (insn
!= NULL_RTX
)
35244 rs6000_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
);
35246 group_end
= (next_insn
== NULL_RTX
|| GET_MODE (next_insn
) == TImode
);
35248 if (next_insn
== NULL_RTX
)
35253 /* If the scheduler had marked group termination at this location
35254 (between insn and next_insn), and neither insn nor next_insn will
35255 force group termination, pad the group with nops to force group
35258 && (rs6000_sched_insert_nops
== sched_finish_pad_groups
)
35259 && !insn_terminates_group_p (insn
, current_group
)
35260 && !insn_terminates_group_p (next_insn
, previous_group
))
35262 if (!is_branch_slot_insn (next_insn
))
35265 while (can_issue_more
)
35268 emit_insn_before (nop
, next_insn
);
35273 can_issue_more
= issue_rate
;
35278 next_insn
= get_next_active_insn (insn
, tail
);
35281 return group_count
;
35284 /* We're beginning a new block. Initialize data structures as necessary. */
35287 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED
,
35288 int sched_verbose ATTRIBUTE_UNUSED
,
35289 int max_ready ATTRIBUTE_UNUSED
)
35291 last_scheduled_insn
= NULL
;
35292 load_store_pendulum
= 0;
35297 /* The following function is called at the end of scheduling BB.
35298 After reload, it inserts nops at insn group bundling. */
35301 rs6000_sched_finish (FILE *dump
, int sched_verbose
)
35306 fprintf (dump
, "=== Finishing schedule.\n");
35308 if (reload_completed
&& rs6000_sched_groups
)
35310 /* Do not run sched_finish hook when selective scheduling enabled. */
35311 if (sel_sched_p ())
35314 if (rs6000_sched_insert_nops
== sched_finish_none
)
35317 if (rs6000_sched_insert_nops
== sched_finish_pad_groups
)
35318 n_groups
= pad_groups (dump
, sched_verbose
,
35319 current_sched_info
->prev_head
,
35320 current_sched_info
->next_tail
);
35322 n_groups
= redefine_groups (dump
, sched_verbose
,
35323 current_sched_info
->prev_head
,
35324 current_sched_info
->next_tail
);
35326 if (sched_verbose
>= 6)
35328 fprintf (dump
, "ngroups = %d\n", n_groups
);
35329 print_rtl (dump
, current_sched_info
->prev_head
);
35330 fprintf (dump
, "Done finish_sched\n");
35335 struct rs6000_sched_context
35337 short cached_can_issue_more
;
35338 rtx_insn
*last_scheduled_insn
;
35339 int load_store_pendulum
;
35344 typedef struct rs6000_sched_context rs6000_sched_context_def
;
35345 typedef rs6000_sched_context_def
*rs6000_sched_context_t
;
35347 /* Allocate store for new scheduling context. */
35349 rs6000_alloc_sched_context (void)
35351 return xmalloc (sizeof (rs6000_sched_context_def
));
35354 /* If CLEAN_P is true then initializes _SC with clean data,
35355 and from the global context otherwise. */
35357 rs6000_init_sched_context (void *_sc
, bool clean_p
)
35359 rs6000_sched_context_t sc
= (rs6000_sched_context_t
) _sc
;
35363 sc
->cached_can_issue_more
= 0;
35364 sc
->last_scheduled_insn
= NULL
;
35365 sc
->load_store_pendulum
= 0;
35366 sc
->divide_cnt
= 0;
35367 sc
->vec_pairing
= 0;
35371 sc
->cached_can_issue_more
= cached_can_issue_more
;
35372 sc
->last_scheduled_insn
= last_scheduled_insn
;
35373 sc
->load_store_pendulum
= load_store_pendulum
;
35374 sc
->divide_cnt
= divide_cnt
;
35375 sc
->vec_pairing
= vec_pairing
;
35379 /* Sets the global scheduling context to the one pointed to by _SC. */
35381 rs6000_set_sched_context (void *_sc
)
35383 rs6000_sched_context_t sc
= (rs6000_sched_context_t
) _sc
;
35385 gcc_assert (sc
!= NULL
);
35387 cached_can_issue_more
= sc
->cached_can_issue_more
;
35388 last_scheduled_insn
= sc
->last_scheduled_insn
;
35389 load_store_pendulum
= sc
->load_store_pendulum
;
35390 divide_cnt
= sc
->divide_cnt
;
35391 vec_pairing
= sc
->vec_pairing
;
35396 rs6000_free_sched_context (void *_sc
)
35398 gcc_assert (_sc
!= NULL
);
35404 rs6000_sched_can_speculate_insn (rtx_insn
*insn
)
35406 switch (get_attr_type (insn
))
35421 /* Length in units of the trampoline for entering a nested function. */
35424 rs6000_trampoline_size (void)
35428 switch (DEFAULT_ABI
)
35431 gcc_unreachable ();
35434 ret
= (TARGET_32BIT
) ? 12 : 24;
35438 gcc_assert (!TARGET_32BIT
);
35444 ret
= (TARGET_32BIT
) ? 40 : 48;
35451 /* Emit RTL insns to initialize the variable parts of a trampoline.
35452 FNADDR is an RTX for the address of the function's pure code.
35453 CXT is an RTX for the static chain value for the function. */
35456 rs6000_trampoline_init (rtx m_tramp
, tree fndecl
, rtx cxt
)
35458 int regsize
= (TARGET_32BIT
) ? 4 : 8;
35459 rtx fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
35460 rtx ctx_reg
= force_reg (Pmode
, cxt
);
35461 rtx addr
= force_reg (Pmode
, XEXP (m_tramp
, 0));
35463 switch (DEFAULT_ABI
)
35466 gcc_unreachable ();
35468 /* Under AIX, just build the 3 word function descriptor */
35471 rtx fnmem
, fn_reg
, toc_reg
;
35473 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS
)
35474 error ("You cannot take the address of a nested function if you use "
35475 "the -mno-pointers-to-nested-functions option.");
35477 fnmem
= gen_const_mem (Pmode
, force_reg (Pmode
, fnaddr
));
35478 fn_reg
= gen_reg_rtx (Pmode
);
35479 toc_reg
= gen_reg_rtx (Pmode
);
35481 /* Macro to shorten the code expansions below. */
35482 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
35484 m_tramp
= replace_equiv_address (m_tramp
, addr
);
35486 emit_move_insn (fn_reg
, MEM_PLUS (fnmem
, 0));
35487 emit_move_insn (toc_reg
, MEM_PLUS (fnmem
, regsize
));
35488 emit_move_insn (MEM_PLUS (m_tramp
, 0), fn_reg
);
35489 emit_move_insn (MEM_PLUS (m_tramp
, regsize
), toc_reg
);
35490 emit_move_insn (MEM_PLUS (m_tramp
, 2*regsize
), ctx_reg
);
35496 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
35500 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__trampoline_setup"),
35501 LCT_NORMAL
, VOIDmode
,
35503 GEN_INT (rs6000_trampoline_size ()), SImode
,
35511 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
35512 identifier as an argument, so the front end shouldn't look it up. */
35515 rs6000_attribute_takes_identifier_p (const_tree attr_id
)
35517 return is_attribute_p ("altivec", attr_id
);
35520 /* Handle the "altivec" attribute. The attribute may have
35521 arguments as follows:
35523 __attribute__((altivec(vector__)))
35524 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
35525 __attribute__((altivec(bool__))) (always followed by 'unsigned')
35527 and may appear more than once (e.g., 'vector bool char') in a
35528 given declaration. */
35531 rs6000_handle_altivec_attribute (tree
*node
,
35532 tree name ATTRIBUTE_UNUSED
,
35534 int flags ATTRIBUTE_UNUSED
,
35535 bool *no_add_attrs
)
35537 tree type
= *node
, result
= NULL_TREE
;
35541 = ((args
&& TREE_CODE (args
) == TREE_LIST
&& TREE_VALUE (args
)
35542 && TREE_CODE (TREE_VALUE (args
)) == IDENTIFIER_NODE
)
35543 ? *IDENTIFIER_POINTER (TREE_VALUE (args
))
35546 while (POINTER_TYPE_P (type
)
35547 || TREE_CODE (type
) == FUNCTION_TYPE
35548 || TREE_CODE (type
) == METHOD_TYPE
35549 || TREE_CODE (type
) == ARRAY_TYPE
)
35550 type
= TREE_TYPE (type
);
35552 mode
= TYPE_MODE (type
);
35554 /* Check for invalid AltiVec type qualifiers. */
35555 if (type
== long_double_type_node
)
35556 error ("use of %<long double%> in AltiVec types is invalid");
35557 else if (type
== boolean_type_node
)
35558 error ("use of boolean types in AltiVec types is invalid");
35559 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
35560 error ("use of %<complex%> in AltiVec types is invalid");
35561 else if (DECIMAL_FLOAT_MODE_P (mode
))
35562 error ("use of decimal floating point types in AltiVec types is invalid");
35563 else if (!TARGET_VSX
)
35565 if (type
== long_unsigned_type_node
|| type
== long_integer_type_node
)
35568 error ("use of %<long%> in AltiVec types is invalid for "
35569 "64-bit code without -mvsx");
35570 else if (rs6000_warn_altivec_long
)
35571 warning (0, "use of %<long%> in AltiVec types is deprecated; "
35574 else if (type
== long_long_unsigned_type_node
35575 || type
== long_long_integer_type_node
)
35576 error ("use of %<long long%> in AltiVec types is invalid without "
35578 else if (type
== double_type_node
)
35579 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
35582 switch (altivec_type
)
35585 unsigned_p
= TYPE_UNSIGNED (type
);
35589 result
= (unsigned_p
? unsigned_V1TI_type_node
: V1TI_type_node
);
35592 result
= (unsigned_p
? unsigned_V2DI_type_node
: V2DI_type_node
);
35595 result
= (unsigned_p
? unsigned_V4SI_type_node
: V4SI_type_node
);
35598 result
= (unsigned_p
? unsigned_V8HI_type_node
: V8HI_type_node
);
35601 result
= (unsigned_p
? unsigned_V16QI_type_node
: V16QI_type_node
);
35603 case E_SFmode
: result
= V4SF_type_node
; break;
35604 case E_DFmode
: result
= V2DF_type_node
; break;
35605 /* If the user says 'vector int bool', we may be handed the 'bool'
35606 attribute _before_ the 'vector' attribute, and so select the
35607 proper type in the 'b' case below. */
35608 case E_V4SImode
: case E_V8HImode
: case E_V16QImode
: case E_V4SFmode
:
35609 case E_V2DImode
: case E_V2DFmode
:
35617 case E_DImode
: case E_V2DImode
: result
= bool_V2DI_type_node
; break;
35618 case E_SImode
: case E_V4SImode
: result
= bool_V4SI_type_node
; break;
35619 case E_HImode
: case E_V8HImode
: result
= bool_V8HI_type_node
; break;
35620 case E_QImode
: case E_V16QImode
: result
= bool_V16QI_type_node
;
35627 case E_V8HImode
: result
= pixel_V8HI_type_node
;
35633 /* Propagate qualifiers attached to the element type
35634 onto the vector type. */
35635 if (result
&& result
!= type
&& TYPE_QUALS (type
))
35636 result
= build_qualified_type (result
, TYPE_QUALS (type
));
35638 *no_add_attrs
= true; /* No need to hang on to the attribute. */
35641 *node
= lang_hooks
.types
.reconstruct_complex_type (*node
, result
);
35646 /* AltiVec defines four built-in scalar types that serve as vector
35647 elements; we must teach the compiler how to mangle them. */
35649 static const char *
35650 rs6000_mangle_type (const_tree type
)
35652 type
= TYPE_MAIN_VARIANT (type
);
35654 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
35655 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
35658 if (type
== bool_char_type_node
) return "U6__boolc";
35659 if (type
== bool_short_type_node
) return "U6__bools";
35660 if (type
== pixel_type_node
) return "u7__pixel";
35661 if (type
== bool_int_type_node
) return "U6__booli";
35662 if (type
== bool_long_type_node
) return "U6__booll";
35664 /* Use a unique name for __float128 rather than trying to use "e" or "g". Use
35665 "g" for IBM extended double, no matter whether it is long double (using
35666 -mabi=ibmlongdouble) or the distinct __ibm128 type. */
35667 if (TARGET_FLOAT128_TYPE
)
35669 if (type
== ieee128_float_type_node
)
35670 return "U10__float128";
35672 if (type
== ibm128_float_type_node
)
35675 if (type
== long_double_type_node
&& TARGET_LONG_DOUBLE_128
)
35676 return (TARGET_IEEEQUAD
) ? "U10__float128" : "g";
35679 /* Mangle IBM extended float long double as `g' (__float128) on
35680 powerpc*-linux where long-double-64 previously was the default. */
35681 if (TYPE_MAIN_VARIANT (type
) == long_double_type_node
35683 && TARGET_LONG_DOUBLE_128
35684 && !TARGET_IEEEQUAD
)
35687 /* For all other types, use normal C++ mangling. */
35691 /* Handle a "longcall" or "shortcall" attribute; arguments as in
35692 struct attribute_spec.handler. */
35695 rs6000_handle_longcall_attribute (tree
*node
, tree name
,
35696 tree args ATTRIBUTE_UNUSED
,
35697 int flags ATTRIBUTE_UNUSED
,
35698 bool *no_add_attrs
)
35700 if (TREE_CODE (*node
) != FUNCTION_TYPE
35701 && TREE_CODE (*node
) != FIELD_DECL
35702 && TREE_CODE (*node
) != TYPE_DECL
)
35704 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
35706 *no_add_attrs
= true;
35712 /* Set longcall attributes on all functions declared when
35713 rs6000_default_long_calls is true. */
35715 rs6000_set_default_type_attributes (tree type
)
35717 if (rs6000_default_long_calls
35718 && (TREE_CODE (type
) == FUNCTION_TYPE
35719 || TREE_CODE (type
) == METHOD_TYPE
))
35720 TYPE_ATTRIBUTES (type
) = tree_cons (get_identifier ("longcall"),
35722 TYPE_ATTRIBUTES (type
));
35725 darwin_set_default_type_attributes (type
);
35729 /* Return a reference suitable for calling a function with the
35730 longcall attribute. */
35733 rs6000_longcall_ref (rtx call_ref
)
35735 const char *call_name
;
35738 if (GET_CODE (call_ref
) != SYMBOL_REF
)
35741 /* System V adds '.' to the internal name, so skip them. */
35742 call_name
= XSTR (call_ref
, 0);
35743 if (*call_name
== '.')
35745 while (*call_name
== '.')
35748 node
= get_identifier (call_name
);
35749 call_ref
= gen_rtx_SYMBOL_REF (VOIDmode
, IDENTIFIER_POINTER (node
));
35752 return force_reg (Pmode
, call_ref
);
35755 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
35756 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
35759 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
35760 struct attribute_spec.handler. */
35762 rs6000_handle_struct_attribute (tree
*node
, tree name
,
35763 tree args ATTRIBUTE_UNUSED
,
35764 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
35767 if (DECL_P (*node
))
35769 if (TREE_CODE (*node
) == TYPE_DECL
)
35770 type
= &TREE_TYPE (*node
);
35775 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
35776 || TREE_CODE (*type
) == UNION_TYPE
)))
35778 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
35779 *no_add_attrs
= true;
35782 else if ((is_attribute_p ("ms_struct", name
)
35783 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
35784 || ((is_attribute_p ("gcc_struct", name
)
35785 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
35787 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
35789 *no_add_attrs
= true;
35796 rs6000_ms_bitfield_layout_p (const_tree record_type
)
35798 return (TARGET_USE_MS_BITFIELD_LAYOUT
&&
35799 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
35800 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
35803 #ifdef USING_ELFOS_H
35805 /* A get_unnamed_section callback, used for switching to toc_section. */
35808 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED
)
35810 if ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
35811 && TARGET_MINIMAL_TOC
)
35813 if (!toc_initialized
)
35815 fprintf (asm_out_file
, "%s\n", TOC_SECTION_ASM_OP
);
35816 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
35817 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "LCTOC", 0);
35818 fprintf (asm_out_file
, "\t.tc ");
35819 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1[TC],");
35820 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
35821 fprintf (asm_out_file
, "\n");
35823 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
35824 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
35825 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
35826 fprintf (asm_out_file
, " = .+32768\n");
35827 toc_initialized
= 1;
35830 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
35832 else if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
35834 fprintf (asm_out_file
, "%s\n", TOC_SECTION_ASM_OP
);
35835 if (!toc_initialized
)
35837 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
35838 toc_initialized
= 1;
35843 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
35844 if (!toc_initialized
)
35846 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
35847 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
35848 fprintf (asm_out_file
, " = .+32768\n");
35849 toc_initialized
= 1;
35854 /* Implement TARGET_ASM_INIT_SECTIONS. */
35857 rs6000_elf_asm_init_sections (void)
35860 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op
, NULL
);
35863 = get_unnamed_section (SECTION_WRITE
, output_section_asm_op
,
35864 SDATA2_SECTION_ASM_OP
);
35867 /* Implement TARGET_SELECT_RTX_SECTION. */
35870 rs6000_elf_select_rtx_section (machine_mode mode
, rtx x
,
35871 unsigned HOST_WIDE_INT align
)
35873 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
))
35874 return toc_section
;
35876 return default_elf_select_rtx_section (mode
, x
, align
);
35879 /* For a SYMBOL_REF, set generic flags and then perform some
35880 target-specific processing.
35882 When the AIX ABI is requested on a non-AIX system, replace the
35883 function name with the real name (with a leading .) rather than the
35884 function descriptor name. This saves a lot of overriding code to
35885 read the prefixes. */
35887 static void rs6000_elf_encode_section_info (tree
, rtx
, int) ATTRIBUTE_UNUSED
;
35889 rs6000_elf_encode_section_info (tree decl
, rtx rtl
, int first
)
35891 default_encode_section_info (decl
, rtl
, first
);
35894 && TREE_CODE (decl
) == FUNCTION_DECL
35896 && DEFAULT_ABI
== ABI_AIX
)
35898 rtx sym_ref
= XEXP (rtl
, 0);
35899 size_t len
= strlen (XSTR (sym_ref
, 0));
35900 char *str
= XALLOCAVEC (char, len
+ 2);
35902 memcpy (str
+ 1, XSTR (sym_ref
, 0), len
+ 1);
35903 XSTR (sym_ref
, 0) = ggc_alloc_string (str
, len
+ 1);
35908 compare_section_name (const char *section
, const char *templ
)
35912 len
= strlen (templ
);
35913 return (strncmp (section
, templ
, len
) == 0
35914 && (section
[len
] == 0 || section
[len
] == '.'));
35918 rs6000_elf_in_small_data_p (const_tree decl
)
35920 if (rs6000_sdata
== SDATA_NONE
)
35923 /* We want to merge strings, so we never consider them small data. */
35924 if (TREE_CODE (decl
) == STRING_CST
)
35927 /* Functions are never in the small data area. */
35928 if (TREE_CODE (decl
) == FUNCTION_DECL
)
35931 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_SECTION_NAME (decl
))
35933 const char *section
= DECL_SECTION_NAME (decl
);
35934 if (compare_section_name (section
, ".sdata")
35935 || compare_section_name (section
, ".sdata2")
35936 || compare_section_name (section
, ".gnu.linkonce.s")
35937 || compare_section_name (section
, ".sbss")
35938 || compare_section_name (section
, ".sbss2")
35939 || compare_section_name (section
, ".gnu.linkonce.sb")
35940 || strcmp (section
, ".PPC.EMB.sdata0") == 0
35941 || strcmp (section
, ".PPC.EMB.sbss0") == 0)
35946 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (decl
));
35949 && size
<= g_switch_value
35950 /* If it's not public, and we're not going to reference it there,
35951 there's no need to put it in the small data section. */
35952 && (rs6000_sdata
!= SDATA_DATA
|| TREE_PUBLIC (decl
)))
35959 #endif /* USING_ELFOS_H */
35961 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
35964 rs6000_use_blocks_for_constant_p (machine_mode mode
, const_rtx x
)
35966 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
);
35969 /* Do not place thread-local symbols refs in the object blocks. */
35972 rs6000_use_blocks_for_decl_p (const_tree decl
)
35974 return !DECL_THREAD_LOCAL_P (decl
);
35977 /* Return a REG that occurs in ADDR with coefficient 1.
35978 ADDR can be effectively incremented by incrementing REG.
35980 r0 is special and we must not select it as an address
35981 register by this routine since our caller will try to
35982 increment the returned register via an "la" instruction. */
35985 find_addr_reg (rtx addr
)
35987 while (GET_CODE (addr
) == PLUS
)
35989 if (GET_CODE (XEXP (addr
, 0)) == REG
35990 && REGNO (XEXP (addr
, 0)) != 0)
35991 addr
= XEXP (addr
, 0);
35992 else if (GET_CODE (XEXP (addr
, 1)) == REG
35993 && REGNO (XEXP (addr
, 1)) != 0)
35994 addr
= XEXP (addr
, 1);
35995 else if (CONSTANT_P (XEXP (addr
, 0)))
35996 addr
= XEXP (addr
, 1);
35997 else if (CONSTANT_P (XEXP (addr
, 1)))
35998 addr
= XEXP (addr
, 0);
36000 gcc_unreachable ();
36002 gcc_assert (GET_CODE (addr
) == REG
&& REGNO (addr
) != 0);
36007 rs6000_fatal_bad_address (rtx op
)
36009 fatal_insn ("bad address", op
);
36014 typedef struct branch_island_d
{
36015 tree function_name
;
36021 static vec
<branch_island
, va_gc
> *branch_islands
;
36023 /* Remember to generate a branch island for far calls to the given
36027 add_compiler_branch_island (tree label_name
, tree function_name
,
36030 branch_island bi
= {function_name
, label_name
, line_number
};
36031 vec_safe_push (branch_islands
, bi
);
36034 /* Generate far-jump branch islands for everything recorded in
36035 branch_islands. Invoked immediately after the last instruction of
36036 the epilogue has been emitted; the branch islands must be appended
36037 to, and contiguous with, the function body. Mach-O stubs are
36038 generated in machopic_output_stub(). */
36041 macho_branch_islands (void)
36045 while (!vec_safe_is_empty (branch_islands
))
36047 branch_island
*bi
= &branch_islands
->last ();
36048 const char *label
= IDENTIFIER_POINTER (bi
->label_name
);
36049 const char *name
= IDENTIFIER_POINTER (bi
->function_name
);
36050 char name_buf
[512];
36051 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
36052 if (name
[0] == '*' || name
[0] == '&')
36053 strcpy (name_buf
, name
+1);
36057 strcpy (name_buf
+1, name
);
36059 strcpy (tmp_buf
, "\n");
36060 strcat (tmp_buf
, label
);
36061 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
36062 if (write_symbols
== DBX_DEBUG
|| write_symbols
== XCOFF_DEBUG
)
36063 dbxout_stabd (N_SLINE
, bi
->line_number
);
36064 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
36067 if (TARGET_LINK_STACK
)
36070 get_ppc476_thunk_name (name
);
36071 strcat (tmp_buf
, ":\n\tmflr r0\n\tbl ");
36072 strcat (tmp_buf
, name
);
36073 strcat (tmp_buf
, "\n");
36074 strcat (tmp_buf
, label
);
36075 strcat (tmp_buf
, "_pic:\n\tmflr r11\n");
36079 strcat (tmp_buf
, ":\n\tmflr r0\n\tbcl 20,31,");
36080 strcat (tmp_buf
, label
);
36081 strcat (tmp_buf
, "_pic\n");
36082 strcat (tmp_buf
, label
);
36083 strcat (tmp_buf
, "_pic:\n\tmflr r11\n");
36086 strcat (tmp_buf
, "\taddis r11,r11,ha16(");
36087 strcat (tmp_buf
, name_buf
);
36088 strcat (tmp_buf
, " - ");
36089 strcat (tmp_buf
, label
);
36090 strcat (tmp_buf
, "_pic)\n");
36092 strcat (tmp_buf
, "\tmtlr r0\n");
36094 strcat (tmp_buf
, "\taddi r12,r11,lo16(");
36095 strcat (tmp_buf
, name_buf
);
36096 strcat (tmp_buf
, " - ");
36097 strcat (tmp_buf
, label
);
36098 strcat (tmp_buf
, "_pic)\n");
36100 strcat (tmp_buf
, "\tmtctr r12\n\tbctr\n");
36104 strcat (tmp_buf
, ":\nlis r12,hi16(");
36105 strcat (tmp_buf
, name_buf
);
36106 strcat (tmp_buf
, ")\n\tori r12,r12,lo16(");
36107 strcat (tmp_buf
, name_buf
);
36108 strcat (tmp_buf
, ")\n\tmtctr r12\n\tbctr");
36110 output_asm_insn (tmp_buf
, 0);
36111 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
36112 if (write_symbols
== DBX_DEBUG
|| write_symbols
== XCOFF_DEBUG
)
36113 dbxout_stabd (N_SLINE
, bi
->line_number
);
36114 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
36115 branch_islands
->pop ();
36119 /* NO_PREVIOUS_DEF checks in the link list whether the function name is
36120 already there or not. */
36123 no_previous_def (tree function_name
)
36128 FOR_EACH_VEC_SAFE_ELT (branch_islands
, ix
, bi
)
36129 if (function_name
== bi
->function_name
)
36134 /* GET_PREV_LABEL gets the label name from the previous definition of
36138 get_prev_label (tree function_name
)
36143 FOR_EACH_VEC_SAFE_ELT (branch_islands
, ix
, bi
)
36144 if (function_name
== bi
->function_name
)
36145 return bi
->label_name
;
36149 /* INSN is either a function call or a millicode call. It may have an
36150 unconditional jump in its delay slot.
36152 CALL_DEST is the routine we are calling. */
36155 output_call (rtx_insn
*insn
, rtx
*operands
, int dest_operand_number
,
36156 int cookie_operand_number
)
36158 static char buf
[256];
36159 if (darwin_emit_branch_islands
36160 && GET_CODE (operands
[dest_operand_number
]) == SYMBOL_REF
36161 && (INTVAL (operands
[cookie_operand_number
]) & CALL_LONG
))
36164 tree funname
= get_identifier (XSTR (operands
[dest_operand_number
], 0));
36166 if (no_previous_def (funname
))
36168 rtx label_rtx
= gen_label_rtx ();
36169 char *label_buf
, temp_buf
[256];
36170 ASM_GENERATE_INTERNAL_LABEL (temp_buf
, "L",
36171 CODE_LABEL_NUMBER (label_rtx
));
36172 label_buf
= temp_buf
[0] == '*' ? temp_buf
+ 1 : temp_buf
;
36173 labelname
= get_identifier (label_buf
);
36174 add_compiler_branch_island (labelname
, funname
, insn_line (insn
));
36177 labelname
= get_prev_label (funname
);
36179 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
36180 instruction will reach 'foo', otherwise link as 'bl L42'".
36181 "L42" should be a 'branch island', that will do a far jump to
36182 'foo'. Branch islands are generated in
36183 macho_branch_islands(). */
36184 sprintf (buf
, "jbsr %%z%d,%.246s",
36185 dest_operand_number
, IDENTIFIER_POINTER (labelname
));
36188 sprintf (buf
, "bl %%z%d", dest_operand_number
);
36192 /* Generate PIC and indirect symbol stubs. */
36195 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
36197 unsigned int length
;
36198 char *symbol_name
, *lazy_ptr_name
;
36199 char *local_label_0
;
36200 static int label
= 0;
36202 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
36203 symb
= (*targetm
.strip_name_encoding
) (symb
);
36206 length
= strlen (symb
);
36207 symbol_name
= XALLOCAVEC (char, length
+ 32);
36208 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
36210 lazy_ptr_name
= XALLOCAVEC (char, length
+ 32);
36211 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name
, symb
, length
);
36214 switch_to_section (darwin_sections
[machopic_picsymbol_stub1_section
]);
36216 switch_to_section (darwin_sections
[machopic_symbol_stub1_section
]);
36220 fprintf (file
, "\t.align 5\n");
36222 fprintf (file
, "%s:\n", stub
);
36223 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
36226 local_label_0
= XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
36227 sprintf (local_label_0
, "\"L%011d$spb\"", label
);
36229 fprintf (file
, "\tmflr r0\n");
36230 if (TARGET_LINK_STACK
)
36233 get_ppc476_thunk_name (name
);
36234 fprintf (file
, "\tbl %s\n", name
);
36235 fprintf (file
, "%s:\n\tmflr r11\n", local_label_0
);
36239 fprintf (file
, "\tbcl 20,31,%s\n", local_label_0
);
36240 fprintf (file
, "%s:\n\tmflr r11\n", local_label_0
);
36242 fprintf (file
, "\taddis r11,r11,ha16(%s-%s)\n",
36243 lazy_ptr_name
, local_label_0
);
36244 fprintf (file
, "\tmtlr r0\n");
36245 fprintf (file
, "\t%s r12,lo16(%s-%s)(r11)\n",
36246 (TARGET_64BIT
? "ldu" : "lwzu"),
36247 lazy_ptr_name
, local_label_0
);
36248 fprintf (file
, "\tmtctr r12\n");
36249 fprintf (file
, "\tbctr\n");
36253 fprintf (file
, "\t.align 4\n");
36255 fprintf (file
, "%s:\n", stub
);
36256 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
36258 fprintf (file
, "\tlis r11,ha16(%s)\n", lazy_ptr_name
);
36259 fprintf (file
, "\t%s r12,lo16(%s)(r11)\n",
36260 (TARGET_64BIT
? "ldu" : "lwzu"),
36262 fprintf (file
, "\tmtctr r12\n");
36263 fprintf (file
, "\tbctr\n");
36266 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
36267 fprintf (file
, "%s:\n", lazy_ptr_name
);
36268 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
36269 fprintf (file
, "%sdyld_stub_binding_helper\n",
36270 (TARGET_64BIT
? DOUBLE_INT_ASM_OP
: "\t.long\t"));
36273 /* Legitimize PIC addresses. If the address is already
36274 position-independent, we return ORIG. Newly generated
36275 position-independent addresses go into a reg. This is REG if non
36276 zero, otherwise we allocate register(s) as necessary. */
36278 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
36281 rs6000_machopic_legitimize_pic_address (rtx orig
, machine_mode mode
,
36286 if (reg
== NULL
&& ! reload_in_progress
&& ! reload_completed
)
36287 reg
= gen_reg_rtx (Pmode
);
36289 if (GET_CODE (orig
) == CONST
)
36293 if (GET_CODE (XEXP (orig
, 0)) == PLUS
36294 && XEXP (XEXP (orig
, 0), 0) == pic_offset_table_rtx
)
36297 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
36299 /* Use a different reg for the intermediate value, as
36300 it will be marked UNCHANGING. */
36301 reg_temp
= !can_create_pseudo_p () ? reg
: gen_reg_rtx (Pmode
);
36302 base
= rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig
, 0), 0),
36305 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig
, 0), 1),
36308 if (GET_CODE (offset
) == CONST_INT
)
36310 if (SMALL_INT (offset
))
36311 return plus_constant (Pmode
, base
, INTVAL (offset
));
36312 else if (! reload_in_progress
&& ! reload_completed
)
36313 offset
= force_reg (Pmode
, offset
);
36316 rtx mem
= force_const_mem (Pmode
, orig
);
36317 return machopic_legitimize_pic_address (mem
, Pmode
, reg
);
36320 return gen_rtx_PLUS (Pmode
, base
, offset
);
36323 /* Fall back on generic machopic code. */
36324 return machopic_legitimize_pic_address (orig
, mode
, reg
);
36327 /* Output a .machine directive for the Darwin assembler, and call
36328 the generic start_file routine. */
36331 rs6000_darwin_file_start (void)
36333 static const struct
36337 HOST_WIDE_INT if_set
;
36339 { "ppc64", "ppc64", MASK_64BIT
},
36340 { "970", "ppc970", MASK_PPC_GPOPT
| MASK_MFCRF
| MASK_POWERPC64
},
36341 { "power4", "ppc970", 0 },
36342 { "G5", "ppc970", 0 },
36343 { "7450", "ppc7450", 0 },
36344 { "7400", "ppc7400", MASK_ALTIVEC
},
36345 { "G4", "ppc7400", 0 },
36346 { "750", "ppc750", 0 },
36347 { "740", "ppc750", 0 },
36348 { "G3", "ppc750", 0 },
36349 { "604e", "ppc604e", 0 },
36350 { "604", "ppc604", 0 },
36351 { "603e", "ppc603", 0 },
36352 { "603", "ppc603", 0 },
36353 { "601", "ppc601", 0 },
36354 { NULL
, "ppc", 0 } };
36355 const char *cpu_id
= "";
36358 rs6000_file_start ();
36359 darwin_file_start ();
36361 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
36363 if (rs6000_default_cpu
!= 0 && rs6000_default_cpu
[0] != '\0')
36364 cpu_id
= rs6000_default_cpu
;
36366 if (global_options_set
.x_rs6000_cpu_index
)
36367 cpu_id
= processor_target_table
[rs6000_cpu_index
].name
;
36369 /* Look through the mapping array. Pick the first name that either
36370 matches the argument, has a bit set in IF_SET that is also set
36371 in the target flags, or has a NULL name. */
36374 while (mapping
[i
].arg
!= NULL
36375 && strcmp (mapping
[i
].arg
, cpu_id
) != 0
36376 && (mapping
[i
].if_set
& rs6000_isa_flags
) == 0)
36379 fprintf (asm_out_file
, "\t.machine %s\n", mapping
[i
].name
);
36382 #endif /* TARGET_MACHO */
36386 rs6000_elf_reloc_rw_mask (void)
36390 else if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
36396 /* Record an element in the table of global constructors. SYMBOL is
36397 a SYMBOL_REF of the function to be called; PRIORITY is a number
36398 between 0 and MAX_INIT_PRIORITY.
36400 This differs from default_named_section_asm_out_constructor in
36401 that we have special handling for -mrelocatable. */
36403 static void rs6000_elf_asm_out_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
36405 rs6000_elf_asm_out_constructor (rtx symbol
, int priority
)
36407 const char *section
= ".ctors";
36410 if (priority
!= DEFAULT_INIT_PRIORITY
)
36412 sprintf (buf
, ".ctors.%.5u",
36413 /* Invert the numbering so the linker puts us in the proper
36414 order; constructors are run from right to left, and the
36415 linker sorts in increasing order. */
36416 MAX_INIT_PRIORITY
- priority
);
36420 switch_to_section (get_section (section
, SECTION_WRITE
, NULL
));
36421 assemble_align (POINTER_SIZE
);
36423 if (DEFAULT_ABI
== ABI_V4
36424 && (TARGET_RELOCATABLE
|| flag_pic
> 1))
36426 fputs ("\t.long (", asm_out_file
);
36427 output_addr_const (asm_out_file
, symbol
);
36428 fputs (")@fixup\n", asm_out_file
);
36431 assemble_integer (symbol
, POINTER_SIZE
/ BITS_PER_UNIT
, POINTER_SIZE
, 1);
36434 static void rs6000_elf_asm_out_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
36436 rs6000_elf_asm_out_destructor (rtx symbol
, int priority
)
36438 const char *section
= ".dtors";
36441 if (priority
!= DEFAULT_INIT_PRIORITY
)
36443 sprintf (buf
, ".dtors.%.5u",
36444 /* Invert the numbering so the linker puts us in the proper
36445 order; constructors are run from right to left, and the
36446 linker sorts in increasing order. */
36447 MAX_INIT_PRIORITY
- priority
);
36451 switch_to_section (get_section (section
, SECTION_WRITE
, NULL
));
36452 assemble_align (POINTER_SIZE
);
36454 if (DEFAULT_ABI
== ABI_V4
36455 && (TARGET_RELOCATABLE
|| flag_pic
> 1))
36457 fputs ("\t.long (", asm_out_file
);
36458 output_addr_const (asm_out_file
, symbol
);
36459 fputs (")@fixup\n", asm_out_file
);
36462 assemble_integer (symbol
, POINTER_SIZE
/ BITS_PER_UNIT
, POINTER_SIZE
, 1);
36466 rs6000_elf_declare_function_name (FILE *file
, const char *name
, tree decl
)
36468 if (TARGET_64BIT
&& DEFAULT_ABI
!= ABI_ELFv2
)
36470 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file
);
36471 ASM_OUTPUT_LABEL (file
, name
);
36472 fputs (DOUBLE_INT_ASM_OP
, file
);
36473 rs6000_output_function_entry (file
, name
);
36474 fputs (",.TOC.@tocbase,0\n\t.previous\n", file
);
36477 fputs ("\t.size\t", file
);
36478 assemble_name (file
, name
);
36479 fputs (",24\n\t.type\t.", file
);
36480 assemble_name (file
, name
);
36481 fputs (",@function\n", file
);
36482 if (TREE_PUBLIC (decl
) && ! DECL_WEAK (decl
))
36484 fputs ("\t.globl\t.", file
);
36485 assemble_name (file
, name
);
36490 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
36491 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
36492 rs6000_output_function_entry (file
, name
);
36493 fputs (":\n", file
);
36497 if (DEFAULT_ABI
== ABI_V4
36498 && (TARGET_RELOCATABLE
|| flag_pic
> 1)
36499 && !TARGET_SECURE_PLT
36500 && (!constant_pool_empty_p () || crtl
->profile
)
36505 (*targetm
.asm_out
.internal_label
) (file
, "LCL", rs6000_pic_labelno
);
36507 fprintf (file
, "\t.long ");
36508 assemble_name (file
, toc_label_name
);
36511 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCF", rs6000_pic_labelno
);
36512 assemble_name (file
, buf
);
36516 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
36517 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
36519 if (TARGET_CMODEL
== CMODEL_LARGE
&& rs6000_global_entry_point_needed_p ())
36523 (*targetm
.asm_out
.internal_label
) (file
, "LCL", rs6000_pic_labelno
);
36525 fprintf (file
, "\t.quad .TOC.-");
36526 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCF", rs6000_pic_labelno
);
36527 assemble_name (file
, buf
);
36531 if (DEFAULT_ABI
== ABI_AIX
)
36533 const char *desc_name
, *orig_name
;
36535 orig_name
= (*targetm
.strip_name_encoding
) (name
);
36536 desc_name
= orig_name
;
36537 while (*desc_name
== '.')
36540 if (TREE_PUBLIC (decl
))
36541 fprintf (file
, "\t.globl %s\n", desc_name
);
36543 fprintf (file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
36544 fprintf (file
, "%s:\n", desc_name
);
36545 fprintf (file
, "\t.long %s\n", orig_name
);
36546 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file
);
36547 fputs ("\t.long 0\n", file
);
36548 fprintf (file
, "\t.previous\n");
36550 ASM_OUTPUT_LABEL (file
, name
);
36553 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED
;
36555 rs6000_elf_file_end (void)
36557 #ifdef HAVE_AS_GNU_ATTRIBUTE
36558 /* ??? The value emitted depends on options active at file end.
36559 Assume anyone using #pragma or attributes that might change
36560 options knows what they are doing. */
36561 if ((TARGET_64BIT
|| DEFAULT_ABI
== ABI_V4
)
36562 && rs6000_passes_float
)
36566 if (TARGET_DF_FPR
| TARGET_DF_SPE
)
36568 else if (TARGET_SF_FPR
| TARGET_SF_SPE
)
36572 if (rs6000_passes_long_double
)
36574 if (!TARGET_LONG_DOUBLE_128
)
36576 else if (TARGET_IEEEQUAD
)
36581 fprintf (asm_out_file
, "\t.gnu_attribute 4, %d\n", fp
);
36583 if (TARGET_32BIT
&& DEFAULT_ABI
== ABI_V4
)
36585 if (rs6000_passes_vector
)
36586 fprintf (asm_out_file
, "\t.gnu_attribute 8, %d\n",
36587 (TARGET_ALTIVEC_ABI
? 2
36588 : TARGET_SPE_ABI
? 3
36590 if (rs6000_returns_struct
)
36591 fprintf (asm_out_file
, "\t.gnu_attribute 12, %d\n",
36592 aix_struct_return
? 2 : 1);
36595 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
36596 if (TARGET_32BIT
|| DEFAULT_ABI
== ABI_ELFv2
)
36597 file_end_indicate_exec_stack ();
36600 if (flag_split_stack
)
36601 file_end_indicate_split_stack ();
36605 /* We have expanded a CPU builtin, so we need to emit a reference to
36606 the special symbol that LIBC uses to declare it supports the
36607 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
36608 switch_to_section (data_section
);
36609 fprintf (asm_out_file
, "\t.align %u\n", TARGET_32BIT
? 2 : 3);
36610 fprintf (asm_out_file
, "\t%s %s\n",
36611 TARGET_32BIT
? ".long" : ".quad", tcb_verification_symbol
);
36618 #ifndef HAVE_XCOFF_DWARF_EXTRAS
36619 #define HAVE_XCOFF_DWARF_EXTRAS 0
36622 static enum unwind_info_type
36623 rs6000_xcoff_debug_unwind_info (void)
36629 rs6000_xcoff_asm_output_anchor (rtx symbol
)
36633 sprintf (buffer
, "$ + " HOST_WIDE_INT_PRINT_DEC
,
36634 SYMBOL_REF_BLOCK_OFFSET (symbol
));
36635 fprintf (asm_out_file
, "%s", SET_ASM_OP
);
36636 RS6000_OUTPUT_BASENAME (asm_out_file
, XSTR (symbol
, 0));
36637 fprintf (asm_out_file
, ",");
36638 RS6000_OUTPUT_BASENAME (asm_out_file
, buffer
);
36639 fprintf (asm_out_file
, "\n");
36643 rs6000_xcoff_asm_globalize_label (FILE *stream
, const char *name
)
36645 fputs (GLOBAL_ASM_OP
, stream
);
36646 RS6000_OUTPUT_BASENAME (stream
, name
);
36647 putc ('\n', stream
);
36650 /* A get_unnamed_decl callback, used for read-only sections. PTR
36651 points to the section string variable. */
36654 rs6000_xcoff_output_readonly_section_asm_op (const void *directive
)
36656 fprintf (asm_out_file
, "\t.csect %s[RO],%s\n",
36657 *(const char *const *) directive
,
36658 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
36661 /* Likewise for read-write sections. */
36664 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive
)
36666 fprintf (asm_out_file
, "\t.csect %s[RW],%s\n",
36667 *(const char *const *) directive
,
36668 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
36672 rs6000_xcoff_output_tls_section_asm_op (const void *directive
)
36674 fprintf (asm_out_file
, "\t.csect %s[TL],%s\n",
36675 *(const char *const *) directive
,
36676 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
36679 /* A get_unnamed_section callback, used for switching to toc_section. */
36682 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED
)
36684 if (TARGET_MINIMAL_TOC
)
36686 /* toc_section is always selected at least once from
36687 rs6000_xcoff_file_start, so this is guaranteed to
36688 always be defined once and only once in each file. */
36689 if (!toc_initialized
)
36691 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file
);
36692 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file
);
36693 toc_initialized
= 1;
36695 fprintf (asm_out_file
, "\t.csect toc_table[RW]%s\n",
36696 (TARGET_32BIT
? "" : ",3"));
36699 fputs ("\t.toc\n", asm_out_file
);
36702 /* Implement TARGET_ASM_INIT_SECTIONS. */
36705 rs6000_xcoff_asm_init_sections (void)
36707 read_only_data_section
36708 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op
,
36709 &xcoff_read_only_section_name
);
36711 private_data_section
36712 = get_unnamed_section (SECTION_WRITE
,
36713 rs6000_xcoff_output_readwrite_section_asm_op
,
36714 &xcoff_private_data_section_name
);
36717 = get_unnamed_section (SECTION_TLS
,
36718 rs6000_xcoff_output_tls_section_asm_op
,
36719 &xcoff_tls_data_section_name
);
36721 tls_private_data_section
36722 = get_unnamed_section (SECTION_TLS
,
36723 rs6000_xcoff_output_tls_section_asm_op
,
36724 &xcoff_private_data_section_name
);
36726 read_only_private_data_section
36727 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op
,
36728 &xcoff_private_data_section_name
);
36731 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op
, NULL
);
36733 readonly_data_section
= read_only_data_section
;
36737 rs6000_xcoff_reloc_rw_mask (void)
36743 rs6000_xcoff_asm_named_section (const char *name
, unsigned int flags
,
36744 tree decl ATTRIBUTE_UNUSED
)
36747 static const char * const suffix
[5] = { "PR", "RO", "RW", "TL", "XO" };
36749 if (flags
& SECTION_EXCLUDE
)
36751 else if (flags
& SECTION_DEBUG
)
36753 fprintf (asm_out_file
, "\t.dwsect %s\n", name
);
36756 else if (flags
& SECTION_CODE
)
36758 else if (flags
& SECTION_TLS
)
36760 else if (flags
& SECTION_WRITE
)
36765 fprintf (asm_out_file
, "\t.csect %s%s[%s],%u\n",
36766 (flags
& SECTION_CODE
) ? "." : "",
36767 name
, suffix
[smclass
], flags
& SECTION_ENTSIZE
);
36770 #define IN_NAMED_SECTION(DECL) \
36771 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
36772 && DECL_SECTION_NAME (DECL) != NULL)
36775 rs6000_xcoff_select_section (tree decl
, int reloc
,
36776 unsigned HOST_WIDE_INT align
)
36778 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
36780 if (align
> BIGGEST_ALIGNMENT
)
36782 resolve_unique_section (decl
, reloc
, true);
36783 if (IN_NAMED_SECTION (decl
))
36784 return get_named_section (decl
, NULL
, reloc
);
36787 if (decl_readonly_section (decl
, reloc
))
36789 if (TREE_PUBLIC (decl
))
36790 return read_only_data_section
;
36792 return read_only_private_data_section
;
36797 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_THREAD_LOCAL_P (decl
))
36799 if (TREE_PUBLIC (decl
))
36800 return tls_data_section
;
36801 else if (bss_initializer_p (decl
))
36803 /* Convert to COMMON to emit in BSS. */
36804 DECL_COMMON (decl
) = 1;
36805 return tls_comm_section
;
36808 return tls_private_data_section
;
36812 if (TREE_PUBLIC (decl
))
36813 return data_section
;
36815 return private_data_section
;
36820 rs6000_xcoff_unique_section (tree decl
, int reloc ATTRIBUTE_UNUSED
)
36824 /* Use select_section for private data and uninitialized data with
36825 alignment <= BIGGEST_ALIGNMENT. */
36826 if (!TREE_PUBLIC (decl
)
36827 || DECL_COMMON (decl
)
36828 || (DECL_INITIAL (decl
) == NULL_TREE
36829 && DECL_ALIGN (decl
) <= BIGGEST_ALIGNMENT
)
36830 || DECL_INITIAL (decl
) == error_mark_node
36831 || (flag_zero_initialized_in_bss
36832 && initializer_zerop (DECL_INITIAL (decl
))))
36835 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
36836 name
= (*targetm
.strip_name_encoding
) (name
);
36837 set_decl_section_name (decl
, name
);
36840 /* Select section for constant in constant pool.
36842 On RS/6000, all constants are in the private read-only data area.
36843 However, if this is being placed in the TOC it must be output as a
36847 rs6000_xcoff_select_rtx_section (machine_mode mode
, rtx x
,
36848 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED
)
36850 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
))
36851 return toc_section
;
36853 return read_only_private_data_section
;
36856 /* Remove any trailing [DS] or the like from the symbol name. */
36858 static const char *
36859 rs6000_xcoff_strip_name_encoding (const char *name
)
36864 len
= strlen (name
);
36865 if (name
[len
- 1] == ']')
36866 return ggc_alloc_string (name
, len
- 4);
36871 /* Section attributes. AIX is always PIC. */
36873 static unsigned int
36874 rs6000_xcoff_section_type_flags (tree decl
, const char *name
, int reloc
)
36876 unsigned int align
;
36877 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
36879 /* Align to at least UNIT size. */
36880 if ((flags
& SECTION_CODE
) != 0 || !decl
|| !DECL_P (decl
))
36881 align
= MIN_UNITS_PER_WORD
;
36883 /* Increase alignment of large objects if not already stricter. */
36884 align
= MAX ((DECL_ALIGN (decl
) / BITS_PER_UNIT
),
36885 int_size_in_bytes (TREE_TYPE (decl
)) > MIN_UNITS_PER_WORD
36886 ? UNITS_PER_FP_WORD
: MIN_UNITS_PER_WORD
);
36888 return flags
| (exact_log2 (align
) & SECTION_ENTSIZE
);
36891 /* Output at beginning of assembler file.
36893 Initialize the section names for the RS/6000 at this point.
36895 Specify filename, including full path, to assembler.
36897 We want to go into the TOC section so at least one .toc will be emitted.
36898 Also, in order to output proper .bs/.es pairs, we need at least one static
36899 [RW] section emitted.
36901 Finally, declare mcount when profiling to make the assembler happy. */
36904 rs6000_xcoff_file_start (void)
36906 rs6000_gen_section_name (&xcoff_bss_section_name
,
36907 main_input_filename
, ".bss_");
36908 rs6000_gen_section_name (&xcoff_private_data_section_name
,
36909 main_input_filename
, ".rw_");
36910 rs6000_gen_section_name (&xcoff_read_only_section_name
,
36911 main_input_filename
, ".ro_");
36912 rs6000_gen_section_name (&xcoff_tls_data_section_name
,
36913 main_input_filename
, ".tls_");
36914 rs6000_gen_section_name (&xcoff_tbss_section_name
,
36915 main_input_filename
, ".tbss_[UL]");
36917 fputs ("\t.file\t", asm_out_file
);
36918 output_quoted_string (asm_out_file
, main_input_filename
);
36919 fputc ('\n', asm_out_file
);
36920 if (write_symbols
!= NO_DEBUG
)
36921 switch_to_section (private_data_section
);
36922 switch_to_section (toc_section
);
36923 switch_to_section (text_section
);
36925 fprintf (asm_out_file
, "\t.extern %s\n", RS6000_MCOUNT
);
36926 rs6000_file_start ();
36929 /* Output at end of assembler file.
36930 On the RS/6000, referencing data should automatically pull in text. */
36933 rs6000_xcoff_file_end (void)
36935 switch_to_section (text_section
);
36936 fputs ("_section_.text:\n", asm_out_file
);
36937 switch_to_section (data_section
);
36938 fputs (TARGET_32BIT
36939 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
36943 struct declare_alias_data
36946 bool function_descriptor
;
36949 /* Declare alias N. A helper function for for_node_and_aliases. */
36952 rs6000_declare_alias (struct symtab_node
*n
, void *d
)
36954 struct declare_alias_data
*data
= (struct declare_alias_data
*)d
;
36955 /* Main symbol is output specially, because varasm machinery does part of
36956 the job for us - we do not need to declare .globl/lglobs and such. */
36957 if (!n
->alias
|| n
->weakref
)
36960 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n
->decl
)))
36963 /* Prevent assemble_alias from trying to use .set pseudo operation
36964 that does not behave as expected by the middle-end. */
36965 TREE_ASM_WRITTEN (n
->decl
) = true;
36967 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n
->decl
));
36968 char *buffer
= (char *) alloca (strlen (name
) + 2);
36970 int dollar_inside
= 0;
36972 strcpy (buffer
, name
);
36973 p
= strchr (buffer
, '$');
36977 p
= strchr (p
+ 1, '$');
36979 if (TREE_PUBLIC (n
->decl
))
36981 if (!RS6000_WEAK
|| !DECL_WEAK (n
->decl
))
36983 if (dollar_inside
) {
36984 if (data
->function_descriptor
)
36985 fprintf(data
->file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
36986 fprintf(data
->file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
36988 if (data
->function_descriptor
)
36990 fputs ("\t.globl .", data
->file
);
36991 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
36992 putc ('\n', data
->file
);
36994 fputs ("\t.globl ", data
->file
);
36995 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
36996 putc ('\n', data
->file
);
36998 #ifdef ASM_WEAKEN_DECL
36999 else if (DECL_WEAK (n
->decl
) && !data
->function_descriptor
)
37000 ASM_WEAKEN_DECL (data
->file
, n
->decl
, name
, NULL
);
37007 if (data
->function_descriptor
)
37008 fprintf(data
->file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
37009 fprintf(data
->file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
37011 if (data
->function_descriptor
)
37013 fputs ("\t.lglobl .", data
->file
);
37014 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
37015 putc ('\n', data
->file
);
37017 fputs ("\t.lglobl ", data
->file
);
37018 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
37019 putc ('\n', data
->file
);
37021 if (data
->function_descriptor
)
37022 fputs (".", data
->file
);
37023 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
37024 fputs (":\n", data
->file
);
37029 #ifdef HAVE_GAS_HIDDEN
37030 /* Helper function to calculate visibility of a DECL
37031 and return the value as a const string. */
37033 static const char *
37034 rs6000_xcoff_visibility (tree decl
)
37036 static const char * const visibility_types
[] = {
37037 "", ",protected", ",hidden", ",internal"
37040 enum symbol_visibility vis
= DECL_VISIBILITY (decl
);
37042 if (TREE_CODE (decl
) == FUNCTION_DECL
37043 && cgraph_node::get (decl
)
37044 && cgraph_node::get (decl
)->instrumentation_clone
37045 && cgraph_node::get (decl
)->instrumented_version
)
37046 vis
= DECL_VISIBILITY (cgraph_node::get (decl
)->instrumented_version
->decl
);
37048 return visibility_types
[vis
];
37053 /* This macro produces the initial definition of a function name.
37054 On the RS/6000, we need to place an extra '.' in the function name and
37055 output the function descriptor.
37056 Dollar signs are converted to underscores.
37058 The csect for the function will have already been created when
37059 text_section was selected. We do have to go back to that csect, however.
37061 The third and fourth parameters to the .function pseudo-op (16 and 044)
37062 are placeholders which no longer have any use.
37064 Because AIX assembler's .set command has unexpected semantics, we output
37065 all aliases as alternative labels in front of the definition. */
37068 rs6000_xcoff_declare_function_name (FILE *file
, const char *name
, tree decl
)
37070 char *buffer
= (char *) alloca (strlen (name
) + 1);
37072 int dollar_inside
= 0;
37073 struct declare_alias_data data
= {file
, false};
37075 strcpy (buffer
, name
);
37076 p
= strchr (buffer
, '$');
37080 p
= strchr (p
+ 1, '$');
37082 if (TREE_PUBLIC (decl
))
37084 if (!RS6000_WEAK
|| !DECL_WEAK (decl
))
37086 if (dollar_inside
) {
37087 fprintf(file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
37088 fprintf(file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
37090 fputs ("\t.globl .", file
);
37091 RS6000_OUTPUT_BASENAME (file
, buffer
);
37092 #ifdef HAVE_GAS_HIDDEN
37093 fputs (rs6000_xcoff_visibility (decl
), file
);
37100 if (dollar_inside
) {
37101 fprintf(file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
37102 fprintf(file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
37104 fputs ("\t.lglobl .", file
);
37105 RS6000_OUTPUT_BASENAME (file
, buffer
);
37108 fputs ("\t.csect ", file
);
37109 RS6000_OUTPUT_BASENAME (file
, buffer
);
37110 fputs (TARGET_32BIT
? "[DS]\n" : "[DS],3\n", file
);
37111 RS6000_OUTPUT_BASENAME (file
, buffer
);
37112 fputs (":\n", file
);
37113 symtab_node::get (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
37115 fputs (TARGET_32BIT
? "\t.long ." : "\t.llong .", file
);
37116 RS6000_OUTPUT_BASENAME (file
, buffer
);
37117 fputs (", TOC[tc0], 0\n", file
);
37119 switch_to_section (function_section (decl
));
37121 RS6000_OUTPUT_BASENAME (file
, buffer
);
37122 fputs (":\n", file
);
37123 data
.function_descriptor
= true;
37124 symtab_node::get (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
37126 if (!DECL_IGNORED_P (decl
))
37128 if (write_symbols
== DBX_DEBUG
|| write_symbols
== XCOFF_DEBUG
)
37129 xcoffout_declare_function (file
, decl
, buffer
);
37130 else if (write_symbols
== DWARF2_DEBUG
)
37132 name
= (*targetm
.strip_name_encoding
) (name
);
37133 fprintf (file
, "\t.function .%s,.%s,2,0\n", name
, name
);
37140 /* Output assembly language to globalize a symbol from a DECL,
37141 possibly with visibility. */
37144 rs6000_xcoff_asm_globalize_decl_name (FILE *stream
, tree decl
)
37146 const char *name
= XSTR (XEXP (DECL_RTL (decl
), 0), 0);
37147 fputs (GLOBAL_ASM_OP
, stream
);
37148 RS6000_OUTPUT_BASENAME (stream
, name
);
37149 #ifdef HAVE_GAS_HIDDEN
37150 fputs (rs6000_xcoff_visibility (decl
), stream
);
37152 putc ('\n', stream
);
37155 /* Output assembly language to define a symbol as COMMON from a DECL,
37156 possibly with visibility. */
37159 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream
,
37160 tree decl ATTRIBUTE_UNUSED
,
37162 unsigned HOST_WIDE_INT size
,
37163 unsigned HOST_WIDE_INT align
)
37165 unsigned HOST_WIDE_INT align2
= 2;
37168 align2
= floor_log2 (align
/ BITS_PER_UNIT
);
37172 fputs (COMMON_ASM_OP
, stream
);
37173 RS6000_OUTPUT_BASENAME (stream
, name
);
37176 "," HOST_WIDE_INT_PRINT_UNSIGNED
"," HOST_WIDE_INT_PRINT_UNSIGNED
,
37179 #ifdef HAVE_GAS_HIDDEN
37180 fputs (rs6000_xcoff_visibility (decl
), stream
);
37182 putc ('\n', stream
);
37185 /* This macro produces the initial definition of a object (variable) name.
37186 Because AIX assembler's .set command has unexpected semantics, we output
37187 all aliases as alternative labels in front of the definition. */
37190 rs6000_xcoff_declare_object_name (FILE *file
, const char *name
, tree decl
)
37192 struct declare_alias_data data
= {file
, false};
37193 RS6000_OUTPUT_BASENAME (file
, name
);
37194 fputs (":\n", file
);
37195 symtab_node::get_create (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
37199 /* Overide the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'. */
37202 rs6000_asm_output_dwarf_pcrel (FILE *file
, int size
, const char *label
)
37204 fputs (integer_asm_op (size
, FALSE
), file
);
37205 assemble_name (file
, label
);
37206 fputs ("-$", file
);
37209 /* Output a symbol offset relative to the dbase for the current object.
37210 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
37213 __gcc_unwind_dbase is embedded in all executables/libraries through
37214 libgcc/config/rs6000/crtdbase.S. */
37217 rs6000_asm_output_dwarf_datarel (FILE *file
, int size
, const char *label
)
37219 fputs (integer_asm_op (size
, FALSE
), file
);
37220 assemble_name (file
, label
);
37221 fputs("-__gcc_unwind_dbase", file
);
/* Encode section information for DECL into its SYMBOL_REF in RTL (FIRST
   is passed through to default_encode_section_info).  After the default
   encoding, drop block info from thread-local variables and append an
   XCOFF mapping class ("[DS]" for functions, "[UA]" for data) to the
   names of public extern decls that do not already carry one.
   NOTE(review): lossy extraction — stray numbers are original line
   numbers; return type, braces and some guard lines are missing.  Code
   text untouched.  */
37226 rs6000_xcoff_encode_section_info (tree decl
, rtx rtl
, int first
)
37230 const char *symname
;
37232 default_encode_section_info (decl
, rtl
, first
);
37234 /* Careful not to prod global register variables. */
37237 symbol
= XEXP (rtl
, 0);
37238 if (GET_CODE (symbol
) != SYMBOL_REF
)
/* TLS symbols must not claim block-placement info.  */
37241 flags
= SYMBOL_REF_FLAGS (symbol
);
37243 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_THREAD_LOCAL_P (decl
))
37244 flags
&= ~SYMBOL_FLAG_HAS_BLOCK_INFO
;
37246 SYMBOL_REF_FLAGS (symbol
) = flags
;
37248 /* Append mapping class to extern decls. */
37249 symname
= XSTR (symbol
, 0);
37250 if (decl
/* sync condition with assemble_external () */
37251 && DECL_P (decl
) && DECL_EXTERNAL (decl
) && TREE_PUBLIC (decl
)
37252 && ((TREE_CODE (decl
) == VAR_DECL
&& !DECL_THREAD_LOCAL_P (decl
))
37253 || TREE_CODE (decl
) == FUNCTION_DECL
)
/* Trailing ']' means a mapping class is already present.  */
37254 && symname
[strlen (symname
) - 1] != ']')
/* +5 covers the 4-char class suffix plus the NUL terminator.  */
37256 char *newname
= (char *) alloca (strlen (symname
) + 5);
37257 strcpy (newname
, symname
);
37258 strcat (newname
, (TREE_CODE (decl
) == FUNCTION_DECL
37259 ? "[DS]" : "[UA]"));
37260 XSTR (symbol
, 0) = ggc_strdup (newname
);
37263 #endif /* HAVE_AS_TLS */
37264 #endif /* TARGET_XCOFF */
/* Emit ".weak" directives for DECL named NAME into STREAM, and — when
   VAL is the alias target and ASM_OUTPUT_DEF is available — define NAME
   as an alias of VAL.  Under the AIX ABI with dot-symbols, functions get
   a "[DS]" descriptor annotation plus a parallel weak ".NAME" entry-point
   symbol, kept in sync with ".VAL" via ".set".
   NOTE(review): lossy extraction — stray numbers are original line
   numbers; return type, braces and some #endif lines are missing.  Code
   text untouched.  */
37267 rs6000_asm_weaken_decl (FILE *stream
, tree decl
,
37268 const char *name
, const char *val
)
37270 fputs ("\t.weak\t", stream
);
37271 RS6000_OUTPUT_BASENAME (stream
, name
);
37272 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
37273 && DEFAULT_ABI
== ABI_AIX
&& DOT_SYMBOLS
)
/* AIX function descriptor: annotate and also weaken ".NAME".  */
37276 fputs ("[DS]", stream
);
37277 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
37279 fputs (rs6000_xcoff_visibility (decl
), stream
);
37281 fputs ("\n\t.weak\t.", stream
);
37282 RS6000_OUTPUT_BASENAME (stream
, name
);
37284 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
37286 fputs (rs6000_xcoff_visibility (decl
), stream
);
37288 fputc ('\n', stream
);
37291 #ifdef ASM_OUTPUT_DEF
/* Define NAME = VAL; mirror the alias for the dot-symbol as well.  */
37292 ASM_OUTPUT_DEF (stream
, name
, val
);
37294 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
37295 && DEFAULT_ABI
== ABI_AIX
&& DOT_SYMBOLS
)
37297 fputs ("\t.set\t.", stream
);
37298 RS6000_OUTPUT_BASENAME (stream
, name
);
37299 fputs (",.", stream
);
37300 RS6000_OUTPUT_BASENAME (stream
, val
);
37301 fputc ('\n', stream
);
37307 /* Return true if INSN should not be copied. */
/* True when INSN is recognized and its "cannot_copy" insn attribute is
   set.  NOTE(review): lossy extraction — return type/braces missing.  */
37310 rs6000_cannot_copy_insn_p (rtx_insn
*insn
)
37312 return recog_memoized (insn
) >= 0
37313 && get_attr_cannot_copy (insn
);
37316 /* Compute a (partial) cost for rtx X. Return true if the complete
37317 cost has been computed, and false if subexpressions should be
37318 scanned. In either case, *TOTAL contains the cost result. */
/* Implements the RTX-costs target hook for rs6000.  Constants that are
   directly encodable in the surrounding OUTER_CODE context are free;
   other cases dispatch on GET_CODE (x) and fill *TOTAL from the
   per-processor rs6000_cost table (fp, mulsi, muldi, divsi, ...).
   NOTE(review): lossy extraction — stray numbers are original line
   numbers, and the switch statement's `case` labels, braces and many
   `else`/`break` lines are missing, so the visible fragments below are
   bodies of different cases run together.  Code text untouched.  */
37321 rs6000_rtx_costs (rtx x
, machine_mode mode
, int outer_code
,
37322 int opno ATTRIBUTE_UNUSED
, int *total
, bool speed
)
37324 int code
= GET_CODE (x
);
37328 /* On the RS/6000, if it is valid in the insn, it is free. */
37330 if (((outer_code
== SET
37331 || outer_code
== PLUS
37332 || outer_code
== MINUS
)
37333 && (satisfies_constraint_I (x
)
37334 || satisfies_constraint_L (x
)))
37335 || (outer_code
== AND
37336 && (satisfies_constraint_K (x
)
37338 ? satisfies_constraint_L (x
)
37339 : satisfies_constraint_J (x
))))
37340 || ((outer_code
== IOR
|| outer_code
== XOR
)
37341 && (satisfies_constraint_K (x
)
37343 ? satisfies_constraint_L (x
)
37344 : satisfies_constraint_J (x
))))
37345 || outer_code
== ASHIFT
37346 || outer_code
== ASHIFTRT
37347 || outer_code
== LSHIFTRT
37348 || outer_code
== ROTATE
37349 || outer_code
== ROTATERT
37350 || outer_code
== ZERO_EXTRACT
37351 || (outer_code
== MULT
37352 && satisfies_constraint_I (x
))
37353 || ((outer_code
== DIV
|| outer_code
== UDIV
37354 || outer_code
== MOD
|| outer_code
== UMOD
)
37355 && exact_log2 (INTVAL (x
)) >= 0)
37356 || (outer_code
== COMPARE
37357 && (satisfies_constraint_I (x
)
37358 || satisfies_constraint_K (x
)))
37359 || ((outer_code
== EQ
|| outer_code
== NE
)
37360 && (satisfies_constraint_I (x
)
37361 || satisfies_constraint_K (x
)
37363 ? satisfies_constraint_L (x
)
37364 : satisfies_constraint_J (x
))))
37365 || (outer_code
== GTU
37366 && satisfies_constraint_I (x
))
37367 || (outer_code
== LTU
37368 && satisfies_constraint_P (x
)))
/* Constants needing a separate load: one insn (plus one size unit
   when optimizing for size and it takes two instructions).  */
37373 else if ((outer_code
== PLUS
37374 && reg_or_add_cint_operand (x
, VOIDmode
))
37375 || (outer_code
== MINUS
37376 && reg_or_sub_cint_operand (x
, VOIDmode
))
37377 || ((outer_code
== SET
37378 || outer_code
== IOR
37379 || outer_code
== XOR
)
37381 & ~ (unsigned HOST_WIDE_INT
) 0xffffffff) == 0))
37383 *total
= COSTS_N_INSNS (1);
37389 case CONST_WIDE_INT
:
37393 *total
= !speed
? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
37397 /* When optimizing for size, MEM should be slightly more expensive
37398 than generating address, e.g., (plus (reg) (const)).
37399 L1 cache latency is about two instructions. */
37400 *total
= !speed
? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
/* Penalize slow unaligned accesses heavily.  */
37401 if (SLOW_UNALIGNED_ACCESS (mode
, MEM_ALIGN (x
)))
37402 *total
+= COSTS_N_INSNS (100);
37411 if (FLOAT_MODE_P (mode
))
37412 *total
= rs6000_cost
->fp
;
37414 *total
= COSTS_N_INSNS (1);
/* MULT by a small constant uses the cheaper multiply-immediate
   costs; otherwise pick the cost by mode.  */
37418 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
37419 && satisfies_constraint_I (XEXP (x
, 1)))
37421 if (INTVAL (XEXP (x
, 1)) >= -256
37422 && INTVAL (XEXP (x
, 1)) <= 255)
37423 *total
= rs6000_cost
->mulsi_const9
;
37425 *total
= rs6000_cost
->mulsi_const
;
37427 else if (mode
== SFmode
)
37428 *total
= rs6000_cost
->fp
;
37429 else if (FLOAT_MODE_P (mode
))
37430 *total
= rs6000_cost
->dmul
;
37431 else if (mode
== DImode
)
37432 *total
= rs6000_cost
->muldi
;
37434 *total
= rs6000_cost
->mulsi
;
37438 if (mode
== SFmode
)
37439 *total
= rs6000_cost
->fp
;
37441 *total
= rs6000_cost
->dmul
;
/* DIV/MOD: FP divides from the cost table; integer divides by a
   power of two are shift (+subtract for signed) sequences.  */
37446 if (FLOAT_MODE_P (mode
))
37448 *total
= mode
== DFmode
? rs6000_cost
->ddiv
37449 : rs6000_cost
->sdiv
;
37456 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
37457 && exact_log2 (INTVAL (XEXP (x
, 1))) >= 0)
37459 if (code
== DIV
|| code
== MOD
)
37461 *total
= COSTS_N_INSNS (2);
37464 *total
= COSTS_N_INSNS (1);
37468 if (GET_MODE (XEXP (x
, 1)) == DImode
)
37469 *total
= rs6000_cost
->divdi
;
37471 *total
= rs6000_cost
->divsi
;
37473 /* Add in shift and subtract for MOD unless we have a mod instruction. */
37474 if (!TARGET_MODULO
&& (code
== MOD
|| code
== UMOD
))
37475 *total
+= COSTS_N_INSNS (2);
/* Bit-manipulation codes: costs depend on ISA feature flags.  */
37479 *total
= COSTS_N_INSNS (TARGET_CTZ
? 1 : 4);
37483 *total
= COSTS_N_INSNS (4);
37487 *total
= COSTS_N_INSNS (TARGET_POPCNTD
? 1 : 6);
37491 *total
= COSTS_N_INSNS (TARGET_CMPB
? 2 : 6);
37495 if (outer_code
== AND
|| outer_code
== IOR
|| outer_code
== XOR
)
37498 *total
= COSTS_N_INSNS (1);
/* AND with a constant mask: recognize the single-insn
   rotate-and-mask / andi. / andis. forms and two-insn masks.  */
37502 if (CONST_INT_P (XEXP (x
, 1)))
37504 rtx left
= XEXP (x
, 0);
37505 rtx_code left_code
= GET_CODE (left
);
37507 /* rotate-and-mask: 1 insn. */
37508 if ((left_code
== ROTATE
37509 || left_code
== ASHIFT
37510 || left_code
== LSHIFTRT
)
37511 && rs6000_is_valid_shift_mask (XEXP (x
, 1), left
, mode
))
37513 *total
= rtx_cost (XEXP (left
, 0), mode
, left_code
, 0, speed
);
37514 if (!CONST_INT_P (XEXP (left
, 1)))
37515 *total
+= rtx_cost (XEXP (left
, 1), SImode
, left_code
, 1, speed
);
37516 *total
+= COSTS_N_INSNS (1);
37520 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
37521 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
37522 if (rs6000_is_valid_and_mask (XEXP (x
, 1), mode
)
37523 || (val
& 0xffff) == val
37524 || (val
& 0xffff0000) == val
37525 || ((val
& 0xffff) == 0 && mode
== SImode
))
37527 *total
= rtx_cost (left
, mode
, AND
, 0, speed
);
37528 *total
+= COSTS_N_INSNS (1);
37533 if (rs6000_is_valid_2insn_and (XEXP (x
, 1), mode
))
37535 *total
= rtx_cost (left
, mode
, AND
, 0, speed
);
37536 *total
+= COSTS_N_INSNS (2);
37541 *total
= COSTS_N_INSNS (1);
37546 *total
= COSTS_N_INSNS (1);
37552 *total
= COSTS_N_INSNS (1);
37556 /* The EXTSWSLI instruction is a combined instruction. Don't count both
37557 the sign extend and shift separately within the insn. */
37558 if (TARGET_EXTSWSLI
&& mode
== DImode
37559 && GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
37560 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
)
37571 /* Handle mul_highpart. */
37572 if (outer_code
== TRUNCATE
37573 && GET_CODE (XEXP (x
, 0)) == MULT
)
37575 if (mode
== DImode
)
37576 *total
= rs6000_cost
->muldi
;
37578 *total
= rs6000_cost
->mulsi
;
37581 else if (outer_code
== AND
)
37584 *total
= COSTS_N_INSNS (1);
37589 if (GET_CODE (XEXP (x
, 0)) == MEM
)
37592 *total
= COSTS_N_INSNS (1);
37598 if (!FLOAT_MODE_P (mode
))
37600 *total
= COSTS_N_INSNS (1);
/* Float conversions all cost a generic FP operation.  */
37606 case UNSIGNED_FLOAT
:
37609 case FLOAT_TRUNCATE
:
37610 *total
= rs6000_cost
->fp
;
37614 if (mode
== DFmode
)
37615 *total
= rs6000_cost
->sfdf_convert
;
37617 *total
= rs6000_cost
->fp
;
37621 switch (XINT (x
, 1))
37624 *total
= rs6000_cost
->fp
;
37636 *total
= COSTS_N_INSNS (1);
37639 else if (FLOAT_MODE_P (mode
)
37640 && TARGET_PPC_GFXOPT
&& TARGET_HARD_FLOAT
&& TARGET_FPRS
)
37642 *total
= rs6000_cost
->fp
;
37651 /* Carry bit requires mode == Pmode.
37652 NEG or PLUS already counted so only add one. */
37654 && (outer_code
== NEG
|| outer_code
== PLUS
))
37656 *total
= COSTS_N_INSNS (1);
/* Comparison-as-SET: expensive without mfcr (8 insns with ISEL but
   no MFCRF, otherwise 2), 3 for the general case.  */
37659 if (outer_code
== SET
)
37661 if (XEXP (x
, 1) == const0_rtx
)
37663 if (TARGET_ISEL
&& !TARGET_MFCRF
)
37664 *total
= COSTS_N_INSNS (8);
37666 *total
= COSTS_N_INSNS (2);
37671 *total
= COSTS_N_INSNS (3);
37680 if (outer_code
== SET
&& (XEXP (x
, 1) == const0_rtx
))
37682 if (TARGET_ISEL
&& !TARGET_MFCRF
)
37683 *total
= COSTS_N_INSNS (8);
37685 *total
= COSTS_N_INSNS (2);
37689 if (outer_code
== COMPARE
)
37703 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
/* Wrapper that calls rs6000_rtx_costs and then dumps the arguments and
   result (the fprintf target line is missing from this extraction but
   the format arguments are visible below).
   NOTE(review): lossy extraction — return type, braces and the fprintf
   call head are missing.  Code text untouched.  */
37706 rs6000_debug_rtx_costs (rtx x
, machine_mode mode
, int outer_code
,
37707 int opno
, int *total
, bool speed
)
37709 bool ret
= rs6000_rtx_costs (x
, mode
, outer_code
, opno
, total
, speed
);
37712 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
37713 "opno = %d, total = %d, speed = %s, x:\n",
37714 ret
? "complete" : "scan inner",
37715 GET_MODE_NAME (mode
),
37716 GET_RTX_NAME (outer_code
),
37719 speed
? "true" : "false");
37726 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
/* Wrapper that calls TARGET_ADDRESS_COST and dumps its result to
   stderr.  NOTE(review): lossy extraction — return type, braces and the
   trailing debug_rtx/return lines are missing.  Code text untouched.  */
37729 rs6000_debug_address_cost (rtx x
, machine_mode mode
,
37730 addr_space_t as
, bool speed
)
37732 int ret
= TARGET_ADDRESS_COST (x
, mode
, as
, speed
);
37734 fprintf (stderr
, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
37735 ret
, speed
? "true" : "false");
37742 /* A C expression returning the cost of moving data from a register of class
37743 CLASS1 to one of CLASS2. */
/* Register-to-register move cost for MODE between classes FROM and TO.
   GPR-involved moves are costed per-GPR; CR/LR/CTR moves are penalized
   on processors where they are slow; cross-bank moves without VSX are
   routed through GENERAL_REGS.
   NOTE(review): lossy extraction — return type, braces, `else` lines and
   the debug fprintf head are missing.  Code text untouched.  */
37746 rs6000_register_move_cost (machine_mode mode
,
37747 reg_class_t from
, reg_class_t to
)
37751 if (TARGET_DEBUG_COST
)
37754 /* Moves from/to GENERAL_REGS. */
37755 if (reg_classes_intersect_p (to
, GENERAL_REGS
)
37756 || reg_classes_intersect_p (from
, GENERAL_REGS
))
/* Normalize so RCLASS names the non-GPR side of the move.  */
37758 reg_class_t rclass
= from
;
37760 if (! reg_classes_intersect_p (to
, GENERAL_REGS
))
/* FP/Altivec/VSX <-> GPR has no direct path: cost a store+load.  */
37763 if (rclass
== FLOAT_REGS
|| rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
37764 ret
= (rs6000_memory_move_cost (mode
, rclass
, false)
37765 + rs6000_memory_move_cost (mode
, GENERAL_REGS
, false));
37767 /* It's more expensive to move CR_REGS than CR0_REGS because of the
37769 else if (rclass
== CR_REGS
)
37772 /* For those processors that have slow LR/CTR moves, make them more
37773 expensive than memory in order to bias spills to memory .*/
37774 else if ((rs6000_cpu
== PROCESSOR_POWER6
37775 || rs6000_cpu
== PROCESSOR_POWER7
37776 || rs6000_cpu
== PROCESSOR_POWER8
37777 || rs6000_cpu
== PROCESSOR_POWER9
)
37778 && reg_classes_intersect_p (rclass
, LINK_OR_CTR_REGS
))
37779 ret
= 6 * hard_regno_nregs
[0][mode
];
37782 /* A move will cost one instruction per GPR moved. */
37783 ret
= 2 * hard_regno_nregs
[0][mode
];
37786 /* If we have VSX, we can easily move between FPR or Altivec registers. */
37787 else if (VECTOR_MEM_VSX_P (mode
)
37788 && reg_classes_intersect_p (to
, VSX_REGS
)
37789 && reg_classes_intersect_p (from
, VSX_REGS
))
37790 ret
= 2 * hard_regno_nregs
[FIRST_FPR_REGNO
][mode
];
37792 /* Moving between two similar registers is just one instruction. */
37793 else if (reg_classes_intersect_p (to
, from
))
37794 ret
= (FLOAT128_2REG_P (mode
)) ? 4 : 2;
37796 /* Everything else has to go through GENERAL_REGS. */
37798 ret
= (rs6000_register_move_cost (mode
, GENERAL_REGS
, to
)
37799 + rs6000_register_move_cost (mode
, from
, GENERAL_REGS
));
37801 if (TARGET_DEBUG_COST
)
37803 if (dbg_cost_ctrl
== 1)
37805 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
37806 ret
, GET_MODE_NAME (mode
), reg_class_names
[from
],
37807 reg_class_names
[to
]);
37814 /* A C expression returning the cost of moving data of MODE from a register to
/* Memory move cost for MODE and register class RCLASS: 4 per hard
   register covered by MODE for GPR/FPR/VSX/Altivec classes; anything
   else is 4 plus the cost of moving through GENERAL_REGS.  IN is unused.
   NOTE(review): lossy extraction — the comment's tail, return type,
   braces and the debug fprintf head are missing.  Code text untouched.  */
37818 rs6000_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
37819 bool in ATTRIBUTE_UNUSED
)
37823 if (TARGET_DEBUG_COST
)
37826 if (reg_classes_intersect_p (rclass
, GENERAL_REGS
))
37827 ret
= 4 * hard_regno_nregs
[0][mode
];
37828 else if ((reg_classes_intersect_p (rclass
, FLOAT_REGS
)
37829 || reg_classes_intersect_p (rclass
, VSX_REGS
)))
/* 32 is the first FP register number.  */
37830 ret
= 4 * hard_regno_nregs
[32][mode
];
37831 else if (reg_classes_intersect_p (rclass
, ALTIVEC_REGS
))
37832 ret
= 4 * hard_regno_nregs
[FIRST_ALTIVEC_REGNO
][mode
];
37834 ret
= 4 + rs6000_register_move_cost (mode
, rclass
, GENERAL_REGS
);
37836 if (TARGET_DEBUG_COST
)
37838 if (dbg_cost_ctrl
== 1)
37840 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
37841 ret
, GET_MODE_NAME (mode
), reg_class_names
[rclass
], in
);
37848 /* Returns a code for a target-specific builtin that implements
37849 reciprocal of the function, or NULL_TREE if not available. */
/* Map the VSX vector-sqrt builtins to their reciprocal-sqrt builtin
   decls when auto rsqrte generation is enabled for the mode.
   NOTE(review): lossy extraction — return type, braces, the NULL_TREE
   returns and the default case are missing.  Code text untouched.  */
37852 rs6000_builtin_reciprocal (tree fndecl
)
37854 switch (DECL_FUNCTION_CODE (fndecl
))
37856 case VSX_BUILTIN_XVSQRTDP
:
37857 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode
))
37860 return rs6000_builtin_decls
[VSX_BUILTIN_RSQRT_2DF
];
37862 case VSX_BUILTIN_XVSQRTSP
:
37863 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode
))
37866 return rs6000_builtin_decls
[VSX_BUILTIN_RSQRT_4SF
];
37873 /* Load up a constant. If the mode is a vector mode, splat the value across
37874 all of the vector elements. */
/* Materialize real constant DCONST in MODE and return the register REG
   holding it.  Scalar SF/DF loads are forced to a register directly;
   V4SF/V2DF build a PARALLEL of duplicated elements and go through
   rs6000_expand_vector_init; any other mode is unreachable.
   NOTE(review): lossy extraction — return type, braces, REG's
   declaration and the final return are missing.  Code text untouched.  */
37877 rs6000_load_constant_and_splat (machine_mode mode
, REAL_VALUE_TYPE dconst
)
37881 if (mode
== SFmode
|| mode
== DFmode
)
37883 rtx d
= const_double_from_real_value (dconst
, mode
);
37884 reg
= force_reg (mode
, d
);
37886 else if (mode
== V4SFmode
)
37888 rtx d
= const_double_from_real_value (dconst
, SFmode
);
37889 rtvec v
= gen_rtvec (4, d
, d
, d
, d
);
37890 reg
= gen_reg_rtx (mode
);
37891 rs6000_expand_vector_init (reg
, gen_rtx_PARALLEL (mode
, v
));
37893 else if (mode
== V2DFmode
)
37895 rtx d
= const_double_from_real_value (dconst
, DFmode
);
37896 rtvec v
= gen_rtvec (2, d
, d
);
37897 reg
= gen_reg_rtx (mode
);
37898 rs6000_expand_vector_init (reg
, gen_rtx_PARALLEL (mode
, v
));
37901 gcc_unreachable ();
37906 /* Generate an FMA instruction. */
/* Emit TARGET = M1 * M2 + A via the fma optab; if expand_ternary_op
   returns a register other than TARGET, copy the result over.
   NOTE(review): lossy extraction — return type, braces and DST's
   declaration are missing.  Code text untouched.  */
37909 rs6000_emit_madd (rtx target
, rtx m1
, rtx m2
, rtx a
)
37911 machine_mode mode
= GET_MODE (target
);
37914 dst
= expand_ternary_op (mode
, fma_optab
, m1
, m2
, a
, target
, 0);
37915 gcc_assert (dst
!= NULL
);
/* Expansion may not have landed in TARGET; move if needed.  */
37918 emit_move_insn (target
, dst
);
37921 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
/* Emit DST = -(M1 * M2 - A) by building the FMA RTL directly (see the
   original comment below on why fnma_optab is not usable here).
   NOTE(review): lossy extraction — return type, braces and R's
   declaration are missing.  Code text untouched.  */
37924 rs6000_emit_nmsub (rtx dst
, rtx m1
, rtx m2
, rtx a
)
37926 machine_mode mode
= GET_MODE (dst
);
37929 /* This is a tad more complicated, since the fnma_optab is for
37930 a different expression: fma(-m1, m2, a), which is the same
37931 thing except in the case of signed zeros.
37933 Fortunately we know that if FMA is supported that FNMSUB is
37934 also supported in the ISA. Just expand it directly. */
37936 gcc_assert (optab_handler (fma_optab
, mode
) != CODE_FOR_nothing
);
/* r = -(m1 * m2 + (-a)) == -(m1*m2 - a).  */
37938 r
= gen_rtx_NEG (mode
, a
);
37939 r
= gen_rtx_FMA (mode
, m1
, m2
, r
);
37940 r
= gen_rtx_NEG (mode
, r
);
37941 emit_insn (gen_rtx_SET (dst
, r
));
37944 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
37945 add a reg_note saying that this was a division. Support both scalar and
37946 vector divide. Assumes no trapping math and finite arguments. */
/* Software divide: start from a hardware reciprocal estimate of D
   (UNSPEC below), refine with Newton-Raphson iterations using
   rs6000_emit_madd / rs6000_emit_nmsub, then form DST = N * (1/D).
   NOTE(review): lossy extraction — return type, braces, I's declaration
   and the UNSPEC tail/closing lines are missing.  Code text untouched.  */
37949 rs6000_emit_swdiv (rtx dst
, rtx n
, rtx d
, bool note_p
)
37951 machine_mode mode
= GET_MODE (dst
);
37952 rtx one
, x0
, e0
, x1
, xprev
, eprev
, xnext
, enext
, u
, v
;
37955 /* Low precision estimates guarantee 5 bits of accuracy. High
37956 precision estimates guarantee 14 bits of accuracy. SFmode
37957 requires 23 bits of accuracy. DFmode requires 52 bits of
37958 accuracy. Each pass at least doubles the accuracy, leading
37959 to the following. */
37960 int passes
= (TARGET_RECIP_PRECISION
) ? 1 : 3;
37961 if (mode
== DFmode
|| mode
== V2DFmode
)
37964 enum insn_code code
= optab_handler (smul_optab
, mode
);
37965 insn_gen_fn gen_mul
= GEN_FCN (code
);
37967 gcc_assert (code
!= CODE_FOR_nothing
);
37969 one
= rs6000_load_constant_and_splat (mode
, dconst1
);
37971 /* x0 = 1./d estimate */
37972 x0
= gen_reg_rtx (mode
);
37973 emit_insn (gen_rtx_SET (x0
, gen_rtx_UNSPEC (mode
, gen_rtvec (1, d
),
37976 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
37979 /* e0 = 1. - d * x0 */
37980 e0
= gen_reg_rtx (mode
);
37981 rs6000_emit_nmsub (e0
, d
, x0
, one
);
37983 /* x1 = x0 + e0 * x0 */
37984 x1
= gen_reg_rtx (mode
);
37985 rs6000_emit_madd (x1
, e0
, x0
, x0
);
37987 for (i
= 0, xprev
= x1
, eprev
= e0
; i
< passes
- 2;
37988 ++i
, xprev
= xnext
, eprev
= enext
) {
37990 /* enext = eprev * eprev */
37991 enext
= gen_reg_rtx (mode
);
37992 emit_insn (gen_mul (enext
, eprev
, eprev
));
37994 /* xnext = xprev + enext * xprev */
37995 xnext
= gen_reg_rtx (mode
);
37996 rs6000_emit_madd (xnext
, enext
, xprev
, xprev
);
38002 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
38004 /* u = n * xprev */
38005 u
= gen_reg_rtx (mode
);
38006 emit_insn (gen_mul (u
, n
, xprev
));
38008 /* v = n - (d * u) */
38009 v
= gen_reg_rtx (mode
);
38010 rs6000_emit_nmsub (v
, d
, u
, n
);
38012 /* dst = (v * xprev) + u */
38013 rs6000_emit_madd (dst
, v
, xprev
, u
);
/* Record DST = N/D for later passes (guarded by NOTE_P; the guard
   line is missing from this extraction).  */
38016 add_reg_note (get_last_insn (), REG_EQUAL
, gen_rtx_DIV (mode
, n
, d
));
38019 /* Goldschmidt's Algorithm for single/double-precision floating point
38020 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
/* Emit DST = sqrt(SRC) (or 1/sqrt(SRC) when RECIP) starting from the
   hardware rsqrt estimate (UNSPEC below), filtering the SRC == 0.0 case,
   then iterating PASSES Goldschmidt refinement steps on the paired
   estimates g (~sqrt) and h (~1/(2*sqrt)).
   NOTE(review): lossy extraction — return type, braces, several
   declarations (mhalf, i) and if/else structure lines are missing.
   Code text untouched.  */
38023 rs6000_emit_swsqrt (rtx dst
, rtx src
, bool recip
)
38025 machine_mode mode
= GET_MODE (src
);
38026 rtx e
= gen_reg_rtx (mode
);
38027 rtx g
= gen_reg_rtx (mode
);
38028 rtx h
= gen_reg_rtx (mode
);
38030 /* Low precision estimates guarantee 5 bits of accuracy. High
38031 precision estimates guarantee 14 bits of accuracy. SFmode
38032 requires 23 bits of accuracy. DFmode requires 52 bits of
38033 accuracy. Each pass at least doubles the accuracy, leading
38034 to the following. */
38035 int passes
= (TARGET_RECIP_PRECISION
) ? 1 : 3;
38036 if (mode
== DFmode
|| mode
== V2DFmode
)
38041 enum insn_code code
= optab_handler (smul_optab
, mode
);
38042 insn_gen_fn gen_mul
= GEN_FCN (code
);
38044 gcc_assert (code
!= CODE_FOR_nothing
);
38046 mhalf
= rs6000_load_constant_and_splat (mode
, dconsthalf
);
38048 /* e = rsqrt estimate */
38049 emit_insn (gen_rtx_SET (e
, gen_rtx_UNSPEC (mode
, gen_rtvec (1, src
),
38052 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
38055 rtx zero
= force_reg (mode
, CONST0_RTX (mode
));
38057 if (mode
== SFmode
)
38059 rtx target
= emit_conditional_move (e
, GT
, src
, zero
, mode
,
38062 emit_move_insn (e
, target
);
38066 rtx cond
= gen_rtx_GT (VOIDmode
, e
, zero
);
38067 rs6000_emit_vector_cond_expr (e
, e
, zero
, cond
, src
, zero
);
38071 /* g = sqrt estimate. */
38072 emit_insn (gen_mul (g
, e
, src
));
38073 /* h = 1/(2*sqrt) estimate. */
38074 emit_insn (gen_mul (h
, e
, mhalf
));
38080 rtx t
= gen_reg_rtx (mode
);
38081 rs6000_emit_nmsub (t
, g
, h
, mhalf
);
38082 /* Apply correction directly to 1/rsqrt estimate. */
38083 rs6000_emit_madd (dst
, e
, t
, e
);
/* Goldschmidt refinement loop: each pass improves g and h.  */
38087 for (i
= 0; i
< passes
; i
++)
38089 rtx t1
= gen_reg_rtx (mode
);
38090 rtx g1
= gen_reg_rtx (mode
);
38091 rtx h1
= gen_reg_rtx (mode
);
38093 rs6000_emit_nmsub (t1
, g
, h
, mhalf
);
38094 rs6000_emit_madd (g1
, g
, t1
, g
);
38095 rs6000_emit_madd (h1
, h
, t1
, h
);
38100 /* Multiply by 2 for 1/rsqrt. */
38101 emit_insn (gen_add3_insn (dst
, h
, h
));
38106 rtx t
= gen_reg_rtx (mode
);
38107 rs6000_emit_nmsub (t
, g
, h
, mhalf
);
38108 rs6000_emit_madd (dst
, g
, t
, g
);
38114 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
38115 (Power7) targets. DST is the target, and SRC is the argument operand. */
/* Prefer the single popcntw/popcntd instruction on ISA 2.06; otherwise
   use popcntb (per-byte counts), multiply by 0x01010101.. to sum the
   byte counts into the top byte, and shift it down.
   NOTE(review): lossy extraction — return type, braces, tmp2's
   declaration and the expand_mult tail arguments are missing.  Code
   text untouched.  */
38118 rs6000_emit_popcount (rtx dst
, rtx src
)
38120 machine_mode mode
= GET_MODE (dst
);
38123 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
38124 if (TARGET_POPCNTD
)
38126 if (mode
== SImode
)
38127 emit_insn (gen_popcntdsi2 (dst
, src
));
38129 emit_insn (gen_popcntddi2 (dst
, src
));
/* Fallback: popcntb + multiply + shift.  */
38133 tmp1
= gen_reg_rtx (mode
);
38135 if (mode
== SImode
)
38137 emit_insn (gen_popcntbsi2 (tmp1
, src
));
38138 tmp2
= expand_mult (SImode
, tmp1
, GEN_INT (0x01010101),
38140 tmp2
= force_reg (SImode
, tmp2
);
38141 emit_insn (gen_lshrsi3 (dst
, tmp2
, GEN_INT (24)));
38145 emit_insn (gen_popcntbdi2 (tmp1
, src
));
38146 tmp2
= expand_mult (DImode
, tmp1
,
38147 GEN_INT ((HOST_WIDE_INT
)
38148 0x01010101 << 32 | 0x01010101),
38150 tmp2
= force_reg (DImode
, tmp2
);
38151 emit_insn (gen_lshrdi3 (dst
, tmp2
, GEN_INT (56)));
38156 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
38157 target, and SRC is the argument operand. */
/* Prefer the prtyw/prtyd instructions; otherwise compute parity from
   popcntb either by XOR-folding the per-byte counts (when a multiply
   would be at least as expensive) or via rs6000_emit_popcount & 1.
   NOTE(review): lossy extraction — return type, braces and several
   if/else structure lines are missing.  Code text untouched.  */
38160 rs6000_emit_parity (rtx dst
, rtx src
)
38162 machine_mode mode
= GET_MODE (dst
);
38165 tmp
= gen_reg_rtx (mode
);
38167 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
38170 if (mode
== SImode
)
38172 emit_insn (gen_popcntbsi2 (tmp
, src
));
38173 emit_insn (gen_paritysi2_cmpb (dst
, tmp
));
38177 emit_insn (gen_popcntbdi2 (tmp
, src
));
38178 emit_insn (gen_paritydi2_cmpb (dst
, tmp
));
38183 if (mode
== SImode
)
38185 /* Is mult+shift >= shift+xor+shift+xor? */
38186 if (rs6000_cost
->mulsi_const
>= COSTS_N_INSNS (3))
38188 rtx tmp1
, tmp2
, tmp3
, tmp4
;
38190 tmp1
= gen_reg_rtx (SImode
);
38191 emit_insn (gen_popcntbsi2 (tmp1
, src
));
/* Fold the four byte-counts together with shift+xor pairs.  */
38193 tmp2
= gen_reg_rtx (SImode
);
38194 emit_insn (gen_lshrsi3 (tmp2
, tmp1
, GEN_INT (16)));
38195 tmp3
= gen_reg_rtx (SImode
);
38196 emit_insn (gen_xorsi3 (tmp3
, tmp1
, tmp2
));
38198 tmp4
= gen_reg_rtx (SImode
);
38199 emit_insn (gen_lshrsi3 (tmp4
, tmp3
, GEN_INT (8)));
38200 emit_insn (gen_xorsi3 (tmp
, tmp3
, tmp4
));
/* Cheaper multiply available: parity = popcount & 1.  */
38203 rs6000_emit_popcount (tmp
, src
);
38204 emit_insn (gen_andsi3 (dst
, tmp
, const1_rtx
));
38208 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
38209 if (rs6000_cost
->muldi
>= COSTS_N_INSNS (5))
38211 rtx tmp1
, tmp2
, tmp3
, tmp4
, tmp5
, tmp6
;
38213 tmp1
= gen_reg_rtx (DImode
);
38214 emit_insn (gen_popcntbdi2 (tmp1
, src
));
/* Fold the eight byte-counts together with shift+xor pairs.  */
38216 tmp2
= gen_reg_rtx (DImode
);
38217 emit_insn (gen_lshrdi3 (tmp2
, tmp1
, GEN_INT (32)));
38218 tmp3
= gen_reg_rtx (DImode
);
38219 emit_insn (gen_xordi3 (tmp3
, tmp1
, tmp2
));
38221 tmp4
= gen_reg_rtx (DImode
);
38222 emit_insn (gen_lshrdi3 (tmp4
, tmp3
, GEN_INT (16)));
38223 tmp5
= gen_reg_rtx (DImode
);
38224 emit_insn (gen_xordi3 (tmp5
, tmp3
, tmp4
));
38226 tmp6
= gen_reg_rtx (DImode
);
38227 emit_insn (gen_lshrdi3 (tmp6
, tmp5
, GEN_INT (8)));
38228 emit_insn (gen_xordi3 (tmp
, tmp5
, tmp6
));
38231 rs6000_emit_popcount (tmp
, src
);
38232 emit_insn (gen_anddi3 (dst
, tmp
, const1_rtx
));
38236 /* Expand an Altivec constant permutation for little endian mode.
38237 There are two issues: First, the two input operands must be
38238 swapped so that together they form a double-wide array in LE
38239 order. Second, the vperm instruction has surprising behavior
38240 in LE mode: it interprets the elements of the source vectors
38241 in BE mode ("left to right") and interprets the elements of
38242 the destination vector in LE mode ("right to left"). To
38243 correct for this, we must subtract each element of the permute
38244 control vector from 31.
38246 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
38247 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
38248 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
38249 serve as the permute control vector. Then, in BE mode,
38253 places the desired result in vr9. However, in LE mode the
38254 vector contents will be
38256 vr10 = 00000003 00000002 00000001 00000000
38257 vr11 = 00000007 00000006 00000005 00000004
38259 The result of the vperm using the same permute control vector is
38261 vr9 = 05000000 07000000 01000000 03000000
38263 That is, the leftmost 4 bytes of vr10 are interpreted as the
38264 source for the rightmost 4 bytes of vr9, and so on.
38266 If we change the permute control vector to
38268 vr12 = {31,20,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
38276 vr9 = 00000006 00000004 00000002 00000000. */
/* OPERANDS = {target, op0, op1, constant selector}.  Adjust each selector
   byte to 31 - (b & 31), swap the inputs, and emit the vperm UNSPEC.
   NOTE(review): lossy extraction — return type, braces, the perm[] and i
   declarations and the UNSPEC tail are missing.  Code text untouched.  */
38279 altivec_expand_vec_perm_const_le (rtx operands
[4])
38283 rtx constv
, unspec
;
38284 rtx target
= operands
[0];
38285 rtx op0
= operands
[1];
38286 rtx op1
= operands
[2];
38287 rtx sel
= operands
[3];
38289 /* Unpack and adjust the constant selector. */
38290 for (i
= 0; i
< 16; ++i
)
38292 rtx e
= XVECEXP (sel
, 0, i
);
38293 unsigned int elt
= 31 - (INTVAL (e
) & 31);
38294 perm
[i
] = GEN_INT (elt
);
38297 /* Expand to a permute, swapping the inputs and using the
38298 adjusted selector. */
38300 op0
= force_reg (V16QImode
, op0
);
38302 op1
= force_reg (V16QImode
, op1
);
38304 constv
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, perm
));
38305 constv
= force_reg (V16QImode
, constv
);
/* Note op1 before op0: the operands are swapped for LE.  */
38306 unspec
= gen_rtx_UNSPEC (V16QImode
, gen_rtvec (3, op1
, op0
, constv
),
38308 if (!REG_P (target
))
38310 rtx tmp
= gen_reg_rtx (V16QImode
);
38311 emit_move_insn (tmp
, unspec
);
38315 emit_move_insn (target
, unspec
);
38318 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
38319 permute control vector. But here it's not a constant, so we must
38320 generate a vector NAND or NOR to do the adjustment. */
/* OPERANDS = {target, op0, op1, non-constant selector}.  On P9 use the
   direct permute; otherwise compute ~sel via vnand (P8) or vnor, then
   permute with the operands reversed.
   NOTE(review): lossy extraction — return type, braces, tmp's
   declaration, UNSPEC tails and the else structure are missing.  Code
   text untouched.  */
38323 altivec_expand_vec_perm_le (rtx operands
[4])
38325 rtx notx
, iorx
, unspec
;
38326 rtx target
= operands
[0];
38327 rtx op0
= operands
[1];
38328 rtx op1
= operands
[2];
38329 rtx sel
= operands
[3];
38331 rtx norreg
= gen_reg_rtx (V16QImode
);
38332 machine_mode mode
= GET_MODE (target
);
38334 /* Get everything in regs so the pattern matches. */
38336 op0
= force_reg (mode
, op0
);
38338 op1
= force_reg (mode
, op1
);
38340 sel
= force_reg (V16QImode
, sel
);
38341 if (!REG_P (target
))
38342 tmp
= gen_reg_rtx (mode
);
38344 if (TARGET_P9_VECTOR
)
38346 unspec
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, op0
, op1
, sel
),
38351 /* Invert the selector with a VNAND if available, else a VNOR.
38352 The VNAND is preferred for future fusion opportunities. */
38353 notx
= gen_rtx_NOT (V16QImode
, sel
);
38354 iorx
= (TARGET_P8_VECTOR
38355 ? gen_rtx_IOR (V16QImode
, notx
, notx
)
38356 : gen_rtx_AND (V16QImode
, notx
, notx
));
38357 emit_insn (gen_rtx_SET (norreg
, iorx
));
38359 /* Permute with operands reversed and adjusted selector. */
38360 unspec
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, op1
, op0
, norreg
),
38364 /* Copy into target, possibly by way of a register. */
38365 if (!REG_P (target
))
38367 emit_move_insn (tmp
, unspec
);
38371 emit_move_insn (target
, unspec
);
38374 /* Expand an Altivec constant permutation. Return true if we match
38375 an efficient implementation; false to fall back to VPERM. */
/* OPERANDS = {target, op0, op1, constant selector}.  Try, in order:
   splat (vspltb/vsplth/vspltw), then the merge/pack patterns table, and
   finally the generic LE constant-vperm path on little-endian.
   NOTE(review): lossy extraction — return type, braces, many control
   lines (swapped/one_vec logic, returns, `continue`s) are missing, and
   the patterns[] table's closing brace is gone.  Code text untouched.  */
38378 altivec_expand_vec_perm_const (rtx operands
[4])
38380 struct altivec_perm_insn
{
38381 HOST_WIDE_INT mask
;
38382 enum insn_code impl
;
38383 unsigned char perm
[16];
/* Each entry: required ISA mask, the insn to emit, and the byte
   permutation (BE numbering) it implements.  */
38385 static const struct altivec_perm_insn patterns
[] = {
38386 { OPTION_MASK_ALTIVEC
, CODE_FOR_altivec_vpkuhum_direct
,
38387 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
38388 { OPTION_MASK_ALTIVEC
, CODE_FOR_altivec_vpkuwum_direct
,
38389 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
38390 { OPTION_MASK_ALTIVEC
,
38391 (BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghb_direct
38392 : CODE_FOR_altivec_vmrglb_direct
),
38393 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
38394 { OPTION_MASK_ALTIVEC
,
38395 (BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghh_direct
38396 : CODE_FOR_altivec_vmrglh_direct
),
38397 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
38398 { OPTION_MASK_ALTIVEC
,
38399 (BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghw_direct
38400 : CODE_FOR_altivec_vmrglw_direct
),
38401 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
38402 { OPTION_MASK_ALTIVEC
,
38403 (BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglb_direct
38404 : CODE_FOR_altivec_vmrghb_direct
),
38405 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
38406 { OPTION_MASK_ALTIVEC
,
38407 (BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglh_direct
38408 : CODE_FOR_altivec_vmrghh_direct
),
38409 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
38410 { OPTION_MASK_ALTIVEC
,
38411 (BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglw_direct
38412 : CODE_FOR_altivec_vmrghw_direct
),
38413 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
38414 { OPTION_MASK_P8_VECTOR
, CODE_FOR_p8_vmrgew
,
38415 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
38416 { OPTION_MASK_P8_VECTOR
, CODE_FOR_p8_vmrgow
,
38417 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
38420 unsigned int i
, j
, elt
, which
;
38421 unsigned char perm
[16];
38422 rtx target
, op0
, op1
, sel
, x
;
38425 target
= operands
[0];
38430 /* Unpack the constant selector. */
38431 for (i
= which
= 0; i
< 16; ++i
)
38433 rtx e
= XVECEXP (sel
, 0, i
);
38434 elt
= INTVAL (e
) & 31;
/* WHICH tracks whether bytes of op0 (bit 1) / op1 (bit 2) are used.  */
38435 which
|= (elt
< 16 ? 1 : 2);
38439 /* Simplify the constant selector based on operands. */
38443 gcc_unreachable ();
38447 if (!rtx_equal_p (op0
, op1
))
38452 for (i
= 0; i
< 16; ++i
)
38464 /* Look for splat patterns. */
38469 for (i
= 0; i
< 16; ++i
)
38470 if (perm
[i
] != elt
)
38474 if (!BYTES_BIG_ENDIAN
)
38476 emit_insn (gen_altivec_vspltb_direct (target
, op0
, GEN_INT (elt
)));
/* Halfword splat: pairs (elt, elt+1) repeated.  */
38482 for (i
= 0; i
< 16; i
+= 2)
38483 if (perm
[i
] != elt
|| perm
[i
+ 1] != elt
+ 1)
38487 int field
= BYTES_BIG_ENDIAN
? elt
/ 2 : 7 - elt
/ 2;
38488 x
= gen_reg_rtx (V8HImode
);
38489 emit_insn (gen_altivec_vsplth_direct (x
, gen_lowpart (V8HImode
, op0
),
38491 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
/* Word splat: quadruples (elt..elt+3) repeated.  */
38498 for (i
= 0; i
< 16; i
+= 4)
38500 || perm
[i
+ 1] != elt
+ 1
38501 || perm
[i
+ 2] != elt
+ 2
38502 || perm
[i
+ 3] != elt
+ 3)
38506 int field
= BYTES_BIG_ENDIAN
? elt
/ 4 : 3 - elt
/ 4;
38507 x
= gen_reg_rtx (V4SImode
);
38508 emit_insn (gen_altivec_vspltw_direct (x
, gen_lowpart (V4SImode
, op0
),
38510 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
38516 /* Look for merge and pack patterns. */
38517 for (j
= 0; j
< ARRAY_SIZE (patterns
); ++j
)
38521 if ((patterns
[j
].mask
& rs6000_isa_flags
) == 0)
38524 elt
= patterns
[j
].perm
[0];
38525 if (perm
[0] == elt
)
38527 else if (perm
[0] == elt
+ 16)
38531 for (i
= 1; i
< 16; ++i
)
38533 elt
= patterns
[j
].perm
[i
];
38535 elt
= (elt
>= 16 ? elt
- 16 : elt
+ 16);
38536 else if (one_vec
&& elt
>= 16)
38538 if (perm
[i
] != elt
)
38543 enum insn_code icode
= patterns
[j
].impl
;
38544 machine_mode omode
= insn_data
[icode
].operand
[0].mode
;
38545 machine_mode imode
= insn_data
[icode
].operand
[1].mode
;
38547 /* For little-endian, don't use vpkuwum and vpkuhum if the
38548 underlying vector type is not V4SI and V8HI, respectively.
38549 For example, using vpkuwum with a V8HI picks up the even
38550 halfwords (BE numbering) when the even halfwords (LE
38551 numbering) are what we need. */
38552 if (!BYTES_BIG_ENDIAN
38553 && icode
== CODE_FOR_altivec_vpkuwum_direct
38554 && ((GET_CODE (op0
) == REG
38555 && GET_MODE (op0
) != V4SImode
)
38556 || (GET_CODE (op0
) == SUBREG
38557 && GET_MODE (XEXP (op0
, 0)) != V4SImode
)))
38559 if (!BYTES_BIG_ENDIAN
38560 && icode
== CODE_FOR_altivec_vpkuhum_direct
38561 && ((GET_CODE (op0
) == REG
38562 && GET_MODE (op0
) != V8HImode
)
38563 || (GET_CODE (op0
) == SUBREG
38564 && GET_MODE (XEXP (op0
, 0)) != V8HImode
)))
38567 /* For little-endian, the two input operands must be swapped
38568 (or swapped back) to ensure proper right-to-left numbering
38570 if (swapped
^ !BYTES_BIG_ENDIAN
)
38571 std::swap (op0
, op1
);
38572 if (imode
!= V16QImode
)
38574 op0
= gen_lowpart (imode
, op0
);
38575 op1
= gen_lowpart (imode
, op1
);
38577 if (omode
== V16QImode
)
38580 x
= gen_reg_rtx (omode
);
38581 emit_insn (GEN_FCN (icode
) (x
, op0
, op1
));
38582 if (omode
!= V16QImode
)
38583 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
/* No pattern matched: on LE fall back to the adjusted vperm.  */
38588 if (!BYTES_BIG_ENDIAN
)
38590 altivec_expand_vec_perm_const_le (operands
);
38597 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
38598 Return true if we match an efficient implementation. */
/* Two-element permutation helper: PERM0/PERM1 select elements 0-3 of
   the concatenation of OP0 and OP1.  Canonicalize so PERM0 selects from
   OP0 and PERM1 from OP1 (folding or swapping as needed), then — when
   TARGET is non-null — emit a VEC_SELECT of the VEC_CONCAT.
   NOTE(review): lossy extraction — return type, braces, x/v
   declarations and the returns are missing.  Code text untouched.  */
38601 rs6000_expand_vec_perm_const_1 (rtx target
, rtx op0
, rtx op1
,
38602 unsigned char perm0
, unsigned char perm1
)
38606 /* If both selectors come from the same operand, fold to single op. */
38607 if ((perm0
& 2) == (perm1
& 2))
38614 /* If both operands are equal, fold to simpler permutation. */
38615 if (rtx_equal_p (op0
, op1
))
38618 perm1
= (perm1
& 1) + 2;
38620 /* If the first selector comes from the second operand, swap. */
38621 else if (perm0
& 2)
38627 std::swap (op0
, op1
);
38629 /* If the second selector does not come from the second operand, fail. */
38630 else if ((perm1
& 2) == 0)
38634 if (target
!= NULL
)
38636 machine_mode vmode
, dmode
;
38639 vmode
= GET_MODE (target
);
38640 gcc_assert (GET_MODE_NUNITS (vmode
) == 2);
/* DMODE is the 4-element double-wide vector mode for the concat.  */
38641 dmode
= mode_for_vector (GET_MODE_INNER (vmode
), 4);
38642 x
= gen_rtx_VEC_CONCAT (dmode
, op0
, op1
);
38643 v
= gen_rtvec (2, GEN_INT (perm0
), GEN_INT (perm1
));
38644 x
= gen_rtx_VEC_SELECT (vmode
, x
, gen_rtx_PARALLEL (VOIDmode
, v
));
38645 emit_insn (gen_rtx_SET (target
, x
));
38651 rs6000_expand_vec_perm_const (rtx operands
[4])
38653 rtx target
, op0
, op1
, sel
;
38654 unsigned char perm0
, perm1
;
38656 target
= operands
[0];
38661 /* Unpack the constant selector. */
38662 perm0
= INTVAL (XVECEXP (sel
, 0, 0)) & 3;
38663 perm1
= INTVAL (XVECEXP (sel
, 0, 1)) & 3;
38665 return rs6000_expand_vec_perm_const_1 (target
, op0
, op1
, perm0
, perm1
);
38668 /* Test whether a constant permutation is supported. */
38671 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode
,
38672 const unsigned char *sel
)
38674 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
38675 if (TARGET_ALTIVEC
)
38678 /* Check for ps_merge* or evmerge* insns. */
38679 if ((TARGET_PAIRED_FLOAT
&& vmode
== V2SFmode
)
38680 || (TARGET_SPE
&& vmode
== V2SImode
))
38682 rtx op0
= gen_raw_REG (vmode
, LAST_VIRTUAL_REGISTER
+ 1);
38683 rtx op1
= gen_raw_REG (vmode
, LAST_VIRTUAL_REGISTER
+ 2);
38684 return rs6000_expand_vec_perm_const_1 (NULL
, op0
, op1
, sel
[0], sel
[1]);
38690 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
38693 rs6000_do_expand_vec_perm (rtx target
, rtx op0
, rtx op1
,
38694 machine_mode vmode
, unsigned nelt
, rtx perm
[])
38696 machine_mode imode
;
38700 if (GET_MODE_CLASS (vmode
) != MODE_VECTOR_INT
)
38701 imode
= mode_for_vector
38702 (int_mode_for_mode (GET_MODE_INNER (vmode
)).require (), nelt
);
38704 x
= gen_rtx_CONST_VECTOR (imode
, gen_rtvec_v (nelt
, perm
));
38705 x
= expand_vec_perm (vmode
, op0
, op1
, x
, target
);
38707 emit_move_insn (target
, x
);
38710 /* Expand an extract even operation. */
38713 rs6000_expand_extract_even (rtx target
, rtx op0
, rtx op1
)
38715 machine_mode vmode
= GET_MODE (target
);
38716 unsigned i
, nelt
= GET_MODE_NUNITS (vmode
);
38719 for (i
= 0; i
< nelt
; i
++)
38720 perm
[i
] = GEN_INT (i
* 2);
38722 rs6000_do_expand_vec_perm (target
, op0
, op1
, vmode
, nelt
, perm
);
38725 /* Expand a vector interleave operation. */
38728 rs6000_expand_interleave (rtx target
, rtx op0
, rtx op1
, bool highp
)
38730 machine_mode vmode
= GET_MODE (target
);
38731 unsigned i
, high
, nelt
= GET_MODE_NUNITS (vmode
);
38734 high
= (highp
? 0 : nelt
/ 2);
38735 for (i
= 0; i
< nelt
/ 2; i
++)
38737 perm
[i
* 2] = GEN_INT (i
+ high
);
38738 perm
[i
* 2 + 1] = GEN_INT (i
+ nelt
+ high
);
38741 rs6000_do_expand_vec_perm (target
, op0
, op1
, vmode
, nelt
, perm
);
38744 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
38746 rs6000_scale_v2df (rtx tgt
, rtx src
, int scale
)
38748 HOST_WIDE_INT
hwi_scale (scale
);
38749 REAL_VALUE_TYPE r_pow
;
38750 rtvec v
= rtvec_alloc (2);
38752 rtx scale_vec
= gen_reg_rtx (V2DFmode
);
38753 (void)real_powi (&r_pow
, DFmode
, &dconst2
, hwi_scale
);
38754 elt
= const_double_from_real_value (r_pow
, DFmode
);
38755 RTVEC_ELT (v
, 0) = elt
;
38756 RTVEC_ELT (v
, 1) = elt
;
38757 rs6000_expand_vector_init (scale_vec
, gen_rtx_PARALLEL (V2DFmode
, v
));
38758 emit_insn (gen_mulv2df3 (tgt
, src
, scale_vec
));
38761 /* Return an RTX representing where to find the function value of a
38762 function returning MODE. */
38764 rs6000_complex_function_value (machine_mode mode
)
38766 unsigned int regno
;
38768 machine_mode inner
= GET_MODE_INNER (mode
);
38769 unsigned int inner_bytes
= GET_MODE_UNIT_SIZE (mode
);
38771 if (TARGET_FLOAT128_TYPE
38773 || (mode
== TCmode
&& TARGET_IEEEQUAD
)))
38774 regno
= ALTIVEC_ARG_RETURN
;
38776 else if (FLOAT_MODE_P (mode
) && TARGET_HARD_FLOAT
&& TARGET_FPRS
)
38777 regno
= FP_ARG_RETURN
;
38781 regno
= GP_ARG_RETURN
;
38783 /* 32-bit is OK since it'll go in r3/r4. */
38784 if (TARGET_32BIT
&& inner_bytes
>= 4)
38785 return gen_rtx_REG (mode
, regno
);
38788 if (inner_bytes
>= 8)
38789 return gen_rtx_REG (mode
, regno
);
38791 r1
= gen_rtx_EXPR_LIST (inner
, gen_rtx_REG (inner
, regno
),
38793 r2
= gen_rtx_EXPR_LIST (inner
, gen_rtx_REG (inner
, regno
+ 1),
38794 GEN_INT (inner_bytes
));
38795 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, r1
, r2
));
38798 /* Return an rtx describing a return value of MODE as a PARALLEL
38799 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
38800 stride REG_STRIDE. */
38803 rs6000_parallel_return (machine_mode mode
,
38804 int n_elts
, machine_mode elt_mode
,
38805 unsigned int regno
, unsigned int reg_stride
)
38807 rtx par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (n_elts
));
38810 for (i
= 0; i
< n_elts
; i
++)
38812 rtx r
= gen_rtx_REG (elt_mode
, regno
);
38813 rtx off
= GEN_INT (i
* GET_MODE_SIZE (elt_mode
));
38814 XVECEXP (par
, 0, i
) = gen_rtx_EXPR_LIST (VOIDmode
, r
, off
);
38815 regno
+= reg_stride
;
38821 /* Target hook for TARGET_FUNCTION_VALUE.
38823 On the SPE, both FPs and vectors are returned in r3.
38825 On RS/6000 an integer value is in r3 and a floating-point value is in
38826 fp1, unless -msoft-float. */
38829 rs6000_function_value (const_tree valtype
,
38830 const_tree fn_decl_or_type ATTRIBUTE_UNUSED
,
38831 bool outgoing ATTRIBUTE_UNUSED
)
38834 unsigned int regno
;
38835 machine_mode elt_mode
;
38838 /* Special handling for structs in darwin64. */
38840 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype
), valtype
))
38842 CUMULATIVE_ARGS valcum
;
38846 valcum
.fregno
= FP_ARG_MIN_REG
;
38847 valcum
.vregno
= ALTIVEC_ARG_MIN_REG
;
38848 /* Do a trial code generation as if this were going to be passed as
38849 an argument; if any part goes in memory, we return NULL. */
38850 valret
= rs6000_darwin64_record_arg (&valcum
, valtype
, true, /* retval= */ true);
38853 /* Otherwise fall through to standard ABI rules. */
38856 mode
= TYPE_MODE (valtype
);
38858 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
38859 if (rs6000_discover_homogeneous_aggregate (mode
, valtype
, &elt_mode
, &n_elts
))
38861 int first_reg
, n_regs
;
38863 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode
))
38865 /* _Decimal128 must use even/odd register pairs. */
38866 first_reg
= (elt_mode
== TDmode
) ? FP_ARG_RETURN
+ 1 : FP_ARG_RETURN
;
38867 n_regs
= (GET_MODE_SIZE (elt_mode
) + 7) >> 3;
38871 first_reg
= ALTIVEC_ARG_RETURN
;
38875 return rs6000_parallel_return (mode
, n_elts
, elt_mode
, first_reg
, n_regs
);
38878 /* Some return value types need be split in -mpowerpc64, 32bit ABI. */
38879 if (TARGET_32BIT
&& TARGET_POWERPC64
)
38888 int count
= GET_MODE_SIZE (mode
) / 4;
38889 return rs6000_parallel_return (mode
, count
, SImode
, GP_ARG_RETURN
, 1);
38892 if ((INTEGRAL_TYPE_P (valtype
)
38893 && GET_MODE_BITSIZE (mode
) < (TARGET_32BIT
? 32 : 64))
38894 || POINTER_TYPE_P (valtype
))
38895 mode
= TARGET_32BIT
? SImode
: DImode
;
38897 if (DECIMAL_FLOAT_MODE_P (mode
) && TARGET_HARD_FLOAT
&& TARGET_FPRS
)
38898 /* _Decimal128 must use an even/odd register pair. */
38899 regno
= (mode
== TDmode
) ? FP_ARG_RETURN
+ 1 : FP_ARG_RETURN
;
38900 else if (SCALAR_FLOAT_TYPE_P (valtype
) && TARGET_HARD_FLOAT
&& TARGET_FPRS
38901 && !FLOAT128_VECTOR_P (mode
)
38902 && ((TARGET_SINGLE_FLOAT
&& (mode
== SFmode
)) || TARGET_DOUBLE_FLOAT
))
38903 regno
= FP_ARG_RETURN
;
38904 else if (TREE_CODE (valtype
) == COMPLEX_TYPE
38905 && targetm
.calls
.split_complex_arg
)
38906 return rs6000_complex_function_value (mode
);
38907 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
38908 return register is used in both cases, and we won't see V2DImode/V2DFmode
38909 for pure altivec, combine the two cases. */
38910 else if ((TREE_CODE (valtype
) == VECTOR_TYPE
|| FLOAT128_VECTOR_P (mode
))
38911 && TARGET_ALTIVEC
&& TARGET_ALTIVEC_ABI
38912 && ALTIVEC_OR_VSX_VECTOR_MODE (mode
))
38913 regno
= ALTIVEC_ARG_RETURN
;
38914 else if (TARGET_E500_DOUBLE
&& TARGET_HARD_FLOAT
38915 && (mode
== DFmode
|| mode
== DCmode
38916 || FLOAT128_IBM_P (mode
) || mode
== TCmode
))
38917 return spe_build_register_parallel (mode
, GP_ARG_RETURN
);
38919 regno
= GP_ARG_RETURN
;
38921 return gen_rtx_REG (mode
, regno
);
38924 /* Define how to find the value returned by a library function
38925 assuming the value has mode MODE. */
38927 rs6000_libcall_value (machine_mode mode
)
38929 unsigned int regno
;
38931 /* Long long return value need be split in -mpowerpc64, 32bit ABI. */
38932 if (TARGET_32BIT
&& TARGET_POWERPC64
&& mode
== DImode
)
38933 return rs6000_parallel_return (mode
, 2, SImode
, GP_ARG_RETURN
, 1);
38935 if (DECIMAL_FLOAT_MODE_P (mode
) && TARGET_HARD_FLOAT
&& TARGET_FPRS
)
38936 /* _Decimal128 must use an even/odd register pair. */
38937 regno
= (mode
== TDmode
) ? FP_ARG_RETURN
+ 1 : FP_ARG_RETURN
;
38938 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode
)
38939 && TARGET_HARD_FLOAT
&& TARGET_FPRS
38940 && ((TARGET_SINGLE_FLOAT
&& mode
== SFmode
) || TARGET_DOUBLE_FLOAT
))
38941 regno
= FP_ARG_RETURN
;
38942 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
38943 return register is used in both cases, and we won't see V2DImode/V2DFmode
38944 for pure altivec, combine the two cases. */
38945 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode
)
38946 && TARGET_ALTIVEC
&& TARGET_ALTIVEC_ABI
)
38947 regno
= ALTIVEC_ARG_RETURN
;
38948 else if (COMPLEX_MODE_P (mode
) && targetm
.calls
.split_complex_arg
)
38949 return rs6000_complex_function_value (mode
);
38950 else if (TARGET_E500_DOUBLE
&& TARGET_HARD_FLOAT
38951 && (mode
== DFmode
|| mode
== DCmode
38952 || FLOAT128_IBM_P (mode
) || mode
== TCmode
))
38953 return spe_build_register_parallel (mode
, GP_ARG_RETURN
);
38955 regno
= GP_ARG_RETURN
;
38957 return gen_rtx_REG (mode
, regno
);
38961 /* Return true if we use LRA instead of reload pass. */
38963 rs6000_lra_p (void)
38968 /* Compute register pressure classes. We implement the target hook to avoid
38969 IRA picking something like NON_SPECIAL_REGS as a pressure class, which can
38970 lead to incorrect estimates of number of available registers and therefor
38971 increased register pressure/spill. */
38973 rs6000_compute_pressure_classes (enum reg_class
*pressure_classes
)
38978 pressure_classes
[n
++] = GENERAL_REGS
;
38980 pressure_classes
[n
++] = VSX_REGS
;
38983 if (TARGET_ALTIVEC
)
38984 pressure_classes
[n
++] = ALTIVEC_REGS
;
38985 if (TARGET_HARD_FLOAT
&& TARGET_FPRS
)
38986 pressure_classes
[n
++] = FLOAT_REGS
;
38988 pressure_classes
[n
++] = CR_REGS
;
38989 pressure_classes
[n
++] = SPECIAL_REGS
;
38994 /* Given FROM and TO register numbers, say whether this elimination is allowed.
38995 Frame pointer elimination is automatically handled.
38997 For the RS/6000, if frame pointer elimination is being done, we would like
38998 to convert ap into fp, not sp.
39000 We need r30 if -mminimal-toc was specified, and there are constant pool
39004 rs6000_can_eliminate (const int from
, const int to
)
39006 return (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
39007 ? ! frame_pointer_needed
39008 : from
== RS6000_PIC_OFFSET_TABLE_REGNUM
39009 ? ! TARGET_MINIMAL_TOC
|| TARGET_NO_TOC
39010 || constant_pool_empty_p ()
39014 /* Define the offset between two registers, FROM to be eliminated and its
39015 replacement TO, at the start of a routine. */
39017 rs6000_initial_elimination_offset (int from
, int to
)
39019 rs6000_stack_t
*info
= rs6000_stack_info ();
39020 HOST_WIDE_INT offset
;
39022 if (from
== HARD_FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
39023 offset
= info
->push_p
? 0 : -info
->total_size
;
39024 else if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
39026 offset
= info
->push_p
? 0 : -info
->total_size
;
39027 if (FRAME_GROWS_DOWNWARD
)
39028 offset
+= info
->fixed_size
+ info
->vars_size
+ info
->parm_size
;
39030 else if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
39031 offset
= FRAME_GROWS_DOWNWARD
39032 ? info
->fixed_size
+ info
->vars_size
+ info
->parm_size
39034 else if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
39035 offset
= info
->total_size
;
39036 else if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
39037 offset
= info
->push_p
? info
->total_size
: 0;
39038 else if (from
== RS6000_PIC_OFFSET_TABLE_REGNUM
)
39041 gcc_unreachable ();
39047 rs6000_dwarf_register_span (rtx reg
)
39051 unsigned regno
= REGNO (reg
);
39052 machine_mode mode
= GET_MODE (reg
);
39056 && (SPE_VECTOR_MODE (GET_MODE (reg
))
39057 || (TARGET_E500_DOUBLE
&& FLOAT_MODE_P (mode
)
39058 && mode
!= SFmode
&& mode
!= SDmode
&& mode
!= SCmode
)))
39063 regno
= REGNO (reg
);
39065 /* The duality of the SPE register size wreaks all kinds of havoc.
39066 This is a way of distinguishing r0 in 32-bits from r0 in
39068 words
= (GET_MODE_SIZE (mode
) + UNITS_PER_FP_WORD
- 1) / UNITS_PER_FP_WORD
;
39069 gcc_assert (words
<= 4);
39070 for (i
= 0; i
< words
; i
++, regno
++)
39072 if (BYTES_BIG_ENDIAN
)
39074 parts
[2 * i
] = gen_rtx_REG (SImode
, regno
+ FIRST_SPE_HIGH_REGNO
);
39075 parts
[2 * i
+ 1] = gen_rtx_REG (SImode
, regno
);
39079 parts
[2 * i
] = gen_rtx_REG (SImode
, regno
);
39080 parts
[2 * i
+ 1] = gen_rtx_REG (SImode
, regno
+ FIRST_SPE_HIGH_REGNO
);
39084 return gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (words
* 2, parts
));
39087 /* Fill in sizes for SPE register high parts in table used by unwinder. */
39090 rs6000_init_dwarf_reg_sizes_extra (tree address
)
39095 machine_mode mode
= TYPE_MODE (char_type_node
);
39096 rtx addr
= expand_expr (address
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
39097 rtx mem
= gen_rtx_MEM (BLKmode
, addr
);
39098 rtx value
= gen_int_mode (4, mode
);
39100 for (i
= FIRST_SPE_HIGH_REGNO
; i
< LAST_SPE_HIGH_REGNO
+1; i
++)
39102 int column
= DWARF_REG_TO_UNWIND_COLUMN
39103 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i
), true));
39104 HOST_WIDE_INT offset
= column
* GET_MODE_SIZE (mode
);
39106 emit_move_insn (adjust_address (mem
, mode
, offset
), value
);
39110 if (TARGET_MACHO
&& ! TARGET_ALTIVEC
)
39113 machine_mode mode
= TYPE_MODE (char_type_node
);
39114 rtx addr
= expand_expr (address
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
39115 rtx mem
= gen_rtx_MEM (BLKmode
, addr
);
39116 rtx value
= gen_int_mode (16, mode
);
39118 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
39119 The unwinder still needs to know the size of Altivec registers. */
39121 for (i
= FIRST_ALTIVEC_REGNO
; i
< LAST_ALTIVEC_REGNO
+1; i
++)
39123 int column
= DWARF_REG_TO_UNWIND_COLUMN
39124 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i
), true));
39125 HOST_WIDE_INT offset
= column
* GET_MODE_SIZE (mode
);
39127 emit_move_insn (adjust_address (mem
, mode
, offset
), value
);
39132 /* Map internal gcc register numbers to debug format register numbers.
39133 FORMAT specifies the type of debug register number to use:
39134 0 -- debug information, except for frame-related sections
39135 1 -- DWARF .debug_frame section
39136 2 -- DWARF .eh_frame section */
39139 rs6000_dbx_register_number (unsigned int regno
, unsigned int format
)
39141 /* We never use the GCC internal number for SPE high registers.
39142 Those are mapped to the 1200..1231 range for all debug formats. */
39143 if (SPE_HIGH_REGNO_P (regno
))
39144 return regno
- FIRST_SPE_HIGH_REGNO
+ 1200;
39146 /* Except for the above, we use the internal number for non-DWARF
39147 debug information, and also for .eh_frame. */
39148 if ((format
== 0 && write_symbols
!= DWARF2_DEBUG
) || format
== 2)
39151 /* On some platforms, we use the standard DWARF register
39152 numbering for .debug_info and .debug_frame. */
39153 #ifdef RS6000_USE_DWARF_NUMBERING
39156 if (regno
== LR_REGNO
)
39158 if (regno
== CTR_REGNO
)
39160 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
39161 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
39162 The actual code emitted saves the whole of CR, so we map CR2_REGNO
39163 to the DWARF reg for CR. */
39164 if (format
== 1 && regno
== CR2_REGNO
)
39166 if (CR_REGNO_P (regno
))
39167 return regno
- CR0_REGNO
+ 86;
39168 if (regno
== CA_REGNO
)
39169 return 101; /* XER */
39170 if (ALTIVEC_REGNO_P (regno
))
39171 return regno
- FIRST_ALTIVEC_REGNO
+ 1124;
39172 if (regno
== VRSAVE_REGNO
)
39174 if (regno
== VSCR_REGNO
)
39176 if (regno
== SPE_ACC_REGNO
)
39178 if (regno
== SPEFSCR_REGNO
)
39184 /* target hook eh_return_filter_mode */
39185 static scalar_int_mode
39186 rs6000_eh_return_filter_mode (void)
39188 return TARGET_32BIT
? SImode
: word_mode
;
39191 /* Target hook for scalar_mode_supported_p. */
39193 rs6000_scalar_mode_supported_p (scalar_mode mode
)
39195 /* -m32 does not support TImode. This is the default, from
39196 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
39197 same ABI as for -m32. But default_scalar_mode_supported_p allows
39198 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
39199 for -mpowerpc64. */
39200 if (TARGET_32BIT
&& mode
== TImode
)
39203 if (DECIMAL_FLOAT_MODE_P (mode
))
39204 return default_decimal_float_supported_p ();
39205 else if (TARGET_FLOAT128_TYPE
&& (mode
== KFmode
|| mode
== IFmode
))
39208 return default_scalar_mode_supported_p (mode
);
39211 /* Target hook for vector_mode_supported_p. */
39213 rs6000_vector_mode_supported_p (machine_mode mode
)
39216 if (TARGET_PAIRED_FLOAT
&& PAIRED_VECTOR_MODE (mode
))
39219 if (TARGET_SPE
&& SPE_VECTOR_MODE (mode
))
39222 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
39223 128-bit, the compiler might try to widen IEEE 128-bit to IBM
39225 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
) && !FLOAT128_IEEE_P (mode
))
39232 /* Target hook for floatn_mode. */
39233 static opt_scalar_float_mode
39234 rs6000_floatn_mode (int n
, bool extended
)
39244 if (TARGET_FLOAT128_KEYWORD
)
39245 return (FLOAT128_IEEE_P (TFmode
)) ? TFmode
: KFmode
;
39247 return opt_scalar_float_mode ();
39250 return opt_scalar_float_mode ();
39253 /* Those are the only valid _FloatNx types. */
39254 gcc_unreachable ();
39268 if (TARGET_FLOAT128_KEYWORD
)
39269 return (FLOAT128_IEEE_P (TFmode
)) ? TFmode
: KFmode
;
39271 return opt_scalar_float_mode ();
39274 return opt_scalar_float_mode ();
39280 /* Target hook for c_mode_for_suffix. */
39281 static machine_mode
39282 rs6000_c_mode_for_suffix (char suffix
)
39284 if (TARGET_FLOAT128_TYPE
)
39286 if (suffix
== 'q' || suffix
== 'Q')
39287 return (FLOAT128_IEEE_P (TFmode
)) ? TFmode
: KFmode
;
39289 /* At the moment, we are not defining a suffix for IBM extended double.
39290 If/when the default for -mabi=ieeelongdouble is changed, and we want
39291 to support __ibm128 constants in legacy library code, we may need to
39292 re-evalaute this decision. Currently, c-lex.c only supports 'w' and
39293 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
39294 __float80 constants. */
39300 /* Target hook for invalid_arg_for_unprototyped_fn. */
39301 static const char *
39302 invalid_arg_for_unprototyped_fn (const_tree typelist
, const_tree funcdecl
, const_tree val
)
39304 return (!rs6000_darwin64_abi
39306 && TREE_CODE (TREE_TYPE (val
)) == VECTOR_TYPE
39307 && (funcdecl
== NULL_TREE
39308 || (TREE_CODE (funcdecl
) == FUNCTION_DECL
39309 && DECL_BUILT_IN_CLASS (funcdecl
) != BUILT_IN_MD
)))
39310 ? N_("AltiVec argument passed to unprototyped function")
39314 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
39315 setup by using __stack_chk_fail_local hidden function instead of
39316 calling __stack_chk_fail directly. Otherwise it is better to call
39317 __stack_chk_fail directly. */
39319 static tree ATTRIBUTE_UNUSED
39320 rs6000_stack_protect_fail (void)
39322 return (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
&& flag_pic
)
39323 ? default_hidden_stack_protect_fail ()
39324 : default_external_stack_protect_fail ();
39328 rs6000_final_prescan_insn (rtx_insn
*insn
, rtx
*operand ATTRIBUTE_UNUSED
,
39329 int num_operands ATTRIBUTE_UNUSED
)
39331 if (rs6000_warn_cell_microcode
)
39334 int insn_code_number
= recog_memoized (insn
);
39335 location_t location
= INSN_LOCATION (insn
);
39337 /* Punt on insns we cannot recognize. */
39338 if (insn_code_number
< 0)
39341 /* get_insn_template can modify recog_data, so save and restore it. */
39342 struct recog_data_d recog_data_save
= recog_data
;
39343 for (int i
= 0; i
< recog_data
.n_operands
; i
++)
39344 recog_data
.operand
[i
] = copy_rtx (recog_data
.operand
[i
]);
39345 temp
= get_insn_template (insn_code_number
, insn
);
39346 recog_data
= recog_data_save
;
39348 if (get_attr_cell_micro (insn
) == CELL_MICRO_ALWAYS
)
39349 warning_at (location
, OPT_mwarn_cell_microcode
,
39350 "emitting microcode insn %s\t[%s] #%d",
39351 temp
, insn_data
[INSN_CODE (insn
)].name
, INSN_UID (insn
));
39352 else if (get_attr_cell_micro (insn
) == CELL_MICRO_CONDITIONAL
)
39353 warning_at (location
, OPT_mwarn_cell_microcode
,
39354 "emitting conditional microcode insn %s\t[%s] #%d",
39355 temp
, insn_data
[INSN_CODE (insn
)].name
, INSN_UID (insn
));
39359 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
39362 static unsigned HOST_WIDE_INT
39363 rs6000_asan_shadow_offset (void)
39365 return (unsigned HOST_WIDE_INT
) 1 << (TARGET_64BIT
? 41 : 29);
39369 /* Mask options that we want to support inside of attribute((target)) and
39370 #pragma GCC target operations. Note, we do not include things like
39371 64/32-bit, endianness, hard/soft floating point, etc. that would have
39372 different calling sequences. */
39374 struct rs6000_opt_mask
{
39375 const char *name
; /* option name */
39376 HOST_WIDE_INT mask
; /* mask to set */
39377 bool invert
; /* invert sense of mask */
39378 bool valid_target
; /* option is a target option */
39381 static struct rs6000_opt_mask
const rs6000_opt_masks
[] =
39383 { "altivec", OPTION_MASK_ALTIVEC
, false, true },
39384 { "cmpb", OPTION_MASK_CMPB
, false, true },
39385 { "crypto", OPTION_MASK_CRYPTO
, false, true },
39386 { "direct-move", OPTION_MASK_DIRECT_MOVE
, false, true },
39387 { "dlmzb", OPTION_MASK_DLMZB
, false, true },
39388 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX
,
39390 { "float128", OPTION_MASK_FLOAT128_KEYWORD
, false, false },
39391 { "float128-type", OPTION_MASK_FLOAT128_TYPE
, false, false },
39392 { "float128-hardware", OPTION_MASK_FLOAT128_HW
, false, false },
39393 { "fprnd", OPTION_MASK_FPRND
, false, true },
39394 { "hard-dfp", OPTION_MASK_DFP
, false, true },
39395 { "htm", OPTION_MASK_HTM
, false, true },
39396 { "isel", OPTION_MASK_ISEL
, false, true },
39397 { "mfcrf", OPTION_MASK_MFCRF
, false, true },
39398 { "mfpgpr", OPTION_MASK_MFPGPR
, false, true },
39399 { "modulo", OPTION_MASK_MODULO
, false, true },
39400 { "mulhw", OPTION_MASK_MULHW
, false, true },
39401 { "multiple", OPTION_MASK_MULTIPLE
, false, true },
39402 { "popcntb", OPTION_MASK_POPCNTB
, false, true },
39403 { "popcntd", OPTION_MASK_POPCNTD
, false, true },
39404 { "power8-fusion", OPTION_MASK_P8_FUSION
, false, true },
39405 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN
, false, true },
39406 { "power8-vector", OPTION_MASK_P8_VECTOR
, false, true },
39407 { "power9-dform-scalar", OPTION_MASK_P9_DFORM_SCALAR
, false, true },
39408 { "power9-dform-vector", OPTION_MASK_P9_DFORM_VECTOR
, false, true },
39409 { "power9-fusion", OPTION_MASK_P9_FUSION
, false, true },
39410 { "power9-minmax", OPTION_MASK_P9_MINMAX
, false, true },
39411 { "power9-misc", OPTION_MASK_P9_MISC
, false, true },
39412 { "power9-vector", OPTION_MASK_P9_VECTOR
, false, true },
39413 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT
, false, true },
39414 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT
, false, true },
39415 { "quad-memory", OPTION_MASK_QUAD_MEMORY
, false, true },
39416 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC
, false, true },
39417 { "recip-precision", OPTION_MASK_RECIP_PRECISION
, false, true },
39418 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT
, false, true },
39419 { "string", OPTION_MASK_STRING
, false, true },
39420 { "toc-fusion", OPTION_MASK_TOC_FUSION
, false, true },
39421 { "update", OPTION_MASK_NO_UPDATE
, true , true },
39422 { "upper-regs-di", OPTION_MASK_UPPER_REGS_DI
, false, true },
39423 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF
, false, true },
39424 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF
, false, true },
39425 { "vsx", OPTION_MASK_VSX
, false, true },
39426 { "vsx-small-integer", OPTION_MASK_VSX_SMALL_INTEGER
, false, true },
39427 { "vsx-timode", OPTION_MASK_VSX_TIMODE
, false, true },
39428 #ifdef OPTION_MASK_64BIT
39430 { "aix64", OPTION_MASK_64BIT
, false, false },
39431 { "aix32", OPTION_MASK_64BIT
, true, false },
39433 { "64", OPTION_MASK_64BIT
, false, false },
39434 { "32", OPTION_MASK_64BIT
, true, false },
39437 #ifdef OPTION_MASK_EABI
39438 { "eabi", OPTION_MASK_EABI
, false, false },
39440 #ifdef OPTION_MASK_LITTLE_ENDIAN
39441 { "little", OPTION_MASK_LITTLE_ENDIAN
, false, false },
39442 { "big", OPTION_MASK_LITTLE_ENDIAN
, true, false },
39444 #ifdef OPTION_MASK_RELOCATABLE
39445 { "relocatable", OPTION_MASK_RELOCATABLE
, false, false },
39447 #ifdef OPTION_MASK_STRICT_ALIGN
39448 { "strict-align", OPTION_MASK_STRICT_ALIGN
, false, false },
39450 { "soft-float", OPTION_MASK_SOFT_FLOAT
, false, false },
39451 { "string", OPTION_MASK_STRING
, false, false },
39454 /* Builtin mask mapping for printing the flags. */
39455 static struct rs6000_opt_mask
const rs6000_builtin_mask_names
[] =
39457 { "altivec", RS6000_BTM_ALTIVEC
, false, false },
39458 { "vsx", RS6000_BTM_VSX
, false, false },
39459 { "spe", RS6000_BTM_SPE
, false, false },
39460 { "paired", RS6000_BTM_PAIRED
, false, false },
39461 { "fre", RS6000_BTM_FRE
, false, false },
39462 { "fres", RS6000_BTM_FRES
, false, false },
39463 { "frsqrte", RS6000_BTM_FRSQRTE
, false, false },
39464 { "frsqrtes", RS6000_BTM_FRSQRTES
, false, false },
39465 { "popcntd", RS6000_BTM_POPCNTD
, false, false },
39466 { "cell", RS6000_BTM_CELL
, false, false },
39467 { "power8-vector", RS6000_BTM_P8_VECTOR
, false, false },
39468 { "power9-vector", RS6000_BTM_P9_VECTOR
, false, false },
39469 { "power9-misc", RS6000_BTM_P9_MISC
, false, false },
39470 { "crypto", RS6000_BTM_CRYPTO
, false, false },
39471 { "htm", RS6000_BTM_HTM
, false, false },
39472 { "hard-dfp", RS6000_BTM_DFP
, false, false },
39473 { "hard-float", RS6000_BTM_HARD_FLOAT
, false, false },
39474 { "long-double-128", RS6000_BTM_LDBL128
, false, false },
39475 { "float128", RS6000_BTM_FLOAT128
, false, false },
39478 /* Option variables that we want to support inside attribute((target)) and
39479 #pragma GCC target operations. */
39481 struct rs6000_opt_var
{
39482 const char *name
; /* option name */
39483 size_t global_offset
; /* offset of the option in global_options. */
39484 size_t target_offset
; /* offset of the option in target options. */
39487 static struct rs6000_opt_var
const rs6000_opt_vars
[] =
39490 offsetof (struct gcc_options
, x_TARGET_FRIZ
),
39491 offsetof (struct cl_target_option
, x_TARGET_FRIZ
), },
39492 { "avoid-indexed-addresses",
39493 offsetof (struct gcc_options
, x_TARGET_AVOID_XFORM
),
39494 offsetof (struct cl_target_option
, x_TARGET_AVOID_XFORM
) },
39496 offsetof (struct gcc_options
, x_rs6000_paired_float
),
39497 offsetof (struct cl_target_option
, x_rs6000_paired_float
), },
39499 offsetof (struct gcc_options
, x_rs6000_default_long_calls
),
39500 offsetof (struct cl_target_option
, x_rs6000_default_long_calls
), },
39501 { "optimize-swaps",
39502 offsetof (struct gcc_options
, x_rs6000_optimize_swaps
),
39503 offsetof (struct cl_target_option
, x_rs6000_optimize_swaps
), },
39504 { "allow-movmisalign",
39505 offsetof (struct gcc_options
, x_TARGET_ALLOW_MOVMISALIGN
),
39506 offsetof (struct cl_target_option
, x_TARGET_ALLOW_MOVMISALIGN
), },
39507 { "allow-df-permute",
39508 offsetof (struct gcc_options
, x_TARGET_ALLOW_DF_PERMUTE
),
39509 offsetof (struct cl_target_option
, x_TARGET_ALLOW_DF_PERMUTE
), },
39511 offsetof (struct gcc_options
, x_TARGET_SCHED_GROUPS
),
39512 offsetof (struct cl_target_option
, x_TARGET_SCHED_GROUPS
), },
39514 offsetof (struct gcc_options
, x_TARGET_ALWAYS_HINT
),
39515 offsetof (struct cl_target_option
, x_TARGET_ALWAYS_HINT
), },
39516 { "align-branch-targets",
39517 offsetof (struct gcc_options
, x_TARGET_ALIGN_BRANCH_TARGETS
),
39518 offsetof (struct cl_target_option
, x_TARGET_ALIGN_BRANCH_TARGETS
), },
39519 { "vectorize-builtins",
39520 offsetof (struct gcc_options
, x_TARGET_VECTORIZE_BUILTINS
),
39521 offsetof (struct cl_target_option
, x_TARGET_VECTORIZE_BUILTINS
), },
39523 offsetof (struct gcc_options
, x_tls_markers
),
39524 offsetof (struct cl_target_option
, x_tls_markers
), },
39526 offsetof (struct gcc_options
, x_TARGET_SCHED_PROLOG
),
39527 offsetof (struct cl_target_option
, x_TARGET_SCHED_PROLOG
), },
39529 offsetof (struct gcc_options
, x_TARGET_SCHED_PROLOG
),
39530 offsetof (struct cl_target_option
, x_TARGET_SCHED_PROLOG
), },
39531 { "gen-cell-microcode",
39532 offsetof (struct gcc_options
, x_rs6000_gen_cell_microcode
),
39533 offsetof (struct cl_target_option
, x_rs6000_gen_cell_microcode
), },
39534 { "warn-cell-microcode",
39535 offsetof (struct gcc_options
, x_rs6000_warn_cell_microcode
),
39536 offsetof (struct cl_target_option
, x_rs6000_warn_cell_microcode
), },
39539 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
39540 parsing. Return true if there were no errors. */
39543 rs6000_inner_target_options (tree args
, bool attr_p
)
39547 if (args
== NULL_TREE
)
39550 else if (TREE_CODE (args
) == STRING_CST
)
39552 char *p
= ASTRDUP (TREE_STRING_POINTER (args
));
39555 while ((q
= strtok (p
, ",")) != NULL
)
39557 bool error_p
= false;
39558 bool not_valid_p
= false;
39559 const char *cpu_opt
= NULL
;
39562 if (strncmp (q
, "cpu=", 4) == 0)
39564 int cpu_index
= rs6000_cpu_name_lookup (q
+4);
39565 if (cpu_index
>= 0)
39566 rs6000_cpu_index
= cpu_index
;
39573 else if (strncmp (q
, "tune=", 5) == 0)
39575 int tune_index
= rs6000_cpu_name_lookup (q
+5);
39576 if (tune_index
>= 0)
39577 rs6000_tune_index
= tune_index
;
39587 bool invert
= false;
39591 if (strncmp (r
, "no-", 3) == 0)
39597 for (i
= 0; i
< ARRAY_SIZE (rs6000_opt_masks
); i
++)
39598 if (strcmp (r
, rs6000_opt_masks
[i
].name
) == 0)
39600 HOST_WIDE_INT mask
= rs6000_opt_masks
[i
].mask
;
39602 if (!rs6000_opt_masks
[i
].valid_target
)
39603 not_valid_p
= true;
39607 rs6000_isa_flags_explicit
|= mask
;
39609 /* VSX needs altivec, so -mvsx automagically sets
39610 altivec and disables -mavoid-indexed-addresses. */
39613 if (mask
== OPTION_MASK_VSX
)
39615 mask
|= OPTION_MASK_ALTIVEC
;
39616 TARGET_AVOID_XFORM
= 0;
39620 if (rs6000_opt_masks
[i
].invert
)
39624 rs6000_isa_flags
&= ~mask
;
39626 rs6000_isa_flags
|= mask
;
39631 if (error_p
&& !not_valid_p
)
39633 for (i
= 0; i
< ARRAY_SIZE (rs6000_opt_vars
); i
++)
39634 if (strcmp (r
, rs6000_opt_vars
[i
].name
) == 0)
39636 size_t j
= rs6000_opt_vars
[i
].global_offset
;
39637 *((int *) ((char *)&global_options
+ j
)) = !invert
;
39639 not_valid_p
= false;
39647 const char *eprefix
, *esuffix
;
39652 eprefix
= "__attribute__((__target__(";
39657 eprefix
= "#pragma GCC target ";
39662 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt
, eprefix
,
39664 else if (not_valid_p
)
39665 error ("%s\"%s\"%s is not allowed", eprefix
, q
, esuffix
);
39667 error ("%s\"%s\"%s is invalid", eprefix
, q
, esuffix
);
39672 else if (TREE_CODE (args
) == TREE_LIST
)
39676 tree value
= TREE_VALUE (args
);
39679 bool ret2
= rs6000_inner_target_options (value
, attr_p
);
39683 args
= TREE_CHAIN (args
);
39685 while (args
!= NULL_TREE
);
39690 error ("attribute %<target%> argument not a string");
39697 /* Print out the target options as a list for -mdebug=target. */
39700 rs6000_debug_target_options (tree args
, const char *prefix
)
39702 if (args
== NULL_TREE
)
39703 fprintf (stderr
, "%s<NULL>", prefix
);
39705 else if (TREE_CODE (args
) == STRING_CST
)
39707 char *p
= ASTRDUP (TREE_STRING_POINTER (args
));
39710 while ((q
= strtok (p
, ",")) != NULL
)
39713 fprintf (stderr
, "%s\"%s\"", prefix
, q
);
39718 else if (TREE_CODE (args
) == TREE_LIST
)
39722 tree value
= TREE_VALUE (args
);
39725 rs6000_debug_target_options (value
, prefix
);
39728 args
= TREE_CHAIN (args
);
39730 while (args
!= NULL_TREE
);
39734 gcc_unreachable ();
39740 /* Hook to validate attribute((target("..."))). */
39743 rs6000_valid_attribute_p (tree fndecl
,
39744 tree
ARG_UNUSED (name
),
39748 struct cl_target_option cur_target
;
39750 tree old_optimize
= build_optimization_node (&global_options
);
39751 tree new_target
, new_optimize
;
39752 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
39754 gcc_assert ((fndecl
!= NULL_TREE
) && (args
!= NULL_TREE
));
39756 if (TARGET_DEBUG_TARGET
)
39758 tree tname
= DECL_NAME (fndecl
);
39759 fprintf (stderr
, "\n==================== rs6000_valid_attribute_p:\n");
39761 fprintf (stderr
, "function: %.*s\n",
39762 (int) IDENTIFIER_LENGTH (tname
),
39763 IDENTIFIER_POINTER (tname
));
39765 fprintf (stderr
, "function: unknown\n");
39767 fprintf (stderr
, "args:");
39768 rs6000_debug_target_options (args
, " ");
39769 fprintf (stderr
, "\n");
39772 fprintf (stderr
, "flags: 0x%x\n", flags
);
39774 fprintf (stderr
, "--------------------\n");
39777 old_optimize
= build_optimization_node (&global_options
);
39778 func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
39780 /* If the function changed the optimization levels as well as setting target
39781 options, start with the optimizations specified. */
39782 if (func_optimize
&& func_optimize
!= old_optimize
)
39783 cl_optimization_restore (&global_options
,
39784 TREE_OPTIMIZATION (func_optimize
));
39786 /* The target attributes may also change some optimization flags, so update
39787 the optimization options if necessary. */
39788 cl_target_option_save (&cur_target
, &global_options
);
39789 rs6000_cpu_index
= rs6000_tune_index
= -1;
39790 ret
= rs6000_inner_target_options (args
, true);
39792 /* Set up any additional state. */
39795 ret
= rs6000_option_override_internal (false);
39796 new_target
= build_target_option_node (&global_options
);
39801 new_optimize
= build_optimization_node (&global_options
);
39808 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
39810 if (old_optimize
!= new_optimize
)
39811 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
39814 cl_target_option_restore (&global_options
, &cur_target
);
39816 if (old_optimize
!= new_optimize
)
39817 cl_optimization_restore (&global_options
,
39818 TREE_OPTIMIZATION (old_optimize
));
39824 /* Hook to validate the current #pragma GCC target and set the state, and
39825 update the macros based on what was changed. If ARGS is NULL, then
39826 POP_TARGET is used to reset the options. */
39829 rs6000_pragma_target_parse (tree args
, tree pop_target
)
39831 tree prev_tree
= build_target_option_node (&global_options
);
39833 struct cl_target_option
*prev_opt
, *cur_opt
;
39834 HOST_WIDE_INT prev_flags
, cur_flags
, diff_flags
;
39835 HOST_WIDE_INT prev_bumask
, cur_bumask
, diff_bumask
;
39837 if (TARGET_DEBUG_TARGET
)
39839 fprintf (stderr
, "\n==================== rs6000_pragma_target_parse\n");
39840 fprintf (stderr
, "args:");
39841 rs6000_debug_target_options (args
, " ");
39842 fprintf (stderr
, "\n");
39846 fprintf (stderr
, "pop_target:\n");
39847 debug_tree (pop_target
);
39850 fprintf (stderr
, "pop_target: <NULL>\n");
39852 fprintf (stderr
, "--------------------\n");
39857 cur_tree
= ((pop_target
)
39859 : target_option_default_node
);
39860 cl_target_option_restore (&global_options
,
39861 TREE_TARGET_OPTION (cur_tree
));
39865 rs6000_cpu_index
= rs6000_tune_index
= -1;
39866 if (!rs6000_inner_target_options (args
, false)
39867 || !rs6000_option_override_internal (false)
39868 || (cur_tree
= build_target_option_node (&global_options
))
39871 if (TARGET_DEBUG_BUILTIN
|| TARGET_DEBUG_TARGET
)
39872 fprintf (stderr
, "invalid pragma\n");
39878 target_option_current_node
= cur_tree
;
39880 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
39881 change the macros that are defined. */
39882 if (rs6000_target_modify_macros_ptr
)
39884 prev_opt
= TREE_TARGET_OPTION (prev_tree
);
39885 prev_bumask
= prev_opt
->x_rs6000_builtin_mask
;
39886 prev_flags
= prev_opt
->x_rs6000_isa_flags
;
39888 cur_opt
= TREE_TARGET_OPTION (cur_tree
);
39889 cur_flags
= cur_opt
->x_rs6000_isa_flags
;
39890 cur_bumask
= cur_opt
->x_rs6000_builtin_mask
;
39892 diff_bumask
= (prev_bumask
^ cur_bumask
);
39893 diff_flags
= (prev_flags
^ cur_flags
);
39895 if ((diff_flags
!= 0) || (diff_bumask
!= 0))
39897 /* Delete old macros. */
39898 rs6000_target_modify_macros_ptr (false,
39899 prev_flags
& diff_flags
,
39900 prev_bumask
& diff_bumask
);
39902 /* Define new macros. */
39903 rs6000_target_modify_macros_ptr (true,
39904 cur_flags
& diff_flags
,
39905 cur_bumask
& diff_bumask
);
39913 /* Remember the last target of rs6000_set_current_function. */
39914 static GTY(()) tree rs6000_previous_fndecl
;
39916 /* Establish appropriate back-end context for processing the function
39917 FNDECL. The argument might be NULL to indicate processing at top
39918 level, outside of any function scope. */
39920 rs6000_set_current_function (tree fndecl
)
39922 tree old_tree
= (rs6000_previous_fndecl
39923 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl
)
39926 tree new_tree
= (fndecl
39927 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl
)
39930 if (TARGET_DEBUG_TARGET
)
39932 bool print_final
= false;
39933 fprintf (stderr
, "\n==================== rs6000_set_current_function");
39936 fprintf (stderr
, ", fndecl %s (%p)",
39937 (DECL_NAME (fndecl
)
39938 ? IDENTIFIER_POINTER (DECL_NAME (fndecl
))
39939 : "<unknown>"), (void *)fndecl
);
39941 if (rs6000_previous_fndecl
)
39942 fprintf (stderr
, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl
);
39944 fprintf (stderr
, "\n");
39947 fprintf (stderr
, "\nnew fndecl target specific options:\n");
39948 debug_tree (new_tree
);
39949 print_final
= true;
39954 fprintf (stderr
, "\nold fndecl target specific options:\n");
39955 debug_tree (old_tree
);
39956 print_final
= true;
39960 fprintf (stderr
, "--------------------\n");
39963 /* Only change the context if the function changes. This hook is called
39964 several times in the course of compiling a function, and we don't want to
39965 slow things down too much or call target_reinit when it isn't safe. */
39966 if (fndecl
&& fndecl
!= rs6000_previous_fndecl
)
39968 rs6000_previous_fndecl
= fndecl
;
39969 if (old_tree
== new_tree
)
39972 else if (new_tree
&& new_tree
!= target_option_default_node
)
39974 cl_target_option_restore (&global_options
,
39975 TREE_TARGET_OPTION (new_tree
));
39976 if (TREE_TARGET_GLOBALS (new_tree
))
39977 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
39979 TREE_TARGET_GLOBALS (new_tree
)
39980 = save_target_globals_default_opts ();
39983 else if (old_tree
&& old_tree
!= target_option_default_node
)
39985 new_tree
= target_option_current_node
;
39986 cl_target_option_restore (&global_options
,
39987 TREE_TARGET_OPTION (new_tree
));
39988 if (TREE_TARGET_GLOBALS (new_tree
))
39989 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
39990 else if (new_tree
== target_option_default_node
)
39991 restore_target_globals (&default_target_globals
);
39993 TREE_TARGET_GLOBALS (new_tree
)
39994 = save_target_globals_default_opts ();
40000 /* Save the current options */
40003 rs6000_function_specific_save (struct cl_target_option
*ptr
,
40004 struct gcc_options
*opts
)
40006 ptr
->x_rs6000_isa_flags
= opts
->x_rs6000_isa_flags
;
40007 ptr
->x_rs6000_isa_flags_explicit
= opts
->x_rs6000_isa_flags_explicit
;
40010 /* Restore the current options */
40013 rs6000_function_specific_restore (struct gcc_options
*opts
,
40014 struct cl_target_option
*ptr
)
40017 opts
->x_rs6000_isa_flags
= ptr
->x_rs6000_isa_flags
;
40018 opts
->x_rs6000_isa_flags_explicit
= ptr
->x_rs6000_isa_flags_explicit
;
40019 (void) rs6000_option_override_internal (false);
40022 /* Print the current options */
40025 rs6000_function_specific_print (FILE *file
, int indent
,
40026 struct cl_target_option
*ptr
)
40028 rs6000_print_isa_options (file
, indent
, "Isa options set",
40029 ptr
->x_rs6000_isa_flags
);
40031 rs6000_print_isa_options (file
, indent
, "Isa options explicit",
40032 ptr
->x_rs6000_isa_flags_explicit
);
40035 /* Helper function to print the current isa or misc options on a line. */
40038 rs6000_print_options_internal (FILE *file
,
40040 const char *string
,
40041 HOST_WIDE_INT flags
,
40042 const char *prefix
,
40043 const struct rs6000_opt_mask
*opts
,
40044 size_t num_elements
)
40047 size_t start_column
= 0;
40049 size_t max_column
= 120;
40050 size_t prefix_len
= strlen (prefix
);
40051 size_t comma_len
= 0;
40052 const char *comma
= "";
40055 start_column
+= fprintf (file
, "%*s", indent
, "");
40059 fprintf (stderr
, DEBUG_FMT_S
, string
, "<none>");
40063 start_column
+= fprintf (stderr
, DEBUG_FMT_WX
, string
, flags
);
40065 /* Print the various mask options. */
40066 cur_column
= start_column
;
40067 for (i
= 0; i
< num_elements
; i
++)
40069 bool invert
= opts
[i
].invert
;
40070 const char *name
= opts
[i
].name
;
40071 const char *no_str
= "";
40072 HOST_WIDE_INT mask
= opts
[i
].mask
;
40073 size_t len
= comma_len
+ prefix_len
+ strlen (name
);
40077 if ((flags
& mask
) == 0)
40080 len
+= sizeof ("no-") - 1;
40088 if ((flags
& mask
) != 0)
40091 len
+= sizeof ("no-") - 1;
40098 if (cur_column
> max_column
)
40100 fprintf (stderr
, ", \\\n%*s", (int)start_column
, "");
40101 cur_column
= start_column
+ len
;
40105 fprintf (file
, "%s%s%s%s", comma
, prefix
, no_str
, name
);
40107 comma_len
= sizeof (", ") - 1;
40110 fputs ("\n", file
);
40113 /* Helper function to print the current isa options on a line. */
40116 rs6000_print_isa_options (FILE *file
, int indent
, const char *string
,
40117 HOST_WIDE_INT flags
)
40119 rs6000_print_options_internal (file
, indent
, string
, flags
, "-m",
40120 &rs6000_opt_masks
[0],
40121 ARRAY_SIZE (rs6000_opt_masks
));
40125 rs6000_print_builtin_options (FILE *file
, int indent
, const char *string
,
40126 HOST_WIDE_INT flags
)
40128 rs6000_print_options_internal (file
, indent
, string
, flags
, "",
40129 &rs6000_builtin_mask_names
[0],
40130 ARRAY_SIZE (rs6000_builtin_mask_names
));
40133 /* If the user used -mno-vsx, we need turn off all of the implicit ISA 2.06,
40134 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
40135 -mvsx-timode, -mupper-regs-df).
40137 If the user used -mno-power8-vector, we need to turn off all of the implicit
40138 ISA 2.07 and 3.0 options that relate to the vector unit.
40140 If the user used -mno-power9-vector, we need to turn off all of the implicit
40141 ISA 3.0 options that relate to the vector unit.
40143 This function does not handle explicit options such as the user specifying
40144 -mdirect-move. These are handled in rs6000_option_override_internal, and
40145 the appropriate error is given if needed.
40147 We return a mask of all of the implicit options that should not be enabled
40150 static HOST_WIDE_INT
40151 rs6000_disable_incompatible_switches (void)
40153 HOST_WIDE_INT ignore_masks
= rs6000_isa_flags_explicit
;
40156 static const struct {
40157 const HOST_WIDE_INT no_flag
; /* flag explicitly turned off. */
40158 const HOST_WIDE_INT dep_flags
; /* flags that depend on this option. */
40159 const char *const name
; /* name of the switch. */
40161 { OPTION_MASK_P9_VECTOR
, OTHER_P9_VECTOR_MASKS
, "power9-vector" },
40162 { OPTION_MASK_P8_VECTOR
, OTHER_P8_VECTOR_MASKS
, "power8-vector" },
40163 { OPTION_MASK_VSX
, OTHER_VSX_VECTOR_MASKS
, "vsx" },
40166 for (i
= 0; i
< ARRAY_SIZE (flags
); i
++)
40168 HOST_WIDE_INT no_flag
= flags
[i
].no_flag
;
40170 if ((rs6000_isa_flags
& no_flag
) == 0
40171 && (rs6000_isa_flags_explicit
& no_flag
) != 0)
40173 HOST_WIDE_INT dep_flags
= flags
[i
].dep_flags
;
40174 HOST_WIDE_INT set_flags
= (rs6000_isa_flags_explicit
40180 for (j
= 0; j
< ARRAY_SIZE (rs6000_opt_masks
); j
++)
40181 if ((set_flags
& rs6000_opt_masks
[j
].mask
) != 0)
40183 set_flags
&= ~rs6000_opt_masks
[j
].mask
;
40184 error ("-mno-%s turns off -m%s",
40186 rs6000_opt_masks
[j
].name
);
40189 gcc_assert (!set_flags
);
40192 rs6000_isa_flags
&= ~dep_flags
;
40193 ignore_masks
|= no_flag
| dep_flags
;
40197 if (!TARGET_P9_VECTOR
40198 && (rs6000_isa_flags_explicit
& OPTION_MASK_P9_VECTOR
) != 0
40199 && TARGET_P9_DFORM_BOTH
> 0)
40201 error ("-mno-power9-vector turns off -mpower9-dform");
40202 TARGET_P9_DFORM_BOTH
= 0;
40205 return ignore_masks
;
40209 /* Hook to determine if one function can safely inline another. */
40212 rs6000_can_inline_p (tree caller
, tree callee
)
40215 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
40216 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
40218 /* If callee has no option attributes, then it is ok to inline. */
40222 /* If caller has no option attributes, but callee does then it is not ok to
40224 else if (!caller_tree
)
40229 struct cl_target_option
*caller_opts
= TREE_TARGET_OPTION (caller_tree
);
40230 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
40232 /* Callee's options should a subset of the caller's, i.e. a vsx function
40233 can inline an altivec function but a non-vsx function can't inline a
40235 if ((caller_opts
->x_rs6000_isa_flags
& callee_opts
->x_rs6000_isa_flags
)
40236 == callee_opts
->x_rs6000_isa_flags
)
40240 if (TARGET_DEBUG_TARGET
)
40241 fprintf (stderr
, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
40242 (DECL_NAME (caller
)
40243 ? IDENTIFIER_POINTER (DECL_NAME (caller
))
40245 (DECL_NAME (callee
)
40246 ? IDENTIFIER_POINTER (DECL_NAME (callee
))
40248 (ret
? "can" : "cannot"));
40253 /* Allocate a stack temp and fixup the address so it meets the particular
40254 memory requirements (either offetable or REG+REG addressing). */
40257 rs6000_allocate_stack_temp (machine_mode mode
,
40258 bool offsettable_p
,
40261 rtx stack
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
40262 rtx addr
= XEXP (stack
, 0);
40263 int strict_p
= (reload_in_progress
|| reload_completed
);
40265 if (!legitimate_indirect_address_p (addr
, strict_p
))
40268 && !rs6000_legitimate_offset_address_p (mode
, addr
, strict_p
, true))
40269 stack
= replace_equiv_address (stack
, copy_addr_to_reg (addr
));
40271 else if (reg_reg_p
&& !legitimate_indexed_address_p (addr
, strict_p
))
40272 stack
= replace_equiv_address (stack
, copy_addr_to_reg (addr
));
40278 /* Given a memory reference, if it is not a reg or reg+reg addressing, convert
40279 to such a form to deal with memory reference instructions like STFIWX that
40280 only take reg+reg addressing. */
40283 rs6000_address_for_fpconvert (rtx x
)
40285 int strict_p
= (reload_in_progress
|| reload_completed
);
40288 gcc_assert (MEM_P (x
));
40289 addr
= XEXP (x
, 0);
40290 if (! legitimate_indirect_address_p (addr
, strict_p
)
40291 && ! legitimate_indexed_address_p (addr
, strict_p
))
40293 if (GET_CODE (addr
) == PRE_INC
|| GET_CODE (addr
) == PRE_DEC
)
40295 rtx reg
= XEXP (addr
, 0);
40296 HOST_WIDE_INT size
= GET_MODE_SIZE (GET_MODE (x
));
40297 rtx size_rtx
= GEN_INT ((GET_CODE (addr
) == PRE_DEC
) ? -size
: size
);
40298 gcc_assert (REG_P (reg
));
40299 emit_insn (gen_add3_insn (reg
, reg
, size_rtx
));
40302 else if (GET_CODE (addr
) == PRE_MODIFY
)
40304 rtx reg
= XEXP (addr
, 0);
40305 rtx expr
= XEXP (addr
, 1);
40306 gcc_assert (REG_P (reg
));
40307 gcc_assert (GET_CODE (expr
) == PLUS
);
40308 emit_insn (gen_add3_insn (reg
, XEXP (expr
, 0), XEXP (expr
, 1)));
40312 x
= replace_equiv_address (x
, copy_addr_to_reg (addr
));
40318 /* Given a memory reference, if it is not in the form for altivec memory
40319 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
40320 convert to the altivec format. */
40323 rs6000_address_for_altivec (rtx x
)
40325 gcc_assert (MEM_P (x
));
40326 if (!altivec_indexed_or_indirect_operand (x
, GET_MODE (x
)))
40328 rtx addr
= XEXP (x
, 0);
40329 int strict_p
= (reload_in_progress
|| reload_completed
);
40331 if (!legitimate_indexed_address_p (addr
, strict_p
)
40332 && !legitimate_indirect_address_p (addr
, strict_p
))
40333 addr
= copy_to_mode_reg (Pmode
, addr
);
40335 addr
= gen_rtx_AND (Pmode
, addr
, GEN_INT (-16));
40336 x
= change_address (x
, GET_MODE (x
), addr
);
40342 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
40344 On the RS/6000, all integer constants are acceptable, most won't be valid
40345 for particular insns, though. Only easy FP constants are acceptable. */
40348 rs6000_legitimate_constant_p (machine_mode mode
, rtx x
)
40350 if (TARGET_ELF
&& tls_referenced_p (x
))
40353 return ((GET_CODE (x
) != CONST_DOUBLE
&& GET_CODE (x
) != CONST_VECTOR
)
40354 || GET_MODE (x
) == VOIDmode
40355 || (TARGET_POWERPC64
&& mode
== DImode
)
40356 || easy_fp_constant (x
, mode
)
40357 || easy_vector_constant (x
, mode
));
40361 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
40364 chain_already_loaded (rtx_insn
*last
)
40366 for (; last
!= NULL
; last
= PREV_INSN (last
))
40368 if (NONJUMP_INSN_P (last
))
40370 rtx patt
= PATTERN (last
);
40372 if (GET_CODE (patt
) == SET
)
40374 rtx lhs
= XEXP (patt
, 0);
40376 if (REG_P (lhs
) && REGNO (lhs
) == STATIC_CHAIN_REGNUM
)
40384 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
40387 rs6000_call_aix (rtx value
, rtx func_desc
, rtx flag
, rtx cookie
)
40389 const bool direct_call_p
40390 = GET_CODE (func_desc
) == SYMBOL_REF
&& SYMBOL_REF_FUNCTION_P (func_desc
);
40391 rtx toc_reg
= gen_rtx_REG (Pmode
, TOC_REGNUM
);
40392 rtx toc_load
= NULL_RTX
;
40393 rtx toc_restore
= NULL_RTX
;
40395 rtx abi_reg
= NULL_RTX
;
40400 /* Handle longcall attributes. */
40401 if (INTVAL (cookie
) & CALL_LONG
)
40402 func_desc
= rs6000_longcall_ref (func_desc
);
40404 /* Handle indirect calls. */
40405 if (GET_CODE (func_desc
) != SYMBOL_REF
40406 || (DEFAULT_ABI
== ABI_AIX
&& !SYMBOL_REF_FUNCTION_P (func_desc
)))
40408 /* Save the TOC into its reserved slot before the call,
40409 and prepare to restore it after the call. */
40410 rtx stack_ptr
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
40411 rtx stack_toc_offset
= GEN_INT (RS6000_TOC_SAVE_SLOT
);
40412 rtx stack_toc_mem
= gen_frame_mem (Pmode
,
40413 gen_rtx_PLUS (Pmode
, stack_ptr
,
40414 stack_toc_offset
));
40415 rtx stack_toc_unspec
= gen_rtx_UNSPEC (Pmode
,
40416 gen_rtvec (1, stack_toc_offset
),
40418 toc_restore
= gen_rtx_SET (toc_reg
, stack_toc_unspec
);
40420 /* Can we optimize saving the TOC in the prologue or
40421 do we need to do it at every call? */
40422 if (TARGET_SAVE_TOC_INDIRECT
&& !cfun
->calls_alloca
)
40423 cfun
->machine
->save_toc_in_prologue
= true;
40426 MEM_VOLATILE_P (stack_toc_mem
) = 1;
40427 emit_move_insn (stack_toc_mem
, toc_reg
);
40430 if (DEFAULT_ABI
== ABI_ELFv2
)
40432 /* A function pointer in the ELFv2 ABI is just a plain address, but
40433 the ABI requires it to be loaded into r12 before the call. */
40434 func_addr
= gen_rtx_REG (Pmode
, 12);
40435 emit_move_insn (func_addr
, func_desc
);
40436 abi_reg
= func_addr
;
40440 /* A function pointer under AIX is a pointer to a data area whose
40441 first word contains the actual address of the function, whose
40442 second word contains a pointer to its TOC, and whose third word
40443 contains a value to place in the static chain register (r11).
40444 Note that if we load the static chain, our "trampoline" need
40445 not have any executable code. */
40447 /* Load up address of the actual function. */
40448 func_desc
= force_reg (Pmode
, func_desc
);
40449 func_addr
= gen_reg_rtx (Pmode
);
40450 emit_move_insn (func_addr
, gen_rtx_MEM (Pmode
, func_desc
));
40452 /* Prepare to load the TOC of the called function. Note that the
40453 TOC load must happen immediately before the actual call so
40454 that unwinding the TOC registers works correctly. See the
40455 comment in frob_update_context. */
40456 rtx func_toc_offset
= GEN_INT (GET_MODE_SIZE (Pmode
));
40457 rtx func_toc_mem
= gen_rtx_MEM (Pmode
,
40458 gen_rtx_PLUS (Pmode
, func_desc
,
40460 toc_load
= gen_rtx_USE (VOIDmode
, func_toc_mem
);
40462 /* If we have a static chain, load it up. But, if the call was
40463 originally direct, the 3rd word has not been written since no
40464 trampoline has been built, so we ought not to load it, lest we
40465 override a static chain value. */
40467 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
40468 && !chain_already_loaded (get_current_sequence ()->next
->last
))
40470 rtx sc_reg
= gen_rtx_REG (Pmode
, STATIC_CHAIN_REGNUM
);
40471 rtx func_sc_offset
= GEN_INT (2 * GET_MODE_SIZE (Pmode
));
40472 rtx func_sc_mem
= gen_rtx_MEM (Pmode
,
40473 gen_rtx_PLUS (Pmode
, func_desc
,
40475 emit_move_insn (sc_reg
, func_sc_mem
);
40482 /* Direct calls use the TOC: for local calls, the callee will
40483 assume the TOC register is set; for non-local calls, the
40484 PLT stub needs the TOC register. */
40486 func_addr
= func_desc
;
40489 /* Create the call. */
40490 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), flag
);
40491 if (value
!= NULL_RTX
)
40492 call
[0] = gen_rtx_SET (value
, call
[0]);
40496 call
[n_call
++] = toc_load
;
40498 call
[n_call
++] = toc_restore
;
40500 call
[n_call
++] = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (Pmode
, LR_REGNO
));
40502 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (n_call
, call
));
40503 insn
= emit_call_insn (insn
);
40505 /* Mention all registers defined by the ABI to hold information
40506 as uses in CALL_INSN_FUNCTION_USAGE. */
40508 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), abi_reg
);
40511 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
40514 rs6000_sibcall_aix (rtx value
, rtx func_desc
, rtx flag
, rtx cookie
)
40519 gcc_assert (INTVAL (cookie
) == 0);
40521 /* Create the call. */
40522 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_desc
), flag
);
40523 if (value
!= NULL_RTX
)
40524 call
[0] = gen_rtx_SET (value
, call
[0]);
40526 call
[1] = simple_return_rtx
;
40528 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (2, call
));
40529 insn
= emit_call_insn (insn
);
40531 /* Note use of the TOC register. */
40532 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), gen_rtx_REG (Pmode
, TOC_REGNUM
));
40535 /* Return whether we need to always update the saved TOC pointer when we update
40536 the stack pointer. */
40539 rs6000_save_toc_in_prologue_p (void)
40541 return (cfun
&& cfun
->machine
&& cfun
->machine
->save_toc_in_prologue
);
40544 #ifdef HAVE_GAS_HIDDEN
40545 # define USE_HIDDEN_LINKONCE 1
40547 # define USE_HIDDEN_LINKONCE 0
40550 /* Fills in the label name that should be used for a 476 link stack thunk. */
40553 get_ppc476_thunk_name (char name
[32])
40555 gcc_assert (TARGET_LINK_STACK
);
40557 if (USE_HIDDEN_LINKONCE
)
40558 sprintf (name
, "__ppc476.get_thunk");
40560 ASM_GENERATE_INTERNAL_LABEL (name
, "LPPC476_", 0);
40563 /* This function emits the simple thunk routine that is used to preserve
40564 the link stack on the 476 cpu. */
40566 static void rs6000_code_end (void) ATTRIBUTE_UNUSED
;
40568 rs6000_code_end (void)
40573 if (!TARGET_LINK_STACK
)
40576 get_ppc476_thunk_name (name
);
40578 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
, get_identifier (name
),
40579 build_function_type_list (void_type_node
, NULL_TREE
));
40580 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
40581 NULL_TREE
, void_type_node
);
40582 TREE_PUBLIC (decl
) = 1;
40583 TREE_STATIC (decl
) = 1;
40586 if (USE_HIDDEN_LINKONCE
&& !TARGET_XCOFF
)
40588 cgraph_node::create (decl
)->set_comdat_group (DECL_ASSEMBLER_NAME (decl
));
40589 targetm
.asm_out
.unique_section (decl
, 0);
40590 switch_to_section (get_named_section (decl
, NULL
, 0));
40591 DECL_WEAK (decl
) = 1;
40592 ASM_WEAKEN_DECL (asm_out_file
, decl
, name
, 0);
40593 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
40594 targetm
.asm_out
.assemble_visibility (decl
, VISIBILITY_HIDDEN
);
40595 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
40600 switch_to_section (text_section
);
40601 ASM_OUTPUT_LABEL (asm_out_file
, name
);
40604 DECL_INITIAL (decl
) = make_node (BLOCK
);
40605 current_function_decl
= decl
;
40606 allocate_struct_function (decl
, false);
40607 init_function_start (decl
);
40608 first_function_block_is_cold
= false;
40609 /* Make sure unwind info is emitted for the thunk if needed. */
40610 final_start_function (emit_barrier (), asm_out_file
, 1);
40612 fputs ("\tblr\n", asm_out_file
);
40614 final_end_function ();
40615 init_insn_lengths ();
40616 free_after_compilation (cfun
);
40618 current_function_decl
= NULL
;
40621 /* Add r30 to hard reg set if the prologue sets it up and it is not
40622 pic_offset_table_rtx. */
40625 rs6000_set_up_by_prologue (struct hard_reg_set_container
*set
)
40627 if (!TARGET_SINGLE_PIC_BASE
40629 && TARGET_MINIMAL_TOC
40630 && !constant_pool_empty_p ())
40631 add_to_hard_reg_set (&set
->set
, Pmode
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
40632 if (cfun
->machine
->split_stack_argp_used
)
40633 add_to_hard_reg_set (&set
->set
, Pmode
, 12);
40637 /* Helper function for rs6000_split_logical to emit a logical instruction after
40638 spliting the operation to single GPR registers.
40640 DEST is the destination register.
40641 OP1 and OP2 are the input source registers.
40642 CODE is the base operation (AND, IOR, XOR, NOT).
40643 MODE is the machine mode.
40644 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
40645 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
40646 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
40649 rs6000_split_logical_inner (rtx dest
,
40652 enum rtx_code code
,
40654 bool complement_final_p
,
40655 bool complement_op1_p
,
40656 bool complement_op2_p
)
40660 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
40661 if (op2
&& GET_CODE (op2
) == CONST_INT
40662 && (mode
== SImode
|| (mode
== DImode
&& TARGET_POWERPC64
))
40663 && !complement_final_p
&& !complement_op1_p
&& !complement_op2_p
)
40665 HOST_WIDE_INT mask
= GET_MODE_MASK (mode
);
40666 HOST_WIDE_INT value
= INTVAL (op2
) & mask
;
40668 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
40673 emit_insn (gen_rtx_SET (dest
, const0_rtx
));
40677 else if (value
== mask
)
40679 if (!rtx_equal_p (dest
, op1
))
40680 emit_insn (gen_rtx_SET (dest
, op1
));
40685 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
40686 into separate ORI/ORIS or XORI/XORIS instrucitons. */
40687 else if (code
== IOR
|| code
== XOR
)
40691 if (!rtx_equal_p (dest
, op1
))
40692 emit_insn (gen_rtx_SET (dest
, op1
));
40698 if (code
== AND
&& mode
== SImode
40699 && !complement_final_p
&& !complement_op1_p
&& !complement_op2_p
)
40701 emit_insn (gen_andsi3 (dest
, op1
, op2
));
40705 if (complement_op1_p
)
40706 op1
= gen_rtx_NOT (mode
, op1
);
40708 if (complement_op2_p
)
40709 op2
= gen_rtx_NOT (mode
, op2
);
40711 /* For canonical RTL, if only one arm is inverted it is the first. */
40712 if (!complement_op1_p
&& complement_op2_p
)
40713 std::swap (op1
, op2
);
40715 bool_rtx
= ((code
== NOT
)
40716 ? gen_rtx_NOT (mode
, op1
)
40717 : gen_rtx_fmt_ee (code
, mode
, op1
, op2
));
40719 if (complement_final_p
)
40720 bool_rtx
= gen_rtx_NOT (mode
, bool_rtx
);
40722 emit_insn (gen_rtx_SET (dest
, bool_rtx
));
40725 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
40726 operations are split immediately during RTL generation to allow for more
40727 optimizations of the AND/IOR/XOR.
40729 OPERANDS is an array containing the destination and two input operands.
40730 CODE is the base operation (AND, IOR, XOR, NOT).
40731 MODE is the machine mode.
40732 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
40733 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
40734 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
40735 CLOBBER_REG is either NULL or a scratch register of type CC to allow
40736 formation of the AND instructions. */
40739 rs6000_split_logical_di (rtx operands
[3],
40740 enum rtx_code code
,
40741 bool complement_final_p
,
40742 bool complement_op1_p
,
40743 bool complement_op2_p
)
40745 const HOST_WIDE_INT lower_32bits
= HOST_WIDE_INT_C(0xffffffff);
40746 const HOST_WIDE_INT upper_32bits
= ~ lower_32bits
;
40747 const HOST_WIDE_INT sign_bit
= HOST_WIDE_INT_C(0x80000000);
40748 enum hi_lo
{ hi
= 0, lo
= 1 };
40749 rtx op0_hi_lo
[2], op1_hi_lo
[2], op2_hi_lo
[2];
40752 op0_hi_lo
[hi
] = gen_highpart (SImode
, operands
[0]);
40753 op1_hi_lo
[hi
] = gen_highpart (SImode
, operands
[1]);
40754 op0_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[0]);
40755 op1_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[1]);
40758 op2_hi_lo
[hi
] = op2_hi_lo
[lo
] = NULL_RTX
;
40761 if (GET_CODE (operands
[2]) != CONST_INT
)
40763 op2_hi_lo
[hi
] = gen_highpart_mode (SImode
, DImode
, operands
[2]);
40764 op2_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[2]);
40768 HOST_WIDE_INT value
= INTVAL (operands
[2]);
40769 HOST_WIDE_INT value_hi_lo
[2];
40771 gcc_assert (!complement_final_p
);
40772 gcc_assert (!complement_op1_p
);
40773 gcc_assert (!complement_op2_p
);
40775 value_hi_lo
[hi
] = value
>> 32;
40776 value_hi_lo
[lo
] = value
& lower_32bits
;
40778 for (i
= 0; i
< 2; i
++)
40780 HOST_WIDE_INT sub_value
= value_hi_lo
[i
];
40782 if (sub_value
& sign_bit
)
40783 sub_value
|= upper_32bits
;
40785 op2_hi_lo
[i
] = GEN_INT (sub_value
);
40787 /* If this is an AND instruction, check to see if we need to load
40788 the value in a register. */
40789 if (code
== AND
&& sub_value
!= -1 && sub_value
!= 0
40790 && !and_operand (op2_hi_lo
[i
], SImode
))
40791 op2_hi_lo
[i
] = force_reg (SImode
, op2_hi_lo
[i
]);
40796 for (i
= 0; i
< 2; i
++)
40798 /* Split large IOR/XOR operations. */
40799 if ((code
== IOR
|| code
== XOR
)
40800 && GET_CODE (op2_hi_lo
[i
]) == CONST_INT
40801 && !complement_final_p
40802 && !complement_op1_p
40803 && !complement_op2_p
40804 && !logical_const_operand (op2_hi_lo
[i
], SImode
))
40806 HOST_WIDE_INT value
= INTVAL (op2_hi_lo
[i
]);
40807 HOST_WIDE_INT hi_16bits
= value
& HOST_WIDE_INT_C(0xffff0000);
40808 HOST_WIDE_INT lo_16bits
= value
& HOST_WIDE_INT_C(0x0000ffff);
40809 rtx tmp
= gen_reg_rtx (SImode
);
40811 /* Make sure the constant is sign extended. */
40812 if ((hi_16bits
& sign_bit
) != 0)
40813 hi_16bits
|= upper_32bits
;
40815 rs6000_split_logical_inner (tmp
, op1_hi_lo
[i
], GEN_INT (hi_16bits
),
40816 code
, SImode
, false, false, false);
40818 rs6000_split_logical_inner (op0_hi_lo
[i
], tmp
, GEN_INT (lo_16bits
),
40819 code
, SImode
, false, false, false);
40822 rs6000_split_logical_inner (op0_hi_lo
[i
], op1_hi_lo
[i
], op2_hi_lo
[i
],
40823 code
, SImode
, complement_final_p
,
40824 complement_op1_p
, complement_op2_p
);
40830 /* Split the insns that make up boolean operations operating on multiple GPR
40831 registers. The boolean MD patterns ensure that the inputs either are
40832 exactly the same as the output registers, or there is no overlap.
40834 OPERANDS is an array containing the destination and two input operands.
40835 CODE is the base operation (AND, IOR, XOR, NOT).
40836 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
40837 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
40838 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
40841 rs6000_split_logical (rtx operands
[3],
40842 enum rtx_code code
,
40843 bool complement_final_p
,
40844 bool complement_op1_p
,
40845 bool complement_op2_p
)
40847 machine_mode mode
= GET_MODE (operands
[0]);
40848 machine_mode sub_mode
;
40850 int sub_size
, regno0
, regno1
, nregs
, i
;
40852 /* If this is DImode, use the specialized version that can run before
40853 register allocation. */
40854 if (mode
== DImode
&& !TARGET_POWERPC64
)
40856 rs6000_split_logical_di (operands
, code
, complement_final_p
,
40857 complement_op1_p
, complement_op2_p
);
40863 op2
= (code
== NOT
) ? NULL_RTX
: operands
[2];
40864 sub_mode
= (TARGET_POWERPC64
) ? DImode
: SImode
;
40865 sub_size
= GET_MODE_SIZE (sub_mode
);
40866 regno0
= REGNO (op0
);
40867 regno1
= REGNO (op1
);
40869 gcc_assert (reload_completed
);
40870 gcc_assert (IN_RANGE (regno0
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
40871 gcc_assert (IN_RANGE (regno1
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
40873 nregs
= rs6000_hard_regno_nregs
[(int)mode
][regno0
];
40874 gcc_assert (nregs
> 1);
40876 if (op2
&& REG_P (op2
))
40877 gcc_assert (IN_RANGE (REGNO (op2
), FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
40879 for (i
= 0; i
< nregs
; i
++)
40881 int offset
= i
* sub_size
;
40882 rtx sub_op0
= simplify_subreg (sub_mode
, op0
, mode
, offset
);
40883 rtx sub_op1
= simplify_subreg (sub_mode
, op1
, mode
, offset
);
40884 rtx sub_op2
= ((code
== NOT
)
40886 : simplify_subreg (sub_mode
, op2
, mode
, offset
));
40888 rs6000_split_logical_inner (sub_op0
, sub_op1
, sub_op2
, code
, sub_mode
,
40889 complement_final_p
, complement_op1_p
,
40897 /* Return true if the peephole2 can combine a load involving a combination of
40898 an addis instruction and a load with an offset that can be fused together on
40902 fusion_gpr_load_p (rtx addis_reg
, /* register set via addis. */
40903 rtx addis_value
, /* addis value. */
40904 rtx target
, /* target register that is loaded. */
40905 rtx mem
) /* bottom part of the memory addr. */
40910 /* Validate arguments. */
40911 if (!base_reg_operand (addis_reg
, GET_MODE (addis_reg
)))
40914 if (!base_reg_operand (target
, GET_MODE (target
)))
40917 if (!fusion_gpr_addis (addis_value
, GET_MODE (addis_value
)))
40920 /* Allow sign/zero extension. */
40921 if (GET_CODE (mem
) == ZERO_EXTEND
40922 || (GET_CODE (mem
) == SIGN_EXTEND
&& TARGET_P8_FUSION_SIGN
))
40923 mem
= XEXP (mem
, 0);
40928 if (!fusion_gpr_mem_load (mem
, GET_MODE (mem
)))
40931 addr
= XEXP (mem
, 0); /* either PLUS or LO_SUM. */
40932 if (GET_CODE (addr
) != PLUS
&& GET_CODE (addr
) != LO_SUM
)
40935 /* Validate that the register used to load the high value is either the
40936 register being loaded, or we can safely replace its use.
40938 This function is only called from the peephole2 pass and we assume that
40939 there are 2 instructions in the peephole (addis and load), so we want to
40940 check if the target register was not used in the memory address and the
40941 register to hold the addis result is dead after the peephole. */
40942 if (REGNO (addis_reg
) != REGNO (target
))
40944 if (reg_mentioned_p (target
, mem
))
40947 if (!peep2_reg_dead_p (2, addis_reg
))
40950 /* If the target register being loaded is the stack pointer, we must
40951 avoid loading any other value into it, even temporarily. */
40952 if (REG_P (target
) && REGNO (target
) == STACK_POINTER_REGNUM
)
40956 base_reg
= XEXP (addr
, 0);
40957 return REGNO (addis_reg
) == REGNO (base_reg
);
40960 /* During the peephole2 pass, adjust and expand the insns for a load fusion
40961 sequence. We adjust the addis register to use the target register. If the
40962 load sign extends, we adjust the code to do the zero extending load, and an
40963 explicit sign extension later since the fusion only covers zero extending
40967 operands[0] register set with addis (to be replaced with target)
40968 operands[1] value set via addis
40969 operands[2] target register being loaded
40970 operands[3] D-form memory reference using operands[0]. */
40973 expand_fusion_gpr_load (rtx
*operands
)
40975 rtx addis_value
= operands
[1];
40976 rtx target
= operands
[2];
40977 rtx orig_mem
= operands
[3];
40978 rtx new_addr
, new_mem
, orig_addr
, offset
;
40979 enum rtx_code plus_or_lo_sum
;
40980 machine_mode target_mode
= GET_MODE (target
);
40981 machine_mode extend_mode
= target_mode
;
40982 machine_mode ptr_mode
= Pmode
;
40983 enum rtx_code extend
= UNKNOWN
;
40985 if (GET_CODE (orig_mem
) == ZERO_EXTEND
40986 || (TARGET_P8_FUSION_SIGN
&& GET_CODE (orig_mem
) == SIGN_EXTEND
))
40988 extend
= GET_CODE (orig_mem
);
40989 orig_mem
= XEXP (orig_mem
, 0);
40990 target_mode
= GET_MODE (orig_mem
);
40993 gcc_assert (MEM_P (orig_mem
));
40995 orig_addr
= XEXP (orig_mem
, 0);
40996 plus_or_lo_sum
= GET_CODE (orig_addr
);
40997 gcc_assert (plus_or_lo_sum
== PLUS
|| plus_or_lo_sum
== LO_SUM
);
40999 offset
= XEXP (orig_addr
, 1);
41000 new_addr
= gen_rtx_fmt_ee (plus_or_lo_sum
, ptr_mode
, addis_value
, offset
);
41001 new_mem
= replace_equiv_address_nv (orig_mem
, new_addr
, false);
41003 if (extend
!= UNKNOWN
)
41004 new_mem
= gen_rtx_fmt_e (ZERO_EXTEND
, extend_mode
, new_mem
);
41006 new_mem
= gen_rtx_UNSPEC (extend_mode
, gen_rtvec (1, new_mem
),
41007 UNSPEC_FUSION_GPR
);
41008 emit_insn (gen_rtx_SET (target
, new_mem
));
41010 if (extend
== SIGN_EXTEND
)
41012 int sub_off
= ((BYTES_BIG_ENDIAN
)
41013 ? GET_MODE_SIZE (extend_mode
) - GET_MODE_SIZE (target_mode
)
41016 = simplify_subreg (target_mode
, target
, extend_mode
, sub_off
);
41018 emit_insn (gen_rtx_SET (target
,
41019 gen_rtx_SIGN_EXTEND (extend_mode
, sign_reg
)));
41025 /* Emit the addis instruction that will be part of a fused instruction
41029 emit_fusion_addis (rtx target
, rtx addis_value
, const char *comment
,
41030 const char *mode_name
)
41033 char insn_template
[80];
41034 const char *addis_str
= NULL
;
41035 const char *comment_str
= ASM_COMMENT_START
;
41037 if (*comment_str
== ' ')
41040 /* Emit the addis instruction. */
41041 fuse_ops
[0] = target
;
41042 if (satisfies_constraint_L (addis_value
))
41044 fuse_ops
[1] = addis_value
;
41045 addis_str
= "lis %0,%v1";
41048 else if (GET_CODE (addis_value
) == PLUS
)
41050 rtx op0
= XEXP (addis_value
, 0);
41051 rtx op1
= XEXP (addis_value
, 1);
41053 if (REG_P (op0
) && CONST_INT_P (op1
)
41054 && satisfies_constraint_L (op1
))
41058 addis_str
= "addis %0,%1,%v2";
41062 else if (GET_CODE (addis_value
) == HIGH
)
41064 rtx value
= XEXP (addis_value
, 0);
41065 if (GET_CODE (value
) == UNSPEC
&& XINT (value
, 1) == UNSPEC_TOCREL
)
41067 fuse_ops
[1] = XVECEXP (value
, 0, 0); /* symbol ref. */
41068 fuse_ops
[2] = XVECEXP (value
, 0, 1); /* TOC register. */
41070 addis_str
= "addis %0,%2,%1@toc@ha";
41072 else if (TARGET_XCOFF
)
41073 addis_str
= "addis %0,%1@u(%2)";
41076 gcc_unreachable ();
41079 else if (GET_CODE (value
) == PLUS
)
41081 rtx op0
= XEXP (value
, 0);
41082 rtx op1
= XEXP (value
, 1);
41084 if (GET_CODE (op0
) == UNSPEC
41085 && XINT (op0
, 1) == UNSPEC_TOCREL
41086 && CONST_INT_P (op1
))
41088 fuse_ops
[1] = XVECEXP (op0
, 0, 0); /* symbol ref. */
41089 fuse_ops
[2] = XVECEXP (op0
, 0, 1); /* TOC register. */
41092 addis_str
= "addis %0,%2,%1+%3@toc@ha";
41094 else if (TARGET_XCOFF
)
41095 addis_str
= "addis %0,%1+%3@u(%2)";
41098 gcc_unreachable ();
41102 else if (satisfies_constraint_L (value
))
41104 fuse_ops
[1] = value
;
41105 addis_str
= "lis %0,%v1";
41108 else if (TARGET_ELF
&& !TARGET_POWERPC64
&& CONSTANT_P (value
))
41110 fuse_ops
[1] = value
;
41111 addis_str
= "lis %0,%1@ha";
41116 fatal_insn ("Could not generate addis value for fusion", addis_value
);
41118 sprintf (insn_template
, "%s\t\t%s %s, type %s", addis_str
, comment_str
,
41119 comment
, mode_name
);
41120 output_asm_insn (insn_template
, fuse_ops
);
41123 /* Emit a D-form load or store instruction that is the second instruction
41124 of a fusion sequence. */
41127 emit_fusion_load_store (rtx load_store_reg
, rtx addis_reg
, rtx offset
,
41128 const char *insn_str
)
41131 char insn_template
[80];
41133 fuse_ops
[0] = load_store_reg
;
41134 fuse_ops
[1] = addis_reg
;
41136 if (CONST_INT_P (offset
) && satisfies_constraint_I (offset
))
41138 sprintf (insn_template
, "%s %%0,%%2(%%1)", insn_str
);
41139 fuse_ops
[2] = offset
;
41140 output_asm_insn (insn_template
, fuse_ops
);
41143 else if (GET_CODE (offset
) == UNSPEC
41144 && XINT (offset
, 1) == UNSPEC_TOCREL
)
41147 sprintf (insn_template
, "%s %%0,%%2@toc@l(%%1)", insn_str
);
41149 else if (TARGET_XCOFF
)
41150 sprintf (insn_template
, "%s %%0,%%2@l(%%1)", insn_str
);
41153 gcc_unreachable ();
41155 fuse_ops
[2] = XVECEXP (offset
, 0, 0);
41156 output_asm_insn (insn_template
, fuse_ops
);
41159 else if (GET_CODE (offset
) == PLUS
41160 && GET_CODE (XEXP (offset
, 0)) == UNSPEC
41161 && XINT (XEXP (offset
, 0), 1) == UNSPEC_TOCREL
41162 && CONST_INT_P (XEXP (offset
, 1)))
41164 rtx tocrel_unspec
= XEXP (offset
, 0);
41166 sprintf (insn_template
, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str
);
41168 else if (TARGET_XCOFF
)
41169 sprintf (insn_template
, "%s %%0,%%2+%%3@l(%%1)", insn_str
);
41172 gcc_unreachable ();
41174 fuse_ops
[2] = XVECEXP (tocrel_unspec
, 0, 0);
41175 fuse_ops
[3] = XEXP (offset
, 1);
41176 output_asm_insn (insn_template
, fuse_ops
);
41179 else if (TARGET_ELF
&& !TARGET_POWERPC64
&& CONSTANT_P (offset
))
41181 sprintf (insn_template
, "%s %%0,%%2@l(%%1)", insn_str
);
41183 fuse_ops
[2] = offset
;
41184 output_asm_insn (insn_template
, fuse_ops
);
41188 fatal_insn ("Unable to generate load/store offset for fusion", offset
);
41193 /* Wrap a TOC address that can be fused to indicate that special fusion
41194 processing is needed. */
41197 fusion_wrap_memory_address (rtx old_mem
)
41199 rtx old_addr
= XEXP (old_mem
, 0);
41200 rtvec v
= gen_rtvec (1, old_addr
);
41201 rtx new_addr
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_FUSION_ADDIS
);
41202 return replace_equiv_address_nv (old_mem
, new_addr
, false);
41205 /* Given an address, convert it into the addis and load offset parts. Addresses
41206 created during the peephole2 process look like:
41207 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
41208 (unspec [(...)] UNSPEC_TOCREL))
41210 Addresses created via toc fusion look like:
41211 (unspec [(unspec [(...)] UNSPEC_TOCREL)] UNSPEC_FUSION_ADDIS)) */
41214 fusion_split_address (rtx addr
, rtx
*p_hi
, rtx
*p_lo
)
41218 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_FUSION_ADDIS
)
41220 lo
= XVECEXP (addr
, 0, 0);
41221 hi
= gen_rtx_HIGH (Pmode
, lo
);
41223 else if (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == LO_SUM
)
41225 hi
= XEXP (addr
, 0);
41226 lo
= XEXP (addr
, 1);
41229 gcc_unreachable ();
41235 /* Return a string to fuse an addis instruction with a gpr load to the same
41236 register that we loaded up the addis instruction. The address that is used
41237 is the logical address that was formed during peephole2:
41238 (lo_sum (high) (low-part))
41240 Or the address is the TOC address that is wrapped before register allocation:
41241 (unspec [(addr) (toc-reg)] UNSPEC_FUSION_ADDIS)
41243 The code is complicated, so we call output_asm_insn directly, and just
41247 emit_fusion_gpr_load (rtx target
, rtx mem
)
41252 const char *load_str
= NULL
;
41253 const char *mode_name
= NULL
;
41256 if (GET_CODE (mem
) == ZERO_EXTEND
)
41257 mem
= XEXP (mem
, 0);
41259 gcc_assert (REG_P (target
) && MEM_P (mem
));
41261 addr
= XEXP (mem
, 0);
41262 fusion_split_address (addr
, &addis_value
, &load_offset
);
41264 /* Now emit the load instruction to the same register. */
41265 mode
= GET_MODE (mem
);
41269 mode_name
= "char";
41274 mode_name
= "short";
41280 mode_name
= (mode
== SFmode
) ? "float" : "int";
41286 gcc_assert (TARGET_POWERPC64
);
41287 mode_name
= (mode
== DFmode
) ? "double" : "long";
41292 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target
, mem
));
41295 /* Emit the addis instruction. */
41296 emit_fusion_addis (target
, addis_value
, "gpr load fusion", mode_name
);
41298 /* Emit the D-form load instruction. */
41299 emit_fusion_load_store (target
, target
, load_offset
, load_str
);
41305 /* Return true if the peephole2 can combine a load/store involving a
41306 combination of an addis instruction and the memory operation. This was
41307 added to the ISA 3.0 (power9) hardware. */
41310 fusion_p9_p (rtx addis_reg
, /* register set via addis. */
41311 rtx addis_value
, /* addis value. */
41312 rtx dest
, /* destination (memory or register). */
41313 rtx src
) /* source (register or memory). */
41315 rtx addr
, mem
, offset
;
41316 machine_mode mode
= GET_MODE (src
);
41318 /* Validate arguments. */
41319 if (!base_reg_operand (addis_reg
, GET_MODE (addis_reg
)))
41322 if (!fusion_gpr_addis (addis_value
, GET_MODE (addis_value
)))
41325 /* Ignore extend operations that are part of the load. */
41326 if (GET_CODE (src
) == FLOAT_EXTEND
|| GET_CODE (src
) == ZERO_EXTEND
)
41327 src
= XEXP (src
, 0);
41329 /* Test for memory<-register or register<-memory. */
41330 if (fpr_reg_operand (src
, mode
) || int_reg_operand (src
, mode
))
41338 else if (MEM_P (src
))
41340 if (!fpr_reg_operand (dest
, mode
) && !int_reg_operand (dest
, mode
))
41349 addr
= XEXP (mem
, 0); /* either PLUS or LO_SUM. */
41350 if (GET_CODE (addr
) == PLUS
)
41352 if (!rtx_equal_p (addis_reg
, XEXP (addr
, 0)))
41355 return satisfies_constraint_I (XEXP (addr
, 1));
41358 else if (GET_CODE (addr
) == LO_SUM
)
41360 if (!rtx_equal_p (addis_reg
, XEXP (addr
, 0)))
41363 offset
= XEXP (addr
, 1);
41364 if (TARGET_XCOFF
|| (TARGET_ELF
&& TARGET_POWERPC64
))
41365 return small_toc_ref (offset
, GET_MODE (offset
));
41367 else if (TARGET_ELF
&& !TARGET_POWERPC64
)
41368 return CONSTANT_P (offset
);
41374 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
41378 operands[0] register set with addis
41379 operands[1] value set via addis
41380 operands[2] target register being loaded
41381 operands[3] D-form memory reference using operands[0].
41383 This is similar to the fusion introduced with power8, except it scales to
41384 both loads/stores and does not require the result register to be the same as
41385 the base register. At the moment, we only do this if register set with addis
41389 expand_fusion_p9_load (rtx
*operands
)
41391 rtx tmp_reg
= operands
[0];
41392 rtx addis_value
= operands
[1];
41393 rtx target
= operands
[2];
41394 rtx orig_mem
= operands
[3];
41395 rtx new_addr
, new_mem
, orig_addr
, offset
, set
, clobber
, insn
;
41396 enum rtx_code plus_or_lo_sum
;
41397 machine_mode target_mode
= GET_MODE (target
);
41398 machine_mode extend_mode
= target_mode
;
41399 machine_mode ptr_mode
= Pmode
;
41400 enum rtx_code extend
= UNKNOWN
;
41402 if (GET_CODE (orig_mem
) == FLOAT_EXTEND
|| GET_CODE (orig_mem
) == ZERO_EXTEND
)
41404 extend
= GET_CODE (orig_mem
);
41405 orig_mem
= XEXP (orig_mem
, 0);
41406 target_mode
= GET_MODE (orig_mem
);
41409 gcc_assert (MEM_P (orig_mem
));
41411 orig_addr
= XEXP (orig_mem
, 0);
41412 plus_or_lo_sum
= GET_CODE (orig_addr
);
41413 gcc_assert (plus_or_lo_sum
== PLUS
|| plus_or_lo_sum
== LO_SUM
);
41415 offset
= XEXP (orig_addr
, 1);
41416 new_addr
= gen_rtx_fmt_ee (plus_or_lo_sum
, ptr_mode
, addis_value
, offset
);
41417 new_mem
= replace_equiv_address_nv (orig_mem
, new_addr
, false);
41419 if (extend
!= UNKNOWN
)
41420 new_mem
= gen_rtx_fmt_e (extend
, extend_mode
, new_mem
);
41422 new_mem
= gen_rtx_UNSPEC (extend_mode
, gen_rtvec (1, new_mem
),
41425 set
= gen_rtx_SET (target
, new_mem
);
41426 clobber
= gen_rtx_CLOBBER (VOIDmode
, tmp_reg
);
41427 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
));
41433 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
41437 operands[0] register set with addis
41438 operands[1] value set via addis
41439 operands[2] target D-form memory being stored to
41440 operands[3] register being stored
41442 This is similar to the fusion introduced with power8, except it scales to
41443 both loads/stores and does not require the result register to be the same as
41444 the base register. At the moment, we only do this if register set with addis
41448 expand_fusion_p9_store (rtx
*operands
)
41450 rtx tmp_reg
= operands
[0];
41451 rtx addis_value
= operands
[1];
41452 rtx orig_mem
= operands
[2];
41453 rtx src
= operands
[3];
41454 rtx new_addr
, new_mem
, orig_addr
, offset
, set
, clobber
, insn
, new_src
;
41455 enum rtx_code plus_or_lo_sum
;
41456 machine_mode target_mode
= GET_MODE (orig_mem
);
41457 machine_mode ptr_mode
= Pmode
;
41459 gcc_assert (MEM_P (orig_mem
));
41461 orig_addr
= XEXP (orig_mem
, 0);
41462 plus_or_lo_sum
= GET_CODE (orig_addr
);
41463 gcc_assert (plus_or_lo_sum
== PLUS
|| plus_or_lo_sum
== LO_SUM
);
41465 offset
= XEXP (orig_addr
, 1);
41466 new_addr
= gen_rtx_fmt_ee (plus_or_lo_sum
, ptr_mode
, addis_value
, offset
);
41467 new_mem
= replace_equiv_address_nv (orig_mem
, new_addr
, false);
41469 new_src
= gen_rtx_UNSPEC (target_mode
, gen_rtvec (1, src
),
41472 set
= gen_rtx_SET (new_mem
, new_src
);
41473 clobber
= gen_rtx_CLOBBER (VOIDmode
, tmp_reg
);
41474 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
));
41480 /* Return a string to fuse an addis instruction with a load using extended
41481 fusion. The address that is used is the logical address that was formed
41482 during peephole2: (lo_sum (high) (low-part))
41484 The code is complicated, so we call output_asm_insn directly, and just
41488 emit_fusion_p9_load (rtx reg
, rtx mem
, rtx tmp_reg
)
41490 machine_mode mode
= GET_MODE (reg
);
41494 const char *load_string
;
41497 if (GET_CODE (mem
) == FLOAT_EXTEND
|| GET_CODE (mem
) == ZERO_EXTEND
)
41499 mem
= XEXP (mem
, 0);
41500 mode
= GET_MODE (mem
);
41503 if (GET_CODE (reg
) == SUBREG
)
41505 gcc_assert (SUBREG_BYTE (reg
) == 0);
41506 reg
= SUBREG_REG (reg
);
41510 fatal_insn ("emit_fusion_p9_load, bad reg #1", reg
);
41513 if (FP_REGNO_P (r
))
41515 if (mode
== SFmode
)
41516 load_string
= "lfs";
41517 else if (mode
== DFmode
|| mode
== DImode
)
41518 load_string
= "lfd";
41520 gcc_unreachable ();
41522 else if (ALTIVEC_REGNO_P (r
) && TARGET_P9_DFORM_SCALAR
)
41524 if (mode
== SFmode
)
41525 load_string
= "lxssp";
41526 else if (mode
== DFmode
|| mode
== DImode
)
41527 load_string
= "lxsd";
41529 gcc_unreachable ();
41531 else if (INT_REGNO_P (r
))
41536 load_string
= "lbz";
41539 load_string
= "lhz";
41543 load_string
= "lwz";
41547 if (!TARGET_POWERPC64
)
41548 gcc_unreachable ();
41549 load_string
= "ld";
41552 gcc_unreachable ();
41556 fatal_insn ("emit_fusion_p9_load, bad reg #2", reg
);
41559 fatal_insn ("emit_fusion_p9_load not MEM", mem
);
41561 addr
= XEXP (mem
, 0);
41562 fusion_split_address (addr
, &hi
, &lo
);
41564 /* Emit the addis instruction. */
41565 emit_fusion_addis (tmp_reg
, hi
, "power9 load fusion", GET_MODE_NAME (mode
));
41567 /* Emit the D-form load instruction. */
41568 emit_fusion_load_store (reg
, tmp_reg
, lo
, load_string
);
41573 /* Return a string to fuse an addis instruction with a store using extended
41574 fusion. The address that is used is the logical address that was formed
41575 during peephole2: (lo_sum (high) (low-part))
41577 The code is complicated, so we call output_asm_insn directly, and just
41581 emit_fusion_p9_store (rtx mem
, rtx reg
, rtx tmp_reg
)
41583 machine_mode mode
= GET_MODE (reg
);
41587 const char *store_string
;
41590 if (GET_CODE (reg
) == SUBREG
)
41592 gcc_assert (SUBREG_BYTE (reg
) == 0);
41593 reg
= SUBREG_REG (reg
);
41597 fatal_insn ("emit_fusion_p9_store, bad reg #1", reg
);
41600 if (FP_REGNO_P (r
))
41602 if (mode
== SFmode
)
41603 store_string
= "stfs";
41604 else if (mode
== DFmode
)
41605 store_string
= "stfd";
41607 gcc_unreachable ();
41609 else if (ALTIVEC_REGNO_P (r
) && TARGET_P9_DFORM_SCALAR
)
41611 if (mode
== SFmode
)
41612 store_string
= "stxssp";
41613 else if (mode
== DFmode
|| mode
== DImode
)
41614 store_string
= "stxsd";
41616 gcc_unreachable ();
41618 else if (INT_REGNO_P (r
))
41623 store_string
= "stb";
41626 store_string
= "sth";
41630 store_string
= "stw";
41634 if (!TARGET_POWERPC64
)
41635 gcc_unreachable ();
41636 store_string
= "std";
41639 gcc_unreachable ();
41643 fatal_insn ("emit_fusion_p9_store, bad reg #2", reg
);
41646 fatal_insn ("emit_fusion_p9_store not MEM", mem
);
41648 addr
= XEXP (mem
, 0);
41649 fusion_split_address (addr
, &hi
, &lo
);
41651 /* Emit the addis instruction. */
41652 emit_fusion_addis (tmp_reg
, hi
, "power9 store fusion", GET_MODE_NAME (mode
));
41654 /* Emit the D-form load instruction. */
41655 emit_fusion_load_store (reg
, tmp_reg
, lo
, store_string
);
41661 /* Analyze vector computations and remove unnecessary doubleword
41662 swaps (xxswapdi instructions). This pass is performed only
41663 for little-endian VSX code generation.
41665 For this specific case, loads and stores of 4x32 and 2x64 vectors
41666 are inefficient. These are implemented using the lvx2dx and
41667 stvx2dx instructions, which invert the order of doublewords in
41668 a vector register. Thus the code generation inserts an xxswapdi
41669 after each such load, and prior to each such store. (For spill
41670 code after register assignment, an additional xxswapdi is inserted
41671 following each store in order to return a hard register to its
41674 The extra xxswapdi instructions reduce performance. This can be
41675 particularly bad for vectorized code. The purpose of this pass
41676 is to reduce the number of xxswapdi instructions required for
41679 The primary insight is that much code that operates on vectors
41680 does not care about the relative order of elements in a register,
41681 so long as the correct memory order is preserved. If we have
41682 a computation where all input values are provided by lvxd2x/xxswapdi
41683 sequences, all outputs are stored using xxswapdi/stvxd2x sequences,
41684 and all intermediate computations are pure SIMD (independent of
41685 element order), then all the xxswapdi's associated with the loads
41686 and stores may be removed.
41688 This pass uses some of the infrastructure and logical ideas from
41689 the "web" pass in web.c. We create maximal webs of computations
41690 fitting the description above using union-find. Each such web is
41691 then optimized by removing its unnecessary xxswapdi instructions.
41693 The pass is placed prior to global optimization so that we can
41694 perform the optimization in the safest and simplest way possible;
41695 that is, by replacing each xxswapdi insn with a register copy insn.
41696 Subsequent forward propagation will remove copies where possible.
41698 There are some operations sensitive to element order for which we
41699 can still allow the operation, provided we modify those operations.
41700 These include CONST_VECTORs, for which we must swap the first and
41701 second halves of the constant vector; and SUBREGs, for which we
41702 must adjust the byte offset to account for the swapped doublewords.
41703 A remaining opportunity would be non-immediate-form splats, for
41704 which we should adjust the selected lane of the input. We should
41705 also make code generation adjustments for sum-across operations,
41706 since this is a common vectorizer reduction.
41708 Because we run prior to the first split, we can see loads and stores
41709 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
41710 vector loads and stores that have not yet been split into a permuting
41711 load/store and a swap. (One way this can happen is with a builtin
41712 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
41713 than deleting a swap, we convert the load/store into a permuting
41714 load/store (which effectively removes the swap). */
41716 /* Notes on Permutes
41718 We do not currently handle computations that contain permutes. There
41719 is a general transformation that can be performed correctly, but it
41720 may introduce more expensive code than it replaces. To handle these
41721 would require a cost model to determine when to perform the optimization.
41722 This commentary records how this could be done if desired.
41724 The most general permute is something like this (example for V16QI):
41726 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
41727 (parallel [(const_int a0) (const_int a1)
41729 (const_int a14) (const_int a15)]))
41731 where a0,...,a15 are in [0,31] and select elements from op1 and op2
41732 to produce in the result.
41734 Regardless of mode, we can convert the PARALLEL to a mask of 16
41735 byte-element selectors. Let's call this M, with M[i] representing
41736 the ith byte-element selector value. Then if we swap doublewords
41737 throughout the computation, we can get correct behavior by replacing
41738 M with M' as follows:
41740 M'[i] = { (M[i]+8)%16 : M[i] in [0,15]
41741 { ((M[i]+8)%16)+16 : M[i] in [16,31]
41743 This seems promising at first, since we are just replacing one mask
41744 with another. But certain masks are preferable to others. If M
41745 is a mask that matches a vmrghh pattern, for example, M' certainly
41746 will not. Instead of a single vmrghh, we would generate a load of
41747 M' and a vperm. So we would need to know how many xxswapd's we can
41748 remove as a result of this transformation to determine if it's
41749 profitable; and preferably the logic would need to be aware of all
41750 the special preferable masks.
41752 Another form of permute is an UNSPEC_VPERM, in which the mask is
41753 already in a register. In some cases, this mask may be a constant
41754 that we can discover with ud-chains, in which case the above
41755 transformation is ok. However, the common usage here is for the
41756 mask to be produced by an UNSPEC_LVSL, in which case the mask
41757 cannot be known at compile time. In such a case we would have to
41758 generate several instructions to compute M' as above at run time,
41759 and a cost model is needed again.
41761 However, when the mask M for an UNSPEC_VPERM is loaded from the
41762 constant pool, we can replace M with M' as above at no cost
41763 beyond adding a constant pool entry. */
41765 /* This is based on the union-find logic in web.c. web_entry_base is
41766 defined in df.h. */
41767 class swap_web_entry
: public web_entry_base
41770 /* Pointer to the insn. */
41772 /* Set if insn contains a mention of a vector register. All other
41773 fields are undefined if this field is unset. */
41774 unsigned int is_relevant
: 1;
41775 /* Set if insn is a load. */
41776 unsigned int is_load
: 1;
41777 /* Set if insn is a store. */
41778 unsigned int is_store
: 1;
41779 /* Set if insn is a doubleword swap. This can either be a register swap
41780 or a permuting load or store (test is_load and is_store for this). */
41781 unsigned int is_swap
: 1;
41782 /* Set if the insn has a live-in use of a parameter register. */
41783 unsigned int is_live_in
: 1;
41784 /* Set if the insn has a live-out def of a return register. */
41785 unsigned int is_live_out
: 1;
41786 /* Set if the insn contains a subreg reference of a vector register. */
41787 unsigned int contains_subreg
: 1;
41788 /* Set if the insn contains a 128-bit integer operand. */
41789 unsigned int is_128_int
: 1;
41790 /* Set if this is a call-insn. */
41791 unsigned int is_call
: 1;
41792 /* Set if this insn does not perform a vector operation for which
41793 element order matters, or if we know how to fix it up if it does.
41794 Undefined if is_swap is set. */
41795 unsigned int is_swappable
: 1;
41796 /* A nonzero value indicates what kind of special handling for this
41797 insn is required if doublewords are swapped. Undefined if
41798 is_swappable is not set. */
41799 unsigned int special_handling
: 4;
41800 /* Set if the web represented by this entry cannot be optimized. */
41801 unsigned int web_not_optimizable
: 1;
41802 /* Set if this insn should be deleted. */
41803 unsigned int will_delete
: 1;
/* Kinds of special handling an is_swappable insn may require if
   doublewords are swapped (stored in swap_web_entry::special_handling,
   a 4-bit field; SH_NONE means no special handling needed).  */
enum special_handling_values {
  SH_NONE = 0,
  SH_CONST_VECTOR,
  SH_SUBREG,
  SH_NOSWAP_LD,
  SH_NOSWAP_ST,
  SH_EXTRACT,
  SH_SPLAT,
  SH_XXPERMDI,
  SH_CONCAT,
  SH_VPERM
};
41819 /* Union INSN with all insns containing definitions that reach USE.
41820 Detect whether USE is live-in to the current function. */
41822 union_defs (swap_web_entry
*insn_entry
, rtx insn
, df_ref use
)
41824 struct df_link
*link
= DF_REF_CHAIN (use
);
41827 insn_entry
[INSN_UID (insn
)].is_live_in
= 1;
41831 if (DF_REF_IS_ARTIFICIAL (link
->ref
))
41832 insn_entry
[INSN_UID (insn
)].is_live_in
= 1;
41834 if (DF_REF_INSN_INFO (link
->ref
))
41836 rtx def_insn
= DF_REF_INSN (link
->ref
);
41837 (void)unionfind_union (insn_entry
+ INSN_UID (insn
),
41838 insn_entry
+ INSN_UID (def_insn
));
41845 /* Union INSN with all insns containing uses reached from DEF.
41846 Detect whether DEF is live-out from the current function. */
41848 union_uses (swap_web_entry
*insn_entry
, rtx insn
, df_ref def
)
41850 struct df_link
*link
= DF_REF_CHAIN (def
);
41853 insn_entry
[INSN_UID (insn
)].is_live_out
= 1;
41857 /* This could be an eh use or some other artificial use;
41858 we treat these all the same (killing the optimization). */
41859 if (DF_REF_IS_ARTIFICIAL (link
->ref
))
41860 insn_entry
[INSN_UID (insn
)].is_live_out
= 1;
41862 if (DF_REF_INSN_INFO (link
->ref
))
41864 rtx use_insn
= DF_REF_INSN (link
->ref
);
41865 (void)unionfind_union (insn_entry
+ INSN_UID (insn
),
41866 insn_entry
+ INSN_UID (use_insn
));
41873 /* Return 1 iff INSN is a load insn, including permuting loads that
41874 represent an lvxd2x instruction; else return 0. */
41875 static unsigned int
41876 insn_is_load_p (rtx insn
)
41878 rtx body
= PATTERN (insn
);
41880 if (GET_CODE (body
) == SET
)
41882 if (GET_CODE (SET_SRC (body
)) == MEM
)
41885 if (GET_CODE (SET_SRC (body
)) == VEC_SELECT
41886 && GET_CODE (XEXP (SET_SRC (body
), 0)) == MEM
)
41892 if (GET_CODE (body
) != PARALLEL
)
41895 rtx set
= XVECEXP (body
, 0, 0);
41897 if (GET_CODE (set
) == SET
&& GET_CODE (SET_SRC (set
)) == MEM
)
41903 /* Return 1 iff INSN is a store insn, including permuting stores that
41904 represent an stvxd2x instruction; else return 0. */
41905 static unsigned int
41906 insn_is_store_p (rtx insn
)
41908 rtx body
= PATTERN (insn
);
41909 if (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == MEM
)
41911 if (GET_CODE (body
) != PARALLEL
)
41913 rtx set
= XVECEXP (body
, 0, 0);
41914 if (GET_CODE (set
) == SET
&& GET_CODE (SET_DEST (set
)) == MEM
)
41919 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
41920 a permuting load, or a permuting store. */
41921 static unsigned int
41922 insn_is_swap_p (rtx insn
)
41924 rtx body
= PATTERN (insn
);
41925 if (GET_CODE (body
) != SET
)
41927 rtx rhs
= SET_SRC (body
);
41928 if (GET_CODE (rhs
) != VEC_SELECT
)
41930 rtx parallel
= XEXP (rhs
, 1);
41931 if (GET_CODE (parallel
) != PARALLEL
)
41933 unsigned int len
= XVECLEN (parallel
, 0);
41934 if (len
!= 2 && len
!= 4 && len
!= 8 && len
!= 16)
41936 for (unsigned int i
= 0; i
< len
/ 2; ++i
)
41938 rtx op
= XVECEXP (parallel
, 0, i
);
41939 if (GET_CODE (op
) != CONST_INT
|| INTVAL (op
) != len
/ 2 + i
)
41942 for (unsigned int i
= len
/ 2; i
< len
; ++i
)
41944 rtx op
= XVECEXP (parallel
, 0, i
);
41945 if (GET_CODE (op
) != CONST_INT
|| INTVAL (op
) != i
- len
/ 2)
41951 /* Return TRUE if insn is a swap fed by a load from the constant pool. */
41953 const_load_sequence_p (swap_web_entry
*insn_entry
, rtx insn
)
41955 unsigned uid
= INSN_UID (insn
);
41956 if (!insn_entry
[uid
].is_swap
|| insn_entry
[uid
].is_load
)
41959 /* Find the unique use in the swap and locate its def. If the def
41960 isn't unique, punt. */
41961 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
41963 FOR_EACH_INSN_INFO_USE (use
, insn_info
)
41965 struct df_link
*def_link
= DF_REF_CHAIN (use
);
41966 if (!def_link
|| def_link
->next
)
41969 rtx def_insn
= DF_REF_INSN (def_link
->ref
);
41970 unsigned uid2
= INSN_UID (def_insn
);
41971 if (!insn_entry
[uid2
].is_load
|| !insn_entry
[uid2
].is_swap
)
41974 rtx body
= PATTERN (def_insn
);
41975 if (GET_CODE (body
) != SET
41976 || GET_CODE (SET_SRC (body
)) != VEC_SELECT
41977 || GET_CODE (XEXP (SET_SRC (body
), 0)) != MEM
)
41980 rtx mem
= XEXP (SET_SRC (body
), 0);
41981 rtx base_reg
= XEXP (mem
, 0);
41984 insn_info
= DF_INSN_INFO_GET (def_insn
);
41985 FOR_EACH_INSN_INFO_USE (base_use
, insn_info
)
41987 if (!rtx_equal_p (DF_REF_REG (base_use
), base_reg
))
41990 struct df_link
*base_def_link
= DF_REF_CHAIN (base_use
);
41991 if (!base_def_link
|| base_def_link
->next
)
41994 rtx tocrel_insn
= DF_REF_INSN (base_def_link
->ref
);
41995 rtx tocrel_body
= PATTERN (tocrel_insn
);
41997 if (GET_CODE (tocrel_body
) != SET
)
41999 /* There is an extra level of indirection for small/large
42001 rtx tocrel_expr
= SET_SRC (tocrel_body
);
42002 if (GET_CODE (tocrel_expr
) == MEM
)
42003 tocrel_expr
= XEXP (tocrel_expr
, 0);
42004 if (!toc_relative_expr_p (tocrel_expr
, false))
42006 split_const (XVECEXP (tocrel_base
, 0, 0), &base
, &offset
);
42007 if (GET_CODE (base
) != SYMBOL_REF
|| !CONSTANT_POOL_ADDRESS_P (base
))
42014 /* Return TRUE iff OP matches a V2DF reduction pattern. See the
42015 definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md. */
42017 v2df_reduction_p (rtx op
)
42019 if (GET_MODE (op
) != V2DFmode
)
42022 enum rtx_code code
= GET_CODE (op
);
42023 if (code
!= PLUS
&& code
!= SMIN
&& code
!= SMAX
)
42026 rtx concat
= XEXP (op
, 0);
42027 if (GET_CODE (concat
) != VEC_CONCAT
)
42030 rtx select0
= XEXP (concat
, 0);
42031 rtx select1
= XEXP (concat
, 1);
42032 if (GET_CODE (select0
) != VEC_SELECT
|| GET_CODE (select1
) != VEC_SELECT
)
42035 rtx reg0
= XEXP (select0
, 0);
42036 rtx reg1
= XEXP (select1
, 0);
42037 if (!rtx_equal_p (reg0
, reg1
) || !REG_P (reg0
))
42040 rtx parallel0
= XEXP (select0
, 1);
42041 rtx parallel1
= XEXP (select1
, 1);
42042 if (GET_CODE (parallel0
) != PARALLEL
|| GET_CODE (parallel1
) != PARALLEL
)
42045 if (!rtx_equal_p (XVECEXP (parallel0
, 0, 0), const1_rtx
)
42046 || !rtx_equal_p (XVECEXP (parallel1
, 0, 0), const0_rtx
))
42052 /* Return 1 iff OP is an operand that will not be affected by having
42053 vector doublewords swapped in memory. */
42054 static unsigned int
42055 rtx_is_swappable_p (rtx op
, unsigned int *special
)
42057 enum rtx_code code
= GET_CODE (op
);
42076 *special
= SH_CONST_VECTOR
;
42080 case VEC_DUPLICATE
:
42081 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
42082 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
42083 it represents a vector splat for which we can do special
42085 if (GET_CODE (XEXP (op
, 0)) == CONST_INT
)
42087 else if (REG_P (XEXP (op
, 0))
42088 && GET_MODE_INNER (GET_MODE (op
)) == GET_MODE (XEXP (op
, 0)))
42089 /* This catches V2DF and V2DI splat, at a minimum. */
42091 else if (GET_CODE (XEXP (op
, 0)) == TRUNCATE
42092 && REG_P (XEXP (XEXP (op
, 0), 0))
42093 && GET_MODE_INNER (GET_MODE (op
)) == GET_MODE (XEXP (op
, 0)))
42094 /* This catches splat of a truncated value. */
42096 else if (GET_CODE (XEXP (op
, 0)) == VEC_SELECT
)
42097 /* If the duplicated item is from a select, defer to the select
42098 processing to see if we can change the lane for the splat. */
42099 return rtx_is_swappable_p (XEXP (op
, 0), special
);
42104 /* A vec_extract operation is ok if we change the lane. */
42105 if (GET_CODE (XEXP (op
, 0)) == REG
42106 && GET_MODE_INNER (GET_MODE (XEXP (op
, 0))) == GET_MODE (op
)
42107 && GET_CODE ((parallel
= XEXP (op
, 1))) == PARALLEL
42108 && XVECLEN (parallel
, 0) == 1
42109 && GET_CODE (XVECEXP (parallel
, 0, 0)) == CONST_INT
)
42111 *special
= SH_EXTRACT
;
42114 /* An XXPERMDI is ok if we adjust the lanes. Note that if the
42115 XXPERMDI is a swap operation, it will be identified by
42116 insn_is_swap_p and therefore we won't get here. */
42117 else if (GET_CODE (XEXP (op
, 0)) == VEC_CONCAT
42118 && (GET_MODE (XEXP (op
, 0)) == V4DFmode
42119 || GET_MODE (XEXP (op
, 0)) == V4DImode
)
42120 && GET_CODE ((parallel
= XEXP (op
, 1))) == PARALLEL
42121 && XVECLEN (parallel
, 0) == 2
42122 && GET_CODE (XVECEXP (parallel
, 0, 0)) == CONST_INT
42123 && GET_CODE (XVECEXP (parallel
, 0, 1)) == CONST_INT
)
42125 *special
= SH_XXPERMDI
;
42128 else if (v2df_reduction_p (op
))
42135 /* Various operations are unsafe for this optimization, at least
42136 without significant additional work. Permutes are obviously
42137 problematic, as both the permute control vector and the ordering
42138 of the target values are invalidated by doubleword swapping.
42139 Vector pack and unpack modify the number of vector lanes.
42140 Merge-high/low will not operate correctly on swapped operands.
42141 Vector shifts across element boundaries are clearly uncool,
42142 as are vector select and concatenate operations. Vector
42143 sum-across instructions define one operand with a specific
42144 order-dependent element, so additional fixup code would be
42145 needed to make those work. Vector set and non-immediate-form
42146 vector splat are element-order sensitive. A few of these
42147 cases might be workable with special handling if required.
42148 Adding cost modeling would be appropriate in some cases. */
42149 int val
= XINT (op
, 1);
42154 case UNSPEC_VMRGH_DIRECT
:
42155 case UNSPEC_VMRGL_DIRECT
:
42156 case UNSPEC_VPACK_SIGN_SIGN_SAT
:
42157 case UNSPEC_VPACK_SIGN_UNS_SAT
:
42158 case UNSPEC_VPACK_UNS_UNS_MOD
:
42159 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT
:
42160 case UNSPEC_VPACK_UNS_UNS_SAT
:
42162 case UNSPEC_VPERM_UNS
:
42163 case UNSPEC_VPERMHI
:
42164 case UNSPEC_VPERMSI
:
42166 case UNSPEC_VSLDOI
:
42169 case UNSPEC_VSUM2SWS
:
42170 case UNSPEC_VSUM4S
:
42171 case UNSPEC_VSUM4UBS
:
42172 case UNSPEC_VSUMSWS
:
42173 case UNSPEC_VSUMSWS_DIRECT
:
42174 case UNSPEC_VSX_CONCAT
:
42175 case UNSPEC_VSX_SET
:
42176 case UNSPEC_VSX_SLDWI
:
42177 case UNSPEC_VUNPACK_HI_SIGN
:
42178 case UNSPEC_VUNPACK_HI_SIGN_DIRECT
:
42179 case UNSPEC_VUNPACK_LO_SIGN
:
42180 case UNSPEC_VUNPACK_LO_SIGN_DIRECT
:
42181 case UNSPEC_VUPKHPX
:
42182 case UNSPEC_VUPKHS_V4SF
:
42183 case UNSPEC_VUPKHU_V4SF
:
42184 case UNSPEC_VUPKLPX
:
42185 case UNSPEC_VUPKLS_V4SF
:
42186 case UNSPEC_VUPKLU_V4SF
:
42187 case UNSPEC_VSX_CVDPSPN
:
42188 case UNSPEC_VSX_CVSPDP
:
42189 case UNSPEC_VSX_CVSPDPN
:
42190 case UNSPEC_VSX_EXTRACT
:
42191 case UNSPEC_VSX_VSLO
:
42192 case UNSPEC_VSX_VEC_INIT
:
42194 case UNSPEC_VSPLT_DIRECT
:
42195 case UNSPEC_VSX_XXSPLTD
:
42196 *special
= SH_SPLAT
;
42198 case UNSPEC_REDUC_PLUS
:
42208 const char *fmt
= GET_RTX_FORMAT (code
);
42211 for (i
= 0; i
< GET_RTX_LENGTH (code
); ++i
)
42212 if (fmt
[i
] == 'e' || fmt
[i
] == 'u')
42214 unsigned int special_op
= SH_NONE
;
42215 ok
&= rtx_is_swappable_p (XEXP (op
, i
), &special_op
);
42216 if (special_op
== SH_NONE
)
42218 /* Ensure we never have two kinds of special handling
42219 for the same insn. */
42220 if (*special
!= SH_NONE
&& *special
!= special_op
)
42222 *special
= special_op
;
42224 else if (fmt
[i
] == 'E')
42225 for (j
= 0; j
< XVECLEN (op
, i
); ++j
)
42227 unsigned int special_op
= SH_NONE
;
42228 ok
&= rtx_is_swappable_p (XVECEXP (op
, i
, j
), &special_op
);
42229 if (special_op
== SH_NONE
)
42231 /* Ensure we never have two kinds of special handling
42232 for the same insn. */
42233 if (*special
!= SH_NONE
&& *special
!= special_op
)
42235 *special
= special_op
;
42241 /* Return 1 iff INSN is an operand that will not be affected by
42242 having vector doublewords swapped in memory (in which case
42243 *SPECIAL is unchanged), or that can be modified to be correct
42244 if vector doublewords are swapped in memory (in which case
42245 *SPECIAL is changed to a value indicating how). */
42246 static unsigned int
42247 insn_is_swappable_p (swap_web_entry
*insn_entry
, rtx insn
,
42248 unsigned int *special
)
42250 /* Calls are always bad. */
42251 if (GET_CODE (insn
) == CALL_INSN
)
42254 /* Loads and stores seen here are not permuting, but we can still
42255 fix them up by converting them to permuting ones. Exceptions:
42256 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
42257 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
42258 for the SET source. Also we must now make an exception for lvx
42259 and stvx when they are not in the UNSPEC_LVX/STVX form (with the
42260 explicit "& -16") since this leads to unrecognizable insns. */
42261 rtx body
= PATTERN (insn
);
42262 int i
= INSN_UID (insn
);
42264 if (insn_entry
[i
].is_load
)
42266 if (GET_CODE (body
) == SET
)
42268 rtx rhs
= SET_SRC (body
);
42269 /* Even without a swap, the RHS might be a vec_select for, say,
42270 a byte-reversing load. */
42271 if (GET_CODE (rhs
) != MEM
)
42273 if (GET_CODE (XEXP (rhs
, 0)) == AND
)
42276 *special
= SH_NOSWAP_LD
;
42283 if (insn_entry
[i
].is_store
)
42285 if (GET_CODE (body
) == SET
42286 && GET_CODE (SET_SRC (body
)) != UNSPEC
)
42288 rtx lhs
= SET_DEST (body
);
42289 /* Even without a swap, the LHS might be a vec_select for, say,
42290 a byte-reversing store. */
42291 if (GET_CODE (lhs
) != MEM
)
42293 if (GET_CODE (XEXP (lhs
, 0)) == AND
)
42296 *special
= SH_NOSWAP_ST
;
42303 /* A convert to single precision can be left as is provided that
42304 all of its uses are in xxspltw instructions that splat BE element
42306 if (GET_CODE (body
) == SET
42307 && GET_CODE (SET_SRC (body
)) == UNSPEC
42308 && XINT (SET_SRC (body
), 1) == UNSPEC_VSX_CVDPSPN
)
42311 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
42313 FOR_EACH_INSN_INFO_DEF (def
, insn_info
)
42315 struct df_link
*link
= DF_REF_CHAIN (def
);
42319 for (; link
; link
= link
->next
) {
42320 rtx use_insn
= DF_REF_INSN (link
->ref
);
42321 rtx use_body
= PATTERN (use_insn
);
42322 if (GET_CODE (use_body
) != SET
42323 || GET_CODE (SET_SRC (use_body
)) != UNSPEC
42324 || XINT (SET_SRC (use_body
), 1) != UNSPEC_VSX_XXSPLTW
42325 || XVECEXP (SET_SRC (use_body
), 0, 1) != const0_rtx
)
42333 /* A concatenation of two doublewords is ok if we reverse the
42334 order of the inputs. */
42335 if (GET_CODE (body
) == SET
42336 && GET_CODE (SET_SRC (body
)) == VEC_CONCAT
42337 && (GET_MODE (SET_SRC (body
)) == V2DFmode
42338 || GET_MODE (SET_SRC (body
)) == V2DImode
))
42340 *special
= SH_CONCAT
;
42344 /* V2DF reductions are always swappable. */
42345 if (GET_CODE (body
) == PARALLEL
)
42347 rtx expr
= XVECEXP (body
, 0, 0);
42348 if (GET_CODE (expr
) == SET
42349 && v2df_reduction_p (SET_SRC (expr
)))
42353 /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
42355 if (GET_CODE (body
) == SET
42356 && GET_CODE (SET_SRC (body
)) == UNSPEC
42357 && XINT (SET_SRC (body
), 1) == UNSPEC_VPERM
42358 && XVECLEN (SET_SRC (body
), 0) == 3
42359 && GET_CODE (XVECEXP (SET_SRC (body
), 0, 2)) == REG
)
42361 rtx mask_reg
= XVECEXP (SET_SRC (body
), 0, 2);
42362 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
42364 FOR_EACH_INSN_INFO_USE (use
, insn_info
)
42365 if (rtx_equal_p (DF_REF_REG (use
), mask_reg
))
42367 struct df_link
*def_link
= DF_REF_CHAIN (use
);
42368 /* Punt if multiple definitions for this reg. */
42369 if (def_link
&& !def_link
->next
&&
42370 const_load_sequence_p (insn_entry
,
42371 DF_REF_INSN (def_link
->ref
)))
42373 *special
= SH_VPERM
;
42379 /* Otherwise check the operands for vector lane violations. */
42380 return rtx_is_swappable_p (body
, special
);
/* Direction of a chain walk: following def->use chains of a load's
   result, or use->def chains of a store's input.  */
enum chain_purpose { FOR_LOADS, FOR_STORES };
42385 /* Return true if the UD or DU chain headed by LINK is non-empty,
42386 and every entry on the chain references an insn that is a
42387 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
42388 register swap must have only permuting loads as reaching defs.
42389 If PURPOSE is FOR_STORES, each such register swap must have only
42390 register swaps or permuting stores as reached uses. */
42392 chain_contains_only_swaps (swap_web_entry
*insn_entry
, struct df_link
*link
,
42393 enum chain_purpose purpose
)
42398 for (; link
; link
= link
->next
)
42400 if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link
->ref
))))
42403 if (DF_REF_IS_ARTIFICIAL (link
->ref
))
42406 rtx reached_insn
= DF_REF_INSN (link
->ref
);
42407 unsigned uid
= INSN_UID (reached_insn
);
42408 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (reached_insn
);
42410 if (!insn_entry
[uid
].is_swap
|| insn_entry
[uid
].is_load
42411 || insn_entry
[uid
].is_store
)
42414 if (purpose
== FOR_LOADS
)
42417 FOR_EACH_INSN_INFO_USE (use
, insn_info
)
42419 struct df_link
*swap_link
= DF_REF_CHAIN (use
);
42423 if (DF_REF_IS_ARTIFICIAL (link
->ref
))
42426 rtx swap_def_insn
= DF_REF_INSN (swap_link
->ref
);
42427 unsigned uid2
= INSN_UID (swap_def_insn
);
42429 /* Only permuting loads are allowed. */
42430 if (!insn_entry
[uid2
].is_swap
|| !insn_entry
[uid2
].is_load
)
42433 swap_link
= swap_link
->next
;
42437 else if (purpose
== FOR_STORES
)
42440 FOR_EACH_INSN_INFO_DEF (def
, insn_info
)
42442 struct df_link
*swap_link
= DF_REF_CHAIN (def
);
42446 if (DF_REF_IS_ARTIFICIAL (link
->ref
))
42449 rtx swap_use_insn
= DF_REF_INSN (swap_link
->ref
);
42450 unsigned uid2
= INSN_UID (swap_use_insn
);
42452 /* Permuting stores or register swaps are allowed. */
42453 if (!insn_entry
[uid2
].is_swap
|| insn_entry
[uid2
].is_load
)
42456 swap_link
= swap_link
->next
;
42465 /* Mark the xxswapdi instructions associated with permuting loads and
42466 stores for removal. Note that we only flag them for deletion here,
42467 as there is a possibility of a swap being reached from multiple
42470 mark_swaps_for_removal (swap_web_entry
*insn_entry
, unsigned int i
)
42472 rtx insn
= insn_entry
[i
].insn
;
42473 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
42475 if (insn_entry
[i
].is_load
)
42478 FOR_EACH_INSN_INFO_DEF (def
, insn_info
)
42480 struct df_link
*link
= DF_REF_CHAIN (def
);
42482 /* We know by now that these are swaps, so we can delete
42483 them confidently. */
42486 rtx use_insn
= DF_REF_INSN (link
->ref
);
42487 insn_entry
[INSN_UID (use_insn
)].will_delete
= 1;
42492 else if (insn_entry
[i
].is_store
)
42495 FOR_EACH_INSN_INFO_USE (use
, insn_info
)
42497 /* Ignore uses for addressability. */
42498 machine_mode mode
= GET_MODE (DF_REF_REG (use
));
42499 if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode
))
42502 struct df_link
*link
= DF_REF_CHAIN (use
);
42504 /* We know by now that these are swaps, so we can delete
42505 them confidently. */
42508 rtx def_insn
= DF_REF_INSN (link
->ref
);
42509 insn_entry
[INSN_UID (def_insn
)].will_delete
= 1;
42516 /* OP is either a CONST_VECTOR or an expression containing one.
42517 Swap the first half of the vector with the second in the first
42518 case. Recurse to find it in the second. */
42520 swap_const_vector_halves (rtx op
)
42523 enum rtx_code code
= GET_CODE (op
);
42524 if (GET_CODE (op
) == CONST_VECTOR
)
42526 int half_units
= GET_MODE_NUNITS (GET_MODE (op
)) / 2;
42527 for (i
= 0; i
< half_units
; ++i
)
42529 rtx temp
= CONST_VECTOR_ELT (op
, i
);
42530 CONST_VECTOR_ELT (op
, i
) = CONST_VECTOR_ELT (op
, i
+ half_units
);
42531 CONST_VECTOR_ELT (op
, i
+ half_units
) = temp
;
42537 const char *fmt
= GET_RTX_FORMAT (code
);
42538 for (i
= 0; i
< GET_RTX_LENGTH (code
); ++i
)
42539 if (fmt
[i
] == 'e' || fmt
[i
] == 'u')
42540 swap_const_vector_halves (XEXP (op
, i
));
42541 else if (fmt
[i
] == 'E')
42542 for (j
= 0; j
< XVECLEN (op
, i
); ++j
)
42543 swap_const_vector_halves (XVECEXP (op
, i
, j
));
42547 /* Find all subregs of a vector expression that perform a narrowing,
42548 and adjust the subreg index to account for doubleword swapping. */
42550 adjust_subreg_index (rtx op
)
42552 enum rtx_code code
= GET_CODE (op
);
42554 && (GET_MODE_SIZE (GET_MODE (op
))
42555 < GET_MODE_SIZE (GET_MODE (XEXP (op
, 0)))))
42557 unsigned int index
= SUBREG_BYTE (op
);
42562 SUBREG_BYTE (op
) = index
;
42565 const char *fmt
= GET_RTX_FORMAT (code
);
42567 for (i
= 0; i
< GET_RTX_LENGTH (code
); ++i
)
42568 if (fmt
[i
] == 'e' || fmt
[i
] == 'u')
42569 adjust_subreg_index (XEXP (op
, i
));
42570 else if (fmt
[i
] == 'E')
42571 for (j
= 0; j
< XVECLEN (op
, i
); ++j
)
42572 adjust_subreg_index (XVECEXP (op
, i
, j
));
42575 /* Convert the non-permuting load INSN to a permuting one. */
42577 permute_load (rtx_insn
*insn
)
42579 rtx body
= PATTERN (insn
);
42580 rtx mem_op
= SET_SRC (body
);
42581 rtx tgt_reg
= SET_DEST (body
);
42582 machine_mode mode
= GET_MODE (tgt_reg
);
42583 int n_elts
= GET_MODE_NUNITS (mode
);
42584 int half_elts
= n_elts
/ 2;
42585 rtx par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (n_elts
));
42587 for (i
= 0, j
= half_elts
; i
< half_elts
; ++i
, ++j
)
42588 XVECEXP (par
, 0, i
) = GEN_INT (j
);
42589 for (i
= half_elts
, j
= 0; j
< half_elts
; ++i
, ++j
)
42590 XVECEXP (par
, 0, i
) = GEN_INT (j
);
42591 rtx sel
= gen_rtx_VEC_SELECT (mode
, mem_op
, par
);
42592 SET_SRC (body
) = sel
;
42593 INSN_CODE (insn
) = -1; /* Force re-recognition. */
42594 df_insn_rescan (insn
);
42597 fprintf (dump_file
, "Replacing load %d with permuted load\n",
42601 /* Convert the non-permuting store INSN to a permuting one. */
42603 permute_store (rtx_insn
*insn
)
42605 rtx body
= PATTERN (insn
);
42606 rtx src_reg
= SET_SRC (body
);
42607 machine_mode mode
= GET_MODE (src_reg
);
42608 int n_elts
= GET_MODE_NUNITS (mode
);
42609 int half_elts
= n_elts
/ 2;
42610 rtx par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (n_elts
));
42612 for (i
= 0, j
= half_elts
; i
< half_elts
; ++i
, ++j
)
42613 XVECEXP (par
, 0, i
) = GEN_INT (j
);
42614 for (i
= half_elts
, j
= 0; j
< half_elts
; ++i
, ++j
)
42615 XVECEXP (par
, 0, i
) = GEN_INT (j
);
42616 rtx sel
= gen_rtx_VEC_SELECT (mode
, src_reg
, par
);
42617 SET_SRC (body
) = sel
;
42618 INSN_CODE (insn
) = -1; /* Force re-recognition. */
42619 df_insn_rescan (insn
);
42622 fprintf (dump_file
, "Replacing store %d with permuted store\n",
42626 /* Given OP that contains a vector extract operation, adjust the index
42627 of the extracted lane to account for the doubleword swap. */
42629 adjust_extract (rtx_insn
*insn
)
42631 rtx pattern
= PATTERN (insn
);
42632 if (GET_CODE (pattern
) == PARALLEL
)
42633 pattern
= XVECEXP (pattern
, 0, 0);
42634 rtx src
= SET_SRC (pattern
);
42635 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
42636 account for that. */
42637 rtx sel
= GET_CODE (src
) == VEC_DUPLICATE
? XEXP (src
, 0) : src
;
42638 rtx par
= XEXP (sel
, 1);
42639 int half_elts
= GET_MODE_NUNITS (GET_MODE (XEXP (sel
, 0))) >> 1;
42640 int lane
= INTVAL (XVECEXP (par
, 0, 0));
42641 lane
= lane
>= half_elts
? lane
- half_elts
: lane
+ half_elts
;
42642 XVECEXP (par
, 0, 0) = GEN_INT (lane
);
42643 INSN_CODE (insn
) = -1; /* Force re-recognition. */
42644 df_insn_rescan (insn
);
42647 fprintf (dump_file
, "Changing lane for extract %d\n", INSN_UID (insn
));
42650 /* Given OP that contains a vector direct-splat operation, adjust the index
42651 of the source lane to account for the doubleword swap. */
42653 adjust_splat (rtx_insn
*insn
)
42655 rtx body
= PATTERN (insn
);
42656 rtx unspec
= XEXP (body
, 1);
42657 int half_elts
= GET_MODE_NUNITS (GET_MODE (unspec
)) >> 1;
42658 int lane
= INTVAL (XVECEXP (unspec
, 0, 1));
42659 lane
= lane
>= half_elts
? lane
- half_elts
: lane
+ half_elts
;
42660 XVECEXP (unspec
, 0, 1) = GEN_INT (lane
);
42661 INSN_CODE (insn
) = -1; /* Force re-recognition. */
42662 df_insn_rescan (insn
);
42665 fprintf (dump_file
, "Changing lane for splat %d\n", INSN_UID (insn
));
42668 /* Given OP that contains an XXPERMDI operation (that is not a doubleword
42669 swap), reverse the order of the source operands and adjust the indices
42670 of the source lanes to account for doubleword reversal. */
42672 adjust_xxpermdi (rtx_insn
*insn
)
42674 rtx set
= PATTERN (insn
);
42675 rtx select
= XEXP (set
, 1);
42676 rtx concat
= XEXP (select
, 0);
42677 rtx src0
= XEXP (concat
, 0);
42678 XEXP (concat
, 0) = XEXP (concat
, 1);
42679 XEXP (concat
, 1) = src0
;
42680 rtx parallel
= XEXP (select
, 1);
42681 int lane0
= INTVAL (XVECEXP (parallel
, 0, 0));
42682 int lane1
= INTVAL (XVECEXP (parallel
, 0, 1));
42683 int new_lane0
= 3 - lane1
;
42684 int new_lane1
= 3 - lane0
;
42685 XVECEXP (parallel
, 0, 0) = GEN_INT (new_lane0
);
42686 XVECEXP (parallel
, 0, 1) = GEN_INT (new_lane1
);
42687 INSN_CODE (insn
) = -1; /* Force re-recognition. */
42688 df_insn_rescan (insn
);
42691 fprintf (dump_file
, "Changing lanes for xxpermdi %d\n", INSN_UID (insn
));
42694 /* Given OP that contains a VEC_CONCAT operation of two doublewords,
42695 reverse the order of those inputs. */
42697 adjust_concat (rtx_insn
*insn
)
42699 rtx set
= PATTERN (insn
);
42700 rtx concat
= XEXP (set
, 1);
42701 rtx src0
= XEXP (concat
, 0);
42702 XEXP (concat
, 0) = XEXP (concat
, 1);
42703 XEXP (concat
, 1) = src0
;
42704 INSN_CODE (insn
) = -1; /* Force re-recognition. */
42705 df_insn_rescan (insn
);
42708 fprintf (dump_file
, "Reversing inputs for concat %d\n", INSN_UID (insn
));
42711 /* Given an UNSPEC_VPERM insn, modify the mask loaded from the
42712 constant pool to reflect swapped doublewords. */
42714 adjust_vperm (rtx_insn
*insn
)
42716 /* We previously determined that the UNSPEC_VPERM was fed by a
42717 swap of a swapping load of a TOC-relative constant pool symbol.
42718 Find the MEM in the swapping load and replace it with a MEM for
42719 the adjusted mask constant. */
42720 rtx set
= PATTERN (insn
);
42721 rtx mask_reg
= XVECEXP (SET_SRC (set
), 0, 2);
42723 /* Find the swap. */
42724 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
42726 rtx_insn
*swap_insn
= 0;
42727 FOR_EACH_INSN_INFO_USE (use
, insn_info
)
42728 if (rtx_equal_p (DF_REF_REG (use
), mask_reg
))
42730 struct df_link
*def_link
= DF_REF_CHAIN (use
);
42731 gcc_assert (def_link
&& !def_link
->next
);
42732 swap_insn
= DF_REF_INSN (def_link
->ref
);
42735 gcc_assert (swap_insn
);
42737 /* Find the load. */
42738 insn_info
= DF_INSN_INFO_GET (swap_insn
);
42739 rtx_insn
*load_insn
= 0;
42740 FOR_EACH_INSN_INFO_USE (use
, insn_info
)
42742 struct df_link
*def_link
= DF_REF_CHAIN (use
);
42743 gcc_assert (def_link
&& !def_link
->next
);
42744 load_insn
= DF_REF_INSN (def_link
->ref
);
42747 gcc_assert (load_insn
);
42749 /* Find the TOC-relative symbol access. */
42750 insn_info
= DF_INSN_INFO_GET (load_insn
);
42751 rtx_insn
*tocrel_insn
= 0;
42752 FOR_EACH_INSN_INFO_USE (use
, insn_info
)
42754 struct df_link
*def_link
= DF_REF_CHAIN (use
);
42755 gcc_assert (def_link
&& !def_link
->next
);
42756 tocrel_insn
= DF_REF_INSN (def_link
->ref
);
42759 gcc_assert (tocrel_insn
);
42761 /* Find the embedded CONST_VECTOR. We have to call toc_relative_expr_p
42762 to set tocrel_base; otherwise it would be unnecessary as we've
42763 already established it will return true. */
42765 rtx tocrel_expr
= SET_SRC (PATTERN (tocrel_insn
));
42766 /* There is an extra level of indirection for small/large code models. */
42767 if (GET_CODE (tocrel_expr
) == MEM
)
42768 tocrel_expr
= XEXP (tocrel_expr
, 0);
42769 if (!toc_relative_expr_p (tocrel_expr
, false))
42770 gcc_unreachable ();
42771 split_const (XVECEXP (tocrel_base
, 0, 0), &base
, &offset
);
42772 rtx const_vector
= get_pool_constant (base
);
42773 /* With the extra indirection, get_pool_constant will produce the
42774 real constant from the reg_equal expression, so get the real
42776 if (GET_CODE (const_vector
) == SYMBOL_REF
)
42777 const_vector
= get_pool_constant (const_vector
);
42778 gcc_assert (GET_CODE (const_vector
) == CONST_VECTOR
);
42780 /* Create an adjusted mask from the initial mask. */
42781 unsigned int new_mask
[16], i
, val
;
42782 for (i
= 0; i
< 16; ++i
) {
42783 val
= INTVAL (XVECEXP (const_vector
, 0, i
));
42785 new_mask
[i
] = (val
+ 8) % 16;
42787 new_mask
[i
] = ((val
+ 8) % 16) + 16;
42790 /* Create a new CONST_VECTOR and a MEM that references it. */
42791 rtx vals
= gen_rtx_PARALLEL (V16QImode
, rtvec_alloc (16));
42792 for (i
= 0; i
< 16; ++i
)
42793 XVECEXP (vals
, 0, i
) = GEN_INT (new_mask
[i
]);
42794 rtx new_const_vector
= gen_rtx_CONST_VECTOR (V16QImode
, XVEC (vals
, 0));
42795 rtx new_mem
= force_const_mem (V16QImode
, new_const_vector
);
42796 /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
42797 can't recognize. Force the SYMBOL_REF into a register. */
42798 if (!REG_P (XEXP (new_mem
, 0))) {
42799 rtx base_reg
= force_reg (Pmode
, XEXP (new_mem
, 0));
42800 XEXP (new_mem
, 0) = base_reg
;
42801 /* Move the newly created insn ahead of the load insn. */
42802 rtx_insn
*force_insn
= get_last_insn ();
42803 remove_insn (force_insn
);
42804 rtx_insn
*before_load_insn
= PREV_INSN (load_insn
);
42805 add_insn_after (force_insn
, before_load_insn
, BLOCK_FOR_INSN (load_insn
));
42806 df_insn_rescan (before_load_insn
);
42807 df_insn_rescan (force_insn
);
42810 /* Replace the MEM in the load instruction and rescan it. */
42811 XEXP (SET_SRC (PATTERN (load_insn
)), 0) = new_mem
;
42812 INSN_CODE (load_insn
) = -1; /* Force re-recognition. */
42813 df_insn_rescan (load_insn
);
42816 fprintf (dump_file
, "Adjusting mask for vperm %d\n", INSN_UID (insn
));
42819 /* The insn described by INSN_ENTRY[I] can be swapped, but only
42820 with special handling. Take care of that here. */
42822 handle_special_swappables (swap_web_entry
*insn_entry
, unsigned i
)
42824 rtx_insn
*insn
= insn_entry
[i
].insn
;
42825 rtx body
= PATTERN (insn
);
42827 switch (insn_entry
[i
].special_handling
)
42830 gcc_unreachable ();
42831 case SH_CONST_VECTOR
:
42833 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
42834 gcc_assert (GET_CODE (body
) == SET
);
42835 rtx rhs
= SET_SRC (body
);
42836 swap_const_vector_halves (rhs
);
42838 fprintf (dump_file
, "Swapping constant halves in insn %d\n", i
);
42842 /* A subreg of the same size is already safe. For subregs that
42843 select a smaller portion of a reg, adjust the index for
42844 swapped doublewords. */
42845 adjust_subreg_index (body
);
42847 fprintf (dump_file
, "Adjusting subreg in insn %d\n", i
);
42850 /* Convert a non-permuting load to a permuting one. */
42851 permute_load (insn
);
42854 /* Convert a non-permuting store to a permuting one. */
42855 permute_store (insn
);
42858 /* Change the lane on an extract operation. */
42859 adjust_extract (insn
);
42862 /* Change the lane on a direct-splat operation. */
42863 adjust_splat (insn
);
42866 /* Change the lanes on an XXPERMDI operation. */
42867 adjust_xxpermdi (insn
);
42870 /* Reverse the order of a concatenation operation. */
42871 adjust_concat (insn
);
42874 /* Change the mask loaded from the constant pool for a VPERM. */
42875 adjust_vperm (insn
);
42880 /* Find the insn from the Ith table entry, which is known to be a
42881 register swap Y = SWAP(X). Replace it with a copy Y = X. */
42883 replace_swap_with_copy (swap_web_entry
*insn_entry
, unsigned i
)
42885 rtx_insn
*insn
= insn_entry
[i
].insn
;
42886 rtx body
= PATTERN (insn
);
42887 rtx src_reg
= XEXP (SET_SRC (body
), 0);
42888 rtx copy
= gen_rtx_SET (SET_DEST (body
), src_reg
);
42889 rtx_insn
*new_insn
= emit_insn_before (copy
, insn
);
42890 set_block_for_insn (new_insn
, BLOCK_FOR_INSN (insn
));
42891 df_insn_rescan (new_insn
);
42895 unsigned int new_uid
= INSN_UID (new_insn
);
42896 fprintf (dump_file
, "Replacing swap %d with copy %d\n", i
, new_uid
);
42899 df_insn_delete (insn
);
42900 remove_insn (insn
);
42901 insn
->set_deleted ();
42904 /* Dump the swap table to DUMP_FILE. */
42906 dump_swap_insn_table (swap_web_entry
*insn_entry
)
42908 int e
= get_max_uid ();
42909 fprintf (dump_file
, "\nRelevant insns with their flag settings\n\n");
42911 for (int i
= 0; i
< e
; ++i
)
42912 if (insn_entry
[i
].is_relevant
)
42914 swap_web_entry
*pred_entry
= (swap_web_entry
*)insn_entry
[i
].pred ();
42915 fprintf (dump_file
, "%6d %6d ", i
,
42916 pred_entry
&& pred_entry
->insn
42917 ? INSN_UID (pred_entry
->insn
) : 0);
42918 if (insn_entry
[i
].is_load
)
42919 fputs ("load ", dump_file
);
42920 if (insn_entry
[i
].is_store
)
42921 fputs ("store ", dump_file
);
42922 if (insn_entry
[i
].is_swap
)
42923 fputs ("swap ", dump_file
);
42924 if (insn_entry
[i
].is_live_in
)
42925 fputs ("live-in ", dump_file
);
42926 if (insn_entry
[i
].is_live_out
)
42927 fputs ("live-out ", dump_file
);
42928 if (insn_entry
[i
].contains_subreg
)
42929 fputs ("subreg ", dump_file
);
42930 if (insn_entry
[i
].is_128_int
)
42931 fputs ("int128 ", dump_file
);
42932 if (insn_entry
[i
].is_call
)
42933 fputs ("call ", dump_file
);
42934 if (insn_entry
[i
].is_swappable
)
42936 fputs ("swappable ", dump_file
);
42937 if (insn_entry
[i
].special_handling
== SH_CONST_VECTOR
)
42938 fputs ("special:constvec ", dump_file
);
42939 else if (insn_entry
[i
].special_handling
== SH_SUBREG
)
42940 fputs ("special:subreg ", dump_file
);
42941 else if (insn_entry
[i
].special_handling
== SH_NOSWAP_LD
)
42942 fputs ("special:load ", dump_file
);
42943 else if (insn_entry
[i
].special_handling
== SH_NOSWAP_ST
)
42944 fputs ("special:store ", dump_file
);
42945 else if (insn_entry
[i
].special_handling
== SH_EXTRACT
)
42946 fputs ("special:extract ", dump_file
);
42947 else if (insn_entry
[i
].special_handling
== SH_SPLAT
)
42948 fputs ("special:splat ", dump_file
);
42949 else if (insn_entry
[i
].special_handling
== SH_XXPERMDI
)
42950 fputs ("special:xxpermdi ", dump_file
);
42951 else if (insn_entry
[i
].special_handling
== SH_CONCAT
)
42952 fputs ("special:concat ", dump_file
);
42953 else if (insn_entry
[i
].special_handling
== SH_VPERM
)
42954 fputs ("special:vperm ", dump_file
);
42956 if (insn_entry
[i
].web_not_optimizable
)
42957 fputs ("unoptimizable ", dump_file
);
42958 if (insn_entry
[i
].will_delete
)
42959 fputs ("delete ", dump_file
);
42960 fputs ("\n", dump_file
);
42962 fputs ("\n", dump_file
);
42965 /* Return RTX with its address canonicalized to (reg) or (+ reg reg).
42966 Here RTX is an (& addr (const_int -16)). Always return a new copy
42967 to avoid problems with combine. */
42969 alignment_with_canonical_addr (rtx align
)
42972 rtx addr
= XEXP (align
, 0);
42977 else if (GET_CODE (addr
) == PLUS
)
42979 rtx addrop0
= XEXP (addr
, 0);
42980 rtx addrop1
= XEXP (addr
, 1);
42982 if (!REG_P (addrop0
))
42983 addrop0
= force_reg (GET_MODE (addrop0
), addrop0
);
42985 if (!REG_P (addrop1
))
42986 addrop1
= force_reg (GET_MODE (addrop1
), addrop1
);
42988 canon
= gen_rtx_PLUS (GET_MODE (addr
), addrop0
, addrop1
);
42992 canon
= force_reg (GET_MODE (addr
), addr
);
42994 return gen_rtx_AND (GET_MODE (align
), canon
, GEN_INT (-16));
42997 /* Check whether an rtx is an alignment mask, and if so, return
42998 a fully-expanded rtx for the masking operation. */
43000 alignment_mask (rtx_insn
*insn
)
43002 rtx body
= PATTERN (insn
);
43004 if (GET_CODE (body
) != SET
43005 || GET_CODE (SET_SRC (body
)) != AND
43006 || !REG_P (XEXP (SET_SRC (body
), 0)))
43009 rtx mask
= XEXP (SET_SRC (body
), 1);
43011 if (GET_CODE (mask
) == CONST_INT
)
43013 if (INTVAL (mask
) == -16)
43014 return alignment_with_canonical_addr (SET_SRC (body
));
43022 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
43026 FOR_EACH_INSN_INFO_USE (use
, insn_info
)
43028 if (!rtx_equal_p (DF_REF_REG (use
), mask
))
43031 struct df_link
*def_link
= DF_REF_CHAIN (use
);
43032 if (!def_link
|| def_link
->next
)
43035 rtx_insn
*const_insn
= DF_REF_INSN (def_link
->ref
);
43036 rtx const_body
= PATTERN (const_insn
);
43037 if (GET_CODE (const_body
) != SET
)
43040 real_mask
= SET_SRC (const_body
);
43042 if (GET_CODE (real_mask
) != CONST_INT
43043 || INTVAL (real_mask
) != -16)
43047 if (real_mask
== 0)
43050 return alignment_with_canonical_addr (SET_SRC (body
));
43053 /* Given INSN that's a load or store based at BASE_REG, look for a
43054 feeding computation that aligns its address on a 16-byte boundary. */
43056 find_alignment_op (rtx_insn
*insn
, rtx base_reg
)
43059 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
43060 rtx and_operation
= 0;
43062 FOR_EACH_INSN_INFO_USE (base_use
, insn_info
)
43064 if (!rtx_equal_p (DF_REF_REG (base_use
), base_reg
))
43067 struct df_link
*base_def_link
= DF_REF_CHAIN (base_use
);
43068 if (!base_def_link
|| base_def_link
->next
)
43071 /* With stack-protector code enabled, and possibly in other
43072 circumstances, there may not be an associated insn for
43074 if (DF_REF_IS_ARTIFICIAL (base_def_link
->ref
))
43077 rtx_insn
*and_insn
= DF_REF_INSN (base_def_link
->ref
);
43078 and_operation
= alignment_mask (and_insn
);
43079 if (and_operation
!= 0)
43083 return and_operation
;
43086 struct del_info
{ bool replace
; rtx_insn
*replace_insn
; };
43088 /* If INSN is the load for an lvx pattern, put it in canonical form. */
43090 recombine_lvx_pattern (rtx_insn
*insn
, del_info
*to_delete
)
43092 rtx body
= PATTERN (insn
);
43093 gcc_assert (GET_CODE (body
) == SET
43094 && GET_CODE (SET_SRC (body
)) == VEC_SELECT
43095 && GET_CODE (XEXP (SET_SRC (body
), 0)) == MEM
);
43097 rtx mem
= XEXP (SET_SRC (body
), 0);
43098 rtx base_reg
= XEXP (mem
, 0);
43100 rtx and_operation
= find_alignment_op (insn
, base_reg
);
43102 if (and_operation
!= 0)
43105 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
43106 FOR_EACH_INSN_INFO_DEF (def
, insn_info
)
43108 struct df_link
*link
= DF_REF_CHAIN (def
);
43109 if (!link
|| link
->next
)
43112 rtx_insn
*swap_insn
= DF_REF_INSN (link
->ref
);
43113 if (!insn_is_swap_p (swap_insn
)
43114 || insn_is_load_p (swap_insn
)
43115 || insn_is_store_p (swap_insn
))
43118 /* Expected lvx pattern found. Change the swap to
43119 a copy, and propagate the AND operation into the
43121 to_delete
[INSN_UID (swap_insn
)].replace
= true;
43122 to_delete
[INSN_UID (swap_insn
)].replace_insn
= swap_insn
;
43124 XEXP (mem
, 0) = and_operation
;
43125 SET_SRC (body
) = mem
;
43126 INSN_CODE (insn
) = -1; /* Force re-recognition. */
43127 df_insn_rescan (insn
);
43130 fprintf (dump_file
, "lvx opportunity found at %d\n",
43136 /* If INSN is the store for an stvx pattern, put it in canonical form. */
43138 recombine_stvx_pattern (rtx_insn
*insn
, del_info
*to_delete
)
43140 rtx body
= PATTERN (insn
);
43141 gcc_assert (GET_CODE (body
) == SET
43142 && GET_CODE (SET_DEST (body
)) == MEM
43143 && GET_CODE (SET_SRC (body
)) == VEC_SELECT
);
43144 rtx mem
= SET_DEST (body
);
43145 rtx base_reg
= XEXP (mem
, 0);
43147 rtx and_operation
= find_alignment_op (insn
, base_reg
);
43149 if (and_operation
!= 0)
43151 rtx src_reg
= XEXP (SET_SRC (body
), 0);
43153 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
43154 FOR_EACH_INSN_INFO_USE (src_use
, insn_info
)
43156 if (!rtx_equal_p (DF_REF_REG (src_use
), src_reg
))
43159 struct df_link
*link
= DF_REF_CHAIN (src_use
);
43160 if (!link
|| link
->next
)
43163 rtx_insn
*swap_insn
= DF_REF_INSN (link
->ref
);
43164 if (!insn_is_swap_p (swap_insn
)
43165 || insn_is_load_p (swap_insn
)
43166 || insn_is_store_p (swap_insn
))
43169 /* Expected stvx pattern found. Change the swap to
43170 a copy, and propagate the AND operation into the
43172 to_delete
[INSN_UID (swap_insn
)].replace
= true;
43173 to_delete
[INSN_UID (swap_insn
)].replace_insn
= swap_insn
;
43175 XEXP (mem
, 0) = and_operation
;
43176 SET_SRC (body
) = src_reg
;
43177 INSN_CODE (insn
) = -1; /* Force re-recognition. */
43178 df_insn_rescan (insn
);
43181 fprintf (dump_file
, "stvx opportunity found at %d\n",
43187 /* Look for patterns created from builtin lvx and stvx calls, and
43188 canonicalize them to be properly recognized as such. */
43190 recombine_lvx_stvx_patterns (function
*fun
)
43196 int num_insns
= get_max_uid ();
43197 del_info
*to_delete
= XCNEWVEC (del_info
, num_insns
);
43199 FOR_ALL_BB_FN (bb
, fun
)
43200 FOR_BB_INSNS (bb
, insn
)
43202 if (!NONDEBUG_INSN_P (insn
))
43205 if (insn_is_load_p (insn
) && insn_is_swap_p (insn
))
43206 recombine_lvx_pattern (insn
, to_delete
);
43207 else if (insn_is_store_p (insn
) && insn_is_swap_p (insn
))
43208 recombine_stvx_pattern (insn
, to_delete
);
43211 /* Turning swaps into copies is delayed until now, to avoid problems
43212 with deleting instructions during the insn walk. */
43213 for (i
= 0; i
< num_insns
; i
++)
43214 if (to_delete
[i
].replace
)
43216 rtx swap_body
= PATTERN (to_delete
[i
].replace_insn
);
43217 rtx src_reg
= XEXP (SET_SRC (swap_body
), 0);
43218 rtx copy
= gen_rtx_SET (SET_DEST (swap_body
), src_reg
);
43219 rtx_insn
*new_insn
= emit_insn_before (copy
,
43220 to_delete
[i
].replace_insn
);
43221 set_block_for_insn (new_insn
,
43222 BLOCK_FOR_INSN (to_delete
[i
].replace_insn
));
43223 df_insn_rescan (new_insn
);
43224 df_insn_delete (to_delete
[i
].replace_insn
);
43225 remove_insn (to_delete
[i
].replace_insn
);
43226 to_delete
[i
].replace_insn
->set_deleted ();
43232 /* Main entry point for this pass. */
43234 rs6000_analyze_swaps (function
*fun
)
43236 swap_web_entry
*insn_entry
;
43238 rtx_insn
*insn
, *curr_insn
= 0;
43240 /* Dataflow analysis for use-def chains. */
43241 df_set_flags (DF_RD_PRUNE_DEAD_DEFS
);
43242 df_chain_add_problem (DF_DU_CHAIN
| DF_UD_CHAIN
);
43244 df_set_flags (DF_DEFER_INSN_RESCAN
);
43246 /* Pre-pass to recombine lvx and stvx patterns so we don't lose info. */
43247 recombine_lvx_stvx_patterns (fun
);
43249 /* Allocate structure to represent webs of insns. */
43250 insn_entry
= XCNEWVEC (swap_web_entry
, get_max_uid ());
43252 /* Walk the insns to gather basic data. */
43253 FOR_ALL_BB_FN (bb
, fun
)
43254 FOR_BB_INSNS_SAFE (bb
, insn
, curr_insn
)
43256 unsigned int uid
= INSN_UID (insn
);
43257 if (NONDEBUG_INSN_P (insn
))
43259 insn_entry
[uid
].insn
= insn
;
43261 if (GET_CODE (insn
) == CALL_INSN
)
43262 insn_entry
[uid
].is_call
= 1;
43264 /* Walk the uses and defs to see if we mention vector regs.
43265 Record any constraints on optimization of such mentions. */
43266 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
43268 FOR_EACH_INSN_INFO_USE (mention
, insn_info
)
43270 /* We use DF_REF_REAL_REG here to get inside any subregs. */
43271 machine_mode mode
= GET_MODE (DF_REF_REAL_REG (mention
));
43273 /* If a use gets its value from a call insn, it will be
43274 a hard register and will look like (reg:V4SI 3 3).
43275 The df analysis creates two mentions for GPR3 and GPR4,
43276 both DImode. We must recognize this and treat it as a
43277 vector mention to ensure the call is unioned with this
43279 if (mode
== DImode
&& DF_REF_INSN_INFO (mention
))
43281 rtx feeder
= DF_REF_INSN (mention
);
43282 /* FIXME: It is pretty hard to get from the df mention
43283 to the mode of the use in the insn. We arbitrarily
43284 pick a vector mode here, even though the use might
43285 be a real DImode. We can be too conservative
43286 (create a web larger than necessary) because of
43287 this, so consider eventually fixing this. */
43288 if (GET_CODE (feeder
) == CALL_INSN
)
43292 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode
) || mode
== TImode
)
43294 insn_entry
[uid
].is_relevant
= 1;
43295 if (mode
== TImode
|| mode
== V1TImode
43296 || FLOAT128_VECTOR_P (mode
))
43297 insn_entry
[uid
].is_128_int
= 1;
43298 if (DF_REF_INSN_INFO (mention
))
43299 insn_entry
[uid
].contains_subreg
43300 = !rtx_equal_p (DF_REF_REG (mention
),
43301 DF_REF_REAL_REG (mention
));
43302 union_defs (insn_entry
, insn
, mention
);
43305 FOR_EACH_INSN_INFO_DEF (mention
, insn_info
)
43307 /* We use DF_REF_REAL_REG here to get inside any subregs. */
43308 machine_mode mode
= GET_MODE (DF_REF_REAL_REG (mention
));
43310 /* If we're loading up a hard vector register for a call,
43311 it looks like (set (reg:V4SI 9 9) (...)). The df
43312 analysis creates two mentions for GPR9 and GPR10, both
43313 DImode. So relying on the mode from the mentions
43314 isn't sufficient to ensure we union the call into the
43315 web with the parameter setup code. */
43316 if (mode
== DImode
&& GET_CODE (insn
) == SET
43317 && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (insn
))))
43318 mode
= GET_MODE (SET_DEST (insn
));
43320 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode
) || mode
== TImode
)
43322 insn_entry
[uid
].is_relevant
= 1;
43323 if (mode
== TImode
|| mode
== V1TImode
43324 || FLOAT128_VECTOR_P (mode
))
43325 insn_entry
[uid
].is_128_int
= 1;
43326 if (DF_REF_INSN_INFO (mention
))
43327 insn_entry
[uid
].contains_subreg
43328 = !rtx_equal_p (DF_REF_REG (mention
),
43329 DF_REF_REAL_REG (mention
));
43330 /* REG_FUNCTION_VALUE_P is not valid for subregs. */
43331 else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention
)))
43332 insn_entry
[uid
].is_live_out
= 1;
43333 union_uses (insn_entry
, insn
, mention
);
43337 if (insn_entry
[uid
].is_relevant
)
43339 /* Determine if this is a load or store. */
43340 insn_entry
[uid
].is_load
= insn_is_load_p (insn
);
43341 insn_entry
[uid
].is_store
= insn_is_store_p (insn
);
43343 /* Determine if this is a doubleword swap. If not,
43344 determine whether it can legally be swapped. */
43345 if (insn_is_swap_p (insn
))
43346 insn_entry
[uid
].is_swap
= 1;
43349 unsigned int special
= SH_NONE
;
43350 insn_entry
[uid
].is_swappable
43351 = insn_is_swappable_p (insn_entry
, insn
, &special
);
43352 if (special
!= SH_NONE
&& insn_entry
[uid
].contains_subreg
)
43353 insn_entry
[uid
].is_swappable
= 0;
43354 else if (special
!= SH_NONE
)
43355 insn_entry
[uid
].special_handling
= special
;
43356 else if (insn_entry
[uid
].contains_subreg
)
43357 insn_entry
[uid
].special_handling
= SH_SUBREG
;
43365 fprintf (dump_file
, "\nSwap insn entry table when first built\n");
43366 dump_swap_insn_table (insn_entry
);
43369 /* Record unoptimizable webs. */
43370 unsigned e
= get_max_uid (), i
;
43371 for (i
= 0; i
< e
; ++i
)
43373 if (!insn_entry
[i
].is_relevant
)
43376 swap_web_entry
*root
43377 = (swap_web_entry
*)(&insn_entry
[i
])->unionfind_root ();
43379 if (insn_entry
[i
].is_live_in
|| insn_entry
[i
].is_live_out
43380 || (insn_entry
[i
].contains_subreg
43381 && insn_entry
[i
].special_handling
!= SH_SUBREG
)
43382 || insn_entry
[i
].is_128_int
|| insn_entry
[i
].is_call
43383 || !(insn_entry
[i
].is_swappable
|| insn_entry
[i
].is_swap
))
43384 root
->web_not_optimizable
= 1;
43386 /* If we have loads or stores that aren't permuting then the
43387 optimization isn't appropriate. */
43388 else if ((insn_entry
[i
].is_load
|| insn_entry
[i
].is_store
)
43389 && !insn_entry
[i
].is_swap
&& !insn_entry
[i
].is_swappable
)
43390 root
->web_not_optimizable
= 1;
43392 /* If we have permuting loads or stores that are not accompanied
43393 by a register swap, the optimization isn't appropriate. */
43394 else if (insn_entry
[i
].is_load
&& insn_entry
[i
].is_swap
)
43396 rtx insn
= insn_entry
[i
].insn
;
43397 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
43400 FOR_EACH_INSN_INFO_DEF (def
, insn_info
)
43402 struct df_link
*link
= DF_REF_CHAIN (def
);
43404 if (!chain_contains_only_swaps (insn_entry
, link
, FOR_LOADS
))
43406 root
->web_not_optimizable
= 1;
43411 else if (insn_entry
[i
].is_store
&& insn_entry
[i
].is_swap
)
43413 rtx insn
= insn_entry
[i
].insn
;
43414 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
43417 FOR_EACH_INSN_INFO_USE (use
, insn_info
)
43419 struct df_link
*link
= DF_REF_CHAIN (use
);
43421 if (!chain_contains_only_swaps (insn_entry
, link
, FOR_STORES
))
43423 root
->web_not_optimizable
= 1;
43432 fprintf (dump_file
, "\nSwap insn entry table after web analysis\n");
43433 dump_swap_insn_table (insn_entry
);
43436 /* For each load and store in an optimizable web (which implies
43437 the loads and stores are permuting), find the associated
43438 register swaps and mark them for removal. Due to various
43439 optimizations we may mark the same swap more than once. Also
43440 perform special handling for swappable insns that require it. */
43441 for (i
= 0; i
< e
; ++i
)
43442 if ((insn_entry
[i
].is_load
|| insn_entry
[i
].is_store
)
43443 && insn_entry
[i
].is_swap
)
43445 swap_web_entry
* root_entry
43446 = (swap_web_entry
*)((&insn_entry
[i
])->unionfind_root ());
43447 if (!root_entry
->web_not_optimizable
)
43448 mark_swaps_for_removal (insn_entry
, i
);
43450 else if (insn_entry
[i
].is_swappable
&& insn_entry
[i
].special_handling
)
43452 swap_web_entry
* root_entry
43453 = (swap_web_entry
*)((&insn_entry
[i
])->unionfind_root ());
43454 if (!root_entry
->web_not_optimizable
)
43455 handle_special_swappables (insn_entry
, i
);
43458 /* Now delete the swaps marked for removal. */
43459 for (i
= 0; i
< e
; ++i
)
43460 if (insn_entry
[i
].will_delete
)
43461 replace_swap_with_copy (insn_entry
, i
);
43468 const pass_data pass_data_analyze_swaps
=
43470 RTL_PASS
, /* type */
43471 "swaps", /* name */
43472 OPTGROUP_NONE
, /* optinfo_flags */
43473 TV_NONE
, /* tv_id */
43474 0, /* properties_required */
43475 0, /* properties_provided */
43476 0, /* properties_destroyed */
43477 0, /* todo_flags_start */
43478 TODO_df_finish
, /* todo_flags_finish */
43481 class pass_analyze_swaps
: public rtl_opt_pass
43484 pass_analyze_swaps(gcc::context
*ctxt
)
43485 : rtl_opt_pass(pass_data_analyze_swaps
, ctxt
)
43488 /* opt_pass methods: */
43489 virtual bool gate (function
*)
43491 return (optimize
> 0 && !BYTES_BIG_ENDIAN
&& TARGET_VSX
43492 && !TARGET_P9_VECTOR
&& rs6000_optimize_swaps
);
43495 virtual unsigned int execute (function
*fun
)
43497 return rs6000_analyze_swaps (fun
);
43502 return new pass_analyze_swaps (m_ctxt
);
43505 }; // class pass_analyze_swaps
43508 make_pass_analyze_swaps (gcc::context
*ctxt
)
43510 return new pass_analyze_swaps (ctxt
);
#ifdef RS6000_GLIBC_ATOMIC_FENV
/* Function declarations for rs6000_atomic_assign_expand_fenv.
   Lazily-built decls for the glibc __atomic_fe* helpers.  */
static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
#endif
43518 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
43521 rs6000_atomic_assign_expand_fenv (tree
*hold
, tree
*clear
, tree
*update
)
43523 if (!TARGET_HARD_FLOAT
|| !TARGET_FPRS
)
43525 #ifdef RS6000_GLIBC_ATOMIC_FENV
43526 if (atomic_hold_decl
== NULL_TREE
)
43529 = build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
43530 get_identifier ("__atomic_feholdexcept"),
43531 build_function_type_list (void_type_node
,
43532 double_ptr_type_node
,
43534 TREE_PUBLIC (atomic_hold_decl
) = 1;
43535 DECL_EXTERNAL (atomic_hold_decl
) = 1;
43538 if (atomic_clear_decl
== NULL_TREE
)
43541 = build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
43542 get_identifier ("__atomic_feclearexcept"),
43543 build_function_type_list (void_type_node
,
43545 TREE_PUBLIC (atomic_clear_decl
) = 1;
43546 DECL_EXTERNAL (atomic_clear_decl
) = 1;
43549 tree const_double
= build_qualified_type (double_type_node
,
43551 tree const_double_ptr
= build_pointer_type (const_double
);
43552 if (atomic_update_decl
== NULL_TREE
)
43555 = build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
43556 get_identifier ("__atomic_feupdateenv"),
43557 build_function_type_list (void_type_node
,
43560 TREE_PUBLIC (atomic_update_decl
) = 1;
43561 DECL_EXTERNAL (atomic_update_decl
) = 1;
43564 tree fenv_var
= create_tmp_var_raw (double_type_node
);
43565 TREE_ADDRESSABLE (fenv_var
) = 1;
43566 tree fenv_addr
= build1 (ADDR_EXPR
, double_ptr_type_node
, fenv_var
);
43568 *hold
= build_call_expr (atomic_hold_decl
, 1, fenv_addr
);
43569 *clear
= build_call_expr (atomic_clear_decl
, 0);
43570 *update
= build_call_expr (atomic_update_decl
, 1,
43571 fold_convert (const_double_ptr
, fenv_addr
));
43576 tree mffs
= rs6000_builtin_decls
[RS6000_BUILTIN_MFFS
];
43577 tree mtfsf
= rs6000_builtin_decls
[RS6000_BUILTIN_MTFSF
];
43578 tree call_mffs
= build_call_expr (mffs
, 0);
43580 /* Generates the equivalent of feholdexcept (&fenv_var)
43582 *fenv_var = __builtin_mffs ();
43584 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
43585 __builtin_mtfsf (0xff, fenv_hold); */
43587 /* Mask to clear everything except for the rounding modes and non-IEEE
43588 arithmetic flag. */
43589 const unsigned HOST_WIDE_INT hold_exception_mask
=
43590 HOST_WIDE_INT_C (0xffffffff00000007);
43592 tree fenv_var
= create_tmp_var_raw (double_type_node
);
43594 tree hold_mffs
= build2 (MODIFY_EXPR
, void_type_node
, fenv_var
, call_mffs
);
43596 tree fenv_llu
= build1 (VIEW_CONVERT_EXPR
, uint64_type_node
, fenv_var
);
43597 tree fenv_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
, fenv_llu
,
43598 build_int_cst (uint64_type_node
,
43599 hold_exception_mask
));
43601 tree fenv_hold_mtfsf
= build1 (VIEW_CONVERT_EXPR
, double_type_node
,
43604 tree hold_mtfsf
= build_call_expr (mtfsf
, 2,
43605 build_int_cst (unsigned_type_node
, 0xff),
43608 *hold
= build2 (COMPOUND_EXPR
, void_type_node
, hold_mffs
, hold_mtfsf
);
43610 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
43612 double fenv_clear = __builtin_mffs ();
43613 *(uint64_t)&fenv_clear &= 0xffffffff00000000LL;
43614 __builtin_mtfsf (0xff, fenv_clear); */
43616 /* Mask to clear everything except for the rounding modes and non-IEEE
43617 arithmetic flag. */
43618 const unsigned HOST_WIDE_INT clear_exception_mask
=
43619 HOST_WIDE_INT_C (0xffffffff00000000);
43621 tree fenv_clear
= create_tmp_var_raw (double_type_node
);
43623 tree clear_mffs
= build2 (MODIFY_EXPR
, void_type_node
, fenv_clear
, call_mffs
);
43625 tree fenv_clean_llu
= build1 (VIEW_CONVERT_EXPR
, uint64_type_node
, fenv_clear
);
43626 tree fenv_clear_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
,
43628 build_int_cst (uint64_type_node
,
43629 clear_exception_mask
));
43631 tree fenv_clear_mtfsf
= build1 (VIEW_CONVERT_EXPR
, double_type_node
,
43632 fenv_clear_llu_and
);
43634 tree clear_mtfsf
= build_call_expr (mtfsf
, 2,
43635 build_int_cst (unsigned_type_node
, 0xff),
43638 *clear
= build2 (COMPOUND_EXPR
, void_type_node
, clear_mffs
, clear_mtfsf
);
43640 /* Generates the equivalent of feupdateenv (&fenv_var)
43642 double old_fenv = __builtin_mffs ();
43643 double fenv_update;
43644 *(uint64_t*)&fenv_update = (*(uint64_t*)&old & 0xffffffff1fffff00LL) |
43645 (*(uint64_t*)fenv_var 0x1ff80fff);
43646 __builtin_mtfsf (0xff, fenv_update); */
43648 const unsigned HOST_WIDE_INT update_exception_mask
=
43649 HOST_WIDE_INT_C (0xffffffff1fffff00);
43650 const unsigned HOST_WIDE_INT new_exception_mask
=
43651 HOST_WIDE_INT_C (0x1ff80fff);
43653 tree old_fenv
= create_tmp_var_raw (double_type_node
);
43654 tree update_mffs
= build2 (MODIFY_EXPR
, void_type_node
, old_fenv
, call_mffs
);
43656 tree old_llu
= build1 (VIEW_CONVERT_EXPR
, uint64_type_node
, old_fenv
);
43657 tree old_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
, old_llu
,
43658 build_int_cst (uint64_type_node
,
43659 update_exception_mask
));
43661 tree new_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
, fenv_llu
,
43662 build_int_cst (uint64_type_node
,
43663 new_exception_mask
));
43665 tree new_llu_mask
= build2 (BIT_IOR_EXPR
, uint64_type_node
,
43666 old_llu_and
, new_llu_and
);
43668 tree fenv_update_mtfsf
= build1 (VIEW_CONVERT_EXPR
, double_type_node
,
43671 tree update_mtfsf
= build_call_expr (mtfsf
, 2,
43672 build_int_cst (unsigned_type_node
, 0xff),
43673 fenv_update_mtfsf
);
43675 *update
= build2 (COMPOUND_EXPR
, void_type_node
, update_mffs
, update_mtfsf
);
43678 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
43681 rs6000_optab_supported_p (int op
, machine_mode mode1
, machine_mode
,
43682 optimization_type opt_type
)
43687 return (opt_type
== OPTIMIZE_FOR_SPEED
43688 && RS6000_RECIP_AUTO_RSQRTE_P (mode1
));
43695 struct gcc_target targetm
= TARGET_INITIALIZER
;
43697 #include "gt-powerpcspe.h"