1 /* Subroutines used for code generation on IBM RS/6000.
2 Copyright (C) 1991-2018 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #define IN_TARGET_CODE 1
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "memmodel.h"
30 #include "gimple.h"
31 #include "cfghooks.h"
32 #include "cfgloop.h"
33 #include "df.h"
34 #include "tm_p.h"
35 #include "stringpool.h"
36 #include "expmed.h"
37 #include "optabs.h"
38 #include "regs.h"
39 #include "ira.h"
40 #include "recog.h"
41 #include "cgraph.h"
42 #include "diagnostic-core.h"
43 #include "insn-attr.h"
44 #include "flags.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "attribs.h"
48 #include "stor-layout.h"
49 #include "calls.h"
50 #include "print-tree.h"
51 #include "varasm.h"
52 #include "explow.h"
53 #include "expr.h"
54 #include "output.h"
55 #include "dbxout.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "sched-int.h"
60 #include "gimplify.h"
61 #include "gimple-fold.h"
62 #include "gimple-iterator.h"
63 #include "gimple-ssa.h"
64 #include "gimple-walk.h"
65 #include "intl.h"
66 #include "params.h"
67 #include "tm-constrs.h"
68 #include "tree-vectorizer.h"
69 #include "target-globals.h"
70 #include "builtins.h"
71 #include "tree-vector-builder.h"
72 #include "context.h"
73 #include "tree-pass.h"
74 #include "except.h"
75 #if TARGET_XCOFF
76 #include "xcoffout.h" /* get declarations of xcoff_*_section_name */
77 #endif
78 #if TARGET_MACHO
79 #include "gstab.h" /* for N_SLINE */
80 #endif
81 #include "case-cfn-macros.h"
82 #include "ppc-auxv.h"
83 #include "tree-ssa-propagate.h"
84
85 /* This file should be included last. */
86 #include "target-def.h"
87
88 #ifndef TARGET_NO_PROTOTYPE
89 #define TARGET_NO_PROTOTYPE 0
90 #endif
91
92 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
93 systems will also set long double to be IEEE 128-bit. AIX and Darwin
94 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
95 those systems will not pick up this default. This needs to be after all
96 of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
97 properly defined. */
98 #ifndef TARGET_IEEEQUAD_DEFAULT
99 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
100 #define TARGET_IEEEQUAD_DEFAULT 1
101 #else
102 #define TARGET_IEEEQUAD_DEFAULT 0
103 #endif
104 #endif
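/* Worked example of the block above (illustrative): on a target where
   POWERPC_LINUX is defined (e.g. powerpc64le-linux-gnu) the #else arm is
   taken and TARGET_IEEEQUAD_DEFAULT becomes 0; on a target defining
   neither macro it becomes 1, unless a target header already defined
   TARGET_IEEEQUAD_DEFAULT itself.  */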
105
106 static pad_direction rs6000_function_arg_padding (machine_mode, const_tree);
107
108 /* Structure used to define the rs6000 stack */
109 typedef struct rs6000_stack {
110 int reload_completed; /* stack info won't change from here on */
111 int first_gp_reg_save; /* first callee saved GP register used */
112 int first_fp_reg_save; /* first callee saved FP register used */
113 int first_altivec_reg_save; /* first callee saved AltiVec register used */
114 int lr_save_p; /* true if the link reg needs to be saved */
115 int cr_save_p; /* true if the CR reg needs to be saved */
116 unsigned int vrsave_mask; /* mask of vec registers to save */
117 int push_p; /* true if we need to allocate stack space */
118 int calls_p; /* true if the function makes any calls */
119 int world_save_p; /* true if we're saving *everything*:
120 r13-r31, cr, f14-f31, vrsave, v20-v31 */
121 enum rs6000_abi abi; /* which ABI to use */
122 int gp_save_offset; /* offset to save GP regs from initial SP */
123 int fp_save_offset; /* offset to save FP regs from initial SP */
124 int altivec_save_offset; /* offset to save AltiVec regs from initial SP */
125 int lr_save_offset; /* offset to save LR from initial SP */
126 int cr_save_offset; /* offset to save CR from initial SP */
127 int vrsave_save_offset; /* offset to save VRSAVE from initial SP */
128 int varargs_save_offset; /* offset to save the varargs registers */
129 int ehrd_offset; /* offset to EH return data */
130 int ehcr_offset; /* offset to EH CR field data */
131 int reg_size; /* register size (4 or 8) */
132 HOST_WIDE_INT vars_size; /* variable save area size */
133 int parm_size; /* outgoing parameter size */
134 int save_size; /* save area size */
135 int fixed_size; /* fixed size of stack frame */
136 int gp_size; /* size of saved GP registers */
137 int fp_size; /* size of saved FP registers */
138 int altivec_size; /* size of saved AltiVec registers */
139 int cr_size; /* size to hold CR if not in fixed area */
140 int vrsave_size; /* size to hold VRSAVE */
141 int altivec_padding_size; /* size of altivec alignment padding */
142 HOST_WIDE_INT total_size; /* total bytes allocated for stack */
143 int savres_strategy;
144 } rs6000_stack_t;
145
146 /* A C structure for machine-specific, per-function data.
147 This is added to the cfun structure. */
148 typedef struct GTY(()) machine_function
149 {
150 /* Flags if __builtin_return_address (n) with n >= 1 was used. */
151 int ra_needs_full_frame;
152 /* Flags if __builtin_return_address (0) was used. */
153 int ra_need_lr;
154 /* Cache lr_save_p after expansion of builtin_eh_return. */
155 int lr_save_state;
156 /* Whether we need to save the TOC to the reserved stack location in the
157 function prologue. */
158 bool save_toc_in_prologue;
159 /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
160 varargs save area. */
161 HOST_WIDE_INT varargs_save_offset;
162 /* Alternative internal arg pointer for -fsplit-stack. */
163 rtx split_stack_arg_pointer;
164 bool split_stack_argp_used;
165 /* Flag if r2 setup is needed with ELFv2 ABI. */
166 bool r2_setup_needed;
167 /* The number of components we use for separate shrink-wrapping. */
168 int n_components;
169 /* The components already handled by separate shrink-wrapping, which should
170 not be considered by the prologue and epilogue. */
171 bool gpr_is_wrapped_separately[32];
172 bool fpr_is_wrapped_separately[32];
173 bool lr_is_wrapped_separately;
174 bool toc_is_wrapped_separately;
175 } machine_function;
176
177 /* Support targetm.vectorize.builtin_mask_for_load. */
178 static GTY(()) tree altivec_builtin_mask_for_load;
179
180 /* Set to nonzero once AIX common-mode calls have been defined. */
181 static GTY(()) int common_mode_defined;
182
183 /* Label number of the label created for -mrelocatable, which is called so
184 that we can get the address of the GOT section. */
185 static int rs6000_pic_labelno;
186
187 #ifdef USING_ELFOS_H
188 /* Counter for labels which are to be placed in .fixup. */
189 int fixuplabelno = 0;
190 #endif
191
192 /* Whether to use variant of AIX ABI for PowerPC64 Linux. */
193 int dot_symbols;
194
195 /* Specify the machine mode that pointers have. After generation of rtl, the
196 compiler makes no further distinction between pointers and any other objects
197 of this machine mode. */
198 scalar_int_mode rs6000_pmode;
199
200 #if TARGET_ELF
201 /* Note whether IEEE 128-bit floating point was passed or returned, either as
202 the __float128/_Float128 explicit type, or when long double is IEEE 128-bit
203 floating point. We changed the default C++ mangling for these types and we
204 may want to generate a weak alias of the old mangling (U10__float128) to the
205 new mangling (u9__ieee128). */
206 static bool rs6000_passes_ieee128;
207 #endif
208
209 /* Generate the mangled name (i.e. U10__float128) used in GCC 8.1, and not the
210 name used in current releases (i.e. u9__ieee128). */
211 static bool ieee128_mangling_gcc_8_1;
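/* For illustration (names assumed, not emitted here): under the GCC 8.1
   scheme a function such as

     void f (__float128);

   is mangled as _Z1fU10__float128, while the current scheme produces
   _Z1fu9__ieee128; the weak-alias support mentioned above lets binaries
   built against the old mangling keep linking.  */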
212
213 /* Width in bits of a pointer. */
214 unsigned rs6000_pointer_size;
215
216 #ifdef HAVE_AS_GNU_ATTRIBUTE
217 # ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
218 # define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
219 # endif
220 /* Flag whether floating point values have been passed/returned.
221 Note that this doesn't say whether fprs are used, since the
222 Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
223 should be set for soft-float values passed in gprs and ieee128
224 values passed in vsx registers. */
225 static bool rs6000_passes_float;
226 static bool rs6000_passes_long_double;
227 /* Flag whether vector values have been passed/returned. */
228 static bool rs6000_passes_vector;
229 /* Flag whether small (<= 8 byte) structures have been returned. */
230 static bool rs6000_returns_struct;
231 #endif
232
233 /* Value is TRUE if register/mode pair is acceptable. */
234 static bool rs6000_hard_regno_mode_ok_p
235 [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
236
237 /* Maximum number of registers needed for a given register class and mode. */
238 unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];
239
240 /* How many registers are needed for a given register and mode. */
241 unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
242
243 /* Map register number to register class. */
244 enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];
245
246 static int dbg_cost_ctrl;
247
248 /* Built in types. */
249 tree rs6000_builtin_types[RS6000_BTI_MAX];
250 tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];
251
252 /* Flag to say the TOC is initialized */
253 int toc_initialized, need_toc_init;
254 char toc_label_name[10];
255
256 /* Cached value of rs6000_variable_issue. This is cached in the
257 rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */
258 static short cached_can_issue_more;
259
260 static GTY(()) section *read_only_data_section;
261 static GTY(()) section *private_data_section;
262 static GTY(()) section *tls_data_section;
263 static GTY(()) section *tls_private_data_section;
264 static GTY(()) section *read_only_private_data_section;
265 static GTY(()) section *sdata2_section;
266 static GTY(()) section *toc_section;
267
268 struct builtin_description
269 {
270 const HOST_WIDE_INT mask;
271 const enum insn_code icode;
272 const char *const name;
273 const enum rs6000_builtins code;
274 };
275
276 /* Describe the vector unit used for modes. */
277 enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
278 enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];
279
280 /* Register classes for various constraints that are based on the target
281 switches. */
282 enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
283
284 /* Describe the alignment of a vector. */
285 int rs6000_vector_align[NUM_MACHINE_MODES];
286
287 /* Map selected modes to types for builtins. */
288 static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];
289
290 /* What modes to automatically generate reciprocal divide estimate (fre) and
291 reciprocal sqrt (frsqrte) for. */
292 unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
293
294 /* Masks to determine which reciprocal estimate instructions to generate
295 automatically. */
296 enum rs6000_recip_mask {
297 RECIP_SF_DIV = 0x001, /* Use divide estimate */
298 RECIP_DF_DIV = 0x002,
299 RECIP_V4SF_DIV = 0x004,
300 RECIP_V2DF_DIV = 0x008,
301
302 RECIP_SF_RSQRT = 0x010, /* Use reciprocal sqrt estimate. */
303 RECIP_DF_RSQRT = 0x020,
304 RECIP_V4SF_RSQRT = 0x040,
305 RECIP_V2DF_RSQRT = 0x080,
306
307 /* Various combinations of flags for -mrecip=xxx. */
308 RECIP_NONE = 0,
309 RECIP_ALL = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
310 | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
311 | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),
312
313 RECIP_HIGH_PRECISION = RECIP_ALL,
314
315 /* On low precision machines like the power5, don't enable double precision
316 reciprocal square root estimate, since it isn't accurate enough. */
317 RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
318 };
319
320 /* -mrecip options. */
321 static struct
322 {
323 const char *string; /* option name */
324 unsigned int mask; /* mask bits to set */
325 } recip_options[] = {
326 { "all", RECIP_ALL },
327 { "none", RECIP_NONE },
328 { "div", (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
329 | RECIP_V2DF_DIV) },
330 { "divf", (RECIP_SF_DIV | RECIP_V4SF_DIV) },
331 { "divd", (RECIP_DF_DIV | RECIP_V2DF_DIV) },
332 { "rsqrt", (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
333 | RECIP_V2DF_RSQRT) },
334 { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
335 { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
336 };
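/* Example (derived from the table above): -mrecip=divd,rsqrtd sets
   RECIP_DF_DIV | RECIP_V2DF_DIV | RECIP_DF_RSQRT | RECIP_V2DF_RSQRT,
   i.e. double-precision divide and rsqrt estimates, both scalar and
   vector.  */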
337
338 /* Used by __builtin_cpu_is(), mapping from PLATFORM names to values. */
339 static const struct
340 {
341 const char *cpu;
342 unsigned int cpuid;
343 } cpu_is_info[] = {
344 { "power9", PPC_PLATFORM_POWER9 },
345 { "power8", PPC_PLATFORM_POWER8 },
346 { "power7", PPC_PLATFORM_POWER7 },
347 { "power6x", PPC_PLATFORM_POWER6X },
348 { "power6", PPC_PLATFORM_POWER6 },
349 { "power5+", PPC_PLATFORM_POWER5_PLUS },
350 { "power5", PPC_PLATFORM_POWER5 },
351 { "ppc970", PPC_PLATFORM_PPC970 },
352 { "power4", PPC_PLATFORM_POWER4 },
353 { "ppca2", PPC_PLATFORM_PPCA2 },
354 { "ppc476", PPC_PLATFORM_PPC476 },
355 { "ppc464", PPC_PLATFORM_PPC464 },
356 { "ppc440", PPC_PLATFORM_PPC440 },
357 { "ppc405", PPC_PLATFORM_PPC405 },
358 { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
359 };
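/* The table above backs user code such as (illustrative):

     if (__builtin_cpu_is ("power9"))
       return foo_power9 (x);

   The string is looked up here at compile time, and the resulting
   PPC_PLATFORM_* value is compared at run time against the AT_PLATFORM
   datum that the libc caches in the TCB.  */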
360
361 /* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks. */
362 static const struct
363 {
364 const char *hwcap;
365 int mask;
366 unsigned int id;
367 } cpu_supports_info[] = {
368 /* AT_HWCAP masks. */
369 { "4xxmac", PPC_FEATURE_HAS_4xxMAC, 0 },
370 { "altivec", PPC_FEATURE_HAS_ALTIVEC, 0 },
371 { "arch_2_05", PPC_FEATURE_ARCH_2_05, 0 },
372 { "arch_2_06", PPC_FEATURE_ARCH_2_06, 0 },
373 { "archpmu", PPC_FEATURE_PERFMON_COMPAT, 0 },
374 { "booke", PPC_FEATURE_BOOKE, 0 },
375 { "cellbe", PPC_FEATURE_CELL_BE, 0 },
376 { "dfp", PPC_FEATURE_HAS_DFP, 0 },
377 { "efpdouble", PPC_FEATURE_HAS_EFP_DOUBLE, 0 },
378 { "efpsingle", PPC_FEATURE_HAS_EFP_SINGLE, 0 },
379 { "fpu", PPC_FEATURE_HAS_FPU, 0 },
380 { "ic_snoop", PPC_FEATURE_ICACHE_SNOOP, 0 },
381 { "mmu", PPC_FEATURE_HAS_MMU, 0 },
382 { "notb", PPC_FEATURE_NO_TB, 0 },
383 { "pa6t", PPC_FEATURE_PA6T, 0 },
384 { "power4", PPC_FEATURE_POWER4, 0 },
385 { "power5", PPC_FEATURE_POWER5, 0 },
386 { "power5+", PPC_FEATURE_POWER5_PLUS, 0 },
387 { "power6x", PPC_FEATURE_POWER6_EXT, 0 },
388 { "ppc32", PPC_FEATURE_32, 0 },
389 { "ppc601", PPC_FEATURE_601_INSTR, 0 },
390 { "ppc64", PPC_FEATURE_64, 0 },
391 { "ppcle", PPC_FEATURE_PPC_LE, 0 },
392 { "smt", PPC_FEATURE_SMT, 0 },
393 { "spe", PPC_FEATURE_HAS_SPE, 0 },
394 { "true_le", PPC_FEATURE_TRUE_LE, 0 },
395 { "ucache", PPC_FEATURE_UNIFIED_CACHE, 0 },
396 { "vsx", PPC_FEATURE_HAS_VSX, 0 },
397
398 /* AT_HWCAP2 masks. */
399 { "arch_2_07", PPC_FEATURE2_ARCH_2_07, 1 },
400 { "dscr", PPC_FEATURE2_HAS_DSCR, 1 },
401 { "ebb", PPC_FEATURE2_HAS_EBB, 1 },
402 { "htm", PPC_FEATURE2_HAS_HTM, 1 },
403 { "htm-nosc", PPC_FEATURE2_HTM_NOSC, 1 },
404 { "htm-no-suspend", PPC_FEATURE2_HTM_NO_SUSPEND, 1 },
405 { "isel", PPC_FEATURE2_HAS_ISEL, 1 },
406 { "tar", PPC_FEATURE2_HAS_TAR, 1 },
407 { "vcrypto", PPC_FEATURE2_HAS_VEC_CRYPTO, 1 },
408 { "arch_3_00", PPC_FEATURE2_ARCH_3_00, 1 },
409 { "ieee128", PPC_FEATURE2_HAS_IEEE128, 1 },
410 { "darn", PPC_FEATURE2_DARN, 1 },
411 { "scv", PPC_FEATURE2_SCV, 1 }
412 };
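/* Likewise for __builtin_cpu_supports (illustrative):

     if (__builtin_cpu_supports ("vsx"))
       return foo_vsx (x);

   The name picks one of the masks above; entries with id 0 are tested
   against AT_HWCAP and entries with id 1 against AT_HWCAP2, both read
   from the TCB.  */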
413
414 /* On PowerPC, we have a limited number of target clones that we care about
415 which means we can use an array to hold the options, rather than having more
416 elaborate data structures to identify each possible variation. Order the
417 clones from the default to the highest ISA. */
418 enum {
419 CLONE_DEFAULT = 0, /* default clone. */
420 CLONE_ISA_2_05, /* ISA 2.05 (power6). */
421 CLONE_ISA_2_06, /* ISA 2.06 (power7). */
422 CLONE_ISA_2_07, /* ISA 2.07 (power8). */
423 CLONE_ISA_3_00, /* ISA 3.00 (power9). */
424 CLONE_MAX
425 };
426
427 /* Map compiler ISA bits into HWCAP names. */
428 struct clone_map {
429 HOST_WIDE_INT isa_mask; /* rs6000_isa mask */
430 const char *name; /* name to use in __builtin_cpu_supports. */
431 };
432
433 static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
434 { 0, "" }, /* Default options. */
435 { OPTION_MASK_CMPB, "arch_2_05" }, /* ISA 2.05 (power6). */
436 { OPTION_MASK_POPCNTD, "arch_2_06" }, /* ISA 2.06 (power7). */
437 { OPTION_MASK_P8_VECTOR, "arch_2_07" }, /* ISA 2.07 (power8). */
438 { OPTION_MASK_P9_VECTOR, "arch_3_00" }, /* ISA 3.00 (power9). */
439 };
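/* These entries implement the "target_clones" attribute, e.g.
   (user code, illustrative):

     __attribute__ ((target_clones ("cpu=power9", "cpu=power8", "default")))
     long mod (long a, long b) { return a % b; }

   The generated ifunc resolver tests the clones from the highest ISA
   down using __builtin_cpu_supports with the HWCAP names above and
   picks the first one that is supported.  */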
440
441
442 /* Newer LIBCs explicitly export this symbol to declare that they provide
443 the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. We emit a
444 reference to this symbol whenever we expand a CPU builtin, so that
445 we never link against an old LIBC. */
446 const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";
447
448 /* True if we have expanded a CPU builtin. */
449 bool cpu_builtin_p;
450
451 /* Pointer to function (in rs6000-c.c) that can define or undefine target
452 macros that have changed. Languages that don't support the preprocessor
453 don't link in rs6000-c.c, so we can't call it directly. */
454 void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
455
456 /* Simplify register classes into simpler classifications. We assume
457 GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
458 check for standard register classes (gpr/floating/altivec/vsx) and
459 floating/vector classes (float/altivec/vsx). */
460
461 enum rs6000_reg_type {
462 NO_REG_TYPE,
463 PSEUDO_REG_TYPE,
464 GPR_REG_TYPE,
465 VSX_REG_TYPE,
466 ALTIVEC_REG_TYPE,
467 FPR_REG_TYPE,
468 SPR_REG_TYPE,
469 CR_REG_TYPE
470 };
471
472 /* Map register class to register type. */
473 static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
474
475 /* First/last register type for the 'normal' register types (i.e. general
476 purpose, floating point, altivec, and VSX registers). */
477 #define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
478
479 #define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
480
481
482 /* Register classes we care about in secondary reload or when checking for a
483 legitimate address. We only need to worry about GPR, FPR, and Altivec
484 registers here, along with an ANY field that is the OR of the 3 classes. */
485
486 enum rs6000_reload_reg_type {
487 RELOAD_REG_GPR, /* General purpose registers. */
488 RELOAD_REG_FPR, /* Traditional floating point regs. */
489 RELOAD_REG_VMX, /* Altivec (VMX) registers. */
490 RELOAD_REG_ANY, /* OR of GPR, FPR, Altivec masks. */
491 N_RELOAD_REG
492 };
493
494 /* For setting up register classes, loop through the 3 register classes mapping
495 into real registers, and skip the ANY class, which is just an OR of the
496 bits. */
497 #define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
498 #define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX
499
500 /* Map reload register type to a register in the register class. */
501 struct reload_reg_map_type {
502 const char *name; /* Register class name. */
503 int reg; /* Register in the register class. */
504 };
505
506 static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
507 { "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */
508 { "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */
509 { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. */
510 { "Any", -1 }, /* RELOAD_REG_ANY. */
511 };
512
513 /* Mask bits for each register class, indexed per mode. Historically the
514 compiler has been more restrictive about which types can do PRE_MODIFY
515 rather than PRE_INC and PRE_DEC, so keep track of separate bits for them. */
516 typedef unsigned char addr_mask_type;
517
518 #define RELOAD_REG_VALID 0x01 /* Mode valid in register. */
519 #define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers. */
520 #define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing. */
521 #define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */
522 #define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */
523 #define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */
524 #define RELOAD_REG_AND_M16 0x40 /* AND -16 addressing. */
525 #define RELOAD_REG_QUAD_OFFSET 0x80 /* quad offset is limited. */
526
527 /* Masks of the valid addressing modes for each register type, per mode. */
528 struct rs6000_reg_addr {
529 enum insn_code reload_load; /* INSN to reload for loading. */
530 enum insn_code reload_store; /* INSN to reload for storing. */
531 enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR. */
532 enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX. */
533 enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR. */
534 addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. */
535 bool scalar_in_vmx_p; /* Scalar value can go in VMX. */
536 };
537
538 static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
539
540 /* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
541 static inline bool
542 mode_supports_pre_incdec_p (machine_mode mode)
543 {
544 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
545 != 0);
546 }
547
548 /* Helper function to say whether a mode supports PRE_MODIFY. */
549 static inline bool
550 mode_supports_pre_modify_p (machine_mode mode)
551 {
552 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
553 != 0);
554 }
555
556 /* Return true if we have D-form addressing in altivec registers. */
557 static inline bool
558 mode_supports_vmx_dform (machine_mode mode)
559 {
560 return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
561 }
562
563 /* Return true if we have D-form addressing in VSX registers. This addressing
564 is more limited than normal d-form addressing in that the offset must be
565 aligned on a 16-byte boundary. */
566 static inline bool
567 mode_supports_dq_form (machine_mode mode)
568 {
569 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
570 != 0);
571 }
572
573 /* Given that there exists at least one variable that is set (produced)
574 by OUT_INSN and read (consumed) by IN_INSN, return true iff
575 IN_INSN represents one or more memory store operations and none of
576 the variables set by OUT_INSN is used by IN_INSN as the address of a
577 store operation. If either IN_INSN or OUT_INSN does not represent
578 a "single" RTL SET expression (as loosely defined by the
579 implementation of the single_set function) or a PARALLEL with only
580 SETs, CLOBBERs, and USEs inside, this function returns false.
581
582 This rs6000-specific version of store_data_bypass_p checks for
583 certain conditions that result in assertion failures (and internal
584 compiler errors) in the generic store_data_bypass_p function and
585 returns false rather than calling store_data_bypass_p if one of the
586 problematic conditions is detected. */
587
588 int
589 rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
590 {
591 rtx out_set, in_set;
592 rtx out_pat, in_pat;
593 rtx out_exp, in_exp;
594 int i, j;
595
596 in_set = single_set (in_insn);
597 if (in_set)
598 {
599 if (MEM_P (SET_DEST (in_set)))
600 {
601 out_set = single_set (out_insn);
602 if (!out_set)
603 {
604 out_pat = PATTERN (out_insn);
605 if (GET_CODE (out_pat) == PARALLEL)
606 {
607 for (i = 0; i < XVECLEN (out_pat, 0); i++)
608 {
609 out_exp = XVECEXP (out_pat, 0, i);
610 if ((GET_CODE (out_exp) == CLOBBER)
611 || (GET_CODE (out_exp) == USE))
612 continue;
613 else if (GET_CODE (out_exp) != SET)
614 return false;
615 }
616 }
617 }
618 }
619 }
620 else
621 {
622 in_pat = PATTERN (in_insn);
623 if (GET_CODE (in_pat) != PARALLEL)
624 return false;
625
626 for (i = 0; i < XVECLEN (in_pat, 0); i++)
627 {
628 in_exp = XVECEXP (in_pat, 0, i);
629 if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
630 continue;
631 else if (GET_CODE (in_exp) != SET)
632 return false;
633
634 if (MEM_P (SET_DEST (in_exp)))
635 {
636 out_set = single_set (out_insn);
637 if (!out_set)
638 {
639 out_pat = PATTERN (out_insn);
640 if (GET_CODE (out_pat) != PARALLEL)
641 return false;
642 for (j = 0; j < XVECLEN (out_pat, 0); j++)
643 {
644 out_exp = XVECEXP (out_pat, 0, j);
645 if ((GET_CODE (out_exp) == CLOBBER)
646 || (GET_CODE (out_exp) == USE))
647 continue;
648 else if (GET_CODE (out_exp) != SET)
649 return false;
650 }
651 }
652 }
653 }
654 }
655 return store_data_bypass_p (out_insn, in_insn);
656 }
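/* This predicate is referenced by name from define_bypass guards in the
   pipeline descriptions, along the lines of (illustrative):

     (define_bypass 1 "producer-insn" "consumer-store"
                    "rs6000_store_data_bypass_p")  */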
657
658 \f
659 /* Processor costs (relative to an add) */
660
661 const struct processor_costs *rs6000_cost;
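/* All entries below use COSTS_N_INSNS from rtl.h, which scales by the
   nominal cost of one instruction (COSTS_N_INSNS (N) is (N) * 4), so
   e.g. COSTS_N_INSNS (18) for divsi models a divide as costing the
   same as 18 integer adds.  */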
662
663 /* Instruction size costs on 32-bit processors. */
664 static const
665 struct processor_costs size32_cost = {
666 COSTS_N_INSNS (1), /* mulsi */
667 COSTS_N_INSNS (1), /* mulsi_const */
668 COSTS_N_INSNS (1), /* mulsi_const9 */
669 COSTS_N_INSNS (1), /* muldi */
670 COSTS_N_INSNS (1), /* divsi */
671 COSTS_N_INSNS (1), /* divdi */
672 COSTS_N_INSNS (1), /* fp */
673 COSTS_N_INSNS (1), /* dmul */
674 COSTS_N_INSNS (1), /* sdiv */
675 COSTS_N_INSNS (1), /* ddiv */
676 32, /* cache line size */
677 0, /* l1 cache */
678 0, /* l2 cache */
679 0, /* streams */
680 0, /* SF->DF convert */
681 };
682
683 /* Instruction size costs on 64-bit processors. */
684 static const
685 struct processor_costs size64_cost = {
686 COSTS_N_INSNS (1), /* mulsi */
687 COSTS_N_INSNS (1), /* mulsi_const */
688 COSTS_N_INSNS (1), /* mulsi_const9 */
689 COSTS_N_INSNS (1), /* muldi */
690 COSTS_N_INSNS (1), /* divsi */
691 COSTS_N_INSNS (1), /* divdi */
692 COSTS_N_INSNS (1), /* fp */
693 COSTS_N_INSNS (1), /* dmul */
694 COSTS_N_INSNS (1), /* sdiv */
695 COSTS_N_INSNS (1), /* ddiv */
696 128, /* cache line size */
697 0, /* l1 cache */
698 0, /* l2 cache */
699 0, /* streams */
700 0, /* SF->DF convert */
701 };
702
703 /* Instruction costs on RS64A processors. */
704 static const
705 struct processor_costs rs64a_cost = {
706 COSTS_N_INSNS (20), /* mulsi */
707 COSTS_N_INSNS (12), /* mulsi_const */
708 COSTS_N_INSNS (8), /* mulsi_const9 */
709 COSTS_N_INSNS (34), /* muldi */
710 COSTS_N_INSNS (65), /* divsi */
711 COSTS_N_INSNS (67), /* divdi */
712 COSTS_N_INSNS (4), /* fp */
713 COSTS_N_INSNS (4), /* dmul */
714 COSTS_N_INSNS (31), /* sdiv */
715 COSTS_N_INSNS (31), /* ddiv */
716 128, /* cache line size */
717 128, /* l1 cache */
718 2048, /* l2 cache */
719 1, /* streams */
720 0, /* SF->DF convert */
721 };
722
723 /* Instruction costs on MPCCORE processors. */
724 static const
725 struct processor_costs mpccore_cost = {
726 COSTS_N_INSNS (2), /* mulsi */
727 COSTS_N_INSNS (2), /* mulsi_const */
728 COSTS_N_INSNS (2), /* mulsi_const9 */
729 COSTS_N_INSNS (2), /* muldi */
730 COSTS_N_INSNS (6), /* divsi */
731 COSTS_N_INSNS (6), /* divdi */
732 COSTS_N_INSNS (4), /* fp */
733 COSTS_N_INSNS (5), /* dmul */
734 COSTS_N_INSNS (10), /* sdiv */
735 COSTS_N_INSNS (17), /* ddiv */
736 32, /* cache line size */
737 4, /* l1 cache */
738 16, /* l2 cache */
739 1, /* streams */
740 0, /* SF->DF convert */
741 };
742
743 /* Instruction costs on PPC403 processors. */
744 static const
745 struct processor_costs ppc403_cost = {
746 COSTS_N_INSNS (4), /* mulsi */
747 COSTS_N_INSNS (4), /* mulsi_const */
748 COSTS_N_INSNS (4), /* mulsi_const9 */
749 COSTS_N_INSNS (4), /* muldi */
750 COSTS_N_INSNS (33), /* divsi */
751 COSTS_N_INSNS (33), /* divdi */
752 COSTS_N_INSNS (11), /* fp */
753 COSTS_N_INSNS (11), /* dmul */
754 COSTS_N_INSNS (11), /* sdiv */
755 COSTS_N_INSNS (11), /* ddiv */
756 32, /* cache line size */
757 4, /* l1 cache */
758 16, /* l2 cache */
759 1, /* streams */
760 0, /* SF->DF convert */
761 };
762
763 /* Instruction costs on PPC405 processors. */
764 static const
765 struct processor_costs ppc405_cost = {
766 COSTS_N_INSNS (5), /* mulsi */
767 COSTS_N_INSNS (4), /* mulsi_const */
768 COSTS_N_INSNS (3), /* mulsi_const9 */
769 COSTS_N_INSNS (5), /* muldi */
770 COSTS_N_INSNS (35), /* divsi */
771 COSTS_N_INSNS (35), /* divdi */
772 COSTS_N_INSNS (11), /* fp */
773 COSTS_N_INSNS (11), /* dmul */
774 COSTS_N_INSNS (11), /* sdiv */
775 COSTS_N_INSNS (11), /* ddiv */
776 32, /* cache line size */
777 16, /* l1 cache */
778 128, /* l2 cache */
779 1, /* streams */
780 0, /* SF->DF convert */
781 };
782
783 /* Instruction costs on PPC440 processors. */
784 static const
785 struct processor_costs ppc440_cost = {
786 COSTS_N_INSNS (3), /* mulsi */
787 COSTS_N_INSNS (2), /* mulsi_const */
788 COSTS_N_INSNS (2), /* mulsi_const9 */
789 COSTS_N_INSNS (3), /* muldi */
790 COSTS_N_INSNS (34), /* divsi */
791 COSTS_N_INSNS (34), /* divdi */
792 COSTS_N_INSNS (5), /* fp */
793 COSTS_N_INSNS (5), /* dmul */
794 COSTS_N_INSNS (19), /* sdiv */
795 COSTS_N_INSNS (33), /* ddiv */
796 32, /* cache line size */
797 32, /* l1 cache */
798 256, /* l2 cache */
799 1, /* streams */
800 0, /* SF->DF convert */
801 };
802
803 /* Instruction costs on PPC476 processors. */
804 static const
805 struct processor_costs ppc476_cost = {
806 COSTS_N_INSNS (4), /* mulsi */
807 COSTS_N_INSNS (4), /* mulsi_const */
808 COSTS_N_INSNS (4), /* mulsi_const9 */
809 COSTS_N_INSNS (4), /* muldi */
810 COSTS_N_INSNS (11), /* divsi */
811 COSTS_N_INSNS (11), /* divdi */
812 COSTS_N_INSNS (6), /* fp */
813 COSTS_N_INSNS (6), /* dmul */
814 COSTS_N_INSNS (19), /* sdiv */
815 COSTS_N_INSNS (33), /* ddiv */
816 32, /* l1 cache line size */
817 32, /* l1 cache */
818 512, /* l2 cache */
819 1, /* streams */
820 0, /* SF->DF convert */
821 };
822
823 /* Instruction costs on PPC601 processors. */
824 static const
825 struct processor_costs ppc601_cost = {
826 COSTS_N_INSNS (5), /* mulsi */
827 COSTS_N_INSNS (5), /* mulsi_const */
828 COSTS_N_INSNS (5), /* mulsi_const9 */
829 COSTS_N_INSNS (5), /* muldi */
830 COSTS_N_INSNS (36), /* divsi */
831 COSTS_N_INSNS (36), /* divdi */
832 COSTS_N_INSNS (4), /* fp */
833 COSTS_N_INSNS (5), /* dmul */
834 COSTS_N_INSNS (17), /* sdiv */
835 COSTS_N_INSNS (31), /* ddiv */
836 32, /* cache line size */
837 32, /* l1 cache */
838 256, /* l2 cache */
839 1, /* streams */
840 0, /* SF->DF convert */
841 };
842
843 /* Instruction costs on PPC603 processors. */
844 static const
845 struct processor_costs ppc603_cost = {
846 COSTS_N_INSNS (5), /* mulsi */
847 COSTS_N_INSNS (3), /* mulsi_const */
848 COSTS_N_INSNS (2), /* mulsi_const9 */
849 COSTS_N_INSNS (5), /* muldi */
850 COSTS_N_INSNS (37), /* divsi */
851 COSTS_N_INSNS (37), /* divdi */
852 COSTS_N_INSNS (3), /* fp */
853 COSTS_N_INSNS (4), /* dmul */
854 COSTS_N_INSNS (18), /* sdiv */
855 COSTS_N_INSNS (33), /* ddiv */
856 32, /* cache line size */
857 8, /* l1 cache */
858 64, /* l2 cache */
859 1, /* streams */
860 0, /* SF->DF convert */
861 };
862
863 /* Instruction costs on PPC604 processors. */
864 static const
865 struct processor_costs ppc604_cost = {
866 COSTS_N_INSNS (4), /* mulsi */
867 COSTS_N_INSNS (4), /* mulsi_const */
868 COSTS_N_INSNS (4), /* mulsi_const9 */
869 COSTS_N_INSNS (4), /* muldi */
870 COSTS_N_INSNS (20), /* divsi */
871 COSTS_N_INSNS (20), /* divdi */
872 COSTS_N_INSNS (3), /* fp */
873 COSTS_N_INSNS (3), /* dmul */
874 COSTS_N_INSNS (18), /* sdiv */
875 COSTS_N_INSNS (32), /* ddiv */
876 32, /* cache line size */
877 16, /* l1 cache */
878 512, /* l2 cache */
879 1, /* streams */
880 0, /* SF->DF convert */
881 };
882
883 /* Instruction costs on PPC604e processors. */
884 static const
885 struct processor_costs ppc604e_cost = {
886 COSTS_N_INSNS (2), /* mulsi */
887 COSTS_N_INSNS (2), /* mulsi_const */
888 COSTS_N_INSNS (2), /* mulsi_const9 */
889 COSTS_N_INSNS (2), /* muldi */
890 COSTS_N_INSNS (20), /* divsi */
891 COSTS_N_INSNS (20), /* divdi */
892 COSTS_N_INSNS (3), /* fp */
893 COSTS_N_INSNS (3), /* dmul */
894 COSTS_N_INSNS (18), /* sdiv */
895 COSTS_N_INSNS (32), /* ddiv */
896 32, /* cache line size */
897 32, /* l1 cache */
898 1024, /* l2 cache */
899 1, /* streams */
900 0, /* SF->DF convert */
901 };
902
903 /* Instruction costs on PPC620 processors. */
904 static const
905 struct processor_costs ppc620_cost = {
906 COSTS_N_INSNS (5), /* mulsi */
907 COSTS_N_INSNS (4), /* mulsi_const */
908 COSTS_N_INSNS (3), /* mulsi_const9 */
909 COSTS_N_INSNS (7), /* muldi */
910 COSTS_N_INSNS (21), /* divsi */
911 COSTS_N_INSNS (37), /* divdi */
912 COSTS_N_INSNS (3), /* fp */
913 COSTS_N_INSNS (3), /* dmul */
914 COSTS_N_INSNS (18), /* sdiv */
915 COSTS_N_INSNS (32), /* ddiv */
916 128, /* cache line size */
917 32, /* l1 cache */
918 1024, /* l2 cache */
919 1, /* streams */
920 0, /* SF->DF convert */
921 };
922
923 /* Instruction costs on PPC630 processors. */
924 static const
925 struct processor_costs ppc630_cost = {
926 COSTS_N_INSNS (5), /* mulsi */
927 COSTS_N_INSNS (4), /* mulsi_const */
928 COSTS_N_INSNS (3), /* mulsi_const9 */
929 COSTS_N_INSNS (7), /* muldi */
930 COSTS_N_INSNS (21), /* divsi */
931 COSTS_N_INSNS (37), /* divdi */
932 COSTS_N_INSNS (3), /* fp */
933 COSTS_N_INSNS (3), /* dmul */
934 COSTS_N_INSNS (17), /* sdiv */
935 COSTS_N_INSNS (21), /* ddiv */
936 128, /* cache line size */
937 64, /* l1 cache */
938 1024, /* l2 cache */
939 1, /* streams */
940 0, /* SF->DF convert */
941 };
942
943 /* Instruction costs on Cell processor. */
944 /* COSTS_N_INSNS (1) ~ one add. */
945 static const
946 struct processor_costs ppccell_cost = {
947 COSTS_N_INSNS (9/2)+2, /* mulsi */
948 COSTS_N_INSNS (6/2), /* mulsi_const */
949 COSTS_N_INSNS (6/2), /* mulsi_const9 */
950 COSTS_N_INSNS (15/2)+2, /* muldi */
951 COSTS_N_INSNS (38/2), /* divsi */
952 COSTS_N_INSNS (70/2), /* divdi */
953 COSTS_N_INSNS (10/2), /* fp */
954 COSTS_N_INSNS (10/2), /* dmul */
955 COSTS_N_INSNS (74/2), /* sdiv */
956 COSTS_N_INSNS (74/2), /* ddiv */
957 128, /* cache line size */
958 32, /* l1 cache */
959 512, /* l2 cache */
960 6, /* streams */
961 0, /* SF->DF convert */
962 };
963
964 /* Instruction costs on PPC750 and PPC7400 processors. */
965 static const
966 struct processor_costs ppc750_cost = {
967 COSTS_N_INSNS (5), /* mulsi */
968 COSTS_N_INSNS (3), /* mulsi_const */
969 COSTS_N_INSNS (2), /* mulsi_const9 */
970 COSTS_N_INSNS (5), /* muldi */
971 COSTS_N_INSNS (17), /* divsi */
972 COSTS_N_INSNS (17), /* divdi */
973 COSTS_N_INSNS (3), /* fp */
974 COSTS_N_INSNS (3), /* dmul */
975 COSTS_N_INSNS (17), /* sdiv */
976 COSTS_N_INSNS (31), /* ddiv */
977 32, /* cache line size */
978 32, /* l1 cache */
979 512, /* l2 cache */
980 1, /* streams */
981 0, /* SF->DF convert */
982 };
983
984 /* Instruction costs on PPC7450 processors. */
985 static const
986 struct processor_costs ppc7450_cost = {
987 COSTS_N_INSNS (4), /* mulsi */
988 COSTS_N_INSNS (3), /* mulsi_const */
989 COSTS_N_INSNS (3), /* mulsi_const9 */
990 COSTS_N_INSNS (4), /* muldi */
991 COSTS_N_INSNS (23), /* divsi */
992 COSTS_N_INSNS (23), /* divdi */
993 COSTS_N_INSNS (5), /* fp */
994 COSTS_N_INSNS (5), /* dmul */
995 COSTS_N_INSNS (21), /* sdiv */
996 COSTS_N_INSNS (35), /* ddiv */
997 32, /* cache line size */
998 32, /* l1 cache */
999 1024, /* l2 cache */
1000 1, /* streams */
1001 0, /* SF->DF convert */
1002 };
1003
1004 /* Instruction costs on PPC8540 processors. */
1005 static const
1006 struct processor_costs ppc8540_cost = {
1007 COSTS_N_INSNS (4), /* mulsi */
1008 COSTS_N_INSNS (4), /* mulsi_const */
1009 COSTS_N_INSNS (4), /* mulsi_const9 */
1010 COSTS_N_INSNS (4), /* muldi */
1011 COSTS_N_INSNS (19), /* divsi */
1012 COSTS_N_INSNS (19), /* divdi */
1013 COSTS_N_INSNS (4), /* fp */
1014 COSTS_N_INSNS (4), /* dmul */
1015 COSTS_N_INSNS (29), /* sdiv */
1016 COSTS_N_INSNS (29), /* ddiv */
1017 32, /* cache line size */
1018 32, /* l1 cache */
1019 256, /* l2 cache */
1020 1, /* prefetch streams */
1021 0, /* SF->DF convert */
1022 };
1023
1024 /* Instruction costs on E300C2 and E300C3 cores. */
1025 static const
1026 struct processor_costs ppce300c2c3_cost = {
1027 COSTS_N_INSNS (4), /* mulsi */
1028 COSTS_N_INSNS (4), /* mulsi_const */
1029 COSTS_N_INSNS (4), /* mulsi_const9 */
1030 COSTS_N_INSNS (4), /* muldi */
1031 COSTS_N_INSNS (19), /* divsi */
1032 COSTS_N_INSNS (19), /* divdi */
1033 COSTS_N_INSNS (3), /* fp */
1034 COSTS_N_INSNS (4), /* dmul */
1035 COSTS_N_INSNS (18), /* sdiv */
1036 COSTS_N_INSNS (33), /* ddiv */
1037 32, /* cache line size */
1038 16, /* l1 cache */
1039 16, /* l2 cache */
1040 1, /* prefetch streams */
1041 0, /* SF->DF convert */
1042 };
1043
1044 /* Instruction costs on PPCE500MC processors. */
1045 static const
1046 struct processor_costs ppce500mc_cost = {
1047 COSTS_N_INSNS (4), /* mulsi */
1048 COSTS_N_INSNS (4), /* mulsi_const */
1049 COSTS_N_INSNS (4), /* mulsi_const9 */
1050 COSTS_N_INSNS (4), /* muldi */
1051 COSTS_N_INSNS (14), /* divsi */
1052 COSTS_N_INSNS (14), /* divdi */
1053 COSTS_N_INSNS (8), /* fp */
1054 COSTS_N_INSNS (10), /* dmul */
1055 COSTS_N_INSNS (36), /* sdiv */
1056 COSTS_N_INSNS (66), /* ddiv */
1057 64, /* cache line size */
1058 32, /* l1 cache */
1059 128, /* l2 cache */
1060 1, /* prefetch streams */
1061 0, /* SF->DF convert */
1062 };
1063
1064 /* Instruction costs on PPCE500MC64 processors. */
1065 static const
1066 struct processor_costs ppce500mc64_cost = {
1067 COSTS_N_INSNS (4), /* mulsi */
1068 COSTS_N_INSNS (4), /* mulsi_const */
1069 COSTS_N_INSNS (4), /* mulsi_const9 */
1070 COSTS_N_INSNS (4), /* muldi */
1071 COSTS_N_INSNS (14), /* divsi */
1072 COSTS_N_INSNS (14), /* divdi */
1073 COSTS_N_INSNS (4), /* fp */
1074 COSTS_N_INSNS (10), /* dmul */
1075 COSTS_N_INSNS (36), /* sdiv */
1076 COSTS_N_INSNS (66), /* ddiv */
1077 64, /* cache line size */
1078 32, /* l1 cache */
1079 128, /* l2 cache */
1080 1, /* prefetch streams */
1081 0, /* SF->DF convert */
1082 };
1083
1084 /* Instruction costs on PPCE5500 processors. */
1085 static const
1086 struct processor_costs ppce5500_cost = {
1087 COSTS_N_INSNS (5), /* mulsi */
1088 COSTS_N_INSNS (5), /* mulsi_const */
1089 COSTS_N_INSNS (4), /* mulsi_const9 */
1090 COSTS_N_INSNS (5), /* muldi */
1091 COSTS_N_INSNS (14), /* divsi */
1092 COSTS_N_INSNS (14), /* divdi */
1093 COSTS_N_INSNS (7), /* fp */
1094 COSTS_N_INSNS (10), /* dmul */
1095 COSTS_N_INSNS (36), /* sdiv */
1096 COSTS_N_INSNS (66), /* ddiv */
1097 64, /* cache line size */
1098 32, /* l1 cache */
1099 128, /* l2 cache */
1100 1, /* prefetch streams */
1101 0, /* SF->DF convert */
1102 };
1103
1104 /* Instruction costs on PPCE6500 processors. */
1105 static const
1106 struct processor_costs ppce6500_cost = {
1107 COSTS_N_INSNS (5), /* mulsi */
1108 COSTS_N_INSNS (5), /* mulsi_const */
1109 COSTS_N_INSNS (4), /* mulsi_const9 */
1110 COSTS_N_INSNS (5), /* muldi */
1111 COSTS_N_INSNS (14), /* divsi */
1112 COSTS_N_INSNS (14), /* divdi */
1113 COSTS_N_INSNS (7), /* fp */
1114 COSTS_N_INSNS (10), /* dmul */
1115 COSTS_N_INSNS (36), /* sdiv */
1116 COSTS_N_INSNS (66), /* ddiv */
1117 64, /* cache line size */
1118 32, /* l1 cache */
1119 128, /* l2 cache */
1120 1, /* prefetch streams */
1121 0, /* SF->DF convert */
1122 };
1123
1124 /* Instruction costs on AppliedMicro Titan processors. */
1125 static const
1126 struct processor_costs titan_cost = {
1127 COSTS_N_INSNS (5), /* mulsi */
1128 COSTS_N_INSNS (5), /* mulsi_const */
1129 COSTS_N_INSNS (5), /* mulsi_const9 */
1130 COSTS_N_INSNS (5), /* muldi */
1131 COSTS_N_INSNS (18), /* divsi */
1132 COSTS_N_INSNS (18), /* divdi */
1133 COSTS_N_INSNS (10), /* fp */
1134 COSTS_N_INSNS (10), /* dmul */
1135 COSTS_N_INSNS (46), /* sdiv */
1136 COSTS_N_INSNS (72), /* ddiv */
1137 32, /* cache line size */
1138 32, /* l1 cache */
1139 512, /* l2 cache */
1140 1, /* prefetch streams */
1141 0, /* SF->DF convert */
1142 };
1143
1144 /* Instruction costs on POWER4 and POWER5 processors. */
1145 static const
1146 struct processor_costs power4_cost = {
1147 COSTS_N_INSNS (3), /* mulsi */
1148 COSTS_N_INSNS (2), /* mulsi_const */
1149 COSTS_N_INSNS (2), /* mulsi_const9 */
1150 COSTS_N_INSNS (4), /* muldi */
1151 COSTS_N_INSNS (18), /* divsi */
1152 COSTS_N_INSNS (34), /* divdi */
1153 COSTS_N_INSNS (3), /* fp */
1154 COSTS_N_INSNS (3), /* dmul */
1155 COSTS_N_INSNS (17), /* sdiv */
1156 COSTS_N_INSNS (17), /* ddiv */
1157 128, /* cache line size */
1158 32, /* l1 cache */
1159 1024, /* l2 cache */
1160 8, /* prefetch streams */
1161 0, /* SF->DF convert */
1162 };
1163
1164 /* Instruction costs on POWER6 processors. */
1165 static const
1166 struct processor_costs power6_cost = {
1167 COSTS_N_INSNS (8), /* mulsi */
1168 COSTS_N_INSNS (8), /* mulsi_const */
1169 COSTS_N_INSNS (8), /* mulsi_const9 */
1170 COSTS_N_INSNS (8), /* muldi */
1171 COSTS_N_INSNS (22), /* divsi */
1172 COSTS_N_INSNS (28), /* divdi */
1173 COSTS_N_INSNS (3), /* fp */
1174 COSTS_N_INSNS (3), /* dmul */
1175 COSTS_N_INSNS (13), /* sdiv */
1176 COSTS_N_INSNS (16), /* ddiv */
1177 128, /* cache line size */
1178 64, /* l1 cache */
1179 2048, /* l2 cache */
1180 16, /* prefetch streams */
1181 0, /* SF->DF convert */
1182 };
1183
1184 /* Instruction costs on POWER7 processors. */
1185 static const
1186 struct processor_costs power7_cost = {
1187 COSTS_N_INSNS (2), /* mulsi */
1188 COSTS_N_INSNS (2), /* mulsi_const */
1189 COSTS_N_INSNS (2), /* mulsi_const9 */
1190 COSTS_N_INSNS (2), /* muldi */
1191 COSTS_N_INSNS (18), /* divsi */
1192 COSTS_N_INSNS (34), /* divdi */
1193 COSTS_N_INSNS (3), /* fp */
1194 COSTS_N_INSNS (3), /* dmul */
1195 COSTS_N_INSNS (13), /* sdiv */
1196 COSTS_N_INSNS (16), /* ddiv */
1197 128, /* cache line size */
1198 32, /* l1 cache */
1199 256, /* l2 cache */
1200 12, /* prefetch streams */
1201 COSTS_N_INSNS (3), /* SF->DF convert */
1202 };
1203
1204 /* Instruction costs on POWER8 processors. */
1205 static const
1206 struct processor_costs power8_cost = {
1207 COSTS_N_INSNS (3), /* mulsi */
1208 COSTS_N_INSNS (3), /* mulsi_const */
1209 COSTS_N_INSNS (3), /* mulsi_const9 */
1210 COSTS_N_INSNS (3), /* muldi */
1211 COSTS_N_INSNS (19), /* divsi */
1212 COSTS_N_INSNS (35), /* divdi */
1213 COSTS_N_INSNS (3), /* fp */
1214 COSTS_N_INSNS (3), /* dmul */
1215 COSTS_N_INSNS (14), /* sdiv */
1216 COSTS_N_INSNS (17), /* ddiv */
1217 128, /* cache line size */
1218 32, /* l1 cache */
1219 256, /* l2 cache */
1220 12, /* prefetch streams */
1221 COSTS_N_INSNS (3), /* SF->DF convert */
1222 };
1223
1224 /* Instruction costs on POWER9 processors. */
1225 static const
1226 struct processor_costs power9_cost = {
1227 COSTS_N_INSNS (3), /* mulsi */
1228 COSTS_N_INSNS (3), /* mulsi_const */
1229 COSTS_N_INSNS (3), /* mulsi_const9 */
1230 COSTS_N_INSNS (3), /* muldi */
1231 COSTS_N_INSNS (8), /* divsi */
1232 COSTS_N_INSNS (12), /* divdi */
1233 COSTS_N_INSNS (3), /* fp */
1234 COSTS_N_INSNS (3), /* dmul */
1235 COSTS_N_INSNS (13), /* sdiv */
1236 COSTS_N_INSNS (18), /* ddiv */
1237 128, /* cache line size */
1238 32, /* l1 cache */
1239 512, /* l2 cache */
1240 8, /* prefetch streams */
1241 COSTS_N_INSNS (3), /* SF->DF convert */
1242 };
1243
1244 /* Instruction costs on POWER A2 processors. */
1245 static const
1246 struct processor_costs ppca2_cost = {
1247 COSTS_N_INSNS (16), /* mulsi */
1248 COSTS_N_INSNS (16), /* mulsi_const */
1249 COSTS_N_INSNS (16), /* mulsi_const9 */
1250 COSTS_N_INSNS (16), /* muldi */
1251 COSTS_N_INSNS (22), /* divsi */
1252 COSTS_N_INSNS (28), /* divdi */
1253 COSTS_N_INSNS (3), /* fp */
1254 COSTS_N_INSNS (3), /* dmul */
1255 COSTS_N_INSNS (59), /* sdiv */
1256 COSTS_N_INSNS (72), /* ddiv */
1257 64, /* cache line size */
1258 16, /* l1 cache */
1259 2048, /* l2 cache */
1260 16, /* prefetch streams */
1261 0, /* SF->DF convert */
1262 };
1263
1264 \f
1265 /* Table that classifies rs6000 builtin functions (pure, const, etc.). */
1266 #undef RS6000_BUILTIN_0
1267 #undef RS6000_BUILTIN_1
1268 #undef RS6000_BUILTIN_2
1269 #undef RS6000_BUILTIN_3
1270 #undef RS6000_BUILTIN_A
1271 #undef RS6000_BUILTIN_D
1272 #undef RS6000_BUILTIN_H
1273 #undef RS6000_BUILTIN_P
1274 #undef RS6000_BUILTIN_X
1275
1276 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
1277 { NAME, ICODE, MASK, ATTR },
1278
1279 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
1280 { NAME, ICODE, MASK, ATTR },
1281
1282 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
1283 { NAME, ICODE, MASK, ATTR },
1284
1285 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
1286 { NAME, ICODE, MASK, ATTR },
1287
1288 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
1289 { NAME, ICODE, MASK, ATTR },
1290
1291 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
1292 { NAME, ICODE, MASK, ATTR },
1293
1294 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
1295 { NAME, ICODE, MASK, ATTR },
1296
1297 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
1298 { NAME, ICODE, MASK, ATTR },
1299
1300 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
1301 { NAME, ICODE, MASK, ATTR },
1302
1303 struct rs6000_builtin_info_type {
1304 const char *name;
1305 const enum insn_code icode;
1306 const HOST_WIDE_INT mask;
1307 const unsigned attr;
1308 };
1309
1310 static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
1311 {
1312 #include "rs6000-builtin.def"
1313 };
1314
1315 #undef RS6000_BUILTIN_0
1316 #undef RS6000_BUILTIN_1
1317 #undef RS6000_BUILTIN_2
1318 #undef RS6000_BUILTIN_3
1319 #undef RS6000_BUILTIN_A
1320 #undef RS6000_BUILTIN_D
1321 #undef RS6000_BUILTIN_H
1322 #undef RS6000_BUILTIN_P
1323 #undef RS6000_BUILTIN_X
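/* The include above uses the classic X-macro pattern: every
   RS6000_BUILTIN_n (ENUM, NAME, MASK, ATTR, ICODE) entry in
   rs6000-builtin.def expands, under the temporary #defines, to a
   { NAME, ICODE, MASK, ATTR } row, keeping the .def file the single
   source of truth for the builtin list.  */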
1324
1325 /* Support for -mveclibabi=<xxx> to control which vector library to use. */
1326 static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);
1327
1328 \f
1329 static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
1330 static struct machine_function * rs6000_init_machine_status (void);
1331 static int rs6000_ra_ever_killed (void);
1332 static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
1333 static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
1334 static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
1335 static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
1336 static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
1337 static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
1338 static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
1339 static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
1340 bool);
1341 static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
1342 unsigned int);
1343 static bool is_microcoded_insn (rtx_insn *);
1344 static bool is_nonpipeline_insn (rtx_insn *);
1345 static bool is_cracked_insn (rtx_insn *);
1346 static bool is_load_insn (rtx, rtx *);
1347 static bool is_store_insn (rtx, rtx *);
1348 static bool set_to_load_agen (rtx_insn *,rtx_insn *);
1349 static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
1350 static bool insn_must_be_first_in_group (rtx_insn *);
1351 static bool insn_must_be_last_in_group (rtx_insn *);
1352 static void altivec_init_builtins (void);
1353 static tree builtin_function_type (machine_mode, machine_mode,
1354 machine_mode, machine_mode,
1355 enum rs6000_builtins, const char *name);
1356 static void rs6000_common_init_builtins (void);
1357 static void htm_init_builtins (void);
1358 static rs6000_stack_t *rs6000_stack_info (void);
1359 static void is_altivec_return_reg (rtx, void *);
1360 int easy_vector_constant (rtx, machine_mode);
1361 static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
1362 static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
1363 static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
1364 bool, bool);
1365 #if TARGET_MACHO
1366 static void macho_branch_islands (void);
1367 #endif
1368 static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
1369 int, int *);
1370 static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
1371 int, int, int *);
1372 static bool rs6000_mode_dependent_address (const_rtx);
1373 static bool rs6000_debug_mode_dependent_address (const_rtx);
1374 static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
1375 static enum reg_class rs6000_secondary_reload_class (enum reg_class,
1376 machine_mode, rtx);
1377 static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
1378 machine_mode,
1379 rtx);
1380 static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
1381 static enum reg_class rs6000_debug_preferred_reload_class (rtx,
1382 enum reg_class);
1383 static bool rs6000_debug_secondary_memory_needed (machine_mode,
1384 reg_class_t,
1385 reg_class_t);
1386 static bool rs6000_debug_can_change_mode_class (machine_mode,
1387 machine_mode,
1388 reg_class_t);
1389 static bool rs6000_save_toc_in_prologue_p (void);
1390 static rtx rs6000_internal_arg_pointer (void);
1391
1392 rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
1393 int, int *)
1394 = rs6000_legitimize_reload_address;
1395
1396 static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
1397 = rs6000_mode_dependent_address;
1398
1399 enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
1400 machine_mode, rtx)
1401 = rs6000_secondary_reload_class;
1402
1403 enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
1404 = rs6000_preferred_reload_class;
1405
1406 const int INSN_NOT_AVAILABLE = -1;
1407
1408 static void rs6000_print_isa_options (FILE *, int, const char *,
1409 HOST_WIDE_INT);
1410 static void rs6000_print_builtin_options (FILE *, int, const char *,
1411 HOST_WIDE_INT);
1412 static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);
1413
1414 static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
1415 static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
1416 enum rs6000_reg_type,
1417 machine_mode,
1418 secondary_reload_info *,
1419 bool);
1420 rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
1421 static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused));
1422 static tree rs6000_fold_builtin (tree, int, tree *, bool);
1423
1424 /* Hash table stuff for keeping track of TOC entries. */
1425
1426 struct GTY((for_user)) toc_hash_struct
1427 {
1428 /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
1429 ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
1430 rtx key;
1431 machine_mode key_mode;
1432 int labelno;
1433 };
1434
1435 struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
1436 {
1437 static hashval_t hash (toc_hash_struct *);
1438 static bool equal (toc_hash_struct *, toc_hash_struct *);
1439 };
1440
1441 static GTY (()) hash_table<toc_hasher> *toc_hash_table;
1442
1443 /* Hash table to keep track of the argument types for builtin functions. */
1444
1445 struct GTY((for_user)) builtin_hash_struct
1446 {
1447 tree type;
1448 machine_mode mode[4]; /* return value + 3 arguments. */
1449 unsigned char uns_p[4]; /* and whether the types are unsigned. */
1450 };
1451
1452 struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
1453 {
1454 static hashval_t hash (builtin_hash_struct *);
1455 static bool equal (builtin_hash_struct *, builtin_hash_struct *);
1456 };
1457
1458 static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;
1459
1460 \f
1461 /* Default register names. */
1462 char rs6000_reg_names[][8] =
1463 {
1464 "0", "1", "2", "3", "4", "5", "6", "7",
1465 "8", "9", "10", "11", "12", "13", "14", "15",
1466 "16", "17", "18", "19", "20", "21", "22", "23",
1467 "24", "25", "26", "27", "28", "29", "30", "31",
1468 "0", "1", "2", "3", "4", "5", "6", "7",
1469 "8", "9", "10", "11", "12", "13", "14", "15",
1470 "16", "17", "18", "19", "20", "21", "22", "23",
1471 "24", "25", "26", "27", "28", "29", "30", "31",
1472 "mq", "lr", "ctr","ap",
1473 "0", "1", "2", "3", "4", "5", "6", "7",
1474 "ca",
1475 /* AltiVec registers. */
1476 "0", "1", "2", "3", "4", "5", "6", "7",
1477 "8", "9", "10", "11", "12", "13", "14", "15",
1478 "16", "17", "18", "19", "20", "21", "22", "23",
1479 "24", "25", "26", "27", "28", "29", "30", "31",
1480 "vrsave", "vscr",
1481 /* Soft frame pointer. */
1482 "sfp",
1483 /* HTM SPR registers. */
1484 "tfhar", "tfiar", "texasr"
1485 };
1486
1487 #ifdef TARGET_REGNAMES
1488 static const char alt_reg_names[][8] =
1489 {
1490 "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
1491 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
1492 "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
1493 "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
1494 "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
1495 "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
1496 "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
1497 "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
1498 "mq", "lr", "ctr", "ap",
1499 "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
1500 "ca",
1501 /* AltiVec registers. */
1502 "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
1503 "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
1504 "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
1505 "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
1506 "vrsave", "vscr",
1507 /* Soft frame pointer. */
1508 "sfp",
1509 /* HTM SPR registers. */
1510 "tfhar", "tfiar", "texasr"
1511 };
1512 #endif
1513
1514 /* Table of valid machine attributes. */
1515
1516 static const struct attribute_spec rs6000_attribute_table[] =
1517 {
1518 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
1519 affects_type_identity, handler, exclude } */
1520 { "altivec", 1, 1, false, true, false, false,
1521 rs6000_handle_altivec_attribute, NULL },
1522 { "longcall", 0, 0, false, true, true, false,
1523 rs6000_handle_longcall_attribute, NULL },
1524 { "shortcall", 0, 0, false, true, true, false,
1525 rs6000_handle_longcall_attribute, NULL },
1526 { "ms_struct", 0, 0, false, false, false, false,
1527 rs6000_handle_struct_attribute, NULL },
1528 { "gcc_struct", 0, 0, false, false, false, false,
1529 rs6000_handle_struct_attribute, NULL },
1530 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1531 SUBTARGET_ATTRIBUTE_TABLE,
1532 #endif
1533 { NULL, 0, 0, false, false, false, false, NULL, NULL }
1534 };
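/* Example uses of the attributes above (user code, illustrative):

     void far_away (void) __attribute__ ((longcall));
     typedef int v4si __attribute__ ((altivec (vector__)));  */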
1535 \f
1536 #ifndef TARGET_PROFILE_KERNEL
1537 #define TARGET_PROFILE_KERNEL 0
1538 #endif
1539
1540 /* The VRSAVE bitmask puts bit %v0 as the most significant bit. */
1541 #define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
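/* E.g. ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO) is 0x80000000 and
   ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 31) is 0x00000001.  */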
1542 \f
1543 /* Initialize the GCC target structure. */
1544 #undef TARGET_ATTRIBUTE_TABLE
1545 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1546 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1547 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1548 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1549 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1550
1551 #undef TARGET_ASM_ALIGNED_DI_OP
1552 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1553
1554 /* Default unaligned ops are only provided for ELF. Find the ops needed
1555 for non-ELF systems. */
1556 #ifndef OBJECT_FORMAT_ELF
1557 #if TARGET_XCOFF
1558 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1559 64-bit targets. */
1560 #undef TARGET_ASM_UNALIGNED_HI_OP
1561 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1562 #undef TARGET_ASM_UNALIGNED_SI_OP
1563 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1564 #undef TARGET_ASM_UNALIGNED_DI_OP
1565 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1566 #else
1567 /* For Darwin. */
1568 #undef TARGET_ASM_UNALIGNED_HI_OP
1569 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1570 #undef TARGET_ASM_UNALIGNED_SI_OP
1571 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1572 #undef TARGET_ASM_UNALIGNED_DI_OP
1573 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1574 #undef TARGET_ASM_ALIGNED_DI_OP
1575 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1576 #endif
1577 #endif
1578
1579 /* This hook deals with fixups for relocatable code and DI-mode objects
1580 in 64-bit code. */
1581 #undef TARGET_ASM_INTEGER
1582 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1583
1584 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1585 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1586 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1587 #endif
1588
1589 #undef TARGET_SET_UP_BY_PROLOGUE
1590 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1591
1592 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
1593 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
1594 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
1595 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
1596 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
1597 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
1598 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
1599 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
1600 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
1601 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
1602 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
1603 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components
1604
1605 #undef TARGET_EXTRA_LIVE_ON_ENTRY
1606 #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry
1607
1608 #undef TARGET_INTERNAL_ARG_POINTER
1609 #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer
1610
1611 #undef TARGET_HAVE_TLS
1612 #define TARGET_HAVE_TLS HAVE_AS_TLS
1613
1614 #undef TARGET_CANNOT_FORCE_CONST_MEM
1615 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1616
1617 #undef TARGET_DELEGITIMIZE_ADDRESS
1618 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1619
1620 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1621 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1622
1623 #undef TARGET_LEGITIMATE_COMBINED_INSN
1624 #define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn
1625
1626 #undef TARGET_ASM_FUNCTION_PROLOGUE
1627 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1628 #undef TARGET_ASM_FUNCTION_EPILOGUE
1629 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1630
1631 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1632 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1633
1634 #undef TARGET_LEGITIMIZE_ADDRESS
1635 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1636
1637 #undef TARGET_SCHED_VARIABLE_ISSUE
1638 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1639
1640 #undef TARGET_SCHED_ISSUE_RATE
1641 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1642 #undef TARGET_SCHED_ADJUST_COST
1643 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1644 #undef TARGET_SCHED_ADJUST_PRIORITY
1645 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1646 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1647 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1648 #undef TARGET_SCHED_INIT
1649 #define TARGET_SCHED_INIT rs6000_sched_init
1650 #undef TARGET_SCHED_FINISH
1651 #define TARGET_SCHED_FINISH rs6000_sched_finish
1652 #undef TARGET_SCHED_REORDER
1653 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1654 #undef TARGET_SCHED_REORDER2
1655 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1656
1657 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1658 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1659
1660 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1661 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1662
1663 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1664 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1665 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1666 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1667 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1668 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1669 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1670 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1671
1672 #undef TARGET_SCHED_CAN_SPECULATE_INSN
1673 #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn
1674
1675 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1676 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1677 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1678 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1679 rs6000_builtin_support_vector_misalignment
1680 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1681 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1682 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1683 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1684 rs6000_builtin_vectorization_cost
1685 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1686 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1687 rs6000_preferred_simd_mode
1688 #undef TARGET_VECTORIZE_INIT_COST
1689 #define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
1690 #undef TARGET_VECTORIZE_ADD_STMT_COST
1691 #define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
1692 #undef TARGET_VECTORIZE_FINISH_COST
1693 #define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
1694 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
1695 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
1696
1697 #undef TARGET_INIT_BUILTINS
1698 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1699 #undef TARGET_BUILTIN_DECL
1700 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1701
1702 #undef TARGET_FOLD_BUILTIN
1703 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1704 #undef TARGET_GIMPLE_FOLD_BUILTIN
1705 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1706
1707 #undef TARGET_EXPAND_BUILTIN
1708 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1709
1710 #undef TARGET_MANGLE_TYPE
1711 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1712
1713 #undef TARGET_INIT_LIBFUNCS
1714 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1715
1716 #if TARGET_MACHO
1717 #undef TARGET_BINDS_LOCAL_P
1718 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1719 #endif
1720
1721 #undef TARGET_MS_BITFIELD_LAYOUT_P
1722 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1723
1724 #undef TARGET_ASM_OUTPUT_MI_THUNK
1725 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1726
1727 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1728 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1729
1730 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1731 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1732
1733 #undef TARGET_REGISTER_MOVE_COST
1734 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1735 #undef TARGET_MEMORY_MOVE_COST
1736 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1737 #undef TARGET_CANNOT_COPY_INSN_P
1738 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1739 #undef TARGET_RTX_COSTS
1740 #define TARGET_RTX_COSTS rs6000_rtx_costs
1741 #undef TARGET_ADDRESS_COST
1742 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1743 #undef TARGET_INSN_COST
1744 #define TARGET_INSN_COST rs6000_insn_cost
1745
1746 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1747 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1748
1749 #undef TARGET_PROMOTE_FUNCTION_MODE
1750 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1751
1752 #undef TARGET_RETURN_IN_MEMORY
1753 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1754
1755 #undef TARGET_RETURN_IN_MSB
1756 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1757
1758 #undef TARGET_SETUP_INCOMING_VARARGS
1759 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1760
1761 /* Always strict argument naming on rs6000. */
1762 #undef TARGET_STRICT_ARGUMENT_NAMING
1763 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1764 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1765 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1766 #undef TARGET_SPLIT_COMPLEX_ARG
1767 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1768 #undef TARGET_MUST_PASS_IN_STACK
1769 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1770 #undef TARGET_PASS_BY_REFERENCE
1771 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1772 #undef TARGET_ARG_PARTIAL_BYTES
1773 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1774 #undef TARGET_FUNCTION_ARG_ADVANCE
1775 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1776 #undef TARGET_FUNCTION_ARG
1777 #define TARGET_FUNCTION_ARG rs6000_function_arg
1778 #undef TARGET_FUNCTION_ARG_PADDING
1779 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1780 #undef TARGET_FUNCTION_ARG_BOUNDARY
1781 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1782
1783 #undef TARGET_BUILD_BUILTIN_VA_LIST
1784 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1785
1786 #undef TARGET_EXPAND_BUILTIN_VA_START
1787 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1788
1789 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1790 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1791
1792 #undef TARGET_EH_RETURN_FILTER_MODE
1793 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1794
1795 #undef TARGET_TRANSLATE_MODE_ATTRIBUTE
1796 #define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute
1797
1798 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1799 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1800
1801 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1802 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1803
1804 #undef TARGET_FLOATN_MODE
1805 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1806
1807 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1808 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1809
1810 #undef TARGET_MD_ASM_ADJUST
1811 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1812
1813 #undef TARGET_OPTION_OVERRIDE
1814 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1815
1816 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1817 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1818 rs6000_builtin_vectorized_function
1819
1820 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1821 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1822 rs6000_builtin_md_vectorized_function
1823
1824 #undef TARGET_STACK_PROTECT_GUARD
1825 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1826
1827 #if !TARGET_MACHO
1828 #undef TARGET_STACK_PROTECT_FAIL
1829 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1830 #endif
1831
1832 #ifdef HAVE_AS_TLS
1833 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1834 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1835 #endif
1836
1837 /* Use a 32-bit anchor range. This leads to sequences like:
1838
1839 addis tmp,anchor,high
1840 add dest,tmp,low
1841
1842 where tmp itself acts as an anchor, and can be shared between
1843 accesses to the same 64k page. */
1844 #undef TARGET_MIN_ANCHOR_OFFSET
1845 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1846 #undef TARGET_MAX_ANCHOR_OFFSET
1847 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1848 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1849 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1850 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1851 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1852
1853 #undef TARGET_BUILTIN_RECIPROCAL
1854 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1855
1856 #undef TARGET_SECONDARY_RELOAD
1857 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1858 #undef TARGET_SECONDARY_MEMORY_NEEDED
1859 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1860 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1861 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1862
1863 #undef TARGET_LEGITIMATE_ADDRESS_P
1864 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1865
1866 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1867 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1868
1869 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1870 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1871
1872 #undef TARGET_CAN_ELIMINATE
1873 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1874
1875 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1876 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1877
1878 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1879 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1880
1881 #undef TARGET_TRAMPOLINE_INIT
1882 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1883
1884 #undef TARGET_FUNCTION_VALUE
1885 #define TARGET_FUNCTION_VALUE rs6000_function_value
1886
1887 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1888 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1889
1890 #undef TARGET_OPTION_SAVE
1891 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1892
1893 #undef TARGET_OPTION_RESTORE
1894 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1895
1896 #undef TARGET_OPTION_PRINT
1897 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1898
1899 #undef TARGET_CAN_INLINE_P
1900 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1901
1902 #undef TARGET_SET_CURRENT_FUNCTION
1903 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1904
1905 #undef TARGET_LEGITIMATE_CONSTANT_P
1906 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1907
1908 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1909 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1910
1911 #undef TARGET_CAN_USE_DOLOOP_P
1912 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1913
1914 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1915 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1916
1917 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1918 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1919 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1920 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1921 #undef TARGET_UNWIND_WORD_MODE
1922 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1923
1924 #undef TARGET_OFFLOAD_OPTIONS
1925 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1926
1927 #undef TARGET_C_MODE_FOR_SUFFIX
1928 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1929
1930 #undef TARGET_INVALID_BINARY_OP
1931 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1932
1933 #undef TARGET_OPTAB_SUPPORTED_P
1934 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1935
1936 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1937 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1938
1939 #undef TARGET_COMPARE_VERSION_PRIORITY
1940 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1941
1942 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1943 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1944 rs6000_generate_version_dispatcher_body
1945
1946 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1947 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1948 rs6000_get_function_versions_dispatcher
1949
1950 #undef TARGET_OPTION_FUNCTION_VERSIONS
1951 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1952
1953 #undef TARGET_HARD_REGNO_NREGS
1954 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1955 #undef TARGET_HARD_REGNO_MODE_OK
1956 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1957
1958 #undef TARGET_MODES_TIEABLE_P
1959 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1960
1961 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1962 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1963 rs6000_hard_regno_call_part_clobbered
1964
1965 #undef TARGET_SLOW_UNALIGNED_ACCESS
1966 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1967
1968 #undef TARGET_CAN_CHANGE_MODE_CLASS
1969 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1970
1971 #undef TARGET_CONSTANT_ALIGNMENT
1972 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1973
1974 #undef TARGET_STARTING_FRAME_OFFSET
1975 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1976
1977 #if TARGET_ELF && RS6000_WEAK
1978 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
1979 #define TARGET_ASM_GLOBALIZE_DECL_NAME rs6000_globalize_decl_name
1980 #endif
1981 \f
1982
1983 /* Processor table. */
1984 struct rs6000_ptt
1985 {
1986 const char *const name; /* Canonical processor name. */
1987 const enum processor_type processor; /* Processor type enum value. */
1988 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1989 };
1990
1991 static struct rs6000_ptt const processor_target_table[] =
1992 {
1993 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1994 #include "rs6000-cpus.def"
1995 #undef RS6000_CPU
1996 };
1997
1998 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1999 name is invalid. */
2000
2001 static int
2002 rs6000_cpu_name_lookup (const char *name)
2003 {
2004 size_t i;
2005
2006 if (name != NULL)
2007 {
2008 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
2009 if (! strcmp (name, processor_target_table[i].name))
2010 return (int)i;
2011 }
2012
2013 return -1;
2014 }
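/* As a rough usage sketch (entry names come from rs6000-cpus.def):
   looking up a canonical name such as "power8" returns its index in
   processor_target_table, while an unrecognized string or a NULL name
   returns -1.  */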
2015
2016 \f
2017 /* Return number of consecutive hard regs needed starting at reg REGNO
2018 to hold something of mode MODE.
2019 This is ordinarily the length in words of a value of mode MODE
2020 but can be less for certain modes in special long registers.
2021
2022 POWER and PowerPC GPRs hold 32 bits worth;
2023 PowerPC64 GPRs and FPRs hold 64 bits worth. */

2024
2025 static int
2026 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
2027 {
2028 unsigned HOST_WIDE_INT reg_size;
2029
2030 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
2031 128-bit floating point that can go in vector registers, which has VSX
2032 memory addressing. */
2033 if (FP_REGNO_P (regno))
2034 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
2035 ? UNITS_PER_VSX_WORD
2036 : UNITS_PER_FP_WORD);
2037
2038 else if (ALTIVEC_REGNO_P (regno))
2039 reg_size = UNITS_PER_ALTIVEC_WORD;
2040
2041 else
2042 reg_size = UNITS_PER_WORD;
2043
2044 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
2045 }
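/* Worked example of the rounding division above: with 32-bit GPRs
   (reg_size == 4), DFmode (8 bytes) needs (8 + 4 - 1) / 4 == 2
   consecutive registers, while on PowerPC64 (reg_size == 8) the same
   value fits in a single register.  */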
2046
2047 /* Value is 1 if hard register REGNO can hold a value of machine-mode
2048 MODE. */
2049 static int
2050 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
2051 {
2052 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
2053
2054 if (COMPLEX_MODE_P (mode))
2055 mode = GET_MODE_INNER (mode);
2056
2057 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
2058 register combinations; we use PTImode wherever we need to deal with quad
2059 word memory operations. Don't allow quad words in the argument or frame
2060 pointer registers, just registers 0..31. */
2061 if (mode == PTImode)
2062 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
2063 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
2064 && ((regno & 1) == 0));
2065
2066 /* VSX registers that overlap the FPR registers are larger than for non-VSX
2067 implementations. Don't allow an item to be split between a FP register
2068 and an Altivec register. Allow TImode in all VSX registers if the user
2069 asked for it. */
2070 if (TARGET_VSX && VSX_REGNO_P (regno)
2071 && (VECTOR_MEM_VSX_P (mode)
2072 || FLOAT128_VECTOR_P (mode)
2073 || reg_addr[mode].scalar_in_vmx_p
2074 || mode == TImode
2075 || (TARGET_VADDUQM && mode == V1TImode)))
2076 {
2077 if (FP_REGNO_P (regno))
2078 return FP_REGNO_P (last_regno);
2079
2080 if (ALTIVEC_REGNO_P (regno))
2081 {
2082 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
2083 return 0;
2084
2085 return ALTIVEC_REGNO_P (last_regno);
2086 }
2087 }
2088
2089 /* The GPRs can hold any mode, but values bigger than one register
2090 cannot go past R31. */
2091 if (INT_REGNO_P (regno))
2092 return INT_REGNO_P (last_regno);
2093
2094 /* The float registers (except for VSX vector modes) can only hold floating
2095 modes and DImode. */
2096 if (FP_REGNO_P (regno))
2097 {
2098 if (FLOAT128_VECTOR_P (mode))
2099 return false;
2100
2101 if (SCALAR_FLOAT_MODE_P (mode)
2102 && (mode != TDmode || (regno % 2) == 0)
2103 && FP_REGNO_P (last_regno))
2104 return 1;
2105
2106 if (GET_MODE_CLASS (mode) == MODE_INT)
2107 {
2108 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
2109 return 1;
2110
2111 if (TARGET_P8_VECTOR && (mode == SImode))
2112 return 1;
2113
2114 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
2115 return 1;
2116 }
2117
2118 return 0;
2119 }
2120
2121 /* The CR register can only hold CC modes. */
2122 if (CR_REGNO_P (regno))
2123 return GET_MODE_CLASS (mode) == MODE_CC;
2124
2125 if (CA_REGNO_P (regno))
2126 return mode == Pmode || mode == SImode;
2127
2128 /* AltiVec modes only in AltiVec registers. */
2129 if (ALTIVEC_REGNO_P (regno))
2130 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
2131 || mode == V1TImode);
2132
2133 /* We cannot put non-VSX TImode or PTImode anywhere except general registers,
2134 and the value must be able to fit within the register set. */
2135
2136 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
2137 }
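/* For instance, on a 64-bit VSX target the checks above accept V2DFmode
   in any VSX register, PTImode only in an even/odd GPR pair such as
   r10/r11, and CCmode only in the condition registers.  */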
2138
2139 /* Implement TARGET_HARD_REGNO_NREGS. */
2140
2141 static unsigned int
2142 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
2143 {
2144 return rs6000_hard_regno_nregs[mode][regno];
2145 }
2146
2147 /* Implement TARGET_HARD_REGNO_MODE_OK. */
2148
2149 static bool
2150 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
2151 {
2152 return rs6000_hard_regno_mode_ok_p[mode][regno];
2153 }
2154
2155 /* Implement TARGET_MODES_TIEABLE_P.
2156
2157 PTImode cannot tie with other modes because PTImode is restricted to even
2158 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
2159 57744).
2160
2161 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
2162 128-bit floating point on VSX systems ties with other vectors. */
2163
2164 static bool
2165 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
2166 {
2167 if (mode1 == PTImode)
2168 return mode2 == PTImode;
2169 if (mode2 == PTImode)
2170 return false;
2171
2172 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
2173 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
2174 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
2175 return false;
2176
2177 if (SCALAR_FLOAT_MODE_P (mode1))
2178 return SCALAR_FLOAT_MODE_P (mode2);
2179 if (SCALAR_FLOAT_MODE_P (mode2))
2180 return false;
2181
2182 if (GET_MODE_CLASS (mode1) == MODE_CC)
2183 return GET_MODE_CLASS (mode2) == MODE_CC;
2184 if (GET_MODE_CLASS (mode2) == MODE_CC)
2185 return false;
2186
2187 return true;
2188 }
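/* For example, SImode and DImode tie (both fall through to the final
   return), as do SFmode and DFmode, but SFmode does not tie with SImode,
   and PTImode ties with nothing except itself.  */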
2189
2190 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
2191
2192 static bool
2193 rs6000_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
2194 {
2195 if (TARGET_32BIT
2196 && TARGET_POWERPC64
2197 && GET_MODE_SIZE (mode) > 4
2198 && INT_REGNO_P (regno))
2199 return true;
2200
2201 if (TARGET_VSX
2202 && FP_REGNO_P (regno)
2203 && GET_MODE_SIZE (mode) > 8
2204 && !FLOAT128_2REG_P (mode))
2205 return true;
2206
2207 return false;
2208 }
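/* E.g. for -m32 -mpowerpc64, a DImode value in a GPR is partially
   clobbered across calls: the 32-bit calling conventions only preserve
   the low 32 bits of the 64-bit registers.  */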
2209
2210 /* Print interesting facts about registers. */
2211 static void
2212 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2213 {
2214 int r, m;
2215
2216 for (r = first_regno; r <= last_regno; ++r)
2217 {
2218 const char *comma = "";
2219 int len;
2220
2221 if (first_regno == last_regno)
2222 fprintf (stderr, "%s:\t", reg_name);
2223 else
2224 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2225
2226 len = 8;
2227 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2228 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2229 {
2230 if (len > 70)
2231 {
2232 fprintf (stderr, ",\n\t");
2233 len = 8;
2234 comma = "";
2235 }
2236
2237 if (rs6000_hard_regno_nregs[m][r] > 1)
2238 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2239 rs6000_hard_regno_nregs[m][r]);
2240 else
2241 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2242
2243 comma = ", ";
2244 }
2245
2246 if (call_used_regs[r])
2247 {
2248 if (len > 70)
2249 {
2250 fprintf (stderr, ",\n\t");
2251 len = 8;
2252 comma = "";
2253 }
2254
2255 len += fprintf (stderr, "%s%s", comma, "call-used");
2256 comma = ", ";
2257 }
2258
2259 if (fixed_regs[r])
2260 {
2261 if (len > 70)
2262 {
2263 fprintf (stderr, ",\n\t");
2264 len = 8;
2265 comma = "";
2266 }
2267
2268 len += fprintf (stderr, "%s%s", comma, "fixed");
2269 comma = ", ";
2270 }
2271
2272 if (len > 70)
2273 {
2274 fprintf (stderr, ",\n\t");
2275 comma = "";
2276 }
2277
2278 len += fprintf (stderr, "%sreg-class = %s", comma,
2279 reg_class_names[(int)rs6000_regno_regclass[r]]);
2280 comma = ", ";
2281
2282 if (len > 70)
2283 {
2284 fprintf (stderr, ",\n\t");
2285 comma = "";
2286 }
2287
2288 fprintf (stderr, "%sregno = %d\n", comma, r);
2289 }
2290 }
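/* A line of the resulting -mdebug=reg output looks roughly like:
   gr0:    SI, DI/2, ..., call-used, reg-class = GENERAL_REGS, regno = 0
   where the exact mode list depends on the configuration.  */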
2291
2292 static const char *
2293 rs6000_debug_vector_unit (enum rs6000_vector v)
2294 {
2295 const char *ret;
2296
2297 switch (v)
2298 {
2299 case VECTOR_NONE: ret = "none"; break;
2300 case VECTOR_ALTIVEC: ret = "altivec"; break;
2301 case VECTOR_VSX: ret = "vsx"; break;
2302 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2303 case VECTOR_OTHER: ret = "other"; break;
2304 default: ret = "unknown"; break;
2305 }
2306
2307 return ret;
2308 }
2309
2310 /* Inner function printing just the address mask for a particular reload
2311 register class. */
2312 DEBUG_FUNCTION char *
2313 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2314 {
2315 static char ret[8];
2316 char *p = ret;
2317
2318 if ((mask & RELOAD_REG_VALID) != 0)
2319 *p++ = 'v';
2320 else if (keep_spaces)
2321 *p++ = ' ';
2322
2323 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2324 *p++ = 'm';
2325 else if (keep_spaces)
2326 *p++ = ' ';
2327
2328 if ((mask & RELOAD_REG_INDEXED) != 0)
2329 *p++ = 'i';
2330 else if (keep_spaces)
2331 *p++ = ' ';
2332
2333 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2334 *p++ = 'O';
2335 else if ((mask & RELOAD_REG_OFFSET) != 0)
2336 *p++ = 'o';
2337 else if (keep_spaces)
2338 *p++ = ' ';
2339
2340 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2341 *p++ = '+';
2342 else if (keep_spaces)
2343 *p++ = ' ';
2344
2345 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2346 *p++ = '+';
2347 else if (keep_spaces)
2348 *p++ = ' ';
2349
2350 if ((mask & RELOAD_REG_AND_M16) != 0)
2351 *p++ = '&';
2352 else if (keep_spaces)
2353 *p++ = ' ';
2354
2355 *p = '\0';
2356
2357 return ret;
2358 }
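/* For example, a mask of RELOAD_REG_VALID | RELOAD_REG_INDEXED
   | RELOAD_REG_OFFSET comes back as "vio" without KEEP_SPACES, or as
   "v io   " (unset flags padded with blanks) with it.  */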
2359
2360 /* Print the address masks in a human readable fashion. */
2361 DEBUG_FUNCTION void
2362 rs6000_debug_print_mode (ssize_t m)
2363 {
2364 ssize_t rc;
2365 int spaces = 0;
2366
2367 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2368 for (rc = 0; rc < N_RELOAD_REG; rc++)
2369 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2370 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2371
2372 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2373 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2374 {
2375 fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2376 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2377 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2378 spaces = 0;
2379 }
2380 else
2381 spaces += sizeof (" Reload=sl") - 1;
2382
2383 if (reg_addr[m].scalar_in_vmx_p)
2384 {
2385 fprintf (stderr, "%*s Upper=y", spaces, "");
2386 spaces = 0;
2387 }
2388 else
2389 spaces += sizeof (" Upper=y") - 1;
2390
2391 if (rs6000_vector_unit[m] != VECTOR_NONE
2392 || rs6000_vector_mem[m] != VECTOR_NONE)
2393 {
2394 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2395 spaces, "",
2396 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2397 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2398 }
2399
2400 fputs ("\n", stderr);
2401 }
2402
2403 #define DEBUG_FMT_ID "%-32s= "
2404 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2405 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2406 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
2407
2408 /* Print various interesting information with -mdebug=reg. */
2409 static void
2410 rs6000_debug_reg_global (void)
2411 {
2412 static const char *const tf[2] = { "false", "true" };
2413 const char *nl = (const char *)0;
2414 int m;
2415 size_t m1, m2, v;
2416 char costly_num[20];
2417 char nop_num[20];
2418 char flags_buffer[40];
2419 const char *costly_str;
2420 const char *nop_str;
2421 const char *trace_str;
2422 const char *abi_str;
2423 const char *cmodel_str;
2424 struct cl_target_option cl_opts;
2425
2426 /* Modes we want tieable information on. */
2427 static const machine_mode print_tieable_modes[] = {
2428 QImode,
2429 HImode,
2430 SImode,
2431 DImode,
2432 TImode,
2433 PTImode,
2434 SFmode,
2435 DFmode,
2436 TFmode,
2437 IFmode,
2438 KFmode,
2439 SDmode,
2440 DDmode,
2441 TDmode,
2442 V16QImode,
2443 V8HImode,
2444 V4SImode,
2445 V2DImode,
2446 V1TImode,
2447 V32QImode,
2448 V16HImode,
2449 V8SImode,
2450 V4DImode,
2451 V2TImode,
2452 V4SFmode,
2453 V2DFmode,
2454 V8SFmode,
2455 V4DFmode,
2456 CCmode,
2457 CCUNSmode,
2458 CCEQmode,
2459 };
2460
2461 /* Virtual regs we are interested in. */
2462 static const struct {
2463 int regno; /* register number. */
2464 const char *name; /* register name. */
2465 } virtual_regs[] = {
2466 { STACK_POINTER_REGNUM, "stack pointer:" },
2467 { TOC_REGNUM, "toc: " },
2468 { STATIC_CHAIN_REGNUM, "static chain: " },
2469 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2470 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2471 { ARG_POINTER_REGNUM, "arg pointer: " },
2472 { FRAME_POINTER_REGNUM, "frame pointer:" },
2473 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2474 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2475 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2476 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2477 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2478 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2479 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2480 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2481 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2482 };
2483
2484 fputs ("\nHard register information:\n", stderr);
2485 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2486 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2487 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2488 LAST_ALTIVEC_REGNO,
2489 "vs");
2490 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2491 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2492 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2493 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2494 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2495 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2496
2497 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2498 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2499 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2500
2501 fprintf (stderr,
2502 "\n"
2503 "d reg_class = %s\n"
2504 "f reg_class = %s\n"
2505 "v reg_class = %s\n"
2506 "wa reg_class = %s\n"
2507 "wb reg_class = %s\n"
2508 "wd reg_class = %s\n"
2509 "we reg_class = %s\n"
2510 "wf reg_class = %s\n"
2511 "wg reg_class = %s\n"
2512 "wh reg_class = %s\n"
2513 "wi reg_class = %s\n"
2514 "wj reg_class = %s\n"
2515 "wk reg_class = %s\n"
2516 "wl reg_class = %s\n"
2517 "wm reg_class = %s\n"
2518 "wo reg_class = %s\n"
2519 "wp reg_class = %s\n"
2520 "wq reg_class = %s\n"
2521 "wr reg_class = %s\n"
2522 "ws reg_class = %s\n"
2523 "wt reg_class = %s\n"
2524 "wu reg_class = %s\n"
2525 "wv reg_class = %s\n"
2526 "ww reg_class = %s\n"
2527 "wx reg_class = %s\n"
2528 "wy reg_class = %s\n"
2529 "wz reg_class = %s\n"
2530 "wA reg_class = %s\n"
2531 "wH reg_class = %s\n"
2532 "wI reg_class = %s\n"
2533 "wJ reg_class = %s\n"
2534 "wK reg_class = %s\n"
2535 "\n",
2536 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2537 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2538 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2539 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2540 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wb]],
2541 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2542 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2543 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2544 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2545 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2546 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2547 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2548 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2549 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2550 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2551 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wo]],
2552 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
2553 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
2554 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2555 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2556 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2557 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2558 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2559 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2560 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2561 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2562 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]],
2563 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]],
2564 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wH]],
2565 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wI]],
2566 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wJ]],
2567 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wK]]);
2568
2569 nl = "\n";
2570 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2571 rs6000_debug_print_mode (m);
2572
2573 fputs ("\n", stderr);
2574
2575 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2576 {
2577 machine_mode mode1 = print_tieable_modes[m1];
2578 bool first_time = true;
2579
2580 nl = (const char *)0;
2581 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2582 {
2583 machine_mode mode2 = print_tieable_modes[m2];
2584 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2585 {
2586 if (first_time)
2587 {
2588 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2589 nl = "\n";
2590 first_time = false;
2591 }
2592
2593 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2594 }
2595 }
2596
2597 if (!first_time)
2598 fputs ("\n", stderr);
2599 }
2600
2601 if (nl)
2602 fputs (nl, stderr);
2603
2604 if (rs6000_recip_control)
2605 {
2606 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2607
2608 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2609 if (rs6000_recip_bits[m])
2610 {
2611 fprintf (stderr,
2612 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2613 GET_MODE_NAME (m),
2614 (RS6000_RECIP_AUTO_RE_P (m)
2615 ? "auto"
2616 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2617 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2618 ? "auto"
2619 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2620 }
2621
2622 fputs ("\n", stderr);
2623 }
2624
2625 if (rs6000_cpu_index >= 0)
2626 {
2627 const char *name = processor_target_table[rs6000_cpu_index].name;
2628 HOST_WIDE_INT flags
2629 = processor_target_table[rs6000_cpu_index].target_enable;
2630
2631 sprintf (flags_buffer, "-mcpu=%s flags", name);
2632 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2633 }
2634 else
2635 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2636
2637 if (rs6000_tune_index >= 0)
2638 {
2639 const char *name = processor_target_table[rs6000_tune_index].name;
2640 HOST_WIDE_INT flags
2641 = processor_target_table[rs6000_tune_index].target_enable;
2642
2643 sprintf (flags_buffer, "-mtune=%s flags", name);
2644 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2645 }
2646 else
2647 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2648
2649 cl_target_option_save (&cl_opts, &global_options);
2650 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2651 rs6000_isa_flags);
2652
2653 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2654 rs6000_isa_flags_explicit);
2655
2656 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2657 rs6000_builtin_mask);
2658
2659 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2660
2661 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2662 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2663
2664 switch (rs6000_sched_costly_dep)
2665 {
2666 case max_dep_latency:
2667 costly_str = "max_dep_latency";
2668 break;
2669
2670 case no_dep_costly:
2671 costly_str = "no_dep_costly";
2672 break;
2673
2674 case all_deps_costly:
2675 costly_str = "all_deps_costly";
2676 break;
2677
2678 case true_store_to_load_dep_costly:
2679 costly_str = "true_store_to_load_dep_costly";
2680 break;
2681
2682 case store_to_load_dep_costly:
2683 costly_str = "store_to_load_dep_costly";
2684 break;
2685
2686 default:
2687 costly_str = costly_num;
2688 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2689 break;
2690 }
2691
2692 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2693
2694 switch (rs6000_sched_insert_nops)
2695 {
2696 case sched_finish_regroup_exact:
2697 nop_str = "sched_finish_regroup_exact";
2698 break;
2699
2700 case sched_finish_pad_groups:
2701 nop_str = "sched_finish_pad_groups";
2702 break;
2703
2704 case sched_finish_none:
2705 nop_str = "sched_finish_none";
2706 break;
2707
2708 default:
2709 nop_str = nop_num;
2710 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2711 break;
2712 }
2713
2714 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2715
2716 switch (rs6000_sdata)
2717 {
2718 default:
2719 case SDATA_NONE:
2720 break;
2721
2722 case SDATA_DATA:
2723 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2724 break;
2725
2726 case SDATA_SYSV:
2727 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2728 break;
2729
2730 case SDATA_EABI:
2731 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2732 break;
2733
2734 }
2735
2736 switch (rs6000_traceback)
2737 {
2738 case traceback_default: trace_str = "default"; break;
2739 case traceback_none: trace_str = "none"; break;
2740 case traceback_part: trace_str = "part"; break;
2741 case traceback_full: trace_str = "full"; break;
2742 default: trace_str = "unknown"; break;
2743 }
2744
2745 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2746
2747 switch (rs6000_current_cmodel)
2748 {
2749 case CMODEL_SMALL: cmodel_str = "small"; break;
2750 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2751 case CMODEL_LARGE: cmodel_str = "large"; break;
2752 default: cmodel_str = "unknown"; break;
2753 }
2754
2755 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2756
2757 switch (rs6000_current_abi)
2758 {
2759 case ABI_NONE: abi_str = "none"; break;
2760 case ABI_AIX: abi_str = "aix"; break;
2761 case ABI_ELFv2: abi_str = "ELFv2"; break;
2762 case ABI_V4: abi_str = "V4"; break;
2763 case ABI_DARWIN: abi_str = "darwin"; break;
2764 default: abi_str = "unknown"; break;
2765 }
2766
2767 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2768
2769 if (rs6000_altivec_abi)
2770 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2771
2772 if (rs6000_darwin64_abi)
2773 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2774
2775 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2776 (TARGET_SOFT_FLOAT ? "true" : "false"));
2777
2778 if (TARGET_LINK_STACK)
2779 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2780
2781 if (TARGET_P8_FUSION)
2782 {
2783 char options[80];
2784
2785 strcpy (options, (TARGET_P9_FUSION) ? "power9" : "power8");
2786 if (TARGET_P8_FUSION_SIGN)
2787 strcat (options, ", sign");
2788
2789 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2790 }
2791
2792 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2793 TARGET_SECURE_PLT ? "secure" : "bss");
2794 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2795 aix_struct_return ? "aix" : "sysv");
2796 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2797 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2798 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2799 tf[!!rs6000_align_branch_targets]);
2800 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2801 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2802 rs6000_long_double_type_size);
2803 if (rs6000_long_double_type_size > 64)
2804 {
2805 fprintf (stderr, DEBUG_FMT_S, "long double type",
2806 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2807 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2808 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2809 }
2810 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2811 (int)rs6000_sched_restricted_insns_priority);
2812 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2813 (int)END_BUILTINS);
2814 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2815 (int)RS6000_BUILTIN_COUNT);
2816
2817 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2818 (int)TARGET_FLOAT128_ENABLE_TYPE);
2819
2820 if (TARGET_VSX)
2821 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2822 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2823
2824 if (TARGET_DIRECT_MOVE_128)
2825 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2826 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2827 }
2828
2829 \f
2830 /* Update the addr mask bits in reg_addr to help secondary reload and the
2831 legitimate address support figure out the appropriate addressing to
2832 use. */
2833
2834 static void
2835 rs6000_setup_reg_addr_masks (void)
2836 {
2837 ssize_t rc, reg, m, nregs;
2838 addr_mask_type any_addr_mask, addr_mask;
2839
2840 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2841 {
2842 machine_mode m2 = (machine_mode) m;
2843 bool complex_p = false;
2844 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2845 size_t msize;
2846
2847 if (COMPLEX_MODE_P (m2))
2848 {
2849 complex_p = true;
2850 m2 = GET_MODE_INNER (m2);
2851 }
2852
2853 msize = GET_MODE_SIZE (m2);
2854
2855 /* SDmode is special in that we want to access it only via REG+REG
2856 addressing on power7 and above, since we want to use the LFIWZX and
2857 STFIWZX instructions to load it. */
2858 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2859
2860 any_addr_mask = 0;
2861 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2862 {
2863 addr_mask = 0;
2864 reg = reload_reg_map[rc].reg;
2865
2866 /* Can mode values go in the GPR/FPR/Altivec registers? */
2867 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2868 {
2869 bool small_int_vsx_p = (small_int_p
2870 && (rc == RELOAD_REG_FPR
2871 || rc == RELOAD_REG_VMX));
2872
2873 nregs = rs6000_hard_regno_nregs[m][reg];
2874 addr_mask |= RELOAD_REG_VALID;
2875
2876 /* Indicate if the mode takes more than 1 physical register. If
2877 it takes a single register, indicate it can do REG+REG
2878 addressing. Small integers in VSX registers can only do
2879 REG+REG addressing. */
2880 if (small_int_vsx_p)
2881 addr_mask |= RELOAD_REG_INDEXED;
2882 else if (nregs > 1 || m == BLKmode || complex_p)
2883 addr_mask |= RELOAD_REG_MULTIPLE;
2884 else
2885 addr_mask |= RELOAD_REG_INDEXED;
2886
2887 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2888 addressing. If we allow scalars into Altivec registers,
2889 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2890
2891 For VSX systems, we don't allow update addressing for
2892 DFmode/SFmode if those registers can go in both the
2893 traditional floating point registers and Altivec registers.
2894 The load/store instructions for the Altivec registers do not
2895 have update forms. If we allowed update addressing, it seems
2896 to break IV-OPT code using floating point if the index type is
2897 int instead of long (PR target/81550 and target/84042). */
2898
2899 if (TARGET_UPDATE
2900 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2901 && msize <= 8
2902 && !VECTOR_MODE_P (m2)
2903 && !FLOAT128_VECTOR_P (m2)
2904 && !complex_p
2905 && (m != E_DFmode || !TARGET_VSX)
2906 && (m != E_SFmode || !TARGET_P8_VECTOR)
2907 && !small_int_vsx_p)
2908 {
2909 addr_mask |= RELOAD_REG_PRE_INCDEC;
2910
2911 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2912 we don't allow PRE_MODIFY for some multi-register
2913 operations. */
2914 switch (m)
2915 {
2916 default:
2917 addr_mask |= RELOAD_REG_PRE_MODIFY;
2918 break;
2919
2920 case E_DImode:
2921 if (TARGET_POWERPC64)
2922 addr_mask |= RELOAD_REG_PRE_MODIFY;
2923 break;
2924
2925 case E_DFmode:
2926 case E_DDmode:
2927 if (TARGET_HARD_FLOAT)
2928 addr_mask |= RELOAD_REG_PRE_MODIFY;
2929 break;
2930 }
2931 }
2932 }
2933
2934 /* GPR and FPR registers can do REG+OFFSET addressing, except
2935 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2936 for 64-bit scalars and 32-bit SFmode to Altivec registers. */
2937 if ((addr_mask != 0) && !indexed_only_p
2938 && msize <= 8
2939 && (rc == RELOAD_REG_GPR
2940 || ((msize == 8 || m2 == SFmode)
2941 && (rc == RELOAD_REG_FPR
2942 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2943 addr_mask |= RELOAD_REG_OFFSET;
2944
2945 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2946 instructions are enabled. The offset for 128-bit VSX registers is
2947 only 12 bits. While GPRs can handle the full offset range, VSX
2948 registers can only handle the restricted range. */
2949 else if ((addr_mask != 0) && !indexed_only_p
2950 && msize == 16 && TARGET_P9_VECTOR
2951 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2952 || (m2 == TImode && TARGET_VSX)))
2953 {
2954 addr_mask |= RELOAD_REG_OFFSET;
2955 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2956 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2957 }
2958
2959 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2960 addressing on 128-bit types. */
2961 if (rc == RELOAD_REG_VMX && msize == 16
2962 && (addr_mask & RELOAD_REG_VALID) != 0)
2963 addr_mask |= RELOAD_REG_AND_M16;
2964
2965 reg_addr[m].addr_mask[rc] = addr_mask;
2966 any_addr_mask |= addr_mask;
2967 }
2968
2969 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2970 }
2971 }
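/* To make the flow above concrete: on a typical VSX target, DFmode in
   the FPR reload class ends up with RELOAD_REG_VALID | RELOAD_REG_INDEXED
   | RELOAD_REG_OFFSET but not RELOAD_REG_PRE_INCDEC, since the update
   forms are disabled once DFmode can also live in Altivec registers.  */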
2972
2973 \f
2974 /* Initialize the various global tables that are based on register size. */
2975 static void
2976 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2977 {
2978 ssize_t r, m, c;
2979 int align64;
2980 int align32;
2981
2982 /* Precalculate REGNO_REG_CLASS. */
2983 rs6000_regno_regclass[0] = GENERAL_REGS;
2984 for (r = 1; r < 32; ++r)
2985 rs6000_regno_regclass[r] = BASE_REGS;
2986
2987 for (r = 32; r < 64; ++r)
2988 rs6000_regno_regclass[r] = FLOAT_REGS;
2989
2990 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
2991 rs6000_regno_regclass[r] = NO_REGS;
2992
2993 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2994 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2995
2996 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2997 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2998 rs6000_regno_regclass[r] = CR_REGS;
2999
3000 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
3001 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
3002 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
3003 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
3004 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
3005 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
3006 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
3007 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
3008 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
3009 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
3010
3011 /* Precalculate register class to simpler reload register class. We don't
3012 need all of the register classes that are combinations of different
3013 classes, just the simple ones that have constraint letters. */
3014 for (c = 0; c < N_REG_CLASSES; c++)
3015 reg_class_to_reg_type[c] = NO_REG_TYPE;
3016
3017 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
3018 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
3019 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
3020 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
3021 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
3022 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
3023 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
3024 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
3025 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
3026 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
3027
3028 if (TARGET_VSX)
3029 {
3030 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
3031 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
3032 }
3033 else
3034 {
3035 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
3036 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
3037 }
3038
3039 /* Precalculate the valid memory formats as well as the vector information;
3040 this must be set up before the rs6000_hard_regno_nregs_internal calls
3041 below. */
3042 gcc_assert ((int)VECTOR_NONE == 0);
3043 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
3044 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
3045
3046 gcc_assert ((int)CODE_FOR_nothing == 0);
3047 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
3048
3049 gcc_assert ((int)NO_REGS == 0);
3050 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
3051
3052 /* The VSX hardware allows native alignment for vectors; control whether
3053 the compiler believes it can use native alignment or must still use 128-bit alignment. */
3054 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
3055 {
3056 align64 = 64;
3057 align32 = 32;
3058 }
3059 else
3060 {
3061 align64 = 128;
3062 align32 = 128;
3063 }
3064
3065 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
3066 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
3067 if (TARGET_FLOAT128_TYPE)
3068 {
3069 rs6000_vector_mem[KFmode] = VECTOR_VSX;
3070 rs6000_vector_align[KFmode] = 128;
3071
3072 if (FLOAT128_IEEE_P (TFmode))
3073 {
3074 rs6000_vector_mem[TFmode] = VECTOR_VSX;
3075 rs6000_vector_align[TFmode] = 128;
3076 }
3077 }
3078
3079 /* V2DF mode, VSX only. */
3080 if (TARGET_VSX)
3081 {
3082 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
3083 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
3084 rs6000_vector_align[V2DFmode] = align64;
3085 }
3086
3087 /* V4SF mode, either VSX or Altivec. */
3088 if (TARGET_VSX)
3089 {
3090 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
3091 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
3092 rs6000_vector_align[V4SFmode] = align32;
3093 }
3094 else if (TARGET_ALTIVEC)
3095 {
3096 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
3097 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
3098 rs6000_vector_align[V4SFmode] = align32;
3099 }
3100
3101 /* V16QImode, V8HImode, V4SImode are Altivec only, but may use VSX loads
3102 and stores. */
3103 if (TARGET_ALTIVEC)
3104 {
3105 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
3106 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
3107 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
3108 rs6000_vector_align[V4SImode] = align32;
3109 rs6000_vector_align[V8HImode] = align32;
3110 rs6000_vector_align[V16QImode] = align32;
3111
3112 if (TARGET_VSX)
3113 {
3114 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
3115 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
3116 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
3117 }
3118 else
3119 {
3120 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
3121 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
3122 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
3123 }
3124 }
3125
3126 /* V2DImode, full support for the mode depends on the ISA 2.07 vector unit. Allow
3127 it under VSX to do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
3128 if (TARGET_VSX)
3129 {
3130 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
3131 rs6000_vector_unit[V2DImode]
3132 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3133 rs6000_vector_align[V2DImode] = align64;
3134
3135 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
3136 rs6000_vector_unit[V1TImode]
3137 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3138 rs6000_vector_align[V1TImode] = 128;
3139 }
3140
3141 /* DFmode, see if we want to use the VSX unit. Memory is handled
3142 differently, so don't set rs6000_vector_mem. */
3143 if (TARGET_VSX)
3144 {
3145 rs6000_vector_unit[DFmode] = VECTOR_VSX;
3146 rs6000_vector_align[DFmode] = 64;
3147 }
3148
3149 /* SFmode, see if we want to use the VSX unit. */
3150 if (TARGET_P8_VECTOR)
3151 {
3152 rs6000_vector_unit[SFmode] = VECTOR_VSX;
3153 rs6000_vector_align[SFmode] = 32;
3154 }
3155
3156 /* Allow TImode in VSX registers and set the VSX memory macros. */
3157 if (TARGET_VSX)
3158 {
3159 rs6000_vector_mem[TImode] = VECTOR_VSX;
3160 rs6000_vector_align[TImode] = align64;
3161 }
3162
3163 /* Register class constraints for the constraints that depend on compile
3164 switches. When the VSX code was added, different constraints were added
3165 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
3166 of the VSX registers are used. The register classes for scalar floating
3167 point types are set, based on whether we allow that type into the upper
3168 (Altivec) registers. GCC has register classes to target the Altivec
3169 registers for load/store operations, to select using a VSX memory
3170 operation instead of the traditional floating point operation. The
3171 constraints are:
3172
3173 d - Register class to use with traditional DFmode instructions.
3174 f - Register class to use with traditional SFmode instructions.
3175 v - Altivec register.
3176 wa - Any VSX register.
3177 wc - Reserved to represent individual CR bits (used in LLVM).
3178 wd - Preferred register class for V2DFmode.
3179 wf - Preferred register class for V4SFmode.
3180 wg - Float register for power6x move insns.
3181 wh - FP register for direct move instructions.
3182 wi - FP or VSX register to hold 64-bit integers for VSX insns.
3183 wj - FP or VSX register to hold 64-bit integers for direct moves.
3184 wk - FP or VSX register to hold 64-bit doubles for direct moves.
3185 wl - Float register if we can do 32-bit signed int loads.
3186 wm - VSX register for ISA 2.07 direct move operations.
3187 wn - always NO_REGS.
3188 wr - GPR if 64-bit mode is permitted.
3189 ws - Register class to do ISA 2.06 DF operations.
3190 wt - VSX register for TImode in VSX registers.
3191 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
3192 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
3193 ww - Register class to do SF conversions in with VSX operations.
3194 wx - Float register if we can do 32-bit int stores.
3195 wy - Register class to do ISA 2.07 SF operations.
3196 wz - Float register if we can do 32-bit unsigned int loads.
3197 wH - Altivec register if SImode is allowed in VSX registers.
3198 wI - VSX register if SImode is allowed in VSX registers.
3199 wJ - VSX register if QImode/HImode are allowed in VSX registers.
3200 wK - Altivec register if QImode/HImode are allowed in VSX registers. */
3201
3202 if (TARGET_HARD_FLOAT)
3203 {
3204 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
3205 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
3206 }
3207
3208 if (TARGET_VSX)
3209 {
3210 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
3211 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
3212 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
3213 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS; /* DFmode */
3214 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS; /* DFmode */
3215 rs6000_constraints[RS6000_CONSTRAINT_wi] = VSX_REGS; /* DImode */
3216 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
3217 }
3218
3219 /* Add conditional constraints based on various options, to allow us to
3220 collapse multiple insn patterns. */
3221 if (TARGET_ALTIVEC)
3222 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
3223
3224 if (TARGET_MFPGPR) /* DFmode */
3225 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
3226
3227 if (TARGET_LFIWAX)
3228 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
3229
3230 if (TARGET_DIRECT_MOVE)
3231 {
3232 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
3233 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
3234 = rs6000_constraints[RS6000_CONSTRAINT_wi];
3235 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
3236 = rs6000_constraints[RS6000_CONSTRAINT_ws];
3237 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
3238 }
3239
3240 if (TARGET_POWERPC64)
3241 {
3242 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
3243 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
3244 }
3245
3246 if (TARGET_P8_VECTOR) /* SFmode */
3247 {
3248 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
3249 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
3250 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
3251 }
3252 else if (TARGET_VSX)
3253 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3254
3255 if (TARGET_STFIWX)
3256 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
3257
3258 if (TARGET_LFIWZX)
3259 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
3260
3261 if (TARGET_FLOAT128_TYPE)
3262 {
3263 rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS; /* KFmode */
3264 if (FLOAT128_IEEE_P (TFmode))
3265 rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS; /* TFmode */
3266 }
3267
3268 if (TARGET_P9_VECTOR)
3269 {
3270 /* Support for new D-form instructions. */
3271 rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS;
3272
3273 /* Support for ISA 3.0 (power9) vectors. */
3274 rs6000_constraints[RS6000_CONSTRAINT_wo] = VSX_REGS;
3275 }
3276
3277 /* Support for new direct moves (ISA 3.0 + 64bit). */
3278 if (TARGET_DIRECT_MOVE_128)
3279 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3280
3281 /* Support small integers in VSX registers. */
3282 if (TARGET_P8_VECTOR)
3283 {
3284 rs6000_constraints[RS6000_CONSTRAINT_wH] = ALTIVEC_REGS;
3285 rs6000_constraints[RS6000_CONSTRAINT_wI] = FLOAT_REGS;
3286 if (TARGET_P9_VECTOR)
3287 {
3288 rs6000_constraints[RS6000_CONSTRAINT_wJ] = FLOAT_REGS;
3289 rs6000_constraints[RS6000_CONSTRAINT_wK] = ALTIVEC_REGS;
3290 }
3291 }
3292
3293 /* Set up the reload helper and direct move functions. */
3294 if (TARGET_VSX || TARGET_ALTIVEC)
3295 {
3296 if (TARGET_64BIT)
3297 {
3298 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3299 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3300 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3301 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3302 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3303 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3304 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3305 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3306 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3307 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3308 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3309 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3310 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3311 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3312 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3313 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3314 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3315 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3316 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3317 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3318
3319 if (FLOAT128_VECTOR_P (KFmode))
3320 {
3321 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3322 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3323 }
3324
3325 if (FLOAT128_VECTOR_P (TFmode))
3326 {
3327 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3328 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3329 }
3330
3331 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3332 available. */
3333 if (TARGET_NO_SDMODE_STACK)
3334 {
3335 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3336 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3337 }
3338
3339 if (TARGET_VSX)
3340 {
3341 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3342 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3343 }
3344
3345 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3346 {
3347 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3348 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3349 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3350 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3351 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3352 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3353 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3354 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3355 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3356
3357 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3358 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3359 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3360 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3361 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3362 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3363 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3364 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3365 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3366
3367 if (FLOAT128_VECTOR_P (KFmode))
3368 {
3369 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3370 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3371 }
3372
3373 if (FLOAT128_VECTOR_P (TFmode))
3374 {
3375 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3376 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3377 }
3378 }
3379 }
3380 else
3381 {
3382 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3383 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3384 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3385 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3386 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3387 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3388 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3389 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3390 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3391 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3392 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3393 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3394 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3395 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3396 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3397 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3398 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3399 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3400 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3401 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3402
3403 if (FLOAT128_VECTOR_P (KFmode))
3404 {
3405 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3406 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3407 }
3408
3409 if (FLOAT128_IEEE_P (TFmode))
3410 {
3411 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3412 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3413 }
3414
3415 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3416 available. */
3417 if (TARGET_NO_SDMODE_STACK)
3418 {
3419 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3420 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3421 }
3422
3423 if (TARGET_VSX)
3424 {
3425 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3426 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3427 }
3428
3429 if (TARGET_DIRECT_MOVE)
3430 {
3431 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3432 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3433 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3434 }
3435 }
3436
3437 reg_addr[DFmode].scalar_in_vmx_p = true;
3438 reg_addr[DImode].scalar_in_vmx_p = true;
3439
3440 if (TARGET_P8_VECTOR)
3441 {
3442 reg_addr[SFmode].scalar_in_vmx_p = true;
3443 reg_addr[SImode].scalar_in_vmx_p = true;
3444
3445 if (TARGET_P9_VECTOR)
3446 {
3447 reg_addr[HImode].scalar_in_vmx_p = true;
3448 reg_addr[QImode].scalar_in_vmx_p = true;
3449 }
3450 }
3451 }
3452
3453 /* Precalculate HARD_REGNO_NREGS. */
3454 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3455 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3456 rs6000_hard_regno_nregs[m][r]
3457 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
3458
3459 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3460 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3461 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3462 if (rs6000_hard_regno_mode_ok_uncached (r, (machine_mode)m))
3463 rs6000_hard_regno_mode_ok_p[m][r] = true;
3464
3465 /* Precalculate CLASS_MAX_NREGS sizes. */
3466 for (c = 0; c < LIM_REG_CLASSES; ++c)
3467 {
3468 int reg_size;
3469
3470 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3471 reg_size = UNITS_PER_VSX_WORD;
3472
3473 else if (c == ALTIVEC_REGS)
3474 reg_size = UNITS_PER_ALTIVEC_WORD;
3475
3476 else if (c == FLOAT_REGS)
3477 reg_size = UNITS_PER_FP_WORD;
3478
3479 else
3480 reg_size = UNITS_PER_WORD;
3481
3482 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3483 {
3484 machine_mode m2 = (machine_mode)m;
3485 int reg_size2 = reg_size;
3486
3487 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3488 in VSX. */
3489 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3490 reg_size2 = UNITS_PER_FP_WORD;
3491
3492 rs6000_class_max_nregs[m][c]
3493 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3494 }
3495 }
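
/* Worked example for the loop above (illustrative, not from the
sources): V2DFmode is 16 bytes, so in FLOAT_REGS (UNITS_PER_FP_WORD
== 8) it needs (16 + 8 - 1) / 8 = 2 registers, while in VSX_REGS
(UNITS_PER_VSX_WORD == 16) it fits in (16 + 16 - 1) / 16 = 1 register.
TDmode and IBM 128-bit floating point are forced back to 8-byte chunks
by FLOAT128_2REG_P, so they always take 2 registers. */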
3496
3497 /* Calculate the modes for which to automatically generate code that uses
3498 the reciprocal divide and square root instructions. In the future,
3499 possibly automatically generate the instructions even if the user did not
3500 specify -mrecip. The double-precision reciprocal sqrt estimate on older
3501 machines is not accurate enough. */
3502 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3503 if (TARGET_FRES)
3504 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3505 if (TARGET_FRE)
3506 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3507 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3508 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3509 if (VECTOR_UNIT_VSX_P (V2DFmode))
3510 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3511
3512 if (TARGET_FRSQRTES)
3513 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3514 if (TARGET_FRSQRTE)
3515 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3516 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3517 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3518 if (VECTOR_UNIT_VSX_P (V2DFmode))
3519 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3520
3521 if (rs6000_recip_control)
3522 {
3523 if (!flag_finite_math_only)
3524 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
3525 "-ffast-math");
3526 if (flag_trapping_math)
3527 warning (0, "%qs requires %qs or %qs", "-mrecip",
3528 "-fno-trapping-math", "-ffast-math");
3529 if (!flag_reciprocal_math)
3530 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3531 "-ffast-math");
3532 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3533 {
3534 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3535 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3536 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3537
3538 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3539 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3540 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3541
3542 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3543 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3544 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3545
3546 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3547 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3548 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3549
3550 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3551 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3552 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3553
3554 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3555 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3556 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3557
3558 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3559 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3560 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3561
3562 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3563 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3564 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3565 }
3566 }
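
/* Illustrative example (not from the sources): compiling with
-O3 -mrecip -ffast-math satisfies the three checks above, since
-ffast-math implies -ffinite-math-only, -fno-trapping-math and
-freciprocal-math, so the AUTO_RE/AUTO_RSQRTE bits are set for every
mode whose HAVE_RE/HAVE_RSQRTE bit was set earlier. */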
3567
3568 /* Update the addr mask bits in reg_addr to help secondary reload and the
3569 legitimate address support figure out the appropriate addressing to
3570 use. */
3571 rs6000_setup_reg_addr_masks ();
3572
3573 if (global_init_p || TARGET_DEBUG_TARGET)
3574 {
3575 if (TARGET_DEBUG_REG)
3576 rs6000_debug_reg_global ();
3577
3578 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3579 fprintf (stderr,
3580 "SImode variable mult cost = %d\n"
3581 "SImode constant mult cost = %d\n"
3582 "SImode short constant mult cost = %d\n"
3583 "DImode multiplication cost = %d\n"
3584 "SImode division cost = %d\n"
3585 "DImode division cost = %d\n"
3586 "Simple fp operation cost = %d\n"
3587 "DFmode multiplication cost = %d\n"
3588 "SFmode division cost = %d\n"
3589 "DFmode division cost = %d\n"
3590 "cache line size = %d\n"
3591 "l1 cache size = %d\n"
3592 "l2 cache size = %d\n"
3593 "simultaneous prefetches = %d\n"
3594 "\n",
3595 rs6000_cost->mulsi,
3596 rs6000_cost->mulsi_const,
3597 rs6000_cost->mulsi_const9,
3598 rs6000_cost->muldi,
3599 rs6000_cost->divsi,
3600 rs6000_cost->divdi,
3601 rs6000_cost->fp,
3602 rs6000_cost->dmul,
3603 rs6000_cost->sdiv,
3604 rs6000_cost->ddiv,
3605 rs6000_cost->cache_line_size,
3606 rs6000_cost->l1_cache_size,
3607 rs6000_cost->l2_cache_size,
3608 rs6000_cost->simultaneous_prefetches);
3609 }
3610 }
3611
3612 #if TARGET_MACHO
3613 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3614
3615 static void
3616 darwin_rs6000_override_options (void)
3617 {
3618 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3619 off. */
3620 rs6000_altivec_abi = 1;
3621 TARGET_ALTIVEC_VRSAVE = 1;
3622 rs6000_current_abi = ABI_DARWIN;
3623
3624 if (DEFAULT_ABI == ABI_DARWIN
3625 && TARGET_64BIT)
3626 darwin_one_byte_bool = 1;
3627
3628 if (TARGET_64BIT && ! TARGET_POWERPC64)
3629 {
3630 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3631 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3632 }
3633 if (flag_mkernel)
3634 {
3635 rs6000_default_long_calls = 1;
3636 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3637 }
3638
3639 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3640 Altivec. */
3641 if (!flag_mkernel && !flag_apple_kext
3642 && TARGET_64BIT
3643 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3644 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3645
3646 /* Unless the user (not the configurer) has explicitly overridden
3647 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3648 G4 unless targeting the kernel. */
3649 if (!flag_mkernel
3650 && !flag_apple_kext
3651 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3652 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3653 && ! global_options_set.x_rs6000_cpu_index)
3654 {
3655 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3656 }
3657 }
3658 #endif
3659
3660 /* If not otherwise specified by a target, make 'long double' equivalent to
3661 'double'. */
3662
3663 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3664 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3665 #endif
3666
3667 /* Return the builtin mask of the various options that could affect which
3668 builtins are available. In the past we used target_flags, but we've run
3669 out of bits, and some options are no longer in target_flags. */
3670
3671 HOST_WIDE_INT
3672 rs6000_builtin_mask_calculate (void)
3673 {
3674 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3675 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3676 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3677 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3678 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3679 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3680 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3681 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3682 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3683 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3684 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3685 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3686 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3687 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3688 | ((TARGET_POWERPC64) ? RS6000_BTM_POWERPC64 : 0)
3689 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3690 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3691 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3692 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3693 | ((TARGET_LONG_DOUBLE_128
3694 && TARGET_HARD_FLOAT
3695 && !TARGET_IEEEQUAD) ? RS6000_BTM_LDBL128 : 0)
3696 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
3697 | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0));
3698 }
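
/* Illustrative sketch of how the mask is consumed (assumed usage, not
a quote from the builtin machinery): a builtin that needs both ISA
2.07 vectors and 64-bit mode would carry
RS6000_BTM_P8_VECTOR | RS6000_BTM_64BIT and only be accepted when

(builtin_mask & required_mask) == required_mask

i.e. every bit the builtin requires is present in the mask computed
above. */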
3699
3700 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3701 to clobber the XER[CA] bit because clobbering that bit without telling
3702 the compiler worked just fine with versions of GCC before GCC 5, and
3703 breaking a lot of older code in ways that are hard to track down is
3704 not such a great idea. */
3705
3706 static rtx_insn *
3707 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3708 vec<const char *> &/*constraints*/,
3709 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3710 {
3711 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3712 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3713 return NULL;
3714 }
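
/* Illustrative example of the asm being accommodated above (the
variable names are made up): something like

asm ("addic %0,%1,1" : "=r" (out) : "r" (in));

overwrites XER[CA], since addic writes the carry bit, without
declaring it; the implicit clobber added above keeps such pre-GCC-5
asm working. */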
3715
3716 /* Override command line options.
3717
3718 Combine build-specific configuration information with options
3719 specified on the command line to set various state variables which
3720 influence code generation, optimization, and expansion of built-in
3721 functions. Assure that command-line configuration preferences are
3722 compatible with each other and with the build configuration; issue
3723 warnings while adjusting configuration or error messages while
3724 rejecting configuration.
3725
3726 Upon entry to this function:
3727
3728 This function is called once at the beginning of
3729 compilation, and then again at the start and end of compiling
3730 each section of code that has a different configuration, as
3731 indicated, for example, by adding the
3732
3733 __attribute__((__target__("cpu=power9")))
3734
3735 qualifier to a function definition or, for example, by bracketing
3736 code between
3737
3738 #pragma GCC target("altivec")
3739
3740 and
3741
3742 #pragma GCC reset_options
3743
3744 directives. Parameter global_init_p is true for the initial
3745 invocation, which initializes global variables, and false for all
3746 subsequent invocations.
3747
3748
3749 Various global state information is assumed to be valid. This
3750 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3751 default CPU specified at build configure time, TARGET_DEFAULT,
3752 representing the default set of option flags for the default
3753 target, and global_options_set.x_rs6000_isa_flags, representing
3754 which options were requested on the command line.
3755
3756 Upon return from this function:
3757
3758 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3759 was set by name on the command line. Additionally, if certain
3760 attributes are automatically enabled or disabled by this function
3761 in order to assure compatibility between options and
3762 configuration, the flags associated with those attributes are
3763 also set. By setting these "explicit bits", we avoid the risk
3764 that other code might accidentally overwrite these particular
3765 attributes with "default values".
3766
3767 The various bits of rs6000_isa_flags are set to indicate the
3768 target options that have been selected for the most current
3769 compilation efforts. This has the effect of also turning on the
3770 associated TARGET_XXX values since these are macros which are
3771 generally defined to test the corresponding bit of the
3772 rs6000_isa_flags variable.
3773
3774 The variable rs6000_builtin_mask is set to represent the target
3775 options for the most current compilation efforts, consistent with
3776 the current contents of rs6000_isa_flags. This variable controls
3777 expansion of built-in functions.
3778
3779 Various other global variables and fields of global structures
3780 (over 50 in all) are initialized to reflect the desired options
3781 for the most current compilation efforts. */
3782
3783 static bool
3784 rs6000_option_override_internal (bool global_init_p)
3785 {
3786 bool ret = true;
3787
3788 HOST_WIDE_INT set_masks;
3789 HOST_WIDE_INT ignore_masks;
3790 int cpu_index = -1;
3791 int tune_index;
3792 struct cl_target_option *main_target_opt
3793 = ((global_init_p || target_option_default_node == NULL)
3794 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3795
3796 /* Print defaults. */
3797 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3798 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3799
3800 /* Remember the explicit arguments. */
3801 if (global_init_p)
3802 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3803
3804 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3805 library functions, so warn about it. The flag may be useful for
3806 performance studies from time to time though, so don't disable it
3807 entirely. */
3808 if (global_options_set.x_rs6000_alignment_flags
3809 && rs6000_alignment_flags == MASK_ALIGN_POWER
3810 && DEFAULT_ABI == ABI_DARWIN
3811 && TARGET_64BIT)
3812 warning (0, "%qs is not supported for 64-bit Darwin;"
3813 " it is incompatible with the installed C and C++ libraries",
3814 "-malign-power");
3815
3816 /* Numerous experiments show that IRA-based loop pressure
3817 calculation works better for RTL loop invariant motion on targets
3818 with enough (>= 32) registers. It is an expensive optimization,
3819 so it is enabled only when optimizing for peak performance. */
3820 if (optimize >= 3 && global_init_p
3821 && !global_options_set.x_flag_ira_loop_pressure)
3822 flag_ira_loop_pressure = 1;
3823
3824 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3825 for tracebacks to be complete, but not if any -fasynchronous-unwind-tables
3826 option was already specified. */
3827 if (flag_sanitize & SANITIZE_USER_ADDRESS
3828 && !global_options_set.x_flag_asynchronous_unwind_tables)
3829 flag_asynchronous_unwind_tables = 1;
3830
3831 /* Set the pointer size. */
3832 if (TARGET_64BIT)
3833 {
3834 rs6000_pmode = DImode;
3835 rs6000_pointer_size = 64;
3836 }
3837 else
3838 {
3839 rs6000_pmode = SImode;
3840 rs6000_pointer_size = 32;
3841 }
3842
3843 /* Some OSs don't support saving the high part of 64-bit registers on context
3844 switch. Other OSs don't support saving Altivec registers. On those OSs,
3845 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3846 if the user wants either, the user must explicitly specify them and we
3847 won't interfere with the user's specification. */
3848
3849 set_masks = POWERPC_MASKS;
3850 #ifdef OS_MISSING_POWERPC64
3851 if (OS_MISSING_POWERPC64)
3852 set_masks &= ~OPTION_MASK_POWERPC64;
3853 #endif
3854 #ifdef OS_MISSING_ALTIVEC
3855 if (OS_MISSING_ALTIVEC)
3856 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3857 | OTHER_VSX_VECTOR_MASKS);
3858 #endif
3859
3860 /* Don't override by the processor default if given explicitly. */
3861 set_masks &= ~rs6000_isa_flags_explicit;
3862
3863 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3864 the cpu in a target attribute or pragma, but did not specify a tuning
3865 option, use the cpu for the tuning option rather than the option specified
3866 with -mtune on the command line. Process a '--with-cpu' configuration
3867 request as an implicit --cpu. */
3868 if (rs6000_cpu_index >= 0)
3869 cpu_index = rs6000_cpu_index;
3870 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3871 cpu_index = main_target_opt->x_rs6000_cpu_index;
3872 else if (OPTION_TARGET_CPU_DEFAULT)
3873 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3874
3875 if (cpu_index >= 0)
3876 {
3877 const char *unavailable_cpu = NULL;
3878 switch (processor_target_table[cpu_index].processor)
3879 {
3880 #ifndef HAVE_AS_POWER9
3881 case PROCESSOR_POWER9:
3882 unavailable_cpu = "power9";
3883 break;
3884 #endif
3885 #ifndef HAVE_AS_POWER8
3886 case PROCESSOR_POWER8:
3887 unavailable_cpu = "power8";
3888 break;
3889 #endif
3890 #ifndef HAVE_AS_POPCNTD
3891 case PROCESSOR_POWER7:
3892 unavailable_cpu = "power7";
3893 break;
3894 #endif
3895 #ifndef HAVE_AS_DFP
3896 case PROCESSOR_POWER6:
3897 unavailable_cpu = "power6";
3898 break;
3899 #endif
3900 #ifndef HAVE_AS_POPCNTB
3901 case PROCESSOR_POWER5:
3902 unavailable_cpu = "power5";
3903 break;
3904 #endif
3905 default:
3906 break;
3907 }
3908 if (unavailable_cpu)
3909 {
3910 cpu_index = -1;
3911 warning (0, "will not generate %qs instructions because "
3912 "assembler lacks %qs support", unavailable_cpu,
3913 unavailable_cpu);
3914 }
3915 }
3916
3917 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3918 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3919 with those from the cpu, except for options that were explicitly set. If
3920 we don't have a cpu, do not override the target bits set in
3921 TARGET_DEFAULT. */
3922 if (cpu_index >= 0)
3923 {
3924 rs6000_cpu_index = cpu_index;
3925 rs6000_isa_flags &= ~set_masks;
3926 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3927 & set_masks);
3928 }
3929 else
3930 {
3931 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3932 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3933 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. Since we switched
3934 to using rs6000_isa_flags, we need to do the initialization here.
3935
3936 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3937 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3938 HOST_WIDE_INT flags;
3939 if (TARGET_DEFAULT)
3940 flags = TARGET_DEFAULT;
3941 else
3942 {
3943 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3944 const char *default_cpu = (!TARGET_POWERPC64
3945 ? "powerpc"
3946 : (BYTES_BIG_ENDIAN
3947 ? "powerpc64"
3948 : "powerpc64le"));
3949 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3950 flags = processor_target_table[default_cpu_index].target_enable;
3951 }
3952 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3953 }
3954
3955 if (rs6000_tune_index >= 0)
3956 tune_index = rs6000_tune_index;
3957 else if (cpu_index >= 0)
3958 rs6000_tune_index = tune_index = cpu_index;
3959 else
3960 {
3961 size_t i;
3962 enum processor_type tune_proc
3963 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3964
3965 tune_index = -1;
3966 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3967 if (processor_target_table[i].processor == tune_proc)
3968 {
3969 tune_index = i;
3970 break;
3971 }
3972 }
3973
3974 if (cpu_index >= 0)
3975 rs6000_cpu = processor_target_table[cpu_index].processor;
3976 else
3977 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3978
3979 gcc_assert (tune_index >= 0);
3980 rs6000_tune = processor_target_table[tune_index].processor;
3981
3982 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3983 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3984 || rs6000_cpu == PROCESSOR_PPCE5500)
3985 {
3986 if (TARGET_ALTIVEC)
3987 error ("AltiVec not supported in this target");
3988 }
3989
3990 /* If we are optimizing big endian systems for space, use the load/store
3991 multiple instructions. */
3992 if (BYTES_BIG_ENDIAN && optimize_size)
3993 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
3994
3995 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3996 because the hardware doesn't support the instructions used in little
3997 endian mode, and they cause an alignment trap. The 750 does not cause an
3998 alignment trap (except when the target is unaligned). */
3999
4000 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
4001 {
4002 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
4003 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
4004 warning (0, "%qs is not supported on little endian systems",
4005 "-mmultiple");
4006 }
4007
4008 /* If little-endian, default to -mstrict-align on older processors.
4009 Testing for htm matches power8 and later. */
4010 if (!BYTES_BIG_ENDIAN
4011 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
4012 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
4013
4014 if (!rs6000_fold_gimple)
4015 fprintf (stderr,
4016 "gimple folding of rs6000 builtins has been disabled.\n");
4017
4018 /* Add some warnings for VSX. */
4019 if (TARGET_VSX)
4020 {
4021 const char *msg = NULL;
4022 if (!TARGET_HARD_FLOAT)
4023 {
4024 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4025 msg = N_("-mvsx requires hardware floating point");
4026 else
4027 {
4028 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4029 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4030 }
4031 }
4032 else if (TARGET_AVOID_XFORM > 0)
4033 msg = N_("-mvsx needs indexed addressing");
4034 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
4035 & OPTION_MASK_ALTIVEC))
4036 {
4037 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4038 msg = N_("-mvsx and -mno-altivec are incompatible");
4039 else
4040 msg = N_("-mno-altivec disables vsx");
4041 }
4042
4043 if (msg)
4044 {
4045 warning (0, msg);
4046 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4047 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4048 }
4049 }
4050
4051 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
4052 the -mcpu setting to enable options that conflict. */
4053 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
4054 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
4055 | OPTION_MASK_ALTIVEC
4056 | OPTION_MASK_VSX)) != 0)
4057 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
4058 | OPTION_MASK_DIRECT_MOVE)
4059 & ~rs6000_isa_flags_explicit);
4060
4061 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4062 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
4063
4064 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
4065 off all of the options that depend on those flags. */
4066 ignore_masks = rs6000_disable_incompatible_switches ();
4067
4068 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
4069 unless the user explicitly used the -mno-<option> to disable the code. */
4070 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
4071 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
4072 else if (TARGET_P9_MINMAX)
4073 {
4074 if (cpu_index >= 0)
4075 {
4076 if (processor_target_table[cpu_index].processor == PROCESSOR_POWER9)
4077 {
4078 /* legacy behavior: allow -mcpu=power9 with certain
4079 capabilities explicitly disabled. */
4080 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
4081 }
4082 else
4083 error ("power9 target option is incompatible with %<%s=<xxx>%> "
4084 "for <xxx> less than power9", "-mcpu");
4085 }
4086 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
4087 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
4088 & rs6000_isa_flags_explicit))
4089 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
4090 were explicitly cleared. */
4091 error ("%qs incompatible with explicitly disabled options",
4092 "-mpower9-minmax");
4093 else
4094 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
4095 }
4096 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
4097 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
4098 else if (TARGET_VSX)
4099 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
4100 else if (TARGET_POPCNTD)
4101 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
4102 else if (TARGET_DFP)
4103 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
4104 else if (TARGET_CMPB)
4105 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
4106 else if (TARGET_FPRND)
4107 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
4108 else if (TARGET_POPCNTB)
4109 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
4110 else if (TARGET_ALTIVEC)
4111 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
4112
4113 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
4114 {
4115 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
4116 error ("%qs requires %qs", "-mcrypto", "-maltivec");
4117 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
4118 }
4119
4120 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
4121 {
4122 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4123 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
4124 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
4125 }
4126
4127 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
4128 {
4129 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4130 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
4131 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4132 }
4133
4134 if (TARGET_P8_VECTOR && !TARGET_VSX)
4135 {
4136 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4137 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
4138 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
4139 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
4140 {
4141 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4142 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4143 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4144 }
4145 else
4146 {
4147 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
4148 not explicit. */
4149 rs6000_isa_flags |= OPTION_MASK_VSX;
4150 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4151 }
4152 }
4153
4154 if (TARGET_DFP && !TARGET_HARD_FLOAT)
4155 {
4156 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
4157 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
4158 rs6000_isa_flags &= ~OPTION_MASK_DFP;
4159 }
4160
4161 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
4162 silently turn off quad memory mode. */
4163 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
4164 {
4165 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4166 warning (0, N_("-mquad-memory requires 64-bit mode"));
4167
4168 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
4169 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
4170
4171 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
4172 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
4173 }
4174
4175 /* Non-atomic quad memory load/store instructions are disabled for little
4176 endian, since the words are reversed, but atomic operations can still be
4177 done by swapping the words. */
4178 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
4179 {
4180 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4181 warning (0, N_("-mquad-memory is not available in little endian "
4182 "mode"));
4183
4184 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4185 }
4186
4187 /* Assume that if the user asked for normal quad memory instructions, they
4188 want the atomic versions as well, unless they explicitly told us not to
4189 use quad word atomic instructions. */
4190 if (TARGET_QUAD_MEMORY
4191 && !TARGET_QUAD_MEMORY_ATOMIC
4192 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4193 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
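
/* Illustrative summary of the three stanzas above (not from the
sources): on a 64-bit big-endian target, -mquad-memory also turns on
-mquad-memory-atomic; on little endian only the atomic form survives;
and in 32-bit mode an explicit -mquad-memory draws a warning and both
options are cleared. */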
4194
4195 /* If we can shrink-wrap the TOC register save separately, then use
4196 -msave-toc-indirect unless explicitly disabled. */
4197 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
4198 && flag_shrink_wrap_separate
4199 && optimize_function_for_speed_p (cfun))
4200 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
4201
4202 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4203 generating power8 instructions. */
4204 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4205 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4206 & OPTION_MASK_P8_FUSION);
4207
4208 /* Setting additional fusion flags turns on base fusion. */
4209 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
4210 {
4211 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4212 {
4213 if (TARGET_P8_FUSION_SIGN)
4214 error ("%qs requires %qs", "-mpower8-fusion-sign",
4215 "-mpower8-fusion");
4216
4217 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4218 }
4219 else
4220 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4221 }
4222
4223 /* Power9 fusion is a superset of power8 fusion. */
4224 if (TARGET_P9_FUSION && !TARGET_P8_FUSION)
4225 {
4226 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4227 {
4228 /* We prefer to not mention undocumented options in
4229 error messages. However, if users have managed to select
4230 power9-fusion without selecting power8-fusion, they
4231 already know about undocumented flags. */
4232 error ("%qs requires %qs", "-mpower9-fusion", "-mpower8-fusion");
4233 rs6000_isa_flags &= ~OPTION_MASK_P9_FUSION;
4234 }
4235 else
4236 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4237 }
4238
4239 /* Enable power9 fusion if we are tuning for power9, even if we aren't
4240 generating power9 instructions. */
4241 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_FUSION))
4242 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4243 & OPTION_MASK_P9_FUSION);
4244
4245 /* Power8 does not fuse sign-extended loads with the addis instruction. If
4246 we are optimizing at high levels for speed, convert a sign-extended load
4247 into a zero-extending load and an explicit sign extension. */
4248 if (TARGET_P8_FUSION
4249 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4250 && optimize_function_for_speed_p (cfun)
4251 && optimize >= 3)
4252 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
4253
4254 /* ISA 3.0 vector instructions include ISA 2.07. */
4255 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4256 {
4257 /* We prefer to not mention undocumented options in
4258 error messages. However, if users have managed to select
4259 power9-vector without selecting power8-vector, they
4260 already know about undocumented flags. */
4261 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4262 && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
4263 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
4264 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
4265 {
4266 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4267 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4268 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4269 }
4270 else
4271 {
4272 /* OPTION_MASK_P9_VECTOR is explicit and
4273 OPTION_MASK_P8_VECTOR is not explicit. */
4274 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
4275 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4276 }
4277 }
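
/* Illustrative example (not from the sources): a plain
-mpower9-vector therefore also turns on -mpower8-vector, while the
explicit combination -mpower9-vector -mno-power8-vector is rejected
with an error. */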
4278
4279 /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07
4280 support. If we only have ISA 2.06 support and the user did not specify
4281 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4282 but we don't enable the full vectorization support. */
4283 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4284 TARGET_ALLOW_MOVMISALIGN = 1;
4285
4286 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4287 {
4288 if (TARGET_ALLOW_MOVMISALIGN > 0
4289 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4290 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4291
4292 TARGET_ALLOW_MOVMISALIGN = 0;
4293 }
4294
4295 /* Determine when unaligned vector accesses are permitted, and when
4296 they are preferred over masked Altivec loads. Note that if
4297 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4298 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4299 not true. */
4300 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4301 {
4302 if (!TARGET_VSX)
4303 {
4304 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4305 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4306
4307 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4308 }
4309
4310 else if (!TARGET_ALLOW_MOVMISALIGN)
4311 {
4312 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4313 error ("%qs requires %qs", "-mefficient-unaligned-vsx",
4314 "-mallow-movmisalign");
4315
4316 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4317 }
4318 }
4319
4320 /* Use long double size to select the appropriate long double. We use
4321 TYPE_PRECISION to differentiate the 3 different long double types. We map
4322 128 into the precision used for TFmode. */
4323 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
4324 ? 64
4325 : FLOAT_PRECISION_TFmode);
4326
4327 /* Set long double size before the IEEE 128-bit tests. */
4328 if (!global_options_set.x_rs6000_long_double_type_size)
4329 {
4330 if (main_target_opt != NULL
4331 && (main_target_opt->x_rs6000_long_double_type_size
4332 != default_long_double_size))
4333 error ("target attribute or pragma changes long double size");
4334 else
4335 rs6000_long_double_type_size = default_long_double_size;
4336 }
4337 else if (rs6000_long_double_type_size == 128)
4338 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
4339
4340 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4341 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4342 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4343 those systems will not pick up this default. Warn if the user changes the
4344 default unless -Wno-psabi. */
4345 if (!global_options_set.x_rs6000_ieeequad)
4346 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
4347
4348 else if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128)
4349 {
4350 static bool warned_change_long_double;
4351 if (!warned_change_long_double)
4352 {
4353 warned_change_long_double = true;
4354 if (TARGET_IEEEQUAD)
4355 warning (OPT_Wpsabi, "Using IEEE extended precision long double");
4356 else
4357 warning (OPT_Wpsabi, "Using IBM extended precision long double");
4358 }
4359 }
4360
4361 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4362 systems. In GCC 7, we enabled the IEEE 128-bit floating point
4363 infrastructure (-mfloat128-type) but did not enable the actual __float128
4364 type unless the user used the explicit -mfloat128. In GCC 8, we enable
4365 both the keyword and the type. */
4366 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
4367
4368 /* IEEE 128-bit floating point requires VSX support. */
4369 if (TARGET_FLOAT128_KEYWORD)
4370 {
4371 if (!TARGET_VSX)
4372 {
4373 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4374 error ("%qs requires VSX support", "-mfloat128");
4375
4376 TARGET_FLOAT128_TYPE = 0;
4377 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
4378 | OPTION_MASK_FLOAT128_HW);
4379 }
4380 else if (!TARGET_FLOAT128_TYPE)
4381 {
4382 TARGET_FLOAT128_TYPE = 1;
4383 warning (0, "The -mfloat128 option may not be fully supported");
4384 }
4385 }
4386
4387 /* Enable the __float128 keyword under Linux by default. */
4388 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4389 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4390 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4391
4392 /* If we are supporting the float128 type and have full ISA 3.0 support,
4393 enable -mfloat128-hardware by default. However, don't enable the
4394 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4395 because sometimes the compiler wants to put things in an integer
4396 container, and if we don't have __int128 support, it is impossible. */
4397 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4398 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4399 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4400 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4401
4402 if (TARGET_FLOAT128_HW
4403 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4404 {
4405 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4406 error ("%qs requires full ISA 3.0 support", "-mfloat128-hardware");
4407
4408 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4409 }
4410
4411 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4412 {
4413 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4414 error ("%qs requires %qs", "-mfloat128-hardware", "-m64");
4415
4416 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4417 }
4418
4419 /* Print the options after updating the defaults. */
4420 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4421 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4422
4423 /* E500mc does "better" if we inline more aggressively. Respect the
4424 user's opinion, though. */
4425 if (rs6000_block_move_inline_limit == 0
4426 && (rs6000_tune == PROCESSOR_PPCE500MC
4427 || rs6000_tune == PROCESSOR_PPCE500MC64
4428 || rs6000_tune == PROCESSOR_PPCE5500
4429 || rs6000_tune == PROCESSOR_PPCE6500))
4430 rs6000_block_move_inline_limit = 128;
4431
4432 /* store_one_arg depends on expand_block_move to handle at least the
4433 size of reg_parm_stack_space. */
4434 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4435 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
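
/* Illustrative example (assuming the option's default of 0 means "not
set"): when tuning for e500mc with no explicit
-mblock-move-inline-limit=, the limit becomes 128, and any smaller
value is raised to the ABI minimum of 64 bytes on 64-bit targets
(32 bytes on 32-bit targets). */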
4436
4437 if (global_init_p)
4438 {
4439 /* If the appropriate debug option is enabled, replace the target hooks
4440 with debug versions that call the real version and then print
4441 debugging information. */
4442 if (TARGET_DEBUG_COST)
4443 {
4444 targetm.rtx_costs = rs6000_debug_rtx_costs;
4445 targetm.address_cost = rs6000_debug_address_cost;
4446 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4447 }
4448
4449 if (TARGET_DEBUG_ADDR)
4450 {
4451 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4452 targetm.legitimize_address = rs6000_debug_legitimize_address;
4453 rs6000_secondary_reload_class_ptr
4454 = rs6000_debug_secondary_reload_class;
4455 targetm.secondary_memory_needed
4456 = rs6000_debug_secondary_memory_needed;
4457 targetm.can_change_mode_class
4458 = rs6000_debug_can_change_mode_class;
4459 rs6000_preferred_reload_class_ptr
4460 = rs6000_debug_preferred_reload_class;
4461 rs6000_legitimize_reload_address_ptr
4462 = rs6000_debug_legitimize_reload_address;
4463 rs6000_mode_dependent_address_ptr
4464 = rs6000_debug_mode_dependent_address;
4465 }
4466
4467 if (rs6000_veclibabi_name)
4468 {
4469 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4470 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4471 else
4472 {
4473 error ("unknown vectorization library ABI type (%qs) for "
4474 "%qs switch", rs6000_veclibabi_name, "-mveclibabi=");
4475 ret = false;
4476 }
4477 }
4478 }
4479
4480 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4481 target attribute or pragma which automatically enables both options,
4482 unless the altivec ABI was set. This is set by default for 64-bit, but
4483 not for 32-bit. */
4484 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4485 {
4486 TARGET_FLOAT128_TYPE = 0;
4487 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4488 | OPTION_MASK_FLOAT128_KEYWORD)
4489 & ~rs6000_isa_flags_explicit);
4490 }
4491
4492 /* Enable Altivec ABI for AIX -maltivec. */
4493 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4494 {
4495 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4496 error ("target attribute or pragma changes AltiVec ABI");
4497 else
4498 rs6000_altivec_abi = 1;
4499 }
4500
4501 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4502 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4503 be explicitly overridden in either case. */
4504 if (TARGET_ELF)
4505 {
4506 if (!global_options_set.x_rs6000_altivec_abi
4507 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4508 {
4509 if (main_target_opt != NULL
4510 && !main_target_opt->x_rs6000_altivec_abi)
4511 error ("target attribute or pragma changes AltiVec ABI");
4512 else
4513 rs6000_altivec_abi = 1;
4514 }
4515 }
4516
4517 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4518 So far, the only darwin64 targets are also Mach-O. */
4519 if (TARGET_MACHO
4520 && DEFAULT_ABI == ABI_DARWIN
4521 && TARGET_64BIT)
4522 {
4523 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4524 error ("target attribute or pragma changes darwin64 ABI");
4525 else
4526 {
4527 rs6000_darwin64_abi = 1;
4528 /* Default to natural alignment, for better performance. */
4529 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4530 }
4531 }
4532
4533 /* Place FP constants in the constant pool instead of the TOC
4534 if section anchors are enabled. */
4535 if (flag_section_anchors
4536 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4537 TARGET_NO_FP_IN_TOC = 1;
4538
4539 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4540 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4541
4542 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4543 SUBTARGET_OVERRIDE_OPTIONS;
4544 #endif
4545 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4546 SUBSUBTARGET_OVERRIDE_OPTIONS;
4547 #endif
4548 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4549 SUB3TARGET_OVERRIDE_OPTIONS;
4550 #endif
4551
4552 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4553 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4554
4555 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4556 && rs6000_tune != PROCESSOR_POWER5
4557 && rs6000_tune != PROCESSOR_POWER6
4558 && rs6000_tune != PROCESSOR_POWER7
4559 && rs6000_tune != PROCESSOR_POWER8
4560 && rs6000_tune != PROCESSOR_POWER9
4561 && rs6000_tune != PROCESSOR_PPCA2
4562 && rs6000_tune != PROCESSOR_CELL
4563 && rs6000_tune != PROCESSOR_PPC476);
4564 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4565 || rs6000_tune == PROCESSOR_POWER5
4566 || rs6000_tune == PROCESSOR_POWER7
4567 || rs6000_tune == PROCESSOR_POWER8);
4568 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4569 || rs6000_tune == PROCESSOR_POWER5
4570 || rs6000_tune == PROCESSOR_POWER6
4571 || rs6000_tune == PROCESSOR_POWER7
4572 || rs6000_tune == PROCESSOR_POWER8
4573 || rs6000_tune == PROCESSOR_POWER9
4574 || rs6000_tune == PROCESSOR_PPCE500MC
4575 || rs6000_tune == PROCESSOR_PPCE500MC64
4576 || rs6000_tune == PROCESSOR_PPCE5500
4577 || rs6000_tune == PROCESSOR_PPCE6500);
4578
4579 /* Allow debug switches to override the above settings. These are set to -1
4580 in rs6000.opt to indicate the user hasn't directly set the switch. */
4581 if (TARGET_ALWAYS_HINT >= 0)
4582 rs6000_always_hint = TARGET_ALWAYS_HINT;
4583
4584 if (TARGET_SCHED_GROUPS >= 0)
4585 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4586
4587 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4588 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4589
4590 rs6000_sched_restricted_insns_priority
4591 = (rs6000_sched_groups ? 1 : 0);
4592
4593 /* Handle -msched-costly-dep option. */
4594 rs6000_sched_costly_dep
4595 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4596
4597 if (rs6000_sched_costly_dep_str)
4598 {
4599 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4600 rs6000_sched_costly_dep = no_dep_costly;
4601 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4602 rs6000_sched_costly_dep = all_deps_costly;
4603 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4604 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4605 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4606 rs6000_sched_costly_dep = store_to_load_dep_costly;
4607 else
4608 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4609 atoi (rs6000_sched_costly_dep_str));
4610 }
4611
4612 /* Handle -minsert-sched-nops option. */
4613 rs6000_sched_insert_nops
4614 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4615
4616 if (rs6000_sched_insert_nops_str)
4617 {
4618 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4619 rs6000_sched_insert_nops = sched_finish_none;
4620 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4621 rs6000_sched_insert_nops = sched_finish_pad_groups;
4622 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4623 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4624 else
4625 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4626 atoi (rs6000_sched_insert_nops_str));
4627 }
4628
4629 /* Handle the stack protector options. */
4630 if (!global_options_set.x_rs6000_stack_protector_guard)
4631 #ifdef TARGET_THREAD_SSP_OFFSET
4632 rs6000_stack_protector_guard = SSP_TLS;
4633 #else
4634 rs6000_stack_protector_guard = SSP_GLOBAL;
4635 #endif
4636
4637 #ifdef TARGET_THREAD_SSP_OFFSET
4638 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4639 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4640 #endif
4641
4642 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
4643 {
4644 char *endp;
4645 const char *str = rs6000_stack_protector_guard_offset_str;
4646
4647 errno = 0;
4648 long offset = strtol (str, &endp, 0);
4649 if (!*str || *endp || errno)
4650 error ("%qs is not a valid number in %qs", str,
4651 "-mstack-protector-guard-offset=");
4652
4653 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4654 || (TARGET_64BIT && (offset & 3)))
4655 error ("%qs is not a valid offset in %qs", str,
4656 "-mstack-protector-guard-offset=");
4657
4658 rs6000_stack_protector_guard_offset = offset;
4659 }
4660
4661 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
4662 {
4663 const char *str = rs6000_stack_protector_guard_reg_str;
4664 int reg = decode_reg_name (str);
4665
4666 if (!IN_RANGE (reg, 1, 31))
4667 error ("%qs is not a valid base register in %qs", str,
4668 "-mstack-protector-guard-reg=");
4669
4670 rs6000_stack_protector_guard_reg = reg;
4671 }
4672
4673 if (rs6000_stack_protector_guard == SSP_TLS
4674 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4675 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
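
/* Illustrative example (not from the sources): on a target that
defines TARGET_THREAD_SSP_OFFSET, the defaults above are equivalent
to

-mstack-protector-guard=tls
-mstack-protector-guard-reg=r13 (r2 on 32-bit)
-mstack-protector-guard-offset=<TARGET_THREAD_SSP_OFFSET>

and each piece can be overridden individually, subject to the range
and alignment checks above. */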
4676
4677 if (global_init_p)
4678 {
4679 #ifdef TARGET_REGNAMES
4680 /* If the user desires alternate register names, copy in the
4681 alternate names now. */
4682 if (TARGET_REGNAMES)
4683 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4684 #endif
4685
4686 /* Set aix_struct_return last, after the ABI is determined.
4687 If -maix-struct-return or -msvr4-struct-return was explicitly
4688 used, don't override with the ABI default. */
4689 if (!global_options_set.x_aix_struct_return)
4690 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4691
4692 #if 0
4693 /* IBM XL compiler defaults to unsigned bitfields. */
4694 if (TARGET_XL_COMPAT)
4695 flag_signed_bitfields = 0;
4696 #endif
4697
4698 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4699 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4700
4701 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4702
4703 /* We can only guarantee the availability of DI pseudo-ops when
4704 assembling for 64-bit targets. */
4705 if (!TARGET_64BIT)
4706 {
4707 targetm.asm_out.aligned_op.di = NULL;
4708 targetm.asm_out.unaligned_op.di = NULL;
4709 }
4710
4711
4712 /* Set branch target alignment, if not optimizing for size. */
4713 if (!optimize_size)
4714 {
4715 /* Cell wants to be aligned to 8 bytes for dual issue. Titan wants to be
4716 aligned to 8 bytes to avoid misprediction by the branch predictor. */
4717 if (rs6000_tune == PROCESSOR_TITAN
4718 || rs6000_tune == PROCESSOR_CELL)
4719 {
4720 if (flag_align_functions && !str_align_functions)
4721 str_align_functions = "8";
4722 if (flag_align_jumps && !str_align_jumps)
4723 str_align_jumps = "8";
4724 if (flag_align_loops && !str_align_loops)
4725 str_align_loops = "8";
4726 }
4727 if (rs6000_align_branch_targets)
4728 {
4729 if (flag_align_functions && !str_align_functions)
4730 str_align_functions = "16";
4731 if (flag_align_jumps && !str_align_jumps)
4732 str_align_jumps = "16";
4733 if (flag_align_loops && !str_align_loops)
4734 {
4735 can_override_loop_align = 1;
4736 str_align_loops = "16";
4737 }
4738 }
4739
4740 if (flag_align_jumps && !str_align_jumps)
4741 str_align_jumps = "16";
4742 if (flag_align_loops && !str_align_loops)
4743 str_align_loops = "16";
4744 }
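
/* Illustrative example (not from the sources): tuning for power9 at
-O2 therefore behaves like -falign-functions=16 -falign-jumps=16
-falign-loops=16 unless the user gave explicit alignment values, while
Cell and Titan get 8-byte alignment instead. */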
4745
4746 /* Arrange to save and restore machine status around nested functions. */
4747 init_machine_status = rs6000_init_machine_status;
4748
4749 /* We should always be splitting complex arguments, but we can't break
4750 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4751 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4752 targetm.calls.split_complex_arg = NULL;
4753
4754 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4755 if (DEFAULT_ABI == ABI_AIX)
4756 targetm.calls.custom_function_descriptors = 0;
4757 }
4758
4759 /* Initialize rs6000_cost with the appropriate target costs. */
4760 if (optimize_size)
4761 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4762 else
4763 switch (rs6000_tune)
4764 {
4765 case PROCESSOR_RS64A:
4766 rs6000_cost = &rs64a_cost;
4767 break;
4768
4769 case PROCESSOR_MPCCORE:
4770 rs6000_cost = &mpccore_cost;
4771 break;
4772
4773 case PROCESSOR_PPC403:
4774 rs6000_cost = &ppc403_cost;
4775 break;
4776
4777 case PROCESSOR_PPC405:
4778 rs6000_cost = &ppc405_cost;
4779 break;
4780
4781 case PROCESSOR_PPC440:
4782 rs6000_cost = &ppc440_cost;
4783 break;
4784
4785 case PROCESSOR_PPC476:
4786 rs6000_cost = &ppc476_cost;
4787 break;
4788
4789 case PROCESSOR_PPC601:
4790 rs6000_cost = &ppc601_cost;
4791 break;
4792
4793 case PROCESSOR_PPC603:
4794 rs6000_cost = &ppc603_cost;
4795 break;
4796
4797 case PROCESSOR_PPC604:
4798 rs6000_cost = &ppc604_cost;
4799 break;
4800
4801 case PROCESSOR_PPC604e:
4802 rs6000_cost = &ppc604e_cost;
4803 break;
4804
4805 case PROCESSOR_PPC620:
4806 rs6000_cost = &ppc620_cost;
4807 break;
4808
4809 case PROCESSOR_PPC630:
4810 rs6000_cost = &ppc630_cost;
4811 break;
4812
4813 case PROCESSOR_CELL:
4814 rs6000_cost = &ppccell_cost;
4815 break;
4816
4817 case PROCESSOR_PPC750:
4818 case PROCESSOR_PPC7400:
4819 rs6000_cost = &ppc750_cost;
4820 break;
4821
4822 case PROCESSOR_PPC7450:
4823 rs6000_cost = &ppc7450_cost;
4824 break;
4825
4826 case PROCESSOR_PPC8540:
4827 case PROCESSOR_PPC8548:
4828 rs6000_cost = &ppc8540_cost;
4829 break;
4830
4831 case PROCESSOR_PPCE300C2:
4832 case PROCESSOR_PPCE300C3:
4833 rs6000_cost = &ppce300c2c3_cost;
4834 break;
4835
4836 case PROCESSOR_PPCE500MC:
4837 rs6000_cost = &ppce500mc_cost;
4838 break;
4839
4840 case PROCESSOR_PPCE500MC64:
4841 rs6000_cost = &ppce500mc64_cost;
4842 break;
4843
4844 case PROCESSOR_PPCE5500:
4845 rs6000_cost = &ppce5500_cost;
4846 break;
4847
4848 case PROCESSOR_PPCE6500:
4849 rs6000_cost = &ppce6500_cost;
4850 break;
4851
4852 case PROCESSOR_TITAN:
4853 rs6000_cost = &titan_cost;
4854 break;
4855
4856 case PROCESSOR_POWER4:
4857 case PROCESSOR_POWER5:
4858 rs6000_cost = &power4_cost;
4859 break;
4860
4861 case PROCESSOR_POWER6:
4862 rs6000_cost = &power6_cost;
4863 break;
4864
4865 case PROCESSOR_POWER7:
4866 rs6000_cost = &power7_cost;
4867 break;
4868
4869 case PROCESSOR_POWER8:
4870 rs6000_cost = &power8_cost;
4871 break;
4872
4873 case PROCESSOR_POWER9:
4874 rs6000_cost = &power9_cost;
4875 break;
4876
4877 case PROCESSOR_PPCA2:
4878 rs6000_cost = &ppca2_cost;
4879 break;
4880
4881 default:
4882 gcc_unreachable ();
4883 }
4884
4885 if (global_init_p)
4886 {
4887 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4888 rs6000_cost->simultaneous_prefetches,
4889 global_options.x_param_values,
4890 global_options_set.x_param_values);
4891 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
4892 global_options.x_param_values,
4893 global_options_set.x_param_values);
4894 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4895 rs6000_cost->cache_line_size,
4896 global_options.x_param_values,
4897 global_options_set.x_param_values);
4898 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
4899 global_options.x_param_values,
4900 global_options_set.x_param_values);
4901
4902 /* Increase loop peeling limits based on performance analysis. */
4903 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
4904 global_options.x_param_values,
4905 global_options_set.x_param_values);
4906 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
4907 global_options.x_param_values,
4908 global_options_set.x_param_values);
4909
4910 /* Use the 'model' -fsched-pressure algorithm by default. */
4911 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM,
4912 SCHED_PRESSURE_MODEL,
4913 global_options.x_param_values,
4914 global_options_set.x_param_values);
4915
4916 /* If using typedef char *va_list, signal that
4917 __builtin_va_start (&ap, 0) can be optimized to
4918 ap = __builtin_next_arg (0). */
4919 if (DEFAULT_ABI != ABI_V4)
4920 targetm.expand_builtin_va_start = NULL;
4921 }
4922
4923 /* If not explicitly specified via option, decide whether to generate indexed
4924 load/store instructions. A value of -1 indicates that the
4925 initial value of this variable has not been overwritten. During
4926 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4927 if (TARGET_AVOID_XFORM == -1)
4928 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4929 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4930 need indexed accesses and the type used is the scalar type of the element
4931 being loaded or stored. */
4932 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4933 && !TARGET_ALTIVEC);
4934
4935 /* Set the -mrecip options. */
4936 if (rs6000_recip_name)
4937 {
4938 char *p = ASTRDUP (rs6000_recip_name);
4939 char *q;
4940 unsigned int mask, i;
4941 bool invert;
4942
4943 while ((q = strtok (p, ",")) != NULL)
4944 {
4945 p = NULL;
4946 if (*q == '!')
4947 {
4948 invert = true;
4949 q++;
4950 }
4951 else
4952 invert = false;
4953
4954 if (!strcmp (q, "default"))
4955 mask = ((TARGET_RECIP_PRECISION)
4956 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4957 else
4958 {
4959 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4960 if (!strcmp (q, recip_options[i].string))
4961 {
4962 mask = recip_options[i].mask;
4963 break;
4964 }
4965
4966 if (i == ARRAY_SIZE (recip_options))
4967 {
4968 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4969 invert = false;
4970 mask = 0;
4971 ret = false;
4972 }
4973 }
4974
4975 if (invert)
4976 rs6000_recip_control &= ~mask;
4977 else
4978 rs6000_recip_control |= mask;
4979 }
4980 }
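/* As an illustration, -mrecip=all,!rsqrtd first sets every estimate bit
   via the "all" mask and then clears the double-precision rsqrt bits,
   since the leading '!' selects the invert path above.  */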
4981
4982 /* Set the builtin mask of the various options used that could affect which
4983 builtins were used. In the past we used target_flags, but we've run out
4984 of bits, and some options are no longer in target_flags. */
4985 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4986 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4987 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4988 rs6000_builtin_mask);
4989
4990 /* Initialize all of the registers. */
4991 rs6000_init_hard_regno_mode_ok (global_init_p);
4992
4993 /* Save the initial options in case the user uses function-specific options. */
4994 if (global_init_p)
4995 target_option_default_node = target_option_current_node
4996 = build_target_option_node (&global_options);
4997
4998 /* If not explicitly specified via option, decide whether to generate the
4999 extra blr's required to preserve the link stack on some cpus (e.g., 476).
5000 if (TARGET_LINK_STACK == -1)
5001 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
5002
5003 /* Deprecate use of -mno-speculate-indirect-jumps. */
5004 if (!rs6000_speculate_indirect_jumps)
5005 warning (0, "%qs is deprecated and not recommended in any circumstances",
5006 "-mno-speculate-indirect-jumps");
5007
5008 return ret;
5009 }
5010
5011 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
5012 define the target cpu type. */
5013
5014 static void
5015 rs6000_option_override (void)
5016 {
5017 (void) rs6000_option_override_internal (true);
5018 }
5019
5020 \f
5021 /* Implement targetm.vectorize.builtin_mask_for_load. */
5022 static tree
5023 rs6000_builtin_mask_for_load (void)
5024 {
5025 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
5026 if ((TARGET_ALTIVEC && !TARGET_VSX)
5027 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
5028 return altivec_builtin_mask_for_load;
5029 else
5030 return 0;
5031 }
5032
5033 /* Implement LOOP_ALIGN. */
5034 align_flags
5035 rs6000_loop_align (rtx label)
5036 {
5037 basic_block bb;
5038 int ninsns;
5039
5040 /* Don't override loop alignment if -falign-loops was specified. */
5041 if (!can_override_loop_align)
5042 return align_loops;
5043
5044 bb = BLOCK_FOR_INSN (label);
5045 ninsns = num_loop_insns (bb->loop_father);
5046
5047 /* Align small loops to 32 bytes to fit in an icache sector; otherwise return the default. */
5048 if (ninsns > 4 && ninsns <= 8
5049 && (rs6000_tune == PROCESSOR_POWER4
5050 || rs6000_tune == PROCESSOR_POWER5
5051 || rs6000_tune == PROCESSOR_POWER6
5052 || rs6000_tune == PROCESSOR_POWER7
5053 || rs6000_tune == PROCESSOR_POWER8))
5054 return align_flags (5);
5055 else
5056 return align_loops;
5057 }
5058
5059 /* Return true iff a data reference of TYPE can reach vector alignment (16)
5060 after applying N iterations. This routine does not determine how many
5061 iterations are required to reach the desired alignment. */
5062
5063 static bool
5064 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
5065 {
5066 if (is_packed)
5067 return false;
5068
5069 if (TARGET_32BIT)
5070 {
5071 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
5072 return true;
5073
5074 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
5075 return true;
5076
5077 return false;
5078 }
5079 else
5080 {
5081 if (TARGET_MACHO)
5082 return false;
5083
5084 /* Assume that all other types are naturally aligned. CHECKME! */
5085 return true;
5086 }
5087 }
5088
5089 /* Return true if the vector misalignment factor is supported by the
5090 target. */
5091 static bool
5092 rs6000_builtin_support_vector_misalignment (machine_mode mode,
5093 const_tree type,
5094 int misalignment,
5095 bool is_packed)
5096 {
5097 if (TARGET_VSX)
5098 {
5099 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5100 return true;
5101
5102 /* Return false if the movmisalign pattern is not supported for this mode. */
5103 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
5104 return false;
5105
5106 if (misalignment == -1)
5107 {
5108 /* Misalignment factor is unknown at compile time but we know
5109 it's word aligned. */
5110 if (rs6000_vector_alignment_reachable (type, is_packed))
5111 {
5112 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5113
5114 if (element_size == 64 || element_size == 32)
5115 return true;
5116 }
5117
5118 return false;
5119 }
5120
5121 /* VSX supports word-aligned vectors. */
5122 if (misalignment % 4 == 0)
5123 return true;
5124 }
5125 return false;
5126 }
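/* When the efficient-unaligned fast path above does not apply, the final
   check accepts only word-aligned misalignments (0, 4, 8 or 12 bytes for
   a 16-byte vector) and rejects, for example, a misalignment of 2.  */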
5127
5128 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5129 static int
5130 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5131 tree vectype, int misalign)
5132 {
5133 unsigned elements;
5134 tree elem_type;
5135
5136 switch (type_of_cost)
5137 {
5138 case scalar_stmt:
5139 case scalar_load:
5140 case scalar_store:
5141 case vector_stmt:
5142 case vector_load:
5143 case vector_store:
5144 case vec_to_scalar:
5145 case scalar_to_vec:
5146 case cond_branch_not_taken:
5147 return 1;
5148
5149 case vec_perm:
5150 if (TARGET_VSX)
5151 return 3;
5152 else
5153 return 1;
5154
5155 case vec_promote_demote:
5156 if (TARGET_VSX)
5157 return 4;
5158 else
5159 return 1;
5160
5161 case cond_branch_taken:
5162 return 3;
5163
5164 case unaligned_load:
5165 case vector_gather_load:
5166 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5167 return 1;
5168
5169 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5170 {
5171 elements = TYPE_VECTOR_SUBPARTS (vectype);
5172 if (elements == 2)
5173 /* Double word aligned. */
5174 return 2;
5175
5176 if (elements == 4)
5177 {
5178 switch (misalign)
5179 {
5180 case 8:
5181 /* Double word aligned. */
5182 return 2;
5183
5184 case -1:
5185 /* Unknown misalignment. */
5186 case 4:
5187 case 12:
5188 /* Word aligned. */
5189 return 22;
5190
5191 default:
5192 gcc_unreachable ();
5193 }
5194 }
5195 }
5196
5197 if (TARGET_ALTIVEC)
5198 /* Misaligned loads are not supported. */
5199 gcc_unreachable ();
5200
5201 return 2;
5202
5203 case unaligned_store:
5204 case vector_scatter_store:
5205 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5206 return 1;
5207
5208 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5209 {
5210 elements = TYPE_VECTOR_SUBPARTS (vectype);
5211 if (elements == 2)
5212 /* Double word aligned. */
5213 return 2;
5214
5215 if (elements == 4)
5216 {
5217 switch (misalign)
5218 {
5219 case 8:
5220 /* Double word aligned. */
5221 return 2;
5222
5223 case -1:
5224 /* Unknown misalignment. */
5225 case 4:
5226 case 12:
5227 /* Word aligned. */
5228 return 23;
5229
5230 default:
5231 gcc_unreachable ();
5232 }
5233 }
5234 }
5235
5236 if (TARGET_ALTIVEC)
5237 /* Misaligned stores are not supported. */
5238 gcc_unreachable ();
5239
5240 return 2;
5241
5242 case vec_construct:
5243 /* This is a rough approximation assuming non-constant elements
5244 constructed into a vector via element insertion. FIXME:
5245 vec_construct is not granular enough for uniformly good
5246 decisions. If the initialization is a splat, this is
5247 cheaper than we estimate. Improve this someday. */
5248 elem_type = TREE_TYPE (vectype);
5249 /* 32-bit vectors loaded into registers are stored as double
5250 precision, so we need 2 permutes, 2 converts, and 1 merge
5251 to construct a vector of short floats from them. */
5252 if (SCALAR_FLOAT_TYPE_P (elem_type)
5253 && TYPE_PRECISION (elem_type) == 32)
5254 return 5;
5255 /* On POWER9, integer vector types are built up in GPRs and then
5256 use a direct move (2 cycles). For POWER8 this is even worse,
5257 as we need two direct moves and a merge, and the direct moves
5258 are five cycles. */
5259 else if (INTEGRAL_TYPE_P (elem_type))
5260 {
5261 if (TARGET_P9_VECTOR)
5262 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5263 else
5264 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
5265 }
5266 else
5267 /* V2DFmode doesn't need a direct move. */
5268 return 2;
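/* Under this model, for example, a V4SI construct costs 4 - 1 + 2 = 5
   on POWER9 versus 4 - 1 + 5 = 8 on POWER8.  */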
5269
5270 default:
5271 gcc_unreachable ();
5272 }
5273 }
5274
5275 /* Implement targetm.vectorize.preferred_simd_mode. */
5276
5277 static machine_mode
5278 rs6000_preferred_simd_mode (scalar_mode mode)
5279 {
5280 if (TARGET_VSX)
5281 switch (mode)
5282 {
5283 case E_DFmode:
5284 return V2DFmode;
5285 default:;
5286 }
5287 if (TARGET_ALTIVEC || TARGET_VSX)
5288 switch (mode)
5289 {
5290 case E_SFmode:
5291 return V4SFmode;
5292 case E_TImode:
5293 return V1TImode;
5294 case E_DImode:
5295 return V2DImode;
5296 case E_SImode:
5297 return V4SImode;
5298 case E_HImode:
5299 return V8HImode;
5300 case E_QImode:
5301 return V16QImode;
5302 default:;
5303 }
5304 return word_mode;
5305 }
5306
5307 typedef struct _rs6000_cost_data
5308 {
5309 struct loop *loop_info;
5310 unsigned cost[3];
5311 } rs6000_cost_data;
5312
5313 /* Test for likely overcommitment of vector hardware resources. If a
5314 loop iteration is relatively large, and too large a percentage of
5315 instructions in the loop are vectorized, the cost model may not
5316 adequately reflect delays from unavailable vector resources.
5317 Penalize the loop body cost for this case. */
5318
5319 static void
5320 rs6000_density_test (rs6000_cost_data *data)
5321 {
5322 const int DENSITY_PCT_THRESHOLD = 85;
5323 const int DENSITY_SIZE_THRESHOLD = 70;
5324 const int DENSITY_PENALTY = 10;
5325 struct loop *loop = data->loop_info;
5326 basic_block *bbs = get_loop_body (loop);
5327 int nbbs = loop->num_nodes;
5328 loop_vec_info loop_vinfo = loop_vec_info_for_loop (data->loop_info);
5329 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
5330 int i, density_pct;
5331
5332 for (i = 0; i < nbbs; i++)
5333 {
5334 basic_block bb = bbs[i];
5335 gimple_stmt_iterator gsi;
5336
5337 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5338 {
5339 gimple *stmt = gsi_stmt (gsi);
5340 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
5341
5342 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5343 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5344 not_vec_cost++;
5345 }
5346 }
5347
5348 free (bbs);
5349 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5350
5351 if (density_pct > DENSITY_PCT_THRESHOLD
5352 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
5353 {
5354 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
5355 if (dump_enabled_p ())
5356 dump_printf_loc (MSG_NOTE, vect_location,
5357 "density %d%%, cost %d exceeds threshold, penalizing "
5358 "loop body cost by %d%%", density_pct,
5359 vec_cost + not_vec_cost, DENSITY_PENALTY);
5360 }
5361 }
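/* Worked example: with vec_cost = 90 and not_vec_cost = 10, the density
   is 90% and the loop size is 100, so both thresholds are exceeded and
   the body cost is scaled to 90 * 110 / 100 = 99.  */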
5362
5363 /* Implement targetm.vectorize.init_cost. */
5364
5365 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5366 instruction is needed by the vectorization. */
5367 static bool rs6000_vect_nonmem;
5368
5369 static void *
5370 rs6000_init_cost (struct loop *loop_info)
5371 {
5372 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5373 data->loop_info = loop_info;
5374 data->cost[vect_prologue] = 0;
5375 data->cost[vect_body] = 0;
5376 data->cost[vect_epilogue] = 0;
5377 rs6000_vect_nonmem = false;
5378 return data;
5379 }
5380
5381 /* Implement targetm.vectorize.add_stmt_cost. */
5382
5383 static unsigned
5384 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5385 struct _stmt_vec_info *stmt_info, int misalign,
5386 enum vect_cost_model_location where)
5387 {
5388 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5389 unsigned retval = 0;
5390
5391 if (flag_vect_cost_model)
5392 {
5393 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5394 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5395 misalign);
5396 /* Statements in an inner loop relative to the loop being
5397 vectorized are weighted more heavily. The value here is
5398 arbitrary and could potentially be improved with analysis. */
5399 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5400 count *= 50; /* FIXME. */
5401
5402 retval = (unsigned) (count * stmt_cost);
5403 cost_data->cost[where] += retval;
5404
5405 /* Check whether we're doing something other than just a copy loop.
5406 Not all such loops may be profitably vectorized; see
5407 rs6000_finish_cost. */
5408 if ((kind == vec_to_scalar || kind == vec_perm
5409 || kind == vec_promote_demote || kind == vec_construct
5410 || kind == scalar_to_vec)
5411 || (where == vect_body && kind == vector_stmt))
5412 rs6000_vect_nonmem = true;
5413 }
5414
5415 return retval;
5416 }
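/* For instance, a single vector_stmt in the loop body normally
   contributes a cost of 1, but the same statement inside a nested inner
   loop is charged 50 because of the count *= 50 weighting above.  */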
5417
5418 /* Implement targetm.vectorize.finish_cost. */
5419
5420 static void
5421 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5422 unsigned *body_cost, unsigned *epilogue_cost)
5423 {
5424 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5425
5426 if (cost_data->loop_info)
5427 rs6000_density_test (cost_data);
5428
5429 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5430 that require versioning for any reason. The vectorization is at
5431 best a wash inside the loop, and the versioning checks make
5432 profitability highly unlikely and potentially quite harmful. */
5433 if (cost_data->loop_info)
5434 {
5435 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
5436 if (!rs6000_vect_nonmem
5437 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
5438 && LOOP_REQUIRES_VERSIONING (vec_info))
5439 cost_data->cost[vect_body] += 10000;
5440 }
5441
5442 *prologue_cost = cost_data->cost[vect_prologue];
5443 *body_cost = cost_data->cost[vect_body];
5444 *epilogue_cost = cost_data->cost[vect_epilogue];
5445 }
5446
5447 /* Implement targetm.vectorize.destroy_cost_data. */
5448
5449 static void
5450 rs6000_destroy_cost_data (void *data)
5451 {
5452 free (data);
5453 }
5454
5455 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5456 library with vectorized intrinsics. */
5457
5458 static tree
5459 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5460 tree type_in)
5461 {
5462 char name[32];
5463 const char *suffix = NULL;
5464 tree fntype, new_fndecl, bdecl = NULL_TREE;
5465 int n_args = 1;
5466 const char *bname;
5467 machine_mode el_mode, in_mode;
5468 int n, in_n;
5469
5470 /* Libmass is suitable only for unsafe math, since it does not correctly
5471 support parts of IEEE (such as denormals) with the required precision.
5472 Only support it if we have VSX, so that we can use the SIMD d2 or f4
5473 functions. XXX: Add variable length support. */
5474 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5475 return NULL_TREE;
5476
5477 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5478 n = TYPE_VECTOR_SUBPARTS (type_out);
5479 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5480 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5481 if (el_mode != in_mode
5482 || n != in_n)
5483 return NULL_TREE;
5484
5485 switch (fn)
5486 {
5487 CASE_CFN_ATAN2:
5488 CASE_CFN_HYPOT:
5489 CASE_CFN_POW:
5490 n_args = 2;
5491 gcc_fallthrough ();
5492
5493 CASE_CFN_ACOS:
5494 CASE_CFN_ACOSH:
5495 CASE_CFN_ASIN:
5496 CASE_CFN_ASINH:
5497 CASE_CFN_ATAN:
5498 CASE_CFN_ATANH:
5499 CASE_CFN_CBRT:
5500 CASE_CFN_COS:
5501 CASE_CFN_COSH:
5502 CASE_CFN_ERF:
5503 CASE_CFN_ERFC:
5504 CASE_CFN_EXP2:
5505 CASE_CFN_EXP:
5506 CASE_CFN_EXPM1:
5507 CASE_CFN_LGAMMA:
5508 CASE_CFN_LOG10:
5509 CASE_CFN_LOG1P:
5510 CASE_CFN_LOG2:
5511 CASE_CFN_LOG:
5512 CASE_CFN_SIN:
5513 CASE_CFN_SINH:
5514 CASE_CFN_SQRT:
5515 CASE_CFN_TAN:
5516 CASE_CFN_TANH:
5517 if (el_mode == DFmode && n == 2)
5518 {
5519 bdecl = mathfn_built_in (double_type_node, fn);
5520 suffix = "d2"; /* pow -> powd2 */
5521 }
5522 else if (el_mode == SFmode && n == 4)
5523 {
5524 bdecl = mathfn_built_in (float_type_node, fn);
5525 suffix = "4"; /* powf -> powf4 */
5526 }
5527 else
5528 return NULL_TREE;
5529 if (!bdecl)
5530 return NULL_TREE;
5531 break;
5532
5533 default:
5534 return NULL_TREE;
5535 }
5536
5537 gcc_assert (suffix != NULL);
5538 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5539 if (!bname)
5540 return NULL_TREE;
5541
5542 strcpy (name, bname + sizeof ("__builtin_") - 1);
5543 strcat (name, suffix);
5544
5545 if (n_args == 1)
5546 fntype = build_function_type_list (type_out, type_in, NULL);
5547 else if (n_args == 2)
5548 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5549 else
5550 gcc_unreachable ();
5551
5552 /* Build a function declaration for the vectorized function. */
5553 new_fndecl = build_decl (BUILTINS_LOCATION,
5554 FUNCTION_DECL, get_identifier (name), fntype);
5555 TREE_PUBLIC (new_fndecl) = 1;
5556 DECL_EXTERNAL (new_fndecl) = 1;
5557 DECL_IS_NOVOPS (new_fndecl) = 1;
5558 TREE_READONLY (new_fndecl) = 1;
5559
5560 return new_fndecl;
5561 }
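/* The name mangling above strips the "__builtin_" prefix and appends the
   suffix, so CFN_POW on V2DF yields a decl for "powd2" and the float
   variant on V4SF yields "powf4", following the MASS SIMD naming.  */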
5562
5563 /* Returns a function decl for a vectorized version of the builtin function
5564 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5565 if it is not available. */
5566
5567 static tree
5568 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5569 tree type_in)
5570 {
5571 machine_mode in_mode, out_mode;
5572 int in_n, out_n;
5573
5574 if (TARGET_DEBUG_BUILTIN)
5575 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5576 combined_fn_name (combined_fn (fn)),
5577 GET_MODE_NAME (TYPE_MODE (type_out)),
5578 GET_MODE_NAME (TYPE_MODE (type_in)));
5579
5580 if (TREE_CODE (type_out) != VECTOR_TYPE
5581 || TREE_CODE (type_in) != VECTOR_TYPE)
5582 return NULL_TREE;
5583
5584 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5585 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5586 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5587 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5588
5589 switch (fn)
5590 {
5591 CASE_CFN_COPYSIGN:
5592 if (VECTOR_UNIT_VSX_P (V2DFmode)
5593 && out_mode == DFmode && out_n == 2
5594 && in_mode == DFmode && in_n == 2)
5595 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5596 if (VECTOR_UNIT_VSX_P (V4SFmode)
5597 && out_mode == SFmode && out_n == 4
5598 && in_mode == SFmode && in_n == 4)
5599 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5600 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5601 && out_mode == SFmode && out_n == 4
5602 && in_mode == SFmode && in_n == 4)
5603 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5604 break;
5605 CASE_CFN_CEIL:
5606 if (VECTOR_UNIT_VSX_P (V2DFmode)
5607 && out_mode == DFmode && out_n == 2
5608 && in_mode == DFmode && in_n == 2)
5609 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5610 if (VECTOR_UNIT_VSX_P (V4SFmode)
5611 && out_mode == SFmode && out_n == 4
5612 && in_mode == SFmode && in_n == 4)
5613 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5614 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5615 && out_mode == SFmode && out_n == 4
5616 && in_mode == SFmode && in_n == 4)
5617 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5618 break;
5619 CASE_CFN_FLOOR:
5620 if (VECTOR_UNIT_VSX_P (V2DFmode)
5621 && out_mode == DFmode && out_n == 2
5622 && in_mode == DFmode && in_n == 2)
5623 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5624 if (VECTOR_UNIT_VSX_P (V4SFmode)
5625 && out_mode == SFmode && out_n == 4
5626 && in_mode == SFmode && in_n == 4)
5627 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5628 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5629 && out_mode == SFmode && out_n == 4
5630 && in_mode == SFmode && in_n == 4)
5631 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5632 break;
5633 CASE_CFN_FMA:
5634 if (VECTOR_UNIT_VSX_P (V2DFmode)
5635 && out_mode == DFmode && out_n == 2
5636 && in_mode == DFmode && in_n == 2)
5637 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5638 if (VECTOR_UNIT_VSX_P (V4SFmode)
5639 && out_mode == SFmode && out_n == 4
5640 && in_mode == SFmode && in_n == 4)
5641 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5642 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5643 && out_mode == SFmode && out_n == 4
5644 && in_mode == SFmode && in_n == 4)
5645 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5646 break;
5647 CASE_CFN_TRUNC:
5648 if (VECTOR_UNIT_VSX_P (V2DFmode)
5649 && out_mode == DFmode && out_n == 2
5650 && in_mode == DFmode && in_n == 2)
5651 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5652 if (VECTOR_UNIT_VSX_P (V4SFmode)
5653 && out_mode == SFmode && out_n == 4
5654 && in_mode == SFmode && in_n == 4)
5655 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5656 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5657 && out_mode == SFmode && out_n == 4
5658 && in_mode == SFmode && in_n == 4)
5659 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5660 break;
5661 CASE_CFN_NEARBYINT:
5662 if (VECTOR_UNIT_VSX_P (V2DFmode)
5663 && flag_unsafe_math_optimizations
5664 && out_mode == DFmode && out_n == 2
5665 && in_mode == DFmode && in_n == 2)
5666 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5667 if (VECTOR_UNIT_VSX_P (V4SFmode)
5668 && flag_unsafe_math_optimizations
5669 && out_mode == SFmode && out_n == 4
5670 && in_mode == SFmode && in_n == 4)
5671 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5672 break;
5673 CASE_CFN_RINT:
5674 if (VECTOR_UNIT_VSX_P (V2DFmode)
5675 && !flag_trapping_math
5676 && out_mode == DFmode && out_n == 2
5677 && in_mode == DFmode && in_n == 2)
5678 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5679 if (VECTOR_UNIT_VSX_P (V4SFmode)
5680 && !flag_trapping_math
5681 && out_mode == SFmode && out_n == 4
5682 && in_mode == SFmode && in_n == 4)
5683 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5684 break;
5685 default:
5686 break;
5687 }
5688
5689 /* Generate calls to libmass if appropriate. */
5690 if (rs6000_veclib_handler)
5691 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5692
5693 return NULL_TREE;
5694 }
5695
5696 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5697
5698 static tree
5699 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5700 tree type_in)
5701 {
5702 machine_mode in_mode, out_mode;
5703 int in_n, out_n;
5704
5705 if (TARGET_DEBUG_BUILTIN)
5706 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5707 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5708 GET_MODE_NAME (TYPE_MODE (type_out)),
5709 GET_MODE_NAME (TYPE_MODE (type_in)));
5710
5711 if (TREE_CODE (type_out) != VECTOR_TYPE
5712 || TREE_CODE (type_in) != VECTOR_TYPE)
5713 return NULL_TREE;
5714
5715 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5716 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5717 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5718 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5719
5720 enum rs6000_builtins fn
5721 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
5722 switch (fn)
5723 {
5724 case RS6000_BUILTIN_RSQRTF:
5725 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5726 && out_mode == SFmode && out_n == 4
5727 && in_mode == SFmode && in_n == 4)
5728 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5729 break;
5730 case RS6000_BUILTIN_RSQRT:
5731 if (VECTOR_UNIT_VSX_P (V2DFmode)
5732 && out_mode == DFmode && out_n == 2
5733 && in_mode == DFmode && in_n == 2)
5734 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5735 break;
5736 case RS6000_BUILTIN_RECIPF:
5737 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5738 && out_mode == SFmode && out_n == 4
5739 && in_mode == SFmode && in_n == 4)
5740 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5741 break;
5742 case RS6000_BUILTIN_RECIP:
5743 if (VECTOR_UNIT_VSX_P (V2DFmode)
5744 && out_mode == DFmode && out_n == 2
5745 && in_mode == DFmode && in_n == 2)
5746 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5747 break;
5748 default:
5749 break;
5750 }
5751 return NULL_TREE;
5752 }
5753 \f
5754 /* Default CPU string for rs6000*_file_start functions. */
5755 static const char *rs6000_default_cpu;
5756
5757 /* Do anything needed at the start of the asm file. */
5758
5759 static void
5760 rs6000_file_start (void)
5761 {
5762 char buffer[80];
5763 const char *start = buffer;
5764 FILE *file = asm_out_file;
5765
5766 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5767
5768 default_file_start ();
5769
5770 if (flag_verbose_asm)
5771 {
5772 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5773
5774 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5775 {
5776 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5777 start = "";
5778 }
5779
5780 if (global_options_set.x_rs6000_cpu_index)
5781 {
5782 fprintf (file, "%s -mcpu=%s", start,
5783 processor_target_table[rs6000_cpu_index].name);
5784 start = "";
5785 }
5786
5787 if (global_options_set.x_rs6000_tune_index)
5788 {
5789 fprintf (file, "%s -mtune=%s", start,
5790 processor_target_table[rs6000_tune_index].name);
5791 start = "";
5792 }
5793
5794 if (PPC405_ERRATUM77)
5795 {
5796 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5797 start = "";
5798 }
5799
5800 #ifdef USING_ELFOS_H
5801 switch (rs6000_sdata)
5802 {
5803 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5804 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5805 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5806 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5807 }
5808
5809 if (rs6000_sdata && g_switch_value)
5810 {
5811 fprintf (file, "%s -G %d", start,
5812 g_switch_value);
5813 start = "";
5814 }
5815 #endif
5816
5817 if (*start == '\0')
5818 putc ('\n', file);
5819 }
5820
5821 #ifdef USING_ELFOS_H
5822 if (!(rs6000_default_cpu && rs6000_default_cpu[0])
5823 && !global_options_set.x_rs6000_cpu_index)
5824 {
5825 fputs ("\t.machine ", asm_out_file);
5826 if ((rs6000_isa_flags & OPTION_MASK_MODULO) != 0)
5827 fputs ("power9\n", asm_out_file);
5828 else if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
5829 fputs ("power8\n", asm_out_file);
5830 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
5831 fputs ("power7\n", asm_out_file);
5832 else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
5833 fputs ("power6\n", asm_out_file);
5834 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
5835 fputs ("power5\n", asm_out_file);
5836 else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
5837 fputs ("power4\n", asm_out_file);
5838 else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
5839 fputs ("ppc64\n", asm_out_file);
5840 else
5841 fputs ("ppc\n", asm_out_file);
5842 }
5843 #endif
5844
5845 if (DEFAULT_ABI == ABI_ELFv2)
5846 fprintf (file, "\t.abiversion 2\n");
5847 }
5848
5849 \f
5850 /* Return nonzero if this function is known to have a null epilogue. */
5851
5852 int
5853 direct_return (void)
5854 {
5855 if (reload_completed)
5856 {
5857 rs6000_stack_t *info = rs6000_stack_info ();
5858
5859 if (info->first_gp_reg_save == 32
5860 && info->first_fp_reg_save == 64
5861 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5862 && ! info->lr_save_p
5863 && ! info->cr_save_p
5864 && info->vrsave_size == 0
5865 && ! info->push_p)
5866 return 1;
5867 }
5868
5869 return 0;
5870 }
5871
5872 /* Return the number of instructions it takes to form a constant in an
5873 integer register. */
5874
5875 int
5876 num_insns_constant_wide (HOST_WIDE_INT value)
5877 {
5878 /* Signed 16-bit constant loadable with a single addi. */
5879 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
5880 return 1;
5881
5882 /* Sign-extended 32-bit constant with zero low half, loadable with addis. */
5883 else if ((value & 0xffff) == 0
5884 && (value >> 31 == -1 || value >> 31 == 0))
5885 return 1;
5886
5887 else if (TARGET_POWERPC64)
5888 {
5889 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5890 HOST_WIDE_INT high = value >> 31;
5891
5892 if (high == 0 || high == -1)
5893 return 2;
5894
5895 high >>= 1;
5896
5897 if (low == 0)
5898 return num_insns_constant_wide (high) + 1;
5899 else if (high == 0)
5900 return num_insns_constant_wide (low) + 1;
5901 else
5902 return (num_insns_constant_wide (high)
5903 + num_insns_constant_wide (low) + 1);
5904 }
5905
5906 else
5907 return 2;
5908 }
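/* Examples under this costing: 0x7fff loads with a single addi;
   0x12345678 needs lis + ori, so 2; and on a 64-bit target 0x100000001
   splits into high = 1 and low = 1 plus one more insn to combine the
   halves, so 3.  */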
5909
5910 int
5911 num_insns_constant (rtx op, machine_mode mode)
5912 {
5913 HOST_WIDE_INT low, high;
5914
5915 switch (GET_CODE (op))
5916 {
5917 case CONST_INT:
5918 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
5919 && rs6000_is_valid_and_mask (op, mode))
5920 return 2;
5921 else
5922 return num_insns_constant_wide (INTVAL (op));
5923
5924 case CONST_WIDE_INT:
5925 {
5926 int i;
5927 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
5928 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5929 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
5930 return ins;
5931 }
5932
5933 case CONST_DOUBLE:
5934 if (mode == SFmode || mode == SDmode)
5935 {
5936 long l;
5937
5938 if (DECIMAL_FLOAT_MODE_P (mode))
5939 REAL_VALUE_TO_TARGET_DECIMAL32
5940 (*CONST_DOUBLE_REAL_VALUE (op), l);
5941 else
5942 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), l);
5943 return num_insns_constant_wide ((HOST_WIDE_INT) l);
5944 }
5945
5946 long l[2];
5947 if (DECIMAL_FLOAT_MODE_P (mode))
5948 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (op), l);
5949 else
5950 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), l);
5951 high = l[WORDS_BIG_ENDIAN == 0];
5952 low = l[WORDS_BIG_ENDIAN != 0];
5953
5954 if (TARGET_32BIT)
5955 return (num_insns_constant_wide (low)
5956 + num_insns_constant_wide (high));
5957 else
5958 {
5959 if ((high == 0 && low >= 0)
5960 || (high == -1 && low < 0))
5961 return num_insns_constant_wide (low);
5962
5963 else if (rs6000_is_valid_and_mask (op, mode))
5964 return 2;
5965
5966 else if (low == 0)
5967 return num_insns_constant_wide (high) + 1;
5968
5969 else
5970 return (num_insns_constant_wide (high)
5971 + num_insns_constant_wide (low) + 1);
5972 }
5973
5974 default:
5975 gcc_unreachable ();
5976 }
5977 }
5978
5979 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5980 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5981 corresponding element of the vector, but for V4SFmode, the
5982 corresponding "float" is interpreted as an SImode integer. */
5983
5984 HOST_WIDE_INT
5985 const_vector_elt_as_int (rtx op, unsigned int elt)
5986 {
5987 rtx tmp;
5988
5989 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5990 gcc_assert (GET_MODE (op) != V2DImode
5991 && GET_MODE (op) != V2DFmode);
5992
5993 tmp = CONST_VECTOR_ELT (op, elt);
5994 if (GET_MODE (op) == V4SFmode)
5995 tmp = gen_lowpart (SImode, tmp);
5996 return INTVAL (tmp);
5997 }
5998
5999 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6000 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6001 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6002 all items are set to the same value and contain COPIES replicas of the
6003 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6004 operand and the others are set to the value of the operand's msb. */
6005
6006 static bool
6007 vspltis_constant (rtx op, unsigned step, unsigned copies)
6008 {
6009 machine_mode mode = GET_MODE (op);
6010 machine_mode inner = GET_MODE_INNER (mode);
6011
6012 unsigned i;
6013 unsigned nunits;
6014 unsigned bitsize;
6015 unsigned mask;
6016
6017 HOST_WIDE_INT val;
6018 HOST_WIDE_INT splat_val;
6019 HOST_WIDE_INT msb_val;
6020
6021 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6022 return false;
6023
6024 nunits = GET_MODE_NUNITS (mode);
6025 bitsize = GET_MODE_BITSIZE (inner);
6026 mask = GET_MODE_MASK (inner);
6027
6028 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6029 splat_val = val;
6030 msb_val = val >= 0 ? 0 : -1;
6031
6032 /* Construct the value to be splatted, if possible. If not, return false. */
6033 for (i = 2; i <= copies; i *= 2)
6034 {
6035 HOST_WIDE_INT small_val;
6036 bitsize /= 2;
6037 small_val = splat_val >> bitsize;
6038 mask >>= bitsize;
6039 if (splat_val != ((HOST_WIDE_INT)
6040 ((unsigned HOST_WIDE_INT) small_val << bitsize)
6041 | (small_val & mask)))
6042 return false;
6043 splat_val = small_val;
6044 }
6045
6046 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6047 if (EASY_VECTOR_15 (splat_val))
6048 ;
6049
6050 /* Also check if we can splat, and then add the result to itself. Do so if
6051 the value is positive, or if the splat instruction is using OP's mode;
6052 for splat_val < 0, the splat and the add should use the same mode. */
6053 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6054 && (splat_val >= 0 || (step == 1 && copies == 1)))
6055 ;
6056
6057 /* Also check if we are loading up the most significant bit, which can be
6058 done by loading up -1 and shifting the value left by -1. */
6059 else if (EASY_VECTOR_MSB (splat_val, inner))
6060 ;
6061
6062 else
6063 return false;
6064
6065 /* Check if VAL is present in every STEP-th element, and the
6066 other elements are filled with its most significant bit. */
6067 for (i = 1; i < nunits; ++i)
6068 {
6069 HOST_WIDE_INT desired_val;
6070 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6071 if ((i & (step - 1)) == 0)
6072 desired_val = val;
6073 else
6074 desired_val = msb_val;
6075
6076 if (desired_val != const_vector_elt_as_int (op, elt))
6077 return false;
6078 }
6079
6080 return true;
6081 }
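/* Worked example: a V4SI vector with every element 0x05050505 succeeds
   with step 1 and copies 4.  The loop above halves splat_val from
   0x05050505 to 0x0505 to 0x05, verifying each half replicates the next,
   and EASY_VECTOR_15 (5) holds, so a single vspltisb 5 generates it.  */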
6082
6083 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6084 instruction, filling in the bottom elements with 0 or -1.
6085
6086 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6087 for the number of zeroes to shift in, or negative for the number of 0xff
6088 bytes to shift in.
6089
6090 OP is a CONST_VECTOR. */
6091
6092 int
6093 vspltis_shifted (rtx op)
6094 {
6095 machine_mode mode = GET_MODE (op);
6096 machine_mode inner = GET_MODE_INNER (mode);
6097
6098 unsigned i, j;
6099 unsigned nunits;
6100 unsigned mask;
6101
6102 HOST_WIDE_INT val;
6103
6104 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6105 return 0;
6106
6107 /* We need to create pseudo registers to do the shift, so don't recognize
6108 shift vector constants after reload. */
6109 if (!can_create_pseudo_p ())
6110 return 0;
6111
6112 nunits = GET_MODE_NUNITS (mode);
6113 mask = GET_MODE_MASK (inner);
6114
6115 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6116
6117 /* Check if the value can really be the operand of a vspltis[bhw]. */
6118 if (EASY_VECTOR_15 (val))
6119 ;
6120
6121 /* Also check if we are loading up the most significant bit which can be done
6122 by loading up -1 and shifting the value left by -1. */
6123 else if (EASY_VECTOR_MSB (val, inner))
6124 ;
6125
6126 else
6127 return 0;
6128
6129 /* Check that VAL is present in every element until we find elements
6130 that are 0 or all 1 bits. */
6131 for (i = 1; i < nunits; ++i)
6132 {
6133 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6134 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6135
6136 /* If the value isn't the splat value, check for the remaining elements
6137 being 0/-1. */
6138 if (val != elt_val)
6139 {
6140 if (elt_val == 0)
6141 {
6142 for (j = i+1; j < nunits; ++j)
6143 {
6144 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6145 if (const_vector_elt_as_int (op, elt2) != 0)
6146 return 0;
6147 }
6148
6149 return (nunits - i) * GET_MODE_SIZE (inner);
6150 }
6151
6152 else if ((elt_val & mask) == mask)
6153 {
6154 for (j = i+1; j < nunits; ++j)
6155 {
6156 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6157 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6158 return 0;
6159 }
6160
6161 return -((nunits - i) * GET_MODE_SIZE (inner));
6162 }
6163
6164 else
6165 return 0;
6166 }
6167 }
6168
6169 /* If all elements are equal, we don't need to do VSLDOI. */
6170 return 0;
6171 }
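/* For instance, on a big-endian target the V16QI constant
   { 5, 0, 0, ..., 0 } is handled with val = 5: every trailing element is
   zero, so the routine returns (16 - 1) * 1 = 15, the number of zero
   bytes a VSLDOI must shift in after the vspltisb 5.  */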
6172
6173
6174 /* Return true if OP is of the given MODE and can be synthesized
6175 with a vspltisb, vspltish or vspltisw. */
6176
6177 bool
6178 easy_altivec_constant (rtx op, machine_mode mode)
6179 {
6180 unsigned step, copies;
6181
6182 if (mode == VOIDmode)
6183 mode = GET_MODE (op);
6184 else if (mode != GET_MODE (op))
6185 return false;
6186
6187 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6188 constants. */
6189 if (mode == V2DFmode)
6190 return zero_constant (op, mode);
6191
6192 else if (mode == V2DImode)
6193 {
6194 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
6195 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
6196 return false;
6197
6198 if (zero_constant (op, mode))
6199 return true;
6200
6201 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6202 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6203 return true;
6204
6205 return false;
6206 }
6207
6208 /* V1TImode is a special container for TImode. Ignore for now. */
6209 else if (mode == V1TImode)
6210 return false;
6211
6212 /* Start with a vspltisw. */
6213 step = GET_MODE_NUNITS (mode) / 4;
6214 copies = 1;
6215
6216 if (vspltis_constant (op, step, copies))
6217 return true;
6218
6219 /* Then try with a vspltish. */
6220 if (step == 1)
6221 copies <<= 1;
6222 else
6223 step >>= 1;
6224
6225 if (vspltis_constant (op, step, copies))
6226 return true;
6227
6228 /* And finally a vspltisb. */
6229 if (step == 1)
6230 copies <<= 1;
6231 else
6232 step >>= 1;
6233
6234 if (vspltis_constant (op, step, copies))
6235 return true;
6236
6237 if (vspltis_shifted (op) != 0)
6238 return true;
6239
6240 return false;
6241 }
6242
6243 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6244 result is OP. Abort if it is not possible. */
6245
6246 rtx
6247 gen_easy_altivec_constant (rtx op)
6248 {
6249 machine_mode mode = GET_MODE (op);
6250 int nunits = GET_MODE_NUNITS (mode);
6251 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6252 unsigned step = nunits / 4;
6253 unsigned copies = 1;
6254
6255 /* Start with a vspltisw. */
6256 if (vspltis_constant (op, step, copies))
6257 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6258
6259 /* Then try with a vspltish. */
6260 if (step == 1)
6261 copies <<= 1;
6262 else
6263 step >>= 1;
6264
6265 if (vspltis_constant (op, step, copies))
6266 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6267
6268 /* And finally a vspltisb. */
6269 if (step == 1)
6270 copies <<= 1;
6271 else
6272 step >>= 1;
6273
6274 if (vspltis_constant (op, step, copies))
6275 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6276
6277 gcc_unreachable ();
6278 }
6279
6280 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6281 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6282
6283 Return the number of instructions needed (1 or 2) through the address
6284 pointed to by NUM_INSNS_PTR.
6285
6286 Return the constant to be splatted through CONSTANT_PTR. */
6287
6288 bool
6289 xxspltib_constant_p (rtx op,
6290 machine_mode mode,
6291 int *num_insns_ptr,
6292 int *constant_ptr)
6293 {
6294 size_t nunits = GET_MODE_NUNITS (mode);
6295 size_t i;
6296 HOST_WIDE_INT value;
6297 rtx element;
6298
6299 /* Set the returned values to out-of-range values. */
6300 *num_insns_ptr = -1;
6301 *constant_ptr = 256;
6302
6303 if (!TARGET_P9_VECTOR)
6304 return false;
6305
6306 if (mode == VOIDmode)
6307 mode = GET_MODE (op);
6308
6309 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6310 return false;
6311
6312 /* Handle (vec_duplicate <constant>). */
6313 if (GET_CODE (op) == VEC_DUPLICATE)
6314 {
6315 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6316 && mode != V2DImode)
6317 return false;
6318
6319 element = XEXP (op, 0);
6320 if (!CONST_INT_P (element))
6321 return false;
6322
6323 value = INTVAL (element);
6324 if (!IN_RANGE (value, -128, 127))
6325 return false;
6326 }
6327
6328 /* Handle (const_vector [...]). */
6329 else if (GET_CODE (op) == CONST_VECTOR)
6330 {
6331 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6332 && mode != V2DImode)
6333 return false;
6334
6335 element = CONST_VECTOR_ELT (op, 0);
6336 if (!CONST_INT_P (element))
6337 return false;
6338
6339 value = INTVAL (element);
6340 if (!IN_RANGE (value, -128, 127))
6341 return false;
6342
6343 for (i = 1; i < nunits; i++)
6344 {
6345 element = CONST_VECTOR_ELT (op, i);
6346 if (!CONST_INT_P (element))
6347 return false;
6348
6349 if (value != INTVAL (element))
6350 return false;
6351 }
6352 }
6353
6354 /* Handle integer constants being loaded into the upper part of the VSX
6355 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6356 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6357 else if (CONST_INT_P (op))
6358 {
6359 if (!SCALAR_INT_MODE_P (mode))
6360 return false;
6361
6362 value = INTVAL (op);
6363 if (!IN_RANGE (value, -128, 127))
6364 return false;
6365
6366 if (!IN_RANGE (value, -1, 0))
6367 {
6368 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6369 return false;
6370
6371 if (EASY_VECTOR_15 (value))
6372 return false;
6373 }
6374 }
6375
6376 else
6377 return false;
6378
6379 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6380 sign extend. Special case 0/-1 to allow getting any VSX register instead
6381 of an Altivec register. */
6382 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6383 && EASY_VECTOR_15 (value))
6384 return false;
6385
6386 /* Return # of instructions and the constant byte for XXSPLTIB. */
6387 if (mode == V16QImode)
6388 *num_insns_ptr = 1;
6389
6390 else if (IN_RANGE (value, -1, 0))
6391 *num_insns_ptr = 1;
6392
6393 else
6394 *num_insns_ptr = 2;
6395
6396 *constant_ptr = (int) value;
6397 return true;
6398 }
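/* For example, a V16QI splat of 100 needs a single xxspltib (1 insn),
   while a V4SI splat of 100 is costed at 2 insns (xxspltib plus a sign
   extension such as vextsb2w), since 100 is outside the vspltisw range
   that EASY_VECTOR_15 checks.  */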
6399
6400 const char *
6401 output_vec_const_move (rtx *operands)
6402 {
6403 int shift;
6404 machine_mode mode;
6405 rtx dest, vec;
6406
6407 dest = operands[0];
6408 vec = operands[1];
6409 mode = GET_MODE (dest);
6410
6411 if (TARGET_VSX)
6412 {
6413 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6414 int xxspltib_value = 256;
6415 int num_insns = -1;
6416
6417 if (zero_constant (vec, mode))
6418 {
6419 if (TARGET_P9_VECTOR)
6420 return "xxspltib %x0,0";
6421
6422 else if (dest_vmx_p)
6423 return "vspltisw %0,0";
6424
6425 else
6426 return "xxlxor %x0,%x0,%x0";
6427 }
6428
6429 if (all_ones_constant (vec, mode))
6430 {
6431 if (TARGET_P9_VECTOR)
6432 return "xxspltib %x0,255";
6433
6434 else if (dest_vmx_p)
6435 return "vspltisw %0,-1";
6436
6437 else if (TARGET_P8_VECTOR)
6438 return "xxlorc %x0,%x0,%x0";
6439
6440 else
6441 gcc_unreachable ();
6442 }
6443
6444 if (TARGET_P9_VECTOR
6445 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6446 {
6447 if (num_insns == 1)
6448 {
6449 operands[2] = GEN_INT (xxspltib_value & 0xff);
6450 return "xxspltib %x0,%2";
6451 }
6452
6453 return "#";
6454 }
6455 }
6456
6457 if (TARGET_ALTIVEC)
6458 {
6459 rtx splat_vec;
6460
6461 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6462 if (zero_constant (vec, mode))
6463 return "vspltisw %0,0";
6464
6465 if (all_ones_constant (vec, mode))
6466 return "vspltisw %0,-1";
6467
6468 /* Do we need to construct a value using VSLDOI? */
6469 shift = vspltis_shifted (vec);
6470 if (shift != 0)
6471 return "#";
6472
6473 splat_vec = gen_easy_altivec_constant (vec);
6474 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6475 operands[1] = XEXP (splat_vec, 0);
6476 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6477 return "#";
6478
6479 switch (GET_MODE (splat_vec))
6480 {
6481 case E_V4SImode:
6482 return "vspltisw %0,%1";
6483
6484 case E_V8HImode:
6485 return "vspltish %0,%1";
6486
6487 case E_V16QImode:
6488 return "vspltisb %0,%1";
6489
6490 default:
6491 gcc_unreachable ();
6492 }
6493 }
6494
6495 gcc_unreachable ();
6496 }
6497
6498 /* Initialize vector TARGET to VALS. */
6499
6500 void
6501 rs6000_expand_vector_init (rtx target, rtx vals)
6502 {
6503 machine_mode mode = GET_MODE (target);
6504 machine_mode inner_mode = GET_MODE_INNER (mode);
6505 int n_elts = GET_MODE_NUNITS (mode);
6506 int n_var = 0, one_var = -1;
6507 bool all_same = true, all_const_zero = true;
6508 rtx x, mem;
6509 int i;
6510
6511 for (i = 0; i < n_elts; ++i)
6512 {
6513 x = XVECEXP (vals, 0, i);
6514 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6515 ++n_var, one_var = i;
6516 else if (x != CONST0_RTX (inner_mode))
6517 all_const_zero = false;
6518
6519 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6520 all_same = false;
6521 }
6522
6523 if (n_var == 0)
6524 {
6525 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6526 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6527 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6528 {
6529 /* Zero register. */
6530 emit_move_insn (target, CONST0_RTX (mode));
6531 return;
6532 }
6533 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6534 {
6535 /* Splat immediate. */
6536 emit_insn (gen_rtx_SET (target, const_vec));
6537 return;
6538 }
6539 else
6540 {
6541 /* Load from constant pool. */
6542 emit_move_insn (target, const_vec);
6543 return;
6544 }
6545 }
6546
6547 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6548 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6549 {
6550 rtx op[2];
6551 size_t i;
6552 size_t num_elements = all_same ? 1 : 2;
6553 for (i = 0; i < num_elements; i++)
6554 {
6555 op[i] = XVECEXP (vals, 0, i);
6556 /* Just in case there is a SUBREG with a smaller mode, do a
6557 conversion. */
6558 if (GET_MODE (op[i]) != inner_mode)
6559 {
6560 rtx tmp = gen_reg_rtx (inner_mode);
6561 convert_move (tmp, op[i], 0);
6562 op[i] = tmp;
6563 }
6564 /* Allow load with splat double word. */
6565 else if (MEM_P (op[i]))
6566 {
6567 if (!all_same)
6568 op[i] = force_reg (inner_mode, op[i]);
6569 }
6570 else if (!REG_P (op[i]))
6571 op[i] = force_reg (inner_mode, op[i]);
6572 }
6573
6574 if (all_same)
6575 {
6576 if (mode == V2DFmode)
6577 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6578 else
6579 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6580 }
6581 else
6582 {
6583 if (mode == V2DFmode)
6584 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6585 else
6586 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6587 }
6588 return;
6589 }
6590
6591 /* Special case initializing vector int if we are on 64-bit systems with
6592 direct move or we have the ISA 3.0 instructions. */
6593 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6594 && TARGET_DIRECT_MOVE_64BIT)
6595 {
6596 if (all_same)
6597 {
6598 rtx element0 = XVECEXP (vals, 0, 0);
6599 if (MEM_P (element0))
6600 element0 = rs6000_address_for_fpconvert (element0);
6601 else
6602 element0 = force_reg (SImode, element0);
6603
6604 if (TARGET_P9_VECTOR)
6605 emit_insn (gen_vsx_splat_v4si (target, element0));
6606 else
6607 {
6608 rtx tmp = gen_reg_rtx (DImode);
6609 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6610 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6611 }
6612 return;
6613 }
6614 else
6615 {
6616 rtx elements[4];
6617 size_t i;
6618
6619 for (i = 0; i < 4; i++)
6620 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6621
6622 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6623 elements[2], elements[3]));
6624 return;
6625 }
6626 }
6627
6628 /* With single precision floating point on VSX, we know that internally
6629 single precision is actually represented as a double. Either make 2 V2DF
6630 vectors and convert those vectors to single precision, or do one
6631 conversion and splat the result to the other elements. */
6632 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6633 {
6634 if (all_same)
6635 {
6636 rtx element0 = XVECEXP (vals, 0, 0);
6637
6638 if (TARGET_P9_VECTOR)
6639 {
6640 if (MEM_P (element0))
6641 element0 = rs6000_address_for_fpconvert (element0);
6642
6643 emit_insn (gen_vsx_splat_v4sf (target, element0));
6644 }
6645
6646 else
6647 {
6648 rtx freg = gen_reg_rtx (V4SFmode);
6649 rtx sreg = force_reg (SFmode, element0);
6650 rtx cvt = (TARGET_XSCVDPSPN
6651 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6652 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6653
6654 emit_insn (cvt);
6655 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6656 const0_rtx));
6657 }
6658 }
6659 else
6660 {
6661 rtx dbl_even = gen_reg_rtx (V2DFmode);
6662 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6663 rtx flt_even = gen_reg_rtx (V4SFmode);
6664 rtx flt_odd = gen_reg_rtx (V4SFmode);
6665 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6666 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6667 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6668 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6669
6670 /* Use VMRGEW if we can instead of doing a permute. */
6671 if (TARGET_P8_VECTOR)
6672 {
6673 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
6674 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
6675 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6676 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6677 if (BYTES_BIG_ENDIAN)
6678 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
6679 else
6680 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
6681 }
6682 else
6683 {
6684 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6685 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6686 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6687 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6688 rs6000_expand_extract_even (target, flt_even, flt_odd);
6689 }
6690 }
6691 return;
6692 }
6693
6694 /* Special case initializing vector short/char that are splats if we are on
6695 64-bit systems with direct move. */
6696 if (all_same && TARGET_DIRECT_MOVE_64BIT
6697 && (mode == V16QImode || mode == V8HImode))
6698 {
6699 rtx op0 = XVECEXP (vals, 0, 0);
6700 rtx di_tmp = gen_reg_rtx (DImode);
6701
6702 if (!REG_P (op0))
6703 op0 = force_reg (GET_MODE_INNER (mode), op0);
6704
6705 if (mode == V16QImode)
6706 {
6707 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6708 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6709 return;
6710 }
6711
6712 if (mode == V8HImode)
6713 {
6714 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6715 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
6716 return;
6717 }
6718 }
6719
6720 /* Store value to stack temp. Load vector element. Splat. However, splat
6721 of 64-bit items is not supported on Altivec. */
6722 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6723 {
6724 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6725 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6726 XVECEXP (vals, 0, 0));
6727 x = gen_rtx_UNSPEC (VOIDmode,
6728 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6729 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6730 gen_rtvec (2,
6731 gen_rtx_SET (target, mem),
6732 x)));
6733 x = gen_rtx_VEC_SELECT (inner_mode, target,
6734 gen_rtx_PARALLEL (VOIDmode,
6735 gen_rtvec (1, const0_rtx)));
6736 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6737 return;
6738 }
6739
6740 /* One field is non-constant. Load constant then overwrite
6741 varying field. */
6742 if (n_var == 1)
6743 {
6744 rtx copy = copy_rtx (vals);
6745
6746 /* Load constant part of vector, substitute neighboring value for
6747 varying element. */
6748 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6749 rs6000_expand_vector_init (target, copy);
6750
6751 /* Insert variable. */
6752 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
6753 return;
6754 }
6755
6756 /* Construct the vector in memory one field at a time
6757 and load the whole vector. */
6758 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6759 for (i = 0; i < n_elts; i++)
6760 emit_move_insn (adjust_address_nv (mem, inner_mode,
6761 i * GET_MODE_SIZE (inner_mode)),
6762 XVECEXP (vals, 0, i));
6763 emit_move_insn (target, mem);
6764 }
6765
6766 /* Set field ELT of TARGET to VAL. */
6767
6768 void
6769 rs6000_expand_vector_set (rtx target, rtx val, int elt)
6770 {
6771 machine_mode mode = GET_MODE (target);
6772 machine_mode inner_mode = GET_MODE_INNER (mode);
6773 rtx reg = gen_reg_rtx (mode);
6774 rtx mask, mem, x;
6775 int width = GET_MODE_SIZE (inner_mode);
6776 int i;
6777
6778 val = force_reg (GET_MODE (val), val);
6779
6780 if (VECTOR_MEM_VSX_P (mode))
6781 {
6782 rtx insn = NULL_RTX;
6783 rtx elt_rtx = GEN_INT (elt);
6784
6785 if (mode == V2DFmode)
6786 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
6787
6788 else if (mode == V2DImode)
6789 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
6790
6791 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
6792 {
6793 if (mode == V4SImode)
6794 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
6795 else if (mode == V8HImode)
6796 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
6797 else if (mode == V16QImode)
6798 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
6799 else if (mode == V4SFmode)
6800 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
6801 }
6802
6803 if (insn)
6804 {
6805 emit_insn (insn);
6806 return;
6807 }
6808 }
6809
6810 /* Simplify setting single element vectors like V1TImode. */
6811 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
6812 {
6813 emit_move_insn (target, gen_lowpart (mode, val));
6814 return;
6815 }
6816
6817 /* Load single variable value. */
6818 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6819 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
6820 x = gen_rtx_UNSPEC (VOIDmode,
6821 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6822 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6823 gen_rtvec (2,
6824 gen_rtx_SET (reg, mem),
6825 x)));
6826
6827 /* Linear sequence. */
6828 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
6829 for (i = 0; i < 16; ++i)
6830 XVECEXP (mask, 0, i) = GEN_INT (i);
6831
6832 /* Set permute mask to insert element into target. */
6833 for (i = 0; i < width; ++i)
6834 XVECEXP (mask, 0, elt*width + i)
6835 = GEN_INT (i + 0x10);
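  /* Worked example (illustrative): inserting element 1 of a V4SI yields
     the byte selector { 0,1,2,3, 16,17,18,19, 8,9,10,11, 12,13,14,15 },
     so bytes 4-7 of the result are taken from the newly loaded value.  */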
6836 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
6837
6838 if (BYTES_BIG_ENDIAN)
6839 x = gen_rtx_UNSPEC (mode,
6840 gen_rtvec (3, target, reg,
6841 force_reg (V16QImode, x)),
6842 UNSPEC_VPERM);
6843 else
6844 {
6845 if (TARGET_P9_VECTOR)
6846 x = gen_rtx_UNSPEC (mode,
6847 gen_rtvec (3, reg, target,
6848 force_reg (V16QImode, x)),
6849 UNSPEC_VPERMR);
6850 else
6851 {
6852 /* Invert selector. We prefer to generate VNAND on P8 so
6853 that future fusion opportunities can kick in, but must
6854 generate VNOR elsewhere. */
6855 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
6856 rtx iorx = (TARGET_P8_VECTOR
6857 ? gen_rtx_IOR (V16QImode, notx, notx)
6858 : gen_rtx_AND (V16QImode, notx, notx));
6859 rtx tmp = gen_reg_rtx (V16QImode);
6860 emit_insn (gen_rtx_SET (tmp, iorx));
6861
6862 /* Permute with operands reversed and adjusted selector. */
6863 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
6864 UNSPEC_VPERM);
6865 }
6866 }
6867
6868 emit_insn (gen_rtx_SET (target, x));
6869 }
6870
6871 /* Extract field ELT from VEC into TARGET. */
6872
6873 void
6874 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
6875 {
6876 machine_mode mode = GET_MODE (vec);
6877 machine_mode inner_mode = GET_MODE_INNER (mode);
6878 rtx mem;
6879
6880 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
6881 {
6882 switch (mode)
6883 {
6884 default:
6885 break;
6886 case E_V1TImode:
6887 gcc_assert (INTVAL (elt) == 0 && inner_mode == TImode);
6888 emit_move_insn (target, gen_lowpart (TImode, vec));
6889 	  return;
6890 case E_V2DFmode:
6891 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
6892 return;
6893 case E_V2DImode:
6894 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
6895 return;
6896 case E_V4SFmode:
6897 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
6898 return;
6899 case E_V16QImode:
6900 if (TARGET_DIRECT_MOVE_64BIT)
6901 {
6902 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
6903 return;
6904 }
6905 else
6906 break;
6907 case E_V8HImode:
6908 if (TARGET_DIRECT_MOVE_64BIT)
6909 {
6910 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
6911 return;
6912 }
6913 else
6914 break;
6915 case E_V4SImode:
6916 if (TARGET_DIRECT_MOVE_64BIT)
6917 {
6918 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
6919 return;
6920 }
6921 break;
6922 }
6923 }
6924 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
6925 && TARGET_DIRECT_MOVE_64BIT)
6926 {
6927 if (GET_MODE (elt) != DImode)
6928 {
6929 rtx tmp = gen_reg_rtx (DImode);
6930 convert_move (tmp, elt, 0);
6931 elt = tmp;
6932 }
6933 else if (!REG_P (elt))
6934 elt = force_reg (DImode, elt);
6935
6936 switch (mode)
6937 {
6938 case E_V2DFmode:
6939 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
6940 return;
6941
6942 case E_V2DImode:
6943 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
6944 return;
6945
6946 case E_V4SFmode:
6947 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
6948 return;
6949
6950 case E_V4SImode:
6951 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
6952 return;
6953
6954 case E_V8HImode:
6955 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
6956 return;
6957
6958 case E_V16QImode:
6959 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
6960 return;
6961
6962 default:
6963 gcc_unreachable ();
6964 }
6965 }
6966
6967 gcc_assert (CONST_INT_P (elt));
6968
6969 /* Allocate mode-sized buffer. */
6970 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6971
6972 emit_move_insn (mem, vec);
6973
6974 /* Add offset to field within buffer matching vector element. */
6975 mem = adjust_address_nv (mem, inner_mode,
6976 INTVAL (elt) * GET_MODE_SIZE (inner_mode));
6977
6978 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
6979 }
6980
6981 /* Helper function to return the register number of an RTX.  */
6982 static inline int
6983 regno_or_subregno (rtx op)
6984 {
6985 if (REG_P (op))
6986 return REGNO (op);
6987 else if (SUBREG_P (op))
6988 return subreg_regno (op);
6989 else
6990 gcc_unreachable ();
6991 }
6992
6993 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
6994 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
6995    temporary (BASE_TMP) to fix up the address.  Return the new memory address
6996 that is valid for reads or writes to a given register (SCALAR_REG). */
6997
6998 rtx
6999 rs6000_adjust_vec_address (rtx scalar_reg,
7000 rtx mem,
7001 rtx element,
7002 rtx base_tmp,
7003 machine_mode scalar_mode)
7004 {
7005 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7006 rtx addr = XEXP (mem, 0);
7007 rtx element_offset;
7008 rtx new_addr;
7009 bool valid_addr_p;
7010
7011 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7012 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7013
7014 /* Calculate what we need to add to the address to get the element
7015 address. */
7016 if (CONST_INT_P (element))
7017 element_offset = GEN_INT (INTVAL (element) * scalar_size);
7018 else
7019 {
7020 int byte_shift = exact_log2 (scalar_size);
7021 gcc_assert (byte_shift >= 0);
7022
7023 if (byte_shift == 0)
7024 element_offset = element;
7025
7026 else
7027 {
7028 if (TARGET_POWERPC64)
7029 emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
7030 else
7031 emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
7032
7033 element_offset = base_tmp;
7034 }
7035 }
7036
7037 /* Create the new address pointing to the element within the vector. If we
7038 are adding 0, we don't have to change the address. */
7039 if (element_offset == const0_rtx)
7040 new_addr = addr;
7041
7042 /* A simple indirect address can be converted into a reg + offset
7043 address. */
7044 else if (REG_P (addr) || SUBREG_P (addr))
7045 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7046
7047   /* Optimize D-FORM addresses with a constant offset and a constant element
7048      number, folding the element offset into the address directly.  */
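  /* Worked example (illustrative): for a V4SI vector addressed at r9+48,
     constant element 2 (SImode, 4 bytes) folds to the scalar address
     r9+56, since 48 + 2*4 = 56 still fits in a 16-bit displacement.  */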
7049 else if (GET_CODE (addr) == PLUS)
7050 {
7051 rtx op0 = XEXP (addr, 0);
7052 rtx op1 = XEXP (addr, 1);
7053 rtx insn;
7054
7055 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7056 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7057 {
7058 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7059 rtx offset_rtx = GEN_INT (offset);
7060
7061 if (IN_RANGE (offset, -32768, 32767)
7062 && (scalar_size < 8 || (offset & 0x3) == 0))
7063 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7064 else
7065 {
7066 emit_move_insn (base_tmp, offset_rtx);
7067 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7068 }
7069 }
7070 else
7071 {
7072 bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
7073 bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
7074
7075 /* Note, ADDI requires the register being added to be a base
7076 register. If the register was R0, load it up into the temporary
7077 and do the add. */
7078 if (op1_reg_p
7079 && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
7080 {
7081 insn = gen_add3_insn (base_tmp, op1, element_offset);
7082 gcc_assert (insn != NULL_RTX);
7083 emit_insn (insn);
7084 }
7085
7086 else if (ele_reg_p
7087 && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
7088 {
7089 insn = gen_add3_insn (base_tmp, element_offset, op1);
7090 gcc_assert (insn != NULL_RTX);
7091 emit_insn (insn);
7092 }
7093
7094 else
7095 {
7096 emit_move_insn (base_tmp, op1);
7097 emit_insn (gen_add2_insn (base_tmp, element_offset));
7098 }
7099
7100 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7101 }
7102 }
7103
7104 else
7105 {
7106 emit_move_insn (base_tmp, addr);
7107 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7108 }
7109
7110 /* If we have a PLUS, we need to see whether the particular register class
7111 allows for D-FORM or X-FORM addressing. */
7112 if (GET_CODE (new_addr) == PLUS)
7113 {
7114 rtx op1 = XEXP (new_addr, 1);
7115 addr_mask_type addr_mask;
7116 int scalar_regno = regno_or_subregno (scalar_reg);
7117
7118 gcc_assert (scalar_regno < FIRST_PSEUDO_REGISTER);
7119 if (INT_REGNO_P (scalar_regno))
7120 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
7121
7122 else if (FP_REGNO_P (scalar_regno))
7123 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
7124
7125 else if (ALTIVEC_REGNO_P (scalar_regno))
7126 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
7127
7128 else
7129 gcc_unreachable ();
7130
7131 if (REG_P (op1) || SUBREG_P (op1))
7132 valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
7133 else
7134 valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
7135 }
7136
7137 else if (REG_P (new_addr) || SUBREG_P (new_addr))
7138 valid_addr_p = true;
7139
7140 else
7141 valid_addr_p = false;
7142
7143 if (!valid_addr_p)
7144 {
7145 emit_move_insn (base_tmp, new_addr);
7146 new_addr = base_tmp;
7147 }
7148
7149 return change_address (mem, scalar_mode, new_addr);
7150 }
7151
7152 /* Split a variable vec_extract operation into the component instructions. */
7153
7154 void
7155 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7156 rtx tmp_altivec)
7157 {
7158 machine_mode mode = GET_MODE (src);
7159 machine_mode scalar_mode = GET_MODE (dest);
7160 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7161 int byte_shift = exact_log2 (scalar_size);
7162
7163 gcc_assert (byte_shift >= 0);
7164
7165 /* If we are given a memory address, optimize to load just the element. We
7166 don't have to adjust the vector element number on little endian
7167 systems. */
7168 if (MEM_P (src))
7169 {
7170 gcc_assert (REG_P (tmp_gpr));
7171 emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
7172 tmp_gpr, scalar_mode));
7173 return;
7174 }
7175
7176 else if (REG_P (src) || SUBREG_P (src))
7177 {
7178 int bit_shift = byte_shift + 3;
7179 rtx element2;
7180 int dest_regno = regno_or_subregno (dest);
7181 int src_regno = regno_or_subregno (src);
7182 int element_regno = regno_or_subregno (element);
7183
7184 gcc_assert (REG_P (tmp_gpr));
7185
7186 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7187 a general purpose register. */
7188 if (TARGET_P9_VECTOR
7189 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7190 && INT_REGNO_P (dest_regno)
7191 && ALTIVEC_REGNO_P (src_regno)
7192 && INT_REGNO_P (element_regno))
7193 {
7194 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7195 rtx element_si = gen_rtx_REG (SImode, element_regno);
7196
7197 if (mode == V16QImode)
7198 emit_insn (BYTES_BIG_ENDIAN
7199 ? gen_vextublx (dest_si, element_si, src)
7200 : gen_vextubrx (dest_si, element_si, src));
7201
7202 else if (mode == V8HImode)
7203 {
7204 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7205 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7206 emit_insn (BYTES_BIG_ENDIAN
7207 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7208 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7209 }
7210
7212 else
7213 {
7214 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7215 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7216 emit_insn (BYTES_BIG_ENDIAN
7217 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7218 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7219 }
7220
7221 return;
7222 }
7223
7225 gcc_assert (REG_P (tmp_altivec));
7226
7227       /* For little endian, adjust the element ordering.  For V2DI/V2DF we
7228	  can use an XOR; otherwise we need to subtract.  The shift amount is
7229	  chosen so that VSLO will shift the element into the upper position
7230	  (adding 3 converts a byte shift into a bit shift).  */
7231 if (scalar_size == 8)
7232 {
7233 if (!BYTES_BIG_ENDIAN)
7234 {
7235 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7236 element2 = tmp_gpr;
7237 }
7238 else
7239 element2 = element;
7240
7241 	      /* Generate RLDIC directly to shift left 6 bits and keep just
7242 		 that bit; the element number is 0 or 1, so this yields 0 or 64.  */
7243 emit_insn (gen_rtx_SET (tmp_gpr,
7244 gen_rtx_AND (DImode,
7245 gen_rtx_ASHIFT (DImode,
7246 element2,
7247 GEN_INT (6)),
7248 GEN_INT (64))));
7249 }
7250 else
7251 {
7252 if (!BYTES_BIG_ENDIAN)
7253 {
7254 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7255
7256 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7257 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7258 element2 = tmp_gpr;
7259 }
7260 else
7261 element2 = element;
7262
7263 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7264 }
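	  /* Worked example (illustrative): TMP_GPR now holds the element's
	     byte offset scaled for VSLO, e.g. element 2 of a V4SI on big
	     endian gives 2 << 5 = 64, encoding a shift of 8 bytes.  */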
7265
7266 /* Get the value into the lower byte of the Altivec register where VSLO
7267 expects it. */
7268 if (TARGET_P9_VECTOR)
7269 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7270 else if (can_create_pseudo_p ())
7271 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7272 else
7273 {
7274 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7275 emit_move_insn (tmp_di, tmp_gpr);
7276 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7277 }
7278
7279 /* Do the VSLO to get the value into the final location. */
7280 switch (mode)
7281 {
7282 case E_V2DFmode:
7283 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7284 return;
7285
7286 case E_V2DImode:
7287 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7288 return;
7289
7290 case E_V4SFmode:
7291 {
7292 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7293 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7294 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7295 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7296 tmp_altivec));
7297
7298 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7299 return;
7300 }
7301
7302 case E_V4SImode:
7303 case E_V8HImode:
7304 case E_V16QImode:
7305 {
7306 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7307 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7308 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7309 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7310 tmp_altivec));
7311 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7312 emit_insn (gen_ashrdi3 (tmp_gpr_di, tmp_gpr_di,
7313 GEN_INT (64 - (8 * scalar_size))));
7314 return;
7315 }
7316
7317 default:
7318 gcc_unreachable ();
7319 }
7320
7321 return;
7322 }
7323 else
7324 gcc_unreachable ();
7325 }
7326
7327 /* Return the alignment of TYPE.  Existing alignment is ALIGN.  HOW
7328    selects whether the alignment is ABI-mandated, optional, or both
7329    ABI-mandated and optional alignment.  */
7330
7331 unsigned int
7332 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7333 {
7334 if (how != align_opt)
7335 {
7336 if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
7337 align = 128;
7338 }
7339
7340 if (how != align_abi)
7341 {
7342 if (TREE_CODE (type) == ARRAY_TYPE
7343 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7344 {
7345 if (align < BITS_PER_WORD)
7346 align = BITS_PER_WORD;
7347 }
7348 }
7349
7350 return align;
7351 }
7352
7353 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
7354 instructions simply ignore the low bits; VSX memory instructions
7355 are aligned to 4 or 8 bytes. */
7356
7357 static bool
7358 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
7359 {
7360 return (STRICT_ALIGNMENT
7361 || (!TARGET_EFFICIENT_UNALIGNED_VSX
7362 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
7363 || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode))
7364 && (int) align < VECTOR_ALIGN (mode)))));
7365 }
7366
7367 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7368
7369 bool
7370 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
7371 {
7372 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
7373 {
7374 if (computed != 128)
7375 {
7376 static bool warned;
7377 if (!warned && warn_psabi)
7378 {
7379 warned = true;
7380 inform (input_location,
7381 "the layout of aggregates containing vectors with"
7382 " %d-byte alignment has changed in GCC 5",
7383 computed / BITS_PER_UNIT);
7384 }
7385 }
7386 /* In current GCC there is no special case. */
7387 return false;
7388 }
7389
7390 return false;
7391 }
7392
7393 /* AIX increases natural record alignment to doubleword if the first
7394 field is an FP double while the FP fields remain word aligned. */
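/* For example (illustrative), struct { double d; int i; } is given
   doubleword (64-bit) alignment here because its first field has
   DFmode.  */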
7395
7396 unsigned int
7397 rs6000_special_round_type_align (tree type, unsigned int computed,
7398 unsigned int specified)
7399 {
7400 unsigned int align = MAX (computed, specified);
7401 tree field = TYPE_FIELDS (type);
7402
7403   /* Skip all non-field decls.  */
7404 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7405 field = DECL_CHAIN (field);
7406
7407 if (field != NULL && field != type)
7408 {
7409 type = TREE_TYPE (field);
7410 while (TREE_CODE (type) == ARRAY_TYPE)
7411 type = TREE_TYPE (type);
7412
7413 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7414 align = MAX (align, 64);
7415 }
7416
7417 return align;
7418 }
7419
7420 /* Darwin increases record alignment to the natural alignment of
7421 the first field. */
7422
7423 unsigned int
7424 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7425 unsigned int specified)
7426 {
7427 unsigned int align = MAX (computed, specified);
7428
7429 if (TYPE_PACKED (type))
7430 return align;
7431
7432 /* Find the first field, looking down into aggregates. */
7433 do {
7434 tree field = TYPE_FIELDS (type);
7435     /* Skip all non-field decls.  */
7436 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7437 field = DECL_CHAIN (field);
7438 if (! field)
7439 break;
7440 /* A packed field does not contribute any extra alignment. */
7441 if (DECL_PACKED (field))
7442 return align;
7443 type = TREE_TYPE (field);
7444 while (TREE_CODE (type) == ARRAY_TYPE)
7445 type = TREE_TYPE (type);
7446 } while (AGGREGATE_TYPE_P (type));
7447
7448 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
7449 align = MAX (align, TYPE_ALIGN (type));
7450
7451 return align;
7452 }
7453
7454 /* Return 1 for an operand in small memory on V.4/eabi. */
7455
7456 int
7457 small_data_operand (rtx op ATTRIBUTE_UNUSED,
7458 machine_mode mode ATTRIBUTE_UNUSED)
7459 {
7460 #if TARGET_ELF
7461 rtx sym_ref;
7462
7463 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
7464 return 0;
7465
7466 if (DEFAULT_ABI != ABI_V4)
7467 return 0;
7468
7469 if (GET_CODE (op) == SYMBOL_REF)
7470 sym_ref = op;
7471
7472 else if (GET_CODE (op) != CONST
7473 || GET_CODE (XEXP (op, 0)) != PLUS
7474 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
7475 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
7476 return 0;
7477
7478 else
7479 {
7480 rtx sum = XEXP (op, 0);
7481 HOST_WIDE_INT summand;
7482
7483 /* We have to be careful here, because it is the referenced address
7484 that must be 32k from _SDA_BASE_, not just the symbol. */
7485 summand = INTVAL (XEXP (sum, 1));
7486 if (summand < 0 || summand > g_switch_value)
7487 return 0;
7488
7489 sym_ref = XEXP (sum, 0);
7490 }
7491
7492 return SYMBOL_REF_SMALL_P (sym_ref);
7493 #else
7494 return 0;
7495 #endif
7496 }
7497
7498 /* Return true if either operand is a general purpose register. */
7499
7500 bool
7501 gpr_or_gpr_p (rtx op0, rtx op1)
7502 {
7503 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
7504 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
7505 }
7506
7507 /* Return true if this is a move direct operation between GPR registers and
7508 floating point/VSX registers. */
7509
7510 bool
7511 direct_move_p (rtx op0, rtx op1)
7512 {
7513 int regno0, regno1;
7514
7515 if (!REG_P (op0) || !REG_P (op1))
7516 return false;
7517
7518 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
7519 return false;
7520
7521 regno0 = REGNO (op0);
7522 regno1 = REGNO (op1);
7523 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
7524 return false;
7525
7526 if (INT_REGNO_P (regno0))
7527 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
7528
7529 else if (INT_REGNO_P (regno1))
7530 {
7531 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
7532 return true;
7533
7534 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
7535 return true;
7536 }
7537
7538 return false;
7539 }
7540
7541 /* Return true if the OFFSET is valid for the quad address instructions that
7542 use d-form (register + offset) addressing. */
7543
7544 static inline bool
7545 quad_address_offset_p (HOST_WIDE_INT offset)
7546 {
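  /* For example (illustrative): offsets 0, 16 and -32768 are accepted,
     while 8 and 32760 are rejected as not 16-byte aligned.  */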
7547 return (IN_RANGE (offset, -32768, 32767) && ((offset) & 0xf) == 0);
7548 }
7549
7550 /* Return true if ADDR is an acceptable address for a quad memory
7551    operation of mode MODE (either LQ/STQ for general purpose registers,
7552    or LXV/STXV for vector registers under ISA 3.0).  STRICT is true if
7553    the registers used in the address must satisfy strict address
7554    checking.  */
7555
7556 bool
7557 quad_address_p (rtx addr, machine_mode mode, bool strict)
7558 {
7559 rtx op0, op1;
7560
7561 if (GET_MODE_SIZE (mode) != 16)
7562 return false;
7563
7564 if (legitimate_indirect_address_p (addr, strict))
7565 return true;
7566
7567 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
7568 return false;
7569
7570 if (GET_CODE (addr) != PLUS)
7571 return false;
7572
7573 op0 = XEXP (addr, 0);
7574 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
7575 return false;
7576
7577 op1 = XEXP (addr, 1);
7578 if (!CONST_INT_P (op1))
7579 return false;
7580
7581 return quad_address_offset_p (INTVAL (op1));
7582 }
7583
7584 /* Return true if this is a load or store quad operation. This function does
7585 not handle the atomic quad memory instructions. */
7586
7587 bool
7588 quad_load_store_p (rtx op0, rtx op1)
7589 {
7590 bool ret;
7591
7592 if (!TARGET_QUAD_MEMORY)
7593 ret = false;
7594
7595 else if (REG_P (op0) && MEM_P (op1))
7596 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
7597 && quad_memory_operand (op1, GET_MODE (op1))
7598 && !reg_overlap_mentioned_p (op0, op1));
7599
7600 else if (MEM_P (op0) && REG_P (op1))
7601 ret = (quad_memory_operand (op0, GET_MODE (op0))
7602 && quad_int_reg_operand (op1, GET_MODE (op1)));
7603
7604 else
7605 ret = false;
7606
7607 if (TARGET_DEBUG_ADDR)
7608 {
7609       fprintf (stderr, "\n========== quad_load_store_p, return %s\n",
7610 ret ? "true" : "false");
7611 debug_rtx (gen_rtx_SET (op0, op1));
7612 }
7613
7614 return ret;
7615 }
7616
7617 /* Given an address, return a constant offset term if one exists. */
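/* For example (illustrative), (plus (reg r9) (const_int 16)) yields
   (const_int 16), while a bare (reg r9) or a lo_sum of a plain symbol
   yields NULL_RTX.  */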
7618
7619 static rtx
7620 address_offset (rtx op)
7621 {
7622 if (GET_CODE (op) == PRE_INC
7623 || GET_CODE (op) == PRE_DEC)
7624 op = XEXP (op, 0);
7625 else if (GET_CODE (op) == PRE_MODIFY
7626 || GET_CODE (op) == LO_SUM)
7627 op = XEXP (op, 1);
7628
7629 if (GET_CODE (op) == CONST)
7630 op = XEXP (op, 0);
7631
7632 if (GET_CODE (op) == PLUS)
7633 op = XEXP (op, 1);
7634
7635 if (CONST_INT_P (op))
7636 return op;
7637
7638 return NULL_RTX;
7639 }
7640
7641 /* Return true if the MEM operand is a memory operand suitable for use
7642 with a (full width, possibly multiple) gpr load/store. On
7643 powerpc64 this means the offset must be divisible by 4.
7644 Implements 'Y' constraint.
7645
7646 Accept direct, indexed, offset, lo_sum and tocref. Since this is
7647 a constraint function we know the operand has satisfied a suitable
7648 memory predicate. Also accept some odd rtl generated by reload
7649 (see rs6000_legitimize_reload_address for various forms). It is
7650 important that reload rtl be accepted by appropriate constraints
7651 but not by the operand predicate.
7652
7653 Offsetting a lo_sum should not be allowed, except where we know by
7654 alignment that a 32k boundary is not crossed, but see the ???
7655 comment in rs6000_legitimize_reload_address. Note that by
7656 "offsetting" here we mean a further offset to access parts of the
7657 MEM. It's fine to have a lo_sum where the inner address is offset
7658 from a sym, since the same sym+offset will appear in the high part
7659 of the address calculation. */
7660
7661 bool
7662 mem_operand_gpr (rtx op, machine_mode mode)
7663 {
7664 unsigned HOST_WIDE_INT offset;
7665 int extra;
7666 rtx addr = XEXP (op, 0);
7667
7668 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
7669 if (TARGET_UPDATE
7670 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
7671 && mode_supports_pre_incdec_p (mode)
7672 && legitimate_indirect_address_p (XEXP (addr, 0), false))
7673 return true;
7674
7675 /* Don't allow non-offsettable addresses. See PRs 83969 and 84279. */
7676 if (!rs6000_offsettable_memref_p (op, mode, false))
7677 return false;
7678
7679 op = address_offset (addr);
7680 if (op == NULL_RTX)
7681 return true;
7682
7683 offset = INTVAL (op);
7684 if (TARGET_POWERPC64 && (offset & 3) != 0)
7685 return false;
7686
7687 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7688 if (extra < 0)
7689 extra = 0;
7690
7691 if (GET_CODE (addr) == LO_SUM)
7692 /* For lo_sum addresses, we must allow any offset except one that
7693 causes a wrap, so test only the low 16 bits. */
7694 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
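    /* Illustrative example: this sign-extends the low 16 bits, so an
       offset of 0x18000 reduces to -0x8000 and 0x17fff reduces to 0x7fff;
       only the low half of the address can wrap under a lo_sum.  */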
7695
7696 return offset + 0x8000 < 0x10000u - extra;
7697 }
7698
7699 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
7700 enforce an offset divisible by 4 even for 32-bit. */
7701
7702 bool
7703 mem_operand_ds_form (rtx op, machine_mode mode)
7704 {
7705 unsigned HOST_WIDE_INT offset;
7706 int extra;
7707 rtx addr = XEXP (op, 0);
7708
7709 if (!offsettable_address_p (false, mode, addr))
7710 return false;
7711
7712 op = address_offset (addr);
7713 if (op == NULL_RTX)
7714 return true;
7715
7716 offset = INTVAL (op);
7717 if ((offset & 3) != 0)
7718 return false;
7719
7720 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7721 if (extra < 0)
7722 extra = 0;
7723
7724 if (GET_CODE (addr) == LO_SUM)
7725 /* For lo_sum addresses, we must allow any offset except one that
7726 causes a wrap, so test only the low 16 bits. */
7727 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7728
7729 return offset + 0x8000 < 0x10000u - extra;
7730 }
7731 \f
7732 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
7733
7734 static bool
7735 reg_offset_addressing_ok_p (machine_mode mode)
7736 {
7737 switch (mode)
7738 {
7739 case E_V16QImode:
7740 case E_V8HImode:
7741 case E_V4SFmode:
7742 case E_V4SImode:
7743 case E_V2DFmode:
7744 case E_V2DImode:
7745 case E_V1TImode:
7746 case E_TImode:
7747 case E_TFmode:
7748 case E_KFmode:
7749 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
7750 ISA 3.0 vector d-form addressing mode was added. While TImode is not
7751 a vector mode, if we want to use the VSX registers to move it around,
7752 we need to restrict ourselves to reg+reg addressing. Similarly for
7753 IEEE 128-bit floating point that is passed in a single vector
7754 register. */
7755 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
7756 return mode_supports_dq_form (mode);
7757 break;
7758
7759 case E_SDmode:
7760 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
7761 addressing for the LFIWZX and STFIWX instructions. */
7762 if (TARGET_NO_SDMODE_STACK)
7763 return false;
7764 break;
7765
7766 default:
7767 break;
7768 }
7769
7770 return true;
7771 }
7772
7773 static bool
7774 virtual_stack_registers_memory_p (rtx op)
7775 {
7776 int regnum;
7777
7778 if (GET_CODE (op) == REG)
7779 regnum = REGNO (op);
7780
7781 else if (GET_CODE (op) == PLUS
7782 && GET_CODE (XEXP (op, 0)) == REG
7783 && GET_CODE (XEXP (op, 1)) == CONST_INT)
7784 regnum = REGNO (XEXP (op, 0));
7785
7786 else
7787 return false;
7788
7789 return (regnum >= FIRST_VIRTUAL_REGISTER
7790 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
7791 }
7792
7793 /* Return true if a MODE-sized memory access to OP plus OFFSET
7794    is known not to straddle a 32k boundary.  This function is used
7795 to determine whether -mcmodel=medium code can use TOC pointer
7796 relative addressing for OP. This means the alignment of the TOC
7797 pointer must also be taken into account, and unfortunately that is
7798 only 8 bytes. */
7799
7800 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
7801 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
7802 #endif
7803
7804 static bool
7805 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
7806 machine_mode mode)
7807 {
7808 tree decl;
7809 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
7810
7811 if (GET_CODE (op) != SYMBOL_REF)
7812 return false;
7813
7814 /* ISA 3.0 vector d-form addressing is restricted, don't allow
7815 SYMBOL_REF. */
7816 if (mode_supports_dq_form (mode))
7817 return false;
7818
7819 dsize = GET_MODE_SIZE (mode);
7820 decl = SYMBOL_REF_DECL (op);
7821 if (!decl)
7822 {
7823 if (dsize == 0)
7824 return false;
7825
7826 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
7827 replacing memory addresses with an anchor plus offset. We
7828 could find the decl by rummaging around in the block->objects
7829 VEC for the given offset but that seems like too much work. */
7830 dalign = BITS_PER_UNIT;
7831 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
7832 && SYMBOL_REF_ANCHOR_P (op)
7833 && SYMBOL_REF_BLOCK (op) != NULL)
7834 {
7835 struct object_block *block = SYMBOL_REF_BLOCK (op);
7836
7837 dalign = block->alignment;
7838 offset += SYMBOL_REF_BLOCK_OFFSET (op);
7839 }
7840 else if (CONSTANT_POOL_ADDRESS_P (op))
7841 {
7842 	  /* It would be nice to have get_pool_align()...  */
7843 machine_mode cmode = get_pool_mode (op);
7844
7845 dalign = GET_MODE_ALIGNMENT (cmode);
7846 }
7847 }
7848 else if (DECL_P (decl))
7849 {
7850 dalign = DECL_ALIGN (decl);
7851
7852 if (dsize == 0)
7853 {
7854 /* Allow BLKmode when the entire object is known to not
7855 cross a 32k boundary. */
7856 if (!DECL_SIZE_UNIT (decl))
7857 return false;
7858
7859 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
7860 return false;
7861
7862 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
7863 if (dsize > 32768)
7864 return false;
7865
7866 dalign /= BITS_PER_UNIT;
7867 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7868 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7869 return dalign >= dsize;
7870 }
7871 }
7872 else
7873 gcc_unreachable ();
7874
7875 /* Find how many bits of the alignment we know for this access. */
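  /* Worked example (illustrative): with dalign = 8 and offset = 48,
     lsb = 16 and the full 8-byte alignment survives; with offset = 4,
     lsb = 4 and only 4-byte alignment can be assumed.  */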
7876 dalign /= BITS_PER_UNIT;
7877 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7878 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7879 mask = dalign - 1;
7880 lsb = offset & -offset;
7881 mask &= lsb - 1;
7882 dalign = mask + 1;
7883
7884 return dalign >= dsize;
7885 }
7886
7887 static bool
7888 constant_pool_expr_p (rtx op)
7889 {
7890 rtx base, offset;
7891
7892 split_const (op, &base, &offset);
7893 return (GET_CODE (base) == SYMBOL_REF
7894 && CONSTANT_POOL_ADDRESS_P (base)
7895 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
7896 }
7897
7898 /* These are only used to pass through from print_operand/print_operand_address
7899 to rs6000_output_addr_const_extra over the intervening function
7900 output_addr_const which is not target code. */
7901 static const_rtx tocrel_base_oac, tocrel_offset_oac;
7902
7903 /* Return true if OP is a toc pointer relative address (the output
7904 of create_TOC_reference). If STRICT, do not match non-split
7905 -mcmodel=large/medium toc pointer relative addresses. If the pointers
7906 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
7907 TOCREL_OFFSET_RET respectively. */
7908
7909 bool
7910 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
7911 const_rtx *tocrel_offset_ret)
7912 {
7913 if (!TARGET_TOC)
7914 return false;
7915
7916 if (TARGET_CMODEL != CMODEL_SMALL)
7917 {
7918 /* When strict ensure we have everything tidy. */
7919 if (strict
7920 && !(GET_CODE (op) == LO_SUM
7921 && REG_P (XEXP (op, 0))
7922 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
7923 return false;
7924
7925 /* When not strict, allow non-split TOC addresses and also allow
7926 (lo_sum (high ..)) TOC addresses created during reload. */
7927 if (GET_CODE (op) == LO_SUM)
7928 op = XEXP (op, 1);
7929 }
7930
7931 const_rtx tocrel_base = op;
7932 const_rtx tocrel_offset = const0_rtx;
7933
7934 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
7935 {
7936 tocrel_base = XEXP (op, 0);
7937 tocrel_offset = XEXP (op, 1);
7938 }
7939
7940 if (tocrel_base_ret)
7941 *tocrel_base_ret = tocrel_base;
7942 if (tocrel_offset_ret)
7943 *tocrel_offset_ret = tocrel_offset;
7944
7945 return (GET_CODE (tocrel_base) == UNSPEC
7946 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
7947 }
7948
7949 /* Return true if X is a constant pool address, and also for cmodel=medium
7950 if X is a toc-relative address known to be offsettable within MODE. */
7951
7952 bool
7953 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
7954 bool strict)
7955 {
7956 const_rtx tocrel_base, tocrel_offset;
7957 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
7958 && (TARGET_CMODEL != CMODEL_MEDIUM
7959 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
7960 || mode == QImode
7961 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
7962 INTVAL (tocrel_offset), mode)));
7963 }
7964
7965 static bool
7966 legitimate_small_data_p (machine_mode mode, rtx x)
7967 {
7968 return (DEFAULT_ABI == ABI_V4
7969 && !flag_pic && !TARGET_TOC
7970 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
7971 && small_data_operand (x, mode));
7972 }
7973
7974 bool
7975 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
7976 bool strict, bool worst_case)
7977 {
7978 unsigned HOST_WIDE_INT offset;
7979 unsigned int extra;
7980
7981 if (GET_CODE (x) != PLUS)
7982 return false;
7983 if (!REG_P (XEXP (x, 0)))
7984 return false;
7985 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7986 return false;
7987 if (mode_supports_dq_form (mode))
7988 return quad_address_p (x, mode, strict);
7989 if (!reg_offset_addressing_ok_p (mode))
7990 return virtual_stack_registers_memory_p (x);
7991 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
7992 return true;
7993 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7994 return false;
7995
7996 offset = INTVAL (XEXP (x, 1));
7997 extra = 0;
7998 switch (mode)
7999 {
8000 case E_DFmode:
8001 case E_DDmode:
8002 case E_DImode:
8003 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8004 addressing. */
8005 if (VECTOR_MEM_VSX_P (mode))
8006 return false;
8007
8008 if (!worst_case)
8009 break;
8010 if (!TARGET_POWERPC64)
8011 extra = 4;
8012 else if (offset & 3)
8013 return false;
8014 break;
8015
8016 case E_TFmode:
8017 case E_IFmode:
8018 case E_KFmode:
8019 case E_TDmode:
8020 case E_TImode:
8021 case E_PTImode:
8022 extra = 8;
8023 if (!worst_case)
8024 break;
8025 if (!TARGET_POWERPC64)
8026 extra = 12;
8027 else if (offset & 3)
8028 return false;
8029 break;
8030
8031 default:
8032 break;
8033 }
8034
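  /* Worked example (illustrative): a 16-byte TFmode access on 32-bit is
     done as four word accesses at offset, +4, +8 and +12, so EXTRA = 12
     guarantees even the last access fits the signed 16-bit range.  */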
8035 offset += 0x8000;
8036 return offset < 0x10000 - extra;
8037 }
8038
8039 bool
8040 legitimate_indexed_address_p (rtx x, int strict)
8041 {
8042 rtx op0, op1;
8043
8044 if (GET_CODE (x) != PLUS)
8045 return false;
8046
8047 op0 = XEXP (x, 0);
8048 op1 = XEXP (x, 1);
8049
8050 return (REG_P (op0) && REG_P (op1)
8051 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
8052 && INT_REG_OK_FOR_INDEX_P (op1, strict))
8053 || (INT_REG_OK_FOR_BASE_P (op1, strict)
8054 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
8055 }
8056
8057 bool
8058 avoiding_indexed_address_p (machine_mode mode)
8059 {
8060 /* Avoid indexed addressing for modes that have non-indexed
8061 load/store instruction forms. */
8062 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
8063 }
8064
8065 bool
8066 legitimate_indirect_address_p (rtx x, int strict)
8067 {
8068 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
8069 }
8070
8071 bool
8072 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
8073 {
8074 if (!TARGET_MACHO || !flag_pic
8075 || mode != SImode || GET_CODE (x) != MEM)
8076 return false;
8077 x = XEXP (x, 0);
8078
8079 if (GET_CODE (x) != LO_SUM)
8080 return false;
8081 if (GET_CODE (XEXP (x, 0)) != REG)
8082 return false;
8083 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
8084 return false;
8085 x = XEXP (x, 1);
8086
8087 return CONSTANT_P (x);
8088 }
8089
8090 static bool
8091 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
8092 {
8093 if (GET_CODE (x) != LO_SUM)
8094 return false;
8095 if (GET_CODE (XEXP (x, 0)) != REG)
8096 return false;
8097 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8098 return false;
8099   /* Quad-word addresses are restricted, and we can't use LO_SUM.  */
8100 if (mode_supports_dq_form (mode))
8101 return false;
8102 x = XEXP (x, 1);
8103
8104 if (TARGET_ELF || TARGET_MACHO)
8105 {
8106 bool large_toc_ok;
8107
8108 if (DEFAULT_ABI == ABI_V4 && flag_pic)
8109 return false;
8110       /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS, as it usually calls
8111	 push_reload, which is reload-pass code.  LEGITIMIZE_RELOAD_ADDRESS
8112	 recognizes some LO_SUM addresses as valid although this function
8113	 says the opposite.  In most cases LRA can, through various
8114	 transformations, generate correct code for address reloads; it
8115	 cannot manage only some LO_SUM cases.  So we need code here,
8116	 analogous to that in rs6000_legitimize_reload_address for LO_SUM,
8117	 saying that some addresses are still valid.  */
8118 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8119 && small_toc_ref (x, VOIDmode));
8120 if (TARGET_TOC && ! large_toc_ok)
8121 return false;
8122 if (GET_MODE_NUNITS (mode) != 1)
8123 return false;
8124 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8125 && !(/* ??? Assume floating point reg based on mode? */
8126 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8127 return false;
8128
8129 return CONSTANT_P (x) || large_toc_ok;
8130 }
8131
8132 return false;
8133 }
8134
8135
8136 /* Try machine-dependent ways of modifying an illegitimate address
8137 to be legitimate. If we find one, return the new, valid address.
8138 This is used from only one place: `memory_address' in explow.c.
8139
8140 OLDX is the address as it was before break_out_memory_refs was
8141 called. In some cases it is useful to look at this to decide what
8142 needs to be done.
8143
8144 It is always safe for this function to do nothing. It exists to
8145 recognize opportunities to optimize the output.
8146
8147 On RS/6000, first check for the sum of a register with a constant
8148 integer that is out of range. If so, generate code to add the
8149 constant with the low-order 16 bits masked to the register and force
8150 this result into another register (this can be done with `cau').
8151 Then generate an address of REG+(CONST&0xffff), allowing for the
8152 possibility of bit 16 being a one.
8153
8154 Then check for the sum of a register and something not constant, try to
8155 load the other things into a register and return the sum. */
8156
8157 static rtx
8158 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8159 machine_mode mode)
8160 {
8161 unsigned int extra;
8162
8163 if (!reg_offset_addressing_ok_p (mode)
8164 || mode_supports_dq_form (mode))
8165 {
8166 if (virtual_stack_registers_memory_p (x))
8167 return x;
8168
8169 /* In theory we should not be seeing addresses of the form reg+0,
8170 but just in case it is generated, optimize it away. */
8171 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8172 return force_reg (Pmode, XEXP (x, 0));
8173
8174       /* For TImode with load/store quad, restrict addresses to just a single
8175	 pointer, so it works with both GPRs and VSX registers.  Otherwise,
8176	 make sure both operands are registers.  */
8177 else if (GET_CODE (x) == PLUS
8178 && (mode != TImode || !TARGET_VSX))
8179 return gen_rtx_PLUS (Pmode,
8180 force_reg (Pmode, XEXP (x, 0)),
8181 force_reg (Pmode, XEXP (x, 1)));
8182 else
8183 return force_reg (Pmode, x);
8184 }
8185 if (GET_CODE (x) == SYMBOL_REF)
8186 {
8187 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8188 if (model != 0)
8189 return rs6000_legitimize_tls_address (x, model);
8190 }
8191
8192 extra = 0;
8193 switch (mode)
8194 {
8195 case E_TFmode:
8196 case E_TDmode:
8197 case E_TImode:
8198 case E_PTImode:
8199 case E_IFmode:
8200 case E_KFmode:
8201 /* As in legitimate_offset_address_p we do not assume
8202 worst-case. The mode here is just a hint as to the registers
8203 used. A TImode is usually in gprs, but may actually be in
8204 fprs. Leave worst-case scenario for reload to handle via
8205 insn constraints. PTImode is only GPRs. */
8206 extra = 8;
8207 break;
8208 default:
8209 break;
8210 }
8211
8212 if (GET_CODE (x) == PLUS
8213 && GET_CODE (XEXP (x, 0)) == REG
8214 && GET_CODE (XEXP (x, 1)) == CONST_INT
8215 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8216 >= 0x10000 - extra))
8217 {
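      /* Worked example (illustrative): r3 + 0x12345 is rewritten as
	 (r3 + 0x10000) + 0x2345, leaving a residual offset that fits in a
	 signed 16-bit displacement.  */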
8218 HOST_WIDE_INT high_int, low_int;
8219 rtx sum;
8220 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8221 if (low_int >= 0x8000 - extra)
8222 low_int = 0;
8223 high_int = INTVAL (XEXP (x, 1)) - low_int;
8224 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8225 GEN_INT (high_int)), 0);
8226 return plus_constant (Pmode, sum, low_int);
8227 }
8228 else if (GET_CODE (x) == PLUS
8229 && GET_CODE (XEXP (x, 0)) == REG
8230 && GET_CODE (XEXP (x, 1)) != CONST_INT
8231 && GET_MODE_NUNITS (mode) == 1
8232 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8233 || (/* ??? Assume floating point reg based on mode? */
8234 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8235 && !avoiding_indexed_address_p (mode))
8236 {
8237 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8238 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
8239 }
8240 else if ((TARGET_ELF
8241 #if TARGET_MACHO
8242 || !MACHO_DYNAMIC_NO_PIC_P
8243 #endif
8244 )
8245 && TARGET_32BIT
8246 && TARGET_NO_TOC
8247 && ! flag_pic
8248 && GET_CODE (x) != CONST_INT
8249 && GET_CODE (x) != CONST_WIDE_INT
8250 && GET_CODE (x) != CONST_DOUBLE
8251 && CONSTANT_P (x)
8252 && GET_MODE_NUNITS (mode) == 1
8253 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8254 || (/* ??? Assume floating point reg based on mode? */
8255 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
8256 {
8257 rtx reg = gen_reg_rtx (Pmode);
8258 if (TARGET_ELF)
8259 emit_insn (gen_elf_high (reg, x));
8260 else
8261 emit_insn (gen_macho_high (reg, x));
8262 return gen_rtx_LO_SUM (Pmode, reg, x);
8263 }
8264 else if (TARGET_TOC
8265 && GET_CODE (x) == SYMBOL_REF
8266 && constant_pool_expr_p (x)
8267 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
8268 return create_TOC_reference (x, NULL_RTX);
8269 else
8270 return x;
8271 }
8272
8273 /* Debug version of rs6000_legitimize_address. */
8274 static rtx
8275 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
8276 {
8277 rtx ret;
8278 rtx_insn *insns;
8279
8280 start_sequence ();
8281 ret = rs6000_legitimize_address (x, oldx, mode);
8282 insns = get_insns ();
8283 end_sequence ();
8284
8285 if (ret != x)
8286 {
8287 fprintf (stderr,
8288 "\nrs6000_legitimize_address: mode %s, old code %s, "
8289 "new code %s, modified\n",
8290 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8291 GET_RTX_NAME (GET_CODE (ret)));
8292
8293 fprintf (stderr, "Original address:\n");
8294 debug_rtx (x);
8295
8296 fprintf (stderr, "oldx:\n");
8297 debug_rtx (oldx);
8298
8299 fprintf (stderr, "New address:\n");
8300 debug_rtx (ret);
8301
8302 if (insns)
8303 {
8304 fprintf (stderr, "Insns added:\n");
8305 debug_rtx_list (insns, 20);
8306 }
8307 }
8308 else
8309 {
8310 fprintf (stderr,
8311 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8312 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8313
8314 debug_rtx (x);
8315 }
8316
8317 if (insns)
8318 emit_insn (insns);
8319
8320 return ret;
8321 }
8322
8323 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8324 We need to emit DTP-relative relocations. */
8325
8326 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8327 static void
8328 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8329 {
8330 switch (size)
8331 {
8332 case 4:
8333 fputs ("\t.long\t", file);
8334 break;
8335 case 8:
8336 fputs (DOUBLE_INT_ASM_OP, file);
8337 break;
8338 default:
8339 gcc_unreachable ();
8340 }
8341 output_addr_const (file, x);
8342 if (TARGET_ELF)
8343 fputs ("@dtprel+0x8000", file);
8344 else if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF)
8345 {
8346 switch (SYMBOL_REF_TLS_MODEL (x))
8347 {
8348 case 0:
8349 break;
8350 case TLS_MODEL_LOCAL_EXEC:
8351 fputs ("@le", file);
8352 break;
8353 case TLS_MODEL_INITIAL_EXEC:
8354 fputs ("@ie", file);
8355 break;
8356 case TLS_MODEL_GLOBAL_DYNAMIC:
8357 case TLS_MODEL_LOCAL_DYNAMIC:
8358 fputs ("@m", file);
8359 break;
8360 default:
8361 gcc_unreachable ();
8362 }
8363 }
8364 }
8365
8366 /* Return true if X is a symbol that refers to real (rather than emulated)
8367 TLS. */
8368
8369 static bool
8370 rs6000_real_tls_symbol_ref_p (rtx x)
8371 {
8372 return (GET_CODE (x) == SYMBOL_REF
8373 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8374 }
8375
8376 /* In the name of slightly smaller debug output, and to cater to
8377 general assembler lossage, recognize various UNSPEC sequences
8378 and turn them back into a direct symbol reference. */
8379
8380 static rtx
8381 rs6000_delegitimize_address (rtx orig_x)
8382 {
8383 rtx x, y, offset;
8384
8385 orig_x = delegitimize_mem_from_attrs (orig_x);
8386 x = orig_x;
8387 if (MEM_P (x))
8388 x = XEXP (x, 0);
8389
8390 y = x;
8391 if (TARGET_CMODEL != CMODEL_SMALL
8392 && GET_CODE (y) == LO_SUM)
8393 y = XEXP (y, 1);
8394
8395 offset = NULL_RTX;
8396 if (GET_CODE (y) == PLUS
8397 && GET_MODE (y) == Pmode
8398 && CONST_INT_P (XEXP (y, 1)))
8399 {
8400 offset = XEXP (y, 1);
8401 y = XEXP (y, 0);
8402 }
8403
8404 if (GET_CODE (y) == UNSPEC
8405 && XINT (y, 1) == UNSPEC_TOCREL)
8406 {
8407 y = XVECEXP (y, 0, 0);
8408
8409 #ifdef HAVE_AS_TLS
8410 /* Do not associate thread-local symbols with the original
8411 constant pool symbol. */
8412 if (TARGET_XCOFF
8413 && GET_CODE (y) == SYMBOL_REF
8414 && CONSTANT_POOL_ADDRESS_P (y)
8415 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8416 return orig_x;
8417 #endif
8418
8419 if (offset != NULL_RTX)
8420 y = gen_rtx_PLUS (Pmode, y, offset);
8421 if (!MEM_P (orig_x))
8422 return y;
8423 else
8424 return replace_equiv_address_nv (orig_x, y);
8425 }
8426
8427 if (TARGET_MACHO
8428 && GET_CODE (orig_x) == LO_SUM
8429 && GET_CODE (XEXP (orig_x, 1)) == CONST)
8430 {
8431 y = XEXP (XEXP (orig_x, 1), 0);
8432 if (GET_CODE (y) == UNSPEC
8433 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
8434 return XVECEXP (y, 0, 0);
8435 }
8436
8437 return orig_x;
8438 }
8439
8440 /* Return true if X shouldn't be emitted into the debug info.
8441 The linker doesn't like .toc section references from
8442 .debug_* sections, so reject .toc section symbols. */
8443
8444 static bool
8445 rs6000_const_not_ok_for_debug_p (rtx x)
8446 {
8447 if (GET_CODE (x) == UNSPEC)
8448 return true;
8449 if (GET_CODE (x) == SYMBOL_REF
8450 && CONSTANT_POOL_ADDRESS_P (x))
8451 {
8452 rtx c = get_pool_constant (x);
8453 machine_mode cmode = get_pool_mode (x);
8454 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
8455 return true;
8456 }
8457
8458 return false;
8459 }
8460
8461
8462 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
8463
8464 static bool
8465 rs6000_legitimate_combined_insn (rtx_insn *insn)
8466 {
8467 int icode = INSN_CODE (insn);
8468
8469 /* Reject creating doloop insns. Combine should not be allowed
8470 to create these for a number of reasons:
8471 1) In a nested loop, if combine creates one of these in an
8472 outer loop and the register allocator happens to allocate ctr
8473 to the outer loop insn, then the inner loop can't use ctr.
8474 Inner loops ought to be more highly optimized.
8475 2) Combine often wants to create one of these from what was
8476 originally a three insn sequence, first combining the three
8477 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
8478      allocated ctr, the splitter takes us back to the three insn
8479 sequence. It's better to stop combine at the two insn
8480 sequence.
8481      3) Faced with not being able to allocate ctr for ctrsi/ctrdi
8482 insns, the register allocator sometimes uses floating point
8483 or vector registers for the pseudo. Since ctrsi/ctrdi is a
8484 jump insn and output reloads are not implemented for jumps,
8485 the ctrsi/ctrdi splitters need to handle all possible cases.
8486 That's a pain, and it gets to be seriously difficult when a
8487 splitter that runs after reload needs memory to transfer from
8488 a gpr to fpr. See PR70098 and PR71763 which are not fixed
8489 for the difficult case. It's better to not create problems
8490 in the first place. */
8491 if (icode != CODE_FOR_nothing
8492 && (icode == CODE_FOR_bdz_si
8493 || icode == CODE_FOR_bdz_di
8494 || icode == CODE_FOR_bdnz_si
8495 || icode == CODE_FOR_bdnz_di
8496 || icode == CODE_FOR_bdztf_si
8497 || icode == CODE_FOR_bdztf_di
8498 || icode == CODE_FOR_bdnztf_si
8499 || icode == CODE_FOR_bdnztf_di))
8500 return false;
8501
8502 return true;
8503 }
8504
8505 /* Construct the SYMBOL_REF for the tls_get_addr function. */
8506
8507 static GTY(()) rtx rs6000_tls_symbol;
8508 static rtx
8509 rs6000_tls_get_addr (void)
8510 {
8511 if (!rs6000_tls_symbol)
8512 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
8513
8514 return rs6000_tls_symbol;
8515 }
8516
8517 /* Construct the SYMBOL_REF for TLS GOT references. */
8518
8519 static GTY(()) rtx rs6000_got_symbol;
8520 static rtx
8521 rs6000_got_sym (void)
8522 {
8523 if (!rs6000_got_symbol)
8524 {
8525 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8526 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
8527 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
8528 }
8529
8530 return rs6000_got_symbol;
8531 }
8532
8533 /* AIX Thread-Local Address support. */
8534
8535 static rtx
8536 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
8537 {
8538 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
8539 const char *name;
8540 char *tlsname;
8541
8542 name = XSTR (addr, 0);
8543   /* Append the TLS CSECT qualifier, unless the symbol is already qualified
8544      or will be placed in the TLS private data section.  */
8545 if (name[strlen (name) - 1] != ']'
8546 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
8547 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
8548 {
8549 tlsname = XALLOCAVEC (char, strlen (name) + 4);
8550 strcpy (tlsname, name);
8551 strcat (tlsname,
8552 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
8553 tlsaddr = copy_rtx (addr);
8554 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
8555 }
8556 else
8557 tlsaddr = addr;
8558
8559 /* Place addr into TOC constant pool. */
8560 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
8561
8562 /* Output the TOC entry and create the MEM referencing the value. */
8563 if (constant_pool_expr_p (XEXP (sym, 0))
8564 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
8565 {
8566 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
8567 mem = gen_const_mem (Pmode, tocref);
8568 set_mem_alias_set (mem, get_TOC_alias_set ());
8569 }
8570 else
8571 return sym;
8572
8573 /* Use global-dynamic for local-dynamic. */
8574 if (model == TLS_MODEL_GLOBAL_DYNAMIC
8575 || model == TLS_MODEL_LOCAL_DYNAMIC)
8576 {
8577 /* Create new TOC reference for @m symbol. */
8578 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
8579 tlsname = XALLOCAVEC (char, strlen (name) + 1);
8580 strcpy (tlsname, "*LCM");
8581 strcat (tlsname, name + 3);
8582 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
8583 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
8584 tocref = create_TOC_reference (modaddr, NULL_RTX);
8585 rtx modmem = gen_const_mem (Pmode, tocref);
8586 set_mem_alias_set (modmem, get_TOC_alias_set ());
8587
8588 rtx modreg = gen_reg_rtx (Pmode);
8589 emit_insn (gen_rtx_SET (modreg, modmem));
8590
8591 tmpreg = gen_reg_rtx (Pmode);
8592 emit_insn (gen_rtx_SET (tmpreg, mem));
8593
8594 dest = gen_reg_rtx (Pmode);
8595 if (TARGET_32BIT)
8596 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
8597 else
8598 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
8599 return dest;
8600 }
8601 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
8602 else if (TARGET_32BIT)
8603 {
8604 tlsreg = gen_reg_rtx (SImode);
8605 emit_insn (gen_tls_get_tpointer (tlsreg));
8606 }
8607 else
8608 tlsreg = gen_rtx_REG (DImode, 13);
8609
8610 /* Load the TOC value into temporary register. */
8611 tmpreg = gen_reg_rtx (Pmode);
8612 emit_insn (gen_rtx_SET (tmpreg, mem));
8613 set_unique_reg_note (get_last_insn (), REG_EQUAL,
8614 gen_rtx_MINUS (Pmode, addr, tlsreg));
8615
8616 /* Add TOC symbol value to TLS pointer. */
8617 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
8618
8619 return dest;
8620 }
8621
8622 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
8623 this (thread-local) address. */
8624
8625 static rtx
8626 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
8627 {
8628 rtx dest, insn;
8629
8630 if (TARGET_XCOFF)
8631 return rs6000_legitimize_tls_address_aix (addr, model);
8632
8633 dest = gen_reg_rtx (Pmode);
8634 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
8635 {
8636 rtx tlsreg;
8637
8638 if (TARGET_64BIT)
8639 {
8640 tlsreg = gen_rtx_REG (Pmode, 13);
8641 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
8642 }
8643 else
8644 {
8645 tlsreg = gen_rtx_REG (Pmode, 2);
8646 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
8647 }
8648 emit_insn (insn);
8649 }
8650 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
8651 {
8652 rtx tlsreg, tmp;
8653
8654 tmp = gen_reg_rtx (Pmode);
8655 if (TARGET_64BIT)
8656 {
8657 tlsreg = gen_rtx_REG (Pmode, 13);
8658 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
8659 }
8660 else
8661 {
8662 tlsreg = gen_rtx_REG (Pmode, 2);
8663 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
8664 }
8665 emit_insn (insn);
8666 if (TARGET_64BIT)
8667 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
8668 else
8669 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
8670 emit_insn (insn);
8671 }
8672 else
8673 {
8674 rtx r3, got, tga, tmp1, tmp2, call_insn;
8675
8676 /* We currently use relocations like @got@tlsgd for tls, which
8677 means the linker will handle allocation of tls entries, placing
8678 them in the .got section. So use a pointer to the .got section,
8679 not one to secondary TOC sections used by 64-bit -mminimal-toc,
8680 or to secondary GOT sections used by 32-bit -fPIC. */
8681 if (TARGET_64BIT)
8682 got = gen_rtx_REG (Pmode, 2);
8683 else
8684 {
8685 if (flag_pic == 1)
8686 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
8687 else
8688 {
8689 rtx gsym = rs6000_got_sym ();
8690 got = gen_reg_rtx (Pmode);
8691 if (flag_pic == 0)
8692 rs6000_emit_move (got, gsym, Pmode);
8693 else
8694 {
8695 rtx mem, lab;
8696
8697 tmp1 = gen_reg_rtx (Pmode);
8698 tmp2 = gen_reg_rtx (Pmode);
8699 mem = gen_const_mem (Pmode, tmp1);
8700 lab = gen_label_rtx ();
8701 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
8702 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
8703 if (TARGET_LINK_STACK)
8704 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
8705 emit_move_insn (tmp2, mem);
8706 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
8707 set_unique_reg_note (last, REG_EQUAL, gsym);
8708 }
8709 }
8710 }
8711
8712 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
8713 {
8714 tga = rs6000_tls_get_addr ();
8715 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
8716 const0_rtx, Pmode);
8717
8718 r3 = gen_rtx_REG (Pmode, 3);
8719 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
8720 {
8721 if (TARGET_64BIT)
8722 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
8723 else
8724 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
8725 }
8726 else if (DEFAULT_ABI == ABI_V4)
8727 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
8728 else
8729 gcc_unreachable ();
8730 call_insn = last_call_insn ();
8731 PATTERN (call_insn) = insn;
8732 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
8733 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
8734 pic_offset_table_rtx);
8735 }
8736 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
8737 {
8738 tga = rs6000_tls_get_addr ();
8739 tmp1 = gen_reg_rtx (Pmode);
8740 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
8741 const0_rtx, Pmode);
8742
8743 r3 = gen_rtx_REG (Pmode, 3);
8744 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
8745 {
8746 if (TARGET_64BIT)
8747 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
8748 else
8749 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
8750 }
8751 else if (DEFAULT_ABI == ABI_V4)
8752 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
8753 else
8754 gcc_unreachable ();
8755 call_insn = last_call_insn ();
8756 PATTERN (call_insn) = insn;
8757 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
8758 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
8759 pic_offset_table_rtx);
8760
8761 if (rs6000_tls_size == 16)
8762 {
8763 if (TARGET_64BIT)
8764 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
8765 else
8766 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
8767 }
8768 else if (rs6000_tls_size == 32)
8769 {
8770 tmp2 = gen_reg_rtx (Pmode);
8771 if (TARGET_64BIT)
8772 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
8773 else
8774 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
8775 emit_insn (insn);
8776 if (TARGET_64BIT)
8777 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
8778 else
8779 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
8780 }
8781 else
8782 {
8783 tmp2 = gen_reg_rtx (Pmode);
8784 if (TARGET_64BIT)
8785 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
8786 else
8787 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
8788 emit_insn (insn);
8789 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
8790 }
8791 emit_insn (insn);
8792 }
8793 else
8794 {
8795 /* Initial-exec, or local-exec with a 64-bit offset. */
8796 tmp2 = gen_reg_rtx (Pmode);
8797 if (TARGET_64BIT)
8798 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
8799 else
8800 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
8801 emit_insn (insn);
8802 if (TARGET_64BIT)
8803 insn = gen_tls_tls_64 (dest, tmp2, addr);
8804 else
8805 insn = gen_tls_tls_32 (dest, tmp2, addr);
8806 emit_insn (insn);
8807 }
8808 }
8809
8810 return dest;
8811 }
8812
8813 /* Only create the global variable for the stack protect guard if we are using
8814 the global flavor of that guard. */
8815 static tree
8816 rs6000_init_stack_protect_guard (void)
8817 {
8818 if (rs6000_stack_protector_guard == SSP_GLOBAL)
8819 return default_stack_protect_guard ();
8820
8821 return NULL_TREE;
8822 }
8823
8824 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8825
8826 static bool
8827 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8828 {
8829 if (GET_CODE (x) == HIGH
8830 && GET_CODE (XEXP (x, 0)) == UNSPEC)
8831 return true;
8832
8833 /* A TLS symbol in the TOC cannot contain a sum. */
8834 if (GET_CODE (x) == CONST
8835 && GET_CODE (XEXP (x, 0)) == PLUS
8836 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8837 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
8838 return true;
8839
8840 /* Do not place an ELF TLS symbol in the constant pool. */
8841 return TARGET_ELF && tls_referenced_p (x);
8842 }
8843
8844 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
8845 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
8846 can be addressed relative to the toc pointer. */
8847
8848 static bool
8849 use_toc_relative_ref (rtx sym, machine_mode mode)
8850 {
8851 return ((constant_pool_expr_p (sym)
8852 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
8853 get_pool_mode (sym)))
8854 || (TARGET_CMODEL == CMODEL_MEDIUM
8855 && SYMBOL_REF_LOCAL_P (sym)
8856 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
8857 }
8858
8859 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
8860 replace the input X, or the original X if no replacement is called for.
8861 The output parameter *WIN is 1 if the calling macro should goto WIN,
8862 0 if it should not.
8863
8864 For RS/6000, we wish to handle large displacements off a base
8865 register by splitting the addend between an addis and the mem insn.
8866 This cuts the number of extra insns needed from 3 to 1.
8867
8868 On Darwin, we use this to generate code for floating point constants.
8869 A movsf_low is generated so we wind up with 2 instructions rather than 3.
8870 The Darwin code is inside #if TARGET_MACHO because only then are the
8871 machopic_* functions defined. */
8872 static rtx
8873 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
8874 int opnum, int type,
8875 int ind_levels ATTRIBUTE_UNUSED, int *win)
8876 {
8877 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
8878 bool quad_offset_p = mode_supports_dq_form (mode);
8879
8880 /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a
8881 DFmode/DImode MEM. Ditto for ISA 3.0 vsx_splat_v4sf/v4si. */
8882 if (reg_offset_p
8883 && opnum == 1
8884 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
8885 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)
8886 || (mode == SFmode && recog_data.operand_mode[0] == V4SFmode
8887 && TARGET_P9_VECTOR)
8888 || (mode == SImode && recog_data.operand_mode[0] == V4SImode
8889 && TARGET_P9_VECTOR)))
8890 reg_offset_p = false;
8891
8892 /* We must recognize output that we have already generated ourselves. */
8893 if (GET_CODE (x) == PLUS
8894 && GET_CODE (XEXP (x, 0)) == PLUS
8895 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
8896 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
8897 && GET_CODE (XEXP (x, 1)) == CONST_INT)
8898 {
8899 if (TARGET_DEBUG_ADDR)
8900 {
8901 fprintf (stderr, "\nlegitimize_reload_address push_reload #1:\n");
8902 debug_rtx (x);
8903 }
8904 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8905 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
8906 opnum, (enum reload_type) type);
8907 *win = 1;
8908 return x;
8909 }
8910
8911 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
8912 if (GET_CODE (x) == LO_SUM
8913 && GET_CODE (XEXP (x, 0)) == HIGH)
8914 {
8915 if (TARGET_DEBUG_ADDR)
8916 {
8917 fprintf (stderr, "\nlegitimize_reload_address push_reload #2:\n");
8918 debug_rtx (x);
8919 }
8920 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8921 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8922 opnum, (enum reload_type) type);
8923 *win = 1;
8924 return x;
8925 }
8926
8927 #if TARGET_MACHO
8928 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
8929 && GET_CODE (x) == LO_SUM
8930 && GET_CODE (XEXP (x, 0)) == PLUS
8931 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
8932 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
8933 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
8934 && machopic_operand_p (XEXP (x, 1)))
8935 {
8936 /* Result of previous invocation of this function on Darwin
8937 floating point constant. */
8938 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8939 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8940 opnum, (enum reload_type) type);
8941 *win = 1;
8942 return x;
8943 }
8944 #endif
8945
8946 if (TARGET_CMODEL != CMODEL_SMALL
8947 && reg_offset_p
8948 && !quad_offset_p
8949 && small_toc_ref (x, VOIDmode))
8950 {
8951 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
8952 x = gen_rtx_LO_SUM (Pmode, hi, x);
8953 if (TARGET_DEBUG_ADDR)
8954 {
8955 fprintf (stderr, "\nlegitimize_reload_address push_reload #3:\n");
8956 debug_rtx (x);
8957 }
8958 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8959 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8960 opnum, (enum reload_type) type);
8961 *win = 1;
8962 return x;
8963 }
8964
8965 if (GET_CODE (x) == PLUS
8966 && REG_P (XEXP (x, 0))
8967 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
8968 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
8969 && CONST_INT_P (XEXP (x, 1))
8970 && reg_offset_p
8971 && (quad_offset_p || !VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
8972 {
8973 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
8974 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
8975 HOST_WIDE_INT high
8976 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8977
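/* For example, a displacement of 0x12345 splits into low = 0x2345
   and high = 0x10000; the (reg + 0x10000) part is reloaded into a
   base register below while the 0x2345 stays in the mem insn. */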
8978 /* Check for 32-bit overflow or quad addresses with one of the
8979 four least significant bits set. */
8980 if (high + low != val
8981 || (quad_offset_p && (low & 0xf)))
8982 {
8983 *win = 0;
8984 return x;
8985 }
8986
8987 /* Reload the high part into a base reg; leave the low part
8988 in the mem directly. */
8989
8990 x = gen_rtx_PLUS (GET_MODE (x),
8991 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
8992 GEN_INT (high)),
8993 GEN_INT (low));
8994
8995 if (TARGET_DEBUG_ADDR)
8996 {
8997 fprintf (stderr, "\nlegitimize_reload_address push_reload #4:\n");
8998 debug_rtx (x);
8999 }
9000 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9001 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
9002 opnum, (enum reload_type) type);
9003 *win = 1;
9004 return x;
9005 }
9006
9007 if (GET_CODE (x) == SYMBOL_REF
9008 && reg_offset_p
9009 && !quad_offset_p
9010 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
9011 #if TARGET_MACHO
9012 && DEFAULT_ABI == ABI_DARWIN
9013 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
9014 && machopic_symbol_defined_p (x)
9015 #else
9016 && DEFAULT_ABI == ABI_V4
9017 && !flag_pic
9018 #endif
9019 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
9020 The same goes for DImode without 64-bit gprs and DFmode and DDmode
9021 without fprs.
9022 ??? Assume floating point reg based on mode? This assumption is
9023 violated by e.g. a powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
9024 where reload ends up doing a DFmode load of a constant from
9025 mem using two gprs. Unfortunately, at this point reload
9026 hasn't yet selected regs so poking around in reload data
9027 won't help and even if we could figure out the regs reliably,
9028 we'd still want to allow this transformation when the mem is
9029 naturally aligned. Since we say the address is good here, we
9030 can't disable offsets from LO_SUMs in mem_operand_gpr.
9031 FIXME: Allow offset from lo_sum for other modes too, when
9032 mem is sufficiently aligned.
9033
9034 Also disallow this if the type can go in VMX/Altivec registers, since
9035 those registers do not have d-form (reg+offset) address modes. */
9036 && !reg_addr[mode].scalar_in_vmx_p
9037 && mode != TFmode
9038 && mode != TDmode
9039 && mode != IFmode
9040 && mode != KFmode
9041 && (mode != TImode || !TARGET_VSX)
9042 && mode != PTImode
9043 && (mode != DImode || TARGET_POWERPC64)
9044 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
9045 || TARGET_HARD_FLOAT))
9046 {
9047 #if TARGET_MACHO
9048 if (flag_pic)
9049 {
9050 rtx offset = machopic_gen_offset (x);
9051 x = gen_rtx_LO_SUM (GET_MODE (x),
9052 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
9053 gen_rtx_HIGH (Pmode, offset)), offset);
9054 }
9055 else
9056 #endif
9057 x = gen_rtx_LO_SUM (GET_MODE (x),
9058 gen_rtx_HIGH (Pmode, x), x);
9059
9060 if (TARGET_DEBUG_ADDR)
9061 {
9062 fprintf (stderr, "\nlegitimize_reload_address push_reload #5:\n");
9063 debug_rtx (x);
9064 }
9065 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9066 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9067 opnum, (enum reload_type) type);
9068 *win = 1;
9069 return x;
9070 }
9071
9072 /* Reload an offset address wrapped by an AND that represents the
9073 masking of the lower bits. Strip the outer AND and let reload
9074 convert the offset address into an indirect address. For VSX,
9075 force reload to create the address with an AND in a separate
9076 register, because we can't guarantee an altivec register will
9077 be used. */
9078 if (VECTOR_MEM_ALTIVEC_P (mode)
9079 && GET_CODE (x) == AND
9080 && GET_CODE (XEXP (x, 0)) == PLUS
9081 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9082 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9083 && GET_CODE (XEXP (x, 1)) == CONST_INT
9084 && INTVAL (XEXP (x, 1)) == -16)
9085 {
9086 x = XEXP (x, 0);
9087 *win = 1;
9088 return x;
9089 }
9090
9091 if (TARGET_TOC
9092 && reg_offset_p
9093 && !quad_offset_p
9094 && GET_CODE (x) == SYMBOL_REF
9095 && use_toc_relative_ref (x, mode))
9096 {
9097 x = create_TOC_reference (x, NULL_RTX);
9098 if (TARGET_CMODEL != CMODEL_SMALL)
9099 {
9100 if (TARGET_DEBUG_ADDR)
9101 {
9102 fprintf (stderr, "\nlegitimize_reload_address push_reload #6:\n");
9103 debug_rtx (x);
9104 }
9105 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9106 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9107 opnum, (enum reload_type) type);
9108 }
9109 *win = 1;
9110 return x;
9111 }
9112 *win = 0;
9113 return x;
9114 }
9115
9116 /* Debug version of rs6000_legitimize_reload_address. */
9117 static rtx
9118 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
9119 int opnum, int type,
9120 int ind_levels, int *win)
9121 {
9122 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
9123 ind_levels, win);
9124 fprintf (stderr,
9125 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
9126 "type = %d, ind_levels = %d, win = %d, original addr:\n",
9127 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
9128 debug_rtx (x);
9129
9130 if (x == ret)
9131 fprintf (stderr, "Same address returned\n");
9132 else if (!ret)
9133 fprintf (stderr, "NULL returned\n");
9134 else
9135 {
9136 fprintf (stderr, "New address:\n");
9137 debug_rtx (ret);
9138 }
9139
9140 return ret;
9141 }
9142
9143 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9144 that is a valid memory address for an instruction.
9145 The MODE argument is the machine mode for the MEM expression
9146 that wants to use this address.
9147
9148 On the RS/6000, there are four valid address forms: a SYMBOL_REF
9149 that refers to a constant pool entry of an address (or the sum of it
9150 plus a constant), a short (16-bit signed) constant plus a register,
9151 the sum of two registers, or a register indirect, possibly with an
9152 auto-increment. For DFmode, DDmode and DImode with a constant plus
9153 register, we must ensure that both words are addressable, or on
9154 PowerPC64 that the offset is word-aligned.
9155
9156 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9157 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9158 because adjacent memory cells are accessed by adding word-sized offsets
9159 during assembly output. */
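/* As a rough illustration (assumed assembler syntax), the forms
   accepted below correspond to addresses such as:

     reg + 16-bit offset:  lwz r9,32(r3)
     reg + reg:            lwzx r9,r3,r4
     reg indirect:         lwz r9,0(r3)
     pre-increment:        lwzu r9,4(r3)  */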
9160 static bool
9161 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
9162 {
9163 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9164 bool quad_offset_p = mode_supports_dq_form (mode);
9165
9166 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
9167 if (VECTOR_MEM_ALTIVEC_P (mode)
9168 && GET_CODE (x) == AND
9169 && GET_CODE (XEXP (x, 1)) == CONST_INT
9170 && INTVAL (XEXP (x, 1)) == -16)
9171 x = XEXP (x, 0);
9172
9173 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
9174 return 0;
9175 if (legitimate_indirect_address_p (x, reg_ok_strict))
9176 return 1;
9177 if (TARGET_UPDATE
9178 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
9179 && mode_supports_pre_incdec_p (mode)
9180 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
9181 return 1;
9182 /* Handle restricted vector d-form offsets in ISA 3.0. */
9183 if (quad_offset_p)
9184 {
9185 if (quad_address_p (x, mode, reg_ok_strict))
9186 return 1;
9187 }
9188 else if (virtual_stack_registers_memory_p (x))
9189 return 1;
9190
9191 else if (reg_offset_p)
9192 {
9193 if (legitimate_small_data_p (mode, x))
9194 return 1;
9195 if (legitimate_constant_pool_address_p (x, mode,
9196 reg_ok_strict || lra_in_progress))
9197 return 1;
9198 }
9199
9200 /* For TImode, if we have TImode in VSX registers, only allow register
9201 indirect addresses. This will allow the values to go in either GPRs
9202 or VSX registers without reloading. The vector types would tend to
9203 go into VSX registers, so we allow REG+REG, while TImode seems
9204 somewhat split, in that some uses are GPR based, and some VSX based. */
9205 /* FIXME: We could loosen this by changing the following to
9206 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
9207 but currently we cannot allow REG+REG addressing for TImode. See
9208 PR72827 for complete details on how this ends up hoodwinking DSE. */
9209 if (mode == TImode && TARGET_VSX)
9210 return 0;
9211 /* If not REG_OK_STRICT (i.e. before reload), allow any stack offset. */
9212 if (! reg_ok_strict
9213 && reg_offset_p
9214 && GET_CODE (x) == PLUS
9215 && GET_CODE (XEXP (x, 0)) == REG
9216 && (XEXP (x, 0) == virtual_stack_vars_rtx
9217 || XEXP (x, 0) == arg_pointer_rtx)
9218 && GET_CODE (XEXP (x, 1)) == CONST_INT)
9219 return 1;
9220 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
9221 return 1;
9222 if (!FLOAT128_2REG_P (mode)
9223 && (TARGET_HARD_FLOAT
9224 || TARGET_POWERPC64
9225 || (mode != DFmode && mode != DDmode))
9226 && (TARGET_POWERPC64 || mode != DImode)
9227 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
9228 && mode != PTImode
9229 && !avoiding_indexed_address_p (mode)
9230 && legitimate_indexed_address_p (x, reg_ok_strict))
9231 return 1;
9232 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
9233 && mode_supports_pre_modify_p (mode)
9234 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
9235 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
9236 reg_ok_strict, false)
9237 || (!avoiding_indexed_address_p (mode)
9238 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
9239 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
9240 return 1;
9241 if (reg_offset_p && !quad_offset_p
9242 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
9243 return 1;
9244 return 0;
9245 }
9246
9247 /* Debug version of rs6000_legitimate_address_p. */
9248 static bool
9249 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
9250 bool reg_ok_strict)
9251 {
9252 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
9253 fprintf (stderr,
9254 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9255 "strict = %d, reload = %s, code = %s\n",
9256 ret ? "true" : "false",
9257 GET_MODE_NAME (mode),
9258 reg_ok_strict,
9259 (reload_completed ? "after" : "before"),
9260 GET_RTX_NAME (GET_CODE (x)));
9261 debug_rtx (x);
9262
9263 return ret;
9264 }
9265
9266 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
9267
9268 static bool
9269 rs6000_mode_dependent_address_p (const_rtx addr,
9270 addr_space_t as ATTRIBUTE_UNUSED)
9271 {
9272 return rs6000_mode_dependent_address_ptr (addr);
9273 }
9274
9275 /* Go to LABEL if ADDR (a legitimate address expression)
9276 has an effect that depends on the machine mode it is used for.
9277
9278 On the RS/6000 this is true of all integral offsets (since AltiVec
9279 and VSX modes don't allow them) and of pre-increment or decrement.
9280
9281 ??? Except that due to conceptual problems in offsettable_address_p
9282 we can't really report the problems of integral offsets. So leave
9283 this assuming that the adjustable offset must be valid for the
9284 sub-words of a TFmode operand, which is what we had before. */
9285
9286 static bool
9287 rs6000_mode_dependent_address (const_rtx addr)
9288 {
9289 switch (GET_CODE (addr))
9290 {
9291 case PLUS:
9292 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
9293 is considered a legitimate address before reload, so there
9294 are no offset restrictions in that case. Note that this
9295 condition is safe in strict mode because any address involving
9296 virtual_stack_vars_rtx or arg_pointer_rtx would already have
9297 been rejected as illegitimate. */
9298 if (XEXP (addr, 0) != virtual_stack_vars_rtx
9299 && XEXP (addr, 0) != arg_pointer_rtx
9300 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
9301 {
9302 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
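/* VAL is treated as unsigned so that a single comparison catches
   both offsets below -0x8000 and offsets whose largest sub-word
   access (VAL plus 8, or plus 12 when using 32-bit words) would
   overflow the signed 16-bit displacement field. */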
9303 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
9304 }
9305 break;
9306
9307 case LO_SUM:
9308 /* Anything in the constant pool is sufficiently aligned that
9309 all bytes have the same high part address. */
9310 return !legitimate_constant_pool_address_p (addr, QImode, false);
9311
9312 /* Auto-increment cases are now treated generically in recog.c. */
9313 case PRE_MODIFY:
9314 return TARGET_UPDATE;
9315
9316 /* AND is only allowed in Altivec loads. */
9317 case AND:
9318 return true;
9319
9320 default:
9321 break;
9322 }
9323
9324 return false;
9325 }
9326
9327 /* Debug version of rs6000_mode_dependent_address. */
9328 static bool
9329 rs6000_debug_mode_dependent_address (const_rtx addr)
9330 {
9331 bool ret = rs6000_mode_dependent_address (addr);
9332
9333 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
9334 ret ? "true" : "false");
9335 debug_rtx (addr);
9336
9337 return ret;
9338 }
9339
9340 /* Implement FIND_BASE_TERM. */
9341
9342 rtx
9343 rs6000_find_base_term (rtx op)
9344 {
9345 rtx base;
9346
9347 base = op;
9348 if (GET_CODE (base) == CONST)
9349 base = XEXP (base, 0);
9350 if (GET_CODE (base) == PLUS)
9351 base = XEXP (base, 0);
9352 if (GET_CODE (base) == UNSPEC)
9353 switch (XINT (base, 1))
9354 {
9355 case UNSPEC_TOCREL:
9356 case UNSPEC_MACHOPIC_OFFSET:
9357 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
9358 for aliasing purposes. */
9359 return XVECEXP (base, 0, 0);
9360 }
9361
9362 return op;
9363 }
9364
9365 /* More elaborate version of recog's offsettable_memref_p predicate
9366 that works around the ??? note of rs6000_mode_dependent_address.
9367 In particular it accepts
9368
9369 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9370
9371 in 32-bit mode, which the recog predicate rejects. */
9372
9373 static bool
9374 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
9375 {
9376 bool worst_case;
9377
9378 if (!MEM_P (op))
9379 return false;
9380
9381 /* First mimic offsettable_memref_p. */
9382 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
9383 return true;
9384
9385 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
9386 the latter predicate knows nothing about the mode of the memory
9387 reference and, therefore, assumes that it is the largest supported
9388 mode (TFmode). As a consequence, legitimate offsettable memory
9389 references are rejected. rs6000_legitimate_offset_address_p contains
9390 the correct logic for the PLUS case of rs6000_mode_dependent_address,
9391 at least with a little bit of help here given that we know the
9392 actual registers used. */
9393 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
9394 || GET_MODE_SIZE (reg_mode) == 4);
9395 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
9396 strict, worst_case);
9397 }
9398
9399 /* Determine the reassociation width to be used in reassociate_bb.
9400 This takes into account how many parallel operations we
9401 can actually do of a given type, and also the latency.
9402 P8:
9403 int add/sub 6/cycle
9404 mul 2/cycle
9405 vect add/sub/mul 2/cycle
9406 fp add/sub/mul 2/cycle
9407 dfp 1/cycle
9408 */
9409
9410 static int
9411 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
9412 machine_mode mode)
9413 {
9414 switch (rs6000_tune)
9415 {
9416 case PROCESSOR_POWER8:
9417 case PROCESSOR_POWER9:
9418 if (DECIMAL_FLOAT_MODE_P (mode))
9419 return 1;
9420 if (VECTOR_MODE_P (mode))
9421 return 4;
9422 if (INTEGRAL_MODE_P (mode))
9423 return 1;
9424 if (FLOAT_MODE_P (mode))
9425 return 4;
9426 break;
9427 default:
9428 break;
9429 }
9430 return 1;
9431 }
9432
9433 /* Change register usage conditional on target flags. */
9434 static void
9435 rs6000_conditional_register_usage (void)
9436 {
9437 int i;
9438
9439 if (TARGET_DEBUG_TARGET)
9440 fprintf (stderr, "rs6000_conditional_register_usage called\n");
9441
9442 /* Set MQ register fixed (already call_used) so that it will not be
9443 allocated. */
9444 fixed_regs[64] = 1;
9445
9446 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9447 if (TARGET_64BIT)
9448 fixed_regs[13] = call_used_regs[13]
9449 = call_really_used_regs[13] = 1;
9450
9451 /* Conditionally disable FPRs. */
9452 if (TARGET_SOFT_FLOAT)
9453 for (i = 32; i < 64; i++)
9454 fixed_regs[i] = call_used_regs[i]
9455 = call_really_used_regs[i] = 1;
9456
9457 /* The TOC register is not killed across calls in a way that is
9458 visible to the compiler. */
9459 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9460 call_really_used_regs[2] = 0;
9461
9462 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
9463 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9464
9465 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
9466 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9467 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9468 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9469
9470 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
9471 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9472 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9473 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9474
9475 if (TARGET_TOC && TARGET_MINIMAL_TOC)
9476 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9477 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9478
9479 if (!TARGET_ALTIVEC && !TARGET_VSX)
9480 {
9481 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
9482 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
9483 call_really_used_regs[VRSAVE_REGNO] = 1;
9484 }
9485
9486 if (TARGET_ALTIVEC || TARGET_VSX)
9487 global_regs[VSCR_REGNO] = 1;
9488
9489 if (TARGET_ALTIVEC_ABI)
9490 {
9491 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
9492 call_used_regs[i] = call_really_used_regs[i] = 1;
9493
9494 /* AIX reserves VR20:31 in non-extended ABI mode. */
9495 if (TARGET_XCOFF)
9496 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
9497 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
9498 }
9499 }
9500
9501 \f
9502 /* Output insns to set DEST equal to the constant SOURCE as a series of
9503 lis, ori and shl instructions and return TRUE. */
9504
9505 bool
9506 rs6000_emit_set_const (rtx dest, rtx source)
9507 {
9508 machine_mode mode = GET_MODE (dest);
9509 rtx temp, set;
9510 rtx_insn *insn;
9511 HOST_WIDE_INT c;
9512
9513 gcc_checking_assert (CONST_INT_P (source));
9514 c = INTVAL (source);
9515 switch (mode)
9516 {
9517 case E_QImode:
9518 case E_HImode:
9519 emit_insn (gen_rtx_SET (dest, source));
9520 return true;
9521
9522 case E_SImode:
9523 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
9524
9525 emit_insn (gen_rtx_SET (copy_rtx (temp),
9526 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
9527 emit_insn (gen_rtx_SET (dest,
9528 gen_rtx_IOR (SImode, copy_rtx (temp),
9529 GEN_INT (c & 0xffff))));
9530 break;
9531
9532 case E_DImode:
9533 if (!TARGET_POWERPC64)
9534 {
9535 rtx hi, lo;
9536
9537 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
9538 DImode);
9539 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
9540 DImode);
9541 emit_move_insn (hi, GEN_INT (c >> 32));
9542 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
9543 emit_move_insn (lo, GEN_INT (c));
9544 }
9545 else
9546 rs6000_emit_set_long_const (dest, c);
9547 break;
9548
9549 default:
9550 gcc_unreachable ();
9551 }
9552
9553 insn = get_last_insn ();
9554 set = single_set (insn);
9555 if (! CONSTANT_P (SET_SRC (set)))
9556 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
9557
9558 return true;
9559 }
9560
9561 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
9562 Output insns to set DEST equal to the constant C as a series of
9563 lis, ori and shl instructions. */
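/* As an illustration, c = 0x123456789abcdef0 splits into ud4 = 0x1234,
   ud3 = 0x5678, ud2 = 0x9abc, ud1 = 0xdef0, and the general case
   below builds it roughly as:

     lis   rT,0x1234      # ud4, shifted and sign-extended
     ori   rT,rT,0x5678   # OR in ud3
     sldi  rT,rT,32       # move into the high 32 bits
     oris  rT,rT,0x9abc   # OR in ud2 << 16
     ori   rD,rT,0xdef0   # OR in ud1  */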
9564
9565 static void
9566 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
9567 {
9568 rtx temp;
9569 HOST_WIDE_INT ud1, ud2, ud3, ud4;
9570
9571 ud1 = c & 0xffff;
9572 c = c >> 16;
9573 ud2 = c & 0xffff;
9574 c = c >> 16;
9575 ud3 = c & 0xffff;
9576 c = c >> 16;
9577 ud4 = c & 0xffff;
9578
9579 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
9580 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
9581 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
9582
9583 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
9584 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
9585 {
9586 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9587
9588 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9589 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9590 if (ud1 != 0)
9591 emit_move_insn (dest,
9592 gen_rtx_IOR (DImode, copy_rtx (temp),
9593 GEN_INT (ud1)));
9594 }
9595 else if (ud3 == 0 && ud4 == 0)
9596 {
9597 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9598
9599 gcc_assert (ud2 & 0x8000);
9600 emit_move_insn (copy_rtx (temp),
9601 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9602 if (ud1 != 0)
9603 emit_move_insn (copy_rtx (temp),
9604 gen_rtx_IOR (DImode, copy_rtx (temp),
9605 GEN_INT (ud1)));
9606 emit_move_insn (dest,
9607 gen_rtx_ZERO_EXTEND (DImode,
9608 gen_lowpart (SImode,
9609 copy_rtx (temp))));
9610 }
9611 else if ((ud4 == 0xffff && (ud3 & 0x8000))
9612 || (ud4 == 0 && ! (ud3 & 0x8000)))
9613 {
9614 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9615
9616 emit_move_insn (copy_rtx (temp),
9617 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
9618 if (ud2 != 0)
9619 emit_move_insn (copy_rtx (temp),
9620 gen_rtx_IOR (DImode, copy_rtx (temp),
9621 GEN_INT (ud2)));
9622 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9623 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9624 GEN_INT (16)));
9625 if (ud1 != 0)
9626 emit_move_insn (dest,
9627 gen_rtx_IOR (DImode, copy_rtx (temp),
9628 GEN_INT (ud1)));
9629 }
9630 else
9631 {
9632 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9633
9634 emit_move_insn (copy_rtx (temp),
9635 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
9636 if (ud3 != 0)
9637 emit_move_insn (copy_rtx (temp),
9638 gen_rtx_IOR (DImode, copy_rtx (temp),
9639 GEN_INT (ud3)));
9640
9641 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
9642 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9643 GEN_INT (32)));
9644 if (ud2 != 0)
9645 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9646 gen_rtx_IOR (DImode, copy_rtx (temp),
9647 GEN_INT (ud2 << 16)));
9648 if (ud1 != 0)
9649 emit_move_insn (dest,
9650 gen_rtx_IOR (DImode, copy_rtx (temp),
9651 GEN_INT (ud1)));
9652 }
9653 }
9654
9655 /* Helper for the following. Get rid of [r+r] memory refs
9656 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
9657
9658 static void
9659 rs6000_eliminate_indexed_memrefs (rtx operands[2])
9660 {
9661 if (GET_CODE (operands[0]) == MEM
9662 && GET_CODE (XEXP (operands[0], 0)) != REG
9663 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
9664 GET_MODE (operands[0]), false))
9665 operands[0]
9666 = replace_equiv_address (operands[0],
9667 copy_addr_to_reg (XEXP (operands[0], 0)));
9668
9669 if (GET_CODE (operands[1]) == MEM
9670 && GET_CODE (XEXP (operands[1], 0)) != REG
9671 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
9672 GET_MODE (operands[1]), false))
9673 operands[1]
9674 = replace_equiv_address (operands[1],
9675 copy_addr_to_reg (XEXP (operands[1], 0)));
9676 }
9677
9678 /* Generate a vector of constants to permute MODE for a little-endian
9679 storage operation by swapping the two halves of a vector. */
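/* For V4SImode, for instance, this produces the permutation
   { 2, 3, 0, 1 }, i.e. the two 64-bit halves swapped. */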
9680 static rtvec
9681 rs6000_const_vec (machine_mode mode)
9682 {
9683 int i, subparts;
9684 rtvec v;
9685
9686 switch (mode)
9687 {
9688 case E_V1TImode:
9689 subparts = 1;
9690 break;
9691 case E_V2DFmode:
9692 case E_V2DImode:
9693 subparts = 2;
9694 break;
9695 case E_V4SFmode:
9696 case E_V4SImode:
9697 subparts = 4;
9698 break;
9699 case E_V8HImode:
9700 subparts = 8;
9701 break;
9702 case E_V16QImode:
9703 subparts = 16;
9704 break;
9705 default:
9706 gcc_unreachable ();
9707 }
9708
9709 v = rtvec_alloc (subparts);
9710
9711 for (i = 0; i < subparts / 2; ++i)
9712 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
9713 for (i = subparts / 2; i < subparts; ++i)
9714 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
9715
9716 return v;
9717 }
9718
9719 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
9720 store operation. */
9721 void
9722 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
9723 {
9724 /* Scalar permutations are easier to express in integer modes than in
9725 floating-point modes, so cast them here. We use V1TImode instead
9726 of TImode to ensure that the values don't go through GPRs. */
9727 if (FLOAT128_VECTOR_P (mode))
9728 {
9729 dest = gen_lowpart (V1TImode, dest);
9730 source = gen_lowpart (V1TImode, source);
9731 mode = V1TImode;
9732 }
9733
9734 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
9735 scalar. */
9736 if (mode == TImode || mode == V1TImode)
9737 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
9738 GEN_INT (64))));
9739 else
9740 {
9741 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
9742 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
9743 }
9744 }
9745
9746 /* Emit a little-endian load from vector memory location SOURCE to VSX
9747 register DEST in mode MODE. The load is done with two permuting
9748 insns that represent an lxvd2x and an xxpermdi. */
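/* Illustratively, the generated sequence is something like

     lxvd2x   vT,0,rA      # load with the two doublewords swapped
     xxpermdi vD,vT,vT,2   # swap them back

   which together perform a correctly ordered little-endian load. */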
9749 void
9750 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
9751 {
9752 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9753 V1TImode). */
9754 if (mode == TImode || mode == V1TImode)
9755 {
9756 mode = V2DImode;
9757 dest = gen_lowpart (V2DImode, dest);
9758 source = adjust_address (source, V2DImode, 0);
9759 }
9760
9761 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
9762 rs6000_emit_le_vsx_permute (tmp, source, mode);
9763 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9764 }
9765
9766 /* Emit a little-endian store to vector memory location DEST from VSX
9767 register SOURCE in mode MODE. The store is done with two permuting
9768 insns that represent an xxpermdi and an stxvd2x. */
9769 void
9770 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
9771 {
9772 /* This should never be called during or after LRA, because it does
9773 not re-permute the source register. It is intended only for use
9774 during expand. */
9775 gcc_assert (!lra_in_progress && !reload_completed);
9776
9777 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9778 V1TImode). */
9779 if (mode == TImode || mode == V1TImode)
9780 {
9781 mode = V2DImode;
9782 dest = adjust_address (dest, V2DImode, 0);
9783 source = gen_lowpart (V2DImode, source);
9784 }
9785
9786 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
9787 rs6000_emit_le_vsx_permute (tmp, source, mode);
9788 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9789 }
9790
9791 /* Emit a sequence representing a little-endian VSX load or store,
9792 moving data from SOURCE to DEST in mode MODE. This is done
9793 separately from rs6000_emit_move to ensure it is called only
9794 during expand. LE VSX loads and stores introduced later are
9795 handled with a split. The expand-time RTL generation allows
9796 us to optimize away redundant pairs of register-permutes. */
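/* For instance, a memory-to-memory copy through a register expands
   to four permuting insns; the middle two act on the register value
   and are inverses of each other, so later passes can optimize the
   redundant pair away. */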
9797 void
9798 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
9799 {
9800 gcc_assert (!BYTES_BIG_ENDIAN
9801 && VECTOR_MEM_VSX_P (mode)
9802 && !TARGET_P9_VECTOR
9803 && !gpr_or_gpr_p (dest, source)
9804 && (MEM_P (source) ^ MEM_P (dest)));
9805
9806 if (MEM_P (source))
9807 {
9808 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
9809 rs6000_emit_le_vsx_load (dest, source, mode);
9810 }
9811 else
9812 {
9813 if (!REG_P (source))
9814 source = force_reg (mode, source);
9815 rs6000_emit_le_vsx_store (dest, source, mode);
9816 }
9817 }
9818
9819 /* Return whether an SFmode or SImode move can be done without converting one
9820 mode to another. This arises when we have:
9821
9822 (SUBREG:SF (REG:SI ...))
9823 (SUBREG:SI (REG:SF ...))
9824
9825 and one of the values is in a floating point/vector register, where SFmode
9826 scalars are stored in DFmode format. */
9827
9828 bool
9829 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
9830 {
9831 if (TARGET_ALLOW_SF_SUBREG)
9832 return true;
9833
9834 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
9835 return true;
9836
9837 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
9838 return true;
9839
9840 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
9841 if (SUBREG_P (dest))
9842 {
9843 rtx dest_subreg = SUBREG_REG (dest);
9844 rtx src_subreg = SUBREG_REG (src);
9845 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
9846 }
9847
9848 return false;
9849 }
9850
9851
9852 /* Helper function to change moves with:
9853
9854 (SUBREG:SF (REG:SI)) and
9855 (SUBREG:SI (REG:SF))
9856
9857 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
9858 values are stored as DFmode values in the VSX registers. We need to convert
9859 the bits before we can use a direct move or operate on the bits in the
9860 vector register as an integer type.
9861
9862 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
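/* For example (illustrative), (set (reg:SI) (subreg:SI (reg:SF)))
   becomes a movsi_from_sf insn, which converts the DFmode-format
   value back to its single-precision bit pattern before moving it
   to the GPR, rather than emitting a plain register move. */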
9863
9864 static bool
9865 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
9866 {
9867 if (TARGET_DIRECT_MOVE_64BIT && !lra_in_progress && !reload_completed
9868 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
9869 && SUBREG_P (source) && sf_subreg_operand (source, mode))
9870 {
9871 rtx inner_source = SUBREG_REG (source);
9872 machine_mode inner_mode = GET_MODE (inner_source);
9873
9874 if (mode == SImode && inner_mode == SFmode)
9875 {
9876 emit_insn (gen_movsi_from_sf (dest, inner_source));
9877 return true;
9878 }
9879
9880 if (mode == SFmode && inner_mode == SImode)
9881 {
9882 emit_insn (gen_movsf_from_si (dest, inner_source));
9883 return true;
9884 }
9885 }
9886
9887 return false;
9888 }
9889
9890 /* Emit a move from SOURCE to DEST in mode MODE. */
9891 void
9892 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
9893 {
9894 rtx operands[2];
9895 operands[0] = dest;
9896 operands[1] = source;
9897
9898 if (TARGET_DEBUG_ADDR)
9899 {
9900 fprintf (stderr,
9901 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
9902 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
9903 GET_MODE_NAME (mode),
9904 lra_in_progress,
9905 reload_completed,
9906 can_create_pseudo_p ());
9907 debug_rtx (dest);
9908 fprintf (stderr, "source:\n");
9909 debug_rtx (source);
9910 }
9911
9912 /* Sanity check: a CONST_WIDE_INT should appear only for modes wider than a HOST_WIDE_INT. */
9913 if (CONST_WIDE_INT_P (operands[1])
9914 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
9915 {
9916 /* This should be fixed with the introduction of CONST_WIDE_INT. */
9917 gcc_unreachable ();
9918 }
9919
9920 #ifdef HAVE_AS_GNU_ATTRIBUTE
9921 /* If we use a long double type, set the flags in .gnu_attribute that say
9922 what the long double type is. This is to allow the linker's warning
9923 message for the wrong long double to be useful, even if the function does
9924 not do a call (for example, doing a 128-bit add on power9 if the long
9925 double type is IEEE 128-bit). Do not set this if __ibm128 or __float128
9926 are used and they aren't the default long double type. */
9927 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
9928 {
9929 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
9930 rs6000_passes_float = rs6000_passes_long_double = true;
9931
9932 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
9933 rs6000_passes_float = rs6000_passes_long_double = true;
9934 }
9935 #endif
9936
9937 /* See if we need to special case SImode/SFmode SUBREG moves. */
9938 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
9939 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
9940 return;
9941
9942 /* Check if GCC is setting up a block move that will end up using FP
9943 registers as temporaries. We must make sure this is acceptable. */
9944 if (GET_CODE (operands[0]) == MEM
9945 && GET_CODE (operands[1]) == MEM
9946 && mode == DImode
9947 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
9948 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
9949 && ! (rs6000_slow_unaligned_access (SImode,
9950 (MEM_ALIGN (operands[0]) > 32
9951 ? 32 : MEM_ALIGN (operands[0])))
9952 || rs6000_slow_unaligned_access (SImode,
9953 (MEM_ALIGN (operands[1]) > 32
9954 ? 32 : MEM_ALIGN (operands[1]))))
9955 && ! MEM_VOLATILE_P (operands [0])
9956 && ! MEM_VOLATILE_P (operands [1]))
9957 {
9958 emit_move_insn (adjust_address (operands[0], SImode, 0),
9959 adjust_address (operands[1], SImode, 0));
9960 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
9961 adjust_address (copy_rtx (operands[1]), SImode, 4));
9962 return;
9963 }
9964
9965 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
9966 && !gpc_reg_operand (operands[1], mode))
9967 operands[1] = force_reg (mode, operands[1]);
9968
9969 /* Recognize the case where operand[1] is a reference to thread-local
9970 data and load its address to a register. */
9971 if (tls_referenced_p (operands[1]))
9972 {
9973 enum tls_model model;
9974 rtx tmp = operands[1];
9975 rtx addend = NULL;
9976
9977 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
9978 {
9979 addend = XEXP (XEXP (tmp, 0), 1);
9980 tmp = XEXP (XEXP (tmp, 0), 0);
9981 }
9982
9983 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
9984 model = SYMBOL_REF_TLS_MODEL (tmp);
9985 gcc_assert (model != 0);
9986
9987 tmp = rs6000_legitimize_tls_address (tmp, model);
9988 if (addend)
9989 {
9990 tmp = gen_rtx_PLUS (mode, tmp, addend);
9991 tmp = force_operand (tmp, operands[0]);
9992 }
9993 operands[1] = tmp;
9994 }
9995
9996 /* 128-bit constant floating-point values on Darwin should really be loaded
9997 as two parts. However, this premature splitting is a problem when DFmode
9998 values can go into Altivec registers. */
9999 if (FLOAT128_IBM_P (mode) && !reg_addr[DFmode].scalar_in_vmx_p
10000 && GET_CODE (operands[1]) == CONST_DOUBLE)
10001 {
10002 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
10003 simplify_gen_subreg (DFmode, operands[1], mode, 0),
10004 DFmode);
10005 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
10006 GET_MODE_SIZE (DFmode)),
10007 simplify_gen_subreg (DFmode, operands[1], mode,
10008 GET_MODE_SIZE (DFmode)),
10009 DFmode);
10010 return;
10011 }
10012
10013 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10014 p1:SD) if p1 is not of floating point class and p0 is spilled,
10015 since we have no analogous movsd_store for this case. */
10016 if (lra_in_progress && mode == DDmode
10017 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
10018 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10019 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
10020 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
10021 {
10022 enum reg_class cl;
10023 int regno = REGNO (SUBREG_REG (operands[1]));
10024
10025 if (regno >= FIRST_PSEUDO_REGISTER)
10026 {
10027 cl = reg_preferred_class (regno);
10028 regno = reg_renumber[regno];
10029 if (regno < 0)
10030 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
10031 }
10032 if (regno >= 0 && ! FP_REGNO_P (regno))
10033 {
10034 mode = SDmode;
10035 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
10036 operands[1] = SUBREG_REG (operands[1]);
10037 }
10038 }
10039 if (lra_in_progress
10040 && mode == SDmode
10041 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
10042 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10043 && (REG_P (operands[1])
10044 || (GET_CODE (operands[1]) == SUBREG
10045 && REG_P (SUBREG_REG (operands[1])))))
10046 {
10047 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
10048 ? SUBREG_REG (operands[1]) : operands[1]);
10049 enum reg_class cl;
10050
10051 if (regno >= FIRST_PSEUDO_REGISTER)
10052 {
10053 cl = reg_preferred_class (regno);
10054 gcc_assert (cl != NO_REGS);
10055 regno = reg_renumber[regno];
10056 if (regno < 0)
10057 regno = ira_class_hard_regs[cl][0];
10058 }
10059 if (FP_REGNO_P (regno))
10060 {
10061 if (GET_MODE (operands[0]) != DDmode)
10062 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
10063 emit_insn (gen_movsd_store (operands[0], operands[1]));
10064 }
10065 else if (INT_REGNO_P (regno))
10066 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10067 else
10068 gcc_unreachable ();
10069 return;
10070 }
10071 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10072 p1:DD)) if p0 is not of floating point class and p1 is spilled,
10073 since we have no analogous movsd_load for this case. */
10074 if (lra_in_progress && mode == DDmode
10075 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
10076 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
10077 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
10078 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10079 {
10080 enum reg_class cl;
10081 int regno = REGNO (SUBREG_REG (operands[0]));
10082
10083 if (regno >= FIRST_PSEUDO_REGISTER)
10084 {
10085 cl = reg_preferred_class (regno);
10086 regno = reg_renumber[regno];
10087 if (regno < 0)
10088 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
10089 }
10090 if (regno >= 0 && ! FP_REGNO_P (regno))
10091 {
10092 mode = SDmode;
10093 operands[0] = SUBREG_REG (operands[0]);
10094 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
10095 }
10096 }
10097 if (lra_in_progress
10098 && mode == SDmode
10099 && (REG_P (operands[0])
10100 || (GET_CODE (operands[0]) == SUBREG
10101 && REG_P (SUBREG_REG (operands[0]))))
10102 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
10103 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10104 {
10105 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
10106 ? SUBREG_REG (operands[0]) : operands[0]);
10107 enum reg_class cl;
10108
10109 if (regno >= FIRST_PSEUDO_REGISTER)
10110 {
10111 cl = reg_preferred_class (regno);
10112 gcc_assert (cl != NO_REGS);
10113 regno = reg_renumber[regno];
10114 if (regno < 0)
10115 regno = ira_class_hard_regs[cl][0];
10116 }
10117 if (FP_REGNO_P (regno))
10118 {
10119 if (GET_MODE (operands[1]) != DDmode)
10120 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
10121 emit_insn (gen_movsd_load (operands[0], operands[1]));
10122 }
10123 else if (INT_REGNO_P (regno))
10124 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10125 else
10126 gcc_unreachable ();
10127 return;
10128 }
10129
10130 /* FIXME: In the long term, this switch statement should go away
10131 and be replaced by a sequence of tests based on things like
10132 mode == Pmode. */
10133 switch (mode)
10134 {
10135 case E_HImode:
10136 case E_QImode:
10137 if (CONSTANT_P (operands[1])
10138 && GET_CODE (operands[1]) != CONST_INT)
10139 operands[1] = force_const_mem (mode, operands[1]);
10140 break;
10141
10142 case E_TFmode:
10143 case E_TDmode:
10144 case E_IFmode:
10145 case E_KFmode:
10146 if (FLOAT128_2REG_P (mode))
10147 rs6000_eliminate_indexed_memrefs (operands);
10148 /* fall through */
10149
10150 case E_DFmode:
10151 case E_DDmode:
10152 case E_SFmode:
10153 case E_SDmode:
10154 if (CONSTANT_P (operands[1])
10155 && ! easy_fp_constant (operands[1], mode))
10156 operands[1] = force_const_mem (mode, operands[1]);
10157 break;
10158
10159 case E_V16QImode:
10160 case E_V8HImode:
10161 case E_V4SFmode:
10162 case E_V4SImode:
10163 case E_V2DFmode:
10164 case E_V2DImode:
10165 case E_V1TImode:
10166 if (CONSTANT_P (operands[1])
10167 && !easy_vector_constant (operands[1], mode))
10168 operands[1] = force_const_mem (mode, operands[1]);
10169 break;
10170
10171 case E_SImode:
10172 case E_DImode:
10173 /* Use the default pattern for the address of ELF small data. */
10174 if (TARGET_ELF
10175 && mode == Pmode
10176 && DEFAULT_ABI == ABI_V4
10177 && (GET_CODE (operands[1]) == SYMBOL_REF
10178 || GET_CODE (operands[1]) == CONST)
10179 && small_data_operand (operands[1], mode))
10180 {
10181 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10182 return;
10183 }
10184
10185 if (DEFAULT_ABI == ABI_V4
10186 && mode == Pmode && mode == SImode
10187 && flag_pic == 1 && got_operand (operands[1], mode))
10188 {
10189 emit_insn (gen_movsi_got (operands[0], operands[1]));
10190 return;
10191 }
10192
10193 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
10194 && TARGET_NO_TOC
10195 && ! flag_pic
10196 && mode == Pmode
10197 && CONSTANT_P (operands[1])
10198 && GET_CODE (operands[1]) != HIGH
10199 && GET_CODE (operands[1]) != CONST_INT)
10200 {
10201 rtx target = (!can_create_pseudo_p ()
10202 ? operands[0]
10203 : gen_reg_rtx (mode));
10204
10205 /* If this is a function address on -mcall-aixdesc,
10206 convert it to the address of the descriptor. */
10207 if (DEFAULT_ABI == ABI_AIX
10208 && GET_CODE (operands[1]) == SYMBOL_REF
10209 && XSTR (operands[1], 0)[0] == '.')
10210 {
10211 const char *name = XSTR (operands[1], 0);
10212 rtx new_ref;
10213 while (*name == '.')
10214 name++;
10215 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
10216 CONSTANT_POOL_ADDRESS_P (new_ref)
10217 = CONSTANT_POOL_ADDRESS_P (operands[1]);
10218 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
10219 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
10220 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
10221 operands[1] = new_ref;
10222 }
10223
10224 if (DEFAULT_ABI == ABI_DARWIN)
10225 {
10226 #if TARGET_MACHO
10227 if (MACHO_DYNAMIC_NO_PIC_P)
10228 {
10229 /* Take care of any required data indirection. */
10230 operands[1] = rs6000_machopic_legitimize_pic_address (
10231 operands[1], mode, operands[0]);
10232 if (operands[0] != operands[1])
10233 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10234 return;
10235 }
10236 #endif
10237 emit_insn (gen_macho_high (target, operands[1]));
10238 emit_insn (gen_macho_low (operands[0], target, operands[1]));
10239 return;
10240 }
10241
10242 emit_insn (gen_elf_high (target, operands[1]));
10243 emit_insn (gen_elf_low (operands[0], target, operands[1]));
10244 return;
10245 }
10246
10247 /* If this is a SYMBOL_REF that refers to a constant pool entry,
10248 and we have put it in the TOC, we just need to make a TOC-relative
10249 reference to it. */
10250 if (TARGET_TOC
10251 && GET_CODE (operands[1]) == SYMBOL_REF
10252 && use_toc_relative_ref (operands[1], mode))
10253 operands[1] = create_TOC_reference (operands[1], operands[0]);
10254 else if (mode == Pmode
10255 && CONSTANT_P (operands[1])
10256 && GET_CODE (operands[1]) != HIGH
10257 && ((GET_CODE (operands[1]) != CONST_INT
10258 && ! easy_fp_constant (operands[1], mode))
10259 || (GET_CODE (operands[1]) == CONST_INT
10260 && (num_insns_constant (operands[1], mode)
10261 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
10262 || (GET_CODE (operands[0]) == REG
10263 && FP_REGNO_P (REGNO (operands[0]))))
10264 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
10265 && (TARGET_CMODEL == CMODEL_SMALL
10266 || can_create_pseudo_p ()
10267 || (REG_P (operands[0])
10268 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
10269 {
10270
10271 #if TARGET_MACHO
10272 /* Darwin uses a special PIC legitimizer. */
10273 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
10274 {
10275 operands[1] =
10276 rs6000_machopic_legitimize_pic_address (operands[1], mode,
10277 operands[0]);
10278 if (operands[0] != operands[1])
10279 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10280 return;
10281 }
10282 #endif
10283
10284 /* If we are to limit the number of things we put in the TOC and
10285 this is a symbol plus a constant we can add in one insn,
10286 just put the symbol in the TOC and add the constant. */
10287 if (GET_CODE (operands[1]) == CONST
10288 && TARGET_NO_SUM_IN_TOC
10289 && GET_CODE (XEXP (operands[1], 0)) == PLUS
10290 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
10291 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
10292 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
10293 && ! side_effects_p (operands[0]))
10294 {
10295 rtx sym =
10296 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
10297 rtx other = XEXP (XEXP (operands[1], 0), 1);
10298
10299 sym = force_reg (mode, sym);
10300 emit_insn (gen_add3_insn (operands[0], sym, other));
10301 return;
10302 }
10303
10304 operands[1] = force_const_mem (mode, operands[1]);
10305
10306 if (TARGET_TOC
10307 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10308 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
10309 {
10310 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
10311 operands[0]);
10312 operands[1] = gen_const_mem (mode, tocref);
10313 set_mem_alias_set (operands[1], get_TOC_alias_set ());
10314 }
10315 }
10316 break;
10317
10318 case E_TImode:
10319 if (!VECTOR_MEM_VSX_P (TImode))
10320 rs6000_eliminate_indexed_memrefs (operands);
10321 break;
10322
10323 case E_PTImode:
10324 rs6000_eliminate_indexed_memrefs (operands);
10325 break;
10326
10327 default:
10328 fatal_insn ("bad move", gen_rtx_SET (dest, source));
10329 }
10330
10331 /* Above, we may have called force_const_mem which may have returned
10332 an invalid address. If we can, fix this up; otherwise, reload will
10333 have to deal with it. */
10334 if (GET_CODE (operands[1]) == MEM)
10335 operands[1] = validize_mem (operands[1]);
10336
10337 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10338 }
10339 \f
10340 /* Nonzero if we can use a floating-point register to pass this arg. */
10341 #define USE_FP_FOR_ARG_P(CUM,MODE) \
10342 (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \
10343 && (CUM)->fregno <= FP_ARG_MAX_REG \
10344 && TARGET_HARD_FLOAT)
10345
10346 /* Nonzero if we can use an AltiVec register to pass this arg. */
10347 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
10348 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
10349 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
10350 && TARGET_ALTIVEC_ABI \
10351 && (NAMED))
10352
10353 /* Walk down the type tree of TYPE counting consecutive base elements.
10354 If *MODEP is VOIDmode, then set it to the first valid floating point
10355 or vector type. If a non-floating point or vector type is found, or
10356 if a floating point or vector type that doesn't match a non-VOIDmode
10357 *MODEP is found, then return -1, otherwise return the count in the
10358 sub-tree. */
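/* As an illustration, for

     struct { double x[2]; double y; }

   the walk below finds DFmode three times and returns 3 with *MODEP
   set to DFmode, while a struct containing both a float and a double
   returns -1 because the element modes differ. */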
10359
10360 static int
10361 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
10362 {
10363 machine_mode mode;
10364 HOST_WIDE_INT size;
10365
10366 switch (TREE_CODE (type))
10367 {
10368 case REAL_TYPE:
10369 mode = TYPE_MODE (type);
10370 if (!SCALAR_FLOAT_MODE_P (mode))
10371 return -1;
10372
10373 if (*modep == VOIDmode)
10374 *modep = mode;
10375
10376 if (*modep == mode)
10377 return 1;
10378
10379 break;
10380
10381 case COMPLEX_TYPE:
10382 mode = TYPE_MODE (TREE_TYPE (type));
10383 if (!SCALAR_FLOAT_MODE_P (mode))
10384 return -1;
10385
10386 if (*modep == VOIDmode)
10387 *modep = mode;
10388
10389 if (*modep == mode)
10390 return 2;
10391
10392 break;
10393
10394 case VECTOR_TYPE:
10395 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
10396 return -1;
10397
10398 /* Use V4SImode as representative of all 128-bit vector types. */
10399 size = int_size_in_bytes (type);
10400 switch (size)
10401 {
10402 case 16:
10403 mode = V4SImode;
10404 break;
10405 default:
10406 return -1;
10407 }
10408
10409 if (*modep == VOIDmode)
10410 *modep = mode;
10411
10412 /* Vector modes are considered to be opaque: two vectors are
10413 equivalent for the purposes of being homogeneous aggregates
10414 if they are the same size. */
10415 if (*modep == mode)
10416 return 1;
10417
10418 break;
10419
10420 case ARRAY_TYPE:
10421 {
10422 int count;
10423 tree index = TYPE_DOMAIN (type);
10424
10425 /* Can't handle incomplete types nor sizes that are not
10426 fixed. */
10427 if (!COMPLETE_TYPE_P (type)
10428 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10429 return -1;
10430
10431 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
10432 if (count == -1
10433 || !index
10434 || !TYPE_MAX_VALUE (index)
10435 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
10436 || !TYPE_MIN_VALUE (index)
10437 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
10438 || count < 0)
10439 return -1;
10440
10441 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
10442 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
10443
10444 /* There must be no padding. */
10445 if (wi::to_wide (TYPE_SIZE (type))
10446 != count * GET_MODE_BITSIZE (*modep))
10447 return -1;
10448
10449 return count;
10450 }
10451
10452 case RECORD_TYPE:
10453 {
10454 int count = 0;
10455 int sub_count;
10456 tree field;
10457
10458 /* Can't handle incomplete types nor sizes that are not
10459 fixed. */
10460 if (!COMPLETE_TYPE_P (type)
10461 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10462 return -1;
10463
10464 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10465 {
10466 if (TREE_CODE (field) != FIELD_DECL)
10467 continue;
10468
10469 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
10470 if (sub_count < 0)
10471 return -1;
10472 count += sub_count;
10473 }
10474
10475 /* There must be no padding. */
10476 if (wi::to_wide (TYPE_SIZE (type))
10477 != count * GET_MODE_BITSIZE (*modep))
10478 return -1;
10479
10480 return count;
10481 }
10482
10483 case UNION_TYPE:
10484 case QUAL_UNION_TYPE:
10485 {
10486 /* These aren't very interesting except in a degenerate case. */
10487 int count = 0;
10488 int sub_count;
10489 tree field;
10490
10491 /* Can't handle incomplete types nor sizes that are not
10492 fixed. */
10493 if (!COMPLETE_TYPE_P (type)
10494 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10495 return -1;
10496
10497 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10498 {
10499 if (TREE_CODE (field) != FIELD_DECL)
10500 continue;
10501
10502 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
10503 if (sub_count < 0)
10504 return -1;
10505 count = count > sub_count ? count : sub_count;
10506 }
10507
10508 /* There must be no padding. */
10509 if (wi::to_wide (TYPE_SIZE (type))
10510 != count * GET_MODE_BITSIZE (*modep))
10511 return -1;
10512
10513 return count;
10514 }
10515
10516 default:
10517 break;
10518 }
10519
10520 return -1;
10521 }
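/* A minimal worked example (illustrative only): for

     struct hfa { double r; struct { double x; double y; } pt; };

   the RECORD_TYPE case recurses into the nested struct, every
   REAL_TYPE field matches DFmode, and the function returns 3 with
   *MODEP set to DFmode.  Swapping one field to float (SFmode) would
   fail the *MODEP check in the REAL_TYPE case and yield -1.  */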
10522
10523 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
10524 float or vector aggregate that shall be passed in FP/vector registers
10525 according to the ELFv2 ABI, return the homogeneous element mode in
10526 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
10527
10528 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
10529
10530 static bool
10531 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
10532 machine_mode *elt_mode,
10533 int *n_elts)
10534 {
10535 /* Note that we do not accept complex types at the top level as
10536 homogeneous aggregates; these types are handled via the
10537 targetm.calls.split_complex_arg mechanism. Complex types
10538 can be elements of homogeneous aggregates, however. */
10539 if (TARGET_HARD_FLOAT && DEFAULT_ABI == ABI_ELFv2 && type
10540 && AGGREGATE_TYPE_P (type))
10541 {
10542 machine_mode field_mode = VOIDmode;
10543 int field_count = rs6000_aggregate_candidate (type, &field_mode);
10544
10545 if (field_count > 0)
10546 {
10547 int reg_size = ALTIVEC_OR_VSX_VECTOR_MODE (field_mode) ? 16 : 8;
10548 int field_size = ROUND_UP (GET_MODE_SIZE (field_mode), reg_size);
10549
10550 /* The ELFv2 ABI allows homogeneous aggregates to occupy
10551 up to AGGR_ARG_NUM_REG registers. */
10552 if (field_count * field_size <= AGGR_ARG_NUM_REG * reg_size)
10553 {
10554 if (elt_mode)
10555 *elt_mode = field_mode;
10556 if (n_elts)
10557 *n_elts = field_count;
10558 return true;
10559 }
10560 }
10561 }
10562
10563 if (elt_mode)
10564 *elt_mode = mode;
10565 if (n_elts)
10566 *n_elts = 1;
10567 return false;
10568 }
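/* A sketch of the above (assuming ELFv2 with AltiVec/VSX enabled and
   AGGR_ARG_NUM_REG == 8): for

     struct v2 { vector int a; vector int b; };

   rs6000_aggregate_candidate returns 2 with field_mode V4SImode, so
   reg_size and field_size are both 16 and 2 * 16 <= 8 * 16 holds;
   *ELT_MODE becomes V4SImode and *N_ELTS becomes 2.  A struct of nine
   doubles would fail the AGGR_ARG_NUM_REG test and be treated as an
   ordinary aggregate.  */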
10569
10570 /* Return a nonzero value if the function value should be returned in
10571 memory, just as large structures always are. TYPE will be
10572 the data type of the value, and FNTYPE will be the type of the
10573 function doing the returning, or @code{NULL} for libcalls.
10574
10575 The AIX ABI for the RS/6000 specifies that all structures are
10576 returned in memory. The Darwin ABI does the same.
10577
10578 For the Darwin 64 Bit ABI, a function result can be returned in
10579 registers or in memory, depending on the size of the return data
10580 type. If it is returned in registers, the value occupies the same
10581 registers as it would if it were the first and only function
10582 argument. Otherwise, the function places its result in memory at
10583 the location pointed to by GPR3.
10584
10585 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
10586 but a draft put them in memory, and GCC used to implement the draft
10587 instead of the final standard. Therefore, aix_struct_return
10588 controls this instead of DEFAULT_ABI; V.4 targets needing backward
10589 compatibility can change DRAFT_V4_STRUCT_RET to override the
10590 default, and -m switches get the final word. See
10591 rs6000_option_override_internal for more details.
10592
10593 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
10594 long double support is enabled. These values are returned in memory.
10595
10596 int_size_in_bytes returns -1 for variable size objects, which go in
10597 memory always. The cast to unsigned makes -1 > 8. */
10598
10599 static bool
10600 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
10601 {
10602 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
10603 if (TARGET_MACHO
10604 && rs6000_darwin64_abi
10605 && TREE_CODE (type) == RECORD_TYPE
10606 && int_size_in_bytes (type) > 0)
10607 {
10608 CUMULATIVE_ARGS valcum;
10609 rtx valret;
10610
10611 valcum.words = 0;
10612 valcum.fregno = FP_ARG_MIN_REG;
10613 valcum.vregno = ALTIVEC_ARG_MIN_REG;
10614 /* Do a trial code generation as if this were going to be passed
10615 as an argument; if any part goes in memory, we return NULL. */
10616 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
10617 if (valret)
10618 return false;
10619 /* Otherwise fall through to more conventional ABI rules. */
10620 }
10621
10622 /* The ELFv2 ABI returns homogeneous float/vector aggregates in registers. */
10623 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
10624 NULL, NULL))
10625 return false;
10626
10627 /* The ELFv2 ABI returns aggregates of up to 16 bytes in registers. */
10628 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
10629 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
10630 return false;
10631
10632 if (AGGREGATE_TYPE_P (type)
10633 && (aix_struct_return
10634 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
10635 return true;
10636
10637 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
10638 modes only exist for GCC vector types if -maltivec. */
10639 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
10640 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
10641 return false;
10642
10643 /* Return synthetic vectors in memory. */
10644 if (TREE_CODE (type) == VECTOR_TYPE
10645 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
10646 {
10647 static bool warned_for_return_big_vectors = false;
10648 if (!warned_for_return_big_vectors)
10649 {
10650 warning (OPT_Wpsabi, "GCC vector returned by reference: "
10651 "non-standard ABI extension with no compatibility "
10652 "guarantee");
10653 warned_for_return_big_vectors = true;
10654 }
10655 return true;
10656 }
10657
10658 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
10659 && FLOAT128_IEEE_P (TYPE_MODE (type)))
10660 return true;
10661
10662 return false;
10663 }
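/* Two illustrative cases under ELFv2 defaults: a homogeneous

     struct { double x, y, z; }

   is caught by the rs6000_discover_homogeneous_aggregate test above
   and returned in FPRs, whereas a

     struct { char buf[24]; }

   is an aggregate larger than 16 bytes with no homogeneous element
   mode, so the aggregate size test sends it to memory.  */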
10664
10665 /* Specify whether values returned in registers should be at the most
10666 significant end of a register. We want aggregates returned by
10667 value to match the way aggregates are passed to functions. */
10668
10669 static bool
10670 rs6000_return_in_msb (const_tree valtype)
10671 {
10672 return (DEFAULT_ABI == ABI_ELFv2
10673 && BYTES_BIG_ENDIAN
10674 && AGGREGATE_TYPE_P (valtype)
10675 && (rs6000_function_arg_padding (TYPE_MODE (valtype), valtype)
10676 == PAD_UPWARD));
10677 }
10678
10679 #ifdef HAVE_AS_GNU_ATTRIBUTE
10680 /* Return TRUE if a call to function FNDECL may be one that
10681 potentially affects the function calling ABI of the object file. */
10682
10683 static bool
10684 call_ABI_of_interest (tree fndecl)
10685 {
10686 if (rs6000_gnu_attr && symtab->state == EXPANSION)
10687 {
10688 struct cgraph_node *c_node;
10689
10690 /* Libcalls are always interesting. */
10691 if (fndecl == NULL_TREE)
10692 return true;
10693
10694 /* Any call to an external function is interesting. */
10695 if (DECL_EXTERNAL (fndecl))
10696 return true;
10697
10698 /* Interesting functions that we are emitting in this object file. */
10699 c_node = cgraph_node::get (fndecl);
10700 c_node = c_node->ultimate_alias_target ();
10701 return !c_node->only_called_directly_p ();
10702 }
10703 return false;
10704 }
10705 #endif
10706
10707 /* Initialize a variable CUM of type CUMULATIVE_ARGS
10708 for a call to a function whose data type is FNTYPE.
10709 For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
10710
10711 For incoming args we set the number of arguments in the prototype large
10712 so we never return a PARALLEL. */
10713
10714 void
10715 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
10716 rtx libname ATTRIBUTE_UNUSED, int incoming,
10717 int libcall, int n_named_args,
10718 tree fndecl ATTRIBUTE_UNUSED,
10719 machine_mode return_mode ATTRIBUTE_UNUSED)
10720 {
10721 static CUMULATIVE_ARGS zero_cumulative;
10722
10723 *cum = zero_cumulative;
10724 cum->words = 0;
10725 cum->fregno = FP_ARG_MIN_REG;
10726 cum->vregno = ALTIVEC_ARG_MIN_REG;
10727 cum->prototype = (fntype && prototype_p (fntype));
10728 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
10729 ? CALL_LIBCALL : CALL_NORMAL);
10730 cum->sysv_gregno = GP_ARG_MIN_REG;
10731 cum->stdarg = stdarg_p (fntype);
10732 cum->libcall = libcall;
10733
10734 cum->nargs_prototype = 0;
10735 if (incoming || cum->prototype)
10736 cum->nargs_prototype = n_named_args;
10737
10738 /* Check for a longcall attribute. */
10739 if ((!fntype && rs6000_default_long_calls)
10740 || (fntype
10741 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
10742 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
10743 cum->call_cookie |= CALL_LONG;
10744
10745 if (TARGET_DEBUG_ARG)
10746 {
10747 fprintf (stderr, "\ninit_cumulative_args:");
10748 if (fntype)
10749 {
10750 tree ret_type = TREE_TYPE (fntype);
10751 fprintf (stderr, " ret code = %s,",
10752 get_tree_code_name (TREE_CODE (ret_type)));
10753 }
10754
10755 if (cum->call_cookie & CALL_LONG)
10756 fprintf (stderr, " longcall,");
10757
10758 fprintf (stderr, " proto = %d, nargs = %d\n",
10759 cum->prototype, cum->nargs_prototype);
10760 }
10761
10762 #ifdef HAVE_AS_GNU_ATTRIBUTE
10763 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4))
10764 {
10765 cum->escapes = call_ABI_of_interest (fndecl);
10766 if (cum->escapes)
10767 {
10768 tree return_type;
10769
10770 if (fntype)
10771 {
10772 return_type = TREE_TYPE (fntype);
10773 return_mode = TYPE_MODE (return_type);
10774 }
10775 else
10776 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
10777
10778 if (return_type != NULL)
10779 {
10780 if (TREE_CODE (return_type) == RECORD_TYPE
10781 && TYPE_TRANSPARENT_AGGR (return_type))
10782 {
10783 return_type = TREE_TYPE (first_field (return_type));
10784 return_mode = TYPE_MODE (return_type);
10785 }
10786 if (AGGREGATE_TYPE_P (return_type)
10787 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
10788 <= 8))
10789 rs6000_returns_struct = true;
10790 }
10791 if (SCALAR_FLOAT_MODE_P (return_mode))
10792 {
10793 rs6000_passes_float = true;
10794 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
10795 && (FLOAT128_IBM_P (return_mode)
10796 || FLOAT128_IEEE_P (return_mode)
10797 || (return_type != NULL
10798 && (TYPE_MAIN_VARIANT (return_type)
10799 == long_double_type_node))))
10800 rs6000_passes_long_double = true;
10801
10802 /* Note if we pass or return an IEEE 128-bit type. We changed
10803 the mangling for these types, and we may need to make an alias
10804 with the old mangling. */
10805 if (FLOAT128_IEEE_P (return_mode))
10806 rs6000_passes_ieee128 = true;
10807 }
10808 if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode))
10809 rs6000_passes_vector = true;
10810 }
10811 }
10812 #endif
10813
10814 if (fntype
10815 && !TARGET_ALTIVEC
10816 && TARGET_ALTIVEC_ABI
10817 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
10818 {
10819 error ("cannot return value in vector register because"
10820 " altivec instructions are disabled, use %qs"
10821 " to enable them", "-maltivec");
10822 }
10823 }
10824 \f
10825 /* The mode the ABI uses for a word. This is not the same as word_mode
10826 for -m32 -mpowerpc64. This is used to implement various target hooks. */
10827
10828 static scalar_int_mode
10829 rs6000_abi_word_mode (void)
10830 {
10831 return TARGET_32BIT ? SImode : DImode;
10832 }
10833
10834 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
10835 static char *
10836 rs6000_offload_options (void)
10837 {
10838 if (TARGET_64BIT)
10839 return xstrdup ("-foffload-abi=lp64");
10840 else
10841 return xstrdup ("-foffload-abi=ilp32");
10842 }
10843
10844 /* On rs6000, function arguments are promoted, as are function return
10845 values. */
10846
10847 static machine_mode
10848 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
10849 machine_mode mode,
10850 int *punsignedp ATTRIBUTE_UNUSED,
10851 const_tree, int)
10852 {
10853 PROMOTE_MODE (mode, *punsignedp, type);
10854
10855 return mode;
10856 }
10857
10858 /* Return true if TYPE must be passed on the stack and not in registers. */
10859
10860 static bool
10861 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
10862 {
10863 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
10864 return must_pass_in_stack_var_size (mode, type);
10865 else
10866 return must_pass_in_stack_var_size_or_pad (mode, type);
10867 }
10868
10869 static inline bool
10870 is_complex_IBM_long_double (machine_mode mode)
10871 {
10872 return mode == ICmode || (mode == TCmode && FLOAT128_IBM_P (TCmode));
10873 }
10874
10875 /* Whether ABI_V4 passes MODE args to a function in floating point
10876 registers. */
10877
10878 static bool
10879 abi_v4_pass_in_fpr (machine_mode mode, bool named)
10880 {
10881 if (!TARGET_HARD_FLOAT)
10882 return false;
10883 if (mode == DFmode)
10884 return true;
10885 if (mode == SFmode && named)
10886 return true;
10887 /* ABI_V4 passes complex IBM long double in 8 gprs.
10888 Stupid, but we can't change the ABI now. */
10889 if (is_complex_IBM_long_double (mode))
10890 return false;
10891 if (FLOAT128_2REG_P (mode))
10892 return true;
10893 if (DECIMAL_FLOAT_MODE_P (mode))
10894 return true;
10895 return false;
10896 }
10897
10898 /* Implement TARGET_FUNCTION_ARG_PADDING.
10899
10900 For the AIX ABI structs are always stored left shifted in their
10901 argument slot. */
10902
10903 static pad_direction
10904 rs6000_function_arg_padding (machine_mode mode, const_tree type)
10905 {
10906 #ifndef AGGREGATE_PADDING_FIXED
10907 #define AGGREGATE_PADDING_FIXED 0
10908 #endif
10909 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
10910 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
10911 #endif
10912
10913 if (!AGGREGATE_PADDING_FIXED)
10914 {
10915 /* GCC used to pass structures of the same size as integer types as
10916 if they were in fact integers, ignoring TARGET_FUNCTION_ARG_PADDING;
10917 i.e., structures of size 1 or 2 (or 4 when TARGET_64BIT) were
10918 passed padded downward, except that -mstrict-align further
10919 muddied the water in that multi-component structures of 2 and 4
10920 bytes in size were passed padded upward.
10921
10922 The following arranges for best compatibility with previous
10923 versions of gcc, but removes the -mstrict-align dependency. */
10924 if (BYTES_BIG_ENDIAN)
10925 {
10926 HOST_WIDE_INT size = 0;
10927
10928 if (mode == BLKmode)
10929 {
10930 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
10931 size = int_size_in_bytes (type);
10932 }
10933 else
10934 size = GET_MODE_SIZE (mode);
10935
10936 if (size == 1 || size == 2 || size == 4)
10937 return PAD_DOWNWARD;
10938 }
10939 return PAD_UPWARD;
10940 }
10941
10942 if (AGGREGATES_PAD_UPWARD_ALWAYS)
10943 {
10944 if (type != 0 && AGGREGATE_TYPE_P (type))
10945 return PAD_UPWARD;
10946 }
10947
10948 /* Fall back to the default. */
10949 return default_function_arg_padding (mode, type);
10950 }
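/* Concrete effect of the big-endian compatibility rule above
   (illustrative): with BYTES_BIG_ENDIAN and !AGGREGATE_PADDING_FIXED,
   a struct of size 2 (e.g. one short) is padded downward just as a
   2-byte integer would have been, while a struct of size 3 falls
   through to PAD_UPWARD; only sizes 1, 2 and 4 keep the old
   pass-as-integer padding.  */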
10951
10952 /* If defined, a C expression that gives the alignment boundary, in bits,
10953 of an argument with the specified mode and type. If it is not defined,
10954 PARM_BOUNDARY is used for all arguments.
10955
10956 V.4 wants long longs and doubles to be double word aligned. Just
10957 testing the mode size is a boneheaded way to do this as it means
10958 that other types such as complex int are also double word aligned.
10959 However, we're stuck with this because changing the ABI might break
10960 existing library interfaces.
10961
10962 Quadword align Altivec/VSX vectors.
10963 Quadword align large synthetic vector types. */
10964
10965 static unsigned int
10966 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
10967 {
10968 machine_mode elt_mode;
10969 int n_elts;
10970
10971 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10972
10973 if (DEFAULT_ABI == ABI_V4
10974 && (GET_MODE_SIZE (mode) == 8
10975 || (TARGET_HARD_FLOAT
10976 && !is_complex_IBM_long_double (mode)
10977 && FLOAT128_2REG_P (mode))))
10978 return 64;
10979 else if (FLOAT128_VECTOR_P (mode))
10980 return 128;
10981 else if (type && TREE_CODE (type) == VECTOR_TYPE
10982 && int_size_in_bytes (type) >= 8
10983 && int_size_in_bytes (type) < 16)
10984 return 64;
10985 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
10986 || (type && TREE_CODE (type) == VECTOR_TYPE
10987 && int_size_in_bytes (type) >= 16))
10988 return 128;
10989
10990 /* Aggregate types that need > 8 byte alignment are quadword-aligned
10991 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
10992 -mcompat-align-parm is used. */
10993 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
10994 || DEFAULT_ABI == ABI_ELFv2)
10995 && type && TYPE_ALIGN (type) > 64)
10996 {
10997 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
10998 or homogeneous float/vector aggregates here. We already handled
10999 vector aggregates above, but still need to check for float here. */
11000 bool aggregate_p = (AGGREGATE_TYPE_P (type)
11001 && !SCALAR_FLOAT_MODE_P (elt_mode));
11002
11003 /* We used to check for BLKmode instead of the above aggregate type
11004 check. Warn when this results in any difference to the ABI. */
11005 if (aggregate_p != (mode == BLKmode))
11006 {
11007 static bool warned;
11008 if (!warned && warn_psabi)
11009 {
11010 warned = true;
11011 inform (input_location,
11012 "the ABI of passing aggregates with %d-byte alignment"
11013 " has changed in GCC 5",
11014 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
11015 }
11016 }
11017
11018 if (aggregate_p)
11019 return 128;
11020 }
11021
11022 /* Similar for the Darwin64 ABI. Note that for historical reasons we
11023 implement the "aggregate type" check as a BLKmode check here; this
11024 means certain aggregate types are in fact not aligned. */
11025 if (TARGET_MACHO && rs6000_darwin64_abi
11026 && mode == BLKmode
11027 && type && TYPE_ALIGN (type) > 64)
11028 return 128;
11029
11030 return PARM_BOUNDARY;
11031 }
11032
11033 /* The offset in words to the start of the parameter save area. */
11034
11035 static unsigned int
11036 rs6000_parm_offset (void)
11037 {
11038 return (DEFAULT_ABI == ABI_V4 ? 2
11039 : DEFAULT_ABI == ABI_ELFv2 ? 4
11040 : 6);
11041 }
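/* These offsets correspond to the fixed stack-frame header that sits
   below the parameter save area (a sketch of the respective ABIs):
   2 words for V.4 (back chain and LR save), 4 doublewords for ELFv2
   (back chain, CR save, LR save, TOC save), and 6 doublewords for
   AIX/ELFv1 (the same four plus two reserved slots).  */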
11042
11043 /* For a function parm of MODE and TYPE, return the starting word in
11044 the parameter area. NWORDS of the parameter area are already used. */
11045
11046 static unsigned int
11047 rs6000_parm_start (machine_mode mode, const_tree type,
11048 unsigned int nwords)
11049 {
11050 unsigned int align;
11051
11052 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
11053 return nwords + (-(rs6000_parm_offset () + nwords) & align);
11054 }
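/* Worked example (illustrative, assuming a 64-bit target where
   PARM_BOUNDARY is 64): a 16-byte-aligned vector under ELFv2 with
   NWORDS == 3.  rs6000_function_arg_boundary returns 128, so align is
   128 / 64 - 1 == 1, and the result is 3 + (-(4 + 3) & 1) == 4;
   the argument is bumped to an even doubleword so that
   rs6000_parm_offset () + start stays 16-byte aligned.  */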
11055
11056 /* Compute the size (in words) of a function argument. */
11057
11058 static unsigned long
11059 rs6000_arg_size (machine_mode mode, const_tree type)
11060 {
11061 unsigned long size;
11062
11063 if (mode != BLKmode)
11064 size = GET_MODE_SIZE (mode);
11065 else
11066 size = int_size_in_bytes (type);
11067
11068 if (TARGET_32BIT)
11069 return (size + 3) >> 2;
11070 else
11071 return (size + 7) >> 3;
11072 }
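/* For instance (illustrative), a BLKmode struct of 10 bytes occupies
   (10 + 3) >> 2 == 3 words under TARGET_32BIT and (10 + 7) >> 3 == 2
   doublewords otherwise; sizes always round up to whole parameter
   words.  */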
11073 \f
11074 /* Use this to flush pending int fields. */
11075
11076 static void
11077 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
11078 HOST_WIDE_INT bitpos, int final)
11079 {
11080 unsigned int startbit, endbit;
11081 int intregs, intoffset;
11082
11083 /* Handle the situation where a float takes up the first half
11084 of a GPR, and the other half is empty (typically due to
11085 alignment restrictions). We can detect this by an 8-byte-aligned
11086 int field, or by seeing that this is the final flush for this
11087 argument. Count the word and continue on. */
11088 if (cum->floats_in_gpr == 1
11089 && (cum->intoffset % 64 == 0
11090 || (cum->intoffset == -1 && final)))
11091 {
11092 cum->words++;
11093 cum->floats_in_gpr = 0;
11094 }
11095
11096 if (cum->intoffset == -1)
11097 return;
11098
11099 intoffset = cum->intoffset;
11100 cum->intoffset = -1;
11101 cum->floats_in_gpr = 0;
11102
11103 if (intoffset % BITS_PER_WORD != 0)
11104 {
11105 unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
11106 if (!int_mode_for_size (bits, 0).exists ())
11107 {
11108 /* We couldn't find an appropriate mode, which happens,
11109 e.g., in packed structs when there are 3 bytes to load.
11110 Back intoffset back to the beginning of the word in this
11111 case. */
11112 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
11113 }
11114 }
11115
11116 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
11117 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
11118 intregs = (endbit - startbit) / BITS_PER_WORD;
11119 cum->words += intregs;
11120 /* cum->words should really be unsigned; compare via a cast. */
11121 if ((unsigned) cum->words < (endbit / BITS_PER_WORD))
11122 {
11123 int pad = (endbit / BITS_PER_WORD) - cum->words;
11124 cum->words += pad;
11125 }
11126 }
11127
11128 /* The darwin64 ABI calls for us to recurse down through structs,
11129 looking for elements passed in registers. Unfortunately, we have
11130 to track int register count here also because of misalignments
11131 in powerpc alignment mode. */
11132
11133 static void
11134 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
11135 const_tree type,
11136 HOST_WIDE_INT startbitpos)
11137 {
11138 tree f;
11139
11140 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
11141 if (TREE_CODE (f) == FIELD_DECL)
11142 {
11143 HOST_WIDE_INT bitpos = startbitpos;
11144 tree ftype = TREE_TYPE (f);
11145 machine_mode mode;
11146 if (ftype == error_mark_node)
11147 continue;
11148 mode = TYPE_MODE (ftype);
11149
11150 if (DECL_SIZE (f) != 0
11151 && tree_fits_uhwi_p (bit_position (f)))
11152 bitpos += int_bit_position (f);
11153
11154 /* ??? FIXME: else assume zero offset. */
11155
11156 if (TREE_CODE (ftype) == RECORD_TYPE)
11157 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
11158 else if (USE_FP_FOR_ARG_P (cum, mode))
11159 {
11160 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
11161 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
11162 cum->fregno += n_fpregs;
11163 /* Single-precision floats present a special problem for
11164 us, because they are smaller than an 8-byte GPR, and so
11165 the structure-packing rules combined with the standard
11166 varargs behavior mean that we want to pack float/float
11167 and float/int combinations into a single register's
11168 space. This is complicated by the arg advance flushing,
11169 which works on arbitrarily large groups of int-type
11170 fields. */
11171 if (mode == SFmode)
11172 {
11173 if (cum->floats_in_gpr == 1)
11174 {
11175 /* Two floats in a word; count the word and reset
11176 the float count. */
11177 cum->words++;
11178 cum->floats_in_gpr = 0;
11179 }
11180 else if (bitpos % 64 == 0)
11181 {
11182 /* A float at the beginning of an 8-byte word;
11183 count it and put off adjusting cum->words until
11184 we see if an arg advance flush is going to do it
11185 for us. */
11186 cum->floats_in_gpr++;
11187 }
11188 else
11189 {
11190 /* The float is at the end of a word, preceded
11191 by integer fields, so the arg advance flush
11192 just above has already set cum->words and
11193 everything is taken care of. */
11194 }
11195 }
11196 else
11197 cum->words += n_fpregs;
11198 }
11199 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
11200 {
11201 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
11202 cum->vregno++;
11203 cum->words += 2;
11204 }
11205 else if (cum->intoffset == -1)
11206 cum->intoffset = bitpos;
11207 }
11208 }
11209
11210 /* Check for an item that needs to be considered specially under the
11211 Darwin 64-bit ABI. These are record types where the mode is BLKmode
11212 or the structure is 8 bytes in size. */
11213 static int
11214 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
11215 {
11216 return rs6000_darwin64_abi
11217 && ((mode == BLKmode
11218 && TREE_CODE (type) == RECORD_TYPE
11219 && int_size_in_bytes (type) > 0)
11220 || (type && TREE_CODE (type) == RECORD_TYPE
11221 && int_size_in_bytes (type) == 8)) ? 1 : 0;
11222 }
11223
11224 /* Update the data in CUM to advance over an argument
11225 of mode MODE and data type TYPE.
11226 (TYPE is null for libcalls where that information may not be available.)
11227
11228 Note that for args passed by reference, function_arg will be called
11229 with MODE and TYPE set to that of the pointer to the arg, not the arg
11230 itself. */
11231
11232 static void
11233 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
11234 const_tree type, bool named, int depth)
11235 {
11236 machine_mode elt_mode;
11237 int n_elts;
11238
11239 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11240
11241 /* Only tick off an argument if we're not recursing. */
11242 if (depth == 0)
11243 cum->nargs_prototype--;
11244
11245 #ifdef HAVE_AS_GNU_ATTRIBUTE
11246 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4)
11247 && cum->escapes)
11248 {
11249 if (SCALAR_FLOAT_MODE_P (mode))
11250 {
11251 rs6000_passes_float = true;
11252 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
11253 && (FLOAT128_IBM_P (mode)
11254 || FLOAT128_IEEE_P (mode)
11255 || (type != NULL
11256 && TYPE_MAIN_VARIANT (type) == long_double_type_node)))
11257 rs6000_passes_long_double = true;
11258
11259 /* Note if we pass or return an IEEE 128-bit type. We changed the
11260 mangling for these types, and we may need to make an alias with
11261 the old mangling. */
11262 if (FLOAT128_IEEE_P (mode))
11263 rs6000_passes_ieee128 = true;
11264 }
11265 if (named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
11266 rs6000_passes_vector = true;
11267 }
11268 #endif
11269
11270 if (TARGET_ALTIVEC_ABI
11271 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
11272 || (type && TREE_CODE (type) == VECTOR_TYPE
11273 && int_size_in_bytes (type) == 16)))
11274 {
11275 bool stack = false;
11276
11277 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11278 {
11279 cum->vregno += n_elts;
11280
11281 if (!TARGET_ALTIVEC)
11282 error ("cannot pass argument in vector register because"
11283 " altivec instructions are disabled, use %qs"
11284 " to enable them", "-maltivec");
11285
11286 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
11287 even if it is going to be passed in a vector register.
11288 Darwin does the same for variable-argument functions. */
11289 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11290 && TARGET_64BIT)
11291 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
11292 stack = true;
11293 }
11294 else
11295 stack = true;
11296
11297 if (stack)
11298 {
11299 int align;
11300
11301 /* Vector parameters must be 16-byte aligned. In 32-bit
11302 mode this means we need to take into account the offset
11303 to the parameter save area. In 64-bit mode, they just
11304 have to start on an even word, since the parameter save
11305 area is 16-byte aligned. */
11306 if (TARGET_32BIT)
11307 align = -(rs6000_parm_offset () + cum->words) & 3;
11308 else
11309 align = cum->words & 1;
11310 cum->words += align + rs6000_arg_size (mode, type);
11311
11312 if (TARGET_DEBUG_ARG)
11313 {
11314 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
11315 cum->words, align);
11316 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
11317 cum->nargs_prototype, cum->prototype,
11318 GET_MODE_NAME (mode));
11319 }
11320 }
11321 }
11322 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11323 {
11324 int size = int_size_in_bytes (type);
11325 /* Variable sized types have size == -1 and are
11326 treated as if consisting entirely of ints.
11327 Pad to 16 byte boundary if needed. */
11328 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
11329 && (cum->words % 2) != 0)
11330 cum->words++;
11331 /* For varargs, we can just go up by the size of the struct. */
11332 if (!named)
11333 cum->words += (size + 7) / 8;
11334 else
11335 {
11336 /* It is tempting to say int register count just goes up by
11337 sizeof(type)/8, but this is wrong in a case such as
11338 { int; double; int; } [powerpc alignment]. We have to
11339 grovel through the fields for these too. */
11340 cum->intoffset = 0;
11341 cum->floats_in_gpr = 0;
11342 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
11343 rs6000_darwin64_record_arg_advance_flush (cum,
11344 size * BITS_PER_UNIT, 1);
11345 }
11346 if (TARGET_DEBUG_ARG)
11347 {
11348 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
11349 cum->words, TYPE_ALIGN (type), size);
11350 fprintf (stderr,
11351 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
11352 cum->nargs_prototype, cum->prototype,
11353 GET_MODE_NAME (mode));
11354 }
11355 }
11356 else if (DEFAULT_ABI == ABI_V4)
11357 {
11358 if (abi_v4_pass_in_fpr (mode, named))
11359 {
11360 /* _Decimal128 must use an even/odd register pair. This assumes
11361 that the register number is odd when fregno is odd. */
11362 if (mode == TDmode && (cum->fregno % 2) == 1)
11363 cum->fregno++;
11364
11365 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
11366 <= FP_ARG_V4_MAX_REG)
11367 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
11368 else
11369 {
11370 cum->fregno = FP_ARG_V4_MAX_REG + 1;
11371 if (mode == DFmode || FLOAT128_IBM_P (mode)
11372 || mode == DDmode || mode == TDmode)
11373 cum->words += cum->words & 1;
11374 cum->words += rs6000_arg_size (mode, type);
11375 }
11376 }
11377 else
11378 {
11379 int n_words = rs6000_arg_size (mode, type);
11380 int gregno = cum->sysv_gregno;
11381
11382 /* Long long is put in (r3,r4), (r5,r6), (r7,r8) or (r9,r10).
11383 So is any other 2-word item such as complex int, due to a
11384 historical mistake. */
11385 if (n_words == 2)
11386 gregno += (1 - gregno) & 1;
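/* Worked example of the bump above (illustrative): with
   GP_ARG_MIN_REG == 3, an even gregno such as 4 gives
   (1 - 4) & 1 == 1 and moves to r5, while odd gregnos are left
   alone, so doubleword pairs always start at r3, r5, r7 or r9.  */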
11387
11388 /* Multi-reg args are not split between registers and stack. */
11389 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11390 {
11391 /* Long long is aligned on the stack. So are other 2-word
11392 items such as complex int due to a historical mistake. */
11393 if (n_words == 2)
11394 cum->words += cum->words & 1;
11395 cum->words += n_words;
11396 }
11397
11398 /* Note: we keep accumulating gregno even after we have started
11399 spilling to the stack; expand_builtin_saveregs relies on this
11400 to detect that spilling has begun. */
11401 cum->sysv_gregno = gregno + n_words;
11402 }
11403
11404 if (TARGET_DEBUG_ARG)
11405 {
11406 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
11407 cum->words, cum->fregno);
11408 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
11409 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
11410 fprintf (stderr, "mode = %4s, named = %d\n",
11411 GET_MODE_NAME (mode), named);
11412 }
11413 }
11414 else
11415 {
11416 int n_words = rs6000_arg_size (mode, type);
11417 int start_words = cum->words;
11418 int align_words = rs6000_parm_start (mode, type, start_words);
11419
11420 cum->words = align_words + n_words;
11421
11422 if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT)
11423 {
11424 /* _Decimal128 must be passed in an even/odd float register pair.
11425 This assumes that the register number is odd when fregno is
11426 odd. */
11427 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
11428 cum->fregno++;
11429 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
11430 }
11431
11432 if (TARGET_DEBUG_ARG)
11433 {
11434 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
11435 cum->words, cum->fregno);
11436 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
11437 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
11438 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
11439 named, align_words - start_words, depth);
11440 }
11441 }
11442 }
11443
11444 static void
11445 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
11446 const_tree type, bool named)
11447 {
11448 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
11449 0);
11450 }
11451
11452 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
11453 structure between cum->intoffset and bitpos to integer registers. */
11454
11455 static void
11456 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
11457 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
11458 {
11459 machine_mode mode;
11460 unsigned int regno;
11461 unsigned int startbit, endbit;
11462 int this_regno, intregs, intoffset;
11463 rtx reg;
11464
11465 if (cum->intoffset == -1)
11466 return;
11467
11468 intoffset = cum->intoffset;
11469 cum->intoffset = -1;
11470
11471 /* If this is the trailing part of a word, try to load only that
11472 much into the register. Otherwise load the whole register. Note
11473 that in the latter case we may pick up unwanted bits. It's not a
11474 problem at the moment, but we may wish to revisit this. */
11475
11476 if (intoffset % BITS_PER_WORD != 0)
11477 {
11478 unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
11479 if (!int_mode_for_size (bits, 0).exists (&mode))
11480 {
11481 /* We couldn't find an appropriate mode, which happens,
11482 e.g., in packed structs when there are 3 bytes to load.
11483 Back intoffset back to the beginning of the word in this
11484 case. */
11485 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
11486 mode = word_mode;
11487 }
11488 }
11489 else
11490 mode = word_mode;
11491
11492 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
11493 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
11494 intregs = (endbit - startbit) / BITS_PER_WORD;
11495 this_regno = cum->words + intoffset / BITS_PER_WORD;
11496
11497 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
11498 cum->use_stack = 1;
11499
11500 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
11501 if (intregs <= 0)
11502 return;
11503
11504 intoffset /= BITS_PER_UNIT;
11505 do
11506 {
11507 regno = GP_ARG_MIN_REG + this_regno;
11508 reg = gen_rtx_REG (mode, regno);
11509 rvec[(*k)++] =
11510 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
11511
11512 this_regno += 1;
11513 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
11514 mode = word_mode;
11515 intregs -= 1;
11516 }
11517 while (intregs > 0);
11518 }
11519
11520 /* Recursive workhorse for the following. */
11521
11522 static void
11523 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
11524 HOST_WIDE_INT startbitpos, rtx rvec[],
11525 int *k)
11526 {
11527 tree f;
11528
11529 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
11530 if (TREE_CODE (f) == FIELD_DECL)
11531 {
11532 HOST_WIDE_INT bitpos = startbitpos;
11533 tree ftype = TREE_TYPE (f);
11534 machine_mode mode;
11535 if (ftype == error_mark_node)
11536 continue;
11537 mode = TYPE_MODE (ftype);
11538
11539 if (DECL_SIZE (f) != 0
11540 && tree_fits_uhwi_p (bit_position (f)))
11541 bitpos += int_bit_position (f);
11542
11543 /* ??? FIXME: else assume zero offset. */
11544
11545 if (TREE_CODE (ftype) == RECORD_TYPE)
11546 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
11547 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
11548 {
11549 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
11550 #if 0
11551 switch (mode)
11552 {
11553 case E_SCmode: mode = SFmode; break;
11554 case E_DCmode: mode = DFmode; break;
11555 case E_TCmode: mode = TFmode; break;
11556 default: break;
11557 }
11558 #endif
11559 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
11560 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
11561 {
11562 gcc_assert (cum->fregno == FP_ARG_MAX_REG
11563 && (mode == TFmode || mode == TDmode));
11564 /* Long double or _Decimal128 split over regs and memory. */
11565 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
11566 cum->use_stack = 1;
11567 }
11568 rvec[(*k)++]
11569 = gen_rtx_EXPR_LIST (VOIDmode,
11570 gen_rtx_REG (mode, cum->fregno++),
11571 GEN_INT (bitpos / BITS_PER_UNIT));
11572 if (FLOAT128_2REG_P (mode))
11573 cum->fregno++;
11574 }
11575 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
11576 {
11577 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
11578 rvec[(*k)++]
11579 = gen_rtx_EXPR_LIST (VOIDmode,
11580 gen_rtx_REG (mode, cum->vregno++),
11581 GEN_INT (bitpos / BITS_PER_UNIT));
11582 }
11583 else if (cum->intoffset == -1)
11584 cum->intoffset = bitpos;
11585 }
11586 }
11587
11588 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
11589 the register(s) to be used for each field and subfield of a struct
11590 being passed by value, along with the offset of where the
11591 register's value may be found in the block. FP fields go in FP
11592 registers, vector fields go in vector registers, and everything
11593 else goes in int registers, packed as in memory.
11594
11595 This code is also used for function return values. RETVAL indicates
11596 whether this is the case.
11597
11598 Much of this is taken from the SPARC V9 port, which has a similar
11599 calling convention. */
11600
11601 static rtx
11602 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
11603 bool named, bool retval)
11604 {
11605 rtx rvec[FIRST_PSEUDO_REGISTER];
11606 int k = 1, kbase = 1;
11607 HOST_WIDE_INT typesize = int_size_in_bytes (type);
11608 /* This is a copy; modifications are not visible to our caller. */
11609 CUMULATIVE_ARGS copy_cum = *orig_cum;
11610 CUMULATIVE_ARGS *cum = &copy_cum;
11611
11612 /* Pad to 16 byte boundary if needed. */
11613 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
11614 && (cum->words % 2) != 0)
11615 cum->words++;
11616
11617 cum->intoffset = 0;
11618 cum->use_stack = 0;
11619 cum->named = named;
11620
11621 /* Put entries into rvec[] for individual FP and vector fields, and
11622 for the chunks of memory that go in int regs. Note we start at
11623 element 1; 0 is reserved for an indication of using memory, and
11624 may or may not be filled in below. */
11625 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
11626 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
11627
11628 /* If any part of the struct went on the stack put all of it there.
11629 This hack is because the generic code for
11630 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
11631 parts of the struct are not at the beginning. */
11632 if (cum->use_stack)
11633 {
11634 if (retval)
11635 return NULL_RTX; /* doesn't go in registers at all */
11636 kbase = 0;
11637 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11638 }
11639 if (k > 1 || cum->use_stack)
11640 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
11641 else
11642 return NULL_RTX;
11643 }
11644
11645 /* Determine where to place an argument when using 64-bit registers with the 32-bit ABI (-m32 -mpowerpc64). */
11646
11647 static rtx
11648 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
11649 int align_words)
11650 {
11651 int n_units;
11652 int i, k;
11653 rtx rvec[GP_ARG_NUM_REG + 1];
11654
11655 if (align_words >= GP_ARG_NUM_REG)
11656 return NULL_RTX;
11657
11658 n_units = rs6000_arg_size (mode, type);
11659
11660 /* Optimize the simple case where the arg fits in one gpr, except in
11661 the case of BLKmode due to assign_parms assuming that registers are
11662 BITS_PER_WORD wide. */
11663 if (n_units == 0
11664 || (n_units == 1 && mode != BLKmode))
11665 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
11666
11667 k = 0;
11668 if (align_words + n_units > GP_ARG_NUM_REG)
11669 /* Not all of the arg fits in gprs. Say that it goes in memory too,
11670 using a magic NULL_RTX component.
11671 This is not strictly correct. Only some of the arg belongs in
11672 memory, not all of it. However, the normal scheme using
11673 function_arg_partial_nregs can result in unusual subregs, eg.
11674 (subreg:SI (reg:DF) 4), which are not handled well. The code to
11675 store the whole arg to memory is often more efficient than code
11676 to store pieces, and we know that space is available in the right
11677 place for the whole arg. */
11678 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11679
11680 i = 0;
11681 do
11682 {
11683 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
11684 rtx off = GEN_INT (i++ * 4);
11685 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11686 }
11687 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
11688
11689 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
11690 }
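/* A sketch of the PARALLEL built above: a DFmode argument at
   align_words == 7 under -m32 -mpowerpc64 has n_units == 2 but only
   r10 left, so the result is roughly

     (parallel:DF [(expr_list (nil) (const_int 0))
                   (expr_list (reg:SI 10) (const_int 0))])

   i.e. the word at offset 0 rides in r10, and the magic NULL_RTX
   element says the whole value is also stored to memory.  */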
11691
11692 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
11693 but must also be copied into the parameter save area starting at
11694 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
11695 to the GPRs and/or memory. Return the number of elements used. */
11696
11697 static int
11698 rs6000_psave_function_arg (machine_mode mode, const_tree type,
11699 int align_words, rtx *rvec)
11700 {
11701 int k = 0;
11702
11703 if (align_words < GP_ARG_NUM_REG)
11704 {
11705 int n_words = rs6000_arg_size (mode, type);
11706
11707 if (align_words + n_words > GP_ARG_NUM_REG
11708 || mode == BLKmode
11709 || (TARGET_32BIT && TARGET_POWERPC64))
11710 {
11711 /* If this is partially on the stack, then we only
11712 include the portion actually in registers here. */
11713 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
11714 int i = 0;
11715
11716 if (align_words + n_words > GP_ARG_NUM_REG)
11717 {
11718 /* Not all of the arg fits in gprs. Say that it goes in memory
11719 too, using a magic NULL_RTX component. Also see comment in
11720 rs6000_mixed_function_arg for why the normal
11721 function_arg_partial_nregs scheme doesn't work in this case. */
11722 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11723 }
11724
11725 do
11726 {
11727 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
11728 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
11729 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11730 }
11731 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
11732 }
11733 else
11734 {
11735 /* The whole arg fits in gprs. */
11736 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
11737 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
11738 }
11739 }
11740 else
11741 {
11742 /* It's entirely in memory. */
11743 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11744 }
11745
11746 return k;
11747 }
11748
11749 /* RVEC is a vector of K components of an argument of mode MODE.
11750 Construct the final function_arg return value from it. */
11751
11752 static rtx
11753 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
11754 {
11755 gcc_assert (k >= 1);
11756
11757 /* Avoid returning a PARALLEL in the trivial cases. */
11758 if (k == 1)
11759 {
11760 if (XEXP (rvec[0], 0) == NULL_RTX)
11761 return NULL_RTX;
11762
11763 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
11764 return XEXP (rvec[0], 0);
11765 }
11766
11767 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
11768 }
11769
11770 /* Determine where to put an argument to a function.
11771 Value is zero to push the argument on the stack,
11772 or a hard register in which to store the argument.
11773
11774 MODE is the argument's machine mode.
11775 TYPE is the data type of the argument (as a tree).
11776 This is null for libcalls where that information may
11777 not be available.
11778 CUM is a variable of type CUMULATIVE_ARGS which gives info about
11779 the preceding args and about the function being called. It is
11780 not modified in this routine.
11781 NAMED is nonzero if this argument is a named parameter
11782 (otherwise it is an extra parameter matching an ellipsis).
11783
11784 On RS/6000 the first eight words of non-FP arguments are normally in registers
11785 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
11786 Under V.4, the first 8 FP args are in registers.
11787
11788 If this is floating-point and no prototype is specified, we use
11789 both an FP and integer register (or possibly FP reg and stack). Library
11790 functions (when CALL_LIBCALL is set) always have the proper types for args,
11791 so we can pass the FP value just in one register. emit_library_function
11792 doesn't support PARALLEL anyway.
11793
11794 Note that for args passed by reference, function_arg will be called
11795 with MODE and TYPE set to that of the pointer to the arg, not the arg
11796 itself. */
11797
11798 static rtx
11799 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
11800 const_tree type, bool named)
11801 {
11802 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11803 enum rs6000_abi abi = DEFAULT_ABI;
11804 machine_mode elt_mode;
11805 int n_elts;
11806
11807 /* Return a marker indicating whether the CR1 bit that V.4 uses to
11808 say FP args were passed in registers needs to be set or cleared.
11809 Assume that we don't need the marker for software floating point
11810 or compiler-generated library calls. */
11811 if (mode == VOIDmode)
11812 {
11813 if (abi == ABI_V4
11814 && (cum->call_cookie & CALL_LIBCALL) == 0
11815 && (cum->stdarg
11816 || (cum->nargs_prototype < 0
11817 && (cum->prototype || TARGET_NO_PROTOTYPE)))
11818 && TARGET_HARD_FLOAT)
11819 return GEN_INT (cum->call_cookie
11820 | ((cum->fregno == FP_ARG_MIN_REG)
11821 ? CALL_V4_SET_FP_ARGS
11822 : CALL_V4_CLEAR_FP_ARGS));
11823
11824 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
11825 }
11826
11827 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11828
11829 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11830 {
11831 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
11832 if (rslt != NULL_RTX)
11833 return rslt;
11834 /* Else fall through to usual handling. */
11835 }
11836
11837 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11838 {
11839 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
11840 rtx r, off;
11841 int i, k = 0;
11842
11843 /* Do we also need to pass this argument in the parameter save area?
11844 Library support functions for IEEE 128-bit are assumed to not need the
11845 value passed both in GPRs and in vector registers. */
11846 if (TARGET_64BIT && !cum->prototype
11847 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
11848 {
11849 int align_words = ROUND_UP (cum->words, 2);
11850 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
11851 }
11852
11853 /* Describe where this argument goes in the vector registers. */
11854 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
11855 {
11856 r = gen_rtx_REG (elt_mode, cum->vregno + i);
11857 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
11858 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11859 }
11860
11861 return rs6000_finish_function_arg (mode, rvec, k);
11862 }
11863 else if (TARGET_ALTIVEC_ABI
11864 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
11865 || (type && TREE_CODE (type) == VECTOR_TYPE
11866 && int_size_in_bytes (type) == 16)))
11867 {
11868 if (named || abi == ABI_V4)
11869 return NULL_RTX;
11870 else
11871 {
11872 /* Vector parameters to varargs functions under AIX or Darwin
11873 get passed in memory and possibly also in GPRs. */
11874 int align, align_words, n_words;
11875 machine_mode part_mode;
11876
11877 /* Vector parameters must be 16-byte aligned. In 32-bit
11878 mode this means we need to take into account the offset
11879 to the parameter save area. In 64-bit mode, they just
11880 have to start on an even word, since the parameter save
11881 area is 16-byte aligned. */
11882 if (TARGET_32BIT)
11883 align = -(rs6000_parm_offset () + cum->words) & 3;
11884 else
11885 align = cum->words & 1;
11886 align_words = cum->words + align;
11887
11888 /* Out of registers? Memory, then. */
11889 if (align_words >= GP_ARG_NUM_REG)
11890 return NULL_RTX;
11891
11892 if (TARGET_32BIT && TARGET_POWERPC64)
11893 return rs6000_mixed_function_arg (mode, type, align_words);
11894
11895 /* The vector value goes in GPRs. Only the part of the
11896 value in GPRs is reported here. */
11897 part_mode = mode;
11898 n_words = rs6000_arg_size (mode, type);
11899 if (align_words + n_words > GP_ARG_NUM_REG)
11900 /* Fortunately, there are only two possibilities, the value
11901 is either wholly in GPRs or half in GPRs and half not. */
11902 part_mode = DImode;
11903
11904 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
11905 }
11906 }
11907
11908 else if (abi == ABI_V4)
11909 {
11910 if (abi_v4_pass_in_fpr (mode, named))
11911 {
11912 /* _Decimal128 must use an even/odd register pair. This assumes
11913 that the register number is odd when fregno is odd. */
11914 if (mode == TDmode && (cum->fregno % 2) == 1)
11915 cum->fregno++;
11916
11917 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
11918 <= FP_ARG_V4_MAX_REG)
11919 return gen_rtx_REG (mode, cum->fregno);
11920 else
11921 return NULL_RTX;
11922 }
11923 else
11924 {
11925 int n_words = rs6000_arg_size (mode, type);
11926 int gregno = cum->sysv_gregno;
11927
11928 /* Long long is put in (r3,r4), (r5,r6), (r7,r8) or (r9,r10).
11929 So is any other 2-word item such as complex int, due to a
11930 historical mistake. */
11931 if (n_words == 2)
11932 gregno += (1 - gregno) & 1;
11933
11934 /* Multi-reg args are not split between registers and stack. */
11935 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11936 return NULL_RTX;
11937
11938 if (TARGET_32BIT && TARGET_POWERPC64)
11939 return rs6000_mixed_function_arg (mode, type,
11940 gregno - GP_ARG_MIN_REG);
11941 return gen_rtx_REG (mode, gregno);
11942 }
11943 }
11944 else
11945 {
11946 int align_words = rs6000_parm_start (mode, type, cum->words);
11947
11948 /* _Decimal128 must be passed in an even/odd float register pair.
11949 This assumes that the register number is odd when fregno is odd. */
11950 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
11951 cum->fregno++;
11952
11953 if (USE_FP_FOR_ARG_P (cum, elt_mode))
11954 {
11955 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
11956 rtx r, off;
11957 int i, k = 0;
11958 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
11959 int fpr_words;
11960
11961 /* Do we also need to pass this argument in the parameter
11962 save area? */
11963 if (type && (cum->nargs_prototype <= 0
11964 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11965 && TARGET_XL_COMPAT
11966 && align_words >= GP_ARG_NUM_REG)))
11967 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
11968
11969 /* Describe where this argument goes in the fprs. */
11970 for (i = 0; i < n_elts
11971 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
11972 {
11973 /* Check if the argument is split over registers and memory.
11974 This can only ever happen for long double or _Decimal128;
11975 complex types are handled via split_complex_arg. */
11976 machine_mode fmode = elt_mode;
11977 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
11978 {
11979 gcc_assert (FLOAT128_2REG_P (fmode));
11980 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
11981 }
11982
11983 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
11984 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
11985 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11986 }
11987
11988 /* If there were not enough FPRs to hold the argument, the rest
11989 usually goes into memory. However, if the current position
11990 is still within the register parameter area, a portion may
11991 actually have to go into GPRs.
11992
11993 Note that it may happen that the portion of the argument
11994 passed in the first "half" of the first GPR was already
11995 passed in the last FPR as well.
11996
11997 For unnamed arguments, we already set up GPRs to cover the
11998 whole argument in rs6000_psave_function_arg, so there is
11999 nothing further to do at this point. */
12000 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
12001 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
12002 && cum->nargs_prototype > 0)
12003 {
12004 static bool warned;
12005
12006 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
12007 int n_words = rs6000_arg_size (mode, type);
12008
12009 align_words += fpr_words;
12010 n_words -= fpr_words;
12011
12012 do
12013 {
12014 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
12015 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
12016 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12017 }
12018 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
12019
12020 if (!warned && warn_psabi)
12021 {
12022 warned = true;
12023 inform (input_location,
12024 "the ABI of passing homogeneous float aggregates"
12025 " has changed in GCC 5");
12026 }
12027 }
12028
12029 return rs6000_finish_function_arg (mode, rvec, k);
12030 }
12031 else if (align_words < GP_ARG_NUM_REG)
12032 {
12033 if (TARGET_32BIT && TARGET_POWERPC64)
12034 return rs6000_mixed_function_arg (mode, type, align_words);
12035
12036 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12037 }
12038 else
12039 return NULL_RTX;
12040 }
12041 }
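/* Illustration of the unprototyped-FP case described above (a sketch,
   assuming 64-bit AIX): for the first double argument of a call with
   no prototype in scope, cum->nargs_prototype <= 0 makes
   rs6000_psave_function_arg add a (reg:DF 3) element, and the FPR
   loop adds (reg:DF 33), i.e. f1; the value travels in both r3 and
   f1 so the callee can pick it up either way.  */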
12042 \f
12043 /* For an arg passed partly in registers and partly in memory, this is
12044 the number of bytes passed in registers. For args passed entirely in
12045 registers or entirely in memory, zero. When an arg is described by a
12046 PARALLEL, perhaps using more than one register type, this function
12047 returns the number of bytes used by the first element of the PARALLEL. */
12048
12049 static int
12050 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
12051 tree type, bool named)
12052 {
12053 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12054 bool passed_in_gprs = true;
12055 int ret = 0;
12056 int align_words;
12057 machine_mode elt_mode;
12058 int n_elts;
12059
12060 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12061
12062 if (DEFAULT_ABI == ABI_V4)
12063 return 0;
12064
12065 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12066 {
12067 /* If we are passing this arg in the fixed parameter save area (gprs or
12068 memory) as well as VRs, we do not use the partial bytes mechanism;
12069 instead, rs6000_function_arg will return a PARALLEL including a memory
12070 element as necessary. Library support functions for IEEE 128-bit are
12071 assumed to not need the value passed both in GPRs and in vector
12072 registers. */
12073 if (TARGET_64BIT && !cum->prototype
12074 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
12075 return 0;
12076
12077 /* Otherwise, we pass in VRs only. Check for partial copies. */
12078 passed_in_gprs = false;
12079 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
12080 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
12081 }
12082
12083 /* In this complicated case we just disable the partial_nregs code. */
12084 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12085 return 0;
12086
12087 align_words = rs6000_parm_start (mode, type, cum->words);
12088
12089 if (USE_FP_FOR_ARG_P (cum, elt_mode))
12090 {
12091 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
12092
12093 /* If we are passing this arg in the fixed parameter save area
12094 (gprs or memory) as well as FPRs, we do not use the partial
12095 bytes mechanism; instead, rs6000_function_arg will return a
12096 PARALLEL including a memory element as necessary. */
12097 if (type
12098 && (cum->nargs_prototype <= 0
12099 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12100 && TARGET_XL_COMPAT
12101 && align_words >= GP_ARG_NUM_REG)))
12102 return 0;
12103
12104 /* Otherwise, we pass in FPRs only. Check for partial copies. */
12105 passed_in_gprs = false;
12106 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
12107 {
12108 /* Compute number of bytes / words passed in FPRs. If there
12109 is still space available in the register parameter area
12110 *after* that amount, a part of the argument will be passed
12111 in GPRs. In that case, the total amount passed in any
12112 registers is equal to the amount that would have been passed
12113 in GPRs if everything were passed there, so we fall back to
12114 the GPR code below to compute the appropriate value. */
12115 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
12116 * MIN (8, GET_MODE_SIZE (elt_mode)));
12117 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
12118
12119 if (align_words + fpr_words < GP_ARG_NUM_REG)
12120 passed_in_gprs = true;
12121 else
12122 ret = fpr;
12123 }
12124 }
12125
12126 if (passed_in_gprs
12127 && align_words < GP_ARG_NUM_REG
12128 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
12129 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
12130
12131 if (ret != 0 && TARGET_DEBUG_ARG)
12132 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
12133
12134 return ret;
12135 }
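/* A worked example of the computation above (a sketch, assuming the
   ELFv2 64-bit register counts: 13 FP argument registers f1..f13 and
   8 GP argument registers r3..r10):

       struct v8 { double x[8]; };    // homogeneous FP aggregate
       void f (double a, double b, double c,
               double d, double e, double g, struct v8 h);

   a..g occupy f1..f6, so h starts at f7 with only 7 FPRs left for its
   8 elements: fpr = 7 * 8 = 56 bytes go in f7..f13.  Since the
   argument's position (doubleword 6 plus 7 FPR words) is already past
   the 8-word GPR area, the function returns 56 and the last element
   goes in memory.  */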
12136 \f
12137 /* A C expression that indicates when an argument must be passed by
12138 reference. If nonzero for an argument, a copy of that argument is
12139 made in memory and a pointer to the argument is passed instead of
12140 the argument itself. The pointer is passed in whatever way is
12141 appropriate for passing a pointer to that type.
12142
12143 Under V.4, aggregates and long double are passed by reference.
12144
12145 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
12146 reference unless the AltiVec vector extension ABI is in force.
12147
12148 As an extension to all ABIs, variable sized types are passed by
12149 reference. */
12150
12151 static bool
12152 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
12153 machine_mode mode, const_tree type,
12154 bool named ATTRIBUTE_UNUSED)
12155 {
12156 if (!type)
12157 return 0;
12158
12159 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
12160 && FLOAT128_IEEE_P (TYPE_MODE (type)))
12161 {
12162 if (TARGET_DEBUG_ARG)
12163 fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
12164 return 1;
12165 }
12166
12167 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
12168 {
12169 if (TARGET_DEBUG_ARG)
12170 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
12171 return 1;
12172 }
12173
12174 if (int_size_in_bytes (type) < 0)
12175 {
12176 if (TARGET_DEBUG_ARG)
12177 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
12178 return 1;
12179 }
12180
12181 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
12182 modes only exist for GCC vector types if -maltivec. */
12183 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
12184 {
12185 if (TARGET_DEBUG_ARG)
12186 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
12187 return 1;
12188 }
12189
12190 /* Pass synthetic vectors in memory. */
12191 if (TREE_CODE (type) == VECTOR_TYPE
12192 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
12193 {
12194 static bool warned_for_pass_big_vectors = false;
12195 if (TARGET_DEBUG_ARG)
12196 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
12197 if (!warned_for_pass_big_vectors)
12198 {
12199 warning (OPT_Wpsabi, "GCC vector passed by reference: "
12200 "non-standard ABI extension with no compatibility "
12201 "guarantee");
12202 warned_for_pass_big_vectors = true;
12203 }
12204 return 1;
12205 }
12206
12207 return 0;
12208 }
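/* For illustration (a sketch, not ABI text): under the 32-bit SVR4 ABI
   the call below passes s by reference; the caller builds a temporary
   copy and passes its address in r3:

       struct S { int a, b; };
       void g (struct S s);    // V4: callee receives the copy's address

   whereas the AIX and ELFv2 ABIs pass the eight bytes of S directly in
   GPRs.  */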
12209
12210 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
12211 already processed. Return true if the parameter must be passed
12212 (fully or partially) on the stack. */
12213
12214 static bool
12215 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
12216 {
12217 machine_mode mode;
12218 int unsignedp;
12219 rtx entry_parm;
12220
12221 /* Catch errors. */
12222 if (type == NULL || type == error_mark_node)
12223 return true;
12224
12225 /* Handle types with no storage requirement. */
12226 if (TYPE_MODE (type) == VOIDmode)
12227 return false;
12228
12229 /* Handle complex types: check (and advance past) each of the two components in turn. */
12230 if (TREE_CODE (type) == COMPLEX_TYPE)
12231 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
12232 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
12233
12234 /* Handle transparent aggregates. */
12235 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
12236 && TYPE_TRANSPARENT_AGGR (type))
12237 type = TREE_TYPE (first_field (type));
12238
12239 /* See if this arg was passed by invisible reference. */
12240 if (pass_by_reference (get_cumulative_args (args_so_far),
12241 TYPE_MODE (type), type, true))
12242 type = build_pointer_type (type);
12243
12244 /* Find mode as it is passed by the ABI. */
12245 unsignedp = TYPE_UNSIGNED (type);
12246 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
12247
12248 /* If we must pass in stack, we need a stack. */
12249 if (rs6000_must_pass_in_stack (mode, type))
12250 return true;
12251
12252 /* If there is no incoming register, we need a stack. */
12253 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
12254 if (entry_parm == NULL)
12255 return true;
12256
12257 /* Likewise if we need to pass both in registers and on the stack. */
12258 if (GET_CODE (entry_parm) == PARALLEL
12259 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
12260 return true;
12261
12262 /* Also true if we're partially in registers and partially not. */
12263 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
12264 return true;
12265
12266 /* Update info on where next arg arrives in registers. */
12267 rs6000_function_arg_advance (args_so_far, mode, type, true);
12268 return false;
12269 }
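/* As a sketch of how the walk above is used: for

       void h (int a, double b);    // prototyped, not variadic

   both parameters land in registers (r3 and f1 under ELFv2 64-bit),
   each call to rs6000_parm_needs_stack returns false, and the caller
   may omit the parameter save area.  Were the function variadic, or
   had it more arguments than the registers can hold, the answer would
   flip to true.  */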
12270
12271 /* Return true if FUN has no prototype, has a variable argument
12272 list, or passes any parameter in memory. */
12273
12274 static bool
12275 rs6000_function_parms_need_stack (tree fun, bool incoming)
12276 {
12277 tree fntype, result;
12278 CUMULATIVE_ARGS args_so_far_v;
12279 cumulative_args_t args_so_far;
12280
12281 if (!fun)
12282 /* Must be a libcall, all of which only use reg parms. */
12283 return false;
12284
12285 fntype = fun;
12286 if (!TYPE_P (fun))
12287 fntype = TREE_TYPE (fun);
12288
12289 /* Varargs functions need the parameter save area. */
12290 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
12291 return true;
12292
12293 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
12294 args_so_far = pack_cumulative_args (&args_so_far_v);
12295
12296 /* When incoming, we will have been passed the function decl.
12297 It is necessary to use the decl to handle K&R style functions,
12298 where TYPE_ARG_TYPES may not be available. */
12299 if (incoming)
12300 {
12301 gcc_assert (DECL_P (fun));
12302 result = DECL_RESULT (fun);
12303 }
12304 else
12305 result = TREE_TYPE (fntype);
12306
12307 if (result && aggregate_value_p (result, fntype))
12308 {
12309 if (!TYPE_P (result))
12310 result = TREE_TYPE (result);
12311 result = build_pointer_type (result);
12312 rs6000_parm_needs_stack (args_so_far, result);
12313 }
12314
12315 if (incoming)
12316 {
12317 tree parm;
12318
12319 for (parm = DECL_ARGUMENTS (fun);
12320 parm && parm != void_list_node;
12321 parm = TREE_CHAIN (parm))
12322 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
12323 return true;
12324 }
12325 else
12326 {
12327 function_args_iterator args_iter;
12328 tree arg_type;
12329
12330 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
12331 if (rs6000_parm_needs_stack (args_so_far, arg_type))
12332 return true;
12333 }
12334
12335 return false;
12336 }
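/* Note the incoming/outgoing asymmetry above: for an old-style (K&R)
   definition such as

       int k (a) int a; { return a; }    // no prototype in the type

   TYPE_ARG_TYPES is not available, so the incoming path walks
   DECL_ARGUMENTS of the decl rather than the function type.  */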
12337
12338 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
12339 usually a constant depending on the ABI. However, in the ELFv2 ABI
12340 the register parameter area is optional when calling a function that
12341 has a prototype in scope, has no variable argument list, and passes
12342 all parameters in registers. */
12343
12344 int
12345 rs6000_reg_parm_stack_space (tree fun, bool incoming)
12346 {
12347 int reg_parm_stack_space;
12348
12349 switch (DEFAULT_ABI)
12350 {
12351 default:
12352 reg_parm_stack_space = 0;
12353 break;
12354
12355 case ABI_AIX:
12356 case ABI_DARWIN:
12357 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
12358 break;
12359
12360 case ABI_ELFv2:
12361 /* ??? Recomputing this every time is a bit expensive. Is there
12362 a place to cache this information? */
12363 if (rs6000_function_parms_need_stack (fun, incoming))
12364 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
12365 else
12366 reg_parm_stack_space = 0;
12367 break;
12368 }
12369
12370 return reg_parm_stack_space;
12371 }
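/* Illustrative return values for 64-bit targets:

       void v (int, ...);     // varargs: 64 under both AIX and ELFv2
       long w (long, long);   // all in regs: AIX 64, ELFv2 0

   The default (e.g. SVR4/V4) case is always 0.  */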
12372
12373 static void
12374 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
12375 {
12376 int i;
12377 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
12378
12379 if (nregs == 0)
12380 return;
12381
12382 for (i = 0; i < nregs; i++)
12383 {
12384 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
12385 if (reload_completed)
12386 {
12387 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
12388 tem = NULL_RTX;
12389 else
12390 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
12391 i * GET_MODE_SIZE (reg_mode));
12392 }
12393 else
12394 tem = replace_equiv_address (tem, XEXP (tem, 0));
12395
12396 gcc_assert (tem);
12397
12398 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
12399 }
12400 }
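/* E.g. rs6000_move_block_from_reg (GP_ARG_MIN_REG + 2, mem, 3) emits
   three word-mode stores, saving r5, r6 and r7 to MEM, MEM+8 and
   MEM+16 on a 64-bit target (4-byte slots on 32-bit).  */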
12401 \f
12402 /* Perform any actions needed for a function that is receiving a
12403 variable number of arguments.
12404
12405 CUM is as above.
12406
12407 MODE and TYPE are the mode and type of the current parameter.
12408
12409 PRETEND_SIZE is a variable that should be set to the amount of stack
12410 that must be pushed by the prolog to pretend that our caller pushed
12411 it.
12412
12413 Normally, this macro will push all remaining incoming registers on the
12414 stack and set PRETEND_SIZE to the length of the registers pushed. */
12415
12416 static void
12417 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
12418 tree type, int *pretend_size ATTRIBUTE_UNUSED,
12419 int no_rtl)
12420 {
12421 CUMULATIVE_ARGS next_cum;
12422 int reg_size = TARGET_32BIT ? 4 : 8;
12423 rtx save_area = NULL_RTX, mem;
12424 int first_reg_offset;
12425 alias_set_type set;
12426
12427 /* Skip the last named argument. */
12428 next_cum = *get_cumulative_args (cum);
12429 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
12430
12431 if (DEFAULT_ABI == ABI_V4)
12432 {
12433 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
12434
12435 if (! no_rtl)
12436 {
12437 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
12438 HOST_WIDE_INT offset = 0;
12439
12440 /* Try to optimize the size of the varargs save area.
12441 The ABI requires that ap.reg_save_area is doubleword
12442 aligned, but we don't need to allocate space for all
12443 the bytes, only those to which we actually will save
12444 anything. */
12445 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
12446 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
12447 if (TARGET_HARD_FLOAT
12448 && next_cum.fregno <= FP_ARG_V4_MAX_REG
12449 && cfun->va_list_fpr_size)
12450 {
12451 if (gpr_reg_num)
12452 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
12453 * UNITS_PER_FP_WORD;
12454 if (cfun->va_list_fpr_size
12455 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
12456 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
12457 else
12458 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
12459 * UNITS_PER_FP_WORD;
12460 }
12461 if (gpr_reg_num)
12462 {
12463 offset = -((first_reg_offset * reg_size) & ~7);
12464 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
12465 {
12466 gpr_reg_num = cfun->va_list_gpr_size;
12467 if (reg_size == 4 && (first_reg_offset & 1))
12468 gpr_reg_num++;
12469 }
12470 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
12471 }
12472 else if (fpr_size)
12473 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
12474 * UNITS_PER_FP_WORD
12475 - (int) (GP_ARG_NUM_REG * reg_size);
12476
12477 if (gpr_size + fpr_size)
12478 {
12479 rtx reg_save_area
12480 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
12481 gcc_assert (GET_CODE (reg_save_area) == MEM);
12482 reg_save_area = XEXP (reg_save_area, 0);
12483 if (GET_CODE (reg_save_area) == PLUS)
12484 {
12485 gcc_assert (XEXP (reg_save_area, 0)
12486 == virtual_stack_vars_rtx);
12487 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
12488 offset += INTVAL (XEXP (reg_save_area, 1));
12489 }
12490 else
12491 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
12492 }
12493
12494 cfun->machine->varargs_save_offset = offset;
12495 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
12496 }
12497 }
12498 else
12499 {
12500 first_reg_offset = next_cum.words;
12501 save_area = crtl->args.internal_arg_pointer;
12502
12503 if (targetm.calls.must_pass_in_stack (mode, type))
12504 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
12505 }
12506
12507 set = get_varargs_alias_set ();
12508 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
12509 && cfun->va_list_gpr_size)
12510 {
12511 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
12512
12513 if (va_list_gpr_counter_field)
12514 /* V4 va_list_gpr_size counts number of registers needed. */
12515 n_gpr = cfun->va_list_gpr_size;
12516 else
12517 /* char * va_list instead counts number of bytes needed. */
12518 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
12519
12520 if (nregs > n_gpr)
12521 nregs = n_gpr;
12522
12523 mem = gen_rtx_MEM (BLKmode,
12524 plus_constant (Pmode, save_area,
12525 first_reg_offset * reg_size));
12526 MEM_NOTRAP_P (mem) = 1;
12527 set_mem_alias_set (mem, set);
12528 set_mem_align (mem, BITS_PER_WORD);
12529
12530 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
12531 nregs);
12532 }
12533
12534 /* Save FP registers if needed. */
12535 if (DEFAULT_ABI == ABI_V4
12536 && TARGET_HARD_FLOAT
12537 && ! no_rtl
12538 && next_cum.fregno <= FP_ARG_V4_MAX_REG
12539 && cfun->va_list_fpr_size)
12540 {
12541 int fregno = next_cum.fregno, nregs;
12542 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
12543 rtx lab = gen_label_rtx ();
12544 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
12545 * UNITS_PER_FP_WORD);
12546
12547 emit_jump_insn
12548 (gen_rtx_SET (pc_rtx,
12549 gen_rtx_IF_THEN_ELSE (VOIDmode,
12550 gen_rtx_NE (VOIDmode, cr1,
12551 const0_rtx),
12552 gen_rtx_LABEL_REF (VOIDmode, lab),
12553 pc_rtx)));
12554
12555 for (nregs = 0;
12556 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
12557 fregno++, off += UNITS_PER_FP_WORD, nregs++)
12558 {
12559 mem = gen_rtx_MEM (TARGET_HARD_FLOAT ? DFmode : SFmode,
12560 plus_constant (Pmode, save_area, off));
12561 MEM_NOTRAP_P (mem) = 1;
12562 set_mem_alias_set (mem, set);
12563 set_mem_align (mem, GET_MODE_ALIGNMENT (
12564 TARGET_HARD_FLOAT ? DFmode : SFmode));
12565 emit_move_insn (mem, gen_rtx_REG (
12566 TARGET_HARD_FLOAT ? DFmode : SFmode, fregno));
12567 }
12568
12569 emit_label (lab);
12570 }
12571 }
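/* Sketch of the effect above for a V4 (32-bit SVR4) variadic function:

       int sum (int n, ...);    // n arrives in r3; r4..r10 are unnamed

   The code emitted here stores r4..r10 into the GPR half of the
   register save area and, guarded by a test of CR1 (which the caller
   sets to indicate whether FP arguments were passed in registers),
   f1..f8 into the FPR half, so va_arg can later fetch any of them
   from memory.  */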
12572
12573 /* Create the va_list data type. */
12574
12575 static tree
12576 rs6000_build_builtin_va_list (void)
12577 {
12578 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
12579
12580 /* For AIX, prefer 'char *' because that's what the system
12581 header files like. */
12582 if (DEFAULT_ABI != ABI_V4)
12583 return build_pointer_type (char_type_node);
12584
12585 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
12586 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
12587 get_identifier ("__va_list_tag"), record);
12588
12589 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
12590 unsigned_char_type_node);
12591 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
12592 unsigned_char_type_node);
12593 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
12594 every user file. */
12595 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12596 get_identifier ("reserved"), short_unsigned_type_node);
12597 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12598 get_identifier ("overflow_arg_area"),
12599 ptr_type_node);
12600 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12601 get_identifier ("reg_save_area"),
12602 ptr_type_node);
12603
12604 va_list_gpr_counter_field = f_gpr;
12605 va_list_fpr_counter_field = f_fpr;
12606
12607 DECL_FIELD_CONTEXT (f_gpr) = record;
12608 DECL_FIELD_CONTEXT (f_fpr) = record;
12609 DECL_FIELD_CONTEXT (f_res) = record;
12610 DECL_FIELD_CONTEXT (f_ovf) = record;
12611 DECL_FIELD_CONTEXT (f_sav) = record;
12612
12613 TYPE_STUB_DECL (record) = type_decl;
12614 TYPE_NAME (record) = type_decl;
12615 TYPE_FIELDS (record) = f_gpr;
12616 DECL_CHAIN (f_gpr) = f_fpr;
12617 DECL_CHAIN (f_fpr) = f_res;
12618 DECL_CHAIN (f_res) = f_ovf;
12619 DECL_CHAIN (f_ovf) = f_sav;
12620
12621 layout_type (record);
12622
12623 /* The correct type is an array type of one element. */
12624 return build_array_type (record, build_index_type (size_zero_node));
12625 }
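/* For reference, the V4 record built above corresponds to this C type
   (typedef name for illustration; the AIX branch is simply char *):

       typedef struct __va_list_tag
       {
         unsigned char gpr;          // # of GP arg regs consumed, 0..8
         unsigned char fpr;          // # of FP arg regs consumed, 0..8
         unsigned short reserved;    // the named padding from above
         void *overflow_arg_area;    // next stack-passed argument
         void *reg_save_area;        // base of the r3..r10/f1..f8 save area
       } __gnuc_va_list[1];
*/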
12626
12627 /* Implement va_start. */
12628
12629 static void
12630 rs6000_va_start (tree valist, rtx nextarg)
12631 {
12632 HOST_WIDE_INT words, n_gpr, n_fpr;
12633 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
12634 tree gpr, fpr, ovf, sav, t;
12635
12636 /* Only SVR4 needs something special. */
12637 if (DEFAULT_ABI != ABI_V4)
12638 {
12639 std_expand_builtin_va_start (valist, nextarg);
12640 return;
12641 }
12642
12643 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12644 f_fpr = DECL_CHAIN (f_gpr);
12645 f_res = DECL_CHAIN (f_fpr);
12646 f_ovf = DECL_CHAIN (f_res);
12647 f_sav = DECL_CHAIN (f_ovf);
12648
12649 valist = build_simple_mem_ref (valist);
12650 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12651 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
12652 f_fpr, NULL_TREE);
12653 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
12654 f_ovf, NULL_TREE);
12655 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
12656 f_sav, NULL_TREE);
12657
12658 /* Count number of gp and fp argument registers used. */
12659 words = crtl->args.info.words;
12660 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
12661 GP_ARG_NUM_REG);
12662 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
12663 FP_ARG_NUM_REG);
12664
12665 if (TARGET_DEBUG_ARG)
12666 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
12667 HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
12668 words, n_gpr, n_fpr);
12669
12670 if (cfun->va_list_gpr_size)
12671 {
12672 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
12673 build_int_cst (NULL_TREE, n_gpr));
12674 TREE_SIDE_EFFECTS (t) = 1;
12675 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12676 }
12677
12678 if (cfun->va_list_fpr_size)
12679 {
12680 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
12681 build_int_cst (NULL_TREE, n_fpr));
12682 TREE_SIDE_EFFECTS (t) = 1;
12683 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12684
12685 #ifdef HAVE_AS_GNU_ATTRIBUTE
12686 if (call_ABI_of_interest (cfun->decl))
12687 rs6000_passes_float = true;
12688 #endif
12689 }
12690
12691 /* Find the overflow area. */
12692 t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
12693 if (words != 0)
12694 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
12695 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12696 TREE_SIDE_EFFECTS (t) = 1;
12697 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12698
12699 /* If there were no va_arg invocations, don't set up the register
12700 save area. */
12701 if (!cfun->va_list_gpr_size
12702 && !cfun->va_list_fpr_size
12703 && n_gpr < GP_ARG_NUM_REG
12704 && n_fpr < FP_ARG_V4_MAX_REG)
12705 return;
12706
12707 /* Find the register save area. */
12708 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
12709 if (cfun->machine->varargs_save_offset)
12710 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
12711 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12712 TREE_SIDE_EFFECTS (t) = 1;
12713 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12714 }
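/* Sketch of the resulting state for a V4 function
   void note (const char *fmt, ...): fmt consumed r3, so va_start
   leaves gpr = 1 and fpr = 0, points overflow_arg_area at the first
   stack-passed argument slot, and points reg_save_area at the block
   saved by setup_incoming_varargs above.  */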
12715
12716 /* Implement va_arg. */
12717
12718 static tree
12719 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
12720 gimple_seq *post_p)
12721 {
12722 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
12723 tree gpr, fpr, ovf, sav, reg, t, u;
12724 int size, rsize, n_reg, sav_ofs, sav_scale;
12725 tree lab_false, lab_over, addr;
12726 int align;
12727 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
12728 int regalign = 0;
12729 gimple *stmt;
12730
12731 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
12732 {
12733 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
12734 return build_va_arg_indirect_ref (t);
12735 }
12736
12737 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
12738 earlier version of gcc, with the property that it always applied alignment
12739 adjustments to the va-args (even for zero-sized types). The cheapest way
12740 to deal with this is to replicate the effect of the part of
12741 std_gimplify_va_arg_expr that carries out the align adjust, for the
12742 relevant case.
12743 We don't need to check for pass-by-reference because of the test above.
12744 We can return a simplified answer, since we know there's no offset to add. */
12745
12746 if (((TARGET_MACHO
12747 && rs6000_darwin64_abi)
12748 || DEFAULT_ABI == ABI_ELFv2
12749 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
12750 && integer_zerop (TYPE_SIZE (type)))
12751 {
12752 unsigned HOST_WIDE_INT align, boundary;
12753 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
12754 align = PARM_BOUNDARY / BITS_PER_UNIT;
12755 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
12756 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
12757 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
12758 boundary /= BITS_PER_UNIT;
12759 if (boundary > align)
12760 {
12761 tree t;
12762 /* This updates arg ptr by the amount that would be necessary
12763 to align the zero-sized (but not zero-alignment) item. */
12764 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
12765 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
12766 gimplify_and_add (t, pre_p);
12767
12768 t = fold_convert (sizetype, valist_tmp);
12769 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
12770 fold_convert (TREE_TYPE (valist),
12771 fold_build2 (BIT_AND_EXPR, sizetype, t,
12772 size_int (-boundary))));
12773 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
12774 gimplify_and_add (t, pre_p);
12775 }
12776 /* Since it is zero-sized there's no increment for the item itself. */
12777 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
12778 return build_va_arg_indirect_ref (valist_tmp);
12779 }
12780
12781 if (DEFAULT_ABI != ABI_V4)
12782 {
12783 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
12784 {
12785 tree elem_type = TREE_TYPE (type);
12786 machine_mode elem_mode = TYPE_MODE (elem_type);
12787 int elem_size = GET_MODE_SIZE (elem_mode);
12788
12789 if (elem_size < UNITS_PER_WORD)
12790 {
12791 tree real_part, imag_part;
12792 gimple_seq post = NULL;
12793
12794 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
12795 &post);
12796 /* Copy the value into a temporary, lest the formal temporary
12797 be reused out from under us. */
12798 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
12799 gimple_seq_add_seq (pre_p, post);
12800
12801 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
12802 post_p);
12803
12804 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
12805 }
12806 }
12807
12808 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
12809 }
12810
12811 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12812 f_fpr = DECL_CHAIN (f_gpr);
12813 f_res = DECL_CHAIN (f_fpr);
12814 f_ovf = DECL_CHAIN (f_res);
12815 f_sav = DECL_CHAIN (f_ovf);
12816
12817 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12818 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
12819 f_fpr, NULL_TREE);
12820 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
12821 f_ovf, NULL_TREE);
12822 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
12823 f_sav, NULL_TREE);
12824
12825 size = int_size_in_bytes (type);
12826 rsize = (size + 3) / 4;
12827 int pad = 4 * rsize - size;
12828 align = 1;
12829
12830 machine_mode mode = TYPE_MODE (type);
12831 if (abi_v4_pass_in_fpr (mode, false))
12832 {
12833 /* FP args go in FP registers, if present. */
12834 reg = fpr;
12835 n_reg = (size + 7) / 8;
12836 sav_ofs = (TARGET_HARD_FLOAT ? 8 : 4) * 4;
12837 sav_scale = (TARGET_HARD_FLOAT ? 8 : 4);
12838 if (mode != SFmode && mode != SDmode)
12839 align = 8;
12840 }
12841 else
12842 {
12843 /* Otherwise into GP registers. */
12844 reg = gpr;
12845 n_reg = rsize;
12846 sav_ofs = 0;
12847 sav_scale = 4;
12848 if (n_reg == 2)
12849 align = 8;
12850 }
12851
12852 /* Pull the value out of the saved registers.... */
12853
12854 lab_over = NULL;
12855 addr = create_tmp_var (ptr_type_node, "addr");
12856
12857 /* AltiVec vectors never go in registers when -mabi=altivec. */
12858 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
12859 align = 16;
12860 else
12861 {
12862 lab_false = create_artificial_label (input_location);
12863 lab_over = create_artificial_label (input_location);
12864
12865 /* Long long is aligned in the registers, as is any other 2-gpr
12866 item such as complex int, due to a historical mistake. */
12867 u = reg;
12868 if (n_reg == 2 && reg == gpr)
12869 {
12870 regalign = 1;
12871 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12872 build_int_cst (TREE_TYPE (reg), n_reg - 1));
12873 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
12874 unshare_expr (reg), u);
12875 }
12876 /* _Decimal128 is passed in even/odd fpr pairs; the stored
12877 reg number is 0 for f1, so we want to make it odd. */
12878 else if (reg == fpr && mode == TDmode)
12879 {
12880 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12881 build_int_cst (TREE_TYPE (reg), 1));
12882 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
12883 }
12884
12885 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
12886 t = build2 (GE_EXPR, boolean_type_node, u, t);
12887 u = build1 (GOTO_EXPR, void_type_node, lab_false);
12888 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12889 gimplify_and_add (t, pre_p);
12890
12891 t = sav;
12892 if (sav_ofs)
12893 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12894
12895 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12896 build_int_cst (TREE_TYPE (reg), n_reg));
12897 u = fold_convert (sizetype, u);
12898 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
12899 t = fold_build_pointer_plus (t, u);
12900
12901 /* _Decimal32 varargs are located in the second word of the 64-bit
12902 FP register for 32-bit binaries. */
12903 if (TARGET_32BIT && TARGET_HARD_FLOAT && mode == SDmode)
12904 t = fold_build_pointer_plus_hwi (t, size);
12905
12906 /* Args are passed right-aligned. */
12907 if (BYTES_BIG_ENDIAN)
12908 t = fold_build_pointer_plus_hwi (t, pad);
12909
12910 gimplify_assign (addr, t, pre_p);
12911
12912 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12913
12914 stmt = gimple_build_label (lab_false);
12915 gimple_seq_add_stmt (pre_p, stmt);
12916
12917 if ((n_reg == 2 && !regalign) || n_reg > 2)
12918 {
12919 /* Ensure that we don't find any more args in regs.
12920 Alignment has already taken care of the special cases. */
12921 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
12922 }
12923 }
12924
12925 /* ... otherwise out of the overflow area. */
12926
12927 /* Care for on-stack alignment if needed. */
12928 t = ovf;
12929 if (align != 1)
12930 {
12931 t = fold_build_pointer_plus_hwi (t, align - 1);
12932 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
12933 build_int_cst (TREE_TYPE (t), -align));
12934 }
12935
12936 /* Args are passed right-aligned. */
12937 if (BYTES_BIG_ENDIAN)
12938 t = fold_build_pointer_plus_hwi (t, pad);
12939
12940 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12941
12942 gimplify_assign (unshare_expr (addr), t, pre_p);
12943
12944 t = fold_build_pointer_plus_hwi (t, size);
12945 gimplify_assign (unshare_expr (ovf), t, pre_p);
12946
12947 if (lab_over)
12948 {
12949 stmt = gimple_build_label (lab_over);
12950 gimple_seq_add_stmt (pre_p, stmt);
12951 }
12952
12953 if (STRICT_ALIGNMENT
12954 && (TYPE_ALIGN (type)
12955 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
12956 {
12957 /* The value (of type complex double, for example) may not be
12958 aligned in memory in the saved registers, so copy via a
12959 temporary. (This is the same code as used for SPARC.) */
12960 tree tmp = create_tmp_var (type, "va_arg_tmp");
12961 tree dest_addr = build_fold_addr_expr (tmp);
12962
12963 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
12964 3, dest_addr, addr, size_int (rsize * 4));
12965 TREE_ADDRESSABLE (tmp) = 1;
12966
12967 gimplify_and_add (copy, pre_p);
12968 addr = dest_addr;
12969 }
12970
12971 addr = fold_convert (ptrtype, addr);
12972 return build_va_arg_indirect_ref (addr);
12973 }
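/* The register/overflow split above is roughly equivalent, for the V4
   expansion of va_arg (ap, int), to this sketch (pointer arithmetic
   shown in bytes):

       if (ap->gpr < 8)
         addr = ap->reg_save_area + 4 * ap->gpr++;
       else
         {
           addr = ap->overflow_arg_area;
           ap->overflow_arg_area = addr + 4;
         }
       result = *(int *) addr;

   with the extra alignment, FPR-offset and big-endian padding
   adjustments applied for wider or floating-point types.  */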
12974
12975 /* Builtins. */
12976
12977 static void
12978 def_builtin (const char *name, tree type, enum rs6000_builtins code)
12979 {
12980 tree t;
12981 unsigned classify = rs6000_builtin_info[(int)code].attr;
12982 const char *attr_string = "";
12983
12984 gcc_assert (name != NULL);
12985 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
12986
12987 if (rs6000_builtin_decls[(int)code])
12988 fatal_error (input_location,
12989 "internal error: builtin function %qs already processed",
12990 name);
12991
12992 rs6000_builtin_decls[(int)code] = t =
12993 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
12994
12995 /* Set any special attributes. */
12996 if ((classify & RS6000_BTC_CONST) != 0)
12997 {
12998 /* const function, function only depends on the inputs. */
12999 TREE_READONLY (t) = 1;
13000 TREE_NOTHROW (t) = 1;
13001 attr_string = ", const";
13002 }
13003 else if ((classify & RS6000_BTC_PURE) != 0)
13004 {
13005 /* pure function, function can read global memory, but does not set any
13006 external state. */
13007 DECL_PURE_P (t) = 1;
13008 TREE_NOTHROW (t) = 1;
13009 attr_string = ", pure";
13010 }
13011 else if ((classify & RS6000_BTC_FP) != 0)
13012 {
13013 /* Function is a math function. If rounding mode is on, then treat the
13014 function as not reading global memory, but it can have arbitrary side
13015 effects. If it is off, then assume the function is a const function.
13016 This mimics the ATTR_MATHFN_FPROUNDING attribute in
13017 builtin-attribute.def that is used for the math functions. */
13018 TREE_NOTHROW (t) = 1;
13019 if (flag_rounding_math)
13020 {
13021 DECL_PURE_P (t) = 1;
13022 DECL_IS_NOVOPS (t) = 1;
13023 attr_string = ", fp, pure";
13024 }
13025 else
13026 {
13027 TREE_READONLY (t) = 1;
13028 attr_string = ", fp, const";
13029 }
13030 }
13031 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
13032 gcc_unreachable ();
13033
13034 if (TARGET_DEBUG_BUILTIN)
13035 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
13036 (int)code, name, attr_string);
13037 }
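/* Typical use (a sketch; the names stand in for entries generated from
   rs6000-builtin.def):

       def_builtin ("__builtin_altivec_vaddubm",
                    v16qi_ftype_v16qi_v16qi, ALTIVEC_BUILTIN_VADDUBM);

   If the builtin's attr field has RS6000_BTC_CONST set, the new decl
   is marked TREE_READONLY and TREE_NOTHROW.  */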
13038
13039 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
13040
13041 #undef RS6000_BUILTIN_0
13042 #undef RS6000_BUILTIN_1
13043 #undef RS6000_BUILTIN_2
13044 #undef RS6000_BUILTIN_3
13045 #undef RS6000_BUILTIN_A
13046 #undef RS6000_BUILTIN_D
13047 #undef RS6000_BUILTIN_H
13048 #undef RS6000_BUILTIN_P
13049 #undef RS6000_BUILTIN_X
13050
13051 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13052 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13053 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13054 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
13055 { MASK, ICODE, NAME, ENUM },
13056
13057 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13058 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13059 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13060 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13061 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13062
13063 static const struct builtin_description bdesc_3arg[] =
13064 {
13065 #include "rs6000-builtin.def"
13066 };
13067
13068 /* DST operations: void foo (void *, const int, const char). */
13069
13070 #undef RS6000_BUILTIN_0
13071 #undef RS6000_BUILTIN_1
13072 #undef RS6000_BUILTIN_2
13073 #undef RS6000_BUILTIN_3
13074 #undef RS6000_BUILTIN_A
13075 #undef RS6000_BUILTIN_D
13076 #undef RS6000_BUILTIN_H
13077 #undef RS6000_BUILTIN_P
13078 #undef RS6000_BUILTIN_X
13079
13080 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13081 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13082 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13083 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13084 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13085 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
13086 { MASK, ICODE, NAME, ENUM },
13087
13088 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13089 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13090 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13091
13092 static const struct builtin_description bdesc_dst[] =
13093 {
13094 #include "rs6000-builtin.def"
13095 };
13096
13097 /* Simple binary operations: VECc = foo (VECa, VECb). */
13098
13099 #undef RS6000_BUILTIN_0
13100 #undef RS6000_BUILTIN_1
13101 #undef RS6000_BUILTIN_2
13102 #undef RS6000_BUILTIN_3
13103 #undef RS6000_BUILTIN_A
13104 #undef RS6000_BUILTIN_D
13105 #undef RS6000_BUILTIN_H
13106 #undef RS6000_BUILTIN_P
13107 #undef RS6000_BUILTIN_X
13108
13109 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13110 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13111 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
13112 { MASK, ICODE, NAME, ENUM },
13113
13114 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13115 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13116 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13117 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13118 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13119 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13120
13121 static const struct builtin_description bdesc_2arg[] =
13122 {
13123 #include "rs6000-builtin.def"
13124 };
13125
13126 #undef RS6000_BUILTIN_0
13127 #undef RS6000_BUILTIN_1
13128 #undef RS6000_BUILTIN_2
13129 #undef RS6000_BUILTIN_3
13130 #undef RS6000_BUILTIN_A
13131 #undef RS6000_BUILTIN_D
13132 #undef RS6000_BUILTIN_H
13133 #undef RS6000_BUILTIN_P
13134 #undef RS6000_BUILTIN_X
13135
13136 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13137 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13138 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13139 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13140 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13141 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13142 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13143 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
13144 { MASK, ICODE, NAME, ENUM },
13145
13146 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13147
13148 /* AltiVec predicates. */
13149
13150 static const struct builtin_description bdesc_altivec_preds[] =
13151 {
13152 #include "rs6000-builtin.def"
13153 };
13154
13155 /* ABS* operations. */
13156
13157 #undef RS6000_BUILTIN_0
13158 #undef RS6000_BUILTIN_1
13159 #undef RS6000_BUILTIN_2
13160 #undef RS6000_BUILTIN_3
13161 #undef RS6000_BUILTIN_A
13162 #undef RS6000_BUILTIN_D
13163 #undef RS6000_BUILTIN_H
13164 #undef RS6000_BUILTIN_P
13165 #undef RS6000_BUILTIN_X
13166
13167 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13168 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13169 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13170 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13171 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
13172 { MASK, ICODE, NAME, ENUM },
13173
13174 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13175 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13176 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13177 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13178
13179 static const struct builtin_description bdesc_abs[] =
13180 {
13181 #include "rs6000-builtin.def"
13182 };
13183
13184 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
13185 foo (VECa). */
13186
13187 #undef RS6000_BUILTIN_0
13188 #undef RS6000_BUILTIN_1
13189 #undef RS6000_BUILTIN_2
13190 #undef RS6000_BUILTIN_3
13191 #undef RS6000_BUILTIN_A
13192 #undef RS6000_BUILTIN_D
13193 #undef RS6000_BUILTIN_H
13194 #undef RS6000_BUILTIN_P
13195 #undef RS6000_BUILTIN_X
13196
13197 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13198 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
13199 { MASK, ICODE, NAME, ENUM },
13200
13201 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13202 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13203 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13204 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13205 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13206 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13207 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13208
13209 static const struct builtin_description bdesc_1arg[] =
13210 {
13211 #include "rs6000-builtin.def"
13212 };
13213
13214 /* Simple no-argument operations: result = __builtin_darn_32 (). */
13215
13216 #undef RS6000_BUILTIN_0
13217 #undef RS6000_BUILTIN_1
13218 #undef RS6000_BUILTIN_2
13219 #undef RS6000_BUILTIN_3
13220 #undef RS6000_BUILTIN_A
13221 #undef RS6000_BUILTIN_D
13222 #undef RS6000_BUILTIN_H
13223 #undef RS6000_BUILTIN_P
13224 #undef RS6000_BUILTIN_X
13225
13226 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
13227 { MASK, ICODE, NAME, ENUM },
13228
13229 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13230 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13231 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13232 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13233 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13234 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13235 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13236 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13237
13238 static const struct builtin_description bdesc_0arg[] =
13239 {
13240 #include "rs6000-builtin.def"
13241 };
13242
13243 /* HTM builtins. */
13244 #undef RS6000_BUILTIN_0
13245 #undef RS6000_BUILTIN_1
13246 #undef RS6000_BUILTIN_2
13247 #undef RS6000_BUILTIN_3
13248 #undef RS6000_BUILTIN_A
13249 #undef RS6000_BUILTIN_D
13250 #undef RS6000_BUILTIN_H
13251 #undef RS6000_BUILTIN_P
13252 #undef RS6000_BUILTIN_X
13253
13254 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13255 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13256 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13257 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13258 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13259 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13260 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
13261 { MASK, ICODE, NAME, ENUM },
13262
13263 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13264 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13265
13266 static const struct builtin_description bdesc_htm[] =
13267 {
13268 #include "rs6000-builtin.def"
13269 };
13270
13271 #undef RS6000_BUILTIN_0
13272 #undef RS6000_BUILTIN_1
13273 #undef RS6000_BUILTIN_2
13274 #undef RS6000_BUILTIN_3
13275 #undef RS6000_BUILTIN_A
13276 #undef RS6000_BUILTIN_D
13277 #undef RS6000_BUILTIN_H
13278 #undef RS6000_BUILTIN_P
13279
13280 /* Return true if a builtin function is overloaded. */
13281 bool
13282 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
13283 {
13284 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
13285 }
13286
13287 const char *
13288 rs6000_overloaded_builtin_name (enum rs6000_builtins fncode)
13289 {
13290 return rs6000_builtin_info[(int)fncode].name;
13291 }
13292
13293 /* Expand an expression EXP that calls a builtin without arguments. */
13294 static rtx
13295 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
13296 {
13297 rtx pat;
13298 machine_mode tmode = insn_data[icode].operand[0].mode;
13299
13300 if (icode == CODE_FOR_nothing)
13301 /* Builtin not supported on this processor. */
13302 return 0;
13303
13304 if (target == 0
13305 || GET_MODE (target) != tmode
13306 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13307 target = gen_reg_rtx (tmode);
13308
13309 pat = GEN_FCN (icode) (target);
13310 if (! pat)
13311 return 0;
13312 emit_insn (pat);
13313
13314 return target;
13315 }
13316
13317
13318 static rtx
13319 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
13320 {
13321 rtx pat;
13322 tree arg0 = CALL_EXPR_ARG (exp, 0);
13323 tree arg1 = CALL_EXPR_ARG (exp, 1);
13324 rtx op0 = expand_normal (arg0);
13325 rtx op1 = expand_normal (arg1);
13326 machine_mode mode0 = insn_data[icode].operand[0].mode;
13327 machine_mode mode1 = insn_data[icode].operand[1].mode;
13328
13329 if (icode == CODE_FOR_nothing)
13330 /* Builtin not supported on this processor. */
13331 return 0;
13332
13333 /* If we got invalid arguments bail out before generating bad rtl. */
13334 if (arg0 == error_mark_node || arg1 == error_mark_node)
13335 return const0_rtx;
13336
13337 if (GET_CODE (op0) != CONST_INT
13338 || INTVAL (op0) > 255
13339 || INTVAL (op0) < 0)
13340 {
13341 error ("argument 1 must be an 8-bit field value");
13342 return const0_rtx;
13343 }
13344
13345 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13346 op0 = copy_to_mode_reg (mode0, op0);
13347
13348 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13349 op1 = copy_to_mode_reg (mode1, op1);
13350
13351 pat = GEN_FCN (icode) (op0, op1);
13352 if (! pat)
13353 return const0_rtx;
13354 emit_insn (pat);
13355
13356 return NULL_RTX;
13357 }
13358
13359 static rtx
13360 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
13361 {
13362 rtx pat;
13363 tree arg0 = CALL_EXPR_ARG (exp, 0);
13364 rtx op0 = expand_normal (arg0);
13365 machine_mode tmode = insn_data[icode].operand[0].mode;
13366 machine_mode mode0 = insn_data[icode].operand[1].mode;
13367
13368 if (icode == CODE_FOR_nothing)
13369 /* Builtin not supported on this processor. */
13370 return 0;
13371
13372 /* If we got invalid arguments bail out before generating bad rtl. */
13373 if (arg0 == error_mark_node)
13374 return const0_rtx;
13375
13376 if (icode == CODE_FOR_altivec_vspltisb
13377 || icode == CODE_FOR_altivec_vspltish
13378 || icode == CODE_FOR_altivec_vspltisw)
13379 {
13380 /* Only allow 5-bit *signed* literals. */
13381 if (GET_CODE (op0) != CONST_INT
13382 || INTVAL (op0) > 15
13383 || INTVAL (op0) < -16)
13384 {
13385 error ("argument 1 must be a 5-bit signed literal");
13386 return CONST0_RTX (tmode);
13387 }
13388 }
13389
13390 if (target == 0
13391 || GET_MODE (target) != tmode
13392 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13393 target = gen_reg_rtx (tmode);
13394
13395 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13396 op0 = copy_to_mode_reg (mode0, op0);
13397
13398 pat = GEN_FCN (icode) (target, op0);
13399 if (! pat)
13400 return 0;
13401 emit_insn (pat);
13402
13403 return target;
13404 }
13405
13406 static rtx
13407 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
13408 {
13409 rtx pat, scratch1, scratch2;
13410 tree arg0 = CALL_EXPR_ARG (exp, 0);
13411 rtx op0 = expand_normal (arg0);
13412 machine_mode tmode = insn_data[icode].operand[0].mode;
13413 machine_mode mode0 = insn_data[icode].operand[1].mode;
13414
13415 /* If we have invalid arguments, bail out before generating bad rtl. */
13416 if (arg0 == error_mark_node)
13417 return const0_rtx;
13418
13419 if (target == 0
13420 || GET_MODE (target) != tmode
13421 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13422 target = gen_reg_rtx (tmode);
13423
13424 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13425 op0 = copy_to_mode_reg (mode0, op0);
13426
13427 scratch1 = gen_reg_rtx (mode0);
13428 scratch2 = gen_reg_rtx (mode0);
13429
13430 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
13431 if (! pat)
13432 return 0;
13433 emit_insn (pat);
13434
13435 return target;
13436 }
13437
13438 static rtx
13439 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
13440 {
13441 rtx pat;
13442 tree arg0 = CALL_EXPR_ARG (exp, 0);
13443 tree arg1 = CALL_EXPR_ARG (exp, 1);
13444 rtx op0 = expand_normal (arg0);
13445 rtx op1 = expand_normal (arg1);
13446 machine_mode tmode = insn_data[icode].operand[0].mode;
13447 machine_mode mode0 = insn_data[icode].operand[1].mode;
13448 machine_mode mode1 = insn_data[icode].operand[2].mode;
13449
13450 if (icode == CODE_FOR_nothing)
13451 /* Builtin not supported on this processor. */
13452 return 0;
13453
13454 /* If we got invalid arguments bail out before generating bad rtl. */
13455 if (arg0 == error_mark_node || arg1 == error_mark_node)
13456 return const0_rtx;
13457
13458 if (icode == CODE_FOR_altivec_vcfux
13459 || icode == CODE_FOR_altivec_vcfsx
13460 || icode == CODE_FOR_altivec_vctsxs
13461 || icode == CODE_FOR_altivec_vctuxs
13462 || icode == CODE_FOR_altivec_vspltb
13463 || icode == CODE_FOR_altivec_vsplth
13464 || icode == CODE_FOR_altivec_vspltw)
13465 {
13466 /* Only allow 5-bit unsigned literals. */
13467 STRIP_NOPS (arg1);
13468 if (TREE_CODE (arg1) != INTEGER_CST
13469 || TREE_INT_CST_LOW (arg1) & ~0x1f)
13470 {
13471 error ("argument 2 must be a 5-bit unsigned literal");
13472 return CONST0_RTX (tmode);
13473 }
13474 }
13475 else if (icode == CODE_FOR_dfptstsfi_eq_dd
13476 || icode == CODE_FOR_dfptstsfi_lt_dd
13477 || icode == CODE_FOR_dfptstsfi_gt_dd
13478 || icode == CODE_FOR_dfptstsfi_unordered_dd
13479 || icode == CODE_FOR_dfptstsfi_eq_td
13480 || icode == CODE_FOR_dfptstsfi_lt_td
13481 || icode == CODE_FOR_dfptstsfi_gt_td
13482 || icode == CODE_FOR_dfptstsfi_unordered_td)
13483 {
13484 /* Only allow 6-bit unsigned literals. */
13485 STRIP_NOPS (arg0);
13486 if (TREE_CODE (arg0) != INTEGER_CST
13487 || !IN_RANGE (TREE_INT_CST_LOW (arg0), 0, 63))
13488 {
13489 error ("argument 1 must be a 6-bit unsigned literal");
13490 return CONST0_RTX (tmode);
13491 }
13492 }
13493 else if (icode == CODE_FOR_xststdcqp_kf
13494 || icode == CODE_FOR_xststdcqp_tf
13495 || icode == CODE_FOR_xststdcdp
13496 || icode == CODE_FOR_xststdcsp
13497 || icode == CODE_FOR_xvtstdcdp
13498 || icode == CODE_FOR_xvtstdcsp)
13499 {
13500 /* Only allow 7-bit unsigned literals. */
13501 STRIP_NOPS (arg1);
13502 if (TREE_CODE (arg1) != INTEGER_CST
13503 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 127))
13504 {
13505 error ("argument 2 must be a 7-bit unsigned literal");
13506 return CONST0_RTX (tmode);
13507 }
13508 }
13509 else if (icode == CODE_FOR_unpackv1ti
13510 || icode == CODE_FOR_unpackkf
13511 || icode == CODE_FOR_unpacktf
13512 || icode == CODE_FOR_unpackif
13513 || icode == CODE_FOR_unpacktd)
13514 {
13515 /* Only allow 1-bit unsigned literals. */
13516 STRIP_NOPS (arg1);
13517 if (TREE_CODE (arg1) != INTEGER_CST
13518 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 1))
13519 {
13520 error ("argument 2 must be a 1-bit unsigned literal");
13521 return CONST0_RTX (tmode);
13522 }
13523 }
13524
13525 if (target == 0
13526 || GET_MODE (target) != tmode
13527 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13528 target = gen_reg_rtx (tmode);
13529
13530 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13531 op0 = copy_to_mode_reg (mode0, op0);
13532 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13533 op1 = copy_to_mode_reg (mode1, op1);
13534
13535 pat = GEN_FCN (icode) (target, op0, op1);
13536 if (! pat)
13537 return 0;
13538 emit_insn (pat);
13539
13540 return target;
13541 }
13542
13543 static rtx
13544 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13545 {
13546 rtx pat, scratch;
13547 tree cr6_form = CALL_EXPR_ARG (exp, 0);
13548 tree arg0 = CALL_EXPR_ARG (exp, 1);
13549 tree arg1 = CALL_EXPR_ARG (exp, 2);
13550 rtx op0 = expand_normal (arg0);
13551 rtx op1 = expand_normal (arg1);
13552 machine_mode tmode = SImode;
13553 machine_mode mode0 = insn_data[icode].operand[1].mode;
13554 machine_mode mode1 = insn_data[icode].operand[2].mode;
13555 int cr6_form_int;
13556
13557 if (TREE_CODE (cr6_form) != INTEGER_CST)
13558 {
13559 error ("argument 1 of %qs must be a constant",
13560 "__builtin_altivec_predicate");
13561 return const0_rtx;
13562 }
13563 else
13564 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
13565
13566 gcc_assert (mode0 == mode1);
13567
13568 /* If we have invalid arguments, bail out before generating bad rtl. */
13569 if (arg0 == error_mark_node || arg1 == error_mark_node)
13570 return const0_rtx;
13571
13572 if (target == 0
13573 || GET_MODE (target) != tmode
13574 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13575 target = gen_reg_rtx (tmode);
13576
13577 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13578 op0 = copy_to_mode_reg (mode0, op0);
13579 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13580 op1 = copy_to_mode_reg (mode1, op1);
13581
13582 /* Note that for many of the relevant operations (e.g. cmpne or
13583 cmpeq) with float or double operands, it would make more sense for
13584 the allocated scratch register to have an integer vector mode.  But
13585 the choice to copy the mode of operand 0 was made
13586 long ago and there are no plans to change it. */
13587 scratch = gen_reg_rtx (mode0);
13588
13589 pat = GEN_FCN (icode) (scratch, op0, op1);
13590 if (! pat)
13591 return 0;
13592 emit_insn (pat);
13593
13594 /* The vec_any* and vec_all* predicates use the same opcodes for two
13595 different operations, but the bits in CR6 will be different
13596 depending on what information we want. So we have to play tricks
13597 with CR6 to get the right bits out.
13598
13599 If you think this is disgusting, look at the specs for the
13600 AltiVec predicates. */
13601
13602 switch (cr6_form_int)
13603 {
13604 case 0:
13605 emit_insn (gen_cr6_test_for_zero (target));
13606 break;
13607 case 1:
13608 emit_insn (gen_cr6_test_for_zero_reverse (target));
13609 break;
13610 case 2:
13611 emit_insn (gen_cr6_test_for_lt (target));
13612 break;
13613 case 3:
13614 emit_insn (gen_cr6_test_for_lt_reverse (target));
13615 break;
13616 default:
13617 error ("argument 1 of %qs is out of range",
13618 "__builtin_altivec_predicate");
13619 break;
13620 }
13621
13622 return target;
13623 }
13624
13625 rtx
13626 swap_endian_selector_for_mode (machine_mode mode)
13627 {
13628 unsigned int swap1[16] = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
13629 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
13630 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
13631 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
13632
13633 unsigned int *swaparray, i;
13634 rtx perm[16];
13635
13636 switch (mode)
13637 {
13638 case E_V1TImode:
13639 swaparray = swap1;
13640 break;
13641 case E_V2DFmode:
13642 case E_V2DImode:
13643 swaparray = swap2;
13644 break;
13645 case E_V4SFmode:
13646 case E_V4SImode:
13647 swaparray = swap4;
13648 break;
13649 case E_V8HImode:
13650 swaparray = swap8;
13651 break;
13652 default:
13653 gcc_unreachable ();
13654 }
13655
13656 for (i = 0; i < 16; ++i)
13657 perm[i] = GEN_INT (swaparray[i]);
13658
13659 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode,
13660 gen_rtvec_v (16, perm)));
13661 }
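/* E.g. for V4SImode the constant built above is the vperm selector
   { 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12 }, which picks bytes so
   that the byte order of each 32-bit element is reversed.  */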
13662
13663 static rtx
13664 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
13665 {
13666 rtx pat, addr;
13667 tree arg0 = CALL_EXPR_ARG (exp, 0);
13668 tree arg1 = CALL_EXPR_ARG (exp, 1);
13669 machine_mode tmode = insn_data[icode].operand[0].mode;
13670 machine_mode mode0 = Pmode;
13671 machine_mode mode1 = Pmode;
13672 rtx op0 = expand_normal (arg0);
13673 rtx op1 = expand_normal (arg1);
13674
13675 if (icode == CODE_FOR_nothing)
13676 /* Builtin not supported on this processor. */
13677 return 0;
13678
13679 /* If we got invalid arguments bail out before generating bad rtl. */
13680 if (arg0 == error_mark_node || arg1 == error_mark_node)
13681 return const0_rtx;
13682
13683 if (target == 0
13684 || GET_MODE (target) != tmode
13685 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13686 target = gen_reg_rtx (tmode);
13687
13688 op1 = copy_to_mode_reg (mode1, op1);
13689
13690 /* For LVX, express the RTL accurately by ANDing the address with -16.
13691 LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
13692 so the raw address is fine. */
13693 if (icode == CODE_FOR_altivec_lvx_v1ti
13694 || icode == CODE_FOR_altivec_lvx_v2df
13695 || icode == CODE_FOR_altivec_lvx_v2di
13696 || icode == CODE_FOR_altivec_lvx_v4sf
13697 || icode == CODE_FOR_altivec_lvx_v4si
13698 || icode == CODE_FOR_altivec_lvx_v8hi
13699 || icode == CODE_FOR_altivec_lvx_v16qi)
13700 {
13701 rtx rawaddr;
13702 if (op0 == const0_rtx)
13703 rawaddr = op1;
13704 else
13705 {
13706 op0 = copy_to_mode_reg (mode0, op0);
13707 rawaddr = gen_rtx_PLUS (Pmode, op1, op0);
13708 }
13709 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
13710 addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
13711
13712 emit_insn (gen_rtx_SET (target, addr));
13713 }
13714 else
13715 {
13716 if (op0 == const0_rtx)
13717 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
13718 else
13719 {
13720 op0 = copy_to_mode_reg (mode0, op0);
13721 addr = gen_rtx_MEM (blk ? BLKmode : tmode,
13722 gen_rtx_PLUS (Pmode, op1, op0));
13723 }
13724
13725 pat = GEN_FCN (icode) (target, addr);
13726 if (! pat)
13727 return 0;
13728 emit_insn (pat);
13729 }
13730
13731 return target;
13732 }
13733
13734 static rtx
13735 altivec_expand_stxvl_builtin (enum insn_code icode, tree exp)
13736 {
13737 rtx pat;
13738 tree arg0 = CALL_EXPR_ARG (exp, 0);
13739 tree arg1 = CALL_EXPR_ARG (exp, 1);
13740 tree arg2 = CALL_EXPR_ARG (exp, 2);
13741 rtx op0 = expand_normal (arg0);
13742 rtx op1 = expand_normal (arg1);
13743 rtx op2 = expand_normal (arg2);
13744 machine_mode mode0 = insn_data[icode].operand[0].mode;
13745 machine_mode mode1 = insn_data[icode].operand[1].mode;
13746 machine_mode mode2 = insn_data[icode].operand[2].mode;
13747
13748 if (icode == CODE_FOR_nothing)
13749 /* Builtin not supported on this processor. */
13750 return NULL_RTX;
13751
13752 /* If we got invalid arguments bail out before generating bad rtl. */
13753 if (arg0 == error_mark_node
13754 || arg1 == error_mark_node
13755 || arg2 == error_mark_node)
13756 return NULL_RTX;
13757
13758 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13759 op0 = copy_to_mode_reg (mode0, op0);
13760 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13761 op1 = copy_to_mode_reg (mode1, op1);
13762 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13763 op2 = copy_to_mode_reg (mode2, op2);
13764
13765 pat = GEN_FCN (icode) (op0, op1, op2);
13766 if (pat)
13767 emit_insn (pat);
13768
13769 return NULL_RTX;
13770 }
13771
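/* Expand a builtin that performs a vector store through insn ICODE. The
call arguments are the source vector, an offset and a base address; the
store is emitted for its side effect only. */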
13772 static rtx
13773 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
13774 {
13775 tree arg0 = CALL_EXPR_ARG (exp, 0);
13776 tree arg1 = CALL_EXPR_ARG (exp, 1);
13777 tree arg2 = CALL_EXPR_ARG (exp, 2);
13778 rtx op0 = expand_normal (arg0);
13779 rtx op1 = expand_normal (arg1);
13780 rtx op2 = expand_normal (arg2);
13781 rtx pat, addr, rawaddr;
13782 machine_mode tmode = insn_data[icode].operand[0].mode;
13783 machine_mode smode = insn_data[icode].operand[1].mode;
13784 machine_mode mode1 = Pmode;
13785 machine_mode mode2 = Pmode;
13786
13787 /* Invalid arguments. Bail out before doing anything stupid. */
13788 if (arg0 == error_mark_node
13789 || arg1 == error_mark_node
13790 || arg2 == error_mark_node)
13791 return const0_rtx;
13792
13793 op2 = copy_to_mode_reg (mode2, op2);
13794
13795 /* For STVX, express the RTL accurately by ANDing the address with -16.
13796 STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
13797 so the raw address is fine. */
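/* Sketch (illustrative only): __builtin_altivec_stvx_v4si (v, off, ptr)
becomes roughly
(set (mem:V4SI (and (plus (reg ptr) (reg off)) (const_int -16)))
(reg:V4SI v))
mirroring stvx's masking of the low four bits of the effective
address. */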
13798 if (icode == CODE_FOR_altivec_stvx_v2df
13799 || icode == CODE_FOR_altivec_stvx_v2di
13800 || icode == CODE_FOR_altivec_stvx_v4sf
13801 || icode == CODE_FOR_altivec_stvx_v4si
13802 || icode == CODE_FOR_altivec_stvx_v8hi
13803 || icode == CODE_FOR_altivec_stvx_v16qi)
13804 {
13805 if (op1 == const0_rtx)
13806 rawaddr = op2;
13807 else
13808 {
13809 op1 = copy_to_mode_reg (mode1, op1);
13810 rawaddr = gen_rtx_PLUS (Pmode, op2, op1);
13811 }
13812
13813 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
13814 addr = gen_rtx_MEM (tmode, addr);
13815
13816 op0 = copy_to_mode_reg (tmode, op0);
13817
13818 emit_insn (gen_rtx_SET (addr, op0));
13819 }
13820 else
13821 {
13822 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
13823 op0 = copy_to_mode_reg (smode, op0);
13824
13825 if (op1 == const0_rtx)
13826 addr = gen_rtx_MEM (tmode, op2);
13827 else
13828 {
13829 op1 = copy_to_mode_reg (mode1, op1);
13830 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
13831 }
13832
13833 pat = GEN_FCN (icode) (addr, op0);
13834 if (pat)
13835 emit_insn (pat);
13836 }
13837
13838 return NULL_RTX;
13839 }
13840
13841 /* Return the appropriate SPR number associated with the given builtin. */
13842 static inline HOST_WIDE_INT
13843 htm_spr_num (enum rs6000_builtins code)
13844 {
13845 if (code == HTM_BUILTIN_GET_TFHAR
13846 || code == HTM_BUILTIN_SET_TFHAR)
13847 return TFHAR_SPR;
13848 else if (code == HTM_BUILTIN_GET_TFIAR
13849 || code == HTM_BUILTIN_SET_TFIAR)
13850 return TFIAR_SPR;
13851 else if (code == HTM_BUILTIN_GET_TEXASR
13852 || code == HTM_BUILTIN_SET_TEXASR)
13853 return TEXASR_SPR;
13854 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
13855 || code == HTM_BUILTIN_SET_TEXASRU);
13856 return TEXASRU_SPR;
13857 }
13858
13859 /* Return the appropriate SPR regno associated with the given builtin. */
13860 static inline HOST_WIDE_INT
13861 htm_spr_regno (enum rs6000_builtins code)
13862 {
13863 if (code == HTM_BUILTIN_GET_TFHAR
13864 || code == HTM_BUILTIN_SET_TFHAR)
13865 return TFHAR_REGNO;
13866 else if (code == HTM_BUILTIN_GET_TFIAR
13867 || code == HTM_BUILTIN_SET_TFIAR)
13868 return TFIAR_REGNO;
13869 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
13870 || code == HTM_BUILTIN_SET_TEXASR
13871 || code == HTM_BUILTIN_GET_TEXASRU
13872 || code == HTM_BUILTIN_SET_TEXASRU);
13873 return TEXASR_REGNO;
13874 }
13875
13876 /* Return the correct ICODE value depending on whether we are
13877 setting or reading the HTM SPRs. */
13878 static inline enum insn_code
13879 rs6000_htm_spr_icode (bool nonvoid)
13880 {
13881 if (nonvoid)
13882 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
13883 else
13884 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
13885 }
13886
13887 /* Expand the HTM builtin in EXP and store the result in TARGET.
13888 Store true in *EXPANDEDP if we found a builtin to expand. */
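/* As a sketch of the operand bookkeeping below: __builtin_tbegin (0) is
non-void but does not use an SPR, so TARGET is not placed in OP[]; the
argument loop collects OP[0] = const_int 0, the CR branch appends
OP[1] = a scratch CCmode register, and the code at the end derives
TARGET from the EQ bit of that CR. */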
13889 static rtx
13890 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
13891 {
13892 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13893 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
13894 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13895 const struct builtin_description *d;
13896 size_t i;
13897
13898 *expandedp = true;
13899
13900 if (!TARGET_POWERPC64
13901 && (fcode == HTM_BUILTIN_TABORTDC
13902 || fcode == HTM_BUILTIN_TABORTDCI))
13903 {
13904 size_t uns_fcode = (size_t) fcode;
13905 const char *name = rs6000_builtin_info[uns_fcode].name;
13906 error ("builtin %qs is only valid in 64-bit mode", name);
13907 return const0_rtx;
13908 }
13909
13910 /* Expand the HTM builtins. */
13911 d = bdesc_htm;
13912 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
13913 if (d->code == fcode)
13914 {
13915 rtx op[MAX_HTM_OPERANDS], pat;
13916 int nopnds = 0;
13917 tree arg;
13918 call_expr_arg_iterator iter;
13919 unsigned attr = rs6000_builtin_info[fcode].attr;
13920 enum insn_code icode = d->icode;
13921 const struct insn_operand_data *insn_op;
13922 bool uses_spr = (attr & RS6000_BTC_SPR);
13923 rtx cr = NULL_RTX;
13924
13925 if (uses_spr)
13926 icode = rs6000_htm_spr_icode (nonvoid);
13927 insn_op = &insn_data[icode].operand[0];
13928
13929 if (nonvoid)
13930 {
13931 machine_mode tmode = (uses_spr) ? insn_op->mode : E_SImode;
13932 if (!target
13933 || GET_MODE (target) != tmode
13934 || (uses_spr && !(*insn_op->predicate) (target, tmode)))
13935 target = gen_reg_rtx (tmode);
13936 if (uses_spr)
13937 op[nopnds++] = target;
13938 }
13939
13940 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
13941 {
13942 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
13943 return const0_rtx;
13944
13945 insn_op = &insn_data[icode].operand[nopnds];
13946
13947 op[nopnds] = expand_normal (arg);
13948
13949 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
13950 {
13951 if (!strcmp (insn_op->constraint, "n"))
13952 {
13953 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
13954 if (!CONST_INT_P (op[nopnds]))
13955 error ("argument %d must be an unsigned literal", arg_num);
13956 else
13957 error ("argument %d is an unsigned literal that is "
13958 "out of range", arg_num);
13959 return const0_rtx;
13960 }
13961 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
13962 }
13963
13964 nopnds++;
13965 }
13966
13967 /* Handle the builtins for extended mnemonics. These accept
13968 no arguments, but map to builtins that take arguments. */
13969 switch (fcode)
13970 {
13971 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
13972 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
13973 op[nopnds++] = GEN_INT (1);
13974 if (flag_checking)
13975 attr |= RS6000_BTC_UNARY;
13976 break;
13977 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
13978 op[nopnds++] = GEN_INT (0);
13979 if (flag_checking)
13980 attr |= RS6000_BTC_UNARY;
13981 break;
13982 default:
13983 break;
13984 }
13985
13986 /* If this builtin accesses SPRs, then pass in the appropriate
13987 SPR number and SPR regno as the last two operands. */
13988 if (uses_spr)
13989 {
13990 machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
13991 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
13992 op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode));
13993 }
13994 /* If this builtin accesses a CR, then pass in a scratch
13995 CR as the last operand. */
13996 else if (attr & RS6000_BTC_CR)
13997 {
13998 cr = gen_reg_rtx (CCmode);
13999 op[nopnds++] = cr;
14000 }
14001 if (flag_checking)
14002 {
14003 int expected_nopnds = 0;
14004 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
14005 expected_nopnds = 1;
14006 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
14007 expected_nopnds = 2;
14008 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
14009 expected_nopnds = 3;
14010 if (!(attr & RS6000_BTC_VOID))
14011 expected_nopnds += 1;
14012 if (uses_spr)
14013 expected_nopnds += 2;
14014
14015 gcc_assert (nopnds == expected_nopnds
14016 && nopnds <= MAX_HTM_OPERANDS);
14017 }
14018
14019 switch (nopnds)
14020 {
14021 case 1:
14022 pat = GEN_FCN (icode) (op[0]);
14023 break;
14024 case 2:
14025 pat = GEN_FCN (icode) (op[0], op[1]);
14026 break;
14027 case 3:
14028 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
14029 break;
14030 case 4:
14031 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
14032 break;
14033 default:
14034 gcc_unreachable ();
14035 }
14036 if (!pat)
14037 return NULL_RTX;
14038 emit_insn (pat);
14039
14040 if (attr & RS6000_BTC_CR)
14041 {
14042 if (fcode == HTM_BUILTIN_TBEGIN)
14043 {
14044 /* Emit code to set TARGET to true or false depending on
14045 whether the tbegin. instruction succeeded or failed
14046 to start a transaction. We do this by placing the 1's
14047 complement of CR's EQ bit into TARGET. */
14048 rtx scratch = gen_reg_rtx (SImode);
14049 emit_insn (gen_rtx_SET (scratch,
14050 gen_rtx_EQ (SImode, cr,
14051 const0_rtx)));
14052 emit_insn (gen_rtx_SET (target,
14053 gen_rtx_XOR (SImode, scratch,
14054 GEN_INT (1))));
14055 }
14056 else
14057 {
14058 /* Emit code to copy the 4-bit condition register field
14059 CR into the least significant end of register TARGET. */
14060 rtx scratch1 = gen_reg_rtx (SImode);
14061 rtx scratch2 = gen_reg_rtx (SImode);
14062 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
14063 emit_insn (gen_movcc (subreg, cr));
14064 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
14065 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
14066 }
14067 }
14068
14069 if (nonvoid)
14070 return target;
14071 return const0_rtx;
14072 }
14073
14074 *expandedp = false;
14075 return NULL_RTX;
14076 }
14077
14078 /* Expand the CPU builtin in FCODE and store the result in TARGET. */
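/* For instance, __builtin_cpu_is ("power9") expands to a load of the CPU
id word from the TCB followed by a comparison against the id recorded in
cpu_is_info, and __builtin_cpu_supports ("vsx") expands to a TCB load,
an AND with the feature mask and a test for a nonzero result (a sketch;
the exact sequences are emitted below). */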
14079
14080 static rtx
14081 cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED,
14082 rtx target)
14083 {
14084 /* __builtin_cpu_init () is a nop, so expand to nothing. */
14085 if (fcode == RS6000_BUILTIN_CPU_INIT)
14086 return const0_rtx;
14087
14088 if (target == 0 || GET_MODE (target) != SImode)
14089 target = gen_reg_rtx (SImode);
14090
14091 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
14092 tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
14093 /* The target clones pass creates an ARRAY_REF instead of a STRING_CST;
14094 convert it back to a STRING_CST. */
14095 if (TREE_CODE (arg) == ARRAY_REF
14096 && TREE_CODE (TREE_OPERAND (arg, 0)) == STRING_CST
14097 && TREE_CODE (TREE_OPERAND (arg, 1)) == INTEGER_CST
14098 && compare_tree_int (TREE_OPERAND (arg, 1), 0) == 0)
14099 arg = TREE_OPERAND (arg, 0);
14100
14101 if (TREE_CODE (arg) != STRING_CST)
14102 {
14103 error ("builtin %qs only accepts a string argument",
14104 rs6000_builtin_info[(size_t) fcode].name);
14105 return const0_rtx;
14106 }
14107
14108 if (fcode == RS6000_BUILTIN_CPU_IS)
14109 {
14110 const char *cpu = TREE_STRING_POINTER (arg);
14111 rtx cpuid = NULL_RTX;
14112 for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
14113 if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
14114 {
14115 /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */
14116 cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
14117 break;
14118 }
14119 if (cpuid == NULL_RTX)
14120 {
14121 /* Invalid CPU argument. */
14122 error ("cpu %qs is an invalid argument to builtin %qs",
14123 cpu, rs6000_builtin_info[(size_t) fcode].name);
14124 return const0_rtx;
14125 }
14126
14127 rtx platform = gen_reg_rtx (SImode);
14128 rtx tcbmem = gen_const_mem (SImode,
14129 gen_rtx_PLUS (Pmode,
14130 gen_rtx_REG (Pmode, TLS_REGNUM),
14131 GEN_INT (TCB_PLATFORM_OFFSET)));
14132 emit_move_insn (platform, tcbmem);
14133 emit_insn (gen_eqsi3 (target, platform, cpuid));
14134 }
14135 else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS)
14136 {
14137 const char *hwcap = TREE_STRING_POINTER (arg);
14138 rtx mask = NULL_RTX;
14139 int hwcap_offset;
14140 for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
14141 if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
14142 {
14143 mask = GEN_INT (cpu_supports_info[i].mask);
14144 hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
14145 break;
14146 }
14147 if (mask == NULL_RTX)
14148 {
14149 /* Invalid HWCAP argument. */
14150 error ("%s %qs is an invalid argument to builtin %qs",
14151 "hwcap", hwcap, rs6000_builtin_info[(size_t) fcode].name);
14152 return const0_rtx;
14153 }
14154
14155 rtx tcb_hwcap = gen_reg_rtx (SImode);
14156 rtx tcbmem = gen_const_mem (SImode,
14157 gen_rtx_PLUS (Pmode,
14158 gen_rtx_REG (Pmode, TLS_REGNUM),
14159 GEN_INT (hwcap_offset)));
14160 emit_move_insn (tcb_hwcap, tcbmem);
14161 rtx scratch1 = gen_reg_rtx (SImode);
14162 emit_insn (gen_rtx_SET (scratch1, gen_rtx_AND (SImode, tcb_hwcap, mask)));
14163 rtx scratch2 = gen_reg_rtx (SImode);
14164 emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
14165 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (SImode, scratch2, const1_rtx)));
14166 }
14167 else
14168 gcc_unreachable ();
14169
14170 /* Record that we have expanded a CPU builtin, so that we can later
14171 emit a reference to the special symbol exported by LIBC to ensure we
14172 do not link against an old LIBC that doesn't support this feature. */
14173 cpu_builtin_p = true;
14174
14175 #else
14176 warning (0, "builtin %qs needs GLIBC (2.23 and newer) that exports hardware "
14177 "capability bits", rs6000_builtin_info[(size_t) fcode].name);
14178
14179 /* For old LIBCs, always return FALSE. */
14180 emit_move_insn (target, GEN_INT (0));
14181 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
14182
14183 return target;
14184 }
14185
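/* Expand a three-operand builtin through insn ICODE, range-checking any
operand that the underlying instruction requires to be an immediate
literal. The result is placed in TARGET. */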
14186 static rtx
14187 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
14188 {
14189 rtx pat;
14190 tree arg0 = CALL_EXPR_ARG (exp, 0);
14191 tree arg1 = CALL_EXPR_ARG (exp, 1);
14192 tree arg2 = CALL_EXPR_ARG (exp, 2);
14193 rtx op0 = expand_normal (arg0);
14194 rtx op1 = expand_normal (arg1);
14195 rtx op2 = expand_normal (arg2);
14196 machine_mode tmode = insn_data[icode].operand[0].mode;
14197 machine_mode mode0 = insn_data[icode].operand[1].mode;
14198 machine_mode mode1 = insn_data[icode].operand[2].mode;
14199 machine_mode mode2 = insn_data[icode].operand[3].mode;
14200
14201 if (icode == CODE_FOR_nothing)
14202 /* Builtin not supported on this processor. */
14203 return 0;
14204
14205 /* If we got invalid arguments bail out before generating bad rtl. */
14206 if (arg0 == error_mark_node
14207 || arg1 == error_mark_node
14208 || arg2 == error_mark_node)
14209 return const0_rtx;
14210
14211 /* Check and prepare the arguments depending on the instruction code.
14212
14213 Note that a switch statement instead of the sequence of tests
14214 would be incorrect, as many of the CODE_FOR values could be
14215 CODE_FOR_nothing, and that would yield multiple alternatives
14216 with identical values. We would never reach here at runtime
14217 in that case. */
14218 if (icode == CODE_FOR_altivec_vsldoi_v4sf
14219 || icode == CODE_FOR_altivec_vsldoi_v2df
14220 || icode == CODE_FOR_altivec_vsldoi_v4si
14221 || icode == CODE_FOR_altivec_vsldoi_v8hi
14222 || icode == CODE_FOR_altivec_vsldoi_v16qi)
14223 {
14224 /* Only allow 4-bit unsigned literals. */
14225 STRIP_NOPS (arg2);
14226 if (TREE_CODE (arg2) != INTEGER_CST
14227 || TREE_INT_CST_LOW (arg2) & ~0xf)
14228 {
14229 error ("argument 3 must be a 4-bit unsigned literal");
14230 return CONST0_RTX (tmode);
14231 }
14232 }
14233 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
14234 || icode == CODE_FOR_vsx_xxpermdi_v2di
14235 || icode == CODE_FOR_vsx_xxpermdi_v2df_be
14236 || icode == CODE_FOR_vsx_xxpermdi_v2di_be
14237 || icode == CODE_FOR_vsx_xxpermdi_v1ti
14238 || icode == CODE_FOR_vsx_xxpermdi_v4sf
14239 || icode == CODE_FOR_vsx_xxpermdi_v4si
14240 || icode == CODE_FOR_vsx_xxpermdi_v8hi
14241 || icode == CODE_FOR_vsx_xxpermdi_v16qi
14242 || icode == CODE_FOR_vsx_xxsldwi_v16qi
14243 || icode == CODE_FOR_vsx_xxsldwi_v8hi
14244 || icode == CODE_FOR_vsx_xxsldwi_v4si
14245 || icode == CODE_FOR_vsx_xxsldwi_v4sf
14246 || icode == CODE_FOR_vsx_xxsldwi_v2di
14247 || icode == CODE_FOR_vsx_xxsldwi_v2df)
14248 {
14249 /* Only allow 2-bit unsigned literals. */
14250 STRIP_NOPS (arg2);
14251 if (TREE_CODE (arg2) != INTEGER_CST
14252 || TREE_INT_CST_LOW (arg2) & ~0x3)
14253 {
14254 error ("argument 3 must be a 2-bit unsigned literal");
14255 return CONST0_RTX (tmode);
14256 }
14257 }
14258 else if (icode == CODE_FOR_vsx_set_v2df
14259 || icode == CODE_FOR_vsx_set_v2di
14260 || icode == CODE_FOR_bcdadd
14261 || icode == CODE_FOR_bcdadd_lt
14262 || icode == CODE_FOR_bcdadd_eq
14263 || icode == CODE_FOR_bcdadd_gt
14264 || icode == CODE_FOR_bcdsub
14265 || icode == CODE_FOR_bcdsub_lt
14266 || icode == CODE_FOR_bcdsub_eq
14267 || icode == CODE_FOR_bcdsub_gt)
14268 {
14269 /* Only allow 1-bit unsigned literals. */
14270 STRIP_NOPS (arg2);
14271 if (TREE_CODE (arg2) != INTEGER_CST
14272 || TREE_INT_CST_LOW (arg2) & ~0x1)
14273 {
14274 error ("argument 3 must be a 1-bit unsigned literal");
14275 return CONST0_RTX (tmode);
14276 }
14277 }
14278 else if (icode == CODE_FOR_dfp_ddedpd_dd
14279 || icode == CODE_FOR_dfp_ddedpd_td)
14280 {
14281 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
14282 STRIP_NOPS (arg0);
14283 if (TREE_CODE (arg0) != INTEGER_CST
14284 || TREE_INT_CST_LOW (arg0) & ~0x3)
14285 {
14286 error ("argument 1 must be 0 or 2");
14287 return CONST0_RTX (tmode);
14288 }
14289 }
14290 else if (icode == CODE_FOR_dfp_denbcd_dd
14291 || icode == CODE_FOR_dfp_denbcd_td)
14292 {
14293 /* Only allow 1-bit unsigned literals. */
14294 STRIP_NOPS (arg0);
14295 if (TREE_CODE (arg0) != INTEGER_CST
14296 || TREE_INT_CST_LOW (arg0) & ~0x1)
14297 {
14298 error ("argument 1 must be a 1-bit unsigned literal");
14299 return CONST0_RTX (tmode);
14300 }
14301 }
14302 else if (icode == CODE_FOR_dfp_dscli_dd
14303 || icode == CODE_FOR_dfp_dscli_td
14304 || icode == CODE_FOR_dfp_dscri_dd
14305 || icode == CODE_FOR_dfp_dscri_td)
14306 {
14307 /* Only allow 6-bit unsigned literals. */
14308 STRIP_NOPS (arg1);
14309 if (TREE_CODE (arg1) != INTEGER_CST
14310 || TREE_INT_CST_LOW (arg1) & ~0x3f)
14311 {
14312 error ("argument 2 must be a 6-bit unsigned literal");
14313 return CONST0_RTX (tmode);
14314 }
14315 }
14316 else if (icode == CODE_FOR_crypto_vshasigmaw
14317 || icode == CODE_FOR_crypto_vshasigmad)
14318 {
14319 /* Check that the 2nd and 3rd arguments are integer constants in the
14320 valid range before using them. */
14321 STRIP_NOPS (arg1);
14322 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (wi::to_wide (arg1), 2))
14323 {
14324 error ("argument 2 must be 0 or 1");
14325 return CONST0_RTX (tmode);
14326 }
14327
14328 STRIP_NOPS (arg2);
14329 if (TREE_CODE (arg2) != INTEGER_CST
14330 || wi::geu_p (wi::to_wide (arg2), 16))
14331 {
14332 error ("argument 3 must be in the range 0..15");
14333 return CONST0_RTX (tmode);
14334 }
14335 }
14336
14337 if (target == 0
14338 || GET_MODE (target) != tmode
14339 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14340 target = gen_reg_rtx (tmode);
14341
14342 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14343 op0 = copy_to_mode_reg (mode0, op0);
14344 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14345 op1 = copy_to_mode_reg (mode1, op1);
14346 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14347 op2 = copy_to_mode_reg (mode2, op2);
14348
14349 pat = GEN_FCN (icode) (target, op0, op1, op2);
14350 if (! pat)
14351 return 0;
14352 emit_insn (pat);
14353
14354 return target;
14355 }
14356
14357
14358 /* Expand the dst builtins. */
14359 static rtx
14360 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
14361 bool *expandedp)
14362 {
14363 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14364 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14365 tree arg0, arg1, arg2;
14366 machine_mode mode0, mode1;
14367 rtx pat, op0, op1, op2;
14368 const struct builtin_description *d;
14369 size_t i;
14370
14371 *expandedp = false;
14372
14373 /* Handle DST variants. */
14374 d = bdesc_dst;
14375 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
14376 if (d->code == fcode)
14377 {
14378 arg0 = CALL_EXPR_ARG (exp, 0);
14379 arg1 = CALL_EXPR_ARG (exp, 1);
14380 arg2 = CALL_EXPR_ARG (exp, 2);
14381 op0 = expand_normal (arg0);
14382 op1 = expand_normal (arg1);
14383 op2 = expand_normal (arg2);
14384 mode0 = insn_data[d->icode].operand[0].mode;
14385 mode1 = insn_data[d->icode].operand[1].mode;
14386
14387 /* Invalid arguments, bail out before generating bad rtl. */
14388 if (arg0 == error_mark_node
14389 || arg1 == error_mark_node
14390 || arg2 == error_mark_node)
14391 return const0_rtx;
14392
14393 *expandedp = true;
14394 STRIP_NOPS (arg2);
14395 if (TREE_CODE (arg2) != INTEGER_CST
14396 || TREE_INT_CST_LOW (arg2) & ~0x3)
14397 {
14398 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
14399 return const0_rtx;
14400 }
14401
14402 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14403 op0 = copy_to_mode_reg (Pmode, op0);
14404 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14405 op1 = copy_to_mode_reg (mode1, op1);
14406
14407 pat = GEN_FCN (d->icode) (op0, op1, op2);
14408 if (pat != 0)
14409 emit_insn (pat);
14410
14411 return NULL_RTX;
14412 }
14413
14414 return NULL_RTX;
14415 }
14416
14417 /* Expand vec_init builtin. */
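/* Sketch of the lowering below: an n-element call for V4SImode such as
vec_init (a, b, c, d) is rewritten as (parallel [a b c d]) with each
element converted to the inner mode and handed to
rs6000_expand_vector_init, while single-element vectors such as
V1TImode are initialized with a direct move. */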
14418 static rtx
14419 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
14420 {
14421 machine_mode tmode = TYPE_MODE (type);
14422 machine_mode inner_mode = GET_MODE_INNER (tmode);
14423 int i, n_elt = GET_MODE_NUNITS (tmode);
14424
14425 gcc_assert (VECTOR_MODE_P (tmode));
14426 gcc_assert (n_elt == call_expr_nargs (exp));
14427
14428 if (!target || !register_operand (target, tmode))
14429 target = gen_reg_rtx (tmode);
14430
14431 /* If we have a vector comprised of a single element, such as V1TImode, do
14432 the initialization directly. */
14433 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
14434 {
14435 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
14436 emit_move_insn (target, gen_lowpart (tmode, x));
14437 }
14438 else
14439 {
14440 rtvec v = rtvec_alloc (n_elt);
14441
14442 for (i = 0; i < n_elt; ++i)
14443 {
14444 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
14445 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
14446 }
14447
14448 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
14449 }
14450
14451 return target;
14452 }
14453
14454 /* Return the integer constant in ARG. Constrain it to be in the range
14455 of the subparts of VEC_TYPE; issue an error if not. */
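/* For a V8HImode vector, for example, MAX is 7 and any selector outside
0..7 is diagnosed. */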
14456
14457 static int
14458 get_element_number (tree vec_type, tree arg)
14459 {
14460 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
14461
14462 if (!tree_fits_uhwi_p (arg)
14463 || (elt = tree_to_uhwi (arg), elt > max))
14464 {
14465 error ("selector must be an integer constant in the range 0..%wi", max);
14466 return 0;
14467 }
14468
14469 return elt;
14470 }
14471
14472 /* Expand vec_set builtin. */
14473 static rtx
14474 altivec_expand_vec_set_builtin (tree exp)
14475 {
14476 machine_mode tmode, mode1;
14477 tree arg0, arg1, arg2;
14478 int elt;
14479 rtx op0, op1;
14480
14481 arg0 = CALL_EXPR_ARG (exp, 0);
14482 arg1 = CALL_EXPR_ARG (exp, 1);
14483 arg2 = CALL_EXPR_ARG (exp, 2);
14484
14485 tmode = TYPE_MODE (TREE_TYPE (arg0));
14486 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14487 gcc_assert (VECTOR_MODE_P (tmode));
14488
14489 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
14490 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
14491 elt = get_element_number (TREE_TYPE (arg0), arg2);
14492
14493 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
14494 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
14495
14496 op0 = force_reg (tmode, op0);
14497 op1 = force_reg (mode1, op1);
14498
14499 rs6000_expand_vector_set (op0, op1, elt);
14500
14501 return op0;
14502 }
14503
14504 /* Expand vec_ext builtin. */
14505 static rtx
14506 altivec_expand_vec_ext_builtin (tree exp, rtx target)
14507 {
14508 machine_mode tmode, mode0;
14509 tree arg0, arg1;
14510 rtx op0;
14511 rtx op1;
14512
14513 arg0 = CALL_EXPR_ARG (exp, 0);
14514 arg1 = CALL_EXPR_ARG (exp, 1);
14515
14516 op0 = expand_normal (arg0);
14517 op1 = expand_normal (arg1);
14518
14519 /* Call get_element_number to validate arg1 if it is a constant. */
14520 if (TREE_CODE (arg1) == INTEGER_CST)
14521 (void) get_element_number (TREE_TYPE (arg0), arg1);
14522
14523 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14524 mode0 = TYPE_MODE (TREE_TYPE (arg0));
14525 gcc_assert (VECTOR_MODE_P (mode0));
14526
14527 op0 = force_reg (mode0, op0);
14528
14529 if (optimize || !target || !register_operand (target, tmode))
14530 target = gen_reg_rtx (tmode);
14531
14532 rs6000_expand_vector_extract (target, op0, op1);
14533
14534 return target;
14535 }
14536
14537 /* Expand the builtin in EXP and store the result in TARGET. Store
14538 true in *EXPANDEDP if we found a builtin to expand. */
14539 static rtx
14540 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
14541 {
14542 const struct builtin_description *d;
14543 size_t i;
14544 enum insn_code icode;
14545 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14546 tree arg0, arg1, arg2;
14547 rtx op0, pat;
14548 machine_mode tmode, mode0;
14549 enum rs6000_builtins fcode
14550 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14551
14552 if (rs6000_overloaded_builtin_p (fcode))
14553 {
14554 *expandedp = true;
14555 error ("unresolved overload for Altivec builtin %qF", fndecl);
14556
14557 /* Given it is invalid, just generate a normal call. */
14558 return expand_call (exp, target, false);
14559 }
14560
14561 target = altivec_expand_dst_builtin (exp, target, expandedp);
14562 if (*expandedp)
14563 return target;
14564
14565 *expandedp = true;
14566
14567 switch (fcode)
14568 {
14569 case ALTIVEC_BUILTIN_STVX_V2DF:
14570 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df, exp);
14571 case ALTIVEC_BUILTIN_STVX_V2DI:
14572 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di, exp);
14573 case ALTIVEC_BUILTIN_STVX_V4SF:
14574 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf, exp);
14575 case ALTIVEC_BUILTIN_STVX:
14576 case ALTIVEC_BUILTIN_STVX_V4SI:
14577 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp);
14578 case ALTIVEC_BUILTIN_STVX_V8HI:
14579 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi, exp);
14580 case ALTIVEC_BUILTIN_STVX_V16QI:
14581 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi, exp);
14582 case ALTIVEC_BUILTIN_STVEBX:
14583 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
14584 case ALTIVEC_BUILTIN_STVEHX:
14585 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
14586 case ALTIVEC_BUILTIN_STVEWX:
14587 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
14588 case ALTIVEC_BUILTIN_STVXL_V2DF:
14589 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
14590 case ALTIVEC_BUILTIN_STVXL_V2DI:
14591 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
14592 case ALTIVEC_BUILTIN_STVXL_V4SF:
14593 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
14594 case ALTIVEC_BUILTIN_STVXL:
14595 case ALTIVEC_BUILTIN_STVXL_V4SI:
14596 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
14597 case ALTIVEC_BUILTIN_STVXL_V8HI:
14598 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
14599 case ALTIVEC_BUILTIN_STVXL_V16QI:
14600 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
14601
14602 case ALTIVEC_BUILTIN_STVLX:
14603 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
14604 case ALTIVEC_BUILTIN_STVLXL:
14605 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
14606 case ALTIVEC_BUILTIN_STVRX:
14607 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
14608 case ALTIVEC_BUILTIN_STVRXL:
14609 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
14610
14611 case P9V_BUILTIN_STXVL:
14612 return altivec_expand_stxvl_builtin (CODE_FOR_stxvl, exp);
14613
14614 case P9V_BUILTIN_XST_LEN_R:
14615 return altivec_expand_stxvl_builtin (CODE_FOR_xst_len_r, exp);
14616
14617 case VSX_BUILTIN_STXVD2X_V1TI:
14618 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
14619 case VSX_BUILTIN_STXVD2X_V2DF:
14620 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
14621 case VSX_BUILTIN_STXVD2X_V2DI:
14622 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
14623 case VSX_BUILTIN_STXVW4X_V4SF:
14624 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
14625 case VSX_BUILTIN_STXVW4X_V4SI:
14626 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
14627 case VSX_BUILTIN_STXVW4X_V8HI:
14628 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
14629 case VSX_BUILTIN_STXVW4X_V16QI:
14630 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
14631
14632 /* For the following on big endian, it's ok to use any appropriate
14633 unaligned-supporting store, so use a generic expander. For
14634 little-endian, the exact element-reversing instruction must
14635 be used. */
14636 case VSX_BUILTIN_ST_ELEMREV_V1TI:
14637 {
14638 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v1ti
14639 : CODE_FOR_vsx_st_elemrev_v1ti);
14640 return altivec_expand_stv_builtin (code, exp);
14641 }
14642 case VSX_BUILTIN_ST_ELEMREV_V2DF:
14643 {
14644 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
14645 : CODE_FOR_vsx_st_elemrev_v2df);
14646 return altivec_expand_stv_builtin (code, exp);
14647 }
14648 case VSX_BUILTIN_ST_ELEMREV_V2DI:
14649 {
14650 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
14651 : CODE_FOR_vsx_st_elemrev_v2di);
14652 return altivec_expand_stv_builtin (code, exp);
14653 }
14654 case VSX_BUILTIN_ST_ELEMREV_V4SF:
14655 {
14656 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
14657 : CODE_FOR_vsx_st_elemrev_v4sf);
14658 return altivec_expand_stv_builtin (code, exp);
14659 }
14660 case VSX_BUILTIN_ST_ELEMREV_V4SI:
14661 {
14662 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
14663 : CODE_FOR_vsx_st_elemrev_v4si);
14664 return altivec_expand_stv_builtin (code, exp);
14665 }
14666 case VSX_BUILTIN_ST_ELEMREV_V8HI:
14667 {
14668 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
14669 : CODE_FOR_vsx_st_elemrev_v8hi);
14670 return altivec_expand_stv_builtin (code, exp);
14671 }
14672 case VSX_BUILTIN_ST_ELEMREV_V16QI:
14673 {
14674 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
14675 : CODE_FOR_vsx_st_elemrev_v16qi);
14676 return altivec_expand_stv_builtin (code, exp);
14677 }
14678
14679 case ALTIVEC_BUILTIN_MFVSCR:
14680 icode = CODE_FOR_altivec_mfvscr;
14681 tmode = insn_data[icode].operand[0].mode;
14682
14683 if (target == 0
14684 || GET_MODE (target) != tmode
14685 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14686 target = gen_reg_rtx (tmode);
14687
14688 pat = GEN_FCN (icode) (target);
14689 if (! pat)
14690 return 0;
14691 emit_insn (pat);
14692 return target;
14693
14694 case ALTIVEC_BUILTIN_MTVSCR:
14695 icode = CODE_FOR_altivec_mtvscr;
14696 arg0 = CALL_EXPR_ARG (exp, 0);
14697 op0 = expand_normal (arg0);
14698 mode0 = insn_data[icode].operand[0].mode;
14699
14700 /* If we got invalid arguments bail out before generating bad rtl. */
14701 if (arg0 == error_mark_node)
14702 return const0_rtx;
14703
14704 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14705 op0 = copy_to_mode_reg (mode0, op0);
14706
14707 pat = GEN_FCN (icode) (op0);
14708 if (pat)
14709 emit_insn (pat);
14710 return NULL_RTX;
14711
14712 case ALTIVEC_BUILTIN_DSSALL:
14713 emit_insn (gen_altivec_dssall ());
14714 return NULL_RTX;
14715
14716 case ALTIVEC_BUILTIN_DSS:
14717 icode = CODE_FOR_altivec_dss;
14718 arg0 = CALL_EXPR_ARG (exp, 0);
14719 STRIP_NOPS (arg0);
14720 op0 = expand_normal (arg0);
14721 mode0 = insn_data[icode].operand[0].mode;
14722
14723 /* If we got invalid arguments bail out before generating bad rtl. */
14724 if (arg0 == error_mark_node)
14725 return const0_rtx;
14726
14727 if (TREE_CODE (arg0) != INTEGER_CST
14728 || TREE_INT_CST_LOW (arg0) & ~0x3)
14729 {
14730 error ("argument to %qs must be a 2-bit unsigned literal", "dss");
14731 return const0_rtx;
14732 }
14733
14734 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14735 op0 = copy_to_mode_reg (mode0, op0);
14736
14737 emit_insn (gen_altivec_dss (op0));
14738 return NULL_RTX;
14739
14740 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
14741 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
14742 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
14743 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
14744 case VSX_BUILTIN_VEC_INIT_V2DF:
14745 case VSX_BUILTIN_VEC_INIT_V2DI:
14746 case VSX_BUILTIN_VEC_INIT_V1TI:
14747 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
14748
14749 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
14750 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
14751 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
14752 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
14753 case VSX_BUILTIN_VEC_SET_V2DF:
14754 case VSX_BUILTIN_VEC_SET_V2DI:
14755 case VSX_BUILTIN_VEC_SET_V1TI:
14756 return altivec_expand_vec_set_builtin (exp);
14757
14758 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
14759 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
14760 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
14761 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
14762 case VSX_BUILTIN_VEC_EXT_V2DF:
14763 case VSX_BUILTIN_VEC_EXT_V2DI:
14764 case VSX_BUILTIN_VEC_EXT_V1TI:
14765 return altivec_expand_vec_ext_builtin (exp, target);
14766
14767 case P9V_BUILTIN_VEC_EXTRACT4B:
14768 arg1 = CALL_EXPR_ARG (exp, 1);
14769 STRIP_NOPS (arg1);
14770
14771 /* Generate a normal call if the argument is invalid. */
14772 if (arg1 == error_mark_node)
14773 return expand_call (exp, target, false);
14774
14775 if (TREE_CODE (arg1) != INTEGER_CST || TREE_INT_CST_LOW (arg1) > 12)
14776 {
14777 error ("second argument to %qs must be 0..12", "vec_vextract4b");
14778 return expand_call (exp, target, false);
14779 }
14780 break;
14781
14782 case P9V_BUILTIN_VEC_INSERT4B:
14783 arg2 = CALL_EXPR_ARG (exp, 2);
14784 STRIP_NOPS (arg2);
14785
14786 /* Generate a normal call if the argument is invalid. */
14787 if (arg2 == error_mark_node)
14788 return expand_call (exp, target, false);
14789
14790 if (TREE_CODE (arg2) != INTEGER_CST || TREE_INT_CST_LOW (arg2) > 12)
14791 {
14792 error ("third argument to %qs must be 0..12", "vec_vinsert4b");
14793 return expand_call (exp, target, false);
14794 }
14795 break;
14796
14797 default:
14798 /* Fall through to the generic handling below. */
14799 break;
14800 }
14801
14802 /* Expand abs* operations. */
14803 d = bdesc_abs;
14804 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
14805 if (d->code == fcode)
14806 return altivec_expand_abs_builtin (d->icode, exp, target);
14807
14808 /* Expand the AltiVec predicates. */
14809 d = bdesc_altivec_preds;
14810 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
14811 if (d->code == fcode)
14812 return altivec_expand_predicate_builtin (d->icode, exp, target);
14813
14814 /* LV* builtins are funky; we initialized them differently from the rest. */
14815 switch (fcode)
14816 {
14817 case ALTIVEC_BUILTIN_LVSL:
14818 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
14819 exp, target, false);
14820 case ALTIVEC_BUILTIN_LVSR:
14821 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
14822 exp, target, false);
14823 case ALTIVEC_BUILTIN_LVEBX:
14824 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
14825 exp, target, false);
14826 case ALTIVEC_BUILTIN_LVEHX:
14827 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
14828 exp, target, false);
14829 case ALTIVEC_BUILTIN_LVEWX:
14830 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
14831 exp, target, false);
14832 case ALTIVEC_BUILTIN_LVXL_V2DF:
14833 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
14834 exp, target, false);
14835 case ALTIVEC_BUILTIN_LVXL_V2DI:
14836 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
14837 exp, target, false);
14838 case ALTIVEC_BUILTIN_LVXL_V4SF:
14839 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
14840 exp, target, false);
14841 case ALTIVEC_BUILTIN_LVXL:
14842 case ALTIVEC_BUILTIN_LVXL_V4SI:
14843 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
14844 exp, target, false);
14845 case ALTIVEC_BUILTIN_LVXL_V8HI:
14846 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
14847 exp, target, false);
14848 case ALTIVEC_BUILTIN_LVXL_V16QI:
14849 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
14850 exp, target, false);
14851 case ALTIVEC_BUILTIN_LVX_V1TI:
14852 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v1ti,
14853 exp, target, false);
14854 case ALTIVEC_BUILTIN_LVX_V2DF:
14855 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df,
14856 exp, target, false);
14857 case ALTIVEC_BUILTIN_LVX_V2DI:
14858 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di,
14859 exp, target, false);
14860 case ALTIVEC_BUILTIN_LVX_V4SF:
14861 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf,
14862 exp, target, false);
14863 case ALTIVEC_BUILTIN_LVX:
14864 case ALTIVEC_BUILTIN_LVX_V4SI:
14865 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si,
14866 exp, target, false);
14867 case ALTIVEC_BUILTIN_LVX_V8HI:
14868 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi,
14869 exp, target, false);
14870 case ALTIVEC_BUILTIN_LVX_V16QI:
14871 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi,
14872 exp, target, false);
14873 case ALTIVEC_BUILTIN_LVLX:
14874 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
14875 exp, target, true);
14876 case ALTIVEC_BUILTIN_LVLXL:
14877 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
14878 exp, target, true);
14879 case ALTIVEC_BUILTIN_LVRX:
14880 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
14881 exp, target, true);
14882 case ALTIVEC_BUILTIN_LVRXL:
14883 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
14884 exp, target, true);
14885 case VSX_BUILTIN_LXVD2X_V1TI:
14886 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
14887 exp, target, false);
14888 case VSX_BUILTIN_LXVD2X_V2DF:
14889 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
14890 exp, target, false);
14891 case VSX_BUILTIN_LXVD2X_V2DI:
14892 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
14893 exp, target, false);
14894 case VSX_BUILTIN_LXVW4X_V4SF:
14895 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
14896 exp, target, false);
14897 case VSX_BUILTIN_LXVW4X_V4SI:
14898 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
14899 exp, target, false);
14900 case VSX_BUILTIN_LXVW4X_V8HI:
14901 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
14902 exp, target, false);
14903 case VSX_BUILTIN_LXVW4X_V16QI:
14904 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
14905 exp, target, false);
14906 /* For the following on big endian, it's ok to use any appropriate
14907 unaligned-supporting load, so use a generic expander. For
14908 little-endian, the exact element-reversing instruction must
14909 be used. */
14910 case VSX_BUILTIN_LD_ELEMREV_V2DF:
14911 {
14912 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
14913 : CODE_FOR_vsx_ld_elemrev_v2df);
14914 return altivec_expand_lv_builtin (code, exp, target, false);
14915 }
14916 case VSX_BUILTIN_LD_ELEMREV_V1TI:
14917 {
14918 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v1ti
14919 : CODE_FOR_vsx_ld_elemrev_v1ti);
14920 return altivec_expand_lv_builtin (code, exp, target, false);
14921 }
14922 case VSX_BUILTIN_LD_ELEMREV_V2DI:
14923 {
14924 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
14925 : CODE_FOR_vsx_ld_elemrev_v2di);
14926 return altivec_expand_lv_builtin (code, exp, target, false);
14927 }
14928 case VSX_BUILTIN_LD_ELEMREV_V4SF:
14929 {
14930 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
14931 : CODE_FOR_vsx_ld_elemrev_v4sf);
14932 return altivec_expand_lv_builtin (code, exp, target, false);
14933 }
14934 case VSX_BUILTIN_LD_ELEMREV_V4SI:
14935 {
14936 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
14937 : CODE_FOR_vsx_ld_elemrev_v4si);
14938 return altivec_expand_lv_builtin (code, exp, target, false);
14939 }
14940 case VSX_BUILTIN_LD_ELEMREV_V8HI:
14941 {
14942 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
14943 : CODE_FOR_vsx_ld_elemrev_v8hi);
14944 return altivec_expand_lv_builtin (code, exp, target, false);
14945 }
14946 case VSX_BUILTIN_LD_ELEMREV_V16QI:
14947 {
14948 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
14949 : CODE_FOR_vsx_ld_elemrev_v16qi);
14950 return altivec_expand_lv_builtin (code, exp, target, false);
14951 }
14952 break;
14953 default:
14954 /* Fall through to report the builtin as not expanded. */
14955 break;
14956 }
14957
14958 *expandedp = false;
14959 return NULL_RTX;
14960 }
14961
14962 /* Check whether a builtin function is supported in this target
14963 configuration. */
14964 bool
14965 rs6000_builtin_is_supported_p (enum rs6000_builtins fncode)
14966 {
14967 HOST_WIDE_INT fnmask = rs6000_builtin_info[fncode].mask;
14968 if ((fnmask & rs6000_builtin_mask) != fnmask)
14969 return false;
14970 else
14971 return true;
14972 }
14973
14974 /* Raise an error message for a builtin function that is called without the
14975 appropriate target options being set. */
14976
14977 static void
14978 rs6000_invalid_builtin (enum rs6000_builtins fncode)
14979 {
14980 size_t uns_fncode = (size_t) fncode;
14981 const char *name = rs6000_builtin_info[uns_fncode].name;
14982 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
14983
14984 gcc_assert (name != NULL);
14985 if ((fnmask & RS6000_BTM_CELL) != 0)
14986 error ("builtin function %qs is only valid for the cell processor", name);
14987 else if ((fnmask & RS6000_BTM_VSX) != 0)
14988 error ("builtin function %qs requires the %qs option", name, "-mvsx");
14989 else if ((fnmask & RS6000_BTM_HTM) != 0)
14990 error ("builtin function %qs requires the %qs option", name, "-mhtm");
14991 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
14992 error ("builtin function %qs requires the %qs option", name, "-maltivec");
14993 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
14994 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
14995 error ("builtin function %qs requires the %qs and %qs options",
14996 name, "-mhard-dfp", "-mpower8-vector");
14997 else if ((fnmask & RS6000_BTM_DFP) != 0)
14998 error ("builtin function %qs requires the %qs option", name, "-mhard-dfp");
14999 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
15000 error ("builtin function %qs requires the %qs option", name,
15001 "-mpower8-vector");
15002 else if ((fnmask & (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
15003 == (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
15004 error ("builtin function %qs requires the %qs and %qs options",
15005 name, "-mcpu=power9", "-m64");
15006 else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0)
15007 error ("builtin function %qs requires the %qs option", name,
15008 "-mcpu=power9");
15009 else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
15010 == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
15011 error ("builtin function %qs requires the %qs and %qs options",
15012 name, "-mcpu=power9", "-m64");
15013 else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC)
15014 error ("builtin function %qs requires the %qs option", name,
15015 "-mcpu=power9");
15016 else if ((fnmask & RS6000_BTM_LDBL128) == RS6000_BTM_LDBL128)
15017 {
15018 if (!TARGET_HARD_FLOAT)
15019 error ("builtin function %qs requires the %qs option", name,
15020 "-mhard-float");
15021 else
15022 error ("builtin function %qs requires the %qs option", name,
15023 TARGET_IEEEQUAD ? "-mabi=ibmlongdouble" : "-mlong-double-128");
15024 }
15025 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
15026 error ("builtin function %qs requires the %qs option", name,
15027 "-mhard-float");
15028 else if ((fnmask & RS6000_BTM_FLOAT128_HW) != 0)
15029 error ("builtin function %qs requires ISA 3.0 IEEE 128-bit floating point",
15030 name);
15031 else if ((fnmask & RS6000_BTM_FLOAT128) != 0)
15032 error ("builtin function %qs requires the %qs option", name, "-mfloat128");
15033 else if ((fnmask & (RS6000_BTM_POPCNTD | RS6000_BTM_POWERPC64))
15034 == (RS6000_BTM_POPCNTD | RS6000_BTM_POWERPC64))
15035 error ("builtin function %qs requires the %qs (or newer), and "
15036 "%qs or %qs options",
15037 name, "-mcpu=power7", "-m64", "-mpowerpc64");
15038 else
15039 error ("builtin function %qs is not supported with the current options",
15040 name);
15041 }
15042
15043 /* Target hook for early folding of built-ins, shamelessly stolen
15044 from ia64.c. */
15045
15046 static tree
15047 rs6000_fold_builtin (tree fndecl ATTRIBUTE_UNUSED,
15048 int n_args ATTRIBUTE_UNUSED,
15049 tree *args ATTRIBUTE_UNUSED,
15050 bool ignore ATTRIBUTE_UNUSED)
15051 {
15052 #ifdef SUBTARGET_FOLD_BUILTIN
15053 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
15054 #else
15055 return NULL_TREE;
15056 #endif
15057 }
15058
15059 /* Helper function to sort out which built-ins may be valid without
15060 having an LHS. */
15061 static bool
15062 rs6000_builtin_valid_without_lhs (enum rs6000_builtins fn_code)
15063 {
15064 switch (fn_code)
15065 {
15066 case ALTIVEC_BUILTIN_STVX_V16QI:
15067 case ALTIVEC_BUILTIN_STVX_V8HI:
15068 case ALTIVEC_BUILTIN_STVX_V4SI:
15069 case ALTIVEC_BUILTIN_STVX_V4SF:
15070 case ALTIVEC_BUILTIN_STVX_V2DI:
15071 case ALTIVEC_BUILTIN_STVX_V2DF:
15072 return true;
15073 default:
15074 return false;
15075 }
15076 }
15077
15078 /* Helper function to handle the gimple folding of a vector compare
15079 operation. This sets up true/false vectors, and uses the
15080 VEC_COND_EXPR operation.
15081 CODE indicates which comparison is to be made. (EQ, GT, ...).
15082 TYPE indicates the type of the result. */
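/* Illustrative GIMPLE sketch: for CODE == GT_EXPR the helper builds
cmp = arg0 > arg1;
lhs = VEC_COND_EXPR <cmp, { -1, ... }, { 0, ... }>;
so lanes that compare true become all-ones and the rest become zero. */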
15083 static tree
15084 fold_build_vec_cmp (tree_code code, tree type,
15085 tree arg0, tree arg1)
15086 {
15087 tree cmp_type = build_same_sized_truth_vector_type (type);
15088 tree zero_vec = build_zero_cst (type);
15089 tree minus_one_vec = build_minus_one_cst (type);
15090 tree cmp = fold_build2 (code, cmp_type, arg0, arg1);
15091 return fold_build3 (VEC_COND_EXPR, type, cmp, minus_one_vec, zero_vec);
15092 }
15093
15094 /* Helper function to handle the in-between steps for the
15095 vector compare built-ins. */
15096 static void
15097 fold_compare_helper (gimple_stmt_iterator *gsi, tree_code code, gimple *stmt)
15098 {
15099 tree arg0 = gimple_call_arg (stmt, 0);
15100 tree arg1 = gimple_call_arg (stmt, 1);
15101 tree lhs = gimple_call_lhs (stmt);
15102 tree cmp = fold_build_vec_cmp (code, TREE_TYPE (lhs), arg0, arg1);
15103 gimple *g = gimple_build_assign (lhs, cmp);
15104 gimple_set_location (g, gimple_location (stmt));
15105 gsi_replace (gsi, g, true);
15106 }
15107
15108 /* Helper function to handle the vector merge[hl] built-ins. The
15109 implementation difference between the h and l versions lies in the
15110 values used when building the permute vector for the high-word versus
15111 low-word merge; the variance is keyed off the use_high parameter. */
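/* E.g. for a V4SImode merge (midpoint 2): with USE_HIGH == 0 the permute
selector built below is { 0, 4, 1, 5 }, and with USE_HIGH == 1 it is
{ 2, 6, 3, 7 }. */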
15112 static void
15113 fold_mergehl_helper (gimple_stmt_iterator *gsi, gimple *stmt, int use_high)
15114 {
15115 tree arg0 = gimple_call_arg (stmt, 0);
15116 tree arg1 = gimple_call_arg (stmt, 1);
15117 tree lhs = gimple_call_lhs (stmt);
15118 tree lhs_type = TREE_TYPE (lhs);
15119 tree lhs_type_type = TREE_TYPE (lhs_type);
15120 int n_elts = TYPE_VECTOR_SUBPARTS (lhs_type);
15121 int midpoint = n_elts / 2;
15122 int offset = 0;
15123
15124 if (use_high == 1)
15125 offset = midpoint;
15126
15127 tree_vector_builder elts (lhs_type, VECTOR_CST_NELTS (arg0), 1);
15128
15129 for (int i = 0; i < midpoint; i++)
15130 {
15131 elts.safe_push (build_int_cst (lhs_type_type, offset + i));
15132 elts.safe_push (build_int_cst (lhs_type_type, offset + n_elts + i));
15133 }
15134
15135 tree permute = elts.build ();
15136
15137 gimple *g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute);
15138 gimple_set_location (g, gimple_location (stmt));
15139 gsi_replace (gsi, g, true);
15140 }
15141
15142 /* Fold a machine-dependent built-in in GIMPLE. (For folding into
15143 a constant, use rs6000_fold_builtin.) */
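/* For example, a call to the vec_add flavor ALTIVEC_BUILTIN_VADDUBM is
replaced in place by the GIMPLE assignment lhs = arg0 + arg1
(PLUS_EXPR), letting the middle end optimize it like ordinary vector
arithmetic. */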
15144
15145 bool
15146 rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
15147 {
15148 gimple *stmt = gsi_stmt (*gsi);
15149 tree fndecl = gimple_call_fndecl (stmt);
15150 gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
15151 enum rs6000_builtins fn_code
15152 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15153 tree arg0, arg1, lhs, temp;
15154 gimple *g;
15155
15156 size_t uns_fncode = (size_t) fn_code;
15157 enum insn_code icode = rs6000_builtin_info[uns_fncode].icode;
15158 const char *fn_name1 = rs6000_builtin_info[uns_fncode].name;
15159 const char *fn_name2 = (icode != CODE_FOR_nothing)
15160 ? get_insn_name ((int) icode)
15161 : "nothing";
15162
15163 if (TARGET_DEBUG_BUILTIN)
15164 fprintf (stderr, "rs6000_gimple_fold_builtin %d %s %s\n",
15165 fn_code, fn_name1, fn_name2);
15166
15167 if (!rs6000_fold_gimple)
15168 return false;
15169
15170 /* Prevent gimple folding for code that does not have an LHS, unless it
15171 is allowed per the rs6000_builtin_valid_without_lhs helper function. */
15172 if (!gimple_call_lhs (stmt) && !rs6000_builtin_valid_without_lhs (fn_code))
15173 return false;
15174
15175 /* Don't fold invalid builtins, let rs6000_expand_builtin diagnose it. */
15176 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fncode].mask;
15177 bool func_valid_p = (rs6000_builtin_mask & mask) == mask;
15178 if (!func_valid_p)
15179 return false;
15180
15181 switch (fn_code)
15182 {
15183 /* Flavors of vec_add. We deliberately don't expand
15184 P8V_BUILTIN_VADDUQM as it gets lowered from V1TImode to
15185 TImode, resulting in much poorer code generation. */
15186 case ALTIVEC_BUILTIN_VADDUBM:
15187 case ALTIVEC_BUILTIN_VADDUHM:
15188 case ALTIVEC_BUILTIN_VADDUWM:
15189 case P8V_BUILTIN_VADDUDM:
15190 case ALTIVEC_BUILTIN_VADDFP:
15191 case VSX_BUILTIN_XVADDDP:
15192 arg0 = gimple_call_arg (stmt, 0);
15193 arg1 = gimple_call_arg (stmt, 1);
15194 lhs = gimple_call_lhs (stmt);
15195 g = gimple_build_assign (lhs, PLUS_EXPR, arg0, arg1);
15196 gimple_set_location (g, gimple_location (stmt));
15197 gsi_replace (gsi, g, true);
15198 return true;
15199 /* Flavors of vec_sub. We deliberately don't expand
15200 P8V_BUILTIN_VSUBUQM. */
15201 case ALTIVEC_BUILTIN_VSUBUBM:
15202 case ALTIVEC_BUILTIN_VSUBUHM:
15203 case ALTIVEC_BUILTIN_VSUBUWM:
15204 case P8V_BUILTIN_VSUBUDM:
15205 case ALTIVEC_BUILTIN_VSUBFP:
15206 case VSX_BUILTIN_XVSUBDP:
15207 arg0 = gimple_call_arg (stmt, 0);
15208 arg1 = gimple_call_arg (stmt, 1);
15209 lhs = gimple_call_lhs (stmt);
15210 g = gimple_build_assign (lhs, MINUS_EXPR, arg0, arg1);
15211 gimple_set_location (g, gimple_location (stmt));
15212 gsi_replace (gsi, g, true);
15213 return true;
15214 case VSX_BUILTIN_XVMULSP:
15215 case VSX_BUILTIN_XVMULDP:
15216 arg0 = gimple_call_arg (stmt, 0);
15217 arg1 = gimple_call_arg (stmt, 1);
15218 lhs = gimple_call_lhs (stmt);
15219 g = gimple_build_assign (lhs, MULT_EXPR, arg0, arg1);
15220 gimple_set_location (g, gimple_location (stmt));
15221 gsi_replace (gsi, g, true);
15222 return true;
15223 /* Even element flavors of vec_mul (signed). */
15224 case ALTIVEC_BUILTIN_VMULESB:
15225 case ALTIVEC_BUILTIN_VMULESH:
15226 case P8V_BUILTIN_VMULESW:
15227 /* Even element flavors of vec_mul (unsigned). */
15228 case ALTIVEC_BUILTIN_VMULEUB:
15229 case ALTIVEC_BUILTIN_VMULEUH:
15230 case P8V_BUILTIN_VMULEUW:
15231 arg0 = gimple_call_arg (stmt, 0);
15232 arg1 = gimple_call_arg (stmt, 1);
15233 lhs = gimple_call_lhs (stmt);
15234 g = gimple_build_assign (lhs, VEC_WIDEN_MULT_EVEN_EXPR, arg0, arg1);
15235 gimple_set_location (g, gimple_location (stmt));
15236 gsi_replace (gsi, g, true);
15237 return true;
15238 /* Odd element flavors of vec_mul (signed). */
15239 case ALTIVEC_BUILTIN_VMULOSB:
15240 case ALTIVEC_BUILTIN_VMULOSH:
15241 case P8V_BUILTIN_VMULOSW:
15242 /* Odd element flavors of vec_mul (unsigned). */
15243 case ALTIVEC_BUILTIN_VMULOUB:
15244 case ALTIVEC_BUILTIN_VMULOUH:
15245 case P8V_BUILTIN_VMULOUW:
15246 arg0 = gimple_call_arg (stmt, 0);
15247 arg1 = gimple_call_arg (stmt, 1);
15248 lhs = gimple_call_lhs (stmt);
15249 g = gimple_build_assign (lhs, VEC_WIDEN_MULT_ODD_EXPR, arg0, arg1);
15250 gimple_set_location (g, gimple_location (stmt));
15251 gsi_replace (gsi, g, true);
15252 return true;
15253 /* Flavors of vec_div (Integer). */
15254 case VSX_BUILTIN_DIV_V2DI:
15255 case VSX_BUILTIN_UDIV_V2DI:
15256 arg0 = gimple_call_arg (stmt, 0);
15257 arg1 = gimple_call_arg (stmt, 1);
15258 lhs = gimple_call_lhs (stmt);
15259 g = gimple_build_assign (lhs, TRUNC_DIV_EXPR, arg0, arg1);
15260 gimple_set_location (g, gimple_location (stmt));
15261 gsi_replace (gsi, g, true);
15262 return true;
15263 /* Flavors of vec_div (Float). */
15264 case VSX_BUILTIN_XVDIVSP:
15265 case VSX_BUILTIN_XVDIVDP:
15266 arg0 = gimple_call_arg (stmt, 0);
15267 arg1 = gimple_call_arg (stmt, 1);
15268 lhs = gimple_call_lhs (stmt);
15269 g = gimple_build_assign (lhs, RDIV_EXPR, arg0, arg1);
15270 gimple_set_location (g, gimple_location (stmt));
15271 gsi_replace (gsi, g, true);
15272 return true;
15273 /* Flavors of vec_and. */
15274 case ALTIVEC_BUILTIN_VAND:
15275 arg0 = gimple_call_arg (stmt, 0);
15276 arg1 = gimple_call_arg (stmt, 1);
15277 lhs = gimple_call_lhs (stmt);
15278 g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, arg1);
15279 gimple_set_location (g, gimple_location (stmt));
15280 gsi_replace (gsi, g, true);
15281 return true;
15282 /* Flavors of vec_andc. */
15283 case ALTIVEC_BUILTIN_VANDC:
15284 arg0 = gimple_call_arg (stmt, 0);
15285 arg1 = gimple_call_arg (stmt, 1);
15286 lhs = gimple_call_lhs (stmt);
15287 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15288 g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
15289 gimple_set_location (g, gimple_location (stmt));
15290 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15291 g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, temp);
15292 gimple_set_location (g, gimple_location (stmt));
15293 gsi_replace (gsi, g, true);
15294 return true;
15295 /* Flavors of vec_nand. */
15296 case P8V_BUILTIN_VEC_NAND:
15297 case P8V_BUILTIN_NAND_V16QI:
15298 case P8V_BUILTIN_NAND_V8HI:
15299 case P8V_BUILTIN_NAND_V4SI:
15300 case P8V_BUILTIN_NAND_V4SF:
15301 case P8V_BUILTIN_NAND_V2DF:
15302 case P8V_BUILTIN_NAND_V2DI:
15303 arg0 = gimple_call_arg (stmt, 0);
15304 arg1 = gimple_call_arg (stmt, 1);
15305 lhs = gimple_call_lhs (stmt);
15306 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15307 g = gimple_build_assign (temp, BIT_AND_EXPR, arg0, arg1);
15308 gimple_set_location (g, gimple_location (stmt));
15309 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15310 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
15311 gimple_set_location (g, gimple_location (stmt));
15312 gsi_replace (gsi, g, true);
15313 return true;
15314 /* Flavors of vec_or. */
15315 case ALTIVEC_BUILTIN_VOR:
15316 arg0 = gimple_call_arg (stmt, 0);
15317 arg1 = gimple_call_arg (stmt, 1);
15318 lhs = gimple_call_lhs (stmt);
15319 g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, arg1);
15320 gimple_set_location (g, gimple_location (stmt));
15321 gsi_replace (gsi, g, true);
15322 return true;
15323 /* flavors of vec_orc. */
15324 case P8V_BUILTIN_ORC_V16QI:
15325 case P8V_BUILTIN_ORC_V8HI:
15326 case P8V_BUILTIN_ORC_V4SI:
15327 case P8V_BUILTIN_ORC_V4SF:
15328 case P8V_BUILTIN_ORC_V2DF:
15329 case P8V_BUILTIN_ORC_V2DI:
15330 arg0 = gimple_call_arg (stmt, 0);
15331 arg1 = gimple_call_arg (stmt, 1);
15332 lhs = gimple_call_lhs (stmt);
15333 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15334 g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
15335 gimple_set_location (g, gimple_location (stmt));
15336 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15337 g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, temp);
15338 gimple_set_location (g, gimple_location (stmt));
15339 gsi_replace (gsi, g, true);
15340 return true;
15341 /* Flavors of vec_xor. */
15342 case ALTIVEC_BUILTIN_VXOR:
15343 arg0 = gimple_call_arg (stmt, 0);
15344 arg1 = gimple_call_arg (stmt, 1);
15345 lhs = gimple_call_lhs (stmt);
15346 g = gimple_build_assign (lhs, BIT_XOR_EXPR, arg0, arg1);
15347 gimple_set_location (g, gimple_location (stmt));
15348 gsi_replace (gsi, g, true);
15349 return true;
15350 /* Flavors of vec_nor. */
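/* vec_nor (a, b) computes ~(a | b): OR the operands into a
   temporary, then invert the result. */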
15351 case ALTIVEC_BUILTIN_VNOR:
15352 arg0 = gimple_call_arg (stmt, 0);
15353 arg1 = gimple_call_arg (stmt, 1);
15354 lhs = gimple_call_lhs (stmt);
15355 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15356 g = gimple_build_assign (temp, BIT_IOR_EXPR, arg0, arg1);
15357 gimple_set_location (g, gimple_location (stmt));
15358 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15359 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
15360 gimple_set_location (g, gimple_location (stmt));
15361 gsi_replace (gsi, g, true);
15362 return true;
15363 /* Flavors of vec_abs. */
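/* Folding to ABS_EXPR is only safe for signed integer elements whose
   type wraps on overflow: otherwise the fold would introduce undefined
   behavior for the minimum value, which the hardware instruction
   handles fine, so we punt in that case. */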
15364 case ALTIVEC_BUILTIN_ABS_V16QI:
15365 case ALTIVEC_BUILTIN_ABS_V8HI:
15366 case ALTIVEC_BUILTIN_ABS_V4SI:
15367 case ALTIVEC_BUILTIN_ABS_V4SF:
15368 case P8V_BUILTIN_ABS_V2DI:
15369 case VSX_BUILTIN_XVABSDP:
15370 arg0 = gimple_call_arg (stmt, 0);
15371 if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (arg0)))
15372 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (arg0))))
15373 return false;
15374 lhs = gimple_call_lhs (stmt);
15375 g = gimple_build_assign (lhs, ABS_EXPR, arg0);
15376 gimple_set_location (g, gimple_location (stmt));
15377 gsi_replace (gsi, g, true);
15378 return true;
15379 /* Flavors of vec_min. */
15380 case VSX_BUILTIN_XVMINDP:
15381 case P8V_BUILTIN_VMINSD:
15382 case P8V_BUILTIN_VMINUD:
15383 case ALTIVEC_BUILTIN_VMINSB:
15384 case ALTIVEC_BUILTIN_VMINSH:
15385 case ALTIVEC_BUILTIN_VMINSW:
15386 case ALTIVEC_BUILTIN_VMINUB:
15387 case ALTIVEC_BUILTIN_VMINUH:
15388 case ALTIVEC_BUILTIN_VMINUW:
15389 case ALTIVEC_BUILTIN_VMINFP:
15390 arg0 = gimple_call_arg (stmt, 0);
15391 arg1 = gimple_call_arg (stmt, 1);
15392 lhs = gimple_call_lhs (stmt);
15393 g = gimple_build_assign (lhs, MIN_EXPR, arg0, arg1);
15394 gimple_set_location (g, gimple_location (stmt));
15395 gsi_replace (gsi, g, true);
15396 return true;
15397 /* Flavors of vec_max. */
15398 case VSX_BUILTIN_XVMAXDP:
15399 case P8V_BUILTIN_VMAXSD:
15400 case P8V_BUILTIN_VMAXUD:
15401 case ALTIVEC_BUILTIN_VMAXSB:
15402 case ALTIVEC_BUILTIN_VMAXSH:
15403 case ALTIVEC_BUILTIN_VMAXSW:
15404 case ALTIVEC_BUILTIN_VMAXUB:
15405 case ALTIVEC_BUILTIN_VMAXUH:
15406 case ALTIVEC_BUILTIN_VMAXUW:
15407 case ALTIVEC_BUILTIN_VMAXFP:
15408 arg0 = gimple_call_arg (stmt, 0);
15409 arg1 = gimple_call_arg (stmt, 1);
15410 lhs = gimple_call_lhs (stmt);
15411 g = gimple_build_assign (lhs, MAX_EXPR, arg0, arg1);
15412 gimple_set_location (g, gimple_location (stmt));
15413 gsi_replace (gsi, g, true);
15414 return true;
15415 /* Flavors of vec_eqv. */
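/* vec_eqv (a, b) computes ~(a ^ b): XOR the operands into a
   temporary, then invert the result. */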
15416 case P8V_BUILTIN_EQV_V16QI:
15417 case P8V_BUILTIN_EQV_V8HI:
15418 case P8V_BUILTIN_EQV_V4SI:
15419 case P8V_BUILTIN_EQV_V4SF:
15420 case P8V_BUILTIN_EQV_V2DF:
15421 case P8V_BUILTIN_EQV_V2DI:
15422 arg0 = gimple_call_arg (stmt, 0);
15423 arg1 = gimple_call_arg (stmt, 1);
15424 lhs = gimple_call_lhs (stmt);
15425 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15426 g = gimple_build_assign (temp, BIT_XOR_EXPR, arg0, arg1);
15427 gimple_set_location (g, gimple_location (stmt));
15428 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15429 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
15430 gimple_set_location (g, gimple_location (stmt));
15431 gsi_replace (gsi, g, true);
15432 return true;
15433 /* Flavors of vec_rotate_left. */
15434 case ALTIVEC_BUILTIN_VRLB:
15435 case ALTIVEC_BUILTIN_VRLH:
15436 case ALTIVEC_BUILTIN_VRLW:
15437 case P8V_BUILTIN_VRLD:
15438 arg0 = gimple_call_arg (stmt, 0);
15439 arg1 = gimple_call_arg (stmt, 1);
15440 lhs = gimple_call_lhs (stmt);
15441 g = gimple_build_assign (lhs, LROTATE_EXPR, arg0, arg1);
15442 gimple_set_location (g, gimple_location (stmt));
15443 gsi_replace (gsi, g, true);
15444 return true;
15445 /* Flavors of vector shift right algebraic.
15446 vec_sra{b,h,w} -> vsra{b,h,w}. */
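/* A GIMPLE right shift of a signed operand is arithmetic, so these
   map directly to RSHIFT_EXPR with no conversion needed. */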
15447 case ALTIVEC_BUILTIN_VSRAB:
15448 case ALTIVEC_BUILTIN_VSRAH:
15449 case ALTIVEC_BUILTIN_VSRAW:
15450 case P8V_BUILTIN_VSRAD:
15451 arg0 = gimple_call_arg (stmt, 0);
15452 arg1 = gimple_call_arg (stmt, 1);
15453 lhs = gimple_call_lhs (stmt);
15454 g = gimple_build_assign (lhs, RSHIFT_EXPR, arg0, arg1);
15455 gimple_set_location (g, gimple_location (stmt));
15456 gsi_replace (gsi, g, true);
15457 return true;
15458 /* Flavors of vector shift left.
15459 builtin_altivec_vsl{b,h,w} -> vsl{b,h,w}. */
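/* As with vec_abs above, punt for signed element types that do not
   wrap on overflow, since LSHIFT_EXPR could then introduce undefined
   signed overflow that the instruction itself does not have. */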
15460 case ALTIVEC_BUILTIN_VSLB:
15461 case ALTIVEC_BUILTIN_VSLH:
15462 case ALTIVEC_BUILTIN_VSLW:
15463 case P8V_BUILTIN_VSLD:
15464 arg0 = gimple_call_arg (stmt, 0);
15465 if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (arg0)))
15466 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (arg0))))
15467 return false;
15468 arg1 = gimple_call_arg (stmt, 1);
15469 lhs = gimple_call_lhs (stmt);
15470 g = gimple_build_assign (lhs, LSHIFT_EXPR, arg0, arg1);
15471 gimple_set_location (g, gimple_location (stmt));
15472 gsi_replace (gsi, g, true);
15473 return true;
15474 /* Flavors of vector shift right. */
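/* These are logical shifts, but RSHIFT_EXPR on the (possibly signed)
   argument type would be arithmetic; hence the VIEW_CONVERT_EXPR dance
   below: convert to unsigned, shift, convert back. */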
15475 case ALTIVEC_BUILTIN_VSRB:
15476 case ALTIVEC_BUILTIN_VSRH:
15477 case ALTIVEC_BUILTIN_VSRW:
15478 case P8V_BUILTIN_VSRD:
15479 {
15480 arg0 = gimple_call_arg (stmt, 0);
15481 arg1 = gimple_call_arg (stmt, 1);
15482 lhs = gimple_call_lhs (stmt);
15483 gimple_seq stmts = NULL;
15484 /* Convert arg0 to unsigned. */
15485 tree arg0_unsigned
15486 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15487 unsigned_type_for (TREE_TYPE (arg0)), arg0);
15488 tree res
15489 = gimple_build (&stmts, RSHIFT_EXPR,
15490 TREE_TYPE (arg0_unsigned), arg0_unsigned, arg1);
15491 /* Convert result back to the lhs type. */
15492 res = gimple_build (&stmts, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), res);
15493 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15494 update_call_from_tree (gsi, res);
15495 return true;
15496 }
15497 /* Vector loads. */
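/* The lvx instruction ignores the low four bits of the effective
   address, so the expansion computes (address + offset) & -16
   explicitly and loads through a MEM_REF at that 16-byte-aligned
   address; the stvx cases below do the same for stores. */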
15498 case ALTIVEC_BUILTIN_LVX_V16QI:
15499 case ALTIVEC_BUILTIN_LVX_V8HI:
15500 case ALTIVEC_BUILTIN_LVX_V4SI:
15501 case ALTIVEC_BUILTIN_LVX_V4SF:
15502 case ALTIVEC_BUILTIN_LVX_V2DI:
15503 case ALTIVEC_BUILTIN_LVX_V2DF:
15504 case ALTIVEC_BUILTIN_LVX_V1TI:
15505 {
15506 arg0 = gimple_call_arg (stmt, 0); /* Offset. */
15507 arg1 = gimple_call_arg (stmt, 1); /* Address. */
15508 lhs = gimple_call_lhs (stmt);
15509 location_t loc = gimple_location (stmt);
15510 /* Since arg1 may be cast to a different type, just use ptr_type_node
15511 here instead of trying to enforce TBAA on pointer types. */
15512 tree arg1_type = ptr_type_node;
15513 tree lhs_type = TREE_TYPE (lhs);
15514 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
15515 the tree using the value from arg0. The resulting type will match
15516 the type of arg1. */
15517 gimple_seq stmts = NULL;
15518 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
15519 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
15520 arg1_type, arg1, temp_offset);
15521 /* Mask off any lower bits from the address. */
15522 tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
15523 arg1_type, temp_addr,
15524 build_int_cst (arg1_type, -16));
15525 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15526 /* Use the build2 helper to set up the mem_ref. The MEM_REF could also
15527 take an offset, but since we've already incorporated the offset
15528 above, here we just pass in a zero. */
15529 gimple *g
15530 = gimple_build_assign (lhs, build2 (MEM_REF, lhs_type, aligned_addr,
15531 build_int_cst (arg1_type, 0)));
15532 gimple_set_location (g, loc);
15533 gsi_replace (gsi, g, true);
15534 return true;
15535 }
15536 /* Vector stores. */
15537 case ALTIVEC_BUILTIN_STVX_V16QI:
15538 case ALTIVEC_BUILTIN_STVX_V8HI:
15539 case ALTIVEC_BUILTIN_STVX_V4SI:
15540 case ALTIVEC_BUILTIN_STVX_V4SF:
15541 case ALTIVEC_BUILTIN_STVX_V2DI:
15542 case ALTIVEC_BUILTIN_STVX_V2DF:
15543 {
15544 arg0 = gimple_call_arg (stmt, 0); /* Value to be stored. */
15545 arg1 = gimple_call_arg (stmt, 1); /* Offset. */
15546 tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address. */
15547 location_t loc = gimple_location (stmt);
15548 tree arg0_type = TREE_TYPE (arg0);
15549 /* Use ptr_type_node (no TBAA) for the arg2_type.
15550 FIXME: (Richard) "A proper fix would be to transition this type as
15551 seen from the frontend to GIMPLE, for example in a similar way we
15552 do for MEM_REFs by piggy-backing that on an extra argument, a
15553 constant zero pointer of the alias pointer type to use (which would
15554 also serve as a type indicator of the store itself). I'd use a
15555 target specific internal function for this (not sure if we can have
15556 those target specific, but I guess if it's folded away then that's
15557 fine) and get away with the overload set." */
15558 tree arg2_type = ptr_type_node;
15559 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
15560 the tree using the value from arg1. The resulting type will match
15561 the type of arg2. */
15562 gimple_seq stmts = NULL;
15563 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
15564 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
15565 arg2_type, arg2, temp_offset);
15566 /* Mask off any lower bits from the address. */
15567 tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
15568 arg2_type, temp_addr,
15569 build_int_cst (arg2_type, -16));
15570 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15571 /* The desired gimple result should be similar to:
15572 MEM[(__vector floatD.1407 *)_1] = vf1D.2697; */
15573 gimple *g
15574 = gimple_build_assign (build2 (MEM_REF, arg0_type, aligned_addr,
15575 build_int_cst (arg2_type, 0)), arg0);
15576 gimple_set_location (g, loc);
15577 gsi_replace (gsi, g, true);
15578 return true;
15579 }
15580
15581 /* Vector fused multiply-add (fma). */
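/* These fold to a call to the internal function IFN_FMA, i.e.
   lhs = arg0 * arg1 + arg2 as a single fused operation. */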
15582 case ALTIVEC_BUILTIN_VMADDFP:
15583 case VSX_BUILTIN_XVMADDDP:
15584 case ALTIVEC_BUILTIN_VMLADDUHM:
15585 {
15586 arg0 = gimple_call_arg (stmt, 0);
15587 arg1 = gimple_call_arg (stmt, 1);
15588 tree arg2 = gimple_call_arg (stmt, 2);
15589 lhs = gimple_call_lhs (stmt);
15590 gcall *g = gimple_build_call_internal (IFN_FMA, 3, arg0, arg1, arg2);
15591 gimple_call_set_lhs (g, lhs);
15592 gimple_call_set_nothrow (g, true);
15593 gimple_set_location (g, gimple_location (stmt));
15594 gsi_replace (gsi, g, true);
15595 return true;
15596 }
15597
15598 /* Vector compares: EQ, NE, GE, GT, LE. */
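/* fold_compare_helper (defined earlier in this file) rewrites each of
   these as the element-wise GIMPLE comparison, yielding the usual
   all-ones / all-zeros result vector that the vcmp* instructions
   produce. */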
15599 case ALTIVEC_BUILTIN_VCMPEQUB:
15600 case ALTIVEC_BUILTIN_VCMPEQUH:
15601 case ALTIVEC_BUILTIN_VCMPEQUW:
15602 case P8V_BUILTIN_VCMPEQUD:
15603 fold_compare_helper (gsi, EQ_EXPR, stmt);
15604 return true;
15605
15606 case P9V_BUILTIN_CMPNEB:
15607 case P9V_BUILTIN_CMPNEH:
15608 case P9V_BUILTIN_CMPNEW:
15609 fold_compare_helper (gsi, NE_EXPR, stmt);
15610 return true;
15611
15612 case VSX_BUILTIN_CMPGE_16QI:
15613 case VSX_BUILTIN_CMPGE_U16QI:
15614 case VSX_BUILTIN_CMPGE_8HI:
15615 case VSX_BUILTIN_CMPGE_U8HI:
15616 case VSX_BUILTIN_CMPGE_4SI:
15617 case VSX_BUILTIN_CMPGE_U4SI:
15618 case VSX_BUILTIN_CMPGE_2DI:
15619 case VSX_BUILTIN_CMPGE_U2DI:
15620 fold_compare_helper (gsi, GE_EXPR, stmt);
15621 return true;
15622
15623 case ALTIVEC_BUILTIN_VCMPGTSB:
15624 case ALTIVEC_BUILTIN_VCMPGTUB:
15625 case ALTIVEC_BUILTIN_VCMPGTSH:
15626 case ALTIVEC_BUILTIN_VCMPGTUH:
15627 case ALTIVEC_BUILTIN_VCMPGTSW:
15628 case ALTIVEC_BUILTIN_VCMPGTUW:
15629 case P8V_BUILTIN_VCMPGTUD:
15630 case P8V_BUILTIN_VCMPGTSD:
15631 fold_compare_helper (gsi, GT_EXPR, stmt);
15632 return true;
15633
15634 case VSX_BUILTIN_CMPLE_16QI:
15635 case VSX_BUILTIN_CMPLE_U16QI:
15636 case VSX_BUILTIN_CMPLE_8HI:
15637 case VSX_BUILTIN_CMPLE_U8HI:
15638 case VSX_BUILTIN_CMPLE_4SI:
15639 case VSX_BUILTIN_CMPLE_U4SI:
15640 case VSX_BUILTIN_CMPLE_2DI:
15641 case VSX_BUILTIN_CMPLE_U2DI:
15642 fold_compare_helper (gsi, LE_EXPR, stmt);
15643 return true;
15644
15645 /* Flavors of vec_splat_[us]{8,16,32}. */
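/* For example, vec_splat_s8 (5) becomes the uniform vector constant
   { 5, 5, ..., 5 }; arguments that are not suitable constants are not
   folded here. */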
15646 case ALTIVEC_BUILTIN_VSPLTISB:
15647 case ALTIVEC_BUILTIN_VSPLTISH:
15648 case ALTIVEC_BUILTIN_VSPLTISW:
15649 {
15650 int size;
15651
15652 if (fn_code == ALTIVEC_BUILTIN_VSPLTISB)
15653 size = 8;
15654 else if (fn_code == ALTIVEC_BUILTIN_VSPLTISH)
15655 size = 16;
15656 else
15657 size = 32;
15658
15659 arg0 = gimple_call_arg (stmt, 0);
15660 lhs = gimple_call_lhs (stmt);
15661
15662 /* Only fold the vec_splat_*() if the lower bits of arg 0 form a
15663 5-bit signed constant in the range -16 to +15. */
15664 if (TREE_CODE (arg0) != INTEGER_CST
15665 || !IN_RANGE (sext_hwi (TREE_INT_CST_LOW (arg0), size),
15666 -16, 15))
15667 return false;
15668 gimple_seq stmts = NULL;
15669 location_t loc = gimple_location (stmt);
15670 tree splat_value = gimple_convert (&stmts, loc,
15671 TREE_TYPE (TREE_TYPE (lhs)), arg0);
15672 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15673 tree splat_tree = build_vector_from_val (TREE_TYPE (lhs), splat_value);
15674 g = gimple_build_assign (lhs, splat_tree);
15675 gimple_set_location (g, gimple_location (stmt));
15676 gsi_replace (gsi, g, true);
15677 return true;
15678 }
15679
15680 /* vec_mergel (integrals). */
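/* fold_mergehl_helper emits the equivalent element interleave; its
   last argument selects merge-low (1) versus merge-high (0). */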
15681 case ALTIVEC_BUILTIN_VMRGLH:
15682 case ALTIVEC_BUILTIN_VMRGLW:
15683 case VSX_BUILTIN_XXMRGLW_4SI:
15684 case ALTIVEC_BUILTIN_VMRGLB:
15685 case VSX_BUILTIN_VEC_MERGEL_V2DI:
15686 fold_mergehl_helper (gsi, stmt, 1);
15687 return true;
15688 /* vec_mergeh (integrals). */
15689 case ALTIVEC_BUILTIN_VMRGHH:
15690 case ALTIVEC_BUILTIN_VMRGHW:
15691 case VSX_BUILTIN_XXMRGHW_4SI:
15692 case ALTIVEC_BUILTIN_VMRGHB:
15693 case VSX_BUILTIN_VEC_MERGEH_V2DI:
15694 fold_mergehl_helper (gsi, stmt, 0);
15695 return true;
15696 default:
15697 if (TARGET_DEBUG_BUILTIN)
15698 fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
15699 fn_code, fn_name1, fn_name2);
15700 break;
15701 }
15702
15703 return false;
15704 }
15705
15706 /* Expand an expression EXP that calls a built-in function,
15707 with result going to TARGET if that's convenient
15708 (and in mode MODE if that's convenient).
15709 SUBTARGET may be used as the target for computing one of EXP's operands.
15710 IGNORE is nonzero if the value is to be ignored. */
15711
15712 static rtx
15713 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
15714 machine_mode mode ATTRIBUTE_UNUSED,
15715 int ignore ATTRIBUTE_UNUSED)
15716 {
15717 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15718 enum rs6000_builtins fcode
15719 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
15720 size_t uns_fcode = (size_t)fcode;
15721 const struct builtin_description *d;
15722 size_t i;
15723 rtx ret;
15724 bool success;
15725 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
15726 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
15727 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
15728
15729 /* Two different modes (KFmode, TFmode) can be the IEEE 128-bit floating
15730 point type: KFmode when long double is the IBM extended double, and
15731 TFmode when long double is IEEE 128-bit. It is simpler if
15732 we only define one variant of the built-in function, and switch the code
15733 when defining it, rather than defining two built-ins and using the
15734 overload table in rs6000-c.c to switch between the two. If we don't have
15735 the proper assembler, don't do this switch because CODE_FOR_*kf* and
15736 CODE_FOR_*tf* will be CODE_FOR_nothing. */
15737 #ifdef HAVE_AS_POWER9
15738 if (FLOAT128_IEEE_P (TFmode))
15739 switch (icode)
15740 {
15741 default:
15742 break;
15743
15744 case CODE_FOR_sqrtkf2_odd: icode = CODE_FOR_sqrttf2_odd; break;
15745 case CODE_FOR_trunckfdf2_odd: icode = CODE_FOR_trunctfdf2_odd; break;
15746 case CODE_FOR_addkf3_odd: icode = CODE_FOR_addtf3_odd; break;
15747 case CODE_FOR_subkf3_odd: icode = CODE_FOR_subtf3_odd; break;
15748 case CODE_FOR_mulkf3_odd: icode = CODE_FOR_multf3_odd; break;
15749 case CODE_FOR_divkf3_odd: icode = CODE_FOR_divtf3_odd; break;
15750 case CODE_FOR_fmakf4_odd: icode = CODE_FOR_fmatf4_odd; break;
15751 case CODE_FOR_xsxexpqp_kf: icode = CODE_FOR_xsxexpqp_tf; break;
15752 case CODE_FOR_xsxsigqp_kf: icode = CODE_FOR_xsxsigqp_tf; break;
15753 case CODE_FOR_xststdcnegqp_kf: icode = CODE_FOR_xststdcnegqp_tf; break;
15754 case CODE_FOR_xsiexpqp_kf: icode = CODE_FOR_xsiexpqp_tf; break;
15755 case CODE_FOR_xsiexpqpf_kf: icode = CODE_FOR_xsiexpqpf_tf; break;
15756 case CODE_FOR_xststdcqp_kf: icode = CODE_FOR_xststdcqp_tf; break;
15757 }
15758 #endif
15759
15760 if (TARGET_DEBUG_BUILTIN)
15761 {
15762 const char *name1 = rs6000_builtin_info[uns_fcode].name;
15763 const char *name2 = (icode != CODE_FOR_nothing)
15764 ? get_insn_name ((int) icode)
15765 : "nothing";
15766 const char *name3;
15767
15768 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
15769 {
15770 default: name3 = "unknown"; break;
15771 case RS6000_BTC_SPECIAL: name3 = "special"; break;
15772 case RS6000_BTC_UNARY: name3 = "unary"; break;
15773 case RS6000_BTC_BINARY: name3 = "binary"; break;
15774 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
15775 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
15776 case RS6000_BTC_ABS: name3 = "abs"; break;
15777 case RS6000_BTC_DST: name3 = "dst"; break;
15778 }
15779
15781 fprintf (stderr,
15782 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
15783 (name1) ? name1 : "---", fcode,
15784 (name2) ? name2 : "---", (int) icode,
15785 name3,
15786 func_valid_p ? "" : ", not valid");
15787 }
15788
15789 if (!func_valid_p)
15790 {
15791 rs6000_invalid_builtin (fcode);
15792
15793 /* Given it is invalid, just generate a normal call. */
15794 return expand_call (exp, target, ignore);
15795 }
15796
15797 switch (fcode)
15798 {
15799 case RS6000_BUILTIN_RECIP:
15800 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
15801
15802 case RS6000_BUILTIN_RECIPF:
15803 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
15804
15805 case RS6000_BUILTIN_RSQRTF:
15806 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
15807
15808 case RS6000_BUILTIN_RSQRT:
15809 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
15810
15811 case POWER7_BUILTIN_BPERMD:
15812 return rs6000_expand_binop_builtin (((TARGET_64BIT)
15813 ? CODE_FOR_bpermd_di
15814 : CODE_FOR_bpermd_si), exp, target);
15815
15816 case RS6000_BUILTIN_GET_TB:
15817 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
15818 target);
15819
15820 case RS6000_BUILTIN_MFTB:
15821 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
15822 ? CODE_FOR_rs6000_mftb_di
15823 : CODE_FOR_rs6000_mftb_si),
15824 target);
15825
15826 case RS6000_BUILTIN_MFFS:
15827 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
15828
15829 case RS6000_BUILTIN_MTFSF:
15830 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
15831
15832 case RS6000_BUILTIN_CPU_INIT:
15833 case RS6000_BUILTIN_CPU_IS:
15834 case RS6000_BUILTIN_CPU_SUPPORTS:
15835 return cpu_expand_builtin (fcode, exp, target);
15836
15837 case MISC_BUILTIN_SPEC_BARRIER:
15838 {
15839 emit_insn (gen_speculation_barrier ());
15840 return NULL_RTX;
15841 }
15842
15843 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
15844 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
15845 {
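/* lvsl/lvsr produce a byte-permutation mask derived from the low four
   bits of the given address; the vectorizer applies that mask to
   realign misaligned vector accesses (see the recording of
   altivec_builtin_mask_for_load later in this file). */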
15846 int icode2 = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
15847 : (int) CODE_FOR_altivec_lvsl_direct);
15848 machine_mode tmode = insn_data[icode2].operand[0].mode;
15849 machine_mode mode = insn_data[icode2].operand[1].mode;
15850 tree arg;
15851 rtx op, addr, pat;
15852
15853 gcc_assert (TARGET_ALTIVEC);
15854
15855 arg = CALL_EXPR_ARG (exp, 0);
15856 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
15857 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
15858 addr = memory_address (mode, op);
15859 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
15860 op = addr;
15861 else
15862 {
15863 /* For the load case we need to negate the address. */
15864 op = gen_reg_rtx (GET_MODE (addr));
15865 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
15866 }
15867 op = gen_rtx_MEM (mode, op);
15868
15869 if (target == 0
15870 || GET_MODE (target) != tmode
15871 || ! (*insn_data[icode2].operand[0].predicate) (target, tmode))
15872 target = gen_reg_rtx (tmode);
15873
15874 pat = GEN_FCN (icode2) (target, op);
15875 if (!pat)
15876 return 0;
15877 emit_insn (pat);
15878
15879 return target;
15880 }
15881
15882 case ALTIVEC_BUILTIN_VCFUX:
15883 case ALTIVEC_BUILTIN_VCFSX:
15884 case ALTIVEC_BUILTIN_VCTUXS:
15885 case ALTIVEC_BUILTIN_VCTSXS:
15886 /* FIXME: There's got to be a nicer way to handle this case than
15887 constructing a new CALL_EXPR. */
15888 if (call_expr_nargs (exp) == 1)
15889 {
15890 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
15891 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
15892 }
15893 break;
15894
15895 /* For the pack and unpack int128 routines, fix up the builtin so it
15896 uses the correct IBM128 type. */
15897 case MISC_BUILTIN_PACK_IF:
15898 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
15899 {
15900 icode = CODE_FOR_packtf;
15901 fcode = MISC_BUILTIN_PACK_TF;
15902 uns_fcode = (size_t)fcode;
15903 }
15904 break;
15905
15906 case MISC_BUILTIN_UNPACK_IF:
15907 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
15908 {
15909 icode = CODE_FOR_unpacktf;
15910 fcode = MISC_BUILTIN_UNPACK_TF;
15911 uns_fcode = (size_t)fcode;
15912 }
15913 break;
15914
15915 default:
15916 break;
15917 }
15918
15919 if (TARGET_ALTIVEC)
15920 {
15921 ret = altivec_expand_builtin (exp, target, &success);
15922
15923 if (success)
15924 return ret;
15925 }
15926 if (TARGET_HTM)
15927 {
15928 ret = htm_expand_builtin (exp, target, &success);
15929
15930 if (success)
15931 return ret;
15932 }
15933
15934 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
15935 /* RS6000_BTC_SPECIAL represents no-operand operators. */
15936 gcc_assert (attr == RS6000_BTC_UNARY
15937 || attr == RS6000_BTC_BINARY
15938 || attr == RS6000_BTC_TERNARY
15939 || attr == RS6000_BTC_SPECIAL);
15940
15941 /* Handle simple unary operations. */
15942 d = bdesc_1arg;
15943 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15944 if (d->code == fcode)
15945 return rs6000_expand_unop_builtin (icode, exp, target);
15946
15947 /* Handle simple binary operations. */
15948 d = bdesc_2arg;
15949 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15950 if (d->code == fcode)
15951 return rs6000_expand_binop_builtin (icode, exp, target);
15952
15953 /* Handle simple ternary operations. */
15954 d = bdesc_3arg;
15955 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
15956 if (d->code == fcode)
15957 return rs6000_expand_ternop_builtin (icode, exp, target);
15958
15959 /* Handle simple no-argument operations. */
15960 d = bdesc_0arg;
15961 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
15962 if (d->code == fcode)
15963 return rs6000_expand_zeroop_builtin (icode, target);
15964
15965 gcc_unreachable ();
15966 }
15967
15968 /* Create a builtin vector type with a name, taking care not to give
15969 the canonical type a name. */
15970
15971 static tree
15972 rs6000_vector_type (const char *name, tree elt_type, unsigned num_elts)
15973 {
15974 tree result = build_vector_type (elt_type, num_elts);
15975
15976 /* Copy so we don't give the canonical type a name. */
15977 result = build_variant_type_copy (result);
15978
15979 add_builtin_type (name, result);
15980
15981 return result;
15982 }
15983
15984 static void
15985 rs6000_init_builtins (void)
15986 {
15987 tree tdecl;
15988 tree ftype;
15989 machine_mode mode;
15990
15991 if (TARGET_DEBUG_BUILTIN)
15992 fprintf (stderr, "rs6000_init_builtins%s%s\n",
15993 (TARGET_ALTIVEC) ? ", altivec" : "",
15994 (TARGET_VSX) ? ", vsx" : "");
15995
15996 V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64 ? "__vector long"
15997 : "__vector long long",
15998 intDI_type_node, 2);
15999 V2DF_type_node = rs6000_vector_type ("__vector double", double_type_node, 2);
16000 V4SI_type_node = rs6000_vector_type ("__vector signed int",
16001 intSI_type_node, 4);
16002 V4SF_type_node = rs6000_vector_type ("__vector float", float_type_node, 4);
16003 V8HI_type_node = rs6000_vector_type ("__vector signed short",
16004 intHI_type_node, 8);
16005 V16QI_type_node = rs6000_vector_type ("__vector signed char",
16006 intQI_type_node, 16);
16007
16008 unsigned_V16QI_type_node = rs6000_vector_type ("__vector unsigned char",
16009 unsigned_intQI_type_node, 16);
16010 unsigned_V8HI_type_node = rs6000_vector_type ("__vector unsigned short",
16011 unsigned_intHI_type_node, 8);
16012 unsigned_V4SI_type_node = rs6000_vector_type ("__vector unsigned int",
16013 unsigned_intSI_type_node, 4);
16014 unsigned_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
16015 ? "__vector unsigned long"
16016 : "__vector unsigned long long",
16017 unsigned_intDI_type_node, 2);
16018
16019 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
16020
16021 const_str_type_node
16022 = build_pointer_type (build_qualified_type (char_type_node,
16023 TYPE_QUAL_CONST));
16024
16025 /* We use V1TI mode as a special container to hold __int128_t items that
16026 must live in VSX registers. */
16027 if (intTI_type_node)
16028 {
16029 V1TI_type_node = rs6000_vector_type ("__vector __int128",
16030 intTI_type_node, 1);
16031 unsigned_V1TI_type_node
16032 = rs6000_vector_type ("__vector unsigned __int128",
16033 unsigned_intTI_type_node, 1);
16034 }
16035
16036 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
16037 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
16038 'vector unsigned short'. */
16039
16040 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
16041 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
16042 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
16043 bool_long_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
16044 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
16045
16046 long_integer_type_internal_node = long_integer_type_node;
16047 long_unsigned_type_internal_node = long_unsigned_type_node;
16048 long_long_integer_type_internal_node = long_long_integer_type_node;
16049 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
16050 intQI_type_internal_node = intQI_type_node;
16051 uintQI_type_internal_node = unsigned_intQI_type_node;
16052 intHI_type_internal_node = intHI_type_node;
16053 uintHI_type_internal_node = unsigned_intHI_type_node;
16054 intSI_type_internal_node = intSI_type_node;
16055 uintSI_type_internal_node = unsigned_intSI_type_node;
16056 intDI_type_internal_node = intDI_type_node;
16057 uintDI_type_internal_node = unsigned_intDI_type_node;
16058 intTI_type_internal_node = intTI_type_node;
16059 uintTI_type_internal_node = unsigned_intTI_type_node;
16060 float_type_internal_node = float_type_node;
16061 double_type_internal_node = double_type_node;
16062 long_double_type_internal_node = long_double_type_node;
16063 dfloat64_type_internal_node = dfloat64_type_node;
16064 dfloat128_type_internal_node = dfloat128_type_node;
16065 void_type_internal_node = void_type_node;
16066
16067 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
16068 IFmode is the IBM extended 128-bit format that is a pair of doubles.
16069 TFmode will be either IEEE 128-bit floating point or the IBM double-double
16070 format that uses a pair of doubles, depending on the switches and
16071 defaults.
16072
16073 If we don't support either 128-bit IBM double double or IEEE 128-bit
16074 floating point, we need to make sure the type is non-zero or else the
16075 self-test fails during bootstrap.
16076
16077 Always create __ibm128 as a separate type, even if the current long double
16078 format is IBM extended double.
16079
16080 For IEEE 128-bit floating point, always create the type __ieee128. If the
16081 user used -mfloat128, rs6000-c.c will create a define from __float128 to
16082 __ieee128. */
16083 if (TARGET_FLOAT128_TYPE)
16084 {
16085 if (!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128)
16086 ibm128_float_type_node = long_double_type_node;
16087 else
16088 {
16089 ibm128_float_type_node = make_node (REAL_TYPE);
16090 TYPE_PRECISION (ibm128_float_type_node) = 128;
16091 SET_TYPE_MODE (ibm128_float_type_node, IFmode);
16092 layout_type (ibm128_float_type_node);
16093 }
16094
16095 lang_hooks.types.register_builtin_type (ibm128_float_type_node,
16096 "__ibm128");
16097
16098 if (TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128)
16099 ieee128_float_type_node = long_double_type_node;
16100 else
16101 ieee128_float_type_node = float128_type_node;
16102
16103 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
16104 "__ieee128");
16105 }
16106
16107 else
16108 ieee128_float_type_node = ibm128_float_type_node = long_double_type_node;
16109
16110 /* Initialize the modes for builtin_function_type, mapping a machine mode to
16111 tree type node. */
16112 builtin_mode_to_type[QImode][0] = integer_type_node;
16113 builtin_mode_to_type[HImode][0] = integer_type_node;
16114 builtin_mode_to_type[SImode][0] = intSI_type_node;
16115 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
16116 builtin_mode_to_type[DImode][0] = intDI_type_node;
16117 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
16118 builtin_mode_to_type[TImode][0] = intTI_type_node;
16119 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
16120 builtin_mode_to_type[SFmode][0] = float_type_node;
16121 builtin_mode_to_type[DFmode][0] = double_type_node;
16122 builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
16123 builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
16124 builtin_mode_to_type[TFmode][0] = long_double_type_node;
16125 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
16126 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
16127 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
16128 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
16129 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
16130 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
16131 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
16132 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
16133 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
16134 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
16135 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
16136 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
16137 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
16138 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
16139
16140 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
16141 TYPE_NAME (bool_char_type_node) = tdecl;
16142
16143 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
16144 TYPE_NAME (bool_short_type_node) = tdecl;
16145
16146 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
16147 TYPE_NAME (bool_int_type_node) = tdecl;
16148
16149 tdecl = add_builtin_type ("__pixel", pixel_type_node);
16150 TYPE_NAME (pixel_type_node) = tdecl;
16151
16152 bool_V16QI_type_node = rs6000_vector_type ("__vector __bool char",
16153 bool_char_type_node, 16);
16154 bool_V8HI_type_node = rs6000_vector_type ("__vector __bool short",
16155 bool_short_type_node, 8);
16156 bool_V4SI_type_node = rs6000_vector_type ("__vector __bool int",
16157 bool_int_type_node, 4);
16158 bool_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
16159 ? "__vector __bool long"
16160 : "__vector __bool long long",
16161 bool_long_long_type_node, 2);
16162 pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel",
16163 pixel_type_node, 8);
16164
16165 /* Create Altivec and VSX builtins on machines with at least the
16166 general purpose extensions (970 and newer) to allow the use of
16167 the target attribute. */
16168 if (TARGET_EXTRA_BUILTINS)
16169 altivec_init_builtins ();
16170 if (TARGET_HTM)
16171 htm_init_builtins ();
16172
16173 if (TARGET_EXTRA_BUILTINS)
16174 rs6000_common_init_builtins ();
16175
16176 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
16177 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
16178 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
16179
16180 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
16181 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
16182 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
16183
16184 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
16185 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
16186 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
16187
16188 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
16189 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
16190 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
16191
16192 mode = (TARGET_64BIT) ? DImode : SImode;
16193 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
16194 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
16195 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
16196
16197 ftype = build_function_type_list (unsigned_intDI_type_node,
16198 NULL_TREE);
16199 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
16200
16201 if (TARGET_64BIT)
16202 ftype = build_function_type_list (unsigned_intDI_type_node,
16203 NULL_TREE);
16204 else
16205 ftype = build_function_type_list (unsigned_intSI_type_node,
16206 NULL_TREE);
16207 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
16208
16209 ftype = build_function_type_list (double_type_node, NULL_TREE);
16210 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
16211
16212 ftype = build_function_type_list (void_type_node,
16213 intSI_type_node, double_type_node,
16214 NULL_TREE);
16215 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
16216
16217 ftype = build_function_type_list (void_type_node, NULL_TREE);
16218 def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT);
16219 def_builtin ("__builtin_ppc_speculation_barrier", ftype,
16220 MISC_BUILTIN_SPEC_BARRIER);
16221
16222 ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node,
16223 NULL_TREE);
16224 def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS);
16225 def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS);
16226
16227 /* AIX libm provides clog as __clog. */
16228 if (TARGET_XCOFF
16229 && (tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
16230 set_user_assembler_name (tdecl, "__clog");
16231
16232 #ifdef SUBTARGET_INIT_BUILTINS
16233 SUBTARGET_INIT_BUILTINS;
16234 #endif
16235 }
16236
16237 /* Returns the rs6000 builtin decl for CODE. */
16238
16239 static tree
16240 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
16241 {
16242 HOST_WIDE_INT fnmask;
16243
16244 if (code >= RS6000_BUILTIN_COUNT)
16245 return error_mark_node;
16246
16247 fnmask = rs6000_builtin_info[code].mask;
16248 if ((fnmask & rs6000_builtin_mask) != fnmask)
16249 {
16250 rs6000_invalid_builtin ((enum rs6000_builtins)code);
16251 return error_mark_node;
16252 }
16253
16254 return rs6000_builtin_decls[code];
16255 }
16256
16257 static void
16258 altivec_init_builtins (void)
16259 {
16260 const struct builtin_description *d;
16261 size_t i;
16262 tree ftype;
16263 tree decl;
16264 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
16265
16266 tree pvoid_type_node = build_pointer_type (void_type_node);
16267
16268 tree pcvoid_type_node
16269 = build_pointer_type (build_qualified_type (void_type_node,
16270 TYPE_QUAL_CONST));
16271
16272 tree int_ftype_opaque
16273 = build_function_type_list (integer_type_node,
16274 opaque_V4SI_type_node, NULL_TREE);
16275 tree opaque_ftype_opaque
16276 = build_function_type_list (integer_type_node, NULL_TREE);
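/* The signature above is only a placeholder: vec_splats and
   vec_promote are given their real types during overload resolution
   in rs6000-c.c. */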
16277 tree opaque_ftype_opaque_int
16278 = build_function_type_list (opaque_V4SI_type_node,
16279 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
16280 tree opaque_ftype_opaque_opaque_int
16281 = build_function_type_list (opaque_V4SI_type_node,
16282 opaque_V4SI_type_node, opaque_V4SI_type_node,
16283 integer_type_node, NULL_TREE);
16284 tree opaque_ftype_opaque_opaque_opaque
16285 = build_function_type_list (opaque_V4SI_type_node,
16286 opaque_V4SI_type_node, opaque_V4SI_type_node,
16287 opaque_V4SI_type_node, NULL_TREE);
16288 tree opaque_ftype_opaque_opaque
16289 = build_function_type_list (opaque_V4SI_type_node,
16290 opaque_V4SI_type_node, opaque_V4SI_type_node,
16291 NULL_TREE);
16292 tree int_ftype_int_opaque_opaque
16293 = build_function_type_list (integer_type_node,
16294 integer_type_node, opaque_V4SI_type_node,
16295 opaque_V4SI_type_node, NULL_TREE);
16296 tree int_ftype_int_v4si_v4si
16297 = build_function_type_list (integer_type_node,
16298 integer_type_node, V4SI_type_node,
16299 V4SI_type_node, NULL_TREE);
16300 tree int_ftype_int_v2di_v2di
16301 = build_function_type_list (integer_type_node,
16302 integer_type_node, V2DI_type_node,
16303 V2DI_type_node, NULL_TREE);
16304 tree void_ftype_v4si
16305 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
16306 tree v8hi_ftype_void
16307 = build_function_type_list (V8HI_type_node, NULL_TREE);
16308 tree void_ftype_void
16309 = build_function_type_list (void_type_node, NULL_TREE);
16310 tree void_ftype_int
16311 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
16312
16313 tree opaque_ftype_long_pcvoid
16314 = build_function_type_list (opaque_V4SI_type_node,
16315 long_integer_type_node, pcvoid_type_node,
16316 NULL_TREE);
16317 tree v16qi_ftype_long_pcvoid
16318 = build_function_type_list (V16QI_type_node,
16319 long_integer_type_node, pcvoid_type_node,
16320 NULL_TREE);
16321 tree v8hi_ftype_long_pcvoid
16322 = build_function_type_list (V8HI_type_node,
16323 long_integer_type_node, pcvoid_type_node,
16324 NULL_TREE);
16325 tree v4si_ftype_long_pcvoid
16326 = build_function_type_list (V4SI_type_node,
16327 long_integer_type_node, pcvoid_type_node,
16328 NULL_TREE);
16329 tree v4sf_ftype_long_pcvoid
16330 = build_function_type_list (V4SF_type_node,
16331 long_integer_type_node, pcvoid_type_node,
16332 NULL_TREE);
16333 tree v2df_ftype_long_pcvoid
16334 = build_function_type_list (V2DF_type_node,
16335 long_integer_type_node, pcvoid_type_node,
16336 NULL_TREE);
16337 tree v2di_ftype_long_pcvoid
16338 = build_function_type_list (V2DI_type_node,
16339 long_integer_type_node, pcvoid_type_node,
16340 NULL_TREE);
16341 tree v1ti_ftype_long_pcvoid
16342 = build_function_type_list (V1TI_type_node,
16343 long_integer_type_node, pcvoid_type_node,
16344 NULL_TREE);
16345
16346 tree void_ftype_opaque_long_pvoid
16347 = build_function_type_list (void_type_node,
16348 opaque_V4SI_type_node, long_integer_type_node,
16349 pvoid_type_node, NULL_TREE);
16350 tree void_ftype_v4si_long_pvoid
16351 = build_function_type_list (void_type_node,
16352 V4SI_type_node, long_integer_type_node,
16353 pvoid_type_node, NULL_TREE);
16354 tree void_ftype_v16qi_long_pvoid
16355 = build_function_type_list (void_type_node,
16356 V16QI_type_node, long_integer_type_node,
16357 pvoid_type_node, NULL_TREE);
16358
16359 tree void_ftype_v16qi_pvoid_long
16360 = build_function_type_list (void_type_node,
16361 V16QI_type_node, pvoid_type_node,
16362 long_integer_type_node, NULL_TREE);
16363
16364 tree void_ftype_v8hi_long_pvoid
16365 = build_function_type_list (void_type_node,
16366 V8HI_type_node, long_integer_type_node,
16367 pvoid_type_node, NULL_TREE);
16368 tree void_ftype_v4sf_long_pvoid
16369 = build_function_type_list (void_type_node,
16370 V4SF_type_node, long_integer_type_node,
16371 pvoid_type_node, NULL_TREE);
16372 tree void_ftype_v2df_long_pvoid
16373 = build_function_type_list (void_type_node,
16374 V2DF_type_node, long_integer_type_node,
16375 pvoid_type_node, NULL_TREE);
16376 tree void_ftype_v1ti_long_pvoid
16377 = build_function_type_list (void_type_node,
16378 V1TI_type_node, long_integer_type_node,
16379 pvoid_type_node, NULL_TREE);
16380 tree void_ftype_v2di_long_pvoid
16381 = build_function_type_list (void_type_node,
16382 V2DI_type_node, long_integer_type_node,
16383 pvoid_type_node, NULL_TREE);
16384 tree int_ftype_int_v8hi_v8hi
16385 = build_function_type_list (integer_type_node,
16386 integer_type_node, V8HI_type_node,
16387 V8HI_type_node, NULL_TREE);
16388 tree int_ftype_int_v16qi_v16qi
16389 = build_function_type_list (integer_type_node,
16390 integer_type_node, V16QI_type_node,
16391 V16QI_type_node, NULL_TREE);
16392 tree int_ftype_int_v4sf_v4sf
16393 = build_function_type_list (integer_type_node,
16394 integer_type_node, V4SF_type_node,
16395 V4SF_type_node, NULL_TREE);
16396 tree int_ftype_int_v2df_v2df
16397 = build_function_type_list (integer_type_node,
16398 integer_type_node, V2DF_type_node,
16399 V2DF_type_node, NULL_TREE);
16400 tree v2di_ftype_v2di
16401 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
16402 tree v4si_ftype_v4si
16403 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16404 tree v8hi_ftype_v8hi
16405 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
16406 tree v16qi_ftype_v16qi
16407 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
16408 tree v4sf_ftype_v4sf
16409 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
16410 tree v2df_ftype_v2df
16411 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
16412 tree void_ftype_pcvoid_int_int
16413 = build_function_type_list (void_type_node,
16414 pcvoid_type_node, integer_type_node,
16415 integer_type_node, NULL_TREE);
16416
16417 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
16418 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
16419 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
16420 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
16421 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
16422 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
16423 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
16424 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
16425 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
16426 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
16427 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
16428 ALTIVEC_BUILTIN_LVXL_V2DF);
16429 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
16430 ALTIVEC_BUILTIN_LVXL_V2DI);
16431 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
16432 ALTIVEC_BUILTIN_LVXL_V4SF);
16433 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
16434 ALTIVEC_BUILTIN_LVXL_V4SI);
16435 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
16436 ALTIVEC_BUILTIN_LVXL_V8HI);
16437 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
16438 ALTIVEC_BUILTIN_LVXL_V16QI);
16439 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
16440 def_builtin ("__builtin_altivec_lvx_v1ti", v1ti_ftype_long_pcvoid,
16441 ALTIVEC_BUILTIN_LVX_V1TI);
16442 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
16443 ALTIVEC_BUILTIN_LVX_V2DF);
16444 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
16445 ALTIVEC_BUILTIN_LVX_V2DI);
16446 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
16447 ALTIVEC_BUILTIN_LVX_V4SF);
16448 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
16449 ALTIVEC_BUILTIN_LVX_V4SI);
16450 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
16451 ALTIVEC_BUILTIN_LVX_V8HI);
16452 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
16453 ALTIVEC_BUILTIN_LVX_V16QI);
16454 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
16455 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
16456 ALTIVEC_BUILTIN_STVX_V2DF);
16457 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
16458 ALTIVEC_BUILTIN_STVX_V2DI);
16459 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
16460 ALTIVEC_BUILTIN_STVX_V4SF);
16461 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
16462 ALTIVEC_BUILTIN_STVX_V4SI);
16463 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
16464 ALTIVEC_BUILTIN_STVX_V8HI);
16465 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
16466 ALTIVEC_BUILTIN_STVX_V16QI);
16467 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
16468 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
16469 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
16470 ALTIVEC_BUILTIN_STVXL_V2DF);
16471 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
16472 ALTIVEC_BUILTIN_STVXL_V2DI);
16473 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
16474 ALTIVEC_BUILTIN_STVXL_V4SF);
16475 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
16476 ALTIVEC_BUILTIN_STVXL_V4SI);
16477 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
16478 ALTIVEC_BUILTIN_STVXL_V8HI);
16479 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
16480 ALTIVEC_BUILTIN_STVXL_V16QI);
16481 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
16482 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
16483 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
16484 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
16485 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
16486 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
16487 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
16488 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
16489 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
16490 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
16491 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
16492 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
16493 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
16494 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
16495 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
16496 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
16497
16498 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
16499 VSX_BUILTIN_LXVD2X_V2DF);
16500 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
16501 VSX_BUILTIN_LXVD2X_V2DI);
16502 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
16503 VSX_BUILTIN_LXVW4X_V4SF);
16504 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
16505 VSX_BUILTIN_LXVW4X_V4SI);
16506 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
16507 VSX_BUILTIN_LXVW4X_V8HI);
16508 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
16509 VSX_BUILTIN_LXVW4X_V16QI);
16510 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
16511 VSX_BUILTIN_STXVD2X_V2DF);
16512 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
16513 VSX_BUILTIN_STXVD2X_V2DI);
16514 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
16515 VSX_BUILTIN_STXVW4X_V4SF);
16516 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
16517 VSX_BUILTIN_STXVW4X_V4SI);
16518 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
16519 VSX_BUILTIN_STXVW4X_V8HI);
16520 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
16521 VSX_BUILTIN_STXVW4X_V16QI);
16522
16523 def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid,
16524 VSX_BUILTIN_LD_ELEMREV_V2DF);
16525 def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid,
16526 VSX_BUILTIN_LD_ELEMREV_V2DI);
16527 def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid,
16528 VSX_BUILTIN_LD_ELEMREV_V4SF);
16529 def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid,
16530 VSX_BUILTIN_LD_ELEMREV_V4SI);
16531 def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
16532 VSX_BUILTIN_LD_ELEMREV_V8HI);
16533 def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
16534 VSX_BUILTIN_LD_ELEMREV_V16QI);
16535 def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
16536 VSX_BUILTIN_ST_ELEMREV_V2DF);
16537 def_builtin ("__builtin_vsx_st_elemrev_v1ti", void_ftype_v1ti_long_pvoid,
16538 VSX_BUILTIN_ST_ELEMREV_V1TI);
16539 def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
16540 VSX_BUILTIN_ST_ELEMREV_V2DI);
16541 def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid,
16542 VSX_BUILTIN_ST_ELEMREV_V4SF);
16543 def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
16544 VSX_BUILTIN_ST_ELEMREV_V4SI);
16545 def_builtin ("__builtin_vsx_st_elemrev_v8hi", void_ftype_v8hi_long_pvoid,
16546 VSX_BUILTIN_ST_ELEMREV_V8HI);
16547 def_builtin ("__builtin_vsx_st_elemrev_v16qi", void_ftype_v16qi_long_pvoid,
16548 VSX_BUILTIN_ST_ELEMREV_V16QI);
16549
16550 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
16551 VSX_BUILTIN_VEC_LD);
16552 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
16553 VSX_BUILTIN_VEC_ST);
16554 def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
16555 VSX_BUILTIN_VEC_XL);
16556 def_builtin ("__builtin_vec_xl_be", opaque_ftype_long_pcvoid,
16557 VSX_BUILTIN_VEC_XL_BE);
16558 def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
16559 VSX_BUILTIN_VEC_XST);
16560 def_builtin ("__builtin_vec_xst_be", void_ftype_opaque_long_pvoid,
16561 VSX_BUILTIN_VEC_XST_BE);
16562
16563 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
16564 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
16565 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
16566
16567 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
16568 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
16569 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
16570 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
16571 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
16572 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
16573 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
16574 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
16575 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
16576 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
16577 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
16578 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
16579
16580 def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque,
16581 ALTIVEC_BUILTIN_VEC_ADDE);
16582 def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque,
16583 ALTIVEC_BUILTIN_VEC_ADDEC);
16584 def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque,
16585 ALTIVEC_BUILTIN_VEC_CMPNE);
16586 def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque,
16587 ALTIVEC_BUILTIN_VEC_MUL);
16588 def_builtin ("__builtin_vec_sube", opaque_ftype_opaque_opaque_opaque,
16589 ALTIVEC_BUILTIN_VEC_SUBE);
16590 def_builtin ("__builtin_vec_subec", opaque_ftype_opaque_opaque_opaque,
16591 ALTIVEC_BUILTIN_VEC_SUBEC);
16592
16593 /* Cell builtins. */
16594 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
16595 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
16596 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
16597 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
16598
16599 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
16600 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
16601 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
16602 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
16603
16604 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
16605 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
16606 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
16607 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
16608
16609 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
16610 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
16611 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
16612 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
16613
16614 if (TARGET_P9_VECTOR)
16615 {
16616 def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long,
16617 P9V_BUILTIN_STXVL);
16618 def_builtin ("__builtin_xst_len_r", void_ftype_v16qi_pvoid_long,
16619 P9V_BUILTIN_XST_LEN_R);
16620 }
16621
16622 /* Add the DST variants. */
16623 d = bdesc_dst;
16624 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
16625 {
16626 HOST_WIDE_INT mask = d->mask;
16627
16628 /* It is expected that these dst built-in functions may have
16629 d->icode equal to CODE_FOR_nothing. */
16630 if ((mask & builtin_mask) != mask)
16631 {
16632 if (TARGET_DEBUG_BUILTIN)
16633 fprintf (stderr, "altivec_init_builtins, skip dst %s\n",
16634 d->name);
16635 continue;
16636 }
16637 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
16638 }
16639
16640 /* Initialize the predicates. */
16641 d = bdesc_altivec_preds;
16642 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
16643 {
16644 machine_mode mode1;
16645 tree type;
16646 HOST_WIDE_INT mask = d->mask;
16647
16648 if ((mask & builtin_mask) != mask)
16649 {
16650 if (TARGET_DEBUG_BUILTIN)
16651 fprintf (stderr, "altivec_init_builtins, skip predicate %s\n",
16652 d->name);
16653 continue;
16654 }
16655
16656 if (rs6000_overloaded_builtin_p (d->code))
16657 mode1 = VOIDmode;
16658 else
16659 {
16660 /* Cannot define builtin if the instruction is disabled. */
16661 gcc_assert (d->icode != CODE_FOR_nothing);
16662 mode1 = insn_data[d->icode].operand[1].mode;
16663 }
16664
16665 switch (mode1)
16666 {
16667 case E_VOIDmode:
16668 type = int_ftype_int_opaque_opaque;
16669 break;
16670 case E_V2DImode:
16671 type = int_ftype_int_v2di_v2di;
16672 break;
16673 case E_V4SImode:
16674 type = int_ftype_int_v4si_v4si;
16675 break;
16676 case E_V8HImode:
16677 type = int_ftype_int_v8hi_v8hi;
16678 break;
16679 case E_V16QImode:
16680 type = int_ftype_int_v16qi_v16qi;
16681 break;
16682 case E_V4SFmode:
16683 type = int_ftype_int_v4sf_v4sf;
16684 break;
16685 case E_V2DFmode:
16686 type = int_ftype_int_v2df_v2df;
16687 break;
16688 default:
16689 gcc_unreachable ();
16690 }
16691
16692 def_builtin (d->name, type, d->code);
16693 }
16694
16695 /* Initialize the abs* operators. */
16696 d = bdesc_abs;
16697 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
16698 {
16699 machine_mode mode0;
16700 tree type;
16701 HOST_WIDE_INT mask = d->mask;
16702
16703 if ((mask & builtin_mask) != mask)
16704 {
16705 if (TARGET_DEBUG_BUILTIN)
16706 fprintf (stderr, "altivec_init_builtins, skip abs %s\n",
16707 d->name);
16708 continue;
16709 }
16710
16711 /* Cannot define builtin if the instruction is disabled. */
16712 gcc_assert (d->icode != CODE_FOR_nothing);
16713 mode0 = insn_data[d->icode].operand[0].mode;
16714
16715 switch (mode0)
16716 {
16717 case E_V2DImode:
16718 type = v2di_ftype_v2di;
16719 break;
16720 case E_V4SImode:
16721 type = v4si_ftype_v4si;
16722 break;
16723 case E_V8HImode:
16724 type = v8hi_ftype_v8hi;
16725 break;
16726 case E_V16QImode:
16727 type = v16qi_ftype_v16qi;
16728 break;
16729 case E_V4SFmode:
16730 type = v4sf_ftype_v4sf;
16731 break;
16732 case E_V2DFmode:
16733 type = v2df_ftype_v2df;
16734 break;
16735 default:
16736 gcc_unreachable ();
16737 }
16738
16739 def_builtin (d->name, type, d->code);
16740 }
16741
16742 /* Initialize the target builtin that implements
16743 targetm.vectorize.builtin_mask_for_load. */
16744
16745 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
16746 v16qi_ftype_long_pcvoid,
16747 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
16748 BUILT_IN_MD, NULL, NULL_TREE);
16749 TREE_READONLY (decl) = 1;
16750 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
16751 altivec_builtin_mask_for_load = decl;
16752
16753 /* Access to the vec_init patterns. */
16754 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
16755 integer_type_node, integer_type_node,
16756 integer_type_node, NULL_TREE);
16757 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
16758
16759 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
16760 short_integer_type_node,
16761 short_integer_type_node,
16762 short_integer_type_node,
16763 short_integer_type_node,
16764 short_integer_type_node,
16765 short_integer_type_node,
16766 short_integer_type_node, NULL_TREE);
16767 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
16768
16769 ftype = build_function_type_list (V16QI_type_node, char_type_node,
16770 char_type_node, char_type_node,
16771 char_type_node, char_type_node,
16772 char_type_node, char_type_node,
16773 char_type_node, char_type_node,
16774 char_type_node, char_type_node,
16775 char_type_node, char_type_node,
16776 char_type_node, char_type_node,
16777 char_type_node, NULL_TREE);
16778 def_builtin ("__builtin_vec_init_v16qi", ftype,
16779 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
16780
16781 ftype = build_function_type_list (V4SF_type_node, float_type_node,
16782 float_type_node, float_type_node,
16783 float_type_node, NULL_TREE);
16784 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
16785
16786 /* VSX builtins. */
16787 ftype = build_function_type_list (V2DF_type_node, double_type_node,
16788 double_type_node, NULL_TREE);
16789 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
16790
16791 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
16792 intDI_type_node, NULL_TREE);
16793 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
16794
16795 /* Access to the vec_set patterns. */
16796 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
16797 intSI_type_node,
16798 integer_type_node, NULL_TREE);
16799 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
16800
16801 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
16802 intHI_type_node,
16803 integer_type_node, NULL_TREE);
16804 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
16805
16806 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
16807 intQI_type_node,
16808 integer_type_node, NULL_TREE);
16809 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
16810
16811 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
16812 float_type_node,
16813 integer_type_node, NULL_TREE);
16814 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
16815
16816 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
16817 double_type_node,
16818 integer_type_node, NULL_TREE);
16819 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
16820
16821 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
16822 intDI_type_node,
16823 integer_type_node, NULL_TREE);
16824 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
16825
16826 /* Access to the vec_extract patterns. */
16827 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
16828 integer_type_node, NULL_TREE);
16829 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
16830
16831 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
16832 integer_type_node, NULL_TREE);
16833 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
16834
16835 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
16836 integer_type_node, NULL_TREE);
16837 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
16838
16839 ftype = build_function_type_list (float_type_node, V4SF_type_node,
16840 integer_type_node, NULL_TREE);
16841 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
16842
16843 ftype = build_function_type_list (double_type_node, V2DF_type_node,
16844 integer_type_node, NULL_TREE);
16845 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
16846
16847 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
16848 integer_type_node, NULL_TREE);
16849 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
16850
16851
16852 if (V1TI_type_node)
16853 {
16854 tree v1ti_ftype_long_pcvoid
16855 = build_function_type_list (V1TI_type_node,
16856 long_integer_type_node, pcvoid_type_node,
16857 NULL_TREE);
16858 tree void_ftype_v1ti_long_pvoid
16859 = build_function_type_list (void_type_node,
16860 V1TI_type_node, long_integer_type_node,
16861 pvoid_type_node, NULL_TREE);
16862 def_builtin ("__builtin_vsx_ld_elemrev_v1ti", v1ti_ftype_long_pcvoid,
16863 VSX_BUILTIN_LD_ELEMREV_V1TI);
16864 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
16865 VSX_BUILTIN_LXVD2X_V1TI);
16866 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
16867 VSX_BUILTIN_STXVD2X_V1TI);
16868 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
16869 NULL_TREE, NULL_TREE);
16870 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
16871 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
16872 intTI_type_node,
16873 integer_type_node, NULL_TREE);
16874 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
16875 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
16876 integer_type_node, NULL_TREE);
16877 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
16878 }
16879
16880 }
16881
16882 static void
16883 htm_init_builtins (void)
16884 {
16885 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
16886 const struct builtin_description *d;
16887 size_t i;
16888
16889 d = bdesc_htm;
16890 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
16891 {
16892 tree op[MAX_HTM_OPERANDS], type;
16893 HOST_WIDE_INT mask = d->mask;
16894 unsigned attr = rs6000_builtin_info[d->code].attr;
16895 bool void_func = (attr & RS6000_BTC_VOID);
16896 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
16897 int nopnds = 0;
16898 tree gpr_type_node;
16899 tree rettype;
16900 tree argtype;
16901
16902 /* It is expected that these htm built-in functions may have
16903 d->icode equal to CODE_FOR_nothing. */
16904
16905 if (TARGET_32BIT && TARGET_POWERPC64)
16906 gpr_type_node = long_long_unsigned_type_node;
16907 else
16908 gpr_type_node = long_unsigned_type_node;
16909
16910 if (attr & RS6000_BTC_SPR)
16911 {
16912 rettype = gpr_type_node;
16913 argtype = gpr_type_node;
16914 }
16915 else if (d->code == HTM_BUILTIN_TABORTDC
16916 || d->code == HTM_BUILTIN_TABORTDCI)
16917 {
16918 rettype = unsigned_type_node;
16919 argtype = gpr_type_node;
16920 }
16921 else
16922 {
16923 rettype = unsigned_type_node;
16924 argtype = unsigned_type_node;
16925 }
16926
16927 if ((mask & builtin_mask) != mask)
16928 {
16929 if (TARGET_DEBUG_BUILTIN)
16930 fprintf (stderr, "htm_builtin, skip htm %s\n", d->name);
16931 continue;
16932 }
16933
16934 if (d->name == 0)
16935 {
16936 if (TARGET_DEBUG_BUILTIN)
16937 fprintf (stderr, "htm_builtin, bdesc_htm[%ld] no name\n",
16938 (long unsigned) i);
16939 continue;
16940 }
16941
16942 op[nopnds++] = (void_func) ? void_type_node : rettype;
16943
16944 if (attr_args == RS6000_BTC_UNARY)
16945 op[nopnds++] = argtype;
16946 else if (attr_args == RS6000_BTC_BINARY)
16947 {
16948 op[nopnds++] = argtype;
16949 op[nopnds++] = argtype;
16950 }
16951 else if (attr_args == RS6000_BTC_TERNARY)
16952 {
16953 op[nopnds++] = argtype;
16954 op[nopnds++] = argtype;
16955 op[nopnds++] = argtype;
16956 }
16957
16958 switch (nopnds)
16959 {
16960 case 1:
16961 type = build_function_type_list (op[0], NULL_TREE);
16962 break;
16963 case 2:
16964 type = build_function_type_list (op[0], op[1], NULL_TREE);
16965 break;
16966 case 3:
16967 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
16968 break;
16969 case 4:
16970 type = build_function_type_list (op[0], op[1], op[2], op[3],
16971 NULL_TREE);
16972 break;
16973 default:
16974 gcc_unreachable ();
16975 }
16976
16977 def_builtin (d->name, type, d->code);
16978 }
16979 }
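/* Illustrative note, not part of the original source: for an SPR accessor
   such as __builtin_get_texasr, the RS6000_BTC_SPR case above picks
   gpr_type_node for both return and argument types, so a 32-bit compile
   with -mpowerpc64 types it as 'unsigned long long' rather than
   'unsigned long'.  */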
16980
16981 /* Hash function for builtin functions with up to 3 arguments and a return
16982 type. */
16983 hashval_t
16984 builtin_hasher::hash (builtin_hash_struct *bh)
16985 {
16986 unsigned ret = 0;
16987 int i;
16988
16989 for (i = 0; i < 4; i++)
16990 {
16991 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
16992 ret = (ret * 2) + bh->uns_p[i];
16993 }
16994
16995 return ret;
16996 }
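/* Illustrative note, not part of the original source: the loop above is a
   mixed-radix encoding of the four (mode, uns_p) pairs,

     h = 0;  for each i:  h = (h * MAX_MACHINE_MODE + mode[i]) * 2 + uns_p[i];

   so two distinct signatures collide only on unsigned overflow, which is
   harmless because builtin_hasher::equal still compares all eight fields.  */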
16997
16998 /* Compare builtin hash entries P1 and P2 for equivalence. */
16999 bool
17000 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
17001 {
17002 return ((p1->mode[0] == p2->mode[0])
17003 && (p1->mode[1] == p2->mode[1])
17004 && (p1->mode[2] == p2->mode[2])
17005 && (p1->mode[3] == p2->mode[3])
17006 && (p1->uns_p[0] == p2->uns_p[0])
17007 && (p1->uns_p[1] == p2->uns_p[1])
17008 && (p1->uns_p[2] == p2->uns_p[2])
17009 && (p1->uns_p[3] == p2->uns_p[3]));
17010 }
17011
17012 /* Map types for builtin functions with an explicit return type and up to 3
17013 arguments. Functions with fewer than 3 arguments use VOIDmode as the mode
17014 of the unused arguments. */
17015 static tree
17016 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
17017 machine_mode mode_arg1, machine_mode mode_arg2,
17018 enum rs6000_builtins builtin, const char *name)
17019 {
17020 struct builtin_hash_struct h;
17021 struct builtin_hash_struct *h2;
17022 int num_args = 3;
17023 int i;
17024 tree ret_type = NULL_TREE;
17025 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
17026
17027 /* Create builtin_hash_table. */
17028 if (builtin_hash_table == NULL)
17029 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
17030
17031 h.type = NULL_TREE;
17032 h.mode[0] = mode_ret;
17033 h.mode[1] = mode_arg0;
17034 h.mode[2] = mode_arg1;
17035 h.mode[3] = mode_arg2;
17036 h.uns_p[0] = 0;
17037 h.uns_p[1] = 0;
17038 h.uns_p[2] = 0;
17039 h.uns_p[3] = 0;
17040
17041 /* If the builtin produces unsigned results or takes unsigned arguments, and
17042 it is returned as a decl for the vectorizer (such as widening multiplies or
17043 permutes), make sure the arguments and return value have the correct
17044 signedness. */
17045 switch (builtin)
17046 {
17047 /* unsigned 1 argument functions. */
17048 case CRYPTO_BUILTIN_VSBOX:
17049 case P8V_BUILTIN_VGBBD:
17050 case MISC_BUILTIN_CDTBCD:
17051 case MISC_BUILTIN_CBCDTD:
17052 h.uns_p[0] = 1;
17053 h.uns_p[1] = 1;
17054 break;
17055
17056 /* unsigned 2 argument functions. */
17057 case ALTIVEC_BUILTIN_VMULEUB:
17058 case ALTIVEC_BUILTIN_VMULEUH:
17059 case P8V_BUILTIN_VMULEUW:
17060 case ALTIVEC_BUILTIN_VMULOUB:
17061 case ALTIVEC_BUILTIN_VMULOUH:
17062 case P8V_BUILTIN_VMULOUW:
17063 case CRYPTO_BUILTIN_VCIPHER:
17064 case CRYPTO_BUILTIN_VCIPHERLAST:
17065 case CRYPTO_BUILTIN_VNCIPHER:
17066 case CRYPTO_BUILTIN_VNCIPHERLAST:
17067 case CRYPTO_BUILTIN_VPMSUMB:
17068 case CRYPTO_BUILTIN_VPMSUMH:
17069 case CRYPTO_BUILTIN_VPMSUMW:
17070 case CRYPTO_BUILTIN_VPMSUMD:
17071 case CRYPTO_BUILTIN_VPMSUM:
17072 case MISC_BUILTIN_ADDG6S:
17073 case MISC_BUILTIN_DIVWEU:
17074 case MISC_BUILTIN_DIVDEU:
17075 case VSX_BUILTIN_UDIV_V2DI:
17076 case ALTIVEC_BUILTIN_VMAXUB:
17077 case ALTIVEC_BUILTIN_VMINUB:
17078 case ALTIVEC_BUILTIN_VMAXUH:
17079 case ALTIVEC_BUILTIN_VMINUH:
17080 case ALTIVEC_BUILTIN_VMAXUW:
17081 case ALTIVEC_BUILTIN_VMINUW:
17082 case P8V_BUILTIN_VMAXUD:
17083 case P8V_BUILTIN_VMINUD:
17084 h.uns_p[0] = 1;
17085 h.uns_p[1] = 1;
17086 h.uns_p[2] = 1;
17087 break;
17088
17089 /* unsigned 3 argument functions. */
17090 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
17091 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
17092 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
17093 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
17094 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
17095 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
17096 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
17097 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
17098 case VSX_BUILTIN_VPERM_16QI_UNS:
17099 case VSX_BUILTIN_VPERM_8HI_UNS:
17100 case VSX_BUILTIN_VPERM_4SI_UNS:
17101 case VSX_BUILTIN_VPERM_2DI_UNS:
17102 case VSX_BUILTIN_XXSEL_16QI_UNS:
17103 case VSX_BUILTIN_XXSEL_8HI_UNS:
17104 case VSX_BUILTIN_XXSEL_4SI_UNS:
17105 case VSX_BUILTIN_XXSEL_2DI_UNS:
17106 case CRYPTO_BUILTIN_VPERMXOR:
17107 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
17108 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
17109 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
17110 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
17111 case CRYPTO_BUILTIN_VSHASIGMAW:
17112 case CRYPTO_BUILTIN_VSHASIGMAD:
17113 case CRYPTO_BUILTIN_VSHASIGMA:
17114 h.uns_p[0] = 1;
17115 h.uns_p[1] = 1;
17116 h.uns_p[2] = 1;
17117 h.uns_p[3] = 1;
17118 break;
17119
17120 /* signed permute functions with unsigned char mask. */
17121 case ALTIVEC_BUILTIN_VPERM_16QI:
17122 case ALTIVEC_BUILTIN_VPERM_8HI:
17123 case ALTIVEC_BUILTIN_VPERM_4SI:
17124 case ALTIVEC_BUILTIN_VPERM_4SF:
17125 case ALTIVEC_BUILTIN_VPERM_2DI:
17126 case ALTIVEC_BUILTIN_VPERM_2DF:
17127 case VSX_BUILTIN_VPERM_16QI:
17128 case VSX_BUILTIN_VPERM_8HI:
17129 case VSX_BUILTIN_VPERM_4SI:
17130 case VSX_BUILTIN_VPERM_4SF:
17131 case VSX_BUILTIN_VPERM_2DI:
17132 case VSX_BUILTIN_VPERM_2DF:
17133 h.uns_p[3] = 1;
17134 break;
17135
17136 /* unsigned args, signed return. */
17137 case VSX_BUILTIN_XVCVUXDSP:
17138 case VSX_BUILTIN_XVCVUXDDP_UNS:
17139 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
17140 h.uns_p[1] = 1;
17141 break;
17142
17143 /* signed args, unsigned return. */
17144 case VSX_BUILTIN_XVCVDPUXDS_UNS:
17145 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
17146 case MISC_BUILTIN_UNPACK_TD:
17147 case MISC_BUILTIN_UNPACK_V1TI:
17148 h.uns_p[0] = 1;
17149 break;
17150
17151 /* unsigned arguments, bool return (compares). */
17152 case ALTIVEC_BUILTIN_VCMPEQUB:
17153 case ALTIVEC_BUILTIN_VCMPEQUH:
17154 case ALTIVEC_BUILTIN_VCMPEQUW:
17155 case P8V_BUILTIN_VCMPEQUD:
17156 case VSX_BUILTIN_CMPGE_U16QI:
17157 case VSX_BUILTIN_CMPGE_U8HI:
17158 case VSX_BUILTIN_CMPGE_U4SI:
17159 case VSX_BUILTIN_CMPGE_U2DI:
17160 case ALTIVEC_BUILTIN_VCMPGTUB:
17161 case ALTIVEC_BUILTIN_VCMPGTUH:
17162 case ALTIVEC_BUILTIN_VCMPGTUW:
17163 case P8V_BUILTIN_VCMPGTUD:
17164 h.uns_p[1] = 1;
17165 h.uns_p[2] = 1;
17166 break;
17167
17168 /* unsigned arguments for 128-bit pack instructions. */
17169 case MISC_BUILTIN_PACK_TD:
17170 case MISC_BUILTIN_PACK_V1TI:
17171 h.uns_p[1] = 1;
17172 h.uns_p[2] = 1;
17173 break;
17174
17175 /* unsigned second arguments (vector shift right). */
17176 case ALTIVEC_BUILTIN_VSRB:
17177 case ALTIVEC_BUILTIN_VSRH:
17178 case ALTIVEC_BUILTIN_VSRW:
17179 case P8V_BUILTIN_VSRD:
17180 h.uns_p[2] = 1;
17181 break;
17182
17183 default:
17184 break;
17185 }
17186
17187 /* Figure out how many args are present. */
17188 while (num_args > 0 && h.mode[num_args] == VOIDmode)
17189 num_args--;
17190
17191 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
17192 if (!ret_type && h.uns_p[0])
17193 ret_type = builtin_mode_to_type[h.mode[0]][0];
17194
17195 if (!ret_type)
17196 fatal_error (input_location,
17197 "internal error: builtin function %qs had an unexpected "
17198 "return type %qs", name, GET_MODE_NAME (h.mode[0]));
17199
17200 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
17201 arg_type[i] = NULL_TREE;
17202
17203 for (i = 0; i < num_args; i++)
17204 {
17205 int m = (int) h.mode[i+1];
17206 int uns_p = h.uns_p[i+1];
17207
17208 arg_type[i] = builtin_mode_to_type[m][uns_p];
17209 if (!arg_type[i] && uns_p)
17210 arg_type[i] = builtin_mode_to_type[m][0];
17211
17212 if (!arg_type[i])
17213 fatal_error (input_location,
17214 "internal error: builtin function %qs, argument %d "
17215 "had unexpected argument type %qs", name, i,
17216 GET_MODE_NAME (m));
17217 }
17218
17219 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
17220 if (*found == NULL)
17221 {
17222 h2 = ggc_alloc<builtin_hash_struct> ();
17223 *h2 = h;
17224 *found = h2;
17225
17226 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
17227 arg_type[2], NULL_TREE);
17228 }
17229
17230 return (*found)->type;
17231 }
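/* Worked example, an illustration rather than part of the original source:
   for ALTIVEC_BUILTIN_VMULEUB the switch above sets uns_p[0..2], so the raw
   insn modes (V8HI, V16QI, V16QI) resolve through builtin_mode_to_type to

     vector unsigned short (vector unsigned char, vector unsigned char)

   instead of the all-signed signature the modes alone would produce.  */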
17232
17233 static void
17234 rs6000_common_init_builtins (void)
17235 {
17236 const struct builtin_description *d;
17237 size_t i;
17238
17239 tree opaque_ftype_opaque = NULL_TREE;
17240 tree opaque_ftype_opaque_opaque = NULL_TREE;
17241 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
17242 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17243
17244 /* Create Altivec and VSX builtins on machines with at least the
17245 general purpose extensions (970 and newer) to allow the use of
17246 the target attribute. */
17247
17248 if (TARGET_EXTRA_BUILTINS)
17249 builtin_mask |= RS6000_BTM_COMMON;
17250
17251 /* Add the ternary operators. */
17252 d = bdesc_3arg;
17253 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
17254 {
17255 tree type;
17256 HOST_WIDE_INT mask = d->mask;
17257
17258 if ((mask & builtin_mask) != mask)
17259 {
17260 if (TARGET_DEBUG_BUILTIN)
17261 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
17262 continue;
17263 }
17264
17265 if (rs6000_overloaded_builtin_p (d->code))
17266 {
17267 if (! (type = opaque_ftype_opaque_opaque_opaque))
17268 type = opaque_ftype_opaque_opaque_opaque
17269 = build_function_type_list (opaque_V4SI_type_node,
17270 opaque_V4SI_type_node,
17271 opaque_V4SI_type_node,
17272 opaque_V4SI_type_node,
17273 NULL_TREE);
17274 }
17275 else
17276 {
17277 enum insn_code icode = d->icode;
17278 if (d->name == 0)
17279 {
17280 if (TARGET_DEBUG_BUILTIN)
17281 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%ld] no name\n",
17282 (long unsigned)i);
17283
17284 continue;
17285 }
17286
17287 if (icode == CODE_FOR_nothing)
17288 {
17289 if (TARGET_DEBUG_BUILTIN)
17290 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
17291 d->name);
17292
17293 continue;
17294 }
17295
17296 type = builtin_function_type (insn_data[icode].operand[0].mode,
17297 insn_data[icode].operand[1].mode,
17298 insn_data[icode].operand[2].mode,
17299 insn_data[icode].operand[3].mode,
17300 d->code, d->name);
17301 }
17302
17303 def_builtin (d->name, type, d->code);
17304 }
17305
17306 /* Add the binary operators. */
17307 d = bdesc_2arg;
17308 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17309 {
17310 machine_mode mode0, mode1, mode2;
17311 tree type;
17312 HOST_WIDE_INT mask = d->mask;
17313
17314 if ((mask & builtin_mask) != mask)
17315 {
17316 if (TARGET_DEBUG_BUILTIN)
17317 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
17318 continue;
17319 }
17320
17321 if (rs6000_overloaded_builtin_p (d->code))
17322 {
17323 if (! (type = opaque_ftype_opaque_opaque))
17324 type = opaque_ftype_opaque_opaque
17325 = build_function_type_list (opaque_V4SI_type_node,
17326 opaque_V4SI_type_node,
17327 opaque_V4SI_type_node,
17328 NULL_TREE);
17329 }
17330 else
17331 {
17332 enum insn_code icode = d->icode;
17333 if (d->name == 0)
17334 {
17335 if (TARGET_DEBUG_BUILTIN)
17336 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%ld] no name\n",
17337 (long unsigned)i);
17338
17339 continue;
17340 }
17341
17342 if (icode == CODE_FOR_nothing)
17343 {
17344 if (TARGET_DEBUG_BUILTIN)
17345 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
17346 d->name);
17347
17348 continue;
17349 }
17350
17351 mode0 = insn_data[icode].operand[0].mode;
17352 mode1 = insn_data[icode].operand[1].mode;
17353 mode2 = insn_data[icode].operand[2].mode;
17354
17355 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
17356 d->code, d->name);
17357 }
17358
17359 def_builtin (d->name, type, d->code);
17360 }
17361
17362 /* Add the simple unary operators. */
17363 d = bdesc_1arg;
17364 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17365 {
17366 machine_mode mode0, mode1;
17367 tree type;
17368 HOST_WIDE_INT mask = d->mask;
17369
17370 if ((mask & builtin_mask) != mask)
17371 {
17372 if (TARGET_DEBUG_BUILTIN)
17373 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
17374 continue;
17375 }
17376
17377 if (rs6000_overloaded_builtin_p (d->code))
17378 {
17379 if (! (type = opaque_ftype_opaque))
17380 type = opaque_ftype_opaque
17381 = build_function_type_list (opaque_V4SI_type_node,
17382 opaque_V4SI_type_node,
17383 NULL_TREE);
17384 }
17385 else
17386 {
17387 enum insn_code icode = d->icode;
17388 if (d->name == 0)
17389 {
17390 if (TARGET_DEBUG_BUILTIN)
17391 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%ld] no name\n",
17392 (long unsigned)i);
17393
17394 continue;
17395 }
17396
17397 if (icode == CODE_FOR_nothing)
17398 {
17399 if (TARGET_DEBUG_BUILTIN)
17400 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
17401 d->name);
17402
17403 continue;
17404 }
17405
17406 mode0 = insn_data[icode].operand[0].mode;
17407 mode1 = insn_data[icode].operand[1].mode;
17408
17409 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
17410 d->code, d->name);
17411 }
17412
17413 def_builtin (d->name, type, d->code);
17414 }
17415
17416 /* Add the simple no-argument operators. */
17417 d = bdesc_0arg;
17418 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
17419 {
17420 machine_mode mode0;
17421 tree type;
17422 HOST_WIDE_INT mask = d->mask;
17423
17424 if ((mask & builtin_mask) != mask)
17425 {
17426 if (TARGET_DEBUG_BUILTIN)
17427 fprintf (stderr, "rs6000_builtin, skip no-argument %s\n", d->name);
17428 continue;
17429 }
17430 if (rs6000_overloaded_builtin_p (d->code))
17431 {
17432 if (!opaque_ftype_opaque)
17433 opaque_ftype_opaque
17434 = build_function_type_list (opaque_V4SI_type_node, NULL_TREE);
17435 type = opaque_ftype_opaque;
17436 }
17437 else
17438 {
17439 enum insn_code icode = d->icode;
17440 if (d->name == 0)
17441 {
17442 if (TARGET_DEBUG_BUILTIN)
17443 fprintf (stderr, "rs6000_builtin, bdesc_0arg[%lu] no name\n",
17444 (long unsigned) i);
17445 continue;
17446 }
17447 if (icode == CODE_FOR_nothing)
17448 {
17449 if (TARGET_DEBUG_BUILTIN)
17450 fprintf (stderr,
17451 "rs6000_builtin, skip no-argument %s (no code)\n",
17452 d->name);
17453 continue;
17454 }
17455 mode0 = insn_data[icode].operand[0].mode;
17456 type = builtin_function_type (mode0, VOIDmode, VOIDmode, VOIDmode,
17457 d->code, d->name);
17458 }
17459 def_builtin (d->name, type, d->code);
17460 }
17461 }
17462
17463 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
17464 static void
17465 init_float128_ibm (machine_mode mode)
17466 {
17467 if (!TARGET_XL_COMPAT)
17468 {
17469 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
17470 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
17471 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
17472 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
17473
17474 if (!TARGET_HARD_FLOAT)
17475 {
17476 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
17477 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
17478 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
17479 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
17480 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
17481 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
17482 set_optab_libfunc (le_optab, mode, "__gcc_qle");
17483 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
17484
17485 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
17486 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
17487 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
17488 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
17489 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
17490 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
17491 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
17492 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
17493 }
17494 }
17495 else
17496 {
17497 set_optab_libfunc (add_optab, mode, "_xlqadd");
17498 set_optab_libfunc (sub_optab, mode, "_xlqsub");
17499 set_optab_libfunc (smul_optab, mode, "_xlqmul");
17500 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
17501 }
17502
17503 /* Add various conversions for IFmode to use the traditional TFmode
17504 names. */
17505 if (mode == IFmode)
17506 {
17507 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
17508 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
17509 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
17510 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
17511 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
17512 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
17513
17514 if (TARGET_POWERPC64)
17515 {
17516 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
17517 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
17518 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
17519 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
17520 }
17521 }
17522 }
17523
17524 /* Create a decl for either complex long double multiply or complex long double
17525 divide when long double is IEEE 128-bit floating point. We can't use
17526 __multc3 and __divtc3 because those names are already taken by the original
17527 long double format that uses IBM extended double. The complex multiply/divide
17528 functions are encoded as builtin functions with a complex result and 4 scalar inputs. */
17529
17530 static void
17531 create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
17532 {
17533 tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL,
17534 name, NULL_TREE);
17535
17536 set_builtin_decl (fncode, fndecl, true);
17537
17538 if (TARGET_DEBUG_BUILTIN)
17539 fprintf (stderr, "create complex %s, fncode: %d\n", name, (int) fncode);
17540
17541 return;
17542 }
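/* Illustrative note, not part of the original source: the FNCODE values
   passed in by init_float128_ieee below are computed as

     BUILT_IN_COMPLEX_MUL_MIN + (TCmode - MIN_MODE_COMPLEX_FLOAT)

   i.e. the builtin slot the middle end would normally lower to __multc3
   for complex long double, so overriding that decl redirects the lowering
   to __mulkc3 (and likewise __divtc3 to __divkc3).  */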
17543
17544 /* Set up IEEE 128-bit floating point routines. Use different names if the
17545 arguments can be passed in a vector register. The historical PowerPC
17546 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
17547 continue to use that if we aren't using vector registers to pass IEEE
17548 128-bit floating point. */
17549
17550 static void
17551 init_float128_ieee (machine_mode mode)
17552 {
17553 if (FLOAT128_VECTOR_P (mode))
17554 {
17555 static bool complex_muldiv_init_p = false;
17556
17557 /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble. If
17558 we have clone or target attributes, this will be called a second
17559 time. We want to create the built-in function only once. */
17560 if (mode == TFmode && TARGET_IEEEQUAD && !complex_muldiv_init_p)
17561 {
17562 complex_muldiv_init_p = true;
17563 built_in_function fncode_mul =
17564 (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + TCmode
17565 - MIN_MODE_COMPLEX_FLOAT);
17566 built_in_function fncode_div =
17567 (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + TCmode
17568 - MIN_MODE_COMPLEX_FLOAT);
17569
17570 tree fntype = build_function_type_list (complex_long_double_type_node,
17571 long_double_type_node,
17572 long_double_type_node,
17573 long_double_type_node,
17574 long_double_type_node,
17575 NULL_TREE);
17576
17577 create_complex_muldiv ("__mulkc3", fncode_mul, fntype);
17578 create_complex_muldiv ("__divkc3", fncode_div, fntype);
17579 }
17580
17581 set_optab_libfunc (add_optab, mode, "__addkf3");
17582 set_optab_libfunc (sub_optab, mode, "__subkf3");
17583 set_optab_libfunc (neg_optab, mode, "__negkf2");
17584 set_optab_libfunc (smul_optab, mode, "__mulkf3");
17585 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
17586 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
17587 set_optab_libfunc (abs_optab, mode, "__abskf2");
17588 set_optab_libfunc (powi_optab, mode, "__powikf2");
17589
17590 set_optab_libfunc (eq_optab, mode, "__eqkf2");
17591 set_optab_libfunc (ne_optab, mode, "__nekf2");
17592 set_optab_libfunc (gt_optab, mode, "__gtkf2");
17593 set_optab_libfunc (ge_optab, mode, "__gekf2");
17594 set_optab_libfunc (lt_optab, mode, "__ltkf2");
17595 set_optab_libfunc (le_optab, mode, "__lekf2");
17596 set_optab_libfunc (unord_optab, mode, "__unordkf2");
17597
17598 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
17599 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
17600 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
17601 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
17602
17603 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
17604 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
17605 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
17606
17607 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
17608 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
17609 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
17610
17611 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
17612 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
17613 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
17614 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
17615 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
17616 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
17617
17618 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
17619 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
17620 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
17621 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
17622
17623 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
17624 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
17625 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
17626 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
17627
17628 if (TARGET_POWERPC64)
17629 {
17630 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
17631 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
17632 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
17633 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
17634 }
17635 }
17636
17637 else
17638 {
17639 set_optab_libfunc (add_optab, mode, "_q_add");
17640 set_optab_libfunc (sub_optab, mode, "_q_sub");
17641 set_optab_libfunc (neg_optab, mode, "_q_neg");
17642 set_optab_libfunc (smul_optab, mode, "_q_mul");
17643 set_optab_libfunc (sdiv_optab, mode, "_q_div");
17644 if (TARGET_PPC_GPOPT)
17645 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
17646
17647 set_optab_libfunc (eq_optab, mode, "_q_feq");
17648 set_optab_libfunc (ne_optab, mode, "_q_fne");
17649 set_optab_libfunc (gt_optab, mode, "_q_fgt");
17650 set_optab_libfunc (ge_optab, mode, "_q_fge");
17651 set_optab_libfunc (lt_optab, mode, "_q_flt");
17652 set_optab_libfunc (le_optab, mode, "_q_fle");
17653
17654 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
17655 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
17656 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
17657 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
17658 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
17659 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
17660 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
17661 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
17662 }
17663 }
17664
17665 static void
17666 rs6000_init_libfuncs (void)
17667 {
17668 /* __float128 support. */
17669 if (TARGET_FLOAT128_TYPE)
17670 {
17671 init_float128_ibm (IFmode);
17672 init_float128_ieee (KFmode);
17673 }
17674
17675 /* AIX/Darwin/64-bit Linux quad floating point routines. */
17676 if (TARGET_LONG_DOUBLE_128)
17677 {
17678 if (!TARGET_IEEEQUAD)
17679 init_float128_ibm (TFmode);
17680
17681 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
17682 else
17683 init_float128_ieee (TFmode);
17684 }
17685 }
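/* Illustrative summary, deduced from the code above rather than stated in
   it: with 128-bit long double, -mabi=ibmlongdouble leaves TFmode on the
   IBM __gcc_q* (or _xlq*) routines, while -mabi=ieeelongdouble sends TFmode
   through init_float128_ieee, i.e. the __*kf* names when IEEE 128-bit
   values are passed in vector registers and the _q_* names otherwise.  */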
17686
17687 /* Emit a potentially record-form instruction, setting DST from SRC.
17688 If DOT is 0, that is all; otherwise, set CCREG to the result of the
17689 signed comparison of DST with zero. If DOT is 1, the generated RTL
17690 doesn't care about the DST result; if DOT is 2, it does. If CCREG
17691 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
17692 a separate COMPARE. */
17693
17694 void
17695 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
17696 {
17697 if (dot == 0)
17698 {
17699 emit_move_insn (dst, src);
17700 return;
17701 }
17702
17703 if (cc_reg_not_cr0_operand (ccreg, CCmode))
17704 {
17705 emit_move_insn (dst, src);
17706 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
17707 return;
17708 }
17709
17710 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
17711 if (dot == 1)
17712 {
17713 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
17714 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
17715 }
17716 else
17717 {
17718 rtx set = gen_rtx_SET (dst, src);
17719 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
17720 }
17721 }
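/* Sketch of the RTL emitted above, for illustration only: with dot == 1 the
   destination is merely clobbered,

     (parallel [(set ccreg (compare:CC src (const_int 0)))
                (clobber dst)])

   while dot == 2 replaces the clobber with (set dst src), matching the
   record-form ("dot") patterns in rs6000.md.  */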
17722
17723 \f
17724 /* A validation routine: say whether CODE, a condition code, and MODE
17725 match. The other alternatives either don't make sense or should
17726 never be generated. */
17727
17728 void
17729 validate_condition_mode (enum rtx_code code, machine_mode mode)
17730 {
17731 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
17732 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
17733 && GET_MODE_CLASS (mode) == MODE_CC);
17734
17735 /* These don't make sense. */
17736 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
17737 || mode != CCUNSmode);
17738
17739 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
17740 || mode == CCUNSmode);
17741
17742 gcc_assert (mode == CCFPmode
17743 || (code != ORDERED && code != UNORDERED
17744 && code != UNEQ && code != LTGT
17745 && code != UNGT && code != UNLT
17746 && code != UNGE && code != UNLE));
17747
17748 /* These should never be generated except for
17749 flag_finite_math_only. */
17750 gcc_assert (mode != CCFPmode
17751 || flag_finite_math_only
17752 || (code != LE && code != GE
17753 && code != UNEQ && code != LTGT
17754 && code != UNGT && code != UNLT));
17755
17756 /* These are invalid; the information is not there. */
17757 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
17758 }
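/* Usage sketch, illustrative only: callers pass the comparison code along
   with the CC mode the comparison was generated in, e.g.

     validate_condition_mode (GTU, CCUNSmode);   <- passes
     validate_condition_mode (GT, CCUNSmode);    <- trips an assert

   so signed/unsigned mismatches between codes and CC modes are caught at
   RTL generation time rather than surfacing as wrong compares later.  */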
17759
17760 \f
17761 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
17762 rldicl, rldicr, or rldic instruction in mode MODE. If so, and if E is
17763 non-null, store there the bit offset (counted from the right) where
17764 the single stretch of 1 bits begins; and similarly for B, the bit
17765 offset where it ends. */
17766
17767 bool
17768 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
17769 {
17770 unsigned HOST_WIDE_INT val = INTVAL (mask);
17771 unsigned HOST_WIDE_INT bit;
17772 int nb, ne;
17773 int n = GET_MODE_PRECISION (mode);
17774
17775 if (mode != DImode && mode != SImode)
17776 return false;
17777
17778 if (INTVAL (mask) >= 0)
17779 {
17780 bit = val & -val;
17781 ne = exact_log2 (bit);
17782 nb = exact_log2 (val + bit);
17783 }
17784 else if (val + 1 == 0)
17785 {
17786 nb = n;
17787 ne = 0;
17788 }
17789 else if (val & 1)
17790 {
17791 val = ~val;
17792 bit = val & -val;
17793 nb = exact_log2 (bit);
17794 ne = exact_log2 (val + bit);
17795 }
17796 else
17797 {
17798 bit = val & -val;
17799 ne = exact_log2 (bit);
17800 if (val + bit == 0)
17801 nb = n;
17802 else
17803 nb = 0;
17804 }
17805
17806 nb--;
17807
17808 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
17809 return false;
17810
17811 if (b)
17812 *b = nb;
17813 if (e)
17814 *e = ne;
17815
17816 return true;
17817 }
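/* Worked example, illustrative only: MASK = 0xff0 in DImode is positive,
   so bit = 0x10, ne = 4 (the stretch of ones begins at bit 4), and
   nb = exact_log2 (0xff0 + 0x10) - 1 = 11 (it ends at bit 11).  A
   wrap-around SImode mask such as 0xf000000f takes the "val & 1" branch
   instead and yields ne = 28, nb = 3, i.e. ne > nb.  */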
17818
17819 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
17820 or rldicr instruction, to implement an AND with it in mode MODE. */
17821
17822 bool
17823 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
17824 {
17825 int nb, ne;
17826
17827 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
17828 return false;
17829
17830 /* For DImode, we need a rldicl, rldicr, or a rlwinm with a mask that
17831 does not wrap. */
17832 if (mode == DImode)
17833 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
17834
17835 /* For SImode, rlwinm can do everything. */
17836 if (mode == SImode)
17837 return (nb < 32 && ne < 32);
17838
17839 return false;
17840 }
17841
17842 /* Return the instruction template for an AND with mask in mode MODE, with
17843 operands OPERANDS. If DOT is true, make it a record-form instruction. */
17844
17845 const char *
17846 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
17847 {
17848 int nb, ne;
17849
17850 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
17851 gcc_unreachable ();
17852
17853 if (mode == DImode && ne == 0)
17854 {
17855 operands[3] = GEN_INT (63 - nb);
17856 if (dot)
17857 return "rldicl. %0,%1,0,%3";
17858 return "rldicl %0,%1,0,%3";
17859 }
17860
17861 if (mode == DImode && nb == 63)
17862 {
17863 operands[3] = GEN_INT (63 - ne);
17864 if (dot)
17865 return "rldicr. %0,%1,0,%3";
17866 return "rldicr %0,%1,0,%3";
17867 }
17868
17869 if (nb < 32 && ne < 32)
17870 {
17871 operands[3] = GEN_INT (31 - nb);
17872 operands[4] = GEN_INT (31 - ne);
17873 if (dot)
17874 return "rlwinm. %0,%1,0,%3,%4";
17875 return "rlwinm %0,%1,0,%3,%4";
17876 }
17877
17878 gcc_unreachable ();
17879 }
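/* Worked example, illustrative only: an AND with 0xff0 (nb = 11, ne = 4)
   fits neither the rldicl form (ne == 0) nor the rldicr form (nb == 63),
   so the last arm emits

     rlwinm %0,%1,0,20,27

   where 20 = 31 - nb and 27 = 31 - ne are the mask bounds numbered from
   the left, as rlwinm expects.  */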
17880
17881 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
17882 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
17883 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
17884
17885 bool
17886 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
17887 {
17888 int nb, ne;
17889
17890 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
17891 return false;
17892
17893 int n = GET_MODE_PRECISION (mode);
17894 int sh = -1;
17895
17896 if (CONST_INT_P (XEXP (shift, 1)))
17897 {
17898 sh = INTVAL (XEXP (shift, 1));
17899 if (sh < 0 || sh >= n)
17900 return false;
17901 }
17902
17903 rtx_code code = GET_CODE (shift);
17904
17905 /* Convert any shift by 0 to a rotate, to simplify the code below. */
17906 if (sh == 0)
17907 code = ROTATE;
17908
17909 /* Convert rotate to simple shift if we can, to make analysis simpler. */
17910 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
17911 code = ASHIFT;
17912 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
17913 {
17914 code = LSHIFTRT;
17915 sh = n - sh;
17916 }
17917
17918 /* DImode rotates need rld*. */
17919 if (mode == DImode && code == ROTATE)
17920 return (nb == 63 || ne == 0 || ne == sh);
17921
17922 /* SImode rotates need rlw*. */
17923 if (mode == SImode && code == ROTATE)
17924 return (nb < 32 && ne < 32 && sh < 32);
17925
17926 /* Wrap-around masks are only okay for rotates. */
17927 if (ne > nb)
17928 return false;
17929
17930 /* Variable shifts are only okay for rotates. */
17931 if (sh < 0)
17932 return false;
17933
17934 /* Don't allow ASHIFT if the mask is wrong for that. */
17935 if (code == ASHIFT && ne < sh)
17936 return false;
17937
17938 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
17939 if the mask is wrong for that. */
17940 if (nb < 32 && ne < 32 && sh < 32
17941 && !(code == LSHIFTRT && nb >= 32 - sh))
17942 return true;
17943
17944 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
17945 if the mask is wrong for that. */
17946 if (code == LSHIFTRT)
17947 sh = 64 - sh;
17948 if (nb == 63 || ne == 0 || ne == sh)
17949 return !(code == LSHIFTRT && nb >= sh);
17950
17951 return false;
17952 }
17953
17954 /* Return the instruction template for a shift with mask in mode MODE, with
17955 operands OPERANDS. If DOT is true, make it a record-form instruction. */
17956
17957 const char *
17958 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
17959 {
17960 int nb, ne;
17961
17962 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
17963 gcc_unreachable ();
17964
17965 if (mode == DImode && ne == 0)
17966 {
17967 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
17968 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
17969 operands[3] = GEN_INT (63 - nb);
17970 if (dot)
17971 return "rld%I2cl. %0,%1,%2,%3";
17972 return "rld%I2cl %0,%1,%2,%3";
17973 }
17974
17975 if (mode == DImode && nb == 63)
17976 {
17977 operands[3] = GEN_INT (63 - ne);
17978 if (dot)
17979 return "rld%I2cr. %0,%1,%2,%3";
17980 return "rld%I2cr %0,%1,%2,%3";
17981 }
17982
17983 if (mode == DImode
17984 && GET_CODE (operands[4]) != LSHIFTRT
17985 && CONST_INT_P (operands[2])
17986 && ne == INTVAL (operands[2]))
17987 {
17988 operands[3] = GEN_INT (63 - nb);
17989 if (dot)
17990 return "rld%I2c. %0,%1,%2,%3";
17991 return "rld%I2c %0,%1,%2,%3";
17992 }
17993
17994 if (nb < 32 && ne < 32)
17995 {
17996 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
17997 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
17998 operands[3] = GEN_INT (31 - nb);
17999 operands[4] = GEN_INT (31 - ne);
18000 /* This insn can also be a 64-bit rotate with a mask that really makes
18001 it just a shift right (with mask); the %h modifiers below adjust for
18002 that situation (the shift count is >= 32 in that case). */
18003 if (dot)
18004 return "rlw%I2nm. %0,%1,%h2,%3,%4";
18005 return "rlw%I2nm %0,%1,%h2,%3,%4";
18006 }
18007
18008 gcc_unreachable ();
18009 }
18010
18011 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
18012 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
18013 ASHIFT, or LSHIFTRT) in mode MODE. */
18014
18015 bool
18016 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
18017 {
18018 int nb, ne;
18019
18020 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18021 return false;
18022
18023 int n = GET_MODE_PRECISION (mode);
18024
18025 int sh = INTVAL (XEXP (shift, 1));
18026 if (sh < 0 || sh >= n)
18027 return false;
18028
18029 rtx_code code = GET_CODE (shift);
18030
18031 /* Convert any shift by 0 to a rotate, to simplify the code below. */
18032 if (sh == 0)
18033 code = ROTATE;
18034
18035 /* Convert rotate to simple shift if we can, to make analysis simpler. */
18036 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
18037 code = ASHIFT;
18038 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
18039 {
18040 code = LSHIFTRT;
18041 sh = n - sh;
18042 }
18043
18044 /* DImode rotates need rldimi. */
18045 if (mode == DImode && code == ROTATE)
18046 return (ne == sh);
18047
18048 /* SImode rotates need rlwimi. */
18049 if (mode == SImode && code == ROTATE)
18050 return (nb < 32 && ne < 32 && sh < 32);
18051
18052 /* Wrap-around masks are only okay for rotates. */
18053 if (ne > nb)
18054 return false;
18055
18056 /* Don't allow ASHIFT if the mask is wrong for that. */
18057 if (code == ASHIFT && ne < sh)
18058 return false;
18059
18060 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
18061 if the mask is wrong for that. */
18062 if (nb < 32 && ne < 32 && sh < 32
18063 && !(code == LSHIFTRT && nb >= 32 - sh))
18064 return true;
18065
18066 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
18067 if the mask is wrong for that. */
18068 if (code == LSHIFTRT)
18069 sh = 64 - sh;
18070 if (ne == sh)
18071 return !(code == LSHIFTRT && nb >= sh);
18072
18073 return false;
18074 }
18075
18076 /* Return the instruction template for an insert with mask in mode MODE, with
18077 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18078
18079 const char *
18080 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
18081 {
18082 int nb, ne;
18083
18084 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
18085 gcc_unreachable ();
18086
18087 /* Prefer rldimi because rlwimi is cracked. */
18088 if (TARGET_POWERPC64
18089 && (!dot || mode == DImode)
18090 && GET_CODE (operands[4]) != LSHIFTRT
18091 && ne == INTVAL (operands[2]))
18092 {
18093 operands[3] = GEN_INT (63 - nb);
18094 if (dot)
18095 return "rldimi. %0,%1,%2,%3";
18096 return "rldimi %0,%1,%2,%3";
18097 }
18098
18099 if (nb < 32 && ne < 32)
18100 {
18101 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18102 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
18103 operands[3] = GEN_INT (31 - nb);
18104 operands[4] = GEN_INT (31 - ne);
18105 if (dot)
18106 return "rlwimi. %0,%1,%2,%3,%4";
18107 return "rlwimi %0,%1,%2,%3,%4";
18108 }
18109
18110 gcc_unreachable ();
18111 }
18112
18113 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
18114 using two machine instructions. */
18115
18116 bool
18117 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
18118 {
18119 /* There are two kinds of AND we can handle with two insns:
18120 1) those we can do with two rl* insns;
18121 2) ori[s];xori[s].
18122
18123 We do not handle that last case yet. */
18124
18125 /* If there is just one stretch of ones, we can do it. */
18126 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
18127 return true;
18128
18129 /* Otherwise, fill in the lowest "hole"; if we can do the result with
18130 one insn, we can do the whole thing with two. */
18131 unsigned HOST_WIDE_INT val = INTVAL (c);
18132 unsigned HOST_WIDE_INT bit1 = val & -val;
18133 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
18134 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
18135 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
18136 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
18137 }
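/* Worked example, illustrative only: c = 0xff0ff has two stretches of
   ones.  Then bit1 = 0x1, bit2 = 0x100 (the bottom of the hole),
   val1 = 0xff000, bit3 = 0x1000 (just above the hole), and the filled-in
   constant val + bit3 - bit2 = 0xfffff is a single stretch of ones, so
   this AND can be done in two rl* instructions.  */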
18138
18139 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
18140 If EXPAND is true, split rotate-and-mask instructions we generate to
18141 their constituent parts as well (this is used during expand); if DOT
18142 is 1, make the last insn a record-form instruction clobbering the
18143 destination GPR and setting the CC reg (from operands[3]); if 2, set
18144 that GPR as well as the CC reg. */
18145
18146 void
18147 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
18148 {
18149 gcc_assert (!(expand && dot));
18150
18151 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
18152
18153 /* If it is one stretch of ones, it is DImode; shift left, mask, then
18154 shift right. This generates better code than doing the masks without
18155 shifts, or shifting first right and then left. */
18156 int nb, ne;
18157 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
18158 {
18159 gcc_assert (mode == DImode);
18160
18161 int shift = 63 - nb;
18162 if (expand)
18163 {
18164 rtx tmp1 = gen_reg_rtx (DImode);
18165 rtx tmp2 = gen_reg_rtx (DImode);
18166 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
18167 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
18168 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
18169 }
18170 else
18171 {
18172 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
18173 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
18174 emit_move_insn (operands[0], tmp);
18175 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
18176 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18177 }
18178 return;
18179 }
18180
18181 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
18182 that does the rest. */
18183 unsigned HOST_WIDE_INT bit1 = val & -val;
18184 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
18185 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
18186 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
18187
18188 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
18189 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
18190
18191 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
18192
18193 /* Two "no-rotate"-and-mask instructions, for SImode. */
18194 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
18195 {
18196 gcc_assert (mode == SImode);
18197
18198 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
18199 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
18200 emit_move_insn (reg, tmp);
18201 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
18202 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18203 return;
18204 }
18205
18206 gcc_assert (mode == DImode);
18207
18208 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
18209 insns; we have to do the first in SImode, because it wraps. */
18210 if (mask2 <= 0xffffffff
18211 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
18212 {
18213 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
18214 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
18215 GEN_INT (mask1));
18216 rtx reg_low = gen_lowpart (SImode, reg);
18217 emit_move_insn (reg_low, tmp);
18218 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
18219 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18220 return;
18221 }
18222
18223 /* Two rld* insns: rotate, clear the hole in the middle (which now is
18224 at the top end), rotate back and clear the other hole. */
18225 int right = exact_log2 (bit3);
18226 int left = 64 - right;
18227
18228 /* Rotate the mask too. */
18229 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
18230
18231 if (expand)
18232 {
18233 rtx tmp1 = gen_reg_rtx (DImode);
18234 rtx tmp2 = gen_reg_rtx (DImode);
18235 rtx tmp3 = gen_reg_rtx (DImode);
18236 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
18237 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
18238 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
18239 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
18240 }
18241 else
18242 {
18243 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
18244 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
18245 emit_move_insn (operands[0], tmp);
18246 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
18247 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
18248 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18249 }
18250 }
18251 \f
18252 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
18253 for lfq and stfq insns, iff the registers are hard registers. */
18254
18255 int
18256 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
18257 {
18258 /* We might have been passed a SUBREG. */
18259 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
18260 return 0;
18261
18262 /* We might have been passed non-floating-point registers. */
18263 if (!FP_REGNO_P (REGNO (reg1))
18264 || !FP_REGNO_P (REGNO (reg2)))
18265 return 0;
18266
18267 return (REGNO (reg1) == REGNO (reg2) - 1);
18268 }
18269
18270 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
18271 addr1 and addr2 must be in consecutive memory locations
18272 (addr2 == addr1 + 8). */
18273
18274 int
18275 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
18276 {
18277 rtx addr1, addr2;
18278 unsigned int reg1, reg2;
18279 int offset1, offset2;
18280
18281 /* The mems cannot be volatile. */
18282 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
18283 return 0;
18284
18285 addr1 = XEXP (mem1, 0);
18286 addr2 = XEXP (mem2, 0);
18287
18288 /* Extract an offset (if used) from the first addr. */
18289 if (GET_CODE (addr1) == PLUS)
18290 {
18291 /* If not a REG, return zero. */
18292 if (GET_CODE (XEXP (addr1, 0)) != REG)
18293 return 0;
18294 else
18295 {
18296 reg1 = REGNO (XEXP (addr1, 0));
18297 /* The offset must be constant! */
18298 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
18299 return 0;
18300 offset1 = INTVAL (XEXP (addr1, 1));
18301 }
18302 }
18303 else if (GET_CODE (addr1) != REG)
18304 return 0;
18305 else
18306 {
18307 reg1 = REGNO (addr1);
18308 /* This was a simple (mem (reg)) expression. Offset is 0. */
18309 offset1 = 0;
18310 }
18311
18312 /* And now for the second addr. */
18313 if (GET_CODE (addr2) == PLUS)
18314 {
18315 /* If not a REG, return zero. */
18316 if (GET_CODE (XEXP (addr2, 0)) != REG)
18317 return 0;
18318 else
18319 {
18320 reg2 = REGNO (XEXP (addr2, 0));
18321 /* The offset must be constant. */
18322 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
18323 return 0;
18324 offset2 = INTVAL (XEXP (addr2, 1));
18325 }
18326 }
18327 else if (GET_CODE (addr2) != REG)
18328 return 0;
18329 else
18330 {
18331 reg2 = REGNO (addr2);
18332 /* This was a simple (mem (reg)) expression. Offset is 0. */
18333 offset2 = 0;
18334 }
18335
18336 /* Both of these must have the same base register. */
18337 if (reg1 != reg2)
18338 return 0;
18339
18340 /* The offset for the second addr must be 8 more than the first addr. */
18341 if (offset2 != offset1 + 8)
18342 return 0;
18343
18344 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
18345 instructions. */
18346 return 1;
18347 }
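/* Illustrative example, not part of the original source: the checks above
   accept the pair

     (mem:DF (reg:DI 3))
     (mem:DF (plus:DI (reg:DI 3) (const_int 8)))

   since both use base register 3 with offsets 0 and 8, exactly the two
   adjacent doublewords a single lfq or stfq can move.  */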
18348 \f
18349 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. For SDmode values we
18350 need to use DDmode; in all other cases we can use the same mode. */
18351 static machine_mode
18352 rs6000_secondary_memory_needed_mode (machine_mode mode)
18353 {
18354 if (lra_in_progress && mode == SDmode)
18355 return DDmode;
18356 return mode;
18357 }
18358
18359 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
18360 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
18361 only work on the traditional altivec registers, note if an altivec register
18362 was chosen. */
18363
18364 static enum rs6000_reg_type
18365 register_to_reg_type (rtx reg, bool *is_altivec)
18366 {
18367 HOST_WIDE_INT regno;
18368 enum reg_class rclass;
18369
18370 if (GET_CODE (reg) == SUBREG)
18371 reg = SUBREG_REG (reg);
18372
18373 if (!REG_P (reg))
18374 return NO_REG_TYPE;
18375
18376 regno = REGNO (reg);
18377 if (regno >= FIRST_PSEUDO_REGISTER)
18378 {
18379 if (!lra_in_progress && !reload_completed)
18380 return PSEUDO_REG_TYPE;
18381
18382 regno = true_regnum (reg);
18383 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
18384 return PSEUDO_REG_TYPE;
18385 }
18386
18387 gcc_assert (regno >= 0);
18388
18389 if (is_altivec && ALTIVEC_REGNO_P (regno))
18390 *is_altivec = true;
18391
18392 rclass = rs6000_regno_regclass[regno];
18393 return reg_class_to_reg_type[(int)rclass];
18394 }
18395
18396 /* Helper function to return the cost of adding a TOC entry address. */
18397
18398 static inline int
18399 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
18400 {
18401 int ret;
18402
18403 if (TARGET_CMODEL != CMODEL_SMALL)
18404 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
18405
18406 else
18407 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
18408
18409 return ret;
18410 }
18411
18412 /* Helper function for rs6000_secondary_reload to determine whether the memory
18413 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
18414 needs reloading. Return negative if the memory is not handled by the memory
18415 helper functions and a different reload method should be tried, 0 if no
18416 additional instructions are needed, and positive to give the extra cost of
18417 the memory access. */
18418
18419 static int
18420 rs6000_secondary_reload_memory (rtx addr,
18421 enum reg_class rclass,
18422 machine_mode mode)
18423 {
18424 int extra_cost = 0;
18425 rtx reg, and_arg, plus_arg0, plus_arg1;
18426 addr_mask_type addr_mask;
18427 const char *type = NULL;
18428 const char *fail_msg = NULL;
18429
18430 if (GPR_REG_CLASS_P (rclass))
18431 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
18432
18433 else if (rclass == FLOAT_REGS)
18434 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
18435
18436 else if (rclass == ALTIVEC_REGS)
18437 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
18438
18439 /* For the combined VSX_REGS, turn off Altivec AND -16. */
18440 else if (rclass == VSX_REGS)
18441 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
18442 & ~RELOAD_REG_AND_M16);
18443
18444 /* If the register allocator hasn't made up its mind yet on the register
18445 class to use, fall back on sensible defaults. */
18446 else if (rclass == NO_REGS)
18447 {
18448 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
18449 & ~RELOAD_REG_AND_M16);
18450
18451 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
18452 addr_mask &= ~(RELOAD_REG_INDEXED
18453 | RELOAD_REG_PRE_INCDEC
18454 | RELOAD_REG_PRE_MODIFY);
18455 }
18456
18457 else
18458 addr_mask = 0;
18459
18460 /* If the register isn't valid in this register class, just return now. */
18461 if ((addr_mask & RELOAD_REG_VALID) == 0)
18462 {
18463 if (TARGET_DEBUG_ADDR)
18464 {
18465 fprintf (stderr,
18466 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
18467 "not valid in class\n",
18468 GET_MODE_NAME (mode), reg_class_names[rclass]);
18469 debug_rtx (addr);
18470 }
18471
18472 return -1;
18473 }
18474
18475 switch (GET_CODE (addr))
18476 {
18477 /* Does the register class support auto update forms for this mode? We
18478 don't need a scratch register, since the powerpc only supports
18479 PRE_INC, PRE_DEC, and PRE_MODIFY. */
18480 case PRE_INC:
18481 case PRE_DEC:
18482 reg = XEXP (addr, 0);
18483 if (!base_reg_operand (reg, GET_MODE (reg)))
18484 {
18485 fail_msg = "no base register #1";
18486 extra_cost = -1;
18487 }
18488
18489 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
18490 {
18491 extra_cost = 1;
18492 type = "update";
18493 }
18494 break;
18495
18496 case PRE_MODIFY:
18497 reg = XEXP (addr, 0);
18498 plus_arg1 = XEXP (addr, 1);
18499 if (!base_reg_operand (reg, GET_MODE (reg))
18500 || GET_CODE (plus_arg1) != PLUS
18501 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
18502 {
18503 fail_msg = "bad PRE_MODIFY";
18504 extra_cost = -1;
18505 }
18506
18507 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
18508 {
18509 extra_cost = 1;
18510 type = "update";
18511 }
18512 break;
18513
18514 /* Do we need to simulate AND -16 to clear the bottom address bits used
18515 in VMX load/stores? Only allow the AND for vector sizes. */
18516 case AND:
18517 and_arg = XEXP (addr, 0);
18518 if (GET_MODE_SIZE (mode) != 16
18519 || GET_CODE (XEXP (addr, 1)) != CONST_INT
18520 || INTVAL (XEXP (addr, 1)) != -16)
18521 {
18522 fail_msg = "bad Altivec AND #1";
18523 extra_cost = -1;
18524 }
18525
18526 if (rclass != ALTIVEC_REGS)
18527 {
18528 if (legitimate_indirect_address_p (and_arg, false))
18529 extra_cost = 1;
18530
18531 else if (legitimate_indexed_address_p (and_arg, false))
18532 extra_cost = 2;
18533
18534 else
18535 {
18536 fail_msg = "bad Altivec AND #2";
18537 extra_cost = -1;
18538 }
18539
18540 type = "and";
18541 }
18542 break;
18543
18544 /* If this is an indirect address, make sure it is a base register. */
18545 case REG:
18546 case SUBREG:
18547 if (!legitimate_indirect_address_p (addr, false))
18548 {
18549 extra_cost = 1;
18550 type = "move";
18551 }
18552 break;
18553
18554 /* If this is an indexed address, make sure the register class can handle
18555 indexed addresses for this mode. */
18556 case PLUS:
18557 plus_arg0 = XEXP (addr, 0);
18558 plus_arg1 = XEXP (addr, 1);
18559
18560 /* (plus (plus (reg) (constant)) (constant)) is generated during
18561 push_reload processing, so handle it now. */
18562 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
18563 {
18564 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18565 {
18566 extra_cost = 1;
18567 type = "offset";
18568 }
18569 }
18570
18571 /* (plus (plus (reg) (constant)) (reg)) is also generated during
18572 push_reload processing, so handle it now. */
18573 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
18574 {
18575 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
18576 {
18577 extra_cost = 1;
18578 type = "indexed #2";
18579 }
18580 }
18581
18582 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
18583 {
18584 fail_msg = "no base register #2";
18585 extra_cost = -1;
18586 }
18587
18588 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
18589 {
18590 if ((addr_mask & RELOAD_REG_INDEXED) == 0
18591 || !legitimate_indexed_address_p (addr, false))
18592 {
18593 extra_cost = 1;
18594 type = "indexed";
18595 }
18596 }
18597
18598 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
18599 && CONST_INT_P (plus_arg1))
18600 {
18601 if (!quad_address_offset_p (INTVAL (plus_arg1)))
18602 {
18603 extra_cost = 1;
18604 type = "vector d-form offset";
18605 }
18606 }
18607
18608 /* Make sure the register class can handle offset addresses. */
18609 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
18610 {
18611 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18612 {
18613 extra_cost = 1;
18614 type = "offset #2";
18615 }
18616 }
18617
18618 else
18619 {
18620 fail_msg = "bad PLUS";
18621 extra_cost = -1;
18622 }
18623
18624 break;
18625
18626 case LO_SUM:
18627 /* Quad offsets are restricted and can't handle normal addresses. */
18628 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
18629 {
18630 extra_cost = -1;
18631 type = "vector d-form lo_sum";
18632 }
18633
18634 else if (!legitimate_lo_sum_address_p (mode, addr, false))
18635 {
18636 fail_msg = "bad LO_SUM";
18637 extra_cost = -1;
18638 }
18639
18640 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18641 {
18642 extra_cost = 1;
18643 type = "lo_sum";
18644 }
18645 break;
18646
18647 /* Static addresses need to create a TOC entry. */
18648 case CONST:
18649 case SYMBOL_REF:
18650 case LABEL_REF:
18651 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
18652 {
18653 extra_cost = -1;
18654 type = "vector d-form lo_sum #2";
18655 }
18656
18657 else
18658 {
18659 type = "address";
18660 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
18661 }
18662 break;
18663
18664 /* TOC references look like offsetable memory. */
18665 case UNSPEC:
18666 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
18667 {
18668 fail_msg = "bad UNSPEC";
18669 extra_cost = -1;
18670 }
18671
18672 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
18673 {
18674 extra_cost = -1;
18675 type = "vector d-form lo_sum #3";
18676 }
18677
18678 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18679 {
18680 extra_cost = 1;
18681 type = "toc reference";
18682 }
18683 break;
18684
18685 default:
18686 {
18687 fail_msg = "bad address";
18688 extra_cost = -1;
18689 }
18690 }
18691
18692 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
18693 {
18694 if (extra_cost < 0)
18695 fprintf (stderr,
18696 "rs6000_secondary_reload_memory error: mode = %s, "
18697 "class = %s, addr_mask = '%s', %s\n",
18698 GET_MODE_NAME (mode),
18699 reg_class_names[rclass],
18700 rs6000_debug_addr_mask (addr_mask, false),
18701 (fail_msg != NULL) ? fail_msg : "<bad address>");
18702
18703 else
18704 fprintf (stderr,
18705 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
18706 "addr_mask = '%s', extra cost = %d, %s\n",
18707 GET_MODE_NAME (mode),
18708 reg_class_names[rclass],
18709 rs6000_debug_addr_mask (addr_mask, false),
18710 extra_cost,
18711 (type) ? type : "<none>");
18712
18713 debug_rtx (addr);
18714 }
18715
18716 return extra_cost;
18717 }
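
/* A few sample results (illustrative), for a GPR class and DImode:

     (mem (reg R))                       -> 0 when R is usable as a base
                                            register;
     (mem (plus (reg R) (const_int 8)))  -> 0 if the class allows offset
                                            addressing, else 1;
     (mem (unspec ... UNSPEC_TOCREL))    -> -1 under -mcmodel=small, so the
                                            caller falls back to a different
                                            reload method.  */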
18718
18719 /* Helper function for rs6000_secondary_reload to return true if a move to a
18720 different register class is really a simple move. */
18721
18722 static bool
18723 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
18724 enum rs6000_reg_type from_type,
18725 machine_mode mode)
18726 {
18727 int size = GET_MODE_SIZE (mode);
18728
18729 /* Add support for various direct moves available. In this function, we only
18730 look at cases where we don't need any extra registers, and one or more
18731 simple move insns are issued. Historically, small integers were not
18732 allowed in FPR/VSX registers. Single precision binary floating point is not
18733 a simple move because we need to convert to the single precision memory
18734 layout. The 4-byte SDmode can be moved. TDmode values are disallowed since
18735 they need special direct move handling, which we do not support yet. */
18736 if (TARGET_DIRECT_MOVE
18737 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
18738 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
18739 {
18740 if (TARGET_POWERPC64)
18741 {
18742 /* ISA 2.07: MTVSRD or MFVSRD. */
18743 if (size == 8)
18744 return true;
18745
18746 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
18747 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
18748 return true;
18749 }
18750
18751 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
18752 if (TARGET_P8_VECTOR)
18753 {
18754 if (mode == SImode)
18755 return true;
18756
18757 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
18758 return true;
18759 }
18760
18761 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
18762 if (mode == SDmode)
18763 return true;
18764 }
18765
18766 /* Power6+: MFTGPR or MFFGPR. */
18767 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
18768 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
18769 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
18770 return true;
18771
18772 /* Move to/from SPR. */
18773 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
18774 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
18775 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
18776 return true;
18777
18778 return false;
18779 }
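
/* The simple moves accepted above, in tabular form (sketch):

     direction      mode         requires
     GPR <-> VSX    8 bytes      TARGET_DIRECT_MOVE && TARGET_POWERPC64
     GPR <-> VSX    16 bytes     ... && TARGET_P9_VECTOR, mode != TDmode
     GPR <-> VSX    SImode       ... && TARGET_P8_VECTOR
     GPR <-> VSX    QI/HImode    ... && TARGET_P9_VECTOR
     GPR <-> VSX    SDmode       TARGET_DIRECT_MOVE
     GPR <-> FPR    8 bytes      TARGET_MFPGPR && TARGET_POWERPC64
     GPR <-> SPR    4/8 bytes    always (8 bytes needs TARGET_POWERPC64)  */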
18780
18781 /* Direct move helper function for rs6000_secondary_reload. Handle all of the
18782 special direct moves that involve allocating an extra register. Return true
18783 if there is such a helper, filling in its insn code and extra cost in SRI;
18784 return false if not. */
18785
18786 static bool
18787 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
18788 enum rs6000_reg_type from_type,
18789 machine_mode mode,
18790 secondary_reload_info *sri,
18791 bool altivec_p)
18792 {
18793 bool ret = false;
18794 enum insn_code icode = CODE_FOR_nothing;
18795 int cost = 0;
18796 int size = GET_MODE_SIZE (mode);
18797
18798 if (TARGET_POWERPC64 && size == 16)
18799 {
18800 /* Handle moving 128-bit values from GPRs to VSX registers on
18801 ISA 2.07 (power8, power9) when running in 64-bit mode using
18802 XXPERMDI to glue the two 64-bit values back together. */
18803 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
18804 {
18805 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
18806 icode = reg_addr[mode].reload_vsx_gpr;
18807 }
18808
18809 /* Handle moving 128-bit values from VSX registers to GPRs on
18810 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
18811 bottom 64-bit value. */
18812 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
18813 {
18814 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
18815 icode = reg_addr[mode].reload_gpr_vsx;
18816 }
18817 }
18818
18819 else if (TARGET_POWERPC64 && mode == SFmode)
18820 {
18821 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
18822 {
18823 cost = 3; /* xscvdpspn, mfvsrd, and. */
18824 icode = reg_addr[mode].reload_gpr_vsx;
18825 }
18826
18827 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
18828 {
18829 cost = 2; /* mtvsrwz, xscvspdpn. */
18830 icode = reg_addr[mode].reload_vsx_gpr;
18831 }
18832 }
18833
18834 else if (!TARGET_POWERPC64 && size == 8)
18835 {
18836 /* Handle moving 64-bit values from GPRs to floating point registers on
18837 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
18838 32-bit values back together. Altivec register classes must be handled
18839 specially since a different instruction is used, and the secondary
18840 reload support requires a single instruction class in the scratch
18841 register constraint. However, right now TFmode is not allowed in
18842 Altivec registers, so the pattern will never match. */
18843 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
18844 {
18845 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
18846 icode = reg_addr[mode].reload_fpr_gpr;
18847 }
18848 }
18849
18850 if (icode != CODE_FOR_nothing)
18851 {
18852 ret = true;
18853 if (sri)
18854 {
18855 sri->icode = icode;
18856 sri->extra_cost = cost;
18857 }
18858 }
18859
18860 return ret;
18861 }
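
/* E.g. (illustrative): a TImode move from a GPR pair to a VSX register on
   power8 (where no single-insn move exists) reports cost 3 -- two mtvsrd's
   plus an xxpermdi -- and hands reload the reg_addr[TImode].reload_vsx_gpr
   pattern so it can allocate the scratch register the glue sequence
   needs.  */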
18862
18863 /* Return whether a move between two register classes can be done either
18864 directly (simple move) or via a pattern that uses a single extra temporary
18865 (using ISA 2.07's direct move in this case). */
18866
18867 static bool
18868 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
18869 enum rs6000_reg_type from_type,
18870 machine_mode mode,
18871 secondary_reload_info *sri,
18872 bool altivec_p)
18873 {
18874 /* Fall back to load/store reloads if either type is not a register. */
18875 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
18876 return false;
18877
18878 /* If we haven't allocated registers yet, assume the move can be done for the
18879 standard register types. */
18880 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
18881 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
18882 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
18883 return true;
18884
18885 /* A move within the same set of registers is a simple move for
18886 non-specialized registers. */
18887 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
18888 return true;
18889
18890 /* Check whether a simple move can be done directly. */
18891 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
18892 {
18893 if (sri)
18894 {
18895 sri->icode = CODE_FOR_nothing;
18896 sri->extra_cost = 0;
18897 }
18898 return true;
18899 }
18900
18901 /* Now check if we can do it in a few steps. */
18902 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
18903 altivec_p);
18904 }
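
/* rs6000_secondary_memory_needed below reuses this function with a null
   SRI purely as a yes/no predicate (sketch):

     if (rs6000_secondary_reload_move (to_type, from_type, mode,
                                       (secondary_reload_info *) 0,
                                       altivec_p))
       return false;    (no secondary memory is needed)  */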
18905
18906 /* Inform reload about cases where moving X with a mode MODE to a register in
18907 RCLASS requires an extra scratch or immediate register. Return the class
18908 needed for the immediate register.
18909
18910 For VSX and Altivec, we may need a register to convert sp+offset into
18911 reg+sp.
18912
18913 For misaligned 64-bit gpr loads and stores we need a register to
18914 convert an offset address to indirect. */
18915
18916 static reg_class_t
18917 rs6000_secondary_reload (bool in_p,
18918 rtx x,
18919 reg_class_t rclass_i,
18920 machine_mode mode,
18921 secondary_reload_info *sri)
18922 {
18923 enum reg_class rclass = (enum reg_class) rclass_i;
18924 reg_class_t ret = ALL_REGS;
18925 enum insn_code icode;
18926 bool default_p = false;
18927 bool done_p = false;
18928
18929 /* Allow subreg of memory before/during reload. */
18930 bool memory_p = (MEM_P (x)
18931 || (!reload_completed && GET_CODE (x) == SUBREG
18932 && MEM_P (SUBREG_REG (x))));
18933
18934 sri->icode = CODE_FOR_nothing;
18935 sri->t_icode = CODE_FOR_nothing;
18936 sri->extra_cost = 0;
18937 icode = ((in_p)
18938 ? reg_addr[mode].reload_load
18939 : reg_addr[mode].reload_store);
18940
18941 if (REG_P (x) || register_operand (x, mode))
18942 {
18943 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
18944 bool altivec_p = (rclass == ALTIVEC_REGS);
18945 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
18946
18947 if (!in_p)
18948 std::swap (to_type, from_type);
18949
18950 /* Can we do a direct move of some sort? */
18951 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
18952 altivec_p))
18953 {
18954 icode = (enum insn_code)sri->icode;
18955 default_p = false;
18956 done_p = true;
18957 ret = NO_REGS;
18958 }
18959 }
18960
18961 /* Make sure 0.0 is not reloaded or forced into memory. */
18962 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
18963 {
18964 ret = NO_REGS;
18965 default_p = false;
18966 done_p = true;
18967 }
18968
18969 /* If this is a scalar floating point value and we want to load it into the
18970 traditional Altivec registers, do it through a traditional floating
18971 point register, unless we have D-form addressing. Also make sure that
18972 non-zero constants use an FPR. */
18973 if (!done_p && reg_addr[mode].scalar_in_vmx_p
18974 && !mode_supports_vmx_dform (mode)
18975 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
18976 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
18977 {
18978 ret = FLOAT_REGS;
18979 default_p = false;
18980 done_p = true;
18981 }
18982
18983 /* Handle reload of load/stores if we have reload helper functions. */
18984 if (!done_p && icode != CODE_FOR_nothing && memory_p)
18985 {
18986 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
18987 mode);
18988
18989 if (extra_cost >= 0)
18990 {
18991 done_p = true;
18992 ret = NO_REGS;
18993 if (extra_cost > 0)
18994 {
18995 sri->extra_cost = extra_cost;
18996 sri->icode = icode;
18997 }
18998 }
18999 }
19000
19001 /* Handle unaligned loads and stores of integer registers. */
19002 if (!done_p && TARGET_POWERPC64
19003 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
19004 && memory_p
19005 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
19006 {
19007 rtx addr = XEXP (x, 0);
19008 rtx off = address_offset (addr);
19009
19010 if (off != NULL_RTX)
19011 {
19012 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
19013 unsigned HOST_WIDE_INT offset = INTVAL (off);
19014
19015 /* We need a secondary reload when our legitimate_address_p
19016 says the address is good (as otherwise the entire address
19017 will be reloaded), and the offset is not a multiple of
19018 four or we have an address wrap. Address wrap will only
19019 occur for LO_SUMs since legitimate_offset_address_p
19020 rejects addresses for 16-byte mems that will wrap. */
19021 if (GET_CODE (addr) == LO_SUM
19022 ? (1 /* legitimate_address_p allows any offset for lo_sum */
19023 && ((offset & 3) != 0
19024 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
19025 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
19026 && (offset & 3) != 0))
19027 {
19028 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
19029 if (in_p)
19030 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
19031 : CODE_FOR_reload_di_load);
19032 else
19033 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
19034 : CODE_FOR_reload_di_store);
19035 sri->extra_cost = 2;
19036 ret = NO_REGS;
19037 done_p = true;
19038 }
19039 else
19040 default_p = true;
19041 }
19042 else
19043 default_p = true;
19044 }
19045
19046 if (!done_p && !TARGET_POWERPC64
19047 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
19048 && memory_p
19049 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
19050 {
19051 rtx addr = XEXP (x, 0);
19052 rtx off = address_offset (addr);
19053
19054 if (off != NULL_RTX)
19055 {
19056 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
19057 unsigned HOST_WIDE_INT offset = INTVAL (off);
19058
19059 /* We need a secondary reload when our legitimate_address_p
19060 says the address is good (as otherwise the entire address
19061 will be reloaded), and we have a wrap.
19062
19063 legitimate_lo_sum_address_p allows LO_SUM addresses to
19064 have any offset so test for wrap in the low 16 bits.
19065
19066 legitimate_offset_address_p checks for the range
19067 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
19068 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
19069 [0x7ff4,0x7fff] respectively, so test for the
19070 intersection of these ranges, [0x7ffc,0x7fff] and
19071 [0x7ff4,0x7ff7] respectively.
19072
19073 Note that the address we see here may have been
19074 manipulated by legitimize_reload_address. */
19075 if (GET_CODE (addr) == LO_SUM
19076 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
19077 : offset - (0x8000 - extra) < UNITS_PER_WORD)
19078 {
19079 if (in_p)
19080 sri->icode = CODE_FOR_reload_si_load;
19081 else
19082 sri->icode = CODE_FOR_reload_si_store;
19083 sri->extra_cost = 2;
19084 ret = NO_REGS;
19085 done_p = true;
19086 }
19087 else
19088 default_p = true;
19089 }
19090 else
19091 default_p = true;
19092 }
19093
19094 if (!done_p)
19095 default_p = true;
19096
19097 if (default_p)
19098 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
19099
19100 gcc_assert (ret != ALL_REGS);
19101
19102 if (TARGET_DEBUG_ADDR)
19103 {
19104 fprintf (stderr,
19105 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
19106 "mode = %s",
19107 reg_class_names[ret],
19108 in_p ? "true" : "false",
19109 reg_class_names[rclass],
19110 GET_MODE_NAME (mode));
19111
19112 if (reload_completed)
19113 fputs (", after reload", stderr);
19114
19115 if (!done_p)
19116 fputs (", done_p not set", stderr);
19117
19118 if (default_p)
19119 fputs (", default secondary reload", stderr);
19120
19121 if (sri->icode != CODE_FOR_nothing)
19122 fprintf (stderr, ", reload func = %s, extra cost = %d",
19123 insn_data[sri->icode].name, sri->extra_cost);
19124
19125 else if (sri->extra_cost > 0)
19126 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
19127
19128 fputs ("\n", stderr);
19129 debug_rtx (x);
19130 }
19131
19132 return ret;
19133 }
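
/* A sketch of the outcome for one concrete case: a misaligned 64-bit GPR
   load under -m64 fills in

     sri->icode      = CODE_FOR_reload_di_load;
     sri->extra_cost = 2;

   and returns NO_REGS; reload then expands that pattern, and the matching
   insn in rs6000.md ends up calling rs6000_secondary_reload_gpr below.  */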
19134
19135 /* Better tracing for rs6000_secondary_reload_inner. */
19136
19137 static void
19138 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
19139 bool store_p)
19140 {
19141 rtx set, clobber;
19142
19143 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
19144
19145 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
19146 store_p ? "store" : "load");
19147
19148 if (store_p)
19149 set = gen_rtx_SET (mem, reg);
19150 else
19151 set = gen_rtx_SET (reg, mem);
19152
19153 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
19154 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
19155 }
19156
19157 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
19158 ATTRIBUTE_NORETURN;
19159
19160 static void
19161 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
19162 bool store_p)
19163 {
19164 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
19165 gcc_unreachable ();
19166 }
19167
19168 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
19169 reload helper functions. These were identified in
19170 rs6000_secondary_reload_memory, and if reload decided to use the secondary
19171 reload, it calls the insns:
19172 reload_<RELOAD:mode>_<P:mptrsize>_store
19173 reload_<RELOAD:mode>_<P:mptrsize>_load
19174
19175 which in turn call this function, to do whatever is necessary to create
19176 valid addresses. */
19177
19178 void
19179 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
19180 {
19181 int regno = true_regnum (reg);
19182 machine_mode mode = GET_MODE (reg);
19183 addr_mask_type addr_mask;
19184 rtx addr;
19185 rtx new_addr;
19186 rtx op_reg, op0, op1;
19187 rtx and_op;
19188 rtx cc_clobber;
19189 rtvec rv;
19190
19191 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
19192 || !base_reg_operand (scratch, GET_MODE (scratch)))
19193 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19194
19195 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
19196 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
19197
19198 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
19199 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
19200
19201 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
19202 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
19203
19204 else
19205 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19206
19207 /* Make sure the mode is valid in this register class. */
19208 if ((addr_mask & RELOAD_REG_VALID) == 0)
19209 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19210
19211 if (TARGET_DEBUG_ADDR)
19212 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
19213
19214 new_addr = addr = XEXP (mem, 0);
19215 switch (GET_CODE (addr))
19216 {
19217 /* Does the register class support auto update forms for this mode? If
19218 not, do the update now. We don't need a scratch register, since the
19219 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
19220 case PRE_INC:
19221 case PRE_DEC:
19222 op_reg = XEXP (addr, 0);
19223 if (!base_reg_operand (op_reg, Pmode))
19224 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19225
19226 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
19227 {
19228 emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
19229 new_addr = op_reg;
19230 }
19231 break;
19232
19233 case PRE_MODIFY:
19234 op0 = XEXP (addr, 0);
19235 op1 = XEXP (addr, 1);
19236 if (!base_reg_operand (op0, Pmode)
19237 || GET_CODE (op1) != PLUS
19238 || !rtx_equal_p (op0, XEXP (op1, 0)))
19239 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19240
19241 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
19242 {
19243 emit_insn (gen_rtx_SET (op0, op1));
19244 new_addr = reg;
19245 }
19246 break;
19247
19248 /* Do we need to simulate AND -16 to clear the bottom address bits used
19249 in VMX load/stores? */
19250 case AND:
19251 op0 = XEXP (addr, 0);
19252 op1 = XEXP (addr, 1);
19253 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
19254 {
19255 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
19256 op_reg = op0;
19257
19258 else if (GET_CODE (op0) == PLUS)
19259 {
19260 emit_insn (gen_rtx_SET (scratch, op0));
19261 op_reg = scratch;
19262 }
19263
19264 else
19265 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19266
19267 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
19268 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
19269 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
19270 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
19271 new_addr = scratch;
19272 }
19273 break;
19274
19275 /* If this is an indirect address, make sure it is a base register. */
19276 case REG:
19277 case SUBREG:
19278 if (!base_reg_operand (addr, GET_MODE (addr)))
19279 {
19280 emit_insn (gen_rtx_SET (scratch, addr));
19281 new_addr = scratch;
19282 }
19283 break;
19284
19285 /* If this is an indexed address, make sure the register class can handle
19286 indexed addresses for this mode. */
19287 case PLUS:
19288 op0 = XEXP (addr, 0);
19289 op1 = XEXP (addr, 1);
19290 if (!base_reg_operand (op0, Pmode))
19291 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19292
19293 else if (int_reg_operand (op1, Pmode))
19294 {
19295 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
19296 {
19297 emit_insn (gen_rtx_SET (scratch, addr));
19298 new_addr = scratch;
19299 }
19300 }
19301
19302 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
19303 {
19304 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
19305 || !quad_address_p (addr, mode, false))
19306 {
19307 emit_insn (gen_rtx_SET (scratch, addr));
19308 new_addr = scratch;
19309 }
19310 }
19311
19312 /* Make sure the register class can handle offset addresses. */
19313 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
19314 {
19315 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19316 {
19317 emit_insn (gen_rtx_SET (scratch, addr));
19318 new_addr = scratch;
19319 }
19320 }
19321
19322 else
19323 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19324
19325 break;
19326
19327 case LO_SUM:
19328 op0 = XEXP (addr, 0);
19329 op1 = XEXP (addr, 1);
19330 if (!base_reg_operand (op0, Pmode))
19331 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19332
19333 else if (int_reg_operand (op1, Pmode))
19334 {
19335 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
19336 {
19337 emit_insn (gen_rtx_SET (scratch, addr));
19338 new_addr = scratch;
19339 }
19340 }
19341
19342 /* Quad offsets are restricted and can't handle normal addresses. */
19343 else if (mode_supports_dq_form (mode))
19344 {
19345 emit_insn (gen_rtx_SET (scratch, addr));
19346 new_addr = scratch;
19347 }
19348
19349 /* Make sure the register class can handle offset addresses. */
19350 else if (legitimate_lo_sum_address_p (mode, addr, false))
19351 {
19352 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19353 {
19354 emit_insn (gen_rtx_SET (scratch, addr));
19355 new_addr = scratch;
19356 }
19357 }
19358
19359 else
19360 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19361
19362 break;
19363
19364 case SYMBOL_REF:
19365 case CONST:
19366 case LABEL_REF:
19367 rs6000_emit_move (scratch, addr, Pmode);
19368 new_addr = scratch;
19369 break;
19370
19371 default:
19372 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19373 }
19374
19375 /* Adjust the address if it changed. */
19376 if (addr != new_addr)
19377 {
19378 mem = replace_equiv_address_nv (mem, new_addr);
19379 if (TARGET_DEBUG_ADDR)
19380 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
19381 }
19382
19383 /* Now create the move. */
19384 if (store_p)
19385 emit_insn (gen_rtx_SET (mem, reg));
19386 else
19387 emit_insn (gen_rtx_SET (reg, mem));
19388
19389 return;
19390 }
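
/* For instance (sketch): an Altivec-style address in a class that lacks
   RELOAD_REG_AND_M16 is rewritten through the scratch register:

     before:  (mem (and (plus (reg rA) (reg rB)) (const_int -16)))
     after:   scratch = rA + rB;  scratch = scratch & -16;
              (mem (reg scratch))  */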
19391
19392 /* Convert reloads involving 64-bit gprs and misaligned offset
19393 addressing, or multiple 32-bit gprs and offsets that are too large,
19394 to use indirect addressing. */
19395
19396 void
19397 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
19398 {
19399 int regno = true_regnum (reg);
19400 enum reg_class rclass;
19401 rtx addr;
19402 rtx scratch_or_premodify = scratch;
19403
19404 if (TARGET_DEBUG_ADDR)
19405 {
19406 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
19407 store_p ? "store" : "load");
19408 fprintf (stderr, "reg:\n");
19409 debug_rtx (reg);
19410 fprintf (stderr, "mem:\n");
19411 debug_rtx (mem);
19412 fprintf (stderr, "scratch:\n");
19413 debug_rtx (scratch);
19414 }
19415
19416 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
19417 gcc_assert (GET_CODE (mem) == MEM);
19418 rclass = REGNO_REG_CLASS (regno);
19419 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
19420 addr = XEXP (mem, 0);
19421
19422 if (GET_CODE (addr) == PRE_MODIFY)
19423 {
19424 gcc_assert (REG_P (XEXP (addr, 0))
19425 && GET_CODE (XEXP (addr, 1)) == PLUS
19426 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
19427 scratch_or_premodify = XEXP (addr, 0);
19428 if (!HARD_REGISTER_P (scratch_or_premodify))
19429 /* If we have a pseudo here then reload will have arranged
19430 to have it replaced, but only in the original insn.
19431 Use the replacement here too. */
19432 scratch_or_premodify = find_replacement (&XEXP (addr, 0));
19433
19434 /* RTL emitted by rs6000_secondary_reload_gpr uses RTL
19435 expressions from the original insn, without unsharing them.
19436 Any RTL that points into the original insn will of course
19437 have register replacements applied. That is why we don't
19438 need to look for replacements under the PLUS. */
19439 addr = XEXP (addr, 1);
19440 }
19441 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
19442
19443 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
19444
19445 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
19446
19447 /* Now create the move. */
19448 if (store_p)
19449 emit_insn (gen_rtx_SET (mem, reg));
19450 else
19451 emit_insn (gen_rtx_SET (reg, mem));
19452
19453 return;
19454 }
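
/* E.g. (illustrative): a 32-bit DImode load from (mem (plus (reg r9)
   (const_int 32764))) would wrap the 16-bit displacement when the second
   word is accessed, so the sum is moved into the scratch register and the
   access becomes indirect, (mem (reg scratch)), which the movdi splitters
   can then offset safely.  */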
19455
19456 /* Given an rtx X being reloaded into a reg required to be
19457 in class CLASS, return the class of reg to actually use.
19458 In general this is just CLASS; but on some machines
19459 in some cases it is preferable to use a more restrictive class.
19460
19461 On the RS/6000, we have to return NO_REGS when we want to reload a
19462 floating-point CONST_DOUBLE to force it to be copied to memory.
19463
19464 We also don't want to reload integer values into floating-point
19465 registers if we can at all help it. In fact, this can
19466 cause reload to die, if it tries to generate a reload of CTR
19467 into a FP register and discovers it doesn't have the memory location
19468 required.
19469
19470 ??? Would it be a good idea to have reload do the converse, that is
19471 try to reload floating modes into FP registers if possible?
19472 */
19473
19474 static enum reg_class
19475 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
19476 {
19477 machine_mode mode = GET_MODE (x);
19478 bool is_constant = CONSTANT_P (x);
19479
19480 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
19481 reload class for it. */
19482 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
19483 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
19484 return NO_REGS;
19485
19486 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
19487 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
19488 return NO_REGS;
19489
19490 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
19491 the reloading of address expressions using PLUS into floating point
19492 registers. */
19493 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
19494 {
19495 if (is_constant)
19496 {
19497 /* Zero is always allowed in all VSX registers. */
19498 if (x == CONST0_RTX (mode))
19499 return rclass;
19500
19501 /* If this is a vector constant that can be formed with a few Altivec
19502 instructions, we want altivec registers. */
19503 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
19504 return ALTIVEC_REGS;
19505
19506 /* If this is an integer constant that can easily be loaded into
19507 vector registers, allow it. */
19508 if (CONST_INT_P (x))
19509 {
19510 HOST_WIDE_INT value = INTVAL (x);
19511
19512 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
19513 2.06 can generate it in the Altivec registers with
19514 VSPLTI<x>. */
19515 if (value == -1)
19516 {
19517 if (TARGET_P8_VECTOR)
19518 return rclass;
19519 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
19520 return ALTIVEC_REGS;
19521 else
19522 return NO_REGS;
19523 }
19524
19525 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
19526 a sign extend in the Altivec registers. */
19527 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
19528 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
19529 return ALTIVEC_REGS;
19530 }
19531
19532 /* Force constant to memory. */
19533 return NO_REGS;
19534 }
19535
19536 /* D-form addressing can easily reload the value. */
19537 if (mode_supports_vmx_dform (mode)
19538 || mode_supports_dq_form (mode))
19539 return rclass;
19540
19541 /* If this is a scalar floating point value and we don't have D-form
19542 addressing, prefer the traditional floating point registers so that we
19543 can use D-form (register+offset) addressing. */
19544 if (rclass == VSX_REGS
19545 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
19546 return FLOAT_REGS;
19547
19548 /* Prefer the Altivec registers if Altivec is handling the vector
19549 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
19550 loads. */
19551 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
19552 || mode == V1TImode)
19553 return ALTIVEC_REGS;
19554
19555 return rclass;
19556 }
19557
19558 if (is_constant || GET_CODE (x) == PLUS)
19559 {
19560 if (reg_class_subset_p (GENERAL_REGS, rclass))
19561 return GENERAL_REGS;
19562 if (reg_class_subset_p (BASE_REGS, rclass))
19563 return BASE_REGS;
19564 return NO_REGS;
19565 }
19566
19567 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
19568 return GENERAL_REGS;
19569
19570 return rclass;
19571 }
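
/* Sample preferences implied above (illustrative):

     (const_int -1) into VSX_REGS with ISA 2.07  -> VSX_REGS (XXLORC);
     easy V4SI vector constant into VSX_REGS     -> ALTIVEC_REGS (VSPLTI*);
     SFmode value into VSX_REGS without D-form   -> FLOAT_REGS, so that
                                                    reg+offset addressing
                                                    can be used.  */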
19572
19573 /* Debug version of rs6000_preferred_reload_class. */
19574 static enum reg_class
19575 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
19576 {
19577 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
19578
19579 fprintf (stderr,
19580 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
19581 "mode = %s, x:\n",
19582 reg_class_names[ret], reg_class_names[rclass],
19583 GET_MODE_NAME (GET_MODE (x)));
19584 debug_rtx (x);
19585
19586 return ret;
19587 }
19588
19589 /* If we are copying between FP or AltiVec registers and anything else, we need
19590 a memory location. The exception is when we are targeting ppc64 and the
19591 move to/from fpr and gpr instructions are available. Also, under VSX, you
19592 can copy vector registers from the FP register set to the Altivec register
19593 set and vice versa. */
19594
19595 static bool
19596 rs6000_secondary_memory_needed (machine_mode mode,
19597 reg_class_t from_class,
19598 reg_class_t to_class)
19599 {
19600 enum rs6000_reg_type from_type, to_type;
19601 bool altivec_p = ((from_class == ALTIVEC_REGS)
19602 || (to_class == ALTIVEC_REGS));
19603
19604 /* If a simple/direct move is available, we don't need secondary memory. */
19605 from_type = reg_class_to_reg_type[(int)from_class];
19606 to_type = reg_class_to_reg_type[(int)to_class];
19607
19608 if (rs6000_secondary_reload_move (to_type, from_type, mode,
19609 (secondary_reload_info *)0, altivec_p))
19610 return false;
19611
19612 /* If we have a floating point or vector register class, we need to use
19613 memory to transfer the data. */
19614 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
19615 return true;
19616
19617 return false;
19618 }
19619
19620 /* Debug version of rs6000_secondary_memory_needed. */
19621 static bool
19622 rs6000_debug_secondary_memory_needed (machine_mode mode,
19623 reg_class_t from_class,
19624 reg_class_t to_class)
19625 {
19626 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
19627
19628 fprintf (stderr,
19629 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
19630 "to_class = %s, mode = %s\n",
19631 ret ? "true" : "false",
19632 reg_class_names[from_class],
19633 reg_class_names[to_class],
19634 GET_MODE_NAME (mode));
19635
19636 return ret;
19637 }
19638
19639 /* Return the register class of a scratch register needed to copy IN into
19640 or out of a register in RCLASS in MODE. If it can be done directly,
19641 NO_REGS is returned. */
19642
19643 static enum reg_class
19644 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
19645 rtx in)
19646 {
19647 int regno;
19648
19649 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
19650 #if TARGET_MACHO
19651 && MACHOPIC_INDIRECT
19652 #endif
19653 ))
19654 {
19655 /* We cannot copy a symbolic operand directly into anything
19656 other than BASE_REGS for TARGET_ELF. So indicate that a
19657 register from BASE_REGS is needed as an intermediate
19658 register.
19659
19660 On Darwin, pic addresses require a load from memory, which
19661 needs a base register. */
19662 if (rclass != BASE_REGS
19663 && (GET_CODE (in) == SYMBOL_REF
19664 || GET_CODE (in) == HIGH
19665 || GET_CODE (in) == LABEL_REF
19666 || GET_CODE (in) == CONST))
19667 return BASE_REGS;
19668 }
19669
19670 if (GET_CODE (in) == REG)
19671 {
19672 regno = REGNO (in);
19673 if (regno >= FIRST_PSEUDO_REGISTER)
19674 {
19675 regno = true_regnum (in);
19676 if (regno >= FIRST_PSEUDO_REGISTER)
19677 regno = -1;
19678 }
19679 }
19680 else if (GET_CODE (in) == SUBREG)
19681 {
19682 regno = true_regnum (in);
19683 if (regno >= FIRST_PSEUDO_REGISTER)
19684 regno = -1;
19685 }
19686 else
19687 regno = -1;
19688
19689 /* If we have VSX register moves, prefer moving scalar values between
19690 Altivec registers and GPR by going via an FPR (and then via memory)
19691 instead of reloading the secondary memory address for Altivec moves. */
19692 if (TARGET_VSX
19693 && GET_MODE_SIZE (mode) < 16
19694 && !mode_supports_vmx_dform (mode)
19695 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
19696 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
19697 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
19698 && (regno >= 0 && INT_REGNO_P (regno)))))
19699 return FLOAT_REGS;
19700
19701 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
19702 into anything. */
19703 if (rclass == GENERAL_REGS || rclass == BASE_REGS
19704 || (regno >= 0 && INT_REGNO_P (regno)))
19705 return NO_REGS;
19706
19707 /* Constants, memory, and VSX registers can go into VSX registers (both the
19708 traditional floating point and the altivec registers). */
19709 if (rclass == VSX_REGS
19710 && (regno == -1 || VSX_REGNO_P (regno)))
19711 return NO_REGS;
19712
19713 /* Constants, memory, and FP registers can go into FP registers. */
19714 if ((regno == -1 || FP_REGNO_P (regno))
19715 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
19716 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
19717
19718 /* Memory and AltiVec registers can go into AltiVec registers. */
19719 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
19720 && rclass == ALTIVEC_REGS)
19721 return NO_REGS;
19722
19723 /* We can copy among the CR registers. */
19724 if ((rclass == CR_REGS || rclass == CR0_REGS)
19725 && regno >= 0 && CR_REGNO_P (regno))
19726 return NO_REGS;
19727
19728 /* Otherwise, we need GENERAL_REGS. */
19729 return GENERAL_REGS;
19730 }
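
/* Sample classifications (illustrative): copying a SYMBOL_REF into
   FLOAT_REGS on ELF returns BASE_REGS, since the address must be formed in
   a base register first; under VSX, copying a DFmode value between an
   Altivec register and a GPR without D-form addressing returns FLOAT_REGS,
   routing the value through a traditional FPR.  */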
19731
19732 /* Debug version of rs6000_secondary_reload_class. */
19733 static enum reg_class
19734 rs6000_debug_secondary_reload_class (enum reg_class rclass,
19735 machine_mode mode, rtx in)
19736 {
19737 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
19738 fprintf (stderr,
19739 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
19740 "mode = %s, input rtx:\n",
19741 reg_class_names[ret], reg_class_names[rclass],
19742 GET_MODE_NAME (mode));
19743 debug_rtx (in);
19744
19745 return ret;
19746 }
19747
19748 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
19749
19750 static bool
19751 rs6000_can_change_mode_class (machine_mode from,
19752 machine_mode to,
19753 reg_class_t rclass)
19754 {
19755 unsigned from_size = GET_MODE_SIZE (from);
19756 unsigned to_size = GET_MODE_SIZE (to);
19757
19758 if (from_size != to_size)
19759 {
19760 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
19761
19762 if (reg_classes_intersect_p (xclass, rclass))
19763 {
19764 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
19765 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
19766 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
19767 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
19768
19769 /* Don't allow 64-bit types to overlap with 128-bit types that take a
19770 single register under VSX because the scalar part of the register
19771 is in the upper 64-bits, and not the lower 64-bits. Types like
19772 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
19773 IEEE floating point can't overlap, and neither can small
19774 values. */
19775
19776 if (to_float128_vector_p && from_float128_vector_p)
19777 return true;
19778
19779 else if (to_float128_vector_p || from_float128_vector_p)
19780 return false;
19781
19782 /* TDmode in floating-mode registers must always go into a register
19783 pair with the most significant word in the even-numbered register
19784 to match ISA requirements. In little-endian mode, this does not
19785 match subreg numbering, so we cannot allow subregs. */
19786 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
19787 return false;
19788
19789 if (from_size < 8 || to_size < 8)
19790 return false;
19791
19792 if (from_size == 8 && (8 * to_nregs) != to_size)
19793 return false;
19794
19795 if (to_size == 8 && (8 * from_nregs) != from_size)
19796 return false;
19797
19798 return true;
19799 }
19800 else
19801 return true;
19802 }
19803
19804 /* Since the VSX register set includes traditional floating point registers
19805 and altivec registers, just check for the size being different instead of
19806 trying to check whether the modes are vector modes. Otherwise it won't
19807 allow say DF and DI to change classes. For types like TFmode and TDmode
19808 that take 2 64-bit registers, rather than a single 128-bit register, don't
19809 allow subregs of those types to other 128 bit types. */
19810 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
19811 {
19812 unsigned num_regs = (from_size + 15) / 16;
19813 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
19814 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
19815 return false;
19816
19817 return (from_size == 8 || from_size == 16);
19818 }
19819
19820 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
19821 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
19822 return false;
19823
19824 return true;
19825 }
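
/* Consequences of the checks above (illustrative): (subreg:DI (reg:KF)) in
   a VSX register class is rejected, because the 128-bit scalar lives in
   the upper 64 bits of the register; (subreg:DF (reg:DI)) between two
   8-byte modes is allowed.  */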
19826
19827 /* Debug version of rs6000_can_change_mode_class. */
19828 static bool
19829 rs6000_debug_can_change_mode_class (machine_mode from,
19830 machine_mode to,
19831 reg_class_t rclass)
19832 {
19833 bool ret = rs6000_can_change_mode_class (from, to, rclass);
19834
19835 fprintf (stderr,
19836 "rs6000_can_change_mode_class, return %s, from = %s, "
19837 "to = %s, rclass = %s\n",
19838 ret ? "true" : "false",
19839 GET_MODE_NAME (from), GET_MODE_NAME (to),
19840 reg_class_names[rclass]);
19841
19842 return ret;
19843 }
19844 \f
19845 /* Return a string to do a move operation of 128 bits of data. */
19846
19847 const char *
19848 rs6000_output_move_128bit (rtx operands[])
19849 {
19850 rtx dest = operands[0];
19851 rtx src = operands[1];
19852 machine_mode mode = GET_MODE (dest);
19853 int dest_regno;
19854 int src_regno;
19855 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
19856 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
19857
19858 if (REG_P (dest))
19859 {
19860 dest_regno = REGNO (dest);
19861 dest_gpr_p = INT_REGNO_P (dest_regno);
19862 dest_fp_p = FP_REGNO_P (dest_regno);
19863 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
19864 dest_vsx_p = dest_fp_p | dest_vmx_p;
19865 }
19866 else
19867 {
19868 dest_regno = -1;
19869 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
19870 }
19871
19872 if (REG_P (src))
19873 {
19874 src_regno = REGNO (src);
19875 src_gpr_p = INT_REGNO_P (src_regno);
19876 src_fp_p = FP_REGNO_P (src_regno);
19877 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
19878 src_vsx_p = src_fp_p | src_vmx_p;
19879 }
19880 else
19881 {
19882 src_regno = -1;
19883 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
19884 }
19885
19886 /* Register moves. */
19887 if (dest_regno >= 0 && src_regno >= 0)
19888 {
19889 if (dest_gpr_p)
19890 {
19891 if (src_gpr_p)
19892 return "#";
19893
19894 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
19895 return (WORDS_BIG_ENDIAN
19896 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
19897 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
19898
19899 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
19900 return "#";
19901 }
19902
19903 else if (TARGET_VSX && dest_vsx_p)
19904 {
19905 if (src_vsx_p)
19906 return "xxlor %x0,%x1,%x1";
19907
19908 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
19909 return (WORDS_BIG_ENDIAN
19910 ? "mtvsrdd %x0,%1,%L1"
19911 : "mtvsrdd %x0,%L1,%1");
19912
19913 else if (TARGET_DIRECT_MOVE && src_gpr_p)
19914 return "#";
19915 }
19916
19917 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
19918 return "vor %0,%1,%1";
19919
19920 else if (dest_fp_p && src_fp_p)
19921 return "#";
19922 }
19923
19924 /* Loads. */
19925 else if (dest_regno >= 0 && MEM_P (src))
19926 {
19927 if (dest_gpr_p)
19928 {
19929 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
19930 return "lq %0,%1";
19931 else
19932 return "#";
19933 }
19934
19935 else if (TARGET_ALTIVEC && dest_vmx_p
19936 && altivec_indexed_or_indirect_operand (src, mode))
19937 return "lvx %0,%y1";
19938
19939 else if (TARGET_VSX && dest_vsx_p)
19940 {
19941 if (mode_supports_dq_form (mode)
19942 && quad_address_p (XEXP (src, 0), mode, true))
19943 return "lxv %x0,%1";
19944
19945 else if (TARGET_P9_VECTOR)
19946 return "lxvx %x0,%y1";
19947
19948 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
19949 return "lxvw4x %x0,%y1";
19950
19951 else
19952 return "lxvd2x %x0,%y1";
19953 }
19954
19955 else if (TARGET_ALTIVEC && dest_vmx_p)
19956 return "lvx %0,%y1";
19957
19958 else if (dest_fp_p)
19959 return "#";
19960 }
19961
19962 /* Stores. */
19963 else if (src_regno >= 0 && MEM_P (dest))
19964 {
19965 if (src_gpr_p)
19966 {
19967 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
19968 return "stq %1,%0";
19969 else
19970 return "#";
19971 }
19972
19973 else if (TARGET_ALTIVEC && src_vmx_p
19974 && altivec_indexed_or_indirect_operand (dest, mode))
19975 return "stvx %1,%y0";
19976
19977 else if (TARGET_VSX && src_vsx_p)
19978 {
19979 if (mode_supports_dq_form (mode)
19980 && quad_address_p (XEXP (dest, 0), mode, true))
19981 return "stxv %x1,%0";
19982
19983 else if (TARGET_P9_VECTOR)
19984 return "stxvx %x1,%y0";
19985
19986 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
19987 return "stxvw4x %x1,%y0";
19988
19989 else
19990 return "stxvd2x %x1,%y0";
19991 }
19992
19993 else if (TARGET_ALTIVEC && src_vmx_p)
19994 return "stvx %1,%y0";
19995
19996 else if (src_fp_p)
19997 return "#";
19998 }
19999
20000 /* Constants. */
20001 else if (dest_regno >= 0
20002 && (GET_CODE (src) == CONST_INT
20003 || GET_CODE (src) == CONST_WIDE_INT
20004 || GET_CODE (src) == CONST_DOUBLE
20005 || GET_CODE (src) == CONST_VECTOR))
20006 {
20007 if (dest_gpr_p)
20008 return "#";
20009
20010 else if ((dest_vmx_p && TARGET_ALTIVEC)
20011 || (dest_vsx_p && TARGET_VSX))
20012 return output_vec_const_move (operands);
20013 }
20014
20015 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
20016 }
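
/* Sample template selections (illustrative): a V2DI copy between two VSX
   registers returns "xxlor %x0,%x1,%x1"; a GPR-to-GPR copy returns "#",
   deferring to the post-reload splitters; an ISA 3.0 load whose address
   passed quad_address_p returns "lxv %x0,%1".  */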
20017
20018 /* Validate a 128-bit move. */
20019 bool
20020 rs6000_move_128bit_ok_p (rtx operands[])
20021 {
20022 machine_mode mode = GET_MODE (operands[0]);
20023 return (gpc_reg_operand (operands[0], mode)
20024 || gpc_reg_operand (operands[1], mode));
20025 }
20026
20027 /* Return true if a 128-bit move needs to be split. */
20028 bool
20029 rs6000_split_128bit_ok_p (rtx operands[])
20030 {
20031 if (!reload_completed)
20032 return false;
20033
20034 if (!gpr_or_gpr_p (operands[0], operands[1]))
20035 return false;
20036
20037 if (quad_load_store_p (operands[0], operands[1]))
20038 return false;
20039
20040 return true;
20041 }
20042
20043 \f
20044 /* Given a comparison operation, return the bit number in CCR to test. We
20045 know this is a valid comparison.
20046
20047 SCC_P is 1 if this is for an scc. That means that %D will have been
20048 used instead of %C, so the bits will be in different places.
20049
20050 Return -1 if OP isn't a valid comparison for some reason. */
20051
20052 int
20053 ccr_bit (rtx op, int scc_p)
20054 {
20055 enum rtx_code code = GET_CODE (op);
20056 machine_mode cc_mode;
20057 int cc_regnum;
20058 int base_bit;
20059 rtx reg;
20060
20061 if (!COMPARISON_P (op))
20062 return -1;
20063
20064 reg = XEXP (op, 0);
20065
20066 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
20067
20068 cc_mode = GET_MODE (reg);
20069 cc_regnum = REGNO (reg);
20070 base_bit = 4 * (cc_regnum - CR0_REGNO);
20071
20072 validate_condition_mode (code, cc_mode);
20073
20074 /* When generating a sCOND operation, only positive conditions are
20075 allowed. */
20076 gcc_assert (!scc_p
20077 || code == EQ || code == GT || code == LT || code == UNORDERED
20078 || code == GTU || code == LTU);
20079
20080 switch (code)
20081 {
20082 case NE:
20083 return scc_p ? base_bit + 3 : base_bit + 2;
20084 case EQ:
20085 return base_bit + 2;
20086 case GT: case GTU: case UNLE:
20087 return base_bit + 1;
20088 case LT: case LTU: case UNGE:
20089 return base_bit;
20090 case ORDERED: case UNORDERED:
20091 return base_bit + 3;
20092
20093 case GE: case GEU:
20094 /* If scc, we will have done a cror to put the bit in the
20095 unordered position. So test that bit. For integer, this is ! LT
20096 unless this is an scc insn. */
20097 return scc_p ? base_bit + 3 : base_bit;
20098
20099 case LE: case LEU:
20100 return scc_p ? base_bit + 3 : base_bit + 1;
20101
20102 default:
20103 gcc_unreachable ();
20104 }
20105 }
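
/* For example (illustrative): given (gt (reg:CCFP 69) (const_int 0)),
   where register 69 is CR1 (CR0_REGNO + 1), base_bit is 4 and the function
   returns 5, the GT bit of that CR field (scc_p == 0).  */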
20106 \f
20107 /* Return the GOT register. */
20108
20109 rtx
20110 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
20111 {
20112 /* The second flow pass currently (June 1999) can't update
20113 regs_ever_live without disturbing other parts of the compiler, so
20114 update it here to make the prolog/epilogue code happy. */
20115 if (!can_create_pseudo_p ()
20116 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
20117 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
20118
20119 crtl->uses_pic_offset_table = 1;
20120
20121 return pic_offset_table_rtx;
20122 }
20123 \f
20124 static rs6000_stack_t stack_info;
20125
20126 /* Function to init struct machine_function.
20127 This will be called, via a pointer variable,
20128 from push_function_context. */
20129
20130 static struct machine_function *
20131 rs6000_init_machine_status (void)
20132 {
20133 stack_info.reload_completed = 0;
20134 return ggc_cleared_alloc<machine_function> ();
20135 }
20136 \f
20137 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
20138
20139 /* Write out a function code label. */
20140
20141 void
20142 rs6000_output_function_entry (FILE *file, const char *fname)
20143 {
20144 if (fname[0] != '.')
20145 {
20146 switch (DEFAULT_ABI)
20147 {
20148 default:
20149 gcc_unreachable ();
20150
20151 case ABI_AIX:
20152 if (DOT_SYMBOLS)
20153 putc ('.', file);
20154 else
20155 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
20156 break;
20157
20158 case ABI_ELFv2:
20159 case ABI_V4:
20160 case ABI_DARWIN:
20161 break;
20162 }
20163 }
20164
20165 RS6000_OUTPUT_BASENAME (file, fname);
20166 }
20167
20168 /* Print an operand. Recognize special options, documented below. */
20169
20170 #if TARGET_ELF
20171 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
20172 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
20173 #else
20174 #define SMALL_DATA_RELOC "sda21"
20175 #define SMALL_DATA_REG 0
20176 #endif
20177
20178 void
20179 print_operand (FILE *file, rtx x, int code)
20180 {
20181 int i;
20182 unsigned HOST_WIDE_INT uval;
20183
20184 switch (code)
20185 {
20186 /* %a is output_address. */
20187
20188 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
20189 output_operand. */
20190
20191 case 'D':
20192 /* Like 'J' but get to the GT bit only. */
20193 gcc_assert (REG_P (x));
20194
20195 /* Bit 1 is GT bit. */
20196 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
20197
20198 /* Add one for shift count in rlinm for scc. */
20199 fprintf (file, "%d", i + 1);
20200 return;
20201
20202 case 'e':
20203 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
20204 if (! INT_P (x))
20205 {
20206 output_operand_lossage ("invalid %%e value");
20207 return;
20208 }
20209
20210 uval = INTVAL (x);
20211 if ((uval & 0xffff) == 0 && uval != 0)
20212 putc ('s', file);
20213 return;
20214
20215 case 'E':
20216 /* X is a CR register. Print the number of the EQ bit of the CR. */
20217 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
20218 output_operand_lossage ("invalid %%E value");
20219 else
20220 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
20221 return;
20222
20223 case 'f':
20224 /* X is a CR register. Print the shift count needed to move it
20225 to the high-order four bits. */
20226 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
20227 output_operand_lossage ("invalid %%f value");
20228 else
20229 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
20230 return;
20231
20232 case 'F':
20233 /* Similar, but print the count for the rotate in the opposite
20234 direction. */
20235 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
20236 output_operand_lossage ("invalid %%F value");
20237 else
20238 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
20239 return;
20240
20241 case 'G':
20242 /* X is a constant integer. If it is negative, print "m",
20243 otherwise print "z". This is to make an aze or ame insn. */
20244 if (GET_CODE (x) != CONST_INT)
20245 output_operand_lossage ("invalid %%G value");
20246 else if (INTVAL (x) >= 0)
20247 putc ('z', file);
20248 else
20249 putc ('m', file);
20250 return;
20251
20252 case 'h':
20253 /* If constant, output low-order five bits. Otherwise, write
20254 normally. */
20255 if (INT_P (x))
20256 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
20257 else
20258 print_operand (file, x, 0);
20259 return;
20260
20261 case 'H':
20262 /* If constant, output low-order six bits. Otherwise, write
20263 normally. */
20264 if (INT_P (x))
20265 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
20266 else
20267 print_operand (file, x, 0);
20268 return;
20269
20270 case 'I':
20271 /* Print `i' if this is a constant, else nothing. */
20272 if (INT_P (x))
20273 putc ('i', file);
20274 return;
20275
20276 case 'j':
20277 /* Write the bit number in CCR for jump. */
20278 i = ccr_bit (x, 0);
20279 if (i == -1)
20280 output_operand_lossage ("invalid %%j code");
20281 else
20282 fprintf (file, "%d", i);
20283 return;
20284
20285 case 'J':
20286 /* Similar, but add one for shift count in rlinm for scc and pass
20287 scc flag to `ccr_bit'. */
20288 i = ccr_bit (x, 1);
20289 if (i == -1)
20290 output_operand_lossage ("invalid %%J code");
20291 else
20292 /* If we want bit 31, write a shift count of zero, not 32. */
20293 fprintf (file, "%d", i == 31 ? 0 : i + 1);
20294 return;
20295
20296 case 'k':
20297 /* X must be a constant. Write the 1's complement of the
20298 constant. */
20299 if (! INT_P (x))
20300 output_operand_lossage ("invalid %%k value");
20301 else
20302 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
20303 return;
20304
20305 case 'K':
20306 /* X must be a symbolic constant on ELF. Write an
20307 expression suitable for an 'addi' that adds in the low 16
20308 bits of the MEM. */
20309 if (GET_CODE (x) == CONST)
20310 {
20311 if (GET_CODE (XEXP (x, 0)) != PLUS
20312 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
20313 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
20314 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
20315 output_operand_lossage ("invalid %%K value");
20316 }
20317 print_operand_address (file, x);
20318 fputs ("@l", file);
20319 return;
20320
20321 /* %l is output_asm_label. */
20322
20323 case 'L':
20324 /* Write second word of DImode or DFmode reference. Works on register
20325 or non-indexed memory only. */
20326 if (REG_P (x))
20327 fputs (reg_names[REGNO (x) + 1], file);
20328 else if (MEM_P (x))
20329 {
20330 machine_mode mode = GET_MODE (x);
20331 /* Handle possible auto-increment. Since it is pre-increment and
20332 we have already done it, we can just use an offset of one word. */
20333 if (GET_CODE (XEXP (x, 0)) == PRE_INC
20334 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
20335 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
20336 UNITS_PER_WORD));
20337 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20338 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
20339 UNITS_PER_WORD));
20340 else
20341 output_address (mode, XEXP (adjust_address_nv (x, SImode,
20342 UNITS_PER_WORD),
20343 0));
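/* E.g. on a 32-bit target (UNITS_PER_WORD == 4) a DFmode operand
   addressed as "8(9)" prints as "12(9)" here, i.e. the second word.  */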
20344
20345 if (small_data_operand (x, GET_MODE (x)))
20346 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20347 reg_names[SMALL_DATA_REG]);
20348 }
20349 return;
20350
20351 case 'N': /* Unused */
20352 /* Write the number of elements in the vector times 4. */
20353 if (GET_CODE (x) != PARALLEL)
20354 output_operand_lossage ("invalid %%N value");
20355 else
20356 fprintf (file, "%d", XVECLEN (x, 0) * 4);
20357 return;
20358
20359 case 'O': /* Unused */
20360 /* Similar, but subtract 1 first. */
20361 if (GET_CODE (x) != PARALLEL)
20362 output_operand_lossage ("invalid %%O value");
20363 else
20364 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
20365 return;
20366
20367 case 'p':
20368 /* X is a CONST_INT that is a power of two. Output the logarithm. */
20369 if (! INT_P (x)
20370 || INTVAL (x) < 0
20371 || (i = exact_log2 (INTVAL (x))) < 0)
20372 output_operand_lossage ("invalid %%p value");
20373 else
20374 fprintf (file, "%d", i);
20375 return;
20376
20377 case 'P':
20378 /* The operand must be an indirect memory reference. The result
20379 is the register name. */
20380 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
20381 || REGNO (XEXP (x, 0)) >= 32)
20382 output_operand_lossage ("invalid %%P value");
20383 else
20384 fputs (reg_names[REGNO (XEXP (x, 0))], file);
20385 return;
20386
20387 case 'q':
20388 /* This outputs the logical code corresponding to a boolean
20389 expression. The expression may have one or both operands
20390 negated (if one, only the first one). For condition register
20391 logical operations, it will also treat the negated
20392 CR codes as NOTs, but not handle NOTs of them. */
20393 {
20394 const char *const *t = 0;
20395 const char *s;
20396 enum rtx_code code = GET_CODE (x);
20397 static const char * const tbl[3][3] = {
20398 { "and", "andc", "nor" },
20399 { "or", "orc", "nand" },
20400 { "xor", "eqv", "xor" } };
20401
20402 if (code == AND)
20403 t = tbl[0];
20404 else if (code == IOR)
20405 t = tbl[1];
20406 else if (code == XOR)
20407 t = tbl[2];
20408 else
20409 {
output_operand_lossage ("invalid %%q value");
/* T is still null at this point; return rather than dereference
   it below.  */
return;
}
20410
20411 if (GET_CODE (XEXP (x, 0)) != NOT)
20412 s = t[0];
20413 else
20414 {
20415 if (GET_CODE (XEXP (x, 1)) == NOT)
20416 s = t[2];
20417 else
20418 s = t[1];
20419 }
20420
20421 fputs (s, file);
20422 }
20423 return;
20424
20425 case 'Q':
20426 if (! TARGET_MFCRF)
20427 return;
20428 fputc (',', file);
20429 /* FALLTHRU */
20430
20431 case 'R':
20432 /* X is a CR register. Print the mask for `mtcrf'. */
20433 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
20434 output_operand_lossage ("invalid %%R value");
20435 else
20436 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
20437 return;
20438
20439 case 's':
20440 /* Low 5 bits of 32 - value */
20441 if (! INT_P (x))
20442 output_operand_lossage ("invalid %%s value");
20443 else
20444 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
20445 return;
20446
20447 case 't':
20448 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
20449 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
20450
20451 /* Bit 3 is OV bit. */
20452 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
20453
20454 /* If we want bit 31, write a shift count of zero, not 32. */
20455 fprintf (file, "%d", i == 31 ? 0 : i + 1);
20456 return;
20457
20458 case 'T':
20459 /* Print the symbolic name of a branch target register. */
20460 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
20461 && REGNO (x) != CTR_REGNO))
20462 output_operand_lossage ("invalid %%T value");
20463 else if (REGNO (x) == LR_REGNO)
20464 fputs ("lr", file);
20465 else
20466 fputs ("ctr", file);
20467 return;
20468
20469 case 'u':
20470 /* High-order or low-order 16 bits of constant, whichever is non-zero,
20471 for use in unsigned operand. */
20472 if (! INT_P (x))
20473 {
20474 output_operand_lossage ("invalid %%u value");
20475 return;
20476 }
20477
20478 uval = INTVAL (x);
20479 if ((uval & 0xffff) == 0)
20480 uval >>= 16;
20481
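/* E.g. 0x12340000 prints as 0x1234 (the high half, since the low
   half is zero), while 0x5678 prints as 0x5678.  */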
20482 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
20483 return;
20484
20485 case 'v':
20486 /* High-order 16 bits of constant for use in signed operand. */
20487 if (! INT_P (x))
20488 output_operand_lossage ("invalid %%v value");
20489 else
20490 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
20491 (INTVAL (x) >> 16) & 0xffff);
20492 return;
20493
20494 case 'U':
20495 /* Print `u' if this has an auto-increment or auto-decrement. */
20496 if (MEM_P (x)
20497 && (GET_CODE (XEXP (x, 0)) == PRE_INC
20498 || GET_CODE (XEXP (x, 0)) == PRE_DEC
20499 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
20500 putc ('u', file);
20501 return;
20502
20503 case 'V':
20504 /* Print the trap code for this operand. */
20505 switch (GET_CODE (x))
20506 {
20507 case EQ:
20508 fputs ("eq", file); /* 4 */
20509 break;
20510 case NE:
20511 fputs ("ne", file); /* 24 */
20512 break;
20513 case LT:
20514 fputs ("lt", file); /* 16 */
20515 break;
20516 case LE:
20517 fputs ("le", file); /* 20 */
20518 break;
20519 case GT:
20520 fputs ("gt", file); /* 8 */
20521 break;
20522 case GE:
20523 fputs ("ge", file); /* 12 */
20524 break;
20525 case LTU:
20526 fputs ("llt", file); /* 2 */
20527 break;
20528 case LEU:
20529 fputs ("lle", file); /* 6 */
20530 break;
20531 case GTU:
20532 fputs ("lgt", file); /* 1 */
20533 break;
20534 case GEU:
20535 fputs ("lge", file); /* 5 */
20536 break;
20537 default:
20538 gcc_unreachable ();
20539 }
20540 break;
20541
20542 case 'w':
20543 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
20544 normally. */
20545 if (INT_P (x))
20546 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
20547 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
20548 else
20549 print_operand (file, x, 0);
20550 return;
20551
20552 case 'x':
20553 /* X is a FPR or Altivec register used in a VSX context. */
20554 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
20555 output_operand_lossage ("invalid %%x value");
20556 else
20557 {
20558 int reg = REGNO (x);
20559 int vsx_reg = (FP_REGNO_P (reg)
20560 ? reg - 32
20561 : reg - FIRST_ALTIVEC_REGNO + 32);
20562
20563 #ifdef TARGET_REGNAMES
20564 if (TARGET_REGNAMES)
20565 fprintf (file, "%%vs%d", vsx_reg);
20566 else
20567 #endif
20568 fprintf (file, "%d", vsx_reg);
20569 }
20570 return;
20571
20572 case 'X':
20573 if (MEM_P (x)
20574 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
20575 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
20576 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
20577 putc ('x', file);
20578 return;
20579
20580 case 'Y':
20581 /* Like 'L', for third word of TImode/PTImode */
20582 if (REG_P (x))
20583 fputs (reg_names[REGNO (x) + 2], file);
20584 else if (MEM_P (x))
20585 {
20586 machine_mode mode = GET_MODE (x);
20587 if (GET_CODE (XEXP (x, 0)) == PRE_INC
20588 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
20589 output_address (mode, plus_constant (Pmode,
20590 XEXP (XEXP (x, 0), 0), 8));
20591 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20592 output_address (mode, plus_constant (Pmode,
20593 XEXP (XEXP (x, 0), 0), 8));
20594 else
20595 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
20596 if (small_data_operand (x, GET_MODE (x)))
20597 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20598 reg_names[SMALL_DATA_REG]);
20599 }
20600 return;
20601
20602 case 'z':
20603 /* X is a SYMBOL_REF. Write out the name preceded by a
20604 period and without any trailing data in brackets. Used for function
20605 names. If we are configured for System V (or the embedded ABI) on
20606 the PowerPC, do not emit the period, since those systems do not use
20607 TOCs and the like. */
20608 gcc_assert (GET_CODE (x) == SYMBOL_REF);
20609
20610 /* For macho, check to see if we need a stub. */
20611 if (TARGET_MACHO)
20612 {
20613 const char *name = XSTR (x, 0);
20614 #if TARGET_MACHO
20615 if (darwin_emit_branch_islands
20616 && MACHOPIC_INDIRECT
20617 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
20618 name = machopic_indirection_name (x, /*stub_p=*/true);
20619 #endif
20620 assemble_name (file, name);
20621 }
20622 else if (!DOT_SYMBOLS)
20623 assemble_name (file, XSTR (x, 0));
20624 else
20625 rs6000_output_function_entry (file, XSTR (x, 0));
20626 return;
20627
20628 case 'Z':
20629 /* Like 'L', for last word of TImode/PTImode. */
20630 if (REG_P (x))
20631 fputs (reg_names[REGNO (x) + 3], file);
20632 else if (MEM_P (x))
20633 {
20634 machine_mode mode = GET_MODE (x);
20635 if (GET_CODE (XEXP (x, 0)) == PRE_INC
20636 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
20637 output_address (mode, plus_constant (Pmode,
20638 XEXP (XEXP (x, 0), 0), 12));
20639 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20640 output_address (mode, plus_constant (Pmode,
20641 XEXP (XEXP (x, 0), 0), 12));
20642 else
20643 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
20644 if (small_data_operand (x, GET_MODE (x)))
20645 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20646 reg_names[SMALL_DATA_REG]);
20647 }
20648 return;
20649
20650 /* Print AltiVec memory operand. */
20651 case 'y':
20652 {
20653 rtx tmp;
20654
20655 gcc_assert (MEM_P (x));
20656
20657 tmp = XEXP (x, 0);
20658
20659 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
20660 && GET_CODE (tmp) == AND
20661 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
20662 && INTVAL (XEXP (tmp, 1)) == -16)
20663 tmp = XEXP (tmp, 0);
20664 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
20665 && GET_CODE (tmp) == PRE_MODIFY)
20666 tmp = XEXP (tmp, 1);
20667 if (REG_P (tmp))
20668 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
20669 else
20670 {
20671 if (GET_CODE (tmp) != PLUS
20672 || !REG_P (XEXP (tmp, 0))
20673 || !REG_P (XEXP (tmp, 1)))
20674 {
20675 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
20676 break;
20677 }
20678
20679 if (REGNO (XEXP (tmp, 0)) == 0)
20680 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
20681 reg_names[ REGNO (XEXP (tmp, 0)) ]);
20682 else
20683 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
20684 reg_names[ REGNO (XEXP (tmp, 1)) ]);
20685 }
20686 break;
20687 }
20688
20689 case 0:
20690 if (REG_P (x))
20691 fprintf (file, "%s", reg_names[REGNO (x)]);
20692 else if (MEM_P (x))
20693 {
20694 /* We need to handle PRE_INC and PRE_DEC here, since we need to
20695 know the width from the mode. */
20696 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
20697 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
20698 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
20699 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
20700 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
20701 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
20702 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20703 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
20704 else
20705 output_address (GET_MODE (x), XEXP (x, 0));
20706 }
20707 else
20708 {
20709 if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
20710 /* This hack along with a corresponding hack in
20711 rs6000_output_addr_const_extra arranges to output addends
20712 where the assembler expects to find them. E.g.
20713 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
20714 without this hack would be output as "x@toc+4". We
20715 want "x+4@toc". */
20716 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
20717 else
20718 output_addr_const (file, x);
20719 }
20720 return;
20721
20722 case '&':
20723 if (const char *name = get_some_local_dynamic_name ())
20724 assemble_name (file, name);
20725 else
20726 output_operand_lossage ("'%%&' used without any "
20727 "local dynamic TLS references");
20728 return;
20729
20730 default:
20731 output_operand_lossage ("invalid %%xn code");
20732 }
20733 }
20734 \f
20735 /* Print the address of an operand. */
20736
20737 void
20738 print_operand_address (FILE *file, rtx x)
20739 {
20740 if (REG_P (x))
20741 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
20742 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
20743 || GET_CODE (x) == LABEL_REF)
20744 {
20745 output_addr_const (file, x);
20746 if (small_data_operand (x, GET_MODE (x)))
20747 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20748 reg_names[SMALL_DATA_REG]);
20749 else
20750 gcc_assert (!TARGET_TOC);
20751 }
20752 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
20753 && REG_P (XEXP (x, 1)))
20754 {
20755 if (REGNO (XEXP (x, 0)) == 0)
20756 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
20757 reg_names[ REGNO (XEXP (x, 0)) ]);
20758 else
20759 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
20760 reg_names[ REGNO (XEXP (x, 1)) ]);
20761 }
20762 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
20763 && GET_CODE (XEXP (x, 1)) == CONST_INT)
20764 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
20765 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
20766 #if TARGET_MACHO
20767 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
20768 && CONSTANT_P (XEXP (x, 1)))
20769 {
20770 fprintf (file, "lo16(");
20771 output_addr_const (file, XEXP (x, 1));
20772 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
20773 }
20774 #endif
20775 #if TARGET_ELF
20776 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
20777 && CONSTANT_P (XEXP (x, 1)))
20778 {
20779 output_addr_const (file, XEXP (x, 1));
20780 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
20781 }
20782 #endif
20783 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
20784 {
20785 /* This hack along with a corresponding hack in
20786 rs6000_output_addr_const_extra arranges to output addends
20787 where the assembler expects to find them. E.g.
20788 (lo_sum (reg 9)
20789 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
20790 without this hack would be output as "x@toc+8@l(9)". We
20791 want "x+8@toc@l(9)". */
20792 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
20793 if (GET_CODE (x) == LO_SUM)
20794 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
20795 else
20796 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
20797 }
20798 else
20799 gcc_unreachable ();
20800 }
20801 \f
20802 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
20803
20804 static bool
20805 rs6000_output_addr_const_extra (FILE *file, rtx x)
20806 {
20807 if (GET_CODE (x) == UNSPEC)
20808 switch (XINT (x, 1))
20809 {
20810 case UNSPEC_TOCREL:
20811 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
20812 && REG_P (XVECEXP (x, 0, 1))
20813 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
20814 output_addr_const (file, XVECEXP (x, 0, 0));
20815 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
20816 {
20817 if (INTVAL (tocrel_offset_oac) >= 0)
20818 fprintf (file, "+");
20819 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
20820 }
20821 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
20822 {
20823 putc ('-', file);
20824 assemble_name (file, toc_label_name);
20825 need_toc_init = 1;
20826 }
20827 else if (TARGET_ELF)
20828 fputs ("@toc", file);
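/* For example, on ELF this prints "x+4@toc" for symbol x at offset 4;
   with -mminimal-toc the TOC base label is subtracted instead, giving
   an "x+4-<label>" style expression.  */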
20829 return true;
20830
20831 #if TARGET_MACHO
20832 case UNSPEC_MACHOPIC_OFFSET:
20833 output_addr_const (file, XVECEXP (x, 0, 0));
20834 putc ('-', file);
20835 machopic_output_function_base_name (file);
20836 return true;
20837 #endif
20838 }
20839 return false;
20840 }
20841 \f
20842 /* Target hook for assembling integer objects. The PowerPC version has
20843 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
20844 is defined. It also needs to handle DI-mode objects on 64-bit
20845 targets. */
20846
20847 static bool
20848 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
20849 {
20850 #ifdef RELOCATABLE_NEEDS_FIXUP
20851 /* Special handling for SI values. */
20852 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
20853 {
20854 static int recurse = 0;
20855
20856 /* For -mrelocatable, we mark all addresses that need to be fixed up in
20857 the .fixup section. Since the TOC section is already relocated, we
20858 don't need to mark it here. We used to skip the text section, but it
20859 should never be valid for relocated addresses to be placed in the text
20860 section. */
20861 if (DEFAULT_ABI == ABI_V4
20862 && (TARGET_RELOCATABLE || flag_pic > 1)
20863 && in_section != toc_section
20864 && !recurse
20865 && !CONST_SCALAR_INT_P (x)
20866 && CONSTANT_P (x))
20867 {
20868 char buf[256];
20869
20870 recurse = 1;
20871 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
20872 fixuplabelno++;
20873 ASM_OUTPUT_LABEL (asm_out_file, buf);
20874 fprintf (asm_out_file, "\t.long\t(");
20875 output_addr_const (asm_out_file, x);
20876 fprintf (asm_out_file, ")@fixup\n");
20877 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
20878 ASM_OUTPUT_ALIGN (asm_out_file, 2);
20879 fprintf (asm_out_file, "\t.long\t");
20880 assemble_name (asm_out_file, buf);
20881 fprintf (asm_out_file, "\n\t.previous\n");
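/* The sequence emitted above looks roughly like
       .LCP0:
	       .long	(x)@fixup
	       .section	".fixup","aw"
	       .align	2
	       .long	.LCP0
	       .previous
   so that the .fixup section collects the address of every word
   needing runtime relocation.  */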
20882 recurse = 0;
20883 return true;
20884 }
20885 /* Remove initial .'s to turn a -mcall-aixdesc function
20886 address into the address of the descriptor, not the function
20887 itself. */
20888 else if (GET_CODE (x) == SYMBOL_REF
20889 && XSTR (x, 0)[0] == '.'
20890 && DEFAULT_ABI == ABI_AIX)
20891 {
20892 const char *name = XSTR (x, 0);
20893 while (*name == '.')
20894 name++;
20895
20896 fprintf (asm_out_file, "\t.long\t%s\n", name);
20897 return true;
20898 }
20899 }
20900 #endif /* RELOCATABLE_NEEDS_FIXUP */
20901 return default_assemble_integer (x, size, aligned_p);
20902 }
20903
20904 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
20905 /* Emit an assembler directive to set symbol visibility for DECL to
20906 VISIBILITY_TYPE. */
20907
20908 static void
20909 rs6000_assemble_visibility (tree decl, int vis)
20910 {
20911 if (TARGET_XCOFF)
20912 return;
20913
20914 /* Functions need to have their entry point symbol visibility set as
20915 well as their descriptor symbol visibility. */
20916 if (DEFAULT_ABI == ABI_AIX
20917 && DOT_SYMBOLS
20918 && TREE_CODE (decl) == FUNCTION_DECL)
20919 {
20920 static const char * const visibility_types[] = {
20921 NULL, "protected", "hidden", "internal"
20922 };
20923
20924 const char *name, *type;
20925
20926 name = ((* targetm.strip_name_encoding)
20927 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
20928 type = visibility_types[vis];
20929
20930 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
20931 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
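/* For example, a hidden function "foo" is given both
       .hidden foo
       .hidden .foo
   covering the function descriptor and the code entry point.  */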
20932 }
20933 else
20934 default_assemble_visibility (decl, vis);
20935 }
20936 #endif
20937 \f
20938 enum rtx_code
20939 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
20940 {
20941 /* Reversal of FP compares requires care -- an ordered compare
20942 becomes an unordered compare and vice versa. */
20943 if (mode == CCFPmode
20944 && (!flag_finite_math_only
20945 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
20946 || code == UNEQ || code == LTGT))
20947 return reverse_condition_maybe_unordered (code);
20948 else
20949 return reverse_condition (code);
20950 }
20951
20952 /* Generate a compare for CODE. Return a brand-new rtx that
20953 represents the result of the compare. */
20954
20955 static rtx
20956 rs6000_generate_compare (rtx cmp, machine_mode mode)
20957 {
20958 machine_mode comp_mode;
20959 rtx compare_result;
20960 enum rtx_code code = GET_CODE (cmp);
20961 rtx op0 = XEXP (cmp, 0);
20962 rtx op1 = XEXP (cmp, 1);
20963
20964 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
20965 comp_mode = CCmode;
20966 else if (FLOAT_MODE_P (mode))
20967 comp_mode = CCFPmode;
20968 else if (code == GTU || code == LTU
20969 || code == GEU || code == LEU)
20970 comp_mode = CCUNSmode;
20971 else if ((code == EQ || code == NE)
20972 && unsigned_reg_p (op0)
20973 && (unsigned_reg_p (op1)
20974 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
20975 /* These are unsigned values; perhaps there will be a later
20976 ordering compare that can be shared with this one. */
20977 comp_mode = CCUNSmode;
20978 else
20979 comp_mode = CCmode;
20980
20981 /* If we have an unsigned compare, make sure we don't have a negative constant as
20982 an immediate. */
20983 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
20984 && INTVAL (op1) < 0)
20985 {
20986 op0 = copy_rtx_if_shared (op0);
20987 op1 = force_reg (GET_MODE (op0), op1);
20988 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
20989 }
20990
20991 /* First, the compare. */
20992 compare_result = gen_reg_rtx (comp_mode);
20993
20994 /* IEEE 128-bit support in VSX registers when we do not have hardware
20995 support. */
20996 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
20997 {
20998 rtx libfunc = NULL_RTX;
20999 bool check_nan = false;
21000 rtx dest;
21001
21002 switch (code)
21003 {
21004 case EQ:
21005 case NE:
21006 libfunc = optab_libfunc (eq_optab, mode);
21007 break;
21008
21009 case GT:
21010 case GE:
21011 libfunc = optab_libfunc (ge_optab, mode);
21012 break;
21013
21014 case LT:
21015 case LE:
21016 libfunc = optab_libfunc (le_optab, mode);
21017 break;
21018
21019 case UNORDERED:
21020 case ORDERED:
21021 libfunc = optab_libfunc (unord_optab, mode);
21022 code = (code == UNORDERED) ? NE : EQ;
21023 break;
21024
21025 case UNGE:
21026 case UNGT:
21027 check_nan = true;
21028 libfunc = optab_libfunc (ge_optab, mode);
21029 code = (code == UNGE) ? GE : GT;
21030 break;
21031
21032 case UNLE:
21033 case UNLT:
21034 check_nan = true;
21035 libfunc = optab_libfunc (le_optab, mode);
21036 code = (code == UNLE) ? LE : LT;
21037 break;
21038
21039 case UNEQ:
21040 case LTGT:
21041 check_nan = true;
21042 libfunc = optab_libfunc (eq_optab, mode);
21043 code = (code == UNEQ) ? EQ : NE;
21044 break;
21045
21046 default:
21047 gcc_unreachable ();
21048 }
21049
21050 gcc_assert (libfunc);
21051
21052 if (!check_nan)
21053 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
21054 SImode, op0, mode, op1, mode);
21055
21056 /* The library signals an exception for signalling NaNs, so we need to
21057 handle isgreater, etc. by first checking isordered. */
21058 else
21059 {
21060 rtx ne_rtx, normal_dest, unord_dest;
21061 rtx unord_func = optab_libfunc (unord_optab, mode);
21062 rtx join_label = gen_label_rtx ();
21063 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
21064 rtx unord_cmp = gen_reg_rtx (comp_mode);
21065
21067 /* Test for either value being a NaN. */
21068 gcc_assert (unord_func);
21069 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
21070 SImode, op0, mode, op1, mode);
21071
21072 /* Set DEST to 1 if either value is a NaN, and jump to the join
21073 label. */
21074 dest = gen_reg_rtx (SImode);
21075 emit_move_insn (dest, const1_rtx);
21076 emit_insn (gen_rtx_SET (unord_cmp,
21077 gen_rtx_COMPARE (comp_mode, unord_dest,
21078 const0_rtx)));
21079
21080 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
21081 emit_jump_insn (gen_rtx_SET (pc_rtx,
21082 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
21083 join_ref,
21084 pc_rtx)));
21085
21086 /* Do the normal comparison, knowing that the values are not
21087 NaNs. */
21088 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
21089 SImode, op0, mode, op1, mode);
21090
21091 emit_insn (gen_cstoresi4 (dest,
21092 gen_rtx_fmt_ee (code, SImode, normal_dest,
21093 const0_rtx),
21094 normal_dest, const0_rtx));
21095
21096 /* Join NaN and non-NaN paths. Compare dest against 0. */
21097 emit_label (join_label);
21098 code = NE;
21099 }
21100
21101 emit_insn (gen_rtx_SET (compare_result,
21102 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
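/* In outline, the code emitted for the NaN-checking case is
       dest = 1;
       if (unord (op0, op1) != 0) goto join;   -- either value is a NaN
       dest = libfunc (op0, op1) <cond> 0;     -- ordinary comparison
     join:
       CR = compare (dest, 0);                 -- tested with code NE
   where unord and libfunc stand for the soft-fp library routines.  */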
21103 }
21104
21105 else
21106 {
21107 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
21108 CLOBBERs to match cmptf_internal2 pattern. */
21109 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
21110 && FLOAT128_IBM_P (GET_MODE (op0))
21111 && TARGET_HARD_FLOAT)
21112 emit_insn (gen_rtx_PARALLEL (VOIDmode,
21113 gen_rtvec (10,
21114 gen_rtx_SET (compare_result,
21115 gen_rtx_COMPARE (comp_mode, op0, op1)),
21116 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21117 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21118 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21119 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21120 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21121 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21122 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21123 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21124 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
21125 else if (GET_CODE (op1) == UNSPEC
21126 && XINT (op1, 1) == UNSPEC_SP_TEST)
21127 {
21128 rtx op1b = XVECEXP (op1, 0, 0);
21129 comp_mode = CCEQmode;
21130 compare_result = gen_reg_rtx (CCEQmode);
21131 if (TARGET_64BIT)
21132 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
21133 else
21134 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
21135 }
21136 else
21137 emit_insn (gen_rtx_SET (compare_result,
21138 gen_rtx_COMPARE (comp_mode, op0, op1)));
21139 }
21140
21141 /* Some kinds of FP comparisons need an OR operation;
21142 under flag_finite_math_only we don't bother. */
21143 if (FLOAT_MODE_P (mode)
21144 && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
21145 && !flag_finite_math_only
21146 && (code == LE || code == GE
21147 || code == UNEQ || code == LTGT
21148 || code == UNGT || code == UNLT))
21149 {
21150 enum rtx_code or1, or2;
21151 rtx or1_rtx, or2_rtx, compare2_rtx;
21152 rtx or_result = gen_reg_rtx (CCEQmode);
21153
21154 switch (code)
21155 {
21156 case LE: or1 = LT; or2 = EQ; break;
21157 case GE: or1 = GT; or2 = EQ; break;
21158 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
21159 case LTGT: or1 = LT; or2 = GT; break;
21160 case UNGT: or1 = UNORDERED; or2 = GT; break;
21161 case UNLT: or1 = UNORDERED; or2 = LT; break;
21162 default: gcc_unreachable ();
21163 }
21164 validate_condition_mode (or1, comp_mode);
21165 validate_condition_mode (or2, comp_mode);
21166 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
21167 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
21168 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
21169 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
21170 const_true_rtx);
21171 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
21172
21173 compare_result = or_result;
21174 code = EQ;
21175 }
21176
21177 validate_condition_mode (code, GET_MODE (compare_result));
21178
21179 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
21180 }
21181
21182 \f
21183 /* Return the diagnostic message string if the binary operation OP is
21184 not permitted on TYPE1 and TYPE2, NULL otherwise. */
21185
21186 static const char*
21187 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
21188 const_tree type1,
21189 const_tree type2)
21190 {
21191 machine_mode mode1 = TYPE_MODE (type1);
21192 machine_mode mode2 = TYPE_MODE (type2);
21193
21194 /* For complex modes, use the inner type. */
21195 if (COMPLEX_MODE_P (mode1))
21196 mode1 = GET_MODE_INNER (mode1);
21197
21198 if (COMPLEX_MODE_P (mode2))
21199 mode2 = GET_MODE_INNER (mode2);
21200
21201 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
21202 double to intermix unless -mfloat128-convert. */
21203 if (mode1 == mode2)
21204 return NULL;
21205
21206 if (!TARGET_FLOAT128_CVT)
21207 {
21208 if ((mode1 == KFmode && mode2 == IFmode)
21209 || (mode1 == IFmode && mode2 == KFmode))
21210 return N_("__float128 and __ibm128 cannot be used in the same "
21211 "expression");
21212
21213 if (TARGET_IEEEQUAD
21214 && ((mode1 == IFmode && mode2 == TFmode)
21215 || (mode1 == TFmode && mode2 == IFmode)))
21216 return N_("__ibm128 and long double cannot be used in the same "
21217 "expression");
21218
21219 if (!TARGET_IEEEQUAD
21220 && ((mode1 == KFmode && mode2 == TFmode)
21221 || (mode1 == TFmode && mode2 == KFmode)))
21222 return N_("__float128 and long double cannot be used in the same "
21223 "expression");
21224 }
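/* For example, given "__float128 f; __ibm128 g;", the expression
   "f + g" is rejected by the checks above unless -mfloat128-convert
   is in effect.  */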
21225
21226 return NULL;
21227 }
21228
21229 \f
21230 /* Expand floating point conversion to/from __float128 and __ibm128. */
21231
21232 void
21233 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
21234 {
21235 machine_mode dest_mode = GET_MODE (dest);
21236 machine_mode src_mode = GET_MODE (src);
21237 convert_optab cvt = unknown_optab;
21238 bool do_move = false;
21239 rtx libfunc = NULL_RTX;
21240 rtx dest2;
21241 typedef rtx (*rtx_2func_t) (rtx, rtx);
21242 rtx_2func_t hw_convert = (rtx_2func_t)0;
21243 size_t kf_or_tf;
21244
21245 struct hw_conv_t {
21246 rtx_2func_t from_df;
21247 rtx_2func_t from_sf;
21248 rtx_2func_t from_si_sign;
21249 rtx_2func_t from_si_uns;
21250 rtx_2func_t from_di_sign;
21251 rtx_2func_t from_di_uns;
21252 rtx_2func_t to_df;
21253 rtx_2func_t to_sf;
21254 rtx_2func_t to_si_sign;
21255 rtx_2func_t to_si_uns;
21256 rtx_2func_t to_di_sign;
21257 rtx_2func_t to_di_uns;
21258 } hw_conversions[2] = {
21259 /* conversions to/from KFmode */
21260 {
21261 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
21262 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
21263 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
21264 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
21265 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
21266 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
21267 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
21268 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
21269 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
21270 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
21271 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
21272 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
21273 },
21274
21275 /* conversions to/from TFmode */
21276 {
21277 gen_extenddftf2_hw, /* TFmode <- DFmode. */
21278 gen_extendsftf2_hw, /* TFmode <- SFmode. */
21279 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
21280 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
21281 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
21282 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
21283 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
21284 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
21285 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
21286 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
21287 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
21288 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
21289 },
21290 };
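/* For example, extending DFmode to KFmode uses gen_extenddfkf2_hw
   (typically a single xscvdpqp on ISA 3.0 machines) when hardware
   support is available, and otherwise falls back to the sext_optab
   library call below.  */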
21291
21292 if (dest_mode == src_mode)
21293 gcc_unreachable ();
21294
21295 /* Eliminate memory operations. */
21296 if (MEM_P (src))
21297 src = force_reg (src_mode, src);
21298
21299 if (MEM_P (dest))
21300 {
21301 rtx tmp = gen_reg_rtx (dest_mode);
21302 rs6000_expand_float128_convert (tmp, src, unsigned_p);
21303 rs6000_emit_move (dest, tmp, dest_mode);
21304 return;
21305 }
21306
21307 /* Convert to IEEE 128-bit floating point. */
21308 if (FLOAT128_IEEE_P (dest_mode))
21309 {
21310 if (dest_mode == KFmode)
21311 kf_or_tf = 0;
21312 else if (dest_mode == TFmode)
21313 kf_or_tf = 1;
21314 else
21315 gcc_unreachable ();
21316
21317 switch (src_mode)
21318 {
21319 case E_DFmode:
21320 cvt = sext_optab;
21321 hw_convert = hw_conversions[kf_or_tf].from_df;
21322 break;
21323
21324 case E_SFmode:
21325 cvt = sext_optab;
21326 hw_convert = hw_conversions[kf_or_tf].from_sf;
21327 break;
21328
21329 case E_KFmode:
21330 case E_IFmode:
21331 case E_TFmode:
21332 if (FLOAT128_IBM_P (src_mode))
21333 cvt = sext_optab;
21334 else
21335 do_move = true;
21336 break;
21337
21338 case E_SImode:
21339 if (unsigned_p)
21340 {
21341 cvt = ufloat_optab;
21342 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
21343 }
21344 else
21345 {
21346 cvt = sfloat_optab;
21347 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
21348 }
21349 break;
21350
21351 case E_DImode:
21352 if (unsigned_p)
21353 {
21354 cvt = ufloat_optab;
21355 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
21356 }
21357 else
21358 {
21359 cvt = sfloat_optab;
21360 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
21361 }
21362 break;
21363
21364 default:
21365 gcc_unreachable ();
21366 }
21367 }
21368
21369 /* Convert from IEEE 128-bit floating point. */
21370 else if (FLOAT128_IEEE_P (src_mode))
21371 {
21372 if (src_mode == KFmode)
21373 kf_or_tf = 0;
21374 else if (src_mode == TFmode)
21375 kf_or_tf = 1;
21376 else
21377 gcc_unreachable ();
21378
21379 switch (dest_mode)
21380 {
21381 case E_DFmode:
21382 cvt = trunc_optab;
21383 hw_convert = hw_conversions[kf_or_tf].to_df;
21384 break;
21385
21386 case E_SFmode:
21387 cvt = trunc_optab;
21388 hw_convert = hw_conversions[kf_or_tf].to_sf;
21389 break;
21390
21391 case E_KFmode:
21392 case E_IFmode:
21393 case E_TFmode:
21394 if (FLOAT128_IBM_P (dest_mode))
21395 cvt = trunc_optab;
21396 else
21397 do_move = true;
21398 break;
21399
21400 case E_SImode:
21401 if (unsigned_p)
21402 {
21403 cvt = ufix_optab;
21404 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
21405 }
21406 else
21407 {
21408 cvt = sfix_optab;
21409 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
21410 }
21411 break;
21412
21413 case E_DImode:
21414 if (unsigned_p)
21415 {
21416 cvt = ufix_optab;
21417 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
21418 }
21419 else
21420 {
21421 cvt = sfix_optab;
21422 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
21423 }
21424 break;
21425
21426 default:
21427 gcc_unreachable ();
21428 }
21429 }
21430
21431 /* Both IBM format. */
21432 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
21433 do_move = true;
21434
21435 else
21436 gcc_unreachable ();
21437
21438 /* Handle conversion between TFmode/KFmode/IFmode. */
21439 if (do_move)
21440 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
21441
21442 /* Handle conversion if we have hardware support. */
21443 else if (TARGET_FLOAT128_HW && hw_convert)
21444 emit_insn ((hw_convert) (dest, src));
21445
21446 /* Call an external function to do the conversion. */
21447 else if (cvt != unknown_optab)
21448 {
21449 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
21450 gcc_assert (libfunc != NULL_RTX);
21451
21452 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
21453 src, src_mode);
21454
21455 gcc_assert (dest2 != NULL_RTX);
21456 if (!rtx_equal_p (dest, dest2))
21457 emit_move_insn (dest, dest2);
21458 }
21459
21460 else
21461 gcc_unreachable ();
21462
21463 return;
21464 }
21465
21466 \f
21467 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
21468 can be used as that dest register. Return the dest register. */
21469
21470 rtx
21471 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
21472 {
21473 if (op2 == const0_rtx)
21474 return op1;
21475
21476 if (GET_CODE (scratch) == SCRATCH)
21477 scratch = gen_reg_rtx (mode);
21478
21479 if (logical_operand (op2, mode))
21480 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
21481 else
21482 emit_insn (gen_rtx_SET (scratch,
21483 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
21484
21485 return scratch;
21486 }
21487
21488 void
21489 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
21490 {
21491 rtx condition_rtx;
21492 machine_mode op_mode;
21493 enum rtx_code cond_code;
21494 rtx result = operands[0];
21495
21496 condition_rtx = rs6000_generate_compare (operands[1], mode);
21497 cond_code = GET_CODE (condition_rtx);
21498
21499 if (cond_code == NE
21500 || cond_code == GE || cond_code == LE
21501 || cond_code == GEU || cond_code == LEU
21502 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
21503 {
21504 rtx not_result = gen_reg_rtx (CCEQmode);
21505 rtx not_op, rev_cond_rtx;
21506 machine_mode cc_mode;
21507
21508 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
21509
21510 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
21511 SImode, XEXP (condition_rtx, 0), const0_rtx);
21512 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
21513 emit_insn (gen_rtx_SET (not_result, not_op));
21514 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
21515 }
21516
21517 op_mode = GET_MODE (XEXP (operands[1], 0));
21518 if (op_mode == VOIDmode)
21519 op_mode = GET_MODE (XEXP (operands[1], 1));
21520
21521 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
21522 {
21523 PUT_MODE (condition_rtx, DImode);
21524 convert_move (result, condition_rtx, 0);
21525 }
21526 else
21527 {
21528 PUT_MODE (condition_rtx, SImode);
21529 emit_insn (gen_rtx_SET (result, condition_rtx));
21530 }
21531 }
21532
21533 /* Emit a conditional branch testing the comparison in OPERANDS[0], targeting the label in OPERANDS[3]. */
21534
21535 void
21536 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
21537 {
21538 rtx condition_rtx, loc_ref;
21539
21540 condition_rtx = rs6000_generate_compare (operands[0], mode);
21541 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
21542 emit_jump_insn (gen_rtx_SET (pc_rtx,
21543 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
21544 loc_ref, pc_rtx)));
21545 }
21546
21547 /* Return the string to output a conditional branch to LABEL, which is
21548 the operand template of the label, or NULL if the branch is really a
21549 conditional return.
21550
21551 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
21552 condition code register and its mode specifies what kind of
21553 comparison we made.
21554
21555 REVERSED is nonzero if we should reverse the sense of the comparison.
21556
21557 INSN is the insn. */
21558
21559 char *
21560 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
21561 {
21562 static char string[64];
21563 enum rtx_code code = GET_CODE (op);
21564 rtx cc_reg = XEXP (op, 0);
21565 machine_mode mode = GET_MODE (cc_reg);
21566 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
21567 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
21568 int really_reversed = reversed ^ need_longbranch;
21569 char *s = string;
21570 const char *ccode;
21571 const char *pred;
21572 rtx note;
21573
21574 validate_condition_mode (code, mode);
21575
21576 /* Work out which way this really branches. We could use
21577 reverse_condition_maybe_unordered here always but this
21578 makes the resulting assembler clearer. */
21579 if (really_reversed)
21580 {
21581 /* Reversal of FP compares requires care -- an ordered compare
21582 becomes an unordered compare and vice versa. */
21583 if (mode == CCFPmode)
21584 code = reverse_condition_maybe_unordered (code);
21585 else
21586 code = reverse_condition (code);
21587 }
21588
21589 switch (code)
21590 {
21591 /* Not all of these are actually distinct opcodes, but
21592 we distinguish them for clarity of the resulting assembler. */
21593 case NE: case LTGT:
21594 ccode = "ne"; break;
21595 case EQ: case UNEQ:
21596 ccode = "eq"; break;
21597 case GE: case GEU:
21598 ccode = "ge"; break;
21599 case GT: case GTU: case UNGT:
21600 ccode = "gt"; break;
21601 case LE: case LEU:
21602 ccode = "le"; break;
21603 case LT: case LTU: case UNLT:
21604 ccode = "lt"; break;
21605 case UNORDERED: ccode = "un"; break;
21606 case ORDERED: ccode = "nu"; break;
21607 case UNGE: ccode = "nl"; break;
21608 case UNLE: ccode = "ng"; break;
21609 default:
21610 gcc_unreachable ();
21611 }
21612
21613 /* Maybe we have a guess as to how likely the branch is. */
21614 pred = "";
21615 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
21616 if (note != NULL_RTX)
21617 {
21618 /* PROB is the difference from 50%. */
21619 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
21620 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
21621
21622 /* Only hint for highly probable/improbable branches on newer cpus when
21623 we have real profile data, as static prediction overrides processor
21624 dynamic prediction. For older cpus we may as well always hint, but
21625 assume not taken for branches that are very close to 50% as a
21626 mispredicted taken branch is more expensive than a
21627 mispredicted not-taken branch. */
21628 if (rs6000_always_hint
21629 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
21630 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
21631 && br_prob_note_reliable_p (note)))
21632 {
21633 if (abs (prob) > REG_BR_PROB_BASE / 20
21634 && ((prob > 0) ^ need_longbranch))
21635 pred = "+";
21636 else
21637 pred = "-";
21638 }
21639 }
21640
21641 if (label == NULL)
21642 s += sprintf (s, "b%slr%s ", ccode, pred);
21643 else
21644 s += sprintf (s, "b%s%s ", ccode, pred);
21645
21646 /* We need to escape any '%' characters in the reg_names string.
21647 Assume they'd only be the first character.... */
21648 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
21649 *s++ = '%';
21650 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
21651
21652 if (label != NULL)
21653 {
21654 /* If the branch distance was too far, we may have to use an
21655 unconditional branch to go the distance. */
21656 if (need_longbranch)
21657 s += sprintf (s, ",$+8\n\tb %s", label);
21658 else
21659 s += sprintf (s, ",%s", label);
21660 }
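/* With the default register names this yields, e.g., "beq+ 0,.L2" for
   a branch on cr0 predicted taken; for an out-of-range target the
   sense is reversed and "bne 0,$+8" followed by "b .L2" is emitted.  */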
21661
21662 return string;
21663 }
21664
21665 /* Return insn for VSX or Altivec comparisons. */
21666
21667 static rtx
21668 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
21669 {
21670 rtx mask;
21671 machine_mode mode = GET_MODE (op0);
21672
21673 switch (code)
21674 {
21675 default:
21676 break;
21677
21678 case GE:
21679 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21680 return NULL_RTX;
21681 /* FALLTHRU */
21682
21683 case EQ:
21684 case GT:
21685 case GTU:
21686 case ORDERED:
21687 case UNORDERED:
21688 case UNEQ:
21689 case LTGT:
21690 mask = gen_reg_rtx (mode);
21691 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
21692 return mask;
21693 }
21694
21695 return NULL_RTX;
21696 }
21697
21698 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
21699 DMODE is expected destination mode. This is a recursive function. */
21700
21701 static rtx
21702 rs6000_emit_vector_compare (enum rtx_code rcode,
21703 rtx op0, rtx op1,
21704 machine_mode dmode)
21705 {
21706 rtx mask;
21707 bool swap_operands = false;
21708 bool try_again = false;
21709
21710 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
21711 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
21712
21713 /* See if the comparison works as is. */
21714 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
21715 if (mask)
21716 return mask;
21717
21718 switch (rcode)
21719 {
21720 case LT:
21721 rcode = GT;
21722 swap_operands = true;
21723 try_again = true;
21724 break;
21725 case LTU:
21726 rcode = GTU;
21727 swap_operands = true;
21728 try_again = true;
21729 break;
21730 case NE:
21731 case UNLE:
21732 case UNLT:
21733 case UNGE:
21734 case UNGT:
21735 /* Invert condition and try again.
21736 e.g., A != B becomes ~(A==B). */
21737 {
21738 enum rtx_code rev_code;
21739 enum insn_code nor_code;
21740 rtx mask2;
21741
21742 rev_code = reverse_condition_maybe_unordered (rcode);
21743 if (rev_code == UNKNOWN)
21744 return NULL_RTX;
21745
21746 nor_code = optab_handler (one_cmpl_optab, dmode);
21747 if (nor_code == CODE_FOR_nothing)
21748 return NULL_RTX;
21749
21750 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
21751 if (!mask2)
21752 return NULL_RTX;
21753
21754 mask = gen_reg_rtx (dmode);
21755 emit_insn (GEN_FCN (nor_code) (mask, mask2));
21756 return mask;
21757 }
21758 break;
21759 case GE:
21760 case GEU:
21761 case LE:
21762 case LEU:
21763 /* Try GT/GTU/LT/LTU OR EQ */
21764 {
21765 rtx c_rtx, eq_rtx;
21766 enum insn_code ior_code;
21767 enum rtx_code new_code;
21768
21769 switch (rcode)
21770 {
21771 case GE:
21772 new_code = GT;
21773 break;
21774
21775 case GEU:
21776 new_code = GTU;
21777 break;
21778
21779 case LE:
21780 new_code = LT;
21781 break;
21782
21783 case LEU:
21784 new_code = LTU;
21785 break;
21786
21787 default:
21788 gcc_unreachable ();
21789 }
21790
21791 ior_code = optab_handler (ior_optab, dmode);
21792 if (ior_code == CODE_FOR_nothing)
21793 return NULL_RTX;
21794
21795 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
21796 if (!c_rtx)
21797 return NULL_RTX;
21798
21799 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
21800 if (!eq_rtx)
21801 return NULL_RTX;
21802
21803 mask = gen_reg_rtx (dmode);
21804 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
21805 return mask;
21806 }
21807 break;
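/* For example, a V16QImode GEU test decomposes into vcmpgtub and
   vcmpequb combined with a vor.  */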
21808 default:
21809 return NULL_RTX;
21810 }
21811
21812 if (try_again)
21813 {
21814 if (swap_operands)
21815 std::swap (op0, op1);
21816
21817 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
21818 if (mask)
21819 return mask;
21820 }
21821
21822 /* You only get two chances. */
21823 return NULL_RTX;
21824 }
21825
21826 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
21827 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
21828 operands for the relation operation COND. */
21829
21830 int
21831 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
21832 rtx cond, rtx cc_op0, rtx cc_op1)
21833 {
21834 machine_mode dest_mode = GET_MODE (dest);
21835 machine_mode mask_mode = GET_MODE (cc_op0);
21836 enum rtx_code rcode = GET_CODE (cond);
21837 machine_mode cc_mode = CCmode;
21838 rtx mask;
21839 rtx cond2;
21840 bool invert_move = false;
21841
21842 if (VECTOR_UNIT_NONE_P (dest_mode))
21843 return 0;
21844
21845 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
21846 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
21847
21848 switch (rcode)
21849 {
21850 /* Swap operands if we can; otherwise fall back to doing the operation
21851 as specified and using a NOR to invert the test. */
21852 case NE:
21853 case UNLE:
21854 case UNLT:
21855 case UNGE:
21856 case UNGT:
21857 /* Invert condition and try again.
21858 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
21859 invert_move = true;
21860 rcode = reverse_condition_maybe_unordered (rcode);
21861 if (rcode == UNKNOWN)
21862 return 0;
21863 break;
21864
21865 case GE:
21866 case LE:
21867 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
21868 {
21869 /* Invert condition to avoid compound test. */
21870 invert_move = true;
21871 rcode = reverse_condition (rcode);
21872 }
21873 break;
21874
21875 case GTU:
21876 case GEU:
21877 case LTU:
21878 case LEU:
21879 /* Mark unsigned tests with CCUNSmode. */
21880 cc_mode = CCUNSmode;
21881
21882 /* Invert condition to avoid compound test if necessary. */
21883 if (rcode == GEU || rcode == LEU)
21884 {
21885 invert_move = true;
21886 rcode = reverse_condition (rcode);
21887 }
21888 break;
21889
21890 default:
21891 break;
21892 }
21893
21894 /* Get the vector mask for the given relational operations. */
21895 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
21896
21897 if (!mask)
21898 return 0;
21899
21900 if (invert_move)
21901 std::swap (op_true, op_false);
21902
21903 /* The compare mask is all-ones (-1) or all-zeros (0) per element; exploit that when OP_TRUE/OP_FALSE are constant vectors. */
21904 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
21905 && (GET_CODE (op_true) == CONST_VECTOR
21906 || GET_CODE (op_false) == CONST_VECTOR))
21907 {
21908 rtx constant_0 = CONST0_RTX (dest_mode);
21909 rtx constant_m1 = CONSTM1_RTX (dest_mode);
21910
21911 if (op_true == constant_m1 && op_false == constant_0)
21912 {
21913 emit_move_insn (dest, mask);
21914 return 1;
21915 }
21916
21917 else if (op_true == constant_0 && op_false == constant_m1)
21918 {
21919 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
21920 return 1;
21921 }
21922
21923 /* If we can't use the vector comparison directly, perhaps we can use
21924 the mask for the true or false fields, instead of loading up a
21925 constant. */
21926 if (op_true == constant_m1)
21927 op_true = mask;
21928
21929 if (op_false == constant_0)
21930 op_false = mask;
21931 }
21932
21933 if (!REG_P (op_true) && !SUBREG_P (op_true))
21934 op_true = force_reg (dest_mode, op_true);
21935
21936 if (!REG_P (op_false) && !SUBREG_P (op_false))
21937 op_false = force_reg (dest_mode, op_false);
21938
21939 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
21940 CONST0_RTX (dest_mode));
21941 emit_insn (gen_rtx_SET (dest,
21942 gen_rtx_IF_THEN_ELSE (dest_mode,
21943 cond2,
21944 op_true,
21945 op_false)));
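/* The IF_THEN_ELSE above tests the mask against the all-zeros vector,
   a form the vector select patterns match as a single vsel (Altivec)
   or xxsel (VSX) instruction.  */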
21946 return 1;
21947 }
21948
21949 /* ISA 3.0 (power9) minmax subcase to emit an XSMAXCDP or XSMINCDP instruction
21950 for SF/DF scalars. Move TRUE_COND to DEST if OP of the operands of the last
21951 comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the
21952 hardware has no such operation. */
21953
21954 static int
21955 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
21956 {
21957 enum rtx_code code = GET_CODE (op);
21958 rtx op0 = XEXP (op, 0);
21959 rtx op1 = XEXP (op, 1);
21960 machine_mode compare_mode = GET_MODE (op0);
21961 machine_mode result_mode = GET_MODE (dest);
21962 bool max_p = false;
21963
21964 if (result_mode != compare_mode)
21965 return 0;
21966
21967 if (code == GE || code == GT)
21968 max_p = true;
21969 else if (code == LE || code == LT)
21970 max_p = false;
21971 else
21972 return 0;
21973
21974 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
21975 ;
21976
21977 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
21978 max_p = !max_p;
21979
21980 else
21981 return 0;
21982
21983 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
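/* For example, "r = (a >= b) ? a : b" in DFmode arrives here with
   MAX_P set and becomes a single XSMAXCDP.  */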
21984 return 1;
21985 }
21986
21987 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
21988 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP of the
21989 operands of the last comparison is nonzero/true, FALSE_COND if it is
21990 zero/false. Return 0 if the hardware has no such operation. */
21991
21992 static int
21993 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
21994 {
21995 enum rtx_code code = GET_CODE (op);
21996 rtx op0 = XEXP (op, 0);
21997 rtx op1 = XEXP (op, 1);
21998 machine_mode result_mode = GET_MODE (dest);
21999 rtx compare_rtx;
22000 rtx cmove_rtx;
22001 rtx clobber_rtx;
22002
22003 if (!can_create_pseudo_p ())
22004 return 0;
22005
22006 switch (code)
22007 {
22008 case EQ:
22009 case GE:
22010 case GT:
22011 break;
22012
22013 case NE:
22014 case LT:
22015 case LE:
22016 code = swap_condition (code);
22017 std::swap (op0, op1);
22018 break;
22019
22020 default:
22021 return 0;
22022 }
22023
22024 /* Generate: [(parallel [(set (dest)
22025 (if_then_else (op (cmp1) (cmp2))
22026 (true)
22027 (false)))
22028 (clobber (scratch))])]. */
22029
22030 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
22031 cmove_rtx = gen_rtx_SET (dest,
22032 gen_rtx_IF_THEN_ELSE (result_mode,
22033 compare_rtx,
22034 true_cond,
22035 false_cond));
22036
22037 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
22038 emit_insn (gen_rtx_PARALLEL (VOIDmode,
22039 gen_rtvec (2, cmove_rtx, clobber_rtx)));
22040
22041 return 1;
22042 }
22043
22044 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
22045 operands of the last comparison is nonzero/true, FALSE_COND if it
22046 is zero/false. Return 0 if the hardware has no such operation. */
22047
22048 int
22049 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22050 {
22051 enum rtx_code code = GET_CODE (op);
22052 rtx op0 = XEXP (op, 0);
22053 rtx op1 = XEXP (op, 1);
22054 machine_mode compare_mode = GET_MODE (op0);
22055 machine_mode result_mode = GET_MODE (dest);
22056 rtx temp;
22057 bool is_against_zero;
22058
22059 /* These modes should always match. */
22060 if (GET_MODE (op1) != compare_mode
22061 /* In the isel case however, we can use a compare immediate, so
22062 op1 may be a small constant. */
22063 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
22064 return 0;
22065 if (GET_MODE (true_cond) != result_mode)
22066 return 0;
22067 if (GET_MODE (false_cond) != result_mode)
22068 return 0;
22069
22070 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
22071 if (TARGET_P9_MINMAX
22072 && (compare_mode == SFmode || compare_mode == DFmode)
22073 && (result_mode == SFmode || result_mode == DFmode))
22074 {
22075 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
22076 return 1;
22077
22078 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
22079 return 1;
22080 }
22081
22082 /* Don't allow using floating point comparisons for integer results for
22083 now. */
22084 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
22085 return 0;
22086
22087 /* First, work out if the hardware can do this at all, or
22088 if it's too slow.... */
22089 if (!FLOAT_MODE_P (compare_mode))
22090 {
22091 if (TARGET_ISEL)
22092 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
22093 return 0;
22094 }
22095
22096 is_against_zero = op1 == CONST0_RTX (compare_mode);
22097
22098 /* A floating-point subtract might overflow, underflow, or produce
22099 an inexact result, thus changing the floating-point flags, so it
22100 can't be generated if we care about that. It's safe if one side
22101 of the construct is zero, since then no subtract will be
22102 generated. */
22103 if (SCALAR_FLOAT_MODE_P (compare_mode)
22104 && flag_trapping_math && ! is_against_zero)
22105 return 0;
22106
22107 /* Eliminate half of the comparisons by switching operands; this
22108 makes the remaining code simpler. */
22109 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
22110 || code == LTGT || code == LT || code == UNLE)
22111 {
22112 code = reverse_condition_maybe_unordered (code);
22113 temp = true_cond;
22114 true_cond = false_cond;
22115 false_cond = temp;
22116 }
22117
22118 /* UNEQ and LTGT take four instructions for a comparison with zero,
22119 so it'll probably be faster to use a branch here too. */
22120 if (code == UNEQ && HONOR_NANS (compare_mode))
22121 return 0;
22122
22123 /* We're going to try to implement comparisons by performing
22124 a subtract, then comparing against zero. Unfortunately,
22125 Inf - Inf is NaN which is not zero, and so if we don't
22126 know that the operand is finite and the comparison
22127 would treat EQ different to UNORDERED, we can't do it. */
22128 if (HONOR_INFINITIES (compare_mode)
22129 && code != GT && code != UNGE
22130 && (GET_CODE (op1) != CONST_DOUBLE
22131 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
22132 /* Constructs of the form (a OP b ? a : b) are safe. */
22133 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
22134 || (! rtx_equal_p (op0, true_cond)
22135 && ! rtx_equal_p (op1, true_cond))))
22136 return 0;
22137
22138 /* At this point we know we can use fsel. */
22139
22140 /* Reduce the comparison to a comparison against zero. */
22141 if (! is_against_zero)
22142 {
22143 temp = gen_reg_rtx (compare_mode);
22144 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
22145 op0 = temp;
22146 op1 = CONST0_RTX (compare_mode);
22147 }
22148
22149 /* If we don't care about NaNs we can reduce some of the comparisons
22150 down to faster ones. */
22151 if (! HONOR_NANS (compare_mode))
22152 switch (code)
22153 {
22154 case GT:
22155 code = LE;
22156 temp = true_cond;
22157 true_cond = false_cond;
22158 false_cond = temp;
22159 break;
22160 case UNGE:
22161 code = GE;
22162 break;
22163 case UNEQ:
22164 code = EQ;
22165 break;
22166 default:
22167 break;
22168 }
22169
22170 /* Now, reduce everything down to a GE. */
22171 switch (code)
22172 {
22173 case GE:
22174 break;
22175
22176 case LE:
22177 temp = gen_reg_rtx (compare_mode);
22178 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
22179 op0 = temp;
22180 break;
22181
22182 case ORDERED:
22183 temp = gen_reg_rtx (compare_mode);
22184 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
22185 op0 = temp;
22186 break;
22187
22188 case EQ:
22189 temp = gen_reg_rtx (compare_mode);
22190 emit_insn (gen_rtx_SET (temp,
22191 gen_rtx_NEG (compare_mode,
22192 gen_rtx_ABS (compare_mode, op0))));
22193 op0 = temp;
22194 break;
22195
22196 case UNGE:
22197 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
22198 temp = gen_reg_rtx (result_mode);
22199 emit_insn (gen_rtx_SET (temp,
22200 gen_rtx_IF_THEN_ELSE (result_mode,
22201 gen_rtx_GE (VOIDmode,
22202 op0, op1),
22203 true_cond, false_cond)));
22204 false_cond = true_cond;
22205 true_cond = temp;
22206
22207 temp = gen_reg_rtx (compare_mode);
22208 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
22209 op0 = temp;
22210 break;
22211
22212 case GT:
22213 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
22214 temp = gen_reg_rtx (result_mode);
22215 emit_insn (gen_rtx_SET (temp,
22216 gen_rtx_IF_THEN_ELSE (result_mode,
22217 gen_rtx_GE (VOIDmode,
22218 op0, op1),
22219 true_cond, false_cond)));
22220 true_cond = false_cond;
22221 false_cond = temp;
22222
22223 temp = gen_reg_rtx (compare_mode);
22224 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
22225 op0 = temp;
22226 break;
22227
22228 default:
22229 gcc_unreachable ();
22230 }
22231
22232 emit_insn (gen_rtx_SET (dest,
22233 gen_rtx_IF_THEN_ELSE (result_mode,
22234 gen_rtx_GE (VOIDmode,
22235 op0, op1),
22236 true_cond, false_cond)));
22237 return 1;
22238 }
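
/* As an illustrative sketch (assuming flags such as -fno-trapping-math
   permit the subtract reduction; the actual insns depend on context), a
   DFmode "d = (a >= b) ? x : y" handled above becomes a compare against
   zero plus a single fsel:

	fsub	tmp,a,b		# tmp = a - b
	fsel	d,tmp,x,y	# d = (tmp >= 0.0) ? x : y  */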
22239
22240 /* Same as above, but for ints (isel). */
22241
22242 int
22243 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22244 {
22245 rtx condition_rtx, cr;
22246 machine_mode mode = GET_MODE (dest);
22247 enum rtx_code cond_code;
22248 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
22249 bool signedp;
22250
22251 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
22252 return 0;
22253
22254 /* We still have to do the compare, because isel doesn't do a
22255 compare; it just looks at the CRx bits set by a previous compare
22256 instruction. */
22257 condition_rtx = rs6000_generate_compare (op, mode);
22258 cond_code = GET_CODE (condition_rtx);
22259 cr = XEXP (condition_rtx, 0);
22260 signedp = GET_MODE (cr) == CCmode;
22261
22262 isel_func = (mode == SImode
22263 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
22264 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
22265
22266 switch (cond_code)
22267 {
22268 case LT: case GT: case LTU: case GTU: case EQ:
22269 /* isel handles these directly. */
22270 break;
22271
22272 default:
22273 /* We need to swap the sense of the comparison. */
22274 {
22275 std::swap (false_cond, true_cond);
22276 PUT_CODE (condition_rtx, reverse_condition (cond_code));
22277 }
22278 break;
22279 }
22280
22281 false_cond = force_reg (mode, false_cond);
22282 if (true_cond != const0_rtx)
22283 true_cond = force_reg (mode, true_cond);
22284
22285 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
22286
22287 return 1;
22288 }
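
/* A minimal sketch of what the isel path above produces for SImode
   "d = (a < b) ? x : y" (register allocation and CR field vary):

	cmpw	cr0,a,b
	isel	d,x,y,lt	# d = cr0.lt ? x : y  */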
22289
22290 void
22291 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
22292 {
22293 machine_mode mode = GET_MODE (op0);
22294 enum rtx_code c;
22295 rtx target;
22296
22297 /* VSX/altivec have direct min/max insns. */
22298 if ((code == SMAX || code == SMIN)
22299 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
22300 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
22301 {
22302 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
22303 return;
22304 }
22305
22306 if (code == SMAX || code == SMIN)
22307 c = GE;
22308 else
22309 c = GEU;
22310
22311 if (code == SMAX || code == UMAX)
22312 target = emit_conditional_move (dest, c, op0, op1, mode,
22313 op0, op1, mode, 0);
22314 else
22315 target = emit_conditional_move (dest, c, op0, op1, mode,
22316 op1, op0, mode, 0);
22317 gcc_assert (target);
22318 if (target != dest)
22319 emit_move_insn (dest, target);
22320 }
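
/* For example, SImode smax reaches the conditional-move path above as
   "dest = (op0 >= op1) ? op0 : op1"; on an isel target that is expected
   to become a compare plus one isel rather than a branch (a sketch;
   without isel the generic conditional-move expander decides what to
   emit).  */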
22321
22322 /* A subroutine of the atomic operation splitters. Jump to LABEL if
22323 COND is true. Mark the jump as unlikely to be taken. */
22324
22325 static void
22326 emit_unlikely_jump (rtx cond, rtx label)
22327 {
22328 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
22329 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
22330 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
22331 }
22332
22333 /* A subroutine of the atomic operation splitters. Emit a load-locked
22334 instruction in MODE. For QI/HImode, possibly use a pattern that includes
22335 the zero_extend operation. */
22336
22337 static void
22338 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
22339 {
22340 rtx (*fn) (rtx, rtx) = NULL;
22341
22342 switch (mode)
22343 {
22344 case E_QImode:
22345 fn = gen_load_lockedqi;
22346 break;
22347 case E_HImode:
22348 fn = gen_load_lockedhi;
22349 break;
22350 case E_SImode:
22351 if (GET_MODE (mem) == QImode)
22352 fn = gen_load_lockedqi_si;
22353 else if (GET_MODE (mem) == HImode)
22354 fn = gen_load_lockedhi_si;
22355 else
22356 fn = gen_load_lockedsi;
22357 break;
22358 case E_DImode:
22359 fn = gen_load_lockeddi;
22360 break;
22361 case E_TImode:
22362 fn = gen_load_lockedti;
22363 break;
22364 default:
22365 gcc_unreachable ();
22366 }
22367 emit_insn (fn (reg, mem));
22368 }
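
/* A note on the generators above (a restatement, not a new contract):
   the load_locked patterns correspond to the ISA load-and-reserve insns
   lbarx (QImode), lharx (HImode), lwarx (SImode), ldarx (DImode) and
   lqarx (TImode, where available); the *_si variants zero-extend the
   narrow load into an SImode register.  */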
22369
22370 /* A subroutine of the atomic operation splitters. Emit a store-conditional
22371 instruction in MODE. */
22372
22373 static void
22374 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
22375 {
22376 rtx (*fn) (rtx, rtx, rtx) = NULL;
22377
22378 switch (mode)
22379 {
22380 case E_QImode:
22381 fn = gen_store_conditionalqi;
22382 break;
22383 case E_HImode:
22384 fn = gen_store_conditionalhi;
22385 break;
22386 case E_SImode:
22387 fn = gen_store_conditionalsi;
22388 break;
22389 case E_DImode:
22390 fn = gen_store_conditionaldi;
22391 break;
22392 case E_TImode:
22393 fn = gen_store_conditionalti;
22394 break;
22395 default:
22396 gcc_unreachable ();
22397 }
22398
22399 /* Emit sync before stwcx. to address PPC405 Erratum. */
22400 if (PPC405_ERRATUM77)
22401 emit_insn (gen_hwsync ());
22402
22403 emit_insn (fn (res, mem, val));
22404 }
22405
22406 /* Expand barriers before and after a load_locked/store_cond sequence. */
22407
22408 static rtx
22409 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
22410 {
22411 rtx addr = XEXP (mem, 0);
22412
22413 if (!legitimate_indirect_address_p (addr, reload_completed)
22414 && !legitimate_indexed_address_p (addr, reload_completed))
22415 {
22416 addr = force_reg (Pmode, addr);
22417 mem = replace_equiv_address_nv (mem, addr);
22418 }
22419
22420 switch (model)
22421 {
22422 case MEMMODEL_RELAXED:
22423 case MEMMODEL_CONSUME:
22424 case MEMMODEL_ACQUIRE:
22425 break;
22426 case MEMMODEL_RELEASE:
22427 case MEMMODEL_ACQ_REL:
22428 emit_insn (gen_lwsync ());
22429 break;
22430 case MEMMODEL_SEQ_CST:
22431 emit_insn (gen_hwsync ());
22432 break;
22433 default:
22434 gcc_unreachable ();
22435 }
22436 return mem;
22437 }
22438
22439 static void
22440 rs6000_post_atomic_barrier (enum memmodel model)
22441 {
22442 switch (model)
22443 {
22444 case MEMMODEL_RELAXED:
22445 case MEMMODEL_CONSUME:
22446 case MEMMODEL_RELEASE:
22447 break;
22448 case MEMMODEL_ACQUIRE:
22449 case MEMMODEL_ACQ_REL:
22450 case MEMMODEL_SEQ_CST:
22451 emit_insn (gen_isync ());
22452 break;
22453 default:
22454 gcc_unreachable ();
22455 }
22456 }
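
/* Taken together, the two barrier helpers above implement the usual
   PowerPC mapping of the C11 memory models; summarizing the switches
   (this merely restates the code, it adds no guarantee):

	model			before		after
	relaxed, consume	-		-
	acquire			-		isync
	release			lwsync		-
	acq_rel			lwsync		isync
	seq_cst			hwsync		isync  */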
22457
22458 /* A subroutine of the various atomic expanders. For sub-word operations,
22459 we must adjust things to operate on SImode. Given the original MEM,
22460 return a new aligned memory. Also build and return the quantities by
22461 which to shift and mask. */
22462
22463 static rtx
22464 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
22465 {
22466 rtx addr, align, shift, mask, mem;
22467 HOST_WIDE_INT shift_mask;
22468 machine_mode mode = GET_MODE (orig_mem);
22469
22470 /* For smaller modes, we have to implement this via SImode. */
22471 shift_mask = (mode == QImode ? 0x18 : 0x10);
22472
22473 addr = XEXP (orig_mem, 0);
22474 addr = force_reg (GET_MODE (addr), addr);
22475
22476 /* Generate a new aligned MEM for the word containing the subword. We
22477 do not want any of the existing MEM_ATTR data, as we're now
22478 accessing memory outside the original object. */
22479 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
22480 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22481 mem = gen_rtx_MEM (SImode, align);
22482 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
22483 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
22484 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
22485
22486 /* Shift amount for subword relative to aligned word. */
22487 shift = gen_reg_rtx (SImode);
22488 addr = gen_lowpart (SImode, addr);
22489 rtx tmp = gen_reg_rtx (SImode);
22490 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
22491 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
22492 if (BYTES_BIG_ENDIAN)
22493 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
22494 shift, 1, OPTAB_LIB_WIDEN);
22495 *pshift = shift;
22496
22497 /* Mask for insertion. */
22498 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
22499 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
22500 *pmask = mask;
22501
22502 return mem;
22503 }
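
/* Worked example (hypothetical address, QImode): for a byte at address
   0x1002 the code above computes, on little-endian,

	align = 0x1002 & -4          = 0x1000     (containing word)
	shift = (0x1002 << 3) & 0x18 = 16         (bit offset of byte)
	mask  = 0xFF << 16           = 0x00FF0000

   On big-endian the shift is additionally XORed with 0x18, giving 8,
   because the lowest-addressed byte is the most significant.  */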
22504
22505 /* A subroutine of the various atomic expanders. For sub-word operands,
22506 combine OLDVAL and NEWVAL via MASK, i.e. compute
(OLDVAL & ~MASK) | NEWVAL. Returns a new pseudo. */
22507
22508 static rtx
22509 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
22510 {
22511 rtx x;
22512
22513 x = gen_reg_rtx (SImode);
22514 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
22515 gen_rtx_NOT (SImode, mask),
22516 oldval)));
22517
22518 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
22519
22520 return x;
22521 }
22522
22523 /* A subroutine of the various atomic expanders. For sub-word operands,
22524 extract WIDE to NARROW via SHIFT. */
22525
22526 static void
22527 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
22528 {
22529 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
22530 wide, 1, OPTAB_LIB_WIDEN);
22531 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
22532 }
22533
22534 /* Expand an atomic compare and swap operation. */
22535
22536 void
22537 rs6000_expand_atomic_compare_and_swap (rtx operands[])
22538 {
22539 rtx boolval, retval, mem, oldval, newval, cond;
22540 rtx label1, label2, x, mask, shift;
22541 machine_mode mode, orig_mode;
22542 enum memmodel mod_s, mod_f;
22543 bool is_weak;
22544
22545 boolval = operands[0];
22546 retval = operands[1];
22547 mem = operands[2];
22548 oldval = operands[3];
22549 newval = operands[4];
22550 is_weak = (INTVAL (operands[5]) != 0);
22551 mod_s = memmodel_base (INTVAL (operands[6]));
22552 mod_f = memmodel_base (INTVAL (operands[7]));
22553 orig_mode = mode = GET_MODE (mem);
22554
22555 mask = shift = NULL_RTX;
22556 if (mode == QImode || mode == HImode)
22557 {
22558 /* Before power8, we didn't have access to lbarx/lharx, so we generate
22559 lwarx and shift/mask operations. With power8, we need to do the
22560 comparison in SImode, but the store is still done in QI/HImode. */
22561 oldval = convert_modes (SImode, mode, oldval, 1);
22562
22563 if (!TARGET_SYNC_HI_QI)
22564 {
22565 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
22566
22567 /* Shift and mask OLDVAL into position within the word. */
22568 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
22569 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22570
22571 /* Shift and mask NEWVAL into position within the word. */
22572 newval = convert_modes (SImode, mode, newval, 1);
22573 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
22574 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22575 }
22576
22577 /* Prepare to adjust the return value. */
22578 retval = gen_reg_rtx (SImode);
22579 mode = SImode;
22580 }
22581 else if (reg_overlap_mentioned_p (retval, oldval))
22582 oldval = copy_to_reg (oldval);
22583
22584 if (mode != TImode && !reg_or_short_operand (oldval, mode))
22585 oldval = copy_to_mode_reg (mode, oldval);
22586
22587 if (reg_overlap_mentioned_p (retval, newval))
22588 newval = copy_to_reg (newval);
22589
22590 mem = rs6000_pre_atomic_barrier (mem, mod_s);
22591
22592 label1 = NULL_RTX;
22593 if (!is_weak)
22594 {
22595 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
22596 emit_label (XEXP (label1, 0));
22597 }
22598 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
22599
22600 emit_load_locked (mode, retval, mem);
22601
22602 x = retval;
22603 if (mask)
22604 x = expand_simple_binop (SImode, AND, retval, mask,
22605 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22606
22607 cond = gen_reg_rtx (CCmode);
22608 /* If we have TImode, synthesize a comparison. */
22609 if (mode != TImode)
22610 x = gen_rtx_COMPARE (CCmode, x, oldval);
22611 else
22612 {
22613 rtx xor1_result = gen_reg_rtx (DImode);
22614 rtx xor2_result = gen_reg_rtx (DImode);
22615 rtx or_result = gen_reg_rtx (DImode);
22616 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
22617 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
22618 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
22619 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
22620
22621 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
22622 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
22623 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
22624 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
22625 }
22626
22627 emit_insn (gen_rtx_SET (cond, x));
22628
22629 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
22630 emit_unlikely_jump (x, label2);
22631
22632 x = newval;
22633 if (mask)
22634 x = rs6000_mask_atomic_subword (retval, newval, mask);
22635
22636 emit_store_conditional (orig_mode, cond, mem, x);
22637
22638 if (!is_weak)
22639 {
22640 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
22641 emit_unlikely_jump (x, label1);
22642 }
22643
22644 if (!is_mm_relaxed (mod_f))
22645 emit_label (XEXP (label2, 0));
22646
22647 rs6000_post_atomic_barrier (mod_s);
22648
22649 if (is_mm_relaxed (mod_f))
22650 emit_label (XEXP (label2, 0));
22651
22652 if (shift)
22653 rs6000_finish_atomic_subword (operands[1], retval, shift);
22654 else if (mode != GET_MODE (operands[1]))
22655 convert_move (operands[1], retval, 1);
22656
22657 /* In all cases, CR0 contains EQ on success, and NE on failure. */
22658 x = gen_rtx_EQ (SImode, cond, const0_rtx);
22659 emit_insn (gen_rtx_SET (boolval, x));
22660 }
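
/* A minimal sketch of the loop emitted above for a strong seq_cst
   SImode __atomic_compare_exchange (labels and registers illustrative;
   the exact code depends on the operands and target):

	sync			# pre barrier for seq_cst
   1:	lwarx	r,0,mem		# load and reserve
	cmpw	cr0,r,oldval
	bne-	cr0,2f		# unequal: fail
	stwcx.	newval,0,mem	# store conditional
	bne-	cr0,1b		# lost reservation: retry (strong only)
   2:	isync			# post barrier
				# CR0.EQ then yields the boolean result  */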
22661
22662 /* Expand an atomic exchange operation. */
22663
22664 void
22665 rs6000_expand_atomic_exchange (rtx operands[])
22666 {
22667 rtx retval, mem, val, cond;
22668 machine_mode mode;
22669 enum memmodel model;
22670 rtx label, x, mask, shift;
22671
22672 retval = operands[0];
22673 mem = operands[1];
22674 val = operands[2];
22675 model = memmodel_base (INTVAL (operands[3]));
22676 mode = GET_MODE (mem);
22677
22678 mask = shift = NULL_RTX;
22679 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
22680 {
22681 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
22682
22683 /* Shift and mask VAL into position within the word. */
22684 val = convert_modes (SImode, mode, val, 1);
22685 val = expand_simple_binop (SImode, ASHIFT, val, shift,
22686 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22687
22688 /* Prepare to adjust the return value. */
22689 retval = gen_reg_rtx (SImode);
22690 mode = SImode;
22691 }
22692
22693 mem = rs6000_pre_atomic_barrier (mem, model);
22694
22695 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
22696 emit_label (XEXP (label, 0));
22697
22698 emit_load_locked (mode, retval, mem);
22699
22700 x = val;
22701 if (mask)
22702 x = rs6000_mask_atomic_subword (retval, val, mask);
22703
22704 cond = gen_reg_rtx (CCmode);
22705 emit_store_conditional (mode, cond, mem, x);
22706
22707 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
22708 emit_unlikely_jump (x, label);
22709
22710 rs6000_post_atomic_barrier (model);
22711
22712 if (shift)
22713 rs6000_finish_atomic_subword (operands[0], retval, shift);
22714 }
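
/* The exchange loop has the same shape minus the compare-and-branch
   out, roughly "1: lwarx r,0,mem; stwcx. val,0,mem; bne- 1b" (a
   sketch), bracketed by whatever barriers the memory model needs.  */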
22715
22716 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
22717 to perform. MEM is the memory on which to operate. VAL is the second
22718 operand of the binary operator. BEFORE and AFTER are optional locations to
22719 return the value of MEM either before or after the operation. MODEL_RTX
22720 is a CONST_INT containing the memory model to use. */
22721
22722 void
22723 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
22724 rtx orig_before, rtx orig_after, rtx model_rtx)
22725 {
22726 enum memmodel model = memmodel_base (INTVAL (model_rtx));
22727 machine_mode mode = GET_MODE (mem);
22728 machine_mode store_mode = mode;
22729 rtx label, x, cond, mask, shift;
22730 rtx before = orig_before, after = orig_after;
22731
22732 mask = shift = NULL_RTX;
22733 /* On power8, we want to use SImode for the operation. On previous systems,
22734 do the operation on a full word and shift/mask to get the proper byte or
22735 halfword. */
22736 if (mode == QImode || mode == HImode)
22737 {
22738 if (TARGET_SYNC_HI_QI)
22739 {
22740 val = convert_modes (SImode, mode, val, 1);
22741
22742 /* Prepare to adjust the return value. */
22743 before = gen_reg_rtx (SImode);
22744 if (after)
22745 after = gen_reg_rtx (SImode);
22746 mode = SImode;
22747 }
22748 else
22749 {
22750 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
22751
22752 /* Shift and mask VAL into position within the word. */
22753 val = convert_modes (SImode, mode, val, 1);
22754 val = expand_simple_binop (SImode, ASHIFT, val, shift,
22755 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22756
22757 switch (code)
22758 {
22759 case IOR:
22760 case XOR:
22761 /* We've already zero-extended VAL. That is sufficient to
22762 make certain that it does not affect other bits. */
22763 mask = NULL;
22764 break;
22765
22766 case AND:
22767 /* If we make certain that all of the other bits in VAL are
22768 set, that will be sufficient to not affect other bits. */
22769 x = gen_rtx_NOT (SImode, mask);
22770 x = gen_rtx_IOR (SImode, x, val);
22771 emit_insn (gen_rtx_SET (val, x));
22772 mask = NULL;
22773 break;
22774
22775 case NOT:
22776 case PLUS:
22777 case MINUS:
22778 /* These will all affect bits outside the field and need
22779 adjustment via MASK within the loop. */
22780 break;
22781
22782 default:
22783 gcc_unreachable ();
22784 }
22785
22786 /* Prepare to adjust the return value. */
22787 before = gen_reg_rtx (SImode);
22788 if (after)
22789 after = gen_reg_rtx (SImode);
22790 store_mode = mode = SImode;
22791 }
22792 }
22793
22794 mem = rs6000_pre_atomic_barrier (mem, model);
22795
22796 label = gen_label_rtx ();
22797 emit_label (label);
22798 label = gen_rtx_LABEL_REF (VOIDmode, label);
22799
22800 if (before == NULL_RTX)
22801 before = gen_reg_rtx (mode);
22802
22803 emit_load_locked (mode, before, mem);
22804
22805 if (code == NOT)
22806 {
22807 x = expand_simple_binop (mode, AND, before, val,
22808 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22809 after = expand_simple_unop (mode, NOT, x, after, 1);
22810 }
22811 else
22812 {
22813 after = expand_simple_binop (mode, code, before, val,
22814 after, 1, OPTAB_LIB_WIDEN);
22815 }
22816
22817 x = after;
22818 if (mask)
22819 {
22820 x = expand_simple_binop (SImode, AND, after, mask,
22821 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22822 x = rs6000_mask_atomic_subword (before, x, mask);
22823 }
22824 else if (store_mode != mode)
22825 x = convert_modes (store_mode, mode, x, 1);
22826
22827 cond = gen_reg_rtx (CCmode);
22828 emit_store_conditional (store_mode, cond, mem, x);
22829
22830 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
22831 emit_unlikely_jump (x, label);
22832
22833 rs6000_post_atomic_barrier (model);
22834
22835 if (shift)
22836 {
22837 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
22838 then do the calculations in an SImode register. */
22839 if (orig_before)
22840 rs6000_finish_atomic_subword (orig_before, before, shift);
22841 if (orig_after)
22842 rs6000_finish_atomic_subword (orig_after, after, shift);
22843 }
22844 else if (store_mode != mode)
22845 {
22846 /* QImode/HImode on machines with lbarx/lharx where we do the native
22847 operation and then do the calculations in an SImode register. */
22848 if (orig_before)
22849 convert_move (orig_before, before, 1);
22850 if (orig_after)
22851 convert_move (orig_after, after, 1);
22852 }
22853 else if (orig_after && after != orig_after)
22854 emit_move_insn (orig_after, after);
22855 }
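
/* For instance, a seq_cst SImode __atomic_fetch_add would come out of
   the expansion above roughly as (a sketch, not the literal output):

	sync
   1:	lwarx	before,0,mem
	add	after,before,val
	stwcx.	after,0,mem
	bne-	1b
	isync

   The NOT path pairs AND with a final complement, i.e. computes
   ~(before & val), matching __atomic_fetch_nand semantics.  */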
22856
22857 /* Emit instructions to move SRC to DST. Called by splitters for
22858 multi-register moves. It will emit at most one instruction for
22859 each register that is accessed; that is, it won't emit li/lis pairs
22860 (or equivalent for 64-bit code). One of SRC or DST must be a hard
22861 register. */
22862
22863 void
22864 rs6000_split_multireg_move (rtx dst, rtx src)
22865 {
22866 /* The register number of the first register being moved. */
22867 int reg;
22868 /* The mode that is to be moved. */
22869 machine_mode mode;
22870 /* The mode that the move is being done in, and its size. */
22871 machine_mode reg_mode;
22872 int reg_mode_size;
22873 /* The number of registers that will be moved. */
22874 int nregs;
22875
22876 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
22877 mode = GET_MODE (dst);
22878 nregs = hard_regno_nregs (reg, mode);
22879 if (FP_REGNO_P (reg))
22880 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
22881 (TARGET_HARD_FLOAT ? DFmode : SFmode);
22882 else if (ALTIVEC_REGNO_P (reg))
22883 reg_mode = V16QImode;
22884 else
22885 reg_mode = word_mode;
22886 reg_mode_size = GET_MODE_SIZE (reg_mode);
22887
22888 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
22889
22890 /* TDmode residing in FP registers is special, since the ISA requires that
22891 the lower-numbered word of a register pair is always the most significant
22892 word, even in little-endian mode. This does not match the usual subreg
22893 semantics, so we cannot use simplify_gen_subreg in those cases. Access
22894 the appropriate constituent registers "by hand" in little-endian mode.
22895
22896 Note we do not need to check for destructive overlap here since TDmode
22897 can only reside in even/odd register pairs. */
22898 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
22899 {
22900 rtx p_src, p_dst;
22901 int i;
22902
22903 for (i = 0; i < nregs; i++)
22904 {
22905 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
22906 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
22907 else
22908 p_src = simplify_gen_subreg (reg_mode, src, mode,
22909 i * reg_mode_size);
22910
22911 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
22912 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
22913 else
22914 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
22915 i * reg_mode_size);
22916
22917 emit_insn (gen_rtx_SET (p_dst, p_src));
22918 }
22919
22920 return;
22921 }
22922
22923 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
22924 {
22925 /* Move register range backwards, if we might have destructive
22926 overlap. */
22927 int i;
22928 for (i = nregs - 1; i >= 0; i--)
22929 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
22930 i * reg_mode_size),
22931 simplify_gen_subreg (reg_mode, src, mode,
22932 i * reg_mode_size)));
22933 }
22934 else
22935 {
22936 int i;
22937 int j = -1;
22938 bool used_update = false;
22939 rtx restore_basereg = NULL_RTX;
22940
22941 if (MEM_P (src) && INT_REGNO_P (reg))
22942 {
22943 rtx breg;
22944
22945 if (GET_CODE (XEXP (src, 0)) == PRE_INC
22946 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
22947 {
22948 rtx delta_rtx;
22949 breg = XEXP (XEXP (src, 0), 0);
22950 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
22951 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
22952 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
22953 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
22954 src = replace_equiv_address (src, breg);
22955 }
22956 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
22957 {
22958 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
22959 {
22960 rtx basereg = XEXP (XEXP (src, 0), 0);
22961 if (TARGET_UPDATE)
22962 {
22963 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
22964 emit_insn (gen_rtx_SET (ndst,
22965 gen_rtx_MEM (reg_mode,
22966 XEXP (src, 0))));
22967 used_update = true;
22968 }
22969 else
22970 emit_insn (gen_rtx_SET (basereg,
22971 XEXP (XEXP (src, 0), 1)));
22972 src = replace_equiv_address (src, basereg);
22973 }
22974 else
22975 {
22976 rtx basereg = gen_rtx_REG (Pmode, reg);
22977 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
22978 src = replace_equiv_address (src, basereg);
22979 }
22980 }
22981
22982 breg = XEXP (src, 0);
22983 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
22984 breg = XEXP (breg, 0);
22985
22986 /* If the base register we are using to address memory is
22987 also a destination reg, then change that register last. */
22988 if (REG_P (breg)
22989 && REGNO (breg) >= REGNO (dst)
22990 && REGNO (breg) < REGNO (dst) + nregs)
22991 j = REGNO (breg) - REGNO (dst);
22992 }
22993 else if (MEM_P (dst) && INT_REGNO_P (reg))
22994 {
22995 rtx breg;
22996
22997 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
22998 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
22999 {
23000 rtx delta_rtx;
23001 breg = XEXP (XEXP (dst, 0), 0);
23002 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
23003 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
23004 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
23005
23006 /* We have to update the breg before doing the store.
23007 Use store with update, if available. */
23008
23009 if (TARGET_UPDATE)
23010 {
23011 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
23012 emit_insn (TARGET_32BIT
23013 ? (TARGET_POWERPC64
23014 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
23015 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
23016 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
23017 used_update = true;
23018 }
23019 else
23020 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
23021 dst = replace_equiv_address (dst, breg);
23022 }
23023 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
23024 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
23025 {
23026 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
23027 {
23028 rtx basereg = XEXP (XEXP (dst, 0), 0);
23029 if (TARGET_UPDATE)
23030 {
23031 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
23032 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
23033 XEXP (dst, 0)),
23034 nsrc));
23035 used_update = true;
23036 }
23037 else
23038 emit_insn (gen_rtx_SET (basereg,
23039 XEXP (XEXP (dst, 0), 1)));
23040 dst = replace_equiv_address (dst, basereg);
23041 }
23042 else
23043 {
23044 rtx basereg = XEXP (XEXP (dst, 0), 0);
23045 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
23046 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
23047 && REG_P (basereg)
23048 && REG_P (offsetreg)
23049 && REGNO (basereg) != REGNO (offsetreg));
23050 if (REGNO (basereg) == 0)
23051 {
23052 rtx tmp = offsetreg;
23053 offsetreg = basereg;
23054 basereg = tmp;
23055 }
23056 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
23057 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
23058 dst = replace_equiv_address (dst, basereg);
23059 }
23060 }
23061 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
23062 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
23063 }
23064
23065 for (i = 0; i < nregs; i++)
23066 {
23067 /* Calculate index to next subword. */
23068 ++j;
23069 if (j == nregs)
23070 j = 0;
23071
23072 /* If the compiler already emitted the move of the first word by
23073 store with update, there is no need to do anything. */
23074 if (j == 0 && used_update)
23075 continue;
23076
23077 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
23078 j * reg_mode_size),
23079 simplify_gen_subreg (reg_mode, src, mode,
23080 j * reg_mode_size)));
23081 }
23082 if (restore_basereg != NULL_RTX)
23083 emit_insn (restore_basereg);
23084 }
23085 }
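
/* For example, a TImode move between overlapping GPR pairs is split
   above into two word_mode moves; when REGNO (src) < REGNO (dst) the
   words are emitted last-to-first, so a destination word is never
   written before the source word it overlaps has been read.  */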
23086
23087 \f
23088 /* This page contains routines that are used to determine what the
23089 function prologue and epilogue code will do and write them out. */
23090
23091 /* Determine whether the REG is really used. */
23092
23093 static bool
23094 save_reg_p (int reg)
23095 {
23096 /* We need to mark the PIC offset register live under the same conditions
23097 as it is set up; otherwise it won't be saved before we clobber it. */
23098
23099 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE)
23100 {
23101 /* When calling eh_return, we must return true for all the cases
23102 where conditional_register_usage marks the PIC offset reg
23103 call used. */
23104 if (TARGET_TOC && TARGET_MINIMAL_TOC
23105 && (crtl->calls_eh_return
23106 || df_regs_ever_live_p (reg)
23107 || !constant_pool_empty_p ()))
23108 return true;
23109
23110 if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
23111 && flag_pic)
23112 return true;
23113 }
23114
23115 return !call_used_regs[reg] && df_regs_ever_live_p (reg);
23116 }
23117
23118 /* Return the first fixed-point register that is required to be
23119 saved. 32 if none. */
23120
23121 int
23122 first_reg_to_save (void)
23123 {
23124 int first_reg;
23125
23126 /* Find lowest numbered live register. */
23127 for (first_reg = 13; first_reg <= 31; first_reg++)
23128 if (save_reg_p (first_reg))
23129 break;
23130
23131 #if TARGET_MACHO
23132 if (flag_pic
23133 && crtl->uses_pic_offset_table
23134 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
23135 return RS6000_PIC_OFFSET_TABLE_REGNUM;
23136 #endif
23137
23138 return first_reg;
23139 }
23140
23141 /* Similar, for FP regs. */
23142
23143 int
23144 first_fp_reg_to_save (void)
23145 {
23146 int first_reg;
23147
23148 /* Find lowest numbered live register. */
23149 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
23150 if (save_reg_p (first_reg))
23151 break;
23152
23153 return first_reg;
23154 }
23155
23156 /* Similar, for AltiVec regs. */
23157
23158 static int
23159 first_altivec_reg_to_save (void)
23160 {
23161 int i;
23162
23163 /* Stack frame remains as is unless we are in the AltiVec ABI. */
23164 if (! TARGET_ALTIVEC_ABI)
23165 return LAST_ALTIVEC_REGNO + 1;
23166
23167 /* On Darwin, the unwind routines are compiled without
23168 TARGET_ALTIVEC, and use save_world to save/restore the
23169 altivec registers when necessary. */
23170 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
23171 && ! TARGET_ALTIVEC)
23172 return FIRST_ALTIVEC_REGNO + 20;
23173
23174 /* Find lowest numbered live register. */
23175 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
23176 if (save_reg_p (i))
23177 break;
23178
23179 return i;
23180 }
23181
23182 /* Return a 32-bit mask of the AltiVec registers we need to set in
23183 VRSAVE. Bit n of the return value is 1 if Vn is live. The MSB in
23184 the 32-bit word is 0. */
23185
23186 static unsigned int
23187 compute_vrsave_mask (void)
23188 {
23189 unsigned int i, mask = 0;
23190
23191 /* On Darwin, the unwind routines are compiled without
23192 TARGET_ALTIVEC, and use save_world to save/restore the
23193 call-saved altivec registers when necessary. */
23194 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
23195 && ! TARGET_ALTIVEC)
23196 mask |= 0xFFF;
23197
23198 /* First, find out if we use _any_ altivec registers. */
23199 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
23200 if (df_regs_ever_live_p (i))
23201 mask |= ALTIVEC_REG_BIT (i);
23202
23203 if (mask == 0)
23204 return mask;
23205
23206 /* Next, remove the argument registers from the set. These must
23207 be in the VRSAVE mask set by the caller, so we don't need to add
23208 them in again. More importantly, the mask we compute here is
23209 used to generate CLOBBERs in the set_vrsave insn, and we do not
23210 wish the argument registers to die. */
23211 for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
23212 mask &= ~ALTIVEC_REG_BIT (i);
23213
23214 /* Similarly, remove the return value from the set. */
23215 {
23216 bool yes = false;
23217 diddle_return_value (is_altivec_return_reg, &yes);
23218 if (yes)
23219 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
23220 }
23221
23222 return mask;
23223 }
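
/* For example, if only V20 and V31 are live and neither is an argument
   or return-value register, the mask computed above has exactly the
   ALTIVEC_REG_BIT bits for V20 and V31 set, bit n counting from the
   MSB of the 32-bit VRSAVE word as described above.  */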
23224
23225 /* For a very restricted set of circumstances, we can cut down the
23226 size of prologues/epilogues by calling our own save/restore-the-world
23227 routines. */
23228
23229 static void
23230 compute_save_world_info (rs6000_stack_t *info)
23231 {
23232 info->world_save_p = 1;
23233 info->world_save_p
23234 = (WORLD_SAVE_P (info)
23235 && DEFAULT_ABI == ABI_DARWIN
23236 && !cfun->has_nonlocal_label
23237 && info->first_fp_reg_save == FIRST_SAVED_FP_REGNO
23238 && info->first_gp_reg_save == FIRST_SAVED_GP_REGNO
23239 && info->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
23240 && info->cr_save_p);
23241
23242 /* This will not work in conjunction with sibcalls. Make sure there
23243 are none. (This check is expensive, but seldom executed.) */
23244 if (WORLD_SAVE_P (info))
23245 {
23246 rtx_insn *insn;
23247 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
23248 if (CALL_P (insn) && SIBLING_CALL_P (insn))
23249 {
23250 info->world_save_p = 0;
23251 break;
23252 }
23253 }
23254
23255 if (WORLD_SAVE_P (info))
23256 {
23257 /* Even if we're not touching VRsave, make sure there's room on the
23258 stack for it, if it looks like we're calling SAVE_WORLD, which
23259 will attempt to save it. */
23260 info->vrsave_size = 4;
23261
23262 /* If we are going to save the world, we need to save the link
register too. */
23263 info->lr_save_p = 1;
23264
23265 /* "Save" the VRsave register too if we're saving the world. */
23266 if (info->vrsave_mask == 0)
23267 info->vrsave_mask = compute_vrsave_mask ();
23268
23269 /* Because the Darwin register save/restore routines only handle
23270 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
23271 check. */
23272 gcc_assert (info->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
23273 && (info->first_altivec_reg_save
23274 >= FIRST_SAVED_ALTIVEC_REGNO));
23275 }
23276
23277 return;
23278 }
23279
23280
23281 static void
23282 is_altivec_return_reg (rtx reg, void *xyes)
23283 {
23284 bool *yes = (bool *) xyes;
23285 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
23286 *yes = true;
23287 }
23288
23289 \f
23290 /* Return whether REG is a global user reg or has been specified by
23291 -ffixed-REG. We should not restore these, and so cannot use
23292 lmw or out-of-line restore functions if there are any. We also
23293 can't save them (well, emit frame notes for them), because frame
23294 unwinding during exception handling will restore saved registers. */
23295
23296 static bool
23297 fixed_reg_p (int reg)
23298 {
23299 /* Ignore fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] when the
23300 backend sets it, overriding anything the user might have given. */
23301 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
23302 && ((DEFAULT_ABI == ABI_V4 && flag_pic)
23303 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
23304 || (TARGET_TOC && TARGET_MINIMAL_TOC)))
23305 return false;
23306
23307 return fixed_regs[reg];
23308 }
23309
23310 /* Determine the strategy for saving/restoring registers. */
23311
23312 enum {
23313 SAVE_MULTIPLE = 0x1,
23314 SAVE_INLINE_GPRS = 0x2,
23315 SAVE_INLINE_FPRS = 0x4,
23316 SAVE_NOINLINE_GPRS_SAVES_LR = 0x8,
23317 SAVE_NOINLINE_FPRS_SAVES_LR = 0x10,
23318 SAVE_INLINE_VRS = 0x20,
23319 REST_MULTIPLE = 0x100,
23320 REST_INLINE_GPRS = 0x200,
23321 REST_INLINE_FPRS = 0x400,
23322 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x800,
23323 REST_INLINE_VRS = 0x1000
23324 };
23325
23326 static int
23327 rs6000_savres_strategy (rs6000_stack_t *info,
23328 bool using_static_chain_p)
23329 {
23330 int strategy = 0;
23331
23332 /* Select between in-line and out-of-line save and restore of regs.
23333 First, all the obvious cases where we don't use out-of-line. */
23334 if (crtl->calls_eh_return
23335 || cfun->machine->ra_need_lr)
23336 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
23337 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
23338 | SAVE_INLINE_VRS | REST_INLINE_VRS);
23339
23340 if (info->first_gp_reg_save == 32)
23341 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
23342
23343 if (info->first_fp_reg_save == 64)
23344 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
23345
23346 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1)
23347 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
23348
23349 /* Define cutoff for using out-of-line functions to save registers. */
23350 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
23351 {
23352 if (!optimize_size)
23353 {
23354 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
23355 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
23356 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
23357 }
23358 else
23359 {
23360 /* Prefer out-of-line restore if it will exit. */
23361 if (info->first_fp_reg_save > 61)
23362 strategy |= SAVE_INLINE_FPRS;
23363 if (info->first_gp_reg_save > 29)
23364 {
23365 if (info->first_fp_reg_save == 64)
23366 strategy |= SAVE_INLINE_GPRS;
23367 else
23368 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
23369 }
23370 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
23371 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
23372 }
23373 }
23374 else if (DEFAULT_ABI == ABI_DARWIN)
23375 {
23376 if (info->first_fp_reg_save > 60)
23377 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
23378 if (info->first_gp_reg_save > 29)
23379 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
23380 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
23381 }
23382 else
23383 {
23384 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
23385 if ((flag_shrink_wrap_separate && optimize_function_for_speed_p (cfun))
23386 || info->first_fp_reg_save > 61)
23387 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
23388 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
23389 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
23390 }
23391
23392 /* Don't bother to try to save things out-of-line if r11 is occupied
23393 by the static chain. It would require too much fiddling and the
23394 static chain is rarely used anyway. FPRs are saved w.r.t. the stack
23395 pointer on Darwin, and AIX uses r1 or r12. */
23396 if (using_static_chain_p
23397 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
23398 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
23399 | SAVE_INLINE_GPRS
23400 | SAVE_INLINE_VRS);
23401
23402 /* Don't ever restore fixed regs. That means we can't use the
23403 out-of-line register restore functions if a fixed reg is in the
23404 range of regs restored. */
23405 if (!(strategy & REST_INLINE_FPRS))
23406 for (int i = info->first_fp_reg_save; i < 64; i++)
23407 if (fixed_regs[i])
23408 {
23409 strategy |= REST_INLINE_FPRS;
23410 break;
23411 }
23412
23413 /* We can only use the out-of-line routines to restore fprs if we've
23414 saved all the registers from first_fp_reg_save in the prologue.
23415 Otherwise, we risk loading garbage. Of course, if we have saved
23416 out-of-line then we know we haven't skipped any fprs. */
23417 if ((strategy & SAVE_INLINE_FPRS)
23418 && !(strategy & REST_INLINE_FPRS))
23419 for (int i = info->first_fp_reg_save; i < 64; i++)
23420 if (!save_reg_p (i))
23421 {
23422 strategy |= REST_INLINE_FPRS;
23423 break;
23424 }
23425
23426 /* Similarly, for altivec regs. */
23427 if (!(strategy & REST_INLINE_VRS))
23428 for (int i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
23429 if (fixed_regs[i])
23430 {
23431 strategy |= REST_INLINE_VRS;
23432 break;
23433 }
23434
23435 if ((strategy & SAVE_INLINE_VRS)
23436 && !(strategy & REST_INLINE_VRS))
23437 for (int i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
23438 if (!save_reg_p (i))
23439 {
23440 strategy |= REST_INLINE_VRS;
23441 break;
23442 }
23443
23444 /* info->lr_save_p isn't yet set if the only reason lr needs to be
23445 saved is an out-of-line save or restore. Set up the value for
23446 the next test (excluding out-of-line gprs). */
23447 bool lr_save_p = (info->lr_save_p
23448 || !(strategy & SAVE_INLINE_FPRS)
23449 || !(strategy & SAVE_INLINE_VRS)
23450 || !(strategy & REST_INLINE_FPRS)
23451 || !(strategy & REST_INLINE_VRS));
23452
23453 if (TARGET_MULTIPLE
23454 && !TARGET_POWERPC64
23455 && info->first_gp_reg_save < 31
23456 && !(flag_shrink_wrap
23457 && flag_shrink_wrap_separate
23458 && optimize_function_for_speed_p (cfun)))
23459 {
23460 int count = 0;
23461 for (int i = info->first_gp_reg_save; i < 32; i++)
23462 if (save_reg_p (i))
23463 count++;
23464
23465 if (count <= 1)
23466 /* Don't use store multiple if only one reg needs to be
23467 saved. This can occur for example when the ABI_V4 pic reg
23468 (r30) needs to be saved to make calls, but r31 is not
23469 used. */
23470 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
23471 else
23472 {
23473 /* Prefer store multiple for saves over out-of-line
23474 routines, since the store-multiple instruction will
23475 always be smaller. */
23476 strategy |= SAVE_INLINE_GPRS | SAVE_MULTIPLE;
23477
23478 /* The situation is more complicated with load multiple.
23479 We'd prefer to use the out-of-line routines for restores,
23480 since the "exit" out-of-line routines can handle the
23481 restore of LR and the frame teardown. However it doesn't
23482 make sense to use the out-of-line routine if that is the
23483 only reason we'd need to save LR, and we can't use the
23484 "exit" out-of-line gpr restore if we have saved some
23485 fprs. In those cases it is advantageous to use load
23486 multiple when available. */
23487 if (info->first_fp_reg_save != 64 || !lr_save_p)
23488 strategy |= REST_INLINE_GPRS | REST_MULTIPLE;
23489 }
23490 }
23491
23492 /* Using the "exit" out-of-line routine does not improve code size
23493 if using it would require lr to be saved and if only saving one
23494 or two gprs. */
23495 else if (!lr_save_p && info->first_gp_reg_save > 29)
23496 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
23497
23498 /* Don't ever restore fixed regs. */
23499 if ((strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS)
23500 for (int i = info->first_gp_reg_save; i < 32; i++)
23501 if (fixed_reg_p (i))
23502 {
23503 strategy |= REST_INLINE_GPRS;
23504 strategy &= ~REST_MULTIPLE;
23505 break;
23506 }
23507
23508 /* We can only use load multiple or the out-of-line routines to
23509 restore gprs if we've saved all the registers from
23510 first_gp_reg_save. Otherwise, we risk loading garbage.
23511 Of course, if we have saved out-of-line or used stmw then we know
23512 we haven't skipped any gprs. */
23513 if ((strategy & (SAVE_INLINE_GPRS | SAVE_MULTIPLE)) == SAVE_INLINE_GPRS
23514 && (strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS)
23515 for (int i = info->first_gp_reg_save; i < 32; i++)
23516 if (!save_reg_p (i))
23517 {
23518 strategy |= REST_INLINE_GPRS;
23519 strategy &= ~REST_MULTIPLE;
23520 break;
23521 }
23522
23523 if (TARGET_ELF && TARGET_64BIT)
23524 {
23525 if (!(strategy & SAVE_INLINE_FPRS))
23526 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
23527 else if (!(strategy & SAVE_INLINE_GPRS)
23528 && info->first_fp_reg_save == 64)
23529 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
23530 }
23531 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
23532 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
23533
23534 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
23535 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
23536
23537 return strategy;
23538 }
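
/* As a rough illustration (the precise bits depend on the ABI, the
   options and the registers used): a 32-bit ABI_V4 function compiled
   with -Os -mmultiple that saves several gprs and no fprs or vrs would
   normally get SAVE_MULTIPLE | SAVE_INLINE_GPRS from the
   TARGET_MULTIPLE block above (an stmw prologue), plus REST_MULTIPLE |
   REST_INLINE_GPRS (an lmw) if LR does not otherwise need saving.  */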
23539
23540 /* Calculate the stack information for the current function. This is
23541 complicated by having two separate calling sequences, the AIX calling
23542 sequence and the V.4 calling sequence.
23543
23544 AIX (and Darwin/Mac OS X) stack frames look like:
23545 32-bit 64-bit
23546 SP----> +---------------------------------------+
23547 | back chain to caller | 0 0
23548 +---------------------------------------+
23549 | saved CR | 4 8 (8-11)
23550 +---------------------------------------+
23551 | saved LR | 8 16
23552 +---------------------------------------+
23553 | reserved for compilers | 12 24
23554 +---------------------------------------+
23555 | reserved for binders | 16 32
23556 +---------------------------------------+
23557 | saved TOC pointer | 20 40
23558 +---------------------------------------+
23559 | Parameter save area (+padding*) (P) | 24 48
23560 +---------------------------------------+
23561 | Alloca space (A) | 24+P etc.
23562 +---------------------------------------+
23563 | Local variable space (L) | 24+P+A
23564 +---------------------------------------+
23565 | Float/int conversion temporary (X) | 24+P+A+L
23566 +---------------------------------------+
23567 | Save area for AltiVec registers (W) | 24+P+A+L+X
23568 +---------------------------------------+
23569 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
23570 +---------------------------------------+
23571 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
23572 +---------------------------------------+
23573 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
23574 +---------------------------------------+
23575 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
23576 +---------------------------------------+
23577 old SP->| back chain to caller's caller |
23578 +---------------------------------------+
23579
23580 * If the alloca area is present, the parameter save area is
23581 padded so that the former starts 16-byte aligned.
23582
23583 The required alignment for AIX configurations is two words (i.e., 8
23584 or 16 bytes).
23585
23586 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
23587
23588 SP----> +---------------------------------------+
23589 | Back chain to caller | 0
23590 +---------------------------------------+
23591 | Save area for CR | 8
23592 +---------------------------------------+
23593 | Saved LR | 16
23594 +---------------------------------------+
23595 | Saved TOC pointer | 24
23596 +---------------------------------------+
23597 | Parameter save area (+padding*) (P) | 32
23598 +---------------------------------------+
23599 | Alloca space (A) | 32+P
23600 +---------------------------------------+
23601 | Local variable space (L) | 32+P+A
23602 +---------------------------------------+
23603 | Save area for AltiVec registers (W) | 32+P+A+L
23604 +---------------------------------------+
23605 | AltiVec alignment padding (Y) | 32+P+A+L+W
23606 +---------------------------------------+
23607 | Save area for GP registers (G) | 32+P+A+L+W+Y
23608 +---------------------------------------+
23609 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
23610 +---------------------------------------+
23611 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
23612 +---------------------------------------+
23613
23614 * If the alloca area is present, the parameter save area is
23615 padded so that the former starts 16-byte aligned.
23616
23617 V.4 stack frames look like:
23618
23619 SP----> +---------------------------------------+
23620 | back chain to caller | 0
23621 +---------------------------------------+
23622 | caller's saved LR | 4
23623 +---------------------------------------+
23624 | Parameter save area (+padding*) (P) | 8
23625 +---------------------------------------+
23626 | Alloca space (A) | 8+P
23627 +---------------------------------------+
23628 | Varargs save area (V) | 8+P+A
23629 +---------------------------------------+
23630 | Local variable space (L) | 8+P+A+V
23631 +---------------------------------------+
23632 | Float/int conversion temporary (X) | 8+P+A+V+L
23633 +---------------------------------------+
23634 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
23635 +---------------------------------------+
23636 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
23637 +---------------------------------------+
23638 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
23639 +---------------------------------------+
23640 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
23641 +---------------------------------------+
23642 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
23643 +---------------------------------------+
23644 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
23645 +---------------------------------------+
23646 old SP->| back chain to caller's caller |
23647 +---------------------------------------+
23648
23649 * If the alloca area is present and the required alignment is
23650 16 bytes, the parameter save area is padded so that the
23651 alloca area starts 16-byte aligned.
23652
23653 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
23654 given. (But note below and in sysv4.h that we require only 8 and
23655 may round up the size of our stack frame anyway. The historical
23656 reason is early versions of powerpc-linux which didn't properly
23657 align the stack at program startup. A happy side-effect is that
23658 -mno-eabi libraries can be used with -meabi programs.)
23659
23660 The EABI configuration defaults to the V.4 layout. However,
23661 the stack alignment requirements may differ. If -mno-eabi is not
23662 given, the required stack alignment is 8 bytes; if -mno-eabi is
23663 given, the required alignment is 16 bytes. (But see V.4 comment
23664 above.) */
23665
23666 #ifndef ABI_STACK_BOUNDARY
23667 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
23668 #endif
23669
23670 static rs6000_stack_t *
23671 rs6000_stack_info (void)
23672 {
23673 /* We should never be called for thunks, we are not set up for that. */
23674 gcc_assert (!cfun->is_thunk);
23675
23676 rs6000_stack_t *info = &stack_info;
23677 int reg_size = TARGET_32BIT ? 4 : 8;
23678 int ehrd_size;
23679 int ehcr_size;
23680 int save_align;
23681 int first_gp;
23682 HOST_WIDE_INT non_fixed_size;
23683 bool using_static_chain_p;
23684
23685 if (reload_completed && info->reload_completed)
23686 return info;
23687
23688 memset (info, 0, sizeof (*info));
23689 info->reload_completed = reload_completed;
23690
23691 /* Select which calling sequence. */
23692 info->abi = DEFAULT_ABI;
23693
23694 /* Calculate which registers need to be saved & save area size. */
23695 info->first_gp_reg_save = first_reg_to_save ();
23696 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
23697 even if it currently looks like we won't. Reload may need it to
23698 get at a constant; if so, it will have already created a constant
23699 pool entry for it. */
23700 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
23701 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
23702 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
23703 && crtl->uses_const_pool
23704 && info->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
23705 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
23706 else
23707 first_gp = info->first_gp_reg_save;
23708
23709 info->gp_size = reg_size * (32 - first_gp);
23710
23711 info->first_fp_reg_save = first_fp_reg_to_save ();
23712 info->fp_size = 8 * (64 - info->first_fp_reg_save);
23713
23714 info->first_altivec_reg_save = first_altivec_reg_to_save ();
23715 info->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
23716 - info->first_altivec_reg_save);
23717
23718 /* Does this function call anything? */
23719 info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame);
23720
23721 /* Determine if we need to save the condition code registers. */
23722 if (save_reg_p (CR2_REGNO)
23723 || save_reg_p (CR3_REGNO)
23724 || save_reg_p (CR4_REGNO))
23725 {
23726 info->cr_save_p = 1;
23727 if (DEFAULT_ABI == ABI_V4)
23728 info->cr_size = reg_size;
23729 }
23730
23731 /* If the current function calls __builtin_eh_return, then we need
23732 to allocate stack space for registers that will hold data for
23733 the exception handler. */
23734 if (crtl->calls_eh_return)
23735 {
23736 unsigned int i;
23737 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
23738 continue;
23739
23740 ehrd_size = i * UNITS_PER_WORD;
23741 }
23742 else
23743 ehrd_size = 0;
23744
23745 /* In the ELFv2 ABI, we also need to allocate space for separate
23746 CR field save areas if the function calls __builtin_eh_return. */
23747 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
23748 {
23749 /* This hard-codes that we have three call-saved CR fields. */
23750 ehcr_size = 3 * reg_size;
23751 /* We do *not* use the regular CR save mechanism. */
23752 info->cr_save_p = 0;
23753 }
23754 else
23755 ehcr_size = 0;
23756
23757 /* Determine various sizes. */
23758 info->reg_size = reg_size;
23759 info->fixed_size = RS6000_SAVE_AREA;
23760 info->vars_size = RS6000_ALIGN (get_frame_size (), 8);
23761 if (cfun->calls_alloca)
23762 info->parm_size =
23763 RS6000_ALIGN (crtl->outgoing_args_size + info->fixed_size,
23764 STACK_BOUNDARY / BITS_PER_UNIT) - info->fixed_size;
23765 else
23766 info->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
23767 TARGET_ALTIVEC ? 16 : 8);
23768 if (FRAME_GROWS_DOWNWARD)
23769 info->vars_size
23770 += RS6000_ALIGN (info->fixed_size + info->vars_size + info->parm_size,
23771 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
23772 - (info->fixed_size + info->vars_size + info->parm_size);
23773
23774 if (TARGET_ALTIVEC_ABI)
23775 info->vrsave_mask = compute_vrsave_mask ();
23776
23777 if (TARGET_ALTIVEC_VRSAVE && info->vrsave_mask)
23778 info->vrsave_size = 4;
23779
23780 compute_save_world_info (info);
23781
23782 /* Calculate the offsets. */
23783 switch (DEFAULT_ABI)
23784 {
23785 case ABI_NONE:
23786 default:
23787 gcc_unreachable ();
23788
23789 case ABI_AIX:
23790 case ABI_ELFv2:
23791 case ABI_DARWIN:
23792 info->fp_save_offset = -info->fp_size;
23793 info->gp_save_offset = info->fp_save_offset - info->gp_size;
23794
23795 if (TARGET_ALTIVEC_ABI)
23796 {
23797 info->vrsave_save_offset = info->gp_save_offset - info->vrsave_size;
23798
23799 /* Align stack so vector save area is on a quadword boundary.
23800 The padding goes above the vectors. */
23801 if (info->altivec_size != 0)
23802 info->altivec_padding_size = info->vrsave_save_offset & 0xF;
23803
23804 info->altivec_save_offset = info->vrsave_save_offset
23805 - info->altivec_padding_size
23806 - info->altivec_size;
23807 gcc_assert (info->altivec_size == 0
23808 || info->altivec_save_offset % 16 == 0);
23809
23810 /* Adjust for AltiVec case. */
23811 info->ehrd_offset = info->altivec_save_offset - ehrd_size;
23812 }
23813 else
23814 info->ehrd_offset = info->gp_save_offset - ehrd_size;
23815
23816 info->ehcr_offset = info->ehrd_offset - ehcr_size;
23817 info->cr_save_offset = reg_size; /* first word when 64-bit. */
23818 info->lr_save_offset = 2*reg_size;
23819 break;
23820
23821 case ABI_V4:
23822 info->fp_save_offset = -info->fp_size;
23823 info->gp_save_offset = info->fp_save_offset - info->gp_size;
23824 info->cr_save_offset = info->gp_save_offset - info->cr_size;
23825
23826 if (TARGET_ALTIVEC_ABI)
23827 {
23828 info->vrsave_save_offset = info->cr_save_offset - info->vrsave_size;
23829
23830 /* Align stack so vector save area is on a quadword boundary. */
23831 if (info->altivec_size != 0)
23832 info->altivec_padding_size = 16 - (-info->vrsave_save_offset % 16);
23833
23834 info->altivec_save_offset = info->vrsave_save_offset
23835 - info->altivec_padding_size
23836 - info->altivec_size;
23837
23838 /* Adjust for AltiVec case. */
23839 info->ehrd_offset = info->altivec_save_offset;
23840 }
23841 else
23842 info->ehrd_offset = info->cr_save_offset;
23843
23844 info->ehrd_offset -= ehrd_size;
23845 info->lr_save_offset = reg_size;
23846 }
23847
23848 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
23849 info->save_size = RS6000_ALIGN (info->fp_size
23850 + info->gp_size
23851 + info->altivec_size
23852 + info->altivec_padding_size
23853 + ehrd_size
23854 + ehcr_size
23855 + info->cr_size
23856 + info->vrsave_size,
23857 save_align);
23858
23859 non_fixed_size = info->vars_size + info->parm_size + info->save_size;
23860
23861 info->total_size = RS6000_ALIGN (non_fixed_size + info->fixed_size,
23862 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
23863
23864 /* Determine if we need to save the link register. */
23865 if (info->calls_p
23866 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23867 && crtl->profile
23868 && !TARGET_PROFILE_KERNEL)
23869 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
23870 #ifdef TARGET_RELOCATABLE
23871 || (DEFAULT_ABI == ABI_V4
23872 && (TARGET_RELOCATABLE || flag_pic > 1)
23873 && !constant_pool_empty_p ())
23874 #endif
23875 || rs6000_ra_ever_killed ())
23876 info->lr_save_p = 1;
23877
23878 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
23879 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
23880 && call_used_regs[STATIC_CHAIN_REGNUM]);
23881 info->savres_strategy = rs6000_savres_strategy (info, using_static_chain_p);
23882
23883 if (!(info->savres_strategy & SAVE_INLINE_GPRS)
23884 || !(info->savres_strategy & SAVE_INLINE_FPRS)
23885 || !(info->savres_strategy & SAVE_INLINE_VRS)
23886 || !(info->savres_strategy & REST_INLINE_GPRS)
23887 || !(info->savres_strategy & REST_INLINE_FPRS)
23888 || !(info->savres_strategy & REST_INLINE_VRS))
23889 info->lr_save_p = 1;
23890
23891 if (info->lr_save_p)
23892 df_set_regs_ever_live (LR_REGNO, true);
23893
23894 /* Determine if we need to allocate any stack frame:
23895
23896 For AIX we need to push the stack if a frame pointer is needed
23897 (because the stack might be dynamically adjusted), if we are
23898 debugging, if we make calls, or if the sum of fp_save, gp_save,
23899 and local variables are more than the space needed to save all
23900 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
23901 + 18*8 = 288 (GPR13 reserved).
23902
23903 For V.4 we don't have the stack cushion that AIX uses, but assume
23904 that the debugger can handle stackless frames. */
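/* Worked example (an editorial sketch, not from the original source):
   on 64-bit AIX/ELFv2, a function that makes no calls, needs no frame
   pointer, and has a non_fixed_size of, say, 100 bytes stays within
   the 288-byte cushion, so push_p remains 0 and the register saves
   land in the area below the stack pointer that the ABI effectively
   treats as safe.  */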
23905
23906 if (info->calls_p)
23907 info->push_p = 1;
23908
23909 else if (DEFAULT_ABI == ABI_V4)
23910 info->push_p = non_fixed_size != 0;
23911
23912 else if (frame_pointer_needed)
23913 info->push_p = 1;
23914
23915 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
23916 info->push_p = 1;
23917
23918 else
23919 info->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
23920
23921 return info;
23922 }
23923
23924 static void
23925 debug_stack_info (rs6000_stack_t *info)
23926 {
23927 const char *abi_string;
23928
23929 if (! info)
23930 info = rs6000_stack_info ();
23931
23932 fprintf (stderr, "\nStack information for function %s:\n",
23933 ((current_function_decl && DECL_NAME (current_function_decl))
23934 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
23935 : "<unknown>"));
23936
23937 switch (info->abi)
23938 {
23939 default: abi_string = "Unknown"; break;
23940 case ABI_NONE: abi_string = "NONE"; break;
23941 case ABI_AIX: abi_string = "AIX"; break;
23942 case ABI_ELFv2: abi_string = "ELFv2"; break;
23943 case ABI_DARWIN: abi_string = "Darwin"; break;
23944 case ABI_V4: abi_string = "V.4"; break;
23945 }
23946
23947 fprintf (stderr, "\tABI = %5s\n", abi_string);
23948
23949 if (TARGET_ALTIVEC_ABI)
23950 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
23951
23952 if (info->first_gp_reg_save != 32)
23953 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
23954
23955 if (info->first_fp_reg_save != 64)
23956 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
23957
23958 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
23959 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
23960 info->first_altivec_reg_save);
23961
23962 if (info->lr_save_p)
23963 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
23964
23965 if (info->cr_save_p)
23966 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
23967
23968 if (info->vrsave_mask)
23969 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
23970
23971 if (info->push_p)
23972 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
23973
23974 if (info->calls_p)
23975 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
23976
23977 if (info->gp_size)
23978 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
23979
23980 if (info->fp_size)
23981 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
23982
23983 if (info->altivec_size)
23984 fprintf (stderr, "\taltivec_save_offset = %5d\n",
23985 info->altivec_save_offset);
23986
23987 if (info->vrsave_size)
23988 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
23989 info->vrsave_save_offset);
23990
23991 if (info->lr_save_p)
23992 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
23993
23994 if (info->cr_save_p)
23995 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
23996
23997 if (info->varargs_save_offset)
23998 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
23999
24000 if (info->total_size)
24001 fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC"\n",
24002 info->total_size);
24003
24004 if (info->vars_size)
24005 fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC"\n",
24006 info->vars_size);
24007
24008 if (info->parm_size)
24009 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
24010
24011 if (info->fixed_size)
24012 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
24013
24014 if (info->gp_size)
24015 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
24016
24017 if (info->fp_size)
24018 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
24019
24020 if (info->altivec_size)
24021 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
24022
24023 if (info->vrsave_size)
24024 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
24025
24026 if (info->altivec_padding_size)
24027 fprintf (stderr, "\taltivec_padding_size= %5d\n",
24028 info->altivec_padding_size);
24029
24030 if (info->cr_size)
24031 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
24032
24033 if (info->save_size)
24034 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
24035
24036 if (info->reg_size != 4)
24037 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
24038
24039 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
24040
24041 fprintf (stderr, "\n");
24042 }
24043
24044 rtx
24045 rs6000_return_addr (int count, rtx frame)
24046 {
24047 /* We can't use get_hard_reg_initial_val for LR when count == 0 if LR
24048 is trashed by the prologue, as it is for PIC on ABI_V4 and Darwin. */
24049 if (count != 0
24050 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
24051 {
24052 cfun->machine->ra_needs_full_frame = 1;
24053
24054 if (count == 0)
24055 /* FRAME is set to frame_pointer_rtx by the generic code, but that
24056 is good for loading 0(r1) only when !FRAME_GROWS_DOWNWARD. */
24057 frame = stack_pointer_rtx;
24058 rtx prev_frame_addr = memory_address (Pmode, frame);
24059 rtx prev_frame = copy_to_reg (gen_rtx_MEM (Pmode, prev_frame_addr));
24060 rtx lr_save_off = plus_constant (Pmode,
24061 prev_frame, RETURN_ADDRESS_OFFSET);
24062 rtx lr_save_addr = memory_address (Pmode, lr_save_off);
24063 return gen_rtx_MEM (Pmode, lr_save_addr);
24064 }
24065
24066 cfun->machine->ra_need_lr = 1;
24067 return get_hard_reg_initial_val (Pmode, LR_REGNO);
24068 }
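/* Usage sketch (editorial; the offsets are assumptions based on the
   lr_save_offset computation above): __builtin_return_address (0)
   normally yields the incoming value of LR via
   get_hard_reg_initial_val, while count > 0 (or PIC V.4/Darwin)
   chases the backchain and loads the LR save word, roughly

	ld r9,0(r1)	# prev_frame = *sp
	ld r3,16(r9)	# LR save slot, 2*reg_size in the 64-bit ABIs

   for the 64-bit case.  */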
24069
24070 /* Say whether a function is a candidate for sibcall handling or not. */
24071
24072 static bool
24073 rs6000_function_ok_for_sibcall (tree decl, tree exp)
24074 {
24075 tree fntype;
24076
24077 if (decl)
24078 fntype = TREE_TYPE (decl);
24079 else
24080 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
24081
24082 /* We can't do it if the called function has more vector parameters
24083 than the current function; there's nowhere to put the VRsave code. */
24084 if (TARGET_ALTIVEC_ABI
24085 && TARGET_ALTIVEC_VRSAVE
24086 && !(decl && decl == current_function_decl))
24087 {
24088 function_args_iterator args_iter;
24089 tree type;
24090 int nvreg = 0;
24091
24092 /* Functions with vector parameters are required to have a
24093 prototype, so the argument type info must be available
24094 here. */
24095 FOREACH_FUNCTION_ARGS (fntype, type, args_iter)
24096 if (TREE_CODE (type) == VECTOR_TYPE
24097 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
24098 nvreg++;
24099
24100 FOREACH_FUNCTION_ARGS (TREE_TYPE (current_function_decl), type, args_iter)
24101 if (TREE_CODE (type) == VECTOR_TYPE
24102 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
24103 nvreg--;
24104
24105 if (nvreg > 0)
24106 return false;
24107 }
24108
24109 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
24110 functions, because the callee may have a different TOC pointer from
24111 the caller and there's no way to ensure we restore the TOC when
24112 we return. With the secure-plt SYSV ABI we can't make non-local
24113 calls when -fpic/PIC because the plt call stubs use r30. */
24114 if (DEFAULT_ABI == ABI_DARWIN
24115 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24116 && decl
24117 && !DECL_EXTERNAL (decl)
24118 && !DECL_WEAK (decl)
24119 && (*targetm.binds_local_p) (decl))
24120 || (DEFAULT_ABI == ABI_V4
24121 && (!TARGET_SECURE_PLT
24122 || !flag_pic
24123 || (decl
24124 && (*targetm.binds_local_p) (decl)))))
24125 {
24126 tree attr_list = TYPE_ATTRIBUTES (fntype);
24127
24128 if (!lookup_attribute ("longcall", attr_list)
24129 || lookup_attribute ("shortcall", attr_list))
24130 return true;
24131 }
24132
24133 return false;
24134 }
24135
24136 static int
24137 rs6000_ra_ever_killed (void)
24138 {
24139 rtx_insn *top;
24140 rtx reg;
24141 rtx_insn *insn;
24142
24143 if (cfun->is_thunk)
24144 return 0;
24145
24146 if (cfun->machine->lr_save_state)
24147 return cfun->machine->lr_save_state - 1;
24148
24149 /* regs_ever_live has LR marked as used if any sibcalls are present,
24150 but this should not force saving and restoring in the
24151 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
24152 clobbers LR, so that is inappropriate. */
24153
24154 /* Also, the prologue can generate a store into LR that
24155 doesn't really count, like this:
24156
24157 move LR->R0
24158 bcl to set PIC register
24159 move LR->R31
24160 move R0->LR
24161
24162 When we're called from the epilogue, we need to avoid counting
24163 this as a store. */
24164
24165 push_topmost_sequence ();
24166 top = get_insns ();
24167 pop_topmost_sequence ();
24168 reg = gen_rtx_REG (Pmode, LR_REGNO);
24169
24170 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
24171 {
24172 if (INSN_P (insn))
24173 {
24174 if (CALL_P (insn))
24175 {
24176 if (!SIBLING_CALL_P (insn))
24177 return 1;
24178 }
24179 else if (find_regno_note (insn, REG_INC, LR_REGNO))
24180 return 1;
24181 else if (set_of (reg, insn) != NULL_RTX
24182 && !prologue_epilogue_contains (insn))
24183 return 1;
24184 }
24185 }
24186 return 0;
24187 }
24188 \f
24189 /* Emit instructions needed to load the TOC register.
24190 This is only needed when TARGET_TOC, TARGET_MINIMAL_TOC, and there is
24191 a constant pool; or for SVR4 -fpic. */
24192
24193 void
24194 rs6000_emit_load_toc_table (int fromprolog)
24195 {
24196 rtx dest;
24197 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
24198
24199 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
24200 {
24201 char buf[30];
24202 rtx lab, tmp1, tmp2, got;
24203
24204 lab = gen_label_rtx ();
24205 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
24206 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
24207 if (flag_pic == 2)
24208 {
24209 got = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
24210 need_toc_init = 1;
24211 }
24212 else
24213 got = rs6000_got_sym ();
24214 tmp1 = tmp2 = dest;
24215 if (!fromprolog)
24216 {
24217 tmp1 = gen_reg_rtx (Pmode);
24218 tmp2 = gen_reg_rtx (Pmode);
24219 }
24220 emit_insn (gen_load_toc_v4_PIC_1 (lab));
24221 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
24222 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
24223 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
24224 }
24225 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
24226 {
24227 emit_insn (gen_load_toc_v4_pic_si ());
24228 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
24229 }
24230 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
24231 {
24232 char buf[30];
24233 rtx temp0 = (fromprolog
24234 ? gen_rtx_REG (Pmode, 0)
24235 : gen_reg_rtx (Pmode));
24236
24237 if (fromprolog)
24238 {
24239 rtx symF, symL;
24240
24241 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
24242 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
24243
24244 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
24245 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
24246
24247 emit_insn (gen_load_toc_v4_PIC_1 (symF));
24248 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
24249 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
24250 }
24251 else
24252 {
24253 rtx tocsym, lab;
24254
24255 tocsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
24256 need_toc_init = 1;
24257 lab = gen_label_rtx ();
24258 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
24259 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
24260 if (TARGET_LINK_STACK)
24261 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
24262 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
24263 }
24264 emit_insn (gen_addsi3 (dest, temp0, dest));
24265 }
24266 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
24267 {
24268 /* This is for AIX code running in non-PIC ELF32. */
24269 rtx realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
24270
24271 need_toc_init = 1;
24272 emit_insn (gen_elf_high (dest, realsym));
24273 emit_insn (gen_elf_low (dest, dest, realsym));
24274 }
24275 else
24276 {
24277 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
24278
24279 if (TARGET_32BIT)
24280 emit_insn (gen_load_toc_aix_si (dest));
24281 else
24282 emit_insn (gen_load_toc_aix_di (dest));
24283 }
24284 }
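/* Editorial sketch of what the secure-plt -fPIC path above emits
   (label names are illustrative):

	bcl 20,31,.L1	# branch-and-link to get the PC
   .L1:	mflr 30		# LR now holds .L1
	addis 30,30,.LCTOC1-.L1@ha
	addi 30,30,.LCTOC1-.L1@l

   i.e. a PC-relative computation of the TOC/GOT base into the PIC
   register.  */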
24285
24286 /* Emit instructions to restore the link register after determining where
24287 its value has been stored. */
24288
24289 void
24290 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
24291 {
24292 rs6000_stack_t *info = rs6000_stack_info ();
24293 rtx operands[2];
24294
24295 operands[0] = source;
24296 operands[1] = scratch;
24297
24298 if (info->lr_save_p)
24299 {
24300 rtx frame_rtx = stack_pointer_rtx;
24301 HOST_WIDE_INT sp_offset = 0;
24302 rtx tmp;
24303
24304 if (frame_pointer_needed
24305 || cfun->calls_alloca
24306 || info->total_size > 32767)
24307 {
24308 tmp = gen_frame_mem (Pmode, frame_rtx);
24309 emit_move_insn (operands[1], tmp);
24310 frame_rtx = operands[1];
24311 }
24312 else if (info->push_p)
24313 sp_offset = info->total_size;
24314
24315 tmp = plus_constant (Pmode, frame_rtx,
24316 info->lr_save_offset + sp_offset);
24317 tmp = gen_frame_mem (Pmode, tmp);
24318 emit_move_insn (tmp, operands[0]);
24319 }
24320 else
24321 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
24322
24323 /* Freeze lr_save_p. We've just emitted rtl that depends on the
24324 state of lr_save_p so any change from here on would be a bug. In
24325 particular, stop rs6000_ra_ever_killed from considering the SET
24326 of lr we may have added just above. */
24327 cfun->machine->lr_save_state = info->lr_save_p + 1;
24328 }
24329
24330 static GTY(()) alias_set_type set = -1;
24331
24332 alias_set_type
24333 get_TOC_alias_set (void)
24334 {
24335 if (set == -1)
24336 set = new_alias_set ();
24337 return set;
24338 }
24339
24340 /* This returns nonzero if the current function uses the TOC. This is
24341 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
24342 is generated by the ABI_V4 load_toc_* patterns.
24343 Return 2 instead of 1 if the load_toc_* pattern is in the function
24344 partition that doesn't start the function. */
24345 #if TARGET_ELF
24346 static int
24347 uses_TOC (void)
24348 {
24349 rtx_insn *insn;
24350 int ret = 1;
24351
24352 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24353 {
24354 if (INSN_P (insn))
24355 {
24356 rtx pat = PATTERN (insn);
24357 int i;
24358
24359 if (GET_CODE (pat) == PARALLEL)
24360 for (i = 0; i < XVECLEN (pat, 0); i++)
24361 {
24362 rtx sub = XVECEXP (pat, 0, i);
24363 if (GET_CODE (sub) == USE)
24364 {
24365 sub = XEXP (sub, 0);
24366 if (GET_CODE (sub) == UNSPEC
24367 && XINT (sub, 1) == UNSPEC_TOC)
24368 return ret;
24369 }
24370 }
24371 }
24372 else if (crtl->has_bb_partition
24373 && NOTE_P (insn)
24374 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
24375 ret = 2;
24376 }
24377 return 0;
24378 }
24379 #endif
24380
24381 rtx
24382 create_TOC_reference (rtx symbol, rtx largetoc_reg)
24383 {
24384 rtx tocrel, tocreg, hi;
24385
24386 if (TARGET_DEBUG_ADDR)
24387 {
24388 if (GET_CODE (symbol) == SYMBOL_REF)
24389 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
24390 XSTR (symbol, 0));
24391 else
24392 {
24393 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
24394 GET_RTX_NAME (GET_CODE (symbol)));
24395 debug_rtx (symbol);
24396 }
24397 }
24398
24399 if (!can_create_pseudo_p ())
24400 df_set_regs_ever_live (TOC_REGISTER, true);
24401
24402 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
24403 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
24404 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
24405 return tocrel;
24406
24407 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
24408 if (largetoc_reg != NULL)
24409 {
24410 emit_move_insn (largetoc_reg, hi);
24411 hi = largetoc_reg;
24412 }
24413 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
24414 }
24415
24416 /* Issue assembly directives that create a reference to the given DWARF
24417 FRAME_TABLE_LABEL from the current function section. */
24418 void
24419 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
24420 {
24421 fprintf (asm_out_file, "\t.ref %s\n",
24422 (* targetm.strip_name_encoding) (frame_table_label));
24423 }
24424 \f
24425 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
24426 and the change to the stack pointer. */
24427
24428 static void
24429 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
24430 {
24431 rtvec p;
24432 int i;
24433 rtx regs[3];
24434
24435 i = 0;
24436 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
24437 if (hard_frame_needed)
24438 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
24439 if (!(REGNO (fp) == STACK_POINTER_REGNUM
24440 || (hard_frame_needed
24441 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
24442 regs[i++] = fp;
24443
24444 p = rtvec_alloc (i);
24445 while (--i >= 0)
24446 {
24447 rtx mem = gen_frame_mem (BLKmode, regs[i]);
24448 RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
24449 }
24450
24451 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
24452 }
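/* For illustration (editorial): with only r1 involved, the tie
   emitted above is roughly

	(insn (parallel [(set (mem:BLK (reg 1)) (const_int 0))]))

   a wildcard store into frame memory that keeps the scheduler and
   alias analysis from moving frame accesses across the stack
   pointer update.  */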
24453
24454 /* Allocate SIZE_INT bytes on the stack using a store-with-update style insn
24455 and set the appropriate attributes for the generated insn. Return the
24456 first insn which adjusts the stack pointer or the last insn before
24457 the stack adjustment loop.
24458
24459 SIZE_INT is used to create the CFI note for the allocation.
24460
24461 SIZE_RTX is an rtx containing the size of the adjustment. Note that
24462 since stacks grow to lower addresses, its runtime value is -SIZE_INT.
24463
24464 ORIG_SP contains the backchain value that must be stored at *sp. */
24465
24466 static rtx_insn *
24467 rs6000_emit_allocate_stack_1 (HOST_WIDE_INT size_int, rtx orig_sp)
24468 {
24469 rtx_insn *insn;
24470
24471 rtx size_rtx = GEN_INT (-size_int);
24472 if (size_int > 32767)
24473 {
24474 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
24475 /* Need a note here so that try_split doesn't get confused. */
24476 if (get_last_insn () == NULL_RTX)
24477 emit_note (NOTE_INSN_DELETED);
24478 insn = emit_move_insn (tmp_reg, size_rtx);
24479 try_split (PATTERN (insn), insn, 0);
24480 size_rtx = tmp_reg;
24481 }
24482
24483 if (Pmode == SImode)
24484 insn = emit_insn (gen_movsi_update_stack (stack_pointer_rtx,
24485 stack_pointer_rtx,
24486 size_rtx,
24487 orig_sp));
24488 else
24489 insn = emit_insn (gen_movdi_di_update_stack (stack_pointer_rtx,
24490 stack_pointer_rtx,
24491 size_rtx,
24492 orig_sp));
24493 rtx par = PATTERN (insn);
24494 gcc_assert (GET_CODE (par) == PARALLEL);
24495 rtx set = XVECEXP (par, 0, 0);
24496 gcc_assert (GET_CODE (set) == SET);
24497 rtx mem = SET_DEST (set);
24498 gcc_assert (MEM_P (mem));
24499 MEM_NOTRAP_P (mem) = 1;
24500 set_mem_alias_set (mem, get_frame_alias_set ());
24501
24502 RTX_FRAME_RELATED_P (insn) = 1;
24503 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
24504 gen_rtx_SET (stack_pointer_rtx,
24505 gen_rtx_PLUS (Pmode,
24506 stack_pointer_rtx,
24507 GEN_INT (-size_int))));
24508
24509 /* Emit a blockage to ensure the allocation/probing insns are
24510 not optimized, combined, removed, etc. Add REG_STACK_CHECK
24511 note for similar reasons. */
24512 if (flag_stack_clash_protection)
24513 {
24514 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
24515 emit_insn (gen_blockage ());
24516 }
24517
24518 return insn;
24519 }
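/* Example (editorial): a 64-bit allocation of 512 bytes with
   ORIG_SP == r1 is emitted as roughly

	stdu r1,-512(r1)

   which stores the backchain and decrements the stack pointer in a
   single insn; for sizes above 32767 the negated size is loaded into
   r0 first and the indexed form (stdux) is used instead.  */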
24520
24521 static HOST_WIDE_INT
24522 get_stack_clash_protection_probe_interval (void)
24523 {
24524 return (HOST_WIDE_INT_1U
24525 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL));
24526 }
24527
24528 static HOST_WIDE_INT
24529 get_stack_clash_protection_guard_size (void)
24530 {
24531 return (HOST_WIDE_INT_1U
24532 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE));
24533 }
24534
24535 /* Allocate ORIG_SIZE bytes on the stack and probe the newly
24536 allocated space every STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes.
24537
24538 COPY_REG, if non-null, should contain a copy of the original
24539 stack pointer at exit from this function.
24540
24541 This is subtly different from the Ada probing in that it tries hard to
24542 prevent attacks that jump the stack guard. Thus it is never allowed to
24543 allocate more than STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes of stack
24544 space without a suitable probe. */
24545 static rtx_insn *
24546 rs6000_emit_probe_stack_range_stack_clash (HOST_WIDE_INT orig_size,
24547 rtx copy_reg)
24548 {
24549 rtx orig_sp = copy_reg;
24550
24551 HOST_WIDE_INT probe_interval = get_stack_clash_protection_probe_interval ();
24552
24553 /* Round the size down to a multiple of PROBE_INTERVAL. */
24554 HOST_WIDE_INT rounded_size = ROUND_DOWN (orig_size, probe_interval);
24555
24556 /* If a copy was explicitly requested,
24557 or the rounded size is not the same as the original size,
24558 or the rounded size is greater than a page,
24559 then we will need a copy of the original stack pointer. */
24560 if (rounded_size != orig_size
24561 || rounded_size > probe_interval
24562 || copy_reg)
24563 {
24564 /* If the caller did not request a copy of the incoming stack
24565 pointer, then we use r0 to hold the copy. */
24566 if (!copy_reg)
24567 orig_sp = gen_rtx_REG (Pmode, 0);
24568 emit_move_insn (orig_sp, stack_pointer_rtx);
24569 }
24570
24571 /* There are three cases here.
24572
24573 One is a single probe which is the most common and most efficiently
24574 implemented as it does not have to have a copy of the original
24575 stack pointer if there are no residuals.
24576
24577 Second is unrolled allocation/probes which we use if there are just
24578 a few of them. It needs to save the original stack pointer into a
24579 temporary for use as a source register in the allocation/probe.
24580
24581 Last is a loop. This is the most uncommon case and least efficient. */
24582 rtx_insn *retval = NULL;
24583 if (rounded_size == probe_interval)
24584 {
24585 retval = rs6000_emit_allocate_stack_1 (probe_interval, stack_pointer_rtx);
24586
24587 dump_stack_clash_frame_info (PROBE_INLINE, rounded_size != orig_size);
24588 }
24589 else if (rounded_size <= 8 * probe_interval)
24590 {
24591 /* The ABI requires using the store-with-update insns to allocate
24592 space and store the backchain into the stack.
24593
24594 So we save the current stack pointer into a temporary, then
24595 emit the store-with-update insns to store the saved stack pointer
24596 into the right location in each new page. */
24597 for (int i = 0; i < rounded_size; i += probe_interval)
24598 {
24599 rtx_insn *insn
24600 = rs6000_emit_allocate_stack_1 (probe_interval, orig_sp);
24601
24602 /* Save the first stack adjustment in RETVAL. */
24603 if (i == 0)
24604 retval = insn;
24605 }
24606
24607 dump_stack_clash_frame_info (PROBE_INLINE, rounded_size != orig_size);
24608 }
24609 else
24610 {
24611 /* Compute the ending address. */
24612 rtx end_addr
24613 = copy_reg ? gen_rtx_REG (Pmode, 0) : gen_rtx_REG (Pmode, 12);
24614 rtx rs = GEN_INT (-rounded_size);
24615 rtx_insn *insn;
24616 if (add_operand (rs, Pmode))
24617 insn = emit_insn (gen_add3_insn (end_addr, stack_pointer_rtx, rs));
24618 else
24619 {
24620 emit_move_insn (end_addr, GEN_INT (-rounded_size));
24621 insn = emit_insn (gen_add3_insn (end_addr, end_addr,
24622 stack_pointer_rtx));
24623 /* Describe the effect of INSN to the CFI engine. */
24624 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
24625 gen_rtx_SET (end_addr,
24626 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
24627 rs)));
24628 }
24629 RTX_FRAME_RELATED_P (insn) = 1;
24630
24631 /* Emit the loop. */
24632 if (TARGET_64BIT)
24633 retval = emit_insn (gen_probe_stack_rangedi (stack_pointer_rtx,
24634 stack_pointer_rtx, orig_sp,
24635 end_addr));
24636 else
24637 retval = emit_insn (gen_probe_stack_rangesi (stack_pointer_rtx,
24638 stack_pointer_rtx, orig_sp,
24639 end_addr));
24640 RTX_FRAME_RELATED_P (retval) = 1;
24641 /* Describe the effect of INSN to the CFI engine. */
24642 add_reg_note (retval, REG_FRAME_RELATED_EXPR,
24643 gen_rtx_SET (stack_pointer_rtx, end_addr));
24644
24645 /* Emit a blockage to ensure the allocation/probing insns are
24646 not optimized, combined, removed, etc. Other cases handle this
24647 within their call to rs6000_emit_allocate_stack_1. */
24648 emit_insn (gen_blockage ());
24649
24650 dump_stack_clash_frame_info (PROBE_LOOP, rounded_size != orig_size);
24651 }
24652
24653 if (orig_size != rounded_size)
24654 {
24655 /* Allocate (and implicitly probe) any residual space. */
24656 HOST_WIDE_INT residual = orig_size - rounded_size;
24657
24658 rtx_insn *insn = rs6000_emit_allocate_stack_1 (residual, orig_sp);
24659
24660 /* If the residual was the only allocation, then we can return the
24661 allocating insn. */
24662 if (!retval)
24663 retval = insn;
24664 }
24665
24666 return retval;
24667 }
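/* Worked example (editorial, assuming a 4 KiB probe interval): for
   ORIG_SIZE == 10000, rounded_size is 8192, so the unrolled case
   emits two 4096-byte store-with-update allocations (each store
   touches the newly allocated interval), followed by a residual
   allocation of 1808 bytes.  No interval is ever skipped, which is
   what defeats stack-jump attacks.  */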
24668
24669 /* Emit the correct code for allocating stack space, as insns.
24670 If COPY_REG, make sure a copy of the old frame is left there.
24671 The generated code may use hard register 0 as a temporary. */
24672
24673 static rtx_insn *
24674 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
24675 {
24676 rtx_insn *insn;
24677 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
24678 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
24679 rtx todec = gen_int_mode (-size, Pmode);
24680
24681 if (INTVAL (todec) != -size)
24682 {
24683 warning (0, "stack frame too large");
24684 emit_insn (gen_trap ());
24685 return 0;
24686 }
24687
24688 if (crtl->limit_stack)
24689 {
24690 if (REG_P (stack_limit_rtx)
24691 && REGNO (stack_limit_rtx) > 1
24692 && REGNO (stack_limit_rtx) <= 31)
24693 {
24694 rtx_insn *insn
24695 = gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size));
24696 gcc_assert (insn);
24697 emit_insn (insn);
24698 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg, const0_rtx));
24699 }
24700 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
24701 && TARGET_32BIT
24702 && DEFAULT_ABI == ABI_V4
24703 && !flag_pic)
24704 {
24705 rtx toload = gen_rtx_CONST (VOIDmode,
24706 gen_rtx_PLUS (Pmode,
24707 stack_limit_rtx,
24708 GEN_INT (size)));
24709
24710 emit_insn (gen_elf_high (tmp_reg, toload));
24711 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
24712 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
24713 const0_rtx));
24714 }
24715 else
24716 warning (0, "stack limit expression is not supported");
24717 }
24718
24719 if (flag_stack_clash_protection)
24720 {
24721 if (size < get_stack_clash_protection_guard_size ())
24722 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
24723 else
24724 {
24725 rtx_insn *insn = rs6000_emit_probe_stack_range_stack_clash (size,
24726 copy_reg);
24727
24728 /* If we asked for a copy with an offset, then we still need to add in
24729 the offset. */
24730 if (copy_reg && copy_off)
24731 emit_insn (gen_add3_insn (copy_reg, copy_reg, GEN_INT (copy_off)));
24732 return insn;
24733 }
24734 }
24735
24736 if (copy_reg)
24737 {
24738 if (copy_off != 0)
24739 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
24740 else
24741 emit_move_insn (copy_reg, stack_reg);
24742 }
24743
24744 /* rs6000_emit_allocate_stack_1 generates the store-with-update insn
24745 as a PARALLEL with the MEM set being the first operation, and since
24746 it didn't use gen_frame_mem to generate that MEM, it also sets the
24747 alias set/attributes on it. */
24748 insn = rs6000_emit_allocate_stack_1 (size, stack_reg);
24749 return insn;
24750 }
24751
24752 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
24753
24754 #if PROBE_INTERVAL > 32768
24755 #error Cannot use indexed addressing mode for stack probing
24756 #endif
24757
24758 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
24759 inclusive. These are offsets from the current stack pointer. */
24760
24761 static void
24762 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
24763 {
24764 /* See if we have a constant small number of probes to generate. If so,
24765 that's the easy case. */
24766 if (first + size <= 32768)
24767 {
24768 HOST_WIDE_INT i;
24769
24770 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
24771 it exceeds SIZE. If only one probe is needed, this will not
24772 generate any code. Then probe at FIRST + SIZE. */
24773 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
24774 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
24775 -(first + i)));
24776
24777 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
24778 -(first + size)));
24779 }
24780
24781 /* Otherwise, do the same as above, but in a loop. Note that we must be
24782 extra careful with variables wrapping around because we might be at
24783 the very top (or the very bottom) of the address space and we have
24784 to be able to handle this case properly; in particular, we use an
24785 equality test for the loop condition. */
24786 else
24787 {
24788 HOST_WIDE_INT rounded_size;
24789 rtx r12 = gen_rtx_REG (Pmode, 12);
24790 rtx r0 = gen_rtx_REG (Pmode, 0);
24791
24792 /* Sanity check for the addressing mode we're going to use. */
24793 gcc_assert (first <= 32768);
24794
24795 /* Step 1: round SIZE to the previous multiple of the interval. */
24796
24797 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
24798
24799
24800 /* Step 2: compute initial and final value of the loop counter. */
24801
24802 /* TEST_ADDR = SP + FIRST. */
24803 emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx,
24804 -first)));
24805
24806 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
24807 if (rounded_size > 32768)
24808 {
24809 emit_move_insn (r0, GEN_INT (-rounded_size));
24810 emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0)));
24811 }
24812 else
24813 emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12,
24814 -rounded_size)));
24815
24816
24817 /* Step 3: the loop
24818
24819 do
24820 {
24821 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
24822 probe at TEST_ADDR
24823 }
24824 while (TEST_ADDR != LAST_ADDR)
24825
24826 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
24827 until N * PROBE_INTERVAL is equal to ROUNDED_SIZE. */
24828
24829 if (TARGET_64BIT)
24830 emit_insn (gen_probe_stack_rangedi (r12, r12, stack_pointer_rtx, r0));
24831 else
24832 emit_insn (gen_probe_stack_rangesi (r12, r12, stack_pointer_rtx, r0));
24833
24834
24835 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
24836 that SIZE is equal to ROUNDED_SIZE. */
24837
24838 if (size != rounded_size)
24839 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
24840 }
24841 }
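/* Example (editorial): with PROBE_INTERVAL of 4 KiB, FIRST == 16 and
   SIZE == 10000 take the small-constant branch (16 + 10000 <= 32768)
   and emit probes at sp-4112 and sp-8208, then the final probe at
   sp-10016, i.e. at FIRST + N*PROBE_INTERVAL and at FIRST + SIZE.  */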
24842
24843 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
24844 addresses, not offsets. */
24845
24846 static const char *
24847 output_probe_stack_range_1 (rtx reg1, rtx reg2)
24848 {
24849 static int labelno = 0;
24850 char loop_lab[32];
24851 rtx xops[2];
24852
24853 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
24854
24855 /* Loop. */
24856 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
24857
24858 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
24859 xops[0] = reg1;
24860 xops[1] = GEN_INT (-PROBE_INTERVAL);
24861 output_asm_insn ("addi %0,%0,%1", xops);
24862
24863 /* Probe at TEST_ADDR. */
24864 xops[1] = gen_rtx_REG (Pmode, 0);
24865 output_asm_insn ("stw %1,0(%0)", xops);
24866
24867 /* Test if TEST_ADDR == LAST_ADDR. */
24868 xops[1] = reg2;
24869 if (TARGET_64BIT)
24870 output_asm_insn ("cmpd 0,%0,%1", xops);
24871 else
24872 output_asm_insn ("cmpw 0,%0,%1", xops);
24873
24874 /* Branch. */
24875 fputs ("\tbne 0,", asm_out_file);
24876 assemble_name_raw (asm_out_file, loop_lab);
24877 fputc ('\n', asm_out_file);
24878
24879 return "";
24880 }
24881
24882 /* This function is called when rs6000_frame_related is processing
24883 SETs within a PARALLEL, and returns whether the REGNO save ought to
24884 be marked RTX_FRAME_RELATED_P. The PARALLELs involved are those
24885 for out-of-line register save functions, store multiple, and the
24886 Darwin world_save. They may contain registers that don't really
24887 need saving. */
24888
24889 static bool
24890 interesting_frame_related_regno (unsigned int regno)
24891 {
24892 /* Apparent saves of r0 are actually saving LR. It doesn't make
24893 sense to substitute the regno here to test save_reg_p (LR_REGNO).
24894 We *know* LR needs saving, and dwarf2cfi.c is able to deduce that
24895 (set (mem) (r0)) is saving LR from a prior (set (r0) (lr)) marked
24896 as frame related. */
24897 if (regno == 0)
24898 return true;
24899 /* If we see CR2 then we are here on a Darwin world save. Saves of
24900 CR2 signify the whole CR is being saved. This is a long-standing
24901 ABI wart fixed by ELFv2. As for r0/lr there is no need to check
24902 that CR needs to be saved. */
24903 if (regno == CR2_REGNO)
24904 return true;
24905 /* Omit frame info for any user-defined global regs. If frame info
24906 is supplied for them, frame unwinding will restore a user reg.
24907 Also omit frame info for any reg we don't need to save, as that
24908 bloats frame info and can cause problems with shrink wrapping.
24909 Since global regs won't be seen as needing to be saved, both of
24910 these conditions are covered by save_reg_p. */
24911 return save_reg_p (regno);
24912 }
24913
24914 /* Probe a range of stack addresses from REG1 to REG3 inclusive. These are
24915 addresses, not offsets.
24916
24917 REG2 contains the backchain that must be stored into *sp at each allocation.
24918
24919 This is subtly different from the Ada probing above in that it tries hard
24920 to prevent attacks that jump the stack guard. Thus, it is never allowed
24921 to allocate more than PROBE_INTERVAL bytes of stack space without a
24922 suitable probe. */
24923
24924 static const char *
24925 output_probe_stack_range_stack_clash (rtx reg1, rtx reg2, rtx reg3)
24926 {
24927 static int labelno = 0;
24928 char loop_lab[32];
24929 rtx xops[3];
24930
24931 HOST_WIDE_INT probe_interval = get_stack_clash_protection_probe_interval ();
24932
24933 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
24934
24935 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
24936
24937 /* This allocates and probes. */
24938 xops[0] = reg1;
24939 xops[1] = reg2;
24940 xops[2] = GEN_INT (-probe_interval);
24941 if (TARGET_64BIT)
24942 output_asm_insn ("stdu %1,%2(%0)", xops);
24943 else
24944 output_asm_insn ("stwu %1,%2(%0)", xops);
24945
24946 /* Jump to LOOP_LAB if TEST_ADDR != LAST_ADDR. */
24947 xops[0] = reg1;
24948 xops[1] = reg3;
24949 if (TARGET_64BIT)
24950 output_asm_insn ("cmpd 0,%0,%1", xops);
24951 else
24952 output_asm_insn ("cmpw 0,%0,%1", xops);
24953
24954 fputs ("\tbne 0,", asm_out_file);
24955 assemble_name_raw (asm_out_file, loop_lab);
24956 fputc ('\n', asm_out_file);
24957
24958 return "";
24959 }
24960
24961 /* Wrapper around the output_probe_stack_range routines. */
24962 const char *
24963 output_probe_stack_range (rtx reg1, rtx reg2, rtx reg3)
24964 {
24965 if (flag_stack_clash_protection)
24966 return output_probe_stack_range_stack_clash (reg1, reg2, reg3);
24967 else
24968 return output_probe_stack_range_1 (reg1, reg3);
24969 }
24970
24971 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
24972 with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2
24973 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
24974 deduce these equivalences by itself so it wasn't necessary to hold
24975 its hand so much. Don't be tempted to always supply d2_f_d_e with
24976 the actual cfa register, i.e. r31 when we are using a hard frame
24977 pointer. That fails when saving regs off r1, and sched moves the
24978 r31 setup past the reg saves. */
24979
24980 static rtx_insn *
24981 rs6000_frame_related (rtx_insn *insn, rtx reg, HOST_WIDE_INT val,
24982 rtx reg2, rtx repl2)
24983 {
24984 rtx repl;
24985
24986 if (REGNO (reg) == STACK_POINTER_REGNUM)
24987 {
24988 gcc_checking_assert (val == 0);
24989 repl = NULL_RTX;
24990 }
24991 else
24992 repl = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
24993 GEN_INT (val));
24994
24995 rtx pat = PATTERN (insn);
24996 if (!repl && !reg2)
24997 {
24998 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
24999 if (GET_CODE (pat) == PARALLEL)
25000 for (int i = 0; i < XVECLEN (pat, 0); i++)
25001 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25002 {
25003 rtx set = XVECEXP (pat, 0, i);
25004
25005 if (!REG_P (SET_SRC (set))
25006 || interesting_frame_related_regno (REGNO (SET_SRC (set))))
25007 RTX_FRAME_RELATED_P (set) = 1;
25008 }
25009 RTX_FRAME_RELATED_P (insn) = 1;
25010 return insn;
25011 }
25012
25013 /* We expect that 'pat' is either a SET or a PARALLEL containing
25014 SETs (and possibly other stuff). In a PARALLEL, all the SETs
25015 are important so they all have to be marked RTX_FRAME_RELATED_P.
25016 Call simplify_replace_rtx on the SETs rather than the whole insn
25017 so as to leave the other stuff alone (for example USE of r12). */
25018
25019 set_used_flags (pat);
25020 if (GET_CODE (pat) == SET)
25021 {
25022 if (repl)
25023 pat = simplify_replace_rtx (pat, reg, repl);
25024 if (reg2)
25025 pat = simplify_replace_rtx (pat, reg2, repl2);
25026 }
25027 else if (GET_CODE (pat) == PARALLEL)
25028 {
25029 pat = shallow_copy_rtx (pat);
25030 XVEC (pat, 0) = shallow_copy_rtvec (XVEC (pat, 0));
25031
25032 for (int i = 0; i < XVECLEN (pat, 0); i++)
25033 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25034 {
25035 rtx set = XVECEXP (pat, 0, i);
25036
25037 if (repl)
25038 set = simplify_replace_rtx (set, reg, repl);
25039 if (reg2)
25040 set = simplify_replace_rtx (set, reg2, repl2);
25041 XVECEXP (pat, 0, i) = set;
25042
25043 if (!REG_P (SET_SRC (set))
25044 || interesting_frame_related_regno (REGNO (SET_SRC (set))))
25045 RTX_FRAME_RELATED_P (set) = 1;
25046 }
25047 }
25048 else
25049 gcc_unreachable ();
25050
25051 RTX_FRAME_RELATED_P (insn) = 1;
25052 add_reg_note (insn, REG_FRAME_RELATED_EXPR, copy_rtx_if_shared (pat));
25053
25054 return insn;
25055 }
25056
25057 /* Returns an insn that has a vrsave set operation with the
25058 appropriate CLOBBERs. */
25059
25060 static rtx
25061 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
25062 {
25063 int nclobs, i;
25064 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
25065 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
25066
25067 clobs[0]
25068 = gen_rtx_SET (vrsave,
25069 gen_rtx_UNSPEC_VOLATILE (SImode,
25070 gen_rtvec (2, reg, vrsave),
25071 UNSPECV_SET_VRSAVE));
25072
25073 nclobs = 1;
25074
25075 /* We need to clobber the registers in the mask so the scheduler
25076 does not move sets to VRSAVE before sets of AltiVec registers.
25077
25078 However, if the function receives nonlocal gotos, reload will set
25079 all call saved registers live. We will end up with:
25080
25081 (set (reg 999) (mem))
25082 (parallel [ (set (reg vrsave) (unspec blah))
25083 (clobber (reg 999))])
25084
25085 The clobber will cause the store into reg 999 to be dead, and
25086 flow will attempt to delete an epilogue insn. In this case, we
25087 need an unspec use/set of the register. */
25088
25089 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
25090 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
25091 {
25092 if (!epiloguep || call_used_regs [i])
25093 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
25094 gen_rtx_REG (V4SImode, i));
25095 else
25096 {
25097 rtx reg = gen_rtx_REG (V4SImode, i);
25098
25099 clobs[nclobs++]
25100 = gen_rtx_SET (reg,
25101 gen_rtx_UNSPEC (V4SImode,
25102 gen_rtvec (1, reg), 27));
25103 }
25104 }
25105
25106 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
25107
25108 for (i = 0; i < nclobs; ++i)
25109 XVECEXP (insn, 0, i) = clobs[i];
25110
25111 return insn;
25112 }
25113
25114 static rtx
25115 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
25116 {
25117 rtx addr, mem;
25118
25119 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
25120 mem = gen_frame_mem (GET_MODE (reg), addr);
25121 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
25122 }
25123
25124 static rtx
25125 gen_frame_load (rtx reg, rtx frame_reg, int offset)
25126 {
25127 return gen_frame_set (reg, frame_reg, offset, false);
25128 }
25129
25130 static rtx
25131 gen_frame_store (rtx reg, rtx frame_reg, int offset)
25132 {
25133 return gen_frame_set (reg, frame_reg, offset, true);
25134 }
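/* For instance (editorial), gen_frame_store (r0, sp, 16) builds
   (set (mem (plus (reg 1) (const_int 16))) (reg 0)) and
   gen_frame_load swaps the two operands; both go through
   gen_frame_mem so the access carries the frame alias set.  */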
25135
25136 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
25137 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
25138
25139 static rtx_insn *
25140 emit_frame_save (rtx frame_reg, machine_mode mode,
25141 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
25142 {
25143 rtx reg;
25144
25145 /* Some cases that need register indexed addressing. */
25146 gcc_checking_assert (!(TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
25147 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode)));
25148
25149 reg = gen_rtx_REG (mode, regno);
25150 rtx_insn *insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
25151 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
25152 NULL_RTX, NULL_RTX);
25153 }
25154
25155 /* Emit an offset memory reference suitable for a frame store, while
25156 converting to a valid addressing mode. */
25157
25158 static rtx
25159 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
25160 {
25161 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, GEN_INT (offset)));
25162 }
25163
25164 #ifndef TARGET_FIX_AND_CONTINUE
25165 #define TARGET_FIX_AND_CONTINUE 0
25166 #endif
25167
25168 /* The first saved register is really GPR 13 or 14, FPR 14, or VR 20; we need the smallest. */
25169 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
25170 #define LAST_SAVRES_REGISTER 31
25171 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
25172
25173 enum {
25174 SAVRES_LR = 0x1,
25175 SAVRES_SAVE = 0x2,
25176 SAVRES_REG = 0x0c,
25177 SAVRES_GPR = 0,
25178 SAVRES_FPR = 4,
25179 SAVRES_VR = 8
25180 };
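/* Editorial note on how SEL values combine these bits: for example,
   (SAVRES_SAVE | SAVRES_VR) == 0xa requests an out-of-line vector
   save, while (SAVRES_GPR | SAVRES_LR) == 0x1 requests a GPR restore
   that also restores LR; the combined value indexes the second
   dimension of savres_routine_syms below.  */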
25181
25182 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
25183
25184 /* Temporary holding space for an out-of-line register save/restore
25185 routine name. */
25186 static char savres_routine_name[30];
25187
25188 /* Return the name for an out-of-line register save/restore routine.
25189 SEL encodes the register class (GPR/FPR/VR), whether we are saving or restoring, and whether LR is handled too. */
25190
25191 static char *
25192 rs6000_savres_routine_name (int regno, int sel)
25193 {
25194 const char *prefix = "";
25195 const char *suffix = "";
25196
25197 /* Different targets are supposed to define
25198 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
25199 routine name could be defined with:
25200
25201 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
25202
25203 This is a nice idea in theory, but in practice, things are
25204 complicated in several ways:
25205
25206 - ELF targets have save/restore routines for GPRs.
25207
25208 - PPC64 ELF targets have routines for save/restore of GPRs that
25209 differ in what they do with the link register, so having a set
25210 prefix doesn't work. (We only use one of the save routines at
25211 the moment, though.)
25212
25213 - PPC32 elf targets have "exit" versions of the restore routines
25214 that restore the link register and can save some extra space.
25215 These require an extra suffix. (There are also "tail" versions
25216 of the restore routines and "GOT" versions of the save routines,
25217 but we don't generate those at present. Same problems apply,
25218 though.)
25219
25220 We deal with all this by synthesizing our own prefix/suffix and
25221 using that for the simple sprintf call shown above. */
25222 if (DEFAULT_ABI == ABI_V4)
25223 {
25224 if (TARGET_64BIT)
25225 goto aix_names;
25226
25227 if ((sel & SAVRES_REG) == SAVRES_GPR)
25228 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
25229 else if ((sel & SAVRES_REG) == SAVRES_FPR)
25230 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
25231 else if ((sel & SAVRES_REG) == SAVRES_VR)
25232 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
25233 else
25234 abort ();
25235
25236 if ((sel & SAVRES_LR))
25237 suffix = "_x";
25238 }
25239 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25240 {
25241 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
25242 /* No out-of-line save/restore routines for GPRs on AIX. */
25243 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
25244 #endif
25245
25246 aix_names:
25247 if ((sel & SAVRES_REG) == SAVRES_GPR)
25248 prefix = ((sel & SAVRES_SAVE)
25249 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
25250 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
25251 else if ((sel & SAVRES_REG) == SAVRES_FPR)
25252 {
25253 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
25254 if ((sel & SAVRES_LR))
25255 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
25256 else
25257 #endif
25258 {
25259 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
25260 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
25261 }
25262 }
25263 else if ((sel & SAVRES_REG) == SAVRES_VR)
25264 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
25265 else
25266 abort ();
25267 }
25268
25269 if (DEFAULT_ABI == ABI_DARWIN)
25270 {
25271 /* The Darwin approach is (slightly) different, in order to be
25272 compatible with code generated by the system toolchain. There is a
25273 single symbol for the start of the save sequence, and the code here
25274 embeds an offset into that code on the basis of the first register
25275 to be saved. */
25276 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
25277 if ((sel & SAVRES_REG) == SAVRES_GPR)
25278 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
25279 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
25280 (regno - 13) * 4, prefix, regno);
25281 else if ((sel & SAVRES_REG) == SAVRES_FPR)
25282 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
25283 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
25284 else if ((sel & SAVRES_REG) == SAVRES_VR)
25285 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
25286 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
25287 else
25288 abort ();
25289 }
25290 else
25291 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
25292
25293 return savres_routine_name;
25294 }
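/* Examples of the synthesized names (editorial): on 32-bit SVR4,
   restoring GPRs from r28 upward together with LR gives
   "_restgpr_28_x"; on 64-bit AIX/ELFv2, the LR-saving GPR save
   starting at r26 gives "_savegpr0_26".  */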
25295
25296 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
25297 SEL selects the register class and the save/restore/LR handling, as above. */
25298
25299 static rtx
25300 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
25301 {
25302 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
25303 ? info->first_gp_reg_save
25304 : (sel & SAVRES_REG) == SAVRES_FPR
25305 ? info->first_fp_reg_save - 32
25306 : (sel & SAVRES_REG) == SAVRES_VR
25307 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
25308 : -1);
25309 rtx sym;
25310 int select = sel;
25311
25312 /* Don't generate bogus routine names. */
25313 gcc_assert (FIRST_SAVRES_REGISTER <= regno
25314 && regno <= LAST_SAVRES_REGISTER
25315 && select >= 0 && select <= 12);
25316
25317 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
25318
25319 if (sym == NULL)
25320 {
25321 char *name;
25322
25323 name = rs6000_savres_routine_name (regno, sel);
25324
25325 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
25326 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
25327 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
25328 }
25329
25330 return sym;
25331 }
25332
25333 /* Emit a sequence of insns, including a stack tie if needed, for
25334 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
25335 reset the stack pointer, but move the base of the frame into
25336 reg UPDT_REGNO for use by out-of-line register restore routines. */
25337
25338 static rtx
25339 rs6000_emit_stack_reset (rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
25340 unsigned updt_regno)
25341 {
25342 /* If there is nothing to do, don't do anything. */
25343 if (frame_off == 0 && REGNO (frame_reg_rtx) == updt_regno)
25344 return NULL_RTX;
25345
25346 rtx updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
25347
25348 /* This blockage is needed so that sched doesn't decide to move
25349 the sp change before the register restores. */
25350 if (DEFAULT_ABI == ABI_V4)
25351 return emit_insn (gen_stack_restore_tie (updt_reg_rtx, frame_reg_rtx,
25352 GEN_INT (frame_off)));
25353
25354 /* If we are restoring registers out-of-line, we will be using the
25355 "exit" variants of the restore routines, which will reset the
25356 stack for us. But we do need to point updt_reg into the
25357 right place for those routines. */
25358 if (frame_off != 0)
25359 return emit_insn (gen_add3_insn (updt_reg_rtx,
25360 frame_reg_rtx, GEN_INT (frame_off)));
25361 else
25362 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
25365 }
25366
25367 /* Return the register number used as a pointer by out-of-line
25368 save/restore functions. */
25369
25370 static inline unsigned
25371 ptr_regno_for_savres (int sel)
25372 {
25373 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25374 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
25375 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
25376 }
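/* So, restating the above for clarity (editorial): the AIX/ELFv2
   out-of-line GPR routines expect their pointer in r12 (r1 for the
   FPR and LR-handling variants), while the 32-bit SVR4 and Darwin
   routines use r11 (r1 for Darwin FPR saves).  */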
25377
25378 /* Construct a parallel rtx describing the effect of a call to an
25379 out-of-line register save/restore routine, and emit the insn
25380 or jump_insn as appropriate. */
25381
25382 static rtx_insn *
25383 rs6000_emit_savres_rtx (rs6000_stack_t *info,
25384 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
25385 machine_mode reg_mode, int sel)
25386 {
25387 int i;
25388 int offset, start_reg, end_reg, n_regs, use_reg;
25389 int reg_size = GET_MODE_SIZE (reg_mode);
25390 rtx sym;
25391 rtvec p;
25392 rtx par;
25393 rtx_insn *insn;
25394
25395 offset = 0;
25396 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
25397 ? info->first_gp_reg_save
25398 : (sel & SAVRES_REG) == SAVRES_FPR
25399 ? info->first_fp_reg_save
25400 : (sel & SAVRES_REG) == SAVRES_VR
25401 ? info->first_altivec_reg_save
25402 : -1);
25403 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
25404 ? 32
25405 : (sel & SAVRES_REG) == SAVRES_FPR
25406 ? 64
25407 : (sel & SAVRES_REG) == SAVRES_VR
25408 ? LAST_ALTIVEC_REGNO + 1
25409 : -1);
25410 n_regs = end_reg - start_reg;
25411 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
25412 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
25413 + n_regs);
25414
25415 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
25416 RTVEC_ELT (p, offset++) = ret_rtx;
25417
25418 RTVEC_ELT (p, offset++)
25419 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
25420
25421 sym = rs6000_savres_routine_sym (info, sel);
25422 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
25423
25424 use_reg = ptr_regno_for_savres (sel);
25425 if ((sel & SAVRES_REG) == SAVRES_VR)
25426 {
25427 /* Vector regs are saved/restored using [reg+reg] addressing. */
25428 RTVEC_ELT (p, offset++)
25429 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
25430 RTVEC_ELT (p, offset++)
25431 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
25432 }
25433 else
25434 RTVEC_ELT (p, offset++)
25435 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
25436
25437 for (i = 0; i < end_reg - start_reg; i++)
25438 RTVEC_ELT (p, i + offset)
25439 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
25440 frame_reg_rtx, save_area_offset + reg_size * i,
25441 (sel & SAVRES_SAVE) != 0);
25442
25443 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
25444 RTVEC_ELT (p, i + offset)
25445 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
25446
25447 par = gen_rtx_PARALLEL (VOIDmode, p);
25448
25449 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
25450 {
25451 insn = emit_jump_insn (par);
25452 JUMP_LABEL (insn) = ret_rtx;
25453 }
25454 else
25455 insn = emit_insn (par);
25456 return insn;
25457 }
25458
25459 /* Emit prologue code to store CR fields that need to be saved into REG. This
25460 function should only be called when moving the non-volatile CRs to REG, it
25461 is not a general purpose routine to move the entire set of CRs to REG.
25462 Specifically, gen_prologue_movesi_from_cr() does not contain uses of the
25463 volatile CRs. */
25464
25465 static void
25466 rs6000_emit_prologue_move_from_cr (rtx reg)
25467 {
25468 /* Only the ELFv2 ABI allows storing only selected fields. */
25469 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
25470 {
25471 int i, cr_reg[8], count = 0;
25472
25473 /* Collect CR fields that must be saved. */
25474 for (i = 0; i < 8; i++)
25475 if (save_reg_p (CR0_REGNO + i))
25476 cr_reg[count++] = i;
25477
25478 /* If it's just a single one, use mfcrf. */
25479 if (count == 1)
25480 {
25481 rtvec p = rtvec_alloc (1);
25482 rtvec r = rtvec_alloc (2);
25483 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
25484 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
25485 RTVEC_ELT (p, 0)
25486 = gen_rtx_SET (reg,
25487 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
25488
25489 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
25490 return;
25491 }
25492
25493 /* ??? It might be better to handle the count == 2 or 3 cases here
25494 as well, using logical operations to combine the values. */
25495 }
25496
25497 emit_insn (gen_prologue_movesi_from_cr (reg));
25498 }
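/* Example (editorial): if only CR2 must be saved, the single-field
   path above emits roughly "mfocrf rN,0x20" (field mask
   1 << (7 - 2)), avoiding a full mfcr of all eight fields.  */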
25499
25500 /* Return whether the split-stack arg pointer (r12) is used. */
25501
25502 static bool
25503 split_stack_arg_pointer_used_p (void)
25504 {
25505 /* If the pseudo holding the arg pointer is no longer a pseudo,
25506 then the arg pointer is used. */
25507 if (cfun->machine->split_stack_arg_pointer != NULL_RTX
25508 && (!REG_P (cfun->machine->split_stack_arg_pointer)
25509 || (REGNO (cfun->machine->split_stack_arg_pointer)
25510 < FIRST_PSEUDO_REGISTER)))
25511 return true;
25512
25513 /* Unfortunately we also need to do some code scanning, since
25514 r12 may have been substituted for the pseudo. */
25515 rtx_insn *insn;
25516 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
25517 FOR_BB_INSNS (bb, insn)
25518 if (NONDEBUG_INSN_P (insn))
25519 {
25520 /* A call destroys r12. */
25521 if (CALL_P (insn))
25522 return false;
25523
25524 df_ref use;
25525 FOR_EACH_INSN_USE (use, insn)
25526 {
25527 rtx x = DF_REF_REG (use);
25528 if (REG_P (x) && REGNO (x) == 12)
25529 return true;
25530 }
25531 df_ref def;
25532 FOR_EACH_INSN_DEF (def, insn)
25533 {
25534 rtx x = DF_REF_REG (def);
25535 if (REG_P (x) && REGNO (x) == 12)
25536 return false;
25537 }
25538 }
25539 return bitmap_bit_p (DF_LR_OUT (bb), 12);
25540 }
25541
25542 /* Return whether we need to emit an ELFv2 global entry point prologue. */
25543
25544 static bool
25545 rs6000_global_entry_point_needed_p (void)
25546 {
25547 /* Only needed for the ELFv2 ABI. */
25548 if (DEFAULT_ABI != ABI_ELFv2)
25549 return false;
25550
25551 /* With -msingle-pic-base, we assume the whole program shares the same
25552 TOC, so no global entry point prologues are needed anywhere. */
25553 if (TARGET_SINGLE_PIC_BASE)
25554 return false;
25555
25556 /* Ensure we have a global entry point for thunks. ??? We could
25557 avoid that if the target routine doesn't need a global entry point,
25558 but we do not know whether this is the case at this point. */
25559 if (cfun->is_thunk)
25560 return true;
25561
25562 /* For regular functions, rs6000_emit_prologue sets this flag if the
25563 routine ever uses the TOC pointer. */
25564 return cfun->machine->r2_setup_needed;
25565 }
25566
25567 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
25568 static sbitmap
25569 rs6000_get_separate_components (void)
25570 {
25571 rs6000_stack_t *info = rs6000_stack_info ();
25572
25573 if (WORLD_SAVE_P (info))
25574 return NULL;
25575
25576 gcc_assert (!(info->savres_strategy & SAVE_MULTIPLE)
25577 && !(info->savres_strategy & REST_MULTIPLE));
25578
25579 /* Component 0 is the save/restore of LR (done via GPR0).
25580 Component 2 is the save of the TOC (GPR2).
25581 Components 13..31 are the save/restore of GPR13..GPR31.
25582 Components 46..63 are the save/restore of FPR14..FPR31. */
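/* For example (illustrative only): a function that needs just r31 and
   LR saved would use components 31 and 0, letting the separate
   shrink-wrapping pass place each save/restore only on the paths
   that actually need it.  */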
25583
25584 cfun->machine->n_components = 64;
25585
25586 sbitmap components = sbitmap_alloc (cfun->machine->n_components);
25587 bitmap_clear (components);
25588
25589 int reg_size = TARGET_32BIT ? 4 : 8;
25590 int fp_reg_size = 8;
25591
25592 /* The GPRs we need saved to the frame. */
25593 if ((info->savres_strategy & SAVE_INLINE_GPRS)
25594 && (info->savres_strategy & REST_INLINE_GPRS))
25595 {
25596 int offset = info->gp_save_offset;
25597 if (info->push_p)
25598 offset += info->total_size;
25599
25600 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
25601 {
25602 if (IN_RANGE (offset, -0x8000, 0x7fff)
25603 && save_reg_p (regno))
25604 bitmap_set_bit (components, regno);
25605
25606 offset += reg_size;
25607 }
25608 }
25609
25610 /* Don't mess with the hard frame pointer. */
25611 if (frame_pointer_needed)
25612 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
25613
25614 /* Don't mess with the fixed TOC register. */
25615 if ((TARGET_TOC && TARGET_MINIMAL_TOC)
25616 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
25617 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
25618 bitmap_clear_bit (components, RS6000_PIC_OFFSET_TABLE_REGNUM);
25619
25620 /* The FPRs we need saved to the frame. */
25621 if ((info->savres_strategy & SAVE_INLINE_FPRS)
25622 && (info->savres_strategy & REST_INLINE_FPRS))
25623 {
25624 int offset = info->fp_save_offset;
25625 if (info->push_p)
25626 offset += info->total_size;
25627
25628 for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++)
25629 {
25630 if (IN_RANGE (offset, -0x8000, 0x7fff) && save_reg_p (regno))
25631 bitmap_set_bit (components, regno);
25632
25633 offset += fp_reg_size;
25634 }
25635 }
25636
25637 /* Optimize LR save and restore if we can. This is component 0. Any
25638 out-of-line register save/restore routines need LR. */
25639 if (info->lr_save_p
25640 && !(flag_pic && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
25641 && (info->savres_strategy & SAVE_INLINE_GPRS)
25642 && (info->savres_strategy & REST_INLINE_GPRS)
25643 && (info->savres_strategy & SAVE_INLINE_FPRS)
25644 && (info->savres_strategy & REST_INLINE_FPRS)
25645 && (info->savres_strategy & SAVE_INLINE_VRS)
25646 && (info->savres_strategy & REST_INLINE_VRS))
25647 {
25648 int offset = info->lr_save_offset;
25649 if (info->push_p)
25650 offset += info->total_size;
25651 if (IN_RANGE (offset, -0x8000, 0x7fff))
25652 bitmap_set_bit (components, 0);
25653 }
25654
25655 /* Optimize saving the TOC. This is component 2. */
25656 if (cfun->machine->save_toc_in_prologue)
25657 bitmap_set_bit (components, 2);
25658
25659 return components;
25660 }
25661
25662 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
25663 static sbitmap
25664 rs6000_components_for_bb (basic_block bb)
25665 {
25666 rs6000_stack_t *info = rs6000_stack_info ();
25667
25668 bitmap in = DF_LIVE_IN (bb);
25669 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
25670 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
25671
25672 sbitmap components = sbitmap_alloc (cfun->machine->n_components);
25673 bitmap_clear (components);
25674
25675 /* A register is used in a bb if it is in the IN, GEN, or KILL sets. */
25676
25677 /* GPRs. */
25678 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
25679 if (bitmap_bit_p (in, regno)
25680 || bitmap_bit_p (gen, regno)
25681 || bitmap_bit_p (kill, regno))
25682 bitmap_set_bit (components, regno);
25683
25684 /* FPRs. */
25685 for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++)
25686 if (bitmap_bit_p (in, regno)
25687 || bitmap_bit_p (gen, regno)
25688 || bitmap_bit_p (kill, regno))
25689 bitmap_set_bit (components, regno);
25690
25691 /* The link register. */
25692 if (bitmap_bit_p (in, LR_REGNO)
25693 || bitmap_bit_p (gen, LR_REGNO)
25694 || bitmap_bit_p (kill, LR_REGNO))
25695 bitmap_set_bit (components, 0);
25696
25697 /* The TOC save. */
25698 if (bitmap_bit_p (in, TOC_REGNUM)
25699 || bitmap_bit_p (gen, TOC_REGNUM)
25700 || bitmap_bit_p (kill, TOC_REGNUM))
25701 bitmap_set_bit (components, 2);
25702
25703 return components;
25704 }
25705
25706 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
25707 static void
25708 rs6000_disqualify_components (sbitmap components, edge e,
25709 sbitmap edge_components, bool /*is_prologue*/)
25710 {
25711 /* Our LR pro/epilogue code moves LR via R0, so R0 had better not be
25712 live where we want to place that code. */
25713 if (bitmap_bit_p (edge_components, 0)
25714 && bitmap_bit_p (DF_LIVE_IN (e->dest), 0))
25715 {
25716 if (dump_file)
25717 fprintf (dump_file, "Disqualifying LR because GPR0 is live "
25718 "on entry to bb %d\n", e->dest->index);
25719 bitmap_clear_bit (components, 0);
25720 }
25721 }
25722
25723 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
25724 static void
25725 rs6000_emit_prologue_components (sbitmap components)
25726 {
25727 rs6000_stack_t *info = rs6000_stack_info ();
25728 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
25729 ? HARD_FRAME_POINTER_REGNUM
25730 : STACK_POINTER_REGNUM);
25731
25732 machine_mode reg_mode = Pmode;
25733 int reg_size = TARGET_32BIT ? 4 : 8;
25734 machine_mode fp_reg_mode = TARGET_HARD_FLOAT ? DFmode : SFmode;
25735 int fp_reg_size = 8;
25736
25737 /* Prologue for LR. */
25738 if (bitmap_bit_p (components, 0))
25739 {
25740 rtx lr = gen_rtx_REG (reg_mode, LR_REGNO);
25741 rtx reg = gen_rtx_REG (reg_mode, 0);
25742 rtx_insn *insn = emit_move_insn (reg, lr);
25743 RTX_FRAME_RELATED_P (insn) = 1;
25744 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (reg, lr));
25745
25746 int offset = info->lr_save_offset;
25747 if (info->push_p)
25748 offset += info->total_size;
25749
25750 insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
25751 RTX_FRAME_RELATED_P (insn) = 1;
25752 rtx mem = copy_rtx (SET_DEST (single_set (insn)));
25753 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, lr));
25754 }
25755
25756 /* Prologue for TOC. */
25757 if (bitmap_bit_p (components, 2))
25758 {
25759 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
25760 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25761 emit_insn (gen_frame_store (reg, sp_reg, RS6000_TOC_SAVE_SLOT));
25762 }
25763
25764 /* Prologue for the GPRs. */
25765 int offset = info->gp_save_offset;
25766 if (info->push_p)
25767 offset += info->total_size;
25768
25769 for (int i = info->first_gp_reg_save; i < 32; i++)
25770 {
25771 if (bitmap_bit_p (components, i))
25772 {
25773 rtx reg = gen_rtx_REG (reg_mode, i);
25774 rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
25775 RTX_FRAME_RELATED_P (insn) = 1;
25776 rtx set = copy_rtx (single_set (insn));
25777 add_reg_note (insn, REG_CFA_OFFSET, set);
25778 }
25779
25780 offset += reg_size;
25781 }
25782
25783 /* Prologue for the FPRs. */
25784 offset = info->fp_save_offset;
25785 if (info->push_p)
25786 offset += info->total_size;
25787
25788 for (int i = info->first_fp_reg_save; i < 64; i++)
25789 {
25790 if (bitmap_bit_p (components, i))
25791 {
25792 rtx reg = gen_rtx_REG (fp_reg_mode, i);
25793 rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
25794 RTX_FRAME_RELATED_P (insn) = 1;
25795 rtx set = copy_rtx (single_set (insn));
25796 add_reg_note (insn, REG_CFA_OFFSET, set);
25797 }
25798
25799 offset += fp_reg_size;
25800 }
25801 }
25802
25803 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
25804 static void
25805 rs6000_emit_epilogue_components (sbitmap components)
25806 {
25807 rs6000_stack_t *info = rs6000_stack_info ();
25808 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
25809 ? HARD_FRAME_POINTER_REGNUM
25810 : STACK_POINTER_REGNUM);
25811
25812 machine_mode reg_mode = Pmode;
25813 int reg_size = TARGET_32BIT ? 4 : 8;
25814
25815 machine_mode fp_reg_mode = TARGET_HARD_FLOAT ? DFmode : SFmode;
25816 int fp_reg_size = 8;
25817
25818 /* Epilogue for the FPRs. */
25819 int offset = info->fp_save_offset;
25820 if (info->push_p)
25821 offset += info->total_size;
25822
25823 for (int i = info->first_fp_reg_save; i < 64; i++)
25824 {
25825 if (bitmap_bit_p (components, i))
25826 {
25827 rtx reg = gen_rtx_REG (fp_reg_mode, i);
25828 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
25829 RTX_FRAME_RELATED_P (insn) = 1;
25830 add_reg_note (insn, REG_CFA_RESTORE, reg);
25831 }
25832
25833 offset += fp_reg_size;
25834 }
25835
25836 /* Epilogue for the GPRs. */
25837 offset = info->gp_save_offset;
25838 if (info->push_p)
25839 offset += info->total_size;
25840
25841 for (int i = info->first_gp_reg_save; i < 32; i++)
25842 {
25843 if (bitmap_bit_p (components, i))
25844 {
25845 rtx reg = gen_rtx_REG (reg_mode, i);
25846 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
25847 RTX_FRAME_RELATED_P (insn) = 1;
25848 add_reg_note (insn, REG_CFA_RESTORE, reg);
25849 }
25850
25851 offset += reg_size;
25852 }
25853
25854 /* Epilogue for LR. */
25855 if (bitmap_bit_p (components, 0))
25856 {
25857 int offset = info->lr_save_offset;
25858 if (info->push_p)
25859 offset += info->total_size;
25860
25861 rtx reg = gen_rtx_REG (reg_mode, 0);
25862 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
25863
25864 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
25865 insn = emit_move_insn (lr, reg);
25866 RTX_FRAME_RELATED_P (insn) = 1;
25867 add_reg_note (insn, REG_CFA_RESTORE, lr);
25868 }
25869 }
25870
25871 /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
25872 static void
25873 rs6000_set_handled_components (sbitmap components)
25874 {
25875 rs6000_stack_t *info = rs6000_stack_info ();
25876
25877 for (int i = info->first_gp_reg_save; i < 32; i++)
25878 if (bitmap_bit_p (components, i))
25879 cfun->machine->gpr_is_wrapped_separately[i] = true;
25880
25881 for (int i = info->first_fp_reg_save; i < 64; i++)
25882 if (bitmap_bit_p (components, i))
25883 cfun->machine->fpr_is_wrapped_separately[i - 32] = true;
25884
25885 if (bitmap_bit_p (components, 0))
25886 cfun->machine->lr_is_wrapped_separately = true;
25887
25888 if (bitmap_bit_p (components, 2))
25889 cfun->machine->toc_is_wrapped_separately = true;
25890 }
25891
25892 /* VRSAVE is a bit vector representing which AltiVec registers
25893 are used. The OS uses this to determine which vector
25894 registers to save on a context switch. We need to save
25895 VRSAVE on the stack frame, add whatever AltiVec registers we
25896 used in this function, and do the corresponding magic in the
25897 epilogue. */
25898 static void
25899 emit_vrsave_prologue (rs6000_stack_t *info, int save_regno,
25900 HOST_WIDE_INT frame_off, rtx frame_reg_rtx)
25901 {
25902 /* Get VRSAVE into a GPR. */
25903 rtx reg = gen_rtx_REG (SImode, save_regno);
25904 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
25905 if (TARGET_MACHO)
25906 emit_insn (gen_get_vrsave_internal (reg));
25907 else
25908 emit_insn (gen_rtx_SET (reg, vrsave));
25909
25910 /* Save VRSAVE. */
25911 int offset = info->vrsave_save_offset + frame_off;
25912 emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
25913
25914 /* Include the registers in the mask. */
25915 emit_insn (gen_iorsi3 (reg, reg, GEN_INT (info->vrsave_mask)));
25916
25917 emit_insn (generate_set_vrsave (reg, info, 0));
25918 }
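/* Schematically, the sequence built above is roughly

       mfspr rN,256          # read VRSAVE (SPR 256)
       stw rN,off(frame)     # save the caller's value
       oris/ori rN,rN,mask   # add the AltiVec regs this function uses
       mtspr 256,rN          # install the new mask

   (illustrative; the exact insns come from generate_set_vrsave and
   the iorsi3 above).  */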
25919
25920 /* Set up the arg pointer (r12) for -fsplit-stack code. If __morestack was
25921 called, it left the arg pointer to the old stack in r29. Otherwise, the
25922 arg pointer is the top of the current frame. */
25923 static void
25924 emit_split_stack_prologue (rs6000_stack_t *info, rtx_insn *sp_adjust,
25925 HOST_WIDE_INT frame_off, rtx frame_reg_rtx)
25926 {
25927 cfun->machine->split_stack_argp_used = true;
25928
25929 if (sp_adjust)
25930 {
25931 rtx r12 = gen_rtx_REG (Pmode, 12);
25932 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25933 rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx);
25934 emit_insn_before (set_r12, sp_adjust);
25935 }
25936 else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
25937 {
25938 rtx r12 = gen_rtx_REG (Pmode, 12);
25939 if (frame_off == 0)
25940 emit_move_insn (r12, frame_reg_rtx);
25941 else
25942 emit_insn (gen_add3_insn (r12, frame_reg_rtx, GEN_INT (frame_off)));
25943 }
25944
25945 if (info->push_p)
25946 {
25947 rtx r12 = gen_rtx_REG (Pmode, 12);
25948 rtx r29 = gen_rtx_REG (Pmode, 29);
25949 rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO);
25950 rtx not_more = gen_label_rtx ();
25951 rtx jump;
25952
25953 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
25954 gen_rtx_GEU (VOIDmode, cr7, const0_rtx),
25955 gen_rtx_LABEL_REF (VOIDmode, not_more),
25956 pc_rtx);
25957 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
25958 JUMP_LABEL (jump) = not_more;
25959 LABEL_NUSES (not_more) += 1;
25960 emit_move_insn (r12, r29);
25961 emit_label (not_more);
25962 }
25963 }
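/* Schematic form of the conditional move emitted above when the
   frame pushes (illustrative only):

       bge- cr7,0f           # __morestack was not called
       mr   12,29            # arg pointer to old stack left in r29
   0:

   cr7 is assumed to still hold the result of the earlier split-stack
   limit check.  */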
25964
25965 /* Emit function prologue as insns. */
25966
25967 void
25968 rs6000_emit_prologue (void)
25969 {
25970 rs6000_stack_t *info = rs6000_stack_info ();
25971 machine_mode reg_mode = Pmode;
25972 int reg_size = TARGET_32BIT ? 4 : 8;
25973 machine_mode fp_reg_mode = TARGET_HARD_FLOAT ? DFmode : SFmode;
25974 int fp_reg_size = 8;
25975 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25976 rtx frame_reg_rtx = sp_reg_rtx;
25977 unsigned int cr_save_regno;
25978 rtx cr_save_rtx = NULL_RTX;
25979 rtx_insn *insn;
25980 int strategy;
25981 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
25982 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
25983 && call_used_regs[STATIC_CHAIN_REGNUM]);
25984 int using_split_stack = (flag_split_stack
25985 && (lookup_attribute ("no_split_stack",
25986 DECL_ATTRIBUTES (cfun->decl))
25987 == NULL));
25988
25989 /* Offset to top of frame for frame_reg and sp respectively. */
25990 HOST_WIDE_INT frame_off = 0;
25991 HOST_WIDE_INT sp_off = 0;
25992 /* sp_adjust is the stack adjusting instruction, tracked so that the
25993 insn setting up the split-stack arg pointer can be emitted just
25994 prior to it, when r12 is not used here for other purposes. */
25995 rtx_insn *sp_adjust = 0;
25996
25997 #if CHECKING_P
25998 /* Track and check usage of r0, r11, r12. */
25999 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
26000 #define START_USE(R) do \
26001 { \
26002 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
26003 reg_inuse |= 1 << (R); \
26004 } while (0)
26005 #define END_USE(R) do \
26006 { \
26007 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
26008 reg_inuse &= ~(1 << (R)); \
26009 } while (0)
26010 #define NOT_INUSE(R) do \
26011 { \
26012 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
26013 } while (0)
26014 #else
26015 #define START_USE(R) do {} while (0)
26016 #define END_USE(R) do {} while (0)
26017 #define NOT_INUSE(R) do {} while (0)
26018 #endif
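/* Usage sketch for the checking macros above: START_USE (11) asserts
   r11 is currently free and marks it live, END_USE (11) releases it,
   and NOT_INUSE (0) merely asserts r0 is free.  With checking enabled
   these catch accidental double uses of r0/r11/r12 in the prologue
   code below; in release builds they expand to nothing.  */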
26019
26020 if (DEFAULT_ABI == ABI_ELFv2
26021 && !TARGET_SINGLE_PIC_BASE)
26022 {
26023 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
26024
26025 /* With -mminimal-toc we may generate an extra use of r2 below. */
26026 if (TARGET_TOC && TARGET_MINIMAL_TOC
26027 && !constant_pool_empty_p ())
26028 cfun->machine->r2_setup_needed = true;
26029 }
26030
26031
26032 if (flag_stack_usage_info)
26033 current_function_static_stack_size = info->total_size;
26034
26035 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
26036 {
26037 HOST_WIDE_INT size = info->total_size;
26038
26039 if (crtl->is_leaf && !cfun->calls_alloca)
26040 {
26041 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
26042 rs6000_emit_probe_stack_range (get_stack_check_protect (),
26043 size - get_stack_check_protect ());
26044 }
26045 else if (size > 0)
26046 rs6000_emit_probe_stack_range (get_stack_check_protect (), size);
26047 }
26048
26049 if (TARGET_FIX_AND_CONTINUE)
26050 {
26051 /* gdb on darwin arranges to forward a function from the old
26052 address by modifying the first 5 instructions of the function
26053 to branch to the overriding function. This is necessary to
26054 permit function pointers that point to the old function to
26055 actually forward to the new function. */
26056 emit_insn (gen_nop ());
26057 emit_insn (gen_nop ());
26058 emit_insn (gen_nop ());
26059 emit_insn (gen_nop ());
26060 emit_insn (gen_nop ());
26061 }
26062
26063 /* Handle world saves specially here. */
26064 if (WORLD_SAVE_P (info))
26065 {
26066 int i, j, sz;
26067 rtx treg;
26068 rtvec p;
26069 rtx reg0;
26070
26071 /* save_world expects lr in r0. */
26072 reg0 = gen_rtx_REG (Pmode, 0);
26073 if (info->lr_save_p)
26074 {
26075 insn = emit_move_insn (reg0,
26076 gen_rtx_REG (Pmode, LR_REGNO));
26077 RTX_FRAME_RELATED_P (insn) = 1;
26078 }
26079
26080 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
26081 assumptions about the offsets of various bits of the stack
26082 frame. */
26083 gcc_assert (info->gp_save_offset == -220
26084 && info->fp_save_offset == -144
26085 && info->lr_save_offset == 8
26086 && info->cr_save_offset == 4
26087 && info->push_p
26088 && info->lr_save_p
26089 && (!crtl->calls_eh_return
26090 || info->ehrd_offset == -432)
26091 && info->vrsave_save_offset == -224
26092 && info->altivec_save_offset == -416);
26093
26094 treg = gen_rtx_REG (SImode, 11);
26095 emit_move_insn (treg, GEN_INT (-info->total_size));
26096
26097 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
26098 in R11. It also clobbers R12, so beware! */
26099
26100 /* Preserve CR2 for save_world prologues. */
26101 sz = 5;
26102 sz += 32 - info->first_gp_reg_save;
26103 sz += 64 - info->first_fp_reg_save;
26104 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
26105 p = rtvec_alloc (sz);
26106 j = 0;
26107 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
26108 gen_rtx_REG (SImode,
26109 LR_REGNO));
26110 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
26111 gen_rtx_SYMBOL_REF (Pmode,
26112 "*save_world"));
26113 /* We do floats first so that the instruction pattern matches
26114 properly. */
26115 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
26116 RTVEC_ELT (p, j++)
26117 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT ? DFmode : SFmode,
26118 info->first_fp_reg_save + i),
26119 frame_reg_rtx,
26120 info->fp_save_offset + frame_off + 8 * i);
26121 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
26122 RTVEC_ELT (p, j++)
26123 = gen_frame_store (gen_rtx_REG (V4SImode,
26124 info->first_altivec_reg_save + i),
26125 frame_reg_rtx,
26126 info->altivec_save_offset + frame_off + 16 * i);
26127 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
26128 RTVEC_ELT (p, j++)
26129 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
26130 frame_reg_rtx,
26131 info->gp_save_offset + frame_off + reg_size * i);
26132
26133 /* CR register traditionally saved as CR2. */
26134 RTVEC_ELT (p, j++)
26135 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
26136 frame_reg_rtx, info->cr_save_offset + frame_off);
26137 /* Explain the use of R0. */
26138 if (info->lr_save_p)
26139 RTVEC_ELT (p, j++)
26140 = gen_frame_store (reg0,
26141 frame_reg_rtx, info->lr_save_offset + frame_off);
26142 /* Explain what happens to the stack pointer. */
26143 {
26144 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
26145 RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval);
26146 }
26147
26148 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
26149 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
26150 treg, GEN_INT (-info->total_size));
26151 sp_off = frame_off = info->total_size;
26152 }
26153
26154 strategy = info->savres_strategy;
26155
26156 /* For V.4, update stack before we do any saving and set back pointer. */
26157 if (! WORLD_SAVE_P (info)
26158 && info->push_p
26159 && (DEFAULT_ABI == ABI_V4
26160 || crtl->calls_eh_return))
26161 {
26162 bool need_r11 = (!(strategy & SAVE_INLINE_FPRS)
26163 || !(strategy & SAVE_INLINE_GPRS)
26164 || !(strategy & SAVE_INLINE_VRS));
26165 int ptr_regno = -1;
26166 rtx ptr_reg = NULL_RTX;
26167 int ptr_off = 0;
26168
26169 if (info->total_size < 32767)
26170 frame_off = info->total_size;
26171 else if (need_r11)
26172 ptr_regno = 11;
26173 else if (info->cr_save_p
26174 || info->lr_save_p
26175 || info->first_fp_reg_save < 64
26176 || info->first_gp_reg_save < 32
26177 || info->altivec_size != 0
26178 || info->vrsave_size != 0
26179 || crtl->calls_eh_return)
26180 ptr_regno = 12;
26181 else
26182 {
26183 /* The prologue won't be saving any regs so there is no need
26184 to set up a frame register to access any frame save area.
26185 We also won't be using frame_off anywhere below, but set
26186 the correct value anyway to protect against future
26187 changes to this function. */
26188 frame_off = info->total_size;
26189 }
26190 if (ptr_regno != -1)
26191 {
26192 /* Set up the frame offset to that needed by the first
26193 out-of-line save function. */
26194 START_USE (ptr_regno);
26195 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
26196 frame_reg_rtx = ptr_reg;
26197 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
26198 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
26199 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
26200 ptr_off = info->gp_save_offset + info->gp_size;
26201 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
26202 ptr_off = info->altivec_save_offset + info->altivec_size;
26203 frame_off = -ptr_off;
26204 }
26205 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
26206 ptr_reg, ptr_off);
26207 if (REGNO (frame_reg_rtx) == 12)
26208 sp_adjust = 0;
26209 sp_off = info->total_size;
26210 if (frame_reg_rtx != sp_reg_rtx)
26211 rs6000_emit_stack_tie (frame_reg_rtx, false);
26212 }
26213
26214 /* If we use the link register, get it into r0. */
26215 if (!WORLD_SAVE_P (info) && info->lr_save_p
26216 && !cfun->machine->lr_is_wrapped_separately)
26217 {
26218 rtx addr, reg, mem;
26219
26220 reg = gen_rtx_REG (Pmode, 0);
26221 START_USE (0);
26222 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
26223 RTX_FRAME_RELATED_P (insn) = 1;
26224
26225 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
26226 | SAVE_NOINLINE_FPRS_SAVES_LR)))
26227 {
26228 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
26229 GEN_INT (info->lr_save_offset + frame_off));
26230 mem = gen_rtx_MEM (Pmode, addr);
26231 /* This store must not use rs6000_sr_alias_set, because
26232 __builtin_return_address reads the LR save slot. */
26233
26234 insn = emit_move_insn (mem, reg);
26235 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
26236 NULL_RTX, NULL_RTX);
26237 END_USE (0);
26238 }
26239 }
26240
26241 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
26242 r12 will be needed by out-of-line gpr restore. */
26243 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26244 && !(strategy & (SAVE_INLINE_GPRS
26245 | SAVE_NOINLINE_GPRS_SAVES_LR))
26246 ? 11 : 12);
26247 if (!WORLD_SAVE_P (info)
26248 && info->cr_save_p
26249 && REGNO (frame_reg_rtx) != cr_save_regno
26250 && !(using_static_chain_p && cr_save_regno == 11)
26251 && !(using_split_stack && cr_save_regno == 12 && sp_adjust))
26252 {
26253 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
26254 START_USE (cr_save_regno);
26255 rs6000_emit_prologue_move_from_cr (cr_save_rtx);
26256 }
26257
26258 /* Do any required saving of fpr's. If only one or two to save, do
26259 it ourselves. Otherwise, call function. */
26260 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
26261 {
26262 int offset = info->fp_save_offset + frame_off;
26263 for (int i = info->first_fp_reg_save; i < 64; i++)
26264 {
26265 if (save_reg_p (i)
26266 && !cfun->machine->fpr_is_wrapped_separately[i - 32])
26267 emit_frame_save (frame_reg_rtx, fp_reg_mode, i, offset,
26268 sp_off - frame_off);
26269
26270 offset += fp_reg_size;
26271 }
26272 }
26273 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
26274 {
26275 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
26276 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
26277 unsigned ptr_regno = ptr_regno_for_savres (sel);
26278 rtx ptr_reg = frame_reg_rtx;
26279
26280 if (REGNO (frame_reg_rtx) == ptr_regno)
26281 gcc_checking_assert (frame_off == 0);
26282 else
26283 {
26284 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
26285 NOT_INUSE (ptr_regno);
26286 emit_insn (gen_add3_insn (ptr_reg,
26287 frame_reg_rtx, GEN_INT (frame_off)));
26288 }
26289 insn = rs6000_emit_savres_rtx (info, ptr_reg,
26290 info->fp_save_offset,
26291 info->lr_save_offset,
26292 DFmode, sel);
26293 rs6000_frame_related (insn, ptr_reg, sp_off,
26294 NULL_RTX, NULL_RTX);
26295 if (lr)
26296 END_USE (0);
26297 }
26298
26299 /* Save GPRs. This is done as a PARALLEL if we are using
26300 the store-multiple instructions. */
26301 if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
26302 {
26303 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
26304 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
26305 unsigned ptr_regno = ptr_regno_for_savres (sel);
26306 rtx ptr_reg = frame_reg_rtx;
26307 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
26308 int end_save = info->gp_save_offset + info->gp_size;
26309 int ptr_off;
26310
26311 if (ptr_regno == 12)
26312 sp_adjust = 0;
26313 if (!ptr_set_up)
26314 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
26315
26316 /* Need to adjust r11 (r12) if we saved any FPRs. */
26317 if (end_save + frame_off != 0)
26318 {
26319 rtx offset = GEN_INT (end_save + frame_off);
26320
26321 if (ptr_set_up)
26322 frame_off = -end_save;
26323 else
26324 NOT_INUSE (ptr_regno);
26325 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
26326 }
26327 else if (!ptr_set_up)
26328 {
26329 NOT_INUSE (ptr_regno);
26330 emit_move_insn (ptr_reg, frame_reg_rtx);
26331 }
26332 ptr_off = -end_save;
26333 insn = rs6000_emit_savres_rtx (info, ptr_reg,
26334 info->gp_save_offset + ptr_off,
26335 info->lr_save_offset + ptr_off,
26336 reg_mode, sel);
26337 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
26338 NULL_RTX, NULL_RTX);
26339 if (lr)
26340 END_USE (0);
26341 }
26342 else if (!WORLD_SAVE_P (info) && (strategy & SAVE_MULTIPLE))
26343 {
26344 rtvec p;
26345 int i;
26346 p = rtvec_alloc (32 - info->first_gp_reg_save);
26347 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
26348 RTVEC_ELT (p, i)
26349 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
26350 frame_reg_rtx,
26351 info->gp_save_offset + frame_off + reg_size * i);
26352 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
26353 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
26354 NULL_RTX, NULL_RTX);
26355 }
26356 else if (!WORLD_SAVE_P (info))
26357 {
26358 int offset = info->gp_save_offset + frame_off;
26359 for (int i = info->first_gp_reg_save; i < 32; i++)
26360 {
26361 if (save_reg_p (i)
26362 && !cfun->machine->gpr_is_wrapped_separately[i])
26363 emit_frame_save (frame_reg_rtx, reg_mode, i, offset,
26364 sp_off - frame_off);
26365
26366 offset += reg_size;
26367 }
26368 }
26369
26370 if (crtl->calls_eh_return)
26371 {
26372 unsigned int i;
26373 rtvec p;
26374
26375 for (i = 0; ; ++i)
26376 {
26377 unsigned int regno = EH_RETURN_DATA_REGNO (i);
26378 if (regno == INVALID_REGNUM)
26379 break;
26380 }
26381
26382 p = rtvec_alloc (i);
26383
26384 for (i = 0; ; ++i)
26385 {
26386 unsigned int regno = EH_RETURN_DATA_REGNO (i);
26387 if (regno == INVALID_REGNUM)
26388 break;
26389
26390 rtx set
26391 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
26392 sp_reg_rtx,
26393 info->ehrd_offset + sp_off + reg_size * (int) i);
26394 RTVEC_ELT (p, i) = set;
26395 RTX_FRAME_RELATED_P (set) = 1;
26396 }
26397
26398 insn = emit_insn (gen_blockage ());
26399 RTX_FRAME_RELATED_P (insn) = 1;
26400 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
26401 }
26402
26403 /* In AIX ABI we need to make sure r2 is really saved. */
26404 if (TARGET_AIX && crtl->calls_eh_return)
26405 {
26406 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
26407 rtx join_insn, note;
26408 rtx_insn *save_insn;
26409 long toc_restore_insn;
26410
26411 tmp_reg = gen_rtx_REG (Pmode, 11);
26412 tmp_reg_si = gen_rtx_REG (SImode, 11);
26413 if (using_static_chain_p)
26414 {
26415 START_USE (0);
26416 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
26417 }
26418 else
26419 START_USE (11);
26420 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
26421 /* Peek at the instruction to which this function returns. If it's
26422 restoring r2, then we know we've already saved r2. We can't
26423 unconditionally save r2 because the value we have will already
26424 be updated if we arrived at this function via a plt call or
26425 toc adjusting stub. */
26426 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
26427 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
26428 + RS6000_TOC_SAVE_SLOT);
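/* Decoding the magic numbers above: 0xE8410000 is "ld 2,0(1)" and
   0x80410000 is "lwz 2,0(1)", so adding RS6000_TOC_SAVE_SLOT forms
   the exact TOC-restore instruction a call stub would place at the
   return address, e.g. "ld 2,24(1)" under ELFv2 (offset shown for
   illustration).  */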
26429 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
26430 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
26431 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
26432 validate_condition_mode (EQ, CCUNSmode);
26433 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
26434 emit_insn (gen_rtx_SET (compare_result,
26435 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
26436 toc_save_done = gen_label_rtx ();
26437 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
26438 gen_rtx_EQ (VOIDmode, compare_result,
26439 const0_rtx),
26440 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
26441 pc_rtx);
26442 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
26443 JUMP_LABEL (jump) = toc_save_done;
26444 LABEL_NUSES (toc_save_done) += 1;
26445
26446 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
26447 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
26448 sp_off - frame_off);
26449
26450 emit_label (toc_save_done);
26451
26452 /* ??? If we leave SAVE_INSN marked as saving R2, then we'll
26453 have a CFG that has different saves along different paths.
26454 Move the note to a dummy blockage insn, which describes that
26455 R2 is unconditionally saved after the label. */
26456 /* ??? An alternate representation might be a special insn pattern
26457 containing both the branch and the store. That might give the
26458 code that minimizes the number of DW_CFA_advance opcodes more
26459 freedom in placing the annotations. */
26460 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
26461 if (note)
26462 remove_note (save_insn, note);
26463 else
26464 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
26465 copy_rtx (PATTERN (save_insn)), NULL_RTX);
26466 RTX_FRAME_RELATED_P (save_insn) = 0;
26467
26468 join_insn = emit_insn (gen_blockage ());
26469 REG_NOTES (join_insn) = note;
26470 RTX_FRAME_RELATED_P (join_insn) = 1;
26471
26472 if (using_static_chain_p)
26473 {
26474 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
26475 END_USE (0);
26476 }
26477 else
26478 END_USE (11);
26479 }
26480
26481 /* Save CR if we use any that must be preserved. */
26482 if (!WORLD_SAVE_P (info) && info->cr_save_p)
26483 {
26484 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
26485 GEN_INT (info->cr_save_offset + frame_off));
26486 rtx mem = gen_frame_mem (SImode, addr);
26487
26488 /* If we didn't copy cr before, do so now using r0. */
26489 if (cr_save_rtx == NULL_RTX)
26490 {
26491 START_USE (0);
26492 cr_save_rtx = gen_rtx_REG (SImode, 0);
26493 rs6000_emit_prologue_move_from_cr (cr_save_rtx);
26494 }
26495
26496 /* Saving CR requires a two-instruction sequence: one instruction
26497 to move the CR to a general-purpose register, and a second
26498 instruction that stores the GPR to memory.
26499
26500 We do not emit any DWARF CFI records for the first of these,
26501 because we cannot properly represent the fact that CR is saved in
26502 a register. One reason is that we cannot express that multiple
26503 CR fields are saved; another reason is that on 64-bit, the size
26504 of the CR register in DWARF (4 bytes) differs from the size of
26505 a general-purpose register.
26506
26507 This means if any intervening instruction were to clobber one of
26508 the call-saved CR fields, we'd have incorrect CFI. To prevent
26509 this from happening, we mark the store to memory as a use of
26510 those CR fields, which prevents any such instruction from being
26511 scheduled in between the two instructions. */
26512 rtx crsave_v[9];
26513 int n_crsave = 0;
26514 int i;
26515
26516 crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx);
26517 for (i = 0; i < 8; i++)
26518 if (save_reg_p (CR0_REGNO + i))
26519 crsave_v[n_crsave++]
26520 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
26521
26522 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
26523 gen_rtvec_v (n_crsave, crsave_v)));
26524 END_USE (REGNO (cr_save_rtx));
26525
26526 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
26527 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
26528 so we need to construct a frame expression manually. */
26529 RTX_FRAME_RELATED_P (insn) = 1;
26530
26531 /* Update address to be stack-pointer relative, like
26532 rs6000_frame_related would do. */
26533 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
26534 GEN_INT (info->cr_save_offset + sp_off));
26535 mem = gen_frame_mem (SImode, addr);
26536
26537 if (DEFAULT_ABI == ABI_ELFv2)
26538 {
26539 /* In the ELFv2 ABI we generate separate CFI records for each
26540 CR field that was actually saved. They all point to the
26541 same 32-bit stack slot. */
26542 rtx crframe[8];
26543 int n_crframe = 0;
26544
26545 for (i = 0; i < 8; i++)
26546 if (save_reg_p (CR0_REGNO + i))
26547 {
26548 crframe[n_crframe]
26549 = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i));
26550
26551 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
26552 n_crframe++;
26553 }
26554
26555 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
26556 gen_rtx_PARALLEL (VOIDmode,
26557 gen_rtvec_v (n_crframe, crframe)));
26558 }
26559 else
26560 {
26561 /* In other ABIs, by convention, we use a single CR regnum to
26562 represent the fact that all call-saved CR fields are saved.
26563 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
26564 rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO));
26565 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
26566 }
26567 }
26568
26569 /* In the ELFv2 ABI we need to save all call-saved CR fields into
26570 *separate* slots if the routine calls __builtin_eh_return, so
26571 that they can be independently restored by the unwinder. */
26572 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
26573 {
26574 int i, cr_off = info->ehcr_offset;
26575 rtx crsave;
26576
26577 /* ??? We might get better performance by using multiple mfocrf
26578 instructions. */
26579 crsave = gen_rtx_REG (SImode, 0);
26580 emit_insn (gen_prologue_movesi_from_cr (crsave));
26581
26582 for (i = 0; i < 8; i++)
26583 if (!call_used_regs[CR0_REGNO + i])
26584 {
26585 rtvec p = rtvec_alloc (2);
26586 RTVEC_ELT (p, 0)
26587 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
26588 RTVEC_ELT (p, 1)
26589 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
26590
26591 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
26592
26593 RTX_FRAME_RELATED_P (insn) = 1;
26594 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
26595 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
26596 sp_reg_rtx, cr_off + sp_off));
26597
26598 cr_off += reg_size;
26599 }
26600 }
26601
26602 /* If we are emitting stack probes but allocating no stack, then
26603 just note that in the dump file. */
26604 if (flag_stack_clash_protection
26605 && dump_file
26606 && !info->push_p)
26607 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
26608
26609 /* Update stack and set back pointer unless this is V.4,
26610 for which it was done previously. */
26611 if (!WORLD_SAVE_P (info) && info->push_p
26612 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
26613 {
26614 rtx ptr_reg = NULL;
26615 int ptr_off = 0;
26616
26617 /* If saving altivec regs we need to be able to address all save
26618 locations using a 16-bit offset. */
26619 if ((strategy & SAVE_INLINE_VRS) == 0
26620 || (info->altivec_size != 0
26621 && (info->altivec_save_offset + info->altivec_size - 16
26622 + info->total_size - frame_off) > 32767)
26623 || (info->vrsave_size != 0
26624 && (info->vrsave_save_offset
26625 + info->total_size - frame_off) > 32767))
26626 {
26627 int sel = SAVRES_SAVE | SAVRES_VR;
26628 unsigned ptr_regno = ptr_regno_for_savres (sel);
26629
26630 if (using_static_chain_p
26631 && ptr_regno == STATIC_CHAIN_REGNUM)
26632 ptr_regno = 12;
26633 if (REGNO (frame_reg_rtx) != ptr_regno)
26634 START_USE (ptr_regno);
26635 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
26636 frame_reg_rtx = ptr_reg;
26637 ptr_off = info->altivec_save_offset + info->altivec_size;
26638 frame_off = -ptr_off;
26639 }
26640 else if (REGNO (frame_reg_rtx) == 1)
26641 frame_off = info->total_size;
26642 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
26643 ptr_reg, ptr_off);
26644 if (REGNO (frame_reg_rtx) == 12)
26645 sp_adjust = 0;
26646 sp_off = info->total_size;
26647 if (frame_reg_rtx != sp_reg_rtx)
26648 rs6000_emit_stack_tie (frame_reg_rtx, false);
26649 }
26650
26651 /* Set frame pointer, if needed. */
26652 if (frame_pointer_needed)
26653 {
26654 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
26655 sp_reg_rtx);
26656 RTX_FRAME_RELATED_P (insn) = 1;
26657 }
26658
26659 /* Save AltiVec registers if needed. Save here because the red zone does
26660 not always include AltiVec registers. */
26661 if (!WORLD_SAVE_P (info)
26662 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
26663 {
26664 int end_save = info->altivec_save_offset + info->altivec_size;
26665 int ptr_off;
26666 /* Oddly, the vector save/restore functions point r0 at the end
26667 of the save area, then use r11 or r12 to load offsets for
26668 [reg+reg] addressing. */
26669 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
26670 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
26671 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
26672
26673 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
26674 NOT_INUSE (0);
26675 if (scratch_regno == 12)
26676 sp_adjust = 0;
26677 if (end_save + frame_off != 0)
26678 {
26679 rtx offset = GEN_INT (end_save + frame_off);
26680
26681 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
26682 }
26683 else
26684 emit_move_insn (ptr_reg, frame_reg_rtx);
26685
26686 ptr_off = -end_save;
26687 insn = rs6000_emit_savres_rtx (info, scratch_reg,
26688 info->altivec_save_offset + ptr_off,
26689 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
26690 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
26691 NULL_RTX, NULL_RTX);
26692 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
26693 {
26694 /* The oddity mentioned above clobbered our frame reg. */
26695 emit_move_insn (frame_reg_rtx, ptr_reg);
26696 frame_off = ptr_off;
26697 }
26698 }
26699 else if (!WORLD_SAVE_P (info)
26700 && info->altivec_size != 0)
26701 {
26702 int i;
26703
26704 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
26705 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
26706 {
26707 rtx areg, savereg, mem;
26708 HOST_WIDE_INT offset;
26709
26710 offset = (info->altivec_save_offset + frame_off
26711 + 16 * (i - info->first_altivec_reg_save));
26712
26713 savereg = gen_rtx_REG (V4SImode, i);
26714
26715 if (TARGET_P9_VECTOR && quad_address_offset_p (offset))
26716 {
26717 mem = gen_frame_mem (V4SImode,
26718 gen_rtx_PLUS (Pmode, frame_reg_rtx,
26719 GEN_INT (offset)));
26720 insn = emit_insn (gen_rtx_SET (mem, savereg));
26721 areg = NULL_RTX;
26722 }
26723 else
26724 {
26725 NOT_INUSE (0);
26726 areg = gen_rtx_REG (Pmode, 0);
26727 emit_move_insn (areg, GEN_INT (offset));
26728
26729 /* AltiVec addressing mode is [reg+reg]. */
26730 mem = gen_frame_mem (V4SImode,
26731 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
26732
26733 /* Rather than emitting a generic move, force use of the stvx
26734 instruction, which we always want on ISA 2.07 (power8) systems.
26735 In particular we don't want xxpermdi/stxvd2x for little
26736 endian. */
26737 insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
26738 }
26739
26740 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
26741 areg, GEN_INT (offset));
26742 }
26743 }
26744
26745 /* VRSAVE is a bit vector representing which AltiVec registers
26746 are used. The OS uses this to determine which vector
26747 registers to save on a context switch. We need to save
26748 VRSAVE on the stack frame, add whatever AltiVec registers we
26749 used in this function, and do the corresponding magic in the
26750 epilogue. */
26751
26752 if (!WORLD_SAVE_P (info) && info->vrsave_size != 0)
26753 {
26754 /* Get VRSAVE into a GPR. Note that ABI_V4 and ABI_DARWIN might
26755 be using r12 as frame_reg_rtx and r11 as the static chain
26756 pointer for nested functions. */
26757 int save_regno = 12;
26758 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26759 && !using_static_chain_p)
26760 save_regno = 11;
26761 else if (using_split_stack || REGNO (frame_reg_rtx) == 12)
26762 {
26763 save_regno = 11;
26764 if (using_static_chain_p)
26765 save_regno = 0;
26766 }
26767 NOT_INUSE (save_regno);
26768
26769 emit_vrsave_prologue (info, save_regno, frame_off, frame_reg_rtx);
26770 }
26771
26772 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
26773 if (!TARGET_SINGLE_PIC_BASE
26774 && ((TARGET_TOC && TARGET_MINIMAL_TOC
26775 && !constant_pool_empty_p ())
26776 || (DEFAULT_ABI == ABI_V4
26777 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
26778 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
26779 {
26780 /* If emit_load_toc_table will use the link register, we need to save
26781 it. We use R12 for this purpose because emit_load_toc_table
26782 can use register 0. This allows us to use a plain 'blr' to return
26783 from the procedure more often. */
26784 int save_LR_around_toc_setup = (TARGET_ELF
26785 && DEFAULT_ABI == ABI_V4
26786 && flag_pic
26787 && ! info->lr_save_p
26788 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
26789 if (save_LR_around_toc_setup)
26790 {
26791 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
26792 rtx tmp = gen_rtx_REG (Pmode, 12);
26793
26794 sp_adjust = 0;
26795 insn = emit_move_insn (tmp, lr);
26796 RTX_FRAME_RELATED_P (insn) = 1;
26797
26798 rs6000_emit_load_toc_table (TRUE);
26799
26800 insn = emit_move_insn (lr, tmp);
26801 add_reg_note (insn, REG_CFA_RESTORE, lr);
26802 RTX_FRAME_RELATED_P (insn) = 1;
26803 }
26804 else
26805 rs6000_emit_load_toc_table (TRUE);
26806 }
26807
26808 #if TARGET_MACHO
26809 if (!TARGET_SINGLE_PIC_BASE
26810 && DEFAULT_ABI == ABI_DARWIN
26811 && flag_pic && crtl->uses_pic_offset_table)
26812 {
26813 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
26814 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
26815
26816 /* Save and restore LR locally around this call (in R0). */
26817 if (!info->lr_save_p)
26818 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
26819
26820 emit_insn (gen_load_macho_picbase (src));
26821
26822 emit_move_insn (gen_rtx_REG (Pmode,
26823 RS6000_PIC_OFFSET_TABLE_REGNUM),
26824 lr);
26825
26826 if (!info->lr_save_p)
26827 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
26828 }
26829 #endif
26830
26831 /* If we need to, save the TOC register after doing the stack setup.
26832 Do not emit eh frame info for this save. The unwinder wants info,
26833 conceptually attached to instructions in this function, about
26834 register values in the caller of this function. This R2 may have
26835 already been changed from the value in the caller.
26836 We don't attempt to write accurate DWARF EH frame info for R2
26837 because code emitted by gcc for a (non-pointer) function call
26838 doesn't save and restore R2. Instead, R2 is managed out-of-line
26839 by a linker generated plt call stub when the function resides in
26840 a shared library. This behavior is costly to describe in DWARF,
26841 both in terms of the size of DWARF info and the time taken in the
26842 unwinder to interpret it. R2 changes, apart from the
26843 calls_eh_return case earlier in this function, are handled by
26844 linux-unwind.h frob_update_context. */
26845 if (rs6000_save_toc_in_prologue_p ()
26846 && !cfun->machine->toc_is_wrapped_separately)
26847 {
26848 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
26849 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
26850 }
26851
26852 /* Set up the arg pointer (r12) for -fsplit-stack code. */
26853 if (using_split_stack && split_stack_arg_pointer_used_p ())
26854 emit_split_stack_prologue (info, sp_adjust, frame_off, frame_reg_rtx);
26855 }
26856
26857 /* Output .extern statements for the save/restore routines we use. */
26858
26859 static void
26860 rs6000_output_savres_externs (FILE *file)
26861 {
26862 rs6000_stack_t *info = rs6000_stack_info ();
26863
26864 if (TARGET_DEBUG_STACK)
26865 debug_stack_info (info);
26866
26867 /* Write .extern for any function we will call to save and restore
26868 fp values. */
26869 if (info->first_fp_reg_save < 64
26870 && !TARGET_MACHO
26871 && !TARGET_ELF)
26872 {
26873 char *name;
26874 int regno = info->first_fp_reg_save - 32;
26875
26876 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
26877 {
26878 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
26879 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
26880 name = rs6000_savres_routine_name (regno, sel);
26881 fprintf (file, "\t.extern %s\n", name);
26882 }
26883 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
26884 {
26885 bool lr = (info->savres_strategy
26886 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
26887 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
26888 name = rs6000_savres_routine_name (regno, sel);
26889 fprintf (file, "\t.extern %s\n", name);
26890 }
26891 }
26892 }
26893
26894 /* Write function prologue. */
26895
26896 static void
26897 rs6000_output_function_prologue (FILE *file)
26898 {
26899 if (!cfun->is_thunk)
26900 rs6000_output_savres_externs (file);
26901
26902 /* ELFv2 ABI r2 setup code and local entry point. This must follow
26903 immediately after the global entry point label. */
26904 if (rs6000_global_entry_point_needed_p ())
26905 {
26906 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
26907
26908 (*targetm.asm_out.internal_label) (file, "LCF", rs6000_pic_labelno);
26909
26910 if (TARGET_CMODEL != CMODEL_LARGE)
26911 {
26912 /* In the small and medium code models, we assume the TOC is less
26913 than 2 GB away from the text section, so it can be computed via the
26914 following two-instruction sequence. */
26915 char buf[256];
26916
26917 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
26918 fprintf (file, "0:\taddis 2,12,.TOC.-");
26919 assemble_name (file, buf);
26920 fprintf (file, "@ha\n");
26921 fprintf (file, "\taddi 2,2,.TOC.-");
26922 assemble_name (file, buf);
26923 fprintf (file, "@l\n");
26924 }
26925 else
26926 {
26927 /* In the large code model, we allow arbitrary offsets between the
26928 TOC and the text section, so we have to load the offset from
26929 memory. The data field is emitted directly before the global
26930 entry point in rs6000_elf_declare_function_name. */
26931 char buf[256];
26932
26933 #ifdef HAVE_AS_ENTRY_MARKERS
26934 /* If supported by the linker, emit a marker relocation. If the
26935 total code size of the final executable or shared library
26936 happens to fit into 2 GB after all, the linker will replace
26937 this code sequence with the sequence for the small or medium
26938 code model. */
26939 fprintf (file, "\t.reloc .,R_PPC64_ENTRY\n");
26940 #endif
26941 fprintf (file, "\tld 2,");
26942 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
26943 assemble_name (file, buf);
26944 fprintf (file, "-");
26945 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
26946 assemble_name (file, buf);
26947 fprintf (file, "(12)\n");
26948 fprintf (file, "\tadd 2,2,12\n");
26949 }
26950
26951 fputs ("\t.localentry\t", file);
26952 assemble_name (file, name);
26953 fputs (",.-", file);
26954 assemble_name (file, name);
26955 fputs ("\n", file);
26956 }
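/* Schematically, the global entry point emitted above looks like
   (medium code model; labels illustrative):

   func:
   0:   addis 2,12,.TOC.-.LCF0@ha
        addi  2,2,.TOC.-.LCF0@l
        .localentry func,.-func

   Calls through the global entry compute r2 from r12; local calls
   enter at the .localentry point with r2 already valid.  */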
26957
26958 /* Output -mprofile-kernel code. This needs to be done here instead of
26959 in output_function_profile since it must go after the ELFv2 ABI
26960 local entry point. */
26961 if (TARGET_PROFILE_KERNEL && crtl->profile)
26962 {
26963 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
26964 gcc_assert (!TARGET_32BIT);
26965
26966 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
26967
26968 /* In the ELFv2 ABI we have no compiler stack word. It must be
26969 the responsibility of _mcount to preserve the static chain
26970 register if required. */
26971 if (DEFAULT_ABI != ABI_ELFv2
26972 && cfun->static_chain_decl != NULL)
26973 {
26974 asm_fprintf (file, "\tstd %s,24(%s)\n",
26975 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
26976 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
26977 asm_fprintf (file, "\tld %s,24(%s)\n",
26978 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
26979 }
26980 else
26981 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
26982 }
26983
26984 rs6000_pic_labelno++;
26985 }
26986
26987 /* -mprofile-kernel code calls mcount before the function prologue,
26988 so a profiled leaf function should stay a leaf function. */
26989 static bool
26990 rs6000_keep_leaf_when_profiled ()
26991 {
26992 return TARGET_PROFILE_KERNEL;
26993 }
26994
26995 /* Non-zero if vmx regs are restored before the frame pop, zero if
26996 we restore after the pop when possible. */
26997 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
26998
26999 /* Restoring cr is a two-step process: loading a reg from the frame
27000 save, then moving the reg to cr. For ABI_V4 we must let the
27001 unwinder know that the stack location is no longer valid at or
27002 before the stack deallocation, but we can't emit a cfa_restore for
27003 cr at the stack deallocation like we do for other registers.
27004 The trouble is that it is possible for the move to cr to be
27005 scheduled after the stack deallocation. So say exactly where cr
27006 is located on each of the two insns. */
27007
27008 static rtx
27009 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
27010 {
27011 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
27012 rtx reg = gen_rtx_REG (SImode, regno);
27013 rtx_insn *insn = emit_move_insn (reg, mem);
27014
27015 if (!exit_func && DEFAULT_ABI == ABI_V4)
27016 {
27017 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
27018 rtx set = gen_rtx_SET (reg, cr);
27019
27020 add_reg_note (insn, REG_CFA_REGISTER, set);
27021 RTX_FRAME_RELATED_P (insn) = 1;
27022 }
27023 return reg;
27024 }
27025
27026 /* Reload CR from REG. */
27027
27028 static void
27029 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
27030 {
27031 int count = 0;
27032 int i;
27033
27034 if (using_mfcr_multiple)
27035 {
27036 for (i = 0; i < 8; i++)
27037 if (save_reg_p (CR0_REGNO + i))
27038 count++;
27039 gcc_assert (count);
27040 }
27041
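/* On processors where a multi-field mtcrf is fast, the PARALLEL built
   below is intended to match a single insn restoring all saved fields
   at once, e.g. "mtcrf 0x38,rN" for CR2..CR4 (mask shown for
   illustration); otherwise we fall back to one mtcrf per field.  */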
27042 if (using_mfcr_multiple && count > 1)
27043 {
27044 rtx_insn *insn;
27045 rtvec p;
27046 int ndx;
27047
27048 p = rtvec_alloc (count);
27049
27050 ndx = 0;
27051 for (i = 0; i < 8; i++)
27052 if (save_reg_p (CR0_REGNO + i))
27053 {
27054 rtvec r = rtvec_alloc (2);
27055 RTVEC_ELT (r, 0) = reg;
27056 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
27057 RTVEC_ELT (p, ndx) =
27058 gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
27059 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
27060 ndx++;
27061 }
27062 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27063 gcc_assert (ndx == count);
27064
27065 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
27066 CR field separately. */
27067 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
27068 {
27069 for (i = 0; i < 8; i++)
27070 if (save_reg_p (CR0_REGNO + i))
27071 add_reg_note (insn, REG_CFA_RESTORE,
27072 gen_rtx_REG (SImode, CR0_REGNO + i));
27073
27074 RTX_FRAME_RELATED_P (insn) = 1;
27075 }
27076 }
27077 else
27078 for (i = 0; i < 8; i++)
27079 if (save_reg_p (CR0_REGNO + i))
27080 {
27081 rtx insn = emit_insn (gen_movsi_to_cr_one
27082 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
27083
27084 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
27085 CR field separately, attached to the insn that in fact
27086 restores this particular CR field. */
27087 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
27088 {
27089 add_reg_note (insn, REG_CFA_RESTORE,
27090 gen_rtx_REG (SImode, CR0_REGNO + i));
27091
27092 RTX_FRAME_RELATED_P (insn) = 1;
27093 }
27094 }
27095
27096 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
27097 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
27098 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
27099 {
27100 rtx_insn *insn = get_last_insn ();
27101 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
27102
27103 add_reg_note (insn, REG_CFA_RESTORE, cr);
27104 RTX_FRAME_RELATED_P (insn) = 1;
27105 }
27106 }
27107
27108 /* Like cr, the move to lr instruction can be scheduled after the
27109 stack deallocation, but unlike cr, its stack frame save is still
27110 valid. So we only need to emit the cfa_restore on the correct
27111 instruction. */
27112
27113 static void
27114 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
27115 {
27116 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
27117 rtx reg = gen_rtx_REG (Pmode, regno);
27118
27119 emit_move_insn (reg, mem);
27120 }
27121
27122 static void
27123 restore_saved_lr (int regno, bool exit_func)
27124 {
27125 rtx reg = gen_rtx_REG (Pmode, regno);
27126 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27127 rtx_insn *insn = emit_move_insn (lr, reg);
27128
27129 if (!exit_func && flag_shrink_wrap)
27130 {
27131 add_reg_note (insn, REG_CFA_RESTORE, lr);
27132 RTX_FRAME_RELATED_P (insn) = 1;
27133 }
27134 }
27135
27136 static rtx
27137 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
27138 {
27139 if (DEFAULT_ABI == ABI_ELFv2)
27140 {
27141 int i;
27142 for (i = 0; i < 8; i++)
27143 if (save_reg_p (CR0_REGNO + i))
27144 {
27145 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
27146 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
27147 cfa_restores);
27148 }
27149 }
27150 else if (info->cr_save_p)
27151 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
27152 gen_rtx_REG (SImode, CR2_REGNO),
27153 cfa_restores);
27154
27155 if (info->lr_save_p)
27156 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
27157 gen_rtx_REG (Pmode, LR_REGNO),
27158 cfa_restores);
27159 return cfa_restores;
27160 }
27161
27162 /* Return true if OFFSET from stack pointer can be clobbered by signals.
27163 V.4 doesn't have any stack cushion; the AIX ABIs have 220 or 288 bytes
27164 below the stack pointer that are not clobbered by signals. */
27165
27166 static inline bool
27167 offset_below_red_zone_p (HOST_WIDE_INT offset)
27168 {
27169 return offset < (DEFAULT_ABI == ABI_V4
27170 ? 0
27171 : TARGET_32BIT ? -220 : -288);
27172 }
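/* For example, under the 64-bit AIX/ELF ABIs an offset of -288 is
   still within the red zone and safe, while -296 is not; under the
   V.4 ABI anything below the stack pointer may be clobbered.  */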
27173
27174 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
27175
27176 static void
27177 emit_cfa_restores (rtx cfa_restores)
27178 {
27179 rtx_insn *insn = get_last_insn ();
27180 rtx *loc = &REG_NOTES (insn);
27181
27182 while (*loc)
27183 loc = &XEXP (*loc, 1);
27184 *loc = cfa_restores;
27185 RTX_FRAME_RELATED_P (insn) = 1;
27186 }
27187
27188 /* Emit function epilogue as insns. */
27189
27190 void
27191 rs6000_emit_epilogue (int sibcall)
27192 {
27193 rs6000_stack_t *info;
27194 int restoring_GPRs_inline;
27195 int restoring_FPRs_inline;
27196 int using_load_multiple;
27197 int using_mtcr_multiple;
27198 int use_backchain_to_restore_sp;
27199 int restore_lr;
27200 int strategy;
27201 HOST_WIDE_INT frame_off = 0;
27202 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
27203 rtx frame_reg_rtx = sp_reg_rtx;
27204 rtx cfa_restores = NULL_RTX;
27205 rtx insn;
27206 rtx cr_save_reg = NULL_RTX;
27207 machine_mode reg_mode = Pmode;
27208 int reg_size = TARGET_32BIT ? 4 : 8;
27209 machine_mode fp_reg_mode = TARGET_HARD_FLOAT ? DFmode : SFmode;
27210 int fp_reg_size = 8;
27211 int i;
27212 bool exit_func;
27213 unsigned ptr_regno;
27214
27215 info = rs6000_stack_info ();
27216
27217 strategy = info->savres_strategy;
27218 using_load_multiple = strategy & REST_MULTIPLE;
27219 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
27220 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
27221 using_mtcr_multiple = (rs6000_tune == PROCESSOR_PPC601
27222 || rs6000_tune == PROCESSOR_PPC603
27223 || rs6000_tune == PROCESSOR_PPC750
27224 || optimize_size);
27225 /* Restore via the backchain when we have a large frame, since this
27226 is more efficient than an addis/addi pair. The second condition
27227 here will not trigger at the moment; we don't actually need a
27228 frame pointer for alloca, but the generic parts of the compiler
27229 give us one anyway. */
27230 use_backchain_to_restore_sp = (info->total_size + (info->lr_save_p
27231 ? info->lr_save_offset
27232 : 0) > 32767
27233 || (cfun->calls_alloca
27234 && !frame_pointer_needed));
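/* Worked example (frame size chosen for illustration): a 40000-byte
   frame cannot be popped with a single addi, whose signed 16-bit
   immediate tops out at 32767, so instead we reload the caller's stack
   pointer from the backchain word stored at offset 0 from r1. */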
27235 restore_lr = (info->lr_save_p
27236 && (restoring_FPRs_inline
27237 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
27238 && (restoring_GPRs_inline
27239 || info->first_fp_reg_save < 64)
27240 && !cfun->machine->lr_is_wrapped_separately);
27241
27242
27243 if (WORLD_SAVE_P (info))
27244 {
27245 int i, j;
27246 char rname[30];
27247 const char *alloc_rname;
27248 rtvec p;
27249
27250 /* eh_rest_world_r10 will return to the location saved in the LR
27251 stack slot (which is not likely to be our caller).
27252 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
27253 rest_world is similar, except any R10 parameter is ignored.
27254 The exception-handling stuff that was here in 2.95 is no
27255 longer necessary. */
27256
27257 p = rtvec_alloc (9
27258 + 32 - info->first_gp_reg_save
27259 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
27260 + 63 + 1 - info->first_fp_reg_save);
27261
27262 strcpy (rname, ((crtl->calls_eh_return) ?
27263 "*eh_rest_world_r10" : "*rest_world"));
27264 alloc_rname = ggc_strdup (rname);
27265
27266 j = 0;
27267 RTVEC_ELT (p, j++) = ret_rtx;
27268 RTVEC_ELT (p, j++)
27269 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
27270 /* The instruction pattern requires a clobber here;
27271 it is shared with the restVEC helper. */
27272 RTVEC_ELT (p, j++)
27273 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
27274
27275 {
27276 /* CR register traditionally saved as CR2. */
27277 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
27278 RTVEC_ELT (p, j++)
27279 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
27280 if (flag_shrink_wrap)
27281 {
27282 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
27283 gen_rtx_REG (Pmode, LR_REGNO),
27284 cfa_restores);
27285 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27286 }
27287 }
27288
27289 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
27290 {
27291 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
27292 RTVEC_ELT (p, j++)
27293 = gen_frame_load (reg,
27294 frame_reg_rtx, info->gp_save_offset + reg_size * i);
27295 if (flag_shrink_wrap
27296 && save_reg_p (info->first_gp_reg_save + i))
27297 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27298 }
27299 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
27300 {
27301 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
27302 RTVEC_ELT (p, j++)
27303 = gen_frame_load (reg,
27304 frame_reg_rtx, info->altivec_save_offset + 16 * i);
27305 if (flag_shrink_wrap
27306 && save_reg_p (info->first_altivec_reg_save + i))
27307 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27308 }
27309 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
27310 {
27311 rtx reg = gen_rtx_REG (TARGET_HARD_FLOAT ? DFmode : SFmode,
27312 info->first_fp_reg_save + i);
27313 RTVEC_ELT (p, j++)
27314 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
27315 if (flag_shrink_wrap
27316 && save_reg_p (info->first_fp_reg_save + i))
27317 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27318 }
27319 RTVEC_ELT (p, j++)
27320 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
27321 RTVEC_ELT (p, j++)
27322 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
27323 RTVEC_ELT (p, j++)
27324 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
27325 RTVEC_ELT (p, j++)
27326 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
27327 RTVEC_ELT (p, j++)
27328 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
27329 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
27330
27331 if (flag_shrink_wrap)
27332 {
27333 REG_NOTES (insn) = cfa_restores;
27334 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
27335 RTX_FRAME_RELATED_P (insn) = 1;
27336 }
27337 return;
27338 }
27339
27340 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
27341 if (info->push_p)
27342 frame_off = info->total_size;
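/* E.g. after a prologue "stdu r1,-128(r1)", info->total_size is 128,
   so r1 + 128 is the caller's stack pointer, i.e. the CFA (a sketch of
   the common case, not of every frame layout). */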
27343
27344 /* Restore AltiVec registers if we must do so before adjusting the
27345 stack. */
27346 if (info->altivec_size != 0
27347 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
27348 || (DEFAULT_ABI != ABI_V4
27349 && offset_below_red_zone_p (info->altivec_save_offset))))
27350 {
27351 int i;
27352 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
27353
27354 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
27355 if (use_backchain_to_restore_sp)
27356 {
27357 int frame_regno = 11;
27358
27359 if ((strategy & REST_INLINE_VRS) == 0)
27360 {
27361 /* Of r11 and r12, select the one not clobbered by an
27362 out-of-line restore function for the frame register. */
27363 frame_regno = 11 + 12 - scratch_regno;
27364 }
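/* (11 + 12 - scratch_regno yields 12 when the scratch is r11 and 11
   when it is r12, i.e. the other register of the pair.) */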
27365 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
27366 emit_move_insn (frame_reg_rtx,
27367 gen_rtx_MEM (Pmode, sp_reg_rtx));
27368 frame_off = 0;
27369 }
27370 else if (frame_pointer_needed)
27371 frame_reg_rtx = hard_frame_pointer_rtx;
27372
27373 if ((strategy & REST_INLINE_VRS) == 0)
27374 {
27375 int end_save = info->altivec_save_offset + info->altivec_size;
27376 int ptr_off;
27377 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
27378 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
27379
27380 if (end_save + frame_off != 0)
27381 {
27382 rtx offset = GEN_INT (end_save + frame_off);
27383
27384 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
27385 }
27386 else
27387 emit_move_insn (ptr_reg, frame_reg_rtx);
27388
27389 ptr_off = -end_save;
27390 insn = rs6000_emit_savres_rtx (info, scratch_reg,
27391 info->altivec_save_offset + ptr_off,
27392 0, V4SImode, SAVRES_VR);
27393 }
27394 else
27395 {
27396 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
27397 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
27398 {
27399 rtx addr, areg, mem, insn;
27400 rtx reg = gen_rtx_REG (V4SImode, i);
27401 HOST_WIDE_INT offset
27402 = (info->altivec_save_offset + frame_off
27403 + 16 * (i - info->first_altivec_reg_save));
27404
27405 if (TARGET_P9_VECTOR && quad_address_offset_p (offset))
27406 {
27407 mem = gen_frame_mem (V4SImode,
27408 gen_rtx_PLUS (Pmode, frame_reg_rtx,
27409 GEN_INT (offset)));
27410 insn = gen_rtx_SET (reg, mem);
27411 }
27412 else
27413 {
27414 areg = gen_rtx_REG (Pmode, 0);
27415 emit_move_insn (areg, GEN_INT (offset));
27416
27417 /* AltiVec addressing mode is [reg+reg]. */
27418 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
27419 mem = gen_frame_mem (V4SImode, addr);
27420
27421 /* Rather than emitting a generic move, force use of the
27422 lvx instruction, which we always want. In particular we
27423 don't want lxvd2x/xxpermdi for little endian. */
27424 insn = gen_altivec_lvx_v4si_internal (reg, mem);
27425 }
27426
27427 (void) emit_insn (insn);
27428 }
27429 }
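/* A sketch of the sequence emitted by the [reg+reg] path above, when
   OFFSET does not pass the Power9 d-form test:
     li   r0,OFFSET
     lvx  vN,rFRAME,r0
   rather than an lxvd2x/xxpermdi pair on little-endian. */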
27430
27431 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
27432 if (((strategy & REST_INLINE_VRS) == 0
27433 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
27434 && (flag_shrink_wrap
27435 || (offset_below_red_zone_p
27436 (info->altivec_save_offset
27437 + 16 * (i - info->first_altivec_reg_save))))
27438 && save_reg_p (i))
27439 {
27440 rtx reg = gen_rtx_REG (V4SImode, i);
27441 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27442 }
27443 }
27444
27445 /* Restore VRSAVE if we must do so before adjusting the stack. */
27446 if (info->vrsave_size != 0
27447 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
27448 || (DEFAULT_ABI != ABI_V4
27449 && offset_below_red_zone_p (info->vrsave_save_offset))))
27450 {
27451 rtx reg;
27452
27453 if (frame_reg_rtx == sp_reg_rtx)
27454 {
27455 if (use_backchain_to_restore_sp)
27456 {
27457 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
27458 emit_move_insn (frame_reg_rtx,
27459 gen_rtx_MEM (Pmode, sp_reg_rtx));
27460 frame_off = 0;
27461 }
27462 else if (frame_pointer_needed)
27463 frame_reg_rtx = hard_frame_pointer_rtx;
27464 }
27465
27466 reg = gen_rtx_REG (SImode, 12);
27467 emit_insn (gen_frame_load (reg, frame_reg_rtx,
27468 info->vrsave_save_offset + frame_off));
27469
27470 emit_insn (generate_set_vrsave (reg, info, 1));
27471 }
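/* VRSAVE (SPR 256) is, by software convention, a bitmask of the vector
   registers currently in use, consulted at context switch; the load
   above brings the saved mask back before generate_set_vrsave moves it
   into the SPR (a summary of the usual convention, not a full spec). */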
27472
27473 insn = NULL_RTX;
27474 /* If we have a large stack frame, restore the old stack pointer
27475 using the backchain. */
27476 if (use_backchain_to_restore_sp)
27477 {
27478 if (frame_reg_rtx == sp_reg_rtx)
27479 {
27480 /* Under V.4, don't reset the stack pointer until after we're done
27481 loading the saved registers. */
27482 if (DEFAULT_ABI == ABI_V4)
27483 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
27484
27485 insn = emit_move_insn (frame_reg_rtx,
27486 gen_rtx_MEM (Pmode, sp_reg_rtx));
27487 frame_off = 0;
27488 }
27489 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
27490 && DEFAULT_ABI == ABI_V4)
27491 /* frame_reg_rtx has been set up by the altivec restore. */
27492 ;
27493 else
27494 {
27495 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
27496 frame_reg_rtx = sp_reg_rtx;
27497 }
27498 }
27499 /* If we have a frame pointer, we can restore the old stack pointer
27500 from it. */
27501 else if (frame_pointer_needed)
27502 {
27503 frame_reg_rtx = sp_reg_rtx;
27504 if (DEFAULT_ABI == ABI_V4)
27505 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
27506 /* Prevent reordering memory accesses against stack pointer restore. */
27507 else if (cfun->calls_alloca
27508 || offset_below_red_zone_p (-info->total_size))
27509 rs6000_emit_stack_tie (frame_reg_rtx, true);
27510
27511 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
27512 GEN_INT (info->total_size)));
27513 frame_off = 0;
27514 }
27515 else if (info->push_p
27516 && DEFAULT_ABI != ABI_V4
27517 && !crtl->calls_eh_return)
27518 {
27519 /* Prevent reordering memory accesses against stack pointer restore. */
27520 if (cfun->calls_alloca
27521 || offset_below_red_zone_p (-info->total_size))
27522 rs6000_emit_stack_tie (frame_reg_rtx, false);
27523 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
27524 GEN_INT (info->total_size)));
27525 frame_off = 0;
27526 }
27527 if (insn && frame_reg_rtx == sp_reg_rtx)
27528 {
27529 if (cfa_restores)
27530 {
27531 REG_NOTES (insn) = cfa_restores;
27532 cfa_restores = NULL_RTX;
27533 }
27534 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
27535 RTX_FRAME_RELATED_P (insn) = 1;
27536 }
27537
27538 /* Restore AltiVec registers if we have not done so already. */
27539 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
27540 && info->altivec_size != 0
27541 && (DEFAULT_ABI == ABI_V4
27542 || !offset_below_red_zone_p (info->altivec_save_offset)))
27543 {
27544 int i;
27545
27546 if ((strategy & REST_INLINE_VRS) == 0)
27547 {
27548 int end_save = info->altivec_save_offset + info->altivec_size;
27549 int ptr_off;
27550 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
27551 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
27552 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
27553
27554 if (end_save + frame_off != 0)
27555 {
27556 rtx offset = GEN_INT (end_save + frame_off);
27557
27558 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
27559 }
27560 else
27561 emit_move_insn (ptr_reg, frame_reg_rtx);
27562
27563 ptr_off = -end_save;
27564 insn = rs6000_emit_savres_rtx (info, scratch_reg,
27565 info->altivec_save_offset + ptr_off,
27566 0, V4SImode, SAVRES_VR);
27567 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
27568 {
27569 /* Frame reg was clobbered by out-of-line save. Restore it
27570 from ptr_reg, and if we are calling an out-of-line gpr or
27571 fpr restore, set up the correct pointer and offset. */
27572 unsigned newptr_regno = 1;
27573 if (!restoring_GPRs_inline)
27574 {
27575 bool lr = info->gp_save_offset + info->gp_size == 0;
27576 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
27577 newptr_regno = ptr_regno_for_savres (sel);
27578 end_save = info->gp_save_offset + info->gp_size;
27579 }
27580 else if (!restoring_FPRs_inline)
27581 {
27582 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
27583 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
27584 newptr_regno = ptr_regno_for_savres (sel);
27585 end_save = info->fp_save_offset + info->fp_size;
27586 }
27587
27588 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
27589 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
27590
27591 if (end_save + ptr_off != 0)
27592 {
27593 rtx offset = GEN_INT (end_save + ptr_off);
27594
27595 frame_off = -end_save;
27596 if (TARGET_32BIT)
27597 emit_insn (gen_addsi3_carry (frame_reg_rtx,
27598 ptr_reg, offset));
27599 else
27600 emit_insn (gen_adddi3_carry (frame_reg_rtx,
27601 ptr_reg, offset));
27602 }
27603 else
27604 {
27605 frame_off = ptr_off;
27606 emit_move_insn (frame_reg_rtx, ptr_reg);
27607 }
27608 }
27609 }
27610 else
27611 {
27612 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
27613 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
27614 {
27615 rtx addr, areg, mem, insn;
27616 rtx reg = gen_rtx_REG (V4SImode, i);
27617 HOST_WIDE_INT offset
27618 = (info->altivec_save_offset + frame_off
27619 + 16 * (i - info->first_altivec_reg_save));
27620
27621 if (TARGET_P9_VECTOR && quad_address_offset_p (offset))
27622 {
27623 mem = gen_frame_mem (V4SImode,
27624 gen_rtx_PLUS (Pmode, frame_reg_rtx,
27625 GEN_INT (offset)));
27626 insn = gen_rtx_SET (reg, mem);
27627 }
27628 else
27629 {
27630 areg = gen_rtx_REG (Pmode, 0);
27631 emit_move_insn (areg, GEN_INT (offset));
27632
27633 /* AltiVec addressing mode is [reg+reg]. */
27634 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
27635 mem = gen_frame_mem (V4SImode, addr);
27636
27637 /* Rather than emitting a generic move, force use of the
27638 lvx instruction, which we always want. In particular we
27639 don't want lxvd2x/xxpermdi for little endian. */
27640 insn = gen_altivec_lvx_v4si_internal (reg, mem);
27641 }
27642
27643 (void) emit_insn (insn);
27644 }
27645 }
27646
27647 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
27648 if (((strategy & REST_INLINE_VRS) == 0
27649 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
27650 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
27651 && save_reg_p (i))
27652 {
27653 rtx reg = gen_rtx_REG (V4SImode, i);
27654 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27655 }
27656 }
27657
27658 /* Restore VRSAVE if we have not done so already. */
27659 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
27660 && info->vrsave_size != 0
27661 && (DEFAULT_ABI == ABI_V4
27662 || !offset_below_red_zone_p (info->vrsave_save_offset)))
27663 {
27664 rtx reg;
27665
27666 reg = gen_rtx_REG (SImode, 12);
27667 emit_insn (gen_frame_load (reg, frame_reg_rtx,
27668 info->vrsave_save_offset + frame_off));
27669
27670 emit_insn (generate_set_vrsave (reg, info, 1));
27671 }
27672
27673 /* If we exit by an out-of-line restore function on ABI_V4 then that
27674 function will deallocate the stack, so we don't need to worry
27675 about the unwinder restoring cr from an invalid stack frame
27676 location. */
27677 exit_func = (!restoring_FPRs_inline
27678 || (!restoring_GPRs_inline
27679 && info->first_fp_reg_save == 64));
27680
27681 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
27682 *separate* slots if the routine calls __builtin_eh_return, so
27683 that they can be independently restored by the unwinder. */
27684 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
27685 {
27686 int i, cr_off = info->ehcr_offset;
27687
27688 for (i = 0; i < 8; i++)
27689 if (!call_used_regs[CR0_REGNO + i])
27690 {
27691 rtx reg = gen_rtx_REG (SImode, 0);
27692 emit_insn (gen_frame_load (reg, frame_reg_rtx,
27693 cr_off + frame_off));
27694
27695 insn = emit_insn (gen_movsi_to_cr_one
27696 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
27697
27698 if (!exit_func && flag_shrink_wrap)
27699 {
27700 add_reg_note (insn, REG_CFA_RESTORE,
27701 gen_rtx_REG (SImode, CR0_REGNO + i));
27702
27703 RTX_FRAME_RELATED_P (insn) = 1;
27704 }
27705
27706 cr_off += reg_size;
27707 }
27708 }
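/* gen_movsi_to_cr_one moves just the relevant 4-bit field from the GPR
   into CRn (a single-field mtcrf); my reading of the pattern, stated
   here as an assumption rather than a guarantee. */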
27709
27710 /* Get the old lr if we saved it. If we are restoring registers
27711 out-of-line, then the out-of-line routines can do this for us. */
27712 if (restore_lr && restoring_GPRs_inline)
27713 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
27714
27715 /* Get the old cr if we saved it. */
27716 if (info->cr_save_p)
27717 {
27718 unsigned cr_save_regno = 12;
27719
27720 if (!restoring_GPRs_inline)
27721 {
27722 /* Ensure we don't use the register used by the out-of-line
27723 gpr register restore below. */
27724 bool lr = info->gp_save_offset + info->gp_size == 0;
27725 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
27726 int gpr_ptr_regno = ptr_regno_for_savres (sel);
27727
27728 if (gpr_ptr_regno == 12)
27729 cr_save_regno = 11;
27730 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
27731 }
27732 else if (REGNO (frame_reg_rtx) == 12)
27733 cr_save_regno = 11;
27734
27735 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
27736 info->cr_save_offset + frame_off,
27737 exit_func);
27738 }
27739
27740 /* Set LR here to try to overlap restores below. */
27741 if (restore_lr && restoring_GPRs_inline)
27742 restore_saved_lr (0, exit_func);
27743
27744 /* Load exception handler data registers, if needed. */
27745 if (crtl->calls_eh_return)
27746 {
27747 unsigned int i, regno;
27748
27749 if (TARGET_AIX)
27750 {
27751 rtx reg = gen_rtx_REG (reg_mode, 2);
27752 emit_insn (gen_frame_load (reg, frame_reg_rtx,
27753 frame_off + RS6000_TOC_SAVE_SLOT));
27754 }
27755
27756 for (i = 0; ; ++i)
27757 {
27758 rtx mem;
27759
27760 regno = EH_RETURN_DATA_REGNO (i);
27761 if (regno == INVALID_REGNUM)
27762 break;
27763
27764 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
27765 info->ehrd_offset + frame_off
27766 + reg_size * (int) i);
27767
27768 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
27769 }
27770 }
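/* On rs6000 the EH_RETURN_DATA_REGNO registers are typically r3..r6
   (an assumption based on the usual definition in rs6000.h); the loop
   stops at the first INVALID_REGNUM. */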
27771
27772 /* Restore GPRs. This is done as a PARALLEL if we are using
27773 the load-multiple instructions. */
27774 if (!restoring_GPRs_inline)
27775 {
27776 /* We are jumping to an out-of-line function. */
27777 rtx ptr_reg;
27778 int end_save = info->gp_save_offset + info->gp_size;
27779 bool can_use_exit = end_save == 0;
27780 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
27781 int ptr_off;
27782
27783 /* Emit stack reset code if we need it. */
27784 ptr_regno = ptr_regno_for_savres (sel);
27785 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
27786 if (can_use_exit)
27787 rs6000_emit_stack_reset (frame_reg_rtx, frame_off, ptr_regno);
27788 else if (end_save + frame_off != 0)
27789 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
27790 GEN_INT (end_save + frame_off)));
27791 else if (REGNO (frame_reg_rtx) != ptr_regno)
27792 emit_move_insn (ptr_reg, frame_reg_rtx);
27793 if (REGNO (frame_reg_rtx) == ptr_regno)
27794 frame_off = -end_save;
27795
27796 if (can_use_exit && info->cr_save_p)
27797 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
27798
27799 ptr_off = -end_save;
27800 rs6000_emit_savres_rtx (info, ptr_reg,
27801 info->gp_save_offset + ptr_off,
27802 info->lr_save_offset + ptr_off,
27803 reg_mode, sel);
27804 }
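/* The out-of-line routines referred to here are the _restgpr-family
   helpers (e.g. something like _restgpr0_28 to reload r28..r31 and
   optionally return); the exact names come from
   rs6000_savres_routine_sym and are cited only as an example. */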
27805 else if (using_load_multiple)
27806 {
27807 rtvec p;
27808 p = rtvec_alloc (32 - info->first_gp_reg_save);
27809 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
27810 RTVEC_ELT (p, i)
27811 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
27812 frame_reg_rtx,
27813 info->gp_save_offset + frame_off + reg_size * i);
27814 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27815 }
27816 else
27817 {
27818 int offset = info->gp_save_offset + frame_off;
27819 for (i = info->first_gp_reg_save; i < 32; i++)
27820 {
27821 if (save_reg_p (i)
27822 && !cfun->machine->gpr_is_wrapped_separately[i])
27823 {
27824 rtx reg = gen_rtx_REG (reg_mode, i);
27825 emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
27826 }
27827
27828 offset += reg_size;
27829 }
27830 }
27831
27832 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
27833 {
27834 /* If the frame pointer was used then we can't delay emitting
27835 a REG_CFA_DEF_CFA note. This must happen on the insn that
27836 restores the frame pointer, r31. We may have already emitted
27837 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
27838 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
27839 be harmless if emitted. */
27840 if (frame_pointer_needed)
27841 {
27842 insn = get_last_insn ();
27843 add_reg_note (insn, REG_CFA_DEF_CFA,
27844 plus_constant (Pmode, frame_reg_rtx, frame_off));
27845 RTX_FRAME_RELATED_P (insn) = 1;
27846 }
27847
27848 /* Set up cfa_restores. We always need these when
27849 shrink-wrapping. If not shrink-wrapping then we only need
27850 the cfa_restore when the stack location is no longer valid.
27851 The cfa_restores must be emitted on or before the insn that
27852 invalidates the stack, and of course must not be emitted
27853 before the insn that actually does the restore. The latter
27854 is why it is a bad idea to emit the cfa_restores as a group
27855 on the last instruction here that actually does a restore:
27856 That insn may be reordered with respect to others doing
27857 restores. */
27858 if (flag_shrink_wrap
27859 && !restoring_GPRs_inline
27860 && info->first_fp_reg_save == 64)
27861 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
27862
27863 for (i = info->first_gp_reg_save; i < 32; i++)
27864 if (save_reg_p (i)
27865 && !cfun->machine->gpr_is_wrapped_separately[i])
27866 {
27867 rtx reg = gen_rtx_REG (reg_mode, i);
27868 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27869 }
27870 }
27871
27872 if (!restoring_GPRs_inline
27873 && info->first_fp_reg_save == 64)
27874 {
27875 /* We are jumping to an out-of-line function. */
27876 if (cfa_restores)
27877 emit_cfa_restores (cfa_restores);
27878 return;
27879 }
27880
27881 if (restore_lr && !restoring_GPRs_inline)
27882 {
27883 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
27884 restore_saved_lr (0, exit_func);
27885 }
27886
27887 /* Restore fpr's if we need to do it without calling a function. */
27888 if (restoring_FPRs_inline)
27889 {
27890 int offset = info->fp_save_offset + frame_off;
27891 for (i = info->first_fp_reg_save; i < 64; i++)
27892 {
27893 if (save_reg_p (i)
27894 && !cfun->machine->fpr_is_wrapped_separately[i - 32])
27895 {
27896 rtx reg = gen_rtx_REG (fp_reg_mode, i);
27897 emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
27898 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
27899 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
27900 cfa_restores);
27901 }
27902
27903 offset += fp_reg_size;
27904 }
27905 }
27906
27907 /* If we saved cr, restore it here; just the fields that were used. */
27908 if (info->cr_save_p)
27909 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
27910
27911 /* If this is V.4, unwind the stack pointer after all of the loads
27912 have been done, or set up r11 if we are restoring fp out of line. */
27913 ptr_regno = 1;
27914 if (!restoring_FPRs_inline)
27915 {
27916 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
27917 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
27918 ptr_regno = ptr_regno_for_savres (sel);
27919 }
27920
27921 insn = rs6000_emit_stack_reset (frame_reg_rtx, frame_off, ptr_regno);
27922 if (REGNO (frame_reg_rtx) == ptr_regno)
27923 frame_off = 0;
27924
27925 if (insn && restoring_FPRs_inline)
27926 {
27927 if (cfa_restores)
27928 {
27929 REG_NOTES (insn) = cfa_restores;
27930 cfa_restores = NULL_RTX;
27931 }
27932 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
27933 RTX_FRAME_RELATED_P (insn) = 1;
27934 }
27935
27936 if (crtl->calls_eh_return)
27937 {
27938 rtx sa = EH_RETURN_STACKADJ_RTX;
27939 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
27940 }
27941
27942 if (!sibcall && restoring_FPRs_inline)
27943 {
27944 if (cfa_restores)
27945 {
27946 /* We can't hang the cfa_restores off a simple return,
27947 since the shrink-wrap code sometimes uses an existing
27948 return. This means there might be a path from
27949 pre-prologue code to this return, and dwarf2cfi code
27950 wants the eh_frame unwinder state to be the same on
27951 all paths to any point. So we need to emit the
27952 cfa_restores before the return. For -m64 we really
27953 don't need epilogue cfa_restores at all, except for
27954 this irritating dwarf2cfi-with-shrink-wrap
27955 requirement; the stack red zone means eh_frame info
27956 from the prologue telling the unwinder to restore
27957 from the stack is perfectly good right to the end of
27958 the function. */
27959 emit_insn (gen_blockage ());
27960 emit_cfa_restores (cfa_restores);
27961 cfa_restores = NULL_RTX;
27962 }
27963
27964 emit_jump_insn (targetm.gen_simple_return ());
27965 }
27966
27967 if (!sibcall && !restoring_FPRs_inline)
27968 {
27969 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
27970 rtvec p = rtvec_alloc (3 + !!lr + 64 - info->first_fp_reg_save);
27971 int elt = 0;
27972 RTVEC_ELT (p, elt++) = ret_rtx;
27973 if (lr)
27974 RTVEC_ELT (p, elt++)
27975 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
27976
27977 /* We have to restore more than two FP registers, so branch to the
27978 restore function. It will return to our caller. */
27979 int i;
27980 int reg;
27981 rtx sym;
27982
27983 if (flag_shrink_wrap)
27984 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
27985
27986 sym = rs6000_savres_routine_sym (info, SAVRES_FPR | (lr ? SAVRES_LR : 0));
27987 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, sym);
27988 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11;
27989 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
27990
27991 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
27992 {
27993 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
27994
27995 RTVEC_ELT (p, elt++)
27996 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
27997 if (flag_shrink_wrap
27998 && save_reg_p (info->first_fp_reg_save + i))
27999 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28000 }
28001
28002 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
28003 }
28004
28005 if (cfa_restores)
28006 {
28007 if (sibcall)
28008 /* Ensure the cfa_restores are hung off an insn that won't
28009 be reordered above other restores. */
28010 emit_insn (gen_blockage ());
28011
28012 emit_cfa_restores (cfa_restores);
28013 }
28014 }
28015
28016 /* Write function epilogue. */
28017
28018 static void
28019 rs6000_output_function_epilogue (FILE *file)
28020 {
28021 #if TARGET_MACHO
28022 macho_branch_islands ();
28023
28024 {
28025 rtx_insn *insn = get_last_insn ();
28026 rtx_insn *deleted_debug_label = NULL;
28027
28028 /* Mach-O doesn't support labels at the end of objects, so if
28029 it looks like we might want one, take special action.
28030
28031 First, collect any sequence of deleted debug labels. */
28032 while (insn
28033 && NOTE_P (insn)
28034 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
28035 {
28036 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
28037 notes only, instead set their CODE_LABEL_NUMBER to -1,
28038 otherwise there would be code generation differences
28039 in between -g and -g0. */
28040 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
28041 deleted_debug_label = insn;
28042 insn = PREV_INSN (insn);
28043 }
28044
28045 /* Second, if we have:
28046 label:
28047 barrier
28048 then this needs to be detected, so skip past the barrier. */
28049
28050 if (insn && BARRIER_P (insn))
28051 insn = PREV_INSN (insn);
28052
28053 /* Up to now we've only seen notes or barriers. */
28054 if (insn)
28055 {
28056 if (LABEL_P (insn)
28057 || (NOTE_P (insn)
28058 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
28059 /* Trailing label: <barrier>. */
28060 fputs ("\tnop\n", file);
28061 else
28062 {
28063 /* Lastly, see if we have a completely empty function body. */
28064 while (insn && ! INSN_P (insn))
28065 insn = PREV_INSN (insn);
28066 /* If we don't find any insns, we've got an empty function body;
28067 i.e. completely empty, without a return or branch. This is
28068 taken as the case where a function body has been removed
28069 because it contains an inline __builtin_unreachable(). GCC
28070 states that reaching __builtin_unreachable() means undefined
28071 behavior, so we're not obliged to do anything special; however, we want
28072 non-zero-sized function bodies. To meet this, and help the
28073 user out, let's trap the case. */
28074 if (insn == NULL)
28075 fputs ("\ttrap\n", file);
28076 }
28077 }
28078 else if (deleted_debug_label)
28079 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
28080 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
28081 CODE_LABEL_NUMBER (insn) = -1;
28082 }
28083 #endif
28084
28085 /* Output a traceback table here. See /usr/include/sys/debug.h for info
28086 on its format.
28087
28088 We don't output a traceback table if -finhibit-size-directive was
28089 used. The documentation for -finhibit-size-directive reads
28090 ``don't output a @code{.size} assembler directive, or anything
28091 else that would cause trouble if the function is split in the
28092 middle, and the two halves are placed at locations far apart in
28093 memory.'' The traceback table has this property, since it
28094 includes the offset from the start of the function to the
28095 traceback table itself.
28096
28097 System V.4 PowerPC (and the embedded ABI derived from it) uses a
28098 different traceback table. */
28099 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28100 && ! flag_inhibit_size_directive
28101 && rs6000_traceback != traceback_none && !cfun->is_thunk)
28102 {
28103 const char *fname = NULL;
28104 const char *language_string = lang_hooks.name;
28105 int fixed_parms = 0, float_parms = 0, parm_info = 0;
28106 int i;
28107 int optional_tbtab;
28108 rs6000_stack_t *info = rs6000_stack_info ();
28109
28110 if (rs6000_traceback == traceback_full)
28111 optional_tbtab = 1;
28112 else if (rs6000_traceback == traceback_part)
28113 optional_tbtab = 0;
28114 else
28115 optional_tbtab = !optimize_size && !TARGET_ELF;
28116
28117 if (optional_tbtab)
28118 {
28119 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28120 while (*fname == '.') /* V.4 encodes . in the name */
28121 fname++;
28122
28123 /* Need label immediately before tbtab, so we can compute
28124 its offset from the function start. */
28125 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
28126 ASM_OUTPUT_LABEL (file, fname);
28127 }
28128
28129 /* The .tbtab pseudo-op can only be used for the first eight
28130 expressions, since it can't handle the possibly variable
28131 length fields that follow. However, if you omit the optional
28132 fields, the assembler outputs zeros for all optional fields
28133 anyway, giving each variable-length field its minimum length
28134 (as defined in sys/debug.h). Thus we cannot use the .tbtab
28135 pseudo-op at all. */
28136
28137 /* An all-zero word flags the start of the tbtab, for debuggers
28138 that have to find it by searching forward from the entry
28139 point or from the current pc. */
28140 fputs ("\t.long 0\n", file);
28141
28142 /* Tbtab format type. Use format type 0. */
28143 fputs ("\t.byte 0,", file);
28144
28145 /* Language type. Unfortunately, there does not seem to be any
28146 official way to discover the language being compiled, so we
28147 use language_string.
28148 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
28149 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
28150 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
28151 either, so for now use 0. */
28152 if (lang_GNU_C ()
28153 || ! strcmp (language_string, "GNU GIMPLE")
28154 || ! strcmp (language_string, "GNU Go")
28155 || ! strcmp (language_string, "libgccjit"))
28156 i = 0;
28157 else if (! strcmp (language_string, "GNU F77")
28158 || lang_GNU_Fortran ())
28159 i = 1;
28160 else if (! strcmp (language_string, "GNU Pascal"))
28161 i = 2;
28162 else if (! strcmp (language_string, "GNU Ada"))
28163 i = 3;
28164 else if (lang_GNU_CXX ()
28165 || ! strcmp (language_string, "GNU Objective-C++"))
28166 i = 9;
28167 else if (! strcmp (language_string, "GNU Java"))
28168 i = 13;
28169 else if (! strcmp (language_string, "GNU Objective-C"))
28170 i = 14;
28171 else
28172 gcc_unreachable ();
28173 fprintf (file, "%d,", i);
28174
28175 /* 8 single bit fields: global linkage (not set for C extern linkage,
28176 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
28177 from start of procedure stored in tbtab, internal function, function
28178 has controlled storage, function has no toc, function uses fp,
28179 function logs/aborts fp operations. */
28180 /* Assume that fp operations are used if any fp reg must be saved. */
28181 fprintf (file, "%d,",
28182 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
28183
28184 /* 6 bitfields: function is interrupt handler, name present in
28185 proc table, function calls alloca, on condition directives
28186 (controls stack walks, 3 bits), saves condition reg, saves
28187 link reg. */
28188 /* The `function calls alloca' bit seems to be set whenever reg 31 is
28189 set up as a frame pointer, even when there is no alloca call. */
28190 fprintf (file, "%d,",
28191 ((optional_tbtab << 6)
28192 | ((optional_tbtab & frame_pointer_needed) << 5)
28193 | (info->cr_save_p << 1)
28194 | (info->lr_save_p)));
28195
28196 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
28197 (6 bits). */
28198 fprintf (file, "%d,",
28199 (info->push_p << 7) | (64 - info->first_fp_reg_save));
28200
28201 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
28202 fprintf (file, "%d,", (32 - first_reg_to_save ()));
28203
28204 if (optional_tbtab)
28205 {
28206 /* Compute the parameter info from the function decl argument
28207 list. */
28208 tree decl;
28209 int next_parm_info_bit = 31;
28210
28211 for (decl = DECL_ARGUMENTS (current_function_decl);
28212 decl; decl = DECL_CHAIN (decl))
28213 {
28214 rtx parameter = DECL_INCOMING_RTL (decl);
28215 machine_mode mode = GET_MODE (parameter);
28216
28217 if (GET_CODE (parameter) == REG)
28218 {
28219 if (SCALAR_FLOAT_MODE_P (mode))
28220 {
28221 int bits;
28222
28223 float_parms++;
28224
28225 switch (mode)
28226 {
28227 case E_SFmode:
28228 case E_SDmode:
28229 bits = 0x2;
28230 break;
28231
28232 case E_DFmode:
28233 case E_DDmode:
28234 case E_TFmode:
28235 case E_TDmode:
28236 case E_IFmode:
28237 case E_KFmode:
28238 bits = 0x3;
28239 break;
28240
28241 default:
28242 gcc_unreachable ();
28243 }
28244
28245 /* If only one bit will fit, don't or in this entry. */
28246 if (next_parm_info_bit > 0)
28247 parm_info |= (bits << (next_parm_info_bit - 1));
28248 next_parm_info_bit -= 2;
28249 }
28250 else
28251 {
28252 fixed_parms += ((GET_MODE_SIZE (mode)
28253 + (UNITS_PER_WORD - 1))
28254 / UNITS_PER_WORD);
28255 next_parm_info_bit -= 1;
28256 }
28257 }
28258 }
28259 }
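/* Worked example (a hypothetical signature, for illustration only):
   for parameters (int, double, float) we get fixed_parms == 1 and
   float_parms == 2, and parm_info packs 0 (fixed), 11 (double), then
   10 (single) from bit 31 downward, i.e. parm_info == 0x70000000. */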
28260
28261 /* Number of fixed point parameters. */
28262 /* This is actually the number of words of fixed-point parameters; thus
28263 an 8-byte struct counts as 2, and the maximum value is 8. */
28264 fprintf (file, "%d,", fixed_parms);
28265
28266 /* 2 bitfields: number of floating point parameters (7 bits), parameters
28267 all on stack. */
28268 /* This is actually the number of fp registers that hold parameters;
28269 and thus the maximum value is 13. */
28270 /* Set parameters on stack bit if parameters are not in their original
28271 registers, regardless of whether they are on the stack? Xlc
28272 seems to set the bit when not optimizing. */
28273 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
28274
28275 if (optional_tbtab)
28276 {
28277 /* Optional fields follow. Some are variable length. */
28278
28279 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single
28280 float, 11 double float. */
28281 /* There is an entry for each parameter in a register, in the order
28282 that they occur in the parameter list. Any intervening arguments
28283 on the stack are ignored. If the list overflows a long (max
28284 possible length 34 bits) then completely leave off all elements
28285 that don't fit. */
28286 /* Only emit this long if there was at least one parameter. */
28287 if (fixed_parms || float_parms)
28288 fprintf (file, "\t.long %d\n", parm_info);
28289
28290 /* Offset from start of code to tb table. */
28291 fputs ("\t.long ", file);
28292 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
28293 RS6000_OUTPUT_BASENAME (file, fname);
28294 putc ('-', file);
28295 rs6000_output_function_entry (file, fname);
28296 putc ('\n', file);
28297
28298 /* Interrupt handler mask. */
28299 /* Omit this long, since we never set the interrupt handler bit
28300 above. */
28301
28302 /* Number of CTL (controlled storage) anchors. */
28303 /* Omit this long, since the has_ctl bit is never set above. */
28304
28305 /* Displacement into stack of each CTL anchor. */
28306 /* Omit this list of longs, because there are no CTL anchors. */
28307
28308 /* Length of function name. */
28309 if (*fname == '*')
28310 ++fname;
28311 fprintf (file, "\t.short %d\n", (int) strlen (fname));
28312
28313 /* Function name. */
28314 assemble_string (fname, strlen (fname));
28315
28316 /* Register for alloca automatic storage; this is always reg 31.
28317 Only emit this if the alloca bit was set above. */
28318 if (frame_pointer_needed)
28319 fputs ("\t.byte 31\n", file);
28320
28321 fputs ("\t.align 2\n", file);
28322 }
28323 }
28324
28325 /* Arrange to define .LCTOC1 label, if not already done. */
28326 if (need_toc_init)
28327 {
28328 need_toc_init = 0;
28329 if (!toc_initialized)
28330 {
28331 switch_to_section (toc_section);
28332 switch_to_section (current_function_section ());
28333 }
28334 }
28335 }
28336
28337 /* -fsplit-stack support. */
28338
28339 /* A SYMBOL_REF for __morestack. */
28340 static GTY(()) rtx morestack_ref;
28341
28342 static rtx
28343 gen_add3_const (rtx rt, rtx ra, long c)
28344 {
28345 if (TARGET_64BIT)
28346 return gen_adddi3 (rt, ra, GEN_INT (c));
28347 else
28348 return gen_addsi3 (rt, ra, GEN_INT (c));
28349 }
28350
28351 /* Emit -fsplit-stack prologue, which goes before the regular function
28352 prologue (at local entry point in the case of ELFv2). */
28353
28354 void
28355 rs6000_expand_split_stack_prologue (void)
28356 {
28357 rs6000_stack_t *info = rs6000_stack_info ();
28358 unsigned HOST_WIDE_INT allocate;
28359 long alloc_hi, alloc_lo;
28360 rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage;
28361 rtx_insn *insn;
28362
28363 gcc_assert (flag_split_stack && reload_completed);
28364
28365 if (!info->push_p)
28366 return;
28367
28368 if (global_regs[29])
28369 {
28370 error ("%qs uses register r29", "-fsplit-stack");
28371 inform (DECL_SOURCE_LOCATION (global_regs_decl[29]),
28372 "conflicts with %qD", global_regs_decl[29]);
28373 }
28374
28375 allocate = info->total_size;
28376 if (allocate > (unsigned HOST_WIDE_INT) 1 << 31)
28377 {
28378 sorry ("stack frame larger than 2G is not supported for -fsplit-stack");
28379 return;
28380 }
28381 if (morestack_ref == NULL_RTX)
28382 {
28383 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
28384 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
28385 | SYMBOL_FLAG_FUNCTION);
28386 }
28387
28388 r0 = gen_rtx_REG (Pmode, 0);
28389 r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
28390 r12 = gen_rtx_REG (Pmode, 12);
28391 emit_insn (gen_load_split_stack_limit (r0));
28392 /* Always emit two insns here to calculate the requested stack,
28393 so that the linker can edit them when adjusting size for calling
28394 non-split-stack code. */
28395 alloc_hi = (-allocate + 0x8000) & ~0xffffL;
28396 alloc_lo = -allocate - alloc_hi;
28397 if (alloc_hi != 0)
28398 {
28399 emit_insn (gen_add3_const (r12, r1, alloc_hi));
28400 if (alloc_lo != 0)
28401 emit_insn (gen_add3_const (r12, r12, alloc_lo));
28402 else
28403 emit_insn (gen_nop ());
28404 }
28405 else
28406 {
28407 emit_insn (gen_add3_const (r12, r1, alloc_lo));
28408 emit_insn (gen_nop ());
28409 }
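/* Worked example (values chosen for illustration): for allocate ==
   0x12345, -allocate == -0x12345 splits into alloc_hi == -0x10000 and
   alloc_lo == -0x2345; the + 0x8000 rounding above guarantees alloc_lo
   fits a signed 16-bit immediate, and alloc_hi + alloc_lo sums back to
   -allocate exactly. */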
28410
28411 compare = gen_rtx_REG (CCUNSmode, CR7_REGNO);
28412 emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0)));
28413 ok_label = gen_label_rtx ();
28414 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
28415 gen_rtx_GEU (VOIDmode, compare, const0_rtx),
28416 gen_rtx_LABEL_REF (VOIDmode, ok_label),
28417 pc_rtx);
28418 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
28419 JUMP_LABEL (insn) = ok_label;
28420 /* Mark the jump as very likely to be taken. */
28421 add_reg_br_prob_note (insn, profile_probability::very_likely ());
28422
28423 lr = gen_rtx_REG (Pmode, LR_REGNO);
28424 insn = emit_move_insn (r0, lr);
28425 RTX_FRAME_RELATED_P (insn) = 1;
28426 insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset));
28427 RTX_FRAME_RELATED_P (insn) = 1;
28428
28429 insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref),
28430 const0_rtx, const0_rtx));
28431 call_fusage = NULL_RTX;
28432 use_reg (&call_fusage, r12);
28433 /* Say the call uses r0, even though it doesn't, to stop regrename
28434 from twiddling with the insns saving lr, trashing args for cfun.
28435 The insns restoring lr are similarly protected by making
28436 split_stack_return use r0. */
28437 use_reg (&call_fusage, r0);
28438 add_function_usage_to (insn, call_fusage);
28439 /* Indicate that this function can't jump to non-local gotos. */
28440 make_reg_eh_region_note_nothrow_nononlocal (insn);
28441 emit_insn (gen_frame_load (r0, r1, info->lr_save_offset));
28442 insn = emit_move_insn (lr, r0);
28443 add_reg_note (insn, REG_CFA_RESTORE, lr);
28444 RTX_FRAME_RELATED_P (insn) = 1;
28445 emit_insn (gen_split_stack_return ());
28446
28447 emit_label (ok_label);
28448 LABEL_NUSES (ok_label) = 1;
28449 }
28450
28451 /* Return the internal arg pointer used for function incoming
28452 arguments. With -fsplit-stack, the arg pointer is r12, so we need
28453 to copy it to a pseudo in order for it to be preserved over calls
28454 and suchlike. We'd really like to use a pseudo here for the
28455 internal arg pointer but data-flow analysis is not prepared to
28456 accept pseudos as live at the beginning of a function. */
28457
28458 static rtx
28459 rs6000_internal_arg_pointer (void)
28460 {
28461 if (flag_split_stack
28462 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
28463 == NULL))
28464
28465 {
28466 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
28467 {
28468 rtx pat;
28469
28470 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
28471 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
28472
28473 /* Put the pseudo initialization right after the note at the
28474 beginning of the function. */
28475 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
28476 gen_rtx_REG (Pmode, 12));
28477 push_topmost_sequence ();
28478 emit_insn_after (pat, get_insns ());
28479 pop_topmost_sequence ();
28480 }
28481 rtx ret = plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
28482 FIRST_PARM_OFFSET (current_function_decl));
28483 return copy_to_reg (ret);
28484 }
28485 return virtual_incoming_args_rtx;
28486 }
28487
28488 /* We may have to tell the dataflow pass that the split stack prologue
28489 is initializing a register. */
28490
28491 static void
28492 rs6000_live_on_entry (bitmap regs)
28493 {
28494 if (flag_split_stack)
28495 bitmap_set_bit (regs, 12);
28496 }
28497
28498 /* Emit -fsplit-stack dynamic stack allocation space check. */
28499
28500 void
28501 rs6000_split_stack_space_check (rtx size, rtx label)
28502 {
28503 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
28504 rtx limit = gen_reg_rtx (Pmode);
28505 rtx requested = gen_reg_rtx (Pmode);
28506 rtx cmp = gen_reg_rtx (CCUNSmode);
28507 rtx jump;
28508
28509 emit_insn (gen_load_split_stack_limit (limit));
28510 if (CONST_INT_P (size))
28511 emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size))));
28512 else
28513 {
28514 size = force_reg (Pmode, size);
28515 emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size));
28516 }
28517 emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit)));
28518 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
28519 gen_rtx_GEU (VOIDmode, cmp, const0_rtx),
28520 gen_rtx_LABEL_REF (VOIDmode, label),
28521 pc_rtx);
28522 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
28523 JUMP_LABEL (jump) = label;
28524 }
28525 \f
28526 /* A C compound statement that outputs the assembler code for a thunk
28527 function, used to implement C++ virtual function calls with
28528 multiple inheritance. The thunk acts as a wrapper around a virtual
28529 function, adjusting the implicit object parameter before handing
28530 control off to the real function.
28531
28532 First, emit code to add the integer DELTA to the location that
28533 contains the incoming first argument. Assume that this argument
28534 contains a pointer, and is the one used to pass the `this' pointer
28535 in C++. This is the incoming argument *before* the function
28536 prologue, e.g. `%o0' on a sparc. The addition must preserve the
28537 values of all other incoming arguments.
28538
28539 After the addition, emit code to jump to FUNCTION, which is a
28540 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
28541 not touch the return address. Hence returning from FUNCTION will
28542 return to whoever called the current `thunk'.
28543
28544 The effect must be as if FUNCTION had been called directly with the
28545 adjusted first argument. This macro is responsible for emitting
28546 all of the code for a thunk function; output_function_prologue()
28547 and output_function_epilogue() are not invoked.
28548
28549 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
28550 been extracted from it.) It might possibly be useful on some
28551 targets, but probably not.
28552
28553 If you do not define this macro, the target-independent code in the
28554 C++ frontend will generate a less efficient heavyweight thunk that
28555 calls FUNCTION instead of jumping to it. The generic approach does
28556 not support varargs. */
28557
28558 static void
28559 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
28560 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
28561 tree function)
28562 {
28563 rtx this_rtx, funexp;
28564 rtx_insn *insn;
28565
28566 reload_completed = 1;
28567 epilogue_completed = 1;
28568
28569 /* Mark the end of the (empty) prologue. */
28570 emit_note (NOTE_INSN_PROLOGUE_END);
28571
28572 /* Find the "this" pointer. If the function returns a structure,
28573 the structure return pointer is in r3. */
28574 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
28575 this_rtx = gen_rtx_REG (Pmode, 4);
28576 else
28577 this_rtx = gen_rtx_REG (Pmode, 3);
28578
28579 /* Apply the constant offset, if required. */
28580 if (delta)
28581 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
28582
28583 /* Apply the offset from the vtable, if required. */
28584 if (vcall_offset)
28585 {
28586 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
28587 rtx tmp = gen_rtx_REG (Pmode, 12);
28588
28589 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
28590 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
28591 {
28592 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
28593 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
28594 }
28595 else
28596 {
28597 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
28598
28599 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
28600 }
28601 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
28602 }
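/* E.g. (an illustrative value): vcall_offset == 0x12340 fails the
   signed 16-bit displacement test above, so it is added into tmp first
   and the vtable slot is loaded with a zero displacement; a small
   offset like 0x40 uses a single d-form load instead. */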
28603
28604 /* Generate a tail call to the target function. */
28605 if (!TREE_USED (function))
28606 {
28607 assemble_external (function);
28608 TREE_USED (function) = 1;
28609 }
28610 funexp = XEXP (DECL_RTL (function), 0);
28611 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
28612
28613 #if TARGET_MACHO
28614 if (MACHOPIC_INDIRECT)
28615 funexp = machopic_indirect_call_target (funexp);
28616 #endif
28617
28618 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
28619 generate sibcall RTL explicitly. */
28620 insn = emit_call_insn (
28621 gen_rtx_PARALLEL (VOIDmode,
28622 gen_rtvec (3,
28623 gen_rtx_CALL (VOIDmode,
28624 funexp, const0_rtx),
28625 gen_rtx_USE (VOIDmode, const0_rtx),
28626 simple_return_rtx)));
28627 SIBLING_CALL_P (insn) = 1;
28628 emit_barrier ();
28629
28630 /* Run just enough of rest_of_compilation to get the insns emitted.
28631 There's not really enough bulk here to make other passes such as
28632 instruction scheduling worth while. Note that use_thunk calls
28633 assemble_start_function and assemble_end_function. */
28634 insn = get_insns ();
28635 shorten_branches (insn);
28636 final_start_function (insn, file, 1);
28637 final (insn, file, 1);
28638 final_end_function ();
28639
28640 reload_completed = 0;
28641 epilogue_completed = 0;
28642 }
28643 \f
28644 /* A quick summary of the various types of 'constant-pool tables'
28645 under PowerPC:
28646
28647 Target Flags Name One table per
28648 AIX (none) AIX TOC object file
28649 AIX -mfull-toc AIX TOC object file
28650 AIX -mminimal-toc AIX minimal TOC translation unit
28651 SVR4/EABI (none) SVR4 SDATA object file
28652 SVR4/EABI -fpic SVR4 pic object file
28653 SVR4/EABI -fPIC SVR4 PIC translation unit
28654 SVR4/EABI -mrelocatable EABI TOC function
28655 SVR4/EABI -maix AIX TOC object file
28656 SVR4/EABI -maix -mminimal-toc
28657 AIX minimal TOC translation unit
28658
28659 Name Reg. Set by entries contains:
28660 made by addrs? fp? sum?
28661
28662 AIX TOC 2 crt0 as Y option option
28663 AIX minimal TOC 30 prolog gcc Y Y option
28664 SVR4 SDATA 13 crt0 gcc N Y N
28665 SVR4 pic 30 prolog ld Y not yet N
28666 SVR4 PIC 30 prolog gcc Y option option
28667 EABI TOC 30 prolog gcc Y option option
28668
28669 */
28670
28671 /* Hash functions for the hash table. */
28672
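/* The multipliers 613 and 1231 used below are small primes serving as
   mixing constants; the particular values are not significant (an
   observation, not documented rationale). */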
28673 static unsigned
28674 rs6000_hash_constant (rtx k)
28675 {
28676 enum rtx_code code = GET_CODE (k);
28677 machine_mode mode = GET_MODE (k);
28678 unsigned result = (code << 3) ^ mode;
28679 const char *format;
28680 int flen, fidx;
28681
28682 format = GET_RTX_FORMAT (code);
28683 flen = strlen (format);
28684 fidx = 0;
28685
28686 switch (code)
28687 {
28688 case LABEL_REF:
28689 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
28690
28691 case CONST_WIDE_INT:
28692 {
28693 int i;
28694 flen = CONST_WIDE_INT_NUNITS (k);
28695 for (i = 0; i < flen; i++)
28696 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
28697 return result;
28698 }
28699
28700 case CONST_DOUBLE:
28701 if (mode != VOIDmode)
28702 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
28703 flen = 2;
28704 break;
28705
28706 case CODE_LABEL:
28707 fidx = 3;
28708 break;
28709
28710 default:
28711 break;
28712 }
28713
28714 for (; fidx < flen; fidx++)
28715 switch (format[fidx])
28716 {
28717 case 's':
28718 {
28719 unsigned i, len;
28720 const char *str = XSTR (k, fidx);
28721 len = strlen (str);
28722 result = result * 613 + len;
28723 for (i = 0; i < len; i++)
28724 result = result * 613 + (unsigned) str[i];
28725 break;
28726 }
28727 case 'u':
28728 case 'e':
28729 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
28730 break;
28731 case 'i':
28732 case 'n':
28733 result = result * 613 + (unsigned) XINT (k, fidx);
28734 break;
28735 case 'w':
28736 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
28737 result = result * 613 + (unsigned) XWINT (k, fidx);
28738 else
28739 {
28740 size_t i;
28741 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
28742 result = result * 613 + (unsigned) (XWINT (k, fidx)
28743 >> CHAR_BIT * i);
28744 }
28745 break;
28746 case '0':
28747 break;
28748 default:
28749 gcc_unreachable ();
28750 }
28751
28752 return result;
28753 }
28754
28755 hashval_t
28756 toc_hasher::hash (toc_hash_struct *thc)
28757 {
28758 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
28759 }
28760
28761 /* Compare H1 and H2 for equivalence. */
28762
28763 bool
28764 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
28765 {
28766 rtx r1 = h1->key;
28767 rtx r2 = h2->key;
28768
28769 if (h1->key_mode != h2->key_mode)
28770 return 0;
28771
28772 return rtx_equal_p (r1, r2);
28773 }
28774
28775 /* These are the names given by the C++ front-end to vtables, and
28776 vtable-like objects. Ideally, this logic should not be here;
28777 instead, there should be some programmatic way of inquiring as
28778 to whether or not an object is a vtable. */
28779
28780 #define VTABLE_NAME_P(NAME) \
28781 (strncmp ("_vt.", (NAME), strlen ("_vt.")) == 0 \
28782 || strncmp ("_ZTV", (NAME), strlen ("_ZTV")) == 0 \
28783 || strncmp ("_ZTT", (NAME), strlen ("_ZTT")) == 0 \
28784 || strncmp ("_ZTI", (NAME), strlen ("_ZTI")) == 0 \
28785 || strncmp ("_ZTC", (NAME), strlen ("_ZTC")) == 0)
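/* Prefix key (per the Itanium C++ ABI mangling, noted for reference):
   _ZTV vtable, _ZTT VTT, _ZTI typeinfo object, _ZTC construction
   vtable; "_vt." is the older g++ scheme. */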
28786
28787 #ifdef NO_DOLLAR_IN_LABEL
28788 /* Return a GGC-allocated character string translating dollar signs in
28789 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
28790
28791 const char *
28792 rs6000_xcoff_strip_dollar (const char *name)
28793 {
28794 char *strip, *p;
28795 const char *q;
28796 size_t len;
28797
28798 q = (const char *) strchr (name, '$');
28799
28800 if (q == 0 || q == name)
28801 return name;
28802
28803 len = strlen (name);
28804 strip = XALLOCAVEC (char, len + 1);
28805 strcpy (strip, name);
28806 p = strip + (q - name);
28807 while (p)
28808 {
28809 *p = '_';
28810 p = strchr (p + 1, '$');
28811 }
28812
28813 return ggc_alloc_string (strip, len);
28814 }
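/* Usage sketch: rs6000_xcoff_strip_dollar ("foo$bar") returns
   "foo_bar"; a name whose only '$' is the first character is returned
   unchanged, since q == name in that case. */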
28815 #endif
28816
28817 void
28818 rs6000_output_symbol_ref (FILE *file, rtx x)
28819 {
28820 const char *name = XSTR (x, 0);
28821
28822 /* Currently C++ toc references to vtables can be emitted before it
28823 is decided whether the vtable is public or private. If this is
28824 the case, then the linker will eventually complain that there is
28825 a reference to an unknown section. Thus, for vtables only,
28826 we emit the TOC entry as a reference to the identifier and not the
28827 symbol. */
28828 if (VTABLE_NAME_P (name))
28829 {
28830 RS6000_OUTPUT_BASENAME (file, name);
28831 }
28832 else
28833 assemble_name (file, name);
28834 }
28835
28836 /* Output a TOC entry. We derive the entry name from what is being
28837 written. */
28838
28839 void
28840 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
28841 {
28842 char buf[256];
28843 const char *name = buf;
28844 rtx base = x;
28845 HOST_WIDE_INT offset = 0;
28846
28847 gcc_assert (!TARGET_NO_TOC);
28848
28849 /* When the linker won't eliminate them, don't output duplicate
28850 TOC entries (this happens on AIX if there is any kind of TOC,
28851 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
28852 CODE_LABELs. */
28853 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
28854 {
28855 struct toc_hash_struct *h;
28856
28857 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
28858 time because GGC is not initialized at that point. */
28859 if (toc_hash_table == NULL)
28860 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
28861
28862 h = ggc_alloc<toc_hash_struct> ();
28863 h->key = x;
28864 h->key_mode = mode;
28865 h->labelno = labelno;
28866
28867 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
28868 if (*found == NULL)
28869 *found = h;
28870 else /* This is indeed a duplicate.
28871 Set this label equal to that label. */
28872 {
28873 fputs ("\t.set ", file);
28874 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
28875 fprintf (file, "%d,", labelno);
28876 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
28877 fprintf (file, "%d\n", ((*found)->labelno));
28878
28879 #ifdef HAVE_AS_TLS
28880 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
28881 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
28882 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
28883 {
28884 fputs ("\t.set ", file);
28885 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
28886 fprintf (file, "%d,", labelno);
28887 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
28888 fprintf (file, "%d\n", ((*found)->labelno));
28889 }
28890 #endif
28891 return;
28892 }
28893 }
28894
28895 /* If we're going to put a double constant in the TOC, make sure it's
28896 aligned properly when strict alignment is on. */
28897 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
28898 && STRICT_ALIGNMENT
28899 && GET_MODE_BITSIZE (mode) >= 64
28900 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
28901 ASM_OUTPUT_ALIGN (file, 3);
28903
28904 (*targetm.asm_out.internal_label) (file, "LC", labelno);
28905
28906 /* Handle FP constants specially. Note that if we have a minimal
28907 TOC, things we put here aren't actually in the TOC, so we can allow
28908 FP constants. */
28909 if (GET_CODE (x) == CONST_DOUBLE
28910 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
28911 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
28912 {
28913 long k[4];
28914
28915 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
28916 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
28917 else
28918 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
28919
28920 if (TARGET_64BIT)
28921 {
28922 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28923 fputs (DOUBLE_INT_ASM_OP, file);
28924 else
28925 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
28926 k[0] & 0xffffffff, k[1] & 0xffffffff,
28927 k[2] & 0xffffffff, k[3] & 0xffffffff);
28928 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
28929 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
28930 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
28931 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
28932 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
28933 return;
28934 }
28935 else
28936 {
28937 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28938 fputs ("\t.long ", file);
28939 else
28940 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
28941 k[0] & 0xffffffff, k[1] & 0xffffffff,
28942 k[2] & 0xffffffff, k[3] & 0xffffffff);
28943 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
28944 k[0] & 0xffffffff, k[1] & 0xffffffff,
28945 k[2] & 0xffffffff, k[3] & 0xffffffff);
28946 return;
28947 }
28948 }
28949 else if (GET_CODE (x) == CONST_DOUBLE
28950 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
28951 {
28952 long k[2];
28953
28954 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
28955 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
28956 else
28957 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
28958
28959 if (TARGET_64BIT)
28960 {
28961 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28962 fputs (DOUBLE_INT_ASM_OP, file);
28963 else
28964 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
28965 k[0] & 0xffffffff, k[1] & 0xffffffff);
28966 fprintf (file, "0x%lx%08lx\n",
28967 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
28968 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
28969 return;
28970 }
28971 else
28972 {
28973 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28974 fputs ("\t.long ", file);
28975 else
28976 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
28977 k[0] & 0xffffffff, k[1] & 0xffffffff);
28978 fprintf (file, "0x%lx,0x%lx\n",
28979 k[0] & 0xffffffff, k[1] & 0xffffffff);
28980 return;
28981 }
28982 }
28983 else if (GET_CODE (x) == CONST_DOUBLE
28984 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
28985 {
28986 long l;
28987
28988 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
28989 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
28990 else
28991 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
28992
28993 if (TARGET_64BIT)
28994 {
28995 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28996 fputs (DOUBLE_INT_ASM_OP, file);
28997 else
28998 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
28999 if (WORDS_BIG_ENDIAN)
29000 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
29001 else
29002 fprintf (file, "0x%lx\n", l & 0xffffffff);
29003 return;
29004 }
29005 else
29006 {
29007 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29008 fputs ("\t.long ", file);
29009 else
29010 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
29011 fprintf (file, "0x%lx\n", l & 0xffffffff);
29012 return;
29013 }
29014 }
29015 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
29016 {
29017 unsigned HOST_WIDE_INT low;
29018 HOST_WIDE_INT high;
29019
29020 low = INTVAL (x) & 0xffffffff;
29021 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
29022
29023 /* TOC entries are always Pmode-sized, so on big-endian targets
29024 smaller integer constants in the TOC need to be padded.
29025 (This is still a win over putting the constants in
29026 a separate constant pool, because then we'd have
29027 to have both a TOC entry _and_ the actual constant.)
29028
29029 For a 32-bit target, CONST_INT values are loaded and shifted
29030 entirely within `low' and can be stored in one TOC entry. */
29031
29032 /* Wider-than-pointer modes would be easy to handle here, but they are not generated now. */
29033 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
29034
29035 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
29036 {
29037 low |= high << 32;
29038 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
29039 high = (HOST_WIDE_INT) low >> 32;
29040 low &= 0xffffffff;
29041 }
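
/* Illustrative example (editorial): with POINTER_SIZE == 64 and a
   16-bit mode, the constant 0x1234 is shifted left by 48 bits, so
   the TOC doubleword is emitted as 0x1234000000000000 and a
   big-endian narrow load from the start of the entry sees
   0x1234. */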
29042
29043 if (TARGET_64BIT)
29044 {
29045 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29046 fputs (DOUBLE_INT_ASM_OP, file);
29047 else
29048 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
29049 (long) high & 0xffffffff, (long) low & 0xffffffff);
29050 fprintf (file, "0x%lx%08lx\n",
29051 (long) high & 0xffffffff, (long) low & 0xffffffff);
29052 return;
29053 }
29054 else
29055 {
29056 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
29057 {
29058 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29059 fputs ("\t.long ", file);
29060 else
29061 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
29062 (long) high & 0xffffffff, (long) low & 0xffffffff);
29063 fprintf (file, "0x%lx,0x%lx\n",
29064 (long) high & 0xffffffff, (long) low & 0xffffffff);
29065 }
29066 else
29067 {
29068 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29069 fputs ("\t.long ", file);
29070 else
29071 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
29072 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
29073 }
29074 return;
29075 }
29076 }
29077
29078 if (GET_CODE (x) == CONST)
29079 {
29080 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
29081 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
29082
29083 base = XEXP (XEXP (x, 0), 0);
29084 offset = INTVAL (XEXP (XEXP (x, 0), 1));
29085 }
29086
29087 switch (GET_CODE (base))
29088 {
29089 case SYMBOL_REF:
29090 name = XSTR (base, 0);
29091 break;
29092
29093 case LABEL_REF:
29094 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
29095 CODE_LABEL_NUMBER (XEXP (base, 0)));
29096 break;
29097
29098 case CODE_LABEL:
29099 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
29100 break;
29101
29102 default:
29103 gcc_unreachable ();
29104 }
29105
29106 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29107 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
29108 else
29109 {
29110 fputs ("\t.tc ", file);
29111 RS6000_OUTPUT_BASENAME (file, name);
29112
29113 if (offset < 0)
29114 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
29115 else if (offset)
29116 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
29117
29118 /* Mark large TOC symbols on AIX with [TE] so they are mapped
29119 after other TOC symbols, reducing overflow of small TOC access
29120 to [TC] symbols. */
29121 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
29122 ? "[TE]," : "[TC],", file);
29123 }
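
/* Illustrative example (editorial, hypothetical symbol): for a
   symbol "foo" this emits "\t.tc foo[TC]," when OFFSET is 0,
   "\t.tc foo.P8[TC]," for OFFSET 8, and "\t.tc foo.N8[TC]," for
   OFFSET -8; under XCOFF with a non-small code model, "[TE]" is
   used instead of "[TC]". */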
29124
29125 /* Currently, C++ TOC references to vtables can be emitted before it
29126 has been decided whether the vtable is public or private. If that
29127 happens, the linker will eventually complain that there is a TOC
29128 reference to an unknown section. Thus, for vtables only, we emit
29129 the TOC reference via the symbol rather than via the
29130 section. */
29131 if (VTABLE_NAME_P (name))
29132 {
29133 RS6000_OUTPUT_BASENAME (file, name);
29134 if (offset < 0)
29135 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
29136 else if (offset > 0)
29137 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
29138 }
29139 else
29140 output_addr_const (file, x);
29141
29142 #if HAVE_AS_TLS
29143 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF)
29144 {
29145 switch (SYMBOL_REF_TLS_MODEL (base))
29146 {
29147 case 0:
29148 break;
29149 case TLS_MODEL_LOCAL_EXEC:
29150 fputs ("@le", file);
29151 break;
29152 case TLS_MODEL_INITIAL_EXEC:
29153 fputs ("@ie", file);
29154 break;
29155 /* Use global-dynamic for local-dynamic. */
29156 case TLS_MODEL_GLOBAL_DYNAMIC:
29157 case TLS_MODEL_LOCAL_DYNAMIC:
29158 putc ('\n', file);
29159 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
29160 fputs ("\t.tc .", file);
29161 RS6000_OUTPUT_BASENAME (file, name);
29162 fputs ("[TC],", file);
29163 output_addr_const (file, x);
29164 fputs ("@m", file);
29165 break;
29166 default:
29167 gcc_unreachable ();
29168 }
29169 }
29170 #endif
29171
29172 putc ('\n', file);
29173 }
29174 \f
29175 /* Output an assembler pseudo-op to write an ASCII string of N characters
29176 starting at P to FILE.
29177
29178 On the RS/6000, we have to do this using the .byte operation and
29179 write out special characters outside the quoted string.
29180 Also, the assembler is broken; very long strings are truncated,
29181 so we must artificially break them up early. */
29182
29183 void
29184 output_ascii (FILE *file, const char *p, int n)
29185 {
29186 char c;
29187 int i, count_string;
29188 const char *for_string = "\t.byte \"";
29189 const char *for_decimal = "\t.byte ";
29190 const char *to_close = NULL;
29191
29192 count_string = 0;
29193 for (i = 0; i < n; i++)
29194 {
29195 c = *p++;
29196 if (c >= ' ' && c < 0177)
29197 {
29198 if (for_string)
29199 fputs (for_string, file);
29200 putc (c, file);
29201
29202 /* Write two quotes to get one. */
29203 if (c == '"')
29204 {
29205 putc (c, file);
29206 ++count_string;
29207 }
29208
29209 for_string = NULL;
29210 for_decimal = "\"\n\t.byte ";
29211 to_close = "\"\n";
29212 ++count_string;
29213
29214 if (count_string >= 512)
29215 {
29216 fputs (to_close, file);
29217
29218 for_string = "\t.byte \"";
29219 for_decimal = "\t.byte ";
29220 to_close = NULL;
29221 count_string = 0;
29222 }
29223 }
29224 else
29225 {
29226 if (for_decimal)
29227 fputs (for_decimal, file);
29228 fprintf (file, "%d", c);
29229
29230 for_string = "\n\t.byte \"";
29231 for_decimal = ", ";
29232 to_close = "\n";
29233 count_string = 0;
29234 }
29235 }
29236
29237 /* Now close the string if we have written one. Then end the line. */
29238 if (to_close)
29239 fputs (to_close, file);
29240 }
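
/* Illustrative example (editorial): output_ascii (file, "hi\n", 3)
   emits

	.byte "hi"
	.byte 10

   i.e. printable runs go inside one quoted .byte directive and any
   other byte is emitted as its decimal value. */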
29241 \f
29242 /* Generate a unique section name for FILENAME for a section type
29243 represented by SECTION_DESC. Output goes into BUF.
29244
29245 SECTION_DESC can be any string, as long as it is different for each
29246 possible section type.
29247
29248 We name the section in the same manner as xlc. The name begins with an
29249 underscore followed by the filename (after stripping any leading directory
29250 names), with the last period and everything after it replaced by the string
29251 SECTION_DESC; non-alphanumeric characters are dropped. If FILENAME does
29252 not contain a period, SECTION_DESC is appended to the end of the name. */
29253
29254 void
29255 rs6000_gen_section_name (char **buf, const char *filename,
29256 const char *section_desc)
29257 {
29258 const char *q, *after_last_slash, *last_period = 0;
29259 char *p;
29260 int len;
29261
29262 after_last_slash = filename;
29263 for (q = filename; *q; q++)
29264 {
29265 if (*q == '/')
29266 after_last_slash = q + 1;
29267 else if (*q == '.')
29268 last_period = q;
29269 }
29270
29271 len = strlen (after_last_slash) + strlen (section_desc) + 2;
29272 *buf = (char *) xmalloc (len);
29273
29274 p = *buf;
29275 *p++ = '_';
29276
29277 for (q = after_last_slash; *q; q++)
29278 {
29279 if (q == last_period)
29280 {
29281 strcpy (p, section_desc);
29282 p += strlen (section_desc);
29283 break;
29284 }
29285
29286 else if (ISALNUM (*q))
29287 *p++ = *q;
29288 }
29289
29290 if (last_period == 0)
29291 strcpy (p, section_desc);
29292 else
29293 *p = '\0';
29294 }
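
/* Illustrative example (editorial, hypothetical SECTION_DESC): for
   FILENAME "src/foo.c" and SECTION_DESC ".bss_" the generated name
   is "_foo.bss_": the directory is stripped, and the last period
   together with the "c" after it is replaced by SECTION_DESC. */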
29295 \f
29296 /* Emit profile function. */
29297
29298 void
29299 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
29300 {
29301 /* Non-standard profiling for kernels, which just saves LR then calls
29302 _mcount without worrying about arg saves. The idea is to change
29303 the function prologue as little as possible as it isn't easy to
29304 account for arg save/restore code added just for _mcount. */
29305 if (TARGET_PROFILE_KERNEL)
29306 return;
29307
29308 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29309 {
29310 #ifndef NO_PROFILE_COUNTERS
29311 # define NO_PROFILE_COUNTERS 0
29312 #endif
29313 if (NO_PROFILE_COUNTERS)
29314 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
29315 LCT_NORMAL, VOIDmode);
29316 else
29317 {
29318 char buf[30];
29319 const char *label_name;
29320 rtx fun;
29321
29322 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
29323 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
29324 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
29325
29326 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
29327 LCT_NORMAL, VOIDmode, fun, Pmode);
29328 }
29329 }
29330 else if (DEFAULT_ABI == ABI_DARWIN)
29331 {
29332 const char *mcount_name = RS6000_MCOUNT;
29333 int caller_addr_regno = LR_REGNO;
29334
29335 /* Be conservative and always set this, at least for now. */
29336 crtl->uses_pic_offset_table = 1;
29337
29338 #if TARGET_MACHO
29339 /* For PIC code, set up a stub and collect the caller's address
29340 from r0, which is where the prologue puts it. */
29341 if (MACHOPIC_INDIRECT
29342 && crtl->uses_pic_offset_table)
29343 caller_addr_regno = 0;
29344 #endif
29345 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
29346 LCT_NORMAL, VOIDmode,
29347 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
29348 }
29349 }
29350
29351 /* Write function profiler code. */
29352
29353 void
29354 output_function_profiler (FILE *file, int labelno)
29355 {
29356 char buf[100];
29357
29358 switch (DEFAULT_ABI)
29359 {
29360 default:
29361 gcc_unreachable ();
29362
29363 case ABI_V4:
29364 if (!TARGET_32BIT)
29365 {
29366 warning (0, "no profiling of 64-bit code for this ABI");
29367 return;
29368 }
29369 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
29370 fprintf (file, "\tmflr %s\n", reg_names[0]);
29371 if (NO_PROFILE_COUNTERS)
29372 {
29373 asm_fprintf (file, "\tstw %s,4(%s)\n",
29374 reg_names[0], reg_names[1]);
29375 }
29376 else if (TARGET_SECURE_PLT && flag_pic)
29377 {
29378 if (TARGET_LINK_STACK)
29379 {
29380 char name[32];
29381 get_ppc476_thunk_name (name);
29382 asm_fprintf (file, "\tbl %s\n", name);
29383 }
29384 else
29385 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
29386 asm_fprintf (file, "\tstw %s,4(%s)\n",
29387 reg_names[0], reg_names[1]);
29388 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
29389 asm_fprintf (file, "\taddis %s,%s,",
29390 reg_names[12], reg_names[12]);
29391 assemble_name (file, buf);
29392 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
29393 assemble_name (file, buf);
29394 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
29395 }
29396 else if (flag_pic == 1)
29397 {
29398 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
29399 asm_fprintf (file, "\tstw %s,4(%s)\n",
29400 reg_names[0], reg_names[1]);
29401 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
29402 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
29403 assemble_name (file, buf);
29404 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
29405 }
29406 else if (flag_pic > 1)
29407 {
29408 asm_fprintf (file, "\tstw %s,4(%s)\n",
29409 reg_names[0], reg_names[1]);
29410 /* Now, we need to get the address of the label. */
29411 if (TARGET_LINK_STACK)
29412 {
29413 char name[32];
29414 get_ppc476_thunk_name (name);
29415 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
29416 assemble_name (file, buf);
29417 fputs ("-.\n1:", file);
29418 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
29419 asm_fprintf (file, "\taddi %s,%s,4\n",
29420 reg_names[11], reg_names[11]);
29421 }
29422 else
29423 {
29424 fputs ("\tbcl 20,31,1f\n\t.long ", file);
29425 assemble_name (file, buf);
29426 fputs ("-.\n1:", file);
29427 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
29428 }
29429 asm_fprintf (file, "\tlwz %s,0(%s)\n",
29430 reg_names[0], reg_names[11]);
29431 asm_fprintf (file, "\tadd %s,%s,%s\n",
29432 reg_names[0], reg_names[0], reg_names[11]);
29433 }
29434 else
29435 {
29436 asm_fprintf (file, "\tlis %s,", reg_names[12]);
29437 assemble_name (file, buf);
29438 fputs ("@ha\n", file);
29439 asm_fprintf (file, "\tstw %s,4(%s)\n",
29440 reg_names[0], reg_names[1]);
29441 asm_fprintf (file, "\tla %s,", reg_names[0]);
29442 assemble_name (file, buf);
29443 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
29444 }
29445
29446 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
29447 fprintf (file, "\tbl %s%s\n",
29448 RS6000_MCOUNT, flag_pic ? "@plt" : "");
29449 break;
29450
29451 case ABI_AIX:
29452 case ABI_ELFv2:
29453 case ABI_DARWIN:
29454 /* Don't do anything, done in output_profile_hook (). */
29455 break;
29456 }
29457 }
29458
29459 \f
29460
29461 /* The following variable holds the last issued insn. */
29462
29463 static rtx_insn *last_scheduled_insn;
29464
29465 /* The following variable helps to balance the issuing of load and
29466 store instructions. */
29467
29468 static int load_store_pendulum;
29469
29470 /* The following variable helps pair divide insns during scheduling. */
29471 static int divide_cnt;
29472 /* The following variable helps pair and alternate vector and vector load
29473 insns during scheduling. */
29474 static int vec_pairing;
29475
29476
29477 /* Power4 load update and store update instructions are cracked into a
29478 load or store and an integer insn which are executed in the same cycle.
29479 Branches have their own dispatch slot which does not count against the
29480 GCC issue rate, but it changes the program flow so there are no other
29481 instructions to issue in this cycle. */
29482
29483 static int
29484 rs6000_variable_issue_1 (rtx_insn *insn, int more)
29485 {
29486 last_scheduled_insn = insn;
29487 if (GET_CODE (PATTERN (insn)) == USE
29488 || GET_CODE (PATTERN (insn)) == CLOBBER)
29489 {
29490 cached_can_issue_more = more;
29491 return cached_can_issue_more;
29492 }
29493
29494 if (insn_terminates_group_p (insn, current_group))
29495 {
29496 cached_can_issue_more = 0;
29497 return cached_can_issue_more;
29498 }
29499
29500 /* If the insn is not recognized it has no reservation; leave the issue count unchanged. */
29501 if (recog_memoized (insn) < 0)
29502 return more;
29503
29504 if (rs6000_sched_groups)
29505 {
29506 if (is_microcoded_insn (insn))
29507 cached_can_issue_more = 0;
29508 else if (is_cracked_insn (insn))
29509 cached_can_issue_more = more > 2 ? more - 2 : 0;
29510 else
29511 cached_can_issue_more = more - 1;
29512
29513 return cached_can_issue_more;
29514 }
29515
29516 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
29517 return 0;
29518
29519 cached_can_issue_more = more - 1;
29520 return cached_can_issue_more;
29521 }
29522
29523 static int
29524 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
29525 {
29526 int r = rs6000_variable_issue_1 (insn, more);
29527 if (verbose)
29528 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
29529 return r;
29530 }
29531
29532 /* Adjust the cost of a scheduling dependency. Return the new cost of
29533 a dependency of type DEP_TYPE of INSN on DEP_INSN. COST is the current cost. */
29534
29535 static int
29536 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
29537 unsigned int)
29538 {
29539 enum attr_type attr_type;
29540
29541 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
29542 return cost;
29543
29544 switch (dep_type)
29545 {
29546 case REG_DEP_TRUE:
29547 {
29548 /* Data dependency; DEP_INSN writes a register that INSN reads
29549 some cycles later. */
29550
29551 /* Separate a load from a narrower, dependent store. */
29552 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9)
29553 && GET_CODE (PATTERN (insn)) == SET
29554 && GET_CODE (PATTERN (dep_insn)) == SET
29555 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
29556 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
29557 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
29558 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
29559 return cost + 14;
29560
29561 attr_type = get_attr_type (insn);
29562
29563 switch (attr_type)
29564 {
29565 case TYPE_JMPREG:
29566 /* Tell the first scheduling pass about the latency between
29567 a mtctr and bctr (and mtlr and br/blr). The first
29568 scheduling pass will not know about this latency since
29569 the mtctr instruction, which has the latency associated
29570 to it, will be generated by reload. */
29571 return 4;
29572 case TYPE_BRANCH:
29573 /* Leave some extra cycles between a compare and its
29574 dependent branch, to inhibit expensive mispredicts. */
29575 if ((rs6000_tune == PROCESSOR_PPC603
29576 || rs6000_tune == PROCESSOR_PPC604
29577 || rs6000_tune == PROCESSOR_PPC604e
29578 || rs6000_tune == PROCESSOR_PPC620
29579 || rs6000_tune == PROCESSOR_PPC630
29580 || rs6000_tune == PROCESSOR_PPC750
29581 || rs6000_tune == PROCESSOR_PPC7400
29582 || rs6000_tune == PROCESSOR_PPC7450
29583 || rs6000_tune == PROCESSOR_PPCE5500
29584 || rs6000_tune == PROCESSOR_PPCE6500
29585 || rs6000_tune == PROCESSOR_POWER4
29586 || rs6000_tune == PROCESSOR_POWER5
29587 || rs6000_tune == PROCESSOR_POWER7
29588 || rs6000_tune == PROCESSOR_POWER8
29589 || rs6000_tune == PROCESSOR_POWER9
29590 || rs6000_tune == PROCESSOR_CELL)
29591 && recog_memoized (dep_insn)
29592 && (INSN_CODE (dep_insn) >= 0))
29593
29594 switch (get_attr_type (dep_insn))
29595 {
29596 case TYPE_CMP:
29597 case TYPE_FPCOMPARE:
29598 case TYPE_CR_LOGICAL:
29599 return cost + 2;
29600 case TYPE_EXTS:
29601 case TYPE_MUL:
29602 if (get_attr_dot (dep_insn) == DOT_YES)
29603 return cost + 2;
29604 else
29605 break;
29606 case TYPE_SHIFT:
29607 if (get_attr_dot (dep_insn) == DOT_YES
29608 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
29609 return cost + 2;
29610 else
29611 break;
29612 default:
29613 break;
29614 }
29615 break;
29616
29617 case TYPE_STORE:
29618 case TYPE_FPSTORE:
29619 if ((rs6000_tune == PROCESSOR_POWER6)
29620 && recog_memoized (dep_insn)
29621 && (INSN_CODE (dep_insn) >= 0))
29622 {
29623
29624 if (GET_CODE (PATTERN (insn)) != SET)
29625 /* If this happens, we have to extend this to schedule
29626 optimally. Return default for now. */
29627 return cost;
29628
29629 /* Adjust the cost for the case where the value written
29630 by a fixed point operation is used as the address
29631 gen value on a store. */
29632 switch (get_attr_type (dep_insn))
29633 {
29634 case TYPE_LOAD:
29635 case TYPE_CNTLZ:
29636 {
29637 if (! rs6000_store_data_bypass_p (dep_insn, insn))
29638 return get_attr_sign_extend (dep_insn)
29639 == SIGN_EXTEND_YES ? 6 : 4;
29640 break;
29641 }
29642 case TYPE_SHIFT:
29643 {
29644 if (! rs6000_store_data_bypass_p (dep_insn, insn))
29645 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
29646 6 : 3;
29647 break;
29648 }
29649 case TYPE_INTEGER:
29650 case TYPE_ADD:
29651 case TYPE_LOGICAL:
29652 case TYPE_EXTS:
29653 case TYPE_INSERT:
29654 {
29655 if (! rs6000_store_data_bypass_p (dep_insn, insn))
29656 return 3;
29657 break;
29658 }
29659 case TYPE_STORE:
29660 case TYPE_FPLOAD:
29661 case TYPE_FPSTORE:
29662 {
29663 if (get_attr_update (dep_insn) == UPDATE_YES
29664 && ! rs6000_store_data_bypass_p (dep_insn, insn))
29665 return 3;
29666 break;
29667 }
29668 case TYPE_MUL:
29669 {
29670 if (! rs6000_store_data_bypass_p (dep_insn, insn))
29671 return 17;
29672 break;
29673 }
29674 case TYPE_DIV:
29675 {
29676 if (! rs6000_store_data_bypass_p (dep_insn, insn))
29677 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
29678 break;
29679 }
29680 default:
29681 break;
29682 }
29683 }
29684 break;
29685
29686 case TYPE_LOAD:
29687 if ((rs6000_tune == PROCESSOR_POWER6)
29688 && recog_memoized (dep_insn)
29689 && (INSN_CODE (dep_insn) >= 0))
29690 {
29691
29692 /* Adjust the cost for the case where the value written
29693 by a fixed point instruction is used within the address
29694 gen portion of a subsequent load(u)(x). */
29695 switch (get_attr_type (dep_insn))
29696 {
29697 case TYPE_LOAD:
29698 case TYPE_CNTLZ:
29699 {
29700 if (set_to_load_agen (dep_insn, insn))
29701 return get_attr_sign_extend (dep_insn)
29702 == SIGN_EXTEND_YES ? 6 : 4;
29703 break;
29704 }
29705 case TYPE_SHIFT:
29706 {
29707 if (set_to_load_agen (dep_insn, insn))
29708 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
29709 6 : 3;
29710 break;
29711 }
29712 case TYPE_INTEGER:
29713 case TYPE_ADD:
29714 case TYPE_LOGICAL:
29715 case TYPE_EXTS:
29716 case TYPE_INSERT:
29717 {
29718 if (set_to_load_agen (dep_insn, insn))
29719 return 3;
29720 break;
29721 }
29722 case TYPE_STORE:
29723 case TYPE_FPLOAD:
29724 case TYPE_FPSTORE:
29725 {
29726 if (get_attr_update (dep_insn) == UPDATE_YES
29727 && set_to_load_agen (dep_insn, insn))
29728 return 3;
29729 break;
29730 }
29731 case TYPE_MUL:
29732 {
29733 if (set_to_load_agen (dep_insn, insn))
29734 return 17;
29735 break;
29736 }
29737 case TYPE_DIV:
29738 {
29739 if (set_to_load_agen (dep_insn, insn))
29740 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
29741 break;
29742 }
29743 default:
29744 break;
29745 }
29746 }
29747 break;
29748
29749 case TYPE_FPLOAD:
29750 if ((rs6000_tune == PROCESSOR_POWER6)
29751 && get_attr_update (insn) == UPDATE_NO
29752 && recog_memoized (dep_insn)
29753 && (INSN_CODE (dep_insn) >= 0)
29754 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
29755 return 2;
29756 break;
29757 default:
29758 break;
29759 }
29760
29761 /* Fall out to return default cost. */
29762 }
29763 break;
29764
29765 case REG_DEP_OUTPUT:
29766 /* Output dependency; DEP_INSN writes a register that INSN writes some
29767 cycles later. */
29768 if ((rs6000_tune == PROCESSOR_POWER6)
29769 && recog_memoized (dep_insn)
29770 && (INSN_CODE (dep_insn) >= 0))
29771 {
29772 attr_type = get_attr_type (insn);
29773
29774 switch (attr_type)
29775 {
29776 case TYPE_FP:
29777 case TYPE_FPSIMPLE:
29778 if (get_attr_type (dep_insn) == TYPE_FP
29779 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
29780 return 1;
29781 break;
29782 case TYPE_FPLOAD:
29783 if (get_attr_update (insn) == UPDATE_NO
29784 && get_attr_type (dep_insn) == TYPE_MFFGPR)
29785 return 2;
29786 break;
29787 default:
29788 break;
29789 }
29790 }
29791 /* Fall through, no cost for output dependency. */
29792 /* FALLTHRU */
29793
29794 case REG_DEP_ANTI:
29795 /* Anti dependency; DEP_INSN reads a register that INSN writes some
29796 cycles later. */
29797 return 0;
29798
29799 default:
29800 gcc_unreachable ();
29801 }
29802
29803 return cost;
29804 }
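
/* Illustrative note (editorial) on the POWER6 address-generation
   handling above: if a TYPE_ADD insn computes the register that a
   dependent store then uses for address generation (so the
   store-data bypass does not apply), the dependence cost is raised
   to 3 cycles instead of the default latency. */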
29805
29806 /* Debug version of rs6000_adjust_cost. */
29807
29808 static int
29809 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
29810 int cost, unsigned int dw)
29811 {
29812 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
29813
29814 if (ret != cost)
29815 {
29816 const char *dep;
29817
29818 switch (dep_type)
29819 {
29820 default: dep = "unknown dependency"; break;
29821 case REG_DEP_TRUE: dep = "data dependency"; break;
29822 case REG_DEP_OUTPUT: dep = "output dependency"; break;
29823 case REG_DEP_ANTI: dep = "anti dependency"; break;
29824 }
29825
29826 fprintf (stderr,
29827 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
29828 "%s, insn:\n", ret, cost, dep);
29829
29830 debug_rtx (insn);
29831 }
29832
29833 return ret;
29834 }
29835
29836 /* The function returns true if INSN is microcoded, and false
29837 otherwise. */
29838
29839 static bool
29840 is_microcoded_insn (rtx_insn *insn)
29841 {
29842 if (!insn || !NONDEBUG_INSN_P (insn)
29843 || GET_CODE (PATTERN (insn)) == USE
29844 || GET_CODE (PATTERN (insn)) == CLOBBER)
29845 return false;
29846
29847 if (rs6000_tune == PROCESSOR_CELL)
29848 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
29849
29850 if (rs6000_sched_groups
29851 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
29852 {
29853 enum attr_type type = get_attr_type (insn);
29854 if ((type == TYPE_LOAD
29855 && get_attr_update (insn) == UPDATE_YES
29856 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
29857 || ((type == TYPE_LOAD || type == TYPE_STORE)
29858 && get_attr_update (insn) == UPDATE_YES
29859 && get_attr_indexed (insn) == INDEXED_YES)
29860 || type == TYPE_MFCR)
29861 return true;
29862 }
29863
29864 return false;
29865 }
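
/* Illustrative example (editorial): on POWER4/POWER5 an indexed
   load with update such as lwzux (UPDATE_YES and INDEXED_YES) is
   treated as microcoded by the test above, while a plain lwz is
   not. */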
29866
29867 /* The function returns true if INSN is cracked into 2 instructions
29868 by the processor (and therefore occupies 2 issue slots). */
29869
29870 static bool
29871 is_cracked_insn (rtx_insn *insn)
29872 {
29873 if (!insn || !NONDEBUG_INSN_P (insn)
29874 || GET_CODE (PATTERN (insn)) == USE
29875 || GET_CODE (PATTERN (insn)) == CLOBBER)
29876 return false;
29877
29878 if (rs6000_sched_groups
29879 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
29880 {
29881 enum attr_type type = get_attr_type (insn);
29882 if ((type == TYPE_LOAD
29883 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
29884 && get_attr_update (insn) == UPDATE_NO)
29885 || (type == TYPE_LOAD
29886 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
29887 && get_attr_update (insn) == UPDATE_YES
29888 && get_attr_indexed (insn) == INDEXED_NO)
29889 || (type == TYPE_STORE
29890 && get_attr_update (insn) == UPDATE_YES
29891 && get_attr_indexed (insn) == INDEXED_NO)
29892 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
29893 && get_attr_update (insn) == UPDATE_YES)
29894 || (type == TYPE_CR_LOGICAL
29895 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
29896 || (type == TYPE_EXTS
29897 && get_attr_dot (insn) == DOT_YES)
29898 || (type == TYPE_SHIFT
29899 && get_attr_dot (insn) == DOT_YES
29900 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
29901 || (type == TYPE_MUL
29902 && get_attr_dot (insn) == DOT_YES)
29903 || type == TYPE_DIV
29904 || (type == TYPE_INSERT
29905 && get_attr_size (insn) == SIZE_32))
29906 return true;
29907 }
29908
29909 return false;
29910 }
29911
29912 /* The function returns true if INSN can be issued only from
29913 the branch slot. */
29914
29915 static bool
29916 is_branch_slot_insn (rtx_insn *insn)
29917 {
29918 if (!insn || !NONDEBUG_INSN_P (insn)
29919 || GET_CODE (PATTERN (insn)) == USE
29920 || GET_CODE (PATTERN (insn)) == CLOBBER)
29921 return false;
29922
29923 if (rs6000_sched_groups)
29924 {
29925 enum attr_type type = get_attr_type (insn);
29926 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
29927 return true;
29928 return false;
29929 }
29930
29931 return false;
29932 }
29933
29934 /* The function returns true if OUT_INSN sets a value that is
29935 used in the address generation computation of IN_INSN. */
29936 static bool
29937 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
29938 {
29939 rtx out_set, in_set;
29940
29941 /* For performance reasons, only handle the simple case where
29942 both insns are a single_set. */
29943 out_set = single_set (out_insn);
29944 if (out_set)
29945 {
29946 in_set = single_set (in_insn);
29947 if (in_set)
29948 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
29949 }
29950
29951 return false;
29952 }
29953
29954 /* Try to determine base/offset/size parts of the given MEM.
29955 Return true if successful, false if any of the values couldn't
29956 be determined.
29957
29958 This function only looks for REG or REG+CONST address forms.
29959 REG+REG address form will return false. */
29960
29961 static bool
29962 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
29963 HOST_WIDE_INT *size)
29964 {
29965 rtx addr_rtx;
29966 if (MEM_SIZE_KNOWN_P (mem))
29967 *size = MEM_SIZE (mem);
29968 else
29969 return false;
29970
29971 addr_rtx = XEXP (mem, 0);
29972 if (GET_CODE (addr_rtx) == PRE_MODIFY)
29973 addr_rtx = XEXP (addr_rtx, 1);
29974
29975 *offset = 0;
29976 while (GET_CODE (addr_rtx) == PLUS
29977 && CONST_INT_P (XEXP (addr_rtx, 1)))
29978 {
29979 *offset += INTVAL (XEXP (addr_rtx, 1));
29980 addr_rtx = XEXP (addr_rtx, 0);
29981 }
29982 if (!REG_P (addr_rtx))
29983 return false;
29984
29985 *base = addr_rtx;
29986 return true;
29987 }
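
/* Illustrative example (editorial): for
   (mem:DI (plus (reg 9) (const_int 16))) with a known size of 8
   bytes, this sets *BASE to (reg 9), *OFFSET to 16 and *SIZE to 8;
   a reg+reg address such as (plus (reg 9) (reg 10)) makes it
   return false. */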
29988
29989 /* The function returns true if the target storage location of
29990 MEM1 is adjacent to the target storage location of MEM2. */
29992
29993 static bool
29994 adjacent_mem_locations (rtx mem1, rtx mem2)
29995 {
29996 rtx reg1, reg2;
29997 HOST_WIDE_INT off1, size1, off2, size2;
29998
29999 if (get_memref_parts (mem1, &reg1, &off1, &size1)
30000 && get_memref_parts (mem2, &reg2, &off2, &size2))
30001 return ((REGNO (reg1) == REGNO (reg2))
30002 && ((off1 + size1 == off2)
30003 || (off2 + size2 == off1)));
30004
30005 return false;
30006 }
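
/* Illustrative example (editorial): stores of 4 bytes at (reg 9)+0
   and 4 bytes at (reg 9)+4 are adjacent (off1 + size1 == off2);
   the same pair is not considered overlapping by
   mem_locations_overlap below, since their ranges do not
   intersect. */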
30007
30008 /* This function returns true if it can be determined that the two MEM
30009 locations overlap by at least 1 byte based on base reg/offset/size. */
30010
30011 static bool
30012 mem_locations_overlap (rtx mem1, rtx mem2)
30013 {
30014 rtx reg1, reg2;
30015 HOST_WIDE_INT off1, size1, off2, size2;
30016
30017 if (get_memref_parts (mem1, &reg1, &off1, &size1)
30018 && get_memref_parts (mem2, &reg2, &off2, &size2))
30019 return ((REGNO (reg1) == REGNO (reg2))
30020 && (((off1 <= off2) && (off1 + size1 > off2))
30021 || ((off2 <= off1) && (off2 + size2 > off1))));
30022
30023 return false;
30024 }
30025
30026 /* A C statement (sans semicolon) to update the integer scheduling
30027 priority INSN_PRIORITY (INSN). Increase the priority to execute the
30028 INSN earlier, reduce the priority to execute INSN later. Do not
30029 define this macro if you do not need to adjust the scheduling
30030 priorities of insns. */
30031
30032 static int
30033 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
30034 {
30035 rtx load_mem, str_mem;
30036 /* On machines (like the 750) which have asymmetric integer units,
30037 where one integer unit can do multiply and divides and the other
30038 can't, reduce the priority of multiply/divide so it is scheduled
30039 before other integer operations. */
30040
30041 #if 0
30042 if (! INSN_P (insn))
30043 return priority;
30044
30045 if (GET_CODE (PATTERN (insn)) == USE)
30046 return priority;
30047
30048 switch (rs6000_tune) {
30049 case PROCESSOR_PPC750:
30050 switch (get_attr_type (insn))
30051 {
30052 default:
30053 break;
30054
30055 case TYPE_MUL:
30056 case TYPE_DIV:
30057 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
30058 priority, priority);
30059 if (priority >= 0 && priority < 0x01000000)
30060 priority >>= 3;
30061 break;
30062 }
30063 }
30064 #endif
30065
30066 if (insn_must_be_first_in_group (insn)
30067 && reload_completed
30068 && current_sched_info->sched_max_insns_priority
30069 && rs6000_sched_restricted_insns_priority)
30070 {
30071
30072 /* Prioritize insns that can be dispatched only in the first
30073 dispatch slot. */
30074 if (rs6000_sched_restricted_insns_priority == 1)
30075 /* Attach highest priority to insn. This means that in
30076 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
30077 precede 'priority' (critical path) considerations. */
30078 return current_sched_info->sched_max_insns_priority;
30079 else if (rs6000_sched_restricted_insns_priority == 2)
30080 /* Increase priority of insn by a minimal amount. This means that in
30081 haifa-sched.c:ready_sort(), only 'priority' (critical path)
30082 considerations precede dispatch-slot restriction considerations. */
30083 return (priority + 1);
30084 }
30085
30086 if (rs6000_tune == PROCESSOR_POWER6
30087 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
30088 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
30089 /* Attach highest priority to insn if the scheduler has just issued two
30090 stores and this instruction is a load, or two loads and this instruction
30091 is a store. Power6 wants loads and stores scheduled alternately
30092 when possible. */
30093 return current_sched_info->sched_max_insns_priority;
30094
30095 return priority;
30096 }
30097
30098 /* Return true if the instruction is nonpipelined on the Cell. */
30099 static bool
30100 is_nonpipeline_insn (rtx_insn *insn)
30101 {
30102 enum attr_type type;
30103 if (!insn || !NONDEBUG_INSN_P (insn)
30104 || GET_CODE (PATTERN (insn)) == USE
30105 || GET_CODE (PATTERN (insn)) == CLOBBER)
30106 return false;
30107
30108 type = get_attr_type (insn);
30109 if (type == TYPE_MUL
30110 || type == TYPE_DIV
30111 || type == TYPE_SDIV
30112 || type == TYPE_DDIV
30113 || type == TYPE_SSQRT
30114 || type == TYPE_DSQRT
30115 || type == TYPE_MFCR
30116 || type == TYPE_MFCRF
30117 || type == TYPE_MFJMPR)
30118 {
30119 return true;
30120 }
30121 return false;
30122 }
30123
30124
30125 /* Return how many instructions the machine can issue per cycle. */
30126
30127 static int
30128 rs6000_issue_rate (void)
30129 {
30130 /* Unless scheduling for register pressure, use issue rate of 1 for
30131 first scheduling pass to decrease degradation. */
30132 if (!reload_completed && !flag_sched_pressure)
30133 return 1;
30134
30135 switch (rs6000_tune) {
30136 case PROCESSOR_RS64A:
30137 case PROCESSOR_PPC601: /* ? */
30138 case PROCESSOR_PPC7450:
30139 return 3;
30140 case PROCESSOR_PPC440:
30141 case PROCESSOR_PPC603:
30142 case PROCESSOR_PPC750:
30143 case PROCESSOR_PPC7400:
30144 case PROCESSOR_PPC8540:
30145 case PROCESSOR_PPC8548:
30146 case PROCESSOR_CELL:
30147 case PROCESSOR_PPCE300C2:
30148 case PROCESSOR_PPCE300C3:
30149 case PROCESSOR_PPCE500MC:
30150 case PROCESSOR_PPCE500MC64:
30151 case PROCESSOR_PPCE5500:
30152 case PROCESSOR_PPCE6500:
30153 case PROCESSOR_TITAN:
30154 return 2;
30155 case PROCESSOR_PPC476:
30156 case PROCESSOR_PPC604:
30157 case PROCESSOR_PPC604e:
30158 case PROCESSOR_PPC620:
30159 case PROCESSOR_PPC630:
30160 return 4;
30161 case PROCESSOR_POWER4:
30162 case PROCESSOR_POWER5:
30163 case PROCESSOR_POWER6:
30164 case PROCESSOR_POWER7:
30165 return 5;
30166 case PROCESSOR_POWER8:
30167 return 7;
30168 case PROCESSOR_POWER9:
30169 return 6;
30170 default:
30171 return 1;
30172 }
30173 }
30174
30175 /* Return how many instructions to look ahead for better insn
30176 scheduling. */
30177
30178 static int
30179 rs6000_use_sched_lookahead (void)
30180 {
30181 switch (rs6000_tune)
30182 {
30183 case PROCESSOR_PPC8540:
30184 case PROCESSOR_PPC8548:
30185 return 4;
30186
30187 case PROCESSOR_CELL:
30188 return (reload_completed ? 8 : 0);
30189
30190 default:
30191 return 0;
30192 }
30193 }
30194
30195 /* We are choosing insn from the ready queue. Return zero if INSN can be
30196 chosen. */
30197 static int
30198 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
30199 {
30200 if (ready_index == 0)
30201 return 0;
30202
30203 if (rs6000_tune != PROCESSOR_CELL)
30204 return 0;
30205
30206 gcc_assert (insn != NULL_RTX && INSN_P (insn));
30207
30208 if (!reload_completed
30209 || is_nonpipeline_insn (insn)
30210 || is_microcoded_insn (insn))
30211 return 1;
30212
30213 return 0;
30214 }
30215
30216 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
30217 and return true. */
30218
30219 static bool
30220 find_mem_ref (rtx pat, rtx *mem_ref)
30221 {
30222 const char * fmt;
30223 int i, j;
30224
30225 /* stack_tie does not produce any real memory traffic. */
30226 if (tie_operand (pat, VOIDmode))
30227 return false;
30228
30229 if (GET_CODE (pat) == MEM)
30230 {
30231 *mem_ref = pat;
30232 return true;
30233 }
30234
30235 /* Recursively process the pattern. */
30236 fmt = GET_RTX_FORMAT (GET_CODE (pat));
30237
30238 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
30239 {
30240 if (fmt[i] == 'e')
30241 {
30242 if (find_mem_ref (XEXP (pat, i), mem_ref))
30243 return true;
30244 }
30245 else if (fmt[i] == 'E')
30246 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
30247 {
30248 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
30249 return true;
30250 }
30251 }
30252
30253 return false;
30254 }
30255
30256 /* Determine if PAT is a PATTERN of a load insn. */
30257
30258 static bool
30259 is_load_insn1 (rtx pat, rtx *load_mem)
30260 {
30261 if (!pat)
30262 return false;
30263
30264 if (GET_CODE (pat) == SET)
30265 return find_mem_ref (SET_SRC (pat), load_mem);
30266
30267 if (GET_CODE (pat) == PARALLEL)
30268 {
30269 int i;
30270
30271 for (i = 0; i < XVECLEN (pat, 0); i++)
30272 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
30273 return true;
30274 }
30275
30276 return false;
30277 }
30278
30279 /* Determine if INSN loads from memory. */
30280
30281 static bool
30282 is_load_insn (rtx insn, rtx *load_mem)
30283 {
30284 if (!insn || !INSN_P (insn))
30285 return false;
30286
30287 if (CALL_P (insn))
30288 return false;
30289
30290 return is_load_insn1 (PATTERN (insn), load_mem);
30291 }
30292
30293 /* Determine if PAT is a PATTERN of a store insn. */
30294
30295 static bool
30296 is_store_insn1 (rtx pat, rtx *str_mem)
30297 {
30298 if (!pat)
30299 return false;
30300
30301 if (GET_CODE (pat) == SET)
30302 return find_mem_ref (SET_DEST (pat), str_mem);
30303
30304 if (GET_CODE (pat) == PARALLEL)
30305 {
30306 int i;
30307
30308 for (i = 0; i < XVECLEN (pat, 0); i++)
30309 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
30310 return true;
30311 }
30312
30313 return false;
30314 }
30315
30316 /* Determine if INSN stores to memory. */
30317
30318 static bool
30319 is_store_insn (rtx insn, rtx *str_mem)
30320 {
30321 if (!insn || !INSN_P (insn))
30322 return false;
30323
30324 return is_store_insn1 (PATTERN (insn), str_mem);
30325 }
30326
30327 /* Return whether TYPE is a Power9 pairable vector instruction type. */
30328
30329 static bool
30330 is_power9_pairable_vec_type (enum attr_type type)
30331 {
30332 switch (type)
30333 {
30334 case TYPE_VECSIMPLE:
30335 case TYPE_VECCOMPLEX:
30336 case TYPE_VECDIV:
30337 case TYPE_VECCMP:
30338 case TYPE_VECPERM:
30339 case TYPE_VECFLOAT:
30340 case TYPE_VECFDIV:
30341 case TYPE_VECDOUBLE:
30342 return true;
30343 default:
30344 break;
30345 }
30346 return false;
30347 }
30348
30349 /* Returns whether the dependence between INSN and NEXT is considered
30350 costly by the given target. */
30351
30352 static bool
30353 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
30354 {
30355 rtx insn;
30356 rtx next;
30357 rtx load_mem, str_mem;
30358
30359 /* If the flag is not enabled, no dependence is considered costly;
30360 allow all dependent insns in the same group.
30361 This is the most aggressive option. */
30362 if (rs6000_sched_costly_dep == no_dep_costly)
30363 return false;
30364
30365 /* If the flag is set to 1, a dependence is always considered costly;
30366 do not allow dependent instructions in the same group.
30367 This is the most conservative option. */
30368 if (rs6000_sched_costly_dep == all_deps_costly)
30369 return true;
30370
30371 insn = DEP_PRO (dep);
30372 next = DEP_CON (dep);
30373
30374 if (rs6000_sched_costly_dep == store_to_load_dep_costly
30375 && is_load_insn (next, &load_mem)
30376 && is_store_insn (insn, &str_mem))
30377 /* Prevent load after store in the same group. */
30378 return true;
30379
30380 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
30381 && is_load_insn (next, &load_mem)
30382 && is_store_insn (insn, &str_mem)
30383 && DEP_TYPE (dep) == REG_DEP_TRUE
30384 && mem_locations_overlap (str_mem, load_mem))
30385 /* Prevent load after store in the same group if it is a true
30386 dependence. */
30387 return true;
30388
30389 /* The flag is set to X; dependences with latency >= X are considered costly,
30390 and will not be scheduled in the same group. */
30391 if (rs6000_sched_costly_dep <= max_dep_latency
30392 && ((cost - distance) >= (int) rs6000_sched_costly_dep))
30393 return true;
30394
30395 return false;
30396 }
30397
30398 /* Return the next insn after INSN that is found before TAIL is reached,
30399 skipping any "non-active" insns - insns that will not actually occupy
30400 an issue slot. Return NULL_RTX if such an insn is not found. */
30401
30402 static rtx_insn *
30403 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
30404 {
30405 if (insn == NULL_RTX || insn == tail)
30406 return NULL;
30407
30408 while (1)
30409 {
30410 insn = NEXT_INSN (insn);
30411 if (insn == NULL_RTX || insn == tail)
30412 return NULL;
30413
30414 if (CALL_P (insn)
30415 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
30416 || (NONJUMP_INSN_P (insn)
30417 && GET_CODE (PATTERN (insn)) != USE
30418 && GET_CODE (PATTERN (insn)) != CLOBBER
30419 && INSN_CODE (insn) != CODE_FOR_stack_tie))
30420 break;
30421 }
30422 return insn;
30423 }
30424
30425 /* Do Power9 specific sched_reorder2 reordering of ready list. */
30426
30427 static int
30428 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
30429 {
30430 int pos;
30431 int i;
30432 rtx_insn *tmp;
30433 enum attr_type type, type2;
30434
30435 type = get_attr_type (last_scheduled_insn);
30436
30437 /* Try to issue fixed point divides back-to-back in pairs so they will be
30438 routed to separate execution units and execute in parallel. */
30439 if (type == TYPE_DIV && divide_cnt == 0)
30440 {
30441 /* First divide has been scheduled. */
30442 divide_cnt = 1;
30443
30444 /* Scan the ready list looking for another divide, if found move it
30445 to the end of the list so it is chosen next. */
30446 pos = lastpos;
30447 while (pos >= 0)
30448 {
30449 if (recog_memoized (ready[pos]) >= 0
30450 && get_attr_type (ready[pos]) == TYPE_DIV)
30451 {
30452 tmp = ready[pos];
30453 for (i = pos; i < lastpos; i++)
30454 ready[i] = ready[i + 1];
30455 ready[lastpos] = tmp;
30456 break;
30457 }
30458 pos--;
30459 }
30460 }
30461 else
30462 {
30463 /* Last insn was the 2nd divide or not a divide, reset the counter. */
30464 divide_cnt = 0;
30465
30466 /* The best dispatch throughput for vector and vector load insns can be
30467 achieved by interleaving a vector and vector load such that they'll
30468 dispatch to the same superslice. If this pairing cannot be achieved
30469 then it is best to pair vector insns together and vector load insns
30470 together.
30471
30472 To aid in this pairing, vec_pairing maintains the current state with
30473 the following values:
30474
30475 0 : Initial state, no vecload/vector pairing has been started.
30476
30477 1 : A vecload or vector insn has been issued and a candidate for
30478 pairing has been found and moved to the end of the ready
30479 list. */
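
/* Editorial sketch of the effect: after issuing a vector op the
   code below pulls a TYPE_VECLOAD insn (or, failing that, another
   vector op) to the front of the ready list, and vice versa after
   a vecload, so that vecload/vector pairs can dispatch to the same
   superslice. */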
30480 if (type == TYPE_VECLOAD)
30481 {
30482 /* Issued a vecload. */
30483 if (vec_pairing == 0)
30484 {
30485 int vecload_pos = -1;
30486 /* We issued a single vecload, look for a vector insn to pair it
30487 with. If one isn't found, try to pair another vecload. */
30488 pos = lastpos;
30489 while (pos >= 0)
30490 {
30491 if (recog_memoized (ready[pos]) >= 0)
30492 {
30493 type2 = get_attr_type (ready[pos]);
30494 if (is_power9_pairable_vec_type (type2))
30495 {
30496 /* Found a vector insn to pair with, move it to the
30497 end of the ready list so it is scheduled next. */
30498 tmp = ready[pos];
30499 for (i = pos; i < lastpos; i++)
30500 ready[i] = ready[i + 1];
30501 ready[lastpos] = tmp;
30502 vec_pairing = 1;
30503 return cached_can_issue_more;
30504 }
30505 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
30506 /* Remember position of first vecload seen. */
30507 vecload_pos = pos;
30508 }
30509 pos--;
30510 }
30511 if (vecload_pos >= 0)
30512 {
30513 /* Didn't find a vector to pair with but did find a vecload,
30514 move it to the end of the ready list. */
30515 tmp = ready[vecload_pos];
30516 for (i = vecload_pos; i < lastpos; i++)
30517 ready[i] = ready[i + 1];
30518 ready[lastpos] = tmp;
30519 vec_pairing = 1;
30520 return cached_can_issue_more;
30521 }
30522 }
30523 }
30524 else if (is_power9_pairable_vec_type (type))
30525 {
30526 /* Issued a vector operation. */
30527 if (vec_pairing == 0)
30528 {
30529 int vec_pos = -1;
30530 /* We issued a single vector insn, look for a vecload to pair it
30531 with. If one isn't found, try to pair another vector. */
30532 pos = lastpos;
30533 while (pos >= 0)
30534 {
30535 if (recog_memoized (ready[pos]) >= 0)
30536 {
30537 type2 = get_attr_type (ready[pos]);
30538 if (type2 == TYPE_VECLOAD)
30539 {
30540 /* Found a vecload insn to pair with, move it to the
30541 end of the ready list so it is scheduled next. */
30542 tmp = ready[pos];
30543 for (i = pos; i < lastpos; i++)
30544 ready[i] = ready[i + 1];
30545 ready[lastpos] = tmp;
30546 vec_pairing = 1;
30547 return cached_can_issue_more;
30548 }
30549 else if (is_power9_pairable_vec_type (type2)
30550 && vec_pos == -1)
30551 /* Remember position of first vector insn seen. */
30552 vec_pos = pos;
30553 }
30554 pos--;
30555 }
30556 if (vec_pos >= 0)
30557 {
30558 /* Didn't find a vecload to pair with but did find a vector
30559 insn, move it to the end of the ready list. */
30560 tmp = ready[vec_pos];
30561 for (i = vec_pos; i < lastpos; i++)
30562 ready[i] = ready[i + 1];
30563 ready[lastpos] = tmp;
30564 vec_pairing = 1;
30565 return cached_can_issue_more;
30566 }
30567 }
30568 }
30569
30570 /* We've either finished a vec/vecload pair, couldn't find an insn to
30571 continue the current pair, or the last insn had nothing to do
30572 with pairing. In any case, reset the state. */
30573 vec_pairing = 0;
30574 }
30575
30576 return cached_can_issue_more;
30577 }
30578
30579 /* We are about to begin issuing insns for this clock cycle. */
30580
30581 static int
30582 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
30583 rtx_insn **ready ATTRIBUTE_UNUSED,
30584 int *pn_ready ATTRIBUTE_UNUSED,
30585 int clock_var ATTRIBUTE_UNUSED)
30586 {
30587 int n_ready = *pn_ready;
30588
30589 if (sched_verbose)
30590 fprintf (dump, "// rs6000_sched_reorder :\n");
30591
30592 /* Reorder the ready list: if the next insn to issue is a
30593 non-pipelined insn, swap it with the insn after it. */
30594 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
30595 {
30596 if (is_nonpipeline_insn (ready[n_ready - 1])
30597 && (recog_memoized (ready[n_ready - 2]) > 0))
30598 /* Simply swap first two insns. */
30599 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
30600 }
30601
30602 if (rs6000_tune == PROCESSOR_POWER6)
30603 load_store_pendulum = 0;
30604
30605 return rs6000_issue_rate ();
30606 }
30607
30608 /* Like rs6000_sched_reorder, but called after issuing each insn. */
30609
30610 static int
30611 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
30612 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
30613 {
30614 if (sched_verbose)
30615 fprintf (dump, "// rs6000_sched_reorder2 :\n");
30616
30617 /* For Power6, we need to handle some special cases to try and keep the
30618 store queue from overflowing and triggering expensive flushes.
30619
30620 This code monitors how load and store instructions are being issued
30621 and skews the ready list one way or the other to increase the likelihood
30622 that a desired instruction is issued at the proper time.
30623
30624 A couple of things are done. First, we maintain a "load_store_pendulum"
30625 to track the current state of load/store issue.
30626
30627 - If the pendulum is at zero, then no loads or stores have been
30628 issued in the current cycle so we do nothing.
30629
30630 - If the pendulum is 1, then a single load has been issued in this
30631 cycle and we attempt to locate another load in the ready list to
30632 issue with it.
30633
30634 - If the pendulum is -2, then two stores have already been
30635 issued in this cycle, so we increase the priority of the first load
30636 in the ready list to increase its likelihood of being chosen first
30637 in the next cycle.
30638
30639 - If the pendulum is -1, then a single store has been issued in this
30640 cycle and we attempt to locate another store in the ready list to
30641 issue with it, preferring a store to an adjacent memory location to
30642 facilitate store pairing in the store queue.
30643
30644 - If the pendulum is 2, then two loads have already been
30645 issued in this cycle, so we increase the priority of the first store
30646 in the ready list to increase its likelihood of being chosen first
30647 in the next cycle.
30648
30649 - If the pendulum < -2 or > 2, then do nothing.
30650
30651 Note: This code covers the most common scenarios. There exist
30652 non-load/store instructions which make use of the LSU and which
30653 would need to be accounted for to strictly model the behavior
30654 of the machine. Those instructions are currently unaccounted
30655 for to help minimize the compile-time overhead of this code.
30656 */
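
/* Editorial walk-through (illustration only): starting from 0,
   issuing one load moves the pendulum to 1 and we look for a second
   load to pair with it; a second load moves it to 2, and the first
   store on the ready list then gets a priority boost so that the
   next cycle is likely to lead with a store. Store issue mirrors
   this on the negative side. */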
30657 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
30658 {
30659 int pos;
30660 int i;
30661 rtx_insn *tmp;
30662 rtx load_mem, str_mem;
30663
30664 if (is_store_insn (last_scheduled_insn, &str_mem))
30665 /* Issuing a store, swing the load_store_pendulum to the left. */
30666 load_store_pendulum--;
30667 else if (is_load_insn (last_scheduled_insn, &load_mem))
30668 /* Issuing a load, swing the load_store_pendulum to the right. */
30669 load_store_pendulum++;
30670 else
30671 return cached_can_issue_more;
30672
30673 /* If the pendulum is balanced, or there is only one instruction on
30674 the ready list, then all is well, so return. */
30675 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
30676 return cached_can_issue_more;
30677
30678 if (load_store_pendulum == 1)
30679 {
30680 /* A load has been issued in this cycle. Scan the ready list
30681 for another load to issue with it. */
30682 pos = *pn_ready - 1;
30683
30684 while (pos >= 0)
30685 {
30686 if (is_load_insn (ready[pos], &load_mem))
30687 {
30688 /* Found a load. Move it to the head of the ready list,
30689 and adjust its priority so that it is more likely to
30690 stay there. */
30691 tmp = ready[pos];
30692 for (i = pos; i < *pn_ready - 1; i++)
30693 ready[i] = ready[i + 1];
30694 ready[*pn_ready - 1] = tmp;
30695
30696 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
30697 INSN_PRIORITY (tmp)++;
30698 break;
30699 }
30700 pos--;
30701 }
30702 }
30703 else if (load_store_pendulum == -2)
30704 {
30705 /* Two stores have been issued in this cycle. Increase the
30706 priority of the first load in the ready list to favor it for
30707 issuing in the next cycle. */
30708 pos = *pn_ready - 1;
30709
30710 while (pos >= 0)
30711 {
30712 if (is_load_insn (ready[pos], &load_mem)
30713 && !sel_sched_p ()
30714 && INSN_PRIORITY_KNOWN (ready[pos]))
30715 {
30716 INSN_PRIORITY (ready[pos])++;
30717
30718 /* Adjust the pendulum to account for the fact that a load
30719 was found and increased in priority. This is to prevent
30720 increasing the priority of multiple loads. */
30721 load_store_pendulum--;
30722
30723 break;
30724 }
30725 pos--;
30726 }
30727 }
30728 else if (load_store_pendulum == -1)
30729 {
30730 /* A store has been issued in this cycle. Scan the ready list for
30731 another store to issue with it, preferring a store to an adjacent
30732 memory location. */
30733 int first_store_pos = -1;
30734
30735 pos = *pn_ready - 1;
30736
30737 while (pos >= 0)
30738 {
30739 if (is_store_insn (ready[pos], &str_mem))
30740 {
30741 rtx str_mem2;
30742 /* Maintain the index of the first store found on the
30743 list. */
30744 if (first_store_pos == -1)
30745 first_store_pos = pos;
30746
30747 if (is_store_insn (last_scheduled_insn, &str_mem2)
30748 && adjacent_mem_locations (str_mem, str_mem2))
30749 {
30750 /* Found an adjacent store. Move it to the head of the
30751 ready list, and adjust its priority so that it is
30752 more likely to stay there. */
30753 tmp = ready[pos];
30754 for (i = pos; i < *pn_ready - 1; i++)
30755 ready[i] = ready[i + 1];
30756 ready[*pn_ready - 1] = tmp;
30757
30758 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
30759 INSN_PRIORITY (tmp)++;
30760
30761 first_store_pos = -1;
30762
30763 break;
30764 }
30765 }
30766 pos--;
30767 }
30768
30769 if (first_store_pos >= 0)
30770 {
30771 /* An adjacent store wasn't found, but a non-adjacent store was,
30772 so move the non-adjacent store to the front of the ready
30773 list, and adjust its priority so that it is more likely to
30774 stay there. */
30775 tmp = ready[first_store_pos];
30776 for (i = first_store_pos; i < *pn_ready - 1; i++)
30777 ready[i] = ready[i + 1];
30778 ready[*pn_ready - 1] = tmp;
30779 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
30780 INSN_PRIORITY (tmp)++;
30781 }
30782 }
30783 else if (load_store_pendulum == 2)
30784 {
30785 /* Two loads have been issued in this cycle. Increase the priority
30786 of the first store in the ready list to favor it for issuing in
30787 the next cycle. */
30788 pos = *pn_ready-1;
30789
30790 while (pos >= 0)
30791 {
30792 if (is_store_insn (ready[pos], &str_mem)
30793 && !sel_sched_p ()
30794 && INSN_PRIORITY_KNOWN (ready[pos]))
30795 {
30796 INSN_PRIORITY (ready[pos])++;
30797
30798 /* Adjust the pendulum to account for the fact that a store
30799 was found and increased in priority. This is to prevent
30800 increasing the priority of multiple stores */
30801 load_store_pendulum++;
30802
30803 break;
30804 }
30805 pos--;
30806 }
30807 }
30808 }
30809
30810 /* Do Power9-dependent reordering if necessary. */
30811 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
30812 && recog_memoized (last_scheduled_insn) >= 0)
30813 return power9_sched_reorder2 (ready, *pn_ready - 1);
30814
30815 return cached_can_issue_more;
30816 }
30817
30818 /* Return whether the presence of INSN causes a dispatch group termination
30819 of group WHICH_GROUP.
30820
30821 If WHICH_GROUP == current_group, this function will return true if INSN
30822 causes the termination of the current group (i.e., the dispatch group to
30823 which INSN belongs). This means that INSN will be the last insn in the
30824 group it belongs to.
30825
30826 If WHICH_GROUP == previous_group, this function will return true if INSN
30827 causes the termination of the previous group (i.e., the dispatch group that
30828 precedes the group to which INSN belongs). This means that INSN will be
30829 the first insn in the group it belongs to. */
30830
30831 static bool
30832 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
30833 {
30834 bool first, last;
30835
30836 if (! insn)
30837 return false;
30838
30839 first = insn_must_be_first_in_group (insn);
30840 last = insn_must_be_last_in_group (insn);
30841
30842 if (first && last)
30843 return true;
30844
30845 if (which_group == current_group)
30846 return last;
30847 else if (which_group == previous_group)
30848 return first;
30849
30850 return false;
30851 }
30852
30853
30854 static bool
30855 insn_must_be_first_in_group (rtx_insn *insn)
30856 {
30857 enum attr_type type;
30858
30859 if (!insn
30860 || NOTE_P (insn)
30861 || DEBUG_INSN_P (insn)
30862 || GET_CODE (PATTERN (insn)) == USE
30863 || GET_CODE (PATTERN (insn)) == CLOBBER)
30864 return false;
30865
30866 switch (rs6000_tune)
30867 {
30868 case PROCESSOR_POWER5:
30869 if (is_cracked_insn (insn))
30870 return true;
30871 /* FALLTHRU */
30872 case PROCESSOR_POWER4:
30873 if (is_microcoded_insn (insn))
30874 return true;
30875
30876 if (!rs6000_sched_groups)
30877 return false;
30878
30879 type = get_attr_type (insn);
30880
30881 switch (type)
30882 {
30883 case TYPE_MFCR:
30884 case TYPE_MFCRF:
30885 case TYPE_MTCR:
30886 case TYPE_CR_LOGICAL:
30887 case TYPE_MTJMPR:
30888 case TYPE_MFJMPR:
30889 case TYPE_DIV:
30890 case TYPE_LOAD_L:
30891 case TYPE_STORE_C:
30892 case TYPE_ISYNC:
30893 case TYPE_SYNC:
30894 return true;
30895 default:
30896 break;
30897 }
30898 break;
30899 case PROCESSOR_POWER6:
30900 type = get_attr_type (insn);
30901
30902 switch (type)
30903 {
30904 case TYPE_EXTS:
30905 case TYPE_CNTLZ:
30906 case TYPE_TRAP:
30907 case TYPE_MUL:
30908 case TYPE_INSERT:
30909 case TYPE_FPCOMPARE:
30910 case TYPE_MFCR:
30911 case TYPE_MTCR:
30912 case TYPE_MFJMPR:
30913 case TYPE_MTJMPR:
30914 case TYPE_ISYNC:
30915 case TYPE_SYNC:
30916 case TYPE_LOAD_L:
30917 case TYPE_STORE_C:
30918 return true;
30919 case TYPE_SHIFT:
30920 if (get_attr_dot (insn) == DOT_NO
30921 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
30922 return true;
30923 else
30924 break;
30925 case TYPE_DIV:
30926 if (get_attr_size (insn) == SIZE_32)
30927 return true;
30928 else
30929 break;
30930 case TYPE_LOAD:
30931 case TYPE_STORE:
30932 case TYPE_FPLOAD:
30933 case TYPE_FPSTORE:
30934 if (get_attr_update (insn) == UPDATE_YES)
30935 return true;
30936 else
30937 break;
30938 default:
30939 break;
30940 }
30941 break;
30942 case PROCESSOR_POWER7:
30943 type = get_attr_type (insn);
30944
30945 switch (type)
30946 {
30947 case TYPE_CR_LOGICAL:
30948 case TYPE_MFCR:
30949 case TYPE_MFCRF:
30950 case TYPE_MTCR:
30951 case TYPE_DIV:
30952 case TYPE_ISYNC:
30953 case TYPE_LOAD_L:
30954 case TYPE_STORE_C:
30955 case TYPE_MFJMPR:
30956 case TYPE_MTJMPR:
30957 return true;
30958 case TYPE_MUL:
30959 case TYPE_SHIFT:
30960 case TYPE_EXTS:
30961 if (get_attr_dot (insn) == DOT_YES)
30962 return true;
30963 else
30964 break;
30965 case TYPE_LOAD:
30966 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
30967 || get_attr_update (insn) == UPDATE_YES)
30968 return true;
30969 else
30970 break;
30971 case TYPE_STORE:
30972 case TYPE_FPLOAD:
30973 case TYPE_FPSTORE:
30974 if (get_attr_update (insn) == UPDATE_YES)
30975 return true;
30976 else
30977 break;
30978 default:
30979 break;
30980 }
30981 break;
30982 case PROCESSOR_POWER8:
30983 type = get_attr_type (insn);
30984
30985 switch (type)
30986 {
30987 case TYPE_CR_LOGICAL:
30988 case TYPE_MFCR:
30989 case TYPE_MFCRF:
30990 case TYPE_MTCR:
30991 case TYPE_SYNC:
30992 case TYPE_ISYNC:
30993 case TYPE_LOAD_L:
30994 case TYPE_STORE_C:
30995 case TYPE_VECSTORE:
30996 case TYPE_MFJMPR:
30997 case TYPE_MTJMPR:
30998 return true;
30999 case TYPE_SHIFT:
31000 case TYPE_EXTS:
31001 case TYPE_MUL:
31002 if (get_attr_dot (insn) == DOT_YES)
31003 return true;
31004 else
31005 break;
31006 case TYPE_LOAD:
31007 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31008 || get_attr_update (insn) == UPDATE_YES)
31009 return true;
31010 else
31011 break;
31012 case TYPE_STORE:
31013 if (get_attr_update (insn) == UPDATE_YES
31014 && get_attr_indexed (insn) == INDEXED_YES)
31015 return true;
31016 else
31017 break;
31018 default:
31019 break;
31020 }
31021 break;
31022 default:
31023 break;
31024 }
31025
31026 return false;
31027 }
31028
31029 static bool
31030 insn_must_be_last_in_group (rtx_insn *insn)
31031 {
31032 enum attr_type type;
31033
31034 if (!insn
31035 || NOTE_P (insn)
31036 || DEBUG_INSN_P (insn)
31037 || GET_CODE (PATTERN (insn)) == USE
31038 || GET_CODE (PATTERN (insn)) == CLOBBER)
31039 return false;
31040
31041 switch (rs6000_tune)
    {
31042 case PROCESSOR_POWER4:
31043 case PROCESSOR_POWER5:
31044 if (is_microcoded_insn (insn))
31045 return true;
31046
31047 if (is_branch_slot_insn (insn))
31048 return true;
31049
31050 break;
31051 case PROCESSOR_POWER6:
31052 type = get_attr_type (insn);
31053
31054 switch (type)
31055 {
31056 case TYPE_EXTS:
31057 case TYPE_CNTLZ:
31058 case TYPE_TRAP:
31059 case TYPE_MUL:
31060 case TYPE_FPCOMPARE:
31061 case TYPE_MFCR:
31062 case TYPE_MTCR:
31063 case TYPE_MFJMPR:
31064 case TYPE_MTJMPR:
31065 case TYPE_ISYNC:
31066 case TYPE_SYNC:
31067 case TYPE_LOAD_L:
31068 case TYPE_STORE_C:
31069 return true;
31070 case TYPE_SHIFT:
31071 if (get_attr_dot (insn) == DOT_NO
31072 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
31073 return true;
31074 else
31075 break;
31076 case TYPE_DIV:
31077 if (get_attr_size (insn) == SIZE_32)
31078 return true;
31079 else
31080 break;
31081 default:
31082 break;
31083 }
31084 break;
31085 case PROCESSOR_POWER7:
31086 type = get_attr_type (insn);
31087
31088 switch (type)
31089 {
31090 case TYPE_ISYNC:
31091 case TYPE_SYNC:
31092 case TYPE_LOAD_L:
31093 case TYPE_STORE_C:
31094 return true;
31095 case TYPE_LOAD:
31096 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31097 && get_attr_update (insn) == UPDATE_YES)
31098 return true;
31099 else
31100 break;
31101 case TYPE_STORE:
31102 if (get_attr_update (insn) == UPDATE_YES
31103 && get_attr_indexed (insn) == INDEXED_YES)
31104 return true;
31105 else
31106 break;
31107 default:
31108 break;
31109 }
31110 break;
31111 case PROCESSOR_POWER8:
31112 type = get_attr_type (insn);
31113
31114 switch (type)
31115 {
31116 case TYPE_MFCR:
31117 case TYPE_MTCR:
31118 case TYPE_ISYNC:
31119 case TYPE_SYNC:
31120 case TYPE_LOAD_L:
31121 case TYPE_STORE_C:
31122 return true;
31123 case TYPE_LOAD:
31124 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31125 && get_attr_update (insn) == UPDATE_YES)
31126 return true;
31127 else
31128 break;
31129 case TYPE_STORE:
31130 if (get_attr_update (insn) == UPDATE_YES
31131 && get_attr_indexed (insn) == INDEXED_YES)
31132 return true;
31133 else
31134 break;
31135 default:
31136 break;
31137 }
31138 break;
31139 default:
31140 break;
31141 }
31142
31143 return false;
31144 }
31145
31146 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
31147 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
31148
31149 static bool
31150 is_costly_group (rtx *group_insns, rtx next_insn)
31151 {
31152 int i;
31153 int issue_rate = rs6000_issue_rate ();
31154
31155 for (i = 0; i < issue_rate; i++)
31156 {
31157 sd_iterator_def sd_it;
31158 dep_t dep;
31159 rtx insn = group_insns[i];
31160
31161 if (!insn)
31162 continue;
31163
31164 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
31165 {
31166 rtx next = DEP_CON (dep);
31167
31168 if (next == next_insn
31169 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
31170 return true;
31171 }
31172 }
31173
31174 return false;
31175 }
31176
31177 /* Helper function for redefine_groups.
31178 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
31179 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
31180 to keep it "far" (in a separate group) from GROUP_INSNS, following
31181 one of the following schemes, depending on the value of the flag
31182 '-minsert-sched-nops' = X:
31183 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
31184 in order to force NEXT_INSN into a separate group.
31185 (2) X < sched_finish_regroup_exact: insert exactly X nops.
31186 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
31187 insertion (whether a group has just ended, how many vacant issue slots
31188 remain in the last group, and how many dispatch groups were seen so far). */
31189
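/* For example (illustrative numbers only): under scheme (1), on a
   processor without a special group-ending nop and with an
   issue_rate of 5, if three issue slots are still vacant and
   NEXT_INSN is not a branch, two nops are inserted; the one slot
   left vacant is the branch slot, which NEXT_INSN cannot use, so
   NEXT_INSN is forced into a new group.  */
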
31190 static int
31191 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
31192 rtx_insn *next_insn, bool *group_end, int can_issue_more,
31193 int *group_count)
31194 {
31195 rtx nop;
31196 bool force;
31197 int issue_rate = rs6000_issue_rate ();
31198 bool end = *group_end;
31199 int i;
31200
31201 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
31202 return can_issue_more;
31203
31204 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
31205 return can_issue_more;
31206
31207 force = is_costly_group (group_insns, next_insn);
31208 if (!force)
31209 return can_issue_more;
31210
31211 if (sched_verbose > 6)
31212 fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
31213 *group_count, can_issue_more);
31214
31215 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
31216 {
31217 if (*group_end)
31218 can_issue_more = 0;
31219
31220 /* Since only a branch can be issued in the last issue slot, it is
31221 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
31222 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
31223 in this case the last nop will start a new group and the branch
31224 will be forced to the new group. */
31225 if (can_issue_more && !is_branch_slot_insn (next_insn))
31226 can_issue_more--;
31227
31228 /* Do we have a special group ending nop? */
31229 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
31230 || rs6000_tune == PROCESSOR_POWER8)
31231 {
31232 nop = gen_group_ending_nop ();
31233 emit_insn_before (nop, next_insn);
31234 can_issue_more = 0;
31235 }
31236 else
31237 while (can_issue_more > 0)
31238 {
31239 nop = gen_nop ();
31240 emit_insn_before (nop, next_insn);
31241 can_issue_more--;
31242 }
31243
31244 *group_end = true;
31245 return 0;
31246 }
31247
31248 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
31249 {
31250 int n_nops = rs6000_sched_insert_nops;
31251
31252 /* Nops can't be issued from the branch slot, so the effective
31253 issue_rate for nops is 'issue_rate - 1'. */
31254 if (can_issue_more == 0)
31255 can_issue_more = issue_rate;
31256 can_issue_more--;
31257 if (can_issue_more == 0)
31258 {
31259 can_issue_more = issue_rate - 1;
31260 (*group_count)++;
31261 end = true;
31262 for (i = 0; i < issue_rate; i++)
31263 {
31264 group_insns[i] = 0;
31265 }
31266 }
31267
31268 while (n_nops > 0)
31269 {
31270 nop = gen_nop ();
31271 emit_insn_before (nop, next_insn);
31272 if (can_issue_more == issue_rate - 1) /* new group begins */
31273 end = false;
31274 can_issue_more--;
31275 if (can_issue_more == 0)
31276 {
31277 can_issue_more = issue_rate - 1;
31278 (*group_count)++;
31279 end = true;
31280 for (i = 0; i < issue_rate; i++)
31281 {
31282 group_insns[i] = 0;
31283 }
31284 }
31285 n_nops--;
31286 }
31287
31288 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
31289 can_issue_more++;
31290
31291 /* Is next_insn going to start a new group? */
31292 *group_end
31293 = (end
31294 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
31295 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
31296 || (can_issue_more < issue_rate
31297 && insn_terminates_group_p (next_insn, previous_group)));
31298 if (*group_end && end)
31299 (*group_count)--;
31300
31301 if (sched_verbose > 6)
31302 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
31303 *group_count, can_issue_more);
31304 return can_issue_more;
31305 }
31306
31307 return can_issue_more;
31308 }
31309
31310 /* This function tries to synchronize the dispatch groups that the compiler "sees"
31311 with the dispatch groups that the processor dispatcher is expected to
31312 form in practice. It tries to achieve this synchronization by forcing the
31313 estimated processor grouping on the compiler (as opposed to the function
31314 'pad_groups' which tries to force the scheduler's grouping on the processor).
31315
31316 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
31317 examines the (estimated) dispatch groups that will be formed by the processor
31318 dispatcher. It marks these group boundaries to reflect the estimated
31319 processor grouping, overriding the grouping that the scheduler had marked.
31320 Depending on the value of the flag '-minsert-sched-nops' this function can
31321 force certain insns into separate groups or force a certain distance between
31322 them by inserting nops, for example, if there exists a "costly dependence"
31323 between the insns.
31324
31325 The function estimates the group boundaries that the processor will form as
31326 follows: It keeps track of how many vacant issue slots are available after
31327 each insn. A subsequent insn will start a new group if one of the following
31328 4 cases applies:
31329 - no more vacant issue slots remain in the current dispatch group.
31330 - only the last issue slot, which is the branch slot, is vacant, but the next
31331 insn is not a branch.
31332 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
31333 which means that a cracked insn (which occupies two issue slots) can't be
31334 issued in this group.
31335 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
31336 start a new group. */
31337
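/* For instance, with issue_rate == 4 and can_issue_more == 2, the
   two vacant slots include the branch slot, so a cracked insn
   (which needs two ordinary slots) cannot fit and the next insn
   starts a new group (the third case above).  */
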
31338 static int
31339 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
31340 rtx_insn *tail)
31341 {
31342 rtx_insn *insn, *next_insn;
31343 int issue_rate;
31344 int can_issue_more;
31345 int slot, i;
31346 bool group_end;
31347 int group_count = 0;
31348 rtx *group_insns;
31349
31350 /* Initialize. */
31351 issue_rate = rs6000_issue_rate ();
31352 group_insns = XALLOCAVEC (rtx, issue_rate);
31353 for (i = 0; i < issue_rate; i++)
31354 {
31355 group_insns[i] = 0;
31356 }
31357 can_issue_more = issue_rate;
31358 slot = 0;
31359 insn = get_next_active_insn (prev_head_insn, tail);
31360 group_end = false;
31361
31362 while (insn != NULL_RTX)
31363 {
31364 slot = (issue_rate - can_issue_more);
31365 group_insns[slot] = insn;
31366 can_issue_more =
31367 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
31368 if (insn_terminates_group_p (insn, current_group))
31369 can_issue_more = 0;
31370
31371 next_insn = get_next_active_insn (insn, tail);
31372 if (next_insn == NULL_RTX)
31373 return group_count + 1;
31374
31375 /* Is next_insn going to start a new group? */
31376 group_end
31377 = (can_issue_more == 0
31378 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
31379 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
31380 || (can_issue_more < issue_rate
31381 && insn_terminates_group_p (next_insn, previous_group)));
31382
31383 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
31384 next_insn, &group_end, can_issue_more,
31385 &group_count);
31386
31387 if (group_end)
31388 {
31389 group_count++;
31390 can_issue_more = 0;
31391 for (i = 0; i < issue_rate; i++)
31392 {
31393 group_insns[i] = 0;
31394 }
31395 }
31396
31397 if (GET_MODE (next_insn) == TImode && can_issue_more)
31398 PUT_MODE (next_insn, VOIDmode);
31399 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
31400 PUT_MODE (next_insn, TImode);
31401
31402 insn = next_insn;
31403 if (can_issue_more == 0)
31404 can_issue_more = issue_rate;
31405 } /* while */
31406
31407 return group_count;
31408 }
31409
31410 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
31411 dispatch group boundaries that the scheduler had marked. Pad with nops
31412 any dispatch groups which have vacant issue slots, in order to force the
31413 scheduler's grouping on the processor dispatcher. The function
31414 returns the number of dispatch groups found. */
31415
31416 static int
31417 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
31418 rtx_insn *tail)
31419 {
31420 rtx_insn *insn, *next_insn;
31421 rtx nop;
31422 int issue_rate;
31423 int can_issue_more;
31424 int group_end;
31425 int group_count = 0;
31426
31427 /* Initialize issue_rate. */
31428 issue_rate = rs6000_issue_rate ();
31429 can_issue_more = issue_rate;
31430
31431 insn = get_next_active_insn (prev_head_insn, tail);
31432 next_insn = get_next_active_insn (insn, tail);
31433
31434 while (insn != NULL_RTX)
31435 {
31436 can_issue_more =
31437 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
31438
31439 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
31440
31441 if (next_insn == NULL_RTX)
31442 break;
31443
31444 if (group_end)
31445 {
31446 /* If the scheduler had marked group termination at this location
31447 (between insn and next_insn), and neither insn nor next_insn will
31448 force group termination, pad the group with nops to force group
31449 termination. */
31450 if (can_issue_more
31451 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
31452 && !insn_terminates_group_p (insn, current_group)
31453 && !insn_terminates_group_p (next_insn, previous_group))
31454 {
31455 if (!is_branch_slot_insn (next_insn))
31456 can_issue_more--;
31457
31458 while (can_issue_more)
31459 {
31460 nop = gen_nop ();
31461 emit_insn_before (nop, next_insn);
31462 can_issue_more--;
31463 }
31464 }
31465
31466 can_issue_more = issue_rate;
31467 group_count++;
31468 }
31469
31470 insn = next_insn;
31471 next_insn = get_next_active_insn (insn, tail);
31472 }
31473
31474 return group_count;
31475 }
31476
31477 /* We're beginning a new block. Initialize data structures as necessary. */
31478
31479 static void
31480 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
31481 int sched_verbose ATTRIBUTE_UNUSED,
31482 int max_ready ATTRIBUTE_UNUSED)
31483 {
31484 last_scheduled_insn = NULL;
31485 load_store_pendulum = 0;
31486 divide_cnt = 0;
31487 vec_pairing = 0;
31488 }
31489
31490 /* The following function is called at the end of scheduling BB.
31491 After reload, it inserts nops to enforce the insn group bundling. */
31492
31493 static void
31494 rs6000_sched_finish (FILE *dump, int sched_verbose)
31495 {
31496 int n_groups;
31497
31498 if (sched_verbose)
31499 fprintf (dump, "=== Finishing schedule.\n");
31500
31501 if (reload_completed && rs6000_sched_groups)
31502 {
31503 /* Do not run sched_finish hook when selective scheduling enabled. */
31504 if (sel_sched_p ())
31505 return;
31506
31507 if (rs6000_sched_insert_nops == sched_finish_none)
31508 return;
31509
31510 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
31511 n_groups = pad_groups (dump, sched_verbose,
31512 current_sched_info->prev_head,
31513 current_sched_info->next_tail);
31514 else
31515 n_groups = redefine_groups (dump, sched_verbose,
31516 current_sched_info->prev_head,
31517 current_sched_info->next_tail);
31518
31519 if (sched_verbose >= 6)
31520 {
31521 fprintf (dump, "ngroups = %d\n", n_groups);
31522 print_rtl (dump, current_sched_info->prev_head);
31523 fprintf (dump, "Done finish_sched\n");
31524 }
31525 }
31526 }
31527
31528 struct rs6000_sched_context
31529 {
31530 short cached_can_issue_more;
31531 rtx_insn *last_scheduled_insn;
31532 int load_store_pendulum;
31533 int divide_cnt;
31534 int vec_pairing;
31535 };
31536
31537 typedef struct rs6000_sched_context rs6000_sched_context_def;
31538 typedef rs6000_sched_context_def *rs6000_sched_context_t;
31539
31540 /* Allocate storage for a new scheduling context. */
31541 static void *
31542 rs6000_alloc_sched_context (void)
31543 {
31544 return xmalloc (sizeof (rs6000_sched_context_def));
31545 }
31546
31547 /* If CLEAN_P is true, initialize _SC with clean data;
31548 otherwise, initialize it from the global context. */
31549 static void
31550 rs6000_init_sched_context (void *_sc, bool clean_p)
31551 {
31552 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
31553
31554 if (clean_p)
31555 {
31556 sc->cached_can_issue_more = 0;
31557 sc->last_scheduled_insn = NULL;
31558 sc->load_store_pendulum = 0;
31559 sc->divide_cnt = 0;
31560 sc->vec_pairing = 0;
31561 }
31562 else
31563 {
31564 sc->cached_can_issue_more = cached_can_issue_more;
31565 sc->last_scheduled_insn = last_scheduled_insn;
31566 sc->load_store_pendulum = load_store_pendulum;
31567 sc->divide_cnt = divide_cnt;
31568 sc->vec_pairing = vec_pairing;
31569 }
31570 }
31571
31572 /* Sets the global scheduling context to the one pointed to by _SC. */
31573 static void
31574 rs6000_set_sched_context (void *_sc)
31575 {
31576 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
31577
31578 gcc_assert (sc != NULL);
31579
31580 cached_can_issue_more = sc->cached_can_issue_more;
31581 last_scheduled_insn = sc->last_scheduled_insn;
31582 load_store_pendulum = sc->load_store_pendulum;
31583 divide_cnt = sc->divide_cnt;
31584 vec_pairing = sc->vec_pairing;
31585 }
31586
31587 /* Free _SC. */
31588 static void
31589 rs6000_free_sched_context (void *_sc)
31590 {
31591 gcc_assert (_sc != NULL);
31592
31593 free (_sc);
31594 }
31595
31596 static bool
31597 rs6000_sched_can_speculate_insn (rtx_insn *insn)
31598 {
31599 switch (get_attr_type (insn))
31600 {
31601 case TYPE_DIV:
31602 case TYPE_SDIV:
31603 case TYPE_DDIV:
31604 case TYPE_VECDIV:
31605 case TYPE_SSQRT:
31606 case TYPE_DSQRT:
31607 return false;
31608
31609 default:
31610 return true;
31611 }
31612 }
31613 \f
31614 /* Length in units of the trampoline for entering a nested function. */
31615
31616 int
31617 rs6000_trampoline_size (void)
31618 {
31619 int ret = 0;
31620
31621 switch (DEFAULT_ABI)
31622 {
31623 default:
31624 gcc_unreachable ();
31625
31626 case ABI_AIX:
31627 ret = (TARGET_32BIT) ? 12 : 24;
31628 break;
31629
31630 case ABI_ELFv2:
31631 gcc_assert (!TARGET_32BIT);
31632 ret = 32;
31633 break;
31634
31635 case ABI_DARWIN:
31636 case ABI_V4:
31637 ret = (TARGET_32BIT) ? 40 : 48;
31638 break;
31639 }
31640
31641 return ret;
31642 }
31643
31644 /* Emit RTL insns to initialize the variable parts of a trampoline.
31645 FNADDR is an RTX for the address of the function's pure code.
31646 CXT is an RTX for the static chain value for the function. */
31647
31648 static void
31649 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
31650 {
31651 int regsize = (TARGET_32BIT) ? 4 : 8;
31652 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
31653 rtx ctx_reg = force_reg (Pmode, cxt);
31654 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
31655
31656 switch (DEFAULT_ABI)
31657 {
31658 default:
31659 gcc_unreachable ();
31660
31661 /* Under AIX, just build the 3-word function descriptor. */
31662 case ABI_AIX:
31663 {
31664 rtx fnmem, fn_reg, toc_reg;
31665
31666 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
31667 error ("you cannot take the address of a nested function if you use "
31668 "the %qs option", "-mno-pointers-to-nested-functions");
31669
31670 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
31671 fn_reg = gen_reg_rtx (Pmode);
31672 toc_reg = gen_reg_rtx (Pmode);
31673
31674 /* Macro to shorten the code expansions below. */
31675 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
31676
31677 m_tramp = replace_equiv_address (m_tramp, addr);
31678
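/* The three stores below lay out the descriptor in the usual AIX
   form: word 0 = code address, word 1 = TOC pointer, word 2 =
   static chain.  */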
31679 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
31680 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
31681 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
31682 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
31683 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
31684
31685 # undef MEM_PLUS
31686 }
31687 break;
31688
31689 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
31690 case ABI_ELFv2:
31691 case ABI_DARWIN:
31692 case ABI_V4:
31693 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
31694 LCT_NORMAL, VOIDmode,
31695 addr, Pmode,
31696 GEN_INT (rs6000_trampoline_size ()), SImode,
31697 fnaddr, Pmode,
31698 ctx_reg, Pmode);
31699 break;
31700 }
31701 }
31702
31703 \f
31704 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
31705 identifier as an argument, so the front end shouldn't look it up. */
31706
31707 static bool
31708 rs6000_attribute_takes_identifier_p (const_tree attr_id)
31709 {
31710 return is_attribute_p ("altivec", attr_id);
31711 }
31712
31713 /* Handle the "altivec" attribute. The attribute may have
31714 arguments as follows:
31715
31716 __attribute__((altivec(vector__)))
31717 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
31718 __attribute__((altivec(bool__))) (always followed by 'unsigned')
31719
31720 and may appear more than once (e.g., 'vector bool char') in a
31721 given declaration. */
31722
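/* For instance, 'vector unsigned int' arrives here with SImode and
   TYPE_UNSIGNED set, and is rewritten to unsigned_V4SI_type_node in
   the 'v' case below.  */
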
31723 static tree
31724 rs6000_handle_altivec_attribute (tree *node,
31725 tree name ATTRIBUTE_UNUSED,
31726 tree args,
31727 int flags ATTRIBUTE_UNUSED,
31728 bool *no_add_attrs)
31729 {
31730 tree type = *node, result = NULL_TREE;
31731 machine_mode mode;
31732 int unsigned_p;
31733 char altivec_type
31734 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
31735 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
31736 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
31737 : '?');
31738
31739 while (POINTER_TYPE_P (type)
31740 || TREE_CODE (type) == FUNCTION_TYPE
31741 || TREE_CODE (type) == METHOD_TYPE
31742 || TREE_CODE (type) == ARRAY_TYPE)
31743 type = TREE_TYPE (type);
31744
31745 mode = TYPE_MODE (type);
31746
31747 /* Check for invalid AltiVec type qualifiers. */
31748 if (type == long_double_type_node)
31749 error ("use of %<long double%> in AltiVec types is invalid");
31750 else if (type == boolean_type_node)
31751 error ("use of boolean types in AltiVec types is invalid");
31752 else if (TREE_CODE (type) == COMPLEX_TYPE)
31753 error ("use of %<complex%> in AltiVec types is invalid");
31754 else if (DECIMAL_FLOAT_MODE_P (mode))
31755 error ("use of decimal floating point types in AltiVec types is invalid");
31756 else if (!TARGET_VSX)
31757 {
31758 if (type == long_unsigned_type_node || type == long_integer_type_node)
31759 {
31760 if (TARGET_64BIT)
31761 error ("use of %<long%> in AltiVec types is invalid for "
31762 "64-bit code without %qs", "-mvsx");
31763 else if (rs6000_warn_altivec_long)
31764 warning (0, "use of %<long%> in AltiVec types is deprecated; "
31765 "use %<int%>");
31766 }
31767 else if (type == long_long_unsigned_type_node
31768 || type == long_long_integer_type_node)
31769 error ("use of %<long long%> in AltiVec types is invalid without %qs",
31770 "-mvsx");
31771 else if (type == double_type_node)
31772 error ("use of %<double%> in AltiVec types is invalid without %qs",
31773 "-mvsx");
31774 }
31775
31776 switch (altivec_type)
31777 {
31778 case 'v':
31779 unsigned_p = TYPE_UNSIGNED (type);
31780 switch (mode)
31781 {
31782 case E_TImode:
31783 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
31784 break;
31785 case E_DImode:
31786 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
31787 break;
31788 case E_SImode:
31789 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
31790 break;
31791 case E_HImode:
31792 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
31793 break;
31794 case E_QImode:
31795 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
31796 break;
31797 case E_SFmode: result = V4SF_type_node; break;
31798 case E_DFmode: result = V2DF_type_node; break;
31799 /* If the user says 'vector int bool', we may be handed the 'bool'
31800 attribute _before_ the 'vector' attribute, and so select the
31801 proper type in the 'b' case below. */
31802 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
31803 case E_V2DImode: case E_V2DFmode:
31804 result = type;
31805 default: break;
31806 }
31807 break;
31808 case 'b':
31809 switch (mode)
31810 {
31811 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
31812 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
31813 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
31814 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
31815 default: break;
31816 }
31817 break;
31818 case 'p':
31819 switch (mode)
31820 {
31821 case E_V8HImode: result = pixel_V8HI_type_node;
31822 default: break;
31823 }
31824 default: break;
31825 }
31826
31827 /* Propagate qualifiers attached to the element type
31828 onto the vector type. */
31829 if (result && result != type && TYPE_QUALS (type))
31830 result = build_qualified_type (result, TYPE_QUALS (type));
31831
31832 *no_add_attrs = true; /* No need to hang on to the attribute. */
31833
31834 if (result)
31835 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
31836
31837 return NULL_TREE;
31838 }
31839
31840 /* AltiVec defines five built-in scalar types that serve as vector
31841 elements; we must teach the compiler how to mangle them. The 128-bit
31842 floating point mangling is target-specific as well. */
31843
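/* For example, the element type of 'vector bool int' mangles as
   "U6__booli", and the IBM 128-bit long double format mangles as
   "g".  */
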
31844 static const char *
31845 rs6000_mangle_type (const_tree type)
31846 {
31847 type = TYPE_MAIN_VARIANT (type);
31848
31849 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
31850 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
31851 return NULL;
31852
31853 if (type == bool_char_type_node) return "U6__boolc";
31854 if (type == bool_short_type_node) return "U6__bools";
31855 if (type == pixel_type_node) return "u7__pixel";
31856 if (type == bool_int_type_node) return "U6__booli";
31857 if (type == bool_long_long_type_node) return "U6__boolx";
31858
31859 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
31860 return "g";
31861 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
31862 return ieee128_mangling_gcc_8_1 ? "U10__float128" : "u9__ieee128";
31863
31864 /* For all other types, use the default mangling. */
31865 return NULL;
31866 }
31867
31868 /* Handle a "longcall" or "shortcall" attribute; arguments as in
31869 struct attribute_spec.handler. */
31870
31871 static tree
31872 rs6000_handle_longcall_attribute (tree *node, tree name,
31873 tree args ATTRIBUTE_UNUSED,
31874 int flags ATTRIBUTE_UNUSED,
31875 bool *no_add_attrs)
31876 {
31877 if (TREE_CODE (*node) != FUNCTION_TYPE
31878 && TREE_CODE (*node) != FIELD_DECL
31879 && TREE_CODE (*node) != TYPE_DECL)
31880 {
31881 warning (OPT_Wattributes, "%qE attribute only applies to functions",
31882 name);
31883 *no_add_attrs = true;
31884 }
31885
31886 return NULL_TREE;
31887 }
31888
31889 /* Set longcall attributes on all functions declared when
31890 rs6000_default_long_calls is true. */
31891 static void
31892 rs6000_set_default_type_attributes (tree type)
31893 {
31894 if (rs6000_default_long_calls
31895 && (TREE_CODE (type) == FUNCTION_TYPE
31896 || TREE_CODE (type) == METHOD_TYPE))
31897 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
31898 NULL_TREE,
31899 TYPE_ATTRIBUTES (type));
31900
31901 #if TARGET_MACHO
31902 darwin_set_default_type_attributes (type);
31903 #endif
31904 }
31905
31906 /* Return a reference suitable for calling a function with the
31907 longcall attribute. */
31908
31909 rtx
31910 rs6000_longcall_ref (rtx call_ref)
31911 {
31912 const char *call_name;
31913 tree node;
31914
31915 if (GET_CODE (call_ref) != SYMBOL_REF)
31916 return call_ref;
31917
31918 /* System V adds '.' to the internal name, so skip any leading dots. */
31919 call_name = XSTR (call_ref, 0);
31920 if (*call_name == '.')
31921 {
31922 while (*call_name == '.')
31923 call_name++;
31924
31925 node = get_identifier (call_name);
31926 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
31927 }
31928
31929 return force_reg (Pmode, call_ref);
31930 }
31931 \f
31932 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
31933 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
31934 #endif
31935
31936 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
31937 struct attribute_spec.handler. */
31938 static tree
31939 rs6000_handle_struct_attribute (tree *node, tree name,
31940 tree args ATTRIBUTE_UNUSED,
31941 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
31942 {
31943 tree *type = NULL;
31944 if (DECL_P (*node))
31945 {
31946 if (TREE_CODE (*node) == TYPE_DECL)
31947 type = &TREE_TYPE (*node);
31948 }
31949 else
31950 type = node;
31951
31952 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
31953 || TREE_CODE (*type) == UNION_TYPE)))
31954 {
31955 warning (OPT_Wattributes, "%qE attribute ignored", name);
31956 *no_add_attrs = true;
31957 }
31958
31959 else if ((is_attribute_p ("ms_struct", name)
31960 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
31961 || ((is_attribute_p ("gcc_struct", name)
31962 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
31963 {
31964 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
31965 name);
31966 *no_add_attrs = true;
31967 }
31968
31969 return NULL_TREE;
31970 }
31971
31972 static bool
31973 rs6000_ms_bitfield_layout_p (const_tree record_type)
31974 {
31975 return (TARGET_USE_MS_BITFIELD_LAYOUT
31976 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
31977 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
31978 }
31979 \f
31980 #ifdef USING_ELFOS_H
31981
31982 /* A get_unnamed_section callback, used for switching to toc_section. */
31983
31984 static void
31985 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
31986 {
31987 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
31988 && TARGET_MINIMAL_TOC)
31989 {
31990 if (!toc_initialized)
31991 {
31992 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
31993 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
31994 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
31995 fprintf (asm_out_file, "\t.tc ");
31996 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
31997 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
31998 fprintf (asm_out_file, "\n");
31999
32000 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
32001 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32002 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
32003 fprintf (asm_out_file, " = .+32768\n");
32004 toc_initialized = 1;
32005 }
32006 else
32007 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
32008 }
32009 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
32010 {
32011 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
32012 if (!toc_initialized)
32013 {
32014 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32015 toc_initialized = 1;
32016 }
32017 }
32018 else
32019 {
32020 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
32021 if (!toc_initialized)
32022 {
32023 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32024 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
32025 fprintf (asm_out_file, " = .+32768\n");
32026 toc_initialized = 1;
32027 }
32028 }
32029 }
32030
32031 /* Implement TARGET_ASM_INIT_SECTIONS. */
32032
32033 static void
32034 rs6000_elf_asm_init_sections (void)
32035 {
32036 toc_section
32037 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
32038
32039 sdata2_section
32040 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
32041 SDATA2_SECTION_ASM_OP);
32042 }
32043
32044 /* Implement TARGET_SELECT_RTX_SECTION. */
32045
32046 static section *
32047 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
32048 unsigned HOST_WIDE_INT align)
32049 {
32050 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
32051 return toc_section;
32052 else
32053 return default_elf_select_rtx_section (mode, x, align);
32054 }
32055 \f
32056 /* For a SYMBOL_REF, set generic flags and then perform some
32057 target-specific processing.
32058
32059 When the AIX ABI is requested on a non-AIX system, replace the
32060 function name with the real name (with a leading .) rather than the
32061 function descriptor name. This saves a lot of overriding code to
32062 read the prefixes. */
32063
32064 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
32065 static void
32066 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
32067 {
32068 default_encode_section_info (decl, rtl, first);
32069
32070 if (first
32071 && TREE_CODE (decl) == FUNCTION_DECL
32072 && !TARGET_AIX
32073 && DEFAULT_ABI == ABI_AIX)
32074 {
32075 rtx sym_ref = XEXP (rtl, 0);
32076 size_t len = strlen (XSTR (sym_ref, 0));
32077 char *str = XALLOCAVEC (char, len + 2);
32078 str[0] = '.';
32079 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
32080 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
32081 }
32082 }
32083
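/* Return true if SECTION is TEMPL itself or a subsection of it,
   i.e. SECTION equals TEMPL or begins with TEMPL followed by '.'.
   For example, ".sdata" and ".sdata.foo" match ".sdata", while
   ".sdata2" does not.  */
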
32084 static inline bool
32085 compare_section_name (const char *section, const char *templ)
32086 {
32087 int len;
32088
32089 len = strlen (templ);
32090 return (strncmp (section, templ, len) == 0
32091 && (section[len] == 0 || section[len] == '.'));
32092 }
32093
32094 bool
32095 rs6000_elf_in_small_data_p (const_tree decl)
32096 {
32097 if (rs6000_sdata == SDATA_NONE)
32098 return false;
32099
32100 /* We want to merge strings, so we never consider them small data. */
32101 if (TREE_CODE (decl) == STRING_CST)
32102 return false;
32103
32104 /* Functions are never in the small data area. */
32105 if (TREE_CODE (decl) == FUNCTION_DECL)
32106 return false;
32107
32108 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
32109 {
32110 const char *section = DECL_SECTION_NAME (decl);
32111 if (compare_section_name (section, ".sdata")
32112 || compare_section_name (section, ".sdata2")
32113 || compare_section_name (section, ".gnu.linkonce.s")
32114 || compare_section_name (section, ".sbss")
32115 || compare_section_name (section, ".sbss2")
32116 || compare_section_name (section, ".gnu.linkonce.sb")
32117 || strcmp (section, ".PPC.EMB.sdata0") == 0
32118 || strcmp (section, ".PPC.EMB.sbss0") == 0)
32119 return true;
32120 }
32121 else
32122 {
32123 /* If we are told not to put readonly data in sdata, then don't. */
32124 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
32125 && !rs6000_readonly_in_sdata)
32126 return false;
32127
32128 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
32129
32130 if (size > 0
32131 && size <= g_switch_value
32132 /* If it's not public, and we won't reference it via the small data
32133 area, there's no need to put it in the small data section. */
32134 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
32135 return true;
32136 }
32137
32138 return false;
32139 }
32140
32141 #endif /* USING_ELFOS_H */
32142 \f
32143 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
32144
32145 static bool
32146 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
32147 {
32148 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
32149 }
32150
32151 /* Do not place thread-local symbols refs in the object blocks. */
32152
32153 static bool
32154 rs6000_use_blocks_for_decl_p (const_tree decl)
32155 {
32156 return !DECL_THREAD_LOCAL_P (decl);
32157 }
32158 \f
32159 /* Return a REG that occurs in ADDR with coefficient 1.
32160 ADDR can be effectively incremented by incrementing REG.
32161
32162 r0 is special and we must not select it as an address
32163 register by this routine since our caller will try to
32164 increment the returned register via an "la" instruction. */
32165
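/* For example, given (plus (reg 9) (const_int 8)) this returns
   register 9; given (plus (reg 0) (reg 9)) it skips r0 and returns
   register 9.  */
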
32166 rtx
32167 find_addr_reg (rtx addr)
32168 {
32169 while (GET_CODE (addr) == PLUS)
32170 {
32171 if (GET_CODE (XEXP (addr, 0)) == REG
32172 && REGNO (XEXP (addr, 0)) != 0)
32173 addr = XEXP (addr, 0);
32174 else if (GET_CODE (XEXP (addr, 1)) == REG
32175 && REGNO (XEXP (addr, 1)) != 0)
32176 addr = XEXP (addr, 1);
32177 else if (CONSTANT_P (XEXP (addr, 0)))
32178 addr = XEXP (addr, 1);
32179 else if (CONSTANT_P (XEXP (addr, 1)))
32180 addr = XEXP (addr, 0);
32181 else
32182 gcc_unreachable ();
32183 }
32184 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
32185 return addr;
32186 }
32187
32188 void
32189 rs6000_fatal_bad_address (rtx op)
32190 {
32191 fatal_insn ("bad address", op);
32192 }
32193
32194 #if TARGET_MACHO
32195
32196 typedef struct branch_island_d {
32197 tree function_name;
32198 tree label_name;
32199 int line_number;
32200 } branch_island;
32201
32202
32203 static vec<branch_island, va_gc> *branch_islands;
32204
32205 /* Remember to generate a branch island for far calls to the given
32206 function. */
32207
32208 static void
32209 add_compiler_branch_island (tree label_name, tree function_name,
32210 int line_number)
32211 {
32212 branch_island bi = {function_name, label_name, line_number};
32213 vec_safe_push (branch_islands, bi);
32214 }
32215
32216 /* Generate far-jump branch islands for everything recorded in
32217 branch_islands. Invoked immediately after the last instruction of
32218 the epilogue has been emitted; the branch islands must be appended
32219 to, and contiguous with, the function body. Mach-O stubs are
32220 generated in machopic_output_stub(). */
32221
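/* In the non-PIC case the island emitted below is just a far jump
   through the count register; for a function _foo and island label
   L42 the emitted text is (sketch):

       L42: lis r12,hi16(_foo)
            ori r12,r12,lo16(_foo)
            mtctr r12
            bctr  */
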
32222 static void
32223 macho_branch_islands (void)
32224 {
32225 char tmp_buf[512];
32226
32227 while (!vec_safe_is_empty (branch_islands))
32228 {
32229 branch_island *bi = &branch_islands->last ();
32230 const char *label = IDENTIFIER_POINTER (bi->label_name);
32231 const char *name = IDENTIFIER_POINTER (bi->function_name);
32232 char name_buf[512];
32233 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
32234 if (name[0] == '*' || name[0] == '&')
32235 strcpy (name_buf, name+1);
32236 else
32237 {
32238 name_buf[0] = '_';
32239 strcpy (name_buf+1, name);
32240 }
32241 strcpy (tmp_buf, "\n");
32242 strcat (tmp_buf, label);
32243 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
32244 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
32245 dbxout_stabd (N_SLINE, bi->line_number);
32246 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
32247 if (flag_pic)
32248 {
32249 if (TARGET_LINK_STACK)
32250 {
32251 char name[32];
32252 get_ppc476_thunk_name (name);
32253 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
32254 strcat (tmp_buf, name);
32255 strcat (tmp_buf, "\n");
32256 strcat (tmp_buf, label);
32257 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
32258 }
32259 else
32260 {
32261 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
32262 strcat (tmp_buf, label);
32263 strcat (tmp_buf, "_pic\n");
32264 strcat (tmp_buf, label);
32265 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
32266 }
32267
32268 strcat (tmp_buf, "\taddis r11,r11,ha16(");
32269 strcat (tmp_buf, name_buf);
32270 strcat (tmp_buf, " - ");
32271 strcat (tmp_buf, label);
32272 strcat (tmp_buf, "_pic)\n");
32273
32274 strcat (tmp_buf, "\tmtlr r0\n");
32275
32276 strcat (tmp_buf, "\taddi r12,r11,lo16(");
32277 strcat (tmp_buf, name_buf);
32278 strcat (tmp_buf, " - ");
32279 strcat (tmp_buf, label);
32280 strcat (tmp_buf, "_pic)\n");
32281
32282 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
32283 }
32284 else
32285 {
32286 strcat (tmp_buf, ":\nlis r12,hi16(");
32287 strcat (tmp_buf, name_buf);
32288 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
32289 strcat (tmp_buf, name_buf);
32290 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
32291 }
32292 output_asm_insn (tmp_buf, 0);
32293 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
32294 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
32295 dbxout_stabd (N_SLINE, bi->line_number);
32296 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
32297 branch_islands->pop ();
32298 }
32299 }
32300
32301 /* NO_PREVIOUS_DEF checks the list of branch islands to see whether
32302 the function name has already been recorded. */
32303
32304 static int
32305 no_previous_def (tree function_name)
32306 {
32307 branch_island *bi;
32308 unsigned ix;
32309
32310 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
32311 if (function_name == bi->function_name)
32312 return 0;
32313 return 1;
32314 }
32315
32316 /* GET_PREV_LABEL gets the label name from the previous definition of
32317 the function. */
32318
32319 static tree
32320 get_prev_label (tree function_name)
32321 {
32322 branch_island *bi;
32323 unsigned ix;
32324
32325 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
32326 if (function_name == bi->function_name)
32327 return bi->label_name;
32328 return NULL_TREE;
32329 }
32330
32331 /* INSN is either a function call or a millicode call. It may have an
32332 unconditional jump in its delay slot.
32333
32334 CALL_DEST is the routine we are calling. */
32335
32336 char *
32337 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
32338 int cookie_operand_number)
32339 {
32340 static char buf[256];
32341 if (darwin_emit_branch_islands
32342 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
32343 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
32344 {
32345 tree labelname;
32346 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
32347
32348 if (no_previous_def (funname))
32349 {
32350 rtx label_rtx = gen_label_rtx ();
32351 char *label_buf, temp_buf[256];
32352 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
32353 CODE_LABEL_NUMBER (label_rtx));
32354 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
32355 labelname = get_identifier (label_buf);
32356 add_compiler_branch_island (labelname, funname, insn_line (insn));
32357 }
32358 else
32359 labelname = get_prev_label (funname);
32360
32361 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
32362 instruction will reach 'foo', otherwise link as 'bl L42'".
32363 "L42" should be a 'branch island', that will do a far jump to
32364 'foo'. Branch islands are generated in
32365 macho_branch_islands(). */
32366 sprintf (buf, "jbsr %%z%d,%.246s",
32367 dest_operand_number, IDENTIFIER_POINTER (labelname));
32368 }
32369 else
32370 sprintf (buf, "bl %%z%d", dest_operand_number);
32371 return buf;
32372 }
32373
32374 /* Generate PIC and indirect symbol stubs. */
32375
32376 void
32377 machopic_output_stub (FILE *file, const char *symb, const char *stub)
32378 {
32379 unsigned int length;
32380 char *symbol_name, *lazy_ptr_name;
32381 char *local_label_0;
32382 static int label = 0;
32383
32384 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
32385 symb = (*targetm.strip_name_encoding) (symb);
32386
32387
32388 length = strlen (symb);
32389 symbol_name = XALLOCAVEC (char, length + 32);
32390 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
32391
32392 lazy_ptr_name = XALLOCAVEC (char, length + 32);
32393 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
32394
32395 if (flag_pic == 2)
32396 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
32397 else
32398 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
32399
32400 if (flag_pic == 2)
32401 {
32402 fprintf (file, "\t.align 5\n");
32403
32404 fprintf (file, "%s:\n", stub);
32405 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
32406
32407 label++;
32408 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
32409 sprintf (local_label_0, "\"L%011d$spb\"", label);
32410
32411 fprintf (file, "\tmflr r0\n");
32412 if (TARGET_LINK_STACK)
32413 {
32414 char name[32];
32415 get_ppc476_thunk_name (name);
32416 fprintf (file, "\tbl %s\n", name);
32417 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
32418 }
32419 else
32420 {
32421 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
32422 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
32423 }
32424 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
32425 lazy_ptr_name, local_label_0);
32426 fprintf (file, "\tmtlr r0\n");
32427 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
32428 (TARGET_64BIT ? "ldu" : "lwzu"),
32429 lazy_ptr_name, local_label_0);
32430 fprintf (file, "\tmtctr r12\n");
32431 fprintf (file, "\tbctr\n");
32432 }
32433 else
32434 {
32435 fprintf (file, "\t.align 4\n");
32436
32437 fprintf (file, "%s:\n", stub);
32438 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
32439
32440 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
32441 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
32442 (TARGET_64BIT ? "ldu" : "lwzu"),
32443 lazy_ptr_name);
32444 fprintf (file, "\tmtctr r12\n");
32445 fprintf (file, "\tbctr\n");
32446 }
32447
32448 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
32449 fprintf (file, "%s:\n", lazy_ptr_name);
32450 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
32451 fprintf (file, "%sdyld_stub_binding_helper\n",
32452 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
32453 }
32454
32455 /* Legitimize PIC addresses. If the address is already
32456 position-independent, we return ORIG. Newly generated
32457 position-independent addresses go into a reg. This is REG if
32458 nonzero; otherwise we allocate register(s) as necessary. */
32459
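/* True iff constant X fits in a signed 16-bit immediate field,
   i.e. -0x8000 <= X < 0x8000.  */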
32460 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
32461
32462 rtx
32463 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
32464 rtx reg)
32465 {
32466 rtx base, offset;
32467
32468 if (reg == NULL && !reload_completed)
32469 reg = gen_reg_rtx (Pmode);
32470
32471 if (GET_CODE (orig) == CONST)
32472 {
32473 rtx reg_temp;
32474
32475 if (GET_CODE (XEXP (orig, 0)) == PLUS
32476 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
32477 return orig;
32478
32479 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
32480
32481 /* Use a different reg for the intermediate value, as
32482 it will be marked UNCHANGING. */
32483 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
32484 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
32485 Pmode, reg_temp);
32486 offset =
32487 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
32488 Pmode, reg);
32489
32490 if (GET_CODE (offset) == CONST_INT)
32491 {
32492 if (SMALL_INT (offset))
32493 return plus_constant (Pmode, base, INTVAL (offset));
32494 else if (!reload_completed)
32495 offset = force_reg (Pmode, offset);
32496 else
32497 {
32498 rtx mem = force_const_mem (Pmode, orig);
32499 return machopic_legitimize_pic_address (mem, Pmode, reg);
32500 }
32501 }
32502 return gen_rtx_PLUS (Pmode, base, offset);
32503 }
32504
32505 /* Fall back on generic machopic code. */
32506 return machopic_legitimize_pic_address (orig, mode, reg);
32507 }
32508
32509 /* Output a .machine directive for the Darwin assembler, and call
32510 the generic start_file routine. */
32511
32512 static void
32513 rs6000_darwin_file_start (void)
32514 {
32515 static const struct
32516 {
32517 const char *arg;
32518 const char *name;
32519 HOST_WIDE_INT if_set;
32520 } mapping[] = {
32521 { "ppc64", "ppc64", MASK_64BIT },
32522 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
32523 { "power4", "ppc970", 0 },
32524 { "G5", "ppc970", 0 },
32525 { "7450", "ppc7450", 0 },
32526 { "7400", "ppc7400", MASK_ALTIVEC },
32527 { "G4", "ppc7400", 0 },
32528 { "750", "ppc750", 0 },
32529 { "740", "ppc750", 0 },
32530 { "G3", "ppc750", 0 },
32531 { "604e", "ppc604e", 0 },
32532 { "604", "ppc604", 0 },
32533 { "603e", "ppc603", 0 },
32534 { "603", "ppc603", 0 },
32535 { "601", "ppc601", 0 },
32536 { NULL, "ppc", 0 } };
32537 const char *cpu_id = "";
32538 size_t i;
32539
32540 rs6000_file_start ();
32541 darwin_file_start ();
32542
32543 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
32544
32545 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
32546 cpu_id = rs6000_default_cpu;
32547
32548 if (global_options_set.x_rs6000_cpu_index)
32549 cpu_id = processor_target_table[rs6000_cpu_index].name;
32550
32551 /* Look through the mapping array. Pick the first name that either
32552 matches the argument, has a bit set in IF_SET that is also set
32553 in the target flags, or has a NULL name. */
32554
32555 i = 0;
32556 while (mapping[i].arg != NULL
32557 && strcmp (mapping[i].arg, cpu_id) != 0
32558 && (mapping[i].if_set & rs6000_isa_flags) == 0)
32559 i++;
32560
32561 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
32562 }
32563
32564 #endif /* TARGET_MACHO */
32565
32566 #if TARGET_ELF
32567 static int
32568 rs6000_elf_reloc_rw_mask (void)
32569 {
32570 if (flag_pic)
32571 return 3;
32572 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
32573 return 2;
32574 else
32575 return 0;
32576 }
32577
32578 /* Record an element in the table of global constructors. SYMBOL is
32579 a SYMBOL_REF of the function to be called; PRIORITY is a number
32580 between 0 and MAX_INIT_PRIORITY.
32581
32582 This differs from default_named_section_asm_out_constructor in
32583 that we have special handling for -mrelocatable. */
32584
32585 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
32586 static void
32587 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
32588 {
32589 const char *section = ".ctors";
32590 char buf[18];
32591
32592 if (priority != DEFAULT_INIT_PRIORITY)
32593 {
32594 sprintf (buf, ".ctors.%.5u",
32595 /* Invert the numbering so the linker puts us in the proper
32596 order; constructors are run from right to left, and the
32597 linker sorts in increasing order. */
32598 MAX_INIT_PRIORITY - priority);
32599 section = buf;
32600 }
32601
32602 switch_to_section (get_section (section, SECTION_WRITE, NULL));
32603 assemble_align (POINTER_SIZE);
32604
32605 if (DEFAULT_ABI == ABI_V4
32606 && (TARGET_RELOCATABLE || flag_pic > 1))
32607 {
32608 fputs ("\t.long (", asm_out_file);
32609 output_addr_const (asm_out_file, symbol);
32610 fputs (")@fixup\n", asm_out_file);
32611 }
32612 else
32613 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
32614 }
32615
32616 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
32617 static void
32618 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
32619 {
32620 const char *section = ".dtors";
32621 char buf[18];
32622
32623 if (priority != DEFAULT_INIT_PRIORITY)
32624 {
32625 sprintf (buf, ".dtors.%.5u",
32626 /* Invert the numbering so the linker puts us in the proper
32627 order; constructors are run from right to left, and the
32628 linker sorts in increasing order. */
32629 MAX_INIT_PRIORITY - priority);
32630 section = buf;
32631 }
32632
32633 switch_to_section (get_section (section, SECTION_WRITE, NULL));
32634 assemble_align (POINTER_SIZE);
32635
32636 if (DEFAULT_ABI == ABI_V4
32637 && (TARGET_RELOCATABLE || flag_pic > 1))
32638 {
32639 fputs ("\t.long (", asm_out_file);
32640 output_addr_const (asm_out_file, symbol);
32641 fputs (")@fixup\n", asm_out_file);
32642 }
32643 else
32644 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
32645 }
32646
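/* Output assembly for declaring function NAME (with tree DECL) to
   FILE.  For 64-bit targets other than ELFv2 this emits an ".opd"
   function descriptor; with dot symbols the directives below come
   out roughly as (sketch):

       .section ".opd","aw"
       .align 3
   name:
       .quad .name,.TOC.@tocbase,0
       .previous

   followed by the code entry label ".name" itself.  */
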
32647 void
32648 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
32649 {
32650 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
32651 {
32652 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
32653 ASM_OUTPUT_LABEL (file, name);
32654 fputs (DOUBLE_INT_ASM_OP, file);
32655 rs6000_output_function_entry (file, name);
32656 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
32657 if (DOT_SYMBOLS)
32658 {
32659 fputs ("\t.size\t", file);
32660 assemble_name (file, name);
32661 fputs (",24\n\t.type\t.", file);
32662 assemble_name (file, name);
32663 fputs (",@function\n", file);
32664 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
32665 {
32666 fputs ("\t.globl\t.", file);
32667 assemble_name (file, name);
32668 putc ('\n', file);
32669 }
32670 }
32671 else
32672 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
32673 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
32674 rs6000_output_function_entry (file, name);
32675 fputs (":\n", file);
32676 return;
32677 }
32678
32679 int uses_toc;
32680 if (DEFAULT_ABI == ABI_V4
32681 && (TARGET_RELOCATABLE || flag_pic > 1)
32682 && !TARGET_SECURE_PLT
32683 && (!constant_pool_empty_p () || crtl->profile)
32684 && (uses_toc = uses_TOC ()))
32685 {
32686 char buf[256];
32687
32688 if (uses_toc == 2)
32689 switch_to_other_text_partition ();
32690 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
32691
32692 fprintf (file, "\t.long ");
32693 assemble_name (file, toc_label_name);
32694 need_toc_init = 1;
32695 putc ('-', file);
32696 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
32697 assemble_name (file, buf);
32698 putc ('\n', file);
32699 if (uses_toc == 2)
32700 switch_to_other_text_partition ();
32701 }
32702
32703 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
32704 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
32705
32706 if (TARGET_CMODEL == CMODEL_LARGE && rs6000_global_entry_point_needed_p ())
32707 {
32708 char buf[256];
32709
32710 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
32711
32712 fprintf (file, "\t.quad .TOC.-");
32713 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
32714 assemble_name (file, buf);
32715 putc ('\n', file);
32716 }
32717
32718 if (DEFAULT_ABI == ABI_AIX)
32719 {
32720 const char *desc_name, *orig_name;
32721
32722 orig_name = (*targetm.strip_name_encoding) (name);
32723 desc_name = orig_name;
32724 while (*desc_name == '.')
32725 desc_name++;
32726
32727 if (TREE_PUBLIC (decl))
32728 fprintf (file, "\t.globl %s\n", desc_name);
32729
32730 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
32731 fprintf (file, "%s:\n", desc_name);
32732 fprintf (file, "\t.long %s\n", orig_name);
32733 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
32734 fputs ("\t.long 0\n", file);
32735 fprintf (file, "\t.previous\n");
32736 }
32737 ASM_OUTPUT_LABEL (file, name);
32738 }
32739
32740 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
32741 static void
32742 rs6000_elf_file_end (void)
32743 {
32744 #ifdef HAVE_AS_GNU_ATTRIBUTE
32745 /* ??? The value emitted depends on options active at file end.
32746 Assume anyone using #pragma or attributes that might change
32747 options knows what they are doing. */
32748 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
32749 && rs6000_passes_float)
32750 {
32751 int fp;
32752
32753 if (TARGET_HARD_FLOAT)
32754 fp = 1;
32755 else
32756 fp = 2;
32757 if (rs6000_passes_long_double)
32758 {
32759 if (!TARGET_LONG_DOUBLE_128)
32760 fp |= 2 * 4;
32761 else if (TARGET_IEEEQUAD)
32762 fp |= 3 * 4;
32763 else
32764 fp |= 1 * 4;
32765 }
32766 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
32767 }
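
/* Decoding the value computed above: the low two bits describe the
   scalar FP ABI (1 = hard, 2 = soft) and the next two bits the long
   double format (1 = 128-bit IBM, 2 = 64-bit, 3 = 128-bit IEEE).
   For example, hard float with 128-bit IBM long double emits
   "\t.gnu_attribute 4, 5". */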
32768 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
32769 {
32770 if (rs6000_passes_vector)
32771 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
32772 (TARGET_ALTIVEC_ABI ? 2 : 1));
32773 if (rs6000_returns_struct)
32774 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
32775 aix_struct_return ? 2 : 1);
32776 }
32777 #endif
32778 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
32779 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
32780 file_end_indicate_exec_stack ();
32781 #endif
32782
32783 if (flag_split_stack)
32784 file_end_indicate_split_stack ();
32785
32786 if (cpu_builtin_p)
32787 {
32788 /* We have expanded a CPU builtin, so we need to emit a reference to
32789 the special symbol that LIBC uses to declare that it supports the
32790 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. */
32791 switch_to_section (data_section);
32792 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
32793 fprintf (asm_out_file, "\t%s %s\n",
32794 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
32795 }
32796 }
32797 #endif
32798
32799 #if TARGET_XCOFF
32800
32801 #ifndef HAVE_XCOFF_DWARF_EXTRAS
32802 #define HAVE_XCOFF_DWARF_EXTRAS 0
32803 #endif
32804
32805 static enum unwind_info_type
32806 rs6000_xcoff_debug_unwind_info (void)
32807 {
32808 return UI_NONE;
32809 }
32810
32811 static void
32812 rs6000_xcoff_asm_output_anchor (rtx symbol)
32813 {
32814 char buffer[100];
32815
32816 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
32817 SYMBOL_REF_BLOCK_OFFSET (symbol));
32818 fprintf (asm_out_file, "%s", SET_ASM_OP);
32819 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
32820 fprintf (asm_out_file, ",");
32821 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
32822 fprintf (asm_out_file, "\n");
32823 }
32824
32825 static void
32826 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
32827 {
32828 fputs (GLOBAL_ASM_OP, stream);
32829 RS6000_OUTPUT_BASENAME (stream, name);
32830 putc ('\n', stream);
32831 }
32832
32833 /* A get_unnamed_decl callback, used for read-only sections. DIRECTIVE
32834 points to the section string variable. */
32835
32836 static void
32837 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
32838 {
32839 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
32840 *(const char *const *) directive,
32841 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
32842 }
32843
32844 /* Likewise for read-write sections. */
32845
32846 static void
32847 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
32848 {
32849 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
32850 *(const char *const *) directive,
32851 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
32852 }
32853
32854 static void
32855 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
32856 {
32857 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
32858 *(const char *const *) directive,
32859 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
32860 }
32861
32862 /* A get_unnamed_section callback, used for switching to toc_section. */
32863
32864 static void
32865 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
32866 {
32867 if (TARGET_MINIMAL_TOC)
32868 {
32869 /* toc_section is always selected at least once from
32870 rs6000_xcoff_file_start, so this is guaranteed to be
32871 defined exactly once in each file. */
32872 if (!toc_initialized)
32873 {
32874 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
32875 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
32876 toc_initialized = 1;
32877 }
32878 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
32879 (TARGET_32BIT ? "" : ",3"));
32880 }
32881 else
32882 fputs ("\t.toc\n", asm_out_file);
32883 }
32884
32885 /* Implement TARGET_ASM_INIT_SECTIONS. */
32886
32887 static void
32888 rs6000_xcoff_asm_init_sections (void)
32889 {
32890 read_only_data_section
32891 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
32892 &xcoff_read_only_section_name);
32893
32894 private_data_section
32895 = get_unnamed_section (SECTION_WRITE,
32896 rs6000_xcoff_output_readwrite_section_asm_op,
32897 &xcoff_private_data_section_name);
32898
32899 tls_data_section
32900 = get_unnamed_section (SECTION_TLS,
32901 rs6000_xcoff_output_tls_section_asm_op,
32902 &xcoff_tls_data_section_name);
32903
32904 tls_private_data_section
32905 = get_unnamed_section (SECTION_TLS,
32906 rs6000_xcoff_output_tls_section_asm_op,
32907 &xcoff_private_data_section_name);
32908
32909 read_only_private_data_section
32910 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
32911 &xcoff_private_data_section_name);
32912
32913 toc_section
32914 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
32915
32916 readonly_data_section = read_only_data_section;
32917 }
32918
32919 static int
32920 rs6000_xcoff_reloc_rw_mask (void)
32921 {
32922 return 3;
32923 }
32924
32925 static void
32926 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
32927 tree decl ATTRIBUTE_UNUSED)
32928 {
32929 int smclass;
32930 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
32931
32932 if (flags & SECTION_EXCLUDE)
32933 smclass = 4;
32934 else if (flags & SECTION_DEBUG)
32935 {
32936 fprintf (asm_out_file, "\t.dwsect %s\n", name);
32937 return;
32938 }
32939 else if (flags & SECTION_CODE)
32940 smclass = 0;
32941 else if (flags & SECTION_TLS)
32942 smclass = 3;
32943 else if (flags & SECTION_WRITE)
32944 smclass = 2;
32945 else
32946 smclass = 1;
32947
32948 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
32949 (flags & SECTION_CODE) ? "." : "",
32950 name, suffix[smclass], flags & SECTION_ENTSIZE);
32951 }
32952
32953 #define IN_NAMED_SECTION(DECL) \
32954 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
32955 && DECL_SECTION_NAME (DECL) != NULL)
32956
32957 static section *
32958 rs6000_xcoff_select_section (tree decl, int reloc,
32959 unsigned HOST_WIDE_INT align)
32960 {
32961 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
32962 named section. */
32963 if (align > BIGGEST_ALIGNMENT)
32964 {
32965 resolve_unique_section (decl, reloc, true);
32966 if (IN_NAMED_SECTION (decl))
32967 return get_named_section (decl, NULL, reloc);
32968 }
32969
32970 if (decl_readonly_section (decl, reloc))
32971 {
32972 if (TREE_PUBLIC (decl))
32973 return read_only_data_section;
32974 else
32975 return read_only_private_data_section;
32976 }
32977 else
32978 {
32979 #if HAVE_AS_TLS
32980 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
32981 {
32982 if (TREE_PUBLIC (decl))
32983 return tls_data_section;
32984 else if (bss_initializer_p (decl))
32985 {
32986 /* Convert to COMMON to emit in BSS. */
32987 DECL_COMMON (decl) = 1;
32988 return tls_comm_section;
32989 }
32990 else
32991 return tls_private_data_section;
32992 }
32993 else
32994 #endif
32995 if (TREE_PUBLIC (decl))
32996 return data_section;
32997 else
32998 return private_data_section;
32999 }
33000 }
33001
33002 static void
33003 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
33004 {
33005 const char *name;
33006
33007 /* Use select_section for private data and uninitialized data with
33008 alignment <= BIGGEST_ALIGNMENT. */
33009 if (!TREE_PUBLIC (decl)
33010 || DECL_COMMON (decl)
33011 || (DECL_INITIAL (decl) == NULL_TREE
33012 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
33013 || DECL_INITIAL (decl) == error_mark_node
33014 || (flag_zero_initialized_in_bss
33015 && initializer_zerop (DECL_INITIAL (decl))))
33016 return;
33017
33018 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
33019 name = (*targetm.strip_name_encoding) (name);
33020 set_decl_section_name (decl, name);
33021 }
33022
33023 /* Select the section for a constant in the constant pool.
33024
33025 On RS/6000, all constants are in the private read-only data area.
33026 However, if the constant is being placed in the TOC, it must be
33027 output as a TOC entry. */
33028
33029 static section *
33030 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
33031 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
33032 {
33033 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
33034 return toc_section;
33035 else
33036 return read_only_private_data_section;
33037 }
33038
33039 /* Remove any trailing [DS] or the like from the symbol name. */
33040
33041 static const char *
33042 rs6000_xcoff_strip_name_encoding (const char *name)
33043 {
33044 size_t len;
33045 if (*name == '*')
33046 name++;
33047 len = strlen (name);
33048 if (name[len - 1] == ']')
33049 return ggc_alloc_string (name, len - 4);
33050 else
33051 return name;
33052 }
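
/* For example, "foo[DS]" (length 7) is returned as "foo": the final
   four characters, i.e. the two-letter mapping class plus its
   brackets, are dropped. A leading '*' is skipped first, so
   "*bar[RW]" also yields "bar". */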
33053
33054 /* Section attributes. AIX is always PIC. */
33055
33056 static unsigned int
33057 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
33058 {
33059 unsigned int align;
33060 unsigned int flags = default_section_type_flags (decl, name, reloc);
33061
33062 /* Align to at least UNIT size. */
33063 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
33064 align = MIN_UNITS_PER_WORD;
33065 else
33066 /* Increase alignment of large objects if not already stricter. */
33067 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
33068 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
33069 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
33070
33071 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
33072 }
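
/* For example, an 8-byte-aligned object yields align == 8 above, so
   exact_log2 (align) == 3 is stored in the SECTION_ENTSIZE bits;
   rs6000_xcoff_asm_named_section later prints that value back as the
   csect alignment operand. */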
33073
33074 /* Output at beginning of assembler file.
33075
33076 Initialize the section names for the RS/6000 at this point.
33077
33078 Specify filename, including full path, to assembler.
33079
33080 We want to go into the TOC section so at least one .toc will be emitted.
33081 Also, in order to output proper .bs/.es pairs, we need at least one static
33082 [RW] section emitted.
33083
33084 Finally, declare mcount when profiling to make the assembler happy. */
33085
33086 static void
33087 rs6000_xcoff_file_start (void)
33088 {
33089 rs6000_gen_section_name (&xcoff_bss_section_name,
33090 main_input_filename, ".bss_");
33091 rs6000_gen_section_name (&xcoff_private_data_section_name,
33092 main_input_filename, ".rw_");
33093 rs6000_gen_section_name (&xcoff_read_only_section_name,
33094 main_input_filename, ".ro_");
33095 rs6000_gen_section_name (&xcoff_tls_data_section_name,
33096 main_input_filename, ".tls_");
33097 rs6000_gen_section_name (&xcoff_tbss_section_name,
33098 main_input_filename, ".tbss_[UL]");
33099
33100 fputs ("\t.file\t", asm_out_file);
33101 output_quoted_string (asm_out_file, main_input_filename);
33102 fputc ('\n', asm_out_file);
33103 if (write_symbols != NO_DEBUG)
33104 switch_to_section (private_data_section);
33105 switch_to_section (toc_section);
33106 switch_to_section (text_section);
33107 if (profile_flag)
33108 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
33109 rs6000_file_start ();
33110 }
33111
33112 /* Output at end of assembler file.
33113 On the RS/6000, referencing data should automatically pull in text. */
33114
33115 static void
33116 rs6000_xcoff_file_end (void)
33117 {
33118 switch_to_section (text_section);
33119 fputs ("_section_.text:\n", asm_out_file);
33120 switch_to_section (data_section);
33121 fputs (TARGET_32BIT
33122 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
33123 asm_out_file);
33124 }
33125
33126 struct declare_alias_data
33127 {
33128 FILE *file;
33129 bool function_descriptor;
33130 };
33131
33132 /* Declare alias N. A helper function for call_for_symbol_and_aliases. */
33133
33134 static bool
33135 rs6000_declare_alias (struct symtab_node *n, void *d)
33136 {
33137 struct declare_alias_data *data = (struct declare_alias_data *)d;
33138 /* Main symbol is output specially, because varasm machinery does part of
33139 the job for us - we do not need to declare .globl/lglobs and such. */
33140 if (!n->alias || n->weakref)
33141 return false;
33142
33143 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
33144 return false;
33145
33146 /* Prevent assemble_alias from trying to use .set pseudo operation
33147 that does not behave as expected by the middle-end. */
33148 TREE_ASM_WRITTEN (n->decl) = true;
33149
33150 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
33151 char *buffer = (char *) alloca (strlen (name) + 2);
33152 char *p;
33153 int dollar_inside = 0;
33154
33155 strcpy (buffer, name);
33156 p = strchr (buffer, '$');
33157 while (p) {
33158 *p = '_';
33159 dollar_inside++;
33160 p = strchr (p + 1, '$');
33161 }
33162 if (TREE_PUBLIC (n->decl))
33163 {
33164 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
33165 {
33166 if (dollar_inside) {
33167 if (data->function_descriptor)
33168 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
33169 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
33170 }
33171 if (data->function_descriptor)
33172 {
33173 fputs ("\t.globl .", data->file);
33174 RS6000_OUTPUT_BASENAME (data->file, buffer);
33175 putc ('\n', data->file);
33176 }
33177 fputs ("\t.globl ", data->file);
33178 RS6000_OUTPUT_BASENAME (data->file, buffer);
33179 putc ('\n', data->file);
33180 }
33181 #ifdef ASM_WEAKEN_DECL
33182 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
33183 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
33184 #endif
33185 }
33186 else
33187 {
33188 if (dollar_inside)
33189 {
33190 if (data->function_descriptor)
33191 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
33192 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
33193 }
33194 if (data->function_descriptor)
33195 {
33196 fputs ("\t.lglobl .", data->file);
33197 RS6000_OUTPUT_BASENAME (data->file, buffer);
33198 putc ('\n', data->file);
33199 }
33200 fputs ("\t.lglobl ", data->file);
33201 RS6000_OUTPUT_BASENAME (data->file, buffer);
33202 putc ('\n', data->file);
33203 }
33204 if (data->function_descriptor)
33205 fputs (".", data->file);
33206 RS6000_OUTPUT_BASENAME (data->file, buffer);
33207 fputs (":\n", data->file);
33208 return false;
33209 }
33210
33211
33212 #ifdef HAVE_GAS_HIDDEN
33213 /* Helper function to calculate visibility of a DECL
33214 and return the value as a const string. */
33215
33216 static const char *
33217 rs6000_xcoff_visibility (tree decl)
33218 {
33219 static const char * const visibility_types[] = {
33220 "", ",protected", ",hidden", ",internal"
33221 };
33222
33223 enum symbol_visibility vis = DECL_VISIBILITY (decl);
33224 return visibility_types[vis];
33225 }
33226 #endif
33227
33228
33229 /* This macro produces the initial definition of a function name.
33230 On the RS/6000, we need to place an extra '.' in the function name and
33231 output the function descriptor.
33232 Dollar signs are converted to underscores.
33233
33234 The csect for the function will have already been created when
33235 text_section was selected. We do have to go back to that csect, however.
33236
33237 The third and fourth parameters to the .function pseudo-op (16 and 044)
33238 are placeholders which no longer have any use.
33239
33240 Because AIX assembler's .set command has unexpected semantics, we output
33241 all aliases as alternative labels in front of the definition. */
33242
33243 void
33244 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
33245 {
33246 char *buffer = (char *) alloca (strlen (name) + 1);
33247 char *p;
33248 int dollar_inside = 0;
33249 struct declare_alias_data data = {file, false};
33250
33251 strcpy (buffer, name);
33252 p = strchr (buffer, '$');
33253 while (p) {
33254 *p = '_';
33255 dollar_inside++;
33256 p = strchr (p + 1, '$');
33257 }
33258 if (TREE_PUBLIC (decl))
33259 {
33260 if (!RS6000_WEAK || !DECL_WEAK (decl))
33261 {
33262 if (dollar_inside) {
33263 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
33264 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
33265 }
33266 fputs ("\t.globl .", file);
33267 RS6000_OUTPUT_BASENAME (file, buffer);
33268 #ifdef HAVE_GAS_HIDDEN
33269 fputs (rs6000_xcoff_visibility (decl), file);
33270 #endif
33271 putc ('\n', file);
33272 }
33273 }
33274 else
33275 {
33276 if (dollar_inside) {
33277 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
33278 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
33279 }
33280 fputs ("\t.lglobl .", file);
33281 RS6000_OUTPUT_BASENAME (file, buffer);
33282 putc ('\n', file);
33283 }
33284 fputs ("\t.csect ", file);
33285 RS6000_OUTPUT_BASENAME (file, buffer);
33286 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
33287 RS6000_OUTPUT_BASENAME (file, buffer);
33288 fputs (":\n", file);
33289 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
33290 &data, true);
33291 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
33292 RS6000_OUTPUT_BASENAME (file, buffer);
33293 fputs (", TOC[tc0], 0\n", file);
33294 in_section = NULL;
33295 switch_to_section (function_section (decl));
33296 putc ('.', file);
33297 RS6000_OUTPUT_BASENAME (file, buffer);
33298 fputs (":\n", file);
33299 data.function_descriptor = true;
33300 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
33301 &data, true);
33302 if (!DECL_IGNORED_P (decl))
33303 {
33304 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
33305 xcoffout_declare_function (file, decl, buffer);
33306 else if (write_symbols == DWARF2_DEBUG)
33307 {
33308 name = (*targetm.strip_name_encoding) (name);
33309 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
33310 }
33311 }
33312 return;
33313 }
33314
33315
33316 /* Output assembly language to globalize a symbol from a DECL,
33317 possibly with visibility. */
33318
33319 void
33320 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
33321 {
33322 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
33323 fputs (GLOBAL_ASM_OP, stream);
33324 RS6000_OUTPUT_BASENAME (stream, name);
33325 #ifdef HAVE_GAS_HIDDEN
33326 fputs (rs6000_xcoff_visibility (decl), stream);
33327 #endif
33328 putc ('\n', stream);
33329 }
33330
33331 /* Output assembly language to define a symbol as COMMON from a DECL,
33332 possibly with visibility. */
33333
33334 void
33335 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
33336 tree decl ATTRIBUTE_UNUSED,
33337 const char *name,
33338 unsigned HOST_WIDE_INT size,
33339 unsigned HOST_WIDE_INT align)
33340 {
33341 unsigned HOST_WIDE_INT align2 = 2;
33342
33343 if (align > 32)
33344 align2 = floor_log2 (align / BITS_PER_UNIT);
33345 else if (size > 4)
33346 align2 = 3;
33347
33348 fputs (COMMON_ASM_OP, stream);
33349 RS6000_OUTPUT_BASENAME (stream, name);
33350
33351 fprintf (stream,
33352 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
33353 size, align2);
33354
33355 #ifdef HAVE_GAS_HIDDEN
33356 if (decl != NULL)
33357 fputs (rs6000_xcoff_visibility (decl), stream);
33358 #endif
33359 putc ('\n', stream);
33360 }
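
/* For example, a 16-byte object with a 128-bit alignment request gets
   align2 == floor_log2 (128 / 8) == 4, producing ".comm name,16,4"
   (2**4 == 16-byte alignment); a plain 4-byte int keeps the default
   align2 of 2. */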
33361
33362 /* This macro produces the initial definition of an object (variable) name.
33363 Because AIX assembler's .set command has unexpected semantics, we output
33364 all aliases as alternative labels in front of the definition. */
33365
33366 void
33367 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
33368 {
33369 struct declare_alias_data data = {file, false};
33370 RS6000_OUTPUT_BASENAME (file, name);
33371 fputs (":\n", file);
33372 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
33373 &data, true);
33374 }
33375
33376 /* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'. */
33377
33378 void
33379 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
33380 {
33381 fputs (integer_asm_op (size, FALSE), file);
33382 assemble_name (file, label);
33383 fputs ("-$", file);
33384 }
33385
33386 /* Output a symbol offset relative to the dbase for the current object.
33387 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
33388 signed offsets.
33389
33390 __gcc_unwind_dbase is embedded in all executables/libraries through
33391 libgcc/config/rs6000/crtdbase.S. */
33392
33393 void
33394 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
33395 {
33396 fputs (integer_asm_op (size, FALSE), file);
33397 assemble_name (file, label);
33398 fputs("-__gcc_unwind_dbase", file);
33399 }
33400
33401 #ifdef HAVE_AS_TLS
33402 static void
33403 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
33404 {
33405 rtx symbol;
33406 int flags;
33407 const char *symname;
33408
33409 default_encode_section_info (decl, rtl, first);
33410
33411 /* Careful not to prod global register variables. */
33412 if (!MEM_P (rtl))
33413 return;
33414 symbol = XEXP (rtl, 0);
33415 if (GET_CODE (symbol) != SYMBOL_REF)
33416 return;
33417
33418 flags = SYMBOL_REF_FLAGS (symbol);
33419
33420 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
33421 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
33422
33423 SYMBOL_REF_FLAGS (symbol) = flags;
33424
33425 /* Append mapping class to extern decls. */
33426 symname = XSTR (symbol, 0);
33427 if (decl /* sync condition with assemble_external () */
33428 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
33429 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
33430 || TREE_CODE (decl) == FUNCTION_DECL)
33431 && symname[strlen (symname) - 1] != ']')
33432 {
33433 char *newname = (char *) alloca (strlen (symname) + 5);
33434 strcpy (newname, symname);
33435 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
33436 ? "[DS]" : "[UA]"));
33437 XSTR (symbol, 0) = ggc_strdup (newname);
33438 }
33439 }
33440 #endif /* HAVE_AS_TLS */
33441 #endif /* TARGET_XCOFF */
33442
33443 void
33444 rs6000_asm_weaken_decl (FILE *stream, tree decl,
33445 const char *name, const char *val)
33446 {
33447 fputs ("\t.weak\t", stream);
33448 RS6000_OUTPUT_BASENAME (stream, name);
33449 if (decl && TREE_CODE (decl) == FUNCTION_DECL
33450 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
33451 {
33452 if (TARGET_XCOFF)
33453 fputs ("[DS]", stream);
33454 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
33455 if (TARGET_XCOFF)
33456 fputs (rs6000_xcoff_visibility (decl), stream);
33457 #endif
33458 fputs ("\n\t.weak\t.", stream);
33459 RS6000_OUTPUT_BASENAME (stream, name);
33460 }
33461 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
33462 if (TARGET_XCOFF)
33463 fputs (rs6000_xcoff_visibility (decl), stream);
33464 #endif
33465 fputc ('\n', stream);
33466 if (val)
33467 {
33468 #ifdef ASM_OUTPUT_DEF
33469 ASM_OUTPUT_DEF (stream, name, val);
33470 #endif
33471 if (decl && TREE_CODE (decl) == FUNCTION_DECL
33472 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
33473 {
33474 fputs ("\t.set\t.", stream);
33475 RS6000_OUTPUT_BASENAME (stream, name);
33476 fputs (",.", stream);
33477 RS6000_OUTPUT_BASENAME (stream, val);
33478 fputc ('\n', stream);
33479 }
33480 }
33481 }
33482
33483
33484 /* Return true if INSN should not be copied. */
33485
33486 static bool
33487 rs6000_cannot_copy_insn_p (rtx_insn *insn)
33488 {
33489 return recog_memoized (insn) >= 0
33490 && get_attr_cannot_copy (insn);
33491 }
33492
33493 /* Compute a (partial) cost for rtx X. Return true if the complete
33494 cost has been computed, and false if subexpressions should be
33495 scanned. In either case, *TOTAL contains the cost result. */
33496
33497 static bool
33498 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
33499 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
33500 {
33501 int code = GET_CODE (x);
33502
33503 switch (code)
33504 {
33505 /* On the RS/6000, if it is valid in the insn, it is free. */
33506 case CONST_INT:
33507 if (((outer_code == SET
33508 || outer_code == PLUS
33509 || outer_code == MINUS)
33510 && (satisfies_constraint_I (x)
33511 || satisfies_constraint_L (x)))
33512 || (outer_code == AND
33513 && (satisfies_constraint_K (x)
33514 || (mode == SImode
33515 ? satisfies_constraint_L (x)
33516 : satisfies_constraint_J (x))))
33517 || ((outer_code == IOR || outer_code == XOR)
33518 && (satisfies_constraint_K (x)
33519 || (mode == SImode
33520 ? satisfies_constraint_L (x)
33521 : satisfies_constraint_J (x))))
33522 || outer_code == ASHIFT
33523 || outer_code == ASHIFTRT
33524 || outer_code == LSHIFTRT
33525 || outer_code == ROTATE
33526 || outer_code == ROTATERT
33527 || outer_code == ZERO_EXTRACT
33528 || (outer_code == MULT
33529 && satisfies_constraint_I (x))
33530 || ((outer_code == DIV || outer_code == UDIV
33531 || outer_code == MOD || outer_code == UMOD)
33532 && exact_log2 (INTVAL (x)) >= 0)
33533 || (outer_code == COMPARE
33534 && (satisfies_constraint_I (x)
33535 || satisfies_constraint_K (x)))
33536 || ((outer_code == EQ || outer_code == NE)
33537 && (satisfies_constraint_I (x)
33538 || satisfies_constraint_K (x)
33539 || (mode == SImode
33540 ? satisfies_constraint_L (x)
33541 : satisfies_constraint_J (x))))
33542 || (outer_code == GTU
33543 && satisfies_constraint_I (x))
33544 || (outer_code == LTU
33545 && satisfies_constraint_P (x)))
33546 {
33547 *total = 0;
33548 return true;
33549 }
33550 else if ((outer_code == PLUS
33551 && reg_or_add_cint_operand (x, VOIDmode))
33552 || (outer_code == MINUS
33553 && reg_or_sub_cint_operand (x, VOIDmode))
33554 || ((outer_code == SET
33555 || outer_code == IOR
33556 || outer_code == XOR)
33557 && (INTVAL (x)
33558 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
33559 {
33560 *total = COSTS_N_INSNS (1);
33561 return true;
33562 }
33563 /* FALLTHRU */
33564
33565 case CONST_DOUBLE:
33566 case CONST_WIDE_INT:
33567 case CONST:
33568 case HIGH:
33569 case SYMBOL_REF:
33570 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
33571 return true;
33572
33573 case MEM:
33574 /* When optimizing for size, MEM should be slightly more expensive
33575 than generating the address, e.g., (plus (reg) (const)).
33576 L1 cache latency is about two instructions. */
33577 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
33578 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
33579 *total += COSTS_N_INSNS (100);
33580 return true;
33581
33582 case LABEL_REF:
33583 *total = 0;
33584 return true;
33585
33586 case PLUS:
33587 case MINUS:
33588 if (FLOAT_MODE_P (mode))
33589 *total = rs6000_cost->fp;
33590 else
33591 *total = COSTS_N_INSNS (1);
33592 return false;
33593
33594 case MULT:
33595 if (GET_CODE (XEXP (x, 1)) == CONST_INT
33596 && satisfies_constraint_I (XEXP (x, 1)))
33597 {
33598 if (INTVAL (XEXP (x, 1)) >= -256
33599 && INTVAL (XEXP (x, 1)) <= 255)
33600 *total = rs6000_cost->mulsi_const9;
33601 else
33602 *total = rs6000_cost->mulsi_const;
33603 }
33604 else if (mode == SFmode)
33605 *total = rs6000_cost->fp;
33606 else if (FLOAT_MODE_P (mode))
33607 *total = rs6000_cost->dmul;
33608 else if (mode == DImode)
33609 *total = rs6000_cost->muldi;
33610 else
33611 *total = rs6000_cost->mulsi;
33612 return false;
33613
33614 case FMA:
33615 if (mode == SFmode)
33616 *total = rs6000_cost->fp;
33617 else
33618 *total = rs6000_cost->dmul;
33619 break;
33620
33621 case DIV:
33622 case MOD:
33623 if (FLOAT_MODE_P (mode))
33624 {
33625 *total = mode == DFmode ? rs6000_cost->ddiv
33626 : rs6000_cost->sdiv;
33627 return false;
33628 }
33629 /* FALLTHRU */
33630
33631 case UDIV:
33632 case UMOD:
33633 if (GET_CODE (XEXP (x, 1)) == CONST_INT
33634 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
33635 {
33636 if (code == DIV || code == MOD)
33637 /* Shift, addze */
33638 *total = COSTS_N_INSNS (2);
33639 else
33640 /* Shift */
33641 *total = COSTS_N_INSNS (1);
33642 }
33643 else
33644 {
33645 if (GET_MODE (XEXP (x, 1)) == DImode)
33646 *total = rs6000_cost->divdi;
33647 else
33648 *total = rs6000_cost->divsi;
33649 }
33650 /* Add in shift and subtract for MOD unless we have a mod instruction. */
33651 if (!TARGET_MODULO && (code == MOD || code == UMOD))
33652 *total += COSTS_N_INSNS (2);
33653 return false;
33654
33655 case CTZ:
33656 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
33657 return false;
33658
33659 case FFS:
33660 *total = COSTS_N_INSNS (4);
33661 return false;
33662
33663 case POPCOUNT:
33664 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
33665 return false;
33666
33667 case PARITY:
33668 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
33669 return false;
33670
33671 case NOT:
33672 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
33673 *total = 0;
33674 else
33675 *total = COSTS_N_INSNS (1);
33676 return false;
33677
33678 case AND:
33679 if (CONST_INT_P (XEXP (x, 1)))
33680 {
33681 rtx left = XEXP (x, 0);
33682 rtx_code left_code = GET_CODE (left);
33683
33684 /* rotate-and-mask: 1 insn. */
33685 if ((left_code == ROTATE
33686 || left_code == ASHIFT
33687 || left_code == LSHIFTRT)
33688 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
33689 {
33690 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
33691 if (!CONST_INT_P (XEXP (left, 1)))
33692 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
33693 *total += COSTS_N_INSNS (1);
33694 return true;
33695 }
33696
33697 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
33698 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
33699 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
33700 || (val & 0xffff) == val
33701 || (val & 0xffff0000) == val
33702 || ((val & 0xffff) == 0 && mode == SImode))
33703 {
33704 *total = rtx_cost (left, mode, AND, 0, speed);
33705 *total += COSTS_N_INSNS (1);
33706 return true;
33707 }
33708
33709 /* 2 insns. */
33710 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
33711 {
33712 *total = rtx_cost (left, mode, AND, 0, speed);
33713 *total += COSTS_N_INSNS (2);
33714 return true;
33715 }
33716 }
33717
33718 *total = COSTS_N_INSNS (1);
33719 return false;
33720
33721 case IOR:
33722 /* FIXME */
33723 *total = COSTS_N_INSNS (1);
33724 return true;
33725
33726 case CLZ:
33727 case XOR:
33728 case ZERO_EXTRACT:
33729 *total = COSTS_N_INSNS (1);
33730 return false;
33731
33732 case ASHIFT:
33733 /* The EXTSWSLI instruction combines a sign extend and a shift. Don't
33734 count the two operations separately within the insn. */
33735 if (TARGET_EXTSWSLI && mode == DImode
33736 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
33737 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
33738 {
33739 *total = 0;
33740 return false;
33741 }
33742 /* fall through */
33743
33744 case ASHIFTRT:
33745 case LSHIFTRT:
33746 case ROTATE:
33747 case ROTATERT:
33748 /* Handle mul_highpart. */
33749 if (outer_code == TRUNCATE
33750 && GET_CODE (XEXP (x, 0)) == MULT)
33751 {
33752 if (mode == DImode)
33753 *total = rs6000_cost->muldi;
33754 else
33755 *total = rs6000_cost->mulsi;
33756 return true;
33757 }
33758 else if (outer_code == AND)
33759 *total = 0;
33760 else
33761 *total = COSTS_N_INSNS (1);
33762 return false;
33763
33764 case SIGN_EXTEND:
33765 case ZERO_EXTEND:
33766 if (GET_CODE (XEXP (x, 0)) == MEM)
33767 *total = 0;
33768 else
33769 *total = COSTS_N_INSNS (1);
33770 return false;
33771
33772 case COMPARE:
33773 case NEG:
33774 case ABS:
33775 if (!FLOAT_MODE_P (mode))
33776 {
33777 *total = COSTS_N_INSNS (1);
33778 return false;
33779 }
33780 /* FALLTHRU */
33781
33782 case FLOAT:
33783 case UNSIGNED_FLOAT:
33784 case FIX:
33785 case UNSIGNED_FIX:
33786 case FLOAT_TRUNCATE:
33787 *total = rs6000_cost->fp;
33788 return false;
33789
33790 case FLOAT_EXTEND:
33791 if (mode == DFmode)
33792 *total = rs6000_cost->sfdf_convert;
33793 else
33794 *total = rs6000_cost->fp;
33795 return false;
33796
33797 case UNSPEC:
33798 switch (XINT (x, 1))
33799 {
33800 case UNSPEC_FRSP:
33801 *total = rs6000_cost->fp;
33802 return true;
33803
33804 default:
33805 break;
33806 }
33807 break;
33808
33809 case CALL:
33810 case IF_THEN_ELSE:
33811 if (!speed)
33812 {
33813 *total = COSTS_N_INSNS (1);
33814 return true;
33815 }
33816 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
33817 {
33818 *total = rs6000_cost->fp;
33819 return false;
33820 }
33821 break;
33822
33823 case NE:
33824 case EQ:
33825 case GTU:
33826 case LTU:
33827 /* Carry bit requires mode == Pmode.
33828 NEG or PLUS already counted so only add one. */
33829 if (mode == Pmode
33830 && (outer_code == NEG || outer_code == PLUS))
33831 {
33832 *total = COSTS_N_INSNS (1);
33833 return true;
33834 }
33835 /* FALLTHRU */
33836
33837 case GT:
33838 case LT:
33839 case UNORDERED:
33840 if (outer_code == SET)
33841 {
33842 if (XEXP (x, 1) == const0_rtx)
33843 {
33844 *total = COSTS_N_INSNS (2);
33845 return true;
33846 }
33847 else
33848 {
33849 *total = COSTS_N_INSNS (3);
33850 return false;
33851 }
33852 }
33853 /* CC COMPARE. */
33854 if (outer_code == COMPARE)
33855 {
33856 *total = 0;
33857 return true;
33858 }
33859 break;
33860
33861 default:
33862 break;
33863 }
33864
33865 return false;
33866 }
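
/* For example, in (plus (reg) (const_int 12)) the constant satisfies
   the 'I' constraint (signed 16 bits) and folds into an addi, so the
   CONST_INT case above reports a cost of 0. By contrast,
   (set (reg) (const_int 0x12345678)) matches none of the free forms
   but fits in 32 bits, so it is charged COSTS_N_INSNS (1) even though
   materializing it takes a lis/ori pair. */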
33867
33868 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
33869
33870 static bool
33871 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
33872 int opno, int *total, bool speed)
33873 {
33874 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
33875
33876 fprintf (stderr,
33877 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
33878 "opno = %d, total = %d, speed = %s, x:\n",
33879 ret ? "complete" : "scan inner",
33880 GET_MODE_NAME (mode),
33881 GET_RTX_NAME (outer_code),
33882 opno,
33883 *total,
33884 speed ? "true" : "false");
33885
33886 debug_rtx (x);
33887
33888 return ret;
33889 }
33890
33891 static int
33892 rs6000_insn_cost (rtx_insn *insn, bool speed)
33893 {
33894 if (recog_memoized (insn) < 0)
33895 return 0;
33896
33897 if (!speed)
33898 return get_attr_length (insn);
33899
33900 int cost = get_attr_cost (insn);
33901 if (cost > 0)
33902 return cost;
33903
33904 int n = get_attr_length (insn) / 4;
33905 enum attr_type type = get_attr_type (insn);
33906
33907 switch (type)
33908 {
33909 case TYPE_LOAD:
33910 case TYPE_FPLOAD:
33911 case TYPE_VECLOAD:
33912 cost = COSTS_N_INSNS (n + 1);
33913 break;
33914
33915 case TYPE_MUL:
33916 switch (get_attr_size (insn))
33917 {
33918 case SIZE_8:
33919 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
33920 break;
33921 case SIZE_16:
33922 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
33923 break;
33924 case SIZE_32:
33925 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
33926 break;
33927 case SIZE_64:
33928 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
33929 break;
33930 default:
33931 gcc_unreachable ();
33932 }
33933 break;
33934 case TYPE_DIV:
33935 switch (get_attr_size (insn))
33936 {
33937 case SIZE_32:
33938 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
33939 break;
33940 case SIZE_64:
33941 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
33942 break;
33943 default:
33944 gcc_unreachable ();
33945 }
33946 break;
33947
33948 case TYPE_FP:
33949 cost = n * rs6000_cost->fp;
33950 break;
33951 case TYPE_DMUL:
33952 cost = n * rs6000_cost->dmul;
33953 break;
33954 case TYPE_SDIV:
33955 cost = n * rs6000_cost->sdiv;
33956 break;
33957 case TYPE_DDIV:
33958 cost = n * rs6000_cost->ddiv;
33959 break;
33960
33961 case TYPE_SYNC:
33962 case TYPE_LOAD_L:
33963 case TYPE_MFCR:
33964 case TYPE_MFCRF:
33965 cost = COSTS_N_INSNS (n + 2);
33966 break;
33967
33968 default:
33969 cost = COSTS_N_INSNS (n);
33970 }
33971
33972 return cost;
33973 }
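
/* For example, a single-instruction load (length 4, so n == 1) of
   TYPE_LOAD costs COSTS_N_INSNS (2) when optimizing for speed,
   matching the "L1 cache latency is about two instructions" note in
   rs6000_rtx_costs; when optimizing for size the function instead
   returns the raw length, 4. */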
33974
33975 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
33976
33977 static int
33978 rs6000_debug_address_cost (rtx x, machine_mode mode,
33979 addr_space_t as, bool speed)
33980 {
33981 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
33982
33983 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
33984 ret, speed ? "true" : "false");
33985 debug_rtx (x);
33986
33987 return ret;
33988 }
33989
33990
33991 /* A C expression returning the cost of moving data from a register of class
33992 CLASS1 to one of CLASS2. */
33993
33994 static int
33995 rs6000_register_move_cost (machine_mode mode,
33996 reg_class_t from, reg_class_t to)
33997 {
33998 int ret;
33999
34000 if (TARGET_DEBUG_COST)
34001 dbg_cost_ctrl++;
34002
34003 /* Moves from/to GENERAL_REGS. */
34004 if (reg_classes_intersect_p (to, GENERAL_REGS)
34005 || reg_classes_intersect_p (from, GENERAL_REGS))
34006 {
34007 reg_class_t rclass = from;
34008
34009 if (! reg_classes_intersect_p (to, GENERAL_REGS))
34010 rclass = to;
34011
34012 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
34013 ret = (rs6000_memory_move_cost (mode, rclass, false)
34014 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
34015
34016 /* It's more expensive to move CR_REGS than CR0_REGS because of the
34017 shift. */
34018 else if (rclass == CR_REGS)
34019 ret = 4;
34020
34021 /* For those processors that have slow LR/CTR moves, make them more
34022 expensive than memory in order to bias spills to memory. */
34023 else if ((rs6000_tune == PROCESSOR_POWER6
34024 || rs6000_tune == PROCESSOR_POWER7
34025 || rs6000_tune == PROCESSOR_POWER8
34026 || rs6000_tune == PROCESSOR_POWER9)
34027 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
34028 ret = 6 * hard_regno_nregs (0, mode);
34029
34030 else
34031 /* A move will cost one instruction per GPR moved. */
34032 ret = 2 * hard_regno_nregs (0, mode);
34033 }
34034
34035 /* If we have VSX, we can easily move between FPR or Altivec registers. */
34036 else if (VECTOR_MEM_VSX_P (mode)
34037 && reg_classes_intersect_p (to, VSX_REGS)
34038 && reg_classes_intersect_p (from, VSX_REGS))
34039 ret = 2 * hard_regno_nregs (FIRST_FPR_REGNO, mode);
34040
34041 /* Moving between two similar registers is just one instruction. */
34042 else if (reg_classes_intersect_p (to, from))
34043 ret = (FLOAT128_2REG_P (mode)) ? 4 : 2;
34044
34045 /* Everything else has to go through GENERAL_REGS. */
34046 else
34047 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
34048 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
34049
34050 if (TARGET_DEBUG_COST)
34051 {
34052 if (dbg_cost_ctrl == 1)
34053 fprintf (stderr,
34054 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
34055 ret, GET_MODE_NAME (mode), reg_class_names[from],
34056 reg_class_names[to]);
34057 dbg_cost_ctrl--;
34058 }
34059
34060 return ret;
34061 }
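
/* For example, a GPR-to-GPR move of DImode on a 64-bit target costs
   2 * 1 == 2, while moving it to or from LR/CTR on a POWER6..POWER9
   tune costs 6 * 1 == 6, more than the GPR memory-move cost of 4,
   which biases such spills toward memory as intended. */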
34062
34063 /* A C expression returning the cost of moving data of MODE from a register to
34064 or from memory. */
34065
34066 static int
34067 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
34068 bool in ATTRIBUTE_UNUSED)
34069 {
34070 int ret;
34071
34072 if (TARGET_DEBUG_COST)
34073 dbg_cost_ctrl++;
34074
34075 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
34076 ret = 4 * hard_regno_nregs (0, mode);
34077 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
34078 || reg_classes_intersect_p (rclass, VSX_REGS)))
34079 ret = 4 * hard_regno_nregs (32, mode);
34080 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
34081 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
34082 else
34083 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
34084
34085 if (TARGET_DEBUG_COST)
34086 {
34087 if (dbg_cost_ctrl == 1)
34088 fprintf (stderr,
34089 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
34090 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
34091 dbg_cost_ctrl--;
34092 }
34093
34094 return ret;
34095 }
34096
34097 /* Returns a code for a target-specific builtin that implements
34098 the reciprocal of the function, or NULL_TREE if not available. */
34099
34100 static tree
34101 rs6000_builtin_reciprocal (tree fndecl)
34102 {
34103 switch (DECL_FUNCTION_CODE (fndecl))
34104 {
34105 case VSX_BUILTIN_XVSQRTDP:
34106 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
34107 return NULL_TREE;
34108
34109 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
34110
34111 case VSX_BUILTIN_XVSQRTSP:
34112 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
34113 return NULL_TREE;
34114
34115 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
34116
34117 default:
34118 return NULL_TREE;
34119 }
34120 }
34121
34122 /* Load up a constant. If the mode is a vector mode, splat the value across
34123 all of the vector elements. */
34124
34125 static rtx
34126 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
34127 {
34128 rtx reg;
34129
34130 if (mode == SFmode || mode == DFmode)
34131 {
34132 rtx d = const_double_from_real_value (dconst, mode);
34133 reg = force_reg (mode, d);
34134 }
34135 else if (mode == V4SFmode)
34136 {
34137 rtx d = const_double_from_real_value (dconst, SFmode);
34138 rtvec v = gen_rtvec (4, d, d, d, d);
34139 reg = gen_reg_rtx (mode);
34140 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
34141 }
34142 else if (mode == V2DFmode)
34143 {
34144 rtx d = const_double_from_real_value (dconst, DFmode);
34145 rtvec v = gen_rtvec (2, d, d);
34146 reg = gen_reg_rtx (mode);
34147 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
34148 }
34149 else
34150 gcc_unreachable ();
34151
34152 return reg;
34153 }
34154
34155 /* Generate an FMA instruction. */
34156
34157 static void
34158 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
34159 {
34160 machine_mode mode = GET_MODE (target);
34161 rtx dst;
34162
34163 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
34164 gcc_assert (dst != NULL);
34165
34166 if (dst != target)
34167 emit_move_insn (target, dst);
34168 }
34169
34170 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
34171
34172 static void
34173 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
34174 {
34175 machine_mode mode = GET_MODE (dst);
34176 rtx r;
34177
34178 /* This is a tad more complicated, since the fnma_optab is for
34179 a different expression: fma(-m1, m2, a), which is the same
34180 thing except in the case of signed zeros.
34181
34182 Fortunately we know that if FMA is supported that FNMSUB is
34183 also supported in the ISA. Just expand it directly. */
34184
34185 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
34186
34187 r = gen_rtx_NEG (mode, a);
34188 r = gen_rtx_FMA (mode, m1, m2, r);
34189 r = gen_rtx_NEG (mode, r);
34190 emit_insn (gen_rtx_SET (dst, r));
34191 }
34192
34193 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
34194 add a reg_note saying that this was a division. Support both scalar and
34195 vector divide. Assumes no trapping math and finite arguments. */
34196
34197 void
34198 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
34199 {
34200 machine_mode mode = GET_MODE (dst);
34201 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
34202 int i;
34203
34204 /* Low precision estimates guarantee 5 bits of accuracy. High
34205 precision estimates guarantee 14 bits of accuracy. SFmode
34206 requires 23 bits of accuracy. DFmode requires 52 bits of
34207 accuracy. Each pass at least doubles the accuracy, leading
34208 to the following. */
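/* Concretely: a 5-bit estimate reaches 10, 20, then 40 good bits
   after 1, 2, 3 passes, covering SFmode's 23 bits in 3 passes and
   DFmode's 52 bits in 4; the 14-bit TARGET_RECIP_PRECISION estimate
   needs only 1 pass (28 bits) or 2 passes (56 bits) respectively. */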
34209 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
34210 if (mode == DFmode || mode == V2DFmode)
34211 passes++;
34212
34213 enum insn_code code = optab_handler (smul_optab, mode);
34214 insn_gen_fn gen_mul = GEN_FCN (code);
34215
34216 gcc_assert (code != CODE_FOR_nothing);
34217
34218 one = rs6000_load_constant_and_splat (mode, dconst1);
34219
34220 /* x0 = 1./d estimate */
34221 x0 = gen_reg_rtx (mode);
34222 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
34223 UNSPEC_FRES)));
34224
34225 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
34226 if (passes > 1) {
34227
34228 /* e0 = 1. - d * x0 */
34229 e0 = gen_reg_rtx (mode);
34230 rs6000_emit_nmsub (e0, d, x0, one);
34231
34232 /* x1 = x0 + e0 * x0 */
34233 x1 = gen_reg_rtx (mode);
34234 rs6000_emit_madd (x1, e0, x0, x0);
34235
34236 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
34237 ++i, xprev = xnext, eprev = enext) {
34238
34239 /* enext = eprev * eprev */
34240 enext = gen_reg_rtx (mode);
34241 emit_insn (gen_mul (enext, eprev, eprev));
34242
34243 /* xnext = xprev + enext * xprev */
34244 xnext = gen_reg_rtx (mode);
34245 rs6000_emit_madd (xnext, enext, xprev, xprev);
34246 }
34247
34248 } else
34249 xprev = x0;
34250
34251 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
34252
34253 /* u = n * xprev */
34254 u = gen_reg_rtx (mode);
34255 emit_insn (gen_mul (u, n, xprev));
34256
34257 /* v = n - (d * u) */
34258 v = gen_reg_rtx (mode);
34259 rs6000_emit_nmsub (v, d, u, n);
34260
34261 /* dst = (v * xprev) + u */
34262 rs6000_emit_madd (dst, v, xprev, u);
34263
34264 if (note_p)
34265 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
34266 }
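
/* Scalar sketch of the sequence emitted above, assuming passes > 1
   (illustrative only, not compiled; estimate () is a stand-in for the
   UNSPEC_FRES reciprocal estimate):

     double x = estimate (d);           // ~ 1/d, about 5 good bits
     double e = 1.0 - d * x;            // error of the estimate
     x = x + e * x;                     // first refinement
     for (int i = 0; i < passes - 2; i++)
       {
         e = e * e;                     // the error squares each pass
         x = x + e * x;
       }
     double u = n * x;                  // candidate quotient
     dst = u + (n - d * u) * x;         // last pass folds in n  */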
34267
34268 /* Goldschmidt's Algorithm for single/double-precision floating point
34269 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
34270
34271 void
34272 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
34273 {
34274 machine_mode mode = GET_MODE (src);
34275 rtx e = gen_reg_rtx (mode);
34276 rtx g = gen_reg_rtx (mode);
34277 rtx h = gen_reg_rtx (mode);
34278
34279 /* Low precision estimates guarantee 5 bits of accuracy. High
34280 precision estimates guarantee 14 bits of accuracy. SFmode
34281 requires 23 bits of accuracy. DFmode requires 52 bits of
34282 accuracy. Each pass at least doubles the accuracy, leading
34283 to the following. */
34284 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
34285 if (mode == DFmode || mode == V2DFmode)
34286 passes++;
34287
34288 int i;
34289 rtx mhalf;
34290 enum insn_code code = optab_handler (smul_optab, mode);
34291 insn_gen_fn gen_mul = GEN_FCN (code);
34292
34293 gcc_assert (code != CODE_FOR_nothing);
34294
34295 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
34296
34297 /* e = rsqrt estimate */
34298 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
34299 UNSPEC_RSQRT)));
34300
34301 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
34302 if (!recip)
34303 {
34304 rtx zero = force_reg (mode, CONST0_RTX (mode));
34305
34306 if (mode == SFmode)
34307 {
34308 rtx target = emit_conditional_move (e, GT, src, zero, mode,
34309 e, zero, mode, 0);
34310 if (target != e)
34311 emit_move_insn (e, target);
34312 }
34313 else
34314 {
34315 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
34316 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
34317 }
34318 }
34319
34320 /* g = sqrt estimate. */
34321 emit_insn (gen_mul (g, e, src));
34322 /* h = 1/(2*sqrt) estimate. */
34323 emit_insn (gen_mul (h, e, mhalf));
34324
34325 if (recip)
34326 {
34327 if (passes == 1)
34328 {
34329 rtx t = gen_reg_rtx (mode);
34330 rs6000_emit_nmsub (t, g, h, mhalf);
34331 /* Apply correction directly to 1/rsqrt estimate. */
34332 rs6000_emit_madd (dst, e, t, e);
34333 }
34334 else
34335 {
34336 for (i = 0; i < passes; i++)
34337 {
34338 rtx t1 = gen_reg_rtx (mode);
34339 rtx g1 = gen_reg_rtx (mode);
34340 rtx h1 = gen_reg_rtx (mode);
34341
34342 rs6000_emit_nmsub (t1, g, h, mhalf);
34343 rs6000_emit_madd (g1, g, t1, g);
34344 rs6000_emit_madd (h1, h, t1, h);
34345
34346 g = g1;
34347 h = h1;
34348 }
34349 /* Multiply by 2 for 1/rsqrt. */
34350 emit_insn (gen_add3_insn (dst, h, h));
34351 }
34352 }
34353 else
34354 {
34355 rtx t = gen_reg_rtx (mode);
34356 rs6000_emit_nmsub (t, g, h, mhalf);
34357 rs6000_emit_madd (dst, g, t, g);
34358 }
34359
34360 return;
34361 }
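
/* Scalar sketch of the rsqrt multi-pass path above (illustrative
   only, not compiled; rsqrt_estimate stands in for UNSPEC_RSQRT):

     double e = rsqrt_estimate (src);
     double g = src * e;                // ~ sqrt(src)
     double h = 0.5 * e;                // ~ half of 1/sqrt(src)
     for (int i = 0; i < passes; i++)
       {
         double t = 0.5 - g * h;        // residual
         g = g + g * t;                 // refine sqrt
         h = h + h * t;                 // refine half-rsqrt
       }
     dst = h + h;                       // 1/sqrt(src)

   The sqrt path instead finishes with the single correction
   dst = g + g * (0.5 - g * h).  */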
34362
34363 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
34364 (Power7) targets. DST is the target, and SRC is the argument operand. */
34365
34366 void
34367 rs6000_emit_popcount (rtx dst, rtx src)
34368 {
34369 machine_mode mode = GET_MODE (dst);
34370 rtx tmp1, tmp2;
34371
34372 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
34373 if (TARGET_POPCNTD)
34374 {
34375 if (mode == SImode)
34376 emit_insn (gen_popcntdsi2 (dst, src));
34377 else
34378 emit_insn (gen_popcntddi2 (dst, src));
34379 return;
34380 }
34381
34382 tmp1 = gen_reg_rtx (mode);
34383
34384 if (mode == SImode)
34385 {
34386 emit_insn (gen_popcntbsi2 (tmp1, src));
34387 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
34388 NULL_RTX, 0);
34389 tmp2 = force_reg (SImode, tmp2);
34390 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
34391 }
34392 else
34393 {
34394 emit_insn (gen_popcntbdi2 (tmp1, src));
34395 tmp2 = expand_mult (DImode, tmp1,
34396 GEN_INT ((HOST_WIDE_INT)
34397 0x01010101 << 32 | 0x01010101),
34398 NULL_RTX, 0);
34399 tmp2 = force_reg (DImode, tmp2);
34400 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
34401 }
34402 }
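
/* Illustrative C equivalent of the SImode fallback path (not part of
   GCC; per_byte_counts models what popcntb leaves in each byte):

     unsigned int t = per_byte_counts (x);     // popcntb
     return (t * 0x01010101u) >> 24;           // sum bytes, keep top

   Multiplying by 0x01010101 adds all four byte counts into the most
   significant byte; the shift extracts it. The DImode variant uses
   0x0101010101010101 and a shift of 56. */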
34403
34404
34405 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
34406 target, and SRC is the argument operand. */
34407
34408 void
34409 rs6000_emit_parity (rtx dst, rtx src)
34410 {
34411 machine_mode mode = GET_MODE (dst);
34412 rtx tmp;
34413
34414 tmp = gen_reg_rtx (mode);
34415
34416 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
34417 if (TARGET_CMPB)
34418 {
34419 if (mode == SImode)
34420 {
34421 emit_insn (gen_popcntbsi2 (tmp, src));
34422 emit_insn (gen_paritysi2_cmpb (dst, tmp));
34423 }
34424 else
34425 {
34426 emit_insn (gen_popcntbdi2 (tmp, src));
34427 emit_insn (gen_paritydi2_cmpb (dst, tmp));
34428 }
34429 return;
34430 }
34431
34432 if (mode == SImode)
34433 {
34434 /* Is mult+shift >= shift+xor+shift+xor? */
34435 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
34436 {
34437 rtx tmp1, tmp2, tmp3, tmp4;
34438
34439 tmp1 = gen_reg_rtx (SImode);
34440 emit_insn (gen_popcntbsi2 (tmp1, src));
34441
34442 tmp2 = gen_reg_rtx (SImode);
34443 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
34444 tmp3 = gen_reg_rtx (SImode);
34445 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
34446
34447 tmp4 = gen_reg_rtx (SImode);
34448 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
34449 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
34450 }
34451 else
34452 rs6000_emit_popcount (tmp, src);
34453 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
34454 }
34455 else
34456 {
34457 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
34458 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
34459 {
34460 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
34461
34462 tmp1 = gen_reg_rtx (DImode);
34463 emit_insn (gen_popcntbdi2 (tmp1, src));
34464
34465 tmp2 = gen_reg_rtx (DImode);
34466 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
34467 tmp3 = gen_reg_rtx (DImode);
34468 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
34469
34470 tmp4 = gen_reg_rtx (DImode);
34471 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
34472 tmp5 = gen_reg_rtx (DImode);
34473 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
34474
34475 tmp6 = gen_reg_rtx (DImode);
34476 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
34477 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
34478 }
34479 else
34480 rs6000_emit_popcount (tmp, src);
34481 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
34482 }
34483 }
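
/* Illustrative C equivalent of the SImode shift+xor variant above
   (not part of GCC; per_byte_counts again models popcntb):

     unsigned int t = per_byte_counts (x);     // popcntb
     t ^= t >> 16;                             // fold halves
     t ^= t >> 8;                              // fold bytes
     return t & 1;                             // parity bit

   Folding with xor preserves the low bit's parity at each step, so
   only the final single-bit mask is needed. */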
34484
34485 /* Expand an Altivec constant permutation for little endian mode.
34486 OP0 and OP1 are the input vectors and TARGET is the output vector.
34487 SEL specifies the constant permutation vector.
34488
34489 There are two issues: First, the two input operands must be
34490 swapped so that together they form a double-wide array in LE
34491 order. Second, the vperm instruction has surprising behavior
34492 in LE mode: it interprets the elements of the source vectors
34493 in BE mode ("left to right") and interprets the elements of
34494 the destination vector in LE mode ("right to left"). To
34495 correct for this, we must subtract each element of the permute
34496 control vector from 31.
34497
34498 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
34499 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
34500 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
34501 serve as the permute control vector. Then, in BE mode,
34502
34503 vperm 9,10,11,12
34504
34505 places the desired result in vr9. However, in LE mode the
34506 vector contents will be
34507
34508 vr10 = 00000003 00000002 00000001 00000000
34509 vr11 = 00000007 00000006 00000005 00000004
34510
34511 The result of the vperm using the same permute control vector is
34512
34513 vr9 = 05000000 07000000 01000000 03000000
34514
34515 That is, the leftmost 4 bytes of vr10 are interpreted as the
34516 source for the rightmost 4 bytes of vr9, and so on.
34517
34518 If we change the permute control vector to
34519
34520 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
34521
34522 and issue
34523
34524 vperm 9,11,10,12
34525
34526 we get the desired
34527
34528 vr9 = 00000006 00000004 00000002 00000000. */
34529
34530 static void
34531 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
34532 const vec_perm_indices &sel)
34533 {
34534 unsigned int i;
34535 rtx perm[16];
34536 rtx constv, unspec;
34537
34538 /* Unpack and adjust the constant selector. */
34539 for (i = 0; i < 16; ++i)
34540 {
34541 unsigned int elt = 31 - (sel[i] & 31);
34542 perm[i] = GEN_INT (elt);
34543 }
34544
34545 /* Expand to a permute, swapping the inputs and using the
34546 adjusted selector. */
34547 if (!REG_P (op0))
34548 op0 = force_reg (V16QImode, op0);
34549 if (!REG_P (op1))
34550 op1 = force_reg (V16QImode, op1);
34551
34552 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
34553 constv = force_reg (V16QImode, constv);
34554 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
34555 UNSPEC_VPERM);
34556 if (!REG_P (target))
34557 {
34558 rtx tmp = gen_reg_rtx (V16QImode);
34559 emit_move_insn (tmp, unspec);
34560 unspec = tmp;
34561 }
34562
34563 emit_move_insn (target, unspec);
34564 }
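
/* Tying this to the example in the comment above: the BE selector
   {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} passes through
   31 - (sel[i] & 31) to become {31,30,29,28,23,22,21,20,15,14,13,12,
   7,6,5,4}, and the two operands are handed to vperm swapped. */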
34565
34566 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
34567 permute control vector. But here it's not a constant, so we must
34568 generate a vector NAND or NOR to do the adjustment. */
34569
34570 void
34571 altivec_expand_vec_perm_le (rtx operands[4])
34572 {
34573 rtx notx, iorx, unspec;
34574 rtx target = operands[0];
34575 rtx op0 = operands[1];
34576 rtx op1 = operands[2];
34577 rtx sel = operands[3];
34578 rtx tmp = target;
34579 rtx norreg = gen_reg_rtx (V16QImode);
34580 machine_mode mode = GET_MODE (target);
34581
34582 /* Get everything in regs so the pattern matches. */
34583 if (!REG_P (op0))
34584 op0 = force_reg (mode, op0);
34585 if (!REG_P (op1))
34586 op1 = force_reg (mode, op1);
34587 if (!REG_P (sel))
34588 sel = force_reg (V16QImode, sel);
34589 if (!REG_P (target))
34590 tmp = gen_reg_rtx (mode);
34591
34592 if (TARGET_P9_VECTOR)
34593 {
34594 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
34595 UNSPEC_VPERMR);
34596 }
34597 else
34598 {
34599 /* Invert the selector with a VNAND if available, else a VNOR.
34600 The VNAND is preferred for future fusion opportunities. */
34601 notx = gen_rtx_NOT (V16QImode, sel);
34602 iorx = (TARGET_P8_VECTOR
34603 ? gen_rtx_IOR (V16QImode, notx, notx)
34604 : gen_rtx_AND (V16QImode, notx, notx));
34605 emit_insn (gen_rtx_SET (norreg, iorx));
34606
34607 /* Permute with operands reversed and adjusted selector. */
34608 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
34609 UNSPEC_VPERM);
34610 }
34611
34612 /* Copy into target, possibly by way of a register. */
34613 if (!REG_P (target))
34614 {
34615 emit_move_insn (tmp, unspec);
34616 unspec = tmp;
34617 }
34618
34619 emit_move_insn (target, unspec);
34620 }
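/* Illustrative sketch, not used by the compiler proper: why a vector
   NAND or NOR of the selector with itself implements the "subtract
   from 31" adjustment for a non-constant selector.  vperm examines
   only the low 5 bits of each selector byte, and for any 5-bit value
   the one's complement of those bits equals 31 minus the value.  The
   function name is hypothetical.  */

static void ATTRIBUTE_UNUSED
altivec_le_selector_complement_example (void)
{
  for (unsigned int x = 0; x < 32; x++)
    {
      /* vnand (x, x) and vnor (x, x) both compute ~x per element;
	 the bits above bit 4 are ignored by vperm.  */
      unsigned int complemented = ~x & 31;
      gcc_assert (complemented == 31 - x);
    }
}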
34621
34622 /* Expand an Altivec constant permutation. Return true if we match
34623 an efficient implementation; false to fall back to VPERM.
34624
34625 OP0 and OP1 are the input vectors and TARGET is the output vector.
34626 SEL specifies the constant permutation vector. */
34627
34628 static bool
34629 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
34630 const vec_perm_indices &sel)
34631 {
34632 struct altivec_perm_insn {
34633 HOST_WIDE_INT mask;
34634 enum insn_code impl;
34635 unsigned char perm[16];
34636 };
34637 static const struct altivec_perm_insn patterns[] = {
34638 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
34639 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
34640 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
34641 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
34642 { OPTION_MASK_ALTIVEC,
34643 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
34644 : CODE_FOR_altivec_vmrglb_direct),
34645 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
34646 { OPTION_MASK_ALTIVEC,
34647 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
34648 : CODE_FOR_altivec_vmrglh_direct),
34649 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
34650 { OPTION_MASK_ALTIVEC,
34651 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
34652 : CODE_FOR_altivec_vmrglw_direct),
34653 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
34654 { OPTION_MASK_ALTIVEC,
34655 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
34656 : CODE_FOR_altivec_vmrghb_direct),
34657 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
34658 { OPTION_MASK_ALTIVEC,
34659 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
34660 : CODE_FOR_altivec_vmrghh_direct),
34661 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
34662 { OPTION_MASK_ALTIVEC,
34663 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
34664 : CODE_FOR_altivec_vmrghw_direct),
34665 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
34666 { OPTION_MASK_P8_VECTOR,
34667 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
34668 : CODE_FOR_p8_vmrgow_v4sf_direct),
34669 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
34670 { OPTION_MASK_P8_VECTOR,
34671 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
34672 : CODE_FOR_p8_vmrgew_v4sf_direct),
34673 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
34674 };
34675
34676 unsigned int i, j, elt, which;
34677 unsigned char perm[16];
34678 rtx x;
34679 bool one_vec;
34680
34681 /* Unpack the constant selector. */
34682 for (i = which = 0; i < 16; ++i)
34683 {
34684 elt = sel[i] & 31;
34685 which |= (elt < 16 ? 1 : 2);
34686 perm[i] = elt;
34687 }
34688
34689 /* Simplify the constant selector based on operands. */
34690 switch (which)
34691 {
34692 default:
34693 gcc_unreachable ();
34694
34695 case 3:
34696 one_vec = false;
34697 if (!rtx_equal_p (op0, op1))
34698 break;
34699 /* FALLTHRU */
34700
34701 case 2:
34702 for (i = 0; i < 16; ++i)
34703 perm[i] &= 15;
34704 op0 = op1;
34705 one_vec = true;
34706 break;
34707
34708 case 1:
34709 op1 = op0;
34710 one_vec = true;
34711 break;
34712 }
34713
34714 /* Look for splat patterns. */
34715 if (one_vec)
34716 {
34717 elt = perm[0];
34718
34719 for (i = 0; i < 16; ++i)
34720 if (perm[i] != elt)
34721 break;
34722 if (i == 16)
34723 {
34724 if (!BYTES_BIG_ENDIAN)
34725 elt = 15 - elt;
34726 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
34727 return true;
34728 }
34729
34730 if (elt % 2 == 0)
34731 {
34732 for (i = 0; i < 16; i += 2)
34733 if (perm[i] != elt || perm[i + 1] != elt + 1)
34734 break;
34735 if (i == 16)
34736 {
34737 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
34738 x = gen_reg_rtx (V8HImode);
34739 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
34740 GEN_INT (field)));
34741 emit_move_insn (target, gen_lowpart (V16QImode, x));
34742 return true;
34743 }
34744 }
34745
34746 if (elt % 4 == 0)
34747 {
34748 for (i = 0; i < 16; i += 4)
34749 if (perm[i] != elt
34750 || perm[i + 1] != elt + 1
34751 || perm[i + 2] != elt + 2
34752 || perm[i + 3] != elt + 3)
34753 break;
34754 if (i == 16)
34755 {
34756 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
34757 x = gen_reg_rtx (V4SImode);
34758 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
34759 GEN_INT (field)));
34760 emit_move_insn (target, gen_lowpart (V16QImode, x));
34761 return true;
34762 }
34763 }
34764 }
34765
34766 /* Look for merge and pack patterns. */
34767 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
34768 {
34769 bool swapped;
34770
34771 if ((patterns[j].mask & rs6000_isa_flags) == 0)
34772 continue;
34773
34774 elt = patterns[j].perm[0];
34775 if (perm[0] == elt)
34776 swapped = false;
34777 else if (perm[0] == elt + 16)
34778 swapped = true;
34779 else
34780 continue;
34781 for (i = 1; i < 16; ++i)
34782 {
34783 elt = patterns[j].perm[i];
34784 if (swapped)
34785 elt = (elt >= 16 ? elt - 16 : elt + 16);
34786 else if (one_vec && elt >= 16)
34787 elt -= 16;
34788 if (perm[i] != elt)
34789 break;
34790 }
34791 if (i == 16)
34792 {
34793 enum insn_code icode = patterns[j].impl;
34794 machine_mode omode = insn_data[icode].operand[0].mode;
34795 machine_mode imode = insn_data[icode].operand[1].mode;
34796
34797 /* For little-endian, don't use vpkuwum and vpkuhum if the
34798 underlying vector type is not V4SI and V8HI, respectively.
34799 For example, using vpkuwum with a V8HI picks up the even
34800 halfwords (BE numbering) when the even halfwords (LE
34801 numbering) are what we need. */
34802 if (!BYTES_BIG_ENDIAN
34803 && icode == CODE_FOR_altivec_vpkuwum_direct
34804 && ((GET_CODE (op0) == REG
34805 && GET_MODE (op0) != V4SImode)
34806 || (GET_CODE (op0) == SUBREG
34807 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
34808 continue;
34809 if (!BYTES_BIG_ENDIAN
34810 && icode == CODE_FOR_altivec_vpkuhum_direct
34811 && ((GET_CODE (op0) == REG
34812 && GET_MODE (op0) != V8HImode)
34813 || (GET_CODE (op0) == SUBREG
34814 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
34815 continue;
34816
34817 /* For little-endian, the two input operands must be swapped
34818 (or swapped back) to ensure proper right-to-left numbering
34819 from 0 to 2N-1. */
34820 if (swapped ^ !BYTES_BIG_ENDIAN)
34821 std::swap (op0, op1);
34822 if (imode != V16QImode)
34823 {
34824 op0 = gen_lowpart (imode, op0);
34825 op1 = gen_lowpart (imode, op1);
34826 }
34827 if (omode == V16QImode)
34828 x = target;
34829 else
34830 x = gen_reg_rtx (omode);
34831 emit_insn (GEN_FCN (icode) (x, op0, op1));
34832 if (omode != V16QImode)
34833 emit_move_insn (target, gen_lowpart (V16QImode, x));
34834 return true;
34835 }
34836 }
34837
34838 if (!BYTES_BIG_ENDIAN)
34839 {
34840 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
34841 return true;
34842 }
34843
34844 return false;
34845 }
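/* Illustrative sketch, not used by the compiler proper: the byte-splat
   test applied above, on a plain array.  Returns the vspltb lane
   number, or -1 if the selector is not a byte splat.  The function
   name is hypothetical.  */

static int ATTRIBUTE_UNUSED
altivec_byte_splat_index_example (const unsigned char perm[16])
{
  unsigned char elt = perm[0];
  for (int i = 1; i < 16; i++)
    if (perm[i] != elt)
      return -1;
  /* Sixteen copies of ELT map to a single vspltb; for little endian
     the lane number is flipped to 15 - ELT, as in the code above.  */
  return BYTES_BIG_ENDIAN ? elt : 15 - elt;
}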
34846
34847 /* Expand a VSX Permute Doubleword constant permutation.
34848 Return true if we match an efficient implementation. */
34849
34850 static bool
34851 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
34852 unsigned char perm0, unsigned char perm1)
34853 {
34854 rtx x;
34855
34856 /* If both selectors come from the same operand, fold to single op. */
34857 if ((perm0 & 2) == (perm1 & 2))
34858 {
34859 if (perm0 & 2)
34860 op0 = op1;
34861 else
34862 op1 = op0;
34863 }
34864 /* If both operands are equal, fold to simpler permutation. */
34865 if (rtx_equal_p (op0, op1))
34866 {
34867 perm0 = perm0 & 1;
34868 perm1 = (perm1 & 1) + 2;
34869 }
34870 /* If the first selector comes from the second operand, swap. */
34871 else if (perm0 & 2)
34872 {
34873 if (perm1 & 2)
34874 return false;
34875 perm0 -= 2;
34876 perm1 += 2;
34877 std::swap (op0, op1);
34878 }
34879 /* If the second selector does not come from the second operand, fail. */
34880 else if ((perm1 & 2) == 0)
34881 return false;
34882
34883 /* Success! */
34884 if (target != NULL)
34885 {
34886 machine_mode vmode, dmode;
34887 rtvec v;
34888
34889 vmode = GET_MODE (target);
34890 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
34891 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
34892 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
34893 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
34894 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
34895 emit_insn (gen_rtx_SET (target, x));
34896 }
34897 return true;
34898 }
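/* Illustrative sketch, not used by the compiler proper: the selector
   normalization above on plain integers.  PERM0 and PERM1 each pick
   one doubleword from the concatenation {op0[0], op0[1], op1[0],
   op1[1]}; bit 1 selects the operand and bit 0 the element.  After
   normalization, perm0 indexes op0 and perm1 indexes op1, the form
   xxpermdi encodes directly.  Names are hypothetical, and the
   equal-operand folds handled above are omitted.  */

static bool ATTRIBUTE_UNUSED
xxpermdi_normalize_example (unsigned char perm0, unsigned char perm1,
			    bool *swap_ops)
{
  *swap_ops = false;
  if (perm0 & 2)
    {
      /* Both selectors from the second operand cannot be encoded
	 without first folding op0 = op1, handled separately above.  */
      if (perm1 & 2)
	return false;
      perm0 -= 2, perm1 += 2;
      *swap_ops = true;
    }
  else if ((perm1 & 2) == 0)
    return false;
  return perm0 < 2 && perm1 >= 2;
}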
34899
34900 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
34901
34902 static bool
34903 rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
34904 rtx op1, const vec_perm_indices &sel)
34905 {
34906 bool testing_p = !target;
34907
34908 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
34909 if (TARGET_ALTIVEC && testing_p)
34910 return true;
34911
34912 /* Check for ps_merge* or xxpermdi insns. */
34913 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
34914 {
34915 if (testing_p)
34916 {
34917 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
34918 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
34919 }
34920 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
34921 return true;
34922 }
34923
34924 if (TARGET_ALTIVEC)
34925 {
34926 /* Force the target-independent code to lower to V16QImode. */
34927 if (vmode != V16QImode)
34928 return false;
34929 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
34930 return true;
34931 }
34932
34933 return false;
34934 }
34935
34936 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
34937 OP0 and OP1 are the input vectors and TARGET is the output vector.
34938 PERM specifies the constant permutation vector. */
34939
34940 static void
34941 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
34942 machine_mode vmode, const vec_perm_builder &perm)
34943 {
34944 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
34945 if (x != target)
34946 emit_move_insn (target, x);
34947 }
34948
34949 /* Expand an extract even operation. */
34950
34951 void
34952 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
34953 {
34954 machine_mode vmode = GET_MODE (target);
34955 unsigned i, nelt = GET_MODE_NUNITS (vmode);
34956 vec_perm_builder perm (nelt, nelt, 1);
34957
34958 for (i = 0; i < nelt; i++)
34959 perm.quick_push (i * 2);
34960
34961 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
34962 }
34963
34964 /* Expand a vector interleave operation. */
34965
34966 void
34967 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
34968 {
34969 machine_mode vmode = GET_MODE (target);
34970 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
34971 vec_perm_builder perm (nelt, nelt, 1);
34972
34973 high = (highp ? 0 : nelt / 2);
34974 for (i = 0; i < nelt / 2; i++)
34975 {
34976 perm.quick_push (i + high);
34977 perm.quick_push (i + nelt + high);
34978 }
34979
34980 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
34981 }
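/* Illustrative sketch, not used by the compiler proper: the selectors
   the two expanders above would build for a hypothetical four-element
   vector.  Extract-even pushes {0, 2, 4, 6}; interleave with HIGHP
   pushes {0, 4, 1, 5}, and without HIGHP {2, 6, 3, 7}.  */

static void ATTRIBUTE_UNUSED
rs6000_perm_builder_example (void)
{
  const unsigned nelt = 4;
  unsigned extract_even[4], inter_hi[4];

  for (unsigned i = 0; i < nelt; i++)
    extract_even[i] = i * 2;		/* {0, 2, 4, 6}  */

  for (unsigned i = 0; i < nelt / 2; i++)
    {
      inter_hi[2 * i] = i;		/* element from op0  */
      inter_hi[2 * i + 1] = i + nelt;	/* element from op1  */
    }
  gcc_assert (extract_even[1] == 2 && inter_hi[1] == nelt);
}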
34982
34983 /* Scale a V2DF vector SRC by two raised to the power SCALE, placing the result in TGT. */
34984 void
34985 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
34986 {
34987 HOST_WIDE_INT hwi_scale (scale);
34988 REAL_VALUE_TYPE r_pow;
34989 rtvec v = rtvec_alloc (2);
34990 rtx elt;
34991 rtx scale_vec = gen_reg_rtx (V2DFmode);
34992 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
34993 elt = const_double_from_real_value (r_pow, DFmode);
34994 RTVEC_ELT (v, 0) = elt;
34995 RTVEC_ELT (v, 1) = elt;
34996 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
34997 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
34998 }
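/* Illustrative sketch, not used by the compiler proper: the scalar
   equivalent of rs6000_scale_v2df for a non-negative SCALE.  The
   expander builds a constant vector of 2.0**SCALE and emits a single
   vector multiply; this loop shows the per-lane arithmetic.  The
   function name is hypothetical.  */

static void ATTRIBUTE_UNUSED
rs6000_scale_v2df_example (double tgt[2], const double src[2], int scale)
{
  double factor = 1.0;
  for (int i = 0; i < scale; i++)	/* factor = 2.0 ** scale  */
    factor *= 2.0;
  tgt[0] = src[0] * factor;
  tgt[1] = src[1] * factor;
}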
34999
35000 /* Return an RTX representing where to find the function value of a
35001 function returning MODE. */
35002 static rtx
35003 rs6000_complex_function_value (machine_mode mode)
35004 {
35005 unsigned int regno;
35006 rtx r1, r2;
35007 machine_mode inner = GET_MODE_INNER (mode);
35008 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
35009
35010 if (TARGET_FLOAT128_TYPE
35011 && (mode == KCmode
35012 || (mode == TCmode && TARGET_IEEEQUAD)))
35013 regno = ALTIVEC_ARG_RETURN;
35014
35015 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
35016 regno = FP_ARG_RETURN;
35017
35018 else
35019 {
35020 regno = GP_ARG_RETURN;
35021
35022 /* 32-bit is OK since it'll go in r3/r4. */
35023 if (TARGET_32BIT && inner_bytes >= 4)
35024 return gen_rtx_REG (mode, regno);
35025 }
35026
35027 if (inner_bytes >= 8)
35028 return gen_rtx_REG (mode, regno);
35029
35030 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
35031 const0_rtx);
35032 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
35033 GEN_INT (inner_bytes));
35034 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
35035 }
35036
35037 /* Return an rtx describing a return value of MODE as a PARALLEL
35038 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
35039 stride REG_STRIDE. */
35040
35041 static rtx
35042 rs6000_parallel_return (machine_mode mode,
35043 int n_elts, machine_mode elt_mode,
35044 unsigned int regno, unsigned int reg_stride)
35045 {
35046 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
35047
35048 int i;
35049 for (i = 0; i < n_elts; i++)
35050 {
35051 rtx r = gen_rtx_REG (elt_mode, regno);
35052 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
35053 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
35054 regno += reg_stride;
35055 }
35056
35057 return par;
35058 }
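/* Illustrative example, not used by the compiler proper: the PARALLEL
   built above for a DImode value returned in two SImode registers on
   a 32-bit target, i.e. rs6000_parallel_return (DImode, 2, SImode,
   GP_ARG_RETURN, 1).  The resulting RTL is equivalent to

     (parallel:DI [(expr_list (reg:SI 3) (const_int 0))
		   (expr_list (reg:SI 4) (const_int 4))])

   placing the two halves at byte offsets 0 and 4 of the value.  */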
35059
35060 /* Target hook for TARGET_FUNCTION_VALUE.
35061
35062 An integer value is in r3 and a floating-point value is in fp1,
35063 unless -msoft-float. */
35064
35065 static rtx
35066 rs6000_function_value (const_tree valtype,
35067 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
35068 bool outgoing ATTRIBUTE_UNUSED)
35069 {
35070 machine_mode mode;
35071 unsigned int regno;
35072 machine_mode elt_mode;
35073 int n_elts;
35074
35075 /* Special handling for structs in darwin64. */
35076 if (TARGET_MACHO
35077 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
35078 {
35079 CUMULATIVE_ARGS valcum;
35080 rtx valret;
35081
35082 valcum.words = 0;
35083 valcum.fregno = FP_ARG_MIN_REG;
35084 valcum.vregno = ALTIVEC_ARG_MIN_REG;
35085 /* Do a trial code generation as if this were going to be passed as
35086 an argument; if any part goes in memory, we return NULL. */
35087 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
35088 if (valret)
35089 return valret;
35090 /* Otherwise fall through to standard ABI rules. */
35091 }
35092
35093 mode = TYPE_MODE (valtype);
35094
35095 /* The ELFv2 ABI returns homogeneous floating-point and vector aggregates in registers. */
35096 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
35097 {
35098 int first_reg, n_regs;
35099
35100 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
35101 {
35102 /* _Decimal128 must use even/odd register pairs. */
35103 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
35104 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
35105 }
35106 else
35107 {
35108 first_reg = ALTIVEC_ARG_RETURN;
35109 n_regs = 1;
35110 }
35111
35112 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
35113 }
35114
35115 /* Some return value types need to be split under -mpowerpc64 with the 32-bit ABI. */
35116 if (TARGET_32BIT && TARGET_POWERPC64)
35117 switch (mode)
35118 {
35119 default:
35120 break;
35121 case E_DImode:
35122 case E_SCmode:
35123 case E_DCmode:
35124 case E_TCmode:
35125 int count = GET_MODE_SIZE (mode) / 4;
35126 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
35127 }
35128
35129 if ((INTEGRAL_TYPE_P (valtype)
35130 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
35131 || POINTER_TYPE_P (valtype))
35132 mode = TARGET_32BIT ? SImode : DImode;
35133
35134 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
35135 /* _Decimal128 must use an even/odd register pair. */
35136 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
35137 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
35138 && !FLOAT128_VECTOR_P (mode))
35139 regno = FP_ARG_RETURN;
35140 else if (TREE_CODE (valtype) == COMPLEX_TYPE
35141 && targetm.calls.split_complex_arg)
35142 return rs6000_complex_function_value (mode);
35143 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
35144 return register is used in both cases, and we won't see V2DImode/V2DFmode
35145 for pure altivec, combine the two cases. */
35146 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
35147 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
35148 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
35149 regno = ALTIVEC_ARG_RETURN;
35150 else
35151 regno = GP_ARG_RETURN;
35152
35153 return gen_rtx_REG (mode, regno);
35154 }
35155
35156 /* Define how to find the value returned by a library function
35157 assuming the value has mode MODE. */
35158 rtx
35159 rs6000_libcall_value (machine_mode mode)
35160 {
35161 unsigned int regno;
35162
35163 /* A long long return value needs to be split under -mpowerpc64 with the 32-bit ABI. */
35164 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
35165 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
35166
35167 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
35168 /* _Decimal128 must use an even/odd register pair. */
35169 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
35170 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
35171 regno = FP_ARG_RETURN;
35172 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
35173 return register is used in both cases, and we won't see V2DImode/V2DFmode
35174 for pure altivec, combine the two cases. */
35175 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
35176 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
35177 regno = ALTIVEC_ARG_RETURN;
35178 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
35179 return rs6000_complex_function_value (mode);
35180 else
35181 regno = GP_ARG_RETURN;
35182
35183 return gen_rtx_REG (mode, regno);
35184 }
35185
35186 /* Compute register pressure classes. We implement the target hook to avoid
35187 IRA picking something like NON_SPECIAL_REGS as a pressure class, which can
35188 lead to incorrect estimates of the number of available registers and therefore
35189 increased register pressure/spill. */
35190 static int
35191 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
35192 {
35193 int n;
35194
35195 n = 0;
35196 pressure_classes[n++] = GENERAL_REGS;
35197 if (TARGET_VSX)
35198 pressure_classes[n++] = VSX_REGS;
35199 else
35200 {
35201 if (TARGET_ALTIVEC)
35202 pressure_classes[n++] = ALTIVEC_REGS;
35203 if (TARGET_HARD_FLOAT)
35204 pressure_classes[n++] = FLOAT_REGS;
35205 }
35206 pressure_classes[n++] = CR_REGS;
35207 pressure_classes[n++] = SPECIAL_REGS;
35208
35209 return n;
35210 }
35211
35212 /* Given FROM and TO register numbers, say whether this elimination is allowed.
35213 Frame pointer elimination is automatically handled.
35214
35215 For the RS/6000, if frame pointer elimination is being done, we would like
35216 to convert ap into fp, not sp.
35217
35218 We need r30 if -mminimal-toc was specified, and there are constant pool
35219 references. */
35220
35221 static bool
35222 rs6000_can_eliminate (const int from, const int to)
35223 {
35224 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
35225 ? ! frame_pointer_needed
35226 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
35227 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC
35228 || constant_pool_empty_p ()
35229 : true);
35230 }
35231
35232 /* Define the offset between two registers, FROM to be eliminated and its
35233 replacement TO, at the start of a routine. */
35234 HOST_WIDE_INT
35235 rs6000_initial_elimination_offset (int from, int to)
35236 {
35237 rs6000_stack_t *info = rs6000_stack_info ();
35238 HOST_WIDE_INT offset;
35239
35240 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
35241 offset = info->push_p ? 0 : -info->total_size;
35242 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
35243 {
35244 offset = info->push_p ? 0 : -info->total_size;
35245 if (FRAME_GROWS_DOWNWARD)
35246 offset += info->fixed_size + info->vars_size + info->parm_size;
35247 }
35248 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
35249 offset = FRAME_GROWS_DOWNWARD
35250 ? info->fixed_size + info->vars_size + info->parm_size
35251 : 0;
35252 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
35253 offset = info->total_size;
35254 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
35255 offset = info->push_p ? info->total_size : 0;
35256 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
35257 offset = 0;
35258 else
35259 gcc_unreachable ();
35260
35261 return offset;
35262 }
35263
35264 /* Fill in sizes of registers used by unwinder. */
35265
35266 static void
35267 rs6000_init_dwarf_reg_sizes_extra (tree address)
35268 {
35269 if (TARGET_MACHO && ! TARGET_ALTIVEC)
35270 {
35271 int i;
35272 machine_mode mode = TYPE_MODE (char_type_node);
35273 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
35274 rtx mem = gen_rtx_MEM (BLKmode, addr);
35275 rtx value = gen_int_mode (16, mode);
35276
35277 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
35278 The unwinder still needs to know the size of Altivec registers. */
35279
35280 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
35281 {
35282 int column = DWARF_REG_TO_UNWIND_COLUMN
35283 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
35284 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
35285
35286 emit_move_insn (adjust_address (mem, mode, offset), value);
35287 }
35288 }
35289 }
35290
35291 /* Map internal gcc register numbers to debug format register numbers.
35292 FORMAT specifies the type of debug register number to use:
35293 0 -- debug information, except for frame-related sections
35294 1 -- DWARF .debug_frame section
35295 2 -- DWARF .eh_frame section */
35296
35297 unsigned int
35298 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
35299 {
35300 /* We use the internal number for non-DWARF debug information,
35301 and also for .eh_frame. */
35302 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
35303 return regno;
35304
35305 /* On some platforms, we use the standard DWARF register
35306 numbering for .debug_info and .debug_frame. */
35307 #ifdef RS6000_USE_DWARF_NUMBERING
35308 if (regno <= 63)
35309 return regno;
35310 if (regno == LR_REGNO)
35311 return 108;
35312 if (regno == CTR_REGNO)
35313 return 109;
35314 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
35315 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
35316 The actual code emitted saves the whole of CR, so we map CR2_REGNO
35317 to the DWARF reg for CR. */
35318 if (format == 1 && regno == CR2_REGNO)
35319 return 64;
35320 if (CR_REGNO_P (regno))
35321 return regno - CR0_REGNO + 86;
35322 if (regno == CA_REGNO)
35323 return 101; /* XER */
35324 if (ALTIVEC_REGNO_P (regno))
35325 return regno - FIRST_ALTIVEC_REGNO + 1124;
35326 if (regno == VRSAVE_REGNO)
35327 return 356;
35328 if (regno == VSCR_REGNO)
35329 return 67;
35330 #endif
35331 return regno;
35332 }
35333
35334 /* target hook eh_return_filter_mode */
35335 static scalar_int_mode
35336 rs6000_eh_return_filter_mode (void)
35337 {
35338 return TARGET_32BIT ? SImode : word_mode;
35339 }
35340
35341 /* Target hook for translate_mode_attribute. */
35342 static machine_mode
35343 rs6000_translate_mode_attribute (machine_mode mode)
35344 {
35345 if ((FLOAT128_IEEE_P (mode)
35346 && ieee128_float_type_node == long_double_type_node)
35347 || (FLOAT128_IBM_P (mode)
35348 && ibm128_float_type_node == long_double_type_node))
35349 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
35350 return mode;
35351 }
35352
35353 /* Target hook for scalar_mode_supported_p. */
35354 static bool
35355 rs6000_scalar_mode_supported_p (scalar_mode mode)
35356 {
35357 /* -m32 does not support TImode. This is the default, from
35358 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
35359 same ABI as for -m32. But default_scalar_mode_supported_p allows
35360 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
35361 for -mpowerpc64. */
35362 if (TARGET_32BIT && mode == TImode)
35363 return false;
35364
35365 if (DECIMAL_FLOAT_MODE_P (mode))
35366 return default_decimal_float_supported_p ();
35367 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
35368 return true;
35369 else
35370 return default_scalar_mode_supported_p (mode);
35371 }
35372
35373 /* Target hook for vector_mode_supported_p. */
35374 static bool
35375 rs6000_vector_mode_supported_p (machine_mode mode)
35376 {
35377 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
35378 128-bit, the compiler might try to widen IEEE 128-bit to IBM
35379 double-double. */
35380 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
35381 return true;
35382
35383 else
35384 return false;
35385 }
35386
35387 /* Target hook for floatn_mode. */
35388 static opt_scalar_float_mode
35389 rs6000_floatn_mode (int n, bool extended)
35390 {
35391 if (extended)
35392 {
35393 switch (n)
35394 {
35395 case 32:
35396 return DFmode;
35397
35398 case 64:
35399 if (TARGET_FLOAT128_TYPE)
35400 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
35401 else
35402 return opt_scalar_float_mode ();
35403
35404 case 128:
35405 return opt_scalar_float_mode ();
35406
35407 default:
35408 /* Those are the only valid _FloatNx types. */
35409 gcc_unreachable ();
35410 }
35411 }
35412 else
35413 {
35414 switch (n)
35415 {
35416 case 32:
35417 return SFmode;
35418
35419 case 64:
35420 return DFmode;
35421
35422 case 128:
35423 if (TARGET_FLOAT128_TYPE)
35424 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
35425 else
35426 return opt_scalar_float_mode ();
35427
35428 default:
35429 return opt_scalar_float_mode ();
35430 }
35431 }
35432
35433 }
35434
35435 /* Target hook for c_mode_for_suffix. */
35436 static machine_mode
35437 rs6000_c_mode_for_suffix (char suffix)
35438 {
35439 if (TARGET_FLOAT128_TYPE)
35440 {
35441 if (suffix == 'q' || suffix == 'Q')
35442 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
35443
35444 /* At the moment, we are not defining a suffix for IBM extended double.
35445 If/when the default for -mabi=ieeelongdouble is changed, and we want
35446 to support __ibm128 constants in legacy library code, we may need to
35447 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
35448 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
35449 __float80 constants. */
35450 }
35451
35452 return VOIDmode;
35453 }
35454
35455 /* Target hook for invalid_arg_for_unprototyped_fn. */
35456 static const char *
35457 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
35458 {
35459 return (!rs6000_darwin64_abi
35460 && typelist == 0
35461 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
35462 && (funcdecl == NULL_TREE
35463 || (TREE_CODE (funcdecl) == FUNCTION_DECL
35464 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
35465 ? N_("AltiVec argument passed to unprototyped function")
35466 : NULL;
35467 }
35468
35469 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
35470 setup by using __stack_chk_fail_local hidden function instead of
35471 calling __stack_chk_fail directly. Otherwise it is better to call
35472 __stack_chk_fail directly. */
35473
35474 static tree ATTRIBUTE_UNUSED
35475 rs6000_stack_protect_fail (void)
35476 {
35477 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
35478 ? default_hidden_stack_protect_fail ()
35479 : default_external_stack_protect_fail ();
35480 }
35481
35482 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
35483
35484 #if TARGET_ELF
35485 static unsigned HOST_WIDE_INT
35486 rs6000_asan_shadow_offset (void)
35487 {
35488 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
35489 }
35490 #endif
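/* Illustrative sketch, not used by the compiler proper: how the offset
   above enters the standard ASAN shadow mapping.  With the default
   shadow scale of 3, the byte at ADDR is tracked by the shadow byte at
   (ADDR >> 3) + offset, using 1 << 41 for 64-bit and 1 << 29 for
   32-bit.  The function name is hypothetical.  */

static unsigned HOST_WIDE_INT ATTRIBUTE_UNUSED
rs6000_asan_shadow_addr_example (unsigned HOST_WIDE_INT addr)
{
  return (addr >> 3) + ((unsigned HOST_WIDE_INT) 1
			<< (TARGET_64BIT ? 41 : 29));
}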
35491 \f
35492 /* Mask options that we want to support inside of attribute((target)) and
35493 #pragma GCC target operations. Note, we do not include things like
35494 64/32-bit, endianness, hard/soft floating point, etc. that would have
35495 different calling sequences. */
35496
35497 struct rs6000_opt_mask {
35498 const char *name; /* option name */
35499 HOST_WIDE_INT mask; /* mask to set */
35500 bool invert; /* invert sense of mask */
35501 bool valid_target; /* option is a target option */
35502 };
35503
35504 static struct rs6000_opt_mask const rs6000_opt_masks[] =
35505 {
35506 { "altivec", OPTION_MASK_ALTIVEC, false, true },
35507 { "cmpb", OPTION_MASK_CMPB, false, true },
35508 { "crypto", OPTION_MASK_CRYPTO, false, true },
35509 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
35510 { "dlmzb", OPTION_MASK_DLMZB, false, true },
35511 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
35512 false, true },
35513 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
35514 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
35515 { "fprnd", OPTION_MASK_FPRND, false, true },
35516 { "hard-dfp", OPTION_MASK_DFP, false, true },
35517 { "htm", OPTION_MASK_HTM, false, true },
35518 { "isel", OPTION_MASK_ISEL, false, true },
35519 { "mfcrf", OPTION_MASK_MFCRF, false, true },
35520 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
35521 { "modulo", OPTION_MASK_MODULO, false, true },
35522 { "mulhw", OPTION_MASK_MULHW, false, true },
35523 { "multiple", OPTION_MASK_MULTIPLE, false, true },
35524 { "popcntb", OPTION_MASK_POPCNTB, false, true },
35525 { "popcntd", OPTION_MASK_POPCNTD, false, true },
35526 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
35527 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
35528 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
35529 { "power9-fusion", OPTION_MASK_P9_FUSION, false, true },
35530 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
35531 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
35532 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
35533 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
35534 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
35535 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
35536 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
35537 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
35538 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
35539 { "string", 0, false, true },
35540 { "update", OPTION_MASK_NO_UPDATE, true , true },
35541 { "vsx", OPTION_MASK_VSX, false, true },
35542 #ifdef OPTION_MASK_64BIT
35543 #if TARGET_AIX_OS
35544 { "aix64", OPTION_MASK_64BIT, false, false },
35545 { "aix32", OPTION_MASK_64BIT, true, false },
35546 #else
35547 { "64", OPTION_MASK_64BIT, false, false },
35548 { "32", OPTION_MASK_64BIT, true, false },
35549 #endif
35550 #endif
35551 #ifdef OPTION_MASK_EABI
35552 { "eabi", OPTION_MASK_EABI, false, false },
35553 #endif
35554 #ifdef OPTION_MASK_LITTLE_ENDIAN
35555 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
35556 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
35557 #endif
35558 #ifdef OPTION_MASK_RELOCATABLE
35559 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
35560 #endif
35561 #ifdef OPTION_MASK_STRICT_ALIGN
35562 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
35563 #endif
35564 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
35565 { "string", 0, false, false },
35566 };
35567
35568 /* Builtin mask mapping for printing the flags. */
35569 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
35570 {
35571 { "altivec", RS6000_BTM_ALTIVEC, false, false },
35572 { "vsx", RS6000_BTM_VSX, false, false },
35573 { "fre", RS6000_BTM_FRE, false, false },
35574 { "fres", RS6000_BTM_FRES, false, false },
35575 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
35576 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
35577 { "popcntd", RS6000_BTM_POPCNTD, false, false },
35578 { "cell", RS6000_BTM_CELL, false, false },
35579 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
35580 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
35581 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
35582 { "crypto", RS6000_BTM_CRYPTO, false, false },
35583 { "htm", RS6000_BTM_HTM, false, false },
35584 { "hard-dfp", RS6000_BTM_DFP, false, false },
35585 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
35586 { "long-double-128", RS6000_BTM_LDBL128, false, false },
35587 { "powerpc64", RS6000_BTM_POWERPC64, false, false },
35588 { "float128", RS6000_BTM_FLOAT128, false, false },
35589 { "float128-hw", RS6000_BTM_FLOAT128_HW,false, false },
35590 };
35591
35592 /* Option variables that we want to support inside attribute((target)) and
35593 #pragma GCC target operations. */
35594
35595 struct rs6000_opt_var {
35596 const char *name; /* option name */
35597 size_t global_offset; /* offset of the option in global_options. */
35598 size_t target_offset; /* offset of the option in target options. */
35599 };
35600
35601 static struct rs6000_opt_var const rs6000_opt_vars[] =
35602 {
35603 { "friz",
35604 offsetof (struct gcc_options, x_TARGET_FRIZ),
35605 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
35606 { "avoid-indexed-addresses",
35607 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
35608 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
35609 { "longcall",
35610 offsetof (struct gcc_options, x_rs6000_default_long_calls),
35611 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
35612 { "optimize-swaps",
35613 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
35614 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
35615 { "allow-movmisalign",
35616 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
35617 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
35618 { "sched-groups",
35619 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
35620 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
35621 { "always-hint",
35622 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
35623 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
35624 { "align-branch-targets",
35625 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
35626 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
35627 { "tls-markers",
35628 offsetof (struct gcc_options, x_tls_markers),
35629 offsetof (struct cl_target_option, x_tls_markers), },
35630 { "sched-prolog",
35631 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
35632 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
35633 { "sched-epilog",
35634 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
35635 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
35636 { "speculate-indirect-jumps",
35637 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
35638 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
35639 };
35640
35641 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
35642 parsing. Return true if there were no errors. */
35643
35644 static bool
35645 rs6000_inner_target_options (tree args, bool attr_p)
35646 {
35647 bool ret = true;
35648
35649 if (args == NULL_TREE)
35650 ;
35651
35652 else if (TREE_CODE (args) == STRING_CST)
35653 {
35654 char *p = ASTRDUP (TREE_STRING_POINTER (args));
35655 char *q;
35656
35657 while ((q = strtok (p, ",")) != NULL)
35658 {
35659 bool error_p = false;
35660 bool not_valid_p = false;
35661 const char *cpu_opt = NULL;
35662
35663 p = NULL;
35664 if (strncmp (q, "cpu=", 4) == 0)
35665 {
35666 int cpu_index = rs6000_cpu_name_lookup (q+4);
35667 if (cpu_index >= 0)
35668 rs6000_cpu_index = cpu_index;
35669 else
35670 {
35671 error_p = true;
35672 cpu_opt = q+4;
35673 }
35674 }
35675 else if (strncmp (q, "tune=", 5) == 0)
35676 {
35677 int tune_index = rs6000_cpu_name_lookup (q+5);
35678 if (tune_index >= 0)
35679 rs6000_tune_index = tune_index;
35680 else
35681 {
35682 error_p = true;
35683 cpu_opt = q+5;
35684 }
35685 }
35686 else
35687 {
35688 size_t i;
35689 bool invert = false;
35690 char *r = q;
35691
35692 error_p = true;
35693 if (strncmp (r, "no-", 3) == 0)
35694 {
35695 invert = true;
35696 r += 3;
35697 }
35698
35699 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
35700 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
35701 {
35702 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
35703
35704 if (!rs6000_opt_masks[i].valid_target)
35705 not_valid_p = true;
35706 else
35707 {
35708 error_p = false;
35709 rs6000_isa_flags_explicit |= mask;
35710
35711 /* VSX needs altivec, so -mvsx automagically sets
35712 altivec and disables -mavoid-indexed-addresses. */
35713 if (!invert)
35714 {
35715 if (mask == OPTION_MASK_VSX)
35716 {
35717 mask |= OPTION_MASK_ALTIVEC;
35718 TARGET_AVOID_XFORM = 0;
35719 }
35720 }
35721
35722 if (rs6000_opt_masks[i].invert)
35723 invert = !invert;
35724
35725 if (invert)
35726 rs6000_isa_flags &= ~mask;
35727 else
35728 rs6000_isa_flags |= mask;
35729 }
35730 break;
35731 }
35732
35733 if (error_p && !not_valid_p)
35734 {
35735 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
35736 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
35737 {
35738 size_t j = rs6000_opt_vars[i].global_offset;
35739 *((int *) ((char *)&global_options + j)) = !invert;
35740 error_p = false;
35741 not_valid_p = false;
35742 break;
35743 }
35744 }
35745 }
35746
35747 if (error_p)
35748 {
35749 const char *eprefix, *esuffix;
35750
35751 ret = false;
35752 if (attr_p)
35753 {
35754 eprefix = "__attribute__((__target__(";
35755 esuffix = ")))";
35756 }
35757 else
35758 {
35759 eprefix = "#pragma GCC target ";
35760 esuffix = "";
35761 }
35762
35763 if (cpu_opt)
35764 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
35765 q, esuffix);
35766 else if (not_valid_p)
35767 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
35768 else
35769 error ("%s%qs%s is invalid", eprefix, q, esuffix);
35770 }
35771 }
35772 }
35773
35774 else if (TREE_CODE (args) == TREE_LIST)
35775 {
35776 do
35777 {
35778 tree value = TREE_VALUE (args);
35779 if (value)
35780 {
35781 bool ret2 = rs6000_inner_target_options (value, attr_p);
35782 if (!ret2)
35783 ret = false;
35784 }
35785 args = TREE_CHAIN (args);
35786 }
35787 while (args != NULL_TREE);
35788 }
35789
35790 else
35791 {
35792 error ("attribute %<target%> argument not a string");
35793 return false;
35794 }
35795
35796 return ret;
35797 }
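/* Illustrative sketch, not used by the compiler proper: the tokenizing
   scheme used above, on a plain string.  A target string such as
   "no-vsx,altivec" is split on commas by strtok (passing NULL after
   the first call to continue in the same buffer), and a "no-" prefix
   flips the sense of the option that follows.  The buffer contents
   and function name are hypothetical.  */

static void ATTRIBUTE_UNUSED
rs6000_target_string_tokenize_example (void)
{
  char buf[] = "no-vsx,altivec";
  char *p = buf, *q;

  while ((q = strtok (p, ",")) != NULL)
    {
      p = NULL;			/* continue scanning the same string  */
      bool invert = strncmp (q, "no-", 3) == 0;
      const char *name = invert ? q + 3 : q;
      gcc_assert (name != NULL);
    }
}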
35798
35799 /* Print out the target options as a list for -mdebug=target. */
35800
35801 static void
35802 rs6000_debug_target_options (tree args, const char *prefix)
35803 {
35804 if (args == NULL_TREE)
35805 fprintf (stderr, "%s<NULL>", prefix);
35806
35807 else if (TREE_CODE (args) == STRING_CST)
35808 {
35809 char *p = ASTRDUP (TREE_STRING_POINTER (args));
35810 char *q;
35811
35812 while ((q = strtok (p, ",")) != NULL)
35813 {
35814 p = NULL;
35815 fprintf (stderr, "%s\"%s\"", prefix, q);
35816 prefix = ", ";
35817 }
35818 }
35819
35820 else if (TREE_CODE (args) == TREE_LIST)
35821 {
35822 do
35823 {
35824 tree value = TREE_VALUE (args);
35825 if (value)
35826 {
35827 rs6000_debug_target_options (value, prefix);
35828 prefix = ", ";
35829 }
35830 args = TREE_CHAIN (args);
35831 }
35832 while (args != NULL_TREE);
35833 }
35834
35835 else
35836 gcc_unreachable ();
35837
35838 return;
35839 }
35840
35841 \f
35842 /* Hook to validate attribute((target("..."))). */
35843
35844 static bool
35845 rs6000_valid_attribute_p (tree fndecl,
35846 tree ARG_UNUSED (name),
35847 tree args,
35848 int flags)
35849 {
35850 struct cl_target_option cur_target;
35851 bool ret;
35852 tree old_optimize;
35853 tree new_target, new_optimize;
35854 tree func_optimize;
35855
35856 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
35857
35858 if (TARGET_DEBUG_TARGET)
35859 {
35860 tree tname = DECL_NAME (fndecl);
35861 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
35862 if (tname)
35863 fprintf (stderr, "function: %.*s\n",
35864 (int) IDENTIFIER_LENGTH (tname),
35865 IDENTIFIER_POINTER (tname));
35866 else
35867 fprintf (stderr, "function: unknown\n");
35868
35869 fprintf (stderr, "args:");
35870 rs6000_debug_target_options (args, " ");
35871 fprintf (stderr, "\n");
35872
35873 if (flags)
35874 fprintf (stderr, "flags: 0x%x\n", flags);
35875
35876 fprintf (stderr, "--------------------\n");
35877 }
35878
35879 /* attribute((target("default"))) does nothing, beyond
35880 affecting multi-versioning. */
35881 if (TREE_VALUE (args)
35882 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
35883 && TREE_CHAIN (args) == NULL_TREE
35884 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
35885 return true;
35886
35887 old_optimize = build_optimization_node (&global_options);
35888 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
35889
35890 /* If the function changed the optimization levels as well as setting target
35891 options, start with the optimizations specified. */
35892 if (func_optimize && func_optimize != old_optimize)
35893 cl_optimization_restore (&global_options,
35894 TREE_OPTIMIZATION (func_optimize));
35895
35896 /* The target attributes may also change some optimization flags, so update
35897 the optimization options if necessary. */
35898 cl_target_option_save (&cur_target, &global_options);
35899 rs6000_cpu_index = rs6000_tune_index = -1;
35900 ret = rs6000_inner_target_options (args, true);
35901
35902 /* Set up any additional state. */
35903 if (ret)
35904 {
35905 ret = rs6000_option_override_internal (false);
35906 new_target = build_target_option_node (&global_options);
35907 }
35908 else
35909 new_target = NULL;
35910
35911 new_optimize = build_optimization_node (&global_options);
35912
35913 if (!new_target)
35914 ret = false;
35915
35916 else if (fndecl)
35917 {
35918 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
35919
35920 if (old_optimize != new_optimize)
35921 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
35922 }
35923
35924 cl_target_option_restore (&global_options, &cur_target);
35925
35926 if (old_optimize != new_optimize)
35927 cl_optimization_restore (&global_options,
35928 TREE_OPTIMIZATION (old_optimize));
35929
35930 return ret;
35931 }
35932
35933 \f
35934 /* Hook to validate the current #pragma GCC target and set the state, and
35935 update the macros based on what was changed. If ARGS is NULL, then
35936 POP_TARGET is used to reset the options. */
35937
35938 bool
35939 rs6000_pragma_target_parse (tree args, tree pop_target)
35940 {
35941 tree prev_tree = build_target_option_node (&global_options);
35942 tree cur_tree;
35943 struct cl_target_option *prev_opt, *cur_opt;
35944 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
35945 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
35946
35947 if (TARGET_DEBUG_TARGET)
35948 {
35949 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
35950 fprintf (stderr, "args:");
35951 rs6000_debug_target_options (args, " ");
35952 fprintf (stderr, "\n");
35953
35954 if (pop_target)
35955 {
35956 fprintf (stderr, "pop_target:\n");
35957 debug_tree (pop_target);
35958 }
35959 else
35960 fprintf (stderr, "pop_target: <NULL>\n");
35961
35962 fprintf (stderr, "--------------------\n");
35963 }
35964
35965 if (! args)
35966 {
35967 cur_tree = ((pop_target)
35968 ? pop_target
35969 : target_option_default_node);
35970 cl_target_option_restore (&global_options,
35971 TREE_TARGET_OPTION (cur_tree));
35972 }
35973 else
35974 {
35975 rs6000_cpu_index = rs6000_tune_index = -1;
35976 if (!rs6000_inner_target_options (args, false)
35977 || !rs6000_option_override_internal (false)
35978 || (cur_tree = build_target_option_node (&global_options))
35979 == NULL_TREE)
35980 {
35981 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
35982 fprintf (stderr, "invalid pragma\n");
35983
35984 return false;
35985 }
35986 }
35987
35988 target_option_current_node = cur_tree;
35989 rs6000_activate_target_options (target_option_current_node);
35990
35991 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
35992 change the macros that are defined. */
35993 if (rs6000_target_modify_macros_ptr)
35994 {
35995 prev_opt = TREE_TARGET_OPTION (prev_tree);
35996 prev_bumask = prev_opt->x_rs6000_builtin_mask;
35997 prev_flags = prev_opt->x_rs6000_isa_flags;
35998
35999 cur_opt = TREE_TARGET_OPTION (cur_tree);
36000 cur_flags = cur_opt->x_rs6000_isa_flags;
36001 cur_bumask = cur_opt->x_rs6000_builtin_mask;
36002
36003 diff_bumask = (prev_bumask ^ cur_bumask);
36004 diff_flags = (prev_flags ^ cur_flags);
36005
36006 if ((diff_flags != 0) || (diff_bumask != 0))
36007 {
36008 /* Delete old macros. */
36009 rs6000_target_modify_macros_ptr (false,
36010 prev_flags & diff_flags,
36011 prev_bumask & diff_bumask);
36012
36013 /* Define new macros. */
36014 rs6000_target_modify_macros_ptr (true,
36015 cur_flags & diff_flags,
36016 cur_bumask & diff_bumask);
36017 }
36018 }
36019
36020 return true;
36021 }
36022
36023 \f
36024 /* Remember the last fndecl processed by rs6000_set_current_function. */
36025 static GTY(()) tree rs6000_previous_fndecl;
36026
36027 /* Restore target's globals from NEW_TREE and invalidate the
36028 rs6000_previous_fndecl cache. */
36029
36030 void
36031 rs6000_activate_target_options (tree new_tree)
36032 {
36033 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
36034 if (TREE_TARGET_GLOBALS (new_tree))
36035 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
36036 else if (new_tree == target_option_default_node)
36037 restore_target_globals (&default_target_globals);
36038 else
36039 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
36040 rs6000_previous_fndecl = NULL_TREE;
36041 }
36042
36043 /* Establish appropriate back-end context for processing the function
36044 FNDECL. The argument might be NULL to indicate processing at top
36045 level, outside of any function scope. */
36046 static void
36047 rs6000_set_current_function (tree fndecl)
36048 {
36049 if (TARGET_DEBUG_TARGET)
36050 {
36051 fprintf (stderr, "\n==================== rs6000_set_current_function");
36052
36053 if (fndecl)
36054 fprintf (stderr, ", fndecl %s (%p)",
36055 (DECL_NAME (fndecl)
36056 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
36057 : "<unknown>"), (void *)fndecl);
36058
36059 if (rs6000_previous_fndecl)
36060 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
36061
36062 fprintf (stderr, "\n");
36063 }
36064
36065 /* Only change the context if the function changes. This hook is called
36066 several times in the course of compiling a function, and we don't want to
36067 slow things down too much or call target_reinit when it isn't safe. */
36068 if (fndecl == rs6000_previous_fndecl)
36069 return;
36070
36071 tree old_tree;
36072 if (rs6000_previous_fndecl == NULL_TREE)
36073 old_tree = target_option_current_node;
36074 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
36075 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
36076 else
36077 old_tree = target_option_default_node;
36078
36079 tree new_tree;
36080 if (fndecl == NULL_TREE)
36081 {
36082 if (old_tree != target_option_current_node)
36083 new_tree = target_option_current_node;
36084 else
36085 new_tree = NULL_TREE;
36086 }
36087 else
36088 {
36089 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
36090 if (new_tree == NULL_TREE)
36091 new_tree = target_option_default_node;
36092 }
36093
36094 if (TARGET_DEBUG_TARGET)
36095 {
36096 if (new_tree)
36097 {
36098 fprintf (stderr, "\nnew fndecl target specific options:\n");
36099 debug_tree (new_tree);
36100 }
36101
36102 if (old_tree)
36103 {
36104 fprintf (stderr, "\nold fndecl target specific options:\n");
36105 debug_tree (old_tree);
36106 }
36107
36108 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
36109 fprintf (stderr, "--------------------\n");
36110 }
36111
36112 if (new_tree && old_tree != new_tree)
36113 rs6000_activate_target_options (new_tree);
36114
36115 if (fndecl)
36116 rs6000_previous_fndecl = fndecl;
36117 }
36118
36119 \f
36120 /* Save the current options */
36121
36122 static void
36123 rs6000_function_specific_save (struct cl_target_option *ptr,
36124 struct gcc_options *opts)
36125 {
36126 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
36127 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
36128 }
36129
36130 /* Restore the current options */
36131
36132 static void
36133 rs6000_function_specific_restore (struct gcc_options *opts,
36134 struct cl_target_option *ptr)
36136 {
36137 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
36138 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
36139 (void) rs6000_option_override_internal (false);
36140 }
36141
36142 /* Print the current options */
36143
36144 static void
36145 rs6000_function_specific_print (FILE *file, int indent,
36146 struct cl_target_option *ptr)
36147 {
36148 rs6000_print_isa_options (file, indent, "Isa options set",
36149 ptr->x_rs6000_isa_flags);
36150
36151 rs6000_print_isa_options (file, indent, "Isa options explicit",
36152 ptr->x_rs6000_isa_flags_explicit);
36153 }
36154
36155 /* Helper function to print the current isa or misc options on a line. */
36156
36157 static void
36158 rs6000_print_options_internal (FILE *file,
36159 int indent,
36160 const char *string,
36161 HOST_WIDE_INT flags,
36162 const char *prefix,
36163 const struct rs6000_opt_mask *opts,
36164 size_t num_elements)
36165 {
36166 size_t i;
36167 size_t start_column = 0;
36168 size_t cur_column;
36169 size_t max_column = 120;
36170 size_t prefix_len = strlen (prefix);
36171 size_t comma_len = 0;
36172 const char *comma = "";
36173
36174 if (indent)
36175 start_column += fprintf (file, "%*s", indent, "");
36176
36177 if (!flags)
36178 {
36179 fprintf (file, DEBUG_FMT_S, string, "<none>");
36180 return;
36181 }
36182
36183 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
36184
36185 /* Print the various mask options. */
36186 cur_column = start_column;
36187 for (i = 0; i < num_elements; i++)
36188 {
36189 bool invert = opts[i].invert;
36190 const char *name = opts[i].name;
36191 const char *no_str = "";
36192 HOST_WIDE_INT mask = opts[i].mask;
36193 size_t len = comma_len + prefix_len + strlen (name);
36194
36195 if (!invert)
36196 {
36197 if ((flags & mask) == 0)
36198 {
36199 no_str = "no-";
36200 len += sizeof ("no-") - 1;
36201 }
36202
36203 flags &= ~mask;
36204 }
36205
36206 else
36207 {
36208 if ((flags & mask) != 0)
36209 {
36210 no_str = "no-";
36211 len += sizeof ("no-") - 1;
36212 }
36213
36214 flags |= mask;
36215 }
36216
36217 cur_column += len;
36218 if (cur_column > max_column)
36219 {
36220 fprintf (stderr, ", \\\n%*s", (int)start_column, "");
36221 cur_column = start_column + len;
36222 comma = "";
36223 }
36224
36225 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
36226 comma = ", ";
36227 comma_len = sizeof (", ") - 1;
36228 }
36229
36230 fputs ("\n", file);
36231 }
36232
36233 /* Helper function to print the current isa options on a line. */
36234
36235 static void
36236 rs6000_print_isa_options (FILE *file, int indent, const char *string,
36237 HOST_WIDE_INT flags)
36238 {
36239 rs6000_print_options_internal (file, indent, string, flags, "-m",
36240 &rs6000_opt_masks[0],
36241 ARRAY_SIZE (rs6000_opt_masks));
36242 }
36243
36244 static void
36245 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
36246 HOST_WIDE_INT flags)
36247 {
36248 rs6000_print_options_internal (file, indent, string, flags, "",
36249 &rs6000_builtin_mask_names[0],
36250 ARRAY_SIZE (rs6000_builtin_mask_names));
36251 }
36252
36253 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
36254 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
36255 -mupper-regs-df, etc.).
36256
36257 If the user used -mno-power8-vector, we need to turn off all of the implicit
36258 ISA 2.07 and 3.0 options that relate to the vector unit.
36259
36260 If the user used -mno-power9-vector, we need to turn off all of the implicit
36261 ISA 3.0 options that relate to the vector unit.
36262
36263 This function does not handle explicit options such as the user specifying
36264 -mdirect-move. These are handled in rs6000_option_override_internal, and
36265 the appropriate error is given if needed.
36266
36267 We return a mask of all of the implicit options that should not be enabled
36268 by default. */
36269
36270 static HOST_WIDE_INT
36271 rs6000_disable_incompatible_switches (void)
36272 {
36273 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
36274 size_t i, j;
36275
36276 static const struct {
36277 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
36278 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
36279 const char *const name; /* name of the switch. */
36280 } flags[] = {
36281 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
36282 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
36283 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
36284 };
36285
36286 for (i = 0; i < ARRAY_SIZE (flags); i++)
36287 {
36288 HOST_WIDE_INT no_flag = flags[i].no_flag;
36289
36290 if ((rs6000_isa_flags & no_flag) == 0
36291 && (rs6000_isa_flags_explicit & no_flag) != 0)
36292 {
36293 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
36294 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
36295 & rs6000_isa_flags
36296 & dep_flags);
36297
36298 if (set_flags)
36299 {
36300 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
36301 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
36302 {
36303 set_flags &= ~rs6000_opt_masks[j].mask;
36304 error ("%<-mno-%s%> turns off %<-m%s%>",
36305 flags[i].name,
36306 rs6000_opt_masks[j].name);
36307 }
36308
36309 gcc_assert (!set_flags);
36310 }
36311
36312 rs6000_isa_flags &= ~dep_flags;
36313 ignore_masks |= no_flag | dep_flags;
36314 }
36315 }
36316
36317 return ignore_masks;
36318 }
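/* Illustrative sketch, not used by the compiler proper: the dependency
   masking above, on plain masks.  If the user passes -mno-vsx, every
   implicitly enabled flag in OTHER_VSX_VECTOR_MASKS (power8-vector,
   power9-vector, etc.) is cleared, and both the disabled flag and its
   dependents join the set of masks that must not be re-enabled by
   default.  The names below are hypothetical.  */

static HOST_WIDE_INT ATTRIBUTE_UNUSED
rs6000_dependency_mask_example (HOST_WIDE_INT isa_flags,
				HOST_WIDE_INT explicit_flags,
				HOST_WIDE_INT no_flag,
				HOST_WIDE_INT dep_flags)
{
  HOST_WIDE_INT ignore_masks = explicit_flags;

  /* Flag explicitly turned off: ignore it and everything that
     depends on it, exactly as the loop above does.  */
  if ((isa_flags & no_flag) == 0 && (explicit_flags & no_flag) != 0)
    ignore_masks |= no_flag | dep_flags;

  return ignore_masks;
}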
36319
36320 \f
36321 /* Helper function for printing the function name when debugging. */
36322
36323 static const char *
36324 get_decl_name (tree fn)
36325 {
36326 tree name;
36327
36328 if (!fn)
36329 return "<null>";
36330
36331 name = DECL_NAME (fn);
36332 if (!name)
36333 return "<no-name>";
36334
36335 return IDENTIFIER_POINTER (name);
36336 }
36337
36338 /* Return the clone id of the target we are compiling code for in a target
36339 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
36340 the priority list for the target clones (ordered from lowest to
36341 highest). */
36342
36343 static int
36344 rs6000_clone_priority (tree fndecl)
36345 {
36346 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
36347 HOST_WIDE_INT isa_masks;
36348 int ret = CLONE_DEFAULT;
36349 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
36350 const char *attrs_str = NULL;
36351
36352 attrs = TREE_VALUE (TREE_VALUE (attrs));
36353 attrs_str = TREE_STRING_POINTER (attrs);
36354
36355 /* Return priority zero for default function. Return the ISA needed for the
36356 function if it is not the default. */
36357 if (strcmp (attrs_str, "default") != 0)
36358 {
36359 if (fn_opts == NULL_TREE)
36360 fn_opts = target_option_default_node;
36361
36362 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
36363 isa_masks = rs6000_isa_flags;
36364 else
36365 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
36366
36367 for (ret = CLONE_MAX - 1; ret != 0; ret--)
36368 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
36369 break;
36370 }
36371
36372 if (TARGET_DEBUG_TARGET)
36373 fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n",
36374 get_decl_name (fndecl), ret);
36375
36376 return ret;
36377 }
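/* For example (an illustrative sketch, assuming the usual clone ordering
   from CLONE_DEFAULT up to CLONE_ISA_3_0):

       __attribute__ ((target_clones ("cpu=power9", "default")))
       long foo (long a) { return a * 3; }

   the "default" clone gets priority 0, while the "cpu=power9" clone maps
   to the highest entry in rs6000_clone_map whose isa_mask matches its ISA
   flags, so the dispatcher prefers it on power9 hardware.  */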
36378
36379 /* This compares the priority of target features in function DECL1 and DECL2.
36380 It returns a positive value if DECL1 has higher priority, a negative value
36381 if DECL2 has higher priority, and 0 if they are the same. Note, priorities are
36382 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
36383
36384 static int
36385 rs6000_compare_version_priority (tree decl1, tree decl2)
36386 {
36387 int priority1 = rs6000_clone_priority (decl1);
36388 int priority2 = rs6000_clone_priority (decl2);
36389 int ret = priority1 - priority2;
36390
36391 if (TARGET_DEBUG_TARGET)
36392 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
36393 get_decl_name (decl1), get_decl_name (decl2), ret);
36394
36395 return ret;
36396 }
36397
36398 /* Make a dispatcher declaration for the multi-versioned function DECL.
36399 Calls to the DECL function will be replaced with calls to the dispatcher
36400 by the front-end. Returns the decl of the dispatcher function. */
36401
36402 static tree
36403 rs6000_get_function_versions_dispatcher (void *decl)
36404 {
36405 tree fn = (tree) decl;
36406 struct cgraph_node *node = NULL;
36407 struct cgraph_node *default_node = NULL;
36408 struct cgraph_function_version_info *node_v = NULL;
36409 struct cgraph_function_version_info *first_v = NULL;
36410
36411 tree dispatch_decl = NULL;
36412
36413 struct cgraph_function_version_info *default_version_info = NULL;
36414 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
36415
36416 if (TARGET_DEBUG_TARGET)
36417 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
36418 get_decl_name (fn));
36419
36420 node = cgraph_node::get (fn);
36421 gcc_assert (node != NULL);
36422
36423 node_v = node->function_version ();
36424 gcc_assert (node_v != NULL);
36425
36426 if (node_v->dispatcher_resolver != NULL)
36427 return node_v->dispatcher_resolver;
36428
36429 /* Find the default version and make it the first node. */
36430 first_v = node_v;
36431 /* Go to the beginning of the chain. */
36432 while (first_v->prev != NULL)
36433 first_v = first_v->prev;
36434
36435 default_version_info = first_v;
36436 while (default_version_info != NULL)
36437 {
36438 const tree decl2 = default_version_info->this_node->decl;
36439 if (is_function_default_version (decl2))
36440 break;
36441 default_version_info = default_version_info->next;
36442 }
36443
36444 /* If there is no default node, just return NULL. */
36445 if (default_version_info == NULL)
36446 return NULL;
36447
36448 /* Make default info the first node. */
36449 if (first_v != default_version_info)
36450 {
36451 default_version_info->prev->next = default_version_info->next;
36452 if (default_version_info->next)
36453 default_version_info->next->prev = default_version_info->prev;
36454 first_v->prev = default_version_info;
36455 default_version_info->next = first_v;
36456 default_version_info->prev = NULL;
36457 }
36458
36459 default_node = default_version_info->this_node;
36460
36461 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
36462 error_at (DECL_SOURCE_LOCATION (default_node->decl),
36463 "target_clones attribute needs GLIBC (2.23 and newer) that "
36464 "exports hardware capability bits");
36465 #else
36466
36467 if (targetm.has_ifunc_p ())
36468 {
36469 struct cgraph_function_version_info *it_v = NULL;
36470 struct cgraph_node *dispatcher_node = NULL;
36471 struct cgraph_function_version_info *dispatcher_version_info = NULL;
36472
36473 /* Right now, the dispatching is done via ifunc. */
36474 dispatch_decl = make_dispatcher_decl (default_node->decl);
36475
36476 dispatcher_node = cgraph_node::get_create (dispatch_decl);
36477 gcc_assert (dispatcher_node != NULL);
36478 dispatcher_node->dispatcher_function = 1;
36479 dispatcher_version_info
36480 = dispatcher_node->insert_new_function_version ();
36481 dispatcher_version_info->next = default_version_info;
36482 dispatcher_node->definition = 1;
36483
36484 /* Set the dispatcher for all the versions. */
36485 it_v = default_version_info;
36486 while (it_v != NULL)
36487 {
36488 it_v->dispatcher_resolver = dispatch_decl;
36489 it_v = it_v->next;
36490 }
36491 }
36492 else
36493 {
36494 error_at (DECL_SOURCE_LOCATION (default_node->decl),
36495 "multiversioning needs ifunc which is not supported "
36496 "on this target");
36497 }
36498 #endif
36499
36500 return dispatch_decl;
36501 }
36502
36503 /* Make the resolver function decl to dispatch the versions of a multi-
36504 versioned function, DEFAULT_DECL. Create an empty basic block in the
36505 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
36506 function. */
36507
36508 static tree
36509 make_resolver_func (const tree default_decl,
36510 const tree dispatch_decl,
36511 basic_block *empty_bb)
36512 {
36513 /* Make the resolver function static. The resolver function returns
36514 void *. */
36515 tree decl_name = clone_function_name (default_decl, "resolver");
36516 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
36517 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
36518 tree decl = build_fn_decl (resolver_name, type);
36519 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
36520
36521 DECL_NAME (decl) = decl_name;
36522 TREE_USED (decl) = 1;
36523 DECL_ARTIFICIAL (decl) = 1;
36524 DECL_IGNORED_P (decl) = 0;
36525 TREE_PUBLIC (decl) = 0;
36526 DECL_UNINLINABLE (decl) = 1;
36527
36528 /* Resolver is not external, body is generated. */
36529 DECL_EXTERNAL (decl) = 0;
36530 DECL_EXTERNAL (dispatch_decl) = 0;
36531
36532 DECL_CONTEXT (decl) = NULL_TREE;
36533 DECL_INITIAL (decl) = make_node (BLOCK);
36534 DECL_STATIC_CONSTRUCTOR (decl) = 0;
36535
36536 /* Build result decl and add to function_decl. */
36537 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
36538 DECL_ARTIFICIAL (t) = 1;
36539 DECL_IGNORED_P (t) = 1;
36540 DECL_RESULT (decl) = t;
36541
36542 gimplify_function_tree (decl);
36543 push_cfun (DECL_STRUCT_FUNCTION (decl));
36544 *empty_bb = init_lowered_empty_function (decl, false,
36545 profile_count::uninitialized ());
36546
36547 cgraph_node::add_new_function (decl, true);
36548 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
36549
36550 pop_cfun ();
36551
36552 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
36553 DECL_ATTRIBUTES (dispatch_decl)
36554 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
36555
36556 cgraph_node::create_same_body_alias (dispatch_decl, decl);
36557
36558 return decl;
36559 }
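/* A rough sketch of the result (the names here are illustrative): for a
   versioned function "foo", this builds a resolver returning void * and
   marks the dispatcher as an ifunc resolved by it, much as if one wrote:

       void *foo_resolver (void);
       long foo (long) __attribute__ ((ifunc ("foo_resolver")));  */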
36560
36561 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
36562 return a pointer to VERSION_DECL if we are running on a machine that
36563 supports the index CLONE_ISA hardware architecture bits. This function will
36564 be called during version dispatch to decide which function version to
36565 execute. It returns the basic block at the end, to which more conditions
36566 can be added. */
36567
36568 static basic_block
36569 add_condition_to_bb (tree function_decl, tree version_decl,
36570 int clone_isa, basic_block new_bb)
36571 {
36572 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
36573
36574 gcc_assert (new_bb != NULL);
36575 gimple_seq gseq = bb_seq (new_bb);
36576
36577
36578 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
36579 build_fold_addr_expr (version_decl));
36580 tree result_var = create_tmp_var (ptr_type_node);
36581 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
36582 gimple *return_stmt = gimple_build_return (result_var);
36583
36584 if (clone_isa == CLONE_DEFAULT)
36585 {
36586 gimple_seq_add_stmt (&gseq, convert_stmt);
36587 gimple_seq_add_stmt (&gseq, return_stmt);
36588 set_bb_seq (new_bb, gseq);
36589 gimple_set_bb (convert_stmt, new_bb);
36590 gimple_set_bb (return_stmt, new_bb);
36591 pop_cfun ();
36592 return new_bb;
36593 }
36594
36595 tree bool_zero = build_int_cst (bool_int_type_node, 0);
36596 tree cond_var = create_tmp_var (bool_int_type_node);
36597 tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
36598 const char *arg_str = rs6000_clone_map[clone_isa].name;
36599 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
36600 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
36601 gimple_call_set_lhs (call_cond_stmt, cond_var);
36602
36603 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
36604 gimple_set_bb (call_cond_stmt, new_bb);
36605 gimple_seq_add_stmt (&gseq, call_cond_stmt);
36606
36607 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
36608 NULL_TREE, NULL_TREE);
36609 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
36610 gimple_set_bb (if_else_stmt, new_bb);
36611 gimple_seq_add_stmt (&gseq, if_else_stmt);
36612
36613 gimple_seq_add_stmt (&gseq, convert_stmt);
36614 gimple_seq_add_stmt (&gseq, return_stmt);
36615 set_bb_seq (new_bb, gseq);
36616
36617 basic_block bb1 = new_bb;
36618 edge e12 = split_block (bb1, if_else_stmt);
36619 basic_block bb2 = e12->dest;
36620 e12->flags &= ~EDGE_FALLTHRU;
36621 e12->flags |= EDGE_TRUE_VALUE;
36622
36623 edge e23 = split_block (bb2, return_stmt);
36624 gimple_set_bb (convert_stmt, bb2);
36625 gimple_set_bb (return_stmt, bb2);
36626
36627 basic_block bb3 = e23->dest;
36628 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
36629
36630 remove_edge (e23);
36631 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
36632
36633 pop_cfun ();
36634 return bb3;
36635 }
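/* The test generated for each non-default clone is essentially (a sketch
   in C; the function name is illustrative, and the string comes from
   rs6000_clone_map[clone_isa].name):

       if (__builtin_cpu_supports ("arch_3_00"))
         return (void *) foo_power9;

   with control falling through to the next condition, and finally to the
   default version.  */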
36636
36637 /* This function generates the dispatch function for multi-versioned functions.
36638 DISPATCH_DECL is the function which will contain the dispatch logic.
36639 FNDECLS are the function choices for dispatch, and is a tree chain.
36640 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
36641 code is generated. */
36642
36643 static int
36644 dispatch_function_versions (tree dispatch_decl,
36645 void *fndecls_p,
36646 basic_block *empty_bb)
36647 {
36648 int ix;
36649 tree ele;
36650 vec<tree> *fndecls;
36651 tree clones[CLONE_MAX];
36652
36653 if (TARGET_DEBUG_TARGET)
36654 fputs ("dispatch_function_versions, top\n", stderr);
36655
36656 gcc_assert (dispatch_decl != NULL
36657 && fndecls_p != NULL
36658 && empty_bb != NULL);
36659
36660 /* fndecls_p is actually a vector. */
36661 fndecls = static_cast<vec<tree> *> (fndecls_p);
36662
36663 /* At least one more version other than the default. */
36664 gcc_assert (fndecls->length () >= 2);
36665
36666 /* The first version in the vector is the default decl. */
36667 memset ((void *) clones, '\0', sizeof (clones));
36668 clones[CLONE_DEFAULT] = (*fndecls)[0];
36669
36670 /* On PowerPC, we do not need to call __builtin_cpu_init, since it is a NOP
36671 (on x86_64, it is not a NOP). The builtin function
36672 __builtin_cpu_supports ensures that the TOC fields are set up by requiring
36673 a recent glibc. If we ever need to call __builtin_cpu_init, we would need
36674 to insert the code here to do the call. */
36675
36676 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
36677 {
36678 int priority = rs6000_clone_priority (ele);
36679 if (!clones[priority])
36680 clones[priority] = ele;
36681 }
36682
36683 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
36684 if (clones[ix])
36685 {
36686 if (TARGET_DEBUG_TARGET)
36687 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
36688 ix, get_decl_name (clones[ix]));
36689
36690 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
36691 *empty_bb);
36692 }
36693
36694 return 0;
36695 }
36696
36697 /* Generate the dispatching code body to dispatch multi-versioned function
36698 DECL. The target hook is called to process the "target" attributes and
36699 provide the code to dispatch the right function at run-time. NODE points
36700 to the dispatcher decl whose body will be created. */
36701
36702 static tree
36703 rs6000_generate_version_dispatcher_body (void *node_p)
36704 {
36705 tree resolver;
36706 basic_block empty_bb;
36707 struct cgraph_node *node = (cgraph_node *) node_p;
36708 struct cgraph_function_version_info *ninfo = node->function_version ();
36709
36710 if (ninfo->dispatcher_resolver)
36711 return ninfo->dispatcher_resolver;
36712
36713 /* node is going to be an alias, so remove the finalized bit. */
36714 node->definition = false;
36715
36716 /* The first version in the chain corresponds to the default version. */
36717 ninfo->dispatcher_resolver = resolver
36718 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
36719
36720 if (TARGET_DEBUG_TARGET)
36721 fprintf (stderr, "rs6000_generate_version_dispatcher_body, %s\n",
36722 get_decl_name (resolver));
36723
36724 push_cfun (DECL_STRUCT_FUNCTION (resolver));
36725 auto_vec<tree, 2> fn_ver_vec;
36726
36727 for (struct cgraph_function_version_info *vinfo = ninfo->next;
36728 vinfo;
36729 vinfo = vinfo->next)
36730 {
36731 struct cgraph_node *version = vinfo->this_node;
36732 /* Check for virtual functions here again, as by this time it should
36733 have been determined if this function needs a vtable index or
36734 not. This happens for methods in derived classes that override
36735 virtual methods in base classes but are not explicitly marked as
36736 virtual. */
36737 if (DECL_VINDEX (version->decl))
36738 sorry ("virtual function multiversioning not supported");
36739
36740 fn_ver_vec.safe_push (version->decl);
36741 }
36742
36743 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
36744 cgraph_edge::rebuild_edges ();
36745 pop_cfun ();
36746 return resolver;
36747 }
36748
36749 \f
36750 /* Hook to determine if one function can safely inline another. */
36751
36752 static bool
36753 rs6000_can_inline_p (tree caller, tree callee)
36754 {
36755 bool ret = false;
36756 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
36757 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
36758
36759 /* If callee has no option attributes, then it is ok to inline. */
36760 if (!callee_tree)
36761 ret = true;
36762
36763 /* If caller has no option attributes, but callee does then it is not ok to
36764 inline. */
36765 else if (!caller_tree)
36766 ret = false;
36767
36768 else
36769 {
36770 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
36771 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
36772
36773 /* The callee's options must be a subset of the caller's, i.e. a vsx
36774 function can inline an altivec function, but a non-vsx function can't
36775 inline a vsx function. */
36776 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
36777 == callee_opts->x_rs6000_isa_flags)
36778 ret = true;
36779 }
36780
36781 if (TARGET_DEBUG_TARGET)
36782 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
36783 get_decl_name (caller), get_decl_name (callee),
36784 (ret ? "can" : "cannot"));
36785
36786 return ret;
36787 }
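/* For instance (an illustrative sketch): a caller compiled with
   target("cpu=power8") has the Altivec and VSX flags set, so it can inline
   a callee marked target("altivec"); the reverse is rejected because the
   callee's mask would contain VSX bits that the caller lacks.  */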
36788 \f
36789 /* Allocate a stack temp and fix up the address so it meets the particular
36790 memory requirements (either offsettable or REG+REG addressing). */
36791
36792 rtx
36793 rs6000_allocate_stack_temp (machine_mode mode,
36794 bool offsettable_p,
36795 bool reg_reg_p)
36796 {
36797 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
36798 rtx addr = XEXP (stack, 0);
36799 int strict_p = reload_completed;
36800
36801 if (!legitimate_indirect_address_p (addr, strict_p))
36802 {
36803 if (offsettable_p
36804 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
36805 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
36806
36807 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
36808 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
36809 }
36810
36811 return stack;
36812 }
36813
36814 /* Given a memory reference, if it is not a reg or reg+reg addressing, convert
36815 to such a form to deal with memory reference instructions like STFIWX that
36816 only take reg+reg addressing. */
36817
36818 rtx
36819 rs6000_address_for_fpconvert (rtx x)
36820 {
36821 rtx addr;
36822
36823 gcc_assert (MEM_P (x));
36824 addr = XEXP (x, 0);
36825 if (can_create_pseudo_p ()
36826 && ! legitimate_indirect_address_p (addr, reload_completed)
36827 && ! legitimate_indexed_address_p (addr, reload_completed))
36828 {
36829 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
36830 {
36831 rtx reg = XEXP (addr, 0);
36832 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
36833 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
36834 gcc_assert (REG_P (reg));
36835 emit_insn (gen_add3_insn (reg, reg, size_rtx));
36836 addr = reg;
36837 }
36838 else if (GET_CODE (addr) == PRE_MODIFY)
36839 {
36840 rtx reg = XEXP (addr, 0);
36841 rtx expr = XEXP (addr, 1);
36842 gcc_assert (REG_P (reg));
36843 gcc_assert (GET_CODE (expr) == PLUS);
36844 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
36845 addr = reg;
36846 }
36847
36848 x = replace_equiv_address (x, copy_addr_to_reg (addr));
36849 }
36850
36851 return x;
36852 }
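/* A sketch of the effect (register numbers are arbitrary): STFIWX accepts
   only reg or reg+reg addresses, so a MEM whose address is
   (plus (reg 9) (const_int 4096)) is rewritten by loading the sum into a
   fresh pseudo and using (mem (reg <new>)) instead.  */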
36853
36854 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
36855
36856 On the RS/6000, all integer constants are acceptable, most won't be valid
36857 for particular insns, though. Only easy FP constants are acceptable. */
36858
36859 static bool
36860 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
36861 {
36862 if (TARGET_ELF && tls_referenced_p (x))
36863 return false;
36864
36865 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
36866 || GET_MODE (x) == VOIDmode
36867 || (TARGET_POWERPC64 && mode == DImode)
36868 || easy_fp_constant (x, mode)
36869 || easy_vector_constant (x, mode));
36870 }
36871
36872 \f
36873 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
36874
36875 static bool
36876 chain_already_loaded (rtx_insn *last)
36877 {
36878 for (; last != NULL; last = PREV_INSN (last))
36879 {
36880 if (NONJUMP_INSN_P (last))
36881 {
36882 rtx patt = PATTERN (last);
36883
36884 if (GET_CODE (patt) == SET)
36885 {
36886 rtx lhs = XEXP (patt, 0);
36887
36888 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
36889 return true;
36890 }
36891 }
36892 }
36893 return false;
36894 }
36895
36896 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
36897
36898 void
36899 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
36900 {
36901 const bool direct_call_p
36902 = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
36903 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
36904 rtx toc_load = NULL_RTX;
36905 rtx toc_restore = NULL_RTX;
36906 rtx func_addr;
36907 rtx abi_reg = NULL_RTX;
36908 rtx call[4];
36909 int n_call;
36910 rtx insn;
36911
36912 /* Handle longcall attributes. */
36913 if (INTVAL (cookie) & CALL_LONG)
36914 func_desc = rs6000_longcall_ref (func_desc);
36915
36916 /* Handle indirect calls. */
36917 if (GET_CODE (func_desc) != SYMBOL_REF
36918 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
36919 {
36920 /* Save the TOC into its reserved slot before the call,
36921 and prepare to restore it after the call. */
36922 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
36923 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
36924 rtx stack_toc_mem = gen_frame_mem (Pmode,
36925 gen_rtx_PLUS (Pmode, stack_ptr,
36926 stack_toc_offset));
36927 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
36928 gen_rtvec (1, stack_toc_offset),
36929 UNSPEC_TOCSLOT);
36930 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
36931
36932 /* Can we optimize saving the TOC in the prologue or
36933 do we need to do it at every call? */
36934 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
36935 cfun->machine->save_toc_in_prologue = true;
36936 else
36937 {
36938 MEM_VOLATILE_P (stack_toc_mem) = 1;
36939 emit_move_insn (stack_toc_mem, toc_reg);
36940 }
36941
36942 if (DEFAULT_ABI == ABI_ELFv2)
36943 {
36944 /* A function pointer in the ELFv2 ABI is just a plain address, but
36945 the ABI requires it to be loaded into r12 before the call. */
36946 func_addr = gen_rtx_REG (Pmode, 12);
36947 emit_move_insn (func_addr, func_desc);
36948 abi_reg = func_addr;
36949 }
36950 else
36951 {
36952 /* A function pointer under AIX is a pointer to a data area whose
36953 first word contains the actual address of the function, whose
36954 second word contains a pointer to its TOC, and whose third word
36955 contains a value to place in the static chain register (r11).
36956 Note that if we load the static chain, our "trampoline" need
36957 not have any executable code. */
36958
36959 /* Load up address of the actual function. */
36960 func_desc = force_reg (Pmode, func_desc);
36961 func_addr = gen_reg_rtx (Pmode);
36962 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
36963
36964 /* Prepare to load the TOC of the called function. Note that the
36965 TOC load must happen immediately before the actual call so
36966 that unwinding the TOC registers works correctly. See the
36967 comment in frob_update_context. */
36968 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
36969 rtx func_toc_mem = gen_rtx_MEM (Pmode,
36970 gen_rtx_PLUS (Pmode, func_desc,
36971 func_toc_offset));
36972 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
36973
36974 /* If we have a static chain, load it up. But, if the call was
36975 originally direct, the 3rd word has not been written since no
36976 trampoline has been built, so we ought not to load it, lest we
36977 overwrite a static chain value. */
36978 if (!direct_call_p
36979 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
36980 && !chain_already_loaded (get_current_sequence ()->next->last))
36981 {
36982 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
36983 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
36984 rtx func_sc_mem = gen_rtx_MEM (Pmode,
36985 gen_rtx_PLUS (Pmode, func_desc,
36986 func_sc_offset));
36987 emit_move_insn (sc_reg, func_sc_mem);
36988 abi_reg = sc_reg;
36989 }
36990 }
36991 }
36992 else
36993 {
36994 /* Direct calls use the TOC: for local calls, the callee will
36995 assume the TOC register is set; for non-local calls, the
36996 PLT stub needs the TOC register. */
36997 abi_reg = toc_reg;
36998 func_addr = func_desc;
36999 }
37000
37001 /* Create the call. */
37002 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
37003 if (value != NULL_RTX)
37004 call[0] = gen_rtx_SET (value, call[0]);
37005 n_call = 1;
37006
37007 if (toc_load)
37008 call[n_call++] = toc_load;
37009 if (toc_restore)
37010 call[n_call++] = toc_restore;
37011
37012 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
37013
37014 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
37015 insn = emit_call_insn (insn);
37016
37017 /* Mention all registers defined by the ABI to hold information
37018 as uses in CALL_INSN_FUNCTION_USAGE. */
37019 if (abi_reg)
37020 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
37021 }
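/* For reference, a simplified sketch of the sequence this expands to for
   an indirect call under the ELFv2 ABI (the TOC save offset comes from
   RS6000_TOC_SAVE_SLOT; register choices are fixed by the ABI):

       std r2,24(r1)      # save the TOC pointer
       mr r12,<ptr>       # function address must be in r12
       mtctr r12
       bctrl
       ld r2,24(r1)       # restore the TOC pointer  */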
37022
37023 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
37024
37025 void
37026 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
37027 {
37028 rtx call[2];
37029 rtx insn;
37030
37031 gcc_assert (INTVAL (cookie) == 0);
37032
37033 /* Create the call. */
37034 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
37035 if (value != NULL_RTX)
37036 call[0] = gen_rtx_SET (value, call[0]);
37037
37038 call[1] = simple_return_rtx;
37039
37040 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
37041 insn = emit_call_insn (insn);
37042
37043 /* Note use of the TOC register. */
37044 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
37045 }
37046
37047 /* Return whether we need to always update the saved TOC pointer when we update
37048 the stack pointer. */
37049
37050 static bool
37051 rs6000_save_toc_in_prologue_p (void)
37052 {
37053 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
37054 }
37055
37056 #ifdef HAVE_GAS_HIDDEN
37057 # define USE_HIDDEN_LINKONCE 1
37058 #else
37059 # define USE_HIDDEN_LINKONCE 0
37060 #endif
37061
37062 /* Fills in the label name that should be used for a 476 link stack thunk. */
37063
37064 void
37065 get_ppc476_thunk_name (char name[32])
37066 {
37067 gcc_assert (TARGET_LINK_STACK);
37068
37069 if (USE_HIDDEN_LINKONCE)
37070 sprintf (name, "__ppc476.get_thunk");
37071 else
37072 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
37073 }
37074
37075 /* This function emits the simple thunk routine that is used to preserve
37076 the link stack on the 476 cpu. */
37077
37078 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
37079 static void
37080 rs6000_code_end (void)
37081 {
37082 char name[32];
37083 tree decl;
37084
37085 if (!TARGET_LINK_STACK)
37086 return;
37087
37088 get_ppc476_thunk_name (name);
37089
37090 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
37091 build_function_type_list (void_type_node, NULL_TREE));
37092 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
37093 NULL_TREE, void_type_node);
37094 TREE_PUBLIC (decl) = 1;
37095 TREE_STATIC (decl) = 1;
37096
37097 #if RS6000_WEAK
37098 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
37099 {
37100 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
37101 targetm.asm_out.unique_section (decl, 0);
37102 switch_to_section (get_named_section (decl, NULL, 0));
37103 DECL_WEAK (decl) = 1;
37104 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
37105 targetm.asm_out.globalize_label (asm_out_file, name);
37106 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
37107 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
37108 }
37109 else
37110 #endif
37111 {
37112 switch_to_section (text_section);
37113 ASM_OUTPUT_LABEL (asm_out_file, name);
37114 }
37115
37116 DECL_INITIAL (decl) = make_node (BLOCK);
37117 current_function_decl = decl;
37118 allocate_struct_function (decl, false);
37119 init_function_start (decl);
37120 first_function_block_is_cold = false;
37121 /* Make sure unwind info is emitted for the thunk if needed. */
37122 final_start_function (emit_barrier (), asm_out_file, 1);
37123
37124 fputs ("\tblr\n", asm_out_file);
37125
37126 final_end_function ();
37127 init_insn_lengths ();
37128 free_after_compilation (cfun);
37129 set_cfun (NULL);
37130 current_function_decl = NULL;
37131 }
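/* The emitted thunk is just (a sketch of the assembly):

       __ppc476.get_thunk:
               blr

   A caller obtains its own address with "bl __ppc476.get_thunk" followed
   by mflr; the bl pushes an entry on the 476 link stack and the immediate
   blr pops it again, keeping the link stack balanced.  */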
37132
37133 /* Add r30 to hard reg set if the prologue sets it up and it is not
37134 pic_offset_table_rtx. */
37135
37136 static void
37137 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
37138 {
37139 if (!TARGET_SINGLE_PIC_BASE
37140 && TARGET_TOC
37141 && TARGET_MINIMAL_TOC
37142 && !constant_pool_empty_p ())
37143 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
37144 if (cfun->machine->split_stack_argp_used)
37145 add_to_hard_reg_set (&set->set, Pmode, 12);
37146
37147 /* Make sure the hard reg set doesn't include r2, which was possibly added
37148 via PIC_OFFSET_TABLE_REGNUM. */
37149 if (TARGET_TOC)
37150 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
37151 }
37152
37153 \f
37154 /* Helper function for rs6000_split_logical to emit a logical instruction after
37155 splitting the operation into single GPR registers.
37156
37157 DEST is the destination register.
37158 OP1 and OP2 are the input source registers.
37159 CODE is the base operation (AND, IOR, XOR, NOT).
37160 MODE is the machine mode.
37161 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
37162 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
37163 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
37164
37165 static void
37166 rs6000_split_logical_inner (rtx dest,
37167 rtx op1,
37168 rtx op2,
37169 enum rtx_code code,
37170 machine_mode mode,
37171 bool complement_final_p,
37172 bool complement_op1_p,
37173 bool complement_op2_p)
37174 {
37175 rtx bool_rtx;
37176
37177 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
37178 if (op2 && GET_CODE (op2) == CONST_INT
37179 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
37180 && !complement_final_p && !complement_op1_p && !complement_op2_p)
37181 {
37182 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
37183 HOST_WIDE_INT value = INTVAL (op2) & mask;
37184
37185 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
37186 if (code == AND)
37187 {
37188 if (value == 0)
37189 {
37190 emit_insn (gen_rtx_SET (dest, const0_rtx));
37191 return;
37192 }
37193
37194 else if (value == mask)
37195 {
37196 if (!rtx_equal_p (dest, op1))
37197 emit_insn (gen_rtx_SET (dest, op1));
37198 return;
37199 }
37200 }
37201
37202 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
37203 into separate ORI/ORIS or XORI/XORIS instructions. */
37204 else if (code == IOR || code == XOR)
37205 {
37206 if (value == 0)
37207 {
37208 if (!rtx_equal_p (dest, op1))
37209 emit_insn (gen_rtx_SET (dest, op1));
37210 return;
37211 }
37212 }
37213 }
37214
37215 if (code == AND && mode == SImode
37216 && !complement_final_p && !complement_op1_p && !complement_op2_p)
37217 {
37218 emit_insn (gen_andsi3 (dest, op1, op2));
37219 return;
37220 }
37221
37222 if (complement_op1_p)
37223 op1 = gen_rtx_NOT (mode, op1);
37224
37225 if (complement_op2_p)
37226 op2 = gen_rtx_NOT (mode, op2);
37227
37228 /* For canonical RTL, if only one arm is inverted it is the first. */
37229 if (!complement_op1_p && complement_op2_p)
37230 std::swap (op1, op2);
37231
37232 bool_rtx = ((code == NOT)
37233 ? gen_rtx_NOT (mode, op1)
37234 : gen_rtx_fmt_ee (code, mode, op1, op2));
37235
37236 if (complement_final_p)
37237 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
37238
37239 emit_insn (gen_rtx_SET (dest, bool_rtx));
37240 }
37241
37242 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
37243 operations are split immediately during RTL generation to allow for more
37244 optimizations of the AND/IOR/XOR.
37245
37246 OPERANDS is an array containing the destination and two input operands.
37247 CODE is the base operation (AND, IOR, XOR, NOT).
37248 MODE is the machine mode.
37249 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
37250 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
37251 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
37252 CLOBBER_REG is either NULL or a scratch register of type CC to allow
37253 formation of the AND instructions. */
37254
37255 static void
37256 rs6000_split_logical_di (rtx operands[3],
37257 enum rtx_code code,
37258 bool complement_final_p,
37259 bool complement_op1_p,
37260 bool complement_op2_p)
37261 {
37262 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
37263 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
37264 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
37265 enum hi_lo { hi = 0, lo = 1 };
37266 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
37267 size_t i;
37268
37269 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
37270 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
37271 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
37272 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
37273
37274 if (code == NOT)
37275 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
37276 else
37277 {
37278 if (GET_CODE (operands[2]) != CONST_INT)
37279 {
37280 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
37281 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
37282 }
37283 else
37284 {
37285 HOST_WIDE_INT value = INTVAL (operands[2]);
37286 HOST_WIDE_INT value_hi_lo[2];
37287
37288 gcc_assert (!complement_final_p);
37289 gcc_assert (!complement_op1_p);
37290 gcc_assert (!complement_op2_p);
37291
37292 value_hi_lo[hi] = value >> 32;
37293 value_hi_lo[lo] = value & lower_32bits;
37294
37295 for (i = 0; i < 2; i++)
37296 {
37297 HOST_WIDE_INT sub_value = value_hi_lo[i];
37298
37299 if (sub_value & sign_bit)
37300 sub_value |= upper_32bits;
37301
37302 op2_hi_lo[i] = GEN_INT (sub_value);
37303
37304 /* If this is an AND instruction, check to see if we need to load
37305 the value into a register. */
37306 if (code == AND && sub_value != -1 && sub_value != 0
37307 && !and_operand (op2_hi_lo[i], SImode))
37308 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
37309 }
37310 }
37311 }
37312
37313 for (i = 0; i < 2; i++)
37314 {
37315 /* Split large IOR/XOR operations. */
37316 if ((code == IOR || code == XOR)
37317 && GET_CODE (op2_hi_lo[i]) == CONST_INT
37318 && !complement_final_p
37319 && !complement_op1_p
37320 && !complement_op2_p
37321 && !logical_const_operand (op2_hi_lo[i], SImode))
37322 {
37323 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
37324 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
37325 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
37326 rtx tmp = gen_reg_rtx (SImode);
37327
37328 /* Make sure the constant is sign extended. */
37329 if ((hi_16bits & sign_bit) != 0)
37330 hi_16bits |= upper_32bits;
37331
37332 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
37333 code, SImode, false, false, false);
37334
37335 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
37336 code, SImode, false, false, false);
37337 }
37338 else
37339 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
37340 code, SImode, complement_final_p,
37341 complement_op1_p, complement_op2_p);
37342 }
37343
37344 return;
37345 }
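/* As an illustration (a sketch; the register is arbitrary): on a 32-bit
   target, a DImode IOR with the constant 0x12345678 operates on each
   32-bit half separately. The low half's constant is not a
   logical_const_operand, so it is split into two instructions, roughly:

       oris rLO,rLO,0x1234    # IOR with the high 16 bits
       ori  rLO,rLO,0x5678    # IOR with the low 16 bits

   while the high half, whose constant is 0, becomes a simple move.  */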
37346
37347 /* Split the insns that make up boolean operations operating on multiple GPR
37348 registers. The boolean MD patterns ensure that the inputs either are
37349 exactly the same as the output registers, or there is no overlap.
37350
37351 OPERANDS is an array containing the destination and two input operands.
37352 CODE is the base operation (AND, IOR, XOR, NOT).
37353 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
37354 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
37355 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
37356
37357 void
37358 rs6000_split_logical (rtx operands[3],
37359 enum rtx_code code,
37360 bool complement_final_p,
37361 bool complement_op1_p,
37362 bool complement_op2_p)
37363 {
37364 machine_mode mode = GET_MODE (operands[0]);
37365 machine_mode sub_mode;
37366 rtx op0, op1, op2;
37367 int sub_size, regno0, regno1, nregs, i;
37368
37369 /* If this is DImode, use the specialized version that can run before
37370 register allocation. */
37371 if (mode == DImode && !TARGET_POWERPC64)
37372 {
37373 rs6000_split_logical_di (operands, code, complement_final_p,
37374 complement_op1_p, complement_op2_p);
37375 return;
37376 }
37377
37378 op0 = operands[0];
37379 op1 = operands[1];
37380 op2 = (code == NOT) ? NULL_RTX : operands[2];
37381 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
37382 sub_size = GET_MODE_SIZE (sub_mode);
37383 regno0 = REGNO (op0);
37384 regno1 = REGNO (op1);
37385
37386 gcc_assert (reload_completed);
37387 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
37388 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
37389
37390 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
37391 gcc_assert (nregs > 1);
37392
37393 if (op2 && REG_P (op2))
37394 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
37395
37396 for (i = 0; i < nregs; i++)
37397 {
37398 int offset = i * sub_size;
37399 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
37400 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
37401 rtx sub_op2 = ((code == NOT)
37402 ? NULL_RTX
37403 : simplify_subreg (sub_mode, op2, mode, offset));
37404
37405 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
37406 complement_final_p, complement_op1_p,
37407 complement_op2_p);
37408 }
37409
37410 return;
37411 }
37412
37413 \f
37414 /* Return true if the peephole2 can combine an addis instruction and a load
37415 with an offset, so that the two can be fused together on
37416 a power8. */
37417
37418 bool
37419 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
37420 rtx addis_value, /* addis value. */
37421 rtx target, /* target register that is loaded. */
37422 rtx mem) /* bottom part of the memory addr. */
37423 {
37424 rtx addr;
37425 rtx base_reg;
37426
37427 /* Validate arguments. */
37428 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
37429 return false;
37430
37431 if (!base_reg_operand (target, GET_MODE (target)))
37432 return false;
37433
37434 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
37435 return false;
37436
37437 /* Allow sign/zero extension. */
37438 if (GET_CODE (mem) == ZERO_EXTEND
37439 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
37440 mem = XEXP (mem, 0);
37441
37442 if (!MEM_P (mem))
37443 return false;
37444
37445 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
37446 return false;
37447
37448 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
37449 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
37450 return false;
37451
37452 /* Validate that the register used to load the high value is either the
37453 register being loaded, or one whose use we can safely replace.
37454
37455 This function is only called from the peephole2 pass, and we assume that
37456 there are 2 instructions in the peephole (addis and load), so we check
37457 that the target register was not used in the memory address and that the
37458 register holding the addis result is dead after the peephole. */
37459 if (REGNO (addis_reg) != REGNO (target))
37460 {
37461 if (reg_mentioned_p (target, mem))
37462 return false;
37463
37464 if (!peep2_reg_dead_p (2, addis_reg))
37465 return false;
37466
37467 /* If the target register being loaded is the stack pointer, we must
37468 avoid loading any other value into it, even temporarily. */
37469 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
37470 return false;
37471 }
37472
37473 base_reg = XEXP (addr, 0);
37474 return REGNO (addis_reg) == REGNO (base_reg);
37475 }
37476
37477 /* During the peephole2 pass, adjust and expand the insns for a load fusion
37478 sequence. We adjust the addis register to use the target register. If the
37479 load sign extends, we adjust the code to do the zero extending load, and an
37480 explicit sign extension later since the fusion only covers zero extending
37481 loads.
37482
37483 The operands are:
37484 operands[0] register set with addis (to be replaced with target)
37485 operands[1] value set via addis
37486 operands[2] target register being loaded
37487 operands[3] D-form memory reference using operands[0]. */
37488
37489 void
37490 expand_fusion_gpr_load (rtx *operands)
37491 {
37492 rtx addis_value = operands[1];
37493 rtx target = operands[2];
37494 rtx orig_mem = operands[3];
37495 rtx new_addr, new_mem, orig_addr, offset;
37496 enum rtx_code plus_or_lo_sum;
37497 machine_mode target_mode = GET_MODE (target);
37498 machine_mode extend_mode = target_mode;
37499 machine_mode ptr_mode = Pmode;
37500 enum rtx_code extend = UNKNOWN;
37501
37502 if (GET_CODE (orig_mem) == ZERO_EXTEND
37503 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
37504 {
37505 extend = GET_CODE (orig_mem);
37506 orig_mem = XEXP (orig_mem, 0);
37507 target_mode = GET_MODE (orig_mem);
37508 }
37509
37510 gcc_assert (MEM_P (orig_mem));
37511
37512 orig_addr = XEXP (orig_mem, 0);
37513 plus_or_lo_sum = GET_CODE (orig_addr);
37514 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
37515
37516 offset = XEXP (orig_addr, 1);
37517 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
37518 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
37519
37520 if (extend != UNKNOWN)
37521 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
37522
37523 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
37524 UNSPEC_FUSION_GPR);
37525 emit_insn (gen_rtx_SET (target, new_mem));
37526
37527 if (extend == SIGN_EXTEND)
37528 {
37529 int sub_off = ((BYTES_BIG_ENDIAN)
37530 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
37531 : 0);
37532 rtx sign_reg
37533 = simplify_subreg (target_mode, target, extend_mode, sub_off);
37534
37535 emit_insn (gen_rtx_SET (target,
37536 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
37537 }
37538
37539 return;
37540 }
37541
37542 /* Emit the addis instruction that will be part of a fused instruction
37543 sequence. */
37544
37545 void
37546 emit_fusion_addis (rtx target, rtx addis_value)
37547 {
37548 rtx fuse_ops[10];
37549 const char *addis_str = NULL;
37550
37551 /* Emit the addis instruction. */
37552 fuse_ops[0] = target;
37553 if (satisfies_constraint_L (addis_value))
37554 {
37555 fuse_ops[1] = addis_value;
37556 addis_str = "lis %0,%v1";
37557 }
37558
37559 else if (GET_CODE (addis_value) == PLUS)
37560 {
37561 rtx op0 = XEXP (addis_value, 0);
37562 rtx op1 = XEXP (addis_value, 1);
37563
37564 if (REG_P (op0) && CONST_INT_P (op1)
37565 && satisfies_constraint_L (op1))
37566 {
37567 fuse_ops[1] = op0;
37568 fuse_ops[2] = op1;
37569 addis_str = "addis %0,%1,%v2";
37570 }
37571 }
37572
37573 else if (GET_CODE (addis_value) == HIGH)
37574 {
37575 rtx value = XEXP (addis_value, 0);
37576 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
37577 {
37578 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
37579 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
37580 if (TARGET_ELF)
37581 addis_str = "addis %0,%2,%1@toc@ha";
37582
37583 else if (TARGET_XCOFF)
37584 addis_str = "addis %0,%1@u(%2)";
37585
37586 else
37587 gcc_unreachable ();
37588 }
37589
37590 else if (GET_CODE (value) == PLUS)
37591 {
37592 rtx op0 = XEXP (value, 0);
37593 rtx op1 = XEXP (value, 1);
37594
37595 if (GET_CODE (op0) == UNSPEC
37596 && XINT (op0, 1) == UNSPEC_TOCREL
37597 && CONST_INT_P (op1))
37598 {
37599 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
37600 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
37601 fuse_ops[3] = op1;
37602 if (TARGET_ELF)
37603 addis_str = "addis %0,%2,%1+%3@toc@ha";
37604
37605 else if (TARGET_XCOFF)
37606 addis_str = "addis %0,%1+%3@u(%2)";
37607
37608 else
37609 gcc_unreachable ();
37610 }
37611 }
37612
37613 else if (satisfies_constraint_L (value))
37614 {
37615 fuse_ops[1] = value;
37616 addis_str = "lis %0,%v1";
37617 }
37618
37619 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
37620 {
37621 fuse_ops[1] = value;
37622 addis_str = "lis %0,%1@ha";
37623 }
37624 }
37625
37626 if (!addis_str)
37627 fatal_insn ("Could not generate addis value for fusion", addis_value);
37628
37629 output_asm_insn (addis_str, fuse_ops);
37630 }
37631
37632 /* Emit a D-form load or store instruction that is the second instruction
37633 of a fusion sequence. */
37634
37635 void
37636 emit_fusion_load_store (rtx load_store_reg, rtx addis_reg, rtx offset,
37637 const char *insn_str)
37638 {
37639 rtx fuse_ops[10];
37640 char insn_template[80];
37641
37642 fuse_ops[0] = load_store_reg;
37643 fuse_ops[1] = addis_reg;
37644
37645 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
37646 {
37647 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
37648 fuse_ops[2] = offset;
37649 output_asm_insn (insn_template, fuse_ops);
37650 }
37651
37652 else if (GET_CODE (offset) == UNSPEC
37653 && XINT (offset, 1) == UNSPEC_TOCREL)
37654 {
37655 if (TARGET_ELF)
37656 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
37657
37658 else if (TARGET_XCOFF)
37659 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
37660
37661 else
37662 gcc_unreachable ();
37663
37664 fuse_ops[2] = XVECEXP (offset, 0, 0);
37665 output_asm_insn (insn_template, fuse_ops);
37666 }
37667
37668 else if (GET_CODE (offset) == PLUS
37669 && GET_CODE (XEXP (offset, 0)) == UNSPEC
37670 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
37671 && CONST_INT_P (XEXP (offset, 1)))
37672 {
37673 rtx tocrel_unspec = XEXP (offset, 0);
37674 if (TARGET_ELF)
37675 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
37676
37677 else if (TARGET_XCOFF)
37678 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
37679
37680 else
37681 gcc_unreachable ();
37682
37683 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
37684 fuse_ops[3] = XEXP (offset, 1);
37685 output_asm_insn (insn_template, fuse_ops);
37686 }
37687
37688 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
37689 {
37690 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
37691
37692 fuse_ops[2] = offset;
37693 output_asm_insn (insn_template, fuse_ops);
37694 }
37695
37696 else
37697 fatal_insn ("Unable to generate load/store offset for fusion", offset);
37698
37699 return;
37700 }
37701
37702 /* Given an address, convert it into the addis and load offset parts. Addresses
37703 created during the peephole2 process look like:
37704 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
37705 (unspec [(...)] UNSPEC_TOCREL)) */
37706
37707 static void
37708 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
37709 {
37710 rtx hi, lo;
37711
37712 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
37713 {
37714 hi = XEXP (addr, 0);
37715 lo = XEXP (addr, 1);
37716 }
37717 else
37718 gcc_unreachable ();
37719
37720 *p_hi = hi;
37721 *p_lo = lo;
37722 }
37723
37724 /* Return a string to fuse an addis instruction with a gpr load into the same
37725 register that the addis instruction set. The address that is used
37726 is the logical address that was formed during peephole2:
37727 (lo_sum (high) (low-part))
37728
37729 The code is complicated, so we call output_asm_insn directly, and just
37730 return "". */
37731
37732 const char *
37733 emit_fusion_gpr_load (rtx target, rtx mem)
37734 {
37735 rtx addis_value;
37736 rtx addr;
37737 rtx load_offset;
37738 const char *load_str = NULL;
37739 machine_mode mode;
37740
37741 if (GET_CODE (mem) == ZERO_EXTEND)
37742 mem = XEXP (mem, 0);
37743
37744 gcc_assert (REG_P (target) && MEM_P (mem));
37745
37746 addr = XEXP (mem, 0);
37747 fusion_split_address (addr, &addis_value, &load_offset);
37748
37749 /* Now emit the load instruction to the same register. */
37750 mode = GET_MODE (mem);
37751 switch (mode)
37752 {
37753 case E_QImode:
37754 load_str = "lbz";
37755 break;
37756
37757 case E_HImode:
37758 load_str = "lhz";
37759 break;
37760
37761 case E_SImode:
37762 case E_SFmode:
37763 load_str = "lwz";
37764 break;
37765
37766 case E_DImode:
37767 case E_DFmode:
37768 gcc_assert (TARGET_POWERPC64);
37769 load_str = "ld";
37770 break;
37771
37772 default:
37773 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
37774 }
37775
37776 /* Emit the addis instruction. */
37777 emit_fusion_addis (target, addis_value);
37778
37779 /* Emit the D-form load instruction. */
37780 emit_fusion_load_store (target, target, load_offset, load_str);
37781
37782 return "";
37783 }
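/* A sketch of the fused sequence emitted for a TOC-relative SImode load on
   ELF (the symbol and register names are illustrative):

       addis r3,r2,sym@toc@ha   # from emit_fusion_addis
       lwz   r3,sym@toc@l(r3)   # from emit_fusion_load_store

   Both instructions target the same register, which is the form the
   power8 hardware can fuse.  */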
37784 \f
37785
37786 /* Return true if the peephole2 can combine an addis instruction with a
37787 load or store memory operation. This form of fusion was
37788 added to the ISA 3.0 (power9) hardware. */
37789
37790 bool
37791 fusion_p9_p (rtx addis_reg, /* register set via addis. */
37792 rtx addis_value, /* addis value. */
37793 rtx dest, /* destination (memory or register). */
37794 rtx src) /* source (register or memory). */
37795 {
37796 rtx addr, mem, offset;
37797 machine_mode mode = GET_MODE (src);
37798
37799 /* Validate arguments. */
37800 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
37801 return false;
37802
37803 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
37804 return false;
37805
37806 /* Ignore extend operations that are part of the load. */
37807 if (GET_CODE (src) == FLOAT_EXTEND || GET_CODE (src) == ZERO_EXTEND)
37808 src = XEXP (src, 0);
37809
37810 /* Test for memory<-register or register<-memory. */
37811 if (fpr_reg_operand (src, mode) || int_reg_operand (src, mode))
37812 {
37813 if (!MEM_P (dest))
37814 return false;
37815
37816 mem = dest;
37817 }
37818
37819 else if (MEM_P (src))
37820 {
37821 if (!fpr_reg_operand (dest, mode) && !int_reg_operand (dest, mode))
37822 return false;
37823
37824 mem = src;
37825 }
37826
37827 else
37828 return false;
37829
37830 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
37831 if (GET_CODE (addr) == PLUS)
37832 {
37833 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
37834 return false;
37835
37836 return satisfies_constraint_I (XEXP (addr, 1));
37837 }
37838
37839 else if (GET_CODE (addr) == LO_SUM)
37840 {
37841 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
37842 return false;
37843
37844 offset = XEXP (addr, 1);
37845 if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
37846 return small_toc_ref (offset, GET_MODE (offset));
37847
37848 else if (TARGET_ELF && !TARGET_POWERPC64)
37849 return CONSTANT_P (offset);
37850 }
37851
37852 return false;
37853 }
37854
37855 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
37856 load sequence.
37857
37858 The operands are:
37859 operands[0] register set with addis
37860 operands[1] value set via addis
37861 operands[2] target register being loaded
37862 operands[3] D-form memory reference using operands[0].
37863
37864 This is similar to the fusion introduced with power8, except it scales to
37865 both loads/stores and does not require the result register to be the same as
37866 the base register. At the moment, we only do this if the register set by
37867 the addis is dead. */
37868
37869 void
37870 expand_fusion_p9_load (rtx *operands)
37871 {
37872 rtx tmp_reg = operands[0];
37873 rtx addis_value = operands[1];
37874 rtx target = operands[2];
37875 rtx orig_mem = operands[3];
37876 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn;
37877 enum rtx_code plus_or_lo_sum;
37878 machine_mode target_mode = GET_MODE (target);
37879 machine_mode extend_mode = target_mode;
37880 machine_mode ptr_mode = Pmode;
37881 enum rtx_code extend = UNKNOWN;
37882
37883 if (GET_CODE (orig_mem) == FLOAT_EXTEND || GET_CODE (orig_mem) == ZERO_EXTEND)
37884 {
37885 extend = GET_CODE (orig_mem);
37886 orig_mem = XEXP (orig_mem, 0);
37887 target_mode = GET_MODE (orig_mem);
37888 }
37889
37890 gcc_assert (MEM_P (orig_mem));
37891
37892 orig_addr = XEXP (orig_mem, 0);
37893 plus_or_lo_sum = GET_CODE (orig_addr);
37894 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
37895
37896 offset = XEXP (orig_addr, 1);
37897 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
37898 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
37899
37900 if (extend != UNKNOWN)
37901 new_mem = gen_rtx_fmt_e (extend, extend_mode, new_mem);
37902
37903 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
37904 UNSPEC_FUSION_P9);
37905
37906 set = gen_rtx_SET (target, new_mem);
37907 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
37908 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
37909 emit_insn (insn);
37910
37911 return;
37912 }
37913
37914 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
37915 store sequence.
37916
37917 The operands are:
37918 operands[0] register set with addis
37919 operands[1] value set via addis
37920 operands[2] target D-form memory being stored to
37921 operands[3] register being stored
37922
37923 This is similar to the fusion introduced with power8, except it scales to
37924 both loads/stores and does not require the result register to be the same as
37925 the base register. At the moment, we only do this if the register set by
37926 the addis is dead. */
37927
37928 void
37929 expand_fusion_p9_store (rtx *operands)
37930 {
37931 rtx tmp_reg = operands[0];
37932 rtx addis_value = operands[1];
37933 rtx orig_mem = operands[2];
37934 rtx src = operands[3];
37935 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn, new_src;
37936 enum rtx_code plus_or_lo_sum;
37937 machine_mode target_mode = GET_MODE (orig_mem);
37938 machine_mode ptr_mode = Pmode;
37939
37940 gcc_assert (MEM_P (orig_mem));
37941
37942 orig_addr = XEXP (orig_mem, 0);
37943 plus_or_lo_sum = GET_CODE (orig_addr);
37944 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
37945
37946 offset = XEXP (orig_addr, 1);
37947 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
37948 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
37949
37950 new_src = gen_rtx_UNSPEC (target_mode, gen_rtvec (1, src),
37951 UNSPEC_FUSION_P9);
37952
37953 set = gen_rtx_SET (new_mem, new_src);
37954 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
37955 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
37956 emit_insn (insn);
37957
37958 return;
37959 }
37960
37961 /* Return a string to fuse an addis instruction with a load using extended
37962 fusion. The address that is used is the logical address that was formed
37963 during peephole2: (lo_sum (high) (low-part))
37964
37965 The code is complicated, so we call output_asm_insn directly, and just
37966 return "". */
37967
37968 const char *
37969 emit_fusion_p9_load (rtx reg, rtx mem, rtx tmp_reg)
37970 {
37971 machine_mode mode = GET_MODE (reg);
37972 rtx hi;
37973 rtx lo;
37974 rtx addr;
37975 const char *load_string;
37976 int r;
37977
37978 if (GET_CODE (mem) == FLOAT_EXTEND || GET_CODE (mem) == ZERO_EXTEND)
37979 {
37980 mem = XEXP (mem, 0);
37981 mode = GET_MODE (mem);
37982 }
37983
37984 if (GET_CODE (reg) == SUBREG)
37985 {
37986 gcc_assert (SUBREG_BYTE (reg) == 0);
37987 reg = SUBREG_REG (reg);
37988 }
37989
37990 if (!REG_P (reg))
37991 fatal_insn ("emit_fusion_p9_load, bad reg #1", reg);
37992
37993 r = REGNO (reg);
37994 if (FP_REGNO_P (r))
37995 {
37996 if (mode == SFmode)
37997 load_string = "lfs";
37998 else if (mode == DFmode || mode == DImode)
37999 load_string = "lfd";
38000 else
38001 gcc_unreachable ();
38002 }
38003 else if (ALTIVEC_REGNO_P (r) && TARGET_P9_VECTOR)
38004 {
38005 if (mode == SFmode)
38006 load_string = "lxssp";
38007 else if (mode == DFmode || mode == DImode)
38008 load_string = "lxsd";
38009 else
38010 gcc_unreachable ();
38011 }
38012 else if (INT_REGNO_P (r))
38013 {
38014 switch (mode)
38015 {
38016 case E_QImode:
38017 load_string = "lbz";
38018 break;
38019 case E_HImode:
38020 load_string = "lhz";
38021 break;
38022 case E_SImode:
38023 case E_SFmode:
38024 load_string = "lwz";
38025 break;
38026 case E_DImode:
38027 case E_DFmode:
38028 if (!TARGET_POWERPC64)
38029 gcc_unreachable ();
38030 load_string = "ld";
38031 break;
38032 default:
38033 gcc_unreachable ();
38034 }
38035 }
38036 else
38037 fatal_insn ("emit_fusion_p9_load, bad reg #2", reg);
38038
38039 if (!MEM_P (mem))
38040 fatal_insn ("emit_fusion_p9_load not MEM", mem);
38041
38042 addr = XEXP (mem, 0);
38043 fusion_split_address (addr, &hi, &lo);
38044
38045 /* Emit the addis instruction. */
38046 emit_fusion_addis (tmp_reg, hi);
38047
38048 /* Emit the D-form load instruction. */
38049 emit_fusion_load_store (reg, tmp_reg, lo, load_string);
38050
38051 return "";
38052 }
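/* A sketch of what this emits for a DFmode TOC-relative load on 64-bit
   ELF (the names are illustrative); unlike the power8 form, the addis
   writes a separate temporary rather than the loaded register:

       addis rTMP,r2,sym@toc@ha
       lfd   f1,sym@toc@l(rTMP)  */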
38053
38054 /* Return a string to fuse an addis instruction with a store using extended
38055 fusion. The address that is used is the logical address that was formed
38056 during peephole2: (lo_sum (high) (low-part))
38057
38058 The code is complicated, so we call output_asm_insn directly, and just
38059 return "". */
38060
38061 const char *
38062 emit_fusion_p9_store (rtx mem, rtx reg, rtx tmp_reg)
38063 {
38064 machine_mode mode = GET_MODE (reg);
38065 rtx hi;
38066 rtx lo;
38067 rtx addr;
38068 const char *store_string;
38069 int r;
38070
38071 if (GET_CODE (reg) == SUBREG)
38072 {
38073 gcc_assert (SUBREG_BYTE (reg) == 0);
38074 reg = SUBREG_REG (reg);
38075 }
38076
38077 if (!REG_P (reg))
38078 fatal_insn ("emit_fusion_p9_store, bad reg #1", reg);
38079
38080 r = REGNO (reg);
38081 if (FP_REGNO_P (r))
38082 {
38083 if (mode == SFmode)
38084 store_string = "stfs";
38085 else if (mode == DFmode)
38086 store_string = "stfd";
38087 else
38088 gcc_unreachable ();
38089 }
38090 else if (ALTIVEC_REGNO_P (r) && TARGET_P9_VECTOR)
38091 {
38092 if (mode == SFmode)
38093 store_string = "stxssp";
38094 else if (mode == DFmode || mode == DImode)
38095 store_string = "stxsd";
38096 else
38097 gcc_unreachable ();
38098 }
38099 else if (INT_REGNO_P (r))
38100 {
38101 switch (mode)
38102 {
38103 case E_QImode:
38104 store_string = "stb";
38105 break;
38106 case E_HImode:
38107 store_string = "sth";
38108 break;
38109 case E_SImode:
38110 case E_SFmode:
38111 store_string = "stw";
38112 break;
38113 case E_DImode:
38114 case E_DFmode:
38115 if (!TARGET_POWERPC64)
38116 gcc_unreachable ();
38117 store_string = "std";
38118 break;
38119 default:
38120 gcc_unreachable ();
38121 }
38122 }
38123 else
38124 fatal_insn ("emit_fusion_p9_store, bad reg #2", reg);
38125
38126 if (!MEM_P (mem))
38127 fatal_insn ("emit_fusion_p9_store not MEM", mem);
38128
38129 addr = XEXP (mem, 0);
38130 fusion_split_address (addr, &hi, &lo);
38131
38132 /* Emit the addis instruction. */
38133 emit_fusion_addis (tmp_reg, hi);
38134
38135 /* Emit the D-form store instruction. */
38136 emit_fusion_load_store (reg, tmp_reg, lo, store_string);
38137
38138 return "";
38139 }
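
/* The store analogue of the sketch following emit_fusion_p9_load, e.g.
   (operands illustrative only):

	addis 9,2,sym@toc@ha
	stw 3,sym@toc@l(9)  */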
38140
38141 #ifdef RS6000_GLIBC_ATOMIC_FENV
38142 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
38143 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
38144 #endif
38145
38146 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
38147
38148 static void
38149 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
38150 {
38151 if (!TARGET_HARD_FLOAT)
38152 {
38153 #ifdef RS6000_GLIBC_ATOMIC_FENV
38154 if (atomic_hold_decl == NULL_TREE)
38155 {
38156 atomic_hold_decl
38157 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
38158 get_identifier ("__atomic_feholdexcept"),
38159 build_function_type_list (void_type_node,
38160 double_ptr_type_node,
38161 NULL_TREE));
38162 TREE_PUBLIC (atomic_hold_decl) = 1;
38163 DECL_EXTERNAL (atomic_hold_decl) = 1;
38164 }
38165
38166 if (atomic_clear_decl == NULL_TREE)
38167 {
38168 atomic_clear_decl
38169 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
38170 get_identifier ("__atomic_feclearexcept"),
38171 build_function_type_list (void_type_node,
38172 NULL_TREE));
38173 TREE_PUBLIC (atomic_clear_decl) = 1;
38174 DECL_EXTERNAL (atomic_clear_decl) = 1;
38175 }
38176
38177 tree const_double = build_qualified_type (double_type_node,
38178 TYPE_QUAL_CONST);
38179 tree const_double_ptr = build_pointer_type (const_double);
38180 if (atomic_update_decl == NULL_TREE)
38181 {
38182 atomic_update_decl
38183 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
38184 get_identifier ("__atomic_feupdateenv"),
38185 build_function_type_list (void_type_node,
38186 const_double_ptr,
38187 NULL_TREE));
38188 TREE_PUBLIC (atomic_update_decl) = 1;
38189 DECL_EXTERNAL (atomic_update_decl) = 1;
38190 }
38191
38192 tree fenv_var = create_tmp_var_raw (double_type_node);
38193 TREE_ADDRESSABLE (fenv_var) = 1;
38194 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);
38195
38196 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
38197 *clear = build_call_expr (atomic_clear_decl, 0);
38198 *update = build_call_expr (atomic_update_decl, 1,
38199 fold_convert (const_double_ptr, fenv_addr));
38200 #endif
38201 return;
38202 }
38203
38204 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
38205 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
38206 tree call_mffs = build_call_expr (mffs, 0);
38207
38208 /* Generates the equivalent of feholdexcept (&fenv_var)
38209
38210 fenv_var = __builtin_mffs ();
38211 double fenv_hold;
38212 *(uint64_t*)&fenv_hold = *(uint64_t*)&fenv_var & 0xffffffff00000007LL;
38213 __builtin_mtfsf (0xff, fenv_hold); */
38214
38215 /* Mask to clear everything except for the rounding modes and non-IEEE
38216 arithmetic flag. */
38217 const unsigned HOST_WIDE_INT hold_exception_mask =
38218 HOST_WIDE_INT_C (0xffffffff00000007);
38219
38220 tree fenv_var = create_tmp_var_raw (double_type_node);
38221
38222 tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
38223
38224 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
38225 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
38226 build_int_cst (uint64_type_node,
38227 hold_exception_mask));
38228
38229 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
38230 fenv_llu_and);
38231
38232 tree hold_mtfsf = build_call_expr (mtfsf, 2,
38233 build_int_cst (unsigned_type_node, 0xff),
38234 fenv_hold_mtfsf);
38235
38236 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
38237
38238 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
38239
38240 double fenv_clear = __builtin_mffs ();
38241 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
38242 __builtin_mtfsf (0xff, fenv_clear); */
38243
38244 /* Mask that clears the entire lower word of the FPSCR image: all
38245 exception flags, enable bits, and rounding modes. */
38246 const unsigned HOST_WIDE_INT clear_exception_mask =
38247 HOST_WIDE_INT_C (0xffffffff00000000);
38248
38249 tree fenv_clear = create_tmp_var_raw (double_type_node);
38250
38251 tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
38252
38253 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
38254 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
38255 fenv_clean_llu,
38256 build_int_cst (uint64_type_node,
38257 clear_exception_mask));
38258
38259 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
38260 fenv_clear_llu_and);
38261
38262 tree clear_mtfsf = build_call_expr (mtfsf, 2,
38263 build_int_cst (unsigned_type_node, 0xff),
38264 fenv_clear_mtfsf);
38265
38266 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
38267
38268 /* Generates the equivalent of feupdateenv (&fenv_var)
38269
38270 double old_fenv = __builtin_mffs ();
38271 double fenv_update;
38272 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
38273 (*(uint64_t*)&fenv_var & 0x1ff80fff);
38274 __builtin_mtfsf (0xff, fenv_update); */
38275
38276 const unsigned HOST_WIDE_INT update_exception_mask =
38277 HOST_WIDE_INT_C (0xffffffff1fffff00);
38278 const unsigned HOST_WIDE_INT new_exception_mask =
38279 HOST_WIDE_INT_C (0x1ff80fff);
38280
38281 tree old_fenv = create_tmp_var_raw (double_type_node);
38282 tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
38283
38284 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
38285 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
38286 build_int_cst (uint64_type_node,
38287 update_exception_mask));
38288
38289 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
38290 build_int_cst (uint64_type_node,
38291 new_exception_mask));
38292
38293 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
38294 old_llu_and, new_llu_and);
38295
38296 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
38297 new_llu_mask);
38298
38299 tree update_mtfsf = build_call_expr (mtfsf, 2,
38300 build_int_cst (unsigned_type_node, 0xff),
38301 fenv_update_mtfsf);
38302
38303 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
38304 }
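
/* A rough sketch (pseudo-code, not the exact expansion) of how the middle
   end consumes the three sequences built above for an atomic compound
   assignment such as "x /= y" on an _Atomic double:

	HOLD;				// save env, clear exception flags
     retry:
	old = atomic load of x;
	new = old / y;
	if (!compare_exchange (&x, &old, new))
	  { CLEAR; goto retry; }	// discard spurious flags, try again
	UPDATE;				// merge new flags into saved env  */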
38305
38306 void
38307 rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
38308 {
38309 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
38310
38311 rtx_tmp0 = gen_reg_rtx (V2DFmode);
38312 rtx_tmp1 = gen_reg_rtx (V2DFmode);
38313
38314 /* The element layout of the vmrgew instruction's destination is:
38315 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
38316 Set up rtx_tmp0 and rtx_tmp1 so that the order of the elements after the
38317 vmrgew instruction is correct. */
38318 if (BYTES_BIG_ENDIAN)
38319 {
38320 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
38321 GEN_INT (0)));
38322 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
38323 GEN_INT (3)));
38324 }
38325 else
38326 {
38327 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
38328 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
38329 }
38330
38331 rtx_tmp2 = gen_reg_rtx (V4SFmode);
38332 rtx_tmp3 = gen_reg_rtx (V4SFmode);
38333
38334 emit_insn (gen_vsx_xvcdpsp (rtx_tmp2, rtx_tmp0));
38335 emit_insn (gen_vsx_xvcdpsp (rtx_tmp3, rtx_tmp1));
38336
38337 if (BYTES_BIG_ENDIAN)
38338 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
38339 else
38340 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
38341 }
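
/* Net effect, as a sketch: with src1 = {a0, a1} and src2 = {b0, b1} (V2DF),
   the sequence above yields the V4SF vector
   { (float) a0, (float) a1, (float) b0, (float) b1 }; the xxpermdi pair
   pre-interleaves the doublewords so the even-word merge drops each
   converted value into its proper lane.  */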
38342
38343 void
38344 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
38345 {
38346 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
38347
38348 rtx_tmp0 = gen_reg_rtx (V2DImode);
38349 rtx_tmp1 = gen_reg_rtx (V2DImode);
38350
38351 /* The element layout of the vmrgew instruction's destination is:
38352 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
38353 Set up rtx_tmp0 and rtx_tmp1 so that the order of the elements after the
38354 vmrgew instruction is correct. */
38355 if (BYTES_BIG_ENDIAN)
38356 {
38357 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
38358 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
38359 }
38360 else
38361 {
38362 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
38363 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
38364 }
38365
38366 rtx_tmp2 = gen_reg_rtx (V4SFmode);
38367 rtx_tmp3 = gen_reg_rtx (V4SFmode);
38368
38369 if (signed_convert)
38370 {
38371 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
38372 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
38373 }
38374 else
38375 {
38376 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
38377 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
38378 }
38379
38380 if (BYTES_BIG_ENDIAN)
38381 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
38382 else
38383 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
38384 }
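
/* Same lane shuffle as rs6000_generate_float2_double_code, but with V2DI
   inputs: as a sketch, src1 = {i0, i1} and src2 = {j0, j1} become
   { (float) i0, (float) i1, (float) j0, (float) j1 }, using the signed
   (xvcvsxdsp) or unsigned (xvcvuxdsp) doubleword-to-float conversion.  */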
38385
38386 void
38387 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
38388 rtx src2)
38389 {
38390 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
38391
38392 rtx_tmp0 = gen_reg_rtx (V2DFmode);
38393 rtx_tmp1 = gen_reg_rtx (V2DFmode);
38394
38395 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
38396 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
38397
38398 rtx_tmp2 = gen_reg_rtx (V4SImode);
38399 rtx_tmp3 = gen_reg_rtx (V4SImode);
38400
38401 if (signed_convert)
38402 {
38403 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
38404 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
38405 }
38406 else
38407 {
38408 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
38409 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
38410 }
38411
38412 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
38413 }
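
/* Sketch of the intent: the four double-precision elements of src1 and
   src2 are converted to 32-bit integers (signed via xvcvdpsxws, unsigned
   via xvcvdpuxws) and packed into a single V4SI, nominally
   { (int) src1[0], (int) src1[1], (int) src2[0], (int) src2[1] }.  */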
38414
38415 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
38416
38417 static bool
38418 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
38419 optimization_type opt_type)
38420 {
38421 switch (op)
38422 {
38423 case rsqrt_optab:
38424 return (opt_type == OPTIMIZE_FOR_SPEED
38425 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
38426
38427 default:
38428 return true;
38429 }
38430 }
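
/* Example of what this hook gates (illustrative): when compiling for speed
   with the reciprocal estimates enabled for the mode (e.g. -mrecip under
   -ffast-math), a use such as

	float r = 1.0f / sqrtf (x);

   may expand through the rsqrt optab to a reciprocal-square-root-estimate
   instruction refined by Newton-Raphson steps; when optimizing for size
   the optab is refused and an ordinary square root plus divide is emitted
   instead.  */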
38431
38432 /* Implement TARGET_CONSTANT_ALIGNMENT. */
38433
38434 static HOST_WIDE_INT
38435 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
38436 {
38437 if (TREE_CODE (exp) == STRING_CST
38438 && (STRICT_ALIGNMENT || !optimize_size))
38439 return MAX (align, BITS_PER_WORD);
38440 return align;
38441 }
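
/* For example (illustrative): on a 64-bit target this pads the alignment
   of a string constant such as "hello" up to 64 bits so word-at-a-time
   block moves and compares stay aligned; under -Os on a target without
   strict alignment the padding is skipped to save space.  */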
38442
38443 /* Implement TARGET_STARTING_FRAME_OFFSET. */
38444
38445 static HOST_WIDE_INT
38446 rs6000_starting_frame_offset (void)
38447 {
38448 if (FRAME_GROWS_DOWNWARD)
38449 return 0;
38450 return RS6000_STARTING_FRAME_OFFSET;
38451 }
38452 \f
38453
38454 /* Create an alias for a mangled name where we have changed the mangling (in
38455 GCC 8.1, we used U10__float128, and now we use u9__ieee128). This is called
38456 via the target hook TARGET_ASM_GLOBALIZE_DECL_NAME. */
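
/* For example (mangled names illustrative): for a function declared as
   "void f (__ieee128)" the current mangling is _Z1fu9__ieee128, and for
   such symbols this hook additionally emits

	.weak _Z1fU10__float128
	.set _Z1fU10__float128,_Z1fu9__ieee128

   so objects built with the GCC 8.1 mangling still link against it.  */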
38457
38458 #if TARGET_ELF && RS6000_WEAK
38459 static void
38460 rs6000_globalize_decl_name (FILE * stream, tree decl)
38461 {
38462 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
38463
38464 targetm.asm_out.globalize_label (stream, name);
38465
38466 if (rs6000_passes_ieee128 && name[0] == '_' && name[1] == 'Z')
38467 {
38468 tree save_asm_name = DECL_ASSEMBLER_NAME (decl);
38469 const char *old_name;
38470
38471 ieee128_mangling_gcc_8_1 = true;
38472 lang_hooks.set_decl_assembler_name (decl);
38473 old_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
38474 SET_DECL_ASSEMBLER_NAME (decl, save_asm_name);
38475 ieee128_mangling_gcc_8_1 = false;
38476
38477 if (strcmp (name, old_name) != 0)
38478 {
38479 fprintf (stream, "\t.weak %s\n", old_name);
38480 fprintf (stream, "\t.set %s,%s\n", old_name, name);
38481 }
38482 }
38483 }
38484 #endif
38485
38486 \f
38487 struct gcc_target targetm = TARGET_INITIALIZER;
38488
38489 #include "gt-rs6000.h"