1 /* Subroutines used for code generation on IBM RS/6000.
2 Copyright (C) 1991-2020 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #define IN_TARGET_CODE 1
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "memmodel.h"
30 #include "gimple.h"
31 #include "cfghooks.h"
32 #include "cfgloop.h"
33 #include "df.h"
34 #include "tm_p.h"
35 #include "stringpool.h"
36 #include "expmed.h"
37 #include "optabs.h"
38 #include "regs.h"
39 #include "ira.h"
40 #include "recog.h"
41 #include "cgraph.h"
42 #include "diagnostic-core.h"
43 #include "insn-attr.h"
44 #include "flags.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "attribs.h"
48 #include "stor-layout.h"
49 #include "calls.h"
50 #include "print-tree.h"
51 #include "varasm.h"
52 #include "explow.h"
53 #include "expr.h"
54 #include "output.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
57 #include "reload.h"
58 #include "sched-int.h"
59 #include "gimplify.h"
60 #include "gimple-fold.h"
61 #include "gimple-iterator.h"
62 #include "gimple-walk.h"
63 #include "ssa.h"
64 #include "tree-vectorizer.h"
65 #include "tree-ssa-propagate.h"
66 #include "intl.h"
67 #include "tm-constrs.h"
68 #include "target-globals.h"
69 #include "builtins.h"
70 #include "tree-vector-builder.h"
71 #include "context.h"
72 #include "tree-pass.h"
73 #include "except.h"
74 #if TARGET_XCOFF
75 #include "xcoffout.h" /* get declarations of xcoff_*_section_name */
76 #endif
77 #include "case-cfn-macros.h"
78 #include "ppc-auxv.h"
79 #include "rs6000-internal.h"
80 #include "opts.h"
81
82 /* This file should be included last. */
83 #include "target-def.h"
84
85 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
86 systems will also set long double to be IEEE 128-bit. AIX and Darwin
87 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
88 those systems will not pick up this default. This needs to be after all
89 of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
90 properly defined. */
91 #ifndef TARGET_IEEEQUAD_DEFAULT
92 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
93 #define TARGET_IEEEQUAD_DEFAULT 1
94 #else
95 #define TARGET_IEEEQUAD_DEFAULT 0
96 #endif
97 #endif
98
99 /* Don't enable PC-relative addressing if the target does not support it. */
100 #ifndef PCREL_SUPPORTED_BY_OS
101 #define PCREL_SUPPORTED_BY_OS 0
102 #endif
103
104 /* Support targetm.vectorize.builtin_mask_for_load. */
105 tree altivec_builtin_mask_for_load;
106
107 #ifdef USING_ELFOS_H
108 /* Counter for labels which are to be placed in .fixup. */
109 int fixuplabelno = 0;
110 #endif
111
112 /* Whether to use variant of AIX ABI for PowerPC64 Linux. */
113 int dot_symbols;
114
115 /* Specify the machine mode that pointers have. After generation of rtl, the
116 compiler makes no further distinction between pointers and any other objects
117 of this machine mode. */
118 scalar_int_mode rs6000_pmode;
119
120 #if TARGET_ELF
121 /* Note whether IEEE 128-bit floating point was passed or returned, either as
122 the __float128/_Float128 explicit type, or when long double is IEEE 128-bit
123 floating point. We changed the default C++ mangling for these types and we
124 may want to generate a weak alias of the old mangling (U10__float128) to the
125 new mangling (u9__ieee128). */
126 bool rs6000_passes_ieee128 = false;
127 #endif
128
129 /* Generate the mangled name (i.e. U10__float128) used in GCC 8.1, and not the
130 name used in current releases (i.e. u9__ieee128). */
131 static bool ieee128_mangling_gcc_8_1;
132
133 /* Width in bits of a pointer. */
134 unsigned rs6000_pointer_size;
135
136 #ifdef HAVE_AS_GNU_ATTRIBUTE
137 # ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
138 # define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
139 # endif
140 /* Flag whether floating point values have been passed/returned.
141 Note that this doesn't say whether fprs are used, since the
142 Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
143 should be set for soft-float values passed in gprs and ieee128
144 values passed in vsx registers. */
145 bool rs6000_passes_float = false;
146 bool rs6000_passes_long_double = false;
147 /* Flag whether vector values have been passed/returned. */
148 bool rs6000_passes_vector = false;
149 /* Flag whether small (<= 8 byte) structures have been returned. */
150 bool rs6000_returns_struct = false;
151 #endif
152
153 /* Value is TRUE if register/mode pair is acceptable. */
154 static bool rs6000_hard_regno_mode_ok_p
155 [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
156
157 /* Maximum number of registers needed for a given register class and mode. */
158 unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];
159
160 /* How many registers are needed for a given register and mode. */
161 unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
162
163 /* Map register number to register class. */
164 enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];
165
166 static int dbg_cost_ctrl;
167
168 /* Built in types. */
169 tree rs6000_builtin_types[RS6000_BTI_MAX];
170 tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];
171
172 /* Flag to say the TOC is initialized.  */
173 int toc_initialized, need_toc_init;
174 char toc_label_name[10];
175
176 /* Cached value of rs6000_variable_issue. This is cached in
177 rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */
178 static short cached_can_issue_more;
179
180 static GTY(()) section *read_only_data_section;
181 static GTY(()) section *private_data_section;
182 static GTY(()) section *tls_data_section;
183 static GTY(()) section *tls_private_data_section;
184 static GTY(()) section *read_only_private_data_section;
185 static GTY(()) section *sdata2_section;
186
187 section *toc_section = 0;
188
189 /* Describe the vector unit used for modes. */
190 enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
191 enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];
192
193 /* Register classes for various constraints that are based on the target
194 switches. */
195 enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
196
197 /* Describe the alignment of a vector. */
198 int rs6000_vector_align[NUM_MACHINE_MODES];
199
200 /* Map selected modes to types for builtins. */
201 tree builtin_mode_to_type[MAX_MACHINE_MODE][2];
202
203 /* What modes to automatically generate reciprocal divide estimate (fre) and
204 reciprocal sqrt (frsqrte) for. */
205 unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
206
207 /* Masks to determine which reciprocal estimate instructions to generate
208 automatically. */
209 enum rs6000_recip_mask {
210 RECIP_SF_DIV = 0x001, /* Use divide estimate */
211 RECIP_DF_DIV = 0x002,
212 RECIP_V4SF_DIV = 0x004,
213 RECIP_V2DF_DIV = 0x008,
214
215 RECIP_SF_RSQRT = 0x010, /* Use reciprocal sqrt estimate. */
216 RECIP_DF_RSQRT = 0x020,
217 RECIP_V4SF_RSQRT = 0x040,
218 RECIP_V2DF_RSQRT = 0x080,
219
220 /* Various combination of flags for -mrecip=xxx. */
221 RECIP_NONE = 0,
222 RECIP_ALL = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
223 | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
224 | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),
225
226 RECIP_HIGH_PRECISION = RECIP_ALL,
227
228 /* On low precision machines like the power5, don't enable double precision
229 reciprocal square root estimate, since it isn't accurate enough. */
230 RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
231 };
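/* Editorial sketch, not part of the GCC source: the mask bits above are
   tested bitwise.  Assuming a variable MASK holding the combined -mrecip
   selections, a check for single-precision divide estimates would look
   like:

     if ((mask & RECIP_SF_DIV) != 0)
       ... generate fres-based reciprocal sequences for SFmode ...
*/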
232
233 /* -mrecip options. */
234 static struct
235 {
236 const char *string; /* option name */
237 unsigned int mask; /* mask bits to set */
238 } recip_options[] = {
239 { "all", RECIP_ALL },
240 { "none", RECIP_NONE },
241 { "div", (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
242 | RECIP_V2DF_DIV) },
243 { "divf", (RECIP_SF_DIV | RECIP_V4SF_DIV) },
244 { "divd", (RECIP_DF_DIV | RECIP_V2DF_DIV) },
245 { "rsqrt", (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
246 | RECIP_V2DF_RSQRT) },
247 { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
248 { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
249 };
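/* Editorial sketch, not part of the GCC source: an option string such as
   -mrecip=divf,rsqrtd is matched against this table token by token.  A
   minimal lookup, assuming ARG points at one comma-separated token, might
   be:

     unsigned int mask = 0;
     for (size_t i = 0; i < ARRAY_SIZE (recip_options); i++)
       if (strcmp (arg, recip_options[i].string) == 0)
	 mask |= recip_options[i].mask;

   The actual parsing is done during option override.  */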
250
251 /* On PowerPC, we have a limited number of target clones that we care about
252 which means we can use an array to hold the options, rather than having more
253 elaborate data structures to identify each possible variation. Order the
254 clones from the default to the highest ISA. */
255 enum {
256 CLONE_DEFAULT = 0, /* default clone. */
257 CLONE_ISA_2_05, /* ISA 2.05 (power6). */
258 CLONE_ISA_2_06, /* ISA 2.06 (power7). */
259 CLONE_ISA_2_07, /* ISA 2.07 (power8). */
260 CLONE_ISA_3_00, /* ISA 3.0 (power9). */
261 CLONE_ISA_3_1, /* ISA 3.1 (power10). */
262 CLONE_MAX
263 };
264
265 /* Map compiler ISA bits into HWCAP names. */
266 struct clone_map {
267 HOST_WIDE_INT isa_mask; /* rs6000_isa mask */
268 const char *name; /* name to use in __builtin_cpu_supports. */
269 };
270
271 static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
272 { 0, "" }, /* Default options. */
273 { OPTION_MASK_CMPB, "arch_2_05" }, /* ISA 2.05 (power6). */
274 { OPTION_MASK_POPCNTD, "arch_2_06" }, /* ISA 2.06 (power7). */
275 { OPTION_MASK_P8_VECTOR, "arch_2_07" }, /* ISA 2.07 (power8). */
276 { OPTION_MASK_P9_VECTOR, "arch_3_00" }, /* ISA 3.0 (power9). */
277 { OPTION_MASK_POWER10, "arch_3_1" }, /* ISA 3.1 (power10). */
278 };
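/* Editorial sketch, not part of the GCC source: user code selects among
   these clones at run time via the HWCAP names above, e.g.:

     if (__builtin_cpu_supports ("arch_3_00"))
       ... the ISA 3.0 (power9) clone can be used ...
*/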
279
280
281 /* Newer LIBCs explicitly export this symbol to declare that they provide
282 the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. We emit a
283 reference to this symbol whenever we expand a CPU builtin, so that
284 we never link against an old LIBC. */
285 const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";
286
287 /* True if we have expanded a CPU builtin. */
288 bool cpu_builtin_p = false;
289
290 /* Pointer to function (in rs6000-c.c) that can define or undefine target
291 macros that have changed. Languages that don't support the preprocessor
292 don't link in rs6000-c.c, so we can't call it directly. */
293 void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
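/* Editorial sketch, not part of the GCC source: since the pointer is null
   when rs6000-c.c is not linked in, call sites guard it, roughly:

     if (rs6000_target_modify_macros_ptr)
       rs6000_target_modify_macros_ptr (false, prev_flags, new_flags);

   where PREV_FLAGS and NEW_FLAGS are hypothetical names for the two
   HOST_WIDE_INT mask arguments.  */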
294
295 /* Simplify register classes into simpler classifications.  We assume
296    GPR_REG_TYPE through FPR_REG_TYPE are ordered so that we can use a simple
297    range check for standard register classes (gpr/floating/altivec/vsx) and
298    floating/vector classes (float/altivec/vsx).  */
299
300 enum rs6000_reg_type {
301 NO_REG_TYPE,
302 PSEUDO_REG_TYPE,
303 GPR_REG_TYPE,
304 VSX_REG_TYPE,
305 ALTIVEC_REG_TYPE,
306 FPR_REG_TYPE,
307 SPR_REG_TYPE,
308 CR_REG_TYPE
309 };
310
311 /* Map register class to register type. */
312 static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
313
314 /* First/last register type for the 'normal' register types (i.e. general
315 purpose, floating point, altivec, and VSX registers). */
316 #define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
317
318 #define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
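/* Editorial sketch, not part of the GCC source: the ordering assumption
   lets one IN_RANGE test replace several class comparisons, e.g.:

     enum rs6000_reg_type rtype = reg_class_to_reg_type[(int) rclass];
     if (IS_STD_REG_TYPE (rtype))
       ... rtype is one of GPR, VSX, Altivec, or FPR ...
*/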
319
320
321 /* Register classes we care about in secondary reload or when checking for
322    a legitimate address.  We only need to worry about GPR, FPR, and Altivec
323    registers here, along with an ANY field that is the OR of the 3 classes.  */
324
325 enum rs6000_reload_reg_type {
326 RELOAD_REG_GPR, /* General purpose registers. */
327 RELOAD_REG_FPR, /* Traditional floating point regs. */
328 RELOAD_REG_VMX, /* Altivec (VMX) registers. */
329 RELOAD_REG_ANY, /* OR of GPR, FPR, Altivec masks. */
330 N_RELOAD_REG
331 };
332
333 /* For setting up register classes, loop through the 3 register classes mapping
334 into real registers, and skip the ANY class, which is just an OR of the
335 bits. */
336 #define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
337 #define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX
338
339 /* Map reload register type to a register in the register class. */
340 struct reload_reg_map_type {
341 const char *name; /* Register class name. */
342 int reg; /* Register in the register class. */
343 };
344
345 static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
346 { "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */
347 { "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */
348 { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. */
349 { "Any", -1 }, /* RELOAD_REG_ANY. */
350 };
351
352 /* Mask bits for each register class, indexed per mode. Historically the
353    compiler has been more restrictive about which types can do PRE_MODIFY
354    instead of PRE_INC and PRE_DEC, so keep separate bits for these two.  */
355 typedef unsigned char addr_mask_type;
356
357 #define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
358 #define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers. */
359 #define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing. */
360 #define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */
361 #define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */
362 #define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */
363 #define RELOAD_REG_AND_M16 0x40 /* AND -16 addressing. */
364 #define RELOAD_REG_QUAD_OFFSET 0x80 /* quad offset is limited. */
365
366 /* Reload insns and masks of valid addressing modes, indexed by register type.  */
367 struct rs6000_reg_addr {
368 enum insn_code reload_load; /* INSN to reload for loading. */
369 enum insn_code reload_store; /* INSN to reload for storing. */
370 enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR. */
371 enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX. */
372 enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR. */
373 addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. */
374 bool scalar_in_vmx_p; /* Scalar value can go in VMX. */
375 };
376
377 static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
378
379 /* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
380 static inline bool
381 mode_supports_pre_incdec_p (machine_mode mode)
382 {
383 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
384 != 0);
385 }
386
387 /* Helper function to say whether a mode supports PRE_MODIFY. */
388 static inline bool
389 mode_supports_pre_modify_p (machine_mode mode)
390 {
391 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
392 != 0);
393 }
394
395 /* Return true if we have D-form addressing in altivec registers. */
396 static inline bool
397 mode_supports_vmx_dform (machine_mode mode)
398 {
399 return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
400 }
401
402 /* Return true if we have D-form addressing in VSX registers. This addressing
403 is more limited than normal d-form addressing in that the offset must be
404 aligned on a 16-byte boundary. */
405 static inline bool
406 mode_supports_dq_form (machine_mode mode)
407 {
408 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
409 != 0);
410 }
411
412 /* Given that there exists at least one variable that is set (produced)
413 by OUT_INSN and read (consumed) by IN_INSN, return true iff
414 IN_INSN represents one or more memory store operations and none of
415 the variables set by OUT_INSN is used by IN_INSN as the address of a
416 store operation. If either IN_INSN or OUT_INSN does not represent
417 a "single" RTL SET expression (as loosely defined by the
418 implementation of the single_set function) or a PARALLEL with only
419 SETs, CLOBBERs, and USEs inside, this function returns false.
420
421 This rs6000-specific version of store_data_bypass_p checks for
422 certain conditions that result in assertion failures (and internal
423 compiler errors) in the generic store_data_bypass_p function and
424 returns false rather than calling store_data_bypass_p if one of the
425 problematic conditions is detected. */
426
427 int
428 rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
429 {
430 rtx out_set, in_set;
431 rtx out_pat, in_pat;
432 rtx out_exp, in_exp;
433 int i, j;
434
435 in_set = single_set (in_insn);
436 if (in_set)
437 {
438 if (MEM_P (SET_DEST (in_set)))
439 {
440 out_set = single_set (out_insn);
441 if (!out_set)
442 {
443 out_pat = PATTERN (out_insn);
444 if (GET_CODE (out_pat) == PARALLEL)
445 {
446 for (i = 0; i < XVECLEN (out_pat, 0); i++)
447 {
448 out_exp = XVECEXP (out_pat, 0, i);
449 if ((GET_CODE (out_exp) == CLOBBER)
450 || (GET_CODE (out_exp) == USE))
451 continue;
452 else if (GET_CODE (out_exp) != SET)
453 return false;
454 }
455 }
456 }
457 }
458 }
459 else
460 {
461 in_pat = PATTERN (in_insn);
462 if (GET_CODE (in_pat) != PARALLEL)
463 return false;
464
465 for (i = 0; i < XVECLEN (in_pat, 0); i++)
466 {
467 in_exp = XVECEXP (in_pat, 0, i);
468 if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
469 continue;
470 else if (GET_CODE (in_exp) != SET)
471 return false;
472
473 if (MEM_P (SET_DEST (in_exp)))
474 {
475 out_set = single_set (out_insn);
476 if (!out_set)
477 {
478 out_pat = PATTERN (out_insn);
479 if (GET_CODE (out_pat) != PARALLEL)
480 return false;
481 for (j = 0; j < XVECLEN (out_pat, 0); j++)
482 {
483 out_exp = XVECEXP (out_pat, 0, j);
484 if ((GET_CODE (out_exp) == CLOBBER)
485 || (GET_CODE (out_exp) == USE))
486 continue;
487 else if (GET_CODE (out_exp) != SET)
488 return false;
489 }
490 }
491 }
492 }
493 }
494 return store_data_bypass_p (out_insn, in_insn);
495 }
496
497 \f
498 /* Processor costs (relative to an add) */
499
500 const struct processor_costs *rs6000_cost;
501
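/* Editorial note, not part of the GCC source: COSTS_N_INSNS comes from
   rtl.h, where it is defined as

     #define COSTS_N_INSNS(N) ((N) * 4)

   so the tables below are in quarter-insn units, and "relative to an add"
   means a plain add costs COSTS_N_INSNS (1), i.e. 4 units.  */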
502 /* Instruction size costs on 32-bit processors.  */
503 static const
504 struct processor_costs size32_cost = {
505 COSTS_N_INSNS (1), /* mulsi */
506 COSTS_N_INSNS (1), /* mulsi_const */
507 COSTS_N_INSNS (1), /* mulsi_const9 */
508 COSTS_N_INSNS (1), /* muldi */
509 COSTS_N_INSNS (1), /* divsi */
510 COSTS_N_INSNS (1), /* divdi */
511 COSTS_N_INSNS (1), /* fp */
512 COSTS_N_INSNS (1), /* dmul */
513 COSTS_N_INSNS (1), /* sdiv */
514 COSTS_N_INSNS (1), /* ddiv */
515 32, /* cache line size */
516 0, /* l1 cache */
517 0, /* l2 cache */
518 0, /* streams */
519 0, /* SF->DF convert */
520 };
521
522 /* Instruction size costs on 64-bit processors.  */
523 static const
524 struct processor_costs size64_cost = {
525 COSTS_N_INSNS (1), /* mulsi */
526 COSTS_N_INSNS (1), /* mulsi_const */
527 COSTS_N_INSNS (1), /* mulsi_const9 */
528 COSTS_N_INSNS (1), /* muldi */
529 COSTS_N_INSNS (1), /* divsi */
530 COSTS_N_INSNS (1), /* divdi */
531 COSTS_N_INSNS (1), /* fp */
532 COSTS_N_INSNS (1), /* dmul */
533 COSTS_N_INSNS (1), /* sdiv */
534 COSTS_N_INSNS (1), /* ddiv */
535 128, /* cache line size */
536 0, /* l1 cache */
537 0, /* l2 cache */
538 0, /* streams */
539 0, /* SF->DF convert */
540 };
541
542 /* Instruction costs on RS64A processors. */
543 static const
544 struct processor_costs rs64a_cost = {
545 COSTS_N_INSNS (20), /* mulsi */
546 COSTS_N_INSNS (12), /* mulsi_const */
547 COSTS_N_INSNS (8), /* mulsi_const9 */
548 COSTS_N_INSNS (34), /* muldi */
549 COSTS_N_INSNS (65), /* divsi */
550 COSTS_N_INSNS (67), /* divdi */
551 COSTS_N_INSNS (4), /* fp */
552 COSTS_N_INSNS (4), /* dmul */
553 COSTS_N_INSNS (31), /* sdiv */
554 COSTS_N_INSNS (31), /* ddiv */
555 128, /* cache line size */
556 128, /* l1 cache */
557 2048, /* l2 cache */
558 1, /* streams */
559 0, /* SF->DF convert */
560 };
561
562 /* Instruction costs on MPCCORE processors. */
563 static const
564 struct processor_costs mpccore_cost = {
565 COSTS_N_INSNS (2), /* mulsi */
566 COSTS_N_INSNS (2), /* mulsi_const */
567 COSTS_N_INSNS (2), /* mulsi_const9 */
568 COSTS_N_INSNS (2), /* muldi */
569 COSTS_N_INSNS (6), /* divsi */
570 COSTS_N_INSNS (6), /* divdi */
571 COSTS_N_INSNS (4), /* fp */
572 COSTS_N_INSNS (5), /* dmul */
573 COSTS_N_INSNS (10), /* sdiv */
574 COSTS_N_INSNS (17), /* ddiv */
575 32, /* cache line size */
576 4, /* l1 cache */
577 16, /* l2 cache */
578 1, /* streams */
579 0, /* SF->DF convert */
580 };
581
582 /* Instruction costs on PPC403 processors. */
583 static const
584 struct processor_costs ppc403_cost = {
585 COSTS_N_INSNS (4), /* mulsi */
586 COSTS_N_INSNS (4), /* mulsi_const */
587 COSTS_N_INSNS (4), /* mulsi_const9 */
588 COSTS_N_INSNS (4), /* muldi */
589 COSTS_N_INSNS (33), /* divsi */
590 COSTS_N_INSNS (33), /* divdi */
591 COSTS_N_INSNS (11), /* fp */
592 COSTS_N_INSNS (11), /* dmul */
593 COSTS_N_INSNS (11), /* sdiv */
594 COSTS_N_INSNS (11), /* ddiv */
595 32, /* cache line size */
596 4, /* l1 cache */
597 16, /* l2 cache */
598 1, /* streams */
599 0, /* SF->DF convert */
600 };
601
602 /* Instruction costs on PPC405 processors. */
603 static const
604 struct processor_costs ppc405_cost = {
605 COSTS_N_INSNS (5), /* mulsi */
606 COSTS_N_INSNS (4), /* mulsi_const */
607 COSTS_N_INSNS (3), /* mulsi_const9 */
608 COSTS_N_INSNS (5), /* muldi */
609 COSTS_N_INSNS (35), /* divsi */
610 COSTS_N_INSNS (35), /* divdi */
611 COSTS_N_INSNS (11), /* fp */
612 COSTS_N_INSNS (11), /* dmul */
613 COSTS_N_INSNS (11), /* sdiv */
614 COSTS_N_INSNS (11), /* ddiv */
615 32, /* cache line size */
616 16, /* l1 cache */
617 128, /* l2 cache */
618 1, /* streams */
619 0, /* SF->DF convert */
620 };
621
622 /* Instruction costs on PPC440 processors. */
623 static const
624 struct processor_costs ppc440_cost = {
625 COSTS_N_INSNS (3), /* mulsi */
626 COSTS_N_INSNS (2), /* mulsi_const */
627 COSTS_N_INSNS (2), /* mulsi_const9 */
628 COSTS_N_INSNS (3), /* muldi */
629 COSTS_N_INSNS (34), /* divsi */
630 COSTS_N_INSNS (34), /* divdi */
631 COSTS_N_INSNS (5), /* fp */
632 COSTS_N_INSNS (5), /* dmul */
633 COSTS_N_INSNS (19), /* sdiv */
634 COSTS_N_INSNS (33), /* ddiv */
635 32, /* cache line size */
636 32, /* l1 cache */
637 256, /* l2 cache */
638 1, /* streams */
639 0, /* SF->DF convert */
640 };
641
642 /* Instruction costs on PPC476 processors. */
643 static const
644 struct processor_costs ppc476_cost = {
645 COSTS_N_INSNS (4), /* mulsi */
646 COSTS_N_INSNS (4), /* mulsi_const */
647 COSTS_N_INSNS (4), /* mulsi_const9 */
648 COSTS_N_INSNS (4), /* muldi */
649 COSTS_N_INSNS (11), /* divsi */
650 COSTS_N_INSNS (11), /* divdi */
651 COSTS_N_INSNS (6), /* fp */
652 COSTS_N_INSNS (6), /* dmul */
653 COSTS_N_INSNS (19), /* sdiv */
654 COSTS_N_INSNS (33), /* ddiv */
655 32, /* l1 cache line size */
656 32, /* l1 cache */
657 512, /* l2 cache */
658 1, /* streams */
659 0, /* SF->DF convert */
660 };
661
662 /* Instruction costs on PPC601 processors. */
663 static const
664 struct processor_costs ppc601_cost = {
665 COSTS_N_INSNS (5), /* mulsi */
666 COSTS_N_INSNS (5), /* mulsi_const */
667 COSTS_N_INSNS (5), /* mulsi_const9 */
668 COSTS_N_INSNS (5), /* muldi */
669 COSTS_N_INSNS (36), /* divsi */
670 COSTS_N_INSNS (36), /* divdi */
671 COSTS_N_INSNS (4), /* fp */
672 COSTS_N_INSNS (5), /* dmul */
673 COSTS_N_INSNS (17), /* sdiv */
674 COSTS_N_INSNS (31), /* ddiv */
675 32, /* cache line size */
676 32, /* l1 cache */
677 256, /* l2 cache */
678 1, /* streams */
679 0, /* SF->DF convert */
680 };
681
682 /* Instruction costs on PPC603 processors. */
683 static const
684 struct processor_costs ppc603_cost = {
685 COSTS_N_INSNS (5), /* mulsi */
686 COSTS_N_INSNS (3), /* mulsi_const */
687 COSTS_N_INSNS (2), /* mulsi_const9 */
688 COSTS_N_INSNS (5), /* muldi */
689 COSTS_N_INSNS (37), /* divsi */
690 COSTS_N_INSNS (37), /* divdi */
691 COSTS_N_INSNS (3), /* fp */
692 COSTS_N_INSNS (4), /* dmul */
693 COSTS_N_INSNS (18), /* sdiv */
694 COSTS_N_INSNS (33), /* ddiv */
695 32, /* cache line size */
696 8, /* l1 cache */
697 64, /* l2 cache */
698 1, /* streams */
699 0, /* SF->DF convert */
700 };
701
702 /* Instruction costs on PPC604 processors. */
703 static const
704 struct processor_costs ppc604_cost = {
705 COSTS_N_INSNS (4), /* mulsi */
706 COSTS_N_INSNS (4), /* mulsi_const */
707 COSTS_N_INSNS (4), /* mulsi_const9 */
708 COSTS_N_INSNS (4), /* muldi */
709 COSTS_N_INSNS (20), /* divsi */
710 COSTS_N_INSNS (20), /* divdi */
711 COSTS_N_INSNS (3), /* fp */
712 COSTS_N_INSNS (3), /* dmul */
713 COSTS_N_INSNS (18), /* sdiv */
714 COSTS_N_INSNS (32), /* ddiv */
715 32, /* cache line size */
716 16, /* l1 cache */
717 512, /* l2 cache */
718 1, /* streams */
719 0, /* SF->DF convert */
720 };
721
722 /* Instruction costs on PPC604e processors. */
723 static const
724 struct processor_costs ppc604e_cost = {
725 COSTS_N_INSNS (2), /* mulsi */
726 COSTS_N_INSNS (2), /* mulsi_const */
727 COSTS_N_INSNS (2), /* mulsi_const9 */
728 COSTS_N_INSNS (2), /* muldi */
729 COSTS_N_INSNS (20), /* divsi */
730 COSTS_N_INSNS (20), /* divdi */
731 COSTS_N_INSNS (3), /* fp */
732 COSTS_N_INSNS (3), /* dmul */
733 COSTS_N_INSNS (18), /* sdiv */
734 COSTS_N_INSNS (32), /* ddiv */
735 32, /* cache line size */
736 32, /* l1 cache */
737 1024, /* l2 cache */
738 1, /* streams */
739 0, /* SF->DF convert */
740 };
741
742 /* Instruction costs on PPC620 processors. */
743 static const
744 struct processor_costs ppc620_cost = {
745 COSTS_N_INSNS (5), /* mulsi */
746 COSTS_N_INSNS (4), /* mulsi_const */
747 COSTS_N_INSNS (3), /* mulsi_const9 */
748 COSTS_N_INSNS (7), /* muldi */
749 COSTS_N_INSNS (21), /* divsi */
750 COSTS_N_INSNS (37), /* divdi */
751 COSTS_N_INSNS (3), /* fp */
752 COSTS_N_INSNS (3), /* dmul */
753 COSTS_N_INSNS (18), /* sdiv */
754 COSTS_N_INSNS (32), /* ddiv */
755 128, /* cache line size */
756 32, /* l1 cache */
757 1024, /* l2 cache */
758 1, /* streams */
759 0, /* SF->DF convert */
760 };
761
762 /* Instruction costs on PPC630 processors. */
763 static const
764 struct processor_costs ppc630_cost = {
765 COSTS_N_INSNS (5), /* mulsi */
766 COSTS_N_INSNS (4), /* mulsi_const */
767 COSTS_N_INSNS (3), /* mulsi_const9 */
768 COSTS_N_INSNS (7), /* muldi */
769 COSTS_N_INSNS (21), /* divsi */
770 COSTS_N_INSNS (37), /* divdi */
771 COSTS_N_INSNS (3), /* fp */
772 COSTS_N_INSNS (3), /* dmul */
773 COSTS_N_INSNS (17), /* sdiv */
774 COSTS_N_INSNS (21), /* ddiv */
775 128, /* cache line size */
776 64, /* l1 cache */
777 1024, /* l2 cache */
778 1, /* streams */
779 0, /* SF->DF convert */
780 };
781
782 /* Instruction costs on Cell processor. */
783 /* COSTS_N_INSNS (1) ~ one add. */
784 static const
785 struct processor_costs ppccell_cost = {
786 COSTS_N_INSNS (9/2)+2, /* mulsi */
787 COSTS_N_INSNS (6/2), /* mulsi_const */
788 COSTS_N_INSNS (6/2), /* mulsi_const9 */
789 COSTS_N_INSNS (15/2)+2, /* muldi */
790 COSTS_N_INSNS (38/2), /* divsi */
791 COSTS_N_INSNS (70/2), /* divdi */
792 COSTS_N_INSNS (10/2), /* fp */
793 COSTS_N_INSNS (10/2), /* dmul */
794 COSTS_N_INSNS (74/2), /* sdiv */
795 COSTS_N_INSNS (74/2), /* ddiv */
796 128, /* cache line size */
797 32, /* l1 cache */
798 512, /* l2 cache */
799 6, /* streams */
800 0, /* SF->DF convert */
801 };
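/* Editorial note, not part of the GCC source: the divisions above use C
   integer arithmetic, so COSTS_N_INSNS (9/2)+2 is COSTS_N_INSNS (4) + 2
   = 18 units, i.e. the 4.5-add latency that 9/2 was meant to express.  */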
802
803 /* Instruction costs on PPC750 and PPC7400 processors. */
804 static const
805 struct processor_costs ppc750_cost = {
806 COSTS_N_INSNS (5), /* mulsi */
807 COSTS_N_INSNS (3), /* mulsi_const */
808 COSTS_N_INSNS (2), /* mulsi_const9 */
809 COSTS_N_INSNS (5), /* muldi */
810 COSTS_N_INSNS (17), /* divsi */
811 COSTS_N_INSNS (17), /* divdi */
812 COSTS_N_INSNS (3), /* fp */
813 COSTS_N_INSNS (3), /* dmul */
814 COSTS_N_INSNS (17), /* sdiv */
815 COSTS_N_INSNS (31), /* ddiv */
816 32, /* cache line size */
817 32, /* l1 cache */
818 512, /* l2 cache */
819 1, /* streams */
820 0, /* SF->DF convert */
821 };
822
823 /* Instruction costs on PPC7450 processors. */
824 static const
825 struct processor_costs ppc7450_cost = {
826 COSTS_N_INSNS (4), /* mulsi */
827 COSTS_N_INSNS (3), /* mulsi_const */
828 COSTS_N_INSNS (3), /* mulsi_const9 */
829 COSTS_N_INSNS (4), /* muldi */
830 COSTS_N_INSNS (23), /* divsi */
831 COSTS_N_INSNS (23), /* divdi */
832 COSTS_N_INSNS (5), /* fp */
833 COSTS_N_INSNS (5), /* dmul */
834 COSTS_N_INSNS (21), /* sdiv */
835 COSTS_N_INSNS (35), /* ddiv */
836 32, /* cache line size */
837 32, /* l1 cache */
838 1024, /* l2 cache */
839 1, /* streams */
840 0, /* SF->DF convert */
841 };
842
843 /* Instruction costs on PPC8540 processors. */
844 static const
845 struct processor_costs ppc8540_cost = {
846 COSTS_N_INSNS (4), /* mulsi */
847 COSTS_N_INSNS (4), /* mulsi_const */
848 COSTS_N_INSNS (4), /* mulsi_const9 */
849 COSTS_N_INSNS (4), /* muldi */
850 COSTS_N_INSNS (19), /* divsi */
851 COSTS_N_INSNS (19), /* divdi */
852 COSTS_N_INSNS (4), /* fp */
853 COSTS_N_INSNS (4), /* dmul */
854 COSTS_N_INSNS (29), /* sdiv */
855 COSTS_N_INSNS (29), /* ddiv */
856 32, /* cache line size */
857 32, /* l1 cache */
858 256, /* l2 cache */
859 1, /* prefetch streams */
860 0, /* SF->DF convert */
861 };
862
863 /* Instruction costs on E300C2 and E300C3 cores. */
864 static const
865 struct processor_costs ppce300c2c3_cost = {
866 COSTS_N_INSNS (4), /* mulsi */
867 COSTS_N_INSNS (4), /* mulsi_const */
868 COSTS_N_INSNS (4), /* mulsi_const9 */
869 COSTS_N_INSNS (4), /* muldi */
870 COSTS_N_INSNS (19), /* divsi */
871 COSTS_N_INSNS (19), /* divdi */
872 COSTS_N_INSNS (3), /* fp */
873 COSTS_N_INSNS (4), /* dmul */
874 COSTS_N_INSNS (18), /* sdiv */
875 COSTS_N_INSNS (33), /* ddiv */
876 32, /* cache line size */
877 16, /* l1 cache */
878 16, /* l2 cache */
879 1, /* prefetch streams */
880 0, /* SF->DF convert */
881 };
882
883 /* Instruction costs on PPCE500MC processors. */
884 static const
885 struct processor_costs ppce500mc_cost = {
886 COSTS_N_INSNS (4), /* mulsi */
887 COSTS_N_INSNS (4), /* mulsi_const */
888 COSTS_N_INSNS (4), /* mulsi_const9 */
889 COSTS_N_INSNS (4), /* muldi */
890 COSTS_N_INSNS (14), /* divsi */
891 COSTS_N_INSNS (14), /* divdi */
892 COSTS_N_INSNS (8), /* fp */
893 COSTS_N_INSNS (10), /* dmul */
894 COSTS_N_INSNS (36), /* sdiv */
895 COSTS_N_INSNS (66), /* ddiv */
896 64, /* cache line size */
897 32, /* l1 cache */
898 128, /* l2 cache */
899 1, /* prefetch streams */
900 0, /* SF->DF convert */
901 };
902
903 /* Instruction costs on PPCE500MC64 processors. */
904 static const
905 struct processor_costs ppce500mc64_cost = {
906 COSTS_N_INSNS (4), /* mulsi */
907 COSTS_N_INSNS (4), /* mulsi_const */
908 COSTS_N_INSNS (4), /* mulsi_const9 */
909 COSTS_N_INSNS (4), /* muldi */
910 COSTS_N_INSNS (14), /* divsi */
911 COSTS_N_INSNS (14), /* divdi */
912 COSTS_N_INSNS (4), /* fp */
913 COSTS_N_INSNS (10), /* dmul */
914 COSTS_N_INSNS (36), /* sdiv */
915 COSTS_N_INSNS (66), /* ddiv */
916 64, /* cache line size */
917 32, /* l1 cache */
918 128, /* l2 cache */
919 1, /* prefetch streams */
920 0, /* SF->DF convert */
921 };
922
923 /* Instruction costs on PPCE5500 processors. */
924 static const
925 struct processor_costs ppce5500_cost = {
926 COSTS_N_INSNS (5), /* mulsi */
927 COSTS_N_INSNS (5), /* mulsi_const */
928 COSTS_N_INSNS (4), /* mulsi_const9 */
929 COSTS_N_INSNS (5), /* muldi */
930 COSTS_N_INSNS (14), /* divsi */
931 COSTS_N_INSNS (14), /* divdi */
932 COSTS_N_INSNS (7), /* fp */
933 COSTS_N_INSNS (10), /* dmul */
934 COSTS_N_INSNS (36), /* sdiv */
935 COSTS_N_INSNS (66), /* ddiv */
936 64, /* cache line size */
937 32, /* l1 cache */
938 128, /* l2 cache */
939 1, /* prefetch streams */
940 0, /* SF->DF convert */
941 };
942
943 /* Instruction costs on PPCE6500 processors. */
944 static const
945 struct processor_costs ppce6500_cost = {
946 COSTS_N_INSNS (5), /* mulsi */
947 COSTS_N_INSNS (5), /* mulsi_const */
948 COSTS_N_INSNS (4), /* mulsi_const9 */
949 COSTS_N_INSNS (5), /* muldi */
950 COSTS_N_INSNS (14), /* divsi */
951 COSTS_N_INSNS (14), /* divdi */
952 COSTS_N_INSNS (7), /* fp */
953 COSTS_N_INSNS (10), /* dmul */
954 COSTS_N_INSNS (36), /* sdiv */
955 COSTS_N_INSNS (66), /* ddiv */
956 64, /* cache line size */
957 32, /* l1 cache */
958 128, /* l2 cache */
959 1, /* prefetch streams */
960 0, /* SF->DF convert */
961 };
962
963 /* Instruction costs on AppliedMicro Titan processors. */
964 static const
965 struct processor_costs titan_cost = {
966 COSTS_N_INSNS (5), /* mulsi */
967 COSTS_N_INSNS (5), /* mulsi_const */
968 COSTS_N_INSNS (5), /* mulsi_const9 */
969 COSTS_N_INSNS (5), /* muldi */
970 COSTS_N_INSNS (18), /* divsi */
971 COSTS_N_INSNS (18), /* divdi */
972 COSTS_N_INSNS (10), /* fp */
973 COSTS_N_INSNS (10), /* dmul */
974 COSTS_N_INSNS (46), /* sdiv */
975 COSTS_N_INSNS (72), /* ddiv */
976 32, /* cache line size */
977 32, /* l1 cache */
978 512, /* l2 cache */
979 1, /* prefetch streams */
980 0, /* SF->DF convert */
981 };
982
983 /* Instruction costs on POWER4 and POWER5 processors. */
984 static const
985 struct processor_costs power4_cost = {
986 COSTS_N_INSNS (3), /* mulsi */
987 COSTS_N_INSNS (2), /* mulsi_const */
988 COSTS_N_INSNS (2), /* mulsi_const9 */
989 COSTS_N_INSNS (4), /* muldi */
990 COSTS_N_INSNS (18), /* divsi */
991 COSTS_N_INSNS (34), /* divdi */
992 COSTS_N_INSNS (3), /* fp */
993 COSTS_N_INSNS (3), /* dmul */
994 COSTS_N_INSNS (17), /* sdiv */
995 COSTS_N_INSNS (17), /* ddiv */
996 128, /* cache line size */
997 32, /* l1 cache */
998 1024, /* l2 cache */
999 8, /* prefetch streams */
1000 0, /* SF->DF convert */
1001 };
1002
1003 /* Instruction costs on POWER6 processors. */
1004 static const
1005 struct processor_costs power6_cost = {
1006 COSTS_N_INSNS (8), /* mulsi */
1007 COSTS_N_INSNS (8), /* mulsi_const */
1008 COSTS_N_INSNS (8), /* mulsi_const9 */
1009 COSTS_N_INSNS (8), /* muldi */
1010 COSTS_N_INSNS (22), /* divsi */
1011 COSTS_N_INSNS (28), /* divdi */
1012 COSTS_N_INSNS (3), /* fp */
1013 COSTS_N_INSNS (3), /* dmul */
1014 COSTS_N_INSNS (13), /* sdiv */
1015 COSTS_N_INSNS (16), /* ddiv */
1016 128, /* cache line size */
1017 64, /* l1 cache */
1018 2048, /* l2 cache */
1019 16, /* prefetch streams */
1020 0, /* SF->DF convert */
1021 };
1022
1023 /* Instruction costs on POWER7 processors. */
1024 static const
1025 struct processor_costs power7_cost = {
1026 COSTS_N_INSNS (2), /* mulsi */
1027 COSTS_N_INSNS (2), /* mulsi_const */
1028 COSTS_N_INSNS (2), /* mulsi_const9 */
1029 COSTS_N_INSNS (2), /* muldi */
1030 COSTS_N_INSNS (18), /* divsi */
1031 COSTS_N_INSNS (34), /* divdi */
1032 COSTS_N_INSNS (3), /* fp */
1033 COSTS_N_INSNS (3), /* dmul */
1034 COSTS_N_INSNS (13), /* sdiv */
1035 COSTS_N_INSNS (16), /* ddiv */
1036 128, /* cache line size */
1037 32, /* l1 cache */
1038 256, /* l2 cache */
1039 12, /* prefetch streams */
1040 COSTS_N_INSNS (3), /* SF->DF convert */
1041 };
1042
1043 /* Instruction costs on POWER8 processors. */
1044 static const
1045 struct processor_costs power8_cost = {
1046 COSTS_N_INSNS (3), /* mulsi */
1047 COSTS_N_INSNS (3), /* mulsi_const */
1048 COSTS_N_INSNS (3), /* mulsi_const9 */
1049 COSTS_N_INSNS (3), /* muldi */
1050 COSTS_N_INSNS (19), /* divsi */
1051 COSTS_N_INSNS (35), /* divdi */
1052 COSTS_N_INSNS (3), /* fp */
1053 COSTS_N_INSNS (3), /* dmul */
1054 COSTS_N_INSNS (14), /* sdiv */
1055 COSTS_N_INSNS (17), /* ddiv */
1056 128, /* cache line size */
1057 32, /* l1 cache */
1058 256, /* l2 cache */
1059 12, /* prefetch streams */
1060 COSTS_N_INSNS (3), /* SF->DF convert */
1061 };
1062
1063 /* Instruction costs on POWER9 processors. */
1064 static const
1065 struct processor_costs power9_cost = {
1066 COSTS_N_INSNS (3), /* mulsi */
1067 COSTS_N_INSNS (3), /* mulsi_const */
1068 COSTS_N_INSNS (3), /* mulsi_const9 */
1069 COSTS_N_INSNS (3), /* muldi */
1070 COSTS_N_INSNS (8), /* divsi */
1071 COSTS_N_INSNS (12), /* divdi */
1072 COSTS_N_INSNS (3), /* fp */
1073 COSTS_N_INSNS (3), /* dmul */
1074 COSTS_N_INSNS (13), /* sdiv */
1075 COSTS_N_INSNS (18), /* ddiv */
1076 128, /* cache line size */
1077 32, /* l1 cache */
1078 512, /* l2 cache */
1079 8, /* prefetch streams */
1080 COSTS_N_INSNS (3), /* SF->DF convert */
1081 };
1082
1083 /* Instruction costs on POWER A2 processors. */
1084 static const
1085 struct processor_costs ppca2_cost = {
1086 COSTS_N_INSNS (16), /* mulsi */
1087 COSTS_N_INSNS (16), /* mulsi_const */
1088 COSTS_N_INSNS (16), /* mulsi_const9 */
1089 COSTS_N_INSNS (16), /* muldi */
1090 COSTS_N_INSNS (22), /* divsi */
1091 COSTS_N_INSNS (28), /* divdi */
1092 COSTS_N_INSNS (3), /* fp */
1093 COSTS_N_INSNS (3), /* dmul */
1094 COSTS_N_INSNS (59), /* sdiv */
1095 COSTS_N_INSNS (72), /* ddiv */
1096 64, /* cache line size */
1097 16, /* l1 cache */
1098 2048, /* l2 cache */
1099 16, /* prefetch streams */
1100 0, /* SF->DF convert */
1101 };
1102
1103 /* Support for -mveclibabi=<xxx> to control which vector library to use. */
1104 static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);
1105
1106 \f
1107 static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
1108 static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
1109 static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
1110 static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
1111 static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
1112 static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
1113 static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
1114 static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
1115 static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
1116 bool);
1117 static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
1118 unsigned int);
1119 static bool is_microcoded_insn (rtx_insn *);
1120 static bool is_nonpipeline_insn (rtx_insn *);
1121 static bool is_cracked_insn (rtx_insn *);
1122 static bool is_load_insn (rtx, rtx *);
1123 static bool is_store_insn (rtx, rtx *);
1124 static bool set_to_load_agen (rtx_insn *, rtx_insn *);
1125 static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
1126 static bool insn_must_be_first_in_group (rtx_insn *);
1127 static bool insn_must_be_last_in_group (rtx_insn *);
1128 int easy_vector_constant (rtx, machine_mode);
1129 static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
1130 static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
1131 #if TARGET_MACHO
1132 static tree get_prev_label (tree);
1133 #endif
1134 static bool rs6000_mode_dependent_address (const_rtx);
1135 static bool rs6000_debug_mode_dependent_address (const_rtx);
1136 static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
1137 static enum reg_class rs6000_secondary_reload_class (enum reg_class,
1138 machine_mode, rtx);
1139 static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
1140 machine_mode,
1141 rtx);
1142 static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
1143 static enum reg_class rs6000_debug_preferred_reload_class (rtx,
1144 enum reg_class);
1145 static bool rs6000_debug_secondary_memory_needed (machine_mode,
1146 reg_class_t,
1147 reg_class_t);
1148 static bool rs6000_debug_can_change_mode_class (machine_mode,
1149 machine_mode,
1150 reg_class_t);
1151
1152 static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
1153 = rs6000_mode_dependent_address;
1154
1155 enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
1156 machine_mode, rtx)
1157 = rs6000_secondary_reload_class;
1158
1159 enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
1160 = rs6000_preferred_reload_class;
1161
1162 const int INSN_NOT_AVAILABLE = -1;
1163
1164 static void rs6000_print_isa_options (FILE *, int, const char *,
1165 HOST_WIDE_INT);
1166 static void rs6000_print_builtin_options (FILE *, int, const char *,
1167 HOST_WIDE_INT);
1168 static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);
1169
1170 static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
1171 static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
1172 enum rs6000_reg_type,
1173 machine_mode,
1174 secondary_reload_info *,
1175 bool);
1176 static enum non_prefixed_form reg_to_non_prefixed (rtx reg, machine_mode mode);
1177 rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
1178
1179 /* Hash table stuff for keeping track of TOC entries. */
1180
1181 struct GTY((for_user)) toc_hash_struct
1182 {
1183 /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
1184 ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
1185 rtx key;
1186 machine_mode key_mode;
1187 int labelno;
1188 };
1189
1190 struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
1191 {
1192 static hashval_t hash (toc_hash_struct *);
1193 static bool equal (toc_hash_struct *, toc_hash_struct *);
1194 };
1195
1196 static GTY (()) hash_table<toc_hasher> *toc_hash_table;
1197
1198
1199 \f
1200 /* Default register names. */
1201 char rs6000_reg_names[][8] =
1202 {
1203 /* GPRs */
1204 "0", "1", "2", "3", "4", "5", "6", "7",
1205 "8", "9", "10", "11", "12", "13", "14", "15",
1206 "16", "17", "18", "19", "20", "21", "22", "23",
1207 "24", "25", "26", "27", "28", "29", "30", "31",
1208 /* FPRs */
1209 "0", "1", "2", "3", "4", "5", "6", "7",
1210 "8", "9", "10", "11", "12", "13", "14", "15",
1211 "16", "17", "18", "19", "20", "21", "22", "23",
1212 "24", "25", "26", "27", "28", "29", "30", "31",
1213 /* VRs */
1214 "0", "1", "2", "3", "4", "5", "6", "7",
1215 "8", "9", "10", "11", "12", "13", "14", "15",
1216 "16", "17", "18", "19", "20", "21", "22", "23",
1217 "24", "25", "26", "27", "28", "29", "30", "31",
1218 /* lr ctr ca ap */
1219 "lr", "ctr", "ca", "ap",
1220 /* cr0..cr7 */
1221 "0", "1", "2", "3", "4", "5", "6", "7",
1222 /* vrsave vscr sfp */
1223 "vrsave", "vscr", "sfp",
1224 };
1225
1226 #ifdef TARGET_REGNAMES
1227 static const char alt_reg_names[][8] =
1228 {
1229 /* GPRs */
1230 "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
1231 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
1232 "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
1233 "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
1234 /* FPRs */
1235 "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
1236 "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
1237 "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
1238 "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
1239 /* VRs */
1240 "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
1241 "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
1242 "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
1243 "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
1244 /* lr ctr ca ap */
1245 "lr", "ctr", "ca", "ap",
1246 /* cr0..cr7 */
1247 "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
1248 /* vrsave vscr sfp */
1249 "vrsave", "vscr", "sfp",
1250 };
1251 #endif
1252
1253 /* Table of valid machine attributes. */
1254
1255 static const struct attribute_spec rs6000_attribute_table[] =
1256 {
1257 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
1258 affects_type_identity, handler, exclude } */
1259 { "altivec", 1, 1, false, true, false, false,
1260 rs6000_handle_altivec_attribute, NULL },
1261 { "longcall", 0, 0, false, true, true, false,
1262 rs6000_handle_longcall_attribute, NULL },
1263 { "shortcall", 0, 0, false, true, true, false,
1264 rs6000_handle_longcall_attribute, NULL },
1265 { "ms_struct", 0, 0, false, false, false, false,
1266 rs6000_handle_struct_attribute, NULL },
1267 { "gcc_struct", 0, 0, false, false, false, false,
1268 rs6000_handle_struct_attribute, NULL },
1269 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1270 SUBTARGET_ATTRIBUTE_TABLE,
1271 #endif
1272 { NULL, 0, 0, false, false, false, false, NULL, NULL }
1273 };
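/* Editorial sketch, not part of the GCC source: these attributes appear in
   user code roughly as:

     void far_away (void) __attribute__ ((longcall));
     struct s { char c; int i; } __attribute__ ((ms_struct));

   "altivec" takes one argument and is normally reached through the vector
   keyword macros in altivec.h.  */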
1274 \f
1275 #ifndef TARGET_PROFILE_KERNEL
1276 #define TARGET_PROFILE_KERNEL 0
1277 #endif
1278 \f
1279 /* Initialize the GCC target structure. */
1280 #undef TARGET_ATTRIBUTE_TABLE
1281 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1282 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1283 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1284 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1285 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1286
1287 #undef TARGET_ASM_ALIGNED_DI_OP
1288 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1289
1290 /* Default unaligned ops are only provided for ELF. Find the ops needed
1291 for non-ELF systems. */
1292 #ifndef OBJECT_FORMAT_ELF
1293 #if TARGET_XCOFF
1294 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1295 64-bit targets. */
1296 #undef TARGET_ASM_UNALIGNED_HI_OP
1297 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1298 #undef TARGET_ASM_UNALIGNED_SI_OP
1299 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1300 #undef TARGET_ASM_UNALIGNED_DI_OP
1301 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1302 #else
1303 /* For Darwin. */
1304 #undef TARGET_ASM_UNALIGNED_HI_OP
1305 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1306 #undef TARGET_ASM_UNALIGNED_SI_OP
1307 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1308 #undef TARGET_ASM_UNALIGNED_DI_OP
1309 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1310 #undef TARGET_ASM_ALIGNED_DI_OP
1311 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1312 #endif
1313 #endif
1314
1315 /* This hook deals with fixups for relocatable code and DI-mode objects
1316 in 64-bit code. */
1317 #undef TARGET_ASM_INTEGER
1318 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1319
1320 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1321 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1322 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1323 #endif
1324
1325 #undef TARGET_SET_UP_BY_PROLOGUE
1326 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1327
1328 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
1329 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
1330 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
1331 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
1332 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
1333 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
1334 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
1335 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
1336 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
1337 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
1338 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
1339 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components
1340
1341 #undef TARGET_EXTRA_LIVE_ON_ENTRY
1342 #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry
1343
1344 #undef TARGET_INTERNAL_ARG_POINTER
1345 #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer
1346
1347 #undef TARGET_HAVE_TLS
1348 #define TARGET_HAVE_TLS HAVE_AS_TLS
1349
1350 #undef TARGET_CANNOT_FORCE_CONST_MEM
1351 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1352
1353 #undef TARGET_DELEGITIMIZE_ADDRESS
1354 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1355
1356 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1357 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1358
1359 #undef TARGET_LEGITIMATE_COMBINED_INSN
1360 #define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn
1361
1362 #undef TARGET_ASM_FUNCTION_PROLOGUE
1363 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1364 #undef TARGET_ASM_FUNCTION_EPILOGUE
1365 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1366
1367 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1368 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1369
1370 #undef TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC
1371 #define TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC rs6000_gen_pic_addr_diff_vec
1372
1373 #undef TARGET_LEGITIMIZE_ADDRESS
1374 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1375
1376 #undef TARGET_SCHED_VARIABLE_ISSUE
1377 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1378
1379 #undef TARGET_SCHED_ISSUE_RATE
1380 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1381 #undef TARGET_SCHED_ADJUST_COST
1382 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1383 #undef TARGET_SCHED_ADJUST_PRIORITY
1384 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1385 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1386 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1387 #undef TARGET_SCHED_INIT
1388 #define TARGET_SCHED_INIT rs6000_sched_init
1389 #undef TARGET_SCHED_FINISH
1390 #define TARGET_SCHED_FINISH rs6000_sched_finish
1391 #undef TARGET_SCHED_REORDER
1392 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1393 #undef TARGET_SCHED_REORDER2
1394 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1395
1396 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1397 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1398
1399 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1400 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1401
1402 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1403 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1404 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1405 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1406 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1407 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1408 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1409 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1410
1411 #undef TARGET_SCHED_CAN_SPECULATE_INSN
1412 #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn
1413
1414 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1415 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1416 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1417 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1418 rs6000_builtin_support_vector_misalignment
1419 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1420 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1421 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1422 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1423 rs6000_builtin_vectorization_cost
1424 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1425 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1426 rs6000_preferred_simd_mode
1427 #undef TARGET_VECTORIZE_INIT_COST
1428 #define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
1429 #undef TARGET_VECTORIZE_ADD_STMT_COST
1430 #define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
1431 #undef TARGET_VECTORIZE_FINISH_COST
1432 #define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
1433 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
1434 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
1435
1436 #undef TARGET_LOOP_UNROLL_ADJUST
1437 #define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
1438
1439 #undef TARGET_INIT_BUILTINS
1440 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1441 #undef TARGET_BUILTIN_DECL
1442 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1443
1444 #undef TARGET_FOLD_BUILTIN
1445 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1446 #undef TARGET_GIMPLE_FOLD_BUILTIN
1447 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1448
1449 #undef TARGET_EXPAND_BUILTIN
1450 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1451
1452 #undef TARGET_MANGLE_TYPE
1453 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1454
1455 #undef TARGET_INIT_LIBFUNCS
1456 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1457
1458 #if TARGET_MACHO
1459 #undef TARGET_BINDS_LOCAL_P
1460 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1461 #endif
1462
1463 #undef TARGET_MS_BITFIELD_LAYOUT_P
1464 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1465
1466 #undef TARGET_ASM_OUTPUT_MI_THUNK
1467 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1468
1469 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1470 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1471
1472 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1473 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1474
1475 #undef TARGET_REGISTER_MOVE_COST
1476 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1477 #undef TARGET_MEMORY_MOVE_COST
1478 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1479 #undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
1480 #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
1481 rs6000_ira_change_pseudo_allocno_class
1482 #undef TARGET_CANNOT_COPY_INSN_P
1483 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1484 #undef TARGET_RTX_COSTS
1485 #define TARGET_RTX_COSTS rs6000_rtx_costs
1486 #undef TARGET_ADDRESS_COST
1487 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1488 #undef TARGET_INSN_COST
1489 #define TARGET_INSN_COST rs6000_insn_cost
1490
1491 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1492 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1493
1494 #undef TARGET_PROMOTE_FUNCTION_MODE
1495 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1496
1497 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
1498 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE rs6000_override_options_after_change
1499
1500 #undef TARGET_RETURN_IN_MEMORY
1501 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1502
1503 #undef TARGET_RETURN_IN_MSB
1504 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1505
1506 #undef TARGET_SETUP_INCOMING_VARARGS
1507 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1508
1509 /* Always strict argument naming on rs6000. */
1510 #undef TARGET_STRICT_ARGUMENT_NAMING
1511 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1512 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1513 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1514 #undef TARGET_SPLIT_COMPLEX_ARG
1515 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1516 #undef TARGET_MUST_PASS_IN_STACK
1517 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1518 #undef TARGET_PASS_BY_REFERENCE
1519 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1520 #undef TARGET_ARG_PARTIAL_BYTES
1521 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1522 #undef TARGET_FUNCTION_ARG_ADVANCE
1523 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1524 #undef TARGET_FUNCTION_ARG
1525 #define TARGET_FUNCTION_ARG rs6000_function_arg
1526 #undef TARGET_FUNCTION_ARG_PADDING
1527 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1528 #undef TARGET_FUNCTION_ARG_BOUNDARY
1529 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1530
1531 #undef TARGET_BUILD_BUILTIN_VA_LIST
1532 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1533
1534 #undef TARGET_EXPAND_BUILTIN_VA_START
1535 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1536
1537 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1538 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1539
1540 #undef TARGET_EH_RETURN_FILTER_MODE
1541 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1542
1543 #undef TARGET_TRANSLATE_MODE_ATTRIBUTE
1544 #define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute
1545
1546 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1547 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1548
1549 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1550 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1551
1552 #undef TARGET_FLOATN_MODE
1553 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1554
1555 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1556 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1557
1558 #undef TARGET_MD_ASM_ADJUST
1559 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1560
1561 #undef TARGET_OPTION_OVERRIDE
1562 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1563
1564 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1565 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1566 rs6000_builtin_vectorized_function
1567
1568 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1569 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1570 rs6000_builtin_md_vectorized_function
1571
1572 #undef TARGET_STACK_PROTECT_GUARD
1573 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1574
1575 #if !TARGET_MACHO
1576 #undef TARGET_STACK_PROTECT_FAIL
1577 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1578 #endif
1579
1580 #ifdef HAVE_AS_TLS
1581 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1582 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1583 #endif
1584
1585 /* Use a 32-bit anchor range. This leads to sequences like:
1586
1587 addis tmp,anchor,high
1588 add dest,tmp,low
1589
1590 where tmp itself acts as an anchor, and can be shared between
1591 accesses to the same 64k page. */
1592 #undef TARGET_MIN_ANCHOR_OFFSET
1593 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1594 #undef TARGET_MAX_ANCHOR_OFFSET
1595 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1596 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1597 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1598 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1599 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1600
1601 #undef TARGET_BUILTIN_RECIPROCAL
1602 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1603
1604 #undef TARGET_SECONDARY_RELOAD
1605 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1606 #undef TARGET_SECONDARY_MEMORY_NEEDED
1607 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1608 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1609 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1610
1611 #undef TARGET_LEGITIMATE_ADDRESS_P
1612 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1613
1614 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1615 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1616
1617 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1618 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1619
1620 #undef TARGET_CAN_ELIMINATE
1621 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1622
1623 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1624 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1625
1626 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1627 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1628
1629 #undef TARGET_TRAMPOLINE_INIT
1630 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1631
1632 #undef TARGET_FUNCTION_VALUE
1633 #define TARGET_FUNCTION_VALUE rs6000_function_value
1634
1635 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1636 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1637
1638 #undef TARGET_OPTION_SAVE
1639 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1640
1641 #undef TARGET_OPTION_RESTORE
1642 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1643
1644 #undef TARGET_OPTION_PRINT
1645 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1646
1647 #undef TARGET_CAN_INLINE_P
1648 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1649
1650 #undef TARGET_SET_CURRENT_FUNCTION
1651 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1652
1653 #undef TARGET_LEGITIMATE_CONSTANT_P
1654 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1655
1656 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1657 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1658
1659 #undef TARGET_CAN_USE_DOLOOP_P
1660 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1661
1662 #undef TARGET_PREDICT_DOLOOP_P
1663 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
1664
1665 #undef TARGET_HAVE_COUNT_REG_DECR_P
1666 #define TARGET_HAVE_COUNT_REG_DECR_P true
1667
1668 /* 1000000000 is infinite cost in IVOPTs. */
1669 #undef TARGET_DOLOOP_COST_FOR_GENERIC
1670 #define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000
1671
1672 #undef TARGET_DOLOOP_COST_FOR_ADDRESS
1673 #define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000
1674
1675 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1676 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1677
1678 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1679 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1680 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1681 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1682 #undef TARGET_UNWIND_WORD_MODE
1683 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1684
1685 #undef TARGET_OFFLOAD_OPTIONS
1686 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1687
1688 #undef TARGET_C_MODE_FOR_SUFFIX
1689 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1690
1691 #undef TARGET_INVALID_BINARY_OP
1692 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1693
1694 #undef TARGET_OPTAB_SUPPORTED_P
1695 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1696
1697 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1698 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1699
1700 #undef TARGET_COMPARE_VERSION_PRIORITY
1701 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1702
1703 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1704 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1705 rs6000_generate_version_dispatcher_body
1706
1707 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1708 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1709 rs6000_get_function_versions_dispatcher
1710
1711 #undef TARGET_OPTION_FUNCTION_VERSIONS
1712 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1713
1714 #undef TARGET_HARD_REGNO_NREGS
1715 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1716 #undef TARGET_HARD_REGNO_MODE_OK
1717 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1718
1719 #undef TARGET_MODES_TIEABLE_P
1720 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1721
1722 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1723 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1724 rs6000_hard_regno_call_part_clobbered
1725
1726 #undef TARGET_SLOW_UNALIGNED_ACCESS
1727 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1728
1729 #undef TARGET_CAN_CHANGE_MODE_CLASS
1730 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1731
1732 #undef TARGET_CONSTANT_ALIGNMENT
1733 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1734
1735 #undef TARGET_STARTING_FRAME_OFFSET
1736 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1737
1738 #if TARGET_ELF && RS6000_WEAK
1739 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
1740 #define TARGET_ASM_GLOBALIZE_DECL_NAME rs6000_globalize_decl_name
1741 #endif
1742
1743 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1744 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1745
1746 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1747 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1748
1749 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
1750 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
1751 rs6000_cannot_substitute_mem_equiv_p
1752
1753 #undef TARGET_INVALID_CONVERSION
1754 #define TARGET_INVALID_CONVERSION rs6000_invalid_conversion
1755 \f
1756
1757 /* Processor table. */
1758 struct rs6000_ptt
1759 {
1760 const char *const name; /* Canonical processor name. */
1761 const enum processor_type processor; /* Processor type enum value. */
1762 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1763 };
1764
1765 static struct rs6000_ptt const processor_target_table[] =
1766 {
1767 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1768 #include "rs6000-cpus.def"
1769 #undef RS6000_CPU
1770 };
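     /* For illustration (a hypothetical entry, assuming the usual shape of
        rs6000-cpus.def): an entry such as

          RS6000_CPU ("power9", PROCESSOR_POWER9, MASK_POWERPC64
                      | ISA_3_0_MASKS_SERVER)

        expands through the #define above into one initializer row of
        processor_target_table, so adding a CPU only requires editing
        rs6000-cpus.def.  */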
1771
1772 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1773 name is invalid. */
1774
1775 static int
1776 rs6000_cpu_name_lookup (const char *name)
1777 {
1778 size_t i;
1779
1780 if (name != NULL)
1781 {
1782 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1783 if (! strcmp (name, processor_target_table[i].name))
1784 return (int)i;
1785 }
1786
1787 return -1;
1788 }
1789
1790 \f
1791 /* Return number of consecutive hard regs needed starting at reg REGNO
1792 to hold something of mode MODE.
1793 This is ordinarily the length in words of a value of mode MODE
1794 but can be less for certain modes in special long registers.
1795
1796 POWER and PowerPC GPRs hold 32 bits worth;
1797    PowerPC64 GPRs and FPRs hold 64 bits worth. */
1798
1799 static int
1800 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1801 {
1802 unsigned HOST_WIDE_INT reg_size;
1803
1804 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1805 128-bit floating point that can go in vector registers, which has VSX
1806 memory addressing. */
1807 if (FP_REGNO_P (regno))
1808 reg_size = (VECTOR_MEM_VSX_P (mode) || VECTOR_ALIGNMENT_P (mode)
1809 ? UNITS_PER_VSX_WORD
1810 : UNITS_PER_FP_WORD);
1811
1812 else if (ALTIVEC_REGNO_P (regno))
1813 reg_size = UNITS_PER_ALTIVEC_WORD;
1814
1815 else
1816 reg_size = UNITS_PER_WORD;
1817
1818 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
1819 }
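     /* Worked example (illustration only): the computation above rounds the
        mode size up to a whole number of registers.  On a 32-bit target
        (UNITS_PER_WORD == 4), an 8-byte DFmode value in a GPR needs
        (8 + 4 - 1) / 4 == 2 consecutive registers, while the same value in
        an FPR (UNITS_PER_FP_WORD == 8) needs (8 + 8 - 1) / 8 == 1.  */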
1820
1821 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1822 MODE. */
1823 static int
1824 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
1825 {
1826 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1827
1828 if (COMPLEX_MODE_P (mode))
1829 mode = GET_MODE_INNER (mode);
1830
1831 /* Vector pair modes need even/odd VSX register pairs. Only allow vector
1832 registers. */
1833 if (mode == OOmode)
1834 return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
1835
1836 /* MMA accumulator modes need FPR registers divisible by 4. */
1837 if (mode == XOmode)
1838 return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);
1839
1840   /* PTImode can only go in GPRs.  Quad word memory operations require even/odd
1841      register combinations; we use PTImode where we need to deal with those
1842      quad word memory operations.  Don't allow quad words in the argument or
1843      frame pointer registers, just registers 0..31. */
1844 if (mode == PTImode)
1845 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1846 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1847 && ((regno & 1) == 0));
1848
1849 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1850 implementations. Don't allow an item to be split between a FP register
1851 and an Altivec register. Allow TImode in all VSX registers if the user
1852 asked for it. */
1853 if (TARGET_VSX && VSX_REGNO_P (regno)
1854 && (VECTOR_MEM_VSX_P (mode)
1855 || VECTOR_ALIGNMENT_P (mode)
1856 || reg_addr[mode].scalar_in_vmx_p
1857 || mode == TImode
1858 || (TARGET_VADDUQM && mode == V1TImode)))
1859 {
1860 if (FP_REGNO_P (regno))
1861 return FP_REGNO_P (last_regno);
1862
1863 if (ALTIVEC_REGNO_P (regno))
1864 {
1865 if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p)
1866 return 0;
1867
1868 return ALTIVEC_REGNO_P (last_regno);
1869 }
1870 }
1871
1872 /* The GPRs can hold any mode, but values bigger than one register
1873 cannot go past R31. */
1874 if (INT_REGNO_P (regno))
1875 return INT_REGNO_P (last_regno);
1876
1877 /* The float registers (except for VSX vector modes) can only hold floating
1878 modes and DImode. */
1879 if (FP_REGNO_P (regno))
1880 {
1881 if (VECTOR_ALIGNMENT_P (mode))
1882 return false;
1883
1884 if (SCALAR_FLOAT_MODE_P (mode)
1885 && (mode != TDmode || (regno % 2) == 0)
1886 && FP_REGNO_P (last_regno))
1887 return 1;
1888
1889 if (GET_MODE_CLASS (mode) == MODE_INT)
1890 {
1891       if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1892 return 1;
1893
1894 if (TARGET_P8_VECTOR && (mode == SImode))
1895 return 1;
1896
1897 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
1898 return 1;
1899 }
1900
1901 return 0;
1902 }
1903
1904 /* The CR register can only hold CC modes. */
1905 if (CR_REGNO_P (regno))
1906 return GET_MODE_CLASS (mode) == MODE_CC;
1907
1908 if (CA_REGNO_P (regno))
1909 return mode == Pmode || mode == SImode;
1910
1911   /* AltiVec only in AltiVec registers. */
1912 if (ALTIVEC_REGNO_P (regno))
1913 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1914 || mode == V1TImode);
1915
1916   /* We cannot put non-VSX TImode or PTImode anywhere except the general
1917      registers, and the value must be able to fit within the register set. */
1918
1919 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
1920 }
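     /* Hypothetical examples of the checks above (assuming the usual rs6000
        numbering, where FPRs start at hard register 32): with TARGET_MMA,
        OOmode is accepted at FPR 32 (even) but rejected at FPR 33, and
        XOmode is accepted at FPR 36 but rejected at FPR 38, since
        (38 & 3) != 0.  */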
1921
1922 /* Implement TARGET_HARD_REGNO_NREGS. */
1923
1924 static unsigned int
1925 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
1926 {
1927 return rs6000_hard_regno_nregs[mode][regno];
1928 }
1929
1930 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1931
1932 static bool
1933 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
1934 {
1935 return rs6000_hard_regno_mode_ok_p[mode][regno];
1936 }
1937
1938 /* Implement TARGET_MODES_TIEABLE_P.
1939
1940 PTImode cannot tie with other modes because PTImode is restricted to even
1941 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1942 57744).
1943
1944 Similarly, don't allow OOmode (vector pair, restricted to even VSX
1945 registers) or XOmode (vector quad, restricted to FPR registers divisible
1946 by 4) to tie with other modes.
1947
1948    The Altivec/VSX vector tests were moved ahead of the scalar float mode
1949    tests, so that IEEE 128-bit floating point on VSX systems ties with other
        vectors. */
1950
1951 static bool
1952 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
1953 {
1954 if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
1955 || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
1956 return mode1 == mode2;
1957
1958 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
1959 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
1960 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
1961 return false;
1962
1963 if (SCALAR_FLOAT_MODE_P (mode1))
1964 return SCALAR_FLOAT_MODE_P (mode2);
1965 if (SCALAR_FLOAT_MODE_P (mode2))
1966 return false;
1967
1968 if (GET_MODE_CLASS (mode1) == MODE_CC)
1969 return GET_MODE_CLASS (mode2) == MODE_CC;
1970 if (GET_MODE_CLASS (mode2) == MODE_CC)
1971 return false;
1972
1973 return true;
1974 }
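     /* Illustrative results of the rules above (not in the original
        source): rs6000_modes_tieable_p (V2DFmode, V4SImode) is true (both
        are Altivec/VSX vector modes), (SFmode, DFmode) is true (both are
        scalar float), and (PTImode, TImode) is false, because PTImode only
        ties with itself.  */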
1975
1976 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
1977
1978 static bool
1979 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
1980 machine_mode mode)
1981 {
1982 if (TARGET_32BIT
1983 && TARGET_POWERPC64
1984 && GET_MODE_SIZE (mode) > 4
1985 && INT_REGNO_P (regno))
1986 return true;
1987
1988 if (TARGET_VSX
1989 && FP_REGNO_P (regno)
1990 && GET_MODE_SIZE (mode) > 8
1991 && !FLOAT128_2REG_P (mode))
1992 return true;
1993
1994 return false;
1995 }
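     /* For example (illustration only): with -m32 -mpowerpc64 a DImode
        value fits in one 64-bit GPR, but the 32-bit ABI only preserves the
        low 32 bits of the nonvolatile GPRs across calls, so the register is
        part-clobbered.  Likewise, with VSX a 16-byte vector that lives in a
        nonvolatile FPR is part-clobbered, because only the 8-byte FPR half
        of the VSX register is saved.  */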
1996
1997 /* Print interesting facts about registers. */
1998 static void
1999 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2000 {
2001 int r, m;
2002
2003 for (r = first_regno; r <= last_regno; ++r)
2004 {
2005 const char *comma = "";
2006 int len;
2007
2008 if (first_regno == last_regno)
2009 fprintf (stderr, "%s:\t", reg_name);
2010 else
2011 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2012
2013 len = 8;
2014 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2015 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2016 {
2017 if (len > 70)
2018 {
2019 fprintf (stderr, ",\n\t");
2020 len = 8;
2021 comma = "";
2022 }
2023
2024 if (rs6000_hard_regno_nregs[m][r] > 1)
2025 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2026 rs6000_hard_regno_nregs[m][r]);
2027 else
2028 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2029
2030 comma = ", ";
2031 }
2032
2033 if (call_used_or_fixed_reg_p (r))
2034 {
2035 if (len > 70)
2036 {
2037 fprintf (stderr, ",\n\t");
2038 len = 8;
2039 comma = "";
2040 }
2041
2042 len += fprintf (stderr, "%s%s", comma, "call-used");
2043 comma = ", ";
2044 }
2045
2046 if (fixed_regs[r])
2047 {
2048 if (len > 70)
2049 {
2050 fprintf (stderr, ",\n\t");
2051 len = 8;
2052 comma = "";
2053 }
2054
2055 len += fprintf (stderr, "%s%s", comma, "fixed");
2056 comma = ", ";
2057 }
2058
2059 if (len > 70)
2060 {
2061 fprintf (stderr, ",\n\t");
2062 comma = "";
2063 }
2064
2065 len += fprintf (stderr, "%sreg-class = %s", comma,
2066 reg_class_names[(int)rs6000_regno_regclass[r]]);
2067 comma = ", ";
2068
2069 if (len > 70)
2070 {
2071 fprintf (stderr, ",\n\t");
2072 comma = "";
2073 }
2074
2075 fprintf (stderr, "%sregno = %d\n", comma, r);
2076 }
2077 }
2078
2079 static const char *
2080 rs6000_debug_vector_unit (enum rs6000_vector v)
2081 {
2082 const char *ret;
2083
2084 switch (v)
2085 {
2086 case VECTOR_NONE: ret = "none"; break;
2087 case VECTOR_ALTIVEC: ret = "altivec"; break;
2088 case VECTOR_VSX: ret = "vsx"; break;
2089 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2090 default: ret = "unknown"; break;
2091 }
2092
2093 return ret;
2094 }
2095
2096 /* Inner function printing just the address mask for a particular reload
2097 register class. */
2098 DEBUG_FUNCTION char *
2099 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2100 {
2101 static char ret[8];
2102 char *p = ret;
2103
2104 if ((mask & RELOAD_REG_VALID) != 0)
2105 *p++ = 'v';
2106 else if (keep_spaces)
2107 *p++ = ' ';
2108
2109 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2110 *p++ = 'm';
2111 else if (keep_spaces)
2112 *p++ = ' ';
2113
2114 if ((mask & RELOAD_REG_INDEXED) != 0)
2115 *p++ = 'i';
2116 else if (keep_spaces)
2117 *p++ = ' ';
2118
2119 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2120 *p++ = 'O';
2121 else if ((mask & RELOAD_REG_OFFSET) != 0)
2122 *p++ = 'o';
2123 else if (keep_spaces)
2124 *p++ = ' ';
2125
2126 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2127 *p++ = '+';
2128 else if (keep_spaces)
2129 *p++ = ' ';
2130
2131 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2132 *p++ = '+';
2133 else if (keep_spaces)
2134 *p++ = ' ';
2135
2136 if ((mask & RELOAD_REG_AND_M16) != 0)
2137 *p++ = '&';
2138 else if (keep_spaces)
2139 *p++ = ' ';
2140
2141 *p = '\0';
2142
2143 return ret;
2144 }
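     /* For example (illustration only): a mask of RELOAD_REG_VALID
        | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET prints as "vio" when
        KEEP_SPACES is false, and as "v io   " (columns preserved) when
        KEEP_SPACES is true.  */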
2145
2146 /* Print the address masks in a human readable fashion. */
2147 DEBUG_FUNCTION void
2148 rs6000_debug_print_mode (ssize_t m)
2149 {
2150 ssize_t rc;
2151 int spaces = 0;
2152
2153 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2154 for (rc = 0; rc < N_RELOAD_REG; rc++)
2155 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2156 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2157
2158 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2159 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2160 {
2161 fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2162 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2163 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2164 spaces = 0;
2165 }
2166 else
2167 spaces += strlen (" Reload=sl");
2168
2169 if (reg_addr[m].scalar_in_vmx_p)
2170 {
2171 fprintf (stderr, "%*s Upper=y", spaces, "");
2172 spaces = 0;
2173 }
2174 else
2175 spaces += strlen (" Upper=y");
2176
2177 if (rs6000_vector_unit[m] != VECTOR_NONE
2178 || rs6000_vector_mem[m] != VECTOR_NONE)
2179 {
2180 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2181 spaces, "",
2182 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2183 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2184 }
2185
2186 fputs ("\n", stderr);
2187 }
2188
2189 #define DEBUG_FMT_ID "%-32s= "
2190 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2191 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2192 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
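     /* For example (illustration only), DEBUG_FMT_D expands to
        "%-32s= " "%d\n", so fprintf (stderr, DEBUG_FMT_D, "tls_size", 32)
        prints the key left-justified in a 32-column field followed by
        "= 32".  */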
2193
2194 /* Print various interesting information with -mdebug=reg. */
2195 static void
2196 rs6000_debug_reg_global (void)
2197 {
2198 static const char *const tf[2] = { "false", "true" };
2199 const char *nl = (const char *)0;
2200 int m;
2201 size_t m1, m2, v;
2202 char costly_num[20];
2203 char nop_num[20];
2204 char flags_buffer[40];
2205 const char *costly_str;
2206 const char *nop_str;
2207 const char *trace_str;
2208 const char *abi_str;
2209 const char *cmodel_str;
2210 struct cl_target_option cl_opts;
2211
2212 /* Modes we want tieable information on. */
2213 static const machine_mode print_tieable_modes[] = {
2214 QImode,
2215 HImode,
2216 SImode,
2217 DImode,
2218 TImode,
2219 PTImode,
2220 SFmode,
2221 DFmode,
2222 TFmode,
2223 IFmode,
2224 KFmode,
2225 SDmode,
2226 DDmode,
2227 TDmode,
2228 V2SImode,
2229 V2SFmode,
2230 V16QImode,
2231 V8HImode,
2232 V4SImode,
2233 V2DImode,
2234 V1TImode,
2235 V32QImode,
2236 V16HImode,
2237 V8SImode,
2238 V4DImode,
2239 V2TImode,
2240 V4SFmode,
2241 V2DFmode,
2242 V8SFmode,
2243 V4DFmode,
2244 OOmode,
2245 XOmode,
2246 CCmode,
2247 CCUNSmode,
2248 CCEQmode,
2249 CCFPmode,
2250 };
2251
2252 /* Virtual regs we are interested in. */
2253   static const struct {
2254 int regno; /* register number. */
2255 const char *name; /* register name. */
2256 } virtual_regs[] = {
2257 { STACK_POINTER_REGNUM, "stack pointer:" },
2258 { TOC_REGNUM, "toc: " },
2259 { STATIC_CHAIN_REGNUM, "static chain: " },
2260 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2261 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2262 { ARG_POINTER_REGNUM, "arg pointer: " },
2263 { FRAME_POINTER_REGNUM, "frame pointer:" },
2264 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2265 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2266 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2267 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2268 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2269 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2270 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2271     { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2272 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2273 };
2274
2275 fputs ("\nHard register information:\n", stderr);
2276 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2277 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2278 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2279 LAST_ALTIVEC_REGNO,
2280 "vs");
2281 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2282 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2283 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2284 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2285 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2286 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2287
2288 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2289 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2290 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2291
2292 fprintf (stderr,
2293 "\n"
2294 "d reg_class = %s\n"
2295 "f reg_class = %s\n"
2296 "v reg_class = %s\n"
2297 "wa reg_class = %s\n"
2298 "we reg_class = %s\n"
2299 "wr reg_class = %s\n"
2300 "wx reg_class = %s\n"
2301 "wA reg_class = %s\n"
2302 "\n",
2303 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2304 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2305 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2306 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2307 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2308 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2309 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2310 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
2311
2312 nl = "\n";
2313 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2314 rs6000_debug_print_mode (m);
2315
2316 fputs ("\n", stderr);
2317
2318 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2319 {
2320 machine_mode mode1 = print_tieable_modes[m1];
2321 bool first_time = true;
2322
2323 nl = (const char *)0;
2324 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2325 {
2326 machine_mode mode2 = print_tieable_modes[m2];
2327 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2328 {
2329 if (first_time)
2330 {
2331 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2332 nl = "\n";
2333 first_time = false;
2334 }
2335
2336 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2337 }
2338 }
2339
2340 if (!first_time)
2341 fputs ("\n", stderr);
2342 }
2343
2344 if (nl)
2345 fputs (nl, stderr);
2346
2347 if (rs6000_recip_control)
2348 {
2349 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2350
2351 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2352 if (rs6000_recip_bits[m])
2353 {
2354 fprintf (stderr,
2355 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2356 GET_MODE_NAME (m),
2357 (RS6000_RECIP_AUTO_RE_P (m)
2358 ? "auto"
2359 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2360 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2361 ? "auto"
2362 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2363 }
2364
2365 fputs ("\n", stderr);
2366 }
2367
2368 if (rs6000_cpu_index >= 0)
2369 {
2370 const char *name = processor_target_table[rs6000_cpu_index].name;
2371 HOST_WIDE_INT flags
2372 = processor_target_table[rs6000_cpu_index].target_enable;
2373
2374 sprintf (flags_buffer, "-mcpu=%s flags", name);
2375 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2376 }
2377 else
2378 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2379
2380 if (rs6000_tune_index >= 0)
2381 {
2382 const char *name = processor_target_table[rs6000_tune_index].name;
2383 HOST_WIDE_INT flags
2384 = processor_target_table[rs6000_tune_index].target_enable;
2385
2386 sprintf (flags_buffer, "-mtune=%s flags", name);
2387 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2388 }
2389 else
2390 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2391
2392 cl_target_option_save (&cl_opts, &global_options, &global_options_set);
2393 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2394 rs6000_isa_flags);
2395
2396 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2397 rs6000_isa_flags_explicit);
2398
2399 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2400 rs6000_builtin_mask);
2401
2402 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2403
2404 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2405 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2406
2407 switch (rs6000_sched_costly_dep)
2408 {
2409 case max_dep_latency:
2410 costly_str = "max_dep_latency";
2411 break;
2412
2413 case no_dep_costly:
2414 costly_str = "no_dep_costly";
2415 break;
2416
2417 case all_deps_costly:
2418 costly_str = "all_deps_costly";
2419 break;
2420
2421 case true_store_to_load_dep_costly:
2422 costly_str = "true_store_to_load_dep_costly";
2423 break;
2424
2425 case store_to_load_dep_costly:
2426 costly_str = "store_to_load_dep_costly";
2427 break;
2428
2429 default:
2430 costly_str = costly_num;
2431 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2432 break;
2433 }
2434
2435 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2436
2437 switch (rs6000_sched_insert_nops)
2438 {
2439 case sched_finish_regroup_exact:
2440 nop_str = "sched_finish_regroup_exact";
2441 break;
2442
2443 case sched_finish_pad_groups:
2444 nop_str = "sched_finish_pad_groups";
2445 break;
2446
2447 case sched_finish_none:
2448 nop_str = "sched_finish_none";
2449 break;
2450
2451 default:
2452 nop_str = nop_num;
2453 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2454 break;
2455 }
2456
2457 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2458
2459 switch (rs6000_sdata)
2460 {
2461 default:
2462 case SDATA_NONE:
2463 break;
2464
2465 case SDATA_DATA:
2466 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2467 break;
2468
2469 case SDATA_SYSV:
2470 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2471 break;
2472
2473 case SDATA_EABI:
2474 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2475 break;
2476
2477 }
2478
2479 switch (rs6000_traceback)
2480 {
2481 case traceback_default: trace_str = "default"; break;
2482 case traceback_none: trace_str = "none"; break;
2483 case traceback_part: trace_str = "part"; break;
2484 case traceback_full: trace_str = "full"; break;
2485 default: trace_str = "unknown"; break;
2486 }
2487
2488 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2489
2490 switch (rs6000_current_cmodel)
2491 {
2492 case CMODEL_SMALL: cmodel_str = "small"; break;
2493 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2494 case CMODEL_LARGE: cmodel_str = "large"; break;
2495 default: cmodel_str = "unknown"; break;
2496 }
2497
2498 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2499
2500 switch (rs6000_current_abi)
2501 {
2502 case ABI_NONE: abi_str = "none"; break;
2503 case ABI_AIX: abi_str = "aix"; break;
2504 case ABI_ELFv2: abi_str = "ELFv2"; break;
2505 case ABI_V4: abi_str = "V4"; break;
2506 case ABI_DARWIN: abi_str = "darwin"; break;
2507 default: abi_str = "unknown"; break;
2508 }
2509
2510 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2511
2512 if (rs6000_altivec_abi)
2513 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2514
2515 if (rs6000_darwin64_abi)
2516 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2517
2518 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2519 (TARGET_SOFT_FLOAT ? "true" : "false"));
2520
2521 if (TARGET_LINK_STACK)
2522 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2523
2524 if (TARGET_P8_FUSION)
2525 {
2526 char options[80];
2527
2528 strcpy (options, "power8");
2529 if (TARGET_P8_FUSION_SIGN)
2530 strcat (options, ", sign");
2531
2532 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2533 }
2534
2535 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2536 TARGET_SECURE_PLT ? "secure" : "bss");
2537 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2538 aix_struct_return ? "aix" : "sysv");
2539 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2540 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2541 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2542 tf[!!rs6000_align_branch_targets]);
2543 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2544 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2545 rs6000_long_double_type_size);
2546 if (rs6000_long_double_type_size > 64)
2547 {
2548 fprintf (stderr, DEBUG_FMT_S, "long double type",
2549 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2550 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2551 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2552 }
2553 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2554 (int)rs6000_sched_restricted_insns_priority);
2555 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2556 (int)END_BUILTINS);
2557 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2558 (int)RS6000_BUILTIN_COUNT);
2559
2560 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2561 (int)TARGET_FLOAT128_ENABLE_TYPE);
2562
2563 if (TARGET_VSX)
2564 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2565 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2566
2567 if (TARGET_DIRECT_MOVE_128)
2568 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2569 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2570 }
2571
2572 \f
2573 /* Update the addr mask bits in reg_addr to help secondary reload and the
2574    legitimate address support figure out the appropriate addressing to
2575    use. */
2576
2577 static void
2578 rs6000_setup_reg_addr_masks (void)
2579 {
2580 ssize_t rc, reg, m, nregs;
2581 addr_mask_type any_addr_mask, addr_mask;
2582
2583 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2584 {
2585 machine_mode m2 = (machine_mode) m;
2586 bool complex_p = false;
2587 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2588 size_t msize;
2589
2590 if (COMPLEX_MODE_P (m2))
2591 {
2592 complex_p = true;
2593 m2 = GET_MODE_INNER (m2);
2594 }
2595
2596 msize = GET_MODE_SIZE (m2);
2597
2598 /* SDmode is special in that we want to access it only via REG+REG
2599 addressing on power7 and above, since we want to use the LFIWZX and
2600 STFIWZX instructions to load it. */
2601 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2602
2603 any_addr_mask = 0;
2604 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2605 {
2606 addr_mask = 0;
2607 reg = reload_reg_map[rc].reg;
2608
2609 /* Can mode values go in the GPR/FPR/Altivec registers? */
2610 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2611 {
2612 bool small_int_vsx_p = (small_int_p
2613 && (rc == RELOAD_REG_FPR
2614 || rc == RELOAD_REG_VMX));
2615
2616 nregs = rs6000_hard_regno_nregs[m][reg];
2617 addr_mask |= RELOAD_REG_VALID;
2618
2619 /* Indicate if the mode takes more than 1 physical register. If
2620 it takes a single register, indicate it can do REG+REG
2621 addressing. Small integers in VSX registers can only do
2622 REG+REG addressing. */
2623 if (small_int_vsx_p)
2624 addr_mask |= RELOAD_REG_INDEXED;
2625 else if (nregs > 1 || m == BLKmode || complex_p)
2626 addr_mask |= RELOAD_REG_MULTIPLE;
2627 else
2628 addr_mask |= RELOAD_REG_INDEXED;
2629
2630 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2631 addressing. If we allow scalars into Altivec registers,
2632 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2633
2634 For VSX systems, we don't allow update addressing for
2635 DFmode/SFmode if those registers can go in both the
2636 traditional floating point registers and Altivec registers.
2637 The load/store instructions for the Altivec registers do not
2638 have update forms. If we allowed update addressing, it seems
2639 to break IV-OPT code using floating point if the index type is
2640 int instead of long (PR target/81550 and target/84042). */
2641
2642 if (TARGET_UPDATE
2643 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2644 && msize <= 8
2645 && !VECTOR_MODE_P (m2)
2646 && !VECTOR_ALIGNMENT_P (m2)
2647 && !complex_p
2648 && (m != E_DFmode || !TARGET_VSX)
2649 && (m != E_SFmode || !TARGET_P8_VECTOR)
2650 && !small_int_vsx_p)
2651 {
2652 addr_mask |= RELOAD_REG_PRE_INCDEC;
2653
2654 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2655 we don't allow PRE_MODIFY for some multi-register
2656 operations. */
2657 switch (m)
2658 {
2659 default:
2660 addr_mask |= RELOAD_REG_PRE_MODIFY;
2661 break;
2662
2663 case E_DImode:
2664 if (TARGET_POWERPC64)
2665 addr_mask |= RELOAD_REG_PRE_MODIFY;
2666 break;
2667
2668 case E_DFmode:
2669 case E_DDmode:
2670 if (TARGET_HARD_FLOAT)
2671 addr_mask |= RELOAD_REG_PRE_MODIFY;
2672 break;
2673 }
2674 }
2675 }
2676
2677 /* GPR and FPR registers can do REG+OFFSET addressing, except
2678 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2679 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2680 if ((addr_mask != 0) && !indexed_only_p
2681 && msize <= 8
2682 && (rc == RELOAD_REG_GPR
2683 || ((msize == 8 || m2 == SFmode)
2684 && (rc == RELOAD_REG_FPR
2685 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2686 addr_mask |= RELOAD_REG_OFFSET;
2687
2688      /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2689         instructions are enabled.  The offset for 128-bit VSX registers is
2690         only 12 bits.  While GPRs can handle the full offset range, VSX
2691         registers can only handle the restricted range. */
2692 else if ((addr_mask != 0) && !indexed_only_p
2693 && msize == 16 && TARGET_P9_VECTOR
2694 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2695 || (m2 == TImode && TARGET_VSX)))
2696 {
2697 addr_mask |= RELOAD_REG_OFFSET;
2698 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2699 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2700 }
2701
2702      /* Vector pairs can do both indexed and offset loads if the
2703         instructions are enabled; otherwise they can only do offset loads,
2704         since the access will be broken into two vector moves.  Vector
2705         quads can only do offset loads. */
2706 else if ((addr_mask != 0) && TARGET_MMA
2707 && (m2 == OOmode || m2 == XOmode))
2708 {
2709 addr_mask |= RELOAD_REG_OFFSET;
2710 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2711 {
2712 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2713 if (m2 == OOmode)
2714 addr_mask |= RELOAD_REG_INDEXED;
2715 }
2716 }
2717
2718 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2719 addressing on 128-bit types. */
2720 if (rc == RELOAD_REG_VMX && msize == 16
2721 && (addr_mask & RELOAD_REG_VALID) != 0)
2722 addr_mask |= RELOAD_REG_AND_M16;
2723
2724 reg_addr[m].addr_mask[rc] = addr_mask;
2725 any_addr_mask |= addr_mask;
2726 }
2727
2728 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2729 }
2730 }
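     /* Illustrative result of the loop above (not in the original source):
        on a 64-bit target with TARGET_UPDATE, SImode in the GPR reload
        class ends up with RELOAD_REG_VALID | RELOAD_REG_INDEXED
        | RELOAD_REG_OFFSET | RELOAD_REG_PRE_INCDEC | RELOAD_REG_PRE_MODIFY,
        which rs6000_debug_addr_mask renders as "v io++ ".  */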
2731
2732 \f
2733 /* Initialize the various global tables that are based on register size. */
2734 static void
2735 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2736 {
2737 ssize_t r, m, c;
2738 int align64;
2739 int align32;
2740
2741 /* Precalculate REGNO_REG_CLASS. */
2742 rs6000_regno_regclass[0] = GENERAL_REGS;
2743 for (r = 1; r < 32; ++r)
2744 rs6000_regno_regclass[r] = BASE_REGS;
2745
2746 for (r = 32; r < 64; ++r)
2747 rs6000_regno_regclass[r] = FLOAT_REGS;
2748
2749 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2750 rs6000_regno_regclass[r] = NO_REGS;
2751
2752 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2753 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2754
2755 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2756 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2757 rs6000_regno_regclass[r] = CR_REGS;
2758
2759 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2760 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2761 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2762 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2763 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2764 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2765 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2766
2767   /* Precalculate the mapping from register class to simpler reload register
2768      class.  We don't need all of the register classes that are combinations
2769      of different classes, just the simple ones that have constraint letters. */
2770 for (c = 0; c < N_REG_CLASSES; c++)
2771 reg_class_to_reg_type[c] = NO_REG_TYPE;
2772
2773 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2774 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2775 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2776 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2777 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2778 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2779 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2780 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2781 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2782 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2783
2784 if (TARGET_VSX)
2785 {
2786 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2787 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2788 }
2789 else
2790 {
2791 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2792 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2793 }
2794
2795   /* Precalculate the valid memory formats as well as the vector information;
2796      this must be set up before the rs6000_hard_regno_nregs_internal calls
2797      below. */
2798 gcc_assert ((int)VECTOR_NONE == 0);
2799 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2800 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2801
2802 gcc_assert ((int)CODE_FOR_nothing == 0);
2803 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2804
2805 gcc_assert ((int)NO_REGS == 0);
2806 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2807
2808   /* The VSX hardware allows native alignment for vectors; control whether the
2809      compiler believes it can use native alignment or must still use 128-bit alignment. */
2810 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2811 {
2812 align64 = 64;
2813 align32 = 32;
2814 }
2815 else
2816 {
2817 align64 = 128;
2818 align32 = 128;
2819 }
2820
2821 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2822 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2823 if (TARGET_FLOAT128_TYPE)
2824 {
2825 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2826 rs6000_vector_align[KFmode] = 128;
2827
2828 if (FLOAT128_IEEE_P (TFmode))
2829 {
2830 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2831 rs6000_vector_align[TFmode] = 128;
2832 }
2833 }
2834
2835 /* V2DF mode, VSX only. */
2836 if (TARGET_VSX)
2837 {
2838 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2839 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2840 rs6000_vector_align[V2DFmode] = align64;
2841 }
2842
2843 /* V4SF mode, either VSX or Altivec. */
2844 if (TARGET_VSX)
2845 {
2846 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2847 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2848 rs6000_vector_align[V4SFmode] = align32;
2849 }
2850 else if (TARGET_ALTIVEC)
2851 {
2852 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2853 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2854 rs6000_vector_align[V4SFmode] = align32;
2855 }
2856
2857 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2858 and stores. */
2859 if (TARGET_ALTIVEC)
2860 {
2861 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2862 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2863 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2864 rs6000_vector_align[V4SImode] = align32;
2865 rs6000_vector_align[V8HImode] = align32;
2866 rs6000_vector_align[V16QImode] = align32;
2867
2868 if (TARGET_VSX)
2869 {
2870 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2871 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2872 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2873 }
2874 else
2875 {
2876 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2877 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2878 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2879 }
2880 }
2881
2882   /* V2DImode; full arithmetic depends on the ISA 2.07 vector unit.  Allow under
2883      VSX to do insert/splat/extract.  Altivec doesn't have 64-bit integer support. */
2884 if (TARGET_VSX)
2885 {
2886 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2887 rs6000_vector_unit[V2DImode]
2888 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2889 rs6000_vector_align[V2DImode] = align64;
2890
2891 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2892 rs6000_vector_unit[V1TImode]
2893 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2894 rs6000_vector_align[V1TImode] = 128;
2895 }
2896
2897 /* DFmode, see if we want to use the VSX unit. Memory is handled
2898 differently, so don't set rs6000_vector_mem. */
2899 if (TARGET_VSX)
2900 {
2901 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2902 rs6000_vector_align[DFmode] = 64;
2903 }
2904
2905 /* SFmode, see if we want to use the VSX unit. */
2906 if (TARGET_P8_VECTOR)
2907 {
2908 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2909 rs6000_vector_align[SFmode] = 32;
2910 }
2911
2912   /* Allow TImode in VSX registers and set the VSX memory macros. */
2913 if (TARGET_VSX)
2914 {
2915 rs6000_vector_mem[TImode] = VECTOR_VSX;
2916 rs6000_vector_align[TImode] = align64;
2917 }
2918
2919 /* Add support for vector pairs and vector quad registers. */
2920 if (TARGET_MMA)
2921 {
2922 rs6000_vector_unit[OOmode] = VECTOR_NONE;
2923 rs6000_vector_mem[OOmode] = VECTOR_VSX;
2924 rs6000_vector_align[OOmode] = 256;
2925
2926 rs6000_vector_unit[XOmode] = VECTOR_NONE;
2927 rs6000_vector_mem[XOmode] = VECTOR_VSX;
2928 rs6000_vector_align[XOmode] = 512;
2929 }
2930
2931   /* Register class settings for the constraints that depend on compile
2932      switches.  When the VSX code was added, different constraints were added
2933      based on the type (DFmode, V2DFmode, V4SFmode).  For the vector types, all
2934      of the VSX registers are used.  The register classes for scalar floating
2935      point types are set based on whether we allow that type into the upper
2936      (Altivec) registers.
2937 registers for load/store operations, to select using a VSX memory
2938 operation instead of the traditional floating point operation. The
2939 constraints are:
2940
2941 d - Register class to use with traditional DFmode instructions.
2942 f - Register class to use with traditional SFmode instructions.
2943 v - Altivec register.
2944 wa - Any VSX register.
2945 wc - Reserved to represent individual CR bits (used in LLVM).
2946 wn - always NO_REGS.
2947 wr - GPR if 64-bit mode is permitted.
2948 wx - Float register if we can do 32-bit int stores. */
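       /* A hypothetical machine-description fragment using these
          constraints (illustration only, not from rs6000.md):

            (define_insn "*example_movdf"
              [(set (match_operand:DF 0 "gpc_reg_operand" "=d,wa")
                    (match_operand:DF 1 "gpc_reg_operand" "d,wa"))]
              ...)

          Alternative 0 targets the traditional FPRs via "d"; alternative 1
          accepts any VSX register via "wa".  */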
2949
2950 if (TARGET_HARD_FLOAT)
2951 {
2952 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
2953 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
2954 }
2955
2956 if (TARGET_VSX)
2957 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2958
2959 /* Add conditional constraints based on various options, to allow us to
2960 collapse multiple insn patterns. */
2961 if (TARGET_ALTIVEC)
2962 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2963
2964 if (TARGET_POWERPC64)
2965 {
2966 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2967 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
2968 }
2969
2970 if (TARGET_STFIWX)
2971 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2972
2973   /* Support for new direct moves (ISA 3.0 + 64-bit). */
2974 if (TARGET_DIRECT_MOVE_128)
2975 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
2976
2977 /* Set up the reload helper and direct move functions. */
2978 if (TARGET_VSX || TARGET_ALTIVEC)
2979 {
2980 if (TARGET_64BIT)
2981 {
2982 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2983 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2984 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2985 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2986 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2987 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
2988 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
2989 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
2990 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
2991 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
2992 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
2993 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
2994 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
2995 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
2996 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
2997 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
2998 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
2999 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3000 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3001 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3002
3003 if (FLOAT128_VECTOR_P (KFmode))
3004 {
3005 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3006 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3007 }
3008
3009 if (FLOAT128_VECTOR_P (TFmode))
3010 {
3011 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3012 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3013 }
3014
3015 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3016 available. */
3017 if (TARGET_NO_SDMODE_STACK)
3018 {
3019 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3020 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3021 }
3022
3023 if (TARGET_VSX)
3024 {
3025 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3026 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3027 }
3028
3029 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3030 {
3031 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3032 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3033 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3034 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3035 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3036 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3037 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3038 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3039 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3040
3041 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3042 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3043 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3044 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3045 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3046 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3047 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3048 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3049 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3050
3051 if (FLOAT128_VECTOR_P (KFmode))
3052 {
3053 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3054 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3055 }
3056
3057 if (FLOAT128_VECTOR_P (TFmode))
3058 {
3059 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3060 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3061 }
3062
3063 if (TARGET_MMA)
3064 {
3065 reg_addr[OOmode].reload_store = CODE_FOR_reload_oo_di_store;
3066 reg_addr[OOmode].reload_load = CODE_FOR_reload_oo_di_load;
3067 reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store;
3068 reg_addr[XOmode].reload_load = CODE_FOR_reload_xo_di_load;
3069 }
3070 }
3071 }
3072 else
3073 {
3074 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3075 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3076 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3077 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3078 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3079 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3080 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3081 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3082 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3083 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3084 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3085 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3086 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3087 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3088 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3089 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3090 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3091 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3092 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3093 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3094
3095 if (FLOAT128_VECTOR_P (KFmode))
3096 {
3097 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3098 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3099 }
3100
3101 if (FLOAT128_IEEE_P (TFmode))
3102 {
3103 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3104 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3105 }
3106
3107 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3108 available. */
3109 if (TARGET_NO_SDMODE_STACK)
3110 {
3111 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3112 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3113 }
3114
3115 if (TARGET_VSX)
3116 {
3117 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3118 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3119 }
3120
3121 if (TARGET_DIRECT_MOVE)
3122 {
3123 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3124 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3125 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3126 }
3127 }
3128
3129 reg_addr[DFmode].scalar_in_vmx_p = true;
3130 reg_addr[DImode].scalar_in_vmx_p = true;
3131
3132 if (TARGET_P8_VECTOR)
3133 {
3134 reg_addr[SFmode].scalar_in_vmx_p = true;
3135 reg_addr[SImode].scalar_in_vmx_p = true;
3136
3137 if (TARGET_P9_VECTOR)
3138 {
3139 reg_addr[HImode].scalar_in_vmx_p = true;
3140 reg_addr[QImode].scalar_in_vmx_p = true;
3141 }
3142 }
3143 }
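     /* Summary of the scalar_in_vmx_p settings above (illustration only):
        within this VSX/Altivec block, DFmode/DImode scalars are always
        allowed in the upper (Altivec) registers, SFmode/SImode additionally
        require power8 (TARGET_P8_VECTOR), and HImode/QImode require power9
        (TARGET_P9_VECTOR).  */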
3144
3145 /* Precalculate HARD_REGNO_NREGS. */
3146 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3147 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3148 rs6000_hard_regno_nregs[m][r]
3149 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3150
3151 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3152 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3153 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3154 rs6000_hard_regno_mode_ok_p[m][r]
3155 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3156
3157 /* Precalculate CLASS_MAX_NREGS sizes. */
3158 for (c = 0; c < LIM_REG_CLASSES; ++c)
3159 {
3160 int reg_size;
3161
3162 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3163 reg_size = UNITS_PER_VSX_WORD;
3164
3165 else if (c == ALTIVEC_REGS)
3166 reg_size = UNITS_PER_ALTIVEC_WORD;
3167
3168 else if (c == FLOAT_REGS)
3169 reg_size = UNITS_PER_FP_WORD;
3170
3171 else
3172 reg_size = UNITS_PER_WORD;
3173
3174 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3175 {
3176 machine_mode m2 = (machine_mode)m;
3177 int reg_size2 = reg_size;
3178
3179      /* TDmode and IBM 128-bit floating point always take 2 registers, even
3180         in VSX. */
3181 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3182 reg_size2 = UNITS_PER_FP_WORD;
3183
3184 rs6000_class_max_nregs[m][c]
3185 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3186 }
3187 }
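     /* Worked example (not in the original source): IFmode (IBM 128-bit
        floating point, FLOAT128_2REG_P) in a VSX register class forces
        reg_size2 back to UNITS_PER_FP_WORD == 8, so
        rs6000_class_max_nregs[IFmode][VSX_REGS] == (16 + 8 - 1) / 8 == 2,
        even though one 16-byte VSX register could hold the value.  */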
3188
3189   /* Calculate which modes to automatically generate code to use the
3190      reciprocal divide and square root instructions.  In the future, possibly
3191      automatically generate the instructions even if the user did not specify
3192      -mrecip.  The older machines' double precision reciprocal sqrt estimate is
3193      not accurate enough. */
3194 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3195 if (TARGET_FRES)
3196 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3197 if (TARGET_FRE)
3198 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3199 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3200 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3201 if (VECTOR_UNIT_VSX_P (V2DFmode))
3202 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3203
3204 if (TARGET_FRSQRTES)
3205 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3206 if (TARGET_FRSQRTE)
3207 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3208 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3209 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3210 if (VECTOR_UNIT_VSX_P (V2DFmode))
3211 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3212
3213 if (rs6000_recip_control)
3214 {
3215 if (!flag_finite_math_only)
3216 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
3217 "-ffast-math");
3218 if (flag_trapping_math)
3219 warning (0, "%qs requires %qs or %qs", "-mrecip",
3220 "-fno-trapping-math", "-ffast-math");
3221 if (!flag_reciprocal_math)
3222 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3223 "-ffast-math");
3224 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3225 {
3226 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3227 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3228 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3229
3230 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3231 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3232 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3233
3234 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3235 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3236 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3237
3238 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3239 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3240 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3241
3242 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3243 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3244 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3245
3246 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3247 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3248 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3249
3250 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3251 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3252 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3253
3254 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3255 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3256 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3257 }
3258 }
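     /* Illustrative outcome of the logic above (not in the original
        source): compiling with -mrecip -ffast-math on a CPU providing
        fres/frsqrtes leaves rs6000_recip_bits[SFmode] with
        RS6000_RECIP_MASK_HAVE_RE | RS6000_RECIP_MASK_AUTO_RE
        | RS6000_RECIP_MASK_HAVE_RSQRTE | RS6000_RECIP_MASK_AUTO_RSQRTE, so
        SFmode division and square root become candidates for expansion via
        the estimate instructions.  */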
3259
3260   /* Update the addr mask bits in reg_addr to help secondary reload and the
3261      legitimate address support figure out the appropriate addressing to
3262      use. */
3263 rs6000_setup_reg_addr_masks ();
3264
3265 if (global_init_p || TARGET_DEBUG_TARGET)
3266 {
3267 if (TARGET_DEBUG_REG)
3268 rs6000_debug_reg_global ();
3269
3270 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3271 fprintf (stderr,
3272 "SImode variable mult cost = %d\n"
3273 "SImode constant mult cost = %d\n"
3274 "SImode short constant mult cost = %d\n"
3275 "DImode multipliciation cost = %d\n"
3276 "SImode division cost = %d\n"
3277 "DImode division cost = %d\n"
3278 "Simple fp operation cost = %d\n"
3279 "DFmode multiplication cost = %d\n"
3280 "SFmode division cost = %d\n"
3281 "DFmode division cost = %d\n"
3282 "cache line size = %d\n"
3283 "l1 cache size = %d\n"
3284 "l2 cache size = %d\n"
3285 "simultaneous prefetches = %d\n"
3286 "\n",
3287 rs6000_cost->mulsi,
3288 rs6000_cost->mulsi_const,
3289 rs6000_cost->mulsi_const9,
3290 rs6000_cost->muldi,
3291 rs6000_cost->divsi,
3292 rs6000_cost->divdi,
3293 rs6000_cost->fp,
3294 rs6000_cost->dmul,
3295 rs6000_cost->sdiv,
3296 rs6000_cost->ddiv,
3297 rs6000_cost->cache_line_size,
3298 rs6000_cost->l1_cache_size,
3299 rs6000_cost->l2_cache_size,
3300 rs6000_cost->simultaneous_prefetches);
3301 }
3302 }
3303
3304 #if TARGET_MACHO
3305 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3306
3307 static void
3308 darwin_rs6000_override_options (void)
3309 {
3310 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3311 off. */
3312 rs6000_altivec_abi = 1;
3313 TARGET_ALTIVEC_VRSAVE = 1;
3314 rs6000_current_abi = ABI_DARWIN;
3315
3316 if (DEFAULT_ABI == ABI_DARWIN
3317 && TARGET_64BIT)
3318 darwin_one_byte_bool = 1;
3319
3320 if (TARGET_64BIT && ! TARGET_POWERPC64)
3321 {
3322 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3323 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3324 }
3325
3326 /* The linkers [ld64] that support 64-bit do not need the JBSR longcall
3327 optimisation, and it will not work with the most generic case (where the
3328 symbol is an undefined external, but there is no symbol stub). */
3329 if (TARGET_64BIT)
3330 rs6000_default_long_calls = 0;
3331
3332 /* ld_classic is (so far) still used for kernel (static) code, and supports
3333 the JBSR longcall / branch islands. */
3334 if (flag_mkernel)
3335 {
3336 rs6000_default_long_calls = 1;
3337
3338 /* Allow a kext author to do -mkernel -mhard-float. */
3339 if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
3340 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3341 }
3342
3343 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3344 Altivec. */
3345 if (!flag_mkernel && !flag_apple_kext
3346 && TARGET_64BIT
3347 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3348 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3349
3350 /* Unless the user (not the configurer) has explicitly overridden
3351 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to the
3352 G4 unless targeting the kernel. */
3353 if (!flag_mkernel
3354 && !flag_apple_kext
3355 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3356 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3357 && ! global_options_set.x_rs6000_cpu_index)
3358 {
3359 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3360 }
3361 }
3362 #endif
3363
3364 /* If not otherwise specified by a target, make 'long double' equivalent to
3365 'double'. */
3366
3367 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3368 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3369 #endif
3370
3371 /* Return the builtin mask for the various options used that could affect
3372 which builtins are enabled. In the past we used target_flags, but we've run
3373 out of bits, and some options are no longer in target_flags. */
3374
3375 HOST_WIDE_INT
3376 rs6000_builtin_mask_calculate (void)
3377 {
3378 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3379 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3380 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3381 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3382 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3383 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3384 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3385 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3386 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3387 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3388 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3389 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3390 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3391 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3392 | ((TARGET_POWERPC64) ? RS6000_BTM_POWERPC64 : 0)
3393 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3394 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3395 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3396 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3397 | ((TARGET_LONG_DOUBLE_128
3398 && TARGET_HARD_FLOAT
3399 && !TARGET_IEEEQUAD) ? RS6000_BTM_LDBL128 : 0)
3400 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
3401 | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0)
3402 | ((TARGET_MMA) ? RS6000_BTM_MMA : 0)
3403 | ((TARGET_POWER10) ? RS6000_BTM_P10 : 0));
3404 }
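/* For illustration: a built-in whose definition requires feature mask
   BIF_MASK is made available only when
   (BIF_MASK & rs6000_builtin_mask) == BIF_MASK, i.e. when every feature
   bit it needs is present in the mask computed above.  */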
3405
3406 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3407 to clobber the XER[CA] bit because clobbering that bit without telling
3408 the compiler worked just fine with versions of GCC before GCC 5, and
3409 breaking a lot of older code in ways that are hard to track down is
3410 not such a great idea. */
3411
3412 static rtx_insn *
3413 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3414 vec<const char *> &/*constraints*/,
3415 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3416 {
3417 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3418 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3419 return NULL;
3420 }
3421
3422 /* This target function is similar to the hook TARGET_OPTION_OVERRIDE
3423 but is called when the optimize level is changed via an attribute or
3424 pragma or when it is reset at the end of the code affected by the
3425 attribute or pragma. It is not called at the beginning of compilation,
3426 when TARGET_OPTION_OVERRIDE is called, so if you want these actions
3427 performed at that point, you should have TARGET_OPTION_OVERRIDE call
3428 TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE. */
3429
3430 static void
3431 rs6000_override_options_after_change (void)
3432 {
3433 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
3434 turns -frename-registers on. */
3435 if ((global_options_set.x_flag_unroll_loops && flag_unroll_loops)
3436 || (global_options_set.x_flag_unroll_all_loops
3437 && flag_unroll_all_loops))
3438 {
3439 if (!global_options_set.x_unroll_only_small_loops)
3440 unroll_only_small_loops = 0;
3441 if (!global_options_set.x_flag_rename_registers)
3442 flag_rename_registers = 1;
3443 if (!global_options_set.x_flag_cunroll_grow_size)
3444 flag_cunroll_grow_size = 1;
3445 }
3446 else if (!global_options_set.x_flag_cunroll_grow_size)
3447 flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
3448 }
3449
3450 #ifdef TARGET_USES_LINUX64_OPT
3451 static void
3452 rs6000_linux64_override_options ()
3453 {
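/* The INVALID_64BIT and INVALID_32BIT macros used below are diagnostic
   format strings supplied by the subtarget headers for options that are
   not supported in 64-bit or 32-bit mode, respectively.  */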
3454 if (!global_options_set.x_rs6000_alignment_flags)
3455 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
3456 if (rs6000_isa_flags & OPTION_MASK_64BIT)
3457 {
3458 if (DEFAULT_ABI != ABI_AIX)
3459 {
3460 rs6000_current_abi = ABI_AIX;
3461 error (INVALID_64BIT, "call");
3462 }
3463 dot_symbols = !strcmp (rs6000_abi_name, "aixdesc");
3464 if (ELFv2_ABI_CHECK)
3465 {
3466 rs6000_current_abi = ABI_ELFv2;
3467 if (dot_symbols)
3468 error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>");
3469 }
3470 if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE)
3471 {
3472 rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE;
3473 error (INVALID_64BIT, "relocatable");
3474 }
3475 if (rs6000_isa_flags & OPTION_MASK_EABI)
3476 {
3477 rs6000_isa_flags &= ~OPTION_MASK_EABI;
3478 error (INVALID_64BIT, "eabi");
3479 }
3480 if (TARGET_PROTOTYPE)
3481 {
3482 target_prototype = 0;
3483 error (INVALID_64BIT, "prototype");
3484 }
3485 if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0)
3486 {
3487 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3488 error ("%<-m64%> requires a PowerPC64 cpu");
3489 }
3490 if (!global_options_set.x_rs6000_current_cmodel)
3491 SET_CMODEL (CMODEL_MEDIUM);
3492 if ((rs6000_isa_flags_explicit & OPTION_MASK_MINIMAL_TOC) != 0)
3493 {
3494 if (global_options_set.x_rs6000_current_cmodel
3495 && rs6000_current_cmodel != CMODEL_SMALL)
3496 error ("%<-mcmodel incompatible with other toc options%>");
3497 if (TARGET_MINIMAL_TOC)
3498 SET_CMODEL (CMODEL_SMALL);
3499 else if (TARGET_PCREL
3500 || (PCREL_SUPPORTED_BY_OS
3501 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0))
3502 /* Ignore -mno-minimal-toc. */
3503 ;
3504 else
3505 SET_CMODEL (CMODEL_SMALL);
3506 }
3507 if (rs6000_current_cmodel != CMODEL_SMALL)
3508 {
3509 if (!global_options_set.x_TARGET_NO_FP_IN_TOC)
3510 TARGET_NO_FP_IN_TOC = rs6000_current_cmodel == CMODEL_MEDIUM;
3511 if (!global_options_set.x_TARGET_NO_SUM_IN_TOC)
3512 TARGET_NO_SUM_IN_TOC = 0;
3513 }
3514 if (TARGET_PLTSEQ && DEFAULT_ABI != ABI_ELFv2)
3515 {
3516 if (global_options_set.x_rs6000_pltseq)
3517 warning (0, "%qs unsupported for this ABI",
3518 "-mpltseq");
3519 rs6000_pltseq = false;
3520 }
3521 }
3522 else if (TARGET_64BIT)
3523 error (INVALID_32BIT, "32");
3524 else
3525 {
3526 if (TARGET_PROFILE_KERNEL)
3527 {
3528 profile_kernel = 0;
3529 error (INVALID_32BIT, "profile-kernel");
3530 }
3531 if (global_options_set.x_rs6000_current_cmodel)
3532 {
3533 SET_CMODEL (CMODEL_SMALL);
3534 error (INVALID_32BIT, "cmodel");
3535 }
3536 }
3537 }
3538 #endif
3539
3540 /* Return true if we are using GLIBC, and it supports IEEE 128-bit long double.
3541 This support is only in little endian GLIBC 2.32 or newer. */
3542 static bool
3543 glibc_supports_ieee_128bit (void)
3544 {
3545 #ifdef OPTION_GLIBC
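/* The version is encoded as major * 1000 + minor, so GLIBC 2.32 maps
   to 2032.  */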
3546 if (OPTION_GLIBC && !BYTES_BIG_ENDIAN
3547 && ((TARGET_GLIBC_MAJOR * 1000) + TARGET_GLIBC_MINOR) >= 2032)
3548 return true;
3549 #endif /* OPTION_GLIBC. */
3550
3551 return false;
3552 }
3553
3554 /* Override command line options.
3555
3556 Combine build-specific configuration information with options
3557 specified on the command line to set various state variables which
3558 influence code generation, optimization, and expansion of built-in
3559 functions. Ensure that command-line configuration preferences are
3560 compatible with each other and with the build configuration; issue
3561 warnings while adjusting configuration or error messages while
3562 rejecting configuration.
3563
3564 Upon entry to this function:
3565
3566 This function is called once at the beginning of
3567 compilation, and then again at the start and end of compiling
3568 each section of code that has a different configuration, as
3569 indicated, for example, by adding the
3570
3571 __attribute__((__target__("cpu=power9")))
3572
3573 qualifier to a function definition or, for example, by bracketing
3574 code between
3575
3576 #pragma GCC target("altivec")
3577
3578 and
3579
3580 #pragma GCC reset_options
3581
3582 directives. Parameter global_init_p is true for the initial
3583 invocation, which initializes global variables, and false for all
3584 subsequent invocations.
3585
3586
3587 Various global state information is assumed to be valid. This
3588 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3589 default CPU specified at build configure time, TARGET_DEFAULT,
3590 representing the default set of option flags for the default
3591 target, and global_options_set.x_rs6000_isa_flags, representing
3592 which options were requested on the command line.
3593
3594 Upon return from this function:
3595
3596 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3597 was set by name on the command line. Additionally, if certain
3598 attributes are automatically enabled or disabled by this function
3599 in order to assure compatibility between options and
3600 configuration, the flags associated with those attributes are
3601 also set. By setting these "explicit bits", we avoid the risk
3602 that other code might accidentally overwrite these particular
3603 attributes with "default values".
3604
3605 The various bits of rs6000_isa_flags are set to indicate the
3606 target options that have been selected for the most current
3607 compilation efforts. This has the effect of also turning on the
3608 associated TARGET_XXX values since these are macros which are
3609 generally defined to test the corresponding bit of the
3610 rs6000_isa_flags variable.
3611
3612 The variable rs6000_builtin_mask is set to represent the target
3613 options for the most current compilation efforts, consistent with
3614 the current contents of rs6000_isa_flags. This variable controls
3615 expansion of built-in functions.
3616
3617 Various other global variables and fields of global structures
3618 (over 50 in all) are initialized to reflect the desired options
3619 for the most current compilation efforts. */
3620
3621 static bool
3622 rs6000_option_override_internal (bool global_init_p)
3623 {
3624 bool ret = true;
3625
3626 HOST_WIDE_INT set_masks;
3627 HOST_WIDE_INT ignore_masks;
3628 int cpu_index = -1;
3629 int tune_index;
3630 struct cl_target_option *main_target_opt
3631 = ((global_init_p || target_option_default_node == NULL)
3632 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3633
3634 /* Print defaults. */
3635 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3636 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3637
3638 /* Remember the explicit arguments. */
3639 if (global_init_p)
3640 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3641
3642 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3643 library functions, so warn about it. The flag may be useful for
3644 performance studies from time to time though, so don't disable it
3645 entirely. */
3646 if (global_options_set.x_rs6000_alignment_flags
3647 && rs6000_alignment_flags == MASK_ALIGN_POWER
3648 && DEFAULT_ABI == ABI_DARWIN
3649 && TARGET_64BIT)
3650 warning (0, "%qs is not supported for 64-bit Darwin;"
3651 " it is incompatible with the installed C and C++ libraries",
3652 "-malign-power");
3653
3654 /* Numerous experiments show that IRA-based loop pressure
3655 calculation works better for RTL loop invariant motion on targets
3656 with enough (>= 32) registers. It is an expensive optimization,
3657 so it is enabled only when optimizing for peak performance. */
3658 if (optimize >= 3 && global_init_p
3659 && !global_options_set.x_flag_ira_loop_pressure)
3660 flag_ira_loop_pressure = 1;
3661
3662 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3663 for tracebacks to be complete, but not if any -fasynchronous-unwind-tables
3664 options were already specified. */
3665 if (flag_sanitize & SANITIZE_USER_ADDRESS
3666 && !global_options_set.x_flag_asynchronous_unwind_tables)
3667 flag_asynchronous_unwind_tables = 1;
3668
3669 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3670 loop unroller is active. It is only checked during unrolling, so
3671 we can just set it on by default. */
3672 if (!global_options_set.x_flag_variable_expansion_in_unroller)
3673 flag_variable_expansion_in_unroller = 1;
3674
3675 /* Set the pointer size. */
3676 if (TARGET_64BIT)
3677 {
3678 rs6000_pmode = DImode;
3679 rs6000_pointer_size = 64;
3680 }
3681 else
3682 {
3683 rs6000_pmode = SImode;
3684 rs6000_pointer_size = 32;
3685 }
3686
3687 /* Some OSs don't support saving the high part of 64-bit registers on context
3688 switch. Other OSs don't support saving Altivec registers. On those OSs,
3689 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3690 if the user wants either, the user must explicitly specify them and we
3691 won't interfere with the user's specification. */
3692
3693 set_masks = POWERPC_MASKS;
3694 #ifdef OS_MISSING_POWERPC64
3695 if (OS_MISSING_POWERPC64)
3696 set_masks &= ~OPTION_MASK_POWERPC64;
3697 #endif
3698 #ifdef OS_MISSING_ALTIVEC
3699 if (OS_MISSING_ALTIVEC)
3700 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3701 | OTHER_VSX_VECTOR_MASKS);
3702 #endif
3703
3704 /* Don't let the processor default override options given explicitly. */
3705 set_masks &= ~rs6000_isa_flags_explicit;
3706
3707 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3708 the cpu in a target attribute or pragma, but did not specify a tuning
3709 option, use the cpu for the tuning option rather than the option specified
3710 with -mtune on the command line. Process a '--with-cpu' configuration
3711 request as an implicit -mcpu. */
3712 if (rs6000_cpu_index >= 0)
3713 cpu_index = rs6000_cpu_index;
3714 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3715 cpu_index = main_target_opt->x_rs6000_cpu_index;
3716 else if (OPTION_TARGET_CPU_DEFAULT)
3717 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3718
3719 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3720 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3721 with those from the cpu, except for options that were explicitly set. If
3722 we don't have a cpu, do not override the target bits set in
3723 TARGET_DEFAULT. */
3724 if (cpu_index >= 0)
3725 {
3726 rs6000_cpu_index = cpu_index;
3727 rs6000_isa_flags &= ~set_masks;
3728 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3729 & set_masks);
3730 }
3731 else
3732 {
3733 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3734 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3735 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3736 to using rs6000_isa_flags, we need to do the initialization here.
3737
3738 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3739 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3740 HOST_WIDE_INT flags;
3741 if (TARGET_DEFAULT)
3742 flags = TARGET_DEFAULT;
3743 else
3744 {
3745 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3746 const char *default_cpu = (!TARGET_POWERPC64
3747 ? "powerpc"
3748 : (BYTES_BIG_ENDIAN
3749 ? "powerpc64"
3750 : "powerpc64le"));
3751 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3752 flags = processor_target_table[default_cpu_index].target_enable;
3753 }
3754 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3755 }
3756
3757 if (rs6000_tune_index >= 0)
3758 tune_index = rs6000_tune_index;
3759 else if (cpu_index >= 0)
3760 rs6000_tune_index = tune_index = cpu_index;
3761 else
3762 {
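/* Neither -mtune nor -mcpu was given; search the table for the entry
   of the default processor for this word size.  */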
3763 size_t i;
3764 enum processor_type tune_proc
3765 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3766
3767 tune_index = -1;
3768 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3769 if (processor_target_table[i].processor == tune_proc)
3770 {
3771 tune_index = i;
3772 break;
3773 }
3774 }
3775
3776 if (cpu_index >= 0)
3777 rs6000_cpu = processor_target_table[cpu_index].processor;
3778 else
3779 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3780
3781 gcc_assert (tune_index >= 0);
3782 rs6000_tune = processor_target_table[tune_index].processor;
3783
3784 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3785 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3786 || rs6000_cpu == PROCESSOR_PPCE5500)
3787 {
3788 if (TARGET_ALTIVEC)
3789 error ("AltiVec not supported in this target");
3790 }
3791
3792 /* If we are optimizing big endian systems for space, use the load/store
3793 multiple instructions. */
3794 if (BYTES_BIG_ENDIAN && optimize_size)
3795 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
3796
3797 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3798 because the hardware doesn't support the instructions used in little
3799 endian mode, and causes an alignment trap. The 750 does not cause an
3800 alignment trap (except when the target is unaligned). */
3801
3802 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3803 {
3804 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3805 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3806 warning (0, "%qs is not supported on little endian systems",
3807 "-mmultiple");
3808 }
3809
3810 /* If little-endian, default to -mstrict-align on older processors.
3811 Testing for htm matches power8 and later. */
3812 if (!BYTES_BIG_ENDIAN
3813 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3814 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3815
3816 if (!rs6000_fold_gimple)
3817 fprintf (stderr,
3818 "gimple folding of rs6000 builtins has been disabled.\n");
3819
3820 /* Add some warnings for VSX. */
3821 if (TARGET_VSX)
3822 {
3823 const char *msg = NULL;
3824 if (!TARGET_HARD_FLOAT)
3825 {
3826 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3827 msg = N_("%<-mvsx%> requires hardware floating point");
3828 else
3829 {
3830 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3831 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3832 }
3833 }
3834 else if (TARGET_AVOID_XFORM > 0)
3835 msg = N_("%<-mvsx%> needs indexed addressing");
3836 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3837 & OPTION_MASK_ALTIVEC))
3838 {
3839 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3840 msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3841 else
3842 msg = N_("%<-mno-altivec%> disables vsx");
3843 }
3844
3845 if (msg)
3846 {
3847 warning (0, msg);
3848 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3849 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3850 }
3851 }
3852
3853 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3854 the -mcpu setting to enable options that conflict. */
3855 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3856 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3857 | OPTION_MASK_ALTIVEC
3858 | OPTION_MASK_VSX)) != 0)
3859 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3860 | OPTION_MASK_DIRECT_MOVE)
3861 & ~rs6000_isa_flags_explicit);
3862
3863 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3864 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3865
3866 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3867 off all of the options that depend on those flags. */
3868 ignore_masks = rs6000_disable_incompatible_switches ();
3869
3870 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3871 unless the user explicitly used the -mno-<option> to disable the code. */
3872 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
3873 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3874 else if (TARGET_P9_MINMAX)
3875 {
3876 if (cpu_index >= 0)
3877 {
3878 if (processor_target_table[cpu_index].processor == PROCESSOR_POWER9)
3879 {
3880 /* Legacy behavior: allow -mcpu=power9 with certain
3881 capabilities explicitly disabled. */
3882 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3883 }
3884 else
3885 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3886 "for <xxx> less than power9", "-mcpu");
3887 }
3888 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
3889 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
3890 & rs6000_isa_flags_explicit))
3891 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3892 were explicitly cleared. */
3893 error ("%qs incompatible with explicitly disabled options",
3894 "-mpower9-minmax");
3895 else
3896 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
3897 }
3898 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3899 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
3900 else if (TARGET_VSX)
3901 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
3902 else if (TARGET_POPCNTD)
3903 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
3904 else if (TARGET_DFP)
3905 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
3906 else if (TARGET_CMPB)
3907 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
3908 else if (TARGET_FPRND)
3909 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
3910 else if (TARGET_POPCNTB)
3911 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
3912 else if (TARGET_ALTIVEC)
3913 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
3914
3915 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3916 {
3917 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3918 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3919 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3920 }
3921
3922 if (!TARGET_FPRND && TARGET_VSX)
3923 {
3924 if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND)
3925 /* TARGET_VSX = 1 implies Power7 and newer. */
3926 error ("%qs requires %qs", "-mvsx", "-mfprnd");
3927 rs6000_isa_flags &= ~OPTION_MASK_FPRND;
3928 }
3929
3930 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3931 {
3932 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3933 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3934 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3935 }
3936
3937 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3938 {
3939 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3940 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3941 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3942 }
3943
3944 if (TARGET_P8_VECTOR && !TARGET_VSX)
3945 {
3946 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3947 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
3948 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3949 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
3950 {
3951 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3952 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3953 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3954 }
3955 else
3956 {
3957 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
3958 not explicit. */
3959 rs6000_isa_flags |= OPTION_MASK_VSX;
3960 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3961 }
3962 }
3963
3964 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3965 {
3966 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3967 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3968 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3969 }
3970
3971 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
3972 silently turn off quad memory mode. */
3973 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3974 {
3975 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3976 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
3977
3978 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3979 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
3980
3981 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3982 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3983 }
3984
3985 /* Non-atomic quad memory load/store are disabled for little endian, since
3986 the words are reversed, but atomic operations can still be done by
3987 swapping the words. */
3988 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3989 {
3990 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3991 warning (0, N_("%<-mquad-memory%> is not available in little endian "
3992 "mode"));
3993
3994 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3995 }
3996
3997 /* Assume that if the user asked for normal quad memory instructions, they
3998 want the atomic versions as well, unless they explicitly told us not to
3999 use quad word atomic instructions. */
4000 if (TARGET_QUAD_MEMORY
4001 && !TARGET_QUAD_MEMORY_ATOMIC
4002 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4003 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
4004
4005 /* If we can shrink-wrap the TOC register save separately, then use
4006 -msave-toc-indirect unless explicitly disabled. */
4007 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
4008 && flag_shrink_wrap_separate
4009 && optimize_function_for_speed_p (cfun))
4010 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
4011
4012 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4013 generating power8 instructions. Power9 does not optimize power8 fusion
4014 cases. */
4015 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4016 {
4017 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
4018 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4019 else
4020 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4021 }
4022
4023 /* Setting additional fusion flags turns on base fusion. */
4024 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
4025 {
4026 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4027 {
4028 if (TARGET_P8_FUSION_SIGN)
4029 error ("%qs requires %qs", "-mpower8-fusion-sign",
4030 "-mpower8-fusion");
4031
4032 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4033 }
4034 else
4035 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4036 }
4037
4038 /* Power8 does not fuse sign extended loads with the addis. If we are
4039 optimizing at high levels for speed, convert a sign extended load into a
4040 zero extending load, and an explicit sign extension. */
4041 if (TARGET_P8_FUSION
4042 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4043 && optimize_function_for_speed_p (cfun)
4044 && optimize >= 3)
4045 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
4046
4047 /* ISA 3.0 vector instructions include ISA 2.07. */
4048 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4049 {
4050 /* We prefer to not mention undocumented options in
4051 error messages. However, if users have managed to select
4052 power9-vector without selecting power8-vector, they
4053 already know about undocumented flags. */
4054 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4055 && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
4056 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
4057 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
4058 {
4059 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4060 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4061 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4062 }
4063 else
4064 {
4065 /* OPTION_MASK_P9_VECTOR is explicit and
4066 OPTION_MASK_P8_VECTOR is not explicit. */
4067 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
4068 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4069 }
4070 }
4071
4072 /* Set -mallow-movmisalign explicitly to on if we have full ISA 2.07
4073 support. If we only have ISA 2.06 support, and the user did not specify
4074 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4075 but we don't enable the full vectorization support. */
4076 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4077 TARGET_ALLOW_MOVMISALIGN = 1;
4078
4079 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4080 {
4081 if (TARGET_ALLOW_MOVMISALIGN > 0
4082 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4083 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4084
4085 TARGET_ALLOW_MOVMISALIGN = 0;
4086 }
4087
4088 /* Determine when unaligned vector accesses are permitted, and when
4089 they are preferred over masked Altivec loads. Note that if
4090 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4091 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4092 not true. */
4093 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4094 {
4095 if (!TARGET_VSX)
4096 {
4097 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4098 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4099
4100 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4101 }
4102
4103 else if (!TARGET_ALLOW_MOVMISALIGN)
4104 {
4105 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4106 error ("%qs requires %qs", "-mefficient-unaligned-vsx",
4107 "-mallow-movmisalign");
4108
4109 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4110 }
4111 }
4112
4113 if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX))
4114 {
4115 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4116 rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4117 else
4118 rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4119 }
4120
4121 if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_VECTOR_PAIR))
4122 {
4123 if (TARGET_MMA && TARGET_EFFICIENT_UNALIGNED_VSX)
4124 rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
4125 else
4126 rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
4127 }
4128
4129 /* Use long double size to select the appropriate long double. We use
4130 TYPE_PRECISION to differentiate the 3 different long double types. We map
4131 128 into the precision used for TFmode. */
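/* The three types are: 64-bit (the same format as double), 128-bit IBM
   extended double (a pair of doubles), and 128-bit IEEE 754.  */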
4132 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
4133 ? 64
4134 : FLOAT_PRECISION_TFmode);
4135
4136 /* Set long double size before the IEEE 128-bit tests. */
4137 if (!global_options_set.x_rs6000_long_double_type_size)
4138 {
4139 if (main_target_opt != NULL
4140 && (main_target_opt->x_rs6000_long_double_type_size
4141 != default_long_double_size))
4142 error ("target attribute or pragma changes %<long double%> size");
4143 else
4144 rs6000_long_double_type_size = default_long_double_size;
4145 }
4146 else if (rs6000_long_double_type_size == 128)
4147 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
4148 else if (global_options_set.x_rs6000_ieeequad)
4149 {
4150 if (global_options.x_rs6000_ieeequad)
4151 error ("%qs requires %qs", "-mabi=ieeelongdouble", "-mlong-double-128");
4152 else
4153 error ("%qs requires %qs", "-mabi=ibmlongdouble", "-mlong-double-128");
4154 }
4155
4156 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4157 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4158 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4159 those systems will not pick up this default. Warn if the user changes the
4160 default unless -Wno-psabi. */
4161 if (!global_options_set.x_rs6000_ieeequad)
4162 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
4163
4164 else
4165 {
4166 if (global_options.x_rs6000_ieeequad
4167 && (!TARGET_POPCNTD || !TARGET_VSX))
4168 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
4169
4170 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128)
4171 {
4172 /* Determine if the user can change the default long double type at
4173 compilation time. Only C and C++ support this, and you need GLIBC
4174 2.32 or newer. Only issue one warning. */
4175 static bool warned_change_long_double;
4176
4177 if (!warned_change_long_double
4178 && (!glibc_supports_ieee_128bit ()
4179 || (!lang_GNU_C () && !lang_GNU_CXX ())))
4180 {
4181 warned_change_long_double = true;
4182 if (TARGET_IEEEQUAD)
4183 warning (OPT_Wpsabi, "Using IEEE extended precision "
4184 "%<long double%>");
4185 else
4186 warning (OPT_Wpsabi, "Using IBM extended precision "
4187 "%<long double%>");
4188 }
4189 }
4190 }
4191
4192 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4193 systems. In GCC 7, we would enable the IEEE 128-bit floating point
4194 infrastructure (-mfloat128-type) but not enable the actual __float128 type
4195 unless the user used the explicit -mfloat128. In GCC 8, we enable both
4196 the keyword as well as the type. */
4197 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
4198
4199 /* IEEE 128-bit floating point requires VSX support. */
4200 if (TARGET_FLOAT128_KEYWORD)
4201 {
4202 if (!TARGET_VSX)
4203 {
4204 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4205 error ("%qs requires VSX support", "-mfloat128");
4206
4207 TARGET_FLOAT128_TYPE = 0;
4208 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
4209 | OPTION_MASK_FLOAT128_HW);
4210 }
4211 else if (!TARGET_FLOAT128_TYPE)
4212 {
4213 TARGET_FLOAT128_TYPE = 1;
4214 warning (0, "The %<-mfloat128%> option may not be fully supported");
4215 }
4216 }
4217
4218 /* Enable the __float128 keyword under Linux by default. */
4219 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4220 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4221 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4222
4223 /* If we are supporting the float128 type and have full ISA 3.0 support,
4224 enable -mfloat128-hardware by default. However, don't enable it if
4225 -mfloat128-hardware was explicitly turned off. 64-bit mode is needed
4226 because sometimes the compiler wants to put things in an integer
4227 container, and if we don't have __int128 support, it is impossible. */
4228 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4229 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4230 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4231 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4232
4233 if (TARGET_FLOAT128_HW
4234 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4235 {
4236 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4237 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4238
4239 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4240 }
4241
4242 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4243 {
4244 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4245 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4246
4247 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4248 }
4249
4250 /* Enable -mprefixed by default on power10 systems. */
4251 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0)
4252 rs6000_isa_flags |= OPTION_MASK_PREFIXED;
4253
4254 /* -mprefixed requires -mcpu=power10 (or later). */
4255 else if (TARGET_PREFIXED && !TARGET_POWER10)
4256 {
4257 if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) != 0)
4258 error ("%qs requires %qs", "-mprefixed", "-mcpu=power10");
4259
4260 rs6000_isa_flags &= ~OPTION_MASK_PREFIXED;
4261 }
4262
4263 /* -mpcrel requires prefixed load/store addressing. */
4264 if (TARGET_PCREL && !TARGET_PREFIXED)
4265 {
4266 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4267 error ("%qs requires %qs", "-mpcrel", "-mprefixed");
4268
4269 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4270 }
4271
4272 /* Print the options after updating the defaults. */
4273 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4274 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4275
4276 /* E500mc does "better" if we inline more aggressively. Respect the
4277 user's opinion, though. */
4278 if (rs6000_block_move_inline_limit == 0
4279 && (rs6000_tune == PROCESSOR_PPCE500MC
4280 || rs6000_tune == PROCESSOR_PPCE500MC64
4281 || rs6000_tune == PROCESSOR_PPCE5500
4282 || rs6000_tune == PROCESSOR_PPCE6500))
4283 rs6000_block_move_inline_limit = 128;
4284
4285 /* store_one_arg depends on expand_block_move to handle at least the
4286 size of reg_parm_stack_space. */
4287 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4288 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4289
4290 if (global_init_p)
4291 {
4292 /* If the appropriate debug option is enabled, replace the target hooks
4293 with debug versions that call the real version and then print
4294 debugging information. */
4295 if (TARGET_DEBUG_COST)
4296 {
4297 targetm.rtx_costs = rs6000_debug_rtx_costs;
4298 targetm.address_cost = rs6000_debug_address_cost;
4299 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4300 }
4301
4302 if (TARGET_DEBUG_ADDR)
4303 {
4304 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4305 targetm.legitimize_address = rs6000_debug_legitimize_address;
4306 rs6000_secondary_reload_class_ptr
4307 = rs6000_debug_secondary_reload_class;
4308 targetm.secondary_memory_needed
4309 = rs6000_debug_secondary_memory_needed;
4310 targetm.can_change_mode_class
4311 = rs6000_debug_can_change_mode_class;
4312 rs6000_preferred_reload_class_ptr
4313 = rs6000_debug_preferred_reload_class;
4314 rs6000_mode_dependent_address_ptr
4315 = rs6000_debug_mode_dependent_address;
4316 }
4317
4318 if (rs6000_veclibabi_name)
4319 {
4320 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4321 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4322 else
4323 {
4324 error ("unknown vectorization library ABI type (%qs) for "
4325 "%qs switch", rs6000_veclibabi_name, "-mveclibabi=");
4326 ret = false;
4327 }
4328 }
4329 }
4330
4331 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4332 target attribute or pragma which automatically enables both options,
4333 unless the altivec ABI was set. That ABI is set by default for 64-bit, but
4334 not for 32-bit. */
4335 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4336 {
4337 TARGET_FLOAT128_TYPE = 0;
4338 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4339 | OPTION_MASK_FLOAT128_KEYWORD)
4340 & ~rs6000_isa_flags_explicit);
4341 }
4342
4343 /* Enable Altivec ABI for AIX -maltivec. */
4344 if (TARGET_XCOFF
4345 && (TARGET_ALTIVEC || TARGET_VSX)
4346 && !global_options_set.x_rs6000_altivec_abi)
4347 {
4348 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4349 error ("target attribute or pragma changes AltiVec ABI");
4350 else
4351 rs6000_altivec_abi = 1;
4352 }
4353
4354 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4355 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4356 be explicitly overridden in either case. */
4357 if (TARGET_ELF)
4358 {
4359 if (!global_options_set.x_rs6000_altivec_abi
4360 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4361 {
4362 if (main_target_opt != NULL
4363 && !main_target_opt->x_rs6000_altivec_abi)
4364 error ("target attribute or pragma changes AltiVec ABI");
4365 else
4366 rs6000_altivec_abi = 1;
4367 }
4368 }
4369
4370 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4371 So far, the only darwin64 targets are also Mach-O. */
4372 if (TARGET_MACHO
4373 && DEFAULT_ABI == ABI_DARWIN
4374 && TARGET_64BIT)
4375 {
4376 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4377 error ("target attribute or pragma changes darwin64 ABI");
4378 else
4379 {
4380 rs6000_darwin64_abi = 1;
4381 /* Default to natural alignment, for better performance. */
4382 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4383 }
4384 }
4385
4386 /* Place FP constants in the constant pool instead of TOC
4387 if section anchors enabled. */
4388 if (flag_section_anchors
4389 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4390 TARGET_NO_FP_IN_TOC = 1;
4391
4392 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4393 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4394
4395 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4396 SUBTARGET_OVERRIDE_OPTIONS;
4397 #endif
4398 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4399 SUBSUBTARGET_OVERRIDE_OPTIONS;
4400 #endif
4401 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4402 SUB3TARGET_OVERRIDE_OPTIONS;
4403 #endif
4404
4405 /* If the ABI has support for PC-relative relocations, enable it by default.
4406 This test depends on the sub-target tests above setting the code model to
4407 medium for ELF v2 systems. */
4408 if (PCREL_SUPPORTED_BY_OS
4409 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)
4410 rs6000_isa_flags |= OPTION_MASK_PCREL;
4411
4412 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4413 after the subtarget override options are done. */
4414 else if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
4415 {
4416 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4417 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4418
4419 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4420 }
4421
4422 /* Enable -mmma by default on power10 systems. */
4423 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
4424 rs6000_isa_flags |= OPTION_MASK_MMA;
4425
4426 /* Turn off vector pair/mma options on non-power10 systems. */
4427 else if (!TARGET_POWER10 && TARGET_MMA)
4428 {
4429 if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
4430 error ("%qs requires %qs", "-mmma", "-mcpu=power10");
4431
4432 rs6000_isa_flags &= ~OPTION_MASK_MMA;
4433 }
4434
4435 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4436 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4437
4438 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4439 && rs6000_tune != PROCESSOR_POWER5
4440 && rs6000_tune != PROCESSOR_POWER6
4441 && rs6000_tune != PROCESSOR_POWER7
4442 && rs6000_tune != PROCESSOR_POWER8
4443 && rs6000_tune != PROCESSOR_POWER9
4444 && rs6000_tune != PROCESSOR_POWER10
4445 && rs6000_tune != PROCESSOR_PPCA2
4446 && rs6000_tune != PROCESSOR_CELL
4447 && rs6000_tune != PROCESSOR_PPC476);
4448 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4449 || rs6000_tune == PROCESSOR_POWER5
4450 || rs6000_tune == PROCESSOR_POWER7
4451 || rs6000_tune == PROCESSOR_POWER8);
4452 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4453 || rs6000_tune == PROCESSOR_POWER5
4454 || rs6000_tune == PROCESSOR_POWER6
4455 || rs6000_tune == PROCESSOR_POWER7
4456 || rs6000_tune == PROCESSOR_POWER8
4457 || rs6000_tune == PROCESSOR_POWER9
4458 || rs6000_tune == PROCESSOR_POWER10
4459 || rs6000_tune == PROCESSOR_PPCE500MC
4460 || rs6000_tune == PROCESSOR_PPCE500MC64
4461 || rs6000_tune == PROCESSOR_PPCE5500
4462 || rs6000_tune == PROCESSOR_PPCE6500);
4463
4464 /* Allow debug switches to override the above settings. These are set to -1
4465 in rs6000.opt to indicate the user hasn't directly set the switch. */
4466 if (TARGET_ALWAYS_HINT >= 0)
4467 rs6000_always_hint = TARGET_ALWAYS_HINT;
4468
4469 if (TARGET_SCHED_GROUPS >= 0)
4470 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4471
4472 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4473 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4474
4475 rs6000_sched_restricted_insns_priority
4476 = (rs6000_sched_groups ? 1 : 0);
4477
4478 /* Handle -msched-costly-dep option. */
4479 rs6000_sched_costly_dep
4480 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4481
4482 if (rs6000_sched_costly_dep_str)
4483 {
4484 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4485 rs6000_sched_costly_dep = no_dep_costly;
4486 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4487 rs6000_sched_costly_dep = all_deps_costly;
4488 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4489 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4490 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4491 rs6000_sched_costly_dep = store_to_load_dep_costly;
4492 else
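/* Otherwise the argument is a number; dependences whose estimated cost
   is at least that value are treated as costly.  */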
4493 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4494 atoi (rs6000_sched_costly_dep_str));
4495 }
4496
4497 /* Handle -minsert-sched-nops option. */
4498 rs6000_sched_insert_nops
4499 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4500
4501 if (rs6000_sched_insert_nops_str)
4502 {
4503 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4504 rs6000_sched_insert_nops = sched_finish_none;
4505 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4506 rs6000_sched_insert_nops = sched_finish_pad_groups;
4507 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4508 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4509 else
4510 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4511 atoi (rs6000_sched_insert_nops_str));
4512 }
4513
4514 /* Handle stack protector. */
4515 if (!global_options_set.x_rs6000_stack_protector_guard)
4516 #ifdef TARGET_THREAD_SSP_OFFSET
4517 rs6000_stack_protector_guard = SSP_TLS;
4518 #else
4519 rs6000_stack_protector_guard = SSP_GLOBAL;
4520 #endif
4521
4522 #ifdef TARGET_THREAD_SSP_OFFSET
4523 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4524 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4525 #endif
4526
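/* With SSP_TLS the canary is loaded from a fixed offset off the thread
   pointer register chosen above (r13 in 64-bit mode, r2 in 32-bit mode);
   the options handled below let the user override both the register and
   the offset.  */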
4527 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
4528 {
4529 char *endp;
4530 const char *str = rs6000_stack_protector_guard_offset_str;
4531
4532 errno = 0;
4533 long offset = strtol (str, &endp, 0);
4534 if (!*str || *endp || errno)
4535 error ("%qs is not a valid number in %qs", str,
4536 "-mstack-protector-guard-offset=");
4537
4538 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4539 || (TARGET_64BIT && (offset & 3)))
4540 error ("%qs is not a valid offset in %qs", str,
4541 "-mstack-protector-guard-offset=");
4542
4543 rs6000_stack_protector_guard_offset = offset;
4544 }
4545
4546 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
4547 {
4548 const char *str = rs6000_stack_protector_guard_reg_str;
4549 int reg = decode_reg_name (str);
4550
4551 if (!IN_RANGE (reg, 1, 31))
4552 error ("%qs is not a valid base register in %qs", str,
4553 "-mstack-protector-guard-reg=");
4554
4555 rs6000_stack_protector_guard_reg = reg;
4556 }
4557
4558 if (rs6000_stack_protector_guard == SSP_TLS
4559 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4560 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
4561
4562 if (global_init_p)
4563 {
4564 #ifdef TARGET_REGNAMES
4565 /* If the user desires alternate register names, copy in the
4566 alternate names now. */
4567 if (TARGET_REGNAMES)
4568 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4569 #endif
4570
4571 /* Set aix_struct_return last, after the ABI is determined.
4572 If -maix-struct-return or -msvr4-struct-return was explicitly
4573 used, don't override with the ABI default. */
4574 if (!global_options_set.x_aix_struct_return)
4575 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4576
4577 #if 0
4578 /* IBM XL compiler defaults to unsigned bitfields. */
4579 if (TARGET_XL_COMPAT)
4580 flag_signed_bitfields = 0;
4581 #endif
4582
4583 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4584 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4585
4586 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4587
4588 /* We can only guarantee the availability of DI pseudo-ops when
4589 assembling for 64-bit targets. */
4590 if (!TARGET_64BIT)
4591 {
4592 targetm.asm_out.aligned_op.di = NULL;
4593 targetm.asm_out.unaligned_op.di = NULL;
4594 }
4595
4597 /* Set branch target alignment, if not optimizing for size. */
4598 if (!optimize_size)
4599 {
4600 /* Cell wants to be aligned 8-byte for dual issue. Titan wants to be
4601 aligned 8-byte to avoid misprediction by the branch predictor. */
4602 if (rs6000_tune == PROCESSOR_TITAN
4603 || rs6000_tune == PROCESSOR_CELL)
4604 {
4605 if (flag_align_functions && !str_align_functions)
4606 str_align_functions = "8";
4607 if (flag_align_jumps && !str_align_jumps)
4608 str_align_jumps = "8";
4609 if (flag_align_loops && !str_align_loops)
4610 str_align_loops = "8";
4611 }
4612 if (rs6000_align_branch_targets)
4613 {
4614 if (flag_align_functions && !str_align_functions)
4615 str_align_functions = "16";
4616 if (flag_align_jumps && !str_align_jumps)
4617 str_align_jumps = "16";
4618 if (flag_align_loops && !str_align_loops)
4619 {
4620 can_override_loop_align = 1;
4621 str_align_loops = "16";
4622 }
4623 }
4624 }
4625
4626 /* Arrange to save and restore machine status around nested functions. */
4627 init_machine_status = rs6000_init_machine_status;
4628
4629 /* We should always be splitting complex arguments, but we can't break
4630 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4631 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4632 targetm.calls.split_complex_arg = NULL;
4633
4634 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4635 if (DEFAULT_ABI == ABI_AIX)
4636 targetm.calls.custom_function_descriptors = 0;
4637 }
4638
4639 /* Initialize rs6000_cost with the appropriate target costs. */
4640 if (optimize_size)
4641 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4642 else
4643 switch (rs6000_tune)
4644 {
4645 case PROCESSOR_RS64A:
4646 rs6000_cost = &rs64a_cost;
4647 break;
4648
4649 case PROCESSOR_MPCCORE:
4650 rs6000_cost = &mpccore_cost;
4651 break;
4652
4653 case PROCESSOR_PPC403:
4654 rs6000_cost = &ppc403_cost;
4655 break;
4656
4657 case PROCESSOR_PPC405:
4658 rs6000_cost = &ppc405_cost;
4659 break;
4660
4661 case PROCESSOR_PPC440:
4662 rs6000_cost = &ppc440_cost;
4663 break;
4664
4665 case PROCESSOR_PPC476:
4666 rs6000_cost = &ppc476_cost;
4667 break;
4668
4669 case PROCESSOR_PPC601:
4670 rs6000_cost = &ppc601_cost;
4671 break;
4672
4673 case PROCESSOR_PPC603:
4674 rs6000_cost = &ppc603_cost;
4675 break;
4676
4677 case PROCESSOR_PPC604:
4678 rs6000_cost = &ppc604_cost;
4679 break;
4680
4681 case PROCESSOR_PPC604e:
4682 rs6000_cost = &ppc604e_cost;
4683 break;
4684
4685 case PROCESSOR_PPC620:
4686 rs6000_cost = &ppc620_cost;
4687 break;
4688
4689 case PROCESSOR_PPC630:
4690 rs6000_cost = &ppc630_cost;
4691 break;
4692
4693 case PROCESSOR_CELL:
4694 rs6000_cost = &ppccell_cost;
4695 break;
4696
4697 case PROCESSOR_PPC750:
4698 case PROCESSOR_PPC7400:
4699 rs6000_cost = &ppc750_cost;
4700 break;
4701
4702 case PROCESSOR_PPC7450:
4703 rs6000_cost = &ppc7450_cost;
4704 break;
4705
4706 case PROCESSOR_PPC8540:
4707 case PROCESSOR_PPC8548:
4708 rs6000_cost = &ppc8540_cost;
4709 break;
4710
4711 case PROCESSOR_PPCE300C2:
4712 case PROCESSOR_PPCE300C3:
4713 rs6000_cost = &ppce300c2c3_cost;
4714 break;
4715
4716 case PROCESSOR_PPCE500MC:
4717 rs6000_cost = &ppce500mc_cost;
4718 break;
4719
4720 case PROCESSOR_PPCE500MC64:
4721 rs6000_cost = &ppce500mc64_cost;
4722 break;
4723
4724 case PROCESSOR_PPCE5500:
4725 rs6000_cost = &ppce5500_cost;
4726 break;
4727
4728 case PROCESSOR_PPCE6500:
4729 rs6000_cost = &ppce6500_cost;
4730 break;
4731
4732 case PROCESSOR_TITAN:
4733 rs6000_cost = &titan_cost;
4734 break;
4735
4736 case PROCESSOR_POWER4:
4737 case PROCESSOR_POWER5:
4738 rs6000_cost = &power4_cost;
4739 break;
4740
4741 case PROCESSOR_POWER6:
4742 rs6000_cost = &power6_cost;
4743 break;
4744
4745 case PROCESSOR_POWER7:
4746 rs6000_cost = &power7_cost;
4747 break;
4748
4749 case PROCESSOR_POWER8:
4750 rs6000_cost = &power8_cost;
4751 break;
4752
4753 case PROCESSOR_POWER9:
4754 case PROCESSOR_POWER10:
4755 rs6000_cost = &power9_cost;
4756 break;
4757
4758 case PROCESSOR_PPCA2:
4759 rs6000_cost = &ppca2_cost;
4760 break;
4761
4762 default:
4763 gcc_unreachable ();
4764 }
4765
4766 if (global_init_p)
4767 {
4768 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4769 param_simultaneous_prefetches,
4770 rs6000_cost->simultaneous_prefetches);
4771 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4772 param_l1_cache_size,
4773 rs6000_cost->l1_cache_size);
4774 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4775 param_l1_cache_line_size,
4776 rs6000_cost->cache_line_size);
4777 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4778 param_l2_cache_size,
4779 rs6000_cost->l2_cache_size);
4780
4781 /* Increase loop peeling limits based on performance analysis. */
4782 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4783 param_max_peeled_insns, 400);
4784 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4785 param_max_completely_peeled_insns, 400);
4786
4787 /* The lxvl/stxvl instructions don't perform well before Power10. */
4788 if (TARGET_POWER10)
4789 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4790 param_vect_partial_vector_usage, 1);
4791 else
4792 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4793 param_vect_partial_vector_usage, 0);
4794
4795 /* Use the 'model' -fsched-pressure algorithm by default. */
4796 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4797 param_sched_pressure_algorithm,
4798 SCHED_PRESSURE_MODEL);
4799
4800 /* If using typedef char *va_list, signal that
4801 __builtin_va_start (&ap, 0) can be optimized to
4802 ap = __builtin_next_arg (0). */
4803 if (DEFAULT_ABI != ABI_V4)
4804 targetm.expand_builtin_va_start = NULL;
4805 }
4806
4807 rs6000_override_options_after_change ();
4808
4809 /* If not explicitly specified via option, decide whether to generate indexed
4810 load/store instructions. A value of -1 indicates that the
4811 initial value of this variable has not been overwritten. During
4812 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4813 if (TARGET_AVOID_XFORM == -1)
4814 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4815 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4816 need indexed accesses and the type used is the scalar type of the element
4817 being loaded or stored. */
4818 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4819 && !TARGET_ALTIVEC);
4820
4821 /* Set the -mrecip options. */
4822 if (rs6000_recip_name)
4823 {
4824 char *p = ASTRDUP (rs6000_recip_name);
4825 char *q;
4826 unsigned int mask, i;
4827 bool invert;
4828
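/* Parse a comma-separated list such as "-mrecip=all,!rsqrtd", where a
   leading '!' disables the named estimate.  */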
4829 while ((q = strtok (p, ",")) != NULL)
4830 {
4831 p = NULL;
4832 if (*q == '!')
4833 {
4834 invert = true;
4835 q++;
4836 }
4837 else
4838 invert = false;
4839
4840 if (!strcmp (q, "default"))
4841 mask = ((TARGET_RECIP_PRECISION)
4842 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4843 else
4844 {
4845 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4846 if (!strcmp (q, recip_options[i].string))
4847 {
4848 mask = recip_options[i].mask;
4849 break;
4850 }
4851
4852 if (i == ARRAY_SIZE (recip_options))
4853 {
4854 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4855 invert = false;
4856 mask = 0;
4857 ret = false;
4858 }
4859 }
4860
4861 if (invert)
4862 rs6000_recip_control &= ~mask;
4863 else
4864 rs6000_recip_control |= mask;
4865 }
4866 }
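
/* Worked example (illustrative, mechanics taken from the loop above): with
   -mrecip=!divf,default the first token is "!divf", so INVERT is set and
   the divf bits are cleared from rs6000_recip_control; the second token
   "default" then ORs in RECIP_HIGH_PRECISION or RECIP_LOW_PRECISION,
   depending on TARGET_RECIP_PRECISION.  */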
4867
4868 /* Set the builtin mask of the various options used that could affect which
4869 builtins were used. In the past we used target_flags, but we've run out
4870 of bits, and some options are no longer in target_flags. */
4871 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4872 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4873 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4874 rs6000_builtin_mask);
4875
4876 /* Initialize all of the registers. */
4877 rs6000_init_hard_regno_mode_ok (global_init_p);
4878
4879 /* Save the initial options in case the user does function specific options */
4880 if (global_init_p)
4881 target_option_default_node = target_option_current_node
4882 = build_target_option_node (&global_options, &global_options_set);
4883
4884 /* If not explicitly specified via option, decide whether to generate the
4885 extra blr's required to preserve the link stack on some cpus (eg, 476). */
4886 if (TARGET_LINK_STACK == -1)
4887 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4888
4889 /* Deprecate use of -mno-speculate-indirect-jumps. */
4890 if (!rs6000_speculate_indirect_jumps)
4891 warning (0, "%qs is deprecated and not recommended in any circumstances",
4892 "-mno-speculate-indirect-jumps");
4893
4894 return ret;
4895 }
4896
4897 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4898 define the target cpu type. */
4899
4900 static void
4901 rs6000_option_override (void)
4902 {
4903 (void) rs6000_option_override_internal (true);
4904 }
4905
4906 \f
4907 /* Implement targetm.vectorize.builtin_mask_for_load. */
4908 static tree
4909 rs6000_builtin_mask_for_load (void)
4910 {
4911 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
4912 if ((TARGET_ALTIVEC && !TARGET_VSX)
4913 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
4914 return altivec_builtin_mask_for_load;
4915 else
4916 return 0;
4917 }
4918
4919 /* Implement LOOP_ALIGN. */
4920 align_flags
4921 rs6000_loop_align (rtx label)
4922 {
4923 basic_block bb;
4924 int ninsns;
4925
4926 /* Don't override loop alignment if -falign-loops was specified. */
4927 if (!can_override_loop_align)
4928 return align_loops;
4929
4930 bb = BLOCK_FOR_INSN (label);
4931 ninsns = num_loop_insns (bb->loop_father);
4932
4933 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
4934 if (ninsns > 4 && ninsns <= 8
4935 && (rs6000_tune == PROCESSOR_POWER4
4936 || rs6000_tune == PROCESSOR_POWER5
4937 || rs6000_tune == PROCESSOR_POWER6
4938 || rs6000_tune == PROCESSOR_POWER7
4939 || rs6000_tune == PROCESSOR_POWER8))
4940 return align_flags (5);
4941 else
4942 return align_loops;
4943 }
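
/* For example, align_flags (5) above requests 2**5 = 32-byte alignment,
   so a loop body of 5 to 8 instructions on the listed processors starts
   on a 32-byte boundary and fits in a single icache sector.  */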
4944
4945 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4946 after applying N iterations. This routine does not determine
4947 how many iterations are required to reach the desired alignment. */
4948
4949 static bool
4950 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4951 {
4952 if (is_packed)
4953 return false;
4954
4955 if (TARGET_32BIT)
4956 {
4957 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4958 return true;
4959
4960 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4961 return true;
4962
4963 return false;
4964 }
4965 else
4966 {
4967 if (TARGET_MACHO)
4968 return false;
4969
4970 /* Assume that all other types are naturally aligned. CHECKME! */
4971 return true;
4972 }
4973 }
4974
4975 /* Return true if the vector misalignment factor is supported by the
4976 target. */
4977 static bool
4978 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4979 const_tree type,
4980 int misalignment,
4981 bool is_packed)
4982 {
4983 if (TARGET_VSX)
4984 {
4985 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4986 return true;
4987
4988 /* Return if movmisalign pattern is not supported for this mode. */
4989 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4990 return false;
4991
4992 if (misalignment == -1)
4993 {
4994 /* Misalignment factor is unknown at compile time but we know
4995 it's word aligned. */
4996 if (rs6000_vector_alignment_reachable (type, is_packed))
4997 {
4998 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4999
5000 if (element_size == 64 || element_size == 32)
5001 return true;
5002 }
5003
5004 return false;
5005 }
5006
5007 /* VSX supports word-aligned vector. */
5008 if (misalignment % 4 == 0)
5009 return true;
5010 }
5011 return false;
5012 }
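
/* For example, on a VSX target without TARGET_EFFICIENT_UNALIGNED_VSX, a
   V4SF access with a known misalignment of 4 or 8 bytes is accepted above
   (misalignment % 4 == 0), while a misalignment of 2 bytes is rejected
   and the vectorizer must peel or give up.  */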
5013
5014 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5015 static int
5016 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5017 tree vectype, int misalign)
5018 {
5019 unsigned elements;
5020 tree elem_type;
5021
5022 switch (type_of_cost)
5023 {
5024 case scalar_stmt:
5025 case scalar_store:
5026 case vector_stmt:
5027 case vector_store:
5028 case vec_to_scalar:
5029 case scalar_to_vec:
5030 case cond_branch_not_taken:
5031 return 1;
5032 case scalar_load:
5033 case vector_load:
5034 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5035 return 2;
5036
5037 case vec_perm:
5038 /* Power7 has only one permute unit, make it a bit expensive. */
5039 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
5040 return 3;
5041 else
5042 return 1;
5043
5044 case vec_promote_demote:
5045 /* Power7 has only one permute/pack unit, make it a bit expensive. */
5046 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
5047 return 4;
5048 else
5049 return 1;
5050
5051 case cond_branch_taken:
5052 return 3;
5053
5054 case unaligned_load:
5055 case vector_gather_load:
5056 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5057 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5058 return 2;
5059
5060 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5061 {
5062 elements = TYPE_VECTOR_SUBPARTS (vectype);
5063 if (elements == 2)
5064 /* Double word aligned. */
5065 return 4;
5066
5067 if (elements == 4)
5068 {
5069 switch (misalign)
5070 {
5071 case 8:
5072 /* Double word aligned. */
5073 return 4;
5074
5075 case -1:
5076 /* Unknown misalignment. */
5077 case 4:
5078 case 12:
5079 /* Word aligned. */
5080 return 33;
5081
5082 default:
5083 gcc_unreachable ();
5084 }
5085 }
5086 }
5087
5088 if (TARGET_ALTIVEC)
5089 /* Misaligned loads are not supported. */
5090 gcc_unreachable ();
5091
5092 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5093 return 4;
5094
5095 case unaligned_store:
5096 case vector_scatter_store:
5097 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5098 return 1;
5099
5100 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5101 {
5102 elements = TYPE_VECTOR_SUBPARTS (vectype);
5103 if (elements == 2)
5104 /* Double word aligned. */
5105 return 2;
5106
5107 if (elements == 4)
5108 {
5109 switch (misalign)
5110 {
5111 case 8:
5112 /* Double word aligned. */
5113 return 2;
5114
5115 case -1:
5116 /* Unknown misalignment. */
5117 case 4:
5118 case 12:
5119 /* Word aligned. */
5120 return 23;
5121
5122 default:
5123 gcc_unreachable ();
5124 }
5125 }
5126 }
5127
5128 if (TARGET_ALTIVEC)
5129 /* Misaligned stores are not supported. */
5130 gcc_unreachable ();
5131
5132 return 2;
5133
5134 case vec_construct:
5135 /* This is a rough approximation assuming non-constant elements
5136 constructed into a vector via element insertion. FIXME:
5137 vec_construct is not granular enough for uniformly good
5138 decisions. If the initialization is a splat, this is
5139 cheaper than we estimate. Improve this someday. */
5140 elem_type = TREE_TYPE (vectype);
5141 /* 32-bit vectors loaded into registers are stored as double
5142 precision, so we need 2 permutes, 2 converts, and 1 merge
5143 to construct a vector of short floats from them. */
5144 if (SCALAR_FLOAT_TYPE_P (elem_type)
5145 && TYPE_PRECISION (elem_type) == 32)
5146 return 5;
5147 /* On POWER9, integer vector types are built up in GPRs and then
5148 use a direct move (2 cycles). For POWER8 this is even worse,
5149 as we need two direct moves and a merge, and the direct moves
5150 are five cycles. */
5151 else if (INTEGRAL_TYPE_P (elem_type))
5152 {
5153 if (TARGET_P9_VECTOR)
5154 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5155 else
5156 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
5157 }
5158 else
5159 /* V2DFmode doesn't need a direct move. */
5160 return 2;
5161
5162 default:
5163 gcc_unreachable ();
5164 }
5165 }
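
/* Worked example (illustrative): with -mtune=power7, VSX and
   -mallow-movmisalign, for a V4SF vectype the switch above yields
   vector_load => 2, vec_perm => 3, vec_promote_demote => 4, and
   unaligned_load => 33 when the misalignment is 4, 12 or unknown (-1),
   but only 4 when it is 8 (i.e. doubleword aligned).  */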
5166
5167 /* Implement targetm.vectorize.preferred_simd_mode. */
5168
5169 static machine_mode
5170 rs6000_preferred_simd_mode (scalar_mode mode)
5171 {
5172 opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode));
5173
5174 if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ()))
5175 return vmode.require ();
5176
5177 return word_mode;
5178 }
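
/* For example, with 16-byte VSX vectors enabled this maps SFmode to
   V4SFmode (16 / 4 = 4 subparts) and DImode to V2DImode; if no vector
   unit handles the requested mode, word_mode is returned and the loop is
   left unvectorized.  */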
5179
5180 typedef struct _rs6000_cost_data
5181 {
5182 struct loop *loop_info;
5183 unsigned cost[3];
5184 } rs6000_cost_data;
5185
5186 /* Test for likely overcommitment of vector hardware resources. If a
5187 loop iteration is relatively large, and too large a percentage of
5188 instructions in the loop are vectorized, the cost model may not
5189 adequately reflect delays from unavailable vector resources.
5190 Penalize the loop body cost for this case. */
5191
5192 static void
5193 rs6000_density_test (rs6000_cost_data *data)
5194 {
5195 const int DENSITY_PCT_THRESHOLD = 85;
5196 const int DENSITY_SIZE_THRESHOLD = 70;
5197 const int DENSITY_PENALTY = 10;
5198 struct loop *loop = data->loop_info;
5199 basic_block *bbs = get_loop_body (loop);
5200 int nbbs = loop->num_nodes;
5201 loop_vec_info loop_vinfo = loop_vec_info_for_loop (data->loop_info);
5202 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
5203 int i, density_pct;
5204
5205 for (i = 0; i < nbbs; i++)
5206 {
5207 basic_block bb = bbs[i];
5208 gimple_stmt_iterator gsi;
5209
5210 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5211 {
5212 gimple *stmt = gsi_stmt (gsi);
5213 if (is_gimple_debug (stmt))
5214 continue;
5215
5216 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
5217
5218 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5219 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5220 not_vec_cost++;
5221 }
5222 }
5223
5224 free (bbs);
5225 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5226
5227 if (density_pct > DENSITY_PCT_THRESHOLD
5228 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
5229 {
5230 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
5231 if (dump_enabled_p ())
5232 dump_printf_loc (MSG_NOTE, vect_location,
5233 "density %d%%, cost %d exceeds threshold, penalizing "
5234 "loop body cost by %d%%", density_pct,
5235 vec_cost + not_vec_cost, DENSITY_PENALTY);
5236 }
5237 }
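
/* Worked example (illustrative numbers): if the loop body accumulates
   vec_cost = 90 and the walk above finds not_vec_cost = 10, then
   density_pct = 90 * 100 / (90 + 10) = 90.  Since 90 > 85 and the total
   size 100 > 70, the body cost becomes 90 * (100 + 10) / 100 = 99.  */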
5238
5239 /* Implement targetm.vectorize.init_cost. */
5240
5241 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5242 instruction is needed by the vectorization. */
5243 static bool rs6000_vect_nonmem;
5244
5245 static void *
5246 rs6000_init_cost (struct loop *loop_info)
5247 {
5248 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5249 data->loop_info = loop_info;
5250 data->cost[vect_prologue] = 0;
5251 data->cost[vect_body] = 0;
5252 data->cost[vect_epilogue] = 0;
5253 rs6000_vect_nonmem = false;
5254 return data;
5255 }
5256
5257 /* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
5258 For some statements, we would like to further fine-tune the cost on top of
5259 the rs6000_builtin_vectorization_cost handling, which doesn't have any
5260 information on statement operation codes etc. One typical case here is
5261 COND_EXPR: it has the same cost as a simple FXU instruction when evaluating
5262 the scalar cost, but it should be priced higher since it is transformed into
5263 either compare + branch or compare + isel instructions. */
5264
5265 static unsigned
5266 rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind,
5267 struct _stmt_vec_info *stmt_info)
5268 {
5269 if (kind == scalar_stmt && stmt_info && stmt_info->stmt
5270 && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
5271 {
5272 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
5273 if (subcode == COND_EXPR)
5274 return 2;
5275 }
5276
5277 return 0;
5278 }
5279
5280 /* Implement targetm.vectorize.add_stmt_cost. */
5281
5282 static unsigned
5283 rs6000_add_stmt_cost (class vec_info *vinfo, void *data, int count,
5284 enum vect_cost_for_stmt kind,
5285 struct _stmt_vec_info *stmt_info, tree vectype,
5286 int misalign, enum vect_cost_model_location where)
5287 {
5288 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5289 unsigned retval = 0;
5290
5291 if (flag_vect_cost_model)
5292 {
5293 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5294 misalign);
5295 stmt_cost += rs6000_adjust_vect_cost_per_stmt (kind, stmt_info);
5296 /* Statements in an inner loop relative to the loop being
5297 vectorized are weighted more heavily. The value here is
5298 arbitrary and could potentially be improved with analysis. */
5299 if (where == vect_body && stmt_info
5300 && stmt_in_inner_loop_p (vinfo, stmt_info))
5301 count *= 50; /* FIXME. */
5302
5303 retval = (unsigned) (count * stmt_cost);
5304 cost_data->cost[where] += retval;
5305
5306 /* Check whether we're doing something other than just a copy loop.
5307 Not all such loops may be profitably vectorized; see
5308 rs6000_finish_cost. */
5309 if ((kind == vec_to_scalar || kind == vec_perm
5310 || kind == vec_promote_demote || kind == vec_construct
5311 || kind == scalar_to_vec)
5312 || (where == vect_body && kind == vector_stmt))
5313 rs6000_vect_nonmem = true;
5314 }
5315
5316 return retval;
5317 }
5318
5319 /* For target-specific vectorization costs that can't be handled per stmt,
5320 check the requisite conditions and adjust the vectorization cost
5321 accordingly if they are satisfied. One typical example is modeling the
5322 shift cost for vector-with-length by counting the number of required
5323 lengths when LOOP_VINFO_FULLY_WITH_LENGTH_P holds. */
5324
5325 static void
5326 rs6000_adjust_vect_cost_per_loop (rs6000_cost_data *data)
5327 {
5328 struct loop *loop = data->loop_info;
5329 gcc_assert (loop);
5330 loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);
5331
5332 if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
5333 {
5334 rgroup_controls *rgc;
5335 unsigned int num_vectors_m1;
5336 unsigned int shift_cnt = 0;
5337 FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)
5338 if (rgc->type)
5339 /* Each length needs one shift to fill into bits 0-7. */
5340 shift_cnt += num_vectors_m1 + 1;
5341
5342 rs6000_add_stmt_cost (loop_vinfo, (void *) data, shift_cnt, scalar_stmt,
5343 NULL, NULL_TREE, 0, vect_body);
5344 }
5345 }
5346
5347 /* Implement targetm.vectorize.finish_cost. */
5348
5349 static void
5350 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5351 unsigned *body_cost, unsigned *epilogue_cost)
5352 {
5353 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5354
5355 if (cost_data->loop_info)
5356 {
5357 rs6000_adjust_vect_cost_per_loop (cost_data);
5358 rs6000_density_test (cost_data);
5359 }
5360
5361 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5362 that require versioning for any reason. The vectorization is at
5363 best a wash inside the loop, and the versioning checks make
5364 profitability highly unlikely and potentially quite harmful. */
5365 if (cost_data->loop_info)
5366 {
5367 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
5368 if (!rs6000_vect_nonmem
5369 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
5370 && LOOP_REQUIRES_VERSIONING (vec_info))
5371 cost_data->cost[vect_body] += 10000;
5372 }
5373
5374 *prologue_cost = cost_data->cost[vect_prologue];
5375 *body_cost = cost_data->cost[vect_body];
5376 *epilogue_cost = cost_data->cost[vect_epilogue];
5377 }
5378
5379 /* Implement targetm.vectorize.destroy_cost_data. */
5380
5381 static void
5382 rs6000_destroy_cost_data (void *data)
5383 {
5384 free (data);
5385 }
5386
5387 /* Implement targetm.loop_unroll_adjust. */
5388
5389 static unsigned
5390 rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
5391 {
5392 if (unroll_only_small_loops)
5393 {
5394 /* TODO: These are hardcoded values right now. We probably should use
5395 a PARAM here. */
5396 if (loop->ninsns <= 6)
5397 return MIN (4, nunroll);
5398 if (loop->ninsns <= 10)
5399 return MIN (2, nunroll);
5400
5401 return 0;
5402 }
5403
5404 return nunroll;
5405 }
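
/* For example, with -munroll-only-small-loops a 5-insn loop body is
   unrolled at most 4 times and a 9-insn body at most twice, even if a
   larger NUNROLL was computed; bodies of more than 10 insns are not
   unrolled at all.  */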
5406
5407 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5408 library with vectorized intrinsics. */
5409
5410 static tree
5411 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5412 tree type_in)
5413 {
5414 char name[32];
5415 const char *suffix = NULL;
5416 tree fntype, new_fndecl, bdecl = NULL_TREE;
5417 int n_args = 1;
5418 const char *bname;
5419 machine_mode el_mode, in_mode;
5420 int n, in_n;
5421
5422 /* Libmass is suitable for unsafe math only as it does not correctly support
5423 parts of IEEE with the required precision such as denormals. Only support
5424 it if we have VSX to use the simd d2 or f4 functions.
5425 XXX: Add variable length support. */
5426 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5427 return NULL_TREE;
5428
5429 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5430 n = TYPE_VECTOR_SUBPARTS (type_out);
5431 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5432 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5433 if (el_mode != in_mode
5434 || n != in_n)
5435 return NULL_TREE;
5436
5437 switch (fn)
5438 {
5439 CASE_CFN_ATAN2:
5440 CASE_CFN_HYPOT:
5441 CASE_CFN_POW:
5442 n_args = 2;
5443 gcc_fallthrough ();
5444
5445 CASE_CFN_ACOS:
5446 CASE_CFN_ACOSH:
5447 CASE_CFN_ASIN:
5448 CASE_CFN_ASINH:
5449 CASE_CFN_ATAN:
5450 CASE_CFN_ATANH:
5451 CASE_CFN_CBRT:
5452 CASE_CFN_COS:
5453 CASE_CFN_COSH:
5454 CASE_CFN_ERF:
5455 CASE_CFN_ERFC:
5456 CASE_CFN_EXP2:
5457 CASE_CFN_EXP:
5458 CASE_CFN_EXPM1:
5459 CASE_CFN_LGAMMA:
5460 CASE_CFN_LOG10:
5461 CASE_CFN_LOG1P:
5462 CASE_CFN_LOG2:
5463 CASE_CFN_LOG:
5464 CASE_CFN_SIN:
5465 CASE_CFN_SINH:
5466 CASE_CFN_SQRT:
5467 CASE_CFN_TAN:
5468 CASE_CFN_TANH:
5469 if (el_mode == DFmode && n == 2)
5470 {
5471 bdecl = mathfn_built_in (double_type_node, fn);
5472 suffix = "d2"; /* pow -> powd2 */
5473 }
5474 else if (el_mode == SFmode && n == 4)
5475 {
5476 bdecl = mathfn_built_in (float_type_node, fn);
5477 suffix = "4"; /* powf -> powf4 */
5478 }
5479 else
5480 return NULL_TREE;
5481 if (!bdecl)
5482 return NULL_TREE;
5483 break;
5484
5485 default:
5486 return NULL_TREE;
5487 }
5488
5489 gcc_assert (suffix != NULL);
5490 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5491 if (!bname)
5492 return NULL_TREE;
5493
5494 strcpy (name, bname + strlen ("__builtin_"));
5495 strcat (name, suffix);
5496
5497 if (n_args == 1)
5498 fntype = build_function_type_list (type_out, type_in, NULL);
5499 else if (n_args == 2)
5500 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5501 else
5502 gcc_unreachable ();
5503
5504 /* Build a function declaration for the vectorized function. */
5505 new_fndecl = build_decl (BUILTINS_LOCATION,
5506 FUNCTION_DECL, get_identifier (name), fntype);
5507 TREE_PUBLIC (new_fndecl) = 1;
5508 DECL_EXTERNAL (new_fndecl) = 1;
5509 DECL_IS_NOVOPS (new_fndecl) = 1;
5510 TREE_READONLY (new_fndecl) = 1;
5511
5512 return new_fndecl;
5513 }
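
/* A minimal standalone sketch of the name mangling above (kept under
   #if 0 since it is illustrative only): __builtin_pow with suffix "d2"
   becomes "powd2", and __builtin_powf with suffix "4" becomes "powf4".  */
#if 0
#include <stdio.h>
#include <string.h>

int
main (void)
{
  char name[32];
  const char *bname = "__builtin_powf";	/* DECL_NAME of the builtin.  */
  const char *suffix = "4";		/* SFmode, 4 lanes.  */

  strcpy (name, bname + strlen ("__builtin_"));
  strcat (name, suffix);
  printf ("%s\n", name);		/* Prints "powf4".  */
  return 0;
}
#endif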
5514
5515 /* Returns a function decl for a vectorized version of the builtin function
5516 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5517 if it is not available. */
5518
5519 static tree
5520 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5521 tree type_in)
5522 {
5523 machine_mode in_mode, out_mode;
5524 int in_n, out_n;
5525
5526 if (TARGET_DEBUG_BUILTIN)
5527 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5528 combined_fn_name (combined_fn (fn)),
5529 GET_MODE_NAME (TYPE_MODE (type_out)),
5530 GET_MODE_NAME (TYPE_MODE (type_in)));
5531
5532 if (TREE_CODE (type_out) != VECTOR_TYPE
5533 || TREE_CODE (type_in) != VECTOR_TYPE)
5534 return NULL_TREE;
5535
5536 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5537 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5538 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5539 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5540
5541 switch (fn)
5542 {
5543 CASE_CFN_COPYSIGN:
5544 if (VECTOR_UNIT_VSX_P (V2DFmode)
5545 && out_mode == DFmode && out_n == 2
5546 && in_mode == DFmode && in_n == 2)
5547 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5548 if (VECTOR_UNIT_VSX_P (V4SFmode)
5549 && out_mode == SFmode && out_n == 4
5550 && in_mode == SFmode && in_n == 4)
5551 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5552 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5553 && out_mode == SFmode && out_n == 4
5554 && in_mode == SFmode && in_n == 4)
5555 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5556 break;
5557 CASE_CFN_CEIL:
5558 if (VECTOR_UNIT_VSX_P (V2DFmode)
5559 && out_mode == DFmode && out_n == 2
5560 && in_mode == DFmode && in_n == 2)
5561 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5562 if (VECTOR_UNIT_VSX_P (V4SFmode)
5563 && out_mode == SFmode && out_n == 4
5564 && in_mode == SFmode && in_n == 4)
5565 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5566 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5567 && out_mode == SFmode && out_n == 4
5568 && in_mode == SFmode && in_n == 4)
5569 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5570 break;
5571 CASE_CFN_FLOOR:
5572 if (VECTOR_UNIT_VSX_P (V2DFmode)
5573 && out_mode == DFmode && out_n == 2
5574 && in_mode == DFmode && in_n == 2)
5575 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5576 if (VECTOR_UNIT_VSX_P (V4SFmode)
5577 && out_mode == SFmode && out_n == 4
5578 && in_mode == SFmode && in_n == 4)
5579 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5580 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5581 && out_mode == SFmode && out_n == 4
5582 && in_mode == SFmode && in_n == 4)
5583 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5584 break;
5585 CASE_CFN_FMA:
5586 if (VECTOR_UNIT_VSX_P (V2DFmode)
5587 && out_mode == DFmode && out_n == 2
5588 && in_mode == DFmode && in_n == 2)
5589 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5590 if (VECTOR_UNIT_VSX_P (V4SFmode)
5591 && out_mode == SFmode && out_n == 4
5592 && in_mode == SFmode && in_n == 4)
5593 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5594 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5595 && out_mode == SFmode && out_n == 4
5596 && in_mode == SFmode && in_n == 4)
5597 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5598 break;
5599 CASE_CFN_TRUNC:
5600 if (VECTOR_UNIT_VSX_P (V2DFmode)
5601 && out_mode == DFmode && out_n == 2
5602 && in_mode == DFmode && in_n == 2)
5603 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5604 if (VECTOR_UNIT_VSX_P (V4SFmode)
5605 && out_mode == SFmode && out_n == 4
5606 && in_mode == SFmode && in_n == 4)
5607 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5608 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5609 && out_mode == SFmode && out_n == 4
5610 && in_mode == SFmode && in_n == 4)
5611 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5612 break;
5613 CASE_CFN_NEARBYINT:
5614 if (VECTOR_UNIT_VSX_P (V2DFmode)
5615 && flag_unsafe_math_optimizations
5616 && out_mode == DFmode && out_n == 2
5617 && in_mode == DFmode && in_n == 2)
5618 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5619 if (VECTOR_UNIT_VSX_P (V4SFmode)
5620 && flag_unsafe_math_optimizations
5621 && out_mode == SFmode && out_n == 4
5622 && in_mode == SFmode && in_n == 4)
5623 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5624 break;
5625 CASE_CFN_RINT:
5626 if (VECTOR_UNIT_VSX_P (V2DFmode)
5627 && !flag_trapping_math
5628 && out_mode == DFmode && out_n == 2
5629 && in_mode == DFmode && in_n == 2)
5630 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5631 if (VECTOR_UNIT_VSX_P (V4SFmode)
5632 && !flag_trapping_math
5633 && out_mode == SFmode && out_n == 4
5634 && in_mode == SFmode && in_n == 4)
5635 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5636 break;
5637 default:
5638 break;
5639 }
5640
5641 /* Generate calls to libmass if appropriate. */
5642 if (rs6000_veclib_handler)
5643 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5644
5645 return NULL_TREE;
5646 }
5647
5648 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5649
5650 static tree
5651 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5652 tree type_in)
5653 {
5654 machine_mode in_mode, out_mode;
5655 int in_n, out_n;
5656
5657 if (TARGET_DEBUG_BUILTIN)
5658 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5659 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5660 GET_MODE_NAME (TYPE_MODE (type_out)),
5661 GET_MODE_NAME (TYPE_MODE (type_in)));
5662
5663 if (TREE_CODE (type_out) != VECTOR_TYPE
5664 || TREE_CODE (type_in) != VECTOR_TYPE)
5665 return NULL_TREE;
5666
5667 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5668 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5669 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5670 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5671
5672 enum rs6000_builtins fn
5673 = (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
5674 switch (fn)
5675 {
5676 case RS6000_BUILTIN_RSQRTF:
5677 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5678 && out_mode == SFmode && out_n == 4
5679 && in_mode == SFmode && in_n == 4)
5680 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5681 break;
5682 case RS6000_BUILTIN_RSQRT:
5683 if (VECTOR_UNIT_VSX_P (V2DFmode)
5684 && out_mode == DFmode && out_n == 2
5685 && in_mode == DFmode && in_n == 2)
5686 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5687 break;
5688 case RS6000_BUILTIN_RECIPF:
5689 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5690 && out_mode == SFmode && out_n == 4
5691 && in_mode == SFmode && in_n == 4)
5692 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5693 break;
5694 case RS6000_BUILTIN_RECIP:
5695 if (VECTOR_UNIT_VSX_P (V2DFmode)
5696 && out_mode == DFmode && out_n == 2
5697 && in_mode == DFmode && in_n == 2)
5698 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5699 break;
5700 default:
5701 break;
5702 }
5703 return NULL_TREE;
5704 }
5705 \f
5706 /* Default CPU string for rs6000*_file_start functions. */
5707 static const char *rs6000_default_cpu;
5708
5709 #ifdef USING_ELFOS_H
5710 const char *rs6000_machine;
5711
5712 const char *
5713 rs6000_machine_from_flags (void)
5714 {
5715 HOST_WIDE_INT flags = rs6000_isa_flags;
5716
5717 /* Disable the flags that should never influence the .machine selection. */
5718 flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT);
5719
5720 if ((flags & (ISA_3_1_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
5721 return "power10";
5722 if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
5723 return "power9";
5724 if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
5725 return "power8";
5726 if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
5727 return "power7";
5728 if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
5729 return "power6";
5730 if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
5731 return "power5";
5732 if ((flags & ISA_2_1_MASKS) != 0)
5733 return "power4";
5734 if ((flags & OPTION_MASK_POWERPC64) != 0)
5735 return "ppc64";
5736 return "ppc";
5737 }
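
/* For example, -mcpu=power9 sets ISA 3.0 flag bits that are not in
   ISA_2_7_MASKS_SERVER, so the cascade above yields ".machine power9";
   plain -mpowerpc64 with no newer ISA bits falls through to "ppc64".  */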
5738
5739 void
5740 emit_asm_machine (void)
5741 {
5742 fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
5743 }
5744 #endif
5745
5746 /* Do anything needed at the start of the asm file. */
5747
5748 static void
5749 rs6000_file_start (void)
5750 {
5751 char buffer[80];
5752 const char *start = buffer;
5753 FILE *file = asm_out_file;
5754
5755 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5756
5757 default_file_start ();
5758
5759 if (flag_verbose_asm)
5760 {
5761 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5762
5763 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5764 {
5765 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5766 start = "";
5767 }
5768
5769 if (global_options_set.x_rs6000_cpu_index)
5770 {
5771 fprintf (file, "%s -mcpu=%s", start,
5772 processor_target_table[rs6000_cpu_index].name);
5773 start = "";
5774 }
5775
5776 if (global_options_set.x_rs6000_tune_index)
5777 {
5778 fprintf (file, "%s -mtune=%s", start,
5779 processor_target_table[rs6000_tune_index].name);
5780 start = "";
5781 }
5782
5783 if (PPC405_ERRATUM77)
5784 {
5785 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5786 start = "";
5787 }
5788
5789 #ifdef USING_ELFOS_H
5790 switch (rs6000_sdata)
5791 {
5792 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5793 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5794 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5795 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5796 }
5797
5798 if (rs6000_sdata && g_switch_value)
5799 {
5800 fprintf (file, "%s -G %d", start,
5801 g_switch_value);
5802 start = "";
5803 }
5804 #endif
5805
5806 if (*start == '\0')
5807 putc ('\n', file);
5808 }
5809
5810 #ifdef USING_ELFOS_H
5811 rs6000_machine = rs6000_machine_from_flags ();
5812 emit_asm_machine ();
5813 #endif
5814
5815 if (DEFAULT_ABI == ABI_ELFv2)
5816 fprintf (file, "\t.abiversion 2\n");
5817 }
5818
5819 \f
5820 /* Return nonzero if this function is known to have a null epilogue. */
5821
5822 int
5823 direct_return (void)
5824 {
5825 if (reload_completed)
5826 {
5827 rs6000_stack_t *info = rs6000_stack_info ();
5828
5829 if (info->first_gp_reg_save == 32
5830 && info->first_fp_reg_save == 64
5831 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5832 && ! info->lr_save_p
5833 && ! info->cr_save_p
5834 && info->vrsave_size == 0
5835 && ! info->push_p)
5836 return 1;
5837 }
5838
5839 return 0;
5840 }
5841
5842 /* Helper for num_insns_constant. Calculate number of instructions to
5843 load VALUE to a single gpr using combinations of addi, addis, ori,
5844 oris, sldi and rldimi instructions. */
5845
5846 static int
5847 num_insns_constant_gpr (HOST_WIDE_INT value)
5848 {
5849 /* signed constant loadable with addi */
5850 if (SIGNED_INTEGER_16BIT_P (value))
5851 return 1;
5852
5853 /* constant loadable with addis */
5854 else if ((value & 0xffff) == 0
5855 && (value >> 31 == -1 || value >> 31 == 0))
5856 return 1;
5857
5858 /* PADDI can support up to 34 bit signed integers. */
5859 else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))
5860 return 1;
5861
5862 else if (TARGET_POWERPC64)
5863 {
5864 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5865 HOST_WIDE_INT high = value >> 31;
5866
5867 if (high == 0 || high == -1)
5868 return 2;
5869
5870 high >>= 1;
5871
5872 if (low == 0 || low == high)
5873 return num_insns_constant_gpr (high) + 1;
5874 else if (high == 0)
5875 return num_insns_constant_gpr (low) + 1;
5876 else
5877 return (num_insns_constant_gpr (high)
5878 + num_insns_constant_gpr (low) + 1);
5879 }
5880
5881 else
5882 return 2;
5883 }
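
/* Worked examples: 0x7fff needs one addi-class insn; 0x12340000 needs a
   single addis; and on 64-bit, 0x100000000 (1 << 32) splits into high = 1
   and low = 0, so it costs num_insns_constant_gpr (1) + 1 = 2 insns
   (li then sldi).  */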
5884
5885 /* Helper for num_insns_constant. Allow constants formed by the
5886 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
5887 and handle modes that require multiple gprs. */
5888
5889 static int
5890 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
5891 {
5892 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5893 int total = 0;
5894 while (nregs-- > 0)
5895 {
5896 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
5897 int insns = num_insns_constant_gpr (low);
5898 if (insns > 2
5899 /* We won't get more than 2 from num_insns_constant_gpr
5900 except when TARGET_POWERPC64 and mode is DImode or
5901 wider, so the register mode must be DImode. */
5902 && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
5903 insns = 2;
5904 total += insns;
5905 /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
5906 it all at once would be UB. */
5907 value >>= (BITS_PER_WORD - 1);
5908 value >>= 1;
5909 }
5910 return total;
5911 }
5912
5913 /* Return the number of instructions it takes to form a constant in as
5914 many gprs are needed for MODE. */
5915
5916 int
5917 num_insns_constant (rtx op, machine_mode mode)
5918 {
5919 HOST_WIDE_INT val;
5920
5921 switch (GET_CODE (op))
5922 {
5923 case CONST_INT:
5924 val = INTVAL (op);
5925 break;
5926
5927 case CONST_WIDE_INT:
5928 {
5929 int insns = 0;
5930 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5931 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
5932 DImode);
5933 return insns;
5934 }
5935
5936 case CONST_DOUBLE:
5937 {
5938 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
5939
5940 if (mode == SFmode || mode == SDmode)
5941 {
5942 long l;
5943
5944 if (mode == SDmode)
5945 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
5946 else
5947 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
5948 /* See the first define_split in rs6000.md handling a
5949 const_double_operand. */
5950 val = l;
5951 mode = SImode;
5952 }
5953 else if (mode == DFmode || mode == DDmode)
5954 {
5955 long l[2];
5956
5957 if (mode == DDmode)
5958 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
5959 else
5960 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
5961
5962 /* See the second (32-bit) and third (64-bit) define_split
5963 in rs6000.md handling a const_double_operand. */
5964 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
5965 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
5966 mode = DImode;
5967 }
5968 else if (mode == TFmode || mode == TDmode
5969 || mode == KFmode || mode == IFmode)
5970 {
5971 long l[4];
5972 int insns;
5973
5974 if (mode == TDmode)
5975 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
5976 else
5977 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
5978
5979 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
5980 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
5981 insns = num_insns_constant_multi (val, DImode);
5982 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
5983 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
5984 insns += num_insns_constant_multi (val, DImode);
5985 return insns;
5986 }
5987 else
5988 gcc_unreachable ();
5989 }
5990 break;
5991
5992 default:
5993 gcc_unreachable ();
5994 }
5995
5996 return num_insns_constant_multi (val, mode);
5997 }
5998
5999 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6000 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6001 corresponding element of the vector, but for V4SFmode, the
6002 corresponding "float" is interpreted as an SImode integer. */
6003
6004 HOST_WIDE_INT
6005 const_vector_elt_as_int (rtx op, unsigned int elt)
6006 {
6007 rtx tmp;
6008
6009 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6010 gcc_assert (GET_MODE (op) != V2DImode
6011 && GET_MODE (op) != V2DFmode);
6012
6013 tmp = CONST_VECTOR_ELT (op, elt);
6014 if (GET_MODE (op) == V4SFmode)
6015 tmp = gen_lowpart (SImode, tmp);
6016 return INTVAL (tmp);
6017 }
6018
6019 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6020 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6021 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6022 all items are set to the same value and contain COPIES replicas of the
6023 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6024 operand and the others are set to the value of the operand's msb. */
6025
6026 static bool
6027 vspltis_constant (rtx op, unsigned step, unsigned copies)
6028 {
6029 machine_mode mode = GET_MODE (op);
6030 machine_mode inner = GET_MODE_INNER (mode);
6031
6032 unsigned i;
6033 unsigned nunits;
6034 unsigned bitsize;
6035 unsigned mask;
6036
6037 HOST_WIDE_INT val;
6038 HOST_WIDE_INT splat_val;
6039 HOST_WIDE_INT msb_val;
6040
6041 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6042 return false;
6043
6044 nunits = GET_MODE_NUNITS (mode);
6045 bitsize = GET_MODE_BITSIZE (inner);
6046 mask = GET_MODE_MASK (inner);
6047
6048 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6049 splat_val = val;
6050 msb_val = val >= 0 ? 0 : -1;
6051
6052 /* Construct the value to be splatted, if possible. If not, return 0. */
6053 for (i = 2; i <= copies; i *= 2)
6054 {
6055 HOST_WIDE_INT small_val;
6056 bitsize /= 2;
6057 small_val = splat_val >> bitsize;
6058 mask >>= bitsize;
6059 if (splat_val != ((HOST_WIDE_INT)
6060 ((unsigned HOST_WIDE_INT) small_val << bitsize)
6061 | (small_val & mask)))
6062 return false;
6063 splat_val = small_val;
6064 }
6065
6066 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6067 if (EASY_VECTOR_15 (splat_val))
6068 ;
6069
6070 /* Also check if we can splat, and then add the result to itself. Do so if
6071 the value is positive, or if the splat instruction is using OP's mode;
6072 for splat_val < 0, the splat and the add should use the same mode. */
6073 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6074 && (splat_val >= 0 || (step == 1 && copies == 1)))
6075 ;
6076
6077 /* Also check if we are loading up the most significant bit, which can be
6078 done by loading up -1 and shifting the value left by -1. */
6079 else if (EASY_VECTOR_MSB (splat_val, inner))
6080 ;
6081
6082 else
6083 return false;
6084
6085 /* Check if VAL is present in every STEP-th element, and the
6086 other elements are filled with its most significant bit. */
6087 for (i = 1; i < nunits; ++i)
6088 {
6089 HOST_WIDE_INT desired_val;
6090 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6091 if ((i & (step - 1)) == 0)
6092 desired_val = val;
6093 else
6094 desired_val = msb_val;
6095
6096 if (desired_val != const_vector_elt_as_int (op, elt))
6097 return false;
6098 }
6099
6100 return true;
6101 }
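
/* Worked example: a V8HImode CONST_VECTOR whose elements are all 0x0101
   passes with STEP == 1 and COPIES == 2: halving BITSIZE to 8 gives
   small_val == 0x01, 0x0101 is indeed two copies of it, and
   EASY_VECTOR_15 (1) holds, so the constant is a vspltisb 1 in disguise.
   A plain {5,5,5,5} V4SImode vector passes with STEP == 1, COPIES == 1
   as a direct vspltisw 5.  */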
6102
6103 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6104 instruction, filling in the bottom elements with 0 or -1.
6105
6106 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6107 for the number of zeroes to shift in, or negative for the number of 0xff
6108 bytes to shift in.
6109
6110 OP is a CONST_VECTOR. */
6111
6112 int
6113 vspltis_shifted (rtx op)
6114 {
6115 machine_mode mode = GET_MODE (op);
6116 machine_mode inner = GET_MODE_INNER (mode);
6117
6118 unsigned i, j;
6119 unsigned nunits;
6120 unsigned mask;
6121
6122 HOST_WIDE_INT val;
6123
6124 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6125 return false;
6126
6127 /* We need to create pseudo registers to do the shift, so don't recognize
6128 shift vector constants after reload. */
6129 if (!can_create_pseudo_p ())
6130 return false;
6131
6132 nunits = GET_MODE_NUNITS (mode);
6133 mask = GET_MODE_MASK (inner);
6134
6135 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6136
6137 /* Check if the value can really be the operand of a vspltis[bhw]. */
6138 if (EASY_VECTOR_15 (val))
6139 ;
6140
6141 /* Also check if we are loading up the most significant bit which can be done
6142 by loading up -1 and shifting the value left by -1. */
6143 else if (EASY_VECTOR_MSB (val, inner))
6144 ;
6145
6146 else
6147 return 0;
6148
6149 /* Check if VAL is present in every STEP-th element until we find elements
6150 that are 0 or all 1 bits. */
6151 for (i = 1; i < nunits; ++i)
6152 {
6153 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6154 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6155
6156 /* If the value isn't the splat value, check for the remaining elements
6157 being 0/-1. */
6158 if (val != elt_val)
6159 {
6160 if (elt_val == 0)
6161 {
6162 for (j = i+1; j < nunits; ++j)
6163 {
6164 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6165 if (const_vector_elt_as_int (op, elt2) != 0)
6166 return 0;
6167 }
6168
6169 return (nunits - i) * GET_MODE_SIZE (inner);
6170 }
6171
6172 else if ((elt_val & mask) == mask)
6173 {
6174 for (j = i+1; j < nunits; ++j)
6175 {
6176 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6177 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6178 return 0;
6179 }
6180
6181 return -((nunits - i) * GET_MODE_SIZE (inner));
6182 }
6183
6184 else
6185 return 0;
6186 }
6187 }
6188
6189 /* If all elements are equal, we don't need to do VSLDOI. */
6190 return 0;
6191 }
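
/* Worked example: on a big-endian target the V4SImode vector {5, 0, 0, 0}
   splats VAL == 5 and then finds zeros from element 1 onward, so the
   function returns (4 - 1) * 4 == 12: emit vspltisw 5 and use VSLDOI to
   shift twelve zero bytes into the low end.  */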
6192
6193
6194 /* Return true if OP is of the given MODE and can be synthesized
6195 with a vspltisb, vspltish or vspltisw. */
6196
6197 bool
6198 easy_altivec_constant (rtx op, machine_mode mode)
6199 {
6200 unsigned step, copies;
6201
6202 if (mode == VOIDmode)
6203 mode = GET_MODE (op);
6204 else if (mode != GET_MODE (op))
6205 return false;
6206
6207 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6208 constants. */
6209 if (mode == V2DFmode)
6210 return zero_constant (op, mode);
6211
6212 else if (mode == V2DImode)
6213 {
6214 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
6215 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
6216 return false;
6217
6218 if (zero_constant (op, mode))
6219 return true;
6220
6221 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6222 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6223 return true;
6224
6225 return false;
6226 }
6227
6228 /* V1TImode is a special container for TImode. Ignore for now. */
6229 else if (mode == V1TImode)
6230 return false;
6231
6232 /* Start with a vspltisw. */
6233 step = GET_MODE_NUNITS (mode) / 4;
6234 copies = 1;
6235
6236 if (vspltis_constant (op, step, copies))
6237 return true;
6238
6239 /* Then try with a vspltish. */
6240 if (step == 1)
6241 copies <<= 1;
6242 else
6243 step >>= 1;
6244
6245 if (vspltis_constant (op, step, copies))
6246 return true;
6247
6248 /* And finally a vspltisb. */
6249 if (step == 1)
6250 copies <<= 1;
6251 else
6252 step >>= 1;
6253
6254 if (vspltis_constant (op, step, copies))
6255 return true;
6256
6257 if (vspltis_shifted (op) != 0)
6258 return true;
6259
6260 return false;
6261 }
6262
6263 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6264 result is OP. Abort if it is not possible. */
6265
6266 rtx
6267 gen_easy_altivec_constant (rtx op)
6268 {
6269 machine_mode mode = GET_MODE (op);
6270 int nunits = GET_MODE_NUNITS (mode);
6271 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6272 unsigned step = nunits / 4;
6273 unsigned copies = 1;
6274
6275 /* Start with a vspltisw. */
6276 if (vspltis_constant (op, step, copies))
6277 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6278
6279 /* Then try with a vspltish. */
6280 if (step == 1)
6281 copies <<= 1;
6282 else
6283 step >>= 1;
6284
6285 if (vspltis_constant (op, step, copies))
6286 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6287
6288 /* And finally a vspltisb. */
6289 if (step == 1)
6290 copies <<= 1;
6291 else
6292 step >>= 1;
6293
6294 if (vspltis_constant (op, step, copies))
6295 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6296
6297 gcc_unreachable ();
6298 }
6299
6300 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6301 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6302
6303 Store the number of instructions needed (1 or 2) into the location
6304 pointed to by NUM_INSNS_PTR.
6305
6306 Store the constant that is being split via CONSTANT_PTR. */
6307
6308 bool
6309 xxspltib_constant_p (rtx op,
6310 machine_mode mode,
6311 int *num_insns_ptr,
6312 int *constant_ptr)
6313 {
6314 size_t nunits = GET_MODE_NUNITS (mode);
6315 size_t i;
6316 HOST_WIDE_INT value;
6317 rtx element;
6318
6319 /* Set the returned values to out of bound values. */
6320 *num_insns_ptr = -1;
6321 *constant_ptr = 256;
6322
6323 if (!TARGET_P9_VECTOR)
6324 return false;
6325
6326 if (mode == VOIDmode)
6327 mode = GET_MODE (op);
6328
6329 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6330 return false;
6331
6332 /* Handle (vec_duplicate <constant>). */
6333 if (GET_CODE (op) == VEC_DUPLICATE)
6334 {
6335 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6336 && mode != V2DImode)
6337 return false;
6338
6339 element = XEXP (op, 0);
6340 if (!CONST_INT_P (element))
6341 return false;
6342
6343 value = INTVAL (element);
6344 if (!IN_RANGE (value, -128, 127))
6345 return false;
6346 }
6347
6348 /* Handle (const_vector [...]). */
6349 else if (GET_CODE (op) == CONST_VECTOR)
6350 {
6351 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6352 && mode != V2DImode)
6353 return false;
6354
6355 element = CONST_VECTOR_ELT (op, 0);
6356 if (!CONST_INT_P (element))
6357 return false;
6358
6359 value = INTVAL (element);
6360 if (!IN_RANGE (value, -128, 127))
6361 return false;
6362
6363 for (i = 1; i < nunits; i++)
6364 {
6365 element = CONST_VECTOR_ELT (op, i);
6366 if (!CONST_INT_P (element))
6367 return false;
6368
6369 if (value != INTVAL (element))
6370 return false;
6371 }
6372 }
6373
6374 /* Handle integer constants being loaded into the upper part of the VSX
6375 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6376 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6377 else if (CONST_INT_P (op))
6378 {
6379 if (!SCALAR_INT_MODE_P (mode))
6380 return false;
6381
6382 value = INTVAL (op);
6383 if (!IN_RANGE (value, -128, 127))
6384 return false;
6385
6386 if (!IN_RANGE (value, -1, 0))
6387 {
6388 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6389 return false;
6390
6391 if (EASY_VECTOR_15 (value))
6392 return false;
6393 }
6394 }
6395
6396 else
6397 return false;
6398
6399 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6400 sign extend. Special case 0/-1 to allow getting any VSX register instead
6401 of an Altivec register. */
6402 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6403 && EASY_VECTOR_15 (value))
6404 return false;
6405
6406 /* Return # of instructions and the constant byte for XXSPLTIB. */
6407 if (mode == V16QImode)
6408 *num_insns_ptr = 1;
6409
6410 else if (IN_RANGE (value, -1, 0))
6411 *num_insns_ptr = 1;
6412
6413 else
6414 *num_insns_ptr = 2;
6415
6416 *constant_ptr = (int) value;
6417 return true;
6418 }
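
/* Worked examples: a V16QImode splat of 100 needs a single XXSPLTIB and
   sets *NUM_INSNS_PTR to 1; a V4SImode splat of 100 needs XXSPLTIB plus a
   sign extend, so *NUM_INSNS_PTR is 2; and a V4SImode splat of 5 returns
   false because a lone VSPLTISW 5 is cheaper.  */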
6419
6420 const char *
6421 output_vec_const_move (rtx *operands)
6422 {
6423 int shift;
6424 machine_mode mode;
6425 rtx dest, vec;
6426
6427 dest = operands[0];
6428 vec = operands[1];
6429 mode = GET_MODE (dest);
6430
6431 if (TARGET_VSX)
6432 {
6433 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6434 int xxspltib_value = 256;
6435 int num_insns = -1;
6436
6437 if (zero_constant (vec, mode))
6438 {
6439 if (TARGET_P9_VECTOR)
6440 return "xxspltib %x0,0";
6441
6442 else if (dest_vmx_p)
6443 return "vspltisw %0,0";
6444
6445 else
6446 return "xxlxor %x0,%x0,%x0";
6447 }
6448
6449 if (all_ones_constant (vec, mode))
6450 {
6451 if (TARGET_P9_VECTOR)
6452 return "xxspltib %x0,255";
6453
6454 else if (dest_vmx_p)
6455 return "vspltisw %0,-1";
6456
6457 else if (TARGET_P8_VECTOR)
6458 return "xxlorc %x0,%x0,%x0";
6459
6460 else
6461 gcc_unreachable ();
6462 }
6463
6464 if (TARGET_P9_VECTOR
6465 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6466 {
6467 if (num_insns == 1)
6468 {
6469 operands[2] = GEN_INT (xxspltib_value & 0xff);
6470 return "xxspltib %x0,%2";
6471 }
6472
6473 return "#";
6474 }
6475 }
6476
6477 if (TARGET_ALTIVEC)
6478 {
6479 rtx splat_vec;
6480
6481 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6482 if (zero_constant (vec, mode))
6483 return "vspltisw %0,0";
6484
6485 if (all_ones_constant (vec, mode))
6486 return "vspltisw %0,-1";
6487
6488 /* Do we need to construct a value using VSLDOI? */
6489 shift = vspltis_shifted (vec);
6490 if (shift != 0)
6491 return "#";
6492
6493 splat_vec = gen_easy_altivec_constant (vec);
6494 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6495 operands[1] = XEXP (splat_vec, 0);
6496 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6497 return "#";
6498
6499 switch (GET_MODE (splat_vec))
6500 {
6501 case E_V4SImode:
6502 return "vspltisw %0,%1";
6503
6504 case E_V8HImode:
6505 return "vspltish %0,%1";
6506
6507 case E_V16QImode:
6508 return "vspltisb %0,%1";
6509
6510 default:
6511 gcc_unreachable ();
6512 }
6513 }
6514
6515 gcc_unreachable ();
6516 }
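
/* For example, a zero V4SI constant becomes "xxspltib %x0,0" on ISA 3.0,
   "vspltisw %0,0" when the destination is an Altivec register, and
   "xxlxor %x0,%x0,%x0" otherwise; constants that need a VSLDOI fixup
   return "#" so they are split later.  */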
6517
6518 /* Initialize vector TARGET to VALS. */
6519
6520 void
6521 rs6000_expand_vector_init (rtx target, rtx vals)
6522 {
6523 machine_mode mode = GET_MODE (target);
6524 machine_mode inner_mode = GET_MODE_INNER (mode);
6525 unsigned int n_elts = GET_MODE_NUNITS (mode);
6526 int n_var = 0, one_var = -1;
6527 bool all_same = true, all_const_zero = true;
6528 rtx x, mem;
6529 unsigned int i;
6530
6531 for (i = 0; i < n_elts; ++i)
6532 {
6533 x = XVECEXP (vals, 0, i);
6534 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6535 ++n_var, one_var = i;
6536 else if (x != CONST0_RTX (inner_mode))
6537 all_const_zero = false;
6538
6539 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6540 all_same = false;
6541 }
6542
6543 if (n_var == 0)
6544 {
6545 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6546 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6547 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6548 {
6549 /* Zero register. */
6550 emit_move_insn (target, CONST0_RTX (mode));
6551 return;
6552 }
6553 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6554 {
6555 /* Splat immediate. */
6556 emit_insn (gen_rtx_SET (target, const_vec));
6557 return;
6558 }
6559 else
6560 {
6561 /* Load from constant pool. */
6562 emit_move_insn (target, const_vec);
6563 return;
6564 }
6565 }
6566
6567 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6568 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6569 {
6570 rtx op[2];
6571 size_t i;
6572 size_t num_elements = all_same ? 1 : 2;
6573 for (i = 0; i < num_elements; i++)
6574 {
6575 op[i] = XVECEXP (vals, 0, i);
6576 /* Just in case there is a SUBREG with a smaller mode, do a
6577 conversion. */
6578 if (GET_MODE (op[i]) != inner_mode)
6579 {
6580 rtx tmp = gen_reg_rtx (inner_mode);
6581 convert_move (tmp, op[i], 0);
6582 op[i] = tmp;
6583 }
6584 /* Allow load with splat double word. */
6585 else if (MEM_P (op[i]))
6586 {
6587 if (!all_same)
6588 op[i] = force_reg (inner_mode, op[i]);
6589 }
6590 else if (!REG_P (op[i]))
6591 op[i] = force_reg (inner_mode, op[i]);
6592 }
6593
6594 if (all_same)
6595 {
6596 if (mode == V2DFmode)
6597 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6598 else
6599 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6600 }
6601 else
6602 {
6603 if (mode == V2DFmode)
6604 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6605 else
6606 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6607 }
6608 return;
6609 }
6610
6611 /* Special case initializing vector int if we are on 64-bit systems with
6612 direct move or we have the ISA 3.0 instructions. */
6613 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6614 && TARGET_DIRECT_MOVE_64BIT)
6615 {
6616 if (all_same)
6617 {
6618 rtx element0 = XVECEXP (vals, 0, 0);
6619 if (MEM_P (element0))
6620 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6621 else
6622 element0 = force_reg (SImode, element0);
6623
6624 if (TARGET_P9_VECTOR)
6625 emit_insn (gen_vsx_splat_v4si (target, element0));
6626 else
6627 {
6628 rtx tmp = gen_reg_rtx (DImode);
6629 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6630 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6631 }
6632 return;
6633 }
6634 else
6635 {
6636 rtx elements[4];
6637 size_t i;
6638
6639 for (i = 0; i < 4; i++)
6640 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6641
6642 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6643 elements[2], elements[3]));
6644 return;
6645 }
6646 }
6647
6648 /* With single-precision floating point on VSX, note that internally single
6649 precision is actually represented as a double. Either make 2 V2DF
6650 vectors and convert those vectors to single precision, or do one
6651 conversion and splat the result to the other elements. */
6652 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6653 {
6654 if (all_same)
6655 {
6656 rtx element0 = XVECEXP (vals, 0, 0);
6657
6658 if (TARGET_P9_VECTOR)
6659 {
6660 if (MEM_P (element0))
6661 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6662
6663 emit_insn (gen_vsx_splat_v4sf (target, element0));
6664 }
6665
6666 else
6667 {
6668 rtx freg = gen_reg_rtx (V4SFmode);
6669 rtx sreg = force_reg (SFmode, element0);
6670 rtx cvt = (TARGET_XSCVDPSPN
6671 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6672 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6673
6674 emit_insn (cvt);
6675 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6676 const0_rtx));
6677 }
6678 }
6679 else
6680 {
6681 if (TARGET_P8_VECTOR && TARGET_POWERPC64)
6682 {
6683 rtx tmp_sf[4];
6684 rtx tmp_si[4];
6685 rtx tmp_di[4];
6686 rtx mrg_di[4];
6687 for (i = 0; i < 4; i++)
6688 {
6689 tmp_si[i] = gen_reg_rtx (SImode);
6690 tmp_di[i] = gen_reg_rtx (DImode);
6691 mrg_di[i] = gen_reg_rtx (DImode);
6692 tmp_sf[i] = force_reg (SFmode, XVECEXP (vals, 0, i));
6693 emit_insn (gen_movsi_from_sf (tmp_si[i], tmp_sf[i]));
6694 emit_insn (gen_zero_extendsidi2 (tmp_di[i], tmp_si[i]));
6695 }
6696
6697 if (!BYTES_BIG_ENDIAN)
6698 {
6699 std::swap (tmp_di[0], tmp_di[1]);
6700 std::swap (tmp_di[2], tmp_di[3]);
6701 }
6702
6703 emit_insn (gen_ashldi3 (mrg_di[0], tmp_di[0], GEN_INT (32)));
6704 emit_insn (gen_iordi3 (mrg_di[1], mrg_di[0], tmp_di[1]));
6705 emit_insn (gen_ashldi3 (mrg_di[2], tmp_di[2], GEN_INT (32)));
6706 emit_insn (gen_iordi3 (mrg_di[3], mrg_di[2], tmp_di[3]));
6707
6708 rtx tmp_v2di = gen_reg_rtx (V2DImode);
6709 emit_insn (gen_vsx_concat_v2di (tmp_v2di, mrg_di[1], mrg_di[3]));
6710 emit_move_insn (target, gen_lowpart (V4SFmode, tmp_v2di));
6711 }
6712 else
6713 {
6714 rtx dbl_even = gen_reg_rtx (V2DFmode);
6715 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6716 rtx flt_even = gen_reg_rtx (V4SFmode);
6717 rtx flt_odd = gen_reg_rtx (V4SFmode);
6718 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6719 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6720 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6721 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6722
6723 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6724 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6725 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6726 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6727 rs6000_expand_extract_even (target, flt_even, flt_odd);
6728 }
6729 }
6730 return;
6731 }
6732
6733 /* Special case initializing vector short/char that are splats if we are on
6734 64-bit systems with direct move. */
6735 if (all_same && TARGET_DIRECT_MOVE_64BIT
6736 && (mode == V16QImode || mode == V8HImode))
6737 {
6738 rtx op0 = XVECEXP (vals, 0, 0);
6739 rtx di_tmp = gen_reg_rtx (DImode);
6740
6741 if (!REG_P (op0))
6742 op0 = force_reg (GET_MODE_INNER (mode), op0);
6743
6744 if (mode == V16QImode)
6745 {
6746 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6747 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6748 return;
6749 }
6750
6751 if (mode == V8HImode)
6752 {
6753 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6754 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
6755 return;
6756 }
6757 }
6758
6759 /* Store value to stack temp. Load vector element. Splat. However, splat
6760 of 64-bit items is not supported on Altivec. */
6761 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6762 {
6763 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6764 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6765 XVECEXP (vals, 0, 0));
6766 x = gen_rtx_UNSPEC (VOIDmode,
6767 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6768 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6769 gen_rtvec (2,
6770 gen_rtx_SET (target, mem),
6771 x)));
6772 x = gen_rtx_VEC_SELECT (inner_mode, target,
6773 gen_rtx_PARALLEL (VOIDmode,
6774 gen_rtvec (1, const0_rtx)));
6775 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6776 return;
6777 }
6778
6779 /* One field is non-constant. Load constant then overwrite
6780 varying field. */
6781 if (n_var == 1)
6782 {
6783 rtx copy = copy_rtx (vals);
6784
6785 /* Load constant part of vector, substitute neighboring value for
6786 varying element. */
6787 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6788 rs6000_expand_vector_init (target, copy);
6789
6790 /* Insert variable. */
6791 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var),
6792 GEN_INT (one_var));
6793 return;
6794 }
6795
6796 if (TARGET_DIRECT_MOVE && (mode == V16QImode || mode == V8HImode))
6797 {
6798 rtx op[16];
6799 /* Force the values into word_mode registers. */
6800 for (i = 0; i < n_elts; i++)
6801 {
6802 rtx tmp = force_reg (GET_MODE_INNER (mode), XVECEXP (vals, 0, i));
6803 if (TARGET_POWERPC64)
6804 {
6805 op[i] = gen_reg_rtx (DImode);
6806 emit_insn (gen_zero_extendqidi2 (op[i], tmp));
6807 }
6808 else
6809 {
6810 op[i] = gen_reg_rtx (SImode);
6811 emit_insn (gen_zero_extendqisi2 (op[i], tmp));
6812 }
6813 }
6814
6815 /* Take unsigned char on 64-bit big endian as an example for the
6816 construction below; the input values are: A, B, C, D, ..., O, P. */
6817
6818 if (TARGET_DIRECT_MOVE_128)
6819 {
6820 /* Move to VSX register with vec_concat, each has 2 values.
6821 eg: vr1[0] = { xxxxxxxA, xxxxxxxB };
6822 vr1[1] = { xxxxxxxC, xxxxxxxD };
6823 ...
6824 vr1[7] = { xxxxxxxO, xxxxxxxP }; */
6825 rtx vr1[8];
6826 for (i = 0; i < n_elts / 2; i++)
6827 {
6828 vr1[i] = gen_reg_rtx (V2DImode);
6829 emit_insn (gen_vsx_concat_v2di (vr1[i], op[i * 2],
6830 op[i * 2 + 1]));
6831 }
6832
6833 /* Pack vectors with 2 values into vectors with 4 values.
6834 eg: vr2[0] = { xxxAxxxB, xxxCxxxD };
6835 vr2[1] = { xxxExxxF, xxxGxxxH };
6836 vr2[2] = { xxxIxxxJ, xxxKxxxL };
6837 vr2[3] = { xxxMxxxN, xxxOxxxP }; */
6838 rtx vr2[4];
6839 for (i = 0; i < n_elts / 4; i++)
6840 {
6841 vr2[i] = gen_reg_rtx (V4SImode);
6842 emit_insn (gen_altivec_vpkudum (vr2[i], vr1[i * 2],
6843 vr1[i * 2 + 1]));
6844 }
6845
6846 /* Pack vectors with 4 values into vectors with 8 values.
6847 eg: vr3[0] = { xAxBxCxD, xExFxGxH };
6848 vr3[1] = { xIxJxKxL, xMxNxOxP }; */
6849 rtx vr3[2];
6850 for (i = 0; i < n_elts / 8; i++)
6851 {
6852 vr3[i] = gen_reg_rtx (V8HImode);
6853 emit_insn (gen_altivec_vpkuwum (vr3[i], vr2[i * 2],
6854 vr2[i * 2 + 1]));
6855 }
6856
6857 /* If the mode is V8HImode, the result is complete; move it into TARGET and return. */
6858 if (mode == V8HImode)
6859 {
6860 emit_insn (gen_rtx_SET (target, vr3[0]));
6861 return;
6862 }
6863
6864 /* Pack vectors with 8 values into 16 values. */
6865 rtx res = gen_reg_rtx (V16QImode);
6866 emit_insn (gen_altivec_vpkuhum (res, vr3[0], vr3[1]));
6867 emit_insn (gen_rtx_SET (target, res));
6868 }
6869 else
6870 {
6871 rtx (*merge_v16qi) (rtx, rtx, rtx) = NULL;
6872 rtx (*merge_v8hi) (rtx, rtx, rtx) = NULL;
6873 rtx (*merge_v4si) (rtx, rtx, rtx) = NULL;
6874 rtx perm_idx;
6875
6876 /* Set up some common gen routines and values. */
6877 if (BYTES_BIG_ENDIAN)
6878 {
6879 if (mode == V16QImode)
6880 {
6881 merge_v16qi = gen_altivec_vmrghb;
6882 merge_v8hi = gen_altivec_vmrglh;
6883 }
6884 else
6885 merge_v8hi = gen_altivec_vmrghh;
6886
6887 merge_v4si = gen_altivec_vmrglw;
6888 perm_idx = GEN_INT (3);
6889 }
6890 else
6891 {
6892 if (mode == V16QImode)
6893 {
6894 merge_v16qi = gen_altivec_vmrglb;
6895 merge_v8hi = gen_altivec_vmrghh;
6896 }
6897 else
6898 merge_v8hi = gen_altivec_vmrglh;
6899
6900 merge_v4si = gen_altivec_vmrghw;
6901 perm_idx = GEN_INT (0);
6902 }
6903
6904 /* Move to VSX register with direct move.
6905 eg: vr_qi[0] = { xxxxxxxA, xxxxxxxx };
6906 vr_qi[1] = { xxxxxxxB, xxxxxxxx };
6907 ...
6908 vr_qi[15] = { xxxxxxxP, xxxxxxxx }; */
6909 rtx vr_qi[16];
6910 for (i = 0; i < n_elts; i++)
6911 {
6912 vr_qi[i] = gen_reg_rtx (V16QImode);
6913 if (TARGET_POWERPC64)
6914 emit_insn (gen_p8_mtvsrd_v16qidi2 (vr_qi[i], op[i]));
6915 else
6916 emit_insn (gen_p8_mtvsrwz_v16qisi2 (vr_qi[i], op[i]));
6917 }
6918
6919 /* Merge/move to vector short.
6920 eg: vr_hi[0] = { xxxxxxxx, xxxxxxAB };
6921 vr_hi[1] = { xxxxxxxx, xxxxxxCD };
6922 ...
6923 vr_hi[7] = { xxxxxxxx, xxxxxxOP }; */
6924 rtx vr_hi[8];
6925 for (i = 0; i < 8; i++)
6926 {
6927 rtx tmp = vr_qi[i];
6928 if (mode == V16QImode)
6929 {
6930 tmp = gen_reg_rtx (V16QImode);
6931 emit_insn (merge_v16qi (tmp, vr_qi[2 * i], vr_qi[2 * i + 1]));
6932 }
6933 vr_hi[i] = gen_reg_rtx (V8HImode);
6934 emit_move_insn (vr_hi[i], gen_lowpart (V8HImode, tmp));
6935 }
6936
6937 /* Merge vector short to vector int.
6938 eg: vr_si[0] = { xxxxxxxx, xxxxABCD };
6939 vr_si[1] = { xxxxxxxx, xxxxEFGH };
6940 ...
6941 vr_si[3] = { xxxxxxxx, xxxxMNOP }; */
6942 rtx vr_si[4];
6943 for (i = 0; i < 4; i++)
6944 {
6945 rtx tmp = gen_reg_rtx (V8HImode);
6946 emit_insn (merge_v8hi (tmp, vr_hi[2 * i], vr_hi[2 * i + 1]));
6947 vr_si[i] = gen_reg_rtx (V4SImode);
6948 emit_move_insn (vr_si[i], gen_lowpart (V4SImode, tmp));
6949 }
6950
6951 /* Merge vector int to vector long.
6952 eg: vr_di[0] = { xxxxxxxx, ABCDEFGH };
6953 vr_di[1] = { xxxxxxxx, IJKLMNOP }; */
6954 rtx vr_di[2];
6955 for (i = 0; i < 2; i++)
6956 {
6957 rtx tmp = gen_reg_rtx (V4SImode);
6958 emit_insn (merge_v4si (tmp, vr_si[2 * i], vr_si[2 * i + 1]));
6959 vr_di[i] = gen_reg_rtx (V2DImode);
6960 emit_move_insn (vr_di[i], gen_lowpart (V2DImode, tmp));
6961 }
6962
6963 rtx res = gen_reg_rtx (V2DImode);
6964 emit_insn (gen_vsx_xxpermdi_v2di (res, vr_di[0], vr_di[1], perm_idx));
6965 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, res)));
6966 }
6967
6968 return;
6969 }
6970
6971 /* Construct the vector in memory one field at a time
6972 and load the whole vector. */
6973 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6974 for (i = 0; i < n_elts; i++)
6975 emit_move_insn (adjust_address_nv (mem, inner_mode,
6976 i * GET_MODE_SIZE (inner_mode)),
6977 XVECEXP (vals, 0, i));
6978 emit_move_insn (target, mem);
6979 }
6980
6981 /* Set the element of vector TARGET indexed by ELT_RTX to VAL. */
6982
6983 void
6984 rs6000_expand_vector_set (rtx target, rtx val, rtx elt_rtx)
6985 {
6986 machine_mode mode = GET_MODE (target);
6987 machine_mode inner_mode = GET_MODE_INNER (mode);
6988 rtx reg = gen_reg_rtx (mode);
6989 rtx mask, mem, x;
6990 int width = GET_MODE_SIZE (inner_mode);
6991 int i;
6992
6993 val = force_reg (GET_MODE (val), val);
6994
6995 if (VECTOR_MEM_VSX_P (mode))
6996 {
6997 rtx insn = NULL_RTX;
6998
6999 if (mode == V2DFmode)
7000 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
7001
7002 else if (mode == V2DImode)
7003 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
7004
7005 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
7006 {
7007 if (mode == V4SImode)
7008 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
7009 else if (mode == V8HImode)
7010 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
7011 else if (mode == V16QImode)
7012 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
7013 else if (mode == V4SFmode)
7014 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
7015 }
7016
7017 if (insn)
7018 {
7019 emit_insn (insn);
7020 return;
7021 }
7022 }
7023
7024 gcc_assert (CONST_INT_P (elt_rtx));
7025
7026 /* Simplify setting single-element vectors like V1TImode. */
7027 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode)
7028 && INTVAL (elt_rtx) == 0)
7029 {
7030 emit_move_insn (target, gen_lowpart (mode, val));
7031 return;
7032 }
7033
7034 /* Load single variable value. */
7035 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7036 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7037 x = gen_rtx_UNSPEC (VOIDmode,
7038 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7039 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7040 gen_rtvec (2,
7041 gen_rtx_SET (reg, mem),
7042 x)));
7043
7044 /* Start with the identity byte permutation, 0 .. 15. */
7045 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7046 for (i = 0; i < 16; ++i)
7047 XVECEXP (mask, 0, i) = GEN_INT (i);
7048
7049 /* Set permute mask to insert element into target. */
7050 for (i = 0; i < width; ++i)
7051 XVECEXP (mask, 0, INTVAL (elt_rtx) * width + i) = GEN_INT (i + 0x10);
7052 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
7053
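/* For example, inserting element 1 of a V4SImode vector (WIDTH of 4)
   gives the byte selector { 0,1,2,3, 16,17,18,19, 8,9,10,11, 12,13,14,15 },
   where indexes 16 and up select bytes of REG, the vector holding the
   new value.  */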
7054 if (BYTES_BIG_ENDIAN)
7055 x = gen_rtx_UNSPEC (mode,
7056 gen_rtvec (3, target, reg,
7057 force_reg (V16QImode, x)),
7058 UNSPEC_VPERM);
7059 else
7060 {
7061 if (TARGET_P9_VECTOR)
7062 x = gen_rtx_UNSPEC (mode,
7063 gen_rtvec (3, reg, target,
7064 force_reg (V16QImode, x)),
7065 UNSPEC_VPERMR);
7066 else
7067 {
7068 /* Invert selector. We prefer to generate VNAND on P8 so
7069 that future fusion opportunities can kick in, but must
7070 generate VNOR elsewhere. */
7071 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7072 rtx iorx = (TARGET_P8_VECTOR
7073 ? gen_rtx_IOR (V16QImode, notx, notx)
7074 : gen_rtx_AND (V16QImode, notx, notx));
7075 rtx tmp = gen_reg_rtx (V16QImode);
7076 emit_insn (gen_rtx_SET (tmp, iorx));
7077
7078 /* Permute with operands reversed and adjusted selector. */
7079 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7080 UNSPEC_VPERM);
7081 }
7082 }
7083
7084 emit_insn (gen_rtx_SET (target, x));
7085 }
7086
7087 /* Extract element ELT of vector VEC into TARGET. */
7088
7089 void
7090 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7091 {
7092 machine_mode mode = GET_MODE (vec);
7093 machine_mode inner_mode = GET_MODE_INNER (mode);
7094 rtx mem;
7095
7096 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7097 {
7098 switch (mode)
7099 {
7100 default:
7101 break;
7102 case E_V1TImode:
7103 emit_move_insn (target, gen_lowpart (TImode, vec));
7104 return;
7105 case E_V2DFmode:
7106 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7107 return;
7108 case E_V2DImode:
7109 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7110 return;
7111 case E_V4SFmode:
7112 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7113 return;
7114 case E_V16QImode:
7115 if (TARGET_DIRECT_MOVE_64BIT)
7116 {
7117 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7118 return;
7119 }
7120 else
7121 break;
7122 case E_V8HImode:
7123 if (TARGET_DIRECT_MOVE_64BIT)
7124 {
7125 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7126 return;
7127 }
7128 else
7129 break;
7130 case E_V4SImode:
7131 if (TARGET_DIRECT_MOVE_64BIT)
7132 {
7133 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7134 return;
7135 }
7136 break;
7137 }
7138 }
7139 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7140 && TARGET_DIRECT_MOVE_64BIT)
7141 {
7142 if (GET_MODE (elt) != DImode)
7143 {
7144 rtx tmp = gen_reg_rtx (DImode);
7145 convert_move (tmp, elt, 0);
7146 elt = tmp;
7147 }
7148 else if (!REG_P (elt))
7149 elt = force_reg (DImode, elt);
7150
7151 switch (mode)
7152 {
7153 case E_V1TImode:
7154 emit_move_insn (target, gen_lowpart (TImode, vec));
7155 return;
7156
7157 case E_V2DFmode:
7158 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7159 return;
7160
7161 case E_V2DImode:
7162 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7163 return;
7164
7165 case E_V4SFmode:
7166 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7167 return;
7168
7169 case E_V4SImode:
7170 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7171 return;
7172
7173 case E_V8HImode:
7174 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7175 return;
7176
7177 case E_V16QImode:
7178 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7179 return;
7180
7181 default:
7182 gcc_unreachable ();
7183 }
7184 }
7185
7186 /* Allocate mode-sized buffer. */
7187 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7188
7189 emit_move_insn (mem, vec);
7190 if (CONST_INT_P (elt))
7191 {
7192 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
7193
7194 /* Add offset to field within buffer matching vector element. */
7195 mem = adjust_address_nv (mem, inner_mode,
7196 modulo_elt * GET_MODE_SIZE (inner_mode));
7197 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
7198 }
7199 else
7200 {
7201 unsigned int ele_size = GET_MODE_SIZE (inner_mode);
7202 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7203 rtx new_addr = gen_reg_rtx (Pmode);
7204
7205 elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
7206 if (ele_size > 1)
7207 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
7208 new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
7209 new_addr = change_address (mem, inner_mode, new_addr);
7210 emit_move_insn (target, new_addr);
7211 }
7212 }
7213
7214 /* Return the offset within a memory object (MEM) of a vector type to a given
7215 element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If
7216 the element is constant, we return a constant integer.
7217
7218 Otherwise, we use a base register temporary to calculate the offset after
7219 masking it to fit within the bounds of the vector and scaling it. The
7220 masking is required by the 64-bit ELF version 2 ABI for the vec_extract
7221 built-in function. */
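/* For example, a variable element number N of a V4SImode vector becomes
   the byte offset computed as ((N & 3) << 2).  */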
7222
7223 static rtx
7224 get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
7225 {
7226 if (CONST_INT_P (element))
7227 return GEN_INT (INTVAL (element) * scalar_size);
7228
7229 /* All insns should use the 'Q' constraint (address is a single register) if
7230 the element number is not a constant. */
7231 gcc_assert (satisfies_constraint_Q (mem));
7232
7233 /* Mask the element to make sure the element number is between 0 and the
7234 maximum number of elements - 1 so that we don't generate an address
7235 outside the vector. */
7236 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (GET_MODE (mem)) - 1);
7237 rtx and_op = gen_rtx_AND (Pmode, element, num_ele_m1);
7238 emit_insn (gen_rtx_SET (base_tmp, and_op));
7239
7240 /* Shift the element to get the byte offset from the element number. */
7241 int shift = exact_log2 (scalar_size);
7242 gcc_assert (shift >= 0);
7243
7244 if (shift > 0)
7245 {
7246 rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
7247 emit_insn (gen_rtx_SET (base_tmp, shift_op));
7248 }
7249
7250 return base_tmp;
7251 }
7252
7253 /* Helper function to update a PC-relative address (ADDR) of a vector in
7254 memory so that it points to a scalar field within the vector at a
7255 constant offset (ELEMENT_OFFSET). If the resulting address is not
7256 valid, use the base register temporary (BASE_TMP) to form the address. */
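/* For example, adjusting (symbol_ref "x") by 8 yields
   (const (plus (symbol_ref "x") (const_int 8))).  */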
7257
7258 static rtx
7259 adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
7260 {
7261 rtx new_addr = NULL;
7262
7263 gcc_assert (CONST_INT_P (element_offset));
7264
7265 if (GET_CODE (addr) == CONST)
7266 addr = XEXP (addr, 0);
7267
7268 if (GET_CODE (addr) == PLUS)
7269 {
7270 rtx op0 = XEXP (addr, 0);
7271 rtx op1 = XEXP (addr, 1);
7272
7273 if (CONST_INT_P (op1))
7274 {
7275 HOST_WIDE_INT offset
7276 = INTVAL (XEXP (addr, 1)) + INTVAL (element_offset);
7277
7278 if (offset == 0)
7279 new_addr = op0;
7280
7281 else
7282 {
7283 rtx plus = gen_rtx_PLUS (Pmode, op0, GEN_INT (offset));
7284 new_addr = gen_rtx_CONST (Pmode, plus);
7285 }
7286 }
7287
7288 else
7289 {
7290 emit_move_insn (base_tmp, addr);
7291 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7292 }
7293 }
7294
7295 else if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
7296 {
7297 rtx plus = gen_rtx_PLUS (Pmode, addr, element_offset);
7298 new_addr = gen_rtx_CONST (Pmode, plus);
7299 }
7300
7301 else
7302 gcc_unreachable ();
7303
7304 return new_addr;
7305 }
7306
7307 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7308 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7309 temporary (BASE_TMP) to fix up the address. Return the new memory address
7310 that is valid for reads or writes to a given register (SCALAR_REG).
7311
7312 This function is expected to be called after reload is completed when we are
7313 splitting insns. The temporary BASE_TMP might be set multiple times with
7314 this code. */
7315
7316 rtx
7317 rs6000_adjust_vec_address (rtx scalar_reg,
7318 rtx mem,
7319 rtx element,
7320 rtx base_tmp,
7321 machine_mode scalar_mode)
7322 {
7323 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7324 rtx addr = XEXP (mem, 0);
7325 rtx new_addr;
7326
7327 gcc_assert (!reg_mentioned_p (base_tmp, addr));
7328 gcc_assert (!reg_mentioned_p (base_tmp, element));
7329
7330 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7331 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7332
7333 /* Calculate what we need to add to the address to get the element
7334 address. */
7335 rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size);
7336
7337 /* Create the new address pointing to the element within the vector. If we
7338 are adding 0, we don't have to change the address. */
7339 if (element_offset == const0_rtx)
7340 new_addr = addr;
7341
7342 /* A simple indirect address can be converted into a reg + offset
7343 address. */
7344 else if (REG_P (addr) || SUBREG_P (addr))
7345 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7346
7347 /* For references to local static variables, fold a constant offset into the
7348 address. */
7349 else if (pcrel_local_address (addr, Pmode) && CONST_INT_P (element_offset))
7350 new_addr = adjust_vec_address_pcrel (addr, element_offset, base_tmp);
7351
7352 /* Optimize D-FORM addresses with constant offset with a constant element, to
7353 include the element offset in the address directly. */
7354 else if (GET_CODE (addr) == PLUS)
7355 {
7356 rtx op0 = XEXP (addr, 0);
7357 rtx op1 = XEXP (addr, 1);
7358
7359 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7360 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7361 {
7362 /* op0 should never be r0, because r0+offset is not valid. But it
7363 doesn't hurt to make sure it is not r0. */
7364 gcc_assert (reg_or_subregno (op0) != 0);
7365
7366 /* D-FORM address with constant element number. */
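/* For example, a (reg + 16) address with an element offset of 8
   folds to (reg + 24).  */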
7367 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7368 rtx offset_rtx = GEN_INT (offset);
7369 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7370 }
7371 else
7372 {
7373 /* If we don't have a D-FORM address with a constant element number,
7374 add the two elements in the current address. Then add the offset.
7375
7376 Previously, we tried to add the offset to OP1 and change the
7377 address to an X-FORM format adding OP0 and BASE_TMP, but it became
7378 complicated because we had to verify that op1 was not GPR0 and we
7379 had a constant element offset (due to the way ADDI is defined).
7380 By doing the add of OP0 and OP1 first, and then adding in the
7381 offset, it has the benefit that if D-FORM instructions are
7382 allowed, the offset is part of the memory access to the vector
7383 element. */
7384 emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
7385 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7386 }
7387 }
7388
7389 else
7390 {
7391 emit_move_insn (base_tmp, addr);
7392 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7393 }
7394
7395 /* If the address isn't valid, move the address into the temporary base
7396 register. Some reasons it could not be valid include:
7397
7398 The address offset overflowed the 16- or 34-bit offset size;
7399 We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
7400 We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
7401 Only X-FORM loads can be done, and the address is D-FORM. */
7402
7403 enum insn_form iform
7404 = address_to_insn_form (new_addr, scalar_mode,
7405 reg_to_non_prefixed (scalar_reg, scalar_mode));
7406
7407 if (iform == INSN_FORM_BAD)
7408 {
7409 emit_move_insn (base_tmp, new_addr);
7410 new_addr = base_tmp;
7411 }
7412
7413 return change_address (mem, scalar_mode, new_addr);
7414 }
7415
7416 /* Split a variable vec_extract operation into the component instructions. */
7417
7418 void
7419 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7420 rtx tmp_altivec)
7421 {
7422 machine_mode mode = GET_MODE (src);
7423 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
7424 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7425 int byte_shift = exact_log2 (scalar_size);
7426
7427 gcc_assert (byte_shift >= 0);
7428
7429 /* If we are given a memory address, optimize to load just the element. We
7430 don't have to adjust the vector element number on little endian
7431 systems. */
7432 if (MEM_P (src))
7433 {
7434 emit_move_insn (dest,
7435 rs6000_adjust_vec_address (dest, src, element, tmp_gpr,
7436 scalar_mode));
7437 return;
7438 }
7439
7440 else if (REG_P (src) || SUBREG_P (src))
7441 {
7442 int num_elements = GET_MODE_NUNITS (mode);
7443 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
7444 int bit_shift = 7 - exact_log2 (num_elements);
7445 rtx element2;
7446 unsigned int dest_regno = reg_or_subregno (dest);
7447 unsigned int src_regno = reg_or_subregno (src);
7448 unsigned int element_regno = reg_or_subregno (element);
7449
7450 gcc_assert (REG_P (tmp_gpr));
7451
7452 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7453 a general purpose register. */
7454 if (TARGET_P9_VECTOR
7455 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7456 && INT_REGNO_P (dest_regno)
7457 && ALTIVEC_REGNO_P (src_regno)
7458 && INT_REGNO_P (element_regno))
7459 {
7460 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7461 rtx element_si = gen_rtx_REG (SImode, element_regno);
7462
7463 if (mode == V16QImode)
7464 emit_insn (BYTES_BIG_ENDIAN
7465 ? gen_vextublx (dest_si, element_si, src)
7466 : gen_vextubrx (dest_si, element_si, src));
7467
7468 else if (mode == V8HImode)
7469 {
7470 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7471 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7472 emit_insn (BYTES_BIG_ENDIAN
7473 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7474 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7475 }
7476
7477
7478 else
7479 {
7480 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7481 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7482 emit_insn (BYTES_BIG_ENDIAN
7483 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7484 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7485 }
7486
7487 return;
7488 }
7489
7490
7491 gcc_assert (REG_P (tmp_altivec));
7492
7493 /* For little endian, adjust the element ordering. For V2DI/V2DF we can
7494 use an XOR; otherwise we must subtract. The shift amount is chosen so
7495 that VSLO will shift the element into the upper position (adding 3
7496 converts a byte shift into a bit shift). */
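/* For example, for V2DImode on little endian, element numbers 0 and 1
   are swapped by the XOR below, and the shift left by 6 then turns the
   doubleword number into a bit count of 0 or 64.  */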
7497 if (scalar_size == 8)
7498 {
7499 if (!BYTES_BIG_ENDIAN)
7500 {
7501 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7502 element2 = tmp_gpr;
7503 }
7504 else
7505 element2 = element;
7506
7507 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7508 bit. */
7509 emit_insn (gen_rtx_SET (tmp_gpr,
7510 gen_rtx_AND (DImode,
7511 gen_rtx_ASHIFT (DImode,
7512 element2,
7513 GEN_INT (6)),
7514 GEN_INT (64))));
7515 }
7516 else
7517 {
7518 if (!BYTES_BIG_ENDIAN)
7519 {
7520 rtx num_ele_m1 = GEN_INT (num_elements - 1);
7521
7522 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7523 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7524 element2 = tmp_gpr;
7525 }
7526 else
7527 element2 = element;
7528
7529 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7530 }
7531
7532 /* Get the value into the lower byte of the Altivec register where VSLO
7533 expects it. */
7534 if (TARGET_P9_VECTOR)
7535 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7536 else if (can_create_pseudo_p ())
7537 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7538 else
7539 {
7540 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7541 emit_move_insn (tmp_di, tmp_gpr);
7542 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7543 }
7544
7545 /* Do the VSLO to get the value into the final location. */
7546 switch (mode)
7547 {
7548 case E_V2DFmode:
7549 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7550 return;
7551
7552 case E_V2DImode:
7553 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7554 return;
7555
7556 case E_V4SFmode:
7557 {
7558 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7559 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7560 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7561 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7562 tmp_altivec));
7563
7564 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7565 return;
7566 }
7567
7568 case E_V4SImode:
7569 case E_V8HImode:
7570 case E_V16QImode:
7571 {
7572 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7573 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7574 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7575 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7576 tmp_altivec));
7577 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7578 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
7579 GEN_INT (64 - bits_in_element)));
7580 return;
7581 }
7582
7583 default:
7584 gcc_unreachable ();
7585 }
7586
7587 return;
7588 }
7589 else
7590 gcc_unreachable ();
7591 }
7592
7593 /* Return the alignment of TYPE. The existing alignment is ALIGN. HOW
7594 selects whether the alignment is ABI-mandated, optional, or both. */
7596
7597 unsigned int
7598 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7599 {
7600 if (how != align_opt)
7601 {
7602 if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
7603 align = 128;
7604 }
7605
7606 if (how != align_abi)
7607 {
7608 if (TREE_CODE (type) == ARRAY_TYPE
7609 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7610 {
7611 if (align < BITS_PER_WORD)
7612 align = BITS_PER_WORD;
7613 }
7614 }
7615
7616 return align;
7617 }
7618
7619 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
7620 instructions simply ignore the low bits; VSX memory instructions
7621 are aligned to 4 or 8 bytes. */
7622
7623 static bool
7624 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
7625 {
7626 return (STRICT_ALIGNMENT
7627 || (!TARGET_EFFICIENT_UNALIGNED_VSX
7628 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
7629 || ((VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode))
7630 && (int) align < VECTOR_ALIGN (mode)))));
7631 }
7632
7633 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7634
7635 bool
7636 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
7637 {
7638 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
7639 {
7640 if (computed != 128)
7641 {
7642 static bool warned;
7643 if (!warned && warn_psabi)
7644 {
7645 warned = true;
7646 inform (input_location,
7647 "the layout of aggregates containing vectors with"
7648 " %d-byte alignment has changed in GCC 5",
7649 computed / BITS_PER_UNIT);
7650 }
7651 }
7652 /* In current GCC there is no special case. */
7653 return false;
7654 }
7655
7656 return false;
7657 }
7658
7659 /* AIX increases natural record alignment to doubleword if the first
7660 field is an FP double while the FP fields remain word aligned. */
7661
7662 unsigned int
7663 rs6000_special_round_type_align (tree type, unsigned int computed,
7664 unsigned int specified)
7665 {
7666 unsigned int align = MAX (computed, specified);
7667 tree field = TYPE_FIELDS (type);
7668
7669 /* Skip all non-field decls. */
7670 while (field != NULL
7671 && (TREE_CODE (field) != FIELD_DECL
7672 || DECL_FIELD_ABI_IGNORED (field)))
7673 field = DECL_CHAIN (field);
7674
7675 if (field != NULL && field != type)
7676 {
7677 type = TREE_TYPE (field);
7678 while (TREE_CODE (type) == ARRAY_TYPE)
7679 type = TREE_TYPE (type);
7680
7681 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7682 align = MAX (align, 64);
7683 }
7684
7685 return align;
7686 }
7687
7688 /* Darwin increases record alignment to the natural alignment of
7689 the first field. */
7690
7691 unsigned int
7692 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7693 unsigned int specified)
7694 {
7695 unsigned int align = MAX (computed, specified);
7696
7697 if (TYPE_PACKED (type))
7698 return align;
7699
7700 /* Find the first field, looking down into aggregates. */
7701 do {
7702 tree field = TYPE_FIELDS (type);
7703 /* Skip all non-field decls. */
7704 while (field != NULL
7705 && (TREE_CODE (field) != FIELD_DECL
7706 || DECL_FIELD_ABI_IGNORED (field)))
7707 field = DECL_CHAIN (field);
7708 if (! field)
7709 break;
7710 /* A packed field does not contribute any extra alignment. */
7711 if (DECL_PACKED (field))
7712 return align;
7713 type = TREE_TYPE (field);
7714 while (TREE_CODE (type) == ARRAY_TYPE)
7715 type = TREE_TYPE (type);
7716 } while (AGGREGATE_TYPE_P (type));
7717
7718 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
7719 align = MAX (align, TYPE_ALIGN (type));
7720
7721 return align;
7722 }
7723
7724 /* Return 1 for an operand in small memory on V.4/eabi. */
7725
7726 int
7727 small_data_operand (rtx op ATTRIBUTE_UNUSED,
7728 machine_mode mode ATTRIBUTE_UNUSED)
7729 {
7730 #if TARGET_ELF
7731 rtx sym_ref;
7732
7733 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
7734 return 0;
7735
7736 if (DEFAULT_ABI != ABI_V4)
7737 return 0;
7738
7739 if (SYMBOL_REF_P (op))
7740 sym_ref = op;
7741
7742 else if (GET_CODE (op) != CONST
7743 || GET_CODE (XEXP (op, 0)) != PLUS
7744 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
7745 || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
7746 return 0;
7747
7748 else
7749 {
7750 rtx sum = XEXP (op, 0);
7751 HOST_WIDE_INT summand;
7752
7753 /* We have to be careful here, because it is the referenced address
7754 that must be 32k from _SDA_BASE_, not just the symbol. */
7755 summand = INTVAL (XEXP (sum, 1));
7756 if (summand < 0 || summand > g_switch_value)
7757 return 0;
7758
7759 sym_ref = XEXP (sum, 0);
7760 }
7761
7762 return SYMBOL_REF_SMALL_P (sym_ref);
7763 #else
7764 return 0;
7765 #endif
7766 }
7767
7768 /* Return true if either operand is a general purpose register. */
7769
7770 bool
7771 gpr_or_gpr_p (rtx op0, rtx op1)
7772 {
7773 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
7774 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
7775 }
7776
7777 /* Return true if this is a move direct operation between GPR registers and
7778 floating point/VSX registers. */
7779
7780 bool
7781 direct_move_p (rtx op0, rtx op1)
7782 {
7783 if (!REG_P (op0) || !REG_P (op1))
7784 return false;
7785
7786 if (!TARGET_DIRECT_MOVE)
7787 return false;
7788
7789 int regno0 = REGNO (op0);
7790 int regno1 = REGNO (op1);
7791 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
7792 return false;
7793
7794 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
7795 return true;
7796
7797 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
7798 return true;
7799
7800 return false;
7801 }
7802
7803 /* Return true if ADDR is an acceptable address for a quad memory
7804 operation of mode MODE (either LQ/STQ for general purpose registers,
7805 or LXV/STXV for vector registers under ISA 3.0). STRICT selects
7806 whether strict register checking is applied to any base register. */
7808
7809 bool
7810 quad_address_p (rtx addr, machine_mode mode, bool strict)
7811 {
7812 rtx op0, op1;
7813
7814 if (GET_MODE_SIZE (mode) < 16)
7815 return false;
7816
7817 if (legitimate_indirect_address_p (addr, strict))
7818 return true;
7819
7820 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
7821 return false;
7822
7823 /* Is this a valid prefixed address? If the bottom four bits of the offset
7824 are non-zero, we could use a prefixed instruction (which does not have the
7825 DQ-form constraint that the traditional instruction had) instead of
7826 forcing the unaligned offset to a GPR. */
7827 if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
7828 return true;
7829
7830 if (GET_CODE (addr) != PLUS)
7831 return false;
7832
7833 op0 = XEXP (addr, 0);
7834 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
7835 return false;
7836
7837 op1 = XEXP (addr, 1);
7838 if (!CONST_INT_P (op1))
7839 return false;
7840
7841 return quad_address_offset_p (INTVAL (op1));
7842 }
7843
7844 /* Return true if this is a load or store quad operation. This function does
7845 not handle the atomic quad memory instructions. */
7846
7847 bool
7848 quad_load_store_p (rtx op0, rtx op1)
7849 {
7850 bool ret;
7851
7852 if (!TARGET_QUAD_MEMORY)
7853 ret = false;
7854
7855 else if (REG_P (op0) && MEM_P (op1))
7856 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
7857 && quad_memory_operand (op1, GET_MODE (op1))
7858 && !reg_overlap_mentioned_p (op0, op1));
7859
7860 else if (MEM_P (op0) && REG_P (op1))
7861 ret = (quad_memory_operand (op0, GET_MODE (op0))
7862 && quad_int_reg_operand (op1, GET_MODE (op1)));
7863
7864 else
7865 ret = false;
7866
7867 if (TARGET_DEBUG_ADDR)
7868 {
7869 fprintf (stderr, "\n========== quad_load_store_p, return %s\n",
7870 ret ? "true" : "false");
7871 debug_rtx (gen_rtx_SET (op0, op1));
7872 }
7873
7874 return ret;
7875 }
7876
7877 /* Given an address, return a constant offset term if one exists. */
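/* For example, for (plus (reg) (const_int 16)) we return (const_int 16),
   while for (lo_sum (reg) (symbol_ref)) we return NULL_RTX.  */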
7878
7879 static rtx
7880 address_offset (rtx op)
7881 {
7882 if (GET_CODE (op) == PRE_INC
7883 || GET_CODE (op) == PRE_DEC)
7884 op = XEXP (op, 0);
7885 else if (GET_CODE (op) == PRE_MODIFY
7886 || GET_CODE (op) == LO_SUM)
7887 op = XEXP (op, 1);
7888
7889 if (GET_CODE (op) == CONST)
7890 op = XEXP (op, 0);
7891
7892 if (GET_CODE (op) == PLUS)
7893 op = XEXP (op, 1);
7894
7895 if (CONST_INT_P (op))
7896 return op;
7897
7898 return NULL_RTX;
7899 }
7900
7901 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
7902 the mode. If we can't find (or don't know) the alignment of the symbol
7903 we assume (optimistically) that it's sufficiently aligned [??? maybe we
7904 should be pessimistic]. Offsets are validated in the same way as for
7905 reg + offset. */
7906 static bool
7907 darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode)
7908 {
7909 /* We should not get here with this. */
7910 gcc_checking_assert (! mode_supports_dq_form (mode));
7911
7912 if (GET_CODE (x) == CONST)
7913 x = XEXP (x, 0);
7914
7915 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
7916 x = XVECEXP (x, 0, 0);
7917
7918 rtx sym = NULL_RTX;
7919 unsigned HOST_WIDE_INT offset = 0;
7920
7921 if (GET_CODE (x) == PLUS)
7922 {
7923 sym = XEXP (x, 0);
7924 if (! SYMBOL_REF_P (sym))
7925 return false;
7926 if (!CONST_INT_P (XEXP (x, 1)))
7927 return false;
7928 offset = INTVAL (XEXP (x, 1));
7929 }
7930 else if (SYMBOL_REF_P (x))
7931 sym = x;
7932 else if (CONST_INT_P (x))
7933 offset = INTVAL (x);
7934 else if (GET_CODE (x) == LABEL_REF)
7935 offset = 0; // We assume code labels are Pmode aligned
7936 else
7937 return false; // Not sure what we have here.
7938
7939 /* If we don't know the alignment of the thing to which the symbol refers,
7940 we assume optimistically it is "enough".
7941 ??? maybe we should be pessimistic instead. */
7942 unsigned align = 0;
7943
7944 if (sym)
7945 {
7946 tree decl = SYMBOL_REF_DECL (sym);
7947 #if TARGET_MACHO
7948 if (MACHO_SYMBOL_INDIRECTION_P (sym))
7949 /* The decl in an indirection symbol is the original one, which might
7950 be less aligned than the indirection. Our indirections are always
7951 pointer-aligned. */
7952 ;
7953 else
7954 #endif
7955 if (decl && DECL_ALIGN (decl))
7956 align = DECL_ALIGN_UNIT (decl);
7957 }
7958
7959 unsigned int extra = 0;
7960 switch (mode)
7961 {
7962 case E_DFmode:
7963 case E_DDmode:
7964 case E_DImode:
7965 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7966 addressing. */
7967 if (VECTOR_MEM_VSX_P (mode))
7968 return false;
7969
7970 if (!TARGET_POWERPC64)
7971 extra = 4;
7972 else if ((offset & 3) || (align & 3))
7973 return false;
7974 break;
7975
7976 case E_TFmode:
7977 case E_IFmode:
7978 case E_KFmode:
7979 case E_TDmode:
7980 case E_TImode:
7981 case E_PTImode:
7982 extra = 8;
7983 if (!TARGET_POWERPC64)
7984 extra = 12;
7985 else if ((offset & 3) || (align & 3))
7986 return false;
7987 break;
7988
7989 default:
7990 break;
7991 }
7992
7993 /* We only care if the access(es) would cause a change to the high part. */
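/* This computes the sign-extended low 16 bits of the offset; for example,
   an offset of 0x9000 becomes ((0x9000 ^ 0x8000) - 0x8000) = -0x7000.  */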
7994 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7995 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7996 }
7997
7998 /* Return true if the MEM operand is a memory operand suitable for use
7999 with a (full width, possibly multiple) gpr load/store. On
8000 powerpc64 this means the offset must be divisible by 4.
8001 Implements 'Y' constraint.
8002
8003 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8004 a constraint function we know the operand has satisfied a suitable
8005 memory predicate.
8006
8007 Offsetting a lo_sum should not be allowed, except where we know by
8008 alignment that a 32k boundary is not crossed. Note that by
8009 "offsetting" here we mean a further offset to access parts of the
8010 MEM. It's fine to have a lo_sum where the inner address is offset
8011 from a sym, since the same sym+offset will appear in the high part
8012 of the address calculation. */
8013
8014 bool
8015 mem_operand_gpr (rtx op, machine_mode mode)
8016 {
8017 unsigned HOST_WIDE_INT offset;
8018 int extra;
8019 rtx addr = XEXP (op, 0);
8020
8021 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
8022 if (TARGET_UPDATE
8023 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
8024 && mode_supports_pre_incdec_p (mode)
8025 && legitimate_indirect_address_p (XEXP (addr, 0), false))
8026 return true;
8027
8028 /* Allow prefixed instructions if supported. If the bottom two bits of the
8029 offset are non-zero, we could use a prefixed instruction (which does not
8030 have the DS-form constraint that the traditional instruction had) instead
8031 of forcing the unaligned offset to a GPR. */
8032 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
8033 return true;
8034
8035 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
8036 really OK. Doing this early avoids teaching all the other machinery
8037 about them. */
8038 if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
8039 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);
8040
8041 /* Only allow offsettable addresses. See PRs 83969 and 84279. */
8042 if (!rs6000_offsettable_memref_p (op, mode, false))
8043 return false;
8044
8045 op = address_offset (addr);
8046 if (op == NULL_RTX)
8047 return true;
8048
8049 offset = INTVAL (op);
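/* On powerpc64, LD and STD are DS-form instructions: the bottom two bits
   of the displacement must be zero.  */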
8050 if (TARGET_POWERPC64 && (offset & 3) != 0)
8051 return false;
8052
8053 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8054 if (extra < 0)
8055 extra = 0;
8056
8057 if (GET_CODE (addr) == LO_SUM)
8058 /* For lo_sum addresses, we must allow any offset except one that
8059 causes a wrap, so test only the low 16 bits. */
8060 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8061
8062 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8063 }
8064
8065 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8066 enforce an offset divisible by 4 even for 32-bit. */
8067
8068 bool
8069 mem_operand_ds_form (rtx op, machine_mode mode)
8070 {
8071 unsigned HOST_WIDE_INT offset;
8072 int extra;
8073 rtx addr = XEXP (op, 0);
8074
8075 /* Allow prefixed instructions if supported. If the bottom two bits of the
8076 offset are non-zero, we could use a prefixed instruction (which does not
8077 have the DS-form constraint that the traditional instruction had) instead
8078 of forcing the unaligned offset to a GPR. */
8079 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
8080 return true;
8081
8082 if (!offsettable_address_p (false, mode, addr))
8083 return false;
8084
8085 op = address_offset (addr);
8086 if (op == NULL_RTX)
8087 return true;
8088
8089 offset = INTVAL (op);
8090 if ((offset & 3) != 0)
8091 return false;
8092
8093 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8094 if (extra < 0)
8095 extra = 0;
8096
8097 if (GET_CODE (addr) == LO_SUM)
8098 /* For lo_sum addresses, we must allow any offset except one that
8099 causes a wrap, so test only the low 16 bits. */
8100 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8101
8102 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8103 }
8104 \f
8105 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8106
8107 static bool
8108 reg_offset_addressing_ok_p (machine_mode mode)
8109 {
8110 switch (mode)
8111 {
8112 case E_V16QImode:
8113 case E_V8HImode:
8114 case E_V4SFmode:
8115 case E_V4SImode:
8116 case E_V2DFmode:
8117 case E_V2DImode:
8118 case E_V1TImode:
8119 case E_TImode:
8120 case E_TFmode:
8121 case E_KFmode:
8122 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8123 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8124 a vector mode, if we want to use the VSX registers to move it around,
8125 we need to restrict ourselves to reg+reg addressing. Similarly for
8126 IEEE 128-bit floating point that is passed in a single vector
8127 register. */
8128 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
8129 return mode_supports_dq_form (mode);
8130 break;
8131
8132 /* The vector pair/quad types support offset addressing if the
8133 underlying vectors support offset addressing. */
8134 case E_OOmode:
8135 case E_XOmode:
8136 return TARGET_MMA;
8137
8138 case E_SDmode:
8139 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8140 addressing for the LFIWZX and STFIWX instructions. */
8141 if (TARGET_NO_SDMODE_STACK)
8142 return false;
8143 break;
8144
8145 default:
8146 break;
8147 }
8148
8149 return true;
8150 }
8151
8152 static bool
8153 virtual_stack_registers_memory_p (rtx op)
8154 {
8155 int regnum;
8156
8157 if (REG_P (op))
8158 regnum = REGNO (op);
8159
8160 else if (GET_CODE (op) == PLUS
8161 && REG_P (XEXP (op, 0))
8162 && CONST_INT_P (XEXP (op, 1)))
8163 regnum = REGNO (XEXP (op, 0));
8164
8165 else
8166 return false;
8167
8168 return (regnum >= FIRST_VIRTUAL_REGISTER
8169 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
8170 }
8171
8172 /* Return true if a MODE sized memory accesses to OP plus OFFSET
8173 is known to not straddle a 32k boundary. This function is used
8174 to determine whether -mcmodel=medium code can use TOC pointer
8175 relative addressing for OP. This means the alignment of the TOC
8176 pointer must also be taken into account, and unfortunately that is
8177 only 8 bytes. */
8178
8179 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8180 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8181 #endif
8182
8183 static bool
8184 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8185 machine_mode mode)
8186 {
8187 tree decl;
8188 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8189
8190 if (!SYMBOL_REF_P (op))
8191 return false;
8192
8193 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8194 SYMBOL_REF. */
8195 if (mode_supports_dq_form (mode))
8196 return false;
8197
8198 dsize = GET_MODE_SIZE (mode);
8199 decl = SYMBOL_REF_DECL (op);
8200 if (!decl)
8201 {
8202 if (dsize == 0)
8203 return false;
8204
8205 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8206 replacing memory addresses with an anchor plus offset. We
8207 could find the decl by rummaging around in the block->objects
8208 VEC for the given offset but that seems like too much work. */
8209 dalign = BITS_PER_UNIT;
8210 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8211 && SYMBOL_REF_ANCHOR_P (op)
8212 && SYMBOL_REF_BLOCK (op) != NULL)
8213 {
8214 struct object_block *block = SYMBOL_REF_BLOCK (op);
8215
8216 dalign = block->alignment;
8217 offset += SYMBOL_REF_BLOCK_OFFSET (op);
8218 }
8219 else if (CONSTANT_POOL_ADDRESS_P (op))
8220 {
8221 /* It would be nice to have get_pool_align ()... */
8222 machine_mode cmode = get_pool_mode (op);
8223
8224 dalign = GET_MODE_ALIGNMENT (cmode);
8225 }
8226 }
8227 else if (DECL_P (decl))
8228 {
8229 dalign = DECL_ALIGN (decl);
8230
8231 if (dsize == 0)
8232 {
8233 /* Allow BLKmode when the entire object is known to not
8234 cross a 32k boundary. */
8235 if (!DECL_SIZE_UNIT (decl))
8236 return false;
8237
8238 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8239 return false;
8240
8241 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8242 if (dsize > 32768)
8243 return false;
8244
8245 dalign /= BITS_PER_UNIT;
8246 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8247 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8248 return dalign >= dsize;
8249 }
8250 }
8251 else
8252 gcc_unreachable ();
8253
8254 /* Find how many bits of the alignment we know for this access. */
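/* For example, with 8-byte known alignment (mask 7) and offset 20, the
   lowest set bit of the offset is 4, so only 4-byte alignment is known:
   lsb = 4, mask becomes 3, and dalign ends up as 4.  */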
8255 dalign /= BITS_PER_UNIT;
8256 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8257 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8258 mask = dalign - 1;
8259 lsb = offset & -offset;
8260 mask &= lsb - 1;
8261 dalign = mask + 1;
8262
8263 return dalign >= dsize;
8264 }
8265
8266 static bool
8267 constant_pool_expr_p (rtx op)
8268 {
8269 rtx base, offset;
8270
8271 split_const (op, &base, &offset);
8272 return (SYMBOL_REF_P (base)
8273 && CONSTANT_POOL_ADDRESS_P (base)
8274 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8275 }
8276
8277 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
8278 use that as the register to put the HIGH value into if register allocation
8279 is already done. */
8280
8281 rtx
8282 create_TOC_reference (rtx symbol, rtx largetoc_reg)
8283 {
8284 rtx tocrel, tocreg, hi;
8285
8286 gcc_assert (TARGET_TOC);
8287
8288 if (TARGET_DEBUG_ADDR)
8289 {
8290 if (SYMBOL_REF_P (symbol))
8291 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
8292 XSTR (symbol, 0));
8293 else
8294 {
8295 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
8296 GET_RTX_NAME (GET_CODE (symbol)));
8297 debug_rtx (symbol);
8298 }
8299 }
8300
8301 if (!can_create_pseudo_p ())
8302 df_set_regs_ever_live (TOC_REGISTER, true);
8303
8304 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
8305 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
8306 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
8307 return tocrel;
8308
8309 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
8310 if (largetoc_reg != NULL)
8311 {
8312 emit_move_insn (largetoc_reg, hi);
8313 hi = largetoc_reg;
8314 }
8315 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
8316 }
8317
8318 /* These are only used to pass through from print_operand/print_operand_address
8319 to rs6000_output_addr_const_extra over the intervening function
8320 output_addr_const, which is not target code. */
8321 static const_rtx tocrel_base_oac, tocrel_offset_oac;
8322
8323 /* Return true if OP is a toc pointer relative address (the output
8324 of create_TOC_reference). If STRICT, do not match non-split
8325 -mcmodel=large/medium toc pointer relative addresses. If the pointers
8326 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
8327 TOCREL_OFFSET_RET respectively. */
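/* For example, a matching address has the shape
   (unspec [(symbol_ref) (reg <TOC>)] UNSPEC_TOCREL), possibly wrapped in
   a (plus ... (const_int)) offset or, for -mcmodel=medium/large, in a
   (lo_sum (reg) ...).  */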
8328
8329 bool
8330 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
8331 const_rtx *tocrel_offset_ret)
8332 {
8333 if (!TARGET_TOC)
8334 return false;
8335
8336 if (TARGET_CMODEL != CMODEL_SMALL)
8337 {
8338 /* When strict ensure we have everything tidy. */
8339 if (strict
8340 && !(GET_CODE (op) == LO_SUM
8341 && REG_P (XEXP (op, 0))
8342 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8343 return false;
8344
8345 /* When not strict, allow non-split TOC addresses and also allow
8346 (lo_sum (high ..)) TOC addresses created during reload. */
8347 if (GET_CODE (op) == LO_SUM)
8348 op = XEXP (op, 1);
8349 }
8350
8351 const_rtx tocrel_base = op;
8352 const_rtx tocrel_offset = const0_rtx;
8353
8354 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8355 {
8356 tocrel_base = XEXP (op, 0);
8357 tocrel_offset = XEXP (op, 1);
8358 }
8359
8360 if (tocrel_base_ret)
8361 *tocrel_base_ret = tocrel_base;
8362 if (tocrel_offset_ret)
8363 *tocrel_offset_ret = tocrel_offset;
8364
8365 return (GET_CODE (tocrel_base) == UNSPEC
8366 && XINT (tocrel_base, 1) == UNSPEC_TOCREL
8367 && REG_P (XVECEXP (tocrel_base, 0, 1))
8368 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
8369 }
8370
8371 /* Return true if X is a constant pool address, and also for cmodel=medium
8372 if X is a toc-relative address known to be offsettable within MODE. */
8373
8374 bool
8375 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8376 bool strict)
8377 {
8378 const_rtx tocrel_base, tocrel_offset;
8379 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
8380 && (TARGET_CMODEL != CMODEL_MEDIUM
8381 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8382 || mode == QImode
8383 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8384 INTVAL (tocrel_offset), mode)));
8385 }
8386
8387 static bool
8388 legitimate_small_data_p (machine_mode mode, rtx x)
8389 {
8390 return (DEFAULT_ABI == ABI_V4
8391 && !flag_pic && !TARGET_TOC
8392 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
8393 && small_data_operand (x, mode));
8394 }
8395
8396 bool
8397 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
8398 bool strict, bool worst_case)
8399 {
8400 unsigned HOST_WIDE_INT offset;
8401 unsigned int extra;
8402
8403 if (GET_CODE (x) != PLUS)
8404 return false;
8405 if (!REG_P (XEXP (x, 0)))
8406 return false;
8407 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8408 return false;
8409 if (mode_supports_dq_form (mode))
8410 return quad_address_p (x, mode, strict);
8411 if (!reg_offset_addressing_ok_p (mode))
8412 return virtual_stack_registers_memory_p (x);
8413 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
8414 return true;
8415 if (!CONST_INT_P (XEXP (x, 1)))
8416 return false;
8417
8418 offset = INTVAL (XEXP (x, 1));
8419 extra = 0;
8420 switch (mode)
8421 {
8422 case E_DFmode:
8423 case E_DDmode:
8424 case E_DImode:
8425 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8426 addressing. */
8427 if (VECTOR_MEM_VSX_P (mode))
8428 return false;
8429
8430 if (!worst_case)
8431 break;
8432 if (!TARGET_POWERPC64)
8433 extra = 4;
8434 else if (offset & 3)
8435 return false;
8436 break;
8437
8438 case E_TFmode:
8439 case E_IFmode:
8440 case E_KFmode:
8441 case E_TDmode:
8442 case E_TImode:
8443 case E_PTImode:
8444 extra = 8;
8445 if (!worst_case)
8446 break;
8447 if (!TARGET_POWERPC64)
8448 extra = 12;
8449 else if (offset & 3)
8450 return false;
8451 break;
8452
8453 default:
8454 break;
8455 }
8456
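/* EXTRA accounts for the highest-addressed word of a multi-word access;
   e.g. a 16-byte TImode access at OFFSET also touches OFFSET + 8, so both
   offsets must fit in the signed displacement.  */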
8457 if (TARGET_PREFIXED)
8458 return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
8459 else
8460 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8461 }
8462
8463 bool
8464 legitimate_indexed_address_p (rtx x, int strict)
8465 {
8466 rtx op0, op1;
8467
8468 if (GET_CODE (x) != PLUS)
8469 return false;
8470
8471 op0 = XEXP (x, 0);
8472 op1 = XEXP (x, 1);
8473
8474 return (REG_P (op0) && REG_P (op1)
8475 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
8476 && INT_REG_OK_FOR_INDEX_P (op1, strict))
8477 || (INT_REG_OK_FOR_BASE_P (op1, strict)
8478 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
8479 }
8480
8481 bool
8482 avoiding_indexed_address_p (machine_mode mode)
8483 {
8484 unsigned int msize = GET_MODE_SIZE (mode);
8485
8486 /* Avoid indexed addressing for modes that have non-indexed load/store
8487 instruction forms. On power10, vector pairs have an indexed
8488 form, but vector quads don't. */
8489 if (msize > 16)
8490 return msize != 32;
8491
8492 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
8493 }
8494
8495 bool
8496 legitimate_indirect_address_p (rtx x, int strict)
8497 {
8498 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
8499 }
8500
8501 bool
8502 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
8503 {
8504 if (!TARGET_MACHO || !flag_pic
8505 || mode != SImode || !MEM_P (x))
8506 return false;
8507 x = XEXP (x, 0);
8508
8509 if (GET_CODE (x) != LO_SUM)
8510 return false;
8511 if (!REG_P (XEXP (x, 0)))
8512 return false;
8513 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
8514 return false;
8515 x = XEXP (x, 1);
8516
8517 return CONSTANT_P (x);
8518 }
8519
8520 static bool
8521 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
8522 {
8523 if (GET_CODE (x) != LO_SUM)
8524 return false;
8525 if (!REG_P (XEXP (x, 0)))
8526 return false;
8527 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8528 return false;
8529 /* Quad word addresses are restricted; we can't use LO_SUM. */
8530 if (mode_supports_dq_form (mode))
8531 return false;
8532 x = XEXP (x, 1);
8533
8534 if (TARGET_ELF || TARGET_MACHO)
8535 {
8536 bool large_toc_ok;
8537
8538 if (DEFAULT_ABI == ABI_V4 && flag_pic)
8539 return false;
8540 /* LRA does not use LEGITIMIZE_RELOAD_ADDRESS, which is usually called
8541 via push_reload from the reload pass. That macro recognizes some
8542 LO_SUM addresses as valid even though this function says otherwise.
8543 In most cases LRA can transform an address reload into correct code,
8544 but it cannot manage certain LO_SUM cases, so we must accept those
8545 addresses here. */
8547 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8548 && small_toc_ref (x, VOIDmode));
8549 if (TARGET_TOC && ! large_toc_ok)
8550 return false;
8551 if (GET_MODE_NUNITS (mode) != 1)
8552 return false;
8553 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8554 && !(/* ??? Assume floating point reg based on mode? */
8555 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8556 return false;
8557
8558 return CONSTANT_P (x) || large_toc_ok;
8559 }
8560
8561 return false;
8562 }
8563
8564
8565 /* Try machine-dependent ways of modifying an illegitimate address
8566 to be legitimate. If we find one, return the new, valid address.
8567 This is used from only one place: `memory_address' in explow.c.
8568
8569 OLDX is the address as it was before break_out_memory_refs was
8570 called. In some cases it is useful to look at this to decide what
8571 needs to be done.
8572
8573 It is always safe for this function to do nothing. It exists to
8574 recognize opportunities to optimize the output.
8575
8576 On RS/6000, first check for the sum of a register with a constant
8577 integer that is out of range. If so, generate code to add the
8578 constant with the low-order 16 bits masked to the register and force
8579 this result into another register (this can be done with `cau').
8580 Then generate an address of REG+(CONST&0xffff), allowing for the
8581 possibility of bit 16 being a one.
8582
8583 Then check for the sum of a register and something not constant, try to
8584 load the other things into a register and return the sum. */
8585
8586 static rtx
8587 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8588 machine_mode mode)
8589 {
8590 unsigned int extra;
8591
8592 if (!reg_offset_addressing_ok_p (mode)
8593 || mode_supports_dq_form (mode))
8594 {
8595 if (virtual_stack_registers_memory_p (x))
8596 return x;
8597
8598 /* In theory we should not be seeing addresses of the form reg+0,
8599 but just in case it is generated, optimize it away. */
8600 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8601 return force_reg (Pmode, XEXP (x, 0));
8602
8603 /* For TImode with load/store quad, restrict addresses to just a single
8604 pointer, so it works with both GPRs and VSX registers. */
8605 /* Make sure both operands are registers. */
8606 else if (GET_CODE (x) == PLUS
8607 && (mode != TImode || !TARGET_VSX))
8608 return gen_rtx_PLUS (Pmode,
8609 force_reg (Pmode, XEXP (x, 0)),
8610 force_reg (Pmode, XEXP (x, 1)));
8611 else
8612 return force_reg (Pmode, x);
8613 }
8614 if (SYMBOL_REF_P (x))
8615 {
8616 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8617 if (model != 0)
8618 return rs6000_legitimize_tls_address (x, model);
8619 }
8620
8621 extra = 0;
8622 switch (mode)
8623 {
8624 case E_TFmode:
8625 case E_TDmode:
8626 case E_TImode:
8627 case E_PTImode:
8628 case E_IFmode:
8629 case E_KFmode:
8630 /* As in legitimate_offset_address_p we do not assume
8631 worst-case. The mode here is just a hint as to the registers
8632 used. A TImode is usually in gprs, but may actually be in
8633 fprs. Leave worst-case scenario for reload to handle via
8634 insn constraints. PTImode is only GPRs. */
8635 extra = 8;
8636 break;
8637 default:
8638 break;
8639 }
8640
8641 if (GET_CODE (x) == PLUS
8642 && REG_P (XEXP (x, 0))
8643 && CONST_INT_P (XEXP (x, 1))
8644 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8645 >= 0x10000 - extra))
8646 {
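/* For example, an offset of 0x12345 splits into high_int = 0x10000,
   added into the base register, and low_int = 0x2345, which stays in
   the address.  */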
8647 HOST_WIDE_INT high_int, low_int;
8648 rtx sum;
8649 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8650 if (low_int >= 0x8000 - extra)
8651 low_int = 0;
8652 high_int = INTVAL (XEXP (x, 1)) - low_int;
8653 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8654 gen_int_mode (high_int, Pmode)), 0);
8655 return plus_constant (Pmode, sum, low_int);
8656 }
8657 else if (GET_CODE (x) == PLUS
8658 && REG_P (XEXP (x, 0))
8659 && !CONST_INT_P (XEXP (x, 1))
8660 && GET_MODE_NUNITS (mode) == 1
8661 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8662 || (/* ??? Assume floating point reg based on mode? */
8663 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8664 && !avoiding_indexed_address_p (mode))
8665 {
8666 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8667 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
8668 }
8669 else if ((TARGET_ELF
8670 #if TARGET_MACHO
8671 || !MACHO_DYNAMIC_NO_PIC_P
8672 #endif
8673 )
8674 && TARGET_32BIT
8675 && TARGET_NO_TOC_OR_PCREL
8676 && !flag_pic
8677 && !CONST_INT_P (x)
8678 && !CONST_WIDE_INT_P (x)
8679 && !CONST_DOUBLE_P (x)
8680 && CONSTANT_P (x)
8681 && GET_MODE_NUNITS (mode) == 1
8682 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8683 || (/* ??? Assume floating point reg based on mode? */
8684 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
8685 {
8686 rtx reg = gen_reg_rtx (Pmode);
8687 if (TARGET_ELF)
8688 emit_insn (gen_elf_high (reg, x));
8689 else
8690 emit_insn (gen_macho_high (Pmode, reg, x));
8691 return gen_rtx_LO_SUM (Pmode, reg, x);
8692 }
8693 else if (TARGET_TOC
8694 && SYMBOL_REF_P (x)
8695 && constant_pool_expr_p (x)
8696 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
8697 return create_TOC_reference (x, NULL_RTX);
8698 else
8699 return x;
8700 }
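/* A minimal, self-contained sketch (not GCC code) of the high/low split
   performed above for out-of-range reg+const addresses: LOW is the offset
   sign-extended to 16 bits and HIGH is the remainder, so HIGH + LOW == VAL
   and LOW fits in a signed 16-bit displacement.  split_offset is a
   hypothetical helper, not part of this file.  */

#include <assert.h>
#include <stdint.h>

static void
split_offset (int64_t val, int64_t *high, int64_t *low)
{
  *low = ((val & 0xffff) ^ 0x8000) - 0x8000;   /* sign-extend low 16 bits */
  *high = val - *low;                          /* the part `addis' adds */
}

int
main (void)
{
  int64_t high, low;
  split_offset (0x12345678, &high, &low);
  assert (low == 0x5678 && high == 0x12340000);
  split_offset (0x1234fffc, &high, &low);      /* low half sign-extends */
  assert (low == -4 && high == 0x12350000);
  return 0;
}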
8701
8702 /* Debug version of rs6000_legitimize_address. */
8703 static rtx
8704 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
8705 {
8706 rtx ret;
8707 rtx_insn *insns;
8708
8709 start_sequence ();
8710 ret = rs6000_legitimize_address (x, oldx, mode);
8711 insns = get_insns ();
8712 end_sequence ();
8713
8714 if (ret != x)
8715 {
8716 fprintf (stderr,
8717 "\nrs6000_legitimize_address: mode %s, old code %s, "
8718 "new code %s, modified\n",
8719 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8720 GET_RTX_NAME (GET_CODE (ret)));
8721
8722 fprintf (stderr, "Original address:\n");
8723 debug_rtx (x);
8724
8725 fprintf (stderr, "oldx:\n");
8726 debug_rtx (oldx);
8727
8728 fprintf (stderr, "New address:\n");
8729 debug_rtx (ret);
8730
8731 if (insns)
8732 {
8733 fprintf (stderr, "Insns added:\n");
8734 debug_rtx_list (insns, 20);
8735 }
8736 }
8737 else
8738 {
8739 fprintf (stderr,
8740 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8741 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8742
8743 debug_rtx (x);
8744 }
8745
8746 if (insns)
8747 emit_insn (insns);
8748
8749 return ret;
8750 }
8751
8752 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8753 We need to emit DTP-relative relocations. */
8754
8755 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8756 static void
8757 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8758 {
8759 switch (size)
8760 {
8761 case 4:
8762 fputs ("\t.long\t", file);
8763 break;
8764 case 8:
8765 fputs (DOUBLE_INT_ASM_OP, file);
8766 break;
8767 default:
8768 gcc_unreachable ();
8769 }
8770 output_addr_const (file, x);
8771 if (TARGET_ELF)
8772 fputs ("@dtprel+0x8000", file);
8773 else if (TARGET_XCOFF && SYMBOL_REF_P (x))
8774 {
8775 switch (SYMBOL_REF_TLS_MODEL (x))
8776 {
8777 case 0:
8778 break;
8779 case TLS_MODEL_LOCAL_EXEC:
8780 fputs ("@le", file);
8781 break;
8782 case TLS_MODEL_INITIAL_EXEC:
8783 fputs ("@ie", file);
8784 break;
8785 case TLS_MODEL_GLOBAL_DYNAMIC:
8786 case TLS_MODEL_LOCAL_DYNAMIC:
8787 fputs ("@m", file);
8788 break;
8789 default:
8790 gcc_unreachable ();
8791 }
8792 }
8793 }
8794
8795 /* Return true if X is a symbol that refers to real (rather than emulated)
8796 TLS. */
8797
8798 static bool
8799 rs6000_real_tls_symbol_ref_p (rtx x)
8800 {
8801 return (SYMBOL_REF_P (x)
8802 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8803 }
8804
8805 /* In the name of slightly smaller debug output, and to cater to
8806 general assembler lossage, recognize various UNSPEC sequences
8807 and turn them back into a direct symbol reference. */
8808
8809 static rtx
8810 rs6000_delegitimize_address (rtx orig_x)
8811 {
8812 rtx x, y, offset;
8813
8814 if (GET_CODE (orig_x) == UNSPEC && XINT (orig_x, 1) == UNSPEC_FUSION_GPR)
8815 orig_x = XVECEXP (orig_x, 0, 0);
8816
8817 orig_x = delegitimize_mem_from_attrs (orig_x);
8818
8819 x = orig_x;
8820 if (MEM_P (x))
8821 x = XEXP (x, 0);
8822
8823 y = x;
8824 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
8825 y = XEXP (y, 1);
8826
8827 offset = NULL_RTX;
8828 if (GET_CODE (y) == PLUS
8829 && GET_MODE (y) == Pmode
8830 && CONST_INT_P (XEXP (y, 1)))
8831 {
8832 offset = XEXP (y, 1);
8833 y = XEXP (y, 0);
8834 }
8835
8836 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
8837 {
8838 y = XVECEXP (y, 0, 0);
8839
8840 #ifdef HAVE_AS_TLS
8841 /* Do not associate thread-local symbols with the original
8842 constant pool symbol. */
8843 if (TARGET_XCOFF
8844 && SYMBOL_REF_P (y)
8845 && CONSTANT_POOL_ADDRESS_P (y)
8846 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8847 return orig_x;
8848 #endif
8849
8850 if (offset != NULL_RTX)
8851 y = gen_rtx_PLUS (Pmode, y, offset);
8852 if (!MEM_P (orig_x))
8853 return y;
8854 else
8855 return replace_equiv_address_nv (orig_x, y);
8856 }
8857
8858 if (TARGET_MACHO
8859 && GET_CODE (orig_x) == LO_SUM
8860 && GET_CODE (XEXP (orig_x, 1)) == CONST)
8861 {
8862 y = XEXP (XEXP (orig_x, 1), 0);
8863 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
8864 return XVECEXP (y, 0, 0);
8865 }
8866
8867 return orig_x;
8868 }
8869
8870 /* Return true if X shouldn't be emitted into the debug info.
8871 The linker doesn't like .toc section references from
8872 .debug_* sections, so reject .toc section symbols. */
8873
8874 static bool
8875 rs6000_const_not_ok_for_debug_p (rtx x)
8876 {
8877 if (GET_CODE (x) == UNSPEC)
8878 return true;
8879 if (SYMBOL_REF_P (x)
8880 && CONSTANT_POOL_ADDRESS_P (x))
8881 {
8882 rtx c = get_pool_constant (x);
8883 machine_mode cmode = get_pool_mode (x);
8884 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
8885 return true;
8886 }
8887
8888 return false;
8889 }
8890
8891 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
8892
8893 static bool
8894 rs6000_legitimate_combined_insn (rtx_insn *insn)
8895 {
8896 int icode = INSN_CODE (insn);
8897
8898 /* Reject creating doloop insns. Combine should not be allowed
8899 to create these for a number of reasons:
8900 1) In a nested loop, if combine creates one of these in an
8901 outer loop and the register allocator happens to allocate ctr
8902 to the outer loop insn, then the inner loop can't use ctr.
8903 Inner loops ought to be more highly optimized.
8904 2) Combine often wants to create one of these from what was
8905 originally a three insn sequence, first combining the three
8906 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
8907 allocated ctr, the splitter takes us back to the three insn
8908 sequence. It's better to stop combine at the two insn
8909 sequence.
8910 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
8911 insns, the register allocator sometimes uses floating point
8912 or vector registers for the pseudo. Since ctrsi/ctrdi is a
8913 jump insn and output reloads are not implemented for jumps,
8914 the ctrsi/ctrdi splitters need to handle all possible cases.
8915 That's a pain, and it gets to be seriously difficult when a
8916 splitter that runs after reload needs memory to transfer from
8917 a gpr to fpr. See PR70098 and PR71763 which are not fixed
8918 for the difficult case. It's better to not create problems
8919 in the first place. */
8920 if (icode != CODE_FOR_nothing
8921 && (icode == CODE_FOR_bdz_si
8922 || icode == CODE_FOR_bdz_di
8923 || icode == CODE_FOR_bdnz_si
8924 || icode == CODE_FOR_bdnz_di
8925 || icode == CODE_FOR_bdztf_si
8926 || icode == CODE_FOR_bdztf_di
8927 || icode == CODE_FOR_bdnztf_si
8928 || icode == CODE_FOR_bdnztf_di))
8929 return false;
8930
8931 return true;
8932 }
8933
8934 /* Construct the SYMBOL_REF for the tls_get_addr function. */
8935
8936 static GTY(()) rtx rs6000_tls_symbol;
8937 static rtx
8938 rs6000_tls_get_addr (void)
8939 {
8940 if (!rs6000_tls_symbol)
8941 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
8942
8943 return rs6000_tls_symbol;
8944 }
8945
8946 /* Construct the SYMBOL_REF for TLS GOT references. */
8947
8948 static GTY(()) rtx rs6000_got_symbol;
8949 rtx
8950 rs6000_got_sym (void)
8951 {
8952 if (!rs6000_got_symbol)
8953 {
8954 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8955 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
8956 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
8957 }
8958
8959 return rs6000_got_symbol;
8960 }
8961
8962 /* AIX Thread-Local Address support. */
8963
8964 static rtx
8965 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
8966 {
8967 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
8968 const char *name;
8969 char *tlsname;
8970
8971 name = XSTR (addr, 0);
8972 /* Append TLS CSECT qualifier, unless the symbol already is qualified
8973 or the symbol will be in the TLS private data section. */
8974 if (name[strlen (name) - 1] != ']'
8975 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
8976 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
8977 {
8978 tlsname = XALLOCAVEC (char, strlen (name) + 5); /* 4-char suffix + NUL */
8979 strcpy (tlsname, name);
8980 strcat (tlsname,
8981 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
8982 tlsaddr = copy_rtx (addr);
8983 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
8984 }
8985 else
8986 tlsaddr = addr;
8987
8988 /* Place addr into TOC constant pool. */
8989 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
8990
8991 /* Output the TOC entry and create the MEM referencing the value. */
8992 if (constant_pool_expr_p (XEXP (sym, 0))
8993 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
8994 {
8995 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
8996 mem = gen_const_mem (Pmode, tocref);
8997 set_mem_alias_set (mem, get_TOC_alias_set ());
8998 }
8999 else
9000 return sym;
9001
9002 /* Use global-dynamic for local-dynamic. */
9003 if (model == TLS_MODEL_GLOBAL_DYNAMIC
9004 || model == TLS_MODEL_LOCAL_DYNAMIC)
9005 {
9006 /* Create new TOC reference for @m symbol. */
9007 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
9008 tlsname = XALLOCAVEC (char, strlen (name) + 2); /* "*LCM" adds one char + NUL */
9009 strcpy (tlsname, "*LCM");
9010 strcat (tlsname, name + 3);
9011 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
9012 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
9013 tocref = create_TOC_reference (modaddr, NULL_RTX);
9014 rtx modmem = gen_const_mem (Pmode, tocref);
9015 set_mem_alias_set (modmem, get_TOC_alias_set ());
9016
9017 rtx modreg = gen_reg_rtx (Pmode);
9018 emit_insn (gen_rtx_SET (modreg, modmem));
9019
9020 tmpreg = gen_reg_rtx (Pmode);
9021 emit_insn (gen_rtx_SET (tmpreg, mem));
9022
9023 dest = gen_reg_rtx (Pmode);
9024 if (TARGET_32BIT)
9025 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
9026 else
9027 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
9028 return dest;
9029 }
9030 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
9031 else if (TARGET_32BIT)
9032 {
9033 tlsreg = gen_reg_rtx (SImode);
9034 emit_insn (gen_tls_get_tpointer (tlsreg));
9035 }
9036 else
9037 tlsreg = gen_rtx_REG (DImode, 13);
9038
9039 /* Load the TOC value into temporary register. */
9040 tmpreg = gen_reg_rtx (Pmode);
9041 emit_insn (gen_rtx_SET (tmpreg, mem));
9042 set_unique_reg_note (get_last_insn (), REG_EQUAL,
9043 gen_rtx_MINUS (Pmode, addr, tlsreg));
9044
9045 /* Add TOC symbol value to TLS pointer. */
9046 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
9047
9048 return dest;
9049 }
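/* A standalone sketch (not GCC code) of the CSECT-qualifier renaming done
   at the top of the function above: append "[UL]" for BSS-style TLS data
   and "[TL]" otherwise, unless the name already ends in ']'.  Note the
   buffer needs strlen (name) + 5 bytes (4-character suffix plus the
   trailing NUL).  append_tls_qualifier is a hypothetical helper.  */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *
append_tls_qualifier (const char *name, int is_bss)
{
  size_t len = strlen (name);
  const char *suffix = "";
  if (len == 0 || name[len - 1] != ']')  /* not already qualified */
    suffix = is_bss ? "[UL]" : "[TL]";
  char *qualified = malloc (len + strlen (suffix) + 1);
  sprintf (qualified, "%s%s", name, suffix);
  return qualified;
}

int
main (void)
{
  char *s = append_tls_qualifier ("my_tls_var", 0);
  puts (s);                              /* prints "my_tls_var[TL]" */
  free (s);
  return 0;
}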
9050
9051 /* Passes the tls arg value for global dynamic and local dynamic
9052 emit_library_call_value in rs6000_legitimize_tls_address to
9053 rs6000_call_aix and rs6000_call_sysv. This is used to emit the
9054 marker relocs put on __tls_get_addr calls. */
9055 static rtx global_tlsarg;
9056
9057 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9058 this (thread-local) address. */
9059
9060 static rtx
9061 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
9062 {
9063 rtx dest, insn;
9064
9065 if (TARGET_XCOFF)
9066 return rs6000_legitimize_tls_address_aix (addr, model);
9067
9068 dest = gen_reg_rtx (Pmode);
9069 if (model == TLS_MODEL_LOCAL_EXEC
9070 && (rs6000_tls_size == 16 || rs6000_pcrel_p ()))
9071 {
9072 rtx tlsreg;
9073
9074 if (TARGET_64BIT)
9075 {
9076 tlsreg = gen_rtx_REG (Pmode, 13);
9077 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
9078 }
9079 else
9080 {
9081 tlsreg = gen_rtx_REG (Pmode, 2);
9082 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
9083 }
9084 emit_insn (insn);
9085 }
9086 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
9087 {
9088 rtx tlsreg, tmp;
9089
9090 tmp = gen_reg_rtx (Pmode);
9091 if (TARGET_64BIT)
9092 {
9093 tlsreg = gen_rtx_REG (Pmode, 13);
9094 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
9095 }
9096 else
9097 {
9098 tlsreg = gen_rtx_REG (Pmode, 2);
9099 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
9100 }
9101 emit_insn (insn);
9102 if (TARGET_64BIT)
9103 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
9104 else
9105 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
9106 emit_insn (insn);
9107 }
9108 else
9109 {
9110 rtx got, tga, tmp1, tmp2;
9111
9112 /* We currently use relocations like @got@tlsgd for tls, which
9113 means the linker will handle allocation of tls entries, placing
9114 them in the .got section. So use a pointer to the .got section,
9115 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9116 or to secondary GOT sections used by 32-bit -fPIC. */
9117 if (rs6000_pcrel_p ())
9118 got = const0_rtx;
9119 else if (TARGET_64BIT)
9120 got = gen_rtx_REG (Pmode, 2);
9121 else
9122 {
9123 if (flag_pic == 1)
9124 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9125 else
9126 {
9127 rtx gsym = rs6000_got_sym ();
9128 got = gen_reg_rtx (Pmode);
9129 if (flag_pic == 0)
9130 rs6000_emit_move (got, gsym, Pmode);
9131 else
9132 {
9133 rtx mem, lab;
9134
9135 tmp1 = gen_reg_rtx (Pmode);
9136 tmp2 = gen_reg_rtx (Pmode);
9137 mem = gen_const_mem (Pmode, tmp1);
9138 lab = gen_label_rtx ();
9139 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9140 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9141 if (TARGET_LINK_STACK)
9142 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9143 emit_move_insn (tmp2, mem);
9144 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9145 set_unique_reg_note (last, REG_EQUAL, gsym);
9146 }
9147 }
9148 }
9149
9150 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9151 {
9152 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
9153 UNSPEC_TLSGD);
9154 tga = rs6000_tls_get_addr ();
9155 rtx argreg = gen_rtx_REG (Pmode, 3);
9156 emit_insn (gen_rtx_SET (argreg, arg));
9157 global_tlsarg = arg;
9158 emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode);
9159 global_tlsarg = NULL_RTX;
9160
9161 /* Make a note so that the result of this call can be CSEd. */
9162 rtvec vec = gen_rtvec (1, copy_rtx (arg));
9163 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
9164 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
9165 }
9166 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9167 {
9168 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
9169 tga = rs6000_tls_get_addr ();
9170 tmp1 = gen_reg_rtx (Pmode);
9171 rtx argreg = gen_rtx_REG (Pmode, 3);
9172 emit_insn (gen_rtx_SET (argreg, arg));
9173 global_tlsarg = arg;
9174 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode);
9175 global_tlsarg = NULL_RTX;
9176
9177 /* Make a note so that the result of this call can be CSEd. */
9178 rtvec vec = gen_rtvec (1, copy_rtx (arg));
9179 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
9180 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
9181
9182 if (rs6000_tls_size == 16 || rs6000_pcrel_p ())
9183 {
9184 if (TARGET_64BIT)
9185 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9186 else
9187 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9188 }
9189 else if (rs6000_tls_size == 32)
9190 {
9191 tmp2 = gen_reg_rtx (Pmode);
9192 if (TARGET_64BIT)
9193 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9194 else
9195 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9196 emit_insn (insn);
9197 if (TARGET_64BIT)
9198 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9199 else
9200 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9201 }
9202 else
9203 {
9204 tmp2 = gen_reg_rtx (Pmode);
9205 if (TARGET_64BIT)
9206 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9207 else
9208 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9209 emit_insn (insn);
9210 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9211 }
9212 emit_insn (insn);
9213 }
9214 else
9215 {
9216 /* IE, or 64-bit offset LE. */
9217 tmp2 = gen_reg_rtx (Pmode);
9218 if (TARGET_64BIT)
9219 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9220 else
9221 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9222 emit_insn (insn);
9223 if (rs6000_pcrel_p ())
9224 {
9225 if (TARGET_64BIT)
9226 insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
9227 else
9228 insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
9229 }
9230 else if (TARGET_64BIT)
9231 insn = gen_tls_tls_64 (dest, tmp2, addr);
9232 else
9233 insn = gen_tls_tls_32 (dest, tmp2, addr);
9234 emit_insn (insn);
9235 }
9236 }
9237
9238 return dest;
9239 }
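/* A descriptive standalone sketch (not GCC code) summarizing the dispatch
   in the function above: which access sequence each TLS model gets.  The
   strings name the insn patterns and calls used above; the enum is a
   stand-in for the real tls_model values.  */

#include <stdio.h>

enum tls_model_sketch { LOCAL_EXEC, INITIAL_EXEC, GLOBAL_DYNAMIC, LOCAL_DYNAMIC };

static const char *
tls_sequence (enum tls_model_sketch model)
{
  switch (model)
    {
    case LOCAL_EXEC:     return "tprel offset from thread pointer (r13/r2)";
    case INITIAL_EXEC:   return "got_tprel load, then add thread pointer";
    case GLOBAL_DYNAMIC: return "call __tls_get_addr (arg = @got@tlsgd)";
    case LOCAL_DYNAMIC:  return "call __tls_get_addr (@got@tlsld), then dtprel";
    default:             return "?";
    }
}

int
main (void)
{
  printf ("%s\n", tls_sequence (GLOBAL_DYNAMIC));
  return 0;
}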
9240
9241 /* Only create the global variable for the stack protect guard if we are using
9242 the global flavor of that guard. */
9243 static tree
9244 rs6000_init_stack_protect_guard (void)
9245 {
9246 if (rs6000_stack_protector_guard == SSP_GLOBAL)
9247 return default_stack_protect_guard ();
9248
9249 return NULL_TREE;
9250 }
9251
9252 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9253
9254 static bool
9255 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9256 {
9257 if (GET_CODE (x) == HIGH
9258 && GET_CODE (XEXP (x, 0)) == UNSPEC)
9259 return true;
9260
9261 /* A TLS symbol in the TOC cannot contain a sum. */
9262 if (GET_CODE (x) == CONST
9263 && GET_CODE (XEXP (x, 0)) == PLUS
9264 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
9265 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9266 return true;
9267
9268 /* Do not place an ELF TLS symbol in the constant pool. */
9269 return TARGET_ELF && tls_referenced_p (x);
9270 }
9271
9272 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9273 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9274 can be addressed relative to the toc pointer. */
9275
9276 static bool
9277 use_toc_relative_ref (rtx sym, machine_mode mode)
9278 {
9279 return ((constant_pool_expr_p (sym)
9280 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9281 get_pool_mode (sym)))
9282 || (TARGET_CMODEL == CMODEL_MEDIUM
9283 && SYMBOL_REF_LOCAL_P (sym)
9284 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9285 }
9286
9287 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9288 that is a valid memory address for an instruction.
9289 The MODE argument is the machine mode for the MEM expression
9290 that wants to use this address.
9291
9292 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
9293 refers to a constant pool entry of an address (or the sum of it
9294 plus a constant), a short (16-bit signed) constant plus a register,
9295 the sum of two registers, or a register indirect, possibly with an
9296 auto-increment. For DFmode, DDmode and DImode with a constant plus
9297 register, we must ensure that both words are addressable or PowerPC64
9298 with offset word aligned.
9299
9300 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9301 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9302 because adjacent memory cells are accessed by adding word-sized offsets
9303 during assembly output. */
9304 static bool
9305 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
9306 {
9307 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9308 bool quad_offset_p = mode_supports_dq_form (mode);
9309
9310 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
9311 return 0;
9312
9313 /* Handle unaligned altivec lvx/stvx type addresses. */
9314 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
9315 && GET_CODE (x) == AND
9316 && CONST_INT_P (XEXP (x, 1))
9317 && INTVAL (XEXP (x, 1)) == -16)
9318 {
9319 x = XEXP (x, 0);
9320 return (legitimate_indirect_address_p (x, reg_ok_strict)
9321 || legitimate_indexed_address_p (x, reg_ok_strict)
9322 || virtual_stack_registers_memory_p (x));
9323 }
9324
9325 if (legitimate_indirect_address_p (x, reg_ok_strict))
9326 return 1;
9327 if (TARGET_UPDATE
9328 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
9329 && mode_supports_pre_incdec_p (mode)
9330 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
9331 return 1;
9332
9333 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
9334 if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT))
9335 return 1;
9336
9337 /* Handle restricted vector d-form offsets in ISA 3.0. */
9338 if (quad_offset_p)
9339 {
9340 if (quad_address_p (x, mode, reg_ok_strict))
9341 return 1;
9342 }
9343 else if (virtual_stack_registers_memory_p (x))
9344 return 1;
9345
9346 else if (reg_offset_p)
9347 {
9348 if (legitimate_small_data_p (mode, x))
9349 return 1;
9350 if (legitimate_constant_pool_address_p (x, mode,
9351 reg_ok_strict || lra_in_progress))
9352 return 1;
9353 }
9354
9355 /* For TImode, if we have TImode in VSX registers, only allow register
9356 indirect addresses. This will allow the values to go in either GPRs
9357 or VSX registers without reloading. The vector types would tend to
9358 go into VSX registers, so we allow REG+REG, while TImode seems
9359 somewhat split, in that some uses are GPR based, and some VSX based. */
9360 /* FIXME: We could loosen this by changing the following to
9361 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
9362 but currently we cannot allow REG+REG addressing for TImode. See
9363 PR72827 for complete details on how this ends up hoodwinking DSE. */
9364 if (mode == TImode && TARGET_VSX)
9365 return 0;
9366 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
9367 if (! reg_ok_strict
9368 && reg_offset_p
9369 && GET_CODE (x) == PLUS
9370 && REG_P (XEXP (x, 0))
9371 && (XEXP (x, 0) == virtual_stack_vars_rtx
9372 || XEXP (x, 0) == arg_pointer_rtx)
9373 && CONST_INT_P (XEXP (x, 1)))
9374 return 1;
9375 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
9376 return 1;
9377 if (!FLOAT128_2REG_P (mode)
9378 && (TARGET_HARD_FLOAT
9379 || TARGET_POWERPC64
9380 || (mode != DFmode && mode != DDmode))
9381 && (TARGET_POWERPC64 || mode != DImode)
9382 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
9383 && mode != PTImode
9384 && !avoiding_indexed_address_p (mode)
9385 && legitimate_indexed_address_p (x, reg_ok_strict))
9386 return 1;
9387 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
9388 && mode_supports_pre_modify_p (mode)
9389 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
9390 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
9391 reg_ok_strict, false)
9392 || (!avoiding_indexed_address_p (mode)
9393 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
9394 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
9395 {
9396 /* There is no prefixed version of the load/store with update. */
9397 rtx addr = XEXP (x, 1);
9398 return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT);
9399 }
9400 if (reg_offset_p && !quad_offset_p
9401 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
9402 return 1;
9403 return 0;
9404 }
9405
9406 /* Debug version of rs6000_legitimate_address_p. */
9407 static bool
9408 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
9409 bool reg_ok_strict)
9410 {
9411 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
9412 fprintf (stderr,
9413 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9414 "strict = %d, reload = %s, code = %s\n",
9415 ret ? "true" : "false",
9416 GET_MODE_NAME (mode),
9417 reg_ok_strict,
9418 (reload_completed ? "after" : "before"),
9419 GET_RTX_NAME (GET_CODE (x)));
9420 debug_rtx (x);
9421
9422 return ret;
9423 }
9424
9425 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
9426
9427 static bool
9428 rs6000_mode_dependent_address_p (const_rtx addr,
9429 addr_space_t as ATTRIBUTE_UNUSED)
9430 {
9431 return rs6000_mode_dependent_address_ptr (addr);
9432 }
9433
9434 /* Return true if ADDR (a legitimate address expression)
9435 has an effect that depends on the machine mode it is used for.
9436
9437 On the RS/6000 this is true of all integral offsets (since AltiVec
9438 and VSX modes don't allow them) or is a pre-increment or decrement.
9439
9440 ??? Except that due to conceptual problems in offsettable_address_p
9441 we can't really report the problems of integral offsets. So leave
9442 this assuming that the adjustable offset must be valid for the
9443 sub-words of a TFmode operand, which is what we had before. */
9444
9445 static bool
9446 rs6000_mode_dependent_address (const_rtx addr)
9447 {
9448 switch (GET_CODE (addr))
9449 {
9450 case PLUS:
9451 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
9452 is considered a legitimate address before reload, so there
9453 are no offset restrictions in that case. Note that this
9454 condition is safe in strict mode because any address involving
9455 virtual_stack_vars_rtx or arg_pointer_rtx would already have
9456 been rejected as illegitimate. */
9457 if (XEXP (addr, 0) != virtual_stack_vars_rtx
9458 && XEXP (addr, 0) != arg_pointer_rtx
9459 && CONST_INT_P (XEXP (addr, 1)))
9460 {
9461 HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
9462 HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
9463 if (TARGET_PREFIXED)
9464 return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
9465 else
9466 return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
9467 }
9468 break;
9469
9470 case LO_SUM:
9471 /* Anything in the constant pool is sufficiently aligned that
9472 all bytes have the same high part address. */
9473 return !legitimate_constant_pool_address_p (addr, QImode, false);
9474
9475 /* Auto-increment cases are now treated generically in recog.c. */
9476 case PRE_MODIFY:
9477 return TARGET_UPDATE;
9478
9479 /* AND is only allowed in Altivec loads. */
9480 case AND:
9481 return true;
9482
9483 default:
9484 break;
9485 }
9486
9487 return false;
9488 }
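/* A standalone sketch (not GCC code) of the offset test used in the PLUS
   case above, assuming SIGNED_16BIT_OFFSET_EXTRA_P (VAL, EXTRA) means VAL
   is in [-0x8000, 0x7fff - EXTRA]; the EXTRA slack keeps the last word of
   a multi-word access addressable as well.  */

#include <stdio.h>

static int
signed_16bit_offset_extra_p (long long val, long long extra)
{
  return val >= -0x8000 && val <= 0x7fff - extra;
}

int
main (void)
{
  /* With 8 bytes of slack (the 64-bit case above), 0x7ff7 still fits
     but 0x7ff8 does not.  */
  printf ("%d %d\n",
          signed_16bit_offset_extra_p (0x7ff7, 8),
          signed_16bit_offset_extra_p (0x7ff8, 8));
  return 0;
}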
9489
9490 /* Debug version of rs6000_mode_dependent_address. */
9491 static bool
9492 rs6000_debug_mode_dependent_address (const_rtx addr)
9493 {
9494 bool ret = rs6000_mode_dependent_address (addr);
9495
9496 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
9497 ret ? "true" : "false");
9498 debug_rtx (addr);
9499
9500 return ret;
9501 }
9502
9503 /* Implement FIND_BASE_TERM. */
9504
9505 rtx
9506 rs6000_find_base_term (rtx op)
9507 {
9508 rtx base;
9509
9510 base = op;
9511 if (GET_CODE (base) == CONST)
9512 base = XEXP (base, 0);
9513 if (GET_CODE (base) == PLUS)
9514 base = XEXP (base, 0);
9515 if (GET_CODE (base) == UNSPEC)
9516 switch (XINT (base, 1))
9517 {
9518 case UNSPEC_TOCREL:
9519 case UNSPEC_MACHOPIC_OFFSET:
9520 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
9521 for aliasing purposes. */
9522 return XVECEXP (base, 0, 0);
9523 }
9524
9525 return op;
9526 }
9527
9528 /* More elaborate version of recog's offsettable_memref_p predicate
9529 that works around the ??? note of rs6000_mode_dependent_address.
9530 In particular it accepts
9531
9532 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9533
9534 in 32-bit mode, that the recog predicate rejects. */
9535
9536 static bool
9537 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
9538 {
9539 bool worst_case;
9540
9541 if (!MEM_P (op))
9542 return false;
9543
9544 /* First mimic offsettable_memref_p. */
9545 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
9546 return true;
9547
9548 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
9549 the latter predicate knows nothing about the mode of the memory
9550 reference and, therefore, assumes that it is the largest supported
9551 mode (TFmode). As a consequence, legitimate offsettable memory
9552 references are rejected. rs6000_legitimate_offset_address_p contains
9553 the correct logic for the PLUS case of rs6000_mode_dependent_address,
9554 at least with a little bit of help here given that we know the
9555 actual registers used. */
9556 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
9557 || GET_MODE_SIZE (reg_mode) == 4);
9558 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
9559 strict, worst_case);
9560 }
9561
9562 /* Determine the reassociation width to be used in reassociate_bb.
9563 This takes into account how many parallel operations we
9564 can actually do of a given type, and also the latency.
9565 P8:
9566 int add/sub 6/cycle
9567 mul 2/cycle
9568 vect add/sub/mul 2/cycle
9569 fp add/sub/mul 2/cycle
9570 dfp 1/cycle
9571 */
9572
9573 static int
9574 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
9575 machine_mode mode)
9576 {
9577 switch (rs6000_tune)
9578 {
9579 case PROCESSOR_POWER8:
9580 case PROCESSOR_POWER9:
9581 case PROCESSOR_POWER10:
9582 if (DECIMAL_FLOAT_MODE_P (mode))
9583 return 1;
9584 if (VECTOR_MODE_P (mode))
9585 return 4;
9586 if (INTEGRAL_MODE_P (mode))
9587 return 1;
9588 if (FLOAT_MODE_P (mode))
9589 return 4;
9590 break;
9591 default:
9592 break;
9593 }
9594 return 1;
9595 }
9596
9597 /* Change register usage conditional on target flags. */
9598 static void
9599 rs6000_conditional_register_usage (void)
9600 {
9601 int i;
9602
9603 if (TARGET_DEBUG_TARGET)
9604 fprintf (stderr, "rs6000_conditional_register_usage called\n");
9605
9606 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9607 if (TARGET_64BIT)
9608 fixed_regs[13] = call_used_regs[13] = 1;
9609
9610 /* Conditionally disable FPRs. */
9611 if (TARGET_SOFT_FLOAT)
9612 for (i = 32; i < 64; i++)
9613 fixed_regs[i] = call_used_regs[i] = 1;
9614
9615 /* The TOC register is not killed across calls in a way that is
9616 visible to the compiler. */
9617 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9618 call_used_regs[2] = 0;
9619
9620 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
9621 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9622
9623 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
9624 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9625 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9626
9627 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
9628 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9629 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9630
9631 if (TARGET_TOC && TARGET_MINIMAL_TOC)
9632 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9633
9634 if (!TARGET_ALTIVEC && !TARGET_VSX)
9635 {
9636 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
9637 fixed_regs[i] = call_used_regs[i] = 1;
9638 call_used_regs[VRSAVE_REGNO] = 1;
9639 }
9640
9641 if (TARGET_ALTIVEC || TARGET_VSX)
9642 global_regs[VSCR_REGNO] = 1;
9643
9644 if (TARGET_ALTIVEC_ABI)
9645 {
9646 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
9647 call_used_regs[i] = 1;
9648
9649 /* AIX reserves VR20:31 in non-extended ABI mode. */
9650 if (TARGET_XCOFF)
9651 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
9652 fixed_regs[i] = call_used_regs[i] = 1;
9653 }
9654 }
9655
9656 \f
9657 /* Output insns to set DEST equal to the constant SOURCE as a series of
9658 lis, ori and shl instructions and return TRUE. */
9659
9660 bool
9661 rs6000_emit_set_const (rtx dest, rtx source)
9662 {
9663 machine_mode mode = GET_MODE (dest);
9664 rtx temp, set;
9665 rtx_insn *insn;
9666 HOST_WIDE_INT c;
9667
9668 gcc_checking_assert (CONST_INT_P (source));
9669 c = INTVAL (source);
9670 switch (mode)
9671 {
9672 case E_QImode:
9673 case E_HImode:
9674 emit_insn (gen_rtx_SET (dest, source));
9675 return true;
9676
9677 case E_SImode:
9678 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
9679
9680 emit_insn (gen_rtx_SET (copy_rtx (temp),
9681 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
9682 emit_insn (gen_rtx_SET (dest,
9683 gen_rtx_IOR (SImode, copy_rtx (temp),
9684 GEN_INT (c & 0xffff))));
9685 break;
9686
9687 case E_DImode:
9688 if (!TARGET_POWERPC64)
9689 {
9690 rtx hi, lo;
9691
9692 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
9693 DImode);
9694 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
9695 DImode);
9696 emit_move_insn (hi, GEN_INT (c >> 32));
9697 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
9698 emit_move_insn (lo, GEN_INT (c));
9699 }
9700 else
9701 rs6000_emit_set_long_const (dest, c);
9702 break;
9703
9704 default:
9705 gcc_unreachable ();
9706 }
9707
9708 insn = get_last_insn ();
9709 set = single_set (insn);
9710 if (! CONSTANT_P (SET_SRC (set)))
9711 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
9712
9713 return true;
9714 }
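/* A tiny standalone sketch (not GCC code) of the SImode case above:
   materialize C as a `lis'-style load of the high 16 bits followed by an
   `ori' of the low 16 bits.  */

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  int32_t c = 0x12345678;
  int32_t hi = c & ~0xffff;              /* lis  rT, 0x1234 */
  int32_t lo = c & 0xffff;               /* ori  rT, rT, 0x5678 */
  assert ((hi | lo) == c);
  return 0;
}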
9715
9716 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
9717 Output insns to set DEST equal to the constant C as a series of
9718 lis, ori and shl instructions. */
9719
9720 static void
9721 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
9722 {
9723 rtx temp;
9724 HOST_WIDE_INT ud1, ud2, ud3, ud4;
9725
9726 ud1 = c & 0xffff;
9727 c = c >> 16;
9728 ud2 = c & 0xffff;
9729 c = c >> 16;
9730 ud3 = c & 0xffff;
9731 c = c >> 16;
9732 ud4 = c & 0xffff;
9733
9734 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
9735 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
9736 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
9737
9738 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
9739 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
9740 {
9741 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9742
9743 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9744 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9745 if (ud1 != 0)
9746 emit_move_insn (dest,
9747 gen_rtx_IOR (DImode, copy_rtx (temp),
9748 GEN_INT (ud1)));
9749 }
9750 else if (ud3 == 0 && ud4 == 0)
9751 {
9752 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9753
9754 gcc_assert (ud2 & 0x8000);
9755 emit_move_insn (copy_rtx (temp),
9756 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9757 if (ud1 != 0)
9758 emit_move_insn (copy_rtx (temp),
9759 gen_rtx_IOR (DImode, copy_rtx (temp),
9760 GEN_INT (ud1)));
9761 emit_move_insn (dest,
9762 gen_rtx_ZERO_EXTEND (DImode,
9763 gen_lowpart (SImode,
9764 copy_rtx (temp))));
9765 }
9766 else if (ud1 == ud3 && ud2 == ud4)
9767 {
9768 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9769 HOST_WIDE_INT num = (ud2 << 16) | ud1;
9770 rs6000_emit_set_long_const (temp, (num ^ 0x80000000) - 0x80000000);
9771 rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff));
9772 rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32));
9773 emit_move_insn (dest, gen_rtx_IOR (DImode, one, two));
9774 }
9775 else if ((ud4 == 0xffff && (ud3 & 0x8000))
9776 || (ud4 == 0 && ! (ud3 & 0x8000)))
9777 {
9778 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9779
9780 emit_move_insn (copy_rtx (temp),
9781 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
9782 if (ud2 != 0)
9783 emit_move_insn (copy_rtx (temp),
9784 gen_rtx_IOR (DImode, copy_rtx (temp),
9785 GEN_INT (ud2)));
9786 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9787 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9788 GEN_INT (16)));
9789 if (ud1 != 0)
9790 emit_move_insn (dest,
9791 gen_rtx_IOR (DImode, copy_rtx (temp),
9792 GEN_INT (ud1)));
9793 }
9794 else
9795 {
9796 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9797
9798 emit_move_insn (copy_rtx (temp),
9799 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
9800 if (ud3 != 0)
9801 emit_move_insn (copy_rtx (temp),
9802 gen_rtx_IOR (DImode, copy_rtx (temp),
9803 GEN_INT (ud3)));
9804
9805 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
9806 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9807 GEN_INT (32)));
9808 if (ud2 != 0)
9809 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9810 gen_rtx_IOR (DImode, copy_rtx (temp),
9811 GEN_INT (ud2 << 16)));
9812 if (ud1 != 0)
9813 emit_move_insn (dest,
9814 gen_rtx_IOR (DImode, copy_rtx (temp),
9815 GEN_INT (ud1)));
9816 }
9817 }
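/* A self-contained sketch (not GCC code) of the decomposition above: split
   a 64-bit constant into four 16-bit chunks ud1..ud4 (low to high) and
   check that the lis/ori/sldi-style rebuild used by the final `else' arm
   recovers the original value.  */

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uint64_t c = 0x123456789abcdef0ULL;
  uint64_t ud1 = c & 0xffff;
  uint64_t ud2 = (c >> 16) & 0xffff;
  uint64_t ud3 = (c >> 32) & 0xffff;
  uint64_t ud4 = (c >> 48) & 0xffff;

  uint64_t r = (ud4 << 16) | ud3;        /* lis + ori, upper 32 bits */
  r <<= 32;                              /* sldi 32 */
  r |= (ud2 << 16) | ud1;                /* oris + ori, lower 32 bits */
  assert (r == c);
  return 0;
}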
9818
9819 /* Helper for the following. Get rid of [r+r] memory refs
9820 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
9821
9822 static void
9823 rs6000_eliminate_indexed_memrefs (rtx operands[2])
9824 {
9825 if (MEM_P (operands[0])
9826 && !REG_P (XEXP (operands[0], 0))
9827 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
9828 GET_MODE (operands[0]), false))
9829 operands[0]
9830 = replace_equiv_address (operands[0],
9831 copy_addr_to_reg (XEXP (operands[0], 0)));
9832
9833 if (MEM_P (operands[1])
9834 && !REG_P (XEXP (operands[1], 0))
9835 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
9836 GET_MODE (operands[1]), false))
9837 operands[1]
9838 = replace_equiv_address (operands[1],
9839 copy_addr_to_reg (XEXP (operands[1], 0)));
9840 }
9841
9842 /* Generate a vector of constants to permute MODE for a little-endian
9843 storage operation by swapping the two halves of a vector. */
9844 static rtvec
9845 rs6000_const_vec (machine_mode mode)
9846 {
9847 int i, subparts;
9848 rtvec v;
9849
9850 switch (mode)
9851 {
9852 case E_V1TImode:
9853 subparts = 1;
9854 break;
9855 case E_V2DFmode:
9856 case E_V2DImode:
9857 subparts = 2;
9858 break;
9859 case E_V4SFmode:
9860 case E_V4SImode:
9861 subparts = 4;
9862 break;
9863 case E_V8HImode:
9864 subparts = 8;
9865 break;
9866 case E_V16QImode:
9867 subparts = 16;
9868 break;
9869 default:
9870 gcc_unreachable();
9871 }
9872
9873 v = rtvec_alloc (subparts);
9874
9875 for (i = 0; i < subparts / 2; ++i)
9876 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
9877 for (i = subparts / 2; i < subparts; ++i)
9878 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
9879
9880 return v;
9881 }
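/* A standalone sketch (not GCC code) of the selector built above: for N
   subparts the permutation is N/2, ..., N-1, 0, ..., N/2-1, i.e. the two
   halves of the vector exchanged.  */

#include <stdio.h>

int
main (void)
{
  int subparts = 4;                      /* e.g. V4SImode */
  for (int i = 0; i < subparts; i++)
    printf ("%d ", (i + subparts / 2) % subparts);
  printf ("\n");                         /* prints "2 3 0 1" */
  return 0;
}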
9882
9883 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
9884 store operation. */
9885 void
9886 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
9887 {
9888 /* Scalar permutations are easier to express in integer modes than in
9889 floating-point modes, so cast them here. We use V1TImode instead
9890 of TImode to ensure that the values don't go through GPRs. */
9891 if (FLOAT128_VECTOR_P (mode))
9892 {
9893 dest = gen_lowpart (V1TImode, dest);
9894 source = gen_lowpart (V1TImode, source);
9895 mode = V1TImode;
9896 }
9897
9898 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
9899 scalar. */
9900 if (mode == TImode || mode == V1TImode)
9901 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
9902 GEN_INT (64))));
9903 else
9904 {
9905 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
9906 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
9907 }
9908 }
9909
9910 /* Emit a little-endian load from vector memory location SOURCE to VSX
9911 register DEST in mode MODE. The load is done with two permuting
9912 insns that represent an lxvd2x and an xxpermdi. */
9913 void
9914 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
9915 {
9916 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9917 V1TImode). */
9918 if (mode == TImode || mode == V1TImode)
9919 {
9920 mode = V2DImode;
9921 dest = gen_lowpart (V2DImode, dest);
9922 source = adjust_address (source, V2DImode, 0);
9923 }
9924
9925 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
9926 rs6000_emit_le_vsx_permute (tmp, source, mode);
9927 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9928 }
9929
9930 /* Emit a little-endian store to vector memory location DEST from VSX
9931 register SOURCE in mode MODE. The store is done with two permuting
9932 insns that represent an xxpermdi and an stxvd2x. */
9933 void
9934 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
9935 {
9936 /* This should never be called during or after LRA, because it does
9937 not re-permute the source register. It is intended only for use
9938 during expand. */
9939 gcc_assert (!lra_in_progress && !reload_completed);
9940
9941 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9942 V1TImode). */
9943 if (mode == TImode || mode == V1TImode)
9944 {
9945 mode = V2DImode;
9946 dest = adjust_address (dest, V2DImode, 0);
9947 source = gen_lowpart (V2DImode, source);
9948 }
9949
9950 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
9951 rs6000_emit_le_vsx_permute (tmp, source, mode);
9952 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9953 }
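/* A minimal sketch (not GCC code) of why the load and store above each
   emit two permutes: exchanging the two vector halves is an involution,
   so the doubleword swap performed by lxvd2x/stxvd2x composed with an
   xxpermdi-style swap restores the expected element order, and adjacent
   redundant pairs can later be optimized away.  */

#include <assert.h>

static void
swap_halves (int *v, int n)
{
  for (int i = 0; i < n / 2; i++)
    {
      int t = v[i];
      v[i] = v[i + n / 2];
      v[i + n / 2] = t;
    }
}

int
main (void)
{
  int v[4] = { 0, 1, 2, 3 };
  swap_halves (v, 4);                    /* 2 3 0 1 */
  swap_halves (v, 4);                    /* back to 0 1 2 3 */
  for (int i = 0; i < 4; i++)
    assert (v[i] == i);
  return 0;
}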
9954
9955 /* Emit a sequence representing a little-endian VSX load or store,
9956 moving data from SOURCE to DEST in mode MODE. This is done
9957 separately from rs6000_emit_move to ensure it is called only
9958 during expand. LE VSX loads and stores introduced later are
9959 handled with a split. The expand-time RTL generation allows
9960 us to optimize away redundant pairs of register-permutes. */
9961 void
9962 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
9963 {
9964 gcc_assert (!BYTES_BIG_ENDIAN
9965 && VECTOR_MEM_VSX_P (mode)
9966 && !TARGET_P9_VECTOR
9967 && !gpr_or_gpr_p (dest, source)
9968 && (MEM_P (source) ^ MEM_P (dest)));
9969
9970 if (MEM_P (source))
9971 {
9972 gcc_assert (REG_P (dest) || SUBREG_P (dest));
9973 rs6000_emit_le_vsx_load (dest, source, mode);
9974 }
9975 else
9976 {
9977 if (!REG_P (source))
9978 source = force_reg (mode, source);
9979 rs6000_emit_le_vsx_store (dest, source, mode);
9980 }
9981 }
9982
9983 /* Return whether an SFmode or SImode move can be done without converting one
9984 mode to another. This arises when we have:
9985
9986 (SUBREG:SF (REG:SI ...))
9987 (SUBREG:SI (REG:SF ...))
9988
9989 and one of the values is in a floating point/vector register, where SFmode
9990 scalars are stored in DFmode format. */
9991
9992 bool
9993 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
9994 {
9995 if (TARGET_ALLOW_SF_SUBREG)
9996 return true;
9997
9998 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
9999 return true;
10000
10001 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
10002 return true;
10003
10004 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10005 if (SUBREG_P (dest))
10006 {
10007 rtx dest_subreg = SUBREG_REG (dest);
10008 rtx src_subreg = SUBREG_REG (src);
10009 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
10010 }
10011
10012 return false;
10013 }
10014
10015
10016 /* Helper function to change moves with:
10017
10018 (SUBREG:SF (REG:SI)) and
10019 (SUBREG:SI (REG:SF))
10020
10021 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10022 values are stored as DFmode values in the VSX registers. We need to convert
10023 the bits before we can use a direct move or operate on the bits in the
10024 vector register as an integer type.
10025
10026 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
10027
10028 static bool
10029 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
10030 {
10031 if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
10032 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
10033 && SUBREG_P (source) && sf_subreg_operand (source, mode))
10034 {
10035 rtx inner_source = SUBREG_REG (source);
10036 machine_mode inner_mode = GET_MODE (inner_source);
10037
10038 if (mode == SImode && inner_mode == SFmode)
10039 {
10040 emit_insn (gen_movsi_from_sf (dest, inner_source));
10041 return true;
10042 }
10043
10044 if (mode == SFmode && inner_mode == SImode)
10045 {
10046 emit_insn (gen_movsf_from_si (dest, inner_source));
10047 return true;
10048 }
10049 }
10050
10051 return false;
10052 }
10053
10054 /* Emit a move from SOURCE to DEST in mode MODE. */
10055 void
10056 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
10057 {
10058 rtx operands[2];
10059 operands[0] = dest;
10060 operands[1] = source;
10061
10062 if (TARGET_DEBUG_ADDR)
10063 {
10064 fprintf (stderr,
10065 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
10066 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10067 GET_MODE_NAME (mode),
10068 lra_in_progress,
10069 reload_completed,
10070 can_create_pseudo_p ());
10071 debug_rtx (dest);
10072 fprintf (stderr, "source:\n");
10073 debug_rtx (source);
10074 }
10075
10076 /* Check that we get CONST_WIDE_INT only when we should. */
10077 if (CONST_WIDE_INT_P (operands[1])
10078 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
10079 gcc_unreachable ();
10080
10081 #ifdef HAVE_AS_GNU_ATTRIBUTE
10082 /* If we use a long double type, set the flags in .gnu_attribute that say
10083 what the long double type is. This is to allow the linker's warning
10084 message for the wrong long double to be useful, even if the function does
10085 not do a call (for example, doing a 128-bit add on power9 if the long
10086 double type is IEEE 128-bit). Do not set this if __ibm128 or __float128
10087 are used but are not the default long double type. */
10088 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
10089 {
10090 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
10091 rs6000_passes_float = rs6000_passes_long_double = true;
10092
10093 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
10094 rs6000_passes_float = rs6000_passes_long_double = true;
10095 }
10096 #endif
10097
10098 /* See if we need to special case SImode/SFmode SUBREG moves. */
10099 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
10100 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
10101 return;
10102
10103 /* Check if GCC is setting up a block move that will end up using FP
10104 registers as temporaries. We must make sure this is acceptable. */
10105 if (MEM_P (operands[0])
10106 && MEM_P (operands[1])
10107 && mode == DImode
10108 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
10109 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
10110 && ! (rs6000_slow_unaligned_access (SImode,
10111 (MEM_ALIGN (operands[0]) > 32
10112 ? 32 : MEM_ALIGN (operands[0])))
10113 || rs6000_slow_unaligned_access (SImode,
10114 (MEM_ALIGN (operands[1]) > 32
10115 ? 32 : MEM_ALIGN (operands[1]))))
10116 && ! MEM_VOLATILE_P (operands [0])
10117 && ! MEM_VOLATILE_P (operands [1]))
10118 {
10119 emit_move_insn (adjust_address (operands[0], SImode, 0),
10120 adjust_address (operands[1], SImode, 0));
10121 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
10122 adjust_address (copy_rtx (operands[1]), SImode, 4));
10123 return;
10124 }
10125
10126 if (can_create_pseudo_p () && MEM_P (operands[0])
10127 && !gpc_reg_operand (operands[1], mode))
10128 operands[1] = force_reg (mode, operands[1]);
10129
10130 /* Recognize the case where operand[1] is a reference to thread-local
10131 data and load its address to a register. */
10132 if (tls_referenced_p (operands[1]))
10133 {
10134 enum tls_model model;
10135 rtx tmp = operands[1];
10136 rtx addend = NULL;
10137
10138 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
10139 {
10140 addend = XEXP (XEXP (tmp, 0), 1);
10141 tmp = XEXP (XEXP (tmp, 0), 0);
10142 }
10143
10144 gcc_assert (SYMBOL_REF_P (tmp));
10145 model = SYMBOL_REF_TLS_MODEL (tmp);
10146 gcc_assert (model != 0);
10147
10148 tmp = rs6000_legitimize_tls_address (tmp, model);
10149 if (addend)
10150 {
10151 tmp = gen_rtx_PLUS (mode, tmp, addend);
10152 tmp = force_operand (tmp, operands[0]);
10153 }
10154 operands[1] = tmp;
10155 }
10156
10157 /* 128-bit constant floating-point values on Darwin should really be loaded
10158 as two parts. However, this premature splitting is a problem when DFmode
10159 values can go into Altivec registers. */
10160 if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
10161 && !reg_addr[DFmode].scalar_in_vmx_p)
10162 {
10163 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
10164 simplify_gen_subreg (DFmode, operands[1], mode, 0),
10165 DFmode);
10166 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
10167 GET_MODE_SIZE (DFmode)),
10168 simplify_gen_subreg (DFmode, operands[1], mode,
10169 GET_MODE_SIZE (DFmode)),
10170 DFmode);
10171 return;
10172 }
10173
10174 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10175 p1:SD) if p1 is not of floating point class and p0 is spilled, since
10176 there is no analogous movsd_store for this case. */
10177 if (lra_in_progress && mode == DDmode
10178 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
10179 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10180 && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
10181 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
10182 {
10183 enum reg_class cl;
10184 int regno = REGNO (SUBREG_REG (operands[1]));
10185
10186 if (!HARD_REGISTER_NUM_P (regno))
10187 {
10188 cl = reg_preferred_class (regno);
10189 regno = reg_renumber[regno];
10190 if (regno < 0)
10191 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
10192 }
10193 if (regno >= 0 && ! FP_REGNO_P (regno))
10194 {
10195 mode = SDmode;
10196 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
10197 operands[1] = SUBREG_REG (operands[1]);
10198 }
10199 }
10200 if (lra_in_progress
10201 && mode == SDmode
10202 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
10203 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10204 && (REG_P (operands[1])
10205 || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
10206 {
10207 int regno = reg_or_subregno (operands[1]);
10208 enum reg_class cl;
10209
10210 if (!HARD_REGISTER_NUM_P (regno))
10211 {
10212 cl = reg_preferred_class (regno);
10213 gcc_assert (cl != NO_REGS);
10214 regno = reg_renumber[regno];
10215 if (regno < 0)
10216 regno = ira_class_hard_regs[cl][0];
10217 }
10218 if (FP_REGNO_P (regno))
10219 {
10220 if (GET_MODE (operands[0]) != DDmode)
10221 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
10222 emit_insn (gen_movsd_store (operands[0], operands[1]));
10223 }
10224 else if (INT_REGNO_P (regno))
10225 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10226 else
10227 gcc_unreachable();
10228 return;
10229 }
10230 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10231 p1:DD)) if p0 is not of floating point class and p1 is spilled, since
10232 there is no analogous movsd_load for this case. */
10233 if (lra_in_progress && mode == DDmode
10234 && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
10235 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
10236 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
10237 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10238 {
10239 enum reg_class cl;
10240 int regno = REGNO (SUBREG_REG (operands[0]));
10241
10242 if (!HARD_REGISTER_NUM_P (regno))
10243 {
10244 cl = reg_preferred_class (regno);
10245 regno = reg_renumber[regno];
10246 if (regno < 0)
10247 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
10248 }
10249 if (regno >= 0 && ! FP_REGNO_P (regno))
10250 {
10251 mode = SDmode;
10252 operands[0] = SUBREG_REG (operands[0]);
10253 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
10254 }
10255 }
10256 if (lra_in_progress
10257 && mode == SDmode
10258 && (REG_P (operands[0])
10259 || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
10260 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
10261 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10262 {
10263 int regno = reg_or_subregno (operands[0]);
10264 enum reg_class cl;
10265
10266 if (!HARD_REGISTER_NUM_P (regno))
10267 {
10268 cl = reg_preferred_class (regno);
10269 gcc_assert (cl != NO_REGS);
10270 regno = reg_renumber[regno];
10271 if (regno < 0)
10272 regno = ira_class_hard_regs[cl][0];
10273 }
10274 if (FP_REGNO_P (regno))
10275 {
10276 if (GET_MODE (operands[1]) != DDmode)
10277 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
10278 emit_insn (gen_movsd_load (operands[0], operands[1]));
10279 }
10280 else if (INT_REGNO_P (regno))
10281 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10282 else
10283 gcc_unreachable();
10284 return;
10285 }
10286
10287 /* FIXME: In the long term, this switch statement should go away
10288 and be replaced by a sequence of tests based on things like
10289 mode == Pmode. */
10290 switch (mode)
10291 {
10292 case E_HImode:
10293 case E_QImode:
10294 if (CONSTANT_P (operands[1])
10295 && !CONST_INT_P (operands[1]))
10296 operands[1] = force_const_mem (mode, operands[1]);
10297 break;
10298
10299 case E_TFmode:
10300 case E_TDmode:
10301 case E_IFmode:
10302 case E_KFmode:
10303 if (FLOAT128_2REG_P (mode))
10304 rs6000_eliminate_indexed_memrefs (operands);
10305 /* fall through */
10306
10307 case E_DFmode:
10308 case E_DDmode:
10309 case E_SFmode:
10310 case E_SDmode:
10311 if (CONSTANT_P (operands[1])
10312 && ! easy_fp_constant (operands[1], mode))
10313 operands[1] = force_const_mem (mode, operands[1]);
10314 break;
10315
10316 case E_V16QImode:
10317 case E_V8HImode:
10318 case E_V4SFmode:
10319 case E_V4SImode:
10320 case E_V2DFmode:
10321 case E_V2DImode:
10322 case E_V1TImode:
10323 if (CONSTANT_P (operands[1])
10324 && !easy_vector_constant (operands[1], mode))
10325 operands[1] = force_const_mem (mode, operands[1]);
10326 break;
10327
10328 case E_OOmode:
10329 case E_XOmode:
10330 if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0)
10331 error ("%qs is an opaque type, and you cannot set it to other values",
10332 (mode == OOmode) ? "__vector_pair" : "__vector_quad");
10333 break;
10334
10335 case E_SImode:
10336 case E_DImode:
10337 /* Use the default pattern for the address of ELF small data. */
10338 if (TARGET_ELF
10339 && mode == Pmode
10340 && DEFAULT_ABI == ABI_V4
10341 && (SYMBOL_REF_P (operands[1])
10342 || GET_CODE (operands[1]) == CONST)
10343 && small_data_operand (operands[1], mode))
10344 {
10345 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10346 return;
10347 }
10348
10349 /* Use the default pattern for loading up PC-relative addresses. */
10350 if (TARGET_PCREL && mode == Pmode
10351 && pcrel_local_or_external_address (operands[1], Pmode))
10352 {
10353 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10354 return;
10355 }
10356
10357 if (DEFAULT_ABI == ABI_V4
10358 && mode == Pmode && mode == SImode
10359 && flag_pic == 1 && got_operand (operands[1], mode))
10360 {
10361 emit_insn (gen_movsi_got (operands[0], operands[1]));
10362 return;
10363 }
10364
10365 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
10366 && TARGET_NO_TOC_OR_PCREL
10367 && ! flag_pic
10368 && mode == Pmode
10369 && CONSTANT_P (operands[1])
10370 && GET_CODE (operands[1]) != HIGH
10371 && !CONST_INT_P (operands[1]))
10372 {
10373 rtx target = (!can_create_pseudo_p ()
10374 ? operands[0]
10375 : gen_reg_rtx (mode));
10376
10377 /* If this is a function address on -mcall-aixdesc,
10378 convert it to the address of the descriptor. */
10379 if (DEFAULT_ABI == ABI_AIX
10380 && SYMBOL_REF_P (operands[1])
10381 && XSTR (operands[1], 0)[0] == '.')
10382 {
10383 const char *name = XSTR (operands[1], 0);
10384 rtx new_ref;
10385 while (*name == '.')
10386 name++;
10387 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
10388 CONSTANT_POOL_ADDRESS_P (new_ref)
10389 = CONSTANT_POOL_ADDRESS_P (operands[1]);
10390 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
10391 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
10392 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
10393 operands[1] = new_ref;
10394 }
10395
10396 if (DEFAULT_ABI == ABI_DARWIN)
10397 {
10398 #if TARGET_MACHO
10399 /* This is not PIC code, but could require the subset of
10400 indirections used by mdynamic-no-pic. */
10401 if (MACHO_DYNAMIC_NO_PIC_P)
10402 {
10403 /* Take care of any required data indirection. */
10404 operands[1] = rs6000_machopic_legitimize_pic_address (
10405 operands[1], mode, operands[0]);
10406 if (operands[0] != operands[1])
10407 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10408 return;
10409 }
10410 #endif
10411 emit_insn (gen_macho_high (Pmode, target, operands[1]));
10412 emit_insn (gen_macho_low (Pmode, operands[0],
10413 target, operands[1]));
10414 return;
10415 }
10416
10417 emit_insn (gen_elf_high (target, operands[1]));
10418 emit_insn (gen_elf_low (operands[0], target, operands[1]));
10419 return;
10420 }
10421
10422 /* If this is a SYMBOL_REF that refers to a constant pool entry,
10423 and we have put it in the TOC, we just need to make a TOC-relative
10424 reference to it. */
10425 if (TARGET_TOC
10426 && SYMBOL_REF_P (operands[1])
10427 && use_toc_relative_ref (operands[1], mode))
10428 operands[1] = create_TOC_reference (operands[1], operands[0]);
10429 else if (mode == Pmode
10430 && CONSTANT_P (operands[1])
10431 && GET_CODE (operands[1]) != HIGH
10432 && ((REG_P (operands[0])
10433 && FP_REGNO_P (REGNO (operands[0])))
10434 || !CONST_INT_P (operands[1])
10435 || (num_insns_constant (operands[1], mode)
10436 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
10437 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
10438 && (TARGET_CMODEL == CMODEL_SMALL
10439 || can_create_pseudo_p ()
10440 || (REG_P (operands[0])
10441 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
10442 {
10443
10444 #if TARGET_MACHO
10445 /* Darwin uses a special PIC legitimizer. */
10446 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
10447 {
10448 operands[1] =
10449 rs6000_machopic_legitimize_pic_address (operands[1], mode,
10450 operands[0]);
10451 if (operands[0] != operands[1])
10452 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10453 return;
10454 }
10455 #endif
10456
10457 /* If we are to limit the number of things we put in the TOC and
10458 this is a symbol plus a constant we can add in one insn,
10459 just put the symbol in the TOC and add the constant. */
10460 if (GET_CODE (operands[1]) == CONST
10461 && TARGET_NO_SUM_IN_TOC
10462 && GET_CODE (XEXP (operands[1], 0)) == PLUS
10463 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
10464 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
10465 || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
10466 && ! side_effects_p (operands[0]))
10467 {
10468 rtx sym =
10469 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
10470 rtx other = XEXP (XEXP (operands[1], 0), 1);
10471
10472 sym = force_reg (mode, sym);
10473 emit_insn (gen_add3_insn (operands[0], sym, other));
10474 return;
10475 }
10476
10477 operands[1] = force_const_mem (mode, operands[1]);
10478
10479 if (TARGET_TOC
10480 && SYMBOL_REF_P (XEXP (operands[1], 0))
10481 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
10482 {
10483 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
10484 operands[0]);
10485 operands[1] = gen_const_mem (mode, tocref);
10486 set_mem_alias_set (operands[1], get_TOC_alias_set ());
10487 }
10488 }
10489 break;
10490
10491 case E_TImode:
10492 if (!VECTOR_MEM_VSX_P (TImode))
10493 rs6000_eliminate_indexed_memrefs (operands);
10494 break;
10495
10496 case E_PTImode:
10497 rs6000_eliminate_indexed_memrefs (operands);
10498 break;
10499
10500 default:
10501 fatal_insn ("bad move", gen_rtx_SET (dest, source));
10502 }
10503
10504 /* Above, we may have called force_const_mem which may have returned
10505 an invalid address. If we can, fix this up; otherwise, reload will
10506 have to deal with it. */
10507 if (MEM_P (operands[1]))
10508 operands[1] = validize_mem (operands[1]);
10509
10510 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10511 }
10512 \f
10513
10514 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
10515 static void
10516 init_float128_ibm (machine_mode mode)
10517 {
10518 if (!TARGET_XL_COMPAT)
10519 {
10520 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
10521 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
10522 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
10523 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
10524
10525 if (!TARGET_HARD_FLOAT)
10526 {
10527 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
10528 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
10529 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
10530 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
10531 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
10532 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
10533 set_optab_libfunc (le_optab, mode, "__gcc_qle");
10534 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
10535
10536 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
10537 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
10538 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
10539 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
10540 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
10541 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
10542 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
10543 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
10544 }
10545 }
10546 else
10547 {
10548 set_optab_libfunc (add_optab, mode, "_xlqadd");
10549 set_optab_libfunc (sub_optab, mode, "_xlqsub");
10550 set_optab_libfunc (smul_optab, mode, "_xlqmul");
10551 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
10552 }
10553
10554 /* Add various conversions for IFmode to use the traditional TFmode
10555 names. */
10556 if (mode == IFmode)
10557 {
10558 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
10559 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
10560 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
10561 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
10562 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
10563 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
10564
10565 if (TARGET_POWERPC64)
10566 {
10567 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
10568 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
10569 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
10570 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
10571 }
10572 }
10573 }
10574
10575 /* Create a decl for either complex long double multiply or complex long double
10576 divide when long double is IEEE 128-bit floating point. We can't use
10577 __multc3 and __divtc3 because those names were already used by the
10578 original IBM extended double format. The complex multiply/divide functions are encoded
10579 as builtin functions with a complex result and 4 scalar inputs. */
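/* For example (an illustrative sketch, not part of the original source; the
   function f is hypothetical): under -mabi=ieeelongdouble, the multiplication
   in

   _Complex long double f (_Complex long double x, _Complex long double y)
   {
   return x * y;
   }

   is lowered to a call __mulkc3 (creal (x), cimag (x), creal (y), cimag (y))
   rather than to the IBM extended double routine __multc3. */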
10580
10581 static void
10582 create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
10583 {
10584 tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL,
10585 name, NULL_TREE);
10586
10587 set_builtin_decl (fncode, fndecl, true);
10588
10589 if (TARGET_DEBUG_BUILTIN)
10590 fprintf (stderr, "create complex %s, fncode: %d\n", name, (int) fncode);
10591
10592 return;
10593 }
10594
10595 /* Set up IEEE 128-bit floating point routines. Use different names if the
10596 arguments can be passed in a vector register. The historical PowerPC
10597 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
10598 continue to use that if we aren't using vector registers to pass IEEE
10599 128-bit floating point. */
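/* For example (illustrative): when IEEE 128-bit values are passed in vector
   registers, a __float128 addition calls __addkf3; the historical software
   emulation in the else branch below would instead have used _q_add. */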
10600
10601 static void
10602 init_float128_ieee (machine_mode mode)
10603 {
10604 if (FLOAT128_VECTOR_P (mode))
10605 {
10606 static bool complex_muldiv_init_p = false;
10607
10608 /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble. If
10609 we have clone or target attributes, this will be called a second
10610 time. We want to create the built-in functions only once. */
10611 if (mode == TFmode && TARGET_IEEEQUAD && !complex_muldiv_init_p)
10612 {
10613 complex_muldiv_init_p = true;
10614 built_in_function fncode_mul =
10615 (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + TCmode
10616 - MIN_MODE_COMPLEX_FLOAT);
10617 built_in_function fncode_div =
10618 (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + TCmode
10619 - MIN_MODE_COMPLEX_FLOAT);
10620
10621 tree fntype = build_function_type_list (complex_long_double_type_node,
10622 long_double_type_node,
10623 long_double_type_node,
10624 long_double_type_node,
10625 long_double_type_node,
10626 NULL_TREE);
10627
10628 create_complex_muldiv ("__mulkc3", fncode_mul, fntype);
10629 create_complex_muldiv ("__divkc3", fncode_div, fntype);
10630 }
10631
10632 set_optab_libfunc (add_optab, mode, "__addkf3");
10633 set_optab_libfunc (sub_optab, mode, "__subkf3");
10634 set_optab_libfunc (neg_optab, mode, "__negkf2");
10635 set_optab_libfunc (smul_optab, mode, "__mulkf3");
10636 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
10637 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
10638 set_optab_libfunc (abs_optab, mode, "__abskf2");
10639 set_optab_libfunc (powi_optab, mode, "__powikf2");
10640
10641 set_optab_libfunc (eq_optab, mode, "__eqkf2");
10642 set_optab_libfunc (ne_optab, mode, "__nekf2");
10643 set_optab_libfunc (gt_optab, mode, "__gtkf2");
10644 set_optab_libfunc (ge_optab, mode, "__gekf2");
10645 set_optab_libfunc (lt_optab, mode, "__ltkf2");
10646 set_optab_libfunc (le_optab, mode, "__lekf2");
10647 set_optab_libfunc (unord_optab, mode, "__unordkf2");
10648
10649 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
10650 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
10651 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
10652 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
10653
10654 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
10655 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
10656 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
10657
10658 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
10659 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
10660 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
10661
10662 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
10663 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
10664 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
10665 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
10666 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
10667 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
10668
10669 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
10670 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
10671 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
10672 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
10673
10674 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
10675 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
10676 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
10677 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
10678
10679 if (TARGET_POWERPC64)
10680 {
10681 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
10682 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
10683 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
10684 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
10685 }
10686 }
10687
10688 else
10689 {
10690 set_optab_libfunc (add_optab, mode, "_q_add");
10691 set_optab_libfunc (sub_optab, mode, "_q_sub");
10692 set_optab_libfunc (neg_optab, mode, "_q_neg");
10693 set_optab_libfunc (smul_optab, mode, "_q_mul");
10694 set_optab_libfunc (sdiv_optab, mode, "_q_div");
10695 if (TARGET_PPC_GPOPT)
10696 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
10697
10698 set_optab_libfunc (eq_optab, mode, "_q_feq");
10699 set_optab_libfunc (ne_optab, mode, "_q_fne");
10700 set_optab_libfunc (gt_optab, mode, "_q_fgt");
10701 set_optab_libfunc (ge_optab, mode, "_q_fge");
10702 set_optab_libfunc (lt_optab, mode, "_q_flt");
10703 set_optab_libfunc (le_optab, mode, "_q_fle");
10704
10705 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
10706 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
10707 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
10708 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
10709 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
10710 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
10711 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
10712 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
10713 }
10714 }
10715
10716 static void
10717 rs6000_init_libfuncs (void)
10718 {
10719 /* __float128 support. */
10720 if (TARGET_FLOAT128_TYPE)
10721 {
10722 init_float128_ibm (IFmode);
10723 init_float128_ieee (KFmode);
10724 }
10725
10726 /* AIX/Darwin/64-bit Linux quad floating point routines. */
10727 if (TARGET_LONG_DOUBLE_128)
10728 {
10729 if (!TARGET_IEEEQUAD)
10730 init_float128_ibm (TFmode);
10731
10732 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
10733 else
10734 init_float128_ieee (TFmode);
10735 }
10736 }
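/* As an illustration (a sketch, not part of the original source; the function
   f is hypothetical): with IBM 128-bit long double, the addition in

   long double f (long double a, long double b) { return a + b; }

   is emitted as a call to __gcc_qadd (or _xlqadd under -mxl-compat), since
   IBM double-double arithmetic is always performed by library routines. */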
10737
10738 /* Emit a potentially record-form instruction, setting DST from SRC.
10739 If DOT is 0, that is all; otherwise, set CCREG to the result of the
10740 signed comparison of DST with zero. If DOT is 1, the generated RTL
10741 doesn't care about the DST result; if DOT is 2, it does. If CCREG
10742 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
10743 a separate COMPARE. */
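/* For instance (illustrative): with DOT = 1 and CCREG = CR0, the insn is
   emitted as a PARALLEL of a COMPARE of SRC against zero setting CR0 and a
   CLOBBER of DST; with DOT = 2 the CLOBBER becomes a SET of DST, so the
   GPR result is usable as well. */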
10744
10745 void
10746 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
10747 {
10748 if (dot == 0)
10749 {
10750 emit_move_insn (dst, src);
10751 return;
10752 }
10753
10754 if (cc_reg_not_cr0_operand (ccreg, CCmode))
10755 {
10756 emit_move_insn (dst, src);
10757 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
10758 return;
10759 }
10760
10761 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
10762 if (dot == 1)
10763 {
10764 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
10765 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
10766 }
10767 else
10768 {
10769 rtx set = gen_rtx_SET (dst, src);
10770 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
10771 }
10772 }
10773
10774 \f
10775 /* A validation routine: say whether CODE, a condition code, and MODE
10776 match. The other alternatives either don't make sense or should
10777 never be generated. */
10778
10779 void
10780 validate_condition_mode (enum rtx_code code, machine_mode mode)
10781 {
10782 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
10783 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
10784 && GET_MODE_CLASS (mode) == MODE_CC);
10785
10786 /* These don't make sense. */
10787 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
10788 || mode != CCUNSmode);
10789
10790 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
10791 || mode == CCUNSmode);
10792
10793 gcc_assert (mode == CCFPmode
10794 || (code != ORDERED && code != UNORDERED
10795 && code != UNEQ && code != LTGT
10796 && code != UNGT && code != UNLT
10797 && code != UNGE && code != UNLE));
10798
10799 /* These are invalid; the information is not there. */
10800 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
10801 }
10802
10803 \f
10804 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
10805 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
10806 not zero, store there the bit offset (counted from the right) where
10807 the single stretch of 1 bits begins; and similarly for B, the bit
10808 offset where it ends. */
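/* For example (illustrative values, not from the original comment): in DImode
   the mask 0x0ff0 has its single run of ones from bit 4 through bit 11, so
   *E = 4 and *B = 11. A wrap-around mask such as 0x8000000000000001 is also
   accepted; its run begins at bit 63 and wraps to end at bit 0, giving
   *E = 63 and *B = 0. */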
10809
10810 bool
10811 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
10812 {
10813 unsigned HOST_WIDE_INT val = INTVAL (mask);
10814 unsigned HOST_WIDE_INT bit;
10815 int nb, ne;
10816 int n = GET_MODE_PRECISION (mode);
10817
10818 if (mode != DImode && mode != SImode)
10819 return false;
10820
10821 if (INTVAL (mask) >= 0)
10822 {
10823 bit = val & -val;
10824 ne = exact_log2 (bit);
10825 nb = exact_log2 (val + bit);
10826 }
10827 else if (val + 1 == 0)
10828 {
10829 nb = n;
10830 ne = 0;
10831 }
10832 else if (val & 1)
10833 {
10834 val = ~val;
10835 bit = val & -val;
10836 nb = exact_log2 (bit);
10837 ne = exact_log2 (val + bit);
10838 }
10839 else
10840 {
10841 bit = val & -val;
10842 ne = exact_log2 (bit);
10843 if (val + bit == 0)
10844 nb = n;
10845 else
10846 nb = 0;
10847 }
10848
10849 nb--;
10850
10851 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
10852 return false;
10853
10854 if (b)
10855 *b = nb;
10856 if (e)
10857 *e = ne;
10858
10859 return true;
10860 }
10861
10862 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
10863 or rldicr instruction, to implement an AND with it in mode MODE. */
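/* For instance (illustrative): in DImode, 0x00000000ffffff00 qualifies (it
   fits an rlwinm mask), 0xffffffffffffff00 qualifies (rldicr), but
   0x0000ffff00000000 does not: its run of ones reaches neither bit 0 nor
   bit 63 and does not fit in the low 32 bits. */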
10864
10865 bool
10866 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
10867 {
10868 int nb, ne;
10869
10870 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10871 return false;
10872
10873 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
10874 does not wrap. */
10875 if (mode == DImode)
10876 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
10877
10878 /* For SImode, rlwinm can do everything. */
10879 if (mode == SImode)
10880 return (nb < 32 && ne < 32);
10881
10882 return false;
10883 }
10884
10885 /* Return the instruction template for an AND with mask in mode MODE, with
10886 operands OPERANDS. If DOT is true, make it a record-form instruction. */
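/* A quick sanity check of the templates below (illustrative): an AND with
   0x000000000000ffff in DImode has NE = 0 and NB = 15 and so becomes
   "rldicl %0,%1,0,48" (clear the 48 high bits), while an AND with 0x0000ff00
   in SImode has NB = 15 and NE = 8 and becomes "rlwinm %0,%1,0,16,23". */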
10887
10888 const char *
10889 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
10890 {
10891 int nb, ne;
10892
10893 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
10894 gcc_unreachable ();
10895
10896 if (mode == DImode && ne == 0)
10897 {
10898 operands[3] = GEN_INT (63 - nb);
10899 if (dot)
10900 return "rldicl. %0,%1,0,%3";
10901 return "rldicl %0,%1,0,%3";
10902 }
10903
10904 if (mode == DImode && nb == 63)
10905 {
10906 operands[3] = GEN_INT (63 - ne);
10907 if (dot)
10908 return "rldicr. %0,%1,0,%3";
10909 return "rldicr %0,%1,0,%3";
10910 }
10911
10912 if (nb < 32 && ne < 32)
10913 {
10914 operands[3] = GEN_INT (31 - nb);
10915 operands[4] = GEN_INT (31 - ne);
10916 if (dot)
10917 return "rlwinm. %0,%1,0,%3,%4";
10918 return "rlwinm %0,%1,0,%3,%4";
10919 }
10920
10921 gcc_unreachable ();
10922 }
10923
10924 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
10925 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
10926 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
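/* Illustrative example: an SImode LSHIFTRT by 8 under the mask 0x00ffffff is
   a single rotate-and-mask insn: the right shift becomes a left rotate by
   32 - 8 = 24 with a mask selecting bits 8..31 (big-endian numbering), i.e.
   "rlwinm %0,%1,24,8,31". */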
10927
10928 bool
10929 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
10930 {
10931 int nb, ne;
10932
10933 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10934 return false;
10935
10936 int n = GET_MODE_PRECISION (mode);
10937 int sh = -1;
10938
10939 if (CONST_INT_P (XEXP (shift, 1)))
10940 {
10941 sh = INTVAL (XEXP (shift, 1));
10942 if (sh < 0 || sh >= n)
10943 return false;
10944 }
10945
10946 rtx_code code = GET_CODE (shift);
10947
10948 /* Convert any shift by 0 to a rotate, to simplify below code. */
10949 if (sh == 0)
10950 code = ROTATE;
10951
10952 /* Convert rotate to simple shift if we can, to make analysis simpler. */
10953 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
10954 code = ASHIFT;
10955 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
10956 {
10957 code = LSHIFTRT;
10958 sh = n - sh;
10959 }
10960
10961 /* DImode rotates need rld*. */
10962 if (mode == DImode && code == ROTATE)
10963 return (nb == 63 || ne == 0 || ne == sh);
10964
10965 /* SImode rotates need rlw*. */
10966 if (mode == SImode && code == ROTATE)
10967 return (nb < 32 && ne < 32 && sh < 32);
10968
10969 /* Wrap-around masks are only okay for rotates. */
10970 if (ne > nb)
10971 return false;
10972
10973 /* Variable shifts are only okay for rotates. */
10974 if (sh < 0)
10975 return false;
10976
10977 /* Don't allow ASHIFT if the mask is wrong for that. */
10978 if (code == ASHIFT && ne < sh)
10979 return false;
10980
10981 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
10982 if the mask is wrong for that. */
10983 if (nb < 32 && ne < 32 && sh < 32
10984 && !(code == LSHIFTRT && nb >= 32 - sh))
10985 return true;
10986
10987 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
10988 if the mask is wrong for that. */
10989 if (code == LSHIFTRT)
10990 sh = 64 - sh;
10991 if (nb == 63 || ne == 0 || ne == sh)
10992 return !(code == LSHIFTRT && nb >= sh);
10993
10994 return false;
10995 }
10996
10997 /* Return the instruction template for a shift with mask in mode MODE, with
10998 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10999
11000 const char *
11001 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
11002 {
11003 int nb, ne;
11004
11005 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
11006 gcc_unreachable ();
11007
11008 if (mode == DImode && ne == 0)
11009 {
11010 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11011 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
11012 operands[3] = GEN_INT (63 - nb);
11013 if (dot)
11014 return "rld%I2cl. %0,%1,%2,%3";
11015 return "rld%I2cl %0,%1,%2,%3";
11016 }
11017
11018 if (mode == DImode && nb == 63)
11019 {
11020 operands[3] = GEN_INT (63 - ne);
11021 if (dot)
11022 return "rld%I2cr. %0,%1,%2,%3";
11023 return "rld%I2cr %0,%1,%2,%3";
11024 }
11025
11026 if (mode == DImode
11027 && GET_CODE (operands[4]) != LSHIFTRT
11028 && CONST_INT_P (operands[2])
11029 && ne == INTVAL (operands[2]))
11030 {
11031 operands[3] = GEN_INT (63 - nb);
11032 if (dot)
11033 return "rld%I2c. %0,%1,%2,%3";
11034 return "rld%I2c %0,%1,%2,%3";
11035 }
11036
11037 if (nb < 32 && ne < 32)
11038 {
11039 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11040 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
11041 operands[3] = GEN_INT (31 - nb);
11042 operands[4] = GEN_INT (31 - ne);
11043 /* This insn can also be a 64-bit rotate with mask that really makes
11044 it just a shift right (with mask); the %h uses below adjust for
11045 that situation (shift count is >= 32 in that case). */
11046 if (dot)
11047 return "rlw%I2nm. %0,%1,%h2,%3,%4";
11048 return "rlw%I2nm %0,%1,%h2,%3,%4";
11049 }
11050
11051 gcc_unreachable ();
11052 }
11053
11054 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
11055 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
11056 ASHIFT, or LSHIFTRT) in mode MODE. */
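/* Illustrative example: inserting the low byte of one register into the low
   byte of another is a shift by 0 with mask 0xff; in SImode that matches
   "rlwimi %0,%1,0,24,31" (rldimi is preferred on 64-bit targets because
   rlwimi is cracked). */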
11057
11058 bool
11059 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
11060 {
11061 int nb, ne;
11062
11063 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11064 return false;
11065
11066 int n = GET_MODE_PRECISION (mode);
11067
11068 int sh = INTVAL (XEXP (shift, 1));
11069 if (sh < 0 || sh >= n)
11070 return false;
11071
11072 rtx_code code = GET_CODE (shift);
11073
11074 /* Convert any shift by 0 to a rotate, to simplify below code. */
11075 if (sh == 0)
11076 code = ROTATE;
11077
11078 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11079 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
11080 code = ASHIFT;
11081 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
11082 {
11083 code = LSHIFTRT;
11084 sh = n - sh;
11085 }
11086
11087 /* DImode rotates need rldimi. */
11088 if (mode == DImode && code == ROTATE)
11089 return (ne == sh);
11090
11091 /* SImode rotates need rlwimi. */
11092 if (mode == SImode && code == ROTATE)
11093 return (nb < 32 && ne < 32 && sh < 32);
11094
11095 /* Wrap-around masks are only okay for rotates. */
11096 if (ne > nb)
11097 return false;
11098
11099 /* Don't allow ASHIFT if the mask is wrong for that. */
11100 if (code == ASHIFT && ne < sh)
11101 return false;
11102
11103 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
11104 if the mask is wrong for that. */
11105 if (nb < 32 && ne < 32 && sh < 32
11106 && !(code == LSHIFTRT && nb >= 32 - sh))
11107 return true;
11108
11109 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
11110 if the mask is wrong for that. */
11111 if (code == LSHIFTRT)
11112 sh = 64 - sh;
11113 if (ne == sh)
11114 return !(code == LSHIFTRT && nb >= sh);
11115
11116 return false;
11117 }
11118
11119 /* Return the instruction template for an insert with mask in mode MODE, with
11120 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11121
11122 const char *
11123 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
11124 {
11125 int nb, ne;
11126
11127 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
11128 gcc_unreachable ();
11129
11130 /* Prefer rldimi because rlwimi is cracked. */
11131 if (TARGET_POWERPC64
11132 && (!dot || mode == DImode)
11133 && GET_CODE (operands[4]) != LSHIFTRT
11134 && ne == INTVAL (operands[2]))
11135 {
11136 operands[3] = GEN_INT (63 - nb);
11137 if (dot)
11138 return "rldimi. %0,%1,%2,%3";
11139 return "rldimi %0,%1,%2,%3";
11140 }
11141
11142 if (nb < 32 && ne < 32)
11143 {
11144 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11145 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
11146 operands[3] = GEN_INT (31 - nb);
11147 operands[4] = GEN_INT (31 - ne);
11148 if (dot)
11149 return "rlwimi. %0,%1,%2,%3,%4";
11150 return "rlwimi %0,%1,%2,%3,%4";
11151 }
11152
11153 gcc_unreachable ();
11154 }
11155
11156 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
11157 using two machine instructions. */
11158
11159 bool
11160 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
11161 {
11162 /* There are two kinds of AND we can handle with two insns:
11163 1) those we can do with two rl* insn;
11164 2) ori[s];xori[s].
11165
11166 We do not handle that last case yet. */
11167
11168 /* If there is just one stretch of ones, we can do it. */
11169 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
11170 return true;
11171
11172 /* Otherwise, fill in the lowest "hole"; if we can do the result with
11173 one insn, we can do the whole thing with two. */
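/* Worked example (illustrative): for C = 0xff0ff0, bit1 = 0x10 is the lowest
   set bit, bit2 = 0x1000 is the lowest bit of the hole above it, and
   bit3 = 0x10000 is the lowest bit of the run above the hole, so the
   candidate val + bit3 - bit2 = 0xfffff0 is the original value with its
   lowest hole filled in, i.e. a single stretch of ones. */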
11174 unsigned HOST_WIDE_INT val = INTVAL (c);
11175 unsigned HOST_WIDE_INT bit1 = val & -val;
11176 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
11177 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
11178 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
11179 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
11180 }
11181
11182 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
11183 If EXPAND is true, split rotate-and-mask instructions we generate to
11184 their constituent parts as well (this is used during expand); if DOT
11185 is 1, make the last insn a record-form instruction clobbering the
11186 destination GPR and setting the CC reg (from operands[3]); if 2, set
11187 that GPR as well as the CC reg. */
11188
11189 void
11190 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
11191 {
11192 gcc_assert (!(expand && dot));
11193
11194 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
11195
11196 /* If it is one stretch of ones, it is DImode; shift left, mask, then
11197 shift right. This generates better code than doing the masks without
11198 shifts, or shifting first right and then left. */
11199 int nb, ne;
11200 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
11201 {
11202 gcc_assert (mode == DImode);
11203
11204 int shift = 63 - nb;
11205 if (expand)
11206 {
11207 rtx tmp1 = gen_reg_rtx (DImode);
11208 rtx tmp2 = gen_reg_rtx (DImode);
11209 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
11210 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
11211 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
11212 }
11213 else
11214 {
11215 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
11216 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
11217 emit_move_insn (operands[0], tmp);
11218 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
11219 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11220 }
11221 return;
11222 }
11223
11224 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
11225 that does the rest. */
11226 unsigned HOST_WIDE_INT bit1 = val & -val;
11227 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
11228 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
11229 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
11230
11231 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
11232 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
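/* Continuing the illustrative 0xff0ff0 example: mask1 = 0xffffffffffff0fff
   keeps everything but the hole, mask2 = 0xfffff0 is the value with the
   hole filled in, and mask1 & mask2 reproduces val. */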
11233
11234 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
11235
11236 /* Two "no-rotate"-and-mask instructions, for SImode. */
11237 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
11238 {
11239 gcc_assert (mode == SImode);
11240
11241 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
11242 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
11243 emit_move_insn (reg, tmp);
11244 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
11245 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11246 return;
11247 }
11248
11249 gcc_assert (mode == DImode);
11250
11251 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
11252 insns; we have to do the first in SImode, because it wraps. */
11253 if (mask2 <= 0xffffffff
11254 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
11255 {
11256 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
11257 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
11258 GEN_INT (mask1));
11259 rtx reg_low = gen_lowpart (SImode, reg);
11260 emit_move_insn (reg_low, tmp);
11261 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
11262 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11263 return;
11264 }
11265
11266 /* Two rld* insns: rotate, clear the hole in the middle (which now is
11267 at the top end), rotate back and clear the other hole. */
11268 int right = exact_log2 (bit3);
11269 int left = 64 - right;
11270
11271 /* Rotate the mask too. */
11272 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
11273
11274 if (expand)
11275 {
11276 rtx tmp1 = gen_reg_rtx (DImode);
11277 rtx tmp2 = gen_reg_rtx (DImode);
11278 rtx tmp3 = gen_reg_rtx (DImode);
11279 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
11280 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
11281 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
11282 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
11283 }
11284 else
11285 {
11286 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
11287 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
11288 emit_move_insn (operands[0], tmp);
11289 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
11290 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
11291 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11292 }
11293 }
11294 \f
11295 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
11296 for lfq and stfq insns iff the registers are hard registers. */
11297
11298 int
11299 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
11300 {
11301 /* We might have been passed a SUBREG. */
11302 if (!REG_P (reg1) || !REG_P (reg2))
11303 return 0;
11304
11305 /* We might have been passed non-floating-point registers. */
11306 if (!FP_REGNO_P (REGNO (reg1))
11307 || !FP_REGNO_P (REGNO (reg2)))
11308 return 0;
11309
11310 return (REGNO (reg1) == REGNO (reg2) - 1);
11311 }
11312
11313 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
11314 addr1 and addr2 must be in consecutive memory locations
11315 (addr2 == addr1 + 8). */
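/* E.g. (illustrative): (mem (plus (reg 9) (const_int 16))) paired with
   (mem (plus (reg 9) (const_int 24))) qualifies; the addresses share a base
   register and their offsets are exactly 8 apart. */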
11316
11317 int
11318 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
11319 {
11320 rtx addr1, addr2;
11321 unsigned int reg1, reg2;
11322 int offset1, offset2;
11323
11324 /* The mems cannot be volatile. */
11325 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
11326 return 0;
11327
11328 addr1 = XEXP (mem1, 0);
11329 addr2 = XEXP (mem2, 0);
11330
11331 /* Extract an offset (if used) from the first addr. */
11332 if (GET_CODE (addr1) == PLUS)
11333 {
11334 /* If not a REG, return zero. */
11335 if (!REG_P (XEXP (addr1, 0)))
11336 return 0;
11337 else
11338 {
11339 reg1 = REGNO (XEXP (addr1, 0));
11340 /* The offset must be constant! */
11341 if (!CONST_INT_P (XEXP (addr1, 1)))
11342 return 0;
11343 offset1 = INTVAL (XEXP (addr1, 1));
11344 }
11345 }
11346 else if (!REG_P (addr1))
11347 return 0;
11348 else
11349 {
11350 reg1 = REGNO (addr1);
11351 /* This was a simple (mem (reg)) expression. Offset is 0. */
11352 offset1 = 0;
11353 }
11354
11355 /* And now for the second addr. */
11356 if (GET_CODE (addr2) == PLUS)
11357 {
11358 /* If not a REG, return zero. */
11359 if (!REG_P (XEXP (addr2, 0)))
11360 return 0;
11361 else
11362 {
11363 reg2 = REGNO (XEXP (addr2, 0));
11364 /* The offset must be constant. */
11365 if (!CONST_INT_P (XEXP (addr2, 1)))
11366 return 0;
11367 offset2 = INTVAL (XEXP (addr2, 1));
11368 }
11369 }
11370 else if (!REG_P (addr2))
11371 return 0;
11372 else
11373 {
11374 reg2 = REGNO (addr2);
11375 /* This was a simple (mem (reg)) expression. Offset is 0. */
11376 offset2 = 0;
11377 }
11378
11379 /* Both of these must have the same base register. */
11380 if (reg1 != reg2)
11381 return 0;
11382
11383 /* The offset for the second addr must be 8 more than the first addr. */
11384 if (offset2 != offset1 + 8)
11385 return 0;
11386
11387 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
11388 instructions. */
11389 return 1;
11390 }
11391 \f
11392 /* Implement TARGET_SECONDARY_RELOAD_NEEDED_MODE. For SDmode values we
11393 need to use DDmode, in all other cases we can use the same mode. */
11394 static machine_mode
11395 rs6000_secondary_memory_needed_mode (machine_mode mode)
11396 {
11397 if (lra_in_progress && mode == SDmode)
11398 return DDmode;
11399 return mode;
11400 }
11401
11402 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
11403 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
11404 only work on the traditional altivec registers, note if an altivec register
11405 was chosen. */
11406
11407 static enum rs6000_reg_type
11408 register_to_reg_type (rtx reg, bool *is_altivec)
11409 {
11410 HOST_WIDE_INT regno;
11411 enum reg_class rclass;
11412
11413 if (SUBREG_P (reg))
11414 reg = SUBREG_REG (reg);
11415
11416 if (!REG_P (reg))
11417 return NO_REG_TYPE;
11418
11419 regno = REGNO (reg);
11420 if (!HARD_REGISTER_NUM_P (regno))
11421 {
11422 if (!lra_in_progress && !reload_completed)
11423 return PSEUDO_REG_TYPE;
11424
11425 regno = true_regnum (reg);
11426 if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
11427 return PSEUDO_REG_TYPE;
11428 }
11429
11430 gcc_assert (regno >= 0);
11431
11432 if (is_altivec && ALTIVEC_REGNO_P (regno))
11433 *is_altivec = true;
11434
11435 rclass = rs6000_regno_regclass[regno];
11436 return reg_class_to_reg_type[(int)rclass];
11437 }
11438
11439 /* Helper function to return the cost of adding a TOC entry address. */
11440
11441 static inline int
11442 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
11443 {
11444 int ret;
11445
11446 if (TARGET_CMODEL != CMODEL_SMALL)
11447 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
11448
11449 else
11450 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
11451
11452 return ret;
11453 }
11454
11455 /* Helper function for rs6000_secondary_reload to determine whether the memory
11456 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
11457 needs reloading. Return negative if the memory is not handled by the memory
11458 helper functions and to try a different reload method, 0 if no additional
11459 instructions are needed, and positive to give the extra cost for the
11460 memory. */
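/* For instance (illustrative): an Altivec-style address such as
   (and (reg) (const_int -16)) reloaded for a non-Altivec register class
   costs one extra insn if the inner address is a simple base register and
   two if it is indexed, matching the AND case below. */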
11461
11462 static int
11463 rs6000_secondary_reload_memory (rtx addr,
11464 enum reg_class rclass,
11465 machine_mode mode)
11466 {
11467 int extra_cost = 0;
11468 rtx reg, and_arg, plus_arg0, plus_arg1;
11469 addr_mask_type addr_mask;
11470 const char *type = NULL;
11471 const char *fail_msg = NULL;
11472
11473 if (GPR_REG_CLASS_P (rclass))
11474 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
11475
11476 else if (rclass == FLOAT_REGS)
11477 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
11478
11479 else if (rclass == ALTIVEC_REGS)
11480 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
11481
11482 /* For the combined VSX_REGS, turn off Altivec AND -16. */
11483 else if (rclass == VSX_REGS)
11484 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
11485 & ~RELOAD_REG_AND_M16);
11486
11487 /* If the register allocator hasn't made up its mind yet on the register
11488 class to use, settle on a default. */
11489 else if (rclass == NO_REGS)
11490 {
11491 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
11492 & ~RELOAD_REG_AND_M16);
11493
11494 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
11495 addr_mask &= ~(RELOAD_REG_INDEXED
11496 | RELOAD_REG_PRE_INCDEC
11497 | RELOAD_REG_PRE_MODIFY);
11498 }
11499
11500 else
11501 addr_mask = 0;
11502
11503 /* If the register isn't valid in this register class, just return now. */
11504 if ((addr_mask & RELOAD_REG_VALID) == 0)
11505 {
11506 if (TARGET_DEBUG_ADDR)
11507 {
11508 fprintf (stderr,
11509 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
11510 "not valid in class\n",
11511 GET_MODE_NAME (mode), reg_class_names[rclass]);
11512 debug_rtx (addr);
11513 }
11514
11515 return -1;
11516 }
11517
11518 switch (GET_CODE (addr))
11519 {
11520 /* Does the register class support auto update forms for this mode? We
11521 don't need a scratch register, since the powerpc only supports
11522 PRE_INC, PRE_DEC, and PRE_MODIFY. */
11523 case PRE_INC:
11524 case PRE_DEC:
11525 reg = XEXP (addr, 0);
11526 if (!base_reg_operand (reg, GET_MODE (reg)))
11527 {
11528 fail_msg = "no base register #1";
11529 extra_cost = -1;
11530 }
11531
11532 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
11533 {
11534 extra_cost = 1;
11535 type = "update";
11536 }
11537 break;
11538
11539 case PRE_MODIFY:
11540 reg = XEXP (addr, 0);
11541 plus_arg1 = XEXP (addr, 1);
11542 if (!base_reg_operand (reg, GET_MODE (reg))
11543 || GET_CODE (plus_arg1) != PLUS
11544 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
11545 {
11546 fail_msg = "bad PRE_MODIFY";
11547 extra_cost = -1;
11548 }
11549
11550 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
11551 {
11552 extra_cost = 1;
11553 type = "update";
11554 }
11555 break;
11556
11557 /* Do we need to simulate AND -16 to clear the bottom address bits used
11558 in VMX load/stores? Only allow the AND for vector sizes. */
11559 case AND:
11560 and_arg = XEXP (addr, 0);
11561 if (GET_MODE_SIZE (mode) != 16
11562 || !CONST_INT_P (XEXP (addr, 1))
11563 || INTVAL (XEXP (addr, 1)) != -16)
11564 {
11565 fail_msg = "bad Altivec AND #1";
11566 extra_cost = -1;
11567 }
11568
11569 if (rclass != ALTIVEC_REGS)
11570 {
11571 if (legitimate_indirect_address_p (and_arg, false))
11572 extra_cost = 1;
11573
11574 else if (legitimate_indexed_address_p (and_arg, false))
11575 extra_cost = 2;
11576
11577 else
11578 {
11579 fail_msg = "bad Altivec AND #2";
11580 extra_cost = -1;
11581 }
11582
11583 type = "and";
11584 }
11585 break;
11586
11587 /* If this is an indirect address, make sure it is a base register. */
11588 case REG:
11589 case SUBREG:
11590 if (!legitimate_indirect_address_p (addr, false))
11591 {
11592 extra_cost = 1;
11593 type = "move";
11594 }
11595 break;
11596
11597 /* If this is an indexed address, make sure the register class can handle
11598 indexed addresses for this mode. */
11599 case PLUS:
11600 plus_arg0 = XEXP (addr, 0);
11601 plus_arg1 = XEXP (addr, 1);
11602
11603 /* (plus (plus (reg) (constant)) (constant)) is generated during
11604 push_reload processing, so handle it now. */
11605 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
11606 {
11607 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11608 {
11609 extra_cost = 1;
11610 type = "offset";
11611 }
11612 }
11613
11614 /* (plus (plus (reg) (constant)) (reg)) is also generated during
11615 push_reload processing, so handle it now. */
11616 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
11617 {
11618 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11619 {
11620 extra_cost = 1;
11621 type = "indexed #2";
11622 }
11623 }
11624
11625 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
11626 {
11627 fail_msg = "no base register #2";
11628 extra_cost = -1;
11629 }
11630
11631 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
11632 {
11633 if ((addr_mask & RELOAD_REG_INDEXED) == 0
11634 || !legitimate_indexed_address_p (addr, false))
11635 {
11636 extra_cost = 1;
11637 type = "indexed";
11638 }
11639 }
11640
11641 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
11642 && CONST_INT_P (plus_arg1))
11643 {
11644 if (!quad_address_offset_p (INTVAL (plus_arg1)))
11645 {
11646 extra_cost = 1;
11647 type = "vector d-form offset";
11648 }
11649 }
11650
11651 /* Make sure the register class can handle offset addresses. */
11652 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
11653 {
11654 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11655 {
11656 extra_cost = 1;
11657 type = "offset #2";
11658 }
11659 }
11660
11661 else
11662 {
11663 fail_msg = "bad PLUS";
11664 extra_cost = -1;
11665 }
11666
11667 break;
11668
11669 case LO_SUM:
11670 /* Quad offsets are restricted and can't handle normal addresses. */
11671 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11672 {
11673 extra_cost = -1;
11674 type = "vector d-form lo_sum";
11675 }
11676
11677 else if (!legitimate_lo_sum_address_p (mode, addr, false))
11678 {
11679 fail_msg = "bad LO_SUM";
11680 extra_cost = -1;
11681 }
11682
11683 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11684 {
11685 extra_cost = 1;
11686 type = "lo_sum";
11687 }
11688 break;
11689
11690 /* Static addresses need to create a TOC entry. */
11691 case CONST:
11692 case SYMBOL_REF:
11693 case LABEL_REF:
11694 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11695 {
11696 extra_cost = -1;
11697 type = "vector d-form lo_sum #2";
11698 }
11699
11700 else
11701 {
11702 type = "address";
11703 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
11704 }
11705 break;
11706
11707 /* TOC references look like offsettable memory. */
11708 case UNSPEC:
11709 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
11710 {
11711 fail_msg = "bad UNSPEC";
11712 extra_cost = -1;
11713 }
11714
11715 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11716 {
11717 extra_cost = -1;
11718 type = "vector d-form lo_sum #3";
11719 }
11720
11721 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11722 {
11723 extra_cost = 1;
11724 type = "toc reference";
11725 }
11726 break;
11727
11728 default:
11729 {
11730 fail_msg = "bad address";
11731 extra_cost = -1;
11732 }
11733 }
11734
11735 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
11736 {
11737 if (extra_cost < 0)
11738 fprintf (stderr,
11739 "rs6000_secondary_reload_memory error: mode = %s, "
11740 "class = %s, addr_mask = '%s', %s\n",
11741 GET_MODE_NAME (mode),
11742 reg_class_names[rclass],
11743 rs6000_debug_addr_mask (addr_mask, false),
11744 (fail_msg != NULL) ? fail_msg : "<bad address>");
11745
11746 else
11747 fprintf (stderr,
11748 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
11749 "addr_mask = '%s', extra cost = %d, %s\n",
11750 GET_MODE_NAME (mode),
11751 reg_class_names[rclass],
11752 rs6000_debug_addr_mask (addr_mask, false),
11753 extra_cost,
11754 (type) ? type : "<none>");
11755
11756 debug_rtx (addr);
11757 }
11758
11759 return extra_cost;
11760 }
11761
11762 /* Helper function for rs6000_secondary_reload to return true if a move to a
11763 different register class is really a simple move. */
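/* E.g. (illustrative): a DImode move between a GPR and a VSX register on a
   64-bit ISA 2.07 target is a single mtvsrd or mfvsrd, so no scratch
   register is needed. */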
11764
11765 static bool
11766 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
11767 enum rs6000_reg_type from_type,
11768 machine_mode mode)
11769 {
11770 int size = GET_MODE_SIZE (mode);
11771
11772 /* Add support for various direct moves available. In this function, we only
11773 look at cases where we don't need any extra registers, and one or more
11774 simple move insns are issued. Originally, small integers are not allowed
11775 in FPR/VSX registers. Single precision binary floating point is not a simple
11776 move because we need to convert to the single precision memory layout.
11777 The 4-byte SDmode can be moved. TDmode values are disallowed since they
11778 need special direct move handling, which we do not support yet. */
11779 if (TARGET_DIRECT_MOVE
11780 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11781 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
11782 {
11783 if (TARGET_POWERPC64)
11784 {
11785 /* ISA 2.07: MTVSRD or MFVSRD. */
11786 if (size == 8)
11787 return true;
11788
11789 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
11790 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
11791 return true;
11792 }
11793
11794 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
11795 if (TARGET_P8_VECTOR)
11796 {
11797 if (mode == SImode)
11798 return true;
11799
11800 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
11801 return true;
11802 }
11803
11804 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
11805 if (mode == SDmode)
11806 return true;
11807 }
11808
11809 /* Move to/from SPR. */
11810 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
11811 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
11812 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
11813 return true;
11814
11815 return false;
11816 }
11817
11818 /* Direct move helper function for rs6000_secondary_reload. Handle all of the
11819 special direct moves that involve allocating an extra register. Return
11820 true if there is such a helper (storing its insn code and extra cost in
11821 SRI), and false if not. */
11822
11823 static bool
11824 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
11825 enum rs6000_reg_type from_type,
11826 machine_mode mode,
11827 secondary_reload_info *sri,
11828 bool altivec_p)
11829 {
11830 bool ret = false;
11831 enum insn_code icode = CODE_FOR_nothing;
11832 int cost = 0;
11833 int size = GET_MODE_SIZE (mode);
11834
11835 if (TARGET_POWERPC64 && size == 16)
11836 {
11837 /* Handle moving 128-bit values from GPRs to VSX registers on
11838 ISA 2.07 (power8, power9) when running in 64-bit mode using
11839 XXPERMDI to glue the two 64-bit values back together. */
11840 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
11841 {
11842 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
11843 icode = reg_addr[mode].reload_vsx_gpr;
11844 }
11845
11846 /* Handle moving 128-bit values from VSX registers to GPRs on
11847 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
11848 bottom 64-bit value. */
11849 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11850 {
11851 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
11852 icode = reg_addr[mode].reload_gpr_vsx;
11853 }
11854 }
11855
11856 else if (TARGET_POWERPC64 && mode == SFmode)
11857 {
11858 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11859 {
11860 cost = 3; /* xscvdpspn, mfvsrd, and. */
11861 icode = reg_addr[mode].reload_gpr_vsx;
11862 }
11863
11864 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
11865 {
11866 cost = 2; /* mtvsrz, xscvspdpn. */
11867 icode = reg_addr[mode].reload_vsx_gpr;
11868 }
11869 }
11870
11871 else if (!TARGET_POWERPC64 && size == 8)
11872 {
11873 /* Handle moving 64-bit values from GPRs to floating point registers on
11874 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
11875 32-bit values back together. Altivec register classes must be handled
11876 specially since a different instruction is used, and the secondary
11877 reload support requires a single instruction class in the scratch
11878 register constraint. However, right now TFmode is not allowed in
11879 Altivec registers, so the pattern will never match. */
11880 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
11881 {
11882 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
11883 icode = reg_addr[mode].reload_fpr_gpr;
11884 }
11885 }
11886
11887 if (icode != CODE_FOR_nothing)
11888 {
11889 ret = true;
11890 if (sri)
11891 {
11892 sri->icode = icode;
11893 sri->extra_cost = cost;
11894 }
11895 }
11896
11897 return ret;
11898 }
11899
11900 /* Return whether a move between two register classes can be done either
11901 directly (simple move) or via a pattern that uses a single extra temporary
11902 (using ISA 2.07's direct move in this case). */
11903
11904 static bool
11905 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
11906 enum rs6000_reg_type from_type,
11907 machine_mode mode,
11908 secondary_reload_info *sri,
11909 bool altivec_p)
11910 {
11911 /* Fall back to load/store reloads if either type is not a register. */
11912 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
11913 return false;
11914
11915 /* If we haven't allocated registers yet, assume the move can be done for the
11916 standard register types. */
11917 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
11918 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
11919 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
11920 return true;
11921
11922 /* A move within the same set of registers is a simple move for non-specialized
11923 registers. */
11924 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
11925 return true;
11926
11927 /* Check whether a simple move can be done directly. */
11928 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
11929 {
11930 if (sri)
11931 {
11932 sri->icode = CODE_FOR_nothing;
11933 sri->extra_cost = 0;
11934 }
11935 return true;
11936 }
11937
11938 /* Now check if we can do it in a few steps. */
11939 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
11940 altivec_p);
11941 }
11942
11943 /* Inform reload about cases where moving X with a mode MODE to a register in
11944 RCLASS requires an extra scratch or immediate register. Return the class
11945 needed for the immediate register.
11946
11947 For VSX and Altivec, we may need a register to convert sp+offset into
11948 reg+sp.
11949
11950 For misaligned 64-bit gpr loads and stores we need a register to
11951 convert an offset address to indirect. */
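/* For example (illustrative): a misaligned 64-bit load such as
   (mem:DI (plus (reg 1) (const_int 6))) into a GPR cannot use the DS-form
   ld instruction, whose displacement must be a multiple of 4, so a scratch
   register is requested below to build the address with an add first. */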
11952
11953 static reg_class_t
11954 rs6000_secondary_reload (bool in_p,
11955 rtx x,
11956 reg_class_t rclass_i,
11957 machine_mode mode,
11958 secondary_reload_info *sri)
11959 {
11960 enum reg_class rclass = (enum reg_class) rclass_i;
11961 reg_class_t ret = ALL_REGS;
11962 enum insn_code icode;
11963 bool default_p = false;
11964 bool done_p = false;
11965
11966 /* Allow subreg of memory before/during reload. */
11967 bool memory_p = (MEM_P (x)
11968 || (!reload_completed && SUBREG_P (x)
11969 && MEM_P (SUBREG_REG (x))));
11970
11971 sri->icode = CODE_FOR_nothing;
11972 sri->t_icode = CODE_FOR_nothing;
11973 sri->extra_cost = 0;
11974 icode = ((in_p)
11975 ? reg_addr[mode].reload_load
11976 : reg_addr[mode].reload_store);
11977
11978 if (REG_P (x) || register_operand (x, mode))
11979 {
11980 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
11981 bool altivec_p = (rclass == ALTIVEC_REGS);
11982 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
11983
11984 if (!in_p)
11985 std::swap (to_type, from_type);
11986
11987 /* Can we do a direct move of some sort? */
11988 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
11989 altivec_p))
11990 {
11991 icode = (enum insn_code)sri->icode;
11992 default_p = false;
11993 done_p = true;
11994 ret = NO_REGS;
11995 }
11996 }
11997
11998 /* Make sure 0.0 is not reloaded or forced into memory. */
11999 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
12000 {
12001 ret = NO_REGS;
12002 default_p = false;
12003 done_p = true;
12004 }
12005
12006 /* If this is a scalar floating point value and we want to load it into the
12007 traditional Altivec registers, do it via a traditional floating point
12008 register, unless we have D-form addressing. Also make sure that
12009 non-zero constants use an FPR. */
12010 if (!done_p && reg_addr[mode].scalar_in_vmx_p
12011 && !mode_supports_vmx_dform (mode)
12012 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
12013 && (memory_p || CONST_DOUBLE_P (x)))
12014 {
12015 ret = FLOAT_REGS;
12016 default_p = false;
12017 done_p = true;
12018 }
12019
12020 /* Handle reload of load/stores if we have reload helper functions. */
12021 if (!done_p && icode != CODE_FOR_nothing && memory_p)
12022 {
12023 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
12024 mode);
12025
12026 if (extra_cost >= 0)
12027 {
12028 done_p = true;
12029 ret = NO_REGS;
12030 if (extra_cost > 0)
12031 {
12032 sri->extra_cost = extra_cost;
12033 sri->icode = icode;
12034 }
12035 }
12036 }
12037
12038 /* Handle unaligned loads and stores of integer registers. */
12039 if (!done_p && TARGET_POWERPC64
12040 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
12041 && memory_p
12042 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
12043 {
12044 rtx addr = XEXP (x, 0);
12045 rtx off = address_offset (addr);
12046
12047 if (off != NULL_RTX)
12048 {
12049 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
12050 unsigned HOST_WIDE_INT offset = INTVAL (off);
12051
12052 /* We need a secondary reload when our legitimate_address_p
12053 says the address is good (as otherwise the entire address
12054 will be reloaded), and the offset is not a multiple of
12055 four or we have an address wrap. Address wrap will only
12056 occur for LO_SUMs since legitimate_offset_address_p
12057 rejects addresses for 16-byte mems that will wrap. */
12058 if (GET_CODE (addr) == LO_SUM
12059 ? (1 /* legitimate_address_p allows any offset for lo_sum */
12060 && ((offset & 3) != 0
12061 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
12062 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
12063 && (offset & 3) != 0))
12064 {
12065 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
12066 if (in_p)
12067 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
12068 : CODE_FOR_reload_di_load);
12069 else
12070 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
12071 : CODE_FOR_reload_di_store);
12072 sri->extra_cost = 2;
12073 ret = NO_REGS;
12074 done_p = true;
12075 }
12076 else
12077 default_p = true;
12078 }
12079 else
12080 default_p = true;
12081 }
12082
12083 if (!done_p && !TARGET_POWERPC64
12084 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
12085 && memory_p
12086 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
12087 {
12088 rtx addr = XEXP (x, 0);
12089 rtx off = address_offset (addr);
12090
12091 if (off != NULL_RTX)
12092 {
12093 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
12094 unsigned HOST_WIDE_INT offset = INTVAL (off);
12095
12096 /* We need a secondary reload when our legitimate_address_p
12097 says the address is good (as otherwise the entire address
12098 will be reloaded), and we have a wrap.
12099
12100 legitimate_lo_sum_address_p allows LO_SUM addresses to
12101 have any offset so test for wrap in the low 16 bits.
12102
12103 legitimate_offset_address_p checks for the range
12104 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
12105 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
12106 [0x7ff4,0x7fff] respectively, so test for the
12107 intersection of these ranges, [0x7ffc,0x7fff] and
12108 [0x7ff4,0x7ff7] respectively.
12109
12110 Note that the address we see here may have been
12111 manipulated by legitimize_reload_address. */
12112 if (GET_CODE (addr) == LO_SUM
12113 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
12114 : offset - (0x8000 - extra) < UNITS_PER_WORD)
12115 {
12116 if (in_p)
12117 sri->icode = CODE_FOR_reload_si_load;
12118 else
12119 sri->icode = CODE_FOR_reload_si_store;
12120 sri->extra_cost = 2;
12121 ret = NO_REGS;
12122 done_p = true;
12123 }
12124 else
12125 default_p = true;
12126 }
12127 else
12128 default_p = true;
12129 }
12130
12131 if (!done_p)
12132 default_p = true;
12133
12134 if (default_p)
12135 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
12136
12137 gcc_assert (ret != ALL_REGS);
12138
12139 if (TARGET_DEBUG_ADDR)
12140 {
12141 fprintf (stderr,
12142 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
12143 "mode = %s",
12144 reg_class_names[ret],
12145 in_p ? "true" : "false",
12146 reg_class_names[rclass],
12147 GET_MODE_NAME (mode));
12148
12149 if (reload_completed)
12150 fputs (", after reload", stderr);
12151
12152 if (!done_p)
12153 fputs (", done_p not set", stderr);
12154
12155 if (default_p)
12156 fputs (", default secondary reload", stderr);
12157
12158 if (sri->icode != CODE_FOR_nothing)
12159 fprintf (stderr, ", reload func = %s, extra cost = %d",
12160 insn_data[sri->icode].name, sri->extra_cost);
12161
12162 else if (sri->extra_cost > 0)
12163 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
12164
12165 fputs ("\n", stderr);
12166 debug_rtx (x);
12167 }
12168
12169 return ret;
12170 }
12171
12172 /* Better tracing for rs6000_secondary_reload_inner. */
12173
12174 static void
12175 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
12176 bool store_p)
12177 {
12178 rtx set, clobber;
12179
12180 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
12181
12182 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
12183 store_p ? "store" : "load");
12184
12185 if (store_p)
12186 set = gen_rtx_SET (mem, reg);
12187 else
12188 set = gen_rtx_SET (reg, mem);
12189
12190 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
12191 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
12192 }
12193
12194 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
12195 ATTRIBUTE_NORETURN;
12196
12197 static void
12198 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
12199 bool store_p)
12200 {
12201 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
12202 gcc_unreachable ();
12203 }
12204
12205 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
12206 reload helper functions. These were identified in
12207 rs6000_secondary_reload_memory, and if reload decided to use the secondary
12208 reload, it calls the insns:
12209 reload_<RELOAD:mode>_<P:mptrsize>_store
12210 reload_<RELOAD:mode>_<P:mptrsize>_load
12211
12212 which in turn call this function, to do whatever is necessary to create
12213 valid addresses. */
12214
12215 void
12216 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
12217 {
12218 int regno = true_regnum (reg);
12219 machine_mode mode = GET_MODE (reg);
12220 addr_mask_type addr_mask;
12221 rtx addr;
12222 rtx new_addr;
12223 rtx op_reg, op0, op1;
12224 rtx and_op;
12225 rtx cc_clobber;
12226 rtvec rv;
12227
12228 if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
12229 || !base_reg_operand (scratch, GET_MODE (scratch)))
12230 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12231
12232 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
12233 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
12234
12235 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
12236 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
12237
12238 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
12239 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
12240
12241 else
12242 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12243
12244 /* Make sure the mode is valid in this register class. */
12245 if ((addr_mask & RELOAD_REG_VALID) == 0)
12246 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12247
12248 if (TARGET_DEBUG_ADDR)
12249 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
12250
12251 new_addr = addr = XEXP (mem, 0);
12252 switch (GET_CODE (addr))
12253 {
12254 /* Does the register class support auto-update forms for this mode? If
12255 not, do the update now. We don't need a scratch register, since
12256 PowerPC only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
12257 case PRE_INC:
12258 case PRE_DEC:
12259 op_reg = XEXP (addr, 0);
12260 if (!base_reg_operand (op_reg, Pmode))
12261 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12262
12263 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
12264 {
12265 int delta = GET_MODE_SIZE (mode);
12266 if (GET_CODE (addr) == PRE_DEC)
12267 delta = -delta;
12268 emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
12269 new_addr = op_reg;
12270 }
12271 break;
12272
12273 case PRE_MODIFY:
12274 op0 = XEXP (addr, 0);
12275 op1 = XEXP (addr, 1);
12276 if (!base_reg_operand (op0, Pmode)
12277 || GET_CODE (op1) != PLUS
12278 || !rtx_equal_p (op0, XEXP (op1, 0)))
12279 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12280
12281 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
12282 {
12283 emit_insn (gen_rtx_SET (op0, op1));
12284 new_addr = reg;
12285 }
12286 break;
12287
12288 /* Do we need to simulate AND -16 to clear the bottom address bits used
12289 in VMX load/stores? */
12290 case AND:
12291 op0 = XEXP (addr, 0);
12292 op1 = XEXP (addr, 1);
12293 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
12294 {
12295 if (REG_P (op0) || SUBREG_P (op0))
12296 op_reg = op0;
12297
12298 else if (GET_CODE (op1) == PLUS)
12299 {
12300 emit_insn (gen_rtx_SET (scratch, op1));
12301 op_reg = scratch;
12302 }
12303
12304 else
12305 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12306
12307 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
12308 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
12309 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
12310 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
12311 new_addr = scratch;
12312 }
12313 break;
12314
12315 /* If this is an indirect address, make sure it is a base register. */
12316 case REG:
12317 case SUBREG:
12318 if (!base_reg_operand (addr, GET_MODE (addr)))
12319 {
12320 emit_insn (gen_rtx_SET (scratch, addr));
12321 new_addr = scratch;
12322 }
12323 break;
12324
12325 /* If this is an indexed address, make sure the register class can handle
12326 indexed addresses for this mode. */
12327 case PLUS:
12328 op0 = XEXP (addr, 0);
12329 op1 = XEXP (addr, 1);
12330 if (!base_reg_operand (op0, Pmode))
12331 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12332
12333 else if (int_reg_operand (op1, Pmode))
12334 {
12335 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
12336 {
12337 emit_insn (gen_rtx_SET (scratch, addr));
12338 new_addr = scratch;
12339 }
12340 }
12341
12342 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
12343 {
12344 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
12345 || !quad_address_p (addr, mode, false))
12346 {
12347 emit_insn (gen_rtx_SET (scratch, addr));
12348 new_addr = scratch;
12349 }
12350 }
12351
12352 /* Make sure the register class can handle offset addresses. */
12353 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
12354 {
12355 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12356 {
12357 emit_insn (gen_rtx_SET (scratch, addr));
12358 new_addr = scratch;
12359 }
12360 }
12361
12362 else
12363 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12364
12365 break;
12366
12367 case LO_SUM:
12368 op0 = XEXP (addr, 0);
12369 op1 = XEXP (addr, 1);
12370 if (!base_reg_operand (op0, Pmode))
12371 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12372
12373 else if (int_reg_operand (op1, Pmode))
12374 {
12375 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
12376 {
12377 emit_insn (gen_rtx_SET (scratch, addr));
12378 new_addr = scratch;
12379 }
12380 }
12381
12382 /* Quad offsets are restricted and can't handle normal addresses. */
12383 else if (mode_supports_dq_form (mode))
12384 {
12385 emit_insn (gen_rtx_SET (scratch, addr));
12386 new_addr = scratch;
12387 }
12388
12389 /* Make sure the register class can handle offset addresses. */
12390 else if (legitimate_lo_sum_address_p (mode, addr, false))
12391 {
12392 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12393 {
12394 emit_insn (gen_rtx_SET (scratch, addr));
12395 new_addr = scratch;
12396 }
12397 }
12398
12399 else
12400 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12401
12402 break;
12403
12404 case SYMBOL_REF:
12405 case CONST:
12406 case LABEL_REF:
12407 rs6000_emit_move (scratch, addr, Pmode);
12408 new_addr = scratch;
12409 break;
12410
12411 default:
12412 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12413 }
12414
12415 /* Adjust the address if it changed. */
12416 if (addr != new_addr)
12417 {
12418 mem = replace_equiv_address_nv (mem, new_addr);
12419 if (TARGET_DEBUG_ADDR)
12420 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
12421 }
12422
12423 /* Now create the move. */
12424 if (store_p)
12425 emit_insn (gen_rtx_SET (mem, reg));
12426 else
12427 emit_insn (gen_rtx_SET (reg, mem));
12428
12429 return;
12430 }
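/* A sketch (hypothetical helper) of the PRE_INC/PRE_DEC fallback in the
   switch above: when the register class has no update form for MODE, the
   increment is done as an explicit add, and the access then uses the plain
   base register as its address.  */
#if 0
static rtx
expand_pre_incdec (rtx base_reg, machine_mode mode, bool dec_p)
{
  int delta = GET_MODE_SIZE (mode);
  if (dec_p)
    delta = -delta;
  emit_insn (gen_add2_insn (base_reg, GEN_INT (delta)));
  return base_reg;	/* The new address is just the updated base.  */
}
#endif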
12431
12432 /* Convert reloads involving 64-bit gprs and misaligned offset
12433 addressing, or multiple 32-bit gprs and offsets that are too large,
12434 to use indirect addressing. */
12435
12436 void
12437 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
12438 {
12439 int regno = true_regnum (reg);
12440 enum reg_class rclass;
12441 rtx addr;
12442 rtx scratch_or_premodify = scratch;
12443
12444 if (TARGET_DEBUG_ADDR)
12445 {
12446 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
12447 store_p ? "store" : "load");
12448 fprintf (stderr, "reg:\n");
12449 debug_rtx (reg);
12450 fprintf (stderr, "mem:\n");
12451 debug_rtx (mem);
12452 fprintf (stderr, "scratch:\n");
12453 debug_rtx (scratch);
12454 }
12455
12456 gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
12457 gcc_assert (MEM_P (mem));
12458 rclass = REGNO_REG_CLASS (regno);
12459 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
12460 addr = XEXP (mem, 0);
12461
12462 if (GET_CODE (addr) == PRE_MODIFY)
12463 {
12464 gcc_assert (REG_P (XEXP (addr, 0))
12465 && GET_CODE (XEXP (addr, 1)) == PLUS
12466 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
12467 scratch_or_premodify = XEXP (addr, 0);
12468 addr = XEXP (addr, 1);
12469 }
12470 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
12471
12472 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
12473
12474 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
12475
12476 /* Now create the move. */
12477 if (store_p)
12478 emit_insn (gen_rtx_SET (mem, reg));
12479 else
12480 emit_insn (gen_rtx_SET (reg, mem));
12481
12482 return;
12483 }
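/* For example (schematic, register numbers hypothetical): a 32-bit load of
   a DImode value at r9 + 0x7ffc cannot be split into two lwz insns, since
   the second word's offset (0x8000) no longer fits in a signed 16-bit
   displacement.  After the conversion above, the address lives in the
   scratch register and both words use small offsets:

	addi 11,9,0x7ffc
	lwz 3,0(11)
	lwz 4,4(11)  */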
12484
12485 /* Given an rtx X being reloaded into a reg required to be
12486 in class CLASS, return the class of reg to actually use.
12487 In general this is just CLASS; but on some machines
12488 in some cases it is preferable to use a more restrictive class.
12489
12490 On the RS/6000, we have to return NO_REGS when we want to reload a
12491 floating-point CONST_DOUBLE to force it to be copied to memory.
12492
12493 We also don't want to reload integer values into floating-point
12494 registers if we can at all help it. In fact, this can
12495 cause reload to die, if it tries to generate a reload of CTR
12496 into a FP register and discovers it doesn't have the memory location
12497 required.
12498
12499 ??? Would it be a good idea to have reload do the converse, that is,
12500 try to reload floating modes into FP registers if possible?
12501 */
12502
12503 static enum reg_class
12504 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
12505 {
12506 machine_mode mode = GET_MODE (x);
12507 bool is_constant = CONSTANT_P (x);
12508
12509 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
12510 reload class for it. */
12511 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
12512 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
12513 return NO_REGS;
12514
12515 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
12516 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
12517 return NO_REGS;
12518
12519 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
12520 the reloading of address expressions using PLUS into floating point
12521 registers. */
12522 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
12523 {
12524 if (is_constant)
12525 {
12526 /* Zero is always allowed in all VSX registers. */
12527 if (x == CONST0_RTX (mode))
12528 return rclass;
12529
12530 /* If this is a vector constant that can be formed with a few Altivec
12531 instructions, we want altivec registers. */
12532 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
12533 return ALTIVEC_REGS;
12534
12535 /* If this is an integer constant that can easily be loaded into
12536 vector registers, allow it. */
12537 if (CONST_INT_P (x))
12538 {
12539 HOST_WIDE_INT value = INTVAL (x);
12540
12541 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
12542 2.06 can generate it in the Altivec registers with
12543 VSPLTI<x>. */
12544 if (value == -1)
12545 {
12546 if (TARGET_P8_VECTOR)
12547 return rclass;
12548 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
12549 return ALTIVEC_REGS;
12550 else
12551 return NO_REGS;
12552 }
12553
12554 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
12555 a sign extend in the Altivec registers. */
12556 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
12557 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
12558 return ALTIVEC_REGS;
12559 }
12560
12561 /* Force constant to memory. */
12562 return NO_REGS;
12563 }
12564
12565 /* D-form addressing can easily reload the value. */
12566 if (mode_supports_vmx_dform (mode)
12567 || mode_supports_dq_form (mode))
12568 return rclass;
12569
12570 /* If this is a scalar floating point value and we don't have D-form
12571 addressing, prefer the traditional floating point registers so that we
12572 can use D-form (register+offset) addressing. */
12573 if (rclass == VSX_REGS
12574 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
12575 return FLOAT_REGS;
12576
12577 /* Prefer the Altivec registers if Altivec is handling the vector
12578 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
12579 loads. */
12580 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
12581 || mode == V1TImode)
12582 return ALTIVEC_REGS;
12583
12584 return rclass;
12585 }
12586
12587 if (is_constant || GET_CODE (x) == PLUS)
12588 {
12589 if (reg_class_subset_p (GENERAL_REGS, rclass))
12590 return GENERAL_REGS;
12591 if (reg_class_subset_p (BASE_REGS, rclass))
12592 return BASE_REGS;
12593 return NO_REGS;
12594 }
12595
12596 /* For the vector pair and vector quad modes, prefer their natural register
12597 (VSX or FPR) rather than GPR registers. For other integer types, prefer
12598 the GPR registers. */
12599 if (rclass == GEN_OR_FLOAT_REGS)
12600 {
12601 if (mode == OOmode)
12602 return VSX_REGS;
12603
12604 if (mode == XOmode)
12605 return FLOAT_REGS;
12606
12607 if (GET_MODE_CLASS (mode) == MODE_INT)
12608 return GENERAL_REGS;
12609 }
12610
12611 return rclass;
12612 }
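/* A compressed sketch (hypothetical helper) of the integer constant cases
   above, assuming RCLASS is ALTIVEC_REGS or VSX_REGS.  */
#if 0
static enum reg_class
easy_vsx_constant_class (HOST_WIDE_INT value, enum reg_class rclass)
{
  /* ISA 2.07 makes -1 with xxlorc in any VSX register; ISA 2.06 needs
     vspltis<x>, which only targets the Altivec registers.  */
  if (value == -1)
    return TARGET_P8_VECTOR ? rclass : ALTIVEC_REGS;

  /* ISA 3.0 loads -128..127 with xxspltib plus a sign extend, again in
     the Altivec registers.  */
  if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR)
    return ALTIVEC_REGS;

  return NO_REGS;	/* Force the constant to memory.  */
}
#endif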
12613
12614 /* Debug version of rs6000_preferred_reload_class. */
12615 static enum reg_class
12616 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
12617 {
12618 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
12619
12620 fprintf (stderr,
12621 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
12622 "mode = %s, x:\n",
12623 reg_class_names[ret], reg_class_names[rclass],
12624 GET_MODE_NAME (GET_MODE (x)));
12625 debug_rtx (x);
12626
12627 return ret;
12628 }
12629
12630 /* If we are copying between FP or AltiVec registers and anything else, we need
12631 a memory location. The exception is when we are targeting ppc64 and the
12632 instructions that move directly between the FPRs and GPRs are available.
12633 Also, under VSX, you can copy vector registers from the FP register set to
12634 the Altivec register set and vice versa. */
12635
12636 static bool
12637 rs6000_secondary_memory_needed (machine_mode mode,
12638 reg_class_t from_class,
12639 reg_class_t to_class)
12640 {
12641 enum rs6000_reg_type from_type, to_type;
12642 bool altivec_p = ((from_class == ALTIVEC_REGS)
12643 || (to_class == ALTIVEC_REGS));
12644
12645 /* If a simple/direct move is available, we don't need secondary memory. */
12646 from_type = reg_class_to_reg_type[(int)from_class];
12647 to_type = reg_class_to_reg_type[(int)to_class];
12648
12649 if (rs6000_secondary_reload_move (to_type, from_type, mode,
12650 (secondary_reload_info *)0, altivec_p))
12651 return false;
12652
12653 /* If we have a floating point or vector register class, we need to use
12654 memory to transfer the data. */
12655 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
12656 return true;
12657
12658 return false;
12659 }
12660
12661 /* Debug version of rs6000_secondary_memory_needed. */
12662 static bool
12663 rs6000_debug_secondary_memory_needed (machine_mode mode,
12664 reg_class_t from_class,
12665 reg_class_t to_class)
12666 {
12667 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
12668
12669 fprintf (stderr,
12670 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
12671 "to_class = %s, mode = %s\n",
12672 ret ? "true" : "false",
12673 reg_class_names[from_class],
12674 reg_class_names[to_class],
12675 GET_MODE_NAME (mode));
12676
12677 return ret;
12678 }
12679
12680 /* Return the register class of a scratch register needed to copy IN into
12681 or out of a register in RCLASS in MODE. If it can be done directly,
12682 NO_REGS is returned. */
12683
12684 static enum reg_class
12685 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
12686 rtx in)
12687 {
12688 int regno;
12689
12690 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
12691 #if TARGET_MACHO
12692 && MACHOPIC_INDIRECT
12693 #endif
12694 ))
12695 {
12696 /* We cannot copy a symbolic operand directly into anything
12697 other than BASE_REGS for TARGET_ELF. So indicate that a
12698 register from BASE_REGS is needed as an intermediate
12699 register.
12700
12701 On Darwin, pic addresses require a load from memory, which
12702 needs a base register. */
12703 if (rclass != BASE_REGS
12704 && (SYMBOL_REF_P (in)
12705 || GET_CODE (in) == HIGH
12706 || GET_CODE (in) == LABEL_REF
12707 || GET_CODE (in) == CONST))
12708 return BASE_REGS;
12709 }
12710
12711 if (REG_P (in))
12712 {
12713 regno = REGNO (in);
12714 if (!HARD_REGISTER_NUM_P (regno))
12715 {
12716 regno = true_regnum (in);
12717 if (!HARD_REGISTER_NUM_P (regno))
12718 regno = -1;
12719 }
12720 }
12721 else if (SUBREG_P (in))
12722 {
12723 regno = true_regnum (in);
12724 if (!HARD_REGISTER_NUM_P (regno))
12725 regno = -1;
12726 }
12727 else
12728 regno = -1;
12729
12730 /* If we have VSX register moves, prefer moving scalar values between
12731 Altivec registers and GPR by going via an FPR (and then via memory)
12732 instead of reloading the secondary memory address for Altivec moves. */
12733 if (TARGET_VSX
12734 && GET_MODE_SIZE (mode) < 16
12735 && !mode_supports_vmx_dform (mode)
12736 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
12737 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
12738 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
12739 && (regno >= 0 && INT_REGNO_P (regno)))))
12740 return FLOAT_REGS;
12741
12742 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
12743 into anything. */
12744 if (rclass == GENERAL_REGS || rclass == BASE_REGS
12745 || (regno >= 0 && INT_REGNO_P (regno)))
12746 return NO_REGS;
12747
12748 /* Constants, memory, and VSX registers can go into VSX registers (both the
12749 traditional floating point and the altivec registers). */
12750 if (rclass == VSX_REGS
12751 && (regno == -1 || VSX_REGNO_P (regno)))
12752 return NO_REGS;
12753
12754 /* Constants, memory, and FP registers can go into FP registers. */
12755 if ((regno == -1 || FP_REGNO_P (regno))
12756 && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
12757 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
12758
12759 /* Memory and AltiVec registers can go into AltiVec registers. */
12760 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
12761 && rclass == ALTIVEC_REGS)
12762 return NO_REGS;
12763
12764 /* We can copy among the CR registers. */
12765 if ((rclass == CR_REGS || rclass == CR0_REGS)
12766 && regno >= 0 && CR_REGNO_P (regno))
12767 return NO_REGS;
12768
12769 /* Otherwise, we need GENERAL_REGS. */
12770 return GENERAL_REGS;
12771 }
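/* For example: with VSX but without D-form vector addressing, copying an
   SImode value held in an Altivec register into a GPR returns FLOAT_REGS
   here, so the value travels Altivec register -> FPR -> memory -> GPR
   instead of forcing a secondary memory address onto the Altivec register
   directly.  */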
12772
12773 /* Debug version of rs6000_secondary_reload_class. */
12774 static enum reg_class
12775 rs6000_debug_secondary_reload_class (enum reg_class rclass,
12776 machine_mode mode, rtx in)
12777 {
12778 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
12779 fprintf (stderr,
12780 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
12781 "mode = %s, input rtx:\n",
12782 reg_class_names[ret], reg_class_names[rclass],
12783 GET_MODE_NAME (mode));
12784 debug_rtx (in);
12785
12786 return ret;
12787 }
12788
12789 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
12790
12791 static bool
12792 rs6000_can_change_mode_class (machine_mode from,
12793 machine_mode to,
12794 reg_class_t rclass)
12795 {
12796 unsigned from_size = GET_MODE_SIZE (from);
12797 unsigned to_size = GET_MODE_SIZE (to);
12798
12799 if (from_size != to_size)
12800 {
12801 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
12802
12803 if (reg_classes_intersect_p (xclass, rclass))
12804 {
12805 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
12806 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
12807 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
12808 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
12809
12810 /* Don't allow 64-bit types to overlap with 128-bit types that take a
12811 single register under VSX because the scalar part of the register
12812 is in the upper 64-bits, and not the lower 64-bits. Types like
12813 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
12814 IEEE floating point can't overlap, and neither can small
12815 values. */
12816
12817 if (to_float128_vector_p && from_float128_vector_p)
12818 return true;
12819
12820 else if (to_float128_vector_p || from_float128_vector_p)
12821 return false;
12822
12823 /* TDmode in floating-mode registers must always go into a register
12824 pair with the most significant word in the even-numbered register
12825 to match ISA requirements. In little-endian mode, this does not
12826 match subreg numbering, so we cannot allow subregs. */
12827 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
12828 return false;
12829
12830 /* Allow SD<->DD changes, since SDmode values are stored in
12831 the low half of the DDmode, just like target-independent
12832 code expects. We need to allow at least SD->DD since
12833 rs6000_secondary_memory_needed_mode asks for that change
12834 to be made for SD reloads. */
12835 if ((to == DDmode && from == SDmode)
12836 || (to == SDmode && from == DDmode))
12837 return true;
12838
12839 if (from_size < 8 || to_size < 8)
12840 return false;
12841
12842 if (from_size == 8 && (8 * to_nregs) != to_size)
12843 return false;
12844
12845 if (to_size == 8 && (8 * from_nregs) != from_size)
12846 return false;
12847
12848 return true;
12849 }
12850 else
12851 return true;
12852 }
12853
12854 /* Since the VSX register set includes traditional floating point registers
12855 and altivec registers, just check for the size being different instead of
12856 trying to check whether the modes are vector modes. Otherwise it won't
12857 allow, say, DF and DI to change classes. For types like TFmode and TDmode
12858 that take 2 64-bit registers, rather than a single 128-bit register, don't
12859 allow subregs of those types to other 128-bit types. */
12860 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
12861 {
12862 unsigned num_regs = (from_size + 15) / 16;
12863 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
12864 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
12865 return false;
12866
12867 return (from_size == 8 || from_size == 16);
12868 }
12869
12870 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
12871 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
12872 return false;
12873
12874 return true;
12875 }
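/* Worked examples of the checks above for a VSX register class on a 64-bit
   target: DFmode <-> DImode is allowed (both 8 bytes, one register);
   DFmode <-> TDmode is rejected on little-endian because TDmode requires
   an even/odd FPR pair whose layout does not match subreg numbering; and
   KFmode <-> DImode is rejected because a 128-bit IEEE value sits in a
   single VSX register with the scalar part in the upper 64 bits.  */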
12876
12877 /* Debug version of rs6000_can_change_mode_class. */
12878 static bool
12879 rs6000_debug_can_change_mode_class (machine_mode from,
12880 machine_mode to,
12881 reg_class_t rclass)
12882 {
12883 bool ret = rs6000_can_change_mode_class (from, to, rclass);
12884
12885 fprintf (stderr,
12886 "rs6000_can_change_mode_class, return %s, from = %s, "
12887 "to = %s, rclass = %s\n",
12888 ret ? "true" : "false",
12889 GET_MODE_NAME (from), GET_MODE_NAME (to),
12890 reg_class_names[rclass]);
12891
12892 return ret;
12893 }
12894 \f
12895 /* Return a string to do a move operation of 128 bits of data. */
12896
12897 const char *
12898 rs6000_output_move_128bit (rtx operands[])
12899 {
12900 rtx dest = operands[0];
12901 rtx src = operands[1];
12902 machine_mode mode = GET_MODE (dest);
12903 int dest_regno;
12904 int src_regno;
12905 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
12906 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
12907
12908 if (REG_P (dest))
12909 {
12910 dest_regno = REGNO (dest);
12911 dest_gpr_p = INT_REGNO_P (dest_regno);
12912 dest_fp_p = FP_REGNO_P (dest_regno);
12913 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
12914 dest_vsx_p = dest_fp_p | dest_vmx_p;
12915 }
12916 else
12917 {
12918 dest_regno = -1;
12919 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
12920 }
12921
12922 if (REG_P (src))
12923 {
12924 src_regno = REGNO (src);
12925 src_gpr_p = INT_REGNO_P (src_regno);
12926 src_fp_p = FP_REGNO_P (src_regno);
12927 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
12928 src_vsx_p = src_fp_p | src_vmx_p;
12929 }
12930 else
12931 {
12932 src_regno = -1;
12933 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
12934 }
12935
12936 /* Register moves. */
12937 if (dest_regno >= 0 && src_regno >= 0)
12938 {
12939 if (dest_gpr_p)
12940 {
12941 if (src_gpr_p)
12942 return "#";
12943
12944 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
12945 return (WORDS_BIG_ENDIAN
12946 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
12947 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
12948
12949 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
12950 return "#";
12951 }
12952
12953 else if (TARGET_VSX && dest_vsx_p)
12954 {
12955 if (src_vsx_p)
12956 return "xxlor %x0,%x1,%x1";
12957
12958 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
12959 return (WORDS_BIG_ENDIAN
12960 ? "mtvsrdd %x0,%1,%L1"
12961 : "mtvsrdd %x0,%L1,%1");
12962
12963 else if (TARGET_DIRECT_MOVE && src_gpr_p)
12964 return "#";
12965 }
12966
12967 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
12968 return "vor %0,%1,%1";
12969
12970 else if (dest_fp_p && src_fp_p)
12971 return "#";
12972 }
12973
12974 /* Loads. */
12975 else if (dest_regno >= 0 && MEM_P (src))
12976 {
12977 if (dest_gpr_p)
12978 {
12979 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
12980 return "lq %0,%1";
12981 else
12982 return "#";
12983 }
12984
12985 else if (TARGET_ALTIVEC && dest_vmx_p
12986 && altivec_indexed_or_indirect_operand (src, mode))
12987 return "lvx %0,%y1";
12988
12989 else if (TARGET_VSX && dest_vsx_p)
12990 {
12991 if (mode_supports_dq_form (mode)
12992 && quad_address_p (XEXP (src, 0), mode, true))
12993 return "lxv %x0,%1";
12994
12995 else if (TARGET_P9_VECTOR)
12996 return "lxvx %x0,%y1";
12997
12998 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
12999 return "lxvw4x %x0,%y1";
13000
13001 else
13002 return "lxvd2x %x0,%y1";
13003 }
13004
13005 else if (TARGET_ALTIVEC && dest_vmx_p)
13006 return "lvx %0,%y1";
13007
13008 else if (dest_fp_p)
13009 return "#";
13010 }
13011
13012 /* Stores. */
13013 else if (src_regno >= 0 && MEM_P (dest))
13014 {
13015 if (src_gpr_p)
13016 {
13017 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
13018 return "stq %1,%0";
13019 else
13020 return "#";
13021 }
13022
13023 else if (TARGET_ALTIVEC && src_vmx_p
13024 && altivec_indexed_or_indirect_operand (dest, mode))
13025 return "stvx %1,%y0";
13026
13027 else if (TARGET_VSX && src_vsx_p)
13028 {
13029 if (mode_supports_dq_form (mode)
13030 && quad_address_p (XEXP (dest, 0), mode, true))
13031 return "stxv %x1,%0";
13032
13033 else if (TARGET_P9_VECTOR)
13034 return "stxvx %x1,%y0";
13035
13036 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
13037 return "stxvw4x %x1,%y0";
13038
13039 else
13040 return "stxvd2x %x1,%y0";
13041 }
13042
13043 else if (TARGET_ALTIVEC && src_vmx_p)
13044 return "stvx %1,%y0";
13045
13046 else if (src_fp_p)
13047 return "#";
13048 }
13049
13050 /* Constants. */
13051 else if (dest_regno >= 0
13052 && (CONST_INT_P (src)
13053 || CONST_WIDE_INT_P (src)
13054 || CONST_DOUBLE_P (src)
13055 || GET_CODE (src) == CONST_VECTOR))
13056 {
13057 if (dest_gpr_p)
13058 return "#";
13059
13060 else if ((dest_vmx_p && TARGET_ALTIVEC)
13061 || (dest_vsx_p && TARGET_VSX))
13062 return output_vec_const_move (operands);
13063 }
13064
13065 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
13066 }
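/* A summary sketch of the load selection above for a 128-bit value in a
   VSX register: lxv when the mode supports DQ-form and the address is a
   valid quad-word offset; otherwise lxvx on ISA 3.0; otherwise lxvw4x for
   V16QImode, V8HImode, and V4SImode, and lxvd2x for everything else.
   Stores choose among stxv/stxvx/stxvw4x/stxvd2x the same way.  */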
13067
13068 /* Validate a 128-bit move. */
13069 bool
13070 rs6000_move_128bit_ok_p (rtx operands[])
13071 {
13072 machine_mode mode = GET_MODE (operands[0]);
13073 return (gpc_reg_operand (operands[0], mode)
13074 || gpc_reg_operand (operands[1], mode));
13075 }
13076
13077 /* Return true if a 128-bit move needs to be split. */
13078 bool
13079 rs6000_split_128bit_ok_p (rtx operands[])
13080 {
13081 if (!reload_completed)
13082 return false;
13083
13084 if (!gpr_or_gpr_p (operands[0], operands[1]))
13085 return false;
13086
13087 if (quad_load_store_p (operands[0], operands[1]))
13088 return false;
13089
13090 return true;
13091 }
13092
13093 \f
13094 /* Given a comparison operation, return the bit number in CCR to test. We
13095 know this is a valid comparison.
13096
13097 SCC_P is 1 if this is for an scc. That means that %D will have been
13098 used instead of %C, so the bits will be in different places.
13099
13100 Return -1 if OP isn't a valid comparison for some reason. */
13101
13102 int
13103 ccr_bit (rtx op, int scc_p)
13104 {
13105 enum rtx_code code = GET_CODE (op);
13106 machine_mode cc_mode;
13107 int cc_regnum;
13108 int base_bit;
13109 rtx reg;
13110
13111 if (!COMPARISON_P (op))
13112 return -1;
13113
13114 reg = XEXP (op, 0);
13115
13116 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
13117 return -1;
13118
13119 cc_mode = GET_MODE (reg);
13120 cc_regnum = REGNO (reg);
13121 base_bit = 4 * (cc_regnum - CR0_REGNO);
13122
13123 validate_condition_mode (code, cc_mode);
13124
13125 /* When generating a sCOND operation, only positive conditions are
13126 allowed. */
13127 if (scc_p)
13128 switch (code)
13129 {
13130 case EQ:
13131 case GT:
13132 case LT:
13133 case UNORDERED:
13134 case GTU:
13135 case LTU:
13136 break;
13137 default:
13138 return -1;
13139 }
13140
13141 switch (code)
13142 {
13143 case NE:
13144 return scc_p ? base_bit + 3 : base_bit + 2;
13145 case EQ:
13146 return base_bit + 2;
13147 case GT: case GTU: case UNLE:
13148 return base_bit + 1;
13149 case LT: case LTU: case UNGE:
13150 return base_bit;
13151 case ORDERED: case UNORDERED:
13152 return base_bit + 3;
13153
13154 case GE: case GEU:
13155 /* If scc, we will have done a cror to put the bit in the
13156 unordered position. So test that bit. For integer, this is ! LT
13157 unless this is an scc insn. */
13158 return scc_p ? base_bit + 3 : base_bit;
13159
13160 case LE: case LEU:
13161 return scc_p ? base_bit + 3 : base_bit + 1;
13162
13163 default:
13164 return -1;
13165 }
13166 }
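/* Example of the numbering above: each CR field holds four bits, in the
   order LT, GT, EQ, SO/UN.  For a GT comparison against field cr2,
   base_bit is 4 * 2 = 8 and ccr_bit returns 8 + 1 = 9.  In the scc case,
   GE instead tests the SO/UN position (base_bit + 3), because a cror will
   already have copied the result bit there.  */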
13167 \f
13168 /* Return the GOT register. */
13169
13170 rtx
13171 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
13172 {
13173 /* The second flow pass currently (June 1999) can't update
13174 regs_ever_live without disturbing other parts of the compiler, so
13175 update it here to make the prolog/epilogue code happy. */
13176 if (!can_create_pseudo_p ()
13177 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
13178 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
13179
13180 crtl->uses_pic_offset_table = 1;
13181
13182 return pic_offset_table_rtx;
13183 }
13184 \f
13185 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
13186
13187 /* Write out a function code label. */
13188
13189 void
13190 rs6000_output_function_entry (FILE *file, const char *fname)
13191 {
13192 if (fname[0] != '.')
13193 {
13194 switch (DEFAULT_ABI)
13195 {
13196 default:
13197 gcc_unreachable ();
13198
13199 case ABI_AIX:
13200 if (DOT_SYMBOLS)
13201 putc ('.', file);
13202 else
13203 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
13204 break;
13205
13206 case ABI_ELFv2:
13207 case ABI_V4:
13208 case ABI_DARWIN:
13209 break;
13210 }
13211 }
13212
13213 RS6000_OUTPUT_BASENAME (file, fname);
13214 }
13215
13216 /* Print an operand. Recognize special options, documented below. */
13217
13218 #if TARGET_ELF
13219 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
13220 only introduced by the linker, when applying the sda21
13221 relocation. */
13222 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
13223 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
13224 #else
13225 #define SMALL_DATA_RELOC "sda21"
13226 #define SMALL_DATA_REG 0
13227 #endif
13228
13229 void
13230 print_operand (FILE *file, rtx x, int code)
13231 {
13232 int i;
13233 unsigned HOST_WIDE_INT uval;
13234
13235 switch (code)
13236 {
13237 /* %a is output_address. */
13238
13239 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
13240 output_operand. */
13241
13242 case 'A':
13243 /* Write the MMA accumulator number associated with VSX register X. */
13244 if (!REG_P (x) || !FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0)
13245 output_operand_lossage ("invalid %%A value");
13246 else
13247 fprintf (file, "%d", (REGNO (x) - FIRST_FPR_REGNO) / 4);
13248 return;
13249
13250 case 'D':
13251 /* Like 'J' but get to the GT bit only. */
13252 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13253 {
13254 output_operand_lossage ("invalid %%D value");
13255 return;
13256 }
13257
13258 /* Bit 1 is GT bit. */
13259 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
13260
13261 /* Add one for shift count in rlinm for scc. */
13262 fprintf (file, "%d", i + 1);
13263 return;
13264
13265 case 'e':
13266 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
13267 if (! INT_P (x))
13268 {
13269 output_operand_lossage ("invalid %%e value");
13270 return;
13271 }
13272
13273 uval = INTVAL (x);
13274 if ((uval & 0xffff) == 0 && uval != 0)
13275 putc ('s', file);
13276 return;
13277
13278 case 'E':
13279 /* X is a CR register. Print the number of the EQ bit of the CR. */
13280 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13281 output_operand_lossage ("invalid %%E value");
13282 else
13283 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
13284 return;
13285
13286 case 'f':
13287 /* X is a CR register. Print the shift count needed to move it
13288 to the high-order four bits. */
13289 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13290 output_operand_lossage ("invalid %%f value");
13291 else
13292 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
13293 return;
13294
13295 case 'F':
13296 /* Similar, but print the count for the rotate in the opposite
13297 direction. */
13298 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13299 output_operand_lossage ("invalid %%F value");
13300 else
13301 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
13302 return;
13303
13304 case 'G':
13305 /* X is a constant integer. If it is negative, print "m",
13306 otherwise print "z". This is to make an aze or ame insn. */
13307 if (!CONST_INT_P (x))
13308 output_operand_lossage ("invalid %%G value");
13309 else if (INTVAL (x) >= 0)
13310 putc ('z', file);
13311 else
13312 putc ('m', file);
13313 return;
13314
13315 case 'h':
13316 /* If constant, output low-order five bits. Otherwise, write
13317 normally. */
13318 if (INT_P (x))
13319 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
13320 else
13321 print_operand (file, x, 0);
13322 return;
13323
13324 case 'H':
13325 /* If constant, output low-order six bits. Otherwise, write
13326 normally. */
13327 if (INT_P (x))
13328 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
13329 else
13330 print_operand (file, x, 0);
13331 return;
13332
13333 case 'I':
13334 /* Print `i' if this is a constant, else nothing. */
13335 if (INT_P (x))
13336 putc ('i', file);
13337 return;
13338
13339 case 'j':
13340 /* Write the bit number in CCR for jump. */
13341 i = ccr_bit (x, 0);
13342 if (i == -1)
13343 output_operand_lossage ("invalid %%j code");
13344 else
13345 fprintf (file, "%d", i);
13346 return;
13347
13348 case 'J':
13349 /* Similar, but add one for shift count in rlinm for scc and pass
13350 scc flag to `ccr_bit'. */
13351 i = ccr_bit (x, 1);
13352 if (i == -1)
13353 output_operand_lossage ("invalid %%J code");
13354 else
13355 /* If we want bit 31, write a shift count of zero, not 32. */
13356 fprintf (file, "%d", i == 31 ? 0 : i + 1);
13357 return;
13358
13359 case 'k':
13360 /* X must be a constant. Write the 1's complement of the
13361 constant. */
13362 if (! INT_P (x))
13363 output_operand_lossage ("invalid %%k value");
13364 else
13365 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
13366 return;
13367
13368 case 'K':
13369 /* X must be a symbolic constant on ELF. Write an
13370 expression suitable for an 'addi' that adds in the low 16
13371 bits of the MEM. */
13372 if (GET_CODE (x) == CONST)
13373 {
13374 if (GET_CODE (XEXP (x, 0)) != PLUS
13375 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
13376 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
13377 || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
13378 output_operand_lossage ("invalid %%K value");
13379 }
13380 print_operand_address (file, x);
13381 fputs ("@l", file);
13382 return;
13383
13384 /* %l is output_asm_label. */
13385
13386 case 'L':
13387 /* Write second word of DImode or DFmode reference. Works on register
13388 or non-indexed memory only. */
13389 if (REG_P (x))
13390 fputs (reg_names[REGNO (x) + 1], file);
13391 else if (MEM_P (x))
13392 {
13393 machine_mode mode = GET_MODE (x);
13394 /* Handle possible auto-increment. Since it is pre-increment and
13395 we have already done it, we can just use an offset of word. */
13396 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13397 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13398 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
13399 UNITS_PER_WORD));
13400 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13401 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
13402 UNITS_PER_WORD));
13403 else
13404 output_address (mode, XEXP (adjust_address_nv (x, SImode,
13405 UNITS_PER_WORD),
13406 0));
13407
13408 if (small_data_operand (x, GET_MODE (x)))
13409 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13410 reg_names[SMALL_DATA_REG]);
13411 }
13412 return;
13413
13414 case 'N': /* Unused */
13415 /* Write the number of elements in the vector times 4. */
13416 if (GET_CODE (x) != PARALLEL)
13417 output_operand_lossage ("invalid %%N value");
13418 else
13419 fprintf (file, "%d", XVECLEN (x, 0) * 4);
13420 return;
13421
13422 case 'O': /* Unused */
13423 /* Similar, but subtract 1 first. */
13424 if (GET_CODE (x) != PARALLEL)
13425 output_operand_lossage ("invalid %%O value");
13426 else
13427 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
13428 return;
13429
13430 case 'p':
13431 /* X is a CONST_INT that is a power of two. Output the logarithm. */
13432 if (! INT_P (x)
13433 || INTVAL (x) < 0
13434 || (i = exact_log2 (INTVAL (x))) < 0)
13435 output_operand_lossage ("invalid %%p value");
13436 else
13437 fprintf (file, "%d", i);
13438 return;
13439
13440 case 'P':
13441 /* The operand must be an indirect memory reference. The result
13442 is the register name. */
13443 if (!MEM_P (x) || !REG_P (XEXP (x, 0))
13444 || REGNO (XEXP (x, 0)) >= 32)
13445 output_operand_lossage ("invalid %%P value");
13446 else
13447 fputs (reg_names[REGNO (XEXP (x, 0))], file);
13448 return;
13449
13450 case 'q':
13451 /* This outputs the logical code corresponding to a boolean
13452 expression. The expression may have one or both operands
13453 negated (if one, only the first one). For condition register
13454 logical operations, it will also treat the negated
13455 CR codes as NOTs, but not handle NOTs of them. */
13456 {
13457 const char *const *t = 0;
13458 const char *s;
13459 enum rtx_code code = GET_CODE (x);
13460 static const char * const tbl[3][3] = {
13461 { "and", "andc", "nor" },
13462 { "or", "orc", "nand" },
13463 { "xor", "eqv", "xor" } };
13464
13465 if (code == AND)
13466 t = tbl[0];
13467 else if (code == IOR)
13468 t = tbl[1];
13469 else if (code == XOR)
13470 t = tbl[2];
13471 else
13472 output_operand_lossage ("invalid %%q value");
13473
13474 if (GET_CODE (XEXP (x, 0)) != NOT)
13475 s = t[0];
13476 else
13477 {
13478 if (GET_CODE (XEXP (x, 1)) == NOT)
13479 s = t[2];
13480 else
13481 s = t[1];
13482 }
13483
13484 fputs (s, file);
13485 }
13486 return;
13487
13488 case 'Q':
13489 if (! TARGET_MFCRF)
13490 return;
13491 fputc (',', file);
13492 /* FALLTHRU */
13493
13494 case 'R':
13495 /* X is a CR register. Print the mask for `mtcrf'. */
13496 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13497 output_operand_lossage ("invalid %%R value");
13498 else
13499 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
13500 return;
13501
13502 case 's':
13503 /* Low 5 bits of 32 - value. */
13504 if (! INT_P (x))
13505 output_operand_lossage ("invalid %%s value");
13506 else
13507 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
13508 return;
13509
13510 case 't':
13511 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
13512 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13513 {
13514 output_operand_lossage ("invalid %%t value");
13515 return;
13516 }
13517
13518 /* Bit 3 is OV bit. */
13519 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
13520
13521 /* If we want bit 31, write a shift count of zero, not 32. */
13522 fprintf (file, "%d", i == 31 ? 0 : i + 1);
13523 return;
13524
13525 case 'T':
13526 /* Print the symbolic name of a branch target register. */
13527 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13528 x = XVECEXP (x, 0, 0);
13529 if (!REG_P (x) || (REGNO (x) != LR_REGNO
13530 && REGNO (x) != CTR_REGNO))
13531 output_operand_lossage ("invalid %%T value");
13532 else if (REGNO (x) == LR_REGNO)
13533 fputs ("lr", file);
13534 else
13535 fputs ("ctr", file);
13536 return;
13537
13538 case 'u':
13539 /* High-order or low-order 16 bits of constant, whichever is non-zero,
13540 for use in unsigned operand. */
13541 if (! INT_P (x))
13542 {
13543 output_operand_lossage ("invalid %%u value");
13544 return;
13545 }
13546
13547 uval = INTVAL (x);
13548 if ((uval & 0xffff) == 0)
13549 uval >>= 16;
13550
13551 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
13552 return;
13553
13554 case 'v':
13555 /* High-order 16 bits of constant for use in signed operand. */
13556 if (! INT_P (x))
13557 output_operand_lossage ("invalid %%v value");
13558 else
13559 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
13560 (INTVAL (x) >> 16) & 0xffff);
13561 return;
13562
13563 case 'U':
13564 /* Print `u' if this has an auto-increment or auto-decrement. */
13565 if (MEM_P (x)
13566 && (GET_CODE (XEXP (x, 0)) == PRE_INC
13567 || GET_CODE (XEXP (x, 0)) == PRE_DEC
13568 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
13569 putc ('u', file);
13570 return;
13571
13572 case 'V':
13573 /* Print the trap code for this operand. */
13574 switch (GET_CODE (x))
13575 {
13576 case EQ:
13577 fputs ("eq", file); /* 4 */
13578 break;
13579 case NE:
13580 fputs ("ne", file); /* 24 */
13581 break;
13582 case LT:
13583 fputs ("lt", file); /* 16 */
13584 break;
13585 case LE:
13586 fputs ("le", file); /* 20 */
13587 break;
13588 case GT:
13589 fputs ("gt", file); /* 8 */
13590 break;
13591 case GE:
13592 fputs ("ge", file); /* 12 */
13593 break;
13594 case LTU:
13595 fputs ("llt", file); /* 2 */
13596 break;
13597 case LEU:
13598 fputs ("lle", file); /* 6 */
13599 break;
13600 case GTU:
13601 fputs ("lgt", file); /* 1 */
13602 break;
13603 case GEU:
13604 fputs ("lge", file); /* 5 */
13605 break;
13606 default:
13607 output_operand_lossage ("invalid %%V value");
13608 }
13609 break;
13610
13611 case 'w':
13612 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
13613 normally. */
13614 if (INT_P (x))
13615 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
13616 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
13617 else
13618 print_operand (file, x, 0);
13619 return;
13620
13621 case 'x':
13622 /* X is a FPR or Altivec register used in a VSX context. */
13623 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
13624 output_operand_lossage ("invalid %%x value");
13625 else
13626 {
13627 int reg = REGNO (x);
13628 int vsx_reg = (FP_REGNO_P (reg)
13629 ? reg - 32
13630 : reg - FIRST_ALTIVEC_REGNO + 32);
13631
13632 #ifdef TARGET_REGNAMES
13633 if (TARGET_REGNAMES)
13634 fprintf (file, "%%vs%d", vsx_reg);
13635 else
13636 #endif
13637 fprintf (file, "%d", vsx_reg);
13638 }
13639 return;
13640
13641 case 'X':
13642 if (MEM_P (x)
13643 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
13644 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
13645 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
13646 putc ('x', file);
13647 return;
13648
13649 case 'Y':
13650 /* Like 'L', for third word of TImode/PTImode. */
13651 if (REG_P (x))
13652 fputs (reg_names[REGNO (x) + 2], file);
13653 else if (MEM_P (x))
13654 {
13655 machine_mode mode = GET_MODE (x);
13656 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13657 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13658 output_address (mode, plus_constant (Pmode,
13659 XEXP (XEXP (x, 0), 0), 8));
13660 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13661 output_address (mode, plus_constant (Pmode,
13662 XEXP (XEXP (x, 0), 0), 8));
13663 else
13664 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
13665 if (small_data_operand (x, GET_MODE (x)))
13666 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13667 reg_names[SMALL_DATA_REG]);
13668 }
13669 return;
13670
13671 case 'z':
13672 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13673 x = XVECEXP (x, 0, 1);
13674 /* X is a SYMBOL_REF. Write out the name preceded by a
13675 period and without any trailing data in brackets. Used for function
13676 names. If we are configured for System V (or the embedded ABI) on
13677 the PowerPC, do not emit the period, since those systems do not use
13678 TOCs and the like. */
13679 if (!SYMBOL_REF_P (x))
13680 {
13681 output_operand_lossage ("invalid %%z value");
13682 return;
13683 }
13684
13685 /* For macho, check to see if we need a stub. */
13686 if (TARGET_MACHO)
13687 {
13688 const char *name = XSTR (x, 0);
13689 #if TARGET_MACHO
13690 if (darwin_symbol_stubs
13691 && MACHOPIC_INDIRECT
13692 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13693 name = machopic_indirection_name (x, /*stub_p=*/true);
13694 #endif
13695 assemble_name (file, name);
13696 }
13697 else if (!DOT_SYMBOLS)
13698 assemble_name (file, XSTR (x, 0));
13699 else
13700 rs6000_output_function_entry (file, XSTR (x, 0));
13701 return;
13702
13703 case 'Z':
13704 /* Like 'L', for last word of TImode/PTImode. */
13705 if (REG_P (x))
13706 fputs (reg_names[REGNO (x) + 3], file);
13707 else if (MEM_P (x))
13708 {
13709 machine_mode mode = GET_MODE (x);
13710 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13711 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13712 output_address (mode, plus_constant (Pmode,
13713 XEXP (XEXP (x, 0), 0), 12));
13714 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13715 output_address (mode, plus_constant (Pmode,
13716 XEXP (XEXP (x, 0), 0), 12));
13717 else
13718 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
13719 if (small_data_operand (x, GET_MODE (x)))
13720 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13721 reg_names[SMALL_DATA_REG]);
13722 }
13723 return;
13724
13725 /* Print AltiVec memory operand. */
13726 case 'y':
13727 {
13728 rtx tmp;
13729
13730 gcc_assert (MEM_P (x));
13731
13732 tmp = XEXP (x, 0);
13733
13734 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
13735 && GET_CODE (tmp) == AND
13736 && CONST_INT_P (XEXP (tmp, 1))
13737 && INTVAL (XEXP (tmp, 1)) == -16)
13738 tmp = XEXP (tmp, 0);
13739 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
13740 && GET_CODE (tmp) == PRE_MODIFY)
13741 tmp = XEXP (tmp, 1);
13742 if (REG_P (tmp))
13743 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
13744 else
13745 {
13746 if (GET_CODE (tmp) != PLUS
13747 || !REG_P (XEXP (tmp, 0))
13748 || !REG_P (XEXP (tmp, 1)))
13749 {
13750 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
13751 break;
13752 }
13753
13754 if (REGNO (XEXP (tmp, 0)) == 0)
13755 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
13756 reg_names[ REGNO (XEXP (tmp, 0)) ]);
13757 else
13758 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
13759 reg_names[ REGNO (XEXP (tmp, 1)) ]);
13760 }
13761 break;
13762 }
13763
13764 case 0:
13765 if (REG_P (x))
13766 fprintf (file, "%s", reg_names[REGNO (x)]);
13767 else if (MEM_P (x))
13768 {
13769 /* We need to handle PRE_INC and PRE_DEC here, since we need to
13770 know the width from the mode. */
13771 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
13772 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
13773 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
13774 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
13775 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
13776 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
13777 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13778 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
13779 else
13780 output_address (GET_MODE (x), XEXP (x, 0));
13781 }
13782 else if (toc_relative_expr_p (x, false,
13783 &tocrel_base_oac, &tocrel_offset_oac))
13784 /* This hack along with a corresponding hack in
13785 rs6000_output_addr_const_extra arranges to output addends
13786 where the assembler expects to find them, e.g.
13787 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
13788 without this hack would be output as "x@toc+4". We
13789 want "x+4@toc". */
13790 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
13791 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
13792 output_addr_const (file, XVECEXP (x, 0, 0));
13793 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13794 output_addr_const (file, XVECEXP (x, 0, 1));
13795 else
13796 output_addr_const (file, x);
13797 return;
13798
13799 case '&':
13800 if (const char *name = get_some_local_dynamic_name ())
13801 assemble_name (file, name);
13802 else
13803 output_operand_lossage ("'%%&' used without any "
13804 "local dynamic TLS references");
13805 return;
13806
13807 default:
13808 output_operand_lossage ("invalid %%xn code");
13809 }
13810 }
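/* Worked examples for the constant codes above (values hypothetical):
   with x == (const_int 0x12340000), '%u' sees a zero low half and prints
   the high half, 0x1234; with x == (const_int 0x8001), '%w' prints the
   low 16 bits sign extended, ((0x8001 & 0xffff) ^ 0x8000) - 0x8000
   = -32767, and '%h' prints the low five bits, 0x8001 & 31 = 1.  */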
13811 \f
13812 /* Print the address of an operand. */
13813
13814 void
13815 print_operand_address (FILE *file, rtx x)
13816 {
13817 if (REG_P (x))
13818 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
13819
13820 /* Is it a PC-relative address? */
13821 else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
13822 {
13823 HOST_WIDE_INT offset;
13824
13825 if (GET_CODE (x) == CONST)
13826 x = XEXP (x, 0);
13827
13828 if (GET_CODE (x) == PLUS)
13829 {
13830 offset = INTVAL (XEXP (x, 1));
13831 x = XEXP (x, 0);
13832 }
13833 else
13834 offset = 0;
13835
13836 output_addr_const (file, x);
13837
13838 if (offset)
13839 fprintf (file, "%+" PRId64, offset);
13840
13841 if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
13842 fprintf (file, "@got");
13843
13844 fprintf (file, "@pcrel");
13845 }
13846 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
13847 || GET_CODE (x) == LABEL_REF)
13848 {
13849 output_addr_const (file, x);
13850 if (small_data_operand (x, GET_MODE (x)))
13851 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13852 reg_names[SMALL_DATA_REG]);
13853 else
13854 gcc_assert (!TARGET_TOC);
13855 }
13856 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
13857 && REG_P (XEXP (x, 1)))
13858 {
13859 if (REGNO (XEXP (x, 0)) == 0)
13860 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
13861 reg_names[ REGNO (XEXP (x, 0)) ]);
13862 else
13863 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
13864 reg_names[ REGNO (XEXP (x, 1)) ]);
13865 }
13866 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
13867 && CONST_INT_P (XEXP (x, 1)))
13868 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
13869 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
13870 #if TARGET_MACHO
13871 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
13872 && CONSTANT_P (XEXP (x, 1)))
13873 {
13874 fprintf (file, "lo16(");
13875 output_addr_const (file, XEXP (x, 1));
13876 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
13877 }
13878 #endif
13879 #if TARGET_ELF
13880 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
13881 && CONSTANT_P (XEXP (x, 1)))
13882 {
13883 output_addr_const (file, XEXP (x, 1));
13884 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
13885 }
13886 #endif
13887 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
13888 {
13889 /* This hack along with a corresponding hack in
13890 rs6000_output_addr_const_extra arranges to output addends
13891 where the assembler expects to find them, e.g.
13892 (lo_sum (reg 9)
13893 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
13894 without this hack would be output as "x@toc+8@l(9)". We
13895 want "x+8@toc@l(9)". */
13896 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
13897 if (GET_CODE (x) == LO_SUM)
13898 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
13899 else
13900 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
13901 }
13902 else
13903 output_addr_const (file, x);
13904 }
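/* Example renderings (default register names, symbols hypothetical):
   (reg 9) prints as "0(9)"; (plus (reg 9) (const_int 16)) as "16(9)";
   (plus (reg 9) (reg 10)) as "9,10"; and with -mpcrel a non-local symbol
   prints as "sym@got@pcrel".  */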
13905 \f
13906 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13907
13908 bool
13909 rs6000_output_addr_const_extra (FILE *file, rtx x)
13910 {
13911 if (GET_CODE (x) == UNSPEC)
13912 switch (XINT (x, 1))
13913 {
13914 case UNSPEC_TOCREL:
13915 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
13916 && REG_P (XVECEXP (x, 0, 1))
13917 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
13918 output_addr_const (file, XVECEXP (x, 0, 0));
13919 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
13920 {
13921 if (INTVAL (tocrel_offset_oac) >= 0)
13922 fprintf (file, "+");
13923 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
13924 }
13925 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
13926 {
13927 putc ('-', file);
13928 assemble_name (file, toc_label_name);
13929 need_toc_init = 1;
13930 }
13931 else if (TARGET_ELF)
13932 fputs ("@toc", file);
13933 return true;
13934
13935 #if TARGET_MACHO
13936 case UNSPEC_MACHOPIC_OFFSET:
13937 output_addr_const (file, XVECEXP (x, 0, 0));
13938 putc ('-', file);
13939 machopic_output_function_base_name (file);
13940 return true;
13941 #endif
13942 }
13943 return false;
13944 }
13945 \f
13946 /* Target hook for assembling integer objects. The PowerPC version has
13947 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
13948 is defined. It also needs to handle DI-mode objects on 64-bit
13949 targets. */
13950
13951 static bool
13952 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
13953 {
13954 #ifdef RELOCATABLE_NEEDS_FIXUP
13955 /* Special handling for SI values. */
13956 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
13957 {
13958 static int recurse = 0;
13959
13960 /* For -mrelocatable, we mark all addresses that need to be fixed up in
13961 the .fixup section. Since the TOC section is already relocated, we
13962 don't need to mark it here. We used to skip the text section, but it
13963 should never be valid for relocated addresses to be placed in the text
13964 section. */
13965 if (DEFAULT_ABI == ABI_V4
13966 && (TARGET_RELOCATABLE || flag_pic > 1)
13967 && in_section != toc_section
13968 && !recurse
13969 && !CONST_SCALAR_INT_P (x)
13970 && CONSTANT_P (x))
13971 {
13972 char buf[256];
13973
13974 recurse = 1;
13975 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
13976 fixuplabelno++;
13977 ASM_OUTPUT_LABEL (asm_out_file, buf);
13978 fprintf (asm_out_file, "\t.long\t(");
13979 output_addr_const (asm_out_file, x);
13980 fprintf (asm_out_file, ")@fixup\n");
13981 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
13982 ASM_OUTPUT_ALIGN (asm_out_file, 2);
13983 fprintf (asm_out_file, "\t.long\t");
13984 assemble_name (asm_out_file, buf);
13985 fprintf (asm_out_file, "\n\t.previous\n");
13986 recurse = 0;
13987 return true;
13988 }
13989 /* Remove initial .'s to turn a -mcall-aixdesc function
13990 address into the address of the descriptor, not the function
13991 itself. */
13992 else if (SYMBOL_REF_P (x)
13993 && XSTR (x, 0)[0] == '.'
13994 && DEFAULT_ABI == ABI_AIX)
13995 {
13996 const char *name = XSTR (x, 0);
13997 while (*name == '.')
13998 name++;
13999
14000 fprintf (asm_out_file, "\t.long\t%s\n", name);
14001 return true;
14002 }
14003 }
14004 #endif /* RELOCATABLE_NEEDS_FIXUP */
14005 return default_assemble_integer (x, size, aligned_p);
14006 }
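/* For a relocatable SImode constant address, the fixup branch above emits,
   schematically (label number hypothetical):

	.LCP42:
		.long (sym)@fixup
		.section ".fixup","aw"
		.align 2
		.long .LCP42
		.previous  */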
14007
14008 /* Return a template string for assembly to emit when making an
14009 external call. FUNOP is the call mem argument operand number. */
14010
14011 static const char *
14012 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
14013 {
14014 /* -Wformat-overflow workaround, without which gcc thinks that %u
14015 might produce 10 digits. */
14016 gcc_assert (funop <= MAX_RECOG_OPERANDS);
14017
14018 char arg[12];
14019 arg[0] = 0;
14020 if (GET_CODE (operands[funop + 1]) == UNSPEC)
14021 {
14022 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
14023 sprintf (arg, "(%%%u@tlsgd)", funop + 1);
14024 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
14025 sprintf (arg, "(%%&@tlsld)");
14026 }
14027
14028 /* The magic 32768 offset here corresponds to the offset of
14029 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
14030 char z[11];
14031 sprintf (z, "%%z%u%s", funop,
14032 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
14033 ? "+32768" : ""));
14034
14035 static char str[32]; /* 1 spare */
14036 if (rs6000_pcrel_p ())
14037 sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
14038 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
14039 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
14040 sibcall ? "" : "\n\tnop");
14041 else if (DEFAULT_ABI == ABI_V4)
14042 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
14043 flag_pic ? "@plt" : "");
14044 #if TARGET_MACHO
14045 /* If/when we remove the mlongcall opt, we can share the AIX/ELFv2 case. */
14046 else if (DEFAULT_ABI == ABI_DARWIN)
14047 {
14048 /* The cookie is in operand func+2. */
14049 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
14050 int cookie = INTVAL (operands[funop + 2]);
14051 if (cookie & CALL_LONG)
14052 {
14053 tree funname = get_identifier (XSTR (operands[funop], 0));
14054 tree labelname = get_prev_label (funname);
14055 gcc_checking_assert (labelname && !sibcall);
14056
14057 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
14058 instruction will reach 'foo', otherwise link as 'bl L42'".
14059 "L42" should be a 'branch island', that will do a far jump to
14060 'foo'. Branch islands are generated in
14061 macho_branch_islands(). */
14062 sprintf (str, "jbsr %%z%u,%.10s", funop,
14063 IDENTIFIER_POINTER (labelname));
14064 }
14065 else
14066 /* Same as AIX or ELFv2, except that, to keep backwards
14067 compatibility, no nop is emitted after the call. */
14068 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
14069 }
14070 #endif
14071 else
14072 gcc_unreachable ();
14073 return str;
14074 }
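/* Illustrative examples (not exhaustive) of the templates built above
   for a direct call:
	pcrel:		bl %z0@notoc
	AIX/ELFv2:	bl %z0
			nop
	V4 with PIC:	bl %z0@plt
   where %z is later expanded by the operand printer to the function's
   symbol.  */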
14075
14076 const char *
14077 rs6000_call_template (rtx *operands, unsigned int funop)
14078 {
14079 return rs6000_call_template_1 (operands, funop, false);
14080 }
14081
14082 const char *
14083 rs6000_sibcall_template (rtx *operands, unsigned int funop)
14084 {
14085 return rs6000_call_template_1 (operands, funop, true);
14086 }
14087
14088 /* As above, for indirect calls. */
14089
14090 static const char *
14091 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
14092 bool sibcall)
14093 {
14094 /* -Wformat-overflow workaround, without which gcc thinks that %u
14095 might produce 10 digits. Note that -Wformat-overflow will not
14096 currently warn here for str[], so do not rely on a warning to
14097 ensure str[] is correctly sized. */
14098 gcc_assert (funop <= MAX_RECOG_OPERANDS);
14099
14100 /* Currently, funop is either 0 or 1. The maximum string is always
14101 a !speculate 64-bit __tls_get_addr call.
14102
14103 ABI_ELFv2, pcrel:
14104 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14105 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
14106 . 9 crset 2\n\t
14107 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14108 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
14109 . 8 beq%T1l-
14110 .---
14111 .142
14112
14113 ABI_AIX:
14114 . 9 ld 2,%3\n\t
14115 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14116 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14117 . 9 crset 2\n\t
14118 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14119 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14120 . 10 beq%T1l-\n\t
14121 . 10 ld 2,%4(1)
14122 .---
14123 .151
14124
14125 ABI_ELFv2:
14126 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14127 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14128 . 9 crset 2\n\t
14129 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14130 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14131 . 10 beq%T1l-\n\t
14132 . 10 ld 2,%3(1)
14133 .---
14134 .142
14135
14136 ABI_V4:
14137 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14138 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
14139 . 9 crset 2\n\t
14140 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14141 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
14142 . 8 beq%T1l-
14143 .---
14144 .141 */
14145 static char str[160]; /* 8 spare */
14146 char *s = str;
14147 const char *ptrload = TARGET_64BIT ? "d" : "wz";
14148
14149 if (DEFAULT_ABI == ABI_AIX)
14150 s += sprintf (s,
14151 "l%s 2,%%%u\n\t",
14152 ptrload, funop + 3);
14153
14154 /* We don't need the extra code to stop indirect call speculation if
14155 calling via LR. */
14156 bool speculate = (TARGET_MACHO
14157 || rs6000_speculate_indirect_jumps
14158 || (REG_P (operands[funop])
14159 && REGNO (operands[funop]) == LR_REGNO));
14160
14161 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
14162 {
14163 const char *rel64 = TARGET_64BIT ? "64" : "";
14164 char tls[29];
14165 tls[0] = 0;
14166 if (GET_CODE (operands[funop + 1]) == UNSPEC)
14167 {
14168 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
14169 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
14170 rel64, funop + 1);
14171 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
14172 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
14173 rel64);
14174 }
14175
14176 const char *notoc = rs6000_pcrel_p () ? "_NOTOC" : "";
14177 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
14178 && flag_pic == 2 ? "+32768" : "");
14179 if (!speculate)
14180 {
14181 s += sprintf (s,
14182 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
14183 tls, rel64, notoc, funop, addend);
14184 s += sprintf (s, "crset 2\n\t");
14185 }
14186 s += sprintf (s,
14187 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
14188 tls, rel64, notoc, funop, addend);
14189 }
14190 else if (!speculate)
14191 s += sprintf (s, "crset 2\n\t");
14192
14193 if (rs6000_pcrel_p ())
14194 {
14195 if (speculate)
14196 sprintf (s, "b%%T%ul", funop);
14197 else
14198 sprintf (s, "beq%%T%ul-", funop);
14199 }
14200 else if (DEFAULT_ABI == ABI_AIX)
14201 {
14202 if (speculate)
14203 sprintf (s,
14204 "b%%T%ul\n\t"
14205 "l%s 2,%%%u(1)",
14206 funop, ptrload, funop + 4);
14207 else
14208 sprintf (s,
14209 "beq%%T%ul-\n\t"
14210 "l%s 2,%%%u(1)",
14211 funop, ptrload, funop + 4);
14212 }
14213 else if (DEFAULT_ABI == ABI_ELFv2)
14214 {
14215 if (speculate)
14216 sprintf (s,
14217 "b%%T%ul\n\t"
14218 "l%s 2,%%%u(1)",
14219 funop, ptrload, funop + 3);
14220 else
14221 sprintf (s,
14222 "beq%%T%ul-\n\t"
14223 "l%s 2,%%%u(1)",
14224 funop, ptrload, funop + 3);
14225 }
14226 else
14227 {
14228 if (speculate)
14229 sprintf (s,
14230 "b%%T%u%s",
14231 funop, sibcall ? "" : "l");
14232 else
14233 sprintf (s,
14234 "beq%%T%u%s-%s",
14235 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
14236 }
14237 return str;
14238 }
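/* As an illustrative sketch, a speculative ELFv2 indirect call through
   CTR comes out as
	bctrl
	ld 2,24(1)
   while the !speculate variant adds "crset 2" and uses the hinted
   "beqctrl-" form, per the templates above (%T selects the ctr/lr
   flavor of the branch).  */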
14239
14240 const char *
14241 rs6000_indirect_call_template (rtx *operands, unsigned int funop)
14242 {
14243 return rs6000_indirect_call_template_1 (operands, funop, false);
14244 }
14245
14246 const char *
14247 rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
14248 {
14249 return rs6000_indirect_call_template_1 (operands, funop, true);
14250 }
14251
14252 #if HAVE_AS_PLTSEQ
14253 /* Output indirect call insns. WHICH identifies the type of sequence. */
14254 const char *
14255 rs6000_pltseq_template (rtx *operands, int which)
14256 {
14257 const char *rel64 = TARGET_64BIT ? "64" : "";
14258 char tls[30];
14259 tls[0] = 0;
14260 if (GET_CODE (operands[3]) == UNSPEC)
14261 {
14262 char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
14263 if (XINT (operands[3], 1) == UNSPEC_TLSGD)
14264 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
14265 off, rel64);
14266 else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
14267 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
14268 off, rel64);
14269 }
14270
14271 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
14272 static char str[96]; /* 10 spare */
14273 char off = WORDS_BIG_ENDIAN ? '2' : '4';
14274 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
14275 && flag_pic == 2 ? "+32768" : "");
14276 switch (which)
14277 {
14278 case RS6000_PLTSEQ_TOCSAVE:
14279 sprintf (str,
14280 "st%s\n\t"
14281 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
14282 TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
14283 tls, rel64);
14284 break;
14285 case RS6000_PLTSEQ_PLT16_HA:
14286 if (DEFAULT_ABI == ABI_V4 && !flag_pic)
14287 sprintf (str,
14288 "lis %%0,0\n\t"
14289 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
14290 tls, off, rel64);
14291 else
14292 sprintf (str,
14293 "addis %%0,%%1,0\n\t"
14294 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
14295 tls, off, rel64, addend);
14296 break;
14297 case RS6000_PLTSEQ_PLT16_LO:
14298 sprintf (str,
14299 "l%s %%0,0(%%1)\n\t"
14300 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
14301 TARGET_64BIT ? "d" : "wz",
14302 tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
14303 break;
14304 case RS6000_PLTSEQ_MTCTR:
14305 sprintf (str,
14306 "mtctr %%1\n\t"
14307 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
14308 tls, rel64, addend);
14309 break;
14310 case RS6000_PLTSEQ_PLT_PCREL34:
14311 sprintf (str,
14312 "pl%s %%0,0(0),1\n\t"
14313 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
14314 TARGET_64BIT ? "d" : "wz",
14315 tls, rel64);
14316 break;
14317 default:
14318 gcc_unreachable ();
14319 }
14320 return str;
14321 }
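/* Taken together, a typical 32-bit V4 -fPIC inline PLT sequence is,
   as a sketch (exact .reloc offsets and operands as built above):
	addis 9,30,0	with R_PPC_PLT16_HA foo+32768
	lwz 9,0(9)	with R_PPC_PLT16_LO foo+32768
	mtctr 9		with R_PPC_PLTSEQ foo+32768
	bctrl
   which the linker may relax to a direct call.  */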
14322 #endif
14323 \f
14324 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
14325 /* Emit an assembler directive to set symbol visibility for DECL to
14326 VISIBILITY_TYPE. */
14327
14328 static void
14329 rs6000_assemble_visibility (tree decl, int vis)
14330 {
14331 if (TARGET_XCOFF)
14332 return;
14333
14334 /* Functions need to have their entry point symbol visibility set as
14335 well as their descriptor symbol visibility. */
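/* For example, a hidden function "foo" under the AIX descriptor ABI
   gets both
	.hidden foo	(the function descriptor)
	.hidden .foo	(the code entry point)
   from the two fprintf calls below.  */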
14336 if (DEFAULT_ABI == ABI_AIX
14337 && DOT_SYMBOLS
14338 && TREE_CODE (decl) == FUNCTION_DECL)
14339 {
14340 static const char * const visibility_types[] = {
14341 NULL, "protected", "hidden", "internal"
14342 };
14343
14344 const char *name, *type;
14345
14346 name = ((* targetm.strip_name_encoding)
14347 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
14348 type = visibility_types[vis];
14349
14350 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
14351 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
14352 }
14353 else
14354 default_assemble_visibility (decl, vis);
14355 }
14356 #endif
14357 \f
14358 enum rtx_code
14359 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
14360 {
14361 /* Reversal of FP compares needs care -- an ordered compare
14362 becomes an unordered compare and vice versa. */
14363 if (mode == CCFPmode
14364 && (!flag_finite_math_only
14365 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
14366 || code == UNEQ || code == LTGT))
14367 return reverse_condition_maybe_unordered (code);
14368 else
14369 return reverse_condition (code);
14370 }
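/* For example, reversing GE in CCFPmode yields UNLT rather than LT,
   so the reversed test remains an exact complement even when an
   operand is a NaN.  */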
14371
14372 /* Generate a compare for CODE. Return a brand-new rtx that
14373 represents the result of the compare. */
14374
14375 static rtx
14376 rs6000_generate_compare (rtx cmp, machine_mode mode)
14377 {
14378 machine_mode comp_mode;
14379 rtx compare_result;
14380 enum rtx_code code = GET_CODE (cmp);
14381 rtx op0 = XEXP (cmp, 0);
14382 rtx op1 = XEXP (cmp, 1);
14383
14384 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
14385 comp_mode = CCmode;
14386 else if (FLOAT_MODE_P (mode))
14387 comp_mode = CCFPmode;
14388 else if (code == GTU || code == LTU
14389 || code == GEU || code == LEU)
14390 comp_mode = CCUNSmode;
14391 else if ((code == EQ || code == NE)
14392 && unsigned_reg_p (op0)
14393 && (unsigned_reg_p (op1)
14394 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
14395 /* These are unsigned values, perhaps there will be a later
14396 ordering compare that can be shared with this one. */
14397 comp_mode = CCUNSmode;
14398 else
14399 comp_mode = CCmode;
14400
14401 /* If we have an unsigned compare, make sure we don't have a signed value as
14402 an immediate. */
14403 if (comp_mode == CCUNSmode && CONST_INT_P (op1)
14404 && INTVAL (op1) < 0)
14405 {
14406 op0 = copy_rtx_if_shared (op0);
14407 op1 = force_reg (GET_MODE (op0), op1);
14408 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
14409 }
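/* (Presumably the fixup is needed because the unsigned compare
   instructions take an unsigned 16-bit immediate, so a negative
   constant cannot be encoded directly and is forced into a register
   above.)  */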
14410
14411 /* First, the compare. */
14412 compare_result = gen_reg_rtx (comp_mode);
14413
14414 /* IEEE 128-bit support in VSX registers when we do not have hardware
14415 support. */
14416 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
14417 {
14418 rtx libfunc = NULL_RTX;
14419 bool check_nan = false;
14420 rtx dest;
14421
14422 switch (code)
14423 {
14424 case EQ:
14425 case NE:
14426 libfunc = optab_libfunc (eq_optab, mode);
14427 break;
14428
14429 case GT:
14430 case GE:
14431 libfunc = optab_libfunc (ge_optab, mode);
14432 break;
14433
14434 case LT:
14435 case LE:
14436 libfunc = optab_libfunc (le_optab, mode);
14437 break;
14438
14439 case UNORDERED:
14440 case ORDERED:
14441 libfunc = optab_libfunc (unord_optab, mode);
14442 code = (code == UNORDERED) ? NE : EQ;
14443 break;
14444
14445 case UNGE:
14446 case UNGT:
14447 check_nan = true;
14448 libfunc = optab_libfunc (ge_optab, mode);
14449 code = (code == UNGE) ? GE : GT;
14450 break;
14451
14452 case UNLE:
14453 case UNLT:
14454 check_nan = true;
14455 libfunc = optab_libfunc (le_optab, mode);
14456 code = (code == UNLE) ? LE : LT;
14457 break;
14458
14459 case UNEQ:
14460 case LTGT:
14461 check_nan = true;
14462 libfunc = optab_libfunc (eq_optab, mode);
14463 code = (code == UNEQ) ? EQ : NE;
14464 break;
14465
14466 default:
14467 gcc_unreachable ();
14468 }
14469
14470 gcc_assert (libfunc);
14471
14472 if (!check_nan)
14473 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
14474 SImode, op0, mode, op1, mode);
14475
14476 /* The library signals an exception for signalling NaNs, so we need to
14477 handle isgreater, etc. by first checking isordered. */
14478 else
14479 {
14480 rtx ne_rtx, normal_dest, unord_dest;
14481 rtx unord_func = optab_libfunc (unord_optab, mode);
14482 rtx join_label = gen_label_rtx ();
14483 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
14484 rtx unord_cmp = gen_reg_rtx (comp_mode);
14485
14486
14487 /* Test for either value being a NaN. */
14488 gcc_assert (unord_func);
14489 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
14490 SImode, op0, mode, op1, mode);
14491
14492 /* Set value (1) if either value is a NaN, and jump to the join
14493 label. */
14494 dest = gen_reg_rtx (SImode);
14495 emit_move_insn (dest, const1_rtx);
14496 emit_insn (gen_rtx_SET (unord_cmp,
14497 gen_rtx_COMPARE (comp_mode, unord_dest,
14498 const0_rtx)));
14499
14500 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
14501 emit_jump_insn (gen_rtx_SET (pc_rtx,
14502 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
14503 join_ref,
14504 pc_rtx)));
14505
14506 /* Do the normal comparison, knowing that the values are not
14507 NaNs. */
14508 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
14509 SImode, op0, mode, op1, mode);
14510
14511 emit_insn (gen_cstoresi4 (dest,
14512 gen_rtx_fmt_ee (code, SImode, normal_dest,
14513 const0_rtx),
14514 normal_dest, const0_rtx));
14515
14516 /* Join NaN and non-NaN paths. Compare dest against 0. */
14517 emit_label (join_label);
14518 code = NE;
14519 }
14520
14521 emit_insn (gen_rtx_SET (compare_result,
14522 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
14523 }
14524
14525 else
14526 {
14527 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
14528 CLOBBERs to match cmptf_internal2 pattern. */
14529 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
14530 && FLOAT128_IBM_P (GET_MODE (op0))
14531 && TARGET_HARD_FLOAT)
14532 emit_insn (gen_rtx_PARALLEL (VOIDmode,
14533 gen_rtvec (10,
14534 gen_rtx_SET (compare_result,
14535 gen_rtx_COMPARE (comp_mode, op0, op1)),
14536 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14537 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14538 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14539 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14540 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14541 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14542 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14543 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14544 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
14545 else if (GET_CODE (op1) == UNSPEC
14546 && XINT (op1, 1) == UNSPEC_SP_TEST)
14547 {
14548 rtx op1b = XVECEXP (op1, 0, 0);
14549 comp_mode = CCEQmode;
14550 compare_result = gen_reg_rtx (CCEQmode);
14551 if (TARGET_64BIT)
14552 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
14553 else
14554 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
14555 }
14556 else
14557 emit_insn (gen_rtx_SET (compare_result,
14558 gen_rtx_COMPARE (comp_mode, op0, op1)));
14559 }
14560
14561 validate_condition_mode (code, GET_MODE (compare_result));
14562
14563 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
14564 }
14565
14566 \f
14567 /* Return the diagnostic message string if the binary operation OP is
14568 not permitted on TYPE1 and TYPE2, NULL otherwise. */
14569
14570 static const char*
14571 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
14572 const_tree type1,
14573 const_tree type2)
14574 {
14575 machine_mode mode1 = TYPE_MODE (type1);
14576 machine_mode mode2 = TYPE_MODE (type2);
14577
14578 /* For complex modes, use the inner type. */
14579 if (COMPLEX_MODE_P (mode1))
14580 mode1 = GET_MODE_INNER (mode1);
14581
14582 if (COMPLEX_MODE_P (mode2))
14583 mode2 = GET_MODE_INNER (mode2);
14584
14585 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
14586 double to intermix unless -mfloat128-convert. */
14587 if (mode1 == mode2)
14588 return NULL;
14589
14590 if (!TARGET_FLOAT128_CVT)
14591 {
14592 if ((FLOAT128_IEEE_P (mode1) && FLOAT128_IBM_P (mode2))
14593 || (FLOAT128_IBM_P (mode1) && FLOAT128_IEEE_P (mode2)))
14594 return N_("Invalid mixing of IEEE 128-bit and IBM 128-bit floating "
14595 "point types");
14596 }
14597
14598 return NULL;
14599 }
14600
14601 \f
14602 /* Expand floating point conversion to/from __float128 and __ibm128. */
14603
14604 void
14605 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
14606 {
14607 machine_mode dest_mode = GET_MODE (dest);
14608 machine_mode src_mode = GET_MODE (src);
14609 convert_optab cvt = unknown_optab;
14610 bool do_move = false;
14611 rtx libfunc = NULL_RTX;
14612 rtx dest2;
14613 typedef rtx (*rtx_2func_t) (rtx, rtx);
14614 rtx_2func_t hw_convert = (rtx_2func_t)0;
14615 size_t kf_or_tf;
14616
14617 struct hw_conv_t {
14618 rtx_2func_t from_df;
14619 rtx_2func_t from_sf;
14620 rtx_2func_t from_si_sign;
14621 rtx_2func_t from_si_uns;
14622 rtx_2func_t from_di_sign;
14623 rtx_2func_t from_di_uns;
14624 rtx_2func_t to_df;
14625 rtx_2func_t to_sf;
14626 rtx_2func_t to_si_sign;
14627 rtx_2func_t to_si_uns;
14628 rtx_2func_t to_di_sign;
14629 rtx_2func_t to_di_uns;
14630 } hw_conversions[2] = {
14631 /* Conversions to/from KFmode. */
14632 {
14633 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
14634 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
14635 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
14636 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
14637 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
14638 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
14639 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
14640 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
14641 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
14642 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
14643 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
14644 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
14645 },
14646
14647 /* Conversions to/from TFmode. */
14648 {
14649 gen_extenddftf2_hw, /* TFmode <- DFmode. */
14650 gen_extendsftf2_hw, /* TFmode <- SFmode. */
14651 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
14652 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
14653 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
14654 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
14655 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
14656 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
14657 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
14658 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
14659 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
14660 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
14661 },
14662 };
14663
14664 if (dest_mode == src_mode)
14665 gcc_unreachable ();
14666
14667 /* Eliminate memory operations. */
14668 if (MEM_P (src))
14669 src = force_reg (src_mode, src);
14670
14671 if (MEM_P (dest))
14672 {
14673 rtx tmp = gen_reg_rtx (dest_mode);
14674 rs6000_expand_float128_convert (tmp, src, unsigned_p);
14675 rs6000_emit_move (dest, tmp, dest_mode);
14676 return;
14677 }
14678
14679 /* Convert to IEEE 128-bit floating point. */
14680 if (FLOAT128_IEEE_P (dest_mode))
14681 {
14682 if (dest_mode == KFmode)
14683 kf_or_tf = 0;
14684 else if (dest_mode == TFmode)
14685 kf_or_tf = 1;
14686 else
14687 gcc_unreachable ();
14688
14689 switch (src_mode)
14690 {
14691 case E_DFmode:
14692 cvt = sext_optab;
14693 hw_convert = hw_conversions[kf_or_tf].from_df;
14694 break;
14695
14696 case E_SFmode:
14697 cvt = sext_optab;
14698 hw_convert = hw_conversions[kf_or_tf].from_sf;
14699 break;
14700
14701 case E_KFmode:
14702 case E_IFmode:
14703 case E_TFmode:
14704 if (FLOAT128_IBM_P (src_mode))
14705 cvt = sext_optab;
14706 else
14707 do_move = true;
14708 break;
14709
14710 case E_SImode:
14711 if (unsigned_p)
14712 {
14713 cvt = ufloat_optab;
14714 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
14715 }
14716 else
14717 {
14718 cvt = sfloat_optab;
14719 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
14720 }
14721 break;
14722
14723 case E_DImode:
14724 if (unsigned_p)
14725 {
14726 cvt = ufloat_optab;
14727 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
14728 }
14729 else
14730 {
14731 cvt = sfloat_optab;
14732 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
14733 }
14734 break;
14735
14736 default:
14737 gcc_unreachable ();
14738 }
14739 }
14740
14741 /* Convert from IEEE 128-bit floating point. */
14742 else if (FLOAT128_IEEE_P (src_mode))
14743 {
14744 if (src_mode == KFmode)
14745 kf_or_tf = 0;
14746 else if (src_mode == TFmode)
14747 kf_or_tf = 1;
14748 else
14749 gcc_unreachable ();
14750
14751 switch (dest_mode)
14752 {
14753 case E_DFmode:
14754 cvt = trunc_optab;
14755 hw_convert = hw_conversions[kf_or_tf].to_df;
14756 break;
14757
14758 case E_SFmode:
14759 cvt = trunc_optab;
14760 hw_convert = hw_conversions[kf_or_tf].to_sf;
14761 break;
14762
14763 case E_KFmode:
14764 case E_IFmode:
14765 case E_TFmode:
14766 if (FLOAT128_IBM_P (dest_mode))
14767 cvt = trunc_optab;
14768 else
14769 do_move = true;
14770 break;
14771
14772 case E_SImode:
14773 if (unsigned_p)
14774 {
14775 cvt = ufix_optab;
14776 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
14777 }
14778 else
14779 {
14780 cvt = sfix_optab;
14781 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
14782 }
14783 break;
14784
14785 case E_DImode:
14786 if (unsigned_p)
14787 {
14788 cvt = ufix_optab;
14789 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
14790 }
14791 else
14792 {
14793 cvt = sfix_optab;
14794 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
14795 }
14796 break;
14797
14798 default:
14799 gcc_unreachable ();
14800 }
14801 }
14802
14803 /* Both IBM format. */
14804 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
14805 do_move = true;
14806
14807 else
14808 gcc_unreachable ();
14809
14810 /* Handle conversion between TFmode/KFmode/IFmode. */
14811 if (do_move)
14812 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
14813
14814 /* Handle conversion if we have hardware support. */
14815 else if (TARGET_FLOAT128_HW && hw_convert)
14816 emit_insn ((hw_convert) (dest, src));
14817
14818 /* Call an external function to do the conversion. */
14819 else if (cvt != unknown_optab)
14820 {
14821 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
14822 gcc_assert (libfunc != NULL_RTX);
14823
14824 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
14825 src, src_mode);
14826
14827 gcc_assert (dest2 != NULL_RTX);
14828 if (!rtx_equal_p (dest, dest2))
14829 emit_move_insn (dest, dest2);
14830 }
14831
14832 else
14833 gcc_unreachable ();
14834
14835 return;
14836 }
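/* A sketch of the three paths above: DFmode to KFmode uses
   gen_extenddfkf2_hw when TARGET_FLOAT128_HW, otherwise the sext_optab
   libcall (nominally __extenddfkf2 in libgcc); KFmode <-> TFmode moves
   need only the FLOAT_EXTEND set emitted for do_move.  */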
14837
14838 \f
14839 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
14840 can be used as that dest register. Return the dest register. */
14841
14842 rtx
14843 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
14844 {
14845 if (op2 == const0_rtx)
14846 return op1;
14847
14848 if (GET_CODE (scratch) == SCRATCH)
14849 scratch = gen_reg_rtx (mode);
14850
14851 if (logical_operand (op2, mode))
14852 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
14853 else
14854 emit_insn (gen_rtx_SET (scratch,
14855 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
14856
14857 return scratch;
14858 }
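/* E.g., for op2 equal to the constant 10, this emits
   scratch = op1 ^ 10, so scratch is zero exactly when op1 == 10;
   constants that fail logical_operand use the subtraction form.  */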
14859
14860 /* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
14861 requires this. The result is mode MODE. */
14862 rtx
14863 rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
14864 {
14865 rtx cond[2];
14866 int n = 0;
14867 if (code == LTGT || code == LE || code == UNLT)
14868 cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
14869 if (code == LTGT || code == GE || code == UNGT)
14870 cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
14871 if (code == LE || code == GE || code == UNEQ)
14872 cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
14873 if (code == UNLT || code == UNGT || code == UNEQ)
14874 cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);
14875
14876 gcc_assert (n == 2);
14877
14878 rtx cc = gen_reg_rtx (CCEQmode);
14879 rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
14880 emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));
14881
14882 return cc;
14883 }
14884
14885 void
14886 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
14887 {
14888 rtx condition_rtx = rs6000_generate_compare (operands[1], mode);
14889 rtx_code cond_code = GET_CODE (condition_rtx);
14890
14891 if (FLOAT_MODE_P (mode) && HONOR_NANS (mode)
14892 && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW))
14893 ;
14894 else if (cond_code == NE
14895 || cond_code == GE || cond_code == LE
14896 || cond_code == GEU || cond_code == LEU
14897 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
14898 {
14899 rtx not_result = gen_reg_rtx (CCEQmode);
14900 rtx not_op, rev_cond_rtx;
14901 machine_mode cc_mode;
14902
14903 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
14904
14905 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
14906 SImode, XEXP (condition_rtx, 0), const0_rtx);
14907 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
14908 emit_insn (gen_rtx_SET (not_result, not_op));
14909 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
14910 }
14911
14912 machine_mode op_mode = GET_MODE (XEXP (operands[1], 0));
14913 if (op_mode == VOIDmode)
14914 op_mode = GET_MODE (XEXP (operands[1], 1));
14915
14916 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
14917 {
14918 PUT_MODE (condition_rtx, DImode);
14919 convert_move (operands[0], condition_rtx, 0);
14920 }
14921 else
14922 {
14923 PUT_MODE (condition_rtx, SImode);
14924 emit_insn (gen_rtx_SET (operands[0], condition_rtx));
14925 }
14926 }
14927
14928 /* Emit a conditional branch to the label in OPERANDS[3], testing the comparison given by OPERANDS[0]. */
14929
14930 void
14931 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
14932 {
14933 rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
14934 rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
14935 rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
14936 emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
14937 }
14938
14939 /* Return the string to output a conditional branch to LABEL, which is
14940 the operand template of the label, or NULL if the branch is really a
14941 conditional return.
14942
14943 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
14944 condition code register and its mode specifies what kind of
14945 comparison we made.
14946
14947 REVERSED is nonzero if we should reverse the sense of the comparison.
14948
14949 INSN is the insn. */
14950
14951 char *
14952 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
14953 {
14954 static char string[64];
14955 enum rtx_code code = GET_CODE (op);
14956 rtx cc_reg = XEXP (op, 0);
14957 machine_mode mode = GET_MODE (cc_reg);
14958 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
14959 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
14960 int really_reversed = reversed ^ need_longbranch;
14961 char *s = string;
14962 const char *ccode;
14963 const char *pred;
14964 rtx note;
14965
14966 validate_condition_mode (code, mode);
14967
14968 /* Work out which way this really branches. We could use
14969 reverse_condition_maybe_unordered here always but this
14970 makes the resulting assembler clearer. */
14971 if (really_reversed)
14972 {
14973 /* Reversal of FP compares needs care -- an ordered compare
14974 becomes an unordered compare and vice versa. */
14975 if (mode == CCFPmode)
14976 code = reverse_condition_maybe_unordered (code);
14977 else
14978 code = reverse_condition (code);
14979 }
14980
14981 switch (code)
14982 {
14983 /* Not all of these are actually distinct opcodes, but
14984 we distinguish them for clarity of the resulting assembler. */
14985 case NE: case LTGT:
14986 ccode = "ne"; break;
14987 case EQ: case UNEQ:
14988 ccode = "eq"; break;
14989 case GE: case GEU:
14990 ccode = "ge"; break;
14991 case GT: case GTU: case UNGT:
14992 ccode = "gt"; break;
14993 case LE: case LEU:
14994 ccode = "le"; break;
14995 case LT: case LTU: case UNLT:
14996 ccode = "lt"; break;
14997 case UNORDERED: ccode = "un"; break;
14998 case ORDERED: ccode = "nu"; break;
14999 case UNGE: ccode = "nl"; break;
15000 case UNLE: ccode = "ng"; break;
15001 default:
15002 gcc_unreachable ();
15003 }
15004
15005 /* Maybe we have a guess as to how likely the branch is. */
15006 pred = "";
15007 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
15008 if (note != NULL_RTX)
15009 {
15010 /* PROB is the difference from 50%. */
15011 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
15012 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
15013
15014 /* Only hint for highly probable/improbable branches on newer cpus when
15015 we have real profile data, as static prediction overrides processor
15016 dynamic prediction. For older cpus we may as well always hint, but
15017 assume not taken for branches that are very close to 50% as a
15018 mispredicted taken branch is more expensive than a
15019 mispredicted not-taken branch. */
15020 if (rs6000_always_hint
15021 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
15022 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
15023 && br_prob_note_reliable_p (note)))
15024 {
15025 if (abs (prob) > REG_BR_PROB_BASE / 20
15026 && ((prob > 0) ^ need_longbranch))
15027 pred = "+";
15028 else
15029 pred = "-";
15030 }
15031 }
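/* ("+" in the output hints the branch as likely taken, "-" as likely
   not taken; the sense is XORed with need_longbranch because a long
   branch reverses the condition to jump around an unconditional b.)  */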
15032
15033 if (label == NULL)
15034 s += sprintf (s, "b%slr%s ", ccode, pred);
15035 else
15036 s += sprintf (s, "b%s%s ", ccode, pred);
15037
15038 /* We need to escape any '%' characters in the reg_names string.
15039 Assume they'd only be the first character.... */
15040 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
15041 *s++ = '%';
15042 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
15043
15044 if (label != NULL)
15045 {
15046 /* If the branch distance was too far, we may have to use an
15047 unconditional branch to go the distance. */
15048 if (need_longbranch)
15049 s += sprintf (s, ",$+8\n\tb %s", label);
15050 else
15051 s += sprintf (s, ",%s", label);
15052 }
15053
15054 return string;
15055 }
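/* For instance, a likely-taken GT test against label .L5 might come
   out as "bgt+ 7,.L5" (CR field naming varies by target), and the
   long-branch form as "ble- 7,$+8\n\tb .L5" with the condition
   reversed, as built above.  */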
15056
15057 /* Return insn for VSX or Altivec comparisons. */
15058
15059 static rtx
15060 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
15061 {
15062 rtx mask;
15063 machine_mode mode = GET_MODE (op0);
15064
15065 switch (code)
15066 {
15067 default:
15068 break;
15069
15070 case GE:
15071 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15072 return NULL_RTX;
15073 /* FALLTHRU */
15074
15075 case EQ:
15076 case GT:
15077 case GTU:
15078 case ORDERED:
15079 case UNORDERED:
15080 case UNEQ:
15081 case LTGT:
15082 mask = gen_reg_rtx (mode);
15083 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
15084 return mask;
15085 }
15086
15087 return NULL_RTX;
15088 }
15089
15090 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
15091 DMODE is expected destination mode. This is a recursive function. */
15092
15093 static rtx
15094 rs6000_emit_vector_compare (enum rtx_code rcode,
15095 rtx op0, rtx op1,
15096 machine_mode dmode)
15097 {
15098 rtx mask;
15099 bool swap_operands = false;
15100 bool try_again = false;
15101
15102 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
15103 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
15104
15105 /* See if the comparison works as is. */
15106 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
15107 if (mask)
15108 return mask;
15109
15110 switch (rcode)
15111 {
15112 case LT:
15113 rcode = GT;
15114 swap_operands = true;
15115 try_again = true;
15116 break;
15117 case LTU:
15118 rcode = GTU;
15119 swap_operands = true;
15120 try_again = true;
15121 break;
15122 case NE:
15123 case UNLE:
15124 case UNLT:
15125 case UNGE:
15126 case UNGT:
15127 /* Invert condition and try again.
15128 e.g., A != B becomes ~(A==B). */
15129 {
15130 enum rtx_code rev_code;
15131 enum insn_code nor_code;
15132 rtx mask2;
15133
15134 rev_code = reverse_condition_maybe_unordered (rcode);
15135 if (rev_code == UNKNOWN)
15136 return NULL_RTX;
15137
15138 nor_code = optab_handler (one_cmpl_optab, dmode);
15139 if (nor_code == CODE_FOR_nothing)
15140 return NULL_RTX;
15141
15142 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
15143 if (!mask2)
15144 return NULL_RTX;
15145
15146 mask = gen_reg_rtx (dmode);
15147 emit_insn (GEN_FCN (nor_code) (mask, mask2));
15148 return mask;
15149 }
15150 break;
15151 case GE:
15152 case GEU:
15153 case LE:
15154 case LEU:
15155 /* Try GT/GTU/LT/LTU OR EQ */
15156 {
15157 rtx c_rtx, eq_rtx;
15158 enum insn_code ior_code;
15159 enum rtx_code new_code;
15160
15161 switch (rcode)
15162 {
15163 case GE:
15164 new_code = GT;
15165 break;
15166
15167 case GEU:
15168 new_code = GTU;
15169 break;
15170
15171 case LE:
15172 new_code = LT;
15173 break;
15174
15175 case LEU:
15176 new_code = LTU;
15177 break;
15178
15179 default:
15180 gcc_unreachable ();
15181 }
15182
15183 ior_code = optab_handler (ior_optab, dmode);
15184 if (ior_code == CODE_FOR_nothing)
15185 return NULL_RTX;
15186
15187 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
15188 if (!c_rtx)
15189 return NULL_RTX;
15190
15191 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
15192 if (!eq_rtx)
15193 return NULL_RTX;
15194
15195 mask = gen_reg_rtx (dmode);
15196 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
15197 return mask;
15198 }
15199 break;
15200 default:
15201 return NULL_RTX;
15202 }
15203
15204 if (try_again)
15205 {
15206 if (swap_operands)
15207 std::swap (op0, op1);
15208
15209 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
15210 if (mask)
15211 return mask;
15212 }
15213
15214 /* You only get two chances. */
15215 return NULL_RTX;
15216 }
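/* A sketch of the recursion for V4SImode: LT is handled by swapping
   operands and emitting GT; LE is built as (LT OR EQ) from two
   recursive calls plus a vector IOR; NE is built as the one's
   complement of EQ.  */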
15217
15218 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
15219 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
15220 operands for the relation operation COND. */
15221
15222 int
15223 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
15224 rtx cond, rtx cc_op0, rtx cc_op1)
15225 {
15226 machine_mode dest_mode = GET_MODE (dest);
15227 machine_mode mask_mode = GET_MODE (cc_op0);
15228 enum rtx_code rcode = GET_CODE (cond);
15229 machine_mode cc_mode = CCmode;
15230 rtx mask;
15231 rtx cond2;
15232 bool invert_move = false;
15233
15234 if (VECTOR_UNIT_NONE_P (dest_mode))
15235 return 0;
15236
15237 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
15238 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
15239
15240 switch (rcode)
15241 {
15242 /* Swap operands if we can, and fall back to doing the operation as
15243 specified, and doing a NOR to invert the test. */
15244 case NE:
15245 case UNLE:
15246 case UNLT:
15247 case UNGE:
15248 case UNGT:
15249 /* Invert condition and try again.
15250 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
15251 invert_move = true;
15252 rcode = reverse_condition_maybe_unordered (rcode);
15253 if (rcode == UNKNOWN)
15254 return 0;
15255 break;
15256
15257 case GE:
15258 case LE:
15259 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
15260 {
15261 /* Invert condition to avoid compound test. */
15262 invert_move = true;
15263 rcode = reverse_condition (rcode);
15264 }
15265 break;
15266
15267 case GTU:
15268 case GEU:
15269 case LTU:
15270 case LEU:
15271 /* Mark unsigned tests with CCUNSmode. */
15272 cc_mode = CCUNSmode;
15273
15274 /* Invert condition to avoid compound test if necessary. */
15275 if (rcode == GEU || rcode == LEU)
15276 {
15277 invert_move = true;
15278 rcode = reverse_condition (rcode);
15279 }
15280 break;
15281
15282 default:
15283 break;
15284 }
15285
15286 /* Get the vector mask for the given relational operations. */
15287 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
15288
15289 if (!mask)
15290 return 0;
15291
15292 if (invert_move)
15293 std::swap (op_true, op_false);
15294
15295 /* Optimize the constant-vector cases, using the fact that the comparison mask generates -1/0. */
15296 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
15297 && (GET_CODE (op_true) == CONST_VECTOR
15298 || GET_CODE (op_false) == CONST_VECTOR))
15299 {
15300 rtx constant_0 = CONST0_RTX (dest_mode);
15301 rtx constant_m1 = CONSTM1_RTX (dest_mode);
15302
15303 if (op_true == constant_m1 && op_false == constant_0)
15304 {
15305 emit_move_insn (dest, mask);
15306 return 1;
15307 }
15308
15309 else if (op_true == constant_0 && op_false == constant_m1)
15310 {
15311 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
15312 return 1;
15313 }
15314
15315 /* If we can't use the vector comparison directly, perhaps we can use
15316 the mask for the true or false fields, instead of loading up a
15317 constant. */
15318 if (op_true == constant_m1)
15319 op_true = mask;
15320
15321 if (op_false == constant_0)
15322 op_false = mask;
15323 }
15324
15325 if (!REG_P (op_true) && !SUBREG_P (op_true))
15326 op_true = force_reg (dest_mode, op_true);
15327
15328 if (!REG_P (op_false) && !SUBREG_P (op_false))
15329 op_false = force_reg (dest_mode, op_false);
15330
15331 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
15332 CONST0_RTX (dest_mode));
15333 emit_insn (gen_rtx_SET (dest,
15334 gen_rtx_IF_THEN_ELSE (dest_mode,
15335 cond2,
15336 op_true,
15337 op_false)));
15338 return 1;
15339 }
15340
15341 /* Possibly emit the xsmaxcdp and xsmincdp instructions to compute a maximum or
15342 minimum with "C" semantics.
15343
15344 Unless -ffast-math is used, these instructions cannot replace forms
15345 that implicitly reverse the condition, because the comparison might
15346 involve a NaN or a signed zero.
15347
15348 I.e., the following can always be replaced:
15349 ret = (op1 > op2) ? op1 : op2 ; generate xsmaxcdp
15350 ret = (op1 >= op2) ? op1 : op2 ; generate xsmaxcdp
15351 ret = (op1 < op2) ? op1 : op2; ; generate xsmincdp
15352 ret = (op1 <= op2) ? op1 : op2; ; generate xsmincdp
15353
15354 The following can be replaced only if -ffast-math is used:
15355 ret = (op1 < op2) ? op2 : op1 ; generate xsmaxcdp
15356 ret = (op1 <= op2) ? op2 : op1 ; generate xsmaxcdp
15357 ret = (op1 > op2) ? op2 : op1; ; generate xsmincdp
15358 ret = (op1 >= op2) ? op2 : op1; ; generate xsmincdp
15359
15360 Move TRUE_COND to DEST if OP of the operands of the last comparison is
15361 nonzero/true, FALSE_COND if it is zero/false.
15362
15363 Return false if we can't generate the appropriate minimum or maximum, and
15364 true if we did emit it. */
15365
15366 static bool
15367 rs6000_maybe_emit_maxc_minc (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15368 {
15369 enum rtx_code code = GET_CODE (op);
15370 rtx op0 = XEXP (op, 0);
15371 rtx op1 = XEXP (op, 1);
15372 machine_mode compare_mode = GET_MODE (op0);
15373 machine_mode result_mode = GET_MODE (dest);
15374 bool max_p = false;
15375
15376 if (result_mode != compare_mode)
15377 return false;
15378
15379 if (code == GE || code == GT)
15380 max_p = true;
15381 else if (code == LE || code == LT)
15382 max_p = false;
15383 else
15384 return false;
15385
15386 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
15387 ;
15388
15389 /* Only when NaNs and signed zeros are not honored can smax be
15390 used for `op0 < op1 ? op1 : op0`, and smin for
15391 `op0 > op1 ? op1 : op0`. */
15392 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond)
15393 && !HONOR_NANS (compare_mode) && !HONOR_SIGNED_ZEROS (compare_mode))
15394 max_p = !max_p;
15395
15396 else
15397 return false;
15398
15399 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
15400 return true;
15401 }
15402
15403 /* Possibly emit a floating point conditional move by generating a compare that
15404 sets a mask instruction and a XXSEL select instruction.
15405
15406 Move TRUE_COND to DEST if OP of the operands of the last comparison is
15407 nonzero/true, FALSE_COND if it is zero/false.
15408
15409 Return false if the operation cannot be generated, and true if we could
15410 generate the instruction. */
15411
15412 static bool
15413 rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15414 {
15415 enum rtx_code code = GET_CODE (op);
15416 rtx op0 = XEXP (op, 0);
15417 rtx op1 = XEXP (op, 1);
15418 machine_mode result_mode = GET_MODE (dest);
15419 rtx compare_rtx;
15420 rtx cmove_rtx;
15421 rtx clobber_rtx;
15422
15423 if (!can_create_pseudo_p ())
15424 return 0;
15425
15426 switch (code)
15427 {
15428 case EQ:
15429 case GE:
15430 case GT:
15431 break;
15432
15433 case NE:
15434 case LT:
15435 case LE:
15436 code = swap_condition (code);
15437 std::swap (op0, op1);
15438 break;
15439
15440 default:
15441 return false;
15442 }
15443
15444 /* Generate: [(parallel [(set (dest)
15445 (if_then_else (op (cmp1) (cmp2))
15446 (true)
15447 (false)))
15448 (clobber (scratch))])]. */
15449
15450 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
15451 cmove_rtx = gen_rtx_SET (dest,
15452 gen_rtx_IF_THEN_ELSE (result_mode,
15453 compare_rtx,
15454 true_cond,
15455 false_cond));
15456
15457 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
15458 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15459 gen_rtvec (2, cmove_rtx, clobber_rtx)));
15460
15461 return true;
15462 }
15463
15464 /* Helper function to return true if the target has instructions to do a
15465 compare and set mask instruction that can be used with XXSEL to implement a
15466 conditional move. It is also assumed that such a target also supports the
15467 "C" minimum and maximum instructions. */
15468
15469 static bool
15470 have_compare_and_set_mask (machine_mode mode)
15471 {
15472 switch (mode)
15473 {
15474 case E_SFmode:
15475 case E_DFmode:
15476 return TARGET_P9_MINMAX;
15477
15478 default:
15479 break;
15480 }
15481
15482 return false;
15483 }
15484
15485 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
15486 operands of the last comparison is nonzero/true, FALSE_COND if it
15487 is zero/false. Return false if the hardware has no such operation. */
15488
15489 bool
15490 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15491 {
15492 enum rtx_code code = GET_CODE (op);
15493 rtx op0 = XEXP (op, 0);
15494 rtx op1 = XEXP (op, 1);
15495 machine_mode compare_mode = GET_MODE (op0);
15496 machine_mode result_mode = GET_MODE (dest);
15497 rtx temp;
15498 bool is_against_zero;
15499
15500 /* These modes should always match. */
15501 if (GET_MODE (op1) != compare_mode
15502 /* In the isel case however, we can use a compare immediate, so
15503 op1 may be a small constant. */
15504 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
15505 return false;
15506 if (GET_MODE (true_cond) != result_mode)
15507 return false;
15508 if (GET_MODE (false_cond) != result_mode)
15509 return false;
15510
15511 /* See if we can use the "C" minimum, "C" maximum, and compare and set mask
15512 instructions. */
15513 if (have_compare_and_set_mask (compare_mode)
15514 && have_compare_and_set_mask (result_mode))
15515 {
15516 if (rs6000_maybe_emit_maxc_minc (dest, op, true_cond, false_cond))
15517 return true;
15518
15519 if (rs6000_maybe_emit_fp_cmove (dest, op, true_cond, false_cond))
15520 return true;
15521 }
15522
15523 /* Don't allow using floating point comparisons for integer results for
15524 now. */
15525 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
15526 return false;
15527
15528 /* First, work out if the hardware can do this at all, or
15529 if it's too slow.... */
15530 if (!FLOAT_MODE_P (compare_mode))
15531 {
15532 if (TARGET_ISEL)
15533 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
15534 return false;
15535 }
15536
15537 is_against_zero = op1 == CONST0_RTX (compare_mode);
15538
15539 /* A floating-point subtract might overflow, underflow, or produce
15540 an inexact result, thus changing the floating-point flags, so it
15541 can't be generated if we care about that. It's safe if one side
15542 of the construct is zero, since then no subtract will be
15543 generated. */
15544 if (SCALAR_FLOAT_MODE_P (compare_mode)
15545 && flag_trapping_math && ! is_against_zero)
15546 return false;
15547
15548 /* Eliminate half of the comparisons by switching operands, this
15549 makes the remaining code simpler. */
15550 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
15551 || code == LTGT || code == LT || code == UNLE)
15552 {
15553 code = reverse_condition_maybe_unordered (code);
15554 temp = true_cond;
15555 true_cond = false_cond;
15556 false_cond = temp;
15557 }
15558
15559 /* UNEQ and LTGT take four instructions for a comparison with zero;
15560 it'll probably be faster to use a branch here too. */
15561 if (code == UNEQ && HONOR_NANS (compare_mode))
15562 return false;
15563
15564 /* We're going to try to implement comparisons by performing
15565 a subtract, then comparing against zero. Unfortunately,
15566 Inf - Inf is NaN which is not zero, and so if we don't
15567 know that the operand is finite and the comparison
15568 would treat EQ differently from UNORDERED, we can't do it. */
15569 if (HONOR_INFINITIES (compare_mode)
15570 && code != GT && code != UNGE
15571 && (!CONST_DOUBLE_P (op1)
15572 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
15573 /* Constructs of the form (a OP b ? a : b) are safe. */
15574 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
15575 || (! rtx_equal_p (op0, true_cond)
15576 && ! rtx_equal_p (op1, true_cond))))
15577 return false;
15578
15579 /* At this point we know we can use fsel. */
15580
15581 /* Don't allow compare_mode other than SFmode or DFmode, for others there
15582 is no fsel instruction. */
15583 if (compare_mode != SFmode && compare_mode != DFmode)
15584 return false;
15585
15586 /* Reduce the comparison to a comparison against zero. */
15587 if (! is_against_zero)
15588 {
15589 temp = gen_reg_rtx (compare_mode);
15590 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
15591 op0 = temp;
15592 op1 = CONST0_RTX (compare_mode);
15593 }
15594
15595 /* If we don't care about NaNs we can reduce some of the comparisons
15596 down to faster ones. */
15597 if (! HONOR_NANS (compare_mode))
15598 switch (code)
15599 {
15600 case GT:
15601 code = LE;
15602 temp = true_cond;
15603 true_cond = false_cond;
15604 false_cond = temp;
15605 break;
15606 case UNGE:
15607 code = GE;
15608 break;
15609 case UNEQ:
15610 code = EQ;
15611 break;
15612 default:
15613 break;
15614 }
15615
15616 /* Now, reduce everything down to a GE. */
15617 switch (code)
15618 {
15619 case GE:
15620 break;
15621
15622 case LE:
15623 temp = gen_reg_rtx (compare_mode);
15624 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15625 op0 = temp;
15626 break;
15627
15628 case ORDERED:
15629 temp = gen_reg_rtx (compare_mode);
15630 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
15631 op0 = temp;
15632 break;
15633
15634 case EQ:
15635 temp = gen_reg_rtx (compare_mode);
15636 emit_insn (gen_rtx_SET (temp,
15637 gen_rtx_NEG (compare_mode,
15638 gen_rtx_ABS (compare_mode, op0))));
15639 op0 = temp;
15640 break;
15641
15642 case UNGE:
15643 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
15644 temp = gen_reg_rtx (result_mode);
15645 emit_insn (gen_rtx_SET (temp,
15646 gen_rtx_IF_THEN_ELSE (result_mode,
15647 gen_rtx_GE (VOIDmode,
15648 op0, op1),
15649 true_cond, false_cond)));
15650 false_cond = true_cond;
15651 true_cond = temp;
15652
15653 temp = gen_reg_rtx (compare_mode);
15654 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15655 op0 = temp;
15656 break;
15657
15658 case GT:
15659 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
15660 temp = gen_reg_rtx (result_mode);
15661 emit_insn (gen_rtx_SET (temp,
15662 gen_rtx_IF_THEN_ELSE (result_mode,
15663 gen_rtx_GE (VOIDmode,
15664 op0, op1),
15665 true_cond, false_cond)));
15666 true_cond = false_cond;
15667 false_cond = temp;
15668
15669 temp = gen_reg_rtx (compare_mode);
15670 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15671 op0 = temp;
15672 break;
15673
15674 default:
15675 gcc_unreachable ();
15676 }
15677
15678 emit_insn (gen_rtx_SET (dest,
15679 gen_rtx_IF_THEN_ELSE (result_mode,
15680 gen_rtx_GE (VOIDmode,
15681 op0, op1),
15682 true_cond, false_cond)));
15683 return true;
15684 }
15685
15686 /* Same as above, but for ints (isel). */
15687
15688 bool
15689 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15690 {
15691 rtx condition_rtx, cr;
15692 machine_mode mode = GET_MODE (dest);
15693 enum rtx_code cond_code;
15694 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
15695 bool signedp;
15696
15697 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
15698 return false;
15699
15700 /* We still have to do the compare, because isel doesn't do a
15701 compare, it just looks at the CRx bits set by a previous compare
15702 instruction. */
15703 condition_rtx = rs6000_generate_compare (op, mode);
15704 cond_code = GET_CODE (condition_rtx);
15705 cr = XEXP (condition_rtx, 0);
15706 signedp = GET_MODE (cr) == CCmode;
15707
15708 isel_func = (mode == SImode
15709 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
15710 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
15711
15712 switch (cond_code)
15713 {
15714 case LT: case GT: case LTU: case GTU: case EQ:
15715 /* isel handles these directly. */
15716 break;
15717
15718 default:
15719 /* We need to swap the sense of the comparison. */
15720 {
15721 std::swap (false_cond, true_cond);
15722 PUT_CODE (condition_rtx, reverse_condition (cond_code));
15723 }
15724 break;
15725 }
15726
15727 false_cond = force_reg (mode, false_cond);
15728 if (true_cond != const0_rtx)
15729 true_cond = force_reg (mode, true_cond);
15730
15731 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
15732
15733 return true;
15734 }
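/* The isel instruction selects between two registers based on a single
   CR bit, e.g. "isel 3,4,5,0" puts r4 in r3 if the LT bit of cr0 is
   set and r5 otherwise; hence the reversal above for conditions isel
   cannot test directly.  */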
15735
15736 void
15737 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
15738 {
15739 machine_mode mode = GET_MODE (op0);
15740 enum rtx_code c;
15741 rtx target;
15742
15743 /* VSX/altivec have direct min/max insns. */
15744 if ((code == SMAX || code == SMIN)
15745 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
15746 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
15747 {
15748 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
15749 return;
15750 }
15751
15752 if (code == SMAX || code == SMIN)
15753 c = GE;
15754 else
15755 c = GEU;
15756
15757 if (code == SMAX || code == UMAX)
15758 target = emit_conditional_move (dest, c, op0, op1, mode,
15759 op0, op1, mode, 0);
15760 else
15761 target = emit_conditional_move (dest, c, op0, op1, mode,
15762 op1, op0, mode, 0);
15763 gcc_assert (target);
15764 if (target != dest)
15765 emit_move_insn (dest, target);
15766 }
15767
15768 /* A subroutine of the atomic operation splitters. Jump to LABEL if
15769 COND is true. Mark the jump as unlikely to be taken. */
15770
15771 static void
15772 emit_unlikely_jump (rtx cond, rtx label)
15773 {
15774 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
15775 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
15776 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
15777 }
15778
15779 /* A subroutine of the atomic operation splitters. Emit a load-locked
15780 instruction in MODE. For QI/HImode, possibly use a pattern that includes
15781 the zero_extend operation. */
15782
15783 static void
15784 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
15785 {
15786 rtx (*fn) (rtx, rtx) = NULL;
15787
15788 switch (mode)
15789 {
15790 case E_QImode:
15791 fn = gen_load_lockedqi;
15792 break;
15793 case E_HImode:
15794 fn = gen_load_lockedhi;
15795 break;
15796 case E_SImode:
15797 if (GET_MODE (mem) == QImode)
15798 fn = gen_load_lockedqi_si;
15799 else if (GET_MODE (mem) == HImode)
15800 fn = gen_load_lockedhi_si;
15801 else
15802 fn = gen_load_lockedsi;
15803 break;
15804 case E_DImode:
15805 fn = gen_load_lockeddi;
15806 break;
15807 case E_TImode:
15808 fn = gen_load_lockedti;
15809 break;
15810 default:
15811 gcc_unreachable ();
15812 }
15813 emit_insn (fn (reg, mem));
15814 }
15815
15816 /* A subroutine of the atomic operation splitters. Emit a store-conditional
15817 instruction in MODE. */
15818
15819 static void
15820 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
15821 {
15822 rtx (*fn) (rtx, rtx, rtx) = NULL;
15823
15824 switch (mode)
15825 {
15826 case E_QImode:
15827 fn = gen_store_conditionalqi;
15828 break;
15829 case E_HImode:
15830 fn = gen_store_conditionalhi;
15831 break;
15832 case E_SImode:
15833 fn = gen_store_conditionalsi;
15834 break;
15835 case E_DImode:
15836 fn = gen_store_conditionaldi;
15837 break;
15838 case E_TImode:
15839 fn = gen_store_conditionalti;
15840 break;
15841 default:
15842 gcc_unreachable ();
15843 }
15844
15845 /* Emit sync before stwcx. to address PPC405 Erratum. */
15846 if (PPC405_ERRATUM77)
15847 emit_insn (gen_hwsync ());
15848
15849 emit_insn (fn (res, mem, val));
15850 }
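/* These expand to the larx/stcx. pairs, e.g. lwarx/stwcx. for SImode
   and ldarx/stdcx. for DImode; the store-conditional succeeds only if
   the reservation from the matching load-locked is still held.  */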
15851
15852 /* Expand barriers before and after a load_locked/store_cond sequence. */
15853
15854 static rtx
15855 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
15856 {
15857 rtx addr = XEXP (mem, 0);
15858
15859 if (!legitimate_indirect_address_p (addr, reload_completed)
15860 && !legitimate_indexed_address_p (addr, reload_completed))
15861 {
15862 addr = force_reg (Pmode, addr);
15863 mem = replace_equiv_address_nv (mem, addr);
15864 }
15865
15866 switch (model)
15867 {
15868 case MEMMODEL_RELAXED:
15869 case MEMMODEL_CONSUME:
15870 case MEMMODEL_ACQUIRE:
15871 break;
15872 case MEMMODEL_RELEASE:
15873 case MEMMODEL_ACQ_REL:
15874 emit_insn (gen_lwsync ());
15875 break;
15876 case MEMMODEL_SEQ_CST:
15877 emit_insn (gen_hwsync ());
15878 break;
15879 default:
15880 gcc_unreachable ();
15881 }
15882 return mem;
15883 }
15884
15885 static void
15886 rs6000_post_atomic_barrier (enum memmodel model)
15887 {
15888 switch (model)
15889 {
15890 case MEMMODEL_RELAXED:
15891 case MEMMODEL_CONSUME:
15892 case MEMMODEL_RELEASE:
15893 break;
15894 case MEMMODEL_ACQUIRE:
15895 case MEMMODEL_ACQ_REL:
15896 case MEMMODEL_SEQ_CST:
15897 emit_insn (gen_isync ());
15898 break;
15899 default:
15900 gcc_unreachable ();
15901 }
15902 }
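/* So, for example, a SEQ_CST operation is bracketed by hwsync before
   the load-locked/store-conditional loop and isync after it, while
   ACQUIRE needs only the trailing isync.  */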
15903
15904 /* A subroutine of the various atomic expanders. For sub-word operations,
15905 we must adjust things to operate on SImode. Given the original MEM,
15906 return a new aligned memory. Also build and return the quantities by
15907 which to shift and mask. */
15908
15909 static rtx
15910 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
15911 {
15912 rtx addr, align, shift, mask, mem;
15913 HOST_WIDE_INT shift_mask;
15914 machine_mode mode = GET_MODE (orig_mem);
15915
15916 /* For smaller modes, we have to implement this via SImode. */
15917 shift_mask = (mode == QImode ? 0x18 : 0x10);
15918
15919 addr = XEXP (orig_mem, 0);
15920 addr = force_reg (GET_MODE (addr), addr);
15921
15922 /* Aligned memory containing subword. Generate a new memory. We
15923 do not want any of the existing MEM_ATTR data, as we're now
15924 accessing memory outside the original object. */
15925 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
15926 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15927 mem = gen_rtx_MEM (SImode, align);
15928 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
15929 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
15930 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
15931
15932 /* Shift amount for subword relative to aligned word. */
15933 shift = gen_reg_rtx (SImode);
15934 addr = gen_lowpart (SImode, addr);
15935 rtx tmp = gen_reg_rtx (SImode);
15936 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
15937 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
15938 if (BYTES_BIG_ENDIAN)
15939 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
15940 shift, 1, OPTAB_LIB_WIDEN);
15941 *pshift = shift;
15942
15943 /* Mask for insertion. */
15944 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
15945 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
15946 *pmask = mask;
15947
15948 return mem;
15949 }
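
/* For example (an illustrative case, not from the sources): a QImode
   object at address 0x1003 yields ALIGN = 0x1000, so the new MEM covers
   the SImode word at 0x1000. The shift is (0x1003 << 3) & 0x18 = 24,
   i.e. (addr & 3) * 8; on big-endian it is XORed with 24 to give 0,
   since byte 3 is then the low-order byte of the word. The mask becomes
   0xff << shift, selecting just that byte within the word.  */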
15950
15951 /* A subroutine of the various atomic expanders. For sub-word operands,
15952 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
15953
15954 static rtx
15955 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
15956 {
15957 rtx x;
15958
15959 x = gen_reg_rtx (SImode);
15960 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
15961 gen_rtx_NOT (SImode, mask),
15962 oldval)));
15963
15964 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
15965
15966 return x;
15967 }
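
/* In other words, the result computed above is (OLDVAL & ~MASK) | NEWVAL,
   where NEWVAL has already been shifted and masked into position.  */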
15968
15969 /* A subroutine of the various atomic expanders. For sub-word operands,
15970 extract WIDE to NARROW via SHIFT. */
15971
15972 static void
15973 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
15974 {
15975 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
15976 wide, 1, OPTAB_LIB_WIDEN);
15977 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
15978 }
15979
15980 /* Expand an atomic compare and swap operation. */
15981
15982 void
15983 rs6000_expand_atomic_compare_and_swap (rtx operands[])
15984 {
15985 rtx boolval, retval, mem, oldval, newval, cond;
15986 rtx label1, label2, x, mask, shift;
15987 machine_mode mode, orig_mode;
15988 enum memmodel mod_s, mod_f;
15989 bool is_weak;
15990
15991 boolval = operands[0];
15992 retval = operands[1];
15993 mem = operands[2];
15994 oldval = operands[3];
15995 newval = operands[4];
15996 is_weak = (INTVAL (operands[5]) != 0);
15997 mod_s = memmodel_base (INTVAL (operands[6]));
15998 mod_f = memmodel_base (INTVAL (operands[7]));
15999 orig_mode = mode = GET_MODE (mem);
16000
16001 mask = shift = NULL_RTX;
16002 if (mode == QImode || mode == HImode)
16003 {
16004 /* Before power8, we didn't have access to lbarx/lharx, so generate
16005 lwarx and shift/mask operations. With power8, we need to do the
16006 comparison in SImode, but the store is still done in QI/HImode. */
16007 oldval = convert_modes (SImode, mode, oldval, 1);
16008
16009 if (!TARGET_SYNC_HI_QI)
16010 {
16011 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16012
16013 /* Shift and mask OLDVAL into position within the word. */
16014 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
16015 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16016
16017 /* Shift and mask NEWVAL into position within the word. */
16018 newval = convert_modes (SImode, mode, newval, 1);
16019 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
16020 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16021 }
16022
16023 /* Prepare to adjust the return value. */
16024 retval = gen_reg_rtx (SImode);
16025 mode = SImode;
16026 }
16027 else if (reg_overlap_mentioned_p (retval, oldval))
16028 oldval = copy_to_reg (oldval);
16029
16030 if (mode != TImode && !reg_or_short_operand (oldval, mode))
16031 oldval = copy_to_mode_reg (mode, oldval);
16032
16033 if (reg_overlap_mentioned_p (retval, newval))
16034 newval = copy_to_reg (newval);
16035
16036 mem = rs6000_pre_atomic_barrier (mem, mod_s);
16037
16038 label1 = NULL_RTX;
16039 if (!is_weak)
16040 {
16041 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16042 emit_label (XEXP (label1, 0));
16043 }
16044 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16045
16046 emit_load_locked (mode, retval, mem);
16047
16048 x = retval;
16049 if (mask)
16050 x = expand_simple_binop (SImode, AND, retval, mask,
16051 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16052
16053 cond = gen_reg_rtx (CCmode);
16054 /* If we have TImode, synthesize a comparison. */
16055 if (mode != TImode)
16056 x = gen_rtx_COMPARE (CCmode, x, oldval);
16057 else
16058 {
16059 rtx xor1_result = gen_reg_rtx (DImode);
16060 rtx xor2_result = gen_reg_rtx (DImode);
16061 rtx or_result = gen_reg_rtx (DImode);
16062 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
16063 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
16064 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
16065 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
16066
16067 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
16068 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
16069 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
16070 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
16071 }
16072
16073 emit_insn (gen_rtx_SET (cond, x));
16074
16075 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16076 emit_unlikely_jump (x, label2);
16077
16078 x = newval;
16079 if (mask)
16080 x = rs6000_mask_atomic_subword (retval, newval, mask);
16081
16082 emit_store_conditional (orig_mode, cond, mem, x);
16083
16084 if (!is_weak)
16085 {
16086 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16087 emit_unlikely_jump (x, label1);
16088 }
16089
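/* Place LABEL2 before the final barrier unless the failure memory model
   is relaxed: a failing comparison then still executes the barrier
   required by MOD_F, while a relaxed failure model lets the branch skip
   the barrier entirely.  */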
16090 if (!is_mm_relaxed (mod_f))
16091 emit_label (XEXP (label2, 0));
16092
16093 rs6000_post_atomic_barrier (mod_s);
16094
16095 if (is_mm_relaxed (mod_f))
16096 emit_label (XEXP (label2, 0));
16097
16098 if (shift)
16099 rs6000_finish_atomic_subword (operands[1], retval, shift);
16100 else if (mode != GET_MODE (operands[1]))
16101 convert_move (operands[1], retval, 1);
16102
16103 /* In all cases, CR0 contains EQ on success, and NE on failure. */
16104 x = gen_rtx_EQ (SImode, cond, const0_rtx);
16105 emit_insn (gen_rtx_SET (boolval, x));
16106 }
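
/* For reference, a strong word-sized compare-and-swap with SEQ_CST
   semantics typically expands to a sequence along these lines (an
   illustrative sketch; register numbers are arbitrary):

	hwsync
     1:	lwarx  9,0,3		# load word and reserve
	cmpw   0,9,4		# compare against expected OLDVAL
	bne-   0,2f		# mismatch: fail
	stwcx. 5,0,3		# store NEWVAL if reservation still held
	bne-   0,1b		# reservation lost: retry
     2:	isync			# acquire fence, reached on both paths

   As noted above, CR0 then holds EQ on success and NE on failure.  */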
16107
16108 /* Expand an atomic exchange operation. */
16109
16110 void
16111 rs6000_expand_atomic_exchange (rtx operands[])
16112 {
16113 rtx retval, mem, val, cond;
16114 machine_mode mode;
16115 enum memmodel model;
16116 rtx label, x, mask, shift;
16117
16118 retval = operands[0];
16119 mem = operands[1];
16120 val = operands[2];
16121 model = memmodel_base (INTVAL (operands[3]));
16122 mode = GET_MODE (mem);
16123
16124 mask = shift = NULL_RTX;
16125 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
16126 {
16127 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16128
16129 /* Shift and mask VAL into position within the word. */
16130 val = convert_modes (SImode, mode, val, 1);
16131 val = expand_simple_binop (SImode, ASHIFT, val, shift,
16132 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16133
16134 /* Prepare to adjust the return value. */
16135 retval = gen_reg_rtx (SImode);
16136 mode = SImode;
16137 }
16138
16139 mem = rs6000_pre_atomic_barrier (mem, model);
16140
16141 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16142 emit_label (XEXP (label, 0));
16143
16144 emit_load_locked (mode, retval, mem);
16145
16146 x = val;
16147 if (mask)
16148 x = rs6000_mask_atomic_subword (retval, val, mask);
16149
16150 cond = gen_reg_rtx (CCmode);
16151 emit_store_conditional (mode, cond, mem, x);
16152
16153 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16154 emit_unlikely_jump (x, label);
16155
16156 rs6000_post_atomic_barrier (model);
16157
16158 if (shift)
16159 rs6000_finish_atomic_subword (operands[0], retval, shift);
16160 }
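
/* The full-word exchange loop is simply (sketch):

     1:	lwarx  9,0,3		# RETVAL = *MEM, reserve
	stwcx. 4,0,3		# *MEM = VAL if reservation held
	bne-   0,1b		# otherwise retry

   wrapped in the barriers selected by MODEL.  */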
16161
16162 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
16163 to perform. MEM is the memory on which to operate. VAL is the second
16164 operand of the binary operator. BEFORE and AFTER are optional locations to
16165 return the value of MEM either before or after the operation. MODEL_RTX
16166 is a CONST_INT containing the memory model to use. */
16167
16168 void
16169 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
16170 rtx orig_before, rtx orig_after, rtx model_rtx)
16171 {
16172 enum memmodel model = memmodel_base (INTVAL (model_rtx));
16173 machine_mode mode = GET_MODE (mem);
16174 machine_mode store_mode = mode;
16175 rtx label, x, cond, mask, shift;
16176 rtx before = orig_before, after = orig_after;
16177
16178 mask = shift = NULL_RTX;
16179 /* On power8, we want to use SImode for the operation. On previous systems,
16180 do the operation on the containing SImode word and shift/mask to get the
16181 proper byte or halfword. */
16182 if (mode == QImode || mode == HImode)
16183 {
16184 if (TARGET_SYNC_HI_QI)
16185 {
16186 val = convert_modes (SImode, mode, val, 1);
16187
16188 /* Prepare to adjust the return value. */
16189 before = gen_reg_rtx (SImode);
16190 if (after)
16191 after = gen_reg_rtx (SImode);
16192 mode = SImode;
16193 }
16194 else
16195 {
16196 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16197
16198 /* Shift and mask VAL into position within the word. */
16199 val = convert_modes (SImode, mode, val, 1);
16200 val = expand_simple_binop (SImode, ASHIFT, val, shift,
16201 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16202
16203 switch (code)
16204 {
16205 case IOR:
16206 case XOR:
16207 /* We've already zero-extended VAL. That is sufficient to
16208 make certain that it does not affect other bits. */
16209 mask = NULL;
16210 break;
16211
16212 case AND:
16213 /* If we make certain that all of the other bits in VAL are
16214 set, that will be sufficient to not affect other bits. */
16215 x = gen_rtx_NOT (SImode, mask);
16216 x = gen_rtx_IOR (SImode, x, val);
16217 emit_insn (gen_rtx_SET (val, x));
16218 mask = NULL;
16219 break;
16220
16221 case NOT:
16222 case PLUS:
16223 case MINUS:
16224 /* These will all affect bits outside the field and need
16225 adjustment via MASK within the loop. */
16226 break;
16227
16228 default:
16229 gcc_unreachable ();
16230 }
16231
16232 /* Prepare to adjust the return value. */
16233 before = gen_reg_rtx (SImode);
16234 if (after)
16235 after = gen_reg_rtx (SImode);
16236 store_mode = mode = SImode;
16237 }
16238 }
16239
16240 mem = rs6000_pre_atomic_barrier (mem, model);
16241
16242 label = gen_label_rtx ();
16243 emit_label (label);
16244 label = gen_rtx_LABEL_REF (VOIDmode, label);
16245
16246 if (before == NULL_RTX)
16247 before = gen_reg_rtx (mode);
16248
16249 emit_load_locked (mode, before, mem);
16250
16251 if (code == NOT)
16252 {
16253 x = expand_simple_binop (mode, AND, before, val,
16254 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16255 after = expand_simple_unop (mode, NOT, x, after, 1);
16256 }
16257 else
16258 {
16259 after = expand_simple_binop (mode, code, before, val,
16260 after, 1, OPTAB_LIB_WIDEN);
16261 }
16262
16263 x = after;
16264 if (mask)
16265 {
16266 x = expand_simple_binop (SImode, AND, after, mask,
16267 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16268 x = rs6000_mask_atomic_subword (before, x, mask);
16269 }
16270 else if (store_mode != mode)
16271 x = convert_modes (store_mode, mode, x, 1);
16272
16273 cond = gen_reg_rtx (CCmode);
16274 emit_store_conditional (store_mode, cond, mem, x);
16275
16276 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16277 emit_unlikely_jump (x, label);
16278
16279 rs6000_post_atomic_barrier (model);
16280
16281 if (shift)
16282 {
16283 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
16284 then do the calculations in a SImode register. */
16285 if (orig_before)
16286 rs6000_finish_atomic_subword (orig_before, before, shift);
16287 if (orig_after)
16288 rs6000_finish_atomic_subword (orig_after, after, shift);
16289 }
16290 else if (store_mode != mode)
16291 {
16292 /* QImode/HImode on machines with lbarx/lharx where we do the native
16293 operation and then do the calculations in a SImode register. */
16294 if (orig_before)
16295 convert_move (orig_before, before, 1);
16296 if (orig_after)
16297 convert_move (orig_after, after, 1);
16298 }
16299 else if (orig_after && after != orig_after)
16300 emit_move_insn (orig_after, after);
16301 }
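
/* As an illustration, a full-word atomic fetch-and-add expands to a loop
   of roughly this shape (sketch; register numbers are arbitrary):

     1:	lwarx  9,0,3		# BEFORE = *MEM, reserve
	add    10,9,4		# AFTER = BEFORE + VAL
	stwcx. 10,0,3		# *MEM = AFTER if reservation held
	bne-   0,1b		# otherwise retry

   with the pre/post barriers chosen from MODEL as usual.  */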
16302
16303 /* Emit instructions to move SRC to DST. Called by splitters for
16304 multi-register moves. It will emit at most one instruction for
16305 each register that is accessed; that is, it won't emit li/lis pairs
16306 (or equivalent for 64-bit code). One of SRC or DST must be a hard
16307 register. */
16308
16309 void
16310 rs6000_split_multireg_move (rtx dst, rtx src)
16311 {
16312 /* The register number of the first register being moved. */
16313 int reg;
16314 /* The mode that is to be moved. */
16315 machine_mode mode;
16316 /* The mode that the move is being done in, and its size. */
16317 machine_mode reg_mode;
16318 int reg_mode_size;
16319 /* The number of registers that will be moved. */
16320 int nregs;
16321
16322 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
16323 mode = GET_MODE (dst);
16324 nregs = hard_regno_nregs (reg, mode);
16325
16326 /* If we have a vector quad register for MMA, and this is a load or store,
16327 see if we can use vector paired load/stores. */
16328 if (mode == XOmode && TARGET_MMA
16329 && (MEM_P (dst) || MEM_P (src)))
16330 {
16331 reg_mode = OOmode;
16332 nregs /= 2;
16333 }
16334 /* If we have a vector pair/quad mode, split it into two/four separate
16335 vectors. */
16336 else if (mode == OOmode || mode == XOmode)
16337 reg_mode = V1TImode;
16338 else if (FP_REGNO_P (reg))
16339 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
16340 (TARGET_HARD_FLOAT ? DFmode : SFmode);
16341 else if (ALTIVEC_REGNO_P (reg))
16342 reg_mode = V16QImode;
16343 else
16344 reg_mode = word_mode;
16345 reg_mode_size = GET_MODE_SIZE (reg_mode);
16346
16347 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
16348
16349 /* TDmode residing in FP registers is special, since the ISA requires that
16350 the lower-numbered word of a register pair is always the most significant
16351 word, even in little-endian mode. This does not match the usual subreg
16352 semantics, so we cannot use simplify_gen_subreg in those cases. Access
16353 the appropriate constituent registers "by hand" in little-endian mode.
16354
16355 Note we do not need to check for destructive overlap here since TDmode
16356 can only reside in even/odd register pairs. */
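
/* For instance (a hypothetical example): storing a TDmode value held in
   the pair {f10, f11} to memory on a little-endian target places f11
   (the least significant half) at offset 0 and f10 at offset 8, which
   the loop below achieves by indexing the registers in reverse.  */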
16357 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
16358 {
16359 rtx p_src, p_dst;
16360 int i;
16361
16362 for (i = 0; i < nregs; i++)
16363 {
16364 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
16365 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
16366 else
16367 p_src = simplify_gen_subreg (reg_mode, src, mode,
16368 i * reg_mode_size);
16369
16370 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
16371 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
16372 else
16373 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
16374 i * reg_mode_size);
16375
16376 emit_insn (gen_rtx_SET (p_dst, p_src));
16377 }
16378
16379 return;
16380 }
16381
16382 /* The __vector_pair and __vector_quad modes are multi-register
16383 modes, so if we have to load or store the registers, we have to be
16384 careful to properly swap them if we're in little endian mode
16385 below. This means the last register gets the first memory
16386 location. We also need to be careful of using the right register
16387 numbers if we are splitting XO to OO. */
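
/* Concretely (an illustrative case): storing an OOmode pair held in
   {vs0, vs1} on little-endian writes vs1 to offset 0 and vs0 to offset
   16, so each 16-byte half lands at the address the memory layout
   expects.  */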
16388 if (mode == OOmode || mode == XOmode)
16389 {
16390 nregs = hard_regno_nregs (reg, mode);
16391 int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
16392 if (MEM_P (dst))
16393 {
16394 unsigned offset = 0;
16395 unsigned size = GET_MODE_SIZE (reg_mode);
16396
16397 /* If we are reading an accumulator register, we have to
16398 deprime it before we can access it. */
16399 if (TARGET_MMA
16400 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
16401 emit_insn (gen_mma_xxmfacc (src, src));
16402
16403 for (int i = 0; i < nregs; i += reg_mode_nregs)
16404 {
16405 unsigned subreg =
16406 (WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i);
16407 rtx dst2 = adjust_address (dst, reg_mode, offset);
16408 rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
16409 offset += size;
16410 emit_insn (gen_rtx_SET (dst2, src2));
16411 }
16412
16413 return;
16414 }
16415
16416 if (MEM_P (src))
16417 {
16418 unsigned offset = 0;
16419 unsigned size = GET_MODE_SIZE (reg_mode);
16420
16421 for (int i = 0; i < nregs; i += reg_mode_nregs)
16422 {
16423 unsigned subreg =
16424 (WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i);
16425 rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
16426 rtx src2 = adjust_address (src, reg_mode, offset);
16427 offset += size;
16428 emit_insn (gen_rtx_SET (dst2, src2));
16429 }
16430
16431 /* If we are writing an accumulator register, we have to
16432 prime it after we've written it. */
16433 if (TARGET_MMA
16434 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
16435 emit_insn (gen_mma_xxmtacc (dst, dst));
16436
16437 return;
16438 }
16439
16440 if (GET_CODE (src) == UNSPEC)
16441 {
16442 gcc_assert (XINT (src, 1) == UNSPEC_MMA_ASSEMBLE);
16443 gcc_assert (REG_P (dst));
16444 if (GET_MODE (src) == XOmode)
16445 gcc_assert (FP_REGNO_P (REGNO (dst)));
16446 if (GET_MODE (src) == OOmode)
16447 gcc_assert (VSX_REGNO_P (REGNO (dst)));
16448
16449 reg_mode = GET_MODE (XVECEXP (src, 0, 0));
16450 for (int i = 0; i < XVECLEN (src, 0); i++)
16451 {
16452 rtx dst_i = gen_rtx_REG (reg_mode, reg + i);
16453 emit_insn (gen_rtx_SET (dst_i, XVECEXP (src, 0, i)));
16454 }
16455
16456 /* We are writing an accumulator register, so we have to
16457 prime it after we've written it. */
16458 if (GET_MODE (src) == XOmode)
16459 emit_insn (gen_mma_xxmtacc (dst, dst));
16460
16461 return;
16462 }
16463
16464 /* Register -> register moves can use common code. */
16465 }
16466
16467 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
16468 {
16469 /* If we are reading an accumulator register, we have to
16470 deprime it before we can access it. */
16471 if (TARGET_MMA
16472 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
16473 emit_insn (gen_mma_xxmfacc (src, src));
16474
16475 /* Move register range backwards, if we might have destructive
16476 overlap. */
16477 int i;
16478 /* XO/OO are opaque so cannot use subregs. */
16479 if (mode == OOmode || mode == XOmode )
16480 {
16481 for (i = nregs - 1; i >= 0; i--)
16482 {
16483 rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i);
16484 rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i);
16485 emit_insn (gen_rtx_SET (dst_i, src_i));
16486 }
16487 }
16488 else
16489 {
16490 for (i = nregs - 1; i >= 0; i--)
16491 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
16492 i * reg_mode_size),
16493 simplify_gen_subreg (reg_mode, src, mode,
16494 i * reg_mode_size)));
16495 }
16496
16497 /* If we are writing an accumulator register, we have to
16498 prime it after we've written it. */
16499 if (TARGET_MMA
16500 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
16501 emit_insn (gen_mma_xxmtacc (dst, dst));
16502 }
16503 else
16504 {
16505 int i;
16506 int j = -1;
16507 bool used_update = false;
16508 rtx restore_basereg = NULL_RTX;
16509
16510 if (MEM_P (src) && INT_REGNO_P (reg))
16511 {
16512 rtx breg;
16513
16514 if (GET_CODE (XEXP (src, 0)) == PRE_INC
16515 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
16516 {
16517 rtx delta_rtx;
16518 breg = XEXP (XEXP (src, 0), 0);
16519 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
16520 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
16521 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
16522 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
16523 src = replace_equiv_address (src, breg);
16524 }
16525 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
16526 {
16527 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
16528 {
16529 rtx basereg = XEXP (XEXP (src, 0), 0);
16530 if (TARGET_UPDATE)
16531 {
16532 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
16533 emit_insn (gen_rtx_SET (ndst,
16534 gen_rtx_MEM (reg_mode,
16535 XEXP (src, 0))));
16536 used_update = true;
16537 }
16538 else
16539 emit_insn (gen_rtx_SET (basereg,
16540 XEXP (XEXP (src, 0), 1)));
16541 src = replace_equiv_address (src, basereg);
16542 }
16543 else
16544 {
16545 rtx basereg = gen_rtx_REG (Pmode, reg);
16546 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
16547 src = replace_equiv_address (src, basereg);
16548 }
16549 }
16550
16551 breg = XEXP (src, 0);
16552 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
16553 breg = XEXP (breg, 0);
16554
16555 /* If the base register we are using to address memory is
16556 also a destination reg, then change that register last. */
16557 if (REG_P (breg)
16558 && REGNO (breg) >= REGNO (dst)
16559 && REGNO (breg) < REGNO (dst) + nregs)
16560 j = REGNO (breg) - REGNO (dst);
16561 }
16562 else if (MEM_P (dst) && INT_REGNO_P (reg))
16563 {
16564 rtx breg;
16565
16566 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
16567 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
16568 {
16569 rtx delta_rtx;
16570 breg = XEXP (XEXP (dst, 0), 0);
16571 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
16572 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
16573 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
16574
16575 /* We have to update the breg before doing the store.
16576 Use store with update, if available. */
16577
16578 if (TARGET_UPDATE)
16579 {
16580 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
16581 emit_insn (TARGET_32BIT
16582 ? (TARGET_POWERPC64
16583 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
16584 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
16585 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
16586 used_update = true;
16587 }
16588 else
16589 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
16590 dst = replace_equiv_address (dst, breg);
16591 }
16592 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
16593 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
16594 {
16595 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
16596 {
16597 rtx basereg = XEXP (XEXP (dst, 0), 0);
16598 if (TARGET_UPDATE)
16599 {
16600 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
16601 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
16602 XEXP (dst, 0)),
16603 nsrc));
16604 used_update = true;
16605 }
16606 else
16607 emit_insn (gen_rtx_SET (basereg,
16608 XEXP (XEXP (dst, 0), 1)));
16609 dst = replace_equiv_address (dst, basereg);
16610 }
16611 else
16612 {
16613 rtx basereg = XEXP (XEXP (dst, 0), 0);
16614 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
16615 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
16616 && REG_P (basereg)
16617 && REG_P (offsetreg)
16618 && REGNO (basereg) != REGNO (offsetreg));
16619 if (REGNO (basereg) == 0)
16620 {
16621 rtx tmp = offsetreg;
16622 offsetreg = basereg;
16623 basereg = tmp;
16624 }
16625 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
16626 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
16627 dst = replace_equiv_address (dst, basereg);
16628 }
16629 }
16630 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
16631 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
16632 }
16633
16634 /* If we are reading an accumulator register, we have to
16635 deprime it before we can access it. */
16636 if (TARGET_MMA && REG_P (src)
16637 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
16638 emit_insn (gen_mma_xxmfacc (src, src));
16639
16640 for (i = 0; i < nregs; i++)
16641 {
16642 /* Calculate index to next subword. */
16643 ++j;
16644 if (j == nregs)
16645 j = 0;
16646
16647 /* If compiler already emitted move of first word by
16648 store with update, no need to do anything. */
16649 if (j == 0 && used_update)
16650 continue;
16651
16652 /* XO/OO are opaque so cannot use subregs. */
16653 if (mode == OOmode || mode == XOmode )
16654 {
16655 rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
16656 rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
16657 emit_insn (gen_rtx_SET (dst_i, src_i));
16658 }
16659 else
16660 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
16661 j * reg_mode_size),
16662 simplify_gen_subreg (reg_mode, src, mode,
16663 j * reg_mode_size)));
16664 }
16665
16666 /* If we are writing an accumulator register, we have to
16667 prime it after we've written it. */
16668 if (TARGET_MMA && REG_P (dst)
16669 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
16670 emit_insn (gen_mma_xxmtacc (dst, dst));
16671
16672 if (restore_basereg != NULL_RTX)
16673 emit_insn (restore_basereg);
16674 }
16675 }
16676
16677 static GTY(()) alias_set_type TOC_alias_set = -1;
16678
16679 alias_set_type
16680 get_TOC_alias_set (void)
16681 {
16682 if (TOC_alias_set == -1)
16683 TOC_alias_set = new_alias_set ();
16684 return TOC_alias_set;
16685 }
16686
16687 /* The mode the ABI uses for a word. This is not the same as word_mode
16688 for -m32 -mpowerpc64. This is used to implement various target hooks. */
16689
16690 static scalar_int_mode
16691 rs6000_abi_word_mode (void)
16692 {
16693 return TARGET_32BIT ? SImode : DImode;
16694 }
16695
16696 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
16697 static char *
16698 rs6000_offload_options (void)
16699 {
16700 if (TARGET_64BIT)
16701 return xstrdup ("-foffload-abi=lp64");
16702 else
16703 return xstrdup ("-foffload-abi=ilp32");
16704 }
16705
16706 \f
16707 /* A quick summary of the various types of 'constant-pool tables'
16708 under PowerPC:
16709
16710 Target Flags Name One table per
16711 AIX (none) AIX TOC object file
16712 AIX -mfull-toc AIX TOC object file
16713 AIX -mminimal-toc AIX minimal TOC translation unit
16714 SVR4/EABI (none) SVR4 SDATA object file
16715 SVR4/EABI -fpic SVR4 pic object file
16716 SVR4/EABI -fPIC SVR4 PIC translation unit
16717 SVR4/EABI -mrelocatable EABI TOC function
16718 SVR4/EABI -maix AIX TOC object file
16719 SVR4/EABI -maix -mminimal-toc
16720 AIX minimal TOC translation unit
16721
16722 Name Reg. Set by entries contains:
16723 made by addrs? fp? sum?
16724
16725 AIX TOC 2 crt0 as Y option option
16726 AIX minimal TOC 30 prolog gcc Y Y option
16727 SVR4 SDATA 13 crt0 gcc N Y N
16728 SVR4 pic 30 prolog ld Y not yet N
16729 SVR4 PIC 30 prolog gcc Y option option
16730 EABI TOC 30 prolog gcc Y option option
16731
16732 */
16733
16734 /* Hash functions for the hash table. */
16735
16736 static unsigned
16737 rs6000_hash_constant (rtx k)
16738 {
16739 enum rtx_code code = GET_CODE (k);
16740 machine_mode mode = GET_MODE (k);
16741 unsigned result = (code << 3) ^ mode;
16742 const char *format;
16743 int flen, fidx;
16744
16745 format = GET_RTX_FORMAT (code);
16746 flen = strlen (format);
16747 fidx = 0;
16748
16749 switch (code)
16750 {
16751 case LABEL_REF:
16752 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
16753
16754 case CONST_WIDE_INT:
16755 {
16756 int i;
16757 flen = CONST_WIDE_INT_NUNITS (k);
16758 for (i = 0; i < flen; i++)
16759 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
16760 return result;
16761 }
16762
16763 case CONST_DOUBLE:
16764 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
16765
16766 case CODE_LABEL:
16767 fidx = 3;
16768 break;
16769
16770 default:
16771 break;
16772 }
16773
16774 for (; fidx < flen; fidx++)
16775 switch (format[fidx])
16776 {
16777 case 's':
16778 {
16779 unsigned i, len;
16780 const char *str = XSTR (k, fidx);
16781 len = strlen (str);
16782 result = result * 613 + len;
16783 for (i = 0; i < len; i++)
16784 result = result * 613 + (unsigned) str[i];
16785 break;
16786 }
16787 case 'u':
16788 case 'e':
16789 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
16790 break;
16791 case 'i':
16792 case 'n':
16793 result = result * 613 + (unsigned) XINT (k, fidx);
16794 break;
16795 case 'w':
16796 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
16797 result = result * 613 + (unsigned) XWINT (k, fidx);
16798 else
16799 {
16800 size_t i;
16801 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
16802 result = result * 613 + (unsigned) (XWINT (k, fidx)
16803 >> CHAR_BIT * i);
16804 }
16805 break;
16806 case '0':
16807 break;
16808 default:
16809 gcc_unreachable ();
16810 }
16811
16812 return result;
16813 }
16814
16815 hashval_t
16816 toc_hasher::hash (toc_hash_struct *thc)
16817 {
16818 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
16819 }
16820
16821 /* Compare H1 and H2 for equivalence. */
16822
16823 bool
16824 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
16825 {
16826 rtx r1 = h1->key;
16827 rtx r2 = h2->key;
16828
16829 if (h1->key_mode != h2->key_mode)
16830 return 0;
16831
16832 return rtx_equal_p (r1, r2);
16833 }
16834
16835 /* These are the names given by the C++ front-end to vtables, and
16836 vtable-like objects. Ideally, this logic should not be here;
16837 instead, there should be some programmatic way of inquiring as
16838 to whether or not an object is a vtable. */
16839
16840 #define VTABLE_NAME_P(NAME) \
16841 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
16842 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
16843 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
16844 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
16845 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
16846
16847 #ifdef NO_DOLLAR_IN_LABEL
16848 /* Return a GGC-allocated character string translating dollar signs in
16849 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
16850
16851 const char *
16852 rs6000_xcoff_strip_dollar (const char *name)
16853 {
16854 char *strip, *p;
16855 const char *q;
16856 size_t len;
16857
16858 q = (const char *) strchr (name, '$');
16859
16860 if (q == 0 || q == name)
16861 return name;
16862
16863 len = strlen (name);
16864 strip = XALLOCAVEC (char, len + 1);
16865 strcpy (strip, name);
16866 p = strip + (q - name);
16867 while (p)
16868 {
16869 *p = '_';
16870 p = strchr (p + 1, '$');
16871 }
16872
16873 return ggc_alloc_string (strip, len);
16874 }
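
/* For example, rs6000_xcoff_strip_dollar ("f$g$h") returns "f_g_h",
   while a name whose only '$' is its first character is returned
   unchanged.  */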
16875 #endif
16876
16877 void
16878 rs6000_output_symbol_ref (FILE *file, rtx x)
16879 {
16880 const char *name = XSTR (x, 0);
16881
16882 /* Currently C++ toc references to vtables can be emitted before it
16883 is decided whether the vtable is public or private. If this is
16884 the case, then the linker will eventually complain that there is
16885 a reference to an unknown section. Thus, for vtables only,
16886 we emit the TOC reference to reference the identifier and not the
16887 symbol. */
16888 if (VTABLE_NAME_P (name))
16889 {
16890 RS6000_OUTPUT_BASENAME (file, name);
16891 }
16892 else
16893 assemble_name (file, name);
16894 }
16895
16896 /* Output a TOC entry. We derive the entry name from what is being
16897 written. */
16898
16899 void
16900 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
16901 {
16902 char buf[256];
16903 const char *name = buf;
16904 rtx base = x;
16905 HOST_WIDE_INT offset = 0;
16906
16907 gcc_assert (!TARGET_NO_TOC_OR_PCREL);
16908
16909 /* When the linker won't eliminate them, don't output duplicate
16910 TOC entries (this happens on AIX if there is any kind of TOC,
16911 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
16912 CODE_LABELs. */
16913 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
16914 {
16915 struct toc_hash_struct *h;
16916
16917 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
16918 time because GGC is not initialized at that point. */
16919 if (toc_hash_table == NULL)
16920 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
16921
16922 h = ggc_alloc<toc_hash_struct> ();
16923 h->key = x;
16924 h->key_mode = mode;
16925 h->labelno = labelno;
16926
16927 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
16928 if (*found == NULL)
16929 *found = h;
16930 else /* This is indeed a duplicate.
16931 Set this label equal to that label. */
16932 {
16933 fputs ("\t.set ", file);
16934 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
16935 fprintf (file, "%d,", labelno);
16936 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
16937 fprintf (file, "%d\n", ((*found)->labelno));
16938
16939 #ifdef HAVE_AS_TLS
16940 if (TARGET_XCOFF && SYMBOL_REF_P (x)
16941 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
16942 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
16943 {
16944 fputs ("\t.set ", file);
16945 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
16946 fprintf (file, "%d,", labelno);
16947 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
16948 fprintf (file, "%d\n", ((*found)->labelno));
16949 }
16950 #endif
16951 return;
16952 }
16953 }
16954
16955 /* If we're going to put a double constant in the TOC, make sure it's
16956 aligned properly when strict alignment is on. */
16957 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
16958 && STRICT_ALIGNMENT
16959 && GET_MODE_BITSIZE (mode) >= 64
16960 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
16961 ASM_OUTPUT_ALIGN (file, 3);
16963
16964 (*targetm.asm_out.internal_label) (file, "LC", labelno);
16965
16966 /* Handle FP constants specially. Note that if we have a minimal
16967 TOC, things we put here aren't actually in the TOC, so we can allow
16968 FP constants. */
16969 if (CONST_DOUBLE_P (x)
16970 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
16971 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
16972 {
16973 long k[4];
16974
16975 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16976 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
16977 else
16978 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
16979
16980 if (TARGET_64BIT)
16981 {
16982 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16983 fputs (DOUBLE_INT_ASM_OP, file);
16984 else
16985 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
16986 k[0] & 0xffffffff, k[1] & 0xffffffff,
16987 k[2] & 0xffffffff, k[3] & 0xffffffff);
16988 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
16989 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
16990 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
16991 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
16992 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
16993 return;
16994 }
16995 else
16996 {
16997 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16998 fputs ("\t.long ", file);
16999 else
17000 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17001 k[0] & 0xffffffff, k[1] & 0xffffffff,
17002 k[2] & 0xffffffff, k[3] & 0xffffffff);
17003 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
17004 k[0] & 0xffffffff, k[1] & 0xffffffff,
17005 k[2] & 0xffffffff, k[3] & 0xffffffff);
17006 return;
17007 }
17008 }
17009 else if (CONST_DOUBLE_P (x)
17010 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
17011 {
17012 long k[2];
17013
17014 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17015 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
17016 else
17017 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
17018
17019 if (TARGET_64BIT)
17020 {
17021 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17022 fputs (DOUBLE_INT_ASM_OP, file);
17023 else
17024 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
17025 k[0] & 0xffffffff, k[1] & 0xffffffff);
17026 fprintf (file, "0x%lx%08lx\n",
17027 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
17028 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
17029 return;
17030 }
17031 else
17032 {
17033 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17034 fputs ("\t.long ", file);
17035 else
17036 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
17037 k[0] & 0xffffffff, k[1] & 0xffffffff);
17038 fprintf (file, "0x%lx,0x%lx\n",
17039 k[0] & 0xffffffff, k[1] & 0xffffffff);
17040 return;
17041 }
17042 }
17043 else if (CONST_DOUBLE_P (x)
17044 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
17045 {
17046 long l;
17047
17048 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17049 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
17050 else
17051 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17052
17053 if (TARGET_64BIT)
17054 {
17055 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17056 fputs (DOUBLE_INT_ASM_OP, file);
17057 else
17058 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
17059 if (WORDS_BIG_ENDIAN)
17060 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
17061 else
17062 fprintf (file, "0x%lx\n", l & 0xffffffff);
17063 return;
17064 }
17065 else
17066 {
17067 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17068 fputs ("\t.long ", file);
17069 else
17070 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
17071 fprintf (file, "0x%lx\n", l & 0xffffffff);
17072 return;
17073 }
17074 }
17075 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
17076 {
17077 unsigned HOST_WIDE_INT low;
17078 HOST_WIDE_INT high;
17079
17080 low = INTVAL (x) & 0xffffffff;
17081 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
17082
17083 /* TOC entries are always Pmode-sized, so when big-endian
17084 smaller integer constants in the TOC need to be padded.
17085 (This is still a win over putting the constants in
17086 a separate constant pool, because then we'd have
17087 to have both a TOC entry _and_ the actual constant.)
17088
17089 For a 32-bit target, CONST_INT values are loaded and shifted
17090 entirely within `low' and can be stored in one TOC entry. */
17091
17092 /* It would be easy to make this work, but it doesn't now. */
17093 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
17094
17095 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
17096 {
17097 low |= high << 32;
17098 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
17099 high = (HOST_WIDE_INT) low >> 32;
17100 low &= 0xffffffff;
17101 }
17102
17103 if (TARGET_64BIT)
17104 {
17105 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17106 fputs (DOUBLE_INT_ASM_OP, file);
17107 else
17108 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
17109 (long) high & 0xffffffff, (long) low & 0xffffffff);
17110 fprintf (file, "0x%lx%08lx\n",
17111 (long) high & 0xffffffff, (long) low & 0xffffffff);
17112 return;
17113 }
17114 else
17115 {
17116 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
17117 {
17118 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17119 fputs ("\t.long ", file);
17120 else
17121 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
17122 (long) high & 0xffffffff, (long) low & 0xffffffff);
17123 fprintf (file, "0x%lx,0x%lx\n",
17124 (long) high & 0xffffffff, (long) low & 0xffffffff);
17125 }
17126 else
17127 {
17128 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17129 fputs ("\t.long ", file);
17130 else
17131 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
17132 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
17133 }
17134 return;
17135 }
17136 }
17137
17138 if (GET_CODE (x) == CONST)
17139 {
17140 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
17141 && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
17142
17143 base = XEXP (XEXP (x, 0), 0);
17144 offset = INTVAL (XEXP (XEXP (x, 0), 1));
17145 }
17146
17147 switch (GET_CODE (base))
17148 {
17149 case SYMBOL_REF:
17150 name = XSTR (base, 0);
17151 break;
17152
17153 case LABEL_REF:
17154 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
17155 CODE_LABEL_NUMBER (XEXP (base, 0)));
17156 break;
17157
17158 case CODE_LABEL:
17159 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
17160 break;
17161
17162 default:
17163 gcc_unreachable ();
17164 }
17165
17166 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17167 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
17168 else
17169 {
17170 fputs ("\t.tc ", file);
17171 RS6000_OUTPUT_BASENAME (file, name);
17172
17173 if (offset < 0)
17174 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
17175 else if (offset)
17176 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
17177
17178 /* Mark large TOC symbols on AIX with [TE] so they are mapped
17179 after other TOC symbols, reducing overflow of small TOC access
17180 to [TC] symbols. */
17181 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
17182 ? "[TE]," : "[TC],", file);
17183 }
17184
17185 /* Currently C++ toc references to vtables can be emitted before it
17186 is decided whether the vtable is public or private. If this is
17187 the case, then the linker will eventually complain that there is
17188 a TOC reference to an unknown section. Thus, for vtables only,
17189 we emit the TOC reference to reference the symbol and not the
17190 section. */
17191 if (VTABLE_NAME_P (name))
17192 {
17193 RS6000_OUTPUT_BASENAME (file, name);
17194 if (offset < 0)
17195 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
17196 else if (offset > 0)
17197 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
17198 }
17199 else
17200 output_addr_const (file, x);
17201
17202 #if HAVE_AS_TLS
17203 if (TARGET_XCOFF && SYMBOL_REF_P (base))
17204 {
17205 switch (SYMBOL_REF_TLS_MODEL (base))
17206 {
17207 case 0:
17208 break;
17209 case TLS_MODEL_LOCAL_EXEC:
17210 fputs ("@le", file);
17211 break;
17212 case TLS_MODEL_INITIAL_EXEC:
17213 fputs ("@ie", file);
17214 break;
17215 /* Use global-dynamic for local-dynamic. */
17216 case TLS_MODEL_GLOBAL_DYNAMIC:
17217 case TLS_MODEL_LOCAL_DYNAMIC:
17218 putc ('\n', file);
17219 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
17220 fputs ("\t.tc .", file);
17221 RS6000_OUTPUT_BASENAME (file, name);
17222 fputs ("[TC],", file);
17223 output_addr_const (file, x);
17224 fputs ("@m", file);
17225 break;
17226 default:
17227 gcc_unreachable ();
17228 }
17229 }
17230 #endif
17231
17232 putc ('\n', file);
17233 }
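
/* As a concrete example (illustrative; exact label numbers vary): on a
   64-bit ELF target the DFmode constant 1.0 is emitted roughly as

	.LC0:
		.quad 0x3ff0000000000000

   while the corresponding AIX TOC form is

		.tc FD_3ff00000_0[TC],0x3ff0000000000000  */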
17234 \f
17235 /* Output an assembler pseudo-op to write an ASCII string of N characters
17236 starting at P to FILE.
17237
17238 On the RS/6000, we have to do this using the .byte operation and
17239 write out special characters outside the quoted string.
17240 Also, the assembler is broken; very long strings are truncated,
17241 so we must artificially break them up early. */
17242
17243 void
17244 output_ascii (FILE *file, const char *p, int n)
17245 {
17246 char c;
17247 int i, count_string;
17248 const char *for_string = "\t.byte \"";
17249 const char *for_decimal = "\t.byte ";
17250 const char *to_close = NULL;
17251
17252 count_string = 0;
17253 for (i = 0; i < n; i++)
17254 {
17255 c = *p++;
17256 if (c >= ' ' && c < 0177)
17257 {
17258 if (for_string)
17259 fputs (for_string, file);
17260 putc (c, file);
17261
17262 /* Write two quotes to get one. */
17263 if (c == '"')
17264 {
17265 putc (c, file);
17266 ++count_string;
17267 }
17268
17269 for_string = NULL;
17270 for_decimal = "\"\n\t.byte ";
17271 to_close = "\"\n";
17272 ++count_string;
17273
17274 if (count_string >= 512)
17275 {
17276 fputs (to_close, file);
17277
17278 for_string = "\t.byte \"";
17279 for_decimal = "\t.byte ";
17280 to_close = NULL;
17281 count_string = 0;
17282 }
17283 }
17284 else
17285 {
17286 if (for_decimal)
17287 fputs (for_decimal, file);
17288 fprintf (file, "%d", c);
17289
17290 for_string = "\n\t.byte \"";
17291 for_decimal = ", ";
17292 to_close = "\n";
17293 count_string = 0;
17294 }
17295 }
17296
17297 /* Now close the string if we have written one. Then end the line. */
17298 if (to_close)
17299 fputs (to_close, file);
17300 }
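
/* For example, output_ascii (file, "Hi\n", 3) produces

	.byte "Hi"
	.byte 10

   Printable runs go inside one quoted .byte string, while control
   characters are emitted as decimal values on their own directive.  */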
17301 \f
17302 /* Generate a unique section name for FILENAME for a section type
17303 represented by SECTION_DESC. Output goes into BUF.
17304
17305 SECTION_DESC can be any string, as long as it is different for each
17306 possible section type.
17307
17308 We name the section in the same manner as xlc. The name begins with an
17309 underscore followed by the filename (after stripping any leading directory
17310 names) with the last period replaced by the string SECTION_DESC. If
17311 FILENAME does not contain a period, SECTION_DESC is appended to the end of
17312 the name. */
17313
17314 void
17315 rs6000_gen_section_name (char **buf, const char *filename,
17316 const char *section_desc)
17317 {
17318 const char *q, *after_last_slash, *last_period = 0;
17319 char *p;
17320 int len;
17321
17322 after_last_slash = filename;
17323 for (q = filename; *q; q++)
17324 {
17325 if (*q == '/')
17326 after_last_slash = q + 1;
17327 else if (*q == '.')
17328 last_period = q;
17329 }
17330
17331 len = strlen (after_last_slash) + strlen (section_desc) + 2;
17332 *buf = (char *) xmalloc (len);
17333
17334 p = *buf;
17335 *p++ = '_';
17336
17337 for (q = after_last_slash; *q; q++)
17338 {
17339 if (q == last_period)
17340 {
17341 strcpy (p, section_desc);
17342 p += strlen (section_desc);
17343 break;
17344 }
17345
17346 else if (ISALNUM (*q))
17347 *p++ = *q;
17348 }
17349
17350 if (last_period == 0)
17351 strcpy (p, section_desc);
17352 else
17353 *p = '\0';
17354 }
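
/* For instance, rs6000_gen_section_name (&buf, "../src/my-file.c", "bss_")
   yields "_myfilebss_": directories and non-alphanumerics are dropped,
   and the trailing ".c" is replaced by the section descriptor.  */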
17355 \f
17356 /* Emit profile function. */
17357
17358 void
17359 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
17360 {
17361 /* Non-standard profiling for kernels, which just saves LR then calls
17362 _mcount without worrying about arg saves. The idea is to change
17363 the function prologue as little as possible as it isn't easy to
17364 account for arg save/restore code added just for _mcount. */
17365 if (TARGET_PROFILE_KERNEL)
17366 return;
17367
17368 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
17369 {
17370 #ifndef NO_PROFILE_COUNTERS
17371 # define NO_PROFILE_COUNTERS 0
17372 #endif
17373 if (NO_PROFILE_COUNTERS)
17374 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
17375 LCT_NORMAL, VOIDmode);
17376 else
17377 {
17378 char buf[30];
17379 const char *label_name;
17380 rtx fun;
17381
17382 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
17383 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
17384 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
17385
17386 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
17387 LCT_NORMAL, VOIDmode, fun, Pmode);
17388 }
17389 }
17390 else if (DEFAULT_ABI == ABI_DARWIN)
17391 {
17392 const char *mcount_name = RS6000_MCOUNT;
17393 int caller_addr_regno = LR_REGNO;
17394
17395 /* Be conservative and always set this, at least for now. */
17396 crtl->uses_pic_offset_table = 1;
17397
17398 #if TARGET_MACHO
17399 /* For PIC code, set up a stub and collect the caller's address
17400 from r0, which is where the prologue puts it. */
17401 if (MACHOPIC_INDIRECT
17402 && crtl->uses_pic_offset_table)
17403 caller_addr_regno = 0;
17404 #endif
17405 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
17406 LCT_NORMAL, VOIDmode,
17407 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
17408 }
17409 }
17410
17411 /* Write function profiler code. */
17412
17413 void
17414 output_function_profiler (FILE *file, int labelno)
17415 {
17416 char buf[100];
17417
17418 switch (DEFAULT_ABI)
17419 {
17420 default:
17421 gcc_unreachable ();
17422
17423 case ABI_V4:
17424 if (!TARGET_32BIT)
17425 {
17426 warning (0, "no profiling of 64-bit code for this ABI");
17427 return;
17428 }
17429 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
17430 fprintf (file, "\tmflr %s\n", reg_names[0]);
17431 if (NO_PROFILE_COUNTERS)
17432 {
17433 asm_fprintf (file, "\tstw %s,4(%s)\n",
17434 reg_names[0], reg_names[1]);
17435 }
17436 else if (TARGET_SECURE_PLT && flag_pic)
17437 {
17438 if (TARGET_LINK_STACK)
17439 {
17440 char name[32];
17441 get_ppc476_thunk_name (name);
17442 asm_fprintf (file, "\tbl %s\n", name);
17443 }
17444 else
17445 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
17446 asm_fprintf (file, "\tstw %s,4(%s)\n",
17447 reg_names[0], reg_names[1]);
17448 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
17449 asm_fprintf (file, "\taddis %s,%s,",
17450 reg_names[12], reg_names[12]);
17451 assemble_name (file, buf);
17452 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
17453 assemble_name (file, buf);
17454 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
17455 }
17456 else if (flag_pic == 1)
17457 {
17458 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
17459 asm_fprintf (file, "\tstw %s,4(%s)\n",
17460 reg_names[0], reg_names[1]);
17461 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
17462 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
17463 assemble_name (file, buf);
17464 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
17465 }
17466 else if (flag_pic > 1)
17467 {
17468 asm_fprintf (file, "\tstw %s,4(%s)\n",
17469 reg_names[0], reg_names[1]);
17470 /* Now, we need to get the address of the label. */
17471 if (TARGET_LINK_STACK)
17472 {
17473 char name[32];
17474 get_ppc476_thunk_name (name);
17475 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
17476 assemble_name (file, buf);
17477 fputs ("-.\n1:", file);
17478 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
17479 asm_fprintf (file, "\taddi %s,%s,4\n",
17480 reg_names[11], reg_names[11]);
17481 }
17482 else
17483 {
17484 fputs ("\tbcl 20,31,1f\n\t.long ", file);
17485 assemble_name (file, buf);
17486 fputs ("-.\n1:", file);
17487 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
17488 }
17489 asm_fprintf (file, "\tlwz %s,0(%s)\n",
17490 reg_names[0], reg_names[11]);
17491 asm_fprintf (file, "\tadd %s,%s,%s\n",
17492 reg_names[0], reg_names[0], reg_names[11]);
17493 }
17494 else
17495 {
17496 asm_fprintf (file, "\tlis %s,", reg_names[12]);
17497 assemble_name (file, buf);
17498 fputs ("@ha\n", file);
17499 asm_fprintf (file, "\tstw %s,4(%s)\n",
17500 reg_names[0], reg_names[1]);
17501 asm_fprintf (file, "\tla %s,", reg_names[0]);
17502 assemble_name (file, buf);
17503 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
17504 }
17505
17506 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
17507 fprintf (file, "\tbl %s%s\n",
17508 RS6000_MCOUNT, flag_pic ? "@plt" : "");
17509 break;
17510
17511 case ABI_AIX:
17512 case ABI_ELFv2:
17513 case ABI_DARWIN:
17514 /* Don't do anything, done in output_profile_hook (). */
17515 break;
17516 }
17517 }
17518
17519 \f
17520
17521 /* The following variable value is the last issued insn. */
17522
17523 static rtx_insn *last_scheduled_insn;
17524
17525 /* The following variable helps to balance issuing of load and
17526 store instructions. */
17527
17528 static int load_store_pendulum;
17529
17530 /* The following variable helps pair divide insns during scheduling. */
17531 static int divide_cnt;
17532 /* The following variable helps pair and alternate vector and vector load
17533 insns during scheduling. */
17534 static int vec_pairing;
17535
17536
17537 /* Power4 load update and store update instructions are cracked into a
17538 load or store and an integer insn which are executed in the same cycle.
17539 Branches have their own dispatch slot which does not count against the
17540 GCC issue rate, but it changes the program flow so there are no other
17541 instructions to issue in this cycle. */
17542
17543 static int
17544 rs6000_variable_issue_1 (rtx_insn *insn, int more)
17545 {
17546 last_scheduled_insn = insn;
17547 if (GET_CODE (PATTERN (insn)) == USE
17548 || GET_CODE (PATTERN (insn)) == CLOBBER)
17549 {
17550 cached_can_issue_more = more;
17551 return cached_can_issue_more;
17552 }
17553
17554 if (insn_terminates_group_p (insn, current_group))
17555 {
17556 cached_can_issue_more = 0;
17557 return cached_can_issue_more;
17558 }
17559
17560 /* If the insn has no reservation but we reach here, leave MORE unchanged. */
17561 if (recog_memoized (insn) < 0)
17562 return more;
17563
17564 if (rs6000_sched_groups)
17565 {
17566 if (is_microcoded_insn (insn))
17567 cached_can_issue_more = 0;
17568 else if (is_cracked_insn (insn))
17569 cached_can_issue_more = more > 2 ? more - 2 : 0;
17570 else
17571 cached_can_issue_more = more - 1;
17572
17573 return cached_can_issue_more;
17574 }
17575
17576 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
17577 return 0;
17578
17579 cached_can_issue_more = more - 1;
17580 return cached_can_issue_more;
17581 }
17582
17583 static int
17584 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
17585 {
17586 int r = rs6000_variable_issue_1 (insn, more);
17587 if (verbose)
17588 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
17589 return r;
17590 }
17591
17592 /* Adjust the cost of a scheduling dependency. Return the new cost of
17593 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
17594
17595 static int
17596 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
17597 unsigned int)
17598 {
17599 enum attr_type attr_type;
17600
17601 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
17602 return cost;
17603
17604 switch (dep_type)
17605 {
17606 case REG_DEP_TRUE:
17607 {
17608 /* Data dependency; DEP_INSN writes a register that INSN reads
17609 some cycles later. */
17610
17611 /* Separate a load from a narrower, dependent store. */
17612 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
17613 || rs6000_tune == PROCESSOR_POWER10)
17614 && GET_CODE (PATTERN (insn)) == SET
17615 && GET_CODE (PATTERN (dep_insn)) == SET
17616 && MEM_P (XEXP (PATTERN (insn), 1))
17617 && MEM_P (XEXP (PATTERN (dep_insn), 0))
17618 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
17619 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
17620 return cost + 14;
17621
17622 attr_type = get_attr_type (insn);
17623
17624 switch (attr_type)
17625 {
17626 case TYPE_JMPREG:
17627 /* Tell the first scheduling pass about the latency between
17628 a mtctr and bctr (and mtlr and br/blr). The first
17629 scheduling pass will not know about this latency since
17630 the mtctr instruction, which has the latency associated
17631 to it, will be generated by reload. */
17632 return 4;
17633 case TYPE_BRANCH:
17634 /* Leave some extra cycles between a compare and its
17635 dependent branch, to inhibit expensive mispredicts. */
17636 if ((rs6000_tune == PROCESSOR_PPC603
17637 || rs6000_tune == PROCESSOR_PPC604
17638 || rs6000_tune == PROCESSOR_PPC604e
17639 || rs6000_tune == PROCESSOR_PPC620
17640 || rs6000_tune == PROCESSOR_PPC630
17641 || rs6000_tune == PROCESSOR_PPC750
17642 || rs6000_tune == PROCESSOR_PPC7400
17643 || rs6000_tune == PROCESSOR_PPC7450
17644 || rs6000_tune == PROCESSOR_PPCE5500
17645 || rs6000_tune == PROCESSOR_PPCE6500
17646 || rs6000_tune == PROCESSOR_POWER4
17647 || rs6000_tune == PROCESSOR_POWER5
17648 || rs6000_tune == PROCESSOR_POWER7
17649 || rs6000_tune == PROCESSOR_POWER8
17650 || rs6000_tune == PROCESSOR_POWER9
17651 || rs6000_tune == PROCESSOR_POWER10
17652 || rs6000_tune == PROCESSOR_CELL)
17653 && recog_memoized (dep_insn)
17654 && (INSN_CODE (dep_insn) >= 0))
17655
17656 switch (get_attr_type (dep_insn))
17657 {
17658 case TYPE_CMP:
17659 case TYPE_FPCOMPARE:
17660 case TYPE_CR_LOGICAL:
17661 return cost + 2;
17662 case TYPE_EXTS:
17663 case TYPE_MUL:
17664 if (get_attr_dot (dep_insn) == DOT_YES)
17665 return cost + 2;
17666 else
17667 break;
17668 case TYPE_SHIFT:
17669 if (get_attr_dot (dep_insn) == DOT_YES
17670 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
17671 return cost + 2;
17672 else
17673 break;
17674 default:
17675 break;
17676 }
17677 break;
17678
17679 case TYPE_STORE:
17680 case TYPE_FPSTORE:
17681 if ((rs6000_tune == PROCESSOR_POWER6)
17682 && recog_memoized (dep_insn)
17683 && (INSN_CODE (dep_insn) >= 0))
17684 {
17685
17686 if (GET_CODE (PATTERN (insn)) != SET)
17687 /* If this happens, we have to extend this to schedule
17688 optimally. Return default for now. */
17689 return cost;
17690
17691 /* Adjust the cost for the case where the value written
17692 by a fixed point operation is used as the address
17693 gen value on a store. */
17694 switch (get_attr_type (dep_insn))
17695 {
17696 case TYPE_LOAD:
17697 case TYPE_CNTLZ:
17698 {
17699 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17700 return get_attr_sign_extend (dep_insn)
17701 == SIGN_EXTEND_YES ? 6 : 4;
17702 break;
17703 }
17704 case TYPE_SHIFT:
17705 {
17706 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17707 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
17708 6 : 3;
17709 break;
17710 }
17711 case TYPE_INTEGER:
17712 case TYPE_ADD:
17713 case TYPE_LOGICAL:
17714 case TYPE_EXTS:
17715 case TYPE_INSERT:
17716 {
17717 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17718 return 3;
17719 break;
17720 }
17721 case TYPE_STORE:
17722 case TYPE_FPLOAD:
17723 case TYPE_FPSTORE:
17724 {
17725 if (get_attr_update (dep_insn) == UPDATE_YES
17726 && ! rs6000_store_data_bypass_p (dep_insn, insn))
17727 return 3;
17728 break;
17729 }
17730 case TYPE_MUL:
17731 {
17732 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17733 return 17;
17734 break;
17735 }
17736 case TYPE_DIV:
17737 {
17738 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17739 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17740 break;
17741 }
17742 default:
17743 break;
17744 }
17745 }
17746 break;
17747
17748 case TYPE_LOAD:
17749 if ((rs6000_tune == PROCESSOR_POWER6)
17750 && recog_memoized (dep_insn)
17751 && (INSN_CODE (dep_insn) >= 0))
17752 {
17753
17754 /* Adjust the cost for the case where the value written
17755 by a fixed point instruction is used within the address
17756 gen portion of a subsequent load(u)(x). */
17757 switch (get_attr_type (dep_insn))
17758 {
17759 case TYPE_LOAD:
17760 case TYPE_CNTLZ:
17761 {
17762 if (set_to_load_agen (dep_insn, insn))
17763 return get_attr_sign_extend (dep_insn)
17764 == SIGN_EXTEND_YES ? 6 : 4;
17765 break;
17766 }
17767 case TYPE_SHIFT:
17768 {
17769 if (set_to_load_agen (dep_insn, insn))
17770 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
17771 6 : 3;
17772 break;
17773 }
17774 case TYPE_INTEGER:
17775 case TYPE_ADD:
17776 case TYPE_LOGICAL:
17777 case TYPE_EXTS:
17778 case TYPE_INSERT:
17779 {
17780 if (set_to_load_agen (dep_insn, insn))
17781 return 3;
17782 break;
17783 }
17784 case TYPE_STORE:
17785 case TYPE_FPLOAD:
17786 case TYPE_FPSTORE:
17787 {
17788 if (get_attr_update (dep_insn) == UPDATE_YES
17789 && set_to_load_agen (dep_insn, insn))
17790 return 3;
17791 break;
17792 }
17793 case TYPE_MUL:
17794 {
17795 if (set_to_load_agen (dep_insn, insn))
17796 return 17;
17797 break;
17798 }
17799 case TYPE_DIV:
17800 {
17801 if (set_to_load_agen (dep_insn, insn))
17802 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17803 break;
17804 }
17805 default:
17806 break;
17807 }
17808 }
17809 break;
17810
17811 default:
17812 break;
17813 }
17814
17815 /* Fall out to return default cost. */
17816 }
17817 break;
17818
17819 case REG_DEP_OUTPUT:
17820 /* Output dependency; DEP_INSN writes a register that INSN writes some
17821 cycles later. */
17822 if ((rs6000_tune == PROCESSOR_POWER6)
17823 && recog_memoized (dep_insn)
17824 && (INSN_CODE (dep_insn) >= 0))
17825 {
17826 attr_type = get_attr_type (insn);
17827
17828 switch (attr_type)
17829 {
17830 case TYPE_FP:
17831 case TYPE_FPSIMPLE:
17832 if (get_attr_type (dep_insn) == TYPE_FP
17833 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
17834 return 1;
17835 break;
17836 default:
17837 break;
17838 }
17839 }
17840 /* Fall through, no cost for output dependency. */
17841 /* FALLTHRU */
17842
17843 case REG_DEP_ANTI:
17844 /* Anti dependency; DEP_INSN reads a register that INSN writes some
17845 cycles later. */
17846 return 0;
17847
17848 default:
17849 gcc_unreachable ();
17850 }
17851
17852 return cost;
17853 }
17854
17855 /* Debug version of rs6000_adjust_cost. */
17856
17857 static int
17858 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
17859 int cost, unsigned int dw)
17860 {
17861 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
17862
17863 if (ret != cost)
17864 {
17865 const char *dep;
17866
17867 switch (dep_type)
17868 {
17869 default: dep = "unknown dependency"; break;
17870 case REG_DEP_TRUE: dep = "data dependency"; break;
17871 case REG_DEP_OUTPUT: dep = "output dependency"; break;
17872 case REG_DEP_ANTI: dep = "anti dependency"; break;
17873 }
17874
17875 fprintf (stderr,
17876 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
17877 "%s, insn:\n", ret, cost, dep);
17878
17879 debug_rtx (insn);
17880 }
17881
17882 return ret;
17883 }
17884
17885 /* Return true if INSN is microcoded,
17886 false otherwise. */
17887
17888 static bool
17889 is_microcoded_insn (rtx_insn *insn)
17890 {
17891 if (!insn || !NONDEBUG_INSN_P (insn)
17892 || GET_CODE (PATTERN (insn)) == USE
17893 || GET_CODE (PATTERN (insn)) == CLOBBER)
17894 return false;
17895
17896 if (rs6000_tune == PROCESSOR_CELL)
17897 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
17898
17899 if (rs6000_sched_groups
17900 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
17901 {
17902 enum attr_type type = get_attr_type (insn);
17903 if ((type == TYPE_LOAD
17904 && get_attr_update (insn) == UPDATE_YES
17905 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
17906 || ((type == TYPE_LOAD || type == TYPE_STORE)
17907 && get_attr_update (insn) == UPDATE_YES
17908 && get_attr_indexed (insn) == INDEXED_YES)
17909 || type == TYPE_MFCR)
17910 return true;
17911 }
17912
17913 return false;
17914 }
17915
17916 /* Return true if INSN is cracked into 2 instructions
17917 by the processor (and therefore occupies 2 issue slots). */
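/* For instance (illustrative): a load-with-update such as
   "lwzu rD,4(rA)" writes both rD and rA, so POWER4/POWER5 crack it
   into two internal operations; the UPDATE_YES / INDEXED_NO checks
   below match such cases. */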
17918
17919 static bool
17920 is_cracked_insn (rtx_insn *insn)
17921 {
17922 if (!insn || !NONDEBUG_INSN_P (insn)
17923 || GET_CODE (PATTERN (insn)) == USE
17924 || GET_CODE (PATTERN (insn)) == CLOBBER)
17925 return false;
17926
17927 if (rs6000_sched_groups
17928 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
17929 {
17930 enum attr_type type = get_attr_type (insn);
17931 if ((type == TYPE_LOAD
17932 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
17933 && get_attr_update (insn) == UPDATE_NO)
17934 || (type == TYPE_LOAD
17935 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
17936 && get_attr_update (insn) == UPDATE_YES
17937 && get_attr_indexed (insn) == INDEXED_NO)
17938 || (type == TYPE_STORE
17939 && get_attr_update (insn) == UPDATE_YES
17940 && get_attr_indexed (insn) == INDEXED_NO)
17941 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
17942 && get_attr_update (insn) == UPDATE_YES)
17943 || (type == TYPE_CR_LOGICAL
17944 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
17945 || (type == TYPE_EXTS
17946 && get_attr_dot (insn) == DOT_YES)
17947 || (type == TYPE_SHIFT
17948 && get_attr_dot (insn) == DOT_YES
17949 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
17950 || (type == TYPE_MUL
17951 && get_attr_dot (insn) == DOT_YES)
17952 || type == TYPE_DIV
17953 || (type == TYPE_INSERT
17954 && get_attr_size (insn) == SIZE_32))
17955 return true;
17956 }
17957
17958 return false;
17959 }
17960
17961 /* Return true if INSN can be issued only from
17962 the branch slot. */
17963
17964 static bool
17965 is_branch_slot_insn (rtx_insn *insn)
17966 {
17967 if (!insn || !NONDEBUG_INSN_P (insn)
17968 || GET_CODE (PATTERN (insn)) == USE
17969 || GET_CODE (PATTERN (insn)) == CLOBBER)
17970 return false;
17971
17972 if (rs6000_sched_groups)
17973 {
17974 enum attr_type type = get_attr_type (insn);
17975 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
17976 return true;
17977 return false;
17978 }
17979
17980 return false;
17981 }
17982
17983 /* Return true if OUT_INSN sets a value that is used in the
17984 address generation computation of IN_INSN. */
17985 static bool
17986 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
17987 {
17988 rtx out_set, in_set;
17989
17990 /* For performance reasons, only handle the simple case where
17991 both loads are a single_set. */
17992 out_set = single_set (out_insn);
17993 if (out_set)
17994 {
17995 in_set = single_set (in_insn);
17996 if (in_set)
17997 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
17998 }
17999
18000 return false;
18001 }
18002
18003 /* Try to determine base/offset/size parts of the given MEM.
18004 Return true if successful, false if the values couldn't all
18005 be determined.
18006
18007 This function only looks for REG or REG+CONST address forms.
18008 REG+REG address form will return false. */
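/* For example (illustrative), given a MEM like
     (mem:SI (plus:DI (reg:DI 3) (const_int 8)))
   with a known 4-byte size, this returns *base = r3, *offset = 8,
   *size = 4.  An indexed form such as (plus (reg) (reg)) fails the
   REG_P check below and returns false. */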
18009
18010 static bool
18011 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
18012 HOST_WIDE_INT *size)
18013 {
18014 rtx addr_rtx;
18015 if (MEM_SIZE_KNOWN_P (mem))
18016 *size = MEM_SIZE (mem);
18017 else
18018 return false;
18019
18020 addr_rtx = (XEXP (mem, 0));
18021 if (GET_CODE (addr_rtx) == PRE_MODIFY)
18022 addr_rtx = XEXP (addr_rtx, 1);
18023
18024 *offset = 0;
18025 while (GET_CODE (addr_rtx) == PLUS
18026 && CONST_INT_P (XEXP (addr_rtx, 1)))
18027 {
18028 *offset += INTVAL (XEXP (addr_rtx, 1));
18029 addr_rtx = XEXP (addr_rtx, 0);
18030 }
18031 if (!REG_P (addr_rtx))
18032 return false;
18033
18034 *base = addr_rtx;
18035 return true;
18036 }
18037
18038 /* Return true if the target storage location of MEM1 is
18039 adjacent to the target storage location of MEM2. */
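/* E.g. (illustrative): a 4-byte access at 0(r9) and a 4-byte access
   at 4(r9) are adjacent, since off1 + size1 == off2.  Accesses off
   different base registers never compare adjacent here, even if
   they happen to alias at runtime. */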
18041
18042 static bool
18043 adjacent_mem_locations (rtx mem1, rtx mem2)
18044 {
18045 rtx reg1, reg2;
18046 HOST_WIDE_INT off1, size1, off2, size2;
18047
18048 if (get_memref_parts (mem1, &reg1, &off1, &size1)
18049 && get_memref_parts (mem2, &reg2, &off2, &size2))
18050 return ((REGNO (reg1) == REGNO (reg2))
18051 && ((off1 + size1 == off2)
18052 || (off2 + size2 == off1)));
18053
18054 return false;
18055 }
18056
18057 /* This function returns true if it can be determined that the two MEM
18058 locations overlap by at least 1 byte based on base reg/offset/size. */
18059
18060 static bool
18061 mem_locations_overlap (rtx mem1, rtx mem2)
18062 {
18063 rtx reg1, reg2;
18064 HOST_WIDE_INT off1, size1, off2, size2;
18065
18066 if (get_memref_parts (mem1, &reg1, &off1, &size1)
18067 && get_memref_parts (mem2, &reg2, &off2, &size2))
18068 return ((REGNO (reg1) == REGNO (reg2))
18069 && (((off1 <= off2) && (off1 + size1 > off2))
18070 || ((off2 <= off1) && (off2 + size2 > off1))));
18071
18072 return false;
18073 }
18074
18075 /* A C statement (sans semicolon) to update the integer scheduling
18076 priority INSN_PRIORITY (INSN). Increase the priority to execute the
18077 INSN earlier, reduce the priority to execute INSN later. Do not
18078 define this macro if you do not need to adjust the scheduling
18079 priorities of insns. */
18080
18081 static int
18082 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
18083 {
18084 rtx load_mem, str_mem;
18085 /* On machines (like the 750) which have asymmetric integer units,
18086 where one integer unit can do multiply and divides and the other
18087 can't, reduce the priority of multiply/divide so it is scheduled
18088 before other integer operations. */
18089
18090 #if 0
18091 if (! INSN_P (insn))
18092 return priority;
18093
18094 if (GET_CODE (PATTERN (insn)) == USE)
18095 return priority;
18096
18097 switch (rs6000_tune) {
18098 case PROCESSOR_PPC750:
18099 switch (get_attr_type (insn))
18100 {
18101 default:
18102 break;
18103
18104 case TYPE_MUL:
18105 case TYPE_DIV:
18106 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
18107 priority, priority);
18108 if (priority >= 0 && priority < 0x01000000)
18109 priority >>= 3;
18110 break;
18111 }
18112 }
18113 #endif
18114
18115 if (insn_must_be_first_in_group (insn)
18116 && reload_completed
18117 && current_sched_info->sched_max_insns_priority
18118 && rs6000_sched_restricted_insns_priority)
18119 {
18120
18121 /* Prioritize insns that can be dispatched only in the first
18122 dispatch slot. */
18123 if (rs6000_sched_restricted_insns_priority == 1)
18124 /* Attach highest priority to insn. This means that in
18125 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
18126 precede 'priority' (critical path) considerations. */
18127 return current_sched_info->sched_max_insns_priority;
18128 else if (rs6000_sched_restricted_insns_priority == 2)
18129 /* Increase priority of insn by a minimal amount. This means that in
18130 haifa-sched.c:ready_sort(), only 'priority' (critical path)
18131 considerations precede dispatch-slot restriction considerations. */
18132 return (priority + 1);
18133 }
18134
18135 if (rs6000_tune == PROCESSOR_POWER6
18136 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
18137 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
18138 /* Attach highest priority to insn if the scheduler has just issued two
18139 stores and this instruction is a load, or two loads and this instruction
18140 is a store. Power6 wants loads and stores scheduled alternately
18141 when possible */
18142 return current_sched_info->sched_max_insns_priority;
18143
18144 return priority;
18145 }
18146
18147 /* Return true if the instruction is nonpipelined on the Cell. */
18148 static bool
18149 is_nonpipeline_insn (rtx_insn *insn)
18150 {
18151 enum attr_type type;
18152 if (!insn || !NONDEBUG_INSN_P (insn)
18153 || GET_CODE (PATTERN (insn)) == USE
18154 || GET_CODE (PATTERN (insn)) == CLOBBER)
18155 return false;
18156
18157 type = get_attr_type (insn);
18158 if (type == TYPE_MUL
18159 || type == TYPE_DIV
18160 || type == TYPE_SDIV
18161 || type == TYPE_DDIV
18162 || type == TYPE_SSQRT
18163 || type == TYPE_DSQRT
18164 || type == TYPE_MFCR
18165 || type == TYPE_MFCRF
18166 || type == TYPE_MFJMPR)
18167 {
18168 return true;
18169 }
18170 return false;
18171 }
18172
18173
18174 /* Return how many instructions the machine can issue per cycle. */
18175
18176 static int
18177 rs6000_issue_rate (void)
18178 {
18179 /* Unless scheduling for register pressure, use issue rate of 1 for
18180 first scheduling pass to decrease degradation. */
18181 if (!reload_completed && !flag_sched_pressure)
18182 return 1;
18183
18184 switch (rs6000_tune) {
18185 case PROCESSOR_RS64A:
18186 case PROCESSOR_PPC601: /* ? */
18187 case PROCESSOR_PPC7450:
18188 return 3;
18189 case PROCESSOR_PPC440:
18190 case PROCESSOR_PPC603:
18191 case PROCESSOR_PPC750:
18192 case PROCESSOR_PPC7400:
18193 case PROCESSOR_PPC8540:
18194 case PROCESSOR_PPC8548:
18195 case PROCESSOR_CELL:
18196 case PROCESSOR_PPCE300C2:
18197 case PROCESSOR_PPCE300C3:
18198 case PROCESSOR_PPCE500MC:
18199 case PROCESSOR_PPCE500MC64:
18200 case PROCESSOR_PPCE5500:
18201 case PROCESSOR_PPCE6500:
18202 case PROCESSOR_TITAN:
18203 return 2;
18204 case PROCESSOR_PPC476:
18205 case PROCESSOR_PPC604:
18206 case PROCESSOR_PPC604e:
18207 case PROCESSOR_PPC620:
18208 case PROCESSOR_PPC630:
18209 return 4;
18210 case PROCESSOR_POWER4:
18211 case PROCESSOR_POWER5:
18212 case PROCESSOR_POWER6:
18213 case PROCESSOR_POWER7:
18214 return 5;
18215 case PROCESSOR_POWER8:
18216 return 7;
18217 case PROCESSOR_POWER9:
18218 case PROCESSOR_POWER10:
18219 return 6;
18220 default:
18221 return 1;
18222 }
18223 }
18224
18225 /* Return how many instructions to look ahead for better insn
18226 scheduling. */
18227
18228 static int
18229 rs6000_use_sched_lookahead (void)
18230 {
18231 switch (rs6000_tune)
18232 {
18233 case PROCESSOR_PPC8540:
18234 case PROCESSOR_PPC8548:
18235 return 4;
18236
18237 case PROCESSOR_CELL:
18238 return (reload_completed ? 8 : 0);
18239
18240 default:
18241 return 0;
18242 }
18243 }
18244
18245 /* We are choosing insn from the ready queue. Return zero if INSN can be
18246 chosen. */
18247 static int
18248 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
18249 {
18250 if (ready_index == 0)
18251 return 0;
18252
18253 if (rs6000_tune != PROCESSOR_CELL)
18254 return 0;
18255
18256 gcc_assert (insn != NULL_RTX && INSN_P (insn));
18257
18258 if (!reload_completed
18259 || is_nonpipeline_insn (insn)
18260 || is_microcoded_insn (insn))
18261 return 1;
18262
18263 return 0;
18264 }
18265
18266 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
18267 and return true. */
18268
18269 static bool
18270 find_mem_ref (rtx pat, rtx *mem_ref)
18271 {
18272 const char * fmt;
18273 int i, j;
18274
18275 /* stack_tie does not produce any real memory traffic. */
18276 if (tie_operand (pat, VOIDmode))
18277 return false;
18278
18279 if (MEM_P (pat))
18280 {
18281 *mem_ref = pat;
18282 return true;
18283 }
18284
18285 /* Recursively process the pattern. */
18286 fmt = GET_RTX_FORMAT (GET_CODE (pat));
18287
18288 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
18289 {
18290 if (fmt[i] == 'e')
18291 {
18292 if (find_mem_ref (XEXP (pat, i), mem_ref))
18293 return true;
18294 }
18295 else if (fmt[i] == 'E')
18296 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
18297 {
18298 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
18299 return true;
18300 }
18301 }
18302
18303 return false;
18304 }
18305
18306 /* Determine if PAT is a PATTERN of a load insn. */
18307
18308 static bool
18309 is_load_insn1 (rtx pat, rtx *load_mem)
18310 {
18311 if (!pat)
18312 return false;
18313
18314 if (GET_CODE (pat) == SET)
18315 return find_mem_ref (SET_SRC (pat), load_mem);
18316
18317 if (GET_CODE (pat) == PARALLEL)
18318 {
18319 int i;
18320
18321 for (i = 0; i < XVECLEN (pat, 0); i++)
18322 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
18323 return true;
18324 }
18325
18326 return false;
18327 }
18328
18329 /* Determine if INSN loads from memory. */
18330
18331 static bool
18332 is_load_insn (rtx insn, rtx *load_mem)
18333 {
18334 if (!insn || !INSN_P (insn))
18335 return false;
18336
18337 if (CALL_P (insn))
18338 return false;
18339
18340 return is_load_insn1 (PATTERN (insn), load_mem);
18341 }
18342
18343 /* Determine if PAT is a PATTERN of a store insn. */
18344
18345 static bool
18346 is_store_insn1 (rtx pat, rtx *str_mem)
18347 {
18348 if (!pat)
18349 return false;
18350
18351 if (GET_CODE (pat) == SET)
18352 return find_mem_ref (SET_DEST (pat), str_mem);
18353
18354 if (GET_CODE (pat) == PARALLEL)
18355 {
18356 int i;
18357
18358 for (i = 0; i < XVECLEN (pat, 0); i++)
18359 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
18360 return true;
18361 }
18362
18363 return false;
18364 }
18365
18366 /* Determine if INSN stores to memory. */
18367
18368 static bool
18369 is_store_insn (rtx insn, rtx *str_mem)
18370 {
18371 if (!insn || !INSN_P (insn))
18372 return false;
18373
18374 return is_store_insn1 (PATTERN (insn), str_mem);
18375 }
18376
18377 /* Return whether TYPE is a Power9 pairable vector instruction type. */
18378
18379 static bool
18380 is_power9_pairable_vec_type (enum attr_type type)
18381 {
18382 switch (type)
18383 {
18384 case TYPE_VECSIMPLE:
18385 case TYPE_VECCOMPLEX:
18386 case TYPE_VECDIV:
18387 case TYPE_VECCMP:
18388 case TYPE_VECPERM:
18389 case TYPE_VECFLOAT:
18390 case TYPE_VECFDIV:
18391 case TYPE_VECDOUBLE:
18392 return true;
18393 default:
18394 break;
18395 }
18396 return false;
18397 }
18398
18399 /* Returns whether the dependence between INSN and NEXT is considered
18400 costly by the given target. */
18401
18402 static bool
18403 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
18404 {
18405 rtx insn;
18406 rtx next;
18407 rtx load_mem, str_mem;
18408
18409 /* If the flag is not enabled - no dependence is considered costly;
18410 allow all dependent insns in the same group.
18411 This is the most aggressive option. */
18412 if (rs6000_sched_costly_dep == no_dep_costly)
18413 return false;
18414
18415 /* If the flag is set to 1 - a dependence is always considered costly;
18416 do not allow dependent instructions in the same group.
18417 This is the most conservative option. */
18418 if (rs6000_sched_costly_dep == all_deps_costly)
18419 return true;
18420
18421 insn = DEP_PRO (dep);
18422 next = DEP_CON (dep);
18423
18424 if (rs6000_sched_costly_dep == store_to_load_dep_costly
18425 && is_load_insn (next, &load_mem)
18426 && is_store_insn (insn, &str_mem))
18427 /* Prevent load after store in the same group. */
18428 return true;
18429
18430 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
18431 && is_load_insn (next, &load_mem)
18432 && is_store_insn (insn, &str_mem)
18433 && DEP_TYPE (dep) == REG_DEP_TRUE
18434 && mem_locations_overlap(str_mem, load_mem))
18435 /* Prevent load after store in the same group if it is a true
18436 dependence. */
18437 return true;
18438
18439 /* The flag is set to X; dependences with latency >= X are considered costly,
18440 and will not be scheduled in the same group. */
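  /* For example (illustrative): if rs6000_sched_costly_dep is 3, a
     dependence with cost 4 at distance 0 is costly (4 - 0 >= 3),
     while one with cost 2 is not. */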
18441 if (rs6000_sched_costly_dep <= max_dep_latency
18442 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
18443 return true;
18444
18445 return false;
18446 }
18447
18448 /* Return the next insn after INSN that is found before TAIL is reached,
18449 skipping any "non-active" insns - insns that will not actually occupy
18450 an issue slot. Return NULL_RTX if such an insn is not found. */
18451
18452 static rtx_insn *
18453 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
18454 {
18455 if (insn == NULL_RTX || insn == tail)
18456 return NULL;
18457
18458 while (1)
18459 {
18460 insn = NEXT_INSN (insn);
18461 if (insn == NULL_RTX || insn == tail)
18462 return NULL;
18463
18464 if (CALL_P (insn)
18465 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
18466 || (NONJUMP_INSN_P (insn)
18467 && GET_CODE (PATTERN (insn)) != USE
18468 && GET_CODE (PATTERN (insn)) != CLOBBER
18469 && INSN_CODE (insn) != CODE_FOR_stack_tie))
18470 break;
18471 }
18472 return insn;
18473 }
18474
18475 /* Move instruction at POS to the end of the READY list. */
18476
18477 static void
18478 move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
18479 {
18480 rtx_insn *tmp;
18481 int i;
18482
18483 tmp = ready[pos];
18484 for (i = pos; i < lastpos; i++)
18485 ready[i] = ready[i + 1];
18486 ready[lastpos] = tmp;
18487 }
18488
18489 /* Do Power6 specific sched_reorder2 reordering of ready list. */
18490
18491 static int
18492 power6_sched_reorder2 (rtx_insn **ready, int lastpos)
18493 {
18494 /* For Power6, we need to handle some special cases to try and keep the
18495 store queue from overflowing and triggering expensive flushes.
18496
18497 This code monitors how load and store instructions are being issued
18498 and skews the ready list one way or the other to increase the likelihood
18499 that a desired instruction is issued at the proper time.
18500
18501 A couple of things are done. First, we maintain a "load_store_pendulum"
18502 to track the current state of load/store issue.
18503
18504 - If the pendulum is at zero, then no loads or stores have been
18505 issued in the current cycle so we do nothing.
18506
18507 - If the pendulum is 1, then a single load has been issued in this
18508 cycle and we attempt to locate another load in the ready list to
18509 issue with it.
18510
18511 - If the pendulum is -2, then two stores have already been
18512 issued in this cycle, so we increase the priority of the first load
18513 in the ready list to increase its likelihood of being chosen first
18514 in the next cycle.
18515
18516 - If the pendulum is -1, then a single store has been issued in this
18517 cycle and we attempt to locate another store in the ready list to
18518 issue with it, preferring a store to an adjacent memory location to
18519 facilitate store pairing in the store queue.
18520
18521 - If the pendulum is 2, then two loads have already been
18522 issued in this cycle, so we increase the priority of the first store
18523 in the ready list to increase its likelihood of being chosen first
18524 in the next cycle.
18525
18526 - If the pendulum < -2 or > 2, then do nothing.
18527
18528 Note: This code covers the most common scenarios. There exist non
18529 load/store instructions which make use of the LSU and which
18530 would need to be accounted for to strictly model the behavior
18531 of the machine. Those instructions are currently unaccounted
18532 for to help minimize compile time overhead of this code.
18533 */
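  /* Illustrative trace (hypothetical): if two stores issue in one
     cycle the pendulum reaches -2, and the code below boosts the
     priority of the first load on the ready list, making the next
     cycle more likely to start with a load and keeping the store
     queue from filling. */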
18534 int pos;
18535 rtx load_mem, str_mem;
18536
18537 if (is_store_insn (last_scheduled_insn, &str_mem))
18538 /* Issuing a store, swing the load_store_pendulum to the left */
18539 load_store_pendulum--;
18540 else if (is_load_insn (last_scheduled_insn, &load_mem))
18541 /* Issuing a load, swing the load_store_pendulum to the right */
18542 load_store_pendulum++;
18543 else
18544 return cached_can_issue_more;
18545
18546 /* If the pendulum is balanced, or there is only one instruction on
18547 the ready list, then all is well, so return. */
18548 if ((load_store_pendulum == 0) || (lastpos <= 0))
18549 return cached_can_issue_more;
18550
18551 if (load_store_pendulum == 1)
18552 {
18553 /* A load has been issued in this cycle. Scan the ready list
18554 for another load to issue with it */
18555 pos = lastpos;
18556
18557 while (pos >= 0)
18558 {
18559 if (is_load_insn (ready[pos], &load_mem))
18560 {
18561 /* Found a load. Move it to the head of the ready list,
18562 and adjust its priority so that it is more likely to
18563 stay there */
18564 move_to_end_of_ready (ready, pos, lastpos);
18565
18566 if (!sel_sched_p ()
18567 && INSN_PRIORITY_KNOWN (ready[lastpos]))
18568 INSN_PRIORITY (ready[lastpos])++;
18569 break;
18570 }
18571 pos--;
18572 }
18573 }
18574 else if (load_store_pendulum == -2)
18575 {
18576 /* Two stores have been issued in this cycle. Increase the
18577 priority of the first load in the ready list to favor it for
18578 issuing in the next cycle. */
18579 pos = lastpos;
18580
18581 while (pos >= 0)
18582 {
18583 if (is_load_insn (ready[pos], &load_mem)
18584 && !sel_sched_p ()
18585 && INSN_PRIORITY_KNOWN (ready[pos]))
18586 {
18587 INSN_PRIORITY (ready[pos])++;
18588
18589 /* Adjust the pendulum to account for the fact that a load
18590 was found and increased in priority. This is to prevent
18591 increasing the priority of multiple loads */
18592 load_store_pendulum--;
18593
18594 break;
18595 }
18596 pos--;
18597 }
18598 }
18599 else if (load_store_pendulum == -1)
18600 {
18601 /* A store has been issued in this cycle. Scan the ready list for
18602 another store to issue with it, preferring a store to an adjacent
18603 memory location */
18604 int first_store_pos = -1;
18605
18606 pos = lastpos;
18607
18608 while (pos >= 0)
18609 {
18610 if (is_store_insn (ready[pos], &str_mem))
18611 {
18612 rtx str_mem2;
18613 /* Maintain the index of the first store found on the
18614 list */
18615 if (first_store_pos == -1)
18616 first_store_pos = pos;
18617
18618 if (is_store_insn (last_scheduled_insn, &str_mem2)
18619 && adjacent_mem_locations (str_mem, str_mem2))
18620 {
18621 /* Found an adjacent store. Move it to the head of the
18622 ready list, and adjust its priority so that it is
18623 more likely to stay there */
18624 move_to_end_of_ready (ready, pos, lastpos);
18625
18626 if (!sel_sched_p ()
18627 && INSN_PRIORITY_KNOWN (ready[lastpos]))
18628 INSN_PRIORITY (ready[lastpos])++;
18629
18630 first_store_pos = -1;
18631
18632 break;
18633 }
18634 }
18635 pos--;
18636 }
18637
18638 if (first_store_pos >= 0)
18639 {
18640 /* An adjacent store wasn't found, but a non-adjacent store was,
18641 so move the non-adjacent store to the front of the ready
18642 list, and adjust its priority so that it is more likely to
18643 stay there. */
18644 move_to_end_of_ready (ready, first_store_pos, lastpos);
18645 if (!sel_sched_p ()
18646 && INSN_PRIORITY_KNOWN (ready[lastpos]))
18647 INSN_PRIORITY (ready[lastpos])++;
18648 }
18649 }
18650 else if (load_store_pendulum == 2)
18651 {
18652 /* Two loads have been issued in this cycle. Increase the priority
18653 of the first store in the ready list to favor it for issuing in
18654 the next cycle. */
18655 pos = lastpos;
18656
18657 while (pos >= 0)
18658 {
18659 if (is_store_insn (ready[pos], &str_mem)
18660 && !sel_sched_p ()
18661 && INSN_PRIORITY_KNOWN (ready[pos]))
18662 {
18663 INSN_PRIORITY (ready[pos])++;
18664
18665 /* Adjust the pendulum to account for the fact that a store
18666 was found and increased in priority. This is to prevent
18667 increasing the priority of multiple stores */
18668 load_store_pendulum++;
18669
18670 break;
18671 }
18672 pos--;
18673 }
18674 }
18675
18676 return cached_can_issue_more;
18677 }
18678
18679 /* Do Power9 specific sched_reorder2 reordering of ready list. */
18680
18681 static int
18682 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
18683 {
18684 int pos;
18685 enum attr_type type, type2;
18686
18687 type = get_attr_type (last_scheduled_insn);
18688
18689 /* Try to issue fixed point divides back-to-back in pairs so they will be
18690 routed to separate execution units and execute in parallel. */
18691 if (type == TYPE_DIV && divide_cnt == 0)
18692 {
18693 /* First divide has been scheduled. */
18694 divide_cnt = 1;
18695
18696 /* Scan the ready list looking for another divide, if found move it
18697 to the end of the list so it is chosen next. */
18698 pos = lastpos;
18699 while (pos >= 0)
18700 {
18701 if (recog_memoized (ready[pos]) >= 0
18702 && get_attr_type (ready[pos]) == TYPE_DIV)
18703 {
18704 move_to_end_of_ready (ready, pos, lastpos);
18705 break;
18706 }
18707 pos--;
18708 }
18709 }
18710 else
18711 {
18712 /* Last insn was the 2nd divide or not a divide, reset the counter. */
18713 divide_cnt = 0;
18714
18715 /* The best dispatch throughput for vector and vector load insns can be
18716 achieved by interleaving a vector and vector load such that they'll
18717 dispatch to the same superslice. If this pairing cannot be achieved
18718 then it is best to pair vector insns together and vector load insns
18719 together.
18720
18721 To aid in this pairing, vec_pairing maintains the current state with
18722 the following values:
18723
18724 0 : Initial state, no vecload/vector pairing has been started.
18725
18726 1 : A vecload or vector insn has been issued and a candidate for
18727 pairing has been found and moved to the end of the ready
18728 list. */
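      /* Illustrative sequence (hypothetical): after a vecload issues in
	 state 0, the scan below looks for a pairable vector insn; if one
	 is found it is moved to the end of the ready list and vec_pairing
	 becomes 1, so the pair can dispatch to the same superslice. */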
18729 if (type == TYPE_VECLOAD)
18730 {
18731 /* Issued a vecload. */
18732 if (vec_pairing == 0)
18733 {
18734 int vecload_pos = -1;
18735 /* We issued a single vecload, look for a vector insn to pair it
18736 with. If one isn't found, try to pair another vecload. */
18737 pos = lastpos;
18738 while (pos >= 0)
18739 {
18740 if (recog_memoized (ready[pos]) >= 0)
18741 {
18742 type2 = get_attr_type (ready[pos]);
18743 if (is_power9_pairable_vec_type (type2))
18744 {
18745 /* Found a vector insn to pair with, move it to the
18746 end of the ready list so it is scheduled next. */
18747 move_to_end_of_ready (ready, pos, lastpos);
18748 vec_pairing = 1;
18749 return cached_can_issue_more;
18750 }
18751 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
18752 /* Remember position of first vecload seen. */
18753 vecload_pos = pos;
18754 }
18755 pos--;
18756 }
18757 if (vecload_pos >= 0)
18758 {
18759 /* Didn't find a vector to pair with but did find a vecload,
18760 move it to the end of the ready list. */
18761 move_to_end_of_ready (ready, vecload_pos, lastpos);
18762 vec_pairing = 1;
18763 return cached_can_issue_more;
18764 }
18765 }
18766 }
18767 else if (is_power9_pairable_vec_type (type))
18768 {
18769 /* Issued a vector operation. */
18770 if (vec_pairing == 0)
18771 {
18772 int vec_pos = -1;
18773 /* We issued a single vector insn, look for a vecload to pair it
18774 with. If one isn't found, try to pair another vector. */
18775 pos = lastpos;
18776 while (pos >= 0)
18777 {
18778 if (recog_memoized (ready[pos]) >= 0)
18779 {
18780 type2 = get_attr_type (ready[pos]);
18781 if (type2 == TYPE_VECLOAD)
18782 {
18783 /* Found a vecload insn to pair with, move it to the
18784 end of the ready list so it is scheduled next. */
18785 move_to_end_of_ready (ready, pos, lastpos);
18786 vec_pairing = 1;
18787 return cached_can_issue_more;
18788 }
18789 else if (is_power9_pairable_vec_type (type2)
18790 && vec_pos == -1)
18791 /* Remember position of first vector insn seen. */
18792 vec_pos = pos;
18793 }
18794 pos--;
18795 }
18796 if (vec_pos >= 0)
18797 {
18798 /* Didn't find a vecload to pair with but did find a vector
18799 insn, move it to the end of the ready list. */
18800 move_to_end_of_ready (ready, vec_pos, lastpos);
18801 vec_pairing = 1;
18802 return cached_can_issue_more;
18803 }
18804 }
18805 }
18806
18807 /* We've either finished a vec/vecload pair, couldn't find an insn to
18808 continue the current pair, or the last insn had nothing to do
18809 with pairing. In any case, reset the state. */
18810 vec_pairing = 0;
18811 }
18812
18813 return cached_can_issue_more;
18814 }
18815
18816 /* We are about to begin issuing insns for this clock cycle. */
18817
18818 static int
18819 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
18820 rtx_insn **ready ATTRIBUTE_UNUSED,
18821 int *pn_ready ATTRIBUTE_UNUSED,
18822 int clock_var ATTRIBUTE_UNUSED)
18823 {
18824 int n_ready = *pn_ready;
18825
18826 if (sched_verbose)
18827 fprintf (dump, "// rs6000_sched_reorder :\n");
18828
18829 /* Reorder the ready list, if the second to last ready insn
18830 is a nonpipeline insn. */
18831 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
18832 {
18833 if (is_nonpipeline_insn (ready[n_ready - 1])
18834 && (recog_memoized (ready[n_ready - 2]) > 0))
18835 /* Simply swap first two insns. */
18836 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
18837 }
18838
18839 if (rs6000_tune == PROCESSOR_POWER6)
18840 load_store_pendulum = 0;
18841
18842 return rs6000_issue_rate ();
18843 }
18844
18845 /* Like rs6000_sched_reorder, but called after issuing each insn. */
18846
18847 static int
18848 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
18849 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
18850 {
18851 if (sched_verbose)
18852 fprintf (dump, "// rs6000_sched_reorder2 :\n");
18853
18854 /* Do Power6 dependent reordering if necessary. */
18855 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
18856 return power6_sched_reorder2 (ready, *pn_ready - 1);
18857
18858 /* Do Power9 dependent reordering if necessary. */
18859 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
18860 && recog_memoized (last_scheduled_insn) >= 0)
18861 return power9_sched_reorder2 (ready, *pn_ready - 1);
18862
18863 return cached_can_issue_more;
18864 }
18865
18866 /* Return whether the presence of INSN causes a dispatch group termination
18867 of group WHICH_GROUP.
18868
18869 If WHICH_GROUP == current_group, this function will return true if INSN
18870 causes the termination of the current group (i.e., the dispatch group to
18871 which INSN belongs). This means that INSN will be the last insn in the
18872 group it belongs to.
18873
18874 If WHICH_GROUP == previous_group, this function will return true if INSN
18875 causes the termination of the previous group (i.e., the dispatch group that
18876 precedes the group to which INSN belongs). This means that INSN will be
18877 the first insn in the group it belongs to. */
18878
18879 static bool
18880 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
18881 {
18882 bool first, last;
18883
18884 if (! insn)
18885 return false;
18886
18887 first = insn_must_be_first_in_group (insn);
18888 last = insn_must_be_last_in_group (insn);
18889
18890 if (first && last)
18891 return true;
18892
18893 if (which_group == current_group)
18894 return last;
18895 else if (which_group == previous_group)
18896 return first;
18897
18898 return false;
18899 }
18900
18901
18902 static bool
18903 insn_must_be_first_in_group (rtx_insn *insn)
18904 {
18905 enum attr_type type;
18906
18907 if (!insn
18908 || NOTE_P (insn)
18909 || DEBUG_INSN_P (insn)
18910 || GET_CODE (PATTERN (insn)) == USE
18911 || GET_CODE (PATTERN (insn)) == CLOBBER)
18912 return false;
18913
18914 switch (rs6000_tune)
18915 {
18916 case PROCESSOR_POWER5:
18917 if (is_cracked_insn (insn))
18918 return true;
18919 /* FALLTHRU */
18920 case PROCESSOR_POWER4:
18921 if (is_microcoded_insn (insn))
18922 return true;
18923
18924 if (!rs6000_sched_groups)
18925 return false;
18926
18927 type = get_attr_type (insn);
18928
18929 switch (type)
18930 {
18931 case TYPE_MFCR:
18932 case TYPE_MFCRF:
18933 case TYPE_MTCR:
18934 case TYPE_CR_LOGICAL:
18935 case TYPE_MTJMPR:
18936 case TYPE_MFJMPR:
18937 case TYPE_DIV:
18938 case TYPE_LOAD_L:
18939 case TYPE_STORE_C:
18940 case TYPE_ISYNC:
18941 case TYPE_SYNC:
18942 return true;
18943 default:
18944 break;
18945 }
18946 break;
18947 case PROCESSOR_POWER6:
18948 type = get_attr_type (insn);
18949
18950 switch (type)
18951 {
18952 case TYPE_EXTS:
18953 case TYPE_CNTLZ:
18954 case TYPE_TRAP:
18955 case TYPE_MUL:
18956 case TYPE_INSERT:
18957 case TYPE_FPCOMPARE:
18958 case TYPE_MFCR:
18959 case TYPE_MTCR:
18960 case TYPE_MFJMPR:
18961 case TYPE_MTJMPR:
18962 case TYPE_ISYNC:
18963 case TYPE_SYNC:
18964 case TYPE_LOAD_L:
18965 case TYPE_STORE_C:
18966 return true;
18967 case TYPE_SHIFT:
18968 if (get_attr_dot (insn) == DOT_NO
18969 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
18970 return true;
18971 else
18972 break;
18973 case TYPE_DIV:
18974 if (get_attr_size (insn) == SIZE_32)
18975 return true;
18976 else
18977 break;
18978 case TYPE_LOAD:
18979 case TYPE_STORE:
18980 case TYPE_FPLOAD:
18981 case TYPE_FPSTORE:
18982 if (get_attr_update (insn) == UPDATE_YES)
18983 return true;
18984 else
18985 break;
18986 default:
18987 break;
18988 }
18989 break;
18990 case PROCESSOR_POWER7:
18991 type = get_attr_type (insn);
18992
18993 switch (type)
18994 {
18995 case TYPE_CR_LOGICAL:
18996 case TYPE_MFCR:
18997 case TYPE_MFCRF:
18998 case TYPE_MTCR:
18999 case TYPE_DIV:
19000 case TYPE_ISYNC:
19001 case TYPE_LOAD_L:
19002 case TYPE_STORE_C:
19003 case TYPE_MFJMPR:
19004 case TYPE_MTJMPR:
19005 return true;
19006 case TYPE_MUL:
19007 case TYPE_SHIFT:
19008 case TYPE_EXTS:
19009 if (get_attr_dot (insn) == DOT_YES)
19010 return true;
19011 else
19012 break;
19013 case TYPE_LOAD:
19014 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19015 || get_attr_update (insn) == UPDATE_YES)
19016 return true;
19017 else
19018 break;
19019 case TYPE_STORE:
19020 case TYPE_FPLOAD:
19021 case TYPE_FPSTORE:
19022 if (get_attr_update (insn) == UPDATE_YES)
19023 return true;
19024 else
19025 break;
19026 default:
19027 break;
19028 }
19029 break;
19030 case PROCESSOR_POWER8:
19031 type = get_attr_type (insn);
19032
19033 switch (type)
19034 {
19035 case TYPE_CR_LOGICAL:
19036 case TYPE_MFCR:
19037 case TYPE_MFCRF:
19038 case TYPE_MTCR:
19039 case TYPE_SYNC:
19040 case TYPE_ISYNC:
19041 case TYPE_LOAD_L:
19042 case TYPE_STORE_C:
19043 case TYPE_VECSTORE:
19044 case TYPE_MFJMPR:
19045 case TYPE_MTJMPR:
19046 return true;
19047 case TYPE_SHIFT:
19048 case TYPE_EXTS:
19049 case TYPE_MUL:
19050 if (get_attr_dot (insn) == DOT_YES)
19051 return true;
19052 else
19053 break;
19054 case TYPE_LOAD:
19055 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19056 || get_attr_update (insn) == UPDATE_YES)
19057 return true;
19058 else
19059 break;
19060 case TYPE_STORE:
19061 if (get_attr_update (insn) == UPDATE_YES
19062 && get_attr_indexed (insn) == INDEXED_YES)
19063 return true;
19064 else
19065 break;
19066 default:
19067 break;
19068 }
19069 break;
19070 default:
19071 break;
19072 }
19073
19074 return false;
19075 }
19076
19077 static bool
19078 insn_must_be_last_in_group (rtx_insn *insn)
19079 {
19080 enum attr_type type;
19081
19082 if (!insn
19083 || NOTE_P (insn)
19084 || DEBUG_INSN_P (insn)
19085 || GET_CODE (PATTERN (insn)) == USE
19086 || GET_CODE (PATTERN (insn)) == CLOBBER)
19087 return false;
19088
19089 switch (rs6000_tune) {
19090 case PROCESSOR_POWER4:
19091 case PROCESSOR_POWER5:
19092 if (is_microcoded_insn (insn))
19093 return true;
19094
19095 if (is_branch_slot_insn (insn))
19096 return true;
19097
19098 break;
19099 case PROCESSOR_POWER6:
19100 type = get_attr_type (insn);
19101
19102 switch (type)
19103 {
19104 case TYPE_EXTS:
19105 case TYPE_CNTLZ:
19106 case TYPE_TRAP:
19107 case TYPE_MUL:
19108 case TYPE_FPCOMPARE:
19109 case TYPE_MFCR:
19110 case TYPE_MTCR:
19111 case TYPE_MFJMPR:
19112 case TYPE_MTJMPR:
19113 case TYPE_ISYNC:
19114 case TYPE_SYNC:
19115 case TYPE_LOAD_L:
19116 case TYPE_STORE_C:
19117 return true;
19118 case TYPE_SHIFT:
19119 if (get_attr_dot (insn) == DOT_NO
19120 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
19121 return true;
19122 else
19123 break;
19124 case TYPE_DIV:
19125 if (get_attr_size (insn) == SIZE_32)
19126 return true;
19127 else
19128 break;
19129 default:
19130 break;
19131 }
19132 break;
19133 case PROCESSOR_POWER7:
19134 type = get_attr_type (insn);
19135
19136 switch (type)
19137 {
19138 case TYPE_ISYNC:
19139 case TYPE_SYNC:
19140 case TYPE_LOAD_L:
19141 case TYPE_STORE_C:
19142 return true;
19143 case TYPE_LOAD:
19144 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19145 && get_attr_update (insn) == UPDATE_YES)
19146 return true;
19147 else
19148 break;
19149 case TYPE_STORE:
19150 if (get_attr_update (insn) == UPDATE_YES
19151 && get_attr_indexed (insn) == INDEXED_YES)
19152 return true;
19153 else
19154 break;
19155 default:
19156 break;
19157 }
19158 break;
19159 case PROCESSOR_POWER8:
19160 type = get_attr_type (insn);
19161
19162 switch (type)
19163 {
19164 case TYPE_MFCR:
19165 case TYPE_MTCR:
19166 case TYPE_ISYNC:
19167 case TYPE_SYNC:
19168 case TYPE_LOAD_L:
19169 case TYPE_STORE_C:
19170 return true;
19171 case TYPE_LOAD:
19172 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19173 && get_attr_update (insn) == UPDATE_YES)
19174 return true;
19175 else
19176 break;
19177 case TYPE_STORE:
19178 if (get_attr_update (insn) == UPDATE_YES
19179 && get_attr_indexed (insn) == INDEXED_YES)
19180 return true;
19181 else
19182 break;
19183 default:
19184 break;
19185 }
19186 break;
19187 default:
19188 break;
19189 }
19190
19191 return false;
19192 }
19193
19194 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
19195 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
19196
19197 static bool
19198 is_costly_group (rtx *group_insns, rtx next_insn)
19199 {
19200 int i;
19201 int issue_rate = rs6000_issue_rate ();
19202
19203 for (i = 0; i < issue_rate; i++)
19204 {
19205 sd_iterator_def sd_it;
19206 dep_t dep;
19207 rtx insn = group_insns[i];
19208
19209 if (!insn)
19210 continue;
19211
19212 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
19213 {
19214 rtx next = DEP_CON (dep);
19215
19216 if (next == next_insn
19217 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
19218 return true;
19219 }
19220 }
19221
19222 return false;
19223 }
19224
19225 /* Utility of the function redefine_groups.
19226 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
19227 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
19228 to keep it "far" (in a separate group) from GROUP_INSNS, following
19229 one of the following schemes, depending on the value of the flag
19230 -minsert-sched-nops = X:
19231 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
19232 in order to force NEXT_INSN into a separate group.
19233 (2) X < sched_finish_regroup_exact: insert exactly X nops.
19234 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
19235 insertion (has a group just ended, how many vacant issue slots remain in the
19236 last group, and how many dispatch groups were encountered so far). */
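/* Worked example (illustrative): with issue_rate == 5,
   can_issue_more == 3 and X == sched_finish_regroup_exact, a
   non-branch NEXT_INSN gets 'can_issue_more - 1' == 2 nops (or a
   single group-ending nop on POWER6/POWER7/POWER8), which forces it
   into a new dispatch group. */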
19237
19238 static int
19239 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
19240 rtx_insn *next_insn, bool *group_end, int can_issue_more,
19241 int *group_count)
19242 {
19243 rtx nop;
19244 bool force;
19245 int issue_rate = rs6000_issue_rate ();
19246 bool end = *group_end;
19247 int i;
19248
19249 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
19250 return can_issue_more;
19251
19252 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
19253 return can_issue_more;
19254
19255 force = is_costly_group (group_insns, next_insn);
19256 if (!force)
19257 return can_issue_more;
19258
19259 if (sched_verbose > 6)
19260 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
19261 *group_count ,can_issue_more);
19262
19263 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
19264 {
19265 if (*group_end)
19266 can_issue_more = 0;
19267
19268 /* Since only a branch can be issued in the last issue_slot, it is
19269 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
19270 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
19271 in this case the last nop will start a new group and the branch
19272 will be forced to the new group. */
19273 if (can_issue_more && !is_branch_slot_insn (next_insn))
19274 can_issue_more--;
19275
19276 /* Do we have a special group ending nop? */
19277 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
19278 || rs6000_tune == PROCESSOR_POWER8)
19279 {
19280 nop = gen_group_ending_nop ();
19281 emit_insn_before (nop, next_insn);
19282 can_issue_more = 0;
19283 }
19284 else
19285 while (can_issue_more > 0)
19286 {
19287 nop = gen_nop ();
19288 emit_insn_before (nop, next_insn);
19289 can_issue_more--;
19290 }
19291
19292 *group_end = true;
19293 return 0;
19294 }
19295
19296 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
19297 {
19298 int n_nops = rs6000_sched_insert_nops;
19299
19300 /* Nops can't be issued from the branch slot, so the effective
19301 issue_rate for nops is 'issue_rate - 1'. */
19302 if (can_issue_more == 0)
19303 can_issue_more = issue_rate;
19304 can_issue_more--;
19305 if (can_issue_more == 0)
19306 {
19307 can_issue_more = issue_rate - 1;
19308 (*group_count)++;
19309 end = true;
19310 for (i = 0; i < issue_rate; i++)
19311 {
19312 group_insns[i] = 0;
19313 }
19314 }
19315
19316 while (n_nops > 0)
19317 {
19318 nop = gen_nop ();
19319 emit_insn_before (nop, next_insn);
19320 if (can_issue_more == issue_rate - 1) /* new group begins */
19321 end = false;
19322 can_issue_more--;
19323 if (can_issue_more == 0)
19324 {
19325 can_issue_more = issue_rate - 1;
19326 (*group_count)++;
19327 end = true;
19328 for (i = 0; i < issue_rate; i++)
19329 {
19330 group_insns[i] = 0;
19331 }
19332 }
19333 n_nops--;
19334 }
19335
19336 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
19337 can_issue_more++;
19338
19339 /* Is next_insn going to start a new group? */
19340 *group_end
19341 = (end
19342 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
19343 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
19344 || (can_issue_more < issue_rate &&
19345 insn_terminates_group_p (next_insn, previous_group)));
19346 if (*group_end && end)
19347 (*group_count)--;
19348
19349 if (sched_verbose > 6)
19350 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
19351 *group_count, can_issue_more);
19352 return can_issue_more;
19353 }
19354
19355 return can_issue_more;
19356 }
19357
19358 /* This function tries to synch the dispatch groups that the compiler "sees"
19359 with the dispatch groups that the processor dispatcher is expected to
19360 form in practice. It tries to achieve this synchronization by forcing the
19361 estimated processor grouping on the compiler (as opposed to the function
19362 'pad_groups' which tries to force the scheduler's grouping on the processor).
19363
19364 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
19365 examines the (estimated) dispatch groups that will be formed by the processor
19366 dispatcher. It marks these group boundaries to reflect the estimated
19367 processor grouping, overriding the grouping that the scheduler had marked.
19368 Depending on the value of the flag '-minsert-sched-nops' this function can
19369 force certain insns into separate groups or force a certain distance between
19370 them by inserting nops, for example, if there exists a "costly dependence"
19371 between the insns.
19372
19373 The function estimates the group boundaries that the processor will form as
19374 follows: It keeps track of how many vacant issue slots are available after
19375 each insn. A subsequent insn will start a new group if one of the following
19376 4 cases applies:
19377 - no more vacant issue slots remain in the current dispatch group.
19378 - only the last issue slot, which is the branch slot, is vacant, but the next
19379 insn is not a branch.
19380 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
19381 which means that a cracked insn (which occupies two issue slots) can't be
19382 issued in this group.
19383 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
19384 start a new group. */
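/* Example (illustrative): with issue_rate == 5, once four insns have
   been placed only the branch slot remains; if the next insn is, say,
   an add rather than a branch, the second case above applies and the
   add is estimated to begin a new dispatch group. */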
19385
19386 static int
19387 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
19388 rtx_insn *tail)
19389 {
19390 rtx_insn *insn, *next_insn;
19391 int issue_rate;
19392 int can_issue_more;
19393 int slot, i;
19394 bool group_end;
19395 int group_count = 0;
19396 rtx *group_insns;
19397
19398 /* Initialize. */
19399 issue_rate = rs6000_issue_rate ();
19400 group_insns = XALLOCAVEC (rtx, issue_rate);
19401 for (i = 0; i < issue_rate; i++)
19402 {
19403 group_insns[i] = 0;
19404 }
19405 can_issue_more = issue_rate;
19406 slot = 0;
19407 insn = get_next_active_insn (prev_head_insn, tail);
19408 group_end = false;
19409
19410 while (insn != NULL_RTX)
19411 {
19412 slot = (issue_rate - can_issue_more);
19413 group_insns[slot] = insn;
19414 can_issue_more =
19415 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
19416 if (insn_terminates_group_p (insn, current_group))
19417 can_issue_more = 0;
19418
19419 next_insn = get_next_active_insn (insn, tail);
19420 if (next_insn == NULL_RTX)
19421 return group_count + 1;
19422
19423 /* Is next_insn going to start a new group? */
19424 group_end
19425 = (can_issue_more == 0
19426 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
19427 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
19428 || (can_issue_more < issue_rate &&
19429 insn_terminates_group_p (next_insn, previous_group)));
19430
19431 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
19432 next_insn, &group_end, can_issue_more,
19433 &group_count);
19434
19435 if (group_end)
19436 {
19437 group_count++;
19438 can_issue_more = 0;
19439 for (i = 0; i < issue_rate; i++)
19440 {
19441 group_insns[i] = 0;
19442 }
19443 }
19444
19445 if (GET_MODE (next_insn) == TImode && can_issue_more)
19446 PUT_MODE (next_insn, VOIDmode);
19447 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
19448 PUT_MODE (next_insn, TImode);
19449
19450 insn = next_insn;
19451 if (can_issue_more == 0)
19452 can_issue_more = issue_rate;
19453 } /* while */
19454
19455 return group_count;
19456 }
19457
19458 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
19459 dispatch group boundaries that the scheduler had marked. Pad with nops
19460 any dispatch groups which have vacant issue slots, in order to force the
19461 scheduler's grouping on the processor dispatcher. The function
19462 returns the number of dispatch groups found. */
19463
19464 static int
19465 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
19466 rtx_insn *tail)
19467 {
19468 rtx_insn *insn, *next_insn;
19469 rtx nop;
19470 int issue_rate;
19471 int can_issue_more;
19472 int group_end;
19473 int group_count = 0;
19474
19475 /* Initialize issue_rate. */
19476 issue_rate = rs6000_issue_rate ();
19477 can_issue_more = issue_rate;
19478
19479 insn = get_next_active_insn (prev_head_insn, tail);
19480 next_insn = get_next_active_insn (insn, tail);
19481
19482 while (insn != NULL_RTX)
19483 {
19484 can_issue_more =
19485 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
19486
19487 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
19488
19489 if (next_insn == NULL_RTX)
19490 break;
19491
19492 if (group_end)
19493 {
19494 /* If the scheduler had marked group termination at this location
19495 (between insn and next_insn), and neither insn nor next_insn will
19496 force group termination, pad the group with nops to force group
19497 termination. */
19498 if (can_issue_more
19499 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
19500 && !insn_terminates_group_p (insn, current_group)
19501 && !insn_terminates_group_p (next_insn, previous_group))
19502 {
19503 if (!is_branch_slot_insn (next_insn))
19504 can_issue_more--;
19505
19506 while (can_issue_more)
19507 {
19508 nop = gen_nop ();
19509 emit_insn_before (nop, next_insn);
19510 can_issue_more--;
19511 }
19512 }
19513
19514 can_issue_more = issue_rate;
19515 group_count++;
19516 }
19517
19518 insn = next_insn;
19519 next_insn = get_next_active_insn (insn, tail);
19520 }
19521
19522 return group_count;
19523 }
19524
19525 /* We're beginning a new block. Initialize data structures as necessary. */
19526
19527 static void
19528 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
19529 int sched_verbose ATTRIBUTE_UNUSED,
19530 int max_ready ATTRIBUTE_UNUSED)
19531 {
19532 last_scheduled_insn = NULL;
19533 load_store_pendulum = 0;
19534 divide_cnt = 0;
19535 vec_pairing = 0;
19536 }
19537
19538 /* The following function is called at the end of scheduling BB.
19539 After reload, it inserts nops to enforce insn group bundling. */
19540
19541 static void
19542 rs6000_sched_finish (FILE *dump, int sched_verbose)
19543 {
19544 int n_groups;
19545
19546 if (sched_verbose)
19547 fprintf (dump, "=== Finishing schedule.\n");
19548
19549 if (reload_completed && rs6000_sched_groups)
19550 {
19551 /* Do not run sched_finish hook when selective scheduling enabled. */
19552 if (sel_sched_p ())
19553 return;
19554
19555 if (rs6000_sched_insert_nops == sched_finish_none)
19556 return;
19557
19558 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
19559 n_groups = pad_groups (dump, sched_verbose,
19560 current_sched_info->prev_head,
19561 current_sched_info->next_tail);
19562 else
19563 n_groups = redefine_groups (dump, sched_verbose,
19564 current_sched_info->prev_head,
19565 current_sched_info->next_tail);
19566
19567 if (sched_verbose >= 6)
19568 {
19569 fprintf (dump, "ngroups = %d\n", n_groups);
19570 print_rtl (dump, current_sched_info->prev_head);
19571 fprintf (dump, "Done finish_sched\n");
19572 }
19573 }
19574 }
19575
19576 struct rs6000_sched_context
19577 {
19578 short cached_can_issue_more;
19579 rtx_insn *last_scheduled_insn;
19580 int load_store_pendulum;
19581 int divide_cnt;
19582 int vec_pairing;
19583 };
19584
19585 typedef struct rs6000_sched_context rs6000_sched_context_def;
19586 typedef rs6000_sched_context_def *rs6000_sched_context_t;
19587
19588 /* Allocate store for new scheduling context. */
19589 static void *
19590 rs6000_alloc_sched_context (void)
19591 {
19592 return xmalloc (sizeof (rs6000_sched_context_def));
19593 }
19594
19595 /* If CLEAN_P is true then initialize _SC with clean data;
19596 otherwise initialize it from the global context. */
19597 static void
19598 rs6000_init_sched_context (void *_sc, bool clean_p)
19599 {
19600 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
19601
19602 if (clean_p)
19603 {
19604 sc->cached_can_issue_more = 0;
19605 sc->last_scheduled_insn = NULL;
19606 sc->load_store_pendulum = 0;
19607 sc->divide_cnt = 0;
19608 sc->vec_pairing = 0;
19609 }
19610 else
19611 {
19612 sc->cached_can_issue_more = cached_can_issue_more;
19613 sc->last_scheduled_insn = last_scheduled_insn;
19614 sc->load_store_pendulum = load_store_pendulum;
19615 sc->divide_cnt = divide_cnt;
19616 sc->vec_pairing = vec_pairing;
19617 }
19618 }
19619
19620 /* Sets the global scheduling context to the one pointed to by _SC. */
19621 static void
19622 rs6000_set_sched_context (void *_sc)
19623 {
19624 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
19625
19626 gcc_assert (sc != NULL);
19627
19628 cached_can_issue_more = sc->cached_can_issue_more;
19629 last_scheduled_insn = sc->last_scheduled_insn;
19630 load_store_pendulum = sc->load_store_pendulum;
19631 divide_cnt = sc->divide_cnt;
19632 vec_pairing = sc->vec_pairing;
19633 }
19634
19635 /* Free _SC. */
19636 static void
19637 rs6000_free_sched_context (void *_sc)
19638 {
19639 gcc_assert (_sc != NULL);
19640
19641 free (_sc);
19642 }
19643
19644 static bool
19645 rs6000_sched_can_speculate_insn (rtx_insn *insn)
19646 {
19647 switch (get_attr_type (insn))
19648 {
19649 case TYPE_DIV:
19650 case TYPE_SDIV:
19651 case TYPE_DDIV:
19652 case TYPE_VECDIV:
19653 case TYPE_SSQRT:
19654 case TYPE_DSQRT:
19655 return false;
19656
19657 default:
19658 return true;
19659 }
19660 }
19661 \f
19662 /* Length in units of the trampoline for entering a nested function. */
19663
19664 int
19665 rs6000_trampoline_size (void)
19666 {
19667 int ret = 0;
19668
19669 switch (DEFAULT_ABI)
19670 {
19671 default:
19672 gcc_unreachable ();
19673
19674 case ABI_AIX:
19675 ret = (TARGET_32BIT) ? 12 : 24;
19676 break;
19677
19678 case ABI_ELFv2:
19679 gcc_assert (!TARGET_32BIT);
19680 ret = 32;
19681 break;
19682
19683 case ABI_DARWIN:
19684 case ABI_V4:
19685 ret = (TARGET_32BIT) ? 40 : 48;
19686 break;
19687 }
19688
19689 return ret;
19690 }
19691
19692 /* Emit RTL insns to initialize the variable parts of a trampoline.
19693 FNADDR is an RTX for the address of the function's pure code.
19694 CXT is an RTX for the static chain value for the function. */
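/* Layout sketch for the AIX case below (regsize is 4 or 8):

     tramp + 0*regsize: function entry address (from FNADDR's descriptor)
     tramp + 1*regsize: TOC pointer
     tramp + 2*regsize: static chain (CXT)

   The other ABIs delegate the work to __trampoline_setup instead. */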
19695
19696 static void
19697 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
19698 {
19699 int regsize = (TARGET_32BIT) ? 4 : 8;
19700 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
19701 rtx ctx_reg = force_reg (Pmode, cxt);
19702 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
19703
19704 switch (DEFAULT_ABI)
19705 {
19706 default:
19707 gcc_unreachable ();
19708
19709 /* Under AIX, just build the 3-word function descriptor. */
19710 case ABI_AIX:
19711 {
19712 rtx fnmem, fn_reg, toc_reg;
19713
19714 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
19715 error ("you cannot take the address of a nested function if you use "
19716 "the %qs option", "-mno-pointers-to-nested-functions");
19717
19718 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
19719 fn_reg = gen_reg_rtx (Pmode);
19720 toc_reg = gen_reg_rtx (Pmode);
19721
19722 /* Macro to shorten the code expansions below. */
19723 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
19724
19725 m_tramp = replace_equiv_address (m_tramp, addr);
19726
19727 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
19728 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
19729 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
19730 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
19731 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
19732
19733 # undef MEM_PLUS
19734 }
19735 break;
19736
19737 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
19738 case ABI_ELFv2:
19739 case ABI_DARWIN:
19740 case ABI_V4:
19741 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
19742 LCT_NORMAL, VOIDmode,
19743 addr, Pmode,
19744 GEN_INT (rs6000_trampoline_size ()), SImode,
19745 fnaddr, Pmode,
19746 ctx_reg, Pmode);
19747 break;
19748 }
19749 }
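
/* Illustration only (not part of GCC): for ABI_AIX the trampoline built
   above is simply a copy of the callee's function descriptor with the
   static-chain word appended, at offsets 0, regsize and 2*regsize.  A
   hypothetical C mirror of that layout; the word size follows
   TARGET_32BIT.  */
#if 0
struct aix_fn_descriptor
{
  void *entry;		/* code address, copied from FNADDR's descriptor */
  void *toc;		/* TOC pointer, copied from FNADDR's descriptor */
  void *static_chain;	/* CXT, stored last by rs6000_trampoline_init */
};
#endif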
19750
19751 \f
19752 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
19753 identifier as an argument, so the front end shouldn't look it up. */
19754
19755 static bool
19756 rs6000_attribute_takes_identifier_p (const_tree attr_id)
19757 {
19758 return is_attribute_p ("altivec", attr_id);
19759 }
19760
19761 /* Handle the "altivec" attribute. The attribute may have
19762 arguments as follows:
19763
19764 __attribute__((altivec(vector__)))
19765 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
19766 __attribute__((altivec(bool__))) (always followed by 'unsigned')
19767
19768 and may appear more than once (e.g., 'vector bool char') in a
19769 given declaration. */
19770
19771 static tree
19772 rs6000_handle_altivec_attribute (tree *node,
19773 tree name ATTRIBUTE_UNUSED,
19774 tree args,
19775 int flags ATTRIBUTE_UNUSED,
19776 bool *no_add_attrs)
19777 {
19778 tree type = *node, result = NULL_TREE;
19779 machine_mode mode;
19780 int unsigned_p;
19781 char altivec_type
19782 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
19783 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
19784 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
19785 : '?');
19786
19787 while (POINTER_TYPE_P (type)
19788 || TREE_CODE (type) == FUNCTION_TYPE
19789 || TREE_CODE (type) == METHOD_TYPE
19790 || TREE_CODE (type) == ARRAY_TYPE)
19791 type = TREE_TYPE (type);
19792
19793 mode = TYPE_MODE (type);
19794
19795 /* Check for invalid AltiVec type qualifiers. */
19796 if (type == long_double_type_node)
19797 error ("use of %<long double%> in AltiVec types is invalid");
19798 else if (type == boolean_type_node)
19799 error ("use of boolean types in AltiVec types is invalid");
19800 else if (TREE_CODE (type) == COMPLEX_TYPE)
19801 error ("use of %<complex%> in AltiVec types is invalid");
19802 else if (DECIMAL_FLOAT_MODE_P (mode))
19803 error ("use of decimal floating point types in AltiVec types is invalid");
19804 else if (!TARGET_VSX)
19805 {
19806 if (type == long_unsigned_type_node || type == long_integer_type_node)
19807 {
19808 if (TARGET_64BIT)
19809 error ("use of %<long%> in AltiVec types is invalid for "
19810 "64-bit code without %qs", "-mvsx");
19811 else if (rs6000_warn_altivec_long)
19812 warning (0, "use of %<long%> in AltiVec types is deprecated; "
19813 "use %<int%>");
19814 }
19815 else if (type == long_long_unsigned_type_node
19816 || type == long_long_integer_type_node)
19817 error ("use of %<long long%> in AltiVec types is invalid without %qs",
19818 "-mvsx");
19819 else if (type == double_type_node)
19820 error ("use of %<double%> in AltiVec types is invalid without %qs",
19821 "-mvsx");
19822 }
19823
19824 switch (altivec_type)
19825 {
19826 case 'v':
19827 unsigned_p = TYPE_UNSIGNED (type);
19828 switch (mode)
19829 {
19830 case E_TImode:
19831 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
19832 break;
19833 case E_DImode:
19834 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
19835 break;
19836 case E_SImode:
19837 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
19838 break;
19839 case E_HImode:
19840 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
19841 break;
19842 case E_QImode:
19843 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
19844 break;
19845 case E_SFmode: result = V4SF_type_node; break;
19846 case E_DFmode: result = V2DF_type_node; break;
19847 /* If the user says 'vector int bool', we may be handed the 'bool'
19848 attribute _before_ the 'vector' attribute, and so select the
19849 proper type in the 'b' case below. */
19850 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
19851 case E_V2DImode: case E_V2DFmode:
19852 result = type;
19853 default: break;
19854 }
19855 break;
19856 case 'b':
19857 switch (mode)
19858 {
19859 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
19860 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
19861 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
19862 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
19863 default: break;
19864 }
19865 break;
19866 case 'p':
19867 switch (mode)
19868 {
19869 case E_V8HImode: result = pixel_V8HI_type_node;
19870 default: break;
19871 }
19872 default: break;
19873 }
19874
19875 /* Propagate qualifiers attached to the element type
19876 onto the vector type. */
19877 if (result && result != type && TYPE_QUALS (type))
19878 result = build_qualified_type (result, TYPE_QUALS (type));
19879
19880 *no_add_attrs = true; /* No need to hang on to the attribute. */
19881
19882 if (result)
19883 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
19884
19885 return NULL_TREE;
19886 }
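
/* Illustration only: hypothetical source forms that reach the handler
   above; these are the expansions behind the AltiVec vector keywords.  */
#if 0
__attribute__ ((altivec (vector__))) signed int vsi;	 /* vector signed int */
__attribute__ ((altivec (bool__))) unsigned int vbi;	 /* vector bool int */
__attribute__ ((altivec (pixel__))) unsigned short vpx; /* vector pixel */
#endif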
19887
19888 /* AltiVec defines five built-in scalar types that serve as vector
19889 elements; we must teach the compiler how to mangle them. The 128-bit
19890 floating point mangling is target-specific as well. MMA defines
19891 two built-in types to be used as opaque vector types. */
19892
19893 static const char *
19894 rs6000_mangle_type (const_tree type)
19895 {
19896 type = TYPE_MAIN_VARIANT (type);
19897
19898 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
19899 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE
19900 && TREE_CODE (type) != OPAQUE_TYPE)
19901 return NULL;
19902
19903 if (type == bool_char_type_node) return "U6__boolc";
19904 if (type == bool_short_type_node) return "U6__bools";
19905 if (type == pixel_type_node) return "u7__pixel";
19906 if (type == bool_int_type_node) return "U6__booli";
19907 if (type == bool_long_long_type_node) return "U6__boolx";
19908
19909 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
19910 return "g";
19911 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
19912 return ieee128_mangling_gcc_8_1 ? "U10__float128" : "u9__ieee128";
19913
19914 if (type == vector_pair_type_node)
19915 return "u13__vector_pair";
19916 if (type == vector_quad_type_node)
19917 return "u13__vector_quad";
19918
19919 /* For all other types, use the default mangling. */
19920 return NULL;
19921 }
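
/* Illustration only: assumed manglings produced from the strings above,
   e.g. for powerpc64 C++ declarations.  */
#if 0
void f (__vector __bool int);	/* parameter mangles as "U6__booli" */
void g (__ieee128);		/* parameter mangles as "u9__ieee128", or
				   "U10__float128" in GCC 8.1 compat mode */
#endif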
19922
19923 /* Handle a "longcall" or "shortcall" attribute; arguments as in
19924 struct attribute_spec.handler. */
19925
19926 static tree
19927 rs6000_handle_longcall_attribute (tree *node, tree name,
19928 tree args ATTRIBUTE_UNUSED,
19929 int flags ATTRIBUTE_UNUSED,
19930 bool *no_add_attrs)
19931 {
19932 if (TREE_CODE (*node) != FUNCTION_TYPE
19933 && TREE_CODE (*node) != FIELD_DECL
19934 && TREE_CODE (*node) != TYPE_DECL)
19935 {
19936 warning (OPT_Wattributes, "%qE attribute only applies to functions",
19937 name);
19938 *no_add_attrs = true;
19939 }
19940
19941 return NULL_TREE;
19942 }
19943
19944 /* Set longcall attributes on all functions declared when
19945 rs6000_default_long_calls is true. */
19946 static void
19947 rs6000_set_default_type_attributes (tree type)
19948 {
19949 if (rs6000_default_long_calls
19950 && (TREE_CODE (type) == FUNCTION_TYPE
19951 || TREE_CODE (type) == METHOD_TYPE))
19952 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
19953 NULL_TREE,
19954 TYPE_ATTRIBUTES (type));
19955
19956 #if TARGET_MACHO
19957 darwin_set_default_type_attributes (type);
19958 #endif
19959 }
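
/* Illustration only: hypothetical per-declaration uses of the attributes
   handled above, overriding the default selected by -mlongcall.  */
#if 0
void far_away (void) __attribute__ ((longcall));  /* force indirect call */
void nearby (void) __attribute__ ((shortcall));	  /* force direct bl */
#endif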
19960
19961 /* Return a reference suitable for calling a function with the
19962 longcall attribute. */
19963
19964 static rtx
19965 rs6000_longcall_ref (rtx call_ref, rtx arg)
19966 {
19967 /* System V adds '.' to the internal name, so skip any leading dots. */
19968 const char *call_name = XSTR (call_ref, 0);
19969 if (*call_name == '.')
19970 {
19971 while (*call_name == '.')
19972 call_name++;
19973
19974 tree node = get_identifier (call_name);
19975 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
19976 }
19977
19978 if (TARGET_PLTSEQ)
19979 {
19980 rtx base = const0_rtx;
19981 int regno = 12;
19982 if (rs6000_pcrel_p ())
19983 {
19984 rtx reg = gen_rtx_REG (Pmode, regno);
19985 rtx u = gen_rtx_UNSPEC_VOLATILE (Pmode,
19986 gen_rtvec (3, base, call_ref, arg),
19987 UNSPECV_PLT_PCREL);
19988 emit_insn (gen_rtx_SET (reg, u));
19989 return reg;
19990 }
19991
19992 if (DEFAULT_ABI == ABI_ELFv2)
19993 base = gen_rtx_REG (Pmode, TOC_REGISTER);
19994 else
19995 {
19996 if (flag_pic)
19997 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
19998 regno = 11;
19999 }
20000 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
20001 may be used by a function global entry point. For SysV4, r11
20002 is used by __glink_PLTresolve lazy resolver entry. */
20003 rtx reg = gen_rtx_REG (Pmode, regno);
20004 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
20005 UNSPEC_PLT16_HA);
20006 rtx lo = gen_rtx_UNSPEC_VOLATILE (Pmode,
20007 gen_rtvec (3, reg, call_ref, arg),
20008 UNSPECV_PLT16_LO);
20009 emit_insn (gen_rtx_SET (reg, hi));
20010 emit_insn (gen_rtx_SET (reg, lo));
20011 return reg;
20012 }
20013
20014 return force_reg (Pmode, call_ref);
20015 }
20016 \f
20017 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
20018 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
20019 #endif
20020
20021 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20022 struct attribute_spec.handler. */
20023 static tree
20024 rs6000_handle_struct_attribute (tree *node, tree name,
20025 tree args ATTRIBUTE_UNUSED,
20026 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
20027 {
20028 tree *type = NULL;
20029 if (DECL_P (*node))
20030 {
20031 if (TREE_CODE (*node) == TYPE_DECL)
20032 type = &TREE_TYPE (*node);
20033 }
20034 else
20035 type = node;
20036
20037 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
20038 || TREE_CODE (*type) == UNION_TYPE)))
20039 {
20040 warning (OPT_Wattributes, "%qE attribute ignored", name);
20041 *no_add_attrs = true;
20042 }
20043
20044 else if ((is_attribute_p ("ms_struct", name)
20045 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
20046 || ((is_attribute_p ("gcc_struct", name)
20047 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
20048 {
20049 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
20050 name);
20051 *no_add_attrs = true;
20052 }
20053
20054 return NULL_TREE;
20055 }
20056
20057 static bool
20058 rs6000_ms_bitfield_layout_p (const_tree record_type)
20059 {
20060 return ((TARGET_USE_MS_BITFIELD_LAYOUT
20061 	   && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
20062 	  || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
20063 }
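
/* Illustration only: hypothetical uses of the attributes handled above,
   selecting the bit-field packing convention per record type.  */
#if 0
struct __attribute__ ((ms_struct)) s_ms	  { char c : 3; int i : 4; };
struct __attribute__ ((gcc_struct)) s_gcc { char c : 3; int i : 4; };
#endif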
20064 \f
20065 #ifdef USING_ELFOS_H
20066
20067 /* A get_unnamed_section callback, used for switching to toc_section. */
20068
20069 static void
20070 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
20071 {
20072 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20073 && TARGET_MINIMAL_TOC)
20074 {
20075 if (!toc_initialized)
20076 {
20077 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
20078 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20079 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
20080 fprintf (asm_out_file, "\t.tc ");
20081 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
20082 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20083 fprintf (asm_out_file, "\n");
20084
20085 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20086 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20087 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20088 fprintf (asm_out_file, " = .+32768\n");
20089 toc_initialized = 1;
20090 }
20091 else
20092 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20093 }
20094 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20095 {
20096 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
20097 if (!toc_initialized)
20098 {
20099 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20100 toc_initialized = 1;
20101 }
20102 }
20103 else
20104 {
20105 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20106 if (!toc_initialized)
20107 {
20108 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20109 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20110 fprintf (asm_out_file, " = .+32768\n");
20111 toc_initialized = 1;
20112 }
20113 }
20114 }
20115
20116 /* Implement TARGET_ASM_INIT_SECTIONS. */
20117
20118 static void
20119 rs6000_elf_asm_init_sections (void)
20120 {
20121 toc_section
20122 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
20123
20124 sdata2_section
20125 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
20126 SDATA2_SECTION_ASM_OP);
20127 }
20128
20129 /* Implement TARGET_SELECT_RTX_SECTION. */
20130
20131 static section *
20132 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
20133 unsigned HOST_WIDE_INT align)
20134 {
20135 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
20136 return toc_section;
20137 else
20138 return default_elf_select_rtx_section (mode, x, align);
20139 }
20140 \f
20141 /* For a SYMBOL_REF, set generic flags and then perform some
20142 target-specific processing.
20143
20144 When the AIX ABI is requested on a non-AIX system, replace the
20145 function name with the real name (with a leading .) rather than the
20146 function descriptor name. This saves a lot of overriding code to
20147 read the prefixes. */
20148
20149 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
20150 static void
20151 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
20152 {
20153 default_encode_section_info (decl, rtl, first);
20154
20155 if (first
20156 && TREE_CODE (decl) == FUNCTION_DECL
20157 && !TARGET_AIX
20158 && DEFAULT_ABI == ABI_AIX)
20159 {
20160 rtx sym_ref = XEXP (rtl, 0);
20161 size_t len = strlen (XSTR (sym_ref, 0));
20162 char *str = XALLOCAVEC (char, len + 2);
20163 str[0] = '.';
20164 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
20165 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
20166 }
20167 }
20168
20169 static inline bool
20170 compare_section_name (const char *section, const char *templ)
20171 {
20172 int len;
20173
20174 len = strlen (templ);
20175 return (strncmp (section, templ, len) == 0
20176 && (section[len] == 0 || section[len] == '.'));
20177 }
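
/* Illustration only: hypothetical checks of the matching rule above;
   TEMPL must match SECTION exactly or be followed by a sub-section dot.  */
#if 0
gcc_assert (compare_section_name (".sdata", ".sdata"));	    /* exact */
gcc_assert (compare_section_name (".sdata.foo", ".sdata")); /* '.' suffix */
gcc_assert (!compare_section_name (".sdata2", ".sdata"));   /* '2' rejected */
#endif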
20178
20179 bool
20180 rs6000_elf_in_small_data_p (const_tree decl)
20181 {
20182 if (rs6000_sdata == SDATA_NONE)
20183 return false;
20184
20185 /* We want to merge strings, so we never consider them small data. */
20186 if (TREE_CODE (decl) == STRING_CST)
20187 return false;
20188
20189 /* Functions are never in the small data area. */
20190 if (TREE_CODE (decl) == FUNCTION_DECL)
20191 return false;
20192
20193 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
20194 {
20195 const char *section = DECL_SECTION_NAME (decl);
20196 if (compare_section_name (section, ".sdata")
20197 || compare_section_name (section, ".sdata2")
20198 || compare_section_name (section, ".gnu.linkonce.s")
20199 || compare_section_name (section, ".sbss")
20200 || compare_section_name (section, ".sbss2")
20201 || compare_section_name (section, ".gnu.linkonce.sb")
20202 || strcmp (section, ".PPC.EMB.sdata0") == 0
20203 || strcmp (section, ".PPC.EMB.sbss0") == 0)
20204 return true;
20205 }
20206 else
20207 {
20208 /* If we are told not to put readonly data in sdata, then don't. */
20209 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
20210 && !rs6000_readonly_in_sdata)
20211 return false;
20212
20213 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
20214
20215 if (size > 0
20216 && size <= g_switch_value
20217 /* If it's not public, and we're not going to reference it there,
20218 there's no need to put it in the small data section. */
20219 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
20220 return true;
20221 }
20222
20223 return false;
20224 }
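
/* Illustration only: hypothetical declarations classified by the
   predicate above, assuming -msdata and the default -G 8 threshold.  */
#if 0
long counter;					 /* 8 bytes: small data */
int tagged __attribute__ ((section (".sdata"))); /* named-section match */
double big[64];					 /* 512 bytes: too large */
#endif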
20225
20226 #endif /* USING_ELFOS_H */
20227 \f
20228 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
20229
20230 static bool
20231 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
20232 {
20233 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
20234 }
20235
20236 /* Do not place thread-local symbols refs in the object blocks. */
20237
20238 static bool
20239 rs6000_use_blocks_for_decl_p (const_tree decl)
20240 {
20241 return !DECL_THREAD_LOCAL_P (decl);
20242 }
20243 \f
20244 /* Return a REG that occurs in ADDR with coefficient 1.
20245 ADDR can be effectively incremented by incrementing REG.
20246
20247 r0 is special and we must not select it as an address
20248 register by this routine since our caller will try to
20249 increment the returned register via an "la" instruction. */
20250
20251 rtx
20252 find_addr_reg (rtx addr)
20253 {
20254 while (GET_CODE (addr) == PLUS)
20255 {
20256 if (REG_P (XEXP (addr, 0))
20257 && REGNO (XEXP (addr, 0)) != 0)
20258 addr = XEXP (addr, 0);
20259 else if (REG_P (XEXP (addr, 1))
20260 && REGNO (XEXP (addr, 1)) != 0)
20261 addr = XEXP (addr, 1);
20262 else if (CONSTANT_P (XEXP (addr, 0)))
20263 addr = XEXP (addr, 1);
20264 else if (CONSTANT_P (XEXP (addr, 1)))
20265 addr = XEXP (addr, 0);
20266 else
20267 gcc_unreachable ();
20268 }
20269 gcc_assert (REG_P (addr) && REGNO (addr) != 0);
20270 return addr;
20271 }
20272
20273 void
20274 rs6000_fatal_bad_address (rtx op)
20275 {
20276 fatal_insn ("bad address", op);
20277 }
20278
20279 #if TARGET_MACHO
20280
20281 vec<branch_island, va_gc> *branch_islands;
20282
20283 /* Remember to generate a branch island for far calls to the given
20284 function. */
20285
20286 static void
20287 add_compiler_branch_island (tree label_name, tree function_name,
20288 int line_number)
20289 {
20290 branch_island bi = {function_name, label_name, line_number};
20291 vec_safe_push (branch_islands, bi);
20292 }
20293
20294 /* NO_PREVIOUS_DEF checks whether the function name already has a
20295    branch island recorded for it.  */
20296
20297 static int
20298 no_previous_def (tree function_name)
20299 {
20300 branch_island *bi;
20301 unsigned ix;
20302
20303 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
20304 if (function_name == bi->function_name)
20305 return 0;
20306 return 1;
20307 }
20308
20309 /* GET_PREV_LABEL gets the label name from the previous definition of
20310 the function. */
20311
20312 static tree
20313 get_prev_label (tree function_name)
20314 {
20315 branch_island *bi;
20316 unsigned ix;
20317
20318 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
20319 if (function_name == bi->function_name)
20320 return bi->label_name;
20321 return NULL_TREE;
20322 }
20323
20324 /* Generate external symbol indirection stubs (PIC and non-PIC). */
20325
20326 void
20327 machopic_output_stub (FILE *file, const char *symb, const char *stub)
20328 {
20329 unsigned int length;
20330 char *symbol_name, *lazy_ptr_name;
20331 char *local_label_0;
20332 static unsigned label = 0;
20333
20334 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20335 symb = (*targetm.strip_name_encoding) (symb);
20336
20337 length = strlen (symb);
20338 symbol_name = XALLOCAVEC (char, length + 32);
20339 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
20340
20341 lazy_ptr_name = XALLOCAVEC (char, length + 32);
20342 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
20343
20344 if (MACHOPIC_PURE)
20345 {
20346 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
20347 fprintf (file, "\t.align 5\n");
20348
20349 fprintf (file, "%s:\n", stub);
20350 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20351
20352 label++;
20353 local_label_0 = XALLOCAVEC (char, 16);
20354 sprintf (local_label_0, "L%u$spb", label);
20355
20356 fprintf (file, "\tmflr r0\n");
20357 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
20358 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
20359 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
20360 lazy_ptr_name, local_label_0);
20361 fprintf (file, "\tmtlr r0\n");
20362 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
20363 (TARGET_64BIT ? "ldu" : "lwzu"),
20364 lazy_ptr_name, local_label_0);
20365 fprintf (file, "\tmtctr r12\n");
20366 fprintf (file, "\tbctr\n");
20367 }
20368 else /* mdynamic-no-pic or mkernel. */
20369 {
20370 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
20371 fprintf (file, "\t.align 4\n");
20372
20373 fprintf (file, "%s:\n", stub);
20374 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20375
20376 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
20377 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
20378 (TARGET_64BIT ? "ldu" : "lwzu"),
20379 lazy_ptr_name);
20380 fprintf (file, "\tmtctr r12\n");
20381 fprintf (file, "\tbctr\n");
20382 }
20383
20384 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
20385 fprintf (file, "%s:\n", lazy_ptr_name);
20386 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20387 fprintf (file, "%sdyld_stub_binding_helper\n",
20388 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
20389 }
20390
20391 /* Legitimize PIC addresses. If the address is already
20392 position-independent, we return ORIG. Newly generated
20393 position-independent addresses go into a reg. This is REG if
20394 nonzero, otherwise we allocate register(s) as necessary. */
20395
20396 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
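
/* Illustration only: SMALL_INT accepts exactly the signed 16-bit range;
   adding 0x8000 maps [-0x8000, 0x7fff] onto [0, 0xffff].  Hypothetical
   checks of the boundary values:  */
#if 0
gcc_assert (SMALL_INT (GEN_INT (0x7fff)));   /* largest positive */
gcc_assert (SMALL_INT (GEN_INT (-0x8000)));  /* smallest negative */
gcc_assert (!SMALL_INT (GEN_INT (0x8000)));  /* one past the range */
#endif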
20397
20398 rtx
20399 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
20400 rtx reg)
20401 {
20402 rtx base, offset;
20403
20404 if (reg == NULL && !reload_completed)
20405 reg = gen_reg_rtx (Pmode);
20406
20407 if (GET_CODE (orig) == CONST)
20408 {
20409 rtx reg_temp;
20410
20411 if (GET_CODE (XEXP (orig, 0)) == PLUS
20412 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
20413 return orig;
20414
20415 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
20416
20417 /* Use a different reg for the intermediate value, as
20418 it will be marked UNCHANGING. */
20419 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
20420 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
20421 Pmode, reg_temp);
20422 offset =
20423 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
20424 Pmode, reg);
20425
20426 if (CONST_INT_P (offset))
20427 {
20428 if (SMALL_INT (offset))
20429 return plus_constant (Pmode, base, INTVAL (offset));
20430 else if (!reload_completed)
20431 offset = force_reg (Pmode, offset);
20432 else
20433 {
20434 rtx mem = force_const_mem (Pmode, orig);
20435 return machopic_legitimize_pic_address (mem, Pmode, reg);
20436 }
20437 }
20438 return gen_rtx_PLUS (Pmode, base, offset);
20439 }
20440
20441 /* Fall back on generic machopic code. */
20442 return machopic_legitimize_pic_address (orig, mode, reg);
20443 }
20444
20445 /* Output a .machine directive for the Darwin assembler, and call
20446 the generic start_file routine. */
20447
20448 static void
20449 rs6000_darwin_file_start (void)
20450 {
20451 static const struct
20452 {
20453 const char *arg;
20454 const char *name;
20455 HOST_WIDE_INT if_set;
20456 } mapping[] = {
20457 { "ppc64", "ppc64", MASK_64BIT },
20458 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
20459 { "power4", "ppc970", 0 },
20460 { "G5", "ppc970", 0 },
20461 { "7450", "ppc7450", 0 },
20462 { "7400", "ppc7400", MASK_ALTIVEC },
20463 { "G4", "ppc7400", 0 },
20464 { "750", "ppc750", 0 },
20465 { "740", "ppc750", 0 },
20466 { "G3", "ppc750", 0 },
20467 { "604e", "ppc604e", 0 },
20468 { "604", "ppc604", 0 },
20469 { "603e", "ppc603", 0 },
20470 { "603", "ppc603", 0 },
20471 { "601", "ppc601", 0 },
20472 { NULL, "ppc", 0 } };
20473 const char *cpu_id = "";
20474 size_t i;
20475
20476 rs6000_file_start ();
20477 darwin_file_start ();
20478
20479 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
20480
20481 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
20482 cpu_id = rs6000_default_cpu;
20483
20484 if (global_options_set.x_rs6000_cpu_index)
20485 cpu_id = processor_target_table[rs6000_cpu_index].name;
20486
20487 /* Look through the mapping array. Pick the first name that either
20488 matches the argument, has a bit set in IF_SET that is also set
20489 in the target flags, or has a NULL name. */
20490
20491 i = 0;
20492 while (mapping[i].arg != NULL
20493 && strcmp (mapping[i].arg, cpu_id) != 0
20494 && (mapping[i].if_set & rs6000_isa_flags) == 0)
20495 i++;
20496
20497 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
20498 }
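
/* Illustration only (assumed results of the table scan above): 32-bit
   -mcpu=G5 ends at ".machine ppc970" (via the "970" flag bits or the
   "G5" entry), -m64 stops at the first entry and emits ".machine ppc64",
   and no match at all falls through to the final entry, ".machine ppc".  */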
20499
20500 #endif /* TARGET_MACHO */
20501
20502 #if TARGET_ELF
20503 static int
20504 rs6000_elf_reloc_rw_mask (void)
20505 {
20506 if (flag_pic)
20507 return 3;
20508 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20509 return 2;
20510 else
20511 return 0;
20512 }
20513
20514 /* Record an element in the table of global constructors. SYMBOL is
20515 a SYMBOL_REF of the function to be called; PRIORITY is a number
20516 between 0 and MAX_INIT_PRIORITY.
20517
20518 This differs from default_named_section_asm_out_constructor in
20519 that we have special handling for -mrelocatable. */
20520
20521 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
20522 static void
20523 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
20524 {
20525 const char *section = ".ctors";
20526 char buf[18];
20527
20528 if (priority != DEFAULT_INIT_PRIORITY)
20529 {
20530 sprintf (buf, ".ctors.%.5u",
20531 /* Invert the numbering so the linker puts us in the proper
20532 order; constructors are run from right to left, and the
20533 linker sorts in increasing order. */
20534 MAX_INIT_PRIORITY - priority);
20535 section = buf;
20536 }
20537
20538 switch_to_section (get_section (section, SECTION_WRITE, NULL));
20539 assemble_align (POINTER_SIZE);
20540
20541 if (DEFAULT_ABI == ABI_V4
20542 && (TARGET_RELOCATABLE || flag_pic > 1))
20543 {
20544 fputs ("\t.long (", asm_out_file);
20545 output_addr_const (asm_out_file, symbol);
20546 fputs (")@fixup\n", asm_out_file);
20547 }
20548 else
20549 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
20550 }
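
/* Worked example (values assumed): with MAX_INIT_PRIORITY == 65535, a
   constructor of priority 101 lands in the section computed below, so
   the linker's increasing sort preserves the inverted ordering.  */
#if 0
char buf[18];
sprintf (buf, ".ctors.%.5u", 65535 - 101);  /* yields ".ctors.65434" */
#endif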
20551
20552 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
20553 static void
20554 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
20555 {
20556 const char *section = ".dtors";
20557 char buf[18];
20558
20559 if (priority != DEFAULT_INIT_PRIORITY)
20560 {
20561 sprintf (buf, ".dtors.%.5u",
20562 /* Invert the numbering so the linker puts us in the proper
20563 order; constructors are run from right to left, and the
20564 linker sorts in increasing order. */
20565 MAX_INIT_PRIORITY - priority);
20566 section = buf;
20567 }
20568
20569 switch_to_section (get_section (section, SECTION_WRITE, NULL));
20570 assemble_align (POINTER_SIZE);
20571
20572 if (DEFAULT_ABI == ABI_V4
20573 && (TARGET_RELOCATABLE || flag_pic > 1))
20574 {
20575 fputs ("\t.long (", asm_out_file);
20576 output_addr_const (asm_out_file, symbol);
20577 fputs (")@fixup\n", asm_out_file);
20578 }
20579 else
20580 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
20581 }
20582
20583 void
20584 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
20585 {
20586 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
20587 {
20588 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
20589 ASM_OUTPUT_LABEL (file, name);
20590 fputs (DOUBLE_INT_ASM_OP, file);
20591 rs6000_output_function_entry (file, name);
20592 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
20593 if (DOT_SYMBOLS)
20594 {
20595 fputs ("\t.size\t", file);
20596 assemble_name (file, name);
20597 fputs (",24\n\t.type\t.", file);
20598 assemble_name (file, name);
20599 fputs (",@function\n", file);
20600 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
20601 {
20602 fputs ("\t.globl\t.", file);
20603 assemble_name (file, name);
20604 putc ('\n', file);
20605 }
20606 }
20607 else
20608 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
20609 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
20610 rs6000_output_function_entry (file, name);
20611 fputs (":\n", file);
20612 return;
20613 }
20614
20615 int uses_toc;
20616 if (DEFAULT_ABI == ABI_V4
20617 && (TARGET_RELOCATABLE || flag_pic > 1)
20618 && !TARGET_SECURE_PLT
20619 && (!constant_pool_empty_p () || crtl->profile)
20620 && (uses_toc = uses_TOC ()))
20621 {
20622 char buf[256];
20623
20624 if (uses_toc == 2)
20625 switch_to_other_text_partition ();
20626 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
20627
20628 fprintf (file, "\t.long ");
20629 assemble_name (file, toc_label_name);
20630 need_toc_init = 1;
20631 putc ('-', file);
20632 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
20633 assemble_name (file, buf);
20634 putc ('\n', file);
20635 if (uses_toc == 2)
20636 switch_to_other_text_partition ();
20637 }
20638
20639 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
20640 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
20641
20642 if (TARGET_CMODEL == CMODEL_LARGE
20643 && rs6000_global_entry_point_prologue_needed_p ())
20644 {
20645 char buf[256];
20646
20647 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
20648
20649 fprintf (file, "\t.quad .TOC.-");
20650 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
20651 assemble_name (file, buf);
20652 putc ('\n', file);
20653 }
20654
20655 if (DEFAULT_ABI == ABI_AIX)
20656 {
20657 const char *desc_name, *orig_name;
20658
20659 orig_name = (*targetm.strip_name_encoding) (name);
20660 desc_name = orig_name;
20661 while (*desc_name == '.')
20662 desc_name++;
20663
20664 if (TREE_PUBLIC (decl))
20665 fprintf (file, "\t.globl %s\n", desc_name);
20666
20667 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20668 fprintf (file, "%s:\n", desc_name);
20669 fprintf (file, "\t.long %s\n", orig_name);
20670 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
20671 fputs ("\t.long 0\n", file);
20672 fprintf (file, "\t.previous\n");
20673 }
20674 ASM_OUTPUT_LABEL (file, name);
20675 }
20676
20677 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
20678 static void
20679 rs6000_elf_file_end (void)
20680 {
20681 #ifdef HAVE_AS_GNU_ATTRIBUTE
20682 /* ??? The value emitted depends on options active at file end.
20683 Assume anyone using #pragma or attributes that might change
20684 options knows what they are doing. */
20685 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
20686 && rs6000_passes_float)
20687 {
20688 int fp;
20689
20690 if (TARGET_HARD_FLOAT)
20691 fp = 1;
20692 else
20693 fp = 2;
20694 if (rs6000_passes_long_double)
20695 {
20696 if (!TARGET_LONG_DOUBLE_128)
20697 fp |= 2 * 4;
20698 else if (TARGET_IEEEQUAD)
20699 fp |= 3 * 4;
20700 else
20701 fp |= 1 * 4;
20702 }
20703 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
20704 }
20705 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
20706 {
20707 if (rs6000_passes_vector)
20708 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
20709 (TARGET_ALTIVEC_ABI ? 2 : 1));
20710 if (rs6000_returns_struct)
20711 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
20712 aix_struct_return ? 2 : 1);
20713 }
20714 #endif
20715 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
20716 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
20717 file_end_indicate_exec_stack ();
20718 #endif
20719
20720 if (flag_split_stack)
20721 file_end_indicate_split_stack ();
20722
20723 if (cpu_builtin_p)
20724 {
20725 /* We have expanded a CPU builtin, so we need to emit a reference to
20726 the special symbol that LIBC uses to declare it supports the
20727 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
20728 switch_to_section (data_section);
20729 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
20730 fprintf (asm_out_file, "\t%s %s\n",
20731 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
20732 }
20733 }
20734 #endif
20735
20736 #if TARGET_XCOFF
20737
20738 #ifndef HAVE_XCOFF_DWARF_EXTRAS
20739 #define HAVE_XCOFF_DWARF_EXTRAS 0
20740 #endif
20741
20742 static enum unwind_info_type
20743 rs6000_xcoff_debug_unwind_info (void)
20744 {
20745 return UI_NONE;
20746 }
20747
20748 static void
20749 rs6000_xcoff_asm_output_anchor (rtx symbol)
20750 {
20751 char buffer[100];
20752
20753 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
20754 SYMBOL_REF_BLOCK_OFFSET (symbol));
20755 fprintf (asm_out_file, "%s", SET_ASM_OP);
20756 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
20757 fprintf (asm_out_file, ",");
20758 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
20759 fprintf (asm_out_file, "\n");
20760 }
20761
20762 static void
20763 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
20764 {
20765 fputs (GLOBAL_ASM_OP, stream);
20766 RS6000_OUTPUT_BASENAME (stream, name);
20767 putc ('\n', stream);
20768 }
20769
20770 /* A get_unnamed_section callback, used for read-only sections.
20771    DIRECTIVE points to the section string variable.  */
20772
20773 static void
20774 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
20775 {
20776 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
20777 *(const char *const *) directive,
20778 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20779 }
20780
20781 /* Likewise for read-write sections. */
20782
20783 static void
20784 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
20785 {
20786 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
20787 *(const char *const *) directive,
20788 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20789 }
20790
20791 static void
20792 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
20793 {
20794 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
20795 *(const char *const *) directive,
20796 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20797 }
20798
20799 /* A get_unnamed_section callback, used for switching to toc_section. */
20800
20801 static void
20802 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
20803 {
20804 if (TARGET_MINIMAL_TOC)
20805 {
20806 /* toc_section is always selected at least once from
20807 rs6000_xcoff_file_start, so this is guaranteed to
20808 always be defined once and only once in each file. */
20809 if (!toc_initialized)
20810 {
20811 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
20812 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
20813 toc_initialized = 1;
20814 }
20815 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
20816 (TARGET_32BIT ? "" : ",3"));
20817 }
20818 else
20819 fputs ("\t.toc\n", asm_out_file);
20820 }
20821
20822 /* Implement TARGET_ASM_INIT_SECTIONS. */
20823
20824 static void
20825 rs6000_xcoff_asm_init_sections (void)
20826 {
20827 read_only_data_section
20828 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
20829 &xcoff_read_only_section_name);
20830
20831 private_data_section
20832 = get_unnamed_section (SECTION_WRITE,
20833 rs6000_xcoff_output_readwrite_section_asm_op,
20834 &xcoff_private_data_section_name);
20835
20836 read_only_private_data_section
20837 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
20838 &xcoff_private_rodata_section_name);
20839
20840 tls_data_section
20841 = get_unnamed_section (SECTION_TLS,
20842 rs6000_xcoff_output_tls_section_asm_op,
20843 &xcoff_tls_data_section_name);
20844
20845 tls_private_data_section
20846 = get_unnamed_section (SECTION_TLS,
20847 rs6000_xcoff_output_tls_section_asm_op,
20848 &xcoff_private_data_section_name);
20849
20850 toc_section
20851 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
20852
20853 readonly_data_section = read_only_data_section;
20854 }
20855
20856 static int
20857 rs6000_xcoff_reloc_rw_mask (void)
20858 {
20859 return 3;
20860 }
20861
20862 static void
20863 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
20864 tree decl ATTRIBUTE_UNUSED)
20865 {
20866 int smclass;
20867 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
20868
20869 if (flags & SECTION_EXCLUDE)
20870 smclass = 4;
20871 else if (flags & SECTION_DEBUG)
20872 {
20873 fprintf (asm_out_file, "\t.dwsect %s\n", name);
20874 return;
20875 }
20876 else if (flags & SECTION_CODE)
20877 smclass = 0;
20878 else if (flags & SECTION_TLS)
20879 smclass = 3;
20880 else if (flags & SECTION_WRITE)
20881 smclass = 2;
20882 else
20883 smclass = 1;
20884
20885 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
20886 (flags & SECTION_CODE) ? "." : "",
20887 name, suffix[smclass], flags & SECTION_ENTSIZE);
20888 }
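
/* Illustration only: assumed directives produced for each class; ALIGN
   stands for the log2 alignment stored in SECTION_ENTSIZE.
     SECTION_CODE	 -> "\t.csect .NAME[PR],ALIGN"
     SECTION_TLS	 -> "\t.csect NAME[TL],ALIGN"
     SECTION_WRITE	 -> "\t.csect NAME[RW],ALIGN"
     SECTION_EXCLUDE	 -> "\t.csect NAME[XO],ALIGN"
     read-only (default) -> "\t.csect NAME[RO],ALIGN"  */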
20889
20890 #define IN_NAMED_SECTION(DECL) \
20891 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
20892 && DECL_SECTION_NAME (DECL) != NULL)
20893
20894 static section *
20895 rs6000_xcoff_select_section (tree decl, int reloc,
20896 unsigned HOST_WIDE_INT align)
20897 {
20898 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
20899 named section. */
20900 if (align > BIGGEST_ALIGNMENT && VAR_OR_FUNCTION_DECL_P (decl))
20901 {
20902 resolve_unique_section (decl, reloc, true);
20903 if (IN_NAMED_SECTION (decl))
20904 return get_named_section (decl, NULL, reloc);
20905 }
20906
20907 if (decl_readonly_section (decl, reloc))
20908 {
20909 if (TREE_PUBLIC (decl))
20910 return read_only_data_section;
20911 else
20912 return read_only_private_data_section;
20913 }
20914 else
20915 {
20916 #if HAVE_AS_TLS
20917 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
20918 {
20919 if (TREE_PUBLIC (decl))
20920 return tls_data_section;
20921 else if (bss_initializer_p (decl))
20922 {
20923 /* Convert to COMMON to emit in BSS. */
20924 DECL_COMMON (decl) = 1;
20925 return tls_comm_section;
20926 }
20927 else
20928 return tls_private_data_section;
20929 }
20930 else
20931 #endif
20932 if (TREE_PUBLIC (decl))
20933 return data_section;
20934 else
20935 return private_data_section;
20936 }
20937 }
20938
20939 static void
20940 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
20941 {
20942 const char *name;
20943
20944 /* Use select_section for private data and uninitialized data with
20945 alignment <= BIGGEST_ALIGNMENT. */
20946 if (!TREE_PUBLIC (decl)
20947 || DECL_COMMON (decl)
20948 || (DECL_INITIAL (decl) == NULL_TREE
20949 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
20950 || DECL_INITIAL (decl) == error_mark_node
20951 || (flag_zero_initialized_in_bss
20952 && initializer_zerop (DECL_INITIAL (decl))))
20953 return;
20954
20955 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
20956 name = (*targetm.strip_name_encoding) (name);
20957 set_decl_section_name (decl, name);
20958 }
20959
20960 /* Select section for constant in constant pool.
20961
20962 On RS/6000, all constants are in the private read-only data area.
20963 However, if this is being placed in the TOC it must be output as a
20964 toc entry. */
20965
20966 static section *
20967 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
20968 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
20969 {
20970 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
20971 return toc_section;
20972 else
20973 return read_only_private_data_section;
20974 }
20975
20976 /* Remove any trailing [DS] or the like from the symbol name. */
20977
20978 static const char *
20979 rs6000_xcoff_strip_name_encoding (const char *name)
20980 {
20981 size_t len;
20982 if (*name == '*')
20983 name++;
20984 len = strlen (name);
20985 if (name[len - 1] == ']')
20986 return ggc_alloc_string (name, len - 4);
20987 else
20988 return name;
20989 }
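
/* Illustration only: hypothetical checks.  Mapping classes are two
   letters, so a trailing "[XX]" is four characters, and a leading '*'
   is skipped before the length test.  */
#if 0
gcc_assert (!strcmp (rs6000_xcoff_strip_name_encoding ("foo[DS]"), "foo"));
gcc_assert (!strcmp (rs6000_xcoff_strip_name_encoding ("*bar[RW]"), "bar"));
gcc_assert (!strcmp (rs6000_xcoff_strip_name_encoding ("baz"), "baz"));
#endif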
20990
20991 /* Section attributes. AIX is always PIC. */
20992
20993 static unsigned int
20994 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
20995 {
20996 unsigned int align;
20997 unsigned int flags = default_section_type_flags (decl, name, reloc);
20998
20999 /* Align to at least the minimum word size. */
21000 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
21001 align = MIN_UNITS_PER_WORD;
21002 else
21003 /* Increase alignment of large objects if not already stricter. */
21004 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
21005 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
21006 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
21007
21008 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
21009 }
21010
21011 /* Output at beginning of assembler file.
21012
21013 Initialize the section names for the RS/6000 at this point.
21014
21015 Specify filename, including full path, to assembler.
21016
21017 We want to go into the TOC section so at least one .toc will be emitted.
21018 Also, in order to output proper .bs/.es pairs, we need at least one static
21019 [RW] section emitted.
21020
21021 Finally, declare mcount when profiling to make the assembler happy. */
21022
21023 static void
21024 rs6000_xcoff_file_start (void)
21025 {
21026 rs6000_gen_section_name (&xcoff_bss_section_name,
21027 main_input_filename, ".bss_");
21028 rs6000_gen_section_name (&xcoff_private_data_section_name,
21029 main_input_filename, ".rw_");
21030 rs6000_gen_section_name (&xcoff_private_rodata_section_name,
21031 main_input_filename, ".rop_");
21032 rs6000_gen_section_name (&xcoff_read_only_section_name,
21033 main_input_filename, ".ro_");
21034 rs6000_gen_section_name (&xcoff_tls_data_section_name,
21035 main_input_filename, ".tls_");
21036 rs6000_gen_section_name (&xcoff_tbss_section_name,
21037 main_input_filename, ".tbss_[UL]");
21038
21039 fputs ("\t.file\t", asm_out_file);
21040 output_quoted_string (asm_out_file, main_input_filename);
21041 fputc ('\n', asm_out_file);
21042 if (write_symbols != NO_DEBUG)
21043 switch_to_section (private_data_section);
21044 switch_to_section (toc_section);
21045 switch_to_section (text_section);
21046 if (profile_flag)
21047 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
21048 rs6000_file_start ();
21049 }
21050
21051 /* Output at end of assembler file.
21052 On the RS/6000, referencing data should automatically pull in text. */
21053
21054 static void
21055 rs6000_xcoff_file_end (void)
21056 {
21057 switch_to_section (text_section);
21058 fputs ("_section_.text:\n", asm_out_file);
21059 switch_to_section (data_section);
21060 fputs (TARGET_32BIT
21061 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
21062 asm_out_file);
21063 }
21064
21065 struct declare_alias_data
21066 {
21067 FILE *file;
21068 bool function_descriptor;
21069 };
21070
21071 /* Declare alias N.  A helper function for call_for_symbol_and_aliases.  */
21072
21073 static bool
21074 rs6000_declare_alias (struct symtab_node *n, void *d)
21075 {
21076 struct declare_alias_data *data = (struct declare_alias_data *)d;
21077 /* Main symbol is output specially, because varasm machinery does part of
21078 the job for us - we do not need to declare .globl/lglobs and such. */
21079 if (!n->alias || n->weakref)
21080 return false;
21081
21082 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
21083 return false;
21084
21085 /* Prevent assemble_alias from trying to use .set pseudo operation
21086 that does not behave as expected by the middle-end. */
21087 TREE_ASM_WRITTEN (n->decl) = true;
21088
21089 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
21090 char *buffer = (char *) alloca (strlen (name) + 2);
21091 char *p;
21092 int dollar_inside = 0;
21093
21094 strcpy (buffer, name);
21095 p = strchr (buffer, '$');
21096 while (p) {
21097 *p = '_';
21098 dollar_inside++;
21099 p = strchr (p + 1, '$');
21100 }
21101 if (TREE_PUBLIC (n->decl))
21102 {
21103 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
21104 {
21105 if (dollar_inside) {
21106 if (data->function_descriptor)
21107 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
21108 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21109 }
21110 if (data->function_descriptor)
21111 {
21112 fputs ("\t.globl .", data->file);
21113 RS6000_OUTPUT_BASENAME (data->file, buffer);
21114 putc ('\n', data->file);
21115 }
21116 fputs ("\t.globl ", data->file);
21117 RS6000_OUTPUT_BASENAME (data->file, buffer);
21118 putc ('\n', data->file);
21119 }
21120 #ifdef ASM_WEAKEN_DECL
21121 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
21122 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
21123 #endif
21124 }
21125 else
21126 {
21127 if (dollar_inside)
21128 {
21129 if (data->function_descriptor)
21130 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
21131 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21132 }
21133 if (data->function_descriptor)
21134 {
21135 fputs ("\t.lglobl .", data->file);
21136 RS6000_OUTPUT_BASENAME (data->file, buffer);
21137 putc ('\n', data->file);
21138 }
21139 fputs ("\t.lglobl ", data->file);
21140 RS6000_OUTPUT_BASENAME (data->file, buffer);
21141 putc ('\n', data->file);
21142 }
21143 if (data->function_descriptor)
21144 fputs (".", data->file);
21145 RS6000_OUTPUT_BASENAME (data->file, buffer);
21146 fputs (":\n", data->file);
21147 return false;
21148 }
21149
21150
21151 #ifdef HAVE_GAS_HIDDEN
21152 /* Helper function to calculate visibility of a DECL
21153 and return the value as a const string. */
21154
21155 static const char *
21156 rs6000_xcoff_visibility (tree decl)
21157 {
21158 static const char * const visibility_types[] = {
21159 "", ",protected", ",hidden", ",internal"
21160 };
21161
21162 enum symbol_visibility vis = DECL_VISIBILITY (decl);
21163 return visibility_types[vis];
21164 }
21165 #endif
21166
21167
21168 /* This macro produces the initial definition of a function name.
21169 On the RS/6000, we need to place an extra '.' in the function name and
21170 output the function descriptor.
21171 Dollar signs are converted to underscores.
21172
21173 The csect for the function will have already been created when
21174 text_section was selected. We do have to go back to that csect, however.
21175
21176 The third and fourth parameters to the .function pseudo-op (16 and 044)
21177 are placeholders which no longer have any use.
21178
21179 Because AIX assembler's .set command has unexpected semantics, we output
21180 all aliases as alternative labels in front of the definition. */
21181
21182 void
21183 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
21184 {
21185 char *buffer = (char *) alloca (strlen (name) + 1);
21186 char *p;
21187 int dollar_inside = 0;
21188 struct declare_alias_data data = {file, false};
21189
21190 strcpy (buffer, name);
21191 p = strchr (buffer, '$');
21192 while (p) {
21193 *p = '_';
21194 dollar_inside++;
21195 p = strchr (p + 1, '$');
21196 }
21197 if (TREE_PUBLIC (decl))
21198 {
21199 if (!RS6000_WEAK || !DECL_WEAK (decl))
21200 {
21201 if (dollar_inside) {
21202 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
21203 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
21204 }
21205 fputs ("\t.globl .", file);
21206 RS6000_OUTPUT_BASENAME (file, buffer);
21207 #ifdef HAVE_GAS_HIDDEN
21208 fputs (rs6000_xcoff_visibility (decl), file);
21209 #endif
21210 putc ('\n', file);
21211 }
21212 }
21213 else
21214 {
21215 if (dollar_inside) {
21216 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
21217 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
21218 }
21219 fputs ("\t.lglobl .", file);
21220 RS6000_OUTPUT_BASENAME (file, buffer);
21221 putc ('\n', file);
21222 }
21223 fputs ("\t.csect ", file);
21224 RS6000_OUTPUT_BASENAME (file, buffer);
21225 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
21226 RS6000_OUTPUT_BASENAME (file, buffer);
21227 fputs (":\n", file);
21228 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21229 &data, true);
21230 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
21231 RS6000_OUTPUT_BASENAME (file, buffer);
21232 fputs (", TOC[tc0], 0\n", file);
21233 in_section = NULL;
21234 switch_to_section (function_section (decl));
21235 putc ('.', file);
21236 RS6000_OUTPUT_BASENAME (file, buffer);
21237 fputs (":\n", file);
21238 data.function_descriptor = true;
21239 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21240 &data, true);
21241 if (!DECL_IGNORED_P (decl))
21242 {
21243 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
21244 xcoffout_declare_function (file, decl, buffer);
21245 else if (write_symbols == DWARF2_DEBUG)
21246 {
21247 name = (*targetm.strip_name_encoding) (name);
21248 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
21249 }
21250 }
21251 return;
21252 }
21253
21254
21255 /* Output assembly language to globalize a symbol from a DECL,
21256 possibly with visibility. */
21257
21258 void
21259 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
21260 {
21261 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
21262 fputs (GLOBAL_ASM_OP, stream);
21263 RS6000_OUTPUT_BASENAME (stream, name);
21264 #ifdef HAVE_GAS_HIDDEN
21265 fputs (rs6000_xcoff_visibility (decl), stream);
21266 #endif
21267 putc ('\n', stream);
21268 }
21269
21270 /* Output assembly language to define a symbol as COMMON from a DECL,
21271 possibly with visibility. */
21272
21273 void
21274 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
21275 tree decl ATTRIBUTE_UNUSED,
21276 const char *name,
21277 unsigned HOST_WIDE_INT size,
21278 unsigned HOST_WIDE_INT align)
21279 {
21280 unsigned HOST_WIDE_INT align2 = 2;
21281
21282 if (align > 32)
21283 align2 = floor_log2 (align / BITS_PER_UNIT);
21284 else if (size > 4)
21285 align2 = 3;
21286
21287 fputs (COMMON_ASM_OP, stream);
21288 RS6000_OUTPUT_BASENAME (stream, name);
21289
21290 fprintf (stream,
21291 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
21292 size, align2);
21293
21294 #ifdef HAVE_GAS_HIDDEN
21295 if (decl != NULL)
21296 fputs (rs6000_xcoff_visibility (decl), stream);
21297 #endif
21298 putc ('\n', stream);
21299 }
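
/* Worked examples (values assumed; SIZE in bytes, ALIGN in bits, the
   final operand a log2 byte alignment):
     size 2,  align 16  -> ".comm NAME,2,2"   (default align2)
     size 8,  align 32  -> ".comm NAME,8,3"   (size > 4)
     size 16, align 128 -> ".comm NAME,16,4"  (floor_log2 (128/8))  */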
21300
21301 /* This macro produces the initial definition of an object (variable) name.
21302 Because AIX assembler's .set command has unexpected semantics, we output
21303 all aliases as alternative labels in front of the definition. */
21304
21305 void
21306 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
21307 {
21308 struct declare_alias_data data = {file, false};
21309 RS6000_OUTPUT_BASENAME (file, name);
21310 fputs (":\n", file);
21311 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21312 &data, true);
21313 }
21314
21315 /* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'. */
21316
21317 void
21318 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
21319 {
21320 fputs (integer_asm_op (size, FALSE), file);
21321 assemble_name (file, label);
21322 fputs ("-$", file);
21323 }
21324
21325 /* Output a symbol offset relative to the dbase for the current object.
21326 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
21327 signed offsets.
21328
21329 __gcc_unwind_dbase is embedded in all executables/libraries through
21330 libgcc/config/rs6000/crtdbase.S. */
21331
21332 void
21333 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
21334 {
21335 fputs (integer_asm_op (size, FALSE), file);
21336 assemble_name (file, label);
21337 fputs("-__gcc_unwind_dbase", file);
21338 }
21339
21340 #ifdef HAVE_AS_TLS
21341 static void
21342 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
21343 {
21344 rtx symbol;
21345 int flags;
21346 const char *symname;
21347
21348 default_encode_section_info (decl, rtl, first);
21349
21350 /* Careful not to prod global register variables. */
21351 if (!MEM_P (rtl))
21352 return;
21353 symbol = XEXP (rtl, 0);
21354 if (!SYMBOL_REF_P (symbol))
21355 return;
21356
21357 flags = SYMBOL_REF_FLAGS (symbol);
21358
21359 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
21360 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
21361
21362 SYMBOL_REF_FLAGS (symbol) = flags;
21363
21364 /* Append mapping class to extern decls. */
21365 symname = XSTR (symbol, 0);
21366 if (decl /* sync condition with assemble_external () */
21367 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
21368 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
21369 || TREE_CODE (decl) == FUNCTION_DECL)
21370 && symname[strlen (symname) - 1] != ']')
21371 {
21372 char *newname = (char *) alloca (strlen (symname) + 5);
21373 strcpy (newname, symname);
21374 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
21375 ? "[DS]" : "[UA]"));
21376 XSTR (symbol, 0) = ggc_strdup (newname);
21377 }
21378 }
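
/* Illustration only: assumed renamings performed above for public
   extern declarations without an existing mapping class:
     extern void foo (void);  -> "foo[DS]" (function descriptor)
     extern int bar;	      -> "bar[UA]"
   TLS variables and names already ending in ']' are left untouched.  */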
21379 #endif /* HAVE_AS_TLS */
21380 #endif /* TARGET_XCOFF */
21381
21382 void
21383 rs6000_asm_weaken_decl (FILE *stream, tree decl,
21384 const char *name, const char *val)
21385 {
21386 fputs ("\t.weak\t", stream);
21387 RS6000_OUTPUT_BASENAME (stream, name);
21388 if (decl && TREE_CODE (decl) == FUNCTION_DECL
21389 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
21390 {
21391 if (TARGET_XCOFF)
21392 fputs ("[DS]", stream);
21393 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
21394 if (TARGET_XCOFF)
21395 fputs (rs6000_xcoff_visibility (decl), stream);
21396 #endif
21397 fputs ("\n\t.weak\t.", stream);
21398 RS6000_OUTPUT_BASENAME (stream, name);
21399 }
21400 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
21401 if (TARGET_XCOFF)
21402 fputs (rs6000_xcoff_visibility (decl), stream);
21403 #endif
21404 fputc ('\n', stream);
21405 if (val)
21406 {
21407 #ifdef ASM_OUTPUT_DEF
21408 ASM_OUTPUT_DEF (stream, name, val);
21409 #endif
21410 if (decl && TREE_CODE (decl) == FUNCTION_DECL
21411 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
21412 {
21413 fputs ("\t.set\t.", stream);
21414 RS6000_OUTPUT_BASENAME (stream, name);
21415 fputs (",.", stream);
21416 RS6000_OUTPUT_BASENAME (stream, val);
21417 fputc ('\n', stream);
21418 }
21419 }
21420 }
21421
21422
21423 /* Return true if INSN should not be copied. */
21424
21425 static bool
21426 rs6000_cannot_copy_insn_p (rtx_insn *insn)
21427 {
21428 return recog_memoized (insn) >= 0
21429 && get_attr_cannot_copy (insn);
21430 }
21431
21432 /* Compute a (partial) cost for rtx X. Return true if the complete
21433 cost has been computed, and false if subexpressions should be
21434 scanned. In either case, *TOTAL contains the cost result. */
21435
21436 static bool
21437 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
21438 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
21439 {
21440 int code = GET_CODE (x);
21441
21442 switch (code)
21443 {
21444 /* On the RS/6000, if it is valid in the insn, it is free. */
21445 case CONST_INT:
21446 if (((outer_code == SET
21447 || outer_code == PLUS
21448 || outer_code == MINUS)
21449 && (satisfies_constraint_I (x)
21450 || satisfies_constraint_L (x)))
21451 || (outer_code == AND
21452 && (satisfies_constraint_K (x)
21453 || (mode == SImode
21454 ? satisfies_constraint_L (x)
21455 : satisfies_constraint_J (x))))
21456 || ((outer_code == IOR || outer_code == XOR)
21457 && (satisfies_constraint_K (x)
21458 || (mode == SImode
21459 ? satisfies_constraint_L (x)
21460 : satisfies_constraint_J (x))))
21461 || outer_code == ASHIFT
21462 || outer_code == ASHIFTRT
21463 || outer_code == LSHIFTRT
21464 || outer_code == ROTATE
21465 || outer_code == ROTATERT
21466 || outer_code == ZERO_EXTRACT
21467 || (outer_code == MULT
21468 && satisfies_constraint_I (x))
21469 || ((outer_code == DIV || outer_code == UDIV
21470 || outer_code == MOD || outer_code == UMOD)
21471 && exact_log2 (INTVAL (x)) >= 0)
21472 || (outer_code == COMPARE
21473 && (satisfies_constraint_I (x)
21474 || satisfies_constraint_K (x)))
21475 || ((outer_code == EQ || outer_code == NE)
21476 && (satisfies_constraint_I (x)
21477 || satisfies_constraint_K (x)
21478 || (mode == SImode
21479 ? satisfies_constraint_L (x)
21480 : satisfies_constraint_J (x))))
21481 || (outer_code == GTU
21482 && satisfies_constraint_I (x))
21483 || (outer_code == LTU
21484 && satisfies_constraint_P (x)))
21485 {
21486 *total = 0;
21487 return true;
21488 }
21489 else if ((outer_code == PLUS
21490 && reg_or_add_cint_operand (x, mode))
21491 || (outer_code == MINUS
21492 && reg_or_sub_cint_operand (x, mode))
21493 || ((outer_code == SET
21494 || outer_code == IOR
21495 || outer_code == XOR)
21496 && (INTVAL (x)
21497 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
21498 {
21499 *total = COSTS_N_INSNS (1);
21500 return true;
21501 }
21502 /* FALLTHRU */
21503
21504 case CONST_DOUBLE:
21505 case CONST_WIDE_INT:
21506 case CONST:
21507 case HIGH:
21508 case SYMBOL_REF:
21509 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
21510 return true;
21511
21512 case MEM:
21513 /* When optimizing for size, MEM should be slightly more expensive
21514 than generating the address, e.g., (plus (reg) (const)).
21515 L1 cache latency is about two instructions. */
21516 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
21517 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
21518 *total += COSTS_N_INSNS (100);
21519 return true;
21520
21521 case LABEL_REF:
21522 *total = 0;
21523 return true;
21524
21525 case PLUS:
21526 case MINUS:
21527 if (FLOAT_MODE_P (mode))
21528 *total = rs6000_cost->fp;
21529 else
21530 *total = COSTS_N_INSNS (1);
21531 return false;
21532
21533 case MULT:
21534 if (CONST_INT_P (XEXP (x, 1))
21535 && satisfies_constraint_I (XEXP (x, 1)))
21536 {
21537 if (INTVAL (XEXP (x, 1)) >= -256
21538 && INTVAL (XEXP (x, 1)) <= 255)
21539 *total = rs6000_cost->mulsi_const9;
21540 else
21541 *total = rs6000_cost->mulsi_const;
21542 }
21543 else if (mode == SFmode)
21544 *total = rs6000_cost->fp;
21545 else if (FLOAT_MODE_P (mode))
21546 *total = rs6000_cost->dmul;
21547 else if (mode == DImode)
21548 *total = rs6000_cost->muldi;
21549 else
21550 *total = rs6000_cost->mulsi;
21551 return false;
21552
21553 case FMA:
21554 if (mode == SFmode)
21555 *total = rs6000_cost->fp;
21556 else
21557 *total = rs6000_cost->dmul;
21558 break;
21559
21560 case DIV:
21561 case MOD:
21562 if (FLOAT_MODE_P (mode))
21563 {
21564 *total = mode == DFmode ? rs6000_cost->ddiv
21565 : rs6000_cost->sdiv;
21566 return false;
21567 }
21568 /* FALLTHRU */
21569
21570 case UDIV:
21571 case UMOD:
21572 if (CONST_INT_P (XEXP (x, 1))
21573 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
21574 {
21575 if (code == DIV || code == MOD)
21576 /* Shift, addze */
21577 *total = COSTS_N_INSNS (2);
21578 else
21579 /* Shift */
21580 *total = COSTS_N_INSNS (1);
21581 }
21582 else
21583 {
21584 if (GET_MODE (XEXP (x, 1)) == DImode)
21585 *total = rs6000_cost->divdi;
21586 else
21587 *total = rs6000_cost->divsi;
21588 }
21589 /* Add in shift and subtract for MOD unless we have a mod instruction. */
21590 if (!TARGET_MODULO && (code == MOD || code == UMOD))
21591 *total += COSTS_N_INSNS (2);
21592 return false;
21593
21594 case CTZ:
21595 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
21596 return false;
21597
21598 case FFS:
21599 *total = COSTS_N_INSNS (4);
21600 return false;
21601
21602 case POPCOUNT:
21603 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
21604 return false;
21605
21606 case PARITY:
21607 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
21608 return false;
21609
21610 case NOT:
21611 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
21612 *total = 0;
21613 else
21614 *total = COSTS_N_INSNS (1);
21615 return false;
21616
21617 case AND:
21618 if (CONST_INT_P (XEXP (x, 1)))
21619 {
21620 rtx left = XEXP (x, 0);
21621 rtx_code left_code = GET_CODE (left);
21622
21623 /* rotate-and-mask: 1 insn. */
21624 if ((left_code == ROTATE
21625 || left_code == ASHIFT
21626 || left_code == LSHIFTRT)
21627 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
21628 {
21629 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
21630 if (!CONST_INT_P (XEXP (left, 1)))
21631 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
21632 *total += COSTS_N_INSNS (1);
21633 return true;
21634 }
21635
21636 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
21637 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
21638 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
21639 || (val & 0xffff) == val
21640 || (val & 0xffff0000) == val
21641 || ((val & 0xffff) == 0 && mode == SImode))
21642 {
21643 *total = rtx_cost (left, mode, AND, 0, speed);
21644 *total += COSTS_N_INSNS (1);
21645 return true;
21646 }
21647
21648 /* 2 insns. */
21649 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
21650 {
21651 *total = rtx_cost (left, mode, AND, 0, speed);
21652 *total += COSTS_N_INSNS (2);
21653 return true;
21654 }
21655 }
21656
21657 *total = COSTS_N_INSNS (1);
21658 return false;
21659
21660 case IOR:
21661 /* FIXME */
21662 *total = COSTS_N_INSNS (1);
21663 return true;
21664
21665 case CLZ:
21666 case XOR:
21667 case ZERO_EXTRACT:
21668 *total = COSTS_N_INSNS (1);
21669 return false;
21670
21671 case ASHIFT:
21672 /* EXTSWSLI combines a sign extend and a shift into one instruction, so
21673 don't count the two operations separately within the insn. */
21674 if (TARGET_EXTSWSLI && mode == DImode
21675 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
21676 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
21677 {
21678 *total = 0;
21679 return false;
21680 }
21681 /* fall through */
21682
21683 case ASHIFTRT:
21684 case LSHIFTRT:
21685 case ROTATE:
21686 case ROTATERT:
21687 /* Handle mul_highpart. */
21688 if (outer_code == TRUNCATE
21689 && GET_CODE (XEXP (x, 0)) == MULT)
21690 {
21691 if (mode == DImode)
21692 *total = rs6000_cost->muldi;
21693 else
21694 *total = rs6000_cost->mulsi;
21695 return true;
21696 }
21697 else if (outer_code == AND)
21698 *total = 0;
21699 else
21700 *total = COSTS_N_INSNS (1);
21701 return false;
21702
21703 case SIGN_EXTEND:
21704 case ZERO_EXTEND:
21705 if (MEM_P (XEXP (x, 0)))
21706 *total = 0;
21707 else
21708 *total = COSTS_N_INSNS (1);
21709 return false;
21710
21711 case COMPARE:
21712 case NEG:
21713 case ABS:
21714 if (!FLOAT_MODE_P (mode))
21715 {
21716 *total = COSTS_N_INSNS (1);
21717 return false;
21718 }
21719 /* FALLTHRU */
21720
21721 case FLOAT:
21722 case UNSIGNED_FLOAT:
21723 case FIX:
21724 case UNSIGNED_FIX:
21725 case FLOAT_TRUNCATE:
21726 *total = rs6000_cost->fp;
21727 return false;
21728
21729 case FLOAT_EXTEND:
21730 if (mode == DFmode)
21731 *total = rs6000_cost->sfdf_convert;
21732 else
21733 *total = rs6000_cost->fp;
21734 return false;
21735
21736 case CALL:
21737 case IF_THEN_ELSE:
21738 if (!speed)
21739 {
21740 *total = COSTS_N_INSNS (1);
21741 return true;
21742 }
21743 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
21744 {
21745 *total = rs6000_cost->fp;
21746 return false;
21747 }
21748 break;
21749
21750 case NE:
21751 case EQ:
21752 case GTU:
21753 case LTU:
21754 /* Carry bit requires mode == Pmode.
21755 NEG or PLUS already counted so only add one. */
21756 if (mode == Pmode
21757 && (outer_code == NEG || outer_code == PLUS))
21758 {
21759 *total = COSTS_N_INSNS (1);
21760 return true;
21761 }
21762 /* FALLTHRU */
21763
21764 case GT:
21765 case LT:
21766 case UNORDERED:
21767 if (outer_code == SET)
21768 {
21769 if (XEXP (x, 1) == const0_rtx)
21770 {
21771 *total = COSTS_N_INSNS (2);
21772 return true;
21773 }
21774 else
21775 {
21776 *total = COSTS_N_INSNS (3);
21777 return false;
21778 }
21779 }
21780 /* CC COMPARE. */
21781 if (outer_code == COMPARE)
21782 {
21783 *total = 0;
21784 return true;
21785 }
21786 break;
21787
21788 case UNSPEC:
21789 if (XINT (x, 1) == UNSPEC_MMA_XXSETACCZ)
21790 {
21791 *total = 0;
21792 return true;
21793 }
21794 break;
21795
21796 default:
21797 break;
21798 }
21799
21800 return false;
21801 }
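/* Editorial aside, not in the original source: a sketch of how the middle
   end consumes this hook.  rtx_cost () dispatches through targetm.rtx_costs;
   returning true above stops the recursive walk over subexpressions, while
   returning false lets rtx_cost () keep scanning.  Assuming the generic
   interface, a caller pricing X as the source of a SET looks roughly like

     int cost = rtx_cost (x, mode, SET, 1, optimize_insn_for_speed_p ());

   and since COSTS_N_INSNS (n) expands to n * 4, "*total = COSTS_N_INSNS (1)"
   prices an expression at one typical machine instruction.  */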
21802
21803 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
21804
21805 static bool
21806 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
21807 int opno, int *total, bool speed)
21808 {
21809 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
21810
21811 fprintf (stderr,
21812 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
21813 "opno = %d, total = %d, speed = %s, x:\n",
21814 ret ? "complete" : "scan inner",
21815 GET_MODE_NAME (mode),
21816 GET_RTX_NAME (outer_code),
21817 opno,
21818 *total,
21819 speed ? "true" : "false");
21820
21821 debug_rtx (x);
21822
21823 return ret;
21824 }
21825
21826 static int
21827 rs6000_insn_cost (rtx_insn *insn, bool speed)
21828 {
21829 if (recog_memoized (insn) < 0)
21830 return 0;
21831
21832 /* If we are optimizing for size, just use the length. */
21833 if (!speed)
21834 return get_attr_length (insn);
21835
21836 /* Use the cost if provided. */
21837 int cost = get_attr_cost (insn);
21838 if (cost > 0)
21839 return cost;
21840
21841 /* If the insn tells us how many insns there are, use that. Otherwise use
21842 the length/4. Adjust the insn length to remove the extra size that
21843 prefixed instructions take. */
21844 int n = get_attr_num_insns (insn);
21845 if (n == 0)
21846 {
21847 int length = get_attr_length (insn);
21848 if (get_attr_prefixed (insn) == PREFIXED_YES)
21849 {
21850 int adjust = 0;
21851 ADJUST_INSN_LENGTH (insn, adjust);
21852 length -= adjust;
21853 }
21854
21855 n = length / 4;
21856 }
21857
21858 enum attr_type type = get_attr_type (insn);
21859
21860 switch (type)
21861 {
21862 case TYPE_LOAD:
21863 case TYPE_FPLOAD:
21864 case TYPE_VECLOAD:
21865 cost = COSTS_N_INSNS (n + 1);
21866 break;
21867
21868 case TYPE_MUL:
21869 switch (get_attr_size (insn))
21870 {
21871 case SIZE_8:
21872 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
21873 break;
21874 case SIZE_16:
21875 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
21876 break;
21877 case SIZE_32:
21878 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
21879 break;
21880 case SIZE_64:
21881 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
21882 break;
21883 default:
21884 gcc_unreachable ();
21885 }
21886 break;
21887 case TYPE_DIV:
21888 switch (get_attr_size (insn))
21889 {
21890 case SIZE_32:
21891 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
21892 break;
21893 case SIZE_64:
21894 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
21895 break;
21896 default:
21897 gcc_unreachable ();
21898 }
21899 break;
21900
21901 case TYPE_FP:
21902 cost = n * rs6000_cost->fp;
21903 break;
21904 case TYPE_DMUL:
21905 cost = n * rs6000_cost->dmul;
21906 break;
21907 case TYPE_SDIV:
21908 cost = n * rs6000_cost->sdiv;
21909 break;
21910 case TYPE_DDIV:
21911 cost = n * rs6000_cost->ddiv;
21912 break;
21913
21914 case TYPE_SYNC:
21915 case TYPE_LOAD_L:
21916 case TYPE_MFCR:
21917 case TYPE_MFCRF:
21918 cost = COSTS_N_INSNS (n + 2);
21919 break;
21920
21921 default:
21922 cost = COSTS_N_INSNS (n);
21923 }
21924
21925 return cost;
21926 }
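/* Editorial worked example, not in the original source: for a plain 4-byte
   load with no "cost" or "num_insns" attribute, n = 4 / 4 = 1 and the
   TYPE_LOAD case above yields COSTS_N_INSNS (2), pricing the load one insn
   above a simple ALU operation to reflect L1 latency; sync/mfcr-class insns
   are priced two above, via COSTS_N_INSNS (n + 2).  */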
21927
21928 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
21929
21930 static int
21931 rs6000_debug_address_cost (rtx x, machine_mode mode,
21932 addr_space_t as, bool speed)
21933 {
21934 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
21935
21936 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
21937 ret, speed ? "true" : "false");
21938 debug_rtx (x);
21939
21940 return ret;
21941 }
21942
21943
21944 /* A C expression returning the cost of moving data from a register of class
21945 CLASS1 to one of CLASS2. */
21946
21947 static int
21948 rs6000_register_move_cost (machine_mode mode,
21949 reg_class_t from, reg_class_t to)
21950 {
21951 int ret;
21952 reg_class_t rclass;
21953
21954 if (TARGET_DEBUG_COST)
21955 dbg_cost_ctrl++;
21956
21957 /* If we have VSX, we can easily move between FPR or Altivec registers,
21958 otherwise we can only easily move within classes.
21959 Do this first so we give best-case answers for union classes
21960 containing both gprs and vsx regs. */
21961 HARD_REG_SET to_vsx, from_vsx;
21962 to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
21963 from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
21964 if (!hard_reg_set_empty_p (to_vsx)
21965 && !hard_reg_set_empty_p (from_vsx)
21966 && (TARGET_VSX
21967 || hard_reg_set_intersect_p (to_vsx, from_vsx)))
21968 {
21969 int reg = FIRST_FPR_REGNO;
21970 if (TARGET_VSX
21971 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
21972 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
21973 reg = FIRST_ALTIVEC_REGNO;
21974 ret = 2 * hard_regno_nregs (reg, mode);
21975 }
21976
21977 /* Moves from/to GENERAL_REGS. */
21978 else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
21979 || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
21980 {
21981 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
21982 {
21983 if (TARGET_DIRECT_MOVE)
21984 {
21985 /* Keep the cost for direct moves above that for within
21986 a register class even if the actual processor cost is
21987 comparable. We do this because a direct move insn
21988 can't be a nop, whereas with ideal register
21989 allocation a move within the same class might turn
21990 out to be a nop. */
21991 if (rs6000_tune == PROCESSOR_POWER9
21992 || rs6000_tune == PROCESSOR_POWER10)
21993 ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21994 else
21995 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21996 /* SFmode requires a conversion when moving between gprs
21997 and vsx. */
21998 if (mode == SFmode)
21999 ret += 2;
22000 }
22001 else
22002 ret = (rs6000_memory_move_cost (mode, rclass, false)
22003 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
22004 }
22005
22006 /* It's more expensive to move CR_REGS than CR0_REGS because of the
22007 shift. */
22008 else if (rclass == CR_REGS)
22009 ret = 4;
22010
22011 /* For those processors that have slow LR/CTR moves, make them more
22012 expensive than memory in order to bias spills to memory. */
22013 else if ((rs6000_tune == PROCESSOR_POWER6
22014 || rs6000_tune == PROCESSOR_POWER7
22015 || rs6000_tune == PROCESSOR_POWER8
22016 || rs6000_tune == PROCESSOR_POWER9)
22017 && reg_class_subset_p (rclass, SPECIAL_REGS))
22018 ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22019
22020 else
22021 /* A move will cost one instruction per GPR moved. */
22022 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22023 }
22024
22025 /* Everything else has to go through GENERAL_REGS. */
22026 else
22027 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
22028 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
22029
22030 if (TARGET_DEBUG_COST)
22031 {
22032 if (dbg_cost_ctrl == 1)
22033 fprintf (stderr,
22034 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
22035 ret, GET_MODE_NAME (mode), reg_class_names[from],
22036 reg_class_names[to]);
22037 dbg_cost_ctrl--;
22038 }
22039
22040 return ret;
22041 }
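/* Editorial example, not in the original source: a move between two classes
   that both miss GENERAL_REGS, say CR_REGS to FLOAT_REGS, falls into the
   final arm above and is priced as the sum of the two legs through a GPR:

     rs6000_register_move_cost (mode, GENERAL_REGS, FLOAT_REGS)
       + rs6000_register_move_cost (mode, CR_REGS, GENERAL_REGS);  */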
22042
22043 /* A C expression returning the cost of moving data of mode MODE from a register to
22044 or from memory. */
22045
22046 static int
22047 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
22048 bool in ATTRIBUTE_UNUSED)
22049 {
22050 int ret;
22051
22052 if (TARGET_DEBUG_COST)
22053 dbg_cost_ctrl++;
22054
22055 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
22056 ret = 4 * hard_regno_nregs (0, mode);
22057 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
22058 || reg_classes_intersect_p (rclass, VSX_REGS)))
22059 ret = 4 * hard_regno_nregs (32, mode);
22060 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
22061 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
22062 else
22063 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
22064
22065 if (TARGET_DEBUG_COST)
22066 {
22067 if (dbg_cost_ctrl == 1)
22068 fprintf (stderr,
22069 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
22070 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
22071 dbg_cost_ctrl--;
22072 }
22073
22074 return ret;
22075 }
22076
22077 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
22078
22079 The register allocator chooses GEN_OR_VSX_REGS for the allocno
22080 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
22081 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
22082 move cost between GENERAL_REGS and VSX_REGS low.
22083
22084 It might seem reasonable to use a union class. After all, if usage
22085 of vsr is low and gpr high, it might make sense to spill gpr to vsr
22086 rather than memory. However, in cases where register pressure of
22087 both is high, like the cactus_adm spec test, allowing
22088 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
22089 the first scheduling pass. This is partly due to an allocno of
22090 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
22091 class, which gives too high a pressure for GENERAL_REGS and too low
22092 for VSX_REGS. So, force a choice of the subclass here.
22093
22094 The best class is also the union if GENERAL_REGS and VSX_REGS have
22095 the same cost. In that case we do use GEN_OR_VSX_REGS as the
22096 allocno class, since trying to narrow down the class by regno mode
22097 is prone to error. For example, SImode is allowed in VSX regs and
22098 in some cases (e.g. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
22099 it would be wrong to choose an allocno of GENERAL_REGS based on
22100 SImode. */
22101
22102 static reg_class_t
22103 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
22104 reg_class_t allocno_class,
22105 reg_class_t best_class)
22106 {
22107 switch (allocno_class)
22108 {
22109 case GEN_OR_VSX_REGS:
22110 /* best_class must be a subset of allocno_class. */
22111 gcc_checking_assert (best_class == GEN_OR_VSX_REGS
22112 || best_class == GEN_OR_FLOAT_REGS
22113 || best_class == VSX_REGS
22114 || best_class == ALTIVEC_REGS
22115 || best_class == FLOAT_REGS
22116 || best_class == GENERAL_REGS
22117 || best_class == BASE_REGS);
22118 /* Use best_class but choose wider classes when copying from the
22119 wider class to best_class is cheap. This mimics IRA choice
22120 of allocno class. */
22121 if (best_class == BASE_REGS)
22122 return GENERAL_REGS;
22123 if (TARGET_VSX
22124 && (best_class == FLOAT_REGS || best_class == ALTIVEC_REGS))
22125 return VSX_REGS;
22126 return best_class;
22127
22128 default:
22129 break;
22130 }
22131
22132 return allocno_class;
22133 }
22134
22135 /* Return the decl of a target-specific builtin that implements the
22136 reciprocal of function FNDECL, or NULL_TREE if not available. */
22137
22138 static tree
22139 rs6000_builtin_reciprocal (tree fndecl)
22140 {
22141 switch (DECL_MD_FUNCTION_CODE (fndecl))
22142 {
22143 case VSX_BUILTIN_XVSQRTDP:
22144 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
22145 return NULL_TREE;
22146
22147 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
22148
22149 case VSX_BUILTIN_XVSQRTSP:
22150 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
22151 return NULL_TREE;
22152
22153 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
22154
22155 default:
22156 return NULL_TREE;
22157 }
22158 }
22159
22160 /* Load up a constant. If the mode is a vector mode, splat the value across
22161 all of the vector elements. */
22162
22163 static rtx
22164 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
22165 {
22166 rtx reg;
22167
22168 if (mode == SFmode || mode == DFmode)
22169 {
22170 rtx d = const_double_from_real_value (dconst, mode);
22171 reg = force_reg (mode, d);
22172 }
22173 else if (mode == V4SFmode)
22174 {
22175 rtx d = const_double_from_real_value (dconst, SFmode);
22176 rtvec v = gen_rtvec (4, d, d, d, d);
22177 reg = gen_reg_rtx (mode);
22178 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22179 }
22180 else if (mode == V2DFmode)
22181 {
22182 rtx d = const_double_from_real_value (dconst, DFmode);
22183 rtvec v = gen_rtvec (2, d, d);
22184 reg = gen_reg_rtx (mode);
22185 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22186 }
22187 else
22188 gcc_unreachable ();
22189
22190 return reg;
22191 }
22192
22193 /* Generate an FMA instruction. */
22194
22195 static void
22196 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
22197 {
22198 machine_mode mode = GET_MODE (target);
22199 rtx dst;
22200
22201 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
22202 gcc_assert (dst != NULL);
22203
22204 if (dst != target)
22205 emit_move_insn (target, dst);
22206 }
22207
22208 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
22209
22210 static void
22211 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
22212 {
22213 machine_mode mode = GET_MODE (dst);
22214 rtx r;
22215
22216 /* This is a tad more complicated, since the fnma_optab is for
22217 a different expression: fma(-m1, m2, a), which is the same
22218 thing except in the case of signed zeros.
22219
22220 Fortunately we know that if FMA is supported that FNMSUB is
22221 also supported in the ISA. Just expand it directly. */
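  /* Editorial worked example, not in the original source: with
     m1 = m2 = a = 1.0, fma (-m1, m2, a) = -1 + 1 = +0.0, while
     -fma (m1, m2, -a) = -(1 - 1) = -(+0.0) = -0.0; the two forms differ
     exactly in the sign of a zero result, which is why FNMSUB is expanded
     directly below instead of through fnma_optab.  */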
22222
22223 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
22224
22225 r = gen_rtx_NEG (mode, a);
22226 r = gen_rtx_FMA (mode, m1, m2, r);
22227 r = gen_rtx_NEG (mode, r);
22228 emit_insn (gen_rtx_SET (dst, r));
22229 }
22230
22231 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
22232 add a reg_note saying that this was a division. Support both scalar and
22233 vector divide. Assumes no trapping math and finite arguments. */
22234
22235 void
22236 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
22237 {
22238 machine_mode mode = GET_MODE (dst);
22239 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
22240 int i;
22241
22242 /* Low precision estimates guarantee 5 bits of accuracy. High
22243 precision estimates guarantee 14 bits of accuracy. SFmode
22244 requires 23 bits of accuracy. DFmode requires 52 bits of
22245 accuracy. Each pass at least doubles the accuracy, leading
22246 to the following. */
22247 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
22248 if (mode == DFmode || mode == V2DFmode)
22249 passes++;
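  /* Editorial worked example: with high-precision (14-bit) estimates, one
     pass gives 28 bits >= 23 for SFmode and two passes give 56 bits >= 52
     for DFmode; with 5-bit estimates, three passes give 5 -> 10 -> 20 -> 40
     bits >= 23 and four give 80 bits >= 52, matching the computation
     above.  */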
22250
22251 enum insn_code code = optab_handler (smul_optab, mode);
22252 insn_gen_fn gen_mul = GEN_FCN (code);
22253
22254 gcc_assert (code != CODE_FOR_nothing);
22255
22256 one = rs6000_load_constant_and_splat (mode, dconst1);
22257
22258 /* x0 = 1./d estimate */
22259 x0 = gen_reg_rtx (mode);
22260 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
22261 UNSPEC_FRES)));
22262
22263 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
22264 if (passes > 1) {
22265
22266 /* e0 = 1. - d * x0 */
22267 e0 = gen_reg_rtx (mode);
22268 rs6000_emit_nmsub (e0, d, x0, one);
22269
22270 /* x1 = x0 + e0 * x0 */
22271 x1 = gen_reg_rtx (mode);
22272 rs6000_emit_madd (x1, e0, x0, x0);
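  /* Editorial algebra check: x0 + e0 * x0 = x0 * (1 + (1 - d * x0))
     = x0 * (2 - d * x0), i.e. exactly one Newton-Raphson step for 1/d.  */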
22273
22274 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
22275 ++i, xprev = xnext, eprev = enext) {
22276
22277 /* enext = eprev * eprev */
22278 enext = gen_reg_rtx (mode);
22279 emit_insn (gen_mul (enext, eprev, eprev));
22280
22281 /* xnext = xprev + enext * xprev */
22282 xnext = gen_reg_rtx (mode);
22283 rs6000_emit_madd (xnext, enext, xprev, xprev);
22284 }
22285
22286 } else
22287 xprev = x0;
22288
22289 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
22290
22291 /* u = n * xprev */
22292 u = gen_reg_rtx (mode);
22293 emit_insn (gen_mul (u, n, xprev));
22294
22295 /* v = n - (d * u) */
22296 v = gen_reg_rtx (mode);
22297 rs6000_emit_nmsub (v, d, u, n);
22298
22299 /* dst = (v * xprev) + u */
22300 rs6000_emit_madd (dst, v, xprev, u);
22301
22302 if (note_p)
22303 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
22304 }
22305
22306 /* Goldschmidt's Algorithm for single/double-precision floating point
22307 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
22308
22309 void
22310 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
22311 {
22312 machine_mode mode = GET_MODE (src);
22313 rtx e = gen_reg_rtx (mode);
22314 rtx g = gen_reg_rtx (mode);
22315 rtx h = gen_reg_rtx (mode);
22316
22317 /* Low precision estimates guarantee 5 bits of accuracy. High
22318 precision estimates guarantee 14 bits of accuracy. SFmode
22319 requires 23 bits of accuracy. DFmode requires 52 bits of
22320 accuracy. Each pass at least doubles the accuracy, leading
22321 to the following. */
22322 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
22323 if (mode == DFmode || mode == V2DFmode)
22324 passes++;
22325
22326 int i;
22327 rtx mhalf;
22328 enum insn_code code = optab_handler (smul_optab, mode);
22329 insn_gen_fn gen_mul = GEN_FCN (code);
22330
22331 gcc_assert (code != CODE_FOR_nothing);
22332
22333 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
22334
22335 /* e = rsqrt estimate */
22336 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
22337 UNSPEC_RSQRT)));
22338
22339 /* If src == 0.0, filter out the infinite rsqrt estimate so 0 * inf does not produce NaN for sqrt (0.0). */
22340 if (!recip)
22341 {
22342 rtx zero = force_reg (mode, CONST0_RTX (mode));
22343
22344 if (mode == SFmode)
22345 {
22346 rtx target = emit_conditional_move (e, GT, src, zero, mode,
22347 e, zero, mode, 0);
22348 if (target != e)
22349 emit_move_insn (e, target);
22350 }
22351 else
22352 {
22353 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
22354 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
22355 }
22356 }
22357
22358 /* g = sqrt estimate. */
22359 emit_insn (gen_mul (g, e, src));
22360 /* h = 1/(2*sqrt) estimate. */
22361 emit_insn (gen_mul (h, e, mhalf));
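  /* Editorial note: the Goldschmidt invariants are g -> sqrt (src) and
     h -> 1 / (2 * sqrt (src)), so g * h -> 1/2.  Each refinement pass below
     computes t = 1/2 - g * h and updates g += t * g, h += t * h; doubling
     the converged h therefore yields 1 / sqrt (src).  */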
22362
22363 if (recip)
22364 {
22365 if (passes == 1)
22366 {
22367 rtx t = gen_reg_rtx (mode);
22368 rs6000_emit_nmsub (t, g, h, mhalf);
22369 /* Apply correction directly to 1/rsqrt estimate. */
22370 rs6000_emit_madd (dst, e, t, e);
22371 }
22372 else
22373 {
22374 for (i = 0; i < passes; i++)
22375 {
22376 rtx t1 = gen_reg_rtx (mode);
22377 rtx g1 = gen_reg_rtx (mode);
22378 rtx h1 = gen_reg_rtx (mode);
22379
22380 rs6000_emit_nmsub (t1, g, h, mhalf);
22381 rs6000_emit_madd (g1, g, t1, g);
22382 rs6000_emit_madd (h1, h, t1, h);
22383
22384 g = g1;
22385 h = h1;
22386 }
22387 /* Multiply by 2 for 1/rsqrt. */
22388 emit_insn (gen_add3_insn (dst, h, h));
22389 }
22390 }
22391 else
22392 {
22393 rtx t = gen_reg_rtx (mode);
22394 rs6000_emit_nmsub (t, g, h, mhalf);
22395 rs6000_emit_madd (dst, g, t, g);
22396 }
22397
22398 return;
22399 }
22400
22401 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
22402 (Power7) targets. DST is the target, and SRC is the argument operand. */
22403
22404 void
22405 rs6000_emit_popcount (rtx dst, rtx src)
22406 {
22407 machine_mode mode = GET_MODE (dst);
22408 rtx tmp1, tmp2;
22409
22410 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
22411 if (TARGET_POPCNTD)
22412 {
22413 if (mode == SImode)
22414 emit_insn (gen_popcntdsi2 (dst, src));
22415 else
22416 emit_insn (gen_popcntddi2 (dst, src));
22417 return;
22418 }
22419
22420 tmp1 = gen_reg_rtx (mode);
22421
22422 if (mode == SImode)
22423 {
22424 emit_insn (gen_popcntbsi2 (tmp1, src));
22425 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
22426 NULL_RTX, 0);
22427 tmp2 = force_reg (SImode, tmp2);
22428 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
22429 }
22430 else
22431 {
22432 emit_insn (gen_popcntbdi2 (tmp1, src));
22433 tmp2 = expand_mult (DImode, tmp1,
22434 GEN_INT ((HOST_WIDE_INT)
22435 0x01010101 << 32 | 0x01010101),
22436 NULL_RTX, 0);
22437 tmp2 = force_reg (DImode, tmp2);
22438 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
22439 }
22440 }
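/* Editorial worked example, not in the original source: popcntb leaves each
   byte's population count in that byte.  Multiplying by 0x01010101 (or the
   64-bit 0x0101010101010101) adds byte-shifted copies, so the top byte
   becomes the sum of all the byte counts, and the final shift by 24 (or 56)
   extracts it.  E.g. per-byte counts 0x01020304 * 0x01010101 = 0x0A090704,
   whose top byte 0x0A = 1 + 2 + 3 + 4 = 10.  */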
22441
22442
22443 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
22444 target, and SRC is the argument operand. */
22445
22446 void
22447 rs6000_emit_parity (rtx dst, rtx src)
22448 {
22449 machine_mode mode = GET_MODE (dst);
22450 rtx tmp;
22451
22452 tmp = gen_reg_rtx (mode);
22453
22454 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
22455 if (TARGET_CMPB)
22456 {
22457 if (mode == SImode)
22458 {
22459 emit_insn (gen_popcntbsi2 (tmp, src));
22460 emit_insn (gen_paritysi2_cmpb (dst, tmp));
22461 }
22462 else
22463 {
22464 emit_insn (gen_popcntbdi2 (tmp, src));
22465 emit_insn (gen_paritydi2_cmpb (dst, tmp));
22466 }
22467 return;
22468 }
22469
22470 if (mode == SImode)
22471 {
22472 /* Is mult+shift >= shift+xor+shift+xor? */
22473 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
22474 {
22475 rtx tmp1, tmp2, tmp3, tmp4;
22476
22477 tmp1 = gen_reg_rtx (SImode);
22478 emit_insn (gen_popcntbsi2 (tmp1, src));
22479
22480 tmp2 = gen_reg_rtx (SImode);
22481 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
22482 tmp3 = gen_reg_rtx (SImode);
22483 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
22484
22485 tmp4 = gen_reg_rtx (SImode);
22486 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
22487 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
22488 }
22489 else
22490 rs6000_emit_popcount (tmp, src);
22491 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
22492 }
22493 else
22494 {
22495 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
22496 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
22497 {
22498 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
22499
22500 tmp1 = gen_reg_rtx (DImode);
22501 emit_insn (gen_popcntbdi2 (tmp1, src));
22502
22503 tmp2 = gen_reg_rtx (DImode);
22504 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
22505 tmp3 = gen_reg_rtx (DImode);
22506 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
22507
22508 tmp4 = gen_reg_rtx (DImode);
22509 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
22510 tmp5 = gen_reg_rtx (DImode);
22511 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
22512
22513 tmp6 = gen_reg_rtx (DImode);
22514 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
22515 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
22516 }
22517 else
22518 rs6000_emit_popcount (tmp, src);
22519 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
22520 }
22521 }
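/* Editorial note, not in the original source: after popcntb each byte holds
   its byte's population count, and the parity of the word is the low bit of
   the sum of those counts.  XOR is addition modulo 2 in every bit, so the
   shift/xor folds above (by 16 and 8 for SImode; 32, 16 and 8 for DImode)
   accumulate every byte's low bit into the bottom byte, where the final
   AND with 1 extracts the parity.  */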
22522
22523 /* Expand an Altivec constant permutation for little endian mode.
22524 OP0 and OP1 are the input vectors and TARGET is the output vector.
22525 SEL specifies the constant permutation vector.
22526
22527 There are two issues: First, the two input operands must be
22528 swapped so that together they form a double-wide array in LE
22529 order. Second, the vperm instruction has surprising behavior
22530 in LE mode: it interprets the elements of the source vectors
22531 in BE mode ("left to right") and interprets the elements of
22532 the destination vector in LE mode ("right to left"). To
22533 correct for this, we must subtract each element of the permute
22534 control vector from 31.
22535
22536 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
22537 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
22538 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
22539 serve as the permute control vector. Then, in BE mode,
22540
22541 vperm 9,10,11,12
22542
22543 places the desired result in vr9. However, in LE mode the
22544 vector contents will be
22545
22546 vr10 = 00000003 00000002 00000001 00000000
22547 vr11 = 00000007 00000006 00000005 00000004
22548
22549 The result of the vperm using the same permute control vector is
22550
22551 vr9 = 05000000 07000000 01000000 03000000
22552
22553 That is, the leftmost 4 bytes of vr10 are interpreted as the
22554 source for the rightmost 4 bytes of vr9, and so on.
22555
22556 If we change the permute control vector to
22557
22558 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
22559
22560 and issue
22561
22562 vperm 9,11,10,12
22563
22564 we get the desired
22565
22566 vr9 = 00000006 00000004 00000002 00000000. */
22567
22568 static void
22569 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
22570 const vec_perm_indices &sel)
22571 {
22572 unsigned int i;
22573 rtx perm[16];
22574 rtx constv, unspec;
22575
22576 /* Unpack and adjust the constant selector. */
22577 for (i = 0; i < 16; ++i)
22578 {
22579 unsigned int elt = 31 - (sel[i] & 31);
22580 perm[i] = GEN_INT (elt);
22581 }
22582
22583 /* Expand to a permute, swapping the inputs and using the
22584 adjusted selector. */
22585 if (!REG_P (op0))
22586 op0 = force_reg (V16QImode, op0);
22587 if (!REG_P (op1))
22588 op1 = force_reg (V16QImode, op1);
22589
22590 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
22591 constv = force_reg (V16QImode, constv);
22592 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
22593 UNSPEC_VPERM);
22594 if (!REG_P (target))
22595 {
22596 rtx tmp = gen_reg_rtx (V16QImode);
22597 emit_move_insn (tmp, unspec);
22598 unspec = tmp;
22599 }
22600
22601 emit_move_insn (target, unspec);
22602 }
22603
22604 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
22605 permute control vector. But here it's not a constant, so we must
22606 generate a vector NAND or NOR to do the adjustment. */
22607
22608 void
22609 altivec_expand_vec_perm_le (rtx operands[4])
22610 {
22611 rtx notx, iorx, unspec;
22612 rtx target = operands[0];
22613 rtx op0 = operands[1];
22614 rtx op1 = operands[2];
22615 rtx sel = operands[3];
22616 rtx tmp = target;
22617 rtx norreg = gen_reg_rtx (V16QImode);
22618 machine_mode mode = GET_MODE (target);
22619
22620 /* Get everything in regs so the pattern matches. */
22621 if (!REG_P (op0))
22622 op0 = force_reg (mode, op0);
22623 if (!REG_P (op1))
22624 op1 = force_reg (mode, op1);
22625 if (!REG_P (sel))
22626 sel = force_reg (V16QImode, sel);
22627 if (!REG_P (target))
22628 tmp = gen_reg_rtx (mode);
22629
22630 if (TARGET_P9_VECTOR)
22631 {
22632 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
22633 UNSPEC_VPERMR);
22634 }
22635 else
22636 {
22637 /* Invert the selector with a VNAND if available, else a VNOR.
22638 The VNAND is preferred for future fusion opportunities. */
22639 notx = gen_rtx_NOT (V16QImode, sel);
22640 iorx = (TARGET_P8_VECTOR
22641 ? gen_rtx_IOR (V16QImode, notx, notx)
22642 : gen_rtx_AND (V16QImode, notx, notx));
22643 emit_insn (gen_rtx_SET (norreg, iorx));
22644
22645 /* Permute with operands reversed and adjusted selector. */
22646 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
22647 UNSPEC_VPERM);
22648 }
22649
22650 /* Copy into target, possibly by way of a register. */
22651 if (!REG_P (target))
22652 {
22653 emit_move_insn (tmp, unspec);
22654 unspec = tmp;
22655 }
22656
22657 emit_move_insn (target, unspec);
22658 }
22659
22660 /* Expand an Altivec constant permutation. Return true if we match
22661 an efficient implementation; false to fall back to VPERM.
22662
22663 OP0 and OP1 are the input vectors and TARGET is the output vector.
22664 SEL specifies the constant permutation vector. */
22665
22666 static bool
22667 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
22668 const vec_perm_indices &sel)
22669 {
22670 struct altivec_perm_insn {
22671 HOST_WIDE_INT mask;
22672 enum insn_code impl;
22673 unsigned char perm[16];
22674 };
22675 static const struct altivec_perm_insn patterns[] = {
22676 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
22677 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
22678 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
22679 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
22680 { OPTION_MASK_ALTIVEC,
22681 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
22682 : CODE_FOR_altivec_vmrglb_direct),
22683 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
22684 { OPTION_MASK_ALTIVEC,
22685 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
22686 : CODE_FOR_altivec_vmrglh_direct),
22687 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
22688 { OPTION_MASK_ALTIVEC,
22689 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
22690 : CODE_FOR_altivec_vmrglw_direct),
22691 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
22692 { OPTION_MASK_ALTIVEC,
22693 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
22694 : CODE_FOR_altivec_vmrghb_direct),
22695 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
22696 { OPTION_MASK_ALTIVEC,
22697 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
22698 : CODE_FOR_altivec_vmrghh_direct),
22699 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
22700 { OPTION_MASK_ALTIVEC,
22701 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
22702 : CODE_FOR_altivec_vmrghw_direct),
22703 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
22704 { OPTION_MASK_P8_VECTOR,
22705 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
22706 : CODE_FOR_p8_vmrgow_v4sf_direct),
22707 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
22708 { OPTION_MASK_P8_VECTOR,
22709 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
22710 : CODE_FOR_p8_vmrgew_v4sf_direct),
22711 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
22712 };
22713
22714 unsigned int i, j, elt, which;
22715 unsigned char perm[16];
22716 rtx x;
22717 bool one_vec;
22718
22719 /* Unpack the constant selector. */
22720 for (i = which = 0; i < 16; ++i)
22721 {
22722 elt = sel[i] & 31;
22723 which |= (elt < 16 ? 1 : 2);
22724 perm[i] = elt;
22725 }
22726
22727 /* Simplify the constant selector based on operands. */
22728 switch (which)
22729 {
22730 default:
22731 gcc_unreachable ();
22732
22733 case 3:
22734 one_vec = false;
22735 if (!rtx_equal_p (op0, op1))
22736 break;
22737 /* FALLTHRU */
22738
22739 case 2:
22740 for (i = 0; i < 16; ++i)
22741 perm[i] &= 15;
22742 op0 = op1;
22743 one_vec = true;
22744 break;
22745
22746 case 1:
22747 op1 = op0;
22748 one_vec = true;
22749 break;
22750 }
22751
22752 /* Look for splat patterns. */
22753 if (one_vec)
22754 {
22755 elt = perm[0];
22756
22757 for (i = 0; i < 16; ++i)
22758 if (perm[i] != elt)
22759 break;
22760 if (i == 16)
22761 {
22762 if (!BYTES_BIG_ENDIAN)
22763 elt = 15 - elt;
22764 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
22765 return true;
22766 }
22767
22768 if (elt % 2 == 0)
22769 {
22770 for (i = 0; i < 16; i += 2)
22771 if (perm[i] != elt || perm[i + 1] != elt + 1)
22772 break;
22773 if (i == 16)
22774 {
22775 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
22776 x = gen_reg_rtx (V8HImode);
22777 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
22778 GEN_INT (field)));
22779 emit_move_insn (target, gen_lowpart (V16QImode, x));
22780 return true;
22781 }
22782 }
22783
22784 if (elt % 4 == 0)
22785 {
22786 for (i = 0; i < 16; i += 4)
22787 if (perm[i] != elt
22788 || perm[i + 1] != elt + 1
22789 || perm[i + 2] != elt + 2
22790 || perm[i + 3] != elt + 3)
22791 break;
22792 if (i == 16)
22793 {
22794 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
22795 x = gen_reg_rtx (V4SImode);
22796 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
22797 GEN_INT (field)));
22798 emit_move_insn (target, gen_lowpart (V16QImode, x));
22799 return true;
22800 }
22801 }
22802 }
22803
22804 /* Look for merge and pack patterns. */
22805 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
22806 {
22807 bool swapped;
22808
22809 if ((patterns[j].mask & rs6000_isa_flags) == 0)
22810 continue;
22811
22812 elt = patterns[j].perm[0];
22813 if (perm[0] == elt)
22814 swapped = false;
22815 else if (perm[0] == elt + 16)
22816 swapped = true;
22817 else
22818 continue;
22819 for (i = 1; i < 16; ++i)
22820 {
22821 elt = patterns[j].perm[i];
22822 if (swapped)
22823 elt = (elt >= 16 ? elt - 16 : elt + 16);
22824 else if (one_vec && elt >= 16)
22825 elt -= 16;
22826 if (perm[i] != elt)
22827 break;
22828 }
22829 if (i == 16)
22830 {
22831 enum insn_code icode = patterns[j].impl;
22832 machine_mode omode = insn_data[icode].operand[0].mode;
22833 machine_mode imode = insn_data[icode].operand[1].mode;
22834
22835 /* For little-endian, don't use vpkuwum and vpkuhum if the
22836 underlying vector type is not V4SI and V8HI, respectively.
22837 For example, using vpkuwum with a V8HI picks up the even
22838 halfwords (BE numbering) when the even halfwords (LE
22839 numbering) are what we need. */
22840 if (!BYTES_BIG_ENDIAN
22841 && icode == CODE_FOR_altivec_vpkuwum_direct
22842 && ((REG_P (op0)
22843 && GET_MODE (op0) != V4SImode)
22844 || (SUBREG_P (op0)
22845 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
22846 continue;
22847 if (!BYTES_BIG_ENDIAN
22848 && icode == CODE_FOR_altivec_vpkuhum_direct
22849 && ((REG_P (op0)
22850 && GET_MODE (op0) != V8HImode)
22851 || (SUBREG_P (op0)
22852 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
22853 continue;
22854
22855 /* For little-endian, the two input operands must be swapped
22856 (or swapped back) to ensure proper right-to-left numbering
22857 from 0 to 2N-1. */
22858 if (swapped ^ !BYTES_BIG_ENDIAN)
22859 std::swap (op0, op1);
22860 if (imode != V16QImode)
22861 {
22862 op0 = gen_lowpart (imode, op0);
22863 op1 = gen_lowpart (imode, op1);
22864 }
22865 if (omode == V16QImode)
22866 x = target;
22867 else
22868 x = gen_reg_rtx (omode);
22869 emit_insn (GEN_FCN (icode) (x, op0, op1));
22870 if (omode != V16QImode)
22871 emit_move_insn (target, gen_lowpart (V16QImode, x));
22872 return true;
22873 }
22874 }
22875
22876 if (!BYTES_BIG_ENDIAN)
22877 {
22878 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
22879 return true;
22880 }
22881
22882 return false;
22883 }
22884
22885 /* Expand a VSX Permute Doubleword constant permutation.
22886 Return true if we match an efficient implementation. */
22887
22888 static bool
22889 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
22890 unsigned char perm0, unsigned char perm1)
22891 {
22892 rtx x;
22893
22894 /* If both selectors come from the same operand, fold to single op. */
22895 if ((perm0 & 2) == (perm1 & 2))
22896 {
22897 if (perm0 & 2)
22898 op0 = op1;
22899 else
22900 op1 = op0;
22901 }
22902 /* If both operands are equal, fold to simpler permutation. */
22903 if (rtx_equal_p (op0, op1))
22904 {
22905 perm0 = perm0 & 1;
22906 perm1 = (perm1 & 1) + 2;
22907 }
22908 /* If the first selector comes from the second operand, swap. */
22909 else if (perm0 & 2)
22910 {
22911 if (perm1 & 2)
22912 return false;
22913 perm0 -= 2;
22914 perm1 += 2;
22915 std::swap (op0, op1);
22916 }
22917 /* If the second selector does not come from the second operand, fail. */
22918 else if ((perm1 & 2) == 0)
22919 return false;
22920
22921 /* Success! */
22922 if (target != NULL)
22923 {
22924 machine_mode vmode, dmode;
22925 rtvec v;
22926
22927 vmode = GET_MODE (target);
22928 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
22929 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
22930 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
22931 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
22932 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
22933 emit_insn (gen_rtx_SET (target, x));
22934 }
22935 return true;
22936 }
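/* Editorial example, not in the original source: for V2DF with sel = {1, 2}
   neither folding rule applies (perm0 selects from OP0, perm1 from OP1), and
   the emitted RTL is

     (set target (vec_select:V2DF (vec_concat:V4DF op0 op1)
                                  (parallel [(const_int 1) (const_int 2)])))

   i.e. element 1 of OP0 followed by element 0 of OP1, the form matched by
   the xxpermdi pattern.  */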
22937
22938 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
22939
22940 static bool
22941 rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
22942 rtx op1, const vec_perm_indices &sel)
22943 {
22944 bool testing_p = !target;
22945
22946 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
22947 if (TARGET_ALTIVEC && testing_p)
22948 return true;
22949
22950 /* Check for ps_merge* or xxpermdi insns. */
22951 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
22952 {
22953 if (testing_p)
22954 {
22955 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
22956 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
22957 }
22958 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
22959 return true;
22960 }
22961
22962 if (TARGET_ALTIVEC)
22963 {
22964 /* Force the target-independent code to lower to V16QImode. */
22965 if (vmode != V16QImode)
22966 return false;
22967 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
22968 return true;
22969 }
22970
22971 return false;
22972 }
22973
22974 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
22975 OP0 and OP1 are the input vectors and TARGET is the output vector.
22976 PERM specifies the constant permutation vector. */
22977
22978 static void
22979 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
22980 machine_mode vmode, const vec_perm_builder &perm)
22981 {
22982 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
22983 if (x != target)
22984 emit_move_insn (target, x);
22985 }
22986
22987 /* Expand an extract even operation. */
22988
22989 void
22990 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
22991 {
22992 machine_mode vmode = GET_MODE (target);
22993 unsigned i, nelt = GET_MODE_NUNITS (vmode);
22994 vec_perm_builder perm (nelt, nelt, 1);
22995
22996 for (i = 0; i < nelt; i++)
22997 perm.quick_push (i * 2);
22998
22999 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
23000 }
23001
23002 /* Expand a vector interleave operation. */
23003
23004 void
23005 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
23006 {
23007 machine_mode vmode = GET_MODE (target);
23008 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
23009 vec_perm_builder perm (nelt, nelt, 1);
23010
23011 high = (highp ? 0 : nelt / 2);
23012 for (i = 0; i < nelt / 2; i++)
23013 {
23014 perm.quick_push (i + high);
23015 perm.quick_push (i + nelt + high);
23016 }
23017
23018 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
23019 }
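/* Editorial example, not in the original source: for a V4SI target,
   rs6000_expand_extract_even builds the selector {0, 2, 4, 6}, while
   rs6000_expand_interleave builds {0, 4, 1, 5} when HIGHP and {2, 6, 3, 7}
   otherwise.  */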
23020
23021 /* Scale a V2DF vector SRC by 2**SCALE and place the result in TGT. */
23022 void
23023 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
23024 {
23025 HOST_WIDE_INT hwi_scale (scale);
23026 REAL_VALUE_TYPE r_pow;
23027 rtvec v = rtvec_alloc (2);
23028 rtx elt;
23029 rtx scale_vec = gen_reg_rtx (V2DFmode);
23030 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
23031 elt = const_double_from_real_value (r_pow, DFmode);
23032 RTVEC_ELT (v, 0) = elt;
23033 RTVEC_ELT (v, 1) = elt;
23034 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
23035 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
23036 }
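/* Editorial example, not in the original source: rs6000_scale_v2df (tgt,
   src, 4) splats 2**4 = 16.0 into both lanes of a scratch V2DF register
   and multiplies, scaling each element of SRC by 16.0.  */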
23037
23038 /* Return an RTX representing where to find the function value of a
23039 function returning MODE. */
23040 static rtx
23041 rs6000_complex_function_value (machine_mode mode)
23042 {
23043 unsigned int regno;
23044 rtx r1, r2;
23045 machine_mode inner = GET_MODE_INNER (mode);
23046 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
23047
23048 if (TARGET_FLOAT128_TYPE
23049 && (mode == KCmode
23050 || (mode == TCmode && TARGET_IEEEQUAD)))
23051 regno = ALTIVEC_ARG_RETURN;
23052
23053 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23054 regno = FP_ARG_RETURN;
23055
23056 else
23057 {
23058 regno = GP_ARG_RETURN;
23059
23060 /* 32-bit is OK since it'll go in r3/r4. */
23061 if (TARGET_32BIT && inner_bytes >= 4)
23062 return gen_rtx_REG (mode, regno);
23063 }
23064
23065 if (inner_bytes >= 8)
23066 return gen_rtx_REG (mode, regno);
23067
23068 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
23069 const0_rtx);
23070 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
23071 GEN_INT (inner_bytes));
23072 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
23073 }
23074
23075 /* Return an rtx describing a return value of MODE as a PARALLEL
23076 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
23077 stride REG_STRIDE. */
23078
23079 static rtx
23080 rs6000_parallel_return (machine_mode mode,
23081 int n_elts, machine_mode elt_mode,
23082 unsigned int regno, unsigned int reg_stride)
23083 {
23084 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
23085
23086 int i;
23087 for (i = 0; i < n_elts; i++)
23088 {
23089 rtx r = gen_rtx_REG (elt_mode, regno);
23090 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
23091 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
23092 regno += reg_stride;
23093 }
23094
23095 return par;
23096 }
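/* Editorial example, not in the original source: rs6000_parallel_return
   (DImode, 2, SImode, GP_ARG_RETURN, 1) describes a 64-bit value split
   across r3/r4:

     (parallel:DI [(expr_list (reg:SI 3) (const_int 0))
                   (expr_list (reg:SI 4) (const_int 4))])

   where each element pairs a register with its byte offset within the
   value.  */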
23097
23098 /* Target hook for TARGET_FUNCTION_VALUE.
23099
23100 An integer value is in r3 and a floating-point value is in fp1,
23101 unless -msoft-float. */
23102
23103 static rtx
23104 rs6000_function_value (const_tree valtype,
23105 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
23106 bool outgoing ATTRIBUTE_UNUSED)
23107 {
23108 machine_mode mode;
23109 unsigned int regno;
23110 machine_mode elt_mode;
23111 int n_elts;
23112
23113 /* Special handling for structs in darwin64. */
23114 if (TARGET_MACHO
23115 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
23116 {
23117 CUMULATIVE_ARGS valcum;
23118 rtx valret;
23119
23120 valcum.words = 0;
23121 valcum.fregno = FP_ARG_MIN_REG;
23122 valcum.vregno = ALTIVEC_ARG_MIN_REG;
23123 /* Do a trial code generation as if this were going to be passed as
23124 an argument; if any part goes in memory, we return NULL. */
23125 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
23126 if (valret)
23127 return valret;
23128 /* Otherwise fall through to standard ABI rules. */
23129 }
23130
23131 mode = TYPE_MODE (valtype);
23132
23133 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
23134 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
23135 {
23136 int first_reg, n_regs;
23137
23138 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
23139 {
23140 /* _Decimal128 must use even/odd register pairs. */
23141 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23142 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
23143 }
23144 else
23145 {
23146 first_reg = ALTIVEC_ARG_RETURN;
23147 n_regs = 1;
23148 }
23149
23150 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
23151 }
23152
23153 /* Some return value types need to be split for the -mpowerpc64, 32-bit ABI. */
23154 if (TARGET_32BIT && TARGET_POWERPC64)
23155 switch (mode)
23156 {
23157 default:
23158 break;
23159 case E_DImode:
23160 case E_SCmode:
23161 case E_DCmode:
23162 case E_TCmode:
23163 int count = GET_MODE_SIZE (mode) / 4;
23164 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
23165 }
23166
23167 if ((INTEGRAL_TYPE_P (valtype)
23168 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
23169 || POINTER_TYPE_P (valtype))
23170 mode = TARGET_32BIT ? SImode : DImode;
23171
23172 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23173 /* _Decimal128 must use an even/odd register pair. */
23174 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23175 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
23176 && !FLOAT128_VECTOR_P (mode))
23177 regno = FP_ARG_RETURN;
23178 else if (TREE_CODE (valtype) == COMPLEX_TYPE
23179 && targetm.calls.split_complex_arg)
23180 return rs6000_complex_function_value (mode);
23181 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
23182 return register is used in both cases, and we won't see V2DImode/V2DFmode
23183 for pure altivec, combine the two cases. */
23184 else if ((TREE_CODE (valtype) == VECTOR_TYPE || VECTOR_ALIGNMENT_P (mode))
23185 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
23186 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
23187 regno = ALTIVEC_ARG_RETURN;
23188 else
23189 regno = GP_ARG_RETURN;
23190
23191 return gen_rtx_REG (mode, regno);
23192 }
23193
23194 /* Define how to find the value returned by a library function
23195 assuming the value has mode MODE. */
23196 rtx
23197 rs6000_libcall_value (machine_mode mode)
23198 {
23199 unsigned int regno;
23200
23201 /* A long long return value needs to be split for the -mpowerpc64, 32-bit ABI. */
23202 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
23203 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
23204
23205 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23206 /* _Decimal128 must use an even/odd register pair. */
23207 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23208 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
23209 regno = FP_ARG_RETURN;
23210 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
23211 return register is used in both cases, and we won't see V2DImode/V2DFmode
23212 for pure altivec, combine the two cases. */
23213 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
23214 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
23215 regno = ALTIVEC_ARG_RETURN;
23216 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
23217 return rs6000_complex_function_value (mode);
23218 else
23219 regno = GP_ARG_RETURN;
23220
23221 return gen_rtx_REG (mode, regno);
23222 }
23223
23224 /* Compute register pressure classes. We implement the target hook to avoid
23225 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
23226 lead to incorrect estimates of the number of available registers and therefore
23227 increased register pressure/spill. */
23228 static int
23229 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
23230 {
23231 int n;
23232
23233 n = 0;
23234 pressure_classes[n++] = GENERAL_REGS;
23235 if (TARGET_VSX)
23236 pressure_classes[n++] = VSX_REGS;
23237 else
23238 {
23239 if (TARGET_ALTIVEC)
23240 pressure_classes[n++] = ALTIVEC_REGS;
23241 if (TARGET_HARD_FLOAT)
23242 pressure_classes[n++] = FLOAT_REGS;
23243 }
23244 pressure_classes[n++] = CR_REGS;
23245 pressure_classes[n++] = SPECIAL_REGS;
23246
23247 return n;
23248 }
23249
23250 /* Given FROM and TO register numbers, say whether this elimination is allowed.
23251 Frame pointer elimination is automatically handled.
23252
23253 For the RS/6000, if frame pointer elimination is being done, we would like
23254 to convert ap into fp, not sp.
23255
23256 We need r30 if -mminimal-toc was specified, and there are constant pool
23257 references. */
23258
23259 static bool
23260 rs6000_can_eliminate (const int from, const int to)
23261 {
23262 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
23263 ? ! frame_pointer_needed
23264 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
23265 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
23266 || constant_pool_empty_p ()
23267 : true);
23268 }
23269
23270 /* Define the offset between two registers, FROM to be eliminated and its
23271 replacement TO, at the start of a routine. */
23272 HOST_WIDE_INT
23273 rs6000_initial_elimination_offset (int from, int to)
23274 {
23275 rs6000_stack_t *info = rs6000_stack_info ();
23276 HOST_WIDE_INT offset;
23277
23278 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23279 offset = info->push_p ? 0 : -info->total_size;
23280 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23281 {
23282 offset = info->push_p ? 0 : -info->total_size;
23283 if (FRAME_GROWS_DOWNWARD)
23284 offset += info->fixed_size + info->vars_size + info->parm_size;
23285 }
23286 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
23287 offset = FRAME_GROWS_DOWNWARD
23288 ? info->fixed_size + info->vars_size + info->parm_size
23289 : 0;
23290 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
23291 offset = info->total_size;
23292 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23293 offset = info->push_p ? info->total_size : 0;
23294 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
23295 offset = 0;
23296 else
23297 gcc_unreachable ();
23298
23299 return offset;
23300 }
23301
23302 /* Fill in the sizes of the registers used by the unwinder. */
23303
23304 static void
23305 rs6000_init_dwarf_reg_sizes_extra (tree address)
23306 {
23307 if (TARGET_MACHO && ! TARGET_ALTIVEC)
23308 {
23309 int i;
23310 machine_mode mode = TYPE_MODE (char_type_node);
23311 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
23312 rtx mem = gen_rtx_MEM (BLKmode, addr);
23313 rtx value = gen_int_mode (16, mode);
23314
23315 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
23316 The unwinder still needs to know the size of Altivec registers. */
23317
23318 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
23319 {
23320 int column = DWARF_REG_TO_UNWIND_COLUMN
23321 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
23322 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
23323
23324 emit_move_insn (adjust_address (mem, mode, offset), value);
23325 }
23326 }
23327 }
23328
23329 /* Map internal gcc register numbers to debug format register numbers.
23330 FORMAT specifies the type of debug register number to use:
23331 0 -- debug information, except for frame-related sections
23332 1 -- DWARF .debug_frame section
23333 2 -- DWARF .eh_frame section */
23334
23335 unsigned int
23336 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
23337 {
23338 /* On some platforms, we use the standard DWARF register
23339 numbering for .debug_info and .debug_frame. */
23340 if ((format == 0 && write_symbols == DWARF2_DEBUG) || format == 1)
23341 {
23342 #ifdef RS6000_USE_DWARF_NUMBERING
23343 if (regno <= 31)
23344 return regno;
23345 if (FP_REGNO_P (regno))
23346 return regno - FIRST_FPR_REGNO + 32;
23347 if (ALTIVEC_REGNO_P (regno))
23348 return regno - FIRST_ALTIVEC_REGNO + 1124;
23349 if (regno == LR_REGNO)
23350 return 108;
23351 if (regno == CTR_REGNO)
23352 return 109;
23353 if (regno == CA_REGNO)
23354 return 101; /* XER */
23355 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
23356 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
23357 The actual code emitted saves the whole of CR, so we map CR2_REGNO
23358 to the DWARF reg for CR. */
23359 if (format == 1 && regno == CR2_REGNO)
23360 return 64;
23361 if (CR_REGNO_P (regno))
23362 return regno - CR0_REGNO + 86;
23363 if (regno == VRSAVE_REGNO)
23364 return 356;
23365 if (regno == VSCR_REGNO)
23366 return 67;
23367
23368 /* These do not make much sense. */
23369 if (regno == FRAME_POINTER_REGNUM)
23370 return 111;
23371 if (regno == ARG_POINTER_REGNUM)
23372 return 67;
23373 if (regno == 64)
23374 return 100;
23375
23376 gcc_unreachable ();
23377 #endif
23378 }
23379
23380 /* We use the GCC 7 (and before) internal numbering for non-DWARF debug
23381 information and for .eh_frame, so translate the regnos to their
23382 GCC 7 numbers here. */
23383 if (regno <= 31)
23384 return regno;
23385 if (FP_REGNO_P (regno))
23386 return regno - FIRST_FPR_REGNO + 32;
23387 if (ALTIVEC_REGNO_P (regno))
23388 return regno - FIRST_ALTIVEC_REGNO + 77;
23389 if (regno == LR_REGNO)
23390 return 65;
23391 if (regno == CTR_REGNO)
23392 return 66;
23393 if (regno == CA_REGNO)
23394 return 76; /* XER */
23395 if (CR_REGNO_P (regno))
23396 return regno - CR0_REGNO + 68;
23397 if (regno == VRSAVE_REGNO)
23398 return 109;
23399 if (regno == VSCR_REGNO)
23400 return 110;
23401
23402 if (regno == FRAME_POINTER_REGNUM)
23403 return 111;
23404 if (regno == ARG_POINTER_REGNUM)
23405 return 67;
23406 if (regno == 64)
23407 return 64;
23408
23409 gcc_unreachable ();
23410 }
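/* Illustrative examples of the mapping above (derived from the cases in
   this function, assuming RS6000_USE_DWARF_NUMBERING is defined):

     rs6000_dbx_register_number (LR_REGNO, 1)  => 108  (.debug_frame)
     rs6000_dbx_register_number (LR_REGNO, 2)  => 65   (.eh_frame)
     rs6000_dbx_register_number (CTR_REGNO, 1) => 109  (.debug_frame)
     rs6000_dbx_register_number (CTR_REGNO, 2) => 66   (.eh_frame)

   GPRs 0-31 map to themselves in both numbering schemes. */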
23411
23412 /* target hook eh_return_filter_mode */
23413 static scalar_int_mode
23414 rs6000_eh_return_filter_mode (void)
23415 {
23416 return TARGET_32BIT ? SImode : word_mode;
23417 }
23418
23419 /* Target hook for translate_mode_attribute. */
23420 static machine_mode
23421 rs6000_translate_mode_attribute (machine_mode mode)
23422 {
23423 if ((FLOAT128_IEEE_P (mode)
23424 && ieee128_float_type_node == long_double_type_node)
23425 || (FLOAT128_IBM_P (mode)
23426 && ibm128_float_type_node == long_double_type_node))
23427 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
23428 return mode;
23429 }
23430
23431 /* Target hook for scalar_mode_supported_p. */
23432 static bool
23433 rs6000_scalar_mode_supported_p (scalar_mode mode)
23434 {
23435 /* -m32 does not support TImode. This is the default, from
23436 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
23437 same ABI as for -m32. But default_scalar_mode_supported_p allows
23438 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
23439 for -mpowerpc64. */
23440 if (TARGET_32BIT && mode == TImode)
23441 return false;
23442
23443 if (DECIMAL_FLOAT_MODE_P (mode))
23444 return default_decimal_float_supported_p ();
23445 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
23446 return true;
23447 else
23448 return default_scalar_mode_supported_p (mode);
23449 }
23450
23451 /* Target hook for vector_mode_supported_p. */
23452 static bool
23453 rs6000_vector_mode_supported_p (machine_mode mode)
23454 {
23455 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
23456 128-bit, the compiler might try to widen IEEE 128-bit to IBM
23457 double-double. */
23458 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
23459 return true;
23460
23461 else
23462 return false;
23463 }
23464
23465 /* Target hook for floatn_mode. */
23466 static opt_scalar_float_mode
23467 rs6000_floatn_mode (int n, bool extended)
23468 {
23469 if (extended)
23470 {
23471 switch (n)
23472 {
23473 case 32:
23474 return DFmode;
23475
23476 case 64:
23477 if (TARGET_FLOAT128_TYPE)
23478 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
23479 else
23480 return opt_scalar_float_mode ();
23481
23482 case 128:
23483 return opt_scalar_float_mode ();
23484
23485 default:
23486 /* Those are the only valid _FloatNx types. */
23487 gcc_unreachable ();
23488 }
23489 }
23490 else
23491 {
23492 switch (n)
23493 {
23494 case 32:
23495 return SFmode;
23496
23497 case 64:
23498 return DFmode;
23499
23500 case 128:
23501 if (TARGET_FLOAT128_TYPE)
23502 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
23503 else
23504 return opt_scalar_float_mode ();
23505
23506 default:
23507 return opt_scalar_float_mode ();
23508 }
23509 }
23510
23511 }
23512
23513 /* Target hook for c_mode_for_suffix. */
23514 static machine_mode
23515 rs6000_c_mode_for_suffix (char suffix)
23516 {
23517 if (TARGET_FLOAT128_TYPE)
23518 {
23519 if (suffix == 'q' || suffix == 'Q')
23520 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
23521
23522 /* At the moment, we are not defining a suffix for IBM extended double.
23523 If/when the default for -mabi=ieeelongdouble is changed, and we want
23524 to support __ibm128 constants in legacy library code, we may need to
23525 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
23526 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
23527 __float80 constants. */
23528 }
23529
23530 return VOIDmode;
23531 }
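/* With -mfloat128, the 'q'/'Q' suffix above lets user code spell IEEE
   128-bit constants directly. A sketch of user-level usage (not code in
   this file):

     __float128 pi = 3.14159265358979323846264338327950288q;

   The constant gets KFmode, or TFmode when long double is IEEE 128-bit. */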
23532
23533 /* Target hook for invalid_arg_for_unprototyped_fn. */
23534 static const char *
23535 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
23536 {
23537 return (!rs6000_darwin64_abi
23538 && typelist == 0
23539 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
23540 && (funcdecl == NULL_TREE
23541 || (TREE_CODE (funcdecl) == FUNCTION_DECL
23542 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
23543 ? N_("AltiVec argument passed to unprototyped function")
23544 : NULL;
23545 }
23546
23547 /* For TARGET_SECURE_PLT 32-bit PIC code we can avoid the PIC register
23548 setup by calling the hidden function __stack_chk_fail_local instead
23549 of calling __stack_chk_fail directly. Otherwise it is better to call
23550 __stack_chk_fail directly. */
23551
23552 static tree ATTRIBUTE_UNUSED
23553 rs6000_stack_protect_fail (void)
23554 {
23555 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
23556 ? default_hidden_stack_protect_fail ()
23557 : default_external_stack_protect_fail ();
23558 }
23559
23560 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
23561
23562 #if TARGET_ELF
23563 static unsigned HOST_WIDE_INT
23564 rs6000_asan_shadow_offset (void)
23565 {
23566 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
23567 }
23568 #endif
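/* The address sanitizer maps an address to its shadow byte as
   (addr >> 3) + offset, so the values above put the shadow region at
   1 << 41 for 64-bit and 1 << 29 for 32-bit ELF targets. An
   illustrative sketch of the mapping (not code in this file):

     static uintptr_t
     asan_mem_to_shadow (uintptr_t addr)
     {
       return (addr >> 3) + rs6000_asan_shadow_offset ();
     }  */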
23569 \f
23570 /* Mask options that we want to support inside of attribute((target)) and
23571 #pragma GCC target operations. Note, we do not include things like
23572 64/32-bit, endianness, hard/soft floating point, etc. that would have
23573 different calling sequences. */
23574
23575 struct rs6000_opt_mask {
23576 const char *name; /* option name */
23577 HOST_WIDE_INT mask; /* mask to set */
23578 bool invert; /* invert sense of mask */
23579 bool valid_target; /* option is a target option */
23580 };
23581
23582 static struct rs6000_opt_mask const rs6000_opt_masks[] =
23583 {
23584 { "altivec", OPTION_MASK_ALTIVEC, false, true },
23585 { "block-ops-unaligned-vsx", OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX,
23586 false, true },
23587 { "block-ops-vector-pair", OPTION_MASK_BLOCK_OPS_VECTOR_PAIR,
23588 false, true },
23589 { "cmpb", OPTION_MASK_CMPB, false, true },
23590 { "crypto", OPTION_MASK_CRYPTO, false, true },
23591 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
23592 { "dlmzb", OPTION_MASK_DLMZB, false, true },
23593 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
23594 false, true },
23595 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
23596 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
23597 { "fprnd", OPTION_MASK_FPRND, false, true },
23598 { "power10", OPTION_MASK_POWER10, false, true },
23599 { "hard-dfp", OPTION_MASK_DFP, false, true },
23600 { "htm", OPTION_MASK_HTM, false, true },
23601 { "isel", OPTION_MASK_ISEL, false, true },
23602 { "mfcrf", OPTION_MASK_MFCRF, false, true },
23603 { "mfpgpr", 0, false, true },
23604 { "mma", OPTION_MASK_MMA, false, true },
23605 { "modulo", OPTION_MASK_MODULO, false, true },
23606 { "mulhw", OPTION_MASK_MULHW, false, true },
23607 { "multiple", OPTION_MASK_MULTIPLE, false, true },
23608 { "pcrel", OPTION_MASK_PCREL, false, true },
23609 { "popcntb", OPTION_MASK_POPCNTB, false, true },
23610 { "popcntd", OPTION_MASK_POPCNTD, false, true },
23611 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
23612 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
23613 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
23614 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
23615 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
23616 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
23617 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
23618 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
23619 { "prefixed", OPTION_MASK_PREFIXED, false, true },
23620 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
23621 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
23622 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
23623 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
23624 { "string", 0, false, true },
23625 { "update", OPTION_MASK_NO_UPDATE, true , true },
23626 { "vsx", OPTION_MASK_VSX, false, true },
23627 #ifdef OPTION_MASK_64BIT
23628 #if TARGET_AIX_OS
23629 { "aix64", OPTION_MASK_64BIT, false, false },
23630 { "aix32", OPTION_MASK_64BIT, true, false },
23631 #else
23632 { "64", OPTION_MASK_64BIT, false, false },
23633 { "32", OPTION_MASK_64BIT, true, false },
23634 #endif
23635 #endif
23636 #ifdef OPTION_MASK_EABI
23637 { "eabi", OPTION_MASK_EABI, false, false },
23638 #endif
23639 #ifdef OPTION_MASK_LITTLE_ENDIAN
23640 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
23641 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
23642 #endif
23643 #ifdef OPTION_MASK_RELOCATABLE
23644 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
23645 #endif
23646 #ifdef OPTION_MASK_STRICT_ALIGN
23647 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
23648 #endif
23649 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
23650 { "string", 0, false, false },
23651 };
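/* The names above are what users write inside attribute((target("...")))
   and #pragma GCC target, optionally prefixed with "no-" to negate the
   mask. An illustrative use from user code (not code in this file):

     __attribute__((__target__("vsx,no-htm")))
     void fn (void);

   This sets OPTION_MASK_VSX (which in turn implies OPTION_MASK_ALTIVEC,
   see rs6000_inner_target_options below) and clears OPTION_MASK_HTM for
   fn alone. */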
23652
23653 /* Builtin mask mapping for printing the flags. */
23654 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
23655 {
23656 { "altivec", RS6000_BTM_ALTIVEC, false, false },
23657 { "vsx", RS6000_BTM_VSX, false, false },
23658 { "fre", RS6000_BTM_FRE, false, false },
23659 { "fres", RS6000_BTM_FRES, false, false },
23660 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
23661 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
23662 { "popcntd", RS6000_BTM_POPCNTD, false, false },
23663 { "cell", RS6000_BTM_CELL, false, false },
23664 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
23665 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
23666 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
23667 { "crypto", RS6000_BTM_CRYPTO, false, false },
23668 { "htm", RS6000_BTM_HTM, false, false },
23669 { "hard-dfp", RS6000_BTM_DFP, false, false },
23670 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
23671 { "long-double-128", RS6000_BTM_LDBL128, false, false },
23672 { "powerpc64", RS6000_BTM_POWERPC64, false, false },
23673 { "float128", RS6000_BTM_FLOAT128, false, false },
23674 { "float128-hw", RS6000_BTM_FLOAT128_HW,false, false },
23675 { "mma", RS6000_BTM_MMA, false, false },
23676 { "power10", RS6000_BTM_P10, false, false },
23677 };
23678
23679 /* Option variables that we want to support inside attribute((target)) and
23680 #pragma GCC target operations. */
23681
23682 struct rs6000_opt_var {
23683 const char *name; /* option name */
23684 size_t global_offset; /* offset of the option in global_options. */
23685 size_t target_offset; /* offset of the option in target options. */
23686 };
23687
23688 static struct rs6000_opt_var const rs6000_opt_vars[] =
23689 {
23690 { "friz",
23691 offsetof (struct gcc_options, x_TARGET_FRIZ),
23692 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
23693 { "avoid-indexed-addresses",
23694 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
23695 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
23696 { "longcall",
23697 offsetof (struct gcc_options, x_rs6000_default_long_calls),
23698 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
23699 { "optimize-swaps",
23700 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
23701 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
23702 { "allow-movmisalign",
23703 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
23704 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
23705 { "sched-groups",
23706 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
23707 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
23708 { "always-hint",
23709 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
23710 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
23711 { "align-branch-targets",
23712 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
23713 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
23714 { "sched-prolog",
23715 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
23716 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
23717 { "sched-epilog",
23718 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
23719 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
23720 { "speculate-indirect-jumps",
23721 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
23722 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
23723 };
23724
23725 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
23726 parsing. Return true if there were no errors. */
23727
23728 static bool
23729 rs6000_inner_target_options (tree args, bool attr_p)
23730 {
23731 bool ret = true;
23732
23733 if (args == NULL_TREE)
23734 ;
23735
23736 else if (TREE_CODE (args) == STRING_CST)
23737 {
23738 char *p = ASTRDUP (TREE_STRING_POINTER (args));
23739 char *q;
23740
23741 while ((q = strtok (p, ",")) != NULL)
23742 {
23743 bool error_p = false;
23744 bool not_valid_p = false;
23745 const char *cpu_opt = NULL;
23746
23747 p = NULL;
23748 if (strncmp (q, "cpu=", 4) == 0)
23749 {
23750 int cpu_index = rs6000_cpu_name_lookup (q+4);
23751 if (cpu_index >= 0)
23752 rs6000_cpu_index = cpu_index;
23753 else
23754 {
23755 error_p = true;
23756 cpu_opt = q+4;
23757 }
23758 }
23759 else if (strncmp (q, "tune=", 5) == 0)
23760 {
23761 int tune_index = rs6000_cpu_name_lookup (q+5);
23762 if (tune_index >= 0)
23763 rs6000_tune_index = tune_index;
23764 else
23765 {
23766 error_p = true;
23767 cpu_opt = q+5;
23768 }
23769 }
23770 else
23771 {
23772 size_t i;
23773 bool invert = false;
23774 char *r = q;
23775
23776 error_p = true;
23777 if (strncmp (r, "no-", 3) == 0)
23778 {
23779 invert = true;
23780 r += 3;
23781 }
23782
23783 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
23784 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
23785 {
23786 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
23787
23788 if (!rs6000_opt_masks[i].valid_target)
23789 not_valid_p = true;
23790 else
23791 {
23792 error_p = false;
23793 rs6000_isa_flags_explicit |= mask;
23794
23795 /* VSX needs altivec, so -mvsx automagically sets
23796 altivec and disables -mavoid-indexed-addresses. */
23797 if (!invert)
23798 {
23799 if (mask == OPTION_MASK_VSX)
23800 {
23801 mask |= OPTION_MASK_ALTIVEC;
23802 TARGET_AVOID_XFORM = 0;
23803 }
23804 }
23805
23806 if (rs6000_opt_masks[i].invert)
23807 invert = !invert;
23808
23809 if (invert)
23810 rs6000_isa_flags &= ~mask;
23811 else
23812 rs6000_isa_flags |= mask;
23813 }
23814 break;
23815 }
23816
23817 if (error_p && !not_valid_p)
23818 {
23819 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
23820 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
23821 {
23822 size_t j = rs6000_opt_vars[i].global_offset;
23823 *((int *) ((char *)&global_options + j)) = !invert;
23824 error_p = false;
23825 not_valid_p = false;
23826 break;
23827 }
23828 }
23829 }
23830
23831 if (error_p)
23832 {
23833 const char *eprefix, *esuffix;
23834
23835 ret = false;
23836 if (attr_p)
23837 {
23838 eprefix = "__attribute__((__target__(";
23839 esuffix = ")))";
23840 }
23841 else
23842 {
23843 eprefix = "#pragma GCC target ";
23844 esuffix = "";
23845 }
23846
23847 if (cpu_opt)
23848 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
23849 q, esuffix);
23850 else if (not_valid_p)
23851 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
23852 else
23853 error ("%s%qs%s is invalid", eprefix, q, esuffix);
23854 }
23855 }
23856 }
23857
23858 else if (TREE_CODE (args) == TREE_LIST)
23859 {
23860 do
23861 {
23862 tree value = TREE_VALUE (args);
23863 if (value)
23864 {
23865 bool ret2 = rs6000_inner_target_options (value, attr_p);
23866 if (!ret2)
23867 ret = false;
23868 }
23869 args = TREE_CHAIN (args);
23870 }
23871 while (args != NULL_TREE);
23872 }
23873
23874 else
23875 {
23876 error ("attribute %<target%> argument not a string");
23877 return false;
23878 }
23879
23880 return ret;
23881 }
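/* Examples of option strings the parser above accepts (illustrative):

     "cpu=power9"        sets rs6000_cpu_index via rs6000_cpu_name_lookup
     "tune=power10"      sets rs6000_tune_index
     "altivec,no-vsx"    comma-separated rs6000_opt_masks names; a "no-"
                         prefix (or an invert entry) clears the mask
     "friz"              a variable option from rs6000_opt_vars

   Unknown names produce the "is invalid" error; names whose
   valid_target field is false produce "is not allowed". */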
23882
23883 /* Print out the target options as a list for -mdebug=target. */
23884
23885 static void
23886 rs6000_debug_target_options (tree args, const char *prefix)
23887 {
23888 if (args == NULL_TREE)
23889 fprintf (stderr, "%s<NULL>", prefix);
23890
23891 else if (TREE_CODE (args) == STRING_CST)
23892 {
23893 char *p = ASTRDUP (TREE_STRING_POINTER (args));
23894 char *q;
23895
23896 while ((q = strtok (p, ",")) != NULL)
23897 {
23898 p = NULL;
23899 fprintf (stderr, "%s\"%s\"", prefix, q);
23900 prefix = ", ";
23901 }
23902 }
23903
23904 else if (TREE_CODE (args) == TREE_LIST)
23905 {
23906 do
23907 {
23908 tree value = TREE_VALUE (args);
23909 if (value)
23910 {
23911 rs6000_debug_target_options (value, prefix);
23912 prefix = ", ";
23913 }
23914 args = TREE_CHAIN (args);
23915 }
23916 while (args != NULL_TREE);
23917 }
23918
23919 else
23920 gcc_unreachable ();
23921
23922 return;
23923 }
23924
23925 \f
23926 /* Hook to validate attribute((target("..."))). */
23927
23928 static bool
23929 rs6000_valid_attribute_p (tree fndecl,
23930 tree ARG_UNUSED (name),
23931 tree args,
23932 int flags)
23933 {
23934 struct cl_target_option cur_target;
23935 bool ret;
23936 tree old_optimize;
23937 tree new_target, new_optimize;
23938 tree func_optimize;
23939
23940 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
23941
23942 if (TARGET_DEBUG_TARGET)
23943 {
23944 tree tname = DECL_NAME (fndecl);
23945 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
23946 if (tname)
23947 fprintf (stderr, "function: %.*s\n",
23948 (int) IDENTIFIER_LENGTH (tname),
23949 IDENTIFIER_POINTER (tname));
23950 else
23951 fprintf (stderr, "function: unknown\n");
23952
23953 fprintf (stderr, "args:");
23954 rs6000_debug_target_options (args, " ");
23955 fprintf (stderr, "\n");
23956
23957 if (flags)
23958 fprintf (stderr, "flags: 0x%x\n", flags);
23959
23960 fprintf (stderr, "--------------------\n");
23961 }
23962
23963 /* attribute((target("default"))) does nothing, beyond
23964 affecting multi-versioning. */
23965 if (TREE_VALUE (args)
23966 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
23967 && TREE_CHAIN (args) == NULL_TREE
23968 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
23969 return true;
23970
23971 old_optimize = build_optimization_node (&global_options,
23972 &global_options_set);
23973 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
23974
23975 /* If the function changed the optimization levels as well as setting target
23976 options, start with the optimizations specified. */
23977 if (func_optimize && func_optimize != old_optimize)
23978 cl_optimization_restore (&global_options, &global_options_set,
23979 TREE_OPTIMIZATION (func_optimize));
23980
23981 /* The target attributes may also change some optimization flags, so update
23982 the optimization options if necessary. */
23983 cl_target_option_save (&cur_target, &global_options, &global_options_set);
23984 rs6000_cpu_index = rs6000_tune_index = -1;
23985 ret = rs6000_inner_target_options (args, true);
23986
23987 /* Set up any additional state. */
23988 if (ret)
23989 {
23990 ret = rs6000_option_override_internal (false);
23991 new_target = build_target_option_node (&global_options,
23992 &global_options_set);
23993 }
23994 else
23995 new_target = NULL;
23996
23997 new_optimize = build_optimization_node (&global_options,
23998 &global_options_set);
23999
24000 if (!new_target)
24001 ret = false;
24002
24003 else if (fndecl)
24004 {
24005 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
24006
24007 if (old_optimize != new_optimize)
24008 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
24009 }
24010
24011 cl_target_option_restore (&global_options, &global_options_set, &cur_target);
24012
24013 if (old_optimize != new_optimize)
24014 cl_optimization_restore (&global_options, &global_options_set,
24015 TREE_OPTIMIZATION (old_optimize));
24016
24017 return ret;
24018 }
24019
24020 \f
24021 /* Hook to validate the current #pragma GCC target and set the state, and
24022 update the macros based on what was changed. If ARGS is NULL, then
24023 POP_TARGET is used to reset the options. */
24024
24025 bool
24026 rs6000_pragma_target_parse (tree args, tree pop_target)
24027 {
24028 tree prev_tree = build_target_option_node (&global_options,
24029 &global_options_set);
24030 tree cur_tree;
24031 struct cl_target_option *prev_opt, *cur_opt;
24032 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
24033 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
24034
24035 if (TARGET_DEBUG_TARGET)
24036 {
24037 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
24038 fprintf (stderr, "args:");
24039 rs6000_debug_target_options (args, " ");
24040 fprintf (stderr, "\n");
24041
24042 if (pop_target)
24043 {
24044 fprintf (stderr, "pop_target:\n");
24045 debug_tree (pop_target);
24046 }
24047 else
24048 fprintf (stderr, "pop_target: <NULL>\n");
24049
24050 fprintf (stderr, "--------------------\n");
24051 }
24052
24053 if (! args)
24054 {
24055 cur_tree = ((pop_target)
24056 ? pop_target
24057 : target_option_default_node);
24058 cl_target_option_restore (&global_options, &global_options_set,
24059 TREE_TARGET_OPTION (cur_tree));
24060 }
24061 else
24062 {
24063 rs6000_cpu_index = rs6000_tune_index = -1;
24064 if (!rs6000_inner_target_options (args, false)
24065 || !rs6000_option_override_internal (false)
24066 || (cur_tree = build_target_option_node (&global_options,
24067 &global_options_set))
24068 == NULL_TREE)
24069 {
24070 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
24071 fprintf (stderr, "invalid pragma\n");
24072
24073 return false;
24074 }
24075 }
24076
24077 target_option_current_node = cur_tree;
24078 rs6000_activate_target_options (target_option_current_node);
24079
24080 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
24081 change the macros that are defined. */
24082 if (rs6000_target_modify_macros_ptr)
24083 {
24084 prev_opt = TREE_TARGET_OPTION (prev_tree);
24085 prev_bumask = prev_opt->x_rs6000_builtin_mask;
24086 prev_flags = prev_opt->x_rs6000_isa_flags;
24087
24088 cur_opt = TREE_TARGET_OPTION (cur_tree);
24089 cur_flags = cur_opt->x_rs6000_isa_flags;
24090 cur_bumask = cur_opt->x_rs6000_builtin_mask;
24091
24092 diff_bumask = (prev_bumask ^ cur_bumask);
24093 diff_flags = (prev_flags ^ cur_flags);
24094
24095 if ((diff_flags != 0) || (diff_bumask != 0))
24096 {
24097 /* Delete old macros. */
24098 rs6000_target_modify_macros_ptr (false,
24099 prev_flags & diff_flags,
24100 prev_bumask & diff_bumask);
24101
24102 /* Define new macros. */
24103 rs6000_target_modify_macros_ptr (true,
24104 cur_flags & diff_flags,
24105 cur_bumask & diff_bumask);
24106 }
24107 }
24108
24109 return true;
24110 }
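/* Typical user-level usage that drives this hook (a sketch, not code in
   this file):

     #pragma GCC push_options
     #pragma GCC target ("cpu=power9,htm")
     ... functions compiled with the modified ISA flags ...
     #pragma GCC pop_options

   The pop_options form reaches this function with ARGS == NULL and the
   previously saved options in POP_TARGET, which restores both the ISA
   flags and the predefined macros. */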
24111
24112 \f
24113 /* Remember the last target of rs6000_set_current_function. */
24114 static GTY(()) tree rs6000_previous_fndecl;
24115
24116 /* Restore target's globals from NEW_TREE and invalidate the
24117 rs6000_previous_fndecl cache. */
24118
24119 void
24120 rs6000_activate_target_options (tree new_tree)
24121 {
24122 cl_target_option_restore (&global_options, &global_options_set,
24123 TREE_TARGET_OPTION (new_tree));
24124 if (TREE_TARGET_GLOBALS (new_tree))
24125 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
24126 else if (new_tree == target_option_default_node)
24127 restore_target_globals (&default_target_globals);
24128 else
24129 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
24130 rs6000_previous_fndecl = NULL_TREE;
24131 }
24132
24133 /* Establish appropriate back-end context for processing the function
24134 FNDECL. The argument might be NULL to indicate processing at top
24135 level, outside of any function scope. */
24136 static void
24137 rs6000_set_current_function (tree fndecl)
24138 {
24139 if (TARGET_DEBUG_TARGET)
24140 {
24141 fprintf (stderr, "\n==================== rs6000_set_current_function");
24142
24143 if (fndecl)
24144 fprintf (stderr, ", fndecl %s (%p)",
24145 (DECL_NAME (fndecl)
24146 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
24147 : "<unknown>"), (void *)fndecl);
24148
24149 if (rs6000_previous_fndecl)
24150 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
24151
24152 fprintf (stderr, "\n");
24153 }
24154
24155 /* Only change the context if the function changes. This hook is called
24156 several times in the course of compiling a function, and we don't want to
24157 slow things down too much or call target_reinit when it isn't safe. */
24158 if (fndecl == rs6000_previous_fndecl)
24159 return;
24160
24161 tree old_tree;
24162 if (rs6000_previous_fndecl == NULL_TREE)
24163 old_tree = target_option_current_node;
24164 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
24165 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
24166 else
24167 old_tree = target_option_default_node;
24168
24169 tree new_tree;
24170 if (fndecl == NULL_TREE)
24171 {
24172 if (old_tree != target_option_current_node)
24173 new_tree = target_option_current_node;
24174 else
24175 new_tree = NULL_TREE;
24176 }
24177 else
24178 {
24179 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
24180 if (new_tree == NULL_TREE)
24181 new_tree = target_option_default_node;
24182 }
24183
24184 if (TARGET_DEBUG_TARGET)
24185 {
24186 if (new_tree)
24187 {
24188 fprintf (stderr, "\nnew fndecl target specific options:\n");
24189 debug_tree (new_tree);
24190 }
24191
24192 if (old_tree)
24193 {
24194 fprintf (stderr, "\nold fndecl target specific options:\n");
24195 debug_tree (old_tree);
24196 }
24197
24198 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
24199 fprintf (stderr, "--------------------\n");
24200 }
24201
24202 if (new_tree && old_tree != new_tree)
24203 rs6000_activate_target_options (new_tree);
24204
24205 if (fndecl)
24206 rs6000_previous_fndecl = fndecl;
24207 }
24208
24209 \f
24210 /* Save the current options */
24211
24212 static void
24213 rs6000_function_specific_save (struct cl_target_option *ptr,
24214 struct gcc_options *opts,
24215 struct gcc_options */* opts_set */)
24216 {
24217 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
24218 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
24219 }
24220
24221 /* Restore the current options */
24222
24223 static void
24224 rs6000_function_specific_restore (struct gcc_options *opts,
24225 struct gcc_options */* opts_set */,
24226 struct cl_target_option *ptr)
24227
24228 {
24229 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
24230 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
24231 (void) rs6000_option_override_internal (false);
24232 }
24233
24234 /* Print the current options */
24235
24236 static void
24237 rs6000_function_specific_print (FILE *file, int indent,
24238 struct cl_target_option *ptr)
24239 {
24240 rs6000_print_isa_options (file, indent, "Isa options set",
24241 ptr->x_rs6000_isa_flags);
24242
24243 rs6000_print_isa_options (file, indent, "Isa options explicit",
24244 ptr->x_rs6000_isa_flags_explicit);
24245 }
24246
24247 /* Helper function to print the current isa or misc options on a line. */
24248
24249 static void
24250 rs6000_print_options_internal (FILE *file,
24251 int indent,
24252 const char *string,
24253 HOST_WIDE_INT flags,
24254 const char *prefix,
24255 const struct rs6000_opt_mask *opts,
24256 size_t num_elements)
24257 {
24258 size_t i;
24259 size_t start_column = 0;
24260 size_t cur_column;
24261 size_t max_column = 120;
24262 size_t prefix_len = strlen (prefix);
24263 size_t comma_len = 0;
24264 const char *comma = "";
24265
24266 if (indent)
24267 start_column += fprintf (file, "%*s", indent, "");
24268
24269 if (!flags)
24270 {
24271 fprintf (file, DEBUG_FMT_S, string, "<none>");
24272 return;
24273 }
24274
24275 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
24276
24277 /* Print the various mask options. */
24278 cur_column = start_column;
24279 for (i = 0; i < num_elements; i++)
24280 {
24281 bool invert = opts[i].invert;
24282 const char *name = opts[i].name;
24283 const char *no_str = "";
24284 HOST_WIDE_INT mask = opts[i].mask;
24285 size_t len = comma_len + prefix_len + strlen (name);
24286
24287 if (!invert)
24288 {
24289 if ((flags & mask) == 0)
24290 {
24291 no_str = "no-";
24292 len += strlen ("no-");
24293 }
24294
24295 flags &= ~mask;
24296 }
24297
24298 else
24299 {
24300 if ((flags & mask) != 0)
24301 {
24302 no_str = "no-";
24303 len += strlen ("no-");
24304 }
24305
24306 flags |= mask;
24307 }
24308
24309 cur_column += len;
24310 if (cur_column > max_column)
24311 {
24312 fprintf (stderr, ", \\\n%*s", (int)start_column, "");
24313 cur_column = start_column + len;
24314 comma = "";
24315 }
24316
24317 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
24318 comma = ", ";
24319 comma_len = strlen (", ");
24320 }
24321
24322 fputs ("\n", file);
24323 }
24324
24325 /* Helper function to print the current isa options on a line. */
24326
24327 static void
24328 rs6000_print_isa_options (FILE *file, int indent, const char *string,
24329 HOST_WIDE_INT flags)
24330 {
24331 rs6000_print_options_internal (file, indent, string, flags, "-m",
24332 &rs6000_opt_masks[0],
24333 ARRAY_SIZE (rs6000_opt_masks));
24334 }
24335
24336 static void
24337 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
24338 HOST_WIDE_INT flags)
24339 {
24340 rs6000_print_options_internal (file, indent, string, flags, "",
24341 &rs6000_builtin_mask_names[0],
24342 ARRAY_SIZE (rs6000_builtin_mask_names));
24343 }
24344
24345 /* If the user used -mno-vsx, we need turn off all of the implicit ISA 2.06,
24346 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
24347 -mupper-regs-df, etc.).
24348
24349 If the user used -mno-power8-vector, we need to turn off all of the implicit
24350 ISA 2.07 and 3.0 options that relate to the vector unit.
24351
24352 If the user used -mno-power9-vector, we need to turn off all of the implicit
24353 ISA 3.0 options that relate to the vector unit.
24354
24355 This function does not handle explicit options such as the user specifying
24356 -mdirect-move. These are handled in rs6000_option_override_internal, and
24357 the appropriate error is given if needed.
24358
24359 We return a mask of all of the implicit options that should not be enabled
24360 by default. */
24361
24362 static HOST_WIDE_INT
24363 rs6000_disable_incompatible_switches (void)
24364 {
24365 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
24366 size_t i, j;
24367
24368 static const struct {
24369 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
24370 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
24371 const char *const name; /* name of the switch. */
24372 } flags[] = {
24373 { OPTION_MASK_POWER10, OTHER_POWER10_MASKS, "power10" },
24374 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
24375 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
24376 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
24377 { OPTION_MASK_ALTIVEC, OTHER_ALTIVEC_MASKS, "altivec" },
24378 };
24379
24380 for (i = 0; i < ARRAY_SIZE (flags); i++)
24381 {
24382 HOST_WIDE_INT no_flag = flags[i].no_flag;
24383
24384 if ((rs6000_isa_flags & no_flag) == 0
24385 && (rs6000_isa_flags_explicit & no_flag) != 0)
24386 {
24387 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
24388 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
24389 & rs6000_isa_flags
24390 & dep_flags);
24391
24392 if (set_flags)
24393 {
24394 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
24395 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
24396 {
24397 set_flags &= ~rs6000_opt_masks[j].mask;
24398 error ("%<-mno-%s%> turns off %<-m%s%>",
24399 flags[i].name,
24400 rs6000_opt_masks[j].name);
24401 }
24402
24403 gcc_assert (!set_flags);
24404 }
24405
24406 rs6000_isa_flags &= ~dep_flags;
24407 ignore_masks |= no_flag | dep_flags;
24408 }
24409 }
24410
24411 return ignore_masks;
24412 }
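/* For example (behavior implied by the table above), "-mcpu=power9
   -mno-vsx" clears OPTION_MASK_VSX, so all of the implicit vector
   options in OTHER_VSX_VECTOR_MASKS are removed from rs6000_isa_flags
   and added to the returned ignore mask. If the user had also given an
   explicit "-mpower9-vector", the error "-mno-vsx turns off
   -mpower9-vector" is issued rather than silently dropping the flag. */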
24413
24414 \f
24415 /* Helper function for printing the function name when debugging. */
24416
24417 static const char *
24418 get_decl_name (tree fn)
24419 {
24420 tree name;
24421
24422 if (!fn)
24423 return "<null>";
24424
24425 name = DECL_NAME (fn);
24426 if (!name)
24427 return "<no-name>";
24428
24429 return IDENTIFIER_POINTER (name);
24430 }
24431
24432 /* Return the clone id of the target we are compiling code for in a target
24433 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
24434 the priority list for the target clones (ordered from lowest to
24435 highest). */
24436
24437 static int
24438 rs6000_clone_priority (tree fndecl)
24439 {
24440 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
24441 HOST_WIDE_INT isa_masks;
24442 int ret = CLONE_DEFAULT;
24443 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
24444 const char *attrs_str = NULL;
24445
24446 attrs = TREE_VALUE (TREE_VALUE (attrs));
24447 attrs_str = TREE_STRING_POINTER (attrs);
24448
24449 /* Return priority zero for default function. Return the ISA needed for the
24450 function if it is not the default. */
24451 if (strcmp (attrs_str, "default") != 0)
24452 {
24453 if (fn_opts == NULL_TREE)
24454 fn_opts = target_option_default_node;
24455
24456 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
24457 isa_masks = rs6000_isa_flags;
24458 else
24459 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
24460
24461 for (ret = CLONE_MAX - 1; ret != 0; ret--)
24462 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
24463 break;
24464 }
24465
24466 if (TARGET_DEBUG_TARGET)
24467 fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n",
24468 get_decl_name (fndecl), ret);
24469
24470 return ret;
24471 }
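/* A sketch of the user-level feature this supports; the strings come
   from rs6000_clone_map (illustrative, not code in this file):

     __attribute__((target_clones("cpu=power9,default")))
     long mod_func (long a, long b) { return a % b; }

   Each clone is assigned a priority by the loop above, and the highest
   priority whose ISA bits the running machine reports is the version
   selected at dispatch time. */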
24472
24473 /* This compares the priority of target features in function DECL1 and DECL2.
24474 It returns positive value if DECL1 is higher priority, negative value if
24475 DECL2 is higher priority and 0 if they are the same. Note, priorities are
24476 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
24477
24478 static int
24479 rs6000_compare_version_priority (tree decl1, tree decl2)
24480 {
24481 int priority1 = rs6000_clone_priority (decl1);
24482 int priority2 = rs6000_clone_priority (decl2);
24483 int ret = priority1 - priority2;
24484
24485 if (TARGET_DEBUG_TARGET)
24486 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
24487 get_decl_name (decl1), get_decl_name (decl2), ret);
24488
24489 return ret;
24490 }
24491
24492 /* Make a dispatcher declaration for the multi-versioned function DECL.
24493 Calls to DECL will be replaced with calls to the dispatcher
24494 by the front-end. Returns the decl of the dispatcher function. */
24495
24496 static tree
24497 rs6000_get_function_versions_dispatcher (void *decl)
24498 {
24499 tree fn = (tree) decl;
24500 struct cgraph_node *node = NULL;
24501 struct cgraph_node *default_node = NULL;
24502 struct cgraph_function_version_info *node_v = NULL;
24503 struct cgraph_function_version_info *first_v = NULL;
24504
24505 tree dispatch_decl = NULL;
24506
24507 struct cgraph_function_version_info *default_version_info = NULL;
24508 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
24509
24510 if (TARGET_DEBUG_TARGET)
24511 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
24512 get_decl_name (fn));
24513
24514 node = cgraph_node::get (fn);
24515 gcc_assert (node != NULL);
24516
24517 node_v = node->function_version ();
24518 gcc_assert (node_v != NULL);
24519
24520 if (node_v->dispatcher_resolver != NULL)
24521 return node_v->dispatcher_resolver;
24522
24523 /* Find the default version and make it the first node. */
24524 first_v = node_v;
24525 /* Go to the beginning of the chain. */
24526 while (first_v->prev != NULL)
24527 first_v = first_v->prev;
24528
24529 default_version_info = first_v;
24530 while (default_version_info != NULL)
24531 {
24532 const tree decl2 = default_version_info->this_node->decl;
24533 if (is_function_default_version (decl2))
24534 break;
24535 default_version_info = default_version_info->next;
24536 }
24537
24538 /* If there is no default node, just return NULL. */
24539 if (default_version_info == NULL)
24540 return NULL;
24541
24542 /* Make default info the first node. */
24543 if (first_v != default_version_info)
24544 {
24545 default_version_info->prev->next = default_version_info->next;
24546 if (default_version_info->next)
24547 default_version_info->next->prev = default_version_info->prev;
24548 first_v->prev = default_version_info;
24549 default_version_info->next = first_v;
24550 default_version_info->prev = NULL;
24551 }
24552
24553 default_node = default_version_info->this_node;
24554
24555 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
24556 error_at (DECL_SOURCE_LOCATION (default_node->decl),
24557 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
24558 "exports hardware capability bits");
24559 #else
24560
24561 if (targetm.has_ifunc_p ())
24562 {
24563 struct cgraph_function_version_info *it_v = NULL;
24564 struct cgraph_node *dispatcher_node = NULL;
24565 struct cgraph_function_version_info *dispatcher_version_info = NULL;
24566
24567 /* Right now, the dispatching is done via ifunc. */
24568 dispatch_decl = make_dispatcher_decl (default_node->decl);
24569
24570 dispatcher_node = cgraph_node::get_create (dispatch_decl);
24571 gcc_assert (dispatcher_node != NULL);
24572 dispatcher_node->dispatcher_function = 1;
24573 dispatcher_version_info
24574 = dispatcher_node->insert_new_function_version ();
24575 dispatcher_version_info->next = default_version_info;
24576 dispatcher_node->definition = 1;
24577
24578 /* Set the dispatcher for all the versions. */
24579 it_v = default_version_info;
24580 while (it_v != NULL)
24581 {
24582 it_v->dispatcher_resolver = dispatch_decl;
24583 it_v = it_v->next;
24584 }
24585 }
24586 else
24587 {
24588 error_at (DECL_SOURCE_LOCATION (default_node->decl),
24589 "multiversioning needs ifunc which is not supported "
24590 "on this target");
24591 }
24592 #endif
24593
24594 return dispatch_decl;
24595 }
24596
24597 /* Make the resolver function decl to dispatch the versions of a multi-
24598 versioned function, DEFAULT_DECL. Create an empty basic block in the
24599 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
24600 function. */
24601
24602 static tree
24603 make_resolver_func (const tree default_decl,
24604 const tree dispatch_decl,
24605 basic_block *empty_bb)
24606 {
24607 /* Make the resolver function static. The resolver function returns
24608 void *. */
24609 tree decl_name = clone_function_name (default_decl, "resolver");
24610 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
24611 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
24612 tree decl = build_fn_decl (resolver_name, type);
24613 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
24614
24615 DECL_NAME (decl) = decl_name;
24616 TREE_USED (decl) = 1;
24617 DECL_ARTIFICIAL (decl) = 1;
24618 DECL_IGNORED_P (decl) = 0;
24619 TREE_PUBLIC (decl) = 0;
24620 DECL_UNINLINABLE (decl) = 1;
24621
24622 /* Resolver is not external, body is generated. */
24623 DECL_EXTERNAL (decl) = 0;
24624 DECL_EXTERNAL (dispatch_decl) = 0;
24625
24626 DECL_CONTEXT (decl) = NULL_TREE;
24627 DECL_INITIAL (decl) = make_node (BLOCK);
24628 DECL_STATIC_CONSTRUCTOR (decl) = 0;
24629
24630 if (DECL_COMDAT_GROUP (default_decl)
24631 || TREE_PUBLIC (default_decl))
24632 {
24633 /* In this case, each translation unit with a call to this
24634 versioned function will put out a resolver. Ensure it
24635 is comdat to keep just one copy. */
24636 DECL_COMDAT (decl) = 1;
24637 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
24638 }
24639 else
24640 TREE_PUBLIC (dispatch_decl) = 0;
24641
24642 /* Build result decl and add to function_decl. */
24643 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
24644 DECL_CONTEXT (t) = decl;
24645 DECL_ARTIFICIAL (t) = 1;
24646 DECL_IGNORED_P (t) = 1;
24647 DECL_RESULT (decl) = t;
24648
24649 gimplify_function_tree (decl);
24650 push_cfun (DECL_STRUCT_FUNCTION (decl));
24651 *empty_bb = init_lowered_empty_function (decl, false,
24652 profile_count::uninitialized ());
24653
24654 cgraph_node::add_new_function (decl, true);
24655 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
24656
24657 pop_cfun ();
24658
24659 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
24660 DECL_ATTRIBUTES (dispatch_decl)
24661 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
24662
24663 cgraph_node::create_same_body_alias (dispatch_decl, decl);
24664
24665 return decl;
24666 }
24667
24668 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
24669 return a pointer to VERSION_DECL if we are running on a machine that
24670 supports the hardware architecture bits indexed by CLONE_ISA. This function will
24671 be called during version dispatch to decide which function version to
24672 execute. It returns the basic block at the end, to which more conditions
24673 can be added. */
24674
24675 static basic_block
24676 add_condition_to_bb (tree function_decl, tree version_decl,
24677 int clone_isa, basic_block new_bb)
24678 {
24679 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
24680
24681 gcc_assert (new_bb != NULL);
24682 gimple_seq gseq = bb_seq (new_bb);
24683
24684
24685 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
24686 build_fold_addr_expr (version_decl));
24687 tree result_var = create_tmp_var (ptr_type_node);
24688 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
24689 gimple *return_stmt = gimple_build_return (result_var);
24690
24691 if (clone_isa == CLONE_DEFAULT)
24692 {
24693 gimple_seq_add_stmt (&gseq, convert_stmt);
24694 gimple_seq_add_stmt (&gseq, return_stmt);
24695 set_bb_seq (new_bb, gseq);
24696 gimple_set_bb (convert_stmt, new_bb);
24697 gimple_set_bb (return_stmt, new_bb);
24698 pop_cfun ();
24699 return new_bb;
24700 }
24701
24702 tree bool_zero = build_int_cst (bool_int_type_node, 0);
24703 tree cond_var = create_tmp_var (bool_int_type_node);
24704 tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
24705 const char *arg_str = rs6000_clone_map[clone_isa].name;
24706 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
24707 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
24708 gimple_call_set_lhs (call_cond_stmt, cond_var);
24709
24710 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
24711 gimple_set_bb (call_cond_stmt, new_bb);
24712 gimple_seq_add_stmt (&gseq, call_cond_stmt);
24713
24714 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
24715 NULL_TREE, NULL_TREE);
24716 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
24717 gimple_set_bb (if_else_stmt, new_bb);
24718 gimple_seq_add_stmt (&gseq, if_else_stmt);
24719
24720 gimple_seq_add_stmt (&gseq, convert_stmt);
24721 gimple_seq_add_stmt (&gseq, return_stmt);
24722 set_bb_seq (new_bb, gseq);
24723
24724 basic_block bb1 = new_bb;
24725 edge e12 = split_block (bb1, if_else_stmt);
24726 basic_block bb2 = e12->dest;
24727 e12->flags &= ~EDGE_FALLTHRU;
24728 e12->flags |= EDGE_TRUE_VALUE;
24729
24730 edge e23 = split_block (bb2, return_stmt);
24731 gimple_set_bb (convert_stmt, bb2);
24732 gimple_set_bb (return_stmt, bb2);
24733
24734 basic_block bb3 = e23->dest;
24735 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
24736
24737 remove_edge (e23);
24738 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
24739
24740 pop_cfun ();
24741 return bb3;
24742 }
24743
24744 /* This function generates the dispatch function for multi-versioned functions.
24745 DISPATCH_DECL is the function which will contain the dispatch logic.
24746 FNDECLS holds the function choices for dispatch, passed as a vector.
24747 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
24748 code is generated. */
24749
24750 static int
24751 dispatch_function_versions (tree dispatch_decl,
24752 void *fndecls_p,
24753 basic_block *empty_bb)
24754 {
24755 int ix;
24756 tree ele;
24757 vec<tree> *fndecls;
24758 tree clones[CLONE_MAX];
24759
24760 if (TARGET_DEBUG_TARGET)
24761 fputs ("dispatch_function_versions, top\n", stderr);
24762
24763 gcc_assert (dispatch_decl != NULL
24764 && fndecls_p != NULL
24765 && empty_bb != NULL);
24766
24767 /* fndecls_p is actually a vector. */
24768 fndecls = static_cast<vec<tree> *> (fndecls_p);
24769
24770 /* At least one more version other than the default. */
24771 gcc_assert (fndecls->length () >= 2);
24772
24773 /* The first version in the vector is the default decl. */
24774 memset ((void *) clones, '\0', sizeof (clones));
24775 clones[CLONE_DEFAULT] = (*fndecls)[0];
24776
24777 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
24778 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
24779 __builtin_cpu_supports ensures that the TOC fields are set up by requiring a
24780 recent glibc. If we ever need to call __builtin_cpu_init, we would need
24781 to insert the code here to do the call. */
24782
24783 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
24784 {
24785 int priority = rs6000_clone_priority (ele);
24786 if (!clones[priority])
24787 clones[priority] = ele;
24788 }
24789
24790 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
24791 if (clones[ix])
24792 {
24793 if (TARGET_DEBUG_TARGET)
24794 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
24795 ix, get_decl_name (clones[ix]));
24796
24797 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
24798 *empty_bb);
24799 }
24800
24801 return 0;
24802 }
24803
24804 /* Generate the dispatching code body to dispatch multi-versioned function
24805 DECL. The target hook is called to process the "target" attributes and
24806 provide the code to dispatch the right function at run-time. NODE points
24807 to the dispatcher decl whose body will be created. */
24808
24809 static tree
24810 rs6000_generate_version_dispatcher_body (void *node_p)
24811 {
24812 tree resolver;
24813 basic_block empty_bb;
24814 struct cgraph_node *node = (cgraph_node *) node_p;
24815 struct cgraph_function_version_info *ninfo = node->function_version ();
24816
24817 if (ninfo->dispatcher_resolver)
24818 return ninfo->dispatcher_resolver;
24819
24820 /* node is going to be an alias, so remove the finalized bit. */
24821 node->definition = false;
24822
24823 /* The first version in the chain corresponds to the default version. */
24824 ninfo->dispatcher_resolver = resolver
24825 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
24826
24827 if (TARGET_DEBUG_TARGET)
24828 fprintf (stderr, "rs6000_get_function_versions_dispatcher, %s\n",
24829 get_decl_name (resolver));
24830
24831 push_cfun (DECL_STRUCT_FUNCTION (resolver));
24832 auto_vec<tree, 2> fn_ver_vec;
24833
24834 for (struct cgraph_function_version_info *vinfo = ninfo->next;
24835 vinfo;
24836 vinfo = vinfo->next)
24837 {
24838 struct cgraph_node *version = vinfo->this_node;
24839 /* Check for virtual functions here again, as by this time it should
24840 have been determined if this function needs a vtable index or
24841 not. This happens for methods in derived classes that override
24842 virtual methods in base classes but are not explicitly marked as
24843 virtual. */
24844 if (DECL_VINDEX (version->decl))
24845 sorry ("Virtual function multiversioning not supported");
24846
24847 fn_ver_vec.safe_push (version->decl);
24848 }
24849
24850 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
24851 cgraph_edge::rebuild_edges ();
24852 pop_cfun ();
24853 return resolver;
24854 }
24855
24856 \f
24857 /* Hook to determine if one function can safely inline another. */
24858
24859 static bool
24860 rs6000_can_inline_p (tree caller, tree callee)
24861 {
24862 bool ret = false;
24863 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
24864 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
24865
24866 /* If the callee has no option attributes, then it is ok to inline. */
24867 if (!callee_tree)
24868 ret = true;
24869
24870 else
24871 {
24872 HOST_WIDE_INT caller_isa;
24873 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24874 HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
24875 HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;
24876
24877 /* If the caller has option attributes, then use them.
24878 Otherwise, use the command line options. */
24879 if (caller_tree)
24880 caller_isa = TREE_TARGET_OPTION (caller_tree)->x_rs6000_isa_flags;
24881 else
24882 caller_isa = rs6000_isa_flags;
24883
24884 /* The callee's options must be a subset of the caller's options, i.e.
24885 a vsx function may inline an altivec function, but a no-vsx function
24886 must not inline a vsx function. However, for those options that the
24887 callee has explicitly enabled or disabled, then we must enforce that
24888 the callee's and caller's options match exactly; see PR70010. */
24889 if (((caller_isa & callee_isa) == callee_isa)
24890 && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
24891 ret = true;
24892 }
24893
24894 if (TARGET_DEBUG_TARGET)
24895 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
24896 get_decl_name (caller), get_decl_name (callee),
24897 (ret ? "can" : "cannot"));
24898
24899 return ret;
24900 }
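/* Examples of the subset rule above (illustrative): a caller compiled
   with -mvsx may inline a callee that only requires altivec, since the
   VSX flags are a superset; a caller compiled with -mno-vsx must not
   inline a callee declared with attribute((target("vsx"))). For an
   option the callee sets explicitly, such as target("no-htm"), the
   caller's setting must match exactly (see PR70010). */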
24901 \f
24902 /* Allocate a stack temp and fix up the address so it meets the particular
24903 memory requirements (either offsettable or REG+REG addressing). */
24904
24905 rtx
24906 rs6000_allocate_stack_temp (machine_mode mode,
24907 bool offsettable_p,
24908 bool reg_reg_p)
24909 {
24910 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
24911 rtx addr = XEXP (stack, 0);
24912 int strict_p = reload_completed;
24913
24914 if (!legitimate_indirect_address_p (addr, strict_p))
24915 {
24916 if (offsettable_p
24917 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
24918 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
24919
24920 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
24921 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
24922 }
24923
24924 return stack;
24925 }
24926
24927 /* Given a memory reference, if it is not a reg or reg+reg addressing,
24928 convert to such a form to deal with memory reference instructions
24929 like STFIWX and LDBRX that only take reg+reg addressing. */
24930
24931 rtx
24932 rs6000_force_indexed_or_indirect_mem (rtx x)
24933 {
24934 machine_mode mode = GET_MODE (x);
24935
24936 gcc_assert (MEM_P (x));
24937 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
24938 {
24939 rtx addr = XEXP (x, 0);
24940 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
24941 {
24942 rtx reg = XEXP (addr, 0);
24943 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
24944 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
24945 gcc_assert (REG_P (reg));
24946 emit_insn (gen_add3_insn (reg, reg, size_rtx));
24947 addr = reg;
24948 }
24949 else if (GET_CODE (addr) == PRE_MODIFY)
24950 {
24951 rtx reg = XEXP (addr, 0);
24952 rtx expr = XEXP (addr, 1);
24953 gcc_assert (REG_P (reg));
24954 gcc_assert (GET_CODE (expr) == PLUS);
24955 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
24956 addr = reg;
24957 }
24958
24959 if (GET_CODE (addr) == PLUS)
24960 {
24961 rtx op0 = XEXP (addr, 0);
24962 rtx op1 = XEXP (addr, 1);
24963 op0 = force_reg (Pmode, op0);
24964 op1 = force_reg (Pmode, op1);
24965 x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
24966 }
24967 else
24968 x = replace_equiv_address (x, force_reg (Pmode, addr));
24969 }
24970
24971 return x;
24972 }
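/* For example (illustrative), a PRE_INC address such as
   (mem (pre_inc (reg R))) is handled above by emitting an add of the
   mode size into R and then using (mem (reg R)), while a
   (plus reg const) address is forced into two registers so the final
   MEM is acceptable to reg+reg instructions such as LDBRX. */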
24973
24974 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
24975
24976 On the RS/6000, all integer constants are acceptable, most won't be valid
24977 for particular insns, though. Only easy FP constants are acceptable. */
24978
24979 static bool
24980 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
24981 {
24982 if (TARGET_ELF && tls_referenced_p (x))
24983 return false;
24984
24985 if (CONST_DOUBLE_P (x))
24986 return easy_fp_constant (x, mode);
24987
24988 if (GET_CODE (x) == CONST_VECTOR)
24989 return easy_vector_constant (x, mode);
24990
24991 return true;
24992 }
24993
24994 \f
24995 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
24996
24997 static bool
24998 chain_already_loaded (rtx_insn *last)
24999 {
25000 for (; last != NULL; last = PREV_INSN (last))
25001 {
25002 if (NONJUMP_INSN_P (last))
25003 {
25004 rtx patt = PATTERN (last);
25005
25006 if (GET_CODE (patt) == SET)
25007 {
25008 rtx lhs = XEXP (patt, 0);
25009
25010 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
25011 return true;
25012 }
25013 }
25014 }
25015 return false;
25016 }
25017
25018 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
25019
25020 void
25021 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25022 {
25023 rtx func = func_desc;
25024 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
25025 rtx toc_load = NULL_RTX;
25026 rtx toc_restore = NULL_RTX;
25027 rtx func_addr;
25028 rtx abi_reg = NULL_RTX;
25029 rtx call[5];
25030 int n_call;
25031 rtx insn;
25032 bool is_pltseq_longcall;
25033
25034 if (global_tlsarg)
25035 tlsarg = global_tlsarg;
25036
25037 /* Handle longcall attributes. */
25038 is_pltseq_longcall = false;
25039 if ((INTVAL (cookie) & CALL_LONG) != 0
25040 && GET_CODE (func_desc) == SYMBOL_REF)
25041 {
25042 func = rs6000_longcall_ref (func_desc, tlsarg);
25043 if (TARGET_PLTSEQ)
25044 is_pltseq_longcall = true;
25045 }
25046
25047 /* Handle indirect calls. */
25048 if (!SYMBOL_REF_P (func)
25049 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
25050 {
25051 if (!rs6000_pcrel_p ())
25052 {
25053 /* Save the TOC into its reserved slot before the call,
25054 and prepare to restore it after the call. */
25055 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
25056 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
25057 gen_rtvec (1, stack_toc_offset),
25058 UNSPEC_TOCSLOT);
25059 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
25060
25061 /* Can we optimize saving the TOC in the prologue or
25062 do we need to do it at every call? */
25063 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
25064 cfun->machine->save_toc_in_prologue = true;
25065 else
25066 {
25067 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25068 rtx stack_toc_mem = gen_frame_mem (Pmode,
25069 gen_rtx_PLUS (Pmode, stack_ptr,
25070 stack_toc_offset));
25071 MEM_VOLATILE_P (stack_toc_mem) = 1;
25072 if (is_pltseq_longcall)
25073 {
25074 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
25075 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25076 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
25077 }
25078 else
25079 emit_move_insn (stack_toc_mem, toc_reg);
25080 }
25081 }
25082
25083 if (DEFAULT_ABI == ABI_ELFv2)
25084 {
25085 /* A function pointer in the ELFv2 ABI is just a plain address, but
25086 the ABI requires it to be loaded into r12 before the call. */
25087 func_addr = gen_rtx_REG (Pmode, 12);
25088 emit_move_insn (func_addr, func);
25089 abi_reg = func_addr;
25090 /* Indirect calls via CTR are strongly preferred over indirect
25091 calls via LR, so move the address there. Needed to mark
25092 this insn for linker plt sequence editing too. */
25093 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25094 if (is_pltseq_longcall)
25095 {
25096 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
25097 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25098 emit_insn (gen_rtx_SET (func_addr, mark_func));
25099 v = gen_rtvec (2, func_addr, func_desc);
25100 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25101 }
25102 else
25103 emit_move_insn (func_addr, abi_reg);
25104 }
25105 else
25106 {
25107 /* A function pointer under AIX is a pointer to a data area whose
25108 first word contains the actual address of the function, whose
25109 second word contains a pointer to its TOC, and whose third word
25110 contains a value to place in the static chain register (r11).
25111 Note that if we load the static chain, our "trampoline" need
25112 not have any executable code. */
25113
25114 /* Load up address of the actual function. */
25115 func = force_reg (Pmode, func);
25116 func_addr = gen_reg_rtx (Pmode);
25117 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
25118
25119 /* Indirect calls via CTR are strongly preferred over indirect
25120 calls via LR, so move the address there. */
25121 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
25122 emit_move_insn (ctr_reg, func_addr);
25123 func_addr = ctr_reg;
25124
25125 /* Prepare to load the TOC of the called function. Note that the
25126 TOC load must happen immediately before the actual call so
25127 that unwinding the TOC registers works correctly. See the
25128 comment in frob_update_context. */
25129 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
25130 rtx func_toc_mem = gen_rtx_MEM (Pmode,
25131 gen_rtx_PLUS (Pmode, func,
25132 func_toc_offset));
25133 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
25134
25135 /* If we have a static chain, load it up. But, if the call was
25136 originally direct, the 3rd word has not been written since no
25137 trampoline has been built, so we ought not to load it, lest we
25138 override a static chain value. */
25139 if (!(GET_CODE (func_desc) == SYMBOL_REF
25140 && SYMBOL_REF_FUNCTION_P (func_desc))
25141 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
25142 && !chain_already_loaded (get_current_sequence ()->next->last))
25143 {
25144 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
25145 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
25146 rtx func_sc_mem = gen_rtx_MEM (Pmode,
25147 gen_rtx_PLUS (Pmode, func,
25148 func_sc_offset));
25149 emit_move_insn (sc_reg, func_sc_mem);
25150 abi_reg = sc_reg;
25151 }
25152 }
25153 }
25154 else
25155 {
25156 /* No TOC register needed for calls from PC-relative callers. */
25157 if (!rs6000_pcrel_p ())
25158 /* Direct calls use the TOC: for local calls, the callee will
25159 assume the TOC register is set; for non-local calls, the
25160 PLT stub needs the TOC register. */
25161 abi_reg = toc_reg;
25162 func_addr = func;
25163 }
25164
25165 /* Create the call. */
25166 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25167 if (value != NULL_RTX)
25168 call[0] = gen_rtx_SET (value, call[0]);
25169 call[1] = gen_rtx_USE (VOIDmode, cookie);
25170 n_call = 2;
25171
25172 if (toc_load)
25173 call[n_call++] = toc_load;
25174 if (toc_restore)
25175 call[n_call++] = toc_restore;
25176
25177 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25178
25179 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
25180 insn = emit_call_insn (insn);
25181
25182 /* Mention all registers defined by the ABI to hold information
25183 as uses in CALL_INSN_FUNCTION_USAGE. */
25184 if (abi_reg)
25185 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25186 }
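
/* For illustration only: with the 64-bit AIX ABI, an indirect call
   through a function descriptor whose address is in r3 expands to
   roughly the following sequence (register numbers and the TOC save
   offset are examples, not guaranteed output):

	std 2,40(1)	# save the caller's TOC in its stack slot
	ld 9,0(3)	# word 0 of the descriptor: function address
	mtctr 9
	ld 11,16(3)	# word 2: static chain, if one is needed
	ld 2,8(3)	# word 1: the callee's TOC pointer
	bctrl
	ld 2,40(1)	# restore the caller's TOC after the call  */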
25187
25188 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
25189
25190 void
25191 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25192 {
25193 rtx call[2];
25194 rtx insn;
25195 rtx r12 = NULL_RTX;
25196 rtx func_addr = func_desc;
25197
25198 gcc_assert (INTVAL (cookie) == 0);
25199
25200 if (global_tlsarg)
25201 tlsarg = global_tlsarg;
25202
25203 /* For ELFv2, r12 and CTR need to hold the function address
25204 for an indirect call. */
25205 if (GET_CODE (func_desc) != SYMBOL_REF && DEFAULT_ABI == ABI_ELFv2)
25206 {
25207 r12 = gen_rtx_REG (Pmode, 12);
25208 emit_move_insn (r12, func_desc);
25209 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25210 emit_move_insn (func_addr, r12);
25211 }
25212
25213 /* Create the call. */
25214 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25215 if (value != NULL_RTX)
25216 call[0] = gen_rtx_SET (value, call[0]);
25217
25218 call[1] = simple_return_rtx;
25219
25220 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
25221 insn = emit_call_insn (insn);
25222
25223 /* Note use of the TOC register. */
25224 if (!rs6000_pcrel_p ())
25225 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
25226 gen_rtx_REG (Pmode, TOC_REGNUM));
25227
25228 /* Note use of r12. */
25229 if (r12)
25230 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r12);
25231 }
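
/* For illustration, an ELFv2 indirect sibling call with the target
   address in, say, r9 comes out as roughly:

	mr 12,9		# the ABI requires the address in r12
	mtctr 12
	bctr		# tail call: branch without setting LR  */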
25232
25233 /* Expand code to perform a call under the SYSV4 ABI. */
25234
25235 void
25236 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25237 {
25238 rtx func = func_desc;
25239 rtx func_addr;
25240 rtx call[4];
25241 rtx insn;
25242 rtx abi_reg = NULL_RTX;
25243 int n;
25244
25245 if (global_tlsarg)
25246 tlsarg = global_tlsarg;
25247
25248 /* Handle longcall attributes. */
25249 if ((INTVAL (cookie) & CALL_LONG) != 0
25250 && GET_CODE (func_desc) == SYMBOL_REF)
25251 {
25252 func = rs6000_longcall_ref (func_desc, tlsarg);
25253 /* If the longcall was implemented as an inline PLT call using
25254 PLT unspecs then func will be REG:r11. If not, func will be
25255 a pseudo reg. The inline PLT call sequence supports lazy
25256 linking (and longcalls to functions in dlopen'd libraries).
25257 The other style of longcalls don't. The lazy linking entry
25258 to the dynamic symbol resolver requires r11 be the function
25259 address (as it is for linker generated PLT stubs). Ensure
25260 r11 stays valid to the bctrl by marking r11 used by the call. */
25261 if (TARGET_PLTSEQ)
25262 abi_reg = func;
25263 }
25264
25265 /* Handle indirect calls. */
25266 if (GET_CODE (func) != SYMBOL_REF)
25267 {
25268 func = force_reg (Pmode, func);
25269
25270 /* Indirect calls via CTR are strongly preferred over indirect
25271 calls via LR, so move the address there. That can't be left
25272 to reload because we want to mark every instruction in an
25273 inline PLT call sequence with a reloc, enabling the linker to
25274 edit the sequence back to a direct call when that makes sense. */
25275 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25276 if (abi_reg)
25277 {
25278 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
25279 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25280 emit_insn (gen_rtx_SET (func_addr, mark_func));
25281 v = gen_rtvec (2, func_addr, func_desc);
25282 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25283 }
25284 else
25285 emit_move_insn (func_addr, func);
25286 }
25287 else
25288 func_addr = func;
25289
25290 /* Create the call. */
25291 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25292 if (value != NULL_RTX)
25293 call[0] = gen_rtx_SET (value, call[0]);
25294
25295 call[1] = gen_rtx_USE (VOIDmode, cookie);
25296 n = 2;
25297 if (TARGET_SECURE_PLT
25298 && flag_pic
25299 && GET_CODE (func_addr) == SYMBOL_REF
25300 && !SYMBOL_REF_LOCAL_P (func_addr))
25301 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
25302
25303 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25304
25305 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
25306 insn = emit_call_insn (insn);
25307 if (abi_reg)
25308 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25309 }
25310
25311 /* Expand code to perform a sibling call under the SysV4 ABI. */
25312
25313 void
25314 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25315 {
25316 rtx func = func_desc;
25317 rtx func_addr;
25318 rtx call[3];
25319 rtx insn;
25320 rtx abi_reg = NULL_RTX;
25321
25322 if (global_tlsarg)
25323 tlsarg = global_tlsarg;
25324
25325 /* Handle longcall attributes. */
25326 if ((INTVAL (cookie) & CALL_LONG) != 0
25327 && GET_CODE (func_desc) == SYMBOL_REF)
25328 {
25329 func = rs6000_longcall_ref (func_desc, tlsarg);
25330 /* If the longcall was implemented as an inline PLT call using
25331 PLT unspecs then func will be REG:r11. If not, func will be
25332 a pseudo reg. The inline PLT call sequence supports lazy
25333 linking (and longcalls to functions in dlopen'd libraries).
25334 The other style of longcalls don't. The lazy linking entry
25335 to the dynamic symbol resolver requires r11 be the function
25336 address (as it is for linker generated PLT stubs). Ensure
25337 r11 stays valid to the bctr by marking r11 used by the call. */
25338 if (TARGET_PLTSEQ)
25339 abi_reg = func;
25340 }
25341
25342 /* Handle indirect calls. */
25343 if (GET_CODE (func) != SYMBOL_REF)
25344 {
25345 func = force_reg (Pmode, func);
25346
25347 /* Indirect sibcalls must go via CTR. That can't be left to
25348 reload because we want to mark every instruction in an inline
25349 PLT call sequence with a reloc, enabling the linker to edit
25350 the sequence back to a direct call when that makes sense. */
25351 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25352 if (abi_reg)
25353 {
25354 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
25355 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25356 emit_insn (gen_rtx_SET (func_addr, mark_func));
25357 v = gen_rtvec (2, func_addr, func_desc);
25358 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25359 }
25360 else
25361 emit_move_insn (func_addr, func);
25362 }
25363 else
25364 func_addr = func;
25365
25366 /* Create the call. */
25367 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25368 if (value != NULL_RTX)
25369 call[0] = gen_rtx_SET (value, call[0]);
25370
25371 call[1] = gen_rtx_USE (VOIDmode, cookie);
25372 call[2] = simple_return_rtx;
25373
25374 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
25375 insn = emit_call_insn (insn);
25376 if (abi_reg)
25377 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25378 }
25379
25380 #if TARGET_MACHO
25381
25382 /* Expand code to perform a call under the Darwin ABI.
25383 Modulo handling of mlongcall, this is much the same as sysv.
25384 If/when the longcall optimisation is removed, we could drop this
25385 code and use the sysv case (taking care to avoid the TLS stuff).
25386
25387 We can use this for sibcalls too, if needed. */
25388
25389 void
25390 rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
25391 rtx cookie, bool sibcall)
25392 {
25393 rtx func = func_desc;
25394 rtx func_addr;
25395 rtx call[3];
25396 rtx insn;
25397 int cookie_val = INTVAL (cookie);
25398 bool make_island = false;
25399
25400 /* Handle longcall attributes.  There are two cases for Darwin:
25401 1) Newer linkers are capable of synthesising any branch islands needed.
25402 2) We need a helper branch island synthesised by the compiler.
25403 The second case has mostly been retired and we don't use it for m64.
25404 In fact, it is only an optimisation; we could just indirect as sysv
25405 does, but we keep it for backwards compatibility for now.
25406 If we're going to use this, then we need to keep the CALL_LONG bit set,
25407 so that we can pick up the special insn form later. */
25408 if ((cookie_val & CALL_LONG) != 0
25409 && GET_CODE (func_desc) == SYMBOL_REF)
25410 {
25411 /* FIXME: the longcall opt should not hang off this flag, it is most
25412 likely incorrect for kernel-mode code-generation. */
25413 if (darwin_symbol_stubs && TARGET_32BIT)
25414 make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */
25415 else
25416 {
25417 /* The linker is capable of doing this, but the user explicitly
25418 asked for -mlongcall, so we'll do the 'normal' version. */
25419 func = rs6000_longcall_ref (func_desc, NULL_RTX);
25420 cookie_val &= ~CALL_LONG; /* Handled, zap it. */
25421 }
25422 }
25423
25424 /* Handle indirect calls. */
25425 if (GET_CODE (func) != SYMBOL_REF)
25426 {
25427 func = force_reg (Pmode, func);
25428
25429 /* Indirect calls via CTR are strongly preferred over indirect
25430 calls via LR, and are required for indirect sibcalls, so move
25431 the address there. */
25432 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25433 emit_move_insn (func_addr, func);
25434 }
25435 else
25436 func_addr = func;
25437
25438 /* Create the call. */
25439 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25440 if (value != NULL_RTX)
25441 call[0] = gen_rtx_SET (value, call[0]);
25442
25443 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
25444
25445 if (sibcall)
25446 call[2] = simple_return_rtx;
25447 else
25448 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25449
25450 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
25451 insn = emit_call_insn (insn);
25452 /* Now we have the debug info in the insn, we can set up the branch island
25453 if we're using one. */
25454 if (make_island)
25455 {
25456 tree funname = get_identifier (XSTR (func_desc, 0));
25457
25458 if (no_previous_def (funname))
25459 {
25460 rtx label_rtx = gen_label_rtx ();
25461 char *label_buf, temp_buf[256];
25462 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
25463 CODE_LABEL_NUMBER (label_rtx));
25464 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
25465 tree labelname = get_identifier (label_buf);
25466 add_compiler_branch_island (labelname, funname,
25467 insn_line ((const rtx_insn*)insn));
25468 }
25469 }
25470 }
25471 #endif
25472
25473 void
25474 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
25475 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
25476 {
25477 #if TARGET_MACHO
25478 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
25479 #else
25480 gcc_unreachable();
25481 #endif
25482 }
25483
25484
25485 void
25486 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
25487 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
25488 {
25489 #if TARGET_MACHO
25490 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
25491 #else
25492 gcc_unreachable();
25493 #endif
25494 }
25495
25496 /* Return whether we should generate PC-relative code for FNDECL. */
25497 bool
25498 rs6000_fndecl_pcrel_p (const_tree fndecl)
25499 {
25500 if (DEFAULT_ABI != ABI_ELFv2)
25501 return false;
25502
25503 struct cl_target_option *opts = target_opts_for_fn (fndecl);
25504
25505 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
25506 && TARGET_CMODEL == CMODEL_MEDIUM);
25507 }
25508
25509 /* Return whether we should generate PC-relative code for *FN. */
25510 bool
25511 rs6000_function_pcrel_p (struct function *fn)
25512 {
25513 if (DEFAULT_ABI != ABI_ELFv2)
25514 return false;
25515
25516 /* Optimize usual case. */
25517 if (fn == cfun)
25518 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
25519 && TARGET_CMODEL == CMODEL_MEDIUM);
25520
25521 return rs6000_fndecl_pcrel_p (fn->decl);
25522 }
25523
25524 /* Return whether we should generate PC-relative code for the current
25525 function. */
25526 bool
25527 rs6000_pcrel_p ()
25528 {
25529 return (DEFAULT_ABI == ABI_ELFv2
25530 && (rs6000_isa_flags & OPTION_MASK_PCREL) != 0
25531 && TARGET_CMODEL == CMODEL_MEDIUM);
25532 }
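
/* As an illustration of what the PC-relative predicates above enable:
   a global load that classically goes through the TOC,

	addis 9,2,x@toc@ha
	lwz 3,x@toc@l(9)

   can instead be a single prefixed PC-relative load when
   rs6000_pcrel_p () is true (example syntax for power10):

	plwz 3,x@pcrel(0),1  */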
25533
25534 \f
25535 /* Given an address (ADDR), a mode (MODE), and the format of the
25536 corresponding non-prefixed address (NON_PREFIXED_FORMAT), return the
25537 instruction format for the address. */
25538
25539 enum insn_form
25540 address_to_insn_form (rtx addr,
25541 machine_mode mode,
25542 enum non_prefixed_form non_prefixed_format)
25543 {
25544 /* Single register is easy. */
25545 if (REG_P (addr) || SUBREG_P (addr))
25546 return INSN_FORM_BASE_REG;
25547
25548 /* If the non-prefixed instruction format doesn't support offset
25549 addressing, make sure only indexed addressing is allowed.
25550
25551 We special case SDmode so that the register allocator does not try to move
25552 SDmode through GPR registers, but instead uses the 32-bit integer load and
25553 store instructions for the floating point registers. */
25554 if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
25555 {
25556 if (GET_CODE (addr) != PLUS)
25557 return INSN_FORM_BAD;
25558
25559 rtx op0 = XEXP (addr, 0);
25560 rtx op1 = XEXP (addr, 1);
25561 if (!REG_P (op0) && !SUBREG_P (op0))
25562 return INSN_FORM_BAD;
25563
25564 if (!REG_P (op1) && !SUBREG_P (op1))
25565 return INSN_FORM_BAD;
25566
25567 return INSN_FORM_X;
25568 }
25569
25570 /* Deal with update forms. */
25571 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
25572 return INSN_FORM_UPDATE;
25573
25574 /* Handle PC-relative symbols and labels. Check for both local and
25575 external symbols. Assume labels are always local. TLS symbols
25576 are not PC-relative for rs6000. */
25577 if (TARGET_PCREL)
25578 {
25579 if (LABEL_REF_P (addr))
25580 return INSN_FORM_PCREL_LOCAL;
25581
25582 if (SYMBOL_REF_P (addr) && !SYMBOL_REF_TLS_MODEL (addr))
25583 {
25584 if (!SYMBOL_REF_LOCAL_P (addr))
25585 return INSN_FORM_PCREL_EXTERNAL;
25586 else
25587 return INSN_FORM_PCREL_LOCAL;
25588 }
25589 }
25590
25591 if (GET_CODE (addr) == CONST)
25592 addr = XEXP (addr, 0);
25593
25594 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
25595 if (GET_CODE (addr) == LO_SUM)
25596 return INSN_FORM_LO_SUM;
25597
25598 /* Everything below must be an offset address of some form. */
25599 if (GET_CODE (addr) != PLUS)
25600 return INSN_FORM_BAD;
25601
25602 rtx op0 = XEXP (addr, 0);
25603 rtx op1 = XEXP (addr, 1);
25604
25605 /* Check for indexed addresses. */
25606 if (REG_P (op1) || SUBREG_P (op1))
25607 {
25608 if (REG_P (op0) || SUBREG_P (op0))
25609 return INSN_FORM_X;
25610
25611 return INSN_FORM_BAD;
25612 }
25613
25614 if (!CONST_INT_P (op1))
25615 return INSN_FORM_BAD;
25616
25617 HOST_WIDE_INT offset = INTVAL (op1);
25618 if (!SIGNED_INTEGER_34BIT_P (offset))
25619 return INSN_FORM_BAD;
25620
25621 /* Check for local and external PC-relative addresses. Labels are always
25622 local. TLS symbols are not PC-relative for rs6000. */
25623 if (TARGET_PCREL)
25624 {
25625 if (LABEL_REF_P (op0))
25626 return INSN_FORM_PCREL_LOCAL;
25627
25628 if (SYMBOL_REF_P (op0) && !SYMBOL_REF_TLS_MODEL (op0))
25629 {
25630 if (!SYMBOL_REF_LOCAL_P (op0))
25631 return INSN_FORM_PCREL_EXTERNAL;
25632 else
25633 return INSN_FORM_PCREL_LOCAL;
25634 }
25635 }
25636
25637 /* If it isn't PC-relative, the address must use a base register. */
25638 if (!REG_P (op0) && !SUBREG_P (op0))
25639 return INSN_FORM_BAD;
25640
25641 /* Large offsets must be prefixed. */
25642 if (!SIGNED_INTEGER_16BIT_P (offset))
25643 {
25644 if (TARGET_PREFIXED)
25645 return INSN_FORM_PREFIXED_NUMERIC;
25646
25647 return INSN_FORM_BAD;
25648 }
25649
25650 /* We have a 16-bit offset, see what default instruction format to use. */
25651 if (non_prefixed_format == NON_PREFIXED_DEFAULT)
25652 {
25653 unsigned size = GET_MODE_SIZE (mode);
25654
25655 /* On 64-bit systems, assume 64-bit integers need to use DS form
25656 addresses (for LD/STD). VSX vectors need to use DQ form addresses
25657 (for LXV and STXV). TImode is problematic in that its normal usage
25658 is expected to be GPRs where it wants a DS instruction format, but if
25659 it goes into the vector registers, it wants a DQ instruction
25660 format. */
25661 if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
25662 non_prefixed_format = NON_PREFIXED_DS;
25663
25664 else if (TARGET_VSX && size >= 16
25665 && (VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode)))
25666 non_prefixed_format = NON_PREFIXED_DQ;
25667
25668 else
25669 non_prefixed_format = NON_PREFIXED_D;
25670 }
25671
25672 /* Classify the D/DS/DQ-form addresses. */
25673 switch (non_prefixed_format)
25674 {
25675 /* Instruction format D, all 16 bits are valid. */
25676 case NON_PREFIXED_D:
25677 return INSN_FORM_D;
25678
25679 /* Instruction format DS, bottom 2 bits must be 0. */
25680 case NON_PREFIXED_DS:
25681 if ((offset & 3) == 0)
25682 return INSN_FORM_DS;
25683
25684 else if (TARGET_PREFIXED)
25685 return INSN_FORM_PREFIXED_NUMERIC;
25686
25687 else
25688 return INSN_FORM_BAD;
25689
25690 /* Instruction format DQ, bottom 4 bits must be 0. */
25691 case NON_PREFIXED_DQ:
25692 if ((offset & 15) == 0)
25693 return INSN_FORM_DQ;
25694
25695 else if (TARGET_PREFIXED)
25696 return INSN_FORM_PREFIXED_NUMERIC;
25697
25698 else
25699 return INSN_FORM_BAD;
25700
25701 default:
25702 break;
25703 }
25704
25705 return INSN_FORM_BAD;
25706 }
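
/* Some illustrative classifications, assuming a 64-bit target with
   prefixed instructions enabled:

	(reg r3)			-> INSN_FORM_BASE_REG
	(plus (reg r3) (reg r4))	-> INSN_FORM_X
	(plus (reg r3) (const_int 8)) for DImode in a GPR
					-> INSN_FORM_DS
	(plus (reg r3) (const_int 6)) for DImode in a GPR
					-> INSN_FORM_PREFIXED_NUMERIC,
					   since 6 is not a multiple of 4
	(plus (reg r3) (const_int 100000))
					-> INSN_FORM_PREFIXED_NUMERIC,
					   since the offset does not fit
					   in 16 bits  */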
25707
25708 /* Helper function to see if we're potentially looking at lfs/stfs.
25709 - PARALLEL containing a SET and a CLOBBER
25710 - stfs:
25711 - SET is from UNSPEC_SI_FROM_SF to MEM:SI
25712 - CLOBBER is a V4SF
25713 - lfs:
25714 - SET is from UNSPEC_SF_FROM_SI to REG:SF
25715 - CLOBBER is a DI
25716 */
25717
25718 static bool
25719 is_lfs_stfs_insn (rtx_insn *insn)
25720 {
25721 rtx pattern = PATTERN (insn);
25722 if (GET_CODE (pattern) != PARALLEL)
25723 return false;
25724
25725 /* This should be a parallel with exactly one set and one clobber. */
25726 if (XVECLEN (pattern, 0) != 2)
25727 return false;
25728
25729 rtx set = XVECEXP (pattern, 0, 0);
25730 if (GET_CODE (set) != SET)
25731 return false;
25732
25733 rtx clobber = XVECEXP (pattern, 0, 1);
25734 if (GET_CODE (clobber) != CLOBBER)
25735 return false;
25736
25737 /* For the stfs case, all we care about is that the destination of the
25738 SET is a mem:SI, the source is an UNSPEC_SI_FROM_SF, and the clobber
25739 is a scratch:V4SF; the lfs case is checked analogously below. */
25740
25741 rtx dest = SET_DEST (set);
25742 rtx src = SET_SRC (set);
25743 rtx scratch = SET_DEST (clobber);
25744
25745 if (GET_CODE (src) != UNSPEC)
25746 return false;
25747
25748 /* stfs case. */
25749 if (XINT (src, 1) == UNSPEC_SI_FROM_SF
25750 && GET_CODE (dest) == MEM && GET_MODE (dest) == SImode
25751 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == V4SFmode)
25752 return true;
25753
25754 /* lfs case. */
25755 if (XINT (src, 1) == UNSPEC_SF_FROM_SI
25756 && GET_CODE (dest) == REG && GET_MODE (dest) == SFmode
25757 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == DImode)
25758 return true;
25759
25760 return false;
25761 }
25762
25763 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
25764 instruction format (D/DS/DQ) used for offset memory. */
25765
25766 static enum non_prefixed_form
25767 reg_to_non_prefixed (rtx reg, machine_mode mode)
25768 {
25769 /* If it isn't a register, use the defaults. */
25770 if (!REG_P (reg) && !SUBREG_P (reg))
25771 return NON_PREFIXED_DEFAULT;
25772
25773 unsigned int r = reg_or_subregno (reg);
25774
25775 /* If we have a pseudo, use the default instruction format. */
25776 if (!HARD_REGISTER_NUM_P (r))
25777 return NON_PREFIXED_DEFAULT;
25778
25779 unsigned size = GET_MODE_SIZE (mode);
25780
25781 /* FPR registers use the D format for scalars, and the DQ format for
25782 vectors, IEEE 128-bit floating point, and 128-bit integers. Before
25783 power9, only indexed addressing was available for vectors. */
25784 if (FP_REGNO_P (r))
25785 {
25786 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
25787 return NON_PREFIXED_D;
25788
25789 else if (size < 8)
25790 return NON_PREFIXED_X;
25791
25792 else if (TARGET_VSX && size >= 16
25793 && (VECTOR_MODE_P (mode)
25794 || VECTOR_ALIGNMENT_P (mode)
25795 || mode == TImode || mode == CTImode))
25796 return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X;
25797
25798 else
25799 return NON_PREFIXED_DEFAULT;
25800 }
25801
25802 /* Altivec registers use the DS format for scalars, and the DQ format
25803 for vectors, IEEE 128-bit floating point, and 128-bit integers.
25804 Before power9, only indexed addressing was available. */
25805 else if (ALTIVEC_REGNO_P (r))
25806 {
25807 if (!TARGET_P9_VECTOR)
25808 return NON_PREFIXED_X;
25809
25810 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
25811 return NON_PREFIXED_DS;
25812
25813 else if (size < 8)
25814 return NON_PREFIXED_X;
25815
25816 else if (TARGET_VSX && size >= 16
25817 && (VECTOR_MODE_P (mode)
25818 || VECTOR_ALIGNMENT_P (mode)
25819 || mode == TImode || mode == CTImode))
25820 return NON_PREFIXED_DQ;
25821
25822 else
25823 return NON_PREFIXED_DEFAULT;
25824 }
25825
25826 /* GPR registers use the DS format for 64-bit items on 64-bit systems,
25827 and the D format otherwise. Assume that any other register, such as
25828 LR, CRs, etc. will go through the GPR registers for memory operations. */
25829 else if (TARGET_POWERPC64 && size >= 8)
25830 return NON_PREFIXED_DS;
25831
25832 return NON_PREFIXED_D;
25833 }
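
/* Examples of the mapping above (assuming power9 or later defaults):
   DFmode in an FPR (lfd/stfd)			-> NON_PREFIXED_D;
   DImode in a GPR on a 64-bit system (ld/std)	-> NON_PREFIXED_DS;
   V2DImode in a VSX register (lxv/stxv)	-> NON_PREFIXED_DQ;
   SImode in an Altivec register		-> NON_PREFIXED_X,
   since small scalars in vector registers have no offset form.  */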
25834
25835 \f
25836 /* Whether a load instruction is a prefixed instruction. This is called from
25837 the prefixed attribute processing. */
25838
25839 bool
25840 prefixed_load_p (rtx_insn *insn)
25841 {
25842 /* Validate the insn to make sure it is a normal load insn. */
25843 extract_insn_cached (insn);
25844 if (recog_data.n_operands < 2)
25845 return false;
25846
25847 rtx reg = recog_data.operand[0];
25848 rtx mem = recog_data.operand[1];
25849
25850 if (!REG_P (reg) && !SUBREG_P (reg))
25851 return false;
25852
25853 if (!MEM_P (mem))
25854 return false;
25855
25856 /* Prefixed load instructions do not support update or indexed forms. */
25857 if (get_attr_indexed (insn) == INDEXED_YES
25858 || get_attr_update (insn) == UPDATE_YES)
25859 return false;
25860
25861 /* LWA uses the DS format instead of the D format that LWZ uses. */
25862 enum non_prefixed_form non_prefixed;
25863 machine_mode reg_mode = GET_MODE (reg);
25864 machine_mode mem_mode = GET_MODE (mem);
25865
25866 if (mem_mode == SImode && reg_mode == DImode
25867 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
25868 non_prefixed = NON_PREFIXED_DS;
25869
25870 else
25871 non_prefixed = reg_to_non_prefixed (reg, mem_mode);
25872
25873 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
25874 return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT);
25875 else
25876 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
25877 }
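
/* For example, LWA (the sign-extending load of SImode into DImode) only
   has a DS form, so a sign-extending load from offset 2 cannot use LWA;
   with prefixed instructions available it becomes PLWA, and this
   function reports it as prefixed.  */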
25878
25879 /* Whether a store instruction is a prefixed instruction. This is called from
25880 the prefixed attribute processing. */
25881
25882 bool
25883 prefixed_store_p (rtx_insn *insn)
25884 {
25885 /* Validate the insn to make sure it is a normal store insn. */
25886 extract_insn_cached (insn);
25887 if (recog_data.n_operands < 2)
25888 return false;
25889
25890 rtx mem = recog_data.operand[0];
25891 rtx reg = recog_data.operand[1];
25892
25893 if (!REG_P (reg) && !SUBREG_P (reg))
25894 return false;
25895
25896 if (!MEM_P (mem))
25897 return false;
25898
25899 /* Prefixed store instructions do not support update or indexed forms. */
25900 if (get_attr_indexed (insn) == INDEXED_YES
25901 || get_attr_update (insn) == UPDATE_YES)
25902 return false;
25903
25904 machine_mode mem_mode = GET_MODE (mem);
25905 rtx addr = XEXP (mem, 0);
25906 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
25907
25908 /* Need to make sure we aren't looking at a stfs which doesn't look
25909 like the other things reg_to_non_prefixed/address_is_prefixed
25910 looks for. */
25911 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
25912 return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT);
25913 else
25914 return address_is_prefixed (addr, mem_mode, non_prefixed);
25915 }
25916
25917 /* Whether a load immediate or add instruction is a prefixed instruction. This
25918 is called from the prefixed attribute processing. */
25919
25920 bool
25921 prefixed_paddi_p (rtx_insn *insn)
25922 {
25923 rtx set = single_set (insn);
25924 if (!set)
25925 return false;
25926
25927 rtx dest = SET_DEST (set);
25928 rtx src = SET_SRC (set);
25929
25930 if (!REG_P (dest) && !SUBREG_P (dest))
25931 return false;
25932
25933 /* Is this a load immediate that can't be done with a simple ADDI or
25934 ADDIS? */
25935 if (CONST_INT_P (src))
25936 return (satisfies_constraint_eI (src)
25937 && !satisfies_constraint_I (src)
25938 && !satisfies_constraint_L (src));
25939
25940 /* Is this a PADDI instruction that can't be done with a simple ADDI or
25941 ADDIS? */
25942 if (GET_CODE (src) == PLUS)
25943 {
25944 rtx op1 = XEXP (src, 1);
25945
25946 return (CONST_INT_P (op1)
25947 && satisfies_constraint_eI (op1)
25948 && !satisfies_constraint_I (op1)
25949 && !satisfies_constraint_L (op1));
25950 }
25951
25952 /* If not, is it a load of a PC-relative address? */
25953 if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
25954 return false;
25955
25956 if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
25957 return false;
25958
25959 enum insn_form iform = address_to_insn_form (src, Pmode,
25960 NON_PREFIXED_DEFAULT);
25961
25962 return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
25963 }
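
/* Illustrative cases (the values are examples):
     li 3,100	    loads 100	    -> not prefixed; fits in 16 bits
     lis 3,0x12	    loads 0x120000  -> not prefixed; ADDIS-style constant
     pli 3,0x12345  loads 0x12345   -> prefixed; the constant fits in 34
			bits, but neither ADDI nor ADDIS can build it.  */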
25964
25965 /* Whether the next instruction needs a 'p' prefix issued before the
25966 instruction is printed out. */
25967 static bool next_insn_prefixed_p;
25968
25969 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
25970 outputting the assembler code. On the PowerPC, we remember if the current
25971 insn is a prefixed insn where we need to emit a 'p' before the insn.
25972
25973 In addition, if the insn is part of a PC-relative reference to an
25974 external label optimization, that is recorded as well. */
25975 void
25976 rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
25977 {
25978 next_insn_prefixed_p = (get_attr_prefixed (insn) != PREFIXED_NO);
25979 return;
25980 }
25981
25982 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
25983 We use it to emit a 'p' for prefixed insns that is set in
25984 FINAL_PRESCAN_INSN. */
25985 void
25986 rs6000_asm_output_opcode (FILE *stream)
25987 {
25988 if (next_insn_prefixed_p)
25989 fprintf (stream, "p");
25990
25991 return;
25992 }
25993
25994 /* Adjust the length of an INSN. LENGTH is the currently-computed length and
25995 should be adjusted to reflect any required changes. This macro is used when
25996 there is some systematic length adjustment required that would be difficult
25997 to express in the length attribute.
25998
25999 On the PowerPC, we use this to adjust the length of an instruction if one or
26000 more prefixed instructions are generated, using the attribute
26001 num_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the
26002 hardware requires that a prefixed instruction does not cross a 64-byte
26003 boundary. This means the compiler has to assume the length of the first
26004 prefixed instruction is 12 bytes instead of 8 bytes. Since the length is
26005 already set for the non-prefixed instruction, we just need to update for the
26006 difference. */
26007
26008 int
26009 rs6000_adjust_insn_length (rtx_insn *insn, int length)
26010 {
26011 if (TARGET_PREFIXED && NONJUMP_INSN_P (insn))
26012 {
26013 rtx pattern = PATTERN (insn);
26014 if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
26015 && get_attr_prefixed (insn) == PREFIXED_YES)
26016 {
26017 int num_prefixed = get_attr_max_prefixed_insns (insn);
26018 length += 4 * (num_prefixed + 1);
26019 }
26020 }
26021
26022 return length;
26023 }
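
/* Worked example: an insn containing one prefixed load whose length was
   computed as 4 bytes has max_prefixed_insns == 1, so we add
   4 * (1 + 1) = 8 bytes: 4 for the prefix word itself and 4 for the
   potential alignment nop before a 64-byte boundary, giving the assumed
   12-byte worst case described above.  */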
26024
26025 \f
26026 #ifdef HAVE_GAS_HIDDEN
26027 # define USE_HIDDEN_LINKONCE 1
26028 #else
26029 # define USE_HIDDEN_LINKONCE 0
26030 #endif
26031
26032 /* Fills in the label name that should be used for a 476 link stack thunk. */
26033
26034 void
26035 get_ppc476_thunk_name (char name[32])
26036 {
26037 gcc_assert (TARGET_LINK_STACK);
26038
26039 if (USE_HIDDEN_LINKONCE)
26040 sprintf (name, "__ppc476.get_thunk");
26041 else
26042 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
26043 }
26044
26045 /* This function emits the simple thunk routine that is used to preserve
26046 the link stack on the 476 cpu. */
26047
26048 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
26049 static void
26050 rs6000_code_end (void)
26051 {
26052 char name[32];
26053 tree decl;
26054
26055 if (!TARGET_LINK_STACK)
26056 return;
26057
26058 get_ppc476_thunk_name (name);
26059
26060 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
26061 build_function_type_list (void_type_node, NULL_TREE));
26062 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
26063 NULL_TREE, void_type_node);
26064 TREE_PUBLIC (decl) = 1;
26065 TREE_STATIC (decl) = 1;
26066
26067 #if RS6000_WEAK
26068 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
26069 {
26070 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
26071 targetm.asm_out.unique_section (decl, 0);
26072 switch_to_section (get_named_section (decl, NULL, 0));
26073 DECL_WEAK (decl) = 1;
26074 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
26075 targetm.asm_out.globalize_label (asm_out_file, name);
26076 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
26077 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
26078 }
26079 else
26080 #endif
26081 {
26082 switch_to_section (text_section);
26083 ASM_OUTPUT_LABEL (asm_out_file, name);
26084 }
26085
26086 DECL_INITIAL (decl) = make_node (BLOCK);
26087 current_function_decl = decl;
26088 allocate_struct_function (decl, false);
26089 init_function_start (decl);
26090 first_function_block_is_cold = false;
26091 /* Make sure unwind info is emitted for the thunk if needed. */
26092 final_start_function (emit_barrier (), asm_out_file, 1);
26093
26094 fputs ("\tblr\n", asm_out_file);
26095
26096 final_end_function ();
26097 init_insn_lengths ();
26098 free_after_compilation (cfun);
26099 set_cfun (NULL);
26100 current_function_decl = NULL;
26101 }
26102
26103 /* Add r30 to hard reg set if the prologue sets it up and it is not
26104 pic_offset_table_rtx. */
26105
26106 static void
26107 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
26108 {
26109 if (!TARGET_SINGLE_PIC_BASE
26110 && TARGET_TOC
26111 && TARGET_MINIMAL_TOC
26112 && !constant_pool_empty_p ())
26113 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
26114 if (cfun->machine->split_stack_argp_used)
26115 add_to_hard_reg_set (&set->set, Pmode, 12);
26116
26117 /* Make sure the hard reg set doesn't include r2, which was possibly added
26118 via PIC_OFFSET_TABLE_REGNUM. */
26119 if (TARGET_TOC)
26120 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
26121 }
26122
26123 \f
26124 /* Helper function for rs6000_split_logical to emit a logical instruction after
26125 splitting the operation into single GPR registers.
26126
26127 DEST is the destination register.
26128 OP1 and OP2 are the input source registers.
26129 CODE is the base operation (AND, IOR, XOR, NOT).
26130 MODE is the machine mode.
26131 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26132 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26133 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
26134
26135 static void
26136 rs6000_split_logical_inner (rtx dest,
26137 rtx op1,
26138 rtx op2,
26139 enum rtx_code code,
26140 machine_mode mode,
26141 bool complement_final_p,
26142 bool complement_op1_p,
26143 bool complement_op2_p)
26144 {
26145 rtx bool_rtx;
26146
26147 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
26148 if (op2 && CONST_INT_P (op2)
26149 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
26150 && !complement_final_p && !complement_op1_p && !complement_op2_p)
26151 {
26152 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
26153 HOST_WIDE_INT value = INTVAL (op2) & mask;
26154
26155 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
26156 if (code == AND)
26157 {
26158 if (value == 0)
26159 {
26160 emit_insn (gen_rtx_SET (dest, const0_rtx));
26161 return;
26162 }
26163
26164 else if (value == mask)
26165 {
26166 if (!rtx_equal_p (dest, op1))
26167 emit_insn (gen_rtx_SET (dest, op1));
26168 return;
26169 }
26170 }
26171
26172 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
26173 into separate ORI/ORIS or XORI/XORIS instructions. */
26174 else if (code == IOR || code == XOR)
26175 {
26176 if (value == 0)
26177 {
26178 if (!rtx_equal_p (dest, op1))
26179 emit_insn (gen_rtx_SET (dest, op1));
26180 return;
26181 }
26182 }
26183 }
26184
26185 if (code == AND && mode == SImode
26186 && !complement_final_p && !complement_op1_p && !complement_op2_p)
26187 {
26188 emit_insn (gen_andsi3 (dest, op1, op2));
26189 return;
26190 }
26191
26192 if (complement_op1_p)
26193 op1 = gen_rtx_NOT (mode, op1);
26194
26195 if (complement_op2_p)
26196 op2 = gen_rtx_NOT (mode, op2);
26197
26198 /* For canonical RTL, if only one arm is inverted it is the first. */
26199 if (!complement_op1_p && complement_op2_p)
26200 std::swap (op1, op2);
26201
26202 bool_rtx = ((code == NOT)
26203 ? gen_rtx_NOT (mode, op1)
26204 : gen_rtx_fmt_ee (code, mode, op1, op2));
26205
26206 if (complement_final_p)
26207 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
26208
26209 emit_insn (gen_rtx_SET (dest, bool_rtx));
26210 }
26211
26212 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
26213 operations are split immediately during RTL generation to allow for more
26214 optimizations of the AND/IOR/XOR.
26215
26216 OPERANDS is an array containing the destination and two input operands.
26217 CODE is the base operation (AND, IOR, XOR, NOT).
26218 MODE is the machine mode.
26219 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26220 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26221 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
26224
26225 static void
26226 rs6000_split_logical_di (rtx operands[3],
26227 enum rtx_code code,
26228 bool complement_final_p,
26229 bool complement_op1_p,
26230 bool complement_op2_p)
26231 {
26232 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
26233 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
26234 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
26235 enum hi_lo { hi = 0, lo = 1 };
26236 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
26237 size_t i;
26238
26239 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
26240 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
26241 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
26242 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
26243
26244 if (code == NOT)
26245 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
26246 else
26247 {
26248 if (!CONST_INT_P (operands[2]))
26249 {
26250 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
26251 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
26252 }
26253 else
26254 {
26255 HOST_WIDE_INT value = INTVAL (operands[2]);
26256 HOST_WIDE_INT value_hi_lo[2];
26257
26258 gcc_assert (!complement_final_p);
26259 gcc_assert (!complement_op1_p);
26260 gcc_assert (!complement_op2_p);
26261
26262 value_hi_lo[hi] = value >> 32;
26263 value_hi_lo[lo] = value & lower_32bits;
26264
26265 for (i = 0; i < 2; i++)
26266 {
26267 HOST_WIDE_INT sub_value = value_hi_lo[i];
26268
26269 if (sub_value & sign_bit)
26270 sub_value |= upper_32bits;
26271
26272 op2_hi_lo[i] = GEN_INT (sub_value);
26273
26274 /* If this is an AND instruction, check to see if we need to load
26275 the value in a register. */
26276 if (code == AND && sub_value != -1 && sub_value != 0
26277 && !and_operand (op2_hi_lo[i], SImode))
26278 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
26279 }
26280 }
26281 }
26282
26283 for (i = 0; i < 2; i++)
26284 {
26285 /* Split large IOR/XOR operations. */
26286 if ((code == IOR || code == XOR)
26287 && CONST_INT_P (op2_hi_lo[i])
26288 && !complement_final_p
26289 && !complement_op1_p
26290 && !complement_op2_p
26291 && !logical_const_operand (op2_hi_lo[i], SImode))
26292 {
26293 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
26294 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
26295 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
26296 rtx tmp = gen_reg_rtx (SImode);
26297
26298 /* Make sure the constant is sign extended. */
26299 if ((hi_16bits & sign_bit) != 0)
26300 hi_16bits |= upper_32bits;
26301
26302 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
26303 code, SImode, false, false, false);
26304
26305 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
26306 code, SImode, false, false, false);
26307 }
26308 else
26309 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
26310 code, SImode, complement_final_p,
26311 complement_op1_p, complement_op2_p);
26312 }
26313
26314 return;
26315 }
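
/* For example, on a 32-bit system the DImode operation a ^= 0x712345678
   (high:low halves in r3:r4) is split into SImode pieces, and the low
   constant 0x12345678 is too big for a single XORI/XORIS, so it is
   split again, giving roughly:

	xoris 4,4,0x1234	# upper 16 bits of the low word
	xori 4,4,0x5678		# lower 16 bits of the low word
	xori 3,3,7		# the high word fits one instruction  */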
26316
26317 /* Split the insns that make up boolean operations operating on multiple GPR
26318 registers. The boolean MD patterns ensure that the inputs either are
26319 exactly the same as the output registers, or there is no overlap.
26320
26321 OPERANDS is an array containing the destination and two input operands.
26322 CODE is the base operation (AND, IOR, XOR, NOT).
26323 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26324 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26325 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
26326
26327 void
26328 rs6000_split_logical (rtx operands[3],
26329 enum rtx_code code,
26330 bool complement_final_p,
26331 bool complement_op1_p,
26332 bool complement_op2_p)
26333 {
26334 machine_mode mode = GET_MODE (operands[0]);
26335 machine_mode sub_mode;
26336 rtx op0, op1, op2;
26337 int sub_size, regno0, regno1, nregs, i;
26338
26339 /* If this is DImode, use the specialized version that can run before
26340 register allocation. */
26341 if (mode == DImode && !TARGET_POWERPC64)
26342 {
26343 rs6000_split_logical_di (operands, code, complement_final_p,
26344 complement_op1_p, complement_op2_p);
26345 return;
26346 }
26347
26348 op0 = operands[0];
26349 op1 = operands[1];
26350 op2 = (code == NOT) ? NULL_RTX : operands[2];
26351 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
26352 sub_size = GET_MODE_SIZE (sub_mode);
26353 regno0 = REGNO (op0);
26354 regno1 = REGNO (op1);
26355
26356 gcc_assert (reload_completed);
26357 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
26358 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
26359
26360 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
26361 gcc_assert (nregs > 1);
26362
26363 if (op2 && REG_P (op2))
26364 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
26365
26366 for (i = 0; i < nregs; i++)
26367 {
26368 int offset = i * sub_size;
26369 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
26370 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
26371 rtx sub_op2 = ((code == NOT)
26372 ? NULL_RTX
26373 : simplify_subreg (sub_mode, op2, mode, offset));
26374
26375 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
26376 complement_final_p, complement_op1_p,
26377 complement_op2_p);
26378 }
26379
26380 return;
26381 }
26382
26383 \f
26384 /* Return true if the peephole2 pass can combine an addis instruction
26385 and a load with an offset into a single load that can be fused together
26386 on a power8. */
26387
26388 bool
26389 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
26390 rtx addis_value, /* addis value. */
26391 rtx target, /* target register that is loaded. */
26392 rtx mem) /* bottom part of the memory addr. */
26393 {
26394 rtx addr;
26395 rtx base_reg;
26396
26397 /* Validate arguments. */
26398 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
26399 return false;
26400
26401 if (!base_reg_operand (target, GET_MODE (target)))
26402 return false;
26403
26404 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
26405 return false;
26406
26407 /* Allow sign/zero extension. */
26408 if (GET_CODE (mem) == ZERO_EXTEND
26409 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
26410 mem = XEXP (mem, 0);
26411
26412 if (!MEM_P (mem))
26413 return false;
26414
26415 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
26416 return false;
26417
26418 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
26419 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
26420 return false;
26421
26422 /* Validate that the register used to load the high value is either the
26423 register being loaded, or we can safely replace its use.
26424
26425 This function is only called from the peephole2 pass and we assume that
26426 there are 2 instructions in the peephole (addis and load), so we want to
26427 check that the target register is not used in the memory address and that
26428 the register holding the addis result is dead after the peephole. */
26429 if (REGNO (addis_reg) != REGNO (target))
26430 {
26431 if (reg_mentioned_p (target, mem))
26432 return false;
26433
26434 if (!peep2_reg_dead_p (2, addis_reg))
26435 return false;
26436
26437 /* If the target register being loaded is the stack pointer, we must
26438 avoid loading any other value into it, even temporarily. */
26439 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
26440 return false;
26441 }
26442
26443 base_reg = XEXP (addr, 0);
26444 return REGNO (addis_reg) == REGNO (base_reg);
26445 }
26446
26447 /* During the peephole2 pass, adjust and expand the insns for a load fusion
26448 sequence. We adjust the addis register to use the target register. If the
26449 load sign extends, we change the code to do a zero-extending load followed
26450 by an explicit sign extension, since the fusion only covers zero-extending
26451 loads.
26452
26453 The operands are:
26454 operands[0] register set with addis (to be replaced with target)
26455 operands[1] value set via addis
26456 operands[2] target register being loaded
26457 operands[3] D-form memory reference using operands[0]. */
26458
26459 void
26460 expand_fusion_gpr_load (rtx *operands)
26461 {
26462 rtx addis_value = operands[1];
26463 rtx target = operands[2];
26464 rtx orig_mem = operands[3];
26465 rtx new_addr, new_mem, orig_addr, offset;
26466 enum rtx_code plus_or_lo_sum;
26467 machine_mode target_mode = GET_MODE (target);
26468 machine_mode extend_mode = target_mode;
26469 machine_mode ptr_mode = Pmode;
26470 enum rtx_code extend = UNKNOWN;
26471
26472 if (GET_CODE (orig_mem) == ZERO_EXTEND
26473 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
26474 {
26475 extend = GET_CODE (orig_mem);
26476 orig_mem = XEXP (orig_mem, 0);
26477 target_mode = GET_MODE (orig_mem);
26478 }
26479
26480 gcc_assert (MEM_P (orig_mem));
26481
26482 orig_addr = XEXP (orig_mem, 0);
26483 plus_or_lo_sum = GET_CODE (orig_addr);
26484 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
26485
26486 offset = XEXP (orig_addr, 1);
26487 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
26488 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
26489
26490 if (extend != UNKNOWN)
26491 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
26492
26493 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
26494 UNSPEC_FUSION_GPR);
26495 emit_insn (gen_rtx_SET (target, new_mem));
26496
26497 if (extend == SIGN_EXTEND)
26498 {
26499 int sub_off = ((BYTES_BIG_ENDIAN)
26500 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
26501 : 0);
26502 rtx sign_reg
26503 = simplify_subreg (target_mode, target, extend_mode, sub_off);
26504
26505 emit_insn (gen_rtx_SET (target,
26506 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
26507 }
26508
26509 return;
26510 }
26511
26512 /* Emit the addis instruction that will be part of a fused instruction
26513 sequence. */
26514
26515 void
26516 emit_fusion_addis (rtx target, rtx addis_value)
26517 {
26518 rtx fuse_ops[10];
26519 const char *addis_str = NULL;
26520
26521 /* Emit the addis instruction. */
26522 fuse_ops[0] = target;
26523 if (satisfies_constraint_L (addis_value))
26524 {
26525 fuse_ops[1] = addis_value;
26526 addis_str = "lis %0,%v1";
26527 }
26528
26529 else if (GET_CODE (addis_value) == PLUS)
26530 {
26531 rtx op0 = XEXP (addis_value, 0);
26532 rtx op1 = XEXP (addis_value, 1);
26533
26534 if (REG_P (op0) && CONST_INT_P (op1)
26535 && satisfies_constraint_L (op1))
26536 {
26537 fuse_ops[1] = op0;
26538 fuse_ops[2] = op1;
26539 addis_str = "addis %0,%1,%v2";
26540 }
26541 }
26542
26543 else if (GET_CODE (addis_value) == HIGH)
26544 {
26545 rtx value = XEXP (addis_value, 0);
26546 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
26547 {
26548 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
26549 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
26550 if (TARGET_ELF)
26551 addis_str = "addis %0,%2,%1@toc@ha";
26552
26553 else if (TARGET_XCOFF)
26554 addis_str = "addis %0,%1@u(%2)";
26555
26556 else
26557 gcc_unreachable ();
26558 }
26559
26560 else if (GET_CODE (value) == PLUS)
26561 {
26562 rtx op0 = XEXP (value, 0);
26563 rtx op1 = XEXP (value, 1);
26564
26565 if (GET_CODE (op0) == UNSPEC
26566 && XINT (op0, 1) == UNSPEC_TOCREL
26567 && CONST_INT_P (op1))
26568 {
26569 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
26570 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
26571 fuse_ops[3] = op1;
26572 if (TARGET_ELF)
26573 addis_str = "addis %0,%2,%1+%3@toc@ha";
26574
26575 else if (TARGET_XCOFF)
26576 addis_str = "addis %0,%1+%3@u(%2)";
26577
26578 else
26579 gcc_unreachable ();
26580 }
26581 }
26582
26583 else if (satisfies_constraint_L (value))
26584 {
26585 fuse_ops[1] = value;
26586 addis_str = "lis %0,%v1";
26587 }
26588
26589 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
26590 {
26591 fuse_ops[1] = value;
26592 addis_str = "lis %0,%1@ha";
26593 }
26594 }
26595
26596 if (!addis_str)
26597 fatal_insn ("Could not generate addis value for fusion", addis_value);
26598
26599 output_asm_insn (addis_str, fuse_ops);
26600 }
26601
26602 /* Emit a D-form load or store instruction that is the second instruction
26603 of a fusion sequence. */
26604
26605 static void
26606 emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
26607 {
26608 rtx fuse_ops[10];
26609 char insn_template[80];
26610
26611 fuse_ops[0] = load_reg;
26612 fuse_ops[1] = addis_reg;
26613
26614 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
26615 {
26616 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
26617 fuse_ops[2] = offset;
26618 output_asm_insn (insn_template, fuse_ops);
26619 }
26620
26621 else if (GET_CODE (offset) == UNSPEC
26622 && XINT (offset, 1) == UNSPEC_TOCREL)
26623 {
26624 if (TARGET_ELF)
26625 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
26626
26627 else if (TARGET_XCOFF)
26628 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
26629
26630 else
26631 gcc_unreachable ();
26632
26633 fuse_ops[2] = XVECEXP (offset, 0, 0);
26634 output_asm_insn (insn_template, fuse_ops);
26635 }
26636
26637 else if (GET_CODE (offset) == PLUS
26638 && GET_CODE (XEXP (offset, 0)) == UNSPEC
26639 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
26640 && CONST_INT_P (XEXP (offset, 1)))
26641 {
26642 rtx tocrel_unspec = XEXP (offset, 0);
26643 if (TARGET_ELF)
26644 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
26645
26646 else if (TARGET_XCOFF)
26647 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
26648
26649 else
26650 gcc_unreachable ();
26651
26652 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
26653 fuse_ops[3] = XEXP (offset, 1);
26654 output_asm_insn (insn_template, fuse_ops);
26655 }
26656
26657 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
26658 {
26659 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
26660
26661 fuse_ops[2] = offset;
26662 output_asm_insn (insn_template, fuse_ops);
26663 }
26664
26665 else
26666 fatal_insn ("Unable to generate load/store offset for fusion", offset);
26667
26668 return;
26669 }
26670
26671 /* Given an address, convert it into the addis and load offset parts. Addresses
26672 created during the peephole2 process look like:
26673 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
26674 (unspec [(...)] UNSPEC_TOCREL)) */
26675
26676 static void
26677 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
26678 {
26679 rtx hi, lo;
26680
26681 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
26682 {
26683 hi = XEXP (addr, 0);
26684 lo = XEXP (addr, 1);
26685 }
26686 else
26687 gcc_unreachable ();
26688
26689 *p_hi = hi;
26690 *p_lo = lo;
26691 }
26692
26693 /* Return a string to fuse an addis instruction with a gpr load into the
26694 same register that the addis instruction set up. The address that is
26695 used is the logical address that was formed during peephole2:
26696 (lo_sum (high) (low-part))
26697
26698 The code is complicated, so we call output_asm_insn directly, and just
26699 return "". */
26700
26701 const char *
26702 emit_fusion_gpr_load (rtx target, rtx mem)
26703 {
26704 rtx addis_value;
26705 rtx addr;
26706 rtx load_offset;
26707 const char *load_str = NULL;
26708 machine_mode mode;
26709
26710 if (GET_CODE (mem) == ZERO_EXTEND)
26711 mem = XEXP (mem, 0);
26712
26713 gcc_assert (REG_P (target) && MEM_P (mem));
26714
26715 addr = XEXP (mem, 0);
26716 fusion_split_address (addr, &addis_value, &load_offset);
26717
26718 /* Now emit the load instruction to the same register. */
26719 mode = GET_MODE (mem);
26720 switch (mode)
26721 {
26722 case E_QImode:
26723 load_str = "lbz";
26724 break;
26725
26726 case E_HImode:
26727 load_str = "lhz";
26728 break;
26729
26730 case E_SImode:
26731 case E_SFmode:
26732 load_str = "lwz";
26733 break;
26734
26735 case E_DImode:
26736 case E_DFmode:
26737 gcc_assert (TARGET_POWERPC64);
26738 load_str = "ld";
26739 break;
26740
26741 default:
26742 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
26743 }
26744
26745 /* Emit the addis instruction. */
26746 emit_fusion_addis (target, addis_value);
26747
26748 /* Emit the D-form load instruction. */
26749 emit_fusion_load (target, target, load_offset, load_str);
26750
26751 return "";
26752 }
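
/* For illustration, a fused TOC-relative load of a global int into r9
   produced by the two emit_fusion_* helpers above looks like:

	addis 9,2,x@toc@ha
	lwz 9,x@toc@l(9)

   which a power8 can fuse because the load consumes the addis result
   from the same register it overwrites.  */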
26753 \f
26754
26755 #ifdef RS6000_GLIBC_ATOMIC_FENV
26756 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
26757 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
26758 #endif
26759
26760 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
26761
26762 static void
26763 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
26764 {
26765 if (!TARGET_HARD_FLOAT)
26766 {
26767 #ifdef RS6000_GLIBC_ATOMIC_FENV
26768 if (atomic_hold_decl == NULL_TREE)
26769 {
26770 atomic_hold_decl
26771 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
26772 get_identifier ("__atomic_feholdexcept"),
26773 build_function_type_list (void_type_node,
26774 double_ptr_type_node,
26775 NULL_TREE));
26776 TREE_PUBLIC (atomic_hold_decl) = 1;
26777 DECL_EXTERNAL (atomic_hold_decl) = 1;
26778 }
26779
26780 if (atomic_clear_decl == NULL_TREE)
26781 {
26782 atomic_clear_decl
26783 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
26784 get_identifier ("__atomic_feclearexcept"),
26785 build_function_type_list (void_type_node,
26786 NULL_TREE));
26787 TREE_PUBLIC (atomic_clear_decl) = 1;
26788 DECL_EXTERNAL (atomic_clear_decl) = 1;
26789 }
26790
26791 tree const_double = build_qualified_type (double_type_node,
26792 TYPE_QUAL_CONST);
26793 tree const_double_ptr = build_pointer_type (const_double);
26794 if (atomic_update_decl == NULL_TREE)
26795 {
26796 atomic_update_decl
26797 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
26798 get_identifier ("__atomic_feupdateenv"),
26799 build_function_type_list (void_type_node,
26800 const_double_ptr,
26801 NULL_TREE));
26802 TREE_PUBLIC (atomic_update_decl) = 1;
26803 DECL_EXTERNAL (atomic_update_decl) = 1;
26804 }
26805
26806 tree fenv_var = create_tmp_var_raw (double_type_node);
26807 TREE_ADDRESSABLE (fenv_var) = 1;
26808 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node,
26809 build4 (TARGET_EXPR, double_type_node, fenv_var,
26810 void_node, NULL_TREE, NULL_TREE));
26811
26812 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
26813 *clear = build_call_expr (atomic_clear_decl, 0);
26814 *update = build_call_expr (atomic_update_decl, 1,
26815 fold_convert (const_double_ptr, fenv_addr));
26816 #endif
26817 return;
26818 }
26819
26820 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
26821 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
26822 tree call_mffs = build_call_expr (mffs, 0);
26823
26824 /* Generates the equivalent of feholdexcept (&fenv_var)
26825
26826 *fenv_var = __builtin_mffs ();
26827 double fenv_hold;
26828 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
26829 __builtin_mtfsf (0xff, fenv_hold); */
26830
26831 /* Mask to clear everything except for the rounding modes and non-IEEE
26832 arithmetic flag. */
  const unsigned HOST_WIDE_INT hold_exception_mask
    = HOST_WIDE_INT_C (0xffffffff00000007);

  tree fenv_var = create_tmp_var_raw (double_type_node);

  tree hold_mffs = build4 (TARGET_EXPR, double_type_node, fenv_var, call_mffs,
			   NULL_TREE, NULL_TREE);

  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			      build_int_cst (uint64_type_node,
					     hold_exception_mask));

  tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				 fenv_llu_and);

  tree hold_mtfsf = build_call_expr (mtfsf, 2,
				     build_int_cst (unsigned_type_node, 0xff),
				     fenv_hold_mtfsf);

  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);

  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):

       double fenv_clear = __builtin_mffs ();
       *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
       __builtin_mtfsf (0xff, fenv_clear);  */

  /* Mask to clear the entire lower FPSCR word, i.e. all of the exception
     flags, enables, and rounding control. */
  const unsigned HOST_WIDE_INT clear_exception_mask
    = HOST_WIDE_INT_C (0xffffffff00000000);

  tree fenv_clear = create_tmp_var_raw (double_type_node);

  tree clear_mffs = build4 (TARGET_EXPR, double_type_node, fenv_clear,
			    call_mffs, NULL_TREE, NULL_TREE);

  tree fenv_clear_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node,
				fenv_clear);
  tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
				    fenv_clear_llu,
				    build_int_cst (uint64_type_node,
						   clear_exception_mask));

  tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				  fenv_clear_llu_and);

  tree clear_mtfsf = build_call_expr (mtfsf, 2,
				      build_int_cst (unsigned_type_node, 0xff),
				      fenv_clear_mtfsf);

  *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);

  /* Generates the equivalent of feupdateenv (&fenv_var):

       double old_fenv = __builtin_mffs ();
       double fenv_update;
       *(uint64_t*)&fenv_update
	 = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL)
	   | (*(uint64_t*)&fenv_var & 0x1ff80fff);
       __builtin_mtfsf (0xff, fenv_update);  */

  const unsigned HOST_WIDE_INT update_exception_mask
    = HOST_WIDE_INT_C (0xffffffff1fffff00);
  const unsigned HOST_WIDE_INT new_exception_mask
    = HOST_WIDE_INT_C (0x1ff80fff);

  tree old_fenv = create_tmp_var_raw (double_type_node);
  tree update_mffs = build4 (TARGET_EXPR, double_type_node, old_fenv,
			     call_mffs, NULL_TREE, NULL_TREE);

  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
			     build_int_cst (uint64_type_node,
					    update_exception_mask));

  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			     build_int_cst (uint64_type_node,
					    new_exception_mask));

  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
			      old_llu_and, new_llu_and);

  tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				   new_llu_mask);

  tree update_mtfsf = build_call_expr (mtfsf, 2,
				       build_int_cst (unsigned_type_node, 0xff),
				       fenv_update_mtfsf);

  *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
}

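/* Expand the double -> float "vec_float2" style operation: convert the two
   V2DF vectors SRC1 and SRC2 into the single V4SF vector DST.  Tracing the
   permutes and merges below, the result should be (a sketch of the element
   flow, not authoritative documentation):

     dst = { (float) src1[0], (float) src1[1],
	     (float) src2[0], (float) src2[1] };  */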
void
rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DFmode);
  rtx_tmp1 = gen_reg_rtx (V2DFmode);

  /* The layout of the destination of the vmrgew instruction is:
     rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
     Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after
     the vmrgew instruction will be correct. */
  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
					   GEN_INT (0)));
      emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
					   GEN_INT (3)));
    }
  else
    {
      emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
      emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
    }

  rtx_tmp2 = gen_reg_rtx (V4SFmode);
  rtx_tmp3 = gen_reg_rtx (V4SFmode);

  emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
  emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
  else
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
}

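/* Expand the integer -> float "vec_float2" style operation: convert the two
   V2DI vectors SRC1 and SRC2 (signed conversion if SIGNED_CONVERT, unsigned
   otherwise) into the single V4SF vector DST.  The element flow mirrors the
   double variant above, so the result should be (again a sketch):

     dst = { (float) src1[0], (float) src1[1],
	     (float) src2[0], (float) src2[1] };  */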
void
rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DImode);
  rtx_tmp1 = gen_reg_rtx (V2DImode);

  /* The layout of the destination of the vmrgew instruction is:
     rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
     Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after
     the vmrgew instruction will be correct. */
  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
      emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
    }
  else
    {
      emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
      emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
    }

  rtx_tmp2 = gen_reg_rtx (V4SFmode);
  rtx_tmp3 = gen_reg_rtx (V4SFmode);

  if (signed_convert)
    {
      emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
    }
  else
    {
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
    }

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
  else
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
}

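/* Expand the double -> int "vec_signed2"/"vec_unsigned2" style operation:
   convert the two V2DF vectors SRC1 and SRC2 into the single V4SI vector
   DST, using signed conversions if SIGNED_CONVERT and unsigned ones
   otherwise.  The xxpermdi/vmrgew pairing below should pack the four
   converted (truncated, saturating) results in source order, i.e. roughly
   (a sketch, as above):

     dst = { (int) src1[0], (int) src1[1], (int) src2[0], (int) src2[1] };  */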
void
rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
			       rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DFmode);
  rtx_tmp1 = gen_reg_rtx (V2DFmode);

  emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
  emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));

  rtx_tmp2 = gen_reg_rtx (V4SImode);
  rtx_tmp3 = gen_reg_rtx (V4SImode);

  if (signed_convert)
    {
      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
    }
  else
    {
      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
    }

  emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
}

/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */

static bool
rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
			  optimization_type opt_type)
{
  switch (op)
    {
    case rsqrt_optab:
      return (opt_type == OPTIMIZE_FOR_SPEED
	      && RS6000_RECIP_AUTO_RSQRTE_P (mode1));

    default:
      return true;
    }
}

/* Implement TARGET_CONSTANT_ALIGNMENT. */

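/* For example, a string constant's alignment is raised to a full word so
   that word-at-a-time string operations can be used on it; the bump is
   skipped only when optimizing for size on targets that permit unaligned
   accesses.  */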
static HOST_WIDE_INT
rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  if (TREE_CODE (exp) == STRING_CST
      && (STRICT_ALIGNMENT || !optimize_size))
    return MAX (align, BITS_PER_WORD);
  return align;
}

/* Implement TARGET_STARTING_FRAME_OFFSET. */

static HOST_WIDE_INT
rs6000_starting_frame_offset (void)
{
  if (FRAME_GROWS_DOWNWARD)
    return 0;
  return RS6000_STARTING_FRAME_OFFSET;
}
\f

/* Create an alias for a mangled name where we have changed the mangling (in
   GCC 8.1, we used U10__float128, and now we use u9__ieee128). This is
   called via the target hook TARGET_ASM_GLOBALIZE_DECL_NAME. */

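/* For instance, for a C++ function declared as

     void foo (__float128);

   current GCC emits the mangled name _Z3foou9__ieee128, while GCC 8.1
   emitted _Z3fooU10__float128.  Since the names differ, the code below
   emits roughly

	.weak _Z3fooU10__float128
	.set _Z3fooU10__float128,_Z3foou9__ieee128

   so objects built against the old mangling keep linking (a sketch; the
   function name here is illustrative).  */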
#if TARGET_ELF && RS6000_WEAK
static void
rs6000_globalize_decl_name (FILE *stream, tree decl)
{
  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);

  targetm.asm_out.globalize_label (stream, name);

  if (rs6000_passes_ieee128 && name[0] == '_' && name[1] == 'Z')
    {
      tree save_asm_name = DECL_ASSEMBLER_NAME (decl);
      const char *old_name;

      ieee128_mangling_gcc_8_1 = true;
      lang_hooks.set_decl_assembler_name (decl);
      old_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
      SET_DECL_ASSEMBLER_NAME (decl, save_asm_name);
      ieee128_mangling_gcc_8_1 = false;

      if (strcmp (name, old_name) != 0)
	{
	  fprintf (stream, "\t.weak %s\n", old_name);
	  fprintf (stream, "\t.set %s,%s\n", old_name, name);
	}
    }
}
#endif

\f
/* On 64-bit Linux and FreeBSD systems, possibly switch the long double
   library function names from <foo>l to <foo>f128 if the default long double
   type is IEEE 128-bit. Typically, with the C and C++ languages, the
   standard math.h include file switches the names on systems that support
   long double as IEEE 128-bit, but that doesn't work if the user uses
   __builtin_<foo>l directly. In the future, glibc will export names like
   __ieee128_sinf128 and we can switch to using those instead of using
   sinf128, which pollutes the user's namespace.

   This also switches the names of the Fortran math functions (Fortran does
   not use math.h). However, Fortran needs other changes to the compiler and
   library before the real*16 type can be switched at compile time.

   We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
   only do this if the default is that long double is IBM extended double,
   and the user asked for IEEE 128-bit. */

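/* For example, under -mabi=ieeelongdouble -mlong-double-128 on such a
   system, a call to __builtin_sinl (assembler name "sinl") takes and
   returns the IEEE 128-bit type, so the identifier is rewritten below to
   "sinf128".  */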
static tree
rs6000_mangle_decl_assembler_name (tree decl, tree id)
{
  if (!TARGET_IEEEQUAD_DEFAULT && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
      && TREE_CODE (decl) == FUNCTION_DECL
      && DECL_IS_UNDECLARED_BUILTIN (decl))
    {
      size_t len = IDENTIFIER_LENGTH (id);
      const char *name = IDENTIFIER_POINTER (id);

      if (name[len - 1] == 'l')
	{
	  bool uses_ieee128_p = false;
	  tree type = TREE_TYPE (decl);
	  machine_mode ret_mode = TYPE_MODE (type);

	  /* See if the function returns an IEEE 128-bit floating point type
	     or complex type. */
	  if (ret_mode == TFmode || ret_mode == TCmode)
	    uses_ieee128_p = true;
	  else
	    {
	      function_args_iterator args_iter;
	      tree arg;

	      /* See if the function passes an IEEE 128-bit floating point
		 type or complex type. */
	      FOREACH_FUNCTION_ARGS (type, arg, args_iter)
		{
		  machine_mode arg_mode = TYPE_MODE (arg);
		  if (arg_mode == TFmode || arg_mode == TCmode)
		    {
		      uses_ieee128_p = true;
		      break;
		    }
		}
	    }

	  /* If we passed or returned an IEEE 128-bit floating point type,
	     change the name. */
	  if (uses_ieee128_p)
	    {
	      char *name2 = (char *) alloca (len + 4);
	      memcpy (name2, name, len - 1);
	      strcpy (name2 + len - 1, "f128");
	      id = get_identifier (name2);
	    }
	}
    }

  return id;
}

/* Predict whether the given loop in gimple will be transformed in the RTL
   doloop_optimize pass. */

static bool
rs6000_predict_doloop_p (struct loop *loop)
{
  gcc_assert (loop);

  /* On rs6000, targetm.can_use_doloop_p is actually
     can_use_doloop_if_innermost. Just ensure the loop is innermost. */
  if (loop->inner != NULL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Predict doloop failure due to"
		 " loop nesting.\n");
      return false;
    }

  return true;
}

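/* For example, in a nest like

     for (i = 0; i < n; i++)	 // outer: predicted not to use doloop
       for (j = 0; j < m; j++)	 // inner: may become a bdnz counted loop
	 ...

   only the innermost loop is predicted to become a hardware counted
   loop.  */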
/* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. */

static bool
rs6000_cannot_substitute_mem_equiv_p (rtx mem)
{
  gcc_assert (MEM_P (mem));

  /* curr_insn_transform()'s handling of subregs cannot handle Altivec
     AND-style addresses (an AND masking off the low address bits), so don't
     allow MEMs with those address types to be substituted as an equivalent
     expression. See PR93974 for details. */
  if (GET_CODE (XEXP (mem, 0)) == AND)
    return true;

  return false;
}

/* Implement TARGET_INVALID_CONVERSION. */

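/* A sketch of what this rejects at the source level:

     __vector_quad vq;
     __vector_quad *pq = &vq;
     __vector_pair *pp = (__vector_pair *) pq;   // rejected below
     void *pv = pq;				  // allowed: void pointer

   Conversions between the opaque MMA types themselves, or between pointers
   to them, are flagged; conversions through void pointers are not.  */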
static const char *
rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
{
  /* Make sure we're working with the canonical types. */
  if (TYPE_CANONICAL (fromtype) != NULL_TREE)
    fromtype = TYPE_CANONICAL (fromtype);
  if (TYPE_CANONICAL (totype) != NULL_TREE)
    totype = TYPE_CANONICAL (totype);

  machine_mode frommode = TYPE_MODE (fromtype);
  machine_mode tomode = TYPE_MODE (totype);

  if (frommode != tomode)
    {
      /* Do not allow conversions to/from XOmode and OOmode types. */
      if (frommode == XOmode)
	return N_("invalid conversion from type %<__vector_quad%>");
      if (tomode == XOmode)
	return N_("invalid conversion to type %<__vector_quad%>");
      if (frommode == OOmode)
	return N_("invalid conversion from type %<__vector_pair%>");
      if (tomode == OOmode)
	return N_("invalid conversion to type %<__vector_pair%>");
    }
  else if (POINTER_TYPE_P (fromtype) && POINTER_TYPE_P (totype))
    {
      /* We really care about the modes of the base types. */
      frommode = TYPE_MODE (TREE_TYPE (fromtype));
      tomode = TYPE_MODE (TREE_TYPE (totype));

      /* Do not allow conversions to/from XOmode and OOmode pointer
	 types, except to/from void pointers. */
      if (frommode != tomode
	  && frommode != VOIDmode
	  && tomode != VOIDmode)
	{
	  if (frommode == XOmode)
	    return N_("invalid conversion from type %<* __vector_quad%>");
	  if (tomode == XOmode)
	    return N_("invalid conversion to type %<* __vector_quad%>");
	  if (frommode == OOmode)
	    return N_("invalid conversion from type %<* __vector_pair%>");
	  if (tomode == OOmode)
	    return N_("invalid conversion to type %<* __vector_pair%>");
	}
    }

  /* Conversion allowed. */
  return NULL;
}

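/* Return the bits of the SFmode constant OPERAND as a 32-bit IEEE
   single-precision image (e.g. 1.0f yields 0x3f800000).  */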
long long
rs6000_const_f32_to_i32 (rtx operand)
{
  long long value;
  const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (operand);

  gcc_assert (GET_MODE (operand) == SFmode);
  REAL_VALUE_TO_TARGET_SINGLE (*rv, value);
  return value;
}

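/* Emit an xxspltidp instruction splatting the 32-bit float image VALUE into
   the V2DF register DST.  The test below checks for a zero exponent with a
   nonzero mantissa, i.e. a subnormal single-precision value, for which the
   instruction's result is undefined.  */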
void
rs6000_emit_xxspltidp_v2df (rtx dst, long value)
{
  if (((value & 0x7F800000) == 0) && ((value & 0x7FFFFF) != 0))
    inform (input_location,
	    "the result for the xxspltidp instruction "
	    "is undefined for subnormal input values");
  emit_insn (gen_xxspltidp_v2df_inst (dst, GEN_INT (value)));
}

/* Implement TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC. */

static bool
rs6000_gen_pic_addr_diff_vec (void)
{
  return rs6000_relative_jumptables;
}

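/* Output one element of an address vector (an absolute jump table): the
   chosen directive ("\t.long\t" for 32-bit, DOUBLE_INT_ASM_OP, typically
   "\t.quad\t", for 64-bit) followed by the internal "L" label numbered
   VALUE.  */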
void
rs6000_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t";
  char buf[100];

  fprintf (file, "%s", directive);
  ASM_GENERATE_INTERNAL_LABEL (buf, "L", value);
  assemble_name (file, buf);
  fprintf (file, "\n");
}

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-rs6000.h"