/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2020 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "attribs.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "gimple-walk.h"
#include "intl.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "tree-vector-builder.h"
#include "context.h"
#include "tree-pass.h"
#include "except.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"
#include "tree-ssa-propagate.h"
#include "tree-vrp.h"
#include "tree-ssanames.h"
#include "rs6000-internal.h"
#include "opts.h"

/* This file should be included last.  */
#include "target-def.h"

/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   properly defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif

/* Support targetm.vectorize.builtin_mask_for_load.  */
tree altivec_builtin_mask_for_load;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

#if TARGET_ELF
/* Note whether IEEE 128-bit floating point was passed or returned, either as
   the __float128/_Float128 explicit type, or when long double is IEEE 128-bit
   floating point.  We changed the default C++ mangling for these types and we
   may want to generate a weak alias of the old mangling (U10__float128) to the
   new mangling (u9__ieee128).  */
bool rs6000_passes_ieee128 = false;
#endif

/* Generate the mangled name (i.e. U10__float128) used in GCC 8.1, and not the
   name used in current releases (i.e. u9__ieee128).  */
static bool ieee128_mangling_gcc_8_1;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
bool rs6000_passes_float = false;
bool rs6000_passes_long_double = false;
/* Flag whether vector values have been passed/returned.  */
bool rs6000_passes_vector = false;
/* Flag whether small (<= 8 byte) structures have been returned.  */
bool rs6000_returns_struct = false;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built-in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in the
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;

section *toc_section = 0;

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV     = 0x001,	/* Use divide estimate.  */
  RECIP_DF_DIV     = 0x002,
  RECIP_V4SF_DIV   = 0x004,
  RECIP_V2DF_DIV   = 0x008,

  RECIP_SF_RSQRT   = 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT   = 0x020,
  RECIP_V4SF_RSQRT = 0x040,
  RECIP_V2DF_RSQRT = 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE = 0,
  RECIP_ALL  = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		| RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
		| RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION = RECIP_ALL,

  /* On low precision machines like the power5, don't enable the double
     precision reciprocal square root estimate, since it isn't accurate
     enough.  */
  RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
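
/* Worked example of the masks above: RECIP_ALL is 0xff, so
   RECIP_LOW_PRECISION is 0xff & ~(0x020 | 0x080) == 0x5f, i.e. every
   estimate except the double precision (DF/V2DF) reciprocal square
   roots.  */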

/* -mrecip options.  */
static struct
{
  const char *string;	/* option name */
  unsigned int mask;	/* mask bits to set */
} recip_options[] = {
  { "all",	RECIP_ALL },
  { "none",	RECIP_NONE },
  { "div",	(RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		 | RECIP_V2DF_DIV) },
  { "divf",	(RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	(RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	(RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		 | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	(RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
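
/* Minimal sketch of how an option string is decomposed against the
   table above (illustrative only; the actual parsing happens elsewhere
   in this file when -mrecip= is processed).  For -mrecip=divf,rsqrtd
   each comma-separated token is matched by name:

     unsigned int mask = 0;
     for (size_t i = 0; i < ARRAY_SIZE (recip_options); i++)
       if (strcmp (token, recip_options[i].string) == 0)
	 mask |= recip_options[i].mask;

   yielding RECIP_SF_DIV | RECIP_V4SF_DIV | RECIP_DF_RSQRT
   | RECIP_V2DF_RSQRT for that example.  */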

/* On PowerPC, we have a limited number of target clones that we care about
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT = 0,		/* default clone.  */
  CLONE_ISA_2_05,		/* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,		/* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,		/* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,		/* ISA 3.00 (power9).  */
  CLONE_MAX
};

/* Map compiler ISA bits into HWCAP names.  */
struct clone_map {
  HOST_WIDE_INT isa_mask;	/* rs6000_isa mask */
  const char *name;		/* name to use in __builtin_cpu_supports.  */
};

static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
  { 0,				"" },		/* Default options.  */
  { OPTION_MASK_CMPB,		"arch_2_05" },	/* ISA 2.05 (power6).  */
  { OPTION_MASK_POPCNTD,	"arch_2_06" },	/* ISA 2.06 (power7).  */
  { OPTION_MASK_P8_VECTOR,	"arch_2_07" },	/* ISA 2.07 (power8).  */
  { OPTION_MASK_P9_VECTOR,	"arch_3_00" },	/* ISA 3.00 (power9).  */
};
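
/* User-level view (illustrative, hypothetical user code): a function
   declared as

     __attribute__ ((target_clones ("cpu=power9", "default")))
     long foo (long x);

   gets a run-time dispatcher that selects the power9 clone when
   __builtin_cpu_supports ("arch_3_00") is true, using the HWCAP names
   in the table above.  */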


/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p = false;

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)


/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;	/* Register class name.  */
  int reg;		/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};

/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */
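
/* Worked example: a mode that is valid in a register class and
   supports reg+reg, reg+offset, and PRE_INC/PRE_DEC addressing would
   carry the mask 0x01 | 0x04 | 0x08 | 0x10 == 0x1d.  */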

/* Masks of valid addressing modes, based on the register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;		/* INSN to reload for loading.  */
  enum insn_code reload_store;		/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;			/* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_dq_form (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}
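
/* Hypothetical usage sketch of the helpers above (the real checks live
   in rs6000_legitimate_address_p and related routines): an address
   validity test can gate the auto-increment forms like so:

     if (GET_CODE (addr) == PRE_MODIFY
	 && !mode_supports_pre_modify_p (mode))
       return false;
*/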

/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */

int
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
	{
	  out_set = single_set (out_insn);
	  if (!out_set)
	    {
	      out_pat = PATTERN (out_insn);
	      if (GET_CODE (out_pat) == PARALLEL)
		{
		  for (i = 0; i < XVECLEN (out_pat, 0); i++)
		    {
		      out_exp = XVECEXP (out_pat, 0, i);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
	return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
	{
	  in_exp = XVECEXP (in_pat, 0, i);
	  if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
	    continue;
	  else if (GET_CODE (in_exp) != SET)
	    return false;

	  if (MEM_P (SET_DEST (in_exp)))
	    {
	      out_set = single_set (out_insn);
	      if (!out_set)
		{
		  out_pat = PATTERN (out_insn);
		  if (GET_CODE (out_pat) != PARALLEL)
		    return false;
		  for (j = 0; j < XVECLEN (out_pat, 0); j++)
		    {
		      out_exp = XVECEXP (out_pat, 0, j);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  return store_data_bypass_p (out_insn, in_insn);
}
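
/* Hedged note: this predicate is meant to be used as the guard of
   define_bypass entries in the processor pipeline descriptions, along
   the lines of

     (define_bypass 1 "some-store-insn" "some-load-insn"
		    "rs6000_store_data_bypass_p")

   in a power*.md file (the insn reservation names here are made up for
   illustration).  */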

\f
/* Processor costs (relative to an add).  */

const struct processor_costs *rs6000_cost;
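
/* Note on scale: COSTS_N_INSNS (N) from rtl.h expands to N * 4, so an
   entry such as COSTS_N_INSNS (18) for divsi below means "as expensive
   as 18 add instructions" (72 cost units).  */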

/* Instruction size costs on 32-bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  32,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction size costs on 64-bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  128,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),	/* mulsi */
  COSTS_N_INSNS (12),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (34),	/* muldi */
  COSTS_N_INSNS (65),	/* divsi */
  COSTS_N_INSNS (67),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (31),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  128,			/* cache line size */
  128,			/* l1 cache */
  2048,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (10),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (33),	/* divsi */
  COSTS_N_INSNS (33),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (35),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (34),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (11),	/* divsi */
  COSTS_N_INSNS (11),	/* divdi */
  COSTS_N_INSNS (6),	/* fp */
  COSTS_N_INSNS (6),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* l1 cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (36),	/* divsi */
  COSTS_N_INSNS (36),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (37),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  8,			/* l1 cache */
  64,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (21),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,	/* mulsi */
  COSTS_N_INSNS (6/2),		/* mulsi_const */
  COSTS_N_INSNS (6/2),		/* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,	/* muldi */
  COSTS_N_INSNS (38/2),		/* divsi */
  COSTS_N_INSNS (70/2),		/* divdi */
  COSTS_N_INSNS (10/2),		/* fp */
  COSTS_N_INSNS (10/2),		/* dmul */
  COSTS_N_INSNS (74/2),		/* sdiv */
  COSTS_N_INSNS (74/2),		/* ddiv */
  128,				/* cache line size */
  32,				/* l1 cache */
  512,				/* l2 cache */
  6,				/* streams */
  0,				/* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (17),	/* divsi */
  COSTS_N_INSNS (17),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (23),	/* divsi */
  COSTS_N_INSNS (23),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (21),	/* sdiv */
  COSTS_N_INSNS (35),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (29),	/* sdiv */
  COSTS_N_INSNS (29),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (8),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (18),	/* divdi */
  COSTS_N_INSNS (10),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (46),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  8,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),	/* mulsi */
  COSTS_N_INSNS (8),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (8),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (8),	/* divsi */
  COSTS_N_INSNS (12),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (18),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  8,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),	/* mulsi */
  COSTS_N_INSNS (16),	/* mulsi_const */
  COSTS_N_INSNS (16),	/* mulsi_const9 */
  COSTS_N_INSNS (16),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (59),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  64,			/* cache line size */
  16,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);

\f
static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
#if TARGET_MACHO
static tree get_prev_label (tree);
#endif
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
						  reg_class_t,
						  reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode,
						machine_mode,
						reg_class_t);

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
static enum non_prefixed_form reg_to_non_prefixed (rtx reg, machine_mode mode);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);

/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;
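
/* Sketch of the lookup pattern this table supports (illustrative; the
   real code is in output_toc later in this file):

     toc_hash_struct *h = ggc_alloc<toc_hash_struct> ();
     h->key = x;
     h->key_mode = mode;
     h->labelno = labelno;
     toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);

   If *found is already non-null, an equivalent constant has a TOC
   entry and its label is reused instead of emitting a new one.  */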


\f
/* Default register names.  */
char rs6000_reg_names[][8] =
{
  /* GPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* FPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* VRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "0", "1", "2", "3", "4", "5", "6", "7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  /* GPRs */
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  /* FPRs */
  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  /* VRs */
  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#endif

/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "altivec",   1, 1, false, true,  false, false,
    rs6000_handle_altivec_attribute, NULL },
  { "longcall",  0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "shortcall", 0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "ms_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
  { "gcc_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};
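
/* Illustrative (hypothetical) user code exercising two of the
   attributes registered above:

     void callee (void) __attribute__ ((longcall));
     struct __attribute__ ((ms_struct)) S { char c; int i; };
*/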
\f
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif
\f
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN rs6000_fold_builtin
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
  rs6000_ira_change_pseudo_allocno_class
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_INSN_COST
#define TARGET_INSN_COST rs6000_insn_cost

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_TRANSLATE_MODE_ATTRIBUTE
#define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE rs6000_floatn_mode

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
  rs6000_builtin_md_vectorized_function

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
#endif

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
#endif

/* Use a 32-bit anchor range.  This leads to sequences like:

	addis	tmp,anchor,high
	add	dest,tmp,low

   where tmp itself acts as an anchor, and can be shared between
   accesses to the same 64k page.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
#undef TARGET_USE_BLOCKS_FOR_DECL_P
#define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p

#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p

#undef TARGET_COMPUTE_PRESSURE_CLASSES
#define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE rs6000_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE rs6000_function_value

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE rs6000_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE rs6000_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT rs6000_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P rs6000_can_inline_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_PREDICT_DOLOOP_P
#define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p

#undef TARGET_HAVE_COUNT_REG_DECR_P
#define TARGET_HAVE_COUNT_REG_DECR_P true

/* 1000000000 is infinite cost in IVOPTs.  */
#undef TARGET_DOLOOP_COST_FOR_GENERIC
#define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000

#undef TARGET_DOLOOP_COST_FOR_ADDRESS
#define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1671 #undef TARGET_UNWIND_WORD_MODE
1672 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1673
1674 #undef TARGET_OFFLOAD_OPTIONS
1675 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1676
1677 #undef TARGET_C_MODE_FOR_SUFFIX
1678 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1679
1680 #undef TARGET_INVALID_BINARY_OP
1681 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1682
1683 #undef TARGET_OPTAB_SUPPORTED_P
1684 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1685
1686 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1687 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1688
1689 #undef TARGET_COMPARE_VERSION_PRIORITY
1690 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1691
1692 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1693 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1694 rs6000_generate_version_dispatcher_body
1695
1696 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1697 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1698 rs6000_get_function_versions_dispatcher
1699
1700 #undef TARGET_OPTION_FUNCTION_VERSIONS
1701 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1702
1703 #undef TARGET_HARD_REGNO_NREGS
1704 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1705 #undef TARGET_HARD_REGNO_MODE_OK
1706 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1707
1708 #undef TARGET_MODES_TIEABLE_P
1709 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1710
1711 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1712 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1713 rs6000_hard_regno_call_part_clobbered
1714
1715 #undef TARGET_SLOW_UNALIGNED_ACCESS
1716 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1717
1718 #undef TARGET_CAN_CHANGE_MODE_CLASS
1719 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1720
1721 #undef TARGET_CONSTANT_ALIGNMENT
1722 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1723
1724 #undef TARGET_STARTING_FRAME_OFFSET
1725 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1726
1727 #if TARGET_ELF && RS6000_WEAK
1728 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
1729 #define TARGET_ASM_GLOBALIZE_DECL_NAME rs6000_globalize_decl_name
1730 #endif
1731
1732 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1733 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1734
1735 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1736 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1737 \f
1738
1739 /* Processor table. */
1740 struct rs6000_ptt
1741 {
1742 const char *const name; /* Canonical processor name. */
1743 const enum processor_type processor; /* Processor type enum value. */
1744 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1745 };
1746
1747 static struct rs6000_ptt const processor_target_table[] =
1748 {
1749 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1750 #include "rs6000-cpus.def"
1751 #undef RS6000_CPU
1752 };
1753
1754 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1755 name is invalid. */
1756
1757 static int
1758 rs6000_cpu_name_lookup (const char *name)
1759 {
1760 size_t i;
1761
1762 if (name != NULL)
1763 {
1764 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1765 if (! strcmp (name, processor_target_table[i].name))
1766 return (int)i;
1767 }
1768
1769 return -1;
1770 }
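
/* For example (assuming the usual entries in rs6000-cpus.def):
   rs6000_cpu_name_lookup ("power9") returns the index of the "power9" table
   entry, while an unknown name such as "power99" returns -1.  */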
1771
1772 \f
1773 /* Return number of consecutive hard regs needed starting at reg REGNO
1774 to hold something of mode MODE.
1775 This is ordinarily the length in words of a value of mode MODE
1776 but can be less for certain modes in special long registers.
1777
1778 POWER and PowerPC GPRs hold 32 bits worth;
1779 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1780
1781 static int
1782 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1783 {
1784 unsigned HOST_WIDE_INT reg_size;
1785
1786 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1787 128-bit floating point that can go in vector registers, which has VSX
1788 memory addressing. */
1789 if (FP_REGNO_P (regno))
1790 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
1791 ? UNITS_PER_VSX_WORD
1792 : UNITS_PER_FP_WORD);
1793
1794 else if (ALTIVEC_REGNO_P (regno))
1795 reg_size = UNITS_PER_ALTIVEC_WORD;
1796
1797 else
1798 reg_size = UNITS_PER_WORD;
1799
1800 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
1801 }
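
/* A worked example of the round-up division above (illustrative only):
   V4SImode is 16 bytes, so it needs (16 + 4 - 1) / 4 = 4 GPRs on a 32-bit
   target, (16 + 8 - 1) / 8 = 2 GPRs on PowerPC64, and a single 16-byte
   AltiVec or VSX register.  */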
1802
1803 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1804 MODE. */
1805 static int
1806 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
1807 {
1808 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1809
1810 if (COMPLEX_MODE_P (mode))
1811 mode = GET_MODE_INNER (mode);
1812
1813 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1814 register combinations; we use PTImode where we need to deal with quad
1815 word memory operations. Don't allow quad words in the argument or frame
1816 pointer registers, just registers 0..31. */
1817 if (mode == PTImode)
1818 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1819 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1820 && ((regno & 1) == 0));
1821
1822 /* On VSX implementations, the registers that overlap the FPRs are larger
1823 than on non-VSX implementations. Don't allow an item to be split between a FP register
1824 and an Altivec register. Allow TImode in all VSX registers if the user
1825 asked for it. */
1826 if (TARGET_VSX && VSX_REGNO_P (regno)
1827 && (VECTOR_MEM_VSX_P (mode)
1828 || FLOAT128_VECTOR_P (mode)
1829 || reg_addr[mode].scalar_in_vmx_p
1830 || mode == TImode
1831 || (TARGET_VADDUQM && mode == V1TImode)))
1832 {
1833 if (FP_REGNO_P (regno))
1834 return FP_REGNO_P (last_regno);
1835
1836 if (ALTIVEC_REGNO_P (regno))
1837 {
1838 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
1839 return 0;
1840
1841 return ALTIVEC_REGNO_P (last_regno);
1842 }
1843 }
1844
1845 /* The GPRs can hold any mode, but values bigger than one register
1846 cannot go past R31. */
1847 if (INT_REGNO_P (regno))
1848 return INT_REGNO_P (last_regno);
1849
1850 /* The float registers (except for VSX vector modes) can only hold floating
1851 modes and DImode, plus SImode on ISA 2.07 and QImode/HImode on ISA 3.0. */
1852 if (FP_REGNO_P (regno))
1853 {
1854 if (FLOAT128_VECTOR_P (mode))
1855 return false;
1856
1857 if (SCALAR_FLOAT_MODE_P (mode)
1858 && (mode != TDmode || (regno % 2) == 0)
1859 && FP_REGNO_P (last_regno))
1860 return 1;
1861
1862 if (GET_MODE_CLASS (mode) == MODE_INT)
1863 {
1864 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1865 return 1;
1866
1867 if (TARGET_P8_VECTOR && (mode == SImode))
1868 return 1;
1869
1870 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
1871 return 1;
1872 }
1873
1874 return 0;
1875 }
1876
1877 /* The CR registers can only hold CC modes. */
1878 if (CR_REGNO_P (regno))
1879 return GET_MODE_CLASS (mode) == MODE_CC;
1880
1881 if (CA_REGNO_P (regno))
1882 return mode == Pmode || mode == SImode;
1883
1884 /* AltiVec modes can go only in AltiVec registers. */
1885 if (ALTIVEC_REGNO_P (regno))
1886 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1887 || mode == V1TImode);
1888
1889 /* We cannot put non-VSX TImode or PTImode anywhere except in the general
1890 registers, and the value must fit within the register set. */
1891
1892 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
1893 }
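
/* Two examples of the checks above (illustrative only): PTImode is accepted
   starting at an even GPR such as r10 (occupying r10/r11 on 64-bit targets)
   but rejected starting at r11, and CCmode is accepted only in the condition
   registers CR0..CR7.  */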
1894
1895 /* Implement TARGET_HARD_REGNO_NREGS. */
1896
1897 static unsigned int
1898 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
1899 {
1900 return rs6000_hard_regno_nregs[mode][regno];
1901 }
1902
1903 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1904
1905 static bool
1906 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
1907 {
1908 return rs6000_hard_regno_mode_ok_p[mode][regno];
1909 }
1910
1911 /* Implement TARGET_MODES_TIEABLE_P.
1912
1913 PTImode cannot tie with other modes because PTImode is restricted to even
1914 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1915 57744).
1916
1917 The AltiVec/VSX vector tests come before the scalar float mode tests, so
1918 that IEEE 128-bit floating point on VSX systems ties with other vector modes. */
1919
1920 static bool
1921 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
1922 {
1923 if (mode1 == PTImode)
1924 return mode2 == PTImode;
1925 if (mode2 == PTImode)
1926 return false;
1927
1928 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
1929 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
1930 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
1931 return false;
1932
1933 if (SCALAR_FLOAT_MODE_P (mode1))
1934 return SCALAR_FLOAT_MODE_P (mode2);
1935 if (SCALAR_FLOAT_MODE_P (mode2))
1936 return false;
1937
1938 if (GET_MODE_CLASS (mode1) == MODE_CC)
1939 return GET_MODE_CLASS (mode2) == MODE_CC;
1940 if (GET_MODE_CLASS (mode2) == MODE_CC)
1941 return false;
1942
1943 return true;
1944 }
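
/* Examples of the rules above (illustrative only): V2DFmode ties with
   V4SImode (both are AltiVec/VSX vector modes) and SFmode ties with DFmode
   (both are scalar float modes), but TImode does not tie with PTImode, and
   CCmode ties only with other condition-code modes such as CCUNSmode.  */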
1945
1946 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
1947
1948 static bool
1949 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
1950 machine_mode mode)
1951 {
1952 if (TARGET_32BIT
1953 && TARGET_POWERPC64
1954 && GET_MODE_SIZE (mode) > 4
1955 && INT_REGNO_P (regno))
1956 return true;
1957
1958 if (TARGET_VSX
1959 && FP_REGNO_P (regno)
1960 && GET_MODE_SIZE (mode) > 8
1961 && !FLOAT128_2REG_P (mode))
1962 return true;
1963
1964 return false;
1965 }
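
/* For example (illustrative only): with -m32 -mpowerpc64, a DImode value in
   a nonvolatile GPR is part-clobbered by calls, because the 32-bit ABI only
   saves and restores the low 32 bits of those registers.  Likewise, with VSX
   a 16-byte mode in a traditional FPR is part-clobbered, since only the
   lower 64 bits (the FPR half of the VSX register) are preserved.  */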
1966
1967 /* Print interesting facts about registers. */
1968 static void
1969 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
1970 {
1971 int r, m;
1972
1973 for (r = first_regno; r <= last_regno; ++r)
1974 {
1975 const char *comma = "";
1976 int len;
1977
1978 if (first_regno == last_regno)
1979 fprintf (stderr, "%s:\t", reg_name);
1980 else
1981 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
1982
1983 len = 8;
1984 for (m = 0; m < NUM_MACHINE_MODES; ++m)
1985 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
1986 {
1987 if (len > 70)
1988 {
1989 fprintf (stderr, ",\n\t");
1990 len = 8;
1991 comma = "";
1992 }
1993
1994 if (rs6000_hard_regno_nregs[m][r] > 1)
1995 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
1996 rs6000_hard_regno_nregs[m][r]);
1997 else
1998 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
1999
2000 comma = ", ";
2001 }
2002
2003 if (call_used_or_fixed_reg_p (r))
2004 {
2005 if (len > 70)
2006 {
2007 fprintf (stderr, ",\n\t");
2008 len = 8;
2009 comma = "";
2010 }
2011
2012 len += fprintf (stderr, "%s%s", comma, "call-used");
2013 comma = ", ";
2014 }
2015
2016 if (fixed_regs[r])
2017 {
2018 if (len > 70)
2019 {
2020 fprintf (stderr, ",\n\t");
2021 len = 8;
2022 comma = "";
2023 }
2024
2025 len += fprintf (stderr, "%s%s", comma, "fixed");
2026 comma = ", ";
2027 }
2028
2029 if (len > 70)
2030 {
2031 fprintf (stderr, ",\n\t");
2032 comma = "";
2033 }
2034
2035 len += fprintf (stderr, "%sreg-class = %s", comma,
2036 reg_class_names[(int)rs6000_regno_regclass[r]]);
2037 comma = ", ";
2038
2039 if (len > 70)
2040 {
2041 fprintf (stderr, ",\n\t");
2042 comma = "";
2043 }
2044
2045 fprintf (stderr, "%sregno = %d\n", comma, r);
2046 }
2047 }
2048
2049 static const char *
2050 rs6000_debug_vector_unit (enum rs6000_vector v)
2051 {
2052 const char *ret;
2053
2054 switch (v)
2055 {
2056 case VECTOR_NONE: ret = "none"; break;
2057 case VECTOR_ALTIVEC: ret = "altivec"; break;
2058 case VECTOR_VSX: ret = "vsx"; break;
2059 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2060 default: ret = "unknown"; break;
2061 }
2062
2063 return ret;
2064 }
2065
2066 /* Inner function printing just the address mask for a particular reload
2067 register class. */
2068 DEBUG_FUNCTION char *
2069 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2070 {
2071 static char ret[8];
2072 char *p = ret;
2073
2074 if ((mask & RELOAD_REG_VALID) != 0)
2075 *p++ = 'v';
2076 else if (keep_spaces)
2077 *p++ = ' ';
2078
2079 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2080 *p++ = 'm';
2081 else if (keep_spaces)
2082 *p++ = ' ';
2083
2084 if ((mask & RELOAD_REG_INDEXED) != 0)
2085 *p++ = 'i';
2086 else if (keep_spaces)
2087 *p++ = ' ';
2088
2089 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2090 *p++ = 'O';
2091 else if ((mask & RELOAD_REG_OFFSET) != 0)
2092 *p++ = 'o';
2093 else if (keep_spaces)
2094 *p++ = ' ';
2095
2096 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2097 *p++ = '+';
2098 else if (keep_spaces)
2099 *p++ = ' ';
2100
2101 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2102 *p++ = '+';
2103 else if (keep_spaces)
2104 *p++ = ' ';
2105
2106 if ((mask & RELOAD_REG_AND_M16) != 0)
2107 *p++ = '&';
2108 else if (keep_spaces)
2109 *p++ = ' ';
2110
2111 *p = '\0';
2112
2113 return ret;
2114 }
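
/* For example (derived from the code above): a mask of RELOAD_REG_VALID
   | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET prints as "vio" when KEEP_SPACES
   is false, and as "v io   " with blanks standing in for the unset bits when
   KEEP_SPACES is true.  */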
2115
2116 /* Print the address masks in a human readable fashion. */
2117 DEBUG_FUNCTION void
2118 rs6000_debug_print_mode (ssize_t m)
2119 {
2120 ssize_t rc;
2121 int spaces = 0;
2122
2123 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2124 for (rc = 0; rc < N_RELOAD_REG; rc++)
2125 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2126 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2127
2128 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2129 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2130 {
2131 fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2132 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2133 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2134 spaces = 0;
2135 }
2136 else
2137 spaces += strlen (" Reload=sl");
2138
2139 if (reg_addr[m].scalar_in_vmx_p)
2140 {
2141 fprintf (stderr, "%*s Upper=y", spaces, "");
2142 spaces = 0;
2143 }
2144 else
2145 spaces += strlen (" Upper=y");
2146
2147 if (rs6000_vector_unit[m] != VECTOR_NONE
2148 || rs6000_vector_mem[m] != VECTOR_NONE)
2149 {
2150 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2151 spaces, "",
2152 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2153 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2154 }
2155
2156 fputs ("\n", stderr);
2157 }
2158
2159 #define DEBUG_FMT_ID "%-32s= "
2160 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2161 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2162 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
2163
2164 /* Print various interesting information with -mdebug=reg. */
2165 static void
2166 rs6000_debug_reg_global (void)
2167 {
2168 static const char *const tf[2] = { "false", "true" };
2169 const char *nl = (const char *)0;
2170 int m;
2171 size_t m1, m2, v;
2172 char costly_num[20];
2173 char nop_num[20];
2174 char flags_buffer[40];
2175 const char *costly_str;
2176 const char *nop_str;
2177 const char *trace_str;
2178 const char *abi_str;
2179 const char *cmodel_str;
2180 struct cl_target_option cl_opts;
2181
2182 /* Modes we want tieable information on. */
2183 static const machine_mode print_tieable_modes[] = {
2184 QImode,
2185 HImode,
2186 SImode,
2187 DImode,
2188 TImode,
2189 PTImode,
2190 SFmode,
2191 DFmode,
2192 TFmode,
2193 IFmode,
2194 KFmode,
2195 SDmode,
2196 DDmode,
2197 TDmode,
2198 V16QImode,
2199 V8HImode,
2200 V4SImode,
2201 V2DImode,
2202 V1TImode,
2203 V32QImode,
2204 V16HImode,
2205 V8SImode,
2206 V4DImode,
2207 V2TImode,
2208 V4SFmode,
2209 V2DFmode,
2210 V8SFmode,
2211 V4DFmode,
2212 CCmode,
2213 CCUNSmode,
2214 CCEQmode,
2215 };
2216
2217 /* Virtual regs we are interested in. */
2218 static const struct {
2219 int regno; /* register number. */
2220 const char *name; /* register name. */
2221 } virtual_regs[] = {
2222 { STACK_POINTER_REGNUM, "stack pointer:" },
2223 { TOC_REGNUM, "toc: " },
2224 { STATIC_CHAIN_REGNUM, "static chain: " },
2225 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2226 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2227 { ARG_POINTER_REGNUM, "arg pointer: " },
2228 { FRAME_POINTER_REGNUM, "frame pointer:" },
2229 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2230 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2231 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2232 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2233 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2234 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2235 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2236 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2237 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2238 };
2239
2240 fputs ("\nHard register information:\n", stderr);
2241 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2242 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2243 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2244 LAST_ALTIVEC_REGNO,
2245 "vs");
2246 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2247 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2248 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2249 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2250 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2251 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2252
2253 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2254 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2255 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2256
2257 fprintf (stderr,
2258 "\n"
2259 "d reg_class = %s\n"
2260 "f reg_class = %s\n"
2261 "v reg_class = %s\n"
2262 "wa reg_class = %s\n"
2263 "we reg_class = %s\n"
2264 "wr reg_class = %s\n"
2265 "wx reg_class = %s\n"
2266 "wA reg_class = %s\n"
2267 "\n",
2268 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2269 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2270 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2271 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2272 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2273 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2274 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2275 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
2276
2277 nl = "\n";
2278 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2279 rs6000_debug_print_mode (m);
2280
2281 fputs ("\n", stderr);
2282
2283 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2284 {
2285 machine_mode mode1 = print_tieable_modes[m1];
2286 bool first_time = true;
2287
2288 nl = (const char *)0;
2289 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2290 {
2291 machine_mode mode2 = print_tieable_modes[m2];
2292 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2293 {
2294 if (first_time)
2295 {
2296 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2297 nl = "\n";
2298 first_time = false;
2299 }
2300
2301 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2302 }
2303 }
2304
2305 if (!first_time)
2306 fputs ("\n", stderr);
2307 }
2308
2309 if (nl)
2310 fputs (nl, stderr);
2311
2312 if (rs6000_recip_control)
2313 {
2314 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2315
2316 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2317 if (rs6000_recip_bits[m])
2318 {
2319 fprintf (stderr,
2320 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2321 GET_MODE_NAME (m),
2322 (RS6000_RECIP_AUTO_RE_P (m)
2323 ? "auto"
2324 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2325 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2326 ? "auto"
2327 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2328 }
2329
2330 fputs ("\n", stderr);
2331 }
2332
2333 if (rs6000_cpu_index >= 0)
2334 {
2335 const char *name = processor_target_table[rs6000_cpu_index].name;
2336 HOST_WIDE_INT flags
2337 = processor_target_table[rs6000_cpu_index].target_enable;
2338
2339 sprintf (flags_buffer, "-mcpu=%s flags", name);
2340 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2341 }
2342 else
2343 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2344
2345 if (rs6000_tune_index >= 0)
2346 {
2347 const char *name = processor_target_table[rs6000_tune_index].name;
2348 HOST_WIDE_INT flags
2349 = processor_target_table[rs6000_tune_index].target_enable;
2350
2351 sprintf (flags_buffer, "-mtune=%s flags", name);
2352 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2353 }
2354 else
2355 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2356
2357 cl_target_option_save (&cl_opts, &global_options);
2358 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2359 rs6000_isa_flags);
2360
2361 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2362 rs6000_isa_flags_explicit);
2363
2364 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2365 rs6000_builtin_mask);
2366
2367 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2368
2369 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2370 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2371
2372 switch (rs6000_sched_costly_dep)
2373 {
2374 case max_dep_latency:
2375 costly_str = "max_dep_latency";
2376 break;
2377
2378 case no_dep_costly:
2379 costly_str = "no_dep_costly";
2380 break;
2381
2382 case all_deps_costly:
2383 costly_str = "all_deps_costly";
2384 break;
2385
2386 case true_store_to_load_dep_costly:
2387 costly_str = "true_store_to_load_dep_costly";
2388 break;
2389
2390 case store_to_load_dep_costly:
2391 costly_str = "store_to_load_dep_costly";
2392 break;
2393
2394 default:
2395 costly_str = costly_num;
2396 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2397 break;
2398 }
2399
2400 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2401
2402 switch (rs6000_sched_insert_nops)
2403 {
2404 case sched_finish_regroup_exact:
2405 nop_str = "sched_finish_regroup_exact";
2406 break;
2407
2408 case sched_finish_pad_groups:
2409 nop_str = "sched_finish_pad_groups";
2410 break;
2411
2412 case sched_finish_none:
2413 nop_str = "sched_finish_none";
2414 break;
2415
2416 default:
2417 nop_str = nop_num;
2418 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2419 break;
2420 }
2421
2422 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2423
2424 switch (rs6000_sdata)
2425 {
2426 default:
2427 case SDATA_NONE:
2428 break;
2429
2430 case SDATA_DATA:
2431 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2432 break;
2433
2434 case SDATA_SYSV:
2435 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2436 break;
2437
2438 case SDATA_EABI:
2439 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2440 break;
2441
2442 }
2443
2444 switch (rs6000_traceback)
2445 {
2446 case traceback_default: trace_str = "default"; break;
2447 case traceback_none: trace_str = "none"; break;
2448 case traceback_part: trace_str = "part"; break;
2449 case traceback_full: trace_str = "full"; break;
2450 default: trace_str = "unknown"; break;
2451 }
2452
2453 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2454
2455 switch (rs6000_current_cmodel)
2456 {
2457 case CMODEL_SMALL: cmodel_str = "small"; break;
2458 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2459 case CMODEL_LARGE: cmodel_str = "large"; break;
2460 default: cmodel_str = "unknown"; break;
2461 }
2462
2463 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2464
2465 switch (rs6000_current_abi)
2466 {
2467 case ABI_NONE: abi_str = "none"; break;
2468 case ABI_AIX: abi_str = "aix"; break;
2469 case ABI_ELFv2: abi_str = "ELFv2"; break;
2470 case ABI_V4: abi_str = "V4"; break;
2471 case ABI_DARWIN: abi_str = "darwin"; break;
2472 default: abi_str = "unknown"; break;
2473 }
2474
2475 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2476
2477 if (rs6000_altivec_abi)
2478 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2479
2480 if (rs6000_darwin64_abi)
2481 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2482
2483 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2484 (TARGET_SOFT_FLOAT ? "true" : "false"));
2485
2486 if (TARGET_LINK_STACK)
2487 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2488
2489 if (TARGET_P8_FUSION)
2490 {
2491 char options[80];
2492
2493 strcpy (options, "power8");
2494 if (TARGET_P8_FUSION_SIGN)
2495 strcat (options, ", sign");
2496
2497 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2498 }
2499
2500 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2501 TARGET_SECURE_PLT ? "secure" : "bss");
2502 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2503 aix_struct_return ? "aix" : "sysv");
2504 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2505 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2506 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2507 tf[!!rs6000_align_branch_targets]);
2508 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2509 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2510 rs6000_long_double_type_size);
2511 if (rs6000_long_double_type_size > 64)
2512 {
2513 fprintf (stderr, DEBUG_FMT_S, "long double type",
2514 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2515 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2516 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2517 }
2518 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2519 (int)rs6000_sched_restricted_insns_priority);
2520 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2521 (int)END_BUILTINS);
2522 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2523 (int)RS6000_BUILTIN_COUNT);
2524
2525 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2526 (int)TARGET_FLOAT128_ENABLE_TYPE);
2527
2528 if (TARGET_VSX)
2529 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2530 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2531
2532 if (TARGET_DIRECT_MOVE_128)
2533 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2534 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2535 }
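
/* This dump is requested with -mdebug=reg, e.g. (illustrative usage):

     gcc -mcpu=power9 -mdebug=reg -S foo.c  */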
2536
2537 \f
2538 /* Update the addr mask bits in reg_addr to help the secondary reload and
2539 legitimate address support figure out the appropriate addressing to
2540 use. */
2541
2542 static void
2543 rs6000_setup_reg_addr_masks (void)
2544 {
2545 ssize_t rc, reg, m, nregs;
2546 addr_mask_type any_addr_mask, addr_mask;
2547
2548 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2549 {
2550 machine_mode m2 = (machine_mode) m;
2551 bool complex_p = false;
2552 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2553 size_t msize;
2554
2555 if (COMPLEX_MODE_P (m2))
2556 {
2557 complex_p = true;
2558 m2 = GET_MODE_INNER (m2);
2559 }
2560
2561 msize = GET_MODE_SIZE (m2);
2562
2563 /* SDmode is special in that we want to access it only via REG+REG
2564 addressing on power7 and above, since we want to use the LFIWZX and
2565 STFIWX instructions to load and store it. */
2566 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2567
2568 any_addr_mask = 0;
2569 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2570 {
2571 addr_mask = 0;
2572 reg = reload_reg_map[rc].reg;
2573
2574 /* Can mode values go in the GPR/FPR/Altivec registers? */
2575 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2576 {
2577 bool small_int_vsx_p = (small_int_p
2578 && (rc == RELOAD_REG_FPR
2579 || rc == RELOAD_REG_VMX));
2580
2581 nregs = rs6000_hard_regno_nregs[m][reg];
2582 addr_mask |= RELOAD_REG_VALID;
2583
2584 /* Indicate if the mode takes more than 1 physical register. If
2585 it takes a single register, indicate it can do REG+REG
2586 addressing. Small integers in VSX registers can only do
2587 REG+REG addressing. */
2588 if (small_int_vsx_p)
2589 addr_mask |= RELOAD_REG_INDEXED;
2590 else if (nregs > 1 || m == BLKmode || complex_p)
2591 addr_mask |= RELOAD_REG_MULTIPLE;
2592 else
2593 addr_mask |= RELOAD_REG_INDEXED;
2594
2595 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2596 addressing. If we allow scalars into Altivec registers,
2597 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2598
2599 For VSX systems, we don't allow update addressing for
2600 DFmode/SFmode if those registers can go in both the
2601 traditional floating point registers and Altivec registers.
2602 The load/store instructions for the Altivec registers do not
2603 have update forms. If we allowed update addressing, it seems
2604 to break IV-OPT code using floating point if the index type is
2605 int instead of long (PR target/81550 and target/84042). */
2606
2607 if (TARGET_UPDATE
2608 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2609 && msize <= 8
2610 && !VECTOR_MODE_P (m2)
2611 && !FLOAT128_VECTOR_P (m2)
2612 && !complex_p
2613 && (m != E_DFmode || !TARGET_VSX)
2614 && (m != E_SFmode || !TARGET_P8_VECTOR)
2615 && !small_int_vsx_p)
2616 {
2617 addr_mask |= RELOAD_REG_PRE_INCDEC;
2618
2619 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2620 we don't allow PRE_MODIFY for some multi-register
2621 operations. */
2622 switch (m)
2623 {
2624 default:
2625 addr_mask |= RELOAD_REG_PRE_MODIFY;
2626 break;
2627
2628 case E_DImode:
2629 if (TARGET_POWERPC64)
2630 addr_mask |= RELOAD_REG_PRE_MODIFY;
2631 break;
2632
2633 case E_DFmode:
2634 case E_DDmode:
2635 if (TARGET_HARD_FLOAT)
2636 addr_mask |= RELOAD_REG_PRE_MODIFY;
2637 break;
2638 }
2639 }
2640 }
2641
2642 /* GPR and FPR registers can do REG+OFFSET addressing, except
2643 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2644 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2645 if ((addr_mask != 0) && !indexed_only_p
2646 && msize <= 8
2647 && (rc == RELOAD_REG_GPR
2648 || ((msize == 8 || m2 == SFmode)
2649 && (rc == RELOAD_REG_FPR
2650 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2651 addr_mask |= RELOAD_REG_OFFSET;
2652
2653 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2654 instructions are enabled. The offset field for 128-bit VSX registers is
2655 only 12 bits. While GPRs can handle the full offset range, VSX
2656 registers can only handle the restricted range. */
2657 else if ((addr_mask != 0) && !indexed_only_p
2658 && msize == 16 && TARGET_P9_VECTOR
2659 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2660 || (m2 == TImode && TARGET_VSX)))
2661 {
2662 addr_mask |= RELOAD_REG_OFFSET;
2663 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2664 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2665 }
2666
2667 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2668 addressing on 128-bit types. */
2669 if (rc == RELOAD_REG_VMX && msize == 16
2670 && (addr_mask & RELOAD_REG_VALID) != 0)
2671 addr_mask |= RELOAD_REG_AND_M16;
2672
2673 reg_addr[m].addr_mask[rc] = addr_mask;
2674 any_addr_mask |= addr_mask;
2675 }
2676
2677 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2678 }
2679 }
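
/* As an example of the result (illustrative, for a typical VSX target):
   reg_addr[DFmode].addr_mask[RELOAD_REG_FPR] ends up containing
   RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET but neither
   RELOAD_REG_PRE_INCDEC nor RELOAD_REG_PRE_MODIFY, since update addressing
   is disabled for DFmode when VSX is enabled (see PR target/81550 above).  */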
2680
2681 \f
2682 /* Initialize the various global tables that are based on register size. */
2683 static void
2684 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2685 {
2686 ssize_t r, m, c;
2687 int align64;
2688 int align32;
2689
2690 /* Precalculate REGNO_REG_CLASS. */
2691 rs6000_regno_regclass[0] = GENERAL_REGS;
2692 for (r = 1; r < 32; ++r)
2693 rs6000_regno_regclass[r] = BASE_REGS;
2694
2695 for (r = 32; r < 64; ++r)
2696 rs6000_regno_regclass[r] = FLOAT_REGS;
2697
2698 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2699 rs6000_regno_regclass[r] = NO_REGS;
2700
2701 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2702 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2703
2704 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2705 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2706 rs6000_regno_regclass[r] = CR_REGS;
2707
2708 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2709 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2710 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2711 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2712 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2713 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2714 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2715
2716 /* Precalculate the map from register class to simpler reload register class. We don't
2717 need all of the register classes that are combinations of different
2718 classes, just the simple ones that have constraint letters. */
2719 for (c = 0; c < N_REG_CLASSES; c++)
2720 reg_class_to_reg_type[c] = NO_REG_TYPE;
2721
2722 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2723 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2724 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2725 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2726 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2727 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2728 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2729 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2730 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2731 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2732
2733 if (TARGET_VSX)
2734 {
2735 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2736 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2737 }
2738 else
2739 {
2740 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2741 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2742 }
2743
2744 /* Precalculate the valid memory formats as well as the vector information;
2745 this must be set up before the rs6000_hard_regno_nregs_internal calls
2746 below. */
2747 gcc_assert ((int)VECTOR_NONE == 0);
2748 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2749 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2750
2751 gcc_assert ((int)CODE_FOR_nothing == 0);
2752 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2753
2754 gcc_assert ((int)NO_REGS == 0);
2755 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2756
2757 /* The VSX hardware allows native alignment for vectors, but TARGET_VSX_ALIGN_128
2758 controls whether the compiler believes it can use native alignment or must still use 128-bit alignment. */
2759 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2760 {
2761 align64 = 64;
2762 align32 = 32;
2763 }
2764 else
2765 {
2766 align64 = 128;
2767 align32 = 128;
2768 }
2769
2770 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2771 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2772 if (TARGET_FLOAT128_TYPE)
2773 {
2774 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2775 rs6000_vector_align[KFmode] = 128;
2776
2777 if (FLOAT128_IEEE_P (TFmode))
2778 {
2779 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2780 rs6000_vector_align[TFmode] = 128;
2781 }
2782 }
2783
2784 /* V2DF mode, VSX only. */
2785 if (TARGET_VSX)
2786 {
2787 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2788 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2789 rs6000_vector_align[V2DFmode] = align64;
2790 }
2791
2792 /* V4SF mode, either VSX or Altivec. */
2793 if (TARGET_VSX)
2794 {
2795 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2796 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2797 rs6000_vector_align[V4SFmode] = align32;
2798 }
2799 else if (TARGET_ALTIVEC)
2800 {
2801 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2802 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2803 rs6000_vector_align[V4SFmode] = align32;
2804 }
2805
2806 /* V16QImode, V8HImode, V4SImode are Altivec only, but may use VSX loads
2807 and stores. */
2808 if (TARGET_ALTIVEC)
2809 {
2810 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2811 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2812 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2813 rs6000_vector_align[V4SImode] = align32;
2814 rs6000_vector_align[V8HImode] = align32;
2815 rs6000_vector_align[V16QImode] = align32;
2816
2817 if (TARGET_VSX)
2818 {
2819 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2820 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2821 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2822 }
2823 else
2824 {
2825 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2826 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2827 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2828 }
2829 }
2830
2831 /* V2DImode: full arithmetic support depends on the ISA 2.07 vector unit. Allow
2832 it under VSX to do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2833 if (TARGET_VSX)
2834 {
2835 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2836 rs6000_vector_unit[V2DImode]
2837 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2838 rs6000_vector_align[V2DImode] = align64;
2839
2840 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2841 rs6000_vector_unit[V1TImode]
2842 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2843 rs6000_vector_align[V1TImode] = 128;
2844 }
2845
2846 /* DFmode, see if we want to use the VSX unit. Memory is handled
2847 differently, so don't set rs6000_vector_mem. */
2848 if (TARGET_VSX)
2849 {
2850 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2851 rs6000_vector_align[DFmode] = 64;
2852 }
2853
2854 /* SFmode, see if we want to use the VSX unit. */
2855 if (TARGET_P8_VECTOR)
2856 {
2857 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2858 rs6000_vector_align[SFmode] = 32;
2859 }
2860
2861 /* Allow TImode in VSX registers and set the VSX memory macros. */
2862 if (TARGET_VSX)
2863 {
2864 rs6000_vector_mem[TImode] = VECTOR_VSX;
2865 rs6000_vector_align[TImode] = align64;
2866 }
2867
2868 /* Register class constraints for the constraints that depend on compile
2869 switches. When the VSX code was added, different constraints were added
2870 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2871 of the VSX registers are used. The register classes for scalar floating
2872 point types are set based on whether we allow that type into the upper
2873 (Altivec) registers. GCC has register classes to target the Altivec
2874 registers for load/store operations, to select using a VSX memory
2875 operation instead of the traditional floating point operation. The
2876 constraints are:
2877
2878 d - Register class to use with traditional DFmode instructions.
2879 f - Register class to use with traditional SFmode instructions.
2880 v - Altivec register.
2881 wa - Any VSX register.
2882 wc - Reserved to represent individual CR bits (used in LLVM).
2883 wn - always NO_REGS.
2884 wr - GPR if 64-bit mode is permitted.
2885 wx - Float register if we can do 32-bit int stores. */
2886
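/* For example (an illustrative machine-description fragment, not taken from
   the GCC sources), an insn pattern can accept any VSX register via the
   "wa" constraint:

     (define_insn "*sketch_mov_v2df"
       [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
             (match_operand:V2DF 1 "vsx_register_operand" "wa"))]
       "TARGET_VSX"
       "xxlor %x0,%x1,%x1")  */
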
2887 if (TARGET_HARD_FLOAT)
2888 {
2889 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
2890 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
2891 }
2892
2893 if (TARGET_VSX)
2894 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2895
2896 /* Add conditional constraints based on various options, to allow us to
2897 collapse multiple insn patterns. */
2898 if (TARGET_ALTIVEC)
2899 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2900
2901 if (TARGET_POWERPC64)
2902 {
2903 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2904 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
2905 }
2906
2907 if (TARGET_STFIWX)
2908 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2909
2910 /* Support for new direct moves (ISA 3.0 + 64-bit). */
2911 if (TARGET_DIRECT_MOVE_128)
2912 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
2913
2914 /* Set up the reload helper and direct move functions. */
2915 if (TARGET_VSX || TARGET_ALTIVEC)
2916 {
2917 if (TARGET_64BIT)
2918 {
2919 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2920 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2921 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2922 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2923 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2924 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
2925 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
2926 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
2927 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
2928 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
2929 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
2930 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
2931 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
2932 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
2933 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
2934 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
2935 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
2936 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
2937 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
2938 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
2939
2940 if (FLOAT128_VECTOR_P (KFmode))
2941 {
2942 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
2943 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
2944 }
2945
2946 if (FLOAT128_VECTOR_P (TFmode))
2947 {
2948 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
2949 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
2950 }
2951
2952 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
2953 available. */
2954 if (TARGET_NO_SDMODE_STACK)
2955 {
2956 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
2957 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
2958 }
2959
2960 if (TARGET_VSX)
2961 {
2962 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
2963 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
2964 }
2965
2966 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
2967 {
2968 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
2969 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
2970 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
2971 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
2972 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
2973 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
2974 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
2975 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
2976 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
2977
2978 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
2979 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
2980 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
2981 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
2982 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
2983 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
2984 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
2985 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
2986 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
2987
2988 if (FLOAT128_VECTOR_P (KFmode))
2989 {
2990 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
2991 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
2992 }
2993
2994 if (FLOAT128_VECTOR_P (TFmode))
2995 {
2996 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
2997 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
2998 }
2999 }
3000 }
3001 else
3002 {
3003 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3004 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3005 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3006 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3007 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3008 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3009 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3010 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3011 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3012 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3013 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3014 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3015 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3016 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3017 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3018 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3019 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3020 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3021 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3022 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3023
3024 if (FLOAT128_VECTOR_P (KFmode))
3025 {
3026 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3027 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3028 }
3029
3030 if (FLOAT128_IEEE_P (TFmode))
3031 {
3032 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3033 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3034 }
3035
3036 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3037 available. */
3038 if (TARGET_NO_SDMODE_STACK)
3039 {
3040 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3041 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3042 }
3043
3044 if (TARGET_VSX)
3045 {
3046 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3047 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3048 }
3049
3050 if (TARGET_DIRECT_MOVE)
3051 {
3052 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3053 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3054 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3055 }
3056 }
3057
3058 reg_addr[DFmode].scalar_in_vmx_p = true;
3059 reg_addr[DImode].scalar_in_vmx_p = true;
3060
3061 if (TARGET_P8_VECTOR)
3062 {
3063 reg_addr[SFmode].scalar_in_vmx_p = true;
3064 reg_addr[SImode].scalar_in_vmx_p = true;
3065
3066 if (TARGET_P9_VECTOR)
3067 {
3068 reg_addr[HImode].scalar_in_vmx_p = true;
3069 reg_addr[QImode].scalar_in_vmx_p = true;
3070 }
3071 }
3072 }
3073
3074 /* Precalculate HARD_REGNO_NREGS. */
3075 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3076 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3077 rs6000_hard_regno_nregs[m][r]
3078 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3079
3080 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3081 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3082 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3083 rs6000_hard_regno_mode_ok_p[m][r]
3084 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3085
3086 /* Precalculate CLASS_MAX_NREGS sizes. */
3087 for (c = 0; c < LIM_REG_CLASSES; ++c)
3088 {
3089 int reg_size;
3090
3091 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3092 reg_size = UNITS_PER_VSX_WORD;
3093
3094 else if (c == ALTIVEC_REGS)
3095 reg_size = UNITS_PER_ALTIVEC_WORD;
3096
3097 else if (c == FLOAT_REGS)
3098 reg_size = UNITS_PER_FP_WORD;
3099
3100 else
3101 reg_size = UNITS_PER_WORD;
3102
3103 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3104 {
3105 machine_mode m2 = (machine_mode)m;
3106 int reg_size2 = reg_size;
3107
3108 /* TDmode & IBM 128-bit floating point always take 2 registers, even
3109 in VSX. */
3110 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3111 reg_size2 = UNITS_PER_FP_WORD;
3112
3113 rs6000_class_max_nregs[m][c]
3114 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3115 }
3116 }
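
/* A worked example (illustrative only): IFmode (IBM 128-bit floating point)
   is 16 bytes and FLOAT128_2REG_P, so even in a VSX register class it uses
   reg_size 8 and needs (16 + 8 - 1) / 8 = 2 registers, while V2DFmode in
   VSX_REGS needs (16 + 16 - 1) / 16 = 1 register.  */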
3117
3118 /* Calculate the modes for which to automatically generate code using the
3119 reciprocal divide and square root instructions. In the future, possibly
3120 automatically generate the instructions even if the user did not specify
3121 -mrecip. The older machines' double precision reciprocal square root
3122 estimate is not accurate enough. */
3123 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3124 if (TARGET_FRES)
3125 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3126 if (TARGET_FRE)
3127 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3128 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3129 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3130 if (VECTOR_UNIT_VSX_P (V2DFmode))
3131 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3132
3133 if (TARGET_FRSQRTES)
3134 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3135 if (TARGET_FRSQRTE)
3136 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3137 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3138 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3139 if (VECTOR_UNIT_VSX_P (V2DFmode))
3140 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3141
3142 if (rs6000_recip_control)
3143 {
3144 if (!flag_finite_math_only)
3145 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math-only",
3146 "-ffast-math");
3147 if (flag_trapping_math)
3148 warning (0, "%qs requires %qs or %qs", "-mrecip",
3149 "-fno-trapping-math", "-ffast-math");
3150 if (!flag_reciprocal_math)
3151 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3152 "-ffast-math");
3153 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3154 {
3155 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3156 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3157 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3158
3159 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3160 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3161 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3162
3163 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3164 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3165 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3166
3167 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3168 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3169 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3170
3171 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3172 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3173 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3174
3175 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3176 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3177 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3178
3179 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3180 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3181 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3182
3183 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3184 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3185 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3186 }
3187 }
3188
3189 /* Update the addr mask bits in reg_addr to help the secondary reload and
3190 legitimate address support figure out the appropriate addressing to
3191 use. */
3192 rs6000_setup_reg_addr_masks ();
3193
3194 if (global_init_p || TARGET_DEBUG_TARGET)
3195 {
3196 if (TARGET_DEBUG_REG)
3197 rs6000_debug_reg_global ();
3198
3199 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3200 fprintf (stderr,
3201 "SImode variable mult cost = %d\n"
3202 "SImode constant mult cost = %d\n"
3203 "SImode short constant mult cost = %d\n"
3204 "DImode multipliciation cost = %d\n"
3205 "SImode division cost = %d\n"
3206 "DImode division cost = %d\n"
3207 "Simple fp operation cost = %d\n"
3208 "DFmode multiplication cost = %d\n"
3209 "SFmode division cost = %d\n"
3210 "DFmode division cost = %d\n"
3211 "cache line size = %d\n"
3212 "l1 cache size = %d\n"
3213 "l2 cache size = %d\n"
3214 "simultaneous prefetches = %d\n"
3215 "\n",
3216 rs6000_cost->mulsi,
3217 rs6000_cost->mulsi_const,
3218 rs6000_cost->mulsi_const9,
3219 rs6000_cost->muldi,
3220 rs6000_cost->divsi,
3221 rs6000_cost->divdi,
3222 rs6000_cost->fp,
3223 rs6000_cost->dmul,
3224 rs6000_cost->sdiv,
3225 rs6000_cost->ddiv,
3226 rs6000_cost->cache_line_size,
3227 rs6000_cost->l1_cache_size,
3228 rs6000_cost->l2_cache_size,
3229 rs6000_cost->simultaneous_prefetches);
3230 }
3231 }
3232
3233 #if TARGET_MACHO
3234 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3235
3236 static void
3237 darwin_rs6000_override_options (void)
3238 {
3239 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3240 off. */
3241 rs6000_altivec_abi = 1;
3242 TARGET_ALTIVEC_VRSAVE = 1;
3243 rs6000_current_abi = ABI_DARWIN;
3244
3245 if (DEFAULT_ABI == ABI_DARWIN
3246 && TARGET_64BIT)
3247 darwin_one_byte_bool = 1;
3248
3249 if (TARGET_64BIT && ! TARGET_POWERPC64)
3250 {
3251 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3252 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3253 }
3254
3255 /* The linkers [ld64] that support 64-bit do not need the JBSR longcall
3256 optimization, and it will not work with the most generic case (where the
3257 symbol is undefined external, but there is no symbol stub). */
3258 if (TARGET_64BIT)
3259 rs6000_default_long_calls = 0;
3260
3261 /* ld_classic is (so far) still used for kernel (static) code, and supports
3262 the JBSR longcall / branch islands. */
3263 if (flag_mkernel)
3264 {
3265 rs6000_default_long_calls = 1;
3266
3267 /* Allow a kext author to do -mkernel -mhard-float. */
3268 if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
3269 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3270 }
3271
3272 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3273 Altivec. */
3274 if (!flag_mkernel && !flag_apple_kext
3275 && TARGET_64BIT
3276 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3277 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3278
3279 /* Unless the user (not the configurer) has explicitly overridden
3280 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3281 G4 unless targeting the kernel. */
3282 if (!flag_mkernel
3283 && !flag_apple_kext
3284 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3285 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3286 && ! global_options_set.x_rs6000_cpu_index)
3287 {
3288 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3289 }
3290 }
3291 #endif
3292
3293 /* If not otherwise specified by a target, make 'long double' equivalent to
3294 'double'. */
3295
3296 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3297 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3298 #endif
3299
3300 /* Return the builtin mask of the various options used that could affect which
3301 builtins were used. In the past we used target_flags, but we've run out of
3302 bits, and some options are no longer in target_flags. */
3303
3304 HOST_WIDE_INT
3305 rs6000_builtin_mask_calculate (void)
3306 {
3307 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3308 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3309 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3310 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3311 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3312 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3313 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3314 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3315 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3316 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3317 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3318 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3319 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3320 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3321 | ((TARGET_POWERPC64) ? RS6000_BTM_POWERPC64 : 0)
3322 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3323 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3324 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3325 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3326 | ((TARGET_LONG_DOUBLE_128
3327 && TARGET_HARD_FLOAT
3328 && !TARGET_IEEEQUAD) ? RS6000_BTM_LDBL128 : 0)
3329 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
3330 | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0));
3331 }
3332
3333 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3334 to clobber the XER[CA] bit because clobbering that bit without telling
3335 the compiler worked just fine with versions of GCC before GCC 5, and
3336 breaking a lot of older code in ways that are hard to track down is
3337 not such a great idea. */
3338
3339 static rtx_insn *
3340 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3341 vec<const char *> &/*constraints*/,
3342 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3343 {
3344 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3345 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3346 return NULL;
3347 }
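
/* For illustration only (hypothetical user code, not part of this file):
   older sources used carry-setting instructions in inline asm without
   declaring the XER[CA] clobber; the hook above now adds that clobber
   implicitly.

     int
     increment_with_carry_set (int a)
     {
       int r;
       __asm__ ("addic. %0,%1,1" : "=r" (r) : "r" (a) : "cc");
       return r;   // addic. sets XER[CA] (and CR0), yet only "cc" is listed
     }
*/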
3348
3349 /* Override command line options.
3350
3351 Combine build-specific configuration information with options
3352 specified on the command line to set various state variables which
3353 influence code generation, optimization, and expansion of built-in
3354 functions. Assure that command-line configuration preferences are
3355 compatible with each other and with the build configuration; issue
3356 warnings while adjusting configuration or error messages while
3357 rejecting configuration.
3358
3359 Upon entry to this function:
3360
3361 This function is called once at the beginning of
3362 compilation, and then again at the start and end of compiling
3363 each section of code that has a different configuration, as
3364 indicated, for example, by adding the
3365
3366 __attribute__((__target__("cpu=power9")))
3367
3368 qualifier to a function definition or, for example, by bracketing
3369 code between
3370
3371 #pragma GCC target("altivec")
3372
3373 and
3374
3375 #pragma GCC reset_options
3376
3377 directives. Parameter global_init_p is true for the initial
3378 invocation, which initializes global variables, and false for all
3379 subsequent invocations.
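
   For example (hypothetical user code), this function would run again at
   the start and end of a region bracketed as:

     #pragma GCC target ("altivec")
     __vector int vadd (__vector int a, __vector int b) { return a + b; }
     #pragma GCC reset_options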
3380
3381
3382 Various global state information is assumed to be valid. This
3383 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3384 default CPU specified at build configure time, TARGET_DEFAULT,
3385 representing the default set of option flags for the default
3386 target, and global_options_set.x_rs6000_isa_flags, representing
3387 which options were requested on the command line.
3388
3389 Upon return from this function:
3390
3391 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3392 was set by name on the command line. Additionally, if certain
3393 attributes are automatically enabled or disabled by this function
3394 in order to assure compatibility between options and
3395 configuration, the flags associated with those attributes are
3396 also set. By setting these "explicit bits", we avoid the risk
3397 that other code might accidentally overwrite these particular
3398 attributes with "default values".
3399
3400 The various bits of rs6000_isa_flags are set to indicate the
3401 target options that have been selected for the most current
3402 compilation efforts. This has the effect of also turning on the
3403 associated TARGET_XXX values since these are macros which are
3404 generally defined to test the corresponding bit of the
3405 rs6000_isa_flags variable.
3406
3407 The variable rs6000_builtin_mask is set to represent the target
3408 options for the most current compilation efforts, consistent with
3409 the current contents of rs6000_isa_flags. This variable controls
3410 expansion of built-in functions.
3411
3412 Various other global variables and fields of global structures
3413 (over 50 in all) are initialized to reflect the desired options
3414 for the most current compilation efforts. */
3415
3416 static bool
3417 rs6000_option_override_internal (bool global_init_p)
3418 {
3419 bool ret = true;
3420
3421 HOST_WIDE_INT set_masks;
3422 HOST_WIDE_INT ignore_masks;
3423 int cpu_index = -1;
3424 int tune_index;
3425 struct cl_target_option *main_target_opt
3426 = ((global_init_p || target_option_default_node == NULL)
3427 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3428
3429 /* Print defaults. */
3430 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3431 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3432
3433 /* Remember the explicit arguments. */
3434 if (global_init_p)
3435 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3436
3437 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3438 library functions, so warn about it. The flag may be useful for
3439 performance studies from time to time though, so don't disable it
3440 entirely. */
3441 if (global_options_set.x_rs6000_alignment_flags
3442 && rs6000_alignment_flags == MASK_ALIGN_POWER
3443 && DEFAULT_ABI == ABI_DARWIN
3444 && TARGET_64BIT)
3445 warning (0, "%qs is not supported for 64-bit Darwin;"
3446 " it is incompatible with the installed C and C++ libraries",
3447 "-malign-power");
3448
  /* Numerous experiments show that IRA-based loop pressure calculation
     works better for RTL loop invariant motion on targets with enough
     (>= 32) registers.  It is an expensive optimization, so it is enabled
     only when optimizing for peak performance.  */
3453 if (optimize >= 3 && global_init_p
3454 && !global_options_set.x_flag_ira_loop_pressure)
3455 flag_ira_loop_pressure = 1;
3456
3457 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3458 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
3459 options were already specified. */
3460 if (flag_sanitize & SANITIZE_USER_ADDRESS
3461 && !global_options_set.x_flag_asynchronous_unwind_tables)
3462 flag_asynchronous_unwind_tables = 1;
3463
3464 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3465 loop unroller is active. It is only checked during unrolling, so
3466 we can just set it on by default. */
3467 if (!global_options_set.x_flag_variable_expansion_in_unroller)
3468 flag_variable_expansion_in_unroller = 1;
3469
3470 /* Set the pointer size. */
3471 if (TARGET_64BIT)
3472 {
3473 rs6000_pmode = DImode;
3474 rs6000_pointer_size = 64;
3475 }
3476 else
3477 {
3478 rs6000_pmode = SImode;
3479 rs6000_pointer_size = 32;
3480 }
3481
3482 /* Some OSs don't support saving the high part of 64-bit registers on context
3483 switch. Other OSs don't support saving Altivec registers. On those OSs,
3484 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3485 if the user wants either, the user must explicitly specify them and we
3486 won't interfere with the user's specification. */
3487
3488 set_masks = POWERPC_MASKS;
3489 #ifdef OS_MISSING_POWERPC64
3490 if (OS_MISSING_POWERPC64)
3491 set_masks &= ~OPTION_MASK_POWERPC64;
3492 #endif
3493 #ifdef OS_MISSING_ALTIVEC
3494 if (OS_MISSING_ALTIVEC)
3495 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3496 | OTHER_VSX_VECTOR_MASKS);
3497 #endif
3498
  /* Don't let the processor default override options given explicitly.  */
3500 set_masks &= ~rs6000_isa_flags_explicit;
3501
  /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments.  If the user changed
     the cpu in a target attribute or pragma, but did not specify a tuning
     option, use the cpu for the tuning option rather than the option specified
     with -mtune on the command line.  Process a '--with-cpu' configuration
     request as an implicit -mcpu.  */
3507 if (rs6000_cpu_index >= 0)
3508 cpu_index = rs6000_cpu_index;
3509 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3510 cpu_index = main_target_opt->x_rs6000_cpu_index;
3511 else if (OPTION_TARGET_CPU_DEFAULT)
3512 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3513
3514 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3515 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3516 with those from the cpu, except for options that were explicitly set. If
3517 we don't have a cpu, do not override the target bits set in
3518 TARGET_DEFAULT. */
3519 if (cpu_index >= 0)
3520 {
3521 rs6000_cpu_index = cpu_index;
3522 rs6000_isa_flags &= ~set_masks;
3523 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3524 & set_masks);
3525 }
3526 else
3527 {
      /* If no -mcpu=<xxx>, inherit any default options that were cleared via
	 POWERPC_MASKS.  Originally, TARGET_DEFAULT was used to initialize
	 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook.  Now that we
	 use rs6000_isa_flags, we need to do the initialization here.
3532
3533 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3534 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3535 HOST_WIDE_INT flags;
3536 if (TARGET_DEFAULT)
3537 flags = TARGET_DEFAULT;
3538 else
3539 {
3540 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3541 const char *default_cpu = (!TARGET_POWERPC64
3542 ? "powerpc"
3543 : (BYTES_BIG_ENDIAN
3544 ? "powerpc64"
3545 : "powerpc64le"));
3546 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3547 flags = processor_target_table[default_cpu_index].target_enable;
3548 }
3549 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3550 }
3551
3552 if (rs6000_tune_index >= 0)
3553 tune_index = rs6000_tune_index;
3554 else if (cpu_index >= 0)
3555 rs6000_tune_index = tune_index = cpu_index;
3556 else
3557 {
3558 size_t i;
3559 enum processor_type tune_proc
3560 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3561
3562 tune_index = -1;
3563 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3564 if (processor_target_table[i].processor == tune_proc)
3565 {
3566 tune_index = i;
3567 break;
3568 }
3569 }
3570
3571 if (cpu_index >= 0)
3572 rs6000_cpu = processor_target_table[cpu_index].processor;
3573 else
3574 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3575
3576 gcc_assert (tune_index >= 0);
3577 rs6000_tune = processor_target_table[tune_index].processor;
3578
3579 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3580 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3581 || rs6000_cpu == PROCESSOR_PPCE5500)
3582 {
3583 if (TARGET_ALTIVEC)
3584 error ("AltiVec not supported in this target");
3585 }
3586
3587 /* If we are optimizing big endian systems for space, use the load/store
3588 multiple instructions. */
3589 if (BYTES_BIG_ENDIAN && optimize_size)
3590 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
3591
  /* Don't allow -mmultiple on little-endian systems unless the cpu is a
     750, because the hardware doesn't support the instructions used in
     little-endian mode, and using them causes an alignment trap.  The 750
     does not cause an alignment trap (except when the target is
     unaligned).  */
3596
3597 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3598 {
3599 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3600 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3601 warning (0, "%qs is not supported on little endian systems",
3602 "-mmultiple");
3603 }
3604
3605 /* If little-endian, default to -mstrict-align on older processors.
3606 Testing for htm matches power8 and later. */
3607 if (!BYTES_BIG_ENDIAN
3608 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3609 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3610
3611 if (!rs6000_fold_gimple)
3612 fprintf (stderr,
3613 "gimple folding of rs6000 builtins has been disabled.\n");
3614
3615 /* Add some warnings for VSX. */
3616 if (TARGET_VSX)
3617 {
3618 const char *msg = NULL;
3619 if (!TARGET_HARD_FLOAT)
3620 {
3621 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3622 msg = N_("%<-mvsx%> requires hardware floating point");
3623 else
3624 {
3625 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3626 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3627 }
3628 }
3629 else if (TARGET_AVOID_XFORM > 0)
3630 msg = N_("%<-mvsx%> needs indexed addressing");
3631 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3632 & OPTION_MASK_ALTIVEC))
3633 {
3634 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3635 msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3636 else
3637 msg = N_("%<-mno-altivec%> disables vsx");
3638 }
3639
3640 if (msg)
3641 {
3642 warning (0, msg);
3643 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3644 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3645 }
3646 }
3647
3648 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3649 the -mcpu setting to enable options that conflict. */
3650 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3651 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3652 | OPTION_MASK_ALTIVEC
3653 | OPTION_MASK_VSX)) != 0)
3654 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3655 | OPTION_MASK_DIRECT_MOVE)
3656 & ~rs6000_isa_flags_explicit);
3657
3658 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3659 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3660
3661 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3662 off all of the options that depend on those flags. */
3663 ignore_masks = rs6000_disable_incompatible_switches ();
3664
3665 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3666 unless the user explicitly used the -mno-<option> to disable the code. */
3667 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
3668 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3669 else if (TARGET_P9_MINMAX)
3670 {
3671 if (cpu_index >= 0)
3672 {
	  if (processor_target_table[cpu_index].processor == PROCESSOR_POWER9)
3674 {
	      /* Legacy behavior: allow -mcpu=power9 with certain
		 capabilities explicitly disabled.  */
3677 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3678 }
3679 else
3680 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3681 "for <xxx> less than power9", "-mcpu");
3682 }
3683 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
3684 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
3685 & rs6000_isa_flags_explicit))
3686 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3687 were explicitly cleared. */
3688 error ("%qs incompatible with explicitly disabled options",
3689 "-mpower9-minmax");
3690 else
3691 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
3692 }
3693 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3694 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
3695 else if (TARGET_VSX)
3696 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
3697 else if (TARGET_POPCNTD)
3698 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
3699 else if (TARGET_DFP)
3700 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
3701 else if (TARGET_CMPB)
3702 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
3703 else if (TARGET_FPRND)
3704 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
3705 else if (TARGET_POPCNTB)
3706 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
3707 else if (TARGET_ALTIVEC)
3708 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
3709
3710 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3711 {
3712 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3713 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3714 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3715 }
3716
3717 if (!TARGET_FPRND && TARGET_VSX)
3718 {
3719 if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND)
	/* TARGET_VSX = 1 implies Power7 and newer.  */
3721 error ("%qs requires %qs", "-mvsx", "-mfprnd");
3722 rs6000_isa_flags &= ~OPTION_MASK_FPRND;
3723 }
3724
3725 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3726 {
3727 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3728 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3729 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3730 }
3731
3732 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3733 {
3734 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3735 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3736 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3737 }
3738
3739 if (TARGET_P8_VECTOR && !TARGET_VSX)
3740 {
3741 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3742 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
3743 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3744 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
3745 {
3746 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3747 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3748 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3749 }
3750 else
3751 {
3752 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
3753 not explicit. */
3754 rs6000_isa_flags |= OPTION_MASK_VSX;
3755 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3756 }
3757 }
3758
3759 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3760 {
3761 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3762 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3763 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3764 }
3765
  /* The quad memory instructions only work in 64-bit mode.  In 32-bit mode,
     silently turn off quad memory mode.  */
3768 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3769 {
3770 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3771 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
3772
3773 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3774 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
3775
3776 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3777 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3778 }
3779
  /* Non-atomic quad memory load/store instructions are disabled for little
     endian, since the words are reversed, but atomic operations can still
     be done by swapping the words.  */
3783 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3784 {
3785 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3786 warning (0, N_("%<-mquad-memory%> is not available in little endian "
3787 "mode"));
3788
3789 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3790 }
3791
  /* Assume that if the user asked for normal quad memory instructions, they
     want the atomic versions as well, unless they explicitly told us not to
     use quad word atomic instructions.  */
3795 if (TARGET_QUAD_MEMORY
3796 && !TARGET_QUAD_MEMORY_ATOMIC
3797 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
3798 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
3799
3800 /* If we can shrink-wrap the TOC register save separately, then use
3801 -msave-toc-indirect unless explicitly disabled. */
3802 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
3803 && flag_shrink_wrap_separate
3804 && optimize_function_for_speed_p (cfun))
3805 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
3806
3807 /* Enable power8 fusion if we are tuning for power8, even if we aren't
3808 generating power8 instructions. Power9 does not optimize power8 fusion
3809 cases. */
3810 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
3811 {
3812 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
3813 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3814 else
3815 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3816 }
3817
3818 /* Setting additional fusion flags turns on base fusion. */
3819 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
3820 {
3821 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
3822 {
3823 if (TARGET_P8_FUSION_SIGN)
3824 error ("%qs requires %qs", "-mpower8-fusion-sign",
3825 "-mpower8-fusion");
3826
3827 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3828 }
3829 else
3830 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3831 }
3832
3833 /* Power8 does not fuse sign extended loads with the addis. If we are
3834 optimizing at high levels for speed, convert a sign extended load into a
3835 zero extending load, and an explicit sign extension. */
3836 if (TARGET_P8_FUSION
3837 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
3838 && optimize_function_for_speed_p (cfun)
3839 && optimize >= 3)
3840 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
3841
3842 /* ISA 3.0 vector instructions include ISA 2.07. */
3843 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
3844 {
      /* We prefer not to mention undocumented options in error messages.
	 However, if users have managed to select power9-vector without
	 selecting power8-vector, they already know about undocumented
	 flags.  */
      if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
	  && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
3851 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
3852 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
3853 {
3854 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
3855 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3856 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
3857 }
3858 else
3859 {
3860 /* OPTION_MASK_P9_VECTOR is explicit and
3861 OPTION_MASK_P8_VECTOR is not explicit. */
3862 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
3863 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3864 }
3865 }
3866
  /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07 support.
     If we only have ISA 2.06 support, and the user did not specify the
     switch, leave it set to -1 so the movmisalign patterns are enabled, but
     we don't enable the full vectorization support.  */
3871 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
3872 TARGET_ALLOW_MOVMISALIGN = 1;
3873
3874 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
3875 {
3876 if (TARGET_ALLOW_MOVMISALIGN > 0
3877 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
3878 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
3879
3880 TARGET_ALLOW_MOVMISALIGN = 0;
3881 }
3882
3883 /* Determine when unaligned vector accesses are permitted, and when
3884 they are preferred over masked Altivec loads. Note that if
3885 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
3886 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
3887 not true. */
3888 if (TARGET_EFFICIENT_UNALIGNED_VSX)
3889 {
3890 if (!TARGET_VSX)
3891 {
3892 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
3893 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
3894
3895 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
3896 }
3897
3898 else if (!TARGET_ALLOW_MOVMISALIGN)
3899 {
3900 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
	    error ("%qs requires %qs", "-mefficient-unaligned-vsx",
		   "-mallow-movmisalign");
3903
3904 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
3905 }
3906 }
3907
  /* Use the long double size to select the appropriate long double type.
     We use TYPE_PRECISION to differentiate the three long double types,
     and we map 128 into the precision used for TFmode.  */
3911 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
3912 ? 64
3913 : FLOAT_PRECISION_TFmode);
3914
3915 /* Set long double size before the IEEE 128-bit tests. */
3916 if (!global_options_set.x_rs6000_long_double_type_size)
3917 {
3918 if (main_target_opt != NULL
3919 && (main_target_opt->x_rs6000_long_double_type_size
3920 != default_long_double_size))
3921 error ("target attribute or pragma changes %<long double%> size");
3922 else
3923 rs6000_long_double_type_size = default_long_double_size;
3924 }
3925 else if (rs6000_long_double_type_size == 128)
3926 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
3927 else if (global_options_set.x_rs6000_ieeequad)
3928 {
3929 if (global_options.x_rs6000_ieeequad)
3930 error ("%qs requires %qs", "-mabi=ieeelongdouble", "-mlong-double-128");
3931 else
3932 error ("%qs requires %qs", "-mabi=ibmlongdouble", "-mlong-double-128");
3933 }
3934
3935 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
3936 systems will also set long double to be IEEE 128-bit. AIX and Darwin
3937 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
3938 those systems will not pick up this default. Warn if the user changes the
3939 default unless -Wno-psabi. */
3940 if (!global_options_set.x_rs6000_ieeequad)
3941 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
3942
3943 else
3944 {
3945 if (global_options.x_rs6000_ieeequad
3946 && (!TARGET_POPCNTD || !TARGET_VSX))
3947 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
3948
3949 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128)
3950 {
3951 static bool warned_change_long_double;
3952 if (!warned_change_long_double)
3953 {
3954 warned_change_long_double = true;
3955 if (TARGET_IEEEQUAD)
3956 warning (OPT_Wpsabi, "Using IEEE extended precision "
3957 "%<long double%>");
3958 else
3959 warning (OPT_Wpsabi, "Using IBM extended precision "
3960 "%<long double%>");
3961 }
3962 }
3963 }
3964
3965 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
     systems.  In GCC 7, we would enable the IEEE 128-bit floating point
3967 infrastructure (-mfloat128-type) but not enable the actual __float128 type
3968 unless the user used the explicit -mfloat128. In GCC 8, we enable both
3969 the keyword as well as the type. */
3970 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
3971
3972 /* IEEE 128-bit floating point requires VSX support. */
3973 if (TARGET_FLOAT128_KEYWORD)
3974 {
3975 if (!TARGET_VSX)
3976 {
3977 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
3978 error ("%qs requires VSX support", "-mfloat128");
3979
3980 TARGET_FLOAT128_TYPE = 0;
3981 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
3982 | OPTION_MASK_FLOAT128_HW);
3983 }
3984 else if (!TARGET_FLOAT128_TYPE)
3985 {
3986 TARGET_FLOAT128_TYPE = 1;
	  warning (0, "the %<-mfloat128%> option may not be fully supported");
3988 }
3989 }
3990
3991 /* Enable the __float128 keyword under Linux by default. */
3992 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
3993 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
3994 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
3995
  /* If we are supporting the float128 type and have full ISA 3.0 support,
     enable -mfloat128-hardware by default.  However, don't enable the
     __float128 keyword if it was explicitly turned off.  64-bit mode is
     needed because sometimes the compiler wants to put things in an integer
     container, and if we don't have __int128 support, it is impossible.  */
4001 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4002 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4003 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4004 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4005
4006 if (TARGET_FLOAT128_HW
4007 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4008 {
4009 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4010 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4011
4012 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4013 }
4014
4015 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4016 {
4017 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4018 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4019
4020 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4021 }
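
  /* Illustrative user code gated by the options above (hypothetical, not
     part of this file):

       __float128 q = 1.0Q;              // needs -mfloat128 (the keyword)
       q = __builtin_fmaf128 (q, q, q);  // inlined with -mfloat128-hardware
  */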
4022
4023 /* -mprefixed (and hence -mpcrel) requires -mcpu=future. */
4024 if (TARGET_PREFIXED && !TARGET_FUTURE)
4025 {
4026 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4027 error ("%qs requires %qs", "-mpcrel", "-mcpu=future");
4028 else if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) != 0)
4029 error ("%qs requires %qs", "-mprefixed", "-mcpu=future");
4030
4031 rs6000_isa_flags &= ~(OPTION_MASK_PCREL | OPTION_MASK_PREFIXED);
4032 }
4033
4034 /* -mpcrel requires prefixed load/store addressing. */
4035 if (TARGET_PCREL && !TARGET_PREFIXED)
4036 {
4037 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4038 error ("%qs requires %qs", "-mpcrel", "-mprefixed");
4039
4040 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4041 }
4042
4043 /* Print the options after updating the defaults. */
4044 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4045 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4046
4047 /* E500mc does "better" if we inline more aggressively. Respect the
4048 user's opinion, though. */
4049 if (rs6000_block_move_inline_limit == 0
4050 && (rs6000_tune == PROCESSOR_PPCE500MC
4051 || rs6000_tune == PROCESSOR_PPCE500MC64
4052 || rs6000_tune == PROCESSOR_PPCE5500
4053 || rs6000_tune == PROCESSOR_PPCE6500))
4054 rs6000_block_move_inline_limit = 128;
4055
4056 /* store_one_arg depends on expand_block_move to handle at least the
4057 size of reg_parm_stack_space. */
4058 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4059 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4060
4061 if (global_init_p)
4062 {
4063 /* If the appropriate debug option is enabled, replace the target hooks
4064 with debug versions that call the real version and then prints
4065 debugging information. */
4066 if (TARGET_DEBUG_COST)
4067 {
4068 targetm.rtx_costs = rs6000_debug_rtx_costs;
4069 targetm.address_cost = rs6000_debug_address_cost;
4070 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4071 }
4072
4073 if (TARGET_DEBUG_ADDR)
4074 {
4075 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4076 targetm.legitimize_address = rs6000_debug_legitimize_address;
4077 rs6000_secondary_reload_class_ptr
4078 = rs6000_debug_secondary_reload_class;
4079 targetm.secondary_memory_needed
4080 = rs6000_debug_secondary_memory_needed;
4081 targetm.can_change_mode_class
4082 = rs6000_debug_can_change_mode_class;
4083 rs6000_preferred_reload_class_ptr
4084 = rs6000_debug_preferred_reload_class;
4085 rs6000_mode_dependent_address_ptr
4086 = rs6000_debug_mode_dependent_address;
4087 }
4088
4089 if (rs6000_veclibabi_name)
4090 {
4091 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4092 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4093 else
4094 {
4095 error ("unknown vectorization library ABI type (%qs) for "
4096 "%qs switch", rs6000_veclibabi_name, "-mveclibabi=");
4097 ret = false;
4098 }
4099 }
4100 }
4101
4102 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4103 target attribute or pragma which automatically enables both options,
4104 unless the altivec ABI was set. This is set by default for 64-bit, but
4105 not for 32-bit. */
4106 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4107 {
4108 TARGET_FLOAT128_TYPE = 0;
4109 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4110 | OPTION_MASK_FLOAT128_KEYWORD)
4111 & ~rs6000_isa_flags_explicit);
4112 }
4113
4114 /* Enable Altivec ABI for AIX -maltivec. */
4115 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4116 {
4117 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4118 error ("target attribute or pragma changes AltiVec ABI");
4119 else
4120 rs6000_altivec_abi = 1;
4121 }
4122
4123 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4124 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4125 be explicitly overridden in either case. */
4126 if (TARGET_ELF)
4127 {
4128 if (!global_options_set.x_rs6000_altivec_abi
4129 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4130 {
	  if (main_target_opt != NULL
	      && !main_target_opt->x_rs6000_altivec_abi)
4133 error ("target attribute or pragma changes AltiVec ABI");
4134 else
4135 rs6000_altivec_abi = 1;
4136 }
4137 }
4138
  /* Set the Darwin64 ABI as default for 64-bit Darwin.
     So far, the only darwin64 targets are also Mach-O.  */
4141 if (TARGET_MACHO
4142 && DEFAULT_ABI == ABI_DARWIN
4143 && TARGET_64BIT)
4144 {
4145 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4146 error ("target attribute or pragma changes darwin64 ABI");
4147 else
4148 {
4149 rs6000_darwin64_abi = 1;
4150 /* Default to natural alignment, for better performance. */
4151 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4152 }
4153 }
4154
  /* Place FP constants in the constant pool instead of the TOC
     if section anchors are enabled.  */
4157 if (flag_section_anchors
4158 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4159 TARGET_NO_FP_IN_TOC = 1;
4160
4161 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4162 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4163
4164 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4165 SUBTARGET_OVERRIDE_OPTIONS;
4166 #endif
4167 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4168 SUBSUBTARGET_OVERRIDE_OPTIONS;
4169 #endif
4170 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4171 SUB3TARGET_OVERRIDE_OPTIONS;
4172 #endif
4173
4174 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4175 after the subtarget override options are done. */
4176 if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
4177 {
4178 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4179 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4180
4181 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4182 }
4183
4184 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4185 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4186
4187 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4188 && rs6000_tune != PROCESSOR_POWER5
4189 && rs6000_tune != PROCESSOR_POWER6
4190 && rs6000_tune != PROCESSOR_POWER7
4191 && rs6000_tune != PROCESSOR_POWER8
4192 && rs6000_tune != PROCESSOR_POWER9
4193 && rs6000_tune != PROCESSOR_FUTURE
4194 && rs6000_tune != PROCESSOR_PPCA2
4195 && rs6000_tune != PROCESSOR_CELL
4196 && rs6000_tune != PROCESSOR_PPC476);
4197 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4198 || rs6000_tune == PROCESSOR_POWER5
4199 || rs6000_tune == PROCESSOR_POWER7
4200 || rs6000_tune == PROCESSOR_POWER8);
4201 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4202 || rs6000_tune == PROCESSOR_POWER5
4203 || rs6000_tune == PROCESSOR_POWER6
4204 || rs6000_tune == PROCESSOR_POWER7
4205 || rs6000_tune == PROCESSOR_POWER8
4206 || rs6000_tune == PROCESSOR_POWER9
4207 || rs6000_tune == PROCESSOR_FUTURE
4208 || rs6000_tune == PROCESSOR_PPCE500MC
4209 || rs6000_tune == PROCESSOR_PPCE500MC64
4210 || rs6000_tune == PROCESSOR_PPCE5500
4211 || rs6000_tune == PROCESSOR_PPCE6500);
4212
4213 /* Allow debug switches to override the above settings. These are set to -1
4214 in rs6000.opt to indicate the user hasn't directly set the switch. */
4215 if (TARGET_ALWAYS_HINT >= 0)
4216 rs6000_always_hint = TARGET_ALWAYS_HINT;
4217
4218 if (TARGET_SCHED_GROUPS >= 0)
4219 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4220
4221 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4222 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4223
4224 rs6000_sched_restricted_insns_priority
4225 = (rs6000_sched_groups ? 1 : 0);
4226
4227 /* Handle -msched-costly-dep option. */
4228 rs6000_sched_costly_dep
4229 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4230
4231 if (rs6000_sched_costly_dep_str)
4232 {
4233 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4234 rs6000_sched_costly_dep = no_dep_costly;
4235 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4236 rs6000_sched_costly_dep = all_deps_costly;
4237 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4238 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4239 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4240 rs6000_sched_costly_dep = store_to_load_dep_costly;
4241 else
4242 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4243 atoi (rs6000_sched_costly_dep_str));
4244 }
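
  /* For example, -msched-costly-dep=all marks every dependence as costly,
     while a plain number such as -msched-costly-dep=20 falls through to the
     atoi call above and treats dependences whose latency is at least 20 as
     costly.  */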
4245
4246 /* Handle -minsert-sched-nops option. */
4247 rs6000_sched_insert_nops
4248 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4249
4250 if (rs6000_sched_insert_nops_str)
4251 {
4252 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4253 rs6000_sched_insert_nops = sched_finish_none;
4254 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4255 rs6000_sched_insert_nops = sched_finish_pad_groups;
4256 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4257 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4258 else
4259 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4260 atoi (rs6000_sched_insert_nops_str));
4261 }
4262
  /* Handle the stack protector.  */
4264 if (!global_options_set.x_rs6000_stack_protector_guard)
4265 #ifdef TARGET_THREAD_SSP_OFFSET
4266 rs6000_stack_protector_guard = SSP_TLS;
4267 #else
4268 rs6000_stack_protector_guard = SSP_GLOBAL;
4269 #endif
4270
4271 #ifdef TARGET_THREAD_SSP_OFFSET
4272 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4273 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4274 #endif
4275
4276 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
4277 {
4278 char *endp;
4279 const char *str = rs6000_stack_protector_guard_offset_str;
4280
4281 errno = 0;
4282 long offset = strtol (str, &endp, 0);
4283 if (!*str || *endp || errno)
4284 error ("%qs is not a valid number in %qs", str,
4285 "-mstack-protector-guard-offset=");
4286
4287 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4288 || (TARGET_64BIT && (offset & 3)))
4289 error ("%qs is not a valid offset in %qs", str,
4290 "-mstack-protector-guard-offset=");
4291
4292 rs6000_stack_protector_guard_offset = offset;
4293 }
4294
4295 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
4296 {
4297 const char *str = rs6000_stack_protector_guard_reg_str;
4298 int reg = decode_reg_name (str);
4299
4300 if (!IN_RANGE (reg, 1, 31))
4301 error ("%qs is not a valid base register in %qs", str,
4302 "-mstack-protector-guard-reg=");
4303
4304 rs6000_stack_protector_guard_reg = reg;
4305 }
4306
4307 if (rs6000_stack_protector_guard == SSP_TLS
4308 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4309 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
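
  /* Example (hypothetical command line) accepted by the checks above:

       -mstack-protector-guard=tls -mstack-protector-guard-reg=r13
       -mstack-protector-guard-offset=0x28

     i.e. the guard is loaded from offset 0x28 relative to r13 (the TLS
     pointer register in 64-bit mode).  */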
4310
4311 if (global_init_p)
4312 {
4313 #ifdef TARGET_REGNAMES
4314 /* If the user desires alternate register names, copy in the
4315 alternate names now. */
4316 if (TARGET_REGNAMES)
4317 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4318 #endif
4319
4320 /* Set aix_struct_return last, after the ABI is determined.
4321 If -maix-struct-return or -msvr4-struct-return was explicitly
4322 used, don't override with the ABI default. */
4323 if (!global_options_set.x_aix_struct_return)
4324 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4325
4326 #if 0
4327 /* IBM XL compiler defaults to unsigned bitfields. */
4328 if (TARGET_XL_COMPAT)
4329 flag_signed_bitfields = 0;
4330 #endif
4331
4332 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4333 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4334
4335 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4336
4337 /* We can only guarantee the availability of DI pseudo-ops when
4338 assembling for 64-bit targets. */
4339 if (!TARGET_64BIT)
4340 {
4341 targetm.asm_out.aligned_op.di = NULL;
4342 targetm.asm_out.unaligned_op.di = NULL;
4343 }
4344
4346 /* Set branch target alignment, if not optimizing for size. */
4347 if (!optimize_size)
4348 {
	  /* Cell wants to be aligned 8-byte for dual issue.  Titan wants to
	     be aligned 8-byte to avoid misprediction by the branch
	     predictor.  */
4351 if (rs6000_tune == PROCESSOR_TITAN
4352 || rs6000_tune == PROCESSOR_CELL)
4353 {
4354 if (flag_align_functions && !str_align_functions)
4355 str_align_functions = "8";
4356 if (flag_align_jumps && !str_align_jumps)
4357 str_align_jumps = "8";
4358 if (flag_align_loops && !str_align_loops)
4359 str_align_loops = "8";
4360 }
4361 if (rs6000_align_branch_targets)
4362 {
4363 if (flag_align_functions && !str_align_functions)
4364 str_align_functions = "16";
4365 if (flag_align_jumps && !str_align_jumps)
4366 str_align_jumps = "16";
4367 if (flag_align_loops && !str_align_loops)
4368 {
4369 can_override_loop_align = 1;
4370 str_align_loops = "16";
4371 }
4372 }
4373 }
4374
4375 /* Arrange to save and restore machine status around nested functions. */
4376 init_machine_status = rs6000_init_machine_status;
4377
4378 /* We should always be splitting complex arguments, but we can't break
4379 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4380 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4381 targetm.calls.split_complex_arg = NULL;
4382
4383 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4384 if (DEFAULT_ABI == ABI_AIX)
4385 targetm.calls.custom_function_descriptors = 0;
4386 }
4387
4388 /* Initialize rs6000_cost with the appropriate target costs. */
4389 if (optimize_size)
4390 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4391 else
4392 switch (rs6000_tune)
4393 {
4394 case PROCESSOR_RS64A:
4395 rs6000_cost = &rs64a_cost;
4396 break;
4397
4398 case PROCESSOR_MPCCORE:
4399 rs6000_cost = &mpccore_cost;
4400 break;
4401
4402 case PROCESSOR_PPC403:
4403 rs6000_cost = &ppc403_cost;
4404 break;
4405
4406 case PROCESSOR_PPC405:
4407 rs6000_cost = &ppc405_cost;
4408 break;
4409
4410 case PROCESSOR_PPC440:
4411 rs6000_cost = &ppc440_cost;
4412 break;
4413
4414 case PROCESSOR_PPC476:
4415 rs6000_cost = &ppc476_cost;
4416 break;
4417
4418 case PROCESSOR_PPC601:
4419 rs6000_cost = &ppc601_cost;
4420 break;
4421
4422 case PROCESSOR_PPC603:
4423 rs6000_cost = &ppc603_cost;
4424 break;
4425
4426 case PROCESSOR_PPC604:
4427 rs6000_cost = &ppc604_cost;
4428 break;
4429
4430 case PROCESSOR_PPC604e:
4431 rs6000_cost = &ppc604e_cost;
4432 break;
4433
4434 case PROCESSOR_PPC620:
4435 rs6000_cost = &ppc620_cost;
4436 break;
4437
4438 case PROCESSOR_PPC630:
4439 rs6000_cost = &ppc630_cost;
4440 break;
4441
4442 case PROCESSOR_CELL:
4443 rs6000_cost = &ppccell_cost;
4444 break;
4445
4446 case PROCESSOR_PPC750:
4447 case PROCESSOR_PPC7400:
4448 rs6000_cost = &ppc750_cost;
4449 break;
4450
4451 case PROCESSOR_PPC7450:
4452 rs6000_cost = &ppc7450_cost;
4453 break;
4454
4455 case PROCESSOR_PPC8540:
4456 case PROCESSOR_PPC8548:
4457 rs6000_cost = &ppc8540_cost;
4458 break;
4459
4460 case PROCESSOR_PPCE300C2:
4461 case PROCESSOR_PPCE300C3:
4462 rs6000_cost = &ppce300c2c3_cost;
4463 break;
4464
4465 case PROCESSOR_PPCE500MC:
4466 rs6000_cost = &ppce500mc_cost;
4467 break;
4468
4469 case PROCESSOR_PPCE500MC64:
4470 rs6000_cost = &ppce500mc64_cost;
4471 break;
4472
4473 case PROCESSOR_PPCE5500:
4474 rs6000_cost = &ppce5500_cost;
4475 break;
4476
4477 case PROCESSOR_PPCE6500:
4478 rs6000_cost = &ppce6500_cost;
4479 break;
4480
4481 case PROCESSOR_TITAN:
4482 rs6000_cost = &titan_cost;
4483 break;
4484
4485 case PROCESSOR_POWER4:
4486 case PROCESSOR_POWER5:
4487 rs6000_cost = &power4_cost;
4488 break;
4489
4490 case PROCESSOR_POWER6:
4491 rs6000_cost = &power6_cost;
4492 break;
4493
4494 case PROCESSOR_POWER7:
4495 rs6000_cost = &power7_cost;
4496 break;
4497
4498 case PROCESSOR_POWER8:
4499 rs6000_cost = &power8_cost;
4500 break;
4501
4502 case PROCESSOR_POWER9:
4503 case PROCESSOR_FUTURE:
4504 rs6000_cost = &power9_cost;
4505 break;
4506
4507 case PROCESSOR_PPCA2:
4508 rs6000_cost = &ppca2_cost;
4509 break;
4510
4511 default:
4512 gcc_unreachable ();
4513 }
4514
4515 if (global_init_p)
4516 {
4517 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4518 param_simultaneous_prefetches,
4519 rs6000_cost->simultaneous_prefetches);
4520 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4521 param_l1_cache_size,
4522 rs6000_cost->l1_cache_size);
4523 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4524 param_l1_cache_line_size,
4525 rs6000_cost->cache_line_size);
4526 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4527 param_l2_cache_size,
4528 rs6000_cost->l2_cache_size);
4529
4530 /* Increase loop peeling limits based on performance analysis. */
4531 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4532 param_max_peeled_insns, 400);
4533 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4534 param_max_completely_peeled_insns, 400);
4535
4536 /* Use the 'model' -fsched-pressure algorithm by default. */
4537 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4538 param_sched_pressure_algorithm,
4539 SCHED_PRESSURE_MODEL);
4540
4541 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
4542 turns -fweb and -frename-registers on. */
4543 if ((global_options_set.x_flag_unroll_loops && flag_unroll_loops)
4544 || (global_options_set.x_flag_unroll_all_loops
4545 && flag_unroll_all_loops))
4546 {
4547 if (!global_options_set.x_unroll_only_small_loops)
4548 unroll_only_small_loops = 0;
4549 if (!global_options_set.x_flag_rename_registers)
4550 flag_rename_registers = 1;
4551 if (!global_options_set.x_flag_web)
4552 flag_web = 1;
4553 }
4554
4555 /* If using typedef char *va_list, signal that
4556 __builtin_va_start (&ap, 0) can be optimized to
4557 ap = __builtin_next_arg (0). */
4558 if (DEFAULT_ABI != ABI_V4)
4559 targetm.expand_builtin_va_start = NULL;
4560 }
4561
4562 /* If not explicitly specified via option, decide whether to generate indexed
4563 load/store instructions. A value of -1 indicates that the
4564 initial value of this variable has not been overwritten. During
4565 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4566 if (TARGET_AVOID_XFORM == -1)
4567 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4568 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4569 need indexed accesses and the type used is the scalar type of the element
4570 being loaded or stored. */
4571 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4572 && !TARGET_ALTIVEC);
4573
4574 /* Set the -mrecip options. */
4575 if (rs6000_recip_name)
4576 {
4577 char *p = ASTRDUP (rs6000_recip_name);
4578 char *q;
4579 unsigned int mask, i;
4580 bool invert;
4581
4582 while ((q = strtok (p, ",")) != NULL)
4583 {
4584 p = NULL;
4585 if (*q == '!')
4586 {
4587 invert = true;
4588 q++;
4589 }
4590 else
4591 invert = false;
4592
4593 if (!strcmp (q, "default"))
4594 mask = ((TARGET_RECIP_PRECISION)
4595 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4596 else
4597 {
4598 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4599 if (!strcmp (q, recip_options[i].string))
4600 {
4601 mask = recip_options[i].mask;
4602 break;
4603 }
4604
4605 if (i == ARRAY_SIZE (recip_options))
4606 {
4607 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4608 invert = false;
4609 mask = 0;
4610 ret = false;
4611 }
4612 }
4613
4614 if (invert)
4615 rs6000_recip_control &= ~mask;
4616 else
4617 rs6000_recip_control |= mask;
4618 }
4619 }
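
  /* Example (documented -mrecip syntax) accepted by the parser above:

       -mrecip=rsqrtf,!divd

     enables the single-precision reciprocal square root estimate and
     explicitly disables the double-precision divide estimate.  */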
4620
4621 /* Set the builtin mask of the various options used that could affect which
4622 builtins were used. In the past we used target_flags, but we've run out
4623 of bits, and some options are no longer in target_flags. */
4624 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4625 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4626 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4627 rs6000_builtin_mask);
4628
4629 /* Initialize all of the registers. */
4630 rs6000_init_hard_regno_mode_ok (global_init_p);
4631
  /* Save the initial options in case the user uses function-specific
     options.  */
4633 if (global_init_p)
4634 target_option_default_node = target_option_current_node
4635 = build_target_option_node (&global_options);
4636
  /* If not explicitly specified via option, decide whether to generate the
     extra blr's required to preserve the link stack on some cpus
     (e.g., 476).  */
4639 if (TARGET_LINK_STACK == -1)
4640 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4641
4642 /* Deprecate use of -mno-speculate-indirect-jumps. */
4643 if (!rs6000_speculate_indirect_jumps)
4644 warning (0, "%qs is deprecated and not recommended in any circumstances",
4645 "-mno-speculate-indirect-jumps");
4646
4647 return ret;
4648 }
4649
4650 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4651 define the target cpu type. */
4652
4653 static void
4654 rs6000_option_override (void)
4655 {
4656 (void) rs6000_option_override_internal (true);
4657 }
4658
4659 \f
4660 /* Implement targetm.vectorize.builtin_mask_for_load. */
4661 static tree
4662 rs6000_builtin_mask_for_load (void)
4663 {
4664 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
4665 if ((TARGET_ALTIVEC && !TARGET_VSX)
4666 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
4667 return altivec_builtin_mask_for_load;
4668 else
4669 return 0;
4670 }
4671
4672 /* Implement LOOP_ALIGN. */
4673 align_flags
4674 rs6000_loop_align (rtx label)
4675 {
4676 basic_block bb;
4677 int ninsns;
4678
4679 /* Don't override loop alignment if -falign-loops was specified. */
4680 if (!can_override_loop_align)
4681 return align_loops;
4682
4683 bb = BLOCK_FOR_INSN (label);
  ninsns = num_loop_insns (bb->loop_father);
4685
  /* Align small loops to 32 bytes to fit in an icache sector; otherwise
     return the default.  */
4687 if (ninsns > 4 && ninsns <= 8
4688 && (rs6000_tune == PROCESSOR_POWER4
4689 || rs6000_tune == PROCESSOR_POWER5
4690 || rs6000_tune == PROCESSOR_POWER6
4691 || rs6000_tune == PROCESSOR_POWER7
4692 || rs6000_tune == PROCESSOR_POWER8))
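    /* align_flags (5) requests 1 << 5, i.e. 32-byte, alignment.  */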
4693 return align_flags (5);
4694 else
4695 return align_loops;
4696 }
4697
/* Return true iff a data reference of TYPE can reach vector alignment (16)
   after applying N iterations.  This routine does not determine how many
   iterations are required to reach the desired alignment.  */
4701
4702 static bool
4703 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4704 {
4705 if (is_packed)
4706 return false;
4707
4708 if (TARGET_32BIT)
4709 {
4710 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4711 return true;
4712
4713 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4714 return true;
4715
4716 return false;
4717 }
4718 else
4719 {
4720 if (TARGET_MACHO)
4721 return false;
4722
      /* Assume that all other types are naturally aligned.  CHECKME!  */
4724 return true;
4725 }
4726 }
4727
4728 /* Return true if the vector misalignment factor is supported by the
4729 target. */
4730 static bool
4731 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4732 const_tree type,
4733 int misalignment,
4734 bool is_packed)
4735 {
4736 if (TARGET_VSX)
4737 {
4738 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4739 return true;
4740
      /* Return false if the movmisalign pattern is not supported for this
	 mode.  */
4742 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4743 return false;
4744
4745 if (misalignment == -1)
4746 {
	  /* The misalignment factor is unknown at compile time, but we know
	     the access is word aligned.  */
4749 if (rs6000_vector_alignment_reachable (type, is_packed))
4750 {
4751 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4752
4753 if (element_size == 64 || element_size == 32)
4754 return true;
4755 }
4756
4757 return false;
4758 }
4759
      /* VSX supports word-aligned vectors.  */
4761 if (misalignment % 4 == 0)
4762 return true;
4763 }
4764 return false;
4765 }
4766
4767 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4768 static int
4769 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4770 tree vectype, int misalign)
4771 {
4772 unsigned elements;
4773 tree elem_type;
4774
4775 switch (type_of_cost)
4776 {
4777 case scalar_stmt:
4778 case scalar_store:
4779 case vector_stmt:
4780 case vector_store:
4781 case vec_to_scalar:
4782 case scalar_to_vec:
4783 case cond_branch_not_taken:
4784 return 1;
4785 case scalar_load:
4786 case vector_load:
4787 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4788 return 2;
4789
4790 case vec_perm:
4791 /* Power7 has only one permute unit, make it a bit expensive. */
4792 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
4793 return 3;
4794 else
4795 return 1;
4796
4797 case vec_promote_demote:
4798 /* Power7 has only one permute/pack unit, make it a bit expensive. */
4799 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
4800 return 4;
4801 else
4802 return 1;
4803
4804 case cond_branch_taken:
4805 return 3;
4806
4807 case unaligned_load:
4808 case vector_gather_load:
4809 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4810 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4811 return 2;
4812
4813 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4814 {
4815 elements = TYPE_VECTOR_SUBPARTS (vectype);
4816 if (elements == 2)
4817 /* Double word aligned. */
4818 return 4;
4819
4820 if (elements == 4)
4821 {
4822 switch (misalign)
4823 {
4824 case 8:
4825 /* Double word aligned. */
4826 return 4;
4827
4828 case -1:
4829 /* Unknown misalignment. */
4830 case 4:
4831 case 12:
4832 /* Word aligned. */
4833 return 33;
4834
4835 default:
4836 gcc_unreachable ();
4837 }
4838 }
4839 }
4840
4841 if (TARGET_ALTIVEC)
4842 /* Misaligned loads are not supported. */
4843 gcc_unreachable ();
4844
4845 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4846 return 4;
4847
4848 case unaligned_store:
4849 case vector_scatter_store:
4850 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4851 return 1;
4852
4853 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4854 {
4855 elements = TYPE_VECTOR_SUBPARTS (vectype);
4856 if (elements == 2)
4857 /* Double word aligned. */
4858 return 2;
4859
4860 if (elements == 4)
4861 {
4862 switch (misalign)
4863 {
4864 case 8:
4865 /* Double word aligned. */
4866 return 2;
4867
4868 case -1:
4869 /* Unknown misalignment. */
4870 case 4:
4871 case 12:
4872 /* Word aligned. */
4873 return 23;
4874
4875 default:
4876 gcc_unreachable ();
4877 }
4878 }
4879 }
4880
4881 if (TARGET_ALTIVEC)
4882 /* Misaligned stores are not supported. */
4883 gcc_unreachable ();
4884
4885 return 2;
4886
4887 case vec_construct:
4888 /* This is a rough approximation assuming non-constant elements
4889 constructed into a vector via element insertion. FIXME:
4890 vec_construct is not granular enough for uniformly good
4891 decisions. If the initialization is a splat, this is
4892 cheaper than we estimate. Improve this someday. */
4893 elem_type = TREE_TYPE (vectype);
      /* 32-bit floating-point values loaded into registers are stored as
	 double precision, so we need 2 permutes, 2 converts, and 1 merge
	 to construct a vector of short floats from them.  */
4897 if (SCALAR_FLOAT_TYPE_P (elem_type)
4898 && TYPE_PRECISION (elem_type) == 32)
4899 return 5;
4900 /* On POWER9, integer vector types are built up in GPRs and then
4901 use a direct move (2 cycles). For POWER8 this is even worse,
4902 as we need two direct moves and a merge, and the direct moves
4903 are five cycles. */
4904 else if (INTEGRAL_TYPE_P (elem_type))
4905 {
4906 if (TARGET_P9_VECTOR)
4907 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
4908 else
4909 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
4910 }
4911 else
4912 /* V2DFmode doesn't need a direct move. */
4913 return 2;
4914
4915 default:
4916 gcc_unreachable ();
4917 }
4918 }
4919
4920 /* Implement targetm.vectorize.preferred_simd_mode. */
4921
4922 static machine_mode
4923 rs6000_preferred_simd_mode (scalar_mode mode)
4924 {
4925 opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode));
4926
4927 if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ()))
4928 return vmode.require ();
4929
4930 return word_mode;
4931 }
4932
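/* For example, on a VSX target GET_MODE_SIZE (SFmode) == 4, so SFmode maps
to the 16 / 4 == 4 element mode V4SFmode, and DFmode maps to V2DFmode. If
no such vector mode exists, or VECTOR_MEM_NONE_P holds for it, word_mode is
returned and no preferred SIMD mode is reported for that scalar mode. */
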
4933 typedef struct _rs6000_cost_data
4934 {
4935 struct loop *loop_info;
4936 unsigned cost[3];
4937 } rs6000_cost_data;
4938
4939 /* Test for likely overcommitment of vector hardware resources. If a
4940 loop iteration is relatively large, and too large a percentage of
4941 instructions in the loop are vectorized, the cost model may not
4942 adequately reflect delays from unavailable vector resources.
4943 Penalize the loop body cost for this case. */
4944
4945 static void
4946 rs6000_density_test (rs6000_cost_data *data)
4947 {
4948 const int DENSITY_PCT_THRESHOLD = 85;
4949 const int DENSITY_SIZE_THRESHOLD = 70;
4950 const int DENSITY_PENALTY = 10;
4951 struct loop *loop = data->loop_info;
4952 basic_block *bbs = get_loop_body (loop);
4953 int nbbs = loop->num_nodes;
4954 loop_vec_info loop_vinfo = loop_vec_info_for_loop (data->loop_info);
4955 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
4956 int i, density_pct;
4957
4958 for (i = 0; i < nbbs; i++)
4959 {
4960 basic_block bb = bbs[i];
4961 gimple_stmt_iterator gsi;
4962
4963 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4964 {
4965 gimple *stmt = gsi_stmt (gsi);
4966 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
4967
4968 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4969 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
4970 not_vec_cost++;
4971 }
4972 }
4973
4974 free (bbs);
4975 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
4976
4977 if (density_pct > DENSITY_PCT_THRESHOLD
4978 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
4979 {
4980 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
4981 if (dump_enabled_p ())
4982 dump_printf_loc (MSG_NOTE, vect_location,
4983 "density %d%%, cost %d exceeds threshold, penalizing "
4984 "loop body cost by %d%%", density_pct,
4985 vec_cost + not_vec_cost, DENSITY_PENALTY);
4986 }
4987 }
4988
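/* Worked example of the density test above, with hypothetical counts: if
vec_cost == 90 and not_vec_cost == 10, then
density_pct == (90 * 100) / (90 + 10) == 90. Since 90 > 85
(DENSITY_PCT_THRESHOLD) and 90 + 10 > 70 (DENSITY_SIZE_THRESHOLD), the
vectorized body cost is scaled to 90 * (100 + 10) / 100 == 99. */
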
4989 /* Implement targetm.vectorize.init_cost. */
4990
4991 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
4992 instruction is needed by the vectorization. */
4993 static bool rs6000_vect_nonmem;
4994
4995 static void *
4996 rs6000_init_cost (struct loop *loop_info)
4997 {
4998 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
4999 data->loop_info = loop_info;
5000 data->cost[vect_prologue] = 0;
5001 data->cost[vect_body] = 0;
5002 data->cost[vect_epilogue] = 0;
5003 rs6000_vect_nonmem = false;
5004 return data;
5005 }
5006
5007 /* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
5008 For some statements we want to fine-tune the cost on top of what
5009 rs6000_builtin_vectorization_cost computes, since that function has no
5010 information on statement operation codes and the like. One typical case
5011 is COND_EXPR: it costs the same as a simple FXU instruction when evaluated
5012 for scalar cost, but it should be priced higher whether it is transformed
5013 into compare + branch or compare + isel instructions. */
5014
5015 static unsigned
5016 adjust_vectorization_cost (enum vect_cost_for_stmt kind,
5017 struct _stmt_vec_info *stmt_info)
5018 {
5019 if (kind == scalar_stmt && stmt_info && stmt_info->stmt
5020 && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
5021 {
5022 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
5023 if (subcode == COND_EXPR)
5024 return 2;
5025 }
5026
5027 return 0;
5028 }
5029
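/* For example, for a scalar statement such as

  x = a < b ? c : d;

adjust_vectorization_cost returns 2, which rs6000_add_stmt_cost adds on top
of the base scalar_stmt cost to reflect the eventual compare + branch or
compare + isel sequence. */
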
5030 /* Implement targetm.vectorize.add_stmt_cost. */
5031
5032 static unsigned
5033 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5034 struct _stmt_vec_info *stmt_info, int misalign,
5035 enum vect_cost_model_location where)
5036 {
5037 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5038 unsigned retval = 0;
5039
5040 if (flag_vect_cost_model)
5041 {
5042 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5043 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5044 misalign);
5045 stmt_cost += adjust_vectorization_cost (kind, stmt_info);
5046 /* Statements in an inner loop relative to the loop being
5047 vectorized are weighted more heavily. The value here is
5048 arbitrary and could potentially be improved with analysis. */
5049 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5050 count *= 50; /* FIXME. */
5051
5052 retval = (unsigned) (count * stmt_cost);
5053 cost_data->cost[where] += retval;
5054
5055 /* Check whether we're doing something other than just a copy loop.
5056 Not all such loops may be profitably vectorized; see
5057 rs6000_finish_cost. */
5058 if ((kind == vec_to_scalar || kind == vec_perm
5059 || kind == vec_promote_demote || kind == vec_construct
5060 || kind == scalar_to_vec)
5061 || (where == vect_body && kind == vector_stmt))
5062 rs6000_vect_nonmem = true;
5063 }
5064
5065 return retval;
5066 }
5067
5068 /* Implement targetm.vectorize.finish_cost. */
5069
5070 static void
5071 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5072 unsigned *body_cost, unsigned *epilogue_cost)
5073 {
5074 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5075
5076 if (cost_data->loop_info)
5077 rs6000_density_test (cost_data);
5078
5079 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5080 that require versioning for any reason. The vectorization is at
5081 best a wash inside the loop, and the versioning checks make
5082 profitability highly unlikely and potentially quite harmful. */
5083 if (cost_data->loop_info)
5084 {
5085 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
5086 if (!rs6000_vect_nonmem
5087 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
5088 && LOOP_REQUIRES_VERSIONING (vec_info))
5089 cost_data->cost[vect_body] += 10000;
5090 }
5091
5092 *prologue_cost = cost_data->cost[vect_prologue];
5093 *body_cost = cost_data->cost[vect_body];
5094 *epilogue_cost = cost_data->cost[vect_epilogue];
5095 }
5096
5097 /* Implement targetm.vectorize.destroy_cost_data. */
5098
5099 static void
5100 rs6000_destroy_cost_data (void *data)
5101 {
5102 free (data);
5103 }
5104
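/* Illustrative sketch of how the vectorizer drives the four cost hooks
above; the real calls are made through the targetm.vectorize hook table:

  void *data = rs6000_init_cost (loop);
  ...
  rs6000_add_stmt_cost (data, 1, vector_stmt, stmt_info, 0, vect_body);
  ...
  unsigned prologue, body, epilogue;
  rs6000_finish_cost (data, &prologue, &body, &epilogue);
  rs6000_destroy_cost_data (data);  */
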
5105 /* Implement targetm.loop_unroll_adjust. */
5106
5107 static unsigned
5108 rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
5109 {
5110 if (unroll_only_small_loops)
5111 {
5112 /* TODO: This is hardcoded to 10 right now. It can be refined; for
5113 example, we may want to unroll very small loops more times (4 perhaps).
5114 We should also use a PARAM for this. */
5115 if (loop->ninsns <= 10)
5116 return MIN (2, nunroll);
5117 else
5118 return 0;
5119 }
5120
5121 return nunroll;
5122 }
5123
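/* For example, with unroll_only_small_loops set and nunroll == 4: a loop of
8 insns is unrolled MIN (2, 4) == 2 times, a loop of 12 insns is not
unrolled at all, and without the flag nunroll == 4 passes through
unchanged. */
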
5124 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5125 library with vectorized intrinsics. */
5126
5127 static tree
5128 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5129 tree type_in)
5130 {
5131 char name[32];
5132 const char *suffix = NULL;
5133 tree fntype, new_fndecl, bdecl = NULL_TREE;
5134 int n_args = 1;
5135 const char *bname;
5136 machine_mode el_mode, in_mode;
5137 int n, in_n;
5138
5139 /* Libmass is suitable for unsafe math only, as it does not correctly support
5140 parts of IEEE with the required precision, such as denormals. Only support
5141 it if we have VSX, so we can use the SIMD d2 or f4 functions.
5142 XXX: Add variable length support. */
5143 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5144 return NULL_TREE;
5145
5146 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5147 n = TYPE_VECTOR_SUBPARTS (type_out);
5148 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5149 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5150 if (el_mode != in_mode
5151 || n != in_n)
5152 return NULL_TREE;
5153
5154 switch (fn)
5155 {
5156 CASE_CFN_ATAN2:
5157 CASE_CFN_HYPOT:
5158 CASE_CFN_POW:
5159 n_args = 2;
5160 gcc_fallthrough ();
5161
5162 CASE_CFN_ACOS:
5163 CASE_CFN_ACOSH:
5164 CASE_CFN_ASIN:
5165 CASE_CFN_ASINH:
5166 CASE_CFN_ATAN:
5167 CASE_CFN_ATANH:
5168 CASE_CFN_CBRT:
5169 CASE_CFN_COS:
5170 CASE_CFN_COSH:
5171 CASE_CFN_ERF:
5172 CASE_CFN_ERFC:
5173 CASE_CFN_EXP2:
5174 CASE_CFN_EXP:
5175 CASE_CFN_EXPM1:
5176 CASE_CFN_LGAMMA:
5177 CASE_CFN_LOG10:
5178 CASE_CFN_LOG1P:
5179 CASE_CFN_LOG2:
5180 CASE_CFN_LOG:
5181 CASE_CFN_SIN:
5182 CASE_CFN_SINH:
5183 CASE_CFN_SQRT:
5184 CASE_CFN_TAN:
5185 CASE_CFN_TANH:
5186 if (el_mode == DFmode && n == 2)
5187 {
5188 bdecl = mathfn_built_in (double_type_node, fn);
5189 suffix = "d2"; /* pow -> powd2 */
5190 }
5191 else if (el_mode == SFmode && n == 4)
5192 {
5193 bdecl = mathfn_built_in (float_type_node, fn);
5194 suffix = "4"; /* powf -> powf4 */
5195 }
5196 else
5197 return NULL_TREE;
5198 if (!bdecl)
5199 return NULL_TREE;
5200 break;
5201
5202 default:
5203 return NULL_TREE;
5204 }
5205
5206 gcc_assert (suffix != NULL);
5207 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5208 if (!bname)
5209 return NULL_TREE;
5210
5211 strcpy (name, bname + strlen ("__builtin_"));
5212 strcat (name, suffix);
5213
5214 if (n_args == 1)
5215 fntype = build_function_type_list (type_out, type_in, NULL);
5216 else if (n_args == 2)
5217 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5218 else
5219 gcc_unreachable ();
5220
5221 /* Build a function declaration for the vectorized function. */
5222 new_fndecl = build_decl (BUILTINS_LOCATION,
5223 FUNCTION_DECL, get_identifier (name), fntype);
5224 TREE_PUBLIC (new_fndecl) = 1;
5225 DECL_EXTERNAL (new_fndecl) = 1;
5226 DECL_IS_NOVOPS (new_fndecl) = 1;
5227 TREE_READONLY (new_fndecl) = 1;
5228
5229 return new_fndecl;
5230 }
5231
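/* For example, vectorizing pow for V2DFmode: bdecl is __builtin_pow and
suffix is "d2", so stripping "__builtin_" and appending the suffix yields
the MASS routine name "powd2"; n_args == 2, so the new decl is given the
type V2DF (V2DF, V2DF). */
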
5232 /* Returns a function decl for a vectorized version of the builtin function
5233 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5234 if it is not available. */
5235
5236 static tree
5237 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5238 tree type_in)
5239 {
5240 machine_mode in_mode, out_mode;
5241 int in_n, out_n;
5242
5243 if (TARGET_DEBUG_BUILTIN)
5244 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5245 combined_fn_name (combined_fn (fn)),
5246 GET_MODE_NAME (TYPE_MODE (type_out)),
5247 GET_MODE_NAME (TYPE_MODE (type_in)));
5248
5249 if (TREE_CODE (type_out) != VECTOR_TYPE
5250 || TREE_CODE (type_in) != VECTOR_TYPE)
5251 return NULL_TREE;
5252
5253 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5254 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5255 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5256 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5257
5258 switch (fn)
5259 {
5260 CASE_CFN_COPYSIGN:
5261 if (VECTOR_UNIT_VSX_P (V2DFmode)
5262 && out_mode == DFmode && out_n == 2
5263 && in_mode == DFmode && in_n == 2)
5264 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5265 if (VECTOR_UNIT_VSX_P (V4SFmode)
5266 && out_mode == SFmode && out_n == 4
5267 && in_mode == SFmode && in_n == 4)
5268 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5269 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5270 && out_mode == SFmode && out_n == 4
5271 && in_mode == SFmode && in_n == 4)
5272 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5273 break;
5274 CASE_CFN_CEIL:
5275 if (VECTOR_UNIT_VSX_P (V2DFmode)
5276 && out_mode == DFmode && out_n == 2
5277 && in_mode == DFmode && in_n == 2)
5278 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5279 if (VECTOR_UNIT_VSX_P (V4SFmode)
5280 && out_mode == SFmode && out_n == 4
5281 && in_mode == SFmode && in_n == 4)
5282 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5283 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5284 && out_mode == SFmode && out_n == 4
5285 && in_mode == SFmode && in_n == 4)
5286 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5287 break;
5288 CASE_CFN_FLOOR:
5289 if (VECTOR_UNIT_VSX_P (V2DFmode)
5290 && out_mode == DFmode && out_n == 2
5291 && in_mode == DFmode && in_n == 2)
5292 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5293 if (VECTOR_UNIT_VSX_P (V4SFmode)
5294 && out_mode == SFmode && out_n == 4
5295 && in_mode == SFmode && in_n == 4)
5296 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5297 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5298 && out_mode == SFmode && out_n == 4
5299 && in_mode == SFmode && in_n == 4)
5300 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5301 break;
5302 CASE_CFN_FMA:
5303 if (VECTOR_UNIT_VSX_P (V2DFmode)
5304 && out_mode == DFmode && out_n == 2
5305 && in_mode == DFmode && in_n == 2)
5306 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5307 if (VECTOR_UNIT_VSX_P (V4SFmode)
5308 && out_mode == SFmode && out_n == 4
5309 && in_mode == SFmode && in_n == 4)
5310 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5311 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5312 && out_mode == SFmode && out_n == 4
5313 && in_mode == SFmode && in_n == 4)
5314 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5315 break;
5316 CASE_CFN_TRUNC:
5317 if (VECTOR_UNIT_VSX_P (V2DFmode)
5318 && out_mode == DFmode && out_n == 2
5319 && in_mode == DFmode && in_n == 2)
5320 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5321 if (VECTOR_UNIT_VSX_P (V4SFmode)
5322 && out_mode == SFmode && out_n == 4
5323 && in_mode == SFmode && in_n == 4)
5324 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5325 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5326 && out_mode == SFmode && out_n == 4
5327 && in_mode == SFmode && in_n == 4)
5328 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5329 break;
5330 CASE_CFN_NEARBYINT:
5331 if (VECTOR_UNIT_VSX_P (V2DFmode)
5332 && flag_unsafe_math_optimizations
5333 && out_mode == DFmode && out_n == 2
5334 && in_mode == DFmode && in_n == 2)
5335 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5336 if (VECTOR_UNIT_VSX_P (V4SFmode)
5337 && flag_unsafe_math_optimizations
5338 && out_mode == SFmode && out_n == 4
5339 && in_mode == SFmode && in_n == 4)
5340 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5341 break;
5342 CASE_CFN_RINT:
5343 if (VECTOR_UNIT_VSX_P (V2DFmode)
5344 && !flag_trapping_math
5345 && out_mode == DFmode && out_n == 2
5346 && in_mode == DFmode && in_n == 2)
5347 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5348 if (VECTOR_UNIT_VSX_P (V4SFmode)
5349 && !flag_trapping_math
5350 && out_mode == SFmode && out_n == 4
5351 && in_mode == SFmode && in_n == 4)
5352 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5353 break;
5354 default:
5355 break;
5356 }
5357
5358 /* Generate calls to libmass if appropriate. */
5359 if (rs6000_veclib_handler)
5360 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5361
5362 return NULL_TREE;
5363 }
5364
5365 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5366
5367 static tree
5368 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5369 tree type_in)
5370 {
5371 machine_mode in_mode, out_mode;
5372 int in_n, out_n;
5373
5374 if (TARGET_DEBUG_BUILTIN)
5375 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5376 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5377 GET_MODE_NAME (TYPE_MODE (type_out)),
5378 GET_MODE_NAME (TYPE_MODE (type_in)));
5379
5380 if (TREE_CODE (type_out) != VECTOR_TYPE
5381 || TREE_CODE (type_in) != VECTOR_TYPE)
5382 return NULL_TREE;
5383
5384 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5385 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5386 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5387 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5388
5389 enum rs6000_builtins fn
5390 = (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
5391 switch (fn)
5392 {
5393 case RS6000_BUILTIN_RSQRTF:
5394 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5395 && out_mode == SFmode && out_n == 4
5396 && in_mode == SFmode && in_n == 4)
5397 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5398 break;
5399 case RS6000_BUILTIN_RSQRT:
5400 if (VECTOR_UNIT_VSX_P (V2DFmode)
5401 && out_mode == DFmode && out_n == 2
5402 && in_mode == DFmode && in_n == 2)
5403 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5404 break;
5405 case RS6000_BUILTIN_RECIPF:
5406 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5407 && out_mode == SFmode && out_n == 4
5408 && in_mode == SFmode && in_n == 4)
5409 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5410 break;
5411 case RS6000_BUILTIN_RECIP:
5412 if (VECTOR_UNIT_VSX_P (V2DFmode)
5413 && out_mode == DFmode && out_n == 2
5414 && in_mode == DFmode && in_n == 2)
5415 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5416 break;
5417 default:
5418 break;
5419 }
5420 return NULL_TREE;
5421 }
5422 \f
5423 /* Default CPU string for rs6000*_file_start functions. */
5424 static const char *rs6000_default_cpu;
5425
5426 #ifdef USING_ELFOS_H
5427 const char *rs6000_machine;
5428
5429 const char *
5430 rs6000_machine_from_flags (void)
5431 {
5432 HOST_WIDE_INT flags = rs6000_isa_flags;
5433
5434 /* Disable the flags that should never influence the .machine selection. */
5435 flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT);
5436
5437 if ((flags & (ISA_FUTURE_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
5438 return "future";
5439 if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
5440 return "power9";
5441 if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
5442 return "power8";
5443 if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
5444 return "power7";
5445 if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
5446 return "power6";
5447 if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
5448 return "power5";
5449 if ((flags & ISA_2_1_MASKS) != 0)
5450 return "power4";
5451 if ((flags & OPTION_MASK_POWERPC64) != 0)
5452 return "ppc64";
5453 return "ppc";
5454 }
5455
5456 void
5457 emit_asm_machine (void)
5458 {
5459 fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
5460 }
5461 #endif
5462
5463 /* Do anything needed at the start of the asm file. */
5464
5465 static void
5466 rs6000_file_start (void)
5467 {
5468 char buffer[80];
5469 const char *start = buffer;
5470 FILE *file = asm_out_file;
5471
5472 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5473
5474 default_file_start ();
5475
5476 if (flag_verbose_asm)
5477 {
5478 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5479
5480 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5481 {
5482 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5483 start = "";
5484 }
5485
5486 if (global_options_set.x_rs6000_cpu_index)
5487 {
5488 fprintf (file, "%s -mcpu=%s", start,
5489 processor_target_table[rs6000_cpu_index].name);
5490 start = "";
5491 }
5492
5493 if (global_options_set.x_rs6000_tune_index)
5494 {
5495 fprintf (file, "%s -mtune=%s", start,
5496 processor_target_table[rs6000_tune_index].name);
5497 start = "";
5498 }
5499
5500 if (PPC405_ERRATUM77)
5501 {
5502 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5503 start = "";
5504 }
5505
5506 #ifdef USING_ELFOS_H
5507 switch (rs6000_sdata)
5508 {
5509 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5510 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5511 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5512 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5513 }
5514
5515 if (rs6000_sdata && g_switch_value)
5516 {
5517 fprintf (file, "%s -G %d", start,
5518 g_switch_value);
5519 start = "";
5520 }
5521 #endif
5522
5523 if (*start == '\0')
5524 putc ('\n', file);
5525 }
5526
5527 #ifdef USING_ELFOS_H
5528 rs6000_machine = rs6000_machine_from_flags ();
5529 emit_asm_machine ();
5530 #endif
5531
5532 if (DEFAULT_ABI == ABI_ELFv2)
5533 fprintf (file, "\t.abiversion 2\n");
5534 }
5535
5536 \f
5537 /* Return nonzero if this function is known to have a null epilogue. */
5538
5539 int
5540 direct_return (void)
5541 {
5542 if (reload_completed)
5543 {
5544 rs6000_stack_t *info = rs6000_stack_info ();
5545
5546 if (info->first_gp_reg_save == 32
5547 && info->first_fp_reg_save == 64
5548 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5549 && ! info->lr_save_p
5550 && ! info->cr_save_p
5551 && info->vrsave_size == 0
5552 && ! info->push_p)
5553 return 1;
5554 }
5555
5556 return 0;
5557 }
5558
5559 /* Helper for num_insns_constant. Calculate number of instructions to
5560 load VALUE to a single gpr using combinations of addi, addis, ori,
5561 oris and sldi instructions. */
5562
5563 static int
5564 num_insns_constant_gpr (HOST_WIDE_INT value)
5565 {
5566 /* signed constant loadable with addi */
5567 if (SIGNED_INTEGER_16BIT_P (value))
5568 return 1;
5569
5570 /* constant loadable with addis */
5571 else if ((value & 0xffff) == 0
5572 && (value >> 31 == -1 || value >> 31 == 0))
5573 return 1;
5574
5575 /* PADDI can support up to 34 bit signed integers. */
5576 else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))
5577 return 1;
5578
5579 else if (TARGET_POWERPC64)
5580 {
5581 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5582 HOST_WIDE_INT high = value >> 31;
5583
5584 if (high == 0 || high == -1)
5585 return 2;
5586
5587 high >>= 1;
5588
5589 if (low == 0)
5590 return num_insns_constant_gpr (high) + 1;
5591 else if (high == 0)
5592 return num_insns_constant_gpr (low) + 1;
5593 else
5594 return (num_insns_constant_gpr (high)
5595 + num_insns_constant_gpr (low) + 1);
5596 }
5597
5598 else
5599 return 2;
5600 }
5601
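/* Worked examples (64-bit, without TARGET_PREFIXED):

  0x7fff             -> 1 insn  (addi)
  0x12340000         -> 1 insn  (addis)
  0x12345678         -> 2 insns (addis + ori)
  0x123456789abcdef0 -> 5 insns (addis + ori for the high half, sldi 32,
                                 then oris + ori for the low half)  */
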
5602 /* Helper for num_insns_constant. Allow constants formed by the
5603 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
5604 and handle modes that require multiple gprs. */
5605
5606 static int
5607 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
5608 {
5609 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5610 int total = 0;
5611 while (nregs-- > 0)
5612 {
5613 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
5614 int insns = num_insns_constant_gpr (low);
5615 if (insns > 2
5616 /* We won't get more than 2 from num_insns_constant_gpr
5617 except when TARGET_POWERPC64 and mode is DImode or
5618 wider, so the register mode must be DImode. */
5619 && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
5620 insns = 2;
5621 total += insns;
5622 /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
5623 it all at once would be UB. */
5624 value >>= (BITS_PER_WORD - 1);
5625 value >>= 1;
5626 }
5627 return total;
5628 }
5629
5630 /* Return the number of instructions it takes to form a constant in as
5631 many gprs as are needed for MODE. */
5632
5633 int
5634 num_insns_constant (rtx op, machine_mode mode)
5635 {
5636 HOST_WIDE_INT val;
5637
5638 switch (GET_CODE (op))
5639 {
5640 case CONST_INT:
5641 val = INTVAL (op);
5642 break;
5643
5644 case CONST_WIDE_INT:
5645 {
5646 int insns = 0;
5647 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5648 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
5649 DImode);
5650 return insns;
5651 }
5652
5653 case CONST_DOUBLE:
5654 {
5655 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
5656
5657 if (mode == SFmode || mode == SDmode)
5658 {
5659 long l;
5660
5661 if (mode == SDmode)
5662 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
5663 else
5664 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
5665 /* See the first define_split in rs6000.md handling a
5666 const_double_operand. */
5667 val = l;
5668 mode = SImode;
5669 }
5670 else if (mode == DFmode || mode == DDmode)
5671 {
5672 long l[2];
5673
5674 if (mode == DDmode)
5675 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
5676 else
5677 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
5678
5679 /* See the second (32-bit) and third (64-bit) define_split
5680 in rs6000.md handling a const_double_operand. */
5681 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
5682 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
5683 mode = DImode;
5684 }
5685 else if (mode == TFmode || mode == TDmode
5686 || mode == KFmode || mode == IFmode)
5687 {
5688 long l[4];
5689 int insns;
5690
5691 if (mode == TDmode)
5692 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
5693 else
5694 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
5695
5696 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
5697 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
5698 insns = num_insns_constant_multi (val, DImode);
5699 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
5700 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
5701 insns += num_insns_constant_multi (val, DImode);
5702 return insns;
5703 }
5704 else
5705 gcc_unreachable ();
5706 }
5707 break;
5708
5709 default:
5710 gcc_unreachable ();
5711 }
5712
5713 return num_insns_constant_multi (val, mode);
5714 }
5715
5716 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5717 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5718 corresponding element of the vector, but for V4SFmode, the
5719 corresponding "float" is interpreted as an SImode integer. */
5720
5721 HOST_WIDE_INT
5722 const_vector_elt_as_int (rtx op, unsigned int elt)
5723 {
5724 rtx tmp;
5725
5726 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5727 gcc_assert (GET_MODE (op) != V2DImode
5728 && GET_MODE (op) != V2DFmode);
5729
5730 tmp = CONST_VECTOR_ELT (op, elt);
5731 if (GET_MODE (op) == V4SFmode)
5732 tmp = gen_lowpart (SImode, tmp);
5733 return INTVAL (tmp);
5734 }
5735
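/* For example, for the V4SFmode constant { 1.0f, 1.0f, 1.0f, 1.0f },
const_vector_elt_as_int (op, 0) returns 0x3f800000, the bit pattern of 1.0f
viewed as an SImode integer. */
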
5736 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5737 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5738 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5739 all items are set to the same value and contain COPIES replicas of the
5740 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
5741 operand and the others are set to the value of the operand's msb. */
5742
5743 static bool
5744 vspltis_constant (rtx op, unsigned step, unsigned copies)
5745 {
5746 machine_mode mode = GET_MODE (op);
5747 machine_mode inner = GET_MODE_INNER (mode);
5748
5749 unsigned i;
5750 unsigned nunits;
5751 unsigned bitsize;
5752 unsigned mask;
5753
5754 HOST_WIDE_INT val;
5755 HOST_WIDE_INT splat_val;
5756 HOST_WIDE_INT msb_val;
5757
5758 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5759 return false;
5760
5761 nunits = GET_MODE_NUNITS (mode);
5762 bitsize = GET_MODE_BITSIZE (inner);
5763 mask = GET_MODE_MASK (inner);
5764
5765 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5766 splat_val = val;
5767 msb_val = val >= 0 ? 0 : -1;
5768
5769 /* Construct the value to be splatted, if possible. If not, return 0. */
5770 for (i = 2; i <= copies; i *= 2)
5771 {
5772 HOST_WIDE_INT small_val;
5773 bitsize /= 2;
5774 small_val = splat_val >> bitsize;
5775 mask >>= bitsize;
5776 if (splat_val != ((HOST_WIDE_INT)
5777 ((unsigned HOST_WIDE_INT) small_val << bitsize)
5778 | (small_val & mask)))
5779 return false;
5780 splat_val = small_val;
5781 }
5782
5783 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5784 if (EASY_VECTOR_15 (splat_val))
5785 ;
5786
5787 /* Also check if we can splat, and then add the result to itself. Do so if
5788 the value is positive, or if the splat instruction is using OP's mode;
5789 for splat_val < 0, the splat and the add should use the same mode. */
5790 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
5791 && (splat_val >= 0 || (step == 1 && copies == 1)))
5792 ;
5793
5794 /* Also check if we are loading up the most significant bit, which can be
5795 done by loading up -1 and shifting the value left by -1. */
5796 else if (EASY_VECTOR_MSB (splat_val, inner))
5797 ;
5798
5799 else
5800 return false;
5801
5802 /* Check if VAL is present in every STEP-th element, and the
5803 other elements are filled with its most significant bit. */
5804 for (i = 1; i < nunits; ++i)
5805 {
5806 HOST_WIDE_INT desired_val;
5807 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
5808 if ((i & (step - 1)) == 0)
5809 desired_val = val;
5810 else
5811 desired_val = msb_val;
5812
5813 if (desired_val != const_vector_elt_as_int (op, elt))
5814 return false;
5815 }
5816
5817 return true;
5818 }
5819
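/* For example, the V4SImode constant { 0x00050005, 0x00050005, 0x00050005,
0x00050005 } is accepted with step == 1 and copies == 2: halving each
32-bit element gives splat_val == 5, which fits the 5-bit signed vspltish
immediate, so the constant is a vspltish of 5 reinterpreted as V4SImode. */
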
5820 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
5821 instruction, filling in the bottom elements with 0 or -1.
5822
5823 Return 0 if the constant cannot be generated with VSLDOI. Return positive
5824 for the number of zeroes to shift in, or negative for the number of 0xff
5825 bytes to shift in.
5826
5827 OP is a CONST_VECTOR. */
5828
5829 int
5830 vspltis_shifted (rtx op)
5831 {
5832 machine_mode mode = GET_MODE (op);
5833 machine_mode inner = GET_MODE_INNER (mode);
5834
5835 unsigned i, j;
5836 unsigned nunits;
5837 unsigned mask;
5838
5839 HOST_WIDE_INT val;
5840
5841 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
5842 return false;
5843
5844 /* We need to create pseudo registers to do the shift, so don't recognize
5845 shift vector constants after reload. */
5846 if (!can_create_pseudo_p ())
5847 return false;
5848
5849 nunits = GET_MODE_NUNITS (mode);
5850 mask = GET_MODE_MASK (inner);
5851
5852 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
5853
5854 /* Check if the value can really be the operand of a vspltis[bhw]. */
5855 if (EASY_VECTOR_15 (val))
5856 ;
5857
5858 /* Also check if we are loading up the most significant bit which can be done
5859 by loading up -1 and shifting the value left by -1. */
5860 else if (EASY_VECTOR_MSB (val, inner))
5861 ;
5862
5863 else
5864 return 0;
5865
5866 /* Check if VAL is present in every element until we find elements that
5867 are 0 or all 1 bits. */
5868 for (i = 1; i < nunits; ++i)
5869 {
5870 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
5871 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
5872
5873 /* If the value isn't the splat value, check for the remaining elements
5874 being 0/-1. */
5875 if (val != elt_val)
5876 {
5877 if (elt_val == 0)
5878 {
5879 for (j = i+1; j < nunits; ++j)
5880 {
5881 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5882 if (const_vector_elt_as_int (op, elt2) != 0)
5883 return 0;
5884 }
5885
5886 return (nunits - i) * GET_MODE_SIZE (inner);
5887 }
5888
5889 else if ((elt_val & mask) == mask)
5890 {
5891 for (j = i+1; j < nunits; ++j)
5892 {
5893 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5894 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
5895 return 0;
5896 }
5897
5898 return -((nunits - i) * GET_MODE_SIZE (inner));
5899 }
5900
5901 else
5902 return 0;
5903 }
5904 }
5905
5906 /* If all elements are equal, we don't need to do VSLDOI. */
5907 return 0;
5908 }
5909
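/* For example (big endian), the V4SImode constant { 5, 0, 0, 0 } holds the
splat value 5 in element 0 and zeros elsewhere, so vspltis_shifted returns
(4 - 1) * 4 == 12: a vspltisw of 5 followed by a VSLDOI shifting in 12 zero
bytes. */
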
5910
5911 /* Return true if OP is of the given MODE and can be synthesized
5912 with a vspltisb, vspltish or vspltisw. */
5913
5914 bool
5915 easy_altivec_constant (rtx op, machine_mode mode)
5916 {
5917 unsigned step, copies;
5918
5919 if (mode == VOIDmode)
5920 mode = GET_MODE (op);
5921 else if (mode != GET_MODE (op))
5922 return false;
5923
5924 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
5925 constants. */
5926 if (mode == V2DFmode)
5927 return zero_constant (op, mode);
5928
5929 else if (mode == V2DImode)
5930 {
5931 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
5932 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
5933 return false;
5934
5935 if (zero_constant (op, mode))
5936 return true;
5937
5938 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
5939 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
5940 return true;
5941
5942 return false;
5943 }
5944
5945 /* V1TImode is a special container for TImode. Ignore for now. */
5946 else if (mode == V1TImode)
5947 return false;
5948
5949 /* Start with a vspltisw. */
5950 step = GET_MODE_NUNITS (mode) / 4;
5951 copies = 1;
5952
5953 if (vspltis_constant (op, step, copies))
5954 return true;
5955
5956 /* Then try with a vspltish. */
5957 if (step == 1)
5958 copies <<= 1;
5959 else
5960 step >>= 1;
5961
5962 if (vspltis_constant (op, step, copies))
5963 return true;
5964
5965 /* And finally a vspltisb. */
5966 if (step == 1)
5967 copies <<= 1;
5968 else
5969 step >>= 1;
5970
5971 if (vspltis_constant (op, step, copies))
5972 return true;
5973
5974 if (vspltis_shifted (op) != 0)
5975 return true;
5976
5977 return false;
5978 }
5979
5980 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
5981 result is OP. Abort if it is not possible. */
5982
5983 rtx
5984 gen_easy_altivec_constant (rtx op)
5985 {
5986 machine_mode mode = GET_MODE (op);
5987 int nunits = GET_MODE_NUNITS (mode);
5988 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5989 unsigned step = nunits / 4;
5990 unsigned copies = 1;
5991
5992 /* Start with a vspltisw. */
5993 if (vspltis_constant (op, step, copies))
5994 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
5995
5996 /* Then try with a vspltish. */
5997 if (step == 1)
5998 copies <<= 1;
5999 else
6000 step >>= 1;
6001
6002 if (vspltis_constant (op, step, copies))
6003 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6004
6005 /* And finally a vspltisb. */
6006 if (step == 1)
6007 copies <<= 1;
6008 else
6009 step >>= 1;
6010
6011 if (vspltis_constant (op, step, copies))
6012 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6013
6014 gcc_unreachable ();
6015 }
6016
6017 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6018 instructions (xxspltib, vupkhsb/vextsb2w/vextsb2d).
6019 
6020 Return the number of instructions needed (1 or 2) through the address
6021 pointed to by NUM_INSNS_PTR.
6022 
6023 Return the constant that is being split through CONSTANT_PTR. */
6024
6025 bool
6026 xxspltib_constant_p (rtx op,
6027 machine_mode mode,
6028 int *num_insns_ptr,
6029 int *constant_ptr)
6030 {
6031 size_t nunits = GET_MODE_NUNITS (mode);
6032 size_t i;
6033 HOST_WIDE_INT value;
6034 rtx element;
6035
6036 /* Set the returned values to out of bound values. */
6037 *num_insns_ptr = -1;
6038 *constant_ptr = 256;
6039
6040 if (!TARGET_P9_VECTOR)
6041 return false;
6042
6043 if (mode == VOIDmode)
6044 mode = GET_MODE (op);
6045
6046 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6047 return false;
6048
6049 /* Handle (vec_duplicate <constant>). */
6050 if (GET_CODE (op) == VEC_DUPLICATE)
6051 {
6052 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6053 && mode != V2DImode)
6054 return false;
6055
6056 element = XEXP (op, 0);
6057 if (!CONST_INT_P (element))
6058 return false;
6059
6060 value = INTVAL (element);
6061 if (!IN_RANGE (value, -128, 127))
6062 return false;
6063 }
6064
6065 /* Handle (const_vector [...]). */
6066 else if (GET_CODE (op) == CONST_VECTOR)
6067 {
6068 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6069 && mode != V2DImode)
6070 return false;
6071
6072 element = CONST_VECTOR_ELT (op, 0);
6073 if (!CONST_INT_P (element))
6074 return false;
6075
6076 value = INTVAL (element);
6077 if (!IN_RANGE (value, -128, 127))
6078 return false;
6079
6080 for (i = 1; i < nunits; i++)
6081 {
6082 element = CONST_VECTOR_ELT (op, i);
6083 if (!CONST_INT_P (element))
6084 return false;
6085
6086 if (value != INTVAL (element))
6087 return false;
6088 }
6089 }
6090
6091 /* Handle integer constants being loaded into the upper part of the VSX
6092 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6093 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6094 else if (CONST_INT_P (op))
6095 {
6096 if (!SCALAR_INT_MODE_P (mode))
6097 return false;
6098
6099 value = INTVAL (op);
6100 if (!IN_RANGE (value, -128, 127))
6101 return false;
6102
6103 if (!IN_RANGE (value, -1, 0))
6104 {
6105 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6106 return false;
6107
6108 if (EASY_VECTOR_15 (value))
6109 return false;
6110 }
6111 }
6112
6113 else
6114 return false;
6115
6116 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6117 sign extend. Special case 0/-1 to allow getting any VSX register instead
6118 of an Altivec register. */
6119 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6120 && EASY_VECTOR_15 (value))
6121 return false;
6122
6123 /* Return # of instructions and the constant byte for XXSPLTIB. */
6124 if (mode == V16QImode)
6125 *num_insns_ptr = 1;
6126
6127 else if (IN_RANGE (value, -1, 0))
6128 *num_insns_ptr = 1;
6129
6130 else
6131 *num_insns_ptr = 2;
6132
6133 *constant_ptr = (int) value;
6134 return true;
6135 }
6136
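/* For example, given TARGET_P9_VECTOR, a V4SImode splat of 100: the value
is outside the vspltisw immediate range (-16..15) but within -128..127, so
the function returns true with *num_insns_ptr == 2 (an xxspltib of 100 plus
a sign extension such as vextsb2w) and *constant_ptr == 100. A V16QImode
splat of 100 needs only the xxspltib, so *num_insns_ptr == 1. */
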
6137 const char *
6138 output_vec_const_move (rtx *operands)
6139 {
6140 int shift;
6141 machine_mode mode;
6142 rtx dest, vec;
6143
6144 dest = operands[0];
6145 vec = operands[1];
6146 mode = GET_MODE (dest);
6147
6148 if (TARGET_VSX)
6149 {
6150 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6151 int xxspltib_value = 256;
6152 int num_insns = -1;
6153
6154 if (zero_constant (vec, mode))
6155 {
6156 if (TARGET_P9_VECTOR)
6157 return "xxspltib %x0,0";
6158
6159 else if (dest_vmx_p)
6160 return "vspltisw %0,0";
6161
6162 else
6163 return "xxlxor %x0,%x0,%x0";
6164 }
6165
6166 if (all_ones_constant (vec, mode))
6167 {
6168 if (TARGET_P9_VECTOR)
6169 return "xxspltib %x0,255";
6170
6171 else if (dest_vmx_p)
6172 return "vspltisw %0,-1";
6173
6174 else if (TARGET_P8_VECTOR)
6175 return "xxlorc %x0,%x0,%x0";
6176
6177 else
6178 gcc_unreachable ();
6179 }
6180
6181 if (TARGET_P9_VECTOR
6182 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6183 {
6184 if (num_insns == 1)
6185 {
6186 operands[2] = GEN_INT (xxspltib_value & 0xff);
6187 return "xxspltib %x0,%2";
6188 }
6189
6190 return "#";
6191 }
6192 }
6193
6194 if (TARGET_ALTIVEC)
6195 {
6196 rtx splat_vec;
6197
6198 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6199 if (zero_constant (vec, mode))
6200 return "vspltisw %0,0";
6201
6202 if (all_ones_constant (vec, mode))
6203 return "vspltisw %0,-1";
6204
6205 /* Do we need to construct a value using VSLDOI? */
6206 shift = vspltis_shifted (vec);
6207 if (shift != 0)
6208 return "#";
6209
6210 splat_vec = gen_easy_altivec_constant (vec);
6211 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6212 operands[1] = XEXP (splat_vec, 0);
6213 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6214 return "#";
6215
6216 switch (GET_MODE (splat_vec))
6217 {
6218 case E_V4SImode:
6219 return "vspltisw %0,%1";
6220
6221 case E_V8HImode:
6222 return "vspltish %0,%1";
6223
6224 case E_V16QImode:
6225 return "vspltisb %0,%1";
6226
6227 default:
6228 gcc_unreachable ();
6229 }
6230 }
6231
6232 gcc_unreachable ();
6233 }
6234
6235 /* Initialize vector TARGET to VALS. */
6236
6237 void
6238 rs6000_expand_vector_init (rtx target, rtx vals)
6239 {
6240 machine_mode mode = GET_MODE (target);
6241 machine_mode inner_mode = GET_MODE_INNER (mode);
6242 int n_elts = GET_MODE_NUNITS (mode);
6243 int n_var = 0, one_var = -1;
6244 bool all_same = true, all_const_zero = true;
6245 rtx x, mem;
6246 int i;
6247
6248 for (i = 0; i < n_elts; ++i)
6249 {
6250 x = XVECEXP (vals, 0, i);
6251 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6252 ++n_var, one_var = i;
6253 else if (x != CONST0_RTX (inner_mode))
6254 all_const_zero = false;
6255
6256 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6257 all_same = false;
6258 }
6259
6260 if (n_var == 0)
6261 {
6262 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6263 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6264 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6265 {
6266 /* Zero register. */
6267 emit_move_insn (target, CONST0_RTX (mode));
6268 return;
6269 }
6270 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6271 {
6272 /* Splat immediate. */
6273 emit_insn (gen_rtx_SET (target, const_vec));
6274 return;
6275 }
6276 else
6277 {
6278 /* Load from constant pool. */
6279 emit_move_insn (target, const_vec);
6280 return;
6281 }
6282 }
6283
6284 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6285 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6286 {
6287 rtx op[2];
6288 size_t i;
6289 size_t num_elements = all_same ? 1 : 2;
6290 for (i = 0; i < num_elements; i++)
6291 {
6292 op[i] = XVECEXP (vals, 0, i);
6293 /* Just in case there is a SUBREG with a smaller mode, do a
6294 conversion. */
6295 if (GET_MODE (op[i]) != inner_mode)
6296 {
6297 rtx tmp = gen_reg_rtx (inner_mode);
6298 convert_move (tmp, op[i], 0);
6299 op[i] = tmp;
6300 }
6301 /* Allow load with splat double word. */
6302 else if (MEM_P (op[i]))
6303 {
6304 if (!all_same)
6305 op[i] = force_reg (inner_mode, op[i]);
6306 }
6307 else if (!REG_P (op[i]))
6308 op[i] = force_reg (inner_mode, op[i]);
6309 }
6310
6311 if (all_same)
6312 {
6313 if (mode == V2DFmode)
6314 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6315 else
6316 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6317 }
6318 else
6319 {
6320 if (mode == V2DFmode)
6321 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6322 else
6323 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6324 }
6325 return;
6326 }
6327
6328 /* Special case initializing vector int if we are on 64-bit systems with
6329 direct move or we have the ISA 3.0 instructions. */
6330 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6331 && TARGET_DIRECT_MOVE_64BIT)
6332 {
6333 if (all_same)
6334 {
6335 rtx element0 = XVECEXP (vals, 0, 0);
6336 if (MEM_P (element0))
6337 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6338 else
6339 element0 = force_reg (SImode, element0);
6340
6341 if (TARGET_P9_VECTOR)
6342 emit_insn (gen_vsx_splat_v4si (target, element0));
6343 else
6344 {
6345 rtx tmp = gen_reg_rtx (DImode);
6346 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6347 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6348 }
6349 return;
6350 }
6351 else
6352 {
6353 rtx elements[4];
6354 size_t i;
6355
6356 for (i = 0; i < 4; i++)
6357 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6358
6359 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6360 elements[2], elements[3]));
6361 return;
6362 }
6363 }
6364
6365 /* With single precision floating point on VSX, single precision is
6366 internally represented as a double, so either make 2 V2DF vectors and
6367 convert those vectors to single precision, or do one conversion and
6368 splat the result to the other elements. */
6369 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6370 {
6371 if (all_same)
6372 {
6373 rtx element0 = XVECEXP (vals, 0, 0);
6374
6375 if (TARGET_P9_VECTOR)
6376 {
6377 if (MEM_P (element0))
6378 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6379
6380 emit_insn (gen_vsx_splat_v4sf (target, element0));
6381 }
6382
6383 else
6384 {
6385 rtx freg = gen_reg_rtx (V4SFmode);
6386 rtx sreg = force_reg (SFmode, element0);
6387 rtx cvt = (TARGET_XSCVDPSPN
6388 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6389 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6390
6391 emit_insn (cvt);
6392 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6393 const0_rtx));
6394 }
6395 }
6396 else
6397 {
6398 rtx dbl_even = gen_reg_rtx (V2DFmode);
6399 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6400 rtx flt_even = gen_reg_rtx (V4SFmode);
6401 rtx flt_odd = gen_reg_rtx (V4SFmode);
6402 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6403 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6404 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6405 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6406
6407 /* Use VMRGEW if we can instead of doing a permute. */
6408 if (TARGET_P8_VECTOR)
6409 {
6410 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
6411 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
6412 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6413 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6414 if (BYTES_BIG_ENDIAN)
6415 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
6416 else
6417 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
6418 }
6419 else
6420 {
6421 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6422 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6423 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6424 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6425 rs6000_expand_extract_even (target, flt_even, flt_odd);
6426 }
6427 }
6428 return;
6429 }
6430
6431 /* Special case initializing vector short/char that are splats if we are on
6432 64-bit systems with direct move. */
6433 if (all_same && TARGET_DIRECT_MOVE_64BIT
6434 && (mode == V16QImode || mode == V8HImode))
6435 {
6436 rtx op0 = XVECEXP (vals, 0, 0);
6437 rtx di_tmp = gen_reg_rtx (DImode);
6438
6439 if (!REG_P (op0))
6440 op0 = force_reg (GET_MODE_INNER (mode), op0);
6441
6442 if (mode == V16QImode)
6443 {
6444 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6445 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6446 return;
6447 }
6448
6449 if (mode == V8HImode)
6450 {
6451 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6452 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
6453 return;
6454 }
6455 }
6456
6457 /* Store value to stack temp. Load vector element. Splat. However, splat
6458 of 64-bit items is not supported on Altivec. */
6459 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6460 {
6461 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6462 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6463 XVECEXP (vals, 0, 0));
6464 x = gen_rtx_UNSPEC (VOIDmode,
6465 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6466 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6467 gen_rtvec (2,
6468 gen_rtx_SET (target, mem),
6469 x)));
6470 x = gen_rtx_VEC_SELECT (inner_mode, target,
6471 gen_rtx_PARALLEL (VOIDmode,
6472 gen_rtvec (1, const0_rtx)));
6473 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6474 return;
6475 }
6476
6477 /* One field is non-constant. Load constant then overwrite
6478 varying field. */
6479 if (n_var == 1)
6480 {
6481 rtx copy = copy_rtx (vals);
6482
6483 /* Load constant part of vector, substitute neighboring value for
6484 varying element. */
6485 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6486 rs6000_expand_vector_init (target, copy);
6487
6488 /* Insert variable. */
6489 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
6490 return;
6491 }
6492
6493 /* Construct the vector in memory one field at a time
6494 and load the whole vector. */
6495 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6496 for (i = 0; i < n_elts; i++)
6497 emit_move_insn (adjust_address_nv (mem, inner_mode,
6498 i * GET_MODE_SIZE (inner_mode)),
6499 XVECEXP (vals, 0, i));
6500 emit_move_insn (target, mem);
6501 }
6502
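/* For example, initializing the V4SImode vector { x, 1, 2, 3 } where only
x is variable takes the n_var == 1 path above: the constant vector
{ 1, 1, 2, 3 } is loaded first (the neighboring constant 1 substitutes for
the variable field), and rs6000_expand_vector_set then overwrites element 0
with x. */
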
6503 /* Set field ELT of TARGET to VAL. */
6504
6505 void
6506 rs6000_expand_vector_set (rtx target, rtx val, int elt)
6507 {
6508 machine_mode mode = GET_MODE (target);
6509 machine_mode inner_mode = GET_MODE_INNER (mode);
6510 rtx reg = gen_reg_rtx (mode);
6511 rtx mask, mem, x;
6512 int width = GET_MODE_SIZE (inner_mode);
6513 int i;
6514
6515 val = force_reg (GET_MODE (val), val);
6516
6517 if (VECTOR_MEM_VSX_P (mode))
6518 {
6519 rtx insn = NULL_RTX;
6520 rtx elt_rtx = GEN_INT (elt);
6521
6522 if (mode == V2DFmode)
6523 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
6524
6525 else if (mode == V2DImode)
6526 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
6527
6528 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
6529 {
6530 if (mode == V4SImode)
6531 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
6532 else if (mode == V8HImode)
6533 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
6534 else if (mode == V16QImode)
6535 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
6536 else if (mode == V4SFmode)
6537 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
6538 }
6539
6540 if (insn)
6541 {
6542 emit_insn (insn);
6543 return;
6544 }
6545 }
6546
6547 /* Simplify setting single element vectors like V1TImode. */
6548 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
6549 {
6550 emit_move_insn (target, gen_lowpart (mode, val));
6551 return;
6552 }
6553
6554 /* Load single variable value. */
6555 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6556 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
6557 x = gen_rtx_UNSPEC (VOIDmode,
6558 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6559 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6560 gen_rtvec (2,
6561 gen_rtx_SET (reg, mem),
6562 x)));
6563
6564 /* Linear sequence. */
6565 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
6566 for (i = 0; i < 16; ++i)
6567 XVECEXP (mask, 0, i) = GEN_INT (i);
6568
6569 /* Set permute mask to insert element into target. */
6570 for (i = 0; i < width; ++i)
6571 XVECEXP (mask, 0, elt*width + i)
6572 = GEN_INT (i + 0x10);
6573 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
6574
6575 if (BYTES_BIG_ENDIAN)
6576 x = gen_rtx_UNSPEC (mode,
6577 gen_rtvec (3, target, reg,
6578 force_reg (V16QImode, x)),
6579 UNSPEC_VPERM);
6580 else
6581 {
6582 if (TARGET_P9_VECTOR)
6583 x = gen_rtx_UNSPEC (mode,
6584 gen_rtvec (3, reg, target,
6585 force_reg (V16QImode, x)),
6586 UNSPEC_VPERMR);
6587 else
6588 {
6589 /* Invert selector. We prefer to generate VNAND on P8 so
6590 that future fusion opportunities can kick in, but must
6591 generate VNOR elsewhere. */
6592 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
6593 rtx iorx = (TARGET_P8_VECTOR
6594 ? gen_rtx_IOR (V16QImode, notx, notx)
6595 : gen_rtx_AND (V16QImode, notx, notx));
6596 rtx tmp = gen_reg_rtx (V16QImode);
6597 emit_insn (gen_rtx_SET (tmp, iorx));
6598
6599 /* Permute with operands reversed and adjusted selector. */
6600 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
6601 UNSPEC_VPERM);
6602 }
6603 }
6604
6605 emit_insn (gen_rtx_SET (target, x));
6606 }
6607
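/* For example, setting element 2 of a V4SImode vector: width == 4, so the
permute mask built above becomes

  { 0, 1, ..., 7, 0x10, 0x11, 0x12, 0x13, 12, ..., 15 }

i.e. the vperm keeps the bytes of TARGET except bytes 8..11, which are
taken from the splatted value in REG. */
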
6608 /* Extract field ELT from VEC into TARGET. */
6609
6610 void
6611 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
6612 {
6613 machine_mode mode = GET_MODE (vec);
6614 machine_mode inner_mode = GET_MODE_INNER (mode);
6615 rtx mem;
6616
6617 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
6618 {
6619 switch (mode)
6620 {
6621 default:
6622 break;
6623 case E_V1TImode:
6624 emit_move_insn (target, gen_lowpart (TImode, vec));
6625 return;
6626 case E_V2DFmode:
6627 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
6628 return;
6629 case E_V2DImode:
6630 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
6631 return;
6632 case E_V4SFmode:
6633 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
6634 return;
6635 case E_V16QImode:
6636 if (TARGET_DIRECT_MOVE_64BIT)
6637 {
6638 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
6639 return;
6640 }
6641 else
6642 break;
6643 case E_V8HImode:
6644 if (TARGET_DIRECT_MOVE_64BIT)
6645 {
6646 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
6647 return;
6648 }
6649 else
6650 break;
6651 case E_V4SImode:
6652 if (TARGET_DIRECT_MOVE_64BIT)
6653 {
6654 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
6655 return;
6656 }
6657 break;
6658 }
6659 }
6660 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
6661 && TARGET_DIRECT_MOVE_64BIT)
6662 {
6663 if (GET_MODE (elt) != DImode)
6664 {
6665 rtx tmp = gen_reg_rtx (DImode);
6666 convert_move (tmp, elt, 0);
6667 elt = tmp;
6668 }
6669 else if (!REG_P (elt))
6670 elt = force_reg (DImode, elt);
6671
6672 switch (mode)
6673 {
6674 case E_V1TImode:
6675 emit_move_insn (target, gen_lowpart (TImode, vec));
6676 return;
6677
6678 case E_V2DFmode:
6679 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
6680 return;
6681
6682 case E_V2DImode:
6683 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
6684 return;
6685
6686 case E_V4SFmode:
6687 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
6688 return;
6689
6690 case E_V4SImode:
6691 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
6692 return;
6693
6694 case E_V8HImode:
6695 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
6696 return;
6697
6698 case E_V16QImode:
6699 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
6700 return;
6701
6702 default:
6703 gcc_unreachable ();
6704 }
6705 }
6706
6707 /* Allocate mode-sized buffer. */
6708 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6709
6710 emit_move_insn (mem, vec);
6711 if (CONST_INT_P (elt))
6712 {
6713 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
6714
6715 /* Add offset to field within buffer matching vector element. */
6716 mem = adjust_address_nv (mem, inner_mode,
6717 modulo_elt * GET_MODE_SIZE (inner_mode));
6718 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
6719 }
6720 else
6721 {
6722 unsigned int ele_size = GET_MODE_SIZE (inner_mode);
6723 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
6724 rtx new_addr = gen_reg_rtx (Pmode);
6725
6726 elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
6727 if (ele_size > 1)
6728 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
6729 new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
6730 new_addr = change_address (mem, inner_mode, new_addr);
6731 emit_move_insn (target, new_addr);
6732 }
6733 }
6734
6735 /* Return the offset within a memory object (MEM) of a vector type to a given
6736 element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If
6737 the element is constant, we return a constant integer.
6738
6739 Otherwise, we use a base register temporary to calculate the offset after
6740 masking it to fit within the bounds of the vector and scaling it. The
6741 masking is required by the 64-bit ELF version 2 ABI for the vec_extract
6742 built-in function. */
6743
6744 static rtx
6745 get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
6746 {
6747 if (CONST_INT_P (element))
6748 return GEN_INT (INTVAL (element) * scalar_size);
6749
6750 /* All insns should use the 'Q' constraint (address is a single register) if
6751 the element number is not a constant. */
6752 gcc_assert (satisfies_constraint_Q (mem));
6753
6754 /* Mask the element to make sure the element number is between 0 and the
6755 maximum number of elements - 1 so that we don't generate an address
6756 outside the vector. */
6757 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (GET_MODE (mem)) - 1);
6758 rtx and_op = gen_rtx_AND (Pmode, element, num_ele_m1);
6759 emit_insn (gen_rtx_SET (base_tmp, and_op));
6760
6761 /* Shift the element to get the byte offset from the element number. */
6762 int shift = exact_log2 (scalar_size);
6763 gcc_assert (shift >= 0);
6764
6765 if (shift > 0)
6766 {
6767 rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
6768 emit_insn (gen_rtx_SET (base_tmp, shift_op));
6769 }
6770
6771 return base_tmp;
6772 }
6773
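/* For example, with a variable ELEMENT and a V4SImode vector in memory
(scalar_size == 4), BASE_TMP ends up holding (ELEMENT & 3) << 2, the byte
offset of the selected element within the vector. */
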
6774 /* Helper function to update PC-relative addresses when adjusting a memory
6775 address (ADDR) of a vector to point to a scalar field within the vector with
6776 a constant offset (ELEMENT_OFFSET). If the address is not valid, we can
6777 use the base register temporary (BASE_TMP) to form the address. */
6778
6779 static rtx
6780 adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
6781 {
6782 rtx new_addr = NULL;
6783
6784 gcc_assert (CONST_INT_P (element_offset));
6785
6786 if (GET_CODE (addr) == CONST)
6787 addr = XEXP (addr, 0);
6788
6789 if (GET_CODE (addr) == PLUS)
6790 {
6791 rtx op0 = XEXP (addr, 0);
6792 rtx op1 = XEXP (addr, 1);
6793
6794 if (CONST_INT_P (op1))
6795 {
6796 HOST_WIDE_INT offset
6797 = INTVAL (XEXP (addr, 1)) + INTVAL (element_offset);
6798
6799 if (offset == 0)
6800 new_addr = op0;
6801
6802 else
6803 {
6804 rtx plus = gen_rtx_PLUS (Pmode, op0, GEN_INT (offset));
6805 new_addr = gen_rtx_CONST (Pmode, plus);
6806 }
6807 }
6808
6809 else
6810 {
6811 emit_move_insn (base_tmp, addr);
6812 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
6813 }
6814 }
6815
6816 else if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
6817 {
6818 rtx plus = gen_rtx_PLUS (Pmode, addr, element_offset);
6819 new_addr = gen_rtx_CONST (Pmode, plus);
6820 }
6821
6822 else
6823 gcc_unreachable ();
6824
6825 return new_addr;
6826 }
6827
6828 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
6829 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
6830 temporary (BASE_TMP) to fixup the address. Return the new memory address
6831 that is valid for reads or writes to a given register (SCALAR_REG).
6832
6833 This function is expected to be called after reload is completed when we are
6834 splitting insns. The temporary BASE_TMP might be set multiple times with
6835 this code. */
6836
6837 rtx
6838 rs6000_adjust_vec_address (rtx scalar_reg,
6839 rtx mem,
6840 rtx element,
6841 rtx base_tmp,
6842 machine_mode scalar_mode)
6843 {
6844 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
6845 rtx addr = XEXP (mem, 0);
6846 rtx new_addr;
6847
6848 gcc_assert (!reg_mentioned_p (base_tmp, addr));
6849 gcc_assert (!reg_mentioned_p (base_tmp, element));
6850
6851 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
6852 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
6853
6854 /* Calculate what we need to add to the address to get the element
6855 address. */
6856 rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size);
6857
6858 /* Create the new address pointing to the element within the vector. If we
6859 are adding 0, we don't have to change the address. */
6860 if (element_offset == const0_rtx)
6861 new_addr = addr;
6862
6863 /* A simple indirect address can be converted into a reg + offset
6864 address. */
6865 else if (REG_P (addr) || SUBREG_P (addr))
6866 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
6867
6868 /* For references to local static variables, fold a constant offset into the
6869 address. */
6870 else if (pcrel_local_address (addr, Pmode) && CONST_INT_P (element_offset))
6871 new_addr = adjust_vec_address_pcrel (addr, element_offset, base_tmp);
6872
6873 /* Optimize D-FORM addresses with constant offset with a constant element, to
6874 include the element offset in the address directly. */
6875 else if (GET_CODE (addr) == PLUS)
6876 {
6877 rtx op0 = XEXP (addr, 0);
6878 rtx op1 = XEXP (addr, 1);
6879
6880 gcc_assert (REG_P (op0) || SUBREG_P (op0));
6881 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
6882 {
6883 /* op0 should never be r0, because r0+offset is not valid. But it
6884 doesn't hurt to make sure it is not r0. */
6885 gcc_assert (reg_or_subregno (op0) != 0);
6886
6887 /* D-FORM address with constant element number. */
6888 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
6889 rtx offset_rtx = GEN_INT (offset);
6890 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
6891 }
6892 else
6893 {
6894 /* If we don't have a D-FORM address with a constant element number,
6895 add the two elements in the current address. Then add the offset.
6896
6897 Previously, we tried to add the offset to OP1 and change the
6898 address to an X-FORM format adding OP0 and BASE_TMP, but it became
6899 complicated because we had to verify that op1 was not GPR0 and we
6900 had a constant element offset (due to the way ADDI is defined).
6901 By doing the add of OP0 and OP1 first, and then adding in the
6902 offset, it has the benefit that if D-FORM instructions are
6903 allowed, the offset is part of the memory access to the vector
6904 element. */
6905 emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
6906 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
6907 }
6908 }
6909
6910 else
6911 {
6912 emit_move_insn (base_tmp, addr);
6913 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
6914 }
6915
6916 /* If the address isn't valid, move the address into the temporary base
6917 register. Some reasons it could not be valid include:
6918
6919 The address offset overflowed the 16 or 34 bit offset size;
6920 We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
6921 We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
6922 Only X_FORM loads can be done, and the address is D_FORM. */
6923
6924 enum insn_form iform
6925 = address_to_insn_form (new_addr, scalar_mode,
6926 reg_to_non_prefixed (scalar_reg, scalar_mode));
6927
6928 if (iform == INSN_FORM_BAD)
6929 {
6930 emit_move_insn (base_tmp, new_addr);
6931 new_addr = base_tmp;
6932 }
6933
6934 return change_address (mem, scalar_mode, new_addr);
6935 }
6936
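/* Illustrative example (assumed operands, not part of the build):
   extracting constant element 2 from a V4SImode vector stored at r3+16
   folds the byte offset directly into the D-form address:

     element_offset = 2 * 4 = 8
     new address    = r3 + (16 + 8) = r3 + 24

   so the split can emit a single "lwz rDEST,24(r3)" style load instead of
   a separate address computation.  */
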
6937 /* Split a variable vec_extract operation into the component instructions. */
6938
6939 void
6940 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
6941 rtx tmp_altivec)
6942 {
6943 machine_mode mode = GET_MODE (src);
6944 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
6945 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
6946 int byte_shift = exact_log2 (scalar_size);
6947
6948 gcc_assert (byte_shift >= 0);
6949
6950 /* If we are given a memory address, optimize to load just the element. We
6951 don't have to adjust the vector element number on little endian
6952 systems. */
6953 if (MEM_P (src))
6954 {
6955 emit_move_insn (dest,
6956 rs6000_adjust_vec_address (dest, src, element, tmp_gpr,
6957 scalar_mode));
6958 return;
6959 }
6960
6961 else if (REG_P (src) || SUBREG_P (src))
6962 {
6963 int num_elements = GET_MODE_NUNITS (mode);
6964 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
6965 int bit_shift = 7 - exact_log2 (num_elements);
6966 rtx element2;
6967 unsigned int dest_regno = reg_or_subregno (dest);
6968 unsigned int src_regno = reg_or_subregno (src);
6969 unsigned int element_regno = reg_or_subregno (element);
6970
6971 gcc_assert (REG_P (tmp_gpr));
6972
6973 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
6974 a general purpose register. */
6975 if (TARGET_P9_VECTOR
6976 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
6977 && INT_REGNO_P (dest_regno)
6978 && ALTIVEC_REGNO_P (src_regno)
6979 && INT_REGNO_P (element_regno))
6980 {
6981 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
6982 rtx element_si = gen_rtx_REG (SImode, element_regno);
6983
6984 if (mode == V16QImode)
6985 emit_insn (BYTES_BIG_ENDIAN
6986 ? gen_vextublx (dest_si, element_si, src)
6987 : gen_vextubrx (dest_si, element_si, src));
6988
6989 else if (mode == V8HImode)
6990 {
6991 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
6992 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
6993 emit_insn (BYTES_BIG_ENDIAN
6994 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
6995 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
6996 }
6997
6998
6999 else
7000 {
7001 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7002 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7003 emit_insn (BYTES_BIG_ENDIAN
7004 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7005 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7006 }
7007
7008 return;
7009 }
7010
7011
7012 gcc_assert (REG_P (tmp_altivec));
7013
7014       /* For little endian, adjust element ordering.  For V2DI/V2DF, we can use
7015 	 an XOR, otherwise we need to subtract.  The shift amount is chosen so
7016 	 that VSLO will shift the element into the upper position (adding 3 to
7017 	 convert a byte shift into a bit shift).  */
7018 if (scalar_size == 8)
7019 {
7020 if (!BYTES_BIG_ENDIAN)
7021 {
7022 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7023 element2 = tmp_gpr;
7024 }
7025 else
7026 element2 = element;
7027
7028 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7029 bit. */
7030 emit_insn (gen_rtx_SET (tmp_gpr,
7031 gen_rtx_AND (DImode,
7032 gen_rtx_ASHIFT (DImode,
7033 element2,
7034 GEN_INT (6)),
7035 GEN_INT (64))));
7036 }
7037 else
7038 {
7039 if (!BYTES_BIG_ENDIAN)
7040 {
7041 rtx num_ele_m1 = GEN_INT (num_elements - 1);
7042
7043 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7044 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7045 element2 = tmp_gpr;
7046 }
7047 else
7048 element2 = element;
7049
7050 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7051 }
7052
7053 /* Get the value into the lower byte of the Altivec register where VSLO
7054 expects it. */
7055 if (TARGET_P9_VECTOR)
7056 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7057 else if (can_create_pseudo_p ())
7058 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7059 else
7060 {
7061 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7062 emit_move_insn (tmp_di, tmp_gpr);
7063 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7064 }
7065
7066 /* Do the VSLO to get the value into the final location. */
7067 switch (mode)
7068 {
7069 case E_V2DFmode:
7070 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7071 return;
7072
7073 case E_V2DImode:
7074 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7075 return;
7076
7077 case E_V4SFmode:
7078 {
7079 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7080 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7081 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7082 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7083 tmp_altivec));
7084
7085 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7086 return;
7087 }
7088
7089 case E_V4SImode:
7090 case E_V8HImode:
7091 case E_V16QImode:
7092 {
7093 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7094 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7095 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7096 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7097 tmp_altivec));
7098 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7099 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
7100 GEN_INT (64 - bits_in_element)));
7101 return;
7102 }
7103
7104 default:
7105 gcc_unreachable ();
7106 }
7107
7108 return;
7109 }
7110 else
7111 gcc_unreachable ();
7112 }
7113
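/* A hedged sketch of the resulting instruction selection: on a
   little-endian ISA 3.0 (power9) target, a variable extract from a
   V16QImode vector in a VSX register with the index in a GPR is split by
   the code above into the single instruction

     vextubrx rDEST, rIDX, vSRC

   while pre-ISA 3.0 targets fall back to the mask/shift, splat/concat and
   VSLO sequence.  Register names here are placeholders.  */
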
7114 /* Return the alignment of TYPE.  The existing alignment is ALIGN.  HOW
7115    selects whether the returned alignment is ABI-mandated, optional, or
7116    both ABI-mandated and optional alignment.  */
7117
7118 unsigned int
7119 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7120 {
7121 if (how != align_opt)
7122 {
7123 if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
7124 align = 128;
7125 }
7126
7127 if (how != align_abi)
7128 {
7129 if (TREE_CODE (type) == ARRAY_TYPE
7130 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7131 {
7132 if (align < BITS_PER_WORD)
7133 align = BITS_PER_WORD;
7134 }
7135 }
7136
7137 return align;
7138 }
7139
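/* For illustration (hypothetical types, not part of the build): with the
   hook above, a VECTOR_TYPE is raised to at least 128-bit alignment when
   ABI alignment is requested, and an optional-alignment query on

     char buf[100];

   raises the result to BITS_PER_WORD so that block moves of small
   character arrays can use full-word accesses.  */
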
7140 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
7141 instructions simply ignore the low bits; VSX memory instructions
7142 are aligned to 4 or 8 bytes. */
7143
7144 static bool
7145 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
7146 {
7147 return (STRICT_ALIGNMENT
7148 || (!TARGET_EFFICIENT_UNALIGNED_VSX
7149 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
7150 || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode))
7151 && (int) align < VECTOR_ALIGN (mode)))));
7152 }
7153
7154 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7155
7156 bool
7157 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
7158 {
7159 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
7160 {
7161 if (computed != 128)
7162 {
7163 static bool warned;
7164 if (!warned && warn_psabi)
7165 {
7166 warned = true;
7167 inform (input_location,
7168 "the layout of aggregates containing vectors with"
7169 " %d-byte alignment has changed in GCC 5",
7170 computed / BITS_PER_UNIT);
7171 }
7172 }
7173 /* In current GCC there is no special case. */
7174 return false;
7175 }
7176
7177 return false;
7178 }
7179
7180 /* AIX increases natural record alignment to doubleword if the first
7181 field is an FP double while the FP fields remain word aligned. */
7182
7183 unsigned int
7184 rs6000_special_round_type_align (tree type, unsigned int computed,
7185 unsigned int specified)
7186 {
7187 unsigned int align = MAX (computed, specified);
7188 tree field = TYPE_FIELDS (type);
7189
7190   /* Skip all non-field decls.  */
7191 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7192 field = DECL_CHAIN (field);
7193
7194 if (field != NULL && field != type)
7195 {
7196 type = TREE_TYPE (field);
7197 while (TREE_CODE (type) == ARRAY_TYPE)
7198 type = TREE_TYPE (type);
7199
7200 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7201 align = MAX (align, 64);
7202 }
7203
7204 return align;
7205 }
7206
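/* Worked example (illustrative only): on AIX the record

     struct { double d; int i; };

   has its alignment raised to 64 bits because the first field is a DFmode
   double, while a record whose first field is an int keeps its computed
   word alignment.  */
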
7207 /* Darwin increases record alignment to the natural alignment of
7208 the first field. */
7209
7210 unsigned int
7211 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7212 unsigned int specified)
7213 {
7214 unsigned int align = MAX (computed, specified);
7215
7216 if (TYPE_PACKED (type))
7217 return align;
7218
7219 /* Find the first field, looking down into aggregates. */
7220 do {
7221 tree field = TYPE_FIELDS (type);
7222     /* Skip all non-field decls.  */
7223 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7224 field = DECL_CHAIN (field);
7225 if (! field)
7226 break;
7227 /* A packed field does not contribute any extra alignment. */
7228 if (DECL_PACKED (field))
7229 return align;
7230 type = TREE_TYPE (field);
7231 while (TREE_CODE (type) == ARRAY_TYPE)
7232 type = TREE_TYPE (type);
7233 } while (AGGREGATE_TYPE_P (type));
7234
7235 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
7236 align = MAX (align, TYPE_ALIGN (type));
7237
7238 return align;
7239 }
7240
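/* Illustrative Darwin example (hypothetical type): for

     struct { struct { int first; } inner; char c; };

   the scan above descends into 'inner', finds 'first', and raises the
   record alignment to TYPE_ALIGN (int); a DECL_PACKED first field would
   instead leave the computed alignment unchanged.  */
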
7241 /* Return 1 for an operand in small memory on V.4/eabi. */
7242
7243 int
7244 small_data_operand (rtx op ATTRIBUTE_UNUSED,
7245 machine_mode mode ATTRIBUTE_UNUSED)
7246 {
7247 #if TARGET_ELF
7248 rtx sym_ref;
7249
7250 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
7251 return 0;
7252
7253 if (DEFAULT_ABI != ABI_V4)
7254 return 0;
7255
7256 if (SYMBOL_REF_P (op))
7257 sym_ref = op;
7258
7259 else if (GET_CODE (op) != CONST
7260 || GET_CODE (XEXP (op, 0)) != PLUS
7261 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
7262 || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
7263 return 0;
7264
7265 else
7266 {
7267 rtx sum = XEXP (op, 0);
7268 HOST_WIDE_INT summand;
7269
7270 /* We have to be careful here, because it is the referenced address
7271 that must be 32k from _SDA_BASE_, not just the symbol. */
7272 summand = INTVAL (XEXP (sum, 1));
7273 if (summand < 0 || summand > g_switch_value)
7274 return 0;
7275
7276 sym_ref = XEXP (sum, 0);
7277 }
7278
7279 return SYMBOL_REF_SMALL_P (sym_ref);
7280 #else
7281 return 0;
7282 #endif
7283 }
7284
7285 /* Return true if either operand is a general purpose register. */
7286
7287 bool
7288 gpr_or_gpr_p (rtx op0, rtx op1)
7289 {
7290 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
7291 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
7292 }
7293
7294 /* Return true if this is a move direct operation between GPR registers and
7295 floating point/VSX registers. */
7296
7297 bool
7298 direct_move_p (rtx op0, rtx op1)
7299 {
7300 if (!REG_P (op0) || !REG_P (op1))
7301 return false;
7302
7303 if (!TARGET_DIRECT_MOVE)
7304 return false;
7305
7306 int regno0 = REGNO (op0);
7307 int regno1 = REGNO (op1);
7308 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
7309 return false;
7310
7311 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
7312 return true;
7313
7314 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
7315 return true;
7316
7317 return false;
7318 }
7319
7320 /* Return true if ADDR is an acceptable address for a quad memory
7321    operation of mode MODE (either LQ/STQ for general purpose registers, or
7322    LXV/STXV for vector registers under ISA 3.0).  STRICT is true if the
7323    registers in the address must satisfy the strict base-register checks
7324    used after reload.  */
7325
7326 bool
7327 quad_address_p (rtx addr, machine_mode mode, bool strict)
7328 {
7329 rtx op0, op1;
7330
7331 if (GET_MODE_SIZE (mode) != 16)
7332 return false;
7333
7334 if (legitimate_indirect_address_p (addr, strict))
7335 return true;
7336
7337 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
7338 return false;
7339
7340 /* Is this a valid prefixed address? If the bottom four bits of the offset
7341 are non-zero, we could use a prefixed instruction (which does not have the
7342 DQ-form constraint that the traditional instruction had) instead of
7343 forcing the unaligned offset to a GPR. */
7344 if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
7345 return true;
7346
7347 if (GET_CODE (addr) != PLUS)
7348 return false;
7349
7350 op0 = XEXP (addr, 0);
7351 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
7352 return false;
7353
7354 op1 = XEXP (addr, 1);
7355 if (!CONST_INT_P (op1))
7356 return false;
7357
7358 return quad_address_offset_p (INTVAL (op1));
7359 }
7360
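/* Quick DQ-form examples for LXV/STXV (illustrative addresses only):

     0(r4)   -> valid   (register indirect)
     32(r4)  -> valid   (offset is a multiple of 16 within the DQ range)
     40(r4)  -> invalid as DQ form; accepted above only when a prefixed
                (34-bit offset) instruction can be used instead.  */
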
7361 /* Return true if this is a load or store quad operation. This function does
7362 not handle the atomic quad memory instructions. */
7363
7364 bool
7365 quad_load_store_p (rtx op0, rtx op1)
7366 {
7367 bool ret;
7368
7369 if (!TARGET_QUAD_MEMORY)
7370 ret = false;
7371
7372 else if (REG_P (op0) && MEM_P (op1))
7373 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
7374 && quad_memory_operand (op1, GET_MODE (op1))
7375 && !reg_overlap_mentioned_p (op0, op1));
7376
7377 else if (MEM_P (op0) && REG_P (op1))
7378 ret = (quad_memory_operand (op0, GET_MODE (op0))
7379 && quad_int_reg_operand (op1, GET_MODE (op1)));
7380
7381 else
7382 ret = false;
7383
7384 if (TARGET_DEBUG_ADDR)
7385 {
7386       fprintf (stderr, "\n========== quad_load_store_p, return %s\n",
7387 ret ? "true" : "false");
7388 debug_rtx (gen_rtx_SET (op0, op1));
7389 }
7390
7391 return ret;
7392 }
7393
7394 /* Given an address, return a constant offset term if one exists. */
7395
7396 static rtx
7397 address_offset (rtx op)
7398 {
7399 if (GET_CODE (op) == PRE_INC
7400 || GET_CODE (op) == PRE_DEC)
7401 op = XEXP (op, 0);
7402 else if (GET_CODE (op) == PRE_MODIFY
7403 || GET_CODE (op) == LO_SUM)
7404 op = XEXP (op, 1);
7405
7406 if (GET_CODE (op) == CONST)
7407 op = XEXP (op, 0);
7408
7409 if (GET_CODE (op) == PLUS)
7410 op = XEXP (op, 1);
7411
7412 if (CONST_INT_P (op))
7413 return op;
7414
7415 return NULL_RTX;
7416 }
7417
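/* Sketch of address_offset results (illustrative RTL):

     (plus (reg r9) (const_int 40))   -> (const_int 40)
     (lo_sum (reg r9) (const (plus (symbol_ref "x") (const_int 8))))
                                      -> (const_int 8)
     (reg r9)                         -> NULL_RTX  */
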
7418 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
7419 the mode. If we can't find (or don't know) the alignment of the symbol
7420 we assume (optimistically) that it's sufficiently aligned [??? maybe we
7421 should be pessimistic]. Offsets are validated in the same way as for
7422 reg + offset. */
7423 static bool
7424 darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode)
7425 {
7426 /* We should not get here with this. */
7427 gcc_checking_assert (! mode_supports_dq_form (mode));
7428
7429 if (GET_CODE (x) == CONST)
7430 x = XEXP (x, 0);
7431
7432 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
7433 x = XVECEXP (x, 0, 0);
7434
7435 rtx sym = NULL_RTX;
7436 unsigned HOST_WIDE_INT offset = 0;
7437
7438 if (GET_CODE (x) == PLUS)
7439 {
7440 sym = XEXP (x, 0);
7441 if (! SYMBOL_REF_P (sym))
7442 return false;
7443 if (!CONST_INT_P (XEXP (x, 1)))
7444 return false;
7445 offset = INTVAL (XEXP (x, 1));
7446 }
7447 else if (SYMBOL_REF_P (x))
7448 sym = x;
7449 else if (CONST_INT_P (x))
7450 offset = INTVAL (x);
7451 else if (GET_CODE (x) == LABEL_REF)
7452 offset = 0; // We assume code labels are Pmode aligned
7453 else
7454 return false; // not sure what we have here.
7455
7456 /* If we don't know the alignment of the thing to which the symbol refers,
7457 we assume optimistically it is "enough".
7458 ??? maybe we should be pessimistic instead. */
7459 unsigned align = 0;
7460
7461 if (sym)
7462 {
7463 tree decl = SYMBOL_REF_DECL (sym);
7464 #if TARGET_MACHO
7465 if (MACHO_SYMBOL_INDIRECTION_P (sym))
7466 /* The decl in an indirection symbol is the original one, which might
7467 be less aligned than the indirection. Our indirections are always
7468 pointer-aligned. */
7469 ;
7470 else
7471 #endif
7472 if (decl && DECL_ALIGN (decl))
7473 align = DECL_ALIGN_UNIT (decl);
7474 }
7475
7476 unsigned int extra = 0;
7477 switch (mode)
7478 {
7479 case E_DFmode:
7480 case E_DDmode:
7481 case E_DImode:
7482 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7483 addressing. */
7484 if (VECTOR_MEM_VSX_P (mode))
7485 return false;
7486
7487 if (!TARGET_POWERPC64)
7488 extra = 4;
7489 else if ((offset & 3) || (align & 3))
7490 return false;
7491 break;
7492
7493 case E_TFmode:
7494 case E_IFmode:
7495 case E_KFmode:
7496 case E_TDmode:
7497 case E_TImode:
7498 case E_PTImode:
7499 extra = 8;
7500 if (!TARGET_POWERPC64)
7501 extra = 12;
7502 else if ((offset & 3) || (align & 3))
7503 return false;
7504 break;
7505
7506 default:
7507 break;
7508 }
7509
7510 /* We only care if the access(es) would cause a change to the high part. */
7511 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7512 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7513 }
7514
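/* Worked example of the low-part wrap check above (illustrative values):
   only the sign-extended low 16 bits can change the paired "ha" high
   part, so for offset 0x17FFC,

     ((0x7FFC ^ 0x8000) - 0x8000) = 32764

   and with extra = 8 the SIGNED_16BIT_OFFSET_EXTRA_P test fails because
   32764 > 32767 - 8: the tail of the access would wrap into the next 64k
   chunk and change the high part.  */
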
7515 /* Return true if the MEM operand is a memory operand suitable for use
7516 with a (full width, possibly multiple) gpr load/store. On
7517 powerpc64 this means the offset must be divisible by 4.
7518 Implements 'Y' constraint.
7519
7520 Accept direct, indexed, offset, lo_sum and tocref. Since this is
7521 a constraint function we know the operand has satisfied a suitable
7522 memory predicate.
7523
7524 Offsetting a lo_sum should not be allowed, except where we know by
7525 alignment that a 32k boundary is not crossed. Note that by
7526 "offsetting" here we mean a further offset to access parts of the
7527 MEM. It's fine to have a lo_sum where the inner address is offset
7528 from a sym, since the same sym+offset will appear in the high part
7529 of the address calculation. */
7530
7531 bool
7532 mem_operand_gpr (rtx op, machine_mode mode)
7533 {
7534 unsigned HOST_WIDE_INT offset;
7535 int extra;
7536 rtx addr = XEXP (op, 0);
7537
7538 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
7539 if (TARGET_UPDATE
7540 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
7541 && mode_supports_pre_incdec_p (mode)
7542 && legitimate_indirect_address_p (XEXP (addr, 0), false))
7543 return true;
7544
7545 /* Allow prefixed instructions if supported. If the bottom two bits of the
7546 offset are non-zero, we could use a prefixed instruction (which does not
7547 have the DS-form constraint that the traditional instruction had) instead
7548 of forcing the unaligned offset to a GPR. */
7549 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
7550 return true;
7551
7552 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
7553 really OK. Doing this early avoids teaching all the other machinery
7554 about them. */
7555 if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
7556 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);
7557
7558 /* Only allow offsettable addresses. See PRs 83969 and 84279. */
7559 if (!rs6000_offsettable_memref_p (op, mode, false))
7560 return false;
7561
7562 op = address_offset (addr);
7563 if (op == NULL_RTX)
7564 return true;
7565
7566 offset = INTVAL (op);
7567 if (TARGET_POWERPC64 && (offset & 3) != 0)
7568 return false;
7569
7570 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7571 if (extra < 0)
7572 extra = 0;
7573
7574 if (GET_CODE (addr) == LO_SUM)
7575 /* For lo_sum addresses, we must allow any offset except one that
7576 causes a wrap, so test only the low 16 bits. */
7577 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7578
7579 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7580 }
7581
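/* Illustrative 'Y' constraint outcomes on powerpc64 (assumed addresses):

     8(r3)   -> valid   (offset divisible by 4, fits a DS field)
     10(r3)  -> invalid (low two bits set; needs a prefixed or X-form
                         instruction)

   matching the "offset & 3" rejection above.  */
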
7582 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
7583 enforce an offset divisible by 4 even for 32-bit. */
7584
7585 bool
7586 mem_operand_ds_form (rtx op, machine_mode mode)
7587 {
7588 unsigned HOST_WIDE_INT offset;
7589 int extra;
7590 rtx addr = XEXP (op, 0);
7591
7592 /* Allow prefixed instructions if supported. If the bottom two bits of the
7593 offset are non-zero, we could use a prefixed instruction (which does not
7594 have the DS-form constraint that the traditional instruction had) instead
7595 of forcing the unaligned offset to a GPR. */
7596 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
7597 return true;
7598
7599 if (!offsettable_address_p (false, mode, addr))
7600 return false;
7601
7602 op = address_offset (addr);
7603 if (op == NULL_RTX)
7604 return true;
7605
7606 offset = INTVAL (op);
7607 if ((offset & 3) != 0)
7608 return false;
7609
7610 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7611 if (extra < 0)
7612 extra = 0;
7613
7614 if (GET_CODE (addr) == LO_SUM)
7615 /* For lo_sum addresses, we must allow any offset except one that
7616 causes a wrap, so test only the low 16 bits. */
7617 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7618
7619 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7620 }
7621 \f
7622 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
7623
7624 static bool
7625 reg_offset_addressing_ok_p (machine_mode mode)
7626 {
7627 switch (mode)
7628 {
7629 case E_V16QImode:
7630 case E_V8HImode:
7631 case E_V4SFmode:
7632 case E_V4SImode:
7633 case E_V2DFmode:
7634 case E_V2DImode:
7635 case E_V1TImode:
7636 case E_TImode:
7637 case E_TFmode:
7638 case E_KFmode:
7639 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
7640 ISA 3.0 vector d-form addressing mode was added. While TImode is not
7641 a vector mode, if we want to use the VSX registers to move it around,
7642 we need to restrict ourselves to reg+reg addressing. Similarly for
7643 IEEE 128-bit floating point that is passed in a single vector
7644 register. */
7645 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
7646 return mode_supports_dq_form (mode);
7647 break;
7648
7649 case E_SDmode:
7650 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
7651 addressing for the LFIWZX and STFIWX instructions. */
7652 if (TARGET_NO_SDMODE_STACK)
7653 return false;
7654 break;
7655
7656 default:
7657 break;
7658 }
7659
7660 return true;
7661 }
7662
7663 static bool
7664 virtual_stack_registers_memory_p (rtx op)
7665 {
7666 int regnum;
7667
7668 if (REG_P (op))
7669 regnum = REGNO (op);
7670
7671 else if (GET_CODE (op) == PLUS
7672 && REG_P (XEXP (op, 0))
7673 && CONST_INT_P (XEXP (op, 1)))
7674 regnum = REGNO (XEXP (op, 0));
7675
7676 else
7677 return false;
7678
7679 return (regnum >= FIRST_VIRTUAL_REGISTER
7680 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
7681 }
7682
7683 /* Return true if a MODE-sized memory access to OP plus OFFSET
7684 is known to not straddle a 32k boundary. This function is used
7685 to determine whether -mcmodel=medium code can use TOC pointer
7686 relative addressing for OP. This means the alignment of the TOC
7687 pointer must also be taken into account, and unfortunately that is
7688 only 8 bytes. */
7689
7690 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
7691 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
7692 #endif
7693
7694 static bool
7695 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
7696 machine_mode mode)
7697 {
7698 tree decl;
7699 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
7700
7701 if (!SYMBOL_REF_P (op))
7702 return false;
7703
7704 /* ISA 3.0 vector d-form addressing is restricted, don't allow
7705 SYMBOL_REF. */
7706 if (mode_supports_dq_form (mode))
7707 return false;
7708
7709 dsize = GET_MODE_SIZE (mode);
7710 decl = SYMBOL_REF_DECL (op);
7711 if (!decl)
7712 {
7713 if (dsize == 0)
7714 return false;
7715
7716 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
7717 replacing memory addresses with an anchor plus offset. We
7718 could find the decl by rummaging around in the block->objects
7719 VEC for the given offset but that seems like too much work. */
7720 dalign = BITS_PER_UNIT;
7721 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
7722 && SYMBOL_REF_ANCHOR_P (op)
7723 && SYMBOL_REF_BLOCK (op) != NULL)
7724 {
7725 struct object_block *block = SYMBOL_REF_BLOCK (op);
7726
7727 dalign = block->alignment;
7728 offset += SYMBOL_REF_BLOCK_OFFSET (op);
7729 }
7730 else if (CONSTANT_POOL_ADDRESS_P (op))
7731 {
7732 	  /* It would be nice to have get_pool_align ().  */
7733 machine_mode cmode = get_pool_mode (op);
7734
7735 dalign = GET_MODE_ALIGNMENT (cmode);
7736 }
7737 }
7738 else if (DECL_P (decl))
7739 {
7740 dalign = DECL_ALIGN (decl);
7741
7742 if (dsize == 0)
7743 {
7744 /* Allow BLKmode when the entire object is known to not
7745 cross a 32k boundary. */
7746 if (!DECL_SIZE_UNIT (decl))
7747 return false;
7748
7749 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
7750 return false;
7751
7752 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
7753 if (dsize > 32768)
7754 return false;
7755
7756 dalign /= BITS_PER_UNIT;
7757 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7758 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7759 return dalign >= dsize;
7760 }
7761 }
7762 else
7763 gcc_unreachable ();
7764
7765 /* Find how many bits of the alignment we know for this access. */
7766 dalign /= BITS_PER_UNIT;
7767 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7768 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7769 mask = dalign - 1;
7770 lsb = offset & -offset;
7771 mask &= lsb - 1;
7772 dalign = mask + 1;
7773
7774 return dalign >= dsize;
7775 }
7776
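/* Worked example (illustrative numbers): for a 16-byte aligned decl at
   offset 40, the code above first caps the known alignment at the TOC
   pointer alignment:

     dalign = 16 -> 8 (POWERPC64_TOC_POINTER_ALIGNMENT), mask = 7,
     lsb = 40 & -40 = 8, mask &= lsb - 1 -> 7, dalign = mask + 1 = 8

   so an 8-byte (DImode) access is known not to straddle a 32k boundary,
   while a 16-byte access is rejected.  */
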
7777 static bool
7778 constant_pool_expr_p (rtx op)
7779 {
7780 rtx base, offset;
7781
7782 split_const (op, &base, &offset);
7783 return (SYMBOL_REF_P (base)
7784 && CONSTANT_POOL_ADDRESS_P (base)
7785 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
7786 }
7787
7788 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
7789 use that as the register to put the HIGH value into if register allocation
7790 is already done. */
7791
7792 rtx
7793 create_TOC_reference (rtx symbol, rtx largetoc_reg)
7794 {
7795 rtx tocrel, tocreg, hi;
7796
7797 gcc_assert (TARGET_TOC);
7798
7799 if (TARGET_DEBUG_ADDR)
7800 {
7801 if (SYMBOL_REF_P (symbol))
7802 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
7803 XSTR (symbol, 0));
7804 else
7805 {
7806 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
7807 GET_RTX_NAME (GET_CODE (symbol)));
7808 debug_rtx (symbol);
7809 }
7810 }
7811
7812 if (!can_create_pseudo_p ())
7813 df_set_regs_ever_live (TOC_REGISTER, true);
7814
7815 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
7816 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
7817 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
7818 return tocrel;
7819
7820 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
7821 if (largetoc_reg != NULL)
7822 {
7823 emit_move_insn (largetoc_reg, hi);
7824 hi = largetoc_reg;
7825 }
7826 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
7827 }
7828
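/* For -mcmodel=medium/large the HIGH/LO_SUM pair built above typically
   assembles to the familiar two-instruction TOC access (illustrative
   register and symbol names):

     addis r9, r2, sym@toc@ha
     ld    r9, sym@toc@l(r9)

   whereas -mcmodel=small keeps the single UNSPEC_TOCREL reference.  */
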
7829 /* These are only used to pass through from print_operand/print_operand_address
7830 to rs6000_output_addr_const_extra over the intervening function
7831 output_addr_const which is not target code. */
7832 static const_rtx tocrel_base_oac, tocrel_offset_oac;
7833
7834 /* Return true if OP is a toc pointer relative address (the output
7835 of create_TOC_reference). If STRICT, do not match non-split
7836 -mcmodel=large/medium toc pointer relative addresses. If the pointers
7837 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
7838 TOCREL_OFFSET_RET respectively. */
7839
7840 bool
7841 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
7842 const_rtx *tocrel_offset_ret)
7843 {
7844 if (!TARGET_TOC)
7845 return false;
7846
7847 if (TARGET_CMODEL != CMODEL_SMALL)
7848 {
7849 /* When strict ensure we have everything tidy. */
7850 if (strict
7851 && !(GET_CODE (op) == LO_SUM
7852 && REG_P (XEXP (op, 0))
7853 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
7854 return false;
7855
7856 /* When not strict, allow non-split TOC addresses and also allow
7857 (lo_sum (high ..)) TOC addresses created during reload. */
7858 if (GET_CODE (op) == LO_SUM)
7859 op = XEXP (op, 1);
7860 }
7861
7862 const_rtx tocrel_base = op;
7863 const_rtx tocrel_offset = const0_rtx;
7864
7865 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
7866 {
7867 tocrel_base = XEXP (op, 0);
7868 tocrel_offset = XEXP (op, 1);
7869 }
7870
7871 if (tocrel_base_ret)
7872 *tocrel_base_ret = tocrel_base;
7873 if (tocrel_offset_ret)
7874 *tocrel_offset_ret = tocrel_offset;
7875
7876 return (GET_CODE (tocrel_base) == UNSPEC
7877 && XINT (tocrel_base, 1) == UNSPEC_TOCREL
7878 && REG_P (XVECEXP (tocrel_base, 0, 1))
7879 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
7880 }
7881
7882 /* Return true if X is a constant pool address, and also for cmodel=medium
7883 if X is a toc-relative address known to be offsettable within MODE. */
7884
7885 bool
7886 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
7887 bool strict)
7888 {
7889 const_rtx tocrel_base, tocrel_offset;
7890 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
7891 && (TARGET_CMODEL != CMODEL_MEDIUM
7892 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
7893 || mode == QImode
7894 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
7895 INTVAL (tocrel_offset), mode)));
7896 }
7897
7898 static bool
7899 legitimate_small_data_p (machine_mode mode, rtx x)
7900 {
7901 return (DEFAULT_ABI == ABI_V4
7902 && !flag_pic && !TARGET_TOC
7903 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
7904 && small_data_operand (x, mode));
7905 }
7906
7907 bool
7908 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
7909 bool strict, bool worst_case)
7910 {
7911 unsigned HOST_WIDE_INT offset;
7912 unsigned int extra;
7913
7914 if (GET_CODE (x) != PLUS)
7915 return false;
7916 if (!REG_P (XEXP (x, 0)))
7917 return false;
7918 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7919 return false;
7920 if (mode_supports_dq_form (mode))
7921 return quad_address_p (x, mode, strict);
7922 if (!reg_offset_addressing_ok_p (mode))
7923 return virtual_stack_registers_memory_p (x);
7924 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
7925 return true;
7926 if (!CONST_INT_P (XEXP (x, 1)))
7927 return false;
7928
7929 offset = INTVAL (XEXP (x, 1));
7930 extra = 0;
7931 switch (mode)
7932 {
7933 case E_DFmode:
7934 case E_DDmode:
7935 case E_DImode:
7936 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7937 addressing. */
7938 if (VECTOR_MEM_VSX_P (mode))
7939 return false;
7940
7941 if (!worst_case)
7942 break;
7943 if (!TARGET_POWERPC64)
7944 extra = 4;
7945 else if (offset & 3)
7946 return false;
7947 break;
7948
7949 case E_TFmode:
7950 case E_IFmode:
7951 case E_KFmode:
7952 case E_TDmode:
7953 case E_TImode:
7954 case E_PTImode:
7955 extra = 8;
7956 if (!worst_case)
7957 break;
7958 if (!TARGET_POWERPC64)
7959 extra = 12;
7960 else if (offset & 3)
7961 return false;
7962 break;
7963
7964 default:
7965 break;
7966 }
7967
7968 if (TARGET_PREFIXED)
7969 return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
7970 else
7971 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7972 }
7973
7974 bool
7975 legitimate_indexed_address_p (rtx x, int strict)
7976 {
7977 rtx op0, op1;
7978
7979 if (GET_CODE (x) != PLUS)
7980 return false;
7981
7982 op0 = XEXP (x, 0);
7983 op1 = XEXP (x, 1);
7984
7985 return (REG_P (op0) && REG_P (op1)
7986 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
7987 && INT_REG_OK_FOR_INDEX_P (op1, strict))
7988 || (INT_REG_OK_FOR_BASE_P (op1, strict)
7989 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
7990 }
7991
7992 bool
7993 avoiding_indexed_address_p (machine_mode mode)
7994 {
7995 /* Avoid indexed addressing for modes that have non-indexed
7996 load/store instruction forms. */
7997 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
7998 }
7999
8000 bool
8001 legitimate_indirect_address_p (rtx x, int strict)
8002 {
8003 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
8004 }
8005
8006 bool
8007 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
8008 {
8009 if (!TARGET_MACHO || !flag_pic
8010 || mode != SImode || !MEM_P (x))
8011 return false;
8012 x = XEXP (x, 0);
8013
8014 if (GET_CODE (x) != LO_SUM)
8015 return false;
8016 if (!REG_P (XEXP (x, 0)))
8017 return false;
8018 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
8019 return false;
8020 x = XEXP (x, 1);
8021
8022 return CONSTANT_P (x);
8023 }
8024
8025 static bool
8026 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
8027 {
8028 if (GET_CODE (x) != LO_SUM)
8029 return false;
8030 if (!REG_P (XEXP (x, 0)))
8031 return false;
8032 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8033 return false;
8034   /* Quad-word addresses are restricted, and we can't use LO_SUM.  */
8035 if (mode_supports_dq_form (mode))
8036 return false;
8037 x = XEXP (x, 1);
8038
8039 if (TARGET_ELF || TARGET_MACHO)
8040 {
8041 bool large_toc_ok;
8042
8043 if (DEFAULT_ABI == ABI_V4 && flag_pic)
8044 return false;
8045 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
8046 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
8047 	 recognizes some LO_SUM addresses as valid although this
8048 	 function says the opposite.  In most cases LRA can, through
8049 	 different transformations, generate correct code for address
8050 	 reloads; only some LO_SUM cases defeat it.  So we need to add
8051 	 code here saying that those addresses are still valid.  */
8052 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8053 && small_toc_ref (x, VOIDmode));
8054 if (TARGET_TOC && ! large_toc_ok)
8055 return false;
8056 if (GET_MODE_NUNITS (mode) != 1)
8057 return false;
8058 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8059 && !(/* ??? Assume floating point reg based on mode? */
8060 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8061 return false;
8062
8063 return CONSTANT_P (x) || large_toc_ok;
8064 }
8065
8066 return false;
8067 }
8068
8069
8070 /* Try machine-dependent ways of modifying an illegitimate address
8071 to be legitimate. If we find one, return the new, valid address.
8072 This is used from only one place: `memory_address' in explow.c.
8073
8074 OLDX is the address as it was before break_out_memory_refs was
8075 called. In some cases it is useful to look at this to decide what
8076 needs to be done.
8077
8078 It is always safe for this function to do nothing. It exists to
8079 recognize opportunities to optimize the output.
8080
8081 On RS/6000, first check for the sum of a register with a constant
8082 integer that is out of range. If so, generate code to add the
8083 constant with the low-order 16 bits masked to the register and force
8084 this result into another register (this can be done with `cau').
8085 Then generate an address of REG+(CONST&0xffff), allowing for the
8086 possibility of bit 16 being a one.
8087
8088 Then check for the sum of a register and something not constant, try to
8089 load the other things into a register and return the sum. */
8090
8091 static rtx
8092 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8093 machine_mode mode)
8094 {
8095 unsigned int extra;
8096
8097 if (!reg_offset_addressing_ok_p (mode)
8098 || mode_supports_dq_form (mode))
8099 {
8100 if (virtual_stack_registers_memory_p (x))
8101 return x;
8102
8103 /* In theory we should not be seeing addresses of the form reg+0,
8104 but just in case it is generated, optimize it away. */
8105 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8106 return force_reg (Pmode, XEXP (x, 0));
8107
8108 /* For TImode with load/store quad, restrict addresses to just a single
8109 pointer, so it works with both GPRs and VSX registers. */
8110 /* Make sure both operands are registers. */
8111 else if (GET_CODE (x) == PLUS
8112 && (mode != TImode || !TARGET_VSX))
8113 return gen_rtx_PLUS (Pmode,
8114 force_reg (Pmode, XEXP (x, 0)),
8115 force_reg (Pmode, XEXP (x, 1)));
8116 else
8117 return force_reg (Pmode, x);
8118 }
8119 if (SYMBOL_REF_P (x))
8120 {
8121 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8122 if (model != 0)
8123 return rs6000_legitimize_tls_address (x, model);
8124 }
8125
8126 extra = 0;
8127 switch (mode)
8128 {
8129 case E_TFmode:
8130 case E_TDmode:
8131 case E_TImode:
8132 case E_PTImode:
8133 case E_IFmode:
8134 case E_KFmode:
8135 /* As in legitimate_offset_address_p we do not assume
8136 worst-case. The mode here is just a hint as to the registers
8137 used. A TImode is usually in gprs, but may actually be in
8138 fprs. Leave worst-case scenario for reload to handle via
8139 insn constraints. PTImode is only GPRs. */
8140 extra = 8;
8141 break;
8142 default:
8143 break;
8144 }
8145
8146 if (GET_CODE (x) == PLUS
8147 && REG_P (XEXP (x, 0))
8148 && CONST_INT_P (XEXP (x, 1))
8149 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8150 >= 0x10000 - extra))
8151 {
8152 HOST_WIDE_INT high_int, low_int;
8153 rtx sum;
8154 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8155 if (low_int >= 0x8000 - extra)
8156 low_int = 0;
8157 high_int = INTVAL (XEXP (x, 1)) - low_int;
8158 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8159 GEN_INT (high_int)), 0);
8160 return plus_constant (Pmode, sum, low_int);
8161 }
8162 else if (GET_CODE (x) == PLUS
8163 && REG_P (XEXP (x, 0))
8164 && !CONST_INT_P (XEXP (x, 1))
8165 && GET_MODE_NUNITS (mode) == 1
8166 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8167 || (/* ??? Assume floating point reg based on mode? */
8168 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8169 && !avoiding_indexed_address_p (mode))
8170 {
8171 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8172 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
8173 }
8174 else if ((TARGET_ELF
8175 #if TARGET_MACHO
8176 || !MACHO_DYNAMIC_NO_PIC_P
8177 #endif
8178 )
8179 && TARGET_32BIT
8180 && TARGET_NO_TOC_OR_PCREL
8181 && !flag_pic
8182 && !CONST_INT_P (x)
8183 && !CONST_WIDE_INT_P (x)
8184 && !CONST_DOUBLE_P (x)
8185 && CONSTANT_P (x)
8186 && GET_MODE_NUNITS (mode) == 1
8187 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8188 || (/* ??? Assume floating point reg based on mode? */
8189 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
8190 {
8191 rtx reg = gen_reg_rtx (Pmode);
8192 if (TARGET_ELF)
8193 emit_insn (gen_elf_high (reg, x));
8194 else
8195 emit_insn (gen_macho_high (Pmode, reg, x));
8196 return gen_rtx_LO_SUM (Pmode, reg, x);
8197 }
8198 else if (TARGET_TOC
8199 && SYMBOL_REF_P (x)
8200 && constant_pool_expr_p (x)
8201 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
8202 return create_TOC_reference (x, NULL_RTX);
8203 else
8204 return x;
8205 }
8206
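/* Worked example of the high/low split above (illustrative values): for
   x = (plus (reg r3) (const_int 70000)),

     low_int  = ((70000 & 0xffff) ^ 0x8000) - 0x8000 = 4464
     high_int = 70000 - 4464                         = 65536

   so 65536 is added into a temporary register (an addis) and the
   remaining 4464 fits in a 16-bit D field.  */
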
8207 /* Debug version of rs6000_legitimize_address. */
8208 static rtx
8209 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
8210 {
8211 rtx ret;
8212 rtx_insn *insns;
8213
8214 start_sequence ();
8215 ret = rs6000_legitimize_address (x, oldx, mode);
8216 insns = get_insns ();
8217 end_sequence ();
8218
8219 if (ret != x)
8220 {
8221 fprintf (stderr,
8222 "\nrs6000_legitimize_address: mode %s, old code %s, "
8223 "new code %s, modified\n",
8224 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8225 GET_RTX_NAME (GET_CODE (ret)));
8226
8227 fprintf (stderr, "Original address:\n");
8228 debug_rtx (x);
8229
8230 fprintf (stderr, "oldx:\n");
8231 debug_rtx (oldx);
8232
8233 fprintf (stderr, "New address:\n");
8234 debug_rtx (ret);
8235
8236 if (insns)
8237 {
8238 fprintf (stderr, "Insns added:\n");
8239 debug_rtx_list (insns, 20);
8240 }
8241 }
8242 else
8243 {
8244 fprintf (stderr,
8245 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8246 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8247
8248 debug_rtx (x);
8249 }
8250
8251 if (insns)
8252 emit_insn (insns);
8253
8254 return ret;
8255 }
8256
8257 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8258 We need to emit DTP-relative relocations. */
8259
8260 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8261 static void
8262 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8263 {
8264 switch (size)
8265 {
8266 case 4:
8267 fputs ("\t.long\t", file);
8268 break;
8269 case 8:
8270 fputs (DOUBLE_INT_ASM_OP, file);
8271 break;
8272 default:
8273 gcc_unreachable ();
8274 }
8275 output_addr_const (file, x);
8276 if (TARGET_ELF)
8277 fputs ("@dtprel+0x8000", file);
8278 else if (TARGET_XCOFF && SYMBOL_REF_P (x))
8279 {
8280 switch (SYMBOL_REF_TLS_MODEL (x))
8281 {
8282 case 0:
8283 break;
8284 case TLS_MODEL_LOCAL_EXEC:
8285 fputs ("@le", file);
8286 break;
8287 case TLS_MODEL_INITIAL_EXEC:
8288 fputs ("@ie", file);
8289 break;
8290 case TLS_MODEL_GLOBAL_DYNAMIC:
8291 case TLS_MODEL_LOCAL_DYNAMIC:
8292 fputs ("@m", file);
8293 break;
8294 default:
8295 gcc_unreachable ();
8296 }
8297 }
8298 }
8299
8300 /* Return true if X is a symbol that refers to real (rather than emulated)
8301 TLS. */
8302
8303 static bool
8304 rs6000_real_tls_symbol_ref_p (rtx x)
8305 {
8306 return (SYMBOL_REF_P (x)
8307 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8308 }
8309
8310 /* In the name of slightly smaller debug output, and to cater to
8311 general assembler lossage, recognize various UNSPEC sequences
8312 and turn them back into a direct symbol reference. */
8313
8314 static rtx
8315 rs6000_delegitimize_address (rtx orig_x)
8316 {
8317 rtx x, y, offset;
8318
8319 if (GET_CODE (orig_x) == UNSPEC && XINT (orig_x, 1) == UNSPEC_FUSION_GPR)
8320 orig_x = XVECEXP (orig_x, 0, 0);
8321
8322 orig_x = delegitimize_mem_from_attrs (orig_x);
8323
8324 x = orig_x;
8325 if (MEM_P (x))
8326 x = XEXP (x, 0);
8327
8328 y = x;
8329 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
8330 y = XEXP (y, 1);
8331
8332 offset = NULL_RTX;
8333 if (GET_CODE (y) == PLUS
8334 && GET_MODE (y) == Pmode
8335 && CONST_INT_P (XEXP (y, 1)))
8336 {
8337 offset = XEXP (y, 1);
8338 y = XEXP (y, 0);
8339 }
8340
8341 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
8342 {
8343 y = XVECEXP (y, 0, 0);
8344
8345 #ifdef HAVE_AS_TLS
8346 /* Do not associate thread-local symbols with the original
8347 constant pool symbol. */
8348 if (TARGET_XCOFF
8349 && SYMBOL_REF_P (y)
8350 && CONSTANT_POOL_ADDRESS_P (y)
8351 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8352 return orig_x;
8353 #endif
8354
8355 if (offset != NULL_RTX)
8356 y = gen_rtx_PLUS (Pmode, y, offset);
8357 if (!MEM_P (orig_x))
8358 return y;
8359 else
8360 return replace_equiv_address_nv (orig_x, y);
8361 }
8362
8363 if (TARGET_MACHO
8364 && GET_CODE (orig_x) == LO_SUM
8365 && GET_CODE (XEXP (orig_x, 1)) == CONST)
8366 {
8367 y = XEXP (XEXP (orig_x, 1), 0);
8368 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
8369 return XVECEXP (y, 0, 0);
8370 }
8371
8372 return orig_x;
8373 }
8374
8375 /* Return true if X shouldn't be emitted into the debug info.
8376 The linker doesn't like .toc section references from
8377 .debug_* sections, so reject .toc section symbols. */
8378
8379 static bool
8380 rs6000_const_not_ok_for_debug_p (rtx x)
8381 {
8382 if (GET_CODE (x) == UNSPEC)
8383 return true;
8384 if (SYMBOL_REF_P (x)
8385 && CONSTANT_POOL_ADDRESS_P (x))
8386 {
8387 rtx c = get_pool_constant (x);
8388 machine_mode cmode = get_pool_mode (x);
8389 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
8390 return true;
8391 }
8392
8393 return false;
8394 }
8395
8396 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
8397
8398 static bool
8399 rs6000_legitimate_combined_insn (rtx_insn *insn)
8400 {
8401 int icode = INSN_CODE (insn);
8402
8403 /* Reject creating doloop insns. Combine should not be allowed
8404 to create these for a number of reasons:
8405 1) In a nested loop, if combine creates one of these in an
8406 outer loop and the register allocator happens to allocate ctr
8407 to the outer loop insn, then the inner loop can't use ctr.
8408 Inner loops ought to be more highly optimized.
8409 2) Combine often wants to create one of these from what was
8410 originally a three insn sequence, first combining the three
8411 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
8412 allocated ctr, the splitter takes use back to the three insn
8413 sequence. It's better to stop combine at the two insn
8414 sequence.
8415 	 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
8416 insns, the register allocator sometimes uses floating point
8417 or vector registers for the pseudo. Since ctrsi/ctrdi is a
8418 jump insn and output reloads are not implemented for jumps,
8419 the ctrsi/ctrdi splitters need to handle all possible cases.
8420 That's a pain, and it gets to be seriously difficult when a
8421 splitter that runs after reload needs memory to transfer from
8422 a gpr to fpr. See PR70098 and PR71763 which are not fixed
8423 for the difficult case. It's better to not create problems
8424 in the first place. */
8425 if (icode != CODE_FOR_nothing
8426 && (icode == CODE_FOR_bdz_si
8427 || icode == CODE_FOR_bdz_di
8428 || icode == CODE_FOR_bdnz_si
8429 || icode == CODE_FOR_bdnz_di
8430 || icode == CODE_FOR_bdztf_si
8431 || icode == CODE_FOR_bdztf_di
8432 || icode == CODE_FOR_bdnztf_si
8433 || icode == CODE_FOR_bdnztf_di))
8434 return false;
8435
8436 return true;
8437 }
8438
8439 /* Construct the SYMBOL_REF for the tls_get_addr function. */
8440
8441 static GTY(()) rtx rs6000_tls_symbol;
8442 static rtx
8443 rs6000_tls_get_addr (void)
8444 {
8445 if (!rs6000_tls_symbol)
8446 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
8447
8448 return rs6000_tls_symbol;
8449 }
8450
8451 /* Construct the SYMBOL_REF for TLS GOT references. */
8452
8453 static GTY(()) rtx rs6000_got_symbol;
8454 rtx
8455 rs6000_got_sym (void)
8456 {
8457 if (!rs6000_got_symbol)
8458 {
8459 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8460 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
8461 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
8462 }
8463
8464 return rs6000_got_symbol;
8465 }
8466
8467 /* AIX Thread-Local Address support. */
8468
8469 static rtx
8470 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
8471 {
8472 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
8473 const char *name;
8474 char *tlsname;
8475
8476 name = XSTR (addr, 0);
8477   /* Append the TLS CSECT qualifier, unless the symbol is already qualified
8478      or will be placed in the TLS private data section.  */
8479 if (name[strlen (name) - 1] != ']'
8480 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
8481 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
8482 {
8483 tlsname = XALLOCAVEC (char, strlen (name) + 4);
8484 strcpy (tlsname, name);
8485 strcat (tlsname,
8486 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
8487 tlsaddr = copy_rtx (addr);
8488 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
8489 }
8490 else
8491 tlsaddr = addr;
8492
8493 /* Place addr into TOC constant pool. */
8494 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
8495
8496 /* Output the TOC entry and create the MEM referencing the value. */
8497 if (constant_pool_expr_p (XEXP (sym, 0))
8498 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
8499 {
8500 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
8501 mem = gen_const_mem (Pmode, tocref);
8502 set_mem_alias_set (mem, get_TOC_alias_set ());
8503 }
8504 else
8505 return sym;
8506
8507 /* Use global-dynamic for local-dynamic. */
8508 if (model == TLS_MODEL_GLOBAL_DYNAMIC
8509 || model == TLS_MODEL_LOCAL_DYNAMIC)
8510 {
8511 /* Create new TOC reference for @m symbol. */
8512 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
8513 tlsname = XALLOCAVEC (char, strlen (name) + 1);
8514 strcpy (tlsname, "*LCM");
8515 strcat (tlsname, name + 3);
8516 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
8517 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
8518 tocref = create_TOC_reference (modaddr, NULL_RTX);
8519 rtx modmem = gen_const_mem (Pmode, tocref);
8520 set_mem_alias_set (modmem, get_TOC_alias_set ());
8521
8522 rtx modreg = gen_reg_rtx (Pmode);
8523 emit_insn (gen_rtx_SET (modreg, modmem));
8524
8525 tmpreg = gen_reg_rtx (Pmode);
8526 emit_insn (gen_rtx_SET (tmpreg, mem));
8527
8528 dest = gen_reg_rtx (Pmode);
8529 if (TARGET_32BIT)
8530 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
8531 else
8532 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
8533 return dest;
8534 }
8535 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
8536 else if (TARGET_32BIT)
8537 {
8538 tlsreg = gen_reg_rtx (SImode);
8539 emit_insn (gen_tls_get_tpointer (tlsreg));
8540 }
8541 else
8542 tlsreg = gen_rtx_REG (DImode, 13);
8543
8544 /* Load the TOC value into temporary register. */
8545 tmpreg = gen_reg_rtx (Pmode);
8546 emit_insn (gen_rtx_SET (tmpreg, mem));
8547 set_unique_reg_note (get_last_insn (), REG_EQUAL,
8548 gen_rtx_MINUS (Pmode, addr, tlsreg));
8549
8550 /* Add TOC symbol value to TLS pointer. */
8551 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
8552
8553 return dest;
8554 }
8555
8556 /* Passes the TLS argument value from the global-dynamic and local-dynamic
8557    emit_library_call_value calls in rs6000_legitimize_tls_address through
8558    to rs6000_call_aix and rs6000_call_sysv.  This is used to emit the
8559    marker relocs put on __tls_get_addr calls.  */
8560 static rtx global_tlsarg;
8561
8562 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
8563 this (thread-local) address. */
8564
8565 static rtx
8566 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
8567 {
8568 rtx dest, insn;
8569
8570 if (TARGET_XCOFF)
8571 return rs6000_legitimize_tls_address_aix (addr, model);
8572
8573 dest = gen_reg_rtx (Pmode);
8574 if (model == TLS_MODEL_LOCAL_EXEC
8575 && (rs6000_tls_size == 16 || rs6000_pcrel_p (cfun)))
8576 {
8577 rtx tlsreg;
8578
8579 if (TARGET_64BIT)
8580 {
8581 tlsreg = gen_rtx_REG (Pmode, 13);
8582 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
8583 }
8584 else
8585 {
8586 tlsreg = gen_rtx_REG (Pmode, 2);
8587 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
8588 }
8589 emit_insn (insn);
8590 }
8591 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
8592 {
8593 rtx tlsreg, tmp;
8594
8595 tmp = gen_reg_rtx (Pmode);
8596 if (TARGET_64BIT)
8597 {
8598 tlsreg = gen_rtx_REG (Pmode, 13);
8599 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
8600 }
8601 else
8602 {
8603 tlsreg = gen_rtx_REG (Pmode, 2);
8604 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
8605 }
8606 emit_insn (insn);
8607 if (TARGET_64BIT)
8608 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
8609 else
8610 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
8611 emit_insn (insn);
8612 }
8613 else
8614 {
8615 rtx got, tga, tmp1, tmp2;
8616
8617 /* We currently use relocations like @got@tlsgd for tls, which
8618 means the linker will handle allocation of tls entries, placing
8619 them in the .got section. So use a pointer to the .got section,
8620 not one to secondary TOC sections used by 64-bit -mminimal-toc,
8621 or to secondary GOT sections used by 32-bit -fPIC. */
8622 if (rs6000_pcrel_p (cfun))
8623 got = const0_rtx;
8624 else if (TARGET_64BIT)
8625 got = gen_rtx_REG (Pmode, 2);
8626 else
8627 {
8628 if (flag_pic == 1)
8629 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
8630 else
8631 {
8632 rtx gsym = rs6000_got_sym ();
8633 got = gen_reg_rtx (Pmode);
8634 if (flag_pic == 0)
8635 rs6000_emit_move (got, gsym, Pmode);
8636 else
8637 {
8638 rtx mem, lab;
8639
8640 tmp1 = gen_reg_rtx (Pmode);
8641 tmp2 = gen_reg_rtx (Pmode);
8642 mem = gen_const_mem (Pmode, tmp1);
8643 lab = gen_label_rtx ();
8644 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
8645 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
8646 if (TARGET_LINK_STACK)
8647 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
8648 emit_move_insn (tmp2, mem);
8649 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
8650 set_unique_reg_note (last, REG_EQUAL, gsym);
8651 }
8652 }
8653 }
8654
8655 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
8656 {
8657 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
8658 UNSPEC_TLSGD);
8659 tga = rs6000_tls_get_addr ();
8660 rtx argreg = gen_rtx_REG (Pmode, 3);
8661 emit_insn (gen_rtx_SET (argreg, arg));
8662 global_tlsarg = arg;
8663 emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode);
8664 global_tlsarg = NULL_RTX;
8665
8666 /* Make a note so that the result of this call can be CSEd. */
8667 rtvec vec = gen_rtvec (1, copy_rtx (arg));
8668 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
8669 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
8670 }
8671 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
8672 {
8673 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
8674 tga = rs6000_tls_get_addr ();
8675 tmp1 = gen_reg_rtx (Pmode);
8676 rtx argreg = gen_rtx_REG (Pmode, 3);
8677 emit_insn (gen_rtx_SET (argreg, arg));
8678 global_tlsarg = arg;
8679 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode);
8680 global_tlsarg = NULL_RTX;
8681
8682 /* Make a note so that the result of this call can be CSEd. */
8683 rtvec vec = gen_rtvec (1, copy_rtx (arg));
8684 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
8685 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
8686
8687 if (rs6000_tls_size == 16 || rs6000_pcrel_p (cfun))
8688 {
8689 if (TARGET_64BIT)
8690 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
8691 else
8692 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
8693 }
8694 else if (rs6000_tls_size == 32)
8695 {
8696 tmp2 = gen_reg_rtx (Pmode);
8697 if (TARGET_64BIT)
8698 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
8699 else
8700 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
8701 emit_insn (insn);
8702 if (TARGET_64BIT)
8703 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
8704 else
8705 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
8706 }
8707 else
8708 {
8709 tmp2 = gen_reg_rtx (Pmode);
8710 if (TARGET_64BIT)
8711 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
8712 else
8713 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
8714 emit_insn (insn);
8715 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
8716 }
8717 emit_insn (insn);
8718 }
8719 else
8720 {
8721 /* IE, or 64-bit offset LE. */
8722 tmp2 = gen_reg_rtx (Pmode);
8723 if (TARGET_64BIT)
8724 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
8725 else
8726 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
8727 emit_insn (insn);
8728 if (rs6000_pcrel_p (cfun))
8729 {
8730 if (TARGET_64BIT)
8731 insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
8732 else
8733 insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
8734 }
8735 else if (TARGET_64BIT)
8736 insn = gen_tls_tls_64 (dest, tmp2, addr);
8737 else
8738 insn = gen_tls_tls_32 (dest, tmp2, addr);
8739 emit_insn (insn);
8740 }
8741 }
8742
8743 return dest;
8744 }
8745
8746 /* Only create the global variable for the stack protect guard if we are using
8747 the global flavor of that guard. */
8748 static tree
8749 rs6000_init_stack_protect_guard (void)
8750 {
8751 if (rs6000_stack_protector_guard == SSP_GLOBAL)
8752 return default_stack_protect_guard ();
8753
8754 return NULL_TREE;
8755 }
8756
8757 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8758
8759 static bool
8760 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8761 {
8762 if (GET_CODE (x) == HIGH
8763 && GET_CODE (XEXP (x, 0)) == UNSPEC)
8764 return true;
8765
8766 /* A TLS symbol in the TOC cannot contain a sum. */
8767 if (GET_CODE (x) == CONST
8768 && GET_CODE (XEXP (x, 0)) == PLUS
8769 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
8770 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
8771 return true;
8772
8773 /* Do not place an ELF TLS symbol in the constant pool. */
8774 return TARGET_ELF && tls_referenced_p (x);
8775 }
8776
8777 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
8778 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
8779 can be addressed relative to the toc pointer. */
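/* As a hypothetical example of the cmodel=medium case, the address of a
local symbol "sym" is materialized with a high/low pair relative to the
TOC pointer in r2, avoiding a TOC indirection slot:

addis 9,2,sym@toc@ha
addi  9,9,sym@toc@l  */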
8780
8781 static bool
8782 use_toc_relative_ref (rtx sym, machine_mode mode)
8783 {
8784 return ((constant_pool_expr_p (sym)
8785 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
8786 get_pool_mode (sym)))
8787 || (TARGET_CMODEL == CMODEL_MEDIUM
8788 && SYMBOL_REF_LOCAL_P (sym)
8789 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
8790 }
8791
8792 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
8793 that is a valid memory address for an instruction.
8794 The MODE argument is the machine mode for the MEM expression
8795 that wants to use this address.
8796
8797 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
8798 refers to a constant pool entry of an address (or the sum of it
8799 plus a constant), a short (16-bit signed) constant plus a register,
8800 the sum of two registers, or a register indirect, possibly with an
8801 auto-increment. For DFmode, DDmode and DImode with a constant plus
8802 register, we must ensure that both words are addressable, or that
8803 PowerPC64 is used with a word-aligned offset.
8804
8805 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
8806 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
8807 because adjacent memory cells are accessed by adding word-sized offsets
8808 during assembly output. */
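/* Some illustrative RTL shapes this predicate accepts (register numbers
hypothetical, 64-bit Pmode assumed):

(mem:SI (reg:DI 9))                            register indirect
(mem:SI (plus:DI (reg:DI 9) (const_int 16)))   reg + 16-bit offset
(mem:SI (plus:DI (reg:DI 9) (reg:DI 10)))      indexed, reg + reg
(mem:SI (pre_inc:DI (reg:DI 9)))               auto-increment

The mode-specific tests below reject forms that are invalid for MODE,
such as indexed addressing for TImode in VSX registers. */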
8809 static bool
8810 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
8811 {
8812 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
8813 bool quad_offset_p = mode_supports_dq_form (mode);
8814
8815 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
8816 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
8817 && GET_CODE (x) == AND
8818 && CONST_INT_P (XEXP (x, 1))
8819 && INTVAL (XEXP (x, 1)) == -16)
8820 x = XEXP (x, 0);
8821
8822 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
8823 return 0;
8824 if (legitimate_indirect_address_p (x, reg_ok_strict))
8825 return 1;
8826 if (TARGET_UPDATE
8827 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
8828 && mode_supports_pre_incdec_p (mode)
8829 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
8830 return 1;
8831
8832 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
8833 if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT))
8834 return 1;
8835
8836 /* Handle restricted vector d-form offsets in ISA 3.0. */
8837 if (quad_offset_p)
8838 {
8839 if (quad_address_p (x, mode, reg_ok_strict))
8840 return 1;
8841 }
8842 else if (virtual_stack_registers_memory_p (x))
8843 return 1;
8844
8845 else if (reg_offset_p)
8846 {
8847 if (legitimate_small_data_p (mode, x))
8848 return 1;
8849 if (legitimate_constant_pool_address_p (x, mode,
8850 reg_ok_strict || lra_in_progress))
8851 return 1;
8852 }
8853
8854 /* For TImode, if we have TImode in VSX registers, only allow register
8855 indirect addresses. This will allow the values to go in either GPRs
8856 or VSX registers without reloading. The vector types would tend to
8857 go into VSX registers, so we allow REG+REG, while TImode seems
8858 somewhat split, in that some uses are GPR based, and some VSX based. */
8859 /* FIXME: We could loosen this by changing the following to
8860 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
8861 but currently we cannot allow REG+REG addressing for TImode. See
8862 PR72827 for complete details on how this ends up hoodwinking DSE. */
8863 if (mode == TImode && TARGET_VSX)
8864 return 0;
8865 /* If not REG_OK_STRICT (before reload), accept any stack offset. */
8866 if (! reg_ok_strict
8867 && reg_offset_p
8868 && GET_CODE (x) == PLUS
8869 && REG_P (XEXP (x, 0))
8870 && (XEXP (x, 0) == virtual_stack_vars_rtx
8871 || XEXP (x, 0) == arg_pointer_rtx)
8872 && CONST_INT_P (XEXP (x, 1)))
8873 return 1;
8874 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
8875 return 1;
8876 if (!FLOAT128_2REG_P (mode)
8877 && (TARGET_HARD_FLOAT
8878 || TARGET_POWERPC64
8879 || (mode != DFmode && mode != DDmode))
8880 && (TARGET_POWERPC64 || mode != DImode)
8881 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
8882 && mode != PTImode
8883 && !avoiding_indexed_address_p (mode)
8884 && legitimate_indexed_address_p (x, reg_ok_strict))
8885 return 1;
8886 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
8887 && mode_supports_pre_modify_p (mode)
8888 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
8889 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
8890 reg_ok_strict, false)
8891 || (!avoiding_indexed_address_p (mode)
8892 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
8893 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8894 {
8895 /* There is no prefixed version of the load/store with update. */
8896 rtx addr = XEXP (x, 1);
8897 return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT);
8898 }
8899 if (reg_offset_p && !quad_offset_p
8900 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
8901 return 1;
8902 return 0;
8903 }
8904
8905 /* Debug version of rs6000_legitimate_address_p. */
8906 static bool
8907 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
8908 bool reg_ok_strict)
8909 {
8910 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
8911 fprintf (stderr,
8912 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
8913 "strict = %d, reload = %s, code = %s\n",
8914 ret ? "true" : "false",
8915 GET_MODE_NAME (mode),
8916 reg_ok_strict,
8917 (reload_completed ? "after" : "before"),
8918 GET_RTX_NAME (GET_CODE (x)));
8919 debug_rtx (x);
8920
8921 return ret;
8922 }
8923
8924 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
8925
8926 static bool
8927 rs6000_mode_dependent_address_p (const_rtx addr,
8928 addr_space_t as ATTRIBUTE_UNUSED)
8929 {
8930 return rs6000_mode_dependent_address_ptr (addr);
8931 }
8932
8933 /* Return true if ADDR (a legitimate address expression)
8934 has an effect that depends on the machine mode it is used for.
8935
8936 On the RS/6000 this is true of all integral offsets (since AltiVec
8937 and VSX modes don't allow them) and of any pre-increment or decrement.
8938
8939 ??? Except that due to conceptual problems in offsettable_address_p
8940 we can't really report the problems of integral offsets. So leave
8941 this assuming that the adjustable offset must be valid for the
8942 sub-words of a TFmode operand, which is what we had before. */
8943
8944 static bool
8945 rs6000_mode_dependent_address (const_rtx addr)
8946 {
8947 switch (GET_CODE (addr))
8948 {
8949 case PLUS:
8950 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
8951 is considered a legitimate address before reload, so there
8952 are no offset restrictions in that case. Note that this
8953 condition is safe in strict mode because any address involving
8954 virtual_stack_vars_rtx or arg_pointer_rtx would already have
8955 been rejected as illegitimate. */
8956 if (XEXP (addr, 0) != virtual_stack_vars_rtx
8957 && XEXP (addr, 0) != arg_pointer_rtx
8958 && CONST_INT_P (XEXP (addr, 1)))
8959 {
8960 HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
8961 HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
8962 if (TARGET_PREFIXED)
8963 return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
8964 else
8965 return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
8966 }
8967 break;
8968
8969 case LO_SUM:
8970 /* Anything in the constant pool is sufficiently aligned that
8971 all bytes have the same high part address. */
8972 return !legitimate_constant_pool_address_p (addr, QImode, false);
8973
8974 /* Auto-increment cases are now treated generically in recog.c. */
8975 case PRE_MODIFY:
8976 return TARGET_UPDATE;
8977
8978 /* AND is only allowed in Altivec loads. */
8979 case AND:
8980 return true;
8981
8982 default:
8983 break;
8984 }
8985
8986 return false;
8987 }
8988
8989 /* Debug version of rs6000_mode_dependent_address. */
8990 static bool
8991 rs6000_debug_mode_dependent_address (const_rtx addr)
8992 {
8993 bool ret = rs6000_mode_dependent_address (addr);
8994
8995 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
8996 ret ? "true" : "false");
8997 debug_rtx (addr);
8998
8999 return ret;
9000 }
9001
9002 /* Implement FIND_BASE_TERM. */
9003
9004 rtx
9005 rs6000_find_base_term (rtx op)
9006 {
9007 rtx base;
9008
9009 base = op;
9010 if (GET_CODE (base) == CONST)
9011 base = XEXP (base, 0);
9012 if (GET_CODE (base) == PLUS)
9013 base = XEXP (base, 0);
9014 if (GET_CODE (base) == UNSPEC)
9015 switch (XINT (base, 1))
9016 {
9017 case UNSPEC_TOCREL:
9018 case UNSPEC_MACHOPIC_OFFSET:
9019 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
9020 for aliasing purposes. */
9021 return XVECEXP (base, 0, 0);
9022 }
9023
9024 return op;
9025 }
9026
9027 /* More elaborate version of recog's offsettable_memref_p predicate
9028 that works around the ??? note of rs6000_mode_dependent_address.
9029 In particular it accepts
9030
9031 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9032
9033 in 32-bit mode, which the recog predicate rejects. */
9034
9035 static bool
9036 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
9037 {
9038 bool worst_case;
9039
9040 if (!MEM_P (op))
9041 return false;
9042
9043 /* First mimic offsettable_memref_p. */
9044 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
9045 return true;
9046
9047 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
9048 the latter predicate knows nothing about the mode of the memory
9049 reference and, therefore, assumes that it is the largest supported
9050 mode (TFmode). As a consequence, legitimate offsettable memory
9051 references are rejected. rs6000_legitimate_offset_address_p contains
9052 the correct logic for the PLUS case of rs6000_mode_dependent_address,
9053 at least with a little bit of help here given that we know the
9054 actual registers used. */
9055 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
9056 || GET_MODE_SIZE (reg_mode) == 4);
9057 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
9058 strict, worst_case);
9059 }
9060
9061 /* Determine the reassociation width to be used in reassociate_bb.
9062 This takes into account how many parallel operations we
9063 can actually do of a given type, and also the latency.
9064 P8:
9065 int add/sub 6/cycle
9066 mul 2/cycle
9067 vect add/sub/mul 2/cycle
9068 fp add/sub/mul 2/cycle
9069 dfp 1/cycle
9070 */
9071
9072 static int
9073 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
9074 machine_mode mode)
9075 {
9076 switch (rs6000_tune)
9077 {
9078 case PROCESSOR_POWER8:
9079 case PROCESSOR_POWER9:
9080 case PROCESSOR_FUTURE:
9081 if (DECIMAL_FLOAT_MODE_P (mode))
9082 return 1;
9083 if (VECTOR_MODE_P (mode))
9084 return 4;
9085 if (INTEGRAL_MODE_P (mode))
9086 return 1;
9087 if (FLOAT_MODE_P (mode))
9088 return 4;
9089 break;
9090 default:
9091 break;
9092 }
9093 return 1;
9094 }
9095
9096 /* Change register usage conditional on target flags. */
9097 static void
9098 rs6000_conditional_register_usage (void)
9099 {
9100 int i;
9101
9102 if (TARGET_DEBUG_TARGET)
9103 fprintf (stderr, "rs6000_conditional_register_usage called\n");
9104
9105 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9106 if (TARGET_64BIT)
9107 fixed_regs[13] = call_used_regs[13] = 1;
9108
9109 /* Conditionally disable FPRs. */
9110 if (TARGET_SOFT_FLOAT)
9111 for (i = 32; i < 64; i++)
9112 fixed_regs[i] = call_used_regs[i] = 1;
9113
9114 /* The TOC register is not killed across calls in a way that is
9115 visible to the compiler. */
9116 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9117 call_used_regs[2] = 0;
9118
9119 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
9120 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9121
9122 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
9123 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9124 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9125
9126 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
9127 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9128 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9129
9130 if (TARGET_TOC && TARGET_MINIMAL_TOC)
9131 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9132
9133 if (!TARGET_ALTIVEC && !TARGET_VSX)
9134 {
9135 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
9136 fixed_regs[i] = call_used_regs[i] = 1;
9137 call_used_regs[VRSAVE_REGNO] = 1;
9138 }
9139
9140 if (TARGET_ALTIVEC || TARGET_VSX)
9141 global_regs[VSCR_REGNO] = 1;
9142
9143 if (TARGET_ALTIVEC_ABI)
9144 {
9145 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
9146 call_used_regs[i] = 1;
9147
9148 /* AIX reserves VR20:31 in non-extended ABI mode. */
9149 if (TARGET_XCOFF)
9150 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
9151 fixed_regs[i] = call_used_regs[i] = 1;
9152 }
9153 }
9154
9155 \f
9156 /* Output insns to set DEST equal to the constant SOURCE as a series of
9157 lis, ori and shl instructions and return TRUE. */
9158
9159 bool
9160 rs6000_emit_set_const (rtx dest, rtx source)
9161 {
9162 machine_mode mode = GET_MODE (dest);
9163 rtx temp, set;
9164 rtx_insn *insn;
9165 HOST_WIDE_INT c;
9166
9167 gcc_checking_assert (CONST_INT_P (source));
9168 c = INTVAL (source);
9169 switch (mode)
9170 {
9171 case E_QImode:
9172 case E_HImode:
9173 emit_insn (gen_rtx_SET (dest, source));
9174 return true;
9175
9176 case E_SImode:
9177 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
9178
9179 emit_insn (gen_rtx_SET (copy_rtx (temp),
9180 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
9181 emit_insn (gen_rtx_SET (dest,
9182 gen_rtx_IOR (SImode, copy_rtx (temp),
9183 GEN_INT (c & 0xffff))));
9184 break;
9185
9186 case E_DImode:
9187 if (!TARGET_POWERPC64)
9188 {
9189 rtx hi, lo;
9190
9191 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
9192 DImode);
9193 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
9194 DImode);
9195 emit_move_insn (hi, GEN_INT (c >> 32));
9196 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
9197 emit_move_insn (lo, GEN_INT (c));
9198 }
9199 else
9200 rs6000_emit_set_long_const (dest, c);
9201 break;
9202
9203 default:
9204 gcc_unreachable ();
9205 }
9206
9207 insn = get_last_insn ();
9208 set = single_set (insn);
9209 if (! CONSTANT_P (SET_SRC (set)))
9210 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
9211
9212 return true;
9213 }
9214
9215 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
9216 Output insns to set DEST equal to the constant C as a series of
9217 lis, ori and shl instructions. */
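/* As a worked example of the general (final) case below, the constant
0x123456789abcdef0 splits into ud4=0x1234, ud3=0x5678, ud2=0x9abc,
ud1=0xdef0, and is built up roughly as:

lis  9,0x1234       # temp = ud4 << 16, sign-extended
ori  9,9,0x5678     # temp |= ud3
sldi 9,9,32         # temp <<= 32
oris 9,9,0x9abc     # temp |= ud2 << 16
ori  9,9,0xdef0     # dest = temp | ud1  */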
9218
9219 static void
9220 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
9221 {
9222 rtx temp;
9223 HOST_WIDE_INT ud1, ud2, ud3, ud4;
9224
9225 ud1 = c & 0xffff;
9226 c = c >> 16;
9227 ud2 = c & 0xffff;
9228 c = c >> 16;
9229 ud3 = c & 0xffff;
9230 c = c >> 16;
9231 ud4 = c & 0xffff;
9232
9233 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
9234 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
9235 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
9236
9237 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
9238 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
9239 {
9240 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9241
9242 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9243 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9244 if (ud1 != 0)
9245 emit_move_insn (dest,
9246 gen_rtx_IOR (DImode, copy_rtx (temp),
9247 GEN_INT (ud1)));
9248 }
9249 else if (ud3 == 0 && ud4 == 0)
9250 {
9251 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9252
9253 gcc_assert (ud2 & 0x8000);
9254 emit_move_insn (copy_rtx (temp),
9255 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9256 if (ud1 != 0)
9257 emit_move_insn (copy_rtx (temp),
9258 gen_rtx_IOR (DImode, copy_rtx (temp),
9259 GEN_INT (ud1)));
9260 emit_move_insn (dest,
9261 gen_rtx_ZERO_EXTEND (DImode,
9262 gen_lowpart (SImode,
9263 copy_rtx (temp))));
9264 }
9265 else if (ud1 == ud3 && ud2 == ud4)
9266 {
9267 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9268 HOST_WIDE_INT num = (ud2 << 16) | ud1;
9269 rs6000_emit_set_long_const (temp, (num ^ 0x80000000) - 0x80000000);
9270 rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff));
9271 rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32));
9272 emit_move_insn (dest, gen_rtx_IOR (DImode, one, two));
9273 }
9274 else if ((ud4 == 0xffff && (ud3 & 0x8000))
9275 || (ud4 == 0 && ! (ud3 & 0x8000)))
9276 {
9277 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9278
9279 emit_move_insn (copy_rtx (temp),
9280 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
9281 if (ud2 != 0)
9282 emit_move_insn (copy_rtx (temp),
9283 gen_rtx_IOR (DImode, copy_rtx (temp),
9284 GEN_INT (ud2)));
9285 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9286 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9287 GEN_INT (16)));
9288 if (ud1 != 0)
9289 emit_move_insn (dest,
9290 gen_rtx_IOR (DImode, copy_rtx (temp),
9291 GEN_INT (ud1)));
9292 }
9293 else
9294 {
9295 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9296
9297 emit_move_insn (copy_rtx (temp),
9298 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
9299 if (ud3 != 0)
9300 emit_move_insn (copy_rtx (temp),
9301 gen_rtx_IOR (DImode, copy_rtx (temp),
9302 GEN_INT (ud3)));
9303
9304 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
9305 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9306 GEN_INT (32)));
9307 if (ud2 != 0)
9308 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9309 gen_rtx_IOR (DImode, copy_rtx (temp),
9310 GEN_INT (ud2 << 16)));
9311 if (ud1 != 0)
9312 emit_move_insn (dest,
9313 gen_rtx_IOR (DImode, copy_rtx (temp),
9314 GEN_INT (ud1)));
9315 }
9316 }
9317
9318 /* Helper for the following. Get rid of [r+r] memory refs
9319 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
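/* For example (with hypothetical register numbers), an indexed TImode
reference such as

(mem:TI (plus:DI (reg:DI 9) (reg:DI 10)))

is rewritten by forcing the address into a fresh pseudo:

(set (reg:DI 125) (plus:DI (reg:DI 9) (reg:DI 10)))
(mem:TI (reg:DI 125))  */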
9320
9321 static void
9322 rs6000_eliminate_indexed_memrefs (rtx operands[2])
9323 {
9324 if (MEM_P (operands[0])
9325 && !REG_P (XEXP (operands[0], 0))
9326 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
9327 GET_MODE (operands[0]), false))
9328 operands[0]
9329 = replace_equiv_address (operands[0],
9330 copy_addr_to_reg (XEXP (operands[0], 0)));
9331
9332 if (MEM_P (operands[1])
9333 && !REG_P (XEXP (operands[1], 0))
9334 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
9335 GET_MODE (operands[1]), false))
9336 operands[1]
9337 = replace_equiv_address (operands[1],
9338 copy_addr_to_reg (XEXP (operands[1], 0)));
9339 }
9340
9341 /* Generate a vector of constants to permute MODE for a little-endian
9342 storage operation by swapping the two halves of a vector. */
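/* Concretely, the selectors generated here are:

V2DImode:  { 1, 0 }
V4SImode:  { 2, 3, 0, 1 }
V8HImode:  { 4, 5, 6, 7, 0, 1, 2, 3 }

i.e. the two 64-bit halves of the vector exchanged. */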
9343 static rtvec
9344 rs6000_const_vec (machine_mode mode)
9345 {
9346 int i, subparts;
9347 rtvec v;
9348
9349 switch (mode)
9350 {
9351 case E_V1TImode:
9352 subparts = 1;
9353 break;
9354 case E_V2DFmode:
9355 case E_V2DImode:
9356 subparts = 2;
9357 break;
9358 case E_V4SFmode:
9359 case E_V4SImode:
9360 subparts = 4;
9361 break;
9362 case E_V8HImode:
9363 subparts = 8;
9364 break;
9365 case E_V16QImode:
9366 subparts = 16;
9367 break;
9368 default:
9369 gcc_unreachable();
9370 }
9371
9372 v = rtvec_alloc (subparts);
9373
9374 for (i = 0; i < subparts / 2; ++i)
9375 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
9376 for (i = subparts / 2; i < subparts; ++i)
9377 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
9378
9379 return v;
9380 }
9381
9382 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
9383 store operation. */
9384 void
9385 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
9386 {
9387 /* Scalar permutations are easier to express in integer modes than in
9388 floating-point modes, so cast them here. We use V1TImode instead
9389 of TImode to ensure that the values don't go through GPRs. */
9390 if (FLOAT128_VECTOR_P (mode))
9391 {
9392 dest = gen_lowpart (V1TImode, dest);
9393 source = gen_lowpart (V1TImode, source);
9394 mode = V1TImode;
9395 }
9396
9397 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
9398 scalar. */
9399 if (mode == TImode || mode == V1TImode)
9400 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
9401 GEN_INT (64))));
9402 else
9403 {
9404 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
9405 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
9406 }
9407 }
9408
9409 /* Emit a little-endian load from vector memory location SOURCE to VSX
9410 register DEST in mode MODE. The load is done with two permuting
9411 insns that represent an lxvd2x and xxpermdi. */
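/* A minimal sketch of the resulting code (register numbers hypothetical):
on little-endian, the raw lxvd2x loads the two doublewords swapped, and
the xxpermdi puts them back in order:

lxvd2x   0,0,9        # vs0 = doublewords of [r9], swapped
xxpermdi 0,0,0,2      # exchange the two doublewords  */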
9412 void
9413 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
9414 {
9415 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9416 V1TImode). */
9417 if (mode == TImode || mode == V1TImode)
9418 {
9419 mode = V2DImode;
9420 dest = gen_lowpart (V2DImode, dest);
9421 source = adjust_address (source, V2DImode, 0);
9422 }
9423
9424 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
9425 rs6000_emit_le_vsx_permute (tmp, source, mode);
9426 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9427 }
9428
9429 /* Emit a little-endian store to vector memory location DEST from VSX
9430 register SOURCE in mode MODE. The store is done with two permuting
9431 insns that represent an xxpermdi and an stxvd2x. */
9432 void
9433 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
9434 {
9435 /* This should never be called during or after LRA, because it does
9436 not re-permute the source register. It is intended only for use
9437 during expand. */
9438 gcc_assert (!lra_in_progress && !reload_completed);
9439
9440 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9441 V1TImode). */
9442 if (mode == TImode || mode == V1TImode)
9443 {
9444 mode = V2DImode;
9445 dest = adjust_address (dest, V2DImode, 0);
9446 source = gen_lowpart (V2DImode, source);
9447 }
9448
9449 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
9450 rs6000_emit_le_vsx_permute (tmp, source, mode);
9451 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9452 }
9453
9454 /* Emit a sequence representing a little-endian VSX load or store,
9455 moving data from SOURCE to DEST in mode MODE. This is done
9456 separately from rs6000_emit_move to ensure it is called only
9457 during expand. LE VSX loads and stores introduced later are
9458 handled with a split. The expand-time RTL generation allows
9459 us to optimize away redundant pairs of register-permutes. */
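/* For example, a memory-to-memory V2DImode copy expands here to
lxvd2x + xxpermdi for the load and xxpermdi + stxvd2x for the store;
the two adjacent xxpermdi insns are inverses of each other, so later
optimization can remove the pair, leaving just lxvd2x + stxvd2x. */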
9460 void
9461 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
9462 {
9463 gcc_assert (!BYTES_BIG_ENDIAN
9464 && VECTOR_MEM_VSX_P (mode)
9465 && !TARGET_P9_VECTOR
9466 && !gpr_or_gpr_p (dest, source)
9467 && (MEM_P (source) ^ MEM_P (dest)));
9468
9469 if (MEM_P (source))
9470 {
9471 gcc_assert (REG_P (dest) || SUBREG_P (dest));
9472 rs6000_emit_le_vsx_load (dest, source, mode);
9473 }
9474 else
9475 {
9476 if (!REG_P (source))
9477 source = force_reg (mode, source);
9478 rs6000_emit_le_vsx_store (dest, source, mode);
9479 }
9480 }
9481
9482 /* Return whether an SFmode or SImode move can be done without converting one
9483 mode to another. This arises when we have:
9484
9485 (SUBREG:SF (REG:SI ...))
9486 (SUBREG:SI (REG:SF ...))
9487
9488 and one of the values is in a floating point/vector register, where SFmode
9489 scalars are stored in DFmode format. */
9490
9491 bool
9492 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
9493 {
9494 if (TARGET_ALLOW_SF_SUBREG)
9495 return true;
9496
9497 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
9498 return true;
9499
9500 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
9501 return true;
9502
9503 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
9504 if (SUBREG_P (dest))
9505 {
9506 rtx dest_subreg = SUBREG_REG (dest);
9507 rtx src_subreg = SUBREG_REG (src);
9508 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
9509 }
9510
9511 return false;
9512 }
9513
9514
9515 /* Helper function to change moves with:
9516
9517 (SUBREG:SF (REG:SI)) and
9518 (SUBREG:SI (REG:SF))
9519
9520 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
9521 values are stored as DFmode values in the VSX registers. We need to convert
9522 the bits before we can use a direct move or operate on the bits in the
9523 vector register as an integer type.
9524
9525 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
9526
9527 static bool
9528 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
9529 {
9530 if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
9531 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
9532 && SUBREG_P (source) && sf_subreg_operand (source, mode))
9533 {
9534 rtx inner_source = SUBREG_REG (source);
9535 machine_mode inner_mode = GET_MODE (inner_source);
9536
9537 if (mode == SImode && inner_mode == SFmode)
9538 {
9539 emit_insn (gen_movsi_from_sf (dest, inner_source));
9540 return true;
9541 }
9542
9543 if (mode == SFmode && inner_mode == SImode)
9544 {
9545 emit_insn (gen_movsf_from_si (dest, inner_source));
9546 return true;
9547 }
9548 }
9549
9550 return false;
9551 }
9552
9553 /* Emit a move from SOURCE to DEST in mode MODE. */
9554 void
9555 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
9556 {
9557 rtx operands[2];
9558 operands[0] = dest;
9559 operands[1] = source;
9560
9561 if (TARGET_DEBUG_ADDR)
9562 {
9563 fprintf (stderr,
9564 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
9565 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
9566 GET_MODE_NAME (mode),
9567 lra_in_progress,
9568 reload_completed,
9569 can_create_pseudo_p ());
9570 debug_rtx (dest);
9571 fprintf (stderr, "source:\n");
9572 debug_rtx (source);
9573 }
9574
9575 /* Check that we get CONST_WIDE_INT only when we should. */
9576 if (CONST_WIDE_INT_P (operands[1])
9577 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
9578 gcc_unreachable ();
9579
9580 #ifdef HAVE_AS_GNU_ATTRIBUTE
9581 /* If we use a long double type, set the flags in .gnu_attribute that say
9582 what the long double type is. This is to allow the linker's warning
9583 message for the wrong long double to be useful, even if the function does
9584 not do a call (for example, doing a 128-bit add on power9 if the long
9585 double type is IEEE 128-bit). Do not set this if __ibm128 or __float128
9586 are used and they aren't the default long double type. */
9587 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
9588 {
9589 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
9590 rs6000_passes_float = rs6000_passes_long_double = true;
9591
9592 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
9593 rs6000_passes_float = rs6000_passes_long_double = true;
9594 }
9595 #endif
9596
9597 /* See if we need to special case SImode/SFmode SUBREG moves. */
9598 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
9599 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
9600 return;
9601
9602 /* Check if GCC is setting up a block move that will end up using FP
9603 registers as temporaries. We must make sure this is acceptable. */
9604 if (MEM_P (operands[0])
9605 && MEM_P (operands[1])
9606 && mode == DImode
9607 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
9608 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
9609 && ! (rs6000_slow_unaligned_access (SImode,
9610 (MEM_ALIGN (operands[0]) > 32
9611 ? 32 : MEM_ALIGN (operands[0])))
9612 || rs6000_slow_unaligned_access (SImode,
9613 (MEM_ALIGN (operands[1]) > 32
9614 ? 32 : MEM_ALIGN (operands[1]))))
9615 && ! MEM_VOLATILE_P (operands [0])
9616 && ! MEM_VOLATILE_P (operands [1]))
9617 {
9618 emit_move_insn (adjust_address (operands[0], SImode, 0),
9619 adjust_address (operands[1], SImode, 0));
9620 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
9621 adjust_address (copy_rtx (operands[1]), SImode, 4));
9622 return;
9623 }
9624
9625 if (can_create_pseudo_p () && MEM_P (operands[0])
9626 && !gpc_reg_operand (operands[1], mode))
9627 operands[1] = force_reg (mode, operands[1]);
9628
9629 /* Recognize the case where operand[1] is a reference to thread-local
9630 data and load its address to a register. */
9631 if (tls_referenced_p (operands[1]))
9632 {
9633 enum tls_model model;
9634 rtx tmp = operands[1];
9635 rtx addend = NULL;
9636
9637 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
9638 {
9639 addend = XEXP (XEXP (tmp, 0), 1);
9640 tmp = XEXP (XEXP (tmp, 0), 0);
9641 }
9642
9643 gcc_assert (SYMBOL_REF_P (tmp));
9644 model = SYMBOL_REF_TLS_MODEL (tmp);
9645 gcc_assert (model != 0);
9646
9647 tmp = rs6000_legitimize_tls_address (tmp, model);
9648 if (addend)
9649 {
9650 tmp = gen_rtx_PLUS (mode, tmp, addend);
9651 tmp = force_operand (tmp, operands[0]);
9652 }
9653 operands[1] = tmp;
9654 }
9655
9656 /* 128-bit constant floating-point values on Darwin should really be loaded
9657 as two parts. However, this premature splitting is a problem when DFmode
9658 values can go into Altivec registers. */
9659 if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
9660 && !reg_addr[DFmode].scalar_in_vmx_p)
9661 {
9662 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
9663 simplify_gen_subreg (DFmode, operands[1], mode, 0),
9664 DFmode);
9665 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
9666 GET_MODE_SIZE (DFmode)),
9667 simplify_gen_subreg (DFmode, operands[1], mode,
9668 GET_MODE_SIZE (DFmode)),
9669 DFmode);
9670 return;
9671 }
9672
9673 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
9674 p1:SD) if p1 is not of floating point class and p0 is spilled,
9675 since there is no analogous movsd_store for this case. */
9676 if (lra_in_progress && mode == DDmode
9677 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
9678 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9679 && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
9680 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
9681 {
9682 enum reg_class cl;
9683 int regno = REGNO (SUBREG_REG (operands[1]));
9684
9685 if (!HARD_REGISTER_NUM_P (regno))
9686 {
9687 cl = reg_preferred_class (regno);
9688 regno = reg_renumber[regno];
9689 if (regno < 0)
9690 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
9691 }
9692 if (regno >= 0 && ! FP_REGNO_P (regno))
9693 {
9694 mode = SDmode;
9695 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
9696 operands[1] = SUBREG_REG (operands[1]);
9697 }
9698 }
9699 if (lra_in_progress
9700 && mode == SDmode
9701 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
9702 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9703 && (REG_P (operands[1])
9704 || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
9705 {
9706 int regno = reg_or_subregno (operands[1]);
9707 enum reg_class cl;
9708
9709 if (!HARD_REGISTER_NUM_P (regno))
9710 {
9711 cl = reg_preferred_class (regno);
9712 gcc_assert (cl != NO_REGS);
9713 regno = reg_renumber[regno];
9714 if (regno < 0)
9715 regno = ira_class_hard_regs[cl][0];
9716 }
9717 if (FP_REGNO_P (regno))
9718 {
9719 if (GET_MODE (operands[0]) != DDmode)
9720 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
9721 emit_insn (gen_movsd_store (operands[0], operands[1]));
9722 }
9723 else if (INT_REGNO_P (regno))
9724 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9725 else
9726 gcc_unreachable();
9727 return;
9728 }
9729 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
9730 p1:DD)) if p0 is not of floating point class and p1 is spilled,
9731 since there is no analogous movsd_load for this case. */
9732 if (lra_in_progress && mode == DDmode
9733 && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
9734 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
9735 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
9736 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9737 {
9738 enum reg_class cl;
9739 int regno = REGNO (SUBREG_REG (operands[0]));
9740
9741 if (!HARD_REGISTER_NUM_P (regno))
9742 {
9743 cl = reg_preferred_class (regno);
9744 regno = reg_renumber[regno];
9745 if (regno < 0)
9746 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
9747 }
9748 if (regno >= 0 && ! FP_REGNO_P (regno))
9749 {
9750 mode = SDmode;
9751 operands[0] = SUBREG_REG (operands[0]);
9752 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
9753 }
9754 }
9755 if (lra_in_progress
9756 && mode == SDmode
9757 && (REG_P (operands[0])
9758 || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
9759 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
9760 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9761 {
9762 int regno = reg_or_subregno (operands[0]);
9763 enum reg_class cl;
9764
9765 if (!HARD_REGISTER_NUM_P (regno))
9766 {
9767 cl = reg_preferred_class (regno);
9768 gcc_assert (cl != NO_REGS);
9769 regno = reg_renumber[regno];
9770 if (regno < 0)
9771 regno = ira_class_hard_regs[cl][0];
9772 }
9773 if (FP_REGNO_P (regno))
9774 {
9775 if (GET_MODE (operands[1]) != DDmode)
9776 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
9777 emit_insn (gen_movsd_load (operands[0], operands[1]));
9778 }
9779 else if (INT_REGNO_P (regno))
9780 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9781 else
9782 gcc_unreachable();
9783 return;
9784 }
9785
9786 /* FIXME: In the long term, this switch statement should go away
9787 and be replaced by a sequence of tests based on things like
9788 mode == Pmode. */
9789 switch (mode)
9790 {
9791 case E_HImode:
9792 case E_QImode:
9793 if (CONSTANT_P (operands[1])
9794 && !CONST_INT_P (operands[1]))
9795 operands[1] = force_const_mem (mode, operands[1]);
9796 break;
9797
9798 case E_TFmode:
9799 case E_TDmode:
9800 case E_IFmode:
9801 case E_KFmode:
9802 if (FLOAT128_2REG_P (mode))
9803 rs6000_eliminate_indexed_memrefs (operands);
9804 /* fall through */
9805
9806 case E_DFmode:
9807 case E_DDmode:
9808 case E_SFmode:
9809 case E_SDmode:
9810 if (CONSTANT_P (operands[1])
9811 && ! easy_fp_constant (operands[1], mode))
9812 operands[1] = force_const_mem (mode, operands[1]);
9813 break;
9814
9815 case E_V16QImode:
9816 case E_V8HImode:
9817 case E_V4SFmode:
9818 case E_V4SImode:
9819 case E_V2DFmode:
9820 case E_V2DImode:
9821 case E_V1TImode:
9822 if (CONSTANT_P (operands[1])
9823 && !easy_vector_constant (operands[1], mode))
9824 operands[1] = force_const_mem (mode, operands[1]);
9825 break;
9826
9827 case E_SImode:
9828 case E_DImode:
9829 /* Use the default pattern for the address of ELF small data. */
9830 if (TARGET_ELF
9831 && mode == Pmode
9832 && DEFAULT_ABI == ABI_V4
9833 && (SYMBOL_REF_P (operands[1])
9834 || GET_CODE (operands[1]) == CONST)
9835 && small_data_operand (operands[1], mode))
9836 {
9837 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9838 return;
9839 }
9840
9841 /* Use the default pattern for loading up PC-relative addresses. */
9842 if (TARGET_PCREL && mode == Pmode
9843 && pcrel_local_or_external_address (operands[1], Pmode))
9844 {
9845 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9846 return;
9847 }
9848
9849 if (DEFAULT_ABI == ABI_V4
9850 && mode == Pmode && mode == SImode
9851 && flag_pic == 1 && got_operand (operands[1], mode))
9852 {
9853 emit_insn (gen_movsi_got (operands[0], operands[1]));
9854 return;
9855 }
9856
9857 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
9858 && TARGET_NO_TOC_OR_PCREL
9859 && ! flag_pic
9860 && mode == Pmode
9861 && CONSTANT_P (operands[1])
9862 && GET_CODE (operands[1]) != HIGH
9863 && !CONST_INT_P (operands[1]))
9864 {
9865 rtx target = (!can_create_pseudo_p ()
9866 ? operands[0]
9867 : gen_reg_rtx (mode));
9868
9869 /* If this is a function address on -mcall-aixdesc,
9870 convert it to the address of the descriptor. */
9871 if (DEFAULT_ABI == ABI_AIX
9872 && SYMBOL_REF_P (operands[1])
9873 && XSTR (operands[1], 0)[0] == '.')
9874 {
9875 const char *name = XSTR (operands[1], 0);
9876 rtx new_ref;
9877 while (*name == '.')
9878 name++;
9879 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
9880 CONSTANT_POOL_ADDRESS_P (new_ref)
9881 = CONSTANT_POOL_ADDRESS_P (operands[1]);
9882 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
9883 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
9884 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
9885 operands[1] = new_ref;
9886 }
9887
9888 if (DEFAULT_ABI == ABI_DARWIN)
9889 {
9890 #if TARGET_MACHO
9891 /* This is not PIC code, but could require the subset of
9892 indirections used by mdynamic-no-pic. */
9893 if (MACHO_DYNAMIC_NO_PIC_P)
9894 {
9895 /* Take care of any required data indirection. */
9896 operands[1] = rs6000_machopic_legitimize_pic_address (
9897 operands[1], mode, operands[0]);
9898 if (operands[0] != operands[1])
9899 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9900 return;
9901 }
9902 #endif
9903 emit_insn (gen_macho_high (Pmode, target, operands[1]));
9904 emit_insn (gen_macho_low (Pmode, operands[0],
9905 target, operands[1]));
9906 return;
9907 }
9908
9909 emit_insn (gen_elf_high (target, operands[1]));
9910 emit_insn (gen_elf_low (operands[0], target, operands[1]));
9911 return;
9912 }
9913
9914 /* If this is a SYMBOL_REF that refers to a constant pool entry,
9915 and we have put it in the TOC, we just need to make a TOC-relative
9916 reference to it. */
9917 if (TARGET_TOC
9918 && SYMBOL_REF_P (operands[1])
9919 && use_toc_relative_ref (operands[1], mode))
9920 operands[1] = create_TOC_reference (operands[1], operands[0]);
9921 else if (mode == Pmode
9922 && CONSTANT_P (operands[1])
9923 && GET_CODE (operands[1]) != HIGH
9924 && ((REG_P (operands[0])
9925 && FP_REGNO_P (REGNO (operands[0])))
9926 || !CONST_INT_P (operands[1])
9927 || (num_insns_constant (operands[1], mode)
9928 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
9929 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
9930 && (TARGET_CMODEL == CMODEL_SMALL
9931 || can_create_pseudo_p ()
9932 || (REG_P (operands[0])
9933 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
9934 {
9935
9936 #if TARGET_MACHO
9937 /* Darwin uses a special PIC legitimizer. */
9938 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
9939 {
9940 operands[1] =
9941 rs6000_machopic_legitimize_pic_address (operands[1], mode,
9942 operands[0]);
9943 if (operands[0] != operands[1])
9944 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9945 return;
9946 }
9947 #endif
9948
9949 /* If we are to limit the number of things we put in the TOC and
9950 this is a symbol plus a constant we can add in one insn,
9951 just put the symbol in the TOC and add the constant. */
9952 if (GET_CODE (operands[1]) == CONST
9953 && TARGET_NO_SUM_IN_TOC
9954 && GET_CODE (XEXP (operands[1], 0)) == PLUS
9955 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
9956 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
9957 || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
9958 && ! side_effects_p (operands[0]))
9959 {
9960 rtx sym =
9961 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
9962 rtx other = XEXP (XEXP (operands[1], 0), 1);
9963
9964 sym = force_reg (mode, sym);
9965 emit_insn (gen_add3_insn (operands[0], sym, other));
9966 return;
9967 }
9968
9969 operands[1] = force_const_mem (mode, operands[1]);
9970
9971 if (TARGET_TOC
9972 && SYMBOL_REF_P (XEXP (operands[1], 0))
9973 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
9974 {
9975 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
9976 operands[0]);
9977 operands[1] = gen_const_mem (mode, tocref);
9978 set_mem_alias_set (operands[1], get_TOC_alias_set ());
9979 }
9980 }
9981 break;
9982
9983 case E_TImode:
9984 if (!VECTOR_MEM_VSX_P (TImode))
9985 rs6000_eliminate_indexed_memrefs (operands);
9986 break;
9987
9988 case E_PTImode:
9989 rs6000_eliminate_indexed_memrefs (operands);
9990 break;
9991
9992 default:
9993 fatal_insn ("bad move", gen_rtx_SET (dest, source));
9994 }
9995
9996 /* Above, we may have called force_const_mem which may have returned
9997 an invalid address. If we can, fix this up; otherwise, reload will
9998 have to deal with it. */
9999 if (MEM_P (operands[1]))
10000 operands[1] = validize_mem (operands[1]);
10001
10002 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10003 }
10004 \f
10005
10006 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
10007 static void
10008 init_float128_ibm (machine_mode mode)
10009 {
10010 if (!TARGET_XL_COMPAT)
10011 {
10012 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
10013 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
10014 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
10015 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
10016
10017 if (!TARGET_HARD_FLOAT)
10018 {
10019 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
10020 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
10021 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
10022 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
10023 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
10024 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
10025 set_optab_libfunc (le_optab, mode, "__gcc_qle");
10026 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
10027
10028 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
10029 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
10030 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
10031 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
10032 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
10033 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
10034 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
10035 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
10036 }
10037 }
10038 else
10039 {
10040 set_optab_libfunc (add_optab, mode, "_xlqadd");
10041 set_optab_libfunc (sub_optab, mode, "_xlqsub");
10042 set_optab_libfunc (smul_optab, mode, "_xlqmul");
10043 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
10044 }
10045
10046 /* Add various conversions for IFmode to use the traditional TFmode
10047 names. */
10048 if (mode == IFmode)
10049 {
10050 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
10051 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
10052 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
10053 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
10054 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
10055 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
10056
10057 if (TARGET_POWERPC64)
10058 {
10059 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
10060 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
10061 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
10062 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
10063 }
10064 }
10065 }
10066
10067 /* Create a decl for either complex long double multiply or complex long double
10068 divide when long double is IEEE 128-bit floating point. We can't use
10069 __multc3 and __divtc3 because those names were already used by the original
10070 long double format, IBM extended double. The complex multiply/divide functions are encoded
10071 as builtin functions with a complex result and 4 scalar inputs. */
10072
10073 static void
10074 create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
10075 {
10076 tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL,
10077 name, NULL_TREE);
10078
10079 set_builtin_decl (fncode, fndecl, true);
10080
10081 if (TARGET_DEBUG_BUILTIN)
10082 fprintf (stderr, "create complex %s, fncode: %d\n", name, (int) fncode);
10083
10084 return;
10085 }
10086
10087 /* Set up IEEE 128-bit floating point routines. Use different names if the
10088 arguments can be passed in a vector register. The historical PowerPC
10089 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
10090 continue to use that if we aren't using vector registers to pass IEEE
10091 128-bit floating point. */
10092
10093 static void
10094 init_float128_ieee (machine_mode mode)
10095 {
10096 if (FLOAT128_VECTOR_P (mode))
10097 {
10098 static bool complex_muldiv_init_p = false;
10099
10100 /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble. If
10101 we have clone or target attributes, this will be called a second
10102 time. We want to create the built-in function only once. */
10103 if (mode == TFmode && TARGET_IEEEQUAD && !complex_muldiv_init_p)
10104 {
10105 complex_muldiv_init_p = true;
10106 built_in_function fncode_mul =
10107 (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + TCmode
10108 - MIN_MODE_COMPLEX_FLOAT);
10109 built_in_function fncode_div =
10110 (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + TCmode
10111 - MIN_MODE_COMPLEX_FLOAT);
10112
10113 tree fntype = build_function_type_list (complex_long_double_type_node,
10114 long_double_type_node,
10115 long_double_type_node,
10116 long_double_type_node,
10117 long_double_type_node,
10118 NULL_TREE);
10119
10120 create_complex_muldiv ("__mulkc3", fncode_mul, fntype);
10121 create_complex_muldiv ("__divkc3", fncode_div, fntype);
10122 }
10123
10124 set_optab_libfunc (add_optab, mode, "__addkf3");
10125 set_optab_libfunc (sub_optab, mode, "__subkf3");
10126 set_optab_libfunc (neg_optab, mode, "__negkf2");
10127 set_optab_libfunc (smul_optab, mode, "__mulkf3");
10128 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
10129 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
10130 set_optab_libfunc (abs_optab, mode, "__abskf2");
10131 set_optab_libfunc (powi_optab, mode, "__powikf2");
10132
10133 set_optab_libfunc (eq_optab, mode, "__eqkf2");
10134 set_optab_libfunc (ne_optab, mode, "__nekf2");
10135 set_optab_libfunc (gt_optab, mode, "__gtkf2");
10136 set_optab_libfunc (ge_optab, mode, "__gekf2");
10137 set_optab_libfunc (lt_optab, mode, "__ltkf2");
10138 set_optab_libfunc (le_optab, mode, "__lekf2");
10139 set_optab_libfunc (unord_optab, mode, "__unordkf2");
10140
10141 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
10142 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
10143 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
10144 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
10145
10146 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
10147 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
10148 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
10149
10150 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
10151 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
10152 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
10153
10154 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
10155 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
10156 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
10157 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
10158 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
10159 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
10160
10161 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
10162 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
10163 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
10164 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
10165
10166 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
10167 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
10168 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
10169 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
10170
10171 if (TARGET_POWERPC64)
10172 {
10173 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
10174 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
10175 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
10176 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
10177 }
10178 }
10179
10180 else
10181 {
10182 set_optab_libfunc (add_optab, mode, "_q_add");
10183 set_optab_libfunc (sub_optab, mode, "_q_sub");
10184 set_optab_libfunc (neg_optab, mode, "_q_neg");
10185 set_optab_libfunc (smul_optab, mode, "_q_mul");
10186 set_optab_libfunc (sdiv_optab, mode, "_q_div");
10187 if (TARGET_PPC_GPOPT)
10188 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
10189
10190 set_optab_libfunc (eq_optab, mode, "_q_feq");
10191 set_optab_libfunc (ne_optab, mode, "_q_fne");
10192 set_optab_libfunc (gt_optab, mode, "_q_fgt");
10193 set_optab_libfunc (ge_optab, mode, "_q_fge");
10194 set_optab_libfunc (lt_optab, mode, "_q_flt");
10195 set_optab_libfunc (le_optab, mode, "_q_fle");
10196
10197 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
10198 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
10199 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
10200 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
10201 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
10202 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
10203 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
10204 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
10205 }
10206 }
10207
10208 static void
10209 rs6000_init_libfuncs (void)
10210 {
10211 /* __float128 support. */
10212 if (TARGET_FLOAT128_TYPE)
10213 {
10214 init_float128_ibm (IFmode);
10215 init_float128_ieee (KFmode);
10216 }
10217
10218 /* AIX/Darwin/64-bit Linux quad floating point routines. */
10219 if (TARGET_LONG_DOUBLE_128)
10220 {
10221 if (!TARGET_IEEEQUAD)
10222 init_float128_ibm (TFmode);
10223
10224 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
10225 else
10226 init_float128_ieee (TFmode);
10227 }
10228 }
10229
10230 /* Emit a potentially record-form instruction, setting DST from SRC.
10231 If DOT is 0, that is all; otherwise, set CCREG to the result of the
10232 signed comparison of DST with zero. If DOT is 1, the generated RTL
10233 doesn't care about the DST result; if DOT is 2, it does. If CCREG
10234 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
10235 a separate COMPARE. */
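/* Schematically, when CCREG is CR0 the two record-form shapes are:

DOT == 1:  (parallel [(set CCREG (compare:CC SRC (const_int 0)))
                      (clobber DST)])
DOT == 2:  (parallel [(set CCREG (compare:CC SRC (const_int 0)))
                      (set DST SRC)])

each of which matches a single dot-form instruction. */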
10236
10237 void
10238 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
10239 {
10240 if (dot == 0)
10241 {
10242 emit_move_insn (dst, src);
10243 return;
10244 }
10245
10246 if (cc_reg_not_cr0_operand (ccreg, CCmode))
10247 {
10248 emit_move_insn (dst, src);
10249 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
10250 return;
10251 }
10252
10253 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
10254 if (dot == 1)
10255 {
10256 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
10257 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
10258 }
10259 else
10260 {
10261 rtx set = gen_rtx_SET (dst, src);
10262 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
10263 }
10264 }
10265
10266 \f
10267 /* A validation routine: say whether CODE, a condition code, and MODE
10268 match. The other alternatives either don't make sense or should
10269 never be generated. */
10270
10271 void
10272 validate_condition_mode (enum rtx_code code, machine_mode mode)
10273 {
10274 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
10275 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
10276 && GET_MODE_CLASS (mode) == MODE_CC);
10277
10278 /* These don't make sense. */
10279 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
10280 || mode != CCUNSmode);
10281
10282 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
10283 || mode == CCUNSmode);
10284
10285 gcc_assert (mode == CCFPmode
10286 || (code != ORDERED && code != UNORDERED
10287 && code != UNEQ && code != LTGT
10288 && code != UNGT && code != UNLT
10289 && code != UNGE && code != UNLE));
10290
10291 /* These are invalid; the information is not there. */
10292 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
10293 }
10294
10295 \f
10296 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
10297 rldicl, rldicr, or rldic instruction in mode MODE. If so, and E is
10298 non-null, store there the bit offset (counted from the right) where
10299 the single stretch of 1 bits begins; and similarly for B, the bit
10300 offset where it ends. */
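/* For example, for MASK = 0x00ffff00 the single stretch of 1 bits starts
at bit 8 and ends at bit 23 (counting from the right), so this returns
true with *E = 8 and *B = 23. */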
10301
10302 bool
10303 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
10304 {
10305 unsigned HOST_WIDE_INT val = INTVAL (mask);
10306 unsigned HOST_WIDE_INT bit;
10307 int nb, ne;
10308 int n = GET_MODE_PRECISION (mode);
10309
10310 if (mode != DImode && mode != SImode)
10311 return false;
10312
10313 if (INTVAL (mask) >= 0)
10314 {
10315 bit = val & -val;
10316 ne = exact_log2 (bit);
10317 nb = exact_log2 (val + bit);
10318 }
10319 else if (val + 1 == 0)
10320 {
10321 nb = n;
10322 ne = 0;
10323 }
10324 else if (val & 1)
10325 {
10326 val = ~val;
10327 bit = val & -val;
10328 nb = exact_log2 (bit);
10329 ne = exact_log2 (val + bit);
10330 }
10331 else
10332 {
10333 bit = val & -val;
10334 ne = exact_log2 (bit);
10335 if (val + bit == 0)
10336 nb = n;
10337 else
10338 nb = 0;
10339 }
10340
10341 nb--;
10342
10343 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
10344 return false;
10345
10346 if (b)
10347 *b = nb;
10348 if (e)
10349 *e = ne;
10350
10351 return true;
10352 }
10353
10354 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
10355 or rldicr instruction, to implement an AND with it in mode MODE. */
10356
10357 bool
10358 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
10359 {
10360 int nb, ne;
10361
10362 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10363 return false;
10364
10365 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
10366 does not wrap. */
10367 if (mode == DImode)
10368 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
10369
10370 /* For SImode, rlwinm can do everything. */
10371 if (mode == SImode)
10372 return (nb < 32 && ne < 32);
10373
10374 return false;
10375 }
10376
10377 /* Return the instruction template for an AND with mask in mode MODE, with
10378 operands OPERANDS. If DOT is true, make it a record-form instruction. */
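/* For instance, a DImode AND with 0xff (nb = 7, ne = 0) yields
"rldicl %0,%1,0,56", and an SImode AND with 0xffffff00 (nb = 31,
ne = 8) yields "rlwinm %0,%1,0,0,23". */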
10379
10380 const char *
10381 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
10382 {
10383 int nb, ne;
10384
10385 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
10386 gcc_unreachable ();
10387
10388 if (mode == DImode && ne == 0)
10389 {
10390 operands[3] = GEN_INT (63 - nb);
10391 if (dot)
10392 return "rldicl. %0,%1,0,%3";
10393 return "rldicl %0,%1,0,%3";
10394 }
10395
10396 if (mode == DImode && nb == 63)
10397 {
10398 operands[3] = GEN_INT (63 - ne);
10399 if (dot)
10400 return "rldicr. %0,%1,0,%3";
10401 return "rldicr %0,%1,0,%3";
10402 }
10403
10404 if (nb < 32 && ne < 32)
10405 {
10406 operands[3] = GEN_INT (31 - nb);
10407 operands[4] = GEN_INT (31 - ne);
10408 if (dot)
10409 return "rlwinm. %0,%1,0,%3,%4";
10410 return "rlwinm %0,%1,0,%3,%4";
10411 }
10412
10413 gcc_unreachable ();
10414 }
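
/* For instance (editorial note; example values chosen here): an AND with
   0xff in DImode has ne == 0 and nb == 7, so the first branch above
   yields "rldicl %0,%1,0,56", i.e. the canonical clrldi that clears all
   but the low 8 bits.  */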
10415
10416 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
10417 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
10418 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
10419
10420 bool
10421 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
10422 {
10423 int nb, ne;
10424
10425 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10426 return false;
10427
10428 int n = GET_MODE_PRECISION (mode);
10429 int sh = -1;
10430
10431 if (CONST_INT_P (XEXP (shift, 1)))
10432 {
10433 sh = INTVAL (XEXP (shift, 1));
10434 if (sh < 0 || sh >= n)
10435 return false;
10436 }
10437
10438 rtx_code code = GET_CODE (shift);
10439
10440 /* Convert any shift by 0 to a rotate, to simplify the code below. */
10441 if (sh == 0)
10442 code = ROTATE;
10443
10444 /* Convert rotate to simple shift if we can, to make analysis simpler. */
10445 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
10446 code = ASHIFT;
10447 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
10448 {
10449 code = LSHIFTRT;
10450 sh = n - sh;
10451 }
10452
10453 /* DImode rotates need rld*. */
10454 if (mode == DImode && code == ROTATE)
10455 return (nb == 63 || ne == 0 || ne == sh);
10456
10457 /* SImode rotates need rlw*. */
10458 if (mode == SImode && code == ROTATE)
10459 return (nb < 32 && ne < 32 && sh < 32);
10460
10461 /* Wrap-around masks are only okay for rotates. */
10462 if (ne > nb)
10463 return false;
10464
10465 /* Variable shifts are only okay for rotates. */
10466 if (sh < 0)
10467 return false;
10468
10469 /* Don't allow ASHIFT if the mask is wrong for that. */
10470 if (code == ASHIFT && ne < sh)
10471 return false;
10472
10473 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
10474 if the mask is wrong for that. */
10475 if (nb < 32 && ne < 32 && sh < 32
10476 && !(code == LSHIFTRT && nb >= 32 - sh))
10477 return true;
10478
10479 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
10480 if the mask is wrong for that. */
10481 if (code == LSHIFTRT)
10482 sh = 64 - sh;
10483 if (nb == 63 || ne == 0 || ne == sh)
10484 return !(code == LSHIFTRT && nb >= sh);
10485
10486 return false;
10487 }
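
/* Example (editorial note; values chosen for illustration): (ashift x 8)
   with mask 0x0000ff00 in SImode has ne = 8, nb = 15, sh = 8.  The mask
   does not wrap (ne <= nb), the ASHIFT check passes because ne >= sh,
   and nb, ne, and sh are all below 32, so the combination is accepted
   and can be emitted as a single rlwinm.  */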
10488
10489 /* Return the instruction template for a shift with mask in mode MODE, with
10490 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10491
10492 const char *
10493 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
10494 {
10495 int nb, ne;
10496
10497 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
10498 gcc_unreachable ();
10499
10500 if (mode == DImode && ne == 0)
10501 {
10502 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
10503 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
10504 operands[3] = GEN_INT (63 - nb);
10505 if (dot)
10506 return "rld%I2cl. %0,%1,%2,%3";
10507 return "rld%I2cl %0,%1,%2,%3";
10508 }
10509
10510 if (mode == DImode && nb == 63)
10511 {
10512 operands[3] = GEN_INT (63 - ne);
10513 if (dot)
10514 return "rld%I2cr. %0,%1,%2,%3";
10515 return "rld%I2cr %0,%1,%2,%3";
10516 }
10517
10518 if (mode == DImode
10519 && GET_CODE (operands[4]) != LSHIFTRT
10520 && CONST_INT_P (operands[2])
10521 && ne == INTVAL (operands[2]))
10522 {
10523 operands[3] = GEN_INT (63 - nb);
10524 if (dot)
10525 return "rld%I2c. %0,%1,%2,%3";
10526 return "rld%I2c %0,%1,%2,%3";
10527 }
10528
10529 if (nb < 32 && ne < 32)
10530 {
10531 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
10532 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
10533 operands[3] = GEN_INT (31 - nb);
10534 operands[4] = GEN_INT (31 - ne);
10535 /* This insn can also be a 64-bit rotate with mask that really makes
10536 it just a shift right (with mask); the %h below are to adjust for
10537 that situation (shift count is >= 32 in that case). */
10538 if (dot)
10539 return "rlw%I2nm. %0,%1,%h2,%3,%4";
10540 return "rlw%I2nm %0,%1,%h2,%3,%4";
10541 }
10542
10543 gcc_unreachable ();
10544 }
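
/* Example (editorial note; values chosen for illustration): a 32-bit
   logical shift right by 16 arrives here as mask 0x0000ffff (nb = 15,
   ne = 0) with an LSHIFTRT shift amount of 16; the count is rewritten
   to 32 - 16 = 16 and the template becomes "rlwinm %0,%1,16,16,31",
   which is srwi spelled out in full.  */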
10545
10546 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
10547 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
10548 ASHIFT, or LSHIFTRT) in mode MODE. */
10549
10550 bool
10551 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
10552 {
10553 int nb, ne;
10554
10555 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10556 return false;
10557
10558 int n = GET_MODE_PRECISION (mode);
10559
10560 int sh = INTVAL (XEXP (shift, 1));
10561 if (sh < 0 || sh >= n)
10562 return false;
10563
10564 rtx_code code = GET_CODE (shift);
10565
10566 /* Convert any shift by 0 to a rotate, to simplify the code below. */
10567 if (sh == 0)
10568 code = ROTATE;
10569
10570 /* Convert rotate to simple shift if we can, to make analysis simpler. */
10571 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
10572 code = ASHIFT;
10573 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
10574 {
10575 code = LSHIFTRT;
10576 sh = n - sh;
10577 }
10578
10579 /* DImode rotates need rldimi. */
10580 if (mode == DImode && code == ROTATE)
10581 return (ne == sh);
10582
10583 /* SImode rotates need rlwimi. */
10584 if (mode == SImode && code == ROTATE)
10585 return (nb < 32 && ne < 32 && sh < 32);
10586
10587 /* Wrap-around masks are only okay for rotates. */
10588 if (ne > nb)
10589 return false;
10590
10591 /* Don't allow ASHIFT if the mask is wrong for that. */
10592 if (code == ASHIFT && ne < sh)
10593 return false;
10594
10595 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
10596 if the mask is wrong for that. */
10597 if (nb < 32 && ne < 32 && sh < 32
10598 && !(code == LSHIFTRT && nb >= 32 - sh))
10599 return true;
10600
10601 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
10602 if the mask is wrong for that. */
10603 if (code == LSHIFTRT)
10604 sh = 64 - sh;
10605 if (ne == sh)
10606 return !(code == LSHIFTRT && nb >= sh);
10607
10608 return false;
10609 }
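
/* Example (editorial note; values chosen for illustration): (rotate x 32)
   with mask 0xffffffff00000000 in DImode gives ne = 32, nb = 63, sh = 32.
   The rotate is first canonicalized to an ASHIFT (since ne >= sh), and
   ne == sh then selects the rldimi form, which inserts the low 32 bits
   of the source into the high half of the destination.  */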
10610
10611 /* Return the instruction template for an insert with mask in mode MODE, with
10612 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10613
10614 const char *
10615 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
10616 {
10617 int nb, ne;
10618
10619 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
10620 gcc_unreachable ();
10621
10622 /* Prefer rldimi because rlwimi is cracked. */
10623 if (TARGET_POWERPC64
10624 && (!dot || mode == DImode)
10625 && GET_CODE (operands[4]) != LSHIFTRT
10626 && ne == INTVAL (operands[2]))
10627 {
10628 operands[3] = GEN_INT (63 - nb);
10629 if (dot)
10630 return "rldimi. %0,%1,%2,%3";
10631 return "rldimi %0,%1,%2,%3";
10632 }
10633
10634 if (nb < 32 && ne < 32)
10635 {
10636 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
10637 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
10638 operands[3] = GEN_INT (31 - nb);
10639 operands[4] = GEN_INT (31 - ne);
10640 if (dot)
10641 return "rlwimi. %0,%1,%2,%3,%4";
10642 return "rlwimi %0,%1,%2,%3,%4";
10643 }
10644
10645 gcc_unreachable ();
10646 }
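
/* Example (editorial note; values chosen for illustration): an insert
   derived from (lshiftrt x 24) with mask 0x000000ff (nb = 7, ne = 0)
   skips the rldimi branch because of the LSHIFTRT, rewrites the shift
   count to 32 - 24 = 8, and emits "rlwimi %0,%1,8,24,31": the top byte
   of the source replaces the low byte of the destination.  */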
10647
10648 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
10649 using two machine instructions. */
10650
10651 bool
10652 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
10653 {
10654 /* There are two kinds of AND we can handle with two insns:
10655 1) those we can do with two rl* insns;
10656 2) ori[s];xori[s].
10657
10658 We do not handle that last case yet. */
10659
10660 /* If there is just one stretch of ones, we can do it. */
10661 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
10662 return true;
10663
10664 /* Otherwise, fill in the lowest "hole"; if we can do the result with
10665 one insn, we can do the whole thing with two. */
10666 unsigned HOST_WIDE_INT val = INTVAL (c);
10667 unsigned HOST_WIDE_INT bit1 = val & -val;
10668 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
10669 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
10670 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
10671 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
10672 }
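
/* Worked example of the hole-filling arithmetic above (editorial note;
   value chosen for illustration): for C = 0xe7 (binary 11100111, hole
   at bits 3..4), bit1 = 0x01, bit2 = 0x08 is the bottom of the hole,
   val1 = 0xe0, and bit3 = 0x20 is the first bit above the hole;
   val + bit3 - bit2 = 0xe7 + 0x20 - 0x08 = 0xff is a single stretch of
   ones, so the AND can be done in two instructions.  */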
10673
10674 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
10675 If EXPAND is true, split rotate-and-mask instructions we generate to
10676 their constituent parts as well (this is used during expand); if DOT
10677 is 1, make the last insn a record-form instruction clobbering the
10678 destination GPR and setting the CC reg (from operands[3]); if 2, set
10679 that GPR as well as the CC reg. */
10680
10681 void
10682 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
10683 {
10684 gcc_assert (!(expand && dot));
10685
10686 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
10687
10688 /* If it is one stretch of ones, it is DImode; shift left, mask, then
10689 shift right. This generates better code than doing the masks without
10690 shifts, or shifting first right and then left. */
10691 int nb, ne;
10692 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
10693 {
10694 gcc_assert (mode == DImode);
10695
10696 int shift = 63 - nb;
10697 if (expand)
10698 {
10699 rtx tmp1 = gen_reg_rtx (DImode);
10700 rtx tmp2 = gen_reg_rtx (DImode);
10701 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
10702 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
10703 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
10704 }
10705 else
10706 {
10707 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
10708 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
10709 emit_move_insn (operands[0], tmp);
10710 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
10711 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10712 }
10713 return;
10714 }
10715
10716 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
10717 that does the rest. */
10718 unsigned HOST_WIDE_INT bit1 = val & -val;
10719 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
10720 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
10721 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
10722
10723 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
10724 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
10725
10726 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
10727
10728 /* Two "no-rotate"-and-mask instructions, for SImode. */
10729 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
10730 {
10731 gcc_assert (mode == SImode);
10732
10733 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
10734 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
10735 emit_move_insn (reg, tmp);
10736 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
10737 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10738 return;
10739 }
10740
10741 gcc_assert (mode == DImode);
10742
10743 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
10744 insns; we have to do the first in SImode, because it wraps. */
10745 if (mask2 <= 0xffffffff
10746 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
10747 {
10748 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
10749 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
10750 GEN_INT (mask1));
10751 rtx reg_low = gen_lowpart (SImode, reg);
10752 emit_move_insn (reg_low, tmp);
10753 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
10754 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10755 return;
10756 }
10757
10758 /* Two rld* insns: rotate, clear the hole in the middle (which now is
10759 at the top end), rotate back and clear the other hole. */
10760 int right = exact_log2 (bit3);
10761 int left = 64 - right;
10762
10763 /* Rotate the mask too. */
10764 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
10765
10766 if (expand)
10767 {
10768 rtx tmp1 = gen_reg_rtx (DImode);
10769 rtx tmp2 = gen_reg_rtx (DImode);
10770 rtx tmp3 = gen_reg_rtx (DImode);
10771 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
10772 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
10773 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
10774 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
10775 }
10776 else
10777 {
10778 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
10779 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
10780 emit_move_insn (operands[0], tmp);
10781 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
10782 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
10783 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10784 }
10785 }
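
/* Sketch of the shift-mask-shift strategy above (editorial note; value
   chosen for illustration): for the DImode mask 0xffffffff0 (ones in
   bits 4..35, which no single rl* insn can produce), nb = 35, so the
   value is shifted left by 63 - 35 = 28, ANDed with 0xffffffff00000000
   (a valid rldicr mask), and shifted back right by 28.  */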
10786 \f
10787 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
10788 for lfq and stfq insns iff the registers are hard registers. */
10789
10790 int
10791 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
10792 {
10793 /* We might have been passed a SUBREG. */
10794 if (!REG_P (reg1) || !REG_P (reg2))
10795 return 0;
10796
10797 /* We might have been passed non-floating-point registers. */
10798 if (!FP_REGNO_P (REGNO (reg1))
10799 || !FP_REGNO_P (REGNO (reg2)))
10800 return 0;
10801
10802 return (REGNO (reg1) == REGNO (reg2) - 1);
10803 }
10804
10805 /* Return 1 if addr1 and addr2 are suitable for an lfq or stfq insn.
10806 addr1 and addr2 must be in consecutive memory locations
10807 (addr2 == addr1 + 8). */
10808
10809 int
10810 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
10811 {
10812 rtx addr1, addr2;
10813 unsigned int reg1, reg2;
10814 int offset1, offset2;
10815
10816 /* The mems cannot be volatile. */
10817 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
10818 return 0;
10819
10820 addr1 = XEXP (mem1, 0);
10821 addr2 = XEXP (mem2, 0);
10822
10823 /* Extract an offset (if used) from the first addr. */
10824 if (GET_CODE (addr1) == PLUS)
10825 {
10826 /* If not a REG, return zero. */
10827 if (!REG_P (XEXP (addr1, 0)))
10828 return 0;
10829 else
10830 {
10831 reg1 = REGNO (XEXP (addr1, 0));
10832 /* The offset must be constant! */
10833 if (!CONST_INT_P (XEXP (addr1, 1)))
10834 return 0;
10835 offset1 = INTVAL (XEXP (addr1, 1));
10836 }
10837 }
10838 else if (!REG_P (addr1))
10839 return 0;
10840 else
10841 {
10842 reg1 = REGNO (addr1);
10843 /* This was a simple (mem (reg)) expression. Offset is 0. */
10844 offset1 = 0;
10845 }
10846
10847 /* And now for the second addr. */
10848 if (GET_CODE (addr2) == PLUS)
10849 {
10850 /* If not a REG, return zero. */
10851 if (!REG_P (XEXP (addr2, 0)))
10852 return 0;
10853 else
10854 {
10855 reg2 = REGNO (XEXP (addr2, 0));
10856 /* The offset must be constant. */
10857 if (!CONST_INT_P (XEXP (addr2, 1)))
10858 return 0;
10859 offset2 = INTVAL (XEXP (addr2, 1));
10860 }
10861 }
10862 else if (!REG_P (addr2))
10863 return 0;
10864 else
10865 {
10866 reg2 = REGNO (addr2);
10867 /* This was a simple (mem (reg)) expression. Offset is 0. */
10868 offset2 = 0;
10869 }
10870
10871 /* Both of these must have the same base register. */
10872 if (reg1 != reg2)
10873 return 0;
10874
10875 /* The offset for the second addr must be 8 more than the first addr. */
10876 if (offset2 != offset1 + 8)
10877 return 0;
10878
10879 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
10880 instructions. */
10881 return 1;
10882 }
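
/* For instance (editorial note; example addresses chosen here):
   (mem (reg 3)) and (mem (plus (reg 3) (const_int 8))) qualify, since
   both use base register 3 with offsets 0 and 8; a volatile access, a
   mismatched base register, or any offset other than first + 8 makes
   this return 0.  */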
10883 \f
10884 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. For SDmode values we
10885 need to use DDmode, in all other cases we can use the same mode. */
10886 static machine_mode
10887 rs6000_secondary_memory_needed_mode (machine_mode mode)
10888 {
10889 if (lra_in_progress && mode == SDmode)
10890 return DDmode;
10891 return mode;
10892 }
10893
10894 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
10895 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
10896 only work on the traditional altivec registers, note if an altivec register
10897 was chosen. */
10898
10899 static enum rs6000_reg_type
10900 register_to_reg_type (rtx reg, bool *is_altivec)
10901 {
10902 HOST_WIDE_INT regno;
10903 enum reg_class rclass;
10904
10905 if (SUBREG_P (reg))
10906 reg = SUBREG_REG (reg);
10907
10908 if (!REG_P (reg))
10909 return NO_REG_TYPE;
10910
10911 regno = REGNO (reg);
10912 if (!HARD_REGISTER_NUM_P (regno))
10913 {
10914 if (!lra_in_progress && !reload_completed)
10915 return PSEUDO_REG_TYPE;
10916
10917 regno = true_regnum (reg);
10918 if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
10919 return PSEUDO_REG_TYPE;
10920 }
10921
10922 gcc_assert (regno >= 0);
10923
10924 if (is_altivec && ALTIVEC_REGNO_P (regno))
10925 *is_altivec = true;
10926
10927 rclass = rs6000_regno_regclass[regno];
10928 return reg_class_to_reg_type[(int)rclass];
10929 }
10930
10931 /* Helper function to return the cost of adding a TOC entry address. */
10932
10933 static inline int
10934 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
10935 {
10936 int ret;
10937
10938 if (TARGET_CMODEL != CMODEL_SMALL)
10939 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
10940
10941 else
10942 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
10943
10944 return ret;
10945 }
10946
10947 /* Helper function for rs6000_secondary_reload to determine whether the memory
10948 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
10949 needs reloading. Return negative if the memory is not handled by the memory
10950 helper functions and to try a different reload method, 0 if no additional
10951 instructions are needed, and positive to give the extra cost for the
10952 memory. */
10953
10954 static int
10955 rs6000_secondary_reload_memory (rtx addr,
10956 enum reg_class rclass,
10957 machine_mode mode)
10958 {
10959 int extra_cost = 0;
10960 rtx reg, and_arg, plus_arg0, plus_arg1;
10961 addr_mask_type addr_mask;
10962 const char *type = NULL;
10963 const char *fail_msg = NULL;
10964
10965 if (GPR_REG_CLASS_P (rclass))
10966 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
10967
10968 else if (rclass == FLOAT_REGS)
10969 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
10970
10971 else if (rclass == ALTIVEC_REGS)
10972 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
10973
10974 /* For the combined VSX_REGS, turn off Altivec AND -16. */
10975 else if (rclass == VSX_REGS)
10976 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
10977 & ~RELOAD_REG_AND_M16);
10978
10979 /* If the register allocator hasn't made up its mind yet on the register
10980 class to use, settle on defaults. */
10981 else if (rclass == NO_REGS)
10982 {
10983 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
10984 & ~RELOAD_REG_AND_M16);
10985
10986 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
10987 addr_mask &= ~(RELOAD_REG_INDEXED
10988 | RELOAD_REG_PRE_INCDEC
10989 | RELOAD_REG_PRE_MODIFY);
10990 }
10991
10992 else
10993 addr_mask = 0;
10994
10995 /* If the register isn't valid in this register class, just return now. */
10996 if ((addr_mask & RELOAD_REG_VALID) == 0)
10997 {
10998 if (TARGET_DEBUG_ADDR)
10999 {
11000 fprintf (stderr,
11001 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
11002 "not valid in class\n",
11003 GET_MODE_NAME (mode), reg_class_names[rclass]);
11004 debug_rtx (addr);
11005 }
11006
11007 return -1;
11008 }
11009
11010 switch (GET_CODE (addr))
11011 {
11012 /* Does the register class support auto update forms for this mode? We
11013 don't need a scratch register, since the powerpc only supports
11014 PRE_INC, PRE_DEC, and PRE_MODIFY. */
11015 case PRE_INC:
11016 case PRE_DEC:
11017 reg = XEXP (addr, 0);
11018 if (!base_reg_operand (reg, GET_MODE (reg)))
11019 {
11020 fail_msg = "no base register #1";
11021 extra_cost = -1;
11022 }
11023
11024 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
11025 {
11026 extra_cost = 1;
11027 type = "update";
11028 }
11029 break;
11030
11031 case PRE_MODIFY:
11032 reg = XEXP (addr, 0);
11033 plus_arg1 = XEXP (addr, 1);
11034 if (!base_reg_operand (reg, GET_MODE (reg))
11035 || GET_CODE (plus_arg1) != PLUS
11036 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
11037 {
11038 fail_msg = "bad PRE_MODIFY";
11039 extra_cost = -1;
11040 }
11041
11042 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
11043 {
11044 extra_cost = 1;
11045 type = "update";
11046 }
11047 break;
11048
11049 /* Do we need to simulate AND -16 to clear the bottom address bits used
11050 in VMX load/stores? Only allow the AND for vector sizes. */
11051 case AND:
11052 and_arg = XEXP (addr, 0);
11053 if (GET_MODE_SIZE (mode) != 16
11054 || !CONST_INT_P (XEXP (addr, 1))
11055 || INTVAL (XEXP (addr, 1)) != -16)
11056 {
11057 fail_msg = "bad Altivec AND #1";
11058 extra_cost = -1;
11059 }
11060
11061 if (rclass != ALTIVEC_REGS)
11062 {
11063 if (legitimate_indirect_address_p (and_arg, false))
11064 extra_cost = 1;
11065
11066 else if (legitimate_indexed_address_p (and_arg, false))
11067 extra_cost = 2;
11068
11069 else
11070 {
11071 fail_msg = "bad Altivec AND #2";
11072 extra_cost = -1;
11073 }
11074
11075 type = "and";
11076 }
11077 break;
11078
11079 /* If this is an indirect address, make sure it is a base register. */
11080 case REG:
11081 case SUBREG:
11082 if (!legitimate_indirect_address_p (addr, false))
11083 {
11084 extra_cost = 1;
11085 type = "move";
11086 }
11087 break;
11088
11089 /* If this is an indexed address, make sure the register class can handle
11090 indexed addresses for this mode. */
11091 case PLUS:
11092 plus_arg0 = XEXP (addr, 0);
11093 plus_arg1 = XEXP (addr, 1);
11094
11095 /* (plus (plus (reg) (constant)) (constant)) is generated during
11096 push_reload processing, so handle it now. */
11097 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
11098 {
11099 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11100 {
11101 extra_cost = 1;
11102 type = "offset";
11103 }
11104 }
11105
11106 /* (plus (plus (reg) (constant)) (reg)) is also generated during
11107 push_reload processing, so handle it now. */
11108 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
11109 {
11110 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11111 {
11112 extra_cost = 1;
11113 type = "indexed #2";
11114 }
11115 }
11116
11117 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
11118 {
11119 fail_msg = "no base register #2";
11120 extra_cost = -1;
11121 }
11122
11123 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
11124 {
11125 if ((addr_mask & RELOAD_REG_INDEXED) == 0
11126 || !legitimate_indexed_address_p (addr, false))
11127 {
11128 extra_cost = 1;
11129 type = "indexed";
11130 }
11131 }
11132
11133 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
11134 && CONST_INT_P (plus_arg1))
11135 {
11136 if (!quad_address_offset_p (INTVAL (plus_arg1)))
11137 {
11138 extra_cost = 1;
11139 type = "vector d-form offset";
11140 }
11141 }
11142
11143 /* Make sure the register class can handle offset addresses. */
11144 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
11145 {
11146 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11147 {
11148 extra_cost = 1;
11149 type = "offset #2";
11150 }
11151 }
11152
11153 else
11154 {
11155 fail_msg = "bad PLUS";
11156 extra_cost = -1;
11157 }
11158
11159 break;
11160
11161 case LO_SUM:
11162 /* Quad offsets are restricted and can't handle normal addresses. */
11163 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11164 {
11165 extra_cost = -1;
11166 type = "vector d-form lo_sum";
11167 }
11168
11169 else if (!legitimate_lo_sum_address_p (mode, addr, false))
11170 {
11171 fail_msg = "bad LO_SUM";
11172 extra_cost = -1;
11173 }
11174
11175 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11176 {
11177 extra_cost = 1;
11178 type = "lo_sum";
11179 }
11180 break;
11181
11182 /* Static addresses need to create a TOC entry. */
11183 case CONST:
11184 case SYMBOL_REF:
11185 case LABEL_REF:
11186 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11187 {
11188 extra_cost = -1;
11189 type = "vector d-form lo_sum #2";
11190 }
11191
11192 else
11193 {
11194 type = "address";
11195 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
11196 }
11197 break;
11198
11199 /* TOC references look like offsettable memory. */
11200 case UNSPEC:
11201 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
11202 {
11203 fail_msg = "bad UNSPEC";
11204 extra_cost = -1;
11205 }
11206
11207 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11208 {
11209 extra_cost = -1;
11210 type = "vector d-form lo_sum #3";
11211 }
11212
11213 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11214 {
11215 extra_cost = 1;
11216 type = "toc reference";
11217 }
11218 break;
11219
11220 default:
11221 {
11222 fail_msg = "bad address";
11223 extra_cost = -1;
11224 }
11225 }
11226
11227 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
11228 {
11229 if (extra_cost < 0)
11230 fprintf (stderr,
11231 "rs6000_secondary_reload_memory error: mode = %s, "
11232 "class = %s, addr_mask = '%s', %s\n",
11233 GET_MODE_NAME (mode),
11234 reg_class_names[rclass],
11235 rs6000_debug_addr_mask (addr_mask, false),
11236 (fail_msg != NULL) ? fail_msg : "<bad address>");
11237
11238 else
11239 fprintf (stderr,
11240 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
11241 "addr_mask = '%s', extra cost = %d, %s\n",
11242 GET_MODE_NAME (mode),
11243 reg_class_names[rclass],
11244 rs6000_debug_addr_mask (addr_mask, false),
11245 extra_cost,
11246 (type) ? type : "<none>");
11247
11248 debug_rtx (addr);
11249 }
11250
11251 return extra_cost;
11252 }
11253
11254 /* Helper function for rs6000_secondary_reload to return true if a move to a
11255 different register class is really a simple move. */
11256
11257 static bool
11258 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
11259 enum rs6000_reg_type from_type,
11260 machine_mode mode)
11261 {
11262 int size = GET_MODE_SIZE (mode);
11263
11264 /* Add support for various direct moves available. In this function, we only
11265 look at cases where we don't need any extra registers, and one or more
11266 simple move insns are issued. Originally, small integers were not allowed
11267 in FPR/VSX registers. Single precision binary floating point is not a simple
11268 move because we need to convert to the single precision memory layout.
11269 The 4-byte SDmode can be moved. TDmode values are disallowed since they
11270 need special direct move handling, which we do not support yet. */
11271 if (TARGET_DIRECT_MOVE
11272 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11273 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
11274 {
11275 if (TARGET_POWERPC64)
11276 {
11277 /* ISA 2.07: MTVSRD or MFVSRD. */
11278 if (size == 8)
11279 return true;
11280
11281 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
11282 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
11283 return true;
11284 }
11285
11286 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
11287 if (TARGET_P8_VECTOR)
11288 {
11289 if (mode == SImode)
11290 return true;
11291
11292 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
11293 return true;
11294 }
11295
11296 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
11297 if (mode == SDmode)
11298 return true;
11299 }
11300
11301 /* Move to/from SPR. */
11302 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
11303 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
11304 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
11305 return true;
11306
11307 return false;
11308 }
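
/* For example (editorial note): a DImode move between a GPR and a VSX
   register on a 64-bit ISA 2.07 target is "simple" (a single mtvsrd or
   mfvsrd), while the same move on a 32-bit target falls through here
   and needs the multi-insn helpers handled in the next function.  */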
11309
11310 /* Direct move helper function for rs6000_secondary_reload. Handle all of the
11311 special direct moves that involve allocating an extra register. If there
11312 is a helper insn for the move, record its code and cost in SRI and return
11313 true; otherwise return false. */
11314
11315 static bool
11316 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
11317 enum rs6000_reg_type from_type,
11318 machine_mode mode,
11319 secondary_reload_info *sri,
11320 bool altivec_p)
11321 {
11322 bool ret = false;
11323 enum insn_code icode = CODE_FOR_nothing;
11324 int cost = 0;
11325 int size = GET_MODE_SIZE (mode);
11326
11327 if (TARGET_POWERPC64 && size == 16)
11328 {
11329 /* Handle moving 128-bit values from GPRs to VSX registers on
11330 ISA 2.07 (power8, power9) when running in 64-bit mode using
11331 XXPERMDI to glue the two 64-bit values back together. */
11332 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
11333 {
11334 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
11335 icode = reg_addr[mode].reload_vsx_gpr;
11336 }
11337
11338 /* Handle moving 128-bit values from VSX registers to GPRs on
11339 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
11340 bottom 64-bit value. */
11341 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11342 {
11343 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
11344 icode = reg_addr[mode].reload_gpr_vsx;
11345 }
11346 }
11347
11348 else if (TARGET_POWERPC64 && mode == SFmode)
11349 {
11350 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11351 {
11352 cost = 3; /* xscvdpspn, mfvsrd, and. */
11353 icode = reg_addr[mode].reload_gpr_vsx;
11354 }
11355
11356 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
11357 {
11358 cost = 2; /* mtvsrwz, xscvspdpn. */
11359 icode = reg_addr[mode].reload_vsx_gpr;
11360 }
11361 }
11362
11363 else if (!TARGET_POWERPC64 && size == 8)
11364 {
11365 /* Handle moving 64-bit values from GPRs to floating point registers on
11366 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
11367 32-bit values back together. Altivec register classes must be handled
11368 specially since a different instruction is used, and the secondary
11369 reload support requires a single instruction class in the scratch
11370 register constraint. However, right now TFmode is not allowed in
11371 Altivec registers, so the pattern will never match. */
11372 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
11373 {
11374 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
11375 icode = reg_addr[mode].reload_fpr_gpr;
11376 }
11377 }
11378
11379 if (icode != CODE_FOR_nothing)
11380 {
11381 ret = true;
11382 if (sri)
11383 {
11384 sri->icode = icode;
11385 sri->extra_cost = cost;
11386 }
11387 }
11388
11389 return ret;
11390 }
11391
11392 /* Return whether a move between two register classes can be done either
11393 directly (simple move) or via a pattern that uses a single extra temporary
11394 (using ISA 2.07's direct move in this case). */
11395
11396 static bool
11397 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
11398 enum rs6000_reg_type from_type,
11399 machine_mode mode,
11400 secondary_reload_info *sri,
11401 bool altivec_p)
11402 {
11403 /* Fall back to load/store reloads if either type is not a register. */
11404 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
11405 return false;
11406
11407 /* If we haven't allocated registers yet, assume the move can be done for the
11408 standard register types. */
11409 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
11410 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
11411 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
11412 return true;
11413
11414 /* A move within the same set of registers is a simple move for
11415 non-specialized registers. */
11416 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
11417 return true;
11418
11419 /* Check whether a simple move can be done directly. */
11420 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
11421 {
11422 if (sri)
11423 {
11424 sri->icode = CODE_FOR_nothing;
11425 sri->extra_cost = 0;
11426 }
11427 return true;
11428 }
11429
11430 /* Now check if we can do it in a few steps. */
11431 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
11432 altivec_p);
11433 }
11434
11435 /* Inform reload about cases where moving X with a mode MODE to a register in
11436 RCLASS requires an extra scratch or immediate register. Return the class
11437 needed for the immediate register.
11438
11439 For VSX and Altivec, we may need a register to convert sp+offset into
11440 reg+sp.
11441
11442 For misaligned 64-bit gpr loads and stores we need a register to
11443 convert an offset address to indirect. */
11444
11445 static reg_class_t
11446 rs6000_secondary_reload (bool in_p,
11447 rtx x,
11448 reg_class_t rclass_i,
11449 machine_mode mode,
11450 secondary_reload_info *sri)
11451 {
11452 enum reg_class rclass = (enum reg_class) rclass_i;
11453 reg_class_t ret = ALL_REGS;
11454 enum insn_code icode;
11455 bool default_p = false;
11456 bool done_p = false;
11457
11458 /* Allow subreg of memory before/during reload. */
11459 bool memory_p = (MEM_P (x)
11460 || (!reload_completed && SUBREG_P (x)
11461 && MEM_P (SUBREG_REG (x))));
11462
11463 sri->icode = CODE_FOR_nothing;
11464 sri->t_icode = CODE_FOR_nothing;
11465 sri->extra_cost = 0;
11466 icode = ((in_p)
11467 ? reg_addr[mode].reload_load
11468 : reg_addr[mode].reload_store);
11469
11470 if (REG_P (x) || register_operand (x, mode))
11471 {
11472 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
11473 bool altivec_p = (rclass == ALTIVEC_REGS);
11474 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
11475
11476 if (!in_p)
11477 std::swap (to_type, from_type);
11478
11479 /* Can we do a direct move of some sort? */
11480 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
11481 altivec_p))
11482 {
11483 icode = (enum insn_code)sri->icode;
11484 default_p = false;
11485 done_p = true;
11486 ret = NO_REGS;
11487 }
11488 }
11489
11490 /* Make sure 0.0 is not reloaded or forced into memory. */
11491 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
11492 {
11493 ret = NO_REGS;
11494 default_p = false;
11495 done_p = true;
11496 }
11497
11498 /* If this is a scalar floating point value and we want to load it into the
11499 traditional Altivec registers, do it with a move through a traditional
11500 floating point register, unless we have D-form addressing. Also make sure
11501 that non-zero constants use an FPR. */
11502 if (!done_p && reg_addr[mode].scalar_in_vmx_p
11503 && !mode_supports_vmx_dform (mode)
11504 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
11505 && (memory_p || CONST_DOUBLE_P (x)))
11506 {
11507 ret = FLOAT_REGS;
11508 default_p = false;
11509 done_p = true;
11510 }
11511
11512 /* Handle reload of load/stores if we have reload helper functions. */
11513 if (!done_p && icode != CODE_FOR_nothing && memory_p)
11514 {
11515 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
11516 mode);
11517
11518 if (extra_cost >= 0)
11519 {
11520 done_p = true;
11521 ret = NO_REGS;
11522 if (extra_cost > 0)
11523 {
11524 sri->extra_cost = extra_cost;
11525 sri->icode = icode;
11526 }
11527 }
11528 }
11529
11530 /* Handle unaligned loads and stores of integer registers. */
11531 if (!done_p && TARGET_POWERPC64
11532 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
11533 && memory_p
11534 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
11535 {
11536 rtx addr = XEXP (x, 0);
11537 rtx off = address_offset (addr);
11538
11539 if (off != NULL_RTX)
11540 {
11541 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
11542 unsigned HOST_WIDE_INT offset = INTVAL (off);
11543
11544 /* We need a secondary reload when our legitimate_address_p
11545 says the address is good (as otherwise the entire address
11546 will be reloaded), and the offset is not a multiple of
11547 four or we have an address wrap. Address wrap will only
11548 occur for LO_SUMs since legitimate_offset_address_p
11549 rejects addresses for 16-byte mems that will wrap. */
11550 if (GET_CODE (addr) == LO_SUM
11551 ? (1 /* legitimate_address_p allows any offset for lo_sum */
11552 && ((offset & 3) != 0
11553 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
11554 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
11555 && (offset & 3) != 0))
11556 {
11557 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
11558 if (in_p)
11559 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
11560 : CODE_FOR_reload_di_load);
11561 else
11562 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
11563 : CODE_FOR_reload_di_store);
11564 sri->extra_cost = 2;
11565 ret = NO_REGS;
11566 done_p = true;
11567 }
11568 else
11569 default_p = true;
11570 }
11571 else
11572 default_p = true;
11573 }
11574
11575 if (!done_p && !TARGET_POWERPC64
11576 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
11577 && memory_p
11578 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
11579 {
11580 rtx addr = XEXP (x, 0);
11581 rtx off = address_offset (addr);
11582
11583 if (off != NULL_RTX)
11584 {
11585 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
11586 unsigned HOST_WIDE_INT offset = INTVAL (off);
11587
11588 /* We need a secondary reload when our legitimate_address_p
11589 says the address is good (as otherwise the entire address
11590 will be reloaded), and we have a wrap.
11591
11592 legitimate_lo_sum_address_p allows LO_SUM addresses to
11593 have any offset so test for wrap in the low 16 bits.
11594
11595 legitimate_offset_address_p checks for the range
11596 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
11597 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
11598 [0x7ff4,0x7fff] respectively, so test for the
11599 intersection of these ranges, [0x7ffc,0x7fff] and
11600 [0x7ff4,0x7ff7] respectively.
11601
11602 Note that the address we see here may have been
11603 manipulated by legitimize_reload_address. */
11604 if (GET_CODE (addr) == LO_SUM
11605 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
11606 : offset - (0x8000 - extra) < UNITS_PER_WORD)
11607 {
11608 if (in_p)
11609 sri->icode = CODE_FOR_reload_si_load;
11610 else
11611 sri->icode = CODE_FOR_reload_si_store;
11612 sri->extra_cost = 2;
11613 ret = NO_REGS;
11614 done_p = true;
11615 }
11616 else
11617 default_p = true;
11618 }
11619 else
11620 default_p = true;
11621 }
11622
11623 if (!done_p)
11624 default_p = true;
11625
11626 if (default_p)
11627 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
11628
11629 gcc_assert (ret != ALL_REGS);
11630
11631 if (TARGET_DEBUG_ADDR)
11632 {
11633 fprintf (stderr,
11634 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
11635 "mode = %s",
11636 reg_class_names[ret],
11637 in_p ? "true" : "false",
11638 reg_class_names[rclass],
11639 GET_MODE_NAME (mode));
11640
11641 if (reload_completed)
11642 fputs (", after reload", stderr);
11643
11644 if (!done_p)
11645 fputs (", done_p not set", stderr);
11646
11647 if (default_p)
11648 fputs (", default secondary reload", stderr);
11649
11650 if (sri->icode != CODE_FOR_nothing)
11651 fprintf (stderr, ", reload func = %s, extra cost = %d",
11652 insn_data[sri->icode].name, sri->extra_cost);
11653
11654 else if (sri->extra_cost > 0)
11655 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
11656
11657 fputs ("\n", stderr);
11658 debug_rtx (x);
11659 }
11660
11661 return ret;
11662 }
11663
11664 /* Better tracing for rs6000_secondary_reload_inner. */
11665
11666 static void
11667 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
11668 bool store_p)
11669 {
11670 rtx set, clobber;
11671
11672 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
11673
11674 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
11675 store_p ? "store" : "load");
11676
11677 if (store_p)
11678 set = gen_rtx_SET (mem, reg);
11679 else
11680 set = gen_rtx_SET (reg, mem);
11681
11682 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
11683 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
11684 }
11685
11686 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
11687 ATTRIBUTE_NORETURN;
11688
11689 static void
11690 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
11691 bool store_p)
11692 {
11693 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
11694 gcc_unreachable ();
11695 }
11696
11697 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
11698 reload helper functions. These were identified in
11699 rs6000_secondary_reload_memory, and if reload decided to use the secondary
11700 reload, it calls the insns:
11701 reload_<RELOAD:mode>_<P:mptrsize>_store
11702 reload_<RELOAD:mode>_<P:mptrsize>_load
11703
11704 which in turn calls this function, to do whatever is necessary to create
11705 valid addresses. */
11706
11707 void
11708 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
11709 {
11710 int regno = true_regnum (reg);
11711 machine_mode mode = GET_MODE (reg);
11712 addr_mask_type addr_mask;
11713 rtx addr;
11714 rtx new_addr;
11715 rtx op_reg, op0, op1;
11716 rtx and_op;
11717 rtx cc_clobber;
11718 rtvec rv;
11719
11720 if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
11721 || !base_reg_operand (scratch, GET_MODE (scratch)))
11722 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11723
11724 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
11725 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
11726
11727 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
11728 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
11729
11730 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
11731 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
11732
11733 else
11734 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11735
11736 /* Make sure the mode is valid in this register class. */
11737 if ((addr_mask & RELOAD_REG_VALID) == 0)
11738 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11739
11740 if (TARGET_DEBUG_ADDR)
11741 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
11742
11743 new_addr = addr = XEXP (mem, 0);
11744 switch (GET_CODE (addr))
11745 {
11746 /* Does the register class support auto update forms for this mode? If
11747 not, do the update now. We don't need a scratch register, since the
11748 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
11749 case PRE_INC:
11750 case PRE_DEC:
11751 op_reg = XEXP (addr, 0);
11752 if (!base_reg_operand (op_reg, Pmode))
11753 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11754
11755 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
11756 {
11757 int delta = GET_MODE_SIZE (mode);
11758 if (GET_CODE (addr) == PRE_DEC)
11759 delta = -delta;
11760 emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
11761 new_addr = op_reg;
11762 }
11763 break;
11764
11765 case PRE_MODIFY:
11766 op0 = XEXP (addr, 0);
11767 op1 = XEXP (addr, 1);
11768 if (!base_reg_operand (op0, Pmode)
11769 || GET_CODE (op1) != PLUS
11770 || !rtx_equal_p (op0, XEXP (op1, 0)))
11771 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11772
11773 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
11774 {
11775 emit_insn (gen_rtx_SET (op0, op1));
11776 new_addr = op0;
11777 }
11778 break;
11779
11780 /* Do we need to simulate AND -16 to clear the bottom address bits used
11781 in VMX load/stores? */
11782 case AND:
11783 op0 = XEXP (addr, 0);
11784 op1 = XEXP (addr, 1);
11785 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
11786 {
11787 if (REG_P (op0) || SUBREG_P (op0))
11788 op_reg = op0;
11789
11790 else if (GET_CODE (op1) == PLUS)
11791 {
11792 emit_insn (gen_rtx_SET (scratch, op1));
11793 op_reg = scratch;
11794 }
11795
11796 else
11797 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11798
11799 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
11800 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
11801 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
11802 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
11803 new_addr = scratch;
11804 }
11805 break;
11806
11807 /* If this is an indirect address, make sure it is a base register. */
11808 case REG:
11809 case SUBREG:
11810 if (!base_reg_operand (addr, GET_MODE (addr)))
11811 {
11812 emit_insn (gen_rtx_SET (scratch, addr));
11813 new_addr = scratch;
11814 }
11815 break;
11816
11817 /* If this is an indexed address, make sure the register class can handle
11818 indexed addresses for this mode. */
11819 case PLUS:
11820 op0 = XEXP (addr, 0);
11821 op1 = XEXP (addr, 1);
11822 if (!base_reg_operand (op0, Pmode))
11823 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11824
11825 else if (int_reg_operand (op1, Pmode))
11826 {
11827 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11828 {
11829 emit_insn (gen_rtx_SET (scratch, addr));
11830 new_addr = scratch;
11831 }
11832 }
11833
11834 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
11835 {
11836 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
11837 || !quad_address_p (addr, mode, false))
11838 {
11839 emit_insn (gen_rtx_SET (scratch, addr));
11840 new_addr = scratch;
11841 }
11842 }
11843
11844 /* Make sure the register class can handle offset addresses. */
11845 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
11846 {
11847 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11848 {
11849 emit_insn (gen_rtx_SET (scratch, addr));
11850 new_addr = scratch;
11851 }
11852 }
11853
11854 else
11855 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11856
11857 break;
11858
11859 case LO_SUM:
11860 op0 = XEXP (addr, 0);
11861 op1 = XEXP (addr, 1);
11862 if (!base_reg_operand (op0, Pmode))
11863 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11864
11865 else if (int_reg_operand (op1, Pmode))
11866 {
11867 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11868 {
11869 emit_insn (gen_rtx_SET (scratch, addr));
11870 new_addr = scratch;
11871 }
11872 }
11873
11874 /* Quad offsets are restricted and can't handle normal addresses. */
11875 else if (mode_supports_dq_form (mode))
11876 {
11877 emit_insn (gen_rtx_SET (scratch, addr));
11878 new_addr = scratch;
11879 }
11880
11881 /* Make sure the register class can handle offset addresses. */
11882 else if (legitimate_lo_sum_address_p (mode, addr, false))
11883 {
11884 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11885 {
11886 emit_insn (gen_rtx_SET (scratch, addr));
11887 new_addr = scratch;
11888 }
11889 }
11890
11891 else
11892 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11893
11894 break;
11895
11896 case SYMBOL_REF:
11897 case CONST:
11898 case LABEL_REF:
11899 rs6000_emit_move (scratch, addr, Pmode);
11900 new_addr = scratch;
11901 break;
11902
11903 default:
11904 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11905 }
11906
11907 /* Adjust the address if it changed. */
11908 if (addr != new_addr)
11909 {
11910 mem = replace_equiv_address_nv (mem, new_addr);
11911 if (TARGET_DEBUG_ADDR)
11912 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
11913 }
11914
11915 /* Now create the move. */
11916 if (store_p)
11917 emit_insn (gen_rtx_SET (mem, reg));
11918 else
11919 emit_insn (gen_rtx_SET (reg, mem));
11920
11921 return;
11922 }
11923
11924 /* Convert reloads involving 64-bit gprs and misaligned offset
11925 addressing, or multiple 32-bit gprs and offsets that are too large,
11926 to use indirect addressing. */
11927
11928 void
11929 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
11930 {
11931 int regno = true_regnum (reg);
11932 enum reg_class rclass;
11933 rtx addr;
11934 rtx scratch_or_premodify = scratch;
11935
11936 if (TARGET_DEBUG_ADDR)
11937 {
11938 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
11939 store_p ? "store" : "load");
11940 fprintf (stderr, "reg:\n");
11941 debug_rtx (reg);
11942 fprintf (stderr, "mem:\n");
11943 debug_rtx (mem);
11944 fprintf (stderr, "scratch:\n");
11945 debug_rtx (scratch);
11946 }
11947
11948 gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
11949 gcc_assert (MEM_P (mem));
11950 rclass = REGNO_REG_CLASS (regno);
11951 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
11952 addr = XEXP (mem, 0);
11953
11954 if (GET_CODE (addr) == PRE_MODIFY)
11955 {
11956 gcc_assert (REG_P (XEXP (addr, 0))
11957 && GET_CODE (XEXP (addr, 1)) == PLUS
11958 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
11959 scratch_or_premodify = XEXP (addr, 0);
11960 addr = XEXP (addr, 1);
11961 }
11962 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
11963
11964 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
11965
11966 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
11967
11968 /* Now create the move. */
11969 if (store_p)
11970 emit_insn (gen_rtx_SET (mem, reg));
11971 else
11972 emit_insn (gen_rtx_SET (reg, mem));
11973
11974 return;
11975 }
11976
11977 /* Given an rtx X being reloaded into a reg required to be
11978 in class CLASS, return the class of reg to actually use.
11979 In general this is just CLASS; but on some machines
11980 in some cases it is preferable to use a more restrictive class.
11981
11982 On the RS/6000, we have to return NO_REGS when we want to reload a
11983 floating-point CONST_DOUBLE to force it to be copied to memory.
11984
11985 We also don't want to reload integer values into floating-point
11986 registers if we can at all help it. In fact, this can
11987 cause reload to die, if it tries to generate a reload of CTR
11988 into a FP register and discovers it doesn't have the memory location
11989 required.
11990
11991 ??? Would it be a good idea to have reload do the converse, that is
11992 try to reload floating modes into FP registers if possible?
11993 */
11994
11995 static enum reg_class
11996 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
11997 {
11998 machine_mode mode = GET_MODE (x);
11999 bool is_constant = CONSTANT_P (x);
12000
12001 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
12002 reload class for it. */
12003 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
12004 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
12005 return NO_REGS;
12006
12007 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
12008 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
12009 return NO_REGS;
12010
12011 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
12012 the reloading of address expressions using PLUS into floating point
12013 registers. */
12014 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
12015 {
12016 if (is_constant)
12017 {
12018 /* Zero is always allowed in all VSX registers. */
12019 if (x == CONST0_RTX (mode))
12020 return rclass;
12021
12022 /* If this is a vector constant that can be formed with a few Altivec
12023 instructions, we want altivec registers. */
12024 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
12025 return ALTIVEC_REGS;
12026
12027 /* If this is an integer constant that can easily be loaded into
12028 vector registers, allow it. */
12029 if (CONST_INT_P (x))
12030 {
12031 HOST_WIDE_INT value = INTVAL (x);
12032
12033 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
12034 2.06 can generate it in the Altivec registers with
12035 VSPLTI<x>. */
12036 if (value == -1)
12037 {
12038 if (TARGET_P8_VECTOR)
12039 return rclass;
12040 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
12041 return ALTIVEC_REGS;
12042 else
12043 return NO_REGS;
12044 }
12045
12046 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
12047 a sign extend in the Altivec registers. */
12048 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
12049 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
12050 return ALTIVEC_REGS;
12051 }
12052
12053 /* Force constant to memory. */
12054 return NO_REGS;
12055 }
12056
12057 /* D-form addressing can easily reload the value. */
12058 if (mode_supports_vmx_dform (mode)
12059 || mode_supports_dq_form (mode))
12060 return rclass;
12061
12062 /* If this is a scalar floating point value and we don't have D-form
12063 addressing, prefer the traditional floating point registers so that we
12064 can use D-form (register+offset) addressing. */
12065 if (rclass == VSX_REGS
12066 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
12067 return FLOAT_REGS;
12068
12069 /* Prefer the Altivec registers if Altivec is handling the vector
12070 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
12071 loads. */
12072 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
12073 || mode == V1TImode)
12074 return ALTIVEC_REGS;
12075
12076 return rclass;
12077 }
12078
12079 if (is_constant || GET_CODE (x) == PLUS)
12080 {
12081 if (reg_class_subset_p (GENERAL_REGS, rclass))
12082 return GENERAL_REGS;
12083 if (reg_class_subset_p (BASE_REGS, rclass))
12084 return BASE_REGS;
12085 return NO_REGS;
12086 }
12087
12088 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == GEN_OR_FLOAT_REGS)
12089 return GENERAL_REGS;
12090
12091 return rclass;
12092 }
12093
12094 /* Debug version of rs6000_preferred_reload_class. */
12095 static enum reg_class
12096 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
12097 {
12098 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
12099
12100 fprintf (stderr,
12101 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
12102 "mode = %s, x:\n",
12103 reg_class_names[ret], reg_class_names[rclass],
12104 GET_MODE_NAME (GET_MODE (x)));
12105 debug_rtx (x);
12106
12107 return ret;
12108 }
12109
12110 /* If we are copying between FP or AltiVec registers and anything else, we need
12111 a memory location. The exception is when we are targeting ppc64 and the
12112 direct fpr/gpr move instructions are available. Also, under VSX, you
12113 can copy vector registers from the FP register set to the Altivec register
12114 set and vice versa. */
12115
12116 static bool
12117 rs6000_secondary_memory_needed (machine_mode mode,
12118 reg_class_t from_class,
12119 reg_class_t to_class)
12120 {
12121 enum rs6000_reg_type from_type, to_type;
12122 bool altivec_p = ((from_class == ALTIVEC_REGS)
12123 || (to_class == ALTIVEC_REGS));
12124
12125 /* If a simple/direct move is available, we don't need secondary memory. */
12126 from_type = reg_class_to_reg_type[(int)from_class];
12127 to_type = reg_class_to_reg_type[(int)to_class];
12128
12129 if (rs6000_secondary_reload_move (to_type, from_type, mode,
12130 (secondary_reload_info *)0, altivec_p))
12131 return false;
12132
12133 /* If we have a floating point or vector register class, we need to use
12134 memory to transfer the data. */
12135 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
12136 return true;
12137
12138 return false;
12139 }
12140
12141 /* Debug version of rs6000_secondary_memory_needed. */
12142 static bool
12143 rs6000_debug_secondary_memory_needed (machine_mode mode,
12144 reg_class_t from_class,
12145 reg_class_t to_class)
12146 {
12147 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
12148
12149 fprintf (stderr,
12150 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
12151 "to_class = %s, mode = %s\n",
12152 ret ? "true" : "false",
12153 reg_class_names[from_class],
12154 reg_class_names[to_class],
12155 GET_MODE_NAME (mode));
12156
12157 return ret;
12158 }
12159
12160 /* Return the register class of a scratch register needed to copy IN into
12161 or out of a register in RCLASS in MODE. If it can be done directly,
12162 NO_REGS is returned. */
12163
12164 static enum reg_class
12165 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
12166 rtx in)
12167 {
12168 int regno;
12169
12170 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
12171 #if TARGET_MACHO
12172 && MACHOPIC_INDIRECT
12173 #endif
12174 ))
12175 {
12176 /* We cannot copy a symbolic operand directly into anything
12177 other than BASE_REGS for TARGET_ELF. So indicate that a
12178 register from BASE_REGS is needed as an intermediate
12179 register.
12180
12181 On Darwin, pic addresses require a load from memory, which
12182 needs a base register. */
12183 if (rclass != BASE_REGS
12184 && (SYMBOL_REF_P (in)
12185 || GET_CODE (in) == HIGH
12186 || GET_CODE (in) == LABEL_REF
12187 || GET_CODE (in) == CONST))
12188 return BASE_REGS;
12189 }
12190
12191 if (REG_P (in))
12192 {
12193 regno = REGNO (in);
12194 if (!HARD_REGISTER_NUM_P (regno))
12195 {
12196 regno = true_regnum (in);
12197 if (!HARD_REGISTER_NUM_P (regno))
12198 regno = -1;
12199 }
12200 }
12201 else if (SUBREG_P (in))
12202 {
12203 regno = true_regnum (in);
12204 if (!HARD_REGISTER_NUM_P (regno))
12205 regno = -1;
12206 }
12207 else
12208 regno = -1;
12209
12210 /* If we have VSX register moves, prefer moving scalar values between
12211 Altivec registers and GPRs by going via an FPR (and then via memory)
12212 instead of reloading the secondary memory address for Altivec moves. */
12213 if (TARGET_VSX
12214 && GET_MODE_SIZE (mode) < 16
12215 && !mode_supports_vmx_dform (mode)
12216 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
12217 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
12218 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
12219 && (regno >= 0 && INT_REGNO_P (regno)))))
12220 return FLOAT_REGS;
12221
12222 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
12223 into anything. */
12224 if (rclass == GENERAL_REGS || rclass == BASE_REGS
12225 || (regno >= 0 && INT_REGNO_P (regno)))
12226 return NO_REGS;
12227
12228 /* Constants, memory, and VSX registers can go into VSX registers (both the
12229 traditional floating point and the altivec registers). */
12230 if (rclass == VSX_REGS
12231 && (regno == -1 || VSX_REGNO_P (regno)))
12232 return NO_REGS;
12233
12234 /* Constants, memory, and FP registers can go into FP registers. */
12235 if ((regno == -1 || FP_REGNO_P (regno))
12236 && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
12237 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
12238
12239 /* Memory, and AltiVec registers can go into AltiVec registers. */
12240 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
12241 && rclass == ALTIVEC_REGS)
12242 return NO_REGS;
12243
12244 /* We can copy among the CR registers. */
12245 if ((rclass == CR_REGS || rclass == CR0_REGS)
12246 && regno >= 0 && CR_REGNO_P (regno))
12247 return NO_REGS;
12248
12249 /* Otherwise, we need GENERAL_REGS. */
12250 return GENERAL_REGS;
12251 }
12252
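/* An illustrative example: on an ELF target, reloading a SYMBOL_REF into a
   FLOAT_REGS pseudo returns BASE_REGS above, so reload first forms the
   address in a GPR usable as a base register; a GPR-to-GPR copy returns
   NO_REGS because no scratch register is needed. */
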
12253 /* Debug version of rs6000_secondary_reload_class. */
12254 static enum reg_class
12255 rs6000_debug_secondary_reload_class (enum reg_class rclass,
12256 machine_mode mode, rtx in)
12257 {
12258 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
12259 fprintf (stderr,
12260 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
12261 "mode = %s, input rtx:\n",
12262 reg_class_names[ret], reg_class_names[rclass],
12263 GET_MODE_NAME (mode));
12264 debug_rtx (in);
12265
12266 return ret;
12267 }
12268
12269 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
12270
12271 static bool
12272 rs6000_can_change_mode_class (machine_mode from,
12273 machine_mode to,
12274 reg_class_t rclass)
12275 {
12276 unsigned from_size = GET_MODE_SIZE (from);
12277 unsigned to_size = GET_MODE_SIZE (to);
12278
12279 if (from_size != to_size)
12280 {
12281 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
12282
12283 if (reg_classes_intersect_p (xclass, rclass))
12284 {
12285 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
12286 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
12287 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
12288 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
12289
12290 /* Don't allow 64-bit types to overlap with 128-bit types that take a
12291 single register under VSX because the scalar part of the register
12292 is in the upper 64-bits, and not the lower 64-bits. Types like
12293 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
12294 IEEE floating point can't overlap, and neither can small
12295 values. */
12296
12297 if (to_float128_vector_p && from_float128_vector_p)
12298 return true;
12299
12300 else if (to_float128_vector_p || from_float128_vector_p)
12301 return false;
12302
12303 /* TDmode in floating-point registers must always go into a register
12304 pair with the most significant word in the even-numbered register
12305 to match ISA requirements. In little-endian mode, this does not
12306 match subreg numbering, so we cannot allow subregs. */
12307 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
12308 return false;
12309
12310 if (from_size < 8 || to_size < 8)
12311 return false;
12312
12313 if (from_size == 8 && (8 * to_nregs) != to_size)
12314 return false;
12315
12316 if (to_size == 8 && (8 * from_nregs) != from_size)
12317 return false;
12318
12319 return true;
12320 }
12321 else
12322 return true;
12323 }
12324
12325 /* Since the VSX register set includes traditional floating point registers
12326 and altivec registers, just check for the size being different instead of
12327 trying to check whether the modes are vector modes. Otherwise it won't
12328 allow say DF and DI to change classes. For types like TFmode and TDmode
12329 that take 2 64-bit registers, rather than a single 128-bit register, don't
12330 allow subregs of those types to other 128-bit types. */
12331 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
12332 {
12333 unsigned num_regs = (from_size + 15) / 16;
12334 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
12335 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
12336 return false;
12337
12338 return (from_size == 8 || from_size == 16);
12339 }
12340
12341 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
12342 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
12343 return false;
12344
12345 return true;
12346 }
12347
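/* An illustrative example: within the VSX registers, a subreg between
   DFmode and DImode is allowed (both fit in one 8-byte register), while
   DImode to KFmode is rejected above because the 64-bit scalar sits in
   the upper half of the 128-bit register, where subreg numbering does not
   expect it. */
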
12348 /* Debug version of rs6000_can_change_mode_class. */
12349 static bool
12350 rs6000_debug_can_change_mode_class (machine_mode from,
12351 machine_mode to,
12352 reg_class_t rclass)
12353 {
12354 bool ret = rs6000_can_change_mode_class (from, to, rclass);
12355
12356 fprintf (stderr,
12357 "rs6000_can_change_mode_class, return %s, from = %s, "
12358 "to = %s, rclass = %s\n",
12359 ret ? "true" : "false",
12360 GET_MODE_NAME (from), GET_MODE_NAME (to),
12361 reg_class_names[rclass]);
12362
12363 return ret;
12364 }
12365 \f
12366 /* Return a string to do a move operation of 128 bits of data. */
12367
12368 const char *
12369 rs6000_output_move_128bit (rtx operands[])
12370 {
12371 rtx dest = operands[0];
12372 rtx src = operands[1];
12373 machine_mode mode = GET_MODE (dest);
12374 int dest_regno;
12375 int src_regno;
12376 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
12377 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
12378
12379 if (REG_P (dest))
12380 {
12381 dest_regno = REGNO (dest);
12382 dest_gpr_p = INT_REGNO_P (dest_regno);
12383 dest_fp_p = FP_REGNO_P (dest_regno);
12384 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
12385 dest_vsx_p = dest_fp_p | dest_vmx_p;
12386 }
12387 else
12388 {
12389 dest_regno = -1;
12390 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
12391 }
12392
12393 if (REG_P (src))
12394 {
12395 src_regno = REGNO (src);
12396 src_gpr_p = INT_REGNO_P (src_regno);
12397 src_fp_p = FP_REGNO_P (src_regno);
12398 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
12399 src_vsx_p = src_fp_p | src_vmx_p;
12400 }
12401 else
12402 {
12403 src_regno = -1;
12404 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
12405 }
12406
12407 /* Register moves. */
12408 if (dest_regno >= 0 && src_regno >= 0)
12409 {
12410 if (dest_gpr_p)
12411 {
12412 if (src_gpr_p)
12413 return "#";
12414
12415 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
12416 return (WORDS_BIG_ENDIAN
12417 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
12418 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
12419
12420 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
12421 return "#";
12422 }
12423
12424 else if (TARGET_VSX && dest_vsx_p)
12425 {
12426 if (src_vsx_p)
12427 return "xxlor %x0,%x1,%x1";
12428
12429 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
12430 return (WORDS_BIG_ENDIAN
12431 ? "mtvsrdd %x0,%1,%L1"
12432 : "mtvsrdd %x0,%L1,%1");
12433
12434 else if (TARGET_DIRECT_MOVE && src_gpr_p)
12435 return "#";
12436 }
12437
12438 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
12439 return "vor %0,%1,%1";
12440
12441 else if (dest_fp_p && src_fp_p)
12442 return "#";
12443 }
12444
12445 /* Loads. */
12446 else if (dest_regno >= 0 && MEM_P (src))
12447 {
12448 if (dest_gpr_p)
12449 {
12450 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
12451 return "lq %0,%1";
12452 else
12453 return "#";
12454 }
12455
12456 else if (TARGET_ALTIVEC && dest_vmx_p
12457 && altivec_indexed_or_indirect_operand (src, mode))
12458 return "lvx %0,%y1";
12459
12460 else if (TARGET_VSX && dest_vsx_p)
12461 {
12462 if (mode_supports_dq_form (mode)
12463 && quad_address_p (XEXP (src, 0), mode, true))
12464 return "lxv %x0,%1";
12465
12466 else if (TARGET_P9_VECTOR)
12467 return "lxvx %x0,%y1";
12468
12469 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
12470 return "lxvw4x %x0,%y1";
12471
12472 else
12473 return "lxvd2x %x0,%y1";
12474 }
12475
12476 else if (TARGET_ALTIVEC && dest_vmx_p)
12477 return "lvx %0,%y1";
12478
12479 else if (dest_fp_p)
12480 return "#";
12481 }
12482
12483 /* Stores. */
12484 else if (src_regno >= 0 && MEM_P (dest))
12485 {
12486 if (src_gpr_p)
12487 {
12488 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
12489 return "stq %1,%0";
12490 else
12491 return "#";
12492 }
12493
12494 else if (TARGET_ALTIVEC && src_vmx_p
12495 && altivec_indexed_or_indirect_operand (dest, mode))
12496 return "stvx %1,%y0";
12497
12498 else if (TARGET_VSX && src_vsx_p)
12499 {
12500 if (mode_supports_dq_form (mode)
12501 && quad_address_p (XEXP (dest, 0), mode, true))
12502 return "stxv %x1,%0";
12503
12504 else if (TARGET_P9_VECTOR)
12505 return "stxvx %x1,%y0";
12506
12507 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
12508 return "stxvw4x %x1,%y0";
12509
12510 else
12511 return "stxvd2x %x1,%y0";
12512 }
12513
12514 else if (TARGET_ALTIVEC && src_vmx_p)
12515 return "stvx %1,%y0";
12516
12517 else if (src_fp_p)
12518 return "#";
12519 }
12520
12521 /* Constants. */
12522 else if (dest_regno >= 0
12523 && (CONST_INT_P (src)
12524 || CONST_WIDE_INT_P (src)
12525 || CONST_DOUBLE_P (src)
12526 || GET_CODE (src) == CONST_VECTOR))
12527 {
12528 if (dest_gpr_p)
12529 return "#";
12530
12531 else if ((dest_vmx_p && TARGET_ALTIVEC)
12532 || (dest_vsx_p && TARGET_VSX))
12533 return output_vec_const_move (operands);
12534 }
12535
12536 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
12537 }
12538
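/* An illustrative example: a V2DImode copy between two VSX registers
   returns "xxlor %x0,%x1,%x1" above, while a GPR-to-GPR copy returns "#"
   so that the move insn is split into word-sized moves after reload. */
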
12539 /* Validate a 128-bit move. */
12540 bool
12541 rs6000_move_128bit_ok_p (rtx operands[])
12542 {
12543 machine_mode mode = GET_MODE (operands[0]);
12544 return (gpc_reg_operand (operands[0], mode)
12545 || gpc_reg_operand (operands[1], mode));
12546 }
12547
12548 /* Return true if a 128-bit move needs to be split. */
12549 bool
12550 rs6000_split_128bit_ok_p (rtx operands[])
12551 {
12552 if (!reload_completed)
12553 return false;
12554
12555 if (!gpr_or_gpr_p (operands[0], operands[1]))
12556 return false;
12557
12558 if (quad_load_store_p (operands[0], operands[1]))
12559 return false;
12560
12561 return true;
12562 }
12563
12564 \f
12565 /* Given a comparison operation, return the bit number in CCR to test. We
12566 know this is a valid comparison.
12567
12568 SCC_P is 1 if this is for an scc. That means that %D will have been
12569 used instead of %C, so the bits will be in different places.
12570
12571 Return -1 if OP isn't a valid comparison for some reason. */
12572
12573 int
12574 ccr_bit (rtx op, int scc_p)
12575 {
12576 enum rtx_code code = GET_CODE (op);
12577 machine_mode cc_mode;
12578 int cc_regnum;
12579 int base_bit;
12580 rtx reg;
12581
12582 if (!COMPARISON_P (op))
12583 return -1;
12584
12585 reg = XEXP (op, 0);
12586
12587 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
12588 return -1;
12589
12590 cc_mode = GET_MODE (reg);
12591 cc_regnum = REGNO (reg);
12592 base_bit = 4 * (cc_regnum - CR0_REGNO);
12593
12594 validate_condition_mode (code, cc_mode);
12595
12596 /* When generating a sCOND operation, only positive conditions are
12597 allowed. */
12598 if (scc_p)
12599 switch (code)
12600 {
12601 case EQ:
12602 case GT:
12603 case LT:
12604 case UNORDERED:
12605 case GTU:
12606 case LTU:
12607 break;
12608 default:
12609 return -1;
12610 }
12611
12612 switch (code)
12613 {
12614 case NE:
12615 return scc_p ? base_bit + 3 : base_bit + 2;
12616 case EQ:
12617 return base_bit + 2;
12618 case GT: case GTU: case UNLE:
12619 return base_bit + 1;
12620 case LT: case LTU: case UNGE:
12621 return base_bit;
12622 case ORDERED: case UNORDERED:
12623 return base_bit + 3;
12624
12625 case GE: case GEU:
12626 /* If scc, we will have done a cror to put the bit in the
12627 unordered position. So test that bit. For integer, this is ! LT
12628 unless this is an scc insn. */
12629 return scc_p ? base_bit + 3 : base_bit;
12630
12631 case LE: case LEU:
12632 return scc_p ? base_bit + 3 : base_bit + 1;
12633
12634 default:
12635 return -1;
12636 }
12637 }
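
/* A worked example (illustrative, assuming the usual rs6000 register
   numbering in which CR0_REGNO is 68): a GT compare in CR0 gives base_bit
   0 and returns bit 1; the same compare in CR6 (regno 74) gives base_bit
   4 * (74 - 68) = 24 and returns bit 25. For GE with SCC_P, the scc
   expansion has done a cror to put the answer in the unordered slot, so
   base_bit + 3 is returned. */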
12638 \f
12639 /* Return the GOT register. */
12640
12641 rtx
12642 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
12643 {
12644 /* The second flow pass currently (June 1999) can't update
12645 regs_ever_live without disturbing other parts of the compiler, so
12646 update it here to make the prolog/epilogue code happy. */
12647 if (!can_create_pseudo_p ()
12648 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
12649 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
12650
12651 crtl->uses_pic_offset_table = 1;
12652
12653 return pic_offset_table_rtx;
12654 }
12655 \f
12656 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
12657
12658 /* Write out a function code label. */
12659
12660 void
12661 rs6000_output_function_entry (FILE *file, const char *fname)
12662 {
12663 if (fname[0] != '.')
12664 {
12665 switch (DEFAULT_ABI)
12666 {
12667 default:
12668 gcc_unreachable ();
12669
12670 case ABI_AIX:
12671 if (DOT_SYMBOLS)
12672 putc ('.', file);
12673 else
12674 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
12675 break;
12676
12677 case ABI_ELFv2:
12678 case ABI_V4:
12679 case ABI_DARWIN:
12680 break;
12681 }
12682 }
12683
12684 RS6000_OUTPUT_BASENAME (file, fname);
12685 }
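
/* An illustrative example: under ABI_AIX with dot-symbols, "foo" is
   emitted as ".foo" (the code entry point, distinct from the "foo"
   function descriptor); ELFv2, V4 and Darwin emit the name unchanged. */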
12686
12687 /* Print an operand. Recognize special options, documented below. */
12688
12689 #if TARGET_ELF
12690 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
12691 only introduced by the linker, when applying the sda21
12692 relocation. */
12693 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
12694 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
12695 #else
12696 #define SMALL_DATA_RELOC "sda21"
12697 #define SMALL_DATA_REG 0
12698 #endif
12699
12700 void
12701 print_operand (FILE *file, rtx x, int code)
12702 {
12703 int i;
12704 unsigned HOST_WIDE_INT uval;
12705
12706 switch (code)
12707 {
12708 /* %a is output_address. */
12709
12710 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
12711 output_operand. */
12712
12713 case 'D':
12714 /* Like 'J' but get to the GT bit only. */
12715 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12716 {
12717 output_operand_lossage ("invalid %%D value");
12718 return;
12719 }
12720
12721 /* Bit 1 is GT bit. */
12722 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
12723
12724 /* Add one for shift count in rlinm for scc. */
12725 fprintf (file, "%d", i + 1);
12726 return;
12727
12728 case 'e':
12729 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
12730 if (! INT_P (x))
12731 {
12732 output_operand_lossage ("invalid %%e value");
12733 return;
12734 }
12735
12736 uval = INTVAL (x);
12737 if ((uval & 0xffff) == 0 && uval != 0)
12738 putc ('s', file);
12739 return;
12740
12741 case 'E':
12742 /* X is a CR register. Print the number of the EQ bit of the CR. */
12743 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12744 output_operand_lossage ("invalid %%E value");
12745 else
12746 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
12747 return;
12748
12749 case 'f':
12750 /* X is a CR register. Print the shift count needed to move it
12751 to the high-order four bits. */
12752 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12753 output_operand_lossage ("invalid %%f value");
12754 else
12755 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
12756 return;
12757
12758 case 'F':
12759 /* Similar, but print the count for the rotate in the opposite
12760 direction. */
12761 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12762 output_operand_lossage ("invalid %%F value");
12763 else
12764 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
12765 return;
12766
12767 case 'G':
12768 /* X is a constant integer. If it is negative, print "m",
12769 otherwise print "z". This is to make an aze or ame insn. */
12770 if (!CONST_INT_P (x))
12771 output_operand_lossage ("invalid %%G value");
12772 else if (INTVAL (x) >= 0)
12773 putc ('z', file);
12774 else
12775 putc ('m', file);
12776 return;
12777
12778 case 'h':
12779 /* If constant, output low-order five bits. Otherwise, write
12780 normally. */
12781 if (INT_P (x))
12782 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
12783 else
12784 print_operand (file, x, 0);
12785 return;
12786
12787 case 'H':
12788 /* If constant, output low-order six bits. Otherwise, write
12789 normally. */
12790 if (INT_P (x))
12791 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
12792 else
12793 print_operand (file, x, 0);
12794 return;
12795
12796 case 'I':
12797 /* Print `i' if this is a constant, else nothing. */
12798 if (INT_P (x))
12799 putc ('i', file);
12800 return;
12801
12802 case 'j':
12803 /* Write the bit number in CCR for jump. */
12804 i = ccr_bit (x, 0);
12805 if (i == -1)
12806 output_operand_lossage ("invalid %%j code");
12807 else
12808 fprintf (file, "%d", i);
12809 return;
12810
12811 case 'J':
12812 /* Similar, but add one for shift count in rlinm for scc and pass
12813 scc flag to `ccr_bit'. */
12814 i = ccr_bit (x, 1);
12815 if (i == -1)
12816 output_operand_lossage ("invalid %%J code");
12817 else
12818 /* If we want bit 31, write a shift count of zero, not 32. */
12819 fprintf (file, "%d", i == 31 ? 0 : i + 1);
12820 return;
12821
12822 case 'k':
12823 /* X must be a constant. Write the 1's complement of the
12824 constant. */
12825 if (! INT_P (x))
12826 output_operand_lossage ("invalid %%k value");
12827 else
12828 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
12829 return;
12830
12831 case 'K':
12832 /* X must be a symbolic constant on ELF. Write an
12833 expression suitable for an 'addi' that adds in the low 16
12834 bits of the MEM. */
12835 if (GET_CODE (x) == CONST)
12836 {
12837 if (GET_CODE (XEXP (x, 0)) != PLUS
12838 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
12839 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
12840 || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
12841 output_operand_lossage ("invalid %%K value");
12842 }
12843 print_operand_address (file, x);
12844 fputs ("@l", file);
12845 return;
12846
12847 /* %l is output_asm_label. */
12848
12849 case 'L':
12850 /* Write second word of DImode or DFmode reference. Works on register
12851 or non-indexed memory only. */
12852 if (REG_P (x))
12853 fputs (reg_names[REGNO (x) + 1], file);
12854 else if (MEM_P (x))
12855 {
12856 machine_mode mode = GET_MODE (x);
12857 /* Handle possible auto-increment. Since it is pre-increment and
12858 we have already done it, we can just use an offset of one word. */
12859 if (GET_CODE (XEXP (x, 0)) == PRE_INC
12860 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
12861 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
12862 UNITS_PER_WORD));
12863 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
12864 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
12865 UNITS_PER_WORD));
12866 else
12867 output_address (mode, XEXP (adjust_address_nv (x, SImode,
12868 UNITS_PER_WORD),
12869 0));
12870
12871 if (small_data_operand (x, GET_MODE (x)))
12872 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
12873 reg_names[SMALL_DATA_REG]);
12874 }
12875 return;
12876
12877 case 'N': /* Unused */
12878 /* Write the number of elements in the vector times 4. */
12879 if (GET_CODE (x) != PARALLEL)
12880 output_operand_lossage ("invalid %%N value");
12881 else
12882 fprintf (file, "%d", XVECLEN (x, 0) * 4);
12883 return;
12884
12885 case 'O': /* Unused */
12886 /* Similar, but subtract 1 first. */
12887 if (GET_CODE (x) != PARALLEL)
12888 output_operand_lossage ("invalid %%O value");
12889 else
12890 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
12891 return;
12892
12893 case 'p':
12894 /* X is a CONST_INT that is a power of two. Output the logarithm. */
12895 if (! INT_P (x)
12896 || INTVAL (x) < 0
12897 || (i = exact_log2 (INTVAL (x))) < 0)
12898 output_operand_lossage ("invalid %%p value");
12899 else
12900 fprintf (file, "%d", i);
12901 return;
12902
12903 case 'P':
12904 /* The operand must be an indirect memory reference. The result
12905 is the register name. */
12906 if (!MEM_P (x) || !REG_P (XEXP (x, 0))
12907 || REGNO (XEXP (x, 0)) >= 32)
12908 output_operand_lossage ("invalid %%P value");
12909 else
12910 fputs (reg_names[REGNO (XEXP (x, 0))], file);
12911 return;
12912
12913 case 'q':
12914 /* This outputs the logical code corresponding to a boolean
12915 expression. The expression may have one or both operands
12916 negated (if one, only the first one). For condition register
12917 logical operations, it will also treat the negated
12918 CR codes as NOTs, but not handle NOTs of them. */
12919 {
12920 const char *const *t = 0;
12921 const char *s;
12922 enum rtx_code code = GET_CODE (x);
12923 static const char * const tbl[3][3] = {
12924 { "and", "andc", "nor" },
12925 { "or", "orc", "nand" },
12926 { "xor", "eqv", "xor" } };
12927
12928 if (code == AND)
12929 t = tbl[0];
12930 else if (code == IOR)
12931 t = tbl[1];
12932 else if (code == XOR)
12933 t = tbl[2];
12934 else
12935 output_operand_lossage ("invalid %%q value");
12936
12937 if (GET_CODE (XEXP (x, 0)) != NOT)
12938 s = t[0];
12939 else
12940 {
12941 if (GET_CODE (XEXP (x, 1)) == NOT)
12942 s = t[2];
12943 else
12944 s = t[1];
12945 }
12946
12947 fputs (s, file);
12948 }
12949 return;
12950
12951 case 'Q':
12952 if (! TARGET_MFCRF)
12953 return;
12954 fputc (',', file);
12955 /* FALLTHRU */
12956
12957 case 'R':
12958 /* X is a CR register. Print the mask for `mtcrf'. */
12959 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12960 output_operand_lossage ("invalid %%R value");
12961 else
12962 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
12963 return;
12964
12965 case 's':
12966 /* Low 5 bits of 32 - value */
12967 if (! INT_P (x))
12968 output_operand_lossage ("invalid %%s value");
12969 else
12970 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
12971 return;
12972
12973 case 't':
12974 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
12975 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12976 {
12977 output_operand_lossage ("invalid %%t value");
12978 return;
12979 }
12980
12981 /* Bit 3 is OV bit. */
12982 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
12983
12984 /* If we want bit 31, write a shift count of zero, not 32. */
12985 fprintf (file, "%d", i == 31 ? 0 : i + 1);
12986 return;
12987
12988 case 'T':
12989 /* Print the symbolic name of a branch target register. */
12990 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
12991 x = XVECEXP (x, 0, 0);
12992 if (!REG_P (x) || (REGNO (x) != LR_REGNO
12993 && REGNO (x) != CTR_REGNO))
12994 output_operand_lossage ("invalid %%T value");
12995 else if (REGNO (x) == LR_REGNO)
12996 fputs ("lr", file);
12997 else
12998 fputs ("ctr", file);
12999 return;
13000
13001 case 'u':
13002 /* High-order or low-order 16 bits of constant, whichever is non-zero,
13003 for use in unsigned operand. */
13004 if (! INT_P (x))
13005 {
13006 output_operand_lossage ("invalid %%u value");
13007 return;
13008 }
13009
13010 uval = INTVAL (x);
13011 if ((uval & 0xffff) == 0)
13012 uval >>= 16;
13013
13014 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
13015 return;
13016
13017 case 'v':
13018 /* High-order 16 bits of constant for use in signed operand. */
13019 if (! INT_P (x))
13020 output_operand_lossage ("invalid %%v value");
13021 else
13022 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
13023 (INTVAL (x) >> 16) & 0xffff);
13024 return;
13025
13026 case 'U':
13027 /* Print `u' if this has an auto-increment or auto-decrement. */
13028 if (MEM_P (x)
13029 && (GET_CODE (XEXP (x, 0)) == PRE_INC
13030 || GET_CODE (XEXP (x, 0)) == PRE_DEC
13031 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
13032 putc ('u', file);
13033 return;
13034
13035 case 'V':
13036 /* Print the trap code for this operand. */
13037 switch (GET_CODE (x))
13038 {
13039 case EQ:
13040 fputs ("eq", file); /* 4 */
13041 break;
13042 case NE:
13043 fputs ("ne", file); /* 24 */
13044 break;
13045 case LT:
13046 fputs ("lt", file); /* 16 */
13047 break;
13048 case LE:
13049 fputs ("le", file); /* 20 */
13050 break;
13051 case GT:
13052 fputs ("gt", file); /* 8 */
13053 break;
13054 case GE:
13055 fputs ("ge", file); /* 12 */
13056 break;
13057 case LTU:
13058 fputs ("llt", file); /* 2 */
13059 break;
13060 case LEU:
13061 fputs ("lle", file); /* 6 */
13062 break;
13063 case GTU:
13064 fputs ("lgt", file); /* 1 */
13065 break;
13066 case GEU:
13067 fputs ("lge", file); /* 5 */
13068 break;
13069 default:
13070 output_operand_lossage ("invalid %%V value");
13071 }
13072 break;
13073
13074 case 'w':
13075 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
13076 normally. */
13077 if (INT_P (x))
13078 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
13079 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
13080 else
13081 print_operand (file, x, 0);
13082 return;
13083
13084 case 'x':
13085 /* X is a FPR or Altivec register used in a VSX context. */
13086 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
13087 output_operand_lossage ("invalid %%x value");
13088 else
13089 {
13090 int reg = REGNO (x);
13091 int vsx_reg = (FP_REGNO_P (reg)
13092 ? reg - 32
13093 : reg - FIRST_ALTIVEC_REGNO + 32);
13094
13095 #ifdef TARGET_REGNAMES
13096 if (TARGET_REGNAMES)
13097 fprintf (file, "%%vs%d", vsx_reg);
13098 else
13099 #endif
13100 fprintf (file, "%d", vsx_reg);
13101 }
13102 return;
13103
13104 case 'X':
13105 if (MEM_P (x)
13106 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
13107 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
13108 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
13109 putc ('x', file);
13110 return;
13111
13112 case 'Y':
13113 /* Like 'L', for third word of TImode/PTImode */
13114 if (REG_P (x))
13115 fputs (reg_names[REGNO (x) + 2], file);
13116 else if (MEM_P (x))
13117 {
13118 machine_mode mode = GET_MODE (x);
13119 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13120 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13121 output_address (mode, plus_constant (Pmode,
13122 XEXP (XEXP (x, 0), 0), 8));
13123 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13124 output_address (mode, plus_constant (Pmode,
13125 XEXP (XEXP (x, 0), 0), 8));
13126 else
13127 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
13128 if (small_data_operand (x, GET_MODE (x)))
13129 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13130 reg_names[SMALL_DATA_REG]);
13131 }
13132 return;
13133
13134 case 'z':
13135 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13136 x = XVECEXP (x, 0, 1);
13137 /* X is a SYMBOL_REF. Write out the name preceded by a
13138 period and without any trailing data in brackets. Used for function
13139 names. If we are configured for System V (or the embedded ABI) on
13140 the PowerPC, do not emit the period, since those systems do not use
13141 TOCs and the like. */
13142 if (!SYMBOL_REF_P (x))
13143 {
13144 output_operand_lossage ("invalid %%z value");
13145 return;
13146 }
13147
13148 /* For macho, check to see if we need a stub. */
13149 if (TARGET_MACHO)
13150 {
13151 const char *name = XSTR (x, 0);
13152 #if TARGET_MACHO
13153 if (darwin_symbol_stubs
13154 && MACHOPIC_INDIRECT
13155 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13156 name = machopic_indirection_name (x, /*stub_p=*/true);
13157 #endif
13158 assemble_name (file, name);
13159 }
13160 else if (!DOT_SYMBOLS)
13161 assemble_name (file, XSTR (x, 0));
13162 else
13163 rs6000_output_function_entry (file, XSTR (x, 0));
13164 return;
13165
13166 case 'Z':
13167 /* Like 'L', for last word of TImode/PTImode. */
13168 if (REG_P (x))
13169 fputs (reg_names[REGNO (x) + 3], file);
13170 else if (MEM_P (x))
13171 {
13172 machine_mode mode = GET_MODE (x);
13173 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13174 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13175 output_address (mode, plus_constant (Pmode,
13176 XEXP (XEXP (x, 0), 0), 12));
13177 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13178 output_address (mode, plus_constant (Pmode,
13179 XEXP (XEXP (x, 0), 0), 12));
13180 else
13181 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
13182 if (small_data_operand (x, GET_MODE (x)))
13183 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13184 reg_names[SMALL_DATA_REG]);
13185 }
13186 return;
13187
13188 /* Print AltiVec memory operand. */
13189 case 'y':
13190 {
13191 rtx tmp;
13192
13193 gcc_assert (MEM_P (x));
13194
13195 tmp = XEXP (x, 0);
13196
13197 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
13198 && GET_CODE (tmp) == AND
13199 && CONST_INT_P (XEXP (tmp, 1))
13200 && INTVAL (XEXP (tmp, 1)) == -16)
13201 tmp = XEXP (tmp, 0);
13202 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
13203 && GET_CODE (tmp) == PRE_MODIFY)
13204 tmp = XEXP (tmp, 1);
13205 if (REG_P (tmp))
13206 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
13207 else
13208 {
13209 if (GET_CODE (tmp) != PLUS
13210 || !REG_P (XEXP (tmp, 0))
13211 || !REG_P (XEXP (tmp, 1)))
13212 {
13213 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
13214 break;
13215 }
13216
13217 if (REGNO (XEXP (tmp, 0)) == 0)
13218 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
13219 reg_names[ REGNO (XEXP (tmp, 0)) ]);
13220 else
13221 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
13222 reg_names[ REGNO (XEXP (tmp, 1)) ]);
13223 }
13224 break;
13225 }
13226
13227 case 0:
13228 if (REG_P (x))
13229 fprintf (file, "%s", reg_names[REGNO (x)]);
13230 else if (MEM_P (x))
13231 {
13232 /* We need to handle PRE_INC and PRE_DEC here, since we need to
13233 know the width from the mode. */
13234 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
13235 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
13236 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
13237 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
13238 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
13239 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
13240 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13241 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
13242 else
13243 output_address (GET_MODE (x), XEXP (x, 0));
13244 }
13245 else if (toc_relative_expr_p (x, false,
13246 &tocrel_base_oac, &tocrel_offset_oac))
13247 /* This hack along with a corresponding hack in
13248 rs6000_output_addr_const_extra arranges to output addends
13249 where the assembler expects to find them. eg.
13250 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
13251 without this hack would be output as "x@toc+4". We
13252 want "x+4@toc". */
13253 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
13254 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
13255 output_addr_const (file, XVECEXP (x, 0, 0));
13256 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13257 output_addr_const (file, XVECEXP (x, 0, 1));
13258 else
13259 output_addr_const (file, x);
13260 return;
13261
13262 case '&':
13263 if (const char *name = get_some_local_dynamic_name ())
13264 assemble_name (file, name);
13265 else
13266 output_operand_lossage ("'%%&' used without any "
13267 "local dynamic TLS references");
13268 return;
13269
13270 default:
13271 output_operand_lossage ("invalid %%xn code");
13272 }
13273 }
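
/* Illustrative examples of the codes above: for (const_int 0x12340000),
   "%u" prints 0x1234 (the low half is zero, so the value is shifted
   down); for (const_int 0x5678) it prints 0x5678. "%w" applied to
   (const_int 0x9876) prints -26506, the low 16 bits sign-extended. */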
13274 \f
13275 /* Print the address of an operand. */
13276
13277 void
13278 print_operand_address (FILE *file, rtx x)
13279 {
13280 if (REG_P (x))
13281 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
13282
13283 /* Is it a PC-relative address? */
13284 else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
13285 {
13286 HOST_WIDE_INT offset;
13287
13288 if (GET_CODE (x) == CONST)
13289 x = XEXP (x, 0);
13290
13291 if (GET_CODE (x) == PLUS)
13292 {
13293 offset = INTVAL (XEXP (x, 1));
13294 x = XEXP (x, 0);
13295 }
13296 else
13297 offset = 0;
13298
13299 output_addr_const (file, x);
13300
13301 if (offset)
13302 fprintf (file, "%+" PRId64, offset);
13303
13304 if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
13305 fprintf (file, "@got");
13306
13307 fprintf (file, "@pcrel");
13308 }
13309 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
13310 || GET_CODE (x) == LABEL_REF)
13311 {
13312 output_addr_const (file, x);
13313 if (small_data_operand (x, GET_MODE (x)))
13314 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13315 reg_names[SMALL_DATA_REG]);
13316 else
13317 gcc_assert (!TARGET_TOC);
13318 }
13319 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
13320 && REG_P (XEXP (x, 1)))
13321 {
13322 if (REGNO (XEXP (x, 0)) == 0)
13323 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
13324 reg_names[ REGNO (XEXP (x, 0)) ]);
13325 else
13326 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
13327 reg_names[ REGNO (XEXP (x, 1)) ]);
13328 }
13329 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
13330 && CONST_INT_P (XEXP (x, 1)))
13331 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
13332 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
13333 #if TARGET_MACHO
13334 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
13335 && CONSTANT_P (XEXP (x, 1)))
13336 {
13337 fprintf (file, "lo16(");
13338 output_addr_const (file, XEXP (x, 1));
13339 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
13340 }
13341 #endif
13342 #if TARGET_ELF
13343 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
13344 && CONSTANT_P (XEXP (x, 1)))
13345 {
13346 output_addr_const (file, XEXP (x, 1));
13347 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
13348 }
13349 #endif
13350 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
13351 {
13352 /* This hack along with a corresponding hack in
13353 rs6000_output_addr_const_extra arranges to output addends
13354 where the assembler expects to find them. eg.
13355 (lo_sum (reg 9)
13356 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
13357 without this hack would be output as "x@toc+8@l(9)". We
13358 want "x+8@toc@l(9)". */
13359 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
13360 if (GET_CODE (x) == LO_SUM)
13361 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
13362 else
13363 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
13364 }
13365 else
13366 output_addr_const (file, x);
13367 }
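
/* Illustrative examples: (plus (reg 3) (const_int 8)) is printed as
   "8(3)", and an indexed address (plus (reg 3) (reg 4)) as "3,4"; the
   operands are swapped when the first register is r0, since r0 used as a
   base register reads as literal zero. */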
13368 \f
13369 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13370
13371 bool
13372 rs6000_output_addr_const_extra (FILE *file, rtx x)
13373 {
13374 if (GET_CODE (x) == UNSPEC)
13375 switch (XINT (x, 1))
13376 {
13377 case UNSPEC_TOCREL:
13378 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
13379 && REG_P (XVECEXP (x, 0, 1))
13380 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
13381 output_addr_const (file, XVECEXP (x, 0, 0));
13382 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
13383 {
13384 if (INTVAL (tocrel_offset_oac) >= 0)
13385 fprintf (file, "+");
13386 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
13387 }
13388 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
13389 {
13390 putc ('-', file);
13391 assemble_name (file, toc_label_name);
13392 need_toc_init = 1;
13393 }
13394 else if (TARGET_ELF)
13395 fputs ("@toc", file);
13396 return true;
13397
13398 #if TARGET_MACHO
13399 case UNSPEC_MACHOPIC_OFFSET:
13400 output_addr_const (file, XVECEXP (x, 0, 0));
13401 putc ('-', file);
13402 machopic_output_function_base_name (file);
13403 return true;
13404 #endif
13405 }
13406 return false;
13407 }
13408 \f
13409 /* Target hook for assembling integer objects. The PowerPC version has
13410 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
13411 is defined. It also needs to handle DI-mode objects on 64-bit
13412 targets. */
13413
13414 static bool
13415 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
13416 {
13417 #ifdef RELOCATABLE_NEEDS_FIXUP
13418 /* Special handling for SI values. */
13419 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
13420 {
13421 static int recurse = 0;
13422
13423 /* For -mrelocatable, we mark all addresses that need to be fixed up in
13424 the .fixup section. Since the TOC section is already relocated, we
13425 don't need to mark it here. We used to skip the text section, but it
13426 should never be valid for relocated addresses to be placed in the text
13427 section. */
13428 if (DEFAULT_ABI == ABI_V4
13429 && (TARGET_RELOCATABLE || flag_pic > 1)
13430 && in_section != toc_section
13431 && !recurse
13432 && !CONST_SCALAR_INT_P (x)
13433 && CONSTANT_P (x))
13434 {
13435 char buf[256];
13436
13437 recurse = 1;
13438 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
13439 fixuplabelno++;
13440 ASM_OUTPUT_LABEL (asm_out_file, buf);
13441 fprintf (asm_out_file, "\t.long\t(");
13442 output_addr_const (asm_out_file, x);
13443 fprintf (asm_out_file, ")@fixup\n");
13444 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
13445 ASM_OUTPUT_ALIGN (asm_out_file, 2);
13446 fprintf (asm_out_file, "\t.long\t");
13447 assemble_name (asm_out_file, buf);
13448 fprintf (asm_out_file, "\n\t.previous\n");
13449 recurse = 0;
13450 return true;
13451 }
13452 /* Remove initial .'s to turn a -mcall-aixdesc function
13453 address into the address of the descriptor, not the function
13454 itself. */
13455 else if (SYMBOL_REF_P (x)
13456 && XSTR (x, 0)[0] == '.'
13457 && DEFAULT_ABI == ABI_AIX)
13458 {
13459 const char *name = XSTR (x, 0);
13460 while (*name == '.')
13461 name++;
13462
13463 fprintf (asm_out_file, "\t.long\t%s\n", name);
13464 return true;
13465 }
13466 }
13467 #endif /* RELOCATABLE_NEEDS_FIXUP */
13468 return default_assemble_integer (x, size, aligned_p);
13469 }
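
/* An illustrative -mrelocatable fixup sequence as emitted above (the
   label name is made up):
	.LCP5:
		.long (sym)@fixup
		.section ".fixup","aw"
		.align 2
		.long .LCP5
		.previous  */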
13470
13471 /* Return a template string for assembly to emit when making an
13472 external call. FUNOP is the call mem argument operand number. */
13473
13474 static const char *
13475 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
13476 {
13477 /* -Wformat-overflow workaround, without which gcc thinks that %u
13478 might produce 10 digits. */
13479 gcc_assert (funop <= MAX_RECOG_OPERANDS);
13480
13481 char arg[12];
13482 arg[0] = 0;
13483 if (GET_CODE (operands[funop + 1]) == UNSPEC)
13484 {
13485 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
13486 sprintf (arg, "(%%%u@tlsgd)", funop + 1);
13487 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
13488 sprintf (arg, "(%%&@tlsld)");
13489 }
13490
13491 /* The magic 32768 offset here corresponds to the offset of
13492 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
13493 char z[11];
13494 sprintf (z, "%%z%u%s", funop,
13495 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
13496 ? "+32768" : ""));
13497
13498 static char str[32]; /* 1 spare */
13499 if (rs6000_pcrel_p (cfun))
13500 sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
13501 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
13502 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
13503 sibcall ? "" : "\n\tnop");
13504 else if (DEFAULT_ABI == ABI_V4)
13505 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
13506 flag_pic ? "@plt" : "");
13507 #if TARGET_MACHO
13508 /* If/when we remove the mlongcall opt, we can share the AIX/ELFv2 case. */
13509 else if (DEFAULT_ABI == ABI_DARWIN)
13510 {
13511 /* The cookie is in operand func+2. */
13512 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
13513 int cookie = INTVAL (operands[funop + 2]);
13514 if (cookie & CALL_LONG)
13515 {
13516 tree funname = get_identifier (XSTR (operands[funop], 0));
13517 tree labelname = get_prev_label (funname);
13518 gcc_checking_assert (labelname && !sibcall);
13519
13520 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
13521 instruction will reach 'foo', otherwise link as 'bl L42'".
13522 "L42" should be a 'branch island', that will do a far jump to
13523 'foo'. Branch islands are generated in
13524 macho_branch_islands(). */
13525 sprintf (str, "jbsr %%z%u,%.10s", funop,
13526 IDENTIFIER_POINTER (labelname));
13527 }
13528 else
13529 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
13530 after the call. */
13531 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
13532 }
13533 #endif
13534 else
13535 gcc_unreachable ();
13536 return str;
13537 }
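
/* Illustrative results: a non-sibling external call on ELFv2 with a TOC
   becomes "bl foo\n\tnop", where the nop is the slot the linker may patch
   with a TOC restore; with pc-relative addressing it is "bl foo@notoc",
   and on V4 with -fPIC it is "bl foo@plt". */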
13538
13539 const char *
13540 rs6000_call_template (rtx *operands, unsigned int funop)
13541 {
13542 return rs6000_call_template_1 (operands, funop, false);
13543 }
13544
13545 const char *
13546 rs6000_sibcall_template (rtx *operands, unsigned int funop)
13547 {
13548 return rs6000_call_template_1 (operands, funop, true);
13549 }
13550
13551 /* As above, for indirect calls. */
13552
13553 static const char *
13554 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
13555 bool sibcall)
13556 {
13557 /* -Wformat-overflow workaround, without which gcc thinks that %u
13558 might produce 10 digits. Note that -Wformat-overflow will not
13559 currently warn here for str[], so do not rely on a warning to
13560 ensure str[] is correctly sized. */
13561 gcc_assert (funop <= MAX_RECOG_OPERANDS);
13562
13563 /* Currently, funop is either 0 or 1. The maximum string is always
13564 a !speculate 64-bit __tls_get_addr call.
13565
13566 ABI_ELFv2, pcrel:
13567 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13568 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
13569 . 9 crset 2\n\t
13570 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13571 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
13572 . 8 beq%T1l-
13573 .---
13574 .142
13575
13576 ABI_AIX:
13577 . 9 ld 2,%3\n\t
13578 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13579 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
13580 . 9 crset 2\n\t
13581 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13582 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
13583 . 10 beq%T1l-\n\t
13584 . 10 ld 2,%4(1)
13585 .---
13586 .151
13587
13588 ABI_ELFv2:
13589 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13590 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
13591 . 9 crset 2\n\t
13592 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13593 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
13594 . 10 beq%T1l-\n\t
13595 . 10 ld 2,%3(1)
13596 .---
13597 .142
13598
13599 ABI_V4:
13600 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13601 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
13602 . 9 crset 2\n\t
13603 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13604 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
13605 . 8 beq%T1l-
13606 .---
13607 .141 */
13608 static char str[160]; /* 8 spare */
13609 char *s = str;
13610 const char *ptrload = TARGET_64BIT ? "d" : "wz";
13611
13612 if (DEFAULT_ABI == ABI_AIX)
13613 s += sprintf (s,
13614 "l%s 2,%%%u\n\t",
13615 ptrload, funop + 2);
13616
13617 /* We don't need the extra code to stop indirect call speculation if
13618 calling via LR. */
13619 bool speculate = (TARGET_MACHO
13620 || rs6000_speculate_indirect_jumps
13621 || (REG_P (operands[funop])
13622 && REGNO (operands[funop]) == LR_REGNO));
13623
13624 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
13625 {
13626 const char *rel64 = TARGET_64BIT ? "64" : "";
13627 char tls[29];
13628 tls[0] = 0;
13629 if (GET_CODE (operands[funop + 1]) == UNSPEC)
13630 {
13631 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
13632 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
13633 rel64, funop + 1);
13634 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
13635 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
13636 rel64);
13637 }
13638
13639 const char *notoc = rs6000_pcrel_p (cfun) ? "_NOTOC" : "";
13640 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
13641 && flag_pic == 2 ? "+32768" : "");
13642 if (!speculate)
13643 {
13644 s += sprintf (s,
13645 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
13646 tls, rel64, notoc, funop, addend);
13647 s += sprintf (s, "crset 2\n\t");
13648 }
13649 s += sprintf (s,
13650 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
13651 tls, rel64, notoc, funop, addend);
13652 }
13653 else if (!speculate)
13654 s += sprintf (s, "crset 2\n\t");
13655
13656 if (rs6000_pcrel_p (cfun))
13657 {
13658 if (speculate)
13659 sprintf (s, "b%%T%ul", funop);
13660 else
13661 sprintf (s, "beq%%T%ul-", funop);
13662 }
13663 else if (DEFAULT_ABI == ABI_AIX)
13664 {
13665 if (speculate)
13666 sprintf (s,
13667 "b%%T%ul\n\t"
13668 "l%s 2,%%%u(1)",
13669 funop, ptrload, funop + 3);
13670 else
13671 sprintf (s,
13672 "beq%%T%ul-\n\t"
13673 "l%s 2,%%%u(1)",
13674 funop, ptrload, funop + 3);
13675 }
13676 else if (DEFAULT_ABI == ABI_ELFv2)
13677 {
13678 if (speculate)
13679 sprintf (s,
13680 "b%%T%ul\n\t"
13681 "l%s 2,%%%u(1)",
13682 funop, ptrload, funop + 2);
13683 else
13684 sprintf (s,
13685 "beq%%T%ul-\n\t"
13686 "l%s 2,%%%u(1)",
13687 funop, ptrload, funop + 2);
13688 }
13689 else
13690 {
13691 if (speculate)
13692 sprintf (s,
13693 "b%%T%u%s",
13694 funop, sibcall ? "" : "l");
13695 else
13696 sprintf (s,
13697 "beq%%T%u%s-%s",
13698 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
13699 }
13700 return str;
13701 }
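
/* An illustrative ELFv2 result: an indirect call through CTR with
   speculation allowed emits "bctrl" followed by "ld 2,24(1)" to restore
   the TOC pointer; with -mno-speculate-indirect-jumps the branch becomes
   "crset 2" followed by "beqctrl-". */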
13702
13703 const char *
13704 rs6000_indirect_call_template (rtx *operands, unsigned int funop)
13705 {
13706 return rs6000_indirect_call_template_1 (operands, funop, false);
13707 }
13708
13709 const char *
13710 rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
13711 {
13712 return rs6000_indirect_call_template_1 (operands, funop, true);
13713 }
13714
13715 #if HAVE_AS_PLTSEQ
13716 /* Output indirect call insns. WHICH identifies the type of sequence. */
13717 const char *
13718 rs6000_pltseq_template (rtx *operands, int which)
13719 {
13720 const char *rel64 = TARGET_64BIT ? "64" : "";
13721 char tls[30];
13722 tls[0] = 0;
13723 if (GET_CODE (operands[3]) == UNSPEC)
13724 {
13725 char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
13726 if (XINT (operands[3], 1) == UNSPEC_TLSGD)
13727 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
13728 off, rel64);
13729 else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
13730 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
13731 off, rel64);
13732 }
13733
13734 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
13735 static char str[96]; /* 10 spare */
13736 char off = WORDS_BIG_ENDIAN ? '2' : '4';
13737 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
13738 && flag_pic == 2 ? "+32768" : "");
13739 switch (which)
13740 {
13741 case RS6000_PLTSEQ_TOCSAVE:
13742 sprintf (str,
13743 "st%s\n\t"
13744 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
13745 TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
13746 tls, rel64);
13747 break;
13748 case RS6000_PLTSEQ_PLT16_HA:
13749 if (DEFAULT_ABI == ABI_V4 && !flag_pic)
13750 sprintf (str,
13751 "lis %%0,0\n\t"
13752 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
13753 tls, off, rel64);
13754 else
13755 sprintf (str,
13756 "addis %%0,%%1,0\n\t"
13757 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
13758 tls, off, rel64, addend);
13759 break;
13760 case RS6000_PLTSEQ_PLT16_LO:
13761 sprintf (str,
13762 "l%s %%0,0(%%1)\n\t"
13763 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
13764 TARGET_64BIT ? "d" : "wz",
13765 tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
13766 break;
13767 case RS6000_PLTSEQ_MTCTR:
13768 sprintf (str,
13769 "mtctr %%1\n\t"
13770 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
13771 tls, rel64, addend);
13772 break;
13773 case RS6000_PLTSEQ_PLT_PCREL34:
13774 sprintf (str,
13775 "pl%s %%0,0(0),1\n\t"
13776 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
13777 TARGET_64BIT ? "d" : "wz",
13778 tls, rel64);
13779 break;
13780 default:
13781 gcc_unreachable ();
13782 }
13783 return str;
13784 }
13785 #endif
13786 \f
13787 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
13788 /* Emit an assembler directive to set symbol visibility for DECL to
13789 VISIBILITY_TYPE. */
13790
13791 static void
13792 rs6000_assemble_visibility (tree decl, int vis)
13793 {
13794 if (TARGET_XCOFF)
13795 return;
13796
13797 /* Functions need to have their entry point symbol visibility set as
13798 well as their descriptor symbol visibility. */
13799 if (DEFAULT_ABI == ABI_AIX
13800 && DOT_SYMBOLS
13801 && TREE_CODE (decl) == FUNCTION_DECL)
13802 {
13803 static const char * const visibility_types[] = {
13804 NULL, "protected", "hidden", "internal"
13805 };
13806
13807 const char *name, *type;
13808
13809 name = ((* targetm.strip_name_encoding)
13810 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
13811 type = visibility_types[vis];
13812
13813 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
13814 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
13815 }
13816 else
13817 default_assemble_visibility (decl, vis);
13818 }
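
/* An illustrative example: a hidden function under ABI_AIX with
   dot-symbols gets both ".hidden foo" and ".hidden .foo", covering the
   descriptor symbol and the code entry symbol. */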
13819 #endif
13820 \f
13821 enum rtx_code
13822 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
13823 {
13824 /* Reversal of FP compares takes care -- an ordered compare
13825 becomes an unordered compare and vice versa. */
13826 if (mode == CCFPmode
13827 && (!flag_finite_math_only
13828 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
13829 || code == UNEQ || code == LTGT))
13830 return reverse_condition_maybe_unordered (code);
13831 else
13832 return reverse_condition (code);
13833 }
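
/* An illustrative example: reversing GE in CCFPmode yields UNLT, so a
   NaN operand still takes the correct branch; in CCmode the plain
   reversal to LT is used. */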
13834
13835 /* Generate a compare for CODE. Return a brand-new rtx that
13836 represents the result of the compare. */
13837
13838 static rtx
13839 rs6000_generate_compare (rtx cmp, machine_mode mode)
13840 {
13841 machine_mode comp_mode;
13842 rtx compare_result;
13843 enum rtx_code code = GET_CODE (cmp);
13844 rtx op0 = XEXP (cmp, 0);
13845 rtx op1 = XEXP (cmp, 1);
13846
13847 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
13848 comp_mode = CCmode;
13849 else if (FLOAT_MODE_P (mode))
13850 comp_mode = CCFPmode;
13851 else if (code == GTU || code == LTU
13852 || code == GEU || code == LEU)
13853 comp_mode = CCUNSmode;
13854 else if ((code == EQ || code == NE)
13855 && unsigned_reg_p (op0)
13856 && (unsigned_reg_p (op1)
13857 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
13858 /* These are unsigned values, perhaps there will be a later
13859 ordering compare that can be shared with this one. */
13860 comp_mode = CCUNSmode;
13861 else
13862 comp_mode = CCmode;
13863
13864 /* If we have an unsigned compare, make sure we don't have a signed value as
13865 an immediate. */
13866 if (comp_mode == CCUNSmode && CONST_INT_P (op1)
13867 && INTVAL (op1) < 0)
13868 {
13869 op0 = copy_rtx_if_shared (op0);
13870 op1 = force_reg (GET_MODE (op0), op1);
13871 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
13872 }
13873
13874 /* First, the compare. */
13875 compare_result = gen_reg_rtx (comp_mode);
13876
13877 /* IEEE 128-bit support in VSX registers when we do not have hardware
13878 support. */
13879 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
13880 {
13881 rtx libfunc = NULL_RTX;
13882 bool check_nan = false;
13883 rtx dest;
13884
13885 switch (code)
13886 {
13887 case EQ:
13888 case NE:
13889 libfunc = optab_libfunc (eq_optab, mode);
13890 break;
13891
13892 case GT:
13893 case GE:
13894 libfunc = optab_libfunc (ge_optab, mode);
13895 break;
13896
13897 case LT:
13898 case LE:
13899 libfunc = optab_libfunc (le_optab, mode);
13900 break;
13901
13902 case UNORDERED:
13903 case ORDERED:
13904 libfunc = optab_libfunc (unord_optab, mode);
13905 code = (code == UNORDERED) ? NE : EQ;
13906 break;
13907
13908 case UNGE:
13909 case UNGT:
13910 check_nan = true;
13911 libfunc = optab_libfunc (ge_optab, mode);
13912 code = (code == UNGE) ? GE : GT;
13913 break;
13914
13915 case UNLE:
13916 case UNLT:
13917 check_nan = true;
13918 libfunc = optab_libfunc (le_optab, mode);
13919 code = (code == UNLE) ? LE : LT;
13920 break;
13921
13922 case UNEQ:
13923 case LTGT:
13924 check_nan = true;
13925 libfunc = optab_libfunc (eq_optab, mode);
13926 code = (code == UNEQ) ? EQ : NE;
13927 break;
13928
13929 default:
13930 gcc_unreachable ();
13931 }
13932
13933 gcc_assert (libfunc);
13934
13935 if (!check_nan)
13936 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
13937 SImode, op0, mode, op1, mode);
13938
13939 /* The library signals an exception for signalling NaNs, so we need to
13940 handle isgreater, etc. by first checking isordered. */
13941 else
13942 {
13943 rtx ne_rtx, normal_dest, unord_dest;
13944 rtx unord_func = optab_libfunc (unord_optab, mode);
13945 rtx join_label = gen_label_rtx ();
13946 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
13947 rtx unord_cmp = gen_reg_rtx (comp_mode);
13948
13949
13950 /* Test for either value being a NaN. */
13951 gcc_assert (unord_func);
13952 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
13953 SImode, op0, mode, op1, mode);
13954
13955 /* Set value (1) if either value is a NaN, and jump to the join
13956 label. */
13957 dest = gen_reg_rtx (SImode);
13958 emit_move_insn (dest, const1_rtx);
13959 emit_insn (gen_rtx_SET (unord_cmp,
13960 gen_rtx_COMPARE (comp_mode, unord_dest,
13961 const0_rtx)));
13962
13963 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
13964 emit_jump_insn (gen_rtx_SET (pc_rtx,
13965 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
13966 join_ref,
13967 pc_rtx)));
13968
13969 /* Do the normal comparison, knowing that the values are not
13970 NaNs. */
13971 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
13972 SImode, op0, mode, op1, mode);
13973
13974 emit_insn (gen_cstoresi4 (dest,
13975 gen_rtx_fmt_ee (code, SImode, normal_dest,
13976 const0_rtx),
13977 normal_dest, const0_rtx));
13978
13979 /* Join NaN and non-NaN paths. Compare dest against 0. */
13980 emit_label (join_label);
13981 code = NE;
13982 }
13983
13984 emit_insn (gen_rtx_SET (compare_result,
13985 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
13986 }
13987
13988 else
13989 {
13990 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
13991 CLOBBERs to match cmptf_internal2 pattern. */
13992 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
13993 && FLOAT128_IBM_P (GET_MODE (op0))
13994 && TARGET_HARD_FLOAT)
13995 emit_insn (gen_rtx_PARALLEL (VOIDmode,
13996 gen_rtvec (10,
13997 gen_rtx_SET (compare_result,
13998 gen_rtx_COMPARE (comp_mode, op0, op1)),
13999 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14000 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14001 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14002 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14003 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14004 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14005 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14006 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14007 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
14008 else if (GET_CODE (op1) == UNSPEC
14009 && XINT (op1, 1) == UNSPEC_SP_TEST)
14010 {
14011 rtx op1b = XVECEXP (op1, 0, 0);
14012 comp_mode = CCEQmode;
14013 compare_result = gen_reg_rtx (CCEQmode);
14014 if (TARGET_64BIT)
14015 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
14016 else
14017 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
14018 }
14019 else
14020 emit_insn (gen_rtx_SET (compare_result,
14021 gen_rtx_COMPARE (comp_mode, op0, op1)));
14022 }
14023
14024 validate_condition_mode (code, GET_MODE (compare_result));
14025
14026 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
14027 }
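
/* An illustrative example: (gtu (reg:DI x) (const_int -16)) selects
   CCUNSmode above, and because the immediate is negative it is first
   forced into a register so the unsigned compare never sees a signed
   constant operand. */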
14028
14029 \f
14030 /* Return the diagnostic message string if the binary operation OP is
14031 not permitted on TYPE1 and TYPE2, NULL otherwise. */
14032
14033 static const char*
14034 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
14035 const_tree type1,
14036 const_tree type2)
14037 {
14038 machine_mode mode1 = TYPE_MODE (type1);
14039 machine_mode mode2 = TYPE_MODE (type2);
14040
14041 /* For complex modes, use the inner type. */
14042 if (COMPLEX_MODE_P (mode1))
14043 mode1 = GET_MODE_INNER (mode1);
14044
14045 if (COMPLEX_MODE_P (mode2))
14046 mode2 = GET_MODE_INNER (mode2);
14047
14048 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
14049 double to intermix unless -mfloat128-convert. */
14050 if (mode1 == mode2)
14051 return NULL;
14052
14053 if (!TARGET_FLOAT128_CVT)
14054 {
14055 if ((mode1 == KFmode && mode2 == IFmode)
14056 || (mode1 == IFmode && mode2 == KFmode))
14057 return N_("__float128 and __ibm128 cannot be used in the same "
14058 "expression");
14059
14060 if (TARGET_IEEEQUAD
14061 && ((mode1 == IFmode && mode2 == TFmode)
14062 || (mode1 == TFmode && mode2 == IFmode)))
14063 return N_("__ibm128 and long double cannot be used in the same "
14064 "expression");
14065
14066 if (!TARGET_IEEEQUAD
14067 && ((mode1 == KFmode && mode2 == TFmode)
14068 || (mode1 == TFmode && mode2 == KFmode)))
14069 return N_("__float128 and long double cannot be used in the same "
14070 "expression");
14071 }
14072
14073 return NULL;
14074 }
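
/* An illustrative example: without -mfloat128-convert, adding a
   __float128 value to an __ibm128 value trips the first message above,
   since KFmode and IFmode use different 128-bit encodings. */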
14075
14076 \f
14077 /* Expand floating point conversion to/from __float128 and __ibm128. */
14078
14079 void
14080 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
14081 {
14082 machine_mode dest_mode = GET_MODE (dest);
14083 machine_mode src_mode = GET_MODE (src);
14084 convert_optab cvt = unknown_optab;
14085 bool do_move = false;
14086 rtx libfunc = NULL_RTX;
14087 rtx dest2;
14088 typedef rtx (*rtx_2func_t) (rtx, rtx);
14089 rtx_2func_t hw_convert = (rtx_2func_t)0;
14090 size_t kf_or_tf;
14091
14092 struct hw_conv_t {
14093 rtx_2func_t from_df;
14094 rtx_2func_t from_sf;
14095 rtx_2func_t from_si_sign;
14096 rtx_2func_t from_si_uns;
14097 rtx_2func_t from_di_sign;
14098 rtx_2func_t from_di_uns;
14099 rtx_2func_t to_df;
14100 rtx_2func_t to_sf;
14101 rtx_2func_t to_si_sign;
14102 rtx_2func_t to_si_uns;
14103 rtx_2func_t to_di_sign;
14104 rtx_2func_t to_di_uns;
14105 } hw_conversions[2] = {
14106 /* conversions to/from KFmode */
14107 {
14108 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
14109 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
14110 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
14111 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
14112 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
14113 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
14114 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
14115 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
14116 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
14117 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
14118 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
14119 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
14120 },
14121
14122 /* conversions to/from TFmode */
14123 {
14124 gen_extenddftf2_hw, /* TFmode <- DFmode. */
14125 gen_extendsftf2_hw, /* TFmode <- SFmode. */
14126 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
14127 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
14128 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
14129 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
14130 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
14131 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
14132 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
14133 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
14134 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
14135 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
14136 },
14137 };
14138
14139 if (dest_mode == src_mode)
14140 gcc_unreachable ();
14141
14142 /* Eliminate memory operations. */
14143 if (MEM_P (src))
14144 src = force_reg (src_mode, src);
14145
14146 if (MEM_P (dest))
14147 {
14148 rtx tmp = gen_reg_rtx (dest_mode);
14149 rs6000_expand_float128_convert (tmp, src, unsigned_p);
14150 rs6000_emit_move (dest, tmp, dest_mode);
14151 return;
14152 }
14153
14154 /* Convert to IEEE 128-bit floating point. */
14155 if (FLOAT128_IEEE_P (dest_mode))
14156 {
14157 if (dest_mode == KFmode)
14158 kf_or_tf = 0;
14159 else if (dest_mode == TFmode)
14160 kf_or_tf = 1;
14161 else
14162 gcc_unreachable ();
14163
14164 switch (src_mode)
14165 {
14166 case E_DFmode:
14167 cvt = sext_optab;
14168 hw_convert = hw_conversions[kf_or_tf].from_df;
14169 break;
14170
14171 case E_SFmode:
14172 cvt = sext_optab;
14173 hw_convert = hw_conversions[kf_or_tf].from_sf;
14174 break;
14175
14176 case E_KFmode:
14177 case E_IFmode:
14178 case E_TFmode:
14179 if (FLOAT128_IBM_P (src_mode))
14180 cvt = sext_optab;
14181 else
14182 do_move = true;
14183 break;
14184
14185 case E_SImode:
14186 if (unsigned_p)
14187 {
14188 cvt = ufloat_optab;
14189 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
14190 }
14191 else
14192 {
14193 cvt = sfloat_optab;
14194 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
14195 }
14196 break;
14197
14198 case E_DImode:
14199 if (unsigned_p)
14200 {
14201 cvt = ufloat_optab;
14202 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
14203 }
14204 else
14205 {
14206 cvt = sfloat_optab;
14207 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
14208 }
14209 break;
14210
14211 default:
14212 gcc_unreachable ();
14213 }
14214 }
14215
14216 /* Convert from IEEE 128-bit floating point. */
14217 else if (FLOAT128_IEEE_P (src_mode))
14218 {
14219 if (src_mode == KFmode)
14220 kf_or_tf = 0;
14221 else if (src_mode == TFmode)
14222 kf_or_tf = 1;
14223 else
14224 gcc_unreachable ();
14225
14226 switch (dest_mode)
14227 {
14228 case E_DFmode:
14229 cvt = trunc_optab;
14230 hw_convert = hw_conversions[kf_or_tf].to_df;
14231 break;
14232
14233 case E_SFmode:
14234 cvt = trunc_optab;
14235 hw_convert = hw_conversions[kf_or_tf].to_sf;
14236 break;
14237
14238 case E_KFmode:
14239 case E_IFmode:
14240 case E_TFmode:
14241 if (FLOAT128_IBM_P (dest_mode))
14242 cvt = trunc_optab;
14243 else
14244 do_move = true;
14245 break;
14246
14247 case E_SImode:
14248 if (unsigned_p)
14249 {
14250 cvt = ufix_optab;
14251 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
14252 }
14253 else
14254 {
14255 cvt = sfix_optab;
14256 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
14257 }
14258 break;
14259
14260 case E_DImode:
14261 if (unsigned_p)
14262 {
14263 cvt = ufix_optab;
14264 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
14265 }
14266 else
14267 {
14268 cvt = sfix_optab;
14269 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
14270 }
14271 break;
14272
14273 default:
14274 gcc_unreachable ();
14275 }
14276 }
14277
14278 /* Both IBM format. */
14279 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
14280 do_move = true;
14281
14282 else
14283 gcc_unreachable ();
14284
14285 /* Handle conversion between TFmode/KFmode/IFmode. */
14286 if (do_move)
14287 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
14288
14289 /* Handle conversion if we have hardware support. */
14290 else if (TARGET_FLOAT128_HW && hw_convert)
14291 emit_insn ((hw_convert) (dest, src));
14292
14293 /* Call an external function to do the conversion. */
14294 else if (cvt != unknown_optab)
14295 {
14296 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
14297 gcc_assert (libfunc != NULL_RTX);
14298
14299 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
14300 src, src_mode);
14301
14302 gcc_assert (dest2 != NULL_RTX);
14303 if (!rtx_equal_p (dest, dest2))
14304 emit_move_insn (dest, dest2);
14305 }
14306
14307 else
14308 gcc_unreachable ();
14309
14310 return;
14311 }
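
/* Worked example (hedged sketch): converting DFmode to KFmode selects
   cvt = sext_optab and hw_convert = gen_extenddfkf2_hw from the table
   above. With TARGET_FLOAT128_HW the single hardware pattern is
   emitted; otherwise the sext_optab libfunc registered for
   KFmode <- DFmode (typically __extenddfkf2 in libgcc) is called. */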
14312
14313 \f
14314 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
14315 can be used as that dest register. Return the dest register. */
14316
14317 rtx
14318 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
14319 {
14320 if (op2 == const0_rtx)
14321 return op1;
14322
14323 if (GET_CODE (scratch) == SCRATCH)
14324 scratch = gen_reg_rtx (mode);
14325
14326 if (logical_operand (op2, mode))
14327 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
14328 else
14329 emit_insn (gen_rtx_SET (scratch,
14330 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
14331
14332 return scratch;
14333 }
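
/* Worked example (editorial): with MODE == SImode and OP2 ==
   GEN_INT (5), 5 satisfies logical_operand, so this emits
   scratch = op1 ^ 5, and scratch is zero exactly when op1 == 5,
   ready for a later compare against zero. A constant that is not a
   logical_operand goes through scratch = op1 + (-op2) instead. */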
14334
14335 /* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
14336 requires this. The result is mode MODE. */
14337 rtx
14338 rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
14339 {
14340 rtx cond[2];
14341 int n = 0;
14342 if (code == LTGT || code == LE || code == UNLT)
14343 cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
14344 if (code == LTGT || code == GE || code == UNGT)
14345 cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
14346 if (code == LE || code == GE || code == UNEQ)
14347 cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
14348 if (code == UNLT || code == UNGT || code == UNEQ)
14349 cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);
14350
14351 gcc_assert (n == 2);
14352
14353 rtx cc = gen_reg_rtx (CCEQmode);
14354 rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
14355 emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));
14356
14357 return cc;
14358 }
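
/* Worked example (editorial): for CODE == LE the two conditions
   selected above are LT and EQ, so the emitted cror computes the CR
   bit (x < 0) | (x == 0); for CODE == UNEQ they are EQ and UNORDERED.
   Exactly two of the four tests match any supported CODE, hence the
   n == 2 assertion. */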
14359
14360 void
14361 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
14362 {
14363 rtx condition_rtx = rs6000_generate_compare (operands[1], mode);
14364 rtx_code cond_code = GET_CODE (condition_rtx);
14365
14366 if (FLOAT_MODE_P (mode) && HONOR_NANS (mode)
14367 && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW))
14368 ;
14369 else if (cond_code == NE
14370 || cond_code == GE || cond_code == LE
14371 || cond_code == GEU || cond_code == LEU
14372 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
14373 {
14374 rtx not_result = gen_reg_rtx (CCEQmode);
14375 rtx not_op, rev_cond_rtx;
14376 machine_mode cc_mode;
14377
14378 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
14379
14380 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
14381 SImode, XEXP (condition_rtx, 0), const0_rtx);
14382 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
14383 emit_insn (gen_rtx_SET (not_result, not_op));
14384 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
14385 }
14386
14387 machine_mode op_mode = GET_MODE (XEXP (operands[1], 0));
14388 if (op_mode == VOIDmode)
14389 op_mode = GET_MODE (XEXP (operands[1], 1));
14390
14391 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
14392 {
14393 PUT_MODE (condition_rtx, DImode);
14394 convert_move (operands[0], condition_rtx, 0);
14395 }
14396 else
14397 {
14398 PUT_MODE (condition_rtx, SImode);
14399 emit_insn (gen_rtx_SET (operands[0], condition_rtx));
14400 }
14401 }
14402
14403 /* Emit a conditional branch testing the comparison OPERANDS[0] in MODE, targeting label OPERANDS[3]. */
14404
14405 void
14406 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
14407 {
14408 rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
14409 rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
14410 rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
14411 emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
14412 }
14413
14414 /* Return the string to output a conditional branch to LABEL, which is
14415 the operand template of the label, or NULL if the branch is really a
14416 conditional return.
14417
14418 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
14419 condition code register and its mode specifies what kind of
14420 comparison we made.
14421
14422 REVERSED is nonzero if we should reverse the sense of the comparison.
14423
14424 INSN is the insn. */
14425
14426 char *
14427 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
14428 {
14429 static char string[64];
14430 enum rtx_code code = GET_CODE (op);
14431 rtx cc_reg = XEXP (op, 0);
14432 machine_mode mode = GET_MODE (cc_reg);
14433 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
14434 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
14435 int really_reversed = reversed ^ need_longbranch;
14436 char *s = string;
14437 const char *ccode;
14438 const char *pred;
14439 rtx note;
14440
14441 validate_condition_mode (code, mode);
14442
14443 /* Work out which way this really branches. We could always use
14444 reverse_condition_maybe_unordered here, but distinguishing the
14445 cases makes the resulting assembler clearer. */
14446 if (really_reversed)
14447 {
14448 /* Reversal of FP compares needs care -- an ordered compare
14449 becomes an unordered compare and vice versa. */
14450 if (mode == CCFPmode)
14451 code = reverse_condition_maybe_unordered (code);
14452 else
14453 code = reverse_condition (code);
14454 }
14455
14456 switch (code)
14457 {
14458 /* Not all of these are actually distinct opcodes, but
14459 we distinguish them for clarity of the resulting assembler. */
14460 case NE: case LTGT:
14461 ccode = "ne"; break;
14462 case EQ: case UNEQ:
14463 ccode = "eq"; break;
14464 case GE: case GEU:
14465 ccode = "ge"; break;
14466 case GT: case GTU: case UNGT:
14467 ccode = "gt"; break;
14468 case LE: case LEU:
14469 ccode = "le"; break;
14470 case LT: case LTU: case UNLT:
14471 ccode = "lt"; break;
14472 case UNORDERED: ccode = "un"; break;
14473 case ORDERED: ccode = "nu"; break;
14474 case UNGE: ccode = "nl"; break;
14475 case UNLE: ccode = "ng"; break;
14476 default:
14477 gcc_unreachable ();
14478 }
14479
14480 /* Maybe we have a guess as to how likely the branch is. */
14481 pred = "";
14482 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
14483 if (note != NULL_RTX)
14484 {
14485 /* PROB is the difference from 50%. */
14486 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
14487 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
14488
14489 /* Only hint for highly probable/improbable branches on newer cpus when
14490 we have real profile data, as static prediction overrides processor
14491 dynamic prediction. For older cpus we may as well always hint, but
14492 assume not taken for branches that are very close to 50% as a
14493 mispredicted taken branch is more expensive than a
14494 mispredicted not-taken branch. */
14495 if (rs6000_always_hint
14496 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
14497 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
14498 && br_prob_note_reliable_p (note)))
14499 {
14500 if (abs (prob) > REG_BR_PROB_BASE / 20
14501 && ((prob > 0) ^ need_longbranch))
14502 pred = "+";
14503 else
14504 pred = "-";
14505 }
14506 }
14507
14508 if (label == NULL)
14509 s += sprintf (s, "b%slr%s ", ccode, pred);
14510 else
14511 s += sprintf (s, "b%s%s ", ccode, pred);
14512
14513 /* We need to escape any '%' characters in the reg_names string.
14514 Assume they'd only be the first character.... */
14515 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
14516 *s++ = '%';
14517 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
14518
14519 if (label != NULL)
14520 {
14521 /* If the branch distance was too far, we may have to use an
14522 unconditional branch to go the distance. */
14523 if (need_longbranch)
14524 s += sprintf (s, ",$+8\n\tb %s", label);
14525 else
14526 s += sprintf (s, ",%s", label);
14527 }
14528
14529 return string;
14530 }
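
/* Example output (hedged; register names and hints depend on the
   target headers and the profile note): for OP == (ne CR0 0), LABEL
   ".L5" and a reliably-taken branch, the returned template is roughly
   "bne+ 0,.L5". When the long form is needed, the sense is reversed
   and an unconditional jump is appended: "beq 0,$+8\n\tb .L5". */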
14531
14532 /* Return insn for VSX or Altivec comparisons. */
14533
14534 static rtx
14535 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
14536 {
14537 rtx mask;
14538 machine_mode mode = GET_MODE (op0);
14539
14540 switch (code)
14541 {
14542 default:
14543 break;
14544
14545 case GE:
14546 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14547 return NULL_RTX;
14548 /* FALLTHRU */
14549
14550 case EQ:
14551 case GT:
14552 case GTU:
14553 case ORDERED:
14554 case UNORDERED:
14555 case UNEQ:
14556 case LTGT:
14557 mask = gen_reg_rtx (mode);
14558 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
14559 return mask;
14560 }
14561
14562 return NULL_RTX;
14563 }
14564
14565 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
14566 DMODE is expected destination mode. This is a recursive function. */
14567
14568 static rtx
14569 rs6000_emit_vector_compare (enum rtx_code rcode,
14570 rtx op0, rtx op1,
14571 machine_mode dmode)
14572 {
14573 rtx mask;
14574 bool swap_operands = false;
14575 bool try_again = false;
14576
14577 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
14578 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
14579
14580 /* See if the comparison works as is. */
14581 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
14582 if (mask)
14583 return mask;
14584
14585 switch (rcode)
14586 {
14587 case LT:
14588 rcode = GT;
14589 swap_operands = true;
14590 try_again = true;
14591 break;
14592 case LTU:
14593 rcode = GTU;
14594 swap_operands = true;
14595 try_again = true;
14596 break;
14597 case NE:
14598 case UNLE:
14599 case UNLT:
14600 case UNGE:
14601 case UNGT:
14602 /* Invert condition and try again.
14603 e.g., A != B becomes ~(A==B). */
14604 {
14605 enum rtx_code rev_code;
14606 enum insn_code nor_code;
14607 rtx mask2;
14608
14609 rev_code = reverse_condition_maybe_unordered (rcode);
14610 if (rev_code == UNKNOWN)
14611 return NULL_RTX;
14612
14613 nor_code = optab_handler (one_cmpl_optab, dmode);
14614 if (nor_code == CODE_FOR_nothing)
14615 return NULL_RTX;
14616
14617 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
14618 if (!mask2)
14619 return NULL_RTX;
14620
14621 mask = gen_reg_rtx (dmode);
14622 emit_insn (GEN_FCN (nor_code) (mask, mask2));
14623 return mask;
14624 }
14625 break;
14626 case GE:
14627 case GEU:
14628 case LE:
14629 case LEU:
14630 /* Try GT/GTU/LT/LTU OR EQ */
14631 {
14632 rtx c_rtx, eq_rtx;
14633 enum insn_code ior_code;
14634 enum rtx_code new_code;
14635
14636 switch (rcode)
14637 {
14638 case GE:
14639 new_code = GT;
14640 break;
14641
14642 case GEU:
14643 new_code = GTU;
14644 break;
14645
14646 case LE:
14647 new_code = LT;
14648 break;
14649
14650 case LEU:
14651 new_code = LTU;
14652 break;
14653
14654 default:
14655 gcc_unreachable ();
14656 }
14657
14658 ior_code = optab_handler (ior_optab, dmode);
14659 if (ior_code == CODE_FOR_nothing)
14660 return NULL_RTX;
14661
14662 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
14663 if (!c_rtx)
14664 return NULL_RTX;
14665
14666 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
14667 if (!eq_rtx)
14668 return NULL_RTX;
14669
14670 mask = gen_reg_rtx (dmode);
14671 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
14672 return mask;
14673 }
14674 break;
14675 default:
14676 return NULL_RTX;
14677 }
14678
14679 if (try_again)
14680 {
14681 if (swap_operands)
14682 std::swap (op0, op1);
14683
14684 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
14685 if (mask)
14686 return mask;
14687 }
14688
14689 /* You only get two chances. */
14690 return NULL_RTX;
14691 }
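
/* Worked example (editorial): an integer V4SI LE has no direct
   instruction, so the GE/GEU/LE/LEU arm rewrites it as LT OR EQ: two
   vector compares combined through ior_optab. NE instead takes the
   inversion arm: the reversed EQ mask is computed and complemented
   via one_cmpl_optab, i.e. A != B becomes ~(A == B). */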
14692
14693 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
14694 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
14695 operands for the relation operation COND. */
14696
14697 int
14698 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
14699 rtx cond, rtx cc_op0, rtx cc_op1)
14700 {
14701 machine_mode dest_mode = GET_MODE (dest);
14702 machine_mode mask_mode = GET_MODE (cc_op0);
14703 enum rtx_code rcode = GET_CODE (cond);
14704 machine_mode cc_mode = CCmode;
14705 rtx mask;
14706 rtx cond2;
14707 bool invert_move = false;
14708
14709 if (VECTOR_UNIT_NONE_P (dest_mode))
14710 return 0;
14711
14712 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
14713 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
14714
14715 switch (rcode)
14716 {
14717 /* Swap operands if we can, and fall back to doing the operation as
14718 specified, and doing a NOR to invert the test. */
14719 case NE:
14720 case UNLE:
14721 case UNLT:
14722 case UNGE:
14723 case UNGT:
14724 /* Invert condition and try again.
14725 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
14726 invert_move = true;
14727 rcode = reverse_condition_maybe_unordered (rcode);
14728 if (rcode == UNKNOWN)
14729 return 0;
14730 break;
14731
14732 case GE:
14733 case LE:
14734 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
14735 {
14736 /* Invert condition to avoid compound test. */
14737 invert_move = true;
14738 rcode = reverse_condition (rcode);
14739 }
14740 break;
14741
14742 case GTU:
14743 case GEU:
14744 case LTU:
14745 case LEU:
14746 /* Mark unsigned tests with CCUNSmode. */
14747 cc_mode = CCUNSmode;
14748
14749 /* Invert condition to avoid compound test if necessary. */
14750 if (rcode == GEU || rcode == LEU)
14751 {
14752 invert_move = true;
14753 rcode = reverse_condition (rcode);
14754 }
14755 break;
14756
14757 default:
14758 break;
14759 }
14760
14761 /* Get the vector mask for the given relational operations. */
14762 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
14763
14764 if (!mask)
14765 return 0;
14766
14767 if (invert_move)
14768 std::swap (op_true, op_false);
14769
14770 /* The compare mask is known to be -1/0 per element; exploit that when the arms are constant vectors. */
14771 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
14772 && (GET_CODE (op_true) == CONST_VECTOR
14773 || GET_CODE (op_false) == CONST_VECTOR))
14774 {
14775 rtx constant_0 = CONST0_RTX (dest_mode);
14776 rtx constant_m1 = CONSTM1_RTX (dest_mode);
14777
14778 if (op_true == constant_m1 && op_false == constant_0)
14779 {
14780 emit_move_insn (dest, mask);
14781 return 1;
14782 }
14783
14784 else if (op_true == constant_0 && op_false == constant_m1)
14785 {
14786 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
14787 return 1;
14788 }
14789
14790 /* If we can't use the vector comparison directly, perhaps we can use
14791 the mask for the true or false fields, instead of loading up a
14792 constant. */
14793 if (op_true == constant_m1)
14794 op_true = mask;
14795
14796 if (op_false == constant_0)
14797 op_false = mask;
14798 }
14799
14800 if (!REG_P (op_true) && !SUBREG_P (op_true))
14801 op_true = force_reg (dest_mode, op_true);
14802
14803 if (!REG_P (op_false) && !SUBREG_P (op_false))
14804 op_false = force_reg (dest_mode, op_false);
14805
14806 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
14807 CONST0_RTX (dest_mode));
14808 emit_insn (gen_rtx_SET (dest,
14809 gen_rtx_IF_THEN_ELSE (dest_mode,
14810 cond2,
14811 op_true,
14812 op_false)));
14813 return 1;
14814 }
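
/* Worked example (editorial): for DEST = (A > B) ? C : D on V4SI, the
   mask from rs6000_emit_vector_compare is all-ones/all-zeros per
   element, and the final insn is the select DEST = (mask != 0) ? C : D
   (vsel/xxsel). When C is CONSTM1_RTX and D is CONST0_RTX the select
   is dropped entirely and the mask is moved to DEST, as above. */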
14815
14816 /* ISA 3.0 (power9) minmax subcase to emit an XSMAXCDP or XSMINCDP instruction
14817 for SF/DF scalars. Move TRUE_COND to DEST if OP applied to the operands of
14818 the last comparison is nonzero/true, FALSE_COND if it is zero/false. Return
14819 0 if the hardware has no such operation. */
14820
14821 static int
14822 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
14823 {
14824 enum rtx_code code = GET_CODE (op);
14825 rtx op0 = XEXP (op, 0);
14826 rtx op1 = XEXP (op, 1);
14827 machine_mode compare_mode = GET_MODE (op0);
14828 machine_mode result_mode = GET_MODE (dest);
14829 bool max_p = false;
14830
14831 if (result_mode != compare_mode)
14832 return 0;
14833
14834 if (code == GE || code == GT)
14835 max_p = true;
14836 else if (code == LE || code == LT)
14837 max_p = false;
14838 else
14839 return 0;
14840
14841 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
14842 ;
14843
14844 /* Only when NaNs and signed zeros need not be honored can smax be
14845 used for `op0 < op1 ? op1 : op0` and smin for
14846 `op0 > op1 ? op1 : op0`. */
14847 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond)
14848 && !HONOR_NANS (compare_mode) && !HONOR_SIGNED_ZEROS (compare_mode))
14849 max_p = !max_p;
14850
14851 else
14852 return 0;
14853
14854 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
14855 return 1;
14856 }
14857
14858 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
14859 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP applied
14860 to the operands of the last comparison is nonzero/true, FALSE_COND if it is
14861 zero/false. Return 0 if the hardware has no such operation. */
14862
14863 static int
14864 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
14865 {
14866 enum rtx_code code = GET_CODE (op);
14867 rtx op0 = XEXP (op, 0);
14868 rtx op1 = XEXP (op, 1);
14869 machine_mode result_mode = GET_MODE (dest);
14870 rtx compare_rtx;
14871 rtx cmove_rtx;
14872 rtx clobber_rtx;
14873
14874 if (!can_create_pseudo_p ())
14875 return 0;
14876
14877 switch (code)
14878 {
14879 case EQ:
14880 case GE:
14881 case GT:
14882 break;
14883
14884 case NE:
14885 case LT:
14886 case LE:
14887 code = swap_condition (code);
14888 std::swap (op0, op1);
14889 break;
14890
14891 default:
14892 return 0;
14893 }
14894
14895 /* Generate: [(parallel [(set (dest)
14896 (if_then_else (op (cmp1) (cmp2))
14897 (true)
14898 (false)))
14899 (clobber (scratch))])]. */
14900
14901 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
14902 cmove_rtx = gen_rtx_SET (dest,
14903 gen_rtx_IF_THEN_ELSE (result_mode,
14904 compare_rtx,
14905 true_cond,
14906 false_cond));
14907
14908 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
14909 emit_insn (gen_rtx_PARALLEL (VOIDmode,
14910 gen_rtvec (2, cmove_rtx, clobber_rtx)));
14911
14912 return 1;
14913 }
14914
14915 /* Emit a conditional move: move TRUE_COND to DEST if OP applied to the
14916 operands of the last comparison is nonzero/true, FALSE_COND if it
14917 is zero/false. Return 0 if the hardware has no such operation. */
14918
14919 int
14920 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
14921 {
14922 enum rtx_code code = GET_CODE (op);
14923 rtx op0 = XEXP (op, 0);
14924 rtx op1 = XEXP (op, 1);
14925 machine_mode compare_mode = GET_MODE (op0);
14926 machine_mode result_mode = GET_MODE (dest);
14927 rtx temp;
14928 bool is_against_zero;
14929
14930 /* These modes should always match. */
14931 if (GET_MODE (op1) != compare_mode
14932 /* In the isel case however, we can use a compare immediate, so
14933 op1 may be a small constant. */
14934 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
14935 return 0;
14936 if (GET_MODE (true_cond) != result_mode)
14937 return 0;
14938 if (GET_MODE (false_cond) != result_mode)
14939 return 0;
14940
14941 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
14942 if (TARGET_P9_MINMAX
14943 && (compare_mode == SFmode || compare_mode == DFmode)
14944 && (result_mode == SFmode || result_mode == DFmode))
14945 {
14946 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
14947 return 1;
14948
14949 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
14950 return 1;
14951 }
14952
14953 /* Don't allow using floating point comparisons for integer results for
14954 now. */
14955 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
14956 return 0;
14957
14958 /* First, work out if the hardware can do this at all, or
14959 if it's too slow.... */
14960 if (!FLOAT_MODE_P (compare_mode))
14961 {
14962 if (TARGET_ISEL)
14963 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
14964 return 0;
14965 }
14966
14967 is_against_zero = op1 == CONST0_RTX (compare_mode);
14968
14969 /* A floating-point subtract might overflow, underflow, or produce
14970 an inexact result, thus changing the floating-point flags, so it
14971 can't be generated if we care about that. It's safe if one side
14972 of the construct is zero, since then no subtract will be
14973 generated. */
14974 if (SCALAR_FLOAT_MODE_P (compare_mode)
14975 && flag_trapping_math && ! is_against_zero)
14976 return 0;
14977
14978 /* Eliminate half of the comparisons by switching operands; this
14979 makes the remaining code simpler. */
14980 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
14981 || code == LTGT || code == LT || code == UNLE)
14982 {
14983 code = reverse_condition_maybe_unordered (code);
14984 temp = true_cond;
14985 true_cond = false_cond;
14986 false_cond = temp;
14987 }
14988
14989 /* UNEQ and LTGT take four instructions for a comparison with zero,
14990 so it'll probably be faster to use a branch here too. */
14991 if (code == UNEQ && HONOR_NANS (compare_mode))
14992 return 0;
14993
14994 /* We're going to try to implement comparisons by performing
14995 a subtract, then comparing against zero. Unfortunately,
14996 Inf - Inf is NaN which is not zero, and so if we don't
14997 know that the operand is finite and the comparison
14998 would treat EQ differently from UNORDERED, we can't do it. */
14999 if (HONOR_INFINITIES (compare_mode)
15000 && code != GT && code != UNGE
15001 && (!CONST_DOUBLE_P (op1)
15002 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
15003 /* Constructs of the form (a OP b ? a : b) are safe. */
15004 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
15005 || (! rtx_equal_p (op0, true_cond)
15006 && ! rtx_equal_p (op1, true_cond))))
15007 return 0;
15008
15009 /* At this point we know we can use fsel. */
15010
15011 /* Don't allow compare_mode other than SFmode or DFmode; for others there
15012 is no fsel instruction. */
15013 if (compare_mode != SFmode && compare_mode != DFmode)
15014 return 0;
15015
15016 /* Reduce the comparison to a comparison against zero. */
15017 if (! is_against_zero)
15018 {
15019 temp = gen_reg_rtx (compare_mode);
15020 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
15021 op0 = temp;
15022 op1 = CONST0_RTX (compare_mode);
15023 }
15024
15025 /* If we don't care about NaNs we can reduce some of the comparisons
15026 down to faster ones. */
15027 if (! HONOR_NANS (compare_mode))
15028 switch (code)
15029 {
15030 case GT:
15031 code = LE;
15032 temp = true_cond;
15033 true_cond = false_cond;
15034 false_cond = temp;
15035 break;
15036 case UNGE:
15037 code = GE;
15038 break;
15039 case UNEQ:
15040 code = EQ;
15041 break;
15042 default:
15043 break;
15044 }
15045
15046 /* Now, reduce everything down to a GE. */
15047 switch (code)
15048 {
15049 case GE:
15050 break;
15051
15052 case LE:
15053 temp = gen_reg_rtx (compare_mode);
15054 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15055 op0 = temp;
15056 break;
15057
15058 case ORDERED:
15059 temp = gen_reg_rtx (compare_mode);
15060 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
15061 op0 = temp;
15062 break;
15063
15064 case EQ:
15065 temp = gen_reg_rtx (compare_mode);
15066 emit_insn (gen_rtx_SET (temp,
15067 gen_rtx_NEG (compare_mode,
15068 gen_rtx_ABS (compare_mode, op0))));
15069 op0 = temp;
15070 break;
15071
15072 case UNGE:
15073 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
15074 temp = gen_reg_rtx (result_mode);
15075 emit_insn (gen_rtx_SET (temp,
15076 gen_rtx_IF_THEN_ELSE (result_mode,
15077 gen_rtx_GE (VOIDmode,
15078 op0, op1),
15079 true_cond, false_cond)));
15080 false_cond = true_cond;
15081 true_cond = temp;
15082
15083 temp = gen_reg_rtx (compare_mode);
15084 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15085 op0 = temp;
15086 break;
15087
15088 case GT:
15089 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
15090 temp = gen_reg_rtx (result_mode);
15091 emit_insn (gen_rtx_SET (temp,
15092 gen_rtx_IF_THEN_ELSE (result_mode,
15093 gen_rtx_GE (VOIDmode,
15094 op0, op1),
15095 true_cond, false_cond)));
15096 true_cond = false_cond;
15097 false_cond = temp;
15098
15099 temp = gen_reg_rtx (compare_mode);
15100 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15101 op0 = temp;
15102 break;
15103
15104 default:
15105 gcc_unreachable ();
15106 }
15107
15108 emit_insn (gen_rtx_SET (dest,
15109 gen_rtx_IF_THEN_ELSE (result_mode,
15110 gen_rtx_GE (VOIDmode,
15111 op0, op1),
15112 true_cond, false_cond)));
15113 return 1;
15114 }
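
/* Worked example (hedged sketch; assumes the trapping-math and
   infinity checks above pass): DEST = (a <= b) ? x : y in DFmode is
   first reduced to a compare against zero via temp = a - b, then the
   LE arm negates to get b - a, and the final insn is the fsel form
   DEST = (b - a >= 0) ? x : y. */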
15115
15116 /* Same as above, but for ints (isel). */
15117
15118 int
15119 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15120 {
15121 rtx condition_rtx, cr;
15122 machine_mode mode = GET_MODE (dest);
15123 enum rtx_code cond_code;
15124 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
15125 bool signedp;
15126
15127 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
15128 return 0;
15129
15130 /* We still have to do the compare, because isel doesn't do a
15131 compare; it just looks at the CRx bits set by a previous compare
15132 instruction. */
15133 condition_rtx = rs6000_generate_compare (op, mode);
15134 cond_code = GET_CODE (condition_rtx);
15135 cr = XEXP (condition_rtx, 0);
15136 signedp = GET_MODE (cr) == CCmode;
15137
15138 isel_func = (mode == SImode
15139 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
15140 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
15141
15142 switch (cond_code)
15143 {
15144 case LT: case GT: case LTU: case GTU: case EQ:
15145 /* isel handles these directly. */
15146 break;
15147
15148 default:
15149 /* We need to swap the sense of the comparison. */
15150 {
15151 std::swap (false_cond, true_cond);
15152 PUT_CODE (condition_rtx, reverse_condition (cond_code));
15153 }
15154 break;
15155 }
15156
15157 false_cond = force_reg (mode, false_cond);
15158 if (true_cond != const0_rtx)
15159 true_cond = force_reg (mode, true_cond);
15160
15161 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
15162
15163 return 1;
15164 }
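
/* Worked example (editorial): DEST = (a >= b) ? x : y has no direct
   isel form, so the default arm above reverses the condition to
   (a < b) ? y : x, which isel handles by reading the LT bit of the
   CR field set by the preceding compare. */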
15165
15166 void
15167 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
15168 {
15169 machine_mode mode = GET_MODE (op0);
15170 enum rtx_code c;
15171 rtx target;
15172
15173 /* VSX/altivec have direct min/max insns. */
15174 if ((code == SMAX || code == SMIN)
15175 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
15176 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
15177 {
15178 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
15179 return;
15180 }
15181
15182 if (code == SMAX || code == SMIN)
15183 c = GE;
15184 else
15185 c = GEU;
15186
15187 if (code == SMAX || code == UMAX)
15188 target = emit_conditional_move (dest, c, op0, op1, mode,
15189 op0, op1, mode, 0);
15190 else
15191 target = emit_conditional_move (dest, c, op0, op1, mode,
15192 op1, op0, mode, 0);
15193 gcc_assert (target);
15194 if (target != dest)
15195 emit_move_insn (dest, target);
15196 }
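
/* Worked example (editorial): SMAX on SImode has no direct min/max
   insn, so this falls through to emit_conditional_move with C == GE,
   i.e. DEST = (op0 >= op1) ? op0 : op1, which on ISEL targets can be
   implemented via rs6000_emit_int_cmove above. */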
15197
15198 /* A subroutine of the atomic operation splitters. Jump to LABEL if
15199 COND is true. Mark the jump as unlikely to be taken. */
15200
15201 static void
15202 emit_unlikely_jump (rtx cond, rtx label)
15203 {
15204 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
15205 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
15206 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
15207 }
15208
15209 /* A subroutine of the atomic operation splitters. Emit a load-locked
15210 instruction in MODE. For QI/HImode, possibly use a pattern that includes
15211 the zero_extend operation. */
15212
15213 static void
15214 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
15215 {
15216 rtx (*fn) (rtx, rtx) = NULL;
15217
15218 switch (mode)
15219 {
15220 case E_QImode:
15221 fn = gen_load_lockedqi;
15222 break;
15223 case E_HImode:
15224 fn = gen_load_lockedhi;
15225 break;
15226 case E_SImode:
15227 if (GET_MODE (mem) == QImode)
15228 fn = gen_load_lockedqi_si;
15229 else if (GET_MODE (mem) == HImode)
15230 fn = gen_load_lockedhi_si;
15231 else
15232 fn = gen_load_lockedsi;
15233 break;
15234 case E_DImode:
15235 fn = gen_load_lockeddi;
15236 break;
15237 case E_TImode:
15238 fn = gen_load_lockedti;
15239 break;
15240 default:
15241 gcc_unreachable ();
15242 }
15243 emit_insn (fn (reg, mem));
15244 }
15245
15246 /* A subroutine of the atomic operation splitters. Emit a store-conditional
15247 instruction in MODE. */
15248
15249 static void
15250 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
15251 {
15252 rtx (*fn) (rtx, rtx, rtx) = NULL;
15253
15254 switch (mode)
15255 {
15256 case E_QImode:
15257 fn = gen_store_conditionalqi;
15258 break;
15259 case E_HImode:
15260 fn = gen_store_conditionalhi;
15261 break;
15262 case E_SImode:
15263 fn = gen_store_conditionalsi;
15264 break;
15265 case E_DImode:
15266 fn = gen_store_conditionaldi;
15267 break;
15268 case E_TImode:
15269 fn = gen_store_conditionalti;
15270 break;
15271 default:
15272 gcc_unreachable ();
15273 }
15274
15275 /* Emit sync before stwcx. to address PPC405 Erratum 77. */
15276 if (PPC405_ERRATUM77)
15277 emit_insn (gen_hwsync ());
15278
15279 emit_insn (fn (res, mem, val));
15280 }
15281
15282 /* Expand barriers before and after a load_locked/store_cond sequence. */
15283
15284 static rtx
15285 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
15286 {
15287 rtx addr = XEXP (mem, 0);
15288
15289 if (!legitimate_indirect_address_p (addr, reload_completed)
15290 && !legitimate_indexed_address_p (addr, reload_completed))
15291 {
15292 addr = force_reg (Pmode, addr);
15293 mem = replace_equiv_address_nv (mem, addr);
15294 }
15295
15296 switch (model)
15297 {
15298 case MEMMODEL_RELAXED:
15299 case MEMMODEL_CONSUME:
15300 case MEMMODEL_ACQUIRE:
15301 break;
15302 case MEMMODEL_RELEASE:
15303 case MEMMODEL_ACQ_REL:
15304 emit_insn (gen_lwsync ());
15305 break;
15306 case MEMMODEL_SEQ_CST:
15307 emit_insn (gen_hwsync ());
15308 break;
15309 default:
15310 gcc_unreachable ();
15311 }
15312 return mem;
15313 }
15314
15315 static void
15316 rs6000_post_atomic_barrier (enum memmodel model)
15317 {
15318 switch (model)
15319 {
15320 case MEMMODEL_RELAXED:
15321 case MEMMODEL_CONSUME:
15322 case MEMMODEL_RELEASE:
15323 break;
15324 case MEMMODEL_ACQUIRE:
15325 case MEMMODEL_ACQ_REL:
15326 case MEMMODEL_SEQ_CST:
15327 emit_insn (gen_isync ());
15328 break;
15329 default:
15330 gcc_unreachable ();
15331 }
15332 }
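
/* Summary (editorial): taken together, the two barrier helpers
   implement the usual PowerPC mapping:

     RELAXED/CONSUME  no barriers
     ACQUIRE          isync after the sequence
     RELEASE          lwsync before the sequence
     ACQ_REL          lwsync before + isync after
     SEQ_CST          hwsync before + isync after  */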
15333
15334 /* A subroutine of the various atomic expanders. For sub-word operations,
15335 we must adjust things to operate on SImode. Given the original MEM,
15336 return a new aligned memory. Also build and return the quantities by
15337 which to shift and mask. */
15338
15339 static rtx
15340 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
15341 {
15342 rtx addr, align, shift, mask, mem;
15343 HOST_WIDE_INT shift_mask;
15344 machine_mode mode = GET_MODE (orig_mem);
15345
15346 /* For smaller modes, we have to implement this via SImode. */
15347 shift_mask = (mode == QImode ? 0x18 : 0x10);
15348
15349 addr = XEXP (orig_mem, 0);
15350 addr = force_reg (GET_MODE (addr), addr);
15351
15352 /* Aligned memory containing the subword. Generate a new MEM. We
15353 do not want any of the existing MEM_ATTR data, as we're now
15354 accessing memory outside the original object. */
15355 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
15356 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15357 mem = gen_rtx_MEM (SImode, align);
15358 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
15359 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
15360 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
15361
15362 /* Shift amount for subword relative to aligned word. */
15363 shift = gen_reg_rtx (SImode);
15364 addr = gen_lowpart (SImode, addr);
15365 rtx tmp = gen_reg_rtx (SImode);
15366 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
15367 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
15368 if (BYTES_BIG_ENDIAN)
15369 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
15370 shift, 1, OPTAB_LIB_WIDEN);
15371 *pshift = shift;
15372
15373 /* Mask for insertion. */
15374 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
15375 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
15376 *pmask = mask;
15377
15378 return mem;
15379 }
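
/* Worked example (editorial): for a QImode byte at offset 3 within
   its aligned word, shift = ((addr << 3) & 0x18) = 24 on
   little-endian; big-endian XORs with 0x18, flipping this to 0,
   since byte 3 is the least significant byte there. The insertion
   mask is then 0xff << shift within the aligned SImode word. */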
15380
15381 /* A subroutine of the various atomic expanders. For sub-word operands,
15382 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
15383
15384 static rtx
15385 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
15386 {
15387 rtx x;
15388
15389 x = gen_reg_rtx (SImode);
15390 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
15391 gen_rtx_NOT (SImode, mask),
15392 oldval)));
15393
15394 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
15395
15396 return x;
15397 }
15398
15399 /* A subroutine of the various atomic expanders. For sub-word operands,
15400 extract WIDE to NARROW via SHIFT. */
15401
15402 static void
15403 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
15404 {
15405 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
15406 wide, 1, OPTAB_LIB_WIDEN);
15407 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
15408 }
15409
15410 /* Expand an atomic compare and swap operation. */
15411
15412 void
15413 rs6000_expand_atomic_compare_and_swap (rtx operands[])
15414 {
15415 rtx boolval, retval, mem, oldval, newval, cond;
15416 rtx label1, label2, x, mask, shift;
15417 machine_mode mode, orig_mode;
15418 enum memmodel mod_s, mod_f;
15419 bool is_weak;
15420
15421 boolval = operands[0];
15422 retval = operands[1];
15423 mem = operands[2];
15424 oldval = operands[3];
15425 newval = operands[4];
15426 is_weak = (INTVAL (operands[5]) != 0);
15427 mod_s = memmodel_base (INTVAL (operands[6]));
15428 mod_f = memmodel_base (INTVAL (operands[7]));
15429 orig_mode = mode = GET_MODE (mem);
15430
15431 mask = shift = NULL_RTX;
15432 if (mode == QImode || mode == HImode)
15433 {
15434 /* Before power8, we didn't have access to lbarx/lharx, so generate a
15435 lwarx plus shift/mask operations. With power8, we need to do the
15436 comparison in SImode, but the store is still done in QI/HImode. */
15437 oldval = convert_modes (SImode, mode, oldval, 1);
15438
15439 if (!TARGET_SYNC_HI_QI)
15440 {
15441 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15442
15443 /* Shift and mask OLDVAL into position within the word. */
15444 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
15445 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15446
15447 /* Shift and mask NEWVAL into position within the word. */
15448 newval = convert_modes (SImode, mode, newval, 1);
15449 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
15450 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15451 }
15452
15453 /* Prepare to adjust the return value. */
15454 retval = gen_reg_rtx (SImode);
15455 mode = SImode;
15456 }
15457 else if (reg_overlap_mentioned_p (retval, oldval))
15458 oldval = copy_to_reg (oldval);
15459
15460 if (mode != TImode && !reg_or_short_operand (oldval, mode))
15461 oldval = copy_to_mode_reg (mode, oldval);
15462
15463 if (reg_overlap_mentioned_p (retval, newval))
15464 newval = copy_to_reg (newval);
15465
15466 mem = rs6000_pre_atomic_barrier (mem, mod_s);
15467
15468 label1 = NULL_RTX;
15469 if (!is_weak)
15470 {
15471 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15472 emit_label (XEXP (label1, 0));
15473 }
15474 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15475
15476 emit_load_locked (mode, retval, mem);
15477
15478 x = retval;
15479 if (mask)
15480 x = expand_simple_binop (SImode, AND, retval, mask,
15481 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15482
15483 cond = gen_reg_rtx (CCmode);
15484 /* If we have TImode, synthesize a comparison. */
15485 if (mode != TImode)
15486 x = gen_rtx_COMPARE (CCmode, x, oldval);
15487 else
15488 {
15489 rtx xor1_result = gen_reg_rtx (DImode);
15490 rtx xor2_result = gen_reg_rtx (DImode);
15491 rtx or_result = gen_reg_rtx (DImode);
15492 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
15493 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
15494 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
15495 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
15496
15497 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
15498 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
15499 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
15500 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
15501 }
15502
15503 emit_insn (gen_rtx_SET (cond, x));
15504
15505 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15506 emit_unlikely_jump (x, label2);
15507
15508 x = newval;
15509 if (mask)
15510 x = rs6000_mask_atomic_subword (retval, newval, mask);
15511
15512 emit_store_conditional (orig_mode, cond, mem, x);
15513
15514 if (!is_weak)
15515 {
15516 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15517 emit_unlikely_jump (x, label1);
15518 }
15519
15520 if (!is_mm_relaxed (mod_f))
15521 emit_label (XEXP (label2, 0));
15522
15523 rs6000_post_atomic_barrier (mod_s);
15524
15525 if (is_mm_relaxed (mod_f))
15526 emit_label (XEXP (label2, 0));
15527
15528 if (shift)
15529 rs6000_finish_atomic_subword (operands[1], retval, shift);
15530 else if (mode != GET_MODE (operands[1]))
15531 convert_move (operands[1], retval, 1);
15532
15533 /* In all cases, CR0 contains EQ on success, and NE on failure. */
15534 x = gen_rtx_EQ (SImode, cond, const0_rtx);
15535 emit_insn (gen_rtx_SET (boolval, x));
15536 }
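
/* Generated shape (hedged sketch) of a strong SImode seq_cst compare
   and swap:

     hwsync
   1: lwarx  ret,0,mem
      cmpw   cr0,ret,oldval
      bne-   cr0,2f
      stwcx. newval,0,mem
      bne-   cr0,1b
   2: isync

   with CR0's EQ bit finally copied into BOOLVAL. */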
15537
15538 /* Expand an atomic exchange operation. */
15539
15540 void
15541 rs6000_expand_atomic_exchange (rtx operands[])
15542 {
15543 rtx retval, mem, val, cond;
15544 machine_mode mode;
15545 enum memmodel model;
15546 rtx label, x, mask, shift;
15547
15548 retval = operands[0];
15549 mem = operands[1];
15550 val = operands[2];
15551 model = memmodel_base (INTVAL (operands[3]));
15552 mode = GET_MODE (mem);
15553
15554 mask = shift = NULL_RTX;
15555 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
15556 {
15557 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15558
15559 /* Shift and mask VAL into position within the word. */
15560 val = convert_modes (SImode, mode, val, 1);
15561 val = expand_simple_binop (SImode, ASHIFT, val, shift,
15562 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15563
15564 /* Prepare to adjust the return value. */
15565 retval = gen_reg_rtx (SImode);
15566 mode = SImode;
15567 }
15568
15569 mem = rs6000_pre_atomic_barrier (mem, model);
15570
15571 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15572 emit_label (XEXP (label, 0));
15573
15574 emit_load_locked (mode, retval, mem);
15575
15576 x = val;
15577 if (mask)
15578 x = rs6000_mask_atomic_subword (retval, val, mask);
15579
15580 cond = gen_reg_rtx (CCmode);
15581 emit_store_conditional (mode, cond, mem, x);
15582
15583 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15584 emit_unlikely_jump (x, label);
15585
15586 rs6000_post_atomic_barrier (model);
15587
15588 if (shift)
15589 rs6000_finish_atomic_subword (operands[0], retval, shift);
15590 }
15591
15592 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
15593 to perform. MEM is the memory on which to operate. VAL is the second
15594 operand of the binary operator. BEFORE and AFTER are optional locations to
15595 return the value of MEM either before of after the operation. MODEL_RTX
15596 is a CONST_INT containing the memory model to use. */
15597
15598 void
15599 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
15600 rtx orig_before, rtx orig_after, rtx model_rtx)
15601 {
15602 enum memmodel model = memmodel_base (INTVAL (model_rtx));
15603 machine_mode mode = GET_MODE (mem);
15604 machine_mode store_mode = mode;
15605 rtx label, x, cond, mask, shift;
15606 rtx before = orig_before, after = orig_after;
15607
15608 mask = shift = NULL_RTX;
15609 /* On power8, we want to use SImode for the operation. On previous systems,
15610 do the operation on the containing SImode word, then shift/mask to
15611 get the proper byte or halfword. */
15612 if (mode == QImode || mode == HImode)
15613 {
15614 if (TARGET_SYNC_HI_QI)
15615 {
15616 val = convert_modes (SImode, mode, val, 1);
15617
15618 /* Prepare to adjust the return value. */
15619 before = gen_reg_rtx (SImode);
15620 if (after)
15621 after = gen_reg_rtx (SImode);
15622 mode = SImode;
15623 }
15624 else
15625 {
15626 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15627
15628 /* Shift and mask VAL into position within the word. */
15629 val = convert_modes (SImode, mode, val, 1);
15630 val = expand_simple_binop (SImode, ASHIFT, val, shift,
15631 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15632
15633 switch (code)
15634 {
15635 case IOR:
15636 case XOR:
15637 /* We've already zero-extended VAL. That is sufficient to
15638 make certain that it does not affect other bits. */
15639 mask = NULL;
15640 break;
15641
15642 case AND:
15643 /* If we make certain that all of the other bits in VAL are
15644 set, that will be sufficient to not affect other bits. */
15645 x = gen_rtx_NOT (SImode, mask);
15646 x = gen_rtx_IOR (SImode, x, val);
15647 emit_insn (gen_rtx_SET (val, x));
15648 mask = NULL;
15649 break;
15650
15651 case NOT:
15652 case PLUS:
15653 case MINUS:
15654 /* These will all affect bits outside the field and need
15655 adjustment via MASK within the loop. */
15656 break;
15657
15658 default:
15659 gcc_unreachable ();
15660 }
15661
15662 /* Prepare to adjust the return value. */
15663 before = gen_reg_rtx (SImode);
15664 if (after)
15665 after = gen_reg_rtx (SImode);
15666 store_mode = mode = SImode;
15667 }
15668 }
15669
15670 mem = rs6000_pre_atomic_barrier (mem, model);
15671
15672 label = gen_label_rtx ();
15673 emit_label (label);
15674 label = gen_rtx_LABEL_REF (VOIDmode, label);
15675
15676 if (before == NULL_RTX)
15677 before = gen_reg_rtx (mode);
15678
15679 emit_load_locked (mode, before, mem);
15680
15681 if (code == NOT)
15682 {
15683 x = expand_simple_binop (mode, AND, before, val,
15684 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15685 after = expand_simple_unop (mode, NOT, x, after, 1);
15686 }
15687 else
15688 {
15689 after = expand_simple_binop (mode, code, before, val,
15690 after, 1, OPTAB_LIB_WIDEN);
15691 }
15692
15693 x = after;
15694 if (mask)
15695 {
15696 x = expand_simple_binop (SImode, AND, after, mask,
15697 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15698 x = rs6000_mask_atomic_subword (before, x, mask);
15699 }
15700 else if (store_mode != mode)
15701 x = convert_modes (store_mode, mode, x, 1);
15702
15703 cond = gen_reg_rtx (CCmode);
15704 emit_store_conditional (store_mode, cond, mem, x);
15705
15706 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15707 emit_unlikely_jump (x, label);
15708
15709 rs6000_post_atomic_barrier (model);
15710
15711 if (shift)
15712 {
15713 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
15714 then do the calculations in an SImode register. */
15715 if (orig_before)
15716 rs6000_finish_atomic_subword (orig_before, before, shift);
15717 if (orig_after)
15718 rs6000_finish_atomic_subword (orig_after, after, shift);
15719 }
15720 else if (store_mode != mode)
15721 {
15722 /* QImode/HImode on machines with lbarx/lharx where we do the native
15723 operation and then do the calculations in an SImode register. */
15724 if (orig_before)
15725 convert_move (orig_before, before, 1);
15726 if (orig_after)
15727 convert_move (orig_after, after, 1);
15728 }
15729 else if (orig_after && after != orig_after)
15730 emit_move_insn (orig_after, after);
15731 }
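
/* Generated shape (hedged sketch) of a full-word fetch-and-add,
   barrier variants omitted:

   1: lwarx  before,0,mem
      add    after,before,val
      stwcx. after,0,mem
      bne-   cr0,1b

   The sub-word cases wrap the same loop around the aligned SImode
   word, using the MASK/SHIFT from rs6000_adjust_atomic_subword. */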
15732
15733 /* Emit instructions to move SRC to DST. Called by splitters for
15734 multi-register moves. It will emit at most one instruction for
15735 each register that is accessed; that is, it won't emit li/lis pairs
15736 (or equivalent for 64-bit code). One of SRC or DST must be a hard
15737 register. */
15738
15739 void
15740 rs6000_split_multireg_move (rtx dst, rtx src)
15741 {
15742 /* The register number of the first register being moved. */
15743 int reg;
15744 /* The mode that is to be moved. */
15745 machine_mode mode;
15746 /* The mode that the move is being done in, and its size. */
15747 machine_mode reg_mode;
15748 int reg_mode_size;
15749 /* The number of registers that will be moved. */
15750 int nregs;
15751
15752 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
15753 mode = GET_MODE (dst);
15754 nregs = hard_regno_nregs (reg, mode);
15755 if (FP_REGNO_P (reg))
15756 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
15757 (TARGET_HARD_FLOAT ? DFmode : SFmode);
15758 else if (ALTIVEC_REGNO_P (reg))
15759 reg_mode = V16QImode;
15760 else
15761 reg_mode = word_mode;
15762 reg_mode_size = GET_MODE_SIZE (reg_mode);
15763
15764 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
15765
15766 /* TDmode residing in FP registers is special, since the ISA requires that
15767 the lower-numbered word of a register pair is always the most significant
15768 word, even in little-endian mode. This does not match the usual subreg
15769 semantics, so we cannot use simplify_gen_subreg in those cases. Access
15770 the appropriate constituent registers "by hand" in little-endian mode.
15771
15772 Note we do not need to check for destructive overlap here since TDmode
15773 can only reside in even/odd register pairs. */
15774 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
15775 {
15776 rtx p_src, p_dst;
15777 int i;
15778
15779 for (i = 0; i < nregs; i++)
15780 {
15781 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
15782 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
15783 else
15784 p_src = simplify_gen_subreg (reg_mode, src, mode,
15785 i * reg_mode_size);
15786
15787 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
15788 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
15789 else
15790 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
15791 i * reg_mode_size);
15792
15793 emit_insn (gen_rtx_SET (p_dst, p_src));
15794 }
15795
15796 return;
15797 }
15798
15799 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
15800 {
15801 /* Move register range backwards, if we might have destructive
15802 overlap. */
15803 int i;
15804 for (i = nregs - 1; i >= 0; i--)
15805 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
15806 i * reg_mode_size),
15807 simplify_gen_subreg (reg_mode, src, mode,
15808 i * reg_mode_size)));
15809 }
15810 else
15811 {
15812 int i;
15813 int j = -1;
15814 bool used_update = false;
15815 rtx restore_basereg = NULL_RTX;
15816
15817 if (MEM_P (src) && INT_REGNO_P (reg))
15818 {
15819 rtx breg;
15820
15821 if (GET_CODE (XEXP (src, 0)) == PRE_INC
15822 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
15823 {
15824 rtx delta_rtx;
15825 breg = XEXP (XEXP (src, 0), 0);
15826 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
15827 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
15828 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
15829 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
15830 src = replace_equiv_address (src, breg);
15831 }
15832 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
15833 {
15834 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
15835 {
15836 rtx basereg = XEXP (XEXP (src, 0), 0);
15837 if (TARGET_UPDATE)
15838 {
15839 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
15840 emit_insn (gen_rtx_SET (ndst,
15841 gen_rtx_MEM (reg_mode,
15842 XEXP (src, 0))));
15843 used_update = true;
15844 }
15845 else
15846 emit_insn (gen_rtx_SET (basereg,
15847 XEXP (XEXP (src, 0), 1)));
15848 src = replace_equiv_address (src, basereg);
15849 }
15850 else
15851 {
15852 rtx basereg = gen_rtx_REG (Pmode, reg);
15853 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
15854 src = replace_equiv_address (src, basereg);
15855 }
15856 }
15857
15858 breg = XEXP (src, 0);
15859 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
15860 breg = XEXP (breg, 0);
15861
15862 /* If the base register we are using to address memory is
15863 also a destination reg, then change that register last. */
15864 if (REG_P (breg)
15865 && REGNO (breg) >= REGNO (dst)
15866 && REGNO (breg) < REGNO (dst) + nregs)
15867 j = REGNO (breg) - REGNO (dst);
15868 }
15869 else if (MEM_P (dst) && INT_REGNO_P (reg))
15870 {
15871 rtx breg;
15872
15873 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
15874 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
15875 {
15876 rtx delta_rtx;
15877 breg = XEXP (XEXP (dst, 0), 0);
15878 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
15879 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
15880 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
15881
15882 /* We have to update the breg before doing the store.
15883 Use store with update, if available. */
15884
15885 if (TARGET_UPDATE)
15886 {
15887 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
15888 emit_insn (TARGET_32BIT
15889 ? (TARGET_POWERPC64
15890 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
15891 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
15892 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
15893 used_update = true;
15894 }
15895 else
15896 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
15897 dst = replace_equiv_address (dst, breg);
15898 }
15899 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
15900 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
15901 {
15902 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
15903 {
15904 rtx basereg = XEXP (XEXP (dst, 0), 0);
15905 if (TARGET_UPDATE)
15906 {
15907 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
15908 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
15909 XEXP (dst, 0)),
15910 nsrc));
15911 used_update = true;
15912 }
15913 else
15914 emit_insn (gen_rtx_SET (basereg,
15915 XEXP (XEXP (dst, 0), 1)));
15916 dst = replace_equiv_address (dst, basereg);
15917 }
15918 else
15919 {
15920 rtx basereg = XEXP (XEXP (dst, 0), 0);
15921 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
15922 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
15923 && REG_P (basereg)
15924 && REG_P (offsetreg)
15925 && REGNO (basereg) != REGNO (offsetreg));
15926 if (REGNO (basereg) == 0)
15927 {
15928 rtx tmp = offsetreg;
15929 offsetreg = basereg;
15930 basereg = tmp;
15931 }
15932 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
15933 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
15934 dst = replace_equiv_address (dst, basereg);
15935 }
15936 }
15937 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
15938 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
15939 }
15940
15941 for (i = 0; i < nregs; i++)
15942 {
15943 /* Calculate index to next subword. */
15944 ++j;
15945 if (j == nregs)
15946 j = 0;
15947
15948 /* If the compiler already emitted the move of the first word by
15949 a store with update, there is no need to do anything. */
15950 if (j == 0 && used_update)
15951 continue;
15952
15953 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
15954 j * reg_mode_size),
15955 simplify_gen_subreg (reg_mode, src, mode,
15956 j * reg_mode_size)));
15957 }
15958 if (restore_basereg != NULL_RTX)
15959 emit_insn (restore_basereg);
15960 }
15961 }
15962
15963 static GTY(()) alias_set_type TOC_alias_set = -1;
15964
15965 alias_set_type
15966 get_TOC_alias_set (void)
15967 {
15968 if (TOC_alias_set == -1)
15969 TOC_alias_set = new_alias_set ();
15970 return TOC_alias_set;
15971 }
15972
15973 /* The mode the ABI uses for a word. This is not the same as word_mode
15974 for -m32 -mpowerpc64. This is used to implement various target hooks. */
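/* (For instance, under -m32 -mpowerpc64 the registers are 64-bit, so
   word_mode is DImode, yet the ABI still uses 32-bit words and this
   hook returns SImode there.)  */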
15975
15976 static scalar_int_mode
15977 rs6000_abi_word_mode (void)
15978 {
15979 return TARGET_32BIT ? SImode : DImode;
15980 }
15981
15982 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
15983 static char *
15984 rs6000_offload_options (void)
15985 {
15986 if (TARGET_64BIT)
15987 return xstrdup ("-foffload-abi=lp64");
15988 else
15989 return xstrdup ("-foffload-abi=ilp32");
15990 }
15991
15992 \f
15993 /* A quick summary of the various types of 'constant-pool tables'
15994 under PowerPC:
15995
15996    Target       Flags            Name              One table per
15997    AIX          (none)           AIX TOC           object file
15998    AIX          -mfull-toc       AIX TOC           object file
15999    AIX          -mminimal-toc    AIX minimal TOC   translation unit
16000    SVR4/EABI    (none)           SVR4 SDATA        object file
16001    SVR4/EABI    -fpic            SVR4 pic          object file
16002    SVR4/EABI    -fPIC            SVR4 PIC          translation unit
16003    SVR4/EABI    -mrelocatable    EABI TOC          function
16004    SVR4/EABI    -maix            AIX TOC           object file
16005    SVR4/EABI    -maix -mminimal-toc
16006                                  AIX minimal TOC   translation unit
16007 
16008    Name              Reg.   Set by   entries   contains:
16009                                      made by   addrs?   fp?      sum?
16010 
16011    AIX TOC           2      crt0     as        Y        option   option
16012    AIX minimal TOC   30     prolog   gcc       Y        Y        option
16013    SVR4 SDATA        13     crt0     gcc       N        Y        N
16014    SVR4 pic          30     prolog   ld        Y        not yet  N
16015    SVR4 PIC          30     prolog   gcc       Y        option   option
16016    EABI TOC          30     prolog   gcc       Y        option   option
16017
16018 */
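/* For illustration (worked by hand from output_toc below, not upstream
   text): on 64-bit AIX the double constant 1.0, whose target image is
   k[0] = 0x3ff00000 and k[1] = 0x0, would be emitted as the TOC entry

	.tc FD_3ff00000_0[TC],0x3ff0000000000000  */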
16019
16020 /* Hash functions for the hash table. */
16021
16022 static unsigned
16023 rs6000_hash_constant (rtx k)
16024 {
16025 enum rtx_code code = GET_CODE (k);
16026 machine_mode mode = GET_MODE (k);
16027 unsigned result = (code << 3) ^ mode;
16028 const char *format;
16029 int flen, fidx;
16030
16031 format = GET_RTX_FORMAT (code);
16032 flen = strlen (format);
16033 fidx = 0;
16034
16035 switch (code)
16036 {
16037 case LABEL_REF:
16038 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
16039
16040 case CONST_WIDE_INT:
16041 {
16042 int i;
16043 flen = CONST_WIDE_INT_NUNITS (k);
16044 for (i = 0; i < flen; i++)
16045 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
16046 return result;
16047 }
16048
16049 case CONST_DOUBLE:
16050 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
16051
16052 case CODE_LABEL:
16053 fidx = 3;
16054 break;
16055
16056 default:
16057 break;
16058 }
16059
16060 for (; fidx < flen; fidx++)
16061 switch (format[fidx])
16062 {
16063 case 's':
16064 {
16065 unsigned i, len;
16066 const char *str = XSTR (k, fidx);
16067 len = strlen (str);
16068 result = result * 613 + len;
16069 for (i = 0; i < len; i++)
16070 result = result * 613 + (unsigned) str[i];
16071 break;
16072 }
16073 case 'u':
16074 case 'e':
16075 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
16076 break;
16077 case 'i':
16078 case 'n':
16079 result = result * 613 + (unsigned) XINT (k, fidx);
16080 break;
16081 case 'w':
16082 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
16083 result = result * 613 + (unsigned) XWINT (k, fidx);
16084 else
16085 {
16086 size_t i;
16087 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
16088 result = result * 613 + (unsigned) (XWINT (k, fidx)
16089 >> CHAR_BIT * i);
16090 }
16091 break;
16092 case '0':
16093 break;
16094 default:
16095 gcc_unreachable ();
16096 }
16097
16098 return result;
16099 }
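/* Illustrative sketch (not part of GCC): the string case above is a
   plain multiplicative hash with multiplier 613, the same scheme
   rs6000_hash_constant applies to the other operand kinds.  A
   standalone version for a C string would look like this.  */
#if 0
static unsigned
example_string_hash (const char *str)
{
  unsigned result = strlen (str);	/* Fold in the length first,  */
  for (; *str; str++)
    result = result * 613 + (unsigned char) *str;  /* then each char.  */
  return result;
}
#endif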
16100
16101 hashval_t
16102 toc_hasher::hash (toc_hash_struct *thc)
16103 {
16104 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
16105 }
16106
16107 /* Compare H1 and H2 for equivalence. */
16108
16109 bool
16110 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
16111 {
16112 rtx r1 = h1->key;
16113 rtx r2 = h2->key;
16114
16115 if (h1->key_mode != h2->key_mode)
16116 return 0;
16117
16118 return rtx_equal_p (r1, r2);
16119 }
16120
16121 /* These are the names given by the C++ front-end to vtables and
16122 vtable-like objects. Ideally, this logic should not be here;
16123 instead, there should be some programmatic way of inquiring as
16124 to whether or not an object is a vtable. */
16125
16126 #define VTABLE_NAME_P(NAME)				\
16127 (strncmp ("_vt.", (NAME), strlen ("_vt.")) == 0	\
16128 || strncmp ("_ZTV", (NAME), strlen ("_ZTV")) == 0	\
16129 || strncmp ("_ZTT", (NAME), strlen ("_ZTT")) == 0	\
16130 || strncmp ("_ZTI", (NAME), strlen ("_ZTI")) == 0	\
16131 || strncmp ("_ZTC", (NAME), strlen ("_ZTC")) == 0)
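/* For example, the Itanium C++ ABI mangles the vtable of class Foo as
   "_ZTV3Foo", which matches the "_ZTV" prefix above; "_ZTI3Foo" (its
   typeinfo) matches "_ZTI".  */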
16132
16133 #ifdef NO_DOLLAR_IN_LABEL
16134 /* Return a GGC-allocated character string translating dollar signs in
16135 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
16136
16137 const char *
16138 rs6000_xcoff_strip_dollar (const char *name)
16139 {
16140 char *strip, *p;
16141 const char *q;
16142 size_t len;
16143
16144 q = (const char *) strchr (name, '$');
16145
16146 if (q == 0 || q == name)
16147 return name;
16148
16149 len = strlen (name);
16150 strip = XALLOCAVEC (char, len + 1);
16151 strcpy (strip, name);
16152 p = strip + (q - name);
16153 while (p)
16154 {
16155 *p = '_';
16156 p = strchr (p + 1, '$');
16157 }
16158
16159 return ggc_alloc_string (strip, len);
16160 }
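/* Example (worked by hand from the code above): "rs6000$tmp$0" becomes
   "rs6000_tmp_0", while a name whose first character is '$' is
   returned unchanged because of the q == name check.  */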
16161 #endif
16162
16163 void
16164 rs6000_output_symbol_ref (FILE *file, rtx x)
16165 {
16166 const char *name = XSTR (x, 0);
16167
16168 /* Currently C++ toc references to vtables can be emitted before it
16169 is decided whether the vtable is public or private. If this is
16170 the case, then the linker will eventually complain that there is
16171 a reference to an unknown section. Thus, for vtables only,
16172 we emit the TOC reference to reference the identifier and not the
16173 symbol. */
16174 if (VTABLE_NAME_P (name))
16175 {
16176 RS6000_OUTPUT_BASENAME (file, name);
16177 }
16178 else
16179 assemble_name (file, name);
16180 }
16181
16182 /* Output a TOC entry. We derive the entry name from what is being
16183 written. */
16184
16185 void
16186 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
16187 {
16188 char buf[256];
16189 const char *name = buf;
16190 rtx base = x;
16191 HOST_WIDE_INT offset = 0;
16192
16193 gcc_assert (!TARGET_NO_TOC_OR_PCREL);
16194
16195 /* When the linker won't eliminate them, don't output duplicate
16196 TOC entries (this happens on AIX if there is any kind of TOC,
16197 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
16198 CODE_LABELs. */
16199 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
16200 {
16201 struct toc_hash_struct *h;
16202
16203 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
16204 time because GGC is not initialized at that point. */
16205 if (toc_hash_table == NULL)
16206 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
16207
16208 h = ggc_alloc<toc_hash_struct> ();
16209 h->key = x;
16210 h->key_mode = mode;
16211 h->labelno = labelno;
16212
16213 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
16214 if (*found == NULL)
16215 *found = h;
16216 else /* This is indeed a duplicate.
16217 Set this label equal to that label. */
16218 {
16219 fputs ("\t.set ", file);
16220 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
16221 fprintf (file, "%d,", labelno);
16222 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
16223 fprintf (file, "%d\n", ((*found)->labelno));
16224
16225 #ifdef HAVE_AS_TLS
16226 if (TARGET_XCOFF && SYMBOL_REF_P (x)
16227 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
16228 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
16229 {
16230 fputs ("\t.set ", file);
16231 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
16232 fprintf (file, "%d,", labelno);
16233 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
16234 fprintf (file, "%d\n", ((*found)->labelno));
16235 }
16236 #endif
16237 return;
16238 }
16239 }
16240
16241 /* If we're going to put a double constant in the TOC, make sure it's
16242 aligned properly when strict alignment is on. */
16243 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
16244 && STRICT_ALIGNMENT
16245 && GET_MODE_BITSIZE (mode) >= 64
16246 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
16247 ASM_OUTPUT_ALIGN (file, 3);
16249
16250 (*targetm.asm_out.internal_label) (file, "LC", labelno);
16251
16252 /* Handle FP constants specially. Note that if we have a minimal
16253 TOC, things we put here aren't actually in the TOC, so we can allow
16254 FP constants. */
16255 if (CONST_DOUBLE_P (x)
16256 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
16257 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
16258 {
16259 long k[4];
16260
16261 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16262 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
16263 else
16264 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
16265
16266 if (TARGET_64BIT)
16267 {
16268 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16269 fputs (DOUBLE_INT_ASM_OP, file);
16270 else
16271 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
16272 k[0] & 0xffffffff, k[1] & 0xffffffff,
16273 k[2] & 0xffffffff, k[3] & 0xffffffff);
16274 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
16275 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
16276 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
16277 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
16278 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
16279 return;
16280 }
16281 else
16282 {
16283 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16284 fputs ("\t.long ", file);
16285 else
16286 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
16287 k[0] & 0xffffffff, k[1] & 0xffffffff,
16288 k[2] & 0xffffffff, k[3] & 0xffffffff);
16289 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
16290 k[0] & 0xffffffff, k[1] & 0xffffffff,
16291 k[2] & 0xffffffff, k[3] & 0xffffffff);
16292 return;
16293 }
16294 }
16295 else if (CONST_DOUBLE_P (x)
16296 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
16297 {
16298 long k[2];
16299
16300 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16301 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
16302 else
16303 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
16304
16305 if (TARGET_64BIT)
16306 {
16307 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16308 fputs (DOUBLE_INT_ASM_OP, file);
16309 else
16310 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
16311 k[0] & 0xffffffff, k[1] & 0xffffffff);
16312 fprintf (file, "0x%lx%08lx\n",
16313 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
16314 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
16315 return;
16316 }
16317 else
16318 {
16319 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16320 fputs ("\t.long ", file);
16321 else
16322 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
16323 k[0] & 0xffffffff, k[1] & 0xffffffff);
16324 fprintf (file, "0x%lx,0x%lx\n",
16325 k[0] & 0xffffffff, k[1] & 0xffffffff);
16326 return;
16327 }
16328 }
16329 else if (CONST_DOUBLE_P (x)
16330 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
16331 {
16332 long l;
16333
16334 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16335 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
16336 else
16337 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
16338
16339 if (TARGET_64BIT)
16340 {
16341 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16342 fputs (DOUBLE_INT_ASM_OP, file);
16343 else
16344 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
16345 if (WORDS_BIG_ENDIAN)
16346 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
16347 else
16348 fprintf (file, "0x%lx\n", l & 0xffffffff);
16349 return;
16350 }
16351 else
16352 {
16353 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16354 fputs ("\t.long ", file);
16355 else
16356 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
16357 fprintf (file, "0x%lx\n", l & 0xffffffff);
16358 return;
16359 }
16360 }
16361 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
16362 {
16363 unsigned HOST_WIDE_INT low;
16364 HOST_WIDE_INT high;
16365
16366 low = INTVAL (x) & 0xffffffff;
16367 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
16368
16369 /* TOC entries are always Pmode-sized, so when big-endian
16370 smaller integer constants in the TOC need to be padded.
16371 (This is still a win over putting the constants in
16372 a separate constant pool, because then we'd have
16373 to have both a TOC entry _and_ the actual constant.)
16374
16375 For a 32-bit target, CONST_INT values are loaded and shifted
16376 entirely within `low' and can be stored in one TOC entry. */
16377
16378 /* It would be easy to make this work, but it doesn't now. */
16379 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
16380
16381 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
16382 {
16383 low |= high << 32;
16384 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
16385 high = (HOST_WIDE_INT) low >> 32;
16386 low &= 0xffffffff;
16387 }
16388
16389 if (TARGET_64BIT)
16390 {
16391 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16392 fputs (DOUBLE_INT_ASM_OP, file);
16393 else
16394 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
16395 (long) high & 0xffffffff, (long) low & 0xffffffff);
16396 fprintf (file, "0x%lx%08lx\n",
16397 (long) high & 0xffffffff, (long) low & 0xffffffff);
16398 return;
16399 }
16400 else
16401 {
16402 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
16403 {
16404 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16405 fputs ("\t.long ", file);
16406 else
16407 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
16408 (long) high & 0xffffffff, (long) low & 0xffffffff);
16409 fprintf (file, "0x%lx,0x%lx\n",
16410 (long) high & 0xffffffff, (long) low & 0xffffffff);
16411 }
16412 else
16413 {
16414 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16415 fputs ("\t.long ", file);
16416 else
16417 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
16418 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
16419 }
16420 return;
16421 }
16422 }
16423
16424 if (GET_CODE (x) == CONST)
16425 {
16426 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
16427 && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
16428
16429 base = XEXP (XEXP (x, 0), 0);
16430 offset = INTVAL (XEXP (XEXP (x, 0), 1));
16431 }
16432
16433 switch (GET_CODE (base))
16434 {
16435 case SYMBOL_REF:
16436 name = XSTR (base, 0);
16437 break;
16438
16439 case LABEL_REF:
16440 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
16441 CODE_LABEL_NUMBER (XEXP (base, 0)));
16442 break;
16443
16444 case CODE_LABEL:
16445 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
16446 break;
16447
16448 default:
16449 gcc_unreachable ();
16450 }
16451
16452 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16453 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
16454 else
16455 {
16456 fputs ("\t.tc ", file);
16457 RS6000_OUTPUT_BASENAME (file, name);
16458
16459 if (offset < 0)
16460 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
16461 else if (offset)
16462 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
16463
16464 /* Mark large TOC symbols on AIX with [TE] so they are mapped
16465 after other TOC symbols, reducing overflow of small TOC access
16466 to [TC] symbols. */
16467 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
16468 ? "[TE]," : "[TC],", file);
16469 }
16470
16471 /* Currently C++ toc references to vtables can be emitted before it
16472 is decided whether the vtable is public or private. If this is
16473 the case, then the linker will eventually complain that there is
16474 a TOC reference to an unknown section. Thus, for vtables only,
16475 we emit the TOC reference to reference the symbol and not the
16476 section. */
16477 if (VTABLE_NAME_P (name))
16478 {
16479 RS6000_OUTPUT_BASENAME (file, name);
16480 if (offset < 0)
16481 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
16482 else if (offset > 0)
16483 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
16484 }
16485 else
16486 output_addr_const (file, x);
16487
16488 #if HAVE_AS_TLS
16489 if (TARGET_XCOFF && SYMBOL_REF_P (base))
16490 {
16491 switch (SYMBOL_REF_TLS_MODEL (base))
16492 {
16493 case 0:
16494 break;
16495 case TLS_MODEL_LOCAL_EXEC:
16496 fputs ("@le", file);
16497 break;
16498 case TLS_MODEL_INITIAL_EXEC:
16499 fputs ("@ie", file);
16500 break;
16501 /* Use global-dynamic for local-dynamic. */
16502 case TLS_MODEL_GLOBAL_DYNAMIC:
16503 case TLS_MODEL_LOCAL_DYNAMIC:
16504 putc ('\n', file);
16505 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
16506 fputs ("\t.tc .", file);
16507 RS6000_OUTPUT_BASENAME (file, name);
16508 fputs ("[TC],", file);
16509 output_addr_const (file, x);
16510 fputs ("@m", file);
16511 break;
16512 default:
16513 gcc_unreachable ();
16514 }
16515 }
16516 #endif
16517
16518 putc ('\n', file);
16519 }
16520 \f
16521 /* Output an assembler pseudo-op to write an ASCII string of N characters
16522 starting at P to FILE.
16523
16524 On the RS/6000, we have to do this using the .byte operation and
16525 write out special characters outside the quoted string.
16526 Also, the assembler is broken; very long strings are truncated,
16527 so we must artificially break them up early. */
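/* Worked example (illustration only): for the 3-byte input "Hi\n" the
   loop below emits

	.byte "Hi"
	.byte 10

   printable runs are collected into one quoted .byte directive and
   each non-printable byte is written as a decimal .byte.  */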
16528
16529 void
16530 output_ascii (FILE *file, const char *p, int n)
16531 {
16532 char c;
16533 int i, count_string;
16534 const char *for_string = "\t.byte \"";
16535 const char *for_decimal = "\t.byte ";
16536 const char *to_close = NULL;
16537
16538 count_string = 0;
16539 for (i = 0; i < n; i++)
16540 {
16541 c = *p++;
16542 if (c >= ' ' && c < 0177)
16543 {
16544 if (for_string)
16545 fputs (for_string, file);
16546 putc (c, file);
16547
16548 /* Write two quotes to get one. */
16549 if (c == '"')
16550 {
16551 putc (c, file);
16552 ++count_string;
16553 }
16554
16555 for_string = NULL;
16556 for_decimal = "\"\n\t.byte ";
16557 to_close = "\"\n";
16558 ++count_string;
16559
16560 if (count_string >= 512)
16561 {
16562 fputs (to_close, file);
16563
16564 for_string = "\t.byte \"";
16565 for_decimal = "\t.byte ";
16566 to_close = NULL;
16567 count_string = 0;
16568 }
16569 }
16570 else
16571 {
16572 if (for_decimal)
16573 fputs (for_decimal, file);
16574 fprintf (file, "%d", c);
16575
16576 for_string = "\n\t.byte \"";
16577 for_decimal = ", ";
16578 to_close = "\n";
16579 count_string = 0;
16580 }
16581 }
16582
16583 /* Now close the string if we have written one. Then end the line. */
16584 if (to_close)
16585 fputs (to_close, file);
16586 }
16587 \f
16588 /* Generate a unique section name for FILENAME for a section type
16589 represented by SECTION_DESC. Output goes into BUF.
16590
16591 SECTION_DESC can be any string, as long as it is different for each
16592 possible section type.
16593
16594 We name the section in the same manner as xlc. The name begins with an
16595 underscore followed by the filename (after stripping any leading directory
16596 names) with the last period replaced by the string SECTION_DESC. If
16597 FILENAME does not contain a period, SECTION_DESC is appended to the end of
16598 the name. */
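/* Example (worked by hand): FILENAME "rs6000/foo-bar.c" with
   SECTION_DESC "data" yields "_foobardata"; the directory part and the
   non-alphanumeric '-' are dropped, and "data" replaces everything
   from the final '.' on.  */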
16599
16600 void
16601 rs6000_gen_section_name (char **buf, const char *filename,
16602 const char *section_desc)
16603 {
16604 const char *q, *after_last_slash, *last_period = 0;
16605 char *p;
16606 int len;
16607
16608 after_last_slash = filename;
16609 for (q = filename; *q; q++)
16610 {
16611 if (*q == '/')
16612 after_last_slash = q + 1;
16613 else if (*q == '.')
16614 last_period = q;
16615 }
16616
16617 len = strlen (after_last_slash) + strlen (section_desc) + 2;
16618 *buf = (char *) xmalloc (len);
16619
16620 p = *buf;
16621 *p++ = '_';
16622
16623 for (q = after_last_slash; *q; q++)
16624 {
16625 if (q == last_period)
16626 {
16627 strcpy (p, section_desc);
16628 p += strlen (section_desc);
16629 break;
16630 }
16631
16632 else if (ISALNUM (*q))
16633 *p++ = *q;
16634 }
16635
16636 if (last_period == 0)
16637 strcpy (p, section_desc);
16638 else
16639 *p = '\0';
16640 }
16641 \f
16642 /* Emit profile function. */
16643
16644 void
16645 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
16646 {
16647 /* Non-standard profiling for kernels, which just saves LR then calls
16648 _mcount without worrying about arg saves. The idea is to change
16649 the function prologue as little as possible as it isn't easy to
16650 account for arg save/restore code added just for _mcount. */
16651 if (TARGET_PROFILE_KERNEL)
16652 return;
16653
16654 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
16655 {
16656 #ifndef NO_PROFILE_COUNTERS
16657 # define NO_PROFILE_COUNTERS 0
16658 #endif
16659 if (NO_PROFILE_COUNTERS)
16660 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
16661 LCT_NORMAL, VOIDmode);
16662 else
16663 {
16664 char buf[30];
16665 const char *label_name;
16666 rtx fun;
16667
16668 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
16669 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
16670 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
16671
16672 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
16673 LCT_NORMAL, VOIDmode, fun, Pmode);
16674 }
16675 }
16676 else if (DEFAULT_ABI == ABI_DARWIN)
16677 {
16678 const char *mcount_name = RS6000_MCOUNT;
16679 int caller_addr_regno = LR_REGNO;
16680
16681 /* Be conservative and always set this, at least for now. */
16682 crtl->uses_pic_offset_table = 1;
16683
16684 #if TARGET_MACHO
16685 /* For PIC code, set up a stub and collect the caller's address
16686 from r0, which is where the prologue puts it. */
16687 if (MACHOPIC_INDIRECT
16688 && crtl->uses_pic_offset_table)
16689 caller_addr_regno = 0;
16690 #endif
16691 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
16692 LCT_NORMAL, VOIDmode,
16693 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
16694 }
16695 }
16696
16697 /* Write function profiler code. */
16698
16699 void
16700 output_function_profiler (FILE *file, int labelno)
16701 {
16702 char buf[100];
16703
16704 switch (DEFAULT_ABI)
16705 {
16706 default:
16707 gcc_unreachable ();
16708
16709 case ABI_V4:
16710 if (!TARGET_32BIT)
16711 {
16712 warning (0, "no profiling of 64-bit code for this ABI");
16713 return;
16714 }
16715 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
16716 fprintf (file, "\tmflr %s\n", reg_names[0]);
16717 if (NO_PROFILE_COUNTERS)
16718 {
16719 asm_fprintf (file, "\tstw %s,4(%s)\n",
16720 reg_names[0], reg_names[1]);
16721 }
16722 else if (TARGET_SECURE_PLT && flag_pic)
16723 {
16724 if (TARGET_LINK_STACK)
16725 {
16726 char name[32];
16727 get_ppc476_thunk_name (name);
16728 asm_fprintf (file, "\tbl %s\n", name);
16729 }
16730 else
16731 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
16732 asm_fprintf (file, "\tstw %s,4(%s)\n",
16733 reg_names[0], reg_names[1]);
16734 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
16735 asm_fprintf (file, "\taddis %s,%s,",
16736 reg_names[12], reg_names[12]);
16737 assemble_name (file, buf);
16738 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
16739 assemble_name (file, buf);
16740 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
16741 }
16742 else if (flag_pic == 1)
16743 {
16744 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
16745 asm_fprintf (file, "\tstw %s,4(%s)\n",
16746 reg_names[0], reg_names[1]);
16747 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
16748 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
16749 assemble_name (file, buf);
16750 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
16751 }
16752 else if (flag_pic > 1)
16753 {
16754 asm_fprintf (file, "\tstw %s,4(%s)\n",
16755 reg_names[0], reg_names[1]);
16756 /* Now, we need to get the address of the label. */
16757 if (TARGET_LINK_STACK)
16758 {
16759 char name[32];
16760 get_ppc476_thunk_name (name);
16761 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
16762 assemble_name (file, buf);
16763 fputs ("-.\n1:", file);
16764 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
16765 asm_fprintf (file, "\taddi %s,%s,4\n",
16766 reg_names[11], reg_names[11]);
16767 }
16768 else
16769 {
16770 fputs ("\tbcl 20,31,1f\n\t.long ", file);
16771 assemble_name (file, buf);
16772 fputs ("-.\n1:", file);
16773 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
16774 }
16775 asm_fprintf (file, "\tlwz %s,0(%s)\n",
16776 reg_names[0], reg_names[11]);
16777 asm_fprintf (file, "\tadd %s,%s,%s\n",
16778 reg_names[0], reg_names[0], reg_names[11]);
16779 }
16780 else
16781 {
16782 asm_fprintf (file, "\tlis %s,", reg_names[12]);
16783 assemble_name (file, buf);
16784 fputs ("@ha\n", file);
16785 asm_fprintf (file, "\tstw %s,4(%s)\n",
16786 reg_names[0], reg_names[1]);
16787 asm_fprintf (file, "\tla %s,", reg_names[0]);
16788 assemble_name (file, buf);
16789 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
16790 }
16791
16792 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
16793 fprintf (file, "\tbl %s%s\n",
16794 RS6000_MCOUNT, flag_pic ? "@plt" : "");
16795 break;
16796
16797 case ABI_AIX:
16798 case ABI_ELFv2:
16799 case ABI_DARWIN:
16800 /* Don't do anything, done in output_profile_hook (). */
16801 break;
16802 }
16803 }
16804
16805 \f
16806
16807 /* The following variable holds the last insn issued by the scheduler. */
16808
16809 static rtx_insn *last_scheduled_insn;
16810
16811 /* The following variable helps to balance the issuing of load and
16812 store instructions.  */
16813
16814 static int load_store_pendulum;
16815
16816 /* The following variable helps pair divide insns during scheduling. */
16817 static int divide_cnt;
16818 /* The following variable helps pair and alternate vector and vector load
16819 insns during scheduling. */
16820 static int vec_pairing;
16821
16822
16823 /* Power4 load update and store update instructions are cracked into a
16824 load or store and an integer insn which are executed in the same cycle.
16825 Branches have their own dispatch slot which does not count against the
16826 GCC issue rate, but it changes the program flow so there are no other
16827 instructions to issue in this cycle. */
16828
16829 static int
16830 rs6000_variable_issue_1 (rtx_insn *insn, int more)
16831 {
16832 last_scheduled_insn = insn;
16833 if (GET_CODE (PATTERN (insn)) == USE
16834 || GET_CODE (PATTERN (insn)) == CLOBBER)
16835 {
16836 cached_can_issue_more = more;
16837 return cached_can_issue_more;
16838 }
16839
16840 if (insn_terminates_group_p (insn, current_group))
16841 {
16842 cached_can_issue_more = 0;
16843 return cached_can_issue_more;
16844 }
16845
16846 /* If the insn has no reservation but we still reach here, leave the issue count unchanged. */
16847 if (recog_memoized (insn) < 0)
16848 return more;
16849
16850 if (rs6000_sched_groups)
16851 {
16852 if (is_microcoded_insn (insn))
16853 cached_can_issue_more = 0;
16854 else if (is_cracked_insn (insn))
16855 cached_can_issue_more = more > 2 ? more - 2 : 0;
16856 else
16857 cached_can_issue_more = more - 1;
16858
16859 return cached_can_issue_more;
16860 }
16861
16862 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
16863 return 0;
16864
16865 cached_can_issue_more = more - 1;
16866 return cached_can_issue_more;
16867 }
16868
16869 static int
16870 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
16871 {
16872 int r = rs6000_variable_issue_1 (insn, more);
16873 if (verbose)
16874 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
16875 return r;
16876 }
16877
16878 /* Adjust the cost of a scheduling dependency. Return the new cost of
16879 a dependence of kind DEP_TYPE between INSN and DEP_INSN. COST is the current cost. */
16880
16881 static int
16882 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
16883 unsigned int)
16884 {
16885 enum attr_type attr_type;
16886
16887 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
16888 return cost;
16889
16890 switch (dep_type)
16891 {
16892 case REG_DEP_TRUE:
16893 {
16894 /* Data dependency; DEP_INSN writes a register that INSN reads
16895 some cycles later. */
16896
16897 /* Separate a load from a narrower, dependent store. */
16898 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
16899 || rs6000_tune == PROCESSOR_FUTURE)
16900 && GET_CODE (PATTERN (insn)) == SET
16901 && GET_CODE (PATTERN (dep_insn)) == SET
16902 && MEM_P (XEXP (PATTERN (insn), 1))
16903 && MEM_P (XEXP (PATTERN (dep_insn), 0))
16904 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
16905 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
16906 return cost + 14;
16907
16908 attr_type = get_attr_type (insn);
16909
16910 switch (attr_type)
16911 {
16912 case TYPE_JMPREG:
16913 /* Tell the first scheduling pass about the latency between
16914 a mtctr and bctr (and mtlr and br/blr). The first
16915 scheduling pass will not know about this latency since
16916 the mtctr instruction, which has the latency associated
16917 to it, will be generated by reload. */
16918 return 4;
16919 case TYPE_BRANCH:
16920 /* Leave some extra cycles between a compare and its
16921 dependent branch, to inhibit expensive mispredicts. */
16922 if ((rs6000_tune == PROCESSOR_PPC603
16923 || rs6000_tune == PROCESSOR_PPC604
16924 || rs6000_tune == PROCESSOR_PPC604e
16925 || rs6000_tune == PROCESSOR_PPC620
16926 || rs6000_tune == PROCESSOR_PPC630
16927 || rs6000_tune == PROCESSOR_PPC750
16928 || rs6000_tune == PROCESSOR_PPC7400
16929 || rs6000_tune == PROCESSOR_PPC7450
16930 || rs6000_tune == PROCESSOR_PPCE5500
16931 || rs6000_tune == PROCESSOR_PPCE6500
16932 || rs6000_tune == PROCESSOR_POWER4
16933 || rs6000_tune == PROCESSOR_POWER5
16934 || rs6000_tune == PROCESSOR_POWER7
16935 || rs6000_tune == PROCESSOR_POWER8
16936 || rs6000_tune == PROCESSOR_POWER9
16937 || rs6000_tune == PROCESSOR_FUTURE
16938 || rs6000_tune == PROCESSOR_CELL)
16939 && recog_memoized (dep_insn)
16940 && (INSN_CODE (dep_insn) >= 0))
16941
16942 switch (get_attr_type (dep_insn))
16943 {
16944 case TYPE_CMP:
16945 case TYPE_FPCOMPARE:
16946 case TYPE_CR_LOGICAL:
16947 return cost + 2;
16948 case TYPE_EXTS:
16949 case TYPE_MUL:
16950 if (get_attr_dot (dep_insn) == DOT_YES)
16951 return cost + 2;
16952 else
16953 break;
16954 case TYPE_SHIFT:
16955 if (get_attr_dot (dep_insn) == DOT_YES
16956 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
16957 return cost + 2;
16958 else
16959 break;
16960 default:
16961 break;
16962 }
16963 break;
16964
16965 case TYPE_STORE:
16966 case TYPE_FPSTORE:
16967 if ((rs6000_tune == PROCESSOR_POWER6)
16968 && recog_memoized (dep_insn)
16969 && (INSN_CODE (dep_insn) >= 0))
16970 {
16971
16972 if (GET_CODE (PATTERN (insn)) != SET)
16973 /* If this happens, we have to extend this to schedule
16974 optimally. Return default for now. */
16975 return cost;
16976
16977 /* Adjust the cost for the case where the value written
16978 by a fixed point operation is used as the address
16979 gen value on a store. */
16980 switch (get_attr_type (dep_insn))
16981 {
16982 case TYPE_LOAD:
16983 case TYPE_CNTLZ:
16984 {
16985 if (! rs6000_store_data_bypass_p (dep_insn, insn))
16986 return get_attr_sign_extend (dep_insn)
16987 == SIGN_EXTEND_YES ? 6 : 4;
16988 break;
16989 }
16990 case TYPE_SHIFT:
16991 {
16992 if (! rs6000_store_data_bypass_p (dep_insn, insn))
16993 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
16994 6 : 3;
16995 break;
16996 }
16997 case TYPE_INTEGER:
16998 case TYPE_ADD:
16999 case TYPE_LOGICAL:
17000 case TYPE_EXTS:
17001 case TYPE_INSERT:
17002 {
17003 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17004 return 3;
17005 break;
17006 }
17007 case TYPE_STORE:
17008 case TYPE_FPLOAD:
17009 case TYPE_FPSTORE:
17010 {
17011 if (get_attr_update (dep_insn) == UPDATE_YES
17012 && ! rs6000_store_data_bypass_p (dep_insn, insn))
17013 return 3;
17014 break;
17015 }
17016 case TYPE_MUL:
17017 {
17018 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17019 return 17;
17020 break;
17021 }
17022 case TYPE_DIV:
17023 {
17024 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17025 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17026 break;
17027 }
17028 default:
17029 break;
17030 }
17031 }
17032 break;
17033
17034 case TYPE_LOAD:
17035 if ((rs6000_tune == PROCESSOR_POWER6)
17036 && recog_memoized (dep_insn)
17037 && (INSN_CODE (dep_insn) >= 0))
17038 {
17039
17040 /* Adjust the cost for the case where the value written
17041 by a fixed point instruction is used within the address
17042 gen portion of a subsequent load(u)(x) */
17043 switch (get_attr_type (dep_insn))
17044 {
17045 case TYPE_LOAD:
17046 case TYPE_CNTLZ:
17047 {
17048 if (set_to_load_agen (dep_insn, insn))
17049 return get_attr_sign_extend (dep_insn)
17050 == SIGN_EXTEND_YES ? 6 : 4;
17051 break;
17052 }
17053 case TYPE_SHIFT:
17054 {
17055 if (set_to_load_agen (dep_insn, insn))
17056 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
17057 6 : 3;
17058 break;
17059 }
17060 case TYPE_INTEGER:
17061 case TYPE_ADD:
17062 case TYPE_LOGICAL:
17063 case TYPE_EXTS:
17064 case TYPE_INSERT:
17065 {
17066 if (set_to_load_agen (dep_insn, insn))
17067 return 3;
17068 break;
17069 }
17070 case TYPE_STORE:
17071 case TYPE_FPLOAD:
17072 case TYPE_FPSTORE:
17073 {
17074 if (get_attr_update (dep_insn) == UPDATE_YES
17075 && set_to_load_agen (dep_insn, insn))
17076 return 3;
17077 break;
17078 }
17079 case TYPE_MUL:
17080 {
17081 if (set_to_load_agen (dep_insn, insn))
17082 return 17;
17083 break;
17084 }
17085 case TYPE_DIV:
17086 {
17087 if (set_to_load_agen (dep_insn, insn))
17088 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17089 break;
17090 }
17091 default:
17092 break;
17093 }
17094 }
17095 break;
17096
17097 case TYPE_FPLOAD:
17098 if ((rs6000_tune == PROCESSOR_POWER6)
17099 && get_attr_update (insn) == UPDATE_NO
17100 && recog_memoized (dep_insn)
17101 && (INSN_CODE (dep_insn) >= 0)
17102 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
17103 return 2;
17104
17105 default:
17106 break;
17107 }
17108
17109 /* Fall out to return default cost. */
17110 }
17111 break;
17112
17113 case REG_DEP_OUTPUT:
17114 /* Output dependency; DEP_INSN writes a register that INSN writes some
17115 cycles later. */
17116 if ((rs6000_tune == PROCESSOR_POWER6)
17117 && recog_memoized (dep_insn)
17118 && (INSN_CODE (dep_insn) >= 0))
17119 {
17120 attr_type = get_attr_type (insn);
17121
17122 switch (attr_type)
17123 {
17124 case TYPE_FP:
17125 case TYPE_FPSIMPLE:
17126 if (get_attr_type (dep_insn) == TYPE_FP
17127 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
17128 return 1;
17129 break;
17130 case TYPE_FPLOAD:
17131 if (get_attr_update (insn) == UPDATE_NO
17132 && get_attr_type (dep_insn) == TYPE_MFFGPR)
17133 return 2;
17134 break;
17135 default:
17136 break;
17137 }
17138 }
17139 /* Fall through, no cost for output dependency. */
17140 /* FALLTHRU */
17141
17142 case REG_DEP_ANTI:
17143 /* Anti dependency; DEP_INSN reads a register that INSN writes some
17144 cycles later. */
17145 return 0;
17146
17147 default:
17148 gcc_unreachable ();
17149 }
17150
17151 return cost;
17152 }
17153
17154 /* Debug version of rs6000_adjust_cost. */
17155
17156 static int
17157 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
17158 int cost, unsigned int dw)
17159 {
17160 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
17161
17162 if (ret != cost)
17163 {
17164 const char *dep;
17165
17166 switch (dep_type)
17167 {
17168 default: dep = "unknown dependency"; break;
17169 case REG_DEP_TRUE: dep = "data dependency"; break;
17170 case REG_DEP_OUTPUT: dep = "output dependency"; break;
17171 case REG_DEP_ANTI: dep = "anti dependency"; break;
17172 }
17173
17174 fprintf (stderr,
17175 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
17176 "%s, insn:\n", ret, cost, dep);
17177
17178 debug_rtx (insn);
17179 }
17180
17181 return ret;
17182 }
17183
17184 /* Return true if INSN is microcoded on the current target,
17185 false otherwise. */
17186
17187 static bool
17188 is_microcoded_insn (rtx_insn *insn)
17189 {
17190 if (!insn || !NONDEBUG_INSN_P (insn)
17191 || GET_CODE (PATTERN (insn)) == USE
17192 || GET_CODE (PATTERN (insn)) == CLOBBER)
17193 return false;
17194
17195 if (rs6000_tune == PROCESSOR_CELL)
17196 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
17197
17198 if (rs6000_sched_groups
17199 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
17200 {
17201 enum attr_type type = get_attr_type (insn);
17202 if ((type == TYPE_LOAD
17203 && get_attr_update (insn) == UPDATE_YES
17204 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
17205 || ((type == TYPE_LOAD || type == TYPE_STORE)
17206 && get_attr_update (insn) == UPDATE_YES
17207 && get_attr_indexed (insn) == INDEXED_YES)
17208 || type == TYPE_MFCR)
17209 return true;
17210 }
17211
17212 return false;
17213 }
17214
17215 /* The function returns true if INSN is cracked into 2 instructions
17216 by the processor (and therefore occupies 2 issue slots). */
17217
17218 static bool
17219 is_cracked_insn (rtx_insn *insn)
17220 {
17221 if (!insn || !NONDEBUG_INSN_P (insn)
17222 || GET_CODE (PATTERN (insn)) == USE
17223 || GET_CODE (PATTERN (insn)) == CLOBBER)
17224 return false;
17225
17226 if (rs6000_sched_groups
17227 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
17228 {
17229 enum attr_type type = get_attr_type (insn);
17230 if ((type == TYPE_LOAD
17231 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
17232 && get_attr_update (insn) == UPDATE_NO)
17233 || (type == TYPE_LOAD
17234 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
17235 && get_attr_update (insn) == UPDATE_YES
17236 && get_attr_indexed (insn) == INDEXED_NO)
17237 || (type == TYPE_STORE
17238 && get_attr_update (insn) == UPDATE_YES
17239 && get_attr_indexed (insn) == INDEXED_NO)
17240 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
17241 && get_attr_update (insn) == UPDATE_YES)
17242 || (type == TYPE_CR_LOGICAL
17243 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
17244 || (type == TYPE_EXTS
17245 && get_attr_dot (insn) == DOT_YES)
17246 || (type == TYPE_SHIFT
17247 && get_attr_dot (insn) == DOT_YES
17248 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
17249 || (type == TYPE_MUL
17250 && get_attr_dot (insn) == DOT_YES)
17251 || type == TYPE_DIV
17252 || (type == TYPE_INSERT
17253 && get_attr_size (insn) == SIZE_32))
17254 return true;
17255 }
17256
17257 return false;
17258 }
17259
17260 /* The function returns true if INSN can be issued only from
17261 the branch slot. */
17262
17263 static bool
17264 is_branch_slot_insn (rtx_insn *insn)
17265 {
17266 if (!insn || !NONDEBUG_INSN_P (insn)
17267 || GET_CODE (PATTERN (insn)) == USE
17268 || GET_CODE (PATTERN (insn)) == CLOBBER)
17269 return false;
17270
17271 if (rs6000_sched_groups)
17272 {
17273 enum attr_type type = get_attr_type (insn);
17274 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
17275 return true;
17276 return false;
17277 }
17278
17279 return false;
17280 }
17281
17282 /* Return true if OUT_INSN sets a value that is used in the
17283 address generation computation of IN_INSN.  */
17284 static bool
17285 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
17286 {
17287 rtx out_set, in_set;
17288
17289 /* For performance reasons, only handle the simple case where
17290 both loads are a single_set. */
17291 out_set = single_set (out_insn);
17292 if (out_set)
17293 {
17294 in_set = single_set (in_insn);
17295 if (in_set)
17296 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
17297 }
17298
17299 return false;
17300 }
17301
17302 /* Try to determine base/offset/size parts of the given MEM.
17303 Return true if successful, false if any of the values could not
17304 be determined.
17305
17306 This function only looks for REG or REG+CONST address forms.
17307 REG+REG address form will return false. */
17308
17309 static bool
17310 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
17311 HOST_WIDE_INT *size)
17312 {
17313 rtx addr_rtx;
17314 if (MEM_SIZE_KNOWN_P (mem))
17315 *size = MEM_SIZE (mem);
17316 else
17317 return false;
17318
17319 addr_rtx = (XEXP (mem, 0));
17320 if (GET_CODE (addr_rtx) == PRE_MODIFY)
17321 addr_rtx = XEXP (addr_rtx, 1);
17322
17323 *offset = 0;
17324 while (GET_CODE (addr_rtx) == PLUS
17325 && CONST_INT_P (XEXP (addr_rtx, 1)))
17326 {
17327 *offset += INTVAL (XEXP (addr_rtx, 1));
17328 addr_rtx = XEXP (addr_rtx, 0);
17329 }
17330 if (!REG_P (addr_rtx))
17331 return false;
17332
17333 *base = addr_rtx;
17334 return true;
17335 }
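/* Example (worked by hand): for a MEM such as
   (mem:DI (plus:DI (reg:DI 9) (const_int 16))) with a known 8-byte
   size, this yields *base = r9, *offset = 16, *size = 8; an indexed
   REG+REG address makes it return false.  */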
17336
17337 /* Return true if the target storage location of MEM1 is adjacent
17338 to the target storage location of MEM2.  */
17340
17341 static bool
17342 adjacent_mem_locations (rtx mem1, rtx mem2)
17343 {
17344 rtx reg1, reg2;
17345 HOST_WIDE_INT off1, size1, off2, size2;
17346
17347 if (get_memref_parts (mem1, &reg1, &off1, &size1)
17348 && get_memref_parts (mem2, &reg2, &off2, &size2))
17349 return ((REGNO (reg1) == REGNO (reg2))
17350 && ((off1 + size1 == off2)
17351 || (off2 + size2 == off1)));
17352
17353 return false;
17354 }
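/* Example: two 8-byte accesses at r9+0 and r9+8 are adjacent in either
   argument order; r9+0 and r9+16 are not.  */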
17355
17356 /* This function returns true if it can be determined that the two MEM
17357 locations overlap by at least 1 byte based on base reg/offset/size. */
17358
17359 static bool
17360 mem_locations_overlap (rtx mem1, rtx mem2)
17361 {
17362 rtx reg1, reg2;
17363 HOST_WIDE_INT off1, size1, off2, size2;
17364
17365 if (get_memref_parts (mem1, &reg1, &off1, &size1)
17366 && get_memref_parts (mem2, &reg2, &off2, &size2))
17367 return ((REGNO (reg1) == REGNO (reg2))
17368 && (((off1 <= off2) && (off1 + size1 > off2))
17369 || ((off2 <= off1) && (off2 + size2 > off1))));
17370
17371 return false;
17372 }
17373
17374 /* Update the integer scheduling priority INSN_PRIORITY (INSN).
17375 Increase the priority to execute INSN earlier; reduce it to
17376 execute INSN later. Targets that do not need to adjust
17377 scheduling priorities simply leave this hook undefined. */
17379
17380 static int
17381 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
17382 {
17383 rtx load_mem, str_mem;
17384 /* On machines (like the 750) which have asymmetric integer units,
17385 where one integer unit can do multiply and divides and the other
17386 can't, reduce the priority of multiply/divide so it is scheduled
17387 before other integer operations. */
17388
17389 #if 0
17390 if (! INSN_P (insn))
17391 return priority;
17392
17393 if (GET_CODE (PATTERN (insn)) == USE)
17394 return priority;
17395
17396 switch (rs6000_tune) {
17397 case PROCESSOR_PPC750:
17398 switch (get_attr_type (insn))
17399 {
17400 default:
17401 break;
17402
17403 case TYPE_MUL:
17404 case TYPE_DIV:
17405 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
17406 priority, priority);
17407 if (priority >= 0 && priority < 0x01000000)
17408 priority >>= 3;
17409 break;
17410 }
17411 }
17412 #endif
17413
17414 if (insn_must_be_first_in_group (insn)
17415 && reload_completed
17416 && current_sched_info->sched_max_insns_priority
17417 && rs6000_sched_restricted_insns_priority)
17418 {
17419
17420 /* Prioritize insns that can be dispatched only in the first
17421 dispatch slot. */
17422 if (rs6000_sched_restricted_insns_priority == 1)
17423 /* Attach highest priority to insn. This means that in
17424 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
17425 precede 'priority' (critical path) considerations. */
17426 return current_sched_info->sched_max_insns_priority;
17427 else if (rs6000_sched_restricted_insns_priority == 2)
17428 /* Increase priority of insn by a minimal amount. This means that in
17429 haifa-sched.c:ready_sort(), only 'priority' (critical path)
17430 considerations precede dispatch-slot restriction considerations. */
17431 return (priority + 1);
17432 }
17433
17434 if (rs6000_tune == PROCESSOR_POWER6
17435 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
17436 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
17437 /* Attach highest priority to insn if the scheduler has just issued two
17438 stores and this instruction is a load, or two loads and this instruction
17439 is a store. Power6 wants loads and stores scheduled alternately
17440 when possible */
17441 return current_sched_info->sched_max_insns_priority;
17442
17443 return priority;
17444 }
17445
17446 /* Return true if the instruction is nonpipelined on the Cell. */
17447 static bool
17448 is_nonpipeline_insn (rtx_insn *insn)
17449 {
17450 enum attr_type type;
17451 if (!insn || !NONDEBUG_INSN_P (insn)
17452 || GET_CODE (PATTERN (insn)) == USE
17453 || GET_CODE (PATTERN (insn)) == CLOBBER)
17454 return false;
17455
17456 type = get_attr_type (insn);
17457 if (type == TYPE_MUL
17458 || type == TYPE_DIV
17459 || type == TYPE_SDIV
17460 || type == TYPE_DDIV
17461 || type == TYPE_SSQRT
17462 || type == TYPE_DSQRT
17463 || type == TYPE_MFCR
17464 || type == TYPE_MFCRF
17465 || type == TYPE_MFJMPR)
17466 {
17467 return true;
17468 }
17469 return false;
17470 }
17471
17472
17473 /* Return how many instructions the machine can issue per cycle. */
17474
17475 static int
17476 rs6000_issue_rate (void)
17477 {
17478 /* Unless scheduling for register pressure, use issue rate of 1 for
17479 first scheduling pass to decrease degradation. */
17480 if (!reload_completed && !flag_sched_pressure)
17481 return 1;
17482
17483 switch (rs6000_tune) {
17484 case PROCESSOR_RS64A:
17485 case PROCESSOR_PPC601: /* ? */
17486 case PROCESSOR_PPC7450:
17487 return 3;
17488 case PROCESSOR_PPC440:
17489 case PROCESSOR_PPC603:
17490 case PROCESSOR_PPC750:
17491 case PROCESSOR_PPC7400:
17492 case PROCESSOR_PPC8540:
17493 case PROCESSOR_PPC8548:
17494 case PROCESSOR_CELL:
17495 case PROCESSOR_PPCE300C2:
17496 case PROCESSOR_PPCE300C3:
17497 case PROCESSOR_PPCE500MC:
17498 case PROCESSOR_PPCE500MC64:
17499 case PROCESSOR_PPCE5500:
17500 case PROCESSOR_PPCE6500:
17501 case PROCESSOR_TITAN:
17502 return 2;
17503 case PROCESSOR_PPC476:
17504 case PROCESSOR_PPC604:
17505 case PROCESSOR_PPC604e:
17506 case PROCESSOR_PPC620:
17507 case PROCESSOR_PPC630:
17508 return 4;
17509 case PROCESSOR_POWER4:
17510 case PROCESSOR_POWER5:
17511 case PROCESSOR_POWER6:
17512 case PROCESSOR_POWER7:
17513 return 5;
17514 case PROCESSOR_POWER8:
17515 return 7;
17516 case PROCESSOR_POWER9:
17517 case PROCESSOR_FUTURE:
17518 return 6;
17519 default:
17520 return 1;
17521 }
17522 }
17523
17524 /* Return how many instructions to look ahead for better insn
17525 scheduling. */
17526
17527 static int
17528 rs6000_use_sched_lookahead (void)
17529 {
17530 switch (rs6000_tune)
17531 {
17532 case PROCESSOR_PPC8540:
17533 case PROCESSOR_PPC8548:
17534 return 4;
17535
17536 case PROCESSOR_CELL:
17537 return (reload_completed ? 8 : 0);
17538
17539 default:
17540 return 0;
17541 }
17542 }
17543
17544 /* We are choosing an insn from the ready queue. Return zero if INSN
17545 can be chosen, nonzero if it should be skipped for now. */
17546 static int
17547 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
17548 {
17549 if (ready_index == 0)
17550 return 0;
17551
17552 if (rs6000_tune != PROCESSOR_CELL)
17553 return 0;
17554
17555 gcc_assert (insn != NULL_RTX && INSN_P (insn));
17556
17557 if (!reload_completed
17558 || is_nonpipeline_insn (insn)
17559 || is_microcoded_insn (insn))
17560 return 1;
17561
17562 return 0;
17563 }
17564
17565 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
17566 and return true. */
17567
17568 static bool
17569 find_mem_ref (rtx pat, rtx *mem_ref)
17570 {
17571 const char * fmt;
17572 int i, j;
17573
17574 /* stack_tie does not produce any real memory traffic. */
17575 if (tie_operand (pat, VOIDmode))
17576 return false;
17577
17578 if (MEM_P (pat))
17579 {
17580 *mem_ref = pat;
17581 return true;
17582 }
17583
17584 /* Recursively process the pattern. */
17585 fmt = GET_RTX_FORMAT (GET_CODE (pat));
17586
17587 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
17588 {
17589 if (fmt[i] == 'e')
17590 {
17591 if (find_mem_ref (XEXP (pat, i), mem_ref))
17592 return true;
17593 }
17594 else if (fmt[i] == 'E')
17595 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
17596 {
17597 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
17598 return true;
17599 }
17600 }
17601
17602 return false;
17603 }
17604
17605 /* Determine if PAT is a PATTERN of a load insn. */
17606
17607 static bool
17608 is_load_insn1 (rtx pat, rtx *load_mem)
17609 {
17610 if (!pat)
17611 return false;
17612
17613 if (GET_CODE (pat) == SET)
17614 return find_mem_ref (SET_SRC (pat), load_mem);
17615
17616 if (GET_CODE (pat) == PARALLEL)
17617 {
17618 int i;
17619
17620 for (i = 0; i < XVECLEN (pat, 0); i++)
17621 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
17622 return true;
17623 }
17624
17625 return false;
17626 }
17627
17628 /* Determine if INSN loads from memory. */
17629
17630 static bool
17631 is_load_insn (rtx insn, rtx *load_mem)
17632 {
17633 if (!insn || !INSN_P (insn))
17634 return false;
17635
17636 if (CALL_P (insn))
17637 return false;
17638
17639 return is_load_insn1 (PATTERN (insn), load_mem);
17640 }
17641
17642 /* Determine if PAT is a PATTERN of a store insn. */
17643
17644 static bool
17645 is_store_insn1 (rtx pat, rtx *str_mem)
17646 {
17647 if (!pat)
17648 return false;
17649
17650 if (GET_CODE (pat) == SET)
17651 return find_mem_ref (SET_DEST (pat), str_mem);
17652
17653 if (GET_CODE (pat) == PARALLEL)
17654 {
17655 int i;
17656
17657 for (i = 0; i < XVECLEN (pat, 0); i++)
17658 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
17659 return true;
17660 }
17661
17662 return false;
17663 }
17664
17665 /* Determine if INSN stores to memory. */
17666
17667 static bool
17668 is_store_insn (rtx insn, rtx *str_mem)
17669 {
17670 if (!insn || !INSN_P (insn))
17671 return false;
17672
17673 return is_store_insn1 (PATTERN (insn), str_mem);
17674 }
17675
17676 /* Return whether TYPE is a Power9 pairable vector instruction type. */
17677
17678 static bool
17679 is_power9_pairable_vec_type (enum attr_type type)
17680 {
17681 switch (type)
17682 {
17683 case TYPE_VECSIMPLE:
17684 case TYPE_VECCOMPLEX:
17685 case TYPE_VECDIV:
17686 case TYPE_VECCMP:
17687 case TYPE_VECPERM:
17688 case TYPE_VECFLOAT:
17689 case TYPE_VECFDIV:
17690 case TYPE_VECDOUBLE:
17691 return true;
17692 default:
17693 break;
17694 }
17695 return false;
17696 }
17697
17698 /* Returns whether the dependence between INSN and NEXT is considered
17699 costly by the given target. */
17700
17701 static bool
17702 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
17703 {
17704 rtx insn;
17705 rtx next;
17706 rtx load_mem, str_mem;
17707
17708 /* If the flag is not enabled - no dependence is considered costly;
17709 allow all dependent insns in the same group.
17710 This is the most aggressive option. */
17711 if (rs6000_sched_costly_dep == no_dep_costly)
17712 return false;
17713
17714 /* If the flag is set to 1 - a dependence is always considered costly;
17715 do not allow dependent instructions in the same group.
17716 This is the most conservative option. */
17717 if (rs6000_sched_costly_dep == all_deps_costly)
17718 return true;
17719
17720 insn = DEP_PRO (dep);
17721 next = DEP_CON (dep);
17722
17723 if (rs6000_sched_costly_dep == store_to_load_dep_costly
17724 && is_load_insn (next, &load_mem)
17725 && is_store_insn (insn, &str_mem))
17726 /* Prevent load after store in the same group. */
17727 return true;
17728
17729 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
17730 && is_load_insn (next, &load_mem)
17731 && is_store_insn (insn, &str_mem)
17732 && DEP_TYPE (dep) == REG_DEP_TRUE
17733 && mem_locations_overlap(str_mem, load_mem))
17734 /* Prevent load after store in the same group if it is a true
17735 dependence. */
17736 return true;
17737
17738 /* The flag is set to X; dependences with latency >= X are considered costly,
17739 and will not be scheduled in the same group. */
17740 if (rs6000_sched_costly_dep <= max_dep_latency
17741 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
17742 return true;
17743
17744 return false;
17745 }
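/* The policies above correspond to the -msched-costly-dep= option:
   "no", "all", "store_to_load", "true_store_to_load", or a numeric
   latency threshold (see the GCC manual for the exact spelling of the
   accepted values).  */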
17746
17747 /* Return the next insn after INSN that is found before TAIL is reached,
17748 skipping any "non-active" insns - insns that will not actually occupy
17749 an issue slot. Return NULL_RTX if such an insn is not found. */
17750
17751 static rtx_insn *
17752 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
17753 {
17754 if (insn == NULL_RTX || insn == tail)
17755 return NULL;
17756
17757 while (1)
17758 {
17759 insn = NEXT_INSN (insn);
17760 if (insn == NULL_RTX || insn == tail)
17761 return NULL;
17762
17763 if (CALL_P (insn)
17764 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
17765 || (NONJUMP_INSN_P (insn)
17766 && GET_CODE (PATTERN (insn)) != USE
17767 && GET_CODE (PATTERN (insn)) != CLOBBER
17768 && INSN_CODE (insn) != CODE_FOR_stack_tie))
17769 break;
17770 }
17771 return insn;
17772 }
17773
17774 /* Move instruction at POS to the end of the READY list. */
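/* (In the haifa scheduler the ready list is ordered with the best
   candidate last, so moving an insn to the end effectively puts it at
   the "head" that the comments in power6_sched_reorder2 below refer
   to.)  */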
17775
17776 static void
17777 move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
17778 {
17779 rtx_insn *tmp;
17780 int i;
17781
17782 tmp = ready[pos];
17783 for (i = pos; i < lastpos; i++)
17784 ready[i] = ready[i + 1];
17785 ready[lastpos] = tmp;
17786 }
17787
17788 /* Do Power6 specific sched_reorder2 reordering of ready list. */
17789
17790 static int
17791 power6_sched_reorder2 (rtx_insn **ready, int lastpos)
17792 {
17793 /* For Power6, we need to handle some special cases to try and keep the
17794 store queue from overflowing and triggering expensive flushes.
17795
17796 This code monitors how load and store instructions are being issued
17797 and skews the ready list one way or the other to increase the likelihood
17798 that a desired instruction is issued at the proper time.
17799
17800 A couple of things are done. First, we maintain a "load_store_pendulum"
17801 to track the current state of load/store issue.
17802
17803 - If the pendulum is at zero, then no loads or stores have been
17804 issued in the current cycle so we do nothing.
17805
17806 - If the pendulum is 1, then a single load has been issued in this
17807 cycle and we attempt to locate another load in the ready list to
17808 issue with it.
17809
17810 - If the pendulum is -2, then two stores have already been
17811 issued in this cycle, so we increase the priority of the first load
17812 in the ready list to increase its likelihood of being chosen first
17813 in the next cycle.
17814
17815 - If the pendulum is -1, then a single store has been issued in this
17816 cycle and we attempt to locate another store in the ready list to
17817 issue with it, preferring a store to an adjacent memory location to
17818 facilitate store pairing in the store queue.
17819
17820 - If the pendulum is 2, then two loads have already been
17821 issued in this cycle, so we increase the priority of the first store
17822 in the ready list to increase its likelihood of being chosen first
17823 in the next cycle.
17824
17825 - If the pendulum < -2 or > 2, then do nothing.
17826
17827 Note: This code covers the most common scenarios. There exist non-
17828 load/store instructions which make use of the LSU and which
17829 would need to be accounted for to strictly model the behavior
17830 of the machine. Those instructions are currently unaccounted
17831 for to help minimize compile time overhead of this code.
17832 */
17833 int pos;
17834 rtx load_mem, str_mem;
17835
17836 if (is_store_insn (last_scheduled_insn, &str_mem))
17837 /* Issuing a store, swing the load_store_pendulum to the left */
17838 load_store_pendulum--;
17839 else if (is_load_insn (last_scheduled_insn, &load_mem))
17840 /* Issuing a load, swing the load_store_pendulum to the right */
17841 load_store_pendulum++;
17842 else
17843 return cached_can_issue_more;
17844
17845 /* If the pendulum is balanced, or there is only one instruction on
17846 the ready list, then all is well, so return. */
17847 if ((load_store_pendulum == 0) || (lastpos <= 0))
17848 return cached_can_issue_more;
17849
17850 if (load_store_pendulum == 1)
17851 {
17852 /* A load has been issued in this cycle. Scan the ready list
17853 for another load to issue with it */
17854 pos = lastpos;
17855
17856 while (pos >= 0)
17857 {
17858 if (is_load_insn (ready[pos], &load_mem))
17859 {
17860 /* Found a load. Move it to the head of the ready list,
17861 and adjust its priority so that it is more likely to
17862 stay there.  */
17863 move_to_end_of_ready (ready, pos, lastpos);
17864
17865 if (!sel_sched_p ()
17866 && INSN_PRIORITY_KNOWN (ready[lastpos]))
17867 INSN_PRIORITY (ready[lastpos])++;
17868 break;
17869 }
17870 pos--;
17871 }
17872 }
17873 else if (load_store_pendulum == -2)
17874 {
17875 /* Two stores have been issued in this cycle. Increase the
17876 priority of the first load in the ready list to favor it for
17877 issuing in the next cycle. */
17878 pos = lastpos;
17879
17880 while (pos >= 0)
17881 {
17882 if (is_load_insn (ready[pos], &load_mem)
17883 && !sel_sched_p ()
17884 && INSN_PRIORITY_KNOWN (ready[pos]))
17885 {
17886 INSN_PRIORITY (ready[pos])++;
17887
17888 /* Adjust the pendulum to account for the fact that a load
17889 was found and increased in priority. This is to prevent
17890 increasing the priority of multiple loads */
17891 load_store_pendulum--;
17892
17893 break;
17894 }
17895 pos--;
17896 }
17897 }
17898 else if (load_store_pendulum == -1)
17899 {
17900 /* A store has been issued in this cycle. Scan the ready list for
17901 another store to issue with it, preferring a store to an adjacent
17902 memory location */
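      /* E.g. (hypothetical) if the store just issued was to 0(r9), prefer a
	 ready store to an adjacent location such as 8(r9) over an unrelated
	 store, so the two can pair in the store queue.  */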
17903 int first_store_pos = -1;
17904
17905 pos = lastpos;
17906
17907 while (pos >= 0)
17908 {
17909 if (is_store_insn (ready[pos], &str_mem))
17910 {
17911 rtx str_mem2;
17912 /* Maintain the index of the first store found on the
17913 list */
17914 if (first_store_pos == -1)
17915 first_store_pos = pos;
17916
17917 if (is_store_insn (last_scheduled_insn, &str_mem2)
17918 && adjacent_mem_locations (str_mem, str_mem2))
17919 {
17920 /* Found an adjacent store. Move it to the head of the
17921 ready list, and adjust its priority so that it is
17922 more likely to stay there */
17923 move_to_end_of_ready (ready, pos, lastpos);
17924
17925 if (!sel_sched_p ()
17926 && INSN_PRIORITY_KNOWN (ready[lastpos]))
17927 INSN_PRIORITY (ready[lastpos])++;
17928
17929 first_store_pos = -1;
17930
17931 break;
17932 }
17933 }
17934 pos--;
17935 }
17936
17937 if (first_store_pos >= 0)
17938 {
17939 /* An adjacent store wasn't found, but a non-adjacent store was,
17940 so move the non-adjacent store to the front of the ready
17941 list, and adjust its priority so that it is more likely to
17942 stay there. */
17943 move_to_end_of_ready (ready, first_store_pos, lastpos);
17944 if (!sel_sched_p ()
17945 && INSN_PRIORITY_KNOWN (ready[lastpos]))
17946 INSN_PRIORITY (ready[lastpos])++;
17947 }
17948 }
17949 else if (load_store_pendulum == 2)
17950 {
17951 /* Two loads have been issued in this cycle. Increase the priority
17952 of the first store in the ready list to favor it for issuing in
17953 the next cycle. */
17954 pos = lastpos;
17955
17956 while (pos >= 0)
17957 {
17958 if (is_store_insn (ready[pos], &str_mem)
17959 && !sel_sched_p ()
17960 && INSN_PRIORITY_KNOWN (ready[pos]))
17961 {
17962 INSN_PRIORITY (ready[pos])++;
17963
17964 /* Adjust the pendulum to account for the fact that a store
17965 was found and increased in priority. This is to prevent
17966 increasing the priority of multiple stores */
17967 load_store_pendulum++;
17968
17969 break;
17970 }
17971 pos--;
17972 }
17973 }
17974
17975 return cached_can_issue_more;
17976 }
17977
17978 /* Do Power9 specific sched_reorder2 reordering of ready list. */
17979
17980 static int
17981 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
17982 {
17983 int pos;
17984 enum attr_type type, type2;
17985
17986 type = get_attr_type (last_scheduled_insn);
17987
17988 /* Try to issue fixed point divides back-to-back in pairs so they will be
17989 routed to separate execution units and execute in parallel. */
17990 if (type == TYPE_DIV && divide_cnt == 0)
17991 {
17992 /* First divide has been scheduled. */
17993 divide_cnt = 1;
17994
17995 /* Scan the ready list looking for another divide, if found move it
17996 to the end of the list so it is chosen next. */
17997 pos = lastpos;
17998 while (pos >= 0)
17999 {
18000 if (recog_memoized (ready[pos]) >= 0
18001 && get_attr_type (ready[pos]) == TYPE_DIV)
18002 {
18003 move_to_end_of_ready (ready, pos, lastpos);
18004 break;
18005 }
18006 pos--;
18007 }
18008 }
18009 else
18010 {
18011 /* Last insn was the 2nd divide or not a divide, reset the counter. */
18012 divide_cnt = 0;
18013
18014 /* The best dispatch throughput for vector and vector load insns can be
18015 achieved by interleaving a vector and vector load such that they'll
18016 dispatch to the same superslice. If this pairing cannot be achieved
18017 then it is best to pair vector insns together and vector load insns
18018 together.
18019
18020 To aid in this pairing, vec_pairing maintains the current state with
18021 the following values:
18022
18023 0 : Initial state, no vecload/vector pairing has been started.
18024
18025 1 : A vecload or vector insn has been issued and a candidate for
18026 pairing has been found and moved to the end of the ready
18027 list. */
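      /* A hypothetical walk through these states: issue a vecload (state 0),
	 find a pairable vector insn and move it to the end of the ready list
	 (state 0 -> 1); when that insn issues, fall through to the reset
	 below (state 1 -> 0), ready to start the next pair.  */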
18028 if (type == TYPE_VECLOAD)
18029 {
18030 /* Issued a vecload. */
18031 if (vec_pairing == 0)
18032 {
18033 int vecload_pos = -1;
18034 /* We issued a single vecload, look for a vector insn to pair it
18035 with. If one isn't found, try to pair another vecload. */
18036 pos = lastpos;
18037 while (pos >= 0)
18038 {
18039 if (recog_memoized (ready[pos]) >= 0)
18040 {
18041 type2 = get_attr_type (ready[pos]);
18042 if (is_power9_pairable_vec_type (type2))
18043 {
18044 /* Found a vector insn to pair with, move it to the
18045 end of the ready list so it is scheduled next. */
18046 move_to_end_of_ready (ready, pos, lastpos);
18047 vec_pairing = 1;
18048 return cached_can_issue_more;
18049 }
18050 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
18051 /* Remember position of first vecload seen. */
18052 vecload_pos = pos;
18053 }
18054 pos--;
18055 }
18056 if (vecload_pos >= 0)
18057 {
18058 /* Didn't find a vector to pair with but did find a vecload,
18059 move it to the end of the ready list. */
18060 move_to_end_of_ready (ready, vecload_pos, lastpos);
18061 vec_pairing = 1;
18062 return cached_can_issue_more;
18063 }
18064 }
18065 }
18066 else if (is_power9_pairable_vec_type (type))
18067 {
18068 /* Issued a vector operation. */
18069 if (vec_pairing == 0)
18070 {
18071 int vec_pos = -1;
18072 /* We issued a single vector insn, look for a vecload to pair it
18073 with. If one isn't found, try to pair another vector. */
18074 pos = lastpos;
18075 while (pos >= 0)
18076 {
18077 if (recog_memoized (ready[pos]) >= 0)
18078 {
18079 type2 = get_attr_type (ready[pos]);
18080 if (type2 == TYPE_VECLOAD)
18081 {
18082 /* Found a vecload insn to pair with, move it to the
18083 end of the ready list so it is scheduled next. */
18084 move_to_end_of_ready (ready, pos, lastpos);
18085 vec_pairing = 1;
18086 return cached_can_issue_more;
18087 }
18088 else if (is_power9_pairable_vec_type (type2)
18089 && vec_pos == -1)
18090 /* Remember position of first vector insn seen. */
18091 vec_pos = pos;
18092 }
18093 pos--;
18094 }
18095 if (vec_pos >= 0)
18096 {
18097 /* Didn't find a vecload to pair with but did find a vector
18098 insn, move it to the end of the ready list. */
18099 move_to_end_of_ready (ready, vec_pos, lastpos);
18100 vec_pairing = 1;
18101 return cached_can_issue_more;
18102 }
18103 }
18104 }
18105
18106 /* We've either finished a vec/vecload pair, couldn't find an insn to
18107 continue the current pair, or the last insn had nothing to do with
18108 pairing. In any case, reset the state. */
18109 vec_pairing = 0;
18110 }
18111
18112 return cached_can_issue_more;
18113 }
18114
18115 /* We are about to begin issuing insns for this clock cycle. */
18116
18117 static int
18118 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
18119 rtx_insn **ready ATTRIBUTE_UNUSED,
18120 int *pn_ready ATTRIBUTE_UNUSED,
18121 int clock_var ATTRIBUTE_UNUSED)
18122 {
18123 int n_ready = *pn_ready;
18124
18125 if (sched_verbose)
18126 fprintf (dump, "// rs6000_sched_reorder :\n");
18127
18128 /* Reorder the ready list if the next insn to issue
18129 is a nonpipelined insn. */
18130 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
18131 {
18132 if (is_nonpipeline_insn (ready[n_ready - 1])
18133 && (recog_memoized (ready[n_ready - 2]) > 0))
18134 /* Simply swap first two insns. */
18135 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
18136 }
18137
18138 if (rs6000_tune == PROCESSOR_POWER6)
18139 load_store_pendulum = 0;
18140
18141 return rs6000_issue_rate ();
18142 }
18143
18144 /* Like rs6000_sched_reorder, but called after issuing each insn. */
18145
18146 static int
18147 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
18148 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
18149 {
18150 if (sched_verbose)
18151 fprintf (dump, "// rs6000_sched_reorder2 :\n");
18152
18153 /* Do Power6 dependent reordering if necessary. */
18154 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
18155 return power6_sched_reorder2 (ready, *pn_ready - 1);
18156
18157 /* Do Power9 dependent reordering if necessary. */
18158 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
18159 && recog_memoized (last_scheduled_insn) >= 0)
18160 return power9_sched_reorder2 (ready, *pn_ready - 1);
18161
18162 return cached_can_issue_more;
18163 }
18164
18165 /* Return whether the presence of INSN causes a dispatch group termination
18166 of group WHICH_GROUP.
18167
18168 If WHICH_GROUP == current_group, this function will return true if INSN
18169 causes the termination of the current group (i.e., the dispatch group to
18170 which INSN belongs). This means that INSN will be the last insn in the
18171 group it belongs to.
18172
18173 If WHICH_GROUP == previous_group, this function will return true if INSN
18174 causes the termination of the previous group (i.e., the dispatch group that
18175 precedes the group to which INSN belongs). This means that INSN will be
18176 the first insn in the group it belongs to. */
18177
18178 static bool
18179 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
18180 {
18181 bool first, last;
18182
18183 if (! insn)
18184 return false;
18185
18186 first = insn_must_be_first_in_group (insn);
18187 last = insn_must_be_last_in_group (insn);
18188
18189 if (first && last)
18190 return true;
18191
18192 if (which_group == current_group)
18193 return last;
18194 else if (which_group == previous_group)
18195 return first;
18196
18197 return false;
18198 }
18199
18200
18201 static bool
18202 insn_must_be_first_in_group (rtx_insn *insn)
18203 {
18204 enum attr_type type;
18205
18206 if (!insn
18207 || NOTE_P (insn)
18208 || DEBUG_INSN_P (insn)
18209 || GET_CODE (PATTERN (insn)) == USE
18210 || GET_CODE (PATTERN (insn)) == CLOBBER)
18211 return false;
18212
18213 switch (rs6000_tune)
18214 {
18215 case PROCESSOR_POWER5:
18216 if (is_cracked_insn (insn))
18217 return true;
18218 /* FALLTHRU */
18219 case PROCESSOR_POWER4:
18220 if (is_microcoded_insn (insn))
18221 return true;
18222
18223 if (!rs6000_sched_groups)
18224 return false;
18225
18226 type = get_attr_type (insn);
18227
18228 switch (type)
18229 {
18230 case TYPE_MFCR:
18231 case TYPE_MFCRF:
18232 case TYPE_MTCR:
18233 case TYPE_CR_LOGICAL:
18234 case TYPE_MTJMPR:
18235 case TYPE_MFJMPR:
18236 case TYPE_DIV:
18237 case TYPE_LOAD_L:
18238 case TYPE_STORE_C:
18239 case TYPE_ISYNC:
18240 case TYPE_SYNC:
18241 return true;
18242 default:
18243 break;
18244 }
18245 break;
18246 case PROCESSOR_POWER6:
18247 type = get_attr_type (insn);
18248
18249 switch (type)
18250 {
18251 case TYPE_EXTS:
18252 case TYPE_CNTLZ:
18253 case TYPE_TRAP:
18254 case TYPE_MUL:
18255 case TYPE_INSERT:
18256 case TYPE_FPCOMPARE:
18257 case TYPE_MFCR:
18258 case TYPE_MTCR:
18259 case TYPE_MFJMPR:
18260 case TYPE_MTJMPR:
18261 case TYPE_ISYNC:
18262 case TYPE_SYNC:
18263 case TYPE_LOAD_L:
18264 case TYPE_STORE_C:
18265 return true;
18266 case TYPE_SHIFT:
18267 if (get_attr_dot (insn) == DOT_NO
18268 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
18269 return true;
18270 else
18271 break;
18272 case TYPE_DIV:
18273 if (get_attr_size (insn) == SIZE_32)
18274 return true;
18275 else
18276 break;
18277 case TYPE_LOAD:
18278 case TYPE_STORE:
18279 case TYPE_FPLOAD:
18280 case TYPE_FPSTORE:
18281 if (get_attr_update (insn) == UPDATE_YES)
18282 return true;
18283 else
18284 break;
18285 default:
18286 break;
18287 }
18288 break;
18289 case PROCESSOR_POWER7:
18290 type = get_attr_type (insn);
18291
18292 switch (type)
18293 {
18294 case TYPE_CR_LOGICAL:
18295 case TYPE_MFCR:
18296 case TYPE_MFCRF:
18297 case TYPE_MTCR:
18298 case TYPE_DIV:
18299 case TYPE_ISYNC:
18300 case TYPE_LOAD_L:
18301 case TYPE_STORE_C:
18302 case TYPE_MFJMPR:
18303 case TYPE_MTJMPR:
18304 return true;
18305 case TYPE_MUL:
18306 case TYPE_SHIFT:
18307 case TYPE_EXTS:
18308 if (get_attr_dot (insn) == DOT_YES)
18309 return true;
18310 else
18311 break;
18312 case TYPE_LOAD:
18313 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18314 || get_attr_update (insn) == UPDATE_YES)
18315 return true;
18316 else
18317 break;
18318 case TYPE_STORE:
18319 case TYPE_FPLOAD:
18320 case TYPE_FPSTORE:
18321 if (get_attr_update (insn) == UPDATE_YES)
18322 return true;
18323 else
18324 break;
18325 default:
18326 break;
18327 }
18328 break;
18329 case PROCESSOR_POWER8:
18330 type = get_attr_type (insn);
18331
18332 switch (type)
18333 {
18334 case TYPE_CR_LOGICAL:
18335 case TYPE_MFCR:
18336 case TYPE_MFCRF:
18337 case TYPE_MTCR:
18338 case TYPE_SYNC:
18339 case TYPE_ISYNC:
18340 case TYPE_LOAD_L:
18341 case TYPE_STORE_C:
18342 case TYPE_VECSTORE:
18343 case TYPE_MFJMPR:
18344 case TYPE_MTJMPR:
18345 return true;
18346 case TYPE_SHIFT:
18347 case TYPE_EXTS:
18348 case TYPE_MUL:
18349 if (get_attr_dot (insn) == DOT_YES)
18350 return true;
18351 else
18352 break;
18353 case TYPE_LOAD:
18354 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18355 || get_attr_update (insn) == UPDATE_YES)
18356 return true;
18357 else
18358 break;
18359 case TYPE_STORE:
18360 if (get_attr_update (insn) == UPDATE_YES
18361 && get_attr_indexed (insn) == INDEXED_YES)
18362 return true;
18363 else
18364 break;
18365 default:
18366 break;
18367 }
18368 break;
18369 default:
18370 break;
18371 }
18372
18373 return false;
18374 }
18375
18376 static bool
18377 insn_must_be_last_in_group (rtx_insn *insn)
18378 {
18379 enum attr_type type;
18380
18381 if (!insn
18382 || NOTE_P (insn)
18383 || DEBUG_INSN_P (insn)
18384 || GET_CODE (PATTERN (insn)) == USE
18385 || GET_CODE (PATTERN (insn)) == CLOBBER)
18386 return false;
18387
18388 switch (rs6000_tune)
{
18389 case PROCESSOR_POWER4:
18390 case PROCESSOR_POWER5:
18391 if (is_microcoded_insn (insn))
18392 return true;
18393
18394 if (is_branch_slot_insn (insn))
18395 return true;
18396
18397 break;
18398 case PROCESSOR_POWER6:
18399 type = get_attr_type (insn);
18400
18401 switch (type)
18402 {
18403 case TYPE_EXTS:
18404 case TYPE_CNTLZ:
18405 case TYPE_TRAP:
18406 case TYPE_MUL:
18407 case TYPE_FPCOMPARE:
18408 case TYPE_MFCR:
18409 case TYPE_MTCR:
18410 case TYPE_MFJMPR:
18411 case TYPE_MTJMPR:
18412 case TYPE_ISYNC:
18413 case TYPE_SYNC:
18414 case TYPE_LOAD_L:
18415 case TYPE_STORE_C:
18416 return true;
18417 case TYPE_SHIFT:
18418 if (get_attr_dot (insn) == DOT_NO
18419 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
18420 return true;
18421 else
18422 break;
18423 case TYPE_DIV:
18424 if (get_attr_size (insn) == SIZE_32)
18425 return true;
18426 else
18427 break;
18428 default:
18429 break;
18430 }
18431 break;
18432 case PROCESSOR_POWER7:
18433 type = get_attr_type (insn);
18434
18435 switch (type)
18436 {
18437 case TYPE_ISYNC:
18438 case TYPE_SYNC:
18439 case TYPE_LOAD_L:
18440 case TYPE_STORE_C:
18441 return true;
18442 case TYPE_LOAD:
18443 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18444 && get_attr_update (insn) == UPDATE_YES)
18445 return true;
18446 else
18447 break;
18448 case TYPE_STORE:
18449 if (get_attr_update (insn) == UPDATE_YES
18450 && get_attr_indexed (insn) == INDEXED_YES)
18451 return true;
18452 else
18453 break;
18454 default:
18455 break;
18456 }
18457 break;
18458 case PROCESSOR_POWER8:
18459 type = get_attr_type (insn);
18460
18461 switch (type)
18462 {
18463 case TYPE_MFCR:
18464 case TYPE_MTCR:
18465 case TYPE_ISYNC:
18466 case TYPE_SYNC:
18467 case TYPE_LOAD_L:
18468 case TYPE_STORE_C:
18469 return true;
18470 case TYPE_LOAD:
18471 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18472 && get_attr_update (insn) == UPDATE_YES)
18473 return true;
18474 else
18475 break;
18476 case TYPE_STORE:
18477 if (get_attr_update (insn) == UPDATE_YES
18478 && get_attr_indexed (insn) == INDEXED_YES)
18479 return true;
18480 else
18481 break;
18482 default:
18483 break;
18484 }
18485 break;
18486 default:
18487 break;
18488 }
18489
18490 return false;
18491 }
18492
18493 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
18494 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
18495
18496 static bool
18497 is_costly_group (rtx *group_insns, rtx next_insn)
18498 {
18499 int i;
18500 int issue_rate = rs6000_issue_rate ();
18501
18502 for (i = 0; i < issue_rate; i++)
18503 {
18504 sd_iterator_def sd_it;
18505 dep_t dep;
18506 rtx insn = group_insns[i];
18507
18508 if (!insn)
18509 continue;
18510
18511 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
18512 {
18513 rtx next = DEP_CON (dep);
18514
18515 if (next == next_insn
18516 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
18517 return true;
18518 }
18519 }
18520
18521 return false;
18522 }
18523
18524 /* Utility of the function redefine_groups.
18525 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
18526 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
18527 to keep it "far" (in a separate group) from GROUP_INSNS, following
18528 one of the following schemes, depending on the value of the flag
18530 -minsert-sched-nops = X:
18530 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
18531 in order to force NEXT_INSN into a separate group.
18532 (2) X < sched_finish_regroup_exact: insert exactly X nops.
18533 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
18534 insertion (has a group just ended, how many vacant issue slots remain in the
18535 last group, and how many dispatch groups were encountered so far). */
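
/* A worked example with made-up numbers: under scheme (2) with
   -minsert-sched-nops=2, exactly two nops are emitted before NEXT_INSN.
   Under scheme (1) on a processor without a special group-ending nop,
   three vacant slots and a non-branch NEXT_INSN yield
   'can_issue_more - 1' == 2 nops, filling the group so that NEXT_INSN
   starts the next one.  */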
18536
18537 static int
18538 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
18539 rtx_insn *next_insn, bool *group_end, int can_issue_more,
18540 int *group_count)
18541 {
18542 rtx nop;
18543 bool force;
18544 int issue_rate = rs6000_issue_rate ();
18545 bool end = *group_end;
18546 int i;
18547
18548 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
18549 return can_issue_more;
18550
18551 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
18552 return can_issue_more;
18553
18554 force = is_costly_group (group_insns, next_insn);
18555 if (!force)
18556 return can_issue_more;
18557
18558 if (sched_verbose > 6)
18559 fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
18560 *group_count, can_issue_more);
18561
18562 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
18563 {
18564 if (*group_end)
18565 can_issue_more = 0;
18566
18567 /* Since only a branch can be issued in the last issue_slot, it is
18568 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
18569 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
18570 in this case the last nop will start a new group and the branch
18571 will be forced to the new group. */
18572 if (can_issue_more && !is_branch_slot_insn (next_insn))
18573 can_issue_more--;
18574
18575 /* Do we have a special group ending nop? */
18576 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
18577 || rs6000_tune == PROCESSOR_POWER8)
18578 {
18579 nop = gen_group_ending_nop ();
18580 emit_insn_before (nop, next_insn);
18581 can_issue_more = 0;
18582 }
18583 else
18584 while (can_issue_more > 0)
18585 {
18586 nop = gen_nop ();
18587 emit_insn_before (nop, next_insn);
18588 can_issue_more--;
18589 }
18590
18591 *group_end = true;
18592 return 0;
18593 }
18594
18595 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
18596 {
18597 int n_nops = rs6000_sched_insert_nops;
18598
18599 /* Nops can't be issued from the branch slot, so the effective
18600 issue_rate for nops is 'issue_rate - 1'. */
18601 if (can_issue_more == 0)
18602 can_issue_more = issue_rate;
18603 can_issue_more--;
18604 if (can_issue_more == 0)
18605 {
18606 can_issue_more = issue_rate - 1;
18607 (*group_count)++;
18608 end = true;
18609 for (i = 0; i < issue_rate; i++)
18610 {
18611 group_insns[i] = 0;
18612 }
18613 }
18614
18615 while (n_nops > 0)
18616 {
18617 nop = gen_nop ();
18618 emit_insn_before (nop, next_insn);
18619 if (can_issue_more == issue_rate - 1) /* new group begins */
18620 end = false;
18621 can_issue_more--;
18622 if (can_issue_more == 0)
18623 {
18624 can_issue_more = issue_rate - 1;
18625 (*group_count)++;
18626 end = true;
18627 for (i = 0; i < issue_rate; i++)
18628 {
18629 group_insns[i] = 0;
18630 }
18631 }
18632 n_nops--;
18633 }
18634
18635 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
18636 can_issue_more++;
18637
18638 /* Is next_insn going to start a new group? */
18639 *group_end
18640 = (end
18641 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
18642 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
18643 || (can_issue_more < issue_rate &&
18644 insn_terminates_group_p (next_insn, previous_group)));
18645 if (*group_end && end)
18646 (*group_count)--;
18647
18648 if (sched_verbose > 6)
18649 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
18650 *group_count, can_issue_more);
18651 return can_issue_more;
18652 }
18653
18654 return can_issue_more;
18655 }
18656
18657 /* This function tries to synch the dispatch groups that the compiler "sees"
18658 with the dispatch groups that the processor dispatcher is expected to
18659 form in practice. It tries to achieve this synchronization by forcing the
18660 estimated processor grouping on the compiler (as opposed to the function
18661 'pad_groups' which tries to force the scheduler's grouping on the processor).
18662
18663 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
18664 examines the (estimated) dispatch groups that will be formed by the processor
18665 dispatcher. It marks these group boundaries to reflect the estimated
18666 processor grouping, overriding the grouping that the scheduler had marked.
18667 Depending on the value of the flag '-minsert-sched-nops' this function can
18668 force certain insns into separate groups or force a certain distance between
18669 them by inserting nops, for example, if there exists a "costly dependence"
18670 between the insns.
18671
18672 The function estimates the group boundaries that the processor will form as
18673 follows: It keeps track of how many vacant issue slots are available after
18674 each insn. A subsequent insn will start a new group if one of the following
18675 4 cases applies:
18676 - no more vacant issue slots remain in the current dispatch group.
18677 - only the last issue slot, which is the branch slot, is vacant, but the next
18678 insn is not a branch.
18679 - at most the last two issue slots, including the branch slot, are vacant,
18680 which means that a cracked insn (which occupies two issue slots) can't be
18681 issued in this group.
18682 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
18683 start a new group. */
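
/* For instance, with a hypothetical issue_rate of 5: four insns issued
   leaves only the branch slot vacant, so a non-branch insn starts a new
   group (case 2); with two slots left, one of them the branch slot, a
   cracked insn's two slots cannot fit, so it too starts a new group
   (case 3).  */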
18684
18685 static int
18686 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
18687 rtx_insn *tail)
18688 {
18689 rtx_insn *insn, *next_insn;
18690 int issue_rate;
18691 int can_issue_more;
18692 int slot, i;
18693 bool group_end;
18694 int group_count = 0;
18695 rtx *group_insns;
18696
18697 /* Initialize. */
18698 issue_rate = rs6000_issue_rate ();
18699 group_insns = XALLOCAVEC (rtx, issue_rate);
18700 for (i = 0; i < issue_rate; i++)
18701 {
18702 group_insns[i] = 0;
18703 }
18704 can_issue_more = issue_rate;
18705 slot = 0;
18706 insn = get_next_active_insn (prev_head_insn, tail);
18707 group_end = false;
18708
18709 while (insn != NULL_RTX)
18710 {
18711 slot = (issue_rate - can_issue_more);
18712 group_insns[slot] = insn;
18713 can_issue_more =
18714 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
18715 if (insn_terminates_group_p (insn, current_group))
18716 can_issue_more = 0;
18717
18718 next_insn = get_next_active_insn (insn, tail);
18719 if (next_insn == NULL_RTX)
18720 return group_count + 1;
18721
18722 /* Is next_insn going to start a new group? */
18723 group_end
18724 = (can_issue_more == 0
18725 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
18726 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
18727 || (can_issue_more < issue_rate &&
18728 insn_terminates_group_p (next_insn, previous_group)));
18729
18730 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
18731 next_insn, &group_end, can_issue_more,
18732 &group_count);
18733
18734 if (group_end)
18735 {
18736 group_count++;
18737 can_issue_more = 0;
18738 for (i = 0; i < issue_rate; i++)
18739 {
18740 group_insns[i] = 0;
18741 }
18742 }
18743
18744 if (GET_MODE (next_insn) == TImode && can_issue_more)
18745 PUT_MODE (next_insn, VOIDmode);
18746 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
18747 PUT_MODE (next_insn, TImode);
18748
18749 insn = next_insn;
18750 if (can_issue_more == 0)
18751 can_issue_more = issue_rate;
18752 } /* while */
18753
18754 return group_count;
18755 }
18756
18757 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
18758 dispatch group boundaries that the scheduler had marked. Pad with nops
18759 any dispatch groups which have vacant issue slots, in order to force the
18760 scheduler's grouping on the processor dispatcher. The function
18761 returns the number of dispatch groups found. */
18762
18763 static int
18764 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
18765 rtx_insn *tail)
18766 {
18767 rtx_insn *insn, *next_insn;
18768 rtx nop;
18769 int issue_rate;
18770 int can_issue_more;
18771 int group_end;
18772 int group_count = 0;
18773
18774 /* Initialize issue_rate. */
18775 issue_rate = rs6000_issue_rate ();
18776 can_issue_more = issue_rate;
18777
18778 insn = get_next_active_insn (prev_head_insn, tail);
18779 next_insn = get_next_active_insn (insn, tail);
18780
18781 while (insn != NULL_RTX)
18782 {
18783 can_issue_more =
18784 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
18785
18786 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
18787
18788 if (next_insn == NULL_RTX)
18789 break;
18790
18791 if (group_end)
18792 {
18793 /* If the scheduler had marked group termination at this location
18794 (between insn and next_insn), and neither insn nor next_insn will
18795 force group termination, pad the group with nops to force group
18796 termination. */
18797 if (can_issue_more
18798 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
18799 && !insn_terminates_group_p (insn, current_group)
18800 && !insn_terminates_group_p (next_insn, previous_group))
18801 {
18802 if (!is_branch_slot_insn (next_insn))
18803 can_issue_more--;
18804
18805 while (can_issue_more)
18806 {
18807 nop = gen_nop ();
18808 emit_insn_before (nop, next_insn);
18809 can_issue_more--;
18810 }
18811 }
18812
18813 can_issue_more = issue_rate;
18814 group_count++;
18815 }
18816
18817 insn = next_insn;
18818 next_insn = get_next_active_insn (insn, tail);
18819 }
18820
18821 return group_count;
18822 }
18823
18824 /* We're beginning a new block. Initialize data structures as necessary. */
18825
18826 static void
18827 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
18828 int sched_verbose ATTRIBUTE_UNUSED,
18829 int max_ready ATTRIBUTE_UNUSED)
18830 {
18831 last_scheduled_insn = NULL;
18832 load_store_pendulum = 0;
18833 divide_cnt = 0;
18834 vec_pairing = 0;
18835 }
18836
18837 /* The following function is called at the end of scheduling BB.
18838 After reload, it inserts nops to enforce insn group bundling. */
18839
18840 static void
18841 rs6000_sched_finish (FILE *dump, int sched_verbose)
18842 {
18843 int n_groups;
18844
18845 if (sched_verbose)
18846 fprintf (dump, "=== Finishing schedule.\n");
18847
18848 if (reload_completed && rs6000_sched_groups)
18849 {
18850 /* Do not run sched_finish hook when selective scheduling enabled. */
18851 if (sel_sched_p ())
18852 return;
18853
18854 if (rs6000_sched_insert_nops == sched_finish_none)
18855 return;
18856
18857 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
18858 n_groups = pad_groups (dump, sched_verbose,
18859 current_sched_info->prev_head,
18860 current_sched_info->next_tail);
18861 else
18862 n_groups = redefine_groups (dump, sched_verbose,
18863 current_sched_info->prev_head,
18864 current_sched_info->next_tail);
18865
18866 if (sched_verbose >= 6)
18867 {
18868 fprintf (dump, "ngroups = %d\n", n_groups);
18869 print_rtl (dump, current_sched_info->prev_head);
18870 fprintf (dump, "Done finish_sched\n");
18871 }
18872 }
18873 }
18874
18875 struct rs6000_sched_context
18876 {
18877 short cached_can_issue_more;
18878 rtx_insn *last_scheduled_insn;
18879 int load_store_pendulum;
18880 int divide_cnt;
18881 int vec_pairing;
18882 };
18883
18884 typedef struct rs6000_sched_context rs6000_sched_context_def;
18885 typedef rs6000_sched_context_def *rs6000_sched_context_t;
18886
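/* These hooks let the selective scheduler checkpoint and restore the
   scheduling globals above.  A sketch of the intended call pattern
   (illustrative only):

     void *sc = rs6000_alloc_sched_context ();
     rs6000_init_sched_context (sc, false);   -- snapshot current globals
     ... scheduling that may need to be rolled back ...
     rs6000_set_sched_context (sc);           -- restore the snapshot
     rs6000_free_sched_context (sc);
*/
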
18887 /* Allocate storage for a new scheduling context. */
18888 static void *
18889 rs6000_alloc_sched_context (void)
18890 {
18891 return xmalloc (sizeof (rs6000_sched_context_def));
18892 }
18893
18894 /* If CLEAN_P is true, initialize _SC with clean data;
18895 otherwise, initialize it from the global context. */
18896 static void
18897 rs6000_init_sched_context (void *_sc, bool clean_p)
18898 {
18899 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
18900
18901 if (clean_p)
18902 {
18903 sc->cached_can_issue_more = 0;
18904 sc->last_scheduled_insn = NULL;
18905 sc->load_store_pendulum = 0;
18906 sc->divide_cnt = 0;
18907 sc->vec_pairing = 0;
18908 }
18909 else
18910 {
18911 sc->cached_can_issue_more = cached_can_issue_more;
18912 sc->last_scheduled_insn = last_scheduled_insn;
18913 sc->load_store_pendulum = load_store_pendulum;
18914 sc->divide_cnt = divide_cnt;
18915 sc->vec_pairing = vec_pairing;
18916 }
18917 }
18918
18919 /* Sets the global scheduling context to the one pointed to by _SC. */
18920 static void
18921 rs6000_set_sched_context (void *_sc)
18922 {
18923 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
18924
18925 gcc_assert (sc != NULL);
18926
18927 cached_can_issue_more = sc->cached_can_issue_more;
18928 last_scheduled_insn = sc->last_scheduled_insn;
18929 load_store_pendulum = sc->load_store_pendulum;
18930 divide_cnt = sc->divide_cnt;
18931 vec_pairing = sc->vec_pairing;
18932 }
18933
18934 /* Free _SC. */
18935 static void
18936 rs6000_free_sched_context (void *_sc)
18937 {
18938 gcc_assert (_sc != NULL);
18939
18940 free (_sc);
18941 }
18942
18943 static bool
18944 rs6000_sched_can_speculate_insn (rtx_insn *insn)
18945 {
18946 switch (get_attr_type (insn))
18947 {
18948 case TYPE_DIV:
18949 case TYPE_SDIV:
18950 case TYPE_DDIV:
18951 case TYPE_VECDIV:
18952 case TYPE_SSQRT:
18953 case TYPE_DSQRT:
18954 return false;
18955
18956 default:
18957 return true;
18958 }
18959 }
18960 \f
18961 /* Length in bytes of the trampoline for entering a nested function. */
18962
18963 int
18964 rs6000_trampoline_size (void)
18965 {
18966 int ret = 0;
18967
18968 switch (DEFAULT_ABI)
18969 {
18970 default:
18971 gcc_unreachable ();
18972
18973 case ABI_AIX:
18974 ret = (TARGET_32BIT) ? 12 : 24;
18975 break;
18976
18977 case ABI_ELFv2:
18978 gcc_assert (!TARGET_32BIT);
18979 ret = 32;
18980 break;
18981
18982 case ABI_DARWIN:
18983 case ABI_V4:
18984 ret = (TARGET_32BIT) ? 40 : 48;
18985 break;
18986 }
18987
18988 return ret;
18989 }
18990
18991 /* Emit RTL insns to initialize the variable parts of a trampoline.
18992 FNADDR is an RTX for the address of the function's pure code.
18993 CXT is an RTX for the static chain value for the function. */
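
/* For the AIX case below, the trampoline ends up laid out like a
   function descriptor (the offsets follow from the MEM_PLUS stores):

	tramp + 0*regsize:  entry address, copied from FNADDR's descriptor
	tramp + 1*regsize:  TOC pointer, likewise copied
	tramp + 2*regsize:  static chain value (CXT)  */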
18994
18995 static void
18996 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
18997 {
18998 int regsize = (TARGET_32BIT) ? 4 : 8;
18999 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
19000 rtx ctx_reg = force_reg (Pmode, cxt);
19001 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
19002
19003 switch (DEFAULT_ABI)
19004 {
19005 default:
19006 gcc_unreachable ();
19007
19008 /* Under AIX, just build the three-word function descriptor. */
19009 case ABI_AIX:
19010 {
19011 rtx fnmem, fn_reg, toc_reg;
19012
19013 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
19014 error ("you cannot take the address of a nested function if you use "
19015 "the %qs option", "-mno-pointers-to-nested-functions");
19016
19017 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
19018 fn_reg = gen_reg_rtx (Pmode);
19019 toc_reg = gen_reg_rtx (Pmode);
19020
19021 /* Macro to shorten the code expansions below. */
19022 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
19023
19024 m_tramp = replace_equiv_address (m_tramp, addr);
19025
19026 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
19027 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
19028 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
19029 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
19030 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
19031
19032 # undef MEM_PLUS
19033 }
19034 break;
19035
19036 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
19037 case ABI_ELFv2:
19038 case ABI_DARWIN:
19039 case ABI_V4:
19040 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
19041 LCT_NORMAL, VOIDmode,
19042 addr, Pmode,
19043 GEN_INT (rs6000_trampoline_size ()), SImode,
19044 fnaddr, Pmode,
19045 ctx_reg, Pmode);
19046 break;
19047 }
19048 }
19049
19050 \f
19051 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
19052 identifier as an argument, so the front end shouldn't look it up. */
19053
19054 static bool
19055 rs6000_attribute_takes_identifier_p (const_tree attr_id)
19056 {
19057 return is_attribute_p ("altivec", attr_id);
19058 }
19059
19060 /* Handle the "altivec" attribute. The attribute may have
19061 arguments as follows:
19062
19063 __attribute__((altivec(vector__)))
19064 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
19065 __attribute__((altivec(bool__))) (always followed by 'unsigned')
19066
19067 and may appear more than once (e.g., 'vector bool char') in a
19068 given declaration. */
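
/* Roughly, the front ends expand the AltiVec keywords into these
   attributes, e.g. (a sketch; the exact expansion is owned by the
   front ends):

     vector unsigned int v;  =>  __attribute__((altivec(vector__)))
				 unsigned int v;
     vector bool char b;     =>  __attribute__((altivec(bool__)))
				 unsigned char b;  */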
19069
19070 static tree
19071 rs6000_handle_altivec_attribute (tree *node,
19072 tree name ATTRIBUTE_UNUSED,
19073 tree args,
19074 int flags ATTRIBUTE_UNUSED,
19075 bool *no_add_attrs)
19076 {
19077 tree type = *node, result = NULL_TREE;
19078 machine_mode mode;
19079 int unsigned_p;
19080 char altivec_type
19081 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
19082 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
19083 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
19084 : '?');
19085
19086 while (POINTER_TYPE_P (type)
19087 || TREE_CODE (type) == FUNCTION_TYPE
19088 || TREE_CODE (type) == METHOD_TYPE
19089 || TREE_CODE (type) == ARRAY_TYPE)
19090 type = TREE_TYPE (type);
19091
19092 mode = TYPE_MODE (type);
19093
19094 /* Check for invalid AltiVec type qualifiers. */
19095 if (type == long_double_type_node)
19096 error ("use of %<long double%> in AltiVec types is invalid");
19097 else if (type == boolean_type_node)
19098 error ("use of boolean types in AltiVec types is invalid");
19099 else if (TREE_CODE (type) == COMPLEX_TYPE)
19100 error ("use of %<complex%> in AltiVec types is invalid");
19101 else if (DECIMAL_FLOAT_MODE_P (mode))
19102 error ("use of decimal floating point types in AltiVec types is invalid");
19103 else if (!TARGET_VSX)
19104 {
19105 if (type == long_unsigned_type_node || type == long_integer_type_node)
19106 {
19107 if (TARGET_64BIT)
19108 error ("use of %<long%> in AltiVec types is invalid for "
19109 "64-bit code without %qs", "-mvsx");
19110 else if (rs6000_warn_altivec_long)
19111 warning (0, "use of %<long%> in AltiVec types is deprecated; "
19112 "use %<int%>");
19113 }
19114 else if (type == long_long_unsigned_type_node
19115 || type == long_long_integer_type_node)
19116 error ("use of %<long long%> in AltiVec types is invalid without %qs",
19117 "-mvsx");
19118 else if (type == double_type_node)
19119 error ("use of %<double%> in AltiVec types is invalid without %qs",
19120 "-mvsx");
19121 }
19122
19123 switch (altivec_type)
19124 {
19125 case 'v':
19126 unsigned_p = TYPE_UNSIGNED (type);
19127 switch (mode)
19128 {
19129 case E_TImode:
19130 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
19131 break;
19132 case E_DImode:
19133 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
19134 break;
19135 case E_SImode:
19136 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
19137 break;
19138 case E_HImode:
19139 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
19140 break;
19141 case E_QImode:
19142 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
19143 break;
19144 case E_SFmode: result = V4SF_type_node; break;
19145 case E_DFmode: result = V2DF_type_node; break;
19146 /* If the user says 'vector int bool', we may be handed the 'bool'
19147 attribute _before_ the 'vector' attribute, and so select the
19148 proper type in the 'b' case below. */
19149 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
19150 case E_V2DImode: case E_V2DFmode:
19151 result = type;
19152 default: break;
19153 }
19154 break;
19155 case 'b':
19156 switch (mode)
19157 {
19158 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
19159 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
19160 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
19161 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
19162 default: break;
19163 }
19164 break;
19165 case 'p':
19166 switch (mode)
19167 {
19168 case E_V8HImode: result = pixel_V8HI_type_node;
19169 default: break;
19170 }
19171 default: break;
19172 }
19173
19174 /* Propagate qualifiers attached to the element type
19175 onto the vector type. */
19176 if (result && result != type && TYPE_QUALS (type))
19177 result = build_qualified_type (result, TYPE_QUALS (type));
19178
19179 *no_add_attrs = true; /* No need to hang on to the attribute. */
19180
19181 if (result)
19182 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
19183
19184 return NULL_TREE;
19185 }
19186
19187 /* AltiVec defines five built-in scalar types that serve as vector
19188 elements; we must teach the compiler how to mangle them. The 128-bit
19189 floating point mangling is target-specific as well. */
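
/* E.g. a parameter of type 'vector bool int' picks up "U6__booli" below,
   so a function 'void f (vector bool int)' mangles as _Z1fDv4_U6__booli
   (the Dv4_ vector wrapper comes from the generic C++ mangler; shown
   here only for illustration).  */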
19190
19191 static const char *
19192 rs6000_mangle_type (const_tree type)
19193 {
19194 type = TYPE_MAIN_VARIANT (type);
19195
19196 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
19197 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
19198 return NULL;
19199
19200 if (type == bool_char_type_node) return "U6__boolc";
19201 if (type == bool_short_type_node) return "U6__bools";
19202 if (type == pixel_type_node) return "u7__pixel";
19203 if (type == bool_int_type_node) return "U6__booli";
19204 if (type == bool_long_long_type_node) return "U6__boolx";
19205
19206 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
19207 return "g";
19208 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
19209 return ieee128_mangling_gcc_8_1 ? "U10__float128" : "u9__ieee128";
19210
19211 /* For all other types, use the default mangling. */
19212 return NULL;
19213 }
19214
19215 /* Handle a "longcall" or "shortcall" attribute; arguments as in
19216 struct attribute_spec.handler. */
19217
19218 static tree
19219 rs6000_handle_longcall_attribute (tree *node, tree name,
19220 tree args ATTRIBUTE_UNUSED,
19221 int flags ATTRIBUTE_UNUSED,
19222 bool *no_add_attrs)
19223 {
19224 if (TREE_CODE (*node) != FUNCTION_TYPE
19225 && TREE_CODE (*node) != FIELD_DECL
19226 && TREE_CODE (*node) != TYPE_DECL)
19227 {
19228 warning (OPT_Wattributes, "%qE attribute only applies to functions",
19229 name);
19230 *no_add_attrs = true;
19231 }
19232
19233 return NULL_TREE;
19234 }
19235
19236 /* Set longcall attributes on all functions declared when
19237 rs6000_default_long_calls is true. */
19238 static void
19239 rs6000_set_default_type_attributes (tree type)
19240 {
19241 if (rs6000_default_long_calls
19242 && (TREE_CODE (type) == FUNCTION_TYPE
19243 || TREE_CODE (type) == METHOD_TYPE))
19244 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
19245 NULL_TREE,
19246 TYPE_ATTRIBUTES (type));
19247
19248 #if TARGET_MACHO
19249 darwin_set_default_type_attributes (type);
19250 #endif
19251 }
19252
19253 /* Return a reference suitable for calling a function with the
19254 longcall attribute. */
19255
19256 static rtx
19257 rs6000_longcall_ref (rtx call_ref, rtx arg)
19258 {
19259 /* System V adds '.' to the internal name, so skip any leading dots. */
19260 const char *call_name = XSTR (call_ref, 0);
19261 if (*call_name == '.')
19262 {
19263 while (*call_name == '.')
19264 call_name++;
19265
19266 tree node = get_identifier (call_name);
19267 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
19268 }
19269
19270 if (TARGET_PLTSEQ)
19271 {
19272 rtx base = const0_rtx;
19273 int regno = 12;
19274 if (rs6000_pcrel_p (cfun))
19275 {
19276 rtx reg = gen_rtx_REG (Pmode, regno);
19277 rtx u = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
19278 UNSPEC_PLT_PCREL);
19279 emit_insn (gen_rtx_SET (reg, u));
19280 return reg;
19281 }
19282
19283 if (DEFAULT_ABI == ABI_ELFv2)
19284 base = gen_rtx_REG (Pmode, TOC_REGISTER);
19285 else
19286 {
19287 if (flag_pic)
19288 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
19289 regno = 11;
19290 }
19291 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
19292 may be used by a function global entry point. For SysV4, r11
19293 is used by __glink_PLTresolve lazy resolver entry. */
19294 rtx reg = gen_rtx_REG (Pmode, regno);
19295 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
19296 UNSPEC_PLT16_HA);
19297 rtx lo = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, reg, call_ref, arg),
19298 UNSPEC_PLT16_LO);
19299 emit_insn (gen_rtx_SET (reg, hi));
19300 emit_insn (gen_rtx_SET (reg, lo));
19301 return reg;
19302 }
19303
19304 return force_reg (Pmode, call_ref);
19305 }
19306 \f
19307 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
19308 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
19309 #endif
19310
19311 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19312 struct attribute_spec.handler. */
19313 static tree
19314 rs6000_handle_struct_attribute (tree *node, tree name,
19315 tree args ATTRIBUTE_UNUSED,
19316 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
19317 {
19318 tree *type = NULL;
19319 if (DECL_P (*node))
19320 {
19321 if (TREE_CODE (*node) == TYPE_DECL)
19322 type = &TREE_TYPE (*node);
19323 }
19324 else
19325 type = node;
19326
19327 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
19328 || TREE_CODE (*type) == UNION_TYPE)))
19329 {
19330 warning (OPT_Wattributes, "%qE attribute ignored", name);
19331 *no_add_attrs = true;
19332 }
19333
19334 else if ((is_attribute_p ("ms_struct", name)
19335 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
19336 || ((is_attribute_p ("gcc_struct", name)
19337 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
19338 {
19339 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
19340 name);
19341 *no_add_attrs = true;
19342 }
19343
19344 return NULL_TREE;
19345 }
19346
19347 static bool
19348 rs6000_ms_bitfield_layout_p (const_tree record_type)
19349 {
19350 return ((TARGET_USE_MS_BITFIELD_LAYOUT
19351 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19352 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
19353 }
19354 \f
19355 #ifdef USING_ELFOS_H
19356
19357 /* A get_unnamed_section callback, used for switching to toc_section. */
19358
19359 static void
19360 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
19361 {
19362 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19363 && TARGET_MINIMAL_TOC)
19364 {
19365 if (!toc_initialized)
19366 {
19367 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
19368 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19369 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
19370 fprintf (asm_out_file, "\t.tc ");
19371 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
19372 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
19373 fprintf (asm_out_file, "\n");
19374
19375 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19376 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19377 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
19378 fprintf (asm_out_file, " = .+32768\n");
19379 toc_initialized = 1;
19380 }
19381 else
19382 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19383 }
19384 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19385 {
19386 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
19387 if (!toc_initialized)
19388 {
19389 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19390 toc_initialized = 1;
19391 }
19392 }
19393 else
19394 {
19395 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19396 if (!toc_initialized)
19397 {
19398 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19399 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
19400 fprintf (asm_out_file, " = .+32768\n");
19401 toc_initialized = 1;
19402 }
19403 }
19404 }
19405
19406 /* Implement TARGET_ASM_INIT_SECTIONS. */
19407
19408 static void
19409 rs6000_elf_asm_init_sections (void)
19410 {
19411 toc_section
19412 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
19413
19414 sdata2_section
19415 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
19416 SDATA2_SECTION_ASM_OP);
19417 }
19418
19419 /* Implement TARGET_SELECT_RTX_SECTION. */
19420
19421 static section *
19422 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
19423 unsigned HOST_WIDE_INT align)
19424 {
19425 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
19426 return toc_section;
19427 else
19428 return default_elf_select_rtx_section (mode, x, align);
19429 }
19430 \f
19431 /* For a SYMBOL_REF, set generic flags and then perform some
19432 target-specific processing.
19433
19434 When the AIX ABI is requested on a non-AIX system, replace the
19435 function name with the real name (with a leading .) rather than the
19436 function descriptor name. This saves a lot of overriding code to
19437 read the prefixes. */
19438
19439 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
19440 static void
19441 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
19442 {
19443 default_encode_section_info (decl, rtl, first);
19444
19445 if (first
19446 && TREE_CODE (decl) == FUNCTION_DECL
19447 && !TARGET_AIX
19448 && DEFAULT_ABI == ABI_AIX)
19449 {
19450 rtx sym_ref = XEXP (rtl, 0);
19451 size_t len = strlen (XSTR (sym_ref, 0));
19452 char *str = XALLOCAVEC (char, len + 2);
19453 str[0] = '.';
19454 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
19455 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
19456 }
19457 }
19458
19459 static inline bool
19460 compare_section_name (const char *section, const char *templ)
19461 {
19462 int len;
19463
19464 len = strlen (templ);
19465 return (strncmp (section, templ, len) == 0
19466 && (section[len] == 0 || section[len] == '.'));
19467 }
19468
19469 bool
19470 rs6000_elf_in_small_data_p (const_tree decl)
19471 {
19472 if (rs6000_sdata == SDATA_NONE)
19473 return false;
19474
19475 /* We want to merge strings, so we never consider them small data. */
19476 if (TREE_CODE (decl) == STRING_CST)
19477 return false;
19478
19479 /* Functions are never in the small data area. */
19480 if (TREE_CODE (decl) == FUNCTION_DECL)
19481 return false;
19482
19483 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
19484 {
19485 const char *section = DECL_SECTION_NAME (decl);
19486 if (compare_section_name (section, ".sdata")
19487 || compare_section_name (section, ".sdata2")
19488 || compare_section_name (section, ".gnu.linkonce.s")
19489 || compare_section_name (section, ".sbss")
19490 || compare_section_name (section, ".sbss2")
19491 || compare_section_name (section, ".gnu.linkonce.sb")
19492 || strcmp (section, ".PPC.EMB.sdata0") == 0
19493 || strcmp (section, ".PPC.EMB.sbss0") == 0)
19494 return true;
19495 }
19496 else
19497 {
19498 /* If we are told not to put readonly data in sdata, then don't. */
19499 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
19500 && !rs6000_readonly_in_sdata)
19501 return false;
19502
19503 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
19504
19505 if (size > 0
19506 && size <= g_switch_value
19507 /* If it's not public, and we're not going to reference it there,
19508 there's no need to put it in the small data section. */
19509 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
19510 return true;
19511 }
19512
19513 return false;
19514 }
19515
19516 #endif /* USING_ELFOS_H */
19517 \f
19518 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
19519
19520 static bool
19521 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
19522 {
19523 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
19524 }
19525
19526 /* Do not place thread-local symbols refs in the object blocks. */
19527
19528 static bool
19529 rs6000_use_blocks_for_decl_p (const_tree decl)
19530 {
19531 return !DECL_THREAD_LOCAL_P (decl);
19532 }
19533 \f
19534 /* Return a REG that occurs in ADDR with coefficient 1.
19535 ADDR can be effectively incremented by incrementing REG.
19536
19537 r0 is special and we must not select it as an address
19538 register by this routine since our caller will try to
19539 increment the returned register via an "la" instruction. */
19540
19541 rtx
19542 find_addr_reg (rtx addr)
19543 {
19544 while (GET_CODE (addr) == PLUS)
19545 {
19546 if (REG_P (XEXP (addr, 0))
19547 && REGNO (XEXP (addr, 0)) != 0)
19548 addr = XEXP (addr, 0);
19549 else if (REG_P (XEXP (addr, 1))
19550 && REGNO (XEXP (addr, 1)) != 0)
19551 addr = XEXP (addr, 1);
19552 else if (CONSTANT_P (XEXP (addr, 0)))
19553 addr = XEXP (addr, 1);
19554 else if (CONSTANT_P (XEXP (addr, 1)))
19555 addr = XEXP (addr, 0);
19556 else
19557 gcc_unreachable ();
19558 }
19559 gcc_assert (REG_P (addr) && REGNO (addr) != 0);
19560 return addr;
19561 }
19562
19563 void
19564 rs6000_fatal_bad_address (rtx op)
19565 {
19566 fatal_insn ("bad address", op);
19567 }
19568
19569 #if TARGET_MACHO
19570
19571 vec<branch_island, va_gc> *branch_islands;
19572
19573 /* Remember to generate a branch island for far calls to the given
19574 function. */
19575
19576 static void
19577 add_compiler_branch_island (tree label_name, tree function_name,
19578 int line_number)
19579 {
19580 branch_island bi = {function_name, label_name, line_number};
19581 vec_safe_push (branch_islands, bi);
19582 }
19583
19584 /* NO_PREVIOUS_DEF checks whether the function name is already in the
19585 list of branch islands. */
19586
19587 static int
19588 no_previous_def (tree function_name)
19589 {
19590 branch_island *bi;
19591 unsigned ix;
19592
19593 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
19594 if (function_name == bi->function_name)
19595 return 0;
19596 return 1;
19597 }
19598
19599 /* GET_PREV_LABEL gets the label name from the previous definition of
19600 the function. */
19601
19602 static tree
19603 get_prev_label (tree function_name)
19604 {
19605 branch_island *bi;
19606 unsigned ix;
19607
19608 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
19609 if (function_name == bi->function_name)
19610 return bi->label_name;
19611 return NULL_TREE;
19612 }
19613
19614 /* Generate external symbol indirection stubs (PIC and non-PIC). */
19615
19616 void
19617 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19618 {
19619 unsigned int length;
19620 char *symbol_name, *lazy_ptr_name;
19621 char *local_label_0;
19622 static unsigned label = 0;
19623
19624 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19625 symb = (*targetm.strip_name_encoding) (symb);
19626
19627 length = strlen (symb);
19628 symbol_name = XALLOCAVEC (char, length + 32);
19629 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19630
19631 lazy_ptr_name = XALLOCAVEC (char, length + 32);
19632 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
19633
19634 if (MACHOPIC_PURE)
19635 {
19636 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
19637 fprintf (file, "\t.align 5\n");
19638
19639 fprintf (file, "%s:\n", stub);
19640 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19641
19642 label++;
19643 local_label_0 = XALLOCAVEC (char, 16);
19644 sprintf (local_label_0, "L%u$spb", label);
19645
19646 fprintf (file, "\tmflr r0\n");
19647 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
19648 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
19649 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
19650 lazy_ptr_name, local_label_0);
19651 fprintf (file, "\tmtlr r0\n");
19652 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
19653 (TARGET_64BIT ? "ldu" : "lwzu"),
19654 lazy_ptr_name, local_label_0);
19655 fprintf (file, "\tmtctr r12\n");
19656 fprintf (file, "\tbctr\n");
19657 }
19658 else /* mdynamic-no-pic or mkernel. */
19659 {
19660 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
19661 fprintf (file, "\t.align 4\n");
19662
19663 fprintf (file, "%s:\n", stub);
19664 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19665
19666 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
19667 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
19668 (TARGET_64BIT ? "ldu" : "lwzu"),
19669 lazy_ptr_name);
19670 fprintf (file, "\tmtctr r12\n");
19671 fprintf (file, "\tbctr\n");
19672 }
19673
19674 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
19675 fprintf (file, "%s:\n", lazy_ptr_name);
19676 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19677 fprintf (file, "%sdyld_stub_binding_helper\n",
19678 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
19679 }
19680
19681 /* Legitimize PIC addresses. If the address is already
19682 position-independent, we return ORIG. Newly generated
19683 position-independent addresses go into a reg. This is REG if
19684 nonzero; otherwise we allocate register(s) as necessary. */
19685
19686 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
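
/* SMALL_INT tests whether X fits a signed 16-bit immediate by biasing:
   e.g. 0x7fff + 0x8000 == 0xffff < 0x10000 (fits), while
   0x8000 + 0x8000 == 0x10000 (does not).  */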
19687
19688 rtx
19689 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
19690 rtx reg)
19691 {
19692 rtx base, offset;
19693
19694 if (reg == NULL && !reload_completed)
19695 reg = gen_reg_rtx (Pmode);
19696
19697 if (GET_CODE (orig) == CONST)
19698 {
19699 rtx reg_temp;
19700
19701 if (GET_CODE (XEXP (orig, 0)) == PLUS
19702 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
19703 return orig;
19704
19705 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
19706
19707 /* Use a different reg for the intermediate value, as
19708 it will be marked UNCHANGING. */
19709 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
19710 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
19711 Pmode, reg_temp);
19712 offset =
19713 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
19714 Pmode, reg);
19715
19716 if (CONST_INT_P (offset))
19717 {
19718 if (SMALL_INT (offset))
19719 return plus_constant (Pmode, base, INTVAL (offset));
19720 else if (!reload_completed)
19721 offset = force_reg (Pmode, offset);
19722 else
19723 {
19724 rtx mem = force_const_mem (Pmode, orig);
19725 return machopic_legitimize_pic_address (mem, Pmode, reg);
19726 }
19727 }
19728 return gen_rtx_PLUS (Pmode, base, offset);
19729 }
19730
19731 /* Fall back on generic machopic code. */
19732 return machopic_legitimize_pic_address (orig, mode, reg);
19733 }
19734
19735 /* Output a .machine directive for the Darwin assembler, and call
19736 the generic start_file routine. */
19737
19738 static void
19739 rs6000_darwin_file_start (void)
19740 {
19741 static const struct
19742 {
19743 const char *arg;
19744 const char *name;
19745 HOST_WIDE_INT if_set;
19746 } mapping[] = {
19747 { "ppc64", "ppc64", MASK_64BIT },
19748 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
19749 { "power4", "ppc970", 0 },
19750 { "G5", "ppc970", 0 },
19751 { "7450", "ppc7450", 0 },
19752 { "7400", "ppc7400", MASK_ALTIVEC },
19753 { "G4", "ppc7400", 0 },
19754 { "750", "ppc750", 0 },
19755 { "740", "ppc750", 0 },
19756 { "G3", "ppc750", 0 },
19757 { "604e", "ppc604e", 0 },
19758 { "604", "ppc604", 0 },
19759 { "603e", "ppc603", 0 },
19760 { "603", "ppc603", 0 },
19761 { "601", "ppc601", 0 },
19762 { NULL, "ppc", 0 } };
19763 const char *cpu_id = "";
19764 size_t i;
19765
19766 rs6000_file_start ();
19767 darwin_file_start ();
19768
19769 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
19770
19771 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
19772 cpu_id = rs6000_default_cpu;
19773
19774 if (global_options_set.x_rs6000_cpu_index)
19775 cpu_id = processor_target_table[rs6000_cpu_index].name;
19776
19777 /* Look through the mapping array. Pick the first name that either
19778 matches the argument, has a bit set in IF_SET that is also set
19779 in the target flags, or has a NULL name. */
19780
19781 i = 0;
19782 while (mapping[i].arg != NULL
19783 && strcmp (mapping[i].arg, cpu_id) != 0
19784 && (mapping[i].if_set & rs6000_isa_flags) == 0)
19785 i++;
19786
19787 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
19788 }
19789
19790 #endif /* TARGET_MACHO */
19791
19792 #if TARGET_ELF
19793 static int
19794 rs6000_elf_reloc_rw_mask (void)
19795 {
19796 if (flag_pic)
19797 return 3;
19798 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19799 return 2;
19800 else
19801 return 0;
19802 }
19803
19804 /* Record an element in the table of global constructors. SYMBOL is
19805 a SYMBOL_REF of the function to be called; PRIORITY is a number
19806 between 0 and MAX_INIT_PRIORITY.
19807
19808 This differs from default_named_section_asm_out_constructor in
19809 that we have special handling for -mrelocatable. */
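
/* For example, with MAX_INIT_PRIORITY of 65535 a priority-101
   constructor is placed in ".ctors.65434" and a priority-200 one in
   ".ctors.65335"; the linker sorts the latter first, and because the
   table is executed from the end backwards the priority-101 constructor
   still runs first, as its lower priority requires.  */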
19810
19811 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
19812 static void
19813 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
19814 {
19815 const char *section = ".ctors";
19816 char buf[18];
19817
19818 if (priority != DEFAULT_INIT_PRIORITY)
19819 {
19820 sprintf (buf, ".ctors.%.5u",
19821 /* Invert the numbering so the linker puts us in the proper
19822 order; constructors are run from right to left, and the
19823 linker sorts in increasing order. */
19824 MAX_INIT_PRIORITY - priority);
19825 section = buf;
19826 }
19827
19828 switch_to_section (get_section (section, SECTION_WRITE, NULL));
19829 assemble_align (POINTER_SIZE);
19830
19831 if (DEFAULT_ABI == ABI_V4
19832 && (TARGET_RELOCATABLE || flag_pic > 1))
19833 {
19834 fputs ("\t.long (", asm_out_file);
19835 output_addr_const (asm_out_file, symbol);
19836 fputs (")@fixup\n", asm_out_file);
19837 }
19838 else
19839 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
19840 }
19841
19842 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
19843 static void
19844 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
19845 {
19846 const char *section = ".dtors";
19847 char buf[18];
19848
19849 if (priority != DEFAULT_INIT_PRIORITY)
19850 {
19851 sprintf (buf, ".dtors.%.5u",
19852 	       /* Invert the numbering so the linker puts us in the proper
19853 		  order; this mirrors the constructor numbering, so each
19854 		  destructor runs in the reverse order of its constructor.  */
19855 MAX_INIT_PRIORITY - priority);
19856 section = buf;
19857 }
19858
19859 switch_to_section (get_section (section, SECTION_WRITE, NULL));
19860 assemble_align (POINTER_SIZE);
19861
19862 if (DEFAULT_ABI == ABI_V4
19863 && (TARGET_RELOCATABLE || flag_pic > 1))
19864 {
19865 fputs ("\t.long (", asm_out_file);
19866 output_addr_const (asm_out_file, symbol);
19867 fputs (")@fixup\n", asm_out_file);
19868 }
19869 else
19870 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
19871 }
19872
19873 void
19874 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
19875 {
19876 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
19877 {
19878 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
19879 ASM_OUTPUT_LABEL (file, name);
19880 fputs (DOUBLE_INT_ASM_OP, file);
19881 rs6000_output_function_entry (file, name);
19882 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
19883 if (DOT_SYMBOLS)
19884 {
19885 fputs ("\t.size\t", file);
19886 assemble_name (file, name);
19887 fputs (",24\n\t.type\t.", file);
19888 assemble_name (file, name);
19889 fputs (",@function\n", file);
19890 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
19891 {
19892 fputs ("\t.globl\t.", file);
19893 assemble_name (file, name);
19894 putc ('\n', file);
19895 }
19896 }
19897 else
19898 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19899 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19900 rs6000_output_function_entry (file, name);
19901 fputs (":\n", file);
19902 return;
19903 }
19904
19905 int uses_toc;
19906 if (DEFAULT_ABI == ABI_V4
19907 && (TARGET_RELOCATABLE || flag_pic > 1)
19908 && !TARGET_SECURE_PLT
19909 && (!constant_pool_empty_p () || crtl->profile)
19910 && (uses_toc = uses_TOC ()))
19911 {
19912 char buf[256];
19913
19914 if (uses_toc == 2)
19915 switch_to_other_text_partition ();
19916 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
19917
19918 fprintf (file, "\t.long ");
19919 assemble_name (file, toc_label_name);
19920 need_toc_init = 1;
19921 putc ('-', file);
19922 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
19923 assemble_name (file, buf);
19924 putc ('\n', file);
19925 if (uses_toc == 2)
19926 switch_to_other_text_partition ();
19927 }
19928
19929 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19930 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19931
19932 if (TARGET_CMODEL == CMODEL_LARGE
19933 && rs6000_global_entry_point_prologue_needed_p ())
19934 {
19935 char buf[256];
19936
19937 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
19938
19939 fprintf (file, "\t.quad .TOC.-");
19940 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
19941 assemble_name (file, buf);
19942 putc ('\n', file);
19943 }
19944
19945 if (DEFAULT_ABI == ABI_AIX)
19946 {
19947 const char *desc_name, *orig_name;
19948
19949 orig_name = (*targetm.strip_name_encoding) (name);
19950 desc_name = orig_name;
19951 while (*desc_name == '.')
19952 desc_name++;
19953
19954 if (TREE_PUBLIC (decl))
19955 fprintf (file, "\t.globl %s\n", desc_name);
19956
19957 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19958 fprintf (file, "%s:\n", desc_name);
19959 fprintf (file, "\t.long %s\n", orig_name);
19960 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
19961 fputs ("\t.long 0\n", file);
19962 fprintf (file, "\t.previous\n");
19963 }
19964 ASM_OUTPUT_LABEL (file, name);
19965 }
19966
19967 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
19968 static void
19969 rs6000_elf_file_end (void)
19970 {
19971 #ifdef HAVE_AS_GNU_ATTRIBUTE
19972 /* ??? The value emitted depends on options active at file end.
19973 Assume anyone using #pragma or attributes that might change
19974 options knows what they are doing. */
19975 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
19976 && rs6000_passes_float)
19977 {
19978 int fp;
19979
19980 if (TARGET_HARD_FLOAT)
19981 fp = 1;
19982 else
19983 fp = 2;
19984 if (rs6000_passes_long_double)
19985 {
19986 if (!TARGET_LONG_DOUBLE_128)
19987 fp |= 2 * 4;
19988 else if (TARGET_IEEEQUAD)
19989 fp |= 3 * 4;
19990 else
19991 fp |= 1 * 4;
19992 }
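       /* At this point FP holds the FP ABI in its low two bits (1 = hard
	  float, 2 = soft float) plus four times the long double kind
	  seen (1 = 128-bit IBM, 2 = 64-bit, 3 = IEEE 128-bit), which is
	  the value read back as the Power FP GNU attribute.  */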
19993 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
19994 }
19995 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
19996 {
19997 if (rs6000_passes_vector)
19998 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
19999 (TARGET_ALTIVEC_ABI ? 2 : 1));
20000 if (rs6000_returns_struct)
20001 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
20002 aix_struct_return ? 2 : 1);
20003 }
20004 #endif
20005 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
20006 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
20007 file_end_indicate_exec_stack ();
20008 #endif
20009
20010 if (flag_split_stack)
20011 file_end_indicate_split_stack ();
20012
20013 if (cpu_builtin_p)
20014 {
20015       /* We have expanded a CPU builtin, so we need to emit a reference to
20016 	 the special symbol by which LIBC declares that it supports the
20017 	 AT_PLATFORM and AT_HWCAP/AT_HWCAP2-in-the-TCB feature.  */
20018 switch_to_section (data_section);
20019 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
20020 fprintf (asm_out_file, "\t%s %s\n",
20021 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
20022 }
20023 }
20024 #endif
20025
20026 #if TARGET_XCOFF
20027
20028 #ifndef HAVE_XCOFF_DWARF_EXTRAS
20029 #define HAVE_XCOFF_DWARF_EXTRAS 0
20030 #endif
20031
20032 static enum unwind_info_type
20033 rs6000_xcoff_debug_unwind_info (void)
20034 {
20035 return UI_NONE;
20036 }
20037
20038 static void
20039 rs6000_xcoff_asm_output_anchor (rtx symbol)
20040 {
20041 char buffer[100];
20042
20043 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
20044 SYMBOL_REF_BLOCK_OFFSET (symbol));
20045 fprintf (asm_out_file, "%s", SET_ASM_OP);
20046 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
20047 fprintf (asm_out_file, ",");
20048 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
20049 fprintf (asm_out_file, "\n");
20050 }
20051
20052 static void
20053 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
20054 {
20055 fputs (GLOBAL_ASM_OP, stream);
20056 RS6000_OUTPUT_BASENAME (stream, name);
20057 putc ('\n', stream);
20058 }
20059
20060 /* A get_unnamed_section callback, used for read-only sections.  DIRECTIVE
20061    points to the section string variable.  */
20062
20063 static void
20064 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
20065 {
20066 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
20067 *(const char *const *) directive,
20068 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20069 }
20070
20071 /* Likewise for read-write sections. */
20072
20073 static void
20074 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
20075 {
20076 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
20077 *(const char *const *) directive,
20078 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20079 }
20080
20081 static void
20082 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
20083 {
20084 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
20085 *(const char *const *) directive,
20086 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20087 }
20088
20089 /* A get_unnamed_section callback, used for switching to toc_section. */
20090
20091 static void
20092 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
20093 {
20094 if (TARGET_MINIMAL_TOC)
20095 {
20096 /* toc_section is always selected at least once from
20097 rs6000_xcoff_file_start, so this is guaranteed to
20098 always be defined once and only once in each file. */
20099 if (!toc_initialized)
20100 {
20101 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
20102 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
20103 toc_initialized = 1;
20104 }
20105 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
20106 (TARGET_32BIT ? "" : ",3"));
20107 }
20108 else
20109 fputs ("\t.toc\n", asm_out_file);
20110 }
20111
20112 /* Implement TARGET_ASM_INIT_SECTIONS. */
20113
20114 static void
20115 rs6000_xcoff_asm_init_sections (void)
20116 {
20117 read_only_data_section
20118 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
20119 &xcoff_read_only_section_name);
20120
20121 private_data_section
20122 = get_unnamed_section (SECTION_WRITE,
20123 rs6000_xcoff_output_readwrite_section_asm_op,
20124 &xcoff_private_data_section_name);
20125
20126 read_only_private_data_section
20127 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
20128 &xcoff_private_rodata_section_name);
20129
20130 tls_data_section
20131 = get_unnamed_section (SECTION_TLS,
20132 rs6000_xcoff_output_tls_section_asm_op,
20133 &xcoff_tls_data_section_name);
20134
20135 tls_private_data_section
20136 = get_unnamed_section (SECTION_TLS,
20137 rs6000_xcoff_output_tls_section_asm_op,
20138 &xcoff_private_data_section_name);
20139
20140 toc_section
20141 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
20142
20143 readonly_data_section = read_only_data_section;
20144 }
20145
20146 static int
20147 rs6000_xcoff_reloc_rw_mask (void)
20148 {
20149 return 3;
20150 }
20151
20152 static void
20153 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
20154 tree decl ATTRIBUTE_UNUSED)
20155 {
20156 int smclass;
20157 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
20158
20159 if (flags & SECTION_EXCLUDE)
20160 smclass = 4;
20161 else if (flags & SECTION_DEBUG)
20162 {
20163 fprintf (asm_out_file, "\t.dwsect %s\n", name);
20164 return;
20165 }
20166 else if (flags & SECTION_CODE)
20167 smclass = 0;
20168 else if (flags & SECTION_TLS)
20169 smclass = 3;
20170 else if (flags & SECTION_WRITE)
20171 smclass = 2;
20172 else
20173 smclass = 1;
20174
20175 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
20176 (flags & SECTION_CODE) ? "." : "",
20177 name, suffix[smclass], flags & SECTION_ENTSIZE);
20178 }
20179
20180 #define IN_NAMED_SECTION(DECL) \
20181 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
20182 && DECL_SECTION_NAME (DECL) != NULL)
20183
20184 static section *
20185 rs6000_xcoff_select_section (tree decl, int reloc,
20186 unsigned HOST_WIDE_INT align)
20187 {
20188 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
20189      a named section.  */
20190 if (align > BIGGEST_ALIGNMENT)
20191 {
20192 resolve_unique_section (decl, reloc, true);
20193 if (IN_NAMED_SECTION (decl))
20194 return get_named_section (decl, NULL, reloc);
20195 }
20196
20197 if (decl_readonly_section (decl, reloc))
20198 {
20199 if (TREE_PUBLIC (decl))
20200 return read_only_data_section;
20201 else
20202 return read_only_private_data_section;
20203 }
20204 else
20205 {
20206 #if HAVE_AS_TLS
20207 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
20208 {
20209 if (TREE_PUBLIC (decl))
20210 return tls_data_section;
20211 else if (bss_initializer_p (decl))
20212 {
20213 /* Convert to COMMON to emit in BSS. */
20214 DECL_COMMON (decl) = 1;
20215 return tls_comm_section;
20216 }
20217 else
20218 return tls_private_data_section;
20219 }
20220 else
20221 #endif
20222 if (TREE_PUBLIC (decl))
20223 return data_section;
20224 else
20225 return private_data_section;
20226 }
20227 }
20228
20229 static void
20230 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
20231 {
20232 const char *name;
20233
20234 /* Use select_section for private data and uninitialized data with
20235 alignment <= BIGGEST_ALIGNMENT. */
20236 if (!TREE_PUBLIC (decl)
20237 || DECL_COMMON (decl)
20238 || (DECL_INITIAL (decl) == NULL_TREE
20239 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
20240 || DECL_INITIAL (decl) == error_mark_node
20241 || (flag_zero_initialized_in_bss
20242 && initializer_zerop (DECL_INITIAL (decl))))
20243 return;
20244
20245 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
20246 name = (*targetm.strip_name_encoding) (name);
20247 set_decl_section_name (decl, name);
20248 }
20249
20250 /* Select section for constant in constant pool.
20251
20252 On RS/6000, all constants are in the private read-only data area.
20253 However, if this is being placed in the TOC it must be output as a
20254 toc entry. */
20255
20256 static section *
20257 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
20258 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
20259 {
20260 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
20261 return toc_section;
20262 else
20263 return read_only_private_data_section;
20264 }
20265
20266 /* Remove any trailing [DS] or the like from the symbol name. */
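
/* The mapping classes appended by this port ("[DS]", "[UA]", "[RW]" and
   friends) are all two characters inside brackets, so a name that ends
   in ']' can be shortened by exactly four characters; "foo[DS]" strips
   back to "foo".  */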
20267
20268 static const char *
20269 rs6000_xcoff_strip_name_encoding (const char *name)
20270 {
20271 size_t len;
20272 if (*name == '*')
20273 name++;
20274 len = strlen (name);
20275 if (name[len - 1] == ']')
20276 return ggc_alloc_string (name, len - 4);
20277 else
20278 return name;
20279 }
20280
20281 /* Section attributes. AIX is always PIC. */
20282
20283 static unsigned int
20284 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
20285 {
20286 unsigned int align;
20287 unsigned int flags = default_section_type_flags (decl, name, reloc);
20288
20289 /* Align to at least UNIT size. */
20290 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
20291 align = MIN_UNITS_PER_WORD;
20292 else
20293 /* Increase alignment of large objects if not already stricter. */
20294 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
20295 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
20296 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
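
  /* The alignment is returned as its base-2 log packed into the
     SECTION_ENTSIZE bits, e.g. an 8-byte alignment is encoded as 3;
     rs6000_xcoff_asm_named_section later prints this field after the
     comma in the .csect directive.  */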
20297
20298 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
20299 }
20300
20301 /* Output at beginning of assembler file.
20302
20303 Initialize the section names for the RS/6000 at this point.
20304
20305 Specify filename, including full path, to assembler.
20306
20307 We want to go into the TOC section so at least one .toc will be emitted.
20308 Also, in order to output proper .bs/.es pairs, we need at least one static
20309 [RW] section emitted.
20310
20311 Finally, declare mcount when profiling to make the assembler happy. */
20312
20313 static void
20314 rs6000_xcoff_file_start (void)
20315 {
20316 rs6000_gen_section_name (&xcoff_bss_section_name,
20317 main_input_filename, ".bss_");
20318 rs6000_gen_section_name (&xcoff_private_data_section_name,
20319 main_input_filename, ".rw_");
20320 rs6000_gen_section_name (&xcoff_private_rodata_section_name,
20321 main_input_filename, ".rop_");
20322 rs6000_gen_section_name (&xcoff_read_only_section_name,
20323 main_input_filename, ".ro_");
20324 rs6000_gen_section_name (&xcoff_tls_data_section_name,
20325 main_input_filename, ".tls_");
20326 rs6000_gen_section_name (&xcoff_tbss_section_name,
20327 main_input_filename, ".tbss_[UL]");
20328
20329 fputs ("\t.file\t", asm_out_file);
20330 output_quoted_string (asm_out_file, main_input_filename);
20331 fputc ('\n', asm_out_file);
20332 if (write_symbols != NO_DEBUG)
20333 switch_to_section (private_data_section);
20334 switch_to_section (toc_section);
20335 switch_to_section (text_section);
20336 if (profile_flag)
20337 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
20338 rs6000_file_start ();
20339 }
20340
20341 /* Output at end of assembler file.
20342 On the RS/6000, referencing data should automatically pull in text. */
20343
20344 static void
20345 rs6000_xcoff_file_end (void)
20346 {
20347 switch_to_section (text_section);
20348 fputs ("_section_.text:\n", asm_out_file);
20349 switch_to_section (data_section);
20350 fputs (TARGET_32BIT
20351 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
20352 asm_out_file);
20353 }
20354
20355 struct declare_alias_data
20356 {
20357 FILE *file;
20358 bool function_descriptor;
20359 };
20360
20361 /* Declare alias N.  A helper function for call_for_symbol_and_aliases.  */
20362
20363 static bool
20364 rs6000_declare_alias (struct symtab_node *n, void *d)
20365 {
20366 struct declare_alias_data *data = (struct declare_alias_data *)d;
20367 /* Main symbol is output specially, because varasm machinery does part of
20368 the job for us - we do not need to declare .globl/lglobs and such. */
20369 if (!n->alias || n->weakref)
20370 return false;
20371
20372 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
20373 return false;
20374
20375 /* Prevent assemble_alias from trying to use .set pseudo operation
20376 that does not behave as expected by the middle-end. */
20377 TREE_ASM_WRITTEN (n->decl) = true;
20378
20379 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
20380 char *buffer = (char *) alloca (strlen (name) + 2);
20381 char *p;
20382 int dollar_inside = 0;
20383
20384 strcpy (buffer, name);
20385 p = strchr (buffer, '$');
20386 while (p) {
20387 *p = '_';
20388 dollar_inside++;
20389 p = strchr (p + 1, '$');
20390 }
20391 if (TREE_PUBLIC (n->decl))
20392 {
20393 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
20394 {
20395 if (dollar_inside) {
20396 if (data->function_descriptor)
20397 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
20398 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
20399 }
20400 if (data->function_descriptor)
20401 {
20402 fputs ("\t.globl .", data->file);
20403 RS6000_OUTPUT_BASENAME (data->file, buffer);
20404 putc ('\n', data->file);
20405 }
20406 fputs ("\t.globl ", data->file);
20407 RS6000_OUTPUT_BASENAME (data->file, buffer);
20408 putc ('\n', data->file);
20409 }
20410 #ifdef ASM_WEAKEN_DECL
20411 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
20412 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
20413 #endif
20414 }
20415 else
20416 {
20417 if (dollar_inside)
20418 {
20419 if (data->function_descriptor)
20420 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
20421 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
20422 }
20423 if (data->function_descriptor)
20424 {
20425 fputs ("\t.lglobl .", data->file);
20426 RS6000_OUTPUT_BASENAME (data->file, buffer);
20427 putc ('\n', data->file);
20428 }
20429 fputs ("\t.lglobl ", data->file);
20430 RS6000_OUTPUT_BASENAME (data->file, buffer);
20431 putc ('\n', data->file);
20432 }
20433 if (data->function_descriptor)
20434 fputs (".", data->file);
20435 RS6000_OUTPUT_BASENAME (data->file, buffer);
20436 fputs (":\n", data->file);
20437 return false;
20438 }
20439
20440
20441 #ifdef HAVE_GAS_HIDDEN
20442 /* Helper function to calculate visibility of a DECL
20443 and return the value as a const string. */
20444
20445 static const char *
20446 rs6000_xcoff_visibility (tree decl)
20447 {
20448 static const char * const visibility_types[] = {
20449 "", ",protected", ",hidden", ",internal"
20450 };
20451
20452 enum symbol_visibility vis = DECL_VISIBILITY (decl);
20453 return visibility_types[vis];
20454 }
20455 #endif
20456
20457
20458 /* This macro produces the initial definition of a function name.
20459 On the RS/6000, we need to place an extra '.' in the function name and
20460 output the function descriptor.
20461 Dollar signs are converted to underscores.
20462
20463 The csect for the function will have already been created when
20464 text_section was selected. We do have to go back to that csect, however.
20465
20466 The third and fourth parameters to the .function pseudo-op (16 and 044)
20467 are placeholders which no longer have any use.
20468
20469 Because AIX assembler's .set command has unexpected semantics, we output
20470 all aliases as alternative labels in front of the definition. */
20471
20472 void
20473 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
20474 {
20475 char *buffer = (char *) alloca (strlen (name) + 1);
20476 char *p;
20477 int dollar_inside = 0;
20478 struct declare_alias_data data = {file, false};
20479
20480 strcpy (buffer, name);
20481 p = strchr (buffer, '$');
20482 while (p) {
20483 *p = '_';
20484 dollar_inside++;
20485 p = strchr (p + 1, '$');
20486 }
20487 if (TREE_PUBLIC (decl))
20488 {
20489 if (!RS6000_WEAK || !DECL_WEAK (decl))
20490 {
20491 if (dollar_inside) {
20492 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
20493 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
20494 }
20495 fputs ("\t.globl .", file);
20496 RS6000_OUTPUT_BASENAME (file, buffer);
20497 #ifdef HAVE_GAS_HIDDEN
20498 fputs (rs6000_xcoff_visibility (decl), file);
20499 #endif
20500 putc ('\n', file);
20501 }
20502 }
20503 else
20504 {
20505 if (dollar_inside) {
20506 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
20507 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
20508 }
20509 fputs ("\t.lglobl .", file);
20510 RS6000_OUTPUT_BASENAME (file, buffer);
20511 putc ('\n', file);
20512 }
20513 fputs ("\t.csect ", file);
20514 RS6000_OUTPUT_BASENAME (file, buffer);
20515 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
20516 RS6000_OUTPUT_BASENAME (file, buffer);
20517 fputs (":\n", file);
20518 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20519 &data, true);
20520 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
20521 RS6000_OUTPUT_BASENAME (file, buffer);
20522 fputs (", TOC[tc0], 0\n", file);
20523 in_section = NULL;
20524 switch_to_section (function_section (decl));
20525 putc ('.', file);
20526 RS6000_OUTPUT_BASENAME (file, buffer);
20527 fputs (":\n", file);
20528 data.function_descriptor = true;
20529 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20530 &data, true);
20531 if (!DECL_IGNORED_P (decl))
20532 {
20533 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
20534 xcoffout_declare_function (file, decl, buffer);
20535 else if (write_symbols == DWARF2_DEBUG)
20536 {
20537 name = (*targetm.strip_name_encoding) (name);
20538 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
20539 }
20540 }
20541 return;
20542 }
20543
20544
20545 /* Output assembly language to globalize a symbol from a DECL,
20546 possibly with visibility. */
20547
20548 void
20549 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
20550 {
20551 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
20552 fputs (GLOBAL_ASM_OP, stream);
20553 RS6000_OUTPUT_BASENAME (stream, name);
20554 #ifdef HAVE_GAS_HIDDEN
20555 fputs (rs6000_xcoff_visibility (decl), stream);
20556 #endif
20557 putc ('\n', stream);
20558 }
20559
20560 /* Output assembly language to define a symbol as COMMON from a DECL,
20561 possibly with visibility. */
20562
20563 void
20564 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
20565 tree decl ATTRIBUTE_UNUSED,
20566 const char *name,
20567 unsigned HOST_WIDE_INT size,
20568 unsigned HOST_WIDE_INT align)
20569 {
20570 unsigned HOST_WIDE_INT align2 = 2;
20571
20572 if (align > 32)
20573 align2 = floor_log2 (align / BITS_PER_UNIT);
20574 else if (size > 4)
20575 align2 = 3;
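
  /* ALIGN2 is the base-2 log of the byte alignment given to .comm: the
     default 2 requests 4-byte alignment, 3 requests 8 bytes for objects
     larger than a word, and stricter declared alignments are passed
     through as their log.  */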
20576
20577 fputs (COMMON_ASM_OP, stream);
20578 RS6000_OUTPUT_BASENAME (stream, name);
20579
20580 fprintf (stream,
20581 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
20582 size, align2);
20583
20584 #ifdef HAVE_GAS_HIDDEN
20585 if (decl != NULL)
20586 fputs (rs6000_xcoff_visibility (decl), stream);
20587 #endif
20588 putc ('\n', stream);
20589 }
20590
20591 /* This macro produces the initial definition of an object (variable) name.
20592 Because AIX assembler's .set command has unexpected semantics, we output
20593 all aliases as alternative labels in front of the definition. */
20594
20595 void
20596 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
20597 {
20598 struct declare_alias_data data = {file, false};
20599 RS6000_OUTPUT_BASENAME (file, name);
20600 fputs (":\n", file);
20601 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20602 &data, true);
20603 }
20604
20605 /* Override the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'. */
20606
20607 void
20608 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
20609 {
20610 fputs (integer_asm_op (size, FALSE), file);
20611 assemble_name (file, label);
20612 fputs ("-$", file);
20613 }
20614
20615 /* Output a symbol offset relative to the dbase for the current object.
20616 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
20617 signed offsets.
20618
20619 __gcc_unwind_dbase is embedded in all executables/libraries through
20620 libgcc/config/rs6000/crtdbase.S. */
20621
20622 void
20623 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
20624 {
20625 fputs (integer_asm_op (size, FALSE), file);
20626 assemble_name (file, label);
20627 fputs("-__gcc_unwind_dbase", file);
20628 }
20629
20630 #ifdef HAVE_AS_TLS
20631 static void
20632 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
20633 {
20634 rtx symbol;
20635 int flags;
20636 const char *symname;
20637
20638 default_encode_section_info (decl, rtl, first);
20639
20640 /* Careful not to prod global register variables. */
20641 if (!MEM_P (rtl))
20642 return;
20643 symbol = XEXP (rtl, 0);
20644 if (!SYMBOL_REF_P (symbol))
20645 return;
20646
20647 flags = SYMBOL_REF_FLAGS (symbol);
20648
20649 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
20650 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
20651
20652 SYMBOL_REF_FLAGS (symbol) = flags;
20653
20654 /* Append mapping class to extern decls. */
20655 symname = XSTR (symbol, 0);
20656 if (decl /* sync condition with assemble_external () */
20657 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
20658 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
20659 || TREE_CODE (decl) == FUNCTION_DECL)
20660 && symname[strlen (symname) - 1] != ']')
20661 {
20662 char *newname = (char *) alloca (strlen (symname) + 5);
20663 strcpy (newname, symname);
20664 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
20665 ? "[DS]" : "[UA]"));
20666 XSTR (symbol, 0) = ggc_strdup (newname);
20667 }
20668 }
20669 #endif /* HAVE_AS_TLS */
20670 #endif /* TARGET_XCOFF */
20671
20672 void
20673 rs6000_asm_weaken_decl (FILE *stream, tree decl,
20674 const char *name, const char *val)
20675 {
20676 fputs ("\t.weak\t", stream);
20677 RS6000_OUTPUT_BASENAME (stream, name);
20678 if (decl && TREE_CODE (decl) == FUNCTION_DECL
20679 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
20680 {
20681 if (TARGET_XCOFF)
20682 fputs ("[DS]", stream);
20683 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
20684 if (TARGET_XCOFF)
20685 fputs (rs6000_xcoff_visibility (decl), stream);
20686 #endif
20687 fputs ("\n\t.weak\t.", stream);
20688 RS6000_OUTPUT_BASENAME (stream, name);
20689 }
20690 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
20691 if (TARGET_XCOFF)
20692 fputs (rs6000_xcoff_visibility (decl), stream);
20693 #endif
20694 fputc ('\n', stream);
20695 if (val)
20696 {
20697 #ifdef ASM_OUTPUT_DEF
20698 ASM_OUTPUT_DEF (stream, name, val);
20699 #endif
20700 if (decl && TREE_CODE (decl) == FUNCTION_DECL
20701 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
20702 {
20703 fputs ("\t.set\t.", stream);
20704 RS6000_OUTPUT_BASENAME (stream, name);
20705 fputs (",.", stream);
20706 RS6000_OUTPUT_BASENAME (stream, val);
20707 fputc ('\n', stream);
20708 }
20709 }
20710 }
20711
20712
20713 /* Return true if INSN should not be copied. */
20714
20715 static bool
20716 rs6000_cannot_copy_insn_p (rtx_insn *insn)
20717 {
20718 return recog_memoized (insn) >= 0
20719 && get_attr_cannot_copy (insn);
20720 }
20721
20722 /* Compute a (partial) cost for rtx X. Return true if the complete
20723 cost has been computed, and false if subexpressions should be
20724 scanned. In either case, *TOTAL contains the cost result. */
20725
20726 static bool
20727 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
20728 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
20729 {
20730 int code = GET_CODE (x);
20731
20732 switch (code)
20733 {
20734 /* On the RS/6000, if it is valid in the insn, it is free. */
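    /* For example, an addi immediate satisfying constraint "I" (a signed
       16-bit value) or an andi. mask satisfying "K" (an unsigned 16-bit
       value) is encoded directly in the instruction and so adds no cost
       of its own.  */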
20735 case CONST_INT:
20736 if (((outer_code == SET
20737 || outer_code == PLUS
20738 || outer_code == MINUS)
20739 && (satisfies_constraint_I (x)
20740 || satisfies_constraint_L (x)))
20741 || (outer_code == AND
20742 && (satisfies_constraint_K (x)
20743 || (mode == SImode
20744 ? satisfies_constraint_L (x)
20745 : satisfies_constraint_J (x))))
20746 || ((outer_code == IOR || outer_code == XOR)
20747 && (satisfies_constraint_K (x)
20748 || (mode == SImode
20749 ? satisfies_constraint_L (x)
20750 : satisfies_constraint_J (x))))
20751 || outer_code == ASHIFT
20752 || outer_code == ASHIFTRT
20753 || outer_code == LSHIFTRT
20754 || outer_code == ROTATE
20755 || outer_code == ROTATERT
20756 || outer_code == ZERO_EXTRACT
20757 || (outer_code == MULT
20758 && satisfies_constraint_I (x))
20759 || ((outer_code == DIV || outer_code == UDIV
20760 || outer_code == MOD || outer_code == UMOD)
20761 && exact_log2 (INTVAL (x)) >= 0)
20762 || (outer_code == COMPARE
20763 && (satisfies_constraint_I (x)
20764 || satisfies_constraint_K (x)))
20765 || ((outer_code == EQ || outer_code == NE)
20766 && (satisfies_constraint_I (x)
20767 || satisfies_constraint_K (x)
20768 || (mode == SImode
20769 ? satisfies_constraint_L (x)
20770 : satisfies_constraint_J (x))))
20771 || (outer_code == GTU
20772 && satisfies_constraint_I (x))
20773 || (outer_code == LTU
20774 && satisfies_constraint_P (x)))
20775 {
20776 *total = 0;
20777 return true;
20778 }
20779 else if ((outer_code == PLUS
20780 && reg_or_add_cint_operand (x, VOIDmode))
20781 || (outer_code == MINUS
20782 && reg_or_sub_cint_operand (x, VOIDmode))
20783 || ((outer_code == SET
20784 || outer_code == IOR
20785 || outer_code == XOR)
20786 && (INTVAL (x)
20787 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
20788 {
20789 *total = COSTS_N_INSNS (1);
20790 return true;
20791 }
20792 /* FALLTHRU */
20793
20794 case CONST_DOUBLE:
20795 case CONST_WIDE_INT:
20796 case CONST:
20797 case HIGH:
20798 case SYMBOL_REF:
20799 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
20800 return true;
20801
20802 case MEM:
20803 /* When optimizing for size, MEM should be slightly more expensive
20804 	 than generating an address, e.g., (plus (reg) (const)).
20805 L1 cache latency is about two instructions. */
20806 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
20807 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
20808 *total += COSTS_N_INSNS (100);
20809 return true;
20810
20811 case LABEL_REF:
20812 *total = 0;
20813 return true;
20814
20815 case PLUS:
20816 case MINUS:
20817 if (FLOAT_MODE_P (mode))
20818 *total = rs6000_cost->fp;
20819 else
20820 *total = COSTS_N_INSNS (1);
20821 return false;
20822
20823 case MULT:
20824 if (CONST_INT_P (XEXP (x, 1))
20825 && satisfies_constraint_I (XEXP (x, 1)))
20826 {
20827 if (INTVAL (XEXP (x, 1)) >= -256
20828 && INTVAL (XEXP (x, 1)) <= 255)
20829 *total = rs6000_cost->mulsi_const9;
20830 else
20831 *total = rs6000_cost->mulsi_const;
20832 }
20833 else if (mode == SFmode)
20834 *total = rs6000_cost->fp;
20835 else if (FLOAT_MODE_P (mode))
20836 *total = rs6000_cost->dmul;
20837 else if (mode == DImode)
20838 *total = rs6000_cost->muldi;
20839 else
20840 *total = rs6000_cost->mulsi;
20841 return false;
20842
20843 case FMA:
20844 if (mode == SFmode)
20845 *total = rs6000_cost->fp;
20846 else
20847 *total = rs6000_cost->dmul;
20848 break;
20849
20850 case DIV:
20851 case MOD:
20852 if (FLOAT_MODE_P (mode))
20853 {
20854 *total = mode == DFmode ? rs6000_cost->ddiv
20855 : rs6000_cost->sdiv;
20856 return false;
20857 }
20858 /* FALLTHRU */
20859
20860 case UDIV:
20861 case UMOD:
20862 if (CONST_INT_P (XEXP (x, 1))
20863 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
20864 {
20865 if (code == DIV || code == MOD)
20866 /* Shift, addze */
20867 *total = COSTS_N_INSNS (2);
20868 else
20869 /* Shift */
20870 *total = COSTS_N_INSNS (1);
20871 }
20872 else
20873 {
20874 if (GET_MODE (XEXP (x, 1)) == DImode)
20875 *total = rs6000_cost->divdi;
20876 else
20877 *total = rs6000_cost->divsi;
20878 }
20879 /* Add in shift and subtract for MOD unless we have a mod instruction. */
20880 if (!TARGET_MODULO && (code == MOD || code == UMOD))
20881 *total += COSTS_N_INSNS (2);
20882 return false;
20883
20884 case CTZ:
20885 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
20886 return false;
20887
20888 case FFS:
20889 *total = COSTS_N_INSNS (4);
20890 return false;
20891
20892 case POPCOUNT:
20893 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
20894 return false;
20895
20896 case PARITY:
20897 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
20898 return false;
20899
20900 case NOT:
20901 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
20902 *total = 0;
20903 else
20904 *total = COSTS_N_INSNS (1);
20905 return false;
20906
20907 case AND:
20908 if (CONST_INT_P (XEXP (x, 1)))
20909 {
20910 rtx left = XEXP (x, 0);
20911 rtx_code left_code = GET_CODE (left);
20912
20913 /* rotate-and-mask: 1 insn. */
20914 if ((left_code == ROTATE
20915 || left_code == ASHIFT
20916 || left_code == LSHIFTRT)
20917 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
20918 {
20919 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
20920 if (!CONST_INT_P (XEXP (left, 1)))
20921 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
20922 *total += COSTS_N_INSNS (1);
20923 return true;
20924 }
20925
20926 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
20927 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
20928 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
20929 || (val & 0xffff) == val
20930 || (val & 0xffff0000) == val
20931 || ((val & 0xffff) == 0 && mode == SImode))
20932 {
20933 *total = rtx_cost (left, mode, AND, 0, speed);
20934 *total += COSTS_N_INSNS (1);
20935 return true;
20936 }
20937
20938 /* 2 insns. */
20939 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
20940 {
20941 *total = rtx_cost (left, mode, AND, 0, speed);
20942 *total += COSTS_N_INSNS (2);
20943 return true;
20944 }
20945 }
20946
20947 *total = COSTS_N_INSNS (1);
20948 return false;
20949
20950 case IOR:
20951 /* FIXME */
20952 *total = COSTS_N_INSNS (1);
20953 return true;
20954
20955 case CLZ:
20956 case XOR:
20957 case ZERO_EXTRACT:
20958 *total = COSTS_N_INSNS (1);
20959 return false;
20960
20961 case ASHIFT:
20962       /* The EXTSWSLI instruction combines a sign extend and a shift.  Don't
20963 	 count the sign extend and the shift separately within the insn.  */
20964 if (TARGET_EXTSWSLI && mode == DImode
20965 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
20966 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
20967 {
20968 *total = 0;
20969 return false;
20970 }
20971 /* fall through */
20972
20973 case ASHIFTRT:
20974 case LSHIFTRT:
20975 case ROTATE:
20976 case ROTATERT:
20977 /* Handle mul_highpart. */
20978 if (outer_code == TRUNCATE
20979 && GET_CODE (XEXP (x, 0)) == MULT)
20980 {
20981 if (mode == DImode)
20982 *total = rs6000_cost->muldi;
20983 else
20984 *total = rs6000_cost->mulsi;
20985 return true;
20986 }
20987 else if (outer_code == AND)
20988 *total = 0;
20989 else
20990 *total = COSTS_N_INSNS (1);
20991 return false;
20992
20993 case SIGN_EXTEND:
20994 case ZERO_EXTEND:
20995 if (MEM_P (XEXP (x, 0)))
20996 *total = 0;
20997 else
20998 *total = COSTS_N_INSNS (1);
20999 return false;
21000
21001 case COMPARE:
21002 case NEG:
21003 case ABS:
21004 if (!FLOAT_MODE_P (mode))
21005 {
21006 *total = COSTS_N_INSNS (1);
21007 return false;
21008 }
21009 /* FALLTHRU */
21010
21011 case FLOAT:
21012 case UNSIGNED_FLOAT:
21013 case FIX:
21014 case UNSIGNED_FIX:
21015 case FLOAT_TRUNCATE:
21016 *total = rs6000_cost->fp;
21017 return false;
21018
21019 case FLOAT_EXTEND:
21020 if (mode == DFmode)
21021 *total = rs6000_cost->sfdf_convert;
21022 else
21023 *total = rs6000_cost->fp;
21024 return false;
21025
21026 case CALL:
21027 case IF_THEN_ELSE:
21028 if (!speed)
21029 {
21030 *total = COSTS_N_INSNS (1);
21031 return true;
21032 }
21033 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
21034 {
21035 *total = rs6000_cost->fp;
21036 return false;
21037 }
21038 break;
21039
21040 case NE:
21041 case EQ:
21042 case GTU:
21043 case LTU:
21044 /* Carry bit requires mode == Pmode.
21045 NEG or PLUS already counted so only add one. */
21046 if (mode == Pmode
21047 && (outer_code == NEG || outer_code == PLUS))
21048 {
21049 *total = COSTS_N_INSNS (1);
21050 return true;
21051 }
21052 /* FALLTHRU */
21053
21054 case GT:
21055 case LT:
21056 case UNORDERED:
21057 if (outer_code == SET)
21058 {
21059 if (XEXP (x, 1) == const0_rtx)
21060 {
21061 *total = COSTS_N_INSNS (2);
21062 return true;
21063 }
21064 else
21065 {
21066 *total = COSTS_N_INSNS (3);
21067 return false;
21068 }
21069 }
21070 /* CC COMPARE. */
21071 if (outer_code == COMPARE)
21072 {
21073 *total = 0;
21074 return true;
21075 }
21076 break;
21077
21078 default:
21079 break;
21080 }
21081
21082 return false;
21083 }
21084
21085 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost.  */
21086
21087 static bool
21088 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
21089 int opno, int *total, bool speed)
21090 {
21091 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
21092
21093 fprintf (stderr,
21094 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
21095 "opno = %d, total = %d, speed = %s, x:\n",
21096 ret ? "complete" : "scan inner",
21097 GET_MODE_NAME (mode),
21098 GET_RTX_NAME (outer_code),
21099 opno,
21100 *total,
21101 speed ? "true" : "false");
21102
21103 debug_rtx (x);
21104
21105 return ret;
21106 }
21107
21108 static int
21109 rs6000_insn_cost (rtx_insn *insn, bool speed)
21110 {
21111 if (recog_memoized (insn) < 0)
21112 return 0;
21113
21114 /* If we are optimizing for size, just use the length. */
21115 if (!speed)
21116 return get_attr_length (insn);
21117
21118 /* Use the cost if provided. */
21119 int cost = get_attr_cost (insn);
21120 if (cost > 0)
21121 return cost;
21122
21123 /* If the insn tells us how many insns there are, use that. Otherwise use
21124 the length/4. Adjust the insn length to remove the extra size that
21125 prefixed instructions take. */
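  /* Whatever extra size ADJUST_INSN_LENGTH reports for a prefixed
     instruction is backed out first, so that the length / 4 estimate
     below still assumes one machine instruction per 4 bytes.  */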
21126 int n = get_attr_num_insns (insn);
21127 if (n == 0)
21128 {
21129 int length = get_attr_length (insn);
21130 if (get_attr_prefixed (insn) == PREFIXED_YES)
21131 {
21132 int adjust = 0;
21133 ADJUST_INSN_LENGTH (insn, adjust);
21134 length -= adjust;
21135 }
21136
21137 n = length / 4;
21138 }
21139
21140 enum attr_type type = get_attr_type (insn);
21141
21142 switch (type)
21143 {
21144 case TYPE_LOAD:
21145 case TYPE_FPLOAD:
21146 case TYPE_VECLOAD:
21147 cost = COSTS_N_INSNS (n + 1);
21148 break;
21149
21150 case TYPE_MUL:
21151 switch (get_attr_size (insn))
21152 {
21153 case SIZE_8:
21154 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
21155 break;
21156 case SIZE_16:
21157 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
21158 break;
21159 case SIZE_32:
21160 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
21161 break;
21162 case SIZE_64:
21163 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
21164 break;
21165 default:
21166 gcc_unreachable ();
21167 }
21168 break;
21169 case TYPE_DIV:
21170 switch (get_attr_size (insn))
21171 {
21172 case SIZE_32:
21173 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
21174 break;
21175 case SIZE_64:
21176 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
21177 break;
21178 default:
21179 gcc_unreachable ();
21180 }
21181 break;
21182
21183 case TYPE_FP:
21184 cost = n * rs6000_cost->fp;
21185 break;
21186 case TYPE_DMUL:
21187 cost = n * rs6000_cost->dmul;
21188 break;
21189 case TYPE_SDIV:
21190 cost = n * rs6000_cost->sdiv;
21191 break;
21192 case TYPE_DDIV:
21193 cost = n * rs6000_cost->ddiv;
21194 break;
21195
21196 case TYPE_SYNC:
21197 case TYPE_LOAD_L:
21198 case TYPE_MFCR:
21199 case TYPE_MFCRF:
21200 cost = COSTS_N_INSNS (n + 2);
21201 break;
21202
21203 default:
21204 cost = COSTS_N_INSNS (n);
21205 }
21206
21207 return cost;
21208 }
21209
21210 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
21211
21212 static int
21213 rs6000_debug_address_cost (rtx x, machine_mode mode,
21214 addr_space_t as, bool speed)
21215 {
21216 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
21217
21218 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
21219 ret, speed ? "true" : "false");
21220 debug_rtx (x);
21221
21222 return ret;
21223 }
21224
21225
21226 /* A C expression returning the cost of moving data from a register of class
21227 CLASS1 to one of CLASS2. */
21228
21229 static int
21230 rs6000_register_move_cost (machine_mode mode,
21231 reg_class_t from, reg_class_t to)
21232 {
21233 int ret;
21234 reg_class_t rclass;
21235
21236 if (TARGET_DEBUG_COST)
21237 dbg_cost_ctrl++;
21238
21239 /* If we have VSX, we can easily move between FPR or Altivec registers,
21240 otherwise we can only easily move within classes.
21241 Do this first so we give best-case answers for union classes
21242 containing both gprs and vsx regs. */
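  /* The costs below are in the usual register-move-cost units, where 2
     stands for a single move instruction; they are kept under the
     rs6000_memory_move_cost results (4 per register and up) except for
     the slow LR/CTR case further down, which is deliberately made
     costlier than memory to bias spills that way.  */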
21243 HARD_REG_SET to_vsx, from_vsx;
21244 to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
21245 from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
21246 if (!hard_reg_set_empty_p (to_vsx)
21247 && !hard_reg_set_empty_p (from_vsx)
21248 && (TARGET_VSX
21249 || hard_reg_set_intersect_p (to_vsx, from_vsx)))
21250 {
21251 int reg = FIRST_FPR_REGNO;
21252 if (TARGET_VSX
21253 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
21254 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
21255 reg = FIRST_ALTIVEC_REGNO;
21256 ret = 2 * hard_regno_nregs (reg, mode);
21257 }
21258
21259 /* Moves from/to GENERAL_REGS. */
21260 else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
21261 || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
21262 {
21263 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
21264 {
21265 if (TARGET_DIRECT_MOVE)
21266 {
21267 /* Keep the cost for direct moves above that for within
21268 a register class even if the actual processor cost is
21269 comparable. We do this because a direct move insn
21270 can't be a nop, whereas with ideal register
21271 allocation a move within the same class might turn
21272 out to be a nop. */
21273 if (rs6000_tune == PROCESSOR_POWER9
21274 || rs6000_tune == PROCESSOR_FUTURE)
21275 ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21276 else
21277 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21278 /* SFmode requires a conversion when moving between gprs
21279 and vsx. */
21280 if (mode == SFmode)
21281 ret += 2;
21282 }
21283 else
21284 ret = (rs6000_memory_move_cost (mode, rclass, false)
21285 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
21286 }
21287
21288 /* It's more expensive to move CR_REGS than CR0_REGS because of the
21289 shift. */
21290 else if (rclass == CR_REGS)
21291 ret = 4;
21292
21293 /* For those processors that have slow LR/CTR moves, make them more
21294 	 expensive than memory in order to bias spills to memory.  */
21295 else if ((rs6000_tune == PROCESSOR_POWER6
21296 || rs6000_tune == PROCESSOR_POWER7
21297 || rs6000_tune == PROCESSOR_POWER8
21298 || rs6000_tune == PROCESSOR_POWER9)
21299 && reg_class_subset_p (rclass, SPECIAL_REGS))
21300 ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21301
21302 else
21303 /* A move will cost one instruction per GPR moved. */
21304 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21305 }
21306
21307 /* Everything else has to go through GENERAL_REGS. */
21308 else
21309 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
21310 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
21311
21312 if (TARGET_DEBUG_COST)
21313 {
21314 if (dbg_cost_ctrl == 1)
21315 fprintf (stderr,
21316 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
21317 ret, GET_MODE_NAME (mode), reg_class_names[from],
21318 reg_class_names[to]);
21319 dbg_cost_ctrl--;
21320 }
21321
21322 return ret;
21323 }
21324
21325 /* A C expression returning the cost of moving data of MODE from a register to
21326 or from memory. */
21327
21328 static int
21329 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
21330 bool in ATTRIBUTE_UNUSED)
21331 {
21332 int ret;
21333
21334 if (TARGET_DEBUG_COST)
21335 dbg_cost_ctrl++;
21336
21337 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
21338 ret = 4 * hard_regno_nregs (0, mode);
21339 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
21340 || reg_classes_intersect_p (rclass, VSX_REGS)))
21341 ret = 4 * hard_regno_nregs (32, mode);
21342 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
21343 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
21344 else
21345 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
21346
21347 if (TARGET_DEBUG_COST)
21348 {
21349 if (dbg_cost_ctrl == 1)
21350 fprintf (stderr,
21351 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
21352 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
21353 dbg_cost_ctrl--;
21354 }
21355
21356 return ret;
21357 }
21358
21359 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
21360
21361 The register allocator chooses GEN_OR_VSX_REGS for the allocno
21362 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
21363 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
21364 move cost between GENERAL_REGS and VSX_REGS low.
21365
21366 It might seem reasonable to use a union class. After all, if usage
21367 of vsr is low and gpr high, it might make sense to spill gpr to vsr
21368 rather than memory. However, in cases where register pressure of
21369 both is high, like the cactus_adm spec test, allowing
21370 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
21371 the first scheduling pass. This is partly due to an allocno of
21372 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
21373 class, which gives too high a pressure for GENERAL_REGS and too low
21374 for VSX_REGS. So, force a choice of the subclass here.
21375
21376 The best class is also the union if GENERAL_REGS and VSX_REGS have
21377 the same cost. In that case we do use GEN_OR_VSX_REGS as the
21378 allocno class, since trying to narrow down the class by regno mode
21379 is prone to error. For example, SImode is allowed in VSX regs and
21380    in some cases (e.g. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
21381 it would be wrong to choose an allocno of GENERAL_REGS based on
21382 SImode. */
21383
21384 static reg_class_t
21385 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
21386 reg_class_t allocno_class,
21387 reg_class_t best_class)
21388 {
21389 switch (allocno_class)
21390 {
21391 case GEN_OR_VSX_REGS:
21392 /* best_class must be a subset of allocno_class. */
21393 gcc_checking_assert (best_class == GEN_OR_VSX_REGS
21394 || best_class == GEN_OR_FLOAT_REGS
21395 || best_class == VSX_REGS
21396 || best_class == ALTIVEC_REGS
21397 || best_class == FLOAT_REGS
21398 || best_class == GENERAL_REGS
21399 || best_class == BASE_REGS);
21400 /* Use best_class but choose wider classes when copying from the
21401 wider class to best_class is cheap. This mimics IRA choice
21402 of allocno class. */
21403 if (best_class == BASE_REGS)
21404 return GENERAL_REGS;
21405 if (TARGET_VSX
21406 && (best_class == FLOAT_REGS || best_class == ALTIVEC_REGS))
21407 return VSX_REGS;
21408 return best_class;
21409
21410 default:
21411 break;
21412 }
21413
21414 return allocno_class;
21415 }
21416
21417 /* Returns a code for a target-specific builtin that implements
21418    the reciprocal of the function, or NULL_TREE if not available.  */
21419
21420 static tree
21421 rs6000_builtin_reciprocal (tree fndecl)
21422 {
21423 switch (DECL_MD_FUNCTION_CODE (fndecl))
21424 {
21425 case VSX_BUILTIN_XVSQRTDP:
21426 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
21427 return NULL_TREE;
21428
21429 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
21430
21431 case VSX_BUILTIN_XVSQRTSP:
21432 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
21433 return NULL_TREE;
21434
21435 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
21436
21437 default:
21438 return NULL_TREE;
21439 }
21440 }
21441
21442 /* Load up a constant. If the mode is a vector mode, splat the value across
21443 all of the vector elements. */
21444
21445 static rtx
21446 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
21447 {
21448 rtx reg;
21449
21450 if (mode == SFmode || mode == DFmode)
21451 {
21452 rtx d = const_double_from_real_value (dconst, mode);
21453 reg = force_reg (mode, d);
21454 }
21455 else if (mode == V4SFmode)
21456 {
21457 rtx d = const_double_from_real_value (dconst, SFmode);
21458 rtvec v = gen_rtvec (4, d, d, d, d);
21459 reg = gen_reg_rtx (mode);
21460 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
21461 }
21462 else if (mode == V2DFmode)
21463 {
21464 rtx d = const_double_from_real_value (dconst, DFmode);
21465 rtvec v = gen_rtvec (2, d, d);
21466 reg = gen_reg_rtx (mode);
21467 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
21468 }
21469 else
21470 gcc_unreachable ();
21471
21472 return reg;
21473 }
21474
21475 /* Generate an FMA instruction. */
21476
21477 static void
21478 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
21479 {
21480 machine_mode mode = GET_MODE (target);
21481 rtx dst;
21482
21483 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
21484 gcc_assert (dst != NULL);
21485
21486 if (dst != target)
21487 emit_move_insn (target, dst);
21488 }
21489
21490 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
21491
21492 static void
21493 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
21494 {
21495 machine_mode mode = GET_MODE (dst);
21496 rtx r;
21497
21498 /* This is a tad more complicated, since the fnma_optab is for
21499 a different expression: fma(-m1, m2, a), which is the same
21500 thing except in the case of signed zeros.
21501
21502 Fortunately we know that if FMA is supported that FNMSUB is
21503 also supported in the ISA. Just expand it directly. */
21504
21505 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
21506
21507 r = gen_rtx_NEG (mode, a);
21508 r = gen_rtx_FMA (mode, m1, m2, r);
21509 r = gen_rtx_NEG (mode, r);
21510 emit_insn (gen_rtx_SET (dst, r));
21511 }
21512
21513 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
21514 add a reg_note saying that this was a division. Support both scalar and
21515 vector divide. Assumes no trapping math and finite arguments. */
21516
21517 void
21518 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
21519 {
21520 machine_mode mode = GET_MODE (dst);
21521 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
21522 int i;
21523
21524 /* Low precision estimates guarantee 5 bits of accuracy. High
21525 precision estimates guarantee 14 bits of accuracy. SFmode
21526 requires 23 bits of accuracy. DFmode requires 52 bits of
21527 accuracy. Each pass at least doubles the accuracy, leading
21528 to the following. */
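  /* Writing e_i = 1 - d * x_i, the update x_(i+1) = x_i * (2 - d * x_i)
     is the same as x_(i+1) = x_i + x_i * e_i, and then
     e_(i+1) = 1 - d * x_(i+1) = e_i * e_i; the loop below exploits this
     by squaring the error term instead of recomputing 1 - d * x.  */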
21529 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
21530 if (mode == DFmode || mode == V2DFmode)
21531 passes++;
21532
21533 enum insn_code code = optab_handler (smul_optab, mode);
21534 insn_gen_fn gen_mul = GEN_FCN (code);
21535
21536 gcc_assert (code != CODE_FOR_nothing);
21537
21538 one = rs6000_load_constant_and_splat (mode, dconst1);
21539
21540 /* x0 = 1./d estimate */
21541 x0 = gen_reg_rtx (mode);
21542 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
21543 UNSPEC_FRES)));
21544
21545 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
21546 if (passes > 1) {
21547
21548 /* e0 = 1. - d * x0 */
21549 e0 = gen_reg_rtx (mode);
21550 rs6000_emit_nmsub (e0, d, x0, one);
21551
21552 /* x1 = x0 + e0 * x0 */
21553 x1 = gen_reg_rtx (mode);
21554 rs6000_emit_madd (x1, e0, x0, x0);
21555
21556 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
21557 ++i, xprev = xnext, eprev = enext) {
21558
21559 /* enext = eprev * eprev */
21560 enext = gen_reg_rtx (mode);
21561 emit_insn (gen_mul (enext, eprev, eprev));
21562
21563 /* xnext = xprev + enext * xprev */
21564 xnext = gen_reg_rtx (mode);
21565 rs6000_emit_madd (xnext, enext, xprev, xprev);
21566 }
21567
21568 } else
21569 xprev = x0;
21570
21571 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
21572
21573 /* u = n * xprev */
21574 u = gen_reg_rtx (mode);
21575 emit_insn (gen_mul (u, n, xprev));
21576
21577 /* v = n - (d * u) */
21578 v = gen_reg_rtx (mode);
21579 rs6000_emit_nmsub (v, d, u, n);
21580
21581 /* dst = (v * xprev) + u */
21582 rs6000_emit_madd (dst, v, xprev, u);
21583
21584 if (note_p)
21585 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
21586 }
21587
21588 /* Goldschmidt's Algorithm for single/double-precision floating point
21589 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
21590
21591 void
21592 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
21593 {
21594 machine_mode mode = GET_MODE (src);
21595 rtx e = gen_reg_rtx (mode);
21596 rtx g = gen_reg_rtx (mode);
21597 rtx h = gen_reg_rtx (mode);
21598
21599 /* Low precision estimates guarantee 5 bits of accuracy. High
21600 precision estimates guarantee 14 bits of accuracy. SFmode
21601 requires 23 bits of accuracy. DFmode requires 52 bits of
21602 accuracy. Each pass at least doubles the accuracy, leading
21603 to the following. */
21604 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
21605 if (mode == DFmode || mode == V2DFmode)
21606 passes++;
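
  /* With g ~ sqrt(src) and h ~ 1/(2*sqrt(src)), each Goldschmidt
     iteration computes t = 1/2 - g * h and refines g' = g + g * t,
     h' = h + h * t; at convergence 2*g*h == 1, so g approaches
     sqrt(src) and 2*h approaches 1/sqrt(src).  */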
21607
21608 int i;
21609 rtx mhalf;
21610 enum insn_code code = optab_handler (smul_optab, mode);
21611 insn_gen_fn gen_mul = GEN_FCN (code);
21612
21613 gcc_assert (code != CODE_FOR_nothing);
21614
21615 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
21616
21617 /* e = rsqrt estimate */
21618 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
21619 UNSPEC_RSQRT)));
21620
21621 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
21622 if (!recip)
21623 {
21624 rtx zero = force_reg (mode, CONST0_RTX (mode));
21625
21626 if (mode == SFmode)
21627 {
21628 rtx target = emit_conditional_move (e, GT, src, zero, mode,
21629 e, zero, mode, 0);
21630 if (target != e)
21631 emit_move_insn (e, target);
21632 }
21633 else
21634 {
21635 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
21636 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
21637 }
21638 }
21639
21640 /* g = sqrt estimate. */
21641 emit_insn (gen_mul (g, e, src));
21642 /* h = 1/(2*sqrt) estimate. */
21643 emit_insn (gen_mul (h, e, mhalf));
21644
21645 if (recip)
21646 {
21647 if (passes == 1)
21648 {
21649 rtx t = gen_reg_rtx (mode);
21650 rs6000_emit_nmsub (t, g, h, mhalf);
21651 /* Apply correction directly to the rsqrt estimate. */
21652 rs6000_emit_madd (dst, e, t, e);
21653 }
21654 else
21655 {
21656 for (i = 0; i < passes; i++)
21657 {
21658 rtx t1 = gen_reg_rtx (mode);
21659 rtx g1 = gen_reg_rtx (mode);
21660 rtx h1 = gen_reg_rtx (mode);
21661
21662 rs6000_emit_nmsub (t1, g, h, mhalf);
21663 rs6000_emit_madd (g1, g, t1, g);
21664 rs6000_emit_madd (h1, h, t1, h);
21665
21666 g = g1;
21667 h = h1;
21668 }
21669 /* h converges to 1/(2*sqrt), so multiply by 2 to get the rsqrt. */
21670 emit_insn (gen_add3_insn (dst, h, h));
21671 }
21672 }
21673 else
21674 {
21675 rtx t = gen_reg_rtx (mode);
21676 rs6000_emit_nmsub (t, g, h, mhalf);
21677 rs6000_emit_madd (dst, g, t, g);
21678 }
21679
21680 return;
21681 }
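/* For exposition only (not code GCC compiles): the Goldschmidt
   refinement above as scalar C, assuming E is the frsqrte estimate
   of 1/sqrt(x).  G converges to sqrt(x) and H to 1/(2*sqrt(x)):

     g = e * x;
     h = e * 0.5;
     for (i = 0; i < passes; i++)
       {
         t = 0.5 - g * h;
         g = g + g * t;
         h = h + h * t;
       }

   after which sqrt(x) is approximately G and rsqrt(x) is 2*H.  */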
21682
21683 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
21684 (Power7) targets. DST is the target, and SRC is the argument operand. */
21685
21686 void
21687 rs6000_emit_popcount (rtx dst, rtx src)
21688 {
21689 machine_mode mode = GET_MODE (dst);
21690 rtx tmp1, tmp2;
21691
21692 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
21693 if (TARGET_POPCNTD)
21694 {
21695 if (mode == SImode)
21696 emit_insn (gen_popcntdsi2 (dst, src));
21697 else
21698 emit_insn (gen_popcntddi2 (dst, src));
21699 return;
21700 }
21701
21702 tmp1 = gen_reg_rtx (mode);
21703
21704 if (mode == SImode)
21705 {
21706 emit_insn (gen_popcntbsi2 (tmp1, src));
21707 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
21708 NULL_RTX, 0);
21709 tmp2 = force_reg (SImode, tmp2);
21710 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
21711 }
21712 else
21713 {
21714 emit_insn (gen_popcntbdi2 (tmp1, src));
21715 tmp2 = expand_mult (DImode, tmp1,
21716 GEN_INT ((HOST_WIDE_INT)
21717 0x01010101 << 32 | 0x01010101),
21718 NULL_RTX, 0);
21719 tmp2 = force_reg (DImode, tmp2);
21720 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
21721 }
21722 }
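/* For exposition only: a scalar C model of the popcntb-based
   expansion above.  popcntb leaves each byte's bit count (0..8) in
   the corresponding result byte; multiplying by 0x01010101 then sums
   all byte counts into the most significant byte, which the final
   shift extracts:

     b = popcntb (src);
     return (b * 0x01010101) >> 24;

   The 64-bit case is identical with a 0x0101010101010101 multiplier
   and a shift by 56.  */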
21723
21724
21725 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
21726 target, and SRC is the argument operand. */
21727
21728 void
21729 rs6000_emit_parity (rtx dst, rtx src)
21730 {
21731 machine_mode mode = GET_MODE (dst);
21732 rtx tmp;
21733
21734 tmp = gen_reg_rtx (mode);
21735
21736 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
21737 if (TARGET_CMPB)
21738 {
21739 if (mode == SImode)
21740 {
21741 emit_insn (gen_popcntbsi2 (tmp, src));
21742 emit_insn (gen_paritysi2_cmpb (dst, tmp));
21743 }
21744 else
21745 {
21746 emit_insn (gen_popcntbdi2 (tmp, src));
21747 emit_insn (gen_paritydi2_cmpb (dst, tmp));
21748 }
21749 return;
21750 }
21751
21752 if (mode == SImode)
21753 {
21754 /* Is mult+shift >= shift+xor+shift+xor? */
21755 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
21756 {
21757 rtx tmp1, tmp2, tmp3, tmp4;
21758
21759 tmp1 = gen_reg_rtx (SImode);
21760 emit_insn (gen_popcntbsi2 (tmp1, src));
21761
21762 tmp2 = gen_reg_rtx (SImode);
21763 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
21764 tmp3 = gen_reg_rtx (SImode);
21765 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
21766
21767 tmp4 = gen_reg_rtx (SImode);
21768 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
21769 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
21770 }
21771 else
21772 rs6000_emit_popcount (tmp, src);
21773 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
21774 }
21775 else
21776 {
21777 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
21778 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
21779 {
21780 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
21781
21782 tmp1 = gen_reg_rtx (DImode);
21783 emit_insn (gen_popcntbdi2 (tmp1, src));
21784
21785 tmp2 = gen_reg_rtx (DImode);
21786 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
21787 tmp3 = gen_reg_rtx (DImode);
21788 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
21789
21790 tmp4 = gen_reg_rtx (DImode);
21791 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
21792 tmp5 = gen_reg_rtx (DImode);
21793 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
21794
21795 tmp6 = gen_reg_rtx (DImode);
21796 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
21797 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
21798 }
21799 else
21800 rs6000_emit_popcount (tmp, src);
21801 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
21802 }
21803 }
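/* For exposition only: a scalar C model of the 32-bit shift/xor
   expansion above.  XORing the per-byte counts folds them together
   while preserving overall parity, so only bit 0 survives the final
   mask:

     b = popcntb (src);
     b ^= b >> 16;
     b ^= b >> 8;
     return b & 1;

   The 64-bit case adds one more folding step, starting with
   b ^= b >> 32.  */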
21804
21805 /* Expand an Altivec constant permutation for little endian mode.
21806 OP0 and OP1 are the input vectors and TARGET is the output vector.
21807 SEL specifies the constant permutation vector.
21808
21809 There are two issues: First, the two input operands must be
21810 swapped so that together they form a double-wide array in LE
21811 order. Second, the vperm instruction has surprising behavior
21812 in LE mode: it interprets the elements of the source vectors
21813 in BE mode ("left to right") and interprets the elements of
21814 the destination vector in LE mode ("right to left"). To
21815 correct for this, we must subtract each element of the permute
21816 control vector from 31.
21817
21818 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
21819 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
21820 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
21821 serve as the permute control vector. Then, in BE mode,
21822
21823 vperm 9,10,11,12
21824
21825 places the desired result in vr9. However, in LE mode the
21826 vector contents will be
21827
21828 vr10 = 00000003 00000002 00000001 00000000
21829 vr11 = 00000007 00000006 00000005 00000004
21830
21831 The result of the vperm using the same permute control vector is
21832
21833 vr9 = 05000000 07000000 01000000 03000000
21834
21835 That is, the leftmost 4 bytes of vr10 are interpreted as the
21836 source for the rightmost 4 bytes of vr9, and so on.
21837
21838 If we change the permute control vector to
21839
21840 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
21841
21842 and issue
21843
21844 vperm 9,11,10,12
21845
21846 we get the desired
21847
21848 vr9 = 00000006 00000004 00000002 00000000. */
21849
21850 static void
21851 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
21852 const vec_perm_indices &sel)
21853 {
21854 unsigned int i;
21855 rtx perm[16];
21856 rtx constv, unspec;
21857
21858 /* Unpack and adjust the constant selector. */
21859 for (i = 0; i < 16; ++i)
21860 {
21861 unsigned int elt = 31 - (sel[i] & 31);
21862 perm[i] = GEN_INT (elt);
21863 }
21864
21865 /* Expand to a permute, swapping the inputs and using the
21866 adjusted selector. */
21867 if (!REG_P (op0))
21868 op0 = force_reg (V16QImode, op0);
21869 if (!REG_P (op1))
21870 op1 = force_reg (V16QImode, op1);
21871
21872 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
21873 constv = force_reg (V16QImode, constv);
21874 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
21875 UNSPEC_VPERM);
21876 if (!REG_P (target))
21877 {
21878 rtx tmp = gen_reg_rtx (V16QImode);
21879 emit_move_insn (tmp, unspec);
21880 unspec = tmp;
21881 }
21882
21883 emit_move_insn (target, unspec);
21884 }
21885
21886 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
21887 permute control vector. But here it's not a constant, so we must
21888 generate a vector NAND or NOR to do the adjustment. */
21889
21890 void
21891 altivec_expand_vec_perm_le (rtx operands[4])
21892 {
21893 rtx notx, iorx, unspec;
21894 rtx target = operands[0];
21895 rtx op0 = operands[1];
21896 rtx op1 = operands[2];
21897 rtx sel = operands[3];
21898 rtx tmp = target;
21899 rtx norreg = gen_reg_rtx (V16QImode);
21900 machine_mode mode = GET_MODE (target);
21901
21902 /* Get everything in regs so the pattern matches. */
21903 if (!REG_P (op0))
21904 op0 = force_reg (mode, op0);
21905 if (!REG_P (op1))
21906 op1 = force_reg (mode, op1);
21907 if (!REG_P (sel))
21908 sel = force_reg (V16QImode, sel);
21909 if (!REG_P (target))
21910 tmp = gen_reg_rtx (mode);
21911
21912 if (TARGET_P9_VECTOR)
21913 {
21914 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
21915 UNSPEC_VPERMR);
21916 }
21917 else
21918 {
21919 /* Invert the selector with a VNAND if available, else a VNOR.
21920 The VNAND is preferred for future fusion opportunities. */
21921 notx = gen_rtx_NOT (V16QImode, sel);
21922 iorx = (TARGET_P8_VECTOR
21923 ? gen_rtx_IOR (V16QImode, notx, notx)
21924 : gen_rtx_AND (V16QImode, notx, notx));
21925 emit_insn (gen_rtx_SET (norreg, iorx));
21926
21927 /* Permute with operands reversed and adjusted selector. */
21928 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
21929 UNSPEC_VPERM);
21930 }
21931
21932 /* Copy into target, possibly by way of a register. */
21933 if (!REG_P (target))
21934 {
21935 emit_move_insn (tmp, unspec);
21936 unspec = tmp;
21937 }
21938
21939 emit_move_insn (target, unspec);
21940 }
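/* The inversion works because vperm reads only the low five bits of
   each selector byte, and within five bits bitwise NOT equals
   subtraction from 31:

     31 - b == (~b) & 31	for 0 <= b < 32

   so NANDing or NORing the selector with itself applies the same
   "subtract from 31" adjustment that
   altivec_expand_vec_perm_const_le performs on constants.  */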
21941
21942 /* Expand an Altivec constant permutation. Return true if we match
21943 an efficient implementation; false to fall back to VPERM.
21944
21945 OP0 and OP1 are the input vectors and TARGET is the output vector.
21946 SEL specifies the constant permutation vector. */
21947
21948 static bool
21949 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
21950 const vec_perm_indices &sel)
21951 {
21952 struct altivec_perm_insn {
21953 HOST_WIDE_INT mask;
21954 enum insn_code impl;
21955 unsigned char perm[16];
21956 };
21957 static const struct altivec_perm_insn patterns[] = {
21958 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
21959 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
21960 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
21961 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
21962 { OPTION_MASK_ALTIVEC,
21963 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
21964 : CODE_FOR_altivec_vmrglb_direct),
21965 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
21966 { OPTION_MASK_ALTIVEC,
21967 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
21968 : CODE_FOR_altivec_vmrglh_direct),
21969 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
21970 { OPTION_MASK_ALTIVEC,
21971 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
21972 : CODE_FOR_altivec_vmrglw_direct),
21973 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
21974 { OPTION_MASK_ALTIVEC,
21975 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
21976 : CODE_FOR_altivec_vmrghb_direct),
21977 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
21978 { OPTION_MASK_ALTIVEC,
21979 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
21980 : CODE_FOR_altivec_vmrghh_direct),
21981 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
21982 { OPTION_MASK_ALTIVEC,
21983 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
21984 : CODE_FOR_altivec_vmrghw_direct),
21985 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
21986 { OPTION_MASK_P8_VECTOR,
21987 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
21988 : CODE_FOR_p8_vmrgow_v4sf_direct),
21989 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
21990 { OPTION_MASK_P8_VECTOR,
21991 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
21992 : CODE_FOR_p8_vmrgew_v4sf_direct),
21993 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
21994 };
21995
21996 unsigned int i, j, elt, which;
21997 unsigned char perm[16];
21998 rtx x;
21999 bool one_vec;
22000
22001 /* Unpack the constant selector. */
22002 for (i = which = 0; i < 16; ++i)
22003 {
22004 elt = sel[i] & 31;
22005 which |= (elt < 16 ? 1 : 2);
22006 perm[i] = elt;
22007 }
22008
22009 /* Simplify the constant selector based on operands. */
22010 switch (which)
22011 {
22012 default:
22013 gcc_unreachable ();
22014
22015 case 3:
22016 one_vec = false;
22017 if (!rtx_equal_p (op0, op1))
22018 break;
22019 /* FALLTHRU */
22020
22021 case 2:
22022 for (i = 0; i < 16; ++i)
22023 perm[i] &= 15;
22024 op0 = op1;
22025 one_vec = true;
22026 break;
22027
22028 case 1:
22029 op1 = op0;
22030 one_vec = true;
22031 break;
22032 }
22033
22034 /* Look for splat patterns. */
22035 if (one_vec)
22036 {
22037 elt = perm[0];
22038
22039 for (i = 0; i < 16; ++i)
22040 if (perm[i] != elt)
22041 break;
22042 if (i == 16)
22043 {
22044 if (!BYTES_BIG_ENDIAN)
22045 elt = 15 - elt;
22046 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
22047 return true;
22048 }
22049
22050 if (elt % 2 == 0)
22051 {
22052 for (i = 0; i < 16; i += 2)
22053 if (perm[i] != elt || perm[i + 1] != elt + 1)
22054 break;
22055 if (i == 16)
22056 {
22057 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
22058 x = gen_reg_rtx (V8HImode);
22059 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
22060 GEN_INT (field)));
22061 emit_move_insn (target, gen_lowpart (V16QImode, x));
22062 return true;
22063 }
22064 }
22065
22066 if (elt % 4 == 0)
22067 {
22068 for (i = 0; i < 16; i += 4)
22069 if (perm[i] != elt
22070 || perm[i + 1] != elt + 1
22071 || perm[i + 2] != elt + 2
22072 || perm[i + 3] != elt + 3)
22073 break;
22074 if (i == 16)
22075 {
22076 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
22077 x = gen_reg_rtx (V4SImode);
22078 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
22079 GEN_INT (field)));
22080 emit_move_insn (target, gen_lowpart (V16QImode, x));
22081 return true;
22082 }
22083 }
22084 }
22085
22086 /* Look for merge and pack patterns. */
22087 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
22088 {
22089 bool swapped;
22090
22091 if ((patterns[j].mask & rs6000_isa_flags) == 0)
22092 continue;
22093
22094 elt = patterns[j].perm[0];
22095 if (perm[0] == elt)
22096 swapped = false;
22097 else if (perm[0] == elt + 16)
22098 swapped = true;
22099 else
22100 continue;
22101 for (i = 1; i < 16; ++i)
22102 {
22103 elt = patterns[j].perm[i];
22104 if (swapped)
22105 elt = (elt >= 16 ? elt - 16 : elt + 16);
22106 else if (one_vec && elt >= 16)
22107 elt -= 16;
22108 if (perm[i] != elt)
22109 break;
22110 }
22111 if (i == 16)
22112 {
22113 enum insn_code icode = patterns[j].impl;
22114 machine_mode omode = insn_data[icode].operand[0].mode;
22115 machine_mode imode = insn_data[icode].operand[1].mode;
22116
22117 /* For little-endian, don't use vpkuwum and vpkuhum if the
22118 underlying vector type is not V4SI and V8HI, respectively.
22119 For example, using vpkuwum with a V8HI picks up the even
22120 halfwords (BE numbering) when the even halfwords (LE
22121 numbering) are what we need. */
22122 if (!BYTES_BIG_ENDIAN
22123 && icode == CODE_FOR_altivec_vpkuwum_direct
22124 && ((REG_P (op0)
22125 && GET_MODE (op0) != V4SImode)
22126 || (SUBREG_P (op0)
22127 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
22128 continue;
22129 if (!BYTES_BIG_ENDIAN
22130 && icode == CODE_FOR_altivec_vpkuhum_direct
22131 && ((REG_P (op0)
22132 && GET_MODE (op0) != V8HImode)
22133 || (SUBREG_P (op0)
22134 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
22135 continue;
22136
22137 /* For little-endian, the two input operands must be swapped
22138 (or swapped back) to ensure proper right-to-left numbering
22139 from 0 to 2N-1. */
22140 if (swapped ^ !BYTES_BIG_ENDIAN)
22141 std::swap (op0, op1);
22142 if (imode != V16QImode)
22143 {
22144 op0 = gen_lowpart (imode, op0);
22145 op1 = gen_lowpart (imode, op1);
22146 }
22147 if (omode == V16QImode)
22148 x = target;
22149 else
22150 x = gen_reg_rtx (omode);
22151 emit_insn (GEN_FCN (icode) (x, op0, op1));
22152 if (omode != V16QImode)
22153 emit_move_insn (target, gen_lowpart (V16QImode, x));
22154 return true;
22155 }
22156 }
22157
22158 if (!BYTES_BIG_ENDIAN)
22159 {
22160 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
22161 return true;
22162 }
22163
22164 return false;
22165 }
22166
22167 /* Expand a VSX Permute Doubleword constant permutation.
22168 Return true if we match an efficient implementation. */
22169
22170 static bool
22171 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
22172 unsigned char perm0, unsigned char perm1)
22173 {
22174 rtx x;
22175
22176 /* If both selectors come from the same operand, fold to single op. */
22177 if ((perm0 & 2) == (perm1 & 2))
22178 {
22179 if (perm0 & 2)
22180 op0 = op1;
22181 else
22182 op1 = op0;
22183 }
22184 /* If both operands are equal, fold to simpler permutation. */
22185 if (rtx_equal_p (op0, op1))
22186 {
22187 perm0 = perm0 & 1;
22188 perm1 = (perm1 & 1) + 2;
22189 }
22190 /* If the first selector comes from the second operand, swap. */
22191 else if (perm0 & 2)
22192 {
22193 if (perm1 & 2)
22194 return false;
22195 perm0 -= 2;
22196 perm1 += 2;
22197 std::swap (op0, op1);
22198 }
22199 /* If the second selector does not come from the second operand, fail. */
22200 else if ((perm1 & 2) == 0)
22201 return false;
22202
22203 /* Success! */
22204 if (target != NULL)
22205 {
22206 machine_mode vmode, dmode;
22207 rtvec v;
22208
22209 vmode = GET_MODE (target);
22210 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
22211 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
22212 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
22213 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
22214 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
22215 emit_insn (gen_rtx_SET (target, x));
22216 }
22217 return true;
22218 }
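/* For illustration: PERM0 and PERM1 index the four-doubleword
   concatenation {op0[0], op0[1], op1[0], op1[1]}, so bit 1 of each
   selector names the operand.  E.g. for V2DF, (perm0, perm1) ==
   (0, 3) selects { op0[0], op1[1] }, and the VEC_SELECT of a
   VEC_CONCAT emitted above is what the xxpermdi patterns match.  */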
22219
22220 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
22221
22222 static bool
22223 rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
22224 rtx op1, const vec_perm_indices &sel)
22225 {
22226 bool testing_p = !target;
22227
22228 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
22229 if (TARGET_ALTIVEC && testing_p)
22230 return true;
22231
22232 /* Check for ps_merge* or xxpermdi insns. */
22233 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
22234 {
22235 if (testing_p)
22236 {
22237 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
22238 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
22239 }
22240 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
22241 return true;
22242 }
22243
22244 if (TARGET_ALTIVEC)
22245 {
22246 /* Force the target-independent code to lower to V16QImode. */
22247 if (vmode != V16QImode)
22248 return false;
22249 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
22250 return true;
22251 }
22252
22253 return false;
22254 }
22255
22256 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
22257 OP0 and OP1 are the input vectors and TARGET is the output vector.
22258 PERM specifies the constant permutation vector. */
22259
22260 static void
22261 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
22262 machine_mode vmode, const vec_perm_builder &perm)
22263 {
22264 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
22265 if (x != target)
22266 emit_move_insn (target, x);
22267 }
22268
22269 /* Expand an extract even operation. */
22270
22271 void
22272 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
22273 {
22274 machine_mode vmode = GET_MODE (target);
22275 unsigned i, nelt = GET_MODE_NUNITS (vmode);
22276 vec_perm_builder perm (nelt, nelt, 1);
22277
22278 for (i = 0; i < nelt; i++)
22279 perm.quick_push (i * 2);
22280
22281 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
22282 }
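/* For example, with V4SI inputs the selector built above is
   { 0, 2, 4, 6 }: elements 0 and 2 of OP0 followed by elements
   0 and 2 of OP1.  */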
22283
22284 /* Expand a vector interleave operation. */
22285
22286 void
22287 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
22288 {
22289 machine_mode vmode = GET_MODE (target);
22290 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
22291 vec_perm_builder perm (nelt, nelt, 1);
22292
22293 high = (highp ? 0 : nelt / 2);
22294 for (i = 0; i < nelt / 2; i++)
22295 {
22296 perm.quick_push (i + high);
22297 perm.quick_push (i + nelt + high);
22298 }
22299
22300 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
22301 }
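/* For example, with V4SI inputs and HIGHP true the selector built
   above is { 0, 4, 1, 5 }, interleaving the first two elements of
   OP0 and OP1; HIGHP false gives { 2, 6, 3, 7 } for the second
   halves.  */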
22302
22303 /* Scale a V2DF vector SRC by 2**SCALE and place the result in TGT. */
22304 void
22305 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
22306 {
22307 HOST_WIDE_INT hwi_scale (scale);
22308 REAL_VALUE_TYPE r_pow;
22309 rtvec v = rtvec_alloc (2);
22310 rtx elt;
22311 rtx scale_vec = gen_reg_rtx (V2DFmode);
22312 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
22313 elt = const_double_from_real_value (r_pow, DFmode);
22314 RTVEC_ELT (v, 0) = elt;
22315 RTVEC_ELT (v, 1) = elt;
22316 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
22317 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
22318 }
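/* For example, rs6000_scale_v2df (tgt, src, 3) multiplies both
   doublewords of SRC by 2**3 == 8.0, and a negative SCALE divides by
   the corresponding power of two, since the splatted constant is
   built as 2**SCALE.  */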
22319
22320 /* Return an RTX representing where to find the function value of a
22321 function returning MODE. */
22322 static rtx
22323 rs6000_complex_function_value (machine_mode mode)
22324 {
22325 unsigned int regno;
22326 rtx r1, r2;
22327 machine_mode inner = GET_MODE_INNER (mode);
22328 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
22329
22330 if (TARGET_FLOAT128_TYPE
22331 && (mode == KCmode
22332 || (mode == TCmode && TARGET_IEEEQUAD)))
22333 regno = ALTIVEC_ARG_RETURN;
22334
22335 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
22336 regno = FP_ARG_RETURN;
22337
22338 else
22339 {
22340 regno = GP_ARG_RETURN;
22341
22342 /* 32-bit is OK since it'll go in r3/r4. */
22343 if (TARGET_32BIT && inner_bytes >= 4)
22344 return gen_rtx_REG (mode, regno);
22345 }
22346
22347 if (inner_bytes >= 8)
22348 return gen_rtx_REG (mode, regno);
22349
22350 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
22351 const0_rtx);
22352 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
22353 GEN_INT (inner_bytes));
22354 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
22355 }
22356
22357 /* Return an rtx describing a return value of MODE as a PARALLEL
22358 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
22359 stride REG_STRIDE. */
22360
22361 static rtx
22362 rs6000_parallel_return (machine_mode mode,
22363 int n_elts, machine_mode elt_mode,
22364 unsigned int regno, unsigned int reg_stride)
22365 {
22366 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
22367
22368 int i;
22369 for (i = 0; i < n_elts; i++)
22370 {
22371 rtx r = gen_rtx_REG (elt_mode, regno);
22372 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
22373 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
22374 regno += reg_stride;
22375 }
22376
22377 return par;
22378 }
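/* For example, rs6000_parallel_return (DImode, 2, SImode,
   GP_ARG_RETURN, 1) describes a 32-bit DImode return as

     (parallel [(expr_list (reg:SI 3) (const_int 0))
		(expr_list (reg:SI 4) (const_int 4))])

   i.e. the two halves in r3 and r4 at byte offsets 0 and 4.  */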
22379
22380 /* Target hook for TARGET_FUNCTION_VALUE.
22381
22382 An integer value is in r3 and a floating-point value is in fp1,
22383 unless -msoft-float. */
22384
22385 static rtx
22386 rs6000_function_value (const_tree valtype,
22387 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
22388 bool outgoing ATTRIBUTE_UNUSED)
22389 {
22390 machine_mode mode;
22391 unsigned int regno;
22392 machine_mode elt_mode;
22393 int n_elts;
22394
22395 /* Special handling for structs in darwin64. */
22396 if (TARGET_MACHO
22397 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
22398 {
22399 CUMULATIVE_ARGS valcum;
22400 rtx valret;
22401
22402 valcum.words = 0;
22403 valcum.fregno = FP_ARG_MIN_REG;
22404 valcum.vregno = ALTIVEC_ARG_MIN_REG;
22405 /* Do a trial code generation as if this were going to be passed as
22406 an argument; if any part goes in memory, we return NULL. */
22407 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
22408 if (valret)
22409 return valret;
22410 /* Otherwise fall through to standard ABI rules. */
22411 }
22412
22413 mode = TYPE_MODE (valtype);
22414
22415 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
22416 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
22417 {
22418 int first_reg, n_regs;
22419
22420 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
22421 {
22422 /* _Decimal128 must use even/odd register pairs. */
22423 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
22424 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
22425 }
22426 else
22427 {
22428 first_reg = ALTIVEC_ARG_RETURN;
22429 n_regs = 1;
22430 }
22431
22432 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
22433 }
22434
22435 /* Some return value types need to be split in the 32-bit ABI with -mpowerpc64. */
22436 if (TARGET_32BIT && TARGET_POWERPC64)
22437 switch (mode)
22438 {
22439 default:
22440 break;
22441 case E_DImode:
22442 case E_SCmode:
22443 case E_DCmode:
22444 case E_TCmode:
22445 int count = GET_MODE_SIZE (mode) / 4;
22446 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
22447 }
22448
22449 if ((INTEGRAL_TYPE_P (valtype)
22450 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
22451 || POINTER_TYPE_P (valtype))
22452 mode = TARGET_32BIT ? SImode : DImode;
22453
22454 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
22455 /* _Decimal128 must use an even/odd register pair. */
22456 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
22457 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
22458 && !FLOAT128_VECTOR_P (mode))
22459 regno = FP_ARG_RETURN;
22460 else if (TREE_CODE (valtype) == COMPLEX_TYPE
22461 && targetm.calls.split_complex_arg)
22462 return rs6000_complex_function_value (mode);
22463 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
22464 return register is used in both cases, and we won't see V2DImode/V2DFmode
22465 for pure altivec, combine the two cases. */
22466 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
22467 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
22468 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
22469 regno = ALTIVEC_ARG_RETURN;
22470 else
22471 regno = GP_ARG_RETURN;
22472
22473 return gen_rtx_REG (mode, regno);
22474 }
22475
22476 /* Define how to find the value returned by a library function
22477 assuming the value has mode MODE. */
22478 rtx
22479 rs6000_libcall_value (machine_mode mode)
22480 {
22481 unsigned int regno;
22482
22483 /* A long long return value needs to be split in the 32-bit ABI with -mpowerpc64. */
22484 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
22485 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
22486
22487 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
22488 /* _Decimal128 must use an even/odd register pair. */
22489 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
22490 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
22491 regno = FP_ARG_RETURN;
22492 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
22493 return register is used in both cases, and we won't see V2DImode/V2DFmode
22494 for pure altivec, combine the two cases. */
22495 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
22496 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
22497 regno = ALTIVEC_ARG_RETURN;
22498 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
22499 return rs6000_complex_function_value (mode);
22500 else
22501 regno = GP_ARG_RETURN;
22502
22503 return gen_rtx_REG (mode, regno);
22504 }
22505
22506 /* Compute register pressure classes. We implement the target hook to avoid
22507 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
22508 lead to incorrect estimates of the number of available registers and therefore
22509 increased register pressure/spill. */
22510 static int
22511 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
22512 {
22513 int n;
22514
22515 n = 0;
22516 pressure_classes[n++] = GENERAL_REGS;
22517 if (TARGET_VSX)
22518 pressure_classes[n++] = VSX_REGS;
22519 else
22520 {
22521 if (TARGET_ALTIVEC)
22522 pressure_classes[n++] = ALTIVEC_REGS;
22523 if (TARGET_HARD_FLOAT)
22524 pressure_classes[n++] = FLOAT_REGS;
22525 }
22526 pressure_classes[n++] = CR_REGS;
22527 pressure_classes[n++] = SPECIAL_REGS;
22528
22529 return n;
22530 }
22531
22532 /* Given FROM and TO register numbers, say whether this elimination is allowed.
22533 Frame pointer elimination is automatically handled.
22534
22535 For the RS/6000, if frame pointer elimination is being done, we would like
22536 to convert ap into fp, not sp.
22537
22538 We need r30 if -mminimal-toc was specified, and there are constant pool
22539 references. */
22540
22541 static bool
22542 rs6000_can_eliminate (const int from, const int to)
22543 {
22544 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
22545 ? ! frame_pointer_needed
22546 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
22547 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
22548 || constant_pool_empty_p ()
22549 : true);
22550 }
22551
22552 /* Define the offset between two registers, FROM to be eliminated and its
22553 replacement TO, at the start of a routine. */
22554 HOST_WIDE_INT
22555 rs6000_initial_elimination_offset (int from, int to)
22556 {
22557 rs6000_stack_t *info = rs6000_stack_info ();
22558 HOST_WIDE_INT offset;
22559
22560 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22561 offset = info->push_p ? 0 : -info->total_size;
22562 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22563 {
22564 offset = info->push_p ? 0 : -info->total_size;
22565 if (FRAME_GROWS_DOWNWARD)
22566 offset += info->fixed_size + info->vars_size + info->parm_size;
22567 }
22568 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
22569 offset = FRAME_GROWS_DOWNWARD
22570 ? info->fixed_size + info->vars_size + info->parm_size
22571 : 0;
22572 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
22573 offset = info->total_size;
22574 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22575 offset = info->push_p ? info->total_size : 0;
22576 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
22577 offset = 0;
22578 else
22579 gcc_unreachable ();
22580
22581 return offset;
22582 }
22583
22584 /* Fill in sizes of registers used by unwinder. */
22585
22586 static void
22587 rs6000_init_dwarf_reg_sizes_extra (tree address)
22588 {
22589 if (TARGET_MACHO && ! TARGET_ALTIVEC)
22590 {
22591 int i;
22592 machine_mode mode = TYPE_MODE (char_type_node);
22593 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
22594 rtx mem = gen_rtx_MEM (BLKmode, addr);
22595 rtx value = gen_int_mode (16, mode);
22596
22597 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
22598 The unwinder still needs to know the size of Altivec registers. */
22599
22600 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
22601 {
22602 int column = DWARF_REG_TO_UNWIND_COLUMN
22603 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
22604 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
22605
22606 emit_move_insn (adjust_address (mem, mode, offset), value);
22607 }
22608 }
22609 }
22610
22611 /* Map internal gcc register numbers to debug format register numbers.
22612 FORMAT specifies the type of debug register number to use:
22613 0 -- debug information, except for frame-related sections
22614 1 -- DWARF .debug_frame section
22615 2 -- DWARF .eh_frame section */
22616
22617 unsigned int
22618 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
22619 {
22620 /* On some platforms, we use the standard DWARF register
22621 numbering for .debug_info and .debug_frame. */
22622 if ((format == 0 && write_symbols == DWARF2_DEBUG) || format == 1)
22623 {
22624 #ifdef RS6000_USE_DWARF_NUMBERING
22625 if (regno <= 31)
22626 return regno;
22627 if (FP_REGNO_P (regno))
22628 return regno - FIRST_FPR_REGNO + 32;
22629 if (ALTIVEC_REGNO_P (regno))
22630 return regno - FIRST_ALTIVEC_REGNO + 1124;
22631 if (regno == LR_REGNO)
22632 return 108;
22633 if (regno == CTR_REGNO)
22634 return 109;
22635 if (regno == CA_REGNO)
22636 return 101; /* XER */
22637 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
22638 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
22639 The actual code emitted saves the whole of CR, so we map CR2_REGNO
22640 to the DWARF reg for CR. */
22641 if (format == 1 && regno == CR2_REGNO)
22642 return 64;
22643 if (CR_REGNO_P (regno))
22644 return regno - CR0_REGNO + 86;
22645 if (regno == VRSAVE_REGNO)
22646 return 356;
22647 if (regno == VSCR_REGNO)
22648 return 67;
22649
22650 /* These do not make much sense. */
22651 if (regno == FRAME_POINTER_REGNUM)
22652 return 111;
22653 if (regno == ARG_POINTER_REGNUM)
22654 return 67;
22655 if (regno == 64)
22656 return 100;
22657
22658 gcc_unreachable ();
22659 #endif
22660 }
22661
22662 /* We use the GCC 7 (and before) internal number for non-DWARF debug
22663 information, and also for .eh_frame. */
22664 /* Translate the regnos to their numbers in GCC 7 (and before). */
22665 if (regno <= 31)
22666 return regno;
22667 if (FP_REGNO_P (regno))
22668 return regno - FIRST_FPR_REGNO + 32;
22669 if (ALTIVEC_REGNO_P (regno))
22670 return regno - FIRST_ALTIVEC_REGNO + 77;
22671 if (regno == LR_REGNO)
22672 return 65;
22673 if (regno == CTR_REGNO)
22674 return 66;
22675 if (regno == CA_REGNO)
22676 return 76; /* XER */
22677 if (CR_REGNO_P (regno))
22678 return regno - CR0_REGNO + 68;
22679 if (regno == VRSAVE_REGNO)
22680 return 109;
22681 if (regno == VSCR_REGNO)
22682 return 110;
22683
22684 if (regno == FRAME_POINTER_REGNUM)
22685 return 111;
22686 if (regno == ARG_POINTER_REGNUM)
22687 return 67;
22688 if (regno == 64)
22689 return 64;
22690
22691 gcc_unreachable ();
22692 }
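/* For example, with RS6000_USE_DWARF_NUMBERING defined, LR is
   register 108 in .debug_frame but keeps the historical GCC number
   65 in .eh_frame, and the first AltiVec register is 1124 versus 77.
   GPRs and FPRs get the same numbers under both schemes.  */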
22693
22694 /* target hook eh_return_filter_mode */
22695 static scalar_int_mode
22696 rs6000_eh_return_filter_mode (void)
22697 {
22698 return TARGET_32BIT ? SImode : word_mode;
22699 }
22700
22701 /* Target hook for translate_mode_attribute. */
22702 static machine_mode
22703 rs6000_translate_mode_attribute (machine_mode mode)
22704 {
22705 if ((FLOAT128_IEEE_P (mode)
22706 && ieee128_float_type_node == long_double_type_node)
22707 || (FLOAT128_IBM_P (mode)
22708 && ibm128_float_type_node == long_double_type_node))
22709 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
22710 return mode;
22711 }
22712
22713 /* Target hook for scalar_mode_supported_p. */
22714 static bool
22715 rs6000_scalar_mode_supported_p (scalar_mode mode)
22716 {
22717 /* -m32 does not support TImode. This is the default, from
22718 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
22719 same ABI as for -m32. But default_scalar_mode_supported_p allows
22720 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
22721 for -mpowerpc64. */
22722 if (TARGET_32BIT && mode == TImode)
22723 return false;
22724
22725 if (DECIMAL_FLOAT_MODE_P (mode))
22726 return default_decimal_float_supported_p ();
22727 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
22728 return true;
22729 else
22730 return default_scalar_mode_supported_p (mode);
22731 }
22732
22733 /* Target hook for vector_mode_supported_p. */
22734 static bool
22735 rs6000_vector_mode_supported_p (machine_mode mode)
22736 {
22737 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
22738 128-bit, the compiler might try to widen IEEE 128-bit to IBM
22739 double-double. */
22740 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
22741 return true;
22742
22743 else
22744 return false;
22745 }
22746
22747 /* Target hook for floatn_mode. */
22748 static opt_scalar_float_mode
22749 rs6000_floatn_mode (int n, bool extended)
22750 {
22751 if (extended)
22752 {
22753 switch (n)
22754 {
22755 case 32:
22756 return DFmode;
22757
22758 case 64:
22759 if (TARGET_FLOAT128_TYPE)
22760 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22761 else
22762 return opt_scalar_float_mode ();
22763
22764 case 128:
22765 return opt_scalar_float_mode ();
22766
22767 default:
22768 /* Those are the only valid _FloatNx types. */
22769 gcc_unreachable ();
22770 }
22771 }
22772 else
22773 {
22774 switch (n)
22775 {
22776 case 32:
22777 return SFmode;
22778
22779 case 64:
22780 return DFmode;
22781
22782 case 128:
22783 if (TARGET_FLOAT128_TYPE)
22784 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22785 else
22786 return opt_scalar_float_mode ();
22787
22788 default:
22789 return opt_scalar_float_mode ();
22790 }
22791 }
22792
22793 }
22794
22795 /* Target hook for c_mode_for_suffix. */
22796 static machine_mode
22797 rs6000_c_mode_for_suffix (char suffix)
22798 {
22799 if (TARGET_FLOAT128_TYPE)
22800 {
22801 if (suffix == 'q' || suffix == 'Q')
22802 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22803
22804 /* At the moment, we are not defining a suffix for IBM extended double.
22805 If/when the default for -mabi=ieeelongdouble is changed, and we want
22806 to support __ibm128 constants in legacy library code, we may need to
22807 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
22808 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
22809 __float80 constants. */
22810 }
22811
22812 return VOIDmode;
22813 }
22814
22815 /* Target hook for invalid_arg_for_unprototyped_fn. */
22816 static const char *
22817 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
22818 {
22819 return (!rs6000_darwin64_abi
22820 && typelist == 0
22821 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
22822 && (funcdecl == NULL_TREE
22823 || (TREE_CODE (funcdecl) == FUNCTION_DECL
22824 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
22825 ? N_("AltiVec argument passed to unprototyped function")
22826 : NULL;
22827 }
22828
22829 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
22830 setup by using __stack_chk_fail_local hidden function instead of
22831 calling __stack_chk_fail directly. Otherwise it is better to call
22832 __stack_chk_fail directly. */
22833
22834 static tree ATTRIBUTE_UNUSED
22835 rs6000_stack_protect_fail (void)
22836 {
22837 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
22838 ? default_hidden_stack_protect_fail ()
22839 : default_external_stack_protect_fail ();
22840 }
22841
22842 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
22843
22844 #if TARGET_ELF
22845 static unsigned HOST_WIDE_INT
22846 rs6000_asan_shadow_offset (void)
22847 {
22848 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
22849 }
22850 #endif
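/* For exposition only: with this offset, the standard
   AddressSanitizer mapping on this target is

     shadow = (addr >> 3) + (1ULL << 41)	(64-bit)
     shadow = (addr >> 3) + (1UL << 29)		(32-bit)

   one shadow byte per eight bytes of application memory.  */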
22851 \f
22852 /* Mask options that we want to support inside of attribute((target)) and
22853 #pragma GCC target operations. Note, we do not include things like
22854 64/32-bit, endianness, hard/soft floating point, etc. that would have
22855 different calling sequences. */
22856
22857 struct rs6000_opt_mask {
22858 const char *name; /* option name */
22859 HOST_WIDE_INT mask; /* mask to set */
22860 bool invert; /* invert sense of mask */
22861 bool valid_target; /* option is a target option */
22862 };
22863
22864 static struct rs6000_opt_mask const rs6000_opt_masks[] =
22865 {
22866 { "altivec", OPTION_MASK_ALTIVEC, false, true },
22867 { "cmpb", OPTION_MASK_CMPB, false, true },
22868 { "crypto", OPTION_MASK_CRYPTO, false, true },
22869 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
22870 { "dlmzb", OPTION_MASK_DLMZB, false, true },
22871 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
22872 false, true },
22873 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
22874 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
22875 { "fprnd", OPTION_MASK_FPRND, false, true },
22876 { "future", OPTION_MASK_FUTURE, false, true },
22877 { "hard-dfp", OPTION_MASK_DFP, false, true },
22878 { "htm", OPTION_MASK_HTM, false, true },
22879 { "isel", OPTION_MASK_ISEL, false, true },
22880 { "mfcrf", OPTION_MASK_MFCRF, false, true },
22881 { "mfpgpr", 0, false, true },
22882 { "modulo", OPTION_MASK_MODULO, false, true },
22883 { "mulhw", OPTION_MASK_MULHW, false, true },
22884 { "multiple", OPTION_MASK_MULTIPLE, false, true },
22885 { "pcrel", OPTION_MASK_PCREL, false, true },
22886 { "popcntb", OPTION_MASK_POPCNTB, false, true },
22887 { "popcntd", OPTION_MASK_POPCNTD, false, true },
22888 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
22889 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
22890 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
22891 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
22892 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
22893 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
22894 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
22895 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
22896 { "prefixed", OPTION_MASK_PREFIXED, false, true },
22897 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
22898 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
22899 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
22900 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
22901 { "string", 0, false, true },
22902 { "update", OPTION_MASK_NO_UPDATE, true , true },
22903 { "vsx", OPTION_MASK_VSX, false, true },
22904 #ifdef OPTION_MASK_64BIT
22905 #if TARGET_AIX_OS
22906 { "aix64", OPTION_MASK_64BIT, false, false },
22907 { "aix32", OPTION_MASK_64BIT, true, false },
22908 #else
22909 { "64", OPTION_MASK_64BIT, false, false },
22910 { "32", OPTION_MASK_64BIT, true, false },
22911 #endif
22912 #endif
22913 #ifdef OPTION_MASK_EABI
22914 { "eabi", OPTION_MASK_EABI, false, false },
22915 #endif
22916 #ifdef OPTION_MASK_LITTLE_ENDIAN
22917 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
22918 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
22919 #endif
22920 #ifdef OPTION_MASK_RELOCATABLE
22921 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
22922 #endif
22923 #ifdef OPTION_MASK_STRICT_ALIGN
22924 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
22925 #endif
22926 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
22927 { "string", 0, false, false },
22928 };
22929
22930 /* Builtin mask mapping for printing the flags. */
22931 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
22932 {
22933 { "altivec", RS6000_BTM_ALTIVEC, false, false },
22934 { "vsx", RS6000_BTM_VSX, false, false },
22935 { "fre", RS6000_BTM_FRE, false, false },
22936 { "fres", RS6000_BTM_FRES, false, false },
22937 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
22938 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
22939 { "popcntd", RS6000_BTM_POPCNTD, false, false },
22940 { "cell", RS6000_BTM_CELL, false, false },
22941 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
22942 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
22943 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
22944 { "crypto", RS6000_BTM_CRYPTO, false, false },
22945 { "htm", RS6000_BTM_HTM, false, false },
22946 { "hard-dfp", RS6000_BTM_DFP, false, false },
22947 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
22948 { "long-double-128", RS6000_BTM_LDBL128, false, false },
22949 { "powerpc64", RS6000_BTM_POWERPC64, false, false },
22950 { "float128", RS6000_BTM_FLOAT128, false, false },
22951 { "float128-hw", RS6000_BTM_FLOAT128_HW,false, false },
22952 };
22953
22954 /* Option variables that we want to support inside attribute((target)) and
22955 #pragma GCC target operations. */
22956
22957 struct rs6000_opt_var {
22958 const char *name; /* option name */
22959 size_t global_offset; /* offset of the option in global_options. */
22960 size_t target_offset; /* offset of the option in target options. */
22961 };
22962
22963 static struct rs6000_opt_var const rs6000_opt_vars[] =
22964 {
22965 { "friz",
22966 offsetof (struct gcc_options, x_TARGET_FRIZ),
22967 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
22968 { "avoid-indexed-addresses",
22969 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
22970 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
22971 { "longcall",
22972 offsetof (struct gcc_options, x_rs6000_default_long_calls),
22973 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
22974 { "optimize-swaps",
22975 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
22976 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
22977 { "allow-movmisalign",
22978 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
22979 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
22980 { "sched-groups",
22981 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
22982 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
22983 { "always-hint",
22984 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
22985 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
22986 { "align-branch-targets",
22987 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
22988 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
22989 { "sched-prolog",
22990 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
22991 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
22992 { "sched-epilog",
22993 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
22994 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
22995 { "speculate-indirect-jumps",
22996 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
22997 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
22998 };
22999
23000 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
23001 parsing. Return true if there were no errors. */
23002
23003 static bool
23004 rs6000_inner_target_options (tree args, bool attr_p)
23005 {
23006 bool ret = true;
23007
23008 if (args == NULL_TREE)
23009 ;
23010
23011 else if (TREE_CODE (args) == STRING_CST)
23012 {
23013 char *p = ASTRDUP (TREE_STRING_POINTER (args));
23014 char *q;
23015
23016 while ((q = strtok (p, ",")) != NULL)
23017 {
23018 bool error_p = false;
23019 bool not_valid_p = false;
23020 const char *cpu_opt = NULL;
23021
23022 p = NULL;
23023 if (strncmp (q, "cpu=", 4) == 0)
23024 {
23025 int cpu_index = rs6000_cpu_name_lookup (q+4);
23026 if (cpu_index >= 0)
23027 rs6000_cpu_index = cpu_index;
23028 else
23029 {
23030 error_p = true;
23031 cpu_opt = q+4;
23032 }
23033 }
23034 else if (strncmp (q, "tune=", 5) == 0)
23035 {
23036 int tune_index = rs6000_cpu_name_lookup (q+5);
23037 if (tune_index >= 0)
23038 rs6000_tune_index = tune_index;
23039 else
23040 {
23041 error_p = true;
23042 cpu_opt = q+5;
23043 }
23044 }
23045 else
23046 {
23047 size_t i;
23048 bool invert = false;
23049 char *r = q;
23050
23051 error_p = true;
23052 if (strncmp (r, "no-", 3) == 0)
23053 {
23054 invert = true;
23055 r += 3;
23056 }
23057
23058 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
23059 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
23060 {
23061 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
23062
23063 if (!rs6000_opt_masks[i].valid_target)
23064 not_valid_p = true;
23065 else
23066 {
23067 error_p = false;
23068 rs6000_isa_flags_explicit |= mask;
23069
23070 /* VSX needs altivec, so -mvsx automagically sets
23071 altivec and disables -mavoid-indexed-addresses. */
23072 if (!invert)
23073 {
23074 if (mask == OPTION_MASK_VSX)
23075 {
23076 mask |= OPTION_MASK_ALTIVEC;
23077 TARGET_AVOID_XFORM = 0;
23078 }
23079 }
23080
23081 if (rs6000_opt_masks[i].invert)
23082 invert = !invert;
23083
23084 if (invert)
23085 rs6000_isa_flags &= ~mask;
23086 else
23087 rs6000_isa_flags |= mask;
23088 }
23089 break;
23090 }
23091
23092 if (error_p && !not_valid_p)
23093 {
23094 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
23095 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
23096 {
23097 size_t j = rs6000_opt_vars[i].global_offset;
23098 *((int *) ((char *)&global_options + j)) = !invert;
23099 error_p = false;
23100 not_valid_p = false;
23101 break;
23102 }
23103 }
23104 }
23105
23106 if (error_p)
23107 {
23108 const char *eprefix, *esuffix;
23109
23110 ret = false;
23111 if (attr_p)
23112 {
23113 eprefix = "__attribute__((__target__(";
23114 esuffix = ")))";
23115 }
23116 else
23117 {
23118 eprefix = "#pragma GCC target ";
23119 esuffix = "";
23120 }
23121
23122 if (cpu_opt)
23123 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
23124 q, esuffix);
23125 else if (not_valid_p)
23126 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
23127 else
23128 error ("%s%qs%s is invalid", eprefix, q, esuffix);
23129 }
23130 }
23131 }
23132
23133 else if (TREE_CODE (args) == TREE_LIST)
23134 {
23135 do
23136 {
23137 tree value = TREE_VALUE (args);
23138 if (value)
23139 {
23140 bool ret2 = rs6000_inner_target_options (value, attr_p);
23141 if (!ret2)
23142 ret = false;
23143 }
23144 args = TREE_CHAIN (args);
23145 }
23146 while (args != NULL_TREE);
23147 }
23148
23149 else
23150 {
23151 error ("attribute %<target%> argument not a string");
23152 return false;
23153 }
23154
23155 return ret;
23156 }
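/* For example, both of these reach rs6000_inner_target_options:

     #pragma GCC target ("cpu=power9,no-vsx")
     int f (void) __attribute__ ((target ("popcntd,tune=power8")));

   Each comma-separated token is matched against "cpu="/"tune=" first,
   then against rs6000_opt_masks (honoring a "no-" prefix), and
   finally against rs6000_opt_vars.  */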
23157
23158 /* Print out the target options as a list for -mdebug=target. */
23159
23160 static void
23161 rs6000_debug_target_options (tree args, const char *prefix)
23162 {
23163 if (args == NULL_TREE)
23164 fprintf (stderr, "%s<NULL>", prefix);
23165
23166 else if (TREE_CODE (args) == STRING_CST)
23167 {
23168 char *p = ASTRDUP (TREE_STRING_POINTER (args));
23169 char *q;
23170
23171 while ((q = strtok (p, ",")) != NULL)
23172 {
23173 p = NULL;
23174 fprintf (stderr, "%s\"%s\"", prefix, q);
23175 prefix = ", ";
23176 }
23177 }
23178
23179 else if (TREE_CODE (args) == TREE_LIST)
23180 {
23181 do
23182 {
23183 tree value = TREE_VALUE (args);
23184 if (value)
23185 {
23186 rs6000_debug_target_options (value, prefix);
23187 prefix = ", ";
23188 }
23189 args = TREE_CHAIN (args);
23190 }
23191 while (args != NULL_TREE);
23192 }
23193
23194 else
23195 gcc_unreachable ();
23196
23197 return;
23198 }
23199
23200 \f
23201 /* Hook to validate attribute((target("..."))). */
23202
23203 static bool
23204 rs6000_valid_attribute_p (tree fndecl,
23205 tree ARG_UNUSED (name),
23206 tree args,
23207 int flags)
23208 {
23209 struct cl_target_option cur_target;
23210 bool ret;
23211 tree old_optimize;
23212 tree new_target, new_optimize;
23213 tree func_optimize;
23214
23215 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
23216
23217 if (TARGET_DEBUG_TARGET)
23218 {
23219 tree tname = DECL_NAME (fndecl);
23220 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
23221 if (tname)
23222 fprintf (stderr, "function: %.*s\n",
23223 (int) IDENTIFIER_LENGTH (tname),
23224 IDENTIFIER_POINTER (tname));
23225 else
23226 fprintf (stderr, "function: unknown\n");
23227
23228 fprintf (stderr, "args:");
23229 rs6000_debug_target_options (args, " ");
23230 fprintf (stderr, "\n");
23231
23232 if (flags)
23233 fprintf (stderr, "flags: 0x%x\n", flags);
23234
23235 fprintf (stderr, "--------------------\n");
23236 }
23237
23238 /* attribute((target("default"))) does nothing, beyond
23239 affecting multi-versioning. */
23240 if (TREE_VALUE (args)
23241 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
23242 && TREE_CHAIN (args) == NULL_TREE
23243 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
23244 return true;
23245
23246 old_optimize = build_optimization_node (&global_options);
23247 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
23248
23249 /* If the function changed the optimization levels as well as setting target
23250 options, start with the optimizations specified. */
23251 if (func_optimize && func_optimize != old_optimize)
23252 cl_optimization_restore (&global_options,
23253 TREE_OPTIMIZATION (func_optimize));
23254
23255 /* The target attributes may also change some optimization flags, so update
23256 the optimization options if necessary. */
23257 cl_target_option_save (&cur_target, &global_options);
23258 rs6000_cpu_index = rs6000_tune_index = -1;
23259 ret = rs6000_inner_target_options (args, true);
23260
23261 /* Set up any additional state. */
23262 if (ret)
23263 {
23264 ret = rs6000_option_override_internal (false);
23265 new_target = build_target_option_node (&global_options);
23266 }
23267 else
23268 new_target = NULL;
23269
23270 new_optimize = build_optimization_node (&global_options);
23271
23272 if (!new_target)
23273 ret = false;
23274
23275 else if (fndecl)
23276 {
23277 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
23278
23279 if (old_optimize != new_optimize)
23280 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
23281 }
23282
23283 cl_target_option_restore (&global_options, &cur_target);
23284
23285 if (old_optimize != new_optimize)
23286 cl_optimization_restore (&global_options,
23287 TREE_OPTIMIZATION (old_optimize));
23288
23289 return ret;
23290 }
23291
23292 \f
23293 /* Hook to validate the current #pragma GCC target and set the state, and
23294 update the macros based on what was changed. If ARGS is NULL, then
23295 POP_TARGET is used to reset the options. */
23296
23297 bool
23298 rs6000_pragma_target_parse (tree args, tree pop_target)
23299 {
23300 tree prev_tree = build_target_option_node (&global_options);
23301 tree cur_tree;
23302 struct cl_target_option *prev_opt, *cur_opt;
23303 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
23304 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
23305
23306 if (TARGET_DEBUG_TARGET)
23307 {
23308 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
23309 fprintf (stderr, "args:");
23310 rs6000_debug_target_options (args, " ");
23311 fprintf (stderr, "\n");
23312
23313 if (pop_target)
23314 {
23315 fprintf (stderr, "pop_target:\n");
23316 debug_tree (pop_target);
23317 }
23318 else
23319 fprintf (stderr, "pop_target: <NULL>\n");
23320
23321 fprintf (stderr, "--------------------\n");
23322 }
23323
23324 if (! args)
23325 {
23326 cur_tree = ((pop_target)
23327 ? pop_target
23328 : target_option_default_node);
23329 cl_target_option_restore (&global_options,
23330 TREE_TARGET_OPTION (cur_tree));
23331 }
23332 else
23333 {
23334 rs6000_cpu_index = rs6000_tune_index = -1;
23335 if (!rs6000_inner_target_options (args, false)
23336 || !rs6000_option_override_internal (false)
23337 || (cur_tree = build_target_option_node (&global_options))
23338 == NULL_TREE)
23339 {
23340 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
23341 fprintf (stderr, "invalid pragma\n");
23342
23343 return false;
23344 }
23345 }
23346
23347 target_option_current_node = cur_tree;
23348 rs6000_activate_target_options (target_option_current_node);
23349
23350 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
23351 change the macros that are defined. */
23352 if (rs6000_target_modify_macros_ptr)
23353 {
23354 prev_opt = TREE_TARGET_OPTION (prev_tree);
23355 prev_bumask = prev_opt->x_rs6000_builtin_mask;
23356 prev_flags = prev_opt->x_rs6000_isa_flags;
23357
23358 cur_opt = TREE_TARGET_OPTION (cur_tree);
23359 cur_flags = cur_opt->x_rs6000_isa_flags;
23360 cur_bumask = cur_opt->x_rs6000_builtin_mask;
23361
23362 diff_bumask = (prev_bumask ^ cur_bumask);
23363 diff_flags = (prev_flags ^ cur_flags);
23364
23365 if ((diff_flags != 0) || (diff_bumask != 0))
23366 {
23367 /* Delete old macros. */
23368 rs6000_target_modify_macros_ptr (false,
23369 prev_flags & diff_flags,
23370 prev_bumask & diff_bumask);
23371
23372 /* Define new macros. */
23373 rs6000_target_modify_macros_ptr (true,
23374 cur_flags & diff_flags,
23375 cur_bumask & diff_bumask);
23376 }
23377 }
23378
23379 return true;
23380 }
23381
23382 \f
23383 /* Remember the last target of rs6000_set_current_function. */
23384 static GTY(()) tree rs6000_previous_fndecl;
23385
23386 /* Restore target's globals from NEW_TREE and invalidate the
23387 rs6000_previous_fndecl cache. */
23388
23389 void
23390 rs6000_activate_target_options (tree new_tree)
23391 {
23392 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
23393 if (TREE_TARGET_GLOBALS (new_tree))
23394 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
23395 else if (new_tree == target_option_default_node)
23396 restore_target_globals (&default_target_globals);
23397 else
23398 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
23399 rs6000_previous_fndecl = NULL_TREE;
23400 }
23401
23402 /* Establish appropriate back-end context for processing the function
23403 FNDECL. The argument might be NULL to indicate processing at top
23404 level, outside of any function scope. */
23405 static void
23406 rs6000_set_current_function (tree fndecl)
23407 {
23408 if (TARGET_DEBUG_TARGET)
23409 {
23410 fprintf (stderr, "\n==================== rs6000_set_current_function");
23411
23412 if (fndecl)
23413 fprintf (stderr, ", fndecl %s (%p)",
23414 (DECL_NAME (fndecl)
23415 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
23416 : "<unknown>"), (void *)fndecl);
23417
23418 if (rs6000_previous_fndecl)
23419 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
23420
23421 fprintf (stderr, "\n");
23422 }
23423
23424 /* Only change the context if the function changes. This hook is called
23425 several times in the course of compiling a function, and we don't want to
23426 slow things down too much or call target_reinit when it isn't safe. */
23427 if (fndecl == rs6000_previous_fndecl)
23428 return;
23429
23430 tree old_tree;
23431 if (rs6000_previous_fndecl == NULL_TREE)
23432 old_tree = target_option_current_node;
23433 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
23434 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
23435 else
23436 old_tree = target_option_default_node;
23437
23438 tree new_tree;
23439 if (fndecl == NULL_TREE)
23440 {
23441 if (old_tree != target_option_current_node)
23442 new_tree = target_option_current_node;
23443 else
23444 new_tree = NULL_TREE;
23445 }
23446 else
23447 {
23448 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
23449 if (new_tree == NULL_TREE)
23450 new_tree = target_option_default_node;
23451 }
23452
23453 if (TARGET_DEBUG_TARGET)
23454 {
23455 if (new_tree)
23456 {
23457 fprintf (stderr, "\nnew fndecl target specific options:\n");
23458 debug_tree (new_tree);
23459 }
23460
23461 if (old_tree)
23462 {
23463 fprintf (stderr, "\nold fndecl target specific options:\n");
23464 debug_tree (old_tree);
23465 }
23466
23467 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
23468 fprintf (stderr, "--------------------\n");
23469 }
23470
23471 if (new_tree && old_tree != new_tree)
23472 rs6000_activate_target_options (new_tree);
23473
23474 if (fndecl)
23475 rs6000_previous_fndecl = fndecl;
23476 }
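/* As a sketch of when this hook matters (user-level code, illustrative
   only):

       __attribute__ ((target ("cpu=power9")))
       int fast_variant (int x) { return x; }

       int plain_variant (int x) { return x; }

   Moving from compiling fast_variant to plain_variant changes
   DECL_FUNCTION_SPECIFIC_TARGET, so the comparison above detects the switch
   and re-activates the appropriate target state.  */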
23477
23478 \f
23479 /* Save the current options.  */
23480
23481 static void
23482 rs6000_function_specific_save (struct cl_target_option *ptr,
23483 struct gcc_options *opts)
23484 {
23485 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
23486 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
23487 }
23488
23489 /* Restore the current options.  */
23490
23491 static void
23492 rs6000_function_specific_restore (struct gcc_options *opts,
23493 struct cl_target_option *ptr)
23494
23495 {
23496 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
23497 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
23498 (void) rs6000_option_override_internal (false);
23499 }
23500
23501 /* Print the current options.  */
23502
23503 static void
23504 rs6000_function_specific_print (FILE *file, int indent,
23505 struct cl_target_option *ptr)
23506 {
23507 rs6000_print_isa_options (file, indent, "Isa options set",
23508 ptr->x_rs6000_isa_flags);
23509
23510 rs6000_print_isa_options (file, indent, "Isa options explicit",
23511 ptr->x_rs6000_isa_flags_explicit);
23512 }
23513
23514 /* Helper function to print the current isa or misc options on a line. */
23515
23516 static void
23517 rs6000_print_options_internal (FILE *file,
23518 int indent,
23519 const char *string,
23520 HOST_WIDE_INT flags,
23521 const char *prefix,
23522 const struct rs6000_opt_mask *opts,
23523 size_t num_elements)
23524 {
23525 size_t i;
23526 size_t start_column = 0;
23527 size_t cur_column;
23528 size_t max_column = 120;
23529 size_t prefix_len = strlen (prefix);
23530 size_t comma_len = 0;
23531 const char *comma = "";
23532
23533 if (indent)
23534 start_column += fprintf (file, "%*s", indent, "");
23535
23536 if (!flags)
23537 {
23538 fprintf (file, DEBUG_FMT_S, string, "<none>");
23539 return;
23540 }
23541
23542 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
23543
23544 /* Print the various mask options. */
23545 cur_column = start_column;
23546 for (i = 0; i < num_elements; i++)
23547 {
23548 bool invert = opts[i].invert;
23549 const char *name = opts[i].name;
23550 const char *no_str = "";
23551 HOST_WIDE_INT mask = opts[i].mask;
23552 size_t len = comma_len + prefix_len + strlen (name);
23553
23554 if (!invert)
23555 {
23556 if ((flags & mask) == 0)
23557 {
23558 no_str = "no-";
23559 len += strlen ("no-");
23560 }
23561
23562 flags &= ~mask;
23563 }
23564
23565 else
23566 {
23567 if ((flags & mask) != 0)
23568 {
23569 no_str = "no-";
23570 len += strlen ("no-");
23571 }
23572
23573 flags |= mask;
23574 }
23575
23576 cur_column += len;
23577 if (cur_column > max_column)
23578 {
23579 fprintf (file, ", \\\n%*s", (int)start_column, "");
23580 cur_column = start_column + len;
23581 comma = "";
23582 }
23583
23584 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
23585 comma = ", ";
23586 comma_len = strlen (", ");
23587 }
23588
23589 fputs ("\n", file);
23590 }
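/* With -mdebug=target, a line printed by this helper looks roughly like the
   following (illustrative only; the exact header format comes from
   DEBUG_FMT_WX, and the flag set depends on the configuration):

       Isa options set = 0x...  -maltivec, -mvsx, -mno-crypto, \
                                -mno-float128, ...

   i.e. the header built from STRING and FLAGS, followed by each mask name
   with the given PREFIX and a "no-" infix where the bit state requires
   it.  */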
23591
23592 /* Helper function to print the current isa options on a line. */
23593
23594 static void
23595 rs6000_print_isa_options (FILE *file, int indent, const char *string,
23596 HOST_WIDE_INT flags)
23597 {
23598 rs6000_print_options_internal (file, indent, string, flags, "-m",
23599 &rs6000_opt_masks[0],
23600 ARRAY_SIZE (rs6000_opt_masks));
23601 }
23602
23603 static void
23604 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
23605 HOST_WIDE_INT flags)
23606 {
23607 rs6000_print_options_internal (file, indent, string, flags, "",
23608 &rs6000_builtin_mask_names[0],
23609 ARRAY_SIZE (rs6000_builtin_mask_names));
23610 }
23611
23612 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
23613 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
23614 -mupper-regs-df, etc.).
23615
23616 If the user used -mno-power8-vector, we need to turn off all of the implicit
23617 ISA 2.07 and 3.0 options that relate to the vector unit.
23618
23619 If the user used -mno-power9-vector, we need to turn off all of the implicit
23620 ISA 3.0 options that relate to the vector unit.
23621
23622 This function does not handle explicit options such as the user specifying
23623 -mdirect-move. These are handled in rs6000_option_override_internal, and
23624 the appropriate error is given if needed.
23625
23626 We return a mask of all of the implicit options that should not be enabled
23627 by default. */
23628
23629 static HOST_WIDE_INT
23630 rs6000_disable_incompatible_switches (void)
23631 {
23632 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
23633 size_t i, j;
23634
23635 static const struct {
23636 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
23637 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
23638 const char *const name; /* name of the switch. */
23639 } flags[] = {
23640 { OPTION_MASK_FUTURE, OTHER_FUTURE_MASKS, "future" },
23641 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
23642 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
23643 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
23644 { OPTION_MASK_ALTIVEC, OTHER_ALTIVEC_MASKS, "altivec" },
23645 };
23646
23647 for (i = 0; i < ARRAY_SIZE (flags); i++)
23648 {
23649 HOST_WIDE_INT no_flag = flags[i].no_flag;
23650
23651 if ((rs6000_isa_flags & no_flag) == 0
23652 && (rs6000_isa_flags_explicit & no_flag) != 0)
23653 {
23654 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
23655 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
23656 & rs6000_isa_flags
23657 & dep_flags);
23658
23659 if (set_flags)
23660 {
23661 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
23662 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
23663 {
23664 set_flags &= ~rs6000_opt_masks[j].mask;
23665 error ("%<-mno-%s%> turns off %<-m%s%>",
23666 flags[i].name,
23667 rs6000_opt_masks[j].name);
23668 }
23669
23670 gcc_assert (!set_flags);
23671 }
23672
23673 rs6000_isa_flags &= ~dep_flags;
23674 ignore_masks |= no_flag | dep_flags;
23675 }
23676 }
23677
23678 return ignore_masks;
23679 }
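/* Illustrative command-line behaviour of the dependency table above (a
   sketch; the full dependent sets live in the OTHER_*_MASKS macros):

       gcc -mcpu=power9 -mno-vsx ...

   silently clears the implicit -mpower9-vector, -mpower8-vector, and the
   other VSX-dependent flags, whereas

       gcc -mno-vsx -mpower9-vector ...

   reports "-mno-vsx turns off -mpower9-vector", because here the dependent
   flag was enabled explicitly on the command line.  */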
23680
23681 \f
23682 /* Helper function for printing the function name when debugging. */
23683
23684 static const char *
23685 get_decl_name (tree fn)
23686 {
23687 tree name;
23688
23689 if (!fn)
23690 return "<null>";
23691
23692 name = DECL_NAME (fn);
23693 if (!name)
23694 return "<no-name>";
23695
23696 return IDENTIFIER_POINTER (name);
23697 }
23698
23699 /* Return the clone id of the target we are compiling code for in a target
23700 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
23701 the priority list for the target clones (ordered from lowest to
23702 highest). */
23703
23704 static int
23705 rs6000_clone_priority (tree fndecl)
23706 {
23707 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
23708 HOST_WIDE_INT isa_masks;
23709 int ret = CLONE_DEFAULT;
23710 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
23711 const char *attrs_str = NULL;
23712
23713 attrs = TREE_VALUE (TREE_VALUE (attrs));
23714 attrs_str = TREE_STRING_POINTER (attrs);
23715
23716 /* Return priority zero for default function. Return the ISA needed for the
23717 function if it is not the default. */
23718 if (strcmp (attrs_str, "default") != 0)
23719 {
23720 if (fn_opts == NULL_TREE)
23721 fn_opts = target_option_default_node;
23722
23723 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
23724 isa_masks = rs6000_isa_flags;
23725 else
23726 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
23727
23728 for (ret = CLONE_MAX - 1; ret != 0; ret--)
23729 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
23730 break;
23731 }
23732
23733 if (TARGET_DEBUG_TARGET)
23734 fprintf (stderr, "rs6000_clone_priority (%s) => %d\n",
23735 get_decl_name (fndecl), ret);
23736
23737 return ret;
23738 }
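/* For example (user-level code, illustrative only):

       __attribute__ ((target_clones ("cpu=power9,default")))
       long sum (const long *p, long n);

   creates two clones.  The "default" clone has priority CLONE_DEFAULT (0),
   while the power9 clone gets the highest CLONE_* index whose isa_mask in
   rs6000_clone_map intersects its ISA flags, as computed by the loop
   above.  */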
23739
23740 /* This compares the priority of target features in function DECL1 and DECL2.
23741 It returns a positive value if DECL1 has higher priority, a negative value
23742 if DECL2 has higher priority, and 0 if they are the same. Note, priorities are
23743 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
23744
23745 static int
23746 rs6000_compare_version_priority (tree decl1, tree decl2)
23747 {
23748 int priority1 = rs6000_clone_priority (decl1);
23749 int priority2 = rs6000_clone_priority (decl2);
23750 int ret = priority1 - priority2;
23751
23752 if (TARGET_DEBUG_TARGET)
23753 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
23754 get_decl_name (decl1), get_decl_name (decl2), ret);
23755
23756 return ret;
23757 }
23758
23759 /* Make a dispatcher declaration for the multi-versioned function DECL.
23760 Calls to DECL function will be replaced with calls to the dispatcher
23761 by the front-end. Returns the decl of the dispatcher function. */
23762
23763 static tree
23764 rs6000_get_function_versions_dispatcher (void *decl)
23765 {
23766 tree fn = (tree) decl;
23767 struct cgraph_node *node = NULL;
23768 struct cgraph_node *default_node = NULL;
23769 struct cgraph_function_version_info *node_v = NULL;
23770 struct cgraph_function_version_info *first_v = NULL;
23771
23772 tree dispatch_decl = NULL;
23773
23774 struct cgraph_function_version_info *default_version_info = NULL;
23775 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
23776
23777 if (TARGET_DEBUG_TARGET)
23778 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
23779 get_decl_name (fn));
23780
23781 node = cgraph_node::get (fn);
23782 gcc_assert (node != NULL);
23783
23784 node_v = node->function_version ();
23785 gcc_assert (node_v != NULL);
23786
23787 if (node_v->dispatcher_resolver != NULL)
23788 return node_v->dispatcher_resolver;
23789
23790 /* Find the default version and make it the first node. */
23791 first_v = node_v;
23792 /* Go to the beginning of the chain. */
23793 while (first_v->prev != NULL)
23794 first_v = first_v->prev;
23795
23796 default_version_info = first_v;
23797 while (default_version_info != NULL)
23798 {
23799 const tree decl2 = default_version_info->this_node->decl;
23800 if (is_function_default_version (decl2))
23801 break;
23802 default_version_info = default_version_info->next;
23803 }
23804
23805 /* If there is no default node, just return NULL. */
23806 if (default_version_info == NULL)
23807 return NULL;
23808
23809 /* Make default info the first node. */
23810 if (first_v != default_version_info)
23811 {
23812 default_version_info->prev->next = default_version_info->next;
23813 if (default_version_info->next)
23814 default_version_info->next->prev = default_version_info->prev;
23815 first_v->prev = default_version_info;
23816 default_version_info->next = first_v;
23817 default_version_info->prev = NULL;
23818 }
23819
23820 default_node = default_version_info->this_node;
23821
23822 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
23823 error_at (DECL_SOURCE_LOCATION (default_node->decl),
23824 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
23825 "exports hardware capability bits");
23826 #else
23827
23828 if (targetm.has_ifunc_p ())
23829 {
23830 struct cgraph_function_version_info *it_v = NULL;
23831 struct cgraph_node *dispatcher_node = NULL;
23832 struct cgraph_function_version_info *dispatcher_version_info = NULL;
23833
23834 /* Right now, the dispatching is done via ifunc. */
23835 dispatch_decl = make_dispatcher_decl (default_node->decl);
23836
23837 dispatcher_node = cgraph_node::get_create (dispatch_decl);
23838 gcc_assert (dispatcher_node != NULL);
23839 dispatcher_node->dispatcher_function = 1;
23840 dispatcher_version_info
23841 = dispatcher_node->insert_new_function_version ();
23842 dispatcher_version_info->next = default_version_info;
23843 dispatcher_node->definition = 1;
23844
23845 /* Set the dispatcher for all the versions. */
23846 it_v = default_version_info;
23847 while (it_v != NULL)
23848 {
23849 it_v->dispatcher_resolver = dispatch_decl;
23850 it_v = it_v->next;
23851 }
23852 }
23853 else
23854 {
23855 error_at (DECL_SOURCE_LOCATION (default_node->decl),
23856 "multiversioning needs ifunc which is not supported "
23857 "on this target");
23858 }
23859 #endif
23860
23861 return dispatch_decl;
23862 }
23863
23864 /* Make the resolver function decl to dispatch the versions of a multi-
23865 versioned function, DEFAULT_DECL. Create an empty basic block in the
23866 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
23867 function. */
23868
23869 static tree
23870 make_resolver_func (const tree default_decl,
23871 const tree dispatch_decl,
23872 basic_block *empty_bb)
23873 {
23874 /* Make the resolver function static. The resolver function returns
23875 void *. */
23876 tree decl_name = clone_function_name (default_decl, "resolver");
23877 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
23878 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
23879 tree decl = build_fn_decl (resolver_name, type);
23880 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
23881
23882 DECL_NAME (decl) = decl_name;
23883 TREE_USED (decl) = 1;
23884 DECL_ARTIFICIAL (decl) = 1;
23885 DECL_IGNORED_P (decl) = 0;
23886 TREE_PUBLIC (decl) = 0;
23887 DECL_UNINLINABLE (decl) = 1;
23888
23889 /* Resolver is not external, body is generated. */
23890 DECL_EXTERNAL (decl) = 0;
23891 DECL_EXTERNAL (dispatch_decl) = 0;
23892
23893 DECL_CONTEXT (decl) = NULL_TREE;
23894 DECL_INITIAL (decl) = make_node (BLOCK);
23895 DECL_STATIC_CONSTRUCTOR (decl) = 0;
23896
23897 if (DECL_COMDAT_GROUP (default_decl)
23898 || TREE_PUBLIC (default_decl))
23899 {
23900 /* In this case, each translation unit with a call to this
23901 versioned function will put out a resolver. Ensure it
23902 is comdat to keep just one copy. */
23903 DECL_COMDAT (decl) = 1;
23904 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
23905 }
23906 else
23907 TREE_PUBLIC (dispatch_decl) = 0;
23908
23909 /* Build result decl and add to function_decl. */
23910 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
23911 DECL_CONTEXT (t) = decl;
23912 DECL_ARTIFICIAL (t) = 1;
23913 DECL_IGNORED_P (t) = 1;
23914 DECL_RESULT (decl) = t;
23915
23916 gimplify_function_tree (decl);
23917 push_cfun (DECL_STRUCT_FUNCTION (decl));
23918 *empty_bb = init_lowered_empty_function (decl, false,
23919 profile_count::uninitialized ());
23920
23921 cgraph_node::add_new_function (decl, true);
23922 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
23923
23924 pop_cfun ();
23925
23926 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
23927 DECL_ATTRIBUTES (dispatch_decl)
23928 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
23929
23930 cgraph_node::create_same_body_alias (dispatch_decl, decl);
23931
23932 return decl;
23933 }
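/* In rough C-like terms (a sketch of the decls built above, not literal
   compiler output), for a versioned function "foo" this produces:

       static void *foo.resolver (void);
       int foo (void) __attribute__ ((ifunc ("foo.resolver")));

   i.e. the dispatcher becomes an ifunc alias whose resolver, filled in later
   by dispatch_function_versions, returns the address of the chosen clone.
   (The dotted names are assembler names, not valid C identifiers.)  */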
23934
23935 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
23936 return a pointer to VERSION_DECL if we are running on a machine that
23937 supports the index CLONE_ISA hardware architecture bits. This function will
23938 be called during version dispatch to decide which function version to
23939 execute. It returns the basic block at the end, to which more conditions
23940 can be added. */
23941
23942 static basic_block
23943 add_condition_to_bb (tree function_decl, tree version_decl,
23944 int clone_isa, basic_block new_bb)
23945 {
23946 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
23947
23948 gcc_assert (new_bb != NULL);
23949 gimple_seq gseq = bb_seq (new_bb);
23950
23951
23952 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
23953 build_fold_addr_expr (version_decl));
23954 tree result_var = create_tmp_var (ptr_type_node);
23955 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
23956 gimple *return_stmt = gimple_build_return (result_var);
23957
23958 if (clone_isa == CLONE_DEFAULT)
23959 {
23960 gimple_seq_add_stmt (&gseq, convert_stmt);
23961 gimple_seq_add_stmt (&gseq, return_stmt);
23962 set_bb_seq (new_bb, gseq);
23963 gimple_set_bb (convert_stmt, new_bb);
23964 gimple_set_bb (return_stmt, new_bb);
23965 pop_cfun ();
23966 return new_bb;
23967 }
23968
23969 tree bool_zero = build_int_cst (bool_int_type_node, 0);
23970 tree cond_var = create_tmp_var (bool_int_type_node);
23971 tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
23972 const char *arg_str = rs6000_clone_map[clone_isa].name;
23973 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
23974 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
23975 gimple_call_set_lhs (call_cond_stmt, cond_var);
23976
23977 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
23978 gimple_set_bb (call_cond_stmt, new_bb);
23979 gimple_seq_add_stmt (&gseq, call_cond_stmt);
23980
23981 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
23982 NULL_TREE, NULL_TREE);
23983 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
23984 gimple_set_bb (if_else_stmt, new_bb);
23985 gimple_seq_add_stmt (&gseq, if_else_stmt);
23986
23987 gimple_seq_add_stmt (&gseq, convert_stmt);
23988 gimple_seq_add_stmt (&gseq, return_stmt);
23989 set_bb_seq (new_bb, gseq);
23990
23991 basic_block bb1 = new_bb;
23992 edge e12 = split_block (bb1, if_else_stmt);
23993 basic_block bb2 = e12->dest;
23994 e12->flags &= ~EDGE_FALLTHRU;
23995 e12->flags |= EDGE_TRUE_VALUE;
23996
23997 edge e23 = split_block (bb2, return_stmt);
23998 gimple_set_bb (convert_stmt, bb2);
23999 gimple_set_bb (return_stmt, bb2);
24000
24001 basic_block bb3 = e23->dest;
24002 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
24003
24004 remove_edge (e23);
24005 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
24006
24007 pop_cfun ();
24008 return bb3;
24009 }
24010
24011 /* This function generates the dispatch function for multi-versioned functions.
24012 DISPATCH_DECL is the function which will contain the dispatch logic.
24013 FNDECLS are the function choices for dispatch, and is a tree chain.
24014 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
24015 code is generated. */
24016
24017 static int
24018 dispatch_function_versions (tree dispatch_decl,
24019 void *fndecls_p,
24020 basic_block *empty_bb)
24021 {
24022 int ix;
24023 tree ele;
24024 vec<tree> *fndecls;
24025 tree clones[CLONE_MAX];
24026
24027 if (TARGET_DEBUG_TARGET)
24028 fputs ("dispatch_function_versions, top\n", stderr);
24029
24030 gcc_assert (dispatch_decl != NULL
24031 && fndecls_p != NULL
24032 && empty_bb != NULL);
24033
24034 /* fndecls_p is actually a vector. */
24035 fndecls = static_cast<vec<tree> *> (fndecls_p);
24036
24037 /* At least one more version other than the default. */
24038 gcc_assert (fndecls->length () >= 2);
24039
24040 /* The first version in the vector is the default decl. */
24041 memset ((void *) clones, '\0', sizeof (clones));
24042 clones[CLONE_DEFAULT] = (*fndecls)[0];
24043
24044 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
24045 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
24046 __builtin_cpu_supports ensures that the TOC fields are set up by requiring a
24047 recent glibc. If we ever need to call __builtin_cpu_init, we would need
24048 to insert the code here to do the call. */
24049
24050 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
24051 {
24052 int priority = rs6000_clone_priority (ele);
24053 if (!clones[priority])
24054 clones[priority] = ele;
24055 }
24056
24057 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
24058 if (clones[ix])
24059 {
24060 if (TARGET_DEBUG_TARGET)
24061 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
24062 ix, get_decl_name (clones[ix]));
24063
24064 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
24065 *empty_bb);
24066 }
24067
24068 return 0;
24069 }
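/* The dispatch code built by the loop above together with
   add_condition_to_bb is roughly equivalent to the following C (a sketch;
   the real body is generated directly as GIMPLE, and the clone names come
   from rs6000_clone_map):

       static void *foo.resolver (void)
       {
         if (__builtin_cpu_supports ("arch_3_00"))
           return foo.arch_3_00;
         if (__builtin_cpu_supports ("arch_2_07"))
           return foo.arch_2_07;
         return foo.default;
       }

   with the tests emitted from the highest clone priority down to the
   default.  */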
24070
24071 /* Generate the dispatching code body to dispatch multi-versioned function
24072 DECL. The target hook is called to process the "target" attributes and
24073 provide the code to dispatch the right function at run-time. NODE points
24074 to the dispatcher decl whose body will be created. */
24075
24076 static tree
24077 rs6000_generate_version_dispatcher_body (void *node_p)
24078 {
24079 tree resolver;
24080 basic_block empty_bb;
24081 struct cgraph_node *node = (cgraph_node *) node_p;
24082 struct cgraph_function_version_info *ninfo = node->function_version ();
24083
24084 if (ninfo->dispatcher_resolver)
24085 return ninfo->dispatcher_resolver;
24086
24087 /* node is going to be an alias, so remove the finalized bit. */
24088 node->definition = false;
24089
24090 /* The first version in the chain corresponds to the default version. */
24091 ninfo->dispatcher_resolver = resolver
24092 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
24093
24094 if (TARGET_DEBUG_TARGET)
24095 fprintf (stderr, "rs6000_generate_version_dispatcher_body, %s\n",
24096 get_decl_name (resolver));
24097
24098 push_cfun (DECL_STRUCT_FUNCTION (resolver));
24099 auto_vec<tree, 2> fn_ver_vec;
24100
24101 for (struct cgraph_function_version_info *vinfo = ninfo->next;
24102 vinfo;
24103 vinfo = vinfo->next)
24104 {
24105 struct cgraph_node *version = vinfo->this_node;
24106 /* Check for virtual functions here again, as by this time it should
24107 have been determined if this function needs a vtable index or
24108 not. This happens for methods in derived classes that override
24109 virtual methods in base classes but are not explicitly marked as
24110 virtual. */
24111 if (DECL_VINDEX (version->decl))
24112 sorry ("virtual function multiversioning not supported");
24113
24114 fn_ver_vec.safe_push (version->decl);
24115 }
24116
24117 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
24118 cgraph_edge::rebuild_edges ();
24119 pop_cfun ();
24120 return resolver;
24121 }
24122
24123 \f
24124 /* Hook to determine if one function can safely inline another. */
24125
24126 static bool
24127 rs6000_can_inline_p (tree caller, tree callee)
24128 {
24129 bool ret = false;
24130 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
24131 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
24132
24133 /* If the callee has no option attributes, then it is ok to inline. */
24134 if (!callee_tree)
24135 ret = true;
24136
24137 else
24138 {
24139 HOST_WIDE_INT caller_isa;
24140 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24141 HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
24142 HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;
24143
24144 /* If the caller has option attributes, then use them.
24145 Otherwise, use the command line options. */
24146 if (caller_tree)
24147 caller_isa = TREE_TARGET_OPTION (caller_tree)->x_rs6000_isa_flags;
24148 else
24149 caller_isa = rs6000_isa_flags;
24150
24151 /* The callee's options must be a subset of the caller's options, i.e.
24152 a vsx function may inline an altivec function, but a no-vsx function
24153 must not inline a vsx function. However, for those options that the
24154 callee has explicitly enabled or disabled, then we must enforce that
24155 the callee's and caller's options match exactly; see PR70010. */
24156 if (((caller_isa & callee_isa) == callee_isa)
24157 && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
24158 ret = true;
24159 }
24160
24161 if (TARGET_DEBUG_TARGET)
24162 fprintf (stderr, "rs6000_can_inline_p, caller %s, callee %s, %s inline\n",
24163 get_decl_name (caller), get_decl_name (callee),
24164 (ret ? "can" : "cannot"));
24165
24166 return ret;
24167 }
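/* Two illustrative cases for the subset rule above (user-level sketches):

       __attribute__ ((target ("altivec"))) static int helper (void);
       __attribute__ ((target ("vsx"))) int caller (void);

   helper may be inlined into caller, since -mvsx implies -maltivec and the
   callee's flags are therefore a subset of the caller's.  Conversely, a
   callee that explicitly specifies no-vsx cannot be inlined into a VSX
   caller, because explicitly set flags must match exactly (PR70010).  */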
24168 \f
24169 /* Allocate a stack temp and fix up the address so it meets the particular
24170 memory requirements (either offsettable or REG+REG addressing).  */
24171
24172 rtx
24173 rs6000_allocate_stack_temp (machine_mode mode,
24174 bool offsettable_p,
24175 bool reg_reg_p)
24176 {
24177 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
24178 rtx addr = XEXP (stack, 0);
24179 int strict_p = reload_completed;
24180
24181 if (!legitimate_indirect_address_p (addr, strict_p))
24182 {
24183 if (offsettable_p
24184 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
24185 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
24186
24187 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
24188 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
24189 }
24190
24191 return stack;
24192 }
24193
24194 /* Given a memory reference, if it is not a reg or reg+reg addressing,
24195 convert to such a form to deal with memory reference instructions
24196 like STFIWX and LDBRX that only take reg+reg addressing. */
24197
24198 rtx
24199 rs6000_force_indexed_or_indirect_mem (rtx x)
24200 {
24201 machine_mode mode = GET_MODE (x);
24202
24203 gcc_assert (MEM_P (x));
24204 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
24205 {
24206 rtx addr = XEXP (x, 0);
24207 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
24208 {
24209 rtx reg = XEXP (addr, 0);
24210 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
24211 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
24212 gcc_assert (REG_P (reg));
24213 emit_insn (gen_add3_insn (reg, reg, size_rtx));
24214 addr = reg;
24215 }
24216 else if (GET_CODE (addr) == PRE_MODIFY)
24217 {
24218 rtx reg = XEXP (addr, 0);
24219 rtx expr = XEXP (addr, 1);
24220 gcc_assert (REG_P (reg));
24221 gcc_assert (GET_CODE (expr) == PLUS);
24222 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
24223 addr = reg;
24224 }
24225
24226 if (GET_CODE (addr) == PLUS)
24227 {
24228 rtx op0 = XEXP (addr, 0);
24229 rtx op1 = XEXP (addr, 1);
24230 op0 = force_reg (Pmode, op0);
24231 op1 = force_reg (Pmode, op1);
24232 x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
24233 }
24234 else
24235 x = replace_equiv_address (x, force_reg (Pmode, addr));
24236 }
24237
24238 return x;
24239 }
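/* For example, STFIWX only accepts an X-form (reg+reg) address, so a memory
   operand such as (mem:SI (plus:DI (reg) (const_int 16))) is rewritten here
   by forcing the constant 16 into a register of its own, giving a reg+reg
   sum the instruction can use.  */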
24240
24241 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
24242
24243 On the RS/6000, all integer constants are acceptable, most won't be valid
24244 for particular insns, though. Only easy FP constants are acceptable. */
24245
24246 static bool
24247 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
24248 {
24249 if (TARGET_ELF && tls_referenced_p (x))
24250 return false;
24251
24252 if (CONST_DOUBLE_P (x))
24253 return easy_fp_constant (x, mode);
24254
24255 if (GET_CODE (x) == CONST_VECTOR)
24256 return easy_vector_constant (x, mode);
24257
24258 return true;
24259 }
24260
24261 \f
24262 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
24263
24264 static bool
24265 chain_already_loaded (rtx_insn *last)
24266 {
24267 for (; last != NULL; last = PREV_INSN (last))
24268 {
24269 if (NONJUMP_INSN_P (last))
24270 {
24271 rtx patt = PATTERN (last);
24272
24273 if (GET_CODE (patt) == SET)
24274 {
24275 rtx lhs = XEXP (patt, 0);
24276
24277 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
24278 return true;
24279 }
24280 }
24281 }
24282 return false;
24283 }
24284
24285 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
24286
24287 void
24288 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24289 {
24290 rtx func = func_desc;
24291 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
24292 rtx toc_load = NULL_RTX;
24293 rtx toc_restore = NULL_RTX;
24294 rtx func_addr;
24295 rtx abi_reg = NULL_RTX;
24296 rtx call[4];
24297 int n_call;
24298 rtx insn;
24299 bool is_pltseq_longcall;
24300
24301 if (global_tlsarg)
24302 tlsarg = global_tlsarg;
24303
24304 /* Handle longcall attributes. */
24305 is_pltseq_longcall = false;
24306 if ((INTVAL (cookie) & CALL_LONG) != 0
24307 && GET_CODE (func_desc) == SYMBOL_REF)
24308 {
24309 func = rs6000_longcall_ref (func_desc, tlsarg);
24310 if (TARGET_PLTSEQ)
24311 is_pltseq_longcall = true;
24312 }
24313
24314 /* Handle indirect calls. */
24315 if (!SYMBOL_REF_P (func)
24316 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
24317 {
24318 if (!rs6000_pcrel_p (cfun))
24319 {
24320 /* Save the TOC into its reserved slot before the call,
24321 and prepare to restore it after the call. */
24322 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
24323 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
24324 gen_rtvec (1, stack_toc_offset),
24325 UNSPEC_TOCSLOT);
24326 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
24327
24328 /* Can we optimize saving the TOC in the prologue or
24329 do we need to do it at every call? */
24330 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
24331 cfun->machine->save_toc_in_prologue = true;
24332 else
24333 {
24334 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
24335 rtx stack_toc_mem = gen_frame_mem (Pmode,
24336 gen_rtx_PLUS (Pmode, stack_ptr,
24337 stack_toc_offset));
24338 MEM_VOLATILE_P (stack_toc_mem) = 1;
24339 if (is_pltseq_longcall)
24340 {
24341 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
24342 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24343 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
24344 }
24345 else
24346 emit_move_insn (stack_toc_mem, toc_reg);
24347 }
24348 }
24349
24350 if (DEFAULT_ABI == ABI_ELFv2)
24351 {
24352 /* A function pointer in the ELFv2 ABI is just a plain address, but
24353 the ABI requires it to be loaded into r12 before the call. */
24354 func_addr = gen_rtx_REG (Pmode, 12);
24355 if (!rtx_equal_p (func_addr, func))
24356 emit_move_insn (func_addr, func);
24357 abi_reg = func_addr;
24358 /* Indirect calls via CTR are strongly preferred over indirect
24359 calls via LR, so move the address there. Needed to mark
24360 this insn for linker plt sequence editing too. */
24361 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24362 if (is_pltseq_longcall)
24363 {
24364 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
24365 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24366 emit_insn (gen_rtx_SET (func_addr, mark_func));
24367 v = gen_rtvec (2, func_addr, func_desc);
24368 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24369 }
24370 else
24371 emit_move_insn (func_addr, abi_reg);
24372 }
24373 else
24374 {
24375 /* A function pointer under AIX is a pointer to a data area whose
24376 first word contains the actual address of the function, whose
24377 second word contains a pointer to its TOC, and whose third word
24378 contains a value to place in the static chain register (r11).
24379 Note that if we load the static chain, our "trampoline" need
24380 not have any executable code. */
24381
24382 /* Load up address of the actual function. */
24383 func = force_reg (Pmode, func);
24384 func_addr = gen_reg_rtx (Pmode);
24385 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
24386
24387 /* Indirect calls via CTR are strongly preferred over indirect
24388 calls via LR, so move the address there. */
24389 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
24390 emit_move_insn (ctr_reg, func_addr);
24391 func_addr = ctr_reg;
24392
24393 /* Prepare to load the TOC of the called function. Note that the
24394 TOC load must happen immediately before the actual call so
24395 that unwinding the TOC registers works correctly. See the
24396 comment in frob_update_context. */
24397 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
24398 rtx func_toc_mem = gen_rtx_MEM (Pmode,
24399 gen_rtx_PLUS (Pmode, func,
24400 func_toc_offset));
24401 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
24402
24403 /* If we have a static chain, load it up. But, if the call was
24404 originally direct, the 3rd word has not been written since no
24405 trampoline has been built, so we ought not to load it, lest we
24406 overwrite a static chain value. */
24407 if (!(GET_CODE (func_desc) == SYMBOL_REF
24408 && SYMBOL_REF_FUNCTION_P (func_desc))
24409 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
24410 && !chain_already_loaded (get_current_sequence ()->next->last))
24411 {
24412 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
24413 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
24414 rtx func_sc_mem = gen_rtx_MEM (Pmode,
24415 gen_rtx_PLUS (Pmode, func,
24416 func_sc_offset));
24417 emit_move_insn (sc_reg, func_sc_mem);
24418 abi_reg = sc_reg;
24419 }
24420 }
24421 }
24422 else
24423 {
24424 /* No TOC register needed for calls from PC-relative callers. */
24425 if (!rs6000_pcrel_p (cfun))
24426 /* Direct calls use the TOC: for local calls, the callee will
24427 assume the TOC register is set; for non-local calls, the
24428 PLT stub needs the TOC register. */
24429 abi_reg = toc_reg;
24430 func_addr = func;
24431 }
24432
24433 /* Create the call. */
24434 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24435 if (value != NULL_RTX)
24436 call[0] = gen_rtx_SET (value, call[0]);
24437 n_call = 1;
24438
24439 if (toc_load)
24440 call[n_call++] = toc_load;
24441 if (toc_restore)
24442 call[n_call++] = toc_restore;
24443
24444 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
24445
24446 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
24447 insn = emit_call_insn (insn);
24448
24449 /* Mention all registers defined by the ABI to hold information
24450 as uses in CALL_INSN_FUNCTION_USAGE. */
24451 if (abi_reg)
24452 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
24453 }
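/* For ELFv2, the RTL built above for an indirect call typically assembles
   to a sequence like this (an illustrative sketch; the save offset comes
   from RS6000_TOC_SAVE_SLOT, and the TOC save may instead be hoisted into
   the prologue when TARGET_SAVE_TOC_INDIRECT allows):

       std 2,24(1)      # save the TOC pointer in its reserved stack slot
       mr 12,9          # the ABI wants the function address in r12
       mtctr 12         # indirect calls go through the CTR register
       bctrl
       ld 2,24(1)       # restore the TOC pointer after the call
   */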
24454
24455 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
24456
24457 void
24458 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24459 {
24460 rtx call[2];
24461 rtx insn;
24462
24463 gcc_assert (INTVAL (cookie) == 0);
24464
24465 if (global_tlsarg)
24466 tlsarg = global_tlsarg;
24467
24468 /* Create the call. */
24469 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), tlsarg);
24470 if (value != NULL_RTX)
24471 call[0] = gen_rtx_SET (value, call[0]);
24472
24473 call[1] = simple_return_rtx;
24474
24475 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
24476 insn = emit_call_insn (insn);
24477
24478 /* Note use of the TOC register. */
24479 if (!rs6000_pcrel_p (cfun))
24480 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
24481 gen_rtx_REG (Pmode, TOC_REGNUM));
24482 }
24483
24484 /* Expand code to perform a call under the SYSV4 ABI. */
24485
24486 void
24487 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24488 {
24489 rtx func = func_desc;
24490 rtx func_addr;
24491 rtx call[4];
24492 rtx insn;
24493 rtx abi_reg = NULL_RTX;
24494 int n;
24495
24496 if (global_tlsarg)
24497 tlsarg = global_tlsarg;
24498
24499 /* Handle longcall attributes. */
24500 if ((INTVAL (cookie) & CALL_LONG) != 0
24501 && GET_CODE (func_desc) == SYMBOL_REF)
24502 {
24503 func = rs6000_longcall_ref (func_desc, tlsarg);
24504 /* If the longcall was implemented as an inline PLT call using
24505 PLT unspecs then func will be REG:r11. If not, func will be
24506 a pseudo reg. The inline PLT call sequence supports lazy
24507 linking (and longcalls to functions in dlopen'd libraries).
24508 The other style of longcalls doesn't. The lazy linking entry
24509 to the dynamic symbol resolver requires r11 be the function
24510 address (as it is for linker generated PLT stubs). Ensure
24511 r11 stays valid to the bctrl by marking r11 used by the call. */
24512 if (TARGET_PLTSEQ)
24513 abi_reg = func;
24514 }
24515
24516 /* Handle indirect calls. */
24517 if (GET_CODE (func) != SYMBOL_REF)
24518 {
24519 func = force_reg (Pmode, func);
24520
24521 /* Indirect calls via CTR are strongly preferred over indirect
24522 calls via LR, so move the address there. That can't be left
24523 to reload because we want to mark every instruction in an
24524 inline PLT call sequence with a reloc, enabling the linker to
24525 edit the sequence back to a direct call when that makes sense. */
24526 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24527 if (abi_reg)
24528 {
24529 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
24530 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24531 emit_insn (gen_rtx_SET (func_addr, mark_func));
24532 v = gen_rtvec (2, func_addr, func_desc);
24533 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24534 }
24535 else
24536 emit_move_insn (func_addr, func);
24537 }
24538 else
24539 func_addr = func;
24540
24541 /* Create the call. */
24542 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24543 if (value != NULL_RTX)
24544 call[0] = gen_rtx_SET (value, call[0]);
24545
24546 call[1] = gen_rtx_USE (VOIDmode, cookie);
24547 n = 2;
24548 if (TARGET_SECURE_PLT
24549 && flag_pic
24550 && GET_CODE (func_addr) == SYMBOL_REF
24551 && !SYMBOL_REF_LOCAL_P (func_addr))
24552 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
24553
24554 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
24555
24556 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
24557 insn = emit_call_insn (insn);
24558 if (abi_reg)
24559 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
24560 }
24561
24562 /* Expand code to perform a sibling call under the SysV4 ABI. */
24563
24564 void
24565 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24566 {
24567 rtx func = func_desc;
24568 rtx func_addr;
24569 rtx call[3];
24570 rtx insn;
24571 rtx abi_reg = NULL_RTX;
24572
24573 if (global_tlsarg)
24574 tlsarg = global_tlsarg;
24575
24576 /* Handle longcall attributes. */
24577 if ((INTVAL (cookie) & CALL_LONG) != 0
24578 && GET_CODE (func_desc) == SYMBOL_REF)
24579 {
24580 func = rs6000_longcall_ref (func_desc, tlsarg);
24581 /* If the longcall was implemented as an inline PLT call using
24582 PLT unspecs then func will be REG:r11. If not, func will be
24583 a pseudo reg. The inline PLT call sequence supports lazy
24584 linking (and longcalls to functions in dlopen'd libraries).
24585 The other style of longcalls doesn't. The lazy linking entry
24586 to the dynamic symbol resolver requires r11 be the function
24587 address (as it is for linker generated PLT stubs). Ensure
24588 r11 stays valid to the bctr by marking r11 used by the call. */
24589 if (TARGET_PLTSEQ)
24590 abi_reg = func;
24591 }
24592
24593 /* Handle indirect calls. */
24594 if (GET_CODE (func) != SYMBOL_REF)
24595 {
24596 func = force_reg (Pmode, func);
24597
24598 /* Indirect sibcalls must go via CTR. That can't be left to
24599 reload because we want to mark every instruction in an inline
24600 PLT call sequence with a reloc, enabling the linker to edit
24601 the sequence back to a direct call when that makes sense. */
24602 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24603 if (abi_reg)
24604 {
24605 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
24606 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24607 emit_insn (gen_rtx_SET (func_addr, mark_func));
24608 v = gen_rtvec (2, func_addr, func_desc);
24609 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24610 }
24611 else
24612 emit_move_insn (func_addr, func);
24613 }
24614 else
24615 func_addr = func;
24616
24617 /* Create the call. */
24618 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24619 if (value != NULL_RTX)
24620 call[0] = gen_rtx_SET (value, call[0]);
24621
24622 call[1] = gen_rtx_USE (VOIDmode, cookie);
24623 call[2] = simple_return_rtx;
24624
24625 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
24626 insn = emit_call_insn (insn);
24627 if (abi_reg)
24628 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
24629 }
24630
24631 #if TARGET_MACHO
24632
24633 /* Expand code to perform a call under the Darwin ABI.
24634 Modulo handling of mlongcall, this is much the same as sysv.
24635 If/when the longcall optimisation is removed, we could drop this
24636 code and use the sysv case (taking care to avoid the tls stuff).
24637
24638 We can use this for sibcalls too, if needed. */
24639
24640 void
24641 rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
24642 rtx cookie, bool sibcall)
24643 {
24644 rtx func = func_desc;
24645 rtx func_addr;
24646 rtx call[3];
24647 rtx insn;
24648 int cookie_val = INTVAL (cookie);
24649 bool make_island = false;
24650
24651 /* Handle longcall attributes, there are two cases for Darwin:
24652 1) Newer linkers are capable of synthesising any branch islands needed.
24653 2) We need a helper branch island synthesised by the compiler.
24654 The second case has mostly been retired and we don't use it for m64.
24655 In fact, it is only an optimisation; we could just indirect as sysv
24656 does, but we keep it for backwards compatibility for now.
24657 If we're going to use this, then we need to keep the CALL_LONG bit set,
24658 so that we can pick up the special insn form later. */
24659 if ((cookie_val & CALL_LONG) != 0
24660 && GET_CODE (func_desc) == SYMBOL_REF)
24661 {
24662 /* FIXME: the longcall opt should not hang off this flag, it is most
24663 likely incorrect for kernel-mode code-generation. */
24664 if (darwin_symbol_stubs && TARGET_32BIT)
24665 make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */
24666 else
24667 {
24668 /* The linker is capable of doing this, but the user explicitly
24669 asked for -mlongcall, so we'll do the 'normal' version. */
24670 func = rs6000_longcall_ref (func_desc, NULL_RTX);
24671 cookie_val &= ~CALL_LONG; /* Handled, zap it. */
24672 }
24673 }
24674
24675 /* Handle indirect calls. */
24676 if (GET_CODE (func) != SYMBOL_REF)
24677 {
24678 func = force_reg (Pmode, func);
24679
24680 /* Indirect calls via CTR are strongly preferred over indirect
24681 calls via LR, and are required for indirect sibcalls, so move
24682 the address there. */
24683 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24684 emit_move_insn (func_addr, func);
24685 }
24686 else
24687 func_addr = func;
24688
24689 /* Create the call. */
24690 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24691 if (value != NULL_RTX)
24692 call[0] = gen_rtx_SET (value, call[0]);
24693
24694 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
24695
24696 if (sibcall)
24697 call[2] = simple_return_rtx;
24698 else
24699 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
24700
24701 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
24702 insn = emit_call_insn (insn);
24703 /* Now we have the debug info in the insn, we can set up the branch island
24704 if we're using one. */
24705 if (make_island)
24706 {
24707 tree funname = get_identifier (XSTR (func_desc, 0));
24708
24709 if (no_previous_def (funname))
24710 {
24711 rtx label_rtx = gen_label_rtx ();
24712 char *label_buf, temp_buf[256];
24713 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
24714 CODE_LABEL_NUMBER (label_rtx));
24715 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
24716 tree labelname = get_identifier (label_buf);
24717 add_compiler_branch_island (labelname, funname,
24718 insn_line ((const rtx_insn*)insn));
24719 }
24720 }
24721 }
24722 #endif
24723
24724 void
24725 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
24726 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
24727 {
24728 #if TARGET_MACHO
24729 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
24730 #else
24731 gcc_unreachable ();
24732 #endif
24733 }
24734
24735
24736 void
24737 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
24738 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
24739 {
24740 #if TARGET_MACHO
24741 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
24742 #else
24743 gcc_unreachable ();
24744 #endif
24745 }
24746
24747 /* Return whether we should generate PC-relative code for FNDECL. */
24748 bool
24749 rs6000_fndecl_pcrel_p (const_tree fndecl)
24750 {
24751 if (DEFAULT_ABI != ABI_ELFv2)
24752 return false;
24753
24754 struct cl_target_option *opts = target_opts_for_fn (fndecl);
24755
24756 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
24757 && TARGET_CMODEL == CMODEL_MEDIUM);
24758 }
24759
24760 /* Return whether we should generate PC-relative code for *FN. */
24761 bool
24762 rs6000_pcrel_p (struct function *fn)
24763 {
24764 if (DEFAULT_ABI != ABI_ELFv2)
24765 return false;
24766
24767 /* Optimize usual case. */
24768 if (fn == cfun)
24769 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
24770 && TARGET_CMODEL == CMODEL_MEDIUM);
24771
24772 return rs6000_fndecl_pcrel_p (fn->decl);
24773 }
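/* When these predicates are true (ELFv2 with -mpcrel and the medium code
   model, e.g. -mcpu=future), addressing can use PC-relative prefixed
   instructions instead of the TOC.  An illustrative sketch of the
   difference for loading an external int x (operand syntax abbreviated):

       pld 9,x@got@pcrel        # PC-relative GOT load, no TOC setup
       lwz 9,0(9)

   versus the usual TOC-based addis 9,2,...@toc@ha / ld 9,...@toc@l(9)
   sequence.  */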
24774
24775 \f
24776 /* Given an address (ADDR), a mode (MODE), and what the format of the
24777 non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
24778 for the address. */
24779
24780 enum insn_form
24781 address_to_insn_form (rtx addr,
24782 machine_mode mode,
24783 enum non_prefixed_form non_prefixed_format)
24784 {
24785 /* Single register is easy. */
24786 if (REG_P (addr) || SUBREG_P (addr))
24787 return INSN_FORM_BASE_REG;
24788
24789 /* If the non-prefixed instruction format doesn't support offset addressing,
24790 make sure only indexed addressing is allowed.
24791
24792 We special case SDmode so that the register allocator does not try to move
24793 SDmode through GPR registers, but instead uses the 32-bit integer load and
24794 store instructions for the floating point registers. */
24795 if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
24796 {
24797 if (GET_CODE (addr) != PLUS)
24798 return INSN_FORM_BAD;
24799
24800 rtx op0 = XEXP (addr, 0);
24801 rtx op1 = XEXP (addr, 1);
24802 if (!REG_P (op0) && !SUBREG_P (op0))
24803 return INSN_FORM_BAD;
24804
24805 if (!REG_P (op1) && !SUBREG_P (op1))
24806 return INSN_FORM_BAD;
24807
24808 return INSN_FORM_X;
24809 }
24810
24811 /* Deal with update forms. */
24812 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
24813 return INSN_FORM_UPDATE;
24814
24815 /* Handle PC-relative symbols and labels. Check for both local and external
24816 symbols. Assume labels are always local. */
24817 if (TARGET_PCREL)
24818 {
24819 if (SYMBOL_REF_P (addr) && !SYMBOL_REF_LOCAL_P (addr))
24820 return INSN_FORM_PCREL_EXTERNAL;
24821
24822 if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
24823 return INSN_FORM_PCREL_LOCAL;
24824 }
24825
24826 if (GET_CODE (addr) == CONST)
24827 addr = XEXP (addr, 0);
24828
24829 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
24830 if (GET_CODE (addr) == LO_SUM)
24831 return INSN_FORM_LO_SUM;
24832
24833 /* Everything below must be an offset address of some form. */
24834 if (GET_CODE (addr) != PLUS)
24835 return INSN_FORM_BAD;
24836
24837 rtx op0 = XEXP (addr, 0);
24838 rtx op1 = XEXP (addr, 1);
24839
24840 /* Check for indexed addresses. */
24841 if (REG_P (op1) || SUBREG_P (op1))
24842 {
24843 if (REG_P (op0) || SUBREG_P (op0))
24844 return INSN_FORM_X;
24845
24846 return INSN_FORM_BAD;
24847 }
24848
24849 if (!CONST_INT_P (op1))
24850 return INSN_FORM_BAD;
24851
24852 HOST_WIDE_INT offset = INTVAL (op1);
24853 if (!SIGNED_INTEGER_34BIT_P (offset))
24854 return INSN_FORM_BAD;
24855
24856 /* Check for local and external PC-relative addresses. Labels are always
24857 local. */
24858 if (TARGET_PCREL)
24859 {
24860 if (SYMBOL_REF_P (op0) && !SYMBOL_REF_LOCAL_P (op0))
24861 return INSN_FORM_PCREL_EXTERNAL;
24862
24863 if (SYMBOL_REF_P (op0) || LABEL_REF_P (op0))
24864 return INSN_FORM_PCREL_LOCAL;
24865 }
24866
24867 /* If it isn't PC-relative, the address must use a base register. */
24868 if (!REG_P (op0) && !SUBREG_P (op0))
24869 return INSN_FORM_BAD;
24870
24871 /* Large offsets must be prefixed. */
24872 if (!SIGNED_INTEGER_16BIT_P (offset))
24873 {
24874 if (TARGET_PREFIXED)
24875 return INSN_FORM_PREFIXED_NUMERIC;
24876
24877 return INSN_FORM_BAD;
24878 }
24879
24880 /* We have a 16-bit offset, see what default instruction format to use. */
24881 if (non_prefixed_format == NON_PREFIXED_DEFAULT)
24882 {
24883 unsigned size = GET_MODE_SIZE (mode);
24884
24885 /* On 64-bit systems, assume 64-bit integers need to use DS form
24886 addresses (for LD/STD). VSX vectors need to use DQ form addresses
24887 (for LXV and STXV). TImode is problematical in that its normal usage
24888 is expected to be GPRs where it wants a DS instruction format, but if
24889 it goes into the vector registers, it wants a DQ instruction
24890 format. */
24891 if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
24892 non_prefixed_format = NON_PREFIXED_DS;
24893
24894 else if (TARGET_VSX && size >= 16
24895 && (VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode)))
24896 non_prefixed_format = NON_PREFIXED_DQ;
24897
24898 else
24899 non_prefixed_format = NON_PREFIXED_D;
24900 }
24901
24902 /* Classify the D/DS/DQ-form addresses. */
24903 switch (non_prefixed_format)
24904 {
24905 /* Instruction format D, all 16 bits are valid. */
24906 case NON_PREFIXED_D:
24907 return INSN_FORM_D;
24908
24909 /* Instruction format DS, bottom 2 bits must be 0. */
24910 case NON_PREFIXED_DS:
24911 if ((offset & 3) == 0)
24912 return INSN_FORM_DS;
24913
24914 else if (TARGET_PREFIXED)
24915 return INSN_FORM_PREFIXED_NUMERIC;
24916
24917 else
24918 return INSN_FORM_BAD;
24919
24920 /* Instruction format DQ, bottom 4 bits must be 0. */
24921 case NON_PREFIXED_DQ:
24922 if ((offset & 15) == 0)
24923 return INSN_FORM_DQ;
24924
24925 else if (TARGET_PREFIXED)
24926 return INSN_FORM_PREFIXED_NUMERIC;
24927
24928 else
24929 return INSN_FORM_BAD;
24930
24931 default:
24932 break;
24933 }
24934
24935 return INSN_FORM_BAD;
24936 }
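/* A few illustrative classifications (assuming a 64-bit target and the
   mode-based defaults described above):

       (reg)                           -> INSN_FORM_BASE_REG
       (plus (reg) (reg))              -> INSN_FORM_X
       (plus (reg) (const_int 8))      -> INSN_FORM_D (or DS/DQ by mode)
       (plus (reg) (const_int 100000)) -> INSN_FORM_PREFIXED_NUMERIC if
                                          TARGET_PREFIXED, else INSN_FORM_BAD
       (symbol_ref "x")                -> INSN_FORM_PCREL_LOCAL or _EXTERNAL
                                          when TARGET_PCREL.  */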
24937
24938 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
24939 instruction format (D/DS/DQ) used for offset memory. */
24940
24941 static enum non_prefixed_form
24942 reg_to_non_prefixed (rtx reg, machine_mode mode)
24943 {
24944 /* If it isn't a register, use the defaults. */
24945 if (!REG_P (reg) && !SUBREG_P (reg))
24946 return NON_PREFIXED_DEFAULT;
24947
24948 unsigned int r = reg_or_subregno (reg);
24949
24950 /* If we have a pseudo, use the default instruction format. */
24951 if (!HARD_REGISTER_NUM_P (r))
24952 return NON_PREFIXED_DEFAULT;
24953
24954 unsigned size = GET_MODE_SIZE (mode);
24955
24956 /* FPR registers use the D format for scalars, and the DQ format for vectors, IEEE
24957 128-bit floating point, and 128-bit integers. Before power9, only indexed
24958 addressing was available for vectors. */
24959 if (FP_REGNO_P (r))
24960 {
24961 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
24962 return NON_PREFIXED_D;
24963
24964 else if (size < 8)
24965 return NON_PREFIXED_X;
24966
24967 else if (TARGET_VSX && size >= 16
24968 && (VECTOR_MODE_P (mode)
24969 || FLOAT128_VECTOR_P (mode)
24970 || mode == TImode || mode == CTImode))
24971 return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X;
24972
24973 else
24974 return NON_PREFIXED_DEFAULT;
24975 }
24976
24977 /* Altivec registers use the DS format for scalars, and the DQ format for vectors, IEEE
24978 128-bit floating point, and 128-bit integers. Before power9, only indexed
24979 addressing was available. */
24980 else if (ALTIVEC_REGNO_P (r))
24981 {
24982 if (!TARGET_P9_VECTOR)
24983 return NON_PREFIXED_X;
24984
24985 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
24986 return NON_PREFIXED_DS;
24987
24988 else if (size < 8)
24989 return NON_PREFIXED_X;
24990
24991 else if (TARGET_VSX && size >= 16
24992 && (VECTOR_MODE_P (mode)
24993 || FLOAT128_VECTOR_P (mode)
24994 || mode == TImode || mode == CTImode))
24995 return NON_PREFIXED_DQ;
24996
24997 else
24998 return NON_PREFIXED_DEFAULT;
24999 }
25000
25001 /* GPR registers use the DS format for 64-bit items on 64-bit systems, and
25002 the D format otherwise. Assume that any other register, such as LR, CRs, etc. will go
25003 through the GPR registers for memory operations. */
25004 else if (TARGET_POWERPC64 && size >= 8)
25005 return NON_PREFIXED_DS;
25006
25007 return NON_PREFIXED_D;
25008 }
25009
25010 \f
25011 /* Whether a load instruction is a prefixed instruction. This is called from
25012 the prefixed attribute processing. */
25013
25014 bool
25015 prefixed_load_p (rtx_insn *insn)
25016 {
25017 /* Validate the insn to make sure it is a normal load insn. */
25018 extract_insn_cached (insn);
25019 if (recog_data.n_operands < 2)
25020 return false;
25021
25022 rtx reg = recog_data.operand[0];
25023 rtx mem = recog_data.operand[1];
25024
25025 if (!REG_P (reg) && !SUBREG_P (reg))
25026 return false;
25027
25028 if (!MEM_P (mem))
25029 return false;
25030
25031 /* Prefixed load instructions do not support update or indexed forms. */
25032 if (get_attr_indexed (insn) == INDEXED_YES
25033 || get_attr_update (insn) == UPDATE_YES)
25034 return false;
25035
25036 /* LWA uses the DS format instead of the D format that LWZ uses. */
25037 enum non_prefixed_form non_prefixed;
25038 machine_mode reg_mode = GET_MODE (reg);
25039 machine_mode mem_mode = GET_MODE (mem);
25040
25041 if (mem_mode == SImode && reg_mode == DImode
25042 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
25043 non_prefixed = NON_PREFIXED_DS;
25044
25045 else
25046 non_prefixed = reg_to_non_prefixed (reg, mem_mode);
25047
25048 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
25049 }
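/* For instance, a sign-extending load of SImode memory into a DImode
   register uses LWA, which is a DS-form instruction, so an offset like 6
   (low two bits nonzero) cannot be encoded in the non-prefixed form; with
   TARGET_PREFIXED the load then becomes the prefixed PLWA and this function
   returns true.  */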
25050
25051 /* Whether a store instruction is a prefixed instruction. This is called from
25052 the prefixed attribute processing. */
25053
25054 bool
25055 prefixed_store_p (rtx_insn *insn)
25056 {
25057 /* Validate the insn to make sure it is a normal store insn. */
25058 extract_insn_cached (insn);
25059 if (recog_data.n_operands < 2)
25060 return false;
25061
25062 rtx mem = recog_data.operand[0];
25063 rtx reg = recog_data.operand[1];
25064
25065 if (!REG_P (reg) && !SUBREG_P (reg))
25066 return false;
25067
25068 if (!MEM_P (mem))
25069 return false;
25070
25071 /* Prefixed store instructions do not support update or indexed forms. */
25072 if (get_attr_indexed (insn) == INDEXED_YES
25073 || get_attr_update (insn) == UPDATE_YES)
25074 return false;
25075
25076 machine_mode mem_mode = GET_MODE (mem);
25077 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
25078 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
25079 }
25080
25081 /* Whether a load immediate or add instruction is a prefixed instruction. This
25082 is called from the prefixed attribute processing. */
25083
25084 bool
25085 prefixed_paddi_p (rtx_insn *insn)
25086 {
25087 rtx set = single_set (insn);
25088 if (!set)
25089 return false;
25090
25091 rtx dest = SET_DEST (set);
25092 rtx src = SET_SRC (set);
25093
25094 if (!REG_P (dest) && !SUBREG_P (dest))
25095 return false;
25096
25097 /* Is this a load immediate that can't be done with a simple ADDI or
25098 ADDIS? */
25099 if (CONST_INT_P (src))
25100 return (satisfies_constraint_eI (src)
25101 && !satisfies_constraint_I (src)
25102 && !satisfies_constraint_L (src));
25103
25104 /* Is this a PADDI instruction that can't be done with a simple ADDI or
25105 ADDIS? */
25106 if (GET_CODE (src) == PLUS)
25107 {
25108 rtx op1 = XEXP (src, 1);
25109
25110 return (CONST_INT_P (op1)
25111 && satisfies_constraint_eI (op1)
25112 && !satisfies_constraint_I (op1)
25113 && !satisfies_constraint_L (op1));
25114 }
25115
25116 /* If not, is it a load of a PC-relative address? */
25117 if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
25118 return false;
25119
25120 if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
25121 return false;
25122
25123 enum insn_form iform = address_to_insn_form (src, Pmode,
25124 NON_PREFIXED_DEFAULT);
25125
25126 return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
25127 }
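/* Editor's illustration (a sketch, register numbers hypothetical): ADDI
   takes a 16-bit signed immediate (constraint "I") and ADDIS a shifted
   16-bit immediate (constraint "L"), while PADDI/PLI take a 34-bit
   immediate (constraint "eI"):

	li 3,1000		# fits in 16 bits, not prefixed
	pli 3,100000		# needs 34 bits, prefixed
	paddi 3,4,100000	# prefixed add immediate

   prefixed_paddi_p returns true for the latter two forms, and also for
   loading a PC-relative address (e.g. with PLA).  */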
25128
25129 /* Whether the next instruction needs a 'p' prefix issued before the
25130 instruction is printed out. */
25131 static bool next_insn_prefixed_p;
25132
25133 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
25134 outputting the assembler code. On the PowerPC, we remember if the current
25135 insn is a prefixed insn where we need to emit a 'p' before the insn.
25136
25137 In addition, if the insn is part of a PC-relative reference to an external
25138 label optimization, this is also recorded. */
25139 void
25140 rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
25141 {
25142 next_insn_prefixed_p = (get_attr_prefixed (insn) != PREFIXED_NO);
25143 return;
25144 }
25145
25146 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
25147 We use it to emit a 'p' for prefixed insns that is set in
25148 FINAL_PRESCAN_INSN. */
25149 void
25150 rs6000_asm_output_opcode (FILE *stream)
25151 {
25152 if (next_insn_prefixed_p)
25153 fprintf (stream, "p");
25154
25155 return;
25156 }
25157
25158 /* Adjust the length of an INSN. LENGTH is the currently-computed length and
25159 should be adjusted to reflect any required changes. This macro is used when
25160 there is some systematic length adjustment required that would be difficult
25161 to express in the length attribute.
25162
25163 On the PowerPC, we use this to adjust the length of an instruction if one or
25164 more prefixed instructions are generated, using the attribute
25165 max_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the
25166 hardware requires that a prefixed instruction does not cross a 64-byte
25167 boundary. This means the compiler has to assume the length of the first
25168 prefixed instruction is 12 bytes instead of 8 bytes. Since the length is
25169 already set for the non-prefixed instruction, we just need to update for the
25170 difference. */
25171
25172 int
25173 rs6000_adjust_insn_length (rtx_insn *insn, int length)
25174 {
25175 if (TARGET_PREFIXED && NONJUMP_INSN_P (insn))
25176 {
25177 rtx pattern = PATTERN (insn);
25178 if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
25179 && get_attr_prefixed (insn) == PREFIXED_YES)
25180 {
25181 int num_prefixed = get_attr_max_prefixed_insns (insn);
25182 length += 4 * (num_prefixed + 1);
25183 }
25184 }
25185
25186 return length;
25187 }
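/* Editor's worked example (illustrative): an insn whose length was computed
   as 4 bytes for the non-prefixed form, with max_prefixed_insns == 1, is
   adjusted to 4 + 4 * (1 + 1) = 12 bytes, i.e. 8 bytes for the prefixed
   instruction plus 4 bytes of possible padding to keep it from crossing a
   64-byte boundary.  */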
25188
25189 \f
25190 #ifdef HAVE_GAS_HIDDEN
25191 # define USE_HIDDEN_LINKONCE 1
25192 #else
25193 # define USE_HIDDEN_LINKONCE 0
25194 #endif
25195
25196 /* Fills in the label name that should be used for a 476 link stack thunk. */
25197
25198 void
25199 get_ppc476_thunk_name (char name[32])
25200 {
25201 gcc_assert (TARGET_LINK_STACK);
25202
25203 if (USE_HIDDEN_LINKONCE)
25204 sprintf (name, "__ppc476.get_thunk");
25205 else
25206 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
25207 }
25208
25209 /* This function emits the simple thunk routine that is used to preserve
25210 the link stack on the 476 cpu. */
25211
25212 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
25213 static void
25214 rs6000_code_end (void)
25215 {
25216 char name[32];
25217 tree decl;
25218
25219 if (!TARGET_LINK_STACK)
25220 return;
25221
25222 get_ppc476_thunk_name (name);
25223
25224 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
25225 build_function_type_list (void_type_node, NULL_TREE));
25226 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
25227 NULL_TREE, void_type_node);
25228 TREE_PUBLIC (decl) = 1;
25229 TREE_STATIC (decl) = 1;
25230
25231 #if RS6000_WEAK
25232 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
25233 {
25234 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
25235 targetm.asm_out.unique_section (decl, 0);
25236 switch_to_section (get_named_section (decl, NULL, 0));
25237 DECL_WEAK (decl) = 1;
25238 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
25239 targetm.asm_out.globalize_label (asm_out_file, name);
25240 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
25241 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
25242 }
25243 else
25244 #endif
25245 {
25246 switch_to_section (text_section);
25247 ASM_OUTPUT_LABEL (asm_out_file, name);
25248 }
25249
25250 DECL_INITIAL (decl) = make_node (BLOCK);
25251 current_function_decl = decl;
25252 allocate_struct_function (decl, false);
25253 init_function_start (decl);
25254 first_function_block_is_cold = false;
25255 /* Make sure unwind info is emitted for the thunk if needed. */
25256 final_start_function (emit_barrier (), asm_out_file, 1);
25257
25258 fputs ("\tblr\n", asm_out_file);
25259
25260 final_end_function ();
25261 init_insn_lengths ();
25262 free_after_compilation (cfun);
25263 set_cfun (NULL);
25264 current_function_decl = NULL;
25265 }
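/* Editor's illustration (assembly sketch, assuming the USE_HIDDEN_LINKONCE
   path): the emitted thunk is simply

	__ppc476.get_thunk:
		blr

   placed in a weak, hidden, comdat section so that multiple objects can
   each emit a copy.  */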
25266
25267 /* Add r30 to hard reg set if the prologue sets it up and it is not
25268 pic_offset_table_rtx. */
25269
25270 static void
25271 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
25272 {
25273 if (!TARGET_SINGLE_PIC_BASE
25274 && TARGET_TOC
25275 && TARGET_MINIMAL_TOC
25276 && !constant_pool_empty_p ())
25277 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
25278 if (cfun->machine->split_stack_argp_used)
25279 add_to_hard_reg_set (&set->set, Pmode, 12);
25280
25281 /* Make sure the hard reg set doesn't include r2, which was possibly added
25282 via PIC_OFFSET_TABLE_REGNUM. */
25283 if (TARGET_TOC)
25284 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
25285 }
25286
25287 \f
25288 /* Helper function for rs6000_split_logical to emit a logical instruction after
25289 splitting the operation into single GPR registers.
25290
25291 DEST is the destination register.
25292 OP1 and OP2 are the input source registers.
25293 CODE is the base operation (AND, IOR, XOR, NOT).
25294 MODE is the machine mode.
25295 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25296 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25297 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
25298
25299 static void
25300 rs6000_split_logical_inner (rtx dest,
25301 rtx op1,
25302 rtx op2,
25303 enum rtx_code code,
25304 machine_mode mode,
25305 bool complement_final_p,
25306 bool complement_op1_p,
25307 bool complement_op2_p)
25308 {
25309 rtx bool_rtx;
25310
25311 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
25312 if (op2 && CONST_INT_P (op2)
25313 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
25314 && !complement_final_p && !complement_op1_p && !complement_op2_p)
25315 {
25316 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
25317 HOST_WIDE_INT value = INTVAL (op2) & mask;
25318
25319 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
25320 if (code == AND)
25321 {
25322 if (value == 0)
25323 {
25324 emit_insn (gen_rtx_SET (dest, const0_rtx));
25325 return;
25326 }
25327
25328 else if (value == mask)
25329 {
25330 if (!rtx_equal_p (dest, op1))
25331 emit_insn (gen_rtx_SET (dest, op1));
25332 return;
25333 }
25334 }
25335
25336 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
25337 into separate ORI/ORIS or XORI/XORIS instructions. */
25338 else if (code == IOR || code == XOR)
25339 {
25340 if (value == 0)
25341 {
25342 if (!rtx_equal_p (dest, op1))
25343 emit_insn (gen_rtx_SET (dest, op1));
25344 return;
25345 }
25346 }
25347 }
25348
25349 if (code == AND && mode == SImode
25350 && !complement_final_p && !complement_op1_p && !complement_op2_p)
25351 {
25352 emit_insn (gen_andsi3 (dest, op1, op2));
25353 return;
25354 }
25355
25356 if (complement_op1_p)
25357 op1 = gen_rtx_NOT (mode, op1);
25358
25359 if (complement_op2_p)
25360 op2 = gen_rtx_NOT (mode, op2);
25361
25362 /* For canonical RTL, if only one arm is inverted it is the first. */
25363 if (!complement_op1_p && complement_op2_p)
25364 std::swap (op1, op2);
25365
25366 bool_rtx = ((code == NOT)
25367 ? gen_rtx_NOT (mode, op1)
25368 : gen_rtx_fmt_ee (code, mode, op1, op2));
25369
25370 if (complement_final_p)
25371 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
25372
25373 emit_insn (gen_rtx_SET (dest, bool_rtx));
25374 }
25375
25376 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
25377 operations are split immediately during RTL generation to allow for more
25378 optimizations of the AND/IOR/XOR.
25379
25380 OPERANDS is an array containing the destination and two input operands.
25381 CODE is the base operation (AND, IOR, XOR, NOT).
25382 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25383 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25384 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
25388
25389 static void
25390 rs6000_split_logical_di (rtx operands[3],
25391 enum rtx_code code,
25392 bool complement_final_p,
25393 bool complement_op1_p,
25394 bool complement_op2_p)
25395 {
25396 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
25397 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
25398 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
25399 enum hi_lo { hi = 0, lo = 1 };
25400 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
25401 size_t i;
25402
25403 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
25404 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
25405 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
25406 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
25407
25408 if (code == NOT)
25409 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
25410 else
25411 {
25412 if (!CONST_INT_P (operands[2]))
25413 {
25414 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
25415 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
25416 }
25417 else
25418 {
25419 HOST_WIDE_INT value = INTVAL (operands[2]);
25420 HOST_WIDE_INT value_hi_lo[2];
25421
25422 gcc_assert (!complement_final_p);
25423 gcc_assert (!complement_op1_p);
25424 gcc_assert (!complement_op2_p);
25425
25426 value_hi_lo[hi] = value >> 32;
25427 value_hi_lo[lo] = value & lower_32bits;
25428
25429 for (i = 0; i < 2; i++)
25430 {
25431 HOST_WIDE_INT sub_value = value_hi_lo[i];
25432
25433 if (sub_value & sign_bit)
25434 sub_value |= upper_32bits;
25435
25436 op2_hi_lo[i] = GEN_INT (sub_value);
25437
25438 /* If this is an AND instruction, check to see if we need to load
25439 the value in a register. */
25440 if (code == AND && sub_value != -1 && sub_value != 0
25441 && !and_operand (op2_hi_lo[i], SImode))
25442 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
25443 }
25444 }
25445 }
25446
25447 for (i = 0; i < 2; i++)
25448 {
25449 /* Split large IOR/XOR operations. */
25450 if ((code == IOR || code == XOR)
25451 && CONST_INT_P (op2_hi_lo[i])
25452 && !complement_final_p
25453 && !complement_op1_p
25454 && !complement_op2_p
25455 && !logical_const_operand (op2_hi_lo[i], SImode))
25456 {
25457 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
25458 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
25459 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
25460 rtx tmp = gen_reg_rtx (SImode);
25461
25462 /* Make sure the constant is sign extended. */
25463 if ((hi_16bits & sign_bit) != 0)
25464 hi_16bits |= upper_32bits;
25465
25466 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
25467 code, SImode, false, false, false);
25468
25469 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
25470 code, SImode, false, false, false);
25471 }
25472 else
25473 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
25474 code, SImode, complement_final_p,
25475 complement_op1_p, complement_op2_p);
25476 }
25477
25478 return;
25479 }
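/* Editor's illustration (a sketch, register numbers hypothetical): on a
   32-bit target, a DImode XOR with the constant 0x123456789abcdef0 is split
   into two SImode halves, and each half that is not a 16-bit logical
   constant is further split into a shifted and an unshifted immediate
   operation, roughly:

	xoris 3,5,0x1234	# high word, upper 16 bits
	xori 3,3,0x5678		# high word, lower 16 bits
	xoris 4,6,0x9abc	# low word, upper 16 bits
	xori 4,4,0xdef0		# low word, lower 16 bits  */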
25480
25481 /* Split the insns that make up boolean operations operating on multiple GPR
25482 registers. The boolean MD patterns ensure that the inputs either are
25483 exactly the same as the output registers, or there is no overlap.
25484
25485 OPERANDS is an array containing the destination and two input operands.
25486 CODE is the base operation (AND, IOR, XOR, NOT).
25487 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25488 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25489 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
25490
25491 void
25492 rs6000_split_logical (rtx operands[3],
25493 enum rtx_code code,
25494 bool complement_final_p,
25495 bool complement_op1_p,
25496 bool complement_op2_p)
25497 {
25498 machine_mode mode = GET_MODE (operands[0]);
25499 machine_mode sub_mode;
25500 rtx op0, op1, op2;
25501 int sub_size, regno0, regno1, nregs, i;
25502
25503 /* If this is DImode, use the specialized version that can run before
25504 register allocation. */
25505 if (mode == DImode && !TARGET_POWERPC64)
25506 {
25507 rs6000_split_logical_di (operands, code, complement_final_p,
25508 complement_op1_p, complement_op2_p);
25509 return;
25510 }
25511
25512 op0 = operands[0];
25513 op1 = operands[1];
25514 op2 = (code == NOT) ? NULL_RTX : operands[2];
25515 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
25516 sub_size = GET_MODE_SIZE (sub_mode);
25517 regno0 = REGNO (op0);
25518 regno1 = REGNO (op1);
25519
25520 gcc_assert (reload_completed);
25521 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
25522 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
25523
25524 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
25525 gcc_assert (nregs > 1);
25526
25527 if (op2 && REG_P (op2))
25528 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
25529
25530 for (i = 0; i < nregs; i++)
25531 {
25532 int offset = i * sub_size;
25533 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
25534 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
25535 rtx sub_op2 = ((code == NOT)
25536 ? NULL_RTX
25537 : simplify_subreg (sub_mode, op2, mode, offset));
25538
25539 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
25540 complement_final_p, complement_op1_p,
25541 complement_op2_p);
25542 }
25543
25544 return;
25545 }
25546
25547 \f
25548 /* Return true if the peephole2 can combine a load involving a combination of
25549 an addis instruction and a load with an offset that can be fused together on
25550 a power8. */
25551
25552 bool
25553 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
25554 rtx addis_value, /* addis value. */
25555 rtx target, /* target register that is loaded. */
25556 rtx mem) /* bottom part of the memory addr. */
25557 {
25558 rtx addr;
25559 rtx base_reg;
25560
25561 /* Validate arguments. */
25562 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
25563 return false;
25564
25565 if (!base_reg_operand (target, GET_MODE (target)))
25566 return false;
25567
25568 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
25569 return false;
25570
25571 /* Allow sign/zero extension. */
25572 if (GET_CODE (mem) == ZERO_EXTEND
25573 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
25574 mem = XEXP (mem, 0);
25575
25576 if (!MEM_P (mem))
25577 return false;
25578
25579 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
25580 return false;
25581
25582 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
25583 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
25584 return false;
25585
25586 /* Validate that the register used to load the high value is either the
25587 register being loaded, or we can safely replace its use.
25588
25589 This function is only called from the peephole2 pass and we assume that
25590 there are 2 instructions in the peephole (addis and load), so we want to
25591 check if the target register was not used in the memory address and the
25592 register to hold the addis result is dead after the peephole. */
25593 if (REGNO (addis_reg) != REGNO (target))
25594 {
25595 if (reg_mentioned_p (target, mem))
25596 return false;
25597
25598 if (!peep2_reg_dead_p (2, addis_reg))
25599 return false;
25600
25601 /* If the target register being loaded is the stack pointer, we must
25602 avoid loading any other value into it, even temporarily. */
25603 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
25604 return false;
25605 }
25606
25607 base_reg = XEXP (addr, 0);
25608 return REGNO (addis_reg) == REGNO (base_reg);
25609 }
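/* Editor's illustration: the peephole2 looks for a pair like

	addis 9,2,.LC0@toc@ha
	lwz 9,.LC0@toc@l(9)

   where the addis result feeds the base register of the load and both use
   the same target register (or the addis register is dead afterwards), so
   the two insns can be fused on a power8.  */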
25610
25611 /* During the peephole2 pass, adjust and expand the insns for a load fusion
25612 sequence. We adjust the addis register to use the target register. If the
25613 load sign extends, we adjust the code to do a zero-extending load followed
25614 by an explicit sign extension, since the fusion only covers zero-extending
25615 loads.
25616
25617 The operands are:
25618 operands[0] register set with addis (to be replaced with target)
25619 operands[1] value set via addis
25620 operands[2] target register being loaded
25621 operands[3] D-form memory reference using operands[0]. */
25622
25623 void
25624 expand_fusion_gpr_load (rtx *operands)
25625 {
25626 rtx addis_value = operands[1];
25627 rtx target = operands[2];
25628 rtx orig_mem = operands[3];
25629 rtx new_addr, new_mem, orig_addr, offset;
25630 enum rtx_code plus_or_lo_sum;
25631 machine_mode target_mode = GET_MODE (target);
25632 machine_mode extend_mode = target_mode;
25633 machine_mode ptr_mode = Pmode;
25634 enum rtx_code extend = UNKNOWN;
25635
25636 if (GET_CODE (orig_mem) == ZERO_EXTEND
25637 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
25638 {
25639 extend = GET_CODE (orig_mem);
25640 orig_mem = XEXP (orig_mem, 0);
25641 target_mode = GET_MODE (orig_mem);
25642 }
25643
25644 gcc_assert (MEM_P (orig_mem));
25645
25646 orig_addr = XEXP (orig_mem, 0);
25647 plus_or_lo_sum = GET_CODE (orig_addr);
25648 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
25649
25650 offset = XEXP (orig_addr, 1);
25651 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
25652 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
25653
25654 if (extend != UNKNOWN)
25655 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
25656
25657 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
25658 UNSPEC_FUSION_GPR);
25659 emit_insn (gen_rtx_SET (target, new_mem));
25660
25661 if (extend == SIGN_EXTEND)
25662 {
25663 int sub_off = ((BYTES_BIG_ENDIAN)
25664 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
25665 : 0);
25666 rtx sign_reg
25667 = simplify_subreg (target_mode, target, extend_mode, sub_off);
25668
25669 emit_insn (gen_rtx_SET (target,
25670 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
25671 }
25672
25673 return;
25674 }
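/* Editor's illustration (a sketch): for a fused SImode-to-DImode
   sign-extending load, the expansion above produces a zero-extending fused
   load followed by an explicit sign extension, e.g.:

	addis 9,2,sym@toc@ha
	lwz 9,sym@toc@l(9)	# fused zero-extending load
	extsw 9,9		# separate sign extension  */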
25675
25676 /* Emit the addis instruction that will be part of a fused instruction
25677 sequence. */
25678
25679 void
25680 emit_fusion_addis (rtx target, rtx addis_value)
25681 {
25682 rtx fuse_ops[10];
25683 const char *addis_str = NULL;
25684
25685 /* Emit the addis instruction. */
25686 fuse_ops[0] = target;
25687 if (satisfies_constraint_L (addis_value))
25688 {
25689 fuse_ops[1] = addis_value;
25690 addis_str = "lis %0,%v1";
25691 }
25692
25693 else if (GET_CODE (addis_value) == PLUS)
25694 {
25695 rtx op0 = XEXP (addis_value, 0);
25696 rtx op1 = XEXP (addis_value, 1);
25697
25698 if (REG_P (op0) && CONST_INT_P (op1)
25699 && satisfies_constraint_L (op1))
25700 {
25701 fuse_ops[1] = op0;
25702 fuse_ops[2] = op1;
25703 addis_str = "addis %0,%1,%v2";
25704 }
25705 }
25706
25707 else if (GET_CODE (addis_value) == HIGH)
25708 {
25709 rtx value = XEXP (addis_value, 0);
25710 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
25711 {
25712 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
25713 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
25714 if (TARGET_ELF)
25715 addis_str = "addis %0,%2,%1@toc@ha";
25716
25717 else if (TARGET_XCOFF)
25718 addis_str = "addis %0,%1@u(%2)";
25719
25720 else
25721 gcc_unreachable ();
25722 }
25723
25724 else if (GET_CODE (value) == PLUS)
25725 {
25726 rtx op0 = XEXP (value, 0);
25727 rtx op1 = XEXP (value, 1);
25728
25729 if (GET_CODE (op0) == UNSPEC
25730 && XINT (op0, 1) == UNSPEC_TOCREL
25731 && CONST_INT_P (op1))
25732 {
25733 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
25734 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
25735 fuse_ops[3] = op1;
25736 if (TARGET_ELF)
25737 addis_str = "addis %0,%2,%1+%3@toc@ha";
25738
25739 else if (TARGET_XCOFF)
25740 addis_str = "addis %0,%1+%3@u(%2)";
25741
25742 else
25743 gcc_unreachable ();
25744 }
25745 }
25746
25747 else if (satisfies_constraint_L (value))
25748 {
25749 fuse_ops[1] = value;
25750 addis_str = "lis %0,%v1";
25751 }
25752
25753 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
25754 {
25755 fuse_ops[1] = value;
25756 addis_str = "lis %0,%1@ha";
25757 }
25758 }
25759
25760 if (!addis_str)
25761 fatal_insn ("Could not generate addis value for fusion", addis_value);
25762
25763 output_asm_insn (addis_str, fuse_ops);
25764 }
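/* Editor's illustration: depending on the form of ADDIS_VALUE, the template
   strings chosen above produce output such as

	lis 9,0x1234		# constraint "L" constant
	addis 9,10,0x1234	# reg + constraint "L" constant
	addis 9,2,sym@toc@ha	# HIGH of a TOC-relative unspec (ELF)

   (register numbers and symbol are hypothetical).  */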
25765
25766 /* Emit a D-form load or store instruction that is the second instruction
25767 of a fusion sequence. */
25768
25769 static void
25770 emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
25771 {
25772 rtx fuse_ops[10];
25773 char insn_template[80];
25774
25775 fuse_ops[0] = load_reg;
25776 fuse_ops[1] = addis_reg;
25777
25778 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
25779 {
25780 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
25781 fuse_ops[2] = offset;
25782 output_asm_insn (insn_template, fuse_ops);
25783 }
25784
25785 else if (GET_CODE (offset) == UNSPEC
25786 && XINT (offset, 1) == UNSPEC_TOCREL)
25787 {
25788 if (TARGET_ELF)
25789 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
25790
25791 else if (TARGET_XCOFF)
25792 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
25793
25794 else
25795 gcc_unreachable ();
25796
25797 fuse_ops[2] = XVECEXP (offset, 0, 0);
25798 output_asm_insn (insn_template, fuse_ops);
25799 }
25800
25801 else if (GET_CODE (offset) == PLUS
25802 && GET_CODE (XEXP (offset, 0)) == UNSPEC
25803 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
25804 && CONST_INT_P (XEXP (offset, 1)))
25805 {
25806 rtx tocrel_unspec = XEXP (offset, 0);
25807 if (TARGET_ELF)
25808 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
25809
25810 else if (TARGET_XCOFF)
25811 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
25812
25813 else
25814 gcc_unreachable ();
25815
25816 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
25817 fuse_ops[3] = XEXP (offset, 1);
25818 output_asm_insn (insn_template, fuse_ops);
25819 }
25820
25821 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
25822 {
25823 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
25824
25825 fuse_ops[2] = offset;
25826 output_asm_insn (insn_template, fuse_ops);
25827 }
25828
25829 else
25830 fatal_insn ("Unable to generate load/store offset for fusion", offset);
25831
25832 return;
25833 }
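/* Editor's illustration: for INSN_STR "lwz" and a CONST_INT offset, the
   template built above is "lwz %0,%2(%1)", which prints as e.g.

	lwz 3,8(9)

   while a TOC-relative offset prints as "lwz 3,sym@toc@l(9)" on ELF.  */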
25834
25835 /* Given an address, convert it into the addis and load offset parts. Addresses
25836 created during the peephole2 process look like:
25837 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
25838 (unspec [(...)] UNSPEC_TOCREL)) */
25839
25840 static void
25841 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
25842 {
25843 rtx hi, lo;
25844
25845 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
25846 {
25847 hi = XEXP (addr, 0);
25848 lo = XEXP (addr, 1);
25849 }
25850 else
25851 gcc_unreachable ();
25852
25853 *p_hi = hi;
25854 *p_lo = lo;
25855 }
25856
25857 /* Return a string to fuse an addis instruction with a GPR load into the same
25858 register that the addis instruction set up. The address that is used
25859 is the logical address that was formed during peephole2:
25860 (lo_sum (high) (low-part))
25861
25862 The code is complicated, so we call output_asm_insn directly, and just
25863 return "". */
25864
25865 const char *
25866 emit_fusion_gpr_load (rtx target, rtx mem)
25867 {
25868 rtx addis_value;
25869 rtx addr;
25870 rtx load_offset;
25871 const char *load_str = NULL;
25872 machine_mode mode;
25873
25874 if (GET_CODE (mem) == ZERO_EXTEND)
25875 mem = XEXP (mem, 0);
25876
25877 gcc_assert (REG_P (target) && MEM_P (mem));
25878
25879 addr = XEXP (mem, 0);
25880 fusion_split_address (addr, &addis_value, &load_offset);
25881
25882 /* Now emit the load instruction to the same register. */
25883 mode = GET_MODE (mem);
25884 switch (mode)
25885 {
25886 case E_QImode:
25887 load_str = "lbz";
25888 break;
25889
25890 case E_HImode:
25891 load_str = "lhz";
25892 break;
25893
25894 case E_SImode:
25895 case E_SFmode:
25896 load_str = "lwz";
25897 break;
25898
25899 case E_DImode:
25900 case E_DFmode:
25901 gcc_assert (TARGET_POWERPC64);
25902 load_str = "ld";
25903 break;
25904
25905 default:
25906 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
25907 }
25908
25909 /* Emit the addis instruction. */
25910 emit_fusion_addis (target, addis_value);
25911
25912 /* Emit the D-form load instruction. */
25913 emit_fusion_load (target, target, load_offset, load_str);
25914
25915 return "";
25916 }
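/* Editor's illustration: for an SImode load through the TOC, the two calls
   above emit the fused pair

	addis 9,2,.LC0@toc@ha
	lwz 9,.LC0@toc@l(9)

   with the addis and the load both targeting the loaded register.  */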
25917 \f
25918
25919 #ifdef RS6000_GLIBC_ATOMIC_FENV
25920 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
25921 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
25922 #endif
25923
25924 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
25925
25926 static void
25927 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
25928 {
25929 if (!TARGET_HARD_FLOAT)
25930 {
25931 #ifdef RS6000_GLIBC_ATOMIC_FENV
25932 if (atomic_hold_decl == NULL_TREE)
25933 {
25934 atomic_hold_decl
25935 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
25936 get_identifier ("__atomic_feholdexcept"),
25937 build_function_type_list (void_type_node,
25938 double_ptr_type_node,
25939 NULL_TREE));
25940 TREE_PUBLIC (atomic_hold_decl) = 1;
25941 DECL_EXTERNAL (atomic_hold_decl) = 1;
25942 }
25943
25944 if (atomic_clear_decl == NULL_TREE)
25945 {
25946 atomic_clear_decl
25947 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
25948 get_identifier ("__atomic_feclearexcept"),
25949 build_function_type_list (void_type_node,
25950 NULL_TREE));
25951 TREE_PUBLIC (atomic_clear_decl) = 1;
25952 DECL_EXTERNAL (atomic_clear_decl) = 1;
25953 }
25954
25955 tree const_double = build_qualified_type (double_type_node,
25956 TYPE_QUAL_CONST);
25957 tree const_double_ptr = build_pointer_type (const_double);
25958 if (atomic_update_decl == NULL_TREE)
25959 {
25960 atomic_update_decl
25961 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
25962 get_identifier ("__atomic_feupdateenv"),
25963 build_function_type_list (void_type_node,
25964 const_double_ptr,
25965 NULL_TREE));
25966 TREE_PUBLIC (atomic_update_decl) = 1;
25967 DECL_EXTERNAL (atomic_update_decl) = 1;
25968 }
25969
25970 tree fenv_var = create_tmp_var_raw (double_type_node);
25971 TREE_ADDRESSABLE (fenv_var) = 1;
25972 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);
25973
25974 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
25975 *clear = build_call_expr (atomic_clear_decl, 0);
25976 *update = build_call_expr (atomic_update_decl, 1,
25977 fold_convert (const_double_ptr, fenv_addr));
25978 #endif
25979 return;
25980 }
25981
25982 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
25983 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
25984 tree call_mffs = build_call_expr (mffs, 0);
25985
25986 /* Generates the equivalent of feholdexcept (&fenv_var)
25987
25988 fenv_var = __builtin_mffs ();
25989 double fenv_hold;
25990 *(uint64_t*)&fenv_hold = *(uint64_t*)&fenv_var & 0xffffffff00000007LL;
25991 __builtin_mtfsf (0xff, fenv_hold); */
25992
25993 /* Mask to clear everything except for the rounding modes and non-IEEE
25994 arithmetic flag. */
25995 const unsigned HOST_WIDE_INT hold_exception_mask =
25996 HOST_WIDE_INT_C (0xffffffff00000007);
25997
25998 tree fenv_var = create_tmp_var_raw (double_type_node);
25999
26000 tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
26001
26002 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
26003 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
26004 build_int_cst (uint64_type_node,
26005 hold_exception_mask));
26006
26007 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
26008 fenv_llu_and);
26009
26010 tree hold_mtfsf = build_call_expr (mtfsf, 2,
26011 build_int_cst (unsigned_type_node, 0xff),
26012 fenv_hold_mtfsf);
26013
26014 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
26015
26016 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
26017
26018 double fenv_clear = __builtin_mffs ();
26019 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
26020 __builtin_mtfsf (0xff, fenv_clear); */
26021
26022 /* Mask to clear everything in the lower word of the FPSCR: the exception
26023 status and enable bits, the non-IEEE flag, and the rounding modes. */
26024 const unsigned HOST_WIDE_INT clear_exception_mask =
26025 HOST_WIDE_INT_C (0xffffffff00000000);
26026
26027 tree fenv_clear = create_tmp_var_raw (double_type_node);
26028
26029 tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
26030
26031 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
26032 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
26033 fenv_clean_llu,
26034 build_int_cst (uint64_type_node,
26035 clear_exception_mask));
26036
26037 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
26038 fenv_clear_llu_and);
26039
26040 tree clear_mtfsf = build_call_expr (mtfsf, 2,
26041 build_int_cst (unsigned_type_node, 0xff),
26042 fenv_clear_mtfsf);
26043
26044 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
26045
26046 /* Generates the equivalent of feupdateenv (&fenv_var)
26047
26048 double old_fenv = __builtin_mffs ();
26049 double fenv_update;
26050 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
26051 (*(uint64_t*)&fenv_var & 0x1ff80fff);
26052 __builtin_mtfsf (0xff, fenv_update); */
26053
26054 const unsigned HOST_WIDE_INT update_exception_mask =
26055 HOST_WIDE_INT_C (0xffffffff1fffff00);
26056 const unsigned HOST_WIDE_INT new_exception_mask =
26057 HOST_WIDE_INT_C (0x1ff80fff);
26058
26059 tree old_fenv = create_tmp_var_raw (double_type_node);
26060 tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
26061
26062 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
26063 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
26064 build_int_cst (uint64_type_node,
26065 update_exception_mask));
26066
26067 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
26068 build_int_cst (uint64_type_node,
26069 new_exception_mask));
26070
26071 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
26072 old_llu_and, new_llu_and);
26073
26074 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
26075 new_llu_mask);
26076
26077 tree update_mtfsf = build_call_expr (mtfsf, 2,
26078 build_int_cst (unsigned_type_node, 0xff),
26079 fenv_update_mtfsf);
26080
26081 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
26082 }
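/* Editor's note (illustrative): this hook is used when expanding atomic
   compound assignments on floating-point types, e.g.

	_Atomic double d;
	d += 1.0;

   The *hold sequence runs before the compare-and-exchange loop, *clear runs
   when an iteration fails and must retry, and *update runs once the store
   succeeds, so only the exceptions of the successful computation are
   raised.  */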
26083
26084 void
26085 rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
26086 {
26087 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26088
26089 rtx_tmp0 = gen_reg_rtx (V2DFmode);
26090 rtx_tmp1 = gen_reg_rtx (V2DFmode);
26091
26092 /* The destination of the vmrgew instruction layout is:
26093 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
26094 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
26095 vmrgew instruction will be correct. */
26096 if (BYTES_BIG_ENDIAN)
26097 {
26098 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
26099 GEN_INT (0)));
26100 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
26101 GEN_INT (3)));
26102 }
26103 else
26104 {
26105 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
26106 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
26107 }
26108
26109 rtx_tmp2 = gen_reg_rtx (V4SFmode);
26110 rtx_tmp3 = gen_reg_rtx (V4SFmode);
26111
26112 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
26113 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));
26114
26115 if (BYTES_BIG_ENDIAN)
26116 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
26117 else
26118 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
26119 }
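/* Editor's note (an illustrative reading of the permutes above, traced
   through the big-endian path): the net effect is

	dst[0] = (float) src1[0];	dst[1] = (float) src1[1];
	dst[2] = (float) src2[0];	dst[3] = (float) src2[1];

   with the little-endian path arranged to give the same element order.  */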
26120
26121 void
26122 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
26123 {
26124 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26125
26126 rtx_tmp0 = gen_reg_rtx (V2DImode);
26127 rtx_tmp1 = gen_reg_rtx (V2DImode);
26128
26129 /* The destination of the vmrgew instruction layout is:
26130 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
26131 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
26132 vmrgew instruction will be correct. */
26133 if (BYTES_BIG_ENDIAN)
26134 {
26135 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
26136 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
26137 }
26138 else
26139 {
26140 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
26141 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
26142 }
26143
26144 rtx_tmp2 = gen_reg_rtx (V4SFmode);
26145 rtx_tmp3 = gen_reg_rtx (V4SFmode);
26146
26147 if (signed_convert)
26148 {
26149 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
26150 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
26151 }
26152 else
26153 {
26154 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
26155 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
26156 }
26157
26158 if (BYTES_BIG_ENDIAN)
26159 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
26160 else
26161 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
26162 }
26163
26164 void
26165 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
26166 rtx src2)
26167 {
26168 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26169
26170 rtx_tmp0 = gen_reg_rtx (V2DFmode);
26171 rtx_tmp1 = gen_reg_rtx (V2DFmode);
26172
26173 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
26174 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
26175
26176 rtx_tmp2 = gen_reg_rtx (V4SImode);
26177 rtx_tmp3 = gen_reg_rtx (V4SImode);
26178
26179 if (signed_convert)
26180 {
26181 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
26182 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
26183 }
26184 else
26185 {
26186 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
26187 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
26188 }
26189
26190 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
26191 }
26192
26193 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
26194
26195 static bool
26196 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
26197 optimization_type opt_type)
26198 {
26199 switch (op)
26200 {
26201 case rsqrt_optab:
26202 return (opt_type == OPTIMIZE_FOR_SPEED
26203 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
26204
26205 default:
26206 return true;
26207 }
26208 }
26209
26210 /* Implement TARGET_CONSTANT_ALIGNMENT. */
26211
26212 static HOST_WIDE_INT
26213 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
26214 {
26215 if (TREE_CODE (exp) == STRING_CST
26216 && (STRICT_ALIGNMENT || !optimize_size))
26217 return MAX (align, BITS_PER_WORD);
26218 return align;
26219 }
26220
26221 /* Implement TARGET_STARTING_FRAME_OFFSET. */
26222
26223 static HOST_WIDE_INT
26224 rs6000_starting_frame_offset (void)
26225 {
26226 if (FRAME_GROWS_DOWNWARD)
26227 return 0;
26228 return RS6000_STARTING_FRAME_OFFSET;
26229 }
26230 \f
26231
26232 /* Create an alias for a mangled name where we have changed the mangling (in
26233 GCC 8.1, we used U10__float128, and now we use u9__ieee128). This is called
26234 via the target hook TARGET_ASM_GLOBALIZE_DECL_NAME. */
26235
26236 #if TARGET_ELF && RS6000_WEAK
26237 static void
26238 rs6000_globalize_decl_name (FILE * stream, tree decl)
26239 {
26240 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
26241
26242 targetm.asm_out.globalize_label (stream, name);
26243
26244 if (rs6000_passes_ieee128 && name[0] == '_' && name[1] == 'Z')
26245 {
26246 tree save_asm_name = DECL_ASSEMBLER_NAME (decl);
26247 const char *old_name;
26248
26249 ieee128_mangling_gcc_8_1 = true;
26250 lang_hooks.set_decl_assembler_name (decl);
26251 old_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
26252 SET_DECL_ASSEMBLER_NAME (decl, save_asm_name);
26253 ieee128_mangling_gcc_8_1 = false;
26254
26255 if (strcmp (name, old_name) != 0)
26256 {
26257 fprintf (stream, "\t.weak %s\n", old_name);
26258 fprintf (stream, "\t.set %s,%s\n", old_name, name);
26259 }
26260 }
26261 }
26262 #endif
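/* Editor's illustration (hypothetical mangled names): for a C++ function
   such as "void f (__float128)", the code above emits

	.weak _Z1fU10__float128
	.set _Z1fU10__float128,_Z1fu9__ieee128

   so that objects compiled with the GCC 8.1 mangling still link.  */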
26263
26264 \f
26265 /* On 64-bit Linux and Freebsd systems, possibly switch the long double library
26266 function names from <foo>l to <foo>f128 if the default long double type is
26267 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
26268 include file switches the names on systems that support long double as IEEE
26269 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
26270 In the future, glibc will export names like __ieee128_sinf128 and we can
26271 switch to using those instead of using sinf128, which pollutes the user's
26272 namespace.
26273
26274 This will switch the names for Fortran math functions as well (which doesn't
26275 use math.h). However, Fortran needs other changes to the compiler and
26276 library before you can switch the real*16 type at compile time.
26277
26278 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
26279 only do this if the default is that long double is IBM extended double, and
26280 the user asked for IEEE 128-bit. */
26281
26282 static tree
26283 rs6000_mangle_decl_assembler_name (tree decl, tree id)
26284 {
26285 if (!TARGET_IEEEQUAD_DEFAULT && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
26286 && TREE_CODE (decl) == FUNCTION_DECL && DECL_IS_BUILTIN (decl))
26287 {
26288 size_t len = IDENTIFIER_LENGTH (id);
26289 const char *name = IDENTIFIER_POINTER (id);
26290
26291 if (name[len - 1] == 'l')
26292 {
26293 bool uses_ieee128_p = false;
26294 tree type = TREE_TYPE (decl);
26295 machine_mode ret_mode = TYPE_MODE (type);
26296
26297 /* See if the function returns an IEEE 128-bit floating point type or
26298 complex type. */
26299 if (ret_mode == TFmode || ret_mode == TCmode)
26300 uses_ieee128_p = true;
26301 else
26302 {
26303 function_args_iterator args_iter;
26304 tree arg;
26305
26306 /* See if the function passes an IEEE 128-bit floating point type
26307 or complex type. */
26308 FOREACH_FUNCTION_ARGS (type, arg, args_iter)
26309 {
26310 machine_mode arg_mode = TYPE_MODE (arg);
26311 if (arg_mode == TFmode || arg_mode == TCmode)
26312 {
26313 uses_ieee128_p = true;
26314 break;
26315 }
26316 }
26317 }
26318
26319 /* If we passed or returned an IEEE 128-bit floating point type,
26320 change the name. */
26321 if (uses_ieee128_p)
26322 {
26323 char *name2 = (char *) alloca (len + 4);
26324 memcpy (name2, name, len - 1);
26325 strcpy (name2 + len - 1, "f128");
26326 id = get_identifier (name2);
26327 }
26328 }
26329 }
26330
26331 return id;
26332 }
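/* Editor's illustration: with -mabi=ieeelongdouble on a target whose default
   long double is IBM extended double, a builtin such as "sinl" (which
   returns a TFmode long double) is renamed by the code above:

	sinl -> sinf128

   Non-builtin functions, and functions that neither take nor return TFmode
   or TCmode values, are left alone.  */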
26333
26334 /* Predict whether the given loop in gimple will be transformed in the RTL
26335 doloop_optimize pass. */
26336
26337 static bool
26338 rs6000_predict_doloop_p (struct loop *loop)
26339 {
26340 gcc_assert (loop);
26341
26342 /* On rs6000, targetm.can_use_doloop_p is actually
26343 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
26344 if (loop->inner != NULL)
26345 {
26346 if (dump_file && (dump_flags & TDF_DETAILS))
26347 fprintf (dump_file, "Predict doloop failure due to"
26348 " loop nesting.\n");
26349 return false;
26350 }
26351
26352 return true;
26353 }
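/* Editor's illustration: in a nest like

	for (i = 0; i < n; i++)
	  for (j = 0; j < m; j++)
	    ...

   the outer loop has loop->inner != NULL, so it is predicted not to become
   a doloop; only the innermost loop can use the counter-based bdnz form.  */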
26354
26355 struct gcc_target targetm = TARGET_INITIALIZER;
26356
26357 #include "gt-rs6000.h"