1 /* Subroutines used for code generation on IBM RS/6000.
2 Copyright (C) 1991-2020 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #define IN_TARGET_CODE 1
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "memmodel.h"
30 #include "gimple.h"
31 #include "cfghooks.h"
32 #include "cfgloop.h"
33 #include "df.h"
34 #include "tm_p.h"
35 #include "stringpool.h"
36 #include "expmed.h"
37 #include "optabs.h"
38 #include "regs.h"
39 #include "ira.h"
40 #include "recog.h"
41 #include "cgraph.h"
42 #include "diagnostic-core.h"
43 #include "insn-attr.h"
44 #include "flags.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "attribs.h"
48 #include "stor-layout.h"
49 #include "calls.h"
50 #include "print-tree.h"
51 #include "varasm.h"
52 #include "explow.h"
53 #include "expr.h"
54 #include "output.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
57 #include "reload.h"
58 #include "sched-int.h"
59 #include "gimplify.h"
60 #include "gimple-fold.h"
61 #include "gimple-iterator.h"
62 #include "gimple-ssa.h"
63 #include "gimple-walk.h"
64 #include "intl.h"
65 #include "tm-constrs.h"
66 #include "tree-vectorizer.h"
67 #include "target-globals.h"
68 #include "builtins.h"
69 #include "tree-vector-builder.h"
70 #include "context.h"
71 #include "tree-pass.h"
72 #include "except.h"
73 #if TARGET_XCOFF
74 #include "xcoffout.h" /* get declarations of xcoff_*_section_name */
75 #endif
76 #include "case-cfn-macros.h"
77 #include "ppc-auxv.h"
78 #include "tree-ssa-propagate.h"
79 #include "tree-vrp.h"
80 #include "tree-ssanames.h"
81 #include "rs6000-internal.h"
82 #include "opts.h"
83
84 /* This file should be included last. */
85 #include "target-def.h"
86
87 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
88 systems will also set long double to be IEEE 128-bit. AIX and Darwin
89 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
90 those systems will not pick up this default. This needs to be after all
91 of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
92 properly defined. */
93 #ifndef TARGET_IEEEQUAD_DEFAULT
94 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
95 #define TARGET_IEEEQUAD_DEFAULT 1
96 #else
97 #define TARGET_IEEEQUAD_DEFAULT 0
98 #endif
99 #endif
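/* Worked example of the default above (a sketch, not an exhaustive target
   list):

     powerpc64le-linux (POWERPC_LINUX defined)   -> TARGET_IEEEQUAD_DEFAULT 0
     AIX, Darwin (redefine it to 0 themselves)   -> 0
     other targets with no prior definition      -> 1 (IEEE 128-bit)

   Users can still override per compilation with -mabi=ieeelongdouble or
   -mabi=ibmlongdouble.  */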
100
101 /* Support targetm.vectorize.builtin_mask_for_load. */
102 GTY(()) tree altivec_builtin_mask_for_load;
103
104 /* Set to nonzero once AIX common-mode calls have been defined. */
105 static GTY(()) int common_mode_defined;
106
107 #ifdef USING_ELFOS_H
108 /* Counter for labels which are to be placed in .fixup. */
109 int fixuplabelno = 0;
110 #endif
111
112 /* Whether to use variant of AIX ABI for PowerPC64 Linux. */
113 int dot_symbols;
114
115 /* Specify the machine mode that pointers have. After generation of rtl, the
116 compiler makes no further distinction between pointers and any other objects
117 of this machine mode. */
118 scalar_int_mode rs6000_pmode;
119
120 #if TARGET_ELF
121 /* Note whether IEEE 128-bit floating point was passed or returned, either as
122 the __float128/_Float128 explicit type, or when long double is IEEE 128-bit
123 floating point. We changed the default C++ mangling for these types and we
124 may want to generate a weak alias of the old mangling (U10__float128) to the
125 new mangling (u9__ieee128). */
126 bool rs6000_passes_ieee128 = false;
127 #endif
128
 129 /* Generate the mangled name (i.e. U10__float128) used in GCC 8.1, and not the
130 name used in current releases (i.e. u9__ieee128). */
131 static bool ieee128_mangling_gcc_8_1;
132
133 /* Width in bits of a pointer. */
134 unsigned rs6000_pointer_size;
135
136 #ifdef HAVE_AS_GNU_ATTRIBUTE
137 # ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
138 # define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
139 # endif
140 /* Flag whether floating point values have been passed/returned.
141 Note that this doesn't say whether fprs are used, since the
142 Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
 143 should be set even for soft-float values passed in gprs and ieee128
144 values passed in vsx registers. */
145 bool rs6000_passes_float = false;
146 bool rs6000_passes_long_double = false;
147 /* Flag whether vector values have been passed/returned. */
148 bool rs6000_passes_vector = false;
149 /* Flag whether small (<= 8 byte) structures have been returned. */
150 bool rs6000_returns_struct = false;
151 #endif
152
153 /* Value is TRUE if register/mode pair is acceptable. */
154 static bool rs6000_hard_regno_mode_ok_p
155 [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
156
157 /* Maximum number of registers needed for a given register class and mode. */
158 unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];
159
160 /* How many registers are needed for a given register and mode. */
161 unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
162
163 /* Map register number to register class. */
164 enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];
165
166 static int dbg_cost_ctrl;
167
168 /* Built in types. */
169 tree rs6000_builtin_types[RS6000_BTI_MAX];
170 tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];
171
 172 /* Flag to say the TOC is initialized.  */
173 int toc_initialized, need_toc_init;
174 char toc_label_name[10];
175
 176 /* Cached result of the rs6000_variable_issue hook, returned from
 177 rs6000_sched_reorder2. */
178 static short cached_can_issue_more;
179
180 static GTY(()) section *read_only_data_section;
181 static GTY(()) section *private_data_section;
182 static GTY(()) section *tls_data_section;
183 static GTY(()) section *tls_private_data_section;
184 static GTY(()) section *read_only_private_data_section;
185 static GTY(()) section *sdata2_section;
186
187 extern GTY(()) section *toc_section;
188 section *toc_section = 0;
189
190 /* Describe the vector unit used for modes. */
191 enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
192 enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];
193
194 /* Register classes for various constraints that are based on the target
195 switches. */
196 enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
197
198 /* Describe the alignment of a vector. */
199 int rs6000_vector_align[NUM_MACHINE_MODES];
200
201 /* Map selected modes to types for builtins. */
202 GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];
203
204 /* What modes to automatically generate reciprocal divide estimate (fre) and
205 reciprocal sqrt (frsqrte) for. */
206 unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
207
 208 /* Masks to determine which reciprocal estimate instructions to generate
209 automatically. */
210 enum rs6000_recip_mask {
211 RECIP_SF_DIV = 0x001, /* Use divide estimate */
212 RECIP_DF_DIV = 0x002,
213 RECIP_V4SF_DIV = 0x004,
214 RECIP_V2DF_DIV = 0x008,
215
216 RECIP_SF_RSQRT = 0x010, /* Use reciprocal sqrt estimate. */
217 RECIP_DF_RSQRT = 0x020,
218 RECIP_V4SF_RSQRT = 0x040,
219 RECIP_V2DF_RSQRT = 0x080,
220
221 /* Various combination of flags for -mrecip=xxx. */
222 RECIP_NONE = 0,
223 RECIP_ALL = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
224 | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
225 | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),
226
227 RECIP_HIGH_PRECISION = RECIP_ALL,
228
229 /* On low precision machines like the power5, don't enable double precision
230 reciprocal square root estimate, since it isn't accurate enough. */
231 RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
232 };
233
234 /* -mrecip options. */
235 static struct
236 {
237 const char *string; /* option name */
238 unsigned int mask; /* mask bits to set */
239 } recip_options[] = {
240 { "all", RECIP_ALL },
241 { "none", RECIP_NONE },
242 { "div", (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
243 | RECIP_V2DF_DIV) },
244 { "divf", (RECIP_SF_DIV | RECIP_V4SF_DIV) },
245 { "divd", (RECIP_DF_DIV | RECIP_V2DF_DIV) },
246 { "rsqrt", (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
247 | RECIP_V2DF_RSQRT) },
248 { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
249 { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
250 };
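/* A minimal parsing sketch, assuming a hypothetical option substring OPT;
   the real -mrecip=<x> handling lives later in this port
   (rs6000_option_override_internal):

     unsigned int mask = 0;
     for (size_t i = 0; i < ARRAY_SIZE (recip_options); i++)
       if (strcmp (opt, recip_options[i].string) == 0)
         {
           mask |= recip_options[i].mask;
           break;
         }
*/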
251
252 /* On PowerPC, we have a limited number of target clones that we care about
253 which means we can use an array to hold the options, rather than having more
254 elaborate data structures to identify each possible variation. Order the
255 clones from the default to the highest ISA. */
256 enum {
257 CLONE_DEFAULT = 0, /* default clone. */
258 CLONE_ISA_2_05, /* ISA 2.05 (power6). */
259 CLONE_ISA_2_06, /* ISA 2.06 (power7). */
260 CLONE_ISA_2_07, /* ISA 2.07 (power8). */
261 CLONE_ISA_3_00, /* ISA 3.00 (power9). */
262 CLONE_MAX
263 };
264
265 /* Map compiler ISA bits into HWCAP names. */
266 struct clone_map {
267 HOST_WIDE_INT isa_mask; /* rs6000_isa mask */
268 const char *name; /* name to use in __builtin_cpu_supports. */
269 };
270
271 static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
272 { 0, "" }, /* Default options. */
273 { OPTION_MASK_CMPB, "arch_2_05" }, /* ISA 2.05 (power6). */
274 { OPTION_MASK_POPCNTD, "arch_2_06" }, /* ISA 2.06 (power7). */
275 { OPTION_MASK_P8_VECTOR, "arch_2_07" }, /* ISA 2.07 (power8). */
276 { OPTION_MASK_P9_VECTOR, "arch_3_00" }, /* ISA 3.00 (power9). */
277 };
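/* Dispatch sketch: the names above are the strings that the target_clones
   machinery hands to __builtin_cpu_supports when picking a clone at run
   time.  The user-level equivalent is (illustrative):

     if (__builtin_cpu_supports ("arch_3_00"))
       use_power9_variant ();
     else if (__builtin_cpu_supports ("arch_2_07"))
       use_power8_variant ();

   where use_power9_variant and use_power8_variant are hypothetical.  */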
278
279
280 /* Newer LIBCs explicitly export this symbol to declare that they provide
281 the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. We emit a
282 reference to this symbol whenever we expand a CPU builtin, so that
283 we never link against an old LIBC. */
284 const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";
285
286 /* True if we have expanded a CPU builtin. */
287 bool cpu_builtin_p = false;
288
289 /* Pointer to function (in rs6000-c.c) that can define or undefine target
290 macros that have changed. Languages that don't support the preprocessor
291 don't link in rs6000-c.c, so we can't call it directly. */
292 void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
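/* Call-site sketch (hedged): callers test the pointer for NULL first, since
   languages without a preprocessor leave it unset:

     if (rs6000_target_modify_macros_ptr)
       rs6000_target_modify_macros_ptr (false, old_flags, new_mask);

   old_flags and new_mask stand in for the real arguments, which vary by
   call site.  */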
293
 294 /* Simplify register classes into simpler classifications. We assume
295 GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
296 check for standard register classes (gpr/floating/altivec/vsx) and
297 floating/vector classes (float/altivec/vsx). */
298
299 enum rs6000_reg_type {
300 NO_REG_TYPE,
301 PSEUDO_REG_TYPE,
302 GPR_REG_TYPE,
303 VSX_REG_TYPE,
304 ALTIVEC_REG_TYPE,
305 FPR_REG_TYPE,
306 SPR_REG_TYPE,
307 CR_REG_TYPE
308 };
309
310 /* Map register class to register type. */
311 static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
312
313 /* First/last register type for the 'normal' register types (i.e. general
314 purpose, floating point, altivec, and VSX registers). */
315 #define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
316
317 #define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
318
319
 320 /* Register classes we care about in secondary reload or when checking for a
 321 legitimate address. We only need to worry about GPR, FPR, and Altivec
 322 registers here, along with an ANY field that is the OR of the 3 register classes. */
323
324 enum rs6000_reload_reg_type {
325 RELOAD_REG_GPR, /* General purpose registers. */
326 RELOAD_REG_FPR, /* Traditional floating point regs. */
327 RELOAD_REG_VMX, /* Altivec (VMX) registers. */
328 RELOAD_REG_ANY, /* OR of GPR, FPR, Altivec masks. */
329 N_RELOAD_REG
330 };
331
332 /* For setting up register classes, loop through the 3 register classes mapping
333 into real registers, and skip the ANY class, which is just an OR of the
334 bits. */
335 #define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
336 #define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX
337
338 /* Map reload register type to a register in the register class. */
339 struct reload_reg_map_type {
340 const char *name; /* Register class name. */
341 int reg; /* Register in the register class. */
342 };
343
344 static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
345 { "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */
346 { "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */
347 { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. */
348 { "Any", -1 }, /* RELOAD_REG_ANY. */
349 };
350
351 /* Mask bits for each register class, indexed per mode. Historically the
 352 compiler has been more restrictive about which types can do PRE_MODIFY
 353 instead of PRE_INC and PRE_DEC, so keep track of separate bits for these two. */
354 typedef unsigned char addr_mask_type;
355
 356 #define RELOAD_REG_VALID 0x01 /* Mode valid in register. */
357 #define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers. */
358 #define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing. */
359 #define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */
360 #define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */
361 #define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */
362 #define RELOAD_REG_AND_M16 0x40 /* AND -16 addressing. */
 363 #define RELOAD_REG_QUAD_OFFSET 0x80 /* Quad (16-byte aligned) offset. */
364
 365 /* Valid addressing-mode masks and reload insns for each register type. */
366 struct rs6000_reg_addr {
367 enum insn_code reload_load; /* INSN to reload for loading. */
368 enum insn_code reload_store; /* INSN to reload for storing. */
369 enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR. */
370 enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX. */
371 enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR. */
372 addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. */
373 bool scalar_in_vmx_p; /* Scalar value can go in VMX. */
374 };
375
376 static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
377
378 /* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
379 static inline bool
380 mode_supports_pre_incdec_p (machine_mode mode)
381 {
382 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
383 != 0);
384 }
385
386 /* Helper function to say whether a mode supports PRE_MODIFY. */
387 static inline bool
388 mode_supports_pre_modify_p (machine_mode mode)
389 {
390 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
391 != 0);
392 }
393
394 /* Return true if we have D-form addressing in altivec registers. */
395 static inline bool
396 mode_supports_vmx_dform (machine_mode mode)
397 {
398 return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
399 }
400
401 /* Return true if we have D-form addressing in VSX registers. This addressing
402 is more limited than normal d-form addressing in that the offset must be
403 aligned on a 16-byte boundary. */
404 static inline bool
405 mode_supports_dq_form (machine_mode mode)
406 {
407 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
408 != 0);
409 }
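/* Further addr_mask queries follow the same one-line pattern as the helpers
   above; e.g. a hypothetical indexed-addressing predicate (a sketch only,
   not used elsewhere in this file):

     static inline bool
     mode_supports_indexed_p (machine_mode mode)
     {
       return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_INDEXED)
               != 0);
     }
*/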
410
411 /* Given that there exists at least one variable that is set (produced)
412 by OUT_INSN and read (consumed) by IN_INSN, return true iff
413 IN_INSN represents one or more memory store operations and none of
414 the variables set by OUT_INSN is used by IN_INSN as the address of a
415 store operation. If either IN_INSN or OUT_INSN does not represent
416 a "single" RTL SET expression (as loosely defined by the
417 implementation of the single_set function) or a PARALLEL with only
418 SETs, CLOBBERs, and USEs inside, this function returns false.
419
420 This rs6000-specific version of store_data_bypass_p checks for
421 certain conditions that result in assertion failures (and internal
422 compiler errors) in the generic store_data_bypass_p function and
423 returns false rather than calling store_data_bypass_p if one of the
424 problematic conditions is detected. */
425
426 int
427 rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
428 {
429 rtx out_set, in_set;
430 rtx out_pat, in_pat;
431 rtx out_exp, in_exp;
432 int i, j;
433
434 in_set = single_set (in_insn);
435 if (in_set)
436 {
437 if (MEM_P (SET_DEST (in_set)))
438 {
439 out_set = single_set (out_insn);
440 if (!out_set)
441 {
442 out_pat = PATTERN (out_insn);
443 if (GET_CODE (out_pat) == PARALLEL)
444 {
445 for (i = 0; i < XVECLEN (out_pat, 0); i++)
446 {
447 out_exp = XVECEXP (out_pat, 0, i);
448 if ((GET_CODE (out_exp) == CLOBBER)
449 || (GET_CODE (out_exp) == USE))
450 continue;
451 else if (GET_CODE (out_exp) != SET)
452 return false;
453 }
454 }
455 }
456 }
457 }
458 else
459 {
460 in_pat = PATTERN (in_insn);
461 if (GET_CODE (in_pat) != PARALLEL)
462 return false;
463
464 for (i = 0; i < XVECLEN (in_pat, 0); i++)
465 {
466 in_exp = XVECEXP (in_pat, 0, i);
467 if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
468 continue;
469 else if (GET_CODE (in_exp) != SET)
470 return false;
471
472 if (MEM_P (SET_DEST (in_exp)))
473 {
474 out_set = single_set (out_insn);
475 if (!out_set)
476 {
477 out_pat = PATTERN (out_insn);
478 if (GET_CODE (out_pat) != PARALLEL)
479 return false;
480 for (j = 0; j < XVECLEN (out_pat, 0); j++)
481 {
482 out_exp = XVECEXP (out_pat, 0, j);
483 if ((GET_CODE (out_exp) == CLOBBER)
484 || (GET_CODE (out_exp) == USE))
485 continue;
486 else if (GET_CODE (out_exp) != SET)
487 return false;
488 }
489 }
490 }
491 }
492 }
493 return store_data_bypass_p (out_insn, in_insn);
494 }
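/* Usage sketch: this predicate is intended for define_bypass guards in the
   machine description, standing in for the generic store_data_bypass_p,
   e.g. with hypothetical reservation names:

     (define_bypass 2 "producer_insn" "consumer_store"
       "rs6000_store_data_bypass_p")
*/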
495
496 \f
497 /* Processor costs (relative to an add) */
498
499 const struct processor_costs *rs6000_cost;
500
 501 /* Instruction size costs on 32-bit processors. */
502 static const
503 struct processor_costs size32_cost = {
504 COSTS_N_INSNS (1), /* mulsi */
505 COSTS_N_INSNS (1), /* mulsi_const */
506 COSTS_N_INSNS (1), /* mulsi_const9 */
507 COSTS_N_INSNS (1), /* muldi */
508 COSTS_N_INSNS (1), /* divsi */
509 COSTS_N_INSNS (1), /* divdi */
510 COSTS_N_INSNS (1), /* fp */
511 COSTS_N_INSNS (1), /* dmul */
512 COSTS_N_INSNS (1), /* sdiv */
513 COSTS_N_INSNS (1), /* ddiv */
514 32, /* cache line size */
515 0, /* l1 cache */
516 0, /* l2 cache */
517 0, /* streams */
518 0, /* SF->DF convert */
519 };
520
 521 /* Instruction size costs on 64-bit processors. */
522 static const
523 struct processor_costs size64_cost = {
524 COSTS_N_INSNS (1), /* mulsi */
525 COSTS_N_INSNS (1), /* mulsi_const */
526 COSTS_N_INSNS (1), /* mulsi_const9 */
527 COSTS_N_INSNS (1), /* muldi */
528 COSTS_N_INSNS (1), /* divsi */
529 COSTS_N_INSNS (1), /* divdi */
530 COSTS_N_INSNS (1), /* fp */
531 COSTS_N_INSNS (1), /* dmul */
532 COSTS_N_INSNS (1), /* sdiv */
533 COSTS_N_INSNS (1), /* ddiv */
534 128, /* cache line size */
535 0, /* l1 cache */
536 0, /* l2 cache */
537 0, /* streams */
538 0, /* SF->DF convert */
539 };
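/* Reading these tables: COSTS_N_INSNS (from rtl.h) scales an instruction
   count into the relative cost units used by the RTL passes, so only the
   ratios between entries matter.  A quick sanity sketch:

     gcc_checking_assert (COSTS_N_INSNS (4) == 4 * COSTS_N_INSNS (1));
*/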
540
541 /* Instruction costs on RS64A processors. */
542 static const
543 struct processor_costs rs64a_cost = {
544 COSTS_N_INSNS (20), /* mulsi */
545 COSTS_N_INSNS (12), /* mulsi_const */
546 COSTS_N_INSNS (8), /* mulsi_const9 */
547 COSTS_N_INSNS (34), /* muldi */
548 COSTS_N_INSNS (65), /* divsi */
549 COSTS_N_INSNS (67), /* divdi */
550 COSTS_N_INSNS (4), /* fp */
551 COSTS_N_INSNS (4), /* dmul */
552 COSTS_N_INSNS (31), /* sdiv */
553 COSTS_N_INSNS (31), /* ddiv */
554 128, /* cache line size */
555 128, /* l1 cache */
556 2048, /* l2 cache */
557 1, /* streams */
558 0, /* SF->DF convert */
559 };
560
561 /* Instruction costs on MPCCORE processors. */
562 static const
563 struct processor_costs mpccore_cost = {
564 COSTS_N_INSNS (2), /* mulsi */
565 COSTS_N_INSNS (2), /* mulsi_const */
566 COSTS_N_INSNS (2), /* mulsi_const9 */
567 COSTS_N_INSNS (2), /* muldi */
568 COSTS_N_INSNS (6), /* divsi */
569 COSTS_N_INSNS (6), /* divdi */
570 COSTS_N_INSNS (4), /* fp */
571 COSTS_N_INSNS (5), /* dmul */
572 COSTS_N_INSNS (10), /* sdiv */
573 COSTS_N_INSNS (17), /* ddiv */
574 32, /* cache line size */
575 4, /* l1 cache */
576 16, /* l2 cache */
577 1, /* streams */
578 0, /* SF->DF convert */
579 };
580
581 /* Instruction costs on PPC403 processors. */
582 static const
583 struct processor_costs ppc403_cost = {
584 COSTS_N_INSNS (4), /* mulsi */
585 COSTS_N_INSNS (4), /* mulsi_const */
586 COSTS_N_INSNS (4), /* mulsi_const9 */
587 COSTS_N_INSNS (4), /* muldi */
588 COSTS_N_INSNS (33), /* divsi */
589 COSTS_N_INSNS (33), /* divdi */
590 COSTS_N_INSNS (11), /* fp */
591 COSTS_N_INSNS (11), /* dmul */
592 COSTS_N_INSNS (11), /* sdiv */
593 COSTS_N_INSNS (11), /* ddiv */
594 32, /* cache line size */
595 4, /* l1 cache */
596 16, /* l2 cache */
597 1, /* streams */
598 0, /* SF->DF convert */
599 };
600
601 /* Instruction costs on PPC405 processors. */
602 static const
603 struct processor_costs ppc405_cost = {
604 COSTS_N_INSNS (5), /* mulsi */
605 COSTS_N_INSNS (4), /* mulsi_const */
606 COSTS_N_INSNS (3), /* mulsi_const9 */
607 COSTS_N_INSNS (5), /* muldi */
608 COSTS_N_INSNS (35), /* divsi */
609 COSTS_N_INSNS (35), /* divdi */
610 COSTS_N_INSNS (11), /* fp */
611 COSTS_N_INSNS (11), /* dmul */
612 COSTS_N_INSNS (11), /* sdiv */
613 COSTS_N_INSNS (11), /* ddiv */
614 32, /* cache line size */
615 16, /* l1 cache */
616 128, /* l2 cache */
617 1, /* streams */
618 0, /* SF->DF convert */
619 };
620
621 /* Instruction costs on PPC440 processors. */
622 static const
623 struct processor_costs ppc440_cost = {
624 COSTS_N_INSNS (3), /* mulsi */
625 COSTS_N_INSNS (2), /* mulsi_const */
626 COSTS_N_INSNS (2), /* mulsi_const9 */
627 COSTS_N_INSNS (3), /* muldi */
628 COSTS_N_INSNS (34), /* divsi */
629 COSTS_N_INSNS (34), /* divdi */
630 COSTS_N_INSNS (5), /* fp */
631 COSTS_N_INSNS (5), /* dmul */
632 COSTS_N_INSNS (19), /* sdiv */
633 COSTS_N_INSNS (33), /* ddiv */
634 32, /* cache line size */
635 32, /* l1 cache */
636 256, /* l2 cache */
637 1, /* streams */
638 0, /* SF->DF convert */
639 };
640
641 /* Instruction costs on PPC476 processors. */
642 static const
643 struct processor_costs ppc476_cost = {
644 COSTS_N_INSNS (4), /* mulsi */
645 COSTS_N_INSNS (4), /* mulsi_const */
646 COSTS_N_INSNS (4), /* mulsi_const9 */
647 COSTS_N_INSNS (4), /* muldi */
648 COSTS_N_INSNS (11), /* divsi */
649 COSTS_N_INSNS (11), /* divdi */
650 COSTS_N_INSNS (6), /* fp */
651 COSTS_N_INSNS (6), /* dmul */
652 COSTS_N_INSNS (19), /* sdiv */
653 COSTS_N_INSNS (33), /* ddiv */
654 32, /* l1 cache line size */
655 32, /* l1 cache */
656 512, /* l2 cache */
657 1, /* streams */
658 0, /* SF->DF convert */
659 };
660
661 /* Instruction costs on PPC601 processors. */
662 static const
663 struct processor_costs ppc601_cost = {
664 COSTS_N_INSNS (5), /* mulsi */
665 COSTS_N_INSNS (5), /* mulsi_const */
666 COSTS_N_INSNS (5), /* mulsi_const9 */
667 COSTS_N_INSNS (5), /* muldi */
668 COSTS_N_INSNS (36), /* divsi */
669 COSTS_N_INSNS (36), /* divdi */
670 COSTS_N_INSNS (4), /* fp */
671 COSTS_N_INSNS (5), /* dmul */
672 COSTS_N_INSNS (17), /* sdiv */
673 COSTS_N_INSNS (31), /* ddiv */
674 32, /* cache line size */
675 32, /* l1 cache */
676 256, /* l2 cache */
677 1, /* streams */
678 0, /* SF->DF convert */
679 };
680
681 /* Instruction costs on PPC603 processors. */
682 static const
683 struct processor_costs ppc603_cost = {
684 COSTS_N_INSNS (5), /* mulsi */
685 COSTS_N_INSNS (3), /* mulsi_const */
686 COSTS_N_INSNS (2), /* mulsi_const9 */
687 COSTS_N_INSNS (5), /* muldi */
688 COSTS_N_INSNS (37), /* divsi */
689 COSTS_N_INSNS (37), /* divdi */
690 COSTS_N_INSNS (3), /* fp */
691 COSTS_N_INSNS (4), /* dmul */
692 COSTS_N_INSNS (18), /* sdiv */
693 COSTS_N_INSNS (33), /* ddiv */
694 32, /* cache line size */
695 8, /* l1 cache */
696 64, /* l2 cache */
697 1, /* streams */
698 0, /* SF->DF convert */
699 };
700
701 /* Instruction costs on PPC604 processors. */
702 static const
703 struct processor_costs ppc604_cost = {
704 COSTS_N_INSNS (4), /* mulsi */
705 COSTS_N_INSNS (4), /* mulsi_const */
706 COSTS_N_INSNS (4), /* mulsi_const9 */
707 COSTS_N_INSNS (4), /* muldi */
708 COSTS_N_INSNS (20), /* divsi */
709 COSTS_N_INSNS (20), /* divdi */
710 COSTS_N_INSNS (3), /* fp */
711 COSTS_N_INSNS (3), /* dmul */
712 COSTS_N_INSNS (18), /* sdiv */
713 COSTS_N_INSNS (32), /* ddiv */
714 32, /* cache line size */
715 16, /* l1 cache */
716 512, /* l2 cache */
717 1, /* streams */
718 0, /* SF->DF convert */
719 };
720
721 /* Instruction costs on PPC604e processors. */
722 static const
723 struct processor_costs ppc604e_cost = {
724 COSTS_N_INSNS (2), /* mulsi */
725 COSTS_N_INSNS (2), /* mulsi_const */
726 COSTS_N_INSNS (2), /* mulsi_const9 */
727 COSTS_N_INSNS (2), /* muldi */
728 COSTS_N_INSNS (20), /* divsi */
729 COSTS_N_INSNS (20), /* divdi */
730 COSTS_N_INSNS (3), /* fp */
731 COSTS_N_INSNS (3), /* dmul */
732 COSTS_N_INSNS (18), /* sdiv */
733 COSTS_N_INSNS (32), /* ddiv */
734 32, /* cache line size */
735 32, /* l1 cache */
736 1024, /* l2 cache */
737 1, /* streams */
738 0, /* SF->DF convert */
739 };
740
741 /* Instruction costs on PPC620 processors. */
742 static const
743 struct processor_costs ppc620_cost = {
744 COSTS_N_INSNS (5), /* mulsi */
745 COSTS_N_INSNS (4), /* mulsi_const */
746 COSTS_N_INSNS (3), /* mulsi_const9 */
747 COSTS_N_INSNS (7), /* muldi */
748 COSTS_N_INSNS (21), /* divsi */
749 COSTS_N_INSNS (37), /* divdi */
750 COSTS_N_INSNS (3), /* fp */
751 COSTS_N_INSNS (3), /* dmul */
752 COSTS_N_INSNS (18), /* sdiv */
753 COSTS_N_INSNS (32), /* ddiv */
754 128, /* cache line size */
755 32, /* l1 cache */
756 1024, /* l2 cache */
757 1, /* streams */
758 0, /* SF->DF convert */
759 };
760
761 /* Instruction costs on PPC630 processors. */
762 static const
763 struct processor_costs ppc630_cost = {
764 COSTS_N_INSNS (5), /* mulsi */
765 COSTS_N_INSNS (4), /* mulsi_const */
766 COSTS_N_INSNS (3), /* mulsi_const9 */
767 COSTS_N_INSNS (7), /* muldi */
768 COSTS_N_INSNS (21), /* divsi */
769 COSTS_N_INSNS (37), /* divdi */
770 COSTS_N_INSNS (3), /* fp */
771 COSTS_N_INSNS (3), /* dmul */
772 COSTS_N_INSNS (17), /* sdiv */
773 COSTS_N_INSNS (21), /* ddiv */
774 128, /* cache line size */
775 64, /* l1 cache */
776 1024, /* l2 cache */
777 1, /* streams */
778 0, /* SF->DF convert */
779 };
780
781 /* Instruction costs on Cell processor. */
782 /* COSTS_N_INSNS (1) ~ one add. */
783 static const
784 struct processor_costs ppccell_cost = {
785 COSTS_N_INSNS (9/2)+2, /* mulsi */
786 COSTS_N_INSNS (6/2), /* mulsi_const */
787 COSTS_N_INSNS (6/2), /* mulsi_const9 */
788 COSTS_N_INSNS (15/2)+2, /* muldi */
789 COSTS_N_INSNS (38/2), /* divsi */
790 COSTS_N_INSNS (70/2), /* divdi */
791 COSTS_N_INSNS (10/2), /* fp */
792 COSTS_N_INSNS (10/2), /* dmul */
793 COSTS_N_INSNS (74/2), /* sdiv */
794 COSTS_N_INSNS (74/2), /* ddiv */
795 128, /* cache line size */
796 32, /* l1 cache */
797 512, /* l2 cache */
798 6, /* streams */
799 0, /* SF->DF convert */
800 };
801
802 /* Instruction costs on PPC750 and PPC7400 processors. */
803 static const
804 struct processor_costs ppc750_cost = {
805 COSTS_N_INSNS (5), /* mulsi */
806 COSTS_N_INSNS (3), /* mulsi_const */
807 COSTS_N_INSNS (2), /* mulsi_const9 */
808 COSTS_N_INSNS (5), /* muldi */
809 COSTS_N_INSNS (17), /* divsi */
810 COSTS_N_INSNS (17), /* divdi */
811 COSTS_N_INSNS (3), /* fp */
812 COSTS_N_INSNS (3), /* dmul */
813 COSTS_N_INSNS (17), /* sdiv */
814 COSTS_N_INSNS (31), /* ddiv */
815 32, /* cache line size */
816 32, /* l1 cache */
817 512, /* l2 cache */
818 1, /* streams */
819 0, /* SF->DF convert */
820 };
821
822 /* Instruction costs on PPC7450 processors. */
823 static const
824 struct processor_costs ppc7450_cost = {
825 COSTS_N_INSNS (4), /* mulsi */
826 COSTS_N_INSNS (3), /* mulsi_const */
827 COSTS_N_INSNS (3), /* mulsi_const9 */
828 COSTS_N_INSNS (4), /* muldi */
829 COSTS_N_INSNS (23), /* divsi */
830 COSTS_N_INSNS (23), /* divdi */
831 COSTS_N_INSNS (5), /* fp */
832 COSTS_N_INSNS (5), /* dmul */
833 COSTS_N_INSNS (21), /* sdiv */
834 COSTS_N_INSNS (35), /* ddiv */
835 32, /* cache line size */
836 32, /* l1 cache */
837 1024, /* l2 cache */
838 1, /* streams */
839 0, /* SF->DF convert */
840 };
841
842 /* Instruction costs on PPC8540 processors. */
843 static const
844 struct processor_costs ppc8540_cost = {
845 COSTS_N_INSNS (4), /* mulsi */
846 COSTS_N_INSNS (4), /* mulsi_const */
847 COSTS_N_INSNS (4), /* mulsi_const9 */
848 COSTS_N_INSNS (4), /* muldi */
849 COSTS_N_INSNS (19), /* divsi */
850 COSTS_N_INSNS (19), /* divdi */
851 COSTS_N_INSNS (4), /* fp */
852 COSTS_N_INSNS (4), /* dmul */
853 COSTS_N_INSNS (29), /* sdiv */
854 COSTS_N_INSNS (29), /* ddiv */
855 32, /* cache line size */
856 32, /* l1 cache */
857 256, /* l2 cache */
 858 1, /* prefetch streams */
859 0, /* SF->DF convert */
860 };
861
862 /* Instruction costs on E300C2 and E300C3 cores. */
863 static const
864 struct processor_costs ppce300c2c3_cost = {
865 COSTS_N_INSNS (4), /* mulsi */
866 COSTS_N_INSNS (4), /* mulsi_const */
867 COSTS_N_INSNS (4), /* mulsi_const9 */
868 COSTS_N_INSNS (4), /* muldi */
869 COSTS_N_INSNS (19), /* divsi */
870 COSTS_N_INSNS (19), /* divdi */
871 COSTS_N_INSNS (3), /* fp */
872 COSTS_N_INSNS (4), /* dmul */
873 COSTS_N_INSNS (18), /* sdiv */
874 COSTS_N_INSNS (33), /* ddiv */
 875 32, /* cache line size */
876 16, /* l1 cache */
877 16, /* l2 cache */
 878 1, /* prefetch streams */
879 0, /* SF->DF convert */
880 };
881
882 /* Instruction costs on PPCE500MC processors. */
883 static const
884 struct processor_costs ppce500mc_cost = {
885 COSTS_N_INSNS (4), /* mulsi */
886 COSTS_N_INSNS (4), /* mulsi_const */
887 COSTS_N_INSNS (4), /* mulsi_const9 */
888 COSTS_N_INSNS (4), /* muldi */
889 COSTS_N_INSNS (14), /* divsi */
890 COSTS_N_INSNS (14), /* divdi */
891 COSTS_N_INSNS (8), /* fp */
892 COSTS_N_INSNS (10), /* dmul */
893 COSTS_N_INSNS (36), /* sdiv */
894 COSTS_N_INSNS (66), /* ddiv */
895 64, /* cache line size */
896 32, /* l1 cache */
897 128, /* l2 cache */
 898 1, /* prefetch streams */
899 0, /* SF->DF convert */
900 };
901
902 /* Instruction costs on PPCE500MC64 processors. */
903 static const
904 struct processor_costs ppce500mc64_cost = {
905 COSTS_N_INSNS (4), /* mulsi */
906 COSTS_N_INSNS (4), /* mulsi_const */
907 COSTS_N_INSNS (4), /* mulsi_const9 */
908 COSTS_N_INSNS (4), /* muldi */
909 COSTS_N_INSNS (14), /* divsi */
910 COSTS_N_INSNS (14), /* divdi */
911 COSTS_N_INSNS (4), /* fp */
912 COSTS_N_INSNS (10), /* dmul */
913 COSTS_N_INSNS (36), /* sdiv */
914 COSTS_N_INSNS (66), /* ddiv */
915 64, /* cache line size */
916 32, /* l1 cache */
917 128, /* l2 cache */
 918 1, /* prefetch streams */
919 0, /* SF->DF convert */
920 };
921
922 /* Instruction costs on PPCE5500 processors. */
923 static const
924 struct processor_costs ppce5500_cost = {
925 COSTS_N_INSNS (5), /* mulsi */
926 COSTS_N_INSNS (5), /* mulsi_const */
927 COSTS_N_INSNS (4), /* mulsi_const9 */
928 COSTS_N_INSNS (5), /* muldi */
929 COSTS_N_INSNS (14), /* divsi */
930 COSTS_N_INSNS (14), /* divdi */
931 COSTS_N_INSNS (7), /* fp */
932 COSTS_N_INSNS (10), /* dmul */
933 COSTS_N_INSNS (36), /* sdiv */
934 COSTS_N_INSNS (66), /* ddiv */
935 64, /* cache line size */
936 32, /* l1 cache */
937 128, /* l2 cache */
 938 1, /* prefetch streams */
939 0, /* SF->DF convert */
940 };
941
942 /* Instruction costs on PPCE6500 processors. */
943 static const
944 struct processor_costs ppce6500_cost = {
945 COSTS_N_INSNS (5), /* mulsi */
946 COSTS_N_INSNS (5), /* mulsi_const */
947 COSTS_N_INSNS (4), /* mulsi_const9 */
948 COSTS_N_INSNS (5), /* muldi */
949 COSTS_N_INSNS (14), /* divsi */
950 COSTS_N_INSNS (14), /* divdi */
951 COSTS_N_INSNS (7), /* fp */
952 COSTS_N_INSNS (10), /* dmul */
953 COSTS_N_INSNS (36), /* sdiv */
954 COSTS_N_INSNS (66), /* ddiv */
955 64, /* cache line size */
956 32, /* l1 cache */
957 128, /* l2 cache */
 958 1, /* prefetch streams */
959 0, /* SF->DF convert */
960 };
961
962 /* Instruction costs on AppliedMicro Titan processors. */
963 static const
964 struct processor_costs titan_cost = {
965 COSTS_N_INSNS (5), /* mulsi */
966 COSTS_N_INSNS (5), /* mulsi_const */
967 COSTS_N_INSNS (5), /* mulsi_const9 */
968 COSTS_N_INSNS (5), /* muldi */
969 COSTS_N_INSNS (18), /* divsi */
970 COSTS_N_INSNS (18), /* divdi */
971 COSTS_N_INSNS (10), /* fp */
972 COSTS_N_INSNS (10), /* dmul */
973 COSTS_N_INSNS (46), /* sdiv */
974 COSTS_N_INSNS (72), /* ddiv */
975 32, /* cache line size */
976 32, /* l1 cache */
977 512, /* l2 cache */
 978 1, /* prefetch streams */
979 0, /* SF->DF convert */
980 };
981
982 /* Instruction costs on POWER4 and POWER5 processors. */
983 static const
984 struct processor_costs power4_cost = {
985 COSTS_N_INSNS (3), /* mulsi */
986 COSTS_N_INSNS (2), /* mulsi_const */
987 COSTS_N_INSNS (2), /* mulsi_const9 */
988 COSTS_N_INSNS (4), /* muldi */
989 COSTS_N_INSNS (18), /* divsi */
990 COSTS_N_INSNS (34), /* divdi */
991 COSTS_N_INSNS (3), /* fp */
992 COSTS_N_INSNS (3), /* dmul */
993 COSTS_N_INSNS (17), /* sdiv */
994 COSTS_N_INSNS (17), /* ddiv */
995 128, /* cache line size */
996 32, /* l1 cache */
997 1024, /* l2 cache */
 998 8, /* prefetch streams */
999 0, /* SF->DF convert */
1000 };
1001
1002 /* Instruction costs on POWER6 processors. */
1003 static const
1004 struct processor_costs power6_cost = {
1005 COSTS_N_INSNS (8), /* mulsi */
1006 COSTS_N_INSNS (8), /* mulsi_const */
1007 COSTS_N_INSNS (8), /* mulsi_const9 */
1008 COSTS_N_INSNS (8), /* muldi */
1009 COSTS_N_INSNS (22), /* divsi */
1010 COSTS_N_INSNS (28), /* divdi */
1011 COSTS_N_INSNS (3), /* fp */
1012 COSTS_N_INSNS (3), /* dmul */
1013 COSTS_N_INSNS (13), /* sdiv */
1014 COSTS_N_INSNS (16), /* ddiv */
1015 128, /* cache line size */
1016 64, /* l1 cache */
1017 2048, /* l2 cache */
1018 16, /* prefetch streams */
1019 0, /* SF->DF convert */
1020 };
1021
1022 /* Instruction costs on POWER7 processors. */
1023 static const
1024 struct processor_costs power7_cost = {
1025 COSTS_N_INSNS (2), /* mulsi */
1026 COSTS_N_INSNS (2), /* mulsi_const */
1027 COSTS_N_INSNS (2), /* mulsi_const9 */
1028 COSTS_N_INSNS (2), /* muldi */
1029 COSTS_N_INSNS (18), /* divsi */
1030 COSTS_N_INSNS (34), /* divdi */
1031 COSTS_N_INSNS (3), /* fp */
1032 COSTS_N_INSNS (3), /* dmul */
1033 COSTS_N_INSNS (13), /* sdiv */
1034 COSTS_N_INSNS (16), /* ddiv */
1035 128, /* cache line size */
1036 32, /* l1 cache */
1037 256, /* l2 cache */
1038 12, /* prefetch streams */
1039 COSTS_N_INSNS (3), /* SF->DF convert */
1040 };
1041
1042 /* Instruction costs on POWER8 processors. */
1043 static const
1044 struct processor_costs power8_cost = {
1045 COSTS_N_INSNS (3), /* mulsi */
1046 COSTS_N_INSNS (3), /* mulsi_const */
1047 COSTS_N_INSNS (3), /* mulsi_const9 */
1048 COSTS_N_INSNS (3), /* muldi */
1049 COSTS_N_INSNS (19), /* divsi */
1050 COSTS_N_INSNS (35), /* divdi */
1051 COSTS_N_INSNS (3), /* fp */
1052 COSTS_N_INSNS (3), /* dmul */
1053 COSTS_N_INSNS (14), /* sdiv */
1054 COSTS_N_INSNS (17), /* ddiv */
1055 128, /* cache line size */
1056 32, /* l1 cache */
1057 256, /* l2 cache */
1058 12, /* prefetch streams */
1059 COSTS_N_INSNS (3), /* SF->DF convert */
1060 };
1061
1062 /* Instruction costs on POWER9 processors. */
1063 static const
1064 struct processor_costs power9_cost = {
1065 COSTS_N_INSNS (3), /* mulsi */
1066 COSTS_N_INSNS (3), /* mulsi_const */
1067 COSTS_N_INSNS (3), /* mulsi_const9 */
1068 COSTS_N_INSNS (3), /* muldi */
1069 COSTS_N_INSNS (8), /* divsi */
1070 COSTS_N_INSNS (12), /* divdi */
1071 COSTS_N_INSNS (3), /* fp */
1072 COSTS_N_INSNS (3), /* dmul */
1073 COSTS_N_INSNS (13), /* sdiv */
1074 COSTS_N_INSNS (18), /* ddiv */
1075 128, /* cache line size */
1076 32, /* l1 cache */
1077 512, /* l2 cache */
1078 8, /* prefetch streams */
1079 COSTS_N_INSNS (3), /* SF->DF convert */
1080 };
1081
1082 /* Instruction costs on POWER A2 processors. */
1083 static const
1084 struct processor_costs ppca2_cost = {
1085 COSTS_N_INSNS (16), /* mulsi */
1086 COSTS_N_INSNS (16), /* mulsi_const */
1087 COSTS_N_INSNS (16), /* mulsi_const9 */
1088 COSTS_N_INSNS (16), /* muldi */
1089 COSTS_N_INSNS (22), /* divsi */
1090 COSTS_N_INSNS (28), /* divdi */
1091 COSTS_N_INSNS (3), /* fp */
1092 COSTS_N_INSNS (3), /* dmul */
1093 COSTS_N_INSNS (59), /* sdiv */
1094 COSTS_N_INSNS (72), /* ddiv */
 1095 64, /* cache line size */
1096 16, /* l1 cache */
1097 2048, /* l2 cache */
1098 16, /* prefetch streams */
1099 0, /* SF->DF convert */
1100 };
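/* Selection sketch: rs6000_option_override_internal later points
   rs6000_cost at one of the tables above based on the tuning CPU, roughly
   (abridged):

     switch (rs6000_tune)
       {
       case PROCESSOR_POWER9:
         rs6000_cost = &power9_cost;
         break;
       case PROCESSOR_CELL:
         rs6000_cost = &ppccell_cost;
         break;
       ...one case per table above...
       }
*/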
1101
1102 /* Support for -mveclibabi=<xxx> to control which vector library to use. */
1103 static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);
1104
1105 \f
1106 static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
1107 static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
1108 static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
1109 static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
1110 static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
1111 static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
1112 static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
1113 static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
1114 static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
1115 bool);
1116 static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
1117 unsigned int);
1118 static bool is_microcoded_insn (rtx_insn *);
1119 static bool is_nonpipeline_insn (rtx_insn *);
1120 static bool is_cracked_insn (rtx_insn *);
1121 static bool is_load_insn (rtx, rtx *);
1122 static bool is_store_insn (rtx, rtx *);
1123 static bool set_to_load_agen (rtx_insn *,rtx_insn *);
1124 static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
1125 static bool insn_must_be_first_in_group (rtx_insn *);
1126 static bool insn_must_be_last_in_group (rtx_insn *);
1127 int easy_vector_constant (rtx, machine_mode);
1128 static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
1129 static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
1130 #if TARGET_MACHO
1131 static tree get_prev_label (tree);
1132 #endif
1133 static bool rs6000_mode_dependent_address (const_rtx);
1134 static bool rs6000_debug_mode_dependent_address (const_rtx);
1135 static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
1136 static enum reg_class rs6000_secondary_reload_class (enum reg_class,
1137 machine_mode, rtx);
1138 static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
1139 machine_mode,
1140 rtx);
1141 static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
1142 static enum reg_class rs6000_debug_preferred_reload_class (rtx,
1143 enum reg_class);
1144 static bool rs6000_debug_secondary_memory_needed (machine_mode,
1145 reg_class_t,
1146 reg_class_t);
1147 static bool rs6000_debug_can_change_mode_class (machine_mode,
1148 machine_mode,
1149 reg_class_t);
1150
1151 static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
1152 = rs6000_mode_dependent_address;
1153
1154 enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
1155 machine_mode, rtx)
1156 = rs6000_secondary_reload_class;
1157
1158 enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
1159 = rs6000_preferred_reload_class;
1160
1161 const int INSN_NOT_AVAILABLE = -1;
1162
1163 static void rs6000_print_isa_options (FILE *, int, const char *,
1164 HOST_WIDE_INT);
1165 static void rs6000_print_builtin_options (FILE *, int, const char *,
1166 HOST_WIDE_INT);
1167 static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);
1168
1169 static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
1170 static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
1171 enum rs6000_reg_type,
1172 machine_mode,
1173 secondary_reload_info *,
1174 bool);
1175 rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
1176
1177 /* Hash table stuff for keeping track of TOC entries. */
1178
1179 struct GTY((for_user)) toc_hash_struct
1180 {
1181 /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
1182 ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
1183 rtx key;
1184 machine_mode key_mode;
1185 int labelno;
1186 };
1187
1188 struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
1189 {
1190 static hashval_t hash (toc_hash_struct *);
1191 static bool equal (toc_hash_struct *, toc_hash_struct *);
1192 };
1193
1194 static GTY (()) hash_table<toc_hasher> *toc_hash_table;
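/* Lookup sketch for the table above (hedged; the real TOC output code also
   GC-allocates persistent entries).  Here x, mode and labelno stand in for
   the constant being emitted:

     toc_hash_struct tmp = { x, mode, labelno };
     toc_hash_struct **slot = toc_hash_table->find_slot (&tmp, INSERT);
     if (*slot == NULL)
       ...first time this constant has been seen...
*/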
1195
1196
1197 \f
1198 /* Default register names. */
1199 char rs6000_reg_names[][8] =
1200 {
1201 /* GPRs */
1202 "0", "1", "2", "3", "4", "5", "6", "7",
1203 "8", "9", "10", "11", "12", "13", "14", "15",
1204 "16", "17", "18", "19", "20", "21", "22", "23",
1205 "24", "25", "26", "27", "28", "29", "30", "31",
1206 /* FPRs */
1207 "0", "1", "2", "3", "4", "5", "6", "7",
1208 "8", "9", "10", "11", "12", "13", "14", "15",
1209 "16", "17", "18", "19", "20", "21", "22", "23",
1210 "24", "25", "26", "27", "28", "29", "30", "31",
1211 /* VRs */
1212 "0", "1", "2", "3", "4", "5", "6", "7",
1213 "8", "9", "10", "11", "12", "13", "14", "15",
1214 "16", "17", "18", "19", "20", "21", "22", "23",
1215 "24", "25", "26", "27", "28", "29", "30", "31",
1216 /* lr ctr ca ap */
1217 "lr", "ctr", "ca", "ap",
1218 /* cr0..cr7 */
1219 "0", "1", "2", "3", "4", "5", "6", "7",
1220 /* vrsave vscr sfp */
1221 "vrsave", "vscr", "sfp",
1222 };
1223
1224 #ifdef TARGET_REGNAMES
1225 static const char alt_reg_names[][8] =
1226 {
1227 /* GPRs */
1228 "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
1229 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
1230 "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
1231 "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
1232 /* FPRs */
1233 "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
1234 "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
1235 "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
1236 "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
1237 /* VRs */
1238 "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
1239 "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
1240 "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
1241 "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
1242 /* lr ctr ca ap */
1243 "lr", "ctr", "ca", "ap",
1244 /* cr0..cr7 */
1245 "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
1246 /* vrsave vscr sfp */
1247 "vrsave", "vscr", "sfp",
1248 };
1249 #endif
1250
1251 /* Table of valid machine attributes. */
1252
1253 static const struct attribute_spec rs6000_attribute_table[] =
1254 {
1255 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
1256 affects_type_identity, handler, exclude } */
1257 { "altivec", 1, 1, false, true, false, false,
1258 rs6000_handle_altivec_attribute, NULL },
1259 { "longcall", 0, 0, false, true, true, false,
1260 rs6000_handle_longcall_attribute, NULL },
1261 { "shortcall", 0, 0, false, true, true, false,
1262 rs6000_handle_longcall_attribute, NULL },
1263 { "ms_struct", 0, 0, false, false, false, false,
1264 rs6000_handle_struct_attribute, NULL },
1265 { "gcc_struct", 0, 0, false, false, false, false,
1266 rs6000_handle_struct_attribute, NULL },
1267 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1268 SUBTARGET_ATTRIBUTE_TABLE,
1269 #endif
1270 { NULL, 0, 0, false, false, false, false, NULL, NULL }
1271 };
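/* Source-level sketch of the attributes above (illustrative user code):

     void far_away (void) __attribute__ ((longcall));
     struct __attribute__ ((ms_struct)) s { int a : 3; char b; };
*/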
1272 \f
1273 #ifndef TARGET_PROFILE_KERNEL
1274 #define TARGET_PROFILE_KERNEL 0
1275 #endif
1276 \f
1277 /* Initialize the GCC target structure. */
1278 #undef TARGET_ATTRIBUTE_TABLE
1279 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1280 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1281 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1282 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1283 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1284
1285 #undef TARGET_ASM_ALIGNED_DI_OP
1286 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1287
1288 /* Default unaligned ops are only provided for ELF. Find the ops needed
1289 for non-ELF systems. */
1290 #ifndef OBJECT_FORMAT_ELF
1291 #if TARGET_XCOFF
1292 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1293 64-bit targets. */
1294 #undef TARGET_ASM_UNALIGNED_HI_OP
1295 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1296 #undef TARGET_ASM_UNALIGNED_SI_OP
1297 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1298 #undef TARGET_ASM_UNALIGNED_DI_OP
1299 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1300 #else
1301 /* For Darwin. */
1302 #undef TARGET_ASM_UNALIGNED_HI_OP
1303 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1304 #undef TARGET_ASM_UNALIGNED_SI_OP
1305 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1306 #undef TARGET_ASM_UNALIGNED_DI_OP
1307 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1308 #undef TARGET_ASM_ALIGNED_DI_OP
1309 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1310 #endif
1311 #endif
1312
1313 /* This hook deals with fixups for relocatable code and DI-mode objects
1314 in 64-bit code. */
1315 #undef TARGET_ASM_INTEGER
1316 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1317
1318 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1319 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1320 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1321 #endif
1322
1323 #undef TARGET_SET_UP_BY_PROLOGUE
1324 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1325
1326 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
1327 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
1328 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
1329 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
1330 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
1331 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
1332 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
1333 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
1334 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
1335 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
1336 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
1337 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components
1338
1339 #undef TARGET_EXTRA_LIVE_ON_ENTRY
1340 #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry
1341
1342 #undef TARGET_INTERNAL_ARG_POINTER
1343 #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer
1344
1345 #undef TARGET_HAVE_TLS
1346 #define TARGET_HAVE_TLS HAVE_AS_TLS
1347
1348 #undef TARGET_CANNOT_FORCE_CONST_MEM
1349 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1350
1351 #undef TARGET_DELEGITIMIZE_ADDRESS
1352 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1353
1354 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1355 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1356
1357 #undef TARGET_LEGITIMATE_COMBINED_INSN
1358 #define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn
1359
1360 #undef TARGET_ASM_FUNCTION_PROLOGUE
1361 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1362 #undef TARGET_ASM_FUNCTION_EPILOGUE
1363 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1364
1365 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1366 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1367
1368 #undef TARGET_LEGITIMIZE_ADDRESS
1369 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1370
1371 #undef TARGET_SCHED_VARIABLE_ISSUE
1372 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1373
1374 #undef TARGET_SCHED_ISSUE_RATE
1375 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1376 #undef TARGET_SCHED_ADJUST_COST
1377 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1378 #undef TARGET_SCHED_ADJUST_PRIORITY
1379 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1380 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1381 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1382 #undef TARGET_SCHED_INIT
1383 #define TARGET_SCHED_INIT rs6000_sched_init
1384 #undef TARGET_SCHED_FINISH
1385 #define TARGET_SCHED_FINISH rs6000_sched_finish
1386 #undef TARGET_SCHED_REORDER
1387 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1388 #undef TARGET_SCHED_REORDER2
1389 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1390
1391 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1392 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1393
1394 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1395 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1396
1397 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1398 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1399 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1400 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1401 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1402 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1403 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1404 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1405
1406 #undef TARGET_SCHED_CAN_SPECULATE_INSN
1407 #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn
1408
1409 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1410 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1411 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1412 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1413 rs6000_builtin_support_vector_misalignment
1414 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1415 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1416 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1417 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1418 rs6000_builtin_vectorization_cost
1419 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1420 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1421 rs6000_preferred_simd_mode
1422 #undef TARGET_VECTORIZE_INIT_COST
1423 #define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
1424 #undef TARGET_VECTORIZE_ADD_STMT_COST
1425 #define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
1426 #undef TARGET_VECTORIZE_FINISH_COST
1427 #define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
1428 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
1429 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
1430
1431 #undef TARGET_LOOP_UNROLL_ADJUST
1432 #define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
1433
1434 #undef TARGET_INIT_BUILTINS
1435 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1436 #undef TARGET_BUILTIN_DECL
1437 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1438
1439 #undef TARGET_FOLD_BUILTIN
1440 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1441 #undef TARGET_GIMPLE_FOLD_BUILTIN
1442 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1443
1444 #undef TARGET_EXPAND_BUILTIN
1445 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1446
1447 #undef TARGET_MANGLE_TYPE
1448 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1449
1450 #undef TARGET_INIT_LIBFUNCS
1451 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1452
1453 #if TARGET_MACHO
1454 #undef TARGET_BINDS_LOCAL_P
1455 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1456 #endif
1457
1458 #undef TARGET_MS_BITFIELD_LAYOUT_P
1459 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1460
1461 #undef TARGET_ASM_OUTPUT_MI_THUNK
1462 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1463
1464 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1465 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1466
1467 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1468 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1469
1470 #undef TARGET_REGISTER_MOVE_COST
1471 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1472 #undef TARGET_MEMORY_MOVE_COST
1473 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1474 #undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
1475 #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
1476 rs6000_ira_change_pseudo_allocno_class
1477 #undef TARGET_CANNOT_COPY_INSN_P
1478 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1479 #undef TARGET_RTX_COSTS
1480 #define TARGET_RTX_COSTS rs6000_rtx_costs
1481 #undef TARGET_ADDRESS_COST
1482 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1483 #undef TARGET_INSN_COST
1484 #define TARGET_INSN_COST rs6000_insn_cost
1485
1486 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1487 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1488
1489 #undef TARGET_PROMOTE_FUNCTION_MODE
1490 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1491
1492 #undef TARGET_RETURN_IN_MEMORY
1493 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1494
1495 #undef TARGET_RETURN_IN_MSB
1496 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1497
1498 #undef TARGET_SETUP_INCOMING_VARARGS
1499 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1500
1501 /* Always strict argument naming on rs6000. */
1502 #undef TARGET_STRICT_ARGUMENT_NAMING
1503 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1504 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1505 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1506 #undef TARGET_SPLIT_COMPLEX_ARG
1507 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1508 #undef TARGET_MUST_PASS_IN_STACK
1509 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1510 #undef TARGET_PASS_BY_REFERENCE
1511 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1512 #undef TARGET_ARG_PARTIAL_BYTES
1513 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1514 #undef TARGET_FUNCTION_ARG_ADVANCE
1515 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1516 #undef TARGET_FUNCTION_ARG
1517 #define TARGET_FUNCTION_ARG rs6000_function_arg
1518 #undef TARGET_FUNCTION_ARG_PADDING
1519 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1520 #undef TARGET_FUNCTION_ARG_BOUNDARY
1521 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1522
1523 #undef TARGET_BUILD_BUILTIN_VA_LIST
1524 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1525
1526 #undef TARGET_EXPAND_BUILTIN_VA_START
1527 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1528
1529 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1530 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1531
1532 #undef TARGET_EH_RETURN_FILTER_MODE
1533 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1534
1535 #undef TARGET_TRANSLATE_MODE_ATTRIBUTE
1536 #define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute
1537
1538 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1539 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1540
1541 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1542 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1543
1544 #undef TARGET_FLOATN_MODE
1545 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1546
1547 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1548 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1549
1550 #undef TARGET_MD_ASM_ADJUST
1551 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1552
1553 #undef TARGET_OPTION_OVERRIDE
1554 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1555
1556 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1557 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1558 rs6000_builtin_vectorized_function
1559
1560 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1561 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1562 rs6000_builtin_md_vectorized_function
1563
1564 #undef TARGET_STACK_PROTECT_GUARD
1565 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1566
1567 #if !TARGET_MACHO
1568 #undef TARGET_STACK_PROTECT_FAIL
1569 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1570 #endif
1571
1572 #ifdef HAVE_AS_TLS
1573 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1574 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1575 #endif
1576
1577 /* Use a 32-bit anchor range. This leads to sequences like:
1578
1579 addis tmp,anchor,high
1580 add dest,tmp,low
1581
1582 where tmp itself acts as an anchor, and can be shared between
1583 accesses to the same 64k page. */
1584 #undef TARGET_MIN_ANCHOR_OFFSET
1585 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1586 #undef TARGET_MAX_ANCHOR_OFFSET
1587 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
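/* As an illustrative sketch of the addis/add split above (not code the
   compiler uses): the 16-bit low part is sign-extended, so the high part
   must compensate:

     HOST_WIDE_INT high = (offset + 0x8000) & ~(HOST_WIDE_INT) 0xffff;
     HOST_WIDE_INT low = offset - high;

   low then falls in [-0x8000, 0x7fff], and high/low together cover the
   full 32-bit anchor range defined above.  */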
1588 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1589 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1590 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1591 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1592
1593 #undef TARGET_BUILTIN_RECIPROCAL
1594 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1595
1596 #undef TARGET_SECONDARY_RELOAD
1597 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1598 #undef TARGET_SECONDARY_MEMORY_NEEDED
1599 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1600 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1601 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1602
1603 #undef TARGET_LEGITIMATE_ADDRESS_P
1604 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1605
1606 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1607 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1608
1609 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1610 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1611
1612 #undef TARGET_CAN_ELIMINATE
1613 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1614
1615 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1616 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1617
1618 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1619 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1620
1621 #undef TARGET_TRAMPOLINE_INIT
1622 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1623
1624 #undef TARGET_FUNCTION_VALUE
1625 #define TARGET_FUNCTION_VALUE rs6000_function_value
1626
1627 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1628 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1629
1630 #undef TARGET_OPTION_SAVE
1631 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1632
1633 #undef TARGET_OPTION_RESTORE
1634 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1635
1636 #undef TARGET_OPTION_PRINT
1637 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1638
1639 #undef TARGET_CAN_INLINE_P
1640 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1641
1642 #undef TARGET_SET_CURRENT_FUNCTION
1643 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1644
1645 #undef TARGET_LEGITIMATE_CONSTANT_P
1646 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1647
1648 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1649 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1650
1651 #undef TARGET_CAN_USE_DOLOOP_P
1652 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1653
1654 #undef TARGET_PREDICT_DOLOOP_P
1655 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
1656
1657 #undef TARGET_HAVE_COUNT_REG_DECR_P
1658 #define TARGET_HAVE_COUNT_REG_DECR_P true
1659
1660 /* 1000000000 is infinite cost in IVOPTs. */
1661 #undef TARGET_DOLOOP_COST_FOR_GENERIC
1662 #define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000
1663
1664 #undef TARGET_DOLOOP_COST_FOR_ADDRESS
1665 #define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000
1666
1667 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1668 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1669
1670 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1671 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1672 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1673 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1674 #undef TARGET_UNWIND_WORD_MODE
1675 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1676
1677 #undef TARGET_OFFLOAD_OPTIONS
1678 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1679
1680 #undef TARGET_C_MODE_FOR_SUFFIX
1681 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1682
1683 #undef TARGET_INVALID_BINARY_OP
1684 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1685
1686 #undef TARGET_OPTAB_SUPPORTED_P
1687 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1688
1689 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1690 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1691
1692 #undef TARGET_COMPARE_VERSION_PRIORITY
1693 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1694
1695 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1696 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1697 rs6000_generate_version_dispatcher_body
1698
1699 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1700 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1701 rs6000_get_function_versions_dispatcher
1702
1703 #undef TARGET_OPTION_FUNCTION_VERSIONS
1704 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1705
1706 #undef TARGET_HARD_REGNO_NREGS
1707 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1708 #undef TARGET_HARD_REGNO_MODE_OK
1709 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1710
1711 #undef TARGET_MODES_TIEABLE_P
1712 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1713
1714 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1715 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1716 rs6000_hard_regno_call_part_clobbered
1717
1718 #undef TARGET_SLOW_UNALIGNED_ACCESS
1719 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1720
1721 #undef TARGET_CAN_CHANGE_MODE_CLASS
1722 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1723
1724 #undef TARGET_CONSTANT_ALIGNMENT
1725 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1726
1727 #undef TARGET_STARTING_FRAME_OFFSET
1728 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1729
1730 #if TARGET_ELF && RS6000_WEAK
1731 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
1732 #define TARGET_ASM_GLOBALIZE_DECL_NAME rs6000_globalize_decl_name
1733 #endif
1734
1735 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1736 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1737
1738 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1739 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1740 \f
1741
1742 /* Processor table. */
1743 struct rs6000_ptt
1744 {
1745 const char *const name; /* Canonical processor name. */
1746 const enum processor_type processor; /* Processor type enum value. */
1747 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1748 };
1749
1750 static struct rs6000_ptt const processor_target_table[] =
1751 {
1752 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1753 #include "rs6000-cpus.def"
1754 #undef RS6000_CPU
1755 };
1756
1757 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1758 name is invalid. */
1759
1760 static int
1761 rs6000_cpu_name_lookup (const char *name)
1762 {
1763 size_t i;
1764
1765 if (name != NULL)
1766 {
1767 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1768 if (! strcmp (name, processor_target_table[i].name))
1769 return (int)i;
1770 }
1771
1772 return -1;
1773 }
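/* Illustrative use, as a hypothetical sketch (the real callers are in the
   option-override code later in this file):

     int idx = rs6000_cpu_name_lookup ("power9");
     if (idx >= 0)
       rs6000_isa_flags |= processor_target_table[idx].target_enable;

   A negative return means the -mcpu=/-mtune= name was not found in
   processor_target_table.  */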
1774
1775 \f
1776 /* Return number of consecutive hard regs needed starting at reg REGNO
1777 to hold something of mode MODE.
1778 This is ordinarily the length in words of a value of mode MODE
1779 but can be less for certain modes in special long registers.
1780
1781 POWER and PowerPC GPRs hold 32 bits worth;
1782 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1783
1784 static int
1785 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1786 {
1787 unsigned HOST_WIDE_INT reg_size;
1788
1789 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1790 128-bit floating point that can go in vector registers, which has VSX
1791 memory addressing. */
1792 if (FP_REGNO_P (regno))
1793 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
1794 ? UNITS_PER_VSX_WORD
1795 : UNITS_PER_FP_WORD);
1796
1797 else if (ALTIVEC_REGNO_P (regno))
1798 reg_size = UNITS_PER_ALTIVEC_WORD;
1799
1800 else
1801 reg_size = UNITS_PER_WORD;
1802
1803 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
1804 }
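/* Worked examples of the rounding division above: a 16-byte V4SImode
   value needs (16 + 4 - 1) / 4 = 4 GPRs on a 32-bit target, but
   (16 + 16 - 1) / 16 = 1 register when VSX addressing applies; an
   8-byte DFmode value needs (8 + 8 - 1) / 8 = 1 FPR.  */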
1805
1806 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1807 MODE. */
1808 static int
1809 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
1810 {
1811 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1812
1813 if (COMPLEX_MODE_P (mode))
1814 mode = GET_MODE_INNER (mode);
1815
1816 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1817 register pairs, and we use PTImode where we need to deal with quad word
1818 memory operations. Don't allow quad words in the argument or frame
1819 pointer registers, just registers 0..31. */
1820 if (mode == PTImode)
1821 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1822 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1823 && ((regno & 1) == 0));
1824
1825 /* The VSX registers that overlap the FPRs are larger than the FPRs on
1826 non-VSX implementations. Don't allow an item to be split between an FP register
1827 and an Altivec register. Allow TImode in all VSX registers if the user
1828 asked for it. */
1829 if (TARGET_VSX && VSX_REGNO_P (regno)
1830 && (VECTOR_MEM_VSX_P (mode)
1831 || FLOAT128_VECTOR_P (mode)
1832 || reg_addr[mode].scalar_in_vmx_p
1833 || mode == TImode
1834 || (TARGET_VADDUQM && mode == V1TImode)))
1835 {
1836 if (FP_REGNO_P (regno))
1837 return FP_REGNO_P (last_regno);
1838
1839 if (ALTIVEC_REGNO_P (regno))
1840 {
1841 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
1842 return 0;
1843
1844 return ALTIVEC_REGNO_P (last_regno);
1845 }
1846 }
1847
1848 /* The GPRs can hold any mode, but values bigger than one register
1849 cannot go past R31. */
1850 if (INT_REGNO_P (regno))
1851 return INT_REGNO_P (last_regno);
1852
1853 /* The float registers (except for VSX vector modes) can only hold floating
1854 modes and DImode. */
1855 if (FP_REGNO_P (regno))
1856 {
1857 if (FLOAT128_VECTOR_P (mode))
1858 return false;
1859
1860 if (SCALAR_FLOAT_MODE_P (mode)
1861 && (mode != TDmode || (regno % 2) == 0)
1862 && FP_REGNO_P (last_regno))
1863 return 1;
1864
1865 if (GET_MODE_CLASS (mode) == MODE_INT)
1866 {
1867 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1868 return 1;
1869
1870 if (TARGET_P8_VECTOR && (mode == SImode))
1871 return 1;
1872
1873 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
1874 return 1;
1875 }
1876
1877 return 0;
1878 }
1879
1880 /* The CR register can only hold CC modes. */
1881 if (CR_REGNO_P (regno))
1882 return GET_MODE_CLASS (mode) == MODE_CC;
1883
1884 if (CA_REGNO_P (regno))
1885 return mode == Pmode || mode == SImode;
1886
1887 /* AltiVec vectors go only in AltiVec registers. */
1888 if (ALTIVEC_REGNO_P (regno))
1889 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1890 || mode == V1TImode);
1891
1892 /* We cannot put non-VSX TImode or PTImode anywhere except in the general
1893 registers, and the value must fit within the register set. */
1894
1895 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
1896 }
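/* A few consequences of the checks above, as examples: TDmode is accepted
   in even-numbered FPRs only (f0, f2, ...); PTImode must start at an even
   GPR, so an r4/r5 pair is valid but r5/r6 is not; and V4SImode is
   rejected in FPRs unless VSX allows vectors there.  */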
1897
1898 /* Implement TARGET_HARD_REGNO_NREGS. */
1899
1900 static unsigned int
1901 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
1902 {
1903 return rs6000_hard_regno_nregs[mode][regno];
1904 }
1905
1906 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1907
1908 static bool
1909 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
1910 {
1911 return rs6000_hard_regno_mode_ok_p[mode][regno];
1912 }
1913
1914 /* Implement TARGET_MODES_TIEABLE_P.
1915
1916 PTImode cannot tie with other modes because PTImode is restricted to even
1917 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1918 57744).
1919
1920 The AltiVec/VSX vector tests were moved ahead of the scalar float tests so
1921 that IEEE 128-bit floating point on VSX systems ties with other vectors. */
1922
1923 static bool
1924 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
1925 {
1926 if (mode1 == PTImode)
1927 return mode2 == PTImode;
1928 if (mode2 == PTImode)
1929 return false;
1930
1931 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
1932 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
1933 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
1934 return false;
1935
1936 if (SCALAR_FLOAT_MODE_P (mode1))
1937 return SCALAR_FLOAT_MODE_P (mode2);
1938 if (SCALAR_FLOAT_MODE_P (mode2))
1939 return false;
1940
1941 if (GET_MODE_CLASS (mode1) == MODE_CC)
1942 return GET_MODE_CLASS (mode2) == MODE_CC;
1943 if (GET_MODE_CLASS (mode2) == MODE_CC)
1944 return false;
1945
1946 return true;
1947 }
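/* For example, under the ordering above V2DFmode ties with V4SImode (both
   AltiVec/VSX vector modes) and SFmode ties with DFmode (both scalar
   float), but TImode never ties with PTImode even though the two modes
   have identical size.  */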
1948
1949 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
1950
1951 static bool
1952 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
1953 machine_mode mode)
1954 {
1955 if (TARGET_32BIT
1956 && TARGET_POWERPC64
1957 && GET_MODE_SIZE (mode) > 4
1958 && INT_REGNO_P (regno))
1959 return true;
1960
1961 if (TARGET_VSX
1962 && FP_REGNO_P (regno)
1963 && GET_MODE_SIZE (mode) > 8
1964 && !FLOAT128_2REG_P (mode))
1965 return true;
1966
1967 return false;
1968 }
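/* Example: with -m32 -mpowerpc64 a DImode value fits in one 64-bit GPR,
   but the 32-bit ABI only preserves the low 32 bits across calls, so the
   first test reports it as part-clobbered.  Likewise, under VSX a 16-byte
   vector in an FPR loses its upper 64 bits across a call, because only
   the traditional FPR half is saved.  */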
1969
1970 /* Print interesting facts about registers. */
1971 static void
1972 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
1973 {
1974 int r, m;
1975
1976 for (r = first_regno; r <= last_regno; ++r)
1977 {
1978 const char *comma = "";
1979 int len;
1980
1981 if (first_regno == last_regno)
1982 fprintf (stderr, "%s:\t", reg_name);
1983 else
1984 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
1985
1986 len = 8;
1987 for (m = 0; m < NUM_MACHINE_MODES; ++m)
1988 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
1989 {
1990 if (len > 70)
1991 {
1992 fprintf (stderr, ",\n\t");
1993 len = 8;
1994 comma = "";
1995 }
1996
1997 if (rs6000_hard_regno_nregs[m][r] > 1)
1998 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
1999 rs6000_hard_regno_nregs[m][r]);
2000 else
2001 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2002
2003 comma = ", ";
2004 }
2005
2006 if (call_used_or_fixed_reg_p (r))
2007 {
2008 if (len > 70)
2009 {
2010 fprintf (stderr, ",\n\t");
2011 len = 8;
2012 comma = "";
2013 }
2014
2015 len += fprintf (stderr, "%s%s", comma, "call-used");
2016 comma = ", ";
2017 }
2018
2019 if (fixed_regs[r])
2020 {
2021 if (len > 70)
2022 {
2023 fprintf (stderr, ",\n\t");
2024 len = 8;
2025 comma = "";
2026 }
2027
2028 len += fprintf (stderr, "%s%s", comma, "fixed");
2029 comma = ", ";
2030 }
2031
2032 if (len > 70)
2033 {
2034 fprintf (stderr, ",\n\t");
2035 comma = "";
2036 }
2037
2038 len += fprintf (stderr, "%sreg-class = %s", comma,
2039 reg_class_names[(int)rs6000_regno_regclass[r]]);
2040 comma = ", ";
2041
2042 if (len > 70)
2043 {
2044 fprintf (stderr, ",\n\t");
2045 comma = "";
2046 }
2047
2048 fprintf (stderr, "%sregno = %d\n", comma, r);
2049 }
2050 }
2051
2052 static const char *
2053 rs6000_debug_vector_unit (enum rs6000_vector v)
2054 {
2055 const char *ret;
2056
2057 switch (v)
2058 {
2059 case VECTOR_NONE: ret = "none"; break;
2060 case VECTOR_ALTIVEC: ret = "altivec"; break;
2061 case VECTOR_VSX: ret = "vsx"; break;
2062 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2063 default: ret = "unknown"; break;
2064 }
2065
2066 return ret;
2067 }
2068
2069 /* Inner function printing just the address mask for a particular reload
2070 register class. */
2071 DEBUG_FUNCTION char *
2072 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2073 {
2074 static char ret[8];
2075 char *p = ret;
2076
2077 if ((mask & RELOAD_REG_VALID) != 0)
2078 *p++ = 'v';
2079 else if (keep_spaces)
2080 *p++ = ' ';
2081
2082 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2083 *p++ = 'm';
2084 else if (keep_spaces)
2085 *p++ = ' ';
2086
2087 if ((mask & RELOAD_REG_INDEXED) != 0)
2088 *p++ = 'i';
2089 else if (keep_spaces)
2090 *p++ = ' ';
2091
2092 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2093 *p++ = 'O';
2094 else if ((mask & RELOAD_REG_OFFSET) != 0)
2095 *p++ = 'o';
2096 else if (keep_spaces)
2097 *p++ = ' ';
2098
2099 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2100 *p++ = '+';
2101 else if (keep_spaces)
2102 *p++ = ' ';
2103
2104 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2105 *p++ = '+';
2106 else if (keep_spaces)
2107 *p++ = ' ';
2108
2109 if ((mask & RELOAD_REG_AND_M16) != 0)
2110 *p++ = '&';
2111 else if (keep_spaces)
2112 *p++ = ' ';
2113
2114 *p = '\0';
2115
2116 return ret;
2117 }
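/* For instance, a mask of RELOAD_REG_VALID | RELOAD_REG_INDEXED
   | RELOAD_REG_OFFSET decodes as "vio" when KEEP_SPACES is false; with
   KEEP_SPACES the unset flag positions are padded with blanks so the
   columns line up in the -mdebug=reg output.  */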
2118
2119 /* Print the address masks in a human readable fashion. */
2120 DEBUG_FUNCTION void
2121 rs6000_debug_print_mode (ssize_t m)
2122 {
2123 ssize_t rc;
2124 int spaces = 0;
2125
2126 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2127 for (rc = 0; rc < N_RELOAD_REG; rc++)
2128 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2129 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2130
2131 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2132 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2133 {
2134 fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2135 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2136 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2137 spaces = 0;
2138 }
2139 else
2140 spaces += sizeof (" Reload=sl") - 1;
2141
2142 if (reg_addr[m].scalar_in_vmx_p)
2143 {
2144 fprintf (stderr, "%*s Upper=y", spaces, "");
2145 spaces = 0;
2146 }
2147 else
2148 spaces += sizeof (" Upper=y") - 1;
2149
2150 if (rs6000_vector_unit[m] != VECTOR_NONE
2151 || rs6000_vector_mem[m] != VECTOR_NONE)
2152 {
2153 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2154 spaces, "",
2155 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2156 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2157 }
2158
2159 fputs ("\n", stderr);
2160 }
2161
2162 #define DEBUG_FMT_ID "%-32s= "
2163 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2164 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2165 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
2166
2167 /* Print various interesting information with -mdebug=reg. */
2168 static void
2169 rs6000_debug_reg_global (void)
2170 {
2171 static const char *const tf[2] = { "false", "true" };
2172 const char *nl = (const char *)0;
2173 int m;
2174 size_t m1, m2, v;
2175 char costly_num[20];
2176 char nop_num[20];
2177 char flags_buffer[40];
2178 const char *costly_str;
2179 const char *nop_str;
2180 const char *trace_str;
2181 const char *abi_str;
2182 const char *cmodel_str;
2183 struct cl_target_option cl_opts;
2184
2185 /* Modes we want tieable information on. */
2186 static const machine_mode print_tieable_modes[] = {
2187 QImode,
2188 HImode,
2189 SImode,
2190 DImode,
2191 TImode,
2192 PTImode,
2193 SFmode,
2194 DFmode,
2195 TFmode,
2196 IFmode,
2197 KFmode,
2198 SDmode,
2199 DDmode,
2200 TDmode,
2201 V16QImode,
2202 V8HImode,
2203 V4SImode,
2204 V2DImode,
2205 V1TImode,
2206 V32QImode,
2207 V16HImode,
2208 V8SImode,
2209 V4DImode,
2210 V2TImode,
2211 V4SFmode,
2212 V2DFmode,
2213 V8SFmode,
2214 V4DFmode,
2215 CCmode,
2216 CCUNSmode,
2217 CCEQmode,
2218 };
2219
2220 /* Virtual regs we are interested in. */
2221 static const struct {
2222 int regno; /* register number. */
2223 const char *name; /* register name. */
2224 } virtual_regs[] = {
2225 { STACK_POINTER_REGNUM, "stack pointer:" },
2226 { TOC_REGNUM, "toc: " },
2227 { STATIC_CHAIN_REGNUM, "static chain: " },
2228 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2229 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2230 { ARG_POINTER_REGNUM, "arg pointer: " },
2231 { FRAME_POINTER_REGNUM, "frame pointer:" },
2232 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2233 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2234 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2235 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2236 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2237 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2238 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2239 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2240 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2241 };
2242
2243 fputs ("\nHard register information:\n", stderr);
2244 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2245 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2246 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2247 LAST_ALTIVEC_REGNO,
2248 "vs");
2249 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2250 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2251 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2252 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2253 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2254 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2255
2256 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2257 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2258 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2259
2260 fprintf (stderr,
2261 "\n"
2262 "d reg_class = %s\n"
2263 "f reg_class = %s\n"
2264 "v reg_class = %s\n"
2265 "wa reg_class = %s\n"
2266 "we reg_class = %s\n"
2267 "wr reg_class = %s\n"
2268 "wx reg_class = %s\n"
2269 "wA reg_class = %s\n"
2270 "\n",
2271 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2272 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2273 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2274 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2275 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2276 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2277 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2278 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
2279
2280 nl = "\n";
2281 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2282 rs6000_debug_print_mode (m);
2283
2284 fputs ("\n", stderr);
2285
2286 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2287 {
2288 machine_mode mode1 = print_tieable_modes[m1];
2289 bool first_time = true;
2290
2291 nl = (const char *)0;
2292 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2293 {
2294 machine_mode mode2 = print_tieable_modes[m2];
2295 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2296 {
2297 if (first_time)
2298 {
2299 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2300 nl = "\n";
2301 first_time = false;
2302 }
2303
2304 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2305 }
2306 }
2307
2308 if (!first_time)
2309 fputs ("\n", stderr);
2310 }
2311
2312 if (nl)
2313 fputs (nl, stderr);
2314
2315 if (rs6000_recip_control)
2316 {
2317 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2318
2319 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2320 if (rs6000_recip_bits[m])
2321 {
2322 fprintf (stderr,
2323 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2324 GET_MODE_NAME (m),
2325 (RS6000_RECIP_AUTO_RE_P (m)
2326 ? "auto"
2327 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2328 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2329 ? "auto"
2330 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2331 }
2332
2333 fputs ("\n", stderr);
2334 }
2335
2336 if (rs6000_cpu_index >= 0)
2337 {
2338 const char *name = processor_target_table[rs6000_cpu_index].name;
2339 HOST_WIDE_INT flags
2340 = processor_target_table[rs6000_cpu_index].target_enable;
2341
2342 sprintf (flags_buffer, "-mcpu=%s flags", name);
2343 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2344 }
2345 else
2346 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2347
2348 if (rs6000_tune_index >= 0)
2349 {
2350 const char *name = processor_target_table[rs6000_tune_index].name;
2351 HOST_WIDE_INT flags
2352 = processor_target_table[rs6000_tune_index].target_enable;
2353
2354 sprintf (flags_buffer, "-mtune=%s flags", name);
2355 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2356 }
2357 else
2358 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2359
2360 cl_target_option_save (&cl_opts, &global_options);
2361 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2362 rs6000_isa_flags);
2363
2364 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2365 rs6000_isa_flags_explicit);
2366
2367 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2368 rs6000_builtin_mask);
2369
2370 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2371
2372 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2373 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2374
2375 switch (rs6000_sched_costly_dep)
2376 {
2377 case max_dep_latency:
2378 costly_str = "max_dep_latency";
2379 break;
2380
2381 case no_dep_costly:
2382 costly_str = "no_dep_costly";
2383 break;
2384
2385 case all_deps_costly:
2386 costly_str = "all_deps_costly";
2387 break;
2388
2389 case true_store_to_load_dep_costly:
2390 costly_str = "true_store_to_load_dep_costly";
2391 break;
2392
2393 case store_to_load_dep_costly:
2394 costly_str = "store_to_load_dep_costly";
2395 break;
2396
2397 default:
2398 costly_str = costly_num;
2399 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2400 break;
2401 }
2402
2403 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2404
2405 switch (rs6000_sched_insert_nops)
2406 {
2407 case sched_finish_regroup_exact:
2408 nop_str = "sched_finish_regroup_exact";
2409 break;
2410
2411 case sched_finish_pad_groups:
2412 nop_str = "sched_finish_pad_groups";
2413 break;
2414
2415 case sched_finish_none:
2416 nop_str = "sched_finish_none";
2417 break;
2418
2419 default:
2420 nop_str = nop_num;
2421 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2422 break;
2423 }
2424
2425 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2426
2427 switch (rs6000_sdata)
2428 {
2429 default:
2430 case SDATA_NONE:
2431 break;
2432
2433 case SDATA_DATA:
2434 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2435 break;
2436
2437 case SDATA_SYSV:
2438 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2439 break;
2440
2441 case SDATA_EABI:
2442 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2443 break;
2444
2445 }
2446
2447 switch (rs6000_traceback)
2448 {
2449 case traceback_default: trace_str = "default"; break;
2450 case traceback_none: trace_str = "none"; break;
2451 case traceback_part: trace_str = "part"; break;
2452 case traceback_full: trace_str = "full"; break;
2453 default: trace_str = "unknown"; break;
2454 }
2455
2456 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2457
2458 switch (rs6000_current_cmodel)
2459 {
2460 case CMODEL_SMALL: cmodel_str = "small"; break;
2461 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2462 case CMODEL_LARGE: cmodel_str = "large"; break;
2463 default: cmodel_str = "unknown"; break;
2464 }
2465
2466 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2467
2468 switch (rs6000_current_abi)
2469 {
2470 case ABI_NONE: abi_str = "none"; break;
2471 case ABI_AIX: abi_str = "aix"; break;
2472 case ABI_ELFv2: abi_str = "ELFv2"; break;
2473 case ABI_V4: abi_str = "V4"; break;
2474 case ABI_DARWIN: abi_str = "darwin"; break;
2475 default: abi_str = "unknown"; break;
2476 }
2477
2478 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2479
2480 if (rs6000_altivec_abi)
2481 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2482
2483 if (rs6000_darwin64_abi)
2484 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2485
2486 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2487 (TARGET_SOFT_FLOAT ? "true" : "false"));
2488
2489 if (TARGET_LINK_STACK)
2490 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2491
2492 if (TARGET_P8_FUSION)
2493 {
2494 char options[80];
2495
2496 strcpy (options, "power8");
2497 if (TARGET_P8_FUSION_SIGN)
2498 strcat (options, ", sign");
2499
2500 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2501 }
2502
2503 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2504 TARGET_SECURE_PLT ? "secure" : "bss");
2505 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2506 aix_struct_return ? "aix" : "sysv");
2507 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2508 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2509 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2510 tf[!!rs6000_align_branch_targets]);
2511 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2512 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2513 rs6000_long_double_type_size);
2514 if (rs6000_long_double_type_size > 64)
2515 {
2516 fprintf (stderr, DEBUG_FMT_S, "long double type",
2517 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2518 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2519 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2520 }
2521 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2522 (int)rs6000_sched_restricted_insns_priority);
2523 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2524 (int)END_BUILTINS);
2525 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2526 (int)RS6000_BUILTIN_COUNT);
2527
2528 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2529 (int)TARGET_FLOAT128_ENABLE_TYPE);
2530
2531 if (TARGET_VSX)
2532 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2533 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2534
2535 if (TARGET_DIRECT_MOVE_128)
2536 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2537 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2538 }
2539
2540 \f
2541 /* Update the addr mask bits in reg_addr to help the secondary reload
2542 support and the legitimate address checks figure out the appropriate
2543 addressing to use. */
2544
2545 static void
2546 rs6000_setup_reg_addr_masks (void)
2547 {
2548 ssize_t rc, reg, m, nregs;
2549 addr_mask_type any_addr_mask, addr_mask;
2550
2551 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2552 {
2553 machine_mode m2 = (machine_mode) m;
2554 bool complex_p = false;
2555 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2556 size_t msize;
2557
2558 if (COMPLEX_MODE_P (m2))
2559 {
2560 complex_p = true;
2561 m2 = GET_MODE_INNER (m2);
2562 }
2563
2564 msize = GET_MODE_SIZE (m2);
2565
2566 /* SDmode is special in that we want to access it only via REG+REG
2567 addressing on power7 and above, since we want to use the LFIWZX and
2568 STFIWX instructions to load and store it. */
2569 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2570
2571 any_addr_mask = 0;
2572 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2573 {
2574 addr_mask = 0;
2575 reg = reload_reg_map[rc].reg;
2576
2577 /* Can mode values go in the GPR/FPR/Altivec registers? */
2578 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2579 {
2580 bool small_int_vsx_p = (small_int_p
2581 && (rc == RELOAD_REG_FPR
2582 || rc == RELOAD_REG_VMX));
2583
2584 nregs = rs6000_hard_regno_nregs[m][reg];
2585 addr_mask |= RELOAD_REG_VALID;
2586
2587 /* Indicate if the mode takes more than 1 physical register. If
2588 it takes a single register, indicate it can do REG+REG
2589 addressing. Small integers in VSX registers can only do
2590 REG+REG addressing. */
2591 if (small_int_vsx_p)
2592 addr_mask |= RELOAD_REG_INDEXED;
2593 else if (nregs > 1 || m == BLKmode || complex_p)
2594 addr_mask |= RELOAD_REG_MULTIPLE;
2595 else
2596 addr_mask |= RELOAD_REG_INDEXED;
2597
2598 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2599 addressing. If we allow scalars into Altivec registers,
2600 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2601
2602 For VSX systems, we don't allow update addressing for
2603 DFmode/SFmode if those registers can go in both the
2604 traditional floating point registers and Altivec registers.
2605 The load/store instructions for the Altivec registers do not
2606 have update forms. If we allowed update addressing, it seems
2607 to break IV-OPT code using floating point if the index type is
2608 int instead of long (PR target/81550 and target/84042). */
2609
2610 if (TARGET_UPDATE
2611 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2612 && msize <= 8
2613 && !VECTOR_MODE_P (m2)
2614 && !FLOAT128_VECTOR_P (m2)
2615 && !complex_p
2616 && (m != E_DFmode || !TARGET_VSX)
2617 && (m != E_SFmode || !TARGET_P8_VECTOR)
2618 && !small_int_vsx_p)
2619 {
2620 addr_mask |= RELOAD_REG_PRE_INCDEC;
2621
2622 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2623 we don't allow PRE_MODIFY for some multi-register
2624 operations. */
2625 switch (m)
2626 {
2627 default:
2628 addr_mask |= RELOAD_REG_PRE_MODIFY;
2629 break;
2630
2631 case E_DImode:
2632 if (TARGET_POWERPC64)
2633 addr_mask |= RELOAD_REG_PRE_MODIFY;
2634 break;
2635
2636 case E_DFmode:
2637 case E_DDmode:
2638 if (TARGET_HARD_FLOAT)
2639 addr_mask |= RELOAD_REG_PRE_MODIFY;
2640 break;
2641 }
2642 }
2643 }
2644
2645 /* GPR and FPR registers can do REG+OFFSET addressing, except
2646 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2647 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2648 if ((addr_mask != 0) && !indexed_only_p
2649 && msize <= 8
2650 && (rc == RELOAD_REG_GPR
2651 || ((msize == 8 || m2 == SFmode)
2652 && (rc == RELOAD_REG_FPR
2653 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2654 addr_mask |= RELOAD_REG_OFFSET;
2655
2656 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2657 instructions are enabled. The offset field for 128-bit VSX registers is
2658 only 12 bits. While GPRs can handle the full offset range, VSX
2659 registers can only handle the restricted range. */
2660 else if ((addr_mask != 0) && !indexed_only_p
2661 && msize == 16 && TARGET_P9_VECTOR
2662 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2663 || (m2 == TImode && TARGET_VSX)))
2664 {
2665 addr_mask |= RELOAD_REG_OFFSET;
2666 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2667 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2668 }
2669
2670 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2671 addressing on 128-bit types. */
2672 if (rc == RELOAD_REG_VMX && msize == 16
2673 && (addr_mask & RELOAD_REG_VALID) != 0)
2674 addr_mask |= RELOAD_REG_AND_M16;
2675
2676 reg_addr[m].addr_mask[rc] = addr_mask;
2677 any_addr_mask |= addr_mask;
2678 }
2679
2680 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2681 }
2682 }
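/* As an illustrative example (assuming an ISA 2.07 / power8 style
   configuration): SImode in a GPR ends up valid, indexed, offsettable and
   update-capable, while SImode in an FPR or Altivec register is
   restricted to REG+REG ("vi"), since small integers in VSX registers
   can only use indexed addressing as noted above.  */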
2683
2684 \f
2685 /* Initialize the various global tables that are based on register size. */
2686 static void
2687 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2688 {
2689 ssize_t r, m, c;
2690 int align64;
2691 int align32;
2692
2693 /* Precalculate REGNO_REG_CLASS. */
2694 rs6000_regno_regclass[0] = GENERAL_REGS;
2695 for (r = 1; r < 32; ++r)
2696 rs6000_regno_regclass[r] = BASE_REGS;
2697
2698 for (r = 32; r < 64; ++r)
2699 rs6000_regno_regclass[r] = FLOAT_REGS;
2700
2701 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2702 rs6000_regno_regclass[r] = NO_REGS;
2703
2704 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2705 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2706
2707 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2708 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2709 rs6000_regno_regclass[r] = CR_REGS;
2710
2711 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2712 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2713 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2714 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2715 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2716 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2717 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2718
2719 /* Precalculate the mapping from register class to the simpler reload register
2720 type. We don't need all of the register classes that are combinations of
2721 different classes, just the simple ones that have constraint letters. */
2722 for (c = 0; c < N_REG_CLASSES; c++)
2723 reg_class_to_reg_type[c] = NO_REG_TYPE;
2724
2725 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2726 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2727 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2728 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2729 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2730 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2731 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2732 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2733 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2734 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2735
2736 if (TARGET_VSX)
2737 {
2738 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2739 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2740 }
2741 else
2742 {
2743 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2744 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2745 }
2746
2747 /* Precalculate the valid memory formats as well as the vector information;
2748 this must be set up before the rs6000_hard_regno_nregs_internal calls
2749 below. */
2750 gcc_assert ((int)VECTOR_NONE == 0);
2751 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2752 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2753
2754 gcc_assert ((int)CODE_FOR_nothing == 0);
2755 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2756
2757 gcc_assert ((int)NO_REGS == 0);
2758 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2759
2760 /* The VSX hardware allows native alignment for vectors, but control here
2761 whether the compiler assumes native alignment or still uses 128-bit alignment. */
2762 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2763 {
2764 align64 = 64;
2765 align32 = 32;
2766 }
2767 else
2768 {
2769 align64 = 128;
2770 align32 = 128;
2771 }
2772
2773 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2774 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2775 if (TARGET_FLOAT128_TYPE)
2776 {
2777 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2778 rs6000_vector_align[KFmode] = 128;
2779
2780 if (FLOAT128_IEEE_P (TFmode))
2781 {
2782 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2783 rs6000_vector_align[TFmode] = 128;
2784 }
2785 }
2786
2787 /* V2DF mode, VSX only. */
2788 if (TARGET_VSX)
2789 {
2790 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2791 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2792 rs6000_vector_align[V2DFmode] = align64;
2793 }
2794
2795 /* V4SF mode, either VSX or Altivec. */
2796 if (TARGET_VSX)
2797 {
2798 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2799 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2800 rs6000_vector_align[V4SFmode] = align32;
2801 }
2802 else if (TARGET_ALTIVEC)
2803 {
2804 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2805 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2806 rs6000_vector_align[V4SFmode] = align32;
2807 }
2808
2809 /* V16QImode, V8HImode, V4SImode are Altivec only for arithmetic, but can
2810 do VSX loads and stores. */
2811 if (TARGET_ALTIVEC)
2812 {
2813 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2814 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2815 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2816 rs6000_vector_align[V4SImode] = align32;
2817 rs6000_vector_align[V8HImode] = align32;
2818 rs6000_vector_align[V16QImode] = align32;
2819
2820 if (TARGET_VSX)
2821 {
2822 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2823 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2824 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2825 }
2826 else
2827 {
2828 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2829 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2830 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2831 }
2832 }
2833
2834 /* V2DImode: full arithmetic support depends on the ISA 2.07 vector mode.
2835 Allow it under VSX to do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2836 if (TARGET_VSX)
2837 {
2838 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2839 rs6000_vector_unit[V2DImode]
2840 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2841 rs6000_vector_align[V2DImode] = align64;
2842
2843 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2844 rs6000_vector_unit[V1TImode]
2845 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2846 rs6000_vector_align[V1TImode] = 128;
2847 }
2848
2849 /* DFmode, see if we want to use the VSX unit. Memory is handled
2850 differently, so don't set rs6000_vector_mem. */
2851 if (TARGET_VSX)
2852 {
2853 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2854 rs6000_vector_align[DFmode] = 64;
2855 }
2856
2857 /* SFmode, see if we want to use the VSX unit. */
2858 if (TARGET_P8_VECTOR)
2859 {
2860 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2861 rs6000_vector_align[SFmode] = 32;
2862 }
2863
2864 /* Allow TImode in VSX register and set the VSX memory macros. */
2865 if (TARGET_VSX)
2866 {
2867 rs6000_vector_mem[TImode] = VECTOR_VSX;
2868 rs6000_vector_align[TImode] = align64;
2869 }
2870
2871 /* Register class constraints for the constraints that depend on compile
2872 switches. When the VSX code was added, different constraints were added
2873 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2874 of the VSX registers are used. The register classes for scalar floating
2875 point types are set, based on whether we allow that type into the upper
2876 (Altivec) registers. GCC has register classes to target the Altivec
2877 registers for load/store operations, to select using a VSX memory
2878 operation instead of the traditional floating point operation. The
2879 constraints are:
2880
2881 d - Register class to use with traditional DFmode instructions.
2882 f - Register class to use with traditional SFmode instructions.
2883 v - Altivec register.
2884 wa - Any VSX register.
2885 wc - Reserved to represent individual CR bits (used in LLVM).
2886 wn - always NO_REGS.
2887 wr - GPR if 64-bit mode is permitted.
2888 wx - Float register if we can do 32-bit int stores. */
2889
2890 if (TARGET_HARD_FLOAT)
2891 {
2892 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
2893 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
2894 }
2895
2896 if (TARGET_VSX)
2897 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2898
2899 /* Add conditional constraints based on various options, to allow us to
2900 collapse multiple insn patterns. */
2901 if (TARGET_ALTIVEC)
2902 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2903
2904 if (TARGET_POWERPC64)
2905 {
2906 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2907 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
2908 }
2909
2910 if (TARGET_STFIWX)
2911 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2912
2913 /* Support for new direct moves (ISA 3.0 + 64-bit). */
2914 if (TARGET_DIRECT_MOVE_128)
2915 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
2916
2917 /* Set up the reload helper and direct move functions. */
2918 if (TARGET_VSX || TARGET_ALTIVEC)
2919 {
2920 if (TARGET_64BIT)
2921 {
2922 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2923 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2924 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2925 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2926 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2927 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
2928 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
2929 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
2930 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
2931 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
2932 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
2933 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
2934 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
2935 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
2936 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
2937 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
2938 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
2939 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
2940 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
2941 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
2942
2943 if (FLOAT128_VECTOR_P (KFmode))
2944 {
2945 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
2946 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
2947 }
2948
2949 if (FLOAT128_VECTOR_P (TFmode))
2950 {
2951 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
2952 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
2953 }
2954
2955 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
2956 available. */
2957 if (TARGET_NO_SDMODE_STACK)
2958 {
2959 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
2960 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
2961 }
2962
2963 if (TARGET_VSX)
2964 {
2965 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
2966 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
2967 }
2968
2969 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
2970 {
2971 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
2972 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
2973 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
2974 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
2975 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
2976 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
2977 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
2978 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
2979 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
2980
2981 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
2982 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
2983 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
2984 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
2985 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
2986 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
2987 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
2988 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
2989 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
2990
2991 if (FLOAT128_VECTOR_P (KFmode))
2992 {
2993 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
2994 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
2995 }
2996
2997 if (FLOAT128_VECTOR_P (TFmode))
2998 {
2999 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3000 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3001 }
3002 }
3003 }
3004 else
3005 {
3006 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3007 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3008 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3009 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3010 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3011 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3012 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3013 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3014 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3015 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3016 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3017 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3018 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3019 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3020 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3021 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3022 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3023 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3024 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3025 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3026
3027 if (FLOAT128_VECTOR_P (KFmode))
3028 {
3029 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3030 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3031 }
3032
3033 if (FLOAT128_IEEE_P (TFmode))
3034 {
3035 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3036 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3037 }
3038
3039 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3040 available. */
3041 if (TARGET_NO_SDMODE_STACK)
3042 {
3043 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3044 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3045 }
3046
3047 if (TARGET_VSX)
3048 {
3049 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3050 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3051 }
3052
3053 if (TARGET_DIRECT_MOVE)
3054 {
3055 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3056 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3057 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3058 }
3059 }
3060
3061 reg_addr[DFmode].scalar_in_vmx_p = true;
3062 reg_addr[DImode].scalar_in_vmx_p = true;
3063
3064 if (TARGET_P8_VECTOR)
3065 {
3066 reg_addr[SFmode].scalar_in_vmx_p = true;
3067 reg_addr[SImode].scalar_in_vmx_p = true;
3068
3069 if (TARGET_P9_VECTOR)
3070 {
3071 reg_addr[HImode].scalar_in_vmx_p = true;
3072 reg_addr[QImode].scalar_in_vmx_p = true;
3073 }
3074 }
3075 }
3076
3077 /* Precalculate HARD_REGNO_NREGS. */
3078 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3079 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3080 rs6000_hard_regno_nregs[m][r]
3081 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3082
3083 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3084 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3085 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3086 rs6000_hard_regno_mode_ok_p[m][r]
3087 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3088
3089 /* Precalculate CLASS_MAX_NREGS sizes. */
3090 for (c = 0; c < LIM_REG_CLASSES; ++c)
3091 {
3092 int reg_size;
3093
3094 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3095 reg_size = UNITS_PER_VSX_WORD;
3096
3097 else if (c == ALTIVEC_REGS)
3098 reg_size = UNITS_PER_ALTIVEC_WORD;
3099
3100 else if (c == FLOAT_REGS)
3101 reg_size = UNITS_PER_FP_WORD;
3102
3103 else
3104 reg_size = UNITS_PER_WORD;
3105
3106 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3107 {
3108 machine_mode m2 = (machine_mode)m;
3109 int reg_size2 = reg_size;
3110
3111 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3112 in VSX. */
3113 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3114 reg_size2 = UNITS_PER_FP_WORD;
3115
3116 rs6000_class_max_nregs[m][c]
3117 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3118 }
3119 }
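/* Example of the FLOAT128_2REG_P special case above: IFmode (IBM
   double-double, 16 bytes) in a VSX register class still needs
   (16 + 8 - 1) / 8 = 2 registers, because it always lives in an FPR
   pair rather than a single 16-byte VSX register.  */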
3120
3121 /* Calculate the modes for which to automatically generate code using the
3122 reciprocal divide and square root instructions. In the future, possibly
3123 automatically generate the instructions even if the user did not specify
3124 -mrecip. The double-precision reciprocal square root estimate on older
3125 machines is not accurate enough. */
3126 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3127 if (TARGET_FRES)
3128 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3129 if (TARGET_FRE)
3130 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3131 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3132 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3133 if (VECTOR_UNIT_VSX_P (V2DFmode))
3134 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3135
3136 if (TARGET_FRSQRTES)
3137 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3138 if (TARGET_FRSQRTE)
3139 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3140 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3141 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3142 if (VECTOR_UNIT_VSX_P (V2DFmode))
3143 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3144
3145 if (rs6000_recip_control)
3146 {
3147 if (!flag_finite_math_only)
3148 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
3149 "-ffast-math");
3150 if (flag_trapping_math)
3151 warning (0, "%qs requires %qs or %qs", "-mrecip",
3152 "-fno-trapping-math", "-ffast-math");
3153 if (!flag_reciprocal_math)
3154 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3155 "-ffast-math");
3156 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3157 {
3158 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3159 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3160 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3161
3162 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3163 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3164 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3165
3166 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3167 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3168 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3169
3170 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3171 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3172 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3173
3174 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3175 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3176 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3177
3178 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3179 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3180 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3181
3182 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3183 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3184 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3185
3186 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3187 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3188 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3189 }
3190 }
3191
3192 /* Update the addr mask bits in reg_addr to help the secondary reload
3193 support and the legitimate address checks figure out the appropriate
3194 addressing to use. */
3195 rs6000_setup_reg_addr_masks ();
3196
3197 if (global_init_p || TARGET_DEBUG_TARGET)
3198 {
3199 if (TARGET_DEBUG_REG)
3200 rs6000_debug_reg_global ();
3201
3202 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3203 fprintf (stderr,
3204 "SImode variable mult cost = %d\n"
3205 "SImode constant mult cost = %d\n"
3206 "SImode short constant mult cost = %d\n"
3207 "DImode multipliciation cost = %d\n"
3208 "SImode division cost = %d\n"
3209 "DImode division cost = %d\n"
3210 "Simple fp operation cost = %d\n"
3211 "DFmode multiplication cost = %d\n"
3212 "SFmode division cost = %d\n"
3213 "DFmode division cost = %d\n"
3214 "cache line size = %d\n"
3215 "l1 cache size = %d\n"
3216 "l2 cache size = %d\n"
3217 "simultaneous prefetches = %d\n"
3218 "\n",
3219 rs6000_cost->mulsi,
3220 rs6000_cost->mulsi_const,
3221 rs6000_cost->mulsi_const9,
3222 rs6000_cost->muldi,
3223 rs6000_cost->divsi,
3224 rs6000_cost->divdi,
3225 rs6000_cost->fp,
3226 rs6000_cost->dmul,
3227 rs6000_cost->sdiv,
3228 rs6000_cost->ddiv,
3229 rs6000_cost->cache_line_size,
3230 rs6000_cost->l1_cache_size,
3231 rs6000_cost->l2_cache_size,
3232 rs6000_cost->simultaneous_prefetches);
3233 }
3234 }
3235
3236 #if TARGET_MACHO
3237 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3238
3239 static void
3240 darwin_rs6000_override_options (void)
3241 {
3242 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3243 off. */
3244 rs6000_altivec_abi = 1;
3245 TARGET_ALTIVEC_VRSAVE = 1;
3246 rs6000_current_abi = ABI_DARWIN;
3247
3248 if (DEFAULT_ABI == ABI_DARWIN
3249 && TARGET_64BIT)
3250 darwin_one_byte_bool = 1;
3251
3252 if (TARGET_64BIT && ! TARGET_POWERPC64)
3253 {
3254 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3255 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3256 }
3257
3258 /* The linkers [ld64] that support 64-bit do not need the JBSR longcall
3259 optimisation, and will not work with the most generic case (where the
3260 symbol is undefined external, but there is no symbol stub). */
3261 if (TARGET_64BIT)
3262 rs6000_default_long_calls = 0;
3263
3264 /* ld_classic is (so far) still used for kernel (static) code, and supports
3265 the JBSR longcall / branch islands. */
3266 if (flag_mkernel)
3267 {
3268 rs6000_default_long_calls = 1;
3269
3270 /* Allow a kext author to do -mkernel -mhard-float. */
3271 if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
3272 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3273 }
3274
3275 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3276 Altivec. */
3277 if (!flag_mkernel && !flag_apple_kext
3278 && TARGET_64BIT
3279 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3280 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3281
3282 /* Unless the user (not the configurer) has explicitly overridden
3283 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3284 G4 unless targeting the kernel. */
3285 if (!flag_mkernel
3286 && !flag_apple_kext
3287 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3288 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3289 && ! global_options_set.x_rs6000_cpu_index)
3290 {
3291 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3292 }
3293 }
3294 #endif
3295
3296 /* If not otherwise specified by a target, make 'long double' equivalent to
3297 'double'. */
3298
3299 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3300 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3301 #endif
3302
3303 /* Return the builtin mask of the various options used that could affect which
3304 builtins were used. In the past we used target_flags, but we've run out of
3305 bits, and some options are no longer in target_flags. */
3306
3307 HOST_WIDE_INT
3308 rs6000_builtin_mask_calculate (void)
3309 {
3310 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3311 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3312 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3313 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3314 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3315 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3316 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3317 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3318 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3319 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3320 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3321 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3322 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3323 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3324 | ((TARGET_POWERPC64) ? RS6000_BTM_POWERPC64 : 0)
3325 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3326 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3327 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3328 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3329 | ((TARGET_LONG_DOUBLE_128
3330 && TARGET_HARD_FLOAT
3331 && !TARGET_IEEEQUAD) ? RS6000_BTM_LDBL128 : 0)
3332 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
3333 | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0));
3334 }
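/* A minimal usage sketch (illustrative only; the real consumers live in
   the builtin machinery): a builtin whose requirement mask is BIF_MASK is
   available when every bit it needs is present in the mask computed above.

     HOST_WIDE_INT bif_mask = RS6000_BTM_ALTIVEC | RS6000_BTM_VSX;
     bool available = (bif_mask & ~rs6000_builtin_mask_calculate ()) == 0;

   Here bif_mask is a hypothetical per-builtin requirement, not a name
   defined elsewhere in this file.  */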
3335
3336 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3337 to clobber the XER[CA] bit because clobbering that bit without telling
3338 the compiler worked just fine with versions of GCC before GCC 5, and
3339 breaking a lot of older code in ways that are hard to track down is
3340 not such a great idea. */
3341
3342 static rtx_insn *
3343 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3344 vec<const char *> &/*constraints*/,
3345 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3346 {
3347 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3348 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3349 return NULL;
3350 }
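/* Illustrative example (user code, not part of this file): an asm such as

     asm ("addic %0,%1,-1" : "=r" (out) : "r" (in));

   modifies XER[CA] without declaring a clobber; the implicit clobber pushed
   above keeps such pre-GCC 5 asm statements working correctly.  */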
3351
3352 /* Override command line options.
3353
3354 Combine build-specific configuration information with options
3355 specified on the command line to set various state variables which
3356 influence code generation, optimization, and expansion of built-in
3357 functions. Assure that command-line configuration preferences are
3358 compatible with each other and with the build configuration; issue
3359 warnings while adjusting configuration or error messages while
3360 rejecting configuration.
3361
3362 Upon entry to this function:
3363
3364 This function is called once at the beginning of
3365 compilation, and then again at the start and end of compiling
3366 each section of code that has a different configuration, as
3367 indicated, for example, by adding the
3368
3369 __attribute__((__target__("cpu=power9")))
3370
3371 qualifier to a function definition or, for example, by bracketing
3372 code between
3373
3374 #pragma GCC target("altivec")
3375
3376 and
3377
3378 #pragma GCC reset_options
3379
3380 directives. Parameter global_init_p is true for the initial
3381 invocation, which initializes global variables, and false for all
3382 subsequent invocations.
3383
3384
3385 Various global state information is assumed to be valid. This
3386 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3387 default CPU specified at build configure time, TARGET_DEFAULT,
3388 representing the default set of option flags for the default
3389 target, and global_options_set.x_rs6000_isa_flags, representing
3390 which options were requested on the command line.
3391
3392 Upon return from this function:
3393
3394 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3395 was set by name on the command line. Additionally, if certain
3396 attributes are automatically enabled or disabled by this function
3397 in order to assure compatibility between options and
3398 configuration, the flags associated with those attributes are
3399 also set. By setting these "explicit bits", we avoid the risk
3400 that other code might accidentally overwrite these particular
3401 attributes with "default values".
3402
3403 The various bits of rs6000_isa_flags are set to indicate the
3404 target options that have been selected for the most current
3405 compilation efforts. This has the effect of also turning on the
3406 associated TARGET_XXX values since these are macros which are
3407 generally defined to test the corresponding bit of the
3408 rs6000_isa_flags variable.
3409
3410 The variable rs6000_builtin_mask is set to represent the target
3411 options for the most current compilation efforts, consistent with
3412 the current contents of rs6000_isa_flags. This variable controls
3413 expansion of built-in functions.
3414
3415 Various other global variables and fields of global structures
3416 (over 50 in all) are initialized to reflect the desired options
3417 for the most current compilation efforts. */
3418
3419 static bool
3420 rs6000_option_override_internal (bool global_init_p)
3421 {
3422 bool ret = true;
3423
3424 HOST_WIDE_INT set_masks;
3425 HOST_WIDE_INT ignore_masks;
3426 int cpu_index = -1;
3427 int tune_index;
3428 struct cl_target_option *main_target_opt
3429 = ((global_init_p || target_option_default_node == NULL)
3430 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3431
3432 /* Print defaults. */
3433 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3434 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3435
3436 /* Remember the explicit arguments. */
3437 if (global_init_p)
3438 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3439
3440 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3441 library functions, so warn about it. The flag may be useful for
3442 performance studies from time to time though, so don't disable it
3443 entirely. */
3444 if (global_options_set.x_rs6000_alignment_flags
3445 && rs6000_alignment_flags == MASK_ALIGN_POWER
3446 && DEFAULT_ABI == ABI_DARWIN
3447 && TARGET_64BIT)
3448 warning (0, "%qs is not supported for 64-bit Darwin;"
3449 " it is incompatible with the installed C and C++ libraries",
3450 "-malign-power");
3451
3452 /* Numerous experiments show that IRA-based loop pressure
3453    calculation works better for RTL loop invariant motion on targets
3454    with enough (>= 32) registers.  It is an expensive optimization.
3455    So it is enabled only for peak performance.  */
3456 if (optimize >= 3 && global_init_p
3457 && !global_options_set.x_flag_ira_loop_pressure)
3458 flag_ira_loop_pressure = 1;
3459
3460 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3461 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
3462 options were already specified. */
3463 if (flag_sanitize & SANITIZE_USER_ADDRESS
3464 && !global_options_set.x_flag_asynchronous_unwind_tables)
3465 flag_asynchronous_unwind_tables = 1;
3466
3467 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3468    loop unroller is active.  It is only checked during unrolling, so
3469    we can just turn it on by default.  */
3470 if (!global_options_set.x_flag_variable_expansion_in_unroller)
3471 flag_variable_expansion_in_unroller = 1;
3472
3473 /* Set the pointer size. */
3474 if (TARGET_64BIT)
3475 {
3476 rs6000_pmode = DImode;
3477 rs6000_pointer_size = 64;
3478 }
3479 else
3480 {
3481 rs6000_pmode = SImode;
3482 rs6000_pointer_size = 32;
3483 }
3484
3485 /* Some OSs don't support saving the high part of 64-bit registers on context
3486 switch. Other OSs don't support saving Altivec registers. On those OSs,
3487 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3488 if the user wants either, the user must explicitly specify them and we
3489 won't interfere with the user's specification. */
3490
3491 set_masks = POWERPC_MASKS;
3492 #ifdef OS_MISSING_POWERPC64
3493 if (OS_MISSING_POWERPC64)
3494 set_masks &= ~OPTION_MASK_POWERPC64;
3495 #endif
3496 #ifdef OS_MISSING_ALTIVEC
3497 if (OS_MISSING_ALTIVEC)
3498 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3499 | OTHER_VSX_VECTOR_MASKS);
3500 #endif
3501
3502 /* Don't let the processor default override options given explicitly.  */
3503 set_masks &= ~rs6000_isa_flags_explicit;
3504
3505 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments.  If the user changed
3506    the cpu in a target attribute or pragma, but did not specify a tuning
3507    option, use the cpu for the tuning option rather than the option specified
3508    with -mtune on the command line.  Process a '--with-cpu' configuration
3509    request as an implicit -mcpu.  */
3510 if (rs6000_cpu_index >= 0)
3511 cpu_index = rs6000_cpu_index;
3512 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3513 cpu_index = main_target_opt->x_rs6000_cpu_index;
3514 else if (OPTION_TARGET_CPU_DEFAULT)
3515 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3516
3517 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3518 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3519 with those from the cpu, except for options that were explicitly set. If
3520 we don't have a cpu, do not override the target bits set in
3521 TARGET_DEFAULT. */
3522 if (cpu_index >= 0)
3523 {
3524 rs6000_cpu_index = cpu_index;
3525 rs6000_isa_flags &= ~set_masks;
3526 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3527 & set_masks);
3528 }
3529 else
3530 {
3531 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3532 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3533 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook.  Since we switched
3534 to using rs6000_isa_flags, we need to do the initialization here.
3535
3536 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3537 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3538 HOST_WIDE_INT flags;
3539 if (TARGET_DEFAULT)
3540 flags = TARGET_DEFAULT;
3541 else
3542 {
3543 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3544 const char *default_cpu = (!TARGET_POWERPC64
3545 ? "powerpc"
3546 : (BYTES_BIG_ENDIAN
3547 ? "powerpc64"
3548 : "powerpc64le"));
3549 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3550 flags = processor_target_table[default_cpu_index].target_enable;
3551 }
3552 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3553 }
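/* Illustrative example of the merging above (hypothetical command line):
   with -mcpu=power8 -mno-vsx, OPTION_MASK_VSX is recorded in
   rs6000_isa_flags_explicit, so it is stripped from set_masks and the
   power8 entry of processor_target_table cannot re-enable VSX; the
   user's -mno-vsx wins over the cpu default.  */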
3554
3555 if (rs6000_tune_index >= 0)
3556 tune_index = rs6000_tune_index;
3557 else if (cpu_index >= 0)
3558 rs6000_tune_index = tune_index = cpu_index;
3559 else
3560 {
3561 size_t i;
3562 enum processor_type tune_proc
3563 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3564
3565 tune_index = -1;
3566 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3567 if (processor_target_table[i].processor == tune_proc)
3568 {
3569 tune_index = i;
3570 break;
3571 }
3572 }
3573
3574 if (cpu_index >= 0)
3575 rs6000_cpu = processor_target_table[cpu_index].processor;
3576 else
3577 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3578
3579 gcc_assert (tune_index >= 0);
3580 rs6000_tune = processor_target_table[tune_index].processor;
3581
3582 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3583 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3584 || rs6000_cpu == PROCESSOR_PPCE5500)
3585 {
3586 if (TARGET_ALTIVEC)
3587 error ("AltiVec not supported in this target");
3588 }
3589
3590 /* If we are optimizing big endian systems for space, use the load/store
3591 multiple instructions. */
3592 if (BYTES_BIG_ENDIAN && optimize_size)
3593 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
3594
3595 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3596    because the hardware doesn't support the instructions used in little
3597    endian mode, and they cause an alignment trap.  The 750 does not cause an
3598    alignment trap (except when the target is unaligned).  */
3599
3600 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3601 {
3602 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3603 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3604 warning (0, "%qs is not supported on little endian systems",
3605 "-mmultiple");
3606 }
3607
3608 /* If little-endian, default to -mstrict-align on older processors.
3609 Testing for htm matches power8 and later. */
3610 if (!BYTES_BIG_ENDIAN
3611 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3612 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3613
3614 if (!rs6000_fold_gimple)
3615 fprintf (stderr,
3616 "gimple folding of rs6000 builtins has been disabled.\n");
3617
3618 /* Add some warnings for VSX. */
3619 if (TARGET_VSX)
3620 {
3621 const char *msg = NULL;
3622 if (!TARGET_HARD_FLOAT)
3623 {
3624 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3625 msg = N_("%<-mvsx%> requires hardware floating point");
3626 else
3627 {
3628 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3629 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3630 }
3631 }
3632 else if (TARGET_AVOID_XFORM > 0)
3633 msg = N_("%<-mvsx%> needs indexed addressing");
3634 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3635 & OPTION_MASK_ALTIVEC))
3636 {
3637 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3638 msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3639 else
3640 msg = N_("%<-mno-altivec%> disables vsx");
3641 }
3642
3643 if (msg)
3644 {
3645 warning (0, msg);
3646 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3647 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3648 }
3649 }
3650
3651 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3652 the -mcpu setting to enable options that conflict. */
3653 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3654 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3655 | OPTION_MASK_ALTIVEC
3656 | OPTION_MASK_VSX)) != 0)
3657 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3658 | OPTION_MASK_DIRECT_MOVE)
3659 & ~rs6000_isa_flags_explicit);
3660
3661 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3662 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3663
3664 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3665 off all of the options that depend on those flags. */
3666 ignore_masks = rs6000_disable_incompatible_switches ();
3667
3668 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3669 unless the user explicitly used the -mno-<option> to disable the code. */
3670 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
3671 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3672 else if (TARGET_P9_MINMAX)
3673 {
3674 if (cpu_index >= 0)
3675 {
3676 if (processor_target_table[cpu_index].processor == PROCESSOR_POWER9)
3677 {
3678 /* Legacy behavior: allow -mcpu=power9 with certain
3679    capabilities explicitly disabled.  */
3680 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3681 }
3682 else
3683 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3684 "for <xxx> less than power9", "-mcpu");
3685 }
3686 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
3687 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
3688 & rs6000_isa_flags_explicit))
3689 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3690 were explicitly cleared. */
3691 error ("%qs incompatible with explicitly disabled options",
3692 "-mpower9-minmax");
3693 else
3694 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
3695 }
3696 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3697 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
3698 else if (TARGET_VSX)
3699 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
3700 else if (TARGET_POPCNTD)
3701 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
3702 else if (TARGET_DFP)
3703 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
3704 else if (TARGET_CMPB)
3705 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
3706 else if (TARGET_FPRND)
3707 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
3708 else if (TARGET_POPCNTB)
3709 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
3710 else if (TARGET_ALTIVEC)
3711 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
3712
3713 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3714 {
3715 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3716 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3717 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3718 }
3719
3720 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3721 {
3722 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3723 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3724 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3725 }
3726
3727 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3728 {
3729 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3730 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3731 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3732 }
3733
3734 if (TARGET_P8_VECTOR && !TARGET_VSX)
3735 {
3736 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3737 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
3738 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3739 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
3740 {
3741 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3742 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3743 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3744 }
3745 else
3746 {
3747 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
3748 not explicit. */
3749 rs6000_isa_flags |= OPTION_MASK_VSX;
3750 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3751 }
3752 }
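/* Illustrative example: an explicit -mpower8-vector with no VSX option at
   all reaches the final arm above, so VSX is quietly enabled (and marked
   explicit) because the ISA 2.07 vector instructions depend on it.  */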
3753
3754 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3755 {
3756 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3757 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3758 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3759 }
3760
3761 /* The quad memory instructions only work in 64-bit mode.  In 32-bit mode,
3762    silently turn off quad memory mode.  */
3763 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3764 {
3765 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3766 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
3767
3768 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3769 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
3770
3771 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3772 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3773 }
3774
3775 /* Non-atomic quad memory load/store are disabled for little endian, since
3776 the words are reversed, but atomic operations can still be done by
3777 swapping the words. */
3778 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3779 {
3780 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3781 warning (0, N_("%<-mquad-memory%> is not available in little endian "
3782 "mode"));
3783
3784 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3785 }
3786
3787 /* Assume if the user asked for normal quad memory instructions, they want
3788    the atomic versions as well, unless they explicitly told us not to use quad
3789    word atomic instructions.  */
3790 if (TARGET_QUAD_MEMORY
3791 && !TARGET_QUAD_MEMORY_ATOMIC
3792 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
3793 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
3794
3795 /* If we can shrink-wrap the TOC register save separately, then use
3796 -msave-toc-indirect unless explicitly disabled. */
3797 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
3798 && flag_shrink_wrap_separate
3799 && optimize_function_for_speed_p (cfun))
3800 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
3801
3802 /* Enable power8 fusion if we are tuning for power8, even if we aren't
3803 generating power8 instructions. Power9 does not optimize power8 fusion
3804 cases. */
3805 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
3806 {
3807 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
3808 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3809 else
3810 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3811 }
3812
3813 /* Setting additional fusion flags turns on base fusion. */
3814 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
3815 {
3816 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
3817 {
3818 if (TARGET_P8_FUSION_SIGN)
3819 error ("%qs requires %qs", "-mpower8-fusion-sign",
3820 "-mpower8-fusion");
3821
3822 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3823 }
3824 else
3825 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3826 }
3827
3828 /* Power8 does not fuse sign extended loads with the addis. If we are
3829 optimizing at high levels for speed, convert a sign extended load into a
3830 zero extending load, and an explicit sign extension. */
3831 if (TARGET_P8_FUSION
3832 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
3833 && optimize_function_for_speed_p (cfun)
3834 && optimize >= 3)
3835 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
3836
3837 /* ISA 3.0 vector instructions include ISA 2.07. */
3838 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
3839 {
3840 /* We prefer to not mention undocumented options in
3841 error messages. However, if users have managed to select
3842 power9-vector without selecting power8-vector, they
3843 already know about undocumented flags. */
3844 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
3845     && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
3846 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
3847 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
3848 {
3849 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
3850 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3851 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
3852 }
3853 else
3854 {
3855 /* OPTION_MASK_P9_VECTOR is explicit and
3856 OPTION_MASK_P8_VECTOR is not explicit. */
3857 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
3858 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3859 }
3860 }
3861
3862 /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07
3863    support.  If we only have ISA 2.06 support, and the user did not specify
3864    the switch, leave it set to -1 so the movmisalign patterns are enabled,
3865    but we don't enable the full vectorization support.  */
3866 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
3867 TARGET_ALLOW_MOVMISALIGN = 1;
3868
3869 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
3870 {
3871 if (TARGET_ALLOW_MOVMISALIGN > 0
3872 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
3873 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
3874
3875 TARGET_ALLOW_MOVMISALIGN = 0;
3876 }
3877
3878 /* Determine when unaligned vector accesses are permitted, and when
3879 they are preferred over masked Altivec loads. Note that if
3880 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
3881 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
3882 not true. */
3883 if (TARGET_EFFICIENT_UNALIGNED_VSX)
3884 {
3885 if (!TARGET_VSX)
3886 {
3887 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
3888 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
3889
3890 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
3891 }
3892
3893 else if (!TARGET_ALLOW_MOVMISALIGN)
3894 {
3895 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
3896 error ("%qs requires %qs", "-munefficient-unaligned-vsx",
3897 "-mallow-movmisalign");
3898
3899 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
3900 }
3901 }
3902
3903 /* Use long double size to select the appropriate long double. We use
3904 TYPE_PRECISION to differentiate the 3 different long double types. We map
3905 128 into the precision used for TFmode. */
3906 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
3907 ? 64
3908 : FLOAT_PRECISION_TFmode);
3909
3910 /* Set long double size before the IEEE 128-bit tests. */
3911 if (!global_options_set.x_rs6000_long_double_type_size)
3912 {
3913 if (main_target_opt != NULL
3914 && (main_target_opt->x_rs6000_long_double_type_size
3915 != default_long_double_size))
3916 error ("target attribute or pragma changes %<long double%> size");
3917 else
3918 rs6000_long_double_type_size = default_long_double_size;
3919 }
3920 else if (rs6000_long_double_type_size == 128)
3921 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
3922 else if (global_options_set.x_rs6000_ieeequad)
3923 {
3924 if (global_options.x_rs6000_ieeequad)
3925 error ("%qs requires %qs", "-mabi=ieeelongdouble", "-mlong-double-128");
3926 else
3927 error ("%qs requires %qs", "-mabi=ibmlongdouble", "-mlong-double-128");
3928 }
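/* Illustrative command lines for the checks above (assuming a target
   where both formats are supported):

     -mlong-double-128 -mabi=ieeelongdouble   IEEE 128-bit long double
     -mlong-double-128 -mabi=ibmlongdouble    IBM double-double long double
     -mlong-double-64 -mabi=ieeelongdouble    error; needs -mlong-double-128  */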
3929
3930 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
3931 systems will also set long double to be IEEE 128-bit. AIX and Darwin
3932 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
3933 those systems will not pick up this default. Warn if the user changes the
3934 default unless -Wno-psabi. */
3935 if (!global_options_set.x_rs6000_ieeequad)
3936 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
3937
3938 else
3939 {
3940 if (global_options.x_rs6000_ieeequad
3941 && (!TARGET_POPCNTD || !TARGET_VSX))
3942 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
3943
3944 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128)
3945 {
3946 static bool warned_change_long_double;
3947 if (!warned_change_long_double)
3948 {
3949 warned_change_long_double = true;
3950 if (TARGET_IEEEQUAD)
3951 warning (OPT_Wpsabi, "Using IEEE extended precision "
3952 "%<long double%>");
3953 else
3954 warning (OPT_Wpsabi, "Using IBM extended precision "
3955 "%<long double%>");
3956 }
3957 }
3958 }
3959
3960 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
3961    systems.  In GCC 7, we would enable the IEEE 128-bit floating point
3962    infrastructure (-mfloat128-type) but not enable the actual __float128 type
3963    unless the user used the explicit -mfloat128.  In GCC 8, we enable both
3964    the keyword as well as the type.  */
3965 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
3966
3967 /* IEEE 128-bit floating point requires VSX support. */
3968 if (TARGET_FLOAT128_KEYWORD)
3969 {
3970 if (!TARGET_VSX)
3971 {
3972 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
3973 error ("%qs requires VSX support", "%<-mfloat128%>");
3974
3975 TARGET_FLOAT128_TYPE = 0;
3976 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
3977 | OPTION_MASK_FLOAT128_HW);
3978 }
3979 else if (!TARGET_FLOAT128_TYPE)
3980 {
3981 TARGET_FLOAT128_TYPE = 1;
3982 warning (0, "The %<-mfloat128%> option may not be fully supported");
3983 }
3984 }
3985
3986 /* Enable the __float128 keyword under Linux by default. */
3987 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
3988 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
3989 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
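/* Illustrative example (user code): once the keyword is enabled,

     __float128 q = 1.0Q;

   is accepted; under an explicit -mno-float128 the __float128 keyword is
   not recognized and such a declaration fails to parse.  */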
3990
3991 /* If we are supporting the float128 type and have full ISA 3.0 support,
3992    enable -mfloat128-hardware by default.  However, don't enable the
3993    __float128 keyword if it was explicitly turned off.  64-bit mode is needed
3994    because sometimes the compiler wants to put things in an integer
3995    container, and if we don't have __int128 support, it is impossible.  */
3996 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
3997 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
3998 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
3999 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4000
4001 if (TARGET_FLOAT128_HW
4002 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4003 {
4004 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4005 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4006
4007 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4008 }
4009
4010 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4011 {
4012 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4013 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4014
4015 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4016 }
4017
4018 /* -mprefixed-addr (and hence -mpcrel) requires -mcpu=future. */
4019 if (TARGET_PREFIXED_ADDR && !TARGET_FUTURE)
4020 {
4021 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4022 error ("%qs requires %qs", "-mpcrel", "-mcpu=future");
4023 else if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED_ADDR) != 0)
4024 error ("%qs requires %qs", "-mprefixed-addr", "-mcpu=future");
4025
4026 rs6000_isa_flags &= ~(OPTION_MASK_PCREL | OPTION_MASK_PREFIXED_ADDR);
4027 }
4028
4029 /* -mpcrel requires prefixed load/store addressing. */
4030 if (TARGET_PCREL && !TARGET_PREFIXED_ADDR)
4031 {
4032 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4033 error ("%qs requires %qs", "-mpcrel", "-mprefixed-addr");
4034
4035 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4036 }
4037
4038 /* Print the options after updating the defaults. */
4039 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4040 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4041
4042 /* E500mc does "better" if we inline more aggressively. Respect the
4043 user's opinion, though. */
4044 if (rs6000_block_move_inline_limit == 0
4045 && (rs6000_tune == PROCESSOR_PPCE500MC
4046 || rs6000_tune == PROCESSOR_PPCE500MC64
4047 || rs6000_tune == PROCESSOR_PPCE5500
4048 || rs6000_tune == PROCESSOR_PPCE6500))
4049 rs6000_block_move_inline_limit = 128;
4050
4051 /* store_one_arg depends on expand_block_move to handle at least the
4052 size of reg_parm_stack_space. */
4053 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4054 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4055
4056 if (global_init_p)
4057 {
4058 /* If the appropriate debug option is enabled, replace the target hooks
4059    with debug versions that call the real version and then print
4060    debugging information.  */
4061 if (TARGET_DEBUG_COST)
4062 {
4063 targetm.rtx_costs = rs6000_debug_rtx_costs;
4064 targetm.address_cost = rs6000_debug_address_cost;
4065 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4066 }
4067
4068 if (TARGET_DEBUG_ADDR)
4069 {
4070 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4071 targetm.legitimize_address = rs6000_debug_legitimize_address;
4072 rs6000_secondary_reload_class_ptr
4073 = rs6000_debug_secondary_reload_class;
4074 targetm.secondary_memory_needed
4075 = rs6000_debug_secondary_memory_needed;
4076 targetm.can_change_mode_class
4077 = rs6000_debug_can_change_mode_class;
4078 rs6000_preferred_reload_class_ptr
4079 = rs6000_debug_preferred_reload_class;
4080 rs6000_mode_dependent_address_ptr
4081 = rs6000_debug_mode_dependent_address;
4082 }
4083
4084 if (rs6000_veclibabi_name)
4085 {
4086 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4087 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4088 else
4089 {
4090 error ("unknown vectorization library ABI type (%qs) for "
4091 "%qs switch", rs6000_veclibabi_name, "-mveclibabi=");
4092 ret = false;
4093 }
4094 }
4095 }
4096
4097 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4098 target attribute or pragma which automatically enables both options,
4099 unless the altivec ABI was set. This is set by default for 64-bit, but
4100 not for 32-bit. */
4101 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4102 {
4103 TARGET_FLOAT128_TYPE = 0;
4104 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4105 | OPTION_MASK_FLOAT128_KEYWORD)
4106 & ~rs6000_isa_flags_explicit);
4107 }
4108
4109 /* Enable Altivec ABI for AIX -maltivec. */
4110 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4111 {
4112 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4113 error ("target attribute or pragma changes AltiVec ABI");
4114 else
4115 rs6000_altivec_abi = 1;
4116 }
4117
4118 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4119 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4120 be explicitly overridden in either case. */
4121 if (TARGET_ELF)
4122 {
4123 if (!global_options_set.x_rs6000_altivec_abi
4124 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4125 {
4126 if (main_target_opt != NULL &&
4127 !main_target_opt->x_rs6000_altivec_abi)
4128 error ("target attribute or pragma changes AltiVec ABI");
4129 else
4130 rs6000_altivec_abi = 1;
4131 }
4132 }
4133
4134 /* Set the Darwin64 ABI as the default for 64-bit Darwin.
4135    So far, the only darwin64 targets are also Mach-O.  */
4136 if (TARGET_MACHO
4137 && DEFAULT_ABI == ABI_DARWIN
4138 && TARGET_64BIT)
4139 {
4140 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4141 error ("target attribute or pragma changes darwin64 ABI");
4142 else
4143 {
4144 rs6000_darwin64_abi = 1;
4145 /* Default to natural alignment, for better performance. */
4146 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4147 }
4148 }
4149
4150 /* Place FP constants in the constant pool instead of TOC
4151 if section anchors enabled. */
4152 if (flag_section_anchors
4153 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4154 TARGET_NO_FP_IN_TOC = 1;
4155
4156 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4157 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4158
4159 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4160 SUBTARGET_OVERRIDE_OPTIONS;
4161 #endif
4162 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4163 SUBSUBTARGET_OVERRIDE_OPTIONS;
4164 #endif
4165 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4166 SUB3TARGET_OVERRIDE_OPTIONS;
4167 #endif
4168
4169 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4170 after the subtarget override options are done. */
4171 if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
4172 {
4173 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4174 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4175
4176 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4177 }
4178
4179 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4180 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4181
4182 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4183 && rs6000_tune != PROCESSOR_POWER5
4184 && rs6000_tune != PROCESSOR_POWER6
4185 && rs6000_tune != PROCESSOR_POWER7
4186 && rs6000_tune != PROCESSOR_POWER8
4187 && rs6000_tune != PROCESSOR_POWER9
4188 && rs6000_tune != PROCESSOR_FUTURE
4189 && rs6000_tune != PROCESSOR_PPCA2
4190 && rs6000_tune != PROCESSOR_CELL
4191 && rs6000_tune != PROCESSOR_PPC476);
4192 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4193 || rs6000_tune == PROCESSOR_POWER5
4194 || rs6000_tune == PROCESSOR_POWER7
4195 || rs6000_tune == PROCESSOR_POWER8);
4196 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4197 || rs6000_tune == PROCESSOR_POWER5
4198 || rs6000_tune == PROCESSOR_POWER6
4199 || rs6000_tune == PROCESSOR_POWER7
4200 || rs6000_tune == PROCESSOR_POWER8
4201 || rs6000_tune == PROCESSOR_POWER9
4202 || rs6000_tune == PROCESSOR_FUTURE
4203 || rs6000_tune == PROCESSOR_PPCE500MC
4204 || rs6000_tune == PROCESSOR_PPCE500MC64
4205 || rs6000_tune == PROCESSOR_PPCE5500
4206 || rs6000_tune == PROCESSOR_PPCE6500);
4207
4208 /* Allow debug switches to override the above settings. These are set to -1
4209 in rs6000.opt to indicate the user hasn't directly set the switch. */
4210 if (TARGET_ALWAYS_HINT >= 0)
4211 rs6000_always_hint = TARGET_ALWAYS_HINT;
4212
4213 if (TARGET_SCHED_GROUPS >= 0)
4214 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4215
4216 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4217 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4218
4219 rs6000_sched_restricted_insns_priority
4220 = (rs6000_sched_groups ? 1 : 0);
4221
4222 /* Handle -msched-costly-dep option. */
4223 rs6000_sched_costly_dep
4224 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4225
4226 if (rs6000_sched_costly_dep_str)
4227 {
4228 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4229 rs6000_sched_costly_dep = no_dep_costly;
4230 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4231 rs6000_sched_costly_dep = all_deps_costly;
4232 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4233 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4234 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4235 rs6000_sched_costly_dep = store_to_load_dep_costly;
4236 else
4237 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4238 atoi (rs6000_sched_costly_dep_str));
4239 }
4240
4241 /* Handle -minsert-sched-nops option. */
4242 rs6000_sched_insert_nops
4243 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4244
4245 if (rs6000_sched_insert_nops_str)
4246 {
4247 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4248 rs6000_sched_insert_nops = sched_finish_none;
4249 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4250 rs6000_sched_insert_nops = sched_finish_pad_groups;
4251 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4252 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4253 else
4254 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4255 atoi (rs6000_sched_insert_nops_str));
4256 }
4257
4258 /* Handle stack protector */
4259 if (!global_options_set.x_rs6000_stack_protector_guard)
4260 #ifdef TARGET_THREAD_SSP_OFFSET
4261 rs6000_stack_protector_guard = SSP_TLS;
4262 #else
4263 rs6000_stack_protector_guard = SSP_GLOBAL;
4264 #endif
4265
4266 #ifdef TARGET_THREAD_SSP_OFFSET
4267 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4268 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4269 #endif
4270
4271 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
4272 {
4273 char *endp;
4274 const char *str = rs6000_stack_protector_guard_offset_str;
4275
4276 errno = 0;
4277 long offset = strtol (str, &endp, 0);
4278 if (!*str || *endp || errno)
4279 error ("%qs is not a valid number in %qs", str,
4280 "-mstack-protector-guard-offset=");
4281
4282 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4283 || (TARGET_64BIT && (offset & 3)))
4284 error ("%qs is not a valid offset in %qs", str,
4285 "-mstack-protector-guard-offset=");
4286
4287 rs6000_stack_protector_guard_offset = offset;
4288 }
4289
4290 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
4291 {
4292 const char *str = rs6000_stack_protector_guard_reg_str;
4293 int reg = decode_reg_name (str);
4294
4295 if (!IN_RANGE (reg, 1, 31))
4296 error ("%qs is not a valid base register in %qs", str,
4297 "-mstack-protector-guard-reg=");
4298
4299 rs6000_stack_protector_guard_reg = reg;
4300 }
4301
4302 if (rs6000_stack_protector_guard == SSP_TLS
4303 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4304 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
4305
4306 if (global_init_p)
4307 {
4308 #ifdef TARGET_REGNAMES
4309 /* If the user desires alternate register names, copy in the
4310 alternate names now. */
4311 if (TARGET_REGNAMES)
4312 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4313 #endif
4314
4315 /* Set aix_struct_return last, after the ABI is determined.
4316 If -maix-struct-return or -msvr4-struct-return was explicitly
4317 used, don't override with the ABI default. */
4318 if (!global_options_set.x_aix_struct_return)
4319 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4320
4321 #if 0
4322 /* IBM XL compiler defaults to unsigned bitfields. */
4323 if (TARGET_XL_COMPAT)
4324 flag_signed_bitfields = 0;
4325 #endif
4326
4327 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4328 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4329
4330 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4331
4332 /* We can only guarantee the availability of DI pseudo-ops when
4333 assembling for 64-bit targets. */
4334 if (!TARGET_64BIT)
4335 {
4336 targetm.asm_out.aligned_op.di = NULL;
4337 targetm.asm_out.unaligned_op.di = NULL;
4338 }
4339
4340
4341 /* Set branch target alignment, if not optimizing for size. */
4342 if (!optimize_size)
4343 {
4344 /* Cell wants to be 8-byte aligned for dual issue.  Titan wants to be
4345    8-byte aligned to avoid misprediction by the branch predictor.  */
4346 if (rs6000_tune == PROCESSOR_TITAN
4347 || rs6000_tune == PROCESSOR_CELL)
4348 {
4349 if (flag_align_functions && !str_align_functions)
4350 str_align_functions = "8";
4351 if (flag_align_jumps && !str_align_jumps)
4352 str_align_jumps = "8";
4353 if (flag_align_loops && !str_align_loops)
4354 str_align_loops = "8";
4355 }
4356 if (rs6000_align_branch_targets)
4357 {
4358 if (flag_align_functions && !str_align_functions)
4359 str_align_functions = "16";
4360 if (flag_align_jumps && !str_align_jumps)
4361 str_align_jumps = "16";
4362 if (flag_align_loops && !str_align_loops)
4363 {
4364 can_override_loop_align = 1;
4365 str_align_loops = "16";
4366 }
4367 }
4368
4369 if (flag_align_jumps && !str_align_jumps)
4370 str_align_jumps = "16";
4371 if (flag_align_loops && !str_align_loops)
4372 str_align_loops = "16";
4373 }
4374
4375 /* Arrange to save and restore machine status around nested functions. */
4376 init_machine_status = rs6000_init_machine_status;
4377
4378 /* We should always be splitting complex arguments, but we can't break
4379 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4380 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4381 targetm.calls.split_complex_arg = NULL;
4382
4383 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4384 if (DEFAULT_ABI == ABI_AIX)
4385 targetm.calls.custom_function_descriptors = 0;
4386 }
4387
4388 /* Initialize rs6000_cost with the appropriate target costs. */
4389 if (optimize_size)
4390 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4391 else
4392 switch (rs6000_tune)
4393 {
4394 case PROCESSOR_RS64A:
4395 rs6000_cost = &rs64a_cost;
4396 break;
4397
4398 case PROCESSOR_MPCCORE:
4399 rs6000_cost = &mpccore_cost;
4400 break;
4401
4402 case PROCESSOR_PPC403:
4403 rs6000_cost = &ppc403_cost;
4404 break;
4405
4406 case PROCESSOR_PPC405:
4407 rs6000_cost = &ppc405_cost;
4408 break;
4409
4410 case PROCESSOR_PPC440:
4411 rs6000_cost = &ppc440_cost;
4412 break;
4413
4414 case PROCESSOR_PPC476:
4415 rs6000_cost = &ppc476_cost;
4416 break;
4417
4418 case PROCESSOR_PPC601:
4419 rs6000_cost = &ppc601_cost;
4420 break;
4421
4422 case PROCESSOR_PPC603:
4423 rs6000_cost = &ppc603_cost;
4424 break;
4425
4426 case PROCESSOR_PPC604:
4427 rs6000_cost = &ppc604_cost;
4428 break;
4429
4430 case PROCESSOR_PPC604e:
4431 rs6000_cost = &ppc604e_cost;
4432 break;
4433
4434 case PROCESSOR_PPC620:
4435 rs6000_cost = &ppc620_cost;
4436 break;
4437
4438 case PROCESSOR_PPC630:
4439 rs6000_cost = &ppc630_cost;
4440 break;
4441
4442 case PROCESSOR_CELL:
4443 rs6000_cost = &ppccell_cost;
4444 break;
4445
4446 case PROCESSOR_PPC750:
4447 case PROCESSOR_PPC7400:
4448 rs6000_cost = &ppc750_cost;
4449 break;
4450
4451 case PROCESSOR_PPC7450:
4452 rs6000_cost = &ppc7450_cost;
4453 break;
4454
4455 case PROCESSOR_PPC8540:
4456 case PROCESSOR_PPC8548:
4457 rs6000_cost = &ppc8540_cost;
4458 break;
4459
4460 case PROCESSOR_PPCE300C2:
4461 case PROCESSOR_PPCE300C3:
4462 rs6000_cost = &ppce300c2c3_cost;
4463 break;
4464
4465 case PROCESSOR_PPCE500MC:
4466 rs6000_cost = &ppce500mc_cost;
4467 break;
4468
4469 case PROCESSOR_PPCE500MC64:
4470 rs6000_cost = &ppce500mc64_cost;
4471 break;
4472
4473 case PROCESSOR_PPCE5500:
4474 rs6000_cost = &ppce5500_cost;
4475 break;
4476
4477 case PROCESSOR_PPCE6500:
4478 rs6000_cost = &ppce6500_cost;
4479 break;
4480
4481 case PROCESSOR_TITAN:
4482 rs6000_cost = &titan_cost;
4483 break;
4484
4485 case PROCESSOR_POWER4:
4486 case PROCESSOR_POWER5:
4487 rs6000_cost = &power4_cost;
4488 break;
4489
4490 case PROCESSOR_POWER6:
4491 rs6000_cost = &power6_cost;
4492 break;
4493
4494 case PROCESSOR_POWER7:
4495 rs6000_cost = &power7_cost;
4496 break;
4497
4498 case PROCESSOR_POWER8:
4499 rs6000_cost = &power8_cost;
4500 break;
4501
4502 case PROCESSOR_POWER9:
4503 case PROCESSOR_FUTURE:
4504 rs6000_cost = &power9_cost;
4505 break;
4506
4507 case PROCESSOR_PPCA2:
4508 rs6000_cost = &ppca2_cost;
4509 break;
4510
4511 default:
4512 gcc_unreachable ();
4513 }
4514
4515 if (global_init_p)
4516 {
4517 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4518 param_simultaneous_prefetches,
4519 rs6000_cost->simultaneous_prefetches);
4520 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4521 param_l1_cache_size,
4522 rs6000_cost->l1_cache_size);
4523 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4524 param_l1_cache_line_size,
4525 rs6000_cost->cache_line_size);
4526 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4527 param_l2_cache_size,
4528 rs6000_cost->l2_cache_size);
4529
4530 /* Increase loop peeling limits based on performance analysis. */
4531 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4532 param_max_peeled_insns, 400);
4533 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4534 param_max_completely_peeled_insns, 400);
4535
4536 /* Use the 'model' -fsched-pressure algorithm by default. */
4537 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4538 param_sched_pressure_algorithm,
4539 SCHED_PRESSURE_MODEL);
4540
4541 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
4542 turns -fweb and -frename-registers on. */
4543 if ((global_options_set.x_flag_unroll_loops && flag_unroll_loops)
4544 || (global_options_set.x_flag_unroll_all_loops
4545 && flag_unroll_all_loops))
4546 {
4547 if (!global_options_set.x_unroll_only_small_loops)
4548 unroll_only_small_loops = 0;
4549 if (!global_options_set.x_flag_rename_registers)
4550 flag_rename_registers = 1;
4551 if (!global_options_set.x_flag_web)
4552 flag_web = 1;
4553 }
4554
4555 /* If using typedef char *va_list, signal that
4556 __builtin_va_start (&ap, 0) can be optimized to
4557 ap = __builtin_next_arg (0). */
4558 if (DEFAULT_ABI != ABI_V4)
4559 targetm.expand_builtin_va_start = NULL;
4560 }
4561
4562 /* If not explicitly specified via option, decide whether to generate indexed
4563 load/store instructions. A value of -1 indicates that the
4564 initial value of this variable has not been overwritten. During
4565 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4566 if (TARGET_AVOID_XFORM == -1)
4567 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4568 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4569 need indexed accesses and the type used is the scalar type of the element
4570 being loaded or stored. */
4571 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4572 && !TARGET_ALTIVEC);
4573
4574 /* Set the -mrecip options. */
4575 if (rs6000_recip_name)
4576 {
4577 char *p = ASTRDUP (rs6000_recip_name);
4578 char *q;
4579 unsigned int mask, i;
4580 bool invert;
4581
4582 while ((q = strtok (p, ",")) != NULL)
4583 {
4584 p = NULL;
4585 if (*q == '!')
4586 {
4587 invert = true;
4588 q++;
4589 }
4590 else
4591 invert = false;
4592
4593 if (!strcmp (q, "default"))
4594 mask = ((TARGET_RECIP_PRECISION)
4595 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4596 else
4597 {
4598 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4599 if (!strcmp (q, recip_options[i].string))
4600 {
4601 mask = recip_options[i].mask;
4602 break;
4603 }
4604
4605 if (i == ARRAY_SIZE (recip_options))
4606 {
4607 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4608 invert = false;
4609 mask = 0;
4610 ret = false;
4611 }
4612 }
4613
4614 if (invert)
4615 rs6000_recip_control &= ~mask;
4616 else
4617 rs6000_recip_control |= mask;
4618 }
4619 }
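/* Illustrative example of the syntax parsed above:

     -mrecip=rsqrtf,!divd

   enables the reciprocal square root estimate for SFmode/V4SFmode and,
   via the '!' prefix, disables the reciprocal divide estimate for DFmode
   (option names are taken from recip_options).  */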
4620
4621 /* Set the builtin mask of the various options used that could affect which
4622 builtins were used. In the past we used target_flags, but we've run out
4623 of bits, and some options are no longer in target_flags. */
4624 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4625 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4626 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4627 rs6000_builtin_mask);
4628
4629 /* Initialize all of the registers. */
4630 rs6000_init_hard_regno_mode_ok (global_init_p);
4631
4632 /* Save the initial options in case the user uses function-specific options.  */
4633 if (global_init_p)
4634 target_option_default_node = target_option_current_node
4635 = build_target_option_node (&global_options);
4636
4637 /* If not explicitly specified via option, decide whether to generate the
4638 extra blr's required to preserve the link stack on some cpus (eg, 476). */
4639 if (TARGET_LINK_STACK == -1)
4640 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4641
4642 /* Deprecate use of -mno-speculate-indirect-jumps. */
4643 if (!rs6000_speculate_indirect_jumps)
4644 warning (0, "%qs is deprecated and not recommended in any circumstances",
4645 "-mno-speculate-indirect-jumps");
4646
4647 return ret;
4648 }
4649
4650 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4651 define the target cpu type. */
4652
4653 static void
4654 rs6000_option_override (void)
4655 {
4656 (void) rs6000_option_override_internal (true);
4657 }
4658
4659 \f
4660 /* Implement targetm.vectorize.builtin_mask_for_load. */
4661 static tree
4662 rs6000_builtin_mask_for_load (void)
4663 {
4664 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
4665 if ((TARGET_ALTIVEC && !TARGET_VSX)
4666 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
4667 return altivec_builtin_mask_for_load;
4668 else
4669 return 0;
4670 }
4671
4672 /* Implement LOOP_ALIGN. */
4673 align_flags
4674 rs6000_loop_align (rtx label)
4675 {
4676 basic_block bb;
4677 int ninsns;
4678
4679 /* Don't override loop alignment if -falign-loops was specified. */
4680 if (!can_override_loop_align)
4681 return align_loops;
4682
4683 bb = BLOCK_FOR_INSN (label);
4684 ninsns = num_loop_insns (bb->loop_father);
4685
4686 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
4687 if (ninsns > 4 && ninsns <= 8
4688 && (rs6000_tune == PROCESSOR_POWER4
4689 || rs6000_tune == PROCESSOR_POWER5
4690 || rs6000_tune == PROCESSOR_POWER6
4691 || rs6000_tune == PROCESSOR_POWER7
4692 || rs6000_tune == PROCESSOR_POWER8))
4693 return align_flags (5);
4694 else
4695 return align_loops;
4696 }
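/* Note on the constant above (illustrative): align_flags (5) requests
   2**5 = 32-byte alignment, sized so that a small loop fits in one icache
   sector on the listed processors; other loops keep the align_loops
   default.  */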
4697
4698 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4699    after applying N iterations.  This routine does not determine
4700    how many iterations are required to reach the desired alignment.  */
4701
4702 static bool
4703 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4704 {
4705 if (is_packed)
4706 return false;
4707
4708 if (TARGET_32BIT)
4709 {
4710 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4711 return true;
4712
4713 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4714 return true;
4715
4716 return false;
4717 }
4718 else
4719 {
4720 if (TARGET_MACHO)
4721 return false;
4722
4723 /* Assume that all other types are naturally aligned.  CHECKME!  */
4724 return true;
4725 }
4726 }
4727
4728 /* Return true if the vector misalignment factor is supported by the
4729 target. */
4730 static bool
4731 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4732 const_tree type,
4733 int misalignment,
4734 bool is_packed)
4735 {
4736 if (TARGET_VSX)
4737 {
4738 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4739 return true;
4740
4741 /* Return false if the movmisalign pattern is not supported for this mode.  */
4742 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4743 return false;
4744
4745 if (misalignment == -1)
4746 {
4747 /* Misalignment factor is unknown at compile time but we know
4748 it's word aligned. */
4749 if (rs6000_vector_alignment_reachable (type, is_packed))
4750 {
4751 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4752
4753 if (element_size == 64 || element_size == 32)
4754 return true;
4755 }
4756
4757 return false;
4758 }
4759
4760 /* VSX supports word-aligned vectors.  */
4761 if (misalignment % 4 == 0)
4762 return true;
4763 }
4764 return false;
4765 }
4766
4767 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4768 static int
4769 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4770 tree vectype, int misalign)
4771 {
4772 unsigned elements;
4773 tree elem_type;
4774
4775 switch (type_of_cost)
4776 {
4777 case scalar_stmt:
4778 case scalar_store:
4779 case vector_stmt:
4780 case vector_store:
4781 case vec_to_scalar:
4782 case scalar_to_vec:
4783 case cond_branch_not_taken:
4784 return 1;
4785 case scalar_load:
4786 case vector_load:
4787 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4788 return 2;
4789
4790 case vec_perm:
4791 /* Power7 has only one permute unit, make it a bit expensive. */
4792 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
4793 return 3;
4794 else
4795 return 1;
4796
4797 case vec_promote_demote:
4798 /* Power7 has only one permute/pack unit, make it a bit expensive. */
4799 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
4800 return 4;
4801 else
4802 return 1;
4803
4804 case cond_branch_taken:
4805 return 3;
4806
4807 case unaligned_load:
4808 case vector_gather_load:
4809 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4810 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4811 return 2;
4812
4813 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4814 {
4815 elements = TYPE_VECTOR_SUBPARTS (vectype);
4816 if (elements == 2)
4817 /* Double word aligned. */
4818 return 4;
4819
4820 if (elements == 4)
4821 {
4822 switch (misalign)
4823 {
4824 case 8:
4825 /* Double word aligned. */
4826 return 4;
4827
4828 case -1:
4829 /* Unknown misalignment. */
4830 case 4:
4831 case 12:
4832 /* Word aligned. */
4833 return 33;
4834
4835 default:
4836 gcc_unreachable ();
4837 }
4838 }
4839 }
4840
4841 if (TARGET_ALTIVEC)
4842 /* Misaligned loads are not supported. */
4843 gcc_unreachable ();
4844
4845 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4846 return 4;
4847
4848 case unaligned_store:
4849 case vector_scatter_store:
4850 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4851 return 1;
4852
4853 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4854 {
4855 elements = TYPE_VECTOR_SUBPARTS (vectype);
4856 if (elements == 2)
4857 /* Double word aligned. */
4858 return 2;
4859
4860 if (elements == 4)
4861 {
4862 switch (misalign)
4863 {
4864 case 8:
4865 /* Double word aligned. */
4866 return 2;
4867
4868 case -1:
4869 /* Unknown misalignment. */
4870 case 4:
4871 case 12:
4872 /* Word aligned. */
4873 return 23;
4874
4875 default:
4876 gcc_unreachable ();
4877 }
4878 }
4879 }
4880
4881 if (TARGET_ALTIVEC)
4882 /* Misaligned stores are not supported. */
4883 gcc_unreachable ();
4884
4885 return 2;
4886
4887 case vec_construct:
4888 /* This is a rough approximation assuming non-constant elements
4889 constructed into a vector via element insertion. FIXME:
4890 vec_construct is not granular enough for uniformly good
4891 decisions. If the initialization is a splat, this is
4892 cheaper than we estimate. Improve this someday. */
4893 elem_type = TREE_TYPE (vectype);
4894 /* 32-bit vectors loaded into registers are stored as double
4895 precision, so we need 2 permutes, 2 converts, and 1 merge
4896 to construct a vector of short floats from them. */
4897 if (SCALAR_FLOAT_TYPE_P (elem_type)
4898 && TYPE_PRECISION (elem_type) == 32)
4899 return 5;
4900 /* On POWER9, integer vector types are built up in GPRs and then
4901 use a direct move (2 cycles). For POWER8 this is even worse,
4902 as we need two direct moves and a merge, and the direct moves
4903 are five cycles. */
4904 else if (INTEGRAL_TYPE_P (elem_type))
4905 {
4906 if (TARGET_P9_VECTOR)
4907 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
4908 else
4909 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
4910 }
4911 else
4912 /* V2DFmode doesn't need a direct move. */
4913 return 2;
4914
4915 default:
4916 gcc_unreachable ();
4917 }
4918 }
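/* Worked example for the vec_construct case above (illustrative): building
   a V4SI vector from four non-constant ints costs
   TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2 = 4 - 1 + 2 = 5 on POWER9, and
   4 - 1 + 5 = 8 on earlier VSX cpus, reflecting the direct-move latency.  */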
4919
4920 /* Implement targetm.vectorize.preferred_simd_mode. */
4921
4922 static machine_mode
4923 rs6000_preferred_simd_mode (scalar_mode mode)
4924 {
4925 opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode));
4926
4927 if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ()))
4928 return vmode.require ();
4929
4930 return word_mode;
4931 }
4932
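/* Illustrative note (added, not from the upstream source): with 16-byte
   vectors, SImode gives 16 / 4 = 4 subparts, so V4SImode is preferred
   when that vector mode is enabled; DFmode gives V2DFmode.  If no usable
   vector mode exists, the scalar word_mode is returned.  */
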
4933 typedef struct _rs6000_cost_data
4934 {
4935 struct loop *loop_info;
4936 unsigned cost[3];
4937 } rs6000_cost_data;
4938
4939 /* Test for likely overcommitment of vector hardware resources. If a
4940 loop iteration is relatively large, and too large a percentage of
4941 instructions in the loop are vectorized, the cost model may not
4942 adequately reflect delays from unavailable vector resources.
4943 Penalize the loop body cost for this case. */
4944
4945 static void
4946 rs6000_density_test (rs6000_cost_data *data)
4947 {
4948 const int DENSITY_PCT_THRESHOLD = 85;
4949 const int DENSITY_SIZE_THRESHOLD = 70;
4950 const int DENSITY_PENALTY = 10;
4951 struct loop *loop = data->loop_info;
4952 basic_block *bbs = get_loop_body (loop);
4953 int nbbs = loop->num_nodes;
4954 loop_vec_info loop_vinfo = loop_vec_info_for_loop (data->loop_info);
4955 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
4956 int i, density_pct;
4957
4958 for (i = 0; i < nbbs; i++)
4959 {
4960 basic_block bb = bbs[i];
4961 gimple_stmt_iterator gsi;
4962
4963 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4964 {
4965 gimple *stmt = gsi_stmt (gsi);
4966 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
4967
4968 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4969 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
4970 not_vec_cost++;
4971 }
4972 }
4973
4974 free (bbs);
4975 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
4976
4977 if (density_pct > DENSITY_PCT_THRESHOLD
4978 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
4979 {
4980 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
4981 if (dump_enabled_p ())
4982 dump_printf_loc (MSG_NOTE, vect_location,
4983 "density %d%%, cost %d exceeds threshold, penalizing "
4984 "loop body cost by %d%%", density_pct,
4985 vec_cost + not_vec_cost, DENSITY_PENALTY);
4986 }
4987 }
4988
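/* Worked example (illustrative only): if the vectorized statements in the
   loop body cost 80 and the non-vectorized ones cost 10, the density is
   80 * 100 / 90 = 88%, above DENSITY_PCT_THRESHOLD (85), and the total
   size 90 is above DENSITY_SIZE_THRESHOLD (70), so the body cost becomes
   80 * 110 / 100 = 88.  */
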
4989 /* Implement targetm.vectorize.init_cost. */
4990
4991 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
4992 instruction is needed by the vectorization. */
4993 static bool rs6000_vect_nonmem;
4994
4995 static void *
4996 rs6000_init_cost (struct loop *loop_info)
4997 {
4998 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
4999 data->loop_info = loop_info;
5000 data->cost[vect_prologue] = 0;
5001 data->cost[vect_body] = 0;
5002 data->cost[vect_epilogue] = 0;
5003 rs6000_vect_nonmem = false;
5004 return data;
5005 }
5006
5007 /* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
5008 For some statements, we would like to further fine-tune the cost on top
5009 of the rs6000_builtin_vectorization_cost handling, which doesn't have any
5010 information on statement operation codes etc.  One typical case here is
5011 COND_EXPR; it has the same cost as a simple FXU instruction when evaluated
5012 for scalar cost, but it should cost more since it will be transformed to
5013 either compare + branch or compare + isel instructions. */
5014
5015 static unsigned
5016 adjust_vectorization_cost (enum vect_cost_for_stmt kind,
5017 struct _stmt_vec_info *stmt_info)
5018 {
5019 if (kind == scalar_stmt && stmt_info && stmt_info->stmt
5020 && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
5021 {
5022 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
5023 if (subcode == COND_EXPR)
5024 return 2;
5025 }
5026
5027 return 0;
5028 }
5029
5030 /* Implement targetm.vectorize.add_stmt_cost. */
5031
5032 static unsigned
5033 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5034 struct _stmt_vec_info *stmt_info, int misalign,
5035 enum vect_cost_model_location where)
5036 {
5037 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5038 unsigned retval = 0;
5039
5040 if (flag_vect_cost_model)
5041 {
5042 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5043 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5044 misalign);
5045 stmt_cost += adjust_vectorization_cost (kind, stmt_info);
5046 /* Statements in an inner loop relative to the loop being
5047 vectorized are weighted more heavily. The value here is
5048 arbitrary and could potentially be improved with analysis. */
5049 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5050 count *= 50; /* FIXME. */
5051
5052 retval = (unsigned) (count * stmt_cost);
5053 cost_data->cost[where] += retval;
5054
5055 /* Check whether we're doing something other than just a copy loop.
5056 Not all such loops may be profitably vectorized; see
5057 rs6000_finish_cost. */
5058 if ((kind == vec_to_scalar || kind == vec_perm
5059 || kind == vec_promote_demote || kind == vec_construct
5060 || kind == scalar_to_vec)
5061 || (where == vect_body && kind == vector_stmt))
5062 rs6000_vect_nonmem = true;
5063 }
5064
5065 return retval;
5066 }
5067
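/* Worked example (illustrative only): a vector_load, which costs 2 in
   rs6000_builtin_vectorization_cost, appearing once in an inner loop
   relative to the loop being vectorized has its count scaled by 50, so
   it contributes 50 * 2 = 100 to the vect_body bucket.  */
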
5068 /* Implement targetm.vectorize.finish_cost. */
5069
5070 static void
5071 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5072 unsigned *body_cost, unsigned *epilogue_cost)
5073 {
5074 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5075
5076 if (cost_data->loop_info)
5077 rs6000_density_test (cost_data);
5078
5079 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5080 that require versioning for any reason. The vectorization is at
5081 best a wash inside the loop, and the versioning checks make
5082 profitability highly unlikely and potentially quite harmful. */
5083 if (cost_data->loop_info)
5084 {
5085 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
5086 if (!rs6000_vect_nonmem
5087 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
5088 && LOOP_REQUIRES_VERSIONING (vec_info))
5089 cost_data->cost[vect_body] += 10000;
5090 }
5091
5092 *prologue_cost = cost_data->cost[vect_prologue];
5093 *body_cost = cost_data->cost[vect_body];
5094 *epilogue_cost = cost_data->cost[vect_epilogue];
5095 }
5096
5097 /* Implement targetm.vectorize.destroy_cost_data. */
5098
5099 static void
5100 rs6000_destroy_cost_data (void *data)
5101 {
5102 free (data);
5103 }
5104
5105 /* Implement targetm.loop_unroll_adjust. */
5106
5107 static unsigned
5108 rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
5109 {
5110 if (unroll_only_small_loops)
5111 {
5112 /* TODO: This is hardcoded to 10 right now. It can be refined, for
5113 example we may want to unroll very small loops more times (4 perhaps).
5114 We also should use a PARAM for this. */
5115 if (loop->ninsns <= 10)
5116 return MIN (2, nunroll);
5117 else
5118 return 0;
5119 }
5120
5121 return nunroll;
5122 }
5123
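/* Worked example (illustrative only): under -munroll-only-small-loops, a
   loop of 8 insns with a requested unroll factor of 4 is limited to
   MIN (2, 4) = 2, while a loop of 12 insns gets 0, i.e. no unrolling.  */
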
5124 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5125 library with vectorized intrinsics. */
5126
5127 static tree
5128 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5129 tree type_in)
5130 {
5131 char name[32];
5132 const char *suffix = NULL;
5133 tree fntype, new_fndecl, bdecl = NULL_TREE;
5134 int n_args = 1;
5135 const char *bname;
5136 machine_mode el_mode, in_mode;
5137 int n, in_n;
5138
5139 /* Libmass is suitable for unsafe math only, as it does not correctly support
5140 parts of IEEE (such as denormals) with the required precision.  Only support
5141 it if we have VSX to use the simd d2 or f4 functions.
5142 XXX: Add variable length support. */
5143 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5144 return NULL_TREE;
5145
5146 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5147 n = TYPE_VECTOR_SUBPARTS (type_out);
5148 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5149 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5150 if (el_mode != in_mode
5151 || n != in_n)
5152 return NULL_TREE;
5153
5154 switch (fn)
5155 {
5156 CASE_CFN_ATAN2:
5157 CASE_CFN_HYPOT:
5158 CASE_CFN_POW:
5159 n_args = 2;
5160 gcc_fallthrough ();
5161
5162 CASE_CFN_ACOS:
5163 CASE_CFN_ACOSH:
5164 CASE_CFN_ASIN:
5165 CASE_CFN_ASINH:
5166 CASE_CFN_ATAN:
5167 CASE_CFN_ATANH:
5168 CASE_CFN_CBRT:
5169 CASE_CFN_COS:
5170 CASE_CFN_COSH:
5171 CASE_CFN_ERF:
5172 CASE_CFN_ERFC:
5173 CASE_CFN_EXP2:
5174 CASE_CFN_EXP:
5175 CASE_CFN_EXPM1:
5176 CASE_CFN_LGAMMA:
5177 CASE_CFN_LOG10:
5178 CASE_CFN_LOG1P:
5179 CASE_CFN_LOG2:
5180 CASE_CFN_LOG:
5181 CASE_CFN_SIN:
5182 CASE_CFN_SINH:
5183 CASE_CFN_SQRT:
5184 CASE_CFN_TAN:
5185 CASE_CFN_TANH:
5186 if (el_mode == DFmode && n == 2)
5187 {
5188 bdecl = mathfn_built_in (double_type_node, fn);
5189 suffix = "d2"; /* pow -> powd2 */
5190 }
5191 else if (el_mode == SFmode && n == 4)
5192 {
5193 bdecl = mathfn_built_in (float_type_node, fn);
5194 suffix = "4"; /* powf -> powf4 */
5195 }
5196 else
5197 return NULL_TREE;
5198 if (!bdecl)
5199 return NULL_TREE;
5200 break;
5201
5202 default:
5203 return NULL_TREE;
5204 }
5205
5206 gcc_assert (suffix != NULL);
5207 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5208 if (!bname)
5209 return NULL_TREE;
5210
5211 strcpy (name, bname + sizeof ("__builtin_") - 1);
5212 strcat (name, suffix);
5213
5214 if (n_args == 1)
5215 fntype = build_function_type_list (type_out, type_in, NULL);
5216 else if (n_args == 2)
5217 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5218 else
5219 gcc_unreachable ();
5220
5221 /* Build a function declaration for the vectorized function. */
5222 new_fndecl = build_decl (BUILTINS_LOCATION,
5223 FUNCTION_DECL, get_identifier (name), fntype);
5224 TREE_PUBLIC (new_fndecl) = 1;
5225 DECL_EXTERNAL (new_fndecl) = 1;
5226 DECL_IS_NOVOPS (new_fndecl) = 1;
5227 TREE_READONLY (new_fndecl) = 1;
5228
5229 return new_fndecl;
5230 }
5231
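/* Worked example (illustrative only): vectorizing pow for V2DFmode looks
   up __builtin_pow, strips the "__builtin_" prefix and appends "d2",
   yielding a declaration for the MASS routine powd2 with type
   V2DF (V2DF, V2DF); likewise powf for V4SFmode maps to powf4.  */
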
5232 /* Returns a function decl for a vectorized version of the builtin function
5233 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5234 if it is not available. */
5235
5236 static tree
5237 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5238 tree type_in)
5239 {
5240 machine_mode in_mode, out_mode;
5241 int in_n, out_n;
5242
5243 if (TARGET_DEBUG_BUILTIN)
5244 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5245 combined_fn_name (combined_fn (fn)),
5246 GET_MODE_NAME (TYPE_MODE (type_out)),
5247 GET_MODE_NAME (TYPE_MODE (type_in)));
5248
5249 if (TREE_CODE (type_out) != VECTOR_TYPE
5250 || TREE_CODE (type_in) != VECTOR_TYPE)
5251 return NULL_TREE;
5252
5253 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5254 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5255 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5256 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5257
5258 switch (fn)
5259 {
5260 CASE_CFN_COPYSIGN:
5261 if (VECTOR_UNIT_VSX_P (V2DFmode)
5262 && out_mode == DFmode && out_n == 2
5263 && in_mode == DFmode && in_n == 2)
5264 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5265 if (VECTOR_UNIT_VSX_P (V4SFmode)
5266 && out_mode == SFmode && out_n == 4
5267 && in_mode == SFmode && in_n == 4)
5268 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5269 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5270 && out_mode == SFmode && out_n == 4
5271 && in_mode == SFmode && in_n == 4)
5272 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5273 break;
5274 CASE_CFN_CEIL:
5275 if (VECTOR_UNIT_VSX_P (V2DFmode)
5276 && out_mode == DFmode && out_n == 2
5277 && in_mode == DFmode && in_n == 2)
5278 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5279 if (VECTOR_UNIT_VSX_P (V4SFmode)
5280 && out_mode == SFmode && out_n == 4
5281 && in_mode == SFmode && in_n == 4)
5282 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5283 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5284 && out_mode == SFmode && out_n == 4
5285 && in_mode == SFmode && in_n == 4)
5286 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5287 break;
5288 CASE_CFN_FLOOR:
5289 if (VECTOR_UNIT_VSX_P (V2DFmode)
5290 && out_mode == DFmode && out_n == 2
5291 && in_mode == DFmode && in_n == 2)
5292 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5293 if (VECTOR_UNIT_VSX_P (V4SFmode)
5294 && out_mode == SFmode && out_n == 4
5295 && in_mode == SFmode && in_n == 4)
5296 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5297 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5298 && out_mode == SFmode && out_n == 4
5299 && in_mode == SFmode && in_n == 4)
5300 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5301 break;
5302 CASE_CFN_FMA:
5303 if (VECTOR_UNIT_VSX_P (V2DFmode)
5304 && out_mode == DFmode && out_n == 2
5305 && in_mode == DFmode && in_n == 2)
5306 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5307 if (VECTOR_UNIT_VSX_P (V4SFmode)
5308 && out_mode == SFmode && out_n == 4
5309 && in_mode == SFmode && in_n == 4)
5310 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5311 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5312 && out_mode == SFmode && out_n == 4
5313 && in_mode == SFmode && in_n == 4)
5314 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5315 break;
5316 CASE_CFN_TRUNC:
5317 if (VECTOR_UNIT_VSX_P (V2DFmode)
5318 && out_mode == DFmode && out_n == 2
5319 && in_mode == DFmode && in_n == 2)
5320 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5321 if (VECTOR_UNIT_VSX_P (V4SFmode)
5322 && out_mode == SFmode && out_n == 4
5323 && in_mode == SFmode && in_n == 4)
5324 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5325 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5326 && out_mode == SFmode && out_n == 4
5327 && in_mode == SFmode && in_n == 4)
5328 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5329 break;
5330 CASE_CFN_NEARBYINT:
5331 if (VECTOR_UNIT_VSX_P (V2DFmode)
5332 && flag_unsafe_math_optimizations
5333 && out_mode == DFmode && out_n == 2
5334 && in_mode == DFmode && in_n == 2)
5335 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5336 if (VECTOR_UNIT_VSX_P (V4SFmode)
5337 && flag_unsafe_math_optimizations
5338 && out_mode == SFmode && out_n == 4
5339 && in_mode == SFmode && in_n == 4)
5340 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5341 break;
5342 CASE_CFN_RINT:
5343 if (VECTOR_UNIT_VSX_P (V2DFmode)
5344 && !flag_trapping_math
5345 && out_mode == DFmode && out_n == 2
5346 && in_mode == DFmode && in_n == 2)
5347 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5348 if (VECTOR_UNIT_VSX_P (V4SFmode)
5349 && !flag_trapping_math
5350 && out_mode == SFmode && out_n == 4
5351 && in_mode == SFmode && in_n == 4)
5352 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5353 break;
5354 default:
5355 break;
5356 }
5357
5358 /* Generate calls to libmass if appropriate. */
5359 if (rs6000_veclib_handler)
5360 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5361
5362 return NULL_TREE;
5363 }
5364
5365 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5366
5367 static tree
5368 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5369 tree type_in)
5370 {
5371 machine_mode in_mode, out_mode;
5372 int in_n, out_n;
5373
5374 if (TARGET_DEBUG_BUILTIN)
5375 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5376 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5377 GET_MODE_NAME (TYPE_MODE (type_out)),
5378 GET_MODE_NAME (TYPE_MODE (type_in)));
5379
5380 if (TREE_CODE (type_out) != VECTOR_TYPE
5381 || TREE_CODE (type_in) != VECTOR_TYPE)
5382 return NULL_TREE;
5383
5384 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5385 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5386 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5387 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5388
5389 enum rs6000_builtins fn
5390 = (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
5391 switch (fn)
5392 {
5393 case RS6000_BUILTIN_RSQRTF:
5394 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5395 && out_mode == SFmode && out_n == 4
5396 && in_mode == SFmode && in_n == 4)
5397 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5398 break;
5399 case RS6000_BUILTIN_RSQRT:
5400 if (VECTOR_UNIT_VSX_P (V2DFmode)
5401 && out_mode == DFmode && out_n == 2
5402 && in_mode == DFmode && in_n == 2)
5403 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5404 break;
5405 case RS6000_BUILTIN_RECIPF:
5406 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5407 && out_mode == SFmode && out_n == 4
5408 && in_mode == SFmode && in_n == 4)
5409 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5410 break;
5411 case RS6000_BUILTIN_RECIP:
5412 if (VECTOR_UNIT_VSX_P (V2DFmode)
5413 && out_mode == DFmode && out_n == 2
5414 && in_mode == DFmode && in_n == 2)
5415 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5416 break;
5417 default:
5418 break;
5419 }
5420 return NULL_TREE;
5421 }
5422 \f
5423 /* Default CPU string for rs6000*_file_start functions. */
5424 static const char *rs6000_default_cpu;
5425
5426 #ifdef USING_ELFOS_H
5427 const char *rs6000_machine;
5428
5429 const char *
5430 rs6000_machine_from_flags (void)
5431 {
5432 HOST_WIDE_INT flags = rs6000_isa_flags;
5433
5434 /* Disable the flags that should never influence the .machine selection. */
5435 flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT);
5436
5437 if ((flags & (ISA_FUTURE_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
5438 return "future";
5439 if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
5440 return "power9";
5441 if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
5442 return "power8";
5443 if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
5444 return "power7";
5445 if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
5446 return "power6";
5447 if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
5448 return "power5";
5449 if ((flags & ISA_2_1_MASKS) != 0)
5450 return "power4";
5451 if ((flags & OPTION_MASK_POWERPC64) != 0)
5452 return "ppc64";
5453 return "ppc";
5454 }
5455
5456 void
5457 emit_asm_machine (void)
5458 {
5459 fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
5460 }
5461 #endif
5462
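/* Illustrative note (added): on an ELF target compiled with -mcpu=power9,
   rs6000_machine_from_flags selects "power9" and emit_asm_machine writes

	.machine power9

   near the top of the assembly output.  */
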
5463 /* Do anything needed at the start of the asm file. */
5464
5465 static void
5466 rs6000_file_start (void)
5467 {
5468 char buffer[80];
5469 const char *start = buffer;
5470 FILE *file = asm_out_file;
5471
5472 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5473
5474 default_file_start ();
5475
5476 if (flag_verbose_asm)
5477 {
5478 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5479
5480 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5481 {
5482 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5483 start = "";
5484 }
5485
5486 if (global_options_set.x_rs6000_cpu_index)
5487 {
5488 fprintf (file, "%s -mcpu=%s", start,
5489 processor_target_table[rs6000_cpu_index].name);
5490 start = "";
5491 }
5492
5493 if (global_options_set.x_rs6000_tune_index)
5494 {
5495 fprintf (file, "%s -mtune=%s", start,
5496 processor_target_table[rs6000_tune_index].name);
5497 start = "";
5498 }
5499
5500 if (PPC405_ERRATUM77)
5501 {
5502 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5503 start = "";
5504 }
5505
5506 #ifdef USING_ELFOS_H
5507 switch (rs6000_sdata)
5508 {
5509 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5510 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5511 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5512 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5513 }
5514
5515 if (rs6000_sdata && g_switch_value)
5516 {
5517 fprintf (file, "%s -G %d", start,
5518 g_switch_value);
5519 start = "";
5520 }
5521 #endif
5522
5523 if (*start == '\0')
5524 putc ('\n', file);
5525 }
5526
5527 #ifdef USING_ELFOS_H
5528 rs6000_machine = rs6000_machine_from_flags ();
5529 emit_asm_machine ();
5530 #endif
5531
5532 if (DEFAULT_ABI == ABI_ELFv2)
5533 fprintf (file, "\t.abiversion 2\n");
5534 }
5535
5536 \f
5537 /* Return nonzero if this function is known to have a null epilogue. */
5538
5539 int
5540 direct_return (void)
5541 {
5542 if (reload_completed)
5543 {
5544 rs6000_stack_t *info = rs6000_stack_info ();
5545
5546 if (info->first_gp_reg_save == 32
5547 && info->first_fp_reg_save == 64
5548 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5549 && ! info->lr_save_p
5550 && ! info->cr_save_p
5551 && info->vrsave_size == 0
5552 && ! info->push_p)
5553 return 1;
5554 }
5555
5556 return 0;
5557 }
5558
5559 /* Helper for num_insns_constant. Calculate number of instructions to
5560 load VALUE to a single gpr using combinations of addi, addis, ori,
5561 oris and sldi instructions. */
5562
5563 static int
5564 num_insns_constant_gpr (HOST_WIDE_INT value)
5565 {
5566 /* signed constant loadable with addi */
5567 if (SIGNED_INTEGER_16BIT_P (value))
5568 return 1;
5569
5570 /* constant loadable with addis */
5571 else if ((value & 0xffff) == 0
5572 && (value >> 31 == -1 || value >> 31 == 0))
5573 return 1;
5574
5575 /* PADDI can support up to 34-bit signed integers. */
5576 else if (TARGET_PREFIXED_ADDR && SIGNED_INTEGER_34BIT_P (value))
5577 return 1;
5578
5579 else if (TARGET_POWERPC64)
5580 {
5581 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5582 HOST_WIDE_INT high = value >> 31;
5583
5584 if (high == 0 || high == -1)
5585 return 2;
5586
5587 high >>= 1;
5588
5589 if (low == 0)
5590 return num_insns_constant_gpr (high) + 1;
5591 else if (high == 0)
5592 return num_insns_constant_gpr (low) + 1;
5593 else
5594 return (num_insns_constant_gpr (high)
5595 + num_insns_constant_gpr (low) + 1);
5596 }
5597
5598 else
5599 return 2;
5600 }
5601
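/* Worked example (an illustrative sketch added here): on a 64-bit target
   without prefixed instructions, 0x1234567890abcdef recurses with
   high = 0x12345678 (2 insns) and low = sign-extended 0x90abcdef
   (2 insns) plus 1 to combine them, i.e. 5 instructions along the
   lines of:

	lis   9,0x1234
	ori   9,9,0x5678
	sldi  9,9,32
	oris  9,9,0x90ab
	ori   9,9,0xcdef  */
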
5602 /* Helper for num_insns_constant. Allow constants formed by the
5603 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
5604 and handle modes that require multiple gprs. */
5605
5606 static int
5607 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
5608 {
5609 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5610 int total = 0;
5611 while (nregs-- > 0)
5612 {
5613 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
5614 int insns = num_insns_constant_gpr (low);
5615 if (insns > 2
5616 /* We won't get more than 2 from num_insns_constant_gpr
5617 except when TARGET_POWERPC64 and mode is DImode or
5618 wider, so the register mode must be DImode. */
5619 && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
5620 insns = 2;
5621 total += insns;
5622 value >>= BITS_PER_WORD;
5623 }
5624 return total;
5625 }
5626
5627 /* Return the number of instructions it takes to form a constant in as
5628 many gprs as are needed for MODE. */
5629
5630 int
5631 num_insns_constant (rtx op, machine_mode mode)
5632 {
5633 HOST_WIDE_INT val;
5634
5635 switch (GET_CODE (op))
5636 {
5637 case CONST_INT:
5638 val = INTVAL (op);
5639 break;
5640
5641 case CONST_WIDE_INT:
5642 {
5643 int insns = 0;
5644 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5645 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
5646 DImode);
5647 return insns;
5648 }
5649
5650 case CONST_DOUBLE:
5651 {
5652 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
5653
5654 if (mode == SFmode || mode == SDmode)
5655 {
5656 long l;
5657
5658 if (mode == SDmode)
5659 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
5660 else
5661 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
5662 /* See the first define_split in rs6000.md handling a
5663 const_double_operand. */
5664 val = l;
5665 mode = SImode;
5666 }
5667 else if (mode == DFmode || mode == DDmode)
5668 {
5669 long l[2];
5670
5671 if (mode == DDmode)
5672 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
5673 else
5674 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
5675
5676 /* See the second (32-bit) and third (64-bit) define_split
5677 in rs6000.md handling a const_double_operand. */
5678 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
5679 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
5680 mode = DImode;
5681 }
5682 else if (mode == TFmode || mode == TDmode
5683 || mode == KFmode || mode == IFmode)
5684 {
5685 long l[4];
5686 int insns;
5687
5688 if (mode == TDmode)
5689 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
5690 else
5691 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
5692
5693 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
5694 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
5695 insns = num_insns_constant_multi (val, DImode);
5696 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
5697 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
5698 insns += num_insns_constant_multi (val, DImode);
5699 return insns;
5700 }
5701 else
5702 gcc_unreachable ();
5703 }
5704 break;
5705
5706 default:
5707 gcc_unreachable ();
5708 }
5709
5710 return num_insns_constant_multi (val, mode);
5711 }
5712
5713 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5714 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5715 corresponding element of the vector, but for V4SFmode, the
5716 corresponding "float" is interpreted as an SImode integer. */
5717
5718 HOST_WIDE_INT
5719 const_vector_elt_as_int (rtx op, unsigned int elt)
5720 {
5721 rtx tmp;
5722
5723 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5724 gcc_assert (GET_MODE (op) != V2DImode
5725 && GET_MODE (op) != V2DFmode);
5726
5727 tmp = CONST_VECTOR_ELT (op, elt);
5728 if (GET_MODE (op) == V4SFmode)
5729 tmp = gen_lowpart (SImode, tmp);
5730 return INTVAL (tmp);
5731 }
5732
5733 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5734 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5735 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5736 all items are set to the same value and contain COPIES replicas of the
5737 vsplt's operand; if STEP > 1, every STEP-th element is set to the vsplt's
5738 operand and the others are set to the value of the operand's msb. */
5739
5740 static bool
5741 vspltis_constant (rtx op, unsigned step, unsigned copies)
5742 {
5743 machine_mode mode = GET_MODE (op);
5744 machine_mode inner = GET_MODE_INNER (mode);
5745
5746 unsigned i;
5747 unsigned nunits;
5748 unsigned bitsize;
5749 unsigned mask;
5750
5751 HOST_WIDE_INT val;
5752 HOST_WIDE_INT splat_val;
5753 HOST_WIDE_INT msb_val;
5754
5755 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5756 return false;
5757
5758 nunits = GET_MODE_NUNITS (mode);
5759 bitsize = GET_MODE_BITSIZE (inner);
5760 mask = GET_MODE_MASK (inner);
5761
5762 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5763 splat_val = val;
5764 msb_val = val >= 0 ? 0 : -1;
5765
5766 /* Construct the value to be splatted, if possible. If not, return 0. */
5767 for (i = 2; i <= copies; i *= 2)
5768 {
5769 HOST_WIDE_INT small_val;
5770 bitsize /= 2;
5771 small_val = splat_val >> bitsize;
5772 mask >>= bitsize;
5773 if (splat_val != ((HOST_WIDE_INT)
5774 ((unsigned HOST_WIDE_INT) small_val << bitsize)
5775 | (small_val & mask)))
5776 return false;
5777 splat_val = small_val;
5778 }
5779
5780 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5781 if (EASY_VECTOR_15 (splat_val))
5782 ;
5783
5784 /* Also check if we can splat, and then add the result to itself. Do so if
5785 the value is positive, or if the splat instruction is using OP's mode;
5786 for splat_val < 0, the splat and the add should use the same mode. */
5787 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
5788 && (splat_val >= 0 || (step == 1 && copies == 1)))
5789 ;
5790
5791 /* Also check if we are loading up the most significant bit, which can be
5792 done by loading up -1 and shifting the value left by -1. */
5793 else if (EASY_VECTOR_MSB (splat_val, inner))
5794 ;
5795
5796 else
5797 return false;
5798
5799 /* Check if VAL is present in every STEP-th element, and the
5800 other elements are filled with its most significant bit. */
5801 for (i = 1; i < nunits; ++i)
5802 {
5803 HOST_WIDE_INT desired_val;
5804 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
5805 if ((i & (step - 1)) == 0)
5806 desired_val = val;
5807 else
5808 desired_val = msb_val;
5809
5810 if (desired_val != const_vector_elt_as_int (op, elt))
5811 return false;
5812 }
5813
5814 return true;
5815 }
5816
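/* Worked examples (illustrative only): the V4SImode constant with every
   element 0x00050005 is accepted with step 1, copies 2, since splatting
   the halfword 5 with vspltish produces exactly that bit pattern.  The
   big-endian V8HImode constant { 0, 5, 0, 5, 0, 5, 0, 5 } is accepted
   with step 2, copies 1, since vspltisw 5 viewed as V8HImode sets every
   second halfword to 5 and the others to the sign bits (zero here).  */
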
5817 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
5818 instruction, filling in the bottom elements with 0 or -1.
5819
5820 Return 0 if the constant cannot be generated with VSLDOI. Return positive
5821 for the number of zeroes to shift in, or negative for the number of 0xff
5822 bytes to shift in.
5823
5824 OP is a CONST_VECTOR. */
5825
5826 int
5827 vspltis_shifted (rtx op)
5828 {
5829 machine_mode mode = GET_MODE (op);
5830 machine_mode inner = GET_MODE_INNER (mode);
5831
5832 unsigned i, j;
5833 unsigned nunits;
5834 unsigned mask;
5835
5836 HOST_WIDE_INT val;
5837
5838 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
5839 return false;
5840
5841 /* We need to create pseudo registers to do the shift, so don't recognize
5842 shift vector constants after reload. */
5843 if (!can_create_pseudo_p ())
5844 return false;
5845
5846 nunits = GET_MODE_NUNITS (mode);
5847 mask = GET_MODE_MASK (inner);
5848
5849 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
5850
5851 /* Check if the value can really be the operand of a vspltis[bhw]. */
5852 if (EASY_VECTOR_15 (val))
5853 ;
5854
5855 /* Also check if we are loading up the most significant bit, which can be done
5856 by loading up -1 and shifting the value left by -1. */
5857 else if (EASY_VECTOR_MSB (val, inner))
5858 ;
5859
5860 else
5861 return 0;
5862
5863 /* Check if VAL is present in every STEP-th element until we find elements
5864 that are 0 or all 1 bits. */
5865 for (i = 1; i < nunits; ++i)
5866 {
5867 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
5868 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
5869
5870 /* If the value isn't the splat value, check for the remaining elements
5871 being 0/-1. */
5872 if (val != elt_val)
5873 {
5874 if (elt_val == 0)
5875 {
5876 for (j = i+1; j < nunits; ++j)
5877 {
5878 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5879 if (const_vector_elt_as_int (op, elt2) != 0)
5880 return 0;
5881 }
5882
5883 return (nunits - i) * GET_MODE_SIZE (inner);
5884 }
5885
5886 else if ((elt_val & mask) == mask)
5887 {
5888 for (j = i+1; j < nunits; ++j)
5889 {
5890 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5891 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
5892 return 0;
5893 }
5894
5895 return -((nunits - i) * GET_MODE_SIZE (inner));
5896 }
5897
5898 else
5899 return 0;
5900 }
5901 }
5902
5903 /* If all elements are equal, we don't need to do VSLDOI. */
5904 return 0;
5905 }
5906
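/* Worked example (illustrative only): the big-endian V4SImode constant
   { 7, 7, 7, 0 } returns (4 - 3) * 4 = 4 here, i.e. vspltisw 7 followed
   by a VSLDOI that shifts in 4 bytes of zeroes; { 7, 7, -1, -1 } would
   return -8 instead, requesting 8 bytes of 0xff.  */
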
5907
5908 /* Return true if OP is of the given MODE and can be synthesized
5909 with a vspltisb, vspltish or vspltisw. */
5910
5911 bool
5912 easy_altivec_constant (rtx op, machine_mode mode)
5913 {
5914 unsigned step, copies;
5915
5916 if (mode == VOIDmode)
5917 mode = GET_MODE (op);
5918 else if (mode != GET_MODE (op))
5919 return false;
5920
5921 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
5922 constants. */
5923 if (mode == V2DFmode)
5924 return zero_constant (op, mode);
5925
5926 else if (mode == V2DImode)
5927 {
5928 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
5929 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
5930 return false;
5931
5932 if (zero_constant (op, mode))
5933 return true;
5934
5935 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
5936 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
5937 return true;
5938
5939 return false;
5940 }
5941
5942 /* V1TImode is a special container for TImode. Ignore for now. */
5943 else if (mode == V1TImode)
5944 return false;
5945
5946 /* Start with a vspltisw. */
5947 step = GET_MODE_NUNITS (mode) / 4;
5948 copies = 1;
5949
5950 if (vspltis_constant (op, step, copies))
5951 return true;
5952
5953 /* Then try with a vspltish. */
5954 if (step == 1)
5955 copies <<= 1;
5956 else
5957 step >>= 1;
5958
5959 if (vspltis_constant (op, step, copies))
5960 return true;
5961
5962 /* And finally a vspltisb. */
5963 if (step == 1)
5964 copies <<= 1;
5965 else
5966 step >>= 1;
5967
5968 if (vspltis_constant (op, step, copies))
5969 return true;
5970
5971 if (vspltis_shifted (op) != 0)
5972 return true;
5973
5974 return false;
5975 }
5976
5977 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
5978 result is OP. Abort if it is not possible. */
5979
5980 rtx
5981 gen_easy_altivec_constant (rtx op)
5982 {
5983 machine_mode mode = GET_MODE (op);
5984 int nunits = GET_MODE_NUNITS (mode);
5985 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5986 unsigned step = nunits / 4;
5987 unsigned copies = 1;
5988
5989 /* Start with a vspltisw. */
5990 if (vspltis_constant (op, step, copies))
5991 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
5992
5993 /* Then try with a vspltish. */
5994 if (step == 1)
5995 copies <<= 1;
5996 else
5997 step >>= 1;
5998
5999 if (vspltis_constant (op, step, copies))
6000 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6001
6002 /* And finally a vspltisb. */
6003 if (step == 1)
6004 copies <<= 1;
6005 else
6006 step >>= 1;
6007
6008 if (vspltis_constant (op, step, copies))
6009 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6010
6011 gcc_unreachable ();
6012 }
6013
6014 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6015 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6016
6017 Return the number of instructions needed (1 or 2) via the address pointed
6018 to by NUM_INSNS_PTR.
6019
6020 Return the constant that is being splatted via CONSTANT_PTR. */
6021
6022 bool
6023 xxspltib_constant_p (rtx op,
6024 machine_mode mode,
6025 int *num_insns_ptr,
6026 int *constant_ptr)
6027 {
6028 size_t nunits = GET_MODE_NUNITS (mode);
6029 size_t i;
6030 HOST_WIDE_INT value;
6031 rtx element;
6032
6033 /* Set the returned values to out-of-bounds values. */
6034 *num_insns_ptr = -1;
6035 *constant_ptr = 256;
6036
6037 if (!TARGET_P9_VECTOR)
6038 return false;
6039
6040 if (mode == VOIDmode)
6041 mode = GET_MODE (op);
6042
6043 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6044 return false;
6045
6046 /* Handle (vec_duplicate <constant>). */
6047 if (GET_CODE (op) == VEC_DUPLICATE)
6048 {
6049 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6050 && mode != V2DImode)
6051 return false;
6052
6053 element = XEXP (op, 0);
6054 if (!CONST_INT_P (element))
6055 return false;
6056
6057 value = INTVAL (element);
6058 if (!IN_RANGE (value, -128, 127))
6059 return false;
6060 }
6061
6062 /* Handle (const_vector [...]). */
6063 else if (GET_CODE (op) == CONST_VECTOR)
6064 {
6065 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6066 && mode != V2DImode)
6067 return false;
6068
6069 element = CONST_VECTOR_ELT (op, 0);
6070 if (!CONST_INT_P (element))
6071 return false;
6072
6073 value = INTVAL (element);
6074 if (!IN_RANGE (value, -128, 127))
6075 return false;
6076
6077 for (i = 1; i < nunits; i++)
6078 {
6079 element = CONST_VECTOR_ELT (op, i);
6080 if (!CONST_INT_P (element))
6081 return false;
6082
6083 if (value != INTVAL (element))
6084 return false;
6085 }
6086 }
6087
6088 /* Handle integer constants being loaded into the upper part of the VSX
6089 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6090 can go in Altivec registers.  Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6091 else if (CONST_INT_P (op))
6092 {
6093 if (!SCALAR_INT_MODE_P (mode))
6094 return false;
6095
6096 value = INTVAL (op);
6097 if (!IN_RANGE (value, -128, 127))
6098 return false;
6099
6100 if (!IN_RANGE (value, -1, 0))
6101 {
6102 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6103 return false;
6104
6105 if (EASY_VECTOR_15 (value))
6106 return false;
6107 }
6108 }
6109
6110 else
6111 return false;
6112
6113 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6114 sign extend. Special case 0/-1 to allow getting any VSX register instead
6115 of an Altivec register. */
6116 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6117 && EASY_VECTOR_15 (value))
6118 return false;
6119
6120 /* Return # of instructions and the constant byte for XXSPLTIB. */
6121 if (mode == V16QImode)
6122 *num_insns_ptr = 1;
6123
6124 else if (IN_RANGE (value, -1, 0))
6125 *num_insns_ptr = 1;
6126
6127 else
6128 *num_insns_ptr = 2;
6129
6130 *constant_ptr = (int) value;
6131 return true;
6132 }
6133
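/* Worked examples (illustrative only): a V16QImode splat of 0x42 needs a
   single xxspltib, so *NUM_INSNS_PTR is 1 and *CONSTANT_PTR is 0x42.  A
   V4SImode splat of 0x42 needs xxspltib plus a vextsb2w sign extension,
   so *NUM_INSNS_PTR is 2.  A V4SImode splat of 5 returns false, since
   EASY_VECTOR_15 holds and a single vspltisw is preferable.  */
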
6134 const char *
6135 output_vec_const_move (rtx *operands)
6136 {
6137 int shift;
6138 machine_mode mode;
6139 rtx dest, vec;
6140
6141 dest = operands[0];
6142 vec = operands[1];
6143 mode = GET_MODE (dest);
6144
6145 if (TARGET_VSX)
6146 {
6147 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6148 int xxspltib_value = 256;
6149 int num_insns = -1;
6150
6151 if (zero_constant (vec, mode))
6152 {
6153 if (TARGET_P9_VECTOR)
6154 return "xxspltib %x0,0";
6155
6156 else if (dest_vmx_p)
6157 return "vspltisw %0,0";
6158
6159 else
6160 return "xxlxor %x0,%x0,%x0";
6161 }
6162
6163 if (all_ones_constant (vec, mode))
6164 {
6165 if (TARGET_P9_VECTOR)
6166 return "xxspltib %x0,255";
6167
6168 else if (dest_vmx_p)
6169 return "vspltisw %0,-1";
6170
6171 else if (TARGET_P8_VECTOR)
6172 return "xxlorc %x0,%x0,%x0";
6173
6174 else
6175 gcc_unreachable ();
6176 }
6177
6178 if (TARGET_P9_VECTOR
6179 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6180 {
6181 if (num_insns == 1)
6182 {
6183 operands[2] = GEN_INT (xxspltib_value & 0xff);
6184 return "xxspltib %x0,%2";
6185 }
6186
6187 return "#";
6188 }
6189 }
6190
6191 if (TARGET_ALTIVEC)
6192 {
6193 rtx splat_vec;
6194
6195 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6196 if (zero_constant (vec, mode))
6197 return "vspltisw %0,0";
6198
6199 if (all_ones_constant (vec, mode))
6200 return "vspltisw %0,-1";
6201
6202 /* Do we need to construct a value using VSLDOI? */
6203 shift = vspltis_shifted (vec);
6204 if (shift != 0)
6205 return "#";
6206
6207 splat_vec = gen_easy_altivec_constant (vec);
6208 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6209 operands[1] = XEXP (splat_vec, 0);
6210 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6211 return "#";
6212
6213 switch (GET_MODE (splat_vec))
6214 {
6215 case E_V4SImode:
6216 return "vspltisw %0,%1";
6217
6218 case E_V8HImode:
6219 return "vspltish %0,%1";
6220
6221 case E_V16QImode:
6222 return "vspltisb %0,%1";
6223
6224 default:
6225 gcc_unreachable ();
6226 }
6227 }
6228
6229 gcc_unreachable ();
6230 }
6231
6232 /* Initialize vector TARGET to VALS. */
6233
6234 void
6235 rs6000_expand_vector_init (rtx target, rtx vals)
6236 {
6237 machine_mode mode = GET_MODE (target);
6238 machine_mode inner_mode = GET_MODE_INNER (mode);
6239 int n_elts = GET_MODE_NUNITS (mode);
6240 int n_var = 0, one_var = -1;
6241 bool all_same = true, all_const_zero = true;
6242 rtx x, mem;
6243 int i;
6244
6245 for (i = 0; i < n_elts; ++i)
6246 {
6247 x = XVECEXP (vals, 0, i);
6248 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6249 ++n_var, one_var = i;
6250 else if (x != CONST0_RTX (inner_mode))
6251 all_const_zero = false;
6252
6253 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6254 all_same = false;
6255 }
6256
6257 if (n_var == 0)
6258 {
6259 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6260 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6261 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6262 {
6263 /* Zero register. */
6264 emit_move_insn (target, CONST0_RTX (mode));
6265 return;
6266 }
6267 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6268 {
6269 /* Splat immediate. */
6270 emit_insn (gen_rtx_SET (target, const_vec));
6271 return;
6272 }
6273 else
6274 {
6275 /* Load from constant pool. */
6276 emit_move_insn (target, const_vec);
6277 return;
6278 }
6279 }
6280
6281 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6282 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6283 {
6284 rtx op[2];
6285 size_t i;
6286 size_t num_elements = all_same ? 1 : 2;
6287 for (i = 0; i < num_elements; i++)
6288 {
6289 op[i] = XVECEXP (vals, 0, i);
6290 /* Just in case there is a SUBREG with a smaller mode, do a
6291 conversion. */
6292 if (GET_MODE (op[i]) != inner_mode)
6293 {
6294 rtx tmp = gen_reg_rtx (inner_mode);
6295 convert_move (tmp, op[i], 0);
6296 op[i] = tmp;
6297 }
6298 /* Allow load with splat double word. */
6299 else if (MEM_P (op[i]))
6300 {
6301 if (!all_same)
6302 op[i] = force_reg (inner_mode, op[i]);
6303 }
6304 else if (!REG_P (op[i]))
6305 op[i] = force_reg (inner_mode, op[i]);
6306 }
6307
6308 if (all_same)
6309 {
6310 if (mode == V2DFmode)
6311 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6312 else
6313 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6314 }
6315 else
6316 {
6317 if (mode == V2DFmode)
6318 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6319 else
6320 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6321 }
6322 return;
6323 }
6324
6325 /* Special case initializing vector int if we are on 64-bit systems with
6326 direct move or we have the ISA 3.0 instructions. */
6327 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6328 && TARGET_DIRECT_MOVE_64BIT)
6329 {
6330 if (all_same)
6331 {
6332 rtx element0 = XVECEXP (vals, 0, 0);
6333 if (MEM_P (element0))
6334 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6335 else
6336 element0 = force_reg (SImode, element0);
6337
6338 if (TARGET_P9_VECTOR)
6339 emit_insn (gen_vsx_splat_v4si (target, element0));
6340 else
6341 {
6342 rtx tmp = gen_reg_rtx (DImode);
6343 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6344 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6345 }
6346 return;
6347 }
6348 else
6349 {
6350 rtx elements[4];
6351 size_t i;
6352
6353 for (i = 0; i < 4; i++)
6354 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6355
6356 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6357 elements[2], elements[3]));
6358 return;
6359 }
6360 }
6361
6362 /* With single precision floating point on VSX, we know that internally single
6363 precision is actually represented as a double, so either make 2 V2DF
6364 vectors and convert these vectors to single precision, or do one
6365 conversion and splat the result to the other elements. */
6366 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6367 {
6368 if (all_same)
6369 {
6370 rtx element0 = XVECEXP (vals, 0, 0);
6371
6372 if (TARGET_P9_VECTOR)
6373 {
6374 if (MEM_P (element0))
6375 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6376
6377 emit_insn (gen_vsx_splat_v4sf (target, element0));
6378 }
6379
6380 else
6381 {
6382 rtx freg = gen_reg_rtx (V4SFmode);
6383 rtx sreg = force_reg (SFmode, element0);
6384 rtx cvt = (TARGET_XSCVDPSPN
6385 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6386 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6387
6388 emit_insn (cvt);
6389 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6390 const0_rtx));
6391 }
6392 }
6393 else
6394 {
6395 rtx dbl_even = gen_reg_rtx (V2DFmode);
6396 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6397 rtx flt_even = gen_reg_rtx (V4SFmode);
6398 rtx flt_odd = gen_reg_rtx (V4SFmode);
6399 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6400 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6401 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6402 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6403
6404 /* Use VMRGEW if we can instead of doing a permute. */
6405 if (TARGET_P8_VECTOR)
6406 {
6407 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
6408 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
6409 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6410 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6411 if (BYTES_BIG_ENDIAN)
6412 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
6413 else
6414 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
6415 }
6416 else
6417 {
6418 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6419 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6420 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6421 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6422 rs6000_expand_extract_even (target, flt_even, flt_odd);
6423 }
6424 }
6425 return;
6426 }
6427
6428 /* Special case initializing vector short/char that are splats if we are on
6429 64-bit systems with direct move. */
6430 if (all_same && TARGET_DIRECT_MOVE_64BIT
6431 && (mode == V16QImode || mode == V8HImode))
6432 {
6433 rtx op0 = XVECEXP (vals, 0, 0);
6434 rtx di_tmp = gen_reg_rtx (DImode);
6435
6436 if (!REG_P (op0))
6437 op0 = force_reg (GET_MODE_INNER (mode), op0);
6438
6439 if (mode == V16QImode)
6440 {
6441 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6442 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6443 return;
6444 }
6445
6446 if (mode == V8HImode)
6447 {
6448 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6449 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
6450 return;
6451 }
6452 }
6453
6454 /* Store value to stack temp. Load vector element. Splat. However, splat
6455 of 64-bit items is not supported on Altivec. */
6456 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6457 {
6458 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6459 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6460 XVECEXP (vals, 0, 0));
6461 x = gen_rtx_UNSPEC (VOIDmode,
6462 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6463 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6464 gen_rtvec (2,
6465 gen_rtx_SET (target, mem),
6466 x)));
6467 x = gen_rtx_VEC_SELECT (inner_mode, target,
6468 gen_rtx_PARALLEL (VOIDmode,
6469 gen_rtvec (1, const0_rtx)));
6470 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6471 return;
6472 }
6473
6474 /* One field is non-constant. Load constant then overwrite
6475 varying field. */
6476 if (n_var == 1)
6477 {
6478 rtx copy = copy_rtx (vals);
6479
6480 /* Load constant part of vector, substitute neighboring value for
6481 varying element. */
6482 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6483 rs6000_expand_vector_init (target, copy);
6484
6485 /* Insert variable. */
6486 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
6487 return;
6488 }
6489
6490 /* Construct the vector in memory one field at a time
6491 and load the whole vector. */
6492 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6493 for (i = 0; i < n_elts; i++)
6494 emit_move_insn (adjust_address_nv (mem, inner_mode,
6495 i * GET_MODE_SIZE (inner_mode)),
6496 XVECEXP (vals, 0, i));
6497 emit_move_insn (target, mem);
6498 }
6499
6500 /* Set field ELT of TARGET to VAL. */
6501
6502 void
6503 rs6000_expand_vector_set (rtx target, rtx val, int elt)
6504 {
6505 machine_mode mode = GET_MODE (target);
6506 machine_mode inner_mode = GET_MODE_INNER (mode);
6507 rtx reg = gen_reg_rtx (mode);
6508 rtx mask, mem, x;
6509 int width = GET_MODE_SIZE (inner_mode);
6510 int i;
6511
6512 val = force_reg (GET_MODE (val), val);
6513
6514 if (VECTOR_MEM_VSX_P (mode))
6515 {
6516 rtx insn = NULL_RTX;
6517 rtx elt_rtx = GEN_INT (elt);
6518
6519 if (mode == V2DFmode)
6520 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
6521
6522 else if (mode == V2DImode)
6523 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
6524
6525 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
6526 {
6527 if (mode == V4SImode)
6528 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
6529 else if (mode == V8HImode)
6530 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
6531 else if (mode == V16QImode)
6532 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
6533 else if (mode == V4SFmode)
6534 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
6535 }
6536
6537 if (insn)
6538 {
6539 emit_insn (insn);
6540 return;
6541 }
6542 }
6543
6544 /* Simplify setting single element vectors like V1TImode. */
6545 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
6546 {
6547 emit_move_insn (target, gen_lowpart (mode, val));
6548 return;
6549 }
6550
6551 /* Load single variable value. */
6552 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6553 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
6554 x = gen_rtx_UNSPEC (VOIDmode,
6555 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6556 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6557 gen_rtvec (2,
6558 gen_rtx_SET (reg, mem),
6559 x)));
6560
6561 /* Linear sequence. */
6562 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
6563 for (i = 0; i < 16; ++i)
6564 XVECEXP (mask, 0, i) = GEN_INT (i);
6565
6566 /* Set permute mask to insert element into target. */
6567 for (i = 0; i < width; ++i)
6568 XVECEXP (mask, 0, elt*width + i)
6569 = GEN_INT (i + 0x10);
6570 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
6571
6572 if (BYTES_BIG_ENDIAN)
6573 x = gen_rtx_UNSPEC (mode,
6574 gen_rtvec (3, target, reg,
6575 force_reg (V16QImode, x)),
6576 UNSPEC_VPERM);
6577 else
6578 {
6579 if (TARGET_P9_VECTOR)
6580 x = gen_rtx_UNSPEC (mode,
6581 gen_rtvec (3, reg, target,
6582 force_reg (V16QImode, x)),
6583 UNSPEC_VPERMR);
6584 else
6585 {
6586 /* Invert selector. We prefer to generate VNAND on P8 so
6587 that future fusion opportunities can kick in, but must
6588 generate VNOR elsewhere. */
6589 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
6590 rtx iorx = (TARGET_P8_VECTOR
6591 ? gen_rtx_IOR (V16QImode, notx, notx)
6592 : gen_rtx_AND (V16QImode, notx, notx));
6593 rtx tmp = gen_reg_rtx (V16QImode);
6594 emit_insn (gen_rtx_SET (tmp, iorx));
6595
6596 /* Permute with operands reversed and adjusted selector. */
6597 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
6598 UNSPEC_VPERM);
6599 }
6600 }
6601
6602 emit_insn (gen_rtx_SET (target, x));
6603 }
6604
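/* Worked example (illustrative only): inserting into element 1 of a
   V4SImode vector (width 4) on a big-endian target builds the selector
   { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15 }, an
   identity byte map except that bytes 4-7 are taken from the second
   vperm input, which holds the newly loaded value.  */
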
6605 /* Extract field ELT from VEC into TARGET. */
6606
6607 void
6608 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
6609 {
6610 machine_mode mode = GET_MODE (vec);
6611 machine_mode inner_mode = GET_MODE_INNER (mode);
6612 rtx mem;
6613
6614 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
6615 {
6616 switch (mode)
6617 {
6618 default:
6619 break;
6620 case E_V1TImode:
6621 emit_move_insn (target, gen_lowpart (TImode, vec));
6622 break;
6623 case E_V2DFmode:
6624 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
6625 return;
6626 case E_V2DImode:
6627 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
6628 return;
6629 case E_V4SFmode:
6630 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
6631 return;
6632 case E_V16QImode:
6633 if (TARGET_DIRECT_MOVE_64BIT)
6634 {
6635 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
6636 return;
6637 }
6638 else
6639 break;
6640 case E_V8HImode:
6641 if (TARGET_DIRECT_MOVE_64BIT)
6642 {
6643 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
6644 return;
6645 }
6646 else
6647 break;
6648 case E_V4SImode:
6649 if (TARGET_DIRECT_MOVE_64BIT)
6650 {
6651 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
6652 return;
6653 }
6654 break;
6655 }
6656 }
6657 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
6658 && TARGET_DIRECT_MOVE_64BIT)
6659 {
6660 if (GET_MODE (elt) != DImode)
6661 {
6662 rtx tmp = gen_reg_rtx (DImode);
6663 convert_move (tmp, elt, 0);
6664 elt = tmp;
6665 }
6666 else if (!REG_P (elt))
6667 elt = force_reg (DImode, elt);
6668
6669 switch (mode)
6670 {
6671 case E_V1TImode:
6672 emit_move_insn (target, gen_lowpart (TImode, vec));
6673 return;
6674
6675 case E_V2DFmode:
6676 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
6677 return;
6678
6679 case E_V2DImode:
6680 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
6681 return;
6682
6683 case E_V4SFmode:
6684 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
6685 return;
6686
6687 case E_V4SImode:
6688 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
6689 return;
6690
6691 case E_V8HImode:
6692 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
6693 return;
6694
6695 case E_V16QImode:
6696 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
6697 return;
6698
6699 default:
6700 gcc_unreachable ();
6701 }
6702 }
6703
6704 /* Allocate mode-sized buffer. */
6705 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6706
6707 emit_move_insn (mem, vec);
6708 if (CONST_INT_P (elt))
6709 {
6710 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
6711
6712 /* Add offset to field within buffer matching vector element. */
6713 mem = adjust_address_nv (mem, inner_mode,
6714 modulo_elt * GET_MODE_SIZE (inner_mode));
6715 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
6716 }
6717 else
6718 {
6719 unsigned int ele_size = GET_MODE_SIZE (inner_mode);
6720 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
6721 rtx new_addr = gen_reg_rtx (Pmode);
6722
6723 elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
6724 if (ele_size > 1)
6725 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
6726 new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
6727 new_addr = change_address (mem, inner_mode, new_addr);
6728 emit_move_insn (target, new_addr);
6729 }
6730 }
6731
6732 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
6733 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
6734 temporary (BASE_TMP) to fix up the address.  Return the new memory address
6735 that is valid for reads or writes to a given register (SCALAR_REG). */
6736
6737 rtx
6738 rs6000_adjust_vec_address (rtx scalar_reg,
6739 rtx mem,
6740 rtx element,
6741 rtx base_tmp,
6742 machine_mode scalar_mode)
6743 {
6744 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
6745 rtx addr = XEXP (mem, 0);
6746 rtx element_offset;
6747 rtx new_addr;
6748 bool valid_addr_p;
6749
6750 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
6751 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
6752
6753 /* Calculate what we need to add to the address to get the element
6754 address. */
6755 if (CONST_INT_P (element))
6756 element_offset = GEN_INT (INTVAL (element) * scalar_size);
6757 else
6758 {
6759 int byte_shift = exact_log2 (scalar_size);
6760 gcc_assert (byte_shift >= 0);
6761
6762 if (byte_shift == 0)
6763 element_offset = element;
6764
6765 else
6766 {
6767 if (TARGET_POWERPC64)
6768 emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
6769 else
6770 emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
6771
6772 element_offset = base_tmp;
6773 }
6774 }
6775
6776 /* Create the new address pointing to the element within the vector. If we
6777 are adding 0, we don't have to change the address. */
6778 if (element_offset == const0_rtx)
6779 new_addr = addr;
6780
6781 /* A simple indirect address can be converted into a reg + offset
6782 address. */
6783 else if (REG_P (addr) || SUBREG_P (addr))
6784 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
6785
6786 /* Optimize D-FORM addresses with a constant offset and a constant element, to
6787 include the element offset in the address directly. */
6788 else if (GET_CODE (addr) == PLUS)
6789 {
6790 rtx op0 = XEXP (addr, 0);
6791 rtx op1 = XEXP (addr, 1);
6792 rtx insn;
6793
6794 gcc_assert (REG_P (op0) || SUBREG_P (op0));
6795 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
6796 {
6797 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
6798 rtx offset_rtx = GEN_INT (offset);
6799
6800 if (IN_RANGE (offset, -32768, 32767)
6801 && (scalar_size < 8 || (offset & 0x3) == 0))
6802 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
6803 else
6804 {
6805 emit_move_insn (base_tmp, offset_rtx);
6806 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
6807 }
6808 }
6809 else
6810 {
6811 bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
6812 bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
6813
6814 /* Note, ADDI requires the register being added to be a base
6815 register. If the register was R0, load it up into the temporary
6816 and do the add. */
6817 if (op1_reg_p
6818 && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
6819 {
6820 insn = gen_add3_insn (base_tmp, op1, element_offset);
6821 gcc_assert (insn != NULL_RTX);
6822 emit_insn (insn);
6823 }
6824
6825 else if (ele_reg_p
6826 && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
6827 {
6828 insn = gen_add3_insn (base_tmp, element_offset, op1);
6829 gcc_assert (insn != NULL_RTX);
6830 emit_insn (insn);
6831 }
6832
6833 else
6834 {
6835 emit_move_insn (base_tmp, op1);
6836 emit_insn (gen_add2_insn (base_tmp, element_offset));
6837 }
6838
6839 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
6840 }
6841 }
6842
6843 else
6844 {
6845 emit_move_insn (base_tmp, addr);
6846 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
6847 }
6848
6849 /* If we have a PLUS, we need to see whether the particular register class
6850 allows for D-FORM or X-FORM addressing. */
6851 if (GET_CODE (new_addr) == PLUS)
6852 {
6853 rtx op1 = XEXP (new_addr, 1);
6854 addr_mask_type addr_mask;
6855 unsigned int scalar_regno = reg_or_subregno (scalar_reg);
6856
6857 gcc_assert (HARD_REGISTER_NUM_P (scalar_regno));
6858 if (INT_REGNO_P (scalar_regno))
6859 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
6860
6861 else if (FP_REGNO_P (scalar_regno))
6862 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
6863
6864 else if (ALTIVEC_REGNO_P (scalar_regno))
6865 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
6866
6867 else
6868 gcc_unreachable ();
6869
6870 if (REG_P (op1) || SUBREG_P (op1))
6871 valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
6872 else
6873 valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
6874 }
6875
6876 else if (REG_P (new_addr) || SUBREG_P (new_addr))
6877 valid_addr_p = true;
6878
6879 else
6880 valid_addr_p = false;
6881
6882 if (!valid_addr_p)
6883 {
6884 emit_move_insn (base_tmp, new_addr);
6885 new_addr = base_tmp;
6886 }
6887
6888 return change_address (mem, scalar_mode, new_addr);
6889 }
6890
6891 /* Split a variable vec_extract operation into the component instructions. */
6892
6893 void
6894 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
6895 rtx tmp_altivec)
6896 {
6897 machine_mode mode = GET_MODE (src);
6898 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
6899 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
6900 int byte_shift = exact_log2 (scalar_size);
6901
6902 gcc_assert (byte_shift >= 0);
6903
6904 /* If we are given a memory address, optimize to load just the element. We
6905 don't have to adjust the vector element number on little endian
6906 systems. */
6907 if (MEM_P (src))
6908 {
6909 int num_elements = GET_MODE_NUNITS (mode);
6910 rtx num_ele_m1 = GEN_INT (num_elements - 1);
6911
6912 emit_insn (gen_anddi3 (element, element, num_ele_m1));
6913 gcc_assert (REG_P (tmp_gpr));
6914 emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
6915 tmp_gpr, scalar_mode));
6916 return;
6917 }
6918
6919 else if (REG_P (src) || SUBREG_P (src))
6920 {
6921 int num_elements = GET_MODE_NUNITS (mode);
6922 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
6923 int bit_shift = 7 - exact_log2 (num_elements);
6924 rtx element2;
6925 unsigned int dest_regno = reg_or_subregno (dest);
6926 unsigned int src_regno = reg_or_subregno (src);
6927 unsigned int element_regno = reg_or_subregno (element);
6928
6929 gcc_assert (REG_P (tmp_gpr));
6930
6931 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
6932 a general purpose register. */
6933 if (TARGET_P9_VECTOR
6934 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
6935 && INT_REGNO_P (dest_regno)
6936 && ALTIVEC_REGNO_P (src_regno)
6937 && INT_REGNO_P (element_regno))
6938 {
6939 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
6940 rtx element_si = gen_rtx_REG (SImode, element_regno);
6941
6942 if (mode == V16QImode)
6943 emit_insn (BYTES_BIG_ENDIAN
6944 ? gen_vextublx (dest_si, element_si, src)
6945 : gen_vextubrx (dest_si, element_si, src));
6946
6947 else if (mode == V8HImode)
6948 {
6949 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
6950 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
6951 emit_insn (BYTES_BIG_ENDIAN
6952 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
6953 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
6954 }
6955
6956
6957 else
6958 {
6959 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
6960 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
6961 emit_insn (BYTES_BIG_ENDIAN
6962 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
6963 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
6964 }
6965
6966 return;
6967 }
6968
6969
6970 gcc_assert (REG_P (tmp_altivec));
6971
6972 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
6973 an XOR, otherwise we need to subtract.  The shift amount is chosen so
6974 that VSLO will shift the element into the upper position (adding 3
6975 converts a byte shift into a bit shift). */
6976 if (scalar_size == 8)
6977 {
6978 if (!BYTES_BIG_ENDIAN)
6979 {
6980 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
6981 element2 = tmp_gpr;
6982 }
6983 else
6984 element2 = element;
6985
6986 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
6987 bit. */
6988 emit_insn (gen_rtx_SET (tmp_gpr,
6989 gen_rtx_AND (DImode,
6990 gen_rtx_ASHIFT (DImode,
6991 element2,
6992 GEN_INT (6)),
6993 GEN_INT (64))));
6994 }
6995 else
6996 {
6997 if (!BYTES_BIG_ENDIAN)
6998 {
6999 rtx num_ele_m1 = GEN_INT (num_elements - 1);
7000
7001 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7002 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7003 element2 = tmp_gpr;
7004 }
7005 else
7006 element2 = element;
7007
7008 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7009 }
7010
7011 /* Get the value into the lower byte of the Altivec register where VSLO
7012 expects it. */
7013 if (TARGET_P9_VECTOR)
7014 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7015 else if (can_create_pseudo_p ())
7016 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7017 else
7018 {
7019 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7020 emit_move_insn (tmp_di, tmp_gpr);
7021 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7022 }
7023
7024 /* Do the VSLO to get the value into the final location. */
7025 switch (mode)
7026 {
7027 case E_V2DFmode:
7028 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7029 return;
7030
7031 case E_V2DImode:
7032 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7033 return;
7034
7035 case E_V4SFmode:
7036 {
7037 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7038 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7039 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7040 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7041 tmp_altivec));
7042
7043 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7044 return;
7045 }
7046
7047 case E_V4SImode:
7048 case E_V8HImode:
7049 case E_V16QImode:
7050 {
7051 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7052 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7053 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7054 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7055 tmp_altivec));
7056 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7057 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
7058 GEN_INT (64 - bits_in_element)));
7059 return;
7060 }
7061
7062 default:
7063 gcc_unreachable ();
7064 }
7065
7066 return;
7067 }
7068 else
7069 gcc_unreachable ();
7070 }
7071
7072 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
7073 selects whether the alignment is ABI-mandated, optional, or
7074 both ABI-mandated and optional alignment. */
7075
7076 unsigned int
7077 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7078 {
7079 if (how != align_opt)
7080 {
7081 if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
7082 align = 128;
7083 }
7084
7085 if (how != align_abi)
7086 {
7087 if (TREE_CODE (type) == ARRAY_TYPE
7088 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7089 {
7090 if (align < BITS_PER_WORD)
7091 align = BITS_PER_WORD;
7092 }
7093 }
7094
7095 return align;
7096 }
7097
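/* Editorial example (not in the original source; hypothetical types):
   under the hook above, a vector type whose existing alignment is 64 bits
   is raised to 128 bits unless HOW is align_opt, and a plain
   "char buf[32]" (an ARRAY_TYPE of QImode elements) is raised to
   BITS_PER_WORD unless HOW is align_abi.  */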
7098 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
7099 instructions simply ignore the low bits; VSX memory instructions
7100 are aligned to 4 or 8 bytes. */
7101
7102 static bool
7103 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
7104 {
7105 return (STRICT_ALIGNMENT
7106 || (!TARGET_EFFICIENT_UNALIGNED_VSX
7107 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
7108 || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode))
7109 && (int) align < VECTOR_ALIGN (mode)))));
7110 }
7111
7112 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7113
7114 bool
7115 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
7116 {
7117 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
7118 {
7119 if (computed != 128)
7120 {
7121 static bool warned;
7122 if (!warned && warn_psabi)
7123 {
7124 warned = true;
7125 inform (input_location,
7126 "the layout of aggregates containing vectors with"
7127 " %d-byte alignment has changed in GCC 5",
7128 computed / BITS_PER_UNIT);
7129 }
7130 }
7131 /* In current GCC there is no special case. */
7132 return false;
7133 }
7134
7135 return false;
7136 }
7137
7138 /* AIX increases natural record alignment to doubleword if the first
7139 field is an FP double while the FP fields remain word aligned. */
7140
7141 unsigned int
7142 rs6000_special_round_type_align (tree type, unsigned int computed,
7143 unsigned int specified)
7144 {
7145 unsigned int align = MAX (computed, specified);
7146 tree field = TYPE_FIELDS (type);
7147
7148 /* Skip all non-field decls.  */
7149 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7150 field = DECL_CHAIN (field);
7151
7152 if (field != NULL && field != type)
7153 {
7154 type = TREE_TYPE (field);
7155 while (TREE_CODE (type) == ARRAY_TYPE)
7156 type = TREE_TYPE (type);
7157
7158 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7159 align = MAX (align, 64);
7160 }
7161
7162 return align;
7163 }
7164
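/* Editorial example (not in the original source; hypothetical type):
   for "struct { double d; int i; }", the code above finds the first
   FIELD_DECL, sees that its type has DFmode, and raises the record
   alignment to at least 64 bits, implementing the AIX rule described
   above.  */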
7165 /* Darwin increases record alignment to the natural alignment of
7166 the first field. */
7167
7168 unsigned int
7169 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7170 unsigned int specified)
7171 {
7172 unsigned int align = MAX (computed, specified);
7173
7174 if (TYPE_PACKED (type))
7175 return align;
7176
7177 /* Find the first field, looking down into aggregates. */
7178 do {
7179 tree field = TYPE_FIELDS (type);
7180 /* Skip all non field decls */
7181 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7182 field = DECL_CHAIN (field);
7183 if (! field)
7184 break;
7185 /* A packed field does not contribute any extra alignment. */
7186 if (DECL_PACKED (field))
7187 return align;
7188 type = TREE_TYPE (field);
7189 while (TREE_CODE (type) == ARRAY_TYPE)
7190 type = TREE_TYPE (type);
7191 } while (AGGREGATE_TYPE_P (type));
7192
7193 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
7194 align = MAX (align, TYPE_ALIGN (type));
7195
7196 return align;
7197 }
7198
7199 /* Return 1 for an operand in small memory on V.4/eabi. */
7200
7201 int
7202 small_data_operand (rtx op ATTRIBUTE_UNUSED,
7203 machine_mode mode ATTRIBUTE_UNUSED)
7204 {
7205 #if TARGET_ELF
7206 rtx sym_ref;
7207
7208 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
7209 return 0;
7210
7211 if (DEFAULT_ABI != ABI_V4)
7212 return 0;
7213
7214 if (SYMBOL_REF_P (op))
7215 sym_ref = op;
7216
7217 else if (GET_CODE (op) != CONST
7218 || GET_CODE (XEXP (op, 0)) != PLUS
7219 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
7220 || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
7221 return 0;
7222
7223 else
7224 {
7225 rtx sum = XEXP (op, 0);
7226 HOST_WIDE_INT summand;
7227
7228 /* We have to be careful here, because it is the referenced address
7229 that must be 32k from _SDA_BASE_, not just the symbol. */
7230 summand = INTVAL (XEXP (sum, 1));
7231 if (summand < 0 || summand > g_switch_value)
7232 return 0;
7233
7234 sym_ref = XEXP (sum, 0);
7235 }
7236
7237 return SYMBOL_REF_SMALL_P (sym_ref);
7238 #else
7239 return 0;
7240 #endif
7241 }
7242
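/* Editorial example (not in the original source; hypothetical operand):
   assuming the default small-data limit of 8 bytes, an operand such as
   (const (plus (symbol_ref "s") (const_int 4))) is accepted above when
   "s" satisfies SYMBOL_REF_SMALL_P, because the summand 4 lies within
   [0, g_switch_value]; a summand of 12 would be rejected.  */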
7243 /* Return true if either operand is a general purpose register. */
7244
7245 bool
7246 gpr_or_gpr_p (rtx op0, rtx op1)
7247 {
7248 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
7249 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
7250 }
7251
7252 /* Return true if this is a move direct operation between GPR registers and
7253 floating point/VSX registers. */
7254
7255 bool
7256 direct_move_p (rtx op0, rtx op1)
7257 {
7258 if (!REG_P (op0) || !REG_P (op1))
7259 return false;
7260
7261 if (!TARGET_DIRECT_MOVE)
7262 return false;
7263
7264 int regno0 = REGNO (op0);
7265 int regno1 = REGNO (op1);
7266 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
7267 return false;
7268
7269 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
7270 return true;
7271
7272 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
7273 return true;
7274
7275 return false;
7276 }
7277
7278 /* Return true if the ADDR is an acceptable address for a quad memory
7279 operation of mode MODE (either LQ/STQ for general purpose registers, or
7280 LXV/STXV for vector registers under ISA 3.0).  STRICT is true if only
7281 hard registers may serve as the base register, and false if pseudo
7282 registers are also acceptable before register allocation. */
7283
7284 bool
7285 quad_address_p (rtx addr, machine_mode mode, bool strict)
7286 {
7287 rtx op0, op1;
7288
7289 if (GET_MODE_SIZE (mode) != 16)
7290 return false;
7291
7292 if (legitimate_indirect_address_p (addr, strict))
7293 return true;
7294
7295 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
7296 return false;
7297
7298 /* Is this a valid prefixed address? If the bottom four bits of the offset
7299 are non-zero, we could use a prefixed instruction (which does not have the
7300 DQ-form constraint that the traditional instruction had) instead of
7301 forcing the unaligned offset to a GPR. */
7302 if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
7303 return true;
7304
7305 if (GET_CODE (addr) != PLUS)
7306 return false;
7307
7308 op0 = XEXP (addr, 0);
7309 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
7310 return false;
7311
7312 op1 = XEXP (addr, 1);
7313 if (!CONST_INT_P (op1))
7314 return false;
7315
7316 return quad_address_offset_p (INTVAL (op1));
7317 }
7318
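/* Editorial example (not in the original source; hypothetical addresses):
   for a 16-byte mode, (plus (reg r4) (const_int 32)) passes the checks
   above because 32 is a valid DQ-form offset, while
   (plus (reg r4) (const_int 40)) fails quad_address_offset_p (the offset
   is not a multiple of 16) and must use a prefixed instruction or be
   reloaded.  */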
7319 /* Return true if this is a load or store quad operation. This function does
7320 not handle the atomic quad memory instructions. */
7321
7322 bool
7323 quad_load_store_p (rtx op0, rtx op1)
7324 {
7325 bool ret;
7326
7327 if (!TARGET_QUAD_MEMORY)
7328 ret = false;
7329
7330 else if (REG_P (op0) && MEM_P (op1))
7331 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
7332 && quad_memory_operand (op1, GET_MODE (op1))
7333 && !reg_overlap_mentioned_p (op0, op1));
7334
7335 else if (MEM_P (op0) && REG_P (op1))
7336 ret = (quad_memory_operand (op0, GET_MODE (op0))
7337 && quad_int_reg_operand (op1, GET_MODE (op1)));
7338
7339 else
7340 ret = false;
7341
7342 if (TARGET_DEBUG_ADDR)
7343 {
7344 fprintf (stderr, "\n========== quad_load_store, return %s\n",
7345 ret ? "true" : "false");
7346 debug_rtx (gen_rtx_SET (op0, op1));
7347 }
7348
7349 return ret;
7350 }
7351
7352 /* Given an address, return a constant offset term if one exists. */
7353
7354 static rtx
7355 address_offset (rtx op)
7356 {
7357 if (GET_CODE (op) == PRE_INC
7358 || GET_CODE (op) == PRE_DEC)
7359 op = XEXP (op, 0);
7360 else if (GET_CODE (op) == PRE_MODIFY
7361 || GET_CODE (op) == LO_SUM)
7362 op = XEXP (op, 1);
7363
7364 if (GET_CODE (op) == CONST)
7365 op = XEXP (op, 0);
7366
7367 if (GET_CODE (op) == PLUS)
7368 op = XEXP (op, 1);
7369
7370 if (CONST_INT_P (op))
7371 return op;
7372
7373 return NULL_RTX;
7374 }
7375
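/* Editorial examples (not in the original source; hypothetical
   addresses): address_offset returns (const_int 8) for
   (plus (reg) (const_int 8)), returns (const_int 12) for
   (lo_sum (reg) (const (plus (symbol_ref "x") (const_int 12)))), and
   returns NULL_RTX for a plain (reg) address.  */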
7376 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
7377 the mode. If we can't find (or don't know) the alignment of the symbol
7378 we assume (optimistically) that it's sufficiently aligned [??? maybe we
7379 should be pessimistic]. Offsets are validated in the same way as for
7380 reg + offset. */
7381 static bool
7382 darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode)
7383 {
7384 /* We should not get here with this. */
7385 gcc_checking_assert (! mode_supports_dq_form (mode));
7386
7387 if (GET_CODE (x) == CONST)
7388 x = XEXP (x, 0);
7389
7390 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
7391 x = XVECEXP (x, 0, 0);
7392
7393 rtx sym = NULL_RTX;
7394 unsigned HOST_WIDE_INT offset = 0;
7395
7396 if (GET_CODE (x) == PLUS)
7397 {
7398 sym = XEXP (x, 0);
7399 if (! SYMBOL_REF_P (sym))
7400 return false;
7401 if (!CONST_INT_P (XEXP (x, 1)))
7402 return false;
7403 offset = INTVAL (XEXP (x, 1));
7404 }
7405 else if (SYMBOL_REF_P (x))
7406 sym = x;
7407 else if (CONST_INT_P (x))
7408 offset = INTVAL (x);
7409 else if (GET_CODE (x) == LABEL_REF)
7410 offset = 0; // We assume code labels are Pmode aligned
7411 else
7412 return false; // Not sure what we have here.
7413
7414 /* If we don't know the alignment of the thing to which the symbol refers,
7415 we assume optimistically it is "enough".
7416 ??? maybe we should be pessimistic instead. */
7417 unsigned align = 0;
7418
7419 if (sym)
7420 {
7421 tree decl = SYMBOL_REF_DECL (sym);
7422 #if TARGET_MACHO
7423 if (MACHO_SYMBOL_INDIRECTION_P (sym))
7424 /* The decl in an indirection symbol is the original one, which might
7425 be less aligned than the indirection. Our indirections are always
7426 pointer-aligned. */
7427 ;
7428 else
7429 #endif
7430 if (decl && DECL_ALIGN (decl))
7431 align = DECL_ALIGN_UNIT (decl);
7432 }
7433
7434 unsigned int extra = 0;
7435 switch (mode)
7436 {
7437 case E_DFmode:
7438 case E_DDmode:
7439 case E_DImode:
7440 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7441 addressing. */
7442 if (VECTOR_MEM_VSX_P (mode))
7443 return false;
7444
7445 if (!TARGET_POWERPC64)
7446 extra = 4;
7447 else if ((offset & 3) || (align & 3))
7448 return false;
7449 break;
7450
7451 case E_TFmode:
7452 case E_IFmode:
7453 case E_KFmode:
7454 case E_TDmode:
7455 case E_TImode:
7456 case E_PTImode:
7457 extra = 8;
7458 if (!TARGET_POWERPC64)
7459 extra = 12;
7460 else if ((offset & 3) || (align & 3))
7461 return false;
7462 break;
7463
7464 default:
7465 break;
7466 }
7467
7468 /* We only care if the access(es) would cause a change to the high part. */
7469 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7470 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7471 }
7472
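/* Editorial sketch (not in the original source): the expression
   ((offset & 0xffff) ^ 0x8000) - 0x8000 used above sign-extends the low
   16 bits of OFFSET, recovering the value a signed 16-bit D-field would
   carry: 0x1234 maps to 0x1234, while 0x9abc maps to -0x6544.  A
   hypothetical helper spelling out the idiom:

     static inline HOST_WIDE_INT
     low16_signed (unsigned HOST_WIDE_INT offset)
     {
       return ((offset & 0xffff) ^ 0x8000) - 0x8000;
     }  */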
7473 /* Return true if the MEM operand is a memory operand suitable for use
7474 with a (full width, possibly multiple) gpr load/store. On
7475 powerpc64 this means the offset must be divisible by 4.
7476 Implements 'Y' constraint.
7477
7478 Accept direct, indexed, offset, lo_sum and tocref. Since this is
7479 a constraint function we know the operand has satisfied a suitable
7480 memory predicate.
7481
7482 Offsetting a lo_sum should not be allowed, except where we know by
7483 alignment that a 32k boundary is not crossed. Note that by
7484 "offsetting" here we mean a further offset to access parts of the
7485 MEM. It's fine to have a lo_sum where the inner address is offset
7486 from a sym, since the same sym+offset will appear in the high part
7487 of the address calculation. */
7488
7489 bool
7490 mem_operand_gpr (rtx op, machine_mode mode)
7491 {
7492 unsigned HOST_WIDE_INT offset;
7493 int extra;
7494 rtx addr = XEXP (op, 0);
7495
7496 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
7497 if (TARGET_UPDATE
7498 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
7499 && mode_supports_pre_incdec_p (mode)
7500 && legitimate_indirect_address_p (XEXP (addr, 0), false))
7501 return true;
7502
7503 /* Allow prefixed instructions if supported. If the bottom two bits of the
7504 offset are non-zero, we could use a prefixed instruction (which does not
7505 have the DS-form constraint that the traditional instruction had) instead
7506 of forcing the unaligned offset to a GPR. */
7507 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
7508 return true;
7509
7510 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
7511 really OK. Doing this early avoids teaching all the other machinery
7512 about them. */
7513 if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
7514 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);
7515
7516 /* Only allow offsettable addresses. See PRs 83969 and 84279. */
7517 if (!rs6000_offsettable_memref_p (op, mode, false))
7518 return false;
7519
7520 op = address_offset (addr);
7521 if (op == NULL_RTX)
7522 return true;
7523
7524 offset = INTVAL (op);
7525 if (TARGET_POWERPC64 && (offset & 3) != 0)
7526 return false;
7527
7528 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7529 if (extra < 0)
7530 extra = 0;
7531
7532 if (GET_CODE (addr) == LO_SUM)
7533 /* For lo_sum addresses, we must allow any offset except one that
7534 causes a wrap, so test only the low 16 bits. */
7535 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7536
7537 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7538 }
7539
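/* Editorial example (not in the original source; hypothetical values):
   on powerpc64, a TImode operand checked above has extra = 16 - 8 = 8,
   so an offset of 32760 is rejected even though its low two bits are
   clear, because the second doubleword at 32760 + 8 no longer fits a
   signed 16-bit field; 32756 is the largest accepted multiple-of-4
   offset.  */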
7540 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
7541 enforce an offset divisible by 4 even for 32-bit. */
7542
7543 bool
7544 mem_operand_ds_form (rtx op, machine_mode mode)
7545 {
7546 unsigned HOST_WIDE_INT offset;
7547 int extra;
7548 rtx addr = XEXP (op, 0);
7549
7550 /* Allow prefixed instructions if supported. If the bottom two bits of the
7551 offset are non-zero, we could use a prefixed instruction (which does not
7552 have the DS-form constraint that the traditional instruction had) instead
7553 of forcing the unaligned offset to a GPR. */
7554 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
7555 return true;
7556
7557 if (!offsettable_address_p (false, mode, addr))
7558 return false;
7559
7560 op = address_offset (addr);
7561 if (op == NULL_RTX)
7562 return true;
7563
7564 offset = INTVAL (op);
7565 if ((offset & 3) != 0)
7566 return false;
7567
7568 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7569 if (extra < 0)
7570 extra = 0;
7571
7572 if (GET_CODE (addr) == LO_SUM)
7573 /* For lo_sum addresses, we must allow any offset except one that
7574 causes a wrap, so test only the low 16 bits. */
7575 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7576
7577 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7578 }
7579 \f
7580 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
7581
7582 static bool
7583 reg_offset_addressing_ok_p (machine_mode mode)
7584 {
7585 switch (mode)
7586 {
7587 case E_V16QImode:
7588 case E_V8HImode:
7589 case E_V4SFmode:
7590 case E_V4SImode:
7591 case E_V2DFmode:
7592 case E_V2DImode:
7593 case E_V1TImode:
7594 case E_TImode:
7595 case E_TFmode:
7596 case E_KFmode:
7597 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
7598 ISA 3.0 vector d-form addressing mode was added. While TImode is not
7599 a vector mode, if we want to use the VSX registers to move it around,
7600 we need to restrict ourselves to reg+reg addressing. Similarly for
7601 IEEE 128-bit floating point that is passed in a single vector
7602 register. */
7603 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
7604 return mode_supports_dq_form (mode);
7605 break;
7606
7607 case E_SDmode:
7608 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
7609 addressing for the LFIWZX and STFIWX instructions. */
7610 if (TARGET_NO_SDMODE_STACK)
7611 return false;
7612 break;
7613
7614 default:
7615 break;
7616 }
7617
7618 return true;
7619 }
7620
7621 static bool
7622 virtual_stack_registers_memory_p (rtx op)
7623 {
7624 int regnum;
7625
7626 if (REG_P (op))
7627 regnum = REGNO (op);
7628
7629 else if (GET_CODE (op) == PLUS
7630 && REG_P (XEXP (op, 0))
7631 && CONST_INT_P (XEXP (op, 1)))
7632 regnum = REGNO (XEXP (op, 0));
7633
7634 else
7635 return false;
7636
7637 return (regnum >= FIRST_VIRTUAL_REGISTER
7638 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
7639 }
7640
7641 /* Return true if a MODE-sized memory access to OP plus OFFSET
7642 is known not to straddle a 32k boundary. This function is used
7643 to determine whether -mcmodel=medium code can use TOC pointer
7644 relative addressing for OP. This means the alignment of the TOC
7645 pointer must also be taken into account, and unfortunately that is
7646 only 8 bytes. */
7647
7648 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
7649 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
7650 #endif
7651
7652 static bool
7653 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
7654 machine_mode mode)
7655 {
7656 tree decl;
7657 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
7658
7659 if (!SYMBOL_REF_P (op))
7660 return false;
7661
7662 /* ISA 3.0 vector d-form addressing is restricted; don't allow
7663 SYMBOL_REF. */
7664 if (mode_supports_dq_form (mode))
7665 return false;
7666
7667 dsize = GET_MODE_SIZE (mode);
7668 decl = SYMBOL_REF_DECL (op);
7669 if (!decl)
7670 {
7671 if (dsize == 0)
7672 return false;
7673
7674 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
7675 replacing memory addresses with an anchor plus offset. We
7676 could find the decl by rummaging around in the block->objects
7677 VEC for the given offset but that seems like too much work. */
7678 dalign = BITS_PER_UNIT;
7679 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
7680 && SYMBOL_REF_ANCHOR_P (op)
7681 && SYMBOL_REF_BLOCK (op) != NULL)
7682 {
7683 struct object_block *block = SYMBOL_REF_BLOCK (op);
7684
7685 dalign = block->alignment;
7686 offset += SYMBOL_REF_BLOCK_OFFSET (op);
7687 }
7688 else if (CONSTANT_POOL_ADDRESS_P (op))
7689 {
7690 /* It would be nice to have get_pool_align ().  */
7691 machine_mode cmode = get_pool_mode (op);
7692
7693 dalign = GET_MODE_ALIGNMENT (cmode);
7694 }
7695 }
7696 else if (DECL_P (decl))
7697 {
7698 dalign = DECL_ALIGN (decl);
7699
7700 if (dsize == 0)
7701 {
7702 /* Allow BLKmode when the entire object is known not to
7703 cross a 32k boundary. */
7704 if (!DECL_SIZE_UNIT (decl))
7705 return false;
7706
7707 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
7708 return false;
7709
7710 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
7711 if (dsize > 32768)
7712 return false;
7713
7714 dalign /= BITS_PER_UNIT;
7715 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7716 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7717 return dalign >= dsize;
7718 }
7719 }
7720 else
7721 gcc_unreachable ();
7722
7723 /* Find how many bits of the alignment we know for this access. */
7724 dalign /= BITS_PER_UNIT;
7725 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7726 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7727 mask = dalign - 1;
7728 lsb = offset & -offset;
7729 mask &= lsb - 1;
7730 dalign = mask + 1;
7731
7732 return dalign >= dsize;
7733 }
7734
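/* Editorial example (not in the original source; hypothetical values):
   in the computation above, a symbol aligned to 8 bytes (the TOC pointer
   cap) accessed at offset 12 gives lsb = 12 & -12 = 4 and thus a
   provable alignment of only 4 bytes, so a 4-byte access is accepted
   (it cannot straddle a 32k boundary) but an 8-byte access returns
   false.  */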
7735 static bool
7736 constant_pool_expr_p (rtx op)
7737 {
7738 rtx base, offset;
7739
7740 split_const (op, &base, &offset);
7741 return (SYMBOL_REF_P (base)
7742 && CONSTANT_POOL_ADDRESS_P (base)
7743 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
7744 }
7745
7746 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
7747 use that as the register to put the HIGH value into if register allocation
7748 is already done. */
7749
7750 rtx
7751 create_TOC_reference (rtx symbol, rtx largetoc_reg)
7752 {
7753 rtx tocrel, tocreg, hi;
7754
7755 gcc_assert (TARGET_TOC);
7756
7757 if (TARGET_DEBUG_ADDR)
7758 {
7759 if (SYMBOL_REF_P (symbol))
7760 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
7761 XSTR (symbol, 0));
7762 else
7763 {
7764 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
7765 GET_RTX_NAME (GET_CODE (symbol)));
7766 debug_rtx (symbol);
7767 }
7768 }
7769
7770 if (!can_create_pseudo_p ())
7771 df_set_regs_ever_live (TOC_REGISTER, true);
7772
7773 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
7774 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
7775 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
7776 return tocrel;
7777
7778 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
7779 if (largetoc_reg != NULL)
7780 {
7781 emit_move_insn (largetoc_reg, hi);
7782 hi = largetoc_reg;
7783 }
7784 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
7785 }
7786
7787 /* These are only used to pass through from print_operand/print_operand_address
7788 to rs6000_output_addr_const_extra over the intervening function
7789 output_addr_const, which is not target code. */
7790 static const_rtx tocrel_base_oac, tocrel_offset_oac;
7791
7792 /* Return true if OP is a toc pointer relative address (the output
7793 of create_TOC_reference). If STRICT, do not match non-split
7794 -mcmodel=large/medium toc pointer relative addresses. If the pointers
7795 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
7796 TOCREL_OFFSET_RET respectively. */
7797
7798 bool
7799 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
7800 const_rtx *tocrel_offset_ret)
7801 {
7802 if (!TARGET_TOC)
7803 return false;
7804
7805 if (TARGET_CMODEL != CMODEL_SMALL)
7806 {
7807 /* When strict, ensure we have everything tidy. */
7808 if (strict
7809 && !(GET_CODE (op) == LO_SUM
7810 && REG_P (XEXP (op, 0))
7811 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
7812 return false;
7813
7814 /* When not strict, allow non-split TOC addresses and also allow
7815 (lo_sum (high ..)) TOC addresses created during reload. */
7816 if (GET_CODE (op) == LO_SUM)
7817 op = XEXP (op, 1);
7818 }
7819
7820 const_rtx tocrel_base = op;
7821 const_rtx tocrel_offset = const0_rtx;
7822
7823 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
7824 {
7825 tocrel_base = XEXP (op, 0);
7826 tocrel_offset = XEXP (op, 1);
7827 }
7828
7829 if (tocrel_base_ret)
7830 *tocrel_base_ret = tocrel_base;
7831 if (tocrel_offset_ret)
7832 *tocrel_offset_ret = tocrel_offset;
7833
7834 return (GET_CODE (tocrel_base) == UNSPEC
7835 && XINT (tocrel_base, 1) == UNSPEC_TOCREL
7836 && REG_P (XVECEXP (tocrel_base, 0, 1))
7837 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
7838 }
7839
7840 /* Return true if X is a constant pool address, and also for cmodel=medium
7841 if X is a toc-relative address known to be offsettable within MODE. */
7842
7843 bool
7844 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
7845 bool strict)
7846 {
7847 const_rtx tocrel_base, tocrel_offset;
7848 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
7849 && (TARGET_CMODEL != CMODEL_MEDIUM
7850 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
7851 || mode == QImode
7852 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
7853 INTVAL (tocrel_offset), mode)));
7854 }
7855
7856 static bool
7857 legitimate_small_data_p (machine_mode mode, rtx x)
7858 {
7859 return (DEFAULT_ABI == ABI_V4
7860 && !flag_pic && !TARGET_TOC
7861 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
7862 && small_data_operand (x, mode));
7863 }
7864
7865 bool
7866 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
7867 bool strict, bool worst_case)
7868 {
7869 unsigned HOST_WIDE_INT offset;
7870 unsigned int extra;
7871
7872 if (GET_CODE (x) != PLUS)
7873 return false;
7874 if (!REG_P (XEXP (x, 0)))
7875 return false;
7876 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7877 return false;
7878 if (mode_supports_dq_form (mode))
7879 return quad_address_p (x, mode, strict);
7880 if (!reg_offset_addressing_ok_p (mode))
7881 return virtual_stack_registers_memory_p (x);
7882 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
7883 return true;
7884 if (!CONST_INT_P (XEXP (x, 1)))
7885 return false;
7886
7887 offset = INTVAL (XEXP (x, 1));
7888 extra = 0;
7889 switch (mode)
7890 {
7891 case E_DFmode:
7892 case E_DDmode:
7893 case E_DImode:
7894 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7895 addressing. */
7896 if (VECTOR_MEM_VSX_P (mode))
7897 return false;
7898
7899 if (!worst_case)
7900 break;
7901 if (!TARGET_POWERPC64)
7902 extra = 4;
7903 else if (offset & 3)
7904 return false;
7905 break;
7906
7907 case E_TFmode:
7908 case E_IFmode:
7909 case E_KFmode:
7910 case E_TDmode:
7911 case E_TImode:
7912 case E_PTImode:
7913 extra = 8;
7914 if (!worst_case)
7915 break;
7916 if (!TARGET_POWERPC64)
7917 extra = 12;
7918 else if (offset & 3)
7919 return false;
7920 break;
7921
7922 default:
7923 break;
7924 }
7925
7926 if (TARGET_PREFIXED_ADDR)
7927 return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
7928 else
7929 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7930 }
7931
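/* Editorial example (not in the original source; hypothetical values):
   for TFmode on 32-bit with WORST_CASE, extra = 12 above, modelling the
   16-byte value as four word accesses at offset, offset+4, offset+8 and
   offset+12.  An offset of 32756 is therefore rejected (32756 + 12
   overflows a signed 16-bit field), while TARGET_PREFIXED_ADDR widens
   the test to a signed 34-bit range.  */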
7932 bool
7933 legitimate_indexed_address_p (rtx x, int strict)
7934 {
7935 rtx op0, op1;
7936
7937 if (GET_CODE (x) != PLUS)
7938 return false;
7939
7940 op0 = XEXP (x, 0);
7941 op1 = XEXP (x, 1);
7942
7943 return (REG_P (op0) && REG_P (op1)
7944 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
7945 && INT_REG_OK_FOR_INDEX_P (op1, strict))
7946 || (INT_REG_OK_FOR_BASE_P (op1, strict)
7947 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
7948 }
7949
7950 bool
7951 avoiding_indexed_address_p (machine_mode mode)
7952 {
7953 /* Avoid indexed addressing for modes that have non-indexed
7954 load/store instruction forms. */
7955 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
7956 }
7957
7958 bool
7959 legitimate_indirect_address_p (rtx x, int strict)
7960 {
7961 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
7962 }
7963
7964 bool
7965 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
7966 {
7967 if (!TARGET_MACHO || !flag_pic
7968 || mode != SImode || !MEM_P (x))
7969 return false;
7970 x = XEXP (x, 0);
7971
7972 if (GET_CODE (x) != LO_SUM)
7973 return false;
7974 if (!REG_P (XEXP (x, 0)))
7975 return false;
7976 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
7977 return false;
7978 x = XEXP (x, 1);
7979
7980 return CONSTANT_P (x);
7981 }
7982
7983 static bool
7984 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
7985 {
7986 if (GET_CODE (x) != LO_SUM)
7987 return false;
7988 if (!REG_P (XEXP (x, 0)))
7989 return false;
7990 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7991 return false;
7992 /* Quad word addresses are restricted, and we can't use LO_SUM. */
7993 if (mode_supports_dq_form (mode))
7994 return false;
7995 x = XEXP (x, 1);
7996
7997 if (TARGET_ELF || TARGET_MACHO)
7998 {
7999 bool large_toc_ok;
8000
8001 if (DEFAULT_ABI == ABI_V4 && flag_pic)
8002 return false;
8003 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS, as it usually calls
8004 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
8005 recognizes some LO_SUM addresses as valid although this
8006 function says the opposite. In most cases LRA can generate
8007 correct code for address reloads through its own transformations;
8008 only some LO_SUM cases defeat it. So we need to add code here
8009 saying that those addresses are still valid. */
8010 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8011 && small_toc_ref (x, VOIDmode));
8012 if (TARGET_TOC && ! large_toc_ok)
8013 return false;
8014 if (GET_MODE_NUNITS (mode) != 1)
8015 return false;
8016 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8017 && !(/* ??? Assume floating point reg based on mode? */
8018 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8019 return false;
8020
8021 return CONSTANT_P (x) || large_toc_ok;
8022 }
8023
8024 return false;
8025 }
8026
8027
8028 /* Try machine-dependent ways of modifying an illegitimate address
8029 to be legitimate. If we find one, return the new, valid address.
8030 This is used from only one place: `memory_address' in explow.c.
8031
8032 OLDX is the address as it was before break_out_memory_refs was
8033 called. In some cases it is useful to look at this to decide what
8034 needs to be done.
8035
8036 It is always safe for this function to do nothing. It exists to
8037 recognize opportunities to optimize the output.
8038
8039 On RS/6000, first check for the sum of a register with a constant
8040 integer that is out of range. If so, generate code to add the
8041 constant with the low-order 16 bits masked to the register and force
8042 this result into another register (this can be done with `cau').
8043 Then generate an address of REG+(CONST&0xffff), allowing for the
8044 possibility of bit 16 being a one.
8045
8046 Then check for the sum of a register and something not constant; try to
8047 load the other part into a register and return the sum. */
8048
8049 static rtx
8050 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8051 machine_mode mode)
8052 {
8053 unsigned int extra;
8054
8055 if (!reg_offset_addressing_ok_p (mode)
8056 || mode_supports_dq_form (mode))
8057 {
8058 if (virtual_stack_registers_memory_p (x))
8059 return x;
8060
8061 /* In theory we should not be seeing addresses of the form reg+0,
8062 but just in case it is generated, optimize it away. */
8063 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8064 return force_reg (Pmode, XEXP (x, 0));
8065
8066 /* For TImode with load/store quad, restrict addresses to just a single
8067 pointer, so it works with both GPRs and VSX registers. */
8068 /* Make sure both operands are registers. */
8069 else if (GET_CODE (x) == PLUS
8070 && (mode != TImode || !TARGET_VSX))
8071 return gen_rtx_PLUS (Pmode,
8072 force_reg (Pmode, XEXP (x, 0)),
8073 force_reg (Pmode, XEXP (x, 1)));
8074 else
8075 return force_reg (Pmode, x);
8076 }
8077 if (SYMBOL_REF_P (x))
8078 {
8079 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8080 if (model != 0)
8081 return rs6000_legitimize_tls_address (x, model);
8082 }
8083
8084 extra = 0;
8085 switch (mode)
8086 {
8087 case E_TFmode:
8088 case E_TDmode:
8089 case E_TImode:
8090 case E_PTImode:
8091 case E_IFmode:
8092 case E_KFmode:
8093 /* As in legitimate_offset_address_p we do not assume
8094 worst-case. The mode here is just a hint as to the registers
8095 used. A TImode is usually in gprs, but may actually be in
8096 fprs. Leave worst-case scenario for reload to handle via
8097 insn constraints. PTImode is only GPRs. */
8098 extra = 8;
8099 break;
8100 default:
8101 break;
8102 }
8103
8104 if (GET_CODE (x) == PLUS
8105 && REG_P (XEXP (x, 0))
8106 && CONST_INT_P (XEXP (x, 1))
8107 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8108 >= 0x10000 - extra))
8109 {
8110 HOST_WIDE_INT high_int, low_int;
8111 rtx sum;
8112 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8113 if (low_int >= 0x8000 - extra)
8114 low_int = 0;
8115 high_int = INTVAL (XEXP (x, 1)) - low_int;
8116 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8117 GEN_INT (high_int)), 0);
8118 return plus_constant (Pmode, sum, low_int);
8119 }
8120 else if (GET_CODE (x) == PLUS
8121 && REG_P (XEXP (x, 0))
8122 && !CONST_INT_P (XEXP (x, 1))
8123 && GET_MODE_NUNITS (mode) == 1
8124 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8125 || (/* ??? Assume floating point reg based on mode? */
8126 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8127 && !avoiding_indexed_address_p (mode))
8128 {
8129 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8130 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
8131 }
8132 else if ((TARGET_ELF
8133 #if TARGET_MACHO
8134 || !MACHO_DYNAMIC_NO_PIC_P
8135 #endif
8136 )
8137 && TARGET_32BIT
8138 && TARGET_NO_TOC_OR_PCREL
8139 && !flag_pic
8140 && !CONST_INT_P (x)
8141 && !CONST_WIDE_INT_P (x)
8142 && !CONST_DOUBLE_P (x)
8143 && CONSTANT_P (x)
8144 && GET_MODE_NUNITS (mode) == 1
8145 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8146 || (/* ??? Assume floating point reg based on mode? */
8147 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
8148 {
8149 rtx reg = gen_reg_rtx (Pmode);
8150 if (TARGET_ELF)
8151 emit_insn (gen_elf_high (reg, x));
8152 else
8153 emit_insn (gen_macho_high (Pmode, reg, x));
8154 return gen_rtx_LO_SUM (Pmode, reg, x);
8155 }
8156 else if (TARGET_TOC
8157 && SYMBOL_REF_P (x)
8158 && constant_pool_expr_p (x)
8159 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
8160 return create_TOC_reference (x, NULL_RTX);
8161 else
8162 return x;
8163 }
8164
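/* Editorial example (not in the original source; hypothetical values):
   if X is (plus (reg r9) (const_int 0x12345)), the offset is out of
   16-bit range, so the code above splits it into high_int = 0x10000 and
   low_int = 0x2345, adds the high part with an addis-style instruction,
   and returns an address equivalent to
   (plus (reg tmp) (const_int 0x2345)).  */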
8165 /* Debug version of rs6000_legitimize_address. */
8166 static rtx
8167 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
8168 {
8169 rtx ret;
8170 rtx_insn *insns;
8171
8172 start_sequence ();
8173 ret = rs6000_legitimize_address (x, oldx, mode);
8174 insns = get_insns ();
8175 end_sequence ();
8176
8177 if (ret != x)
8178 {
8179 fprintf (stderr,
8180 "\nrs6000_legitimize_address: mode %s, old code %s, "
8181 "new code %s, modified\n",
8182 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8183 GET_RTX_NAME (GET_CODE (ret)));
8184
8185 fprintf (stderr, "Original address:\n");
8186 debug_rtx (x);
8187
8188 fprintf (stderr, "oldx:\n");
8189 debug_rtx (oldx);
8190
8191 fprintf (stderr, "New address:\n");
8192 debug_rtx (ret);
8193
8194 if (insns)
8195 {
8196 fprintf (stderr, "Insns added:\n");
8197 debug_rtx_list (insns, 20);
8198 }
8199 }
8200 else
8201 {
8202 fprintf (stderr,
8203 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8204 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8205
8206 debug_rtx (x);
8207 }
8208
8209 if (insns)
8210 emit_insn (insns);
8211
8212 return ret;
8213 }
8214
8215 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8216 We need to emit DTP-relative relocations. */
8217
8218 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8219 static void
8220 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8221 {
8222 switch (size)
8223 {
8224 case 4:
8225 fputs ("\t.long\t", file);
8226 break;
8227 case 8:
8228 fputs (DOUBLE_INT_ASM_OP, file);
8229 break;
8230 default:
8231 gcc_unreachable ();
8232 }
8233 output_addr_const (file, x);
8234 if (TARGET_ELF)
8235 fputs ("@dtprel+0x8000", file);
8236 else if (TARGET_XCOFF && SYMBOL_REF_P (x))
8237 {
8238 switch (SYMBOL_REF_TLS_MODEL (x))
8239 {
8240 case 0:
8241 break;
8242 case TLS_MODEL_LOCAL_EXEC:
8243 fputs ("@le", file);
8244 break;
8245 case TLS_MODEL_INITIAL_EXEC:
8246 fputs ("@ie", file);
8247 break;
8248 case TLS_MODEL_GLOBAL_DYNAMIC:
8249 case TLS_MODEL_LOCAL_DYNAMIC:
8250 fputs ("@m", file);
8251 break;
8252 default:
8253 gcc_unreachable ();
8254 }
8255 }
8256 }
8257
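/* Editorial example (not in the original source; hypothetical symbol):
   for a 4-byte DTP-relative reference to "foo", the routine above emits
   "\t.long\tfoo@dtprel+0x8000" on ELF; on XCOFF the suffix instead
   encodes the TLS model, e.g. "@le" for local-exec.  */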
8258 /* Return true if X is a symbol that refers to real (rather than emulated)
8259 TLS. */
8260
8261 static bool
8262 rs6000_real_tls_symbol_ref_p (rtx x)
8263 {
8264 return (SYMBOL_REF_P (x)
8265 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8266 }
8267
8268 /* In the name of slightly smaller debug output, and to cater to
8269 general assembler lossage, recognize various UNSPEC sequences
8270 and turn them back into a direct symbol reference. */
8271
8272 static rtx
8273 rs6000_delegitimize_address (rtx orig_x)
8274 {
8275 rtx x, y, offset;
8276
8277 if (GET_CODE (orig_x) == UNSPEC && XINT (orig_x, 1) == UNSPEC_FUSION_GPR)
8278 orig_x = XVECEXP (orig_x, 0, 0);
8279
8280 orig_x = delegitimize_mem_from_attrs (orig_x);
8281
8282 x = orig_x;
8283 if (MEM_P (x))
8284 x = XEXP (x, 0);
8285
8286 y = x;
8287 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
8288 y = XEXP (y, 1);
8289
8290 offset = NULL_RTX;
8291 if (GET_CODE (y) == PLUS
8292 && GET_MODE (y) == Pmode
8293 && CONST_INT_P (XEXP (y, 1)))
8294 {
8295 offset = XEXP (y, 1);
8296 y = XEXP (y, 0);
8297 }
8298
8299 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
8300 {
8301 y = XVECEXP (y, 0, 0);
8302
8303 #ifdef HAVE_AS_TLS
8304 /* Do not associate thread-local symbols with the original
8305 constant pool symbol. */
8306 if (TARGET_XCOFF
8307 && SYMBOL_REF_P (y)
8308 && CONSTANT_POOL_ADDRESS_P (y)
8309 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8310 return orig_x;
8311 #endif
8312
8313 if (offset != NULL_RTX)
8314 y = gen_rtx_PLUS (Pmode, y, offset);
8315 if (!MEM_P (orig_x))
8316 return y;
8317 else
8318 return replace_equiv_address_nv (orig_x, y);
8319 }
8320
8321 if (TARGET_MACHO
8322 && GET_CODE (orig_x) == LO_SUM
8323 && GET_CODE (XEXP (orig_x, 1)) == CONST)
8324 {
8325 y = XEXP (XEXP (orig_x, 1), 0);
8326 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
8327 return XVECEXP (y, 0, 0);
8328 }
8329
8330 return orig_x;
8331 }
8332
8333 /* Return true if X shouldn't be emitted into the debug info.
8334 The linker doesn't like .toc section references from
8335 .debug_* sections, so reject .toc section symbols. */
8336
8337 static bool
8338 rs6000_const_not_ok_for_debug_p (rtx x)
8339 {
8340 if (GET_CODE (x) == UNSPEC)
8341 return true;
8342 if (SYMBOL_REF_P (x)
8343 && CONSTANT_POOL_ADDRESS_P (x))
8344 {
8345 rtx c = get_pool_constant (x);
8346 machine_mode cmode = get_pool_mode (x);
8347 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
8348 return true;
8349 }
8350
8351 return false;
8352 }
8353
8354 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
8355
8356 static bool
8357 rs6000_legitimate_combined_insn (rtx_insn *insn)
8358 {
8359 int icode = INSN_CODE (insn);
8360
8361 /* Reject creating doloop insns. Combine should not be allowed
8362 to create these for a number of reasons:
8363 1) In a nested loop, if combine creates one of these in an
8364 outer loop and the register allocator happens to allocate ctr
8365 to the outer loop insn, then the inner loop can't use ctr.
8366 Inner loops ought to be more highly optimized.
8367 2) Combine often wants to create one of these from what was
8368 originally a three insn sequence, first combining the three
8369 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
8370 allocated ctr, the splitter takes us back to the three insn
8371 sequence. It's better to stop combine at the two insn
8372 sequence.
8373 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
8374 insns, the register allocator sometimes uses floating point
8375 or vector registers for the pseudo. Since ctrsi/ctrdi is a
8376 jump insn and output reloads are not implemented for jumps,
8377 the ctrsi/ctrdi splitters need to handle all possible cases.
8378 That's a pain, and it gets to be seriously difficult when a
8379 splitter that runs after reload needs memory to transfer from
8380 a gpr to fpr. See PR70098 and PR71763 which are not fixed
8381 for the difficult case. It's better to not create problems
8382 in the first place. */
8383 if (icode != CODE_FOR_nothing
8384 && (icode == CODE_FOR_bdz_si
8385 || icode == CODE_FOR_bdz_di
8386 || icode == CODE_FOR_bdnz_si
8387 || icode == CODE_FOR_bdnz_di
8388 || icode == CODE_FOR_bdztf_si
8389 || icode == CODE_FOR_bdztf_di
8390 || icode == CODE_FOR_bdnztf_si
8391 || icode == CODE_FOR_bdnztf_di))
8392 return false;
8393
8394 return true;
8395 }
8396
8397 /* Construct the SYMBOL_REF for the tls_get_addr function. */
8398
8399 static GTY(()) rtx rs6000_tls_symbol;
8400 static rtx
8401 rs6000_tls_get_addr (void)
8402 {
8403 if (!rs6000_tls_symbol)
8404 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
8405
8406 return rs6000_tls_symbol;
8407 }
8408
8409 /* Construct the SYMBOL_REF for TLS GOT references. */
8410
8411 static GTY(()) rtx rs6000_got_symbol;
8412 rtx
8413 rs6000_got_sym (void)
8414 {
8415 if (!rs6000_got_symbol)
8416 {
8417 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8418 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
8419 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
8420 }
8421
8422 return rs6000_got_symbol;
8423 }
8424
8425 /* AIX Thread-Local Address support. */
8426
8427 static rtx
8428 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
8429 {
8430 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
8431 const char *name;
8432 char *tlsname;
8433
8434 name = XSTR (addr, 0);
8435 /* Append the TLS CSECT qualifier, unless the symbol is already qualified
8436 or will be placed in the TLS private data section. */
8437 if (name[strlen (name) - 1] != ']'
8438 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
8439 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
8440 {
8441 tlsname = XALLOCAVEC (char, strlen (name) + 4);
8442 strcpy (tlsname, name);
8443 strcat (tlsname,
8444 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
8445 tlsaddr = copy_rtx (addr);
8446 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
8447 }
8448 else
8449 tlsaddr = addr;
8450
8451 /* Place addr into TOC constant pool. */
8452 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
8453
8454 /* Output the TOC entry and create the MEM referencing the value. */
8455 if (constant_pool_expr_p (XEXP (sym, 0))
8456 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
8457 {
8458 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
8459 mem = gen_const_mem (Pmode, tocref);
8460 set_mem_alias_set (mem, get_TOC_alias_set ());
8461 }
8462 else
8463 return sym;
8464
8465 /* Use global-dynamic for local-dynamic. */
8466 if (model == TLS_MODEL_GLOBAL_DYNAMIC
8467 || model == TLS_MODEL_LOCAL_DYNAMIC)
8468 {
8469 /* Create new TOC reference for @m symbol. */
8470 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
8471 tlsname = XALLOCAVEC (char, strlen (name) + 1);
8472 strcpy (tlsname, "*LCM");
8473 strcat (tlsname, name + 3);
8474 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
8475 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
8476 tocref = create_TOC_reference (modaddr, NULL_RTX);
8477 rtx modmem = gen_const_mem (Pmode, tocref);
8478 set_mem_alias_set (modmem, get_TOC_alias_set ());
8479
8480 rtx modreg = gen_reg_rtx (Pmode);
8481 emit_insn (gen_rtx_SET (modreg, modmem));
8482
8483 tmpreg = gen_reg_rtx (Pmode);
8484 emit_insn (gen_rtx_SET (tmpreg, mem));
8485
8486 dest = gen_reg_rtx (Pmode);
8487 if (TARGET_32BIT)
8488 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
8489 else
8490 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
8491 return dest;
8492 }
8493 /* Obtain the TLS pointer: a call on 32-bit, or GPR 13 on 64-bit. */
8494 else if (TARGET_32BIT)
8495 {
8496 tlsreg = gen_reg_rtx (SImode);
8497 emit_insn (gen_tls_get_tpointer (tlsreg));
8498 }
8499 else
8500 tlsreg = gen_rtx_REG (DImode, 13);
8501
8502 /* Load the TOC value into temporary register. */
8503 tmpreg = gen_reg_rtx (Pmode);
8504 emit_insn (gen_rtx_SET (tmpreg, mem));
8505 set_unique_reg_note (get_last_insn (), REG_EQUAL,
8506 gen_rtx_MINUS (Pmode, addr, tlsreg));
8507
8508 /* Add TOC symbol value to TLS pointer. */
8509 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
8510
8511 return dest;
8512 }
8513
8514 /* Passes the TLS argument value of the global-dynamic and local-dynamic
8515 emit_library_call_value calls in rs6000_legitimize_tls_address through
8516 to rs6000_call_aix and rs6000_call_sysv. This is used to emit the
8517 marker relocs put on __tls_get_addr calls. */
8518 static rtx global_tlsarg;
8519
8520 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
8521 this (thread-local) address. */
8522
8523 static rtx
8524 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
8525 {
8526 rtx dest, insn;
8527
8528 if (TARGET_XCOFF)
8529 return rs6000_legitimize_tls_address_aix (addr, model);
8530
8531 dest = gen_reg_rtx (Pmode);
8532 if (model == TLS_MODEL_LOCAL_EXEC
8533 && (rs6000_tls_size == 16 || rs6000_pcrel_p (cfun)))
8534 {
8535 rtx tlsreg;
8536
8537 if (TARGET_64BIT)
8538 {
8539 tlsreg = gen_rtx_REG (Pmode, 13);
8540 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
8541 }
8542 else
8543 {
8544 tlsreg = gen_rtx_REG (Pmode, 2);
8545 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
8546 }
8547 emit_insn (insn);
8548 }
8549 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
8550 {
8551 rtx tlsreg, tmp;
8552
8553 tmp = gen_reg_rtx (Pmode);
8554 if (TARGET_64BIT)
8555 {
8556 tlsreg = gen_rtx_REG (Pmode, 13);
8557 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
8558 }
8559 else
8560 {
8561 tlsreg = gen_rtx_REG (Pmode, 2);
8562 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
8563 }
8564 emit_insn (insn);
8565 if (TARGET_64BIT)
8566 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
8567 else
8568 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
8569 emit_insn (insn);
8570 }
8571 else
8572 {
8573 rtx got, tga, tmp1, tmp2;
8574
8575 /* We currently use relocations like @got@tlsgd for tls, which
8576 means the linker will handle allocation of tls entries, placing
8577 them in the .got section. So use a pointer to the .got section,
8578 not one to secondary TOC sections used by 64-bit -mminimal-toc,
8579 or to secondary GOT sections used by 32-bit -fPIC. */
8580 if (rs6000_pcrel_p (cfun))
8581 got = const0_rtx;
8582 else if (TARGET_64BIT)
8583 got = gen_rtx_REG (Pmode, 2);
8584 else
8585 {
8586 if (flag_pic == 1)
8587 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
8588 else
8589 {
8590 rtx gsym = rs6000_got_sym ();
8591 got = gen_reg_rtx (Pmode);
8592 if (flag_pic == 0)
8593 rs6000_emit_move (got, gsym, Pmode);
8594 else
8595 {
8596 rtx mem, lab;
8597
8598 tmp1 = gen_reg_rtx (Pmode);
8599 tmp2 = gen_reg_rtx (Pmode);
8600 mem = gen_const_mem (Pmode, tmp1);
8601 lab = gen_label_rtx ();
8602 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
8603 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
8604 if (TARGET_LINK_STACK)
8605 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
8606 emit_move_insn (tmp2, mem);
8607 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
8608 set_unique_reg_note (last, REG_EQUAL, gsym);
8609 }
8610 }
8611 }
8612
8613 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
8614 {
8615 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
8616 UNSPEC_TLSGD);
8617 tga = rs6000_tls_get_addr ();
8618 rtx argreg = gen_rtx_REG (Pmode, 3);
8619 emit_insn (gen_rtx_SET (argreg, arg));
8620 global_tlsarg = arg;
8621 emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode);
8622 global_tlsarg = NULL_RTX;
8623
8624 /* Make a note so that the result of this call can be CSEd. */
8625 rtvec vec = gen_rtvec (1, copy_rtx (arg));
8626 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
8627 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
8628 }
8629 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
8630 {
8631 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
8632 tga = rs6000_tls_get_addr ();
8633 tmp1 = gen_reg_rtx (Pmode);
8634 rtx argreg = gen_rtx_REG (Pmode, 3);
8635 emit_insn (gen_rtx_SET (argreg, arg));
8636 global_tlsarg = arg;
8637 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode);
8638 global_tlsarg = NULL_RTX;
8639
8640 /* Make a note so that the result of this call can be CSEd. */
8641 rtvec vec = gen_rtvec (1, copy_rtx (arg));
8642 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
8643 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
8644
8645 if (rs6000_tls_size == 16 || rs6000_pcrel_p (cfun))
8646 {
8647 if (TARGET_64BIT)
8648 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
8649 else
8650 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
8651 }
8652 else if (rs6000_tls_size == 32)
8653 {
8654 tmp2 = gen_reg_rtx (Pmode);
8655 if (TARGET_64BIT)
8656 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
8657 else
8658 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
8659 emit_insn (insn);
8660 if (TARGET_64BIT)
8661 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
8662 else
8663 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
8664 }
8665 else
8666 {
8667 tmp2 = gen_reg_rtx (Pmode);
8668 if (TARGET_64BIT)
8669 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
8670 else
8671 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
8672 emit_insn (insn);
8673 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
8674 }
8675 emit_insn (insn);
8676 }
8677 else
8678 {
8679 /* IE, or 64-bit offset LE. */
8680 tmp2 = gen_reg_rtx (Pmode);
8681 if (TARGET_64BIT)
8682 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
8683 else
8684 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
8685 emit_insn (insn);
8686 if (rs6000_pcrel_p (cfun))
8687 {
8688 if (TARGET_64BIT)
8689 insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
8690 else
8691 insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
8692 }
8693 else if (TARGET_64BIT)
8694 insn = gen_tls_tls_64 (dest, tmp2, addr);
8695 else
8696 insn = gen_tls_tls_32 (dest, tmp2, addr);
8697 emit_insn (insn);
8698 }
8699 }
8700
8701 return dest;
8702 }
8703
8704 /* Only create the global variable for the stack protect guard if we are using
8705 the global flavor of that guard. */
8706 static tree
8707 rs6000_init_stack_protect_guard (void)
8708 {
8709 if (rs6000_stack_protector_guard == SSP_GLOBAL)
8710 return default_stack_protect_guard ();
8711
8712 return NULL_TREE;
8713 }
8714
8715 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8716
8717 static bool
8718 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8719 {
8720 if (GET_CODE (x) == HIGH
8721 && GET_CODE (XEXP (x, 0)) == UNSPEC)
8722 return true;
8723
8724 /* A TLS symbol in the TOC cannot contain a sum. */
8725 if (GET_CODE (x) == CONST
8726 && GET_CODE (XEXP (x, 0)) == PLUS
8727 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
8728 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
8729 return true;
8730
8731 /* Do not place an ELF TLS symbol in the constant pool. */
8732 return TARGET_ELF && tls_referenced_p (x);
8733 }
8734
8735 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
8736 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
8737 can be addressed relative to the toc pointer. */
8738
8739 static bool
8740 use_toc_relative_ref (rtx sym, machine_mode mode)
8741 {
8742 return ((constant_pool_expr_p (sym)
8743 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
8744 get_pool_mode (sym)))
8745 || (TARGET_CMODEL == CMODEL_MEDIUM
8746 && SYMBOL_REF_LOCAL_P (sym)
8747 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
8748 }
8749
8750 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
8751 that is a valid memory address for an instruction.
8752 The MODE argument is the machine mode for the MEM expression
8753 that wants to use this address.
8754
8755 On the RS/6000, there are four valid address forms: a SYMBOL_REF that
8756 refers to a constant pool entry of an address (or the sum of it
8757 plus a constant), a short (16-bit signed) constant plus a register,
8758 the sum of two registers, or a register indirect, possibly with an
8759 auto-increment. For DFmode, DDmode and DImode with a constant plus
8760 register, we must ensure that both words are addressable, or that we
8761 are on PowerPC64 with a word-aligned offset.
8762
8763 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
8764 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
8765 because adjacent memory cells are accessed by adding word-sized offsets
8766 during assembly output. */
8767 static bool
8768 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
8769 {
8770 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
8771 bool quad_offset_p = mode_supports_dq_form (mode);
8772
8773 /* If this is an unaligned lvx/stvx type address, discard the outer AND. */
8774 if (VECTOR_MEM_ALTIVEC_P (mode)
8775 && GET_CODE (x) == AND
8776 && CONST_INT_P (XEXP (x, 1))
8777 && INTVAL (XEXP (x, 1)) == -16)
8778 x = XEXP (x, 0);
8779
8780 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
8781 return 0;
8782 if (legitimate_indirect_address_p (x, reg_ok_strict))
8783 return 1;
8784 if (TARGET_UPDATE
8785 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
8786 && mode_supports_pre_incdec_p (mode)
8787 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
8788 return 1;
8789
8790 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
8791 if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT))
8792 return 1;
8793
8794 /* Handle restricted vector d-form offsets in ISA 3.0. */
8795 if (quad_offset_p)
8796 {
8797 if (quad_address_p (x, mode, reg_ok_strict))
8798 return 1;
8799 }
8800 else if (virtual_stack_registers_memory_p (x))
8801 return 1;
8802
8803 else if (reg_offset_p)
8804 {
8805 if (legitimate_small_data_p (mode, x))
8806 return 1;
8807 if (legitimate_constant_pool_address_p (x, mode,
8808 reg_ok_strict || lra_in_progress))
8809 return 1;
8810 }
8811
8812 /* For TImode, if we have TImode in VSX registers, only allow register
8813 indirect addresses. This will allow the values to go in either GPRs
8814 or VSX registers without reloading. The vector types would tend to
8815 go into VSX registers, so we allow REG+REG, while TImode seems
8816 somewhat split, in that some uses are GPR based, and some VSX based. */
8817 /* FIXME: We could loosen this by changing the following to
8818 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
8819 but currently we cannot allow REG+REG addressing for TImode. See
8820 PR72827 for complete details on how this ends up hoodwinking DSE. */
8821 if (mode == TImode && TARGET_VSX)
8822 return 0;
8823 /* If not REG_OK_STRICT (before reload), let any stack offset pass. */
8824 if (! reg_ok_strict
8825 && reg_offset_p
8826 && GET_CODE (x) == PLUS
8827 && REG_P (XEXP (x, 0))
8828 && (XEXP (x, 0) == virtual_stack_vars_rtx
8829 || XEXP (x, 0) == arg_pointer_rtx)
8830 && CONST_INT_P (XEXP (x, 1)))
8831 return 1;
8832 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
8833 return 1;
8834 if (!FLOAT128_2REG_P (mode)
8835 && (TARGET_HARD_FLOAT
8836 || TARGET_POWERPC64
8837 || (mode != DFmode && mode != DDmode))
8838 && (TARGET_POWERPC64 || mode != DImode)
8839 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
8840 && mode != PTImode
8841 && !avoiding_indexed_address_p (mode)
8842 && legitimate_indexed_address_p (x, reg_ok_strict))
8843 return 1;
8844 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
8845 && mode_supports_pre_modify_p (mode)
8846 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
8847 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
8848 reg_ok_strict, false)
8849 || (!avoiding_indexed_address_p (mode)
8850 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
8851 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8852 {
8853 /* There is no prefixed version of the load/store with update. */
8854 rtx addr = XEXP (x, 1);
8855 return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT);
8856 }
8857 if (reg_offset_p && !quad_offset_p
8858 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
8859 return 1;
8860 return 0;
8861 }
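
/* Illustrative examples of the address shapes the function above accepts
   for a simple integer mode such as SImode (a sketch, not an exhaustive
   list; register numbers are arbitrary):

     (reg:SI 9)                               register indirect
     (plus:SI (reg:SI 9) (const_int 16))      16-bit signed offset
     (plus:SI (reg:SI 9) (reg:SI 10))         indexed (reg + reg)
     (pre_inc:SI (reg:SI 9))                  update form, if supported

   With prefixed-address support, 34-bit offsets and PC-relative
   references are accepted as well; the exact set depends on MODE and on
   the target flags tested above.  */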
8862
8863 /* Debug version of rs6000_legitimate_address_p. */
8864 static bool
8865 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
8866 bool reg_ok_strict)
8867 {
8868 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
8869 fprintf (stderr,
8870 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
8871 "strict = %d, reload = %s, code = %s\n",
8872 ret ? "true" : "false",
8873 GET_MODE_NAME (mode),
8874 reg_ok_strict,
8875 (reload_completed ? "after" : "before"),
8876 GET_RTX_NAME (GET_CODE (x)));
8877 debug_rtx (x);
8878
8879 return ret;
8880 }
8881
8882 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
8883
8884 static bool
8885 rs6000_mode_dependent_address_p (const_rtx addr,
8886 addr_space_t as ATTRIBUTE_UNUSED)
8887 {
8888 return rs6000_mode_dependent_address_ptr (addr);
8889 }
8890
8891 /* Go to LABEL if ADDR (a legitimate address expression)
8892 has an effect that depends on the machine mode it is used for.
8893
8894 On the RS/6000 this is true of all integral offsets (since AltiVec and
8895 VSX modes don't allow them) and of pre-increment and decrement addresses.
8896
8897 ??? Except that due to conceptual problems in offsettable_address_p
8898 we can't really report the problems of integral offsets. So leave
8899 this assuming that the adjustable offset must be valid for the
8900 sub-words of a TFmode operand, which is what we had before. */
8901
8902 static bool
8903 rs6000_mode_dependent_address (const_rtx addr)
8904 {
8905 switch (GET_CODE (addr))
8906 {
8907 case PLUS:
8908 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
8909 is considered a legitimate address before reload, so there
8910 are no offset restrictions in that case. Note that this
8911 condition is safe in strict mode because any address involving
8912 virtual_stack_vars_rtx or arg_pointer_rtx would already have
8913 been rejected as illegitimate. */
8914 if (XEXP (addr, 0) != virtual_stack_vars_rtx
8915 && XEXP (addr, 0) != arg_pointer_rtx
8916 && CONST_INT_P (XEXP (addr, 1)))
8917 {
8918 HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
8919 HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
8920 if (TARGET_PREFIXED_ADDR)
8921 return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
8922 else
8923 return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
8924 }
8925 break;
8926
8927 case LO_SUM:
8928 /* Anything in the constant pool is sufficiently aligned that
8929 all bytes have the same high part address. */
8930 return !legitimate_constant_pool_address_p (addr, QImode, false);
8931
8932 /* Auto-increment cases are now treated generically in recog.c. */
8933 case PRE_MODIFY:
8934 return TARGET_UPDATE;
8935
8936 /* AND is only allowed in Altivec loads. */
8937 case AND:
8938 return true;
8939
8940 default:
8941 break;
8942 }
8943
8944 return false;
8945 }
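
/* A small illustration of the PLUS case above, assuming
   SIGNED_16BIT_OFFSET_EXTRA_P (VAL, EXTRA) tests that both VAL and
   VAL + EXTRA fit in a signed 16-bit field: on 64-bit targets EXTRA is 8,
   so (plus (reg) (const_int 16)) is not mode-dependent, while
   (plus (reg) (const_int 32764)) is, because the second word of a
   multi-word access would need offset 32772, which does not fit.  */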
8946
8947 /* Debug version of rs6000_mode_dependent_address. */
8948 static bool
8949 rs6000_debug_mode_dependent_address (const_rtx addr)
8950 {
8951 bool ret = rs6000_mode_dependent_address (addr);
8952
8953 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
8954 ret ? "true" : "false");
8955 debug_rtx (addr);
8956
8957 return ret;
8958 }
8959
8960 /* Implement FIND_BASE_TERM. */
8961
8962 rtx
8963 rs6000_find_base_term (rtx op)
8964 {
8965 rtx base;
8966
8967 base = op;
8968 if (GET_CODE (base) == CONST)
8969 base = XEXP (base, 0);
8970 if (GET_CODE (base) == PLUS)
8971 base = XEXP (base, 0);
8972 if (GET_CODE (base) == UNSPEC)
8973 switch (XINT (base, 1))
8974 {
8975 case UNSPEC_TOCREL:
8976 case UNSPEC_MACHOPIC_OFFSET:
8977 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
8978 for aliasing purposes. */
8979 return XVECEXP (base, 0, 0);
8980 }
8981
8982 return op;
8983 }
8984
8985 /* More elaborate version of recog's offsettable_memref_p predicate
8986 that works around the ??? note of rs6000_mode_dependent_address.
8987 In particular it accepts
8988
8989 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
8990
8991 in 32-bit mode, which the recog predicate rejects. */
8992
8993 static bool
8994 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
8995 {
8996 bool worst_case;
8997
8998 if (!MEM_P (op))
8999 return false;
9000
9001 /* First mimic offsettable_memref_p. */
9002 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
9003 return true;
9004
9005 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
9006 the latter predicate knows nothing about the mode of the memory
9007 reference and, therefore, assumes that it is the largest supported
9008 mode (TFmode). As a consequence, legitimate offsettable memory
9009 references are rejected. rs6000_legitimate_offset_address_p contains
9010 the correct logic for the PLUS case of rs6000_mode_dependent_address,
9011 at least with a little bit of help here given that we know the
9012 actual registers used. */
9013 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
9014 || GET_MODE_SIZE (reg_mode) == 4);
9015 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
9016 strict, worst_case);
9017 }
9018
9019 /* Determine the reassociation width to be used in reassociate_bb.
9020 This takes into account how many parallel operations we
9021 can actually do of a given type, and also the latency.
9022 P8:
9023 int add/sub 6/cycle
9024 mul 2/cycle
9025 vect add/sub/mul 2/cycle
9026 fp add/sub/mul 2/cycle
9027 dfp 1/cycle
9028 */
9029
9030 static int
9031 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
9032 machine_mode mode)
9033 {
9034 switch (rs6000_tune)
9035 {
9036 case PROCESSOR_POWER8:
9037 case PROCESSOR_POWER9:
9038 case PROCESSOR_FUTURE:
9039 if (DECIMAL_FLOAT_MODE_P (mode))
9040 return 1;
9041 if (VECTOR_MODE_P (mode))
9042 return 4;
9043 if (INTEGRAL_MODE_P (mode))
9044 return 1;
9045 if (FLOAT_MODE_P (mode))
9046 return 4;
9047 break;
9048 default:
9049 break;
9050 }
9051 return 1;
9052 }
9053
9054 /* Change register usage conditional on target flags. */
9055 static void
9056 rs6000_conditional_register_usage (void)
9057 {
9058 int i;
9059
9060 if (TARGET_DEBUG_TARGET)
9061 fprintf (stderr, "rs6000_conditional_register_usage called\n");
9062
9063 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9064 if (TARGET_64BIT)
9065 fixed_regs[13] = call_used_regs[13] = 1;
9066
9067 /* Conditionally disable FPRs. */
9068 if (TARGET_SOFT_FLOAT)
9069 for (i = 32; i < 64; i++)
9070 fixed_regs[i] = call_used_regs[i] = 1;
9071
9072 /* The TOC register is not killed across calls in a way that is
9073 visible to the compiler. */
9074 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9075 call_used_regs[2] = 0;
9076
9077 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
9078 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9079
9080 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
9081 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9082 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9083
9084 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
9085 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9086 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9087
9088 if (TARGET_TOC && TARGET_MINIMAL_TOC)
9089 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9090
9091 if (!TARGET_ALTIVEC && !TARGET_VSX)
9092 {
9093 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
9094 fixed_regs[i] = call_used_regs[i] = 1;
9095 call_used_regs[VRSAVE_REGNO] = 1;
9096 }
9097
9098 if (TARGET_ALTIVEC || TARGET_VSX)
9099 global_regs[VSCR_REGNO] = 1;
9100
9101 if (TARGET_ALTIVEC_ABI)
9102 {
9103 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
9104 call_used_regs[i] = 1;
9105
9106 /* AIX reserves VR20:31 in non-extended ABI mode. */
9107 if (TARGET_XCOFF)
9108 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
9109 fixed_regs[i] = call_used_regs[i] = 1;
9110 }
9111 }
9112
9113 \f
9114 /* Output insns to set DEST equal to the constant SOURCE as a series of
9115 lis, ori and shl instructions and return TRUE. */
9116
9117 bool
9118 rs6000_emit_set_const (rtx dest, rtx source)
9119 {
9120 machine_mode mode = GET_MODE (dest);
9121 rtx temp, set;
9122 rtx_insn *insn;
9123 HOST_WIDE_INT c;
9124
9125 gcc_checking_assert (CONST_INT_P (source));
9126 c = INTVAL (source);
9127 switch (mode)
9128 {
9129 case E_QImode:
9130 case E_HImode:
9131 emit_insn (gen_rtx_SET (dest, source));
9132 return true;
9133
9134 case E_SImode:
9135 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
9136
9137 emit_insn (gen_rtx_SET (copy_rtx (temp),
9138 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
9139 emit_insn (gen_rtx_SET (dest,
9140 gen_rtx_IOR (SImode, copy_rtx (temp),
9141 GEN_INT (c & 0xffff))));
9142 break;
9143
9144 case E_DImode:
9145 if (!TARGET_POWERPC64)
9146 {
9147 rtx hi, lo;
9148
9149 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
9150 DImode);
9151 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
9152 DImode);
9153 emit_move_insn (hi, GEN_INT (c >> 32));
9154 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
9155 emit_move_insn (lo, GEN_INT (c));
9156 }
9157 else
9158 rs6000_emit_set_long_const (dest, c);
9159 break;
9160
9161 default:
9162 gcc_unreachable ();
9163 }
9164
9165 insn = get_last_insn ();
9166 set = single_set (insn);
9167 if (! CONSTANT_P (SET_SRC (set)))
9168 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
9169
9170 return true;
9171 }
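
/* Worked example (illustrative): for the SImode constant 0x12345678 the
   two sets emitted above correspond to

     lis  rT, 0x1234        # rT = 0x12340000
     ori  rD, rT, 0x5678    # rD = 0x12345678

   and the REG_EQUAL note records the full constant so later passes can
   CSE it.  Register names here are arbitrary.  */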
9172
9173 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
9174 Output insns to set DEST equal to the constant C as a series of
9175 lis, ori and shl instructions. */
9176
9177 static void
9178 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
9179 {
9180 rtx temp;
9181 HOST_WIDE_INT ud1, ud2, ud3, ud4;
9182
9183 ud1 = c & 0xffff;
9184 c = c >> 16;
9185 ud2 = c & 0xffff;
9186 c = c >> 16;
9187 ud3 = c & 0xffff;
9188 c = c >> 16;
9189 ud4 = c & 0xffff;
9190
9191 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
9192 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
9193 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
9194
9195 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
9196 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
9197 {
9198 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9199
9200 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9201 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9202 if (ud1 != 0)
9203 emit_move_insn (dest,
9204 gen_rtx_IOR (DImode, copy_rtx (temp),
9205 GEN_INT (ud1)));
9206 }
9207 else if (ud3 == 0 && ud4 == 0)
9208 {
9209 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9210
9211 gcc_assert (ud2 & 0x8000);
9212 emit_move_insn (copy_rtx (temp),
9213 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9214 if (ud1 != 0)
9215 emit_move_insn (copy_rtx (temp),
9216 gen_rtx_IOR (DImode, copy_rtx (temp),
9217 GEN_INT (ud1)));
9218 emit_move_insn (dest,
9219 gen_rtx_ZERO_EXTEND (DImode,
9220 gen_lowpart (SImode,
9221 copy_rtx (temp))));
9222 }
9223 else if ((ud4 == 0xffff && (ud3 & 0x8000))
9224 || (ud4 == 0 && ! (ud3 & 0x8000)))
9225 {
9226 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9227
9228 emit_move_insn (copy_rtx (temp),
9229 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
9230 if (ud2 != 0)
9231 emit_move_insn (copy_rtx (temp),
9232 gen_rtx_IOR (DImode, copy_rtx (temp),
9233 GEN_INT (ud2)));
9234 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9235 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9236 GEN_INT (16)));
9237 if (ud1 != 0)
9238 emit_move_insn (dest,
9239 gen_rtx_IOR (DImode, copy_rtx (temp),
9240 GEN_INT (ud1)));
9241 }
9242 else
9243 {
9244 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9245
9246 emit_move_insn (copy_rtx (temp),
9247 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
9248 if (ud3 != 0)
9249 emit_move_insn (copy_rtx (temp),
9250 gen_rtx_IOR (DImode, copy_rtx (temp),
9251 GEN_INT (ud3)));
9252
9253 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
9254 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9255 GEN_INT (32)));
9256 if (ud2 != 0)
9257 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9258 gen_rtx_IOR (DImode, copy_rtx (temp),
9259 GEN_INT (ud2 << 16)));
9260 if (ud1 != 0)
9261 emit_move_insn (dest,
9262 gen_rtx_IOR (DImode, copy_rtx (temp),
9263 GEN_INT (ud1)));
9264 }
9265 }
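
/* Worked example (illustrative): the 64-bit constant 0x123456789abcdef0
   takes the final (general) case above, which amounts to

     lis   rT, 0x1234        # rT = 0x0000000012340000
     ori   rT, rT, 0x5678    # rT = 0x0000000012345678
     sldi  rT, rT, 32        # rT = 0x1234567800000000
     oris  rT, rT, 0x9abc    # rT = 0x123456789abc0000
     ori   rD, rT, 0xdef0    # rD = 0x123456789abcdef0

   i.e. at most five instructions; the earlier special cases emit shorter
   sequences when the high halfwords are all zeros or all ones.  */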
9266
9267 /* Helper for the following. Get rid of [r+r] memory refs
9268 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
9269
9270 static void
9271 rs6000_eliminate_indexed_memrefs (rtx operands[2])
9272 {
9273 if (MEM_P (operands[0])
9274 && !REG_P (XEXP (operands[0], 0))
9275 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
9276 GET_MODE (operands[0]), false))
9277 operands[0]
9278 = replace_equiv_address (operands[0],
9279 copy_addr_to_reg (XEXP (operands[0], 0)));
9280
9281 if (MEM_P (operands[1])
9282 && !REG_P (XEXP (operands[1], 0))
9283 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
9284 GET_MODE (operands[1]), false))
9285 operands[1]
9286 = replace_equiv_address (operands[1],
9287 copy_addr_to_reg (XEXP (operands[1], 0)));
9288 }
9289
9290 /* Generate a vector of constants to permute MODE for a little-endian
9291 storage operation by swapping the two halves of a vector. */
9292 static rtvec
9293 rs6000_const_vec (machine_mode mode)
9294 {
9295 int i, subparts;
9296 rtvec v;
9297
9298 switch (mode)
9299 {
9300 case E_V1TImode:
9301 subparts = 1;
9302 break;
9303 case E_V2DFmode:
9304 case E_V2DImode:
9305 subparts = 2;
9306 break;
9307 case E_V4SFmode:
9308 case E_V4SImode:
9309 subparts = 4;
9310 break;
9311 case E_V8HImode:
9312 subparts = 8;
9313 break;
9314 case E_V16QImode:
9315 subparts = 16;
9316 break;
9317 default:
9318 gcc_unreachable ();
9319 }
9320
9321 v = rtvec_alloc (subparts);
9322
9323 for (i = 0; i < subparts / 2; ++i)
9324 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
9325 for (i = subparts / 2; i < subparts; ++i)
9326 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
9327
9328 return v;
9329 }
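
/* Example: for V4SImode the vector built above selects elements
   { 2, 3, 0, 1 }, i.e. it swaps the two 64-bit halves of the vector,
   which is exactly the doubleword swap that pairs with lxvd2x/stxvd2x
   on little-endian.  */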
9330
9331 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
9332 store operation. */
9333 void
9334 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
9335 {
9336 /* Scalar permutations are easier to express in integer modes than in
9337 floating-point modes, so cast them here. We use V1TImode instead
9338 of TImode to ensure that the values don't go through GPRs. */
9339 if (FLOAT128_VECTOR_P (mode))
9340 {
9341 dest = gen_lowpart (V1TImode, dest);
9342 source = gen_lowpart (V1TImode, source);
9343 mode = V1TImode;
9344 }
9345
9346 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
9347 scalar. */
9348 if (mode == TImode || mode == V1TImode)
9349 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
9350 GEN_INT (64))));
9351 else
9352 {
9353 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
9354 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
9355 }
9356 }
9357
9358 /* Emit a little-endian load from vector memory location SOURCE to VSX
9359 register DEST in mode MODE. The load is done with two permuting
9360 insns that represent an lxvd2x and xxpermdi. */
9361 void
9362 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
9363 {
9364 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9365 V1TImode). */
9366 if (mode == TImode || mode == V1TImode)
9367 {
9368 mode = V2DImode;
9369 dest = gen_lowpart (V2DImode, dest);
9370 source = adjust_address (source, V2DImode, 0);
9371 }
9372
9373 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
9374 rs6000_emit_le_vsx_permute (tmp, source, mode);
9375 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9376 }
9377
9378 /* Emit a little-endian store to vector memory location DEST from VSX
9379 register SOURCE in mode MODE. The store is done with two permuting
9380 insns that represent an xxpermdi and an stxvd2x. */
9381 void
9382 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
9383 {
9384 /* This should never be called during or after LRA, because it does
9385 not re-permute the source register. It is intended only for use
9386 during expand. */
9387 gcc_assert (!lra_in_progress && !reload_completed);
9388
9389 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9390 V1TImode). */
9391 if (mode == TImode || mode == V1TImode)
9392 {
9393 mode = V2DImode;
9394 dest = adjust_address (dest, V2DImode, 0);
9395 source = gen_lowpart (V2DImode, source);
9396 }
9397
9398 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
9399 rs6000_emit_le_vsx_permute (tmp, source, mode);
9400 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9401 }
9402
9403 /* Emit a sequence representing a little-endian VSX load or store,
9404 moving data from SOURCE to DEST in mode MODE. This is done
9405 separately from rs6000_emit_move to ensure it is called only
9406 during expand. LE VSX loads and stores introduced later are
9407 handled with a split. The expand-time RTL generation allows
9408 us to optimize away redundant pairs of register-permutes. */
9409 void
9410 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
9411 {
9412 gcc_assert (!BYTES_BIG_ENDIAN
9413 && VECTOR_MEM_VSX_P (mode)
9414 && !TARGET_P9_VECTOR
9415 && !gpr_or_gpr_p (dest, source)
9416 && (MEM_P (source) ^ MEM_P (dest)));
9417
9418 if (MEM_P (source))
9419 {
9420 gcc_assert (REG_P (dest) || SUBREG_P (dest));
9421 rs6000_emit_le_vsx_load (dest, source, mode);
9422 }
9423 else
9424 {
9425 if (!REG_P (source))
9426 source = force_reg (mode, source);
9427 rs6000_emit_le_vsx_store (dest, source, mode);
9428 }
9429 }
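
/* Illustrative expansion (a sketch; register names are arbitrary): a
   little-endian V4SImode load through the routines above becomes roughly

     lxvd2x   vsT, 0, rA         # load; doublewords end up swapped
     xxpermdi vsD, vsT, vsT, 2   # swap the doublewords back

   When a value is loaded and then stored through this path, the two
   intermediate xxpermdi instructions cancel, which is why the sequence
   is generated at expand time where such pairs can be optimized away.  */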
9430
9431 /* Return whether an SFmode or SImode move can be done without converting
9432 one mode to another. This arises when we have:
9433
9434 (SUBREG:SF (REG:SI ...))
9435 (SUBREG:SI (REG:SF ...))
9436
9437 and one of the values is in a floating point/vector register, where SFmode
9438 scalars are stored in DFmode format. */
9439
9440 bool
9441 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
9442 {
9443 if (TARGET_ALLOW_SF_SUBREG)
9444 return true;
9445
9446 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
9447 return true;
9448
9449 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
9450 return true;
9451
9452 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
9453 if (SUBREG_P (dest))
9454 {
9455 rtx dest_subreg = SUBREG_REG (dest);
9456 rtx src_subreg = SUBREG_REG (src);
9457 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
9458 }
9459
9460 return false;
9461 }
9462
9463
9464 /* Helper function to change moves with:
9465
9466 (SUBREG:SF (REG:SI)) and
9467 (SUBREG:SI (REG:SF))
9468
9469 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
9470 values are stored as DFmode values in the VSX registers. We need to convert
9471 the bits before we can use a direct move or operate on the bits in the
9472 vector register as an integer type.
9473
9474 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
9475
9476 static bool
9477 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
9478 {
9479 if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
9480 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
9481 && SUBREG_P (source) && sf_subreg_operand (source, mode))
9482 {
9483 rtx inner_source = SUBREG_REG (source);
9484 machine_mode inner_mode = GET_MODE (inner_source);
9485
9486 if (mode == SImode && inner_mode == SFmode)
9487 {
9488 emit_insn (gen_movsi_from_sf (dest, inner_source));
9489 return true;
9490 }
9491
9492 if (mode == SFmode && inner_mode == SImode)
9493 {
9494 emit_insn (gen_movsf_from_si (dest, inner_source));
9495 return true;
9496 }
9497 }
9498
9499 return false;
9500 }
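
/* Example (illustrative): before register allocation, a move such as

     (set (reg:SI rI) (subreg:SI (reg:SF rF) 0))

   cannot be a plain direct move, because the SFmode value is kept in the
   vector register in DFmode format; gen_movsi_from_sf emits the needed
   conversion instead.  rI and rF stand for arbitrary pseudos.  */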
9501
9502 /* Emit a move from SOURCE to DEST in mode MODE. */
9503 void
9504 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
9505 {
9506 rtx operands[2];
9507 operands[0] = dest;
9508 operands[1] = source;
9509
9510 if (TARGET_DEBUG_ADDR)
9511 {
9512 fprintf (stderr,
9513 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
9514 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
9515 GET_MODE_NAME (mode),
9516 lra_in_progress,
9517 reload_completed,
9518 can_create_pseudo_p ());
9519 debug_rtx (dest);
9520 fprintf (stderr, "source:\n");
9521 debug_rtx (source);
9522 }
9523
9524 /* Check that we get CONST_WIDE_INT only when we should. */
9525 if (CONST_WIDE_INT_P (operands[1])
9526 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
9527 gcc_unreachable ();
9528
9529 #ifdef HAVE_AS_GNU_ATTRIBUTE
9530 /* If we use a long double type, set the flags in .gnu_attribute that say
9531 what the long double type is. This is to allow the linker's warning
9532 message for the wrong long double to be useful, even if the function does
9533 not do a call (for example, doing a 128-bit add on power9 if the long
9534 double type is IEEE 128-bit). Do not set this if __ibm128 or __float128
9535 are used and they aren't the default long double type. */
9536 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
9537 {
9538 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
9539 rs6000_passes_float = rs6000_passes_long_double = true;
9540
9541 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
9542 rs6000_passes_float = rs6000_passes_long_double = true;
9543 }
9544 #endif
9545
9546 /* See if we need to special case SImode/SFmode SUBREG moves. */
9547 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
9548 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
9549 return;
9550
9551 /* Check if GCC is setting up a block move that will end up using FP
9552 registers as temporaries. We must make sure this is acceptable. */
9553 if (MEM_P (operands[0])
9554 && MEM_P (operands[1])
9555 && mode == DImode
9556 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
9557 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
9558 && ! (rs6000_slow_unaligned_access (SImode,
9559 (MEM_ALIGN (operands[0]) > 32
9560 ? 32 : MEM_ALIGN (operands[0])))
9561 || rs6000_slow_unaligned_access (SImode,
9562 (MEM_ALIGN (operands[1]) > 32
9563 ? 32 : MEM_ALIGN (operands[1]))))
9564 && ! MEM_VOLATILE_P (operands [0])
9565 && ! MEM_VOLATILE_P (operands [1]))
9566 {
9567 emit_move_insn (adjust_address (operands[0], SImode, 0),
9568 adjust_address (operands[1], SImode, 0));
9569 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
9570 adjust_address (copy_rtx (operands[1]), SImode, 4));
9571 return;
9572 }
9573
9574 if (can_create_pseudo_p () && MEM_P (operands[0])
9575 && !gpc_reg_operand (operands[1], mode))
9576 operands[1] = force_reg (mode, operands[1]);
9577
9578 /* Recognize the case where operand[1] is a reference to thread-local
9579 data and load its address to a register. */
9580 if (tls_referenced_p (operands[1]))
9581 {
9582 enum tls_model model;
9583 rtx tmp = operands[1];
9584 rtx addend = NULL;
9585
9586 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
9587 {
9588 addend = XEXP (XEXP (tmp, 0), 1);
9589 tmp = XEXP (XEXP (tmp, 0), 0);
9590 }
9591
9592 gcc_assert (SYMBOL_REF_P (tmp));
9593 model = SYMBOL_REF_TLS_MODEL (tmp);
9594 gcc_assert (model != 0);
9595
9596 tmp = rs6000_legitimize_tls_address (tmp, model);
9597 if (addend)
9598 {
9599 tmp = gen_rtx_PLUS (mode, tmp, addend);
9600 tmp = force_operand (tmp, operands[0]);
9601 }
9602 operands[1] = tmp;
9603 }
9604
9605 /* 128-bit constant floating-point values on Darwin should really be loaded
9606 as two parts. However, this premature splitting is a problem when DFmode
9607 values can go into Altivec registers. */
9608 if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
9609 && !reg_addr[DFmode].scalar_in_vmx_p)
9610 {
9611 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
9612 simplify_gen_subreg (DFmode, operands[1], mode, 0),
9613 DFmode);
9614 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
9615 GET_MODE_SIZE (DFmode)),
9616 simplify_gen_subreg (DFmode, operands[1], mode,
9617 GET_MODE_SIZE (DFmode)),
9618 DFmode);
9619 return;
9620 }
9621
9622 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
9623 p1:SD) if p1 is not of floating point class and p0 is spilled,
9624 because there is no analogous movsd_store for this case. */
9625 if (lra_in_progress && mode == DDmode
9626 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
9627 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9628 && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
9629 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
9630 {
9631 enum reg_class cl;
9632 int regno = REGNO (SUBREG_REG (operands[1]));
9633
9634 if (!HARD_REGISTER_NUM_P (regno))
9635 {
9636 cl = reg_preferred_class (regno);
9637 regno = reg_renumber[regno];
9638 if (regno < 0)
9639 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
9640 }
9641 if (regno >= 0 && ! FP_REGNO_P (regno))
9642 {
9643 mode = SDmode;
9644 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
9645 operands[1] = SUBREG_REG (operands[1]);
9646 }
9647 }
9648 if (lra_in_progress
9649 && mode == SDmode
9650 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
9651 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9652 && (REG_P (operands[1])
9653 || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
9654 {
9655 int regno = reg_or_subregno (operands[1]);
9656 enum reg_class cl;
9657
9658 if (!HARD_REGISTER_NUM_P (regno))
9659 {
9660 cl = reg_preferred_class (regno);
9661 gcc_assert (cl != NO_REGS);
9662 regno = reg_renumber[regno];
9663 if (regno < 0)
9664 regno = ira_class_hard_regs[cl][0];
9665 }
9666 if (FP_REGNO_P (regno))
9667 {
9668 if (GET_MODE (operands[0]) != DDmode)
9669 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
9670 emit_insn (gen_movsd_store (operands[0], operands[1]));
9671 }
9672 else if (INT_REGNO_P (regno))
9673 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9674 else
9675 gcc_unreachable ();
9676 return;
9677 }
9678 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
9679 p1:DD)) if p0 is not of floating point class and p1 is spilled,
9680 because there is no analogous movsd_load for this case. */
9681 if (lra_in_progress && mode == DDmode
9682 && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
9683 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
9684 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
9685 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9686 {
9687 enum reg_class cl;
9688 int regno = REGNO (SUBREG_REG (operands[0]));
9689
9690 if (!HARD_REGISTER_NUM_P (regno))
9691 {
9692 cl = reg_preferred_class (regno);
9693 regno = reg_renumber[regno];
9694 if (regno < 0)
9695 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
9696 }
9697 if (regno >= 0 && ! FP_REGNO_P (regno))
9698 {
9699 mode = SDmode;
9700 operands[0] = SUBREG_REG (operands[0]);
9701 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
9702 }
9703 }
9704 if (lra_in_progress
9705 && mode == SDmode
9706 && (REG_P (operands[0])
9707 || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
9708 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
9709 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9710 {
9711 int regno = reg_or_subregno (operands[0]);
9712 enum reg_class cl;
9713
9714 if (!HARD_REGISTER_NUM_P (regno))
9715 {
9716 cl = reg_preferred_class (regno);
9717 gcc_assert (cl != NO_REGS);
9718 regno = reg_renumber[regno];
9719 if (regno < 0)
9720 regno = ira_class_hard_regs[cl][0];
9721 }
9722 if (FP_REGNO_P (regno))
9723 {
9724 if (GET_MODE (operands[1]) != DDmode)
9725 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
9726 emit_insn (gen_movsd_load (operands[0], operands[1]));
9727 }
9728 else if (INT_REGNO_P (regno))
9729 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9730 else
9731 gcc_unreachable ();
9732 return;
9733 }
9734
9735 /* FIXME: In the long term, this switch statement should go away
9736 and be replaced by a sequence of tests based on things like
9737 mode == Pmode. */
9738 switch (mode)
9739 {
9740 case E_HImode:
9741 case E_QImode:
9742 if (CONSTANT_P (operands[1])
9743 && !CONST_INT_P (operands[1]))
9744 operands[1] = force_const_mem (mode, operands[1]);
9745 break;
9746
9747 case E_TFmode:
9748 case E_TDmode:
9749 case E_IFmode:
9750 case E_KFmode:
9751 if (FLOAT128_2REG_P (mode))
9752 rs6000_eliminate_indexed_memrefs (operands);
9753 /* fall through */
9754
9755 case E_DFmode:
9756 case E_DDmode:
9757 case E_SFmode:
9758 case E_SDmode:
9759 if (CONSTANT_P (operands[1])
9760 && ! easy_fp_constant (operands[1], mode))
9761 operands[1] = force_const_mem (mode, operands[1]);
9762 break;
9763
9764 case E_V16QImode:
9765 case E_V8HImode:
9766 case E_V4SFmode:
9767 case E_V4SImode:
9768 case E_V2DFmode:
9769 case E_V2DImode:
9770 case E_V1TImode:
9771 if (CONSTANT_P (operands[1])
9772 && !easy_vector_constant (operands[1], mode))
9773 operands[1] = force_const_mem (mode, operands[1]);
9774 break;
9775
9776 case E_SImode:
9777 case E_DImode:
9778 /* Use the default pattern for the address of ELF small data. */
9779 if (TARGET_ELF
9780 && mode == Pmode
9781 && DEFAULT_ABI == ABI_V4
9782 && (SYMBOL_REF_P (operands[1])
9783 || GET_CODE (operands[1]) == CONST)
9784 && small_data_operand (operands[1], mode))
9785 {
9786 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9787 return;
9788 }
9789
9790 /* Use the default pattern for loading up PC-relative addresses. */
9791 if (TARGET_PCREL && mode == Pmode
9792 && pcrel_local_or_external_address (operands[1], Pmode))
9793 {
9794 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9795 return;
9796 }
9797
9798 if (DEFAULT_ABI == ABI_V4
9799 && mode == Pmode && mode == SImode
9800 && flag_pic == 1 && got_operand (operands[1], mode))
9801 {
9802 emit_insn (gen_movsi_got (operands[0], operands[1]));
9803 return;
9804 }
9805
9806 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
9807 && TARGET_NO_TOC_OR_PCREL
9808 && ! flag_pic
9809 && mode == Pmode
9810 && CONSTANT_P (operands[1])
9811 && GET_CODE (operands[1]) != HIGH
9812 && !CONST_INT_P (operands[1]))
9813 {
9814 rtx target = (!can_create_pseudo_p ()
9815 ? operands[0]
9816 : gen_reg_rtx (mode));
9817
9818 /* If this is a function address on -mcall-aixdesc,
9819 convert it to the address of the descriptor. */
9820 if (DEFAULT_ABI == ABI_AIX
9821 && SYMBOL_REF_P (operands[1])
9822 && XSTR (operands[1], 0)[0] == '.')
9823 {
9824 const char *name = XSTR (operands[1], 0);
9825 rtx new_ref;
9826 while (*name == '.')
9827 name++;
9828 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
9829 CONSTANT_POOL_ADDRESS_P (new_ref)
9830 = CONSTANT_POOL_ADDRESS_P (operands[1]);
9831 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
9832 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
9833 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
9834 operands[1] = new_ref;
9835 }
9836
9837 if (DEFAULT_ABI == ABI_DARWIN)
9838 {
9839 #if TARGET_MACHO
9840 /* This is not PIC code, but could require the subset of
9841 indirections used by mdynamic-no-pic. */
9842 if (MACHO_DYNAMIC_NO_PIC_P)
9843 {
9844 /* Take care of any required data indirection. */
9845 operands[1] = rs6000_machopic_legitimize_pic_address (
9846 operands[1], mode, operands[0]);
9847 if (operands[0] != operands[1])
9848 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9849 return;
9850 }
9851 #endif
9852 emit_insn (gen_macho_high (Pmode, target, operands[1]));
9853 emit_insn (gen_macho_low (Pmode, operands[0],
9854 target, operands[1]));
9855 return;
9856 }
9857
9858 emit_insn (gen_elf_high (target, operands[1]));
9859 emit_insn (gen_elf_low (operands[0], target, operands[1]));
9860 return;
9861 }
9862
9863 /* If this is a SYMBOL_REF that refers to a constant pool entry,
9864 and we have put it in the TOC, we just need to make a TOC-relative
9865 reference to it. */
9866 if (TARGET_TOC
9867 && SYMBOL_REF_P (operands[1])
9868 && use_toc_relative_ref (operands[1], mode))
9869 operands[1] = create_TOC_reference (operands[1], operands[0]);
9870 else if (mode == Pmode
9871 && CONSTANT_P (operands[1])
9872 && GET_CODE (operands[1]) != HIGH
9873 && ((REG_P (operands[0])
9874 && FP_REGNO_P (REGNO (operands[0])))
9875 || !CONST_INT_P (operands[1])
9876 || (num_insns_constant (operands[1], mode)
9877 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
9878 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
9879 && (TARGET_CMODEL == CMODEL_SMALL
9880 || can_create_pseudo_p ()
9881 || (REG_P (operands[0])
9882 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
9883 {
9884
9885 #if TARGET_MACHO
9886 /* Darwin uses a special PIC legitimizer. */
9887 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
9888 {
9889 operands[1] =
9890 rs6000_machopic_legitimize_pic_address (operands[1], mode,
9891 operands[0]);
9892 if (operands[0] != operands[1])
9893 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9894 return;
9895 }
9896 #endif
9897
9898 /* If we are to limit the number of things we put in the TOC and
9899 this is a symbol plus a constant we can add in one insn,
9900 just put the symbol in the TOC and add the constant. */
9901 if (GET_CODE (operands[1]) == CONST
9902 && TARGET_NO_SUM_IN_TOC
9903 && GET_CODE (XEXP (operands[1], 0)) == PLUS
9904 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
9905 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
9906 || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
9907 && ! side_effects_p (operands[0]))
9908 {
9909 rtx sym =
9910 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
9911 rtx other = XEXP (XEXP (operands[1], 0), 1);
9912
9913 sym = force_reg (mode, sym);
9914 emit_insn (gen_add3_insn (operands[0], sym, other));
9915 return;
9916 }
9917
9918 operands[1] = force_const_mem (mode, operands[1]);
9919
9920 if (TARGET_TOC
9921 && SYMBOL_REF_P (XEXP (operands[1], 0))
9922 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
9923 {
9924 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
9925 operands[0]);
9926 operands[1] = gen_const_mem (mode, tocref);
9927 set_mem_alias_set (operands[1], get_TOC_alias_set ());
9928 }
9929 }
9930 break;
9931
9932 case E_TImode:
9933 if (!VECTOR_MEM_VSX_P (TImode))
9934 rs6000_eliminate_indexed_memrefs (operands);
9935 break;
9936
9937 case E_PTImode:
9938 rs6000_eliminate_indexed_memrefs (operands);
9939 break;
9940
9941 default:
9942 fatal_insn ("bad move", gen_rtx_SET (dest, source));
9943 }
9944
9945 /* Above, we may have called force_const_mem which may have returned
9946 an invalid address. If we can, fix this up; otherwise, reload will
9947 have to deal with it. */
9948 if (MEM_P (operands[1]))
9949 operands[1] = validize_mem (operands[1]);
9950
9951 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9952 }
9953 \f
9954
9955 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
9956 static void
9957 init_float128_ibm (machine_mode mode)
9958 {
9959 if (!TARGET_XL_COMPAT)
9960 {
9961 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
9962 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
9963 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
9964 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
9965
9966 if (!TARGET_HARD_FLOAT)
9967 {
9968 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
9969 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
9970 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
9971 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
9972 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
9973 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
9974 set_optab_libfunc (le_optab, mode, "__gcc_qle");
9975 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
9976
9977 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
9978 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
9979 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
9980 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
9981 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
9982 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
9983 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
9984 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
9985 }
9986 }
9987 else
9988 {
9989 set_optab_libfunc (add_optab, mode, "_xlqadd");
9990 set_optab_libfunc (sub_optab, mode, "_xlqsub");
9991 set_optab_libfunc (smul_optab, mode, "_xlqmul");
9992 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
9993 }
9994
9995 /* Add various conversions for IFmode to use the traditional TFmode
9996 names. */
9997 if (mode == IFmode)
9998 {
9999 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
10000 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
10001 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
10002 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
10003 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
10004 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
10005
10006 if (TARGET_POWERPC64)
10007 {
10008 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
10009 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
10010 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
10011 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
10012 }
10013 }
10014 }
10015
10016 /* Create a decl for either complex long double multiply or complex long double
10017 divide when long double is IEEE 128-bit floating point. We can't use
10018 __multc3 and __divtc3 because the original long double using IBM extended
10019 double used those names. The complex multiply/divide functions are encoded
10020 as builtin functions with a complex result and 4 scalar inputs. */
10021
10022 static void
10023 create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
10024 {
10025 tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL,
10026 name, NULL_TREE);
10027
10028 set_builtin_decl (fncode, fndecl, true);
10029
10030 if (TARGET_DEBUG_BUILTIN)
10031 fprintf (stderr, "create complex %s, fncode: %d\n", name, (int) fncode);
10032
10033 return;
10034 }
10035
10036 /* Set up IEEE 128-bit floating point routines. Use different names if the
10037 arguments can be passed in a vector register. The historical PowerPC
10038 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
10039 continue to use that if we aren't using vector registers to pass IEEE
10040 128-bit floating point. */
10041
10042 static void
10043 init_float128_ieee (machine_mode mode)
10044 {
10045 if (FLOAT128_VECTOR_P (mode))
10046 {
10047 static bool complex_muldiv_init_p = false;
10048
10049 /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble. If
10050 we have clone or target attributes, this will be called a second
10051 time. We want to create the built-in function only once. */
10052 if (mode == TFmode && TARGET_IEEEQUAD && !complex_muldiv_init_p)
10053 {
10054 complex_muldiv_init_p = true;
10055 built_in_function fncode_mul =
10056 (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + TCmode
10057 - MIN_MODE_COMPLEX_FLOAT);
10058 built_in_function fncode_div =
10059 (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + TCmode
10060 - MIN_MODE_COMPLEX_FLOAT);
10061
10062 tree fntype = build_function_type_list (complex_long_double_type_node,
10063 long_double_type_node,
10064 long_double_type_node,
10065 long_double_type_node,
10066 long_double_type_node,
10067 NULL_TREE);
10068
10069 create_complex_muldiv ("__mulkc3", fncode_mul, fntype);
10070 create_complex_muldiv ("__divkc3", fncode_div, fntype);
10071 }
10072
10073 set_optab_libfunc (add_optab, mode, "__addkf3");
10074 set_optab_libfunc (sub_optab, mode, "__subkf3");
10075 set_optab_libfunc (neg_optab, mode, "__negkf2");
10076 set_optab_libfunc (smul_optab, mode, "__mulkf3");
10077 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
10078 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
10079 set_optab_libfunc (abs_optab, mode, "__abskf2");
10080 set_optab_libfunc (powi_optab, mode, "__powikf2");
10081
10082 set_optab_libfunc (eq_optab, mode, "__eqkf2");
10083 set_optab_libfunc (ne_optab, mode, "__nekf2");
10084 set_optab_libfunc (gt_optab, mode, "__gtkf2");
10085 set_optab_libfunc (ge_optab, mode, "__gekf2");
10086 set_optab_libfunc (lt_optab, mode, "__ltkf2");
10087 set_optab_libfunc (le_optab, mode, "__lekf2");
10088 set_optab_libfunc (unord_optab, mode, "__unordkf2");
10089
10090 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
10091 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
10092 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
10093 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
10094
10095 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
10096 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
10097 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
10098
10099 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
10100 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
10101 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
10102
10103 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
10104 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
10105 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
10106 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
10107 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
10108 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
10109
10110 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
10111 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
10112 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
10113 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
10114
10115 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
10116 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
10117 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
10118 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
10119
10120 if (TARGET_POWERPC64)
10121 {
10122 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
10123 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
10124 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
10125 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
10126 }
10127 }
10128
10129 else
10130 {
10131 set_optab_libfunc (add_optab, mode, "_q_add");
10132 set_optab_libfunc (sub_optab, mode, "_q_sub");
10133 set_optab_libfunc (neg_optab, mode, "_q_neg");
10134 set_optab_libfunc (smul_optab, mode, "_q_mul");
10135 set_optab_libfunc (sdiv_optab, mode, "_q_div");
10136 if (TARGET_PPC_GPOPT)
10137 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
10138
10139 set_optab_libfunc (eq_optab, mode, "_q_feq");
10140 set_optab_libfunc (ne_optab, mode, "_q_fne");
10141 set_optab_libfunc (gt_optab, mode, "_q_fgt");
10142 set_optab_libfunc (ge_optab, mode, "_q_fge");
10143 set_optab_libfunc (lt_optab, mode, "_q_flt");
10144 set_optab_libfunc (le_optab, mode, "_q_fle");
10145
10146 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
10147 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
10148 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
10149 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
10150 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
10151 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
10152 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
10153 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
10154 }
10155 }
10156
10157 static void
10158 rs6000_init_libfuncs (void)
10159 {
10160 /* __float128 support. */
10161 if (TARGET_FLOAT128_TYPE)
10162 {
10163 init_float128_ibm (IFmode);
10164 init_float128_ieee (KFmode);
10165 }
10166
10167 /* AIX/Darwin/64-bit Linux quad floating point routines. */
10168 if (TARGET_LONG_DOUBLE_128)
10169 {
10170 if (!TARGET_IEEEQUAD)
10171 init_float128_ibm (TFmode);
10172
10173 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
10174 else
10175 init_float128_ieee (TFmode);
10176 }
10177 }
10178
10179 /* Emit a potentially record-form instruction, setting DST from SRC.
10180 If DOT is 0, that is all; otherwise, set CCREG to the result of the
10181 signed comparison of DST with zero. If DOT is 1, the generated RTL
10182 doesn't care about the DST result; if DOT is 2, it does. If CCREG
10183 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
10184 a separate COMPARE. */
10185
10186 void
10187 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
10188 {
10189 if (dot == 0)
10190 {
10191 emit_move_insn (dst, src);
10192 return;
10193 }
10194
10195 if (cc_reg_not_cr0_operand (ccreg, CCmode))
10196 {
10197 emit_move_insn (dst, src);
10198 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
10199 return;
10200 }
10201
10202 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
10203 if (dot == 1)
10204 {
10205 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
10206 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
10207 }
10208 else
10209 {
10210 rtx set = gen_rtx_SET (dst, src);
10211 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
10212 }
10213 }
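
/* Example: with DOT == 1 the emitted RTL has the shape

     (parallel [(set (reg:CC CR0) (compare:CC <src> (const_int 0)))
                (clobber (reg:DI <dst>))])

   matching record-form patterns such as "and." where only the condition
   register result is wanted; with DOT == 2 the clobber is a real set of
   DST instead.  <src>, <dst> and CR0 are schematic here.  */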
10214
10215 \f
10216 /* A validation routine: say whether CODE, a condition code, and MODE
10217 match. The other alternatives either don't make sense or should
10218 never be generated. */
10219
10220 void
10221 validate_condition_mode (enum rtx_code code, machine_mode mode)
10222 {
10223 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
10224 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
10225 && GET_MODE_CLASS (mode) == MODE_CC);
10226
10227 /* These don't make sense. */
10228 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
10229 || mode != CCUNSmode);
10230
10231 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
10232 || mode == CCUNSmode);
10233
10234 gcc_assert (mode == CCFPmode
10235 || (code != ORDERED && code != UNORDERED
10236 && code != UNEQ && code != LTGT
10237 && code != UNGT && code != UNLT
10238 && code != UNGE && code != UNLE));
10239
10240 /* These are invalid; the information is not there. */
10241 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
10242 }
10243
10244 \f
10245 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
10246 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
10247 not zero, store there the bit offset (counted from the right) where
10248 the single stretch of 1 bits begins; and similarly for B, the bit
10249 offset where it ends. */
10250
10251 bool
10252 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
10253 {
10254 unsigned HOST_WIDE_INT val = INTVAL (mask);
10255 unsigned HOST_WIDE_INT bit;
10256 int nb, ne;
10257 int n = GET_MODE_PRECISION (mode);
10258
10259 if (mode != DImode && mode != SImode)
10260 return false;
10261
10262 if (INTVAL (mask) >= 0)
10263 {
10264 bit = val & -val;
10265 ne = exact_log2 (bit);
10266 nb = exact_log2 (val + bit);
10267 }
10268 else if (val + 1 == 0)
10269 {
10270 nb = n;
10271 ne = 0;
10272 }
10273 else if (val & 1)
10274 {
10275 val = ~val;
10276 bit = val & -val;
10277 nb = exact_log2 (bit);
10278 ne = exact_log2 (val + bit);
10279 }
10280 else
10281 {
10282 bit = val & -val;
10283 ne = exact_log2 (bit);
10284 if (val + bit == 0)
10285 nb = n;
10286 else
10287 nb = 0;
10288 }
10289
10290 nb--;
10291
10292 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
10293 return false;
10294
10295 if (b)
10296 *b = nb;
10297 if (e)
10298 *e = ne;
10299
10300 return true;
10301 }
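
/* Worked example: MASK = 0x0ff0 has its single run of ones in bits 4..11
   (counting from the right), so the function returns true with *E == 4
   and *B == 11.  MASK = 0x0f0f is rejected: ne becomes 0, but
   val + bit == 0x0f10 is not a power of two, so nb ends up negative and
   the final range check fails.  */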
10302
10303 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
10304 or rldicr instruction, to implement an AND with it in mode MODE. */
10305
10306 bool
10307 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
10308 {
10309 int nb, ne;
10310
10311 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10312 return false;
10313
10314 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
10315 does not wrap. */
10316 if (mode == DImode)
10317 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
10318
10319 /* For SImode, rlwinm can do everything. */
10320 if (mode == SImode)
10321 return (nb < 32 && ne < 32);
10322
10323 return false;
10324 }
10325
10326 /* Return the instruction template for an AND with mask in mode MODE, with
10327 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10328
10329 const char *
10330 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
10331 {
10332 int nb, ne;
10333
10334 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
10335 gcc_unreachable ();
10336
10337 if (mode == DImode && ne == 0)
10338 {
10339 operands[3] = GEN_INT (63 - nb);
10340 if (dot)
10341 return "rldicl. %0,%1,0,%3";
10342 return "rldicl %0,%1,0,%3";
10343 }
10344
10345 if (mode == DImode && nb == 63)
10346 {
10347 operands[3] = GEN_INT (63 - ne);
10348 if (dot)
10349 return "rldicr. %0,%1,0,%3";
10350 return "rldicr %0,%1,0,%3";
10351 }
10352
10353 if (nb < 32 && ne < 32)
10354 {
10355 operands[3] = GEN_INT (31 - nb);
10356 operands[4] = GEN_INT (31 - ne);
10357 if (dot)
10358 return "rlwinm. %0,%1,0,%3,%4";
10359 return "rlwinm %0,%1,0,%3,%4";
10360 }
10361
10362 gcc_unreachable ();
10363 }
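
/* Example: an SImode AND with mask 0x00ffff00 gives nb == 23, ne == 8,
   so the template produced is "rlwinm %0,%1,0,8,23": a rotate by zero
   keeping IBM-numbered bits 8..23, which is exactly that mask.  */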
10364
10365 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
10366 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
10367 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
10368
10369 bool
10370 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
10371 {
10372 int nb, ne;
10373
10374 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10375 return false;
10376
10377 int n = GET_MODE_PRECISION (mode);
10378 int sh = -1;
10379
10380 if (CONST_INT_P (XEXP (shift, 1)))
10381 {
10382 sh = INTVAL (XEXP (shift, 1));
10383 if (sh < 0 || sh >= n)
10384 return false;
10385 }
10386
10387 rtx_code code = GET_CODE (shift);
10388
10389 /* Convert any shift by 0 to a rotate, to simplify the code below. */
10390 if (sh == 0)
10391 code = ROTATE;
10392
10393 /* Convert rotate to simple shift if we can, to make analysis simpler. */
10394 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
10395 code = ASHIFT;
10396 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
10397 {
10398 code = LSHIFTRT;
10399 sh = n - sh;
10400 }
10401
10402 /* DImode rotates need rld*. */
10403 if (mode == DImode && code == ROTATE)
10404 return (nb == 63 || ne == 0 || ne == sh);
10405
10406 /* SImode rotates need rlw*. */
10407 if (mode == SImode && code == ROTATE)
10408 return (nb < 32 && ne < 32 && sh < 32);
10409
10410 /* Wrap-around masks are only okay for rotates. */
10411 if (ne > nb)
10412 return false;
10413
10414 /* Variable shifts are only okay for rotates. */
10415 if (sh < 0)
10416 return false;
10417
10418 /* Don't allow ASHIFT if the mask is wrong for that. */
10419 if (code == ASHIFT && ne < sh)
10420 return false;
10421
10422 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
10423 if the mask is wrong for that. */
10424 if (nb < 32 && ne < 32 && sh < 32
10425 && !(code == LSHIFTRT && nb >= 32 - sh))
10426 return true;
10427
10428 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
10429 if the mask is wrong for that. */
10430 if (code == LSHIFTRT)
10431 sh = 64 - sh;
10432 if (nb == 63 || ne == 0 || ne == sh)
10433 return !(code == LSHIFTRT && nb >= sh);
10434
10435 return false;
10436 }
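
/* Example: (ashift:SI x 4) with mask 0xfffffff0 is accepted (nb == 31,
   ne == 4 == SH), and the combination is just "slwi", i.e. rlwinm with
   MB=0, ME=27.  The same shift with mask 0xfffffffc is rejected, because
   ne < SH: a rotate would bring the high bits of the source into bits
   2..3, where a true left shift must leave zeros.  */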
10437
10438 /* Return the instruction template for a shift with mask in mode MODE, with
10439 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10440
10441 const char *
10442 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
10443 {
10444 int nb, ne;
10445
10446 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
10447 gcc_unreachable ();
10448
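/* In the templates below, %I2 prints an "i" when operand 2 is a constant,
   so each template covers both the immediate form (e.g. rldicl) and the
   register form (e.g. rldcl) of the rotate. */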
10449 if (mode == DImode && ne == 0)
10450 {
10451 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
10452 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
10453 operands[3] = GEN_INT (63 - nb);
10454 if (dot)
10455 return "rld%I2cl. %0,%1,%2,%3";
10456 return "rld%I2cl %0,%1,%2,%3";
10457 }
10458
10459 if (mode == DImode && nb == 63)
10460 {
10461 operands[3] = GEN_INT (63 - ne);
10462 if (dot)
10463 return "rld%I2cr. %0,%1,%2,%3";
10464 return "rld%I2cr %0,%1,%2,%3";
10465 }
10466
10467 if (mode == DImode
10468 && GET_CODE (operands[4]) != LSHIFTRT
10469 && CONST_INT_P (operands[2])
10470 && ne == INTVAL (operands[2]))
10471 {
10472 operands[3] = GEN_INT (63 - nb);
10473 if (dot)
10474 return "rld%I2c. %0,%1,%2,%3";
10475 return "rld%I2c %0,%1,%2,%3";
10476 }
10477
10478 if (nb < 32 && ne < 32)
10479 {
10480 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
10481 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
10482 operands[3] = GEN_INT (31 - nb);
10483 operands[4] = GEN_INT (31 - ne);
10484 /* This insn can also be a 64-bit rotate with mask that really makes
10485 it just a shift right (with mask); the %h below adjusts for that
10486 situation (the shift count is >= 32 in that case). */
10487 if (dot)
10488 return "rlw%I2nm. %0,%1,%h2,%3,%4";
10489 return "rlw%I2nm %0,%1,%h2,%3,%4";
10490 }
10491
10492 gcc_unreachable ();
10493 }
10494
10495 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
10496 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
10497 ASHIFT, or LSHIFTRT) in mode MODE. */
10498
10499 bool
10500 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
10501 {
10502 int nb, ne;
10503
10504 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10505 return false;
10506
10507 int n = GET_MODE_PRECISION (mode);
10508
10509 int sh = INTVAL (XEXP (shift, 1));
10510 if (sh < 0 || sh >= n)
10511 return false;
10512
10513 rtx_code code = GET_CODE (shift);
10514
10515 /* Convert any shift by 0 to a rotate, to simplify the code below. */
10516 if (sh == 0)
10517 code = ROTATE;
10518
10519 /* Convert rotate to simple shift if we can, to make analysis simpler. */
10520 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
10521 code = ASHIFT;
10522 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
10523 {
10524 code = LSHIFTRT;
10525 sh = n - sh;
10526 }
10527
10528 /* DImode rotates need rldimi. */
10529 if (mode == DImode && code == ROTATE)
10530 return (ne == sh);
10531
10532 /* SImode rotates need rlwimi. */
10533 if (mode == SImode && code == ROTATE)
10534 return (nb < 32 && ne < 32 && sh < 32);
10535
10536 /* Wrap-around masks are only okay for rotates. */
10537 if (ne > nb)
10538 return false;
10539
10540 /* Don't allow ASHIFT if the mask is wrong for that. */
10541 if (code == ASHIFT && ne < sh)
10542 return false;
10543
10544 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
10545 if the mask is wrong for that. */
10546 if (nb < 32 && ne < 32 && sh < 32
10547 && !(code == LSHIFTRT && nb >= 32 - sh))
10548 return true;
10549
10550 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
10551 if the mask is wrong for that. */
10552 if (code == LSHIFTRT)
10553 sh = 64 - sh;
10554 if (ne == sh)
10555 return !(code == LSHIFTRT && nb >= sh);
10556
10557 return false;
10558 }
10559
10560 /* Return the instruction template for an insert with mask in mode MODE, with
10561 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10562
10563 const char *
10564 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
10565 {
10566 int nb, ne;
10567
10568 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
10569 gcc_unreachable ();
10570
10571 /* Prefer rldimi because rlwimi is cracked. */
10572 if (TARGET_POWERPC64
10573 && (!dot || mode == DImode)
10574 && GET_CODE (operands[4]) != LSHIFTRT
10575 && ne == INTVAL (operands[2]))
10576 {
10577 operands[3] = GEN_INT (63 - nb);
10578 if (dot)
10579 return "rldimi. %0,%1,%2,%3";
10580 return "rldimi %0,%1,%2,%3";
10581 }
10582
10583 if (nb < 32 && ne < 32)
10584 {
10585 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
10586 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
10587 operands[3] = GEN_INT (31 - nb);
10588 operands[4] = GEN_INT (31 - ne);
10589 if (dot)
10590 return "rlwimi. %0,%1,%2,%3,%4";
10591 return "rlwimi %0,%1,%2,%3,%4";
10592 }
10593
10594 gcc_unreachable ();
10595 }
10596
10597 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
10598 using two machine instructions. */
10599
10600 bool
10601 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
10602 {
10603 /* There are two kinds of AND we can handle with two insns:
10604 1) those we can do with two rl* insns;
10605 2) ori[s];xori[s].
10606
10607 We do not handle that last case yet. */
10608
10609 /* If there is just one stretch of ones, we can do it. */
10610 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
10611 return true;
10612
10613 /* Otherwise, fill in the lowest "hole"; if we can do the result with
10614 one insn, we can do the whole thing with two. */
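/* For example (an illustrative value), val = 0x00ff0ff0 gives
   bit1 = 0x10 (the lowest set bit), bit2 = 0x1000 (the lowest bit of the
   lowest hole), and bit3 = 0x10000 (the lowest bit of the run above that
   hole); val + bit3 - bit2 = 0x00fffff0 fills in the hole, leaving one
   stretch of ones that a single rl* insn can handle. */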
10615 unsigned HOST_WIDE_INT val = INTVAL (c);
10616 unsigned HOST_WIDE_INT bit1 = val & -val;
10617 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
10618 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
10619 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
10620 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
10621 }
10622
10623 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
10624 If EXPAND is true, split rotate-and-mask instructions we generate to
10625 their constituent parts as well (this is used during expand); if DOT
10626 is 1, make the last insn a record-form instruction clobbering the
10627 destination GPR and setting the CC reg (from operands[3]); if 2, set
10628 that GPR as well as the CC reg. */
10629
10630 void
10631 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
10632 {
10633 gcc_assert (!(expand && dot));
10634
10635 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
10636
10637 /* If it is one stretch of ones, it is DImode; shift left, mask, then
10638 shift right. This generates better code than doing the masks without
10639 shifts, or shifting first right and then left. */
10640 int nb, ne;
10641 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
10642 {
10643 gcc_assert (mode == DImode);
10644
10645 int shift = 63 - nb;
10646 if (expand)
10647 {
10648 rtx tmp1 = gen_reg_rtx (DImode);
10649 rtx tmp2 = gen_reg_rtx (DImode);
10650 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
10651 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
10652 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
10653 }
10654 else
10655 {
10656 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
10657 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
10658 emit_move_insn (operands[0], tmp);
10659 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
10660 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10661 }
10662 return;
10663 }
10664
10665 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
10666 that does the rest. */
10667 unsigned HOST_WIDE_INT bit1 = val & -val;
10668 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
10669 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
10670 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
10671
10672 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
10673 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
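/* Continuing the example above: for val = 0x00ff0ff0, mask2 is 0x00fffff0
   (val with its lowest hole filled) and mask1 is all ones except bits
   12..15 (the hole itself), so mask1 & mask2 == val. */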
10674
10675 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
10676
10677 /* Two "no-rotate"-and-mask instructions, for SImode. */
10678 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
10679 {
10680 gcc_assert (mode == SImode);
10681
10682 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
10683 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
10684 emit_move_insn (reg, tmp);
10685 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
10686 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10687 return;
10688 }
10689
10690 gcc_assert (mode == DImode);
10691
10692 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
10693 insns; we have to do the first in SImode, because it wraps. */
10694 if (mask2 <= 0xffffffff
10695 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
10696 {
10697 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
10698 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
10699 GEN_INT (mask1));
10700 rtx reg_low = gen_lowpart (SImode, reg);
10701 emit_move_insn (reg_low, tmp);
10702 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
10703 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10704 return;
10705 }
10706
10707 /* Two rld* insns: rotate, clear the hole in the middle (which now is
10708 at the top end), rotate back and clear the other hole. */
10709 int right = exact_log2 (bit3);
10710 int left = 64 - right;
10711
10712 /* Rotate the mask too. */
10713 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
10714
10715 if (expand)
10716 {
10717 rtx tmp1 = gen_reg_rtx (DImode);
10718 rtx tmp2 = gen_reg_rtx (DImode);
10719 rtx tmp3 = gen_reg_rtx (DImode);
10720 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
10721 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
10722 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
10723 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
10724 }
10725 else
10726 {
10727 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
10728 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
10729 emit_move_insn (operands[0], tmp);
10730 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
10731 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
10732 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10733 }
10734 }
10735 \f
10736 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
10737 for lfq and stfq insns iff the registers are hard registers. */
10738
10739 int
10740 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
10741 {
10742 /* We might have been passed a SUBREG. */
10743 if (!REG_P (reg1) || !REG_P (reg2))
10744 return 0;
10745
10746 /* We might have been passed non-floating-point registers. */
10747 if (!FP_REGNO_P (REGNO (reg1))
10748 || !FP_REGNO_P (REGNO (reg2)))
10749 return 0;
10750
10751 return (REGNO (reg1) == REGNO (reg2) - 1);
10752 }
10753
10754 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insns.
10755 addr1 and addr2 must be in consecutive memory locations
10756 (addr2 == addr1 + 8). */
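/* For example, (mem (reg 9)) paired with (mem (plus (reg 9) (const_int 8)))
   qualifies; two addresses with different base registers do not. */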
10757
10758 int
10759 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
10760 {
10761 rtx addr1, addr2;
10762 unsigned int reg1, reg2;
10763 int offset1, offset2;
10764
10765 /* The mems cannot be volatile. */
10766 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
10767 return 0;
10768
10769 addr1 = XEXP (mem1, 0);
10770 addr2 = XEXP (mem2, 0);
10771
10772 /* Extract an offset (if used) from the first addr. */
10773 if (GET_CODE (addr1) == PLUS)
10774 {
10775 /* If not a REG, return zero. */
10776 if (!REG_P (XEXP (addr1, 0)))
10777 return 0;
10778 else
10779 {
10780 reg1 = REGNO (XEXP (addr1, 0));
10781 /* The offset must be constant! */
10782 if (!CONST_INT_P (XEXP (addr1, 1)))
10783 return 0;
10784 offset1 = INTVAL (XEXP (addr1, 1));
10785 }
10786 }
10787 else if (!REG_P (addr1))
10788 return 0;
10789 else
10790 {
10791 reg1 = REGNO (addr1);
10792 /* This was a simple (mem (reg)) expression. Offset is 0. */
10793 offset1 = 0;
10794 }
10795
10796 /* And now for the second addr. */
10797 if (GET_CODE (addr2) == PLUS)
10798 {
10799 /* If not a REG, return zero. */
10800 if (!REG_P (XEXP (addr2, 0)))
10801 return 0;
10802 else
10803 {
10804 reg2 = REGNO (XEXP (addr2, 0));
10805 /* The offset must be constant. */
10806 if (!CONST_INT_P (XEXP (addr2, 1)))
10807 return 0;
10808 offset2 = INTVAL (XEXP (addr2, 1));
10809 }
10810 }
10811 else if (!REG_P (addr2))
10812 return 0;
10813 else
10814 {
10815 reg2 = REGNO (addr2);
10816 /* This was a simple (mem (reg)) expression. Offset is 0. */
10817 offset2 = 0;
10818 }
10819
10820 /* Both of these must have the same base register. */
10821 if (reg1 != reg2)
10822 return 0;
10823
10824 /* The offset for the second addr must be 8 more than the first addr. */
10825 if (offset2 != offset1 + 8)
10826 return 0;
10827
10828 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
10829 instructions. */
10830 return 1;
10831 }
10832 \f
10833 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. For SDmode values we
10834 need to use DDmode; in all other cases we can use the same mode. */
10835 static machine_mode
10836 rs6000_secondary_memory_needed_mode (machine_mode mode)
10837 {
10838 if (lra_in_progress && mode == SDmode)
10839 return DDmode;
10840 return mode;
10841 }
10842
10843 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
10844 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
10845 only work on the traditional altivec registers, note if an altivec register
10846 was chosen. */
10847
10848 static enum rs6000_reg_type
10849 register_to_reg_type (rtx reg, bool *is_altivec)
10850 {
10851 HOST_WIDE_INT regno;
10852 enum reg_class rclass;
10853
10854 if (SUBREG_P (reg))
10855 reg = SUBREG_REG (reg);
10856
10857 if (!REG_P (reg))
10858 return NO_REG_TYPE;
10859
10860 regno = REGNO (reg);
10861 if (!HARD_REGISTER_NUM_P (regno))
10862 {
10863 if (!lra_in_progress && !reload_completed)
10864 return PSEUDO_REG_TYPE;
10865
10866 regno = true_regnum (reg);
10867 if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
10868 return PSEUDO_REG_TYPE;
10869 }
10870
10871 gcc_assert (regno >= 0);
10872
10873 if (is_altivec && ALTIVEC_REGNO_P (regno))
10874 *is_altivec = true;
10875
10876 rclass = rs6000_regno_regclass[regno];
10877 return reg_class_to_reg_type[(int)rclass];
10878 }
10879
10880 /* Helper function to return the cost of adding a TOC entry address. */
10881
10882 static inline int
10883 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
10884 {
10885 int ret;
10886
10887 if (TARGET_CMODEL != CMODEL_SMALL)
10888 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
10889
10890 else
10891 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
10892
10893 return ret;
10894 }
10895
10896 /* Helper function for rs6000_secondary_reload to determine whether the memory
10897 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
10898 needs reloading. Return negative if the memory is not handled by the memory
10899 helper functions and a different reload method should be tried, 0 if no
10900 additional instructions are needed, and positive to give the extra cost of
10901 the memory access. */
10902
10903 static int
10904 rs6000_secondary_reload_memory (rtx addr,
10905 enum reg_class rclass,
10906 machine_mode mode)
10907 {
10908 int extra_cost = 0;
10909 rtx reg, and_arg, plus_arg0, plus_arg1;
10910 addr_mask_type addr_mask;
10911 const char *type = NULL;
10912 const char *fail_msg = NULL;
10913
10914 if (GPR_REG_CLASS_P (rclass))
10915 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
10916
10917 else if (rclass == FLOAT_REGS)
10918 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
10919
10920 else if (rclass == ALTIVEC_REGS)
10921 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
10922
10923 /* For the combined VSX_REGS, turn off Altivec AND -16. */
10924 else if (rclass == VSX_REGS)
10925 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
10926 & ~RELOAD_REG_AND_M16);
10927
10928 /* If the register allocator hasn't made up its mind yet on the register
10929 class to use, fall back to sensible defaults. */
10930 else if (rclass == NO_REGS)
10931 {
10932 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
10933 & ~RELOAD_REG_AND_M16);
10934
10935 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
10936 addr_mask &= ~(RELOAD_REG_INDEXED
10937 | RELOAD_REG_PRE_INCDEC
10938 | RELOAD_REG_PRE_MODIFY);
10939 }
10940
10941 else
10942 addr_mask = 0;
10943
10944 /* If the mode isn't valid in this register class, just return now. */
10945 if ((addr_mask & RELOAD_REG_VALID) == 0)
10946 {
10947 if (TARGET_DEBUG_ADDR)
10948 {
10949 fprintf (stderr,
10950 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
10951 "not valid in class\n",
10952 GET_MODE_NAME (mode), reg_class_names[rclass]);
10953 debug_rtx (addr);
10954 }
10955
10956 return -1;
10957 }
10958
10959 switch (GET_CODE (addr))
10960 {
10961 /* Does the register class support auto update forms for this mode? We
10962 don't need a scratch register, since the powerpc only supports
10963 PRE_INC, PRE_DEC, and PRE_MODIFY. */
10964 case PRE_INC:
10965 case PRE_DEC:
10966 reg = XEXP (addr, 0);
10967 if (!base_reg_operand (reg, GET_MODE (reg)))
10968 {
10969 fail_msg = "no base register #1";
10970 extra_cost = -1;
10971 }
10972
10973 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
10974 {
10975 extra_cost = 1;
10976 type = "update";
10977 }
10978 break;
10979
10980 case PRE_MODIFY:
10981 reg = XEXP (addr, 0);
10982 plus_arg1 = XEXP (addr, 1);
10983 if (!base_reg_operand (reg, GET_MODE (reg))
10984 || GET_CODE (plus_arg1) != PLUS
10985 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
10986 {
10987 fail_msg = "bad PRE_MODIFY";
10988 extra_cost = -1;
10989 }
10990
10991 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
10992 {
10993 extra_cost = 1;
10994 type = "update";
10995 }
10996 break;
10997
10998 /* Do we need to simulate AND -16 to clear the bottom address bits used
10999 in VMX load/stores? Only allow the AND for vector sizes. */
11000 case AND:
11001 and_arg = XEXP (addr, 0);
11002 if (GET_MODE_SIZE (mode) != 16
11003 || !CONST_INT_P (XEXP (addr, 1))
11004 || INTVAL (XEXP (addr, 1)) != -16)
11005 {
11006 fail_msg = "bad Altivec AND #1";
11007 extra_cost = -1;
11008 }
11009
11010 if (rclass != ALTIVEC_REGS)
11011 {
11012 if (legitimate_indirect_address_p (and_arg, false))
11013 extra_cost = 1;
11014
11015 else if (legitimate_indexed_address_p (and_arg, false))
11016 extra_cost = 2;
11017
11018 else
11019 {
11020 fail_msg = "bad Altivec AND #2";
11021 extra_cost = -1;
11022 }
11023
11024 type = "and";
11025 }
11026 break;
11027
11028 /* If this is an indirect address, make sure it is a base register. */
11029 case REG:
11030 case SUBREG:
11031 if (!legitimate_indirect_address_p (addr, false))
11032 {
11033 extra_cost = 1;
11034 type = "move";
11035 }
11036 break;
11037
11038 /* If this is an indexed address, make sure the register class can handle
11039 indexed addresses for this mode. */
11040 case PLUS:
11041 plus_arg0 = XEXP (addr, 0);
11042 plus_arg1 = XEXP (addr, 1);
11043
11044 /* (plus (plus (reg) (constant)) (constant)) is generated during
11045 push_reload processing, so handle it now. */
11046 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
11047 {
11048 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11049 {
11050 extra_cost = 1;
11051 type = "offset";
11052 }
11053 }
11054
11055 /* (plus (plus (reg) (constant)) (reg)) is also generated during
11056 push_reload processing, so handle it now. */
11057 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
11058 {
11059 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11060 {
11061 extra_cost = 1;
11062 type = "indexed #2";
11063 }
11064 }
11065
11066 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
11067 {
11068 fail_msg = "no base register #2";
11069 extra_cost = -1;
11070 }
11071
11072 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
11073 {
11074 if ((addr_mask & RELOAD_REG_INDEXED) == 0
11075 || !legitimate_indexed_address_p (addr, false))
11076 {
11077 extra_cost = 1;
11078 type = "indexed";
11079 }
11080 }
11081
11082 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
11083 && CONST_INT_P (plus_arg1))
11084 {
11085 if (!quad_address_offset_p (INTVAL (plus_arg1)))
11086 {
11087 extra_cost = 1;
11088 type = "vector d-form offset";
11089 }
11090 }
11091
11092 /* Make sure the register class can handle offset addresses. */
11093 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
11094 {
11095 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11096 {
11097 extra_cost = 1;
11098 type = "offset #2";
11099 }
11100 }
11101
11102 else
11103 {
11104 fail_msg = "bad PLUS";
11105 extra_cost = -1;
11106 }
11107
11108 break;
11109
11110 case LO_SUM:
11111 /* Quad offsets are restricted and can't handle normal addresses. */
11112 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11113 {
11114 extra_cost = -1;
11115 type = "vector d-form lo_sum";
11116 }
11117
11118 else if (!legitimate_lo_sum_address_p (mode, addr, false))
11119 {
11120 fail_msg = "bad LO_SUM";
11121 extra_cost = -1;
11122 }
11123
11124 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11125 {
11126 extra_cost = 1;
11127 type = "lo_sum";
11128 }
11129 break;
11130
11131 /* Static addresses need to create a TOC entry. */
11132 case CONST:
11133 case SYMBOL_REF:
11134 case LABEL_REF:
11135 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11136 {
11137 extra_cost = -1;
11138 type = "vector d-form lo_sum #2";
11139 }
11140
11141 else
11142 {
11143 type = "address";
11144 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
11145 }
11146 break;
11147
11148 /* TOC references look like offsettable memory. */
11149 case UNSPEC:
11150 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
11151 {
11152 fail_msg = "bad UNSPEC";
11153 extra_cost = -1;
11154 }
11155
11156 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11157 {
11158 extra_cost = -1;
11159 type = "vector d-form lo_sum #3";
11160 }
11161
11162 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11163 {
11164 extra_cost = 1;
11165 type = "toc reference";
11166 }
11167 break;
11168
11169 default:
11170 {
11171 fail_msg = "bad address";
11172 extra_cost = -1;
11173 }
11174 }
11175
11176 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
11177 {
11178 if (extra_cost < 0)
11179 fprintf (stderr,
11180 "rs6000_secondary_reload_memory error: mode = %s, "
11181 "class = %s, addr_mask = '%s', %s\n",
11182 GET_MODE_NAME (mode),
11183 reg_class_names[rclass],
11184 rs6000_debug_addr_mask (addr_mask, false),
11185 (fail_msg != NULL) ? fail_msg : "<bad address>");
11186
11187 else
11188 fprintf (stderr,
11189 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
11190 "addr_mask = '%s', extra cost = %d, %s\n",
11191 GET_MODE_NAME (mode),
11192 reg_class_names[rclass],
11193 rs6000_debug_addr_mask (addr_mask, false),
11194 extra_cost,
11195 (type) ? type : "<none>");
11196
11197 debug_rtx (addr);
11198 }
11199
11200 return extra_cost;
11201 }
11202
11203 /* Helper function for rs6000_secondary_reload to return true if a move to a
11204 different register class is really a simple move. */
11205
11206 static bool
11207 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
11208 enum rs6000_reg_type from_type,
11209 machine_mode mode)
11210 {
11211 int size = GET_MODE_SIZE (mode);
11212
11213 /* Add support for the various direct moves available. In this function, we
11214 only look at cases where we don't need any extra registers, and one or more
11215 simple move insns are issued. Historically, small integers were not allowed
11216 in FPR/VSX registers. Single precision binary floating point is not a
11217 simple move because we need to convert to the single precision memory
11218 layout. The 4-byte SDmode can be moved. TDmode values are disallowed since
11219 they need special direct move handling, which we do not support yet. */
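/* For example, a DImode move between a GPR and a VSX register on a 64-bit
   ISA 2.07 target is a single mtvsrd or mfvsrd. */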
11220 if (TARGET_DIRECT_MOVE
11221 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11222 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
11223 {
11224 if (TARGET_POWERPC64)
11225 {
11226 /* ISA 2.07: MTVSRD or MFVSRD. */
11227 if (size == 8)
11228 return true;
11229
11230 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
11231 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
11232 return true;
11233 }
11234
11235 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
11236 if (TARGET_P8_VECTOR)
11237 {
11238 if (mode == SImode)
11239 return true;
11240
11241 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
11242 return true;
11243 }
11244
11245 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
11246 if (mode == SDmode)
11247 return true;
11248 }
11249
11250 /* Move to/from SPR. */
11251 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
11252 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
11253 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
11254 return true;
11255
11256 return false;
11257 }
11258
11259 /* Direct move helper function for rs6000_secondary_reload. Handle all of
11260 the special direct moves that involve allocating an extra register.
11261 Return true if there is a helper for the move (recording its insn code
11262 and extra cost in SRI), and false if not. */
11263
11264 static bool
11265 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
11266 enum rs6000_reg_type from_type,
11267 machine_mode mode,
11268 secondary_reload_info *sri,
11269 bool altivec_p)
11270 {
11271 bool ret = false;
11272 enum insn_code icode = CODE_FOR_nothing;
11273 int cost = 0;
11274 int size = GET_MODE_SIZE (mode);
11275
11276 if (TARGET_POWERPC64 && size == 16)
11277 {
11278 /* Handle moving 128-bit values from GPRs to VSX point registers on
11279 ISA 2.07 (power8, power9) when running in 64-bit mode using
11280 XXPERMDI to glue the two 64-bit values back together. */
11281 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
11282 {
11283 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
11284 icode = reg_addr[mode].reload_vsx_gpr;
11285 }
11286
11287 /* Handle moving 128-bit values from VSX point registers to GPRs on
11288 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
11289 bottom 64-bit value. */
11290 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11291 {
11292 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
11293 icode = reg_addr[mode].reload_gpr_vsx;
11294 }
11295 }
11296
11297 else if (TARGET_POWERPC64 && mode == SFmode)
11298 {
11299 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11300 {
11301 cost = 3; /* xscvdpspn, mfvsrd, and. */
11302 icode = reg_addr[mode].reload_gpr_vsx;
11303 }
11304
11305 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
11306 {
11307 cost = 2; /* mtvsrz, xscvspdpn. */
11308 icode = reg_addr[mode].reload_vsx_gpr;
11309 }
11310 }
11311
11312 else if (!TARGET_POWERPC64 && size == 8)
11313 {
11314 /* Handle moving 64-bit values from GPRs to floating point registers on
11315 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
11316 32-bit values back together. Altivec register classes must be handled
11317 specially since a different instruction is used, and the secondary
11318 reload support requires a single instruction class in the scratch
11319 register constraint. However, right now TFmode is not allowed in
11320 Altivec registers, so the pattern will never match. */
11321 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
11322 {
11323 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
11324 icode = reg_addr[mode].reload_fpr_gpr;
11325 }
11326 }
11327
11328 if (icode != CODE_FOR_nothing)
11329 {
11330 ret = true;
11331 if (sri)
11332 {
11333 sri->icode = icode;
11334 sri->extra_cost = cost;
11335 }
11336 }
11337
11338 return ret;
11339 }
11340
11341 /* Return whether a move between two register classes can be done either
11342 directly (simple move) or via a pattern that uses a single extra temporary
11343 (using ISA 2.07's direct move in this case). */
11344
11345 static bool
11346 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
11347 enum rs6000_reg_type from_type,
11348 machine_mode mode,
11349 secondary_reload_info *sri,
11350 bool altivec_p)
11351 {
11352 /* Fall back to load/store reloads if either type is not a register. */
11353 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
11354 return false;
11355
11356 /* If we haven't allocated registers yet, assume the move can be done for the
11357 standard register types. */
11358 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
11359 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
11360 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
11361 return true;
11362
11363 /* A move within the same set of registers is a simple move for
11364 non-specialized registers. */
11365 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
11366 return true;
11367
11368 /* Check whether a simple move can be done directly. */
11369 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
11370 {
11371 if (sri)
11372 {
11373 sri->icode = CODE_FOR_nothing;
11374 sri->extra_cost = 0;
11375 }
11376 return true;
11377 }
11378
11379 /* Now check if we can do it in a few steps. */
11380 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
11381 altivec_p);
11382 }
11383
11384 /* Inform reload about cases where moving X with a mode MODE to a register in
11385 RCLASS requires an extra scratch or immediate register. Return the class
11386 needed for the immediate register.
11387
11388 For VSX and Altivec, we may need a register to convert sp+offset into
11389 reg+sp.
11390
11391 For misaligned 64-bit gpr loads and stores we need a register to
11392 convert an offset address to indirect. */
11393
11394 static reg_class_t
11395 rs6000_secondary_reload (bool in_p,
11396 rtx x,
11397 reg_class_t rclass_i,
11398 machine_mode mode,
11399 secondary_reload_info *sri)
11400 {
11401 enum reg_class rclass = (enum reg_class) rclass_i;
11402 reg_class_t ret = ALL_REGS;
11403 enum insn_code icode;
11404 bool default_p = false;
11405 bool done_p = false;
11406
11407 /* Allow subreg of memory before/during reload. */
11408 bool memory_p = (MEM_P (x)
11409 || (!reload_completed && SUBREG_P (x)
11410 && MEM_P (SUBREG_REG (x))));
11411
11412 sri->icode = CODE_FOR_nothing;
11413 sri->t_icode = CODE_FOR_nothing;
11414 sri->extra_cost = 0;
11415 icode = ((in_p)
11416 ? reg_addr[mode].reload_load
11417 : reg_addr[mode].reload_store);
11418
11419 if (REG_P (x) || register_operand (x, mode))
11420 {
11421 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
11422 bool altivec_p = (rclass == ALTIVEC_REGS);
11423 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
11424
11425 if (!in_p)
11426 std::swap (to_type, from_type);
11427
11428 /* Can we do a direct move of some sort? */
11429 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
11430 altivec_p))
11431 {
11432 icode = (enum insn_code)sri->icode;
11433 default_p = false;
11434 done_p = true;
11435 ret = NO_REGS;
11436 }
11437 }
11438
11439 /* Make sure 0.0 is not reloaded or forced into memory. */
11440 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
11441 {
11442 ret = NO_REGS;
11443 default_p = false;
11444 done_p = true;
11445 }
11446
11447 /* If this is a scalar floating point value and we want to load it into the
11448 traditional Altivec registers, do it through a traditional floating
11449 point register, unless we have D-form addressing. Also make sure that
11450 non-zero constants use an FPR. */
11451 if (!done_p && reg_addr[mode].scalar_in_vmx_p
11452 && !mode_supports_vmx_dform (mode)
11453 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
11454 && (memory_p || CONST_DOUBLE_P (x)))
11455 {
11456 ret = FLOAT_REGS;
11457 default_p = false;
11458 done_p = true;
11459 }
11460
11461 /* Handle reload of load/stores if we have reload helper functions. */
11462 if (!done_p && icode != CODE_FOR_nothing && memory_p)
11463 {
11464 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
11465 mode);
11466
11467 if (extra_cost >= 0)
11468 {
11469 done_p = true;
11470 ret = NO_REGS;
11471 if (extra_cost > 0)
11472 {
11473 sri->extra_cost = extra_cost;
11474 sri->icode = icode;
11475 }
11476 }
11477 }
11478
11479 /* Handle unaligned loads and stores of integer registers. */
11480 if (!done_p && TARGET_POWERPC64
11481 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
11482 && memory_p
11483 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
11484 {
11485 rtx addr = XEXP (x, 0);
11486 rtx off = address_offset (addr);
11487
11488 if (off != NULL_RTX)
11489 {
11490 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
11491 unsigned HOST_WIDE_INT offset = INTVAL (off);
11492
11493 /* We need a secondary reload when our legitimate_address_p
11494 says the address is good (as otherwise the entire address
11495 will be reloaded), and the offset is not a multiple of
11496 four or we have an address wrap. Address wrap will only
11497 occur for LO_SUMs since legitimate_offset_address_p
11498 rejects addresses for 16-byte mems that will wrap. */
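/* For example (illustrative numbers): for a 16-byte access, extra = 8,
   and an aligned LO_SUM offset of 0x7ffc wraps, since
   ((0x7ffc & 0xffff) ^ 0x8000) = 0xfffc >= 0x10000 - 8; the second
   doubleword would need displacement 0x8004, which does not fit in a
   signed 16-bit field. */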
11499 if (GET_CODE (addr) == LO_SUM
11500 ? (1 /* legitimate_address_p allows any offset for lo_sum */
11501 && ((offset & 3) != 0
11502 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
11503 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
11504 && (offset & 3) != 0))
11505 {
11506 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
11507 if (in_p)
11508 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
11509 : CODE_FOR_reload_di_load);
11510 else
11511 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
11512 : CODE_FOR_reload_di_store);
11513 sri->extra_cost = 2;
11514 ret = NO_REGS;
11515 done_p = true;
11516 }
11517 else
11518 default_p = true;
11519 }
11520 else
11521 default_p = true;
11522 }
11523
11524 if (!done_p && !TARGET_POWERPC64
11525 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
11526 && memory_p
11527 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
11528 {
11529 rtx addr = XEXP (x, 0);
11530 rtx off = address_offset (addr);
11531
11532 if (off != NULL_RTX)
11533 {
11534 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
11535 unsigned HOST_WIDE_INT offset = INTVAL (off);
11536
11537 /* We need a secondary reload when our legitimate_address_p
11538 says the address is good (as otherwise the entire address
11539 will be reloaded), and we have a wrap.
11540
11541 legitimate_lo_sum_address_p allows LO_SUM addresses to
11542 have any offset so test for wrap in the low 16 bits.
11543
11544 legitimate_offset_address_p checks for the range
11545 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
11546 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
11547 [0x7ff4,0x7fff] respectively, so test for the
11548 intersection of these ranges, [0x7ffc,0x7fff] and
11549 [0x7ff4,0x7ff7] respectively.
11550
11551 Note that the address we see here may have been
11552 manipulated by legitimize_reload_address. */
11553 if (GET_CODE (addr) == LO_SUM
11554 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
11555 : offset - (0x8000 - extra) < UNITS_PER_WORD)
11556 {
11557 if (in_p)
11558 sri->icode = CODE_FOR_reload_si_load;
11559 else
11560 sri->icode = CODE_FOR_reload_si_store;
11561 sri->extra_cost = 2;
11562 ret = NO_REGS;
11563 done_p = true;
11564 }
11565 else
11566 default_p = true;
11567 }
11568 else
11569 default_p = true;
11570 }
11571
11572 if (!done_p)
11573 default_p = true;
11574
11575 if (default_p)
11576 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
11577
11578 gcc_assert (ret != ALL_REGS);
11579
11580 if (TARGET_DEBUG_ADDR)
11581 {
11582 fprintf (stderr,
11583 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
11584 "mode = %s",
11585 reg_class_names[ret],
11586 in_p ? "true" : "false",
11587 reg_class_names[rclass],
11588 GET_MODE_NAME (mode));
11589
11590 if (reload_completed)
11591 fputs (", after reload", stderr);
11592
11593 if (!done_p)
11594 fputs (", done_p not set", stderr);
11595
11596 if (default_p)
11597 fputs (", default secondary reload", stderr);
11598
11599 if (sri->icode != CODE_FOR_nothing)
11600 fprintf (stderr, ", reload func = %s, extra cost = %d",
11601 insn_data[sri->icode].name, sri->extra_cost);
11602
11603 else if (sri->extra_cost > 0)
11604 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
11605
11606 fputs ("\n", stderr);
11607 debug_rtx (x);
11608 }
11609
11610 return ret;
11611 }
11612
11613 /* Better tracing for rs6000_secondary_reload_inner. */
11614
11615 static void
11616 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
11617 bool store_p)
11618 {
11619 rtx set, clobber;
11620
11621 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
11622
11623 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
11624 store_p ? "store" : "load");
11625
11626 if (store_p)
11627 set = gen_rtx_SET (mem, reg);
11628 else
11629 set = gen_rtx_SET (reg, mem);
11630
11631 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
11632 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
11633 }
11634
11635 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
11636 ATTRIBUTE_NORETURN;
11637
11638 static void
11639 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
11640 bool store_p)
11641 {
11642 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
11643 gcc_unreachable ();
11644 }
11645
11646 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
11647 reload helper functions. These were identified in
11648 rs6000_secondary_reload_memory, and if reload decided to use the secondary
11649 reload, it calls the insns:
11650 reload_<RELOAD:mode>_<P:mptrsize>_store
11651 reload_<RELOAD:mode>_<P:mptrsize>_load
11652
11653 which in turn calls this function, to do whatever is necessary to create
11654 valid addresses. */
11655
11656 void
11657 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
11658 {
11659 int regno = true_regnum (reg);
11660 machine_mode mode = GET_MODE (reg);
11661 addr_mask_type addr_mask;
11662 rtx addr;
11663 rtx new_addr;
11664 rtx op_reg, op0, op1;
11665 rtx and_op;
11666 rtx cc_clobber;
11667 rtvec rv;
11668
11669 if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
11670 || !base_reg_operand (scratch, GET_MODE (scratch)))
11671 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11672
11673 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
11674 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
11675
11676 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
11677 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
11678
11679 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
11680 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
11681
11682 else
11683 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11684
11685 /* Make sure the mode is valid in this register class. */
11686 if ((addr_mask & RELOAD_REG_VALID) == 0)
11687 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11688
11689 if (TARGET_DEBUG_ADDR)
11690 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
11691
11692 new_addr = addr = XEXP (mem, 0);
11693 switch (GET_CODE (addr))
11694 {
11695 /* Does the register class support auto update forms for this mode? If
11696 not, do the update now. We don't need a scratch register, since the
11697 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
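/* For example (illustrative), a (pre_inc (reg 9)) address for a 16-byte
   mode whose register class lacks update forms is rewritten here as an
   explicit "addi 9,9,16" followed by a plain (reg 9) access. */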
11698 case PRE_INC:
11699 case PRE_DEC:
11700 op_reg = XEXP (addr, 0);
11701 if (!base_reg_operand (op_reg, Pmode))
11702 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11703
11704 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
11705 {
11706 int delta = GET_MODE_SIZE (mode);
11707 if (GET_CODE (addr) == PRE_DEC)
11708 delta = -delta;
11709 emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
11710 new_addr = op_reg;
11711 }
11712 break;
11713
11714 case PRE_MODIFY:
11715 op0 = XEXP (addr, 0);
11716 op1 = XEXP (addr, 1);
11717 if (!base_reg_operand (op0, Pmode)
11718 || GET_CODE (op1) != PLUS
11719 || !rtx_equal_p (op0, XEXP (op1, 0)))
11720 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11721
11722 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
11723 {
11724 emit_insn (gen_rtx_SET (op0, op1));
11725 new_addr = op0;
11726 }
11727 break;
11728
11729 /* Do we need to simulate AND -16 to clear the bottom address bits used
11730 in VMX load/stores? */
11731 case AND:
11732 op0 = XEXP (addr, 0);
11733 op1 = XEXP (addr, 1);
11734 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
11735 {
11736 if (REG_P (op0) || SUBREG_P (op0))
11737 op_reg = op0;
11738
11739 else if (GET_CODE (op1) == PLUS)
11740 {
11741 emit_insn (gen_rtx_SET (scratch, op1));
11742 op_reg = scratch;
11743 }
11744
11745 else
11746 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11747
11748 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
11749 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
11750 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
11751 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
11752 new_addr = scratch;
11753 }
11754 break;
11755
11756 /* If this is an indirect address, make sure it is a base register. */
11757 case REG:
11758 case SUBREG:
11759 if (!base_reg_operand (addr, GET_MODE (addr)))
11760 {
11761 emit_insn (gen_rtx_SET (scratch, addr));
11762 new_addr = scratch;
11763 }
11764 break;
11765
11766 /* If this is an indexed address, make sure the register class can handle
11767 indexed addresses for this mode. */
11768 case PLUS:
11769 op0 = XEXP (addr, 0);
11770 op1 = XEXP (addr, 1);
11771 if (!base_reg_operand (op0, Pmode))
11772 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11773
11774 else if (int_reg_operand (op1, Pmode))
11775 {
11776 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11777 {
11778 emit_insn (gen_rtx_SET (scratch, addr));
11779 new_addr = scratch;
11780 }
11781 }
11782
11783 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
11784 {
11785 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
11786 || !quad_address_p (addr, mode, false))
11787 {
11788 emit_insn (gen_rtx_SET (scratch, addr));
11789 new_addr = scratch;
11790 }
11791 }
11792
11793 /* Make sure the register class can handle offset addresses. */
11794 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
11795 {
11796 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11797 {
11798 emit_insn (gen_rtx_SET (scratch, addr));
11799 new_addr = scratch;
11800 }
11801 }
11802
11803 else
11804 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11805
11806 break;
11807
11808 case LO_SUM:
11809 op0 = XEXP (addr, 0);
11810 op1 = XEXP (addr, 1);
11811 if (!base_reg_operand (op0, Pmode))
11812 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11813
11814 else if (int_reg_operand (op1, Pmode))
11815 {
11816 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11817 {
11818 emit_insn (gen_rtx_SET (scratch, addr));
11819 new_addr = scratch;
11820 }
11821 }
11822
11823 /* Quad offsets are restricted and can't handle normal addresses. */
11824 else if (mode_supports_dq_form (mode))
11825 {
11826 emit_insn (gen_rtx_SET (scratch, addr));
11827 new_addr = scratch;
11828 }
11829
11830 /* Make sure the register class can handle offset addresses. */
11831 else if (legitimate_lo_sum_address_p (mode, addr, false))
11832 {
11833 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11834 {
11835 emit_insn (gen_rtx_SET (scratch, addr));
11836 new_addr = scratch;
11837 }
11838 }
11839
11840 else
11841 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11842
11843 break;
11844
11845 case SYMBOL_REF:
11846 case CONST:
11847 case LABEL_REF:
11848 rs6000_emit_move (scratch, addr, Pmode);
11849 new_addr = scratch;
11850 break;
11851
11852 default:
11853 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11854 }
11855
11856 /* Adjust the address if it changed. */
11857 if (addr != new_addr)
11858 {
11859 mem = replace_equiv_address_nv (mem, new_addr);
11860 if (TARGET_DEBUG_ADDR)
11861 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
11862 }
11863
11864 /* Now create the move. */
11865 if (store_p)
11866 emit_insn (gen_rtx_SET (mem, reg));
11867 else
11868 emit_insn (gen_rtx_SET (reg, mem));
11869
11870 return;
11871 }
11872
11873 /* Convert reloads involving 64-bit gprs and misaligned offset
11874 addressing, or multiple 32-bit gprs and offsets that are too large,
11875 to use indirect addressing. */
11876
11877 void
11878 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
11879 {
11880 int regno = true_regnum (reg);
11881 enum reg_class rclass;
11882 rtx addr;
11883 rtx scratch_or_premodify = scratch;
11884
11885 if (TARGET_DEBUG_ADDR)
11886 {
11887 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
11888 store_p ? "store" : "load");
11889 fprintf (stderr, "reg:\n");
11890 debug_rtx (reg);
11891 fprintf (stderr, "mem:\n");
11892 debug_rtx (mem);
11893 fprintf (stderr, "scratch:\n");
11894 debug_rtx (scratch);
11895 }
11896
11897 gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
11898 gcc_assert (MEM_P (mem));
11899 rclass = REGNO_REG_CLASS (regno);
11900 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
11901 addr = XEXP (mem, 0);
11902
11903 if (GET_CODE (addr) == PRE_MODIFY)
11904 {
11905 gcc_assert (REG_P (XEXP (addr, 0))
11906 && GET_CODE (XEXP (addr, 1)) == PLUS
11907 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
11908 scratch_or_premodify = XEXP (addr, 0);
11909 addr = XEXP (addr, 1);
11910 }
11911 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
11912
11913 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
11914
11915 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
11916
11917 /* Now create the move. */
11918 if (store_p)
11919 emit_insn (gen_rtx_SET (mem, reg));
11920 else
11921 emit_insn (gen_rtx_SET (reg, mem));
11922
11923 return;
11924 }
11925
11926 /* Given an rtx X being reloaded into a reg required to be
11927 in class CLASS, return the class of reg to actually use.
11928 In general this is just CLASS; but on some machines
11929 in some cases it is preferable to use a more restrictive class.
11930
11931 On the RS/6000, we have to return NO_REGS when we want to reload a
11932 floating-point CONST_DOUBLE to force it to be copied to memory.
11933
11934 We also don't want to reload integer values into floating-point
11935 registers if we can at all help it. In fact, this can
11936 cause reload to die, if it tries to generate a reload of CTR
11937 into an FP register and discovers it doesn't have the memory location
11938 required.
11939
11940 ??? Would it be a good idea to have reload do the converse, that is
11941 try to reload floating modes into FP registers if possible?
11942 */
11943
11944 static enum reg_class
11945 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
11946 {
11947 machine_mode mode = GET_MODE (x);
11948 bool is_constant = CONSTANT_P (x);
11949
11950 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
11951 reload class for it. */
11952 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
11953 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
11954 return NO_REGS;
11955
11956 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
11957 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
11958 return NO_REGS;
11959
11960 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
11961 the reloading of address expressions using PLUS into floating point
11962 registers. */
11963 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
11964 {
11965 if (is_constant)
11966 {
11967 /* Zero is always allowed in all VSX registers. */
11968 if (x == CONST0_RTX (mode))
11969 return rclass;
11970
11971 /* If this is a vector constant that can be formed with a few Altivec
11972 instructions, we want altivec registers. */
11973 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
11974 return ALTIVEC_REGS;
11975
11976 /* If this is an integer constant that can easily be loaded into
11977 vector registers, allow it. */
11978 if (CONST_INT_P (x))
11979 {
11980 HOST_WIDE_INT value = INTVAL (x);
11981
11982 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
11983 2.06 can generate it in the Altivec registers with
11984 VSPLTI<x>. */
11985 if (value == -1)
11986 {
11987 if (TARGET_P8_VECTOR)
11988 return rclass;
11989 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
11990 return ALTIVEC_REGS;
11991 else
11992 return NO_REGS;
11993 }
11994
11995 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
11996 a sign extend in the Altivec registers. */
11997 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
11998 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
11999 return ALTIVEC_REGS;
12000 }
12001
12002 /* Force constant to memory. */
12003 return NO_REGS;
12004 }
12005
12006 /* D-form addressing can easily reload the value. */
12007 if (mode_supports_vmx_dform (mode)
12008 || mode_supports_dq_form (mode))
12009 return rclass;
12010
12011 /* If this is a scalar floating point value and we don't have D-form
12012 addressing, prefer the traditional floating point registers so that we
12013 can use D-form (register+offset) addressing. */
12014 if (rclass == VSX_REGS
12015 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
12016 return FLOAT_REGS;
12017
12018 /* Prefer the Altivec registers if Altivec is handling the vector
12019 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
12020 loads. */
12021 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
12022 || mode == V1TImode)
12023 return ALTIVEC_REGS;
12024
12025 return rclass;
12026 }
12027
12028 if (is_constant || GET_CODE (x) == PLUS)
12029 {
12030 if (reg_class_subset_p (GENERAL_REGS, rclass))
12031 return GENERAL_REGS;
12032 if (reg_class_subset_p (BASE_REGS, rclass))
12033 return BASE_REGS;
12034 return NO_REGS;
12035 }
12036
12037 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == GEN_OR_FLOAT_REGS)
12038 return GENERAL_REGS;
12039
12040 return rclass;
12041 }
12042
12043 /* Debug version of rs6000_preferred_reload_class. */
12044 static enum reg_class
12045 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
12046 {
12047 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
12048
12049 fprintf (stderr,
12050 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
12051 "mode = %s, x:\n",
12052 reg_class_names[ret], reg_class_names[rclass],
12053 GET_MODE_NAME (GET_MODE (x)));
12054 debug_rtx (x);
12055
12056 return ret;
12057 }
12058
12059 /* If we are copying between FP or AltiVec registers and anything else, we need
12060 a memory location. The exception is when we are targeting ppc64 and the
12061 fpr/gpr direct move instructions are available. Also, under VSX, you
12062 can copy vector registers from the FP register set to the Altivec register
12063 set and vice versa. */
12064
12065 static bool
12066 rs6000_secondary_memory_needed (machine_mode mode,
12067 reg_class_t from_class,
12068 reg_class_t to_class)
12069 {
12070 enum rs6000_reg_type from_type, to_type;
12071 bool altivec_p = ((from_class == ALTIVEC_REGS)
12072 || (to_class == ALTIVEC_REGS));
12073
12074 /* If a simple/direct move is available, we don't need secondary memory. */
12075 from_type = reg_class_to_reg_type[(int)from_class];
12076 to_type = reg_class_to_reg_type[(int)to_class];
12077
12078 if (rs6000_secondary_reload_move (to_type, from_type, mode,
12079 (secondary_reload_info *)0, altivec_p))
12080 return false;
12081
12082 /* If we have a floating point or vector register class, we need to use
12083 memory to transfer the data. */
12084 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
12085 return true;
12086
12087 return false;
12088 }
12089
12090 /* Debug version of rs6000_secondary_memory_needed. */
12091 static bool
12092 rs6000_debug_secondary_memory_needed (machine_mode mode,
12093 reg_class_t from_class,
12094 reg_class_t to_class)
12095 {
12096 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
12097
12098 fprintf (stderr,
12099 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
12100 "to_class = %s, mode = %s\n",
12101 ret ? "true" : "false",
12102 reg_class_names[from_class],
12103 reg_class_names[to_class],
12104 GET_MODE_NAME (mode));
12105
12106 return ret;
12107 }
12108
12109 /* Return the register class of a scratch register needed to copy IN into
12110 or out of a register in RCLASS in MODE. If it can be done directly,
12111 NO_REGS is returned. */
12112
12113 static enum reg_class
12114 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
12115 rtx in)
12116 {
12117 int regno;
12118
12119 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
12120 #if TARGET_MACHO
12121 && MACHOPIC_INDIRECT
12122 #endif
12123 ))
12124 {
12125 /* We cannot copy a symbolic operand directly into anything
12126 other than BASE_REGS for TARGET_ELF. So indicate that a
12127 register from BASE_REGS is needed as an intermediate
12128 register.
12129
12130 On Darwin, pic addresses require a load from memory, which
12131 needs a base register. */
12132 if (rclass != BASE_REGS
12133 && (SYMBOL_REF_P (in)
12134 || GET_CODE (in) == HIGH
12135 || GET_CODE (in) == LABEL_REF
12136 || GET_CODE (in) == CONST))
12137 return BASE_REGS;
12138 }
12139
12140 if (REG_P (in))
12141 {
12142 regno = REGNO (in);
12143 if (!HARD_REGISTER_NUM_P (regno))
12144 {
12145 regno = true_regnum (in);
12146 if (!HARD_REGISTER_NUM_P (regno))
12147 regno = -1;
12148 }
12149 }
12150 else if (SUBREG_P (in))
12151 {
12152 regno = true_regnum (in);
12153 if (!HARD_REGISTER_NUM_P (regno))
12154 regno = -1;
12155 }
12156 else
12157 regno = -1;
12158
12159 /* If we have VSX register moves, prefer moving scalar values between
12160 Altivec registers and GPRs by going via an FPR (and then via memory)
12161 instead of reloading the secondary memory address for Altivec moves. */
12162 if (TARGET_VSX
12163 && GET_MODE_SIZE (mode) < 16
12164 && !mode_supports_vmx_dform (mode)
12165 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
12166 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
12167 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
12168 && (regno >= 0 && INT_REGNO_P (regno)))))
12169 return FLOAT_REGS;
12170
12171 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
12172 into anything. */
12173 if (rclass == GENERAL_REGS || rclass == BASE_REGS
12174 || (regno >= 0 && INT_REGNO_P (regno)))
12175 return NO_REGS;
12176
12177 /* Constants, memory, and VSX registers can go into VSX registers (both the
12178 traditional floating point and the altivec registers). */
12179 if (rclass == VSX_REGS
12180 && (regno == -1 || VSX_REGNO_P (regno)))
12181 return NO_REGS;
12182
12183 /* Constants, memory, and FP registers can go into FP registers. */
12184 if ((regno == -1 || FP_REGNO_P (regno))
12185 && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
12186 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
12187
12188 /* Memory and AltiVec registers can go into AltiVec registers. */
12189 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
12190 && rclass == ALTIVEC_REGS)
12191 return NO_REGS;
12192
12193 /* We can copy among the CR registers. */
12194 if ((rclass == CR_REGS || rclass == CR0_REGS)
12195 && regno >= 0 && CR_REGNO_P (regno))
12196 return NO_REGS;
12197
12198 /* Otherwise, we need GENERAL_REGS. */
12199 return GENERAL_REGS;
12200 }
12201
12202 /* Debug version of rs6000_secondary_reload_class. */
12203 static enum reg_class
12204 rs6000_debug_secondary_reload_class (enum reg_class rclass,
12205 machine_mode mode, rtx in)
12206 {
12207 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
12208 fprintf (stderr,
12209 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
12210 "mode = %s, input rtx:\n",
12211 reg_class_names[ret], reg_class_names[rclass],
12212 GET_MODE_NAME (mode));
12213 debug_rtx (in);
12214
12215 return ret;
12216 }
12217
12218 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
12219
12220 static bool
12221 rs6000_can_change_mode_class (machine_mode from,
12222 machine_mode to,
12223 reg_class_t rclass)
12224 {
12225 unsigned from_size = GET_MODE_SIZE (from);
12226 unsigned to_size = GET_MODE_SIZE (to);
12227
12228 if (from_size != to_size)
12229 {
12230 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
12231
12232 if (reg_classes_intersect_p (xclass, rclass))
12233 {
12234 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
12235 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
12236 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
12237 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
12238
12239 /* Don't allow 64-bit types to overlap with 128-bit types that take a
12240 single register under VSX because the scalar part of the register
12241 is in the upper 64 bits, and not the lower 64 bits. Types like
12242 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
12243 IEEE floating point can't overlap, and neither can small
12244 values. */
12245
12246 if (to_float128_vector_p && from_float128_vector_p)
12247 return true;
12248
12249 else if (to_float128_vector_p || from_float128_vector_p)
12250 return false;
12251
12252 /* TDmode in floating-mode registers must always go into a register
12253 pair with the most significant word in the even-numbered register
12254 to match ISA requirements. In little-endian mode, this does not
12255 match subreg numbering, so we cannot allow subregs. */
12256 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
12257 return false;
12258
12259 if (from_size < 8 || to_size < 8)
12260 return false;
12261
12262 if (from_size == 8 && (8 * to_nregs) != to_size)
12263 return false;
12264
12265 if (to_size == 8 && (8 * from_nregs) != from_size)
12266 return false;
12267
12268 return true;
12269 }
12270 else
12271 return true;
12272 }
12273
12274 /* Since the VSX register set includes traditional floating point registers
12275 and altivec registers, just check for the size being different instead of
12276 trying to check whether the modes are vector modes. Otherwise it won't
12277 allow say DF and DI to change classes. For types like TFmode and TDmode
12278 that take 2 64-bit registers, rather than a single 128-bit register, don't
12279 allow subregs of those types to other 128-bit types. */
12280 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
12281 {
12282 unsigned num_regs = (from_size + 15) / 16;
12283 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
12284 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
12285 return false;
12286
12287 return (from_size == 8 || from_size == 16);
12288 }
12289
12290 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
12291 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
12292 return false;
12293
12294 return true;
12295 }
12296
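/* Worked example (illustrative, not from the original source): under
   VSX a 64-bit scalar such as DFmode lives in the upper 64 bits of the
   128-bit register, so a subreg such as

     (subreg:DF (reg:KF vs0) 0)

   cannot just reinterpret the register's bytes and is rejected above,
   while subregs between two 128-bit IEEE modes (say KFmode and an
   IEEE-format TFmode) are allowed.  */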
12297 /* Debug version of rs6000_can_change_mode_class. */
12298 static bool
12299 rs6000_debug_can_change_mode_class (machine_mode from,
12300 machine_mode to,
12301 reg_class_t rclass)
12302 {
12303 bool ret = rs6000_can_change_mode_class (from, to, rclass);
12304
12305 fprintf (stderr,
12306 "rs6000_can_change_mode_class, return %s, from = %s, "
12307 "to = %s, rclass = %s\n",
12308 ret ? "true" : "false",
12309 GET_MODE_NAME (from), GET_MODE_NAME (to),
12310 reg_class_names[rclass]);
12311
12312 return ret;
12313 }
12314 \f
12315 /* Return a string to do a move operation of 128 bits of data. */
12316
12317 const char *
12318 rs6000_output_move_128bit (rtx operands[])
12319 {
12320 rtx dest = operands[0];
12321 rtx src = operands[1];
12322 machine_mode mode = GET_MODE (dest);
12323 int dest_regno;
12324 int src_regno;
12325 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
12326 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
12327
12328 if (REG_P (dest))
12329 {
12330 dest_regno = REGNO (dest);
12331 dest_gpr_p = INT_REGNO_P (dest_regno);
12332 dest_fp_p = FP_REGNO_P (dest_regno);
12333 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
12334 dest_vsx_p = dest_fp_p | dest_vmx_p;
12335 }
12336 else
12337 {
12338 dest_regno = -1;
12339 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
12340 }
12341
12342 if (REG_P (src))
12343 {
12344 src_regno = REGNO (src);
12345 src_gpr_p = INT_REGNO_P (src_regno);
12346 src_fp_p = FP_REGNO_P (src_regno);
12347 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
12348 src_vsx_p = src_fp_p | src_vmx_p;
12349 }
12350 else
12351 {
12352 src_regno = -1;
12353 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
12354 }
12355
12356 /* Register moves. */
12357 if (dest_regno >= 0 && src_regno >= 0)
12358 {
12359 if (dest_gpr_p)
12360 {
12361 if (src_gpr_p)
12362 return "#";
12363
12364 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
12365 return (WORDS_BIG_ENDIAN
12366 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
12367 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
12368
12369 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
12370 return "#";
12371 }
12372
12373 else if (TARGET_VSX && dest_vsx_p)
12374 {
12375 if (src_vsx_p)
12376 return "xxlor %x0,%x1,%x1";
12377
12378 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
12379 return (WORDS_BIG_ENDIAN
12380 ? "mtvsrdd %x0,%1,%L1"
12381 : "mtvsrdd %x0,%L1,%1");
12382
12383 else if (TARGET_DIRECT_MOVE && src_gpr_p)
12384 return "#";
12385 }
12386
12387 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
12388 return "vor %0,%1,%1";
12389
12390 else if (dest_fp_p && src_fp_p)
12391 return "#";
12392 }
12393
12394 /* Loads. */
12395 else if (dest_regno >= 0 && MEM_P (src))
12396 {
12397 if (dest_gpr_p)
12398 {
12399 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
12400 return "lq %0,%1";
12401 else
12402 return "#";
12403 }
12404
12405 else if (TARGET_ALTIVEC && dest_vmx_p
12406 && altivec_indexed_or_indirect_operand (src, mode))
12407 return "lvx %0,%y1";
12408
12409 else if (TARGET_VSX && dest_vsx_p)
12410 {
12411 if (mode_supports_dq_form (mode)
12412 && quad_address_p (XEXP (src, 0), mode, true))
12413 return "lxv %x0,%1";
12414
12415 else if (TARGET_P9_VECTOR)
12416 return "lxvx %x0,%y1";
12417
12418 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
12419 return "lxvw4x %x0,%y1";
12420
12421 else
12422 return "lxvd2x %x0,%y1";
12423 }
12424
12425 else if (TARGET_ALTIVEC && dest_vmx_p)
12426 return "lvx %0,%y1";
12427
12428 else if (dest_fp_p)
12429 return "#";
12430 }
12431
12432 /* Stores. */
12433 else if (src_regno >= 0 && MEM_P (dest))
12434 {
12435 if (src_gpr_p)
12436 {
12437 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
12438 return "stq %1,%0";
12439 else
12440 return "#";
12441 }
12442
12443 else if (TARGET_ALTIVEC && src_vmx_p
12444 && altivec_indexed_or_indirect_operand (dest, mode))
12445 return "stvx %1,%y0";
12446
12447 else if (TARGET_VSX && src_vsx_p)
12448 {
12449 if (mode_supports_dq_form (mode)
12450 && quad_address_p (XEXP (dest, 0), mode, true))
12451 return "stxv %x1,%0";
12452
12453 else if (TARGET_P9_VECTOR)
12454 return "stxvx %x1,%y0";
12455
12456 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
12457 return "stxvw4x %x1,%y0";
12458
12459 else
12460 return "stxvd2x %x1,%y0";
12461 }
12462
12463 else if (TARGET_ALTIVEC && src_vmx_p)
12464 return "stvx %1,%y0";
12465
12466 else if (src_fp_p)
12467 return "#";
12468 }
12469
12470 /* Constants. */
12471 else if (dest_regno >= 0
12472 && (CONST_INT_P (src)
12473 || CONST_WIDE_INT_P (src)
12474 || CONST_DOUBLE_P (src)
12475 || GET_CODE (src) == CONST_VECTOR))
12476 {
12477 if (dest_gpr_p)
12478 return "#";
12479
12480 else if ((dest_vmx_p && TARGET_ALTIVEC)
12481 || (dest_vsx_p && TARGET_VSX))
12482 return output_vec_const_move (operands);
12483 }
12484
12485 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
12486 }
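/* Illustrative expansions (a sketch, assuming a power9 target where
   TARGET_DIRECT_MOVE_128 is set): a big-endian TImode move from a VSX
   register to a GPR pair selects

     mfvsrd %0,%x1
     mfvsrld %L0,%x1

   whereas returning "#" signals that the move must be broken into
   word-sized pieces by a later split pattern.  */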
12487
12488 /* Validate a 128-bit move. */
12489 bool
12490 rs6000_move_128bit_ok_p (rtx operands[])
12491 {
12492 machine_mode mode = GET_MODE (operands[0]);
12493 return (gpc_reg_operand (operands[0], mode)
12494 || gpc_reg_operand (operands[1], mode));
12495 }
12496
12497 /* Return true if a 128-bit move needs to be split. */
12498 bool
12499 rs6000_split_128bit_ok_p (rtx operands[])
12500 {
12501 if (!reload_completed)
12502 return false;
12503
12504 if (!gpr_or_gpr_p (operands[0], operands[1]))
12505 return false;
12506
12507 if (quad_load_store_p (operands[0], operands[1]))
12508 return false;
12509
12510 return true;
12511 }
12512
12513 \f
12514 /* Given a comparison operation, return the bit number in CCR to test. We
12515 know this is a valid comparison.
12516
12517 SCC_P is 1 if this is for an scc. That means that %D will have been
12518 used instead of %C, so the bits will be in different places.
12519
12520 Return -1 if OP isn't a valid comparison for some reason. */
12521
12522 int
12523 ccr_bit (rtx op, int scc_p)
12524 {
12525 enum rtx_code code = GET_CODE (op);
12526 machine_mode cc_mode;
12527 int cc_regnum;
12528 int base_bit;
12529 rtx reg;
12530
12531 if (!COMPARISON_P (op))
12532 return -1;
12533
12534 reg = XEXP (op, 0);
12535
12536 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
12537 return -1;
12538
12539 cc_mode = GET_MODE (reg);
12540 cc_regnum = REGNO (reg);
12541 base_bit = 4 * (cc_regnum - CR0_REGNO);
12542
12543 validate_condition_mode (code, cc_mode);
12544
12545 /* When generating a sCOND operation, only positive conditions are
12546 allowed. */
12547 if (scc_p)
12548 switch (code)
12549 {
12550 case EQ:
12551 case GT:
12552 case LT:
12553 case UNORDERED:
12554 case GTU:
12555 case LTU:
12556 break;
12557 default:
12558 return -1;
12559 }
12560
12561 switch (code)
12562 {
12563 case NE:
12564 return scc_p ? base_bit + 3 : base_bit + 2;
12565 case EQ:
12566 return base_bit + 2;
12567 case GT: case GTU: case UNLE:
12568 return base_bit + 1;
12569 case LT: case LTU: case UNGE:
12570 return base_bit;
12571 case ORDERED: case UNORDERED:
12572 return base_bit + 3;
12573
12574 case GE: case GEU:
12575 /* If scc, we will have done a cror to put the bit in the
12576 unordered position. So test that bit. For integer, this is ! LT
12577 unless this is an scc insn. */
12578 return scc_p ? base_bit + 3 : base_bit;
12579
12580 case LE: case LEU:
12581 return scc_p ? base_bit + 3 : base_bit + 1;
12582
12583 default:
12584 return -1;
12585 }
12586 }
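/* Worked example (illustrative): each CR field holds four bits in the
   order LT, GT, EQ, SO/UN.  For an EQ comparison kept in CR3,
   base_bit = 4 * 3 = 12 and ccr_bit returns 12 + 2 = 14, the bit a
   conditional branch on equality would test.  */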
12587 \f
12588 /* Return the GOT register. */
12589
12590 rtx
12591 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
12592 {
12593 /* The second flow pass currently (June 1999) can't update
12594 regs_ever_live without disturbing other parts of the compiler, so
12595 update it here to make the prolog/epilogue code happy. */
12596 if (!can_create_pseudo_p ()
12597 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
12598 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
12599
12600 crtl->uses_pic_offset_table = 1;
12601
12602 return pic_offset_table_rtx;
12603 }
12604 \f
12605 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
12606
12607 /* Write out a function code label. */
12608
12609 void
12610 rs6000_output_function_entry (FILE *file, const char *fname)
12611 {
12612 if (fname[0] != '.')
12613 {
12614 switch (DEFAULT_ABI)
12615 {
12616 default:
12617 gcc_unreachable ();
12618
12619 case ABI_AIX:
12620 if (DOT_SYMBOLS)
12621 putc ('.', file);
12622 else
12623 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
12624 break;
12625
12626 case ABI_ELFv2:
12627 case ABI_V4:
12628 case ABI_DARWIN:
12629 break;
12630 }
12631 }
12632
12633 RS6000_OUTPUT_BASENAME (file, fname);
12634 }
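/* For example (illustrative): under the AIX ABI with dot symbols, a
   function "foo" has its code entry point emitted as ".foo", while
   ELFv2, V4 and Darwin emit the name unchanged.  */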
12635
12636 /* Print an operand. Recognize special options, documented below. */
12637
12638 #if TARGET_ELF
12639 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
12640 only introduced by the linker, when applying the sda21
12641 relocation. */
12642 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
12643 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
12644 #else
12645 #define SMALL_DATA_RELOC "sda21"
12646 #define SMALL_DATA_REG 0
12647 #endif
12648
12649 void
12650 print_operand (FILE *file, rtx x, int code)
12651 {
12652 int i;
12653 unsigned HOST_WIDE_INT uval;
12654
12655 switch (code)
12656 {
12657 /* %a is output_address. */
12658
12659 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
12660 output_operand. */
12661
12662 case 'D':
12663 /* Like 'J' but get to the GT bit only. */
12664 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12665 {
12666 output_operand_lossage ("invalid %%D value");
12667 return;
12668 }
12669
12670 /* Bit 1 is GT bit. */
12671 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
12672
12673 /* Add one for shift count in rlinm for scc. */
12674 fprintf (file, "%d", i + 1);
12675 return;
12676
12677 case 'e':
12678 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
12679 if (! INT_P (x))
12680 {
12681 output_operand_lossage ("invalid %%e value");
12682 return;
12683 }
12684
12685 uval = INTVAL (x);
12686 if ((uval & 0xffff) == 0 && uval != 0)
12687 putc ('s', file);
12688 return;
12689
12690 case 'E':
12691 /* X is a CR register. Print the number of the EQ bit of the CR. */
12692 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12693 output_operand_lossage ("invalid %%E value");
12694 else
12695 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
12696 return;
12697
12698 case 'f':
12699 /* X is a CR register. Print the shift count needed to move it
12700 to the high-order four bits. */
12701 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12702 output_operand_lossage ("invalid %%f value");
12703 else
12704 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
12705 return;
12706
12707 case 'F':
12708 /* Similar, but print the count for the rotate in the opposite
12709 direction. */
12710 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12711 output_operand_lossage ("invalid %%F value");
12712 else
12713 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
12714 return;
12715
12716 case 'G':
12717 /* X is a constant integer. If it is negative, print "m",
12718 otherwise print "z". This is to make an aze or ame insn. */
12719 if (!CONST_INT_P (x))
12720 output_operand_lossage ("invalid %%G value");
12721 else if (INTVAL (x) >= 0)
12722 putc ('z', file);
12723 else
12724 putc ('m', file);
12725 return;
12726
12727 case 'h':
12728 /* If constant, output low-order five bits. Otherwise, write
12729 normally. */
12730 if (INT_P (x))
12731 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
12732 else
12733 print_operand (file, x, 0);
12734 return;
12735
12736 case 'H':
12737 /* If constant, output low-order six bits. Otherwise, write
12738 normally. */
12739 if (INT_P (x))
12740 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
12741 else
12742 print_operand (file, x, 0);
12743 return;
12744
12745 case 'I':
12746 /* Print `i' if this is a constant, else nothing. */
12747 if (INT_P (x))
12748 putc ('i', file);
12749 return;
12750
12751 case 'j':
12752 /* Write the bit number in CCR for jump. */
12753 i = ccr_bit (x, 0);
12754 if (i == -1)
12755 output_operand_lossage ("invalid %%j code");
12756 else
12757 fprintf (file, "%d", i);
12758 return;
12759
12760 case 'J':
12761 /* Similar, but add one for shift count in rlinm for scc and pass
12762 scc flag to `ccr_bit'. */
12763 i = ccr_bit (x, 1);
12764 if (i == -1)
12765 output_operand_lossage ("invalid %%J code");
12766 else
12767 /* If we want bit 31, write a shift count of zero, not 32. */
12768 fprintf (file, "%d", i == 31 ? 0 : i + 1);
12769 return;
12770
12771 case 'k':
12772 /* X must be a constant. Write the 1's complement of the
12773 constant. */
12774 if (! INT_P (x))
12775 output_operand_lossage ("invalid %%k value");
12776 else
12777 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
12778 return;
12779
12780 case 'K':
12781 /* X must be a symbolic constant on ELF. Write an
12782 expression suitable for an 'addi' that adds in the low 16
12783 bits of the MEM. */
12784 if (GET_CODE (x) == CONST)
12785 {
12786 if (GET_CODE (XEXP (x, 0)) != PLUS
12787 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
12788 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
12789 || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
12790 output_operand_lossage ("invalid %%K value");
12791 }
12792 print_operand_address (file, x);
12793 fputs ("@l", file);
12794 return;
12795
12796 /* %l is output_asm_label. */
12797
12798 case 'L':
12799 /* Write second word of DImode or DFmode reference. Works on register
12800 or non-indexed memory only. */
12801 if (REG_P (x))
12802 fputs (reg_names[REGNO (x) + 1], file);
12803 else if (MEM_P (x))
12804 {
12805 machine_mode mode = GET_MODE (x);
12806 /* Handle possible auto-increment. Since it is pre-increment and
12807 we have already done it, we can just use an offset of one word. */
12808 if (GET_CODE (XEXP (x, 0)) == PRE_INC
12809 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
12810 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
12811 UNITS_PER_WORD));
12812 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
12813 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
12814 UNITS_PER_WORD));
12815 else
12816 output_address (mode, XEXP (adjust_address_nv (x, SImode,
12817 UNITS_PER_WORD),
12818 0));
12819
12820 if (small_data_operand (x, GET_MODE (x)))
12821 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
12822 reg_names[SMALL_DATA_REG]);
12823 }
12824 return;
12825
12826 case 'N': /* Unused */
12827 /* Write the number of elements in the vector times 4. */
12828 if (GET_CODE (x) != PARALLEL)
12829 output_operand_lossage ("invalid %%N value");
12830 else
12831 fprintf (file, "%d", XVECLEN (x, 0) * 4);
12832 return;
12833
12834 case 'O': /* Unused */
12835 /* Similar, but subtract 1 first. */
12836 if (GET_CODE (x) != PARALLEL)
12837 output_operand_lossage ("invalid %%O value");
12838 else
12839 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
12840 return;
12841
12842 case 'p':
12843 /* X is a CONST_INT that is a power of two. Output the logarithm. */
12844 if (! INT_P (x)
12845 || INTVAL (x) < 0
12846 || (i = exact_log2 (INTVAL (x))) < 0)
12847 output_operand_lossage ("invalid %%p value");
12848 else
12849 fprintf (file, "%d", i);
12850 return;
12851
12852 case 'P':
12853 /* The operand must be an indirect memory reference. The result
12854 is the register name. */
12855 if (!MEM_P (x) || !REG_P (XEXP (x, 0))
12856 || REGNO (XEXP (x, 0)) >= 32)
12857 output_operand_lossage ("invalid %%P value");
12858 else
12859 fputs (reg_names[REGNO (XEXP (x, 0))], file);
12860 return;
12861
12862 case 'q':
12863 /* This outputs the logical code corresponding to a boolean
12864 expression. The expression may have one or both operands
12865 negated (if one, only the first one). For condition register
12866 logical operations, it will also treat the negated
12867 CR codes as NOTs, but not handle NOTs of them. */
12868 {
12869 const char *const *t = 0;
12870 const char *s;
12871 enum rtx_code code = GET_CODE (x);
12872 static const char * const tbl[3][3] = {
12873 { "and", "andc", "nor" },
12874 { "or", "orc", "nand" },
12875 { "xor", "eqv", "xor" } };
12876
12877 if (code == AND)
12878 t = tbl[0];
12879 else if (code == IOR)
12880 t = tbl[1];
12881 else if (code == XOR)
12882 t = tbl[2];
12883 else
12884 output_operand_lossage ("invalid %%q value");
12885
12886 if (GET_CODE (XEXP (x, 0)) != NOT)
12887 s = t[0];
12888 else
12889 {
12890 if (GET_CODE (XEXP (x, 1)) == NOT)
12891 s = t[2];
12892 else
12893 s = t[1];
12894 }
12895
12896 fputs (s, file);
12897 }
12898 return;
12899
12900 case 'Q':
12901 if (! TARGET_MFCRF)
12902 return;
12903 fputc (',', file);
12904 /* FALLTHRU */
12905
12906 case 'R':
12907 /* X is a CR register. Print the mask for `mtcrf'. */
12908 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12909 output_operand_lossage ("invalid %%R value");
12910 else
12911 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
12912 return;
12913
12914 case 's':
12915 /* Low 5 bits of 32 - value */
12916 if (! INT_P (x))
12917 output_operand_lossage ("invalid %%s value");
12918 else
12919 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
12920 return;
12921
12922 case 't':
12923 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
12924 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12925 {
12926 output_operand_lossage ("invalid %%t value");
12927 return;
12928 }
12929
12930 /* Bit 3 is OV bit. */
12931 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
12932
12933 /* If we want bit 31, write a shift count of zero, not 32. */
12934 fprintf (file, "%d", i == 31 ? 0 : i + 1);
12935 return;
12936
12937 case 'T':
12938 /* Print the symbolic name of a branch target register. */
12939 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
12940 x = XVECEXP (x, 0, 0);
12941 if (!REG_P (x) || (REGNO (x) != LR_REGNO
12942 && REGNO (x) != CTR_REGNO))
12943 output_operand_lossage ("invalid %%T value");
12944 else if (REGNO (x) == LR_REGNO)
12945 fputs ("lr", file);
12946 else
12947 fputs ("ctr", file);
12948 return;
12949
12950 case 'u':
12951 /* High-order or low-order 16 bits of constant, whichever is non-zero,
12952 for use in an unsigned operand. */
12953 if (! INT_P (x))
12954 {
12955 output_operand_lossage ("invalid %%u value");
12956 return;
12957 }
12958
12959 uval = INTVAL (x);
12960 if ((uval & 0xffff) == 0)
12961 uval >>= 16;
12962
12963 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
12964 return;
12965
12966 case 'v':
12967 /* High-order 16 bits of constant for use in a signed operand. */
12968 if (! INT_P (x))
12969 output_operand_lossage ("invalid %%v value");
12970 else
12971 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
12972 (INTVAL (x) >> 16) & 0xffff);
12973 return;
12974
12975 case 'U':
12976 /* Print `u' if this has an auto-increment or auto-decrement. */
12977 if (MEM_P (x)
12978 && (GET_CODE (XEXP (x, 0)) == PRE_INC
12979 || GET_CODE (XEXP (x, 0)) == PRE_DEC
12980 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
12981 putc ('u', file);
12982 return;
12983
12984 case 'V':
12985 /* Print the trap code for this operand. */
12986 switch (GET_CODE (x))
12987 {
12988 case EQ:
12989 fputs ("eq", file); /* 4 */
12990 break;
12991 case NE:
12992 fputs ("ne", file); /* 24 */
12993 break;
12994 case LT:
12995 fputs ("lt", file); /* 16 */
12996 break;
12997 case LE:
12998 fputs ("le", file); /* 20 */
12999 break;
13000 case GT:
13001 fputs ("gt", file); /* 8 */
13002 break;
13003 case GE:
13004 fputs ("ge", file); /* 12 */
13005 break;
13006 case LTU:
13007 fputs ("llt", file); /* 2 */
13008 break;
13009 case LEU:
13010 fputs ("lle", file); /* 6 */
13011 break;
13012 case GTU:
13013 fputs ("lgt", file); /* 1 */
13014 break;
13015 case GEU:
13016 fputs ("lge", file); /* 5 */
13017 break;
13018 default:
13019 output_operand_lossage ("invalid %%V value");
13020 }
13021 break;
13022
13023 case 'w':
13024 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
13025 normally. */
13026 if (INT_P (x))
13027 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
13028 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
13029 else
13030 print_operand (file, x, 0);
13031 return;
13032
13033 case 'x':
13034 /* X is a FPR or Altivec register used in a VSX context. */
13035 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
13036 output_operand_lossage ("invalid %%x value");
13037 else
13038 {
13039 int reg = REGNO (x);
13040 int vsx_reg = (FP_REGNO_P (reg)
13041 ? reg - 32
13042 : reg - FIRST_ALTIVEC_REGNO + 32);
13043
13044 #ifdef TARGET_REGNAMES
13045 if (TARGET_REGNAMES)
13046 fprintf (file, "%%vs%d", vsx_reg);
13047 else
13048 #endif
13049 fprintf (file, "%d", vsx_reg);
13050 }
13051 return;
13052
13053 case 'X':
13054 if (MEM_P (x)
13055 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
13056 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
13057 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
13058 putc ('x', file);
13059 return;
13060
13061 case 'Y':
13062 /* Like 'L', for third word of TImode/PTImode */
13063 if (REG_P (x))
13064 fputs (reg_names[REGNO (x) + 2], file);
13065 else if (MEM_P (x))
13066 {
13067 machine_mode mode = GET_MODE (x);
13068 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13069 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13070 output_address (mode, plus_constant (Pmode,
13071 XEXP (XEXP (x, 0), 0), 8));
13072 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13073 output_address (mode, plus_constant (Pmode,
13074 XEXP (XEXP (x, 0), 0), 8));
13075 else
13076 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
13077 if (small_data_operand (x, GET_MODE (x)))
13078 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13079 reg_names[SMALL_DATA_REG]);
13080 }
13081 return;
13082
13083 case 'z':
13084 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13085 x = XVECEXP (x, 0, 1);
13086 /* X is a SYMBOL_REF. Write out the name preceded by a
13087 period and without any trailing data in brackets. Used for function
13088 names. If we are configured for System V (or the embedded ABI) on
13089 the PowerPC, do not emit the period, since those systems do not use
13090 TOCs and the like. */
13091 if (!SYMBOL_REF_P (x))
13092 {
13093 output_operand_lossage ("invalid %%z value");
13094 return;
13095 }
13096
13097 /* For macho, check to see if we need a stub. */
13098 if (TARGET_MACHO)
13099 {
13100 const char *name = XSTR (x, 0);
13101 #if TARGET_MACHO
13102 if (darwin_symbol_stubs
13103 && MACHOPIC_INDIRECT
13104 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13105 name = machopic_indirection_name (x, /*stub_p=*/true);
13106 #endif
13107 assemble_name (file, name);
13108 }
13109 else if (!DOT_SYMBOLS)
13110 assemble_name (file, XSTR (x, 0));
13111 else
13112 rs6000_output_function_entry (file, XSTR (x, 0));
13113 return;
13114
13115 case 'Z':
13116 /* Like 'L', for last word of TImode/PTImode. */
13117 if (REG_P (x))
13118 fputs (reg_names[REGNO (x) + 3], file);
13119 else if (MEM_P (x))
13120 {
13121 machine_mode mode = GET_MODE (x);
13122 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13123 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13124 output_address (mode, plus_constant (Pmode,
13125 XEXP (XEXP (x, 0), 0), 12));
13126 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13127 output_address (mode, plus_constant (Pmode,
13128 XEXP (XEXP (x, 0), 0), 12));
13129 else
13130 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
13131 if (small_data_operand (x, GET_MODE (x)))
13132 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13133 reg_names[SMALL_DATA_REG]);
13134 }
13135 return;
13136
13137 /* Print AltiVec memory operand. */
13138 case 'y':
13139 {
13140 rtx tmp;
13141
13142 gcc_assert (MEM_P (x));
13143
13144 tmp = XEXP (x, 0);
13145
13146 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
13147 && GET_CODE (tmp) == AND
13148 && CONST_INT_P (XEXP (tmp, 1))
13149 && INTVAL (XEXP (tmp, 1)) == -16)
13150 tmp = XEXP (tmp, 0);
13151 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
13152 && GET_CODE (tmp) == PRE_MODIFY)
13153 tmp = XEXP (tmp, 1);
13154 if (REG_P (tmp))
13155 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
13156 else
13157 {
13158 if (GET_CODE (tmp) != PLUS
13159 || !REG_P (XEXP (tmp, 0))
13160 || !REG_P (XEXP (tmp, 1)))
13161 {
13162 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
13163 break;
13164 }
13165
13166 if (REGNO (XEXP (tmp, 0)) == 0)
13167 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
13168 reg_names[ REGNO (XEXP (tmp, 0)) ]);
13169 else
13170 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
13171 reg_names[ REGNO (XEXP (tmp, 1)) ]);
13172 }
13173 break;
13174 }
13175
13176 case 0:
13177 if (REG_P (x))
13178 fprintf (file, "%s", reg_names[REGNO (x)]);
13179 else if (MEM_P (x))
13180 {
13181 /* We need to handle PRE_INC and PRE_DEC here, since we need to
13182 know the width from the mode. */
13183 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
13184 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
13185 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
13186 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
13187 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
13188 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
13189 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13190 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
13191 else
13192 output_address (GET_MODE (x), XEXP (x, 0));
13193 }
13194 else if (toc_relative_expr_p (x, false,
13195 &tocrel_base_oac, &tocrel_offset_oac))
13196 /* This hack along with a corresponding hack in
13197 rs6000_output_addr_const_extra arranges to output addends
13198 where the assembler expects to find them. eg.
13199 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
13200 without this hack would be output as "x@toc+4". We
13201 want "x+4@toc". */
13202 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
13203 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
13204 output_addr_const (file, XVECEXP (x, 0, 0));
13205 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13206 output_addr_const (file, XVECEXP (x, 0, 1));
13207 else
13208 output_addr_const (file, x);
13209 return;
13210
13211 case '&':
13212 if (const char *name = get_some_local_dynamic_name ())
13213 assemble_name (file, name);
13214 else
13215 output_operand_lossage ("'%%&' used without any "
13216 "local dynamic TLS references");
13217 return;
13218
13219 default:
13220 output_operand_lossage ("invalid %%xn code");
13221 }
13222 }
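/* Illustrative use (not from the original source): in an insn template
   such as

     "mr %0,%1\n\tmr %L0,%L1"

   the bare operand prints the first register of a multi-word value and
   %L prints the second, so a DImode register move on a 32-bit target
   becomes two "mr" instructions.  */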
13223 \f
13224 /* Print the address of an operand. */
13225
13226 void
13227 print_operand_address (FILE *file, rtx x)
13228 {
13229 if (REG_P (x))
13230 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
13231
13232 /* Is it a PC-relative address? */
13233 else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
13234 {
13235 HOST_WIDE_INT offset;
13236
13237 if (GET_CODE (x) == CONST)
13238 x = XEXP (x, 0);
13239
13240 if (GET_CODE (x) == PLUS)
13241 {
13242 offset = INTVAL (XEXP (x, 1));
13243 x = XEXP (x, 0);
13244 }
13245 else
13246 offset = 0;
13247
13248 output_addr_const (file, x);
13249
13250 if (offset)
13251 fprintf (file, "%+" PRId64, offset);
13252
13253 if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
13254 fprintf (file, "@got");
13255
13256 fprintf (file, "@pcrel");
13257 }
13258 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
13259 || GET_CODE (x) == LABEL_REF)
13260 {
13261 output_addr_const (file, x);
13262 if (small_data_operand (x, GET_MODE (x)))
13263 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13264 reg_names[SMALL_DATA_REG]);
13265 else
13266 gcc_assert (!TARGET_TOC);
13267 }
13268 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
13269 && REG_P (XEXP (x, 1)))
13270 {
13271 if (REGNO (XEXP (x, 0)) == 0)
13272 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
13273 reg_names[ REGNO (XEXP (x, 0)) ]);
13274 else
13275 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
13276 reg_names[ REGNO (XEXP (x, 1)) ]);
13277 }
13278 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
13279 && CONST_INT_P (XEXP (x, 1)))
13280 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
13281 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
13282 #if TARGET_MACHO
13283 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
13284 && CONSTANT_P (XEXP (x, 1)))
13285 {
13286 fprintf (file, "lo16(");
13287 output_addr_const (file, XEXP (x, 1));
13288 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
13289 }
13290 #endif
13291 #if TARGET_ELF
13292 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
13293 && CONSTANT_P (XEXP (x, 1)))
13294 {
13295 output_addr_const (file, XEXP (x, 1));
13296 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
13297 }
13298 #endif
13299 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
13300 {
13301 /* This hack along with a corresponding hack in
13302 rs6000_output_addr_const_extra arranges to output addends
13303 where the assembler expects to find them. eg.
13304 (lo_sum (reg 9)
13305 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
13306 without this hack would be output as "x@toc+8@l(9)". We
13307 want "x+8@toc@l(9)". */
13308 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
13309 if (GET_CODE (x) == LO_SUM)
13310 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
13311 else
13312 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
13313 }
13314 else
13315 output_addr_const (file, x);
13316 }
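/* Example address forms (illustrative): a bare register prints as
   "0(9)", register plus constant as "16(9)", and a TOC-relative lo_sum
   as "x+8@toc@l(9)", per the hack described above.  */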
13317 \f
13318 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13319
13320 bool
13321 rs6000_output_addr_const_extra (FILE *file, rtx x)
13322 {
13323 if (GET_CODE (x) == UNSPEC)
13324 switch (XINT (x, 1))
13325 {
13326 case UNSPEC_TOCREL:
13327 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
13328 && REG_P (XVECEXP (x, 0, 1))
13329 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
13330 output_addr_const (file, XVECEXP (x, 0, 0));
13331 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
13332 {
13333 if (INTVAL (tocrel_offset_oac) >= 0)
13334 fprintf (file, "+");
13335 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
13336 }
13337 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
13338 {
13339 putc ('-', file);
13340 assemble_name (file, toc_label_name);
13341 need_toc_init = 1;
13342 }
13343 else if (TARGET_ELF)
13344 fputs ("@toc", file);
13345 return true;
13346
13347 #if TARGET_MACHO
13348 case UNSPEC_MACHOPIC_OFFSET:
13349 output_addr_const (file, XVECEXP (x, 0, 0));
13350 putc ('-', file);
13351 machopic_output_function_base_name (file);
13352 return true;
13353 #endif
13354 }
13355 return false;
13356 }
13357 \f
13358 /* Target hook for assembling integer objects. The PowerPC version has
13359 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
13360 is defined. It also needs to handle DI-mode objects on 64-bit
13361 targets. */
13362
13363 static bool
13364 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
13365 {
13366 #ifdef RELOCATABLE_NEEDS_FIXUP
13367 /* Special handling for SI values. */
13368 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
13369 {
13370 static int recurse = 0;
13371
13372 /* For -mrelocatable, we mark all addresses that need to be fixed up in
13373 the .fixup section. Since the TOC section is already relocated, we
13374 don't need to mark it here. We used to skip the text section, but it
13375 should never be valid for relocated addresses to be placed in the text
13376 section. */
13377 if (DEFAULT_ABI == ABI_V4
13378 && (TARGET_RELOCATABLE || flag_pic > 1)
13379 && in_section != toc_section
13380 && !recurse
13381 && !CONST_SCALAR_INT_P (x)
13382 && CONSTANT_P (x))
13383 {
13384 char buf[256];
13385
13386 recurse = 1;
13387 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
13388 fixuplabelno++;
13389 ASM_OUTPUT_LABEL (asm_out_file, buf);
13390 fprintf (asm_out_file, "\t.long\t(");
13391 output_addr_const (asm_out_file, x);
13392 fprintf (asm_out_file, ")@fixup\n");
13393 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
13394 ASM_OUTPUT_ALIGN (asm_out_file, 2);
13395 fprintf (asm_out_file, "\t.long\t");
13396 assemble_name (asm_out_file, buf);
13397 fprintf (asm_out_file, "\n\t.previous\n");
13398 recurse = 0;
13399 return true;
13400 }
13401 /* Remove initial .'s to turn a -mcall-aixdesc function
13402 address into the address of the descriptor, not the function
13403 itself. */
13404 else if (SYMBOL_REF_P (x)
13405 && XSTR (x, 0)[0] == '.'
13406 && DEFAULT_ABI == ABI_AIX)
13407 {
13408 const char *name = XSTR (x, 0);
13409 while (*name == '.')
13410 name++;
13411
13412 fprintf (asm_out_file, "\t.long\t%s\n", name);
13413 return true;
13414 }
13415 }
13416 #endif /* RELOCATABLE_NEEDS_FIXUP */
13417 return default_assemble_integer (x, size, aligned_p);
13418 }
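/* Sample output (illustrative, roughly what the code above emits) for
   a relocatable SImode address under -mrelocatable:

     .LCP0:
	.long	(sym)@fixup
	.section	".fixup","aw"
	.align 2
	.long	.LCP0
	.previous
   */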
13419
13420 /* Return a template string for assembly to emit when making an
13421 external call. FUNOP is the call mem argument operand number. */
13422
13423 static const char *
13424 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
13425 {
13426 /* -Wformat-overflow workaround, without which gcc thinks that %u
13427 might produce 10 digits. */
13428 gcc_assert (funop <= MAX_RECOG_OPERANDS);
13429
13430 char arg[12];
13431 arg[0] = 0;
13432 if (GET_CODE (operands[funop + 1]) == UNSPEC)
13433 {
13434 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
13435 sprintf (arg, "(%%%u@tlsgd)", funop + 1);
13436 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
13437 sprintf (arg, "(%%&@tlsld)");
13438 }
13439
13440 /* The magic 32768 offset here corresponds to the offset of
13441 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
13442 char z[11];
13443 sprintf (z, "%%z%u%s", funop,
13444 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
13445 ? "+32768" : ""));
13446
13447 static char str[32]; /* 1 spare */
13448 if (rs6000_pcrel_p (cfun))
13449 sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
13450 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
13451 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
13452 sibcall ? "" : "\n\tnop");
13453 else if (DEFAULT_ABI == ABI_V4)
13454 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
13455 flag_pic ? "@plt" : "");
13456 #if TARGET_MACHO
13457 /* If/when we remove the mlongcall opt, we can share the AIX/ELFv2 case. */
13458 else if (DEFAULT_ABI == ABI_DARWIN)
13459 {
13460 /* The cookie is in operand func+2. */
13461 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
13462 int cookie = INTVAL (operands[funop + 2]);
13463 if (cookie & CALL_LONG)
13464 {
13465 tree funname = get_identifier (XSTR (operands[funop], 0));
13466 tree labelname = get_prev_label (funname);
13467 gcc_checking_assert (labelname && !sibcall);
13468
13469 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
13470 instruction will reach 'foo', otherwise link as 'bl L42'".
13471 "L42" should be a 'branch island', that will do a far jump to
13472 'foo'. Branch islands are generated in
13473 macho_branch_islands(). */
13474 sprintf (str, "jbsr %%z%u,%.10s", funop,
13475 IDENTIFIER_POINTER (labelname));
13476 }
13477 else
13478 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
13479 after the call. */
13480 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
13481 }
13482 #endif
13483 else
13484 gcc_unreachable ();
13485 return str;
13486 }
13487
13488 const char *
13489 rs6000_call_template (rtx *operands, unsigned int funop)
13490 {
13491 return rs6000_call_template_1 (operands, funop, false);
13492 }
13493
13494 const char *
13495 rs6000_sibcall_template (rtx *operands, unsigned int funop)
13496 {
13497 return rs6000_call_template_1 (operands, funop, true);
13498 }
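/* For example (illustrative): a non-sibling call to an external "foo"
   becomes "bl foo\n\tnop" on AIX/ELFv2 (the nop fills the TOC restore
   slot), "bl foo@notoc" when the function is pc-relative, and
   "bl foo@plt" for pic ABI_V4.  */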
13499
13500 /* As above, for indirect calls. */
13501
13502 static const char *
13503 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
13504 bool sibcall)
13505 {
13506 /* -Wformat-overflow workaround, without which gcc thinks that %u
13507 might produce 10 digits. Note that -Wformat-overflow will not
13508 currently warn here for str[], so do not rely on a warning to
13509 ensure str[] is correctly sized. */
13510 gcc_assert (funop <= MAX_RECOG_OPERANDS);
13511
13512 /* Currently, funop is either 0 or 1. The maximum string is always
13513 a !speculate 64-bit __tls_get_addr call.
13514
13515 ABI_ELFv2, pcrel:
13516 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13517 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
13518 . 9 crset 2\n\t
13519 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13520 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
13521 . 8 beq%T1l-
13522 .---
13523 .142
13524
13525 ABI_AIX:
13526 . 9 ld 2,%3\n\t
13527 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13528 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
13529 . 9 crset 2\n\t
13530 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13531 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
13532 . 10 beq%T1l-\n\t
13533 . 10 ld 2,%4(1)
13534 .---
13535 .151
13536
13537 ABI_ELFv2:
13538 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13539 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
13540 . 9 crset 2\n\t
13541 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13542 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
13543 . 10 beq%T1l-\n\t
13544 . 10 ld 2,%3(1)
13545 .---
13546 .142
13547
13548 ABI_V4:
13549 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13550 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
13551 . 9 crset 2\n\t
13552 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13553 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
13554 . 8 beq%T1l-
13555 .---
13556 .141 */
13557 static char str[160]; /* 8 spare */
13558 char *s = str;
13559 const char *ptrload = TARGET_64BIT ? "d" : "wz";
13560
13561 if (DEFAULT_ABI == ABI_AIX)
13562 s += sprintf (s,
13563 "l%s 2,%%%u\n\t",
13564 ptrload, funop + 2);
13565
13566 /* We don't need the extra code to stop indirect call speculation if
13567 calling via LR. */
13568 bool speculate = (TARGET_MACHO
13569 || rs6000_speculate_indirect_jumps
13570 || (REG_P (operands[funop])
13571 && REGNO (operands[funop]) == LR_REGNO));
13572
13573 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
13574 {
13575 const char *rel64 = TARGET_64BIT ? "64" : "";
13576 char tls[29];
13577 tls[0] = 0;
13578 if (GET_CODE (operands[funop + 1]) == UNSPEC)
13579 {
13580 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
13581 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
13582 rel64, funop + 1);
13583 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
13584 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
13585 rel64);
13586 }
13587
13588 const char *notoc = rs6000_pcrel_p (cfun) ? "_NOTOC" : "";
13589 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
13590 && flag_pic == 2 ? "+32768" : "");
13591 if (!speculate)
13592 {
13593 s += sprintf (s,
13594 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
13595 tls, rel64, notoc, funop, addend);
13596 s += sprintf (s, "crset 2\n\t");
13597 }
13598 s += sprintf (s,
13599 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
13600 tls, rel64, notoc, funop, addend);
13601 }
13602 else if (!speculate)
13603 s += sprintf (s, "crset 2\n\t");
13604
13605 if (rs6000_pcrel_p (cfun))
13606 {
13607 if (speculate)
13608 sprintf (s, "b%%T%ul", funop);
13609 else
13610 sprintf (s, "beq%%T%ul-", funop);
13611 }
13612 else if (DEFAULT_ABI == ABI_AIX)
13613 {
13614 if (speculate)
13615 sprintf (s,
13616 "b%%T%ul\n\t"
13617 "l%s 2,%%%u(1)",
13618 funop, ptrload, funop + 3);
13619 else
13620 sprintf (s,
13621 "beq%%T%ul-\n\t"
13622 "l%s 2,%%%u(1)",
13623 funop, ptrload, funop + 3);
13624 }
13625 else if (DEFAULT_ABI == ABI_ELFv2)
13626 {
13627 if (speculate)
13628 sprintf (s,
13629 "b%%T%ul\n\t"
13630 "l%s 2,%%%u(1)",
13631 funop, ptrload, funop + 2);
13632 else
13633 sprintf (s,
13634 "beq%%T%ul-\n\t"
13635 "l%s 2,%%%u(1)",
13636 funop, ptrload, funop + 2);
13637 }
13638 else
13639 {
13640 if (speculate)
13641 sprintf (s,
13642 "b%%T%u%s",
13643 funop, sibcall ? "" : "l");
13644 else
13645 sprintf (s,
13646 "beq%%T%u%s-%s",
13647 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
13648 }
13649 return str;
13650 }
13651
13652 const char *
13653 rs6000_indirect_call_template (rtx *operands, unsigned int funop)
13654 {
13655 return rs6000_indirect_call_template_1 (operands, funop, false);
13656 }
13657
13658 const char *
13659 rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
13660 {
13661 return rs6000_indirect_call_template_1 (operands, funop, true);
13662 }
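/* Illustrative expansion (a sketch): an indirect call through CTR on
   ELFv2 with speculation allowed becomes

     bctrl
     ld 2,24(1)

   (24(1) being the ELFv2 TOC save slot), while with
   -mno-speculate-indirect-jumps the branch is preceded by "crset 2"
   and issued as "beqctrl-", a form the hardware will not execute
   speculatively.  */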
13663
13664 #if HAVE_AS_PLTSEQ
13665 /* Output indirect call insns. WHICH identifies the type of sequence. */
13666 const char *
13667 rs6000_pltseq_template (rtx *operands, int which)
13668 {
13669 const char *rel64 = TARGET_64BIT ? "64" : "";
13670 char tls[30];
13671 tls[0] = 0;
13672 if (GET_CODE (operands[3]) == UNSPEC)
13673 {
13674 char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
13675 if (XINT (operands[3], 1) == UNSPEC_TLSGD)
13676 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
13677 off, rel64);
13678 else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
13679 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
13680 off, rel64);
13681 }
13682
13683 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
13684 static char str[96]; /* 10 spare */
13685 char off = WORDS_BIG_ENDIAN ? '2' : '4';
13686 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
13687 && flag_pic == 2 ? "+32768" : "");
13688 switch (which)
13689 {
13690 case RS6000_PLTSEQ_TOCSAVE:
13691 sprintf (str,
13692 "st%s\n\t"
13693 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
13694 TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
13695 tls, rel64);
13696 break;
13697 case RS6000_PLTSEQ_PLT16_HA:
13698 if (DEFAULT_ABI == ABI_V4 && !flag_pic)
13699 sprintf (str,
13700 "lis %%0,0\n\t"
13701 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
13702 tls, off, rel64);
13703 else
13704 sprintf (str,
13705 "addis %%0,%%1,0\n\t"
13706 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
13707 tls, off, rel64, addend);
13708 break;
13709 case RS6000_PLTSEQ_PLT16_LO:
13710 sprintf (str,
13711 "l%s %%0,0(%%1)\n\t"
13712 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
13713 TARGET_64BIT ? "d" : "wz",
13714 tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
13715 break;
13716 case RS6000_PLTSEQ_MTCTR:
13717 sprintf (str,
13718 "mtctr %%1\n\t"
13719 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
13720 tls, rel64, addend);
13721 break;
13722 case RS6000_PLTSEQ_PLT_PCREL34:
13723 sprintf (str,
13724 "pl%s %%0,0(0),1\n\t"
13725 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
13726 TARGET_64BIT ? "d" : "wz",
13727 tls, rel64);
13728 break;
13729 default:
13730 gcc_unreachable ();
13731 }
13732 return str;
13733 }
13734 #endif
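/* For example (illustrative): RS6000_PLTSEQ_MTCTR emits

     mtctr 9
     .reloc .-4,R_PPC64_PLTSEQ,foo

   (register and symbol here are made up), marking the mtctr as part of
   an inline PLT call sequence so the linker may relax the sequence
   when "foo" resolves locally.  */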
13735 \f
13736 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
13737 /* Emit an assembler directive to set symbol visibility for DECL to
13738 VISIBILITY_TYPE. */
13739
13740 static void
13741 rs6000_assemble_visibility (tree decl, int vis)
13742 {
13743 if (TARGET_XCOFF)
13744 return;
13745
13746 /* Functions need to have their entry point symbol visibility set as
13747 well as their descriptor symbol visibility. */
13748 if (DEFAULT_ABI == ABI_AIX
13749 && DOT_SYMBOLS
13750 && TREE_CODE (decl) == FUNCTION_DECL)
13751 {
13752 static const char * const visibility_types[] = {
13753 NULL, "protected", "hidden", "internal"
13754 };
13755
13756 const char *name, *type;
13757
13758 name = ((* targetm.strip_name_encoding)
13759 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
13760 type = visibility_types[vis];
13761
13762 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
13763 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
13764 }
13765 else
13766 default_assemble_visibility (decl, vis);
13767 }
13768 #endif
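/* For example (illustrative): a hidden function "foo" under the AIX
   ABI with dot symbols gets directives for both the descriptor and
   the code entry point:

     .hidden foo
     .hidden .foo
   */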
13769 \f
13770 enum rtx_code
13771 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
13772 {
13773 /* Reversal of FP compares takes care of the ordering -- an ordered
13774 compare becomes an unordered compare and vice versa. */
13775 if (mode == CCFPmode
13776 && (!flag_finite_math_only
13777 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
13778 || code == UNEQ || code == LTGT))
13779 return reverse_condition_maybe_unordered (code);
13780 else
13781 return reverse_condition (code);
13782 }
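/* For example (illustrative): reversing GE on a CCFPmode comparison
   yields UNLT rather than LT, so the reversed branch is still taken
   when either operand is a NaN, preserving IEEE semantics.  */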
13783
13784 /* Generate a compare for CODE. Return a brand-new rtx that
13785 represents the result of the compare. */
13786
13787 static rtx
13788 rs6000_generate_compare (rtx cmp, machine_mode mode)
13789 {
13790 machine_mode comp_mode;
13791 rtx compare_result;
13792 enum rtx_code code = GET_CODE (cmp);
13793 rtx op0 = XEXP (cmp, 0);
13794 rtx op1 = XEXP (cmp, 1);
13795
13796 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
13797 comp_mode = CCmode;
13798 else if (FLOAT_MODE_P (mode))
13799 comp_mode = CCFPmode;
13800 else if (code == GTU || code == LTU
13801 || code == GEU || code == LEU)
13802 comp_mode = CCUNSmode;
13803 else if ((code == EQ || code == NE)
13804 && unsigned_reg_p (op0)
13805 && (unsigned_reg_p (op1)
13806 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
13807 /* These are unsigned values; perhaps there will be a later
13808 ordering compare that can be shared with this one. */
13809 comp_mode = CCUNSmode;
13810 else
13811 comp_mode = CCmode;
13812
13813 /* If we have an unsigned compare, make sure we don't have a signed value as
13814 an immediate. */
13815 if (comp_mode == CCUNSmode && CONST_INT_P (op1)
13816 && INTVAL (op1) < 0)
13817 {
13818 op0 = copy_rtx_if_shared (op0);
13819 op1 = force_reg (GET_MODE (op0), op1);
13820 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
13821 }
13822
13823 /* First, the compare. */
13824 compare_result = gen_reg_rtx (comp_mode);
13825
13826 /* IEEE 128-bit support in VSX registers when we do not have hardware
13827 support. */
13828 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
13829 {
13830 rtx libfunc = NULL_RTX;
13831 bool check_nan = false;
13832 rtx dest;
13833
13834 switch (code)
13835 {
13836 case EQ:
13837 case NE:
13838 libfunc = optab_libfunc (eq_optab, mode);
13839 break;
13840
13841 case GT:
13842 case GE:
13843 libfunc = optab_libfunc (ge_optab, mode);
13844 break;
13845
13846 case LT:
13847 case LE:
13848 libfunc = optab_libfunc (le_optab, mode);
13849 break;
13850
13851 case UNORDERED:
13852 case ORDERED:
13853 libfunc = optab_libfunc (unord_optab, mode);
13854 code = (code == UNORDERED) ? NE : EQ;
13855 break;
13856
13857 case UNGE:
13858 case UNGT:
13859 check_nan = true;
13860 libfunc = optab_libfunc (ge_optab, mode);
13861 code = (code == UNGE) ? GE : GT;
13862 break;
13863
13864 case UNLE:
13865 case UNLT:
13866 check_nan = true;
13867 libfunc = optab_libfunc (le_optab, mode);
13868 code = (code == UNLE) ? LE : LT;
13869 break;
13870
13871 case UNEQ:
13872 case LTGT:
13873 check_nan = true;
13874 libfunc = optab_libfunc (eq_optab, mode);
13875 code = (code == UNEQ) ? EQ : NE;
13876 break;
13877
13878 default:
13879 gcc_unreachable ();
13880 }
13881
13882 gcc_assert (libfunc);
13883
13884 if (!check_nan)
13885 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
13886 SImode, op0, mode, op1, mode);
13887
13888 /* The library signals an exception for signalling NaNs, so we need to
13889 handle isgreater, etc. by first checking isordered. */
13890 else
13891 {
13892 rtx ne_rtx, normal_dest, unord_dest;
13893 rtx unord_func = optab_libfunc (unord_optab, mode);
13894 rtx join_label = gen_label_rtx ();
13895 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
13896 rtx unord_cmp = gen_reg_rtx (comp_mode);
13897
13898
13899 /* Test for either value being a NaN. */
13900 gcc_assert (unord_func);
13901 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
13902 SImode, op0, mode, op1, mode);
13903
13904 /* Set the result to 1 if either value is a NaN, and jump to the join
13905 label. */
13906 dest = gen_reg_rtx (SImode);
13907 emit_move_insn (dest, const1_rtx);
13908 emit_insn (gen_rtx_SET (unord_cmp,
13909 gen_rtx_COMPARE (comp_mode, unord_dest,
13910 const0_rtx)));
13911
13912 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
13913 emit_jump_insn (gen_rtx_SET (pc_rtx,
13914 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
13915 join_ref,
13916 pc_rtx)));
13917
13918 /* Do the normal comparison, knowing that the values are not
13919 NaNs. */
13920 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
13921 SImode, op0, mode, op1, mode);
13922
13923 emit_insn (gen_cstoresi4 (dest,
13924 gen_rtx_fmt_ee (code, SImode, normal_dest,
13925 const0_rtx),
13926 normal_dest, const0_rtx));
13927
13928 /* Join NaN and non-NaN paths. Compare dest against 0. */
13929 emit_label (join_label);
13930 code = NE;
13931 }
13932
13933 emit_insn (gen_rtx_SET (compare_result,
13934 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
13935 }
13936
13937 else
13938 {
13939 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
13940 CLOBBERs to match cmptf_internal2 pattern. */
13941 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
13942 && FLOAT128_IBM_P (GET_MODE (op0))
13943 && TARGET_HARD_FLOAT)
13944 emit_insn (gen_rtx_PARALLEL (VOIDmode,
13945 gen_rtvec (10,
13946 gen_rtx_SET (compare_result,
13947 gen_rtx_COMPARE (comp_mode, op0, op1)),
13948 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13949 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13950 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13951 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13952 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13953 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13954 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13955 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13956 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
13957 else if (GET_CODE (op1) == UNSPEC
13958 && XINT (op1, 1) == UNSPEC_SP_TEST)
13959 {
13960 rtx op1b = XVECEXP (op1, 0, 0);
13961 comp_mode = CCEQmode;
13962 compare_result = gen_reg_rtx (CCEQmode);
13963 if (TARGET_64BIT)
13964 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
13965 else
13966 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
13967 }
13968 else
13969 emit_insn (gen_rtx_SET (compare_result,
13970 gen_rtx_COMPARE (comp_mode, op0, op1)));
13971 }
13972
13973 validate_condition_mode (code, GET_MODE (compare_result));
13974
13975 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
13976 }
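/* Illustrative example (a sketch): comparing two KFmode values without
   TARGET_FLOAT128_HW goes through libgcc.  An EQ test calls the
   eq_optab routine (typically __eqkf2) and compares its SImode result
   with zero; UNGT first calls the unord_optab routine (typically
   __unordkf2) to weed out NaNs before the ordered comparison, as laid
   out above.  */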
13977
13978 \f
13979 /* Return the diagnostic message string if the binary operation OP is
13980 not permitted on TYPE1 and TYPE2, NULL otherwise. */
13981
13982 static const char*
13983 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
13984 const_tree type1,
13985 const_tree type2)
13986 {
13987 machine_mode mode1 = TYPE_MODE (type1);
13988 machine_mode mode2 = TYPE_MODE (type2);
13989
13990 /* For complex modes, use the inner type. */
13991 if (COMPLEX_MODE_P (mode1))
13992 mode1 = GET_MODE_INNER (mode1);
13993
13994 if (COMPLEX_MODE_P (mode2))
13995 mode2 = GET_MODE_INNER (mode2);
13996
13997 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
13998 double to intermix unless -mfloat128-convert. */
13999 if (mode1 == mode2)
14000 return NULL;
14001
14002 if (!TARGET_FLOAT128_CVT)
14003 {
14004 if ((mode1 == KFmode && mode2 == IFmode)
14005 || (mode1 == IFmode && mode2 == KFmode))
14006 return N_("__float128 and __ibm128 cannot be used in the same "
14007 "expression");
14008
14009 if (TARGET_IEEEQUAD
14010 && ((mode1 == IFmode && mode2 == TFmode)
14011 || (mode1 == TFmode && mode2 == IFmode)))
14012 return N_("__ibm128 and long double cannot be used in the same "
14013 "expression");
14014
14015 if (!TARGET_IEEEQUAD
14016 && ((mode1 == KFmode && mode2 == TFmode)
14017 || (mode1 == TFmode && mode2 == KFmode)))
14018 return N_("__float128 and long double cannot be used in the same "
14019 "expression");
14020 }
14021
14022 return NULL;
14023 }
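/* For example (illustrative): without -mfloat128-convert, an
   expression mixing the two 128-bit formats, e.g.

     __float128 a;
     __ibm128 b;
     ... a + b ...

   is rejected with "__float128 and __ibm128 cannot be used in the
   same expression".  */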
14024
14025 \f
14026 /* Expand floating point conversion to/from __float128 and __ibm128. */
14027
14028 void
14029 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
14030 {
14031 machine_mode dest_mode = GET_MODE (dest);
14032 machine_mode src_mode = GET_MODE (src);
14033 convert_optab cvt = unknown_optab;
14034 bool do_move = false;
14035 rtx libfunc = NULL_RTX;
14036 rtx dest2;
14037 typedef rtx (*rtx_2func_t) (rtx, rtx);
14038 rtx_2func_t hw_convert = (rtx_2func_t)0;
14039 size_t kf_or_tf;
14040
14041 struct hw_conv_t {
14042 rtx_2func_t from_df;
14043 rtx_2func_t from_sf;
14044 rtx_2func_t from_si_sign;
14045 rtx_2func_t from_si_uns;
14046 rtx_2func_t from_di_sign;
14047 rtx_2func_t from_di_uns;
14048 rtx_2func_t to_df;
14049 rtx_2func_t to_sf;
14050 rtx_2func_t to_si_sign;
14051 rtx_2func_t to_si_uns;
14052 rtx_2func_t to_di_sign;
14053 rtx_2func_t to_di_uns;
14054 } hw_conversions[2] = {
14055 /* conversions to/from KFmode */
14056 {
14057 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
14058 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
14059 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
14060 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
14061 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
14062 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
14063 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
14064 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
14065 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
14066 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
14067 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
14068 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
14069 },
14070
14071 /* conversions to/from TFmode */
14072 {
14073 gen_extenddftf2_hw, /* TFmode <- DFmode. */
14074 gen_extendsftf2_hw, /* TFmode <- SFmode. */
14075 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
14076 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
14077 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
14078 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
14079 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
14080 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
14081 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
14082 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
14083 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
14084 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
14085 },
14086 };
14087
14088 if (dest_mode == src_mode)
14089 gcc_unreachable ();
14090
14091 /* Eliminate memory operations. */
14092 if (MEM_P (src))
14093 src = force_reg (src_mode, src);
14094
14095 if (MEM_P (dest))
14096 {
14097 rtx tmp = gen_reg_rtx (dest_mode);
14098 rs6000_expand_float128_convert (tmp, src, unsigned_p);
14099 rs6000_emit_move (dest, tmp, dest_mode);
14100 return;
14101 }
14102
14103 /* Convert to IEEE 128-bit floating point. */
14104 if (FLOAT128_IEEE_P (dest_mode))
14105 {
14106 if (dest_mode == KFmode)
14107 kf_or_tf = 0;
14108 else if (dest_mode == TFmode)
14109 kf_or_tf = 1;
14110 else
14111 gcc_unreachable ();
14112
14113 switch (src_mode)
14114 {
14115 case E_DFmode:
14116 cvt = sext_optab;
14117 hw_convert = hw_conversions[kf_or_tf].from_df;
14118 break;
14119
14120 case E_SFmode:
14121 cvt = sext_optab;
14122 hw_convert = hw_conversions[kf_or_tf].from_sf;
14123 break;
14124
14125 case E_KFmode:
14126 case E_IFmode:
14127 case E_TFmode:
14128 if (FLOAT128_IBM_P (src_mode))
14129 cvt = sext_optab;
14130 else
14131 do_move = true;
14132 break;
14133
14134 case E_SImode:
14135 if (unsigned_p)
14136 {
14137 cvt = ufloat_optab;
14138 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
14139 }
14140 else
14141 {
14142 cvt = sfloat_optab;
14143 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
14144 }
14145 break;
14146
14147 case E_DImode:
14148 if (unsigned_p)
14149 {
14150 cvt = ufloat_optab;
14151 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
14152 }
14153 else
14154 {
14155 cvt = sfloat_optab;
14156 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
14157 }
14158 break;
14159
14160 default:
14161 gcc_unreachable ();
14162 }
14163 }
14164
14165 /* Convert from IEEE 128-bit floating point. */
14166 else if (FLOAT128_IEEE_P (src_mode))
14167 {
14168 if (src_mode == KFmode)
14169 kf_or_tf = 0;
14170 else if (src_mode == TFmode)
14171 kf_or_tf = 1;
14172 else
14173 gcc_unreachable ();
14174
14175 switch (dest_mode)
14176 {
14177 case E_DFmode:
14178 cvt = trunc_optab;
14179 hw_convert = hw_conversions[kf_or_tf].to_df;
14180 break;
14181
14182 case E_SFmode:
14183 cvt = trunc_optab;
14184 hw_convert = hw_conversions[kf_or_tf].to_sf;
14185 break;
14186
14187 case E_KFmode:
14188 case E_IFmode:
14189 case E_TFmode:
14190 if (FLOAT128_IBM_P (dest_mode))
14191 cvt = trunc_optab;
14192 else
14193 do_move = true;
14194 break;
14195
14196 case E_SImode:
14197 if (unsigned_p)
14198 {
14199 cvt = ufix_optab;
14200 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
14201 }
14202 else
14203 {
14204 cvt = sfix_optab;
14205 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
14206 }
14207 break;
14208
14209 case E_DImode:
14210 if (unsigned_p)
14211 {
14212 cvt = ufix_optab;
14213 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
14214 }
14215 else
14216 {
14217 cvt = sfix_optab;
14218 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
14219 }
14220 break;
14221
14222 default:
14223 gcc_unreachable ();
14224 }
14225 }
14226
14227 /* Both IBM format. */
14228 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
14229 do_move = true;
14230
14231 else
14232 gcc_unreachable ();
14233
14234 /* Handle conversion between TFmode/KFmode/IFmode. */
14235 if (do_move)
14236 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
14237
14238 /* Handle conversion if we have hardware support. */
14239 else if (TARGET_FLOAT128_HW && hw_convert)
14240 emit_insn ((hw_convert) (dest, src));
14241
14242 /* Call an external function to do the conversion. */
14243 else if (cvt != unknown_optab)
14244 {
14245 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
14246 gcc_assert (libfunc != NULL_RTX);
14247
14248 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
14249 src, src_mode);
14250
14251 gcc_assert (dest2 != NULL_RTX);
14252 if (!rtx_equal_p (dest, dest2))
14253 emit_move_insn (dest, dest2);
14254 }
14255
14256 else
14257 gcc_unreachable ();
14258
14259 return;
14260 }
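/* Illustrative aside (hypothetical user code, not part of this file):
   the kinds of source-level conversions the expander above implements,
   assuming a target where __float128 is KFmode, __ibm128 is IFmode and
   long double is TFmode.  */
#if 0
__float128 from_double (double d) { return d; }          /* extenddfkf2, hw insn or libcall */
long long to_di (__float128 q) { return q; }             /* fix_kfdi2, signed */
__ibm128 to_ibm (__float128 q) { return (__ibm128) q; }  /* no direct insn; trunc_optab libcall */
#endif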
14261
14262 \f
14263 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
14264 can be used as that dest register. Return the dest register. */
14265
14266 rtx
14267 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
14268 {
14269 if (op2 == const0_rtx)
14270 return op1;
14271
14272 if (GET_CODE (scratch) == SCRATCH)
14273 scratch = gen_reg_rtx (mode);
14274
14275 if (logical_operand (op2, mode))
14276 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
14277 else
14278 emit_insn (gen_rtx_SET (scratch,
14279 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
14280
14281 return scratch;
14282 }
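/* A minimal standalone sketch (hypothetical) of the reduction used above:
   "OP1 == OP2" becomes a test of a derived value against zero, via XOR
   when OP2 fits the logical-immediate forms, else via adding -OP2.  */
#if 0
int eq_via_xor (unsigned long a, unsigned long b) { return (a ^ b) == 0; }
int eq_via_add (unsigned long a, unsigned long b) { return (a + -b) == 0; }
#endif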
14283
14284 /* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
14285 requires this. The result is mode MODE. */
14286 rtx
14287 rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
14288 {
14289 rtx cond[2];
14290 int n = 0;
14291 if (code == LTGT || code == LE || code == UNLT)
14292 cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
14293 if (code == LTGT || code == GE || code == UNGT)
14294 cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
14295 if (code == LE || code == GE || code == UNEQ)
14296 cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
14297 if (code == UNLT || code == UNGT || code == UNEQ)
14298 cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);
14299
14300 gcc_assert (n == 2);
14301
14302 rtx cc = gen_reg_rtx (CCEQmode);
14303 rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
14304 emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));
14305
14306 return cc;
14307 }
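/* For reference, the CR-bit pairs the routine above combines with cror,
   written as a scalar sketch (hypothetical; lt/gt/eq/un stand for the
   four bits of the CR field set by the FP compare):  */
#if 0
static int cr_le (int lt, int eq)   { return lt | eq; }
static int cr_ge (int gt, int eq)   { return gt | eq; }
static int cr_ltgt (int lt, int gt) { return lt | gt; }
static int cr_unlt (int lt, int un) { return lt | un; }
static int cr_ungt (int gt, int un) { return gt | un; }
static int cr_uneq (int eq, int un) { return eq | un; }
#endif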
14308
14309 void
14310 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
14311 {
14312 rtx condition_rtx = rs6000_generate_compare (operands[1], mode);
14313 rtx_code cond_code = GET_CODE (condition_rtx);
14314
14315 if (FLOAT_MODE_P (mode) && HONOR_NANS (mode)
14316 && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW))
14317 ;
14318 else if (cond_code == NE
14319 || cond_code == GE || cond_code == LE
14320 || cond_code == GEU || cond_code == LEU
14321 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
14322 {
14323 rtx not_result = gen_reg_rtx (CCEQmode);
14324 rtx not_op, rev_cond_rtx;
14325 machine_mode cc_mode;
14326
14327 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
14328
14329 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
14330 SImode, XEXP (condition_rtx, 0), const0_rtx);
14331 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
14332 emit_insn (gen_rtx_SET (not_result, not_op));
14333 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
14334 }
14335
14336 machine_mode op_mode = GET_MODE (XEXP (operands[1], 0));
14337 if (op_mode == VOIDmode)
14338 op_mode = GET_MODE (XEXP (operands[1], 1));
14339
14340 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
14341 {
14342 PUT_MODE (condition_rtx, DImode);
14343 convert_move (operands[0], condition_rtx, 0);
14344 }
14345 else
14346 {
14347 PUT_MODE (condition_rtx, SImode);
14348 emit_insn (gen_rtx_SET (operands[0], condition_rtx));
14349 }
14350 }
14351
14352 /* Emit a conditional branch: jump to the label in OPERANDS[3] if the comparison OPERANDS[0], in mode MODE, holds. */
14353
14354 void
14355 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
14356 {
14357 rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
14358 rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
14359 rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
14360 emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
14361 }
14362
14363 /* Return the string to output a conditional branch to LABEL, which is
14364 the operand template of the label, or NULL if the branch is really a
14365 conditional return.
14366
14367 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
14368 condition code register and its mode specifies what kind of
14369 comparison we made.
14370
14371 REVERSED is nonzero if we should reverse the sense of the comparison.
14372
14373 INSN is the insn. */
14374
14375 char *
14376 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
14377 {
14378 static char string[64];
14379 enum rtx_code code = GET_CODE (op);
14380 rtx cc_reg = XEXP (op, 0);
14381 machine_mode mode = GET_MODE (cc_reg);
14382 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
14383 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
14384 int really_reversed = reversed ^ need_longbranch;
14385 char *s = string;
14386 const char *ccode;
14387 const char *pred;
14388 rtx note;
14389
14390 validate_condition_mode (code, mode);
14391
14392 /* Work out which way this really branches. We could always use
14393 reverse_condition_maybe_unordered here, but distinguishing the two
14394 cases makes the resulting assembler clearer. */
14395 if (really_reversed)
14396 {
14397 /* Reversal of FP compares takes care -- an ordered compare
14398 becomes an unordered compare and vice versa. */
14399 if (mode == CCFPmode)
14400 code = reverse_condition_maybe_unordered (code);
14401 else
14402 code = reverse_condition (code);
14403 }
14404
14405 switch (code)
14406 {
14407 /* Not all of these are actually distinct opcodes, but
14408 we distinguish them for clarity of the resulting assembler. */
14409 case NE: case LTGT:
14410 ccode = "ne"; break;
14411 case EQ: case UNEQ:
14412 ccode = "eq"; break;
14413 case GE: case GEU:
14414 ccode = "ge"; break;
14415 case GT: case GTU: case UNGT:
14416 ccode = "gt"; break;
14417 case LE: case LEU:
14418 ccode = "le"; break;
14419 case LT: case LTU: case UNLT:
14420 ccode = "lt"; break;
14421 case UNORDERED: ccode = "un"; break;
14422 case ORDERED: ccode = "nu"; break;
14423 case UNGE: ccode = "nl"; break;
14424 case UNLE: ccode = "ng"; break;
14425 default:
14426 gcc_unreachable ();
14427 }
14428
14429 /* Maybe we have a guess as to how likely the branch is. */
14430 pred = "";
14431 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
14432 if (note != NULL_RTX)
14433 {
14434 /* PROB is the difference from 50%. */
14435 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
14436 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
14437
14438 /* Only hint for highly probable/improbable branches on newer cpus when
14439 we have real profile data, as static prediction overrides processor
14440 dynamic prediction. For older cpus we may as well always hint, but
14441 assume not taken for branches that are very close to 50% as a
14442 mispredicted taken branch is more expensive than a
14443 mispredicted not-taken branch. */
14444 if (rs6000_always_hint
14445 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
14446 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
14447 && br_prob_note_reliable_p (note)))
14448 {
14449 if (abs (prob) > REG_BR_PROB_BASE / 20
14450 && ((prob > 0) ^ need_longbranch))
14451 pred = "+";
14452 else
14453 pred = "-";
14454 }
14455 }
14456
14457 if (label == NULL)
14458 s += sprintf (s, "b%slr%s ", ccode, pred);
14459 else
14460 s += sprintf (s, "b%s%s ", ccode, pred);
14461
14462 /* We need to escape any '%' characters in the reg_names string.
14463 Assume they'd only be the first character.... */
14464 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
14465 *s++ = '%';
14466 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
14467
14468 if (label != NULL)
14469 {
14470 /* If the branch distance was too far, we may have to use an
14471 unconditional branch to go the distance. */
14472 if (need_longbranch)
14473 s += sprintf (s, ",$+8\n\tb %s", label);
14474 else
14475 s += sprintf (s, ",%s", label);
14476 }
14477
14478 return string;
14479 }
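/* Illustrative examples of strings the routine above can produce (the CR
   field spelling follows reg_names; labels and field numbers are made up):
     beq 0,.L5       plain conditional branch on CR0
     bne+ 7,.L12     with a predicted-taken hint, on CR7
     bgelr 0         conditional return (LABEL == NULL)
     bne 0,$+8
     b .L99          long-branch form: invert the test and skip a "b" */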
14480
14481 /* Return insn for VSX or Altivec comparisons. */
14482
14483 static rtx
14484 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
14485 {
14486 rtx mask;
14487 machine_mode mode = GET_MODE (op0);
14488
14489 switch (code)
14490 {
14491 default:
14492 break;
14493
14494 case GE:
14495 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14496 return NULL_RTX;
14497 /* FALLTHRU */
14498
14499 case EQ:
14500 case GT:
14501 case GTU:
14502 case ORDERED:
14503 case UNORDERED:
14504 case UNEQ:
14505 case LTGT:
14506 mask = gen_reg_rtx (mode);
14507 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
14508 return mask;
14509 }
14510
14511 return NULL_RTX;
14512 }
14513
14514 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
14515 DMODE is the expected destination mode. This is a recursive function. */
14516
14517 static rtx
14518 rs6000_emit_vector_compare (enum rtx_code rcode,
14519 rtx op0, rtx op1,
14520 machine_mode dmode)
14521 {
14522 rtx mask;
14523 bool swap_operands = false;
14524 bool try_again = false;
14525
14526 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
14527 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
14528
14529 /* See if the comparison works as is. */
14530 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
14531 if (mask)
14532 return mask;
14533
14534 switch (rcode)
14535 {
14536 case LT:
14537 rcode = GT;
14538 swap_operands = true;
14539 try_again = true;
14540 break;
14541 case LTU:
14542 rcode = GTU;
14543 swap_operands = true;
14544 try_again = true;
14545 break;
14546 case NE:
14547 case UNLE:
14548 case UNLT:
14549 case UNGE:
14550 case UNGT:
14551 /* Invert condition and try again.
14552 e.g., A != B becomes ~(A==B). */
14553 {
14554 enum rtx_code rev_code;
14555 enum insn_code nor_code;
14556 rtx mask2;
14557
14558 rev_code = reverse_condition_maybe_unordered (rcode);
14559 if (rev_code == UNKNOWN)
14560 return NULL_RTX;
14561
14562 nor_code = optab_handler (one_cmpl_optab, dmode);
14563 if (nor_code == CODE_FOR_nothing)
14564 return NULL_RTX;
14565
14566 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
14567 if (!mask2)
14568 return NULL_RTX;
14569
14570 mask = gen_reg_rtx (dmode);
14571 emit_insn (GEN_FCN (nor_code) (mask, mask2));
14572 return mask;
14573 }
14574 break;
14575 case GE:
14576 case GEU:
14577 case LE:
14578 case LEU:
14579 /* Try GT/GTU/LT/LTU OR EQ */
14580 {
14581 rtx c_rtx, eq_rtx;
14582 enum insn_code ior_code;
14583 enum rtx_code new_code;
14584
14585 switch (rcode)
14586 {
14587 case GE:
14588 new_code = GT;
14589 break;
14590
14591 case GEU:
14592 new_code = GTU;
14593 break;
14594
14595 case LE:
14596 new_code = LT;
14597 break;
14598
14599 case LEU:
14600 new_code = LTU;
14601 break;
14602
14603 default:
14604 gcc_unreachable ();
14605 }
14606
14607 ior_code = optab_handler (ior_optab, dmode);
14608 if (ior_code == CODE_FOR_nothing)
14609 return NULL_RTX;
14610
14611 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
14612 if (!c_rtx)
14613 return NULL_RTX;
14614
14615 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
14616 if (!eq_rtx)
14617 return NULL_RTX;
14618
14619 mask = gen_reg_rtx (dmode);
14620 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
14621 return mask;
14622 }
14623 break;
14624 default:
14625 return NULL_RTX;
14626 }
14627
14628 if (try_again)
14629 {
14630 if (swap_operands)
14631 std::swap (op0, op1);
14632
14633 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
14634 if (mask)
14635 return mask;
14636 }
14637
14638 /* You only get two chances. */
14639 return NULL_RTX;
14640 }
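/* Scalar model (hypothetical, standalone) of the rewrites above, using the
   -1/0 per-element result convention of vector compares:  */
#if 0
static int v_eq (int a, int b) { return a == b ? -1 : 0; }
static int v_gt (int a, int b) { return a > b ? -1 : 0; }
static int v_lt (int a, int b) { return v_gt (b, a); }               /* LT: swap operands */
static int v_ge (int a, int b) { return v_gt (a, b) | v_eq (a, b); } /* GE: GT OR EQ */
static int v_ne (int a, int b) { return ~v_eq (a, b); }              /* NE: invert EQ */
#endif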
14641
14642 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
14643 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
14644 operands for the relation operation COND. */
14645
14646 int
14647 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
14648 rtx cond, rtx cc_op0, rtx cc_op1)
14649 {
14650 machine_mode dest_mode = GET_MODE (dest);
14651 machine_mode mask_mode = GET_MODE (cc_op0);
14652 enum rtx_code rcode = GET_CODE (cond);
14653 machine_mode cc_mode = CCmode;
14654 rtx mask;
14655 rtx cond2;
14656 bool invert_move = false;
14657
14658 if (VECTOR_UNIT_NONE_P (dest_mode))
14659 return 0;
14660
14661 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
14662 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
14663
14664 switch (rcode)
14665 {
14666 /* These codes have no direct vector test; reverse the condition and
14667 swap the TRUE/FALSE arms of the select instead. */
14668 case NE:
14669 case UNLE:
14670 case UNLT:
14671 case UNGE:
14672 case UNGT:
14673 /* Invert condition and try again.
14674 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
14675 invert_move = true;
14676 rcode = reverse_condition_maybe_unordered (rcode);
14677 if (rcode == UNKNOWN)
14678 return 0;
14679 break;
14680
14681 case GE:
14682 case LE:
14683 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
14684 {
14685 /* Invert condition to avoid compound test. */
14686 invert_move = true;
14687 rcode = reverse_condition (rcode);
14688 }
14689 break;
14690
14691 case GTU:
14692 case GEU:
14693 case LTU:
14694 case LEU:
14695 /* Mark unsigned tests with CCUNSmode. */
14696 cc_mode = CCUNSmode;
14697
14698 /* Invert condition to avoid compound test if necessary. */
14699 if (rcode == GEU || rcode == LEU)
14700 {
14701 invert_move = true;
14702 rcode = reverse_condition (rcode);
14703 }
14704 break;
14705
14706 default:
14707 break;
14708 }
14709
14710 /* Get the vector mask for the given relational operations. */
14711 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
14712
14713 if (!mask)
14714 return 0;
14715
14716 if (invert_move)
14717 std::swap (op_true, op_false);
14718
14719 /* Optimize selects between the constant 0 and -1 vectors: the compare mask itself is -1/0 per element. */
14720 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
14721 && (GET_CODE (op_true) == CONST_VECTOR
14722 || GET_CODE (op_false) == CONST_VECTOR))
14723 {
14724 rtx constant_0 = CONST0_RTX (dest_mode);
14725 rtx constant_m1 = CONSTM1_RTX (dest_mode);
14726
14727 if (op_true == constant_m1 && op_false == constant_0)
14728 {
14729 emit_move_insn (dest, mask);
14730 return 1;
14731 }
14732
14733 else if (op_true == constant_0 && op_false == constant_m1)
14734 {
14735 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
14736 return 1;
14737 }
14738
14739 /* If we can't use the vector comparison directly, perhaps we can use
14740 the mask for the true or false fields, instead of loading up a
14741 constant. */
14742 if (op_true == constant_m1)
14743 op_true = mask;
14744
14745 if (op_false == constant_0)
14746 op_false = mask;
14747 }
14748
14749 if (!REG_P (op_true) && !SUBREG_P (op_true))
14750 op_true = force_reg (dest_mode, op_true);
14751
14752 if (!REG_P (op_false) && !SUBREG_P (op_false))
14753 op_false = force_reg (dest_mode, op_false);
14754
14755 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
14756 CONST0_RTX (dest_mode));
14757 emit_insn (gen_rtx_SET (dest,
14758 gen_rtx_IF_THEN_ELSE (dest_mode,
14759 cond2,
14760 op_true,
14761 op_false)));
14762 return 1;
14763 }
14764
14765 /* ISA 3.0 (power9) min/max subcase to emit an XSMAXCDP or XSMINCDP
14766 instruction for SF/DF scalars. Move TRUE_COND to DEST if OP applied to the
14767 operands of the last comparison is nonzero/true, FALSE_COND if it is
14768 zero/false. Return 0 if the hardware has no such operation. */
14769
14770 static int
14771 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
14772 {
14773 enum rtx_code code = GET_CODE (op);
14774 rtx op0 = XEXP (op, 0);
14775 rtx op1 = XEXP (op, 1);
14776 machine_mode compare_mode = GET_MODE (op0);
14777 machine_mode result_mode = GET_MODE (dest);
14778 bool max_p = false;
14779
14780 if (result_mode != compare_mode)
14781 return 0;
14782
14783 if (code == GE || code == GT)
14784 max_p = true;
14785 else if (code == LE || code == LT)
14786 max_p = false;
14787 else
14788 return 0;
14789
14790 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
14791 ;
14792
14793 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
14794 max_p = !max_p;
14795
14796 else
14797 return 0;
14798
14799 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
14800 return 1;
14801 }
14802
14803 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
14804 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP applied
14805 to the operands of the last comparison is nonzero/true, FALSE_COND if it is
14806 zero/false. Return 0 if the hardware has no such operation. */
14807
14808 static int
14809 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
14810 {
14811 enum rtx_code code = GET_CODE (op);
14812 rtx op0 = XEXP (op, 0);
14813 rtx op1 = XEXP (op, 1);
14814 machine_mode result_mode = GET_MODE (dest);
14815 rtx compare_rtx;
14816 rtx cmove_rtx;
14817 rtx clobber_rtx;
14818
14819 if (!can_create_pseudo_p ())
14820 return 0;
14821
14822 switch (code)
14823 {
14824 case EQ:
14825 case GE:
14826 case GT:
14827 break;
14828
14829 case NE:
14830 case LT:
14831 case LE:
14832 code = swap_condition (code);
14833 std::swap (op0, op1);
14834 break;
14835
14836 default:
14837 return 0;
14838 }
14839
14840 /* Generate: [(parallel [(set (dest)
14841 (if_then_else (op (cmp1) (cmp2))
14842 (true)
14843 (false)))
14844 (clobber (scratch))])]. */
14845
14846 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
14847 cmove_rtx = gen_rtx_SET (dest,
14848 gen_rtx_IF_THEN_ELSE (result_mode,
14849 compare_rtx,
14850 true_cond,
14851 false_cond));
14852
14853 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
14854 emit_insn (gen_rtx_PARALLEL (VOIDmode,
14855 gen_rtvec (2, cmove_rtx, clobber_rtx)));
14856
14857 return 1;
14858 }
14859
14860 /* Emit a conditional move: move TRUE_COND to DEST if OP applied to the
14861 operands of the last comparison is nonzero/true, FALSE_COND if it
14862 is zero/false. Return 0 if the hardware has no such operation. */
14863
14864 int
14865 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
14866 {
14867 enum rtx_code code = GET_CODE (op);
14868 rtx op0 = XEXP (op, 0);
14869 rtx op1 = XEXP (op, 1);
14870 machine_mode compare_mode = GET_MODE (op0);
14871 machine_mode result_mode = GET_MODE (dest);
14872 rtx temp;
14873 bool is_against_zero;
14874
14875 /* These modes should always match. */
14876 if (GET_MODE (op1) != compare_mode
14877 /* In the isel case however, we can use a compare immediate, so
14878 op1 may be a small constant. */
14879 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
14880 return 0;
14881 if (GET_MODE (true_cond) != result_mode)
14882 return 0;
14883 if (GET_MODE (false_cond) != result_mode)
14884 return 0;
14885
14886 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
14887 if (TARGET_P9_MINMAX
14888 && (compare_mode == SFmode || compare_mode == DFmode)
14889 && (result_mode == SFmode || result_mode == DFmode))
14890 {
14891 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
14892 return 1;
14893
14894 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
14895 return 1;
14896 }
14897
14898 /* Don't allow using floating point comparisons for integer results for
14899 now. */
14900 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
14901 return 0;
14902
14903 /* First, work out if the hardware can do this at all, or
14904 if it's too slow.... */
14905 if (!FLOAT_MODE_P (compare_mode))
14906 {
14907 if (TARGET_ISEL)
14908 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
14909 return 0;
14910 }
14911
14912 is_against_zero = op1 == CONST0_RTX (compare_mode);
14913
14914 /* A floating-point subtract might overflow, underflow, or produce
14915 an inexact result, thus changing the floating-point flags, so it
14916 can't be generated if we care about that. It's safe if one side
14917 of the construct is zero, since then no subtract will be
14918 generated. */
14919 if (SCALAR_FLOAT_MODE_P (compare_mode)
14920 && flag_trapping_math && ! is_against_zero)
14921 return 0;
14922
14923 /* Eliminate half of the comparisons by switching operands, this
14924 makes the remaining code simpler. */
14925 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
14926 || code == LTGT || code == LT || code == UNLE)
14927 {
14928 code = reverse_condition_maybe_unordered (code);
14929 temp = true_cond;
14930 true_cond = false_cond;
14931 false_cond = temp;
14932 }
14933
14934 /* UNEQ and LTGT take four instructions for a comparison with zero,
14935 so it'll probably be faster to use a branch here too. */
14936 if (code == UNEQ && HONOR_NANS (compare_mode))
14937 return 0;
14938
14939 /* We're going to try to implement comparisons by performing
14940 a subtract, then comparing against zero. Unfortunately,
14941 Inf - Inf is NaN which is not zero, and so if we don't
14942 know that the operand is finite and the comparison
14943 would treat EQ differently from UNORDERED, we can't do it. */
14944 if (HONOR_INFINITIES (compare_mode)
14945 && code != GT && code != UNGE
14946 && (!CONST_DOUBLE_P (op1)
14947 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
14948 /* Constructs of the form (a OP b ? a : b) are safe. */
14949 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
14950 || (! rtx_equal_p (op0, true_cond)
14951 && ! rtx_equal_p (op1, true_cond))))
14952 return 0;
14953
14954 /* At this point we know we can use fsel. */
14955
14956 /* Reduce the comparison to a comparison against zero. */
14957 if (! is_against_zero)
14958 {
14959 temp = gen_reg_rtx (compare_mode);
14960 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
14961 op0 = temp;
14962 op1 = CONST0_RTX (compare_mode);
14963 }
14964
14965 /* If we don't care about NaNs we can reduce some of the comparisons
14966 down to faster ones. */
14967 if (! HONOR_NANS (compare_mode))
14968 switch (code)
14969 {
14970 case GT:
14971 code = LE;
14972 temp = true_cond;
14973 true_cond = false_cond;
14974 false_cond = temp;
14975 break;
14976 case UNGE:
14977 code = GE;
14978 break;
14979 case UNEQ:
14980 code = EQ;
14981 break;
14982 default:
14983 break;
14984 }
14985
14986 /* Now, reduce everything down to a GE. */
14987 switch (code)
14988 {
14989 case GE:
14990 break;
14991
14992 case LE:
14993 temp = gen_reg_rtx (compare_mode);
14994 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
14995 op0 = temp;
14996 break;
14997
14998 case ORDERED:
14999 temp = gen_reg_rtx (compare_mode);
15000 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
15001 op0 = temp;
15002 break;
15003
15004 case EQ:
15005 temp = gen_reg_rtx (compare_mode);
15006 emit_insn (gen_rtx_SET (temp,
15007 gen_rtx_NEG (compare_mode,
15008 gen_rtx_ABS (compare_mode, op0))));
15009 op0 = temp;
15010 break;
15011
15012 case UNGE:
15013 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
15014 temp = gen_reg_rtx (result_mode);
15015 emit_insn (gen_rtx_SET (temp,
15016 gen_rtx_IF_THEN_ELSE (result_mode,
15017 gen_rtx_GE (VOIDmode,
15018 op0, op1),
15019 true_cond, false_cond)));
15020 false_cond = true_cond;
15021 true_cond = temp;
15022
15023 temp = gen_reg_rtx (compare_mode);
15024 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15025 op0 = temp;
15026 break;
15027
15028 case GT:
15029 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
15030 temp = gen_reg_rtx (result_mode);
15031 emit_insn (gen_rtx_SET (temp,
15032 gen_rtx_IF_THEN_ELSE (result_mode,
15033 gen_rtx_GE (VOIDmode,
15034 op0, op1),
15035 true_cond, false_cond)));
15036 true_cond = false_cond;
15037 false_cond = temp;
15038
15039 temp = gen_reg_rtx (compare_mode);
15040 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15041 op0 = temp;
15042 break;
15043
15044 default:
15045 gcc_unreachable ();
15046 }
15047
15048 emit_insn (gen_rtx_SET (dest,
15049 gen_rtx_IF_THEN_ELSE (result_mode,
15050 gen_rtx_GE (VOIDmode,
15051 op0, op1),
15052 true_cond, false_cond)));
15053 return 1;
15054 }
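/* Standalone sketch (hypothetical) of the fsel-style primitive everything
   above is reduced to, and the negate/abs tricks used for LE/EQ/ORDERED:  */
#if 0
#include <math.h>
static double fsel (double a, double x, double y) { return a >= 0.0 ? x : y; }
static double le0 (double a, double x, double y) { return fsel (-a, x, y); }        /* a <= 0: -a >= 0 */
static double eq0 (double a, double x, double y) { return fsel (-fabs (a), x, y); } /* -|a| >= 0 only if a == 0 */
static double ord (double a, double x, double y) { return fsel (fabs (a), x, y); }  /* |a| >= 0 fails only for NaN */
#endif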
15055
15056 /* Same as above, but for ints (isel). */
15057
15058 int
15059 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15060 {
15061 rtx condition_rtx, cr;
15062 machine_mode mode = GET_MODE (dest);
15063 enum rtx_code cond_code;
15064 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
15065 bool signedp;
15066
15067 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
15068 return 0;
15069
15070 /* We still have to do the compare, because isel doesn't do a
15071 compare, it just looks at the CRx bits set by a previous compare
15072 instruction. */
15073 condition_rtx = rs6000_generate_compare (op, mode);
15074 cond_code = GET_CODE (condition_rtx);
15075 cr = XEXP (condition_rtx, 0);
15076 signedp = GET_MODE (cr) == CCmode;
15077
15078 isel_func = (mode == SImode
15079 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
15080 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
15081
15082 switch (cond_code)
15083 {
15084 case LT: case GT: case LTU: case GTU: case EQ:
15085 /* isel handles these directly. */
15086 break;
15087
15088 default:
15089 /* We need to swap the sense of the comparison. */
15090 {
15091 std::swap (false_cond, true_cond);
15092 PUT_CODE (condition_rtx, reverse_condition (cond_code));
15093 }
15094 break;
15095 }
15096
15097 false_cond = force_reg (mode, false_cond);
15098 if (true_cond != const0_rtx)
15099 true_cond = force_reg (mode, true_cond);
15100
15101 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
15102
15103 return 1;
15104 }
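/* Scalar sketch (hypothetical) of what the emitted isel instruction does:
   select between two registers on a single CR bit set by the compare.  */
#if 0
static long
isel_model (int cr_bit, long true_cond, long false_cond)
{
  return cr_bit ? true_cond : false_cond;
}
#endif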
15105
15106 void
15107 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
15108 {
15109 machine_mode mode = GET_MODE (op0);
15110 enum rtx_code c;
15111 rtx target;
15112
15113 /* VSX/altivec have direct min/max insns. */
15114 if ((code == SMAX || code == SMIN)
15115 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
15116 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
15117 {
15118 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
15119 return;
15120 }
15121
15122 if (code == SMAX || code == SMIN)
15123 c = GE;
15124 else
15125 c = GEU;
15126
15127 if (code == SMAX || code == UMAX)
15128 target = emit_conditional_move (dest, c, op0, op1, mode,
15129 op0, op1, mode, 0);
15130 else
15131 target = emit_conditional_move (dest, c, op0, op1, mode,
15132 op1, op0, mode, 0);
15133 gcc_assert (target);
15134 if (target != dest)
15135 emit_move_insn (dest, target);
15136 }
15137
15138 /* A subroutine of the atomic operation splitters. Jump to LABEL if
15139 COND is true. Mark the jump as unlikely to be taken. */
15140
15141 static void
15142 emit_unlikely_jump (rtx cond, rtx label)
15143 {
15144 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
15145 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
15146 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
15147 }
15148
15149 /* A subroutine of the atomic operation splitters. Emit a load-locked
15150 instruction in MODE. For QI/HImode, possibly use a pattern that includes
15151 the zero_extend operation. */
15152
15153 static void
15154 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
15155 {
15156 rtx (*fn) (rtx, rtx) = NULL;
15157
15158 switch (mode)
15159 {
15160 case E_QImode:
15161 fn = gen_load_lockedqi;
15162 break;
15163 case E_HImode:
15164 fn = gen_load_lockedhi;
15165 break;
15166 case E_SImode:
15167 if (GET_MODE (mem) == QImode)
15168 fn = gen_load_lockedqi_si;
15169 else if (GET_MODE (mem) == HImode)
15170 fn = gen_load_lockedhi_si;
15171 else
15172 fn = gen_load_lockedsi;
15173 break;
15174 case E_DImode:
15175 fn = gen_load_lockeddi;
15176 break;
15177 case E_TImode:
15178 fn = gen_load_lockedti;
15179 break;
15180 default:
15181 gcc_unreachable ();
15182 }
15183 emit_insn (fn (reg, mem));
15184 }
15185
15186 /* A subroutine of the atomic operation splitters. Emit a store-conditional
15187 instruction in MODE. */
15188
15189 static void
15190 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
15191 {
15192 rtx (*fn) (rtx, rtx, rtx) = NULL;
15193
15194 switch (mode)
15195 {
15196 case E_QImode:
15197 fn = gen_store_conditionalqi;
15198 break;
15199 case E_HImode:
15200 fn = gen_store_conditionalhi;
15201 break;
15202 case E_SImode:
15203 fn = gen_store_conditionalsi;
15204 break;
15205 case E_DImode:
15206 fn = gen_store_conditionaldi;
15207 break;
15208 case E_TImode:
15209 fn = gen_store_conditionalti;
15210 break;
15211 default:
15212 gcc_unreachable ();
15213 }
15214
15215 /* Emit sync before stwcx. to address PPC405 Erratum. */
15216 if (PPC405_ERRATUM77)
15217 emit_insn (gen_hwsync ());
15218
15219 emit_insn (fn (res, mem, val));
15220 }
15221
15222 /* Expand barriers before and after a load_locked/store_cond sequence. */
15223
15224 static rtx
15225 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
15226 {
15227 rtx addr = XEXP (mem, 0);
15228
15229 if (!legitimate_indirect_address_p (addr, reload_completed)
15230 && !legitimate_indexed_address_p (addr, reload_completed))
15231 {
15232 addr = force_reg (Pmode, addr);
15233 mem = replace_equiv_address_nv (mem, addr);
15234 }
15235
15236 switch (model)
15237 {
15238 case MEMMODEL_RELAXED:
15239 case MEMMODEL_CONSUME:
15240 case MEMMODEL_ACQUIRE:
15241 break;
15242 case MEMMODEL_RELEASE:
15243 case MEMMODEL_ACQ_REL:
15244 emit_insn (gen_lwsync ());
15245 break;
15246 case MEMMODEL_SEQ_CST:
15247 emit_insn (gen_hwsync ());
15248 break;
15249 default:
15250 gcc_unreachable ();
15251 }
15252 return mem;
15253 }
15254
15255 static void
15256 rs6000_post_atomic_barrier (enum memmodel model)
15257 {
15258 switch (model)
15259 {
15260 case MEMMODEL_RELAXED:
15261 case MEMMODEL_CONSUME:
15262 case MEMMODEL_RELEASE:
15263 break;
15264 case MEMMODEL_ACQUIRE:
15265 case MEMMODEL_ACQ_REL:
15266 case MEMMODEL_SEQ_CST:
15267 emit_insn (gen_isync ());
15268 break;
15269 default:
15270 gcc_unreachable ();
15271 }
15272 }
15273
15274 /* A subroutine of the various atomic expanders. For sub-word operations,
15275 we must adjust things to operate on SImode. Given the original MEM,
15276 return a new aligned memory. Also build and return the quantities by
15277 which to shift and mask. */
15278
15279 static rtx
15280 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
15281 {
15282 rtx addr, align, shift, mask, mem;
15283 HOST_WIDE_INT shift_mask;
15284 machine_mode mode = GET_MODE (orig_mem);
15285
15286 /* For smaller modes, we have to implement this via SImode. */
15287 shift_mask = (mode == QImode ? 0x18 : 0x10);
15288
15289 addr = XEXP (orig_mem, 0);
15290 addr = force_reg (GET_MODE (addr), addr);
15291
15292 /* Aligned memory containing subword. Generate a new memory. We
15293 do not want any of the existing MEM_ATTR data, as we're now
15294 accessing memory outside the original object. */
15295 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
15296 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15297 mem = gen_rtx_MEM (SImode, align);
15298 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
15299 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
15300 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
15301
15302 /* Shift amount for subword relative to aligned word. */
15303 shift = gen_reg_rtx (SImode);
15304 addr = gen_lowpart (SImode, addr);
15305 rtx tmp = gen_reg_rtx (SImode);
15306 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
15307 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
15308 if (BYTES_BIG_ENDIAN)
15309 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
15310 shift, 1, OPTAB_LIB_WIDEN);
15311 *pshift = shift;
15312
15313 /* Mask for insertion. */
15314 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
15315 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
15316 *pmask = mask;
15317
15318 return mem;
15319 }
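/* Hypothetical scalar model of the shift computed above for a QImode
   access (shift_mask == 0x18; HImode uses 0x10 and a 0xffff mask):  */
#if 0
static unsigned
subword_shift_model (unsigned long addr, int bytes_big_endian)
{
  unsigned shift = ((unsigned) addr << 3) & 0x18;  /* bit offset in the word */
  if (bytes_big_endian)
    shift ^= 0x18;               /* byte 0 is the most significant on BE */
  return shift;                  /* the insertion mask is then 0xff << shift */
}
#endif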
15320
15321 /* A subroutine of the various atomic expanders. For sub-word operands,
15322 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
15323
15324 static rtx
15325 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
15326 {
15327 rtx x;
15328
15329 x = gen_reg_rtx (SImode);
15330 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
15331 gen_rtx_NOT (SImode, mask),
15332 oldval)));
15333
15334 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
15335
15336 return x;
15337 }
15338
15339 /* A subroutine of the various atomic expanders. For sub-word operands,
15340 extract WIDE to NARROW via SHIFT. */
15341
15342 static void
15343 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
15344 {
15345 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
15346 wide, 1, OPTAB_LIB_WIDEN);
15347 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
15348 }
15349
15350 /* Expand an atomic compare and swap operation. */
15351
15352 void
15353 rs6000_expand_atomic_compare_and_swap (rtx operands[])
15354 {
15355 rtx boolval, retval, mem, oldval, newval, cond;
15356 rtx label1, label2, x, mask, shift;
15357 machine_mode mode, orig_mode;
15358 enum memmodel mod_s, mod_f;
15359 bool is_weak;
15360
15361 boolval = operands[0];
15362 retval = operands[1];
15363 mem = operands[2];
15364 oldval = operands[3];
15365 newval = operands[4];
15366 is_weak = (INTVAL (operands[5]) != 0);
15367 mod_s = memmodel_base (INTVAL (operands[6]));
15368 mod_f = memmodel_base (INTVAL (operands[7]));
15369 orig_mode = mode = GET_MODE (mem);
15370
15371 mask = shift = NULL_RTX;
15372 if (mode == QImode || mode == HImode)
15373 {
15374 /* Before power8, we didn't have access to lbarx/lharx, so generate a
15375 lwarx and shift/mask operations. With power8, we need to do the
15376 comparison in SImode, but the store is still done in QI/HImode. */
15377 oldval = convert_modes (SImode, mode, oldval, 1);
15378
15379 if (!TARGET_SYNC_HI_QI)
15380 {
15381 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15382
15383 /* Shift and mask OLDVAL into position within the word. */
15384 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
15385 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15386
15387 /* Shift and mask NEWVAL into position within the word. */
15388 newval = convert_modes (SImode, mode, newval, 1);
15389 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
15390 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15391 }
15392
15393 /* Prepare to adjust the return value. */
15394 retval = gen_reg_rtx (SImode);
15395 mode = SImode;
15396 }
15397 else if (reg_overlap_mentioned_p (retval, oldval))
15398 oldval = copy_to_reg (oldval);
15399
15400 if (mode != TImode && !reg_or_short_operand (oldval, mode))
15401 oldval = copy_to_mode_reg (mode, oldval);
15402
15403 if (reg_overlap_mentioned_p (retval, newval))
15404 newval = copy_to_reg (newval);
15405
15406 mem = rs6000_pre_atomic_barrier (mem, mod_s);
15407
15408 label1 = NULL_RTX;
15409 if (!is_weak)
15410 {
15411 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15412 emit_label (XEXP (label1, 0));
15413 }
15414 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15415
15416 emit_load_locked (mode, retval, mem);
15417
15418 x = retval;
15419 if (mask)
15420 x = expand_simple_binop (SImode, AND, retval, mask,
15421 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15422
15423 cond = gen_reg_rtx (CCmode);
15424 /* If we have TImode, synthesize a comparison. */
15425 if (mode != TImode)
15426 x = gen_rtx_COMPARE (CCmode, x, oldval);
15427 else
15428 {
15429 rtx xor1_result = gen_reg_rtx (DImode);
15430 rtx xor2_result = gen_reg_rtx (DImode);
15431 rtx or_result = gen_reg_rtx (DImode);
15432 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
15433 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
15434 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
15435 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
15436
15437 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
15438 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
15439 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
15440 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
15441 }
15442
15443 emit_insn (gen_rtx_SET (cond, x));
15444
15445 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15446 emit_unlikely_jump (x, label2);
15447
15448 x = newval;
15449 if (mask)
15450 x = rs6000_mask_atomic_subword (retval, newval, mask);
15451
15452 emit_store_conditional (orig_mode, cond, mem, x);
15453
15454 if (!is_weak)
15455 {
15456 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15457 emit_unlikely_jump (x, label1);
15458 }
15459
15460 if (!is_mm_relaxed (mod_f))
15461 emit_label (XEXP (label2, 0));
15462
15463 rs6000_post_atomic_barrier (mod_s);
15464
15465 if (is_mm_relaxed (mod_f))
15466 emit_label (XEXP (label2, 0));
15467
15468 if (shift)
15469 rs6000_finish_atomic_subword (operands[1], retval, shift);
15470 else if (mode != GET_MODE (operands[1]))
15471 convert_move (operands[1], retval, 1);
15472
15473 /* In all cases, CR0 contains EQ on success, and NE on failure. */
15474 x = gen_rtx_EQ (SImode, cond, const0_rtx);
15475 emit_insn (gen_rtx_SET (boolval, x));
15476 }
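/* Illustrative C model (hypothetical, not the emitted RTL) of the strong
   compare-and-swap loop built above; the two extern helpers stand in for
   the load-locked and store-conditional instructions.  */
#if 0
extern int load_locked_model (int *mem);          /* lwarx: load and reserve */
extern int store_cond_model (int *mem, int val);  /* stwcx.: nonzero iff reservation held */

static int
cas_strong_model (int *mem, int oldval, int newval, int *retval)
{
 again:                                 /* label1 (emitted only if !is_weak) */
  *retval = load_locked_model (mem);
  if (*retval != oldval)
    goto done;                          /* label2: values differ, fail */
  if (!store_cond_model (mem, newval))
    goto again;                         /* reservation lost: retry */
 done:
  return *retval == oldval;             /* CR0 contains EQ on success */
}
#endif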
15477
15478 /* Expand an atomic exchange operation. */
15479
15480 void
15481 rs6000_expand_atomic_exchange (rtx operands[])
15482 {
15483 rtx retval, mem, val, cond;
15484 machine_mode mode;
15485 enum memmodel model;
15486 rtx label, x, mask, shift;
15487
15488 retval = operands[0];
15489 mem = operands[1];
15490 val = operands[2];
15491 model = memmodel_base (INTVAL (operands[3]));
15492 mode = GET_MODE (mem);
15493
15494 mask = shift = NULL_RTX;
15495 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
15496 {
15497 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15498
15499 /* Shift and mask VAL into position within the word. */
15500 val = convert_modes (SImode, mode, val, 1);
15501 val = expand_simple_binop (SImode, ASHIFT, val, shift,
15502 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15503
15504 /* Prepare to adjust the return value. */
15505 retval = gen_reg_rtx (SImode);
15506 mode = SImode;
15507 }
15508
15509 mem = rs6000_pre_atomic_barrier (mem, model);
15510
15511 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15512 emit_label (XEXP (label, 0));
15513
15514 emit_load_locked (mode, retval, mem);
15515
15516 x = val;
15517 if (mask)
15518 x = rs6000_mask_atomic_subword (retval, val, mask);
15519
15520 cond = gen_reg_rtx (CCmode);
15521 emit_store_conditional (mode, cond, mem, x);
15522
15523 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15524 emit_unlikely_jump (x, label);
15525
15526 rs6000_post_atomic_barrier (model);
15527
15528 if (shift)
15529 rs6000_finish_atomic_subword (operands[0], retval, shift);
15530 }
15531
15532 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
15533 to perform. MEM is the memory on which to operate. VAL is the second
15534 operand of the binary operator. BEFORE and AFTER are optional locations to
15535 return the value of MEM either before or after the operation. MODEL_RTX
15536 is a CONST_INT containing the memory model to use. */
15537
15538 void
15539 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
15540 rtx orig_before, rtx orig_after, rtx model_rtx)
15541 {
15542 enum memmodel model = memmodel_base (INTVAL (model_rtx));
15543 machine_mode mode = GET_MODE (mem);
15544 machine_mode store_mode = mode;
15545 rtx label, x, cond, mask, shift;
15546 rtx before = orig_before, after = orig_after;
15547
15548 mask = shift = NULL_RTX;
15549 /* On power8, we want to use SImode for the operation. On previous systems,
15550 do the operation on the containing aligned SImode word and shift/mask to
15551 get the proper byte or halfword. */
15552 if (mode == QImode || mode == HImode)
15553 {
15554 if (TARGET_SYNC_HI_QI)
15555 {
15556 val = convert_modes (SImode, mode, val, 1);
15557
15558 /* Prepare to adjust the return value. */
15559 before = gen_reg_rtx (SImode);
15560 if (after)
15561 after = gen_reg_rtx (SImode);
15562 mode = SImode;
15563 }
15564 else
15565 {
15566 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15567
15568 /* Shift and mask VAL into position within the word. */
15569 val = convert_modes (SImode, mode, val, 1);
15570 val = expand_simple_binop (SImode, ASHIFT, val, shift,
15571 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15572
15573 switch (code)
15574 {
15575 case IOR:
15576 case XOR:
15577 /* We've already zero-extended VAL. That is sufficient to
15578 make certain that it does not affect other bits. */
15579 mask = NULL;
15580 break;
15581
15582 case AND:
15583 /* If we make certain that all of the other bits in VAL are
15584 set, that will be sufficient to not affect other bits. */
15585 x = gen_rtx_NOT (SImode, mask);
15586 x = gen_rtx_IOR (SImode, x, val);
15587 emit_insn (gen_rtx_SET (val, x));
15588 mask = NULL;
15589 break;
15590
15591 case NOT:
15592 case PLUS:
15593 case MINUS:
15594 /* These will all affect bits outside the field and need
15595 adjustment via MASK within the loop. */
15596 break;
15597
15598 default:
15599 gcc_unreachable ();
15600 }
15601
15602 /* Prepare to adjust the return value. */
15603 before = gen_reg_rtx (SImode);
15604 if (after)
15605 after = gen_reg_rtx (SImode);
15606 store_mode = mode = SImode;
15607 }
15608 }
15609
15610 mem = rs6000_pre_atomic_barrier (mem, model);
15611
15612 label = gen_label_rtx ();
15613 emit_label (label);
15614 label = gen_rtx_LABEL_REF (VOIDmode, label);
15615
15616 if (before == NULL_RTX)
15617 before = gen_reg_rtx (mode);
15618
15619 emit_load_locked (mode, before, mem);
15620
15621 if (code == NOT)
15622 {
15623 x = expand_simple_binop (mode, AND, before, val,
15624 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15625 after = expand_simple_unop (mode, NOT, x, after, 1);
15626 }
15627 else
15628 {
15629 after = expand_simple_binop (mode, code, before, val,
15630 after, 1, OPTAB_LIB_WIDEN);
15631 }
15632
15633 x = after;
15634 if (mask)
15635 {
15636 x = expand_simple_binop (SImode, AND, after, mask,
15637 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15638 x = rs6000_mask_atomic_subword (before, x, mask);
15639 }
15640 else if (store_mode != mode)
15641 x = convert_modes (store_mode, mode, x, 1);
15642
15643 cond = gen_reg_rtx (CCmode);
15644 emit_store_conditional (store_mode, cond, mem, x);
15645
15646 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15647 emit_unlikely_jump (x, label);
15648
15649 rs6000_post_atomic_barrier (model);
15650
15651 if (shift)
15652 {
15653 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
15654 then do the calculations in an SImode register. */
15655 if (orig_before)
15656 rs6000_finish_atomic_subword (orig_before, before, shift);
15657 if (orig_after)
15658 rs6000_finish_atomic_subword (orig_after, after, shift);
15659 }
15660 else if (store_mode != mode)
15661 {
15662 /* QImode/HImode on machines with lbarx/lharx where we do the native
15663 operation and then do the calculations in an SImode register. */
15664 if (orig_before)
15665 convert_move (orig_before, before, 1);
15666 if (orig_after)
15667 convert_move (orig_after, after, 1);
15668 }
15669 else if (orig_after && after != orig_after)
15670 emit_move_insn (orig_after, after);
15671 }
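/* A scalar sketch (hypothetical) of the final sub-word merge above: keep
   BEFORE's bits outside the field and AFTER's bits inside it, which is what
   rs6000_mask_atomic_subword computes for the PLUS/MINUS/NOT cases.  */
#if 0
static unsigned
merge_subword_model (unsigned before, unsigned after, unsigned mask)
{
  return (before & ~mask) | (after & mask);
}
#endif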
15672
15673 /* Emit instructions to move SRC to DST. Called by splitters for
15674 multi-register moves. It will emit at most one instruction for
15675 each register that is accessed; that is, it won't emit li/lis pairs
15676 (or equivalent for 64-bit code). One of SRC or DST must be a hard
15677 register. */
15678
15679 void
15680 rs6000_split_multireg_move (rtx dst, rtx src)
15681 {
15682 /* The register number of the first register being moved. */
15683 int reg;
15684 /* The mode that is to be moved. */
15685 machine_mode mode;
15686 /* The mode that the move is being done in, and its size. */
15687 machine_mode reg_mode;
15688 int reg_mode_size;
15689 /* The number of registers that will be moved. */
15690 int nregs;
15691
15692 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
15693 mode = GET_MODE (dst);
15694 nregs = hard_regno_nregs (reg, mode);
15695 if (FP_REGNO_P (reg))
15696 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
15697 (TARGET_HARD_FLOAT ? DFmode : SFmode);
15698 else if (ALTIVEC_REGNO_P (reg))
15699 reg_mode = V16QImode;
15700 else
15701 reg_mode = word_mode;
15702 reg_mode_size = GET_MODE_SIZE (reg_mode);
15703
15704 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
15705
15706 /* TDmode residing in FP registers is special, since the ISA requires that
15707 the lower-numbered word of a register pair is always the most significant
15708 word, even in little-endian mode. This does not match the usual subreg
15709 semantics, so we cannot use simplify_gen_subreg in those cases. Access
15710 the appropriate constituent registers "by hand" in little-endian mode.
15711
15712 Note we do not need to check for destructive overlap here since TDmode
15713 can only reside in even/odd register pairs. */
15714 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
15715 {
15716 rtx p_src, p_dst;
15717 int i;
15718
15719 for (i = 0; i < nregs; i++)
15720 {
15721 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
15722 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
15723 else
15724 p_src = simplify_gen_subreg (reg_mode, src, mode,
15725 i * reg_mode_size);
15726
15727 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
15728 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
15729 else
15730 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
15731 i * reg_mode_size);
15732
15733 emit_insn (gen_rtx_SET (p_dst, p_src));
15734 }
15735
15736 return;
15737 }
15738
15739 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
15740 {
15741 /* Move register range backwards, if we might have destructive
15742 overlap. */
15743 int i;
15744 for (i = nregs - 1; i >= 0; i--)
15745 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
15746 i * reg_mode_size),
15747 simplify_gen_subreg (reg_mode, src, mode,
15748 i * reg_mode_size)));
15749 }
15750 else
15751 {
15752 int i;
15753 int j = -1;
15754 bool used_update = false;
15755 rtx restore_basereg = NULL_RTX;
15756
15757 if (MEM_P (src) && INT_REGNO_P (reg))
15758 {
15759 rtx breg;
15760
15761 if (GET_CODE (XEXP (src, 0)) == PRE_INC
15762 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
15763 {
15764 rtx delta_rtx;
15765 breg = XEXP (XEXP (src, 0), 0);
15766 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
15767 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
15768 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
15769 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
15770 src = replace_equiv_address (src, breg);
15771 }
15772 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
15773 {
15774 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
15775 {
15776 rtx basereg = XEXP (XEXP (src, 0), 0);
15777 if (TARGET_UPDATE)
15778 {
15779 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
15780 emit_insn (gen_rtx_SET (ndst,
15781 gen_rtx_MEM (reg_mode,
15782 XEXP (src, 0))));
15783 used_update = true;
15784 }
15785 else
15786 emit_insn (gen_rtx_SET (basereg,
15787 XEXP (XEXP (src, 0), 1)));
15788 src = replace_equiv_address (src, basereg);
15789 }
15790 else
15791 {
15792 rtx basereg = gen_rtx_REG (Pmode, reg);
15793 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
15794 src = replace_equiv_address (src, basereg);
15795 }
15796 }
15797
15798 breg = XEXP (src, 0);
15799 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
15800 breg = XEXP (breg, 0);
15801
15802 /* If the base register we are using to address memory is
15803 also a destination reg, then change that register last. */
15804 if (REG_P (breg)
15805 && REGNO (breg) >= REGNO (dst)
15806 && REGNO (breg) < REGNO (dst) + nregs)
15807 j = REGNO (breg) - REGNO (dst);
15808 }
15809 else if (MEM_P (dst) && INT_REGNO_P (reg))
15810 {
15811 rtx breg;
15812
15813 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
15814 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
15815 {
15816 rtx delta_rtx;
15817 breg = XEXP (XEXP (dst, 0), 0);
15818 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
15819 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
15820 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
15821
15822 /* We have to update the breg before doing the store.
15823 Use store with update, if available. */
15824
15825 if (TARGET_UPDATE)
15826 {
15827 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
15828 emit_insn (TARGET_32BIT
15829 ? (TARGET_POWERPC64
15830 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
15831 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
15832 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
15833 used_update = true;
15834 }
15835 else
15836 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
15837 dst = replace_equiv_address (dst, breg);
15838 }
15839 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
15840 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
15841 {
15842 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
15843 {
15844 rtx basereg = XEXP (XEXP (dst, 0), 0);
15845 if (TARGET_UPDATE)
15846 {
15847 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
15848 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
15849 XEXP (dst, 0)),
15850 nsrc));
15851 used_update = true;
15852 }
15853 else
15854 emit_insn (gen_rtx_SET (basereg,
15855 XEXP (XEXP (dst, 0), 1)));
15856 dst = replace_equiv_address (dst, basereg);
15857 }
15858 else
15859 {
15860 rtx basereg = XEXP (XEXP (dst, 0), 0);
15861 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
15862 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
15863 && REG_P (basereg)
15864 && REG_P (offsetreg)
15865 && REGNO (basereg) != REGNO (offsetreg));
15866 if (REGNO (basereg) == 0)
15867 {
15868 rtx tmp = offsetreg;
15869 offsetreg = basereg;
15870 basereg = tmp;
15871 }
15872 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
15873 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
15874 dst = replace_equiv_address (dst, basereg);
15875 }
15876 }
15877 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
15878 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
15879 }
15880
15881 for (i = 0; i < nregs; i++)
15882 {
15883 /* Calculate index to next subword. */
15884 ++j;
15885 if (j == nregs)
15886 j = 0;
15887
15888 /* If compiler already emitted move of first word by
15889 store with update, no need to do anything. */
15890 if (j == 0 && used_update)
15891 continue;
15892
15893 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
15894 j * reg_mode_size),
15895 simplify_gen_subreg (reg_mode, src, mode,
15896 j * reg_mode_size)));
15897 }
15898 if (restore_basereg != NULL_RTX)
15899 emit_insn (restore_basereg);
15900 }
15901 }
15902
15903 static GTY(()) alias_set_type TOC_alias_set = -1;
15904
15905 alias_set_type
15906 get_TOC_alias_set (void)
15907 {
15908 if (TOC_alias_set == -1)
15909 TOC_alias_set = new_alias_set ();
15910 return TOC_alias_set;
15911 }
15912
15913 /* The mode the ABI uses for a word. This is not the same as word_mode
15914 for -m32 -mpowerpc64. This is used to implement various target hooks. */
15915
15916 static scalar_int_mode
15917 rs6000_abi_word_mode (void)
15918 {
15919 return TARGET_32BIT ? SImode : DImode;
15920 }
15921
15922 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
15923 static char *
15924 rs6000_offload_options (void)
15925 {
15926 if (TARGET_64BIT)
15927 return xstrdup ("-foffload-abi=lp64");
15928 else
15929 return xstrdup ("-foffload-abi=ilp32");
15930 }
15931
15932 \f
15933 /* A quick summary of the various types of 'constant-pool tables'
15934 under PowerPC:
15935
15936 Target Flags Name One table per
15937 AIX (none) AIX TOC object file
15938 AIX -mfull-toc AIX TOC object file
15939 AIX -mminimal-toc AIX minimal TOC translation unit
15940 SVR4/EABI (none) SVR4 SDATA object file
15941 SVR4/EABI -fpic SVR4 pic object file
15942 SVR4/EABI -fPIC SVR4 PIC translation unit
15943 SVR4/EABI -mrelocatable EABI TOC function
15944 SVR4/EABI -maix AIX TOC object file
15945 SVR4/EABI -maix -mminimal-toc
15946 AIX minimal TOC translation unit
15947
15948 Name Reg. Set by entries contains:
15949 made by addrs? fp? sum?
15950
15951 AIX TOC 2 crt0 as Y option option
15952 AIX minimal TOC 30 prolog gcc Y Y option
15953 SVR4 SDATA 13 crt0 gcc N Y N
15954 SVR4 pic 30 prolog ld Y not yet N
15955 SVR4 PIC 30 prolog gcc Y option option
15956 EABI TOC 30 prolog gcc Y option option
15957
15958 */
15959
15960 /* Hash functions for the hash table. */
15961
15962 static unsigned
15963 rs6000_hash_constant (rtx k)
15964 {
15965 enum rtx_code code = GET_CODE (k);
15966 machine_mode mode = GET_MODE (k);
15967 unsigned result = (code << 3) ^ mode;
15968 const char *format;
15969 int flen, fidx;
15970
15971 format = GET_RTX_FORMAT (code);
15972 flen = strlen (format);
15973 fidx = 0;
15974
15975 switch (code)
15976 {
15977 case LABEL_REF:
15978 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
15979
15980 case CONST_WIDE_INT:
15981 {
15982 int i;
15983 flen = CONST_WIDE_INT_NUNITS (k);
15984 for (i = 0; i < flen; i++)
15985 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
15986 return result;
15987 }
15988
15989 case CONST_DOUBLE:
15990 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
15991
15992 case CODE_LABEL:
15993 fidx = 3;
15994 break;
15995
15996 default:
15997 break;
15998 }
15999
16000 for (; fidx < flen; fidx++)
16001 switch (format[fidx])
16002 {
16003 case 's':
16004 {
16005 unsigned i, len;
16006 const char *str = XSTR (k, fidx);
16007 len = strlen (str);
16008 result = result * 613 + len;
16009 for (i = 0; i < len; i++)
16010 result = result * 613 + (unsigned) str[i];
16011 break;
16012 }
16013 case 'u':
16014 case 'e':
16015 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
16016 break;
16017 case 'i':
16018 case 'n':
16019 result = result * 613 + (unsigned) XINT (k, fidx);
16020 break;
16021 case 'w':
16022 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
16023 result = result * 613 + (unsigned) XWINT (k, fidx);
16024 else
16025 {
16026 size_t i;
16027 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
16028 result = result * 613 + (unsigned) (XWINT (k, fidx)
16029 >> CHAR_BIT * i);
16030 }
16031 break;
16032 case '0':
16033 break;
16034 default:
16035 gcc_unreachable ();
16036 }
16037
16038 return result;
16039 }
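/* Editorial sketch (not part of the original file, not built): the
   multiplicative mixing used by rs6000_hash_constant above, applied to
   a bare string exactly as in the 's' case of the operand loop.  613
   and 1231 are the odd multipliers the function uses, so equal RTXes
   always hash to the same value.  */
#if 0
static unsigned
sketch_hash_string (const char *str, unsigned result)
{
  unsigned len = strlen (str);
  result = result * 613 + len;                  /* Mix in the length.  */
  for (unsigned i = 0; i < len; i++)
    result = result * 613 + (unsigned) str[i];  /* Then each byte.  */
  return result;
}
#endif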
16040
16041 hashval_t
16042 toc_hasher::hash (toc_hash_struct *thc)
16043 {
16044 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
16045 }
16046
16047 /* Compare H1 and H2 for equivalence. */
16048
16049 bool
16050 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
16051 {
16052 rtx r1 = h1->key;
16053 rtx r2 = h2->key;
16054
16055 if (h1->key_mode != h2->key_mode)
16056 return 0;
16057
16058 return rtx_equal_p (r1, r2);
16059 }
16060
16061 /* These are the names given by the C++ front-end to vtables and
16062 vtable-like objects. Ideally, this logic should not be here;
16063 instead, there should be some programmatic way of inquiring as
16064 to whether or not an object is a vtable. */
16065
16066 #define VTABLE_NAME_P(NAME) \
16067 (strncmp ("_vt.", NAME, strlen ("_vt.")) == 0 \
16068 || strncmp ("_ZTV", NAME, strlen ("_ZTV")) == 0 \
16069 || strncmp ("_ZTT", NAME, strlen ("_ZTT")) == 0 \
16070 || strncmp ("_ZTI", NAME, strlen ("_ZTI")) == 0 \
16071 || strncmp ("_ZTC", NAME, strlen ("_ZTC")) == 0)
16072
16073 #ifdef NO_DOLLAR_IN_LABEL
16074 /* Return a GGC-allocated character string translating dollar signs in
16075 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
16076
16077 const char *
16078 rs6000_xcoff_strip_dollar (const char *name)
16079 {
16080 char *strip, *p;
16081 const char *q;
16082 size_t len;
16083
16084 q = (const char *) strchr (name, '$');
16085
16086 if (q == 0 || q == name)
16087 return name;
16088
16089 len = strlen (name);
16090 strip = XALLOCAVEC (char, len + 1);
16091 strcpy (strip, name);
16092 p = strip + (q - name);
16093 while (p)
16094 {
16095 *p = '_';
16096 p = strchr (p + 1, '$');
16097 }
16098
16099 return ggc_alloc_string (strip, len);
16100 }
16101 #endif
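/* Editorial sketch (illustrative only, not built): expected results of
   rs6000_xcoff_strip_dollar above, including the early return that
   leaves a leading or absent '$' untouched.  */
#if 0
rs6000_xcoff_strip_dollar ("pic$base");  /* => "pic_base"  */
rs6000_xcoff_strip_dollar ("a$b$c");     /* => "a_b_c"  */
rs6000_xcoff_strip_dollar ("$entry");    /* => "$entry", leading '$' kept  */
rs6000_xcoff_strip_dollar ("plain");     /* => "plain", no '$' at all  */
#endif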
16102
16103 void
16104 rs6000_output_symbol_ref (FILE *file, rtx x)
16105 {
16106 const char *name = XSTR (x, 0);
16107
16108 /* Currently C++ TOC references to vtables can be emitted before it
16109 is decided whether the vtable is public or private. If this is
16110 the case, the linker will eventually complain about a reference
16111 to an unknown section. Thus, for vtables only, we emit the TOC
16112 reference against the identifier and not the symbol. */
16114 if (VTABLE_NAME_P (name))
16115 {
16116 RS6000_OUTPUT_BASENAME (file, name);
16117 }
16118 else
16119 assemble_name (file, name);
16120 }
16121
16122 /* Output a TOC entry. We derive the entry name from what is being
16123 written. */
16124
16125 void
16126 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
16127 {
16128 char buf[256];
16129 const char *name = buf;
16130 rtx base = x;
16131 HOST_WIDE_INT offset = 0;
16132
16133 gcc_assert (!TARGET_NO_TOC_OR_PCREL);
16134
16135 /* When the linker won't eliminate them, don't output duplicate
16136 TOC entries (this happens on AIX if there is any kind of TOC,
16137 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
16138 CODE_LABELs. */
16139 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
16140 {
16141 struct toc_hash_struct *h;
16142
16143 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
16144 time because GGC is not initialized at that point. */
16145 if (toc_hash_table == NULL)
16146 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
16147
16148 h = ggc_alloc<toc_hash_struct> ();
16149 h->key = x;
16150 h->key_mode = mode;
16151 h->labelno = labelno;
16152
16153 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
16154 if (*found == NULL)
16155 *found = h;
16156 else /* This is indeed a duplicate.
16157 Set this label equal to that label. */
16158 {
16159 fputs ("\t.set ", file);
16160 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
16161 fprintf (file, "%d,", labelno);
16162 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
16163 fprintf (file, "%d\n", ((*found)->labelno));
16164
16165 #ifdef HAVE_AS_TLS
16166 if (TARGET_XCOFF && SYMBOL_REF_P (x)
16167 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
16168 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
16169 {
16170 fputs ("\t.set ", file);
16171 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
16172 fprintf (file, "%d,", labelno);
16173 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
16174 fprintf (file, "%d\n", ((*found)->labelno));
16175 }
16176 #endif
16177 return;
16178 }
16179 }
16180
16181 /* If we're going to put a double constant in the TOC, make sure it's
16182 aligned properly when strict alignment is on. */
16183 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
16184 && STRICT_ALIGNMENT
16185 && GET_MODE_BITSIZE (mode) >= 64
16186 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
16187 ASM_OUTPUT_ALIGN (file, 3);
16189
16190 (*targetm.asm_out.internal_label) (file, "LC", labelno);
16191
16192 /* Handle FP constants specially. Note that if we have a minimal
16193 TOC, things we put here aren't actually in the TOC, so we can allow
16194 FP constants. */
16195 if (CONST_DOUBLE_P (x)
16196 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
16197 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
16198 {
16199 long k[4];
16200
16201 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16202 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
16203 else
16204 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
16205
16206 if (TARGET_64BIT)
16207 {
16208 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16209 fputs (DOUBLE_INT_ASM_OP, file);
16210 else
16211 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
16212 k[0] & 0xffffffff, k[1] & 0xffffffff,
16213 k[2] & 0xffffffff, k[3] & 0xffffffff);
16214 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
16215 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
16216 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
16217 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
16218 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
16219 return;
16220 }
16221 else
16222 {
16223 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16224 fputs ("\t.long ", file);
16225 else
16226 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
16227 k[0] & 0xffffffff, k[1] & 0xffffffff,
16228 k[2] & 0xffffffff, k[3] & 0xffffffff);
16229 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
16230 k[0] & 0xffffffff, k[1] & 0xffffffff,
16231 k[2] & 0xffffffff, k[3] & 0xffffffff);
16232 return;
16233 }
16234 }
16235 else if (CONST_DOUBLE_P (x)
16236 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
16237 {
16238 long k[2];
16239
16240 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16241 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
16242 else
16243 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
16244
16245 if (TARGET_64BIT)
16246 {
16247 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16248 fputs (DOUBLE_INT_ASM_OP, file);
16249 else
16250 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
16251 k[0] & 0xffffffff, k[1] & 0xffffffff);
16252 fprintf (file, "0x%lx%08lx\n",
16253 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
16254 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
16255 return;
16256 }
16257 else
16258 {
16259 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16260 fputs ("\t.long ", file);
16261 else
16262 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
16263 k[0] & 0xffffffff, k[1] & 0xffffffff);
16264 fprintf (file, "0x%lx,0x%lx\n",
16265 k[0] & 0xffffffff, k[1] & 0xffffffff);
16266 return;
16267 }
16268 }
16269 else if (CONST_DOUBLE_P (x)
16270 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
16271 {
16272 long l;
16273
16274 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16275 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
16276 else
16277 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
16278
16279 if (TARGET_64BIT)
16280 {
16281 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16282 fputs (DOUBLE_INT_ASM_OP, file);
16283 else
16284 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
16285 if (WORDS_BIG_ENDIAN)
16286 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
16287 else
16288 fprintf (file, "0x%lx\n", l & 0xffffffff);
16289 return;
16290 }
16291 else
16292 {
16293 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16294 fputs ("\t.long ", file);
16295 else
16296 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
16297 fprintf (file, "0x%lx\n", l & 0xffffffff);
16298 return;
16299 }
16300 }
16301 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
16302 {
16303 unsigned HOST_WIDE_INT low;
16304 HOST_WIDE_INT high;
16305
16306 low = INTVAL (x) & 0xffffffff;
16307 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
16308
16309 /* TOC entries are always Pmode-sized, so when big-endian
16310 smaller integer constants in the TOC need to be padded.
16311 (This is still a win over putting the constants in
16312 a separate constant pool, because then we'd have
16313 to have both a TOC entry _and_ the actual constant.)
16314
16315 For a 32-bit target, CONST_INT values are loaded and shifted
16316 entirely within `low' and can be stored in one TOC entry. */
16317
16318 /* It would be easy to make this work, but it doesn't now. */
16319 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
16320
16321 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
16322 {
16323 low |= high << 32;
16324 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
16325 high = (HOST_WIDE_INT) low >> 32;
16326 low &= 0xffffffff;
16327 }
16328
16329 if (TARGET_64BIT)
16330 {
16331 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16332 fputs (DOUBLE_INT_ASM_OP, file);
16333 else
16334 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
16335 (long) high & 0xffffffff, (long) low & 0xffffffff);
16336 fprintf (file, "0x%lx%08lx\n",
16337 (long) high & 0xffffffff, (long) low & 0xffffffff);
16338 return;
16339 }
16340 else
16341 {
16342 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
16343 {
16344 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16345 fputs ("\t.long ", file);
16346 else
16347 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
16348 (long) high & 0xffffffff, (long) low & 0xffffffff);
16349 fprintf (file, "0x%lx,0x%lx\n",
16350 (long) high & 0xffffffff, (long) low & 0xffffffff);
16351 }
16352 else
16353 {
16354 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16355 fputs ("\t.long ", file);
16356 else
16357 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
16358 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
16359 }
16360 return;
16361 }
16362 }
16363
16364 if (GET_CODE (x) == CONST)
16365 {
16366 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
16367 && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
16368
16369 base = XEXP (XEXP (x, 0), 0);
16370 offset = INTVAL (XEXP (XEXP (x, 0), 1));
16371 }
16372
16373 switch (GET_CODE (base))
16374 {
16375 case SYMBOL_REF:
16376 name = XSTR (base, 0);
16377 break;
16378
16379 case LABEL_REF:
16380 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
16381 CODE_LABEL_NUMBER (XEXP (base, 0)));
16382 break;
16383
16384 case CODE_LABEL:
16385 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
16386 break;
16387
16388 default:
16389 gcc_unreachable ();
16390 }
16391
16392 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16393 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
16394 else
16395 {
16396 fputs ("\t.tc ", file);
16397 RS6000_OUTPUT_BASENAME (file, name);
16398
16399 if (offset < 0)
16400 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
16401 else if (offset)
16402 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
16403
16404 /* Mark large TOC symbols on AIX with [TE] so they are mapped
16405 after other TOC symbols, reducing overflow of small TOC access
16406 to [TC] symbols. */
16407 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
16408 ? "[TE]," : "[TC],", file);
16409 }
16410
16411 /* Currently C++ TOC references to vtables can be emitted before it
16412 is decided whether the vtable is public or private. If this is
16413 the case, the linker will eventually complain about a TOC
16414 reference to an unknown section. Thus, for vtables only, we emit
16415 the TOC reference against the symbol and not the section. */
16417 if (VTABLE_NAME_P (name))
16418 {
16419 RS6000_OUTPUT_BASENAME (file, name);
16420 if (offset < 0)
16421 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
16422 else if (offset > 0)
16423 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
16424 }
16425 else
16426 output_addr_const (file, x);
16427
16428 #if HAVE_AS_TLS
16429 if (TARGET_XCOFF && SYMBOL_REF_P (base))
16430 {
16431 switch (SYMBOL_REF_TLS_MODEL (base))
16432 {
16433 case 0:
16434 break;
16435 case TLS_MODEL_LOCAL_EXEC:
16436 fputs ("@le", file);
16437 break;
16438 case TLS_MODEL_INITIAL_EXEC:
16439 fputs ("@ie", file);
16440 break;
16441 /* Use global-dynamic for local-dynamic. */
16442 case TLS_MODEL_GLOBAL_DYNAMIC:
16443 case TLS_MODEL_LOCAL_DYNAMIC:
16444 putc ('\n', file);
16445 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
16446 fputs ("\t.tc .", file);
16447 RS6000_OUTPUT_BASENAME (file, name);
16448 fputs ("[TC],", file);
16449 output_addr_const (file, x);
16450 fputs ("@m", file);
16451 break;
16452 default:
16453 gcc_unreachable ();
16454 }
16455 }
16456 #endif
16457
16458 putc ('\n', file);
16459 }
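/* Editorial sketch (illustrative only): roughly what output_toc above
   emits on a 64-bit ELF target for the double constant 1.0, whose bit
   pattern is 0x3ff0000000000000, followed by the ".set" aliasing used
   when a duplicate constant is met later; exact label syntax and the
   DOUBLE_INT_ASM_OP spelling (typically ".quad") vary by target:

	.LC0:
		.quad 0x3ff0000000000000
	...
		.set .LC5,.LC0

   The AIX non-minimal-TOC path instead writes a named entry such as
   ".tc FD_3ff00000_0[TC],0x3ff0000000000000".  */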
16460 \f
16461 /* Output an assembler pseudo-op to write an ASCII string of N characters
16462 starting at P to FILE.
16463
16464 On the RS/6000, we have to do this using the .byte operation and
16465 write out special characters outside the quoted string.
16466 Also, the assembler is broken; very long strings are truncated,
16467 so we must artificially break them up early. */
16468
16469 void
16470 output_ascii (FILE *file, const char *p, int n)
16471 {
16472 char c;
16473 int i, count_string;
16474 const char *for_string = "\t.byte \"";
16475 const char *for_decimal = "\t.byte ";
16476 const char *to_close = NULL;
16477
16478 count_string = 0;
16479 for (i = 0; i < n; i++)
16480 {
16481 c = *p++;
16482 if (c >= ' ' && c < 0177)
16483 {
16484 if (for_string)
16485 fputs (for_string, file);
16486 putc (c, file);
16487
16488 /* Write two quotes to get one. */
16489 if (c == '"')
16490 {
16491 putc (c, file);
16492 ++count_string;
16493 }
16494
16495 for_string = NULL;
16496 for_decimal = "\"\n\t.byte ";
16497 to_close = "\"\n";
16498 ++count_string;
16499
16500 if (count_string >= 512)
16501 {
16502 fputs (to_close, file);
16503
16504 for_string = "\t.byte \"";
16505 for_decimal = "\t.byte ";
16506 to_close = NULL;
16507 count_string = 0;
16508 }
16509 }
16510 else
16511 {
16512 if (for_decimal)
16513 fputs (for_decimal, file);
16514 fprintf (file, "%d", c);
16515
16516 for_string = "\n\t.byte \"";
16517 for_decimal = ", ";
16518 to_close = "\n";
16519 count_string = 0;
16520 }
16521 }
16522
16523 /* Now close the string if we have written one. Then end the line. */
16524 if (to_close)
16525 fputs (to_close, file);
16526 }
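/* Editorial sketch (illustrative only): for the three input bytes
   'H' 'i' '\n', the loop above emits

	.byte "Hi"
	.byte 10

   printable runs are quoted, everything else is written as decimal,
   and quoted runs are flushed every 512 characters to stay under the
   assembler's string-length limit.  */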
16527 \f
16528 /* Generate a unique section name for FILENAME for a section type
16529 represented by SECTION_DESC. Output goes into BUF.
16530
16531 SECTION_DESC can be any string, as long as it is different for each
16532 possible section type.
16533
16534 We name the section in the same manner as xlc. The name begins with an
16535 underscore followed by the filename (after stripping any leading directory
16536 names) with the last period replaced by the string SECTION_DESC. If
16537 FILENAME does not contain a period, SECTION_DESC is appended to the end of
16538 the name. */
16539
16540 void
16541 rs6000_gen_section_name (char **buf, const char *filename,
16542 const char *section_desc)
16543 {
16544 const char *q, *after_last_slash, *last_period = 0;
16545 char *p;
16546 int len;
16547
16548 after_last_slash = filename;
16549 for (q = filename; *q; q++)
16550 {
16551 if (*q == '/')
16552 after_last_slash = q + 1;
16553 else if (*q == '.')
16554 last_period = q;
16555 }
16556
16557 len = strlen (after_last_slash) + strlen (section_desc) + 2;
16558 *buf = (char *) xmalloc (len);
16559
16560 p = *buf;
16561 *p++ = '_';
16562
16563 for (q = after_last_slash; *q; q++)
16564 {
16565 if (q == last_period)
16566 {
16567 strcpy (p, section_desc);
16568 p += strlen (section_desc);
16569 break;
16570 }
16571
16572 else if (ISALNUM (*q))
16573 *p++ = *q;
16574 }
16575
16576 if (last_period == 0)
16577 strcpy (p, section_desc);
16578 else
16579 *p = '\0';
16580 }
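/* Editorial sketch (illustrative only): by the rules above,
   rs6000_gen_section_name (&buf, "src/my-file.c", ".bss") produces
   "_myfile.bss": the directory is stripped, the non-alphanumeric '-'
   is dropped, and the last '.' is replaced by the descriptor.  A
   filename without a period, such as "README", gives "_README.bss".  */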
16581 \f
16582 /* Emit profile function. */
16583
16584 void
16585 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
16586 {
16587 /* Non-standard profiling for kernels, which just saves LR then calls
16588 _mcount without worrying about arg saves. The idea is to change
16589 the function prologue as little as possible as it isn't easy to
16590 account for arg save/restore code added just for _mcount. */
16591 if (TARGET_PROFILE_KERNEL)
16592 return;
16593
16594 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
16595 {
16596 #ifndef NO_PROFILE_COUNTERS
16597 # define NO_PROFILE_COUNTERS 0
16598 #endif
16599 if (NO_PROFILE_COUNTERS)
16600 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
16601 LCT_NORMAL, VOIDmode);
16602 else
16603 {
16604 char buf[30];
16605 const char *label_name;
16606 rtx fun;
16607
16608 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
16609 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
16610 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
16611
16612 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
16613 LCT_NORMAL, VOIDmode, fun, Pmode);
16614 }
16615 }
16616 else if (DEFAULT_ABI == ABI_DARWIN)
16617 {
16618 const char *mcount_name = RS6000_MCOUNT;
16619 int caller_addr_regno = LR_REGNO;
16620
16621 /* Be conservative and always set this, at least for now. */
16622 crtl->uses_pic_offset_table = 1;
16623
16624 #if TARGET_MACHO
16625 /* For PIC code, set up a stub and collect the caller's address
16626 from r0, which is where the prologue puts it. */
16627 if (MACHOPIC_INDIRECT
16628 && crtl->uses_pic_offset_table)
16629 caller_addr_regno = 0;
16630 #endif
16631 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
16632 LCT_NORMAL, VOIDmode,
16633 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
16634 }
16635 }
16636
16637 /* Write function profiler code. */
16638
16639 void
16640 output_function_profiler (FILE *file, int labelno)
16641 {
16642 char buf[100];
16643
16644 switch (DEFAULT_ABI)
16645 {
16646 default:
16647 gcc_unreachable ();
16648
16649 case ABI_V4:
16650 if (!TARGET_32BIT)
16651 {
16652 warning (0, "no profiling of 64-bit code for this ABI");
16653 return;
16654 }
16655 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
16656 fprintf (file, "\tmflr %s\n", reg_names[0]);
16657 if (NO_PROFILE_COUNTERS)
16658 {
16659 asm_fprintf (file, "\tstw %s,4(%s)\n",
16660 reg_names[0], reg_names[1]);
16661 }
16662 else if (TARGET_SECURE_PLT && flag_pic)
16663 {
16664 if (TARGET_LINK_STACK)
16665 {
16666 char name[32];
16667 get_ppc476_thunk_name (name);
16668 asm_fprintf (file, "\tbl %s\n", name);
16669 }
16670 else
16671 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
16672 asm_fprintf (file, "\tstw %s,4(%s)\n",
16673 reg_names[0], reg_names[1]);
16674 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
16675 asm_fprintf (file, "\taddis %s,%s,",
16676 reg_names[12], reg_names[12]);
16677 assemble_name (file, buf);
16678 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
16679 assemble_name (file, buf);
16680 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
16681 }
16682 else if (flag_pic == 1)
16683 {
16684 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
16685 asm_fprintf (file, "\tstw %s,4(%s)\n",
16686 reg_names[0], reg_names[1]);
16687 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
16688 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
16689 assemble_name (file, buf);
16690 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
16691 }
16692 else if (flag_pic > 1)
16693 {
16694 asm_fprintf (file, "\tstw %s,4(%s)\n",
16695 reg_names[0], reg_names[1]);
16696 /* Now, we need to get the address of the label. */
16697 if (TARGET_LINK_STACK)
16698 {
16699 char name[32];
16700 get_ppc476_thunk_name (name);
16701 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
16702 assemble_name (file, buf);
16703 fputs ("-.\n1:", file);
16704 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
16705 asm_fprintf (file, "\taddi %s,%s,4\n",
16706 reg_names[11], reg_names[11]);
16707 }
16708 else
16709 {
16710 fputs ("\tbcl 20,31,1f\n\t.long ", file);
16711 assemble_name (file, buf);
16712 fputs ("-.\n1:", file);
16713 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
16714 }
16715 asm_fprintf (file, "\tlwz %s,0(%s)\n",
16716 reg_names[0], reg_names[11]);
16717 asm_fprintf (file, "\tadd %s,%s,%s\n",
16718 reg_names[0], reg_names[0], reg_names[11]);
16719 }
16720 else
16721 {
16722 asm_fprintf (file, "\tlis %s,", reg_names[12]);
16723 assemble_name (file, buf);
16724 fputs ("@ha\n", file);
16725 asm_fprintf (file, "\tstw %s,4(%s)\n",
16726 reg_names[0], reg_names[1]);
16727 asm_fprintf (file, "\tla %s,", reg_names[0]);
16728 assemble_name (file, buf);
16729 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
16730 }
16731
16732 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
16733 fprintf (file, "\tbl %s%s\n",
16734 RS6000_MCOUNT, flag_pic ? "@plt" : "");
16735 break;
16736
16737 case ABI_AIX:
16738 case ABI_ELFv2:
16739 case ABI_DARWIN:
16740 /* Don't do anything, done in output_profile_hook (). */
16741 break;
16742 }
16743 }
16744
16745 \f
16746
16747 /* The following variable value is the last issued insn. */
16748
16749 static rtx_insn *last_scheduled_insn;
16750
16751 /* The following variable helps to balance issuing of load and
16752 store instructions */
16753
16754 static int load_store_pendulum;
16755
16756 /* The following variable helps pair divide insns during scheduling. */
16757 static int divide_cnt;
16758 /* The following variable helps pair and alternate vector and vector load
16759 insns during scheduling. */
16760 static int vec_pairing;
16761
16762
16763 /* Power4 load update and store update instructions are cracked into a
16764 load or store and an integer insn which are executed in the same cycle.
16765 Branches have their own dispatch slot which does not count against the
16766 GCC issue rate, but it changes the program flow so there are no other
16767 instructions to issue in this cycle. */
16768
16769 static int
16770 rs6000_variable_issue_1 (rtx_insn *insn, int more)
16771 {
16772 last_scheduled_insn = insn;
16773 if (GET_CODE (PATTERN (insn)) == USE
16774 || GET_CODE (PATTERN (insn)) == CLOBBER)
16775 {
16776 cached_can_issue_more = more;
16777 return cached_can_issue_more;
16778 }
16779
16780 if (insn_terminates_group_p (insn, current_group))
16781 {
16782 cached_can_issue_more = 0;
16783 return cached_can_issue_more;
16784 }
16785
16786 /* If the insn has no reservation but we reach here anyway, don't adjust the issue count. */
16787 if (recog_memoized (insn) < 0)
16788 return more;
16789
16790 if (rs6000_sched_groups)
16791 {
16792 if (is_microcoded_insn (insn))
16793 cached_can_issue_more = 0;
16794 else if (is_cracked_insn (insn))
16795 cached_can_issue_more = more > 2 ? more - 2 : 0;
16796 else
16797 cached_can_issue_more = more - 1;
16798
16799 return cached_can_issue_more;
16800 }
16801
16802 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
16803 return 0;
16804
16805 cached_can_issue_more = more - 1;
16806 return cached_can_issue_more;
16807 }
16808
16809 static int
16810 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
16811 {
16812 int r = rs6000_variable_issue_1 (insn, more);
16813 if (verbose)
16814 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
16815 return r;
16816 }
16817
16818 /* Adjust the cost of a scheduling dependency. Return the new cost of
16819 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
16820
16821 static int
16822 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
16823 unsigned int)
16824 {
16825 enum attr_type attr_type;
16826
16827 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
16828 return cost;
16829
16830 switch (dep_type)
16831 {
16832 case REG_DEP_TRUE:
16833 {
16834 /* Data dependency; DEP_INSN writes a register that INSN reads
16835 some cycles later. */
16836
16837 /* Separate a load from a narrower, dependent store. */
16838 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
16839 || rs6000_tune == PROCESSOR_FUTURE)
16840 && GET_CODE (PATTERN (insn)) == SET
16841 && GET_CODE (PATTERN (dep_insn)) == SET
16842 && MEM_P (XEXP (PATTERN (insn), 1))
16843 && MEM_P (XEXP (PATTERN (dep_insn), 0))
16844 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
16845 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
16846 return cost + 14;
16847
16848 attr_type = get_attr_type (insn);
16849
16850 switch (attr_type)
16851 {
16852 case TYPE_JMPREG:
16853 /* Tell the first scheduling pass about the latency between
16854 a mtctr and bctr (and mtlr and br/blr). The first
16855 scheduling pass will not know about this latency since
16856 the mtctr instruction, which has the latency associated
16857 to it, will be generated by reload. */
16858 return 4;
16859 case TYPE_BRANCH:
16860 /* Leave some extra cycles between a compare and its
16861 dependent branch, to inhibit expensive mispredicts. */
16862 if ((rs6000_tune == PROCESSOR_PPC603
16863 || rs6000_tune == PROCESSOR_PPC604
16864 || rs6000_tune == PROCESSOR_PPC604e
16865 || rs6000_tune == PROCESSOR_PPC620
16866 || rs6000_tune == PROCESSOR_PPC630
16867 || rs6000_tune == PROCESSOR_PPC750
16868 || rs6000_tune == PROCESSOR_PPC7400
16869 || rs6000_tune == PROCESSOR_PPC7450
16870 || rs6000_tune == PROCESSOR_PPCE5500
16871 || rs6000_tune == PROCESSOR_PPCE6500
16872 || rs6000_tune == PROCESSOR_POWER4
16873 || rs6000_tune == PROCESSOR_POWER5
16874 || rs6000_tune == PROCESSOR_POWER7
16875 || rs6000_tune == PROCESSOR_POWER8
16876 || rs6000_tune == PROCESSOR_POWER9
16877 || rs6000_tune == PROCESSOR_FUTURE
16878 || rs6000_tune == PROCESSOR_CELL)
16879 && recog_memoized (dep_insn)
16880 && (INSN_CODE (dep_insn) >= 0))
16881
16882 switch (get_attr_type (dep_insn))
16883 {
16884 case TYPE_CMP:
16885 case TYPE_FPCOMPARE:
16886 case TYPE_CR_LOGICAL:
16887 return cost + 2;
16888 case TYPE_EXTS:
16889 case TYPE_MUL:
16890 if (get_attr_dot (dep_insn) == DOT_YES)
16891 return cost + 2;
16892 else
16893 break;
16894 case TYPE_SHIFT:
16895 if (get_attr_dot (dep_insn) == DOT_YES
16896 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
16897 return cost + 2;
16898 else
16899 break;
16900 default:
16901 break;
16902 }
16903 break;
16904
16905 case TYPE_STORE:
16906 case TYPE_FPSTORE:
16907 if ((rs6000_tune == PROCESSOR_POWER6)
16908 && recog_memoized (dep_insn)
16909 && (INSN_CODE (dep_insn) >= 0))
16910 {
16911
16912 if (GET_CODE (PATTERN (insn)) != SET)
16913 /* If this happens, we have to extend this to schedule
16914 optimally. Return default for now. */
16915 return cost;
16916
16917 /* Adjust the cost for the case where the value written
16918 by a fixed point operation is used as the address
16919 gen value on a store. */
16920 switch (get_attr_type (dep_insn))
16921 {
16922 case TYPE_LOAD:
16923 case TYPE_CNTLZ:
16924 {
16925 if (! rs6000_store_data_bypass_p (dep_insn, insn))
16926 return get_attr_sign_extend (dep_insn)
16927 == SIGN_EXTEND_YES ? 6 : 4;
16928 break;
16929 }
16930 case TYPE_SHIFT:
16931 {
16932 if (! rs6000_store_data_bypass_p (dep_insn, insn))
16933 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
16934 6 : 3;
16935 break;
16936 }
16937 case TYPE_INTEGER:
16938 case TYPE_ADD:
16939 case TYPE_LOGICAL:
16940 case TYPE_EXTS:
16941 case TYPE_INSERT:
16942 {
16943 if (! rs6000_store_data_bypass_p (dep_insn, insn))
16944 return 3;
16945 break;
16946 }
16947 case TYPE_STORE:
16948 case TYPE_FPLOAD:
16949 case TYPE_FPSTORE:
16950 {
16951 if (get_attr_update (dep_insn) == UPDATE_YES
16952 && ! rs6000_store_data_bypass_p (dep_insn, insn))
16953 return 3;
16954 break;
16955 }
16956 case TYPE_MUL:
16957 {
16958 if (! rs6000_store_data_bypass_p (dep_insn, insn))
16959 return 17;
16960 break;
16961 }
16962 case TYPE_DIV:
16963 {
16964 if (! rs6000_store_data_bypass_p (dep_insn, insn))
16965 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
16966 break;
16967 }
16968 default:
16969 break;
16970 }
16971 }
16972 break;
16973
16974 case TYPE_LOAD:
16975 if ((rs6000_tune == PROCESSOR_POWER6)
16976 && recog_memoized (dep_insn)
16977 && (INSN_CODE (dep_insn) >= 0))
16978 {
16979
16980 /* Adjust the cost for the case where the value written
16981 by a fixed point instruction is used within the address
16982 gen portion of a subsequent load(u)(x) */
16983 switch (get_attr_type (dep_insn))
16984 {
16985 case TYPE_LOAD:
16986 case TYPE_CNTLZ:
16987 {
16988 if (set_to_load_agen (dep_insn, insn))
16989 return get_attr_sign_extend (dep_insn)
16990 == SIGN_EXTEND_YES ? 6 : 4;
16991 break;
16992 }
16993 case TYPE_SHIFT:
16994 {
16995 if (set_to_load_agen (dep_insn, insn))
16996 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
16997 6 : 3;
16998 break;
16999 }
17000 case TYPE_INTEGER:
17001 case TYPE_ADD:
17002 case TYPE_LOGICAL:
17003 case TYPE_EXTS:
17004 case TYPE_INSERT:
17005 {
17006 if (set_to_load_agen (dep_insn, insn))
17007 return 3;
17008 break;
17009 }
17010 case TYPE_STORE:
17011 case TYPE_FPLOAD:
17012 case TYPE_FPSTORE:
17013 {
17014 if (get_attr_update (dep_insn) == UPDATE_YES
17015 && set_to_load_agen (dep_insn, insn))
17016 return 3;
17017 break;
17018 }
17019 case TYPE_MUL:
17020 {
17021 if (set_to_load_agen (dep_insn, insn))
17022 return 17;
17023 break;
17024 }
17025 case TYPE_DIV:
17026 {
17027 if (set_to_load_agen (dep_insn, insn))
17028 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17029 break;
17030 }
17031 default:
17032 break;
17033 }
17034 }
17035 break;
17036
17037 case TYPE_FPLOAD:
17038 if ((rs6000_tune == PROCESSOR_POWER6)
17039 && get_attr_update (insn) == UPDATE_NO
17040 && recog_memoized (dep_insn)
17041 && (INSN_CODE (dep_insn) >= 0)
17042 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
17043 return 2;
17044
17045 default:
17046 break;
17047 }
17048
17049 /* Fall out to return default cost. */
17050 }
17051 break;
17052
17053 case REG_DEP_OUTPUT:
17054 /* Output dependency; DEP_INSN writes a register that INSN writes some
17055 cycles later. */
17056 if ((rs6000_tune == PROCESSOR_POWER6)
17057 && recog_memoized (dep_insn)
17058 && (INSN_CODE (dep_insn) >= 0))
17059 {
17060 attr_type = get_attr_type (insn);
17061
17062 switch (attr_type)
17063 {
17064 case TYPE_FP:
17065 case TYPE_FPSIMPLE:
17066 if (get_attr_type (dep_insn) == TYPE_FP
17067 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
17068 return 1;
17069 break;
17070 case TYPE_FPLOAD:
17071 if (get_attr_update (insn) == UPDATE_NO
17072 && get_attr_type (dep_insn) == TYPE_MFFGPR)
17073 return 2;
17074 break;
17075 default:
17076 break;
17077 }
17078 }
17079 /* Fall through, no cost for output dependency. */
17080 /* FALLTHRU */
17081
17082 case REG_DEP_ANTI:
17083 /* Anti dependency; DEP_INSN reads a register that INSN writes some
17084 cycles later. */
17085 return 0;
17086
17087 default:
17088 gcc_unreachable ();
17089 }
17090
17091 return cost;
17092 }
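/* Editorial sketch (illustrative only): one concrete instance of the
   REG_DEP_TRUE "+14" bump near the top of rs6000_adjust_cost.  For an
   SImode store whose data feeds a wider DImode load of the same
   location, the dependence cost is raised by 14 so the scheduler
   separates the pair, since forwarding a narrower store to a wider
   load is expensive on the dispatch-group targets listed there.  */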
17093
17094 /* Debug version of rs6000_adjust_cost. */
17095
17096 static int
17097 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
17098 int cost, unsigned int dw)
17099 {
17100 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
17101
17102 if (ret != cost)
17103 {
17104 const char *dep;
17105
17106 switch (dep_type)
17107 {
17108 default: dep = "unknown depencency"; break;
17109 case REG_DEP_TRUE: dep = "data dependency"; break;
17110 case REG_DEP_OUTPUT: dep = "output dependency"; break;
17111 case REG_DEP_ANTI: dep = "anti depencency"; break;
17112 }
17113
17114 fprintf (stderr,
17115 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
17116 "%s, insn:\n", ret, cost, dep);
17117
17118 debug_rtx (insn);
17119 }
17120
17121 return ret;
17122 }
17123
17124 /* Return true if INSN is microcoded, false otherwise. */
17126
17127 static bool
17128 is_microcoded_insn (rtx_insn *insn)
17129 {
17130 if (!insn || !NONDEBUG_INSN_P (insn)
17131 || GET_CODE (PATTERN (insn)) == USE
17132 || GET_CODE (PATTERN (insn)) == CLOBBER)
17133 return false;
17134
17135 if (rs6000_tune == PROCESSOR_CELL)
17136 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
17137
17138 if (rs6000_sched_groups
17139 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
17140 {
17141 enum attr_type type = get_attr_type (insn);
17142 if ((type == TYPE_LOAD
17143 && get_attr_update (insn) == UPDATE_YES
17144 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
17145 || ((type == TYPE_LOAD || type == TYPE_STORE)
17146 && get_attr_update (insn) == UPDATE_YES
17147 && get_attr_indexed (insn) == INDEXED_YES)
17148 || type == TYPE_MFCR)
17149 return true;
17150 }
17151
17152 return false;
17153 }
17154
17155 /* Return true if INSN is cracked into 2 instructions by the
17156 processor (and therefore occupies 2 issue slots). */
17157
17158 static bool
17159 is_cracked_insn (rtx_insn *insn)
17160 {
17161 if (!insn || !NONDEBUG_INSN_P (insn)
17162 || GET_CODE (PATTERN (insn)) == USE
17163 || GET_CODE (PATTERN (insn)) == CLOBBER)
17164 return false;
17165
17166 if (rs6000_sched_groups
17167 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
17168 {
17169 enum attr_type type = get_attr_type (insn);
17170 if ((type == TYPE_LOAD
17171 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
17172 && get_attr_update (insn) == UPDATE_NO)
17173 || (type == TYPE_LOAD
17174 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
17175 && get_attr_update (insn) == UPDATE_YES
17176 && get_attr_indexed (insn) == INDEXED_NO)
17177 || (type == TYPE_STORE
17178 && get_attr_update (insn) == UPDATE_YES
17179 && get_attr_indexed (insn) == INDEXED_NO)
17180 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
17181 && get_attr_update (insn) == UPDATE_YES)
17182 || (type == TYPE_CR_LOGICAL
17183 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
17184 || (type == TYPE_EXTS
17185 && get_attr_dot (insn) == DOT_YES)
17186 || (type == TYPE_SHIFT
17187 && get_attr_dot (insn) == DOT_YES
17188 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
17189 || (type == TYPE_MUL
17190 && get_attr_dot (insn) == DOT_YES)
17191 || type == TYPE_DIV
17192 || (type == TYPE_INSERT
17193 && get_attr_size (insn) == SIZE_32))
17194 return true;
17195 }
17196
17197 return false;
17198 }
17199
17200 /* Return true if INSN can be issued only from the branch slot. */
17202
17203 static bool
17204 is_branch_slot_insn (rtx_insn *insn)
17205 {
17206 if (!insn || !NONDEBUG_INSN_P (insn)
17207 || GET_CODE (PATTERN (insn)) == USE
17208 || GET_CODE (PATTERN (insn)) == CLOBBER)
17209 return false;
17210
17211 if (rs6000_sched_groups)
17212 {
17213 enum attr_type type = get_attr_type (insn);
17214 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
17215 return true;
17216 return false;
17217 }
17218
17219 return false;
17220 }
17221
17222 /* Return true if OUT_INSN sets a value that is used in the address
17223 generation computation of IN_INSN. */
17224 static bool
17225 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
17226 {
17227 rtx out_set, in_set;
17228
17229 /* For performance reasons, only handle the simple case where
17230 both insns are a single_set. */
17231 out_set = single_set (out_insn);
17232 if (out_set)
17233 {
17234 in_set = single_set (in_insn);
17235 if (in_set)
17236 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
17237 }
17238
17239 return false;
17240 }
17241
17242 /* Try to determine the base/offset/size parts of the given MEM.
17243 Return true if successful, false if the values could not all
17244 be determined.
17245
17246 This function only looks for REG or REG+CONST address forms.
17247 REG+REG address form will return false. */
17248
17249 static bool
17250 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
17251 HOST_WIDE_INT *size)
17252 {
17253 rtx addr_rtx;
17254 if (MEM_SIZE_KNOWN_P (mem))
17255 *size = MEM_SIZE (mem);
17256 else
17257 return false;
17258
17259 addr_rtx = XEXP (mem, 0);
17260 if (GET_CODE (addr_rtx) == PRE_MODIFY)
17261 addr_rtx = XEXP (addr_rtx, 1);
17262
17263 *offset = 0;
17264 while (GET_CODE (addr_rtx) == PLUS
17265 && CONST_INT_P (XEXP (addr_rtx, 1)))
17266 {
17267 *offset += INTVAL (XEXP (addr_rtx, 1));
17268 addr_rtx = XEXP (addr_rtx, 0);
17269 }
17270 if (!REG_P (addr_rtx))
17271 return false;
17272
17273 *base = addr_rtx;
17274 return true;
17275 }
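/* Editorial sketch (not built; uses only standard RTL constructors):
   exercising get_memref_parts above on a REG+CONST chain.  */
#if 0
rtx base;
HOST_WIDE_INT off, size;
rtx addr = gen_rtx_PLUS (Pmode,
			 gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 9),
				       GEN_INT (16)),
			 GEN_INT (4));
rtx mem = gen_rtx_MEM (DImode, addr);
set_mem_size (mem, 8);
/* The constants accumulate: base = (reg 9), off = 20, size = 8.
   A REG+REG address would make the function return false instead.  */
gcc_assert (get_memref_parts (mem, &base, &off, &size)
	    && REGNO (base) == 9 && off == 20 && size == 8);
#endif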
17276
17277 /* Return true if the target storage location of MEM1 is adjacent
17278 to the target storage location of MEM2. */
17280
17281 static bool
17282 adjacent_mem_locations (rtx mem1, rtx mem2)
17283 {
17284 rtx reg1, reg2;
17285 HOST_WIDE_INT off1, size1, off2, size2;
17286
17287 if (get_memref_parts (mem1, &reg1, &off1, &size1)
17288 && get_memref_parts (mem2, &reg2, &off2, &size2))
17289 return ((REGNO (reg1) == REGNO (reg2))
17290 && ((off1 + size1 == off2)
17291 || (off2 + size2 == off1)));
17292
17293 return false;
17294 }
17295
17296 /* This function returns true if it can be determined that the two MEM
17297 locations overlap by at least 1 byte based on base reg/offset/size. */
17298
17299 static bool
17300 mem_locations_overlap (rtx mem1, rtx mem2)
17301 {
17302 rtx reg1, reg2;
17303 HOST_WIDE_INT off1, size1, off2, size2;
17304
17305 if (get_memref_parts (mem1, &reg1, &off1, &size1)
17306 && get_memref_parts (mem2, &reg2, &off2, &size2))
17307 return ((REGNO (reg1) == REGNO (reg2))
17308 && (((off1 <= off2) && (off1 + size1 > off2))
17309 || ((off2 <= off1) && (off2 + size2 > off1))));
17310
17311 return false;
17312 }
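/* Editorial sketch (illustrative only): with the same base register,
   {offset 0, size 8} and {offset 8, size 8} are adjacent but do not
   overlap; {offset 0, size 8} and {offset 4, size 8} overlap by four
   bytes and are not adjacent.  Different base registers make both
   predicates return false, even if the registers happen to hold equal
   values at run time.  */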
17313
17314 /* A C statement (sans semicolon) to update the integer scheduling
17315 priority INSN_PRIORITY (INSN). Increase the priority to execute the
17316 INSN earlier, reduce the priority to execute INSN later. Do not
17317 define this macro if you do not need to adjust the scheduling
17318 priorities of insns. */
17319
17320 static int
17321 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
17322 {
17323 rtx load_mem, str_mem;
17324 /* On machines (like the 750) which have asymmetric integer units,
17325 where one integer unit can do multiply and divides and the other
17326 can't, reduce the priority of multiply/divide so it is scheduled
17327 before other integer operations. */
17328
17329 #if 0
17330 if (! INSN_P (insn))
17331 return priority;
17332
17333 if (GET_CODE (PATTERN (insn)) == USE)
17334 return priority;
17335
17336 switch (rs6000_tune) {
17337 case PROCESSOR_PPC750:
17338 switch (get_attr_type (insn))
17339 {
17340 default:
17341 break;
17342
17343 case TYPE_MUL:
17344 case TYPE_DIV:
17345 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
17346 priority, priority);
17347 if (priority >= 0 && priority < 0x01000000)
17348 priority >>= 3;
17349 break;
17350 }
17351 }
17352 #endif
17353
17354 if (insn_must_be_first_in_group (insn)
17355 && reload_completed
17356 && current_sched_info->sched_max_insns_priority
17357 && rs6000_sched_restricted_insns_priority)
17358 {
17359
17360 /* Prioritize insns that can be dispatched only in the first
17361 dispatch slot. */
17362 if (rs6000_sched_restricted_insns_priority == 1)
17363 /* Attach highest priority to insn. This means that in
17364 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
17365 precede 'priority' (critical path) considerations. */
17366 return current_sched_info->sched_max_insns_priority;
17367 else if (rs6000_sched_restricted_insns_priority == 2)
17368 /* Increase priority of insn by a minimal amount. This means that in
17369 haifa-sched.c:ready_sort(), only 'priority' (critical path)
17370 considerations precede dispatch-slot restriction considerations. */
17371 return (priority + 1);
17372 }
17373
17374 if (rs6000_tune == PROCESSOR_POWER6
17375 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
17376 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
17377 /* Attach highest priority to insn if the scheduler has just issued two
17378 stores and this instruction is a load, or two loads and this instruction
17379 is a store. Power6 wants loads and stores scheduled alternately
17380 when possible */
17381 return current_sched_info->sched_max_insns_priority;
17382
17383 return priority;
17384 }
17385
17386 /* Return true if the instruction is nonpipelined on the Cell. */
17387 static bool
17388 is_nonpipeline_insn (rtx_insn *insn)
17389 {
17390 enum attr_type type;
17391 if (!insn || !NONDEBUG_INSN_P (insn)
17392 || GET_CODE (PATTERN (insn)) == USE
17393 || GET_CODE (PATTERN (insn)) == CLOBBER)
17394 return false;
17395
17396 type = get_attr_type (insn);
17397 if (type == TYPE_MUL
17398 || type == TYPE_DIV
17399 || type == TYPE_SDIV
17400 || type == TYPE_DDIV
17401 || type == TYPE_SSQRT
17402 || type == TYPE_DSQRT
17403 || type == TYPE_MFCR
17404 || type == TYPE_MFCRF
17405 || type == TYPE_MFJMPR)
17406 {
17407 return true;
17408 }
17409 return false;
17410 }
17411
17412
17413 /* Return how many instructions the machine can issue per cycle. */
17414
17415 static int
17416 rs6000_issue_rate (void)
17417 {
17418 /* Unless scheduling for register pressure, use issue rate of 1 for
17419 first scheduling pass to decrease degradation. */
17420 if (!reload_completed && !flag_sched_pressure)
17421 return 1;
17422
17423 switch (rs6000_tune) {
17424 case PROCESSOR_RS64A:
17425 case PROCESSOR_PPC601: /* ? */
17426 case PROCESSOR_PPC7450:
17427 return 3;
17428 case PROCESSOR_PPC440:
17429 case PROCESSOR_PPC603:
17430 case PROCESSOR_PPC750:
17431 case PROCESSOR_PPC7400:
17432 case PROCESSOR_PPC8540:
17433 case PROCESSOR_PPC8548:
17434 case PROCESSOR_CELL:
17435 case PROCESSOR_PPCE300C2:
17436 case PROCESSOR_PPCE300C3:
17437 case PROCESSOR_PPCE500MC:
17438 case PROCESSOR_PPCE500MC64:
17439 case PROCESSOR_PPCE5500:
17440 case PROCESSOR_PPCE6500:
17441 case PROCESSOR_TITAN:
17442 return 2;
17443 case PROCESSOR_PPC476:
17444 case PROCESSOR_PPC604:
17445 case PROCESSOR_PPC604e:
17446 case PROCESSOR_PPC620:
17447 case PROCESSOR_PPC630:
17448 return 4;
17449 case PROCESSOR_POWER4:
17450 case PROCESSOR_POWER5:
17451 case PROCESSOR_POWER6:
17452 case PROCESSOR_POWER7:
17453 return 5;
17454 case PROCESSOR_POWER8:
17455 return 7;
17456 case PROCESSOR_POWER9:
17457 case PROCESSOR_FUTURE:
17458 return 6;
17459 default:
17460 return 1;
17461 }
17462 }
17463
17464 /* Return how many instructions to look ahead for better insn
17465 scheduling. */
17466
17467 static int
17468 rs6000_use_sched_lookahead (void)
17469 {
17470 switch (rs6000_tune)
17471 {
17472 case PROCESSOR_PPC8540:
17473 case PROCESSOR_PPC8548:
17474 return 4;
17475
17476 case PROCESSOR_CELL:
17477 return (reload_completed ? 8 : 0);
17478
17479 default:
17480 return 0;
17481 }
17482 }
17483
17484 /* We are choosing an insn from the ready queue. Return zero if INSN can be
17485 chosen. */
17486 static int
17487 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
17488 {
17489 if (ready_index == 0)
17490 return 0;
17491
17492 if (rs6000_tune != PROCESSOR_CELL)
17493 return 0;
17494
17495 gcc_assert (insn != NULL_RTX && INSN_P (insn));
17496
17497 if (!reload_completed
17498 || is_nonpipeline_insn (insn)
17499 || is_microcoded_insn (insn))
17500 return 1;
17501
17502 return 0;
17503 }
17504
17505 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
17506 and return true. */
17507
17508 static bool
17509 find_mem_ref (rtx pat, rtx *mem_ref)
17510 {
17511 const char * fmt;
17512 int i, j;
17513
17514 /* stack_tie does not produce any real memory traffic. */
17515 if (tie_operand (pat, VOIDmode))
17516 return false;
17517
17518 if (MEM_P (pat))
17519 {
17520 *mem_ref = pat;
17521 return true;
17522 }
17523
17524 /* Recursively process the pattern. */
17525 fmt = GET_RTX_FORMAT (GET_CODE (pat));
17526
17527 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
17528 {
17529 if (fmt[i] == 'e')
17530 {
17531 if (find_mem_ref (XEXP (pat, i), mem_ref))
17532 return true;
17533 }
17534 else if (fmt[i] == 'E')
17535 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
17536 {
17537 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
17538 return true;
17539 }
17540 }
17541
17542 return false;
17543 }
17544
17545 /* Determine if PAT is a PATTERN of a load insn. */
17546
17547 static bool
17548 is_load_insn1 (rtx pat, rtx *load_mem)
17549 {
17550 if (!pat)
17551 return false;
17552
17553 if (GET_CODE (pat) == SET)
17554 return find_mem_ref (SET_SRC (pat), load_mem);
17555
17556 if (GET_CODE (pat) == PARALLEL)
17557 {
17558 int i;
17559
17560 for (i = 0; i < XVECLEN (pat, 0); i++)
17561 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
17562 return true;
17563 }
17564
17565 return false;
17566 }
17567
17568 /* Determine if INSN loads from memory. */
17569
17570 static bool
17571 is_load_insn (rtx insn, rtx *load_mem)
17572 {
17573 if (!insn || !INSN_P (insn))
17574 return false;
17575
17576 if (CALL_P (insn))
17577 return false;
17578
17579 return is_load_insn1 (PATTERN (insn), load_mem);
17580 }
17581
17582 /* Determine if PAT is a PATTERN of a store insn. */
17583
17584 static bool
17585 is_store_insn1 (rtx pat, rtx *str_mem)
17586 {
17587 if (!pat)
17588 return false;
17589
17590 if (GET_CODE (pat) == SET)
17591 return find_mem_ref (SET_DEST (pat), str_mem);
17592
17593 if (GET_CODE (pat) == PARALLEL)
17594 {
17595 int i;
17596
17597 for (i = 0; i < XVECLEN (pat, 0); i++)
17598 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
17599 return true;
17600 }
17601
17602 return false;
17603 }
17604
17605 /* Determine if INSN stores to memory. */
17606
17607 static bool
17608 is_store_insn (rtx insn, rtx *str_mem)
17609 {
17610 if (!insn || !INSN_P (insn))
17611 return false;
17612
17613 return is_store_insn1 (PATTERN (insn), str_mem);
17614 }
17615
17616 /* Return whether TYPE is a Power9 pairable vector instruction type. */
17617
17618 static bool
17619 is_power9_pairable_vec_type (enum attr_type type)
17620 {
17621 switch (type)
17622 {
17623 case TYPE_VECSIMPLE:
17624 case TYPE_VECCOMPLEX:
17625 case TYPE_VECDIV:
17626 case TYPE_VECCMP:
17627 case TYPE_VECPERM:
17628 case TYPE_VECFLOAT:
17629 case TYPE_VECFDIV:
17630 case TYPE_VECDOUBLE:
17631 return true;
17632 default:
17633 break;
17634 }
17635 return false;
17636 }
17637
17638 /* Returns whether the dependence between INSN and NEXT is considered
17639 costly by the given target. */
17640
17641 static bool
17642 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
17643 {
17644 rtx insn;
17645 rtx next;
17646 rtx load_mem, str_mem;
17647
17648 /* If the flag is not enabled, no dependence is considered costly;
17649 allow all dependent insns in the same group.
17650 This is the most aggressive option. */
17651 if (rs6000_sched_costly_dep == no_dep_costly)
17652 return false;
17653
17654 /* If the flag is set to 1, a dependence is always considered costly;
17655 do not allow dependent instructions in the same group.
17656 This is the most conservative option. */
17657 if (rs6000_sched_costly_dep == all_deps_costly)
17658 return true;
17659
17660 insn = DEP_PRO (dep);
17661 next = DEP_CON (dep);
17662
17663 if (rs6000_sched_costly_dep == store_to_load_dep_costly
17664 && is_load_insn (next, &load_mem)
17665 && is_store_insn (insn, &str_mem))
17666 /* Prevent load after store in the same group. */
17667 return true;
17668
17669 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
17670 && is_load_insn (next, &load_mem)
17671 && is_store_insn (insn, &str_mem)
17672 && DEP_TYPE (dep) == REG_DEP_TRUE
17673 && mem_locations_overlap (str_mem, load_mem))
17674 /* Prevent load after store in the same group if it is a true
17675 dependence. */
17676 return true;
17677
17678 /* The flag is set to X; dependences with latency >= X are considered costly,
17679 and will not be scheduled in the same group. */
17680 if (rs6000_sched_costly_dep <= max_dep_latency
17681 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
17682 return true;
17683
17684 return false;
17685 }
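/* Editorial sketch (illustrative only): with -msched-costly-dep=3, a
   dependence of cost 5 between insns already 1 insn apart has
   remaining latency 5 - 1 = 4 >= 3, so the pair is kept in separate
   dispatch groups; at distance 3 the value drops to 2 and the insns
   may share a group.  */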
17686
17687 /* Return the next insn after INSN that is found before TAIL is reached,
17688 skipping any "non-active" insns, i.e. insns that will not actually occupy
17689 an issue slot. Return NULL_RTX if such an insn is not found. */
17690
17691 static rtx_insn *
17692 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
17693 {
17694 if (insn == NULL_RTX || insn == tail)
17695 return NULL;
17696
17697 while (1)
17698 {
17699 insn = NEXT_INSN (insn);
17700 if (insn == NULL_RTX || insn == tail)
17701 return NULL;
17702
17703 if (CALL_P (insn)
17704 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
17705 || (NONJUMP_INSN_P (insn)
17706 && GET_CODE (PATTERN (insn)) != USE
17707 && GET_CODE (PATTERN (insn)) != CLOBBER
17708 && INSN_CODE (insn) != CODE_FOR_stack_tie))
17709 break;
17710 }
17711 return insn;
17712 }
17713
17714 /* Move instruction at POS to the end of the READY list. */
17715
17716 static void
17717 move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
17718 {
17719 rtx_insn *tmp;
17720 int i;
17721
17722 tmp = ready[pos];
17723 for (i = pos; i < lastpos; i++)
17724 ready[i] = ready[i + 1];
17725 ready[lastpos] = tmp;
17726 }
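/* Editorial sketch (illustrative only): with READY = {A, B, C, D} at
   indexes 0..3, move_to_end_of_ready (ready, 1, 3) leaves {A, C, D, B}.
   The scheduler issues from the high end of the array, so "end of the
   ready list" means "considered next".  */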
17727
17728 /* Do Power6 specific sched_reorder2 reordering of ready list. */
17729
17730 static int
17731 power6_sched_reorder2 (rtx_insn **ready, int lastpos)
17732 {
17733 /* For Power6, we need to handle some special cases to try to keep the
17734 store queue from overflowing and triggering expensive flushes.
17735
17736 This code monitors how load and store instructions are being issued
17737 and skews the ready list one way or the other to increase the likelihood
17738 that a desired instruction is issued at the proper time.
17739
17740 A couple of things are done. First, we maintain a "load_store_pendulum"
17741 to track the current state of load/store issue.
17742
17743 - If the pendulum is at zero, then no loads or stores have been
17744 issued in the current cycle so we do nothing.
17745
17746 - If the pendulum is 1, then a single load has been issued in this
17747 cycle and we attempt to locate another load in the ready list to
17748 issue with it.
17749
17750 - If the pendulum is -2, then two stores have already been
17751 issued in this cycle, so we increase the priority of the first load
17752 in the ready list to increase its likelihood of being chosen first
17753 in the next cycle.
17754
17755 - If the pendulum is -1, then a single store has been issued in this
17756 cycle and we attempt to locate another store in the ready list to
17757 issue with it, preferring a store to an adjacent memory location to
17758 facilitate store pairing in the store queue.
17759
17760 - If the pendulum is 2, then two loads have already been
17761 issued in this cycle, so we increase the priority of the first store
17762 in the ready list to increase its likelihood of being chosen first
17763 in the next cycle.
17764
17765 - If the pendulum < -2 or > 2, then do nothing.
17766
17767 Note: This code covers the most common scenarios. There exist
17768 non-load/store instructions which make use of the LSU and which
17769 would need to be accounted for to strictly model the behavior
17770 of the machine. Those instructions are currently unaccounted
17771 for, to help minimize the compile-time overhead of this code.
17772 */
17773 int pos;
17774 rtx load_mem, str_mem;
17775
17776 if (is_store_insn (last_scheduled_insn, &str_mem))
17777 /* Issuing a store, swing the load_store_pendulum to the left */
17778 load_store_pendulum--;
17779 else if (is_load_insn (last_scheduled_insn, &load_mem))
17780 /* Issuing a load, swing the load_store_pendulum to the right */
17781 load_store_pendulum++;
17782 else
17783 return cached_can_issue_more;
17784
17785 /* If the pendulum is balanced, or there is only one instruction on
17786 the ready list, then all is well, so return. */
17787 if ((load_store_pendulum == 0) || (lastpos <= 0))
17788 return cached_can_issue_more;
17789
17790 if (load_store_pendulum == 1)
17791 {
17792 /* A load has been issued in this cycle. Scan the ready list
17793 for another load to issue with it */
17794 pos = lastpos;
17795
17796 while (pos >= 0)
17797 {
17798 if (is_load_insn (ready[pos], &load_mem))
17799 {
17800 /* Found a load. Move it to the head of the ready list,
17801 and adjust its priority so that it is more likely to
17802 stay there. */
17803 move_to_end_of_ready (ready, pos, lastpos);
17804
17805 if (!sel_sched_p ()
17806 && INSN_PRIORITY_KNOWN (ready[lastpos]))
17807 INSN_PRIORITY (ready[lastpos])++;
17808 break;
17809 }
17810 pos--;
17811 }
17812 }
17813 else if (load_store_pendulum == -2)
17814 {
17815 /* Two stores have been issued in this cycle. Increase the
17816 priority of the first load in the ready list to favor it for
17817 issuing in the next cycle. */
17818 pos = lastpos;
17819
17820 while (pos >= 0)
17821 {
17822 if (is_load_insn (ready[pos], &load_mem)
17823 && !sel_sched_p ()
17824 && INSN_PRIORITY_KNOWN (ready[pos]))
17825 {
17826 INSN_PRIORITY (ready[pos])++;
17827
17828 /* Adjust the pendulum to account for the fact that a load
17829 was found and increased in priority. This is to prevent
17830 increasing the priority of multiple loads */
17831 load_store_pendulum--;
17832
17833 break;
17834 }
17835 pos--;
17836 }
17837 }
17838 else if (load_store_pendulum == -1)
17839 {
17840 /* A store has been issued in this cycle. Scan the ready list for
17841 another store to issue with it, preferring a store to an adjacent
17842 memory location */
17843 int first_store_pos = -1;
17844
17845 pos = lastpos;
17846
17847 while (pos >= 0)
17848 {
17849 if (is_store_insn (ready[pos], &str_mem))
17850 {
17851 rtx str_mem2;
17852 /* Maintain the index of the first store found on the
17853 list */
17854 if (first_store_pos == -1)
17855 first_store_pos = pos;
17856
17857 if (is_store_insn (last_scheduled_insn, &str_mem2)
17858 && adjacent_mem_locations (str_mem, str_mem2))
17859 {
17860 /* Found an adjacent store. Move it to the head of the
17861 ready list, and adjust its priority so that it is
17862 more likely to stay there. */
17863 move_to_end_of_ready (ready, pos, lastpos);
17864
17865 if (!sel_sched_p ()
17866 && INSN_PRIORITY_KNOWN (ready[lastpos]))
17867 INSN_PRIORITY (ready[lastpos])++;
17868
17869 first_store_pos = -1;
17870
17871 break;
17872 }
17873 }
17874 pos--;
17875 }
17876
17877 if (first_store_pos >= 0)
17878 {
17879 /* An adjacent store wasn't found, but a non-adjacent store was,
17880 so move the non-adjacent store to the front of the ready
17881 list, and adjust its priority so that it is more likely to
17882 stay there. */
17883 move_to_end_of_ready (ready, first_store_pos, lastpos);
17884 if (!sel_sched_p ()
17885 && INSN_PRIORITY_KNOWN (ready[lastpos]))
17886 INSN_PRIORITY (ready[lastpos])++;
17887 }
17888 }
17889 else if (load_store_pendulum == 2)
17890 {
17891 /* Two loads have been issued in this cycle. Increase the priority
17892 of the first store in the ready list to favor it for issuing in
17893 the next cycle. */
17894 pos = lastpos;
17895
17896 while (pos >= 0)
17897 {
17898 if (is_store_insn (ready[pos], &str_mem)
17899 && !sel_sched_p ()
17900 && INSN_PRIORITY_KNOWN (ready[pos]))
17901 {
17902 INSN_PRIORITY (ready[pos])++;
17903
17904 /* Adjust the pendulum to account for the fact that a store
17905 was found and increased in priority. This is to prevent
17906 increasing the priority of multiple stores */
17907 load_store_pendulum++;
17908
17909 break;
17910 }
17911 pos--;
17912 }
17913 }
17914
17915 return cached_can_issue_more;
17916 }
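/* Editorial sketch (illustrative only): one possible pendulum trace
   within a cycle.  A load issues: 0 -> +1, and another load on the
   ready list is rotated to the issue end.  That load issues: +1 -> +2,
   the first store on the list gets a priority bump and the pendulum
   steps to +3 so only one store is boosted.  A store issued after that
   swings the pendulum back toward zero.  */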
17917
17918 /* Do Power9 specific sched_reorder2 reordering of ready list. */
17919
17920 static int
17921 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
17922 {
17923 int pos;
17924 enum attr_type type, type2;
17925
17926 type = get_attr_type (last_scheduled_insn);
17927
17928 /* Try to issue fixed point divides back-to-back in pairs so they will be
17929 routed to separate execution units and execute in parallel. */
17930 if (type == TYPE_DIV && divide_cnt == 0)
17931 {
17932 /* First divide has been scheduled. */
17933 divide_cnt = 1;
17934
17935 /* Scan the ready list looking for another divide, if found move it
17936 to the end of the list so it is chosen next. */
17937 pos = lastpos;
17938 while (pos >= 0)
17939 {
17940 if (recog_memoized (ready[pos]) >= 0
17941 && get_attr_type (ready[pos]) == TYPE_DIV)
17942 {
17943 move_to_end_of_ready (ready, pos, lastpos);
17944 break;
17945 }
17946 pos--;
17947 }
17948 }
17949 else
17950 {
17951 /* Last insn was the 2nd divide or not a divide, reset the counter. */
17952 divide_cnt = 0;
17953
17954 /* The best dispatch throughput for vector and vector load insns can be
17955 achieved by interleaving a vector and vector load such that they'll
17956 dispatch to the same superslice. If this pairing cannot be achieved
17957 then it is best to pair vector insns together and vector load insns
17958 together.
17959
17960 To aid in this pairing, vec_pairing maintains the current state with
17961 the following values:
17962
17963 0 : Initial state, no vecload/vector pairing has been started.
17964
17965 1 : A vecload or vector insn has been issued and a candidate for
17966 pairing has been found and moved to the end of the ready
17967 list. */
17968 if (type == TYPE_VECLOAD)
17969 {
17970 /* Issued a vecload. */
17971 if (vec_pairing == 0)
17972 {
17973 int vecload_pos = -1;
17974 /* We issued a single vecload, look for a vector insn to pair it
17975 with. If one isn't found, try to pair another vecload. */
17976 pos = lastpos;
17977 while (pos >= 0)
17978 {
17979 if (recog_memoized (ready[pos]) >= 0)
17980 {
17981 type2 = get_attr_type (ready[pos]);
17982 if (is_power9_pairable_vec_type (type2))
17983 {
17984 /* Found a vector insn to pair with, move it to the
17985 end of the ready list so it is scheduled next. */
17986 move_to_end_of_ready (ready, pos, lastpos);
17987 vec_pairing = 1;
17988 return cached_can_issue_more;
17989 }
17990 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
17991 /* Remember position of first vecload seen. */
17992 vecload_pos = pos;
17993 }
17994 pos--;
17995 }
17996 if (vecload_pos >= 0)
17997 {
17998 /* Didn't find a vector to pair with but did find a vecload,
17999 move it to the end of the ready list. */
18000 move_to_end_of_ready (ready, vecload_pos, lastpos);
18001 vec_pairing = 1;
18002 return cached_can_issue_more;
18003 }
18004 }
18005 }
18006 else if (is_power9_pairable_vec_type (type))
18007 {
18008 /* Issued a vector operation. */
18009 if (vec_pairing == 0)
18010 {
18011 int vec_pos = -1;
18012 /* We issued a single vector insn, look for a vecload to pair it
18013 with. If one isn't found, try to pair another vector. */
18014 pos = lastpos;
18015 while (pos >= 0)
18016 {
18017 if (recog_memoized (ready[pos]) >= 0)
18018 {
18019 type2 = get_attr_type (ready[pos]);
18020 if (type2 == TYPE_VECLOAD)
18021 {
18022 /* Found a vecload insn to pair with, move it to the
18023 end of the ready list so it is scheduled next. */
18024 move_to_end_of_ready (ready, pos, lastpos);
18025 vec_pairing = 1;
18026 return cached_can_issue_more;
18027 }
18028 else if (is_power9_pairable_vec_type (type2)
18029 && vec_pos == -1)
18030 /* Remember position of first vector insn seen. */
18031 vec_pos = pos;
18032 }
18033 pos--;
18034 }
18035 if (vec_pos >= 0)
18036 {
18037 /* Didn't find a vecload to pair with but did find a vector
18038 insn, move it to the end of the ready list. */
18039 move_to_end_of_ready (ready, vec_pos, lastpos);
18040 vec_pairing = 1;
18041 return cached_can_issue_more;
18042 }
18043 }
18044 }
18045
18046 /* We've either finished a vec/vecload pair, couldn't find an insn to
18047 continue the current pair, or the last insn had nothing to do
18048 with pairing. In any case, reset the state. */
18049 vec_pairing = 0;
18050 }
18051
18052 return cached_can_issue_more;
18053 }
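
/* An illustrative issue sequence for the pairing logic above (a sketch
   using hypothetical insns; the actual superslice assignment is made by
   the hardware dispatcher):

     lxvd2x  vs0,0,r3       <- TYPE_VECLOAD issued; vec_pairing 0 -> 1
     xvadddp vs1,vs2,vs3    <- pairable vector insn found on the ready
                               list, moved to the end, and issued next;
                               vec_pairing then resets to 0

   If no vector insn is ready, a second vecload is chosen instead via
   the vecload_pos fallback.  */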
18054
18055 /* We are about to begin issuing insns for this clock cycle. */
18056
18057 static int
18058 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
18059 rtx_insn **ready ATTRIBUTE_UNUSED,
18060 int *pn_ready ATTRIBUTE_UNUSED,
18061 int clock_var ATTRIBUTE_UNUSED)
18062 {
18063 int n_ready = *pn_ready;
18064
18065 if (sched_verbose)
18066 fprintf (dump, "// rs6000_sched_reorder :\n");
18067
18068 /* Reorder the ready list if the next insn to be issued
18069 is a nonpipelined insn. */
18070 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
18071 {
18072 if (is_nonpipeline_insn (ready[n_ready - 1])
18073 && (recog_memoized (ready[n_ready - 2]) > 0))
18074 /* Simply swap first two insns. */
18075 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
18076 }
18077
18078 if (rs6000_tune == PROCESSOR_POWER6)
18079 load_store_pendulum = 0;
18080
18081 return rs6000_issue_rate ();
18082 }
18083
18084 /* Like rs6000_sched_reorder, but called after issuing each insn. */
18085
18086 static int
18087 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
18088 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
18089 {
18090 if (sched_verbose)
18091 fprintf (dump, "// rs6000_sched_reorder2 :\n");
18092
18093 /* Do Power6 dependent reordering if necessary. */
18094 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
18095 return power6_sched_reorder2 (ready, *pn_ready - 1);
18096
18097 /* Do Power9 dependent reordering if necessary. */
18098 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
18099 && recog_memoized (last_scheduled_insn) >= 0)
18100 return power9_sched_reorder2 (ready, *pn_ready - 1);
18101
18102 return cached_can_issue_more;
18103 }
18104
18105 /* Return whether the presence of INSN causes a dispatch group termination
18106 of group WHICH_GROUP.
18107
18108 If WHICH_GROUP == current_group, this function will return true if INSN
18109 causes the termination of the current group (i.e., the dispatch group to
18110 which INSN belongs). This means that INSN will be the last insn in the
18111 group it belongs to.
18112
18113 If WHICH_GROUP == previous_group, this function will return true if INSN
18114 causes the termination of the previous group (i.e., the dispatch group that
18115 precedes the group to which INSN belongs). This means that INSN will be
18116 the first insn in the group it belongs to. */
18117
18118 static bool
18119 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
18120 {
18121 bool first, last;
18122
18123 if (! insn)
18124 return false;
18125
18126 first = insn_must_be_first_in_group (insn);
18127 last = insn_must_be_last_in_group (insn);
18128
18129 if (first && last)
18130 return true;
18131
18132 if (which_group == current_group)
18133 return last;
18134 else if (which_group == previous_group)
18135 return first;
18136
18137 return false;
18138 }
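
/* For example, by the POWER6 tables below an isync must be both the
   first and the last insn of its dispatch group, so for TYPE_ISYNC this
   function returns true for either value of WHICH_GROUP.  */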
18139
18140
18141 static bool
18142 insn_must_be_first_in_group (rtx_insn *insn)
18143 {
18144 enum attr_type type;
18145
18146 if (!insn
18147 || NOTE_P (insn)
18148 || DEBUG_INSN_P (insn)
18149 || GET_CODE (PATTERN (insn)) == USE
18150 || GET_CODE (PATTERN (insn)) == CLOBBER)
18151 return false;
18152
18153 switch (rs6000_tune)
18154 {
18155 case PROCESSOR_POWER5:
18156 if (is_cracked_insn (insn))
18157 return true;
18158 /* FALLTHRU */
18159 case PROCESSOR_POWER4:
18160 if (is_microcoded_insn (insn))
18161 return true;
18162
18163 if (!rs6000_sched_groups)
18164 return false;
18165
18166 type = get_attr_type (insn);
18167
18168 switch (type)
18169 {
18170 case TYPE_MFCR:
18171 case TYPE_MFCRF:
18172 case TYPE_MTCR:
18173 case TYPE_CR_LOGICAL:
18174 case TYPE_MTJMPR:
18175 case TYPE_MFJMPR:
18176 case TYPE_DIV:
18177 case TYPE_LOAD_L:
18178 case TYPE_STORE_C:
18179 case TYPE_ISYNC:
18180 case TYPE_SYNC:
18181 return true;
18182 default:
18183 break;
18184 }
18185 break;
18186 case PROCESSOR_POWER6:
18187 type = get_attr_type (insn);
18188
18189 switch (type)
18190 {
18191 case TYPE_EXTS:
18192 case TYPE_CNTLZ:
18193 case TYPE_TRAP:
18194 case TYPE_MUL:
18195 case TYPE_INSERT:
18196 case TYPE_FPCOMPARE:
18197 case TYPE_MFCR:
18198 case TYPE_MTCR:
18199 case TYPE_MFJMPR:
18200 case TYPE_MTJMPR:
18201 case TYPE_ISYNC:
18202 case TYPE_SYNC:
18203 case TYPE_LOAD_L:
18204 case TYPE_STORE_C:
18205 return true;
18206 case TYPE_SHIFT:
18207 if (get_attr_dot (insn) == DOT_NO
18208 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
18209 return true;
18210 else
18211 break;
18212 case TYPE_DIV:
18213 if (get_attr_size (insn) == SIZE_32)
18214 return true;
18215 else
18216 break;
18217 case TYPE_LOAD:
18218 case TYPE_STORE:
18219 case TYPE_FPLOAD:
18220 case TYPE_FPSTORE:
18221 if (get_attr_update (insn) == UPDATE_YES)
18222 return true;
18223 else
18224 break;
18225 default:
18226 break;
18227 }
18228 break;
18229 case PROCESSOR_POWER7:
18230 type = get_attr_type (insn);
18231
18232 switch (type)
18233 {
18234 case TYPE_CR_LOGICAL:
18235 case TYPE_MFCR:
18236 case TYPE_MFCRF:
18237 case TYPE_MTCR:
18238 case TYPE_DIV:
18239 case TYPE_ISYNC:
18240 case TYPE_LOAD_L:
18241 case TYPE_STORE_C:
18242 case TYPE_MFJMPR:
18243 case TYPE_MTJMPR:
18244 return true;
18245 case TYPE_MUL:
18246 case TYPE_SHIFT:
18247 case TYPE_EXTS:
18248 if (get_attr_dot (insn) == DOT_YES)
18249 return true;
18250 else
18251 break;
18252 case TYPE_LOAD:
18253 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18254 || get_attr_update (insn) == UPDATE_YES)
18255 return true;
18256 else
18257 break;
18258 case TYPE_STORE:
18259 case TYPE_FPLOAD:
18260 case TYPE_FPSTORE:
18261 if (get_attr_update (insn) == UPDATE_YES)
18262 return true;
18263 else
18264 break;
18265 default:
18266 break;
18267 }
18268 break;
18269 case PROCESSOR_POWER8:
18270 type = get_attr_type (insn);
18271
18272 switch (type)
18273 {
18274 case TYPE_CR_LOGICAL:
18275 case TYPE_MFCR:
18276 case TYPE_MFCRF:
18277 case TYPE_MTCR:
18278 case TYPE_SYNC:
18279 case TYPE_ISYNC:
18280 case TYPE_LOAD_L:
18281 case TYPE_STORE_C:
18282 case TYPE_VECSTORE:
18283 case TYPE_MFJMPR:
18284 case TYPE_MTJMPR:
18285 return true;
18286 case TYPE_SHIFT:
18287 case TYPE_EXTS:
18288 case TYPE_MUL:
18289 if (get_attr_dot (insn) == DOT_YES)
18290 return true;
18291 else
18292 break;
18293 case TYPE_LOAD:
18294 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18295 || get_attr_update (insn) == UPDATE_YES)
18296 return true;
18297 else
18298 break;
18299 case TYPE_STORE:
18300 if (get_attr_update (insn) == UPDATE_YES
18301 && get_attr_indexed (insn) == INDEXED_YES)
18302 return true;
18303 else
18304 break;
18305 default:
18306 break;
18307 }
18308 break;
18309 default:
18310 break;
18311 }
18312
18313 return false;
18314 }
18315
18316 static bool
18317 insn_must_be_last_in_group (rtx_insn *insn)
18318 {
18319 enum attr_type type;
18320
18321 if (!insn
18322 || NOTE_P (insn)
18323 || DEBUG_INSN_P (insn)
18324 || GET_CODE (PATTERN (insn)) == USE
18325 || GET_CODE (PATTERN (insn)) == CLOBBER)
18326 return false;
18327
18328 switch (rs6000_tune) {
18329 case PROCESSOR_POWER4:
18330 case PROCESSOR_POWER5:
18331 if (is_microcoded_insn (insn))
18332 return true;
18333
18334 if (is_branch_slot_insn (insn))
18335 return true;
18336
18337 break;
18338 case PROCESSOR_POWER6:
18339 type = get_attr_type (insn);
18340
18341 switch (type)
18342 {
18343 case TYPE_EXTS:
18344 case TYPE_CNTLZ:
18345 case TYPE_TRAP:
18346 case TYPE_MUL:
18347 case TYPE_FPCOMPARE:
18348 case TYPE_MFCR:
18349 case TYPE_MTCR:
18350 case TYPE_MFJMPR:
18351 case TYPE_MTJMPR:
18352 case TYPE_ISYNC:
18353 case TYPE_SYNC:
18354 case TYPE_LOAD_L:
18355 case TYPE_STORE_C:
18356 return true;
18357 case TYPE_SHIFT:
18358 if (get_attr_dot (insn) == DOT_NO
18359 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
18360 return true;
18361 else
18362 break;
18363 case TYPE_DIV:
18364 if (get_attr_size (insn) == SIZE_32)
18365 return true;
18366 else
18367 break;
18368 default:
18369 break;
18370 }
18371 break;
18372 case PROCESSOR_POWER7:
18373 type = get_attr_type (insn);
18374
18375 switch (type)
18376 {
18377 case TYPE_ISYNC:
18378 case TYPE_SYNC:
18379 case TYPE_LOAD_L:
18380 case TYPE_STORE_C:
18381 return true;
18382 case TYPE_LOAD:
18383 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18384 && get_attr_update (insn) == UPDATE_YES)
18385 return true;
18386 else
18387 break;
18388 case TYPE_STORE:
18389 if (get_attr_update (insn) == UPDATE_YES
18390 && get_attr_indexed (insn) == INDEXED_YES)
18391 return true;
18392 else
18393 break;
18394 default:
18395 break;
18396 }
18397 break;
18398 case PROCESSOR_POWER8:
18399 type = get_attr_type (insn);
18400
18401 switch (type)
18402 {
18403 case TYPE_MFCR:
18404 case TYPE_MTCR:
18405 case TYPE_ISYNC:
18406 case TYPE_SYNC:
18407 case TYPE_LOAD_L:
18408 case TYPE_STORE_C:
18409 return true;
18410 case TYPE_LOAD:
18411 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18412 && get_attr_update (insn) == UPDATE_YES)
18413 return true;
18414 else
18415 break;
18416 case TYPE_STORE:
18417 if (get_attr_update (insn) == UPDATE_YES
18418 && get_attr_indexed (insn) == INDEXED_YES)
18419 return true;
18420 else
18421 break;
18422 default:
18423 break;
18424 }
18425 break;
18426 default:
18427 break;
18428 }
18429
18430 return false;
18431 }
18432
18433 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
18434 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
18435
18436 static bool
18437 is_costly_group (rtx *group_insns, rtx next_insn)
18438 {
18439 int i;
18440 int issue_rate = rs6000_issue_rate ();
18441
18442 for (i = 0; i < issue_rate; i++)
18443 {
18444 sd_iterator_def sd_it;
18445 dep_t dep;
18446 rtx insn = group_insns[i];
18447
18448 if (!insn)
18449 continue;
18450
18451 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
18452 {
18453 rtx next = DEP_CON (dep);
18454
18455 if (next == next_insn
18456 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
18457 return true;
18458 }
18459 }
18460
18461 return false;
18462 }
18463
18464 /* Utility function used by redefine_groups.
18465 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
18466 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
18467 to keep it "far" (in a separate group) from GROUP_INSNS, following
18468 one of the following schemes, depending on the value of the flag
18469 -minsert-sched-nops = X:
18470 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
18471 in order to force NEXT_INSN into a separate group.
18472 (2) X < sched_finish_regroup_exact: insert exactly X nops.
18473 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
18474 insertion (whether a group has just ended, how many vacant issue slots remain in the
18475 last group, and how many dispatch groups were encountered so far). */
18476
18477 static int
18478 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
18479 rtx_insn *next_insn, bool *group_end, int can_issue_more,
18480 int *group_count)
18481 {
18482 rtx nop;
18483 bool force;
18484 int issue_rate = rs6000_issue_rate ();
18485 bool end = *group_end;
18486 int i;
18487
18488 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
18489 return can_issue_more;
18490
18491 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
18492 return can_issue_more;
18493
18494 force = is_costly_group (group_insns, next_insn);
18495 if (!force)
18496 return can_issue_more;
18497
18498 if (sched_verbose > 6)
18499 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
18500 *group_count ,can_issue_more);
18501
18502 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
18503 {
18504 if (*group_end)
18505 can_issue_more = 0;
18506
18507 /* Since only a branch can be issued in the last issue_slot, it is
18508 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
18509 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
18510 in this case the last nop will start a new group and the branch
18511 will be forced to the new group. */
18512 if (can_issue_more && !is_branch_slot_insn (next_insn))
18513 can_issue_more--;
18514
18515 /* Do we have a special group ending nop? */
18516 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
18517 || rs6000_tune == PROCESSOR_POWER8)
18518 {
18519 nop = gen_group_ending_nop ();
18520 emit_insn_before (nop, next_insn);
18521 can_issue_more = 0;
18522 }
18523 else
18524 while (can_issue_more > 0)
18525 {
18526 nop = gen_nop ();
18527 emit_insn_before (nop, next_insn);
18528 can_issue_more--;
18529 }
18530
18531 *group_end = true;
18532 return 0;
18533 }
18534
18535 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
18536 {
18537 int n_nops = rs6000_sched_insert_nops;
18538
18539 /* Nops can't be issued from the branch slot, so the effective
18540 issue_rate for nops is 'issue_rate - 1'. */
18541 if (can_issue_more == 0)
18542 can_issue_more = issue_rate;
18543 can_issue_more--;
18544 if (can_issue_more == 0)
18545 {
18546 can_issue_more = issue_rate - 1;
18547 (*group_count)++;
18548 end = true;
18549 for (i = 0; i < issue_rate; i++)
18550 {
18551 group_insns[i] = 0;
18552 }
18553 }
18554
18555 while (n_nops > 0)
18556 {
18557 nop = gen_nop ();
18558 emit_insn_before (nop, next_insn);
18559 if (can_issue_more == issue_rate - 1) /* new group begins */
18560 end = false;
18561 can_issue_more--;
18562 if (can_issue_more == 0)
18563 {
18564 can_issue_more = issue_rate - 1;
18565 (*group_count)++;
18566 end = true;
18567 for (i = 0; i < issue_rate; i++)
18568 {
18569 group_insns[i] = 0;
18570 }
18571 }
18572 n_nops--;
18573 }
18574
18575 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
18576 can_issue_more++;
18577
18578 /* Is next_insn going to start a new group? */
18579 *group_end
18580 = (end
18581 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
18582 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
18583 || (can_issue_more < issue_rate
18584 && insn_terminates_group_p (next_insn, previous_group)));
18585 if (*group_end && end)
18586 (*group_count)--;
18587
18588 if (sched_verbose > 6)
18589 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
18590 *group_count, can_issue_more);
18591 return can_issue_more;
18592 }
18593
18594 return can_issue_more;
18595 }
18596
18597 /* This function tries to synch the dispatch groups that the compiler "sees"
18598 with the dispatch groups that the processor dispatcher is expected to
18599 form in practice. It tries to achieve this synchronization by forcing the
18600 estimated processor grouping on the compiler (as opposed to the function
18601 'pad_groups' which tries to force the scheduler's grouping on the processor).
18602
18603 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
18604 examines the (estimated) dispatch groups that will be formed by the processor
18605 dispatcher. It marks these group boundaries to reflect the estimated
18606 processor grouping, overriding the grouping that the scheduler had marked.
18607 Depending on the value of the flag '-minsert-sched-nops' this function can
18608 force certain insns into separate groups or force a certain distance between
18609 them by inserting nops, for example, if there exists a "costly dependence"
18610 between the insns.
18611
18612 The function estimates the group boundaries that the processor will form as
18613 follows: It keeps track of how many vacant issue slots are available after
18614 each insn. A subsequent insn will start a new group if one of the following
18615 4 cases applies:
18616 - no more vacant issue slots remain in the current dispatch group.
18617 - only the last issue slot, which is the branch slot, is vacant, but the next
18618 insn is not a branch.
18619 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
18620 which means that a cracked insn (which occupies two issue slots) can't be
18621 issued in this group.
18622 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
18623 start a new group. */
18624
18625 static int
18626 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
18627 rtx_insn *tail)
18628 {
18629 rtx_insn *insn, *next_insn;
18630 int issue_rate;
18631 int can_issue_more;
18632 int slot, i;
18633 bool group_end;
18634 int group_count = 0;
18635 rtx *group_insns;
18636
18637 /* Initialize. */
18638 issue_rate = rs6000_issue_rate ();
18639 group_insns = XALLOCAVEC (rtx, issue_rate);
18640 for (i = 0; i < issue_rate; i++)
18641 {
18642 group_insns[i] = 0;
18643 }
18644 can_issue_more = issue_rate;
18645 slot = 0;
18646 insn = get_next_active_insn (prev_head_insn, tail);
18647 group_end = false;
18648
18649 while (insn != NULL_RTX)
18650 {
18651 slot = (issue_rate - can_issue_more);
18652 group_insns[slot] = insn;
18653 can_issue_more =
18654 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
18655 if (insn_terminates_group_p (insn, current_group))
18656 can_issue_more = 0;
18657
18658 next_insn = get_next_active_insn (insn, tail);
18659 if (next_insn == NULL_RTX)
18660 return group_count + 1;
18661
18662 /* Is next_insn going to start a new group? */
18663 group_end
18664 = (can_issue_more == 0
18665 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
18666 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
18667 || (can_issue_more < issue_rate
18668 && insn_terminates_group_p (next_insn, previous_group)));
18669
18670 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
18671 next_insn, &group_end, can_issue_more,
18672 &group_count);
18673
18674 if (group_end)
18675 {
18676 group_count++;
18677 can_issue_more = 0;
18678 for (i = 0; i < issue_rate; i++)
18679 {
18680 group_insns[i] = 0;
18681 }
18682 }
18683
18684 if (GET_MODE (next_insn) == TImode && can_issue_more)
18685 PUT_MODE (next_insn, VOIDmode);
18686 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
18687 PUT_MODE (next_insn, TImode);
18688
18689 insn = next_insn;
18690 if (can_issue_more == 0)
18691 can_issue_more = issue_rate;
18692 } /* while */
18693
18694 return group_count;
18695 }
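
/* A worked example of the estimate above, assuming a hypothetical
   issue_rate of 5 with the last slot reserved for branches and each
   ordinary insn consuming one slot: after three insns can_issue_more
   is 2, so a cracked insn (needing two non-branch slots) starts a new
   group; once only the branch slot remains (can_issue_more == 1), any
   non-branch insn starts a new group as well.  */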
18696
18697 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
18698 dispatch group boundaries that the scheduler had marked. Pad with nops
18699 any dispatch groups which have vacant issue slots, in order to force the
18700 scheduler's grouping on the processor dispatcher. The function
18701 returns the number of dispatch groups found. */
18702
18703 static int
18704 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
18705 rtx_insn *tail)
18706 {
18707 rtx_insn *insn, *next_insn;
18708 rtx nop;
18709 int issue_rate;
18710 int can_issue_more;
18711 int group_end;
18712 int group_count = 0;
18713
18714 /* Initialize issue_rate. */
18715 issue_rate = rs6000_issue_rate ();
18716 can_issue_more = issue_rate;
18717
18718 insn = get_next_active_insn (prev_head_insn, tail);
18719 next_insn = get_next_active_insn (insn, tail);
18720
18721 while (insn != NULL_RTX)
18722 {
18723 can_issue_more =
18724 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
18725
18726 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
18727
18728 if (next_insn == NULL_RTX)
18729 break;
18730
18731 if (group_end)
18732 {
18733 /* If the scheduler had marked group termination at this location
18734 (between insn and next_insn), and neither insn nor next_insn will
18735 force group termination, pad the group with nops to force group
18736 termination. */
18737 if (can_issue_more
18738 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
18739 && !insn_terminates_group_p (insn, current_group)
18740 && !insn_terminates_group_p (next_insn, previous_group))
18741 {
18742 if (!is_branch_slot_insn (next_insn))
18743 can_issue_more--;
18744
18745 while (can_issue_more)
18746 {
18747 nop = gen_nop ();
18748 emit_insn_before (nop, next_insn);
18749 can_issue_more--;
18750 }
18751 }
18752
18753 can_issue_more = issue_rate;
18754 group_count++;
18755 }
18756
18757 insn = next_insn;
18758 next_insn = get_next_active_insn (insn, tail);
18759 }
18760
18761 return group_count;
18762 }
18763
18764 /* We're beginning a new block. Initialize data structures as necessary. */
18765
18766 static void
18767 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
18768 int sched_verbose ATTRIBUTE_UNUSED,
18769 int max_ready ATTRIBUTE_UNUSED)
18770 {
18771 last_scheduled_insn = NULL;
18772 load_store_pendulum = 0;
18773 divide_cnt = 0;
18774 vec_pairing = 0;
18775 }
18776
18777 /* The following function is called at the end of scheduling a BB.
18778 After reload, it inserts nops to enforce the insn group bundling. */
18779
18780 static void
18781 rs6000_sched_finish (FILE *dump, int sched_verbose)
18782 {
18783 int n_groups;
18784
18785 if (sched_verbose)
18786 fprintf (dump, "=== Finishing schedule.\n");
18787
18788 if (reload_completed && rs6000_sched_groups)
18789 {
18790 /* Do not run sched_finish hook when selective scheduling enabled. */
18791 if (sel_sched_p ())
18792 return;
18793
18794 if (rs6000_sched_insert_nops == sched_finish_none)
18795 return;
18796
18797 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
18798 n_groups = pad_groups (dump, sched_verbose,
18799 current_sched_info->prev_head,
18800 current_sched_info->next_tail);
18801 else
18802 n_groups = redefine_groups (dump, sched_verbose,
18803 current_sched_info->prev_head,
18804 current_sched_info->next_tail);
18805
18806 if (sched_verbose >= 6)
18807 {
18808 fprintf (dump, "ngroups = %d\n", n_groups);
18809 print_rtl (dump, current_sched_info->prev_head);
18810 fprintf (dump, "Done finish_sched\n");
18811 }
18812 }
18813 }
18814
18815 struct rs6000_sched_context
18816 {
18817 short cached_can_issue_more;
18818 rtx_insn *last_scheduled_insn;
18819 int load_store_pendulum;
18820 int divide_cnt;
18821 int vec_pairing;
18822 };
18823
18824 typedef struct rs6000_sched_context rs6000_sched_context_def;
18825 typedef rs6000_sched_context_def *rs6000_sched_context_t;
18826
18827 /* Allocate storage for a new scheduling context. */
18828 static void *
18829 rs6000_alloc_sched_context (void)
18830 {
18831 return xmalloc (sizeof (rs6000_sched_context_def));
18832 }
18833
18834 /* If CLEAN_P is true, initialize _SC with clean data;
18835 otherwise initialize it from the global context. */
18836 static void
18837 rs6000_init_sched_context (void *_sc, bool clean_p)
18838 {
18839 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
18840
18841 if (clean_p)
18842 {
18843 sc->cached_can_issue_more = 0;
18844 sc->last_scheduled_insn = NULL;
18845 sc->load_store_pendulum = 0;
18846 sc->divide_cnt = 0;
18847 sc->vec_pairing = 0;
18848 }
18849 else
18850 {
18851 sc->cached_can_issue_more = cached_can_issue_more;
18852 sc->last_scheduled_insn = last_scheduled_insn;
18853 sc->load_store_pendulum = load_store_pendulum;
18854 sc->divide_cnt = divide_cnt;
18855 sc->vec_pairing = vec_pairing;
18856 }
18857 }
18858
18859 /* Sets the global scheduling context to the one pointed to by _SC. */
18860 static void
18861 rs6000_set_sched_context (void *_sc)
18862 {
18863 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
18864
18865 gcc_assert (sc != NULL);
18866
18867 cached_can_issue_more = sc->cached_can_issue_more;
18868 last_scheduled_insn = sc->last_scheduled_insn;
18869 load_store_pendulum = sc->load_store_pendulum;
18870 divide_cnt = sc->divide_cnt;
18871 vec_pairing = sc->vec_pairing;
18872 }
18873
18874 /* Free _SC. */
18875 static void
18876 rs6000_free_sched_context (void *_sc)
18877 {
18878 gcc_assert (_sc != NULL);
18879
18880 free (_sc);
18881 }
18882
18883 static bool
18884 rs6000_sched_can_speculate_insn (rtx_insn *insn)
18885 {
18886 switch (get_attr_type (insn))
18887 {
18888 case TYPE_DIV:
18889 case TYPE_SDIV:
18890 case TYPE_DDIV:
18891 case TYPE_VECDIV:
18892 case TYPE_SSQRT:
18893 case TYPE_DSQRT:
18894 return false;
18895
18896 default:
18897 return true;
18898 }
18899 }
18900 \f
18901 /* Length in bytes of the trampoline for entering a nested function. */
18902
18903 int
18904 rs6000_trampoline_size (void)
18905 {
18906 int ret = 0;
18907
18908 switch (DEFAULT_ABI)
18909 {
18910 default:
18911 gcc_unreachable ();
18912
18913 case ABI_AIX:
18914 ret = (TARGET_32BIT) ? 12 : 24;
18915 break;
18916
18917 case ABI_ELFv2:
18918 gcc_assert (!TARGET_32BIT);
18919 ret = 32;
18920 break;
18921
18922 case ABI_DARWIN:
18923 case ABI_V4:
18924 ret = (TARGET_32BIT) ? 40 : 48;
18925 break;
18926 }
18927
18928 return ret;
18929 }
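
/* For instance, the 64-bit AIX value of 24 above is just the 3-word
   function descriptor built by rs6000_trampoline_init below: three
   8-byte slots (entry address, TOC pointer, static chain).  The 32-bit
   value of 12 is the same three slots at 4 bytes each.  */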
18930
18931 /* Emit RTL insns to initialize the variable parts of a trampoline.
18932 FNADDR is an RTX for the address of the function's pure code.
18933 CXT is an RTX for the static chain value for the function. */
18934
18935 static void
18936 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
18937 {
18938 int regsize = (TARGET_32BIT) ? 4 : 8;
18939 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
18940 rtx ctx_reg = force_reg (Pmode, cxt);
18941 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
18942
18943 switch (DEFAULT_ABI)
18944 {
18945 default:
18946 gcc_unreachable ();
18947
18948 /* Under AIX, just build the 3-word function descriptor. */
18949 case ABI_AIX:
18950 {
18951 rtx fnmem, fn_reg, toc_reg;
18952
18953 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
18954 error ("you cannot take the address of a nested function if you use "
18955 "the %qs option", "-mno-pointers-to-nested-functions");
18956
18957 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
18958 fn_reg = gen_reg_rtx (Pmode);
18959 toc_reg = gen_reg_rtx (Pmode);
18960
18961 /* Macro to shorten the code expansions below. */
18962 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
18963
18964 m_tramp = replace_equiv_address (m_tramp, addr);
18965
18966 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
18967 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
18968 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
18969 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
18970 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
18971
18972 # undef MEM_PLUS
18973 }
18974 break;
18975
18976 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
18977 case ABI_ELFv2:
18978 case ABI_DARWIN:
18979 case ABI_V4:
18980 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
18981 LCT_NORMAL, VOIDmode,
18982 addr, Pmode,
18983 GEN_INT (rs6000_trampoline_size ()), SImode,
18984 fnaddr, Pmode,
18985 ctx_reg, Pmode);
18986 break;
18987 }
18988 }
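
/* For the ABI_AIX case above, the initialized trampoline is laid out
   as a function descriptor (a sketch for 64-bit, i.e. regsize == 8,
   following the emit_move_insn sequence):

     offset  0 : entry address, copied from FNADDR's descriptor
     offset  8 : TOC pointer, copied from FNADDR's descriptor
     offset 16 : static chain value (CXT)
*/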
18989
18990 \f
18991 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
18992 identifier as an argument, so the front end shouldn't look it up. */
18993
18994 static bool
18995 rs6000_attribute_takes_identifier_p (const_tree attr_id)
18996 {
18997 return is_attribute_p ("altivec", attr_id);
18998 }
18999
19000 /* Handle the "altivec" attribute. The attribute may have
19001 arguments as follows:
19002
19003 __attribute__((altivec(vector__)))
19004 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
19005 __attribute__((altivec(bool__))) (always followed by 'unsigned')
19006
19007 and may appear more than once (e.g., 'vector bool char') in a
19008 given declaration. */
19009
19010 static tree
19011 rs6000_handle_altivec_attribute (tree *node,
19012 tree name ATTRIBUTE_UNUSED,
19013 tree args,
19014 int flags ATTRIBUTE_UNUSED,
19015 bool *no_add_attrs)
19016 {
19017 tree type = *node, result = NULL_TREE;
19018 machine_mode mode;
19019 int unsigned_p;
19020 char altivec_type
19021 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
19022 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
19023 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
19024 : '?');
19025
19026 while (POINTER_TYPE_P (type)
19027 || TREE_CODE (type) == FUNCTION_TYPE
19028 || TREE_CODE (type) == METHOD_TYPE
19029 || TREE_CODE (type) == ARRAY_TYPE)
19030 type = TREE_TYPE (type);
19031
19032 mode = TYPE_MODE (type);
19033
19034 /* Check for invalid AltiVec type qualifiers. */
19035 if (type == long_double_type_node)
19036 error ("use of %<long double%> in AltiVec types is invalid");
19037 else if (type == boolean_type_node)
19038 error ("use of boolean types in AltiVec types is invalid");
19039 else if (TREE_CODE (type) == COMPLEX_TYPE)
19040 error ("use of %<complex%> in AltiVec types is invalid");
19041 else if (DECIMAL_FLOAT_MODE_P (mode))
19042 error ("use of decimal floating point types in AltiVec types is invalid");
19043 else if (!TARGET_VSX)
19044 {
19045 if (type == long_unsigned_type_node || type == long_integer_type_node)
19046 {
19047 if (TARGET_64BIT)
19048 error ("use of %<long%> in AltiVec types is invalid for "
19049 "64-bit code without %qs", "-mvsx");
19050 else if (rs6000_warn_altivec_long)
19051 warning (0, "use of %<long%> in AltiVec types is deprecated; "
19052 "use %<int%>");
19053 }
19054 else if (type == long_long_unsigned_type_node
19055 || type == long_long_integer_type_node)
19056 error ("use of %<long long%> in AltiVec types is invalid without %qs",
19057 "-mvsx");
19058 else if (type == double_type_node)
19059 error ("use of %<double%> in AltiVec types is invalid without %qs",
19060 "-mvsx");
19061 }
19062
19063 switch (altivec_type)
19064 {
19065 case 'v':
19066 unsigned_p = TYPE_UNSIGNED (type);
19067 switch (mode)
19068 {
19069 case E_TImode:
19070 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
19071 break;
19072 case E_DImode:
19073 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
19074 break;
19075 case E_SImode:
19076 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
19077 break;
19078 case E_HImode:
19079 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
19080 break;
19081 case E_QImode:
19082 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
19083 break;
19084 case E_SFmode: result = V4SF_type_node; break;
19085 case E_DFmode: result = V2DF_type_node; break;
19086 /* If the user says 'vector int bool', we may be handed the 'bool'
19087 attribute _before_ the 'vector' attribute, and so select the
19088 proper type in the 'b' case below. */
19089 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
19090 case E_V2DImode: case E_V2DFmode:
19091 result = type;
19092 default: break;
19093 }
19094 break;
19095 case 'b':
19096 switch (mode)
19097 {
19098 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
19099 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
19100 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
19101 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
19102 default: break;
19103 }
19104 break;
19105 case 'p':
19106 switch (mode)
19107 {
19108 case E_V8HImode: result = pixel_V8HI_type_node;
19109 default: break;
19110 }
19111 default: break;
19112 }
19113
19114 /* Propagate qualifiers attached to the element type
19115 onto the vector type. */
19116 if (result && result != type && TYPE_QUALS (type))
19117 result = build_qualified_type (result, TYPE_QUALS (type));
19118
19119 *no_add_attrs = true; /* No need to hang on to the attribute. */
19120
19121 if (result)
19122 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
19123
19124 return NULL_TREE;
19125 }
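
/* For example, the handler above performs mappings such as (a sketch;
   these attribute spellings are normally produced by the
   vector/pixel/bool keywords rather than written by hand):

     __attribute__ ((altivec (vector__))) unsigned int  -> unsigned_V4SI_type_node
     __attribute__ ((altivec (bool__)))   unsigned int  -> bool_V4SI_type_node
     __attribute__ ((altivec (pixel__))), applied on top of a V8HI
                                          vector -> pixel_V8HI_type_node
*/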
19126
19127 /* AltiVec defines five built-in scalar types that serve as vector
19128 elements; we must teach the compiler how to mangle them. The 128-bit
19129 floating point mangling is target-specific as well. */
19130
19131 static const char *
19132 rs6000_mangle_type (const_tree type)
19133 {
19134 type = TYPE_MAIN_VARIANT (type);
19135
19136 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
19137 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
19138 return NULL;
19139
19140 if (type == bool_char_type_node) return "U6__boolc";
19141 if (type == bool_short_type_node) return "U6__bools";
19142 if (type == pixel_type_node) return "u7__pixel";
19143 if (type == bool_int_type_node) return "U6__booli";
19144 if (type == bool_long_long_type_node) return "U6__boolx";
19145
19146 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
19147 return "g";
19148 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
19149 return ieee128_mangling_gcc_8_1 ? "U10__float128" : "u9__ieee128";
19150
19151 /* For all other types, use the default mangling. */
19152 return NULL;
19153 }
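
/* So, for example, the element type of 'vector bool int' mangles as
   "U6__booli" and __ibm128 mangles as "g" (a sketch; the surrounding
   parameter mangling is assembled by the generic C++ mangler around
   these pieces).  */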
19154
19155 /* Handle a "longcall" or "shortcall" attribute; arguments as in
19156 struct attribute_spec.handler. */
19157
19158 static tree
19159 rs6000_handle_longcall_attribute (tree *node, tree name,
19160 tree args ATTRIBUTE_UNUSED,
19161 int flags ATTRIBUTE_UNUSED,
19162 bool *no_add_attrs)
19163 {
19164 if (TREE_CODE (*node) != FUNCTION_TYPE
19165 && TREE_CODE (*node) != FIELD_DECL
19166 && TREE_CODE (*node) != TYPE_DECL)
19167 {
19168 warning (OPT_Wattributes, "%qE attribute only applies to functions",
19169 name);
19170 *no_add_attrs = true;
19171 }
19172
19173 return NULL_TREE;
19174 }
19175
19176 /* Set longcall attributes on all functions declared when
19177 rs6000_default_long_calls is true. */
19178 static void
19179 rs6000_set_default_type_attributes (tree type)
19180 {
19181 if (rs6000_default_long_calls
19182 && (TREE_CODE (type) == FUNCTION_TYPE
19183 || TREE_CODE (type) == METHOD_TYPE))
19184 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
19185 NULL_TREE,
19186 TYPE_ATTRIBUTES (type));
19187
19188 #if TARGET_MACHO
19189 darwin_set_default_type_attributes (type);
19190 #endif
19191 }
19192
19193 /* Return a reference suitable for calling a function with the
19194 longcall attribute. */
19195
19196 static rtx
19197 rs6000_longcall_ref (rtx call_ref, rtx arg)
19198 {
19199 /* System V adds '.' to the internal name, so skip them. */
19200 const char *call_name = XSTR (call_ref, 0);
19201 if (*call_name == '.')
19202 {
19203 while (*call_name == '.')
19204 call_name++;
19205
19206 tree node = get_identifier (call_name);
19207 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
19208 }
19209
19210 if (TARGET_PLTSEQ)
19211 {
19212 rtx base = const0_rtx;
19213 int regno = 12;
19214 if (rs6000_pcrel_p (cfun))
19215 {
19216 rtx reg = gen_rtx_REG (Pmode, regno);
19217 rtx u = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
19218 UNSPEC_PLT_PCREL);
19219 emit_insn (gen_rtx_SET (reg, u));
19220 return reg;
19221 }
19222
19223 if (DEFAULT_ABI == ABI_ELFv2)
19224 base = gen_rtx_REG (Pmode, TOC_REGISTER);
19225 else
19226 {
19227 if (flag_pic)
19228 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
19229 regno = 11;
19230 }
19231 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
19232 may be used by a function global entry point. For SysV4, r11
19233 is used by __glink_PLTresolve lazy resolver entry. */
19234 rtx reg = gen_rtx_REG (Pmode, regno);
19235 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
19236 UNSPEC_PLT16_HA);
19237 rtx lo = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, reg, call_ref, arg),
19238 UNSPEC_PLT16_LO);
19239 emit_insn (gen_rtx_SET (reg, hi));
19240 emit_insn (gen_rtx_SET (reg, lo));
19241 return reg;
19242 }
19243
19244 return force_reg (Pmode, call_ref);
19245 }
19246 \f
19247 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
19248 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
19249 #endif
19250
19251 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19252 struct attribute_spec.handler. */
19253 static tree
19254 rs6000_handle_struct_attribute (tree *node, tree name,
19255 tree args ATTRIBUTE_UNUSED,
19256 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
19257 {
19258 tree *type = NULL;
19259 if (DECL_P (*node))
19260 {
19261 if (TREE_CODE (*node) == TYPE_DECL)
19262 type = &TREE_TYPE (*node);
19263 }
19264 else
19265 type = node;
19266
19267 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
19268 || TREE_CODE (*type) == UNION_TYPE)))
19269 {
19270 warning (OPT_Wattributes, "%qE attribute ignored", name);
19271 *no_add_attrs = true;
19272 }
19273
19274 else if ((is_attribute_p ("ms_struct", name)
19275 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
19276 || ((is_attribute_p ("gcc_struct", name)
19277 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
19278 {
19279 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
19280 name);
19281 *no_add_attrs = true;
19282 }
19283
19284 return NULL_TREE;
19285 }
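
/* For example (a sketch):

     struct __attribute__ ((ms_struct)) S { char c; int i : 4; };

   records the attribute on the struct type, whereas applying ms_struct
   to a plain variable reaches the "attribute ignored" warning above.  */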
19286
19287 static bool
19288 rs6000_ms_bitfield_layout_p (const_tree record_type)
19289 {
19290 return (TARGET_USE_MS_BITFIELD_LAYOUT
19291 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19292 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
19293 }
19294 \f
19295 #ifdef USING_ELFOS_H
19296
19297 /* A get_unnamed_section callback, used for switching to toc_section. */
19298
19299 static void
19300 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
19301 {
19302 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19303 && TARGET_MINIMAL_TOC)
19304 {
19305 if (!toc_initialized)
19306 {
19307 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
19308 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19309 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
19310 fprintf (asm_out_file, "\t.tc ");
19311 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
19312 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
19313 fprintf (asm_out_file, "\n");
19314
19315 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19316 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19317 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
19318 fprintf (asm_out_file, " = .+32768\n");
19319 toc_initialized = 1;
19320 }
19321 else
19322 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19323 }
19324 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19325 {
19326 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
19327 if (!toc_initialized)
19328 {
19329 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19330 toc_initialized = 1;
19331 }
19332 }
19333 else
19334 {
19335 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19336 if (!toc_initialized)
19337 {
19338 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19339 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
19340 fprintf (asm_out_file, " = .+32768\n");
19341 toc_initialized = 1;
19342 }
19343 }
19344 }
19345
19346 /* Implement TARGET_ASM_INIT_SECTIONS. */
19347
19348 static void
19349 rs6000_elf_asm_init_sections (void)
19350 {
19351 toc_section
19352 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
19353
19354 sdata2_section
19355 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
19356 SDATA2_SECTION_ASM_OP);
19357 }
19358
19359 /* Implement TARGET_SELECT_RTX_SECTION. */
19360
19361 static section *
19362 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
19363 unsigned HOST_WIDE_INT align)
19364 {
19365 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
19366 return toc_section;
19367 else
19368 return default_elf_select_rtx_section (mode, x, align);
19369 }
19370 \f
19371 /* For a SYMBOL_REF, set generic flags and then perform some
19372 target-specific processing.
19373
19374 When the AIX ABI is requested on a non-AIX system, replace the
19375 function name with the real name (with a leading .) rather than the
19376 function descriptor name. This saves a lot of overriding code to
19377 read the prefixes. */
19378
19379 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
19380 static void
19381 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
19382 {
19383 default_encode_section_info (decl, rtl, first);
19384
19385 if (first
19386 && TREE_CODE (decl) == FUNCTION_DECL
19387 && !TARGET_AIX
19388 && DEFAULT_ABI == ABI_AIX)
19389 {
19390 rtx sym_ref = XEXP (rtl, 0);
19391 size_t len = strlen (XSTR (sym_ref, 0));
19392 char *str = XALLOCAVEC (char, len + 2);
19393 str[0] = '.';
19394 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
19395 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
19396 }
19397 }
19398
19399 static inline bool
19400 compare_section_name (const char *section, const char *templ)
19401 {
19402 int len;
19403
19404 len = strlen (templ);
19405 return (strncmp (section, templ, len) == 0
19406 && (section[len] == 0 || section[len] == '.'));
19407 }
19408
19409 bool
19410 rs6000_elf_in_small_data_p (const_tree decl)
19411 {
19412 if (rs6000_sdata == SDATA_NONE)
19413 return false;
19414
19415 /* We want to merge strings, so we never consider them small data. */
19416 if (TREE_CODE (decl) == STRING_CST)
19417 return false;
19418
19419 /* Functions are never in the small data area. */
19420 if (TREE_CODE (decl) == FUNCTION_DECL)
19421 return false;
19422
19423 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
19424 {
19425 const char *section = DECL_SECTION_NAME (decl);
19426 if (compare_section_name (section, ".sdata")
19427 || compare_section_name (section, ".sdata2")
19428 || compare_section_name (section, ".gnu.linkonce.s")
19429 || compare_section_name (section, ".sbss")
19430 || compare_section_name (section, ".sbss2")
19431 || compare_section_name (section, ".gnu.linkonce.sb")
19432 || strcmp (section, ".PPC.EMB.sdata0") == 0
19433 || strcmp (section, ".PPC.EMB.sbss0") == 0)
19434 return true;
19435 }
19436 else
19437 {
19438 /* If we are told not to put readonly data in sdata, then don't. */
19439 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
19440 && !rs6000_readonly_in_sdata)
19441 return false;
19442
19443 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
19444
19445 if (size > 0
19446 && size <= g_switch_value
19447 /* If it's not public, and we're not going to reference it there,
19448 there's no need to put it in the small data section. */
19449 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
19450 return true;
19451 }
19452
19453 return false;
19454 }
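
/* As an example (a sketch, assuming -msdata and a -G limit of 8): a
   file-scope 'int x;' of 4 bytes passes the size check above and goes
   to the small data area, while a 64-byte array does not, unless its
   section was explicitly set to one of the .sdata/.sbss style names
   tested above.  */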
19455
19456 #endif /* USING_ELFOS_H */
19457 \f
19458 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
19459
19460 static bool
19461 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
19462 {
19463 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
19464 }
19465
19466 /* Do not place thread-local symbols refs in the object blocks. */
19467
19468 static bool
19469 rs6000_use_blocks_for_decl_p (const_tree decl)
19470 {
19471 return !DECL_THREAD_LOCAL_P (decl);
19472 }
19473 \f
19474 /* Return a REG that occurs in ADDR with coefficient 1.
19475 ADDR can be effectively incremented by incrementing REG.
19476
19477 r0 is special and we must not select it as an address
19478 register by this routine since our caller will try to
19479 increment the returned register via an "la" instruction. */
19480
19481 rtx
19482 find_addr_reg (rtx addr)
19483 {
19484 while (GET_CODE (addr) == PLUS)
19485 {
19486 if (REG_P (XEXP (addr, 0))
19487 && REGNO (XEXP (addr, 0)) != 0)
19488 addr = XEXP (addr, 0);
19489 else if (REG_P (XEXP (addr, 1))
19490 && REGNO (XEXP (addr, 1)) != 0)
19491 addr = XEXP (addr, 1);
19492 else if (CONSTANT_P (XEXP (addr, 0)))
19493 addr = XEXP (addr, 1);
19494 else if (CONSTANT_P (XEXP (addr, 1)))
19495 addr = XEXP (addr, 0);
19496 else
19497 gcc_unreachable ();
19498 }
19499 gcc_assert (REG_P (addr) && REGNO (addr) != 0);
19500 return addr;
19501 }
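
/* For example, for ADDR == (plus (reg r9) (const_int 16)) the loop
   above returns r9; for (plus (plus (reg r9) (reg r10)) (const_int 4))
   it also returns r9, since the constant operand is stripped first and
   then the first non-r0 register operand wins.  */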
19502
19503 void
19504 rs6000_fatal_bad_address (rtx op)
19505 {
19506 fatal_insn ("bad address", op);
19507 }
19508
19509 #if TARGET_MACHO
19510
19511 vec<branch_island, va_gc> *branch_islands;
19512
19513 /* Remember to generate a branch island for far calls to the given
19514 function. */
19515
19516 static void
19517 add_compiler_branch_island (tree label_name, tree function_name,
19518 int line_number)
19519 {
19520 branch_island bi = {function_name, label_name, line_number};
19521 vec_safe_push (branch_islands, bi);
19522 }
19523
19524 /* NO_PREVIOUS_DEF checks whether the function name is already in the
19525 branch island list. */
19526
19527 static int
19528 no_previous_def (tree function_name)
19529 {
19530 branch_island *bi;
19531 unsigned ix;
19532
19533 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
19534 if (function_name == bi->function_name)
19535 return 0;
19536 return 1;
19537 }
19538
19539 /* GET_PREV_LABEL gets the label name from the previous definition of
19540 the function. */
19541
19542 static tree
19543 get_prev_label (tree function_name)
19544 {
19545 branch_island *bi;
19546 unsigned ix;
19547
19548 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
19549 if (function_name == bi->function_name)
19550 return bi->label_name;
19551 return NULL_TREE;
19552 }
19553
19554 /* Generate external symbol indirection stubs (PIC and non-PIC). */
19555
19556 void
19557 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19558 {
19559 unsigned int length;
19560 char *symbol_name, *lazy_ptr_name;
19561 char *local_label_0;
19562 static unsigned label = 0;
19563
19564 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19565 symb = (*targetm.strip_name_encoding) (symb);
19566
19567 length = strlen (symb);
19568 symbol_name = XALLOCAVEC (char, length + 32);
19569 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19570
19571 lazy_ptr_name = XALLOCAVEC (char, length + 32);
19572 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
19573
19574 if (MACHOPIC_PURE)
19575 {
19576 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
19577 fprintf (file, "\t.align 5\n");
19578
19579 fprintf (file, "%s:\n", stub);
19580 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19581
19582 label++;
19583 local_label_0 = XALLOCAVEC (char, 16);
19584 sprintf (local_label_0, "L%u$spb", label);
19585
19586 fprintf (file, "\tmflr r0\n");
19587 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
19588 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
19589 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
19590 lazy_ptr_name, local_label_0);
19591 fprintf (file, "\tmtlr r0\n");
19592 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
19593 (TARGET_64BIT ? "ldu" : "lwzu"),
19594 lazy_ptr_name, local_label_0);
19595 fprintf (file, "\tmtctr r12\n");
19596 fprintf (file, "\tbctr\n");
19597 }
19598 else /* mdynamic-no-pic or mkernel. */
19599 {
19600 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
19601 fprintf (file, "\t.align 4\n");
19602
19603 fprintf (file, "%s:\n", stub);
19604 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19605
19606 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
19607 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
19608 (TARGET_64BIT ? "ldu" : "lwzu"),
19609 lazy_ptr_name);
19610 fprintf (file, "\tmtctr r12\n");
19611 fprintf (file, "\tbctr\n");
19612 }
19613
19614 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
19615 fprintf (file, "%s:\n", lazy_ptr_name);
19616 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19617 fprintf (file, "%sdyld_stub_binding_helper\n",
19618 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
19619 }
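
/* For reference, the MACHOPIC_PURE branch above emits a stub of roughly
   the following shape for a symbol _foo (a sketch reconstructed from
   the fprintf calls; the L_foo$stub / L_foo$lazy_ptr / L1$spb names are
   illustrative, and 32-bit output is shown, hence lwzu rather than ldu):

	.align 5
   L_foo$stub:
	.indirect_symbol _foo
	mflr r0
	bcl 20,31,L1$spb
   L1$spb:
	mflr r11
	addis r11,r11,ha16(L_foo$lazy_ptr-L1$spb)
	mtlr r0
	lwzu r12,lo16(L_foo$lazy_ptr-L1$spb)(r11)
	mtctr r12
	bctr

   L_foo$lazy_ptr:
	.indirect_symbol _foo
	.long dyld_stub_binding_helper
*/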
19620
19621 /* Legitimize PIC addresses. If the address is already
19622 position-independent, we return ORIG. Newly generated
19623 position-independent addresses go into a reg. This is REG if nonzero,
19624 otherwise we allocate register(s) as necessary. */
19625
19626 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
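
/* SMALL_INT accepts exactly the values that fit in a signed 16-bit
   immediate: (UINTVAL (X) + 0x8000) < 0x10000 holds iff
   -32768 <= INTVAL (X) <= 32767, so e.g. 0x7fff passes and 0x8000
   does not.  */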
19627
19628 rtx
19629 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
19630 rtx reg)
19631 {
19632 rtx base, offset;
19633
19634 if (reg == NULL && !reload_completed)
19635 reg = gen_reg_rtx (Pmode);
19636
19637 if (GET_CODE (orig) == CONST)
19638 {
19639 rtx reg_temp;
19640
19641 if (GET_CODE (XEXP (orig, 0)) == PLUS
19642 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
19643 return orig;
19644
19645 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
19646
19647 /* Use a different reg for the intermediate value, as
19648 it will be marked UNCHANGING. */
19649 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
19650 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
19651 Pmode, reg_temp);
19652 offset =
19653 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
19654 Pmode, reg);
19655
19656 if (CONST_INT_P (offset))
19657 {
19658 if (SMALL_INT (offset))
19659 return plus_constant (Pmode, base, INTVAL (offset));
19660 else if (!reload_completed)
19661 offset = force_reg (Pmode, offset);
19662 else
19663 {
19664 rtx mem = force_const_mem (Pmode, orig);
19665 return machopic_legitimize_pic_address (mem, Pmode, reg);
19666 }
19667 }
19668 return gen_rtx_PLUS (Pmode, base, offset);
19669 }
19670
19671 /* Fall back on generic machopic code. */
19672 return machopic_legitimize_pic_address (orig, mode, reg);
19673 }
19674
19675 /* Output a .machine directive for the Darwin assembler, and call
19676 the generic start_file routine. */
19677
19678 static void
19679 rs6000_darwin_file_start (void)
19680 {
19681 static const struct
19682 {
19683 const char *arg;
19684 const char *name;
19685 HOST_WIDE_INT if_set;
19686 } mapping[] = {
19687 { "ppc64", "ppc64", MASK_64BIT },
19688 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
19689 { "power4", "ppc970", 0 },
19690 { "G5", "ppc970", 0 },
19691 { "7450", "ppc7450", 0 },
19692 { "7400", "ppc7400", MASK_ALTIVEC },
19693 { "G4", "ppc7400", 0 },
19694 { "750", "ppc750", 0 },
19695 { "740", "ppc750", 0 },
19696 { "G3", "ppc750", 0 },
19697 { "604e", "ppc604e", 0 },
19698 { "604", "ppc604", 0 },
19699 { "603e", "ppc603", 0 },
19700 { "603", "ppc603", 0 },
19701 { "601", "ppc601", 0 },
19702 { NULL, "ppc", 0 } };
19703 const char *cpu_id = "";
19704 size_t i;
19705
19706 rs6000_file_start ();
19707 darwin_file_start ();
19708
19709 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
19710
19711 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
19712 cpu_id = rs6000_default_cpu;
19713
19714 if (global_options_set.x_rs6000_cpu_index)
19715 cpu_id = processor_target_table[rs6000_cpu_index].name;
19716
19717 /* Look through the mapping array. Pick the first name that either
19718 matches the argument, has a bit set in IF_SET that is also set
19719 in the target flags, or has a NULL name. */
19720
19721 i = 0;
19722 while (mapping[i].arg != NULL
19723 && strcmp (mapping[i].arg, cpu_id) != 0
19724 && (mapping[i].if_set & rs6000_isa_flags) == 0)
19725 i++;
19726
19727 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
19728 }
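
/* So, for example, with -mcpu=G4 the loop above stops at the 7400/G4
   entries (either by the MASK_ALTIVEC bit or by name) and emits
   "\t.machine ppc7400", while a configuration matching nothing falls
   through to the terminating { NULL, "ppc", 0 } entry.  */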
19729
19730 #endif /* TARGET_MACHO */
19731
19732 #if TARGET_ELF
19733 static int
19734 rs6000_elf_reloc_rw_mask (void)
19735 {
19736 if (flag_pic)
19737 return 3;
19738 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19739 return 2;
19740 else
19741 return 0;
19742 }
19743
19744 /* Record an element in the table of global constructors. SYMBOL is
19745 a SYMBOL_REF of the function to be called; PRIORITY is a number
19746 between 0 and MAX_INIT_PRIORITY.
19747
19748 This differs from default_named_section_asm_out_constructor in
19749 that we have special handling for -mrelocatable. */
19750
19751 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
19752 static void
19753 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
19754 {
19755 const char *section = ".ctors";
19756 char buf[18];
19757
19758 if (priority != DEFAULT_INIT_PRIORITY)
19759 {
19760 sprintf (buf, ".ctors.%.5u",
19761 /* Invert the numbering so the linker puts us in the proper
19762 order; constructors are run from right to left, and the
19763 linker sorts in increasing order. */
19764 MAX_INIT_PRIORITY - priority);
19765 section = buf;
19766 }
19767
19768 switch_to_section (get_section (section, SECTION_WRITE, NULL));
19769 assemble_align (POINTER_SIZE);
19770
19771 if (DEFAULT_ABI == ABI_V4
19772 && (TARGET_RELOCATABLE || flag_pic > 1))
19773 {
19774 fputs ("\t.long (", asm_out_file);
19775 output_addr_const (asm_out_file, symbol);
19776 fputs (")@fixup\n", asm_out_file);
19777 }
19778 else
19779 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
19780 }
19781
19782 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
19783 static void
19784 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
19785 {
19786 const char *section = ".dtors";
19787 char buf[18];
19788
19789 if (priority != DEFAULT_INIT_PRIORITY)
19790 {
19791 sprintf (buf, ".dtors.%.5u",
19792 /* Invert the numbering so the linker puts us in the proper
19793 order; constructors are run from right to left, and the
19794 linker sorts in increasing order. */
19795 MAX_INIT_PRIORITY - priority);
19796 section = buf;
19797 }
19798
19799 switch_to_section (get_section (section, SECTION_WRITE, NULL));
19800 assemble_align (POINTER_SIZE);
19801
19802 if (DEFAULT_ABI == ABI_V4
19803 && (TARGET_RELOCATABLE || flag_pic > 1))
19804 {
19805 fputs ("\t.long (", asm_out_file);
19806 output_addr_const (asm_out_file, symbol);
19807 fputs (")@fixup\n", asm_out_file);
19808 }
19809 else
19810 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
19811 }
19812
19813 void
19814 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
19815 {
19816 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
19817 {
19818 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
19819 ASM_OUTPUT_LABEL (file, name);
19820 fputs (DOUBLE_INT_ASM_OP, file);
19821 rs6000_output_function_entry (file, name);
19822 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
19823 if (DOT_SYMBOLS)
19824 {
19825 fputs ("\t.size\t", file);
19826 assemble_name (file, name);
19827 fputs (",24\n\t.type\t.", file);
19828 assemble_name (file, name);
19829 fputs (",@function\n", file);
19830 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
19831 {
19832 fputs ("\t.globl\t.", file);
19833 assemble_name (file, name);
19834 putc ('\n', file);
19835 }
19836 }
19837 else
19838 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19839 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19840 rs6000_output_function_entry (file, name);
19841 fputs (":\n", file);
19842 return;
19843 }
19844
19845 int uses_toc;
19846 if (DEFAULT_ABI == ABI_V4
19847 && (TARGET_RELOCATABLE || flag_pic > 1)
19848 && !TARGET_SECURE_PLT
19849 && (!constant_pool_empty_p () || crtl->profile)
19850 && (uses_toc = uses_TOC ()))
19851 {
19852 char buf[256];
19853
19854 if (uses_toc == 2)
19855 switch_to_other_text_partition ();
19856 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
19857
19858 fprintf (file, "\t.long ");
19859 assemble_name (file, toc_label_name);
19860 need_toc_init = 1;
19861 putc ('-', file);
19862 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
19863 assemble_name (file, buf);
19864 putc ('\n', file);
19865 if (uses_toc == 2)
19866 switch_to_other_text_partition ();
19867 }
19868
19869 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19870 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19871
19872 if (TARGET_CMODEL == CMODEL_LARGE
19873 && rs6000_global_entry_point_prologue_needed_p ())
19874 {
19875 char buf[256];
19876
19877 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
19878
19879 fprintf (file, "\t.quad .TOC.-");
19880 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
19881 assemble_name (file, buf);
19882 putc ('\n', file);
19883 }
19884
19885 if (DEFAULT_ABI == ABI_AIX)
19886 {
19887 const char *desc_name, *orig_name;
19888
19889 orig_name = (*targetm.strip_name_encoding) (name);
19890 desc_name = orig_name;
19891 while (*desc_name == '.')
19892 desc_name++;
19893
19894 if (TREE_PUBLIC (decl))
19895 fprintf (file, "\t.globl %s\n", desc_name);
19896
19897 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19898 fprintf (file, "%s:\n", desc_name);
19899 fprintf (file, "\t.long %s\n", orig_name);
19900 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
19901 fputs ("\t.long 0\n", file);
19902 fprintf (file, "\t.previous\n");
19903 }
19904 ASM_OUTPUT_LABEL (file, name);
19905 }
19906
19907 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
19908 static void
19909 rs6000_elf_file_end (void)
19910 {
19911 #ifdef HAVE_AS_GNU_ATTRIBUTE
19912 /* ??? The value emitted depends on options active at file end.
19913 Assume anyone using #pragma or attributes that might change
19914 options knows what they are doing. */
19915 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
19916 && rs6000_passes_float)
19917 {
19918 int fp;
19919
19920 if (TARGET_HARD_FLOAT)
19921 fp = 1;
19922 else
19923 fp = 2;
19924 if (rs6000_passes_long_double)
19925 {
19926 if (!TARGET_LONG_DOUBLE_128)
19927 fp |= 2 * 4;
19928 else if (TARGET_IEEEQUAD)
19929 fp |= 3 * 4;
19930 else
19931 fp |= 1 * 4;
19932 }
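      /* The value emitted encodes two 2-bit fields: bits 0-1 give the
	 scalar float ABI (1 = hard float, 2 = soft float) and bits 2-3
	 the long double type (1 = 128-bit IBM extended, 2 = 64-bit,
	 3 = IEEE 128-bit).  */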
19933 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
19934 }
19935 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
19936 {
19937 if (rs6000_passes_vector)
19938 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
19939 (TARGET_ALTIVEC_ABI ? 2 : 1));
19940 if (rs6000_returns_struct)
19941 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
19942 aix_struct_return ? 2 : 1);
19943 }
19944 #endif
19945 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
19946 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
19947 file_end_indicate_exec_stack ();
19948 #endif
19949
19950 if (flag_split_stack)
19951 file_end_indicate_split_stack ();
19952
19953 if (cpu_builtin_p)
19954 {
19955 /* We have expanded a CPU builtin, so we need to emit a reference to
19956 the special symbol that LIBC uses to declare it supports the
19957 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
19958 switch_to_section (data_section);
19959 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
19960 fprintf (asm_out_file, "\t%s %s\n",
19961 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
19962 }
19963 }
19964 #endif
19965
19966 #if TARGET_XCOFF
19967
19968 #ifndef HAVE_XCOFF_DWARF_EXTRAS
19969 #define HAVE_XCOFF_DWARF_EXTRAS 0
19970 #endif
19971
19972 static enum unwind_info_type
19973 rs6000_xcoff_debug_unwind_info (void)
19974 {
19975 return UI_NONE;
19976 }
19977
19978 static void
19979 rs6000_xcoff_asm_output_anchor (rtx symbol)
19980 {
19981 char buffer[100];
19982
19983 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
19984 SYMBOL_REF_BLOCK_OFFSET (symbol));
19985 fprintf (asm_out_file, "%s", SET_ASM_OP);
19986 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
19987 fprintf (asm_out_file, ",");
19988 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
19989 fprintf (asm_out_file, "\n");
19990 }
19991
19992 static void
19993 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
19994 {
19995 fputs (GLOBAL_ASM_OP, stream);
19996 RS6000_OUTPUT_BASENAME (stream, name);
19997 putc ('\n', stream);
19998 }
19999
20000 /* A get_unnamed_section callback, used for read-only sections.
20001    DIRECTIVE points to the section string variable.  */
20002
20003 static void
20004 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
20005 {
20006 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
20007 *(const char *const *) directive,
20008 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20009 }
20010
20011 /* Likewise for read-write sections. */
20012
20013 static void
20014 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
20015 {
20016 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
20017 *(const char *const *) directive,
20018 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20019 }
20020
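/* Likewise for TLS data sections.  */
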
20021 static void
20022 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
20023 {
20024 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
20025 *(const char *const *) directive,
20026 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20027 }
20028
20029 /* A get_unnamed_section callback, used for switching to toc_section. */
20030
20031 static void
20032 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
20033 {
20034 if (TARGET_MINIMAL_TOC)
20035 {
20036 /* toc_section is always selected at least once from
20037 rs6000_xcoff_file_start, so this is guaranteed to
20038 always be defined once and only once in each file. */
20039 if (!toc_initialized)
20040 {
20041 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
20042 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
20043 toc_initialized = 1;
20044 }
20045 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
20046 (TARGET_32BIT ? "" : ",3"));
20047 }
20048 else
20049 fputs ("\t.toc\n", asm_out_file);
20050 }
20051
20052 /* Implement TARGET_ASM_INIT_SECTIONS. */
20053
20054 static void
20055 rs6000_xcoff_asm_init_sections (void)
20056 {
20057 read_only_data_section
20058 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
20059 &xcoff_read_only_section_name);
20060
20061 private_data_section
20062 = get_unnamed_section (SECTION_WRITE,
20063 rs6000_xcoff_output_readwrite_section_asm_op,
20064 &xcoff_private_data_section_name);
20065
20066 read_only_private_data_section
20067 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
20068 &xcoff_private_rodata_section_name);
20069
20070 tls_data_section
20071 = get_unnamed_section (SECTION_TLS,
20072 rs6000_xcoff_output_tls_section_asm_op,
20073 &xcoff_tls_data_section_name);
20074
20075 tls_private_data_section
20076 = get_unnamed_section (SECTION_TLS,
20077 rs6000_xcoff_output_tls_section_asm_op,
20078 &xcoff_private_data_section_name);
20079
20080 toc_section
20081 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
20082
20083 readonly_data_section = read_only_data_section;
20084 }
20085
20086 static int
20087 rs6000_xcoff_reloc_rw_mask (void)
20088 {
20089 return 3;
20090 }
20091
20092 static void
20093 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
20094 tree decl ATTRIBUTE_UNUSED)
20095 {
20096 int smclass;
20097 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
20098
20099 if (flags & SECTION_EXCLUDE)
20100 smclass = 4;
20101 else if (flags & SECTION_DEBUG)
20102 {
20103 fprintf (asm_out_file, "\t.dwsect %s\n", name);
20104 return;
20105 }
20106 else if (flags & SECTION_CODE)
20107 smclass = 0;
20108 else if (flags & SECTION_TLS)
20109 smclass = 3;
20110 else if (flags & SECTION_WRITE)
20111 smclass = 2;
20112 else
20113 smclass = 1;
20114
20115 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
20116 (flags & SECTION_CODE) ? "." : "",
20117 name, suffix[smclass], flags & SECTION_ENTSIZE);
20118 }
20119
20120 #define IN_NAMED_SECTION(DECL) \
20121 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
20122 && DECL_SECTION_NAME (DECL) != NULL)
20123
20124 static section *
20125 rs6000_xcoff_select_section (tree decl, int reloc,
20126 unsigned HOST_WIDE_INT align)
20127 {
20128 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
20129    a named section.  */
20130 if (align > BIGGEST_ALIGNMENT)
20131 {
20132 resolve_unique_section (decl, reloc, true);
20133 if (IN_NAMED_SECTION (decl))
20134 return get_named_section (decl, NULL, reloc);
20135 }
20136
20137 if (decl_readonly_section (decl, reloc))
20138 {
20139 if (TREE_PUBLIC (decl))
20140 return read_only_data_section;
20141 else
20142 return read_only_private_data_section;
20143 }
20144 else
20145 {
20146 #if HAVE_AS_TLS
20147 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
20148 {
20149 if (TREE_PUBLIC (decl))
20150 return tls_data_section;
20151 else if (bss_initializer_p (decl))
20152 {
20153 /* Convert to COMMON to emit in BSS. */
20154 DECL_COMMON (decl) = 1;
20155 return tls_comm_section;
20156 }
20157 else
20158 return tls_private_data_section;
20159 }
20160 else
20161 #endif
20162 if (TREE_PUBLIC (decl))
20163 return data_section;
20164 else
20165 return private_data_section;
20166 }
20167 }
20168
20169 static void
20170 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
20171 {
20172 const char *name;
20173
20174 /* Use select_section for private data and uninitialized data with
20175 alignment <= BIGGEST_ALIGNMENT. */
20176 if (!TREE_PUBLIC (decl)
20177 || DECL_COMMON (decl)
20178 || (DECL_INITIAL (decl) == NULL_TREE
20179 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
20180 || DECL_INITIAL (decl) == error_mark_node
20181 || (flag_zero_initialized_in_bss
20182 && initializer_zerop (DECL_INITIAL (decl))))
20183 return;
20184
20185 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
20186 name = (*targetm.strip_name_encoding) (name);
20187 set_decl_section_name (decl, name);
20188 }
20189
20190 /* Select section for constant in constant pool.
20191
20192 On RS/6000, all constants are in the private read-only data area.
20193 However, if this is being placed in the TOC it must be output as a
20194 toc entry. */
20195
20196 static section *
20197 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
20198 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
20199 {
20200 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
20201 return toc_section;
20202 else
20203 return read_only_private_data_section;
20204 }
20205
20206 /* Remove any trailing [DS] or the like from the symbol name. */
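/* For example, "foo[DS]" becomes "foo"; the length arithmetic below
   assumes any trailing mapping class is a four-character "[XX]" suffix,
   which holds for the classes emitted in this file.  */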
20207
20208 static const char *
20209 rs6000_xcoff_strip_name_encoding (const char *name)
20210 {
20211 size_t len;
20212 if (*name == '*')
20213 name++;
20214 len = strlen (name);
20215 if (name[len - 1] == ']')
20216 return ggc_alloc_string (name, len - 4);
20217 else
20218 return name;
20219 }
20220
20221 /* Section attributes. AIX is always PIC. */
20222
20223 static unsigned int
20224 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
20225 {
20226 unsigned int align;
20227 unsigned int flags = default_section_type_flags (decl, name, reloc);
20228
20229 /* Align to at least the minimum unit size (MIN_UNITS_PER_WORD bytes). */
20230 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
20231 align = MIN_UNITS_PER_WORD;
20232 else
20233 /* Increase alignment of large objects if not already stricter. */
20234 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
20235 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
20236 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
20237
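  /* The log2 of the alignment travels in the SECTION_ENTSIZE bits; it is
     decoded in rs6000_xcoff_asm_named_section above, which prints
     "flags & SECTION_ENTSIZE" as the csect alignment operand.  */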
20238 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
20239 }
20240
20241 /* Output at beginning of assembler file.
20242
20243 Initialize the section names for the RS/6000 at this point.
20244
20245 Specify filename, including full path, to assembler.
20246
20247 We want to go into the TOC section so at least one .toc will be emitted.
20248 Also, in order to output proper .bs/.es pairs, we need at least one static
20249 [RW] section emitted.
20250
20251 Finally, declare mcount when profiling to make the assembler happy. */
20252
20253 static void
20254 rs6000_xcoff_file_start (void)
20255 {
20256 rs6000_gen_section_name (&xcoff_bss_section_name,
20257 main_input_filename, ".bss_");
20258 rs6000_gen_section_name (&xcoff_private_data_section_name,
20259 main_input_filename, ".rw_");
20260 rs6000_gen_section_name (&xcoff_private_rodata_section_name,
20261 main_input_filename, ".rop_");
20262 rs6000_gen_section_name (&xcoff_read_only_section_name,
20263 main_input_filename, ".ro_");
20264 rs6000_gen_section_name (&xcoff_tls_data_section_name,
20265 main_input_filename, ".tls_");
20266 rs6000_gen_section_name (&xcoff_tbss_section_name,
20267 main_input_filename, ".tbss_[UL]");
20268
20269 fputs ("\t.file\t", asm_out_file);
20270 output_quoted_string (asm_out_file, main_input_filename);
20271 fputc ('\n', asm_out_file);
20272 if (write_symbols != NO_DEBUG)
20273 switch_to_section (private_data_section);
20274 switch_to_section (toc_section);
20275 switch_to_section (text_section);
20276 if (profile_flag)
20277 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
20278 rs6000_file_start ();
20279 }
20280
20281 /* Output at end of assembler file.
20282 On the RS/6000, referencing data should automatically pull in text. */
20283
20284 static void
20285 rs6000_xcoff_file_end (void)
20286 {
20287 switch_to_section (text_section);
20288 fputs ("_section_.text:\n", asm_out_file);
20289 switch_to_section (data_section);
20290 fputs (TARGET_32BIT
20291 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
20292 asm_out_file);
20293 }
20294
20295 struct declare_alias_data
20296 {
20297 FILE *file;
20298 bool function_descriptor;
20299 };
20300
20301 /* Declare alias N.  A helper callback for call_for_symbol_and_aliases. */
20302
20303 static bool
20304 rs6000_declare_alias (struct symtab_node *n, void *d)
20305 {
20306 struct declare_alias_data *data = (struct declare_alias_data *)d;
20307 /* The main symbol is output specially, because the varasm machinery does
20308    part of the job for us; we do not need to emit .globl/.lglobl and such. */
20309 if (!n->alias || n->weakref)
20310 return false;
20311
20312 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
20313 return false;
20314
20315 /* Prevent assemble_alias from trying to use .set pseudo operation
20316 that does not behave as expected by the middle-end. */
20317 TREE_ASM_WRITTEN (n->decl) = true;
20318
20319 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
20320 char *buffer = (char *) alloca (strlen (name) + 2);
20321 char *p;
20322 int dollar_inside = 0;
20323
20324 strcpy (buffer, name);
20325 p = strchr (buffer, '$');
20326 while (p) {
20327 *p = '_';
20328 dollar_inside++;
20329 p = strchr (p + 1, '$');
20330 }
20331 if (TREE_PUBLIC (n->decl))
20332 {
20333 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
20334 {
20335 if (dollar_inside) {
20336 if (data->function_descriptor)
20337 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
20338 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
20339 }
20340 if (data->function_descriptor)
20341 {
20342 fputs ("\t.globl .", data->file);
20343 RS6000_OUTPUT_BASENAME (data->file, buffer);
20344 putc ('\n', data->file);
20345 }
20346 fputs ("\t.globl ", data->file);
20347 RS6000_OUTPUT_BASENAME (data->file, buffer);
20348 putc ('\n', data->file);
20349 }
20350 #ifdef ASM_WEAKEN_DECL
20351 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
20352 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
20353 #endif
20354 }
20355 else
20356 {
20357 if (dollar_inside)
20358 {
20359 if (data->function_descriptor)
20360 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
20361 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
20362 }
20363 if (data->function_descriptor)
20364 {
20365 fputs ("\t.lglobl .", data->file);
20366 RS6000_OUTPUT_BASENAME (data->file, buffer);
20367 putc ('\n', data->file);
20368 }
20369 fputs ("\t.lglobl ", data->file);
20370 RS6000_OUTPUT_BASENAME (data->file, buffer);
20371 putc ('\n', data->file);
20372 }
20373 if (data->function_descriptor)
20374 fputs (".", data->file);
20375 RS6000_OUTPUT_BASENAME (data->file, buffer);
20376 fputs (":\n", data->file);
20377 return false;
20378 }
20379
20380
20381 #ifdef HAVE_GAS_HIDDEN
20382 /* Helper function to calculate visibility of a DECL
20383 and return the value as a const string. */
20384
20385 static const char *
20386 rs6000_xcoff_visibility (tree decl)
20387 {
20388 static const char * const visibility_types[] = {
20389 "", ",protected", ",hidden", ",internal"
20390 };
20391
20392 enum symbol_visibility vis = DECL_VISIBILITY (decl);
20393 return visibility_types[vis];
20394 }
20395 #endif
20396
20397
20398 /* This macro produces the initial definition of a function name.
20399 On the RS/6000, we need to place an extra '.' in the function name and
20400 output the function descriptor.
20401 Dollar signs are converted to underscores.
20402
20403 The csect for the function will have already been created when
20404 text_section was selected. We do have to go back to that csect, however.
20405
20406 The third and fourth parameters to the .function pseudo-op (16 and 044)
20407 are placeholders which no longer have any use.
20408
20409 Because AIX assembler's .set command has unexpected semantics, we output
20410 all aliases as alternative labels in front of the definition. */
20411
20412 void
20413 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
20414 {
20415 char *buffer = (char *) alloca (strlen (name) + 1);
20416 char *p;
20417 int dollar_inside = 0;
20418 struct declare_alias_data data = {file, false};
20419
20420 strcpy (buffer, name);
20421 p = strchr (buffer, '$');
20422 while (p) {
20423 *p = '_';
20424 dollar_inside++;
20425 p = strchr (p + 1, '$');
20426 }
20427 if (TREE_PUBLIC (decl))
20428 {
20429 if (!RS6000_WEAK || !DECL_WEAK (decl))
20430 {
20431 if (dollar_inside) {
20432 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
20433 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
20434 }
20435 fputs ("\t.globl .", file);
20436 RS6000_OUTPUT_BASENAME (file, buffer);
20437 #ifdef HAVE_GAS_HIDDEN
20438 fputs (rs6000_xcoff_visibility (decl), file);
20439 #endif
20440 putc ('\n', file);
20441 }
20442 }
20443 else
20444 {
20445 if (dollar_inside) {
20446 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
20447 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
20448 }
20449 fputs ("\t.lglobl .", file);
20450 RS6000_OUTPUT_BASENAME (file, buffer);
20451 putc ('\n', file);
20452 }
20453 fputs ("\t.csect ", file);
20454 RS6000_OUTPUT_BASENAME (file, buffer);
20455 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
20456 RS6000_OUTPUT_BASENAME (file, buffer);
20457 fputs (":\n", file);
20458 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20459 &data, true);
20460 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
20461 RS6000_OUTPUT_BASENAME (file, buffer);
20462 fputs (", TOC[tc0], 0\n", file);
20463 in_section = NULL;
20464 switch_to_section (function_section (decl));
20465 putc ('.', file);
20466 RS6000_OUTPUT_BASENAME (file, buffer);
20467 fputs (":\n", file);
20468 data.function_descriptor = true;
20469 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20470 &data, true);
20471 if (!DECL_IGNORED_P (decl))
20472 {
20473 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
20474 xcoffout_declare_function (file, decl, buffer);
20475 else if (write_symbols == DWARF2_DEBUG)
20476 {
20477 name = (*targetm.strip_name_encoding) (name);
20478 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
20479 }
20480 }
20481 return;
20482 }
20483
20484
20485 /* Output assembly language to globalize a symbol from a DECL,
20486 possibly with visibility. */
20487
20488 void
20489 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
20490 {
20491 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
20492 fputs (GLOBAL_ASM_OP, stream);
20493 RS6000_OUTPUT_BASENAME (stream, name);
20494 #ifdef HAVE_GAS_HIDDEN
20495 fputs (rs6000_xcoff_visibility (decl), stream);
20496 #endif
20497 putc ('\n', stream);
20498 }
20499
20500 /* Output assembly language to define a symbol as COMMON from a DECL,
20501 possibly with visibility. */
20502
20503 void
20504 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
20505 tree decl ATTRIBUTE_UNUSED,
20506 const char *name,
20507 unsigned HOST_WIDE_INT size,
20508 unsigned HOST_WIDE_INT align)
20509 {
20510 unsigned HOST_WIDE_INT align2 = 2;
20511
20512 if (align > 32)
20513 align2 = floor_log2 (align / BITS_PER_UNIT);
20514 else if (size > 4)
20515 align2 = 3;
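  /* align2 is the log2 of the byte alignment passed to .comm: the default
     2 requests 4-byte alignment, 3 requests 8-byte, and stricter requests
     come straight from floor_log2 of the declared alignment.  */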
20516
20517 fputs (COMMON_ASM_OP, stream);
20518 RS6000_OUTPUT_BASENAME (stream, name);
20519
20520 fprintf (stream,
20521 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
20522 size, align2);
20523
20524 #ifdef HAVE_GAS_HIDDEN
20525 if (decl != NULL)
20526 fputs (rs6000_xcoff_visibility (decl), stream);
20527 #endif
20528 putc ('\n', stream);
20529 }
20530
20531 /* This macro produces the initial definition of an object (variable) name.
20532 Because AIX assembler's .set command has unexpected semantics, we output
20533 all aliases as alternative labels in front of the definition. */
20534
20535 void
20536 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
20537 {
20538 struct declare_alias_data data = {file, false};
20539 RS6000_OUTPUT_BASENAME (file, name);
20540 fputs (":\n", file);
20541 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20542 &data, true);
20543 }
20544
20545 /* Override the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'. */
20546
20547 void
20548 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
20549 {
20550 fputs (integer_asm_op (size, FALSE), file);
20551 assemble_name (file, label);
20552 fputs ("-$", file);
20553 }
20554
20555 /* Output a symbol offset relative to the dbase for the current object.
20556 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
20557 signed offsets.
20558
20559 __gcc_unwind_dbase is embedded in all executables/libraries through
20560 libgcc/config/rs6000/crtdbase.S. */
20561
20562 void
20563 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
20564 {
20565 fputs (integer_asm_op (size, FALSE), file);
20566 assemble_name (file, label);
20567 fputs("-__gcc_unwind_dbase", file);
20568 }
20569
20570 #ifdef HAVE_AS_TLS
20571 static void
20572 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
20573 {
20574 rtx symbol;
20575 int flags;
20576 const char *symname;
20577
20578 default_encode_section_info (decl, rtl, first);
20579
20580 /* Careful not to prod global register variables. */
20581 if (!MEM_P (rtl))
20582 return;
20583 symbol = XEXP (rtl, 0);
20584 if (!SYMBOL_REF_P (symbol))
20585 return;
20586
20587 flags = SYMBOL_REF_FLAGS (symbol);
20588
20589 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
20590 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
20591
20592 SYMBOL_REF_FLAGS (symbol) = flags;
20593
20594 /* Append mapping class to extern decls. */
20595 symname = XSTR (symbol, 0);
20596 if (decl /* sync condition with assemble_external () */
20597 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
20598 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
20599 || TREE_CODE (decl) == FUNCTION_DECL)
20600 && symname[strlen (symname) - 1] != ']')
20601 {
20602 char *newname = (char *) alloca (strlen (symname) + 5);
20603 strcpy (newname, symname);
20604 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
20605 ? "[DS]" : "[UA]"));
20606 XSTR (symbol, 0) = ggc_strdup (newname);
20607 }
20608 }
20609 #endif /* HAVE_AS_TLS */
20610 #endif /* TARGET_XCOFF */
20611
20612 void
20613 rs6000_asm_weaken_decl (FILE *stream, tree decl,
20614 const char *name, const char *val)
20615 {
20616 fputs ("\t.weak\t", stream);
20617 RS6000_OUTPUT_BASENAME (stream, name);
20618 if (decl && TREE_CODE (decl) == FUNCTION_DECL
20619 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
20620 {
20621 if (TARGET_XCOFF)
20622 fputs ("[DS]", stream);
20623 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
20624 if (TARGET_XCOFF)
20625 fputs (rs6000_xcoff_visibility (decl), stream);
20626 #endif
20627 fputs ("\n\t.weak\t.", stream);
20628 RS6000_OUTPUT_BASENAME (stream, name);
20629 }
20630 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
20631 if (TARGET_XCOFF)
20632 fputs (rs6000_xcoff_visibility (decl), stream);
20633 #endif
20634 fputc ('\n', stream);
20635 if (val)
20636 {
20637 #ifdef ASM_OUTPUT_DEF
20638 ASM_OUTPUT_DEF (stream, name, val);
20639 #endif
20640 if (decl && TREE_CODE (decl) == FUNCTION_DECL
20641 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
20642 {
20643 fputs ("\t.set\t.", stream);
20644 RS6000_OUTPUT_BASENAME (stream, name);
20645 fputs (",.", stream);
20646 RS6000_OUTPUT_BASENAME (stream, val);
20647 fputc ('\n', stream);
20648 }
20649 }
20650 }
20651
20652
20653 /* Return true if INSN should not be copied. */
20654
20655 static bool
20656 rs6000_cannot_copy_insn_p (rtx_insn *insn)
20657 {
20658 return recog_memoized (insn) >= 0
20659 && get_attr_cannot_copy (insn);
20660 }
20661
20662 /* Compute a (partial) cost for rtx X. Return true if the complete
20663 cost has been computed, and false if subexpressions should be
20664 scanned. In either case, *TOTAL contains the cost result. */
20665
20666 static bool
20667 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
20668 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
20669 {
20670 int code = GET_CODE (x);
20671
20672 switch (code)
20673 {
20674 /* On the RS/6000, if it is valid in the insn, it is free. */
20675 case CONST_INT:
20676 if (((outer_code == SET
20677 || outer_code == PLUS
20678 || outer_code == MINUS)
20679 && (satisfies_constraint_I (x)
20680 || satisfies_constraint_L (x)))
20681 || (outer_code == AND
20682 && (satisfies_constraint_K (x)
20683 || (mode == SImode
20684 ? satisfies_constraint_L (x)
20685 : satisfies_constraint_J (x))))
20686 || ((outer_code == IOR || outer_code == XOR)
20687 && (satisfies_constraint_K (x)
20688 || (mode == SImode
20689 ? satisfies_constraint_L (x)
20690 : satisfies_constraint_J (x))))
20691 || outer_code == ASHIFT
20692 || outer_code == ASHIFTRT
20693 || outer_code == LSHIFTRT
20694 || outer_code == ROTATE
20695 || outer_code == ROTATERT
20696 || outer_code == ZERO_EXTRACT
20697 || (outer_code == MULT
20698 && satisfies_constraint_I (x))
20699 || ((outer_code == DIV || outer_code == UDIV
20700 || outer_code == MOD || outer_code == UMOD)
20701 && exact_log2 (INTVAL (x)) >= 0)
20702 || (outer_code == COMPARE
20703 && (satisfies_constraint_I (x)
20704 || satisfies_constraint_K (x)))
20705 || ((outer_code == EQ || outer_code == NE)
20706 && (satisfies_constraint_I (x)
20707 || satisfies_constraint_K (x)
20708 || (mode == SImode
20709 ? satisfies_constraint_L (x)
20710 : satisfies_constraint_J (x))))
20711 || (outer_code == GTU
20712 && satisfies_constraint_I (x))
20713 || (outer_code == LTU
20714 && satisfies_constraint_P (x)))
20715 {
20716 *total = 0;
20717 return true;
20718 }
20719 else if ((outer_code == PLUS
20720 && reg_or_add_cint_operand (x, VOIDmode))
20721 || (outer_code == MINUS
20722 && reg_or_sub_cint_operand (x, VOIDmode))
20723 || ((outer_code == SET
20724 || outer_code == IOR
20725 || outer_code == XOR)
20726 && (INTVAL (x)
20727 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
20728 {
20729 *total = COSTS_N_INSNS (1);
20730 return true;
20731 }
20732 /* FALLTHRU */
20733
20734 case CONST_DOUBLE:
20735 case CONST_WIDE_INT:
20736 case CONST:
20737 case HIGH:
20738 case SYMBOL_REF:
20739 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
20740 return true;
20741
20742 case MEM:
20743 /* When optimizing for size, MEM should be slightly more expensive
20744 than generating the address, e.g., (plus (reg) (const)).
20745 L1 cache latency is about two instructions. */
20746 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
20747 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
20748 *total += COSTS_N_INSNS (100);
20749 return true;
20750
20751 case LABEL_REF:
20752 *total = 0;
20753 return true;
20754
20755 case PLUS:
20756 case MINUS:
20757 if (FLOAT_MODE_P (mode))
20758 *total = rs6000_cost->fp;
20759 else
20760 *total = COSTS_N_INSNS (1);
20761 return false;
20762
20763 case MULT:
20764 if (CONST_INT_P (XEXP (x, 1))
20765 && satisfies_constraint_I (XEXP (x, 1)))
20766 {
20767 if (INTVAL (XEXP (x, 1)) >= -256
20768 && INTVAL (XEXP (x, 1)) <= 255)
20769 *total = rs6000_cost->mulsi_const9;
20770 else
20771 *total = rs6000_cost->mulsi_const;
20772 }
20773 else if (mode == SFmode)
20774 *total = rs6000_cost->fp;
20775 else if (FLOAT_MODE_P (mode))
20776 *total = rs6000_cost->dmul;
20777 else if (mode == DImode)
20778 *total = rs6000_cost->muldi;
20779 else
20780 *total = rs6000_cost->mulsi;
20781 return false;
20782
20783 case FMA:
20784 if (mode == SFmode)
20785 *total = rs6000_cost->fp;
20786 else
20787 *total = rs6000_cost->dmul;
20788 break;
20789
20790 case DIV:
20791 case MOD:
20792 if (FLOAT_MODE_P (mode))
20793 {
20794 *total = mode == DFmode ? rs6000_cost->ddiv
20795 : rs6000_cost->sdiv;
20796 return false;
20797 }
20798 /* FALLTHRU */
20799
20800 case UDIV:
20801 case UMOD:
20802 if (CONST_INT_P (XEXP (x, 1))
20803 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
20804 {
20805 if (code == DIV || code == MOD)
20806 /* Shift, addze */
20807 *total = COSTS_N_INSNS (2);
20808 else
20809 /* Shift */
20810 *total = COSTS_N_INSNS (1);
20811 }
20812 else
20813 {
20814 if (GET_MODE (XEXP (x, 1)) == DImode)
20815 *total = rs6000_cost->divdi;
20816 else
20817 *total = rs6000_cost->divsi;
20818 }
20819 /* Add in shift and subtract for MOD unless we have a mod instruction. */
20820 if (!TARGET_MODULO && (code == MOD || code == UMOD))
20821 *total += COSTS_N_INSNS (2);
20822 return false;
20823
20824 case CTZ:
20825 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
20826 return false;
20827
20828 case FFS:
20829 *total = COSTS_N_INSNS (4);
20830 return false;
20831
20832 case POPCOUNT:
20833 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
20834 return false;
20835
20836 case PARITY:
20837 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
20838 return false;
20839
20840 case NOT:
20841 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
20842 *total = 0;
20843 else
20844 *total = COSTS_N_INSNS (1);
20845 return false;
20846
20847 case AND:
20848 if (CONST_INT_P (XEXP (x, 1)))
20849 {
20850 rtx left = XEXP (x, 0);
20851 rtx_code left_code = GET_CODE (left);
20852
20853 /* rotate-and-mask: 1 insn. */
20854 if ((left_code == ROTATE
20855 || left_code == ASHIFT
20856 || left_code == LSHIFTRT)
20857 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
20858 {
20859 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
20860 if (!CONST_INT_P (XEXP (left, 1)))
20861 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
20862 *total += COSTS_N_INSNS (1);
20863 return true;
20864 }
20865
20866 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
20867 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
20868 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
20869 || (val & 0xffff) == val
20870 || (val & 0xffff0000) == val
20871 || ((val & 0xffff) == 0 && mode == SImode))
20872 {
20873 *total = rtx_cost (left, mode, AND, 0, speed);
20874 *total += COSTS_N_INSNS (1);
20875 return true;
20876 }
20877
20878 /* 2 insns. */
20879 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
20880 {
20881 *total = rtx_cost (left, mode, AND, 0, speed);
20882 *total += COSTS_N_INSNS (2);
20883 return true;
20884 }
20885 }
20886
20887 *total = COSTS_N_INSNS (1);
20888 return false;
20889
20890 case IOR:
20891 /* FIXME */
20892 *total = COSTS_N_INSNS (1);
20893 return true;
20894
20895 case CLZ:
20896 case XOR:
20897 case ZERO_EXTRACT:
20898 *total = COSTS_N_INSNS (1);
20899 return false;
20900
20901 case ASHIFT:
20902 /* The EXTSWSLI instruction is a combined instruction. Don't count both
20903 the sign extend and shift separately within the insn. */
20904 if (TARGET_EXTSWSLI && mode == DImode
20905 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
20906 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
20907 {
20908 *total = 0;
20909 return false;
20910 }
20911 /* fall through */
20912
20913 case ASHIFTRT:
20914 case LSHIFTRT:
20915 case ROTATE:
20916 case ROTATERT:
20917 /* Handle mul_highpart. */
20918 if (outer_code == TRUNCATE
20919 && GET_CODE (XEXP (x, 0)) == MULT)
20920 {
20921 if (mode == DImode)
20922 *total = rs6000_cost->muldi;
20923 else
20924 *total = rs6000_cost->mulsi;
20925 return true;
20926 }
20927 else if (outer_code == AND)
20928 *total = 0;
20929 else
20930 *total = COSTS_N_INSNS (1);
20931 return false;
20932
20933 case SIGN_EXTEND:
20934 case ZERO_EXTEND:
20935 if (MEM_P (XEXP (x, 0)))
20936 *total = 0;
20937 else
20938 *total = COSTS_N_INSNS (1);
20939 return false;
20940
20941 case COMPARE:
20942 case NEG:
20943 case ABS:
20944 if (!FLOAT_MODE_P (mode))
20945 {
20946 *total = COSTS_N_INSNS (1);
20947 return false;
20948 }
20949 /* FALLTHRU */
20950
20951 case FLOAT:
20952 case UNSIGNED_FLOAT:
20953 case FIX:
20954 case UNSIGNED_FIX:
20955 case FLOAT_TRUNCATE:
20956 *total = rs6000_cost->fp;
20957 return false;
20958
20959 case FLOAT_EXTEND:
20960 if (mode == DFmode)
20961 *total = rs6000_cost->sfdf_convert;
20962 else
20963 *total = rs6000_cost->fp;
20964 return false;
20965
20966 case CALL:
20967 case IF_THEN_ELSE:
20968 if (!speed)
20969 {
20970 *total = COSTS_N_INSNS (1);
20971 return true;
20972 }
20973 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
20974 {
20975 *total = rs6000_cost->fp;
20976 return false;
20977 }
20978 break;
20979
20980 case NE:
20981 case EQ:
20982 case GTU:
20983 case LTU:
20984 /* Carry bit requires mode == Pmode.
20985 NEG or PLUS already counted so only add one. */
20986 if (mode == Pmode
20987 && (outer_code == NEG || outer_code == PLUS))
20988 {
20989 *total = COSTS_N_INSNS (1);
20990 return true;
20991 }
20992 /* FALLTHRU */
20993
20994 case GT:
20995 case LT:
20996 case UNORDERED:
20997 if (outer_code == SET)
20998 {
20999 if (XEXP (x, 1) == const0_rtx)
21000 {
21001 *total = COSTS_N_INSNS (2);
21002 return true;
21003 }
21004 else
21005 {
21006 *total = COSTS_N_INSNS (3);
21007 return false;
21008 }
21009 }
21010 /* CC COMPARE. */
21011 if (outer_code == COMPARE)
21012 {
21013 *total = 0;
21014 return true;
21015 }
21016 break;
21017
21018 default:
21019 break;
21020 }
21021
21022 return false;
21023 }
21024
21025 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
21026
21027 static bool
21028 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
21029 int opno, int *total, bool speed)
21030 {
21031 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
21032
21033 fprintf (stderr,
21034 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
21035 "opno = %d, total = %d, speed = %s, x:\n",
21036 ret ? "complete" : "scan inner",
21037 GET_MODE_NAME (mode),
21038 GET_RTX_NAME (outer_code),
21039 opno,
21040 *total,
21041 speed ? "true" : "false");
21042
21043 debug_rtx (x);
21044
21045 return ret;
21046 }
21047
21048 static int
21049 rs6000_insn_cost (rtx_insn *insn, bool speed)
21050 {
21051 if (recog_memoized (insn) < 0)
21052 return 0;
21053
21054 /* If we are optimizing for size, just use the length. */
21055 if (!speed)
21056 return get_attr_length (insn);
21057
21058 /* Use the cost if provided. */
21059 int cost = get_attr_cost (insn);
21060 if (cost > 0)
21061 return cost;
21062
21063 /* If the insn tells us how many insns there are, use that. Otherwise use
21064 the length/4. Adjust the insn length to remove the extra size that
21065 prefixed instructions take. */
21066 int n = get_attr_num_insns (insn);
21067 if (n == 0)
21068 {
21069 int length = get_attr_length (insn);
21070 if (get_attr_prefixed (insn) == PREFIXED_YES)
21071 {
21072 int adjust = 0;
21073 ADJUST_INSN_LENGTH (insn, adjust);
21074 length -= adjust;
21075 }
21076
21077 n = length / 4;
21078 }
21079
21080 enum attr_type type = get_attr_type (insn);
21081
21082 switch (type)
21083 {
21084 case TYPE_LOAD:
21085 case TYPE_FPLOAD:
21086 case TYPE_VECLOAD:
21087 cost = COSTS_N_INSNS (n + 1);
21088 break;
21089
21090 case TYPE_MUL:
21091 switch (get_attr_size (insn))
21092 {
21093 case SIZE_8:
21094 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
21095 break;
21096 case SIZE_16:
21097 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
21098 break;
21099 case SIZE_32:
21100 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
21101 break;
21102 case SIZE_64:
21103 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
21104 break;
21105 default:
21106 gcc_unreachable ();
21107 }
21108 break;
21109 case TYPE_DIV:
21110 switch (get_attr_size (insn))
21111 {
21112 case SIZE_32:
21113 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
21114 break;
21115 case SIZE_64:
21116 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
21117 break;
21118 default:
21119 gcc_unreachable ();
21120 }
21121 break;
21122
21123 case TYPE_FP:
21124 cost = n * rs6000_cost->fp;
21125 break;
21126 case TYPE_DMUL:
21127 cost = n * rs6000_cost->dmul;
21128 break;
21129 case TYPE_SDIV:
21130 cost = n * rs6000_cost->sdiv;
21131 break;
21132 case TYPE_DDIV:
21133 cost = n * rs6000_cost->ddiv;
21134 break;
21135
21136 case TYPE_SYNC:
21137 case TYPE_LOAD_L:
21138 case TYPE_MFCR:
21139 case TYPE_MFCRF:
21140 cost = COSTS_N_INSNS (n + 2);
21141 break;
21142
21143 default:
21144 cost = COSTS_N_INSNS (n);
21145 }
21146
21147 return cost;
21148 }
21149
21150 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
21151
21152 static int
21153 rs6000_debug_address_cost (rtx x, machine_mode mode,
21154 addr_space_t as, bool speed)
21155 {
21156 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
21157
21158 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
21159 ret, speed ? "true" : "false");
21160 debug_rtx (x);
21161
21162 return ret;
21163 }
21164
21165
21166 /* A C expression returning the cost of moving data from a register of class
21167 CLASS1 to one of CLASS2. */
21168
21169 static int
21170 rs6000_register_move_cost (machine_mode mode,
21171 reg_class_t from, reg_class_t to)
21172 {
21173 int ret;
21174 reg_class_t rclass;
21175
21176 if (TARGET_DEBUG_COST)
21177 dbg_cost_ctrl++;
21178
21179 /* If we have VSX, we can easily move between FPR or Altivec registers,
21180 otherwise we can only easily move within classes.
21181 Do this first so we give best-case answers for union classes
21182 containing both gprs and vsx regs. */
21183 HARD_REG_SET to_vsx, from_vsx;
21184 to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
21185 from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
21186 if (!hard_reg_set_empty_p (to_vsx)
21187 && !hard_reg_set_empty_p (from_vsx)
21188 && (TARGET_VSX
21189 || hard_reg_set_intersect_p (to_vsx, from_vsx)))
21190 {
21191 int reg = FIRST_FPR_REGNO;
21192 if (TARGET_VSX
21193 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
21194 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
21195 reg = FIRST_ALTIVEC_REGNO;
21196 ret = 2 * hard_regno_nregs (reg, mode);
21197 }
21198
21199 /* Moves from/to GENERAL_REGS. */
21200 else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
21201 || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
21202 {
21203 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
21204 {
21205 if (TARGET_DIRECT_MOVE)
21206 {
21207 /* Keep the cost for direct moves above that for within
21208 a register class even if the actual processor cost is
21209 comparable. We do this because a direct move insn
21210 can't be a nop, whereas with ideal register
21211 allocation a move within the same class might turn
21212 out to be a nop. */
21213 if (rs6000_tune == PROCESSOR_POWER9
21214 || rs6000_tune == PROCESSOR_FUTURE)
21215 ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21216 else
21217 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21218 /* SFmode requires a conversion when moving between gprs
21219 and vsx. */
21220 if (mode == SFmode)
21221 ret += 2;
21222 }
21223 else
21224 ret = (rs6000_memory_move_cost (mode, rclass, false)
21225 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
21226 }
21227
21228 /* It's more expensive to move CR_REGS than CR0_REGS because of the
21229 shift. */
21230 else if (rclass == CR_REGS)
21231 ret = 4;
21232
21233 /* For those processors that have slow LR/CTR moves, make them more
21234 expensive than memory in order to bias spills to memory.  */
21235 else if ((rs6000_tune == PROCESSOR_POWER6
21236 || rs6000_tune == PROCESSOR_POWER7
21237 || rs6000_tune == PROCESSOR_POWER8
21238 || rs6000_tune == PROCESSOR_POWER9)
21239 && reg_class_subset_p (rclass, SPECIAL_REGS))
21240 ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21241
21242 else
21243 /* A move will cost one instruction per GPR moved. */
21244 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21245 }
21246
21247 /* Everything else has to go through GENERAL_REGS. */
21248 else
21249 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
21250 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
21251
21252 if (TARGET_DEBUG_COST)
21253 {
21254 if (dbg_cost_ctrl == 1)
21255 fprintf (stderr,
21256 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
21257 ret, GET_MODE_NAME (mode), reg_class_names[from],
21258 reg_class_names[to]);
21259 dbg_cost_ctrl--;
21260 }
21261
21262 return ret;
21263 }
21264
21265 /* A C expression returning the cost of moving data of MODE from a register to
21266 or from memory. */
21267
21268 static int
21269 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
21270 bool in ATTRIBUTE_UNUSED)
21271 {
21272 int ret;
21273
21274 if (TARGET_DEBUG_COST)
21275 dbg_cost_ctrl++;
21276
21277 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
21278 ret = 4 * hard_regno_nregs (0, mode);
21279 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
21280 || reg_classes_intersect_p (rclass, VSX_REGS)))
21281 ret = 4 * hard_regno_nregs (32, mode);
21282 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
21283 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
21284 else
21285 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
21286
21287 if (TARGET_DEBUG_COST)
21288 {
21289 if (dbg_cost_ctrl == 1)
21290 fprintf (stderr,
21291 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
21292 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
21293 dbg_cost_ctrl--;
21294 }
21295
21296 return ret;
21297 }
21298
21299 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
21300
21301 The register allocator chooses GEN_OR_VSX_REGS for the allocno
21302 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
21303 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
21304 move cost between GENERAL_REGS and VSX_REGS low.
21305
21306 It might seem reasonable to use a union class. After all, if usage
21307 of vsr is low and gpr high, it might make sense to spill gpr to vsr
21308 rather than memory. However, in cases where register pressure of
21309 both is high, like the cactus_adm spec test, allowing
21310 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
21311 the first scheduling pass. This is partly due to an allocno of
21312 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
21313 class, which gives too high a pressure for GENERAL_REGS and too low
21314 for VSX_REGS. So, force a choice of the subclass here.
21315
21316 The best class is also the union if GENERAL_REGS and VSX_REGS have
21317 the same cost. In that case we do use GEN_OR_VSX_REGS as the
21318 allocno class, since trying to narrow down the class by regno mode
21319 is prone to error. For example, SImode is allowed in VSX regs and
21320 in some cases (e.g. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
21321 it would be wrong to choose an allocno of GENERAL_REGS based on
21322 SImode. */
21323
21324 static reg_class_t
21325 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
21326 reg_class_t allocno_class,
21327 reg_class_t best_class)
21328 {
21329 switch (allocno_class)
21330 {
21331 case GEN_OR_VSX_REGS:
21332 /* best_class must be a subset of allocno_class. */
21333 gcc_checking_assert (best_class == GEN_OR_VSX_REGS
21334 || best_class == GEN_OR_FLOAT_REGS
21335 || best_class == VSX_REGS
21336 || best_class == ALTIVEC_REGS
21337 || best_class == FLOAT_REGS
21338 || best_class == GENERAL_REGS
21339 || best_class == BASE_REGS);
21340 /* Use best_class but choose wider classes when copying from the
21341 wider class to best_class is cheap. This mimics IRA choice
21342 of allocno class. */
21343 if (best_class == BASE_REGS)
21344 return GENERAL_REGS;
21345 if (TARGET_VSX
21346 && (best_class == FLOAT_REGS || best_class == ALTIVEC_REGS))
21347 return VSX_REGS;
21348 return best_class;
21349
21350 default:
21351 break;
21352 }
21353
21354 return allocno_class;
21355 }
21356
21357 /* Returns a code for a target-specific builtin that implements
21358 reciprocal of the function, or NULL_TREE if not available. */
21359
21360 static tree
21361 rs6000_builtin_reciprocal (tree fndecl)
21362 {
21363 switch (DECL_MD_FUNCTION_CODE (fndecl))
21364 {
21365 case VSX_BUILTIN_XVSQRTDP:
21366 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
21367 return NULL_TREE;
21368
21369 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
21370
21371 case VSX_BUILTIN_XVSQRTSP:
21372 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
21373 return NULL_TREE;
21374
21375 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
21376
21377 default:
21378 return NULL_TREE;
21379 }
21380 }
21381
21382 /* Load up a constant. If the mode is a vector mode, splat the value across
21383 all of the vector elements. */
21384
21385 static rtx
21386 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
21387 {
21388 rtx reg;
21389
21390 if (mode == SFmode || mode == DFmode)
21391 {
21392 rtx d = const_double_from_real_value (dconst, mode);
21393 reg = force_reg (mode, d);
21394 }
21395 else if (mode == V4SFmode)
21396 {
21397 rtx d = const_double_from_real_value (dconst, SFmode);
21398 rtvec v = gen_rtvec (4, d, d, d, d);
21399 reg = gen_reg_rtx (mode);
21400 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
21401 }
21402 else if (mode == V2DFmode)
21403 {
21404 rtx d = const_double_from_real_value (dconst, DFmode);
21405 rtvec v = gen_rtvec (2, d, d);
21406 reg = gen_reg_rtx (mode);
21407 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
21408 }
21409 else
21410 gcc_unreachable ();
21411
21412 return reg;
21413 }
21414
21415 /* Generate an FMA instruction. */
21416
21417 static void
21418 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
21419 {
21420 machine_mode mode = GET_MODE (target);
21421 rtx dst;
21422
21423 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
21424 gcc_assert (dst != NULL);
21425
21426 if (dst != target)
21427 emit_move_insn (target, dst);
21428 }
21429
21430 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
21431
21432 static void
21433 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
21434 {
21435 machine_mode mode = GET_MODE (dst);
21436 rtx r;
21437
21438 /* This is a tad more complicated, since the fnma_optab is for
21439 a different expression: fma(-m1, m2, a), which is the same
21440 thing except in the case of signed zeros.
21441
21442 Fortunately we know that if FMA is supported that FNMSUB is
21443 also supported in the ISA. Just expand it directly. */
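
  /* Concretely: with m1 * m2 == +0.0 and a == +0.0, -fma (m1, m2, -a)
     yields -0.0 whereas fma (-m1, m2, a) yields +0.0 (round-to-nearest
     adds -0.0 + +0.0 to +0.0), so the two expressions differ only in
     the sign of a zero result.  */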
21444
21445 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
21446
21447 r = gen_rtx_NEG (mode, a);
21448 r = gen_rtx_FMA (mode, m1, m2, r);
21449 r = gen_rtx_NEG (mode, r);
21450 emit_insn (gen_rtx_SET (dst, r));
21451 }
21452
21453 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
21454 add a reg_note saying that this was a division. Support both scalar and
21455 vector divide. Assumes no trapping math and finite arguments. */
21456
21457 void
21458 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
21459 {
21460 machine_mode mode = GET_MODE (dst);
21461 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
21462 int i;
21463
21464 /* Low precision estimates guarantee 5 bits of accuracy. High
21465 precision estimates guarantee 14 bits of accuracy. SFmode
21466 requires 23 bits of accuracy. DFmode requires 52 bits of
21467 accuracy. Each pass at least doubles the accuracy, leading
21468 to the following. */
21469 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
21470 if (mode == DFmode || mode == V2DFmode)
21471 passes++;
21472
21473 enum insn_code code = optab_handler (smul_optab, mode);
21474 insn_gen_fn gen_mul = GEN_FCN (code);
21475
21476 gcc_assert (code != CODE_FOR_nothing);
21477
21478 one = rs6000_load_constant_and_splat (mode, dconst1);
21479
21480 /* x0 = 1./d estimate */
21481 x0 = gen_reg_rtx (mode);
21482 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
21483 UNSPEC_FRES)));
21484
21485 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
21486 if (passes > 1) {
21487
21488 /* e0 = 1. - d * x0 */
21489 e0 = gen_reg_rtx (mode);
21490 rs6000_emit_nmsub (e0, d, x0, one);
21491
21492 /* x1 = x0 + e0 * x0 */
21493 x1 = gen_reg_rtx (mode);
21494 rs6000_emit_madd (x1, e0, x0, x0);
21495
21496 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
21497 ++i, xprev = xnext, eprev = enext) {
21498
21499 /* enext = eprev * eprev */
21500 enext = gen_reg_rtx (mode);
21501 emit_insn (gen_mul (enext, eprev, eprev));
21502
21503 /* xnext = xprev + enext * xprev */
21504 xnext = gen_reg_rtx (mode);
21505 rs6000_emit_madd (xnext, enext, xprev, xprev);
21506 }
21507
21508 } else
21509 xprev = x0;
21510
21511 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
21512
21513 /* u = n * xprev */
21514 u = gen_reg_rtx (mode);
21515 emit_insn (gen_mul (u, n, xprev));
21516
21517 /* v = n - (d * u) */
21518 v = gen_reg_rtx (mode);
21519 rs6000_emit_nmsub (v, d, u, n);
21520
21521 /* dst = (v * xprev) + u */
21522 rs6000_emit_madd (dst, v, xprev, u);
21523
21524 if (note_p)
21525 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
21526 }
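
/* An illustrative scalar model of the refinement sequence above (a
   sketch only, not used for code generation; the name and the plain C
   arithmetic are assumptions, with * and +/- standing in for the fused
   fmadd/fnmsub operations actually emitted, and X0 standing in for the
   hardware reciprocal estimate that fres and friends provide).  */

static double ATTRIBUTE_UNUSED
example_swdiv_model (double n, double d, double x0, int passes)
{
  double x = x0;
  if (passes > 1)
    {
      double e = 1.0 - d * x;		/* e0 = 1 - d*x0 (fnmsub).  */
      x = x + e * x;			/* x1 = x0 + e0*x0 (fmadd).  */
      for (int i = 0; i < passes - 2; i++)
	{
	  e = e * e;			/* enext = eprev * eprev.  */
	  x = x + e * x;		/* xnext = xprev + enext*xprev.  */
	}
    }
  double u = n * x;			/* u = n * xprev.  */
  double v = n - d * u;			/* v = n - d*u (fnmsub).  */
  return v * x + u;			/* dst = v*xprev + u (fmadd).  */
}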
21527
21528 /* Goldschmidt's Algorithm for single/double-precision floating point
21529 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
21530
21531 void
21532 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
21533 {
21534 machine_mode mode = GET_MODE (src);
21535 rtx e = gen_reg_rtx (mode);
21536 rtx g = gen_reg_rtx (mode);
21537 rtx h = gen_reg_rtx (mode);
21538
21539 /* Low precision estimates guarantee 5 bits of accuracy. High
21540 precision estimates guarantee 14 bits of accuracy. SFmode
21541 requires 23 bits of accuracy. DFmode requires 52 bits of
21542 accuracy. Each pass at least doubles the accuracy, leading
21543 to the following. */
21544 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
21545 if (mode == DFmode || mode == V2DFmode)
21546 passes++;
21547
21548 int i;
21549 rtx mhalf;
21550 enum insn_code code = optab_handler (smul_optab, mode);
21551 insn_gen_fn gen_mul = GEN_FCN (code);
21552
21553 gcc_assert (code != CODE_FOR_nothing);
21554
21555 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
21556
21557 /* e = rsqrt estimate */
21558 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
21559 UNSPEC_RSQRT)));
21560
21561 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
21562 if (!recip)
21563 {
21564 rtx zero = force_reg (mode, CONST0_RTX (mode));
21565
21566 if (mode == SFmode)
21567 {
21568 rtx target = emit_conditional_move (e, GT, src, zero, mode,
21569 e, zero, mode, 0);
21570 if (target != e)
21571 emit_move_insn (e, target);
21572 }
21573 else
21574 {
21575 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
21576 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
21577 }
21578 }
21579
21580 /* g = sqrt estimate. */
21581 emit_insn (gen_mul (g, e, src));
21582 /* h = 1/(2*sqrt) estimate. */
21583 emit_insn (gen_mul (h, e, mhalf));
21584
21585 if (recip)
21586 {
21587 if (passes == 1)
21588 {
21589 rtx t = gen_reg_rtx (mode);
21590 rs6000_emit_nmsub (t, g, h, mhalf);
21591 /* Apply correction directly to 1/rsqrt estimate. */
21592 rs6000_emit_madd (dst, e, t, e);
21593 }
21594 else
21595 {
21596 for (i = 0; i < passes; i++)
21597 {
21598 rtx t1 = gen_reg_rtx (mode);
21599 rtx g1 = gen_reg_rtx (mode);
21600 rtx h1 = gen_reg_rtx (mode);
21601
21602 rs6000_emit_nmsub (t1, g, h, mhalf);
21603 rs6000_emit_madd (g1, g, t1, g);
21604 rs6000_emit_madd (h1, h, t1, h);
21605
21606 g = g1;
21607 h = h1;
21608 }
21609 /* Multiply by 2 for 1/rsqrt. */
21610 emit_insn (gen_add3_insn (dst, h, h));
21611 }
21612 }
21613 else
21614 {
21615 rtx t = gen_reg_rtx (mode);
21616 rs6000_emit_nmsub (t, g, h, mhalf);
21617 rs6000_emit_madd (dst, g, t, g);
21618 }
21619
21620 return;
21621 }
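
/* An illustrative scalar model of the Goldschmidt iteration above (a
   sketch only; the name is an assumption, E stands in for the hardware
   rsqrt estimate, plain C arithmetic stands in for the fused operations,
   and the zero-input filtering and single-pass shortcut are omitted).  */

static double ATTRIBUTE_UNUSED
example_swsqrt_model (double src, double e, int passes, bool recip)
{
  double g = e * src;			/* g ~ sqrt (src).  */
  double h = e * 0.5;			/* h ~ 1 / (2 * sqrt (src)).  */
  if (recip)
    {
      for (int i = 0; i < passes; i++)
	{
	  double t = 0.5 - g * h;	/* fnmsub.  */
	  g = g + g * t;		/* fmadd.  */
	  h = h + h * t;		/* fmadd.  */
	}
      return h + h;			/* 1/sqrt = 2 * h.  */
    }
  double t = 0.5 - g * h;		/* Single final correction.  */
  return g + g * t;			/* Corrected sqrt.  */
}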
21622
21623 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
21624 (Power7) targets. DST is the target, and SRC is the argument operand. */
21625
21626 void
21627 rs6000_emit_popcount (rtx dst, rtx src)
21628 {
21629 machine_mode mode = GET_MODE (dst);
21630 rtx tmp1, tmp2;
21631
21632 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
21633 if (TARGET_POPCNTD)
21634 {
21635 if (mode == SImode)
21636 emit_insn (gen_popcntdsi2 (dst, src));
21637 else
21638 emit_insn (gen_popcntddi2 (dst, src));
21639 return;
21640 }
21641
21642 tmp1 = gen_reg_rtx (mode);
21643
21644 if (mode == SImode)
21645 {
21646 emit_insn (gen_popcntbsi2 (tmp1, src));
21647 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
21648 NULL_RTX, 0);
21649 tmp2 = force_reg (SImode, tmp2);
21650 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
21651 }
21652 else
21653 {
21654 emit_insn (gen_popcntbdi2 (tmp1, src));
21655 tmp2 = expand_mult (DImode, tmp1,
21656 GEN_INT ((HOST_WIDE_INT)
21657 0x01010101 << 32 | 0x01010101),
21658 NULL_RTX, 0);
21659 tmp2 = force_reg (DImode, tmp2);
21660 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
21661 }
21662 }
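
/* An illustrative scalar model of the popcntb path above (a sketch only;
   __builtin_popcount applied per byte stands in for what the popcntb
   instruction computes in one step).  Multiplying the per-byte counts by
   0x01010101 sums every byte into the most significant byte, with no
   inter-byte carries since the total is at most 32, and the final shift
   extracts that sum.  */

static unsigned int ATTRIBUTE_UNUSED
example_popcount_model (unsigned int x)
{
  unsigned int bytes = 0;
  for (int i = 0; i < 4; i++)
    bytes |= (unsigned int) __builtin_popcount ((x >> (8 * i)) & 0xff)
	     << (8 * i);
  return (bytes * 0x01010101u) >> 24;	/* Total count in the top byte.  */
}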
21663
21664
21665 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
21666 target, and SRC is the argument operand. */
21667
21668 void
21669 rs6000_emit_parity (rtx dst, rtx src)
21670 {
21671 machine_mode mode = GET_MODE (dst);
21672 rtx tmp;
21673
21674 tmp = gen_reg_rtx (mode);
21675
21676 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
21677 if (TARGET_CMPB)
21678 {
21679 if (mode == SImode)
21680 {
21681 emit_insn (gen_popcntbsi2 (tmp, src));
21682 emit_insn (gen_paritysi2_cmpb (dst, tmp));
21683 }
21684 else
21685 {
21686 emit_insn (gen_popcntbdi2 (tmp, src));
21687 emit_insn (gen_paritydi2_cmpb (dst, tmp));
21688 }
21689 return;
21690 }
21691
21692 if (mode == SImode)
21693 {
21694 /* Is mult+shift >= shift+xor+shift+xor? */
21695 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
21696 {
21697 rtx tmp1, tmp2, tmp3, tmp4;
21698
21699 tmp1 = gen_reg_rtx (SImode);
21700 emit_insn (gen_popcntbsi2 (tmp1, src));
21701
21702 tmp2 = gen_reg_rtx (SImode);
21703 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
21704 tmp3 = gen_reg_rtx (SImode);
21705 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
21706
21707 tmp4 = gen_reg_rtx (SImode);
21708 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
21709 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
21710 }
21711 else
21712 rs6000_emit_popcount (tmp, src);
21713 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
21714 }
21715 else
21716 {
21717 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
21718 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
21719 {
21720 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
21721
21722 tmp1 = gen_reg_rtx (DImode);
21723 emit_insn (gen_popcntbdi2 (tmp1, src));
21724
21725 tmp2 = gen_reg_rtx (DImode);
21726 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
21727 tmp3 = gen_reg_rtx (DImode);
21728 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
21729
21730 tmp4 = gen_reg_rtx (DImode);
21731 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
21732 tmp5 = gen_reg_rtx (DImode);
21733 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
21734
21735 tmp6 = gen_reg_rtx (DImode);
21736 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
21737 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
21738 }
21739 else
21740 rs6000_emit_popcount (tmp, src);
21741 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
21742 }
21743 }
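
/* The shift-and-xor sequence above can be modeled in scalar C
   (illustrative only; per_byte_popcount () is again a hypothetical
   popcntb stand-in and <stdint.h> is assumed):

     uint32_t p = per_byte_popcount (x);
     p ^= p >> 16;
     p ^= p >> 8;
     uint32_t parity = p & 1;

   XOR-folding the per-byte counts preserves the parity of their sum,
   so after the folds the low bit of the low byte is the parity of x. */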
21744
21745 /* Expand an Altivec constant permutation for little endian mode.
21746 OP0 and OP1 are the input vectors and TARGET is the output vector.
21747 SEL specifies the constant permutation vector.
21748
21749 There are two issues: First, the two input operands must be
21750 swapped so that together they form a double-wide array in LE
21751 order. Second, the vperm instruction has surprising behavior
21752 in LE mode: it interprets the elements of the source vectors
21753 in BE mode ("left to right") and interprets the elements of
21754 the destination vector in LE mode ("right to left"). To
21755 correct for this, we must subtract each element of the permute
21756 control vector from 31.
21757
21758 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
21759 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
21760 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
21761 serve as the permute control vector. Then, in BE mode,
21762
21763 vperm 9,10,11,12
21764
21765 places the desired result in vr9. However, in LE mode the
21766 vector contents will be
21767
21768 vr10 = 00000003 00000002 00000001 00000000
21769 vr11 = 00000007 00000006 00000005 00000004
21770
21771 The result of the vperm using the same permute control vector is
21772
21773 vr9 = 05000000 07000000 01000000 03000000
21774
21775 That is, the leftmost 4 bytes of vr10 are interpreted as the
21776 source for the rightmost 4 bytes of vr9, and so on.
21777
21778 If we change the permute control vector to
21779
21780 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
21781
21782 and issue
21783
21784 vperm 9,11,10,12
21785
21786 we get the desired
21787
21788 vr9 = 00000006 00000004 00000002 00000000. */
21789
21790 static void
21791 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
21792 const vec_perm_indices &sel)
21793 {
21794 unsigned int i;
21795 rtx perm[16];
21796 rtx constv, unspec;
21797
21798 /* Unpack and adjust the constant selector. */
21799 for (i = 0; i < 16; ++i)
21800 {
21801 unsigned int elt = 31 - (sel[i] & 31);
21802 perm[i] = GEN_INT (elt);
21803 }
21804
21805 /* Expand to a permute, swapping the inputs and using the
21806 adjusted selector. */
21807 if (!REG_P (op0))
21808 op0 = force_reg (V16QImode, op0);
21809 if (!REG_P (op1))
21810 op1 = force_reg (V16QImode, op1);
21811
21812 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
21813 constv = force_reg (V16QImode, constv);
21814 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
21815 UNSPEC_VPERM);
21816 if (!REG_P (target))
21817 {
21818 rtx tmp = gen_reg_rtx (V16QImode);
21819 emit_move_insn (tmp, unspec);
21820 unspec = tmp;
21821 }
21822
21823 emit_move_insn (target, unspec);
21824 }
21825
21826 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
21827 permute control vector. But here it's not a constant, so we must
21828 generate a vector NAND or NOR to do the adjustment. */
21829
21830 void
21831 altivec_expand_vec_perm_le (rtx operands[4])
21832 {
21833 rtx notx, iorx, unspec;
21834 rtx target = operands[0];
21835 rtx op0 = operands[1];
21836 rtx op1 = operands[2];
21837 rtx sel = operands[3];
21838 rtx tmp = target;
21839 rtx norreg = gen_reg_rtx (V16QImode);
21840 machine_mode mode = GET_MODE (target);
21841
21842 /* Get everything in regs so the pattern matches. */
21843 if (!REG_P (op0))
21844 op0 = force_reg (mode, op0);
21845 if (!REG_P (op1))
21846 op1 = force_reg (mode, op1);
21847 if (!REG_P (sel))
21848 sel = force_reg (V16QImode, sel);
21849 if (!REG_P (target))
21850 tmp = gen_reg_rtx (mode);
21851
21852 if (TARGET_P9_VECTOR)
21853 {
21854 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
21855 UNSPEC_VPERMR);
21856 }
21857 else
21858 {
21859 /* Invert the selector with a VNAND if available, else a VNOR.
21860 The VNAND is preferred for future fusion opportunities. */
21861 notx = gen_rtx_NOT (V16QImode, sel);
21862 iorx = (TARGET_P8_VECTOR
21863 ? gen_rtx_IOR (V16QImode, notx, notx)
21864 : gen_rtx_AND (V16QImode, notx, notx));
21865 emit_insn (gen_rtx_SET (norreg, iorx));
21866
21867 /* Permute with operands reversed and adjusted selector. */
21868 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
21869 UNSPEC_VPERM);
21870 }
21871
21872 /* Copy into target, possibly by way of a register. */
21873 if (!REG_P (target))
21874 {
21875 emit_move_insn (tmp, unspec);
21876 unspec = tmp;
21877 }
21878
21879 emit_move_insn (target, unspec);
21880 }
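
/* Why a plain bitwise complement implements the "subtract from 31"
   adjustment used in the constant case: vperm reads only the low five
   bits of each selector byte, and for any 5-bit value e,

     (~e) & 31 == 31 - e

   so a single vnand/vnor of the selector with itself adjusts all 16
   elements at once; the three high bits it also flips are ignored by
   the instruction. */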
21881
21882 /* Expand an Altivec constant permutation. Return true if we match
21883 an efficient implementation; false to fall back to VPERM.
21884
21885 OP0 and OP1 are the input vectors and TARGET is the output vector.
21886 SEL specifies the constant permutation vector. */
21887
21888 static bool
21889 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
21890 const vec_perm_indices &sel)
21891 {
21892 struct altivec_perm_insn {
21893 HOST_WIDE_INT mask;
21894 enum insn_code impl;
21895 unsigned char perm[16];
21896 };
21897 static const struct altivec_perm_insn patterns[] = {
21898 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
21899 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
21900 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
21901 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
21902 { OPTION_MASK_ALTIVEC,
21903 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
21904 : CODE_FOR_altivec_vmrglb_direct),
21905 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
21906 { OPTION_MASK_ALTIVEC,
21907 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
21908 : CODE_FOR_altivec_vmrglh_direct),
21909 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
21910 { OPTION_MASK_ALTIVEC,
21911 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
21912 : CODE_FOR_altivec_vmrglw_direct),
21913 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
21914 { OPTION_MASK_ALTIVEC,
21915 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
21916 : CODE_FOR_altivec_vmrghb_direct),
21917 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
21918 { OPTION_MASK_ALTIVEC,
21919 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
21920 : CODE_FOR_altivec_vmrghh_direct),
21921 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
21922 { OPTION_MASK_ALTIVEC,
21923 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
21924 : CODE_FOR_altivec_vmrghw_direct),
21925 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
21926 { OPTION_MASK_P8_VECTOR,
21927 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
21928 : CODE_FOR_p8_vmrgow_v4sf_direct),
21929 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
21930 { OPTION_MASK_P8_VECTOR,
21931 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
21932 : CODE_FOR_p8_vmrgew_v4sf_direct),
21933 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
21934 };
21935
21936 unsigned int i, j, elt, which;
21937 unsigned char perm[16];
21938 rtx x;
21939 bool one_vec;
21940
21941 /* Unpack the constant selector. */
21942 for (i = which = 0; i < 16; ++i)
21943 {
21944 elt = sel[i] & 31;
21945 which |= (elt < 16 ? 1 : 2);
21946 perm[i] = elt;
21947 }
21948
21949 /* Simplify the constant selector based on operands. */
21950 switch (which)
21951 {
21952 default:
21953 gcc_unreachable ();
21954
21955 case 3:
21956 one_vec = false;
21957 if (!rtx_equal_p (op0, op1))
21958 break;
21959 /* FALLTHRU */
21960
21961 case 2:
21962 for (i = 0; i < 16; ++i)
21963 perm[i] &= 15;
21964 op0 = op1;
21965 one_vec = true;
21966 break;
21967
21968 case 1:
21969 op1 = op0;
21970 one_vec = true;
21971 break;
21972 }
21973
21974 /* Look for splat patterns. */
21975 if (one_vec)
21976 {
21977 elt = perm[0];
21978
21979 for (i = 0; i < 16; ++i)
21980 if (perm[i] != elt)
21981 break;
21982 if (i == 16)
21983 {
21984 if (!BYTES_BIG_ENDIAN)
21985 elt = 15 - elt;
21986 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
21987 return true;
21988 }
21989
21990 if (elt % 2 == 0)
21991 {
21992 for (i = 0; i < 16; i += 2)
21993 if (perm[i] != elt || perm[i + 1] != elt + 1)
21994 break;
21995 if (i == 16)
21996 {
21997 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
21998 x = gen_reg_rtx (V8HImode);
21999 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
22000 GEN_INT (field)));
22001 emit_move_insn (target, gen_lowpart (V16QImode, x));
22002 return true;
22003 }
22004 }
22005
22006 if (elt % 4 == 0)
22007 {
22008 for (i = 0; i < 16; i += 4)
22009 if (perm[i] != elt
22010 || perm[i + 1] != elt + 1
22011 || perm[i + 2] != elt + 2
22012 || perm[i + 3] != elt + 3)
22013 break;
22014 if (i == 16)
22015 {
22016 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
22017 x = gen_reg_rtx (V4SImode);
22018 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
22019 GEN_INT (field)));
22020 emit_move_insn (target, gen_lowpart (V16QImode, x));
22021 return true;
22022 }
22023 }
22024 }
22025
22026 /* Look for merge and pack patterns. */
22027 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
22028 {
22029 bool swapped;
22030
22031 if ((patterns[j].mask & rs6000_isa_flags) == 0)
22032 continue;
22033
22034 elt = patterns[j].perm[0];
22035 if (perm[0] == elt)
22036 swapped = false;
22037 else if (perm[0] == elt + 16)
22038 swapped = true;
22039 else
22040 continue;
22041 for (i = 1; i < 16; ++i)
22042 {
22043 elt = patterns[j].perm[i];
22044 if (swapped)
22045 elt = (elt >= 16 ? elt - 16 : elt + 16);
22046 else if (one_vec && elt >= 16)
22047 elt -= 16;
22048 if (perm[i] != elt)
22049 break;
22050 }
22051 if (i == 16)
22052 {
22053 enum insn_code icode = patterns[j].impl;
22054 machine_mode omode = insn_data[icode].operand[0].mode;
22055 machine_mode imode = insn_data[icode].operand[1].mode;
22056
22057 /* For little-endian, don't use vpkuwum and vpkuhum if the
22058 underlying vector type is not V4SI and V8HI, respectively.
22059 For example, using vpkuwum with a V8HI picks up the even
22060 halfwords (BE numbering) when the even halfwords (LE
22061 numbering) are what we need. */
22062 if (!BYTES_BIG_ENDIAN
22063 && icode == CODE_FOR_altivec_vpkuwum_direct
22064 && ((REG_P (op0)
22065 && GET_MODE (op0) != V4SImode)
22066 || (SUBREG_P (op0)
22067 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
22068 continue;
22069 if (!BYTES_BIG_ENDIAN
22070 && icode == CODE_FOR_altivec_vpkuhum_direct
22071 && ((REG_P (op0)
22072 && GET_MODE (op0) != V8HImode)
22073 || (SUBREG_P (op0)
22074 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
22075 continue;
22076
22077 /* For little-endian, the two input operands must be swapped
22078 (or swapped back) to ensure proper right-to-left numbering
22079 from 0 to 2N-1. */
22080 if (swapped ^ !BYTES_BIG_ENDIAN)
22081 std::swap (op0, op1);
22082 if (imode != V16QImode)
22083 {
22084 op0 = gen_lowpart (imode, op0);
22085 op1 = gen_lowpart (imode, op1);
22086 }
22087 if (omode == V16QImode)
22088 x = target;
22089 else
22090 x = gen_reg_rtx (omode);
22091 emit_insn (GEN_FCN (icode) (x, op0, op1));
22092 if (omode != V16QImode)
22093 emit_move_insn (target, gen_lowpart (V16QImode, x));
22094 return true;
22095 }
22096 }
22097
22098 if (!BYTES_BIG_ENDIAN)
22099 {
22100 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
22101 return true;
22102 }
22103
22104 return false;
22105 }
22106
22107 /* Expand a VSX Permute Doubleword constant permutation.
22108 Return true if we match an efficient implementation. */
22109
22110 static bool
22111 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
22112 unsigned char perm0, unsigned char perm1)
22113 {
22114 rtx x;
22115
22116 /* If both selectors come from the same operand, fold to single op. */
22117 if ((perm0 & 2) == (perm1 & 2))
22118 {
22119 if (perm0 & 2)
22120 op0 = op1;
22121 else
22122 op1 = op0;
22123 }
22124 /* If both operands are equal, fold to simpler permutation. */
22125 if (rtx_equal_p (op0, op1))
22126 {
22127 perm0 = perm0 & 1;
22128 perm1 = (perm1 & 1) + 2;
22129 }
22130 /* If the first selector comes from the second operand, swap. */
22131 else if (perm0 & 2)
22132 {
22133 if (perm1 & 2)
22134 return false;
22135 perm0 -= 2;
22136 perm1 += 2;
22137 std::swap (op0, op1);
22138 }
22139 /* If the second selector does not come from the second operand, fail. */
22140 else if ((perm1 & 2) == 0)
22141 return false;
22142
22143 /* Success! */
22144 if (target != NULL)
22145 {
22146 machine_mode vmode, dmode;
22147 rtvec v;
22148
22149 vmode = GET_MODE (target);
22150 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
22151 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
22152 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
22153 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
22154 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
22155 emit_insn (gen_rtx_SET (target, x));
22156 }
22157 return true;
22158 }
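
/* An illustration of the PERM0/PERM1 encoding handled above (derived
   from the code, not an ISA quotation): each selector is a 2-bit
   value whose bit 1 picks the operand (0 = OP0, 1 = OP1) and whose
   bit 0 picks the doubleword within it.  For example, for V2DI:

     perm0 = 0, perm1 = 3  selects { op0[0], op1[1] }
     perm0 = 2, perm1 = 1  is normalized to perm0 = 0, perm1 = 3
                           with op0 and op1 exchanged

   which is why the code can always reduce to "first selector from the
   first operand, second selector from the second" or fail. */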
22159
22160 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
22161
22162 static bool
22163 rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
22164 rtx op1, const vec_perm_indices &sel)
22165 {
22166 bool testing_p = !target;
22167
22168 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
22169 if (TARGET_ALTIVEC && testing_p)
22170 return true;
22171
22172 /* Check for ps_merge* or xxpermdi insns. */
22173 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
22174 {
22175 if (testing_p)
22176 {
22177 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
22178 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
22179 }
22180 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
22181 return true;
22182 }
22183
22184 if (TARGET_ALTIVEC)
22185 {
22186 /* Force the target-independent code to lower to V16QImode. */
22187 if (vmode != V16QImode)
22188 return false;
22189 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
22190 return true;
22191 }
22192
22193 return false;
22194 }
22195
22196 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
22197 OP0 and OP1 are the input vectors and TARGET is the output vector.
22198 PERM specifies the constant permutation vector. */
22199
22200 static void
22201 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
22202 machine_mode vmode, const vec_perm_builder &perm)
22203 {
22204 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
22205 if (x != target)
22206 emit_move_insn (target, x);
22207 }
22208
22209 /* Expand an extract even operation. */
22210
22211 void
22212 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
22213 {
22214 machine_mode vmode = GET_MODE (target);
22215 unsigned i, nelt = GET_MODE_NUNITS (vmode);
22216 vec_perm_builder perm (nelt, nelt, 1);
22217
22218 for (i = 0; i < nelt; i++)
22219 perm.quick_push (i * 2);
22220
22221 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
22222 }
22223
22224 /* Expand a vector interleave operation. */
22225
22226 void
22227 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
22228 {
22229 machine_mode vmode = GET_MODE (target);
22230 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
22231 vec_perm_builder perm (nelt, nelt, 1);
22232
22233 high = (highp ? 0 : nelt / 2);
22234 for (i = 0; i < nelt / 2; i++)
22235 {
22236 perm.quick_push (i + high);
22237 perm.quick_push (i + nelt + high);
22238 }
22239
22240 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
22241 }
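
/* For concreteness (worked out from the loops above): with a
   four-element vector, rs6000_expand_extract_even builds the selector
   { 0, 2, 4, 6 }, taking the even elements of the op0:op1
   concatenation, while rs6000_expand_interleave builds { 0, 4, 1, 5 }
   when HIGHP and { 2, 6, 3, 7 } otherwise. */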
22242
22243 /* Scale a V2DF vector SRC by two raised to the power SCALE and place the result in TGT. */
22244 void
22245 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
22246 {
22247 HOST_WIDE_INT hwi_scale (scale);
22248 REAL_VALUE_TYPE r_pow;
22249 rtvec v = rtvec_alloc (2);
22250 rtx elt;
22251 rtx scale_vec = gen_reg_rtx (V2DFmode);
22252 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
22253 elt = const_double_from_real_value (r_pow, DFmode);
22254 RTVEC_ELT (v, 0) = elt;
22255 RTVEC_ELT (v, 1) = elt;
22256 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
22257 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
22258 }
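
/* A usage sketch (hypothetical registers, illustrative only):

     rs6000_scale_v2df (tgt, src, 3);

   builds the splatted constant vector { 8.0, 8.0 } via real_powi and
   emits a single V2DF multiply, scaling both lanes of SRC by 2^3. */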
22259
22260 /* Return an RTX representing where to find the function value of a
22261 function returning MODE. */
22262 static rtx
22263 rs6000_complex_function_value (machine_mode mode)
22264 {
22265 unsigned int regno;
22266 rtx r1, r2;
22267 machine_mode inner = GET_MODE_INNER (mode);
22268 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
22269
22270 if (TARGET_FLOAT128_TYPE
22271 && (mode == KCmode
22272 || (mode == TCmode && TARGET_IEEEQUAD)))
22273 regno = ALTIVEC_ARG_RETURN;
22274
22275 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
22276 regno = FP_ARG_RETURN;
22277
22278 else
22279 {
22280 regno = GP_ARG_RETURN;
22281
22282 /* 32-bit is OK since it'll go in r3/r4. */
22283 if (TARGET_32BIT && inner_bytes >= 4)
22284 return gen_rtx_REG (mode, regno);
22285 }
22286
22287 if (inner_bytes >= 8)
22288 return gen_rtx_REG (mode, regno);
22289
22290 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
22291 const0_rtx);
22292 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
22293 GEN_INT (inner_bytes));
22294 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
22295 }
22296
22297 /* Return an rtx describing a return value of MODE as a PARALLEL
22298 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
22299 stride REG_STRIDE. */
22300
22301 static rtx
22302 rs6000_parallel_return (machine_mode mode,
22303 int n_elts, machine_mode elt_mode,
22304 unsigned int regno, unsigned int reg_stride)
22305 {
22306 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
22307
22308 int i;
22309 for (i = 0; i < n_elts; i++)
22310 {
22311 rtx r = gen_rtx_REG (elt_mode, regno);
22312 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
22313 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
22314 regno += reg_stride;
22315 }
22316
22317 return par;
22318 }
22319
22320 /* Target hook for TARGET_FUNCTION_VALUE.
22321
22322 An integer value is in r3 and a floating-point value is in fp1,
22323 unless -msoft-float. */
22324
22325 static rtx
22326 rs6000_function_value (const_tree valtype,
22327 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
22328 bool outgoing ATTRIBUTE_UNUSED)
22329 {
22330 machine_mode mode;
22331 unsigned int regno;
22332 machine_mode elt_mode;
22333 int n_elts;
22334
22335 /* Special handling for structs in darwin64. */
22336 if (TARGET_MACHO
22337 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
22338 {
22339 CUMULATIVE_ARGS valcum;
22340 rtx valret;
22341
22342 valcum.words = 0;
22343 valcum.fregno = FP_ARG_MIN_REG;
22344 valcum.vregno = ALTIVEC_ARG_MIN_REG;
22345 /* Do a trial code generation as if this were going to be passed as
22346 an argument; if any part goes in memory, we return NULL. */
22347 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
22348 if (valret)
22349 return valret;
22350 /* Otherwise fall through to standard ABI rules. */
22351 }
22352
22353 mode = TYPE_MODE (valtype);
22354
22355 /* The ELFv2 ABI returns homogeneous floating-point and vector aggregates in registers. */
22356 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
22357 {
22358 int first_reg, n_regs;
22359
22360 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
22361 {
22362 /* _Decimal128 must use even/odd register pairs. */
22363 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
22364 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
22365 }
22366 else
22367 {
22368 first_reg = ALTIVEC_ARG_RETURN;
22369 n_regs = 1;
22370 }
22371
22372 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
22373 }
22374
22375 /* Some return value types need to be split in the -mpowerpc64, 32-bit ABI. */
22376 if (TARGET_32BIT && TARGET_POWERPC64)
22377 switch (mode)
22378 {
22379 default:
22380 break;
22381 case E_DImode:
22382 case E_SCmode:
22383 case E_DCmode:
22384 case E_TCmode:
22385 int count = GET_MODE_SIZE (mode) / 4;
22386 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
22387 }
22388
22389 if ((INTEGRAL_TYPE_P (valtype)
22390 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
22391 || POINTER_TYPE_P (valtype))
22392 mode = TARGET_32BIT ? SImode : DImode;
22393
22394 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
22395 /* _Decimal128 must use an even/odd register pair. */
22396 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
22397 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
22398 && !FLOAT128_VECTOR_P (mode))
22399 regno = FP_ARG_RETURN;
22400 else if (TREE_CODE (valtype) == COMPLEX_TYPE
22401 && targetm.calls.split_complex_arg)
22402 return rs6000_complex_function_value (mode);
22403 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
22404 return register is used in both cases, and we won't see V2DImode/V2DFmode
22405 for pure altivec, combine the two cases. */
22406 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
22407 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
22408 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
22409 regno = ALTIVEC_ARG_RETURN;
22410 else
22411 regno = GP_ARG_RETURN;
22412
22413 return gen_rtx_REG (mode, regno);
22414 }
22415
22416 /* Define how to find the value returned by a library function
22417 assuming the value has mode MODE. */
22418 rtx
22419 rs6000_libcall_value (machine_mode mode)
22420 {
22421 unsigned int regno;
22422
22423 /* A long long return value needs to be split in the -mpowerpc64, 32-bit ABI. */
22424 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
22425 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
22426
22427 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
22428 /* _Decimal128 must use an even/odd register pair. */
22429 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
22430 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
22431 regno = FP_ARG_RETURN;
22432 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
22433 return register is used in both cases, and we won't see V2DImode/V2DFmode
22434 for pure altivec, combine the two cases. */
22435 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
22436 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
22437 regno = ALTIVEC_ARG_RETURN;
22438 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
22439 return rs6000_complex_function_value (mode);
22440 else
22441 regno = GP_ARG_RETURN;
22442
22443 return gen_rtx_REG (mode, regno);
22444 }
22445
22446 /* Compute register pressure classes. We implement the target hook to avoid
22447 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
22448 lead to incorrect estimates of the number of available registers and
22449 therefore increased register pressure and spilling. */
22450 static int
22451 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
22452 {
22453 int n;
22454
22455 n = 0;
22456 pressure_classes[n++] = GENERAL_REGS;
22457 if (TARGET_VSX)
22458 pressure_classes[n++] = VSX_REGS;
22459 else
22460 {
22461 if (TARGET_ALTIVEC)
22462 pressure_classes[n++] = ALTIVEC_REGS;
22463 if (TARGET_HARD_FLOAT)
22464 pressure_classes[n++] = FLOAT_REGS;
22465 }
22466 pressure_classes[n++] = CR_REGS;
22467 pressure_classes[n++] = SPECIAL_REGS;
22468
22469 return n;
22470 }
22471
22472 /* Given FROM and TO register numbers, say whether this elimination is allowed.
22473 Frame pointer elimination is automatically handled.
22474
22475 For the RS/6000, if frame pointer elimination is being done, we would like
22476 to convert ap into fp, not sp.
22477
22478 We need r30 if -mminimal-toc was specified and there are constant pool
22479 references. */
22480
22481 static bool
22482 rs6000_can_eliminate (const int from, const int to)
22483 {
22484 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
22485 ? ! frame_pointer_needed
22486 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
22487 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
22488 || constant_pool_empty_p ()
22489 : true);
22490 }
22491
22492 /* Define the offset between two registers, FROM to be eliminated and its
22493 replacement TO, at the start of a routine. */
22494 HOST_WIDE_INT
22495 rs6000_initial_elimination_offset (int from, int to)
22496 {
22497 rs6000_stack_t *info = rs6000_stack_info ();
22498 HOST_WIDE_INT offset;
22499
22500 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22501 offset = info->push_p ? 0 : -info->total_size;
22502 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22503 {
22504 offset = info->push_p ? 0 : -info->total_size;
22505 if (FRAME_GROWS_DOWNWARD)
22506 offset += info->fixed_size + info->vars_size + info->parm_size;
22507 }
22508 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
22509 offset = FRAME_GROWS_DOWNWARD
22510 ? info->fixed_size + info->vars_size + info->parm_size
22511 : 0;
22512 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
22513 offset = info->total_size;
22514 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22515 offset = info->push_p ? info->total_size : 0;
22516 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
22517 offset = 0;
22518 else
22519 gcc_unreachable ();
22520
22521 return offset;
22522 }
22523
22524 /* Fill in sizes of registers used by unwinder. */
22525
22526 static void
22527 rs6000_init_dwarf_reg_sizes_extra (tree address)
22528 {
22529 if (TARGET_MACHO && ! TARGET_ALTIVEC)
22530 {
22531 int i;
22532 machine_mode mode = TYPE_MODE (char_type_node);
22533 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
22534 rtx mem = gen_rtx_MEM (BLKmode, addr);
22535 rtx value = gen_int_mode (16, mode);
22536
22537 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
22538 The unwinder still needs to know the size of Altivec registers. */
22539
22540 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
22541 {
22542 int column = DWARF_REG_TO_UNWIND_COLUMN
22543 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
22544 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
22545
22546 emit_move_insn (adjust_address (mem, mode, offset), value);
22547 }
22548 }
22549 }
22550
22551 /* Map internal gcc register numbers to debug format register numbers.
22552 FORMAT specifies the type of debug register number to use:
22553 0 -- debug information, except for frame-related sections
22554 1 -- DWARF .debug_frame section
22555 2 -- DWARF .eh_frame section */
22556
22557 unsigned int
22558 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
22559 {
22560 /* On some platforms, we use the standard DWARF register
22561 numbering for .debug_info and .debug_frame. */
22562 if ((format == 0 && write_symbols == DWARF2_DEBUG) || format == 1)
22563 {
22564 #ifdef RS6000_USE_DWARF_NUMBERING
22565 if (regno <= 31)
22566 return regno;
22567 if (FP_REGNO_P (regno))
22568 return regno - FIRST_FPR_REGNO + 32;
22569 if (ALTIVEC_REGNO_P (regno))
22570 return regno - FIRST_ALTIVEC_REGNO + 1124;
22571 if (regno == LR_REGNO)
22572 return 108;
22573 if (regno == CTR_REGNO)
22574 return 109;
22575 if (regno == CA_REGNO)
22576 return 101; /* XER */
22577 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
22578 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
22579 The actual code emitted saves the whole of CR, so we map CR2_REGNO
22580 to the DWARF reg for CR. */
22581 if (format == 1 && regno == CR2_REGNO)
22582 return 64;
22583 if (CR_REGNO_P (regno))
22584 return regno - CR0_REGNO + 86;
22585 if (regno == VRSAVE_REGNO)
22586 return 356;
22587 if (regno == VSCR_REGNO)
22588 return 67;
22589
22590 /* These do not make much sense. */
22591 if (regno == FRAME_POINTER_REGNUM)
22592 return 111;
22593 if (regno == ARG_POINTER_REGNUM)
22594 return 67;
22595 if (regno == 64)
22596 return 100;
22597
22598 gcc_unreachable ();
22599 #endif
22600 }
22601
22602 /* We use the GCC 7 (and before) internal numbers for non-DWARF debug
22603 information and also for .eh_frame, so translate the regnos to their
22604 GCC 7 values. */
22605 if (regno <= 31)
22606 return regno;
22607 if (FP_REGNO_P (regno))
22608 return regno - FIRST_FPR_REGNO + 32;
22609 if (ALTIVEC_REGNO_P (regno))
22610 return regno - FIRST_ALTIVEC_REGNO + 77;
22611 if (regno == LR_REGNO)
22612 return 65;
22613 if (regno == CTR_REGNO)
22614 return 66;
22615 if (regno == CA_REGNO)
22616 return 76; /* XER */
22617 if (CR_REGNO_P (regno))
22618 return regno - CR0_REGNO + 68;
22619 if (regno == VRSAVE_REGNO)
22620 return 109;
22621 if (regno == VSCR_REGNO)
22622 return 110;
22623
22624 if (regno == FRAME_POINTER_REGNUM)
22625 return 111;
22626 if (regno == ARG_POINTER_REGNUM)
22627 return 67;
22628 if (regno == 64)
22629 return 64;
22630
22631 gcc_unreachable ();
22632 }
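
/* A few concrete mappings implied by the cases above: GPRs map to
   themselves in every format; f0 maps to 32 in both numberings; v0
   maps to 1124 for DWARF but 77 in the legacy numbering; LR maps to
   108 for DWARF .debug_frame but 65 in the legacy numbering; CR2 maps
   to 64 in .debug_frame (the whole-CR column) and 70 in the legacy
   numbering. */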
22633
22634 /* Target hook for eh_return_filter_mode. */
22635 static scalar_int_mode
22636 rs6000_eh_return_filter_mode (void)
22637 {
22638 return TARGET_32BIT ? SImode : word_mode;
22639 }
22640
22641 /* Target hook for translate_mode_attribute. */
22642 static machine_mode
22643 rs6000_translate_mode_attribute (machine_mode mode)
22644 {
22645 if ((FLOAT128_IEEE_P (mode)
22646 && ieee128_float_type_node == long_double_type_node)
22647 || (FLOAT128_IBM_P (mode)
22648 && ibm128_float_type_node == long_double_type_node))
22649 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
22650 return mode;
22651 }
22652
22653 /* Target hook for scalar_mode_supported_p. */
22654 static bool
22655 rs6000_scalar_mode_supported_p (scalar_mode mode)
22656 {
22657 /* -m32 does not support TImode. This is the default, from
22658 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
22659 same ABI as for -m32. But default_scalar_mode_supported_p allows
22660 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
22661 for -mpowerpc64. */
22662 if (TARGET_32BIT && mode == TImode)
22663 return false;
22664
22665 if (DECIMAL_FLOAT_MODE_P (mode))
22666 return default_decimal_float_supported_p ();
22667 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
22668 return true;
22669 else
22670 return default_scalar_mode_supported_p (mode);
22671 }
22672
22673 /* Target hook for vector_mode_supported_p. */
22674 static bool
22675 rs6000_vector_mode_supported_p (machine_mode mode)
22676 {
22677 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
22678 128-bit, the compiler might try to widen IEEE 128-bit to IBM
22679 double-double. */
22680 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
22681 return true;
22682
22683 else
22684 return false;
22685 }
22686
22687 /* Target hook for floatn_mode. */
22688 static opt_scalar_float_mode
22689 rs6000_floatn_mode (int n, bool extended)
22690 {
22691 if (extended)
22692 {
22693 switch (n)
22694 {
22695 case 32:
22696 return DFmode;
22697
22698 case 64:
22699 if (TARGET_FLOAT128_TYPE)
22700 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22701 else
22702 return opt_scalar_float_mode ();
22703
22704 case 128:
22705 return opt_scalar_float_mode ();
22706
22707 default:
22708 /* Those are the only valid _FloatNx types. */
22709 gcc_unreachable ();
22710 }
22711 }
22712 else
22713 {
22714 switch (n)
22715 {
22716 case 32:
22717 return SFmode;
22718
22719 case 64:
22720 return DFmode;
22721
22722 case 128:
22723 if (TARGET_FLOAT128_TYPE)
22724 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22725 else
22726 return opt_scalar_float_mode ();
22727
22728 default:
22729 return opt_scalar_float_mode ();
22730 }
22731 }
22732
22733 }
22734
22735 /* Target hook for c_mode_for_suffix. */
22736 static machine_mode
22737 rs6000_c_mode_for_suffix (char suffix)
22738 {
22739 if (TARGET_FLOAT128_TYPE)
22740 {
22741 if (suffix == 'q' || suffix == 'Q')
22742 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22743
22744 /* At the moment, we are not defining a suffix for IBM extended double.
22745 If/when the default for -mabi=ieeelongdouble is changed, and we want
22746 to support __ibm128 constants in legacy library code, we may need to
22747 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
22748 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
22749 __float80 constants. */
22750 }
22751
22752 return VOIDmode;
22753 }
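
/* Illustration (hypothetical user code): with -mfloat128, a constant
   such as

     __float128 x = 1.0q;

   reaches this hook for its 'q' suffix and gets KFmode, or TFmode
   when long double is already IEEE 128-bit. */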
22754
22755 /* Target hook for invalid_arg_for_unprototyped_fn. */
22756 static const char *
22757 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
22758 {
22759 return (!rs6000_darwin64_abi
22760 && typelist == 0
22761 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
22762 && (funcdecl == NULL_TREE
22763 || (TREE_CODE (funcdecl) == FUNCTION_DECL
22764 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
22765 ? N_("AltiVec argument passed to unprototyped function")
22766 : NULL;
22767 }
22768
22769 /* For TARGET_SECURE_PLT 32-bit PIC code we can avoid the PIC register
22770 setup by calling the hidden function __stack_chk_fail_local instead
22771 of calling __stack_chk_fail directly. Otherwise it is better to call
22772 __stack_chk_fail directly. */
22773
22774 static tree ATTRIBUTE_UNUSED
22775 rs6000_stack_protect_fail (void)
22776 {
22777 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
22778 ? default_hidden_stack_protect_fail ()
22779 : default_external_stack_protect_fail ();
22780 }
22781
22782 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
22783
22784 #if TARGET_ELF
22785 static unsigned HOST_WIDE_INT
22786 rs6000_asan_shadow_offset (void)
22787 {
22788 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
22789 }
22790 #endif
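
/* For reference: AddressSanitizer computes a shadow address as
   (addr >> 3) + offset, so the offsets returned above (1 << 41 for
   64-bit, 1 << 29 for 32-bit) must agree with the values the
   libsanitizer runtime expects for this target. */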
22791 \f
22792 /* Mask options that we want to support inside attribute((target)) and
22793 #pragma GCC target operations. Note, we do not include things like
22794 64/32-bit, endianness, hard/soft floating point, etc. that would have
22795 different calling sequences. */
22796
22797 struct rs6000_opt_mask {
22798 const char *name; /* option name */
22799 HOST_WIDE_INT mask; /* mask to set */
22800 bool invert; /* invert sense of mask */
22801 bool valid_target; /* option is a target option */
22802 };
22803
22804 static struct rs6000_opt_mask const rs6000_opt_masks[] =
22805 {
22806 { "altivec", OPTION_MASK_ALTIVEC, false, true },
22807 { "cmpb", OPTION_MASK_CMPB, false, true },
22808 { "crypto", OPTION_MASK_CRYPTO, false, true },
22809 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
22810 { "dlmzb", OPTION_MASK_DLMZB, false, true },
22811 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
22812 false, true },
22813 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
22814 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
22815 { "fprnd", OPTION_MASK_FPRND, false, true },
22816 { "future", OPTION_MASK_FUTURE, false, true },
22817 { "hard-dfp", OPTION_MASK_DFP, false, true },
22818 { "htm", OPTION_MASK_HTM, false, true },
22819 { "isel", OPTION_MASK_ISEL, false, true },
22820 { "mfcrf", OPTION_MASK_MFCRF, false, true },
22821 { "mfpgpr", 0, false, true },
22822 { "modulo", OPTION_MASK_MODULO, false, true },
22823 { "mulhw", OPTION_MASK_MULHW, false, true },
22824 { "multiple", OPTION_MASK_MULTIPLE, false, true },
22825 { "pcrel", OPTION_MASK_PCREL, false, true },
22826 { "popcntb", OPTION_MASK_POPCNTB, false, true },
22827 { "popcntd", OPTION_MASK_POPCNTD, false, true },
22828 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
22829 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
22830 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
22831 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
22832 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
22833 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
22834 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
22835 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
22836 { "prefixed-addr", OPTION_MASK_PREFIXED_ADDR, false, true },
22837 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
22838 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
22839 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
22840 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
22841 { "string", 0, false, true },
22842 { "update", OPTION_MASK_NO_UPDATE, true , true },
22843 { "vsx", OPTION_MASK_VSX, false, true },
22844 #ifdef OPTION_MASK_64BIT
22845 #if TARGET_AIX_OS
22846 { "aix64", OPTION_MASK_64BIT, false, false },
22847 { "aix32", OPTION_MASK_64BIT, true, false },
22848 #else
22849 { "64", OPTION_MASK_64BIT, false, false },
22850 { "32", OPTION_MASK_64BIT, true, false },
22851 #endif
22852 #endif
22853 #ifdef OPTION_MASK_EABI
22854 { "eabi", OPTION_MASK_EABI, false, false },
22855 #endif
22856 #ifdef OPTION_MASK_LITTLE_ENDIAN
22857 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
22858 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
22859 #endif
22860 #ifdef OPTION_MASK_RELOCATABLE
22861 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
22862 #endif
22863 #ifdef OPTION_MASK_STRICT_ALIGN
22864 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
22865 #endif
22866 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
22867 { "string", 0, false, false },
22868 };
22869
22870 /* Builtin mask mapping for printing the flags. */
22871 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
22872 {
22873 { "altivec", RS6000_BTM_ALTIVEC, false, false },
22874 { "vsx", RS6000_BTM_VSX, false, false },
22875 { "fre", RS6000_BTM_FRE, false, false },
22876 { "fres", RS6000_BTM_FRES, false, false },
22877 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
22878 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
22879 { "popcntd", RS6000_BTM_POPCNTD, false, false },
22880 { "cell", RS6000_BTM_CELL, false, false },
22881 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
22882 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
22883 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
22884 { "crypto", RS6000_BTM_CRYPTO, false, false },
22885 { "htm", RS6000_BTM_HTM, false, false },
22886 { "hard-dfp", RS6000_BTM_DFP, false, false },
22887 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
22888 { "long-double-128", RS6000_BTM_LDBL128, false, false },
22889 { "powerpc64", RS6000_BTM_POWERPC64, false, false },
22890 { "float128", RS6000_BTM_FLOAT128, false, false },
22891 { "float128-hw", RS6000_BTM_FLOAT128_HW,false, false },
22892 };
22893
22894 /* Option variables that we want to support inside attribute((target)) and
22895 #pragma GCC target operations. */
22896
22897 struct rs6000_opt_var {
22898 const char *name; /* option name */
22899 size_t global_offset; /* offset of the option in global_options. */
22900 size_t target_offset; /* offset of the option in target options. */
22901 };
22902
22903 static struct rs6000_opt_var const rs6000_opt_vars[] =
22904 {
22905 { "friz",
22906 offsetof (struct gcc_options, x_TARGET_FRIZ),
22907 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
22908 { "avoid-indexed-addresses",
22909 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
22910 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
22911 { "longcall",
22912 offsetof (struct gcc_options, x_rs6000_default_long_calls),
22913 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
22914 { "optimize-swaps",
22915 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
22916 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
22917 { "allow-movmisalign",
22918 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
22919 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
22920 { "sched-groups",
22921 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
22922 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
22923 { "always-hint",
22924 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
22925 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
22926 { "align-branch-targets",
22927 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
22928 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
22929 { "sched-prolog",
22930 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
22931 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
22932 { "sched-epilog",
22933 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
22934 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
22935 { "speculate-indirect-jumps",
22936 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
22937 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
22938 };
22939
22940 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
22941 parsing. Return true if there were no errors. */
22942
22943 static bool
22944 rs6000_inner_target_options (tree args, bool attr_p)
22945 {
22946 bool ret = true;
22947
22948 if (args == NULL_TREE)
22949 ;
22950
22951 else if (TREE_CODE (args) == STRING_CST)
22952 {
22953 char *p = ASTRDUP (TREE_STRING_POINTER (args));
22954 char *q;
22955
22956 while ((q = strtok (p, ",")) != NULL)
22957 {
22958 bool error_p = false;
22959 bool not_valid_p = false;
22960 const char *cpu_opt = NULL;
22961
22962 p = NULL;
22963 if (strncmp (q, "cpu=", 4) == 0)
22964 {
22965 int cpu_index = rs6000_cpu_name_lookup (q+4);
22966 if (cpu_index >= 0)
22967 rs6000_cpu_index = cpu_index;
22968 else
22969 {
22970 error_p = true;
22971 cpu_opt = q+4;
22972 }
22973 }
22974 else if (strncmp (q, "tune=", 5) == 0)
22975 {
22976 int tune_index = rs6000_cpu_name_lookup (q+5);
22977 if (tune_index >= 0)
22978 rs6000_tune_index = tune_index;
22979 else
22980 {
22981 error_p = true;
22982 cpu_opt = q+5;
22983 }
22984 }
22985 else
22986 {
22987 size_t i;
22988 bool invert = false;
22989 char *r = q;
22990
22991 error_p = true;
22992 if (strncmp (r, "no-", 3) == 0)
22993 {
22994 invert = true;
22995 r += 3;
22996 }
22997
22998 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
22999 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
23000 {
23001 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
23002
23003 if (!rs6000_opt_masks[i].valid_target)
23004 not_valid_p = true;
23005 else
23006 {
23007 error_p = false;
23008 rs6000_isa_flags_explicit |= mask;
23009
23010 /* VSX needs altivec, so -mvsx automagically sets
23011 altivec and disables -mavoid-indexed-addresses. */
23012 if (!invert)
23013 {
23014 if (mask == OPTION_MASK_VSX)
23015 {
23016 mask |= OPTION_MASK_ALTIVEC;
23017 TARGET_AVOID_XFORM = 0;
23018 }
23019 }
23020
23021 if (rs6000_opt_masks[i].invert)
23022 invert = !invert;
23023
23024 if (invert)
23025 rs6000_isa_flags &= ~mask;
23026 else
23027 rs6000_isa_flags |= mask;
23028 }
23029 break;
23030 }
23031
23032 if (error_p && !not_valid_p)
23033 {
23034 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
23035 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
23036 {
23037 size_t j = rs6000_opt_vars[i].global_offset;
23038 *((int *) ((char *)&global_options + j)) = !invert;
23039 error_p = false;
23040 not_valid_p = false;
23041 break;
23042 }
23043 }
23044 }
23045
23046 if (error_p)
23047 {
23048 const char *eprefix, *esuffix;
23049
23050 ret = false;
23051 if (attr_p)
23052 {
23053 eprefix = "__attribute__((__target__(";
23054 esuffix = ")))";
23055 }
23056 else
23057 {
23058 eprefix = "#pragma GCC target ";
23059 esuffix = "";
23060 }
23061
23062 if (cpu_opt)
23063 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
23064 q, esuffix);
23065 else if (not_valid_p)
23066 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
23067 else
23068 error ("%s%qs%s is invalid", eprefix, q, esuffix);
23069 }
23070 }
23071 }
23072
23073 else if (TREE_CODE (args) == TREE_LIST)
23074 {
23075 do
23076 {
23077 tree value = TREE_VALUE (args);
23078 if (value)
23079 {
23080 bool ret2 = rs6000_inner_target_options (value, attr_p);
23081 if (!ret2)
23082 ret = false;
23083 }
23084 args = TREE_CHAIN (args);
23085 }
23086 while (args != NULL_TREE);
23087 }
23088
23089 else
23090 {
23091 error ("attribute %<target%> argument not a string");
23092 return false;
23093 }
23094
23095 return ret;
23096 }
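
/* Examples of strings this parser accepts (hypothetical user code,
   shown for illustration):

     __attribute__((__target__("cpu=power9,htm"))) void f (void);
     #pragma GCC target ("no-vsx,isel")

   Comma-separated entries are processed one strtok token at a time:
   "cpu=" and "tune=" set the index variables, a "no-" prefix inverts
   a mask or variable option, and anything unrecognized produces one
   of the errors built at the end of the loop. */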
23097
23098 /* Print out the target options as a list for -mdebug=target. */
23099
23100 static void
23101 rs6000_debug_target_options (tree args, const char *prefix)
23102 {
23103 if (args == NULL_TREE)
23104 fprintf (stderr, "%s<NULL>", prefix);
23105
23106 else if (TREE_CODE (args) == STRING_CST)
23107 {
23108 char *p = ASTRDUP (TREE_STRING_POINTER (args));
23109 char *q;
23110
23111 while ((q = strtok (p, ",")) != NULL)
23112 {
23113 p = NULL;
23114 fprintf (stderr, "%s\"%s\"", prefix, q);
23115 prefix = ", ";
23116 }
23117 }
23118
23119 else if (TREE_CODE (args) == TREE_LIST)
23120 {
23121 do
23122 {
23123 tree value = TREE_VALUE (args);
23124 if (value)
23125 {
23126 rs6000_debug_target_options (value, prefix);
23127 prefix = ", ";
23128 }
23129 args = TREE_CHAIN (args);
23130 }
23131 while (args != NULL_TREE);
23132 }
23133
23134 else
23135 gcc_unreachable ();
23136
23137 return;
23138 }
23139
23140 \f
23141 /* Hook to validate attribute((target("..."))). */
23142
23143 static bool
23144 rs6000_valid_attribute_p (tree fndecl,
23145 tree ARG_UNUSED (name),
23146 tree args,
23147 int flags)
23148 {
23149 struct cl_target_option cur_target;
23150 bool ret;
23151 tree old_optimize;
23152 tree new_target, new_optimize;
23153 tree func_optimize;
23154
23155 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
23156
23157 if (TARGET_DEBUG_TARGET)
23158 {
23159 tree tname = DECL_NAME (fndecl);
23160 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
23161 if (tname)
23162 fprintf (stderr, "function: %.*s\n",
23163 (int) IDENTIFIER_LENGTH (tname),
23164 IDENTIFIER_POINTER (tname));
23165 else
23166 fprintf (stderr, "function: unknown\n");
23167
23168 fprintf (stderr, "args:");
23169 rs6000_debug_target_options (args, " ");
23170 fprintf (stderr, "\n");
23171
23172 if (flags)
23173 fprintf (stderr, "flags: 0x%x\n", flags);
23174
23175 fprintf (stderr, "--------------------\n");
23176 }
23177
23178 /* attribute((target("default"))) does nothing, beyond
23179 affecting multi-versioning. */
23180 if (TREE_VALUE (args)
23181 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
23182 && TREE_CHAIN (args) == NULL_TREE
23183 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
23184 return true;
23185
23186 old_optimize = build_optimization_node (&global_options);
23187 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
23188
23189 /* If the function changed the optimization levels as well as setting target
23190 options, start with the optimizations specified. */
23191 if (func_optimize && func_optimize != old_optimize)
23192 cl_optimization_restore (&global_options,
23193 TREE_OPTIMIZATION (func_optimize));
23194
23195 /* The target attributes may also change some optimization flags, so update
23196 the optimization options if necessary. */
23197 cl_target_option_save (&cur_target, &global_options);
23198 rs6000_cpu_index = rs6000_tune_index = -1;
23199 ret = rs6000_inner_target_options (args, true);
23200
23201 /* Set up any additional state. */
23202 if (ret)
23203 {
23204 ret = rs6000_option_override_internal (false);
23205 new_target = build_target_option_node (&global_options);
23206 }
23207 else
23208 new_target = NULL;
23209
23210 new_optimize = build_optimization_node (&global_options);
23211
23212 if (!new_target)
23213 ret = false;
23214
23215 else if (fndecl)
23216 {
23217 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
23218
23219 if (old_optimize != new_optimize)
23220 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
23221 }
23222
23223 cl_target_option_restore (&global_options, &cur_target);
23224
23225 if (old_optimize != new_optimize)
23226 cl_optimization_restore (&global_options,
23227 TREE_OPTIMIZATION (old_optimize));
23228
23229 return ret;
23230 }
23231
23232 \f
23233 /* Hook to validate the current #pragma GCC target and set the state, and
23234 update the macros based on what was changed. If ARGS is NULL, then
23235 POP_TARGET is used to reset the options. */
23236
23237 bool
23238 rs6000_pragma_target_parse (tree args, tree pop_target)
23239 {
23240 tree prev_tree = build_target_option_node (&global_options);
23241 tree cur_tree;
23242 struct cl_target_option *prev_opt, *cur_opt;
23243 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
23244 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
23245
23246 if (TARGET_DEBUG_TARGET)
23247 {
23248 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
23249 fprintf (stderr, "args:");
23250 rs6000_debug_target_options (args, " ");
23251 fprintf (stderr, "\n");
23252
23253 if (pop_target)
23254 {
23255 fprintf (stderr, "pop_target:\n");
23256 debug_tree (pop_target);
23257 }
23258 else
23259 fprintf (stderr, "pop_target: <NULL>\n");
23260
23261 fprintf (stderr, "--------------------\n");
23262 }
23263
23264 if (! args)
23265 {
23266 cur_tree = ((pop_target)
23267 ? pop_target
23268 : target_option_default_node);
23269 cl_target_option_restore (&global_options,
23270 TREE_TARGET_OPTION (cur_tree));
23271 }
23272 else
23273 {
23274 rs6000_cpu_index = rs6000_tune_index = -1;
23275 if (!rs6000_inner_target_options (args, false)
23276 || !rs6000_option_override_internal (false)
23277 || (cur_tree = build_target_option_node (&global_options))
23278 == NULL_TREE)
23279 {
23280 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
23281 fprintf (stderr, "invalid pragma\n");
23282
23283 return false;
23284 }
23285 }
23286
23287 target_option_current_node = cur_tree;
23288 rs6000_activate_target_options (target_option_current_node);
23289
23290 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
23291 change the macros that are defined. */
23292 if (rs6000_target_modify_macros_ptr)
23293 {
23294 prev_opt = TREE_TARGET_OPTION (prev_tree);
23295 prev_bumask = prev_opt->x_rs6000_builtin_mask;
23296 prev_flags = prev_opt->x_rs6000_isa_flags;
23297
23298 cur_opt = TREE_TARGET_OPTION (cur_tree);
23299 cur_flags = cur_opt->x_rs6000_isa_flags;
23300 cur_bumask = cur_opt->x_rs6000_builtin_mask;
23301
23302 diff_bumask = (prev_bumask ^ cur_bumask);
23303 diff_flags = (prev_flags ^ cur_flags);
23304
23305 if ((diff_flags != 0) || (diff_bumask != 0))
23306 {
23307 /* Delete old macros. */
23308 rs6000_target_modify_macros_ptr (false,
23309 prev_flags & diff_flags,
23310 prev_bumask & diff_bumask);
23311
23312 /* Define new macros. */
23313 rs6000_target_modify_macros_ptr (true,
23314 cur_flags & diff_flags,
23315 cur_bumask & diff_bumask);
23316 }
23317 }
23318
23319 return true;
23320 }
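
/* A typical use of this hook from user code (illustrative only):

     #pragma GCC push_options
     #pragma GCC target ("cpu=power9,power9-vector")
     void fast_path (void);
     #pragma GCC pop_options

   The target pragma arrives here as ARGS and the pop as POP_TARGET,
   and the macro-update callback keeps predefines such as
   __POWER9_VECTOR__ in sync with the changed flags. */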
23321
23322 \f
23323 /* Remember the last target of rs6000_set_current_function. */
23324 static GTY(()) tree rs6000_previous_fndecl;
23325
23326 /* Restore target's globals from NEW_TREE and invalidate the
23327 rs6000_previous_fndecl cache. */
23328
23329 void
23330 rs6000_activate_target_options (tree new_tree)
23331 {
23332 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
23333 if (TREE_TARGET_GLOBALS (new_tree))
23334 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
23335 else if (new_tree == target_option_default_node)
23336 restore_target_globals (&default_target_globals);
23337 else
23338 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
23339 rs6000_previous_fndecl = NULL_TREE;
23340 }
23341
23342 /* Establish appropriate back-end context for processing the function
23343 FNDECL. The argument might be NULL to indicate processing at top
23344 level, outside of any function scope. */
23345 static void
23346 rs6000_set_current_function (tree fndecl)
23347 {
23348 if (TARGET_DEBUG_TARGET)
23349 {
23350 fprintf (stderr, "\n==================== rs6000_set_current_function");
23351
23352 if (fndecl)
23353 fprintf (stderr, ", fndecl %s (%p)",
23354 (DECL_NAME (fndecl)
23355 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
23356 : "<unknown>"), (void *)fndecl);
23357
23358 if (rs6000_previous_fndecl)
23359 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
23360
23361 fprintf (stderr, "\n");
23362 }
23363
23364 /* Only change the context if the function changes. This hook is called
23365 several times in the course of compiling a function, and we don't want to
23366 slow things down too much or call target_reinit when it isn't safe. */
23367 if (fndecl == rs6000_previous_fndecl)
23368 return;
23369
23370 tree old_tree;
23371 if (rs6000_previous_fndecl == NULL_TREE)
23372 old_tree = target_option_current_node;
23373 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
23374 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
23375 else
23376 old_tree = target_option_default_node;
23377
23378 tree new_tree;
23379 if (fndecl == NULL_TREE)
23380 {
23381 if (old_tree != target_option_current_node)
23382 new_tree = target_option_current_node;
23383 else
23384 new_tree = NULL_TREE;
23385 }
23386 else
23387 {
23388 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
23389 if (new_tree == NULL_TREE)
23390 new_tree = target_option_default_node;
23391 }
23392
23393 if (TARGET_DEBUG_TARGET)
23394 {
23395 if (new_tree)
23396 {
23397 fprintf (stderr, "\nnew fndecl target specific options:\n");
23398 debug_tree (new_tree);
23399 }
23400
23401 if (old_tree)
23402 {
23403 fprintf (stderr, "\nold fndecl target specific options:\n");
23404 debug_tree (old_tree);
23405 }
23406
23407 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
23408 fprintf (stderr, "--------------------\n");
23409 }
23410
23411 if (new_tree && old_tree != new_tree)
23412 rs6000_activate_target_options (new_tree);
23413
23414 if (fndecl)
23415 rs6000_previous_fndecl = fndecl;
23416 }
23417
23418 \f
23419 /* Save the current options. */
23420
23421 static void
23422 rs6000_function_specific_save (struct cl_target_option *ptr,
23423 struct gcc_options *opts)
23424 {
23425 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
23426 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
23427 }
23428
23429 /* Restore the current options. */
23430
23431 static void
23432 rs6000_function_specific_restore (struct gcc_options *opts,
23433 struct cl_target_option *ptr)
23434
23435 {
23436 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
23437 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
23438 (void) rs6000_option_override_internal (false);
23439 }
23440
23441 /* Print the current options. */
23442
23443 static void
23444 rs6000_function_specific_print (FILE *file, int indent,
23445 struct cl_target_option *ptr)
23446 {
23447 rs6000_print_isa_options (file, indent, "Isa options set",
23448 ptr->x_rs6000_isa_flags);
23449
23450 rs6000_print_isa_options (file, indent, "Isa options explicit",
23451 ptr->x_rs6000_isa_flags_explicit);
23452 }
23453
23454 /* Helper function to print the current isa or misc options on a line. */
23455
23456 static void
23457 rs6000_print_options_internal (FILE *file,
23458 int indent,
23459 const char *string,
23460 HOST_WIDE_INT flags,
23461 const char *prefix,
23462 const struct rs6000_opt_mask *opts,
23463 size_t num_elements)
23464 {
23465 size_t i;
23466 size_t start_column = 0;
23467 size_t cur_column;
23468 size_t max_column = 120;
23469 size_t prefix_len = strlen (prefix);
23470 size_t comma_len = 0;
23471 const char *comma = "";
23472
23473 if (indent)
23474 start_column += fprintf (file, "%*s", indent, "");
23475
23476 if (!flags)
23477 {
23478 fprintf (file, DEBUG_FMT_S, string, "<none>");
23479 return;
23480 }
23481
23482 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
23483
23484 /* Print the various mask options. */
23485 cur_column = start_column;
23486 for (i = 0; i < num_elements; i++)
23487 {
23488 bool invert = opts[i].invert;
23489 const char *name = opts[i].name;
23490 const char *no_str = "";
23491 HOST_WIDE_INT mask = opts[i].mask;
23492 size_t len = comma_len + prefix_len + strlen (name);
23493
23494 if (!invert)
23495 {
23496 if ((flags & mask) == 0)
23497 {
23498 no_str = "no-";
23499 len += sizeof ("no-") - 1;
23500 }
23501
23502 flags &= ~mask;
23503 }
23504
23505 else
23506 {
23507 if ((flags & mask) != 0)
23508 {
23509 no_str = "no-";
23510 len += sizeof ("no-") - 1;
23511 }
23512
23513 flags |= mask;
23514 }
23515
23516 cur_column += len;
23517 if (cur_column > max_column)
23518 {
23519 fprintf (file, ", \\\n%*s", (int)start_column, "");
23520 cur_column = start_column + len;
23521 comma = "";
23522 }
23523
23524 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
23525 comma = ", ";
23526 comma_len = sizeof (", ") - 1;
23527 }
23528
23529 fputs ("\n", file);
23530 }
23531
23532 /* Helper function to print the current isa options on a line. */
23533
23534 static void
23535 rs6000_print_isa_options (FILE *file, int indent, const char *string,
23536 HOST_WIDE_INT flags)
23537 {
23538 rs6000_print_options_internal (file, indent, string, flags, "-m",
23539 &rs6000_opt_masks[0],
23540 ARRAY_SIZE (rs6000_opt_masks));
23541 }
23542
23543 static void
23544 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
23545 HOST_WIDE_INT flags)
23546 {
23547 rs6000_print_options_internal (file, indent, string, flags, "",
23548 &rs6000_builtin_mask_names[0],
23549 ARRAY_SIZE (rs6000_builtin_mask_names));
23550 }
23551
23552 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
23553 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
23554 -mupper-regs-df, etc.).
23555
23556 If the user used -mno-power8-vector, we need to turn off all of the implicit
23557 ISA 2.07 and 3.0 options that relate to the vector unit.
23558
23559 If the user used -mno-power9-vector, we need to turn off all of the implicit
23560 ISA 3.0 options that relate to the vector unit.
23561
23562 This function does not handle explicit options such as the user specifying
23563 -mdirect-move. These are handled in rs6000_option_override_internal, and
23564 the appropriate error is given if needed.
23565
23566 We return a mask of all of the implicit options that should not be enabled
23567 by default. */
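
/* Illustration (hypothetical command line): with

     gcc -mcpu=power9 -mno-vsx -mpower8-vector ...

   the explicitly requested -mpower8-vector depends on VSX, so this
   function reports "-mno-vsx turns off -mpower8-vector"; options that
   were only enabled implicitly are masked off silently instead.  */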
23568
23569 static HOST_WIDE_INT
23570 rs6000_disable_incompatible_switches (void)
23571 {
23572 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
23573 size_t i, j;
23574
23575 static const struct {
23576 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
23577 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
23578 const char *const name; /* name of the switch. */
23579 } flags[] = {
23580 { OPTION_MASK_FUTURE, OTHER_FUTURE_MASKS, "future" },
23581 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
23582 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
23583 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
23584 };
23585
23586 for (i = 0; i < ARRAY_SIZE (flags); i++)
23587 {
23588 HOST_WIDE_INT no_flag = flags[i].no_flag;
23589
23590 if ((rs6000_isa_flags & no_flag) == 0
23591 && (rs6000_isa_flags_explicit & no_flag) != 0)
23592 {
23593 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
23594 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
23595 & rs6000_isa_flags
23596 & dep_flags);
23597
23598 if (set_flags)
23599 {
23600 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
23601 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
23602 {
23603 set_flags &= ~rs6000_opt_masks[j].mask;
23604 error ("%<-mno-%s%> turns off %<-m%s%>",
23605 flags[i].name,
23606 rs6000_opt_masks[j].name);
23607 }
23608
23609 gcc_assert (!set_flags);
23610 }
23611
23612 rs6000_isa_flags &= ~dep_flags;
23613 ignore_masks |= no_flag | dep_flags;
23614 }
23615 }
23616
23617 return ignore_masks;
23618 }
23619
23620 \f
23621 /* Helper function for printing the function name when debugging. */
23622
23623 static const char *
23624 get_decl_name (tree fn)
23625 {
23626 tree name;
23627
23628 if (!fn)
23629 return "<null>";
23630
23631 name = DECL_NAME (fn);
23632 if (!name)
23633 return "<no-name>";
23634
23635 return IDENTIFIER_POINTER (name);
23636 }
23637
23638 /* Return the clone id of the target we are compiling code for in a target
23639 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
23640 the priority list for the target clones (ordered from lowest to
23641 highest). */
23642
23643 static int
23644 rs6000_clone_priority (tree fndecl)
23645 {
23646 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
23647 HOST_WIDE_INT isa_masks;
23648 int ret = CLONE_DEFAULT;
23649 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
23650 const char *attrs_str = NULL;
23651
23652 attrs = TREE_VALUE (TREE_VALUE (attrs));
23653 attrs_str = TREE_STRING_POINTER (attrs);
23654
23655 /* Return priority zero for the default function. Return the ISA needed for the
23656 function if it is not the default. */
23657 if (strcmp (attrs_str, "default") != 0)
23658 {
23659 if (fn_opts == NULL_TREE)
23660 fn_opts = target_option_default_node;
23661
23662 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
23663 isa_masks = rs6000_isa_flags;
23664 else
23665 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
23666
23667 for (ret = CLONE_MAX - 1; ret != 0; ret--)
23668 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
23669 break;
23670 }
23671
23672 if (TARGET_DEBUG_TARGET)
23673 fprintf (stderr, "rs6000_clone_priority (%s) => %d\n",
23674 get_decl_name (fndecl), ret);
23675
23676 return ret;
23677 }
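
/* Illustration (a sketch; the attribute strings follow the documented
   target_clones spellings for PowerPC):

     __attribute__ ((target_clones ("cpu=power9", "default")))
     int f (void);

   gives the "default" clone priority 0 (CLONE_DEFAULT), while the
   power9 clone matches the ISA 3.0 entry of rs6000_clone_map and so
   receives the highest satisfied priority.  */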
23678
23679 /* This compares the priority of target features in function DECL1 and DECL2.
23680 It returns a positive value if DECL1 has higher priority, a negative value
23681 if DECL2 has higher priority, and 0 if they are the same. Note, priorities
23682 are ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
23683
23684 static int
23685 rs6000_compare_version_priority (tree decl1, tree decl2)
23686 {
23687 int priority1 = rs6000_clone_priority (decl1);
23688 int priority2 = rs6000_clone_priority (decl2);
23689 int ret = priority1 - priority2;
23690
23691 if (TARGET_DEBUG_TARGET)
23692 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
23693 get_decl_name (decl1), get_decl_name (decl2), ret);
23694
23695 return ret;
23696 }
23697
23698 /* Make a dispatcher declaration for the multi-versioned function DECL.
23699 Calls to DECL function will be replaced with calls to the dispatcher
23700 by the front-end. Returns the decl of the dispatcher function. */
23701
23702 static tree
23703 rs6000_get_function_versions_dispatcher (void *decl)
23704 {
23705 tree fn = (tree) decl;
23706 struct cgraph_node *node = NULL;
23707 struct cgraph_node *default_node = NULL;
23708 struct cgraph_function_version_info *node_v = NULL;
23709 struct cgraph_function_version_info *first_v = NULL;
23710
23711 tree dispatch_decl = NULL;
23712
23713 struct cgraph_function_version_info *default_version_info = NULL;
23714 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
23715
23716 if (TARGET_DEBUG_TARGET)
23717 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
23718 get_decl_name (fn));
23719
23720 node = cgraph_node::get (fn);
23721 gcc_assert (node != NULL);
23722
23723 node_v = node->function_version ();
23724 gcc_assert (node_v != NULL);
23725
23726 if (node_v->dispatcher_resolver != NULL)
23727 return node_v->dispatcher_resolver;
23728
23729 /* Find the default version and make it the first node. */
23730 first_v = node_v;
23731 /* Go to the beginning of the chain. */
23732 while (first_v->prev != NULL)
23733 first_v = first_v->prev;
23734
23735 default_version_info = first_v;
23736 while (default_version_info != NULL)
23737 {
23738 const tree decl2 = default_version_info->this_node->decl;
23739 if (is_function_default_version (decl2))
23740 break;
23741 default_version_info = default_version_info->next;
23742 }
23743
23744 /* If there is no default node, just return NULL. */
23745 if (default_version_info == NULL)
23746 return NULL;
23747
23748 /* Make default info the first node. */
23749 if (first_v != default_version_info)
23750 {
23751 default_version_info->prev->next = default_version_info->next;
23752 if (default_version_info->next)
23753 default_version_info->next->prev = default_version_info->prev;
23754 first_v->prev = default_version_info;
23755 default_version_info->next = first_v;
23756 default_version_info->prev = NULL;
23757 }
23758
23759 default_node = default_version_info->this_node;
23760
23761 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
23762 error_at (DECL_SOURCE_LOCATION (default_node->decl),
23763 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
23764 "exports hardware capability bits");
23765 #else
23766
23767 if (targetm.has_ifunc_p ())
23768 {
23769 struct cgraph_function_version_info *it_v = NULL;
23770 struct cgraph_node *dispatcher_node = NULL;
23771 struct cgraph_function_version_info *dispatcher_version_info = NULL;
23772
23773 /* Right now, the dispatching is done via ifunc. */
23774 dispatch_decl = make_dispatcher_decl (default_node->decl);
23775
23776 dispatcher_node = cgraph_node::get_create (dispatch_decl);
23777 gcc_assert (dispatcher_node != NULL);
23778 dispatcher_node->dispatcher_function = 1;
23779 dispatcher_version_info
23780 = dispatcher_node->insert_new_function_version ();
23781 dispatcher_version_info->next = default_version_info;
23782 dispatcher_node->definition = 1;
23783
23784 /* Set the dispatcher for all the versions. */
23785 it_v = default_version_info;
23786 while (it_v != NULL)
23787 {
23788 it_v->dispatcher_resolver = dispatch_decl;
23789 it_v = it_v->next;
23790 }
23791 }
23792 else
23793 {
23794 error_at (DECL_SOURCE_LOCATION (default_node->decl),
23795 "multiversioning needs ifunc which is not supported "
23796 "on this target");
23797 }
23798 #endif
23799
23800 return dispatch_decl;
23801 }
23802
23803 /* Make the resolver function decl to dispatch the versions of a multi-
23804 versioned function, DEFAULT_DECL. Create an empty basic block in the
23805 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
23806 function. */
23807
23808 static tree
23809 make_resolver_func (const tree default_decl,
23810 const tree dispatch_decl,
23811 basic_block *empty_bb)
23812 {
23813 /* Make the resolver function static. The resolver function returns
23814 void *. */
23815 tree decl_name = clone_function_name (default_decl, "resolver");
23816 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
23817 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
23818 tree decl = build_fn_decl (resolver_name, type);
23819 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
23820
23821 DECL_NAME (decl) = decl_name;
23822 TREE_USED (decl) = 1;
23823 DECL_ARTIFICIAL (decl) = 1;
23824 DECL_IGNORED_P (decl) = 0;
23825 TREE_PUBLIC (decl) = 0;
23826 DECL_UNINLINABLE (decl) = 1;
23827
23828 /* Resolver is not external, body is generated. */
23829 DECL_EXTERNAL (decl) = 0;
23830 DECL_EXTERNAL (dispatch_decl) = 0;
23831
23832 DECL_CONTEXT (decl) = NULL_TREE;
23833 DECL_INITIAL (decl) = make_node (BLOCK);
23834 DECL_STATIC_CONSTRUCTOR (decl) = 0;
23835
23836 /* Build result decl and add to function_decl. */
23837 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
23838 DECL_CONTEXT (t) = decl;
23839 DECL_ARTIFICIAL (t) = 1;
23840 DECL_IGNORED_P (t) = 1;
23841 DECL_RESULT (decl) = t;
23842
23843 gimplify_function_tree (decl);
23844 push_cfun (DECL_STRUCT_FUNCTION (decl));
23845 *empty_bb = init_lowered_empty_function (decl, false,
23846 profile_count::uninitialized ());
23847
23848 cgraph_node::add_new_function (decl, true);
23849 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
23850
23851 pop_cfun ();
23852
23853 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
23854 DECL_ATTRIBUTES (dispatch_decl)
23855 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
23856
23857 cgraph_node::create_same_body_alias (dispatch_decl, decl);
23858
23859 return decl;
23860 }
23861
23862 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
23863 return a pointer to VERSION_DECL if we are running on a machine that
23864 supports the index CLONE_ISA hardware architecture bits. This function will
23865 be called during version dispatch to decide which function version to
23866 execute. It returns the basic block at the end, to which more conditions
23867 can be added. */
23868
23869 static basic_block
23870 add_condition_to_bb (tree function_decl, tree version_decl,
23871 int clone_isa, basic_block new_bb)
23872 {
23873 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
23874
23875 gcc_assert (new_bb != NULL);
23876 gimple_seq gseq = bb_seq (new_bb);
23877
23879 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
23880 build_fold_addr_expr (version_decl));
23881 tree result_var = create_tmp_var (ptr_type_node);
23882 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
23883 gimple *return_stmt = gimple_build_return (result_var);
23884
23885 if (clone_isa == CLONE_DEFAULT)
23886 {
23887 gimple_seq_add_stmt (&gseq, convert_stmt);
23888 gimple_seq_add_stmt (&gseq, return_stmt);
23889 set_bb_seq (new_bb, gseq);
23890 gimple_set_bb (convert_stmt, new_bb);
23891 gimple_set_bb (return_stmt, new_bb);
23892 pop_cfun ();
23893 return new_bb;
23894 }
23895
23896 tree bool_zero = build_int_cst (bool_int_type_node, 0);
23897 tree cond_var = create_tmp_var (bool_int_type_node);
23898 tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
23899 const char *arg_str = rs6000_clone_map[clone_isa].name;
23900 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
23901 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
23902 gimple_call_set_lhs (call_cond_stmt, cond_var);
23903
23904 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
23905 gimple_set_bb (call_cond_stmt, new_bb);
23906 gimple_seq_add_stmt (&gseq, call_cond_stmt);
23907
23908 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
23909 NULL_TREE, NULL_TREE);
23910 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
23911 gimple_set_bb (if_else_stmt, new_bb);
23912 gimple_seq_add_stmt (&gseq, if_else_stmt);
23913
23914 gimple_seq_add_stmt (&gseq, convert_stmt);
23915 gimple_seq_add_stmt (&gseq, return_stmt);
23916 set_bb_seq (new_bb, gseq);
23917
23918 basic_block bb1 = new_bb;
23919 edge e12 = split_block (bb1, if_else_stmt);
23920 basic_block bb2 = e12->dest;
23921 e12->flags &= ~EDGE_FALLTHRU;
23922 e12->flags |= EDGE_TRUE_VALUE;
23923
23924 edge e23 = split_block (bb2, return_stmt);
23925 gimple_set_bb (convert_stmt, bb2);
23926 gimple_set_bb (return_stmt, bb2);
23927
23928 basic_block bb3 = e23->dest;
23929 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
23930
23931 remove_edge (e23);
23932 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
23933
23934 pop_cfun ();
23935 return bb3;
23936 }
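
/* Sketch (pseudo-C, illustrative symbol names) of the resolver body
   that repeated calls to add_condition_to_bb build up, highest
   priority first, with the default version as the final fallthrough:

     void *f.resolver (void)
     {
       if (__builtin_cpu_supports ("arch_3_00"))   -- power9 clone
	 return (void *) f_power9;
       return (void *) f_default;                  -- CLONE_DEFAULT
     }
*/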
23937
23938 /* This function generates the dispatch function for multi-versioned functions.
23939 DISPATCH_DECL is the function which will contain the dispatch logic.
23940 FNDECLS are the function choices for dispatch, and is a tree chain.
23941 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
23942 code is generated. */
23943
23944 static int
23945 dispatch_function_versions (tree dispatch_decl,
23946 void *fndecls_p,
23947 basic_block *empty_bb)
23948 {
23949 int ix;
23950 tree ele;
23951 vec<tree> *fndecls;
23952 tree clones[CLONE_MAX];
23953
23954 if (TARGET_DEBUG_TARGET)
23955 fputs ("dispatch_function_versions, top\n", stderr);
23956
23957 gcc_assert (dispatch_decl != NULL
23958 && fndecls_p != NULL
23959 && empty_bb != NULL);
23960
23961 /* fndecls_p is actually a vector. */
23962 fndecls = static_cast<vec<tree> *> (fndecls_p);
23963
23964 /* At least one more version other than the default. */
23965 gcc_assert (fndecls->length () >= 2);
23966
23967 /* The first version in the vector is the default decl. */
23968 memset ((void *) clones, '\0', sizeof (clones));
23969 clones[CLONE_DEFAULT] = (*fndecls)[0];
23970
23971 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
23972 on the PowerPC (on x86_64, it is not a NOP). The builtin function
23973 __builtin_cpu_supports ensures that the TOC fields are set up by requiring
23974 a recent glibc. If we ever need to call __builtin_cpu_init, we would need
23975 to insert the code here to do the call. */
23976
23977 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
23978 {
23979 int priority = rs6000_clone_priority (ele);
23980 if (!clones[priority])
23981 clones[priority] = ele;
23982 }
23983
23984 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
23985 if (clones[ix])
23986 {
23987 if (TARGET_DEBUG_TARGET)
23988 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
23989 ix, get_decl_name (clones[ix]));
23990
23991 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
23992 *empty_bb);
23993 }
23994
23995 return 0;
23996 }
23997
23998 /* Generate the dispatching code body to dispatch multi-versioned function
23999 DECL. The target hook is called to process the "target" attributes and
24000 provide the code to dispatch the right function at run-time. NODE points
24001 to the dispatcher decl whose body will be created. */
24002
24003 static tree
24004 rs6000_generate_version_dispatcher_body (void *node_p)
24005 {
24006 tree resolver;
24007 basic_block empty_bb;
24008 struct cgraph_node *node = (cgraph_node *) node_p;
24009 struct cgraph_function_version_info *ninfo = node->function_version ();
24010
24011 if (ninfo->dispatcher_resolver)
24012 return ninfo->dispatcher_resolver;
24013
24014 /* node is going to be an alias, so remove the finalized bit. */
24015 node->definition = false;
24016
24017 /* The first version in the chain corresponds to the default version. */
24018 ninfo->dispatcher_resolver = resolver
24019 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
24020
24021 if (TARGET_DEBUG_TARGET)
24022 fprintf (stderr, "rs6000_generate_version_dispatcher_body, %s\n",
24023 get_decl_name (resolver));
24024
24025 push_cfun (DECL_STRUCT_FUNCTION (resolver));
24026 auto_vec<tree, 2> fn_ver_vec;
24027
24028 for (struct cgraph_function_version_info *vinfo = ninfo->next;
24029 vinfo;
24030 vinfo = vinfo->next)
24031 {
24032 struct cgraph_node *version = vinfo->this_node;
24033 /* Check for virtual functions here again, as by this time it should
24034 have been determined if this function needs a vtable index or
24035 not. This happens for methods in derived classes that override
24036 virtual methods in base classes but are not explicitly marked as
24037 virtual. */
24038 if (DECL_VINDEX (version->decl))
24039 sorry ("virtual function multiversioning not supported");
24040
24041 fn_ver_vec.safe_push (version->decl);
24042 }
24043
24044 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
24045 cgraph_edge::rebuild_edges ();
24046 pop_cfun ();
24047 return resolver;
24048 }
24049
24050 \f
24051 /* Hook to determine if one function can safely inline another. */
24052
24053 static bool
24054 rs6000_can_inline_p (tree caller, tree callee)
24055 {
24056 bool ret = false;
24057 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
24058 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
24059
24060 /* If the callee has no option attributes, then it is ok to inline. */
24061 if (!callee_tree)
24062 ret = true;
24063
24064 else
24065 {
24066 HOST_WIDE_INT caller_isa;
24067 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24068 HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
24069 HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;
24070
24071 /* If the caller has option attributes, then use them.
24072 Otherwise, use the command line options. */
24073 if (caller_tree)
24074 caller_isa = TREE_TARGET_OPTION (caller_tree)->x_rs6000_isa_flags;
24075 else
24076 caller_isa = rs6000_isa_flags;
24077
24078 /* The callee's options must be a subset of the caller's options, i.e.
24079 a vsx function may inline an altivec function, but a no-vsx function
24080 must not inline a vsx function. However, for those options that the
24081 callee has explicitly enabled or disabled, then we must enforce that
24082 the callee's and caller's options match exactly; see PR70010. */
24083 if (((caller_isa & callee_isa) == callee_isa)
24084 && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
24085 ret = true;
24086 }
24087
24088 if (TARGET_DEBUG_TARGET)
24089 fprintf (stderr, "rs6000_can_inline_p, caller %s, callee %s, %s inline\n",
24090 get_decl_name (caller), get_decl_name (callee),
24091 (ret ? "can" : "cannot"));
24092
24093 return ret;
24094 }
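
/* Illustration of the subset rule (hypothetical declarations):

     __attribute__ ((target ("altivec"))) static int callee (void);
     __attribute__ ((target ("vsx"))) int caller (void);

   caller may inline callee because the Altivec flags are a subset of
   the VSX flags, but a -mno-vsx caller could not inline a vsx callee,
   and any flag the callee set explicitly must match the caller
   exactly.  */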
24095 \f
24096 /* Allocate a stack temp and fix up the address so it meets the particular
24097 memory requirements (either offsettable or REG+REG addressing). */
24098
24099 rtx
24100 rs6000_allocate_stack_temp (machine_mode mode,
24101 bool offsettable_p,
24102 bool reg_reg_p)
24103 {
24104 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
24105 rtx addr = XEXP (stack, 0);
24106 int strict_p = reload_completed;
24107
24108 if (!legitimate_indirect_address_p (addr, strict_p))
24109 {
24110 if (offsettable_p
24111 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
24112 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
24113
24114 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
24115 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
24116 }
24117
24118 return stack;
24119 }
24120
24121 /* Given a memory reference, if it is not a reg or reg+reg addressing,
24122 convert to such a form to deal with memory reference instructions
24123 like STFIWX and LDBRX that only take reg+reg addressing. */
24124
24125 rtx
24126 rs6000_force_indexed_or_indirect_mem (rtx x)
24127 {
24128 machine_mode mode = GET_MODE (x);
24129
24130 gcc_assert (MEM_P (x));
24131 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
24132 {
24133 rtx addr = XEXP (x, 0);
24134 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
24135 {
24136 rtx reg = XEXP (addr, 0);
24137 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
24138 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
24139 gcc_assert (REG_P (reg));
24140 emit_insn (gen_add3_insn (reg, reg, size_rtx));
24141 addr = reg;
24142 }
24143 else if (GET_CODE (addr) == PRE_MODIFY)
24144 {
24145 rtx reg = XEXP (addr, 0);
24146 rtx expr = XEXP (addr, 1);
24147 gcc_assert (REG_P (reg));
24148 gcc_assert (GET_CODE (expr) == PLUS);
24149 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
24150 addr = reg;
24151 }
24152
24153 if (GET_CODE (addr) == PLUS)
24154 {
24155 rtx op0 = XEXP (addr, 0);
24156 rtx op1 = XEXP (addr, 1);
24157 op0 = force_reg (Pmode, op0);
24158 op1 = force_reg (Pmode, op1);
24159 x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
24160 }
24161 else
24162 x = replace_equiv_address (x, force_reg (Pmode, addr));
24163 }
24164
24165 return x;
24166 }
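
/* E.g. (illustrative RTL), a D-form reference such as

     (mem:DI (plus:DI (reg:DI 3) (const_int 16)))

   has its offset forced into a fresh pseudo,

     (set (reg:DI 200) (const_int 16))
     (mem:DI (plus:DI (reg:DI 3) (reg:DI 200)))

   so that X-form-only instructions such as LDBRX can accept it.  */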
24167
24168 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
24169
24170 On the RS/6000, all integer constants are acceptable, though most won't be
24171 valid for particular insns. Only easy FP constants are acceptable. */
24172
24173 static bool
24174 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
24175 {
24176 if (TARGET_ELF && tls_referenced_p (x))
24177 return false;
24178
24179 if (CONST_DOUBLE_P (x))
24180 return easy_fp_constant (x, mode);
24181
24182 if (GET_CODE (x) == CONST_VECTOR)
24183 return easy_vector_constant (x, mode);
24184
24185 return true;
24186 }
24187
24188 \f
24189 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
24190
24191 static bool
24192 chain_already_loaded (rtx_insn *last)
24193 {
24194 for (; last != NULL; last = PREV_INSN (last))
24195 {
24196 if (NONJUMP_INSN_P (last))
24197 {
24198 rtx patt = PATTERN (last);
24199
24200 if (GET_CODE (patt) == SET)
24201 {
24202 rtx lhs = XEXP (patt, 0);
24203
24204 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
24205 return true;
24206 }
24207 }
24208 }
24209 return false;
24210 }
24211
24212 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
24213
24214 void
24215 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24216 {
24217 rtx func = func_desc;
24218 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
24219 rtx toc_load = NULL_RTX;
24220 rtx toc_restore = NULL_RTX;
24221 rtx func_addr;
24222 rtx abi_reg = NULL_RTX;
24223 rtx call[4];
24224 int n_call;
24225 rtx insn;
24226 bool is_pltseq_longcall;
24227
24228 if (global_tlsarg)
24229 tlsarg = global_tlsarg;
24230
24231 /* Handle longcall attributes. */
24232 is_pltseq_longcall = false;
24233 if ((INTVAL (cookie) & CALL_LONG) != 0
24234 && GET_CODE (func_desc) == SYMBOL_REF)
24235 {
24236 func = rs6000_longcall_ref (func_desc, tlsarg);
24237 if (TARGET_PLTSEQ)
24238 is_pltseq_longcall = true;
24239 }
24240
24241 /* Handle indirect calls. */
24242 if (!SYMBOL_REF_P (func)
24243 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
24244 {
24245 if (!rs6000_pcrel_p (cfun))
24246 {
24247 /* Save the TOC into its reserved slot before the call,
24248 and prepare to restore it after the call. */
24249 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
24250 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
24251 gen_rtvec (1, stack_toc_offset),
24252 UNSPEC_TOCSLOT);
24253 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
24254
24255 /* Can we optimize saving the TOC in the prologue or
24256 do we need to do it at every call? */
24257 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
24258 cfun->machine->save_toc_in_prologue = true;
24259 else
24260 {
24261 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
24262 rtx stack_toc_mem = gen_frame_mem (Pmode,
24263 gen_rtx_PLUS (Pmode, stack_ptr,
24264 stack_toc_offset));
24265 MEM_VOLATILE_P (stack_toc_mem) = 1;
24266 if (is_pltseq_longcall)
24267 {
24268 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
24269 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24270 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
24271 }
24272 else
24273 emit_move_insn (stack_toc_mem, toc_reg);
24274 }
24275 }
24276
24277 if (DEFAULT_ABI == ABI_ELFv2)
24278 {
24279 /* A function pointer in the ELFv2 ABI is just a plain address, but
24280 the ABI requires it to be loaded into r12 before the call. */
24281 func_addr = gen_rtx_REG (Pmode, 12);
24282 if (!rtx_equal_p (func_addr, func))
24283 emit_move_insn (func_addr, func);
24284 abi_reg = func_addr;
24285 /* Indirect calls via CTR are strongly preferred over indirect
24286 calls via LR, so move the address there. Needed to mark
24287 this insn for linker plt sequence editing too. */
24288 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24289 if (is_pltseq_longcall)
24290 {
24291 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
24292 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24293 emit_insn (gen_rtx_SET (func_addr, mark_func));
24294 v = gen_rtvec (2, func_addr, func_desc);
24295 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24296 }
24297 else
24298 emit_move_insn (func_addr, abi_reg);
24299 }
24300 else
24301 {
24302 /* A function pointer under AIX is a pointer to a data area whose
24303 first word contains the actual address of the function, whose
24304 second word contains a pointer to its TOC, and whose third word
24305 contains a value to place in the static chain register (r11).
24306 Note that if we load the static chain, our "trampoline" need
24307 not have any executable code. */
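
	  /* Pictured as a sketch (Pmode-sized words):

	       struct aix_func_desc {
		 void *entry;   -- word 0: code address of the function
		 void *toc;     -- word 1: the function's TOC pointer
		 void *chain;   -- word 2: static chain value, if any
	       };  */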
24308
24309 /* Load up address of the actual function. */
24310 func = force_reg (Pmode, func);
24311 func_addr = gen_reg_rtx (Pmode);
24312 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
24313
24314 /* Indirect calls via CTR are strongly preferred over indirect
24315 calls via LR, so move the address there. */
24316 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
24317 emit_move_insn (ctr_reg, func_addr);
24318 func_addr = ctr_reg;
24319
24320 /* Prepare to load the TOC of the called function. Note that the
24321 TOC load must happen immediately before the actual call so
24322 that unwinding the TOC registers works correctly. See the
24323 comment in frob_update_context. */
24324 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
24325 rtx func_toc_mem = gen_rtx_MEM (Pmode,
24326 gen_rtx_PLUS (Pmode, func,
24327 func_toc_offset));
24328 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
24329
24330 /* If we have a static chain, load it up. But, if the call was
24331 originally direct, the 3rd word has not been written since no
24332 trampoline has been built, so we ought not to load it, lest we
24333 overwrite a static chain value. */
24334 if (!(GET_CODE (func_desc) == SYMBOL_REF
24335 && SYMBOL_REF_FUNCTION_P (func_desc))
24336 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
24337 && !chain_already_loaded (get_current_sequence ()->next->last))
24338 {
24339 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
24340 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
24341 rtx func_sc_mem = gen_rtx_MEM (Pmode,
24342 gen_rtx_PLUS (Pmode, func,
24343 func_sc_offset));
24344 emit_move_insn (sc_reg, func_sc_mem);
24345 abi_reg = sc_reg;
24346 }
24347 }
24348 }
24349 else
24350 {
24351 /* No TOC register needed for calls from PC-relative callers. */
24352 if (!rs6000_pcrel_p (cfun))
24353 /* Direct calls use the TOC: for local calls, the callee will
24354 assume the TOC register is set; for non-local calls, the
24355 PLT stub needs the TOC register. */
24356 abi_reg = toc_reg;
24357 func_addr = func;
24358 }
24359
24360 /* Create the call. */
24361 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24362 if (value != NULL_RTX)
24363 call[0] = gen_rtx_SET (value, call[0]);
24364 n_call = 1;
24365
24366 if (toc_load)
24367 call[n_call++] = toc_load;
24368 if (toc_restore)
24369 call[n_call++] = toc_restore;
24370
24371 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
24372
24373 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
24374 insn = emit_call_insn (insn);
24375
24376 /* Mention all registers defined by the ABI to hold information
24377 as uses in CALL_INSN_FUNCTION_USAGE. */
24378 if (abi_reg)
24379 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
24380 }
24381
24382 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
24383
24384 void
24385 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24386 {
24387 rtx call[2];
24388 rtx insn;
24389
24390 gcc_assert (INTVAL (cookie) == 0);
24391
24392 if (global_tlsarg)
24393 tlsarg = global_tlsarg;
24394
24395 /* Create the call. */
24396 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), tlsarg);
24397 if (value != NULL_RTX)
24398 call[0] = gen_rtx_SET (value, call[0]);
24399
24400 call[1] = simple_return_rtx;
24401
24402 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
24403 insn = emit_call_insn (insn);
24404
24405 /* Note use of the TOC register. */
24406 if (!rs6000_pcrel_p (cfun))
24407 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
24408 gen_rtx_REG (Pmode, TOC_REGNUM));
24409 }
24410
24411 /* Expand code to perform a call under the SYSV4 ABI. */
24412
24413 void
24414 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24415 {
24416 rtx func = func_desc;
24417 rtx func_addr;
24418 rtx call[4];
24419 rtx insn;
24420 rtx abi_reg = NULL_RTX;
24421 int n;
24422
24423 if (global_tlsarg)
24424 tlsarg = global_tlsarg;
24425
24426 /* Handle longcall attributes. */
24427 if ((INTVAL (cookie) & CALL_LONG) != 0
24428 && GET_CODE (func_desc) == SYMBOL_REF)
24429 {
24430 func = rs6000_longcall_ref (func_desc, tlsarg);
24431 /* If the longcall was implemented as an inline PLT call using
24432 PLT unspecs then func will be REG:r11. If not, func will be
24433 a pseudo reg. The inline PLT call sequence supports lazy
24434 linking (and longcalls to functions in dlopen'd libraries).
24435 The other style of longcall doesn't. The lazy linking entry
24436 to the dynamic symbol resolver requires r11 be the function
24437 address (as it is for linker generated PLT stubs). Ensure
24438 r11 stays valid to the bctrl by marking r11 used by the call. */
24439 if (TARGET_PLTSEQ)
24440 abi_reg = func;
24441 }
24442
24443 /* Handle indirect calls. */
24444 if (GET_CODE (func) != SYMBOL_REF)
24445 {
24446 func = force_reg (Pmode, func);
24447
24448 /* Indirect calls via CTR are strongly preferred over indirect
24449 calls via LR, so move the address there. That can't be left
24450 to reload because we want to mark every instruction in an
24451 inline PLT call sequence with a reloc, enabling the linker to
24452 edit the sequence back to a direct call when that makes sense. */
24453 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24454 if (abi_reg)
24455 {
24456 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
24457 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24458 emit_insn (gen_rtx_SET (func_addr, mark_func));
24459 v = gen_rtvec (2, func_addr, func_desc);
24460 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24461 }
24462 else
24463 emit_move_insn (func_addr, func);
24464 }
24465 else
24466 func_addr = func;
24467
24468 /* Create the call. */
24469 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24470 if (value != NULL_RTX)
24471 call[0] = gen_rtx_SET (value, call[0]);
24472
24473 call[1] = gen_rtx_USE (VOIDmode, cookie);
24474 n = 2;
24475 if (TARGET_SECURE_PLT
24476 && flag_pic
24477 && GET_CODE (func_addr) == SYMBOL_REF
24478 && !SYMBOL_REF_LOCAL_P (func_addr))
24479 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
24480
24481 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
24482
24483 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
24484 insn = emit_call_insn (insn);
24485 if (abi_reg)
24486 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
24487 }
24488
24489 /* Expand code to perform a sibling call under the SysV4 ABI. */
24490
24491 void
24492 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24493 {
24494 rtx func = func_desc;
24495 rtx func_addr;
24496 rtx call[3];
24497 rtx insn;
24498 rtx abi_reg = NULL_RTX;
24499
24500 if (global_tlsarg)
24501 tlsarg = global_tlsarg;
24502
24503 /* Handle longcall attributes. */
24504 if ((INTVAL (cookie) & CALL_LONG) != 0
24505 && GET_CODE (func_desc) == SYMBOL_REF)
24506 {
24507 func = rs6000_longcall_ref (func_desc, tlsarg);
24508 /* If the longcall was implemented as an inline PLT call using
24509 PLT unspecs then func will be REG:r11. If not, func will be
24510 a pseudo reg. The inline PLT call sequence supports lazy
24511 linking (and longcalls to functions in dlopen'd libraries).
24512 The other style of longcall doesn't. The lazy linking entry
24513 to the dynamic symbol resolver requires r11 be the function
24514 address (as it is for linker generated PLT stubs). Ensure
24515 r11 stays valid to the bctr by marking r11 used by the call. */
24516 if (TARGET_PLTSEQ)
24517 abi_reg = func;
24518 }
24519
24520 /* Handle indirect calls. */
24521 if (GET_CODE (func) != SYMBOL_REF)
24522 {
24523 func = force_reg (Pmode, func);
24524
24525 /* Indirect sibcalls must go via CTR. That can't be left to
24526 reload because we want to mark every instruction in an inline
24527 PLT call sequence with a reloc, enabling the linker to edit
24528 the sequence back to a direct call when that makes sense. */
24529 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24530 if (abi_reg)
24531 {
24532 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
24533 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24534 emit_insn (gen_rtx_SET (func_addr, mark_func));
24535 v = gen_rtvec (2, func_addr, func_desc);
24536 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24537 }
24538 else
24539 emit_move_insn (func_addr, func);
24540 }
24541 else
24542 func_addr = func;
24543
24544 /* Create the call. */
24545 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24546 if (value != NULL_RTX)
24547 call[0] = gen_rtx_SET (value, call[0]);
24548
24549 call[1] = gen_rtx_USE (VOIDmode, cookie);
24550 call[2] = simple_return_rtx;
24551
24552 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
24553 insn = emit_call_insn (insn);
24554 if (abi_reg)
24555 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
24556 }
24557
24558 #if TARGET_MACHO
24559
24560 /* Expand code to perform a call under the Darwin ABI.
24561 Modulo handling of mlongcall, this is much the same as sysv.
24562 If/when the longcall optimisation is removed, we could drop this
24563 code and use the sysv case (taking care to avoid the tls stuff).
24564
24565 We can use this for sibcalls too, if needed. */
24566
24567 void
24568 rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
24569 rtx cookie, bool sibcall)
24570 {
24571 rtx func = func_desc;
24572 rtx func_addr;
24573 rtx call[3];
24574 rtx insn;
24575 int cookie_val = INTVAL (cookie);
24576 bool make_island = false;
24577
24578 /* Handle longcall attributes; there are two cases for Darwin:
24579 1) Newer linkers are capable of synthesising any branch islands needed.
24580 2) We need a helper branch island synthesised by the compiler.
24581 The second case has mostly been retired and we don't use it for m64.
24582 In fact, it is only an optimisation; we could just indirect as sysv does,
24583 but we retain it for backwards compatibility for now.
24584 If we're going to use this, then we need to keep the CALL_LONG bit set,
24585 so that we can pick up the special insn form later. */
24586 if ((cookie_val & CALL_LONG) != 0
24587 && GET_CODE (func_desc) == SYMBOL_REF)
24588 {
24589 /* FIXME: the longcall optimisation should not hang off this flag; it is
24590 most likely incorrect for kernel-mode code generation. */
24591 if (darwin_symbol_stubs && TARGET_32BIT)
24592 make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */
24593 else
24594 {
24595 /* The linker is capable of doing this, but the user explicitly
24596 asked for -mlongcall, so we'll do the 'normal' version. */
24597 func = rs6000_longcall_ref (func_desc, NULL_RTX);
24598 cookie_val &= ~CALL_LONG; /* Handled, zap it. */
24599 }
24600 }
24601
24602 /* Handle indirect calls. */
24603 if (GET_CODE (func) != SYMBOL_REF)
24604 {
24605 func = force_reg (Pmode, func);
24606
24607 /* Indirect calls via CTR are strongly preferred over indirect
24608 calls via LR, and are required for indirect sibcalls, so move
24609 the address there. */
24610 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24611 emit_move_insn (func_addr, func);
24612 }
24613 else
24614 func_addr = func;
24615
24616 /* Create the call. */
24617 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24618 if (value != NULL_RTX)
24619 call[0] = gen_rtx_SET (value, call[0]);
24620
24621 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
24622
24623 if (sibcall)
24624 call[2] = simple_return_rtx;
24625 else
24626 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
24627
24628 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
24629 insn = emit_call_insn (insn);
24630 /* Now that we have the debug info in the insn, we can set up the branch
24631 island if we're using one. */
24632 if (make_island)
24633 {
24634 tree funname = get_identifier (XSTR (func_desc, 0));
24635
24636 if (no_previous_def (funname))
24637 {
24638 rtx label_rtx = gen_label_rtx ();
24639 char *label_buf, temp_buf[256];
24640 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
24641 CODE_LABEL_NUMBER (label_rtx));
24642 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
24643 tree labelname = get_identifier (label_buf);
24644 add_compiler_branch_island (labelname, funname,
24645 insn_line ((const rtx_insn*)insn));
24646 }
24647 }
24648 }
24649 #endif
24650
24651 void
24652 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
24653 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
24654 {
24655 #if TARGET_MACHO
24656 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
24657 #else
24658 gcc_unreachable ();
24659 #endif
24660 }
24661
24663 void
24664 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
24665 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
24666 {
24667 #if TARGET_MACHO
24668 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
24669 #else
24670 gcc_unreachable ();
24671 #endif
24672 }
24673
24674 /* Return whether we should generate PC-relative code for FNDECL. */
24675 bool
24676 rs6000_fndecl_pcrel_p (const_tree fndecl)
24677 {
24678 if (DEFAULT_ABI != ABI_ELFv2)
24679 return false;
24680
24681 struct cl_target_option *opts = target_opts_for_fn (fndecl);
24682
24683 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
24684 && TARGET_CMODEL == CMODEL_MEDIUM);
24685 }
24686
24687 /* Return whether we should generate PC-relative code for *FN. */
24688 bool
24689 rs6000_pcrel_p (struct function *fn)
24690 {
24691 if (DEFAULT_ABI != ABI_ELFv2)
24692 return false;
24693
24694 /* Optimize usual case. */
24695 if (fn == cfun)
24696 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
24697 && TARGET_CMODEL == CMODEL_MEDIUM);
24698
24699 return rs6000_fndecl_pcrel_p (fn->decl);
24700 }
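
/* For example (a sketch of the intended code shape, not verbatim
   output): with PC-relative code enabled, a read of a local global
   variable can be a single prefixed instruction,

     plwz 3,x@pcrel

   instead of the classic TOC-based pair

     addis 9,2,x@toc@ha
     lwz 3,x@toc@l(9)

   while external symbols go through the GOT with @got@pcrel.  */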
24701
24702 \f
24703 /* Given an address (ADDR), a mode (MODE), and what the format of the
24704 non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
24705 for the address. */
24706
24707 enum insn_form
24708 address_to_insn_form (rtx addr,
24709 machine_mode mode,
24710 enum non_prefixed_form non_prefixed_format)
24711 {
24712 /* Single register is easy. */
24713 if (REG_P (addr) || SUBREG_P (addr))
24714 return INSN_FORM_BASE_REG;
24715
24716 /* If the non-prefixed instruction format doesn't support offset addressing,
24717 make sure only indexed addressing is allowed.
24718
24719 We special case SDmode so that the register allocator does not try to move
24720 SDmode through GPR registers, but instead uses the 32-bit integer load and
24721 store instructions for the floating point registers. */
24722 if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
24723 {
24724 if (GET_CODE (addr) != PLUS)
24725 return INSN_FORM_BAD;
24726
24727 rtx op0 = XEXP (addr, 0);
24728 rtx op1 = XEXP (addr, 1);
24729 if (!REG_P (op0) && !SUBREG_P (op0))
24730 return INSN_FORM_BAD;
24731
24732 if (!REG_P (op1) && !SUBREG_P (op1))
24733 return INSN_FORM_BAD;
24734
24735 return INSN_FORM_X;
24736 }
24737
24738 /* Deal with update forms. */
24739 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
24740 return INSN_FORM_UPDATE;
24741
24742 /* Handle PC-relative symbols and labels. Check for both local and external
24743 symbols. Assume labels are always local. */
24744 if (TARGET_PCREL)
24745 {
24746 if (SYMBOL_REF_P (addr) && !SYMBOL_REF_LOCAL_P (addr))
24747 return INSN_FORM_PCREL_EXTERNAL;
24748
24749 if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
24750 return INSN_FORM_PCREL_LOCAL;
24751 }
24752
24753 if (GET_CODE (addr) == CONST)
24754 addr = XEXP (addr, 0);
24755
24756 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
24757 if (GET_CODE (addr) == LO_SUM)
24758 return INSN_FORM_LO_SUM;
24759
24760 /* Everything below must be an offset address of some form. */
24761 if (GET_CODE (addr) != PLUS)
24762 return INSN_FORM_BAD;
24763
24764 rtx op0 = XEXP (addr, 0);
24765 rtx op1 = XEXP (addr, 1);
24766
24767 /* Check for indexed addresses. */
24768 if (REG_P (op1) || SUBREG_P (op1))
24769 {
24770 if (REG_P (op0) || SUBREG_P (op0))
24771 return INSN_FORM_X;
24772
24773 return INSN_FORM_BAD;
24774 }
24775
24776 if (!CONST_INT_P (op1))
24777 return INSN_FORM_BAD;
24778
24779 HOST_WIDE_INT offset = INTVAL (op1);
24780 if (!SIGNED_INTEGER_34BIT_P (offset))
24781 return INSN_FORM_BAD;
24782
24783 /* Check for local and external PC-relative addresses. Labels are always
24784 local. */
24785 if (TARGET_PCREL)
24786 {
24787 if (SYMBOL_REF_P (op0) && !SYMBOL_REF_LOCAL_P (op0))
24788 return INSN_FORM_PCREL_EXTERNAL;
24789
24790 if (SYMBOL_REF_P (op0) || LABEL_REF_P (op0))
24791 return INSN_FORM_PCREL_LOCAL;
24792 }
24793
24794 /* If it isn't PC-relative, the address must use a base register. */
24795 if (!REG_P (op0) && !SUBREG_P (op0))
24796 return INSN_FORM_BAD;
24797
24798 /* Large offsets must be prefixed. */
24799 if (!SIGNED_INTEGER_16BIT_P (offset))
24800 {
24801 if (TARGET_PREFIXED_ADDR)
24802 return INSN_FORM_PREFIXED_NUMERIC;
24803
24804 return INSN_FORM_BAD;
24805 }
24806
24807 /* We have a 16-bit offset, see what default instruction format to use. */
24808 if (non_prefixed_format == NON_PREFIXED_DEFAULT)
24809 {
24810 unsigned size = GET_MODE_SIZE (mode);
24811
24812 /* On 64-bit systems, assume 64-bit integers need to use DS form
24813 addresses (for LD/STD). VSX vectors need to use DQ form addresses
24814 (for LXV and STXV). TImode is problematical in that its normal usage
24815 is expected to be GPRs where it wants a DS instruction format, but if
24816 it goes into the vector registers, it wants a DQ instruction
24817 format. */
24818 if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
24819 non_prefixed_format = NON_PREFIXED_DS;
24820
24821 else if (TARGET_VSX && size >= 16
24822 && (VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode)))
24823 non_prefixed_format = NON_PREFIXED_DQ;
24824
24825 else
24826 non_prefixed_format = NON_PREFIXED_D;
24827 }
24828
24829 /* Classify the D/DS/DQ-form addresses. */
24830 switch (non_prefixed_format)
24831 {
24832 /* Instruction format D, all 16 bits are valid. */
24833 case NON_PREFIXED_D:
24834 return INSN_FORM_D;
24835
24836 /* Instruction format DS, bottom 2 bits must be 0. */
24837 case NON_PREFIXED_DS:
24838 if ((offset & 3) == 0)
24839 return INSN_FORM_DS;
24840
24841 else if (TARGET_PREFIXED_ADDR)
24842 return INSN_FORM_PREFIXED_NUMERIC;
24843
24844 else
24845 return INSN_FORM_BAD;
24846
24847 /* Instruction format DQ, bottom 4 bits must be 0. */
24848 case NON_PREFIXED_DQ:
24849 if ((offset & 15) == 0)
24850 return INSN_FORM_DQ;
24851
24852 else if (TARGET_PREFIXED_ADDR)
24853 return INSN_FORM_PREFIXED_NUMERIC;
24854
24855 else
24856 return INSN_FORM_BAD;
24857
24858 default:
24859 break;
24860 }
24861
24862 return INSN_FORM_BAD;
24863 }
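
/* A few illustrative classifications, assuming prefixed addressing is
   enabled and the default non-prefixed format for the mode:

     (reg 3)                            -> INSN_FORM_BASE_REG
     (plus (reg 3) (reg 4))             -> INSN_FORM_X
     (plus (reg 3) (const_int 8))       -> INSN_FORM_D, _DS, or _DQ
     (plus (reg 3) (const_int 100000))  -> INSN_FORM_PREFIXED_NUMERIC
     (symbol_ref "x") with -mpcrel      -> INSN_FORM_PCREL_LOCAL  */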
24864
24865 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
24866 instruction format (D/DS/DQ) used for offset memory. */
24867
24868 static enum non_prefixed_form
24869 reg_to_non_prefixed (rtx reg, machine_mode mode)
24870 {
24871 /* If it isn't a register, use the defaults. */
24872 if (!REG_P (reg) && !SUBREG_P (reg))
24873 return NON_PREFIXED_DEFAULT;
24874
24875 unsigned int r = reg_or_subregno (reg);
24876
24877 /* If we have a pseudo, use the default instruction format. */
24878 if (!HARD_REGISTER_NUM_P (r))
24879 return NON_PREFIXED_DEFAULT;
24880
24881 unsigned size = GET_MODE_SIZE (mode);
24882
24883 /* FPR registers use the D format for scalars, and the DQ format for vectors,
24884 IEEE 128-bit floating point, and 128-bit integers. */
24885 if (FP_REGNO_P (r))
24886 {
24887 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
24888 return NON_PREFIXED_D;
24889
24890 else if (size < 8)
24891 return NON_PREFIXED_X;
24892
24893 else if (TARGET_VSX && size >= 16
24894 && (VECTOR_MODE_P (mode)
24895 || FLOAT128_VECTOR_P (mode)
24896 || mode == TImode || mode == CTImode))
24897 return NON_PREFIXED_DQ;
24898
24899 else
24900 return NON_PREFIXED_DEFAULT;
24901 }
24902
24903 /* Altivec registers use the DS format for scalars, and the DQ format for
24904 vectors, IEEE 128-bit floating point, and 128-bit integers. */
24905 else if (ALTIVEC_REGNO_P (r))
24906 {
24907 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
24908 return NON_PREFIXED_DS;
24909
24910 else if (size < 8)
24911 return NON_PREFIXED_X;
24912
24913 else if (TARGET_VSX && size >= 16
24914 && (VECTOR_MODE_P (mode)
24915 || FLOAT128_VECTOR_P (mode)
24916 || mode == TImode || mode == CTImode))
24917 return NON_PREFIXED_DQ;
24918
24919 else
24920 return NON_PREFIXED_DEFAULT;
24921 }
24922
24923 /* GPR registers use DS-mode for 64-bit items on 64-bit systems, and D-mode
24924 otherwise. Assume that any other register, such as LR, CRs, etc. will go
24925 through the GPR registers for memory operations. */
24926 else if (TARGET_POWERPC64 && size >= 8)
24927 return NON_PREFIXED_DS;
24928
24929 return NON_PREFIXED_D;
24930 }
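
/* Examples of the mapping (illustrative): DFmode in an FPR yields
   NON_PREFIXED_D (as for LFD), DImode in a GPR on a 64-bit target
   yields NON_PREFIXED_DS (LD), V2DImode in a VSX register yields
   NON_PREFIXED_DQ (LXV), and sub-doubleword modes in the FP/Altivec
   registers yield NON_PREFIXED_X, since they have no offset form.  */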
24931
24932 \f
24933 /* Whether a load instruction is a prefixed instruction. This is called from
24934 the prefixed attribute processing. */
24935
24936 bool
24937 prefixed_load_p (rtx_insn *insn)
24938 {
24939 /* Validate the insn to make sure it is a normal load insn. */
24940 extract_insn_cached (insn);
24941 if (recog_data.n_operands < 2)
24942 return false;
24943
24944 rtx reg = recog_data.operand[0];
24945 rtx mem = recog_data.operand[1];
24946
24947 if (!REG_P (reg) && !SUBREG_P (reg))
24948 return false;
24949
24950 if (!MEM_P (mem))
24951 return false;
24952
24953 /* Prefixed load instructions do not support update or indexed forms. */
24954 if (get_attr_indexed (insn) == INDEXED_YES
24955 || get_attr_update (insn) == UPDATE_YES)
24956 return false;
24957
24958 /* LWA uses the DS format instead of the D format that LWZ uses. */
24959 enum non_prefixed_form non_prefixed;
24960 machine_mode reg_mode = GET_MODE (reg);
24961 machine_mode mem_mode = GET_MODE (mem);
24962
24963 if (mem_mode == SImode && reg_mode == DImode
24964 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
24965 non_prefixed = NON_PREFIXED_DS;
24966
24967 else
24968 non_prefixed = reg_to_non_prefixed (reg, mem_mode);
24969
24970 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
24971 }
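
/* For instance, an SImode load sign-extended to DImode uses LWA, which
   is DS-form; an offset such as 6 fails the DS alignment check, so the
   load must become the prefixed PLWA, whereas the D-form LWZ would
   have taken the same offset unprefixed.  */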
24972
24973 /* Whether a store instruction is a prefixed instruction. This is called from
24974 the prefixed attribute processing. */
24975
24976 bool
24977 prefixed_store_p (rtx_insn *insn)
24978 {
24979 /* Validate the insn to make sure it is a normal store insn. */
24980 extract_insn_cached (insn);
24981 if (recog_data.n_operands < 2)
24982 return false;
24983
24984 rtx mem = recog_data.operand[0];
24985 rtx reg = recog_data.operand[1];
24986
24987 if (!REG_P (reg) && !SUBREG_P (reg))
24988 return false;
24989
24990 if (!MEM_P (mem))
24991 return false;
24992
24993 /* Prefixed store instructions do not support update or indexed forms. */
24994 if (get_attr_indexed (insn) == INDEXED_YES
24995 || get_attr_update (insn) == UPDATE_YES)
24996 return false;
24997
24998 machine_mode mem_mode = GET_MODE (mem);
24999 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
25000 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
25001 }
25002
25003 /* Whether a load immediate or add instruction is a prefixed instruction. This
25004 is called from the prefixed attribute processing. */
25005
25006 bool
25007 prefixed_paddi_p (rtx_insn *insn)
25008 {
25009 rtx set = single_set (insn);
25010 if (!set)
25011 return false;
25012
25013 rtx dest = SET_DEST (set);
25014 rtx src = SET_SRC (set);
25015
25016 if (!REG_P (dest) && !SUBREG_P (dest))
25017 return false;
25018
25019 /* Is this a load immediate that can't be done with a simple ADDI or
25020 ADDIS? */
25021 if (CONST_INT_P (src))
25022 return (satisfies_constraint_eI (src)
25023 && !satisfies_constraint_I (src)
25024 && !satisfies_constraint_L (src));
25025
25026 /* Is this a PADDI instruction that can't be done with a simple ADDI or
25027 ADDIS? */
25028 if (GET_CODE (src) == PLUS)
25029 {
25030 rtx op1 = XEXP (src, 1);
25031
25032 return (CONST_INT_P (op1)
25033 && satisfies_constraint_eI (op1)
25034 && !satisfies_constraint_I (op1)
25035 && !satisfies_constraint_L (op1));
25036 }
25037
25038 /* If not, is it a load of a PC-relative address? */
25039 if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
25040 return false;
25041
25042 if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
25043 return false;
25044
25045 enum insn_form iform = address_to_insn_form (src, Pmode,
25046 NON_PREFIXED_DEFAULT);
25047
25048 return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
25049 }
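
/* Illustration: on a prefixed target,

     long f (void) { return 100000; }

   can load the constant with a single "pli 3,100000", since 100000
   fits neither the 16-bit ADDI immediate nor the shifted ADDIS form
   and would classically need a two-instruction lis/ori sequence.  */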
25050
25051 /* Whether the next instruction needs a 'p' prefix issued before the
25052 instruction is printed out. */
25053 static bool next_insn_prefixed_p;
25054
25055 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
25056 outputting the assembler code. On the PowerPC, we remember if the current
25057 insn is a prefixed insn where we need to emit a 'p' before the insn.
25058
25059 In addition, if the insn is part of the optimization of a PC-relative
25060 reference to an external label, this is recorded as well. */
25061 void
25062 rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
25063 {
25064 next_insn_prefixed_p = (get_attr_prefixed (insn) != PREFIXED_NO);
25065 return;
25066 }
25067
25068 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
25069 We use it to emit a 'p' for prefixed insns that is set in
25070 FINAL_PRESCAN_INSN. */
25071 void
25072 rs6000_asm_output_opcode (FILE *stream)
25073 {
25074 if (next_insn_prefixed_p)
25075 fprintf (stream, "p");
25076
25077 return;
25078 }
25079
25080 /* Adjust the length of an INSN. LENGTH is the currently-computed length and
25081 should be adjusted to reflect any required changes. This macro is used when
25082 there is some systematic length adjustment required that would be difficult
25083 to express in the length attribute.
25084
25085 On the PowerPC, we use this to adjust the length of an instruction if one or
25086 more prefixed instructions are generated, using the attribute
25087 max_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the
25088 hardware requires that a prefixed instruction does not cross a 64-byte
25089 boundary. This means the compiler has to assume the length of the first
25090 prefixed instruction is 12 bytes instead of 8 bytes. Since the length is
25091 already set for the non-prefixed instruction, we just need to update for the
25092 difference. */
25093
25094 int
25095 rs6000_adjust_insn_length (rtx_insn *insn, int length)
25096 {
25097 if (TARGET_PREFIXED_ADDR && NONJUMP_INSN_P (insn))
25098 {
25099 rtx pattern = PATTERN (insn);
25100 if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
25101 && get_attr_prefixed (insn) == PREFIXED_YES)
25102 {
25103 int num_prefixed = get_attr_max_prefixed_insns (insn);
25104 length += 4 * (num_prefixed + 1);
25105 }
25106 }
25107
25108 return length;
25109 }
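
/* Worked example: a prefixed load whose non-prefixed form has length 4
   and whose max_prefixed_insns attribute is 1 ends up with length
   4 + 4 * (1 + 1) = 12 bytes, i.e. the 8-byte prefixed form plus room
   for a NOP to keep it from crossing a 64-byte boundary.  */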
25110
25111 \f
25112 #ifdef HAVE_GAS_HIDDEN
25113 # define USE_HIDDEN_LINKONCE 1
25114 #else
25115 # define USE_HIDDEN_LINKONCE 0
25116 #endif
25117
25118 /* Fills in the label name that should be used for a 476 link stack thunk. */
25119
25120 void
25121 get_ppc476_thunk_name (char name[32])
25122 {
25123 gcc_assert (TARGET_LINK_STACK);
25124
25125 if (USE_HIDDEN_LINKONCE)
25126 sprintf (name, "__ppc476.get_thunk");
25127 else
25128 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
25129 }
25130
25131 /* This function emits the simple thunk routine that is used to preserve
25132 the link stack on the 476 cpu. */
25133
25134 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
25135 static void
25136 rs6000_code_end (void)
25137 {
25138 char name[32];
25139 tree decl;
25140
25141 if (!TARGET_LINK_STACK)
25142 return;
25143
25144 get_ppc476_thunk_name (name);
25145
25146 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
25147 build_function_type_list (void_type_node, NULL_TREE));
25148 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
25149 NULL_TREE, void_type_node);
25150 TREE_PUBLIC (decl) = 1;
25151 TREE_STATIC (decl) = 1;
25152
25153 #if RS6000_WEAK
25154 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
25155 {
25156 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
25157 targetm.asm_out.unique_section (decl, 0);
25158 switch_to_section (get_named_section (decl, NULL, 0));
25159 DECL_WEAK (decl) = 1;
25160 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
25161 targetm.asm_out.globalize_label (asm_out_file, name);
25162 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
25163 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
25164 }
25165 else
25166 #endif
25167 {
25168 switch_to_section (text_section);
25169 ASM_OUTPUT_LABEL (asm_out_file, name);
25170 }
25171
25172 DECL_INITIAL (decl) = make_node (BLOCK);
25173 current_function_decl = decl;
25174 allocate_struct_function (decl, false);
25175 init_function_start (decl);
25176 first_function_block_is_cold = false;
25177 /* Make sure unwind info is emitted for the thunk if needed. */
25178 final_start_function (emit_barrier (), asm_out_file, 1);
25179
25180 fputs ("\tblr\n", asm_out_file);
25181
25182 final_end_function ();
25183 init_insn_lengths ();
25184 free_after_compilation (cfun);
25185 set_cfun (NULL);
25186 current_function_decl = NULL;
25187 }
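
/* On an ELF target with GAS hidden-symbol support, the thunk emitted above
   amounts to little more than (a sketch; the section and type directives
   come from targetm.asm_out.unique_section and ASM_DECLARE_FUNCTION_NAME):

	.weak __ppc476.get_thunk
	.hidden __ppc476.get_thunk
   __ppc476.get_thunk:
	blr
   */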
25188
25189 /* Add r30 to hard reg set if the prologue sets it up and it is not
25190 pic_offset_table_rtx. */
25191
25192 static void
25193 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
25194 {
25195 if (!TARGET_SINGLE_PIC_BASE
25196 && TARGET_TOC
25197 && TARGET_MINIMAL_TOC
25198 && !constant_pool_empty_p ())
25199 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
25200 if (cfun->machine->split_stack_argp_used)
25201 add_to_hard_reg_set (&set->set, Pmode, 12);
25202
25203 /* Make sure the hard reg set doesn't include r2, which was possibly added
25204 via PIC_OFFSET_TABLE_REGNUM. */
25205 if (TARGET_TOC)
25206 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
25207 }
25208
25209 \f
25210 /* Helper function for rs6000_split_logical to emit a logical instruction after
25211 splitting the operation into single GPR registers.
25212
25213 DEST is the destination register.
25214 OP1 and OP2 are the input source registers.
25215 CODE is the base operation (AND, IOR, XOR, NOT).
25216 MODE is the machine mode.
25217 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25218 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25219 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
25220
25221 static void
25222 rs6000_split_logical_inner (rtx dest,
25223 rtx op1,
25224 rtx op2,
25225 enum rtx_code code,
25226 machine_mode mode,
25227 bool complement_final_p,
25228 bool complement_op1_p,
25229 bool complement_op2_p)
25230 {
25231 rtx bool_rtx;
25232
25233 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
25234 if (op2 && CONST_INT_P (op2)
25235 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
25236 && !complement_final_p && !complement_op1_p && !complement_op2_p)
25237 {
25238 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
25239 HOST_WIDE_INT value = INTVAL (op2) & mask;
25240
25241 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
25242 if (code == AND)
25243 {
25244 if (value == 0)
25245 {
25246 emit_insn (gen_rtx_SET (dest, const0_rtx));
25247 return;
25248 }
25249
25250 else if (value == mask)
25251 {
25252 if (!rtx_equal_p (dest, op1))
25253 emit_insn (gen_rtx_SET (dest, op1));
25254 return;
25255 }
25256 }
25257
25258 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
25259 into separate ORI/ORIS or XORI/XORIS instructions. */
25260 else if (code == IOR || code == XOR)
25261 {
25262 if (value == 0)
25263 {
25264 if (!rtx_equal_p (dest, op1))
25265 emit_insn (gen_rtx_SET (dest, op1));
25266 return;
25267 }
25268 }
25269 }
25270
25271 if (code == AND && mode == SImode
25272 && !complement_final_p && !complement_op1_p && !complement_op2_p)
25273 {
25274 emit_insn (gen_andsi3 (dest, op1, op2));
25275 return;
25276 }
25277
25278 if (complement_op1_p)
25279 op1 = gen_rtx_NOT (mode, op1);
25280
25281 if (complement_op2_p)
25282 op2 = gen_rtx_NOT (mode, op2);
25283
25284 /* For canonical RTL, if only one arm is inverted it is the first. */
25285 if (!complement_op1_p && complement_op2_p)
25286 std::swap (op1, op2);
25287
25288 bool_rtx = ((code == NOT)
25289 ? gen_rtx_NOT (mode, op1)
25290 : gen_rtx_fmt_ee (code, mode, op1, op2));
25291
25292 if (complement_final_p)
25293 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
25294
25295 emit_insn (gen_rtx_SET (dest, bool_rtx));
25296 }
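
/* For example, with CODE == AND and only COMPLEMENT_OP2_P true, the swap
   above produces the canonical form with the inverted arm first,

	(set (reg:SI dest) (and:SI (not:SI (reg:SI op2)) (reg:SI op1)))

   which the MD patterns match as a single andc.  This is only an
   illustration of the code above, not a further transformation.  */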
25297
25298 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
25299 operations are split immediately during RTL generation to allow for more
25300 optimizations of the AND/IOR/XOR.
25301
25302 OPERANDS is an array containing the destination and two input operands.
25303 CODE is the base operation (AND, IOR, XOR, NOT).
25305 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25306 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25307 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
25310
25311 static void
25312 rs6000_split_logical_di (rtx operands[3],
25313 enum rtx_code code,
25314 bool complement_final_p,
25315 bool complement_op1_p,
25316 bool complement_op2_p)
25317 {
25318 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C (0xffffffff);
25319 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
25320 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C (0x80000000);
25321 enum hi_lo { hi = 0, lo = 1 };
25322 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
25323 size_t i;
25324
25325 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
25326 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
25327 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
25328 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
25329
25330 if (code == NOT)
25331 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
25332 else
25333 {
25334 if (!CONST_INT_P (operands[2]))
25335 {
25336 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
25337 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
25338 }
25339 else
25340 {
25341 HOST_WIDE_INT value = INTVAL (operands[2]);
25342 HOST_WIDE_INT value_hi_lo[2];
25343
25344 gcc_assert (!complement_final_p);
25345 gcc_assert (!complement_op1_p);
25346 gcc_assert (!complement_op2_p);
25347
25348 value_hi_lo[hi] = value >> 32;
25349 value_hi_lo[lo] = value & lower_32bits;
25350
25351 for (i = 0; i < 2; i++)
25352 {
25353 HOST_WIDE_INT sub_value = value_hi_lo[i];
25354
25355 if (sub_value & sign_bit)
25356 sub_value |= upper_32bits;
25357
25358 op2_hi_lo[i] = GEN_INT (sub_value);
25359
25360 /* If this is an AND instruction, check to see if we need to load
25361 the value into a register. */
25362 if (code == AND && sub_value != -1 && sub_value != 0
25363 && !and_operand (op2_hi_lo[i], SImode))
25364 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
25365 }
25366 }
25367 }
25368
25369 for (i = 0; i < 2; i++)
25370 {
25371 /* Split large IOR/XOR operations. */
25372 if ((code == IOR || code == XOR)
25373 && CONST_INT_P (op2_hi_lo[i])
25374 && !complement_final_p
25375 && !complement_op1_p
25376 && !complement_op2_p
25377 && !logical_const_operand (op2_hi_lo[i], SImode))
25378 {
25379 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
25380 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C (0xffff0000);
25381 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C (0x0000ffff);
25382 rtx tmp = gen_reg_rtx (SImode);
25383
25384 /* Make sure the constant is sign extended. */
25385 if ((hi_16bits & sign_bit) != 0)
25386 hi_16bits |= upper_32bits;
25387
25388 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
25389 code, SImode, false, false, false);
25390
25391 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
25392 code, SImode, false, false, false);
25393 }
25394 else
25395 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
25396 code, SImode, complement_final_p,
25397 complement_op1_p, complement_op2_p);
25398 }
25401 }
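
/* Example of the large IOR split above on a 32-bit target (register
   numbers are illustrative): OR-ing 0x12345678 into the low word becomes

	oris 9,4,0x1234		# tmp = op1 | hi_16bits
	ori 3,9,0x5678		# dest = tmp | lo_16bits

   while the high word, whose constant half is zero, degenerates into a
   simple move (or nothing, if the registers already match).  */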
25402
25403 /* Split the insns that make up boolean operations operating on multiple GPR
25404 registers. The boolean MD patterns ensure that the inputs either are
25405 exactly the same as the output registers, or there is no overlap.
25406
25407 OPERANDS is an array containing the destination and two input operands.
25408 CODE is the base operation (AND, IOR, XOR, NOT).
25409 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25410 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25411 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
25412
25413 void
25414 rs6000_split_logical (rtx operands[3],
25415 enum rtx_code code,
25416 bool complement_final_p,
25417 bool complement_op1_p,
25418 bool complement_op2_p)
25419 {
25420 machine_mode mode = GET_MODE (operands[0]);
25421 machine_mode sub_mode;
25422 rtx op0, op1, op2;
25423 int sub_size, regno0, regno1, nregs, i;
25424
25425 /* If this is DImode, use the specialized version that can run before
25426 register allocation. */
25427 if (mode == DImode && !TARGET_POWERPC64)
25428 {
25429 rs6000_split_logical_di (operands, code, complement_final_p,
25430 complement_op1_p, complement_op2_p);
25431 return;
25432 }
25433
25434 op0 = operands[0];
25435 op1 = operands[1];
25436 op2 = (code == NOT) ? NULL_RTX : operands[2];
25437 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
25438 sub_size = GET_MODE_SIZE (sub_mode);
25439 regno0 = REGNO (op0);
25440 regno1 = REGNO (op1);
25441
25442 gcc_assert (reload_completed);
25443 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
25444 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
25445
25446 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
25447 gcc_assert (nregs > 1);
25448
25449 if (op2 && REG_P (op2))
25450 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
25451
25452 for (i = 0; i < nregs; i++)
25453 {
25454 int offset = i * sub_size;
25455 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
25456 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
25457 rtx sub_op2 = ((code == NOT)
25458 ? NULL_RTX
25459 : simplify_subreg (sub_mode, op2, mode, offset));
25460
25461 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
25462 complement_final_p, complement_op1_p,
25463 complement_op2_p);
25464 }
25467 }
25468
25469 \f
25470 /* Return true if the peephole2 can combine a load involving a combination of
25471 an addis instruction and a load with an offset that can be fused together on
25472 a power8. */
25473
25474 bool
25475 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
25476 rtx addis_value, /* addis value. */
25477 rtx target, /* target register that is loaded. */
25478 rtx mem) /* bottom part of the memory addr. */
25479 {
25480 rtx addr;
25481 rtx base_reg;
25482
25483 /* Validate arguments. */
25484 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
25485 return false;
25486
25487 if (!base_reg_operand (target, GET_MODE (target)))
25488 return false;
25489
25490 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
25491 return false;
25492
25493 /* Allow sign/zero extension. */
25494 if (GET_CODE (mem) == ZERO_EXTEND
25495 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
25496 mem = XEXP (mem, 0);
25497
25498 if (!MEM_P (mem))
25499 return false;
25500
25501 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
25502 return false;
25503
25504 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
25505 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
25506 return false;
25507
25508 /* Validate that the register used to load the high value is either the
25509 register being loaded, or we can safely replace its use.
25510
25511 This function is only called from the peephole2 pass and we assume that
25512 there are 2 instructions in the peephole (addis and load), so we want to
25513 check if the target register was not used in the memory address and the
25514 register to hold the addis result is dead after the peephole. */
25515 if (REGNO (addis_reg) != REGNO (target))
25516 {
25517 if (reg_mentioned_p (target, mem))
25518 return false;
25519
25520 if (!peep2_reg_dead_p (2, addis_reg))
25521 return false;
25522
25523 /* If the target register being loaded is the stack pointer, we must
25524 avoid loading any other value into it, even temporarily. */
25525 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
25526 return false;
25527 }
25528
25529 base_reg = XEXP (addr, 0);
25530 return REGNO (addis_reg) == REGNO (base_reg);
25531 }
25532
25533 /* During the peephole2 pass, adjust and expand the insns for a load fusion
25534 sequence. We adjust the addis register to use the target register. If the
25535 load sign extends, we emit a zero-extending load followed by an explicit
25536 sign extension, since the fusion only covers zero-extending loads.
25538
25539 The operands are:
25540 operands[0] register set with addis (to be replaced with target)
25541 operands[1] value set via addis
25542 operands[2] target register being loaded
25543 operands[3] D-form memory reference using operands[0]. */
25544
25545 void
25546 expand_fusion_gpr_load (rtx *operands)
25547 {
25548 rtx addis_value = operands[1];
25549 rtx target = operands[2];
25550 rtx orig_mem = operands[3];
25551 rtx new_addr, new_mem, orig_addr, offset;
25552 enum rtx_code plus_or_lo_sum;
25553 machine_mode target_mode = GET_MODE (target);
25554 machine_mode extend_mode = target_mode;
25555 machine_mode ptr_mode = Pmode;
25556 enum rtx_code extend = UNKNOWN;
25557
25558 if (GET_CODE (orig_mem) == ZERO_EXTEND
25559 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
25560 {
25561 extend = GET_CODE (orig_mem);
25562 orig_mem = XEXP (orig_mem, 0);
25563 target_mode = GET_MODE (orig_mem);
25564 }
25565
25566 gcc_assert (MEM_P (orig_mem));
25567
25568 orig_addr = XEXP (orig_mem, 0);
25569 plus_or_lo_sum = GET_CODE (orig_addr);
25570 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
25571
25572 offset = XEXP (orig_addr, 1);
25573 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
25574 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
25575
25576 if (extend != UNKNOWN)
25577 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
25578
25579 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
25580 UNSPEC_FUSION_GPR);
25581 emit_insn (gen_rtx_SET (target, new_mem));
25582
25583 if (extend == SIGN_EXTEND)
25584 {
25585 int sub_off = ((BYTES_BIG_ENDIAN)
25586 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
25587 : 0);
25588 rtx sign_reg
25589 = simplify_subreg (target_mode, target, extend_mode, sub_off);
25590
25591 emit_insn (gen_rtx_SET (target,
25592 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
25593 }
25596 }
25597
25598 /* Emit the addis instruction that will be part of a fused instruction
25599 sequence. */
25600
25601 void
25602 emit_fusion_addis (rtx target, rtx addis_value)
25603 {
25604 rtx fuse_ops[10];
25605 const char *addis_str = NULL;
25606
25607 /* Emit the addis instruction. */
25608 fuse_ops[0] = target;
25609 if (satisfies_constraint_L (addis_value))
25610 {
25611 fuse_ops[1] = addis_value;
25612 addis_str = "lis %0,%v1";
25613 }
25614
25615 else if (GET_CODE (addis_value) == PLUS)
25616 {
25617 rtx op0 = XEXP (addis_value, 0);
25618 rtx op1 = XEXP (addis_value, 1);
25619
25620 if (REG_P (op0) && CONST_INT_P (op1)
25621 && satisfies_constraint_L (op1))
25622 {
25623 fuse_ops[1] = op0;
25624 fuse_ops[2] = op1;
25625 addis_str = "addis %0,%1,%v2";
25626 }
25627 }
25628
25629 else if (GET_CODE (addis_value) == HIGH)
25630 {
25631 rtx value = XEXP (addis_value, 0);
25632 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
25633 {
25634 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
25635 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
25636 if (TARGET_ELF)
25637 addis_str = "addis %0,%2,%1@toc@ha";
25638
25639 else if (TARGET_XCOFF)
25640 addis_str = "addis %0,%1@u(%2)";
25641
25642 else
25643 gcc_unreachable ();
25644 }
25645
25646 else if (GET_CODE (value) == PLUS)
25647 {
25648 rtx op0 = XEXP (value, 0);
25649 rtx op1 = XEXP (value, 1);
25650
25651 if (GET_CODE (op0) == UNSPEC
25652 && XINT (op0, 1) == UNSPEC_TOCREL
25653 && CONST_INT_P (op1))
25654 {
25655 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
25656 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
25657 fuse_ops[3] = op1;
25658 if (TARGET_ELF)
25659 addis_str = "addis %0,%2,%1+%3@toc@ha";
25660
25661 else if (TARGET_XCOFF)
25662 addis_str = "addis %0,%1+%3@u(%2)";
25663
25664 else
25665 gcc_unreachable ();
25666 }
25667 }
25668
25669 else if (satisfies_constraint_L (value))
25670 {
25671 fuse_ops[1] = value;
25672 addis_str = "lis %0,%v1";
25673 }
25674
25675 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
25676 {
25677 fuse_ops[1] = value;
25678 addis_str = "lis %0,%1@ha";
25679 }
25680 }
25681
25682 if (!addis_str)
25683 fatal_insn ("Could not generate addis value for fusion", addis_value);
25684
25685 output_asm_insn (addis_str, fuse_ops);
25686 }
25687
25688 /* Emit a D-form load or store instruction that is the second instruction
25689 of a fusion sequence. */
25690
25691 static void
25692 emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
25693 {
25694 rtx fuse_ops[10];
25695 char insn_template[80];
25696
25697 fuse_ops[0] = load_reg;
25698 fuse_ops[1] = addis_reg;
25699
25700 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
25701 {
25702 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
25703 fuse_ops[2] = offset;
25704 output_asm_insn (insn_template, fuse_ops);
25705 }
25706
25707 else if (GET_CODE (offset) == UNSPEC
25708 && XINT (offset, 1) == UNSPEC_TOCREL)
25709 {
25710 if (TARGET_ELF)
25711 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
25712
25713 else if (TARGET_XCOFF)
25714 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
25715
25716 else
25717 gcc_unreachable ();
25718
25719 fuse_ops[2] = XVECEXP (offset, 0, 0);
25720 output_asm_insn (insn_template, fuse_ops);
25721 }
25722
25723 else if (GET_CODE (offset) == PLUS
25724 && GET_CODE (XEXP (offset, 0)) == UNSPEC
25725 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
25726 && CONST_INT_P (XEXP (offset, 1)))
25727 {
25728 rtx tocrel_unspec = XEXP (offset, 0);
25729 if (TARGET_ELF)
25730 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
25731
25732 else if (TARGET_XCOFF)
25733 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
25734
25735 else
25736 gcc_unreachable ();
25737
25738 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
25739 fuse_ops[3] = XEXP (offset, 1);
25740 output_asm_insn (insn_template, fuse_ops);
25741 }
25742
25743 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
25744 {
25745 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
25746
25747 fuse_ops[2] = offset;
25748 output_asm_insn (insn_template, fuse_ops);
25749 }
25750
25751 else
25752 fatal_insn ("Unable to generate load/store offset for fusion", offset);
25755 }
25756
25757 /* Given an address, convert it into the addis and load offset parts. Addresses
25758 created during the peephole2 process look like:
25759 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
25760 (unspec [(...)] UNSPEC_TOCREL)) */
25761
25762 static void
25763 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
25764 {
25765 rtx hi, lo;
25766
25767 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
25768 {
25769 hi = XEXP (addr, 0);
25770 lo = XEXP (addr, 1);
25771 }
25772 else
25773 gcc_unreachable ();
25774
25775 *p_hi = hi;
25776 *p_lo = lo;
25777 }
25778
25779 /* Return a string to fuse an addis instruction with a GPR load into the same
25780 register that the addis instruction set up. The address used is the logical
25781 address that was formed during peephole2:
25782 (lo_sum (high) (low-part))
25783
25784 The code is complicated, so we call output_asm_insn directly, and just
25785 return "". */
25786
25787 const char *
25788 emit_fusion_gpr_load (rtx target, rtx mem)
25789 {
25790 rtx addis_value;
25791 rtx addr;
25792 rtx load_offset;
25793 const char *load_str = NULL;
25794 machine_mode mode;
25795
25796 if (GET_CODE (mem) == ZERO_EXTEND)
25797 mem = XEXP (mem, 0);
25798
25799 gcc_assert (REG_P (target) && MEM_P (mem));
25800
25801 addr = XEXP (mem, 0);
25802 fusion_split_address (addr, &addis_value, &load_offset);
25803
25804 /* Now emit the load instruction to the same register. */
25805 mode = GET_MODE (mem);
25806 switch (mode)
25807 {
25808 case E_QImode:
25809 load_str = "lbz";
25810 break;
25811
25812 case E_HImode:
25813 load_str = "lhz";
25814 break;
25815
25816 case E_SImode:
25817 case E_SFmode:
25818 load_str = "lwz";
25819 break;
25820
25821 case E_DImode:
25822 case E_DFmode:
25823 gcc_assert (TARGET_POWERPC64);
25824 load_str = "ld";
25825 break;
25826
25827 default:
25828 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
25829 }
25830
25831 /* Emit the addis instruction. */
25832 emit_fusion_addis (target, addis_value);
25833
25834 /* Emit the D-form load instruction. */
25835 emit_fusion_load (target, target, load_offset, load_str);
25836
25837 return "";
25838 }
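
/* Putting the helpers above together, a fused TOC-relative SImode load on
   ELF comes out as the back-to-back pair (a representative example; the
   relocation operators vary with the address form):

	addis 9,2,var@toc@ha
	lwz 9,var@toc@l(9)

   which power8 can fuse because the base register of the load is the same
   register the addis just set.  */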
25839 \f
25840
25841 #ifdef RS6000_GLIBC_ATOMIC_FENV
25842 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
25843 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
25844 #endif
25845
25846 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
25847
25848 static void
25849 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
25850 {
25851 if (!TARGET_HARD_FLOAT)
25852 {
25853 #ifdef RS6000_GLIBC_ATOMIC_FENV
25854 if (atomic_hold_decl == NULL_TREE)
25855 {
25856 atomic_hold_decl
25857 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
25858 get_identifier ("__atomic_feholdexcept"),
25859 build_function_type_list (void_type_node,
25860 double_ptr_type_node,
25861 NULL_TREE));
25862 TREE_PUBLIC (atomic_hold_decl) = 1;
25863 DECL_EXTERNAL (atomic_hold_decl) = 1;
25864 }
25865
25866 if (atomic_clear_decl == NULL_TREE)
25867 {
25868 atomic_clear_decl
25869 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
25870 get_identifier ("__atomic_feclearexcept"),
25871 build_function_type_list (void_type_node,
25872 NULL_TREE));
25873 TREE_PUBLIC (atomic_clear_decl) = 1;
25874 DECL_EXTERNAL (atomic_clear_decl) = 1;
25875 }
25876
25877 tree const_double = build_qualified_type (double_type_node,
25878 TYPE_QUAL_CONST);
25879 tree const_double_ptr = build_pointer_type (const_double);
25880 if (atomic_update_decl == NULL_TREE)
25881 {
25882 atomic_update_decl
25883 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
25884 get_identifier ("__atomic_feupdateenv"),
25885 build_function_type_list (void_type_node,
25886 const_double_ptr,
25887 NULL_TREE));
25888 TREE_PUBLIC (atomic_update_decl) = 1;
25889 DECL_EXTERNAL (atomic_update_decl) = 1;
25890 }
25891
25892 tree fenv_var = create_tmp_var_raw (double_type_node);
25893 TREE_ADDRESSABLE (fenv_var) = 1;
25894 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);
25895
25896 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
25897 *clear = build_call_expr (atomic_clear_decl, 0);
25898 *update = build_call_expr (atomic_update_decl, 1,
25899 fold_convert (const_double_ptr, fenv_addr));
25900 #endif
25901 return;
25902 }
25903
25904 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
25905 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
25906 tree call_mffs = build_call_expr (mffs, 0);
25907
25908 /* Generates the equivalent of feholdexcept (&fenv_var)
25909
25910 fenv_var = __builtin_mffs ();
25911 double fenv_hold;
25912 *(uint64_t*)&fenv_hold = *(uint64_t*)&fenv_var & 0xffffffff00000007LL;
25913 __builtin_mtfsf (0xff, fenv_hold); */
25914
25915 /* Mask to clear everything except for the rounding modes and non-IEEE
25916 arithmetic flag. */
25917 const unsigned HOST_WIDE_INT hold_exception_mask =
25918 HOST_WIDE_INT_C (0xffffffff00000007);
25919
25920 tree fenv_var = create_tmp_var_raw (double_type_node);
25921
25922 tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
25923
25924 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
25925 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
25926 build_int_cst (uint64_type_node,
25927 hold_exception_mask));
25928
25929 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
25930 fenv_llu_and);
25931
25932 tree hold_mtfsf = build_call_expr (mtfsf, 2,
25933 build_int_cst (unsigned_type_node, 0xff),
25934 fenv_hold_mtfsf);
25935
25936 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
25937
25938 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
25939
25940 double fenv_clear = __builtin_mffs ();
25941 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
25942 __builtin_mtfsf (0xff, fenv_clear); */
25943
25944 /* Mask that zeroes the entire lower word of the FPSCR image, i.e. the
25945 exception, enable, and rounding-mode bits. */
25946 const unsigned HOST_WIDE_INT clear_exception_mask =
25947 HOST_WIDE_INT_C (0xffffffff00000000);
25948
25949 tree fenv_clear = create_tmp_var_raw (double_type_node);
25950
25951 tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
25952
25953 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
25954 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
25955 fenv_clean_llu,
25956 build_int_cst (uint64_type_node,
25957 clear_exception_mask));
25958
25959 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
25960 fenv_clear_llu_and);
25961
25962 tree clear_mtfsf = build_call_expr (mtfsf, 2,
25963 build_int_cst (unsigned_type_node, 0xff),
25964 fenv_clear_mtfsf);
25965
25966 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
25967
25968 /* Generates the equivalent of feupdateenv (&fenv_var)
25969
25970 double old_fenv = __builtin_mffs ();
25971 double fenv_update;
25972 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
25973 (*(uint64_t*)&fenv_var & 0x1ff80fff);
25974 __builtin_mtfsf (0xff, fenv_update); */
25975
25976 const unsigned HOST_WIDE_INT update_exception_mask =
25977 HOST_WIDE_INT_C (0xffffffff1fffff00);
25978 const unsigned HOST_WIDE_INT new_exception_mask =
25979 HOST_WIDE_INT_C (0x1ff80fff);
25980
25981 tree old_fenv = create_tmp_var_raw (double_type_node);
25982 tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
25983
25984 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
25985 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
25986 build_int_cst (uint64_type_node,
25987 update_exception_mask));
25988
25989 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
25990 build_int_cst (uint64_type_node,
25991 new_exception_mask));
25992
25993 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
25994 old_llu_and, new_llu_and);
25995
25996 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
25997 new_llu_mask);
25998
25999 tree update_mtfsf = build_call_expr (mtfsf, 2,
26000 build_int_cst (unsigned_type_node, 0xff),
26001 fenv_update_mtfsf);
26002
26003 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
26004 }
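
/* The HOLD, CLEAR, and UPDATE sequences built above are consumed by the
   generic atomic expansion: HOLD is evaluated before the compare-and-swap
   loop, CLEAR inside the loop after a failed compare-and-swap to clear any
   exceptions the failed attempt raised, and UPDATE after the loop succeeds,
   merging newly raised exceptions into the saved environment.  Roughly, in
   C terms (mask_fpscr stands in for the BIT_AND_EXPR masking above and is
   named only for exposition):

	fenv_var = __builtin_mffs ();			// HOLD
	__builtin_mtfsf (0xff, mask_fpscr (fenv_var));
	... compare-and-swap loop, CLEAR on failure ...
	__builtin_mtfsf (0xff, merged_environment);	// UPDATE
   */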
26005
26006 void
26007 rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
26008 {
26009 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26010
26011 rtx_tmp0 = gen_reg_rtx (V2DFmode);
26012 rtx_tmp1 = gen_reg_rtx (V2DFmode);
26013
26014 /* The destination of the vmrgew instruction layout is:
26015 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
26016 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
26017 vmrgew instruction will be correct. */
26018 if (BYTES_BIG_ENDIAN)
26019 {
26020 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
26021 GEN_INT (0)));
26022 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
26023 GEN_INT (3)));
26024 }
26025 else
26026 {
26027 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
26028 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
26029 }
26030
26031 rtx_tmp2 = gen_reg_rtx (V4SFmode);
26032 rtx_tmp3 = gen_reg_rtx (V4SFmode);
26033
26034 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
26035 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));
26036
26037 if (BYTES_BIG_ENDIAN)
26038 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
26039 else
26040 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
26041 }
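
/* Worked out from the permutes above, the net effect on either endianness
   is the vec_float2 layout

	dst = { (float) src1[0], (float) src1[1],
		(float) src2[0], (float) src2[1] }

   with the endian-specific xxpermdi selectors compensating for the element
   numbering so both targets produce the same result.  */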
26042
26043 void
26044 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
26045 {
26046 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26047
26048 rtx_tmp0 = gen_reg_rtx (V2DImode);
26049 rtx_tmp1 = gen_reg_rtx (V2DImode);
26050
26051 /* The destination of the vmrgew instruction layout is:
26052 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
26053 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
26054 vmrgew instruction will be correct. */
26055 if (BYTES_BIG_ENDIAN)
26056 {
26057 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
26058 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
26059 }
26060 else
26061 {
26062 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
26063 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
26064 }
26065
26066 rtx_tmp2 = gen_reg_rtx (V4SFmode);
26067 rtx_tmp3 = gen_reg_rtx (V4SFmode);
26068
26069 if (signed_convert)
26070 {
26071 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
26072 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
26073 }
26074 else
26075 {
26076 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
26077 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
26078 }
26079
26080 if (BYTES_BIG_ENDIAN)
26081 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
26082 else
26083 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
26084 }
26085
26086 void
26087 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
26088 rtx src2)
26089 {
26090 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26091
26092 rtx_tmp0 = gen_reg_rtx (V2DFmode);
26093 rtx_tmp1 = gen_reg_rtx (V2DFmode);
26094
26095 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
26096 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
26097
26098 rtx_tmp2 = gen_reg_rtx (V4SImode);
26099 rtx_tmp3 = gen_reg_rtx (V4SImode);
26100
26101 if (signed_convert)
26102 {
26103 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
26104 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
26105 }
26106 else
26107 {
26108 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
26109 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
26110 }
26111
26112 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
26113 }
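
/* As with rs6000_generate_float2_double_code above, the net layout here is

	dst = { (int) src1[0], (int) src1[1], (int) src2[0], (int) src2[1] }

   with signed or unsigned conversions chosen by SIGNED_CONVERT.  */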
26114
26115 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
26116
26117 static bool
26118 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
26119 optimization_type opt_type)
26120 {
26121 switch (op)
26122 {
26123 case rsqrt_optab:
26124 return (opt_type == OPTIMIZE_FOR_SPEED
26125 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
26126
26127 default:
26128 return true;
26129 }
26130 }
26131
26132 /* Implement TARGET_CONSTANT_ALIGNMENT. */
26133
26134 static HOST_WIDE_INT
26135 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
26136 {
26137 if (TREE_CODE (exp) == STRING_CST
26138 && (STRICT_ALIGNMENT || !optimize_size))
26139 return MAX (align, BITS_PER_WORD);
26140 return align;
26141 }
26142
26143 /* Implement TARGET_STARTING_FRAME_OFFSET. */
26144
26145 static HOST_WIDE_INT
26146 rs6000_starting_frame_offset (void)
26147 {
26148 if (FRAME_GROWS_DOWNWARD)
26149 return 0;
26150 return RS6000_STARTING_FRAME_OFFSET;
26151 }
26152 \f
26153
26154 /* Create an alias for a mangled name where we have changed the mangling (in
26155 GCC 8.1, we used U10__float128, and now we use u9__ieee128). This is called
26156 via the target hook TARGET_ASM_GLOBALIZE_DECL_NAME. */
26157
26158 #if TARGET_ELF && RS6000_WEAK
26159 static void
26160 rs6000_globalize_decl_name (FILE * stream, tree decl)
26161 {
26162 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
26163
26164 targetm.asm_out.globalize_label (stream, name);
26165
26166 if (rs6000_passes_ieee128 && name[0] == '_' && name[1] == 'Z')
26167 {
26168 tree save_asm_name = DECL_ASSEMBLER_NAME (decl);
26169 const char *old_name;
26170
26171 ieee128_mangling_gcc_8_1 = true;
26172 lang_hooks.set_decl_assembler_name (decl);
26173 old_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
26174 SET_DECL_ASSEMBLER_NAME (decl, save_asm_name);
26175 ieee128_mangling_gcc_8_1 = false;
26176
26177 if (strcmp (name, old_name) != 0)
26178 {
26179 fprintf (stream, "\t.weak %s\n", old_name);
26180 fprintf (stream, "\t.set %s,%s\n", old_name, name);
26181 }
26182 }
26183 }
26184 #endif
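
/* An illustrative example of the alias emitted above: a C++ function
   "void f (__ieee128)" mangles as _Z1fu9__ieee128 today but mangled as
   _Z1fU10__float128 under GCC 8.1, so globalizing f also emits

	.weak _Z1fU10__float128
	.set _Z1fU10__float128,_Z1fu9__ieee128

   letting objects built with either mangling link against it.  */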
26185
26186 \f
26187 /* On 64-bit Linux and FreeBSD systems, possibly switch the long double library
26188 function names from <foo>l to <foo>f128 if the default long double type is
26189 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
26190 include file switches the names on systems that support long double as IEEE
26191 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
26192 In the future, glibc will export names like __ieee128_sinf128 and we can
26193 switch to using those instead of using sinf128, which pollutes the user's
26194 namespace.
26195
26196 This will switch the names for Fortran math functions as well (which doesn't
26197 use math.h). However, Fortran needs other changes to the compiler and
26198 library before you can switch the real*16 type at compile time.
26199
26200 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
26201 only do this if the default is that long double is IBM extended double, and
26202 the user asked for IEEE 128-bit. */
26203
26204 static tree
26205 rs6000_mangle_decl_assembler_name (tree decl, tree id)
26206 {
26207 if (!TARGET_IEEEQUAD_DEFAULT && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
26208 && TREE_CODE (decl) == FUNCTION_DECL && DECL_IS_BUILTIN (decl))
26209 {
26210 size_t len = IDENTIFIER_LENGTH (id);
26211 const char *name = IDENTIFIER_POINTER (id);
26212
26213 if (name[len - 1] == 'l')
26214 {
26215 bool uses_ieee128_p = false;
26216 tree type = TREE_TYPE (decl);
26217 machine_mode ret_mode = TYPE_MODE (type);
26218
26219 /* See if the function returns an IEEE 128-bit floating point type or
26220 complex type. */
26221 if (ret_mode == TFmode || ret_mode == TCmode)
26222 uses_ieee128_p = true;
26223 else
26224 {
26225 function_args_iterator args_iter;
26226 tree arg;
26227
26228 /* See if the function passes an IEEE 128-bit floating point type
26229 or complex type. */
26230 FOREACH_FUNCTION_ARGS (type, arg, args_iter)
26231 {
26232 machine_mode arg_mode = TYPE_MODE (arg);
26233 if (arg_mode == TFmode || arg_mode == TCmode)
26234 {
26235 uses_ieee128_p = true;
26236 break;
26237 }
26238 }
26239 }
26240
26241 /* If we passed or returned an IEEE 128-bit floating point type,
26242 change the name. */
26243 if (uses_ieee128_p)
26244 {
26245 char *name2 = (char *) alloca (len + 4);
26246 memcpy (name2, name, len - 1);
26247 strcpy (name2 + len - 1, "f128");
26248 id = get_identifier (name2);
26249 }
26250 }
26251 }
26252
26253 return id;
26254 }
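
/* For example, with -mabi=ieeelongdouble on a system whose default long
   double is IBM extended double, a call to __builtin_sinl resolves here to
   the assembler name "sinf128" rather than "sinl", because sin's TFmode
   argument and return value are IEEE 128-bit in that configuration.  */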
26255
26256 /* Predict whether the given loop in gimple will be transformed in the RTL
26257 doloop_optimize pass. */
26258
26259 static bool
26260 rs6000_predict_doloop_p (struct loop *loop)
26261 {
26262 gcc_assert (loop);
26263
26264 /* On rs6000, targetm.can_use_doloop_p is actually
26265 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
26266 if (loop->inner != NULL)
26267 {
26268 if (dump_file && (dump_flags & TDF_DETAILS))
26269 fprintf (dump_file, "Predict doloop failure due to"
26270 " loop nesting.\n");
26271 return false;
26272 }
26273
26274 return true;
26275 }
26276
26277 struct gcc_target targetm = TARGET_INITIALIZER;
26278
26279 #include "gt-rs6000.h"