// SPDX-License-Identifier: GPL-3.0-or-later
/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2023 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "attribs.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "gimple-walk.h"
#include "ssa.h"
#include "tree-vectorizer.h"
#include "tree-ssa-propagate.h"
#include "intl.h"
#include "tm-constrs.h"
#include "target-globals.h"
#include "builtins.h"
#include "tree-vector-builder.h"
#include "context.h"
#include "tree-pass.h"
#include "symbol-summary.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "except.h"
#include "case-cfn-macros.h"
#include "ppc-auxv.h"
#include "rs6000-internal.h"
#include "opts.h"

/* This file should be included last.  */
#include "target-def.h"

extern tree rs6000_builtin_mask_for_load (void);
extern tree rs6000_builtin_md_vectorized_function (tree, tree, tree);
extern tree rs6000_builtin_reciprocal (tree);

/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   properly defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif

/* Don't enable PC-relative addressing if the target does not support it.  */
#ifndef PCREL_SUPPORTED_BY_OS
#define PCREL_SUPPORTED_BY_OS 0
#endif

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

/* Track use of r13 in 64bit AIX TLS.  */
static bool xcoff_tls_exec_model_detected = false;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
bool rs6000_passes_float = false;
bool rs6000_passes_long_double = false;
/* Flag whether vector values have been passed/returned.  */
bool rs6000_passes_vector = false;
/* Flag whether small (<= 8 byte) structures have been returned.  */
bool rs6000_returns_struct = false;
#endif
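
/* Illustration only (an editor's hedged sketch, not code the compiler runs
   here): these flags are consumed when the assembly output is finalized,
   where the file-end hook emits a .gnu_attribute directive, roughly:

     if (rs6000_passes_float)
       fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp_abi_value);

   FP_ABI_VALUE is a hypothetical name for the computed Tag_GNU_Power_ABI_FP
   value, which encodes hard/soft float and the long double format.  */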

/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached result of rs6000_variable_issue.  It is set in the
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;

section *toc_section = 0;

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate.  */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options.  */
static struct
{
  const char *string;	/* option name.  */
  unsigned int mask;	/* mask bits to set.  */
} recip_options[] = {
  { "all",	RECIP_ALL },
  { "none",	RECIP_NONE },
  { "div",	(RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		 | RECIP_V2DF_DIV) },
  { "divf",	(RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	(RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	(RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		 | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	(RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
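
/* Illustration only (a hedged sketch, not code used from this spot): mapping
   one -mrecip=<option> string to its mask bits is a linear search of the
   table above.  A hypothetical helper showing the idea:

     static unsigned int
     recip_mask_for_option (const char *opt)
     {
       for (size_t i = 0; i < ARRAY_SIZE (recip_options); i++)
	 if (strcmp (opt, recip_options[i].string) == 0)
	   return recip_options[i].mask;
       return RECIP_NONE;
     }

   The real parsing happens during option override handling, which also
   diagnoses unknown option names.  */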

/* On PowerPC, we have a limited number of target clones that we care about,
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT	= 0,		/* default clone.  */
  CLONE_ISA_2_05,		/* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,		/* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,		/* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,		/* ISA 3.0 (power9).  */
  CLONE_ISA_3_1,		/* ISA 3.1 (power10).  */
  CLONE_MAX
};

/* Map compiler ISA bits into HWCAP names.  */
struct clone_map {
  HOST_WIDE_INT isa_mask;	/* rs6000_isa mask.  */
  const char *name;		/* name to use in __builtin_cpu_supports.  */
};

static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
  { 0,				"" },		/* Default options.  */
  { OPTION_MASK_CMPB,		"arch_2_05" },	/* ISA 2.05 (power6).  */
  { OPTION_MASK_POPCNTD,	"arch_2_06" },	/* ISA 2.06 (power7).  */
  { OPTION_MASK_P8_VECTOR,	"arch_2_07" },	/* ISA 2.07 (power8).  */
  { OPTION_MASK_P9_VECTOR,	"arch_3_00" },	/* ISA 3.0 (power9).  */
  { OPTION_MASK_POWER10,	"arch_3_1" },	/* ISA 3.1 (power10).  */
};
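
/* Illustration only: at the source level these names surface through the
   target_clones attribute and __builtin_cpu_supports.  A hedged user-level
   sketch (the function itself is hypothetical):

     __attribute__ ((target_clones ("default", "cpu=power9")))
     double dot_product (const double *a, const double *b, int n);

   The resolver GCC generates for such a function selects a clone by testing
   the HWCAP names from the table above, e.g.
   __builtin_cpu_supports ("arch_3_00") for the power9 version.  */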


/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p = false;

/* Pointer to function (in rs6000-c.cc) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.cc, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)


/* Register classes we care about for secondary reload and for the
   legitimate address checks.  We only need to worry about GPR, FPR, and
   Altivec registers here, along with an ANY field that is the OR of the
   3 register classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;	/* Register class name.  */
  int reg;		/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};

/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which modes can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* Quad offset is limited.  */

/* Masks of valid addressing modes, indexed by register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;		/* INSN to reload for loading.  */
  enum insn_code reload_store;		/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;			/* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_dq_form (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}
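
/* Illustration only: a hedged sketch of how the predicates above are meant
   to be used when validating an auto-increment address (the real callers
   are in the legitimate-address and reload code later in this file):

     if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
       ok = mode_supports_pre_incdec_p (mode);
     else if (GET_CODE (addr) == PRE_MODIFY)
       ok = mode_supports_pre_modify_p (mode);
*/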

/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */

int
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
	{
	  out_set = single_set (out_insn);
	  if (!out_set)
	    {
	      out_pat = PATTERN (out_insn);
	      if (GET_CODE (out_pat) == PARALLEL)
		{
		  for (i = 0; i < XVECLEN (out_pat, 0); i++)
		    {
		      out_exp = XVECEXP (out_pat, 0, i);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
	return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
	{
	  in_exp = XVECEXP (in_pat, 0, i);
	  if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
	    continue;
	  else if (GET_CODE (in_exp) != SET)
	    return false;

	  if (MEM_P (SET_DEST (in_exp)))
	    {
	      out_set = single_set (out_insn);
	      if (!out_set)
		{
		  out_pat = PATTERN (out_insn);
		  if (GET_CODE (out_pat) != PARALLEL)
		    return false;
		  for (j = 0; j < XVECLEN (out_pat, 0); j++)
		    {
		      out_exp = XVECEXP (out_pat, 0, j);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  return store_data_bypass_p (out_insn, in_insn);
}
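
/* Illustration only: this predicate is referenced by name as the guard of
   define_bypass entries in the processor scheduling descriptions (the
   power*.md files).  A hedged sketch of such an entry, with hypothetical
   reservation names:

     (define_bypass 1 "cpu-load" "cpu-store"
		    "rs6000_store_data_bypass_p")
*/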

\f
/* Processor costs (relative to an add) */

const struct processor_costs *rs6000_cost;

/* Instruction size costs on 32bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  32,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction size costs on 64bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  128,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),	/* mulsi */
  COSTS_N_INSNS (12),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (34),	/* muldi */
  COSTS_N_INSNS (65),	/* divsi */
  COSTS_N_INSNS (67),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (31),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  128,			/* cache line size */
  128,			/* l1 cache */
  2048,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (10),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (33),	/* divsi */
  COSTS_N_INSNS (33),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (35),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (34),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (11),	/* divsi */
  COSTS_N_INSNS (11),	/* divdi */
  COSTS_N_INSNS (6),	/* fp */
  COSTS_N_INSNS (6),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* l1 cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (36),	/* divsi */
  COSTS_N_INSNS (36),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (37),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  8,			/* l1 cache */
  64,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (21),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,	/* mulsi */
  COSTS_N_INSNS (6/2),		/* mulsi_const */
  COSTS_N_INSNS (6/2),		/* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,	/* muldi */
  COSTS_N_INSNS (38/2),		/* divsi */
  COSTS_N_INSNS (70/2),		/* divdi */
  COSTS_N_INSNS (10/2),		/* fp */
  COSTS_N_INSNS (10/2),		/* dmul */
  COSTS_N_INSNS (74/2),		/* sdiv */
  COSTS_N_INSNS (74/2),		/* ddiv */
  128,				/* cache line size */
  32,				/* l1 cache */
  512,				/* l2 cache */
  6,				/* streams */
  0,				/* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (17),	/* divsi */
  COSTS_N_INSNS (17),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (23),	/* divsi */
  COSTS_N_INSNS (23),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (21),	/* sdiv */
  COSTS_N_INSNS (35),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (29),	/* sdiv */
  COSTS_N_INSNS (29),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (8),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (18),	/* divdi */
  COSTS_N_INSNS (10),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (46),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  8,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),	/* mulsi */
  COSTS_N_INSNS (8),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (8),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (8),	/* divsi */
  COSTS_N_INSNS (12),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (18),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  8,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER10 processors.  */
static const
struct processor_costs power10_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (2),	/* fp */
  COSTS_N_INSNS (2),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (13),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  16,			/* prefetch streams */
  COSTS_N_INSNS (2),	/* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),	/* mulsi */
  COSTS_N_INSNS (16),	/* mulsi_const */
  COSTS_N_INSNS (16),	/* mulsi_const9 */
  COSTS_N_INSNS (16),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (59),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  64,			/* cache line size */
  16,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);

\f
static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
bool easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
#if TARGET_MACHO
static tree get_prev_label (tree);
#endif
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
						  reg_class_t,
						  reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode,
						machine_mode,
						reg_class_t);

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);

/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;
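
/* Illustration only: a hedged sketch of how a TOC entry would be looked up
   or created (the real code is in the TOC output routines later in this
   file; details may differ):

     toc_hash_struct temp = { x, mode, labelno };
     toc_hash_struct **slot = toc_hash_table->find_slot (&temp, INSERT);

   hash () and equal () above let semantically identical constants share a
   single TOC entry and label.  */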


\f
/* Default register names.  */
char rs6000_reg_names[][8] =
{
  /* GPRs */
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9", "10", "11", "12", "13", "14", "15",
 "16", "17", "18", "19", "20", "21", "22", "23",
 "24", "25", "26", "27", "28", "29", "30", "31",
  /* FPRs */
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9", "10", "11", "12", "13", "14", "15",
 "16", "17", "18", "19", "20", "21", "22", "23",
 "24", "25", "26", "27", "28", "29", "30", "31",
  /* VRs */
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9", "10", "11", "12", "13", "14", "15",
 "16", "17", "18", "19", "20", "21", "22", "23",
 "24", "25", "26", "27", "28", "29", "30", "31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  /* GPRs */
  "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
  "%r8",  "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
 "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
 "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  /* FPRs */
  "%f0",  "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
  "%f8",  "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
 "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
 "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  /* VRs */
  "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
  "%v8",  "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
 "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
 "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#endif

/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "altivec",   1, 1, false, true,  false, false,
    rs6000_handle_altivec_attribute, NULL },
  { "longcall",  0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "shortcall", 0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "ms_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
  { "gcc_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, false, NULL, NULL }
};
\f
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif
\f
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
#define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
  rs6000_print_patchable_function_entry

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC
#define TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC rs6000_gen_pic_addr_diff_vec

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_CREATE_COSTS
#define TARGET_VECTORIZE_CREATE_COSTS rs6000_vectorize_create_costs

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN rs6000_fold_builtin
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

1458 #if TARGET_MACHO
1459 #undef TARGET_BINDS_LOCAL_P
1460 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1461 #endif
1462
1463 #undef TARGET_MS_BITFIELD_LAYOUT_P
1464 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1465
1466 #undef TARGET_ASM_OUTPUT_MI_THUNK
1467 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1468
1469 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1470 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1471
1472 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1473 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1474
1475 #undef TARGET_REGISTER_MOVE_COST
1476 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1477 #undef TARGET_MEMORY_MOVE_COST
1478 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1479 #undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
1480 #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
1481 rs6000_ira_change_pseudo_allocno_class
1482 #undef TARGET_CANNOT_COPY_INSN_P
1483 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1484 #undef TARGET_RTX_COSTS
1485 #define TARGET_RTX_COSTS rs6000_rtx_costs
1486 #undef TARGET_ADDRESS_COST
1487 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1488 #undef TARGET_INSN_COST
1489 #define TARGET_INSN_COST rs6000_insn_cost
1490
1491 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1492 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1493
1494 #undef TARGET_PROMOTE_FUNCTION_MODE
1495 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1496
1497 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
1498 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE rs6000_override_options_after_change
1499
1500 #undef TARGET_RETURN_IN_MEMORY
1501 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1502
1503 #undef TARGET_RETURN_IN_MSB
1504 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1505
1506 #undef TARGET_SETUP_INCOMING_VARARGS
1507 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1508
1509 /* Always strict argument naming on rs6000. */
1510 #undef TARGET_STRICT_ARGUMENT_NAMING
1511 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1512 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1513 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1514 #undef TARGET_SPLIT_COMPLEX_ARG
1515 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1516 #undef TARGET_MUST_PASS_IN_STACK
1517 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1518 #undef TARGET_PASS_BY_REFERENCE
1519 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1520 #undef TARGET_ARG_PARTIAL_BYTES
1521 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1522 #undef TARGET_FUNCTION_ARG_ADVANCE
1523 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1524 #undef TARGET_FUNCTION_ARG
1525 #define TARGET_FUNCTION_ARG rs6000_function_arg
1526 #undef TARGET_FUNCTION_ARG_PADDING
1527 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1528 #undef TARGET_FUNCTION_ARG_BOUNDARY
1529 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1530
1531 #undef TARGET_BUILD_BUILTIN_VA_LIST
1532 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1533
1534 #undef TARGET_EXPAND_BUILTIN_VA_START
1535 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1536
1537 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1538 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1539
1540 #undef TARGET_EH_RETURN_FILTER_MODE
1541 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1542
1543 #undef TARGET_TRANSLATE_MODE_ATTRIBUTE
1544 #define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute
1545
1546 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1547 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1548
1549 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
1550 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
1551 rs6000_libgcc_floating_mode_supported_p
1552
1553 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1554 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1555
1556 #undef TARGET_FLOATN_MODE
1557 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1558
1559 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1560 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1561
1562 #undef TARGET_MD_ASM_ADJUST
1563 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1564
1565 #undef TARGET_OPTION_OVERRIDE
1566 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1567
1568 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1569 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1570 rs6000_builtin_vectorized_function
1571
1572 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1573 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1574 rs6000_builtin_md_vectorized_function
1575
1576 #undef TARGET_STACK_PROTECT_GUARD
1577 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1578
1579 #if !TARGET_MACHO
1580 #undef TARGET_STACK_PROTECT_FAIL
1581 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1582 #endif
1583
1584 #ifdef HAVE_AS_TLS
1585 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1586 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1587 #endif
1588
1589 /* Use a 32-bit anchor range. This leads to sequences like:
1590
1591 addis tmp,anchor,high
1592 add dest,tmp,low
1593
1594 where tmp itself acts as an anchor, and can be shared between
1595 accesses to the same 64k page. */
1596 #undef TARGET_MIN_ANCHOR_OFFSET
1597 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1598 #undef TARGET_MAX_ANCHOR_OFFSET
1599 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
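/* For example (a sketch, not output from a real compilation), two loads
   from the same anchored block can share the addis:

	addis tmp,anchor,high
	lwz r9,low(tmp)
	lwz r10,low+8(tmp)

   Note that TARGET_MIN_ANCHOR_OFFSET is spelled -0x7fffffff - 1 rather
   than -0x80000000, presumably because 0x80000000 does not fit in a
   signed int, so negating it would not yield INT_MIN.  */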
1600 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1601 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1602 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1603 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1604
1605 #undef TARGET_BUILTIN_RECIPROCAL
1606 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1607
1608 #undef TARGET_SECONDARY_RELOAD
1609 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1610 #undef TARGET_SECONDARY_MEMORY_NEEDED
1611 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1612 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1613 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1614
1615 #undef TARGET_LEGITIMATE_ADDRESS_P
1616 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1617
1618 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1619 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1620
1621 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1622 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1623
1624 #undef TARGET_CAN_ELIMINATE
1625 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1626
1627 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1628 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1629
1630 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1631 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1632
1633 #undef TARGET_TRAMPOLINE_INIT
1634 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1635
1636 #undef TARGET_FUNCTION_VALUE
1637 #define TARGET_FUNCTION_VALUE rs6000_function_value
1638
1639 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1640 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1641
1642 #undef TARGET_OPTION_SAVE
1643 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1644
1645 #undef TARGET_OPTION_RESTORE
1646 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1647
1648 #undef TARGET_OPTION_PRINT
1649 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1650
1651 #undef TARGET_CAN_INLINE_P
1652 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1653
1654 #undef TARGET_SET_CURRENT_FUNCTION
1655 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1656
1657 #undef TARGET_LEGITIMATE_CONSTANT_P
1658 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1659
1660 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1661 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1662
1663 #undef TARGET_CAN_USE_DOLOOP_P
1664 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1665
1666 #undef TARGET_PREDICT_DOLOOP_P
1667 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
1668
1669 #undef TARGET_HAVE_COUNT_REG_DECR_P
1670 #define TARGET_HAVE_COUNT_REG_DECR_P true
1671
1672 /* 1000000000 is infinite cost in IVOPTs. */
1673 #undef TARGET_DOLOOP_COST_FOR_GENERIC
1674 #define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000
1675
1676 #undef TARGET_DOLOOP_COST_FOR_ADDRESS
1677 #define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000
1678
1679 #undef TARGET_PREFERRED_DOLOOP_MODE
1680 #define TARGET_PREFERRED_DOLOOP_MODE rs6000_preferred_doloop_mode
1681
1682 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1683 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1684
1685 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1686 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1687 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1688 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1689 #undef TARGET_UNWIND_WORD_MODE
1690 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1691
1692 #undef TARGET_OFFLOAD_OPTIONS
1693 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1694
1695 #undef TARGET_C_MODE_FOR_SUFFIX
1696 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1697
1698 #undef TARGET_INVALID_BINARY_OP
1699 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1700
1701 #undef TARGET_OPTAB_SUPPORTED_P
1702 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1703
1704 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1705 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1706
1707 #undef TARGET_COMPARE_VERSION_PRIORITY
1708 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1709
1710 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1711 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1712 rs6000_generate_version_dispatcher_body
1713
1714 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1715 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1716 rs6000_get_function_versions_dispatcher
1717
1718 #undef TARGET_OPTION_FUNCTION_VERSIONS
1719 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1720
1721 #undef TARGET_HARD_REGNO_NREGS
1722 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1723 #undef TARGET_HARD_REGNO_MODE_OK
1724 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1725
1726 #undef TARGET_MODES_TIEABLE_P
1727 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1728
1729 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1730 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1731 rs6000_hard_regno_call_part_clobbered
1732
1733 #undef TARGET_SLOW_UNALIGNED_ACCESS
1734 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1735
1736 #undef TARGET_CAN_CHANGE_MODE_CLASS
1737 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1738
1739 #undef TARGET_CONSTANT_ALIGNMENT
1740 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1741
1742 #undef TARGET_STARTING_FRAME_OFFSET
1743 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1744
1745 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1746 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1747
1748 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1749 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1750
1751 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
1752 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
1753 rs6000_cannot_substitute_mem_equiv_p
1754
1755 #undef TARGET_INVALID_CONVERSION
1756 #define TARGET_INVALID_CONVERSION rs6000_invalid_conversion
1757
1758 #undef TARGET_NEED_IPA_FN_TARGET_INFO
1759 #define TARGET_NEED_IPA_FN_TARGET_INFO rs6000_need_ipa_fn_target_info
1760
1761 #undef TARGET_UPDATE_IPA_FN_TARGET_INFO
1762 #define TARGET_UPDATE_IPA_FN_TARGET_INFO rs6000_update_ipa_fn_target_info
1763 \f
1764
1765 /* Processor table. */
1766 struct rs6000_ptt
1767 {
1768 const char *const name; /* Canonical processor name. */
1769 const enum processor_type processor; /* Processor type enum value. */
1770 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1771 };
1772
1773 static struct rs6000_ptt const processor_target_table[] =
1774 {
1775 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1776 #include "rs6000-cpus.def"
1777 #undef RS6000_CPU
1778 };
1779
1780 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1781 name is invalid. */
1782
1783 static int
1784 rs6000_cpu_name_lookup (const char *name)
1785 {
1786 size_t i;
1787
1788 if (name != NULL)
1789 {
1790 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1791 if (! strcmp (name, processor_target_table[i].name))
1792 return (int)i;
1793 }
1794
1795 return -1;
1796 }
1797
1798 \f
1799 /* Return number of consecutive hard regs needed starting at reg REGNO
1800 to hold something of mode MODE.
1801 This is ordinarily the length in words of a value of mode MODE
1802 but can be less for certain modes in special long registers.
1803
1804 POWER and PowerPC GPRs hold 32 bits worth;
1805 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1806
1807 static int
1808 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1809 {
1810 unsigned HOST_WIDE_INT reg_size;
1811
1812 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1813 128-bit floating point that can go in vector registers, which has VSX
1814 memory addressing. */
1815 if (FP_REGNO_P (regno))
1816 reg_size = (VECTOR_MEM_VSX_P (mode) || VECTOR_ALIGNMENT_P (mode)
1817 ? UNITS_PER_VSX_WORD
1818 : UNITS_PER_FP_WORD);
1819
1820 else if (ALTIVEC_REGNO_P (regno))
1821 reg_size = UNITS_PER_ALTIVEC_WORD;
1822
1823 else
1824 reg_size = UNITS_PER_WORD;
1825
1826 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
1827 }
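/* A worked example of the rounding division above: DFmode is 8 bytes,
   so it takes (8 + 4 - 1) / 4 = 2 GPRs when UNITS_PER_WORD is 4, but
   (8 + 8 - 1) / 8 = 1 register in a 64-bit FPR or GPR.  */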
1828
1829 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1830 MODE. */
1831 static int
1832 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
1833 {
1834 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1835
1836 if (COMPLEX_MODE_P (mode))
1837 mode = GET_MODE_INNER (mode);
1838
1839 /* Vector pair modes need even/odd VSX register pairs. Only allow vector
1840 registers. */
1841 if (mode == OOmode)
1842 return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
1843
1844 /* MMA accumulator modes need FPR registers divisible by 4. */
1845 if (mode == XOmode)
1846 return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);
1847
1848 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1849 register combinations, and we use PTImode wherever we need to deal with
1850 quad word memory operations. Don't allow quad words in the argument or
1851 frame pointer registers, just registers 0..31. */
1852 if (mode == PTImode)
1853 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1854 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1855 && ((regno & 1) == 0));
1856
1857 /* VSX registers that overlap the FPR registers are larger than on non-VSX
1858 implementations. Don't allow an item to be split between an FP register
1859 and an Altivec register. Allow TImode in all VSX registers if the user
1860 asked for it. */
1861 if (TARGET_VSX && VSX_REGNO_P (regno)
1862 && (VECTOR_MEM_VSX_P (mode)
1863 || VECTOR_ALIGNMENT_P (mode)
1864 || reg_addr[mode].scalar_in_vmx_p
1865 || mode == TImode
1866 || (TARGET_VADDUQM && mode == V1TImode)))
1867 {
1868 if (FP_REGNO_P (regno))
1869 return FP_REGNO_P (last_regno);
1870
1871 if (ALTIVEC_REGNO_P (regno))
1872 {
1873 if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p)
1874 return 0;
1875
1876 return ALTIVEC_REGNO_P (last_regno);
1877 }
1878 }
1879
1880 /* The GPRs can hold any mode, but values bigger than one register
1881 cannot go past R31. */
1882 if (INT_REGNO_P (regno))
1883 return INT_REGNO_P (last_regno);
1884
1885 /* The float registers (except for VSX vector modes) can only hold floating
1886 modes and DImode. */
1887 if (FP_REGNO_P (regno))
1888 {
1889 if (VECTOR_ALIGNMENT_P (mode))
1890 return false;
1891
1892 if (SCALAR_FLOAT_MODE_P (mode)
1893 && (mode != TDmode || (regno % 2) == 0)
1894 && FP_REGNO_P (last_regno))
1895 return 1;
1896
1897 if (GET_MODE_CLASS (mode) == MODE_INT)
1898 {
1899 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1900 return 1;
1901
1902 if (TARGET_P8_VECTOR && (mode == SImode))
1903 return 1;
1904
1905 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
1906 return 1;
1907 }
1908
1909 return 0;
1910 }
1911
1912 /* The CR register can only hold CC modes. */
1913 if (CR_REGNO_P (regno))
1914 return GET_MODE_CLASS (mode) == MODE_CC;
1915
1916 if (CA_REGNO_P (regno))
1917 return mode == Pmode || mode == SImode;
1918
1919 /* AltiVec modes can only go in AltiVec registers. */
1920 if (ALTIVEC_REGNO_P (regno))
1921 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1922 || mode == V1TImode);
1923
1924 /* We cannot put non-VSX TImode or PTImode anywhere except the general
1925 registers, and it must be able to fit within the register set. */
1926
1927 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
1928 }
1929
1930 /* Implement TARGET_HARD_REGNO_NREGS. */
1931
1932 static unsigned int
1933 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
1934 {
1935 return rs6000_hard_regno_nregs[mode][regno];
1936 }
1937
1938 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1939
1940 static bool
1941 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
1942 {
1943 return rs6000_hard_regno_mode_ok_p[mode][regno];
1944 }
1945
1946 /* Implement TARGET_MODES_TIEABLE_P.
1947
1948 PTImode cannot tie with other modes because PTImode is restricted to even
1949 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1950 57744).
1951
1952 Similarly, don't allow OOmode (vector pair, restricted to even VSX
1953 registers) or XOmode (vector quad, restricted to FPR registers divisible
1954 by 4) to tie with other modes.
1955
1956 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
1957 128-bit floating point on VSX systems ties with other vectors. */
1958
1959 static bool
1960 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
1961 {
1962 if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
1963 || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
1964 return mode1 == mode2;
1965
1966 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
1967 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
1968 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
1969 return false;
1970
1971 if (SCALAR_FLOAT_MODE_P (mode1))
1972 return SCALAR_FLOAT_MODE_P (mode2);
1973 if (SCALAR_FLOAT_MODE_P (mode2))
1974 return false;
1975
1976 if (GET_MODE_CLASS (mode1) == MODE_CC)
1977 return GET_MODE_CLASS (mode2) == MODE_CC;
1978 if (GET_MODE_CLASS (mode2) == MODE_CC)
1979 return false;
1980
1981 return true;
1982 }
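/* For instance, under these rules TImode and PTImode do not tie (TImode
   can live in VSX registers where PTImode is not valid), SFmode and
   DFmode tie with each other, and neither ties with SImode, which is
   MODE_INT rather than a scalar float mode.  */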
1983
1984 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
1985
1986 static bool
1987 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
1988 machine_mode mode)
1989 {
1990 if (TARGET_32BIT
1991 && TARGET_POWERPC64
1992 && GET_MODE_SIZE (mode) > 4
1993 && INT_REGNO_P (regno))
1994 return true;
1995
1996 if (TARGET_VSX
1997 && FP_REGNO_P (regno)
1998 && GET_MODE_SIZE (mode) > 8
1999 && !FLOAT128_2REG_P (mode))
2000 return true;
2001
2002 return false;
2003 }
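/* For example, with -m32 -mpowerpc64 only the low 32 bits of a GPR
   holding a DImode value survive a call, so the hook returns true for
   that combination; likewise a 16-byte vector held in a traditional FPR
   keeps only its low 64 bits across calls when VSX is enabled.  */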
2004
2005 /* Print interesting facts about registers. */
2006 static void
2007 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2008 {
2009 int r, m;
2010
2011 for (r = first_regno; r <= last_regno; ++r)
2012 {
2013 const char *comma = "";
2014 int len;
2015
2016 if (first_regno == last_regno)
2017 fprintf (stderr, "%s:\t", reg_name);
2018 else
2019 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2020
2021 len = 8;
2022 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2023 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2024 {
2025 if (len > 70)
2026 {
2027 fprintf (stderr, ",\n\t");
2028 len = 8;
2029 comma = "";
2030 }
2031
2032 if (rs6000_hard_regno_nregs[m][r] > 1)
2033 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2034 rs6000_hard_regno_nregs[m][r]);
2035 else
2036 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2037
2038 comma = ", ";
2039 }
2040
2041 if (call_used_or_fixed_reg_p (r))
2042 {
2043 if (len > 70)
2044 {
2045 fprintf (stderr, ",\n\t");
2046 len = 8;
2047 comma = "";
2048 }
2049
2050 len += fprintf (stderr, "%s%s", comma, "call-used");
2051 comma = ", ";
2052 }
2053
2054 if (fixed_regs[r])
2055 {
2056 if (len > 70)
2057 {
2058 fprintf (stderr, ",\n\t");
2059 len = 8;
2060 comma = "";
2061 }
2062
2063 len += fprintf (stderr, "%s%s", comma, "fixed");
2064 comma = ", ";
2065 }
2066
2067 if (len > 70)
2068 {
2069 fprintf (stderr, ",\n\t");
2070 comma = "";
2071 }
2072
2073 len += fprintf (stderr, "%sreg-class = %s", comma,
2074 reg_class_names[(int)rs6000_regno_regclass[r]]);
2075 comma = ", ";
2076
2077 if (len > 70)
2078 {
2079 fprintf (stderr, ",\n\t");
2080 comma = "";
2081 }
2082
2083 fprintf (stderr, "%sregno = %d\n", comma, r);
2084 }
2085 }
2086
2087 static const char *
2088 rs6000_debug_vector_unit (enum rs6000_vector v)
2089 {
2090 const char *ret;
2091
2092 switch (v)
2093 {
2094 case VECTOR_NONE: ret = "none"; break;
2095 case VECTOR_ALTIVEC: ret = "altivec"; break;
2096 case VECTOR_VSX: ret = "vsx"; break;
2097 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2098 default: ret = "unknown"; break;
2099 }
2100
2101 return ret;
2102 }
2103
2104 /* Inner function printing just the address mask for a particular reload
2105 register class. */
2106 DEBUG_FUNCTION char *
2107 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2108 {
2109 static char ret[8];
2110 char *p = ret;
2111
2112 if ((mask & RELOAD_REG_VALID) != 0)
2113 *p++ = 'v';
2114 else if (keep_spaces)
2115 *p++ = ' ';
2116
2117 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2118 *p++ = 'm';
2119 else if (keep_spaces)
2120 *p++ = ' ';
2121
2122 if ((mask & RELOAD_REG_INDEXED) != 0)
2123 *p++ = 'i';
2124 else if (keep_spaces)
2125 *p++ = ' ';
2126
2127 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2128 *p++ = 'O';
2129 else if ((mask & RELOAD_REG_OFFSET) != 0)
2130 *p++ = 'o';
2131 else if (keep_spaces)
2132 *p++ = ' ';
2133
2134 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2135 *p++ = '+';
2136 else if (keep_spaces)
2137 *p++ = ' ';
2138
2139 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2140 *p++ = '+';
2141 else if (keep_spaces)
2142 *p++ = ' ';
2143
2144 if ((mask & RELOAD_REG_AND_M16) != 0)
2145 *p++ = '&';
2146 else if (keep_spaces)
2147 *p++ = ' ';
2148
2149 *p = '\0';
2150
2151 return ret;
2152 }
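/* As a hypothetical example, a register class that is valid, indexed,
   offsettable and allows both PRE_INC/PRE_DEC and PRE_MODIFY would print
   as "v io++ " when KEEP_SPACES is set, the blanks marking the unused
   'm' (multiple) and '&' (AND -16) slots.  */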
2153
2154 /* Print the address masks in a human-readable fashion. */
2155 DEBUG_FUNCTION void
2156 rs6000_debug_print_mode (ssize_t m)
2157 {
2158 ssize_t rc;
2159 int spaces = 0;
2160
2161 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2162 for (rc = 0; rc < N_RELOAD_REG; rc++)
2163 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2164 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2165
2166 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2167 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2168 {
2169 fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2170 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2171 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2172 spaces = 0;
2173 }
2174 else
2175 spaces += strlen (" Reload=sl");
2176
2177 if (reg_addr[m].scalar_in_vmx_p)
2178 {
2179 fprintf (stderr, "%*s Upper=y", spaces, "");
2180 spaces = 0;
2181 }
2182 else
2183 spaces += strlen (" Upper=y");
2184
2185 if (rs6000_vector_unit[m] != VECTOR_NONE
2186 || rs6000_vector_mem[m] != VECTOR_NONE)
2187 {
2188 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2189 spaces, "",
2190 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2191 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2192 }
2193
2194 fputs ("\n", stderr);
2195 }
2196
2197 #define DEBUG_FMT_ID "%-32s= "
2198 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2199 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2200 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
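/* E.g. fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size) uses
   the format "%-32s= %d\n": the name is printed left justified in a
   32-column field, followed by "= " and the value.  */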
2201
2202 /* Print various interesting information with -mdebug=reg. */
2203 static void
2204 rs6000_debug_reg_global (void)
2205 {
2206 static const char *const tf[2] = { "false", "true" };
2207 const char *nl = (const char *)0;
2208 int m;
2209 size_t m1, m2, v;
2210 char costly_num[20];
2211 char nop_num[20];
2212 char flags_buffer[40];
2213 const char *costly_str;
2214 const char *nop_str;
2215 const char *trace_str;
2216 const char *abi_str;
2217 const char *cmodel_str;
2218 struct cl_target_option cl_opts;
2219
2220 /* Modes we want tieable information on. */
2221 static const machine_mode print_tieable_modes[] = {
2222 QImode,
2223 HImode,
2224 SImode,
2225 DImode,
2226 TImode,
2227 PTImode,
2228 SFmode,
2229 DFmode,
2230 TFmode,
2231 IFmode,
2232 KFmode,
2233 SDmode,
2234 DDmode,
2235 TDmode,
2236 V2SImode,
2237 V2SFmode,
2238 V16QImode,
2239 V8HImode,
2240 V4SImode,
2241 V2DImode,
2242 V1TImode,
2243 V32QImode,
2244 V16HImode,
2245 V8SImode,
2246 V4DImode,
2247 V2TImode,
2248 V4SFmode,
2249 V2DFmode,
2250 V8SFmode,
2251 V4DFmode,
2252 OOmode,
2253 XOmode,
2254 CCmode,
2255 CCUNSmode,
2256 CCEQmode,
2257 CCFPmode,
2258 };
2259
2260 /* Virtual regs we are interested in. */
2261 static const struct {
2262 int regno; /* register number. */
2263 const char *name; /* register name. */
2264 } virtual_regs[] = {
2265 { STACK_POINTER_REGNUM, "stack pointer:" },
2266 { TOC_REGNUM, "toc: " },
2267 { STATIC_CHAIN_REGNUM, "static chain: " },
2268 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2269 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2270 { ARG_POINTER_REGNUM, "arg pointer: " },
2271 { FRAME_POINTER_REGNUM, "frame pointer:" },
2272 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2273 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2274 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2275 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2276 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2277 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2278 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2279 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2280 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2281 };
2282
2283 fputs ("\nHard register information:\n", stderr);
2284 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2285 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2286 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2287 LAST_ALTIVEC_REGNO,
2288 "vs");
2289 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2290 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2291 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2292 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2293 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2294 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2295
2296 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2297 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2298 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2299
2300 fprintf (stderr,
2301 "\n"
2302 "d reg_class = %s\n"
2303 "v reg_class = %s\n"
2304 "wa reg_class = %s\n"
2305 "we reg_class = %s\n"
2306 "wr reg_class = %s\n"
2307 "wx reg_class = %s\n"
2308 "wA reg_class = %s\n"
2309 "\n",
2310 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2311 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2312 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2313 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2314 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2315 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2316 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
2317
2318 nl = "\n";
2319 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2320 rs6000_debug_print_mode (m);
2321
2322 fputs ("\n", stderr);
2323
2324 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2325 {
2326 machine_mode mode1 = print_tieable_modes[m1];
2327 bool first_time = true;
2328
2329 nl = (const char *)0;
2330 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2331 {
2332 machine_mode mode2 = print_tieable_modes[m2];
2333 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2334 {
2335 if (first_time)
2336 {
2337 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2338 nl = "\n";
2339 first_time = false;
2340 }
2341
2342 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2343 }
2344 }
2345
2346 if (!first_time)
2347 fputs ("\n", stderr);
2348 }
2349
2350 if (nl)
2351 fputs (nl, stderr);
2352
2353 if (rs6000_recip_control)
2354 {
2355 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2356
2357 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2358 if (rs6000_recip_bits[m])
2359 {
2360 fprintf (stderr,
2361 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2362 GET_MODE_NAME (m),
2363 (RS6000_RECIP_AUTO_RE_P (m)
2364 ? "auto"
2365 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2366 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2367 ? "auto"
2368 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2369 }
2370
2371 fputs ("\n", stderr);
2372 }
2373
2374 if (rs6000_cpu_index >= 0)
2375 {
2376 const char *name = processor_target_table[rs6000_cpu_index].name;
2377 HOST_WIDE_INT flags
2378 = processor_target_table[rs6000_cpu_index].target_enable;
2379
2380 sprintf (flags_buffer, "-mcpu=%s flags", name);
2381 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2382 }
2383 else
2384 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2385
2386 if (rs6000_tune_index >= 0)
2387 {
2388 const char *name = processor_target_table[rs6000_tune_index].name;
2389 HOST_WIDE_INT flags
2390 = processor_target_table[rs6000_tune_index].target_enable;
2391
2392 sprintf (flags_buffer, "-mtune=%s flags", name);
2393 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2394 }
2395 else
2396 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2397
2398 cl_target_option_save (&cl_opts, &global_options, &global_options_set);
2399 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2400 rs6000_isa_flags);
2401
2402 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2403 rs6000_isa_flags_explicit);
2404
2405 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2406
2407 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2408 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2409
2410 switch (rs6000_sched_costly_dep)
2411 {
2412 case max_dep_latency:
2413 costly_str = "max_dep_latency";
2414 break;
2415
2416 case no_dep_costly:
2417 costly_str = "no_dep_costly";
2418 break;
2419
2420 case all_deps_costly:
2421 costly_str = "all_deps_costly";
2422 break;
2423
2424 case true_store_to_load_dep_costly:
2425 costly_str = "true_store_to_load_dep_costly";
2426 break;
2427
2428 case store_to_load_dep_costly:
2429 costly_str = "store_to_load_dep_costly";
2430 break;
2431
2432 default:
2433 costly_str = costly_num;
2434 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2435 break;
2436 }
2437
2438 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2439
2440 switch (rs6000_sched_insert_nops)
2441 {
2442 case sched_finish_regroup_exact:
2443 nop_str = "sched_finish_regroup_exact";
2444 break;
2445
2446 case sched_finish_pad_groups:
2447 nop_str = "sched_finish_pad_groups";
2448 break;
2449
2450 case sched_finish_none:
2451 nop_str = "sched_finish_none";
2452 break;
2453
2454 default:
2455 nop_str = nop_num;
2456 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2457 break;
2458 }
2459
2460 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2461
2462 switch (rs6000_sdata)
2463 {
2464 default:
2465 case SDATA_NONE:
2466 break;
2467
2468 case SDATA_DATA:
2469 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2470 break;
2471
2472 case SDATA_SYSV:
2473 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2474 break;
2475
2476 case SDATA_EABI:
2477 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2478 break;
2479
2480 }
2481
2482 switch (rs6000_traceback)
2483 {
2484 case traceback_default: trace_str = "default"; break;
2485 case traceback_none: trace_str = "none"; break;
2486 case traceback_part: trace_str = "part"; break;
2487 case traceback_full: trace_str = "full"; break;
2488 default: trace_str = "unknown"; break;
2489 }
2490
2491 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2492
2493 switch (rs6000_current_cmodel)
2494 {
2495 case CMODEL_SMALL: cmodel_str = "small"; break;
2496 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2497 case CMODEL_LARGE: cmodel_str = "large"; break;
2498 default: cmodel_str = "unknown"; break;
2499 }
2500
2501 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2502
2503 switch (rs6000_current_abi)
2504 {
2505 case ABI_NONE: abi_str = "none"; break;
2506 case ABI_AIX: abi_str = "aix"; break;
2507 case ABI_ELFv2: abi_str = "ELFv2"; break;
2508 case ABI_V4: abi_str = "V4"; break;
2509 case ABI_DARWIN: abi_str = "darwin"; break;
2510 default: abi_str = "unknown"; break;
2511 }
2512
2513 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2514
2515 if (rs6000_altivec_abi)
2516 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2517
2518 if (rs6000_aix_extabi)
2519 fprintf (stderr, DEBUG_FMT_S, "AIX vec-extabi", "true");
2520
2521 if (rs6000_darwin64_abi)
2522 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2523
2524 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2525 (TARGET_SOFT_FLOAT ? "true" : "false"));
2526
2527 if (TARGET_LINK_STACK)
2528 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2529
2530 if (TARGET_P8_FUSION)
2531 {
2532 char options[80];
2533
2534 strcpy (options, "power8");
2535 if (TARGET_P8_FUSION_SIGN)
2536 strcat (options, ", sign");
2537
2538 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2539 }
2540
2541 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2542 TARGET_SECURE_PLT ? "secure" : "bss");
2543 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2544 aix_struct_return ? "aix" : "sysv");
2545 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2546 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2547 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2548 tf[!!rs6000_align_branch_targets]);
2549 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2550 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2551 rs6000_long_double_type_size);
2552 if (rs6000_long_double_type_size > 64)
2553 {
2554 fprintf (stderr, DEBUG_FMT_S, "long double type",
2555 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2556 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2557 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2558 }
2559 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2560 (int)rs6000_sched_restricted_insns_priority);
2561 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2562 (int)END_BUILTINS);
2563
2564 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2565 (int)TARGET_FLOAT128_ENABLE_TYPE);
2566
2567 if (TARGET_VSX)
2568 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2569 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2570
2571 if (TARGET_DIRECT_MOVE_128)
2572 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2573 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2574 }
2575
2576 \f
2577 /* Update the addr mask bits in reg_addr to help secondary reload and the
2578 legitimate address support figure out the appropriate addressing to
2579 use. */
2580
2581 static void
2582 rs6000_setup_reg_addr_masks (void)
2583 {
2584 ssize_t rc, reg, m, nregs;
2585 addr_mask_type any_addr_mask, addr_mask;
2586
2587 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2588 {
2589 machine_mode m2 = (machine_mode) m;
2590 bool complex_p = false;
2591 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2592 size_t msize;
2593
2594 if (COMPLEX_MODE_P (m2))
2595 {
2596 complex_p = true;
2597 m2 = GET_MODE_INNER (m2);
2598 }
2599
2600 msize = GET_MODE_SIZE (m2);
2601
2602 /* SDmode is special in that we want to access it only via REG+REG
2603 addressing on power7 and above, since we want to use the LFIWZX and
2604 STFIWZX instructions to load it. */
2605 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2606
2607 any_addr_mask = 0;
2608 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2609 {
2610 addr_mask = 0;
2611 reg = reload_reg_map[rc].reg;
2612
2613 /* Can mode values go in the GPR/FPR/Altivec registers? */
2614 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2615 {
2616 bool small_int_vsx_p = (small_int_p
2617 && (rc == RELOAD_REG_FPR
2618 || rc == RELOAD_REG_VMX));
2619
2620 nregs = rs6000_hard_regno_nregs[m][reg];
2621 addr_mask |= RELOAD_REG_VALID;
2622
2623 /* Indicate if the mode takes more than 1 physical register. If
2624 it takes a single register, indicate it can do REG+REG
2625 addressing. Small integers in VSX registers can only do
2626 REG+REG addressing. */
2627 if (small_int_vsx_p)
2628 addr_mask |= RELOAD_REG_INDEXED;
2629 else if (nregs > 1 || m == BLKmode || complex_p)
2630 addr_mask |= RELOAD_REG_MULTIPLE;
2631 else
2632 addr_mask |= RELOAD_REG_INDEXED;
2633
2634 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2635 addressing. If we allow scalars into Altivec registers,
2636 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2637
2638 For VSX systems, we don't allow update addressing for
2639 DFmode/SFmode if those registers can go in both the
2640 traditional floating point registers and Altivec registers.
2641 The load/store instructions for the Altivec registers do not
2642 have update forms. If we allowed update addressing, it seems
2643 to break IV-OPT code using floating point if the index type is
2644 int instead of long (PR target/81550 and target/84042). */
2645
2646 if (TARGET_UPDATE
2647 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2648 && msize <= 8
2649 && !VECTOR_MODE_P (m2)
2650 && !VECTOR_ALIGNMENT_P (m2)
2651 && !complex_p
2652 && (m != E_DFmode || !TARGET_VSX)
2653 && (m != E_SFmode || !TARGET_P8_VECTOR)
2654 && !small_int_vsx_p)
2655 {
2656 addr_mask |= RELOAD_REG_PRE_INCDEC;
2657
2658 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2659 we don't allow PRE_MODIFY for some multi-register
2660 operations. */
2661 switch (m)
2662 {
2663 default:
2664 addr_mask |= RELOAD_REG_PRE_MODIFY;
2665 break;
2666
2667 case E_DImode:
2668 if (TARGET_POWERPC64)
2669 addr_mask |= RELOAD_REG_PRE_MODIFY;
2670 break;
2671
2672 case E_DFmode:
2673 case E_DDmode:
2674 if (TARGET_HARD_FLOAT)
2675 addr_mask |= RELOAD_REG_PRE_MODIFY;
2676 break;
2677 }
2678 }
2679 }
2680
2681 /* GPR and FPR registers can do REG+OFFSET addressing, except
2682 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2683 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2684 if ((addr_mask != 0) && !indexed_only_p
2685 && msize <= 8
2686 && (rc == RELOAD_REG_GPR
2687 || ((msize == 8 || m2 == SFmode)
2688 && (rc == RELOAD_REG_FPR
2689 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2690 addr_mask |= RELOAD_REG_OFFSET;
2691
2692 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2693 instructions are enabled. The offset field for 128-bit VSX registers
2694 is only 12 bits. While GPRs can handle the full offset range, VSX
2695 registers can only handle the restricted range. */
2696 else if ((addr_mask != 0) && !indexed_only_p
2697 && msize == 16 && TARGET_P9_VECTOR
2698 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2699 || (m2 == TImode && TARGET_VSX)))
2700 {
2701 addr_mask |= RELOAD_REG_OFFSET;
2702 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2703 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2704 }
2705
2706 /* Vector pairs can do both indexed and offset loads if the
2707 instructions are enabled, otherwise they can only do offset loads
2708 since it will be broken into two vector moves. Vector quads can
2709 only do offset loads. */
2710 else if ((addr_mask != 0) && TARGET_MMA
2711 && (m2 == OOmode || m2 == XOmode))
2712 {
2713 addr_mask |= RELOAD_REG_OFFSET;
2714 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2715 {
2716 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2717 if (m2 == OOmode)
2718 addr_mask |= RELOAD_REG_INDEXED;
2719 }
2720 }
2721
2722 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2723 addressing on 128-bit types. */
2724 if (rc == RELOAD_REG_VMX && msize == 16
2725 && (addr_mask & RELOAD_REG_VALID) != 0)
2726 addr_mask |= RELOAD_REG_AND_M16;
2727
2728 reg_addr[m].addr_mask[rc] = addr_mask;
2729 any_addr_mask |= addr_mask;
2730 }
2731
2732 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2733 }
2734 }
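/* A consumer of these masks might test them as in this sketch:

     if ((reg_addr[mode].addr_mask[RELOAD_REG_GPR] & RELOAD_REG_OFFSET) != 0)
       ... a GPR of this mode accepts REG+OFFSET addressing ...

   with RELOAD_REG_ANY holding the union of the bits valid in any
   register class.  */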
2735
2736 \f
2737 /* Initialize the various global tables that are based on register size. */
2738 static void
2739 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2740 {
2741 ssize_t r, m, c;
2742 int align64;
2743 int align32;
2744
2745 /* Precalculate REGNO_REG_CLASS. */
2746 rs6000_regno_regclass[0] = GENERAL_REGS;
2747 for (r = 1; r < 32; ++r)
2748 rs6000_regno_regclass[r] = BASE_REGS;
2749
2750 for (r = 32; r < 64; ++r)
2751 rs6000_regno_regclass[r] = FLOAT_REGS;
2752
2753 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2754 rs6000_regno_regclass[r] = NO_REGS;
2755
2756 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2757 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2758
2759 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2760 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2761 rs6000_regno_regclass[r] = CR_REGS;
2762
2763 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2764 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2765 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2766 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2767 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2768 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2769 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2770
2771 /* Precalculate register class to simpler reload register class. We don't
2772 need all of the register classes that are combinations of different
2773 classes, just the simple ones that have constraint letters. */
2774 for (c = 0; c < N_REG_CLASSES; c++)
2775 reg_class_to_reg_type[c] = NO_REG_TYPE;
2776
2777 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2778 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2779 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2780 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2781 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2782 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2783 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2784 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2785 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2786 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2787
2788 if (TARGET_VSX)
2789 {
2790 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2791 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2792 }
2793 else
2794 {
2795 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2796 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2797 }
2798
2799 /* Precalculate the valid memory formats as well as the vector information;
2800 this must be set up before the rs6000_hard_regno_nregs_internal calls
2801 below. */
2802 gcc_assert ((int)VECTOR_NONE == 0);
2803 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2804 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2805
2806 gcc_assert ((int)CODE_FOR_nothing == 0);
2807 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2808
2809 gcc_assert ((int)NO_REGS == 0);
2810 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2811
2812 /* The VSX hardware allows native alignment for vectors; TARGET_VSX_ALIGN_128
2813 controls whether the compiler may use it or must keep 128-bit alignment. */
2814 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2815 {
2816 align64 = 64;
2817 align32 = 32;
2818 }
2819 else
2820 {
2821 align64 = 128;
2822 align32 = 128;
2823 }
2824
2825 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2826 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2827 if (TARGET_FLOAT128_TYPE)
2828 {
2829 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2830 rs6000_vector_align[KFmode] = 128;
2831
2832 if (FLOAT128_IEEE_P (TFmode))
2833 {
2834 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2835 rs6000_vector_align[TFmode] = 128;
2836 }
2837 }
2838
2839 /* V2DF mode, VSX only. */
2840 if (TARGET_VSX)
2841 {
2842 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2843 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2844 rs6000_vector_align[V2DFmode] = align64;
2845 }
2846
2847 /* V4SF mode, either VSX or Altivec. */
2848 if (TARGET_VSX)
2849 {
2850 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2851 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2852 rs6000_vector_align[V4SFmode] = align32;
2853 }
2854 else if (TARGET_ALTIVEC)
2855 {
2856 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2857 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2858 rs6000_vector_align[V4SFmode] = align32;
2859 }
2860
2861 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2862 and stores. */
2863 if (TARGET_ALTIVEC)
2864 {
2865 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2866 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2867 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2868 rs6000_vector_align[V4SImode] = align32;
2869 rs6000_vector_align[V8HImode] = align32;
2870 rs6000_vector_align[V16QImode] = align32;
2871
2872 if (TARGET_VSX)
2873 {
2874 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2875 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2876 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2877 }
2878 else
2879 {
2880 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2881 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2882 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2883 }
2884 }
2885
2886 /* V2DImode: full arithmetic needs the ISA 2.07 vector unit; allow it under
2887 VSX for insert/splat/extract. Altivec lacks 64-bit integer support. */
2888 if (TARGET_VSX)
2889 {
2890 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2891 rs6000_vector_unit[V2DImode]
2892 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2893 rs6000_vector_align[V2DImode] = align64;
2894
2895 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2896 rs6000_vector_unit[V1TImode]
2897 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2898 rs6000_vector_align[V1TImode] = 128;
2899 }
2900
2901 /* DFmode, see if we want to use the VSX unit. Memory is handled
2902 differently, so don't set rs6000_vector_mem. */
2903 if (TARGET_VSX)
2904 {
2905 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2906 rs6000_vector_align[DFmode] = 64;
2907 }
2908
2909 /* SFmode, see if we want to use the VSX unit. */
2910 if (TARGET_P8_VECTOR)
2911 {
2912 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2913 rs6000_vector_align[SFmode] = 32;
2914 }
2915
2916 /* Allow TImode in VSX registers and set the VSX memory macros. */
2917 if (TARGET_VSX)
2918 {
2919 rs6000_vector_mem[TImode] = VECTOR_VSX;
2920 rs6000_vector_align[TImode] = align64;
2921 }
2922
2923 /* Add support for vector pairs and vector quad registers. */
2924 if (TARGET_MMA)
2925 {
2926 rs6000_vector_unit[OOmode] = VECTOR_NONE;
2927 rs6000_vector_mem[OOmode] = VECTOR_VSX;
2928 rs6000_vector_align[OOmode] = 256;
2929
2930 rs6000_vector_unit[XOmode] = VECTOR_NONE;
2931 rs6000_vector_mem[XOmode] = VECTOR_VSX;
2932 rs6000_vector_align[XOmode] = 512;
2933 }
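/* OOmode (a pair of 128-bit VSX registers) is 32 bytes and XOmode (an
   MMA accumulator built from four registers) is 64 bytes, hence the
   256- and 512-bit alignments above.  */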
2934
2935 /* Register class constraints for the constraints that depend on compile
2936 switches. When the VSX code was added, different constraints were added
2937 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2938 of the VSX registers are used. The register classes for scalar floating
2939 point types are set based on whether we allow that type into the upper
2940 (Altivec) registers. GCC has register classes to target the Altivec
2941 registers for load/store operations, to select using a VSX memory
2942 operation instead of the traditional floating point operation. The
2943 constraints are:
2944
2945 d - Register class to use with traditional DFmode instructions.
2946 v - Altivec register.
2947 wa - Any VSX register.
2948 wc - Reserved to represent individual CR bits (used in LLVM).
2949 wn - always NO_REGS.
2950 wr - GPR if 64-bit mode is permitted.
2951 wx - Float register if we can do 32-bit int stores. */
2952
2953 if (TARGET_HARD_FLOAT)
2954 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS;
2955 if (TARGET_ALTIVEC)
2956 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2957 if (TARGET_VSX)
2958 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2959
2960 if (TARGET_POWERPC64)
2961 {
2962 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2963 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
2964 }
2965
2966 if (TARGET_STFIWX)
2967 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2968
2969 /* Support for new direct moves (ISA 3.0 + 64-bit). */
2970 if (TARGET_DIRECT_MOVE_128)
2971 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
2972
2973 /* Set up the reload helper and direct move functions. */
2974 if (TARGET_VSX || TARGET_ALTIVEC)
2975 {
2976 if (TARGET_64BIT)
2977 {
2978 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2979 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2980 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2981 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2982 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2983 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
2984 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
2985 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
2986 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
2987 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
2988 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
2989 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
2990 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
2991 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
2992 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
2993 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
2994 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
2995 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
2996 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
2997 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
2998
2999 if (FLOAT128_VECTOR_P (KFmode))
3000 {
3001 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3002 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3003 }
3004
3005 if (FLOAT128_VECTOR_P (TFmode))
3006 {
3007 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3008 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3009 }
3010
3011 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3012 available. */
3013 if (TARGET_NO_SDMODE_STACK)
3014 {
3015 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3016 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3017 }
3018
3019 if (TARGET_VSX)
3020 {
3021 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3022 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3023 }
3024
3025 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3026 {
3027 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3028 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3029 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3030 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3031 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3032 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3033 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3034 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3035 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3036
3037 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3038 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3039 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3040 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3041 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3042 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3043 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3044 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3045 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3046
3047 if (FLOAT128_VECTOR_P (KFmode))
3048 {
3049 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3050 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3051 }
3052
3053 if (FLOAT128_VECTOR_P (TFmode))
3054 {
3055 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3056 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3057 }
3058
3059 if (TARGET_MMA)
3060 {
3061 reg_addr[OOmode].reload_store = CODE_FOR_reload_oo_di_store;
3062 reg_addr[OOmode].reload_load = CODE_FOR_reload_oo_di_load;
3063 reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store;
3064 reg_addr[XOmode].reload_load = CODE_FOR_reload_xo_di_load;
3065 }
3066 }
3067 }
3068 else
3069 {
3070 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3071 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3072 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3073 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3074 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3075 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3076 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3077 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3078 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3079 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3080 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3081 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3082 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3083 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3084 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3085 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3086 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3087 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3088 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3089 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3090
3091 if (FLOAT128_VECTOR_P (KFmode))
3092 {
3093 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3094 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3095 }
3096
3097 if (FLOAT128_IEEE_P (TFmode))
3098 {
3099 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3100 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3101 }
3102
3103 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3104 available. */
3105 if (TARGET_NO_SDMODE_STACK)
3106 {
3107 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3108 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3109 }
3110
3111 if (TARGET_VSX)
3112 {
3113 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3114 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3115 }
3116
3117 if (TARGET_DIRECT_MOVE)
3118 {
3119 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3120 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3121 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3122 }
3123 }
3124
3125 reg_addr[DFmode].scalar_in_vmx_p = true;
3126 reg_addr[DImode].scalar_in_vmx_p = true;
3127
3128 if (TARGET_P8_VECTOR)
3129 {
3130 reg_addr[SFmode].scalar_in_vmx_p = true;
3131 reg_addr[SImode].scalar_in_vmx_p = true;
3132
3133 if (TARGET_P9_VECTOR)
3134 {
3135 reg_addr[HImode].scalar_in_vmx_p = true;
3136 reg_addr[QImode].scalar_in_vmx_p = true;
3137 }
3138 }
3139 }
3140
3141 /* Precalculate HARD_REGNO_NREGS. */
3142 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3143 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3144 rs6000_hard_regno_nregs[m][r]
3145 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3146
3147 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3148 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3149 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3150 rs6000_hard_regno_mode_ok_p[m][r]
3151 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3152
3153 /* Precalculate CLASS_MAX_NREGS sizes. */
3154 for (c = 0; c < LIM_REG_CLASSES; ++c)
3155 {
3156 int reg_size;
3157
3158 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3159 reg_size = UNITS_PER_VSX_WORD;
3160
3161 else if (c == ALTIVEC_REGS)
3162 reg_size = UNITS_PER_ALTIVEC_WORD;
3163
3164 else if (c == FLOAT_REGS)
3165 reg_size = UNITS_PER_FP_WORD;
3166
3167 else
3168 reg_size = UNITS_PER_WORD;
3169
3170 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3171 {
3172 machine_mode m2 = (machine_mode)m;
3173 int reg_size2 = reg_size;
3174
3175 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3176 in VSX. */
3177 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3178 reg_size2 = UNITS_PER_FP_WORD;
3179
3180 rs6000_class_max_nregs[m][c]
3181 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3182 }
3183 }
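/* Worked example of the computation above: with VSX, V2DFmode in a VSX
   register class needs (16 + 16 - 1) / 16 = 1 register, while IBM long
   double (FLOAT128_2REG_P) still needs (16 + 8 - 1) / 8 = 2 registers
   because reg_size2 is forced back to UNITS_PER_FP_WORD.  */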
3184
3185 /* Calculate the modes for which to automatically generate code using the
3186 reciprocal divide and square root estimate instructions. In the future,
3187 possibly generate the instructions even if the user did not specify
3188 -mrecip; the older machines' double precision reciprocal square root
3189 estimate is not accurate enough. */
3190 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3191 if (TARGET_FRES)
3192 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3193 if (TARGET_FRE)
3194 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3195 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3196 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3197 if (VECTOR_UNIT_VSX_P (V2DFmode))
3198 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3199
3200 if (TARGET_FRSQRTES)
3201 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3202 if (TARGET_FRSQRTE)
3203 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3204 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3205 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3206 if (VECTOR_UNIT_VSX_P (V2DFmode))
3207 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3208
3209 if (rs6000_recip_control)
3210 {
3211 if (!flag_finite_math_only)
3212 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math-only",
3213 "-ffast-math");
3214 if (flag_trapping_math)
3215 warning (0, "%qs requires %qs or %qs", "-mrecip",
3216 "-fno-trapping-math", "-ffast-math");
3217 if (!flag_reciprocal_math)
3218 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3219 "-ffast-math");
3220 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3221 {
3222 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3223 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3224 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3225
3226 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3227 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3228 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3229
3230 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3231 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3232 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3233
3234 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3235 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3236 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3237
3238 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3239 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3240 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3241
3242 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3243 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3244 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3245
3246 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3247 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3248 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3249
3250 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3251 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3252 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3253 }
3254 }
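/* E.g. -mrecip together with -ffast-math on a CPU with FRE/FRSQRTE ends
   up with both the HAVE and AUTO bits set for DFmode, so division and
   square root can be expanded as an estimate refined by Newton-Raphson
   iterations; -mrecip alone triggers the warnings above and leaves the
   AUTO bits clear.  */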
3255
3256 /* Update the addr mask bits in reg_addr to help secondary reload and the
3257 legitimate address support figure out the appropriate addressing to
3258 use. */
3259 rs6000_setup_reg_addr_masks ();
3260
3261 if (global_init_p || TARGET_DEBUG_TARGET)
3262 {
3263 if (TARGET_DEBUG_REG)
3264 rs6000_debug_reg_global ();
3265
3266 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3267 fprintf (stderr,
3268 "SImode variable mult cost = %d\n"
3269 "SImode constant mult cost = %d\n"
3270 "SImode short constant mult cost = %d\n"
3271 "DImode multiplication cost = %d\n"
3272 "SImode division cost = %d\n"
3273 "DImode division cost = %d\n"
3274 "Simple fp operation cost = %d\n"
3275 "DFmode multiplication cost = %d\n"
3276 "SFmode division cost = %d\n"
3277 "DFmode division cost = %d\n"
3278 "cache line size = %d\n"
3279 "l1 cache size = %d\n"
3280 "l2 cache size = %d\n"
3281 "simultaneous prefetches = %d\n"
3282 "\n",
3283 rs6000_cost->mulsi,
3284 rs6000_cost->mulsi_const,
3285 rs6000_cost->mulsi_const9,
3286 rs6000_cost->muldi,
3287 rs6000_cost->divsi,
3288 rs6000_cost->divdi,
3289 rs6000_cost->fp,
3290 rs6000_cost->dmul,
3291 rs6000_cost->sdiv,
3292 rs6000_cost->ddiv,
3293 rs6000_cost->cache_line_size,
3294 rs6000_cost->l1_cache_size,
3295 rs6000_cost->l2_cache_size,
3296 rs6000_cost->simultaneous_prefetches);
3297 }
3298 }
3299
3300 #if TARGET_MACHO
3301 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3302
3303 static void
3304 darwin_rs6000_override_options (void)
3305 {
3306 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3307 off. */
3308 rs6000_altivec_abi = 1;
3309 TARGET_ALTIVEC_VRSAVE = 1;
3310 rs6000_current_abi = ABI_DARWIN;
3311
3312 if (DEFAULT_ABI == ABI_DARWIN
3313 && TARGET_64BIT)
3314 darwin_one_byte_bool = 1;
3315
3316 if (TARGET_64BIT && ! TARGET_POWERPC64)
3317 {
3318 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3319 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3320 }
3321
3322 /* The linkers [ld64] that support 64-bit do not need the JBSR longcall
3323 optimisation, and it will not work with the most generic case (where the
3324 symbol is an undefined external, but there is no symbol stub). */
3325 if (TARGET_64BIT)
3326 rs6000_default_long_calls = 0;
3327
3328 /* ld_classic is (so far) still used for kernel (static) code, and supports
3329 the JBSR longcall / branch islands. */
3330 if (flag_mkernel)
3331 {
3332 rs6000_default_long_calls = 1;
3333
3334 /* Allow a kext author to do -mkernel -mhard-float. */
3335 if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
3336 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3337 }
3338
3339 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3340 Altivec. */
3341 if (!flag_mkernel && !flag_apple_kext
3342 && TARGET_64BIT
3343 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3344 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3345
3346 /* Unless the user (not the configurer) has explicitly overridden
3347 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3348 G4 unless targeting the kernel. */
3349 if (!flag_mkernel
3350 && !flag_apple_kext
3351 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3352 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3353 && ! OPTION_SET_P (rs6000_cpu_index))
3354 {
3355 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3356 }
3357 }
3358 #endif
3359
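/* For example, strverscmp ("10.10", "10.5") > 0 because it compares numeric
   segments numerically, unlike plain strcmp, so the version test above treats
   macOS 10.10 as newer than 10.5 as intended.  */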
3360 /* If not otherwise specified by a target, make 'long double' equivalent to
3361 'double'. */
3362
3363 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3364 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3365 #endif
3366
3367 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3368 to clobber the XER[CA] bit because clobbering that bit without telling
3369 the compiler worked just fine with versions of GCC before GCC 5, and
3370 breaking a lot of older code in ways that are hard to track down is
3371 not such a great idea. */
3372
3373 static rtx_insn *
3374 rs6000_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/,
3375 vec<machine_mode> & /*input_modes*/,
3376 vec<const char *> & /*constraints*/, vec<rtx> &clobbers,
3377 HARD_REG_SET &clobbered_regs, location_t /*loc*/)
3378 {
3379 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3380 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3381 return NULL;
3382 }
3383
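/* Illustrative sketch only (not part of this file's logic): because of the
   hook above, an asm that modifies the carry bit without listing it, e.g.

       long lo = ..., hi = ...;
       __asm__ ("addic %0,%0,1\n\taddze %1,%1" : "+r" (lo), "+r" (hi));

   still behaves correctly: the implicit XER[CA] clobber stops the compiler
   from assuming the carry bit survives across the statement.  */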
3384 /* This target function is similar to the hook TARGET_OPTION_OVERRIDE
3385 but is called when the optimize level is changed via an attribute or
3386 pragma or when it is reset at the end of the code affected by the
3387 attribute or pragma. It is not called at the beginning of compilation
3388 when TARGET_OPTION_OVERRIDE is called so if you want to perform these
3389 actions then, you should have TARGET_OPTION_OVERRIDE call
3390 TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE. */
3391
3392 static void
3393 rs6000_override_options_after_change (void)
3394 {
3395 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
3396 turns -frename-registers on. */
3397 if ((OPTION_SET_P (flag_unroll_loops) && flag_unroll_loops)
3398 || (OPTION_SET_P (flag_unroll_all_loops)
3399 && flag_unroll_all_loops))
3400 {
3401 if (!OPTION_SET_P (unroll_only_small_loops))
3402 unroll_only_small_loops = 0;
3403 if (!OPTION_SET_P (flag_rename_registers))
3404 flag_rename_registers = 1;
3405 if (!OPTION_SET_P (flag_cunroll_grow_size))
3406 flag_cunroll_grow_size = 1;
3407 }
3408 else if (!OPTION_SET_P (flag_cunroll_grow_size))
3409 flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
3410
3411 /* If we are inserting ROP-protect instructions, disable shrink wrap. */
3412 if (rs6000_rop_protect)
3413 flag_shrink_wrap = 0;
3414 }
3415
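/* A worked example of the logic above (restating it, nothing new):

       gcc -O2 -funroll-loops ...

   clears unroll_only_small_loops and sets flag_rename_registers and
   flag_cunroll_grow_size, except where the user set those explicitly;
   without explicit unrolling, flag_cunroll_grow_size only defaults on
   at -O3+ or with -fpeel-loops.  */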
3416 #ifdef TARGET_USES_LINUX64_OPT
3417 static void
3418 rs6000_linux64_override_options ()
3419 {
3420 if (!OPTION_SET_P (rs6000_alignment_flags))
3421 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
3422 if (rs6000_isa_flags & OPTION_MASK_64BIT)
3423 {
3424 if (DEFAULT_ABI != ABI_AIX)
3425 {
3426 rs6000_current_abi = ABI_AIX;
3427 error (INVALID_64BIT, "call");
3428 }
3429 dot_symbols = !strcmp (rs6000_abi_name, "aixdesc");
3430 if (ELFv2_ABI_CHECK)
3431 {
3432 rs6000_current_abi = ABI_ELFv2;
3433 if (dot_symbols)
3434 error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>");
3435 }
3436 if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE)
3437 {
3438 rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE;
3439 error (INVALID_64BIT, "relocatable");
3440 }
3441 if (rs6000_isa_flags & OPTION_MASK_EABI)
3442 {
3443 rs6000_isa_flags &= ~OPTION_MASK_EABI;
3444 error (INVALID_64BIT, "eabi");
3445 }
3446 if (TARGET_PROTOTYPE)
3447 {
3448 target_prototype = 0;
3449 error (INVALID_64BIT, "prototype");
3450 }
3451 if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0)
3452 {
3453 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3454 error ("%<-m64%> requires a PowerPC64 cpu");
3455 }
3456 if (!OPTION_SET_P (rs6000_current_cmodel))
3457 SET_CMODEL (CMODEL_MEDIUM);
3458 if ((rs6000_isa_flags_explicit & OPTION_MASK_MINIMAL_TOC) != 0)
3459 {
3460 if (OPTION_SET_P (rs6000_current_cmodel)
3461 && rs6000_current_cmodel != CMODEL_SMALL)
3462 error ("%<-mcmodel%> incompatible with other toc options");
3463 if (TARGET_MINIMAL_TOC)
3464 SET_CMODEL (CMODEL_SMALL);
3465 else if (TARGET_PCREL
3466 || (PCREL_SUPPORTED_BY_OS
3467 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0))
3468 /* Ignore -mno-minimal-toc. */
3469 ;
3470 else
3471 SET_CMODEL (CMODEL_SMALL);
3472 }
3473 if (rs6000_current_cmodel != CMODEL_SMALL)
3474 {
3475 if (!OPTION_SET_P (TARGET_NO_FP_IN_TOC))
3476 TARGET_NO_FP_IN_TOC = rs6000_current_cmodel == CMODEL_MEDIUM;
3477 if (!OPTION_SET_P (TARGET_NO_SUM_IN_TOC))
3478 TARGET_NO_SUM_IN_TOC = 0;
3479 }
3480 if (TARGET_PLTSEQ && DEFAULT_ABI != ABI_ELFv2)
3481 {
3482 if (OPTION_SET_P (rs6000_pltseq))
3483 warning (0, "%qs unsupported for this ABI",
3484 "-mpltseq");
3485 rs6000_pltseq = false;
3486 }
3487 }
3488 else if (TARGET_64BIT)
3489 error (INVALID_32BIT, "32");
3490 else
3491 {
3492 if (TARGET_PROFILE_KERNEL)
3493 {
3494 profile_kernel = 0;
3495 error (INVALID_32BIT, "profile-kernel");
3496 }
3497 if (OPTION_SET_P (rs6000_current_cmodel))
3498 {
3499 SET_CMODEL (CMODEL_SMALL);
3500 error (INVALID_32BIT, "cmodel");
3501 }
3502 }
3503 }
3504 #endif
3505
3506 /* Return true if we are using GLIBC, and it supports IEEE 128-bit long double.
3507 This support is only in little endian GLIBC 2.32 or newer. */
3508 static bool
3509 glibc_supports_ieee_128bit (void)
3510 {
3511 #ifdef OPTION_GLIBC
3512 if (OPTION_GLIBC && !BYTES_BIG_ENDIAN
3513 && ((TARGET_GLIBC_MAJOR * 1000) + TARGET_GLIBC_MINOR) >= 2032)
3514 return true;
3515 #endif /* OPTION_GLIBC. */
3516
3517 return false;
3518 }
3519
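/* A minimal sketch of the version arithmetic above, with hypothetical
   major/minor inputs: GLIBC 2.32 encodes as 2 * 1000 + 32 == 2032, so 2.31
   (2031) fails the test while 2.33 (2033) passes.  */
#if 0
static bool
glibc_at_least_2_32 (int major, int minor)
{
  /* Same encoding as glibc_supports_ieee_128bit above.  */
  return (major * 1000) + minor >= 2032;
}
#endif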
3520 /* Override command line options.
3521
3522 Combine build-specific configuration information with options
3523 specified on the command line to set various state variables which
3524 influence code generation, optimization, and expansion of built-in
3525 functions. Assure that command-line configuration preferences are
3526 compatible with each other and with the build configuration; issue
3527 warnings while adjusting configuration or error messages while
3528 rejecting configuration.
3529
3530 Upon entry to this function:
3531
3532 This function is called once at the beginning of
3533 compilation, and then again at the start and end of compiling
3534 each section of code that has a different configuration, as
3535 indicated, for example, by adding the
3536
3537 __attribute__((__target__("cpu=power9")))
3538
3539 qualifier to a function definition or, for example, by bracketing
3540 code between
3541
3542 #pragma GCC target("altivec")
3543
3544 and
3545
3546 #pragma GCC reset_options
3547
3548 directives. Parameter global_init_p is true for the initial
3549 invocation, which initializes global variables, and false for all
3550 subsequent invocations.
3551
3552
3553 Various global state information is assumed to be valid. This
3554 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3555 default CPU specified at build configure time, TARGET_DEFAULT,
3556 representing the default set of option flags for the default
3557 target, and OPTION_SET_P (rs6000_isa_flags), representing
3558 which options were requested on the command line.
3559
3560 Upon return from this function:
3561
3562 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3563 was set by name on the command line. Additionally, if certain
3564 attributes are automatically enabled or disabled by this function
3565 in order to assure compatibility between options and
3566 configuration, the flags associated with those attributes are
3567 also set. By setting these "explicit bits", we avoid the risk
3568 that other code might accidentally overwrite these particular
3569 attributes with "default values".
3570
3571 The various bits of rs6000_isa_flags are set to indicate the
3572 target options that have been selected for the most current
3573 compilation efforts. This has the effect of also turning on the
3574 associated TARGET_XXX values since these are macros which are
3575 generally defined to test the corresponding bit of the
3576 rs6000_isa_flags variable.
3577
3578 Various other global variables and fields of global structures
3579 (over 50 in all) are initialized to reflect the desired options
3580 for the most current compilation efforts. */
3581
3582 static bool
3583 rs6000_option_override_internal (bool global_init_p)
3584 {
3585 bool ret = true;
3586
3587 HOST_WIDE_INT set_masks;
3588 HOST_WIDE_INT ignore_masks;
3589 int cpu_index = -1;
3590 int tune_index;
3591 struct cl_target_option *main_target_opt
3592 = ((global_init_p || target_option_default_node == NULL)
3593 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3594
3595 /* Print defaults. */
3596 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3597 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3598
3599 /* Remember the explicit arguments. */
3600 if (global_init_p)
3601 rs6000_isa_flags_explicit = OPTION_SET_P (rs6000_isa_flags);
3602
3603 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3604 library functions, so warn about it. The flag may be useful for
3605 performance studies from time to time though, so don't disable it
3606 entirely. */
3607 if (OPTION_SET_P (rs6000_alignment_flags)
3608 && rs6000_alignment_flags == MASK_ALIGN_POWER
3609 && DEFAULT_ABI == ABI_DARWIN
3610 && TARGET_64BIT)
3611 warning (0, "%qs is not supported for 64-bit Darwin;"
3612 " it is incompatible with the installed C and C++ libraries",
3613 "-malign-power");
3614
3615 /* Numerous experiments show that IRA-based loop pressure
3616 calculation works better for RTL loop-invariant motion on targets
3617 with enough (>= 32) registers. It is an expensive optimization,
3618 so it is on only for peak performance. */
3619 if (optimize >= 3 && global_init_p
3620 && !OPTION_SET_P (flag_ira_loop_pressure))
3621 flag_ira_loop_pressure = 1;
3622
3623 /* -fsanitize=address needs -fasynchronous-unwind-tables in order for
3624 tracebacks to be complete, but not if an -fasynchronous-unwind-tables
3625 option was already specified. */
3626 if (flag_sanitize & SANITIZE_USER_ADDRESS
3627 && !OPTION_SET_P (flag_asynchronous_unwind_tables))
3628 flag_asynchronous_unwind_tables = 1;
3629
3630 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3631 loop unroller is active. It is only checked during unrolling, so
3632 we can just set it on by default. */
3633 if (!OPTION_SET_P (flag_variable_expansion_in_unroller))
3634 flag_variable_expansion_in_unroller = 1;
3635
3636 /* Set the pointer size. */
3637 if (TARGET_64BIT)
3638 {
3639 rs6000_pmode = DImode;
3640 rs6000_pointer_size = 64;
3641 }
3642 else
3643 {
3644 rs6000_pmode = SImode;
3645 rs6000_pointer_size = 32;
3646 }
3647
3648 /* Some OSs don't support saving Altivec registers. On those OSs, we don't
3649 touch the OPTION_MASK_ALTIVEC settings; if the user wants it, the user
3650 must explicitly specify it and we won't interfere with the user's
3651 specification. */
3652
3653 set_masks = POWERPC_MASKS;
3654 #ifdef OS_MISSING_ALTIVEC
3655 if (OS_MISSING_ALTIVEC)
3656 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3657 | OTHER_VSX_VECTOR_MASKS);
3658 #endif
3659
3660 /* Don't let the processor default override flags given explicitly. */
3661 set_masks &= ~rs6000_isa_flags_explicit;
3662
3663 /* If powerpc64 was not specified explicitly, we need to ensure it is
3664 always enabled for 64-bit here; otherwise some following checks could
3665 see an unexpected TARGET_POWERPC64 value. */
3666 if (!(rs6000_isa_flags_explicit & OPTION_MASK_POWERPC64)
3667 && TARGET_64BIT)
3668 {
3669 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3670 /* We need to stop powerpc64 from being unset in later processing,
3671 so clear it in set_masks. But as PR108240 shows, to stay
3672 consistent with earlier behavior, only do this when 64-bit was
3673 enabled explicitly. This is a hack; revisit it later. */
3674 if (rs6000_isa_flags_explicit & OPTION_MASK_64BIT)
3675 set_masks &= ~OPTION_MASK_POWERPC64;
3676 }
3677
3678 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3679 the cpu in a target attribute or pragma, but did not specify a tuning
3680 option, use the cpu for the tuning option rather than the option specified
3681 with -mtune on the command line. Process a '--with-cpu' configuration
3682 request as an implicit -mcpu. */
3683 if (rs6000_cpu_index >= 0)
3684 cpu_index = rs6000_cpu_index;
3685 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3686 cpu_index = main_target_opt->x_rs6000_cpu_index;
3687 else if (OPTION_TARGET_CPU_DEFAULT)
3688 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3689
3690 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3691 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3692 with those from the cpu, except for options that were explicitly set. If
3693 we don't have a cpu, do not override the target bits set in
3694 TARGET_DEFAULT. */
3695 if (cpu_index >= 0)
3696 {
3697 rs6000_cpu_index = cpu_index;
3698 rs6000_isa_flags &= ~set_masks;
3699 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3700 & set_masks);
3701 }
3702 else
3703 {
3704 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3705 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3706 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3707 to using rs6000_isa_flags, we need to do the initialization here.
3708
3709 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3710 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3711 HOST_WIDE_INT flags;
3712 if (TARGET_DEFAULT)
3713 flags = TARGET_DEFAULT;
3714 else
3715 {
3716 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3717 const char *default_cpu = (!TARGET_POWERPC64
3718 ? "powerpc"
3719 : (BYTES_BIG_ENDIAN
3720 ? "powerpc64"
3721 : "powerpc64le"));
3722 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3723 flags = processor_target_table[default_cpu_index].target_enable;
3724 }
3725 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3726 }
3727
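/* Worked example of the fallback above: a powerpc64le compiler configured
   without TARGET_DEFAULT and invoked without -mcpu= ends up with
   default_cpu == "powerpc64le" (TARGET_POWERPC64 set, little endian), whose
   processor_target_table entry supplies the ISA 2.07 masks that 64-bit LE
   requires.  */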
3728 /* Don't expect powerpc64 enabled on those OSes with OS_MISSING_POWERPC64,
3729 since they do not save and restore the high half of the GPRs correctly
3730 in all cases. If the user explicitly specifies it, we won't interfere
3731 with the user's specification. */
3732 #ifdef OS_MISSING_POWERPC64
3733 if (OS_MISSING_POWERPC64
3734 && TARGET_32BIT
3735 && TARGET_POWERPC64
3736 && !(rs6000_isa_flags_explicit & OPTION_MASK_POWERPC64))
3737 rs6000_isa_flags &= ~OPTION_MASK_POWERPC64;
3738 #endif
3739
3740 if (rs6000_tune_index >= 0)
3741 tune_index = rs6000_tune_index;
3742 else if (cpu_index >= 0)
3743 rs6000_tune_index = tune_index = cpu_index;
3744 else
3745 {
3746 size_t i;
3747 enum processor_type tune_proc
3748 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3749
3750 tune_index = -1;
3751 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3752 if (processor_target_table[i].processor == tune_proc)
3753 {
3754 tune_index = i;
3755 break;
3756 }
3757 }
3758
3759 if (cpu_index >= 0)
3760 rs6000_cpu = processor_target_table[cpu_index].processor;
3761 else
3762 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3763
3764 gcc_assert (tune_index >= 0);
3765 rs6000_tune = processor_target_table[tune_index].processor;
3766
3767 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3768 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3769 || rs6000_cpu == PROCESSOR_PPCE5500)
3770 {
3771 if (TARGET_ALTIVEC)
3772 error ("AltiVec not supported on this target");
3773 }
3774
3775 /* If we are optimizing big endian systems for space, use the load/store
3776 multiple instructions. */
3777 if (BYTES_BIG_ENDIAN && optimize_size)
3778 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
3779
3780 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3781 because on other processors the hardware doesn't support the instructions
3782 used in little endian mode, and they cause an alignment trap. The 750
3783 does not cause an alignment trap (except when the target is unaligned). */
3784
3785 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3786 {
3787 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3788 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3789 warning (0, "%qs is not supported on little endian systems",
3790 "-mmultiple");
3791 }
3792
3793 /* If little-endian, default to -mstrict-align on older processors.
3794 Testing for direct_move matches power8 and later. */
3795 if (!BYTES_BIG_ENDIAN
3796 && !(processor_target_table[tune_index].target_enable
3797 & OPTION_MASK_DIRECT_MOVE))
3798 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3799
3800 /* Add some warnings for VSX. */
3801 if (TARGET_VSX)
3802 {
3803 const char *msg = NULL;
3804 if (!TARGET_HARD_FLOAT)
3805 {
3806 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3807 msg = N_("%<-mvsx%> requires hardware floating point");
3808 else
3809 {
3810 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3811 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3812 }
3813 }
3814 else if (TARGET_AVOID_XFORM > 0)
3815 msg = N_("%<-mvsx%> needs indexed addressing");
3816 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3817 & OPTION_MASK_ALTIVEC))
3818 {
3819 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3820 msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3821 else
3822 msg = N_("%<-mno-altivec%> disables vsx");
3823 }
3824
3825 if (msg)
3826 {
3827 warning (0, msg);
3828 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3829 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3830 }
3831 }
3832
3833 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3834 the -mcpu setting to enable options that conflict. */
3835 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3836 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3837 | OPTION_MASK_ALTIVEC
3838 | OPTION_MASK_VSX)) != 0)
3839 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3840 | OPTION_MASK_DIRECT_MOVE)
3841 & ~rs6000_isa_flags_explicit);
3842
3843 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3844 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3845
3846 #ifdef XCOFF_DEBUGGING_INFO
3847 /* For AIX default to 64-bit DWARF. */
3848 if (!OPTION_SET_P (dwarf_offset_size))
3849 dwarf_offset_size = POINTER_SIZE_UNITS;
3850 #endif
3851
3852 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3853 off all of the options that depend on those flags. */
3854 ignore_masks = rs6000_disable_incompatible_switches ();
3855
3856 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3857 unless the user explicitly used the -mno-<option> to disable the code. */
3858 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
3859 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3860 else if (TARGET_P9_MINMAX)
3861 {
3862 if (cpu_index >= 0)
3863 {
3864 if (processor_target_table[cpu_index].processor == PROCESSOR_POWER9)
3865 {
3866 /* Legacy behavior: allow -mcpu=power9 with certain
3867 capabilities explicitly disabled. */
3868 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3869 }
3870 else
3871 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3872 "for <xxx> less than power9", "-mcpu");
3873 }
3874 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
3875 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
3876 & rs6000_isa_flags_explicit))
3877 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3878 were explicitly cleared. */
3879 error ("%qs incompatible with explicitly disabled options",
3880 "-mpower9-minmax");
3881 else
3882 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
3883 }
3884 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3885 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
3886 else if (TARGET_VSX)
3887 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
3888 else if (TARGET_POPCNTD)
3889 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
3890 else if (TARGET_DFP)
3891 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
3892 else if (TARGET_CMPB)
3893 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
3894 else if (TARGET_FPRND)
3895 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
3896 else if (TARGET_POPCNTB)
3897 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
3898 else if (TARGET_ALTIVEC)
3899 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
3900
3901 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
3902 target attribute or pragma which automatically enables both options,
3903 unless the altivec ABI was set. This is set by default for 64-bit, but
3904 not for 32-bit. Don't move this before the above code using ignore_masks,
3905 since it can reset the cleared VSX/ALTIVEC flag again. */
3906 if (main_target_opt && !main_target_opt->x_rs6000_altivec_abi)
3907 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC)
3908 & ~rs6000_isa_flags_explicit);
3909
3910 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3911 {
3912 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3913 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3914 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3915 }
3916
3917 if (!TARGET_FPRND && TARGET_VSX)
3918 {
3919 if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND)
3920 /* TARGET_VSX = 1 implies Power7 and newer. */
3921 error ("%qs requires %qs", "-mvsx", "-mfprnd");
3922 rs6000_isa_flags &= ~OPTION_MASK_FPRND;
3923 }
3924
3925 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3926 {
3927 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3928 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3929 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3930 }
3931
3932 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3933 {
3934 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3935 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3936 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3937 }
3938
3939 if (TARGET_P8_VECTOR && !TARGET_VSX)
3940 {
3941 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3942 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
3943 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3944 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
3945 {
3946 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3947 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3948 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3949 }
3950 else
3951 {
3952 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
3953 not explicit. */
3954 rs6000_isa_flags |= OPTION_MASK_VSX;
3955 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3956 }
3957 }
3958
3959 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3960 {
3961 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3962 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3963 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3964 }
3965
3966 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
3967 silently turn off quad memory mode. */
3968 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3969 {
3970 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3971 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
3972
3973 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3974 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
3975
3976 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3977 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3978 }
3979
3980 /* Non-atomic quad memory load/store are disabled for little endian, since
3981 the words are reversed, but atomic operations can still be done by
3982 swapping the words. */
3983 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3984 {
3985 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3986 warning (0, N_("%<-mquad-memory%> is not available in little endian "
3987 "mode"));
3988
3989 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3990 }
3991
3992 /* Assume that if the user asked for normal quad memory instructions, they
3993 want the atomic versions as well, unless they explicitly told us not to
3994 use quad word atomic instructions. */
3995 if (TARGET_QUAD_MEMORY
3996 && !TARGET_QUAD_MEMORY_ATOMIC
3997 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
3998 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
3999
4000 /* If we can shrink-wrap the TOC register save separately, then use
4001 -msave-toc-indirect unless explicitly disabled. */
4002 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
4003 && flag_shrink_wrap_separate
4004 && optimize_function_for_speed_p (cfun))
4005 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
4006
4007 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4008 generating power8 instructions. Power9 does not optimize power8 fusion
4009 cases. */
4010 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4011 {
4012 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
4013 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4014 else
4015 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4016 }
4017
4018 /* Setting additional fusion flags turns on base fusion. */
4019 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
4020 {
4021 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4022 {
4023 if (TARGET_P8_FUSION_SIGN)
4024 error ("%qs requires %qs", "-mpower8-fusion-sign",
4025 "-mpower8-fusion");
4026
4027 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4028 }
4029 else
4030 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4031 }
4032
4033 /* Power8 does not fuse sign-extended loads with the addis. If we are
4034 optimizing at high levels for speed, convert a sign-extended load into a
4035 zero-extending load and an explicit sign extension. */
4036 if (TARGET_P8_FUSION
4037 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4038 && optimize_function_for_speed_p (cfun)
4039 && optimize >= 3)
4040 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
4041
4042 /* ISA 3.0 vector instructions include ISA 2.07. */
4043 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4044 {
4045 /* We prefer to not mention undocumented options in
4046 error messages. However, if users have managed to select
4047 power9-vector without selecting power8-vector, they
4048 already know about undocumented flags. */
4049 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4050 && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
4051 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
4052 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
4053 {
4054 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4055 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4056 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4057 }
4058 else
4059 {
4060 /* OPTION_MASK_P9_VECTOR is explicit and
4061 OPTION_MASK_P8_VECTOR is not explicit. */
4062 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
4063 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4064 }
4065 }
4066
4067 /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07
4068 support. If we only have ISA 2.06 support and the user did not specify
4069 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4070 but we don't enable the full vectorization support. */
4071 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4072 TARGET_ALLOW_MOVMISALIGN = 1;
4073
4074 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4075 {
4076 if (TARGET_ALLOW_MOVMISALIGN > 0
4077 && OPTION_SET_P (TARGET_ALLOW_MOVMISALIGN))
4078 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4079
4080 TARGET_ALLOW_MOVMISALIGN = 0;
4081 }
4082
4083 /* Determine when unaligned vector accesses are permitted, and when
4084 they are preferred over masked Altivec loads. Note that if
4085 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4086 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4087 not true. */
4088 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4089 {
4090 if (!TARGET_VSX)
4091 {
4092 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4093 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4094
4095 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4096 }
4097
4098 else if (!TARGET_ALLOW_MOVMISALIGN)
4099 {
4100 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4101 error ("%qs requires %qs", "-mefficient-unaligned-vsx",
4102 "-mallow-movmisalign");
4103
4104 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4105 }
4106 }
4107
4108 if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX))
4109 {
4110 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4111 rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4112 else
4113 rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4114 }
4115
4116 /* Use long double size to select the appropriate long double. We use
4117 TYPE_PRECISION to differentiate the 3 different long double types. We map
4118 128 into the precision used for TFmode. */
4119 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
4120 ? 64
4121 : FLOAT_PRECISION_TFmode);
4122
4123 /* Set long double size before the IEEE 128-bit tests. */
4124 if (!OPTION_SET_P (rs6000_long_double_type_size))
4125 {
4126 if (main_target_opt != NULL
4127 && (main_target_opt->x_rs6000_long_double_type_size
4128 != default_long_double_size))
4129 error ("target attribute or pragma changes %<long double%> size");
4130 else
4131 rs6000_long_double_type_size = default_long_double_size;
4132 }
4133 else if (rs6000_long_double_type_size == FLOAT_PRECISION_TFmode)
4134 ; /* The option value can be seen when cl_target_option_restore is called. */
4135 else if (rs6000_long_double_type_size == 128)
4136 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
4137
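/* Restating the mapping above with concrete values: -mlong-double-64 leaves
   rs6000_long_double_type_size at 64, while -mlong-double-128 is
   canonicalized to FLOAT_PRECISION_TFmode, so the three long double layouts
   remain distinguishable by TYPE_PRECISION.  */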
4138 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4139 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4140 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4141 those systems will not pick up this default. Warn if the user changes the
4142 default unless -Wno-psabi. */
4143 if (!OPTION_SET_P (rs6000_ieeequad))
4144 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
4145
4146 else if (TARGET_LONG_DOUBLE_128)
4147 {
4148 if (global_options.x_rs6000_ieeequad
4149 && (!TARGET_POPCNTD || !TARGET_VSX))
4150 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
4151
4152 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT)
4153 {
4154 /* Determine if the user can change the default long double type at
4155 compilation time. You need GLIBC 2.32 or newer to be able to
4156 change the long double type. Only issue one warning. */
4157 static bool warned_change_long_double;
4158
4159 if (!warned_change_long_double && !glibc_supports_ieee_128bit ())
4160 {
4161 warned_change_long_double = true;
4162 if (TARGET_IEEEQUAD)
4163 warning (OPT_Wpsabi, "Using IEEE extended precision "
4164 "%<long double%>");
4165 else
4166 warning (OPT_Wpsabi, "Using IBM extended precision "
4167 "%<long double%>");
4168 }
4169 }
4170 }
4171
4172 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4173 systems. In GCC 7, we would enable the IEEE 128-bit floating point
4174 infrastructure (-mfloat128-type) but not enable the actual __float128 type
4175 unless the user used the explicit -mfloat128. In GCC 8, we enable both
4176 the keyword and the type. */
4177 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
4178
4179 /* IEEE 128-bit floating point requires VSX support. */
4180 if (TARGET_FLOAT128_KEYWORD)
4181 {
4182 if (!TARGET_VSX)
4183 {
4184 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4185 error ("%qs requires VSX support", "-mfloat128");
4186
4187 TARGET_FLOAT128_TYPE = 0;
4188 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
4189 | OPTION_MASK_FLOAT128_HW);
4190 }
4191 else if (!TARGET_FLOAT128_TYPE)
4192 {
4193 TARGET_FLOAT128_TYPE = 1;
4194 warning (0, "The %<-mfloat128%> option may not be fully supported");
4195 }
4196 }
4197
4198 /* Enable the __float128 keyword under Linux by default. */
4199 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4200 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4201 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4202
4203 /* If we are supporting the float128 type and have full ISA 3.0 support,
4204 enable -mfloat128-hardware by default. However, don't enable the
4205 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4206 because sometimes the compiler wants to put things in an integer
4207 container, and if we don't have __int128 support, it is impossible. */
4208 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4209 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4210 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4211 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4212
4213 if (TARGET_FLOAT128_HW
4214 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4215 {
4216 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4217 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4218
4219 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4220 }
4221
4222 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4223 {
4224 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4225 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4226
4227 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4228 }
4229
4230 /* Enable -mprefixed by default on power10 systems. */
4231 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0)
4232 rs6000_isa_flags |= OPTION_MASK_PREFIXED;
4233
4234 /* -mprefixed requires -mcpu=power10 (or later). */
4235 else if (TARGET_PREFIXED && !TARGET_POWER10)
4236 {
4237 if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) != 0)
4238 error ("%qs requires %qs", "-mprefixed", "-mcpu=power10");
4239
4240 rs6000_isa_flags &= ~OPTION_MASK_PREFIXED;
4241 }
4242
4243 /* -mpcrel requires prefixed load/store addressing. */
4244 if (TARGET_PCREL && !TARGET_PREFIXED)
4245 {
4246 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4247 error ("%qs requires %qs", "-mpcrel", "-mprefixed");
4248
4249 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4250 }
4251
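/* Taken together with the PCREL and MMA defaults below, a plain

       gcc -mcpu=power10 ...

   on a PCREL-capable OS ends up with -mprefixed, -mpcrel and -mmma all
   enabled by default, while -mcpu=power9 leaves all three off and reports an
   error if any of them is requested explicitly.  */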
4252 /* Print the options after updating the defaults. */
4253 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4254 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4255
4256 /* E500mc does "better" if we inline more aggressively. Respect the
4257 user's opinion, though. */
4258 if (rs6000_block_move_inline_limit == 0
4259 && (rs6000_tune == PROCESSOR_PPCE500MC
4260 || rs6000_tune == PROCESSOR_PPCE500MC64
4261 || rs6000_tune == PROCESSOR_PPCE5500
4262 || rs6000_tune == PROCESSOR_PPCE6500))
4263 rs6000_block_move_inline_limit = 128;
4264
4265 /* store_one_arg depends on expand_block_move to handle at least the
4266 size of reg_parm_stack_space. */
4267 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4268 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4269
4270 if (global_init_p)
4271 {
4272 /* If the appropriate debug option is enabled, replace the target hooks
4273 with debug versions that call the real version and then print
4274 debugging information. */
4275 if (TARGET_DEBUG_COST)
4276 {
4277 targetm.rtx_costs = rs6000_debug_rtx_costs;
4278 targetm.address_cost = rs6000_debug_address_cost;
4279 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4280 }
4281
4282 if (TARGET_DEBUG_ADDR)
4283 {
4284 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4285 targetm.legitimize_address = rs6000_debug_legitimize_address;
4286 rs6000_secondary_reload_class_ptr
4287 = rs6000_debug_secondary_reload_class;
4288 targetm.secondary_memory_needed
4289 = rs6000_debug_secondary_memory_needed;
4290 targetm.can_change_mode_class
4291 = rs6000_debug_can_change_mode_class;
4292 rs6000_preferred_reload_class_ptr
4293 = rs6000_debug_preferred_reload_class;
4294 rs6000_mode_dependent_address_ptr
4295 = rs6000_debug_mode_dependent_address;
4296 }
4297
4298 if (rs6000_veclibabi_name)
4299 {
4300 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4301 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4302 else
4303 {
4304 error ("unknown vectorization library ABI type in "
4305 "%<-mveclibabi=%s%>", rs6000_veclibabi_name);
4306 ret = false;
4307 }
4308 }
4309 }
4310
4311 /* Enable Altivec ABI for AIX -maltivec. */
4312 if (TARGET_XCOFF
4313 && (TARGET_ALTIVEC || TARGET_VSX)
4314 && !OPTION_SET_P (rs6000_altivec_abi))
4315 {
4316 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4317 error ("target attribute or pragma changes AltiVec ABI");
4318 else
4319 rs6000_altivec_abi = 1;
4320 }
4321
4322 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4323 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4324 be explicitly overridden in either case. */
4325 if (TARGET_ELF)
4326 {
4327 if (!OPTION_SET_P (rs6000_altivec_abi)
4328 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4329 {
4330 if (main_target_opt != NULL
4331 && !main_target_opt->x_rs6000_altivec_abi)
4332 error ("target attribute or pragma changes AltiVec ABI");
4333 else
4334 rs6000_altivec_abi = 1;
4335 }
4336 }
4337
4338 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4339 So far, the only darwin64 targets are also MACH-O. */
4340 if (TARGET_MACHO
4341 && DEFAULT_ABI == ABI_DARWIN
4342 && TARGET_64BIT)
4343 {
4344 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4345 error ("target attribute or pragma changes darwin64 ABI");
4346 else
4347 {
4348 rs6000_darwin64_abi = 1;
4349 /* Default to natural alignment, for better performance. */
4350 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4351 }
4352 }
4353
4354 /* Place FP constants in the constant pool instead of TOC
4355 if section anchors enabled. */
4356 if (flag_section_anchors
4357 && !OPTION_SET_P (TARGET_NO_FP_IN_TOC))
4358 TARGET_NO_FP_IN_TOC = 1;
4359
4360 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4361 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4362
4363 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4364 SUBTARGET_OVERRIDE_OPTIONS;
4365 #endif
4366 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4367 SUBSUBTARGET_OVERRIDE_OPTIONS;
4368 #endif
4369 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4370 SUB3TARGET_OVERRIDE_OPTIONS;
4371 #endif
4372
4373 /* If the ABI has support for PC-relative relocations, enable it by default.
4374 This test depends on the sub-target tests above setting the code model to
4375 medium for ELF v2 systems. */
4376 if (PCREL_SUPPORTED_BY_OS
4377 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)
4378 rs6000_isa_flags |= OPTION_MASK_PCREL;
4379
4380 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4381 after the subtarget override options are done. */
4382 else if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
4383 {
4384 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4385 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4386
4387 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4388 }
4389
4390 /* Enable -mmma by default on power10 systems. */
4391 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
4392 rs6000_isa_flags |= OPTION_MASK_MMA;
4393
4394 /* Turn off vector pair/mma options on non-power10 systems. */
4395 else if (!TARGET_POWER10 && TARGET_MMA)
4396 {
4397 if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
4398 error ("%qs requires %qs", "-mmma", "-mcpu=power10");
4399
4400 rs6000_isa_flags &= ~OPTION_MASK_MMA;
4401 }
4402
4403 /* Enable power10 fusion if we are tuning for power10, even if we aren't
4404 generating power10 instructions. */
4405 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION))
4406 {
4407 if (rs6000_tune == PROCESSOR_POWER10)
4408 rs6000_isa_flags |= OPTION_MASK_P10_FUSION;
4409 else
4410 rs6000_isa_flags &= ~OPTION_MASK_P10_FUSION;
4411 }
4412
4413 /* MMA requires SIMD support, as ISA 3.1 states, and our implementation
4414 (such as "*movoo") uses vector pair accesses, which use VSX registers.
4415 So make MMA require VSX support here. */
4416 if (TARGET_MMA && !TARGET_VSX)
4417 {
4418 if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
4419 error ("%qs requires %qs", "-mmma", "-mvsx");
4420 rs6000_isa_flags &= ~OPTION_MASK_MMA;
4421 }
4422
4423 if (!TARGET_PCREL && TARGET_PCREL_OPT)
4424 rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT;
4425
4426 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4427 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4428
4429 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4430 && rs6000_tune != PROCESSOR_POWER5
4431 && rs6000_tune != PROCESSOR_POWER6
4432 && rs6000_tune != PROCESSOR_POWER7
4433 && rs6000_tune != PROCESSOR_POWER8
4434 && rs6000_tune != PROCESSOR_POWER9
4435 && rs6000_tune != PROCESSOR_POWER10
4436 && rs6000_tune != PROCESSOR_PPCA2
4437 && rs6000_tune != PROCESSOR_CELL
4438 && rs6000_tune != PROCESSOR_PPC476);
4439 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4440 || rs6000_tune == PROCESSOR_POWER5
4441 || rs6000_tune == PROCESSOR_POWER7
4442 || rs6000_tune == PROCESSOR_POWER8);
4443 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4444 || rs6000_tune == PROCESSOR_POWER5
4445 || rs6000_tune == PROCESSOR_POWER6
4446 || rs6000_tune == PROCESSOR_POWER7
4447 || rs6000_tune == PROCESSOR_POWER8
4448 || rs6000_tune == PROCESSOR_POWER9
4449 || rs6000_tune == PROCESSOR_POWER10
4450 || rs6000_tune == PROCESSOR_PPCE500MC
4451 || rs6000_tune == PROCESSOR_PPCE500MC64
4452 || rs6000_tune == PROCESSOR_PPCE5500
4453 || rs6000_tune == PROCESSOR_PPCE6500);
4454
4455 /* Allow debug switches to override the above settings. These are set to -1
4456 in rs6000.opt to indicate the user hasn't directly set the switch. */
4457 if (TARGET_ALWAYS_HINT >= 0)
4458 rs6000_always_hint = TARGET_ALWAYS_HINT;
4459
4460 if (TARGET_SCHED_GROUPS >= 0)
4461 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4462
4463 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4464 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4465
4466 rs6000_sched_restricted_insns_priority
4467 = (rs6000_sched_groups ? 1 : 0);
4468
4469 /* Handle -msched-costly-dep option. */
4470 rs6000_sched_costly_dep
4471 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4472
4473 if (rs6000_sched_costly_dep_str)
4474 {
4475 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4476 rs6000_sched_costly_dep = no_dep_costly;
4477 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4478 rs6000_sched_costly_dep = all_deps_costly;
4479 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4480 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4481 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4482 rs6000_sched_costly_dep = store_to_load_dep_costly;
4483 else
4484 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4485 atoi (rs6000_sched_costly_dep_str));
4486 }
4487
4488 /* Handle -minsert-sched-nops option. */
4489 rs6000_sched_insert_nops
4490 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4491
4492 if (rs6000_sched_insert_nops_str)
4493 {
4494 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4495 rs6000_sched_insert_nops = sched_finish_none;
4496 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4497 rs6000_sched_insert_nops = sched_finish_pad_groups;
4498 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4499 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4500 else
4501 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4502 atoi (rs6000_sched_insert_nops_str));
4503 }
4504
4505 /* Handle the stack protector options. */
4506 if (!OPTION_SET_P (rs6000_stack_protector_guard))
4507 #ifdef TARGET_THREAD_SSP_OFFSET
4508 rs6000_stack_protector_guard = SSP_TLS;
4509 #else
4510 rs6000_stack_protector_guard = SSP_GLOBAL;
4511 #endif
4512
4513 #ifdef TARGET_THREAD_SSP_OFFSET
4514 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4515 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4516 #endif
4517
4518 if (OPTION_SET_P (rs6000_stack_protector_guard_offset_str))
4519 {
4520 char *endp;
4521 const char *str = rs6000_stack_protector_guard_offset_str;
4522
4523 errno = 0;
4524 long offset = strtol (str, &endp, 0);
4525 if (!*str || *endp || errno)
4526 error ("%qs is not a valid number in %qs", str,
4527 "-mstack-protector-guard-offset=");
4528
4529 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4530 || (TARGET_64BIT && (offset & 3)))
4531 error ("%qs is not a valid offset in %qs", str,
4532 "-mstack-protector-guard-offset=");
4533
4534 rs6000_stack_protector_guard_offset = offset;
4535 }
4536
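/* Illustrative sketch of the validation just performed, using a hypothetical
   helper; strtol with base 0 accepts decimal, hex and octal spellings, and
   the headers it needs (stdlib.h, errno.h) already come in via system.h.  */
#if 0
static bool
valid_ssp_offset_p (const char *str, bool is_64bit)
{
  char *endp;
  errno = 0;
  long offset = strtol (str, &endp, 0);	/* "40" and "-0x7010" both parse.  */
  if (!*str || *endp || errno)
    return false;			/* Not a number at all.  */
  if (!IN_RANGE (offset, -0x8000, 0x7fff))
    return false;			/* Must fit a 16-bit displacement.  */
  if (is_64bit && (offset & 3))
    return false;			/* 64-bit ld/std need a multiple of 4.  */
  return true;
}
#endif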
4537 if (OPTION_SET_P (rs6000_stack_protector_guard_reg_str))
4538 {
4539 const char *str = rs6000_stack_protector_guard_reg_str;
4540 int reg = decode_reg_name (str);
4541
4542 if (!IN_RANGE (reg, 1, 31))
4543 error ("%qs is not a valid base register in %qs", str,
4544 "-mstack-protector-guard-reg=");
4545
4546 rs6000_stack_protector_guard_reg = reg;
4547 }
4548
4549 if (rs6000_stack_protector_guard == SSP_TLS
4550 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4551 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
4552
4553 if (global_init_p)
4554 {
4555 #ifdef TARGET_REGNAMES
4556 /* If the user desires alternate register names, copy in the
4557 alternate names now. */
4558 if (TARGET_REGNAMES)
4559 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4560 #endif
4561
4562 /* Set aix_struct_return last, after the ABI is determined.
4563 If -maix-struct-return or -msvr4-struct-return was explicitly
4564 used, don't override with the ABI default. */
4565 if (!OPTION_SET_P (aix_struct_return))
4566 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4567
4568 #if 0
4569 /* IBM XL compiler defaults to unsigned bitfields. */
4570 if (TARGET_XL_COMPAT)
4571 flag_signed_bitfields = 0;
4572 #endif
4573
4574 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4575 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4576
4577 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4578
4579 /* We can only guarantee the availability of DI pseudo-ops when
4580 assembling for 64-bit targets. */
4581 if (!TARGET_64BIT)
4582 {
4583 targetm.asm_out.aligned_op.di = NULL;
4584 targetm.asm_out.unaligned_op.di = NULL;
4585 }
4586
4587
4588 /* Set branch target alignment, if not optimizing for size. */
4589 if (!optimize_size)
4590 {
4591 /* Cell wants to be aligned 8-byte for dual issue. Titan wants to be
4592 aligned 8-byte to avoid misprediction by the branch predictor. */
4593 if (rs6000_tune == PROCESSOR_TITAN
4594 || rs6000_tune == PROCESSOR_CELL)
4595 {
4596 if (flag_align_functions && !str_align_functions)
4597 str_align_functions = "8";
4598 if (flag_align_jumps && !str_align_jumps)
4599 str_align_jumps = "8";
4600 if (flag_align_loops && !str_align_loops)
4601 str_align_loops = "8";
4602 }
4603 if (rs6000_align_branch_targets)
4604 {
4605 if (flag_align_functions && !str_align_functions)
4606 str_align_functions = "16";
4607 if (flag_align_jumps && !str_align_jumps)
4608 str_align_jumps = "16";
4609 if (flag_align_loops && !str_align_loops)
4610 {
4611 can_override_loop_align = 1;
4612 str_align_loops = "16";
4613 }
4614 }
4615 }
4616
4617 /* Arrange to save and restore machine status around nested functions. */
4618 init_machine_status = rs6000_init_machine_status;
4619
4620 /* We should always be splitting complex arguments, but we can't break
4621 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4622 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4623 targetm.calls.split_complex_arg = NULL;
4624
4625 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4626 if (DEFAULT_ABI == ABI_AIX)
4627 targetm.calls.custom_function_descriptors = 0;
4628 }
4629
4630 /* Initialize rs6000_cost with the appropriate target costs. */
4631 if (optimize_size)
4632 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4633 else
4634 switch (rs6000_tune)
4635 {
4636 case PROCESSOR_RS64A:
4637 rs6000_cost = &rs64a_cost;
4638 break;
4639
4640 case PROCESSOR_MPCCORE:
4641 rs6000_cost = &mpccore_cost;
4642 break;
4643
4644 case PROCESSOR_PPC403:
4645 rs6000_cost = &ppc403_cost;
4646 break;
4647
4648 case PROCESSOR_PPC405:
4649 rs6000_cost = &ppc405_cost;
4650 break;
4651
4652 case PROCESSOR_PPC440:
4653 rs6000_cost = &ppc440_cost;
4654 break;
4655
4656 case PROCESSOR_PPC476:
4657 rs6000_cost = &ppc476_cost;
4658 break;
4659
4660 case PROCESSOR_PPC601:
4661 rs6000_cost = &ppc601_cost;
4662 break;
4663
4664 case PROCESSOR_PPC603:
4665 rs6000_cost = &ppc603_cost;
4666 break;
4667
4668 case PROCESSOR_PPC604:
4669 rs6000_cost = &ppc604_cost;
4670 break;
4671
4672 case PROCESSOR_PPC604e:
4673 rs6000_cost = &ppc604e_cost;
4674 break;
4675
4676 case PROCESSOR_PPC620:
4677 rs6000_cost = &ppc620_cost;
4678 break;
4679
4680 case PROCESSOR_PPC630:
4681 rs6000_cost = &ppc630_cost;
4682 break;
4683
4684 case PROCESSOR_CELL:
4685 rs6000_cost = &ppccell_cost;
4686 break;
4687
4688 case PROCESSOR_PPC750:
4689 case PROCESSOR_PPC7400:
4690 rs6000_cost = &ppc750_cost;
4691 break;
4692
4693 case PROCESSOR_PPC7450:
4694 rs6000_cost = &ppc7450_cost;
4695 break;
4696
4697 case PROCESSOR_PPC8540:
4698 case PROCESSOR_PPC8548:
4699 rs6000_cost = &ppc8540_cost;
4700 break;
4701
4702 case PROCESSOR_PPCE300C2:
4703 case PROCESSOR_PPCE300C3:
4704 rs6000_cost = &ppce300c2c3_cost;
4705 break;
4706
4707 case PROCESSOR_PPCE500MC:
4708 rs6000_cost = &ppce500mc_cost;
4709 break;
4710
4711 case PROCESSOR_PPCE500MC64:
4712 rs6000_cost = &ppce500mc64_cost;
4713 break;
4714
4715 case PROCESSOR_PPCE5500:
4716 rs6000_cost = &ppce5500_cost;
4717 break;
4718
4719 case PROCESSOR_PPCE6500:
4720 rs6000_cost = &ppce6500_cost;
4721 break;
4722
4723 case PROCESSOR_TITAN:
4724 rs6000_cost = &titan_cost;
4725 break;
4726
4727 case PROCESSOR_POWER4:
4728 case PROCESSOR_POWER5:
4729 rs6000_cost = &power4_cost;
4730 break;
4731
4732 case PROCESSOR_POWER6:
4733 rs6000_cost = &power6_cost;
4734 break;
4735
4736 case PROCESSOR_POWER7:
4737 rs6000_cost = &power7_cost;
4738 break;
4739
4740 case PROCESSOR_POWER8:
4741 rs6000_cost = &power8_cost;
4742 break;
4743
4744 case PROCESSOR_POWER9:
4745 rs6000_cost = &power9_cost;
4746 break;
4747
4748 case PROCESSOR_POWER10:
4749 rs6000_cost = &power10_cost;
4750 break;
4751
4752 case PROCESSOR_PPCA2:
4753 rs6000_cost = &ppca2_cost;
4754 break;
4755
4756 default:
4757 gcc_unreachable ();
4758 }
4759
4760 if (global_init_p)
4761 {
4762 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4763 param_simultaneous_prefetches,
4764 rs6000_cost->simultaneous_prefetches);
4765 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4766 param_l1_cache_size,
4767 rs6000_cost->l1_cache_size);
4768 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4769 param_l1_cache_line_size,
4770 rs6000_cost->cache_line_size);
4771 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4772 param_l2_cache_size,
4773 rs6000_cost->l2_cache_size);
4774
4775 /* Increase loop peeling limits based on performance analysis. */
4776 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4777 param_max_peeled_insns, 400);
4778 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4779 param_max_completely_peeled_insns, 400);
4780
4781 /* The lxvl/stxvl instructions don't perform well before Power10. */
4782 if (TARGET_POWER10)
4783 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4784 param_vect_partial_vector_usage, 1);
4785 else
4786 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4787 param_vect_partial_vector_usage, 0);
4788
4789 /* Use the 'model' -fsched-pressure algorithm by default. */
4790 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4791 param_sched_pressure_algorithm,
4792 SCHED_PRESSURE_MODEL);
4793
4794 /* If using typedef char *va_list, signal that
4795 __builtin_va_start (&ap, 0) can be optimized to
4796 ap = __builtin_next_arg (0). */
4797 if (DEFAULT_ABI != ABI_V4)
4798 targetm.expand_builtin_va_start = NULL;
4799 }
4800
4801 rs6000_override_options_after_change ();
4802
4803 /* If not explicitly specified via option, decide whether to generate indexed
4804 load/store instructions. A value of -1 indicates that the
4805 initial value of this variable has not been overwritten. During
4806 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4807 if (TARGET_AVOID_XFORM == -1)
4808 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4809 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4810 need indexed accesses and the type used is the scalar type of the element
4811 being loaded or stored. */
4812 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4813 && !TARGET_ALTIVEC);
4814
4815 /* Set the -mrecip options. */
4816 if (rs6000_recip_name)
4817 {
4818 char *p = ASTRDUP (rs6000_recip_name);
4819 char *q;
4820 unsigned int mask, i;
4821 bool invert;
4822
4823 while ((q = strtok (p, ",")) != NULL)
4824 {
4825 p = NULL;
4826 if (*q == '!')
4827 {
4828 invert = true;
4829 q++;
4830 }
4831 else
4832 invert = false;
4833
4834 if (!strcmp (q, "default"))
4835 mask = ((TARGET_RECIP_PRECISION)
4836 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4837 else
4838 {
4839 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4840 if (!strcmp (q, recip_options[i].string))
4841 {
4842 mask = recip_options[i].mask;
4843 break;
4844 }
4845
4846 if (i == ARRAY_SIZE (recip_options))
4847 {
4848 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4849 invert = false;
4850 mask = 0;
4851 ret = false;
4852 }
4853 }
4854
4855 if (invert)
4856 rs6000_recip_control &= ~mask;
4857 else
4858 rs6000_recip_control |= mask;
4859 }
4860 }
4861
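/* Example of the parsing above, assuming the documented recip_options[]
   entries: "-mrecip=all,!rsqrtd" first sets every estimate bit via the "all"
   mask, then the "!" entry clears the double-precision rsqrt bit again;
   "default" selects RECIP_HIGH_PRECISION or RECIP_LOW_PRECISION depending on
   TARGET_RECIP_PRECISION.  */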
4862 /* Initialize all of the registers. */
4863 rs6000_init_hard_regno_mode_ok (global_init_p);
4864
4865 /* Save the initial options in case the user uses function-specific options. */
4866 if (global_init_p)
4867 target_option_default_node = target_option_current_node
4868 = build_target_option_node (&global_options, &global_options_set);
4869
4870 /* If not explicitly specified via option, decide whether to generate the
4871 extra blr's required to preserve the link stack on some cpus (eg, 476). */
4872 if (TARGET_LINK_STACK == -1)
4873 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4874
4875 /* Deprecate use of -mno-speculate-indirect-jumps. */
4876 if (!rs6000_speculate_indirect_jumps)
4877 warning (0, "%qs is deprecated and not recommended in any circumstances",
4878 "-mno-speculate-indirect-jumps");
4879
4880 return ret;
4881 }
4882
4883 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4884 define the target cpu type. */
4885
4886 static void
4887 rs6000_option_override (void)
4888 {
4889 (void) rs6000_option_override_internal (true);
4890 }
4891
4892 \f
4893 /* Implement LOOP_ALIGN. */
4894 align_flags
4895 rs6000_loop_align (rtx label)
4896 {
4897 basic_block bb;
4898 int ninsns;
4899
4900 /* Don't override loop alignment if -falign-loops was specified. */
4901 if (!can_override_loop_align)
4902 return align_loops;
4903
4904 bb = BLOCK_FOR_INSN (label);
4905 ninsns = num_loop_insns (bb->loop_father);
4906
4907 /* Align small loops to 32 bytes to fit in an icache sector; otherwise return the default. */
4908 if (ninsns > 4 && ninsns <= 8
4909 && (rs6000_tune == PROCESSOR_POWER4
4910 || rs6000_tune == PROCESSOR_POWER5
4911 || rs6000_tune == PROCESSOR_POWER6
4912 || rs6000_tune == PROCESSOR_POWER7
4913 || rs6000_tune == PROCESSOR_POWER8))
4914 return align_flags (5);
4915 else
4916 return align_loops;
4917 }
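/* Illustrative note (not part of the original source): align_flags (5)
   requests 2^5 = 32-byte alignment, so e.g. a hot six-insn inner loop
   compiled with -mtune=power8 is placed on a 32-byte boundary and fits
   within a single 32-byte icache sector.  */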
4918
4919 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4920 after applying N iterations. This routine does not determine
4921 how many iterations are required to reach the desired alignment. */
4922
4923 static bool
4924 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4925 {
4926 if (is_packed)
4927 return false;
4928
4929 if (TARGET_32BIT)
4930 {
4931 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4932 return true;
4933
4934 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4935 return true;
4936
4937 return false;
4938 }
4939 else
4940 {
4941 if (TARGET_MACHO)
4942 return false;
4943
4944 /* Assuming that all other types are naturally aligned. CHECKME! */
4945 return true;
4946 }
4947 }
4948
4949 /* Return true if the vector misalignment factor is supported by the
4950 target. */
4951 static bool
4952 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4953 const_tree type,
4954 int misalignment,
4955 bool is_packed)
4956 {
4957 if (TARGET_VSX)
4958 {
4959 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4960 return true;
4961
4962 /* Return false if the movmisalign pattern is not supported for this mode. */
4963 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4964 return false;
4965
4966 if (misalignment == -1)
4967 {
4968 /* Misalignment factor is unknown at compile time but we know
4969 it's word aligned. */
4970 if (rs6000_vector_alignment_reachable (type, is_packed))
4971 {
4972 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4973
4974 if (element_size == 64 || element_size == 32)
4975 return true;
4976 }
4977
4978 return false;
4979 }
4980
4981 /* VSX supports word-aligned vectors. */
4982 if (misalignment % 4 == 0)
4983 return true;
4984 }
4985 return false;
4986 }
4987
4988 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4989 static int
4990 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4991 tree vectype, int misalign)
4992 {
4993 unsigned elements;
4994 tree elem_type;
4995
4996 switch (type_of_cost)
4997 {
4998 case scalar_stmt:
4999 case scalar_store:
5000 case vector_stmt:
5001 case vector_store:
5002 case vec_to_scalar:
5003 case scalar_to_vec:
5004 case cond_branch_not_taken:
5005 return 1;
5006 case scalar_load:
5007 case vector_load:
5008 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5009 return 2;
5010
5011 case vec_perm:
5012 /* Power7 has only one permute unit, make it a bit expensive. */
5013 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
5014 return 3;
5015 else
5016 return 1;
5017
5018 case vec_promote_demote:
5019 /* Power7 has only one permute/pack unit, make it a bit expensive. */
5020 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
5021 return 4;
5022 else
5023 return 1;
5024
5025 case cond_branch_taken:
5026 return 3;
5027
5028 case unaligned_load:
5029 case vector_gather_load:
5030 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5031 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5032 return 2;
5033
5034 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5035 {
5036 elements = TYPE_VECTOR_SUBPARTS (vectype);
5037 /* See PR102767, consider V1TI to keep consistency. */
5038 if (elements == 2 || elements == 1)
5039 /* Double word aligned. */
5040 return 4;
5041
5042 if (elements == 4)
5043 {
5044 switch (misalign)
5045 {
5046 case 8:
5047 /* Double word aligned. */
5048 return 4;
5049
5050 case -1:
5051 /* Unknown misalignment. */
5052 case 4:
5053 case 12:
5054 /* Word aligned. */
5055 return 33;
5056
5057 default:
5058 gcc_unreachable ();
5059 }
5060 }
5061 }
5062
5063 if (TARGET_ALTIVEC)
5064 /* Misaligned loads are not supported. */
5065 gcc_unreachable ();
5066
5067 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5068 return 4;
5069
5070 case unaligned_store:
5071 case vector_scatter_store:
5072 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5073 return 1;
5074
5075 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5076 {
5077 elements = TYPE_VECTOR_SUBPARTS (vectype);
5078 /* See PR102767, consider V1TI to keep consistency. */
5079 if (elements == 2 || elements == 1)
5080 /* Double word aligned. */
5081 return 2;
5082
5083 if (elements == 4)
5084 {
5085 switch (misalign)
5086 {
5087 case 8:
5088 /* Double word aligned. */
5089 return 2;
5090
5091 case -1:
5092 /* Unknown misalignment. */
5093 case 4:
5094 case 12:
5095 /* Word aligned. */
5096 return 23;
5097
5098 default:
5099 gcc_unreachable ();
5100 }
5101 }
5102 }
5103
5104 if (TARGET_ALTIVEC)
5105 /* Misaligned stores are not supported. */
5106 gcc_unreachable ();
5107
5108 return 2;
5109
5110 case vec_construct:
5111 /* This is a rough approximation assuming non-constant elements are
5112 constructed into a vector via element insertion. FIXME:
5113 vec_construct is not granular enough for uniformly good
5114 decisions. If the initialization is a splat, this is
5115 cheaper than we estimate. Improve this someday. */
5116 elem_type = TREE_TYPE (vectype);
5117 /* 32-bit vectors loaded into registers are stored as double
5118 precision, so we need 2 permutes, 2 converts, and 1 merge
5119 to construct a vector of short floats from them. */
5120 if (SCALAR_FLOAT_TYPE_P (elem_type)
5121 && TYPE_PRECISION (elem_type) == 32)
5122 return 5;
5123 /* On POWER9, integer vector types are built up in GPRs and then
5124 use a direct move (2 cycles). For POWER8 this is even worse,
5125 as we need two direct moves and a merge, and the direct moves
5126 are five cycles. */
5127 else if (INTEGRAL_TYPE_P (elem_type))
5128 {
5129 if (TARGET_P9_VECTOR)
5130 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5131 else
5132 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
5133 }
5134 else
5135 /* V2DFmode doesn't need a direct move. */
5136 return 2;
5137
5138 default:
5139 gcc_unreachable ();
5140 }
5141 }
5142
5143 /* Implement targetm.vectorize.preferred_simd_mode. */
5144
5145 static machine_mode
5146 rs6000_preferred_simd_mode (scalar_mode mode)
5147 {
5148 opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode));
5149
5150 if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ()))
5151 return vmode.require ();
5152
5153 return word_mode;
5154 }
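/* Illustrative note (not part of the original source): with 16-byte
   vectors enabled, SFmode (4 bytes) maps to V4SFmode (16 / 4 = 4 lanes),
   DFmode to V2DFmode and QImode to V16QImode; when the corresponding
   vector mode is not enabled, word_mode is returned instead.  */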
5155
5156 class rs6000_cost_data : public vector_costs
5157 {
5158 public:
5159 using vector_costs::vector_costs;
5160
5161 unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
5162 stmt_vec_info stmt_info, slp_tree, tree vectype,
5163 int misalign,
5164 vect_cost_model_location where) override;
5165 void finish_cost (const vector_costs *) override;
5166
5167 protected:
5168 void update_target_cost_per_stmt (vect_cost_for_stmt, stmt_vec_info,
5169 vect_cost_model_location, unsigned int);
5170 void density_test (loop_vec_info);
5171 void adjust_vect_cost_per_loop (loop_vec_info);
5172 unsigned int determine_suggested_unroll_factor (loop_vec_info);
5173
5174 /* Total number of vectorized stmts (loop only). */
5175 unsigned m_nstmts = 0;
5176 /* Total number of loads (loop only). */
5177 unsigned m_nloads = 0;
5178 /* Total number of stores (loop only). */
5179 unsigned m_nstores = 0;
5180 /* Reduction factor for suggesting unroll factor (loop only). */
5181 unsigned m_reduc_factor = 0;
5182 /* Possible extra penalized cost on vector construction (loop only). */
5183 unsigned m_extra_ctor_cost = 0;
5184 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5185 instruction is needed by the vectorization. */
5186 bool m_vect_nonmem = false;
5187 /* If this loop gets vectorized with emulated gather load. */
5188 bool m_gather_load = false;
5189 };
5190
5191 /* Test for likely overcommitment of vector hardware resources. If a
5192 loop iteration is relatively large, and too large a percentage of
5193 instructions in the loop are vectorized, the cost model may not
5194 adequately reflect delays from unavailable vector resources.
5195 Penalize the loop body cost for this case. */
5196
5197 void
5198 rs6000_cost_data::density_test (loop_vec_info loop_vinfo)
5199 {
5200 /* This density test only cares about the cost of the vector version of
5201 the loop, so return immediately if we are costing the scalar
5202 version (namely computing the single scalar iteration cost). */
5203 if (m_costing_for_scalar)
5204 return;
5205
5206 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5207 basic_block *bbs = get_loop_body (loop);
5208 int nbbs = loop->num_nodes;
5209 int vec_cost = m_costs[vect_body], not_vec_cost = 0;
5210
5211 for (int i = 0; i < nbbs; i++)
5212 {
5213 basic_block bb = bbs[i];
5214 gimple_stmt_iterator gsi;
5215
5216 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5217 {
5218 gimple *stmt = gsi_stmt (gsi);
5219 if (is_gimple_debug (stmt))
5220 continue;
5221
5222 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
5223
5224 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5225 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5226 not_vec_cost++;
5227 }
5228 }
5229
5230 free (bbs);
5231 int density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5232
5233 if (density_pct > rs6000_density_pct_threshold
5234 && vec_cost + not_vec_cost > rs6000_density_size_threshold)
5235 {
5236 m_costs[vect_body] = vec_cost * (100 + rs6000_density_penalty) / 100;
5237 if (dump_enabled_p ())
5238 dump_printf_loc (MSG_NOTE, vect_location,
5239 "density %d%%, cost %d exceeds threshold, penalizing "
5240 "loop body cost by %u%%\n", density_pct,
5241 vec_cost + not_vec_cost, rs6000_density_penalty);
5242 }
5243
5244 /* Check whether we need to penalize the body cost to account
5245 for excess strided or elementwise loads. */
5246 if (m_extra_ctor_cost > 0)
5247 {
5248 gcc_assert (m_nloads <= m_nstmts);
5249 unsigned int load_pct = (m_nloads * 100) / m_nstmts;
5250
5251 /* Performance is likely to be bounded by latency and execution
5252 resources when many scalar loads are strided or loaded elementwise
5253 into a vector and both conditions below hold:
5254 1. there are many loads, so it's easy to end up waiting a long
5255 time for the load units;
5256 2. loads form a big proportion of all vectorized statements, so
5257 it's hard to schedule other statements to spread among the
5258 loads.
5259 One typical case is the innermost loop of the hotspot of SPEC2017
5260 503.bwaves_r without loop interchange. */
5261 if (m_nloads > (unsigned int) rs6000_density_load_num_threshold
5262 && load_pct > (unsigned int) rs6000_density_load_pct_threshold)
5263 {
5264 m_costs[vect_body] += m_extra_ctor_cost;
5265 if (dump_enabled_p ())
5266 dump_printf_loc (MSG_NOTE, vect_location,
5267 "Found %u loads and "
5268 "load pct. %u%% exceed "
5269 "the threshold, "
5270 "penalizing loop body "
5271 "cost by extra cost %u "
5272 "for ctor.\n",
5273 m_nloads, load_pct,
5274 m_extra_ctor_cost);
5275 }
5276 }
5277 }
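/* Worked example (not part of the original source): if the loop body has
   vec_cost = 90 and not_vec_cost = 10, the density is
   90 * 100 / 100 = 90%.  If that exceeds rs6000_density_pct_threshold
   and the total cost 100 exceeds rs6000_density_size_threshold, the
   body cost becomes 90 * (100 + rs6000_density_penalty) / 100.  */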
5278
5279 /* Implement targetm.vectorize.create_costs. */
5280
5281 static vector_costs *
5282 rs6000_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
5283 {
5284 return new rs6000_cost_data (vinfo, costing_for_scalar);
5285 }
5286
5287 /* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
5288 For some statements, we would like to further fine-tune the cost on top
5289 of the rs6000_builtin_vectorization_cost handling, which doesn't have any
5290 information on statement operation codes etc. One typical case here is
5291 COND_EXPR; it costs the same as a simple FXU instruction when evaluating
5292 the scalar cost, but it should be priced higher whether it is transformed
5293 into compare + branch or into compare + isel instructions. */
5294
5295 static unsigned
5296 rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind,
5297 struct _stmt_vec_info *stmt_info)
5298 {
5299 if (kind == scalar_stmt && stmt_info && stmt_info->stmt
5300 && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
5301 {
5302 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
5303 if (subcode == COND_EXPR)
5304 return 2;
5305 }
5306
5307 return 0;
5308 }
5309
5310 /* Helper function for add_stmt_cost. Check each statement cost
5311 entry, gather information and update the target_cost fields
5312 accordingly. */
5313 void
5314 rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind,
5315 stmt_vec_info stmt_info,
5316 vect_cost_model_location where,
5317 unsigned int orig_count)
5318 {
5319
5320 /* Check whether we're doing something other than just a copy loop.
5321 Not all such loops may be profitably vectorized; see
5322 rs6000_finish_cost. */
5323 if (kind == vec_to_scalar
5324 || kind == vec_perm
5325 || kind == vec_promote_demote
5326 || kind == vec_construct
5327 || kind == scalar_to_vec
5328 || (where == vect_body && kind == vector_stmt))
5329 m_vect_nonmem = true;
5330
5331 /* Gather some information when we are costing the vectorized instruction
5332 for the statements located in a loop body. */
5333 if (!m_costing_for_scalar
5334 && is_a<loop_vec_info> (m_vinfo)
5335 && where == vect_body)
5336 {
5337 m_nstmts += orig_count;
5338
5339 if (kind == scalar_load
5340 || kind == vector_load
5341 || kind == unaligned_load
5342 || kind == vector_gather_load)
5343 {
5344 m_nloads += orig_count;
5345 if (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5346 m_gather_load = true;
5347 }
5348 else if (kind == scalar_store
5349 || kind == vector_store
5350 || kind == unaligned_store
5351 || kind == vector_scatter_store)
5352 m_nstores += orig_count;
5353 else if ((kind == scalar_stmt
5354 || kind == vector_stmt
5355 || kind == vec_to_scalar)
5356 && stmt_info
5357 && vect_is_reduction (stmt_info))
5358 {
5359 /* The loop body contains normal int or fp operations and the epilogue
5360 contains a vector reduction. For simplicity, we assume an int
5361 operation takes one cycle and an fp operation takes one more. */
5362 tree lhs = gimple_get_lhs (stmt_info->stmt);
5363 bool is_float = FLOAT_TYPE_P (TREE_TYPE (lhs));
5364 unsigned int basic_cost = is_float ? 2 : 1;
5365 m_reduc_factor = MAX (basic_cost * orig_count, m_reduc_factor);
5366 }
5367
5368 /* Power processors do not currently have instructions for strided
5369 and elementwise loads, and instead we must generate multiple
5370 scalar loads. This leads to undercounting of the cost. We
5371 account for this by scaling the construction cost by the number
5372 of elements involved, and saving this as extra cost that we may
5373 or may not need to apply. When finalizing the cost of the loop,
5374 the extra penalty is applied when the load density heuristics
5375 are satisfied. */
5376 if (kind == vec_construct && stmt_info
5377 && STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
5378 && (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
5379 || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_STRIDED_SLP))
5380 {
5381 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5382 unsigned int nunits = vect_nunits_for_cost (vectype);
5383 /* As PR103702 shows, it's possible that the vectorizer wants to
5384 cost only one unit here; there is no need to penalize it,
5385 so simply return early. */
5386 if (nunits == 1)
5387 return;
5388 /* The i386 port adopts nunits * stmt_cost as the penalized cost
5389 for this kind of penalization; we used to follow it but
5390 found it could result in an unreliable body cost, especially
5391 for V16QI/V8HI modes. To do better, we use this new
5392 heuristic: for each scalar load, use 2 as the penalized
5393 cost when there are 2 nunits and use 1 for the other
5394 cases. It's without much supporting theory, mainly
5395 concluded from broad performance evaluations on Power8,
5396 Power9 and Power10. One possibly related point: vector
5397 construction for more units uses more insns, so there are
5398 more chances to schedule them better (they may even run
5399 in parallel when enough units are available), so it seems
5400 reasonable not to penalize them as much. */
5401 unsigned int adjusted_cost = (nunits == 2) ? 2 : 1;
5402 unsigned int extra_cost = nunits * adjusted_cost;
5403 m_extra_ctor_cost += extra_cost;
5404 }
5405 }
5406 }
5407
5408 unsigned
5409 rs6000_cost_data::add_stmt_cost (int count, vect_cost_for_stmt kind,
5410 stmt_vec_info stmt_info, slp_tree,
5411 tree vectype, int misalign,
5412 vect_cost_model_location where)
5413 {
5414 unsigned retval = 0;
5415
5416 if (flag_vect_cost_model)
5417 {
5418 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5419 misalign);
5420 stmt_cost += rs6000_adjust_vect_cost_per_stmt (kind, stmt_info);
5421 /* Statements in an inner loop relative to the loop being
5422 vectorized are weighted more heavily. The value here is
5423 arbitrary and could potentially be improved with analysis. */
5424 unsigned int orig_count = count;
5425 retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
5426 m_costs[where] += retval;
5427
5428 update_target_cost_per_stmt (kind, stmt_info, where, orig_count);
5429 }
5430
5431 return retval;
5432 }
5433
5434 /* For target-specific vectorization costs which can't be handled per stmt,
5435 check the requisite conditions and adjust the vectorization cost
5436 accordingly when they are satisfied. One typical example is modeling the
5437 shift cost for vectors with length by counting the number of required
5438 lengths under the condition LOOP_VINFO_FULLY_WITH_LENGTH_P. */
5439
5440 void
5441 rs6000_cost_data::adjust_vect_cost_per_loop (loop_vec_info loop_vinfo)
5442 {
5443 if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
5444 {
5445 rgroup_controls *rgc;
5446 unsigned int num_vectors_m1;
5447 unsigned int shift_cnt = 0;
5448 FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)
5449 if (rgc->type)
5450 /* Each length needs one shift to fill into bits 0-7. */
5451 shift_cnt += num_vectors_m1 + 1;
5452
5453 add_stmt_cost (shift_cnt, scalar_stmt, NULL, NULL,
5454 NULL_TREE, 0, vect_body);
5455 }
5456 }
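/* Worked example (not part of the original source): for a loop
   vectorized fully with lengths where one rgroup needs a single vector
   (num_vectors_m1 == 0) and another needs two (num_vectors_m1 == 1),
   shift_cnt is 1 + 2 = 3, so three scalar_stmt costs are added to the
   body to model the length-setup shifts.  */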
5457
5458 /* Determine the suggested unroll factor by considering the factors below:
5459
5460 - the unroll option/pragma, which can disable unrolling for this loop;
5461 - a simple hardware resource model for non-memory vector insns;
5462 - aggressive heuristics when the iteration count is unknown:
5463 - the reduction case, to break cross-iteration dependencies;
5464 - emulated gather loads;
5465 - the estimated iteration count.
5466 */
5467
5469 unsigned int
5470 rs6000_cost_data::determine_suggested_unroll_factor (loop_vec_info loop_vinfo)
5471 {
5472 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5473
5474 /* Don't unroll if it's specified explicitly not to be unrolled. */
5475 if (loop->unroll == 1
5476 || (OPTION_SET_P (flag_unroll_loops) && !flag_unroll_loops)
5477 || (OPTION_SET_P (flag_unroll_all_loops) && !flag_unroll_all_loops))
5478 return 1;
5479
5480 unsigned int nstmts_nonldst = m_nstmts - m_nloads - m_nstores;
5481 /* Don't unroll if there are no vector instructions except for memory accesses. */
5482 if (nstmts_nonldst == 0)
5483 return 1;
5484
5485 /* Consider breaking cross iteration dependency for reduction. */
5486 unsigned int reduc_factor = m_reduc_factor > 1 ? m_reduc_factor : 1;
5487
5488 /* Use a simple hardware resource model based on how many non-ld/st
5489 vector instructions can be issued per cycle. */
5490 unsigned int issue_width = rs6000_vect_unroll_issue;
5491 unsigned int uf = CEIL (reduc_factor * issue_width, nstmts_nonldst);
5492 uf = MIN ((unsigned int) rs6000_vect_unroll_limit, uf);
5493 /* Make sure it is power of 2. */
5494 uf = 1 << ceil_log2 (uf);
5495
5496 /* If the iteration count is known, the costing is exact enough;
5497 don't worry that it could be worse. */
5498 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5499 return uf;
5500
5501 /* Inspired by SPEC2017 parest_r, we want to aggressively unroll the
5502 loop if either condition is satisfied:
5503 - the reduction factor exceeds the threshold;
5504 - an emulated gather load is used. */
5505 if (reduc_factor > (unsigned int) rs6000_vect_unroll_reduc_threshold
5506 || m_gather_load)
5507 return uf;
5508
5509 /* Check if we can conclude it's good to unroll from the estimated
5510 iteration count. */
5511 HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
5512 unsigned int vf = vect_vf_for_cost (loop_vinfo);
5513 unsigned int unrolled_vf = vf * uf;
5514 if (est_niter == -1 || est_niter < unrolled_vf)
5515 /* When the estimated iteration count of this loop is unknown, it's
5516 possible that we are able to vectorize this loop with the original VF
5517 but can no longer vectorize it with the unrolled VF if the actual
5518 iteration count is in between. */
5519 return 1;
5520 else
5521 {
5522 unsigned int epil_niter_unr = est_niter % unrolled_vf;
5523 unsigned int epil_niter = est_niter % vf;
5524 /* Even if we have partial vector support, it can still be inefficient
5525 to calculate the length when the iteration count is unknown, so
5526 only expect unrolling to be good when the epilogue iteration count
5527 is not bigger than VF (only one length calculation). */
5528 if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
5529 && epil_niter_unr <= vf)
5530 return uf;
5531 /* Without partial vector support, conservatively unroll this when
5532 the epilogue iteration count is less than the original one
5533 (epilogue execution time wouldn't be longer than before). */
5534 else if (!LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
5535 && epil_niter_unr <= epil_niter)
5536 return uf;
5537 }
5538
5539 return 1;
5540 }
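/* Worked example (not part of the original source; the numbers are
   hypothetical): with reduc_factor = 2, an issue width of 4 and 3
   non-ld/st vector statements, uf = CEIL (2 * 4, 3) = 3; assuming
   rs6000_vect_unroll_limit is at least 3, rounding up to a power of
   two gives a final factor of 4, which is returned directly when the
   loop's iteration count is known.  */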
5541
5542 void
5543 rs6000_cost_data::finish_cost (const vector_costs *scalar_costs)
5544 {
5545 if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo))
5546 {
5547 adjust_vect_cost_per_loop (loop_vinfo);
5548 density_test (loop_vinfo);
5549
5550 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5551 that require versioning for any reason. The vectorization is at
5552 best a wash inside the loop, and the versioning checks make
5553 profitability highly unlikely and potentially quite harmful. */
5554 if (!m_vect_nonmem
5555 && LOOP_VINFO_VECT_FACTOR (loop_vinfo) == 2
5556 && LOOP_REQUIRES_VERSIONING (loop_vinfo))
5557 m_costs[vect_body] += 10000;
5558
5559 m_suggested_unroll_factor
5560 = determine_suggested_unroll_factor (loop_vinfo);
5561 }
5562
5563 vector_costs::finish_cost (scalar_costs);
5564 }
5565
5566 /* Implement targetm.loop_unroll_adjust. */
5567
5568 static unsigned
5569 rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
5570 {
5571 if (unroll_only_small_loops)
5572 {
5573 /* TODO: These are hardcoded values right now. We probably should use
5574 a PARAM here. */
5575 if (loop->ninsns <= 6)
5576 return MIN (4, nunroll);
5577 if (loop->ninsns <= 10)
5578 return MIN (2, nunroll);
5579
5580 return 0;
5581 }
5582
5583 return nunroll;
5584 }
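/* Illustrative note (not part of the original source): under
   unroll_only_small_loops, a 5-insn loop with nunroll = 8 is limited
   to an unroll factor of 4, an 8-insn loop to 2, and a 12-insn loop
   is not unrolled at all (0 is returned).  */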
5585
5586 /* Returns a function decl for a vectorized version of the builtin function
5587 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5588 if it is not available.
5589
5590 Implement targetm.vectorize.builtin_vectorized_function. */
5591
5592 static tree
5593 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5594 tree type_in)
5595 {
5596 machine_mode in_mode, out_mode;
5597 int in_n, out_n;
5598
5599 if (TARGET_DEBUG_BUILTIN)
5600 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5601 combined_fn_name (combined_fn (fn)),
5602 GET_MODE_NAME (TYPE_MODE (type_out)),
5603 GET_MODE_NAME (TYPE_MODE (type_in)));
5604
5605 /* TODO: Should this be gcc_assert? */
5606 if (TREE_CODE (type_out) != VECTOR_TYPE
5607 || TREE_CODE (type_in) != VECTOR_TYPE)
5608 return NULL_TREE;
5609
5610 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5611 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5612 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5613 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5614
5615 switch (fn)
5616 {
5617 CASE_CFN_COPYSIGN:
5618 if (VECTOR_UNIT_VSX_P (V2DFmode)
5619 && out_mode == DFmode && out_n == 2
5620 && in_mode == DFmode && in_n == 2)
5621 return rs6000_builtin_decls[RS6000_BIF_CPSGNDP];
5622 if (VECTOR_UNIT_VSX_P (V4SFmode)
5623 && out_mode == SFmode && out_n == 4
5624 && in_mode == SFmode && in_n == 4)
5625 return rs6000_builtin_decls[RS6000_BIF_CPSGNSP];
5626 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5627 && out_mode == SFmode && out_n == 4
5628 && in_mode == SFmode && in_n == 4)
5629 return rs6000_builtin_decls[RS6000_BIF_COPYSIGN_V4SF];
5630 break;
5631 CASE_CFN_CEIL:
5632 if (VECTOR_UNIT_VSX_P (V2DFmode)
5633 && out_mode == DFmode && out_n == 2
5634 && in_mode == DFmode && in_n == 2)
5635 return rs6000_builtin_decls[RS6000_BIF_XVRDPIP];
5636 if (VECTOR_UNIT_VSX_P (V4SFmode)
5637 && out_mode == SFmode && out_n == 4
5638 && in_mode == SFmode && in_n == 4)
5639 return rs6000_builtin_decls[RS6000_BIF_XVRSPIP];
5640 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5641 && out_mode == SFmode && out_n == 4
5642 && in_mode == SFmode && in_n == 4)
5643 return rs6000_builtin_decls[RS6000_BIF_VRFIP];
5644 break;
5645 CASE_CFN_FLOOR:
5646 if (VECTOR_UNIT_VSX_P (V2DFmode)
5647 && out_mode == DFmode && out_n == 2
5648 && in_mode == DFmode && in_n == 2)
5649 return rs6000_builtin_decls[RS6000_BIF_XVRDPIM];
5650 if (VECTOR_UNIT_VSX_P (V4SFmode)
5651 && out_mode == SFmode && out_n == 4
5652 && in_mode == SFmode && in_n == 4)
5653 return rs6000_builtin_decls[RS6000_BIF_XVRSPIM];
5654 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5655 && out_mode == SFmode && out_n == 4
5656 && in_mode == SFmode && in_n == 4)
5657 return rs6000_builtin_decls[RS6000_BIF_VRFIM];
5658 break;
5659 CASE_CFN_FMA:
5660 if (VECTOR_UNIT_VSX_P (V2DFmode)
5661 && out_mode == DFmode && out_n == 2
5662 && in_mode == DFmode && in_n == 2)
5663 return rs6000_builtin_decls[RS6000_BIF_XVMADDDP];
5664 if (VECTOR_UNIT_VSX_P (V4SFmode)
5665 && out_mode == SFmode && out_n == 4
5666 && in_mode == SFmode && in_n == 4)
5667 return rs6000_builtin_decls[RS6000_BIF_XVMADDSP];
5668 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5669 && out_mode == SFmode && out_n == 4
5670 && in_mode == SFmode && in_n == 4)
5671 return rs6000_builtin_decls[RS6000_BIF_VMADDFP];
5672 break;
5673 CASE_CFN_TRUNC:
5674 if (VECTOR_UNIT_VSX_P (V2DFmode)
5675 && out_mode == DFmode && out_n == 2
5676 && in_mode == DFmode && in_n == 2)
5677 return rs6000_builtin_decls[RS6000_BIF_XVRDPIZ];
5678 if (VECTOR_UNIT_VSX_P (V4SFmode)
5679 && out_mode == SFmode && out_n == 4
5680 && in_mode == SFmode && in_n == 4)
5681 return rs6000_builtin_decls[RS6000_BIF_XVRSPIZ];
5682 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5683 && out_mode == SFmode && out_n == 4
5684 && in_mode == SFmode && in_n == 4)
5685 return rs6000_builtin_decls[RS6000_BIF_VRFIZ];
5686 break;
5687 CASE_CFN_NEARBYINT:
5688 if (VECTOR_UNIT_VSX_P (V2DFmode)
5689 && flag_unsafe_math_optimizations
5690 && out_mode == DFmode && out_n == 2
5691 && in_mode == DFmode && in_n == 2)
5692 return rs6000_builtin_decls[RS6000_BIF_XVRDPI];
5693 if (VECTOR_UNIT_VSX_P (V4SFmode)
5694 && flag_unsafe_math_optimizations
5695 && out_mode == SFmode && out_n == 4
5696 && in_mode == SFmode && in_n == 4)
5697 return rs6000_builtin_decls[RS6000_BIF_XVRSPI];
5698 break;
5699 CASE_CFN_RINT:
5700 if (VECTOR_UNIT_VSX_P (V2DFmode)
5701 && !flag_trapping_math
5702 && out_mode == DFmode && out_n == 2
5703 && in_mode == DFmode && in_n == 2)
5704 return rs6000_builtin_decls[RS6000_BIF_XVRDPIC];
5705 if (VECTOR_UNIT_VSX_P (V4SFmode)
5706 && !flag_trapping_math
5707 && out_mode == SFmode && out_n == 4
5708 && in_mode == SFmode && in_n == 4)
5709 return rs6000_builtin_decls[RS6000_BIF_XVRSPIC];
5710 break;
5711 default:
5712 break;
5713 }
5714
5715 /* Generate calls to libmass if appropriate. */
5716 if (rs6000_veclib_handler)
5717 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5718
5719 return NULL_TREE;
5720 }
5721
5722 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5723 library with vectorized intrinsics. */
5724
5725 static tree
5726 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5727 tree type_in)
5728 {
5729 char name[32];
5730 const char *suffix = NULL;
5731 tree fntype, new_fndecl, bdecl = NULL_TREE;
5732 int n_args = 1;
5733 const char *bname;
5734 machine_mode el_mode, in_mode;
5735 int n, in_n;
5736
5737 /* Libmass is suitable for unsafe math only as it does not correctly support
5738 parts of IEEE with the required precision such as denormals. Only support
5739 it if we have VSX to use the simd d2 or f4 functions.
5740 XXX: Add variable length support. */
5741 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5742 return NULL_TREE;
5743
5744 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5745 n = TYPE_VECTOR_SUBPARTS (type_out);
5746 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5747 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5748 if (el_mode != in_mode
5749 || n != in_n)
5750 return NULL_TREE;
5751
5752 switch (fn)
5753 {
5754 CASE_CFN_ATAN2:
5755 CASE_CFN_HYPOT:
5756 CASE_CFN_POW:
5757 n_args = 2;
5758 gcc_fallthrough ();
5759
5760 CASE_CFN_ACOS:
5761 CASE_CFN_ACOSH:
5762 CASE_CFN_ASIN:
5763 CASE_CFN_ASINH:
5764 CASE_CFN_ATAN:
5765 CASE_CFN_ATANH:
5766 CASE_CFN_CBRT:
5767 CASE_CFN_COS:
5768 CASE_CFN_COSH:
5769 CASE_CFN_ERF:
5770 CASE_CFN_ERFC:
5771 CASE_CFN_EXP2:
5772 CASE_CFN_EXP:
5773 CASE_CFN_EXPM1:
5774 CASE_CFN_LGAMMA:
5775 CASE_CFN_LOG10:
5776 CASE_CFN_LOG1P:
5777 CASE_CFN_LOG2:
5778 CASE_CFN_LOG:
5779 CASE_CFN_SIN:
5780 CASE_CFN_SINH:
5781 CASE_CFN_SQRT:
5782 CASE_CFN_TAN:
5783 CASE_CFN_TANH:
5784 if (el_mode == DFmode && n == 2)
5785 {
5786 bdecl = mathfn_built_in (double_type_node, fn);
5787 suffix = "d2"; /* pow -> powd2 */
5788 }
5789 else if (el_mode == SFmode && n == 4)
5790 {
5791 bdecl = mathfn_built_in (float_type_node, fn);
5792 suffix = "4"; /* powf -> powf4 */
5793 }
5794 else
5795 return NULL_TREE;
5796 if (!bdecl)
5797 return NULL_TREE;
5798 break;
5799
5800 default:
5801 return NULL_TREE;
5802 }
5803
5804 gcc_assert (suffix != NULL);
5805 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5806 if (!bname)
5807 return NULL_TREE;
5808
5809 strcpy (name, bname + strlen ("__builtin_"));
5810 strcat (name, suffix);
5811
5812 if (n_args == 1)
5813 fntype = build_function_type_list (type_out, type_in, NULL);
5814 else if (n_args == 2)
5815 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5816 else
5817 gcc_unreachable ();
5818
5819 /* Build a function declaration for the vectorized function. */
5820 new_fndecl = build_decl (BUILTINS_LOCATION,
5821 FUNCTION_DECL, get_identifier (name), fntype);
5822 TREE_PUBLIC (new_fndecl) = 1;
5823 DECL_EXTERNAL (new_fndecl) = 1;
5824 DECL_IS_NOVOPS (new_fndecl) = 1;
5825 TREE_READONLY (new_fndecl) = 1;
5826
5827 return new_fndecl;
5828 }
5829
5830 \f
5831 /* Default CPU string for rs6000*_file_start functions. */
5832 static const char *rs6000_default_cpu;
5833
5834 #ifdef USING_ELFOS_H
5835 const char *rs6000_machine;
5836
5837 const char *
5838 rs6000_machine_from_flags (void)
5839 {
5840 /* e300 and e500 */
5841 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3)
5842 return "e300";
5843 if (rs6000_cpu == PROCESSOR_PPC8540 || rs6000_cpu == PROCESSOR_PPC8548)
5844 return "e500";
5845 if (rs6000_cpu == PROCESSOR_PPCE500MC)
5846 return "e500mc";
5847 if (rs6000_cpu == PROCESSOR_PPCE500MC64)
5848 return "e500mc64";
5849 if (rs6000_cpu == PROCESSOR_PPCE5500)
5850 return "e5500";
5851 if (rs6000_cpu == PROCESSOR_PPCE6500)
5852 return "e6500";
5853
5854 /* 400 series */
5855 if (rs6000_cpu == PROCESSOR_PPC403)
5856 return "\"403\"";
5857 if (rs6000_cpu == PROCESSOR_PPC405)
5858 return "\"405\"";
5859 if (rs6000_cpu == PROCESSOR_PPC440)
5860 return "\"440\"";
5861 if (rs6000_cpu == PROCESSOR_PPC476)
5862 return "\"476\"";
5863
5864 /* A2 */
5865 if (rs6000_cpu == PROCESSOR_PPCA2)
5866 return "a2";
5867
5868 /* Cell BE */
5869 if (rs6000_cpu == PROCESSOR_CELL)
5870 return "cell";
5871
5872 /* Titan */
5873 if (rs6000_cpu == PROCESSOR_TITAN)
5874 return "titan";
5875
5876 /* 500 series and 800 series */
5877 if (rs6000_cpu == PROCESSOR_MPCCORE)
5878 return "\"821\"";
5879
5880 #if 0
5881 /* This (and ppc64 below) are disabled here (for now at least) because
5882 PROCESSOR_POWERPC, PROCESSOR_POWERPC64, and PROCESSOR_COMMON
5883 are #define'd as some of these. Untangling that is a job for later. */
5884
5885 /* 600 series and 700 series, "classic" */
5886 if (rs6000_cpu == PROCESSOR_PPC601 || rs6000_cpu == PROCESSOR_PPC603
5887 || rs6000_cpu == PROCESSOR_PPC604 || rs6000_cpu == PROCESSOR_PPC604e
5888 || rs6000_cpu == PROCESSOR_PPC750)
5889 return "ppc";
5890 #endif
5891
5892 /* Classic with AltiVec, "G4" */
5893 if (rs6000_cpu == PROCESSOR_PPC7400 || rs6000_cpu == PROCESSOR_PPC7450)
5894 return "\"7450\"";
5895
5896 #if 0
5897 /* The older 64-bit CPUs */
5898 if (rs6000_cpu == PROCESSOR_PPC620 || rs6000_cpu == PROCESSOR_PPC630
5899 || rs6000_cpu == PROCESSOR_RS64A)
5900 return "ppc64";
5901 #endif
5902
5903 HOST_WIDE_INT flags = rs6000_isa_flags;
5904
5905 /* Disable the flags that should never influence the .machine selection. */
5906 flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT | OPTION_MASK_ISEL);
5907
5908 if ((flags & (ISA_3_1_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
5909 return "power10";
5910 if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
5911 return "power9";
5912 if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
5913 return "power8";
5914 if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
5915 return "power7";
5916 if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
5917 return "power6";
5918 if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
5919 return "power5";
5920 if ((flags & ISA_2_1_MASKS) != 0)
5921 return "power4";
5922 if ((flags & OPTION_MASK_POWERPC64) != 0)
5923 return "ppc64";
5924 return "ppc";
5925 }
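/* Illustrative note (not part of the original source): with
   -mcpu=power9, ISA 3.0 server flags are set but no ISA 3.1-only
   flags, so the first test fails, the second succeeds and "power9"
   is returned; emit_asm_machine below then emits ".machine power9".  */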
5926
5927 void
5928 emit_asm_machine (void)
5929 {
5930 fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
5931 }
5932 #endif
5933
5934 /* Do anything needed at the start of the asm file. */
5935
5936 static void
5937 rs6000_file_start (void)
5938 {
5939 char buffer[80];
5940 const char *start = buffer;
5941 FILE *file = asm_out_file;
5942
5943 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5944
5945 default_file_start ();
5946
5947 if (flag_verbose_asm)
5948 {
5949 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5950
5951 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5952 {
5953 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5954 start = "";
5955 }
5956
5957 if (OPTION_SET_P (rs6000_cpu_index))
5958 {
5959 fprintf (file, "%s -mcpu=%s", start,
5960 processor_target_table[rs6000_cpu_index].name);
5961 start = "";
5962 }
5963
5964 if (OPTION_SET_P (rs6000_tune_index))
5965 {
5966 fprintf (file, "%s -mtune=%s", start,
5967 processor_target_table[rs6000_tune_index].name);
5968 start = "";
5969 }
5970
5971 if (PPC405_ERRATUM77)
5972 {
5973 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5974 start = "";
5975 }
5976
5977 #ifdef USING_ELFOS_H
5978 switch (rs6000_sdata)
5979 {
5980 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5981 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5982 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5983 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5984 }
5985
5986 if (rs6000_sdata && g_switch_value)
5987 {
5988 fprintf (file, "%s -G %d", start,
5989 g_switch_value);
5990 start = "";
5991 }
5992 #endif
5993
5994 if (*start == '\0')
5995 putc ('\n', file);
5996 }
5997
5998 #ifdef USING_ELFOS_H
5999 rs6000_machine = rs6000_machine_from_flags ();
6000 emit_asm_machine ();
6001 #endif
6002
6003 if (DEFAULT_ABI == ABI_ELFv2)
6004 fprintf (file, "\t.abiversion 2\n");
6005 }
6006
6007 \f
6008 /* Return nonzero if this function is known to have a null epilogue. */
6009
6010 int
6011 direct_return (void)
6012 {
6013 if (reload_completed)
6014 {
6015 rs6000_stack_t *info = rs6000_stack_info ();
6016
6017 if (info->first_gp_reg_save == 32
6018 && info->first_fp_reg_save == 64
6019 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
6020 && ! info->lr_save_p
6021 && ! info->cr_save_p
6022 && info->vrsave_size == 0
6023 && ! info->push_p)
6024 return 1;
6025 }
6026
6027 return 0;
6028 }
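/* Illustrative note (not part of the original source): a leaf function
   that saves no GPRs, FPRs or AltiVec registers, does not save LR or
   CR, and pushes no frame satisfies all of the checks above, so its
   epilogue can be a bare blr.  */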
6029
6030 /* Helper for num_insns_constant. Calculate the number of instructions
6031 needed to load VALUE into a single gpr using combinations of addi,
6032 addis, ori, oris, sldi and rldimi instructions. */
6033
6034 static int
6035 num_insns_constant_gpr (HOST_WIDE_INT value)
6036 {
6037 /* signed constant loadable with addi */
6038 if (SIGNED_INTEGER_16BIT_P (value))
6039 return 1;
6040
6041 /* constant loadable with addis */
6042 else if ((value & 0xffff) == 0
6043 && (value >> 31 == -1 || value >> 31 == 0))
6044 return 1;
6045
6046 /* PADDI can support up to 34 bit signed integers. */
6047 else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))
6048 return 1;
6049
6050 else if (TARGET_POWERPC64)
6051 {
6052 HOST_WIDE_INT low = sext_hwi (value, 32);
6053 HOST_WIDE_INT high = value >> 31;
6054
6055 if (high == 0 || high == -1)
6056 return 2;
6057
6058 high >>= 1;
6059
6060 if (low == 0 || low == high)
6061 return num_insns_constant_gpr (high) + 1;
6062 else if (high == 0)
6063 return num_insns_constant_gpr (low) + 1;
6064 else
6065 return (num_insns_constant_gpr (high)
6066 + num_insns_constant_gpr (low) + 1);
6067 }
6068
6069 else
6070 return 2;
6071 }
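/* Worked example (not part of the original source): on a 64-bit target
   without prefixed instructions, the constant 0x123456789abcdef0 costs
   num_insns_constant_gpr (high) + num_insns_constant_gpr (low) + 1
   = 2 + 2 + 1 = 5, matching one possible sequence:
     lis  rT,0x1234
     ori  rT,rT,0x5678
     sldi rT,rT,32
     oris rT,rT,0x9abc
     ori  rT,rT,0xdef0
   whereas 0x100000000 costs only 2 (li rT,1; sldi rT,rT,32).  */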
6072
6073 /* Helper for num_insns_constant. Allow constants formed by the
6074 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
6075 and handle modes that require multiple gprs. */
6076
6077 static int
6078 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
6079 {
6080 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6081 int total = 0;
6082 while (nregs-- > 0)
6083 {
6084 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
6085 int insns = num_insns_constant_gpr (low);
6086 if (insns > 2
6087 /* We won't get more than 2 from num_insns_constant_gpr
6088 except when TARGET_POWERPC64 and mode is DImode or
6089 wider, so the register mode must be DImode. */
6090 && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
6091 insns = 2;
6092 total += insns;
6093 /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
6094 it all at once would be UB. */
6095 value >>= (BITS_PER_WORD - 1);
6096 value >>= 1;
6097 }
6098 return total;
6099 }
6100
6101 /* Return the number of instructions it takes to form a constant in as
6102 many gprs as are needed for MODE. */
6103
6104 int
6105 num_insns_constant (rtx op, machine_mode mode)
6106 {
6107 HOST_WIDE_INT val;
6108
6109 switch (GET_CODE (op))
6110 {
6111 case CONST_INT:
6112 val = INTVAL (op);
6113 break;
6114
6115 case CONST_WIDE_INT:
6116 {
6117 int insns = 0;
6118 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
6119 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
6120 DImode);
6121 return insns;
6122 }
6123
6124 case CONST_DOUBLE:
6125 {
6126 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
6127
6128 if (mode == SFmode || mode == SDmode)
6129 {
6130 long l;
6131
6132 if (mode == SDmode)
6133 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
6134 else
6135 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
6136 /* See the first define_split in rs6000.md handling a
6137 const_double_operand. */
6138 val = l;
6139 mode = SImode;
6140 }
6141 else if (mode == DFmode || mode == DDmode)
6142 {
6143 long l[2];
6144
6145 if (mode == DDmode)
6146 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
6147 else
6148 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
6149
6150 /* See the second (32-bit) and third (64-bit) define_split
6151 in rs6000.md handling a const_double_operand. */
6152 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
6153 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
6154 mode = DImode;
6155 }
6156 else if (mode == TFmode || mode == TDmode
6157 || mode == KFmode || mode == IFmode)
6158 {
6159 long l[4];
6160 int insns;
6161
6162 if (mode == TDmode)
6163 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
6164 else
6165 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
6166
6167 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
6168 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
6169 insns = num_insns_constant_multi (val, DImode);
6170 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
6171 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
6172 insns += num_insns_constant_multi (val, DImode);
6173 return insns;
6174 }
6175 else
6176 gcc_unreachable ();
6177 }
6178 break;
6179
6180 default:
6181 gcc_unreachable ();
6182 }
6183
6184 return num_insns_constant_multi (val, mode);
6185 }
6186
6187 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6188 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6189 corresponding element of the vector, but for V4SFmode, the
6190 corresponding "float" is interpreted as an SImode integer. */
6191
6192 HOST_WIDE_INT
6193 const_vector_elt_as_int (rtx op, unsigned int elt)
6194 {
6195 rtx tmp;
6196
6197 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6198 gcc_assert (GET_MODE (op) != V2DImode
6199 && GET_MODE (op) != V2DFmode);
6200
6201 tmp = CONST_VECTOR_ELT (op, elt);
6202 if (GET_MODE (op) == V4SFmode)
6203 tmp = gen_lowpart (SImode, tmp);
6204 return INTVAL (tmp);
6205 }
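/* Illustrative note (not part of the original source): for a V4SFmode
   CONST_VECTOR whose elements are all 1.0f, each element is returned
   as the SImode bit pattern 0x3f800000 rather than as a float.  */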
6206
6207 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6208 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6209 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6210 all items are set to the same value and contain COPIES replicas of the
6211 vsplt's operand; if STEP > 1, one in every STEP elements is set to the vsplt's
6212 operand and the others are set to the value of the operand's msb. */
6213
6214 static bool
6215 vspltis_constant (rtx op, unsigned step, unsigned copies)
6216 {
6217 machine_mode mode = GET_MODE (op);
6218 machine_mode inner = GET_MODE_INNER (mode);
6219
6220 unsigned i;
6221 unsigned nunits;
6222 unsigned bitsize;
6223 unsigned mask;
6224
6225 HOST_WIDE_INT val;
6226 HOST_WIDE_INT splat_val;
6227 HOST_WIDE_INT msb_val;
6228
6229 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6230 return false;
6231
6232 nunits = GET_MODE_NUNITS (mode);
6233 bitsize = GET_MODE_BITSIZE (inner);
6234 mask = GET_MODE_MASK (inner);
6235
6236 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6237 splat_val = val;
6238 msb_val = val >= 0 ? 0 : -1;
6239
6240 if (val == 0 && step > 1)
6241 {
6242 /* Special case for loading the most significant bit with step > 1.
6243 In that case, match 0 in all elements except those at positions where
6244 i % step == step - 1, which must match EASY_VECTOR_MSB. */
6245 for (i = 1; i < nunits; ++i)
6246 {
6247 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6248 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6249 if ((i & (step - 1)) == step - 1)
6250 {
6251 if (!EASY_VECTOR_MSB (elt_val, inner))
6252 break;
6253 }
6254 else if (elt_val)
6255 break;
6256 }
6257 if (i == nunits)
6258 return true;
6259 }
6260
6261 /* Construct the value to be splatted, if possible. If not, return 0. */
6262 for (i = 2; i <= copies; i *= 2)
6263 {
6264 HOST_WIDE_INT small_val;
6265 bitsize /= 2;
6266 small_val = splat_val >> bitsize;
6267 mask >>= bitsize;
6268 if (splat_val != ((HOST_WIDE_INT)
6269 ((unsigned HOST_WIDE_INT) small_val << bitsize)
6270 | (small_val & mask)))
6271 return false;
6272 splat_val = small_val;
6273 inner = smallest_int_mode_for_size (bitsize);
6274 }
6275
6276 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6277 if (EASY_VECTOR_15 (splat_val))
6278 ;
6279
6280 /* Also check if we can splat, and then add the result to itself. Do so if
6281 the value is positive, or if the splat instruction is using OP's mode;
6282 for splat_val < 0, the splat and the add should use the same mode. */
6283 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6284 && (splat_val >= 0 || (step == 1 && copies == 1)))
6285 ;
6286
6287 /* Also check if we are loading up the most significant bit, which can be
6288 done by loading up -1 and shifting the value left by -1. Only do this
6289 for step 1 here; for larger steps it is done earlier. */
6290 else if (EASY_VECTOR_MSB (splat_val, inner) && step == 1)
6291 ;
6292
6293 else
6294 return false;
6295
6296 /* Check if VAL is present in every STEP-th element, and the
6297 other elements are filled with its most significant bit. */
6298 for (i = 1; i < nunits; ++i)
6299 {
6300 HOST_WIDE_INT desired_val;
6301 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6302 if ((i & (step - 1)) == 0)
6303 desired_val = val;
6304 else
6305 desired_val = msb_val;
6306
6307 if (desired_val != const_vector_elt_as_int (op, elt))
6308 return false;
6309 }
6310
6311 return true;
6312 }
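/* Illustrative notes (not part of the original source): a V8HImode
   vector with every element 0x0505 is recognized with COPIES = 2,
   since each halfword is two copies of the byte 5, so vspltisb 5
   produces it.  A V8HImode vector whose halfwords alternate 0 and 5
   is recognized with STEP = 2, since it is really a V4SImode splat
   of 5, so vspltisw 5 produces it.  */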
6313
6314 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6315 instruction, filling in the bottom elements with 0 or -1.
6316
6317 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6318 for the number of zeroes to shift in, or negative for the number of 0xff
6319 bytes to shift in.
6320
6321 OP is a CONST_VECTOR. */
6322
6323 int
6324 vspltis_shifted (rtx op)
6325 {
6326 machine_mode mode = GET_MODE (op);
6327 machine_mode inner = GET_MODE_INNER (mode);
6328
6329 unsigned i, j;
6330 unsigned nunits;
6331 unsigned mask;
6332
6333 HOST_WIDE_INT val;
6334
6335 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6336 return false;
6337
6338 /* We need to create pseudo registers to do the shift, so don't recognize
6339 shifted vector constants after reload. Don't match them even before RA
6340 once split1 is done, because there won't be a further splitting pass
6341 before RA to do the splitting. */
6342 if (!can_create_pseudo_p ()
6343 || (cfun->curr_properties & PROP_rtl_split_insns))
6344 return false;
6345
6346 nunits = GET_MODE_NUNITS (mode);
6347 mask = GET_MODE_MASK (inner);
6348
6349 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6350
6351 /* Check if the value can really be the operand of a vspltis[bhw]. */
6352 if (EASY_VECTOR_15 (val))
6353 ;
6354
6355 /* Also check if we are loading up the most significant bit which can be done
6356 by loading up -1 and shifting the value left by -1. */
6357 else if (EASY_VECTOR_MSB (val, inner))
6358 ;
6359
6360 else
6361 return 0;
6362
6363 /* Check if VAL is present in every STEP-th element until we find elements
6364 that are 0 or all 1 bits. */
6365 for (i = 1; i < nunits; ++i)
6366 {
6367 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6368 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6369
6370 /* If the value isn't the splat value, check for the remaining elements
6371 being 0/-1. */
6372 if (val != elt_val)
6373 {
6374 if (elt_val == 0)
6375 {
6376 for (j = i+1; j < nunits; ++j)
6377 {
6378 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6379 if (const_vector_elt_as_int (op, elt2) != 0)
6380 return 0;
6381 }
6382
6383 return (nunits - i) * GET_MODE_SIZE (inner);
6384 }
6385
6386 else if ((elt_val & mask) == mask)
6387 {
6388 for (j = i+1; j < nunits; ++j)
6389 {
6390 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6391 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6392 return 0;
6393 }
6394
6395 return -((nunits - i) * GET_MODE_SIZE (inner));
6396 }
6397
6398 else
6399 return 0;
6400 }
6401 }
6402
6403 /* If all elements are equal, we don't need to do VSLDOI. */
6404 return 0;
6405 }
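/* Illustrative note (not part of the original source): on a big-endian
   target the V16QImode vector {5,5,5,5,5,5,5,5,0,0,0,0,0,0,0,0} returns
   8, meaning vspltisb 5 followed by a VSLDOI shifting in 8 zero bytes;
   if the trailing elements were 0xff instead, -8 would be returned.  */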
6406
6407
6408 /* Return non-zero (element mode byte size) if OP is of the given MODE
6409 and can be synthesized with a vspltisb, vspltish or vspltisw. */
6410
6411 int
6412 easy_altivec_constant (rtx op, machine_mode mode)
6413 {
6414 unsigned step, copies;
6415
6416 if (mode == VOIDmode)
6417 mode = GET_MODE (op);
6418 else if (mode != GET_MODE (op))
6419 return 0;
6420
6421 /* V2DI/V2DF were added with VSX. Only allow 0 and all 1's as easy
6422 constants. */
6423 if (mode == V2DFmode)
6424 return zero_constant (op, mode) ? 8 : 0;
6425
6426 else if (mode == V2DImode)
6427 {
6428 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
6429 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
6430 return 0;
6431
6432 if (zero_constant (op, mode))
6433 return 8;
6434
6435 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6436 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6437 return 8;
6438
6439 return 0;
6440 }
6441
6442 /* V1TImode is a special container for TImode. Ignore for now. */
6443 else if (mode == V1TImode)
6444 return 0;
6445
6446 /* Start with a vspltisw. */
6447 step = GET_MODE_NUNITS (mode) / 4;
6448 copies = 1;
6449
6450 if (vspltis_constant (op, step, copies))
6451 return 4;
6452
6453 /* Then try with a vspltish. */
6454 if (step == 1)
6455 copies <<= 1;
6456 else
6457 step >>= 1;
6458
6459 if (vspltis_constant (op, step, copies))
6460 return 2;
6461
6462 /* And finally a vspltisb. */
6463 if (step == 1)
6464 copies <<= 1;
6465 else
6466 step >>= 1;
6467
6468 if (vspltis_constant (op, step, copies))
6469 return 1;
6470
6471 if (vspltis_shifted (op) != 0)
6472 return GET_MODE_SIZE (GET_MODE_INNER (mode));
6473
6474 return 0;
6475 }
6476
6477 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6478 result is OP. Abort if it is not possible. */
6479
6480 rtx
6481 gen_easy_altivec_constant (rtx op)
6482 {
6483 machine_mode mode = GET_MODE (op);
6484 int nunits = GET_MODE_NUNITS (mode);
6485 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6486 unsigned step = nunits / 4;
6487 unsigned copies = 1;
6488
6489 /* Start with a vspltisw. */
6490 if (vspltis_constant (op, step, copies))
6491 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6492
6493 /* Then try with a vspltish. */
6494 if (step == 1)
6495 copies <<= 1;
6496 else
6497 step >>= 1;
6498
6499 if (vspltis_constant (op, step, copies))
6500 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6501
6502 /* And finally a vspltisb. */
6503 if (step == 1)
6504 copies <<= 1;
6505 else
6506 step >>= 1;
6507
6508 if (vspltis_constant (op, step, copies))
6509 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6510
6511 gcc_unreachable ();
6512 }
6513
6514 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6515 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6516
6517 Return the number of instructions needed (1 or 2) via the address pointed
6518 to by NUM_INSNS_PTR.
6519
6520 Return the constant that is being split via CONSTANT_PTR. */
6521
6522 bool
6523 xxspltib_constant_p (rtx op,
6524 machine_mode mode,
6525 int *num_insns_ptr,
6526 int *constant_ptr)
6527 {
6528 size_t nunits = GET_MODE_NUNITS (mode);
6529 size_t i;
6530 HOST_WIDE_INT value;
6531 rtx element;
6532
6533 /* Set the returned values to out of bound values. */
6534 *num_insns_ptr = -1;
6535 *constant_ptr = 256;
6536
6537 if (!TARGET_P9_VECTOR)
6538 return false;
6539
6540 if (mode == VOIDmode)
6541 mode = GET_MODE (op);
6542
6543 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6544 return false;
6545
6546 /* Handle (vec_duplicate <constant>). */
6547 if (GET_CODE (op) == VEC_DUPLICATE)
6548 {
6549 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6550 && mode != V2DImode)
6551 return false;
6552
6553 element = XEXP (op, 0);
6554 if (!CONST_INT_P (element))
6555 return false;
6556
6557 value = INTVAL (element);
6558 if (!IN_RANGE (value, -128, 127))
6559 return false;
6560 }
6561
6562 /* Handle (const_vector [...]). */
6563 else if (GET_CODE (op) == CONST_VECTOR)
6564 {
6565 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6566 && mode != V2DImode)
6567 return false;
6568
6569 element = CONST_VECTOR_ELT (op, 0);
6570 if (!CONST_INT_P (element))
6571 return false;
6572
6573 value = INTVAL (element);
6574 if (!IN_RANGE (value, -128, 127))
6575 return false;
6576
6577 for (i = 1; i < nunits; i++)
6578 {
6579 element = CONST_VECTOR_ELT (op, i);
6580 if (!CONST_INT_P (element))
6581 return false;
6582
6583 if (value != INTVAL (element))
6584 return false;
6585 }
6586 }
6587
6588 /* Handle integer constants being loaded into the upper part of the VSX
6589 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6590 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6591 else if (CONST_INT_P (op))
6592 {
6593 if (!SCALAR_INT_MODE_P (mode))
6594 return false;
6595
6596 value = INTVAL (op);
6597 if (!IN_RANGE (value, -128, 127))
6598 return false;
6599
6600 if (!IN_RANGE (value, -1, 0))
6601 {
6602 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6603 return false;
6604
6605 if (EASY_VECTOR_15 (value))
6606 return false;
6607 }
6608 }
6609
6610 else
6611 return false;
6612
6613 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6614 sign extend. Special case 0/-1 to allow getting any VSX register instead
6615 of an Altivec register. */
6616 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6617 && EASY_VECTOR_15 (value))
6618 return false;
6619
6620 /* Return # of instructions and the constant byte for XXSPLTIB. */
6621 if (mode == V16QImode)
6622 *num_insns_ptr = 1;
6623
6624 else if (IN_RANGE (value, -1, 0))
6625 *num_insns_ptr = 1;
6626
6627 /* Do not generate XXSPLTIB and a sign extend operation if we can generate a
6628 single XXSPLTIW or XXSPLTIDP instruction. */
6629 else if (vsx_prefixed_constant (op, mode))
6630 return false;
6631
6632 /* Return XXSPLTIB followed by a sign-extend operation to convert the
6633 constant to V8HImode or V4SImode. */
6634 else
6635 *num_insns_ptr = 2;
6636
6637 *constant_ptr = (int) value;
6638 return true;
6639 }
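/* Illustrative note (not part of the original source): on a Power9
   target a V8HImode splat of 37 is accepted with *NUM_INSNS_PTR = 2
   and *CONSTANT_PTR = 37, since 37 fits in a signed byte but is out of
   vspltish range; it is emitted as xxspltib plus a sign extension
   (vupkhsb).  A V16QImode splat of 37 needs only the single xxspltib.  */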
6640
6641 const char *
6642 output_vec_const_move (rtx *operands)
6643 {
6644 int shift;
6645 machine_mode mode;
6646 rtx dest, vec;
6647
6648 dest = operands[0];
6649 vec = operands[1];
6650 mode = GET_MODE (dest);
6651
6652 if (TARGET_VSX)
6653 {
6654 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6655 int xxspltib_value = 256;
6656 int num_insns = -1;
6657
6658 if (zero_constant (vec, mode))
6659 {
6660 if (TARGET_P9_VECTOR)
6661 return "xxspltib %x0,0";
6662
6663 else if (dest_vmx_p)
6664 return "vspltisw %0,0";
6665
6666 else
6667 return "xxlxor %x0,%x0,%x0";
6668 }
6669
6670 if (all_ones_constant (vec, mode))
6671 {
6672 if (TARGET_P9_VECTOR)
6673 return "xxspltib %x0,255";
6674
6675 else if (dest_vmx_p)
6676 return "vspltisw %0,-1";
6677
6678 else if (TARGET_P8_VECTOR)
6679 return "xxlorc %x0,%x0,%x0";
6680
6681 else
6682 gcc_unreachable ();
6683 }
6684
6685 vec_const_128bit_type vsx_const;
6686 if (TARGET_POWER10 && vec_const_128bit_to_bytes (vec, mode, &vsx_const))
6687 {
6688 unsigned imm = constant_generates_lxvkq (&vsx_const);
6689 if (imm)
6690 {
6691 operands[2] = GEN_INT (imm);
6692 return "lxvkq %x0,%2";
6693 }
6694
6695 imm = constant_generates_xxspltiw (&vsx_const);
6696 if (imm)
6697 {
6698 operands[2] = GEN_INT (imm);
6699 return "xxspltiw %x0,%2";
6700 }
6701
6702 imm = constant_generates_xxspltidp (&vsx_const);
6703 if (imm)
6704 {
6705 operands[2] = GEN_INT (imm);
6706 return "xxspltidp %x0,%2";
6707 }
6708 }
6709
6710 if (TARGET_P9_VECTOR
6711 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6712 {
6713 if (num_insns == 1)
6714 {
6715 operands[2] = GEN_INT (xxspltib_value & 0xff);
6716 return "xxspltib %x0,%2";
6717 }
6718
6719 return "#";
6720 }
6721 }
6722
6723 if (TARGET_ALTIVEC)
6724 {
6725 rtx splat_vec;
6726
6727 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6728 if (zero_constant (vec, mode))
6729 return "vspltisw %0,0";
6730
6731 if (all_ones_constant (vec, mode))
6732 return "vspltisw %0,-1";
6733
6734 /* Do we need to construct a value using VSLDOI? */
6735 shift = vspltis_shifted (vec);
6736 if (shift != 0)
6737 return "#";
6738
6739 splat_vec = gen_easy_altivec_constant (vec);
6740 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6741 operands[1] = XEXP (splat_vec, 0);
6742 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6743 return "#";
6744
6745 switch (GET_MODE (splat_vec))
6746 {
6747 case E_V4SImode:
6748 return "vspltisw %0,%1";
6749
6750 case E_V8HImode:
6751 return "vspltish %0,%1";
6752
6753 case E_V16QImode:
6754 return "vspltisb %0,%1";
6755
6756 default:
6757 gcc_unreachable ();
6758 }
6759 }
6760
6761 gcc_unreachable ();
6762 }
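/* [Editorial sketch -- not part of GCC.] The zero/all-ones mnemonic cascade
   from output_vec_const_move above, modeled as a stand-alone function. The
   name and the plain boolean flags standing in for TARGET_P9_VECTOR,
   TARGET_P8_VECTOR and the Altivec-destination test are hypothetical.  */
static const char *
example_zero_ones_mnemonic (bool all_ones, bool p9, bool p8, bool dest_vmx_p)
{
  if (!all_ones)		/* The all-zeros constant.  */
    {
      if (p9)
	return "xxspltib %x0,0";
      return dest_vmx_p ? "vspltisw %0,0" : "xxlxor %x0,%x0,%x0";
    }
  if (p9)			/* The all-ones constant.  */
    return "xxspltib %x0,255";
  if (dest_vmx_p)
    return "vspltisw %0,-1";
  return p8 ? "xxlorc %x0,%x0,%x0" : nullptr;	/* nullptr: unreachable.  */
}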
6763
6764 /* Initialize vector TARGET to VALS. */
6765
6766 void
6767 rs6000_expand_vector_init (rtx target, rtx vals)
6768 {
6769 machine_mode mode = GET_MODE (target);
6770 machine_mode inner_mode = GET_MODE_INNER (mode);
6771 unsigned int n_elts = GET_MODE_NUNITS (mode);
6772 int n_var = 0, one_var = -1;
6773 bool all_same = true, all_const_zero = true;
6774 rtx x, mem;
6775 unsigned int i;
6776
6777 for (i = 0; i < n_elts; ++i)
6778 {
6779 x = XVECEXP (vals, 0, i);
6780 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6781 ++n_var, one_var = i;
6782 else if (x != CONST0_RTX (inner_mode))
6783 all_const_zero = false;
6784
6785 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6786 all_same = false;
6787 }
6788
6789 if (n_var == 0)
6790 {
6791 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6792 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6793 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6794 {
6795 /* Zero register. */
6796 emit_move_insn (target, CONST0_RTX (mode));
6797 return;
6798 }
6799 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6800 {
6801 /* Splat immediate. */
6802 emit_insn (gen_rtx_SET (target, const_vec));
6803 return;
6804 }
6805 else
6806 {
6807 /* Load from constant pool. */
6808 emit_move_insn (target, const_vec);
6809 return;
6810 }
6811 }
6812
6813 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6814 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6815 {
6816 rtx op[2];
6817 size_t i;
6818 size_t num_elements = all_same ? 1 : 2;
6819 for (i = 0; i < num_elements; i++)
6820 {
6821 op[i] = XVECEXP (vals, 0, i);
6822 /* Just in case there is a SUBREG with a smaller mode, do a
6823 conversion. */
6824 if (GET_MODE (op[i]) != inner_mode)
6825 {
6826 rtx tmp = gen_reg_rtx (inner_mode);
6827 convert_move (tmp, op[i], 0);
6828 op[i] = tmp;
6829 }
6830 /* Allow load with splat double word. */
6831 else if (MEM_P (op[i]))
6832 {
6833 if (!all_same)
6834 op[i] = force_reg (inner_mode, op[i]);
6835 }
6836 else if (!REG_P (op[i]))
6837 op[i] = force_reg (inner_mode, op[i]);
6838 }
6839
6840 if (all_same)
6841 {
6842 if (mode == V2DFmode)
6843 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6844 else
6845 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6846 }
6847 else
6848 {
6849 if (mode == V2DFmode)
6850 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6851 else
6852 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6853 }
6854 return;
6855 }
6856
6857 /* Special case initializing vector int if we are on 64-bit systems with
6858 direct move or we have the ISA 3.0 instructions. */
6859 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6860 && TARGET_DIRECT_MOVE_64BIT)
6861 {
6862 if (all_same)
6863 {
6864 rtx element0 = XVECEXP (vals, 0, 0);
6865 if (MEM_P (element0))
6866 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6867 else
6868 element0 = force_reg (SImode, element0);
6869
6870 if (TARGET_P9_VECTOR)
6871 emit_insn (gen_vsx_splat_v4si (target, element0));
6872 else
6873 {
6874 rtx tmp = gen_reg_rtx (DImode);
6875 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6876 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6877 }
6878 return;
6879 }
6880 else
6881 {
6882 rtx elements[4];
6883 size_t i;
6884
6885 for (i = 0; i < 4; i++)
6886 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6887
6888 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6889 elements[2], elements[3]));
6890 return;
6891 }
6892 }
6893
6894 /* With single precision floating point on VSX, note that internally single
6895 precision is actually represented as a double. Either make 2 V2DF
6896 vectors and convert those vectors to single precision, or do one
6897 conversion and splat the result to the other elements. */
6898 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6899 {
6900 if (all_same)
6901 {
6902 rtx element0 = XVECEXP (vals, 0, 0);
6903
6904 if (TARGET_P9_VECTOR)
6905 {
6906 if (MEM_P (element0))
6907 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6908
6909 emit_insn (gen_vsx_splat_v4sf (target, element0));
6910 }
6911
6912 else
6913 {
6914 rtx freg = gen_reg_rtx (V4SFmode);
6915 rtx sreg = force_reg (SFmode, element0);
6916 rtx cvt = (TARGET_XSCVDPSPN
6917 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6918 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6919
6920 emit_insn (cvt);
6921 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6922 const0_rtx));
6923 }
6924 }
6925 else
6926 {
6927 if (TARGET_P8_VECTOR && TARGET_POWERPC64)
6928 {
6929 rtx tmp_sf[4];
6930 rtx tmp_si[4];
6931 rtx tmp_di[4];
6932 rtx mrg_di[4];
6933 for (i = 0; i < 4; i++)
6934 {
6935 tmp_si[i] = gen_reg_rtx (SImode);
6936 tmp_di[i] = gen_reg_rtx (DImode);
6937 mrg_di[i] = gen_reg_rtx (DImode);
6938 tmp_sf[i] = force_reg (SFmode, XVECEXP (vals, 0, i));
6939 emit_insn (gen_movsi_from_sf (tmp_si[i], tmp_sf[i]));
6940 emit_insn (gen_zero_extendsidi2 (tmp_di[i], tmp_si[i]));
6941 }
6942
6943 if (!BYTES_BIG_ENDIAN)
6944 {
6945 std::swap (tmp_di[0], tmp_di[1]);
6946 std::swap (tmp_di[2], tmp_di[3]);
6947 }
6948
6949 emit_insn (gen_ashldi3 (mrg_di[0], tmp_di[0], GEN_INT (32)));
6950 emit_insn (gen_iordi3 (mrg_di[1], mrg_di[0], tmp_di[1]));
6951 emit_insn (gen_ashldi3 (mrg_di[2], tmp_di[2], GEN_INT (32)));
6952 emit_insn (gen_iordi3 (mrg_di[3], mrg_di[2], tmp_di[3]));
6953
6954 rtx tmp_v2di = gen_reg_rtx (V2DImode);
6955 emit_insn (gen_vsx_concat_v2di (tmp_v2di, mrg_di[1], mrg_di[3]));
6956 emit_move_insn (target, gen_lowpart (V4SFmode, tmp_v2di));
6957 }
6958 else
6959 {
6960 rtx dbl_even = gen_reg_rtx (V2DFmode);
6961 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6962 rtx flt_even = gen_reg_rtx (V4SFmode);
6963 rtx flt_odd = gen_reg_rtx (V4SFmode);
6964 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6965 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6966 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6967 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6968
6969 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6970 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6971 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6972 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6973 rs6000_expand_extract_even (target, flt_even, flt_odd);
6974 }
6975 }
6976 return;
6977 }
6978
6979 /* Special case initializing vector short/char that are splats if we are on
6980 64-bit systems with direct move. */
6981 if (all_same && TARGET_DIRECT_MOVE_64BIT
6982 && (mode == V16QImode || mode == V8HImode))
6983 {
6984 rtx op0 = XVECEXP (vals, 0, 0);
6985 rtx di_tmp = gen_reg_rtx (DImode);
6986
6987 if (!REG_P (op0))
6988 op0 = force_reg (GET_MODE_INNER (mode), op0);
6989
6990 if (mode == V16QImode)
6991 {
6992 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6993 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6994 return;
6995 }
6996
6997 if (mode == V8HImode)
6998 {
6999 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
7000 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
7001 return;
7002 }
7003 }
7004
7005 /* Store value to stack temp. Load vector element. Splat. However, splat
7006 of 64-bit items is not supported on Altivec. */
7007 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
7008 {
7009 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7010 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
7011 XVECEXP (vals, 0, 0));
7012 x = gen_rtx_UNSPEC (VOIDmode,
7013 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7014 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7015 gen_rtvec (2,
7016 gen_rtx_SET (target, mem),
7017 x)));
7018 x = gen_rtx_VEC_SELECT (inner_mode, target,
7019 gen_rtx_PARALLEL (VOIDmode,
7020 gen_rtvec (1, const0_rtx)));
7021 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
7022 return;
7023 }
7024
7025 /* One field is non-constant. Load constant then overwrite
7026 varying field. */
7027 if (n_var == 1)
7028 {
7029 rtx copy = copy_rtx (vals);
7030
7031 /* Load constant part of vector, substitute neighboring value for
7032 varying element. */
7033 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
7034 rs6000_expand_vector_init (target, copy);
7035
7036 /* Insert variable. */
7037 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var),
7038 GEN_INT (one_var));
7039 return;
7040 }
7041
7042 if (TARGET_DIRECT_MOVE && (mode == V16QImode || mode == V8HImode))
7043 {
7044 rtx op[16];
7045 /* Force the values into word_mode registers. */
7046 for (i = 0; i < n_elts; i++)
7047 {
7048 rtx tmp = force_reg (inner_mode, XVECEXP (vals, 0, i));
7049 machine_mode tmode = TARGET_POWERPC64 ? DImode : SImode;
7050 op[i] = simplify_gen_subreg (tmode, tmp, inner_mode, 0);
7051 }
7052
7053 /* Take unsigned char on 64-bit big endian as an example for the
7054 construction below; the input values are: A, B, C, D, ..., O, P. */
7055
7056 if (TARGET_DIRECT_MOVE_128)
7057 {
7058 /* Move to VSX register with vec_concat, each has 2 values.
7059 eg: vr1[0] = { xxxxxxxA, xxxxxxxB };
7060 vr1[1] = { xxxxxxxC, xxxxxxxD };
7061 ...
7062 vr1[7] = { xxxxxxxO, xxxxxxxP }; */
7063 rtx vr1[8];
7064 for (i = 0; i < n_elts / 2; i++)
7065 {
7066 vr1[i] = gen_reg_rtx (V2DImode);
7067 emit_insn (gen_vsx_concat_v2di (vr1[i], op[i * 2],
7068 op[i * 2 + 1]));
7069 }
7070
7071 /* Pack vectors with 2 values into vectors with 4 values.
7072 eg: vr2[0] = { xxxAxxxB, xxxCxxxD };
7073 vr2[1] = { xxxExxxF, xxxGxxxH };
7074 vr2[2] = { xxxIxxxJ, xxxKxxxL };
7075 vr2[3] = { xxxMxxxN, xxxOxxxP }; */
7076 rtx vr2[4];
7077 for (i = 0; i < n_elts / 4; i++)
7078 {
7079 vr2[i] = gen_reg_rtx (V4SImode);
7080 emit_insn (gen_altivec_vpkudum (vr2[i], vr1[i * 2],
7081 vr1[i * 2 + 1]));
7082 }
7083
7084 /* Pack vectors with 4 values into vectors with 8 values.
7085 eg: vr3[0] = { xAxBxCxD, xExFxGxH };
7086 vr3[1] = { xIxJxKxL, xMxNxOxP }; */
7087 rtx vr3[2];
7088 for (i = 0; i < n_elts / 8; i++)
7089 {
7090 vr3[i] = gen_reg_rtx (V8HImode);
7091 emit_insn (gen_altivec_vpkuwum (vr3[i], vr2[i * 2],
7092 vr2[i * 2 + 1]));
7093 }
7094
7095 /* If the mode is V8HImode, we are done; emit the result and return. */
7096 if (mode == V8HImode)
7097 {
7098 emit_insn (gen_rtx_SET (target, vr3[0]));
7099 return;
7100 }
7101
7102 /* Pack the vectors with 8 values into one vector with 16 values. */
7103 rtx res = gen_reg_rtx (V16QImode);
7104 emit_insn (gen_altivec_vpkuhum (res, vr3[0], vr3[1]));
7105 emit_insn (gen_rtx_SET (target, res));
7106 }
7107 else
7108 {
7109 rtx (*merge_v16qi) (rtx, rtx, rtx) = NULL;
7110 rtx (*merge_v8hi) (rtx, rtx, rtx) = NULL;
7111 rtx (*merge_v4si) (rtx, rtx, rtx) = NULL;
7112 rtx perm_idx;
7113
7114 /* Set up some common gen routines and values. */
7115 if (BYTES_BIG_ENDIAN)
7116 {
7117 if (mode == V16QImode)
7118 {
7119 merge_v16qi = gen_altivec_vmrghb;
7120 merge_v8hi = gen_altivec_vmrglh;
7121 }
7122 else
7123 merge_v8hi = gen_altivec_vmrghh;
7124
7125 merge_v4si = gen_altivec_vmrglw;
7126 perm_idx = GEN_INT (3);
7127 }
7128 else
7129 {
7130 if (mode == V16QImode)
7131 {
7132 merge_v16qi = gen_altivec_vmrglb;
7133 merge_v8hi = gen_altivec_vmrghh;
7134 }
7135 else
7136 merge_v8hi = gen_altivec_vmrglh;
7137
7138 merge_v4si = gen_altivec_vmrghw;
7139 perm_idx = GEN_INT (0);
7140 }
7141
7142 /* Move to VSX register with direct move.
7143 eg: vr_qi[0] = { xxxxxxxA, xxxxxxxx };
7144 vr_qi[1] = { xxxxxxxB, xxxxxxxx };
7145 ...
7146 vr_qi[15] = { xxxxxxxP, xxxxxxxx }; */
7147 rtx vr_qi[16];
7148 for (i = 0; i < n_elts; i++)
7149 {
7150 vr_qi[i] = gen_reg_rtx (V16QImode);
7151 if (TARGET_POWERPC64)
7152 emit_insn (gen_p8_mtvsrd_v16qidi2 (vr_qi[i], op[i]));
7153 else
7154 emit_insn (gen_p8_mtvsrwz_v16qisi2 (vr_qi[i], op[i]));
7155 }
7156
7157 /* Merge/move to vector short.
7158 eg: vr_hi[0] = { xxxxxxxx, xxxxxxAB };
7159 vr_hi[1] = { xxxxxxxx, xxxxxxCD };
7160 ...
7161 vr_hi[7] = { xxxxxxxx, xxxxxxOP }; */
7162 rtx vr_hi[8];
7163 for (i = 0; i < 8; i++)
7164 {
7165 rtx tmp = vr_qi[i];
7166 if (mode == V16QImode)
7167 {
7168 tmp = gen_reg_rtx (V16QImode);
7169 emit_insn (merge_v16qi (tmp, vr_qi[2 * i], vr_qi[2 * i + 1]));
7170 }
7171 vr_hi[i] = gen_reg_rtx (V8HImode);
7172 emit_move_insn (vr_hi[i], gen_lowpart (V8HImode, tmp));
7173 }
7174
7175 /* Merge vector short to vector int.
7176 eg: vr_si[0] = { xxxxxxxx, xxxxABCD };
7177 vr_si[1] = { xxxxxxxx, xxxxEFGH };
7178 ...
7179 vr_si[3] = { xxxxxxxx, xxxxMNOP }; */
7180 rtx vr_si[4];
7181 for (i = 0; i < 4; i++)
7182 {
7183 rtx tmp = gen_reg_rtx (V8HImode);
7184 emit_insn (merge_v8hi (tmp, vr_hi[2 * i], vr_hi[2 * i + 1]));
7185 vr_si[i] = gen_reg_rtx (V4SImode);
7186 emit_move_insn (vr_si[i], gen_lowpart (V4SImode, tmp));
7187 }
7188
7189 /* Merge vector int to vector long.
7190 eg: vr_di[0] = { xxxxxxxx, ABCDEFGH };
7191 vr_di[1] = { xxxxxxxx, IJKLMNOP }; */
7192 rtx vr_di[2];
7193 for (i = 0; i < 2; i++)
7194 {
7195 rtx tmp = gen_reg_rtx (V4SImode);
7196 emit_insn (merge_v4si (tmp, vr_si[2 * i], vr_si[2 * i + 1]));
7197 vr_di[i] = gen_reg_rtx (V2DImode);
7198 emit_move_insn (vr_di[i], gen_lowpart (V2DImode, tmp));
7199 }
7200
7201 rtx res = gen_reg_rtx (V2DImode);
7202 emit_insn (gen_vsx_xxpermdi_v2di (res, vr_di[0], vr_di[1], perm_idx));
7203 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, res)));
7204 }
7205
7206 return;
7207 }
7208
7209 /* Construct the vector in memory one field at a time
7210 and load the whole vector. */
7211 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7212 for (i = 0; i < n_elts; i++)
7213 emit_move_insn (adjust_address_nv (mem, inner_mode,
7214 i * GET_MODE_SIZE (inner_mode)),
7215 XVECEXP (vals, 0, i));
7216 emit_move_insn (target, mem);
7217 }
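/* [Editorial sketch -- not part of GCC.] A plain-C++ model of the
   TARGET_DIRECT_MOVE_128 packing tree above: sixteen doublewords, each
   holding one input byte A..P in its low byte, are narrowed 64->32->16->8 by
   the vpkudum/vpkuwum/vpkuhum chain. In this flat form each level just
   keeps the low half of every lane; the lane order is already final after
   the vec_concat level. The function name is hypothetical.  */
static void
example_pack_tree (const unsigned long long dw[16], unsigned char out[16])
{
  unsigned int w[16];
  unsigned short h[16];
  for (int i = 0; i < 16; i++)
    w[i] = (unsigned int) dw[i];	/* vpkudum level: keep low 32 bits.  */
  for (int i = 0; i < 16; i++)
    h[i] = (unsigned short) w[i];	/* vpkuwum level: keep low 16 bits.  */
  for (int i = 0; i < 16; i++)
    out[i] = (unsigned char) h[i];	/* vpkuhum level: keep the low byte. */
}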
7218
7219 /* Insert VAL at index IDX of TARGET. VAL is the same size as a vector
7220 element; IDX is variable, in vector-element units, for p9 and above. */
7221
7222 static void
7223 rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx)
7224 {
7225 machine_mode mode = GET_MODE (target);
7226
7227 gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
7228
7229 machine_mode inner_mode = GET_MODE (val);
7230
7231 int width = GET_MODE_SIZE (inner_mode);
7232
7233 gcc_assert (width >= 1 && width <= 8);
7234
7235 int shift = exact_log2 (width);
7236
7237 machine_mode idx_mode = GET_MODE (idx);
7238
7239 machine_mode shift_mode;
7240 /* Function pointers for generating the left shift and the permutation
7241 control vectors. */
7242 rtx (*gen_ashl) (rtx, rtx, rtx);
7243 rtx (*gen_pcvr1) (rtx, rtx);
7244 rtx (*gen_pcvr2) (rtx, rtx);
7245
7246 if (TARGET_POWERPC64)
7247 {
7248 shift_mode = DImode;
7249 gen_ashl = gen_ashldi3;
7250 gen_pcvr1 = BYTES_BIG_ENDIAN ? gen_altivec_lvsl_reg_di
7251 : gen_altivec_lvsr_reg_di;
7252 gen_pcvr2 = BYTES_BIG_ENDIAN ? gen_altivec_lvsr_reg_di
7253 : gen_altivec_lvsl_reg_di;
7254 }
7255 else
7256 {
7257 shift_mode = SImode;
7258 gen_ashl = gen_ashlsi3;
7259 gen_pcvr1 = BYTES_BIG_ENDIAN ? gen_altivec_lvsl_reg_si
7260 : gen_altivec_lvsr_reg_si;
7261 gen_pcvr2 = BYTES_BIG_ENDIAN ? gen_altivec_lvsr_reg_si
7262 : gen_altivec_lvsl_reg_si;
7263 }
7264 /* Generate the IDX for the permute shift; width is the vector element
7265 size: idx = idx * width. */
7266 rtx tmp = gen_reg_rtx (shift_mode);
7267 idx = convert_modes (shift_mode, idx_mode, idx, 1);
7268
7269 emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));
7270
7271 /* Generate one permutation control vector used for rotating the element
7272 at the to-insert position to element zero in the target vector. lvsl
7273 is used for big endian while lvsr is used for little endian:
7274 lvs[lr] v1,0,idx. */
7275 rtx pcvr1 = gen_reg_rtx (V16QImode);
7276 emit_insn (gen_pcvr1 (pcvr1, tmp));
7277
7278 rtx sub_target = simplify_gen_subreg (V16QImode, target, mode, 0);
7279 rtx perm1 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target,
7280 pcvr1);
7281 emit_insn (perm1);
7282
7283 /* Insert val into element 0 of target vector. */
7284 rs6000_expand_vector_set (target, val, const0_rtx);
7285
7286 /* Rotate back with a reversed permutation control vector generated from:
7287 lvs[rl] v2,0,idx. */
7288 rtx pcvr2 = gen_reg_rtx (V16QImode);
7289 emit_insn (gen_pcvr2 (pcvr2, tmp));
7290
7291 rtx perm2 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target,
7292 pcvr2);
7293 emit_insn (perm2);
7294 }
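/* [Editorial sketch -- not part of GCC.] The rotate/insert/rotate-back
   strategy of rs6000_expand_vector_set_var_p9 above, modeled on a plain
   array: rotate the vector so the slot to be written lands at element 0,
   store into element 0, then undo the rotation. Assumes N <= 16; the
   function name is hypothetical.  */
static void
example_set_var_rotate (int *vec, int n, int idx, int val)
{
  int tmp[16];
  for (int i = 0; i < n; i++)		/* lvsl/lvsr + vperm: rotate so     */
    tmp[i] = vec[(i + idx) % n];	/* element IDX lands at slot 0.     */
  tmp[0] = val;				/* Insert at the fixed position 0.  */
  for (int i = 0; i < n; i++)		/* Reversed control vector:         */
    vec[(i + idx) % n] = tmp[i];	/* rotate back into place.          */
}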
7295
7296 /* Insert VAL at index IDX of TARGET. VAL is the same size as a vector
7297 element; IDX is variable, in vector-element units, for p7 & p8. */
7298
7299 static void
7300 rs6000_expand_vector_set_var_p7 (rtx target, rtx val, rtx idx)
7301 {
7302 machine_mode mode = GET_MODE (target);
7303
7304 gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
7305
7306 machine_mode inner_mode = GET_MODE (val);
7307 HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode);
7308
7309 int width = GET_MODE_SIZE (inner_mode);
7310 gcc_assert (width >= 1 && width <= 4);
7311
7312 int shift = exact_log2 (width);
7313
7314 machine_mode idx_mode = GET_MODE (idx);
7315
7316 machine_mode shift_mode;
7317 rtx (*gen_ashl)(rtx, rtx, rtx);
7318 rtx (*gen_add)(rtx, rtx, rtx);
7319 rtx (*gen_sub)(rtx, rtx, rtx);
7320 rtx (*gen_lvsl)(rtx, rtx);
7321
7322 if (TARGET_POWERPC64)
7323 {
7324 shift_mode = DImode;
7325 gen_ashl = gen_ashldi3;
7326 gen_add = gen_adddi3;
7327 gen_sub = gen_subdi3;
7328 gen_lvsl = gen_altivec_lvsl_reg_di;
7329 }
7330 else
7331 {
7332 shift_mode = SImode;
7333 gen_ashl = gen_ashlsi3;
7334 gen_add = gen_addsi3;
7335 gen_sub = gen_subsi3;
7336 gen_lvsl = gen_altivec_lvsl_reg_si;
7337 }
7338
7339 /* idx = idx * width. */
7340 rtx tmp = gen_reg_rtx (shift_mode);
7341 idx = convert_modes (shift_mode, idx_mode, idx, 1);
7342
7343 emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));
7344
7345 /* For LE: idx = idx + 8. For BE: idx = (24 - width) - idx. */
7346 if (!BYTES_BIG_ENDIAN)
7347 emit_insn (gen_add (tmp, tmp, GEN_INT (8)));
7348 else
7349 emit_insn (gen_sub (tmp, GEN_INT (24 - width), tmp));
7350
7351 /* lxv vs33, mask.
7352 DImode: 0xffffffffffffffff0000000000000000
7353 SImode: 0x00000000ffffffff0000000000000000
7354 HImode: 0x000000000000ffff0000000000000000.
7355 QImode: 0x00000000000000ff0000000000000000. */
7356 rtx mask = gen_reg_rtx (V16QImode);
7357 rtx mask_v2di = gen_reg_rtx (V2DImode);
7358 rtvec v = rtvec_alloc (2);
7359 if (!BYTES_BIG_ENDIAN)
7360 {
7361 RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0);
7362 RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask);
7363 }
7364 else
7365 {
7366 RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask);
7367 RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0);
7368 }
7369 emit_insn (gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL (V2DImode, v)));
7370 rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, V2DImode, 0);
7371 emit_insn (gen_rtx_SET (mask, sub_mask));
7372
7373 /* mtvsrd[wz] f0,tmp_val. */
7374 rtx tmp_val = gen_reg_rtx (SImode);
7375 if (inner_mode == E_SFmode)
7376 if (TARGET_DIRECT_MOVE_64BIT)
7377 emit_insn (gen_movsi_from_sf (tmp_val, val));
7378 else
7379 {
7380 rtx stack = rs6000_allocate_stack_temp (SFmode, false, true);
7381 emit_insn (gen_movsf_hardfloat (stack, val));
7382 rtx stack2 = copy_rtx (stack);
7383 PUT_MODE (stack2, SImode);
7384 emit_move_insn (tmp_val, stack2);
7385 }
7386 else
7387 tmp_val = force_reg (SImode, val);
7388
7389 rtx val_v16qi = gen_reg_rtx (V16QImode);
7390 rtx val_v2di = gen_reg_rtx (V2DImode);
7391 rtvec vec_val = rtvec_alloc (2);
7392 if (!BYTES_BIG_ENDIAN)
7393 {
7394 RTVEC_ELT (vec_val, 0) = gen_rtx_CONST_INT (DImode, 0);
7395 RTVEC_ELT (vec_val, 1) = tmp_val;
7396 }
7397 else
7398 {
7399 RTVEC_ELT (vec_val, 0) = tmp_val;
7400 RTVEC_ELT (vec_val, 1) = gen_rtx_CONST_INT (DImode, 0);
7401 }
7402 emit_insn (
7403 gen_vec_initv2didi (val_v2di, gen_rtx_PARALLEL (V2DImode, vec_val)));
7404 rtx sub_val = simplify_gen_subreg (V16QImode, val_v2di, V2DImode, 0);
7405 emit_insn (gen_rtx_SET (val_v16qi, sub_val));
7406
7407 /* lvsl 13,0,idx. */
7408 rtx pcv = gen_reg_rtx (V16QImode);
7409 emit_insn (gen_lvsl (pcv, tmp));
7410
7411 /* vperm 1,1,1,13. */
7412 /* vperm 0,0,0,13. */
7413 rtx val_perm = gen_reg_rtx (V16QImode);
7414 rtx mask_perm = gen_reg_rtx (V16QImode);
7415 emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, val_v16qi, pcv));
7416 emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, pcv));
7417
7418 rtx target_v16qi = simplify_gen_subreg (V16QImode, target, mode, 0);
7419
7420 /* xxsel 34,34,32,33. */
7421 emit_insn (
7422 gen_vector_select_v16qi (target_v16qi, target_v16qi, val_perm, mask_perm));
7423 }
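/* [Editorial sketch -- not part of GCC.] The select step at the end of the
   p7/p8 path above: once the value and the all-ones byte mask have been
   rotated into position over the destination slot by the same permutation,
   xxsel computes target = (target & ~mask) | (val & mask) bytewise. A
   stand-alone model of that final step, with a hypothetical name:  */
static void
example_set_var_select (unsigned char target[16], const unsigned char val[16],
			const unsigned char mask[16])
{
  for (int i = 0; i < 16; i++)
    target[i] = (unsigned char) ((target[i] & ~mask[i]) | (val[i] & mask[i]));
}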
7424
7425 /* Set field ELT_RTX of TARGET to VAL. */
7426
7427 void
7428 rs6000_expand_vector_set (rtx target, rtx val, rtx elt_rtx)
7429 {
7430 machine_mode mode = GET_MODE (target);
7431 machine_mode inner_mode = GET_MODE_INNER (mode);
7432 rtx reg = gen_reg_rtx (mode);
7433 rtx mask, mem, x;
7434 int width = GET_MODE_SIZE (inner_mode);
7435 int i;
7436
7437 val = force_reg (GET_MODE (val), val);
7438
7439 if (VECTOR_MEM_VSX_P (mode))
7440 {
7441 if (!CONST_INT_P (elt_rtx))
7442 {
7443 /* For V2DI/V2DF, we can leverage the P9 version to generate xxpermdi
7444 when elt_rtx is variable. */
7445 if ((TARGET_P9_VECTOR && TARGET_POWERPC64) || width == 8)
7446 {
7447 rs6000_expand_vector_set_var_p9 (target, val, elt_rtx);
7448 return;
7449 }
7450 else if (TARGET_VSX)
7451 {
7452 rs6000_expand_vector_set_var_p7 (target, val, elt_rtx);
7453 return;
7454 }
7455 else
7456 gcc_assert (CONST_INT_P (elt_rtx));
7457 }
7458
7459 rtx insn = NULL_RTX;
7460
7461 if (mode == V2DFmode)
7462 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
7463
7464 else if (mode == V2DImode)
7465 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
7466
7467 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
7468 {
7469 if (mode == V4SImode)
7470 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
7471 else if (mode == V8HImode)
7472 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
7473 else if (mode == V16QImode)
7474 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
7475 else if (mode == V4SFmode)
7476 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
7477 }
7478
7479 if (insn)
7480 {
7481 emit_insn (insn);
7482 return;
7483 }
7484 }
7485
7486 /* Simplify setting single element vectors like V1TImode. */
7487 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode)
7488 && INTVAL (elt_rtx) == 0)
7489 {
7490 emit_move_insn (target, gen_lowpart (mode, val));
7491 return;
7492 }
7493
7494 /* Load single variable value. */
7495 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7496 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7497 x = gen_rtx_UNSPEC (VOIDmode,
7498 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7499 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7500 gen_rtvec (2,
7501 gen_rtx_SET (reg, mem),
7502 x)));
7503
7504 /* Linear sequence. */
7505 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7506 for (i = 0; i < 16; ++i)
7507 XVECEXP (mask, 0, i) = GEN_INT (i);
7508
7509 /* Set permute mask to insert element into target. */
7510 for (i = 0; i < width; ++i)
7511 XVECEXP (mask, 0, INTVAL (elt_rtx) * width + i) = GEN_INT (i + 0x10);
7512 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
7513
7514 if (BYTES_BIG_ENDIAN)
7515 x = gen_rtx_UNSPEC (mode,
7516 gen_rtvec (3, target, reg,
7517 force_reg (V16QImode, x)),
7518 UNSPEC_VPERM);
7519 else
7520 {
7521 if (TARGET_P9_VECTOR)
7522 x = gen_rtx_UNSPEC (mode,
7523 gen_rtvec (3, reg, target,
7524 force_reg (V16QImode, x)),
7525 UNSPEC_VPERMR);
7526 else
7527 {
7528 /* Invert selector. We prefer to generate VNAND on P8 so
7529 that future fusion opportunities can kick in, but must
7530 generate VNOR elsewhere. */
7531 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7532 rtx iorx = (TARGET_P8_VECTOR
7533 ? gen_rtx_IOR (V16QImode, notx, notx)
7534 : gen_rtx_AND (V16QImode, notx, notx));
7535 rtx tmp = gen_reg_rtx (V16QImode);
7536 emit_insn (gen_rtx_SET (tmp, iorx));
7537
7538 /* Permute with operands reversed and adjusted selector. */
7539 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7540 UNSPEC_VPERM);
7541 }
7542 }
7543
7544 emit_insn (gen_rtx_SET (target, x));
7545 }
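/* [Editorial sketch -- not part of GCC.] The permute selector built in
   rs6000_expand_vector_set above for a constant element number: start with
   the identity byte selector 0..15 (keep target bytes), then redirect the
   WIDTH bytes of element ELT to selectors 0x10..0x1f (take bytes from the
   register holding the new value). The BE path uses this selector directly;
   the LE paths use VPERMR or an inverted selector, as in the code above.  */
static void
example_build_insert_selector (unsigned char sel[16], int elt, int width)
{
  for (int i = 0; i < 16; i++)
    sel[i] = (unsigned char) i;			/* Identity: keep target.  */
  for (int i = 0; i < width; i++)
    sel[elt * width + i] = (unsigned char) (i + 0x10);	/* Take new value.  */
}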
7546
7547 /* Extract field ELT from VEC into TARGET. */
7548
7549 void
7550 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7551 {
7552 machine_mode mode = GET_MODE (vec);
7553 machine_mode inner_mode = GET_MODE_INNER (mode);
7554 rtx mem;
7555
7556 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7557 {
7558 switch (mode)
7559 {
7560 default:
7561 break;
7562 case E_V1TImode:
7563 emit_move_insn (target, gen_lowpart (TImode, vec));
7564 break;
7565 case E_V2DFmode:
7566 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7567 return;
7568 case E_V2DImode:
7569 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7570 return;
7571 case E_V4SFmode:
7572 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7573 return;
7574 case E_V16QImode:
7575 if (TARGET_DIRECT_MOVE_64BIT)
7576 {
7577 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7578 return;
7579 }
7580 else
7581 break;
7582 case E_V8HImode:
7583 if (TARGET_DIRECT_MOVE_64BIT)
7584 {
7585 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7586 return;
7587 }
7588 else
7589 break;
7590 case E_V4SImode:
7591 if (TARGET_DIRECT_MOVE_64BIT)
7592 {
7593 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7594 return;
7595 }
7596 break;
7597 }
7598 }
7599 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7600 && TARGET_DIRECT_MOVE_64BIT)
7601 {
7602 if (GET_MODE (elt) != DImode)
7603 {
7604 rtx tmp = gen_reg_rtx (DImode);
7605 convert_move (tmp, elt, 0);
7606 elt = tmp;
7607 }
7608 else if (!REG_P (elt))
7609 elt = force_reg (DImode, elt);
7610
7611 switch (mode)
7612 {
7613 case E_V1TImode:
7614 emit_move_insn (target, gen_lowpart (TImode, vec));
7615 return;
7616
7617 case E_V2DFmode:
7618 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7619 return;
7620
7621 case E_V2DImode:
7622 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7623 return;
7624
7625 case E_V4SFmode:
7626 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7627 return;
7628
7629 case E_V4SImode:
7630 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7631 return;
7632
7633 case E_V8HImode:
7634 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7635 return;
7636
7637 case E_V16QImode:
7638 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7639 return;
7640
7641 default:
7642 gcc_unreachable ();
7643 }
7644 }
7645
7646 /* Allocate mode-sized buffer. */
7647 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7648
7649 emit_move_insn (mem, vec);
7650 if (CONST_INT_P (elt))
7651 {
7652 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
7653
7654 /* Add offset to field within buffer matching vector element. */
7655 mem = adjust_address_nv (mem, inner_mode,
7656 modulo_elt * GET_MODE_SIZE (inner_mode));
7657 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
7658 }
7659 else
7660 {
7661 unsigned int ele_size = GET_MODE_SIZE (inner_mode);
7662 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7663 rtx new_addr = gen_reg_rtx (Pmode);
7664
7665 elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
7666 if (ele_size > 1)
7667 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
7668 new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
7669 new_addr = change_address (mem, inner_mode, new_addr);
7670 emit_move_insn (target, new_addr);
7671 }
7672 }
7673
7674 /* Return the offset within a memory object (MEM) of a vector type to a given
7675 element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If
7676 the element is constant, we return a constant integer.
7677
7678 Otherwise, we use a base register temporary to calculate the offset after
7679 masking it to fit within the bounds of the vector and scaling it. The
7680 masking is required by the 64-bit ELF version 2 ABI for the vec_extract
7681 built-in function. */
7682
7683 static rtx
7684 get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
7685 {
7686 if (CONST_INT_P (element))
7687 return GEN_INT (INTVAL (element) * scalar_size);
7688
7689 /* All insns should use the 'Q' constraint (address is a single register) if
7690 the element number is not a constant. */
7691 gcc_assert (satisfies_constraint_Q (mem));
7692
7693 /* Mask the element to make sure the element number is between 0 and the
7694 maximum number of elements - 1 so that we don't generate an address
7695 outside the vector. */
7696 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (GET_MODE (mem)) - 1);
7697 rtx and_op = gen_rtx_AND (Pmode, element, num_ele_m1);
7698 emit_insn (gen_rtx_SET (base_tmp, and_op));
7699
7700 /* Shift the element to get the byte offset from the element number. */
7701 int shift = exact_log2 (scalar_size);
7702 gcc_assert (shift >= 0);
7703
7704 if (shift > 0)
7705 {
7706 rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
7707 emit_insn (gen_rtx_SET (base_tmp, shift_op));
7708 }
7709
7710 return base_tmp;
7711 }
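/* [Editorial sketch -- not part of GCC.] The variable-element offset
   computation of get_vector_offset above: mask the element number into range
   (required by the 64-bit ELFv2 ABI for vec_extract), then scale by the
   element size; the RTL emits the scaling as a left shift. Assumes NUNITS
   is a power of two; the name is hypothetical.  */
static unsigned
example_vector_offset (unsigned element, unsigned nunits, unsigned scalar_size)
{
  unsigned masked = element & (nunits - 1);	/* Clamp into the vector.   */
  return masked * scalar_size;			/* Byte offset of element.  */
}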
7712
7713 /* Helper function to update PC-relative addresses when we are adjusting a
7714 memory address (ADDR) of a vector to point to a scalar field within the
7715 vector with a constant offset (ELEMENT_OFFSET). If the address is not
7716 valid, we can use the base register temporary (BASE_TMP) to form it. */
7717
7718 static rtx
7719 adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
7720 {
7721 rtx new_addr = NULL;
7722
7723 gcc_assert (CONST_INT_P (element_offset));
7724
7725 if (GET_CODE (addr) == CONST)
7726 addr = XEXP (addr, 0);
7727
7728 if (GET_CODE (addr) == PLUS)
7729 {
7730 rtx op0 = XEXP (addr, 0);
7731 rtx op1 = XEXP (addr, 1);
7732
7733 if (CONST_INT_P (op1))
7734 {
7735 HOST_WIDE_INT offset
7736 = INTVAL (XEXP (addr, 1)) + INTVAL (element_offset);
7737
7738 if (offset == 0)
7739 new_addr = op0;
7740
7741 else
7742 {
7743 rtx plus = gen_rtx_PLUS (Pmode, op0, GEN_INT (offset));
7744 new_addr = gen_rtx_CONST (Pmode, plus);
7745 }
7746 }
7747
7748 else
7749 {
7750 emit_move_insn (base_tmp, addr);
7751 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7752 }
7753 }
7754
7755 else if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
7756 {
7757 rtx plus = gen_rtx_PLUS (Pmode, addr, element_offset);
7758 new_addr = gen_rtx_CONST (Pmode, plus);
7759 }
7760
7761 else
7762 gcc_unreachable ();
7763
7764 return new_addr;
7765 }
7766
7767 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7768 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7769 temporary (BASE_TMP) to fix up the address. Return the new memory address
7770 that is valid for reads or writes to a given register (SCALAR_REG).
7771
7772 This function is expected to be called after reload is completed when we are
7773 splitting insns. The temporary BASE_TMP might be set multiple times with
7774 this code. */
7775
7776 rtx
7777 rs6000_adjust_vec_address (rtx scalar_reg,
7778 rtx mem,
7779 rtx element,
7780 rtx base_tmp,
7781 machine_mode scalar_mode)
7782 {
7783 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7784 rtx addr = XEXP (mem, 0);
7785 rtx new_addr;
7786
7787 gcc_assert (!reg_mentioned_p (base_tmp, addr));
7788 gcc_assert (!reg_mentioned_p (base_tmp, element));
7789
7790 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7791 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7792
7793 /* Calculate what we need to add to the address to get the element
7794 address. */
7795 rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size);
7796
7797 /* Create the new address pointing to the element within the vector. If we
7798 are adding 0, we don't have to change the address. */
7799 if (element_offset == const0_rtx)
7800 new_addr = addr;
7801
7802 /* A simple indirect address can be converted into a reg + offset
7803 address. */
7804 else if (REG_P (addr) || SUBREG_P (addr))
7805 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7806
7807 /* For references to local static variables, fold a constant offset into the
7808 address. */
7809 else if (pcrel_local_address (addr, Pmode) && CONST_INT_P (element_offset))
7810 new_addr = adjust_vec_address_pcrel (addr, element_offset, base_tmp);
7811
7812 /* Optimize D-FORM addresses with a constant offset and a constant element
7813 number, to include the element offset in the address directly. */
7814 else if (GET_CODE (addr) == PLUS)
7815 {
7816 rtx op0 = XEXP (addr, 0);
7817 rtx op1 = XEXP (addr, 1);
7818
7819 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7820 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7821 {
7822 /* op0 should never be r0, because r0+offset is not valid. But it
7823 doesn't hurt to make sure it is not r0. */
7824 gcc_assert (reg_or_subregno (op0) != 0);
7825
7826 /* D-FORM address with constant element number. */
7827 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7828 rtx offset_rtx = GEN_INT (offset);
7829 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7830 }
7831 else
7832 {
7833 /* If we don't have a D-FORM address with a constant element number,
7834 add the two elements in the current address. Then add the offset.
7835
7836 Previously, we tried to add the offset to OP1 and change the
7837 address to an X-FORM format adding OP0 and BASE_TMP, but it became
7838 complicated because we had to verify that op1 was not GPR0 and we
7839 had a constant element offset (due to the way ADDI is defined).
7840 By doing the add of OP0 and OP1 first, and then adding in the
7841 offset, it has the benefit that if D-FORM instructions are
7842 allowed, the offset is part of the memory access to the vector
7843 element. */
7844 emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
7845 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7846 }
7847 }
7848
7849 else
7850 {
7851 emit_move_insn (base_tmp, addr);
7852 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7853 }
7854
7855 /* If the address isn't valid, move the address into the temporary base
7856 register. Some reasons it could not be valid include:
7857
7858 The address offset overflowed the 16 or 34 bit offset size;
7859 We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
7860 We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
7861 Only X_FORM loads can be done, and the address is D_FORM. */
7862
7863 enum insn_form iform
7864 = address_to_insn_form (new_addr, scalar_mode,
7865 reg_to_non_prefixed (scalar_reg, scalar_mode));
7866
7867 if (iform == INSN_FORM_BAD)
7868 {
7869 emit_move_insn (base_tmp, new_addr);
7870 new_addr = base_tmp;
7871 }
7872
7873 return change_address (mem, scalar_mode, new_addr);
7874 }
7875
7876 /* Split a variable vec_extract operation into the component instructions. */
7877
7878 void
7879 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7880 rtx tmp_altivec)
7881 {
7882 machine_mode mode = GET_MODE (src);
7883 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
7884 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7885 int byte_shift = exact_log2 (scalar_size);
7886
7887 gcc_assert (byte_shift >= 0);
7888
7889 /* If we are given a memory address, optimize to load just the element. We
7890 don't have to adjust the vector element number on little endian
7891 systems. */
7892 if (MEM_P (src))
7893 {
7894 emit_move_insn (dest,
7895 rs6000_adjust_vec_address (dest, src, element, tmp_gpr,
7896 scalar_mode));
7897 return;
7898 }
7899
7900 else if (REG_P (src) || SUBREG_P (src))
7901 {
7902 int num_elements = GET_MODE_NUNITS (mode);
7903 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
7904 int bit_shift = 7 - exact_log2 (num_elements);
7905 rtx element2;
7906 unsigned int dest_regno = reg_or_subregno (dest);
7907 unsigned int src_regno = reg_or_subregno (src);
7908 unsigned int element_regno = reg_or_subregno (element);
7909
7910 gcc_assert (REG_P (tmp_gpr));
7911
7912 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7913 a general purpose register. */
7914 if (TARGET_P9_VECTOR
7915 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7916 && INT_REGNO_P (dest_regno)
7917 && ALTIVEC_REGNO_P (src_regno)
7918 && INT_REGNO_P (element_regno))
7919 {
7920 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7921 rtx element_si = gen_rtx_REG (SImode, element_regno);
7922
7923 if (mode == V16QImode)
7924 emit_insn (BYTES_BIG_ENDIAN
7925 ? gen_vextublx (dest_si, element_si, src)
7926 : gen_vextubrx (dest_si, element_si, src));
7927
7928 else if (mode == V8HImode)
7929 {
7930 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7931 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7932 emit_insn (BYTES_BIG_ENDIAN
7933 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7934 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7935 }
7936
7937
7938 else
7939 {
7940 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7941 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7942 emit_insn (BYTES_BIG_ENDIAN
7943 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7944 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7945 }
7946
7947 return;
7948 }
7949
7950
7951 gcc_assert (REG_P (tmp_altivec));
7952
7953 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7954 an XOR, otherwise we need to subtract. The shift amount is chosen so
7955 that VSLO will shift the element into the upper position (adding 3 to
7956 convert a byte shift into a bit shift). */
7957 if (scalar_size == 8)
7958 {
7959 if (!BYTES_BIG_ENDIAN)
7960 {
7961 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7962 element2 = tmp_gpr;
7963 }
7964 else
7965 element2 = element;
7966
7967 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7968 bit. */
7969 emit_insn (gen_rtx_SET (tmp_gpr,
7970 gen_rtx_AND (DImode,
7971 gen_rtx_ASHIFT (DImode,
7972 element2,
7973 GEN_INT (6)),
7974 GEN_INT (64))));
7975 }
7976 else
7977 {
7978 if (!BYTES_BIG_ENDIAN)
7979 {
7980 rtx num_ele_m1 = GEN_INT (num_elements - 1);
7981
7982 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7983 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7984 element2 = tmp_gpr;
7985 }
7986 else
7987 element2 = element;
7988
7989 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7990 }
7991
7992 /* Get the value into the lower byte of the Altivec register where VSLO
7993 expects it. */
7994 if (TARGET_P9_VECTOR)
7995 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7996 else if (can_create_pseudo_p ())
7997 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7998 else
7999 {
8000 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8001 emit_move_insn (tmp_di, tmp_gpr);
8002 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
8003 }
8004
8005 /* Do the VSLO to get the value into the final location. */
8006 switch (mode)
8007 {
8008 case E_V2DFmode:
8009 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
8010 return;
8011
8012 case E_V2DImode:
8013 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
8014 return;
8015
8016 case E_V4SFmode:
8017 {
8018 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8019 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
8020 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
8021 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
8022 tmp_altivec));
8023
8024 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
8025 return;
8026 }
8027
8028 case E_V4SImode:
8029 case E_V8HImode:
8030 case E_V16QImode:
8031 {
8032 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8033 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
8034 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
8035 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
8036 tmp_altivec));
8037 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
8038 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
8039 GEN_INT (64 - bits_in_element)));
8040 return;
8041 }
8042
8043 default:
8044 gcc_unreachable ();
8045 }
8046
8047 return;
8048 }
8049 else
8050 gcc_unreachable ();
8051 }
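/* [Editorial sketch -- not part of GCC.] The little-endian element-number
   adjustment used in rs6000_split_vec_extract_var above: two-element vectors
   (V2DI/V2DF) just flip the lane with an XOR, while wider vectors mask the
   index and subtract it from NUNITS - 1. Hypothetical helper name.  */
static unsigned
example_le_adjust (unsigned element, unsigned nunits)
{
  if (nunits == 2)
    return element ^ 1;				   /* V2DI/V2DF: cheap XOR. */
  return (nunits - 1) - (element & (nunits - 1));  /* Mask, then subtract.  */
}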
8052
8053 /* Return the alignment of TYPE. Existing alignment is ALIGN. HOW
8054 selects whether the alignment is ABI-mandated, optional, or both
8055 ABI-mandated and optional alignment. */
8056
8057 unsigned int
8058 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
8059 {
8060 if (how != align_opt)
8061 {
8062 if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
8063 align = 128;
8064 }
8065
8066 if (how != align_abi)
8067 {
8068 if (TREE_CODE (type) == ARRAY_TYPE
8069 && TYPE_MODE (TREE_TYPE (type)) == QImode)
8070 {
8071 if (align < BITS_PER_WORD)
8072 align = BITS_PER_WORD;
8073 }
8074 }
8075
8076 return align;
8077 }
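/* [Editorial sketch -- not part of GCC.] The policy of rs6000_data_alignment
   above with the HOW selector unfolded into two booleans (a hypothetical
   simplification): ABI alignment raises vector types to at least 128 bits,
   and optional alignment raises char arrays to word alignment. All
   alignments are in bits, as in the function above.  */
static unsigned
example_data_alignment (bool is_vector, bool is_char_array, unsigned align,
			unsigned bits_per_word, bool want_abi, bool want_opt)
{
  if (want_abi && is_vector && align < 128)
    align = 128;			/* ABI-mandated vector alignment.  */
  if (want_opt && is_char_array && align < bits_per_word)
    align = bits_per_word;		/* Optional word alignment.  */
  return align;
}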
8078
8079 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
8080 instructions simply ignore the low bits; VSX memory instructions
8081 are aligned to 4 or 8 bytes. */
8082
8083 static bool
8084 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
8085 {
8086 return (STRICT_ALIGNMENT
8087 || (!TARGET_EFFICIENT_UNALIGNED_VSX
8088 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
8089 || ((VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode))
8090 && (int) align < VECTOR_ALIGN (mode)))));
8091 }
8092
8093 /* AIX word-aligns FP doubles but doubleword-aligns 64-bit ints. */
8094
8095 unsigned int
8096 rs6000_special_adjust_field_align (tree type, unsigned int computed)
8097 {
8098 if (computed <= 32 || TYPE_PACKED (type))
8099 return computed;
8100
8101 /* Strip initial arrays. */
8102 while (TREE_CODE (type) == ARRAY_TYPE)
8103 type = TREE_TYPE (type);
8104
8105 /* If RECORD or UNION, recursively find the first field. */
8106 while (AGGREGATE_TYPE_P (type))
8107 {
8108 tree field = TYPE_FIELDS (type);
8109
8110 /* Skip all non-field decls. */
8111 while (field != NULL
8112 && (TREE_CODE (field) != FIELD_DECL
8113 || DECL_FIELD_ABI_IGNORED (field)))
8114 field = DECL_CHAIN (field);
8115
8116 if (! field)
8117 break;
8118
8119 /* A packed field does not contribute any extra alignment. */
8120 if (DECL_PACKED (field))
8121 return computed;
8122
8123 type = TREE_TYPE (field);
8124
8125 /* Strip arrays. */
8126 while (TREE_CODE (type) == ARRAY_TYPE)
8127 type = TREE_TYPE (type);
8128 }
8129
8130 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
8131 && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
8132 computed = MIN (computed, 32);
8133
8134 return computed;
8135 }
8136
8137 /* AIX increases natural record alignment to doubleword if the innermost first
8138 field is an FP double while the FP fields remain word aligned.
8139 Only called if TYPE initially is a RECORD or UNION. */
8140
8141 unsigned int
8142 rs6000_special_round_type_align (tree type, unsigned int computed,
8143 unsigned int specified)
8144 {
8145 unsigned int align = MAX (computed, specified);
8146
8147 if (TYPE_PACKED (type) || align >= 64)
8148 return align;
8149
8150 /* If RECORD or UNION, recursively find the first field. */
8151 do
8152 {
8153 tree field = TYPE_FIELDS (type);
8154
8155 /* Skip all non-field decls. */
8156 while (field != NULL
8157 && (TREE_CODE (field) != FIELD_DECL
8158 || DECL_FIELD_ABI_IGNORED (field)))
8159 field = DECL_CHAIN (field);
8160
8161 if (! field)
8162 break;
8163
8164 /* A packed field does not contribute any extra alignment. */
8165 if (DECL_PACKED (field))
8166 return align;
8167
8168 type = TREE_TYPE (field);
8169
8170 /* Strip arrays. */
8171 while (TREE_CODE (type) == ARRAY_TYPE)
8172 type = TREE_TYPE (type);
8173 } while (AGGREGATE_TYPE_P (type));
8174
8175 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
8176 && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
8177 align = MAX (align, 64);
8178
8179 return align;
8180 }
8181
8182 /* Darwin increases record alignment to the natural alignment of
8183 the first field. */
8184
8185 unsigned int
8186 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
8187 unsigned int specified)
8188 {
8189 unsigned int align = MAX (computed, specified);
8190
8191 if (TYPE_PACKED (type))
8192 return align;
8193
8194 /* Find the first field, looking down into aggregates. */
8195 do {
8196 tree field = TYPE_FIELDS (type);
8197 /* Skip all non-field decls. */
8198 while (field != NULL
8199 && (TREE_CODE (field) != FIELD_DECL
8200 || DECL_FIELD_ABI_IGNORED (field)))
8201 field = DECL_CHAIN (field);
8202 if (! field)
8203 break;
8204 /* A packed field does not contribute any extra alignment. */
8205 if (DECL_PACKED (field))
8206 return align;
8207 type = TREE_TYPE (field);
8208 while (TREE_CODE (type) == ARRAY_TYPE)
8209 type = TREE_TYPE (type);
8210 } while (AGGREGATE_TYPE_P (type));
8211
8212 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
8213 align = MAX (align, TYPE_ALIGN (type));
8214
8215 return align;
8216 }
8217
8218 /* Return 1 for an operand in small memory on V.4/eabi. */
8219
8220 int
8221 small_data_operand (rtx op ATTRIBUTE_UNUSED,
8222 machine_mode mode ATTRIBUTE_UNUSED)
8223 {
8224 #if TARGET_ELF
8225 rtx sym_ref;
8226
8227 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
8228 return 0;
8229
8230 if (DEFAULT_ABI != ABI_V4)
8231 return 0;
8232
8233 if (SYMBOL_REF_P (op))
8234 sym_ref = op;
8235
8236 else if (GET_CODE (op) != CONST
8237 || GET_CODE (XEXP (op, 0)) != PLUS
8238 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
8239 || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
8240 return 0;
8241
8242 else
8243 {
8244 rtx sum = XEXP (op, 0);
8245 HOST_WIDE_INT summand;
8246
8247 /* We have to be careful here, because it is the referenced address
8248 that must be 32k from _SDA_BASE_, not just the symbol. */
8249 summand = INTVAL (XEXP (sum, 1));
8250 if (summand < 0 || summand > g_switch_value)
8251 return 0;
8252
8253 sym_ref = XEXP (sum, 0);
8254 }
8255
8256 return SYMBOL_REF_SMALL_P (sym_ref);
8257 #else
8258 return 0;
8259 #endif
8260 }
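/* [Editorial sketch -- not part of GCC.] The addend check inside
   small_data_operand above: for SYMBOL+OFFSET the referenced address itself,
   not just the symbol, must stay within reach of _SDA_BASE_, so the addend
   must lie between 0 and the -G threshold (g_switch_value). Hypothetical
   helper name.  */
static bool
example_small_data_offset_ok (long summand, long g_switch_value)
{
  return summand >= 0 && summand <= g_switch_value;
}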
8261
8262 /* Return true if either operand is a general purpose register. */
8263
8264 bool
8265 gpr_or_gpr_p (rtx op0, rtx op1)
8266 {
8267 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
8268 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
8269 }
8270
8271 /* Return true if this is a move direct operation between GPR registers and
8272 floating point/VSX registers. */
8273
8274 bool
8275 direct_move_p (rtx op0, rtx op1)
8276 {
8277 if (!REG_P (op0) || !REG_P (op1))
8278 return false;
8279
8280 if (!TARGET_DIRECT_MOVE)
8281 return false;
8282
8283 int regno0 = REGNO (op0);
8284 int regno1 = REGNO (op1);
8285 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
8286 return false;
8287
8288 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
8289 return true;
8290
8291 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
8292 return true;
8293
8294 return false;
8295 }
8296
8297 /* Return true if ADDR is an acceptable address for a quad memory
8298 operation of mode MODE (either LQ/STQ for general purpose registers, or
8299 LXV/STXV for vector registers under ISA 3.0). STRICT says whether to
8300 use strict checking for the base registers that appear in the
8301 address. */
8302
8303 bool
8304 quad_address_p (rtx addr, machine_mode mode, bool strict)
8305 {
8306 rtx op0, op1;
8307
8308 if (GET_MODE_SIZE (mode) < 16)
8309 return false;
8310
8311 if (legitimate_indirect_address_p (addr, strict))
8312 return true;
8313
8314 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
8315 return false;
8316
8317 /* Is this a valid prefixed address? If the bottom four bits of the offset
8318 are non-zero, we could use a prefixed instruction (which does not have the
8319 DQ-form constraint that the traditional instruction had) instead of
8320 forcing the unaligned offset to a GPR. */
8321 if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
8322 return true;
8323
8324 if (GET_CODE (addr) != PLUS)
8325 return false;
8326
8327 op0 = XEXP (addr, 0);
8328 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
8329 return false;
8330
8331 op1 = XEXP (addr, 1);
8332 if (!CONST_INT_P (op1))
8333 return false;
8334
8335 return quad_address_offset_p (INTVAL (op1));
8336 }
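/* [Editorial sketch -- not part of GCC.] A model of the DQ-form offset test
   this function delegates to quad_address_offset_p: the traditional LXV/STXV
   encoding takes a 16-bit signed displacement whose bottom four bits must be
   zero (the prefixed forms relax that restriction, as noted above). The
   helper name is hypothetical.  */
static bool
example_dq_offset_ok (long offset)
{
  return offset >= -32768 && offset <= 32767	/* 16-bit signed field.     */
	 && (offset & 0xf) == 0;		/* DQ-form: 16-byte multiple. */
}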
8337
8338 /* Return true if this is a load or store quad operation. This function does
8339 not handle the atomic quad memory instructions. */
8340
8341 bool
8342 quad_load_store_p (rtx op0, rtx op1)
8343 {
8344 bool ret;
8345
8346 if (!TARGET_QUAD_MEMORY)
8347 ret = false;
8348
8349 else if (REG_P (op0) && MEM_P (op1))
8350 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
8351 && quad_memory_operand (op1, GET_MODE (op1))
8352 && !reg_overlap_mentioned_p (op0, op1));
8353
8354 else if (MEM_P (op0) && REG_P (op1))
8355 ret = (quad_memory_operand (op0, GET_MODE (op0))
8356 && quad_int_reg_operand (op1, GET_MODE (op1)));
8357
8358 else
8359 ret = false;
8360
8361 if (TARGET_DEBUG_ADDR)
8362 {
8363 fprintf (stderr, "\n========== quad_load_store, return %s\n",
8364 ret ? "true" : "false");
8365 debug_rtx (gen_rtx_SET (op0, op1));
8366 }
8367
8368 return ret;
8369 }
8370
8371 /* Given an address, return a constant offset term if one exists. */
8372
8373 static rtx
8374 address_offset (rtx op)
8375 {
8376 if (GET_CODE (op) == PRE_INC
8377 || GET_CODE (op) == PRE_DEC)
8378 op = XEXP (op, 0);
8379 else if (GET_CODE (op) == PRE_MODIFY
8380 || GET_CODE (op) == LO_SUM)
8381 op = XEXP (op, 1);
8382
8383 if (GET_CODE (op) == CONST)
8384 op = XEXP (op, 0);
8385
8386 if (GET_CODE (op) == PLUS)
8387 op = XEXP (op, 1);
8388
8389 if (CONST_INT_P (op))
8390 return op;
8391
8392 return NULL_RTX;
8393 }
8394
8395 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
8396 the mode. If we can't find (or don't know) the alignment of the symbol
8397 we assume (optimistically) that it's sufficiently aligned [??? maybe we
8398 should be pessimistic]. Offsets are validated in the same way as for
8399 reg + offset. */
8400 static bool
8401 darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode)
8402 {
8403 /* We should not get here with this. */
8404 gcc_checking_assert (! mode_supports_dq_form (mode));
8405
8406 if (GET_CODE (x) == CONST)
8407 x = XEXP (x, 0);
8408
8409 /* If we are building PIC code, then any symbol must be wrapped in an
8410 UNSPEC_MACHOPIC_OFFSET so that it will get the picbase subtracted. */
8411 bool machopic_offs_p = false;
8412 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
8413 {
8414 x = XVECEXP (x, 0, 0);
8415 machopic_offs_p = true;
8416 }
8417
8418 rtx sym = NULL_RTX;
8419 unsigned HOST_WIDE_INT offset = 0;
8420
8421 if (GET_CODE (x) == PLUS)
8422 {
8423 sym = XEXP (x, 0);
8424 if (! SYMBOL_REF_P (sym))
8425 return false;
8426 if (!CONST_INT_P (XEXP (x, 1)))
8427 return false;
8428 offset = INTVAL (XEXP (x, 1));
8429 }
8430 else if (SYMBOL_REF_P (x))
8431 sym = x;
8432 else if (CONST_INT_P (x))
8433 offset = INTVAL (x);
8434 else if (GET_CODE (x) == LABEL_REF)
8435 offset = 0; // We assume code labels are Pmode aligned
8436 else
8437 return false; // not sure what we have here.
8438
8439 /* If we don't know the alignment of the thing to which the symbol refers,
8440 we assume optimistically it is "enough".
8441 ??? maybe we should be pessimistic instead. */
8442 unsigned align = 0;
8443
8444 if (sym)
8445 {
8446 tree decl = SYMBOL_REF_DECL (sym);
8447 /* As noted above, PIC code cannot use a bare SYMBOL_REF. */
8448 if (TARGET_MACHO && flag_pic && !machopic_offs_p)
8449 return false;
8450 #if TARGET_MACHO
8451 if (MACHO_SYMBOL_INDIRECTION_P (sym))
8452 /* The decl in an indirection symbol is the original one, which might
8453 be less aligned than the indirection. Our indirections are always
8454 pointer-aligned. */
8455 ;
8456 else
8457 #endif
8458 if (decl && DECL_ALIGN (decl))
8459 align = DECL_ALIGN_UNIT (decl);
8460 }
8461
8462 unsigned int extra = 0;
8463 switch (mode)
8464 {
8465 case E_DFmode:
8466 case E_DDmode:
8467 case E_DImode:
8468 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8469 addressing. */
8470 if (VECTOR_MEM_VSX_P (mode))
8471 return false;
8472
8473 if (!TARGET_POWERPC64)
8474 extra = 4;
8475 else if ((offset & 3) || (align & 3))
8476 return false;
8477 break;
8478
8479 case E_TFmode:
8480 case E_IFmode:
8481 case E_KFmode:
8482 case E_TDmode:
8483 case E_TImode:
8484 case E_PTImode:
8485 extra = 8;
8486 if (!TARGET_POWERPC64)
8487 extra = 12;
8488 else if ((offset & 3) || (align & 3))
8489 return false;
8490 break;
8491
8492 default:
8493 break;
8494 }
8495
8496 /* We only care if the access(es) would cause a change to the high part. */
8497 offset = sext_hwi (offset, 16);
8498 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8499 }
8500
8501 /* Return true if the MEM operand is a memory operand suitable for use
8502 with a (full width, possibly multiple) gpr load/store. On
8503 powerpc64 this means the offset must be divisible by 4.
8504 Implements 'Y' constraint.
8505
8506 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8507 a constraint function we know the operand has satisfied a suitable
8508 memory predicate.
8509
8510 Offsetting a lo_sum should not be allowed, except where we know by
8511 alignment that a 32k boundary is not crossed. Note that by
8512 "offsetting" here we mean a further offset to access parts of the
8513 MEM. It's fine to have a lo_sum where the inner address is offset
8514 from a sym, since the same sym+offset will appear in the high part
8515 of the address calculation. */
8516
8517 bool
8518 mem_operand_gpr (rtx op, machine_mode mode)
8519 {
8520 unsigned HOST_WIDE_INT offset;
8521 int extra;
8522 rtx addr = XEXP (op, 0);
8523
8524 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
8525 if (TARGET_UPDATE
8526 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
8527 && mode_supports_pre_incdec_p (mode)
8528 && legitimate_indirect_address_p (XEXP (addr, 0), false))
8529 return true;
8530
8531 /* Allow prefixed instructions if supported. If the bottom two bits of the
8532 offset are non-zero, we could use a prefixed instruction (which does not
8533 have the DS-form constraint that the traditional instruction had) instead
8534 of forcing the unaligned offset to a GPR. */
8535 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
8536 return true;
8537
8538 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
8539 really OK. Doing this early avoids teaching all the other machinery
8540 about them. */
8541 if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
8542 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);
8543
8544 /* Only allow offsettable addresses. See PRs 83969 and 84279. */
8545 if (!rs6000_offsettable_memref_p (op, mode, false))
8546 return false;
8547
8548 op = address_offset (addr);
8549 if (op == NULL_RTX)
8550 return true;
8551
8552 offset = INTVAL (op);
8553 if (TARGET_POWERPC64 && (offset & 3) != 0)
8554 return false;
8555
8556 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8557 if (extra < 0)
8558 extra = 0;
8559
8560 if (GET_CODE (addr) == LO_SUM)
8561 /* For lo_sum addresses, we must allow any offset except one that
8562 causes a wrap, so test only the low 16 bits. */
8563 offset = sext_hwi (offset, 16);
8564
8565 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8566 }
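/* [Editorial sketch -- not part of GCC.] The shape of the
   SIGNED_16BIT_OFFSET_EXTRA_P test used above, under the assumption that it
   accepts OFFSET when both OFFSET and OFFSET + EXTRA (the bytes beyond the
   first word of a multi-word access) fit a signed 16-bit displacement;
   powerpc64 additionally requires the low two bits clear for DS-form.  */
static bool
example_gpr_offset_ok (long offset, long extra, bool powerpc64)
{
  if (powerpc64 && (offset & 3) != 0)
    return false;			/* DS-form: multiple of 4.  */
  return offset >= -32768 && offset + extra <= 32767;
}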
8567
8568 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8569 enforce an offset divisible by 4 even for 32-bit. */
8570
8571 bool
8572 mem_operand_ds_form (rtx op, machine_mode mode)
8573 {
8574 unsigned HOST_WIDE_INT offset;
8575 int extra;
8576 rtx addr = XEXP (op, 0);
8577
8578 /* Allow prefixed instructions if supported. If the bottom two bits of the
8579 offset are non-zero, we could use a prefixed instruction (which does not
8580 have the DS-form constraint that the traditional instruction had) instead
8581 of forcing the unaligned offset to a GPR. */
8582 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
8583 return true;
8584
8585 if (!offsettable_address_p (false, mode, addr))
8586 return false;
8587
8588 op = address_offset (addr);
8589 if (op == NULL_RTX)
8590 return true;
8591
8592 offset = INTVAL (op);
8593 if ((offset & 3) != 0)
8594 return false;
8595
8596 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8597 if (extra < 0)
8598 extra = 0;
8599
8600 if (GET_CODE (addr) == LO_SUM)
8601 /* For lo_sum addresses, we must allow any offset except one that
8602 causes a wrap, so test only the low 16 bits. */
8603 offset = sext_hwi (offset, 16);
8604
8605 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8606 }
8607 \f
8608 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8609
8610 static bool
8611 reg_offset_addressing_ok_p (machine_mode mode)
8612 {
8613 switch (mode)
8614 {
8615 case E_V16QImode:
8616 case E_V8HImode:
8617 case E_V4SFmode:
8618 case E_V4SImode:
8619 case E_V2DFmode:
8620 case E_V2DImode:
8621 case E_V1TImode:
8622 case E_TImode:
8623 case E_TFmode:
8624 case E_KFmode:
8625 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8626 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8627 a vector mode, if we want to use the VSX registers to move it around,
8628 we need to restrict ourselves to reg+reg addressing. Similarly for
8629 IEEE 128-bit floating point that is passed in a single vector
8630 register. */
8631 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
8632 return mode_supports_dq_form (mode);
8633 break;
8634
8635 /* The vector pair/quad types support offset addressing if the
8636 underlying vectors support offset addressing. */
8637 case E_OOmode:
8638 case E_XOmode:
8639 return TARGET_MMA;
8640
8641 case E_SDmode:
8642 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8643 addressing for the LFIWZX and STFIWX instructions. */
8644 if (TARGET_NO_SDMODE_STACK)
8645 return false;
8646 break;
8647
8648 default:
8649 break;
8650 }
8651
8652 return true;
8653 }
8654
8655 static bool
8656 virtual_stack_registers_memory_p (rtx op)
8657 {
8658 int regnum;
8659
8660 if (REG_P (op))
8661 regnum = REGNO (op);
8662
8663 else if (GET_CODE (op) == PLUS
8664 && REG_P (XEXP (op, 0))
8665 && CONST_INT_P (XEXP (op, 1)))
8666 regnum = REGNO (XEXP (op, 0));
8667
8668 else
8669 return false;
8670
8671 return (regnum >= FIRST_VIRTUAL_REGISTER
8672 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
8673 }
8674
8675 /* Return true if a MODE sized memory accesses to OP plus OFFSET
8676 is known to not straddle a 32k boundary. This function is used
8677 to determine whether -mcmodel=medium code can use TOC pointer
8678 relative addressing for OP. This means the alignment of the TOC
8679 pointer must also be taken into account, and unfortunately that is
8680 only 8 bytes. */
8681
8682 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8683 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8684 #endif
8685
8686 static bool
8687 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8688 machine_mode mode)
8689 {
8690 tree decl;
8691 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8692
8693 if (!SYMBOL_REF_P (op))
8694 return false;
8695
8696 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8697 SYMBOL_REF. */
8698 if (mode_supports_dq_form (mode))
8699 return false;
8700
8701 dsize = GET_MODE_SIZE (mode);
8702 decl = SYMBOL_REF_DECL (op);
8703 if (!decl)
8704 {
8705 if (dsize == 0)
8706 return false;
8707
8708 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8709 replacing memory addresses with an anchor plus offset. We
8710 could find the decl by rummaging around in the block->objects
8711 VEC for the given offset but that seems like too much work. */
8712 dalign = BITS_PER_UNIT;
8713 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8714 && SYMBOL_REF_ANCHOR_P (op)
8715 && SYMBOL_REF_BLOCK (op) != NULL)
8716 {
8717 struct object_block *block = SYMBOL_REF_BLOCK (op);
8718
8719 dalign = block->alignment;
8720 offset += SYMBOL_REF_BLOCK_OFFSET (op);
8721 }
8722 else if (CONSTANT_POOL_ADDRESS_P (op))
8723 {
8724 /* It would be nice to have get_pool_align ().  */
8725 machine_mode cmode = get_pool_mode (op);
8726
8727 dalign = GET_MODE_ALIGNMENT (cmode);
8728 }
8729 }
8730 else if (DECL_P (decl))
8731 {
8732 dalign = DECL_ALIGN (decl);
8733
8734 if (dsize == 0)
8735 {
8736 /* Allow BLKmode when the entire object is known to not
8737 cross a 32k boundary. */
8738 if (!DECL_SIZE_UNIT (decl))
8739 return false;
8740
8741 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8742 return false;
8743
8744 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8745 if (dsize > 32768)
8746 return false;
8747
8748 dalign /= BITS_PER_UNIT;
8749 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8750 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8751 return dalign >= dsize;
8752 }
8753 }
8754 else
8755 gcc_unreachable ();
8756
8757 /* Find how many bits of the alignment we know for this access. */
8758 dalign /= BITS_PER_UNIT;
8759 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8760 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8761 mask = dalign - 1;
8762 lsb = offset & -offset;
8763 mask &= lsb - 1;
8764 dalign = mask + 1;
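/* Worked example (illustrative): with dalign = 8 and offset = 0x14,
   lsb = 4, so mask = 7 & 3 = 3 and dalign becomes 4; the access then
   fits only if dsize <= 4.  */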
8765
8766 return dalign >= dsize;
8767 }
8768
8769 static bool
8770 constant_pool_expr_p (rtx op)
8771 {
8772 rtx base, offset;
8773
8774 split_const (op, &base, &offset);
8775 return (SYMBOL_REF_P (base)
8776 && CONSTANT_POOL_ADDRESS_P (base)
8777 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8778 }
8779
8780 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
8781 use that as the register to put the HIGH value into if register allocation
8782 is already done. */
8783
8784 rtx
8785 create_TOC_reference (rtx symbol, rtx largetoc_reg)
8786 {
8787 rtx tocrel, tocreg, hi;
8788
8789 gcc_assert (TARGET_TOC);
8790
8791 if (TARGET_DEBUG_ADDR)
8792 {
8793 if (SYMBOL_REF_P (symbol))
8794 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
8795 XSTR (symbol, 0));
8796 else
8797 {
8798 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
8799 GET_RTX_NAME (GET_CODE (symbol)));
8800 debug_rtx (symbol);
8801 }
8802 }
8803
8804 if (!can_create_pseudo_p ())
8805 df_set_regs_ever_live (TOC_REGISTER, true);
8806
8807 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
8808 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
8809 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
8810 return tocrel;
8811
8812 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
8813 if (largetoc_reg != NULL)
8814 {
8815 emit_move_insn (largetoc_reg, hi);
8816 hi = largetoc_reg;
8817 }
8818 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
8819 }
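
/* As an illustration, for -mcmodel=medium after register allocation the
   returned (lo_sum (high ...) ...) is expected to end up as something
   like
	addis REG,2,SYM@toc@ha
	...  SYM@toc@l(REG)
   whereas for -mcmodel=small the bare tocrel is simply SYM@toc(2).  */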
8820
8821 /* These are only used to pass through from print_operand/print_operand_address
8822 to rs6000_output_addr_const_extra over the intervening function
8823 output_addr_const which is not target code. */
8824 static const_rtx tocrel_base_oac, tocrel_offset_oac;
8825
8826 /* Return true if OP is a toc pointer relative address (the output
8827 of create_TOC_reference). If STRICT, do not match non-split
8828 -mcmodel=large/medium toc pointer relative addresses. If the pointers
8829 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
8830 TOCREL_OFFSET_RET respectively. */
8831
8832 bool
8833 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
8834 const_rtx *tocrel_offset_ret)
8835 {
8836 if (!TARGET_TOC)
8837 return false;
8838
8839 if (TARGET_CMODEL != CMODEL_SMALL)
8840 {
8841 /* When strict, ensure we have everything tidy. */
8842 if (strict
8843 && !(GET_CODE (op) == LO_SUM
8844 && REG_P (XEXP (op, 0))
8845 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8846 return false;
8847
8848 /* When not strict, allow non-split TOC addresses and also allow
8849 (lo_sum (high ..)) TOC addresses created during reload. */
8850 if (GET_CODE (op) == LO_SUM)
8851 op = XEXP (op, 1);
8852 }
8853
8854 const_rtx tocrel_base = op;
8855 const_rtx tocrel_offset = const0_rtx;
8856
8857 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8858 {
8859 tocrel_base = XEXP (op, 0);
8860 tocrel_offset = XEXP (op, 1);
8861 }
8862
8863 if (tocrel_base_ret)
8864 *tocrel_base_ret = tocrel_base;
8865 if (tocrel_offset_ret)
8866 *tocrel_offset_ret = tocrel_offset;
8867
8868 return (GET_CODE (tocrel_base) == UNSPEC
8869 && XINT (tocrel_base, 1) == UNSPEC_TOCREL
8870 && REG_P (XVECEXP (tocrel_base, 0, 1))
8871 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
8872 }
8873
8874 /* Return true if X is a constant pool address, and also for cmodel=medium
8875 if X is a toc-relative address known to be offsettable within MODE. */
8876
8877 bool
8878 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8879 bool strict)
8880 {
8881 const_rtx tocrel_base, tocrel_offset;
8882 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
8883 && (TARGET_CMODEL != CMODEL_MEDIUM
8884 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8885 || mode == QImode
8886 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8887 INTVAL (tocrel_offset), mode)));
8888 }
8889
8890 static bool
8891 legitimate_small_data_p (machine_mode mode, rtx x)
8892 {
8893 return (DEFAULT_ABI == ABI_V4
8894 && !flag_pic && !TARGET_TOC
8895 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
8896 && small_data_operand (x, mode));
8897 }
8898
8899 bool
8900 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
8901 bool strict, bool worst_case)
8902 {
8903 unsigned HOST_WIDE_INT offset;
8904 unsigned int extra;
8905
8906 if (GET_CODE (x) != PLUS)
8907 return false;
8908 if (!REG_P (XEXP (x, 0)))
8909 return false;
8910 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8911 return false;
8912 if (mode_supports_dq_form (mode))
8913 return quad_address_p (x, mode, strict);
8914 if (!reg_offset_addressing_ok_p (mode))
8915 return virtual_stack_registers_memory_p (x);
8916 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
8917 return true;
8918 if (!CONST_INT_P (XEXP (x, 1)))
8919 return false;
8920
8921 offset = INTVAL (XEXP (x, 1));
8922 extra = 0;
8923 switch (mode)
8924 {
8925 case E_DFmode:
8926 case E_DDmode:
8927 case E_DImode:
8928 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8929 addressing. */
8930 if (VECTOR_MEM_VSX_P (mode))
8931 return false;
8932
8933 if (!worst_case)
8934 break;
8935 if (!TARGET_POWERPC64)
8936 extra = 4;
8937 else if (offset & 3)
8938 return false;
8939 break;
8940
8941 case E_TFmode:
8942 case E_IFmode:
8943 case E_KFmode:
8944 case E_TDmode:
8945 case E_TImode:
8946 case E_PTImode:
8947 extra = 8;
8948 if (!worst_case)
8949 break;
8950 if (!TARGET_POWERPC64)
8951 extra = 12;
8952 else if (offset & 3)
8953 return false;
8954 break;
8955
8956 default:
8957 break;
8958 }
8959
8960 if (TARGET_PREFIXED)
8961 return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
8962 else
8963 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8964 }
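
/* Example (illustrative): on 32-bit with WORST_CASE, a DFmode access at
   offset 0x7ffc is rejected because extra = 4 puts the second word at
   0x8000, beyond a signed 16-bit displacement; offset 0x7ff8 is fine.  */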
8965
8966 bool
8967 legitimate_indexed_address_p (rtx x, int strict)
8968 {
8969 rtx op0, op1;
8970
8971 if (GET_CODE (x) != PLUS)
8972 return false;
8973
8974 op0 = XEXP (x, 0);
8975 op1 = XEXP (x, 1);
8976
8977 return (REG_P (op0) && REG_P (op1)
8978 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
8979 && INT_REG_OK_FOR_INDEX_P (op1, strict))
8980 || (INT_REG_OK_FOR_BASE_P (op1, strict)
8981 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
8982 }
8983
8984 bool
8985 avoiding_indexed_address_p (machine_mode mode)
8986 {
8987 unsigned int msize = GET_MODE_SIZE (mode);
8988
8989 /* Avoid indexed addressing for modes that have non-indexed load/store
8990 instruction forms. On power10, vector pairs have an indexed
8991 form, but vector quads don't. */
8992 if (msize > 16)
8993 return msize != 32;
8994
8995 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
8996 }
8997
8998 bool
8999 legitimate_indirect_address_p (rtx x, int strict)
9000 {
9001 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
9002 }
9003
9004 bool
9005 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
9006 {
9007 if (!TARGET_MACHO || !flag_pic
9008 || mode != SImode || !MEM_P (x))
9009 return false;
9010 x = XEXP (x, 0);
9011
9012 if (GET_CODE (x) != LO_SUM)
9013 return false;
9014 if (!REG_P (XEXP (x, 0)))
9015 return false;
9016 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
9017 return false;
9018 x = XEXP (x, 1);
9019
9020 return CONSTANT_P (x);
9021 }
9022
9023 static bool
9024 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
9025 {
9026 if (GET_CODE (x) != LO_SUM)
9027 return false;
9028 if (!REG_P (XEXP (x, 0)))
9029 return false;
9030 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
9031 return false;
9032 /* Quad word addresses are restricted, and we can't use LO_SUM. */
9033 if (mode_supports_dq_form (mode))
9034 return false;
9035 x = XEXP (x, 1);
9036
9037 if (TARGET_ELF)
9038 {
9039 bool large_toc_ok;
9040
9041 if (DEFAULT_ABI == ABI_V4 && flag_pic)
9042 return false;
9043 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
9044 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
9045 recognizes some LO_SUM addresses as valid although this
9046 function says the opposite. In most cases, LRA through different
9047 transformations can generate correct code for address reloads.
9048 It cannot manage only some LO_SUM cases. So we need to add
9049 code here saying that some addresses are still valid. */
9050 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
9051 && small_toc_ref (x, VOIDmode));
9052 if (TARGET_TOC && ! large_toc_ok)
9053 return false;
9054 if (GET_MODE_NUNITS (mode) != 1)
9055 return false;
9056 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
9057 && !(/* ??? Assume floating point reg based on mode? */
9058 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
9059 return false;
9060
9061 return CONSTANT_P (x) || large_toc_ok;
9062 }
9063 else if (TARGET_MACHO)
9064 {
9065 if (GET_MODE_NUNITS (mode) != 1)
9066 return false;
9067 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
9068 && !(/* see above */
9069 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
9070 return false;
9071 #if TARGET_MACHO
9072 if (MACHO_DYNAMIC_NO_PIC_P || !flag_pic)
9073 return CONSTANT_P (x);
9074 #endif
9075 /* Mach-O PIC code from here. */
9076 if (GET_CODE (x) == CONST)
9077 x = XEXP (x, 0);
9078
9079 /* SYMBOL_REFs need to be wrapped in an UNSPEC_MACHOPIC_OFFSET. */
9080 if (SYMBOL_REF_P (x))
9081 return false;
9082
9083 /* So this is OK if the wrapped object is const. */
9084 if (GET_CODE (x) == UNSPEC
9085 && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
9086 return CONSTANT_P (XVECEXP (x, 0, 0));
9087 return CONSTANT_P (x);
9088 }
9089 return false;
9090 }
9091
9092
9093 /* Try machine-dependent ways of modifying an illegitimate address
9094 to be legitimate. If we find one, return the new, valid address.
9095 This is used from only one place: `memory_address' in explow.cc.
9096
9097 OLDX is the address as it was before break_out_memory_refs was
9098 called. In some cases it is useful to look at this to decide what
9099 needs to be done.
9100
9101 It is always safe for this function to do nothing. It exists to
9102 recognize opportunities to optimize the output.
9103
9104 On RS/6000, first check for the sum of a register with a constant
9105 integer that is out of range. If so, generate code to add the
9106 constant with the low-order 16 bits masked to the register and force
9107 this result into another register (this can be done with `cau').
9108 Then generate an address of REG+(CONST&0xffff), allowing for the
9109 possibility of bit 16 being a one.
9110
9111 Then check for the sum of a register and something not constant, try to
9112 load the other operand into a register and return the sum. */
9113
9114 static rtx
9115 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
9116 machine_mode mode)
9117 {
9118 unsigned int extra;
9119
9120 if (!reg_offset_addressing_ok_p (mode)
9121 || mode_supports_dq_form (mode))
9122 {
9123 if (virtual_stack_registers_memory_p (x))
9124 return x;
9125
9126 /* In theory we should not be seeing addresses of the form reg+0,
9127 but just in case it is generated, optimize it away. */
9128 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
9129 return force_reg (Pmode, XEXP (x, 0));
9130
9131 /* For TImode with load/store quad, restrict addresses to just a single
9132 pointer, so it works with both GPRs and VSX registers. */
9133 /* Make sure both operands are registers. */
9134 else if (GET_CODE (x) == PLUS
9135 && (mode != TImode || !TARGET_VSX))
9136 return gen_rtx_PLUS (Pmode,
9137 force_reg (Pmode, XEXP (x, 0)),
9138 force_reg (Pmode, XEXP (x, 1)));
9139 else
9140 return force_reg (Pmode, x);
9141 }
9142 if (SYMBOL_REF_P (x) && !TARGET_MACHO)
9143 {
9144 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
9145 if (model != 0)
9146 return rs6000_legitimize_tls_address (x, model);
9147 }
9148
9149 extra = 0;
9150 switch (mode)
9151 {
9152 case E_TFmode:
9153 case E_TDmode:
9154 case E_TImode:
9155 case E_PTImode:
9156 case E_IFmode:
9157 case E_KFmode:
9158 /* As in legitimate_offset_address_p we do not assume
9159 worst-case. The mode here is just a hint as to the registers
9160 used. A TImode is usually in gprs, but may actually be in
9161 fprs. Leave worst-case scenario for reload to handle via
9162 insn constraints. PTImode is only GPRs. */
9163 extra = 8;
9164 break;
9165 default:
9166 break;
9167 }
9168
9169 if (GET_CODE (x) == PLUS
9170 && REG_P (XEXP (x, 0))
9171 && CONST_INT_P (XEXP (x, 1))
9172 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
9173 >= 0x10000 - extra))
9174 {
9175 HOST_WIDE_INT high_int, low_int;
9176 rtx sum;
9177 low_int = sext_hwi (INTVAL (XEXP (x, 1)), 16);
9178 if (low_int >= 0x8000 - extra)
9179 low_int = 0;
9180 high_int = INTVAL (XEXP (x, 1)) - low_int;
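/* E.g. (illustrative) for reg + 0x12348: low_int = 0x2348 and
   high_int = 0x10000, so an addis of 1 is emitted and the residual
   offset 0x2348 stays in the address.  */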
9181 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
9182 gen_int_mode (high_int, Pmode)), 0);
9183 return plus_constant (Pmode, sum, low_int);
9184 }
9185 else if (GET_CODE (x) == PLUS
9186 && REG_P (XEXP (x, 0))
9187 && !CONST_INT_P (XEXP (x, 1))
9188 && GET_MODE_NUNITS (mode) == 1
9189 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9190 || (/* ??? Assume floating point reg based on mode? */
9191 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
9192 && !avoiding_indexed_address_p (mode))
9193 {
9194 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
9195 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
9196 }
9197 else if ((TARGET_ELF
9198 #if TARGET_MACHO
9199 || !MACHO_DYNAMIC_NO_PIC_P
9200 #endif
9201 )
9202 && TARGET_32BIT
9203 && TARGET_NO_TOC_OR_PCREL
9204 && !flag_pic
9205 && !CONST_INT_P (x)
9206 && !CONST_WIDE_INT_P (x)
9207 && !CONST_DOUBLE_P (x)
9208 && CONSTANT_P (x)
9209 && GET_MODE_NUNITS (mode) == 1
9210 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9211 || (/* ??? Assume floating point reg based on mode? */
9212 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
9213 {
9214 rtx reg = gen_reg_rtx (Pmode);
9215 if (TARGET_ELF)
9216 emit_insn (gen_elf_high (reg, x));
9217 else
9218 emit_insn (gen_macho_high (Pmode, reg, x));
9219 return gen_rtx_LO_SUM (Pmode, reg, x);
9220 }
9221 else if (TARGET_TOC
9222 && SYMBOL_REF_P (x)
9223 && constant_pool_expr_p (x)
9224 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
9225 return create_TOC_reference (x, NULL_RTX);
9226 else
9227 return x;
9228 }
9229
9230 /* Debug version of rs6000_legitimize_address. */
9231 static rtx
9232 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
9233 {
9234 rtx ret;
9235 rtx_insn *insns;
9236
9237 start_sequence ();
9238 ret = rs6000_legitimize_address (x, oldx, mode);
9239 insns = get_insns ();
9240 end_sequence ();
9241
9242 if (ret != x)
9243 {
9244 fprintf (stderr,
9245 "\nrs6000_legitimize_address: mode %s, old code %s, "
9246 "new code %s, modified\n",
9247 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
9248 GET_RTX_NAME (GET_CODE (ret)));
9249
9250 fprintf (stderr, "Original address:\n");
9251 debug_rtx (x);
9252
9253 fprintf (stderr, "oldx:\n");
9254 debug_rtx (oldx);
9255
9256 fprintf (stderr, "New address:\n");
9257 debug_rtx (ret);
9258
9259 if (insns)
9260 {
9261 fprintf (stderr, "Insns added:\n");
9262 debug_rtx_list (insns, 20);
9263 }
9264 }
9265 else
9266 {
9267 fprintf (stderr,
9268 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
9269 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
9270
9271 debug_rtx (x);
9272 }
9273
9274 if (insns)
9275 emit_insn (insns);
9276
9277 return ret;
9278 }
9279
9280 /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9281 We need to emit DTP-relative relocations. */
9282
9283 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
9284 static void
9285 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
9286 {
9287 switch (size)
9288 {
9289 case 4:
9290 fputs ("\t.long\t", file);
9291 break;
9292 case 8:
9293 fputs (DOUBLE_INT_ASM_OP, file);
9294 break;
9295 default:
9296 gcc_unreachable ();
9297 }
9298 output_addr_const (file, x);
9299 if (TARGET_ELF)
9300 fputs ("@dtprel+0x8000", file);
9301 }
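
/* Note: the +0x8000 above matches (to our understanding) the PowerPC TLS
   convention that DTP-relative values are biased by 0x8000 so the full
   signed 16-bit range is usable.  */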
9302
9303 /* Return true if X is a symbol that refers to real (rather than emulated)
9304 TLS. */
9305
9306 static bool
9307 rs6000_real_tls_symbol_ref_p (rtx x)
9308 {
9309 return (SYMBOL_REF_P (x)
9310 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
9311 }
9312
9313 /* In the name of slightly smaller debug output, and to cater to
9314 general assembler lossage, recognize various UNSPEC sequences
9315 and turn them back into a direct symbol reference. */
9316
9317 static rtx
9318 rs6000_delegitimize_address (rtx orig_x)
9319 {
9320 rtx x, y, offset;
9321
9322 /* UNSPEC_FUSION_GPR is created by the peephole2 for power8 fusion. It
9323 encodes loading up the high part of the address of a TOC reference along
9324 with a load of a GPR using the same base register used for the load. We
9325 return the original SYMBOL_REF.
9326
9327 (set (reg:INT1 <reg>)
9328 (unspec:INT1 [<combined-address>] UNSPEC_FUSION_GPR))
9329
9330 UNSPEC_PCREL_OPT_LD_ADDR is used by the power10 PCREL_OPT pass. These
9331 UNSPECs include the external SYMBOL_REF along with the value being loaded.
9332 We return the original SYMBOL_REF.
9333
9334 (parallel [(set (reg:DI <base-reg>)
9335 (unspec:DI [(symbol_ref <symbol>)
9336 (const_int <marker>)]
9337 UNSPEC_PCREL_OPT_LD_ADDR))
9338 (set (reg:DI <load-reg>)
9339 (unspec:DI [(const_int 0)]
9340 UNSPEC_PCREL_OPT_LD_DATA))])
9341
9342 UNSPEC_PCREL_OPT_LD_SAME_REG is an alternative that is used if the
9343 GPR being loaded is the same as the GPR used to hold the external address.
9344
9345 (set (reg:DI <base-reg>)
9346 (unspec:DI [(symbol_ref <symbol>)
9347 (const_int <marker>)]
9348 UNSPEC_PCREL_OPT_LD_SAME_REG))
9349
9350 UNSPEC_PCREL_OPT_ST_ADDR is used by the power10 PCREL_OPT pass. This
9351 UNSPEC includes the external SYMBOL_REF along with the value being stored.
9352 We return the original SYMBOL_REF.
9353
9354 (parallel [(set (reg:DI <base-reg>)
9355 (unspec:DI [(symbol_ref <symbol>)
9356 (const_int <marker>)]
9357 UNSPEC_PCREL_OPT_ST_ADDR))
9358 (use (reg <store-reg>))]) */
9359
9360 if (GET_CODE (orig_x) == UNSPEC)
9361 switch (XINT (orig_x, 1))
9362 {
9363 case UNSPEC_FUSION_GPR:
9364 case UNSPEC_PCREL_OPT_LD_ADDR:
9365 case UNSPEC_PCREL_OPT_LD_SAME_REG:
9366 case UNSPEC_PCREL_OPT_ST_ADDR:
9367 orig_x = XVECEXP (orig_x, 0, 0);
9368 break;
9369
9370 default:
9371 break;
9372 }
9373
9374 orig_x = delegitimize_mem_from_attrs (orig_x);
9375
9376 x = orig_x;
9377 if (MEM_P (x))
9378 x = XEXP (x, 0);
9379
9380 y = x;
9381 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
9382 y = XEXP (y, 1);
9383
9384 offset = NULL_RTX;
9385 if (GET_CODE (y) == PLUS
9386 && GET_MODE (y) == Pmode
9387 && CONST_INT_P (XEXP (y, 1)))
9388 {
9389 offset = XEXP (y, 1);
9390 y = XEXP (y, 0);
9391 }
9392
9393 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
9394 {
9395 y = XVECEXP (y, 0, 0);
9396
9397 #ifdef HAVE_AS_TLS
9398 /* Do not associate thread-local symbols with the original
9399 constant pool symbol. */
9400 if (TARGET_XCOFF
9401 && SYMBOL_REF_P (y)
9402 && CONSTANT_POOL_ADDRESS_P (y)
9403 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
9404 return orig_x;
9405 #endif
9406
9407 if (offset != NULL_RTX)
9408 y = gen_rtx_PLUS (Pmode, y, offset);
9409 if (!MEM_P (orig_x))
9410 return y;
9411 else
9412 return replace_equiv_address_nv (orig_x, y);
9413 }
9414
9415 if (TARGET_MACHO
9416 && GET_CODE (orig_x) == LO_SUM
9417 && GET_CODE (XEXP (orig_x, 1)) == CONST)
9418 {
9419 y = XEXP (XEXP (orig_x, 1), 0);
9420 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
9421 return XVECEXP (y, 0, 0);
9422 }
9423
9424 return orig_x;
9425 }
9426
9427 /* Return true if X shouldn't be emitted into the debug info.
9428 The linker doesn't like .toc section references from
9429 .debug_* sections, so reject .toc section symbols. */
9430
9431 static bool
9432 rs6000_const_not_ok_for_debug_p (rtx x)
9433 {
9434 if (GET_CODE (x) == UNSPEC)
9435 return true;
9436 if (SYMBOL_REF_P (x)
9437 && CONSTANT_POOL_ADDRESS_P (x))
9438 {
9439 rtx c = get_pool_constant (x);
9440 machine_mode cmode = get_pool_mode (x);
9441 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
9442 return true;
9443 }
9444
9445 return false;
9446 }
9447
9448 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
9449
9450 static bool
9451 rs6000_legitimate_combined_insn (rtx_insn *insn)
9452 {
9453 int icode = INSN_CODE (insn);
9454
9455 /* Reject creating doloop insns. Combine should not be allowed
9456 to create these for a number of reasons:
9457 1) In a nested loop, if combine creates one of these in an
9458 outer loop and the register allocator happens to allocate ctr
9459 to the outer loop insn, then the inner loop can't use ctr.
9460 Inner loops ought to be more highly optimized.
9461 2) Combine often wants to create one of these from what was
9462 originally a three insn sequence, first combining the three
9463 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
9464 allocated ctr, the splitter takes us back to the three insn
9465 sequence. It's better to stop combine at the two insn
9466 sequence.
9467 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
9468 insns, the register allocator sometimes uses floating point
9469 or vector registers for the pseudo. Since ctrsi/ctrdi is a
9470 jump insn and output reloads are not implemented for jumps,
9471 the ctrsi/ctrdi splitters need to handle all possible cases.
9472 That's a pain, and it gets to be seriously difficult when a
9473 splitter that runs after reload needs memory to transfer from
9474 a gpr to fpr. See PR70098 and PR71763 which are not fixed
9475 for the difficult case. It's better to not create problems
9476 in the first place. */
9477 if (icode != CODE_FOR_nothing
9478 && (icode == CODE_FOR_bdz_si
9479 || icode == CODE_FOR_bdz_di
9480 || icode == CODE_FOR_bdnz_si
9481 || icode == CODE_FOR_bdnz_di
9482 || icode == CODE_FOR_bdztf_si
9483 || icode == CODE_FOR_bdztf_di
9484 || icode == CODE_FOR_bdnztf_si
9485 || icode == CODE_FOR_bdnztf_di))
9486 return false;
9487
9488 return true;
9489 }
9490
9491 /* Construct the SYMBOL_REF for the tls_get_addr function. */
9492
9493 static GTY(()) rtx rs6000_tls_symbol;
9494 static rtx
9495 rs6000_tls_get_addr (void)
9496 {
9497 if (!rs6000_tls_symbol)
9498 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
9499
9500 return rs6000_tls_symbol;
9501 }
9502
9503 /* Construct the SYMBOL_REF for TLS GOT references. */
9504
9505 static GTY(()) rtx rs6000_got_symbol;
9506 rtx
9507 rs6000_got_sym (void)
9508 {
9509 if (!rs6000_got_symbol)
9510 {
9511 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
9512 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
9513 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
9514 }
9515
9516 return rs6000_got_symbol;
9517 }
9518
9519 /* AIX Thread-Local Address support. */
9520
9521 static rtx
9522 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
9523 {
9524 rtx sym, mem, tocref, tlsreg, tmpreg, dest;
9525 const char *name;
9526 char *tlsname;
9527
9528 /* Place addr into TOC constant pool. */
9529 sym = force_const_mem (GET_MODE (addr), addr);
9530
9531 /* Output the TOC entry and create the MEM referencing the value. */
9532 if (constant_pool_expr_p (XEXP (sym, 0))
9533 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
9534 {
9535 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
9536 mem = gen_const_mem (Pmode, tocref);
9537 set_mem_alias_set (mem, get_TOC_alias_set ());
9538 }
9539 else
9540 return sym;
9541
9542 /* Use global-dynamic for local-dynamic. */
9543 if (model == TLS_MODEL_GLOBAL_DYNAMIC
9544 || model == TLS_MODEL_LOCAL_DYNAMIC)
9545 {
9546 /* Create new TOC reference for @m symbol. */
9547 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
9548 tlsname = XALLOCAVEC (char, strlen (name) + 1);
9549 strcpy (tlsname, "*LCM");
9550 strcat (tlsname, name + 3);
9551 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
9552 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
9553 tocref = create_TOC_reference (modaddr, NULL_RTX);
9554 rtx modmem = gen_const_mem (Pmode, tocref);
9555 set_mem_alias_set (modmem, get_TOC_alias_set ());
9556
9557 rtx modreg = gen_reg_rtx (Pmode);
9558 emit_insn (gen_rtx_SET (modreg, modmem));
9559
9560 tmpreg = gen_reg_rtx (Pmode);
9561 emit_insn (gen_rtx_SET (tmpreg, mem));
9562
9563 dest = gen_reg_rtx (Pmode);
9564 if (TARGET_32BIT)
9565 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
9566 else
9567 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
9568 return dest;
9569 }
9570 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
9571 else if (TARGET_32BIT)
9572 {
9573 tlsreg = gen_reg_rtx (SImode);
9574 emit_insn (gen_tls_get_tpointer (tlsreg));
9575 }
9576 else
9577 {
9578 tlsreg = gen_rtx_REG (DImode, 13);
9579 xcoff_tls_exec_model_detected = true;
9580 }
9581
9582 /* Load the TOC value into temporary register. */
9583 tmpreg = gen_reg_rtx (Pmode);
9584 emit_insn (gen_rtx_SET (tmpreg, mem));
9585 set_unique_reg_note (get_last_insn (), REG_EQUAL,
9586 gen_rtx_MINUS (Pmode, addr, tlsreg));
9587
9588 /* Add TOC symbol value to TLS pointer. */
9589 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
9590
9591 return dest;
9592 }
9593
9594 /* Passes the TLS argument value from the global-dynamic and local-dynamic
9595 emit_library_call_value calls in rs6000_legitimize_tls_address down to
9596 rs6000_call_aix and rs6000_call_sysv. This is used to emit the
9597 marker relocs put on __tls_get_addr calls. */
9598 static rtx global_tlsarg;
9599
9600 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9601 this (thread-local) address. */
9602
9603 static rtx
9604 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
9605 {
9606 rtx dest, insn;
9607
9608 if (TARGET_XCOFF)
9609 return rs6000_legitimize_tls_address_aix (addr, model);
9610
9611 dest = gen_reg_rtx (Pmode);
9612 if (model == TLS_MODEL_LOCAL_EXEC
9613 && (rs6000_tls_size == 16 || rs6000_pcrel_p ()))
9614 {
9615 rtx tlsreg;
9616
9617 if (TARGET_64BIT)
9618 {
9619 tlsreg = gen_rtx_REG (Pmode, 13);
9620 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
9621 }
9622 else
9623 {
9624 tlsreg = gen_rtx_REG (Pmode, 2);
9625 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
9626 }
9627 emit_insn (insn);
9628 }
9629 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
9630 {
9631 rtx tlsreg, tmp;
9632
9633 tmp = gen_reg_rtx (Pmode);
9634 if (TARGET_64BIT)
9635 {
9636 tlsreg = gen_rtx_REG (Pmode, 13);
9637 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
9638 }
9639 else
9640 {
9641 tlsreg = gen_rtx_REG (Pmode, 2);
9642 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
9643 }
9644 emit_insn (insn);
9645 if (TARGET_64BIT)
9646 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
9647 else
9648 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
9649 emit_insn (insn);
9650 }
9651 else
9652 {
9653 rtx got, tga, tmp1, tmp2;
9654
9655 /* We currently use relocations like @got@tlsgd for tls, which
9656 means the linker will handle allocation of tls entries, placing
9657 them in the .got section. So use a pointer to the .got section,
9658 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9659 or to secondary GOT sections used by 32-bit -fPIC. */
9660 if (rs6000_pcrel_p ())
9661 got = const0_rtx;
9662 else if (TARGET_64BIT)
9663 got = gen_rtx_REG (Pmode, 2);
9664 else
9665 {
9666 if (flag_pic == 1)
9667 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9668 else
9669 {
9670 rtx gsym = rs6000_got_sym ();
9671 got = gen_reg_rtx (Pmode);
9672 if (flag_pic == 0)
9673 rs6000_emit_move (got, gsym, Pmode);
9674 else
9675 {
9676 rtx mem, lab;
9677
9678 tmp1 = gen_reg_rtx (Pmode);
9679 tmp2 = gen_reg_rtx (Pmode);
9680 mem = gen_const_mem (Pmode, tmp1);
9681 lab = gen_label_rtx ();
9682 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9683 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9684 if (TARGET_LINK_STACK)
9685 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9686 emit_move_insn (tmp2, mem);
9687 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9688 set_unique_reg_note (last, REG_EQUAL, gsym);
9689 }
9690 }
9691 }
9692
9693 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9694 {
9695 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
9696 UNSPEC_TLSGD);
9697 tga = rs6000_tls_get_addr ();
9698 rtx argreg = gen_rtx_REG (Pmode, 3);
9699 emit_insn (gen_rtx_SET (argreg, arg));
9700 global_tlsarg = arg;
9701 emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode);
9702 global_tlsarg = NULL_RTX;
9703
9704 /* Make a note so that the result of this call can be CSEd. */
9705 rtvec vec = gen_rtvec (1, copy_rtx (arg));
9706 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
9707 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
9708 }
9709 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9710 {
9711 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
9712 tga = rs6000_tls_get_addr ();
9713 tmp1 = gen_reg_rtx (Pmode);
9714 rtx argreg = gen_rtx_REG (Pmode, 3);
9715 emit_insn (gen_rtx_SET (argreg, arg));
9716 global_tlsarg = arg;
9717 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode);
9718 global_tlsarg = NULL_RTX;
9719
9720 /* Make a note so that the result of this call can be CSEd. */
9721 rtvec vec = gen_rtvec (1, copy_rtx (arg));
9722 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
9723 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
9724
9725 if (rs6000_tls_size == 16 || rs6000_pcrel_p ())
9726 {
9727 if (TARGET_64BIT)
9728 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9729 else
9730 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9731 }
9732 else if (rs6000_tls_size == 32)
9733 {
9734 tmp2 = gen_reg_rtx (Pmode);
9735 if (TARGET_64BIT)
9736 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9737 else
9738 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9739 emit_insn (insn);
9740 if (TARGET_64BIT)
9741 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9742 else
9743 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9744 }
9745 else
9746 {
9747 tmp2 = gen_reg_rtx (Pmode);
9748 if (TARGET_64BIT)
9749 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9750 else
9751 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9752 emit_insn (insn);
9753 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9754 }
9755 emit_insn (insn);
9756 }
9757 else
9758 {
9759 /* IE, or 64-bit offset LE. */
9760 tmp2 = gen_reg_rtx (Pmode);
9761 if (TARGET_64BIT)
9762 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9763 else
9764 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9765 emit_insn (insn);
9766 if (rs6000_pcrel_p ())
9767 {
9768 if (TARGET_64BIT)
9769 insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
9770 else
9771 insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
9772 }
9773 else if (TARGET_64BIT)
9774 insn = gen_tls_tls_64 (dest, tmp2, addr);
9775 else
9776 insn = gen_tls_tls_32 (dest, tmp2, addr);
9777 emit_insn (insn);
9778 }
9779 }
9780
9781 return dest;
9782 }
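
/* As a sketch of the global-dynamic path above, the 64-bit non-pcrel
   case is expected to expand to something like
	addi 3,2,SYM@got@tlsgd
	bl __tls_get_addr(SYM@tlsgd)
	nop
   where the marker reloc on the call comes from GLOBAL_TLSARG.  */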
9783
9784 /* Only create the global variable for the stack protect guard if we are using
9785 the global flavor of that guard. */
9786 static tree
9787 rs6000_init_stack_protect_guard (void)
9788 {
9789 if (rs6000_stack_protector_guard == SSP_GLOBAL)
9790 return default_stack_protect_guard ();
9791
9792 return NULL_TREE;
9793 }
9794
9795 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9796
9797 static bool
9798 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9799 {
9800 /* If GET_CODE (x) is HIGH, the 'X' represents the high part of a symbol_ref.
9801 It cannot be put into a constant pool, e.g.
9802 (high:DI (unspec:DI [(symbol_ref/u:DI ("*.LC0")..)
9803 (high:DI (symbol_ref:DI ("var")..)). */
9804 if (GET_CODE (x) == HIGH)
9805 return true;
9806
9807 /* A TLS symbol in the TOC cannot contain a sum. */
9808 if (GET_CODE (x) == CONST
9809 && GET_CODE (XEXP (x, 0)) == PLUS
9810 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
9811 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9812 return true;
9813
9814 /* Allow AIX TOC TLS symbols in the constant pool,
9815 but not ELF TLS symbols. */
9816 return TARGET_ELF && tls_referenced_p (x);
9817 }
9818
9819 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9820 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9821 can be addressed relative to the toc pointer. */
9822
9823 static bool
9824 use_toc_relative_ref (rtx sym, machine_mode mode)
9825 {
9826 return ((constant_pool_expr_p (sym)
9827 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9828 get_pool_mode (sym)))
9829 || (TARGET_CMODEL == CMODEL_MEDIUM
9830 && SYMBOL_REF_LOCAL_P (sym)
9831 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9832 }
9833
9834 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9835 that is a valid memory address for an instruction.
9836 The MODE argument is the machine mode for the MEM expression
9837 that wants to use this address.
9838
9839 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
9840 refers to a constant pool entry of an address (or the sum of it
9841 plus a constant), a short (16-bit signed) constant plus a register,
9842 the sum of two registers, or a register indirect, possibly with an
9843 auto-increment. For DFmode, DDmode and DImode with a constant plus
9844 register, we must ensure that both words are addressable, or on
9845 PowerPC64 that the offset is word aligned.
9846
9847 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9848 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9849 because adjacent memory cells are accessed by adding word-sized offsets
9850 during assembly output. */
9851 static bool
9852 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
9853 {
9854 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9855 bool quad_offset_p = mode_supports_dq_form (mode);
9856
9857 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
9858 return 0;
9859
9860 /* Handle unaligned altivec lvx/stvx type addresses. */
9861 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
9862 && GET_CODE (x) == AND
9863 && CONST_INT_P (XEXP (x, 1))
9864 && INTVAL (XEXP (x, 1)) == -16)
9865 {
9866 x = XEXP (x, 0);
9867 return (legitimate_indirect_address_p (x, reg_ok_strict)
9868 || legitimate_indexed_address_p (x, reg_ok_strict)
9869 || virtual_stack_registers_memory_p (x));
9870 }
9871
9872 if (legitimate_indirect_address_p (x, reg_ok_strict))
9873 return 1;
9874 if (TARGET_UPDATE
9875 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
9876 && mode_supports_pre_incdec_p (mode)
9877 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
9878 return 1;
9879
9880 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
9881 if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT))
9882 return 1;
9883
9884 /* Handle restricted vector d-form offsets in ISA 3.0. */
9885 if (quad_offset_p)
9886 {
9887 if (quad_address_p (x, mode, reg_ok_strict))
9888 return 1;
9889 }
9890 else if (virtual_stack_registers_memory_p (x))
9891 return 1;
9892
9893 else if (reg_offset_p)
9894 {
9895 if (legitimate_small_data_p (mode, x))
9896 return 1;
9897 if (legitimate_constant_pool_address_p (x, mode,
9898 reg_ok_strict || lra_in_progress))
9899 return 1;
9900 }
9901
9902 /* For TImode, if we have TImode in VSX registers, only allow register
9903 indirect addresses. This will allow the values to go in either GPRs
9904 or VSX registers without reloading. The vector types would tend to
9905 go into VSX registers, so we allow REG+REG, while TImode seems
9906 somewhat split, in that some uses are GPR based, and some VSX based. */
9907 /* FIXME: We could loosen this by changing the following to
9908 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
9909 but currently we cannot allow REG+REG addressing for TImode. See
9910 PR72827 for complete details on how this ends up hoodwinking DSE. */
9911 if (mode == TImode && TARGET_VSX)
9912 return 0;
9913 /* If not REG_OK_STRICT (before reload), allow any stack offset. */
9914 if (! reg_ok_strict
9915 && reg_offset_p
9916 && GET_CODE (x) == PLUS
9917 && REG_P (XEXP (x, 0))
9918 && (XEXP (x, 0) == virtual_stack_vars_rtx
9919 || XEXP (x, 0) == arg_pointer_rtx)
9920 && CONST_INT_P (XEXP (x, 1)))
9921 return 1;
9922 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
9923 return 1;
9924 if (!FLOAT128_2REG_P (mode)
9925 && (TARGET_HARD_FLOAT
9926 || TARGET_POWERPC64
9927 || (mode != DFmode && mode != DDmode))
9928 && (TARGET_POWERPC64 || mode != DImode)
9929 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
9930 && mode != PTImode
9931 && !avoiding_indexed_address_p (mode)
9932 && legitimate_indexed_address_p (x, reg_ok_strict))
9933 return 1;
9934 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
9935 && mode_supports_pre_modify_p (mode)
9936 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
9937 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
9938 reg_ok_strict, false)
9939 || (!avoiding_indexed_address_p (mode)
9940 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
9941 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
9942 {
9943 /* There is no prefixed version of the load/store with update. */
9944 rtx addr = XEXP (x, 1);
9945 return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT);
9946 }
9947 if (reg_offset_p && !quad_offset_p
9948 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
9949 return 1;
9950 return 0;
9951 }
9952
9953 /* Debug version of rs6000_legitimate_address_p. */
9954 static bool
9955 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
9956 bool reg_ok_strict)
9957 {
9958 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
9959 fprintf (stderr,
9960 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9961 "strict = %d, reload = %s, code = %s\n",
9962 ret ? "true" : "false",
9963 GET_MODE_NAME (mode),
9964 reg_ok_strict,
9965 (reload_completed ? "after" : "before"),
9966 GET_RTX_NAME (GET_CODE (x)));
9967 debug_rtx (x);
9968
9969 return ret;
9970 }
9971
9972 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
9973
9974 static bool
9975 rs6000_mode_dependent_address_p (const_rtx addr,
9976 addr_space_t as ATTRIBUTE_UNUSED)
9977 {
9978 return rs6000_mode_dependent_address_ptr (addr);
9979 }
9980
9981 /* Return true if ADDR (a legitimate address expression)
9982 has an effect that depends on the machine mode it is used for.
9983
9984 On the RS/6000 this is true of all integral offsets (since AltiVec
9985 and VSX modes don't allow them) and of pre-increment or decrement addresses.
9986
9987 ??? Except that due to conceptual problems in offsettable_address_p
9988 we can't really report the problems of integral offsets. So leave
9989 this assuming that the adjustable offset must be valid for the
9990 sub-words of a TFmode operand, which is what we had before. */
9991
9992 static bool
9993 rs6000_mode_dependent_address (const_rtx addr)
9994 {
9995 switch (GET_CODE (addr))
9996 {
9997 case PLUS:
9998 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
9999 is considered a legitimate address before reload, so there
10000 are no offset restrictions in that case. Note that this
10001 condition is safe in strict mode because any address involving
10002 virtual_stack_vars_rtx or arg_pointer_rtx would already have
10003 been rejected as illegitimate. */
10004 if (XEXP (addr, 0) != virtual_stack_vars_rtx
10005 && XEXP (addr, 0) != arg_pointer_rtx
10006 && CONST_INT_P (XEXP (addr, 1)))
10007 {
10008 HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
10009 HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
10010 if (TARGET_PREFIXED)
10011 return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
10012 else
10013 return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
10014 }
10015 break;
10016
10017 case LO_SUM:
10018 /* Anything in the constant pool is sufficiently aligned that
10019 all bytes have the same high part address. */
10020 return !legitimate_constant_pool_address_p (addr, QImode, false);
10021
10022 /* Auto-increment cases are now treated generically in recog.cc. */
10023 case PRE_MODIFY:
10024 return TARGET_UPDATE;
10025
10026 /* AND is only allowed in Altivec loads. */
10027 case AND:
10028 return true;
10029
10030 default:
10031 break;
10032 }
10033
10034 return false;
10035 }
10036
10037 /* Debug version of rs6000_mode_dependent_address. */
10038 static bool
10039 rs6000_debug_mode_dependent_address (const_rtx addr)
10040 {
10041 bool ret = rs6000_mode_dependent_address (addr);
10042
10043 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
10044 ret ? "true" : "false");
10045 debug_rtx (addr);
10046
10047 return ret;
10048 }
10049
10050 /* Implement FIND_BASE_TERM. */
10051
10052 rtx
10053 rs6000_find_base_term (rtx op)
10054 {
10055 rtx base;
10056
10057 base = op;
10058 if (GET_CODE (base) == CONST)
10059 base = XEXP (base, 0);
10060 if (GET_CODE (base) == PLUS)
10061 base = XEXP (base, 0);
10062 if (GET_CODE (base) == UNSPEC)
10063 switch (XINT (base, 1))
10064 {
10065 case UNSPEC_TOCREL:
10066 case UNSPEC_MACHOPIC_OFFSET:
10067 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
10068 for aliasing purposes. */
10069 return XVECEXP (base, 0, 0);
10070 }
10071
10072 return op;
10073 }
10074
10075 /* More elaborate version of recog's offsettable_memref_p predicate
10076 that works around the ??? note of rs6000_mode_dependent_address.
10077 In particular it accepts
10078
10079 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
10080
10081 in 32-bit mode, which the recog predicate rejects. */
10082
10083 static bool
10084 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
10085 {
10086 bool worst_case;
10087
10088 if (!MEM_P (op))
10089 return false;
10090
10091 /* First mimic offsettable_memref_p. */
10092 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
10093 return true;
10094
10095 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
10096 the latter predicate knows nothing about the mode of the memory
10097 reference and, therefore, assumes that it is the largest supported
10098 mode (TFmode). As a consequence, legitimate offsettable memory
10099 references are rejected. rs6000_legitimate_offset_address_p contains
10100 the correct logic for the PLUS case of rs6000_mode_dependent_address,
10101 at least with a little bit of help here given that we know the
10102 actual registers used. */
10103 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
10104 || GET_MODE_SIZE (reg_mode) == 4);
10105 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
10106 strict, worst_case);
10107 }
10108
10109 /* Determine the reassociation width to be used in reassociate_bb.
10110 This takes into account how many parallel operations we
10111 can actually do of a given type, and also the latency.
10112 P8:
10113 int add/sub 6/cycle
10114 mul 2/cycle
10115 vect add/sub/mul 2/cycle
10116 fp add/sub/mul 2/cycle
10117 dfp 1/cycle
10118 */
10119
10120 static int
10121 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
10122 machine_mode mode)
10123 {
10124 switch (rs6000_tune)
10125 {
10126 case PROCESSOR_POWER8:
10127 case PROCESSOR_POWER9:
10128 case PROCESSOR_POWER10:
10129 if (DECIMAL_FLOAT_MODE_P (mode))
10130 return 1;
10131 if (VECTOR_MODE_P (mode))
10132 return 4;
10133 if (INTEGRAL_MODE_P (mode))
10134 return 1;
10135 if (FLOAT_MODE_P (mode))
10136 return 4;
10137 break;
10138 default:
10139 break;
10140 }
10141 return 1;
10142 }
10143
10144 /* Change register usage conditional on target flags. */
10145 static void
10146 rs6000_conditional_register_usage (void)
10147 {
10148 int i;
10149
10150 if (TARGET_DEBUG_TARGET)
10151 fprintf (stderr, "rs6000_conditional_register_usage called\n");
10152
10153 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
10154 if (TARGET_64BIT)
10155 fixed_regs[13] = call_used_regs[13] = 1;
10156
10157 /* Conditionally disable FPRs. */
10158 if (TARGET_SOFT_FLOAT)
10159 for (i = 32; i < 64; i++)
10160 fixed_regs[i] = call_used_regs[i] = 1;
10161
10162 /* The TOC register is not killed across calls in a way that is
10163 visible to the compiler. */
10164 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10165 call_used_regs[2] = 0;
10166
10167 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
10168 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10169
10170 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
10171 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10172 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10173
10174 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
10175 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10176 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10177
10178 if (TARGET_TOC && TARGET_MINIMAL_TOC)
10179 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10180
10181 if (!TARGET_ALTIVEC && !TARGET_VSX)
10182 {
10183 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
10184 fixed_regs[i] = call_used_regs[i] = 1;
10185 call_used_regs[VRSAVE_REGNO] = 1;
10186 }
10187
10188 if (TARGET_ALTIVEC || TARGET_VSX)
10189 global_regs[VSCR_REGNO] = 1;
10190
10191 if (TARGET_ALTIVEC_ABI)
10192 {
10193 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
10194 call_used_regs[i] = 1;
10195
10196 /* AIX reserves VR20:31 in non-extended ABI mode. */
10197 if (TARGET_XCOFF && !rs6000_aix_extabi)
10198 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
10199 fixed_regs[i] = call_used_regs[i] = 1;
10200 }
10201 }
10202
10203 \f
10204 /* Output insns to set DEST equal to the constant SOURCE as a series of
10205 lis, ori and shl instructions and return TRUE. */
10206
10207 bool
10208 rs6000_emit_set_const (rtx dest, rtx source)
10209 {
10210 machine_mode mode = GET_MODE (dest);
10211 rtx temp, set;
10212 rtx_insn *insn;
10213 HOST_WIDE_INT c;
10214
10215 gcc_checking_assert (CONST_INT_P (source));
10216 c = INTVAL (source);
10217 switch (mode)
10218 {
10219 case E_QImode:
10220 case E_HImode:
10221 emit_insn (gen_rtx_SET (dest, source));
10222 return true;
10223
10224 case E_SImode:
10225 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
10226
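/* E.g. (illustrative) c = 0x12345678 becomes
   lis TEMP,0x1234 ; ori DEST,TEMP,0x5678.  */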
10227 emit_insn (gen_rtx_SET (temp, GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
10228 emit_insn (gen_rtx_SET (dest,
10229 gen_rtx_IOR (SImode, temp,
10230 GEN_INT (c & 0xffff))));
10231 break;
10232
10233 case E_DImode:
10234 if (!TARGET_POWERPC64)
10235 {
10236 rtx hi, lo;
10237
10238 hi = operand_subword_force (dest, WORDS_BIG_ENDIAN == 0, DImode);
10239 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0, DImode);
10240 emit_move_insn (hi, GEN_INT (c >> 32));
10241 c = sext_hwi (c, 32);
10242 emit_move_insn (lo, GEN_INT (c));
10243 }
10244 else
10245 rs6000_emit_set_long_const (dest, c);
10246 break;
10247
10248 default:
10249 gcc_unreachable ();
10250 }
10251
10252 insn = get_last_insn ();
10253 set = single_set (insn);
10254 if (! CONSTANT_P (SET_SRC (set)))
10255 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
10256
10257 return true;
10258 }
10259
10260 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10261 Output insns to set DEST equal to the constant C as a series of
10262 lis, ori and shl instructions. */
10263
10264 static void
10265 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
10266 {
10267 rtx temp;
10268 HOST_WIDE_INT ud1, ud2, ud3, ud4;
10269
10270 ud1 = c & 0xffff;
10271 c = c >> 16;
10272 ud2 = c & 0xffff;
10273 c = c >> 16;
10274 ud3 = c & 0xffff;
10275 c = c >> 16;
10276 ud4 = c & 0xffff;
10277
10278 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
10279 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
10280 emit_move_insn (dest, GEN_INT (sext_hwi (ud1, 16)));
10281
10282 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
10283 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
10284 {
10285 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10286
10287 emit_move_insn (ud1 != 0 ? temp : dest,
10288 GEN_INT (sext_hwi (ud2 << 16, 32)));
10289 if (ud1 != 0)
10290 emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
10291 }
10292 else if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000))
10293 {
10294 /* li; xoris */
10295 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10296 emit_move_insn (temp, GEN_INT (sext_hwi (ud1, 16)));
10297 emit_move_insn (dest, gen_rtx_XOR (DImode, temp,
10298 GEN_INT ((ud2 ^ 0xffff) << 16)));
10299 }
10300 else if (ud3 == 0 && ud4 == 0)
10301 {
10302 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10303
10304 gcc_assert (ud2 & 0x8000);
10305
10306 if (ud1 == 0)
10307 {
10308 /* lis; rldicl */
10309 emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32)));
10310 emit_move_insn (dest,
10311 gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)));
10312 }
10313 else if (!(ud1 & 0x8000))
10314 {
10315 /* li; oris */
10316 emit_move_insn (temp, GEN_INT (ud1));
10317 emit_move_insn (dest,
10318 gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16)));
10319 }
10320 else
10321 {
10322 /* lis; ori; rldicl */
10323 emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32)));
10324 emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
10325 emit_move_insn (dest,
10326 gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)));
10327 }
10328 }
10329 else if (ud1 == ud3 && ud2 == ud4)
10330 {
10331 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10332 HOST_WIDE_INT num = (ud2 << 16) | ud1;
10333 rs6000_emit_set_long_const (temp, sext_hwi (num, 32));
10334 rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff));
10335 rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32));
10336 emit_move_insn (dest, gen_rtx_IOR (DImode, one, two));
10337 }
10338 else if ((ud4 == 0xffff && (ud3 & 0x8000))
10339 || (ud4 == 0 && ! (ud3 & 0x8000)))
10340 {
10341 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10342
10343 emit_move_insn (temp, GEN_INT (sext_hwi (ud3 << 16, 32)));
10344 if (ud2 != 0)
10345 emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud2)));
10346 emit_move_insn (ud1 != 0 ? temp : dest,
10347 gen_rtx_ASHIFT (DImode, temp, GEN_INT (16)));
10348 if (ud1 != 0)
10349 emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
10350 }
10351 else if (TARGET_PREFIXED)
10352 {
10353 if (can_create_pseudo_p ())
10354 {
10355 /* pli A,L + pli B,H + rldimi A,B,32,0. */
10356 temp = gen_reg_rtx (DImode);
10357 rtx temp1 = gen_reg_rtx (DImode);
10358 emit_move_insn (temp, GEN_INT ((ud4 << 16) | ud3));
10359 emit_move_insn (temp1, GEN_INT ((ud2 << 16) | ud1));
10360
10361 emit_insn (gen_rotldi3_insert_3 (dest, temp, GEN_INT (32), temp1,
10362 GEN_INT (0xffffffff)));
10363 }
10364 else
10365 {
10366 /* pli A,H + sldi A,32 + paddi A,A,L. */
10367 emit_move_insn (dest, GEN_INT ((ud4 << 16) | ud3));
10368
10369 emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
10370
10371 bool can_use_paddi = REGNO (dest) != FIRST_GPR_REGNO;
10372
10373 /* Use paddi for the low 32 bits. */
10374 if (ud2 != 0 && ud1 != 0 && can_use_paddi)
10375 emit_move_insn (dest, gen_rtx_PLUS (DImode, dest,
10376 GEN_INT ((ud2 << 16) | ud1)));
10377
10378 /* Use oris, ori for low 32 bits. */
10379 if (ud2 != 0 && (ud1 == 0 || !can_use_paddi))
10380 emit_move_insn (dest,
10381 gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
10382 if (ud1 != 0 && (ud2 == 0 || !can_use_paddi))
10383 emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
10384 }
10385 }
10386 else
10387 {
10388 if (can_create_pseudo_p ())
10389 {
10390 /* lis HIGH,UD4 ; ori HIGH,UD3 ;
10391 lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0. */
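/* E.g. (illustrative) c = 0x123456789abcdef0 gives ud4 = 0x1234,
   ud3 = 0x5678, ud2 = 0x9abc, ud1 = 0xdef0; the two 32-bit halves
   are built independently and merged with rldimi.  */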
10392 rtx high = gen_reg_rtx (DImode);
10393 rtx low = gen_reg_rtx (DImode);
10394 HOST_WIDE_INT num = (ud2 << 16) | ud1;
10395 rs6000_emit_set_long_const (low, sext_hwi (num, 32));
10396 num = (ud4 << 16) | ud3;
10397 rs6000_emit_set_long_const (high, sext_hwi (num, 32));
10398 emit_insn (gen_rotldi3_insert_3 (dest, high, GEN_INT (32), low,
10399 GEN_INT (0xffffffff)));
10400 }
10401 else
10402 {
10403 /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ;
10404 oris DEST,UD2 ; ori DEST,UD1. */
10405 emit_move_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32)));
10406 if (ud3 != 0)
10407 emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3)));
10408
10409 emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
10410 if (ud2 != 0)
10411 emit_move_insn (dest,
10412 gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
10413 if (ud1 != 0)
10414 emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
10415 }
10416 }
10417 }
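
/* Worked example (illustrative, not part of GCC): the cases above all key
   off the four 16-bit halfwords ud1..ud4 of the constant C, where
       ud1 = C & 0xffff;          ud2 = (C >> 16) & 0xffff;
       ud3 = (C >> 32) & 0xffff;  ud4 = (C >> 48) & 0xffff;
   For C = 0xffffffff12348001 we get ud4 = ud3 = 0xffff, ud2 = 0x1234 and
   ud1 = 0x8001, which matches the "li; xoris" case: li sign-extends
   0x8001 to 0xffffffffffff8001, and xoris with (0x1234 ^ 0xffff) = 0xedcb
   flips bits 16-31 from 0xffff to 0x1234, producing the full constant in
   just two instructions.  */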
10418
10419 /* Helper for the following. Get rid of [r+r] memory refs
10420 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
10421
10422 static void
10423 rs6000_eliminate_indexed_memrefs (rtx operands[2])
10424 {
10425 if (MEM_P (operands[0])
10426 && !REG_P (XEXP (operands[0], 0))
10427 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
10428 GET_MODE (operands[0]), false))
10429 operands[0]
10430 = replace_equiv_address (operands[0],
10431 copy_addr_to_reg (XEXP (operands[0], 0)));
10432
10433 if (MEM_P (operands[1])
10434 && !REG_P (XEXP (operands[1], 0))
10435 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
10436 GET_MODE (operands[1]), false))
10437 operands[1]
10438 = replace_equiv_address (operands[1],
10439 copy_addr_to_reg (XEXP (operands[1], 0)));
10440 }
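
/* For example (illustrative): a TImode operand such as
       (mem:TI (plus (reg r9) (reg r10)))
   is rewritten by the helper above into
       (set (reg tmp) (plus (reg r9) (reg r10)))
       (mem:TI (reg tmp))
   because the move patterns for these multi-register modes accept only a
   plain base register, not an [r+r] indexed address.  */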
10441
10442 /* Generate a vector of constants to permute MODE for a little-endian
10443 storage operation by swapping the two halves of a vector. */
10444 static rtvec
10445 rs6000_const_vec (machine_mode mode)
10446 {
10447 int i, subparts;
10448 rtvec v;
10449
10450 switch (mode)
10451 {
10452 case E_V1TImode:
10453 subparts = 1;
10454 break;
10455 case E_V2DFmode:
10456 case E_V2DImode:
10457 subparts = 2;
10458 break;
10459 case E_V4SFmode:
10460 case E_V4SImode:
10461 subparts = 4;
10462 break;
10463 case E_V8HImode:
10464 subparts = 8;
10465 break;
10466 case E_V16QImode:
10467 subparts = 16;
10468 break;
10469 default:
10470       gcc_unreachable ();
10471 }
10472
10473 v = rtvec_alloc (subparts);
10474
10475 for (i = 0; i < subparts / 2; ++i)
10476 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
10477 for (i = subparts / 2; i < subparts; ++i)
10478 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
10479
10480 return v;
10481 }
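
/* For example (illustrative): V2DImode yields the selector { 1, 0 } and
   V4SImode yields { 2, 3, 0, 1 }: the two 64-bit halves of the vector
   trade places while the element order within each half is preserved,
   which is exactly the doubleword swap performed by lxvd2x/stxvd2x on a
   little-endian target.  */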
10482
10483 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
10484 store operation. */
10485 void
10486 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
10487 {
10488 gcc_assert (!altivec_indexed_or_indirect_operand (dest, mode));
10489 gcc_assert (!altivec_indexed_or_indirect_operand (source, mode));
10490
10491   /* Scalar permutations are easier to express in integer modes than in
10492 floating-point modes, so cast them here. We use V1TImode instead
10493 of TImode to ensure that the values don't go through GPRs. */
10494 if (FLOAT128_VECTOR_P (mode))
10495 {
10496 dest = gen_lowpart (V1TImode, dest);
10497 source = gen_lowpart (V1TImode, source);
10498 mode = V1TImode;
10499 }
10500
10501 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
10502 scalar. */
10503 if (mode == TImode || mode == V1TImode)
10504 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
10505 GEN_INT (64))));
10506 else
10507 {
10508 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
10509 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
10510 }
10511 }
10512
10513 /* Emit a little-endian load from vector memory location SOURCE to VSX
10514 register DEST in mode MODE. The load is done with two permuting
10515    insns that represent an lxvd2x and an xxpermdi.  */
10516 void
10517 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
10518 {
10519   /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10520 V1TImode). */
10521 if (mode == TImode || mode == V1TImode)
10522 {
10523 mode = V2DImode;
10524 dest = gen_lowpart (V2DImode, dest);
10525 source = adjust_address (source, V2DImode, 0);
10526 }
10527
10528 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
10529 rs6000_emit_le_vsx_permute (tmp, source, mode);
10530 rs6000_emit_le_vsx_permute (dest, tmp, mode);
10531 }
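
/* Illustrative note: on little-endian targets lxvd2x loads the two
   doublewords swapped, so the load above is emitted as two explicit
   swaps,
       tmp  = swap (mem)    ; becomes lxvd2x
       dest = swap (tmp)    ; becomes xxpermdi
   Two back-to-back swaps are an identity, so when a swapped load feeds a
   swapped store the optimizers can cancel the adjacent permute pair;
   generating this RTL at expand time is what makes that cleanup
   possible.  */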
10532
10533 /* Emit a little-endian store to vector memory location DEST from VSX
10534 register SOURCE in mode MODE. The store is done with two permuting
10535    insns that represent an xxpermdi and an stxvd2x.  */
10536 void
10537 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
10538 {
10539 /* This should never be called after LRA. */
10540 gcc_assert (can_create_pseudo_p ());
10541
10542 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10543 V1TImode). */
10544 if (mode == TImode || mode == V1TImode)
10545 {
10546 mode = V2DImode;
10547 dest = adjust_address (dest, V2DImode, 0);
10548 source = gen_lowpart (V2DImode, source);
10549 }
10550
10551 rtx tmp = gen_reg_rtx_and_attrs (source);
10552 rs6000_emit_le_vsx_permute (tmp, source, mode);
10553 rs6000_emit_le_vsx_permute (dest, tmp, mode);
10554 }
10555
10556 /* Emit a sequence representing a little-endian VSX load or store,
10557 moving data from SOURCE to DEST in mode MODE. This is done
10558 separately from rs6000_emit_move to ensure it is called only
10559 during expand. LE VSX loads and stores introduced later are
10560 handled with a split. The expand-time RTL generation allows
10561 us to optimize away redundant pairs of register-permutes. */
10562 void
10563 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
10564 {
10565 gcc_assert (!BYTES_BIG_ENDIAN
10566 && VECTOR_MEM_VSX_P (mode)
10567 && !TARGET_P9_VECTOR
10568 && !gpr_or_gpr_p (dest, source)
10569 && (MEM_P (source) ^ MEM_P (dest)));
10570
10571 if (MEM_P (source))
10572 {
10573 gcc_assert (REG_P (dest) || SUBREG_P (dest));
10574 rs6000_emit_le_vsx_load (dest, source, mode);
10575 }
10576 else
10577 {
10578 if (!REG_P (source))
10579 source = force_reg (mode, source);
10580 rs6000_emit_le_vsx_store (dest, source, mode);
10581 }
10582 }
10583
10584 /* Return whether a SFmode or SImode move can be done without converting one
10585    mode to another.  This arises when we have:
10586
10587 (SUBREG:SF (REG:SI ...))
10588 (SUBREG:SI (REG:SF ...))
10589
10590 and one of the values is in a floating point/vector register, where SFmode
10591 scalars are stored in DFmode format. */
10592
10593 bool
10594 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
10595 {
10596 if (TARGET_ALLOW_SF_SUBREG)
10597 return true;
10598
10599 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
10600 return true;
10601
10602 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
10603 return true;
10604
10605   /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))).  */
10606 if (SUBREG_P (dest))
10607 {
10608 rtx dest_subreg = SUBREG_REG (dest);
10609 rtx src_subreg = SUBREG_REG (src);
10610 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
10611 }
10612
10613 return false;
10614 }
10615
10616
10617 /* Helper function to change moves with:
10618
10619 (SUBREG:SF (REG:SI)) and
10620 (SUBREG:SI (REG:SF))
10621
10622 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10623 values are stored as DFmode values in the VSX registers. We need to convert
10624 the bits before we can use a direct move or operate on the bits in the
10625 vector register as an integer type.
10626
10627 Skip things like (set (SUBREG:SI (...) (SUBREG:SI (...)). */
10628
10629 static bool
10630 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
10631 {
10632 if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
10633 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
10634 && SUBREG_P (source) && sf_subreg_operand (source, mode))
10635 {
10636 rtx inner_source = SUBREG_REG (source);
10637 machine_mode inner_mode = GET_MODE (inner_source);
10638
10639 if (mode == SImode && inner_mode == SFmode)
10640 {
10641 emit_insn (gen_movsi_from_sf (dest, inner_source));
10642 return true;
10643 }
10644
10645 if (mode == SFmode && inner_mode == SImode)
10646 {
10647 emit_insn (gen_movsf_from_si (dest, inner_source));
10648 return true;
10649 }
10650 }
10651
10652 return false;
10653 }
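
/* Illustrative sketch (not part of GCC; the helper name is hypothetical):
   at the source level the SUBREG pairs handled above correspond to
   bit-pattern reinterpretation such as

       static inline int
       float_bits_as_int (float f)
       {
         int i;
         __builtin_memcpy (&i, &f, sizeof i);   // type-pun SF <-> SI
         return i;
       }

   Because a VSX register holds an SFmode scalar widened to DFmode format,
   the raw register bits are not the 32-bit pattern memcpy would observe
   in memory; the movsi_from_sf/movsf_from_si patterns emitted above
   include the required format conversion.  */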
10654
10655 /* Emit a move from SOURCE to DEST in mode MODE. */
10656 void
10657 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
10658 {
10659 rtx operands[2];
10660 operands[0] = dest;
10661 operands[1] = source;
10662
10663 if (TARGET_DEBUG_ADDR)
10664 {
10665 fprintf (stderr,
10666 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
10667 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10668 GET_MODE_NAME (mode),
10669 lra_in_progress,
10670 reload_completed,
10671 can_create_pseudo_p ());
10672 debug_rtx (dest);
10673 fprintf (stderr, "source:\n");
10674 debug_rtx (source);
10675 }
10676
10677 /* Check that we get CONST_WIDE_INT only when we should. */
10678 if (CONST_WIDE_INT_P (operands[1])
10679 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
10680 gcc_unreachable ();
10681
10682 #ifdef HAVE_AS_GNU_ATTRIBUTE
10683 /* If we use a long double type, set the flags in .gnu_attribute that say
10684 what the long double type is. This is to allow the linker's warning
10685 message for the wrong long double to be useful, even if the function does
10686      not do a call (for example, doing a 128-bit add on power9 if the long
10687      double type is IEEE 128-bit).  Do not set this for __ibm128 or __float128
10688      if they are not the default long double type.  */
10689 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
10690 {
10691 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
10692 rs6000_passes_float = rs6000_passes_long_double = true;
10693
10694 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
10695 rs6000_passes_float = rs6000_passes_long_double = true;
10696 }
10697 #endif
10698
10699 /* See if we need to special case SImode/SFmode SUBREG moves. */
10700 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
10701 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
10702 return;
10703
10704 /* Check if GCC is setting up a block move that will end up using FP
10705 registers as temporaries. We must make sure this is acceptable. */
10706 if (MEM_P (operands[0])
10707 && MEM_P (operands[1])
10708 && mode == DImode
10709 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
10710 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
10711 && ! (rs6000_slow_unaligned_access (SImode,
10712 (MEM_ALIGN (operands[0]) > 32
10713 ? 32 : MEM_ALIGN (operands[0])))
10714 || rs6000_slow_unaligned_access (SImode,
10715 (MEM_ALIGN (operands[1]) > 32
10716 ? 32 : MEM_ALIGN (operands[1]))))
10717 && ! MEM_VOLATILE_P (operands [0])
10718 && ! MEM_VOLATILE_P (operands [1]))
10719 {
10720 emit_move_insn (adjust_address (operands[0], SImode, 0),
10721 adjust_address (operands[1], SImode, 0));
10722 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
10723 adjust_address (copy_rtx (operands[1]), SImode, 4));
10724 return;
10725 }
10726
10727 if (can_create_pseudo_p () && MEM_P (operands[0])
10728 && !gpc_reg_operand (operands[1], mode))
10729 operands[1] = force_reg (mode, operands[1]);
10730
10731 /* Recognize the case where operand[1] is a reference to thread-local
10732 data and load its address to a register. */
10733 if (tls_referenced_p (operands[1]))
10734 {
10735 enum tls_model model;
10736 rtx tmp = operands[1];
10737 rtx addend = NULL;
10738
10739 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
10740 {
10741 addend = XEXP (XEXP (tmp, 0), 1);
10742 tmp = XEXP (XEXP (tmp, 0), 0);
10743 }
10744
10745 gcc_assert (SYMBOL_REF_P (tmp));
10746 model = SYMBOL_REF_TLS_MODEL (tmp);
10747 gcc_assert (model != 0);
10748
10749 tmp = rs6000_legitimize_tls_address (tmp, model);
10750 if (addend)
10751 {
10752 tmp = gen_rtx_PLUS (mode, tmp, addend);
10753 tmp = force_operand (tmp, operands[0]);
10754 }
10755 operands[1] = tmp;
10756 }
10757
10758 /* 128-bit constant floating-point values on Darwin should really be loaded
10759 as two parts. However, this premature splitting is a problem when DFmode
10760 values can go into Altivec registers. */
10761 if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
10762 && !reg_addr[DFmode].scalar_in_vmx_p)
10763 {
10764 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
10765 simplify_gen_subreg (DFmode, operands[1], mode, 0),
10766 DFmode);
10767 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
10768 GET_MODE_SIZE (DFmode)),
10769 simplify_gen_subreg (DFmode, operands[1], mode,
10770 GET_MODE_SIZE (DFmode)),
10771 DFmode);
10772 return;
10773 }
10774
10775 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10776 p1:SD) if p1 is not of floating point class and p0 is spilled as
10777 we can have no analogous movsd_store for this. */
10778 if (lra_in_progress && mode == DDmode
10779 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
10780 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10781 && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
10782 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
10783 {
10784 enum reg_class cl;
10785 int regno = REGNO (SUBREG_REG (operands[1]));
10786
10787 if (!HARD_REGISTER_NUM_P (regno))
10788 {
10789 cl = reg_preferred_class (regno);
10790 regno = reg_renumber[regno];
10791 if (regno < 0)
10792 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
10793 }
10794 if (regno >= 0 && ! FP_REGNO_P (regno))
10795 {
10796 mode = SDmode;
10797 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
10798 operands[1] = SUBREG_REG (operands[1]);
10799 }
10800 }
10801 if (lra_in_progress
10802 && mode == SDmode
10803 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
10804 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10805 && (REG_P (operands[1])
10806 || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
10807 {
10808 int regno = reg_or_subregno (operands[1]);
10809 enum reg_class cl;
10810
10811 if (!HARD_REGISTER_NUM_P (regno))
10812 {
10813 cl = reg_preferred_class (regno);
10814 gcc_assert (cl != NO_REGS);
10815 regno = reg_renumber[regno];
10816 if (regno < 0)
10817 regno = ira_class_hard_regs[cl][0];
10818 }
10819 if (FP_REGNO_P (regno))
10820 {
10821 if (GET_MODE (operands[0]) != DDmode)
10822 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
10823 emit_insn (gen_movsd_store (operands[0], operands[1]));
10824 }
10825 else if (INT_REGNO_P (regno))
10826 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10827 else
10828 	gcc_unreachable ();
10829 return;
10830 }
10831 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10832 p:DD)) if p0 is not of floating point class and p1 is spilled as
10833 we can have no analogous movsd_load for this. */
10834 if (lra_in_progress && mode == DDmode
10835 && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
10836 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
10837 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
10838 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10839 {
10840 enum reg_class cl;
10841 int regno = REGNO (SUBREG_REG (operands[0]));
10842
10843 if (!HARD_REGISTER_NUM_P (regno))
10844 {
10845 cl = reg_preferred_class (regno);
10846 regno = reg_renumber[regno];
10847 if (regno < 0)
10848 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
10849 }
10850 if (regno >= 0 && ! FP_REGNO_P (regno))
10851 {
10852 mode = SDmode;
10853 operands[0] = SUBREG_REG (operands[0]);
10854 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
10855 }
10856 }
10857 if (lra_in_progress
10858 && mode == SDmode
10859 && (REG_P (operands[0])
10860 || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
10861 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
10862 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10863 {
10864 int regno = reg_or_subregno (operands[0]);
10865 enum reg_class cl;
10866
10867 if (!HARD_REGISTER_NUM_P (regno))
10868 {
10869 cl = reg_preferred_class (regno);
10870 gcc_assert (cl != NO_REGS);
10871 regno = reg_renumber[regno];
10872 if (regno < 0)
10873 regno = ira_class_hard_regs[cl][0];
10874 }
10875 if (FP_REGNO_P (regno))
10876 {
10877 if (GET_MODE (operands[1]) != DDmode)
10878 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
10879 emit_insn (gen_movsd_load (operands[0], operands[1]));
10880 }
10881 else if (INT_REGNO_P (regno))
10882 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10883 else
10884 	gcc_unreachable ();
10885 return;
10886 }
10887
10888 /* FIXME: In the long term, this switch statement should go away
10889 and be replaced by a sequence of tests based on things like
10890 mode == Pmode. */
10891 switch (mode)
10892 {
10893 case E_HImode:
10894 case E_QImode:
10895 if (CONSTANT_P (operands[1])
10896 && !CONST_INT_P (operands[1]))
10897 operands[1] = force_const_mem (mode, operands[1]);
10898 break;
10899
10900 case E_TFmode:
10901 case E_TDmode:
10902 case E_IFmode:
10903 case E_KFmode:
10904 if (FLOAT128_2REG_P (mode))
10905 rs6000_eliminate_indexed_memrefs (operands);
10906 /* fall through */
10907
10908 case E_DFmode:
10909 case E_DDmode:
10910 case E_SFmode:
10911 case E_SDmode:
10912 if (CONSTANT_P (operands[1])
10913 && ! easy_fp_constant (operands[1], mode))
10914 operands[1] = force_const_mem (mode, operands[1]);
10915 break;
10916
10917 case E_V16QImode:
10918 case E_V8HImode:
10919 case E_V4SFmode:
10920 case E_V4SImode:
10921 case E_V2DFmode:
10922 case E_V2DImode:
10923 case E_V1TImode:
10924 if (CONSTANT_P (operands[1])
10925 && !easy_vector_constant (operands[1], mode))
10926 operands[1] = force_const_mem (mode, operands[1]);
10927 break;
10928
10929 case E_OOmode:
10930 case E_XOmode:
10931 if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0)
10932 error ("%qs is an opaque type, and you cannot set it to other values",
10933 (mode == OOmode) ? "__vector_pair" : "__vector_quad");
10934 break;
10935
10936 case E_SImode:
10937 case E_DImode:
10938       /* Use the default pattern for the address of ELF small data.  */
10939 if (TARGET_ELF
10940 && mode == Pmode
10941 && DEFAULT_ABI == ABI_V4
10942 && (SYMBOL_REF_P (operands[1])
10943 || GET_CODE (operands[1]) == CONST)
10944 && small_data_operand (operands[1], mode))
10945 {
10946 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10947 return;
10948 }
10949
10950 /* Use the default pattern for loading up PC-relative addresses. */
10951 if (TARGET_PCREL && mode == Pmode
10952 && pcrel_local_or_external_address (operands[1], Pmode))
10953 {
10954 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10955 return;
10956 }
10957
10958 if (DEFAULT_ABI == ABI_V4
10959 && mode == Pmode && mode == SImode
10960 && flag_pic == 1 && got_operand (operands[1], mode))
10961 {
10962 emit_insn (gen_movsi_got (operands[0], operands[1]));
10963 return;
10964 }
10965
10966 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
10967 && TARGET_NO_TOC_OR_PCREL
10968 && ! flag_pic
10969 && mode == Pmode
10970 && CONSTANT_P (operands[1])
10971 && GET_CODE (operands[1]) != HIGH
10972 && !CONST_INT_P (operands[1]))
10973 {
10974 rtx target = (!can_create_pseudo_p ()
10975 ? operands[0]
10976 : gen_reg_rtx (mode));
10977
10978 /* If this is a function address on -mcall-aixdesc,
10979 convert it to the address of the descriptor. */
10980 if (DEFAULT_ABI == ABI_AIX
10981 && SYMBOL_REF_P (operands[1])
10982 && XSTR (operands[1], 0)[0] == '.')
10983 {
10984 const char *name = XSTR (operands[1], 0);
10985 rtx new_ref;
10986 while (*name == '.')
10987 name++;
10988 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
10989 CONSTANT_POOL_ADDRESS_P (new_ref)
10990 = CONSTANT_POOL_ADDRESS_P (operands[1]);
10991 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
10992 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
10993 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
10994 operands[1] = new_ref;
10995 }
10996
10997 if (DEFAULT_ABI == ABI_DARWIN)
10998 {
10999 #if TARGET_MACHO
11000 /* This is not PIC code, but could require the subset of
11001 indirections used by mdynamic-no-pic. */
11002 if (MACHO_DYNAMIC_NO_PIC_P)
11003 {
11004 /* Take care of any required data indirection. */
11005 operands[1] = rs6000_machopic_legitimize_pic_address (
11006 operands[1], mode, operands[0]);
11007 if (operands[0] != operands[1])
11008 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11009 return;
11010 }
11011 #endif
11012 emit_insn (gen_macho_high (Pmode, target, operands[1]));
11013 emit_insn (gen_macho_low (Pmode, operands[0],
11014 target, operands[1]));
11015 return;
11016 }
11017
11018 emit_insn (gen_elf_high (target, operands[1]));
11019 emit_insn (gen_elf_low (operands[0], target, operands[1]));
11020 return;
11021 }
11022
11023 /* If this is a SYMBOL_REF that refers to a constant pool entry,
11024 and we have put it in the TOC, we just need to make a TOC-relative
11025 reference to it. */
11026 if (TARGET_TOC
11027 && SYMBOL_REF_P (operands[1])
11028 && use_toc_relative_ref (operands[1], mode))
11029 operands[1] = create_TOC_reference (operands[1], operands[0]);
11030 else if (mode == Pmode
11031 && CONSTANT_P (operands[1])
11032 && GET_CODE (operands[1]) != HIGH
11033 && ((REG_P (operands[0])
11034 && FP_REGNO_P (REGNO (operands[0])))
11035 || !CONST_INT_P (operands[1])
11036 || (num_insns_constant (operands[1], mode)
11037 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
11038 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
11039 && (TARGET_CMODEL == CMODEL_SMALL
11040 || can_create_pseudo_p ()
11041 || (REG_P (operands[0])
11042 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
11043 {
11044
11045 #if TARGET_MACHO
11046 /* Darwin uses a special PIC legitimizer. */
11047 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
11048 {
11049 operands[1] =
11050 rs6000_machopic_legitimize_pic_address (operands[1], mode,
11051 operands[0]);
11052 if (operands[0] != operands[1])
11053 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11054 return;
11055 }
11056 #endif
11057
11058 /* If we are to limit the number of things we put in the TOC and
11059 this is a symbol plus a constant we can add in one insn,
11060 just put the symbol in the TOC and add the constant. */
11061 if (GET_CODE (operands[1]) == CONST
11062 && TARGET_NO_SUM_IN_TOC
11063 && GET_CODE (XEXP (operands[1], 0)) == PLUS
11064 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
11065 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
11066 || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
11067 && ! side_effects_p (operands[0]))
11068 {
11069 rtx sym =
11070 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
11071 rtx other = XEXP (XEXP (operands[1], 0), 1);
11072
11073 sym = force_reg (mode, sym);
11074 emit_insn (gen_add3_insn (operands[0], sym, other));
11075 return;
11076 }
11077
11078 operands[1] = force_const_mem (mode, operands[1]);
11079
11080 if (TARGET_TOC
11081 && SYMBOL_REF_P (XEXP (operands[1], 0))
11082 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
11083 {
11084 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
11085 operands[0]);
11086 operands[1] = gen_const_mem (mode, tocref);
11087 set_mem_alias_set (operands[1], get_TOC_alias_set ());
11088 }
11089 }
11090 break;
11091
11092 case E_TImode:
11093 if (!VECTOR_MEM_VSX_P (TImode))
11094 rs6000_eliminate_indexed_memrefs (operands);
11095 break;
11096
11097 case E_PTImode:
11098 rs6000_eliminate_indexed_memrefs (operands);
11099 break;
11100
11101 default:
11102 fatal_insn ("bad move", gen_rtx_SET (dest, source));
11103 }
11104
11105 /* Above, we may have called force_const_mem which may have returned
11106 an invalid address. If we can, fix this up; otherwise, reload will
11107 have to deal with it. */
11108 if (MEM_P (operands[1]))
11109 operands[1] = validize_mem (operands[1]);
11110
11111 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11112 }
11113 \f
11114
11115 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
11116 static void
11117 init_float128_ibm (machine_mode mode)
11118 {
11119 if (!TARGET_XL_COMPAT)
11120 {
11121 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
11122 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
11123 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
11124 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
11125
11126 if (!TARGET_HARD_FLOAT)
11127 {
11128 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
11129 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
11130 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
11131 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
11132 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
11133 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
11134 set_optab_libfunc (le_optab, mode, "__gcc_qle");
11135 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
11136
11137 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
11138 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
11139 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
11140 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
11141 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
11142 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
11143 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
11144 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
11145 }
11146 }
11147 else
11148 {
11149 set_optab_libfunc (add_optab, mode, "_xlqadd");
11150 set_optab_libfunc (sub_optab, mode, "_xlqsub");
11151 set_optab_libfunc (smul_optab, mode, "_xlqmul");
11152 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
11153 }
11154
11155 /* Add various conversions for IFmode to use the traditional TFmode
11156 names. */
11157 if (mode == IFmode)
11158 {
11159 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
11160 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
11161 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
11162 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
11163 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
11164 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
11165
11166 set_conv_libfunc (sfix_optab, DImode, mode, "__fixtfdi");
11167 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunstfdi");
11168
11169 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatditf");
11170 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatunditf");
11171
11172 if (TARGET_POWERPC64)
11173 {
11174 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
11175 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
11176 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
11177 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
11178 }
11179 }
11180 }
11181
11182 /* Set up IEEE 128-bit floating point routines. Use different names if the
11183 arguments can be passed in a vector register. The historical PowerPC
11184 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
11185 continue to use that if we aren't using vector registers to pass IEEE
11186 128-bit floating point. */
11187
11188 static void
11189 init_float128_ieee (machine_mode mode)
11190 {
11191 if (FLOAT128_VECTOR_P (mode))
11192 {
11193 set_optab_libfunc (add_optab, mode, "__addkf3");
11194 set_optab_libfunc (sub_optab, mode, "__subkf3");
11195 set_optab_libfunc (neg_optab, mode, "__negkf2");
11196 set_optab_libfunc (smul_optab, mode, "__mulkf3");
11197 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
11198 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
11199 set_optab_libfunc (abs_optab, mode, "__abskf2");
11200 set_optab_libfunc (powi_optab, mode, "__powikf2");
11201
11202 set_optab_libfunc (eq_optab, mode, "__eqkf2");
11203 set_optab_libfunc (ne_optab, mode, "__nekf2");
11204 set_optab_libfunc (gt_optab, mode, "__gtkf2");
11205 set_optab_libfunc (ge_optab, mode, "__gekf2");
11206 set_optab_libfunc (lt_optab, mode, "__ltkf2");
11207 set_optab_libfunc (le_optab, mode, "__lekf2");
11208 set_optab_libfunc (unord_optab, mode, "__unordkf2");
11209
11210 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
11211 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
11212 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
11213 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
11214
11215 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
11216 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
11217 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
11218
11219 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
11220 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
11221 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
11222
11223 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
11224 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
11225 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
11226 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
11227 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
11228 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
11229
11230 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
11231 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
11232 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
11233 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
11234
11235 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
11236 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
11237 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
11238 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
11239
11240 if (TARGET_POWERPC64)
11241 {
11242 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti_sw");
11243 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti_sw");
11244 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf_sw");
11245 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf_sw");
11246 }
11247 }
11248
11249 else
11250 {
11251 set_optab_libfunc (add_optab, mode, "_q_add");
11252 set_optab_libfunc (sub_optab, mode, "_q_sub");
11253 set_optab_libfunc (neg_optab, mode, "_q_neg");
11254 set_optab_libfunc (smul_optab, mode, "_q_mul");
11255 set_optab_libfunc (sdiv_optab, mode, "_q_div");
11256 if (TARGET_PPC_GPOPT)
11257 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
11258
11259 set_optab_libfunc (eq_optab, mode, "_q_feq");
11260 set_optab_libfunc (ne_optab, mode, "_q_fne");
11261 set_optab_libfunc (gt_optab, mode, "_q_fgt");
11262 set_optab_libfunc (ge_optab, mode, "_q_fge");
11263 set_optab_libfunc (lt_optab, mode, "_q_flt");
11264 set_optab_libfunc (le_optab, mode, "_q_fle");
11265
11266 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
11267 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
11268 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
11269 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
11270 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
11271 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
11272 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
11273 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
11274 }
11275 }
11276
11277 static void
11278 rs6000_init_libfuncs (void)
11279 {
11280 /* __float128 support. */
11281 if (TARGET_FLOAT128_TYPE)
11282 {
11283 init_float128_ibm (IFmode);
11284 init_float128_ieee (KFmode);
11285 }
11286
11287 /* AIX/Darwin/64-bit Linux quad floating point routines. */
11288 if (TARGET_LONG_DOUBLE_128)
11289 {
11290 if (!TARGET_IEEEQUAD)
11291 init_float128_ibm (TFmode);
11292
11293 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
11294 else
11295 init_float128_ieee (TFmode);
11296 }
11297 }
11298
11299 /* Emit a potentially record-form instruction, setting DST from SRC.
11300 If DOT is 0, that is all; otherwise, set CCREG to the result of the
11301 signed comparison of DST with zero. If DOT is 1, the generated RTL
11302 doesn't care about the DST result; if DOT is 2, it does. If CCREG
11303 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
11304 a separate COMPARE. */
11305
11306 void
11307 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
11308 {
11309 if (dot == 0)
11310 {
11311 emit_move_insn (dst, src);
11312 return;
11313 }
11314
11315 if (cc_reg_not_cr0_operand (ccreg, CCmode))
11316 {
11317 emit_move_insn (dst, src);
11318 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
11319 return;
11320 }
11321
11322 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
11323 if (dot == 1)
11324 {
11325 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
11326 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
11327 }
11328 else
11329 {
11330 rtx set = gen_rtx_SET (dst, src);
11331 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
11332 }
11333 }
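
/* For example (illustrative RTL): with DOT == 1 the PARALLEL emitted
   above has the shape
       (parallel [(set (reg:CC cr0) (compare:CC src (const_int 0)))
                  (clobber dst)])
   so only the condition-register result is live, while DOT == 2 keeps
   the GPR result as well:
       (parallel [(set (reg:CC cr0) (compare:CC src (const_int 0)))
                  (set dst src)])  */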
11334
11335 \f
11336 /* A validation routine: say whether CODE, a condition code, and MODE
11337 match. The other alternatives either don't make sense or should
11338 never be generated. */
11339
11340 void
11341 validate_condition_mode (enum rtx_code code, machine_mode mode)
11342 {
11343 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
11344 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
11345 && GET_MODE_CLASS (mode) == MODE_CC);
11346
11347 /* These don't make sense. */
11348 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
11349 || mode != CCUNSmode);
11350
11351 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
11352 || mode == CCUNSmode);
11353
11354 gcc_assert (mode == CCFPmode
11355 || (code != ORDERED && code != UNORDERED
11356 && code != UNEQ && code != LTGT
11357 && code != UNGT && code != UNLT
11358 && code != UNGE && code != UNLE));
11359
11360 /* These are invalid; the information is not there. */
11361 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
11362 }
11363
11364 \f
11365 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
11366 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
11367 not zero, store there the bit offset (counted from the right) where
11368 the single stretch of 1 bits begins; and similarly for B, the bit
11369 offset where it ends. */
11370
11371 bool
11372 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
11373 {
11374 unsigned HOST_WIDE_INT val = INTVAL (mask);
11375 unsigned HOST_WIDE_INT bit;
11376 int nb, ne;
11377 int n = GET_MODE_PRECISION (mode);
11378
11379 if (mode != DImode && mode != SImode)
11380 return false;
11381
11382 if (INTVAL (mask) >= 0)
11383 {
11384 bit = val & -val;
11385 ne = exact_log2 (bit);
11386 nb = exact_log2 (val + bit);
11387 }
11388 else if (val + 1 == 0)
11389 {
11390 nb = n;
11391 ne = 0;
11392 }
11393 else if (val & 1)
11394 {
11395 val = ~val;
11396 bit = val & -val;
11397 nb = exact_log2 (bit);
11398 ne = exact_log2 (val + bit);
11399 }
11400 else
11401 {
11402 bit = val & -val;
11403 ne = exact_log2 (bit);
11404 if (val + bit == 0)
11405 nb = n;
11406 else
11407 nb = 0;
11408 }
11409
11410 nb--;
11411
11412 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
11413 return false;
11414
11415 if (b)
11416 *b = nb;
11417 if (e)
11418 *e = ne;
11419
11420 return true;
11421 }
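
/* Worked example (illustrative): MASK = 0x00ffff00 in SImode is a single
   run of ones from bit 8 through bit 23.  The value is positive, so
   bit = val & -val = 0x100 gives ne = 8, and val + bit = 0x01000000
   gives nb = 24, decremented to 23; the function stores *e = 8, *b = 23
   and returns true.  MASK = 0x00ff00ff has two separate runs: ne = 0,
   but val + bit = 0x00ff0100 is not a power of two, so exact_log2
   returns -1 and the nb < 0 check rejects the mask.  */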
11422
11423 bool
11424 rs6000_is_valid_rotate_dot_mask (rtx mask, machine_mode mode)
11425 {
11426 int nb, ne;
11427 if (rs6000_is_valid_mask (mask, &nb, &ne, mode) && nb >= ne && ne > 0)
11428 {
11429 if (TARGET_64BIT)
11430 return true;
11431 /* *rotldi3_mask_dot requires for -m32 -mpowerpc64 that the mask is
11432 <= 0x7fffffff. */
11433 return (UINTVAL (mask) << (63 - nb)) <= 0x7fffffff;
11434 }
11435
11436 return false;
11437 }
11438
11439 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
11440 or rldicr instruction, to implement an AND with it in mode MODE. */
11441
11442 bool
11443 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
11444 {
11445 int nb, ne;
11446
11447 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11448 return false;
11449
11450 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
11451 does not wrap. */
11452 if (mode == DImode)
11453 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
11454
11455 /* For SImode, rlwinm can do everything. */
11456 if (mode == SImode)
11457 return (nb < 32 && ne < 32);
11458
11459 return false;
11460 }
11461
11462 /* Return the instruction template for an AND with mask in mode MODE, with
11463 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11464
11465 const char *
11466 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
11467 {
11468 int nb, ne;
11469
11470 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
11471 gcc_unreachable ();
11472
11473 if (mode == DImode && ne == 0)
11474 {
11475 operands[3] = GEN_INT (63 - nb);
11476 if (dot)
11477 return "rldicl. %0,%1,0,%3";
11478 return "rldicl %0,%1,0,%3";
11479 }
11480
11481 if (mode == DImode && nb == 63)
11482 {
11483 operands[3] = GEN_INT (63 - ne);
11484 if (dot)
11485 return "rldicr. %0,%1,0,%3";
11486 return "rldicr %0,%1,0,%3";
11487 }
11488
11489 if (nb < 32 && ne < 32)
11490 {
11491 operands[3] = GEN_INT (31 - nb);
11492 operands[4] = GEN_INT (31 - ne);
11493 if (dot)
11494 return "rlwinm. %0,%1,0,%3,%4";
11495 return "rlwinm %0,%1,0,%3,%4";
11496 }
11497
11498 gcc_unreachable ();
11499 }
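
/* For example (illustrative): operands[2] = 0xff in DImode gives ne = 0
   and nb = 7, so the template is "rldicl %0,%1,0,56" (clear all but the
   low 8 bits).  In SImode, operands[2] = 0xffffff00 gives nb = 31 and
   ne = 8, so the template is "rlwinm %0,%1,0,0,23", i.e. MB = 0 and
   ME = 23 in the big-endian bit numbering used by the instruction.  */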
11500
11501 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
11502 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
11503 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
11504
11505 bool
11506 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
11507 {
11508 int nb, ne;
11509
11510 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11511 return false;
11512
11513 int n = GET_MODE_PRECISION (mode);
11514 int sh = -1;
11515
11516 if (CONST_INT_P (XEXP (shift, 1)))
11517 {
11518 sh = INTVAL (XEXP (shift, 1));
11519 if (sh < 0 || sh >= n)
11520 return false;
11521 }
11522
11523 rtx_code code = GET_CODE (shift);
11524
11525 /* Convert any shift by 0 to a rotate, to simplify below code. */
11526 if (sh == 0)
11527 code = ROTATE;
11528
11529 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11530 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
11531 code = ASHIFT;
11532 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
11533 {
11534 code = LSHIFTRT;
11535 sh = n - sh;
11536 }
11537
11538 /* DImode rotates need rld*. */
11539 if (mode == DImode && code == ROTATE)
11540 return (nb == 63 || ne == 0 || ne == sh);
11541
11542 /* SImode rotates need rlw*. */
11543 if (mode == SImode && code == ROTATE)
11544 return (nb < 32 && ne < 32 && sh < 32);
11545
11546 /* Wrap-around masks are only okay for rotates. */
11547 if (ne > nb)
11548 return false;
11549
11550 /* Variable shifts are only okay for rotates. */
11551 if (sh < 0)
11552 return false;
11553
11554 /* Don't allow ASHIFT if the mask is wrong for that. */
11555 if (code == ASHIFT && ne < sh)
11556 return false;
11557
11558 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
11559 if the mask is wrong for that. */
11560 if (nb < 32 && ne < 32 && sh < 32
11561 && !(code == LSHIFTRT && nb >= 32 - sh))
11562 return true;
11563
11564 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
11565 if the mask is wrong for that. */
11566 if (code == LSHIFTRT)
11567 sh = 64 - sh;
11568 if (nb == 63 || ne == 0 || ne == sh)
11569 return !(code == LSHIFTRT && nb >= sh);
11570
11571 return false;
11572 }
11573
11574 /* Return the instruction template for a shift with mask in mode MODE, with
11575 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11576
11577 const char *
11578 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
11579 {
11580 int nb, ne;
11581
11582 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
11583 gcc_unreachable ();
11584
11585 if (mode == DImode && ne == 0)
11586 {
11587 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11588 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
11589 operands[3] = GEN_INT (63 - nb);
11590 if (dot)
11591 return "rld%I2cl. %0,%1,%2,%3";
11592 return "rld%I2cl %0,%1,%2,%3";
11593 }
11594
11595 if (mode == DImode && nb == 63)
11596 {
11597 operands[3] = GEN_INT (63 - ne);
11598 if (dot)
11599 return "rld%I2cr. %0,%1,%2,%3";
11600 return "rld%I2cr %0,%1,%2,%3";
11601 }
11602
11603 if (mode == DImode
11604 && GET_CODE (operands[4]) != LSHIFTRT
11605 && CONST_INT_P (operands[2])
11606 && ne == INTVAL (operands[2]))
11607 {
11608 operands[3] = GEN_INT (63 - nb);
11609 if (dot)
11610 return "rld%I2c. %0,%1,%2,%3";
11611 return "rld%I2c %0,%1,%2,%3";
11612 }
11613
11614 if (nb < 32 && ne < 32)
11615 {
11616 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11617 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
11618 operands[3] = GEN_INT (31 - nb);
11619 operands[4] = GEN_INT (31 - ne);
11620 /* This insn can also be a 64-bit rotate with mask that really makes
11621 it just a shift right (with mask); the %h below are to adjust for
11622 that situation (shift count is >= 32 in that case). */
11623 if (dot)
11624 return "rlw%I2nm. %0,%1,%h2,%3,%4";
11625 return "rlw%I2nm %0,%1,%h2,%3,%4";
11626 }
11627
11628 gcc_unreachable ();
11629 }
11630
11631 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
11632 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
11633 ASHIFT, or LSHIFTRT) in mode MODE. */
11634
11635 bool
11636 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
11637 {
11638 int nb, ne;
11639
11640 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11641 return false;
11642
11643 int n = GET_MODE_PRECISION (mode);
11644
11645 int sh = INTVAL (XEXP (shift, 1));
11646 if (sh < 0 || sh >= n)
11647 return false;
11648
11649 rtx_code code = GET_CODE (shift);
11650
11651 /* Convert any shift by 0 to a rotate, to simplify below code. */
11652 if (sh == 0)
11653 code = ROTATE;
11654
11655 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11656 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
11657 code = ASHIFT;
11658 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
11659 {
11660 code = LSHIFTRT;
11661 sh = n - sh;
11662 }
11663
11664 /* DImode rotates need rldimi. */
11665 if (mode == DImode && code == ROTATE)
11666 return (ne == sh);
11667
11668 /* SImode rotates need rlwimi. */
11669 if (mode == SImode && code == ROTATE)
11670 return (nb < 32 && ne < 32 && sh < 32);
11671
11672 /* Wrap-around masks are only okay for rotates. */
11673 if (ne > nb)
11674 return false;
11675
11676 /* Don't allow ASHIFT if the mask is wrong for that. */
11677 if (code == ASHIFT && ne < sh)
11678 return false;
11679
11680 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
11681 if the mask is wrong for that. */
11682 if (nb < 32 && ne < 32 && sh < 32
11683 && !(code == LSHIFTRT && nb >= 32 - sh))
11684 return true;
11685
11686 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
11687 if the mask is wrong for that. */
11688 if (code == LSHIFTRT)
11689 sh = 64 - sh;
11690 if (ne == sh)
11691 return !(code == LSHIFTRT && nb >= sh);
11692
11693 return false;
11694 }
11695
11696 /* Return the instruction template for an insert with mask in mode MODE, with
11697 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11698
11699 const char *
11700 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
11701 {
11702 int nb, ne;
11703
11704 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
11705 gcc_unreachable ();
11706
11707 /* Prefer rldimi because rlwimi is cracked. */
11708 if (TARGET_POWERPC64
11709 && (!dot || mode == DImode)
11710 && GET_CODE (operands[4]) != LSHIFTRT
11711 && ne == INTVAL (operands[2]))
11712 {
11713 operands[3] = GEN_INT (63 - nb);
11714 if (dot)
11715 return "rldimi. %0,%1,%2,%3";
11716 return "rldimi %0,%1,%2,%3";
11717 }
11718
11719 if (nb < 32 && ne < 32)
11720 {
11721 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11722 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
11723 operands[3] = GEN_INT (31 - nb);
11724 operands[4] = GEN_INT (31 - ne);
11725 if (dot)
11726 return "rlwimi. %0,%1,%2,%3,%4";
11727 return "rlwimi %0,%1,%2,%3,%4";
11728 }
11729
11730 gcc_unreachable ();
11731 }
11732
11733 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
11734 using two machine instructions. */
11735
11736 bool
11737 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
11738 {
11739 /* There are two kinds of AND we can handle with two insns:
11740 1) those we can do with two rl* insn;
11741 2) ori[s];xori[s].
11742
11743 We do not handle that last case yet. */
11744
11745 /* If there is just one stretch of ones, we can do it. */
11746 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
11747 return true;
11748
11749 /* Otherwise, fill in the lowest "hole"; if we can do the result with
11750 one insn, we can do the whole thing with two. */
11751 unsigned HOST_WIDE_INT val = INTVAL (c);
11752 unsigned HOST_WIDE_INT bit1 = val & -val;
11753 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
11754 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
11755 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
11756 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
11757 }
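
/* Worked example (illustrative): C = 0x00000000ff00ff00 has two runs of
   ones (bits 8-15 and 24-31) with a hole at bits 16-23:
       bit1 = val & -val          = 0x100        lowest set bit
       bit2 = (val + bit1) & ~val = 0x10000      bottom of the hole
       val1 = (val + bit1) & val  = 0xff000000   low run removed
       bit3 = val1 & -val1        = 0x1000000    bottom of the next run
   val + bit3 - bit2 = 0xffffff00 fills the hole into one contiguous run,
   which rs6000_is_valid_and_mask accepts, so the whole AND can be done
   with two rl* instructions.  */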
11758
11759 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
11760 If EXPAND is true, split rotate-and-mask instructions we generate to
11761 their constituent parts as well (this is used during expand); if DOT
11762 is 1, make the last insn a record-form instruction clobbering the
11763 destination GPR and setting the CC reg (from operands[3]); if 2, set
11764 that GPR as well as the CC reg. */
11765
11766 void
11767 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
11768 {
11769 gcc_assert (!(expand && dot));
11770
11771 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
11772
11773 /* If it is one stretch of ones, it is DImode; shift left, mask, then
11774 shift right. This generates better code than doing the masks without
11775 shifts, or shifting first right and then left. */
11776 int nb, ne;
11777 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
11778 {
11779 gcc_assert (mode == DImode);
11780
11781 int shift = 63 - nb;
11782 if (expand)
11783 {
11784 rtx tmp1 = gen_reg_rtx (DImode);
11785 rtx tmp2 = gen_reg_rtx (DImode);
11786 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
11787 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
11788 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
11789 }
11790 else
11791 {
11792 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
11793 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
11794 emit_move_insn (operands[0], tmp);
11795 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
11796 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11797 }
11798 return;
11799 }
11800
11801 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
11802 that does the rest. */
11803 unsigned HOST_WIDE_INT bit1 = val & -val;
11804 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
11805 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
11806 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
11807
11808 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
11809 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
11810
11811 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
11812
11813 /* Two "no-rotate"-and-mask instructions, for SImode. */
11814 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
11815 {
11816 gcc_assert (mode == SImode);
11817
11818 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
11819 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
11820 emit_move_insn (reg, tmp);
11821 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
11822 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11823 return;
11824 }
11825
11826 gcc_assert (mode == DImode);
11827
11828 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
11829 insns; we have to do the first in SImode, because it wraps. */
11830 if (mask2 <= 0xffffffff
11831 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
11832 {
11833 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
11834 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
11835 GEN_INT (mask1));
11836 rtx reg_low = gen_lowpart (SImode, reg);
11837 emit_move_insn (reg_low, tmp);
11838 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
11839 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11840 return;
11841 }
11842
11843 /* Two rld* insns: rotate, clear the hole in the middle (which now is
11844 at the top end), rotate back and clear the other hole. */
11845 int right = exact_log2 (bit3);
11846 int left = 64 - right;
11847
11848 /* Rotate the mask too. */
11849 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
11850
11851 if (expand)
11852 {
11853 rtx tmp1 = gen_reg_rtx (DImode);
11854 rtx tmp2 = gen_reg_rtx (DImode);
11855 rtx tmp3 = gen_reg_rtx (DImode);
11856 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
11857 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
11858 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
11859 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
11860 }
11861 else
11862 {
11863 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
11864 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
11865 emit_move_insn (operands[0], tmp);
11866 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
11867 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
11868 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11869 }
11870 }
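
/* For example (illustrative): a DImode AND with 0x000000ffff000000 is a
   single run (nb = 39, ne = 24), so shift = 63 - 39 = 24 and the expand
   path above emits
       tmp1 = x << 24;                     the run moves to bits 48-63
       tmp2 = tmp1 & 0xffff000000000000;   a single rldicr-style mask
       dest = tmp2 >> 24;
   where each shift/AND pair can then match one rotate-and-mask insn.  */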
11871 \f
11872 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
11873 for lfq and stfq insns iff the registers are hard registers. */
11874
11875 int
11876 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
11877 {
11878 /* We might have been passed a SUBREG. */
11879 if (!REG_P (reg1) || !REG_P (reg2))
11880 return 0;
11881
11882 /* We might have been passed non floating point registers. */
11883 if (!FP_REGNO_P (REGNO (reg1))
11884 || !FP_REGNO_P (REGNO (reg2)))
11885 return 0;
11886
11887 return (REGNO (reg1) == REGNO (reg2) - 1);
11888 }
11889
11890 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
11891 addr1 and addr2 must be in consecutive memory locations
11892 (addr2 == addr1 + 8). */
11893
11894 int
11895 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
11896 {
11897 rtx addr1, addr2;
11898 unsigned int reg1, reg2;
11899 int offset1, offset2;
11900
11901 /* The mems cannot be volatile. */
11902 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
11903 return 0;
11904
11905 addr1 = XEXP (mem1, 0);
11906 addr2 = XEXP (mem2, 0);
11907
11908 /* Extract an offset (if used) from the first addr. */
11909 if (GET_CODE (addr1) == PLUS)
11910 {
11911 /* If not a REG, return zero. */
11912 if (!REG_P (XEXP (addr1, 0)))
11913 return 0;
11914 else
11915 {
11916 reg1 = REGNO (XEXP (addr1, 0));
11917 /* The offset must be constant! */
11918 if (!CONST_INT_P (XEXP (addr1, 1)))
11919 return 0;
11920 offset1 = INTVAL (XEXP (addr1, 1));
11921 }
11922 }
11923 else if (!REG_P (addr1))
11924 return 0;
11925 else
11926 {
11927 reg1 = REGNO (addr1);
11928 /* This was a simple (mem (reg)) expression. Offset is 0. */
11929 offset1 = 0;
11930 }
11931
11932 /* And now for the second addr. */
11933 if (GET_CODE (addr2) == PLUS)
11934 {
11935 /* If not a REG, return zero. */
11936 if (!REG_P (XEXP (addr2, 0)))
11937 return 0;
11938 else
11939 {
11940 reg2 = REGNO (XEXP (addr2, 0));
11941 /* The offset must be constant. */
11942 if (!CONST_INT_P (XEXP (addr2, 1)))
11943 return 0;
11944 offset2 = INTVAL (XEXP (addr2, 1));
11945 }
11946 }
11947 else if (!REG_P (addr2))
11948 return 0;
11949 else
11950 {
11951 reg2 = REGNO (addr2);
11952 /* This was a simple (mem (reg)) expression. Offset is 0. */
11953 offset2 = 0;
11954 }
11955
11956 /* Both of these must have the same base register. */
11957 if (reg1 != reg2)
11958 return 0;
11959
11960 /* The offset for the second addr must be 8 more than the first addr. */
11961 if (offset2 != offset1 + 8)
11962 return 0;
11963
11964 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
11965 instructions. */
11966 return 1;
11967 }
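
/* For example (illustrative): mem1 = (mem:DF (plus (reg r9) (const_int 8)))
   and mem2 = (mem:DF (plus (reg r9) (const_int 16))) share the base
   register r9 and satisfy offset2 == offset1 + 8, so the two 8-byte
   accesses are adjacent and the peephole may pair them into a single
   lfq or stfq.  */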
11968 \f
11969 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.  For SDmode values we
11970 need to use DDmode, in all other cases we can use the same mode. */
11971 static machine_mode
11972 rs6000_secondary_memory_needed_mode (machine_mode mode)
11973 {
11974 if (lra_in_progress && mode == SDmode)
11975 return DDmode;
11976 return mode;
11977 }
11978
11979 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
11980 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
11981 only work on the traditional altivec registers, note if an altivec register
11982 was chosen. */
11983
11984 static enum rs6000_reg_type
11985 register_to_reg_type (rtx reg, bool *is_altivec)
11986 {
11987 HOST_WIDE_INT regno;
11988 enum reg_class rclass;
11989
11990 if (SUBREG_P (reg))
11991 reg = SUBREG_REG (reg);
11992
11993 if (!REG_P (reg))
11994 return NO_REG_TYPE;
11995
11996 regno = REGNO (reg);
11997 if (!HARD_REGISTER_NUM_P (regno))
11998 {
11999 if (!lra_in_progress && !reload_completed)
12000 return PSEUDO_REG_TYPE;
12001
12002 regno = true_regnum (reg);
12003 if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
12004 return PSEUDO_REG_TYPE;
12005 }
12006
12007 gcc_assert (regno >= 0);
12008
12009 if (is_altivec && ALTIVEC_REGNO_P (regno))
12010 *is_altivec = true;
12011
12012 rclass = rs6000_regno_regclass[regno];
12013 return reg_class_to_reg_type[(int)rclass];
12014 }
12015
12016 /* Helper function to return the cost of adding a TOC entry address. */
12017
12018 static inline int
12019 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
12020 {
12021 int ret;
12022
12023 if (TARGET_CMODEL != CMODEL_SMALL)
12024 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
12025
12026 else
12027 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
12028
12029 return ret;
12030 }
12031
12032 /* Helper function for rs6000_secondary_reload to determine whether the memory
12033 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
12034 needs reloading. Return negative if the memory is not handled by the memory
12035    helper functions (so a different reload method should be tried), 0 if no
12036    additional instructions are needed, and positive to give the extra cost for the
12037 memory. */
12038
12039 static int
12040 rs6000_secondary_reload_memory (rtx addr,
12041 enum reg_class rclass,
12042 machine_mode mode)
12043 {
12044 int extra_cost = 0;
12045 rtx reg, and_arg, plus_arg0, plus_arg1;
12046 addr_mask_type addr_mask;
12047 const char *type = NULL;
12048 const char *fail_msg = NULL;
12049
12050 if (GPR_REG_CLASS_P (rclass))
12051 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
12052
12053 else if (rclass == FLOAT_REGS)
12054 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
12055
12056 else if (rclass == ALTIVEC_REGS)
12057 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
12058
12059 /* For the combined VSX_REGS, turn off Altivec AND -16. */
12060 else if (rclass == VSX_REGS)
12061 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
12062 & ~RELOAD_REG_AND_M16);
12063
12064 /* If the register allocator hasn't made up its mind yet on the register
12065 class to use, settle on defaults to use. */
12066 else if (rclass == NO_REGS)
12067 {
12068 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
12069 & ~RELOAD_REG_AND_M16);
12070
12071 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
12072 addr_mask &= ~(RELOAD_REG_INDEXED
12073 | RELOAD_REG_PRE_INCDEC
12074 | RELOAD_REG_PRE_MODIFY);
12075 }
12076
12077 else
12078 addr_mask = 0;
12079
12080 /* If the register isn't valid in this register class, just return now. */
12081 if ((addr_mask & RELOAD_REG_VALID) == 0)
12082 {
12083 if (TARGET_DEBUG_ADDR)
12084 {
12085 fprintf (stderr,
12086 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
12087 "not valid in class\n",
12088 GET_MODE_NAME (mode), reg_class_names[rclass]);
12089 debug_rtx (addr);
12090 }
12091
12092 return -1;
12093 }
12094
12095 switch (GET_CODE (addr))
12096 {
12097 /* Does the register class support auto update forms for this mode? We
12098 don't need a scratch register, since the powerpc only supports
12099 PRE_INC, PRE_DEC, and PRE_MODIFY. */
12100 case PRE_INC:
12101 case PRE_DEC:
12102 reg = XEXP (addr, 0);
12103 if (!base_reg_operand (reg, GET_MODE (reg)))
12104 {
12105 fail_msg = "no base register #1";
12106 extra_cost = -1;
12107 }
12108
12109 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
12110 {
12111 extra_cost = 1;
12112 type = "update";
12113 }
12114 break;
12115
12116 case PRE_MODIFY:
12117 reg = XEXP (addr, 0);
12118 plus_arg1 = XEXP (addr, 1);
12119 if (!base_reg_operand (reg, GET_MODE (reg))
12120 || GET_CODE (plus_arg1) != PLUS
12121 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
12122 {
12123 fail_msg = "bad PRE_MODIFY";
12124 extra_cost = -1;
12125 }
12126
12127 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
12128 {
12129 extra_cost = 1;
12130 type = "update";
12131 }
12132 break;
12133
12134 /* Do we need to simulate AND -16 to clear the bottom address bits used
12135 in VMX load/stores? Only allow the AND for vector sizes. */
12136 case AND:
12137 and_arg = XEXP (addr, 0);
12138 if (GET_MODE_SIZE (mode) != 16
12139 || !CONST_INT_P (XEXP (addr, 1))
12140 || INTVAL (XEXP (addr, 1)) != -16)
12141 {
12142 fail_msg = "bad Altivec AND #1";
12143 extra_cost = -1;
12144 }
12145
12146 if (rclass != ALTIVEC_REGS)
12147 {
12148 if (legitimate_indirect_address_p (and_arg, false))
12149 extra_cost = 1;
12150
12151 else if (legitimate_indexed_address_p (and_arg, false))
12152 extra_cost = 2;
12153
12154 else
12155 {
12156 fail_msg = "bad Altivec AND #2";
12157 extra_cost = -1;
12158 }
12159
12160 type = "and";
12161 }
12162 break;
12163
12164 /* If this is an indirect address, make sure it is a base register. */
12165 case REG:
12166 case SUBREG:
12167 if (!legitimate_indirect_address_p (addr, false))
12168 {
12169 extra_cost = 1;
12170 type = "move";
12171 }
12172 break;
12173
12174 /* If this is an indexed address, make sure the register class can handle
12175 indexed addresses for this mode. */
12176 case PLUS:
12177 plus_arg0 = XEXP (addr, 0);
12178 plus_arg1 = XEXP (addr, 1);
12179
12180 /* (plus (plus (reg) (constant)) (constant)) is generated during
12181 push_reload processing, so handle it now. */
12182 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
12183 {
12184 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12185 {
12186 extra_cost = 1;
12187 type = "offset";
12188 }
12189 }
12190
12191 /* (plus (plus (reg) (constant)) (reg)) is also generated during
12192 push_reload processing, so handle it now. */
12193 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
12194 {
12195 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
12196 {
12197 extra_cost = 1;
12198 type = "indexed #2";
12199 }
12200 }
12201
12202 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
12203 {
12204 fail_msg = "no base register #2";
12205 extra_cost = -1;
12206 }
12207
12208 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
12209 {
12210 if ((addr_mask & RELOAD_REG_INDEXED) == 0
12211 || !legitimate_indexed_address_p (addr, false))
12212 {
12213 extra_cost = 1;
12214 type = "indexed";
12215 }
12216 }
12217
12218 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
12219 && CONST_INT_P (plus_arg1))
12220 {
12221 if (!quad_address_offset_p (INTVAL (plus_arg1)))
12222 {
12223 extra_cost = 1;
12224 type = "vector d-form offset";
12225 }
12226 }
12227
12228 /* Make sure the register class can handle offset addresses. */
12229 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
12230 {
12231 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12232 {
12233 extra_cost = 1;
12234 type = "offset #2";
12235 }
12236 }
12237
12238 else
12239 {
12240 fail_msg = "bad PLUS";
12241 extra_cost = -1;
12242 }
12243
12244 break;
12245
12246 case LO_SUM:
12247 /* Quad offsets are restricted and can't handle normal addresses. */
12248 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12249 {
12250 extra_cost = -1;
12251 type = "vector d-form lo_sum";
12252 }
12253
12254 else if (!legitimate_lo_sum_address_p (mode, addr, false))
12255 {
12256 fail_msg = "bad LO_SUM";
12257 extra_cost = -1;
12258 }
12259
12260 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12261 {
12262 extra_cost = 1;
12263 type = "lo_sum";
12264 }
12265 break;
12266
12267 /* Static addresses need to create a TOC entry. */
12268 case CONST:
12269 case SYMBOL_REF:
12270 case LABEL_REF:
12271 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12272 {
12273 extra_cost = -1;
12274 type = "vector d-form lo_sum #2";
12275 }
12276
12277 else
12278 {
12279 type = "address";
12280 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
12281 }
12282 break;
12283
12284 /* TOC references look like offsetable memory. */
12285 case UNSPEC:
12286 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
12287 {
12288 fail_msg = "bad UNSPEC";
12289 extra_cost = -1;
12290 }
12291
12292 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12293 {
12294 extra_cost = -1;
12295 type = "vector d-form lo_sum #3";
12296 }
12297
12298 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12299 {
12300 extra_cost = 1;
12301 type = "toc reference";
12302 }
12303 break;
12304
12305 default:
12306 {
12307 fail_msg = "bad address";
12308 extra_cost = -1;
12309 }
12310 }
12311
12312 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
12313 {
12314 if (extra_cost < 0)
12315 fprintf (stderr,
12316 "rs6000_secondary_reload_memory error: mode = %s, "
12317 "class = %s, addr_mask = '%s', %s\n",
12318 GET_MODE_NAME (mode),
12319 reg_class_names[rclass],
12320 rs6000_debug_addr_mask (addr_mask, false),
12321 (fail_msg != NULL) ? fail_msg : "<bad address>");
12322
12323 else
12324 fprintf (stderr,
12325 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
12326 "addr_mask = '%s', extra cost = %d, %s\n",
12327 GET_MODE_NAME (mode),
12328 reg_class_names[rclass],
12329 rs6000_debug_addr_mask (addr_mask, false),
12330 extra_cost,
12331 (type) ? type : "<none>");
12332
12333 debug_rtx (addr);
12334 }
12335
12336 return extra_cost;
12337 }
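
/* Illustrative example (hypothetical RTL): reloading
 (mem:DF (pre_inc:DI (reg:DI 3)))
 into FLOAT_REGS, whose addr_mask has RELOAD_REG_PRE_INCDEC for DFmode,
 costs 0 extra insns; for a register class without update-form support this
 function would instead return an extra cost of 1 with type "update". */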
12338
12339 /* Helper function for rs6000_secondary_reload to return true if a move to a
12340 different register class is really a simple move. */
12341
12342 static bool
12343 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
12344 enum rs6000_reg_type from_type,
12345 machine_mode mode)
12346 {
12347 int size = GET_MODE_SIZE (mode);
12348
12349 /* Handle the various direct moves available. In this function, we only
12350 look at cases where we don't need any extra registers, and one or more
12351 simple move insns are issued. Originally small integers are not allowed
12352 in FPR/VSX registers. Single precision binary floating point is not a
12353 simple move because we need to convert to the single precision memory layout.
12354 The 4-byte SDmode can be moved. TDmode values are disallowed since they
12355 need special direct move handling, which we do not support yet. */
12356 if (TARGET_DIRECT_MOVE
12357 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12358 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
12359 {
12360 if (TARGET_POWERPC64)
12361 {
12362 /* ISA 2.07: MTVSRD or MFVSRD. */
12363 if (size == 8)
12364 return true;
12365
12366 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
12367 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
12368 return true;
12369 }
12370
12371 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12372 if (TARGET_P8_VECTOR)
12373 {
12374 if (mode == SImode)
12375 return true;
12376
12377 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
12378 return true;
12379 }
12380
12381 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12382 if (mode == SDmode)
12383 return true;
12384 }
12385
12386 /* Move to/from SPR. */
12387 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
12388 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
12389 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
12390 return true;
12391
12392 return false;
12393 }
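
/* Example: on a 64-bit ISA 2.07 (power8) target, a DImode move between a GPR
 and a VSX register is a single MTVSRD/MFVSRD and is reported as a simple
 move; a 16-byte move needs ISA 3.0 (MTVSRDD or MFVSRD + MFVSRLD), and
 otherwise falls through to rs6000_secondary_reload_direct_move. */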
12394
12395 /* Direct move helper function for rs6000_secondary_reload. Handle all of the
12396 special direct moves that involve allocating an extra register. Return true
12397 if there is a helper function for the move, filling in SRI->icode and
12398 SRI->extra_cost, and false if not. */
12399
12400 static bool
12401 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
12402 enum rs6000_reg_type from_type,
12403 machine_mode mode,
12404 secondary_reload_info *sri,
12405 bool altivec_p)
12406 {
12407 bool ret = false;
12408 enum insn_code icode = CODE_FOR_nothing;
12409 int cost = 0;
12410 int size = GET_MODE_SIZE (mode);
12411
12412 if (TARGET_POWERPC64 && size == 16)
12413 {
12414 /* Handle moving 128-bit values from GPRs to VSX registers on
12415 ISA 2.07 (power8, power9) when running in 64-bit mode using
12416 XXPERMDI to glue the two 64-bit values back together. */
12417 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
12418 {
12419 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
12420 icode = reg_addr[mode].reload_vsx_gpr;
12421 }
12422
12423 /* Handle moving 128-bit values from VSX registers to GPRs on
12424 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
12425 bottom 64-bit value. */
12426 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12427 {
12428 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
12429 icode = reg_addr[mode].reload_gpr_vsx;
12430 }
12431 }
12432
12433 else if (TARGET_POWERPC64 && mode == SFmode)
12434 {
12435 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12436 {
12437 cost = 3; /* xscvdpspn, mfvsrd, and. */
12438 icode = reg_addr[mode].reload_gpr_vsx;
12439 }
12440
12441 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
12442 {
12443 cost = 2; /* mtvsrwz, xscvspdpn. */
12444 icode = reg_addr[mode].reload_vsx_gpr;
12445 }
12446 }
12447
12448 else if (!TARGET_POWERPC64 && size == 8)
12449 {
12450 /* Handle moving 64-bit values from GPRs to floating point registers on
12451 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
12452 32-bit values back together. Altivec register classes must be handled
12453 specially since a different instruction is used, and the secondary
12454 reload support requires a single register class in the scratch
12455 register constraint. However, right now TFmode is not allowed in
12456 Altivec registers, so the pattern will never match. */
12457 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
12458 {
12459 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
12460 icode = reg_addr[mode].reload_fpr_gpr;
12461 }
12462 }
12463
12464 if (icode != CODE_FOR_nothing)
12465 {
12466 ret = true;
12467 if (sri)
12468 {
12469 sri->icode = icode;
12470 sri->extra_cost = cost;
12471 }
12472 }
12473
12474 return ret;
12475 }
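
/* Sketch of the 128-bit GPR -> VSX case above on a 64-bit ISA 2.07 target
 (register numbers are for illustration only):

 mtvsrd vs0,r3
 mtvsrd vs1,r4
 xxpermdi vs0,vs0,vs1,0

 i.e. two MTVSRDs plus one XXPERMDI, matching the cost of 3 above. */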
12476
12477 /* Return whether a move between two register classes can be done either
12478 directly (simple move) or via a pattern that uses a single extra temporary
12479 (using ISA 2.07's direct move in this case). */
12480
12481 static bool
12482 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
12483 enum rs6000_reg_type from_type,
12484 machine_mode mode,
12485 secondary_reload_info *sri,
12486 bool altivec_p)
12487 {
12488 /* Fall back to load/store reloads if either type is not a register. */
12489 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
12490 return false;
12491
12492 /* If we haven't allocated registers yet, assume the move can be done for the
12493 standard register types. */
12494 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
12495 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
12496 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
12497 return true;
12498
12499 /* A move within the same set of registers is a simple move for
12500 non-specialized registers. */
12501 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
12502 return true;
12503
12504 /* Check whether a simple move can be done directly. */
12505 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
12506 {
12507 if (sri)
12508 {
12509 sri->icode = CODE_FOR_nothing;
12510 sri->extra_cost = 0;
12511 }
12512 return true;
12513 }
12514
12515 /* Now check if we can do it in a few steps. */
12516 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
12517 altivec_p);
12518 }
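
/* Example: a GPR <-> GPR SImode move is accepted immediately as a standard
 register type move; a GPR <-> VSX DImode move on power8 qualifies as a
 simple move; and a 32-bit GPR -> FPR DFmode move is only handled via the
 direct-move helper (two MTVSRWZs plus an FMRGOW). */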
12519
12520 /* Inform reload about cases where moving X with a mode MODE to a register in
12521 RCLASS requires an extra scratch or immediate register. Return the class
12522 needed for the immediate register.
12523
12524 For VSX and Altivec, we may need a register to convert sp+offset into
12525 reg+sp.
12526
12527 For misaligned 64-bit gpr loads and stores we need a register to
12528 convert an offset address to indirect. */
12529
12530 static reg_class_t
12531 rs6000_secondary_reload (bool in_p,
12532 rtx x,
12533 reg_class_t rclass_i,
12534 machine_mode mode,
12535 secondary_reload_info *sri)
12536 {
12537 enum reg_class rclass = (enum reg_class) rclass_i;
12538 reg_class_t ret = ALL_REGS;
12539 enum insn_code icode;
12540 bool default_p = false;
12541 bool done_p = false;
12542
12543 /* Allow subreg of memory before/during reload. */
12544 bool memory_p = (MEM_P (x)
12545 || (!reload_completed && SUBREG_P (x)
12546 && MEM_P (SUBREG_REG (x))));
12547
12548 sri->icode = CODE_FOR_nothing;
12549 sri->t_icode = CODE_FOR_nothing;
12550 sri->extra_cost = 0;
12551 icode = ((in_p)
12552 ? reg_addr[mode].reload_load
12553 : reg_addr[mode].reload_store);
12554
12555 if (REG_P (x) || register_operand (x, mode))
12556 {
12557 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
12558 bool altivec_p = (rclass == ALTIVEC_REGS);
12559 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
12560
12561 if (!in_p)
12562 std::swap (to_type, from_type);
12563
12564 /* Can we do a direct move of some sort? */
12565 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
12566 altivec_p))
12567 {
12568 icode = (enum insn_code)sri->icode;
12569 default_p = false;
12570 done_p = true;
12571 ret = NO_REGS;
12572 }
12573 }
12574
12575 /* Make sure 0.0 is not reloaded or forced into memory. */
12576 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
12577 {
12578 ret = NO_REGS;
12579 default_p = false;
12580 done_p = true;
12581 }
12582
12583 /* If this is a scalar floating point value and we want to load it into the
12584 traditional Altivec registers, do it via a traditional floating point
12585 register, unless we have D-form addressing. Also make sure that non-zero
12586 constants use an FPR. */
12587 if (!done_p && reg_addr[mode].scalar_in_vmx_p
12588 && !mode_supports_vmx_dform (mode)
12589 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
12590 && (memory_p || CONST_DOUBLE_P (x)))
12591 {
12592 ret = FLOAT_REGS;
12593 default_p = false;
12594 done_p = true;
12595 }
12596
12597 /* Handle reload of load/stores if we have reload helper functions. */
12598 if (!done_p && icode != CODE_FOR_nothing && memory_p)
12599 {
12600 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
12601 mode);
12602
12603 if (extra_cost >= 0)
12604 {
12605 done_p = true;
12606 ret = NO_REGS;
12607 if (extra_cost > 0)
12608 {
12609 sri->extra_cost = extra_cost;
12610 sri->icode = icode;
12611 }
12612 }
12613 }
12614
12615 /* Handle unaligned loads and stores of integer registers. */
12616 if (!done_p && TARGET_POWERPC64
12617 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
12618 && memory_p
12619 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
12620 {
12621 rtx addr = XEXP (x, 0);
12622 rtx off = address_offset (addr);
12623
12624 if (off != NULL_RTX)
12625 {
12626 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
12627 unsigned HOST_WIDE_INT offset = INTVAL (off);
12628
12629 /* We need a secondary reload when our legitimate_address_p
12630 says the address is good (as otherwise the entire address
12631 will be reloaded), and the offset is not a multiple of
12632 four or we have an address wrap. Address wrap will only
12633 occur for LO_SUMs since legitimate_offset_address_p
12634 rejects addresses for 16-byte mems that will wrap. */
12635 if (GET_CODE (addr) == LO_SUM
12636 ? (1 /* legitimate_address_p allows any offset for lo_sum */
12637 && ((offset & 3) != 0
12638 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
12639 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
12640 && (offset & 3) != 0))
12641 {
12642 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
12643 if (in_p)
12644 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
12645 : CODE_FOR_reload_di_load);
12646 else
12647 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
12648 : CODE_FOR_reload_di_store);
12649 sri->extra_cost = 2;
12650 ret = NO_REGS;
12651 done_p = true;
12652 }
12653 else
12654 default_p = true;
12655 }
12656 else
12657 default_p = true;
12658 }
12659
12660 if (!done_p && !TARGET_POWERPC64
12661 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
12662 && memory_p
12663 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
12664 {
12665 rtx addr = XEXP (x, 0);
12666 rtx off = address_offset (addr);
12667
12668 if (off != NULL_RTX)
12669 {
12670 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
12671 unsigned HOST_WIDE_INT offset = INTVAL (off);
12672
12673 /* We need a secondary reload when our legitimate_address_p
12674 says the address is good (as otherwise the entire address
12675 will be reloaded), and we have a wrap.
12676
12677 legitimate_lo_sum_address_p allows LO_SUM addresses to
12678 have any offset so test for wrap in the low 16 bits.
12679
12680 legitimate_offset_address_p checks for the range
12681 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
12682 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
12683 [0x7ff4,0x7fff] respectively, so test for the
12684 intersection of these ranges, [0x7ffc,0x7fff] and
12685 [0x7ff4,0x7ff7] respectively.
12686
12687 Note that the address we see here may have been
12688 manipulated by legitimize_reload_address. */
12689 if (GET_CODE (addr) == LO_SUM
12690 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
12691 : offset - (0x8000 - extra) < UNITS_PER_WORD)
12692 {
12693 if (in_p)
12694 sri->icode = CODE_FOR_reload_si_load;
12695 else
12696 sri->icode = CODE_FOR_reload_si_store;
12697 sri->extra_cost = 2;
12698 ret = NO_REGS;
12699 done_p = true;
12700 }
12701 else
12702 default_p = true;
12703 }
12704 else
12705 default_p = true;
12706 }
12707
12708 if (!done_p)
12709 default_p = true;
12710
12711 if (default_p)
12712 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
12713
12714 gcc_assert (ret != ALL_REGS);
12715
12716 if (TARGET_DEBUG_ADDR)
12717 {
12718 fprintf (stderr,
12719 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
12720 "mode = %s",
12721 reg_class_names[ret],
12722 in_p ? "true" : "false",
12723 reg_class_names[rclass],
12724 GET_MODE_NAME (mode));
12725
12726 if (reload_completed)
12727 fputs (", after reload", stderr);
12728
12729 if (!done_p)
12730 fputs (", done_p not set", stderr);
12731
12732 if (default_p)
12733 fputs (", default secondary reload", stderr);
12734
12735 if (sri->icode != CODE_FOR_nothing)
12736 fprintf (stderr, ", reload func = %s, extra cost = %d",
12737 insn_data[sri->icode].name, sri->extra_cost);
12738
12739 else if (sri->extra_cost > 0)
12740 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
12741
12742 fputs ("\n", stderr);
12743 debug_rtx (x);
12744 }
12745
12746 return ret;
12747 }
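
/* Example: a DImode load from (mem:DI (plus:DI (reg:DI 1) (const_int 6)))
 into a GPR has an offset that is not a multiple of 4, so this hook picks
 CODE_FOR_reload_di_load (CODE_FOR_reload_si_load for -m32 -mpowerpc64)
 with an extra cost of 2, and the address is later rewritten through the
 scratch register. */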
12748
12749 /* Better tracing for rs6000_secondary_reload_inner. */
12750
12751 static void
12752 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
12753 bool store_p)
12754 {
12755 rtx set, clobber;
12756
12757 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
12758
12759 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
12760 store_p ? "store" : "load");
12761
12762 if (store_p)
12763 set = gen_rtx_SET (mem, reg);
12764 else
12765 set = gen_rtx_SET (reg, mem);
12766
12767 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
12768 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
12769 }
12770
12771 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
12772 ATTRIBUTE_NORETURN;
12773
12774 static void
12775 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
12776 bool store_p)
12777 {
12778 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
12779 gcc_unreachable ();
12780 }
12781
12782 /* Fix up reload addresses for values in GPR, FPR, and VMX registers that have
12783 reload helper functions. These were identified in
12784 rs6000_secondary_reload_memory, and if reload decided to use the secondary
12785 reload, it uses the insns:
12786 reload_<RELOAD:mode>_<P:mptrsize>_store
12787 reload_<RELOAD:mode>_<P:mptrsize>_load
12788
12789 which in turn call this function, to do whatever is necessary to create
12790 valid addresses. */
12791
12792 void
12793 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
12794 {
12795 int regno = true_regnum (reg);
12796 machine_mode mode = GET_MODE (reg);
12797 addr_mask_type addr_mask;
12798 rtx addr;
12799 rtx new_addr;
12800 rtx op_reg, op0, op1;
12801 rtx and_op;
12802 rtx cc_clobber;
12803 rtvec rv;
12804
12805 if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
12806 || !base_reg_operand (scratch, GET_MODE (scratch)))
12807 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12808
12809 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
12810 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
12811
12812 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
12813 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
12814
12815 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
12816 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
12817
12818 else
12819 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12820
12821 /* Make sure the mode is valid in this register class. */
12822 if ((addr_mask & RELOAD_REG_VALID) == 0)
12823 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12824
12825 if (TARGET_DEBUG_ADDR)
12826 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
12827
12828 new_addr = addr = XEXP (mem, 0);
12829 switch (GET_CODE (addr))
12830 {
12831 /* Does the register class support auto update forms for this mode? If
12832 not, do the update now. We don't need a scratch register, since the
12833 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
12834 case PRE_INC:
12835 case PRE_DEC:
12836 op_reg = XEXP (addr, 0);
12837 if (!base_reg_operand (op_reg, Pmode))
12838 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12839
12840 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
12841 {
12842 int delta = GET_MODE_SIZE (mode);
12843 if (GET_CODE (addr) == PRE_DEC)
12844 delta = -delta;
12845 emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
12846 new_addr = op_reg;
12847 }
12848 break;
12849
12850 case PRE_MODIFY:
12851 op0 = XEXP (addr, 0);
12852 op1 = XEXP (addr, 1);
12853 if (!base_reg_operand (op0, Pmode)
12854 || GET_CODE (op1) != PLUS
12855 || !rtx_equal_p (op0, XEXP (op1, 0)))
12856 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12857
12858 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
12859 {
12860 emit_insn (gen_rtx_SET (op0, op1));
12861 new_addr = op0;
12862 }
12863 break;
12864
12865 /* Do we need to simulate AND -16 to clear the bottom address bits used
12866 in VMX load/stores? */
12867 case AND:
12868 op0 = XEXP (addr, 0);
12869 op1 = XEXP (addr, 1);
12870 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
12871 {
12872 if (REG_P (op0) || SUBREG_P (op0))
12873 op_reg = op0;
12874
12875 else if (GET_CODE (op0) == PLUS)
12876 {
12877 emit_insn (gen_rtx_SET (scratch, op0));
12878 op_reg = scratch;
12879 }
12880
12881 else
12882 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12883
12884 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
12885 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
12886 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
12887 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
12888 new_addr = scratch;
12889 }
12890 break;
12891
12892 /* If this is an indirect address, make sure it is a base register. */
12893 case REG:
12894 case SUBREG:
12895 if (!base_reg_operand (addr, GET_MODE (addr)))
12896 {
12897 emit_insn (gen_rtx_SET (scratch, addr));
12898 new_addr = scratch;
12899 }
12900 break;
12901
12902 /* If this is an indexed address, make sure the register class can handle
12903 indexed addresses for this mode. */
12904 case PLUS:
12905 op0 = XEXP (addr, 0);
12906 op1 = XEXP (addr, 1);
12907 if (!base_reg_operand (op0, Pmode))
12908 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12909
12910 else if (int_reg_operand (op1, Pmode))
12911 {
12912 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
12913 {
12914 emit_insn (gen_rtx_SET (scratch, addr));
12915 new_addr = scratch;
12916 }
12917 }
12918
12919 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
12920 {
12921 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
12922 || !quad_address_p (addr, mode, false))
12923 {
12924 emit_insn (gen_rtx_SET (scratch, addr));
12925 new_addr = scratch;
12926 }
12927 }
12928
12929 /* Make sure the register class can handle offset addresses. */
12930 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
12931 {
12932 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12933 {
12934 emit_insn (gen_rtx_SET (scratch, addr));
12935 new_addr = scratch;
12936 }
12937 }
12938
12939 else
12940 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12941
12942 break;
12943
12944 case LO_SUM:
12945 op0 = XEXP (addr, 0);
12946 op1 = XEXP (addr, 1);
12947 if (!base_reg_operand (op0, Pmode))
12948 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12949
12950 else if (int_reg_operand (op1, Pmode))
12951 {
12952 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
12953 {
12954 emit_insn (gen_rtx_SET (scratch, addr));
12955 new_addr = scratch;
12956 }
12957 }
12958
12959 /* Quad offsets are restricted and can't handle normal addresses. */
12960 else if (mode_supports_dq_form (mode))
12961 {
12962 emit_insn (gen_rtx_SET (scratch, addr));
12963 new_addr = scratch;
12964 }
12965
12966 /* Make sure the register class can handle offset addresses. */
12967 else if (legitimate_lo_sum_address_p (mode, addr, false))
12968 {
12969 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12970 {
12971 emit_insn (gen_rtx_SET (scratch, addr));
12972 new_addr = scratch;
12973 }
12974 }
12975
12976 else
12977 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12978
12979 break;
12980
12981 case SYMBOL_REF:
12982 case CONST:
12983 case LABEL_REF:
12984 rs6000_emit_move (scratch, addr, Pmode);
12985 new_addr = scratch;
12986 break;
12987
12988 default:
12989 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12990 }
12991
12992 /* Adjust the address if it changed. */
12993 if (addr != new_addr)
12994 {
12995 mem = replace_equiv_address_nv (mem, new_addr);
12996 if (TARGET_DEBUG_ADDR)
12997 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
12998 }
12999
13000 /* Now create the move. */
13001 if (store_p)
13002 emit_insn (gen_rtx_SET (mem, reg));
13003 else
13004 emit_insn (gen_rtx_SET (reg, mem));
13005
13006 return;
13007 }
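
/* Illustrative example of the AND case: for an Altivec-style address such as
 (and:DI (plus:DI (reg:DI 3) (reg:DI 4)) (const_int -16))
 in a register class without RELOAD_REG_AND_M16, the code emits
 scratch = r3 + r4, then scratch = scratch & -16 (with a CC scratch
 clobber), and performs the memory access through the scratch. */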
13008
13009 /* Convert reloads involving 64-bit gprs and misaligned offset
13010 addressing, or multiple 32-bit gprs and offsets that are too large,
13011 to use indirect addressing. */
13012
13013 void
13014 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
13015 {
13016 int regno = true_regnum (reg);
13017 enum reg_class rclass;
13018 rtx addr;
13019 rtx scratch_or_premodify = scratch;
13020
13021 if (TARGET_DEBUG_ADDR)
13022 {
13023 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
13024 store_p ? "store" : "load");
13025 fprintf (stderr, "reg:\n");
13026 debug_rtx (reg);
13027 fprintf (stderr, "mem:\n");
13028 debug_rtx (mem);
13029 fprintf (stderr, "scratch:\n");
13030 debug_rtx (scratch);
13031 }
13032
13033 gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
13034 gcc_assert (MEM_P (mem));
13035 rclass = REGNO_REG_CLASS (regno);
13036 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
13037 addr = XEXP (mem, 0);
13038
13039 if (GET_CODE (addr) == PRE_MODIFY)
13040 {
13041 gcc_assert (REG_P (XEXP (addr, 0))
13042 && GET_CODE (XEXP (addr, 1)) == PLUS
13043 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
13044 scratch_or_premodify = XEXP (addr, 0);
13045 addr = XEXP (addr, 1);
13046 }
13047 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
13048
13049 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
13050
13051 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
13052
13053 /* Now create the move. */
13054 if (store_p)
13055 emit_insn (gen_rtx_SET (mem, reg));
13056 else
13057 emit_insn (gen_rtx_SET (reg, mem));
13058
13059 return;
13060 }
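
/* Example: a 32-bit store of a DImode register pair to
 (mem:DI (plus:SI (reg:SI 1) (const_int 32764)))
 is accepted by legitimate_offset_address_p, but the second word would need
 offset 32768, which wraps the 16-bit displacement; so the full address is
 moved into the scratch register and the store is done indirectly. */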
13061
13062 /* Given an rtx X being reloaded into a reg required to be
13063 in class CLASS, return the class of reg to actually use.
13064 In general this is just CLASS; but on some machines
13065 in some cases it is preferable to use a more restrictive class.
13066
13067 On the RS/6000, we have to return NO_REGS when we want to reload a
13068 floating-point CONST_DOUBLE to force it to be copied to memory.
13069
13070 We also don't want to reload integer values into floating-point
13071 registers if we can at all help it. In fact, this can
13072 cause reload to die, if it tries to generate a reload of CTR
13073 into a FP register and discovers it doesn't have the memory location
13074 required.
13075
13076 ??? Would it be a good idea to have reload do the converse, that is
13077 try to reload floating modes into FP registers if possible?
13078 */
13079
13080 static enum reg_class
13081 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
13082 {
13083 machine_mode mode = GET_MODE (x);
13084 bool is_constant = CONSTANT_P (x);
13085
13086 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
13087 reload class for it. */
13088 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
13089 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
13090 return NO_REGS;
13091
13092 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
13093 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
13094 return NO_REGS;
13095
13096 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
13097 the reloading of address expressions using PLUS into floating point
13098 registers. */
13099 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
13100 {
13101 if (is_constant)
13102 {
13103 /* Zero is always allowed in all VSX registers. */
13104 if (x == CONST0_RTX (mode))
13105 return rclass;
13106
13107 /* If this is a vector constant that can be formed with a few Altivec
13108 instructions, we want altivec registers. */
13109 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
13110 return ALTIVEC_REGS;
13111
13112 /* If this is an integer constant that can easily be loaded into
13113 vector registers, allow it. */
13114 if (CONST_INT_P (x))
13115 {
13116 HOST_WIDE_INT value = INTVAL (x);
13117
13118 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
13119 2.06 can generate it in the Altivec registers with
13120 VSPLTI<x>. */
13121 if (value == -1)
13122 {
13123 if (TARGET_P8_VECTOR)
13124 return rclass;
13125 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
13126 return ALTIVEC_REGS;
13127 else
13128 return NO_REGS;
13129 }
13130
13131 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
13132 a sign extend in the Altivec registers. */
13133 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
13134 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
13135 return ALTIVEC_REGS;
13136 }
13137
13138 /* Force constant to memory. */
13139 return NO_REGS;
13140 }
13141
13142 /* D-form addressing can easily reload the value. */
13143 if (mode_supports_vmx_dform (mode)
13144 || mode_supports_dq_form (mode))
13145 return rclass;
13146
13147 /* If this is a scalar floating point value and we don't have D-form
13148 addressing, prefer the traditional floating point registers so that we
13149 can use D-form (register+offset) addressing. */
13150 if (rclass == VSX_REGS
13151 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
13152 return FLOAT_REGS;
13153
13154 /* Prefer the Altivec registers if Altivec is handling the vector
13155 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
13156 loads. */
13157 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
13158 || mode == V1TImode)
13159 return ALTIVEC_REGS;
13160
13161 return rclass;
13162 }
13163
13164 if (is_constant || GET_CODE (x) == PLUS)
13165 {
13166 if (reg_class_subset_p (GENERAL_REGS, rclass))
13167 return GENERAL_REGS;
13168 if (reg_class_subset_p (BASE_REGS, rclass))
13169 return BASE_REGS;
13170 return NO_REGS;
13171 }
13172
13173 /* For the vector pair and vector quad modes, prefer their natural register
13174 (VSX or FPR) rather than GPR registers. For other integer types, prefer
13175 the GPR registers. */
13176 if (rclass == GEN_OR_FLOAT_REGS)
13177 {
13178 if (mode == OOmode)
13179 return VSX_REGS;
13180
13181 if (mode == XOmode)
13182 return FLOAT_REGS;
13183
13184 if (GET_MODE_CLASS (mode) == MODE_INT)
13185 return GENERAL_REGS;
13186 }
13187
13188 return rclass;
13189 }
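
/* Examples: an easy vector constant such as a VSPLTI<x> splat prefers
 ALTIVEC_REGS; the constant -1 can be generated in any VSX register on
 ISA 2.07 (XXLORC) but only in the Altivec registers before that; and a
 DFmode value without D-form vector addressing prefers FLOAT_REGS so that
 register+offset addressing can be used. */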
13190
13191 /* Debug version of rs6000_preferred_reload_class. */
13192 static enum reg_class
13193 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
13194 {
13195 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
13196
13197 fprintf (stderr,
13198 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
13199 "mode = %s, x:\n",
13200 reg_class_names[ret], reg_class_names[rclass],
13201 GET_MODE_NAME (GET_MODE (x)));
13202 debug_rtx (x);
13203
13204 return ret;
13205 }
13206
13207 /* If we are copying between FP or AltiVec registers and anything else, we need
13208 a memory location. The exception is when we are targeting ppc64 and the
13209 fpr/gpr direct move instructions are available. Also, under VSX, you
13210 can copy vector registers from the FP register set to the Altivec register
13211 set and vice versa. */
13212
13213 static bool
13214 rs6000_secondary_memory_needed (machine_mode mode,
13215 reg_class_t from_class,
13216 reg_class_t to_class)
13217 {
13218 enum rs6000_reg_type from_type, to_type;
13219 bool altivec_p = ((from_class == ALTIVEC_REGS)
13220 || (to_class == ALTIVEC_REGS));
13221
13222 /* If a simple/direct move is available, we don't need secondary memory. */
13223 from_type = reg_class_to_reg_type[(int)from_class];
13224 to_type = reg_class_to_reg_type[(int)to_class];
13225
13226 if (rs6000_secondary_reload_move (to_type, from_type, mode,
13227 (secondary_reload_info *)0, altivec_p))
13228 return false;
13229
13230 /* If we have a floating point or vector register class, we need to use
13231 memory to transfer the data. */
13232 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
13233 return true;
13234
13235 return false;
13236 }
13237
13238 /* Debug version of rs6000_secondary_memory_needed. */
13239 static bool
13240 rs6000_debug_secondary_memory_needed (machine_mode mode,
13241 reg_class_t from_class,
13242 reg_class_t to_class)
13243 {
13244 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
13245
13246 fprintf (stderr,
13247 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
13248 "to_class = %s, mode = %s\n",
13249 ret ? "true" : "false",
13250 reg_class_names[from_class],
13251 reg_class_names[to_class],
13252 GET_MODE_NAME (mode));
13253
13254 return ret;
13255 }
13256
13257 /* Return the register class of a scratch register needed to copy IN into
13258 or out of a register in RCLASS in MODE. If it can be done directly,
13259 NO_REGS is returned. */
13260
13261 static enum reg_class
13262 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
13263 rtx in)
13264 {
13265 int regno;
13266
13267 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
13268 #if TARGET_MACHO
13269 && MACHOPIC_INDIRECT
13270 #endif
13271 ))
13272 {
13273 /* We cannot copy a symbolic operand directly into anything
13274 other than BASE_REGS for TARGET_ELF. So indicate that a
13275 register from BASE_REGS is needed as an intermediate
13276 register.
13277
13278 On Darwin, pic addresses require a load from memory, which
13279 needs a base register. */
13280 if (rclass != BASE_REGS
13281 && (SYMBOL_REF_P (in)
13282 || GET_CODE (in) == HIGH
13283 || GET_CODE (in) == LABEL_REF
13284 || GET_CODE (in) == CONST))
13285 return BASE_REGS;
13286 }
13287
13288 if (REG_P (in))
13289 {
13290 regno = REGNO (in);
13291 if (!HARD_REGISTER_NUM_P (regno))
13292 {
13293 regno = true_regnum (in);
13294 if (!HARD_REGISTER_NUM_P (regno))
13295 regno = -1;
13296 }
13297 }
13298 else if (SUBREG_P (in))
13299 {
13300 regno = true_regnum (in);
13301 if (!HARD_REGISTER_NUM_P (regno))
13302 regno = -1;
13303 }
13304 else
13305 regno = -1;
13306
13307 /* If we have VSX register moves, prefer moving scalar values between
13308 Altivec registers and GPR by going via an FPR (and then via memory)
13309 instead of reloading the secondary memory address for Altivec moves. */
13310 if (TARGET_VSX
13311 && GET_MODE_SIZE (mode) < 16
13312 && !mode_supports_vmx_dform (mode)
13313 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
13314 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
13315 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
13316 && (regno >= 0 && INT_REGNO_P (regno)))))
13317 return FLOAT_REGS;
13318
13319 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
13320 into anything. */
13321 if (rclass == GENERAL_REGS || rclass == BASE_REGS
13322 || (regno >= 0 && INT_REGNO_P (regno)))
13323 return NO_REGS;
13324
13325 /* Constants, memory, and VSX registers can go into VSX registers (both the
13326 traditional floating point and the altivec registers). */
13327 if (rclass == VSX_REGS
13328 && (regno == -1 || VSX_REGNO_P (regno)))
13329 return NO_REGS;
13330
13331 /* Constants, memory, and FP registers can go into FP registers. */
13332 if ((regno == -1 || FP_REGNO_P (regno))
13333 && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
13334 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
13335
13336 /* Memory and AltiVec registers can go into AltiVec registers. */
13337 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
13338 && rclass == ALTIVEC_REGS)
13339 return NO_REGS;
13340
13341 /* We can copy among the CR registers. */
13342 if ((rclass == CR_REGS || rclass == CR0_REGS)
13343 && regno >= 0 && CR_REGNO_P (regno))
13344 return NO_REGS;
13345
13346 /* Otherwise, we need GENERAL_REGS. */
13347 return GENERAL_REGS;
13348 }
13349
13350 /* Debug version of rs6000_secondary_reload_class. */
13351 static enum reg_class
13352 rs6000_debug_secondary_reload_class (enum reg_class rclass,
13353 machine_mode mode, rtx in)
13354 {
13355 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
13356 fprintf (stderr,
13357 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
13358 "mode = %s, input rtx:\n",
13359 reg_class_names[ret], reg_class_names[rclass],
13360 GET_MODE_NAME (mode));
13361 debug_rtx (in);
13362
13363 return ret;
13364 }
13365
13366 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
13367
13368 static bool
13369 rs6000_can_change_mode_class (machine_mode from,
13370 machine_mode to,
13371 reg_class_t rclass)
13372 {
13373 unsigned from_size = GET_MODE_SIZE (from);
13374 unsigned to_size = GET_MODE_SIZE (to);
13375
13376 if (from_size != to_size)
13377 {
13378 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
13379
13380 if (reg_classes_intersect_p (xclass, rclass))
13381 {
13382 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
13383 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
13384 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
13385 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
13386
13387 /* Don't allow 64-bit types to overlap with 128-bit types that take a
13388 single register under VSX because the scalar part of the register
13389 is in the upper 64-bits, and not the lower 64-bits. Types like
13390 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
13391 IEEE floating point can't overlap, and neither can small
13392 values. */
13393
13394 if (to_float128_vector_p && from_float128_vector_p)
13395 return true;
13396
13397 else if (to_float128_vector_p || from_float128_vector_p)
13398 return false;
13399
13400 /* TDmode in floating-mode registers must always go into a register
13401 pair with the most significant word in the even-numbered register
13402 to match ISA requirements. In little-endian mode, this does not
13403 match subreg numbering, so we cannot allow subregs. */
13404 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
13405 return false;
13406
13407 /* Allow SD<->DD changes, since SDmode values are stored in
13408 the low half of the DDmode, just like target-independent
13409 code expects. We need to allow at least SD->DD since
13410 rs6000_secondary_memory_needed_mode asks for that change
13411 to be made for SD reloads. */
13412 if ((to == DDmode && from == SDmode)
13413 || (to == SDmode && from == DDmode))
13414 return true;
13415
13416 if (from_size < 8 || to_size < 8)
13417 return false;
13418
13419 if (from_size == 8 && (8 * to_nregs) != to_size)
13420 return false;
13421
13422 if (to_size == 8 && (8 * from_nregs) != from_size)
13423 return false;
13424
13425 return true;
13426 }
13427 else
13428 return true;
13429 }
13430
13431 /* Since the VSX register set includes traditional floating point registers
13432 and altivec registers, just check for the size being different instead of
13433 trying to check whether the modes are vector modes. Otherwise it won't
13434 allow, say, DF and DI to change classes. For types like TFmode and TDmode
13435 that take 2 64-bit registers, rather than a single 128-bit register, don't
13436 allow subregs of those types to other 128 bit types. */
13437 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
13438 {
13439 unsigned num_regs = (from_size + 15) / 16;
13440 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
13441 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
13442 return false;
13443
13444 return (from_size == 8 || from_size == 16);
13445 }
13446
13447 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
13448 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
13449 return false;
13450
13451 return true;
13452 }
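
/* Examples of the rules above: an SDmode <-> DDmode change is always allowed
 (SDmode values are stored in the low half of a DDmode value); a DImode
 subreg of a KFmode (IEEE 128-bit) value in a VSX register is rejected,
 since the scalar part of the register is the upper 64 bits and does not
 match subreg numbering. */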
13453
13454 /* Debug version of rs6000_can_change_mode_class. */
13455 static bool
13456 rs6000_debug_can_change_mode_class (machine_mode from,
13457 machine_mode to,
13458 reg_class_t rclass)
13459 {
13460 bool ret = rs6000_can_change_mode_class (from, to, rclass);
13461
13462 fprintf (stderr,
13463 "rs6000_can_change_mode_class, return %s, from = %s, "
13464 "to = %s, rclass = %s\n",
13465 ret ? "true" : "false",
13466 GET_MODE_NAME (from), GET_MODE_NAME (to),
13467 reg_class_names[rclass]);
13468
13469 return ret;
13470 }
13471 \f
13472 /* Return a string to do a move operation of 128 bits of data. */
13473
13474 const char *
13475 rs6000_output_move_128bit (rtx operands[])
13476 {
13477 rtx dest = operands[0];
13478 rtx src = operands[1];
13479 machine_mode mode = GET_MODE (dest);
13480 int dest_regno;
13481 int src_regno;
13482 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
13483 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
13484
13485 if (REG_P (dest))
13486 {
13487 dest_regno = REGNO (dest);
13488 dest_gpr_p = INT_REGNO_P (dest_regno);
13489 dest_fp_p = FP_REGNO_P (dest_regno);
13490 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
13491 dest_vsx_p = dest_fp_p | dest_vmx_p;
13492 }
13493 else
13494 {
13495 dest_regno = -1;
13496 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
13497 }
13498
13499 if (REG_P (src))
13500 {
13501 src_regno = REGNO (src);
13502 src_gpr_p = INT_REGNO_P (src_regno);
13503 src_fp_p = FP_REGNO_P (src_regno);
13504 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
13505 src_vsx_p = src_fp_p | src_vmx_p;
13506 }
13507 else
13508 {
13509 src_regno = -1;
13510 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
13511 }
13512
13513 /* Register moves. */
13514 if (dest_regno >= 0 && src_regno >= 0)
13515 {
13516 if (dest_gpr_p)
13517 {
13518 if (src_gpr_p)
13519 return "#";
13520
13521 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
13522 return (WORDS_BIG_ENDIAN
13523 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
13524 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
13525
13526 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
13527 return "#";
13528 }
13529
13530 else if (TARGET_VSX && dest_vsx_p)
13531 {
13532 if (src_vsx_p)
13533 return "xxlor %x0,%x1,%x1";
13534
13535 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
13536 return (WORDS_BIG_ENDIAN
13537 ? "mtvsrdd %x0,%1,%L1"
13538 : "mtvsrdd %x0,%L1,%1");
13539
13540 else if (TARGET_DIRECT_MOVE && src_gpr_p)
13541 return "#";
13542 }
13543
13544 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
13545 return "vor %0,%1,%1";
13546
13547 else if (dest_fp_p && src_fp_p)
13548 return "#";
13549 }
13550
13551 /* Loads. */
13552 else if (dest_regno >= 0 && MEM_P (src))
13553 {
13554 if (dest_gpr_p)
13555 {
13556 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
13557 return "lq %0,%1";
13558 else
13559 return "#";
13560 }
13561
13562 else if (TARGET_ALTIVEC && dest_vmx_p
13563 && altivec_indexed_or_indirect_operand (src, mode))
13564 return "lvx %0,%y1";
13565
13566 else if (TARGET_VSX && dest_vsx_p)
13567 {
13568 if (mode_supports_dq_form (mode)
13569 && quad_address_p (XEXP (src, 0), mode, true))
13570 return "lxv %x0,%1";
13571
13572 else if (TARGET_P9_VECTOR)
13573 return "lxvx %x0,%y1";
13574
13575 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
13576 return "lxvw4x %x0,%y1";
13577
13578 else
13579 return "lxvd2x %x0,%y1";
13580 }
13581
13582 else if (TARGET_ALTIVEC && dest_vmx_p)
13583 return "lvx %0,%y1";
13584
13585 else if (dest_fp_p)
13586 return "#";
13587 }
13588
13589 /* Stores. */
13590 else if (src_regno >= 0 && MEM_P (dest))
13591 {
13592 if (src_gpr_p)
13593 {
13594 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
13595 return "stq %1,%0";
13596 else
13597 return "#";
13598 }
13599
13600 else if (TARGET_ALTIVEC && src_vmx_p
13601 && altivec_indexed_or_indirect_operand (dest, mode))
13602 return "stvx %1,%y0";
13603
13604 else if (TARGET_VSX && src_vsx_p)
13605 {
13606 if (mode_supports_dq_form (mode)
13607 && quad_address_p (XEXP (dest, 0), mode, true))
13608 return "stxv %x1,%0";
13609
13610 else if (TARGET_P9_VECTOR)
13611 return "stxvx %x1,%y0";
13612
13613 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
13614 return "stxvw4x %x1,%y0";
13615
13616 else
13617 return "stxvd2x %x1,%y0";
13618 }
13619
13620 else if (TARGET_ALTIVEC && src_vmx_p)
13621 return "stvx %1,%y0";
13622
13623 else if (src_fp_p)
13624 return "#";
13625 }
13626
13627 /* Constants. */
13628 else if (dest_regno >= 0
13629 && (CONST_INT_P (src)
13630 || CONST_WIDE_INT_P (src)
13631 || CONST_DOUBLE_P (src)
13632 || GET_CODE (src) == CONST_VECTOR))
13633 {
13634 if (dest_gpr_p)
13635 return "#";
13636
13637 else if ((dest_vmx_p && TARGET_ALTIVEC)
13638 || (dest_vsx_p && TARGET_VSX))
13639 return output_vec_const_move (operands);
13640 }
13641
13642 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
13643 }
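
/* Examples of the templates returned above: a VSX -> VSX vector copy gives
 "xxlor %x0,%x1,%x1"; a GPR -> GPR 128-bit copy gives "#" so that the move
 is split after reload; and an ISA 3.0 vector load whose address passes
 quad_address_p gives "lxv %x0,%1". */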
13644
13645 /* Validate a 128-bit move. */
13646 bool
13647 rs6000_move_128bit_ok_p (rtx operands[])
13648 {
13649 machine_mode mode = GET_MODE (operands[0]);
13650 return (gpc_reg_operand (operands[0], mode)
13651 || gpc_reg_operand (operands[1], mode));
13652 }
13653
13654 /* Return true if a 128-bit move needs to be split. */
13655 bool
13656 rs6000_split_128bit_ok_p (rtx operands[])
13657 {
13658 if (!reload_completed)
13659 return false;
13660
13661 if (!gpr_or_gpr_p (operands[0], operands[1]))
13662 return false;
13663
13664 if (quad_load_store_p (operands[0], operands[1]))
13665 return false;
13666
13667 return true;
13668 }
13669
13670 \f
13671 /* Given a comparison operation, return the bit number in CCR to test. We
13672 know this is a valid comparison.
13673
13674 SCC_P is 1 if this is for an scc. That means that %D will have been
13675 used instead of %C, so the bits will be in different places.
13676
13677 Return -1 if OP isn't a valid comparison for some reason. */
13678
13679 int
13680 ccr_bit (rtx op, int scc_p)
13681 {
13682 enum rtx_code code = GET_CODE (op);
13683 machine_mode cc_mode;
13684 int cc_regnum;
13685 int base_bit;
13686 rtx reg;
13687
13688 if (!COMPARISON_P (op))
13689 return -1;
13690
13691 reg = XEXP (op, 0);
13692
13693 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
13694 return -1;
13695
13696 cc_mode = GET_MODE (reg);
13697 cc_regnum = REGNO (reg);
13698 base_bit = 4 * (cc_regnum - CR0_REGNO);
13699
13700 validate_condition_mode (code, cc_mode);
13701
13702 /* When generating a sCOND operation, only positive conditions are
13703 allowed. */
13704 if (scc_p)
13705 switch (code)
13706 {
13707 case EQ:
13708 case GT:
13709 case LT:
13710 case UNORDERED:
13711 case GTU:
13712 case LTU:
13713 break;
13714 default:
13715 return -1;
13716 }
13717
13718 switch (code)
13719 {
13720 case NE:
13721 return scc_p ? base_bit + 3 : base_bit + 2;
13722 case EQ:
13723 return base_bit + 2;
13724 case GT: case GTU: case UNLE:
13725 return base_bit + 1;
13726 case LT: case LTU: case UNGE:
13727 return base_bit;
13728 case ORDERED: case UNORDERED:
13729 return base_bit + 3;
13730
13731 case GE: case GEU:
13732 /* If scc, we will have done a cror to put the bit in the
13733 unordered position. So test that bit. For integer, this is ! LT
13734 unless this is an scc insn. */
13735 return scc_p ? base_bit + 3 : base_bit;
13736
13737 case LE: case LEU:
13738 return scc_p ? base_bit + 3 : base_bit + 1;
13739
13740 default:
13741 return -1;
13742 }
13743 }
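
/* Example: for (gt (reg:CC <cr2>) (const_int 0)) with SCC_P == 0, base_bit
 is 4 * (cr2 - cr0) == 8, and GT returns base_bit + 1, i.e. CR bit 9. */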
13744 \f
13745 /* Return the GOT register. */
13746
13747 rtx
13748 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
13749 {
13750 /* The second flow pass currently (June 1999) can't update
13751 regs_ever_live without disturbing other parts of the compiler, so
13752 update it here to make the prolog/epilogue code happy. */
13753 if (!can_create_pseudo_p ()
13754 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
13755 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
13756
13757 crtl->uses_pic_offset_table = 1;
13758
13759 return pic_offset_table_rtx;
13760 }
13761 \f
13762 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
13763
13764 /* Write out a function code label. */
13765
13766 void
13767 rs6000_output_function_entry (FILE *file, const char *fname)
13768 {
13769 if (fname[0] != '.')
13770 {
13771 switch (DEFAULT_ABI)
13772 {
13773 default:
13774 gcc_unreachable ();
13775
13776 case ABI_AIX:
13777 if (DOT_SYMBOLS)
13778 putc ('.', file);
13779 else
13780 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
13781 break;
13782
13783 case ABI_ELFv2:
13784 case ABI_V4:
13785 case ABI_DARWIN:
13786 break;
13787 }
13788 }
13789
13790 RS6000_OUTPUT_BASENAME (file, fname);
13791 }
13792
13793 /* Print an operand. Recognize special options, documented below. */
13794
13795 #if TARGET_ELF
13796 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
13797 only introduced by the linker, when applying the sda21
13798 relocation. */
13799 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
13800 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
13801 #else
13802 #define SMALL_DATA_RELOC "sda21"
13803 #define SMALL_DATA_REG 0
13804 #endif
13805
13806 void
13807 print_operand (FILE *file, rtx x, int code)
13808 {
13809 int i;
13810 unsigned HOST_WIDE_INT uval;
13811
13812 switch (code)
13813 {
13814 /* %a is output_address. */
13815
13816 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
13817 output_operand. */
13818
13819 case 'A':
13820 /* Write the MMA accumulator number associated with VSX register X. */
13821 if (!REG_P (x) || !FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0)
13822 output_operand_lossage ("invalid %%A value");
13823 else
13824 fprintf (file, "%d", (REGNO (x) - FIRST_FPR_REGNO) / 4);
13825 return;
13826
13827 case 'D':
13828 /* Like 'J' but get to the GT bit only. */
13829 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13830 {
13831 output_operand_lossage ("invalid %%D value");
13832 return;
13833 }
13834
13835 /* Bit 1 is GT bit. */
13836 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
13837
13838 /* Add one for shift count in rlinm for scc. */
13839 fprintf (file, "%d", i + 1);
13840 return;
13841
13842 case 'e':
13843 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
13844 if (! INT_P (x))
13845 {
13846 output_operand_lossage ("invalid %%e value");
13847 return;
13848 }
13849
13850 uval = INTVAL (x);
13851 if ((uval & 0xffff) == 0 && uval != 0)
13852 putc ('s', file);
13853 return;
13854
13855 case 'E':
13856 /* X is a CR register. Print the number of the EQ bit of the CR. */
13857 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13858 output_operand_lossage ("invalid %%E value");
13859 else
13860 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
13861 return;
13862
13863 case 'f':
13864 /* X is a CR register. Print the shift count needed to move it
13865 to the high-order four bits. */
13866 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13867 output_operand_lossage ("invalid %%f value");
13868 else
13869 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
13870 return;
13871
13872 case 'F':
13873 /* Similar, but print the count for the rotate in the opposite
13874 direction. */
13875 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13876 output_operand_lossage ("invalid %%F value");
13877 else
13878 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
13879 return;
13880
13881 case 'G':
13882 /* X is a constant integer. If it is negative, print "m",
13883 otherwise print "z". This is to make an aze or ame insn. */
13884 if (!CONST_INT_P (x))
13885 output_operand_lossage ("invalid %%G value");
13886 else if (INTVAL (x) >= 0)
13887 putc ('z', file);
13888 else
13889 putc ('m', file);
13890 return;
13891
13892 case 'h':
13893 /* If constant, output low-order five bits. Otherwise, write
13894 normally. */
13895 if (INT_P (x))
13896 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
13897 else
13898 print_operand (file, x, 0);
13899 return;
13900
13901 case 'H':
13902 /* If constant, output low-order six bits. Otherwise, write
13903 normally. */
13904 if (INT_P (x))
13905 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
13906 else
13907 print_operand (file, x, 0);
13908 return;
13909
13910 case 'I':
13911 /* Print `i' if this is a constant, else nothing. */
13912 if (INT_P (x))
13913 putc ('i', file);
13914 return;
13915
13916 case 'j':
13917 /* Write the bit number in CCR for jump. */
13918 i = ccr_bit (x, 0);
13919 if (i == -1)
13920 output_operand_lossage ("invalid %%j code");
13921 else
13922 fprintf (file, "%d", i);
13923 return;
13924
13925 case 'J':
13926 /* Similar, but add one for shift count in rlinm for scc and pass
13927 scc flag to `ccr_bit'. */
13928 i = ccr_bit (x, 1);
13929 if (i == -1)
13930 output_operand_lossage ("invalid %%J code");
13931 else
13932 /* If we want bit 31, write a shift count of zero, not 32. */
13933 fprintf (file, "%d", i == 31 ? 0 : i + 1);
13934 return;
13935
13936 case 'k':
13937 /* X must be a constant. Write the 1's complement of the
13938 constant. */
13939 if (! INT_P (x))
13940 output_operand_lossage ("invalid %%k value");
13941 else
13942 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
13943 return;
13944
13945 case 'K':
13946 /* X must be a symbolic constant on ELF. Write an
13947 expression suitable for an 'addi' that adds in the low 16
13948 bits of the MEM. */
13949 if (GET_CODE (x) == CONST)
13950 {
13951 if (GET_CODE (XEXP (x, 0)) != PLUS
13952 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
13953 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
13954 || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
13955 output_operand_lossage ("invalid %%K value");
13956 }
13957 print_operand_address (file, x);
13958 fputs ("@l", file);
13959 return;
13960
13961 /* %l is output_asm_label. */
13962
13963 case 'L':
13964 /* Write second word of DImode or DFmode reference. Works on register
13965 or non-indexed memory only. */
13966 if (REG_P (x))
13967 fputs (reg_names[REGNO (x) + 1], file);
13968 else if (MEM_P (x))
13969 {
13970 machine_mode mode = GET_MODE (x);
13971 /* Handle possible auto-increment. Since it is pre-increment and
13972 we have already done it, we can just use an offset of word. */
13973 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13974 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13975 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
13976 UNITS_PER_WORD));
13977 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13978 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
13979 UNITS_PER_WORD));
13980 else
13981 output_address (mode, XEXP (adjust_address_nv (x, SImode,
13982 UNITS_PER_WORD),
13983 0));
13984
13985 if (small_data_operand (x, GET_MODE (x)))
13986 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13987 reg_names[SMALL_DATA_REG]);
13988 }
13989 return;
13990
13991 case 'N': /* Unused */
13992 /* Write the number of elements in the vector times 4. */
13993 if (GET_CODE (x) != PARALLEL)
13994 output_operand_lossage ("invalid %%N value");
13995 else
13996 fprintf (file, "%d", XVECLEN (x, 0) * 4);
13997 return;
13998
13999 case 'O': /* Unused */
14000 /* Similar, but subtract 1 first. */
14001 if (GET_CODE (x) != PARALLEL)
14002 output_operand_lossage ("invalid %%O value");
14003 else
14004 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
14005 return;
14006
14007 case 'p':
14008 /* X is a CONST_INT that is a power of two. Output the logarithm. */
14009 if (! INT_P (x)
14010 || INTVAL (x) < 0
14011 || (i = exact_log2 (INTVAL (x))) < 0)
14012 output_operand_lossage ("invalid %%p value");
14013 else
14014 fprintf (file, "%d", i);
14015 return;
14016
14017 case 'P':
14018 /* The operand must be an indirect memory reference. The result
14019 is the register name. */
14020 if (!MEM_P (x) || !REG_P (XEXP (x, 0))
14021 || REGNO (XEXP (x, 0)) >= 32)
14022 output_operand_lossage ("invalid %%P value");
14023 else
14024 fputs (reg_names[REGNO (XEXP (x, 0))], file);
14025 return;
14026
14027 case 'q':
14028 /* This outputs the logical code corresponding to a boolean
14029 expression. The expression may have one or both operands
14030 negated (if one, only the first one). For condition register
14031 logical operations, it will also treat the negated
14032 CR codes as NOTs, but not handle NOTs of them. */
14033 {
14034 const char *const *t = 0;
14035 const char *s;
14036 enum rtx_code code = GET_CODE (x);
14037 static const char * const tbl[3][3] = {
14038 { "and", "andc", "nor" },
14039 { "or", "orc", "nand" },
14040 { "xor", "eqv", "xor" } };
14041
14042 if (code == AND)
14043 t = tbl[0];
14044 else if (code == IOR)
14045 t = tbl[1];
14046 else if (code == XOR)
14047 t = tbl[2];
14048 else
14049 { output_operand_lossage ("invalid %%q value"); return; }
14050
14051 if (GET_CODE (XEXP (x, 0)) != NOT)
14052 s = t[0];
14053 else
14054 {
14055 if (GET_CODE (XEXP (x, 1)) == NOT)
14056 s = t[2];
14057 else
14058 s = t[1];
14059 }
14060
14061 fputs (s, file);
14062 }
14063 return;
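/* For example (with illustrative registers a and b): (and (not a) (not b))
   selects "nor" from the table above, since ~a & ~b == ~(a | b); with only
   the first operand negated, (and (not a) b) selects "andc". */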
14064
14065 case 'Q':
14066 if (! TARGET_MFCRF)
14067 return;
14068 fputc (',', file);
14069 /* FALLTHRU */
14070
14071 case 'R':
14072 /* X is a CR register. Print the mask for `mtcrf'. */
14073 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
14074 output_operand_lossage ("invalid %%R value");
14075 else
14076 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
14077 return;
14078
14079 case 's':
14080 /* Low 5 bits of 32 - value */
14081 if (! INT_P (x))
14082 output_operand_lossage ("invalid %%s value");
14083 else
14084 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
14085 return;
14086
14087 case 't':
14088 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
14089 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
14090 {
14091 output_operand_lossage ("invalid %%t value");
14092 return;
14093 }
14094
14095 /* Bit 3 is OV bit. */
14096 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
14097
14098 /* If we want bit 31, write a shift count of zero, not 32. */
14099 fprintf (file, "%d", i == 31 ? 0 : i + 1);
14100 return;
14101
14102 case 'T':
14103 /* Print the symbolic name of a branch target register. */
14104 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14105 x = XVECEXP (x, 0, 0);
14106 if (!REG_P (x) || (REGNO (x) != LR_REGNO
14107 && REGNO (x) != CTR_REGNO))
14108 output_operand_lossage ("invalid %%T value");
14109 else if (REGNO (x) == LR_REGNO)
14110 fputs ("lr", file);
14111 else
14112 fputs ("ctr", file);
14113 return;
14114
14115 case 'u':
14116 /* High-order or low-order 16 bits of constant, whichever is non-zero,
14117 for use in unsigned operand. */
14118 if (! INT_P (x))
14119 {
14120 output_operand_lossage ("invalid %%u value");
14121 return;
14122 }
14123
14124 uval = INTVAL (x);
14125 if ((uval & 0xffff) == 0)
14126 uval >>= 16;
14127
14128 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
14129 return;
14130
14131 case 'v':
14132 /* High-order 16 bits of constant for use in signed operand. */
14133 if (! INT_P (x))
14134 output_operand_lossage ("invalid %%v value");
14135 else
14136 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
14137 (INTVAL (x) >> 16) & 0xffff);
14138 return;
14139
14140 case 'U':
14141 /* Print `u' if this has an auto-increment or auto-decrement. */
14142 if (MEM_P (x)
14143 && (GET_CODE (XEXP (x, 0)) == PRE_INC
14144 || GET_CODE (XEXP (x, 0)) == PRE_DEC
14145 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
14146 putc ('u', file);
14147 return;
14148
14149 case 'V':
14150 /* Print the trap code for this operand. */
14151 switch (GET_CODE (x))
14152 {
14153 case EQ:
14154 fputs ("eq", file); /* 4 */
14155 break;
14156 case NE:
14157 fputs ("ne", file); /* 24 */
14158 break;
14159 case LT:
14160 fputs ("lt", file); /* 16 */
14161 break;
14162 case LE:
14163 fputs ("le", file); /* 20 */
14164 break;
14165 case GT:
14166 fputs ("gt", file); /* 8 */
14167 break;
14168 case GE:
14169 fputs ("ge", file); /* 12 */
14170 break;
14171 case LTU:
14172 fputs ("llt", file); /* 2 */
14173 break;
14174 case LEU:
14175 fputs ("lle", file); /* 6 */
14176 break;
14177 case GTU:
14178 fputs ("lgt", file); /* 1 */
14179 break;
14180 case GEU:
14181 fputs ("lge", file); /* 5 */
14182 break;
14183 default:
14184 output_operand_lossage ("invalid %%V value");
14185 }
14186 break;
14187
14188 case 'w':
14189 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
14190 normally. */
14191 if (INT_P (x))
14192 fprintf (file, HOST_WIDE_INT_PRINT_DEC, sext_hwi (INTVAL (x), 16));
14193 else
14194 print_operand (file, x, 0);
14195 return;
14196
14197 case 'x':
14198 /* X is a FPR or Altivec register used in a VSX context. */
14199 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
14200 output_operand_lossage ("invalid %%x value");
14201 else
14202 {
14203 int reg = REGNO (x);
14204 int vsx_reg = (FP_REGNO_P (reg)
14205 ? reg - 32
14206 : reg - FIRST_ALTIVEC_REGNO + 32);
14207
14208 #ifdef TARGET_REGNAMES
14209 if (TARGET_REGNAMES)
14210 fprintf (file, "%%vs%d", vsx_reg);
14211 else
14212 #endif
14213 fprintf (file, "%d", vsx_reg);
14214 }
14215 return;
14216
14217 case 'X':
14218 if (MEM_P (x)
14219 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
14220 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
14221 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
14222 putc ('x', file);
14223 return;
14224
14225 case 'Y':
14226 /* Like 'L', for third word of TImode/PTImode. */
14227 if (REG_P (x))
14228 fputs (reg_names[REGNO (x) + 2], file);
14229 else if (MEM_P (x))
14230 {
14231 machine_mode mode = GET_MODE (x);
14232 if (GET_CODE (XEXP (x, 0)) == PRE_INC
14233 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
14234 output_address (mode, plus_constant (Pmode,
14235 XEXP (XEXP (x, 0), 0), 8));
14236 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14237 output_address (mode, plus_constant (Pmode,
14238 XEXP (XEXP (x, 0), 0), 8));
14239 else
14240 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
14241 if (small_data_operand (x, GET_MODE (x)))
14242 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14243 reg_names[SMALL_DATA_REG]);
14244 }
14245 return;
14246
14247 case 'z':
14248 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14249 x = XVECEXP (x, 0, 1);
14250 /* X is a SYMBOL_REF. Write out the name preceded by a
14251 period and without any trailing data in brackets. Used for function
14252 names. If we are configured for System V (or the embedded ABI) on
14253 the PowerPC, do not emit the period, since those systems do not use
14254 TOCs and the like. */
14255 if (!SYMBOL_REF_P (x))
14256 {
14257 output_operand_lossage ("invalid %%z value");
14258 return;
14259 }
14260
14261 /* For macho, check to see if we need a stub. */
14262 if (TARGET_MACHO)
14263 {
14264 const char *name = XSTR (x, 0);
14265 #if TARGET_MACHO
14266 if (darwin_symbol_stubs
14267 && MACHOPIC_INDIRECT
14268 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14269 name = machopic_indirection_name (x, /*stub_p=*/true);
14270 #endif
14271 assemble_name (file, name);
14272 }
14273 else if (!DOT_SYMBOLS)
14274 assemble_name (file, XSTR (x, 0));
14275 else
14276 rs6000_output_function_entry (file, XSTR (x, 0));
14277 return;
14278
14279 case 'Z':
14280 /* Like 'L', for last word of TImode/PTImode. */
14281 if (REG_P (x))
14282 fputs (reg_names[REGNO (x) + 3], file);
14283 else if (MEM_P (x))
14284 {
14285 machine_mode mode = GET_MODE (x);
14286 if (GET_CODE (XEXP (x, 0)) == PRE_INC
14287 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
14288 output_address (mode, plus_constant (Pmode,
14289 XEXP (XEXP (x, 0), 0), 12));
14290 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14291 output_address (mode, plus_constant (Pmode,
14292 XEXP (XEXP (x, 0), 0), 12));
14293 else
14294 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
14295 if (small_data_operand (x, GET_MODE (x)))
14296 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14297 reg_names[SMALL_DATA_REG]);
14298 }
14299 return;
14300
14301 /* Print an AltiVec or VSX memory operand. */
14302 case 'y':
14303 {
14304 rtx tmp;
14305
14306 gcc_assert (MEM_P (x));
14307
14308 tmp = XEXP (x, 0);
14309
14310 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
14311 && GET_CODE (tmp) == AND
14312 && CONST_INT_P (XEXP (tmp, 1))
14313 && INTVAL (XEXP (tmp, 1)) == -16)
14314 tmp = XEXP (tmp, 0);
14315 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
14316 && GET_CODE (tmp) == PRE_MODIFY)
14317 tmp = XEXP (tmp, 1);
14318 if (REG_P (tmp))
14319 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
14320 else
14321 {
14322 if (GET_CODE (tmp) != PLUS
14323 || !REG_P (XEXP (tmp, 0))
14324 || !REG_P (XEXP (tmp, 1)))
14325 {
14326 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
14327 break;
14328 }
14329
14330 if (REGNO (XEXP (tmp, 0)) == 0)
14331 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
14332 reg_names[ REGNO (XEXP (tmp, 0)) ]);
14333 else
14334 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
14335 reg_names[ REGNO (XEXP (tmp, 1)) ]);
14336 }
14337 break;
14338 }
14339
14340 case 0:
14341 if (REG_P (x))
14342 fprintf (file, "%s", reg_names[REGNO (x)]);
14343 else if (MEM_P (x))
14344 {
14345 /* We need to handle PRE_INC and PRE_DEC here, since we need to
14346 know the width from the mode. */
14347 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
14348 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
14349 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
14350 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
14351 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
14352 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
14353 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14354 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
14355 else
14356 output_address (GET_MODE (x), XEXP (x, 0));
14357 }
14358 else if (toc_relative_expr_p (x, false,
14359 &tocrel_base_oac, &tocrel_offset_oac))
14360 /* This hack along with a corresponding hack in
14361 rs6000_output_addr_const_extra arranges to output addends
14362 where the assembler expects to find them. eg.
14363 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
14364 without this hack would be output as "x@toc+4". We
14365 want "x+4@toc". */
14366 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
14367 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
14368 output_addr_const (file, XVECEXP (x, 0, 0));
14369 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14370 output_addr_const (file, XVECEXP (x, 0, 1));
14371 else
14372 output_addr_const (file, x);
14373 return;
14374
14375 case '&':
14376 if (const char *name = get_some_local_dynamic_name ())
14377 assemble_name (file, name);
14378 else
14379 output_operand_lossage ("'%%&' used without any "
14380 "local dynamic TLS references");
14381 return;
14382
14383 default:
14384 output_operand_lossage ("invalid %%xn code");
14385 }
14386 }
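/* Usage sketch (hypothetical insn template): for a DImode quantity held in
   GPRs 9 and 10 on a 32-bit target whose register names are bare numbers,
   "%0" prints "9" (case 0 above) while "%L0" prints "10" (case 'L'), so a
   single template can name both words of the register pair. */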
14387 \f
14388 /* Print the address of an operand. */
14389
14390 void
14391 print_operand_address (FILE *file, rtx x)
14392 {
14393 if (REG_P (x))
14394 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
14395
14396 /* Is it a PC-relative address? */
14397 else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
14398 {
14399 HOST_WIDE_INT offset;
14400
14401 if (GET_CODE (x) == CONST)
14402 x = XEXP (x, 0);
14403
14404 if (GET_CODE (x) == PLUS)
14405 {
14406 offset = INTVAL (XEXP (x, 1));
14407 x = XEXP (x, 0);
14408 }
14409 else
14410 offset = 0;
14411
14412 output_addr_const (file, x);
14413
14414 if (offset)
14415 fprintf (file, "%+" PRId64, offset);
14416
14417 if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
14418 fprintf (file, "@got");
14419
14420 fprintf (file, "@pcrel");
14421 }
14422 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
14423 || GET_CODE (x) == LABEL_REF)
14424 {
14425 output_addr_const (file, x);
14426 if (small_data_operand (x, GET_MODE (x)))
14427 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14428 reg_names[SMALL_DATA_REG]);
14429 else
14430 gcc_assert (!TARGET_TOC);
14431 }
14432 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
14433 && REG_P (XEXP (x, 1)))
14434 {
14435 if (REGNO (XEXP (x, 0)) == 0)
14436 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
14437 reg_names[ REGNO (XEXP (x, 0)) ]);
14438 else
14439 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
14440 reg_names[ REGNO (XEXP (x, 1)) ]);
14441 }
14442 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
14443 && CONST_INT_P (XEXP (x, 1)))
14444 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
14445 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
14446 #if TARGET_MACHO
14447 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
14448 && CONSTANT_P (XEXP (x, 1)))
14449 {
14450 fprintf (file, "lo16(");
14451 output_addr_const (file, XEXP (x, 1));
14452 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
14453 }
14454 #endif
14455 #if TARGET_ELF
14456 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
14457 && CONSTANT_P (XEXP (x, 1)))
14458 {
14459 output_addr_const (file, XEXP (x, 1));
14460 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
14461 }
14462 #endif
14463 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
14464 {
14465 /* This hack along with a corresponding hack in
14466 rs6000_output_addr_const_extra arranges to output addends
14467 where the assembler expects to find them. eg.
14468 (lo_sum (reg 9)
14469 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
14470 without this hack would be output as "x@toc+8@l(9)". We
14471 want "x+8@toc@l(9)". */
14472 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
14473 if (GET_CODE (x) == LO_SUM)
14474 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
14475 else
14476 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
14477 }
14478 else
14479 output_addr_const (file, x);
14480 }
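/* Illustrative outputs of the cases above (symbol and register names are
   made up): a bare register prints "0(9)"; reg+constant prints "16(9)";
   reg+reg prints "9,10", with r0 forced into the second slot; an ELF
   lo_sum prints "sym@l(9)"; a pcrel address prints "sym@pcrel", or
   "sym@got@pcrel" for a non-local symbol. */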
14481 \f
14482 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14483
14484 bool
14485 rs6000_output_addr_const_extra (FILE *file, rtx x)
14486 {
14487 if (GET_CODE (x) == UNSPEC)
14488 switch (XINT (x, 1))
14489 {
14490 case UNSPEC_TOCREL:
14491 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
14492 && REG_P (XVECEXP (x, 0, 1))
14493 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
14494 output_addr_const (file, XVECEXP (x, 0, 0));
14495 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
14496 {
14497 if (INTVAL (tocrel_offset_oac) >= 0)
14498 fprintf (file, "+");
14499 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
14500 }
14501 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
14502 {
14503 putc ('-', file);
14504 assemble_name (file, toc_label_name);
14505 need_toc_init = 1;
14506 }
14507 else if (TARGET_ELF)
14508 fputs ("@toc", file);
14509 return true;
14510
14511 #if TARGET_MACHO
14512 case UNSPEC_MACHOPIC_OFFSET:
14513 output_addr_const (file, XVECEXP (x, 0, 0));
14514 putc ('-', file);
14515 machopic_output_function_base_name (file);
14516 return true;
14517 #endif
14518 }
14519 return false;
14520 }
14521 \f
14522 /* Target hook for assembling integer objects. The PowerPC version has
14523 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
14524 is defined. It also needs to handle DI-mode objects on 64-bit
14525 targets. */
14526
14527 static bool
14528 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
14529 {
14530 #ifdef RELOCATABLE_NEEDS_FIXUP
14531 /* Special handling for SI values. */
14532 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
14533 {
14534 static int recurse = 0;
14535
14536 /* For -mrelocatable, we mark all addresses that need to be fixed up in
14537 the .fixup section. Since the TOC section is already relocated, we
14538 don't need to mark it here. We used to skip the text section, but it
14539 should never be valid for relocated addresses to be placed in the text
14540 section. */
14541 if (DEFAULT_ABI == ABI_V4
14542 && (TARGET_RELOCATABLE || flag_pic > 1)
14543 && in_section != toc_section
14544 && !recurse
14545 && !CONST_SCALAR_INT_P (x)
14546 && CONSTANT_P (x))
14547 {
14548 char buf[256];
14549
14550 recurse = 1;
14551 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
14552 fixuplabelno++;
14553 ASM_OUTPUT_LABEL (asm_out_file, buf);
14554 fprintf (asm_out_file, "\t.long\t(");
14555 output_addr_const (asm_out_file, x);
14556 fprintf (asm_out_file, ")@fixup\n");
14557 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
14558 ASM_OUTPUT_ALIGN (asm_out_file, 2);
14559 fprintf (asm_out_file, "\t.long\t");
14560 assemble_name (asm_out_file, buf);
14561 fprintf (asm_out_file, "\n\t.previous\n");
14562 recurse = 0;
14563 return true;
14564 }
14565 /* Remove initial .'s to turn a -mcall-aixdesc function
14566 address into the address of the descriptor, not the function
14567 itself. */
14568 else if (SYMBOL_REF_P (x)
14569 && XSTR (x, 0)[0] == '.'
14570 && DEFAULT_ABI == ABI_AIX)
14571 {
14572 const char *name = XSTR (x, 0);
14573 while (*name == '.')
14574 name++;
14575
14576 fprintf (asm_out_file, "\t.long\t%s\n", name);
14577 return true;
14578 }
14579 }
14580 #endif /* RELOCATABLE_NEEDS_FIXUP */
14581 return default_assemble_integer (x, size, aligned_p);
14582 }
14583
14584 /* Return a template string for assembly to emit when making an
14585 external call. FUNOP is the call mem argument operand number. */
14586
14587 static const char *
14588 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
14589 {
14590 /* -Wformat-overflow workaround, without which gcc thinks that %u
14591 might produce 10 digits. */
14592 gcc_assert (funop <= MAX_RECOG_OPERANDS);
14593
14594 char arg[12];
14595 arg[0] = 0;
14596 if (GET_CODE (operands[funop + 1]) == UNSPEC)
14597 {
14598 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
14599 sprintf (arg, "(%%%u@tlsgd)", funop + 1);
14600 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
14601 sprintf (arg, "(%%&@tlsld)");
14602 }
14603
14604 /* The magic 32768 offset here corresponds to the offset of
14605 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
14606 char z[11];
14607 sprintf (z, "%%z%u%s", funop,
14608 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
14609 ? "+32768" : ""));
14610
14611 static char str[32]; /* 1 spare */
14612 if (rs6000_pcrel_p ())
14613 sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
14614 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
14615 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
14616 sibcall ? "" : "\n\tnop");
14617 else if (DEFAULT_ABI == ABI_V4)
14618 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
14619 flag_pic ? "@plt" : "");
14620 #if TARGET_MACHO
14621 /* If/when we remove the mlongcall opt, we can share the AIX/ELFv2 case. */
14622 else if (DEFAULT_ABI == ABI_DARWIN)
14623 {
14624 /* The cookie is in operand func+2. */
14625 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
14626 int cookie = INTVAL (operands[funop + 2]);
14627 if (cookie & CALL_LONG)
14628 {
14629 tree funname = get_identifier (XSTR (operands[funop], 0));
14630 tree labelname = get_prev_label (funname);
14631 gcc_checking_assert (labelname && !sibcall);
14632
14633 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
14634 instruction will reach 'foo', otherwise link as 'bl L42'".
14635 "L42" should be a 'branch island', that will do a far jump to
14636 'foo'. Branch islands are generated in
14637 macho_branch_islands(). */
14638 sprintf (str, "jbsr %%z%u,%.10s", funop,
14639 IDENTIFIER_POINTER (labelname));
14640 }
14641 else
14642 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
14643 after the call. */
14644 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
14645 }
14646 #endif
14647 else
14648 gcc_unreachable ();
14649 return str;
14650 }
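/* Illustrative templates produced above, assuming funop == 0: with pcrel
   addressing, "bl %z0@notoc"; for AIX/ELFv2, "bl %z0\n\tnop", where the nop
   leaves room for the linker to restore the TOC pointer; for V4 with -fPIC,
   "bl %z0@plt". Sibcalls use "b" and omit the nop. */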
14651
14652 const char *
14653 rs6000_call_template (rtx *operands, unsigned int funop)
14654 {
14655 return rs6000_call_template_1 (operands, funop, false);
14656 }
14657
14658 const char *
14659 rs6000_sibcall_template (rtx *operands, unsigned int funop)
14660 {
14661 return rs6000_call_template_1 (operands, funop, true);
14662 }
14663
14664 /* As above, for indirect calls. */
14665
14666 static const char *
14667 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
14668 bool sibcall)
14669 {
14670 /* -Wformat-overflow workaround, without which gcc thinks that %u
14671 might produce 10 digits. Note that -Wformat-overflow will not
14672 currently warn here for str[], so do not rely on a warning to
14673 ensure str[] is correctly sized. */
14674 gcc_assert (funop <= MAX_RECOG_OPERANDS);
14675
14676 /* Currently, funop is either 0 or 1. The maximum string is always
14677 a !speculate 64-bit __tls_get_addr call.
14678
14679 ABI_ELFv2, pcrel:
14680 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14681 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
14682 . 9 crset 2\n\t
14683 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14684 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
14685 . 8 beq%T1l-
14686 .---
14687 .142
14688
14689 ABI_AIX:
14690 . 9 ld 2,%3\n\t
14691 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14692 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14693 . 9 crset 2\n\t
14694 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14695 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14696 . 10 beq%T1l-\n\t
14697 . 10 ld 2,%4(1)
14698 .---
14699 .151
14700
14701 ABI_ELFv2:
14702 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14703 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14704 . 9 crset 2\n\t
14705 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14706 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14707 . 10 beq%T1l-\n\t
14708 . 10 ld 2,%3(1)
14709 .---
14710 .142
14711
14712 ABI_V4:
14713 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14714 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
14715 . 9 crset 2\n\t
14716 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14717 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
14718 . 8 beq%T1l-
14719 .---
14720 .141 */
14721 static char str[160]; /* 8 spare */
14722 char *s = str;
14723 const char *ptrload = TARGET_64BIT ? "d" : "wz";
14724
14725 if (DEFAULT_ABI == ABI_AIX)
14726 s += sprintf (s,
14727 "l%s 2,%%%u\n\t",
14728 ptrload, funop + 3);
14729
14730 /* We don't need the extra code to stop indirect call speculation if
14731 calling via LR. */
14732 bool speculate = (TARGET_MACHO
14733 || rs6000_speculate_indirect_jumps
14734 || (REG_P (operands[funop])
14735 && REGNO (operands[funop]) == LR_REGNO));
14736
14737 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
14738 {
14739 const char *rel64 = TARGET_64BIT ? "64" : "";
14740 char tls[29];
14741 tls[0] = 0;
14742 if (GET_CODE (operands[funop + 1]) == UNSPEC)
14743 {
14744 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
14745 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
14746 rel64, funop + 1);
14747 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
14748 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
14749 rel64);
14750 }
14751
14752 const char *notoc = rs6000_pcrel_p () ? "_NOTOC" : "";
14753 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
14754 && flag_pic == 2 ? "+32768" : "");
14755 if (!speculate)
14756 {
14757 s += sprintf (s,
14758 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
14759 tls, rel64, notoc, funop, addend);
14760 s += sprintf (s, "crset 2\n\t");
14761 }
14762 s += sprintf (s,
14763 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
14764 tls, rel64, notoc, funop, addend);
14765 }
14766 else if (!speculate)
14767 s += sprintf (s, "crset 2\n\t");
14768
14769 if (rs6000_pcrel_p ())
14770 {
14771 if (speculate)
14772 sprintf (s, "b%%T%ul", funop);
14773 else
14774 sprintf (s, "beq%%T%ul-", funop);
14775 }
14776 else if (DEFAULT_ABI == ABI_AIX)
14777 {
14778 if (speculate)
14779 sprintf (s,
14780 "b%%T%ul\n\t"
14781 "l%s 2,%%%u(1)",
14782 funop, ptrload, funop + 4);
14783 else
14784 sprintf (s,
14785 "beq%%T%ul-\n\t"
14786 "l%s 2,%%%u(1)",
14787 funop, ptrload, funop + 4);
14788 }
14789 else if (DEFAULT_ABI == ABI_ELFv2)
14790 {
14791 if (speculate)
14792 sprintf (s,
14793 "b%%T%ul\n\t"
14794 "l%s 2,%%%u(1)",
14795 funop, ptrload, funop + 3);
14796 else
14797 sprintf (s,
14798 "beq%%T%ul-\n\t"
14799 "l%s 2,%%%u(1)",
14800 funop, ptrload, funop + 3);
14801 }
14802 else
14803 {
14804 if (speculate)
14805 sprintf (s,
14806 "b%%T%u%s",
14807 funop, sibcall ? "" : "l");
14808 else
14809 sprintf (s,
14810 "beq%%T%u%s-%s",
14811 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
14812 }
14813 return str;
14814 }
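/* Illustrative output for ELFv2 with indirect-branch speculation disabled
   (funop == 1, branching via CTR; the TOC offset is in practice the ELFv2
   save slot, 24(1)):
   crset 2
   beqctrl-
   ld 2,24(1)
   The branch is made conditional on a CR bit that is always set, which
   keeps the indirect branch from being executed speculatively. */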
14815
14816 const char *
14817 rs6000_indirect_call_template (rtx *operands, unsigned int funop)
14818 {
14819 return rs6000_indirect_call_template_1 (operands, funop, false);
14820 }
14821
14822 const char *
14823 rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
14824 {
14825 return rs6000_indirect_call_template_1 (operands, funop, true);
14826 }
14827
14828 #if HAVE_AS_PLTSEQ
14829 /* Output indirect call insns. WHICH identifies the type of sequence. */
14830 const char *
14831 rs6000_pltseq_template (rtx *operands, int which)
14832 {
14833 const char *rel64 = TARGET_64BIT ? "64" : "";
14834 char tls[30];
14835 tls[0] = 0;
14836 if (GET_CODE (operands[3]) == UNSPEC)
14837 {
14838 char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
14839 if (XINT (operands[3], 1) == UNSPEC_TLSGD)
14840 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
14841 off, rel64);
14842 else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
14843 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
14844 off, rel64);
14845 }
14846
14847 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
14848 static char str[96]; /* 10 spare */
14849 char off = WORDS_BIG_ENDIAN ? '2' : '4';
14850 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
14851 && flag_pic == 2 ? "+32768" : "");
14852 switch (which)
14853 {
14854 case RS6000_PLTSEQ_TOCSAVE:
14855 sprintf (str,
14856 "st%s\n\t"
14857 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
14858 TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
14859 tls, rel64);
14860 break;
14861 case RS6000_PLTSEQ_PLT16_HA:
14862 if (DEFAULT_ABI == ABI_V4 && !flag_pic)
14863 sprintf (str,
14864 "lis %%0,0\n\t"
14865 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
14866 tls, off, rel64);
14867 else
14868 sprintf (str,
14869 "addis %%0,%%1,0\n\t"
14870 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
14871 tls, off, rel64, addend);
14872 break;
14873 case RS6000_PLTSEQ_PLT16_LO:
14874 sprintf (str,
14875 "l%s %%0,0(%%1)\n\t"
14876 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
14877 TARGET_64BIT ? "d" : "wz",
14878 tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
14879 break;
14880 case RS6000_PLTSEQ_MTCTR:
14881 sprintf (str,
14882 "mtctr %%1\n\t"
14883 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
14884 tls, rel64, addend);
14885 break;
14886 case RS6000_PLTSEQ_PLT_PCREL34:
14887 sprintf (str,
14888 "pl%s %%0,0(0),1\n\t"
14889 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
14890 TARGET_64BIT ? "d" : "wz",
14891 tls, rel64);
14892 break;
14893 default:
14894 gcc_unreachable ();
14895 }
14896 return str;
14897 }
14898 #endif
14899 \f
14900 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
14901 /* Emit an assembler directive to set symbol visibility for DECL to
14902 VISIBILITY_TYPE. */
14903
14904 static void
14905 rs6000_assemble_visibility (tree decl, int vis)
14906 {
14907 if (TARGET_XCOFF)
14908 return;
14909
14910 /* Functions need to have their entry point symbol visibility set as
14911 well as their descriptor symbol visibility. */
14912 if (DEFAULT_ABI == ABI_AIX
14913 && DOT_SYMBOLS
14914 && TREE_CODE (decl) == FUNCTION_DECL)
14915 {
14916 static const char * const visibility_types[] = {
14917 NULL, "protected", "hidden", "internal"
14918 };
14919
14920 const char *name, *type;
14921
14922 name = ((* targetm.strip_name_encoding)
14923 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
14924 type = visibility_types[vis];
14925
14926 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
14927 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
14928 }
14929 else
14930 default_assemble_visibility (decl, vis);
14931 }
14932 #endif
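/* For example, a hidden function "foo" under the AIX ABI with dot symbols
   gets both directives from the two fprintf calls above:
   .hidden foo
   .hidden .foo
   covering the descriptor symbol and the code entry symbol. */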
14933 \f
14934 /* Write PATCH_AREA_SIZE NOPs into the asm outfile FILE around a function
14935 entry. If RECORD_P is true and the target supports named sections,
14936 the location of the NOPs will be recorded in a special object section
14937 called "__patchable_function_entries". This routine may be called
14938 twice per function to put NOPs before and after the function
14939 entry. */
14940
14941 void
14942 rs6000_print_patchable_function_entry (FILE *file,
14943 unsigned HOST_WIDE_INT patch_area_size,
14944 bool record_p)
14945 {
14946 bool global_entry_needed_p = rs6000_global_entry_point_prologue_needed_p ();
14947 /* For a function which needs a global entry point, we emit the
14948 patchable area before and after the local entry point under the control
14949 of cfun->machine->global_entry_emitted; see the handling in
14950 rs6000_output_function_prologue. */
14951 if (!global_entry_needed_p || cfun->machine->global_entry_emitted)
14952 default_print_patchable_function_entry (file, patch_area_size, record_p);
14953 }
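/* E.g. with -fpatchable-function-entry=2,1 (one nop before the entry
   point, one after), generic code emits the nops and, when RECORD_P,
   records their location in the __patchable_function_entries section;
   this wrapper merely defers that emission for functions with a global
   entry point, as described above. */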
14954 \f
14955 enum rtx_code
14956 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
14957 {
14958 /* Reversal of FP compares needs care -- an ordered compare
14959 becomes an unordered compare and vice versa. */
14960 if (mode == CCFPmode
14961 && (!flag_finite_math_only
14962 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
14963 || code == UNEQ || code == LTGT))
14964 return reverse_condition_maybe_unordered (code);
14965 else
14966 return reverse_condition (code);
14967 }
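/* E.g. reversing GE in CCFPmode yields UNLT rather than LT: a NaN operand
   makes GE false, so the reversed test must be true for NaNs, which UNLT
   is and LT is not. */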
14968
14969 /* Check if C (as a 64-bit integer) can be rotated to a constant which contains
14970 nonzero bits in the LOWBITS low bits only.
14971 
14972 Return true if C can be rotated to such a constant; if so, *ROT is set
14973 to the number of bits by which C is rotated.
14974 Return false otherwise. */
14975
14976 bool
14977 can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot)
14978 {
14979 int clz = HOST_BITS_PER_WIDE_INT - lowbits;
14980
14981 /* case a. 0..0xxx: already at least clz zeros. */
14982 int lz = clz_hwi (c);
14983 if (lz >= clz)
14984 {
14985 *rot = 0;
14986 return true;
14987 }
14988
14989 /* case b. 0..0xxx0..0: at least clz zeros. */
14990 int tz = ctz_hwi (c);
14991 if (lz + tz >= clz)
14992 {
14993 *rot = HOST_BITS_PER_WIDE_INT - tz;
14994 return true;
14995 }
14996
14997 /* case c. xx10.....0xx: rotate left by 'clz - 1' bits first, so that the
14998 zeros end up at the head or tail: 00...00xxx100, where 'clz - 1' >=
14999 'bits of xxxx'. Then check as in case b. */
15000 const int rot_bits = lowbits + 1;
15001 unsigned HOST_WIDE_INT rc = (c >> rot_bits) | (c << (clz - 1));
15002 tz = ctz_hwi (rc);
15003 if (clz_hwi (rc) + tz >= clz)
15004 {
15005 *rot = HOST_BITS_PER_WIDE_INT - (tz + rot_bits);
15006 return true;
15007 }
15008
15009 return false;
15010 }
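/* Worked example: c = 0xff00000000000000 with lowbits = 16. Then clz = 48,
   lz = 0 and tz = 56, so case b applies (0 + 56 >= 48) and *rot is set to
   64 - 56 = 8; rotating c left by 8 gives 0xff, which has nonzero bits in
   the low 16 bits only. */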
15011
15012 /* Check if C (as a 64-bit integer) can be rotated to a positive 16-bit
15013 constant, i.e. one with 48 leading zero bits and 16 low bits of any value. */
15014
15015 bool
15016 can_be_rotated_to_positive_16bits (HOST_WIDE_INT c)
15017 {
15018 int rot = 0;
15019 bool res = can_be_rotated_to_lowbits (c, 16, &rot);
15020 return res && rot > 0;
15021 }
15022
15023 /* Check if C (as a 64-bit integer) can be rotated to a negative 15-bit
15024 constant, i.e. one with 49 leading one bits and 15 low bits of any value. */
15025
15026 bool
15027 can_be_rotated_to_negative_15bits (HOST_WIDE_INT c)
15028 {
15029 int rot = 0;
15030 bool res = can_be_rotated_to_lowbits (~c, 15, &rot);
15031 return res && rot > 0;
15032 }
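/* For example, c = 0x00000000ffff0000 rotates left by 48 to 0xffff, so
   can_be_rotated_to_positive_16bits returns true. The rot > 0 test
   excludes constants that already have the required form, which need no
   rotation at all. */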
15033
15034 /* Generate a compare for CODE. Return a brand-new rtx that
15035 represents the result of the compare. */
15036
15037 static rtx
15038 rs6000_generate_compare (rtx cmp, machine_mode mode)
15039 {
15040 machine_mode comp_mode;
15041 rtx compare_result;
15042 enum rtx_code code = GET_CODE (cmp);
15043 rtx op0 = XEXP (cmp, 0);
15044 rtx op1 = XEXP (cmp, 1);
15045
15046 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
15047 comp_mode = CCmode;
15048 else if (FLOAT_MODE_P (mode))
15049 comp_mode = CCFPmode;
15050 else if (code == GTU || code == LTU
15051 || code == GEU || code == LEU)
15052 comp_mode = CCUNSmode;
15053 else if ((code == EQ || code == NE)
15054 && unsigned_reg_p (op0)
15055 && (unsigned_reg_p (op1)
15056 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
15057 /* These are unsigned values, perhaps there will be a later
15058 ordering compare that can be shared with this one. */
15059 comp_mode = CCUNSmode;
15060 else
15061 comp_mode = CCmode;
15062
15063 /* If we have an unsigned compare, make sure we don't have a signed value as
15064 an immediate. */
15065 if (comp_mode == CCUNSmode && CONST_INT_P (op1)
15066 && INTVAL (op1) < 0)
15067 {
15068 op0 = copy_rtx_if_shared (op0);
15069 op1 = force_reg (GET_MODE (op0), op1);
15070 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
15071 }
15072
15073 /* First, the compare. */
15074 compare_result = gen_reg_rtx (comp_mode);
15075
15076 /* IEEE 128-bit support in VSX registers when we do not have hardware
15077 support. */
15078 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
15079 {
15080 rtx libfunc = NULL_RTX;
15081 bool check_nan = false;
15082 rtx dest;
15083
15084 switch (code)
15085 {
15086 case EQ:
15087 case NE:
15088 libfunc = optab_libfunc (eq_optab, mode);
15089 break;
15090
15091 case GT:
15092 case GE:
15093 libfunc = optab_libfunc (ge_optab, mode);
15094 break;
15095
15096 case LT:
15097 case LE:
15098 libfunc = optab_libfunc (le_optab, mode);
15099 break;
15100
15101 case UNORDERED:
15102 case ORDERED:
15103 libfunc = optab_libfunc (unord_optab, mode);
15104 code = (code == UNORDERED) ? NE : EQ;
15105 break;
15106
15107 case UNGE:
15108 case UNGT:
15109 check_nan = true;
15110 libfunc = optab_libfunc (ge_optab, mode);
15111 code = (code == UNGE) ? GE : GT;
15112 break;
15113
15114 case UNLE:
15115 case UNLT:
15116 check_nan = true;
15117 libfunc = optab_libfunc (le_optab, mode);
15118 code = (code == UNLE) ? LE : LT;
15119 break;
15120
15121 case UNEQ:
15122 case LTGT:
15123 check_nan = true;
15124 libfunc = optab_libfunc (eq_optab, mode);
15125 code = (code == UNEQ) ? EQ : NE;
15126 break;
15127
15128 default:
15129 gcc_unreachable ();
15130 }
15131
15132 gcc_assert (libfunc);
15133
15134 if (!check_nan)
15135 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
15136 SImode, op0, mode, op1, mode);
15137
15138 /* The library signals an exception for signalling NaNs, so we need to
15139 handle isgreater, etc. by first checking isordered. */
15140 else
15141 {
15142 rtx ne_rtx, normal_dest, unord_dest;
15143 rtx unord_func = optab_libfunc (unord_optab, mode);
15144 rtx join_label = gen_label_rtx ();
15145 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
15146 rtx unord_cmp = gen_reg_rtx (comp_mode);
15147
15148
15149 /* Test for either value being a NaN. */
15150 gcc_assert (unord_func);
15151 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
15152 SImode, op0, mode, op1, mode);
15153
15154 /* Set the result to 1 if either value is a NaN, and jump to the join
15155 label. */
15156 dest = gen_reg_rtx (SImode);
15157 emit_move_insn (dest, const1_rtx);
15158 emit_insn (gen_rtx_SET (unord_cmp,
15159 gen_rtx_COMPARE (comp_mode, unord_dest,
15160 const0_rtx)));
15161
15162 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
15163 emit_jump_insn (gen_rtx_SET (pc_rtx,
15164 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
15165 join_ref,
15166 pc_rtx)));
15167
15168 /* Do the normal comparison, knowing that the values are not
15169 NaNs. */
15170 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
15171 SImode, op0, mode, op1, mode);
15172
15173 emit_insn (gen_cstoresi4 (dest,
15174 gen_rtx_fmt_ee (code, SImode, normal_dest,
15175 const0_rtx),
15176 normal_dest, const0_rtx));
15177
15178 /* Join the NaN and non-NaN paths. Compare dest against 0. */
15179 emit_label (join_label);
15180 code = NE;
15181 }
15182
15183 emit_insn (gen_rtx_SET (compare_result,
15184 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
15185 }
15186
15187 else
15188 {
15189 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
15190 CLOBBERs to match cmptf_internal2 pattern. */
15191 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
15192 && FLOAT128_IBM_P (GET_MODE (op0))
15193 && TARGET_HARD_FLOAT)
15194 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15195 gen_rtvec (10,
15196 gen_rtx_SET (compare_result,
15197 gen_rtx_COMPARE (comp_mode, op0, op1)),
15198 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15199 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15200 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15201 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15202 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15203 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15204 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15205 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15206 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
15207 else if (GET_CODE (op1) == UNSPEC
15208 && XINT (op1, 1) == UNSPEC_SP_TEST)
15209 {
15210 rtx op1b = XVECEXP (op1, 0, 0);
15211 comp_mode = CCEQmode;
15212 compare_result = gen_reg_rtx (CCEQmode);
15213 if (TARGET_64BIT)
15214 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
15215 else
15216 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
15217 }
15218 else
15219 emit_insn (gen_rtx_SET (compare_result,
15220 gen_rtx_COMPARE (comp_mode, op0, op1)));
15221 }
15222
15223 validate_condition_mode (code, GET_MODE (compare_result));
15224
15225 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
15226 }
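/* Example of the mode selection above: (ltu (reg:DI a) (reg:DI b)) is
   compared in CCUNSmode and the rtx returned is
   (ltu (reg:CCUNS) (const_int 0)), while a signed (lt ...) would use
   CCmode. */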
15227
15228 \f
15229 /* Return the diagnostic message string if the binary operation OP is
15230 not permitted on TYPE1 and TYPE2, NULL otherwise. */
15231
15232 static const char*
15233 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
15234 const_tree type1,
15235 const_tree type2)
15236 {
15237 machine_mode mode1 = TYPE_MODE (type1);
15238 machine_mode mode2 = TYPE_MODE (type2);
15239
15240 /* For complex modes, use the inner type. */
15241 if (COMPLEX_MODE_P (mode1))
15242 mode1 = GET_MODE_INNER (mode1);
15243
15244 if (COMPLEX_MODE_P (mode2))
15245 mode2 = GET_MODE_INNER (mode2);
15246
15247 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
15248 double to intermix unless -mfloat128-convert. */
15249 if (mode1 == mode2)
15250 return NULL;
15251
15252 if (!TARGET_FLOAT128_CVT)
15253 {
15254 if ((FLOAT128_IEEE_P (mode1) && FLOAT128_IBM_P (mode2))
15255 || (FLOAT128_IBM_P (mode1) && FLOAT128_IEEE_P (mode2)))
15256 return N_("Invalid mixing of IEEE 128-bit and IBM 128-bit floating "
15257 "point types");
15258 }
15259
15260 return NULL;
15261 }
15262
15263 \f
15264 /* Expand floating point conversion to/from __float128 and __ibm128. */
15265
15266 void
15267 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
15268 {
15269 machine_mode dest_mode = GET_MODE (dest);
15270 machine_mode src_mode = GET_MODE (src);
15271 convert_optab cvt = unknown_optab;
15272 bool do_move = false;
15273 rtx libfunc = NULL_RTX;
15274 rtx dest2;
15275 typedef rtx (*rtx_2func_t) (rtx, rtx);
15276 rtx_2func_t hw_convert = (rtx_2func_t)0;
15277 size_t kf_or_tf;
15278
15279 struct hw_conv_t {
15280 rtx_2func_t from_df;
15281 rtx_2func_t from_sf;
15282 rtx_2func_t from_si_sign;
15283 rtx_2func_t from_si_uns;
15284 rtx_2func_t from_di_sign;
15285 rtx_2func_t from_di_uns;
15286 rtx_2func_t to_df;
15287 rtx_2func_t to_sf;
15288 rtx_2func_t to_si_sign;
15289 rtx_2func_t to_si_uns;
15290 rtx_2func_t to_di_sign;
15291 rtx_2func_t to_di_uns;
15292 } hw_conversions[2] = {
15293 /* conversions to/from KFmode */
15294 {
15295 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
15296 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
15297 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
15298 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
15299 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
15300 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
15301 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
15302 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
15303 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
15304 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
15305 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
15306 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
15307 },
15308
15309 /* conversions to/from TFmode */
15310 {
15311 gen_extenddftf2_hw, /* TFmode <- DFmode. */
15312 gen_extendsftf2_hw, /* TFmode <- SFmode. */
15313 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
15314 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
15315 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
15316 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
15317 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
15318 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
15319 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
15320 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
15321 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
15322 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
15323 },
15324 };
15325
15326 if (dest_mode == src_mode)
15327 gcc_unreachable ();
15328
15329 /* Eliminate memory operations. */
15330 if (MEM_P (src))
15331 src = force_reg (src_mode, src);
15332
15333 if (MEM_P (dest))
15334 {
15335 rtx tmp = gen_reg_rtx (dest_mode);
15336 rs6000_expand_float128_convert (tmp, src, unsigned_p);
15337 rs6000_emit_move (dest, tmp, dest_mode);
15338 return;
15339 }
15340
15341 /* Convert to IEEE 128-bit floating point. */
15342 if (FLOAT128_IEEE_P (dest_mode))
15343 {
15344 if (dest_mode == KFmode)
15345 kf_or_tf = 0;
15346 else if (dest_mode == TFmode)
15347 kf_or_tf = 1;
15348 else
15349 gcc_unreachable ();
15350
15351 switch (src_mode)
15352 {
15353 case E_DFmode:
15354 cvt = sext_optab;
15355 hw_convert = hw_conversions[kf_or_tf].from_df;
15356 break;
15357
15358 case E_SFmode:
15359 cvt = sext_optab;
15360 hw_convert = hw_conversions[kf_or_tf].from_sf;
15361 break;
15362
15363 case E_KFmode:
15364 case E_IFmode:
15365 case E_TFmode:
15366 if (FLOAT128_IBM_P (src_mode))
15367 cvt = sext_optab;
15368 else
15369 do_move = true;
15370 break;
15371
15372 case E_SImode:
15373 if (unsigned_p)
15374 {
15375 cvt = ufloat_optab;
15376 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
15377 }
15378 else
15379 {
15380 cvt = sfloat_optab;
15381 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
15382 }
15383 break;
15384
15385 case E_DImode:
15386 if (unsigned_p)
15387 {
15388 cvt = ufloat_optab;
15389 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
15390 }
15391 else
15392 {
15393 cvt = sfloat_optab;
15394 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
15395 }
15396 break;
15397
15398 default:
15399 gcc_unreachable ();
15400 }
15401 }
15402
15403 /* Convert from IEEE 128-bit floating point. */
15404 else if (FLOAT128_IEEE_P (src_mode))
15405 {
15406 if (src_mode == KFmode)
15407 kf_or_tf = 0;
15408 else if (src_mode == TFmode)
15409 kf_or_tf = 1;
15410 else
15411 gcc_unreachable ();
15412
15413 switch (dest_mode)
15414 {
15415 case E_DFmode:
15416 cvt = trunc_optab;
15417 hw_convert = hw_conversions[kf_or_tf].to_df;
15418 break;
15419
15420 case E_SFmode:
15421 cvt = trunc_optab;
15422 hw_convert = hw_conversions[kf_or_tf].to_sf;
15423 break;
15424
15425 case E_KFmode:
15426 case E_IFmode:
15427 case E_TFmode:
15428 if (FLOAT128_IBM_P (dest_mode))
15429 cvt = trunc_optab;
15430 else
15431 do_move = true;
15432 break;
15433
15434 case E_SImode:
15435 if (unsigned_p)
15436 {
15437 cvt = ufix_optab;
15438 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
15439 }
15440 else
15441 {
15442 cvt = sfix_optab;
15443 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
15444 }
15445 break;
15446
15447 case E_DImode:
15448 if (unsigned_p)
15449 {
15450 cvt = ufix_optab;
15451 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
15452 }
15453 else
15454 {
15455 cvt = sfix_optab;
15456 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
15457 }
15458 break;
15459
15460 default:
15461 gcc_unreachable ();
15462 }
15463 }
15464
15465 /* Both IBM format. */
15466 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
15467 do_move = true;
15468
15469 else
15470 gcc_unreachable ();
15471
15472 /* Handle conversion between TFmode/KFmode/IFmode. */
15473 if (do_move)
15474 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
15475
15476 /* Handle conversion if we have hardware support. */
15477 else if (TARGET_FLOAT128_HW && hw_convert)
15478 emit_insn ((hw_convert) (dest, src));
15479
15480 /* Call an external function to do the conversion. */
15481 else if (cvt != unknown_optab)
15482 {
15483 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
15484 gcc_assert (libfunc != NULL_RTX);
15485
15486 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
15487 src, src_mode);
15488
15489 gcc_assert (dest2 != NULL_RTX);
15490 if (!rtx_equal_p (dest, dest2))
15491 emit_move_insn (dest, dest2);
15492 }
15493
15494 else
15495 gcc_unreachable ();
15496
15497 return;
15498 }
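/* Dispatch sketch: converting a signed DImode value to KFmode uses
   gen_float_kfdi2_hw when TARGET_FLOAT128_HW is set, and otherwise emits
   the sfloat_optab libcall (typically __floatdikf from libgcc). */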
15499
15500 \f
15501 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
15502 can be used as that dest register. Return the dest register. */
15503
15504 rtx
15505 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
15506 {
15507 if (op2 == const0_rtx)
15508 return op1;
15509
15510 if (GET_CODE (scratch) == SCRATCH)
15511 scratch = gen_reg_rtx (mode);
15512
15513 if (logical_operand (op2, mode))
15514 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
15515 else
15516 emit_insn (gen_rtx_SET (scratch,
15517 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
15518
15519 return scratch;
15520 }
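/* Usage sketch: with op2 == (const_int 10), a logical_operand, this emits
   scratch = op1 ^ 10 (an xori), and scratch is zero exactly when
   op1 == 10, so the caller can compare scratch against zero. */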
15521
15522 /* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
15523 requires this. The result has mode MODE. */
15524 rtx
15525 rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
15526 {
15527 rtx cond[2];
15528 int n = 0;
15529 if (code == LTGT || code == LE || code == UNLT)
15530 cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
15531 if (code == LTGT || code == GE || code == UNGT)
15532 cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
15533 if (code == LE || code == GE || code == UNEQ)
15534 cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
15535 if (code == UNLT || code == UNGT || code == UNEQ)
15536 cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);
15537
15538 gcc_assert (n == 2);
15539
15540 rtx cc = gen_reg_rtx (CCEQmode);
15541 rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
15542 emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));
15543
15544 return cc;
15545 }
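/* E.g. for code == LE the two conditions collected above are LT and EQ, so
   the cror combines the LT and EQ bits of X into a single CCEQ bit that a
   branch can test directly. */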
15546
15547 void
15548 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
15549 {
15550 rtx condition_rtx = rs6000_generate_compare (operands[1], mode);
15551 rtx_code cond_code = GET_CODE (condition_rtx);
15552
15553 if (FLOAT_MODE_P (mode) && HONOR_NANS (mode)
15554 && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW))
15555 ;
15556 else if (cond_code == NE
15557 || cond_code == GE || cond_code == LE
15558 || cond_code == GEU || cond_code == LEU
15559 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
15560 {
15561 rtx not_result = gen_reg_rtx (CCEQmode);
15562 rtx not_op, rev_cond_rtx;
15563 machine_mode cc_mode;
15564
15565 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
15566
15567 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
15568 SImode, XEXP (condition_rtx, 0), const0_rtx);
15569 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
15570 emit_insn (gen_rtx_SET (not_result, not_op));
15571 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
15572 }
15573
15574 machine_mode op_mode = GET_MODE (XEXP (operands[1], 0));
15575 if (op_mode == VOIDmode)
15576 op_mode = GET_MODE (XEXP (operands[1], 1));
15577
15578 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
15579 {
15580 PUT_MODE (condition_rtx, DImode);
15581 convert_move (operands[0], condition_rtx, 0);
15582 }
15583 else
15584 {
15585 PUT_MODE (condition_rtx, SImode);
15586 emit_insn (gen_rtx_SET (operands[0], condition_rtx));
15587 }
15588 }
15589
15590 /* Emit a conditional branch to the label in OPERANDS[3], testing the comparison in OPERANDS[0]. */
15591
15592 void
15593 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
15594 {
15595 rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
15596 rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
15597 rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
15598 emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
15599 }
15600
15601 /* Return the string to output a conditional branch to LABEL, which is
15602 the operand template of the label, or NULL if the branch is really a
15603 conditional return.
15604
15605 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
15606 condition code register and its mode specifies what kind of
15607 comparison we made.
15608
15609 REVERSED is nonzero if we should reverse the sense of the comparison.
15610
15611 INSN is the insn. */
15612
15613 char *
15614 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
15615 {
15616 static char string[64];
15617 enum rtx_code code = GET_CODE (op);
15618 rtx cc_reg = XEXP (op, 0);
15619 machine_mode mode = GET_MODE (cc_reg);
15620 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
15621 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
15622 int really_reversed = reversed ^ need_longbranch;
15623 char *s = string;
15624 const char *ccode;
15625 const char *pred;
15626 rtx note;
15627
15628 validate_condition_mode (code, mode);
15629
15630 /* Work out which way this really branches. We could use
15631 reverse_condition_maybe_unordered here always but this
15632 makes the resulting assembler clearer. */
15633 if (really_reversed)
15634 {
15635 /* Reversal of FP compares needs care -- an ordered compare
15636 becomes an unordered compare and vice versa. */
15637 if (mode == CCFPmode)
15638 code = reverse_condition_maybe_unordered (code);
15639 else
15640 code = reverse_condition (code);
15641 }
15642
15643 switch (code)
15644 {
15645 /* Not all of these are actually distinct opcodes, but
15646 we distinguish them for clarity of the resulting assembler. */
15647 case NE: case LTGT:
15648 ccode = "ne"; break;
15649 case EQ: case UNEQ:
15650 ccode = "eq"; break;
15651 case GE: case GEU:
15652 ccode = "ge"; break;
15653 case GT: case GTU: case UNGT:
15654 ccode = "gt"; break;
15655 case LE: case LEU:
15656 ccode = "le"; break;
15657 case LT: case LTU: case UNLT:
15658 ccode = "lt"; break;
15659 case UNORDERED: ccode = "un"; break;
15660 case ORDERED: ccode = "nu"; break;
15661 case UNGE: ccode = "nl"; break;
15662 case UNLE: ccode = "ng"; break;
15663 default:
15664 gcc_unreachable ();
15665 }
15666
15667 /* Maybe we have a guess as to how likely the branch is. */
15668 pred = "";
15669 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
15670 if (note != NULL_RTX)
15671 {
15672 /* PROB is the difference from 50%. */
15673 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
15674 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
15675
15676 /* Only hint for highly probable/improbable branches on newer cpus when
15677 we have real profile data, as static prediction overrides processor
15678 dynamic prediction. For older cpus we may as well always hint, but
15679 assume not taken for branches that are very close to 50% as a
15680 mispredicted taken branch is more expensive than a
15681 mispredicted not-taken branch. */
15682 if (rs6000_always_hint
15683 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
15684 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
15685 && br_prob_note_reliable_p (note)))
15686 {
15687 if (abs (prob) > REG_BR_PROB_BASE / 20
15688 && ((prob > 0) ^ need_longbranch))
15689 pred = "+";
15690 else
15691 pred = "-";
15692 }
15693 }
15694
15695 if (label == NULL)
15696 s += sprintf (s, "b%slr%s ", ccode, pred);
15697 else
15698 s += sprintf (s, "b%s%s ", ccode, pred);
15699
15700 /* We need to escape any '%' characters in the reg_names string.
15701 Assume they'd only be the first character.... */
15702 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
15703 *s++ = '%';
15704 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
15705
15706 if (label != NULL)
15707 {
15708 /* If the branch distance was too far, we may have to use an
15709 unconditional branch to go the distance. */
15710 if (need_longbranch)
15711 s += sprintf (s, ",$+8\n\tb %s", label);
15712 else
15713 s += sprintf (s, ",%s", label);
15714 }
15715
15716 return string;
15717 }
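/* Sample strings (label and CR field illustrative): an EQ branch on cr0
   predicted taken gives "beq+ 0,.L5"; when the target is out of range
   (need_longbranch), the sense is inverted and the result is
   "bne 0,$+8\n\tb .L5". */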
15718
15719 /* Return insn for VSX or Altivec comparisons. */
15720
15721 static rtx
15722 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
15723 {
15724 rtx mask;
15725 machine_mode mode = GET_MODE (op0);
15726
15727 switch (code)
15728 {
15729 default:
15730 break;
15731
15732 case GE:
15733 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15734 return NULL_RTX;
15735 /* FALLTHRU */
15736
15737 case EQ:
15738 case GT:
15739 case GTU:
15740 case ORDERED:
15741 case UNORDERED:
15742 case UNEQ:
15743 case LTGT:
15744 mask = gen_reg_rtx (mode);
15745 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
15746 return mask;
15747 }
15748
15749 return NULL_RTX;
15750 }
15751
15752 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
15753 DMODE is expected destination mode. This is a recursive function. */
15754
15755 static rtx
15756 rs6000_emit_vector_compare (enum rtx_code rcode,
15757 rtx op0, rtx op1,
15758 machine_mode dmode)
15759 {
15760 rtx mask;
15761 bool swap_operands = false;
15762 bool try_again = false;
15763
15764 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
15765 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
15766
15767 /* See if the comparison works as is. */
15768 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
15769 if (mask)
15770 return mask;
15771
15772 switch (rcode)
15773 {
15774 case LT:
15775 rcode = GT;
15776 swap_operands = true;
15777 try_again = true;
15778 break;
15779 case LTU:
15780 rcode = GTU;
15781 swap_operands = true;
15782 try_again = true;
15783 break;
15784 case NE:
15785 case UNLE:
15786 case UNLT:
15787 case UNGE:
15788 case UNGT:
15789 /* Invert condition and try again.
15790 e.g., A != B becomes ~(A==B). */
15791 {
15792 enum rtx_code rev_code;
15793 enum insn_code nor_code;
15794 rtx mask2;
15795
15796 rev_code = reverse_condition_maybe_unordered (rcode);
15797 if (rev_code == UNKNOWN)
15798 return NULL_RTX;
15799
15800 nor_code = optab_handler (one_cmpl_optab, dmode);
15801 if (nor_code == CODE_FOR_nothing)
15802 return NULL_RTX;
15803
15804 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
15805 if (!mask2)
15806 return NULL_RTX;
15807
15808 mask = gen_reg_rtx (dmode);
15809 emit_insn (GEN_FCN (nor_code) (mask, mask2));
15810 return mask;
15811 }
15812 break;
15813 case GE:
15814 case GEU:
15815 case LE:
15816 case LEU:
15817 /* Try GT/GTU/LT/LTU OR EQ */
15818 {
15819 rtx c_rtx, eq_rtx;
15820 enum insn_code ior_code;
15821 enum rtx_code new_code;
15822
15823 switch (rcode)
15824 {
15825 case GE:
15826 new_code = GT;
15827 break;
15828
15829 case GEU:
15830 new_code = GTU;
15831 break;
15832
15833 case LE:
15834 new_code = LT;
15835 break;
15836
15837 case LEU:
15838 new_code = LTU;
15839 break;
15840
15841 default:
15842 gcc_unreachable ();
15843 }
15844
15845 ior_code = optab_handler (ior_optab, dmode);
15846 if (ior_code == CODE_FOR_nothing)
15847 return NULL_RTX;
15848
15849 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
15850 if (!c_rtx)
15851 return NULL_RTX;
15852
15853 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
15854 if (!eq_rtx)
15855 return NULL_RTX;
15856
15857 mask = gen_reg_rtx (dmode);
15858 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
15859 return mask;
15860 }
15861 break;
15862 default:
15863 return NULL_RTX;
15864 }
15865
15866 if (try_again)
15867 {
15868 if (swap_operands)
15869 std::swap (op0, op1);
15870
15871 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
15872 if (mask)
15873 return mask;
15874 }
15875
15876 /* You only get two chances. */
15877 return NULL_RTX;
15878 }
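/* Worked sketch of the rewriting above (illustrative, e.g. V4SI, where
   the hardware only provides EQ/GT/GTU directly):

	a <  b   ->  swap operands, emit  b >  a	(vcmpgtsw)
	a <= b   ->  (a < b) OR (a == b)		(two compares + vor)
	a != b   ->  NOT (a == b)			(vcmpequw + complement)

   Each rewrite recurses at most one level, hence the "two chances"
   comment.  */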
15879
15880 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
15881 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
15882 operands for the relation operation COND. */
15883
15884 int
15885 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
15886 rtx cond, rtx cc_op0, rtx cc_op1)
15887 {
15888 machine_mode dest_mode = GET_MODE (dest);
15889 machine_mode mask_mode = GET_MODE (cc_op0);
15890 enum rtx_code rcode = GET_CODE (cond);
15891 rtx mask;
15892 bool invert_move = false;
15893
15894 if (VECTOR_UNIT_NONE_P (dest_mode))
15895 return 0;
15896
15897 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
15898 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
15899
15900 switch (rcode)
15901 {
15902 /* Swap operands if we can, and fall back to doing the operation as
15903 specified, and doing a NOR to invert the test. */
15904 case NE:
15905 case UNLE:
15906 case UNLT:
15907 case UNGE:
15908 case UNGT:
15909 /* Invert condition and try again.
15910 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
15911 invert_move = true;
15912 rcode = reverse_condition_maybe_unordered (rcode);
15913 if (rcode == UNKNOWN)
15914 return 0;
15915 break;
15916
15917 case GE:
15918 case LE:
15919 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
15920 {
15921 /* Invert condition to avoid compound test. */
15922 invert_move = true;
15923 rcode = reverse_condition (rcode);
15924 }
15925 break;
15926
15927 case GTU:
15928 case GEU:
15929 case LTU:
15930 case LEU:
15931
15932 /* Invert condition to avoid compound test if necessary. */
15933 if (rcode == GEU || rcode == LEU)
15934 {
15935 invert_move = true;
15936 rcode = reverse_condition (rcode);
15937 }
15938 break;
15939
15940 default:
15941 break;
15942 }
15943
15944 /* Get the vector mask for the given relational operations. */
15945 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
15946
15947 if (!mask)
15948 return 0;
15949
15950 if (mask_mode != dest_mode)
15951 mask = simplify_gen_subreg (dest_mode, mask, mask_mode, 0);
15952
15953 if (invert_move)
15954 std::swap (op_true, op_false);
15955
15956 /* The compare mask is known to be -1/0 in each lane; exploit that when the arms are constant vectors. */
15957 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
15958 && (GET_CODE (op_true) == CONST_VECTOR
15959 || GET_CODE (op_false) == CONST_VECTOR))
15960 {
15961 rtx constant_0 = CONST0_RTX (dest_mode);
15962 rtx constant_m1 = CONSTM1_RTX (dest_mode);
15963
15964 if (op_true == constant_m1 && op_false == constant_0)
15965 {
15966 emit_move_insn (dest, mask);
15967 return 1;
15968 }
15969
15970 else if (op_true == constant_0 && op_false == constant_m1)
15971 {
15972 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
15973 return 1;
15974 }
15975
15976 /* If we can't use the vector comparison directly, perhaps we can use
15977 the mask for the true or false fields, instead of loading up a
15978 constant. */
15979 if (op_true == constant_m1)
15980 op_true = mask;
15981
15982 if (op_false == constant_0)
15983 op_false = mask;
15984 }
15985
15986 if (!REG_P (op_true) && !SUBREG_P (op_true))
15987 op_true = force_reg (dest_mode, op_true);
15988
15989 if (!REG_P (op_false) && !SUBREG_P (op_false))
15990 op_false = force_reg (dest_mode, op_false);
15991
15992 rtx tmp = gen_rtx_IOR (dest_mode,
15993 gen_rtx_AND (dest_mode, gen_rtx_NOT (dest_mode, mask),
15994 op_false),
15995 gen_rtx_AND (dest_mode, mask, op_true));
15996 emit_insn (gen_rtx_SET (dest, tmp));
15997 return 1;
15998 }
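/* The fallback expansion above computes (illustrative RTL semantics)

	dest = (mask AND op_true) IOR ((NOT mask) AND op_false)

   which is exactly what the Altivec vsel / VSX xxsel instructions
   implement, since the compare mask is all-ones or all-zeros per lane.
   The constant special cases before it avoid materializing -1/0
   vectors when the mask itself is already the desired result.  */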
15999
16000 /* Possibly use the xsmaxc{dp,qp} and xsminc{dp,qp} instructions to implement
16001 a maximum or minimum with "C" semantics.
16002
16003 Unless you use -ffast-math, you can't use these instructions to replace
16004 conditions that implicitly reverse the condition because the comparison
16005 might generate a NaN or signed zero.
16006
16007 I.e. the following can be replaced all of the time
16008 ret = (op1 > op2) ? op1 : op2 ; generate xsmaxcdp
16009 ret = (op1 >= op2) ? op1 : op2 ; generate xsmaxcdp
16010 ret = (op1 < op2) ? op1 : op2 ; generate xsmincdp
16011 ret = (op1 <= op2) ? op1 : op2 ; generate xsmincdp
16012
16013 The following can be replaced only if -ffast-math is used:
16014 ret = (op1 < op2) ? op2 : op1 ; generate xsmaxcdp
16015 ret = (op1 <= op2) ? op2 : op1 ; generate xsmaxcdp
16016 ret = (op1 > op2) ? op2 : op1 ; generate xsmincdp
16017 ret = (op1 >= op2) ? op2 : op1 ; generate xsmincdp
16018
16019 Move TRUE_COND to DEST if OP of the operands of the last comparison is
16020 nonzero/true, FALSE_COND if it is zero/false.
16021
16022 Return false if we can't generate the appropriate minimum or maximum, and
16023 true if we did generate the minimum or maximum. */
16024
16025 static bool
16026 rs6000_maybe_emit_maxc_minc (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16027 {
16028 enum rtx_code code = GET_CODE (op);
16029 rtx op0 = XEXP (op, 0);
16030 rtx op1 = XEXP (op, 1);
16031 machine_mode compare_mode = GET_MODE (op0);
16032 machine_mode result_mode = GET_MODE (dest);
16033
16034 if (result_mode != compare_mode)
16035 return false;
16036
16037 /* See the header comment of this function; it simply expects GE/GT/LE/LT in
16038 the checks, but for the reversible equivalent UNLT/UNLE/UNGT/UNGE,
16039 we need to do the reversions first to make the following checks
16040 support fewer cases, like:
16041
16042 (a UNLT b) ? op1 : op2 => (a >= b) ? op2 : op1;
16043 (a UNLE b) ? op1 : op2 => (a > b) ? op2 : op1;
16044 (a UNGT b) ? op1 : op2 => (a <= b) ? op2 : op1;
16045 (a UNGE b) ? op1 : op2 => (a < b) ? op2 : op1;
16046
16047 By the way, if we see these UNLT/UNLE/UNGT/UNGE it's guaranteed
16048 that we have 4-way condition codes (LT/GT/EQ/UN), so we do not
16049 have to check for fast-math or the like. */
16050 if (code == UNGE || code == UNGT || code == UNLE || code == UNLT)
16051 {
16052 code = reverse_condition_maybe_unordered (code);
16053 std::swap (true_cond, false_cond);
16054 }
16055
16056 bool max_p;
16057 if (code == GE || code == GT)
16058 max_p = true;
16059 else if (code == LE || code == LT)
16060 max_p = false;
16061 else
16062 return false;
16063
16064 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
16065 ;
16066
16067 /* Only when NaNs and signed-zeros are not in effect, smax could be
16068 used for `op0 < op1 ? op1 : op0`, and smin could be used for
16069 `op0 > op1 ? op1 : op0`. */
16070 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond)
16071 && !HONOR_NANS (compare_mode) && !HONOR_SIGNED_ZEROS (compare_mode))
16072 max_p = !max_p;
16073
16074 else
16075 return false;
16076
16077 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
16078 return true;
16079 }
16080
16081 /* Possibly emit a floating point conditional move, by generating a compare
16082 instruction that sets a mask, followed by an XXSEL select instruction.
16083
16084 Move TRUE_COND to DEST if OP of the operands of the last comparison is
16085 nonzero/true, FALSE_COND if it is zero/false.
16086
16087 Return false if the operation cannot be generated, and true if we could
16088 generate the instruction. */
16089
16090 static bool
16091 rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16092 {
16093 enum rtx_code code = GET_CODE (op);
16094 rtx op0 = XEXP (op, 0);
16095 rtx op1 = XEXP (op, 1);
16096 machine_mode compare_mode = GET_MODE (op0);
16097 machine_mode result_mode = GET_MODE (dest);
16098 rtx compare_rtx;
16099 rtx cmove_rtx;
16100 rtx clobber_rtx;
16101
16102 if (!can_create_pseudo_p ())
16103 return false;
16104
16105 /* We allow the comparison to be either SFmode/DFmode and the true/false
16106 condition to be either SFmode/DFmode. I.e. we allow:
16107
16108 float a, b;
16109 double c, d, r;
16110
16111 r = (a == b) ? c : d;
16112
16113 and:
16114
16115 double a, b;
16116 float c, d, r;
16117
16118 r = (a == b) ? c : d;
16119
16120 but we don't allow intermixing the IEEE 128-bit floating point types with
16121 the 32/64-bit scalar types. */
16122
16123 if (!(compare_mode == result_mode
16124 || (compare_mode == SFmode && result_mode == DFmode)
16125 || (compare_mode == DFmode && result_mode == SFmode)))
16126 return false;
16127
16128 switch (code)
16129 {
16130 case EQ:
16131 case GE:
16132 case GT:
16133 break;
16134
16135 case NE:
16136 case LT:
16137 case LE:
16138 code = swap_condition (code);
16139 std::swap (op0, op1);
16140 break;
16141
16142 default:
16143 return false;
16144 }
16145
16146 /* Generate: [(parallel [(set (dest)
16147 (if_then_else (op (cmp1) (cmp2))
16148 (true)
16149 (false)))
16150 (clobber (scratch))])]. */
16151
16152 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
16153 cmove_rtx = gen_rtx_SET (dest,
16154 gen_rtx_IF_THEN_ELSE (result_mode,
16155 compare_rtx,
16156 true_cond,
16157 false_cond));
16158
16159 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
16160 emit_insn (gen_rtx_PARALLEL (VOIDmode,
16161 gen_rtvec (2, cmove_rtx, clobber_rtx)));
16162
16163 return true;
16164 }
16165
16166 /* Helper function to return true if the target has a compare-and-set-mask
16167 instruction that can be used with XXSEL to implement a conditional move.
16168 Such a target is assumed to also support the "C" minimum and maximum
16169 instructions. */
16170
16171 static bool
16172 have_compare_and_set_mask (machine_mode mode)
16173 {
16174 switch (mode)
16175 {
16176 case E_SFmode:
16177 case E_DFmode:
16178 return TARGET_P9_MINMAX;
16179
16180 case E_KFmode:
16181 case E_TFmode:
16182 return TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode);
16183
16184 default:
16185 break;
16186 }
16187
16188 return false;
16189 }
16190
16191 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
16192 operands of the last comparison is nonzero/true, FALSE_COND if it
16193 is zero/false. Return false if the hardware has no such operation. */
16194
16195 bool
16196 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16197 {
16198 enum rtx_code code = GET_CODE (op);
16199 rtx op0 = XEXP (op, 0);
16200 rtx op1 = XEXP (op, 1);
16201 machine_mode compare_mode = GET_MODE (op0);
16202 machine_mode result_mode = GET_MODE (dest);
16203 rtx temp;
16204 bool is_against_zero;
16205
16206 /* These modes should always match. */
16207 if (GET_MODE (op1) != compare_mode
16208 /* In the isel case however, we can use a compare immediate, so
16209 op1 may be a small constant. */
16210 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
16211 return false;
16212 if (GET_MODE (true_cond) != result_mode)
16213 return false;
16214 if (GET_MODE (false_cond) != result_mode)
16215 return false;
16216
16217 /* See if we can use the "C" minimum, "C" maximum, and compare and set mask
16218 instructions. */
16219 if (have_compare_and_set_mask (compare_mode)
16220 && have_compare_and_set_mask (result_mode))
16221 {
16222 if (rs6000_maybe_emit_maxc_minc (dest, op, true_cond, false_cond))
16223 return true;
16224
16225 if (rs6000_maybe_emit_fp_cmove (dest, op, true_cond, false_cond))
16226 return true;
16227 }
16228
16229 /* Don't allow using floating point comparisons for integer results for
16230 now. */
16231 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
16232 return false;
16233
16234 /* First, work out if the hardware can do this at all, or
16235 if it's too slow.... */
16236 if (!FLOAT_MODE_P (compare_mode))
16237 {
16238 if (TARGET_ISEL)
16239 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
16240 return false;
16241 }
16242
16243 is_against_zero = op1 == CONST0_RTX (compare_mode);
16244
16245 /* A floating-point subtract might overflow, underflow, or produce
16246 an inexact result, thus changing the floating-point flags, so it
16247 can't be generated if we care about that. It's safe if one side
16248 of the construct is zero, since then no subtract will be
16249 generated. */
16250 if (SCALAR_FLOAT_MODE_P (compare_mode)
16251 && flag_trapping_math && ! is_against_zero)
16252 return false;
16253
16254 /* Eliminate half of the comparisons by switching operands, this
16255 makes the remaining code simpler. */
16256 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
16257 || code == LTGT || code == LT || code == UNLE)
16258 {
16259 code = reverse_condition_maybe_unordered (code);
16260 temp = true_cond;
16261 true_cond = false_cond;
16262 false_cond = temp;
16263 }
16264
16265 /* UNEQ and LTGT take four instructions for a comparison with zero,
16266 so it'll probably be faster to use a branch here too. */
16267 if (code == UNEQ && HONOR_NANS (compare_mode))
16268 return false;
16269
16270 /* We're going to try to implement comparisons by performing
16271 a subtract, then comparing against zero. Unfortunately,
16272 Inf - Inf is NaN which is not zero, and so if we don't
16273 know that the operand is finite and the comparison
16274 would treat EQ differently from UNORDERED, we can't do it. */
16275 if (HONOR_INFINITIES (compare_mode)
16276 && code != GT && code != UNGE
16277 && (!CONST_DOUBLE_P (op1)
16278 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
16279 /* Constructs of the form (a OP b ? a : b) are safe. */
16280 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
16281 || (! rtx_equal_p (op0, true_cond)
16282 && ! rtx_equal_p (op1, true_cond))))
16283 return false;
16284
16285 /* At this point we know we can use fsel. */
16286
16287 /* Don't allow compare_mode other than SFmode or DFmode, for others there
16288 is no fsel instruction. */
16289 if (compare_mode != SFmode && compare_mode != DFmode)
16290 return false;
16291
16292 /* Reduce the comparison to a comparison against zero. */
16293 if (! is_against_zero)
16294 {
16295 temp = gen_reg_rtx (compare_mode);
16296 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
16297 op0 = temp;
16298 op1 = CONST0_RTX (compare_mode);
16299 }
16300
16301 /* If we don't care about NaNs we can reduce some of the comparisons
16302 down to faster ones. */
16303 if (! HONOR_NANS (compare_mode))
16304 switch (code)
16305 {
16306 case GT:
16307 code = LE;
16308 temp = true_cond;
16309 true_cond = false_cond;
16310 false_cond = temp;
16311 break;
16312 case UNGE:
16313 code = GE;
16314 break;
16315 case UNEQ:
16316 code = EQ;
16317 break;
16318 default:
16319 break;
16320 }
16321
16322 /* Now, reduce everything down to a GE. */
16323 switch (code)
16324 {
16325 case GE:
16326 break;
16327
16328 case LE:
16329 temp = gen_reg_rtx (compare_mode);
16330 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16331 op0 = temp;
16332 break;
16333
16334 case ORDERED:
16335 temp = gen_reg_rtx (compare_mode);
16336 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
16337 op0 = temp;
16338 break;
16339
16340 case EQ:
16341 temp = gen_reg_rtx (compare_mode);
16342 emit_insn (gen_rtx_SET (temp,
16343 gen_rtx_NEG (compare_mode,
16344 gen_rtx_ABS (compare_mode, op0))));
16345 op0 = temp;
16346 break;
16347
16348 case UNGE:
16349 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
16350 temp = gen_reg_rtx (result_mode);
16351 emit_insn (gen_rtx_SET (temp,
16352 gen_rtx_IF_THEN_ELSE (result_mode,
16353 gen_rtx_GE (VOIDmode,
16354 op0, op1),
16355 true_cond, false_cond)));
16356 false_cond = true_cond;
16357 true_cond = temp;
16358
16359 temp = gen_reg_rtx (compare_mode);
16360 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16361 op0 = temp;
16362 break;
16363
16364 case GT:
16365 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
16366 temp = gen_reg_rtx (result_mode);
16367 emit_insn (gen_rtx_SET (temp,
16368 gen_rtx_IF_THEN_ELSE (result_mode,
16369 gen_rtx_GE (VOIDmode,
16370 op0, op1),
16371 true_cond, false_cond)));
16372 true_cond = false_cond;
16373 false_cond = temp;
16374
16375 temp = gen_reg_rtx (compare_mode);
16376 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16377 op0 = temp;
16378 break;
16379
16380 default:
16381 gcc_unreachable ();
16382 }
16383
16384 emit_insn (gen_rtx_SET (dest,
16385 gen_rtx_IF_THEN_ELSE (result_mode,
16386 gen_rtx_GE (VOIDmode,
16387 op0, op1),
16388 true_cond, false_cond)));
16389 return true;
16390 }
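/* Illustrative sketch of the fsel reduction above (not from the
   original source): fsel implements dest = (op0 >= 0.0) ? true_cond
   : false_cond, so each supported comparison is first turned into a
   subtract and then into a GE against zero.  E.g. "r = (a <= b) ? x : y"
   becomes roughly

	fsub t,a,b	; t = a - b
	fneg t,t	; LE needs -(a - b) >= 0
	fsel r,t,x,y	; r = (t >= 0.0) ? x : y

   valid only under the finiteness/trapping conditions checked above.  */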
16391
16392 /* Same as above, but for ints (isel). */
16393
16394 bool
16395 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16396 {
16397 rtx condition_rtx, cr;
16398 machine_mode mode = GET_MODE (dest);
16399 enum rtx_code cond_code;
16400 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
16401 bool signedp;
16402
16403 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
16404 return false;
16405
16406 /* PR104335: We now need to expect CC-mode "comparisons"
16407 coming from ifcvt. The following code expects proper
16408 comparisons so better abort here. */
16409 if (GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_CC)
16410 return false;
16411
16412 /* We still have to do the compare, because isel doesn't do a
16413 compare, it just looks at the CRx bits set by a previous compare
16414 instruction. */
16415 condition_rtx = rs6000_generate_compare (op, mode);
16416 cond_code = GET_CODE (condition_rtx);
16417 cr = XEXP (condition_rtx, 0);
16418 signedp = GET_MODE (cr) == CCmode;
16419
16420 isel_func = (mode == SImode
16421 ? (signedp ? gen_isel_cc_si : gen_isel_ccuns_si)
16422 : (signedp ? gen_isel_cc_di : gen_isel_ccuns_di));
16423
16424 switch (cond_code)
16425 {
16426 case LT: case GT: case LTU: case GTU: case EQ:
16427 /* isel handles these directly. */
16428 break;
16429
16430 default:
16431 /* We need to swap the sense of the comparison. */
16432 {
16433 std::swap (false_cond, true_cond);
16434 PUT_CODE (condition_rtx, reverse_condition (cond_code));
16435 }
16436 break;
16437 }
16438
16439 false_cond = force_reg (mode, false_cond);
16440 if (true_cond != const0_rtx)
16441 true_cond = force_reg (mode, true_cond);
16442
16443 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
16444
16445 return true;
16446 }
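/* Illustrative isel sketch (not from the original source): for
   "r = (a > b) ? x : y" on SImode the expansion is roughly

	cmpw  0,a,b	; set CR0 from the comparison
	isel  r,x,y,1	; bit 1 is CR0.GT: r = GT ? x : y

   Codes other than LT/GT/LTU/GTU/EQ are handled by reversing the
   condition and swapping the arms, because isel can only test one CR
   bit being set.  */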
16447
16448 void
16449 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
16450 {
16451 machine_mode mode = GET_MODE (op0);
16452 enum rtx_code c;
16453 rtx target;
16454
16455 /* VSX/altivec have direct min/max insns. */
16456 if ((code == SMAX || code == SMIN)
16457 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
16458 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))
16459 || (TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode))))
16460 {
16461 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
16462 return;
16463 }
16464
16465 if (code == SMAX || code == SMIN)
16466 c = GE;
16467 else
16468 c = GEU;
16469
16470 if (code == SMAX || code == UMAX)
16471 target = emit_conditional_move (dest, { c, op0, op1, mode },
16472 op0, op1, mode, 0);
16473 else
16474 target = emit_conditional_move (dest, { c, op0, op1, mode },
16475 op1, op0, mode, 0);
16476 gcc_assert (target);
16477 if (target != dest)
16478 emit_move_insn (dest, target);
16479 }
16480
16481 /* A subroutine of the atomic operation splitters. Jump to LABEL if
16482 COND is true. Mark the jump as unlikely to be taken. */
16483
16484 static void
16485 emit_unlikely_jump (rtx cond, rtx label)
16486 {
16487 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
16488 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
16489 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
16490 }
16491
16492 /* A subroutine of the atomic operation splitters. Emit a load-locked
16493 instruction in MODE. For QI/HImode, possibly use a pattern that includes
16494 the zero_extend operation. */
16495
16496 static void
16497 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
16498 {
16499 rtx (*fn) (rtx, rtx) = NULL;
16500
16501 switch (mode)
16502 {
16503 case E_QImode:
16504 fn = gen_load_lockedqi;
16505 break;
16506 case E_HImode:
16507 fn = gen_load_lockedhi;
16508 break;
16509 case E_SImode:
16510 if (GET_MODE (mem) == QImode)
16511 fn = gen_load_lockedqi_si;
16512 else if (GET_MODE (mem) == HImode)
16513 fn = gen_load_lockedhi_si;
16514 else
16515 fn = gen_load_lockedsi;
16516 break;
16517 case E_DImode:
16518 fn = gen_load_lockeddi;
16519 break;
16520 case E_TImode:
16521 fn = gen_load_lockedti;
16522 break;
16523 default:
16524 gcc_unreachable ();
16525 }
16526 emit_insn (fn (reg, mem));
16527 }
16528
16529 /* A subroutine of the atomic operation splitters. Emit a store-conditional
16530 instruction in MODE. */
16531
16532 static void
16533 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
16534 {
16535 rtx (*fn) (rtx, rtx, rtx) = NULL;
16536
16537 switch (mode)
16538 {
16539 case E_QImode:
16540 fn = gen_store_conditionalqi;
16541 break;
16542 case E_HImode:
16543 fn = gen_store_conditionalhi;
16544 break;
16545 case E_SImode:
16546 fn = gen_store_conditionalsi;
16547 break;
16548 case E_DImode:
16549 fn = gen_store_conditionaldi;
16550 break;
16551 case E_TImode:
16552 fn = gen_store_conditionalti;
16553 break;
16554 default:
16555 gcc_unreachable ();
16556 }
16557
16558 /* Emit sync before stwcx. to address PPC405 Erratum. */
16559 if (PPC405_ERRATUM77)
16560 emit_insn (gen_hwsync ());
16561
16562 emit_insn (fn (res, mem, val));
16563 }
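/* Together with emit_load_locked above, this forms a load-reserve /
   store-conditional pair.  A minimal word-sized retry loop looks like
   (illustrative):

	1: lwarx  9,0,3		; load word, set reservation
	   ...			; compute the new value in r10
	   stwcx. 10,0,3	; store iff the reservation still holds
	   bne- 1b		; reservation lost: retry

   stwcx. records its outcome in CR0, which is why the expanders below
   compare the returned CCmode register against zero.  */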
16564
16565 /* Expand barriers before and after a load_locked/store_cond sequence. */
16566
16567 static rtx
16568 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
16569 {
16570 rtx addr = XEXP (mem, 0);
16571
16572 if (!legitimate_indirect_address_p (addr, reload_completed)
16573 && !legitimate_indexed_address_p (addr, reload_completed))
16574 {
16575 addr = force_reg (Pmode, addr);
16576 mem = replace_equiv_address_nv (mem, addr);
16577 }
16578
16579 switch (model)
16580 {
16581 case MEMMODEL_RELAXED:
16582 case MEMMODEL_CONSUME:
16583 case MEMMODEL_ACQUIRE:
16584 break;
16585 case MEMMODEL_RELEASE:
16586 case MEMMODEL_ACQ_REL:
16587 emit_insn (gen_lwsync ());
16588 break;
16589 case MEMMODEL_SEQ_CST:
16590 emit_insn (gen_hwsync ());
16591 break;
16592 default:
16593 gcc_unreachable ();
16594 }
16595 return mem;
16596 }
16597
16598 static void
16599 rs6000_post_atomic_barrier (enum memmodel model)
16600 {
16601 switch (model)
16602 {
16603 case MEMMODEL_RELAXED:
16604 case MEMMODEL_CONSUME:
16605 case MEMMODEL_RELEASE:
16606 break;
16607 case MEMMODEL_ACQUIRE:
16608 case MEMMODEL_ACQ_REL:
16609 case MEMMODEL_SEQ_CST:
16610 emit_insn (gen_isync ());
16611 break;
16612 default:
16613 gcc_unreachable ();
16614 }
16615 }
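/* Taken together, the two barrier helpers implement the usual PowerPC
   mapping (illustrative summary):

	seq_cst:  hwsync ; <larx/stcx. loop> ; isync
	acq_rel:  lwsync ; <larx/stcx. loop> ; isync
	release:  lwsync ; <larx/stcx. loop>
	acquire:           <larx/stcx. loop> ; isync
	relaxed:           <larx/stcx. loop>  */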
16616
16617 /* A subroutine of the various atomic expanders. For sub-word operations,
16618 we must adjust things to operate on SImode. Given the original MEM,
16619 return a new aligned memory. Also build and return the quantities by
16620 which to shift and mask. */
16621
16622 static rtx
16623 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
16624 {
16625 rtx addr, align, shift, mask, mem;
16626 HOST_WIDE_INT shift_mask;
16627 machine_mode mode = GET_MODE (orig_mem);
16628
16629 /* For smaller modes, we have to implement this via SImode. */
16630 shift_mask = (mode == QImode ? 0x18 : 0x10);
16631
16632 addr = XEXP (orig_mem, 0);
16633 addr = force_reg (GET_MODE (addr), addr);
16634
16635 /* Aligned memory containing subword. Generate a new memory. We
16636 do not want any of the existing MEM_ATTR data, as we're now
16637 accessing memory outside the original object. */
16638 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
16639 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16640 mem = gen_rtx_MEM (SImode, align);
16641 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
16642 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
16643 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
16644
16645 /* Shift amount for subword relative to aligned word. */
16646 shift = gen_reg_rtx (SImode);
16647 addr = gen_lowpart (SImode, addr);
16648 rtx tmp = gen_reg_rtx (SImode);
16649 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
16650 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
16651 if (BYTES_BIG_ENDIAN)
16652 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
16653 shift, 1, OPTAB_LIB_WIDEN);
16654 *pshift = shift;
16655
16656 /* Mask for insertion. */
16657 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
16658 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
16659 *pmask = mask;
16660
16661 return mem;
16662 }
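/* Worked example (illustrative): for a QImode access at address A on a
   little-endian target the code above computes

	aligned = A & -4		; containing word
	shift   = (A << 3) & 0x18	; byte offset in bits: 0, 8, 16, 24
	mask    = 0xff << shift

   On big-endian the shift is XORed with 0x18 (0x10 for HImode) because
   the lowest address holds the most significant byte of the word.  */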
16663
16664 /* A subroutine of the various atomic expanders. For sub-word operands,
16665 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
16666
16667 static rtx
16668 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
16669 {
16670 rtx x;
16671
16672 x = gen_reg_rtx (SImode);
16673 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
16674 gen_rtx_NOT (SImode, mask),
16675 oldval)));
16676
16677 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
16678
16679 return x;
16680 }
16681
16682 /* A subroutine of the various atomic expanders. For sub-word operands,
16683 extract WIDE to NARROW via SHIFT. */
16684
16685 static void
16686 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
16687 {
16688 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
16689 wide, 1, OPTAB_LIB_WIDEN);
16690 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
16691 }
16692
16693 /* Expand an atomic compare and swap operation. */
16694
16695 void
16696 rs6000_expand_atomic_compare_and_swap (rtx operands[])
16697 {
16698 rtx boolval, retval, mem, oldval, newval, cond;
16699 rtx label1, label2, x, mask, shift;
16700 machine_mode mode, orig_mode;
16701 enum memmodel mod_s, mod_f;
16702 bool is_weak;
16703
16704 boolval = operands[0];
16705 retval = operands[1];
16706 mem = operands[2];
16707 oldval = operands[3];
16708 newval = operands[4];
16709 is_weak = (INTVAL (operands[5]) != 0);
16710 mod_s = memmodel_base (INTVAL (operands[6]));
16711 mod_f = memmodel_base (INTVAL (operands[7]));
16712 orig_mode = mode = GET_MODE (mem);
16713
16714 mask = shift = NULL_RTX;
16715 if (mode == QImode || mode == HImode)
16716 {
16717 /* Before power8, we didn't have access to lbarx/lharx, so generate
16718 lwarx and shift/mask operations. With power8, we need to do the
16719 comparison in SImode, but the store is still done in QI/HImode. */
16720 oldval = convert_modes (SImode, mode, oldval, 1);
16721
16722 if (!TARGET_SYNC_HI_QI)
16723 {
16724 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16725
16726 /* Shift and mask OLDVAL into position within the word. */
16727 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
16728 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16729
16730 /* Shift and mask NEWVAL into position within the word. */
16731 newval = convert_modes (SImode, mode, newval, 1);
16732 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
16733 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16734 }
16735
16736 /* Prepare to adjust the return value. */
16737 retval = gen_reg_rtx (SImode);
16738 mode = SImode;
16739 }
16740 else if (reg_overlap_mentioned_p (retval, oldval))
16741 oldval = copy_to_reg (oldval);
16742
16743 if (mode != TImode && !reg_or_short_operand (oldval, mode))
16744 oldval = copy_to_mode_reg (mode, oldval);
16745
16746 if (reg_overlap_mentioned_p (retval, newval))
16747 newval = copy_to_reg (newval);
16748
16749 mem = rs6000_pre_atomic_barrier (mem, mod_s);
16750
16751 label1 = NULL_RTX;
16752 if (!is_weak)
16753 {
16754 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16755 emit_label (XEXP (label1, 0));
16756 }
16757 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16758
16759 emit_load_locked (mode, retval, mem);
16760
16761 x = retval;
16762 if (mask)
16763 x = expand_simple_binop (SImode, AND, retval, mask,
16764 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16765
16766 cond = gen_reg_rtx (CCmode);
16767 /* If we have TImode, synthesize a comparison. */
16768 if (mode != TImode)
16769 x = gen_rtx_COMPARE (CCmode, x, oldval);
16770 else
16771 {
16772 rtx xor1_result = gen_reg_rtx (DImode);
16773 rtx xor2_result = gen_reg_rtx (DImode);
16774 rtx or_result = gen_reg_rtx (DImode);
16775 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
16776 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
16777 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
16778 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
16779
16780 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
16781 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
16782 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
16783 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
16784 }
16785
16786 emit_insn (gen_rtx_SET (cond, x));
16787
16788 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16789 emit_unlikely_jump (x, label2);
16790
16791 x = newval;
16792 if (mask)
16793 x = rs6000_mask_atomic_subword (retval, newval, mask);
16794
16795 emit_store_conditional (orig_mode, cond, mem, x);
16796
16797 if (!is_weak)
16798 {
16799 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16800 emit_unlikely_jump (x, label1);
16801 }
16802
16803 if (!is_mm_relaxed (mod_f))
16804 emit_label (XEXP (label2, 0));
16805
16806 rs6000_post_atomic_barrier (mod_s);
16807
16808 if (is_mm_relaxed (mod_f))
16809 emit_label (XEXP (label2, 0));
16810
16811 if (shift)
16812 rs6000_finish_atomic_subword (operands[1], retval, shift);
16813 else if (mode != GET_MODE (operands[1]))
16814 convert_move (operands[1], retval, 1);
16815
16816 /* In all cases, CR0 contains EQ on success, and NE on failure. */
16817 x = gen_rtx_EQ (SImode, cond, const0_rtx);
16818 emit_insn (gen_rtx_SET (boolval, x));
16819 }
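/* Illustrative shape of the strong compare-and-swap loop generated
   above for a word, with barriers omitted:

	1: lwarx  9,0,3		; retval = *mem, set reservation
	   cmpw   0,9,4		; retval == oldval?
	   bne-   0,2f		; no: fail
	   stwcx. 10,0,3	; try to store newval
	   bne-   0,1b		; reservation lost: retry
	2: ...			; CR0 has EQ on success, NE on failure

   A weak CAS omits the backward retry branch, and QImode/HImode either
   use lbarx/lharx directly (power8) or run the same loop on the
   containing word using the subword shift/mask helpers above.  */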
16820
16821 /* Expand an atomic exchange operation. */
16822
16823 void
16824 rs6000_expand_atomic_exchange (rtx operands[])
16825 {
16826 rtx retval, mem, val, cond;
16827 machine_mode mode;
16828 enum memmodel model;
16829 rtx label, x, mask, shift;
16830
16831 retval = operands[0];
16832 mem = operands[1];
16833 val = operands[2];
16834 model = memmodel_base (INTVAL (operands[3]));
16835 mode = GET_MODE (mem);
16836
16837 mask = shift = NULL_RTX;
16838 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
16839 {
16840 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16841
16842 /* Shift and mask VAL into position within the word. */
16843 val = convert_modes (SImode, mode, val, 1);
16844 val = expand_simple_binop (SImode, ASHIFT, val, shift,
16845 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16846
16847 /* Prepare to adjust the return value. */
16848 retval = gen_reg_rtx (SImode);
16849 mode = SImode;
16850 }
16851
16852 mem = rs6000_pre_atomic_barrier (mem, model);
16853
16854 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16855 emit_label (XEXP (label, 0));
16856
16857 emit_load_locked (mode, retval, mem);
16858
16859 x = val;
16860 if (mask)
16861 x = rs6000_mask_atomic_subword (retval, val, mask);
16862
16863 cond = gen_reg_rtx (CCmode);
16864 emit_store_conditional (mode, cond, mem, x);
16865
16866 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16867 emit_unlikely_jump (x, label);
16868
16869 rs6000_post_atomic_barrier (model);
16870
16871 if (shift)
16872 rs6000_finish_atomic_subword (operands[0], retval, shift);
16873 }
16874
16875 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
16876 to perform. MEM is the memory on which to operate. VAL is the second
16877 operand of the binary operator. BEFORE and AFTER are optional locations to
16878 return the value of MEM either before or after the operation. MODEL_RTX
16879 is a CONST_INT containing the memory model to use. */
16880
16881 void
16882 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
16883 rtx orig_before, rtx orig_after, rtx model_rtx)
16884 {
16885 enum memmodel model = memmodel_base (INTVAL (model_rtx));
16886 machine_mode mode = GET_MODE (mem);
16887 machine_mode store_mode = mode;
16888 rtx label, x, cond, mask, shift;
16889 rtx before = orig_before, after = orig_after;
16890
16891 mask = shift = NULL_RTX;
16892 /* On power8, we want to use SImode for the operation. On previous systems,
16893 use the operation in a subword and shift/mask to get the proper byte or
16894 halfword. */
16895 if (mode == QImode || mode == HImode)
16896 {
16897 if (TARGET_SYNC_HI_QI)
16898 {
16899 val = convert_modes (SImode, mode, val, 1);
16900
16901 /* Prepare to adjust the return value. */
16902 before = gen_reg_rtx (SImode);
16903 if (after)
16904 after = gen_reg_rtx (SImode);
16905 mode = SImode;
16906 }
16907 else
16908 {
16909 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16910
16911 /* Shift and mask VAL into position within the word. */
16912 val = convert_modes (SImode, mode, val, 1);
16913 val = expand_simple_binop (SImode, ASHIFT, val, shift,
16914 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16915
16916 switch (code)
16917 {
16918 case IOR:
16919 case XOR:
16920 /* We've already zero-extended VAL. That is sufficient to
16921 make certain that it does not affect other bits. */
16922 mask = NULL;
16923 break;
16924
16925 case AND:
16926 /* If we make certain that all of the other bits in VAL are
16927 set, that will be sufficient to not affect other bits. */
16928 x = gen_rtx_NOT (SImode, mask);
16929 x = gen_rtx_IOR (SImode, x, val);
16930 emit_insn (gen_rtx_SET (val, x));
16931 mask = NULL;
16932 break;
16933
16934 case NOT:
16935 case PLUS:
16936 case MINUS:
16937 /* These will all affect bits outside the field and need
16938 adjustment via MASK within the loop. */
16939 break;
16940
16941 default:
16942 gcc_unreachable ();
16943 }
16944
16945 /* Prepare to adjust the return value. */
16946 before = gen_reg_rtx (SImode);
16947 if (after)
16948 after = gen_reg_rtx (SImode);
16949 store_mode = mode = SImode;
16950 }
16951 }
16952
16953 mem = rs6000_pre_atomic_barrier (mem, model);
16954
16955 label = gen_label_rtx ();
16956 emit_label (label);
16957 label = gen_rtx_LABEL_REF (VOIDmode, label);
16958
16959 if (before == NULL_RTX)
16960 before = gen_reg_rtx (mode);
16961
16962 emit_load_locked (mode, before, mem);
16963
16964 if (code == NOT)
16965 {
16966 x = expand_simple_binop (mode, AND, before, val,
16967 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16968 after = expand_simple_unop (mode, NOT, x, after, 1);
16969 }
16970 else
16971 {
16972 after = expand_simple_binop (mode, code, before, val,
16973 after, 1, OPTAB_LIB_WIDEN);
16974 }
16975
16976 x = after;
16977 if (mask)
16978 {
16979 x = expand_simple_binop (SImode, AND, after, mask,
16980 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16981 x = rs6000_mask_atomic_subword (before, x, mask);
16982 }
16983 else if (store_mode != mode)
16984 x = convert_modes (store_mode, mode, x, 1);
16985
16986 cond = gen_reg_rtx (CCmode);
16987 emit_store_conditional (store_mode, cond, mem, x);
16988
16989 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16990 emit_unlikely_jump (x, label);
16991
16992 rs6000_post_atomic_barrier (model);
16993
16994 if (shift)
16995 {
16996 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
16997 then do the calculations in a SImode register. */
16998 if (orig_before)
16999 rs6000_finish_atomic_subword (orig_before, before, shift);
17000 if (orig_after)
17001 rs6000_finish_atomic_subword (orig_after, after, shift);
17002 }
17003 else if (store_mode != mode)
17004 {
17005 /* QImode/HImode on machines with lbarx/lharx where we do the native
17006 operation and then do the calculations in a SImode register. */
17007 if (orig_before)
17008 convert_move (orig_before, before, 1);
17009 if (orig_after)
17010 convert_move (orig_after, after, 1);
17011 }
17012 else if (orig_after && after != orig_after)
17013 emit_move_insn (orig_after, after);
17014 }
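/* Illustrative sketch (not from the original source): __atomic_fetch_add
   on an int reaches this routine with CODE == PLUS and expands to roughly

	1: lwarx  9,0,3		; before = *mem
	   add    10,9,4	; after = before + val
	   stwcx. 10,0,3
	   bne-   1b

   with BEFORE/AFTER copied out afterwards as requested.  CODE == NOT
   is the NAND case: AND followed by a one's complement, as above.  */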
17015
17016 static GTY(()) alias_set_type TOC_alias_set = -1;
17017
17018 alias_set_type
17019 get_TOC_alias_set (void)
17020 {
17021 if (TOC_alias_set == -1)
17022 TOC_alias_set = new_alias_set ();
17023 return TOC_alias_set;
17024 }
17025
17026 /* The mode the ABI uses for a word. This is not the same as word_mode
17027 for -m32 -mpowerpc64. This is used to implement various target hooks. */
17028
17029 static scalar_int_mode
17030 rs6000_abi_word_mode (void)
17031 {
17032 return TARGET_32BIT ? SImode : DImode;
17033 }
17034
17035 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
17036 static char *
17037 rs6000_offload_options (void)
17038 {
17039 if (TARGET_64BIT)
17040 return xstrdup ("-foffload-abi=lp64");
17041 else
17042 return xstrdup ("-foffload-abi=ilp32");
17043 }
17044
17045 \f
17046 /* A quick summary of the various types of 'constant-pool tables'
17047 under PowerPC:
17048
17049 Target Flags Name One table per
17050 AIX (none) AIX TOC object file
17051 AIX -mfull-toc AIX TOC object file
17052 AIX -mminimal-toc AIX minimal TOC translation unit
17053 SVR4/EABI (none) SVR4 SDATA object file
17054 SVR4/EABI -fpic SVR4 pic object file
17055 SVR4/EABI -fPIC SVR4 PIC translation unit
17056 SVR4/EABI -mrelocatable EABI TOC function
17057 SVR4/EABI -maix AIX TOC object file
17058 SVR4/EABI -maix -mminimal-toc
17059 AIX minimal TOC translation unit
17060
17061 Name Reg. Set by entries contains:
17062 made by addrs? fp? sum?
17063
17064 AIX TOC 2 crt0 as Y option option
17065 AIX minimal TOC 30 prolog gcc Y Y option
17066 SVR4 SDATA 13 crt0 gcc N Y N
17067 SVR4 pic 30 prolog ld Y not yet N
17068 SVR4 PIC 30 prolog gcc Y option option
17069 EABI TOC 30 prolog gcc Y option option
17070
17071 */
17072
17073 /* Hash functions for the hash table. */
17074
17075 static unsigned
17076 rs6000_hash_constant (rtx k)
17077 {
17078 enum rtx_code code = GET_CODE (k);
17079 machine_mode mode = GET_MODE (k);
17080 unsigned result = (code << 3) ^ mode;
17081 const char *format;
17082 int flen, fidx;
17083
17084 format = GET_RTX_FORMAT (code);
17085 flen = strlen (format);
17086 fidx = 0;
17087
17088 switch (code)
17089 {
17090 case LABEL_REF:
17091 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
17092
17093 case CONST_WIDE_INT:
17094 {
17095 int i;
17096 flen = CONST_WIDE_INT_NUNITS (k);
17097 for (i = 0; i < flen; i++)
17098 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
17099 return result;
17100 }
17101
17102 case CONST_DOUBLE:
17103 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
17104
17105 case CODE_LABEL:
17106 fidx = 3;
17107 break;
17108
17109 default:
17110 break;
17111 }
17112
17113 for (; fidx < flen; fidx++)
17114 switch (format[fidx])
17115 {
17116 case 's':
17117 {
17118 unsigned i, len;
17119 const char *str = XSTR (k, fidx);
17120 len = strlen (str);
17121 result = result * 613 + len;
17122 for (i = 0; i < len; i++)
17123 result = result * 613 + (unsigned) str[i];
17124 break;
17125 }
17126 case 'u':
17127 case 'e':
17128 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
17129 break;
17130 case 'i':
17131 case 'n':
17132 result = result * 613 + (unsigned) XINT (k, fidx);
17133 break;
17134 case 'w':
17135 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
17136 result = result * 613 + (unsigned) XWINT (k, fidx);
17137 else
17138 {
17139 size_t i;
17140 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
17141 result = result * 613 + (unsigned) (XWINT (k, fidx)
17142 >> CHAR_BIT * i);
17143 }
17144 break;
17145 case '0':
17146 break;
17147 default:
17148 gcc_unreachable ();
17149 }
17150
17151 return result;
17152 }
17153
17154 hashval_t
17155 toc_hasher::hash (toc_hash_struct *thc)
17156 {
17157 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
17158 }
17159
17160 /* Compare H1 and H2 for equivalence. */
17161
17162 bool
17163 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
17164 {
17165 rtx r1 = h1->key;
17166 rtx r2 = h2->key;
17167
17168 if (h1->key_mode != h2->key_mode)
17169 return 0;
17170
17171 return rtx_equal_p (r1, r2);
17172 }
17173
17174 /* These are the names given by the C++ front-end to vtables, and
17175 vtable-like objects. Ideally, this logic should not be here;
17176 instead, there should be some programmatic way of inquiring as
17177 to whether or not an object is a vtable. */
17178
17179 #define VTABLE_NAME_P(NAME) \
17180 (startswith (name, "_vt.") \
17181 || startswith (name, "_ZTV") \
17182 || startswith (name, "_ZTT") \
17183 || startswith (name, "_ZTI") \
17184 || startswith (name, "_ZTC"))
17185
17186 #ifdef NO_DOLLAR_IN_LABEL
17187 /* Return a GGC-allocated character string translating dollar signs in
17188 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
17189
17190 const char *
17191 rs6000_xcoff_strip_dollar (const char *name)
17192 {
17193 char *strip, *p;
17194 const char *q;
17195 size_t len;
17196
17197 q = (const char *) strchr (name, '$');
17198
17199 if (q == 0 || q == name)
17200 return name;
17201
17202 len = strlen (name);
17203 strip = XALLOCAVEC (char, len + 1);
17204 strcpy (strip, name);
17205 p = strip + (q - name);
17206 while (p)
17207 {
17208 *p = '_';
17209 p = strchr (p + 1, '$');
17210 }
17211
17212 return ggc_alloc_string (strip, len);
17213 }
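/* Example (illustrative): rs6000_xcoff_strip_dollar ("foo$bar$baz")
   returns "foo_bar_baz"; a name with no '$', or whose only '$' is the
   first character, is returned unchanged.  */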
17214 #endif
17215
17216 void
17217 rs6000_output_symbol_ref (FILE *file, rtx x)
17218 {
17219 const char *name = XSTR (x, 0);
17220
17221 /* Currently C++ toc references to vtables can be emitted before it
17222 is decided whether the vtable is public or private. If this is
17223 the case, then the linker will eventually complain that there is
17224 a reference to an unknown section. Thus, for vtables only,
17225 we emit the TOC reference to reference the identifier and not the
17226 symbol. */
17227 if (VTABLE_NAME_P (name))
17228 {
17229 RS6000_OUTPUT_BASENAME (file, name);
17230 }
17231 else
17232 assemble_name (file, name);
17233 }
17234
17235 /* Output a TOC entry. We derive the entry name from what is being
17236 written. */
17237
17238 void
17239 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
17240 {
17241 char buf[256];
17242 const char *name = buf;
17243 rtx base = x;
17244 HOST_WIDE_INT offset = 0;
17245
17246 gcc_assert (!TARGET_NO_TOC_OR_PCREL);
17247
17248 /* When the linker won't eliminate them, don't output duplicate
17249 TOC entries (this happens on AIX if there is any kind of TOC,
17250 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
17251 CODE_LABELs. */
17252 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
17253 {
17254 struct toc_hash_struct *h;
17255
17256 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
17257 time because GGC is not initialized at that point. */
17258 if (toc_hash_table == NULL)
17259 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
17260
17261 h = ggc_alloc<toc_hash_struct> ();
17262 h->key = x;
17263 h->key_mode = mode;
17264 h->labelno = labelno;
17265
17266 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
17267 if (*found == NULL)
17268 *found = h;
17269 else /* This is indeed a duplicate.
17270 Set this label equal to that label. */
17271 {
17272 fputs ("\t.set ", file);
17273 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
17274 fprintf (file, "%d,", labelno);
17275 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
17276 fprintf (file, "%d\n", ((*found)->labelno));
17277
17278 #ifdef HAVE_AS_TLS
17279 if (TARGET_XCOFF && SYMBOL_REF_P (x)
17280 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
17281 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
17282 {
17283 fputs ("\t.set ", file);
17284 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
17285 fprintf (file, "%d,", labelno);
17286 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
17287 fprintf (file, "%d\n", ((*found)->labelno));
17288 }
17289 #endif
17290 return;
17291 }
17292 }
17293
17294 /* If we're going to put a double constant in the TOC, make sure it's
17295 aligned properly when strict alignment is on. */
17296 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
17297 && STRICT_ALIGNMENT
17298 && GET_MODE_BITSIZE (mode) >= 64
17299 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
17300 ASM_OUTPUT_ALIGN (file, 3);
17301
17302
17303 (*targetm.asm_out.internal_label) (file, "LC", labelno);
17304
17305 /* Handle FP constants specially. Note that if we have a minimal
17306 TOC, things we put here aren't actually in the TOC, so we can allow
17307 FP constants. */
17308 if (CONST_DOUBLE_P (x)
17309 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
17310 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
17311 {
17312 long k[4];
17313
17314 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17315 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
17316 else
17317 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
17318
17319 if (TARGET_64BIT)
17320 {
17321 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17322 fputs (DOUBLE_INT_ASM_OP, file);
17323 else
17324 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17325 k[0] & 0xffffffff, k[1] & 0xffffffff,
17326 k[2] & 0xffffffff, k[3] & 0xffffffff);
17327 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
17328 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
17329 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
17330 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
17331 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
17332 return;
17333 }
17334 else
17335 {
17336 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17337 fputs ("\t.long ", file);
17338 else
17339 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17340 k[0] & 0xffffffff, k[1] & 0xffffffff,
17341 k[2] & 0xffffffff, k[3] & 0xffffffff);
17342 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
17343 k[0] & 0xffffffff, k[1] & 0xffffffff,
17344 k[2] & 0xffffffff, k[3] & 0xffffffff);
17345 return;
17346 }
17347 }
17348 else if (CONST_DOUBLE_P (x)
17349 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
17350 {
17351 long k[2];
17352
17353 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17354 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
17355 else
17356 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
17357
17358 if (TARGET_64BIT)
17359 {
17360 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17361 fputs (DOUBLE_INT_ASM_OP, file);
17362 else
17363 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
17364 k[0] & 0xffffffff, k[1] & 0xffffffff);
17365 fprintf (file, "0x%lx%08lx\n",
17366 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
17367 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
17368 return;
17369 }
17370 else
17371 {
17372 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17373 fputs ("\t.long ", file);
17374 else
17375 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
17376 k[0] & 0xffffffff, k[1] & 0xffffffff);
17377 fprintf (file, "0x%lx,0x%lx\n",
17378 k[0] & 0xffffffff, k[1] & 0xffffffff);
17379 return;
17380 }
17381 }
17382 else if (CONST_DOUBLE_P (x)
17383 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
17384 {
17385 long l;
17386
17387 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17388 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
17389 else
17390 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17391
17392 if (TARGET_64BIT)
17393 {
17394 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17395 fputs (DOUBLE_INT_ASM_OP, file);
17396 else
17397 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
17398 if (WORDS_BIG_ENDIAN)
17399 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
17400 else
17401 fprintf (file, "0x%lx\n", l & 0xffffffff);
17402 return;
17403 }
17404 else
17405 {
17406 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17407 fputs ("\t.long ", file);
17408 else
17409 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
17410 fprintf (file, "0x%lx\n", l & 0xffffffff);
17411 return;
17412 }
17413 }
17414 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
17415 {
17416 unsigned HOST_WIDE_INT low;
17417 HOST_WIDE_INT high;
17418
17419 low = INTVAL (x) & 0xffffffff;
17420 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
17421
17422 /* TOC entries are always Pmode-sized, so when big-endian
17423 smaller integer constants in the TOC need to be padded.
17424 (This is still a win over putting the constants in
17425 a separate constant pool, because then we'd have
17426 to have both a TOC entry _and_ the actual constant.)
17427
17428 For a 32-bit target, CONST_INT values are loaded and shifted
17429 entirely within `low' and can be stored in one TOC entry. */
17430
17431 /* It would be easy to make this work, but it doesn't now. */
17432 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
17433
17434 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
17435 {
17436 low |= high << 32;
17437 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
17438 high = (HOST_WIDE_INT) low >> 32;
17439 low &= 0xffffffff;
17440 }
17441
17442 if (TARGET_64BIT)
17443 {
17444 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17445 fputs (DOUBLE_INT_ASM_OP, file);
17446 else
17447 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
17448 (long) high & 0xffffffff, (long) low & 0xffffffff);
17449 fprintf (file, "0x%lx%08lx\n",
17450 (long) high & 0xffffffff, (long) low & 0xffffffff);
17451 return;
17452 }
17453 else
17454 {
17455 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
17456 {
17457 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17458 fputs ("\t.long ", file);
17459 else
17460 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
17461 (long) high & 0xffffffff, (long) low & 0xffffffff);
17462 fprintf (file, "0x%lx,0x%lx\n",
17463 (long) high & 0xffffffff, (long) low & 0xffffffff);
17464 }
17465 else
17466 {
17467 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17468 fputs ("\t.long ", file);
17469 else
17470 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
17471 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
17472 }
17473 return;
17474 }
17475 }
17476
17477 if (GET_CODE (x) == CONST)
17478 {
17479 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
17480 && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
17481
17482 base = XEXP (XEXP (x, 0), 0);
17483 offset = INTVAL (XEXP (XEXP (x, 0), 1));
17484 }
17485
17486 switch (GET_CODE (base))
17487 {
17488 case SYMBOL_REF:
17489 name = XSTR (base, 0);
17490 break;
17491
17492 case LABEL_REF:
17493 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
17494 CODE_LABEL_NUMBER (XEXP (base, 0)));
17495 break;
17496
17497 case CODE_LABEL:
17498 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
17499 break;
17500
17501 default:
17502 gcc_unreachable ();
17503 }
17504
17505 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17506 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
17507 else
17508 {
17509 fputs ("\t.tc ", file);
17510 RS6000_OUTPUT_BASENAME (file, name);
17511
17512 if (offset < 0)
17513 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
17514 else if (offset)
17515 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
17516
17517 /* Mark large TOC symbols on AIX with [TE] so they are mapped
17518 after other TOC symbols, reducing overflow of small TOC access
17519 to [TC] symbols. */
17520 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
17521 ? "[TE]," : "[TC],", file);
17522 }
17523
17524 /* Currently C++ toc references to vtables can be emitted before it
17525 is decided whether the vtable is public or private. If this is
17526 the case, then the linker will eventually complain that there is
17527 a TOC reference to an unknown section. Thus, for vtables only,
17528 we emit the TOC reference to reference the symbol and not the
17529 section. */
17530 if (VTABLE_NAME_P (name))
17531 {
17532 RS6000_OUTPUT_BASENAME (file, name);
17533 if (offset < 0)
17534 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
17535 else if (offset > 0)
17536 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
17537 }
17538 else
17539 output_addr_const (file, x);
17540
17541 #if HAVE_AS_TLS
17542 if (TARGET_XCOFF && SYMBOL_REF_P (base))
17543 {
17544 switch (SYMBOL_REF_TLS_MODEL (base))
17545 {
17546 case 0:
17547 break;
17548 case TLS_MODEL_LOCAL_EXEC:
17549 fputs ("@le", file);
17550 break;
17551 case TLS_MODEL_INITIAL_EXEC:
17552 fputs ("@ie", file);
17553 break;
17554 /* Use global-dynamic for local-dynamic. */
17555 case TLS_MODEL_GLOBAL_DYNAMIC:
17556 case TLS_MODEL_LOCAL_DYNAMIC:
17557 putc ('\n', file);
17558 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
17559 fputs ("\t.tc .", file);
17560 RS6000_OUTPUT_BASENAME (file, name);
17561 fputs ("[TC],", file);
17562 output_addr_const (file, x);
17563 fputs ("@m", file);
17564 break;
17565 default:
17566 gcc_unreachable ();
17567 }
17568 }
17569 #endif
17570
17571 putc ('\n', file);
17572 }
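/* Illustrative output (64-bit ELF, big-endian), not from the original
   source: the double constant pi and the float 3.14f would be emitted
   roughly as

	.LC5:
		.quad 0x400921fb54442d18
	.LC6:
		.quad 0x4048f5c300000000	# float padded to a word

   while classic AIX XCOFF spells the double as

		.tc FD_400921fb_54442d18[TC],0x400921fb54442d18

   folding the value into the entry name so duplicates can be merged.  */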
17573 \f
17574 /* Output an assembler pseudo-op to write an ASCII string of N characters
17575 starting at P to FILE.
17576
17577 On the RS/6000, we have to do this using the .byte operation and
17578 write out special characters outside the quoted string.
17579 Also, the assembler is broken; very long strings are truncated,
17580 so we must artificially break them up early. */
17581
17582 void
17583 output_ascii (FILE *file, const char *p, int n)
17584 {
17585 char c;
17586 int i, count_string;
17587 const char *for_string = "\t.byte \"";
17588 const char *for_decimal = "\t.byte ";
17589 const char *to_close = NULL;
17590
17591 count_string = 0;
17592 for (i = 0; i < n; i++)
17593 {
17594 c = *p++;
17595 if (c >= ' ' && c < 0177)
17596 {
17597 if (for_string)
17598 fputs (for_string, file);
17599 putc (c, file);
17600
17601 /* Write two quotes to get one. */
17602 if (c == '"')
17603 {
17604 putc (c, file);
17605 ++count_string;
17606 }
17607
17608 for_string = NULL;
17609 for_decimal = "\"\n\t.byte ";
17610 to_close = "\"\n";
17611 ++count_string;
17612
17613 if (count_string >= 512)
17614 {
17615 fputs (to_close, file);
17616
17617 for_string = "\t.byte \"";
17618 for_decimal = "\t.byte ";
17619 to_close = NULL;
17620 count_string = 0;
17621 }
17622 }
17623 else
17624 {
17625 if (for_decimal)
17626 fputs (for_decimal, file);
17627 fprintf (file, "%d", c);
17628
17629 for_string = "\n\t.byte \"";
17630 for_decimal = ", ";
17631 to_close = "\n";
17632 count_string = 0;
17633 }
17634 }
17635
17636 /* Now close the string if we have written one. Then end the line. */
17637 if (to_close)
17638 fputs (to_close, file);
17639 }
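/* Example (illustrative): for the three bytes "hi\n" this emits

	.byte "hi"
	.byte 10

   printable runs share one quoted directive (with '"' doubled to
   escape it), non-printable bytes are written in decimal, and strings
   are split every 512 characters to placate the assembler.  */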
17640 \f
17641 /* Generate a unique section name for FILENAME for a section type
17642 represented by SECTION_DESC. Output goes into BUF.
17643
17644 SECTION_DESC can be any string, as long as it is different for each
17645 possible section type.
17646
17647 We name the section in the same manner as xlc. The name begins with an
17648 underscore followed by the filename (after stripping any leading directory
17649 names) with the last period replaced by the string SECTION_DESC. If
17650 FILENAME does not contain a period, SECTION_DESC is appended to the end of
17651 the name. */
17652
17653 void
17654 rs6000_gen_section_name (char **buf, const char *filename,
17655 const char *section_desc)
17656 {
17657 const char *q, *after_last_slash, *last_period = 0;
17658 char *p;
17659 int len;
17660
17661 after_last_slash = filename;
17662 for (q = filename; *q; q++)
17663 {
17664 if (*q == '/')
17665 after_last_slash = q + 1;
17666 else if (*q == '.')
17667 last_period = q;
17668 }
17669
17670 len = strlen (after_last_slash) + strlen (section_desc) + 2;
17671 *buf = (char *) xmalloc (len);
17672
17673 p = *buf;
17674 *p++ = '_';
17675
17676 for (q = after_last_slash; *q; q++)
17677 {
17678 if (q == last_period)
17679 {
17680 strcpy (p, section_desc);
17681 p += strlen (section_desc);
17682 break;
17683 }
17684
17685 else if (ISALNUM (*q))
17686 *p++ = *q;
17687 }
17688
17689 if (last_period == 0)
17690 strcpy (p, section_desc);
17691 else
17692 *p = '\0';
17693 }
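
/* Illustrative usage sketch (hypothetical, kept out of the build with
   #if 0): directory components are stripped, non-alphanumeric characters
   dropped, and the last period replaced by the descriptor, so this call
   leaves BUF pointing at the xmalloc'd string "_hellodata_".  */
#if 0
static void
gen_section_name_demo (void)
{
  char *buf;
  rs6000_gen_section_name (&buf, "proj/src/hello.c", "data_");
  /* buf now holds "_hellodata_"; the caller owns the storage.  */
  free (buf);
}
#endif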
17694 \f
17695 /* Emit profile function. */
17696
17697 void
17698 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
17699 {
17700 /* Non-standard profiling for kernels, which just saves LR then calls
17701 _mcount without worrying about arg saves. The idea is to change
17702 the function prologue as little as possible as it isn't easy to
17703 account for arg save/restore code added just for _mcount. */
17704 if (TARGET_PROFILE_KERNEL)
17705 return;
17706
17707 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
17708 {
17709 #ifndef NO_PROFILE_COUNTERS
17710 # define NO_PROFILE_COUNTERS 0
17711 #endif
17712 if (NO_PROFILE_COUNTERS)
17713 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
17714 LCT_NORMAL, VOIDmode);
17715 else
17716 {
17717 char buf[30];
17718 const char *label_name;
17719 rtx fun;
17720
17721 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
17722 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
17723 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
17724
17725 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
17726 LCT_NORMAL, VOIDmode, fun, Pmode);
17727 }
17728 }
17729 else if (DEFAULT_ABI == ABI_DARWIN)
17730 {
17731 const char *mcount_name = RS6000_MCOUNT;
17732 int caller_addr_regno = LR_REGNO;
17733
17734 /* Be conservative and always set this, at least for now. */
17735 crtl->uses_pic_offset_table = 1;
17736
17737 #if TARGET_MACHO
17738 /* For PIC code, set up a stub and collect the caller's address
17739 from r0, which is where the prologue puts it. */
17740 if (MACHOPIC_INDIRECT
17741 && crtl->uses_pic_offset_table)
17742 caller_addr_regno = 0;
17743 #endif
17744 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
17745 LCT_NORMAL, VOIDmode,
17746 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
17747 }
17748 }
17749
17750 /* Write function profiler code. */
17751
17752 void
17753 output_function_profiler (FILE *file, int labelno)
17754 {
17755 char buf[100];
17756
17757 switch (DEFAULT_ABI)
17758 {
17759 default:
17760 gcc_unreachable ();
17761
17762 case ABI_V4:
17763 if (!TARGET_32BIT)
17764 {
17765 warning (0, "no profiling of 64-bit code for this ABI");
17766 return;
17767 }
17768 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
17769 fprintf (file, "\tmflr %s\n", reg_names[0]);
17770 if (NO_PROFILE_COUNTERS)
17771 {
17772 asm_fprintf (file, "\tstw %s,4(%s)\n",
17773 reg_names[0], reg_names[1]);
17774 }
17775 else if (TARGET_SECURE_PLT && flag_pic)
17776 {
17777 if (TARGET_LINK_STACK)
17778 {
17779 char name[32];
17780 get_ppc476_thunk_name (name);
17781 asm_fprintf (file, "\tbl %s\n", name);
17782 }
17783 else
17784 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
17785 asm_fprintf (file, "\tstw %s,4(%s)\n",
17786 reg_names[0], reg_names[1]);
17787 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
17788 asm_fprintf (file, "\taddis %s,%s,",
17789 reg_names[12], reg_names[12]);
17790 assemble_name (file, buf);
17791 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
17792 assemble_name (file, buf);
17793 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
17794 }
17795 else if (flag_pic == 1)
17796 {
17797 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
17798 asm_fprintf (file, "\tstw %s,4(%s)\n",
17799 reg_names[0], reg_names[1]);
17800 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
17801 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
17802 assemble_name (file, buf);
17803 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
17804 }
17805 else if (flag_pic > 1)
17806 {
17807 asm_fprintf (file, "\tstw %s,4(%s)\n",
17808 reg_names[0], reg_names[1]);
17809 /* Now, we need to get the address of the label. */
17810 if (TARGET_LINK_STACK)
17811 {
17812 char name[32];
17813 get_ppc476_thunk_name (name);
17814 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
17815 assemble_name (file, buf);
17816 fputs ("-.\n1:", file);
17817 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
17818 asm_fprintf (file, "\taddi %s,%s,4\n",
17819 reg_names[11], reg_names[11]);
17820 }
17821 else
17822 {
17823 fputs ("\tbcl 20,31,1f\n\t.long ", file);
17824 assemble_name (file, buf);
17825 fputs ("-.\n1:", file);
17826 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
17827 }
17828 asm_fprintf (file, "\tlwz %s,0(%s)\n",
17829 reg_names[0], reg_names[11]);
17830 asm_fprintf (file, "\tadd %s,%s,%s\n",
17831 reg_names[0], reg_names[0], reg_names[11]);
17832 }
17833 else
17834 {
17835 asm_fprintf (file, "\tlis %s,", reg_names[12]);
17836 assemble_name (file, buf);
17837 fputs ("@ha\n", file);
17838 asm_fprintf (file, "\tstw %s,4(%s)\n",
17839 reg_names[0], reg_names[1]);
17840 asm_fprintf (file, "\tla %s,", reg_names[0]);
17841 assemble_name (file, buf);
17842 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
17843 }
17844
17845 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
17846 fprintf (file, "\tbl %s%s\n",
17847 RS6000_MCOUNT, flag_pic ? "@plt" : "");
17848 break;
17849
17850 case ABI_AIX:
17851 case ABI_ELFv2:
17852 case ABI_DARWIN:
17853 /* Don't do anything, done in output_profile_hook (). */
17854 break;
17855 }
17856 }
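
/* Worked example (illustrative; the exact label spelling and register
   names depend on the target's ASM_GENERATE_INTERNAL_LABEL and
   reg_names): for the ABI_V4 non-PIC path with profile counters enabled
   and label number 0, the code above emits roughly

	mflr 0
	lis 12,.LP0@ha
	stw 0,4(1)
	la 0,.LP0@l(12)
	bl _mcount

   i.e. the saved link register goes to 4(r1) and the address of the
   counter label is materialized in r0 as the argument to _mcount.  */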
17857
17858 \f
17859
17860 /* The following variable value is the last issued insn. */
17861
17862 static rtx_insn *last_scheduled_insn;
17863
17864 /* The following variable helps to balance issuing of load and
17865 store instructions */
17866
17867 static int load_store_pendulum;
17868
17869 /* The following variable helps pair divide insns during scheduling. */
17870 static int divide_cnt;
17871 /* The following variable helps pair and alternate vector and vector load
17872 insns during scheduling. */
17873 static int vec_pairing;
17874
17875
17876 /* Power4 load update and store update instructions are cracked into a
17877 load or store and an integer insn which are executed in the same cycle.
17878 Branches have their own dispatch slot which does not count against the
17879 GCC issue rate, but it changes the program flow so there are no other
17880 instructions to issue in this cycle. */
17881
17882 static int
17883 rs6000_variable_issue_1 (rtx_insn *insn, int more)
17884 {
17885 last_scheduled_insn = insn;
17886 if (GET_CODE (PATTERN (insn)) == USE
17887 || GET_CODE (PATTERN (insn)) == CLOBBER)
17888 {
17889 cached_can_issue_more = more;
17890 return cached_can_issue_more;
17891 }
17892
17893 if (insn_terminates_group_p (insn, current_group))
17894 {
17895 cached_can_issue_more = 0;
17896 return cached_can_issue_more;
17897 }
17898
17899 /* If the insn is unrecognized (no reservation), leave the issue count unchanged. */
17900 if (recog_memoized (insn) < 0)
17901 return more;
17902
17903 if (rs6000_sched_groups)
17904 {
17905 if (is_microcoded_insn (insn))
17906 cached_can_issue_more = 0;
17907 else if (is_cracked_insn (insn))
17908 cached_can_issue_more = more > 2 ? more - 2 : 0;
17909 else
17910 cached_can_issue_more = more - 1;
17911
17912 return cached_can_issue_more;
17913 }
17914
17915 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
17916 return 0;
17917
17918 cached_can_issue_more = more - 1;
17919 return cached_can_issue_more;
17920 }
17921
17922 static int
17923 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
17924 {
17925 int r = rs6000_variable_issue_1 (insn, more);
17926 if (verbose)
17927 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
17928 return r;
17929 }
17930
17931 /* Adjust the cost of a scheduling dependency. Return the new cost of
17932 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
17933
17934 static int
17935 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
17936 unsigned int)
17937 {
17938 enum attr_type attr_type;
17939
17940 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
17941 return cost;
17942
17943 switch (dep_type)
17944 {
17945 case REG_DEP_TRUE:
17946 {
17947 /* Data dependency; DEP_INSN writes a register that INSN reads
17948 some cycles later. */
17949
17950 /* Separate a load from a narrower, dependent store. */
17951 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
17952 || rs6000_tune == PROCESSOR_POWER10)
17953 && GET_CODE (PATTERN (insn)) == SET
17954 && GET_CODE (PATTERN (dep_insn)) == SET
17955 && MEM_P (XEXP (PATTERN (insn), 1))
17956 && MEM_P (XEXP (PATTERN (dep_insn), 0))
17957 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
17958 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
17959 return cost + 14;
17960
17961 attr_type = get_attr_type (insn);
17962
17963 switch (attr_type)
17964 {
17965 case TYPE_JMPREG:
17966 /* Tell the first scheduling pass about the latency between
17967 a mtctr and bctr (and mtlr and br/blr). The first
17968 scheduling pass will not know about this latency since
17969 the mtctr instruction, which has the latency associated
17970 to it, will be generated by reload. */
17971 return 4;
17972 case TYPE_BRANCH:
17973 /* Leave some extra cycles between a compare and its
17974 dependent branch, to inhibit expensive mispredicts. */
17975 if ((rs6000_tune == PROCESSOR_PPC603
17976 || rs6000_tune == PROCESSOR_PPC604
17977 || rs6000_tune == PROCESSOR_PPC604e
17978 || rs6000_tune == PROCESSOR_PPC620
17979 || rs6000_tune == PROCESSOR_PPC630
17980 || rs6000_tune == PROCESSOR_PPC750
17981 || rs6000_tune == PROCESSOR_PPC7400
17982 || rs6000_tune == PROCESSOR_PPC7450
17983 || rs6000_tune == PROCESSOR_PPCE5500
17984 || rs6000_tune == PROCESSOR_PPCE6500
17985 || rs6000_tune == PROCESSOR_POWER4
17986 || rs6000_tune == PROCESSOR_POWER5
17987 || rs6000_tune == PROCESSOR_POWER7
17988 || rs6000_tune == PROCESSOR_POWER8
17989 || rs6000_tune == PROCESSOR_POWER9
17990 || rs6000_tune == PROCESSOR_POWER10
17991 || rs6000_tune == PROCESSOR_CELL)
17992 && recog_memoized (dep_insn)
17993 && (INSN_CODE (dep_insn) >= 0))
17994
17995 switch (get_attr_type (dep_insn))
17996 {
17997 case TYPE_CMP:
17998 case TYPE_FPCOMPARE:
17999 case TYPE_CR_LOGICAL:
18000 return cost + 2;
18001 case TYPE_EXTS:
18002 case TYPE_MUL:
18003 if (get_attr_dot (dep_insn) == DOT_YES)
18004 return cost + 2;
18005 else
18006 break;
18007 case TYPE_SHIFT:
18008 if (get_attr_dot (dep_insn) == DOT_YES
18009 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
18010 return cost + 2;
18011 else
18012 break;
18013 default:
18014 break;
18015 }
18016 break;
18017
18018 case TYPE_STORE:
18019 case TYPE_FPSTORE:
18020 if ((rs6000_tune == PROCESSOR_POWER6)
18021 && recog_memoized (dep_insn)
18022 && (INSN_CODE (dep_insn) >= 0))
18023 {
18024
18025 if (GET_CODE (PATTERN (insn)) != SET)
18026 /* If this happens, this code must be extended to schedule
18027 optimally; return the default cost for now. */
18028 return cost;
18029
18030 /* Adjust the cost for the case where the value written
18031 by a fixed point operation is used as the address
18032 gen value on a store. */
18033 switch (get_attr_type (dep_insn))
18034 {
18035 case TYPE_LOAD:
18036 case TYPE_CNTLZ:
18037 {
18038 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18039 return get_attr_sign_extend (dep_insn)
18040 == SIGN_EXTEND_YES ? 6 : 4;
18041 break;
18042 }
18043 case TYPE_SHIFT:
18044 {
18045 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18046 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
18047 6 : 3;
18048 break;
18049 }
18050 case TYPE_INTEGER:
18051 case TYPE_ADD:
18052 case TYPE_LOGICAL:
18053 case TYPE_EXTS:
18054 case TYPE_INSERT:
18055 {
18056 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18057 return 3;
18058 break;
18059 }
18060 case TYPE_STORE:
18061 case TYPE_FPLOAD:
18062 case TYPE_FPSTORE:
18063 {
18064 if (get_attr_update (dep_insn) == UPDATE_YES
18065 && ! rs6000_store_data_bypass_p (dep_insn, insn))
18066 return 3;
18067 break;
18068 }
18069 case TYPE_MUL:
18070 {
18071 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18072 return 17;
18073 break;
18074 }
18075 case TYPE_DIV:
18076 {
18077 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18078 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
18079 break;
18080 }
18081 default:
18082 break;
18083 }
18084 }
18085 break;
18086
18087 case TYPE_LOAD:
18088 if ((rs6000_tune == PROCESSOR_POWER6)
18089 && recog_memoized (dep_insn)
18090 && (INSN_CODE (dep_insn) >= 0))
18091 {
18092
18093 /* Adjust the cost for the case where the value written
18094 by a fixed point instruction is used within the address
18095 gen portion of a subsequent load(u)(x). */
18096 switch (get_attr_type (dep_insn))
18097 {
18098 case TYPE_LOAD:
18099 case TYPE_CNTLZ:
18100 {
18101 if (set_to_load_agen (dep_insn, insn))
18102 return get_attr_sign_extend (dep_insn)
18103 == SIGN_EXTEND_YES ? 6 : 4;
18104 break;
18105 }
18106 case TYPE_SHIFT:
18107 {
18108 if (set_to_load_agen (dep_insn, insn))
18109 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
18110 6 : 3;
18111 break;
18112 }
18113 case TYPE_INTEGER:
18114 case TYPE_ADD:
18115 case TYPE_LOGICAL:
18116 case TYPE_EXTS:
18117 case TYPE_INSERT:
18118 {
18119 if (set_to_load_agen (dep_insn, insn))
18120 return 3;
18121 break;
18122 }
18123 case TYPE_STORE:
18124 case TYPE_FPLOAD:
18125 case TYPE_FPSTORE:
18126 {
18127 if (get_attr_update (dep_insn) == UPDATE_YES
18128 && set_to_load_agen (dep_insn, insn))
18129 return 3;
18130 break;
18131 }
18132 case TYPE_MUL:
18133 {
18134 if (set_to_load_agen (dep_insn, insn))
18135 return 17;
18136 break;
18137 }
18138 case TYPE_DIV:
18139 {
18140 if (set_to_load_agen (dep_insn, insn))
18141 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
18142 break;
18143 }
18144 default:
18145 break;
18146 }
18147 }
18148 break;
18149
18150 default:
18151 break;
18152 }
18153
18154 /* Fall out to return default cost. */
18155 }
18156 break;
18157
18158 case REG_DEP_OUTPUT:
18159 /* Output dependency; DEP_INSN writes a register that INSN writes some
18160 cycles later. */
18161 if ((rs6000_tune == PROCESSOR_POWER6)
18162 && recog_memoized (dep_insn)
18163 && (INSN_CODE (dep_insn) >= 0))
18164 {
18165 attr_type = get_attr_type (insn);
18166
18167 switch (attr_type)
18168 {
18169 case TYPE_FP:
18170 case TYPE_FPSIMPLE:
18171 if (get_attr_type (dep_insn) == TYPE_FP
18172 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
18173 return 1;
18174 break;
18175 default:
18176 break;
18177 }
18178 }
18179 /* Fall through; no cost for output dependency. */
18180 /* FALLTHRU */
18181
18182 case REG_DEP_ANTI:
18183 /* Anti dependency; DEP_INSN reads a register that INSN writes some
18184 cycles later. */
18185 return 0;
18186
18187 default:
18188 gcc_unreachable ();
18189 }
18190
18191 return cost;
18192 }
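
/* Worked example (illustrative): with -mtune=power4, a compare feeding
   its dependent conditional branch is reported two cycles more expensive
   than the DFA latency (the TYPE_BRANCH case above), nudging the
   scheduler to separate the pair and reduce mispredict exposure, while a
   load whose value feeds a narrower dependent store is reported at
   COST + 14 so the two are kept well apart.  */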
18193
18194 /* Debug version of rs6000_adjust_cost. */
18195
18196 static int
18197 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
18198 int cost, unsigned int dw)
18199 {
18200 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
18201
18202 if (ret != cost)
18203 {
18204 const char *dep;
18205
18206 switch (dep_type)
18207 {
18208 default: dep = "unknown depencency"; break;
18209 case REG_DEP_TRUE: dep = "data dependency"; break;
18210 case REG_DEP_OUTPUT: dep = "output dependency"; break;
18211 case REG_DEP_ANTI: dep = "anti depencency"; break;
18212 }
18213
18214 fprintf (stderr,
18215 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
18216 "%s, insn:\n", ret, cost, dep);
18217
18218 debug_rtx (insn);
18219 }
18220
18221 return ret;
18222 }
18223
18224 /* Return true if INSN is microcoded.
18225 Return false otherwise. */
18226
18227 static bool
18228 is_microcoded_insn (rtx_insn *insn)
18229 {
18230 if (!insn || !NONDEBUG_INSN_P (insn)
18231 || GET_CODE (PATTERN (insn)) == USE
18232 || GET_CODE (PATTERN (insn)) == CLOBBER)
18233 return false;
18234
18235 if (rs6000_tune == PROCESSOR_CELL)
18236 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
18237
18238 if (rs6000_sched_groups
18239 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
18240 {
18241 enum attr_type type = get_attr_type (insn);
18242 if ((type == TYPE_LOAD
18243 && get_attr_update (insn) == UPDATE_YES
18244 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
18245 || ((type == TYPE_LOAD || type == TYPE_STORE)
18246 && get_attr_update (insn) == UPDATE_YES
18247 && get_attr_indexed (insn) == INDEXED_YES)
18248 || type == TYPE_MFCR)
18249 return true;
18250 }
18251
18252 return false;
18253 }
18254
18255 /* The function returns true if INSN is cracked into 2 instructions
18256 by the processor (and therefore occupies 2 issue slots). */
18257
18258 static bool
18259 is_cracked_insn (rtx_insn *insn)
18260 {
18261 if (!insn || !NONDEBUG_INSN_P (insn)
18262 || GET_CODE (PATTERN (insn)) == USE
18263 || GET_CODE (PATTERN (insn)) == CLOBBER)
18264 return false;
18265
18266 if (rs6000_sched_groups
18267 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
18268 {
18269 enum attr_type type = get_attr_type (insn);
18270 if ((type == TYPE_LOAD
18271 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18272 && get_attr_update (insn) == UPDATE_NO)
18273 || (type == TYPE_LOAD
18274 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
18275 && get_attr_update (insn) == UPDATE_YES
18276 && get_attr_indexed (insn) == INDEXED_NO)
18277 || (type == TYPE_STORE
18278 && get_attr_update (insn) == UPDATE_YES
18279 && get_attr_indexed (insn) == INDEXED_NO)
18280 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
18281 && get_attr_update (insn) == UPDATE_YES)
18282 || (type == TYPE_CR_LOGICAL
18283 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
18284 || (type == TYPE_EXTS
18285 && get_attr_dot (insn) == DOT_YES)
18286 || (type == TYPE_SHIFT
18287 && get_attr_dot (insn) == DOT_YES
18288 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
18289 || (type == TYPE_MUL
18290 && get_attr_dot (insn) == DOT_YES)
18291 || type == TYPE_DIV
18292 || (type == TYPE_INSERT
18293 && get_attr_size (insn) == SIZE_32))
18294 return true;
18295 }
18296
18297 return false;
18298 }
18299
18300 /* The function returns true if INSN can be issued only from
18301 the branch slot. */
18302
18303 static bool
18304 is_branch_slot_insn (rtx_insn *insn)
18305 {
18306 if (!insn || !NONDEBUG_INSN_P (insn)
18307 || GET_CODE (PATTERN (insn)) == USE
18308 || GET_CODE (PATTERN (insn)) == CLOBBER)
18309 return false;
18310
18311 if (rs6000_sched_groups)
18312 {
18313 enum attr_type type = get_attr_type (insn);
18314 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
18315 return true;
18316 return false;
18317 }
18318
18319 return false;
18320 }
18321
18322 /* Return true if OUT_INSN sets a value that is
18323 used in the address generation computation of IN_INSN. */
18324 static bool
18325 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
18326 {
18327 rtx out_set, in_set;
18328
18329 /* For performance reasons, only handle the simple case where
18330 both insns are a single_set. */
18331 out_set = single_set (out_insn);
18332 if (out_set)
18333 {
18334 in_set = single_set (in_insn);
18335 if (in_set)
18336 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
18337 }
18338
18339 return false;
18340 }
18341
18342 /* Try to determine base/offset/size parts of the given MEM.
18343 Return true if successful, false if the values couldn't
18344 all be determined.
18345
18346 This function only looks for REG or REG+CONST address forms.
18347 REG+REG address form will return false. */
18348
18349 static bool
18350 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
18351 HOST_WIDE_INT *size)
18352 {
18353 rtx addr_rtx;
18354 if (MEM_SIZE_KNOWN_P (mem))
18355 *size = MEM_SIZE (mem);
18356 else
18357 return false;
18358
18359 addr_rtx = (XEXP (mem, 0));
18360 if (GET_CODE (addr_rtx) == PRE_MODIFY)
18361 addr_rtx = XEXP (addr_rtx, 1);
18362
18363 *offset = 0;
18364 while (GET_CODE (addr_rtx) == PLUS
18365 && CONST_INT_P (XEXP (addr_rtx, 1)))
18366 {
18367 *offset += INTVAL (XEXP (addr_rtx, 1));
18368 addr_rtx = XEXP (addr_rtx, 0);
18369 }
18370 if (!REG_P (addr_rtx))
18371 return false;
18372
18373 *base = addr_rtx;
18374 return true;
18375 }
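
/* Minimal sketch (hypothetical, kept out of the build with #if 0) of an
   address form the walk above accepts; nested PLUS offsets are summed,
   so this MEM decomposes into base r3, offset 24, size 8:

     (mem:DI (plus:DI (plus:DI (reg:DI 3) (const_int 16)) (const_int 8)))  */
#if 0
static void
get_memref_parts_demo (void)
{
  rtx base = gen_rtx_REG (DImode, 3);
  rtx addr = gen_rtx_PLUS (DImode,
			   gen_rtx_PLUS (DImode, base, GEN_INT (16)),
			   GEN_INT (8));
  rtx mem = gen_rtx_MEM (DImode, addr);
  set_mem_size (mem, 8);

  rtx b;
  HOST_WIDE_INT off, size;
  gcc_checking_assert (get_memref_parts (mem, &b, &off, &size)
		       && off == 24 && size == 8);
}
#endif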
18376
18377 /* If the target storage locations of arguments MEM1 and MEM2 are
18378 adjacent, then return the argument that has the lower address.
18379 Otherwise, return NULL_RTX. */
18380
18381 static rtx
18382 adjacent_mem_locations (rtx mem1, rtx mem2)
18383 {
18384 rtx reg1, reg2;
18385 HOST_WIDE_INT off1, size1, off2, size2;
18386
18387 if (MEM_P (mem1)
18388 && MEM_P (mem2)
18389 && get_memref_parts (mem1, &reg1, &off1, &size1)
18390 && get_memref_parts (mem2, &reg2, &off2, &size2)
18391 && REGNO (reg1) == REGNO (reg2))
18392 {
18393 if (off1 + size1 == off2)
18394 return mem1;
18395 else if (off2 + size2 == off1)
18396 return mem2;
18397 }
18398
18399 return NULL_RTX;
18400 }
18401
18402 /* This function returns true if it can be determined that the two MEM
18403 locations overlap by at least 1 byte based on base reg/offset/size. */
18404
18405 static bool
18406 mem_locations_overlap (rtx mem1, rtx mem2)
18407 {
18408 rtx reg1, reg2;
18409 HOST_WIDE_INT off1, size1, off2, size2;
18410
18411 if (get_memref_parts (mem1, &reg1, &off1, &size1)
18412 && get_memref_parts (mem2, &reg2, &off2, &size2))
18413 return ((REGNO (reg1) == REGNO (reg2))
18414 && (((off1 <= off2) && (off1 + size1 > off2))
18415 || ((off2 <= off1) && (off2 + size2 > off1))));
18416
18417 return false;
18418 }
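
/* Standalone model (hypothetical, kept out of the build with #if 0) of
   the interval test above over plain integers: two accesses with the
   same base register overlap iff each one starts before the other ends.  */
#if 0
static bool
intervals_overlap_model (HOST_WIDE_INT off1, HOST_WIDE_INT size1,
			 HOST_WIDE_INT off2, HOST_WIDE_INT size2)
{
  /* E.g. off1 = 0, size1 = 8 and off2 = 4, size2 = 4 overlap;
     off2 = 8 would be merely adjacent and not overlap.  */
  return (off1 <= off2 && off1 + size1 > off2)
	 || (off2 <= off1 && off2 + size2 > off1);
}
#endif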
18419
18420 /* Update the integer scheduling priority INSN_PRIORITY (INSN).
18421 Increasing the priority makes INSN execute earlier; reducing it
18422 makes INSN execute later. Targets that do not need to adjust
18423 scheduling priorities need not define this hook. */
18425
18426 static int
18427 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
18428 {
18429 rtx load_mem, str_mem;
18430 /* On machines (like the 750) which have asymmetric integer units,
18431 where one integer unit can do multiply and divides and the other
18432 can't, reduce the priority of multiply/divide so it is scheduled
18433 before other integer operations. */
18434
18435 #if 0
18436 if (! INSN_P (insn))
18437 return priority;
18438
18439 if (GET_CODE (PATTERN (insn)) == USE)
18440 return priority;
18441
18442 switch (rs6000_tune) {
18443 case PROCESSOR_PPC750:
18444 switch (get_attr_type (insn))
18445 {
18446 default:
18447 break;
18448
18449 case TYPE_MUL:
18450 case TYPE_DIV:
18451 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
18452 priority, priority);
18453 if (priority >= 0 && priority < 0x01000000)
18454 priority >>= 3;
18455 break;
18456 }
18457 }
18458 #endif
18459
18460 if (insn_must_be_first_in_group (insn)
18461 && reload_completed
18462 && current_sched_info->sched_max_insns_priority
18463 && rs6000_sched_restricted_insns_priority)
18464 {
18465
18466 /* Prioritize insns that can be dispatched only in the first
18467 dispatch slot. */
18468 if (rs6000_sched_restricted_insns_priority == 1)
18469 /* Attach highest priority to insn. This means that in
18470 haifa-sched.cc:ready_sort(), dispatch-slot restriction considerations
18471 precede 'priority' (critical path) considerations. */
18472 return current_sched_info->sched_max_insns_priority;
18473 else if (rs6000_sched_restricted_insns_priority == 2)
18474 /* Increase priority of insn by a minimal amount. This means that in
18475 haifa-sched.cc:ready_sort(), only 'priority' (critical path)
18476 considerations precede dispatch-slot restriction considerations. */
18477 return (priority + 1);
18478 }
18479
18480 if (rs6000_tune == PROCESSOR_POWER6
18481 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
18482 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
18483 /* Attach highest priority to insn if the scheduler has just issued two
18484 stores and this instruction is a load, or two loads and this instruction
18485 is a store. Power6 wants loads and stores scheduled alternately
18486 when possible. */
18487 return current_sched_info->sched_max_insns_priority;
18488
18489 return priority;
18490 }
18491
18492 /* Return true if the instruction is nonpipelined on the Cell. */
18493 static bool
18494 is_nonpipeline_insn (rtx_insn *insn)
18495 {
18496 enum attr_type type;
18497 if (!insn || !NONDEBUG_INSN_P (insn)
18498 || GET_CODE (PATTERN (insn)) == USE
18499 || GET_CODE (PATTERN (insn)) == CLOBBER)
18500 return false;
18501
18502 type = get_attr_type (insn);
18503 if (type == TYPE_MUL
18504 || type == TYPE_DIV
18505 || type == TYPE_SDIV
18506 || type == TYPE_DDIV
18507 || type == TYPE_SSQRT
18508 || type == TYPE_DSQRT
18509 || type == TYPE_MFCR
18510 || type == TYPE_MFCRF
18511 || type == TYPE_MFJMPR)
18512 {
18513 return true;
18514 }
18515 return false;
18516 }
18517
18518
18519 /* Return how many instructions the machine can issue per cycle. */
18520
18521 static int
18522 rs6000_issue_rate (void)
18523 {
18524 /* Unless scheduling for register pressure, use an issue rate of 1 for
18525 the first scheduling pass to limit the degradation it can cause. */
18526 if (!reload_completed && !flag_sched_pressure)
18527 return 1;
18528
18529 switch (rs6000_tune) {
18530 case PROCESSOR_RS64A:
18531 case PROCESSOR_PPC601: /* ? */
18532 case PROCESSOR_PPC7450:
18533 return 3;
18534 case PROCESSOR_PPC440:
18535 case PROCESSOR_PPC603:
18536 case PROCESSOR_PPC750:
18537 case PROCESSOR_PPC7400:
18538 case PROCESSOR_PPC8540:
18539 case PROCESSOR_PPC8548:
18540 case PROCESSOR_CELL:
18541 case PROCESSOR_PPCE300C2:
18542 case PROCESSOR_PPCE300C3:
18543 case PROCESSOR_PPCE500MC:
18544 case PROCESSOR_PPCE500MC64:
18545 case PROCESSOR_PPCE5500:
18546 case PROCESSOR_PPCE6500:
18547 case PROCESSOR_TITAN:
18548 return 2;
18549 case PROCESSOR_PPC476:
18550 case PROCESSOR_PPC604:
18551 case PROCESSOR_PPC604e:
18552 case PROCESSOR_PPC620:
18553 case PROCESSOR_PPC630:
18554 return 4;
18555 case PROCESSOR_POWER4:
18556 case PROCESSOR_POWER5:
18557 case PROCESSOR_POWER6:
18558 case PROCESSOR_POWER7:
18559 return 5;
18560 case PROCESSOR_POWER8:
18561 return 7;
18562 case PROCESSOR_POWER9:
18563 return 6;
18564 case PROCESSOR_POWER10:
18565 return 8;
18566 default:
18567 return 1;
18568 }
18569 }
18570
18571 /* Return how many instructions to look ahead for better insn
18572 scheduling. */
18573
18574 static int
18575 rs6000_use_sched_lookahead (void)
18576 {
18577 switch (rs6000_tune)
18578 {
18579 case PROCESSOR_PPC8540:
18580 case PROCESSOR_PPC8548:
18581 return 4;
18582
18583 case PROCESSOR_CELL:
18584 return (reload_completed ? 8 : 0);
18585
18586 default:
18587 return 0;
18588 }
18589 }
18590
18591 /* We are choosing an insn from the ready queue. Return zero if INSN can
18592 be chosen, nonzero if it should not be chosen this cycle. */
18593 static int
18594 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
18595 {
18596 if (ready_index == 0)
18597 return 0;
18598
18599 if (rs6000_tune != PROCESSOR_CELL)
18600 return 0;
18601
18602 gcc_assert (insn != NULL_RTX && INSN_P (insn));
18603
18604 if (!reload_completed
18605 || is_nonpipeline_insn (insn)
18606 || is_microcoded_insn (insn))
18607 return 1;
18608
18609 return 0;
18610 }
18611
18612 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
18613 and return true. */
18614
18615 static bool
18616 find_mem_ref (rtx pat, rtx *mem_ref)
18617 {
18618 const char * fmt;
18619 int i, j;
18620
18621 /* stack_tie does not produce any real memory traffic. */
18622 if (tie_operand (pat, VOIDmode))
18623 return false;
18624
18625 if (MEM_P (pat))
18626 {
18627 *mem_ref = pat;
18628 return true;
18629 }
18630
18631 /* Recursively process the pattern. */
18632 fmt = GET_RTX_FORMAT (GET_CODE (pat));
18633
18634 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
18635 {
18636 if (fmt[i] == 'e')
18637 {
18638 if (find_mem_ref (XEXP (pat, i), mem_ref))
18639 return true;
18640 }
18641 else if (fmt[i] == 'E')
18642 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
18643 {
18644 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
18645 return true;
18646 }
18647 }
18648
18649 return false;
18650 }
18651
18652 /* Determine if PAT is a PATTERN of a load insn. */
18653
18654 static bool
18655 is_load_insn1 (rtx pat, rtx *load_mem)
18656 {
18657 if (!pat)
18658 return false;
18659
18660 if (GET_CODE (pat) == SET)
18661 {
18662 if (REG_P (SET_DEST (pat)))
18663 return find_mem_ref (SET_SRC (pat), load_mem);
18664 else
18665 return false;
18666 }
18667
18668 if (GET_CODE (pat) == PARALLEL)
18669 {
18670 int i;
18671
18672 for (i = 0; i < XVECLEN (pat, 0); i++)
18673 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
18674 return true;
18675 }
18676
18677 return false;
18678 }
18679
18680 /* Determine if INSN loads from memory. */
18681
18682 static bool
18683 is_load_insn (rtx insn, rtx *load_mem)
18684 {
18685 if (!insn || !INSN_P (insn))
18686 return false;
18687
18688 if (CALL_P (insn))
18689 return false;
18690
18691 return is_load_insn1 (PATTERN (insn), load_mem);
18692 }
18693
18694 /* Determine if PAT is a PATTERN of a store insn. */
18695
18696 static bool
18697 is_store_insn1 (rtx pat, rtx *str_mem)
18698 {
18699 if (!pat)
18700 return false;
18701
18702 if (GET_CODE (pat) == SET)
18703 {
18704 if (REG_P (SET_SRC (pat)) || SUBREG_P (SET_SRC (pat)))
18705 return find_mem_ref (SET_DEST (pat), str_mem);
18706 else
18707 return false;
18708 }
18709
18710 if (GET_CODE (pat) == PARALLEL)
18711 {
18712 int i;
18713
18714 for (i = 0; i < XVECLEN (pat, 0); i++)
18715 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
18716 return true;
18717 }
18718
18719 return false;
18720 }
18721
18722 /* Determine if INSN stores to memory. */
18723
18724 static bool
18725 is_store_insn (rtx insn, rtx *str_mem)
18726 {
18727 if (!insn || !INSN_P (insn))
18728 return false;
18729
18730 return is_store_insn1 (PATTERN (insn), str_mem);
18731 }
18732
18733 /* Return whether TYPE is a Power9 pairable vector instruction type. */
18734
18735 static bool
18736 is_power9_pairable_vec_type (enum attr_type type)
18737 {
18738 switch (type)
18739 {
18740 case TYPE_VECSIMPLE:
18741 case TYPE_VECCOMPLEX:
18742 case TYPE_VECDIV:
18743 case TYPE_VECCMP:
18744 case TYPE_VECPERM:
18745 case TYPE_VECFLOAT:
18746 case TYPE_VECFDIV:
18747 case TYPE_VECDOUBLE:
18748 return true;
18749 default:
18750 break;
18751 }
18752 return false;
18753 }
18754
18755 /* Return whether the dependence between INSN and NEXT is considered
18756 costly by the given target. */
18757
18758 static bool
18759 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
18760 {
18761 rtx insn;
18762 rtx next;
18763 rtx load_mem, str_mem;
18764
18765 /* If the flag is not enabled - no dependence is considered costly;
18766 allow all dependent insns in the same group.
18767 This is the most aggressive option. */
18768 if (rs6000_sched_costly_dep == no_dep_costly)
18769 return false;
18770
18771 /* If the flag is set to 1 - a dependence is always considered costly;
18772 do not allow dependent instructions in the same group.
18773 This is the most conservative option. */
18774 if (rs6000_sched_costly_dep == all_deps_costly)
18775 return true;
18776
18777 insn = DEP_PRO (dep);
18778 next = DEP_CON (dep);
18779
18780 if (rs6000_sched_costly_dep == store_to_load_dep_costly
18781 && is_load_insn (next, &load_mem)
18782 && is_store_insn (insn, &str_mem))
18783 /* Prevent load after store in the same group. */
18784 return true;
18785
18786 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
18787 && is_load_insn (next, &load_mem)
18788 && is_store_insn (insn, &str_mem)
18789 && DEP_TYPE (dep) == REG_DEP_TRUE
18790 && mem_locations_overlap(str_mem, load_mem))
18791 /* Prevent load after store in the same group if it is a true
18792 dependence. */
18793 return true;
18794
18795 /* The flag is set to X; dependences with latency >= X are considered costly,
18796 and will not be scheduled in the same group. */
18797 if (rs6000_sched_costly_dep <= max_dep_latency
18798 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
18799 return true;
18800
18801 return false;
18802 }
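
/* Worked example (illustrative): with -msched-costly-dep=3, a dependence
   whose remaining latency at this distance is at least three cycles
   (COST - DISTANCE >= 3) is treated as costly and the consumer is kept
   out of the producer's dispatch group, while cheaper dependences may
   still share a group.  */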
18803
18804 /* Return the next insn after INSN that is found before TAIL is reached,
18805 skipping any "non-active" insns - insns that will not actually occupy
18806 an issue slot. Return NULL_RTX if such an insn is not found. */
18807
18808 static rtx_insn *
18809 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
18810 {
18811 if (insn == NULL_RTX || insn == tail)
18812 return NULL;
18813
18814 while (1)
18815 {
18816 insn = NEXT_INSN (insn);
18817 if (insn == NULL_RTX || insn == tail)
18818 return NULL;
18819
18820 if (CALL_P (insn)
18821 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
18822 || (NONJUMP_INSN_P (insn)
18823 && GET_CODE (PATTERN (insn)) != USE
18824 && GET_CODE (PATTERN (insn)) != CLOBBER
18825 && INSN_CODE (insn) != CODE_FOR_stack_tie))
18826 break;
18827 }
18828 return insn;
18829 }
18830
18831 /* Move instruction at POS to the end of the READY list. */
18832
18833 static void
18834 move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
18835 {
18836 rtx_insn *tmp;
18837 int i;
18838
18839 tmp = ready[pos];
18840 for (i = pos; i < lastpos; i++)
18841 ready[i] = ready[i + 1];
18842 ready[lastpos] = tmp;
18843 }
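
/* Tiny model (hypothetical, kept out of the build with #if 0; plain ints
   stand in for insns): moving POS = 1 to the end rotates { A, B, C, D }
   into { A, C, D, B }.  The haifa scheduler issues the highest-indexed
   ready element next, so B becomes the next choice.  */
#if 0
static void
move_to_end_demo (void)
{
  int ready[] = { 'A', 'B', 'C', 'D' };
  int pos = 1, lastpos = 3;
  int tmp = ready[pos];
  for (int i = pos; i < lastpos; i++)
    ready[i] = ready[i + 1];
  ready[lastpos] = tmp;	/* ready is now { 'A', 'C', 'D', 'B' }.  */
}
#endif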
18844
18845 /* Do Power6 specific sched_reorder2 reordering of ready list. */
18846
18847 static int
18848 power6_sched_reorder2 (rtx_insn **ready, int lastpos)
18849 {
18850 /* For Power6, we need to handle some special cases to try to keep the
18851 store queue from overflowing and triggering expensive flushes.
18852
18853 This code monitors how load and store instructions are being issued
18854 and skews the ready list one way or the other to increase the likelihood
18855 that a desired instruction is issued at the proper time.
18856
18857 To do this, we maintain a "load_store_pendulum" that tracks the
18858 current state of load/store issue:
18859
18860 - If the pendulum is at zero, then no loads or stores have been
18861 issued in the current cycle so we do nothing.
18862
18863 - If the pendulum is 1, then a single load has been issued in this
18864 cycle and we attempt to locate another load in the ready list to
18865 issue with it.
18866
18867 - If the pendulum is -2, then two stores have already been
18868 issued in this cycle, so we increase the priority of the first load
18869 in the ready list to increase its likelihood of being chosen first
18870 in the next cycle.
18871
18872 - If the pendulum is -1, then a single store has been issued in this
18873 cycle and we attempt to locate another store in the ready list to
18874 issue with it, preferring a store to an adjacent memory location to
18875 facilitate store pairing in the store queue.
18876
18877 - If the pendulum is 2, then two loads have already been
18878 issued in this cycle, so we increase the priority of the first store
18879 in the ready list to increase its likelihood of being chosen first
18880 in the next cycle.
18881
18882 - If the pendulum < -2 or > 2, then do nothing.
18883
18884 Note: This code covers the most common scenarios. There exist
18885 non-load/store instructions which make use of the LSU and which
18886 would need to be accounted for to strictly model the behavior
18887 of the machine. Those instructions are currently unaccounted
18888 for to help minimize compile time overhead of this code.
18889 */
18890 int pos;
18891 rtx load_mem, str_mem;
18892
18893 if (is_store_insn (last_scheduled_insn, &str_mem))
18894 /* Issuing a store, swing the load_store_pendulum to the left. */
18895 load_store_pendulum--;
18896 else if (is_load_insn (last_scheduled_insn, &load_mem))
18897 /* Issuing a load, swing the load_store_pendulum to the right. */
18898 load_store_pendulum++;
18899 else
18900 return cached_can_issue_more;
18901
18902 /* If the pendulum is balanced, or there is only one instruction on
18903 the ready list, then all is well, so return. */
18904 if ((load_store_pendulum == 0) || (lastpos <= 0))
18905 return cached_can_issue_more;
18906
18907 if (load_store_pendulum == 1)
18908 {
18909 /* A load has been issued in this cycle. Scan the ready list
18910 for another load to issue with it. */
18911 pos = lastpos;
18912
18913 while (pos >= 0)
18914 {
18915 if (is_load_insn (ready[pos], &load_mem))
18916 {
18917 /* Found a load. Move it to the head of the ready list,
18918 and adjust its priority so that it is more likely to
18919 stay there. */
18920 move_to_end_of_ready (ready, pos, lastpos);
18921
18922 if (!sel_sched_p ()
18923 && INSN_PRIORITY_KNOWN (ready[lastpos]))
18924 INSN_PRIORITY (ready[lastpos])++;
18925 break;
18926 }
18927 pos--;
18928 }
18929 }
18930 else if (load_store_pendulum == -2)
18931 {
18932 /* Two stores have been issued in this cycle. Increase the
18933 priority of the first load in the ready list to favor it for
18934 issuing in the next cycle. */
18935 pos = lastpos;
18936
18937 while (pos >= 0)
18938 {
18939 if (is_load_insn (ready[pos], &load_mem)
18940 && !sel_sched_p ()
18941 && INSN_PRIORITY_KNOWN (ready[pos]))
18942 {
18943 INSN_PRIORITY (ready[pos])++;
18944
18945 /* Adjust the pendulum to account for the fact that a load
18946 was found and increased in priority. This is to prevent
18947 increasing the priority of multiple loads. */
18948 load_store_pendulum--;
18949
18950 break;
18951 }
18952 pos--;
18953 }
18954 }
18955 else if (load_store_pendulum == -1)
18956 {
18957 /* A store has been issued in this cycle. Scan the ready list for
18958 another store to issue with it, preferring a store to an adjacent
18959 memory location. */
18960 int first_store_pos = -1;
18961
18962 pos = lastpos;
18963
18964 while (pos >= 0)
18965 {
18966 if (is_store_insn (ready[pos], &str_mem))
18967 {
18968 rtx str_mem2;
18969 /* Maintain the index of the first store found on the
18970 list */
18971 if (first_store_pos == -1)
18972 first_store_pos = pos;
18973
18974 if (is_store_insn (last_scheduled_insn, &str_mem2)
18975 && adjacent_mem_locations (str_mem, str_mem2))
18976 {
18977 /* Found an adjacent store. Move it to the head of the
18978 ready list, and adjust its priority so that it is
18979 more likely to stay there. */
18980 move_to_end_of_ready (ready, pos, lastpos);
18981
18982 if (!sel_sched_p ()
18983 && INSN_PRIORITY_KNOWN (ready[lastpos]))
18984 INSN_PRIORITY (ready[lastpos])++;
18985
18986 first_store_pos = -1;
18987
18988 break;
18989 }
18990 }
18991 pos--;
18992 }
18993
18994 if (first_store_pos >= 0)
18995 {
18996 /* An adjacent store wasn't found, but a non-adjacent store was,
18997 so move the non-adjacent store to the front of the ready
18998 list, and adjust its priority so that it is more likely to
18999 stay there. */
19000 move_to_end_of_ready (ready, first_store_pos, lastpos);
19001 if (!sel_sched_p ()
19002 && INSN_PRIORITY_KNOWN (ready[lastpos]))
19003 INSN_PRIORITY (ready[lastpos])++;
19004 }
19005 }
19006 else if (load_store_pendulum == 2)
19007 {
19008 /* Two loads have been issued in this cycle. Increase the priority
19009 of the first store in the ready list to favor it for issuing in
19010 the next cycle. */
19011 pos = lastpos;
19012
19013 while (pos >= 0)
19014 {
19015 if (is_store_insn (ready[pos], &str_mem)
19016 && !sel_sched_p ()
19017 && INSN_PRIORITY_KNOWN (ready[pos]))
19018 {
19019 INSN_PRIORITY (ready[pos])++;
19020
19021 /* Adjust the pendulum to account for the fact that a store
19022 was found and increased in priority. This is to prevent
19023 increasing the priority of multiple stores. */
19024 load_store_pendulum++;
19025
19026 break;
19027 }
19028 pos--;
19029 }
19030 }
19031
19032 return cached_can_issue_more;
19033 }
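
/* Worked example (illustrative): after two stores issue in one cycle the
   pendulum sits at -2, so the code above bumps the priority of the first
   load on the ready list and pushes the pendulum to -3, parking the
   mechanism (|pendulum| > 2) so no further loads are boosted that cycle.
   rs6000_sched_reorder resets the pendulum to zero at the start of each
   cycle, approximating the alternating load/store issue pattern Power6
   prefers.  */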
19034
19035 /* Do Power9 specific sched_reorder2 reordering of ready list. */
19036
19037 static int
19038 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
19039 {
19040 int pos;
19041 enum attr_type type, type2;
19042
19043 type = get_attr_type (last_scheduled_insn);
19044
19045 /* Try to issue fixed point divides back-to-back in pairs so they will be
19046 routed to separate execution units and execute in parallel. */
19047 if (type == TYPE_DIV && divide_cnt == 0)
19048 {
19049 /* First divide has been scheduled. */
19050 divide_cnt = 1;
19051
19052 /* Scan the ready list looking for another divide; if one is found,
19053 move it to the end of the list so it is chosen next. */
19054 pos = lastpos;
19055 while (pos >= 0)
19056 {
19057 if (recog_memoized (ready[pos]) >= 0
19058 && get_attr_type (ready[pos]) == TYPE_DIV)
19059 {
19060 move_to_end_of_ready (ready, pos, lastpos);
19061 break;
19062 }
19063 pos--;
19064 }
19065 }
19066 else
19067 {
19068 /* Last insn was the 2nd divide or not a divide, reset the counter. */
19069 divide_cnt = 0;
19070
19071 /* The best dispatch throughput for vector and vector load insns can be
19072 achieved by interleaving a vector and vector load such that they'll
19073 dispatch to the same superslice. If this pairing cannot be achieved
19074 then it is best to pair vector insns together and vector load insns
19075 together.
19076
19077 To aid in this pairing, vec_pairing maintains the current state with
19078 the following values:
19079
19080 0 : Initial state, no vecload/vector pairing has been started.
19081
19082 1 : A vecload or vector insn has been issued and a candidate for
19083 pairing has been found and moved to the end of the ready
19084 list. */
19085 if (type == TYPE_VECLOAD)
19086 {
19087 /* Issued a vecload. */
19088 if (vec_pairing == 0)
19089 {
19090 int vecload_pos = -1;
19091 /* We issued a single vecload; look for a vector insn to pair it
19092 with. If one isn't found, try to pair another vecload. */
19093 pos = lastpos;
19094 while (pos >= 0)
19095 {
19096 if (recog_memoized (ready[pos]) >= 0)
19097 {
19098 type2 = get_attr_type (ready[pos]);
19099 if (is_power9_pairable_vec_type (type2))
19100 {
19101 /* Found a vector insn to pair with, move it to the
19102 end of the ready list so it is scheduled next. */
19103 move_to_end_of_ready (ready, pos, lastpos);
19104 vec_pairing = 1;
19105 return cached_can_issue_more;
19106 }
19107 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
19108 /* Remember position of first vecload seen. */
19109 vecload_pos = pos;
19110 }
19111 pos--;
19112 }
19113 if (vecload_pos >= 0)
19114 {
19115 /* Didn't find a vector to pair with but did find a vecload,
19116 move it to the end of the ready list. */
19117 move_to_end_of_ready (ready, vecload_pos, lastpos);
19118 vec_pairing = 1;
19119 return cached_can_issue_more;
19120 }
19121 }
19122 }
19123 else if (is_power9_pairable_vec_type (type))
19124 {
19125 /* Issued a vector operation. */
19126 if (vec_pairing == 0)
19127 {
19128 int vec_pos = -1;
19129 /* We issued a single vector insn; look for a vecload to pair it
19130 with. If one isn't found, try to pair another vector. */
19131 pos = lastpos;
19132 while (pos >= 0)
19133 {
19134 if (recog_memoized (ready[pos]) >= 0)
19135 {
19136 type2 = get_attr_type (ready[pos]);
19137 if (type2 == TYPE_VECLOAD)
19138 {
19139 /* Found a vecload insn to pair with, move it to the
19140 end of the ready list so it is scheduled next. */
19141 move_to_end_of_ready (ready, pos, lastpos);
19142 vec_pairing = 1;
19143 return cached_can_issue_more;
19144 }
19145 else if (is_power9_pairable_vec_type (type2)
19146 && vec_pos == -1)
19147 /* Remember position of first vector insn seen. */
19148 vec_pos = pos;
19149 }
19150 pos--;
19151 }
19152 if (vec_pos >= 0)
19153 {
19154 /* Didn't find a vecload to pair with but did find a vector
19155 insn, move it to the end of the ready list. */
19156 move_to_end_of_ready (ready, vec_pos, lastpos);
19157 vec_pairing = 1;
19158 return cached_can_issue_more;
19159 }
19160 }
19161 }
19162
19163 /* We've either finished a vec/vecload pair, couldn't find an insn to
19164 continue the current pair, or the last insn had nothing to do
19165 with pairing. In any case, reset the state. */
19166 vec_pairing = 0;
19167 }
19168
19169 return cached_can_issue_more;
19170 }
19171
19172 /* Determine if INSN is a store to memory that can be fused with a similar
19173 adjacent store. */
19174
19175 static bool
19176 is_fusable_store (rtx_insn *insn, rtx *str_mem)
19177 {
19178 /* Insn must be a non-prefixed base+disp form store. */
19179 if (is_store_insn (insn, str_mem)
19180 && get_attr_prefixed (insn) == PREFIXED_NO
19181 && get_attr_update (insn) == UPDATE_NO
19182 && get_attr_indexed (insn) == INDEXED_NO)
19183 {
19184 /* Further restrictions by mode and size. */
19185 if (!MEM_SIZE_KNOWN_P (*str_mem))
19186 return false;
19187
19188 machine_mode mode = GET_MODE (*str_mem);
19189 HOST_WIDE_INT size = MEM_SIZE (*str_mem);
19190
19191 if (INTEGRAL_MODE_P (mode))
19192 /* Must be word or dword size. */
19193 return (size == 4 || size == 8);
19194 else if (FLOAT_MODE_P (mode))
19195 /* Must be dword size. */
19196 return (size == 8);
19197 }
19198
19199 return false;
19200 }
19201
19202 /* Do Power10 specific reordering of the ready list. */
19203
19204 static int
19205 power10_sched_reorder (rtx_insn **ready, int lastpos)
19206 {
19207 rtx mem1;
19208
19209 /* Do store fusion during sched2 only. */
19210 if (!reload_completed)
19211 return cached_can_issue_more;
19212
19213 /* If the prior insn finished off a store fusion pair then simply
19214 reset the counter and return, nothing more to do. */
19215 if (load_store_pendulum != 0)
19216 {
19217 load_store_pendulum = 0;
19218 return cached_can_issue_more;
19219 }
19220
19221 /* Try to pair certain store insns to adjacent memory locations
19222 so that the hardware will fuse them to a single operation. */
19223 if (TARGET_P10_FUSION && is_fusable_store (last_scheduled_insn, &mem1))
19224 {
19225
19226 /* A fusable store was just scheduled. Scan the ready list for another
19227 store that it can fuse with. */
19228 int pos = lastpos;
19229 while (pos >= 0)
19230 {
19231 rtx mem2;
19232 /* GPR stores can use ascending or descending offsets; FPR/VSR stores
19233 must be ascending only. */
19234 if (is_fusable_store (ready[pos], &mem2)
19235 && ((INTEGRAL_MODE_P (GET_MODE (mem1))
19236 && adjacent_mem_locations (mem1, mem2))
19237 || (FLOAT_MODE_P (GET_MODE (mem1))
19238 && (adjacent_mem_locations (mem1, mem2) == mem1))))
19239 {
19240 /* Found a fusable store. Move it to the end of the ready list
19241 so it is scheduled next. */
19242 move_to_end_of_ready (ready, pos, lastpos);
19243
19244 load_store_pendulum = -1;
19245 break;
19246 }
19247 pos--;
19248 }
19249 }
19250
19251 return cached_can_issue_more;
19252 }
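
/* Worked example (illustrative): two GPR doubleword stores such as

	std 9,16(1)
	std 10,24(1)

   are non-prefixed base+disp, non-update, non-indexed, 8 bytes each and
   adjacent, so once the first is scheduled the second is pulled to the
   front of the ready list and the pair becomes a candidate for Power10
   store fusion.  For FPR/VSR stores only the ascending order is accepted
   above.  */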
19253
19254 /* We are about to begin issuing insns for this clock cycle. */
19255
19256 static int
19257 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
19258 rtx_insn **ready ATTRIBUTE_UNUSED,
19259 int *pn_ready ATTRIBUTE_UNUSED,
19260 int clock_var ATTRIBUTE_UNUSED)
19261 {
19262 int n_ready = *pn_ready;
19263
19264 if (sched_verbose)
19265 fprintf (dump, "// rs6000_sched_reorder :\n");
19266
19267 /* Reorder the ready list if the second-to-last ready insn
19268 is a nonpipelined insn. */
19269 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
19270 {
19271 if (is_nonpipeline_insn (ready[n_ready - 1])
19272 && (recog_memoized (ready[n_ready - 2]) > 0))
19273 /* Simply swap first two insns. */
19274 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
19275 }
19276
19277 if (rs6000_tune == PROCESSOR_POWER6)
19278 load_store_pendulum = 0;
19279
19280 /* Do Power10 dependent reordering. */
19281 if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn)
19282 power10_sched_reorder (ready, n_ready - 1);
19283
19284 return rs6000_issue_rate ();
19285 }
19286
19287 /* Like rs6000_sched_reorder, but called after issuing each insn. */
19288
19289 static int
19290 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
19291 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
19292 {
19293 if (sched_verbose)
19294 fprintf (dump, "// rs6000_sched_reorder2 :\n");
19295
19296 /* Do Power6 dependent reordering if necessary. */
19297 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
19298 return power6_sched_reorder2 (ready, *pn_ready - 1);
19299
19300 /* Do Power9 dependent reordering if necessary. */
19301 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
19302 && recog_memoized (last_scheduled_insn) >= 0)
19303 return power9_sched_reorder2 (ready, *pn_ready - 1);
19304
19305 /* Do Power10 dependent reordering. */
19306 if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn)
19307 return power10_sched_reorder (ready, *pn_ready - 1);
19308
19309 return cached_can_issue_more;
19310 }
19311
19312 /* Return whether the presence of INSN causes a dispatch group termination
19313 of group WHICH_GROUP.
19314
19315 If WHICH_GROUP == current_group, this function will return true if INSN
19316 causes the termination of the current group (i.e., the dispatch group to
19317 which INSN belongs). This means that INSN will be the last insn in the
19318 group it belongs to.
19319
19320 If WHICH_GROUP == previous_group, this function will return true if INSN
19321 causes the termination of the previous group (i.e., the dispatch group that
19322 precedes the group to which INSN belongs). This means that INSN will be
19323 the first insn in the group it belongs to. */
19324
19325 static bool
19326 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
19327 {
19328 bool first, last;
19329
19330 if (! insn)
19331 return false;
19332
19333 first = insn_must_be_first_in_group (insn);
19334 last = insn_must_be_last_in_group (insn);
19335
19336 if (first && last)
19337 return true;
19338
19339 if (which_group == current_group)
19340 return last;
19341 else if (which_group == previous_group)
19342 return first;
19343
19344 return false;
19345 }
19346
19347
19348 static bool
19349 insn_must_be_first_in_group (rtx_insn *insn)
19350 {
19351 enum attr_type type;
19352
19353 if (!insn
19354 || NOTE_P (insn)
19355 || DEBUG_INSN_P (insn)
19356 || GET_CODE (PATTERN (insn)) == USE
19357 || GET_CODE (PATTERN (insn)) == CLOBBER)
19358 return false;
19359
19360 switch (rs6000_tune)
19361 {
19362 case PROCESSOR_POWER5:
19363 if (is_cracked_insn (insn))
19364 return true;
19365 /* FALLTHRU */
19366 case PROCESSOR_POWER4:
19367 if (is_microcoded_insn (insn))
19368 return true;
19369
19370 if (!rs6000_sched_groups)
19371 return false;
19372
19373 type = get_attr_type (insn);
19374
19375 switch (type)
19376 {
19377 case TYPE_MFCR:
19378 case TYPE_MFCRF:
19379 case TYPE_MTCR:
19380 case TYPE_CR_LOGICAL:
19381 case TYPE_MTJMPR:
19382 case TYPE_MFJMPR:
19383 case TYPE_DIV:
19384 case TYPE_LOAD_L:
19385 case TYPE_STORE_C:
19386 case TYPE_ISYNC:
19387 case TYPE_SYNC:
19388 return true;
19389 default:
19390 break;
19391 }
19392 break;
19393 case PROCESSOR_POWER6:
19394 type = get_attr_type (insn);
19395
19396 switch (type)
19397 {
19398 case TYPE_EXTS:
19399 case TYPE_CNTLZ:
19400 case TYPE_TRAP:
19401 case TYPE_MUL:
19402 case TYPE_INSERT:
19403 case TYPE_FPCOMPARE:
19404 case TYPE_MFCR:
19405 case TYPE_MTCR:
19406 case TYPE_MFJMPR:
19407 case TYPE_MTJMPR:
19408 case TYPE_ISYNC:
19409 case TYPE_SYNC:
19410 case TYPE_LOAD_L:
19411 case TYPE_STORE_C:
19412 return true;
19413 case TYPE_SHIFT:
19414 if (get_attr_dot (insn) == DOT_NO
19415 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
19416 return true;
19417 else
19418 break;
19419 case TYPE_DIV:
19420 if (get_attr_size (insn) == SIZE_32)
19421 return true;
19422 else
19423 break;
19424 case TYPE_LOAD:
19425 case TYPE_STORE:
19426 case TYPE_FPLOAD:
19427 case TYPE_FPSTORE:
19428 if (get_attr_update (insn) == UPDATE_YES)
19429 return true;
19430 else
19431 break;
19432 default:
19433 break;
19434 }
19435 break;
19436 case PROCESSOR_POWER7:
19437 type = get_attr_type (insn);
19438
19439 switch (type)
19440 {
19441 case TYPE_CR_LOGICAL:
19442 case TYPE_MFCR:
19443 case TYPE_MFCRF:
19444 case TYPE_MTCR:
19445 case TYPE_DIV:
19446 case TYPE_ISYNC:
19447 case TYPE_LOAD_L:
19448 case TYPE_STORE_C:
19449 case TYPE_MFJMPR:
19450 case TYPE_MTJMPR:
19451 return true;
19452 case TYPE_MUL:
19453 case TYPE_SHIFT:
19454 case TYPE_EXTS:
19455 if (get_attr_dot (insn) == DOT_YES)
19456 return true;
19457 else
19458 break;
19459 case TYPE_LOAD:
19460 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19461 || get_attr_update (insn) == UPDATE_YES)
19462 return true;
19463 else
19464 break;
19465 case TYPE_STORE:
19466 case TYPE_FPLOAD:
19467 case TYPE_FPSTORE:
19468 if (get_attr_update (insn) == UPDATE_YES)
19469 return true;
19470 else
19471 break;
19472 default:
19473 break;
19474 }
19475 break;
19476 case PROCESSOR_POWER8:
19477 type = get_attr_type (insn);
19478
19479 switch (type)
19480 {
19481 case TYPE_CR_LOGICAL:
19482 case TYPE_MFCR:
19483 case TYPE_MFCRF:
19484 case TYPE_MTCR:
19485 case TYPE_SYNC:
19486 case TYPE_ISYNC:
19487 case TYPE_LOAD_L:
19488 case TYPE_STORE_C:
19489 case TYPE_VECSTORE:
19490 case TYPE_MFJMPR:
19491 case TYPE_MTJMPR:
19492 return true;
19493 case TYPE_SHIFT:
19494 case TYPE_EXTS:
19495 case TYPE_MUL:
19496 if (get_attr_dot (insn) == DOT_YES)
19497 return true;
19498 else
19499 break;
19500 case TYPE_LOAD:
19501 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19502 || get_attr_update (insn) == UPDATE_YES)
19503 return true;
19504 else
19505 break;
19506 case TYPE_STORE:
19507 if (get_attr_update (insn) == UPDATE_YES
19508 && get_attr_indexed (insn) == INDEXED_YES)
19509 return true;
19510 else
19511 break;
19512 default:
19513 break;
19514 }
19515 break;
19516 default:
19517 break;
19518 }
19519
19520 return false;
19521 }
19522
19523 static bool
19524 insn_must_be_last_in_group (rtx_insn *insn)
19525 {
19526 enum attr_type type;
19527
19528 if (!insn
19529 || NOTE_P (insn)
19530 || DEBUG_INSN_P (insn)
19531 || GET_CODE (PATTERN (insn)) == USE
19532 || GET_CODE (PATTERN (insn)) == CLOBBER)
19533 return false;
19534
19535 switch (rs6000_tune) {
19536 case PROCESSOR_POWER4:
19537 case PROCESSOR_POWER5:
19538 if (is_microcoded_insn (insn))
19539 return true;
19540
19541 if (is_branch_slot_insn (insn))
19542 return true;
19543
19544 break;
19545 case PROCESSOR_POWER6:
19546 type = get_attr_type (insn);
19547
19548 switch (type)
19549 {
19550 case TYPE_EXTS:
19551 case TYPE_CNTLZ:
19552 case TYPE_TRAP:
19553 case TYPE_MUL:
19554 case TYPE_FPCOMPARE:
19555 case TYPE_MFCR:
19556 case TYPE_MTCR:
19557 case TYPE_MFJMPR:
19558 case TYPE_MTJMPR:
19559 case TYPE_ISYNC:
19560 case TYPE_SYNC:
19561 case TYPE_LOAD_L:
19562 case TYPE_STORE_C:
19563 return true;
19564 case TYPE_SHIFT:
19565 if (get_attr_dot (insn) == DOT_NO
19566 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
19567 return true;
19568 else
19569 break;
19570 case TYPE_DIV:
19571 if (get_attr_size (insn) == SIZE_32)
19572 return true;
19573 else
19574 break;
19575 default:
19576 break;
19577 }
19578 break;
19579 case PROCESSOR_POWER7:
19580 type = get_attr_type (insn);
19581
19582 switch (type)
19583 {
19584 case TYPE_ISYNC:
19585 case TYPE_SYNC:
19586 case TYPE_LOAD_L:
19587 case TYPE_STORE_C:
19588 return true;
19589 case TYPE_LOAD:
19590 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19591 && get_attr_update (insn) == UPDATE_YES)
19592 return true;
19593 else
19594 break;
19595 case TYPE_STORE:
19596 if (get_attr_update (insn) == UPDATE_YES
19597 && get_attr_indexed (insn) == INDEXED_YES)
19598 return true;
19599 else
19600 break;
19601 default:
19602 break;
19603 }
19604 break;
19605 case PROCESSOR_POWER8:
19606 type = get_attr_type (insn);
19607
19608 switch (type)
19609 {
19610 case TYPE_MFCR:
19611 case TYPE_MTCR:
19612 case TYPE_ISYNC:
19613 case TYPE_SYNC:
19614 case TYPE_LOAD_L:
19615 case TYPE_STORE_C:
19616 return true;
19617 case TYPE_LOAD:
19618 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19619 && get_attr_update (insn) == UPDATE_YES)
19620 return true;
19621 else
19622 break;
19623 case TYPE_STORE:
19624 if (get_attr_update (insn) == UPDATE_YES
19625 && get_attr_indexed (insn) == INDEXED_YES)
19626 return true;
19627 else
19628 break;
19629 default:
19630 break;
19631 }
19632 break;
19633 default:
19634 break;
19635 }
19636
19637 return false;
19638 }
19639
19640 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
19641 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
19642
19643 static bool
19644 is_costly_group (rtx *group_insns, rtx next_insn)
19645 {
19646 int i;
19647 int issue_rate = rs6000_issue_rate ();
19648
19649 for (i = 0; i < issue_rate; i++)
19650 {
19651 sd_iterator_def sd_it;
19652 dep_t dep;
19653 rtx insn = group_insns[i];
19654
19655 if (!insn)
19656 continue;
19657
19658 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
19659 {
19660 rtx next = DEP_CON (dep);
19661
19662 if (next == next_insn
19663 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
19664 return true;
19665 }
19666 }
19667
19668 return false;
19669 }
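
/* For example, when the costly-dependence setting treats store-to-load
   dependences as costly (-msched-costly-dep=store_to_load is one such
   setting; mentioned here only as an illustration), a store in GROUP_INSNS
   followed by a load in NEXT_INSN that reads the stored location makes the
   group "costly", and force_new_group below will then separate the two
   insns with nops.  */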
19670
19671 /* Helper for the function redefine_groups.
19672    Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
19673    in the same dispatch group.  If so, insert nops before NEXT_INSN, in order
19674    to keep it "far" (in a separate group) from GROUP_INSNS, following
19675    one of the following schemes, depending on the value of the flag
19676    -minsert-sched-nops=X:
19677    (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
19678        in order to force NEXT_INSN into a separate group.
19679    (2) X < sched_finish_regroup_exact: insert exactly X nops.
19680    GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
19681    insertion (whether a group has just ended, how many vacant issue slots
19682    remain in the last group, and how many dispatch groups were seen so far).  */
19683
19684 static int
19685 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
19686 rtx_insn *next_insn, bool *group_end, int can_issue_more,
19687 int *group_count)
19688 {
19689 rtx nop;
19690 bool force;
19691 int issue_rate = rs6000_issue_rate ();
19692 bool end = *group_end;
19693 int i;
19694
19695 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
19696 return can_issue_more;
19697
19698 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
19699 return can_issue_more;
19700
19701 force = is_costly_group (group_insns, next_insn);
19702 if (!force)
19703 return can_issue_more;
19704
19705 if (sched_verbose > 6)
19706     fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
19707 	     *group_count, can_issue_more);
19708
19709 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
19710 {
19711 if (*group_end)
19712 can_issue_more = 0;
19713
19714 /* Since only a branch can be issued in the last issue_slot, it is
19715 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
19716 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
19717 in this case the last nop will start a new group and the branch
19718 will be forced to the new group. */
19719 if (can_issue_more && !is_branch_slot_insn (next_insn))
19720 can_issue_more--;
19721
19722 /* Do we have a special group ending nop? */
19723 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
19724 || rs6000_tune == PROCESSOR_POWER8)
19725 {
19726 nop = gen_group_ending_nop ();
19727 emit_insn_before (nop, next_insn);
19728 can_issue_more = 0;
19729 }
19730 else
19731 while (can_issue_more > 0)
19732 {
19733 nop = gen_nop ();
19734 emit_insn_before (nop, next_insn);
19735 can_issue_more--;
19736 }
19737
19738 *group_end = true;
19739 return 0;
19740 }
19741
19742 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
19743 {
19744 int n_nops = rs6000_sched_insert_nops;
19745
19746 /* Nops can't be issued from the branch slot, so the effective
19747 issue_rate for nops is 'issue_rate - 1'. */
19748 if (can_issue_more == 0)
19749 can_issue_more = issue_rate;
19750 can_issue_more--;
19751 if (can_issue_more == 0)
19752 {
19753 can_issue_more = issue_rate - 1;
19754 (*group_count)++;
19755 end = true;
19756 for (i = 0; i < issue_rate; i++)
19757 {
19758 group_insns[i] = 0;
19759 }
19760 }
19761
19762 while (n_nops > 0)
19763 {
19764 nop = gen_nop ();
19765 emit_insn_before (nop, next_insn);
19766 if (can_issue_more == issue_rate - 1) /* new group begins */
19767 end = false;
19768 can_issue_more--;
19769 if (can_issue_more == 0)
19770 {
19771 can_issue_more = issue_rate - 1;
19772 (*group_count)++;
19773 end = true;
19774 for (i = 0; i < issue_rate; i++)
19775 {
19776 group_insns[i] = 0;
19777 }
19778 }
19779 n_nops--;
19780 }
19781
19782 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
19783 can_issue_more++;
19784
19785 /* Is next_insn going to start a new group? */
19786 *group_end
19787 = (end
19788 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
19789 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
19790 	   || (can_issue_more < issue_rate
19791 	       && insn_terminates_group_p (next_insn, previous_group)));
19792 if (*group_end && end)
19793 (*group_count)--;
19794
19795 if (sched_verbose > 6)
19796 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
19797 *group_count, can_issue_more);
19798 return can_issue_more;
19799 }
19800
19801 return can_issue_more;
19802 }
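
/* Worked example (illustrative): with issue_rate == 4 and the regroup_exact
   scheme, if three issue slots are still vacant (can_issue_more == 3) and
   NEXT_INSN is not a branch, can_issue_more is first decremented to 2 and
   two nops are emitted, leaving only the branch slot vacant; NEXT_INSN
   cannot use that slot, so it is forced to start a new dispatch group.  */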
19803
19804 /* This function tries to synch the dispatch groups that the compiler "sees"
19805 with the dispatch groups that the processor dispatcher is expected to
19806 form in practice. It tries to achieve this synchronization by forcing the
19807 estimated processor grouping on the compiler (as opposed to the function
19808    'pad_groups' which tries to force the scheduler's grouping on the processor).
19809
19810 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
19811 examines the (estimated) dispatch groups that will be formed by the processor
19812 dispatcher. It marks these group boundaries to reflect the estimated
19813 processor grouping, overriding the grouping that the scheduler had marked.
19814 Depending on the value of the flag '-minsert-sched-nops' this function can
19815 force certain insns into separate groups or force a certain distance between
19816 them by inserting nops, for example, if there exists a "costly dependence"
19817 between the insns.
19818
19819 The function estimates the group boundaries that the processor will form as
19820 follows: It keeps track of how many vacant issue slots are available after
19821 each insn. A subsequent insn will start a new group if one of the following
19822 4 cases applies:
19823 - no more vacant issue slots remain in the current dispatch group.
19824 - only the last issue slot, which is the branch slot, is vacant, but the next
19825 insn is not a branch.
19826    - only 2 or fewer issue slots, including the branch slot, are vacant, which
19827      means that a cracked insn (which occupies two issue slots) can't be
19828      issued in this group.
19829    - fewer than 'issue_rate' slots are vacant, and the next insn always needs
19830      to start a new group.  */
19831
19832 static int
19833 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
19834 rtx_insn *tail)
19835 {
19836 rtx_insn *insn, *next_insn;
19837 int issue_rate;
19838 int can_issue_more;
19839 int slot, i;
19840 bool group_end;
19841 int group_count = 0;
19842 rtx *group_insns;
19843
19844 /* Initialize. */
19845 issue_rate = rs6000_issue_rate ();
19846 group_insns = XALLOCAVEC (rtx, issue_rate);
19847 for (i = 0; i < issue_rate; i++)
19848 {
19849 group_insns[i] = 0;
19850 }
19851 can_issue_more = issue_rate;
19852 slot = 0;
19853 insn = get_next_active_insn (prev_head_insn, tail);
19854 group_end = false;
19855
19856 while (insn != NULL_RTX)
19857 {
19858 slot = (issue_rate - can_issue_more);
19859 group_insns[slot] = insn;
19860 can_issue_more =
19861 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
19862 if (insn_terminates_group_p (insn, current_group))
19863 can_issue_more = 0;
19864
19865 next_insn = get_next_active_insn (insn, tail);
19866 if (next_insn == NULL_RTX)
19867 return group_count + 1;
19868
19869 /* Is next_insn going to start a new group? */
19870 group_end
19871 = (can_issue_more == 0
19872 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
19873 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
19874 	   || (can_issue_more < issue_rate
19875 	       && insn_terminates_group_p (next_insn, previous_group)));
19876
19877 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
19878 next_insn, &group_end, can_issue_more,
19879 &group_count);
19880
19881 if (group_end)
19882 {
19883 group_count++;
19884 can_issue_more = 0;
19885 for (i = 0; i < issue_rate; i++)
19886 {
19887 group_insns[i] = 0;
19888 }
19889 }
19890
19891 if (GET_MODE (next_insn) == TImode && can_issue_more)
19892 PUT_MODE (next_insn, VOIDmode);
19893 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
19894 PUT_MODE (next_insn, TImode);
19895
19896 insn = next_insn;
19897 if (can_issue_more == 0)
19898 can_issue_more = issue_rate;
19899 } /* while */
19900
19901 return group_count;
19902 }
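
/* Note on the marking convention used above: the first insn of each
   dispatch group carries TImode and all other insns carry VOIDmode, so
   redefine_groups rewrites the modes the scheduler assigned until they
   reflect the estimated processor grouping instead.  */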
19903
19904 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
19905 dispatch group boundaries that the scheduler had marked. Pad with nops
19906 any dispatch groups which have vacant issue slots, in order to force the
19907 scheduler's grouping on the processor dispatcher. The function
19908 returns the number of dispatch groups found. */
19909
19910 static int
19911 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
19912 rtx_insn *tail)
19913 {
19914 rtx_insn *insn, *next_insn;
19915 rtx nop;
19916 int issue_rate;
19917 int can_issue_more;
19918 int group_end;
19919 int group_count = 0;
19920
19921 /* Initialize issue_rate. */
19922 issue_rate = rs6000_issue_rate ();
19923 can_issue_more = issue_rate;
19924
19925 insn = get_next_active_insn (prev_head_insn, tail);
19926 next_insn = get_next_active_insn (insn, tail);
19927
19928 while (insn != NULL_RTX)
19929 {
19930 can_issue_more =
19931 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
19932
19933 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
19934
19935 if (next_insn == NULL_RTX)
19936 break;
19937
19938 if (group_end)
19939 {
19940 /* If the scheduler had marked group termination at this location
19941 (between insn and next_insn), and neither insn nor next_insn will
19942 force group termination, pad the group with nops to force group
19943 termination. */
19944 if (can_issue_more
19945 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
19946 && !insn_terminates_group_p (insn, current_group)
19947 && !insn_terminates_group_p (next_insn, previous_group))
19948 {
19949 if (!is_branch_slot_insn (next_insn))
19950 can_issue_more--;
19951
19952 while (can_issue_more)
19953 {
19954 nop = gen_nop ();
19955 emit_insn_before (nop, next_insn);
19956 can_issue_more--;
19957 }
19958 }
19959
19960 can_issue_more = issue_rate;
19961 group_count++;
19962 }
19963
19964 insn = next_insn;
19965 next_insn = get_next_active_insn (insn, tail);
19966 }
19967
19968 return group_count;
19969 }
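
/* Illustrative trace: with issue_rate == 4 and -minsert-sched-nops=pad,
   suppose one insn was issued into the current group (can_issue_more == 3)
   and the scheduler marked a group boundary here.  If NEXT_INSN is not a
   branch, can_issue_more is first decremented to 2 (the branch slot is not
   padded), and two nops are then emitted to fill the remaining slots.  */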
19970
19971 /* We're beginning a new block. Initialize data structures as necessary. */
19972
19973 static void
19974 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
19975 int sched_verbose ATTRIBUTE_UNUSED,
19976 int max_ready ATTRIBUTE_UNUSED)
19977 {
19978 last_scheduled_insn = NULL;
19979 load_store_pendulum = 0;
19980 divide_cnt = 0;
19981 vec_pairing = 0;
19982 }
19983
19984 /* The following function is called at the end of scheduling a basic block.
19985    After reload, it inserts nops to enforce the insn group bundling.  */
19986
19987 static void
19988 rs6000_sched_finish (FILE *dump, int sched_verbose)
19989 {
19990 int n_groups;
19991
19992 if (sched_verbose)
19993 fprintf (dump, "=== Finishing schedule.\n");
19994
19995 if (reload_completed && rs6000_sched_groups)
19996 {
19997       /* Do not run the sched_finish hook when selective scheduling
	 is enabled.  */
19998 if (sel_sched_p ())
19999 return;
20000
20001 if (rs6000_sched_insert_nops == sched_finish_none)
20002 return;
20003
20004 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
20005 n_groups = pad_groups (dump, sched_verbose,
20006 current_sched_info->prev_head,
20007 current_sched_info->next_tail);
20008 else
20009 n_groups = redefine_groups (dump, sched_verbose,
20010 current_sched_info->prev_head,
20011 current_sched_info->next_tail);
20012
20013 if (sched_verbose >= 6)
20014 {
20015 fprintf (dump, "ngroups = %d\n", n_groups);
20016 print_rtl (dump, current_sched_info->prev_head);
20017 fprintf (dump, "Done finish_sched\n");
20018 }
20019 }
20020 }
20021
20022 struct rs6000_sched_context
20023 {
20024 short cached_can_issue_more;
20025 rtx_insn *last_scheduled_insn;
20026 int load_store_pendulum;
20027 int divide_cnt;
20028 int vec_pairing;
20029 };
20030
20031 typedef struct rs6000_sched_context rs6000_sched_context_def;
20032 typedef rs6000_sched_context_def *rs6000_sched_context_t;
20033
20034 /* Allocate storage for a new scheduling context.  */
20035 static void *
20036 rs6000_alloc_sched_context (void)
20037 {
20038 return xmalloc (sizeof (rs6000_sched_context_def));
20039 }
20040
20041 /* If CLEAN_P is true, initialize _SC with clean data;
20042    otherwise initialize it from the global context.  */
20043 static void
20044 rs6000_init_sched_context (void *_sc, bool clean_p)
20045 {
20046 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
20047
20048 if (clean_p)
20049 {
20050 sc->cached_can_issue_more = 0;
20051 sc->last_scheduled_insn = NULL;
20052 sc->load_store_pendulum = 0;
20053 sc->divide_cnt = 0;
20054 sc->vec_pairing = 0;
20055 }
20056 else
20057 {
20058 sc->cached_can_issue_more = cached_can_issue_more;
20059 sc->last_scheduled_insn = last_scheduled_insn;
20060 sc->load_store_pendulum = load_store_pendulum;
20061 sc->divide_cnt = divide_cnt;
20062 sc->vec_pairing = vec_pairing;
20063 }
20064 }
20065
20066 /* Sets the global scheduling context to the one pointed to by _SC. */
20067 static void
20068 rs6000_set_sched_context (void *_sc)
20069 {
20070 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
20071
20072 gcc_assert (sc != NULL);
20073
20074 cached_can_issue_more = sc->cached_can_issue_more;
20075 last_scheduled_insn = sc->last_scheduled_insn;
20076 load_store_pendulum = sc->load_store_pendulum;
20077 divide_cnt = sc->divide_cnt;
20078 vec_pairing = sc->vec_pairing;
20079 }
20080
20081 /* Free _SC. */
20082 static void
20083 rs6000_free_sched_context (void *_sc)
20084 {
20085 gcc_assert (_sc != NULL);
20086
20087 free (_sc);
20088 }
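
/* Together, the four hooks above implement the context save/restore
   protocol used by the (selective) scheduler: a context is created with
   rs6000_alloc_sched_context, filled in by rs6000_init_sched_context,
   made current with rs6000_set_sched_context, and finally released by
   rs6000_free_sched_context.  */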
20089
20090 static bool
20091 rs6000_sched_can_speculate_insn (rtx_insn *insn)
20092 {
20093 switch (get_attr_type (insn))
20094 {
20095 case TYPE_DIV:
20096 case TYPE_SDIV:
20097 case TYPE_DDIV:
20098 case TYPE_VECDIV:
20099 case TYPE_SSQRT:
20100 case TYPE_DSQRT:
20101 return false;
20102
20103 default:
20104 return true;
20105 }
20106 }
20107 \f
20108 /* Length in units of the trampoline for entering a nested function. */
20109
20110 int
20111 rs6000_trampoline_size (void)
20112 {
20113 int ret = 0;
20114
20115 switch (DEFAULT_ABI)
20116 {
20117 default:
20118 gcc_unreachable ();
20119
20120 case ABI_AIX:
20121 ret = (TARGET_32BIT) ? 12 : 24;
20122 break;
20123
20124 case ABI_ELFv2:
20125 gcc_assert (!TARGET_32BIT);
20126 ret = 32;
20127 break;
20128
20129 case ABI_DARWIN:
20130 case ABI_V4:
20131 ret = (TARGET_32BIT) ? 40 : 48;
20132 break;
20133 }
20134
20135 return ret;
20136 }
20137
20138 /* Emit RTL insns to initialize the variable parts of a trampoline.
20139 FNADDR is an RTX for the address of the function's pure code.
20140 CXT is an RTX for the static chain value for the function. */
20141
20142 static void
20143 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
20144 {
20145 int regsize = (TARGET_32BIT) ? 4 : 8;
20146 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
20147 rtx ctx_reg = force_reg (Pmode, cxt);
20148 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
20149
20150 switch (DEFAULT_ABI)
20151 {
20152 default:
20153 gcc_unreachable ();
20154
20155       /* Under AIX, just build the 3-word function descriptor.  */
20156 case ABI_AIX:
20157 {
20158 rtx fnmem, fn_reg, toc_reg;
20159
20160 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
20161 error ("you cannot take the address of a nested function if you use "
20162 "the %qs option", "-mno-pointers-to-nested-functions");
20163
20164 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
20165 fn_reg = gen_reg_rtx (Pmode);
20166 toc_reg = gen_reg_rtx (Pmode);
20167
20168 /* Macro to shorten the code expansions below. */
20169 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
20170
20171 m_tramp = replace_equiv_address (m_tramp, addr);
20172
20173 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
20174 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
20175 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
20176 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
20177 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
20178
20179 # undef MEM_PLUS
20180 }
20181 break;
20182
20183 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
20184 case ABI_ELFv2:
20185 case ABI_DARWIN:
20186 case ABI_V4:
20187 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
20188 LCT_NORMAL, VOIDmode,
20189 addr, Pmode,
20190 GEN_INT (rs6000_trampoline_size ()), SImode,
20191 fnaddr, Pmode,
20192 ctx_reg, Pmode);
20193 break;
20194 }
20195 }
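
/* Sketch of the trampoline built by the ABI_AIX case above (a 3-word
   function descriptor, with words of REGSIZE bytes):
     word 0: code address, copied from the target function's descriptor
     word 1: TOC pointer, copied from the target function's descriptor
     word 2: static chain value (CTX_REG)
   The other ABIs instead defer to __trampoline_setup at run time.  */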
20196
20197 \f
20198 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
20199 identifier as an argument, so the front end shouldn't look it up. */
20200
20201 static bool
20202 rs6000_attribute_takes_identifier_p (const_tree attr_id)
20203 {
20204 return is_attribute_p ("altivec", attr_id);
20205 }
20206
20207 /* Handle the "altivec" attribute. The attribute may have
20208 arguments as follows:
20209
20210 __attribute__((altivec(vector__)))
20211 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
20212 __attribute__((altivec(bool__))) (always followed by 'unsigned')
20213
20214 and may appear more than once (e.g., 'vector bool char') in a
20215 given declaration. */
20216
20217 static tree
20218 rs6000_handle_altivec_attribute (tree *node,
20219 tree name ATTRIBUTE_UNUSED,
20220 tree args,
20221 int flags ATTRIBUTE_UNUSED,
20222 bool *no_add_attrs)
20223 {
20224 tree type = *node, result = NULL_TREE;
20225 machine_mode mode;
20226 int unsigned_p;
20227 char altivec_type
20228 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
20229 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
20230 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
20231 : '?');
20232
20233 while (POINTER_TYPE_P (type)
20234 || TREE_CODE (type) == FUNCTION_TYPE
20235 || TREE_CODE (type) == METHOD_TYPE
20236 || TREE_CODE (type) == ARRAY_TYPE)
20237 type = TREE_TYPE (type);
20238
20239 mode = TYPE_MODE (type);
20240
20241 /* Check for invalid AltiVec type qualifiers. */
20242 if (type == long_double_type_node)
20243 error ("use of %<long double%> in AltiVec types is invalid");
20244 else if (type == boolean_type_node)
20245 error ("use of boolean types in AltiVec types is invalid");
20246 else if (TREE_CODE (type) == COMPLEX_TYPE)
20247 error ("use of %<complex%> in AltiVec types is invalid");
20248 else if (DECIMAL_FLOAT_MODE_P (mode))
20249 error ("use of decimal floating-point types in AltiVec types is invalid");
20250 else if (!TARGET_VSX)
20251 {
20252 if (type == long_unsigned_type_node || type == long_integer_type_node)
20253 {
20254 if (TARGET_64BIT)
20255 error ("use of %<long%> in AltiVec types is invalid for "
20256 "64-bit code without %qs", "-mvsx");
20257 else if (rs6000_warn_altivec_long)
20258 warning (0, "use of %<long%> in AltiVec types is deprecated; "
20259 "use %<int%>");
20260 }
20261 else if (type == long_long_unsigned_type_node
20262 || type == long_long_integer_type_node)
20263 error ("use of %<long long%> in AltiVec types is invalid without %qs",
20264 "-mvsx");
20265 else if (type == double_type_node)
20266 error ("use of %<double%> in AltiVec types is invalid without %qs",
20267 "-mvsx");
20268 }
20269
20270 switch (altivec_type)
20271 {
20272 case 'v':
20273 unsigned_p = TYPE_UNSIGNED (type);
20274 switch (mode)
20275 {
20276 case E_TImode:
20277 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
20278 break;
20279 case E_DImode:
20280 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
20281 break;
20282 case E_SImode:
20283 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
20284 break;
20285 case E_HImode:
20286 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
20287 break;
20288 case E_QImode:
20289 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
20290 break;
20291 case E_SFmode: result = V4SF_type_node; break;
20292 case E_DFmode: result = V2DF_type_node; break;
20293 /* If the user says 'vector int bool', we may be handed the 'bool'
20294 attribute _before_ the 'vector' attribute, and so select the
20295 proper type in the 'b' case below. */
20296 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
20297 case E_V2DImode: case E_V2DFmode:
20298 	  result = type;
	  break;
20299 	default: break;
20300 }
20301 break;
20302 case 'b':
20303 switch (mode)
20304 {
20305 case E_TImode: case E_V1TImode: result = bool_V1TI_type_node; break;
20306 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
20307 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
20308 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
20309 	case E_QImode: case E_V16QImode: result = bool_V16QI_type_node; break;
20310 default: break;
20311 }
20312 break;
20313 case 'p':
20314 switch (mode)
20315 {
20316 	  case E_V8HImode: result = pixel_V8HI_type_node; break;
20317 default: break;
20318 	  }
	break;
20319     default: break;
20320 }
20321
20322 /* Propagate qualifiers attached to the element type
20323 onto the vector type. */
20324 if (result && result != type && TYPE_QUALS (type))
20325 result = build_qualified_type (result, TYPE_QUALS (type));
20326
20327 *no_add_attrs = true; /* No need to hang on to the attribute. */
20328
20329 if (result)
20330 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
20331
20332 return NULL_TREE;
20333 }
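
/* For example, the source type "vector unsigned int" reaches this handler
   as "__attribute__ ((altivec (vector__))) unsigned int" and is rewritten
   to unsigned_V4SI_type_node, while "vector bool short" arrives with the
   bool__ argument over an unsigned short and yields bool_V8HI_type_node.  */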
20334
20335 /* AltiVec defines five built-in scalar types that serve as vector
20336 elements; we must teach the compiler how to mangle them. The 128-bit
20337 floating point mangling is target-specific as well. MMA defines
20338 two built-in types to be used as opaque vector types. */
20339
20340 static const char *
20341 rs6000_mangle_type (const_tree type)
20342 {
20343 type = TYPE_MAIN_VARIANT (type);
20344
20345 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
20346 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE
20347 && TREE_CODE (type) != OPAQUE_TYPE)
20348 return NULL;
20349
20350 if (type == bool_char_type_node) return "U6__boolc";
20351 if (type == bool_short_type_node) return "U6__bools";
20352 if (type == pixel_type_node) return "u7__pixel";
20353 if (type == bool_int_type_node) return "U6__booli";
20354 if (type == bool_long_long_type_node) return "U6__boolx";
20355
20356 if (type == float128_type_node || type == float64x_type_node)
20357 return NULL;
20358
20359 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
20360 return "g";
20361 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
20362 return "u9__ieee128";
20363
20364 if (type == vector_pair_type_node)
20365 return "u13__vector_pair";
20366 if (type == vector_quad_type_node)
20367 return "u13__vector_quad";
20368
20369 /* For all other types, use the default mangling. */
20370 return NULL;
20371 }
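
/* Illustrative mangling: the "u9__ieee128" string above makes a function
   "void f (__ieee128)" mangle as "_Z1fu9__ieee128", which c++filt
   demangles back to "f(__ieee128)".  */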
20372
20373 /* Handle a "longcall" or "shortcall" attribute; arguments as in
20374 struct attribute_spec.handler. */
20375
20376 static tree
20377 rs6000_handle_longcall_attribute (tree *node, tree name,
20378 tree args ATTRIBUTE_UNUSED,
20379 int flags ATTRIBUTE_UNUSED,
20380 bool *no_add_attrs)
20381 {
20382 if (TREE_CODE (*node) != FUNCTION_TYPE
20383 && TREE_CODE (*node) != FIELD_DECL
20384 && TREE_CODE (*node) != TYPE_DECL)
20385 {
20386 warning (OPT_Wattributes, "%qE attribute only applies to functions",
20387 name);
20388 *no_add_attrs = true;
20389 }
20390
20391 return NULL_TREE;
20392 }
20393
20394 /* Set longcall attributes on all functions declared when
20395 rs6000_default_long_calls is true. */
20396 static void
20397 rs6000_set_default_type_attributes (tree type)
20398 {
20399 if (rs6000_default_long_calls
20400 && (TREE_CODE (type) == FUNCTION_TYPE
20401 || TREE_CODE (type) == METHOD_TYPE))
20402 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
20403 NULL_TREE,
20404 TYPE_ATTRIBUTES (type));
20405
20406 #if TARGET_MACHO
20407 darwin_set_default_type_attributes (type);
20408 #endif
20409 }
20410
20411 /* Return a reference suitable for calling a function with the
20412 longcall attribute. */
20413
20414 static rtx
20415 rs6000_longcall_ref (rtx call_ref, rtx arg)
20416 {
20417 /* System V adds '.' to the internal name, so skip them. */
20418 const char *call_name = XSTR (call_ref, 0);
20419 if (*call_name == '.')
20420 {
20421 while (*call_name == '.')
20422 call_name++;
20423
20424 tree node = get_identifier (call_name);
20425 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
20426 }
20427
20428 if (TARGET_PLTSEQ)
20429 {
20430 rtx base = const0_rtx;
20431 int regno = 12;
20432 if (rs6000_pcrel_p ())
20433 {
20434 rtx reg = gen_rtx_REG (Pmode, regno);
20435 rtx u = gen_rtx_UNSPEC_VOLATILE (Pmode,
20436 gen_rtvec (3, base, call_ref, arg),
20437 UNSPECV_PLT_PCREL);
20438 emit_insn (gen_rtx_SET (reg, u));
20439 return reg;
20440 }
20441
20442 if (DEFAULT_ABI == ABI_ELFv2)
20443 base = gen_rtx_REG (Pmode, TOC_REGISTER);
20444 else
20445 {
20446 if (flag_pic)
20447 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
20448 regno = 11;
20449 }
20450 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
20451 may be used by a function global entry point. For SysV4, r11
20452 is used by __glink_PLTresolve lazy resolver entry. */
20453 rtx reg = gen_rtx_REG (Pmode, regno);
20454 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
20455 UNSPEC_PLT16_HA);
20456 rtx lo = gen_rtx_UNSPEC_VOLATILE (Pmode,
20457 gen_rtvec (3, reg, call_ref, arg),
20458 UNSPECV_PLT16_LO);
20459 emit_insn (gen_rtx_SET (reg, hi));
20460 emit_insn (gen_rtx_SET (reg, lo));
20461 return reg;
20462 }
20463
20464 return force_reg (Pmode, call_ref);
20465 }
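
/* Very roughly, and only as an illustration, the UNSPEC_PLT16_HA /
   UNSPECV_PLT16_LO pair built above corresponds to an addressing pair in
   the emitted assembly along the lines of (32-bit SysV secure PLT):
       addis r11,r30,target@plt@ha
       lwz   r11,target@plt@l(r11)
   leaving the resolved call target in the returned register.  */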
20466 \f
20467 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
20468 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
20469 #endif
20470
20471 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20472 struct attribute_spec.handler. */
20473 static tree
20474 rs6000_handle_struct_attribute (tree *node, tree name,
20475 tree args ATTRIBUTE_UNUSED,
20476 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
20477 {
20478 tree *type = NULL;
20479 if (DECL_P (*node))
20480 {
20481 if (TREE_CODE (*node) == TYPE_DECL)
20482 type = &TREE_TYPE (*node);
20483 }
20484 else
20485 type = node;
20486
20487 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
20488 || TREE_CODE (*type) == UNION_TYPE)))
20489 {
20490 warning (OPT_Wattributes, "%qE attribute ignored", name);
20491 *no_add_attrs = true;
20492 }
20493
20494 else if ((is_attribute_p ("ms_struct", name)
20495 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
20496 || ((is_attribute_p ("gcc_struct", name)
20497 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
20498 {
20499 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
20500 name);
20501 *no_add_attrs = true;
20502 }
20503
20504 return NULL_TREE;
20505 }
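
/* Usage sketch for the attribute handled above:
       struct __attribute__ ((ms_struct)) s { char c; int i; };
   As the checks show, "ms_struct" and "gcc_struct" are mutually exclusive
   on the same type.  */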
20506
20507 static bool
20508 rs6000_ms_bitfield_layout_p (const_tree record_type)
20509 {
20510   return ((TARGET_USE_MS_BITFIELD_LAYOUT
20511 	   && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
20512 	  || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
20513 }
20514 \f
20515 #ifdef USING_ELFOS_H
20516
20517 /* A get_unnamed_section callback, used for switching to toc_section. */
20518
20519 static void
20520 rs6000_elf_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED)
20521 {
20522 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20523 && TARGET_MINIMAL_TOC)
20524 {
20525 if (!toc_initialized)
20526 {
20527 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
20528 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20529 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
20530 fprintf (asm_out_file, "\t.tc ");
20531 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
20532 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20533 fprintf (asm_out_file, "\n");
20534
20535 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20536 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20537 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20538 fprintf (asm_out_file, " = .+32768\n");
20539 toc_initialized = 1;
20540 }
20541 else
20542 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20543 }
20544 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20545 {
20546 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
20547 if (!toc_initialized)
20548 {
20549 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20550 toc_initialized = 1;
20551 }
20552 }
20553 else
20554 {
20555 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20556 if (!toc_initialized)
20557 {
20558 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20559 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20560 fprintf (asm_out_file, " = .+32768\n");
20561 toc_initialized = 1;
20562 }
20563 }
20564 }
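
/* Illustrative first-time output under 64-bit -mminimal-toc (the exact
   section directives come from TOC_SECTION_ASM_OP and
   MINIMAL_TOC_SECTION_ASM_OP):
	(TOC section directive)
	.align 3
   .LCTOC0:
	.tc .LCTOC1[TC],.LCTOC1
	(minimal TOC section directive)
	.align 3
   .LCTOC1 = .+32768
   The .+32768 bias lets a full 64K of TOC data be reached with signed
   16-bit offsets from .LCTOC1.  */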
20565
20566 /* Implement TARGET_ASM_INIT_SECTIONS. */
20567
20568 static void
20569 rs6000_elf_asm_init_sections (void)
20570 {
20571 toc_section
20572 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
20573
20574 sdata2_section
20575 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
20576 SDATA2_SECTION_ASM_OP);
20577 }
20578
20579 /* Implement TARGET_SELECT_RTX_SECTION. */
20580
20581 static section *
20582 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
20583 unsigned HOST_WIDE_INT align)
20584 {
20585 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
20586 return toc_section;
20587 else
20588 return default_elf_select_rtx_section (mode, x, align);
20589 }
20590 \f
20591 /* For a SYMBOL_REF, set generic flags and then perform some
20592 target-specific processing.
20593
20594 When the AIX ABI is requested on a non-AIX system, replace the
20595 function name with the real name (with a leading .) rather than the
20596 function descriptor name. This saves a lot of overriding code to
20597 read the prefixes. */
20598
20599 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
20600 static void
20601 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
20602 {
20603 default_encode_section_info (decl, rtl, first);
20604
20605 if (first
20606 && TREE_CODE (decl) == FUNCTION_DECL
20607 && !TARGET_AIX
20608 && DEFAULT_ABI == ABI_AIX)
20609 {
20610 rtx sym_ref = XEXP (rtl, 0);
20611 size_t len = strlen (XSTR (sym_ref, 0));
20612 char *str = XALLOCAVEC (char, len + 2);
20613 str[0] = '.';
20614 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
20615 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
20616 }
20617 }
20618
20619 static inline bool
20620 compare_section_name (const char *section, const char *templ)
20621 {
20622 int len;
20623
20624 len = strlen (templ);
20625 return (strncmp (section, templ, len) == 0
20626 && (section[len] == 0 || section[len] == '.'));
20627 }
20628
20629 bool
20630 rs6000_elf_in_small_data_p (const_tree decl)
20631 {
20632 if (rs6000_sdata == SDATA_NONE)
20633 return false;
20634
20635 /* We want to merge strings, so we never consider them small data. */
20636 if (TREE_CODE (decl) == STRING_CST)
20637 return false;
20638
20639 /* Functions are never in the small data area. */
20640 if (TREE_CODE (decl) == FUNCTION_DECL)
20641 return false;
20642
20643 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
20644 {
20645 const char *section = DECL_SECTION_NAME (decl);
20646 if (compare_section_name (section, ".sdata")
20647 || compare_section_name (section, ".sdata2")
20648 || compare_section_name (section, ".gnu.linkonce.s")
20649 || compare_section_name (section, ".sbss")
20650 || compare_section_name (section, ".sbss2")
20651 || compare_section_name (section, ".gnu.linkonce.sb")
20652 || strcmp (section, ".PPC.EMB.sdata0") == 0
20653 || strcmp (section, ".PPC.EMB.sbss0") == 0)
20654 return true;
20655 }
20656 else
20657 {
20658 /* If we are told not to put readonly data in sdata, then don't. */
20659 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
20660 && !rs6000_readonly_in_sdata)
20661 return false;
20662
20663 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
20664
20665 if (size > 0
20666 && size <= g_switch_value
20667 /* If it's not public, and we're not going to reference it there,
20668 there's no need to put it in the small data section. */
20669 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
20670 return true;
20671 }
20672
20673 return false;
20674 }
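
/* Example: with -msdata=data and -G 8, a public, writable, 4-byte
   variable ("int x;") passes the size check above and is placed in small
   data, while a 16-byte object exceeds g_switch_value and stays in the
   ordinary data section.  */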
20675
20676 #endif /* USING_ELFOS_H */
20677 \f
20678 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
20679
20680 static bool
20681 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
20682 {
20683 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
20684 }
20685
20686 /* Do not place thread-local symbols refs in the object blocks. */
20687
20688 static bool
20689 rs6000_use_blocks_for_decl_p (const_tree decl)
20690 {
20691 return !DECL_THREAD_LOCAL_P (decl);
20692 }
20693 \f
20694 /* Return a REG that occurs in ADDR with coefficient 1.
20695 ADDR can be effectively incremented by incrementing REG.
20696
20697 r0 is special and we must not select it as an address
20698 register by this routine since our caller will try to
20699 increment the returned register via an "la" instruction. */
20700
20701 rtx
20702 find_addr_reg (rtx addr)
20703 {
20704 while (GET_CODE (addr) == PLUS)
20705 {
20706 if (REG_P (XEXP (addr, 0))
20707 && REGNO (XEXP (addr, 0)) != 0)
20708 addr = XEXP (addr, 0);
20709 else if (REG_P (XEXP (addr, 1))
20710 && REGNO (XEXP (addr, 1)) != 0)
20711 addr = XEXP (addr, 1);
20712 else if (CONSTANT_P (XEXP (addr, 0)))
20713 addr = XEXP (addr, 1);
20714 else if (CONSTANT_P (XEXP (addr, 1)))
20715 addr = XEXP (addr, 0);
20716 else
20717 gcc_unreachable ();
20718 }
20719 gcc_assert (REG_P (addr) && REGNO (addr) != 0);
20720 return addr;
20721 }
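
/* For example, for ADDR == (plus (reg 9) (const_int 16)) this returns
   (reg 9), and for (plus (reg 0) (reg 10)) it returns (reg 10), since r0
   must never be chosen as the base register here.  */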
20722
20723 void
20724 rs6000_fatal_bad_address (rtx op)
20725 {
20726 fatal_insn ("bad address", op);
20727 }
20728
20729 #if TARGET_MACHO
20730
20731 vec<branch_island, va_gc> *branch_islands;
20732
20733 /* Remember to generate a branch island for far calls to the given
20734 function. */
20735
20736 static void
20737 add_compiler_branch_island (tree label_name, tree function_name,
20738 int line_number)
20739 {
20740 branch_island bi = {function_name, label_name, line_number};
20741 vec_safe_push (branch_islands, bi);
20742 }
20743
20744 /* NO_PREVIOUS_DEF checks whether the function name is already in the
20745    list of branch islands.  */
20746
20747 static int
20748 no_previous_def (tree function_name)
20749 {
20750 branch_island *bi;
20751 unsigned ix;
20752
20753 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
20754 if (function_name == bi->function_name)
20755 return 0;
20756 return 1;
20757 }
20758
20759 /* GET_PREV_LABEL gets the label name from the previous definition of
20760 the function. */
20761
20762 static tree
20763 get_prev_label (tree function_name)
20764 {
20765 branch_island *bi;
20766 unsigned ix;
20767
20768 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
20769 if (function_name == bi->function_name)
20770 return bi->label_name;
20771 return NULL_TREE;
20772 }
20773
20774 /* Generate external symbol indirection stubs (PIC and non-PIC). */
20775
20776 void
20777 machopic_output_stub (FILE *file, const char *symb, const char *stub)
20778 {
20779 unsigned int length;
20780 char *symbol_name, *lazy_ptr_name;
20781 char *local_label_0;
20782 static unsigned label = 0;
20783
20784 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20785 symb = (*targetm.strip_name_encoding) (symb);
20786
20787 length = strlen (symb);
20788 symbol_name = XALLOCAVEC (char, length + 32);
20789 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
20790
20791 lazy_ptr_name = XALLOCAVEC (char, length + 32);
20792 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
20793
20794 if (MACHOPIC_PURE)
20795 {
20796 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
20797 fprintf (file, "\t.align 5\n");
20798
20799 fprintf (file, "%s:\n", stub);
20800 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20801
20802 label++;
20803 local_label_0 = XALLOCAVEC (char, 16);
20804 sprintf (local_label_0, "L%u$spb", label);
20805
20806 fprintf (file, "\tmflr r0\n");
20807 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
20808 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
20809 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
20810 lazy_ptr_name, local_label_0);
20811 fprintf (file, "\tmtlr r0\n");
20812 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
20813 (TARGET_64BIT ? "ldu" : "lwzu"),
20814 lazy_ptr_name, local_label_0);
20815 fprintf (file, "\tmtctr r12\n");
20816 fprintf (file, "\tbctr\n");
20817 }
20818 else /* mdynamic-no-pic or mkernel. */
20819 {
20820 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
20821 fprintf (file, "\t.align 4\n");
20822
20823 fprintf (file, "%s:\n", stub);
20824 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20825
20826 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
20827 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
20828 (TARGET_64BIT ? "ldu" : "lwzu"),
20829 lazy_ptr_name);
20830 fprintf (file, "\tmtctr r12\n");
20831 fprintf (file, "\tbctr\n");
20832 }
20833
20834 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
20835 fprintf (file, "%s:\n", lazy_ptr_name);
20836 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20837 fprintf (file, "%sdyld_stub_binding_helper\n",
20838 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
20839 }
20840
20841 /* Legitimize PIC addresses. If the address is already
20842 position-independent, we return ORIG. Newly generated
20843 position-independent addresses go into a reg. This is REG if non
20844 zero, otherwise we allocate register(s) as necessary. */
20845
20846 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
20847
20848 rtx
20849 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
20850 rtx reg)
20851 {
20852 rtx base, offset;
20853
20854 if (reg == NULL && !reload_completed)
20855 reg = gen_reg_rtx (Pmode);
20856
20857 if (GET_CODE (orig) == CONST)
20858 {
20859 rtx reg_temp;
20860
20861 if (GET_CODE (XEXP (orig, 0)) == PLUS
20862 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
20863 return orig;
20864
20865 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
20866
20867 /* Use a different reg for the intermediate value, as
20868 it will be marked UNCHANGING. */
20869 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
20870 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
20871 Pmode, reg_temp);
20872 offset =
20873 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
20874 Pmode, reg);
20875
20876 if (CONST_INT_P (offset))
20877 {
20878 if (SMALL_INT (offset))
20879 return plus_constant (Pmode, base, INTVAL (offset));
20880 else if (!reload_completed)
20881 offset = force_reg (Pmode, offset);
20882 else
20883 {
20884 rtx mem = force_const_mem (Pmode, orig);
20885 return machopic_legitimize_pic_address (mem, Pmode, reg);
20886 }
20887 }
20888 return gen_rtx_PLUS (Pmode, base, offset);
20889 }
20890
20891 /* Fall back on generic machopic code. */
20892 return machopic_legitimize_pic_address (orig, mode, reg);
20893 }
20894
20895 /* Output a .machine directive for the Darwin assembler, and call
20896 the generic start_file routine. */
20897
20898 static void
20899 rs6000_darwin_file_start (void)
20900 {
20901 static const struct
20902 {
20903 const char *arg;
20904 const char *name;
20905 HOST_WIDE_INT if_set;
20906 } mapping[] = {
20907 { "ppc64", "ppc64", MASK_64BIT },
20908 { "970", "ppc970", OPTION_MASK_PPC_GPOPT | OPTION_MASK_MFCRF \
20909 | MASK_POWERPC64 },
20910 { "power4", "ppc970", 0 },
20911 { "G5", "ppc970", 0 },
20912 { "7450", "ppc7450", 0 },
20913 { "7400", "ppc7400", OPTION_MASK_ALTIVEC },
20914 { "G4", "ppc7400", 0 },
20915 { "750", "ppc750", 0 },
20916 { "740", "ppc750", 0 },
20917 { "G3", "ppc750", 0 },
20918 { "604e", "ppc604e", 0 },
20919 { "604", "ppc604", 0 },
20920 { "603e", "ppc603", 0 },
20921 { "603", "ppc603", 0 },
20922 { "601", "ppc601", 0 },
20923 { NULL, "ppc", 0 } };
20924 const char *cpu_id = "";
20925 size_t i;
20926
20927 rs6000_file_start ();
20928 darwin_file_start ();
20929
20930 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
20931
20932 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
20933 cpu_id = rs6000_default_cpu;
20934
20935 if (OPTION_SET_P (rs6000_cpu_index))
20936 cpu_id = processor_target_table[rs6000_cpu_index].name;
20937
20938 /* Look through the mapping array. Pick the first name that either
20939 matches the argument, has a bit set in IF_SET that is also set
20940 in the target flags, or has a NULL name. */
20941
20942 i = 0;
20943 while (mapping[i].arg != NULL
20944 && strcmp (mapping[i].arg, cpu_id) != 0
20945 && (mapping[i].if_set & rs6000_isa_flags) == 0)
20946 i++;
20947
20948 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
20949 }
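
/* For instance, -mcpu=G4 matches the "G4" row in the table above and
   emits "\t.machine ppc7400", while a 64-bit compile with no explicit
   -mcpu matches the "ppc64" row through its MASK_64BIT bit.  */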
20950
20951 #endif /* TARGET_MACHO */
20952
20953 #if TARGET_ELF
20954 static int
20955 rs6000_elf_reloc_rw_mask (void)
20956 {
20957 if (flag_pic)
20958 return 3;
20959 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20960 return 2;
20961 else
20962 return 0;
20963 }
20964
20965 /* Record an element in the table of global constructors. SYMBOL is
20966 a SYMBOL_REF of the function to be called; PRIORITY is a number
20967 between 0 and MAX_INIT_PRIORITY.
20968
20969 This differs from default_named_section_asm_out_constructor in
20970 that we have special handling for -mrelocatable. */
20971
20972 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
20973 static void
20974 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
20975 {
20976 const char *section = ".ctors";
20977 char buf[18];
20978
20979 if (priority != DEFAULT_INIT_PRIORITY)
20980 {
20981 sprintf (buf, ".ctors.%.5u",
20982 /* Invert the numbering so the linker puts us in the proper
20983 order; constructors are run from right to left, and the
20984 linker sorts in increasing order. */
20985 MAX_INIT_PRIORITY - priority);
20986 section = buf;
20987 }
20988
20989 switch_to_section (get_section (section, SECTION_WRITE, NULL));
20990 assemble_align (POINTER_SIZE);
20991
20992 if (DEFAULT_ABI == ABI_V4
20993 && (TARGET_RELOCATABLE || flag_pic > 1))
20994 {
20995 fputs ("\t.long (", asm_out_file);
20996 output_addr_const (asm_out_file, symbol);
20997 fputs (")@fixup\n", asm_out_file);
20998 }
20999 else
21000 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
21001 }
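
/* Worked example: for priority 65500 (MAX_INIT_PRIORITY is 65535) the
   sprintf above yields the section name ".ctors.00035", so the linker's
   increasing-order sort places this constructor in the intended inverted
   order.  */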
21002
21003 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
21004 static void
21005 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
21006 {
21007 const char *section = ".dtors";
21008 char buf[18];
21009
21010 if (priority != DEFAULT_INIT_PRIORITY)
21011 {
21012 sprintf (buf, ".dtors.%.5u",
21013 /* Invert the numbering so the linker puts us in the proper
21014 order; constructors are run from right to left, and the
21015 linker sorts in increasing order. */
21016 MAX_INIT_PRIORITY - priority);
21017 section = buf;
21018 }
21019
21020 switch_to_section (get_section (section, SECTION_WRITE, NULL));
21021 assemble_align (POINTER_SIZE);
21022
21023 if (DEFAULT_ABI == ABI_V4
21024 && (TARGET_RELOCATABLE || flag_pic > 1))
21025 {
21026 fputs ("\t.long (", asm_out_file);
21027 output_addr_const (asm_out_file, symbol);
21028 fputs (")@fixup\n", asm_out_file);
21029 }
21030 else
21031 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
21032 }
21033
21034 void
21035 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
21036 {
21037 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
21038 {
21039 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
21040 ASM_OUTPUT_LABEL (file, name);
21041 fputs (DOUBLE_INT_ASM_OP, file);
21042 rs6000_output_function_entry (file, name);
21043 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
21044 if (DOT_SYMBOLS)
21045 {
21046 fputs ("\t.size\t", file);
21047 assemble_name (file, name);
21048 fputs (",24\n\t.type\t.", file);
21049 assemble_name (file, name);
21050 fputs (",@function\n", file);
21051 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
21052 {
21053 fputs ("\t.globl\t.", file);
21054 assemble_name (file, name);
21055 putc ('\n', file);
21056 }
21057 }
21058 else
21059 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21060 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21061 rs6000_output_function_entry (file, name);
21062 fputs (":\n", file);
21063 return;
21064 }
21065
21066 int uses_toc;
21067 if (DEFAULT_ABI == ABI_V4
21068 && (TARGET_RELOCATABLE || flag_pic > 1)
21069 && !TARGET_SECURE_PLT
21070 && (!constant_pool_empty_p () || crtl->profile)
21071 && (uses_toc = uses_TOC ()))
21072 {
21073 char buf[256];
21074
21075 if (uses_toc == 2)
21076 switch_to_other_text_partition ();
21077 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
21078
21079 fprintf (file, "\t.long ");
21080 assemble_name (file, toc_label_name);
21081 need_toc_init = 1;
21082 putc ('-', file);
21083 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
21084 assemble_name (file, buf);
21085 putc ('\n', file);
21086 if (uses_toc == 2)
21087 switch_to_other_text_partition ();
21088 }
21089
21090 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21091 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21092
21093 if (TARGET_CMODEL == CMODEL_LARGE
21094 && rs6000_global_entry_point_prologue_needed_p ())
21095 {
21096 char buf[256];
21097
21098 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
21099
21100 fprintf (file, "\t.quad .TOC.-");
21101 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
21102 assemble_name (file, buf);
21103 putc ('\n', file);
21104 }
21105
21106 if (DEFAULT_ABI == ABI_AIX)
21107 {
21108 const char *desc_name, *orig_name;
21109
21110 orig_name = (*targetm.strip_name_encoding) (name);
21111 desc_name = orig_name;
21112 while (*desc_name == '.')
21113 desc_name++;
21114
21115 if (TREE_PUBLIC (decl))
21116 fprintf (file, "\t.globl %s\n", desc_name);
21117
21118 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
21119 fprintf (file, "%s:\n", desc_name);
21120 fprintf (file, "\t.long %s\n", orig_name);
21121 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
21122 fputs ("\t.long 0\n", file);
21123 fprintf (file, "\t.previous\n");
21124 }
21125 ASM_OUTPUT_LABEL (file, name);
21126 }
21127
21128 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
21129 static void
21130 rs6000_elf_file_end (void)
21131 {
21132 #ifdef HAVE_AS_GNU_ATTRIBUTE
21133 /* ??? The value emitted depends on options active at file end.
21134 Assume anyone using #pragma or attributes that might change
21135 options knows what they are doing. */
21136 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
21137 && rs6000_passes_float)
21138 {
21139 int fp;
21140
21141 if (TARGET_HARD_FLOAT)
21142 fp = 1;
21143 else
21144 fp = 2;
21145 if (rs6000_passes_long_double)
21146 {
21147 if (!TARGET_LONG_DOUBLE_128)
21148 fp |= 2 * 4;
21149 else if (TARGET_IEEEQUAD)
21150 fp |= 3 * 4;
21151 else
21152 fp |= 1 * 4;
21153 }
21154 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
21155 }
21156 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
21157 {
21158 if (rs6000_passes_vector)
21159 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
21160 (TARGET_ALTIVEC_ABI ? 2 : 1));
21161 if (rs6000_returns_struct)
21162 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
21163 aix_struct_return ? 2 : 1);
21164 }
21165 #endif
21166 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
21167 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
21168 file_end_indicate_exec_stack ();
21169 #endif
21170
21171 if (flag_split_stack)
21172 file_end_indicate_split_stack ();
21173
21174 if (cpu_builtin_p)
21175 {
21176 /* We have expanded a CPU builtin, so we need to emit a reference to
21177 the special symbol that LIBC uses to declare it supports the
21178 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
21179 switch_to_section (data_section);
21180 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
21181 fprintf (asm_out_file, "\t%s %s\n",
21182 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
21183 }
21184 }
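
/* Worked example for the FP attribute above: a hard-float compile that
   passes a 128-bit IBM long double sets fp = 1 and then fp |= 1 * 4,
   so "\t.gnu_attribute 4, 5" is emitted.  */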
21185 #endif
21186
21187 #if TARGET_XCOFF
21188
21189 #ifndef HAVE_XCOFF_DWARF_EXTRAS
21190 #define HAVE_XCOFF_DWARF_EXTRAS 0
21191 #endif
21192
21193
21194 /* Names of bss and data sections. These should be unique names for each
21195 compilation unit. */
21196
21197 char *xcoff_bss_section_name;
21198 char *xcoff_private_data_section_name;
21199 char *xcoff_private_rodata_section_name;
21200 char *xcoff_tls_data_section_name;
21201 char *xcoff_read_only_section_name;
21202
21203 static enum unwind_info_type
21204 rs6000_xcoff_debug_unwind_info (void)
21205 {
21206 return UI_NONE;
21207 }
21208
21209 static void
21210 rs6000_xcoff_asm_output_anchor (rtx symbol)
21211 {
21212 char buffer[100];
21213
21214 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
21215 SYMBOL_REF_BLOCK_OFFSET (symbol));
21216 fprintf (asm_out_file, "%s", SET_ASM_OP);
21217 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
21218 fprintf (asm_out_file, ",");
21219 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
21220 fprintf (asm_out_file, "\n");
21221 }
21222
21223 static void
21224 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
21225 {
21226 fputs (GLOBAL_ASM_OP, stream);
21227 RS6000_OUTPUT_BASENAME (stream, name);
21228 putc ('\n', stream);
21229 }
21230
21231 /* A get_unnamed_section callback, used for read-only sections.  DIRECTIVE
21232    selects between the private rodata and plain read-only section names.  */
21233
21234 static void
21235 rs6000_xcoff_output_readonly_section_asm_op (const char *directive)
21236 {
21237 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
21238 directive
21239 ? xcoff_private_rodata_section_name
21240 : xcoff_read_only_section_name,
21241 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21242 }
21243
21244 /* Likewise for read-write sections. */
21245
21246 static void
21247 rs6000_xcoff_output_readwrite_section_asm_op (const char *)
21248 {
21249 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
21250 xcoff_private_data_section_name,
21251 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21252 }
21253
21254 static void
21255 rs6000_xcoff_output_tls_section_asm_op (const char *directive)
21256 {
21257 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
21258 directive
21259 ? xcoff_private_data_section_name
21260 : xcoff_tls_data_section_name,
21261 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21262 }
21263
21264 /* A get_unnamed_section callback, used for switching to toc_section. */
21265
21266 static void
21267 rs6000_xcoff_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED)
21268 {
21269 if (TARGET_MINIMAL_TOC)
21270 {
21271       /* toc_section is always selected at least once from
21272 	 rs6000_xcoff_file_start, so this is guaranteed to be
21273 	 defined exactly once in each file.  */
21274 if (!toc_initialized)
21275 {
21276 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
21277 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
21278 toc_initialized = 1;
21279 }
21280 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
21281 (TARGET_32BIT ? "" : ",3"));
21282 }
21283 else
21284 fputs ("\t.toc\n", asm_out_file);
21285 }
21286
21287 /* Implement TARGET_ASM_INIT_SECTIONS. */
21288
21289 static void
21290 rs6000_xcoff_asm_init_sections (void)
21291 {
21292 read_only_data_section
21293 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
21294 NULL);
21295
21296 private_data_section
21297 = get_unnamed_section (SECTION_WRITE,
21298 rs6000_xcoff_output_readwrite_section_asm_op,
21299 NULL);
21300
21301 read_only_private_data_section
21302 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
21303 "");
21304
21305 tls_data_section
21306 = get_unnamed_section (SECTION_TLS,
21307 rs6000_xcoff_output_tls_section_asm_op,
21308 NULL);
21309
21310 tls_private_data_section
21311 = get_unnamed_section (SECTION_TLS,
21312 rs6000_xcoff_output_tls_section_asm_op,
21313 "");
21314
21315 toc_section
21316 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
21317
21318 readonly_data_section = read_only_data_section;
21319 }
21320
21321 static int
21322 rs6000_xcoff_reloc_rw_mask (void)
21323 {
21324 return 3;
21325 }
21326
21327 static void
21328 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
21329 tree decl ATTRIBUTE_UNUSED)
21330 {
21331 int smclass;
21332 static const char * const suffix[7]
21333 = { "PR", "RO", "RW", "BS", "TL", "UL", "XO" };
21334
21335 if (flags & SECTION_EXCLUDE)
21336 smclass = 6;
21337 else if (flags & SECTION_DEBUG)
21338 {
21339 fprintf (asm_out_file, "\t.dwsect %s\n", name);
21340 return;
21341 }
21342 else if (flags & SECTION_CODE)
21343 smclass = 0;
21344 else if (flags & SECTION_TLS)
21345 {
21346 if (flags & SECTION_BSS)
21347 smclass = 5;
21348 else
21349 smclass = 4;
21350 }
21351 else if (flags & SECTION_WRITE)
21352 {
21353 if (flags & SECTION_BSS)
21354 smclass = 3;
21355 else
21356 smclass = 2;
21357 }
21358 else
21359 smclass = 1;
21360
21361 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
21362 (flags & SECTION_CODE) ? "." : "",
21363 name, suffix[smclass], flags & SECTION_ENTSIZE);
21364 }
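
/* Example: a writable, non-BSS named section "mydata" is emitted as
       .csect mydata[RW],3
   where smclass 2 selects "RW" and the low SECTION_ENTSIZE bits carry
   log2 of the alignment computed in rs6000_xcoff_section_type_flags
   below.  */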
21365
21366 #define IN_NAMED_SECTION(DECL) \
21367 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
21368 && DECL_SECTION_NAME (DECL) != NULL)
21369
21370 static section *
21371 rs6000_xcoff_select_section (tree decl, int reloc,
21372 unsigned HOST_WIDE_INT align)
21373 {
21374 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
21375 named section. */
21376 if (align > BIGGEST_ALIGNMENT && VAR_OR_FUNCTION_DECL_P (decl))
21377 {
21378 resolve_unique_section (decl, reloc, true);
21379 if (IN_NAMED_SECTION (decl))
21380 return get_named_section (decl, NULL, reloc);
21381 }
21382
21383 if (decl_readonly_section (decl, reloc))
21384 {
21385 if (TREE_PUBLIC (decl))
21386 return read_only_data_section;
21387 else
21388 return read_only_private_data_section;
21389 }
21390 else
21391 {
21392 #if HAVE_AS_TLS
21393 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
21394 {
21395 if (bss_initializer_p (decl))
21396 return tls_comm_section;
21397 else if (TREE_PUBLIC (decl))
21398 return tls_data_section;
21399 else
21400 return tls_private_data_section;
21401 }
21402 else
21403 #endif
21404 if (TREE_PUBLIC (decl))
21405 return data_section;
21406 else
21407 return private_data_section;
21408 }
21409 }
21410
21411 static void
21412 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
21413 {
21414 const char *name;
21415
21416 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
21417 name = (*targetm.strip_name_encoding) (name);
21418 set_decl_section_name (decl, name);
21419 }
21420
21421 /* Select section for constant in constant pool.
21422
21423 On RS/6000, all constants are in the private read-only data area.
21424 However, if this is being placed in the TOC it must be output as a
21425 toc entry. */
21426
21427 static section *
21428 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
21429 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
21430 {
21431 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
21432 return toc_section;
21433 else
21434 return read_only_private_data_section;
21435 }
21436
21437 /* Remove any trailing [DS] or the like from the symbol name. */
21438
21439 static const char *
21440 rs6000_xcoff_strip_name_encoding (const char *name)
21441 {
21442 size_t len;
21443 if (*name == '*')
21444 name++;
21445 len = strlen (name);
21446 if (name[len - 1] == ']')
21447 return ggc_alloc_string (name, len - 4);
21448 else
21449 return name;
21450 }
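
/* For example, "foo[DS]" is returned as "foo" (the trailing four
   characters are dropped), and the leading '*' of "*bar" is skipped.  */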
21451
21452 /* Section attributes. AIX is always PIC. */
21453
21454 static unsigned int
21455 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
21456 {
21457 unsigned int align;
21458 unsigned int flags = default_section_type_flags (decl, name, reloc);
21459
21460 if (decl && DECL_P (decl) && VAR_P (decl) && bss_initializer_p (decl))
21461 flags |= SECTION_BSS;
21462
21463 /* Align to at least UNIT size. */
21464 if (!decl || !DECL_P (decl))
21465 align = MIN_UNITS_PER_WORD;
21466 /* Align code CSECT to at least 32 bytes. */
21467 else if ((flags & SECTION_CODE) != 0)
21468 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT), 32);
21469 else
21470 /* Increase alignment of large objects if not already stricter. */
21471 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
21472 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
21473 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
21474
21475 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
21476 }
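/* A worked example: a code csect aligned to 32 bytes stores
   exact_log2 (32) = 5 in the SECTION_ENTSIZE bits, and the named-section
   output above prints those bits back as the csect alignment operand.  */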
21477
21478 /* Output at beginning of assembler file.
21479
21480 Initialize the section names for the RS/6000 at this point.
21481
21482 Specify filename, including full path, to assembler.
21483
21484 We want to go into the TOC section so at least one .toc will be emitted.
21485 Also, in order to output proper .bs/.es pairs, we need at least one static
21486 [RW] section emitted.
21487
21488 Finally, declare mcount when profiling to make the assembler happy. */
21489
21490 static void
21491 rs6000_xcoff_file_start (void)
21492 {
21493 rs6000_gen_section_name (&xcoff_bss_section_name,
21494 main_input_filename, ".bss_");
21495 rs6000_gen_section_name (&xcoff_private_data_section_name,
21496 main_input_filename, ".rw_");
21497 rs6000_gen_section_name (&xcoff_private_rodata_section_name,
21498 main_input_filename, ".rop_");
21499 rs6000_gen_section_name (&xcoff_read_only_section_name,
21500 main_input_filename, ".ro_");
21501 rs6000_gen_section_name (&xcoff_tls_data_section_name,
21502 main_input_filename, ".tls_");
21503
21504 fputs ("\t.file\t", asm_out_file);
21505 output_quoted_string (asm_out_file, main_input_filename);
21506 fputc ('\n', asm_out_file);
21507 if (write_symbols != NO_DEBUG)
21508 switch_to_section (private_data_section);
21509 switch_to_section (toc_section);
21510 switch_to_section (text_section);
21511 if (profile_flag)
21512 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
21513 rs6000_file_start ();
21514 }
21515
21516 /* Output at end of assembler file.
21517 On the RS/6000, referencing data should automatically pull in text. */
21518
21519 static void
21520 rs6000_xcoff_file_end (void)
21521 {
21522 switch_to_section (text_section);
21523 if (xcoff_tls_exec_model_detected)
21524 {
21525 /* Add a .ref to __tls_get_addr to force libpthread dependency. */
21526 fputs ("\t.extern __tls_get_addr\n\t.ref __tls_get_addr\n", asm_out_file);
21527 }
21528 fputs ("_section_.text:\n", asm_out_file);
21529 switch_to_section (data_section);
21530 fputs (TARGET_32BIT
21531 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
21532 asm_out_file);
21534 }
21535
21536 struct declare_alias_data
21537 {
21538 FILE *file;
21539 bool function_descriptor;
21540 };
21541
21542 /* Declare alias N. A helper passed to symtab_node::call_for_symbol_and_aliases. */
21543
21544 static bool
21545 rs6000_declare_alias (struct symtab_node *n, void *d)
21546 {
21547 struct declare_alias_data *data = (struct declare_alias_data *)d;
21548 /* Main symbol is output specially, because varasm machinery does part of
21549 the job for us - we do not need to declare .globl/lglobs and such. */
21550 if (!n->alias || n->weakref)
21551 return false;
21552
21553 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
21554 return false;
21555
21556 /* Prevent assemble_alias from trying to use the .set pseudo operation,
21557 which does not behave as expected by the middle-end. */
21558 TREE_ASM_WRITTEN (n->decl) = true;
21559
21560 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
21561 char *buffer = (char *) alloca (strlen (name) + 2);
21562 char *p;
21563 int dollar_inside = 0;
21564
21565 strcpy (buffer, name);
21566 p = strchr (buffer, '$');
21567 while (p) {
21568 *p = '_';
21569 dollar_inside++;
21570 p = strchr (p + 1, '$');
21571 }
21572 if (TREE_PUBLIC (n->decl))
21573 {
21574 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
21575 {
21576 if (dollar_inside) {
21577 if (data->function_descriptor)
21578 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
21579 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21580 }
21581 if (data->function_descriptor)
21582 {
21583 fputs ("\t.globl .", data->file);
21584 RS6000_OUTPUT_BASENAME (data->file, buffer);
21585 putc ('\n', data->file);
21586 }
21587 fputs ("\t.globl ", data->file);
21588 assemble_name (data->file, buffer);
21589 putc ('\n', data->file);
21590 }
21591 #ifdef ASM_WEAKEN_DECL
21592 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
21593 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
21594 #endif
21595 }
21596 else
21597 {
21598 if (dollar_inside)
21599 {
21600 if (data->function_descriptor)
21601 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
21602 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21603 }
21604 if (data->function_descriptor)
21605 {
21606 fputs ("\t.lglobl .", data->file);
21607 RS6000_OUTPUT_BASENAME (data->file, buffer);
21608 putc ('\n', data->file);
21609 }
21610 fputs ("\t.lglobl ", data->file);
21611 assemble_name (data->file, buffer);
21612 putc ('\n', data->file);
21613 }
21614 if (data->function_descriptor)
21615 putc ('.', data->file);
21616 ASM_OUTPUT_LABEL (data->file, buffer);
21617 return false;
21618 }
21619
21620
21621 #ifdef HAVE_GAS_HIDDEN
21622 /* Helper function to calculate visibility of a DECL
21623 and return the value as a const string. */
21624
21625 static const char *
21626 rs6000_xcoff_visibility (tree decl)
21627 {
21628 static const char * const visibility_types[] = {
21629 "", ",protected", ",hidden", ",internal"
21630 };
21631
21632 enum symbol_visibility vis = DECL_VISIBILITY (decl);
21633 return visibility_types[vis];
21634 }
21635 #endif
21636
21637
21638 /* This macro produces the initial definition of a function name.
21639 On the RS/6000, we need to place an extra '.' in the function name and
21640 output the function descriptor.
21641 Dollar signs are converted to underscores.
21642
21643 The csect for the function will have already been created when
21644 text_section was selected. We do have to go back to that csect, however.
21645
21646 The third and fourth parameters to the .function pseudo-op (here 2 and 0)
21647 are placeholders which no longer have any use.
21648
21649 Because AIX assembler's .set command has unexpected semantics, we output
21650 all aliases as alternative labels in front of the definition. */
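/* As a sketch, for a hypothetical alias "bar" of a function "foo",
   rs6000_declare_alias emits the labels "bar" (and ".bar" on the
   descriptor pass) at the same address as "foo", instead of using .set.  */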
21651
21652 void
21653 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
21654 {
21655 char *buffer = (char *) alloca (strlen (name) + 1);
21656 char *p;
21657 int dollar_inside = 0;
21658 struct declare_alias_data data = {file, false};
21659
21660 strcpy (buffer, name);
21661 p = strchr (buffer, '$');
21662 while (p) {
21663 *p = '_';
21664 dollar_inside++;
21665 p = strchr (p + 1, '$');
21666 }
21667 if (TREE_PUBLIC (decl))
21668 {
21669 if (!RS6000_WEAK || !DECL_WEAK (decl))
21670 {
21671 if (dollar_inside) {
21672 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
21673 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
21674 }
21675 fputs ("\t.globl .", file);
21676 RS6000_OUTPUT_BASENAME (file, buffer);
21677 #ifdef HAVE_GAS_HIDDEN
21678 fputs (rs6000_xcoff_visibility (decl), file);
21679 #endif
21680 putc ('\n', file);
21681 }
21682 }
21683 else
21684 {
21685 if (dollar_inside) {
21686 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
21687 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
21688 }
21689 fputs ("\t.lglobl .", file);
21690 RS6000_OUTPUT_BASENAME (file, buffer);
21691 putc ('\n', file);
21692 }
21693
21694 fputs ("\t.csect ", file);
21695 assemble_name (file, buffer);
21696 fputs (TARGET_32BIT ? "\n" : ",3\n", file);
21697
21698 ASM_OUTPUT_LABEL (file, buffer);
21699
21700 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21701 &data, true);
21702 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
21703 RS6000_OUTPUT_BASENAME (file, buffer);
21704 fputs (", TOC[tc0], 0\n", file);
21705
21706 in_section = NULL;
21707 switch_to_section (function_section (decl));
21708 putc ('.', file);
21709 ASM_OUTPUT_LABEL (file, buffer);
21710
21711 data.function_descriptor = true;
21712 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21713 &data, true);
21714 if (!DECL_IGNORED_P (decl))
21715 {
21716 if (dwarf_debuginfo_p ())
21717 {
21718 name = (*targetm.strip_name_encoding) (name);
21719 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
21720 }
21721 }
21722 return;
21723 }
21724
21725
21726 /* Output assembly language to globalize a symbol from a DECL,
21727 possibly with visibility. */
21728
21729 void
21730 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
21731 {
21732 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
21733 fputs (GLOBAL_ASM_OP, stream);
21734 assemble_name (stream, name);
21735 #ifdef HAVE_GAS_HIDDEN
21736 fputs (rs6000_xcoff_visibility (decl), stream);
21737 #endif
21738 putc ('\n', stream);
21739 }
21740
21741 /* Output assembly language to define a symbol as COMMON from a DECL,
21742 possibly with visibility. */
21743
21744 void
21745 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
21746 tree decl ATTRIBUTE_UNUSED,
21747 const char *name,
21748 unsigned HOST_WIDE_INT size,
21749 unsigned int align)
21750 {
21751 unsigned int align2 = 2;
21752
21753 if (align == 0)
21754 align = DATA_ABI_ALIGNMENT (TREE_TYPE (decl), DECL_ALIGN (decl));
21755
21756 if (align > 32)
21757 align2 = floor_log2 (align / BITS_PER_UNIT);
21758 else if (size > 4)
21759 align2 = 3;
21760
21761 if (! DECL_COMMON (decl))
21762 {
21763 /* Forget section. */
21764 in_section = NULL;
21765
21766 /* Globalize TLS BSS. */
21767 if (TREE_PUBLIC (decl) && DECL_THREAD_LOCAL_P (decl))
21768 {
21769 fputs (GLOBAL_ASM_OP, stream);
21770 assemble_name (stream, name);
21771 fputc ('\n', stream);
21772 }
21773
21774 /* Switch to section and skip space. */
21775 fputs ("\t.csect ", stream);
21776 assemble_name (stream, name);
21777 fprintf (stream, ",%u\n", align2);
21778 ASM_DECLARE_OBJECT_NAME (stream, name, decl);
21779 ASM_OUTPUT_SKIP (stream, size ? size : 1);
21780 return;
21781 }
21782
21783 if (TREE_PUBLIC (decl))
21784 {
21785 fprintf (stream,
21786 "\t.comm %s," HOST_WIDE_INT_PRINT_UNSIGNED ",%u" ,
21787 name, size, align2);
21788
21789 #ifdef HAVE_GAS_HIDDEN
21790 if (decl != NULL)
21791 fputs (rs6000_xcoff_visibility (decl), stream);
21792 #endif
21793 putc ('\n', stream);
21794 }
21795 else
21796 fprintf (stream,
21797 "\t.lcomm %s," HOST_WIDE_INT_PRINT_UNSIGNED ",%s,%u\n",
21798 (*targetm.strip_name_encoding) (name), size, name, align2);
21799 }
21800
21801 /* This macro produces the initial definition of an object (variable) name.
21802 Because AIX assembler's .set command has unexpected semantics, we output
21803 all aliases as alternative labels in front of the definition. */
21804
21805 void
21806 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
21807 {
21808 struct declare_alias_data data = {file, false};
21809 ASM_OUTPUT_LABEL (file, name);
21810 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21811 &data, true);
21812 }
21813
21814 /* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'. */
21815
21816 void
21817 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
21818 {
21819 fputs (integer_asm_op (size, FALSE), file);
21820 assemble_name (file, label);
21821 fputs ("-$", file);
21822 }
21823
21824 /* Output a symbol offset relative to the dbase for the current object.
21825 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
21826 signed offsets.
21827
21828 __gcc_unwind_dbase is embedded in all executables/libraries through
21829 libgcc/config/rs6000/crtdbase.S. */
21830
21831 void
21832 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
21833 {
21834 fputs (integer_asm_op (size, FALSE), file);
21835 assemble_name (file, label);
21836 fputs("-__gcc_unwind_dbase", file);
21837 }
21838
21839 #ifdef HAVE_AS_TLS
21840 static void
21841 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
21842 {
21843 rtx symbol;
21844 int flags;
21845 const char *symname;
21846
21847 default_encode_section_info (decl, rtl, first);
21848
21849 /* Careful not to prod global register variables. */
21850 if (!MEM_P (rtl))
21851 return;
21852 symbol = XEXP (rtl, 0);
21853 if (!SYMBOL_REF_P (symbol))
21854 return;
21855
21856 flags = SYMBOL_REF_FLAGS (symbol);
21857
21858 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
21859 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
21860
21861 SYMBOL_REF_FLAGS (symbol) = flags;
21862
21863 symname = XSTR (symbol, 0);
21864
21865 /* Append CSECT mapping class, unless the symbol already is qualified.
21866 Aliases are implemented as labels, so the symbol name should not add
21867 a mapping class. */
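/* For example, with -fdata-sections an initialized writable variable
   "var" (a hypothetical name) becomes "var[RW]", while a TLS variable
   with a zero initializer gets "[UL]" appended.  */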
21868 if (decl
21869 && DECL_P (decl)
21870 && VAR_OR_FUNCTION_DECL_P (decl)
21871 && (symtab_node::get (decl) == NULL
21872 || symtab_node::get (decl)->alias == 0)
21873 && symname[strlen (symname) - 1] != ']')
21874 {
21875 const char *smclass = NULL;
21876
21877 if (TREE_CODE (decl) == FUNCTION_DECL)
21878 smclass = "[DS]";
21879 else if (DECL_THREAD_LOCAL_P (decl))
21880 {
21881 if (bss_initializer_p (decl))
21882 smclass = "[UL]";
21883 else if (flag_data_sections)
21884 smclass = "[TL]";
21885 }
21886 else if (DECL_EXTERNAL (decl))
21887 smclass = "[UA]";
21888 else if (bss_initializer_p (decl))
21889 smclass = "[BS]";
21890 else if (flag_data_sections)
21891 {
21892 /* This must exactly match the logic of select section. */
21893 if (decl_readonly_section (decl, compute_reloc_for_var (decl)))
21894 smclass = "[RO]";
21895 else
21896 smclass = "[RW]";
21897 }
21898
21899 if (smclass != NULL)
21900 {
21901 char *newname = XALLOCAVEC (char, strlen (symname) + 5);
21902
21903 strcpy (newname, symname);
21904 strcat (newname, smclass);
21905 XSTR (symbol, 0) = ggc_strdup (newname);
21906 }
21907 }
21908 }
21909 #endif /* HAVE_AS_TLS */
21910 #endif /* TARGET_XCOFF */
21911
21912 void
21913 rs6000_asm_weaken_decl (FILE *stream, tree decl,
21914 const char *name, const char *val)
21915 {
21916 fputs ("\t.weak\t", stream);
21917 assemble_name (stream, name);
21918 if (decl && TREE_CODE (decl) == FUNCTION_DECL
21919 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
21920 {
21921 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
21922 if (TARGET_XCOFF)
21923 fputs (rs6000_xcoff_visibility (decl), stream);
21924 #endif
21925 fputs ("\n\t.weak\t.", stream);
21926 RS6000_OUTPUT_BASENAME (stream, name);
21927 }
21928 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
21929 if (TARGET_XCOFF)
21930 fputs (rs6000_xcoff_visibility (decl), stream);
21931 #endif
21932 fputc ('\n', stream);
21933
21934 if (val)
21935 {
21936 #ifdef ASM_OUTPUT_DEF
21937 ASM_OUTPUT_DEF (stream, name, val);
21938 #endif
21939 if (decl && TREE_CODE (decl) == FUNCTION_DECL
21940 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
21941 {
21942 fputs ("\t.set\t.", stream);
21943 RS6000_OUTPUT_BASENAME (stream, name);
21944 fputs (",.", stream);
21945 RS6000_OUTPUT_BASENAME (stream, val);
21946 fputc ('\n', stream);
21947 }
21948 }
21949 }
21950
21951
21952 /* Return true if INSN should not be copied. */
21953
21954 static bool
21955 rs6000_cannot_copy_insn_p (rtx_insn *insn)
21956 {
21957 return recog_memoized (insn) >= 0
21958 && get_attr_cannot_copy (insn);
21959 }
21960
21961 /* Compute a (partial) cost for rtx X. Return true if the complete
21962 cost has been computed, and false if subexpressions should be
21963 scanned. In either case, *TOTAL contains the cost result. */
21964
21965 static bool
21966 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
21967 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
21968 {
21969 int code = GET_CODE (x);
21970
21971 switch (code)
21972 {
21973 /* On the RS/6000, if it is valid in the insn, it is free. */
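/* For example, an addend that fits the signed 16-bit "I" constraint
   folds into a single addi, so as a PLUS operand it adds no cost.  */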
21974 case CONST_INT:
21975 if (((outer_code == SET
21976 || outer_code == PLUS
21977 || outer_code == MINUS)
21978 && (satisfies_constraint_I (x)
21979 || satisfies_constraint_L (x)))
21980 || (outer_code == AND
21981 && (satisfies_constraint_K (x)
21982 || (mode == SImode
21983 ? satisfies_constraint_L (x)
21984 : satisfies_constraint_J (x))))
21985 || ((outer_code == IOR || outer_code == XOR)
21986 && (satisfies_constraint_K (x)
21987 || (mode == SImode
21988 ? satisfies_constraint_L (x)
21989 : satisfies_constraint_J (x))))
21990 || outer_code == ASHIFT
21991 || outer_code == ASHIFTRT
21992 || outer_code == LSHIFTRT
21993 || outer_code == ROTATE
21994 || outer_code == ROTATERT
21995 || outer_code == ZERO_EXTRACT
21996 || (outer_code == MULT
21997 && satisfies_constraint_I (x))
21998 || ((outer_code == DIV || outer_code == UDIV
21999 || outer_code == MOD || outer_code == UMOD)
22000 && exact_log2 (INTVAL (x)) >= 0)
22001 || (outer_code == COMPARE
22002 && (satisfies_constraint_I (x)
22003 || satisfies_constraint_K (x)))
22004 || ((outer_code == EQ || outer_code == NE)
22005 && (satisfies_constraint_I (x)
22006 || satisfies_constraint_K (x)
22007 || (mode == SImode
22008 ? satisfies_constraint_L (x)
22009 : satisfies_constraint_J (x))))
22010 || (outer_code == GTU
22011 && satisfies_constraint_I (x))
22012 || (outer_code == LTU
22013 && satisfies_constraint_P (x)))
22014 {
22015 *total = 0;
22016 return true;
22017 }
22018 else if ((outer_code == PLUS
22019 && reg_or_add_cint_operand (x, mode))
22020 || (outer_code == MINUS
22021 && reg_or_sub_cint_operand (x, mode))
22022 || ((outer_code == SET
22023 || outer_code == IOR
22024 || outer_code == XOR)
22025 && (INTVAL (x)
22026 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
22027 {
22028 *total = COSTS_N_INSNS (1);
22029 return true;
22030 }
22031 /* FALLTHRU */
22032
22033 case CONST_DOUBLE:
22034 case CONST_WIDE_INT:
22035 case CONST:
22036 case HIGH:
22037 case SYMBOL_REF:
22038 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
22039 return true;
22040
22041 case MEM:
22042 /* When optimizing for size, MEM should be slightly more expensive
22043 than generating the address, e.g., (plus (reg) (const)).
22044 L1 cache latency is about two instructions. */
22045 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
22046 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
22047 *total += COSTS_N_INSNS (100);
22048 return true;
22049
22050 case LABEL_REF:
22051 *total = 0;
22052 return true;
22053
22054 case PLUS:
22055 case MINUS:
22056 if (FLOAT_MODE_P (mode))
22057 *total = rs6000_cost->fp;
22058 else
22059 *total = COSTS_N_INSNS (1);
22060 return false;
22061
22062 case MULT:
22063 if (CONST_INT_P (XEXP (x, 1))
22064 && satisfies_constraint_I (XEXP (x, 1)))
22065 {
22066 if (INTVAL (XEXP (x, 1)) >= -256
22067 && INTVAL (XEXP (x, 1)) <= 255)
22068 *total = rs6000_cost->mulsi_const9;
22069 else
22070 *total = rs6000_cost->mulsi_const;
22071 }
22072 else if (mode == SFmode)
22073 *total = rs6000_cost->fp;
22074 else if (FLOAT_MODE_P (mode))
22075 *total = rs6000_cost->dmul;
22076 else if (mode == DImode)
22077 *total = rs6000_cost->muldi;
22078 else
22079 *total = rs6000_cost->mulsi;
22080 return false;
22081
22082 case FMA:
22083 if (mode == SFmode)
22084 *total = rs6000_cost->fp;
22085 else
22086 *total = rs6000_cost->dmul;
22087 break;
22088
22089 case DIV:
22090 case MOD:
22091 if (FLOAT_MODE_P (mode))
22092 {
22093 *total = mode == DFmode ? rs6000_cost->ddiv
22094 : rs6000_cost->sdiv;
22095 return false;
22096 }
22097 /* FALLTHRU */
22098
22099 case UDIV:
22100 case UMOD:
22101 if (CONST_INT_P (XEXP (x, 1))
22102 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
22103 {
22104 if (code == DIV || code == MOD)
22105 /* Shift, addze */
22106 *total = COSTS_N_INSNS (2);
22107 else
22108 /* Shift */
22109 *total = COSTS_N_INSNS (1);
22110 }
22111 else
22112 {
22113 if (GET_MODE (XEXP (x, 1)) == DImode)
22114 *total = rs6000_cost->divdi;
22115 else
22116 *total = rs6000_cost->divsi;
22117 }
22118 /* Add in shift and subtract for MOD unless we have a mod instruction. */
22119 if (!TARGET_MODULO && (code == MOD || code == UMOD))
22120 *total += COSTS_N_INSNS (2);
22121 return false;
22122
22123 case CTZ:
22124 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
22125 return false;
22126
22127 case FFS:
22128 *total = COSTS_N_INSNS (4);
22129 return false;
22130
22131 case POPCOUNT:
22132 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
22133 return false;
22134
22135 case PARITY:
22136 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
22137 return false;
22138
22139 case NOT:
22140 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
22141 *total = 0;
22142 else
22143 *total = COSTS_N_INSNS (1);
22144 return false;
22145
22146 case AND:
22147 if (CONST_INT_P (XEXP (x, 1)))
22148 {
22149 rtx left = XEXP (x, 0);
22150 rtx_code left_code = GET_CODE (left);
22151
22152 /* rotate-and-mask: 1 insn. */
22153 if ((left_code == ROTATE
22154 || left_code == ASHIFT
22155 || left_code == LSHIFTRT)
22156 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
22157 {
22158 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
22159 if (!CONST_INT_P (XEXP (left, 1)))
22160 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
22161 *total += COSTS_N_INSNS (1);
22162 return true;
22163 }
22164
22165 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
22166 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
22167 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
22168 || (val & 0xffff) == val
22169 || (val & 0xffff0000) == val
22170 || ((val & 0xffff) == 0 && mode == SImode))
22171 {
22172 *total = rtx_cost (left, mode, AND, 0, speed);
22173 *total += COSTS_N_INSNS (1);
22174 return true;
22175 }
22176
22177 /* 2 insns. */
22178 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
22179 {
22180 *total = rtx_cost (left, mode, AND, 0, speed);
22181 *total += COSTS_N_INSNS (2);
22182 return true;
22183 }
22184 }
22185
22186 *total = COSTS_N_INSNS (1);
22187 return false;
22188
22189 case IOR:
22190 /* FIXME */
22191 *total = COSTS_N_INSNS (1);
22192 return true;
22193
22194 case CLZ:
22195 case XOR:
22196 case ZERO_EXTRACT:
22197 *total = COSTS_N_INSNS (1);
22198 return false;
22199
22200 case ASHIFT:
22201 /* The EXTSWSLI instruction combines a sign extend and a shift; don't
22202 count the two operations separately within the insn. */
22203 if (TARGET_EXTSWSLI && mode == DImode
22204 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
22205 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
22206 {
22207 *total = 0;
22208 return false;
22209 }
22210 /* fall through */
22211
22212 case ASHIFTRT:
22213 case LSHIFTRT:
22214 case ROTATE:
22215 case ROTATERT:
22216 /* Handle mul_highpart. */
22217 if (outer_code == TRUNCATE
22218 && GET_CODE (XEXP (x, 0)) == MULT)
22219 {
22220 if (mode == DImode)
22221 *total = rs6000_cost->muldi;
22222 else
22223 *total = rs6000_cost->mulsi;
22224 return true;
22225 }
22226 else if (outer_code == AND)
22227 *total = 0;
22228 else
22229 *total = COSTS_N_INSNS (1);
22230 return false;
22231
22232 case SIGN_EXTEND:
22233 case ZERO_EXTEND:
22234 if (MEM_P (XEXP (x, 0)))
22235 *total = 0;
22236 else
22237 *total = COSTS_N_INSNS (1);
22238 return false;
22239
22240 case COMPARE:
22241 case NEG:
22242 case ABS:
22243 if (!FLOAT_MODE_P (mode))
22244 {
22245 *total = COSTS_N_INSNS (1);
22246 return false;
22247 }
22248 /* FALLTHRU */
22249
22250 case FLOAT:
22251 case UNSIGNED_FLOAT:
22252 case FIX:
22253 case UNSIGNED_FIX:
22254 case FLOAT_TRUNCATE:
22255 *total = rs6000_cost->fp;
22256 return false;
22257
22258 case FLOAT_EXTEND:
22259 if (mode == DFmode)
22260 *total = rs6000_cost->sfdf_convert;
22261 else
22262 *total = rs6000_cost->fp;
22263 return false;
22264
22265 case CALL:
22266 case IF_THEN_ELSE:
22267 if (!speed)
22268 {
22269 *total = COSTS_N_INSNS (1);
22270 return true;
22271 }
22272 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
22273 {
22274 *total = rs6000_cost->fp;
22275 return false;
22276 }
22277 break;
22278
22279 case NE:
22280 case EQ:
22281 case GTU:
22282 case LTU:
22283 /* Carry bit requires mode == Pmode.
22284 NEG or PLUS already counted, so only add one. */
22285 if (mode == Pmode
22286 && (outer_code == NEG || outer_code == PLUS))
22287 {
22288 *total = COSTS_N_INSNS (1);
22289 return true;
22290 }
22291 /* FALLTHRU */
22292
22293 case GT:
22294 case LT:
22295 case UNORDERED:
22296 if (outer_code == SET)
22297 {
22298 if (XEXP (x, 1) == const0_rtx)
22299 {
22300 *total = COSTS_N_INSNS (2);
22301 return true;
22302 }
22303 else
22304 {
22305 *total = COSTS_N_INSNS (3);
22306 return false;
22307 }
22308 }
22309 /* CC COMPARE. */
22310 if (outer_code == COMPARE)
22311 {
22312 *total = 0;
22313 return true;
22314 }
22315 break;
22316
22317 case UNSPEC:
22318 if (XINT (x, 1) == UNSPECV_MMA_XXSETACCZ)
22319 {
22320 *total = 0;
22321 return true;
22322 }
22323 break;
22324
22325 default:
22326 break;
22327 }
22328
22329 return false;
22330 }
22331
22332 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
22333
22334 static bool
22335 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
22336 int opno, int *total, bool speed)
22337 {
22338 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
22339
22340 fprintf (stderr,
22341 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
22342 "opno = %d, total = %d, speed = %s, x:\n",
22343 ret ? "complete" : "scan inner",
22344 GET_MODE_NAME (mode),
22345 GET_RTX_NAME (outer_code),
22346 opno,
22347 *total,
22348 speed ? "true" : "false");
22349
22350 debug_rtx (x);
22351
22352 return ret;
22353 }
22354
22355 static int
22356 rs6000_insn_cost (rtx_insn *insn, bool speed)
22357 {
22358 if (recog_memoized (insn) < 0)
22359 return 0;
22360
22361 /* If we are optimizing for size, just use the length. */
22362 if (!speed)
22363 return get_attr_length (insn);
22364
22365 /* Use the cost if provided. */
22366 int cost = get_attr_cost (insn);
22367 if (cost > 0)
22368 return cost;
22369
22370 /* If the insn tells us how many insns there are, use that. Otherwise use
22371 the length/4. Adjust the insn length to remove the extra size that
22372 prefixed instructions take. */
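/* For example, a prefixed load such as pld occupies 8 bytes but executes
   as one instruction; removing the ADJUST_INSN_LENGTH correction first
   keeps the length/4 estimate equal to the actual instruction count.  */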
22373 int n = get_attr_num_insns (insn);
22374 if (n == 0)
22375 {
22376 int length = get_attr_length (insn);
22377 if (get_attr_prefixed (insn) == PREFIXED_YES)
22378 {
22379 int adjust = 0;
22380 ADJUST_INSN_LENGTH (insn, adjust);
22381 length -= adjust;
22382 }
22383
22384 n = length / 4;
22385 }
22386
22387 enum attr_type type = get_attr_type (insn);
22388
22389 switch (type)
22390 {
22391 case TYPE_LOAD:
22392 case TYPE_FPLOAD:
22393 case TYPE_VECLOAD:
22394 cost = COSTS_N_INSNS (n + 1);
22395 break;
22396
22397 case TYPE_MUL:
22398 switch (get_attr_size (insn))
22399 {
22400 case SIZE_8:
22401 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
22402 break;
22403 case SIZE_16:
22404 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
22405 break;
22406 case SIZE_32:
22407 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
22408 break;
22409 case SIZE_64:
22410 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
22411 break;
22412 default:
22413 gcc_unreachable ();
22414 }
22415 break;
22416 case TYPE_DIV:
22417 switch (get_attr_size (insn))
22418 {
22419 case SIZE_32:
22420 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
22421 break;
22422 case SIZE_64:
22423 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
22424 break;
22425 default:
22426 gcc_unreachable ();
22427 }
22428 break;
22429
22430 case TYPE_FP:
22431 cost = n * rs6000_cost->fp;
22432 break;
22433 case TYPE_DMUL:
22434 cost = n * rs6000_cost->dmul;
22435 break;
22436 case TYPE_SDIV:
22437 cost = n * rs6000_cost->sdiv;
22438 break;
22439 case TYPE_DDIV:
22440 cost = n * rs6000_cost->ddiv;
22441 break;
22442
22443 case TYPE_SYNC:
22444 case TYPE_LOAD_L:
22445 case TYPE_MFCR:
22446 case TYPE_MFCRF:
22447 cost = COSTS_N_INSNS (n + 2);
22448 break;
22449
22450 default:
22451 cost = COSTS_N_INSNS (n);
22452 }
22453
22454 return cost;
22455 }
22456
22457 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
22458
22459 static int
22460 rs6000_debug_address_cost (rtx x, machine_mode mode,
22461 addr_space_t as, bool speed)
22462 {
22463 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
22464
22465 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
22466 ret, speed ? "true" : "false");
22467 debug_rtx (x);
22468
22469 return ret;
22470 }
22471
22472
22473 /* A C expression returning the cost of moving data from a register of class
22474 FROM to one of class TO. */
22475
22476 static int
22477 rs6000_register_move_cost (machine_mode mode,
22478 reg_class_t from, reg_class_t to)
22479 {
22480 int ret;
22481 reg_class_t rclass;
22482
22483 if (TARGET_DEBUG_COST)
22484 dbg_cost_ctrl++;
22485
22486 /* If we have VSX, we can easily move between FPR or Altivec registers,
22487 otherwise we can only easily move within classes.
22488 Do this first so we give best-case answers for union classes
22489 containing both gprs and vsx regs. */
22490 HARD_REG_SET to_vsx, from_vsx;
22491 to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
22492 from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
22493 if (!hard_reg_set_empty_p (to_vsx)
22494 && !hard_reg_set_empty_p (from_vsx)
22495 && (TARGET_VSX
22496 || hard_reg_set_intersect_p (to_vsx, from_vsx)))
22497 {
22498 int reg = FIRST_FPR_REGNO;
22499 if (TARGET_VSX
22500 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
22501 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
22502 reg = FIRST_ALTIVEC_REGNO;
22503 ret = 2 * hard_regno_nregs (reg, mode);
22504 }
22505
22506 /* Moves from/to GENERAL_REGS. */
22507 else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
22508 || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
22509 {
22510 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
22511 {
22512 if (TARGET_DIRECT_MOVE)
22513 {
22514 /* Keep the cost for direct moves above that for within
22515 a register class even if the actual processor cost is
22516 comparable. We do this because a direct move insn
22517 can't be a nop, whereas with ideal register
22518 allocation a move within the same class might turn
22519 out to be a nop. */
22520 if (rs6000_tune == PROCESSOR_POWER9
22521 || rs6000_tune == PROCESSOR_POWER10)
22522 ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22523 else
22524 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22525 /* SFmode requires a conversion when moving between gprs
22526 and vsx. */
22527 if (mode == SFmode)
22528 ret += 2;
22529 }
22530 else
22531 ret = (rs6000_memory_move_cost (mode, rclass, false)
22532 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
22533 }
22534
22535 /* It's more expensive to move CR_REGS than CR0_REGS because of the
22536 shift. */
22537 else if (rclass == CR_REGS)
22538 ret = 4;
22539
22540 /* For those processors that have slow LR/CTR moves, make them more
22541 expensive than memory in order to bias spills to memory. */
22542 else if ((rs6000_tune == PROCESSOR_POWER6
22543 || rs6000_tune == PROCESSOR_POWER7
22544 || rs6000_tune == PROCESSOR_POWER8
22545 || rs6000_tune == PROCESSOR_POWER9)
22546 && reg_class_subset_p (rclass, SPECIAL_REGS))
22547 ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22548
22549 else
22550 /* A move will cost one instruction per GPR moved. */
22551 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22552 }
22553
22554 /* Everything else has to go through GENERAL_REGS. */
22555 else
22556 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
22557 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
22558
22559 if (TARGET_DEBUG_COST)
22560 {
22561 if (dbg_cost_ctrl == 1)
22562 fprintf (stderr,
22563 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
22564 ret, GET_MODE_NAME (mode), reg_class_names[from],
22565 reg_class_names[to]);
22566 dbg_cost_ctrl--;
22567 }
22568
22569 return ret;
22570 }
22571
22572 /* A C expression returning the cost of moving data of MODE from a register to
22573 or from memory. */
22574
22575 static int
22576 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
22577 bool in ATTRIBUTE_UNUSED)
22578 {
22579 int ret;
22580
22581 if (TARGET_DEBUG_COST)
22582 dbg_cost_ctrl++;
22583
22584 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
22585 ret = 4 * hard_regno_nregs (0, mode); /* Regno 0: first GPR. */
22586 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
22587 || reg_classes_intersect_p (rclass, VSX_REGS)))
22588 ret = 4 * hard_regno_nregs (32, mode); /* Regno 32: first FPR. */
22589 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
22590 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
22591 else
22592 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
22593
22594 if (TARGET_DEBUG_COST)
22595 {
22596 if (dbg_cost_ctrl == 1)
22597 fprintf (stderr,
22598 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
22599 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
22600 dbg_cost_ctrl--;
22601 }
22602
22603 return ret;
22604 }
22605
22606 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
22607
22608 The register allocator chooses GEN_OR_VSX_REGS for the allocno
22609 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
22610 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
22611 move cost between GENERAL_REGS and VSX_REGS low.
22612
22613 It might seem reasonable to use a union class. After all, if usage
22614 of vsr is low and gpr high, it might make sense to spill gpr to vsr
22615 rather than memory. However, in cases where register pressure of
22616 both is high, like the cactus_adm spec test, allowing
22617 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
22618 the first scheduling pass. This is partly due to an allocno of
22619 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
22620 class, which gives too high a pressure for GENERAL_REGS and too low
22621 for VSX_REGS. So, force a choice of the subclass here.
22622
22623 The best class is also the union if GENERAL_REGS and VSX_REGS have
22624 the same cost. In that case we do use GEN_OR_VSX_REGS as the
22625 allocno class, since trying to narrow down the class by regno mode
22626 is prone to error. For example, SImode is allowed in VSX regs and
22627 in some cases (e.g. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
22628 it would be wrong to choose an allocno of GENERAL_REGS based on
22629 SImode. */
22630
22631 static reg_class_t
22632 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
22633 reg_class_t allocno_class,
22634 reg_class_t best_class)
22635 {
22636 switch (allocno_class)
22637 {
22638 case GEN_OR_VSX_REGS:
22639 /* best_class must be a subset of allocno_class. */
22640 gcc_checking_assert (best_class == GEN_OR_VSX_REGS
22641 || best_class == GEN_OR_FLOAT_REGS
22642 || best_class == VSX_REGS
22643 || best_class == ALTIVEC_REGS
22644 || best_class == FLOAT_REGS
22645 || best_class == GENERAL_REGS
22646 || best_class == BASE_REGS);
22647 /* Use best_class but choose wider classes when copying from the
22648 wider class to best_class is cheap. This mimics IRA choice
22649 of allocno class. */
22650 if (best_class == BASE_REGS)
22651 return GENERAL_REGS;
22652 if (TARGET_VSX && best_class == FLOAT_REGS)
22653 return VSX_REGS;
22654 return best_class;
22655
22656 case VSX_REGS:
22657 if (best_class == ALTIVEC_REGS)
22658 return ALTIVEC_REGS;
22659 /* FALLTHRU */
22660 default:
22661 break;
22662 }
22663
22664 return allocno_class;
22665 }
22666
22667 /* Load up a constant. If the mode is a vector mode, splat the value across
22668 all of the vector elements. */
22669
22670 static rtx
22671 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
22672 {
22673 rtx reg;
22674
22675 if (mode == SFmode || mode == DFmode)
22676 {
22677 rtx d = const_double_from_real_value (dconst, mode);
22678 reg = force_reg (mode, d);
22679 }
22680 else if (mode == V4SFmode)
22681 {
22682 rtx d = const_double_from_real_value (dconst, SFmode);
22683 rtvec v = gen_rtvec (4, d, d, d, d);
22684 reg = gen_reg_rtx (mode);
22685 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22686 }
22687 else if (mode == V2DFmode)
22688 {
22689 rtx d = const_double_from_real_value (dconst, DFmode);
22690 rtvec v = gen_rtvec (2, d, d);
22691 reg = gen_reg_rtx (mode);
22692 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22693 }
22694 else
22695 gcc_unreachable ();
22696
22697 return reg;
22698 }
22699
22700 /* Generate an FMA instruction. */
22701
22702 static void
22703 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
22704 {
22705 machine_mode mode = GET_MODE (target);
22706 rtx dst;
22707
22708 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
22709 gcc_assert (dst != NULL);
22710
22711 if (dst != target)
22712 emit_move_insn (target, dst);
22713 }
22714
22715 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
22716
22717 static void
22718 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
22719 {
22720 machine_mode mode = GET_MODE (dst);
22721 rtx r;
22722
22723 /* This is a tad more complicated, since the fnma_optab is for
22724 a different expression: fma(-m1, m2, a), which is the same
22725 thing except in the case of signed zeros.
22726
22727 Fortunately we know that if FMA is supported that FNMSUB is
22728 also supported in the ISA. Just expand it directly. */
22729
22730 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
22731
22732 r = gen_rtx_NEG (mode, a);
22733 r = gen_rtx_FMA (mode, m1, m2, r);
22734 r = gen_rtx_NEG (mode, r);
22735 emit_insn (gen_rtx_SET (dst, r));
22736 }
22737
22738 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
22739 add a reg_note saying that this was a division. Support both scalar and
22740 vector divide. Assumes no trapping math and finite arguments. */
22741
22742 void
22743 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
22744 {
22745 machine_mode mode = GET_MODE (dst);
22746 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
22747 int i;
22748
22749 /* Low precision estimates guarantee 5 bits of accuracy. High
22750 precision estimates guarantee 14 bits of accuracy. SFmode
22751 requires 23 bits of accuracy. DFmode requires 52 bits of
22752 accuracy. Each pass at least doubles the accuracy, leading
22753 to the following. */
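/* Worked out: 5 -> 10 -> 20 -> 40 bits after three passes covers SFmode's
   23 bits, and a fourth pass reaching 80 bits covers DFmode's 52; with
   TARGET_RECIP_PRECISION, 14 -> 28 suffices for SFmode in one pass and
   14 -> 28 -> 56 for DFmode in two.  */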
22754 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
22755 if (mode == DFmode || mode == V2DFmode)
22756 passes++;
22757
22758 enum insn_code code = optab_handler (smul_optab, mode);
22759 insn_gen_fn gen_mul = GEN_FCN (code);
22760
22761 gcc_assert (code != CODE_FOR_nothing);
22762
22763 one = rs6000_load_constant_and_splat (mode, dconst1);
22764
22765 /* x0 = 1./d estimate */
22766 x0 = gen_reg_rtx (mode);
22767 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
22768 UNSPEC_FRES)));
22769
22770 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
22771 if (passes > 1) {
22772
22773 /* e0 = 1. - d * x0 */
22774 e0 = gen_reg_rtx (mode);
22775 rs6000_emit_nmsub (e0, d, x0, one);
22776
22777 /* x1 = x0 + e0 * x0 */
22778 x1 = gen_reg_rtx (mode);
22779 rs6000_emit_madd (x1, e0, x0, x0);
22780
22781 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
22782 ++i, xprev = xnext, eprev = enext) {
22783
22784 /* enext = eprev * eprev */
22785 enext = gen_reg_rtx (mode);
22786 emit_insn (gen_mul (enext, eprev, eprev));
22787
22788 /* xnext = xprev + enext * xprev */
22789 xnext = gen_reg_rtx (mode);
22790 rs6000_emit_madd (xnext, enext, xprev, xprev);
22791 }
22792
22793 } else
22794 xprev = x0;
22795
22796 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
22797
22798 /* u = n * xprev */
22799 u = gen_reg_rtx (mode);
22800 emit_insn (gen_mul (u, n, xprev));
22801
22802 /* v = n - (d * u) */
22803 v = gen_reg_rtx (mode);
22804 rs6000_emit_nmsub (v, d, u, n);
22805
22806 /* dst = (v * xprev) + u */
22807 rs6000_emit_madd (dst, v, xprev, u);
22808
22809 if (note_p)
22810 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
22811 }
22812
22813 /* Goldschmidt's Algorithm for single/double-precision floating point
22814 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
22815
22816 void
22817 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
22818 {
22819 machine_mode mode = GET_MODE (src);
22820 rtx e = gen_reg_rtx (mode);
22821 rtx g = gen_reg_rtx (mode);
22822 rtx h = gen_reg_rtx (mode);
22823
22824 /* Low precision estimates guarantee 5 bits of accuracy. High
22825 precision estimates guarantee 14 bits of accuracy. SFmode
22826 requires 23 bits of accuracy. DFmode requires 52 bits of
22827 accuracy. Each pass at least doubles the accuracy, leading
22828 to the following. */
22829 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
22830 if (mode == DFmode || mode == V2DFmode)
22831 passes++;
22832
22833 int i;
22834 rtx mhalf;
22835 enum insn_code code = optab_handler (smul_optab, mode);
22836 insn_gen_fn gen_mul = GEN_FCN (code);
22837
22838 gcc_assert (code != CODE_FOR_nothing);
22839
22840 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
22841
22842 /* e = rsqrt estimate */
22843 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
22844 UNSPEC_RSQRT)));
22845
22846 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
22847 if (!recip)
22848 {
22849 rtx zero = force_reg (mode, CONST0_RTX (mode));
22850
22851 if (mode == SFmode)
22852 {
22853 rtx target = emit_conditional_move (e, { GT, src, zero, mode },
22854 e, zero, mode, 0);
22855 if (target != e)
22856 emit_move_insn (e, target);
22857 }
22858 else
22859 {
22860 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
22861 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
22862 }
22863 }
22864
22865 /* g = sqrt estimate. */
22866 emit_insn (gen_mul (g, e, src));
22867 /* h = 1/(2*sqrt) estimate. */
22868 emit_insn (gen_mul (h, e, mhalf));
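/* The refinement below uses the fact that g*h converges to 1/2 as g
   approaches sqrt(src): each step computes t = 1/2 - g*h and updates
   g' = g + g*t and h' = h + h*t, converging quadratically toward
   sqrt(src) and 1/(2*sqrt(src)).  */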
22869
22870 if (recip)
22871 {
22872 if (passes == 1)
22873 {
22874 rtx t = gen_reg_rtx (mode);
22875 rs6000_emit_nmsub (t, g, h, mhalf);
22876 /* Apply correction directly to 1/rsqrt estimate. */
22877 rs6000_emit_madd (dst, e, t, e);
22878 }
22879 else
22880 {
22881 for (i = 0; i < passes; i++)
22882 {
22883 rtx t1 = gen_reg_rtx (mode);
22884 rtx g1 = gen_reg_rtx (mode);
22885 rtx h1 = gen_reg_rtx (mode);
22886
22887 rs6000_emit_nmsub (t1, g, h, mhalf);
22888 rs6000_emit_madd (g1, g, t1, g);
22889 rs6000_emit_madd (h1, h, t1, h);
22890
22891 g = g1;
22892 h = h1;
22893 }
22894 /* Multiply by 2 for 1/rsqrt. */
22895 emit_insn (gen_add3_insn (dst, h, h));
22896 }
22897 }
22898 else
22899 {
22900 rtx t = gen_reg_rtx (mode);
22901 rs6000_emit_nmsub (t, g, h, mhalf);
22902 rs6000_emit_madd (dst, g, t, g);
22903 }
22904
22905 return;
22906 }
22907
22908 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
22909 (Power7) targets. DST is the target, and SRC is the argument operand. */
22910
22911 void
22912 rs6000_emit_popcount (rtx dst, rtx src)
22913 {
22914 machine_mode mode = GET_MODE (dst);
22915 rtx tmp1, tmp2;
22916
22917 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
22918 if (TARGET_POPCNTD)
22919 {
22920 if (mode == SImode)
22921 emit_insn (gen_popcntdsi2 (dst, src));
22922 else
22923 emit_insn (gen_popcntddi2 (dst, src));
22924 return;
22925 }
22926
22927 tmp1 = gen_reg_rtx (mode);
22928
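/* Otherwise use popcntb, which leaves a per-byte population count in each
   byte of TMP1.  Multiplying by 0x01010101 (replicated for DImode)
   accumulates all the byte counts into the most significant byte, and the
   shift right by 24 (or 56) extracts it.  */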
22929 if (mode == SImode)
22930 {
22931 emit_insn (gen_popcntbsi2 (tmp1, src));
22932 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
22933 NULL_RTX, 0);
22934 tmp2 = force_reg (SImode, tmp2);
22935 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
22936 }
22937 else
22938 {
22939 emit_insn (gen_popcntbdi2 (tmp1, src));
22940 tmp2 = expand_mult (DImode, tmp1,
22941 GEN_INT ((HOST_WIDE_INT)
22942 0x01010101 << 32 | 0x01010101),
22943 NULL_RTX, 0);
22944 tmp2 = force_reg (DImode, tmp2);
22945 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
22946 }
22947 }
22948
22949
22950 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
22951 target, and SRC is the argument operand. */
22952
22953 void
22954 rs6000_emit_parity (rtx dst, rtx src)
22955 {
22956 machine_mode mode = GET_MODE (dst);
22957 rtx tmp;
22958
22959 tmp = gen_reg_rtx (mode);
22960
22961 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
22962 if (TARGET_CMPB)
22963 {
22964 if (mode == SImode)
22965 {
22966 emit_insn (gen_popcntbsi2 (tmp, src));
22967 emit_insn (gen_paritysi2_cmpb (dst, tmp));
22968 }
22969 else
22970 {
22971 emit_insn (gen_popcntbdi2 (tmp, src));
22972 emit_insn (gen_paritydi2_cmpb (dst, tmp));
22973 }
22974 return;
22975 }
22976
22977 if (mode == SImode)
22978 {
22979 /* Is mult+shift >= shift+xor+shift+xor? */
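/* The xor-fold alternative: folding the per-byte counts from popcntb by
   xor-ing with shifts of 16 and then 8 accumulates the parity in bit 0 of
   the low byte, which the final AND with 1 extracts.  */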
22980 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
22981 {
22982 rtx tmp1, tmp2, tmp3, tmp4;
22983
22984 tmp1 = gen_reg_rtx (SImode);
22985 emit_insn (gen_popcntbsi2 (tmp1, src));
22986
22987 tmp2 = gen_reg_rtx (SImode);
22988 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
22989 tmp3 = gen_reg_rtx (SImode);
22990 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
22991
22992 tmp4 = gen_reg_rtx (SImode);
22993 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
22994 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
22995 }
22996 else
22997 rs6000_emit_popcount (tmp, src);
22998 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
22999 }
23000 else
23001 {
23002 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
23003 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
23004 {
23005 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
23006
23007 tmp1 = gen_reg_rtx (DImode);
23008 emit_insn (gen_popcntbdi2 (tmp1, src));
23009
23010 tmp2 = gen_reg_rtx (DImode);
23011 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
23012 tmp3 = gen_reg_rtx (DImode);
23013 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
23014
23015 tmp4 = gen_reg_rtx (DImode);
23016 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
23017 tmp5 = gen_reg_rtx (DImode);
23018 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
23019
23020 tmp6 = gen_reg_rtx (DImode);
23021 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
23022 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
23023 }
23024 else
23025 rs6000_emit_popcount (tmp, src);
23026 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
23027 }
23028 }
23029
23030 /* Expand an Altivec constant permutation for little endian mode.
23031 OP0 and OP1 are the input vectors and TARGET is the output vector.
23032 SEL specifies the constant permutation vector.
23033
23034 There are two issues: First, the two input operands must be
23035 swapped so that together they form a double-wide array in LE
23036 order. Second, the vperm instruction has surprising behavior
23037 in LE mode: it interprets the elements of the source vectors
23038 in BE mode ("left to right") and interprets the elements of
23039 the destination vector in LE mode ("right to left"). To
23040 correct for this, we must subtract each element of the permute
23041 control vector from 31.
23042
23043 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
23044 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
23045 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
23046 serve as the permute control vector. Then, in BE mode,
23047
23048 vperm 9,10,11,12
23049
23050 places the desired result in vr9. However, in LE mode the
23051 vector contents will be
23052
23053 vr10 = 00000003 00000002 00000001 00000000
23054 vr11 = 00000007 00000006 00000005 00000004
23055
23056 The result of the vperm using the same permute control vector is
23057
23058 vr9 = 05000000 07000000 01000000 03000000
23059
23060 That is, the leftmost 4 bytes of vr10 are interpreted as the
23061 source for the rightmost 4 bytes of vr9, and so on.
23062
23063 If we change the permute control vector to
23064
23065 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
23066
23067 and issue
23068
23069 vperm 9,11,10,12
23070
23071 we get the desired
23072
23073 vr9 = 00000006 00000004 00000002 00000000. */
23074
23075 static void
23076 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
23077 const vec_perm_indices &sel)
23078 {
23079 unsigned int i;
23080 rtx perm[16];
23081 rtx constv, unspec;
23082
23083 /* Unpack and adjust the constant selector. */
23084 for (i = 0; i < 16; ++i)
23085 {
23086 unsigned int elt = 31 - (sel[i] & 31);
23087 perm[i] = GEN_INT (elt);
23088 }
23089
23090 /* Expand to a permute, swapping the inputs and using the
23091 adjusted selector. */
23092 if (!REG_P (op0))
23093 op0 = force_reg (V16QImode, op0);
23094 if (!REG_P (op1))
23095 op1 = force_reg (V16QImode, op1);
23096
23097 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
23098 constv = force_reg (V16QImode, constv);
23099 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
23100 UNSPEC_VPERM);
23101 if (!REG_P (target))
23102 {
23103 rtx tmp = gen_reg_rtx (V16QImode);
23104 emit_move_insn (tmp, unspec);
23105 unspec = tmp;
23106 }
23107
23108 emit_move_insn (target, unspec);
23109 }
23110
23111 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
23112 permute control vector. But here it's not a constant, so we must
23113 generate a vector NAND or NOR to do the adjustment. */
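/* vperm reads only the low five bits of each selector byte, and
   31 - e is congruent to ~e (mod 32), so a bytewise complement of the
   selector -- a vector NAND or NOR of it with itself -- performs the
   31 - e adjustment.  */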
23114
23115 void
23116 altivec_expand_vec_perm_le (rtx operands[4])
23117 {
23118 rtx notx, iorx, unspec;
23119 rtx target = operands[0];
23120 rtx op0 = operands[1];
23121 rtx op1 = operands[2];
23122 rtx sel = operands[3];
23123 rtx tmp = target;
23124 rtx norreg = gen_reg_rtx (V16QImode);
23125 machine_mode mode = GET_MODE (target);
23126
23127 /* Get everything in regs so the pattern matches. */
23128 if (!REG_P (op0))
23129 op0 = force_reg (mode, op0);
23130 if (!REG_P (op1))
23131 op1 = force_reg (mode, op1);
23132 if (!REG_P (sel))
23133 sel = force_reg (V16QImode, sel);
23134 if (!REG_P (target))
23135 tmp = gen_reg_rtx (mode);
23136
23137 if (TARGET_P9_VECTOR)
23138 {
23139 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
23140 UNSPEC_VPERMR);
23141 }
23142 else
23143 {
23144 /* Invert the selector with a VNAND if available, else a VNOR.
23145 The VNAND is preferred for future fusion opportunities. */
23146 notx = gen_rtx_NOT (V16QImode, sel);
23147 iorx = (TARGET_P8_VECTOR
23148 ? gen_rtx_IOR (V16QImode, notx, notx)
23149 : gen_rtx_AND (V16QImode, notx, notx));
23150 emit_insn (gen_rtx_SET (norreg, iorx));
23151
23152 /* Permute with operands reversed and adjusted selector. */
23153 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
23154 UNSPEC_VPERM);
23155 }
23156
23157 /* Copy into target, possibly by way of a register. */
23158 if (!REG_P (target))
23159 {
23160 emit_move_insn (tmp, unspec);
23161 unspec = tmp;
23162 }
23163
23164 emit_move_insn (target, unspec);
23165 }
23166
23167 /* Expand an Altivec constant permutation. Return true if we match
23168 an efficient implementation; false to fall back to VPERM.
23169
23170 OP0 and OP1 are the input vectors and TARGET is the output vector.
23171 SEL specifies the constant permutation vector. */
23172
23173 static bool
23174 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
23175 const vec_perm_indices &sel)
23176 {
23177 struct altivec_perm_insn {
23178 HOST_WIDE_INT mask;
23179 enum insn_code impl;
23180 unsigned char perm[16];
23181 };
23182 static const struct altivec_perm_insn patterns[] = {
23183 {OPTION_MASK_ALTIVEC,
23184 CODE_FOR_altivec_vpkuhum_direct,
23185 {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}},
23186 {OPTION_MASK_ALTIVEC,
23187 CODE_FOR_altivec_vpkuwum_direct,
23188 {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}},
23189 {OPTION_MASK_ALTIVEC,
23190 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
23191 : CODE_FOR_altivec_vmrglb_direct,
23192 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}},
23193 {OPTION_MASK_ALTIVEC,
23194 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
23195 : CODE_FOR_altivec_vmrglh_direct,
23196 {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}},
23197 {OPTION_MASK_ALTIVEC,
23198 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si
23199 : CODE_FOR_altivec_vmrglw_direct_v4si,
23200 {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}},
23201 {OPTION_MASK_ALTIVEC,
23202 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
23203 : CODE_FOR_altivec_vmrghb_direct,
23204 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}},
23205 {OPTION_MASK_ALTIVEC,
23206 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
23207 : CODE_FOR_altivec_vmrghh_direct,
23208 {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}},
23209 {OPTION_MASK_ALTIVEC,
23210 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si
23211 : CODE_FOR_altivec_vmrghw_direct_v4si,
23212 {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}},
23213 {OPTION_MASK_P8_VECTOR,
23214 BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
23215 : CODE_FOR_p8_vmrgow_v4sf_direct,
23216 {0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27}},
23217 {OPTION_MASK_P8_VECTOR,
23218 BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
23219 : CODE_FOR_p8_vmrgew_v4sf_direct,
23220 {4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31}},
23221 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23222 {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23}},
23223 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23224 {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}},
23225 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23226 {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}},
23227 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23228 {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}}};
23229
23230 unsigned int i, j, elt, which;
23231 unsigned char perm[16];
23232 rtx x;
23233 bool one_vec;
23234
23235 /* Unpack the constant selector. */
23236 for (i = which = 0; i < 16; ++i)
23237 {
23238 elt = sel[i] & 31;
23239 which |= (elt < 16 ? 1 : 2);
23240 perm[i] = elt;
23241 }
23242
23243 /* Simplify the constant selector based on operands. */
23244 switch (which)
23245 {
23246 default:
23247 gcc_unreachable ();
23248
23249 case 3:
23250 one_vec = false;
23251 if (!rtx_equal_p (op0, op1))
23252 break;
23253 /* FALLTHRU */
23254
23255 case 2:
23256 for (i = 0; i < 16; ++i)
23257 perm[i] &= 15;
23258 op0 = op1;
23259 one_vec = true;
23260 break;
23261
23262 case 1:
23263 op1 = op0;
23264 one_vec = true;
23265 break;
23266 }
23267
23268 /* Look for splat patterns. */
23269 if (one_vec)
23270 {
23271 elt = perm[0];
23272
23273 for (i = 0; i < 16; ++i)
23274 if (perm[i] != elt)
23275 break;
23276 if (i == 16)
23277 {
23278 if (!BYTES_BIG_ENDIAN)
23279 elt = 15 - elt;
23280 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
23281 return true;
23282 }
23283
23284 if (elt % 2 == 0)
23285 {
23286 for (i = 0; i < 16; i += 2)
23287 if (perm[i] != elt || perm[i + 1] != elt + 1)
23288 break;
23289 if (i == 16)
23290 {
23291 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
23292 x = gen_reg_rtx (V8HImode);
23293 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
23294 GEN_INT (field)));
23295 emit_move_insn (target, gen_lowpart (V16QImode, x));
23296 return true;
23297 }
23298 }
23299
23300 if (elt % 4 == 0)
23301 {
23302 for (i = 0; i < 16; i += 4)
23303 if (perm[i] != elt
23304 || perm[i + 1] != elt + 1
23305 || perm[i + 2] != elt + 2
23306 || perm[i + 3] != elt + 3)
23307 break;
23308 if (i == 16)
23309 {
23310 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
23311 x = gen_reg_rtx (V4SImode);
23312 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
23313 GEN_INT (field)));
23314 emit_move_insn (target, gen_lowpart (V16QImode, x));
23315 return true;
23316 }
23317 }
23318 }
23319
23320 /* Look for merge and pack patterns. */
23321 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
23322 {
23323 bool swapped;
23324
23325 if ((patterns[j].mask & rs6000_isa_flags) == 0)
23326 continue;
23327
23328 elt = patterns[j].perm[0];
23329 if (perm[0] == elt)
23330 swapped = false;
23331 else if (perm[0] == elt + 16)
23332 swapped = true;
23333 else
23334 continue;
23335 for (i = 1; i < 16; ++i)
23336 {
23337 elt = patterns[j].perm[i];
23338 if (swapped)
23339 elt = (elt >= 16 ? elt - 16 : elt + 16);
23340 else if (one_vec && elt >= 16)
23341 elt -= 16;
23342 if (perm[i] != elt)
23343 break;
23344 }
23345 if (i == 16)
23346 {
23347 enum insn_code icode = patterns[j].impl;
23348 machine_mode omode = insn_data[icode].operand[0].mode;
23349 machine_mode imode = insn_data[icode].operand[1].mode;
23350
23351 rtx perm_idx = GEN_INT (0);
23352 if (icode == CODE_FOR_vsx_xxpermdi_v16qi)
23353 {
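/* xxpermdi's two-bit immediate selects one doubleword from each
input: the bit with value 2 chooses which doubleword of the first
input becomes the result's first doubleword, and the bit with
value 1 does the same for the second input (or the same input when
only one vector is involved). A selector byte of 8 mod 16 marks an
operand's second doubleword. */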
23354 int perm_val = 0;
23355 if (one_vec)
23356 {
23357 if (perm[0] == 8)
23358 perm_val |= 2;
23359 if (perm[8] == 8)
23360 perm_val |= 1;
23361 }
23362 else
23363 {
23364 if (perm[0] != 0)
23365 perm_val |= 2;
23366 if (perm[8] != 16)
23367 perm_val |= 1;
23368 }
23369 perm_idx = GEN_INT (perm_val);
23370 }
23371
23372 /* For little-endian, don't use vpkuwum and vpkuhum if the
23373 underlying vector type is not V4SI and V8HI, respectively.
23374 For example, using vpkuwum with a V8HI picks up the even
23375 halfwords (BE numbering) when the even halfwords (LE
23376 numbering) are what we need. */
23377 if (!BYTES_BIG_ENDIAN
23378 && icode == CODE_FOR_altivec_vpkuwum_direct
23379 && ((REG_P (op0)
23380 && GET_MODE (op0) != V4SImode)
23381 || (SUBREG_P (op0)
23382 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
23383 continue;
23384 if (!BYTES_BIG_ENDIAN
23385 && icode == CODE_FOR_altivec_vpkuhum_direct
23386 && ((REG_P (op0)
23387 && GET_MODE (op0) != V8HImode)
23388 || (SUBREG_P (op0)
23389 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
23390 continue;
23391
23392 /* For little-endian, the two input operands must be swapped
23393 (or swapped back) to ensure proper right-to-left numbering
23394 from 0 to 2N-1. */
23395 if (swapped ^ !BYTES_BIG_ENDIAN
23396 && icode != CODE_FOR_vsx_xxpermdi_v16qi)
23397 std::swap (op0, op1);
23398 if (imode != V16QImode)
23399 {
23400 op0 = gen_lowpart (imode, op0);
23401 op1 = gen_lowpart (imode, op1);
23402 }
23403 if (omode == V16QImode)
23404 x = target;
23405 else
23406 x = gen_reg_rtx (omode);
23407 if (icode == CODE_FOR_vsx_xxpermdi_v16qi)
23408 emit_insn (GEN_FCN (icode) (x, op0, op1, perm_idx));
23409 else
23410 emit_insn (GEN_FCN (icode) (x, op0, op1));
23411 if (omode != V16QImode)
23412 emit_move_insn (target, gen_lowpart (V16QImode, x));
23413 return true;
23414 }
23415 }
23416
23417 if (!BYTES_BIG_ENDIAN)
23418 {
23419 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
23420 return true;
23421 }
23422
23423 return false;
23424 }
23425
23426 /* Expand a VSX Permute Doubleword constant permutation.
23427 Return true if we match an efficient implementation. */
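/* PERM0 and PERM1 are two-bit indices into the four doublewords of
the concatenation {OP0, OP1}: bit 1 selects the operand, bit 0 the
doubleword within it. */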
23428
23429 static bool
23430 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
23431 unsigned char perm0, unsigned char perm1)
23432 {
23433 rtx x;
23434
23435 /* If both selectors come from the same operand, fold to single op. */
23436 if ((perm0 & 2) == (perm1 & 2))
23437 {
23438 if (perm0 & 2)
23439 op0 = op1;
23440 else
23441 op1 = op0;
23442 }
23443 /* If both operands are equal, fold to simpler permutation. */
23444 if (rtx_equal_p (op0, op1))
23445 {
23446 perm0 = perm0 & 1;
23447 perm1 = (perm1 & 1) + 2;
23448 }
23449 /* If the first selector comes from the second operand, swap. */
23450 else if (perm0 & 2)
23451 {
23452 if (perm1 & 2)
23453 return false;
23454 perm0 -= 2;
23455 perm1 += 2;
23456 std::swap (op0, op1);
23457 }
23458 /* If the second selector does not come from the second operand, fail. */
23459 else if ((perm1 & 2) == 0)
23460 return false;
23461
23462 /* Success! */
23463 if (target != NULL)
23464 {
23465 machine_mode vmode, dmode;
23466 rtvec v;
23467
23468 vmode = GET_MODE (target);
23469 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
23470 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
23471 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
23472 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
23473 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
23474 emit_insn (gen_rtx_SET (target, x));
23475 }
23476 return true;
23477 }
23478
23479 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
23480
23481 static bool
23482 rs6000_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
23483 rtx target, rtx op0, rtx op1,
23484 const vec_perm_indices &sel)
23485 {
23486 if (vmode != op_mode)
23487 return false;
23488
23489 bool testing_p = !target;
23490
23491 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
23492 if (TARGET_ALTIVEC && testing_p)
23493 return true;
23494
23495 if (op0)
23496 {
23497 rtx nop0 = force_reg (vmode, op0);
23498 if (op0 == op1)
23499 op1 = nop0;
23500 op0 = nop0;
23501 }
23502 if (op1)
23503 op1 = force_reg (vmode, op1);
23504
23505 /* Check for ps_merge* or xxpermdi insns. */
23506 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
23507 {
23508 if (testing_p)
23509 {
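/* When merely testing, supply placeholder registers so the matching
logic has operands to inspect; nothing is emitted on this path. */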
23510 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
23511 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
23512 }
23513 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
23514 return true;
23515 }
23516
23517 if (TARGET_ALTIVEC)
23518 {
23519 /* Force the target-independent code to lower to V16QImode. */
23520 if (vmode != V16QImode)
23521 return false;
23522 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
23523 return true;
23524 }
23525
23526 return false;
23527 }
23528
23529 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
23530 OP0 and OP1 are the input vectors and TARGET is the output vector.
23531 PERM specifies the constant permutation vector. */
23532
23533 static void
23534 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
23535 machine_mode vmode, const vec_perm_builder &perm)
23536 {
23537 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
23538 if (x != target)
23539 emit_move_insn (target, x);
23540 }
23541
23542 /* Expand an extract even operation. */
23543
23544 void
23545 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
23546 {
23547 machine_mode vmode = GET_MODE (target);
23548 unsigned i, nelt = GET_MODE_NUNITS (vmode);
23549 vec_perm_builder perm (nelt, nelt, 1);
23550
23551 for (i = 0; i < nelt; i++)
23552 perm.quick_push (i * 2);
23553
23554 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
23555 }
23556
23557 /* Expand a vector interleave operation. */
23558
23559 void
23560 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
23561 {
23562 machine_mode vmode = GET_MODE (target);
23563 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
23564 vec_perm_builder perm (nelt, nelt, 1);
23565
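/* The "high" variant interleaves elements from the first halves (BE
numbering) of both inputs, the "low" variant from the second halves,
alternating one element from OP0 and one from OP1. */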
23566 high = (highp ? 0 : nelt / 2);
23567 for (i = 0; i < nelt / 2; i++)
23568 {
23569 perm.quick_push (i + high);
23570 perm.quick_push (i + nelt + high);
23571 }
23572
23573 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
23574 }
23575
23576 /* Scale a V2DF vector SRC by 2**SCALE and place the result in TGT. */
23577 void
23578 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
23579 {
23580 HOST_WIDE_INT hwi_scale (scale);
23581 REAL_VALUE_TYPE r_pow;
23582 rtvec v = rtvec_alloc (2);
23583 rtx elt;
23584 rtx scale_vec = gen_reg_rtx (V2DFmode);
23585 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
23586 elt = const_double_from_real_value (r_pow, DFmode);
23587 RTVEC_ELT (v, 0) = elt;
23588 RTVEC_ELT (v, 1) = elt;
23589 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
23590 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
23591 }
23592
23593 /* Return an RTX representing where to find the function value of a
23594 function returning MODE. */
23595 static rtx
23596 rs6000_complex_function_value (machine_mode mode)
23597 {
23598 unsigned int regno;
23599 rtx r1, r2;
23600 machine_mode inner = GET_MODE_INNER (mode);
23601 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
23602
23603 if (TARGET_FLOAT128_TYPE
23604 && (mode == KCmode
23605 || (mode == TCmode && TARGET_IEEEQUAD)))
23606 regno = ALTIVEC_ARG_RETURN;
23607
23608 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23609 regno = FP_ARG_RETURN;
23610
23611 else
23612 {
23613 regno = GP_ARG_RETURN;
23614
23615 /* 32-bit is OK since it'll go in r3/r4. */
23616 if (TARGET_32BIT && inner_bytes >= 4)
23617 return gen_rtx_REG (mode, regno);
23618 }
23619
23620 if (inner_bytes >= 8)
23621 return gen_rtx_REG (mode, regno);
23622
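/* Return the value split across two consecutive registers, the
second part at byte offset INNER_BYTES. */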
23623 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
23624 const0_rtx);
23625 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
23626 GEN_INT (inner_bytes));
23627 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
23628 }
23629
23630 /* Return an rtx describing a return value of MODE as a PARALLEL
23631 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
23632 stride REG_STRIDE. */
23633
23634 static rtx
23635 rs6000_parallel_return (machine_mode mode,
23636 int n_elts, machine_mode elt_mode,
23637 unsigned int regno, unsigned int reg_stride)
23638 {
23639 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
23640
23641 int i;
23642 for (i = 0; i < n_elts; i++)
23643 {
23644 rtx r = gen_rtx_REG (elt_mode, regno);
23645 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
23646 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
23647 regno += reg_stride;
23648 }
23649
23650 return par;
23651 }
23652
23653 /* Target hook for TARGET_FUNCTION_VALUE.
23654
23655 An integer value is in r3 and a floating-point value is in fp1,
23656 unless -msoft-float. */
23657
23658 static rtx
23659 rs6000_function_value (const_tree valtype,
23660 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
23661 bool outgoing ATTRIBUTE_UNUSED)
23662 {
23663 machine_mode mode;
23664 unsigned int regno;
23665 machine_mode elt_mode;
23666 int n_elts;
23667
23668 /* Special handling for structs in darwin64. */
23669 if (TARGET_MACHO
23670 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
23671 {
23672 CUMULATIVE_ARGS valcum;
23673 rtx valret;
23674
23675 valcum.words = 0;
23676 valcum.fregno = FP_ARG_MIN_REG;
23677 valcum.vregno = ALTIVEC_ARG_MIN_REG;
23678 /* Do a trial code generation as if this were going to be passed as
23679 an argument; if any part goes in memory, we return NULL. */
23680 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
23681 if (valret)
23682 return valret;
23683 /* Otherwise fall through to standard ABI rules. */
23684 }
23685
23686 mode = TYPE_MODE (valtype);
23687
23688 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
23689 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
23690 {
23691 int first_reg, n_regs;
23692
23693 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
23694 {
23695 /* _Decimal128 must use even/odd register pairs. */
23696 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23697 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
23698 }
23699 else
23700 {
23701 first_reg = ALTIVEC_ARG_RETURN;
23702 n_regs = 1;
23703 }
23704
23705 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
23706 }
23707
23708 /* Some return value types need to be split in the -mpowerpc64, 32-bit ABI. */
23709 if (TARGET_32BIT && TARGET_POWERPC64)
23710 switch (mode)
23711 {
23712 default:
23713 break;
23714 case E_DImode:
23715 case E_SCmode:
23716 case E_DCmode:
23717 case E_TCmode:
23718 int count = GET_MODE_SIZE (mode) / 4;
23719 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
23720 }
23721
23722 if ((INTEGRAL_TYPE_P (valtype)
23723 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
23724 || POINTER_TYPE_P (valtype))
23725 mode = TARGET_32BIT ? SImode : DImode;
23726
23727 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23728 /* _Decimal128 must use an even/odd register pair. */
23729 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23730 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
23731 && !FLOAT128_VECTOR_P (mode))
23732 regno = FP_ARG_RETURN;
23733 else if (TREE_CODE (valtype) == COMPLEX_TYPE
23734 && targetm.calls.split_complex_arg)
23735 return rs6000_complex_function_value (mode);
23736 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
23737 return register is used in both cases, and we won't see V2DImode/V2DFmode
23738 for pure altivec, combine the two cases. */
23739 else if ((TREE_CODE (valtype) == VECTOR_TYPE || VECTOR_ALIGNMENT_P (mode))
23740 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
23741 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
23742 regno = ALTIVEC_ARG_RETURN;
23743 else
23744 regno = GP_ARG_RETURN;
23745
23746 return gen_rtx_REG (mode, regno);
23747 }
23748
23749 /* Define how to find the value returned by a library function
23750 assuming the value has mode MODE. */
23751 rtx
23752 rs6000_libcall_value (machine_mode mode)
23753 {
23754 unsigned int regno;
23755
23756 /* A long long return value needs to be split in the -mpowerpc64, 32-bit ABI. */
23757 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
23758 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
23759
23760 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23761 /* _Decimal128 must use an even/odd register pair. */
23762 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23763 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
23764 regno = FP_ARG_RETURN;
23765 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
23766 return register is used in both cases, and we won't see V2DImode/V2DFmode
23767 for pure altivec, combine the two cases. */
23768 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
23769 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
23770 regno = ALTIVEC_ARG_RETURN;
23771 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
23772 return rs6000_complex_function_value (mode);
23773 else
23774 regno = GP_ARG_RETURN;
23775
23776 return gen_rtx_REG (mode, regno);
23777 }
23778
23779 /* Compute register pressure classes. We implement the target hook to avoid
23780 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
23781 lead to incorrect estimates of the number of available registers and therefore
23782 increased register pressure/spill. */
23783 static int
23784 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
23785 {
23786 int n;
23787
23788 n = 0;
23789 pressure_classes[n++] = GENERAL_REGS;
23790 if (TARGET_ALTIVEC)
23791 pressure_classes[n++] = ALTIVEC_REGS;
23792 if (TARGET_VSX)
23793 pressure_classes[n++] = VSX_REGS;
23794 else
23795 {
23796 if (TARGET_HARD_FLOAT)
23797 pressure_classes[n++] = FLOAT_REGS;
23798 }
23799 pressure_classes[n++] = CR_REGS;
23800 pressure_classes[n++] = SPECIAL_REGS;
23801
23802 return n;
23803 }
23804
23805 /* Given FROM and TO register numbers, say whether this elimination is allowed.
23806 Frame pointer elimination is automatically handled.
23807
23808 For the RS/6000, if frame pointer elimination is being done, we would like
23809 to convert ap into fp, not sp.
23810
23811 We need r30 if -mminimal-toc was specified, and there are constant pool
23812 references. */
23813
23814 static bool
23815 rs6000_can_eliminate (const int from, const int to)
23816 {
23817 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
23818 ? ! frame_pointer_needed
23819 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
23820 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
23821 || constant_pool_empty_p ()
23822 : true);
23823 }
23824
23825 /* Define the offset between two registers, FROM to be eliminated and its
23826 replacement TO, at the start of a routine. */
23827 HOST_WIDE_INT
23828 rs6000_initial_elimination_offset (int from, int to)
23829 {
23830 rs6000_stack_t *info = rs6000_stack_info ();
23831 HOST_WIDE_INT offset;
23832
23833 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23834 offset = info->push_p ? 0 : -info->total_size;
23835 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23836 {
23837 offset = info->push_p ? 0 : -info->total_size;
23838 if (FRAME_GROWS_DOWNWARD)
23839 offset += info->fixed_size + info->vars_size + info->parm_size;
23840 }
23841 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
23842 offset = FRAME_GROWS_DOWNWARD
23843 ? info->fixed_size + info->vars_size + info->parm_size
23844 : 0;
23845 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
23846 offset = info->total_size;
23847 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23848 offset = info->push_p ? info->total_size : 0;
23849 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
23850 offset = 0;
23851 else
23852 gcc_unreachable ();
23853
23854 return offset;
23855 }
23856
23857 /* Fill in sizes of registers used by unwinder. */
23858
23859 static void
23860 rs6000_init_dwarf_reg_sizes_extra (tree address)
23861 {
23862 if (TARGET_MACHO && ! TARGET_ALTIVEC)
23863 {
23864 int i;
23865 machine_mode mode = TYPE_MODE (char_type_node);
23866 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
23867 rtx mem = gen_rtx_MEM (BLKmode, addr);
23868 rtx value = gen_int_mode (16, mode);
23869
23870 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
23871 The unwinder still needs to know the size of Altivec registers. */
23872
23873 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
23874 {
23875 int column = DWARF_REG_TO_UNWIND_COLUMN
23876 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
23877 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
23878
23879 emit_move_insn (adjust_address (mem, mode, offset), value);
23880 }
23881 }
23882 }
23883
23884 /* Map internal gcc register numbers to debug format register numbers.
23885 FORMAT specifies the type of debug register number to use:
23886 0 -- debug information, except for frame-related sections
23887 1 -- DWARF .debug_frame section
23888 2 -- DWARF .eh_frame section */
23889
23890 unsigned int
23891 rs6000_debugger_regno (unsigned int regno, unsigned int format)
23892 {
23893 /* On some platforms, we use the standard DWARF register
23894 numbering for .debug_info and .debug_frame. */
23895 if ((format == 0 && dwarf_debuginfo_p ()) || format == 1)
23896 {
23897 #ifdef RS6000_USE_DWARF_NUMBERING
23898 if (regno <= 31)
23899 return regno;
23900 if (FP_REGNO_P (regno))
23901 return regno - FIRST_FPR_REGNO + 32;
23902 if (ALTIVEC_REGNO_P (regno))
23903 return regno - FIRST_ALTIVEC_REGNO + 1124;
23904 if (regno == LR_REGNO)
23905 return 108;
23906 if (regno == CTR_REGNO)
23907 return 109;
23908 if (regno == CA_REGNO)
23909 return 101; /* XER */
23910 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
23911 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
23912 The actual code emitted saves the whole of CR, so we map CR2_REGNO
23913 to the DWARF reg for CR. */
23914 if (format == 1 && regno == CR2_REGNO)
23915 return 64;
23916 if (CR_REGNO_P (regno))
23917 return regno - CR0_REGNO + 86;
23918 if (regno == VRSAVE_REGNO)
23919 return 356;
23920 if (regno == VSCR_REGNO)
23921 return 67;
23922
23923 /* These do not make much sense. */
23924 if (regno == FRAME_POINTER_REGNUM)
23925 return 111;
23926 if (regno == ARG_POINTER_REGNUM)
23927 return 67;
23928 if (regno == 64)
23929 return 100;
23930
23931 gcc_unreachable ();
23932 #endif
23933 }
23934
23935 /* We use the GCC 7 (and before) internal number for non-DWARF debug
23936 information, and also for .eh_frame. */
23937 /* Translate the regnos to their numbers in GCC 7 (and before). */
23938 if (regno <= 31)
23939 return regno;
23940 if (FP_REGNO_P (regno))
23941 return regno - FIRST_FPR_REGNO + 32;
23942 if (ALTIVEC_REGNO_P (regno))
23943 return regno - FIRST_ALTIVEC_REGNO + 77;
23944 if (regno == LR_REGNO)
23945 return 65;
23946 if (regno == CTR_REGNO)
23947 return 66;
23948 if (regno == CA_REGNO)
23949 return 76; /* XER */
23950 if (CR_REGNO_P (regno))
23951 return regno - CR0_REGNO + 68;
23952 if (regno == VRSAVE_REGNO)
23953 return 109;
23954 if (regno == VSCR_REGNO)
23955 return 110;
23956
23957 if (regno == FRAME_POINTER_REGNUM)
23958 return 111;
23959 if (regno == ARG_POINTER_REGNUM)
23960 return 67;
23961 if (regno == 64)
23962 return 64;
23963
23964 gcc_unreachable ();
23965 }
23966
23967 /* target hook eh_return_filter_mode */
23968 static scalar_int_mode
23969 rs6000_eh_return_filter_mode (void)
23970 {
23971 return TARGET_32BIT ? SImode : word_mode;
23972 }
23973
23974 /* Target hook for translate_mode_attribute. */
23975 static machine_mode
23976 rs6000_translate_mode_attribute (machine_mode mode)
23977 {
23978 if ((FLOAT128_IEEE_P (mode)
23979 && ieee128_float_type_node == long_double_type_node)
23980 || (FLOAT128_IBM_P (mode)
23981 && ibm128_float_type_node == long_double_type_node))
23982 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
23983 return mode;
23984 }
23985
23986 /* Target hook for scalar_mode_supported_p. */
23987 static bool
23988 rs6000_scalar_mode_supported_p (scalar_mode mode)
23989 {
23990 /* -m32 does not support TImode. This is the default, from
23991 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
23992 same ABI as for -m32. But default_scalar_mode_supported_p allows
23993 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
23994 for -mpowerpc64. */
23995 if (TARGET_32BIT && mode == TImode)
23996 return false;
23997
23998 if (DECIMAL_FLOAT_MODE_P (mode))
23999 return default_decimal_float_supported_p ();
24000 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
24001 return true;
24002 else
24003 return default_scalar_mode_supported_p (mode);
24004 }
24005
24006 /* Target hook for libgcc_floating_mode_supported_p. */
24007
24008 static bool
24009 rs6000_libgcc_floating_mode_supported_p (scalar_float_mode mode)
24010 {
24011 switch (mode)
24012 {
24013 case E_SFmode:
24014 case E_DFmode:
24015 case E_TFmode:
24016 return true;
24017
24018 /* We only return true for KFmode if IEEE 128-bit types are supported, and
24019 if long double does not use the IEEE 128-bit format. If long double
24020 uses the IEEE 128-bit format, it will use TFmode and not KFmode.
24021 Because the code will not use KFmode in that case, there will be aborts
24022 because it can't find KFmode in the Floatn types. */
24023 case E_KFmode:
24024 return TARGET_FLOAT128_TYPE && !TARGET_IEEEQUAD;
24025
24026 default:
24027 return false;
24028 }
24029 }
24030
24031 /* Target hook for vector_mode_supported_p. */
24032 static bool
24033 rs6000_vector_mode_supported_p (machine_mode mode)
24034 {
24035 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
24036 128-bit, the compiler might try to widen IEEE 128-bit to IBM
24037 double-double. */
24038 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
24039 return true;
24040
24041 else
24042 return false;
24043 }
24044
24045 /* Target hook for floatn_mode. */
24046 static opt_scalar_float_mode
24047 rs6000_floatn_mode (int n, bool extended)
24048 {
24049 if (extended)
24050 {
24051 switch (n)
24052 {
24053 case 32:
24054 return DFmode;
24055
24056 case 64:
24057 if (TARGET_FLOAT128_TYPE)
24058 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
24059 else
24060 return opt_scalar_float_mode ();
24061
24062 case 128:
24063 return opt_scalar_float_mode ();
24064
24065 default:
24066 /* Those are the only valid _FloatNx types. */
24067 gcc_unreachable ();
24068 }
24069 }
24070 else
24071 {
24072 switch (n)
24073 {
24074 case 32:
24075 return SFmode;
24076
24077 case 64:
24078 return DFmode;
24079
24080 case 128:
24081 if (TARGET_FLOAT128_TYPE)
24082 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
24083 else
24084 return opt_scalar_float_mode ();
24085
24086 default:
24087 return opt_scalar_float_mode ();
24088 }
24089 }
24091 }
24092
24093 /* Target hook for c_mode_for_suffix. */
24094 static machine_mode
24095 rs6000_c_mode_for_suffix (char suffix)
24096 {
24097 if (TARGET_FLOAT128_TYPE)
24098 {
24099 if (suffix == 'q' || suffix == 'Q')
24100 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
24101
24102 /* At the moment, we are not defining a suffix for IBM extended double.
24103 If/when the default for -mabi=ieeelongdouble is changed, and we want
24104 to support __ibm128 constants in legacy library code, we may need to
24105 re-evaluate this decision. Currently, c-lex.cc only supports 'w' and
24106 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
24107 __float80 constants. */
24108 }
24109
24110 return VOIDmode;
24111 }
24112
24113 /* Target hook for invalid_arg_for_unprototyped_fn. */
24114 static const char *
24115 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
24116 {
24117 return (!rs6000_darwin64_abi
24118 && typelist == 0
24119 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
24120 && (funcdecl == NULL_TREE
24121 || (TREE_CODE (funcdecl) == FUNCTION_DECL
24122 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
24123 ? N_("AltiVec argument passed to unprototyped function")
24124 : NULL;
24125 }
24126
24127 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
24128 setup by using __stack_chk_fail_local hidden function instead of
24129 calling __stack_chk_fail directly. Otherwise it is better to call
24130 __stack_chk_fail directly. */
24131
24132 static tree ATTRIBUTE_UNUSED
24133 rs6000_stack_protect_fail (void)
24134 {
24135 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
24136 ? default_hidden_stack_protect_fail ()
24137 : default_external_stack_protect_fail ();
24138 }
24139
24140 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
24141
24142 #if TARGET_ELF
24143 static unsigned HOST_WIDE_INT
24144 rs6000_asan_shadow_offset (void)
24145 {
24146 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
24147 }
24148 #endif
24149 \f
24150 /* Mask options that we want to support inside of attribute((target)) and
24151 #pragma GCC target operations. Note, we do not include things like
24152 64/32-bit, endianness, hard/soft floating point, etc. that would have
24153 different calling sequences. */
24154
24155 struct rs6000_opt_mask {
24156 const char *name; /* option name */
24157 HOST_WIDE_INT mask; /* mask to set */
24158 bool invert; /* invert sense of mask */
24159 bool valid_target; /* option is a target option */
24160 };
24161
24162 static struct rs6000_opt_mask const rs6000_opt_masks[] =
24163 {
24164 { "altivec", OPTION_MASK_ALTIVEC, false, true },
24165 { "block-ops-unaligned-vsx", OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX,
24166 false, true },
24167 { "block-ops-vector-pair", OPTION_MASK_BLOCK_OPS_VECTOR_PAIR,
24168 false, true },
24169 { "cmpb", OPTION_MASK_CMPB, false, true },
24170 { "crypto", OPTION_MASK_CRYPTO, false, true },
24171 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
24172 { "dlmzb", OPTION_MASK_DLMZB, false, true },
24173 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
24174 false, true },
24175 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
24176 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
24177 { "fprnd", OPTION_MASK_FPRND, false, true },
24178 { "power10", OPTION_MASK_POWER10, false, true },
24179 { "hard-dfp", OPTION_MASK_DFP, false, true },
24180 { "htm", OPTION_MASK_HTM, false, true },
24181 { "isel", OPTION_MASK_ISEL, false, true },
24182 { "mfcrf", OPTION_MASK_MFCRF, false, true },
24183 { "mfpgpr", 0, false, true },
24184 { "mma", OPTION_MASK_MMA, false, true },
24185 { "modulo", OPTION_MASK_MODULO, false, true },
24186 { "mulhw", OPTION_MASK_MULHW, false, true },
24187 { "multiple", OPTION_MASK_MULTIPLE, false, true },
24188 { "pcrel", OPTION_MASK_PCREL, false, true },
24189 { "pcrel-opt", OPTION_MASK_PCREL_OPT, false, true },
24190 { "popcntb", OPTION_MASK_POPCNTB, false, true },
24191 { "popcntd", OPTION_MASK_POPCNTD, false, true },
24192 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
24193 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
24194 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
24195 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
24196 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
24197 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
24198 { "power10-fusion", OPTION_MASK_P10_FUSION, false, true },
24199 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
24200 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
24201 { "prefixed", OPTION_MASK_PREFIXED, false, true },
24202 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
24203 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
24204 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
24205 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
24206 { "string", 0, false, true },
24207 { "update", OPTION_MASK_NO_UPDATE, true , true },
24208 { "vsx", OPTION_MASK_VSX, false, true },
24209 #ifdef OPTION_MASK_64BIT
24210 #if TARGET_AIX_OS
24211 { "aix64", OPTION_MASK_64BIT, false, false },
24212 { "aix32", OPTION_MASK_64BIT, true, false },
24213 #else
24214 { "64", OPTION_MASK_64BIT, false, false },
24215 { "32", OPTION_MASK_64BIT, true, false },
24216 #endif
24217 #endif
24218 #ifdef OPTION_MASK_EABI
24219 { "eabi", OPTION_MASK_EABI, false, false },
24220 #endif
24221 #ifdef OPTION_MASK_LITTLE_ENDIAN
24222 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
24223 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
24224 #endif
24225 #ifdef OPTION_MASK_RELOCATABLE
24226 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
24227 #endif
24228 #ifdef OPTION_MASK_STRICT_ALIGN
24229 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
24230 #endif
24231 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
24232 { "string", 0, false, false },
24233 };
24234
24235 /* Option variables that we want to support inside attribute((target)) and
24236 #pragma GCC target operations. */
24237
24238 struct rs6000_opt_var {
24239 const char *name; /* option name */
24240 size_t global_offset; /* offset of the option in global_options. */
24241 size_t target_offset; /* offset of the option in target options. */
24242 };
24243
24244 static struct rs6000_opt_var const rs6000_opt_vars[] =
24245 {
24246 { "friz",
24247 offsetof (struct gcc_options, x_TARGET_FRIZ),
24248 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
24249 { "avoid-indexed-addresses",
24250 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
24251 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
24252 { "longcall",
24253 offsetof (struct gcc_options, x_rs6000_default_long_calls),
24254 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
24255 { "optimize-swaps",
24256 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
24257 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
24258 { "allow-movmisalign",
24259 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
24260 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
24261 { "sched-groups",
24262 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
24263 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
24264 { "always-hint",
24265 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
24266 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
24267 { "align-branch-targets",
24268 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
24269 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
24270 { "sched-prolog",
24271 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
24272 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
24273 { "sched-epilog",
24274 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
24275 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
24276 { "speculate-indirect-jumps",
24277 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
24278 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
24279 };
24280
24281 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
24282 parsing. Return true if there were no errors. */
24283
24284 static bool
24285 rs6000_inner_target_options (tree args, bool attr_p)
24286 {
24287 bool ret = true;
24288
24289 if (args == NULL_TREE)
24290 ;
24291
24292 else if (TREE_CODE (args) == STRING_CST)
24293 {
24294 char *p = ASTRDUP (TREE_STRING_POINTER (args));
24295 char *q;
24296
24297 while ((q = strtok (p, ",")) != NULL)
24298 {
24299 bool error_p = false;
24300 bool not_valid_p = false;
24301 const char *cpu_opt = NULL;
24302
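/* strtok must be passed NULL after the first call to continue
scanning the same string. */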
24303 p = NULL;
24304 if (startswith (q, "cpu="))
24305 {
24306 int cpu_index = rs6000_cpu_name_lookup (q+4);
24307 if (cpu_index >= 0)
24308 rs6000_cpu_index = cpu_index;
24309 else
24310 {
24311 error_p = true;
24312 cpu_opt = q+4;
24313 }
24314 }
24315 else if (startswith (q, "tune="))
24316 {
24317 int tune_index = rs6000_cpu_name_lookup (q+5);
24318 if (tune_index >= 0)
24319 rs6000_tune_index = tune_index;
24320 else
24321 {
24322 error_p = true;
24323 cpu_opt = q+5;
24324 }
24325 }
24326 else
24327 {
24328 size_t i;
24329 bool invert = false;
24330 char *r = q;
24331
24332 error_p = true;
24333 if (startswith (r, "no-"))
24334 {
24335 invert = true;
24336 r += 3;
24337 }
24338
24339 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
24340 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
24341 {
24342 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
24343
24344 if (!rs6000_opt_masks[i].valid_target)
24345 not_valid_p = true;
24346 else
24347 {
24348 error_p = false;
24349 rs6000_isa_flags_explicit |= mask;
24350
24351 /* VSX needs altivec, so -mvsx automagically sets
24352 altivec and disables -mavoid-indexed-addresses. */
24353 if (!invert)
24354 {
24355 if (mask == OPTION_MASK_VSX)
24356 {
24357 mask |= OPTION_MASK_ALTIVEC;
24358 TARGET_AVOID_XFORM = 0;
24359 }
24360 }
24361
24362 if (rs6000_opt_masks[i].invert)
24363 invert = !invert;
24364
24365 if (invert)
24366 rs6000_isa_flags &= ~mask;
24367 else
24368 rs6000_isa_flags |= mask;
24369 }
24370 break;
24371 }
24372
24373 if (error_p && !not_valid_p)
24374 {
24375 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
24376 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
24377 {
24378 size_t j = rs6000_opt_vars[i].global_offset;
24379 *((int *) ((char *)&global_options + j)) = !invert;
24380 error_p = false;
24381 not_valid_p = false;
24382 break;
24383 }
24384 }
24385 }
24386
24387 if (error_p)
24388 {
24389 const char *eprefix, *esuffix;
24390
24391 ret = false;
24392 if (attr_p)
24393 {
24394 eprefix = "__attribute__((__target__(";
24395 esuffix = ")))";
24396 }
24397 else
24398 {
24399 eprefix = "#pragma GCC target ";
24400 esuffix = "";
24401 }
24402
24403 if (cpu_opt)
24404 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
24405 q, esuffix);
24406 else if (not_valid_p)
24407 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
24408 else
24409 error ("%s%qs%s is invalid", eprefix, q, esuffix);
24410 }
24411 }
24412 }
24413
24414 else if (TREE_CODE (args) == TREE_LIST)
24415 {
24416 do
24417 {
24418 tree value = TREE_VALUE (args);
24419 if (value)
24420 {
24421 bool ret2 = rs6000_inner_target_options (value, attr_p);
24422 if (!ret2)
24423 ret = false;
24424 }
24425 args = TREE_CHAIN (args);
24426 }
24427 while (args != NULL_TREE);
24428 }
24429
24430 else
24431 {
24432 error ("attribute %<target%> argument not a string");
24433 return false;
24434 }
24435
24436 return ret;
24437 }
24438
24439 /* Print out the target options as a list for -mdebug=target. */
24440
24441 static void
24442 rs6000_debug_target_options (tree args, const char *prefix)
24443 {
24444 if (args == NULL_TREE)
24445 fprintf (stderr, "%s<NULL>", prefix);
24446
24447 else if (TREE_CODE (args) == STRING_CST)
24448 {
24449 char *p = ASTRDUP (TREE_STRING_POINTER (args));
24450 char *q;
24451
24452 while ((q = strtok (p, ",")) != NULL)
24453 {
24454 p = NULL;
24455 fprintf (stderr, "%s\"%s\"", prefix, q);
24456 prefix = ", ";
24457 }
24458 }
24459
24460 else if (TREE_CODE (args) == TREE_LIST)
24461 {
24462 do
24463 {
24464 tree value = TREE_VALUE (args);
24465 if (value)
24466 {
24467 rs6000_debug_target_options (value, prefix);
24468 prefix = ", ";
24469 }
24470 args = TREE_CHAIN (args);
24471 }
24472 while (args != NULL_TREE);
24473 }
24474
24475 else
24476 gcc_unreachable ();
24477
24478 return;
24479 }
24480
24481 \f
24482 /* Hook to validate attribute((target("..."))). */
24483
24484 static bool
24485 rs6000_valid_attribute_p (tree fndecl,
24486 tree ARG_UNUSED (name),
24487 tree args,
24488 int flags)
24489 {
24490 struct cl_target_option cur_target;
24491 bool ret;
24492 tree old_optimize;
24493 tree new_target, new_optimize;
24494 tree func_optimize;
24495
24496 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
24497
24498 if (TARGET_DEBUG_TARGET)
24499 {
24500 tree tname = DECL_NAME (fndecl);
24501 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
24502 if (tname)
24503 fprintf (stderr, "function: %.*s\n",
24504 (int) IDENTIFIER_LENGTH (tname),
24505 IDENTIFIER_POINTER (tname));
24506 else
24507 fprintf (stderr, "function: unknown\n");
24508
24509 fprintf (stderr, "args:");
24510 rs6000_debug_target_options (args, " ");
24511 fprintf (stderr, "\n");
24512
24513 if (flags)
24514 fprintf (stderr, "flags: 0x%x\n", flags);
24515
24516 fprintf (stderr, "--------------------\n");
24517 }
24518
24519 /* attribute((target("default"))) does nothing, beyond
24520 affecting multi-versioning. */
24521 if (TREE_VALUE (args)
24522 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
24523 && TREE_CHAIN (args) == NULL_TREE
24524 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
24525 return true;
24526
24527 old_optimize = build_optimization_node (&global_options,
24528 &global_options_set);
24529 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
24530
24531 /* If the function changed the optimization levels as well as setting target
24532 options, start with the optimizations specified. */
24533 if (func_optimize && func_optimize != old_optimize)
24534 cl_optimization_restore (&global_options, &global_options_set,
24535 TREE_OPTIMIZATION (func_optimize));
24536
24537 /* The target attributes may also change some optimization flags, so update
24538 the optimization options if necessary. */
24539 cl_target_option_save (&cur_target, &global_options, &global_options_set);
24540 rs6000_cpu_index = rs6000_tune_index = -1;
24541 ret = rs6000_inner_target_options (args, true);
24542
24543 /* Set up any additional state. */
24544 if (ret)
24545 {
24546 ret = rs6000_option_override_internal (false);
24547 new_target = build_target_option_node (&global_options,
24548 &global_options_set);
24549 }
24550 else
24551 new_target = NULL;
24552
24553 new_optimize = build_optimization_node (&global_options,
24554 &global_options_set);
24555
24556 if (!new_target)
24557 ret = false;
24558
24559 else if (fndecl)
24560 {
24561 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
24562
24563 if (old_optimize != new_optimize)
24564 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
24565 }
24566
24567 cl_target_option_restore (&global_options, &global_options_set, &cur_target);
24568
24569 if (old_optimize != new_optimize)
24570 cl_optimization_restore (&global_options, &global_options_set,
24571 TREE_OPTIMIZATION (old_optimize));
24572
24573 return ret;
24574 }
24575
24576 \f
24577 /* Hook to validate the current #pragma GCC target and set the state, and
24578 update the macros based on what was changed. If ARGS is NULL, then
24579 POP_TARGET is used to reset the options. */
24580
24581 bool
24582 rs6000_pragma_target_parse (tree args, tree pop_target)
24583 {
24584 tree prev_tree = build_target_option_node (&global_options,
24585 &global_options_set);
24586 tree cur_tree;
24587 struct cl_target_option *prev_opt, *cur_opt;
24588 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
24589
24590 if (TARGET_DEBUG_TARGET)
24591 {
24592 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
24593 fprintf (stderr, "args:");
24594 rs6000_debug_target_options (args, " ");
24595 fprintf (stderr, "\n");
24596
24597 if (pop_target)
24598 {
24599 fprintf (stderr, "pop_target:\n");
24600 debug_tree (pop_target);
24601 }
24602 else
24603 fprintf (stderr, "pop_target: <NULL>\n");
24604
24605 fprintf (stderr, "--------------------\n");
24606 }
24607
24608 if (! args)
24609 {
24610 cur_tree = ((pop_target)
24611 ? pop_target
24612 : target_option_default_node);
24613 cl_target_option_restore (&global_options, &global_options_set,
24614 TREE_TARGET_OPTION (cur_tree));
24615 }
24616 else
24617 {
24618 rs6000_cpu_index = rs6000_tune_index = -1;
24619 if (!rs6000_inner_target_options (args, false)
24620 || !rs6000_option_override_internal (false)
24621 || (cur_tree = build_target_option_node (&global_options,
24622 &global_options_set))
24623 == NULL_TREE)
24624 {
24625 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
24626 fprintf (stderr, "invalid pragma\n");
24627
24628 return false;
24629 }
24630 }
24631
24632 target_option_current_node = cur_tree;
24633 rs6000_activate_target_options (target_option_current_node);
24634
24635 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
24636 change the macros that are defined. */
24637 if (rs6000_target_modify_macros_ptr)
24638 {
24639 prev_opt = TREE_TARGET_OPTION (prev_tree);
24640 prev_flags = prev_opt->x_rs6000_isa_flags;
24641
24642 cur_opt = TREE_TARGET_OPTION (cur_tree);
24643 cur_flags = cur_opt->x_rs6000_isa_flags;
24644
24645 diff_flags = (prev_flags ^ cur_flags);
24646
24647 if (diff_flags != 0)
24648 {
24649 /* Delete old macros. */
24650 rs6000_target_modify_macros_ptr (false,
24651 prev_flags & diff_flags);
24652
24653 /* Define new macros. */
24654 rs6000_target_modify_macros_ptr (true,
24655 cur_flags & diff_flags);
24656 }
24657 }
24658
24659 return true;
24660 }
24661
24662 \f
24663 /* Remember the last target of rs6000_set_current_function. */
24664 static GTY(()) tree rs6000_previous_fndecl;
24665
24666 /* Restore target's globals from NEW_TREE and invalidate the
24667 rs6000_previous_fndecl cache. */
24668
24669 void
24670 rs6000_activate_target_options (tree new_tree)
24671 {
24672 cl_target_option_restore (&global_options, &global_options_set,
24673 TREE_TARGET_OPTION (new_tree));
24674 if (TREE_TARGET_GLOBALS (new_tree))
24675 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
24676 else if (new_tree == target_option_default_node)
24677 restore_target_globals (&default_target_globals);
24678 else
24679 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
24680 rs6000_previous_fndecl = NULL_TREE;
24681 }
24682
24683 /* Establish appropriate back-end context for processing the function
24684 FNDECL. The argument might be NULL to indicate processing at top
24685 level, outside of any function scope. */
24686 static void
24687 rs6000_set_current_function (tree fndecl)
24688 {
24689 if (TARGET_DEBUG_TARGET)
24690 {
24691 fprintf (stderr, "\n==================== rs6000_set_current_function");
24692
24693 if (fndecl)
24694 fprintf (stderr, ", fndecl %s (%p)",
24695 (DECL_NAME (fndecl)
24696 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
24697 : "<unknown>"), (void *)fndecl);
24698
24699 if (rs6000_previous_fndecl)
24700 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
24701
24702 fprintf (stderr, "\n");
24703 }
24704
24705 /* Only change the context if the function changes. This hook is called
24706 several times in the course of compiling a function, and we don't want to
24707 slow things down too much or call target_reinit when it isn't safe. */
24708 if (fndecl == rs6000_previous_fndecl)
24709 return;
24710
24711 tree old_tree;
24712 if (rs6000_previous_fndecl == NULL_TREE)
24713 old_tree = target_option_current_node;
24714 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
24715 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
24716 else
24717 old_tree = target_option_default_node;
24718
24719 tree new_tree;
24720 if (fndecl == NULL_TREE)
24721 {
24722 if (old_tree != target_option_current_node)
24723 new_tree = target_option_current_node;
24724 else
24725 new_tree = NULL_TREE;
24726 }
24727 else
24728 {
24729 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
24730 if (new_tree == NULL_TREE)
24731 new_tree = target_option_default_node;
24732 }
24733
24734 if (TARGET_DEBUG_TARGET)
24735 {
24736 if (new_tree)
24737 {
24738 fprintf (stderr, "\nnew fndecl target specific options:\n");
24739 debug_tree (new_tree);
24740 }
24741
24742 if (old_tree)
24743 {
24744 fprintf (stderr, "\nold fndecl target specific options:\n");
24745 debug_tree (old_tree);
24746 }
24747
24748 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
24749 fprintf (stderr, "--------------------\n");
24750 }
24751
24752 if (new_tree && old_tree != new_tree)
24753 rs6000_activate_target_options (new_tree);
24754
24755 if (fndecl)
24756 rs6000_previous_fndecl = fndecl;
24757 }
24758
24759 \f
24760 /* Save the current options */
24761
24762 static void
24763 rs6000_function_specific_save (struct cl_target_option *ptr,
24764 struct gcc_options *opts,
24765 struct gcc_options */* opts_set */)
24766 {
24767 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
24768 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
24769 }
24770
24771 /* Restore the current options */
24772
24773 static void
24774 rs6000_function_specific_restore (struct gcc_options *opts,
24775 struct gcc_options */* opts_set */,
24776 struct cl_target_option *ptr)
24777
24778 {
24779 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
24780 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
24781 (void) rs6000_option_override_internal (false);
24782 }
24783
24784 /* Print the current options */
24785
24786 static void
24787 rs6000_function_specific_print (FILE *file, int indent,
24788 struct cl_target_option *ptr)
24789 {
24790 rs6000_print_isa_options (file, indent, "Isa options set",
24791 ptr->x_rs6000_isa_flags);
24792
24793 rs6000_print_isa_options (file, indent, "Isa options explicit",
24794 ptr->x_rs6000_isa_flags_explicit);
24795 }
24796
24797 /* Helper function to print the current isa or misc options on a line. */
24798
24799 static void
24800 rs6000_print_options_internal (FILE *file,
24801 int indent,
24802 const char *string,
24803 HOST_WIDE_INT flags,
24804 const char *prefix,
24805 const struct rs6000_opt_mask *opts,
24806 size_t num_elements)
24807 {
24808 size_t i;
24809 size_t start_column = 0;
24810 size_t cur_column;
24811 size_t max_column = 120;
24812 size_t prefix_len = strlen (prefix);
24813 size_t comma_len = 0;
24814 const char *comma = "";
24815
24816 if (indent)
24817 start_column += fprintf (file, "%*s", indent, "");
24818
24819 if (!flags)
24820 {
24821 fprintf (file, DEBUG_FMT_S, string, "<none>");
24822 return;
24823 }
24824
24825 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
24826
24827 /* Print the various mask options. */
24828 cur_column = start_column;
24829 for (i = 0; i < num_elements; i++)
24830 {
24831 bool invert = opts[i].invert;
24832 const char *name = opts[i].name;
24833 const char *no_str = "";
24834 HOST_WIDE_INT mask = opts[i].mask;
24835 size_t len = comma_len + prefix_len + strlen (name);
24836
24837 if (!invert)
24838 {
24839 if ((flags & mask) == 0)
24840 {
24841 no_str = "no-";
24842 len += strlen ("no-");
24843 }
24844
24845 flags &= ~mask;
24846 }
24847
24848 else
24849 {
24850 if ((flags & mask) != 0)
24851 {
24852 no_str = "no-";
24853 len += strlen ("no-");
24854 }
24855
24856 flags |= mask;
24857 }
24858
24859 cur_column += len;
24860 if (cur_column > max_column)
24861 {
24862 fprintf (file, ", \\\n%*s", (int)start_column, "");
24863 cur_column = start_column + len;
24864 comma = "";
24865 }
24866
24867 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
24868 comma = ", ";
24869 comma_len = strlen (", ");
24870 }
24871
24872 fputs ("\n", file);
24873 }
24874
24875 /* Helper function to print the current isa options on a line. */
24876
24877 static void
24878 rs6000_print_isa_options (FILE *file, int indent, const char *string,
24879 HOST_WIDE_INT flags)
24880 {
24881 rs6000_print_options_internal (file, indent, string, flags, "-m",
24882 &rs6000_opt_masks[0],
24883 ARRAY_SIZE (rs6000_opt_masks));
24884 }
24885
24886 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
24887 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
24888 -mupper-regs-df, etc.).
24889
24890 If the user used -mno-power8-vector, we need to turn off all of the implicit
24891 ISA 2.07 and 3.0 options that relate to the vector unit.
24892
24893 If the user used -mno-power9-vector, we need to turn off all of the implicit
24894 ISA 3.0 options that relate to the vector unit.
24895
24896 This function does not handle explicit options such as the user specifying
24897 -mdirect-move. These are handled in rs6000_option_override_internal, and
24898 the appropriate error is given if needed.
24899
24900 We return a mask of all of the implicit options that should not be enabled
24901 by default. */
24902
24903 static HOST_WIDE_INT
24904 rs6000_disable_incompatible_switches (void)
24905 {
24906 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
24907 size_t i, j;
24908
24909 static const struct {
24910 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
24911 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
24912 const char *const name; /* name of the switch. */
24913 } flags[] = {
24914 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
24915 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
24916 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
24917 { OPTION_MASK_ALTIVEC, OTHER_ALTIVEC_MASKS, "altivec" },
24918 };
24919
24920 for (i = 0; i < ARRAY_SIZE (flags); i++)
24921 {
24922 HOST_WIDE_INT no_flag = flags[i].no_flag;
24923
24924 if ((rs6000_isa_flags & no_flag) == 0
24925 && (rs6000_isa_flags_explicit & no_flag) != 0)
24926 {
24927 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
24928 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
24929 & rs6000_isa_flags
24930 & dep_flags);
24931
24932 if (set_flags)
24933 {
24934 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
24935 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
24936 {
24937 set_flags &= ~rs6000_opt_masks[j].mask;
24938 error ("%<-mno-%s%> turns off %<-m%s%>",
24939 flags[i].name,
24940 rs6000_opt_masks[j].name);
24941 }
24942
24943 gcc_assert (!set_flags);
24944 }
24945
24946 rs6000_isa_flags &= ~dep_flags;
24947 ignore_masks |= no_flag | dep_flags;
24948 }
24949 }
24950
24951 return ignore_masks;
24952 }
24953
24954 \f
24955 /* Helper function for printing the function name when debugging. */
24956
24957 static const char *
24958 get_decl_name (tree fn)
24959 {
24960 tree name;
24961
24962 if (!fn)
24963 return "<null>";
24964
24965 name = DECL_NAME (fn);
24966 if (!name)
24967 return "<no-name>";
24968
24969 return IDENTIFIER_POINTER (name);
24970 }
24971
24972 /* Return the clone id of the target we are compiling code for in a target
24973 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
24974 the priority list for the target clones (ordered from lowest to
24975 highest). */
24976
24977 static int
24978 rs6000_clone_priority (tree fndecl)
24979 {
24980 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
24981 HOST_WIDE_INT isa_masks;
24982 int ret = CLONE_DEFAULT;
24983 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
24984 const char *attrs_str = NULL;
24985
24986 attrs = TREE_VALUE (TREE_VALUE (attrs));
24987 attrs_str = TREE_STRING_POINTER (attrs);
24988
24989 /* Return priority zero for default function. Return the ISA needed for the
24990 function if it is not the default. */
24991 if (strcmp (attrs_str, "default") != 0)
24992 {
24993 if (fn_opts == NULL_TREE)
24994 fn_opts = target_option_default_node;
24995
24996 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
24997 isa_masks = rs6000_isa_flags;
24998 else
24999 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
25000
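/* Scan from the highest-priority clone downward and pick the first
one whose ISA bits intersect the function's ISA mask. */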
25001 for (ret = CLONE_MAX - 1; ret != 0; ret--)
25002 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
25003 break;
25004 }
25005
25006 if (TARGET_DEBUG_TARGET)
25007 fprintf (stderr, "rs6000_clone_priority (%s) => %d\n",
25008 get_decl_name (fndecl), ret);
25009
25010 return ret;
25011 }
25012
25013 /* This compares the priority of target features in function DECL1 and DECL2.
25014 It returns positive value if DECL1 is higher priority, negative value if
25015 DECL2 is higher priority and 0 if they are the same. Note, priorities are
25016 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
25017
25018 static int
25019 rs6000_compare_version_priority (tree decl1, tree decl2)
25020 {
25021 int priority1 = rs6000_clone_priority (decl1);
25022 int priority2 = rs6000_clone_priority (decl2);
25023 int ret = priority1 - priority2;
25024
25025 if (TARGET_DEBUG_TARGET)
25026 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
25027 get_decl_name (decl1), get_decl_name (decl2), ret);
25028
25029 return ret;
25030 }
25031
25032 /* Make a dispatcher declaration for the multi-versioned function DECL.
25033 Calls to DECL function will be replaced with calls to the dispatcher
25034 by the front-end. Returns the decl of the dispatcher function. */
25035
25036 static tree
25037 rs6000_get_function_versions_dispatcher (void *decl)
25038 {
25039 tree fn = (tree) decl;
25040 struct cgraph_node *node = NULL;
25041 struct cgraph_node *default_node = NULL;
25042 struct cgraph_function_version_info *node_v = NULL;
25043 struct cgraph_function_version_info *first_v = NULL;
25044
25045 tree dispatch_decl = NULL;
25046
25047 struct cgraph_function_version_info *default_version_info = NULL;
25048 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
25049
25050 if (TARGET_DEBUG_TARGET)
25051 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
25052 get_decl_name (fn));
25053
25054 node = cgraph_node::get (fn);
25055 gcc_assert (node != NULL);
25056
25057 node_v = node->function_version ();
25058 gcc_assert (node_v != NULL);
25059
25060 if (node_v->dispatcher_resolver != NULL)
25061 return node_v->dispatcher_resolver;
25062
25063 /* Find the default version and make it the first node. */
25064 first_v = node_v;
25065 /* Go to the beginning of the chain. */
25066 while (first_v->prev != NULL)
25067 first_v = first_v->prev;
25068
25069 default_version_info = first_v;
25070 while (default_version_info != NULL)
25071 {
25072 const tree decl2 = default_version_info->this_node->decl;
25073 if (is_function_default_version (decl2))
25074 break;
25075 default_version_info = default_version_info->next;
25076 }
25077
25078 /* If there is no default node, just return NULL. */
25079 if (default_version_info == NULL)
25080 return NULL;
25081
25082 /* Make default info the first node. */
25083 if (first_v != default_version_info)
25084 {
25085 default_version_info->prev->next = default_version_info->next;
25086 if (default_version_info->next)
25087 default_version_info->next->prev = default_version_info->prev;
25088 first_v->prev = default_version_info;
25089 default_version_info->next = first_v;
25090 default_version_info->prev = NULL;
25091 }
25092
25093 default_node = default_version_info->this_node;
25094
25095 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
25096 error_at (DECL_SOURCE_LOCATION (default_node->decl),
25097 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
25098 "exports hardware capability bits");
25099 #else
25100
25101 if (targetm.has_ifunc_p ())
25102 {
25103 struct cgraph_function_version_info *it_v = NULL;
25104 struct cgraph_node *dispatcher_node = NULL;
25105 struct cgraph_function_version_info *dispatcher_version_info = NULL;
25106
25107 /* Right now, the dispatching is done via ifunc. */
25108 dispatch_decl = make_dispatcher_decl (default_node->decl);
25109 TREE_NOTHROW (dispatch_decl) = TREE_NOTHROW (fn);
25110
25111 dispatcher_node = cgraph_node::get_create (dispatch_decl);
25112 gcc_assert (dispatcher_node != NULL);
25113 dispatcher_node->dispatcher_function = 1;
25114 dispatcher_version_info
25115 = dispatcher_node->insert_new_function_version ();
25116 dispatcher_version_info->next = default_version_info;
25117 dispatcher_node->definition = 1;
25118
25119 /* Set the dispatcher for all the versions. */
25120 it_v = default_version_info;
25121 while (it_v != NULL)
25122 {
25123 it_v->dispatcher_resolver = dispatch_decl;
25124 it_v = it_v->next;
25125 }
25126 }
25127 else
25128 {
25129 error_at (DECL_SOURCE_LOCATION (default_node->decl),
25130 "multiversioning needs %<ifunc%> which is not supported "
25131 "on this target");
25132 }
25133 #endif
25134
25135 return dispatch_decl;
25136 }
25137
25138 /* Make the resolver function decl to dispatch the versions of a multi-
25139 versioned function, DEFAULT_DECL. Create an empty basic block in the
25140 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
25141 function. */
25142
25143 static tree
25144 make_resolver_func (const tree default_decl,
25145 const tree dispatch_decl,
25146 basic_block *empty_bb)
25147 {
25148 /* Make the resolver function static. The resolver function returns
25149 void *. */
25150 tree decl_name = clone_function_name (default_decl, "resolver");
25151 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
25152 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
25153 tree decl = build_fn_decl (resolver_name, type);
25154 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
25155
25156 DECL_NAME (decl) = decl_name;
25157 TREE_USED (decl) = 1;
25158 DECL_ARTIFICIAL (decl) = 1;
25159 DECL_IGNORED_P (decl) = 0;
25160 TREE_PUBLIC (decl) = 0;
25161 DECL_UNINLINABLE (decl) = 1;
25162
25163 /* Resolver is not external, body is generated. */
25164 DECL_EXTERNAL (decl) = 0;
25165 DECL_EXTERNAL (dispatch_decl) = 0;
25166
25167 DECL_CONTEXT (decl) = NULL_TREE;
25168 DECL_INITIAL (decl) = make_node (BLOCK);
25169 DECL_STATIC_CONSTRUCTOR (decl) = 0;
25170
25171 if (DECL_COMDAT_GROUP (default_decl)
25172 || TREE_PUBLIC (default_decl))
25173 {
25174 /* In this case, each translation unit with a call to this
25175 versioned function will put out a resolver. Ensure it
25176 is comdat to keep just one copy. */
25177 DECL_COMDAT (decl) = 1;
25178 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
25179 }
25180 else
25181 TREE_PUBLIC (dispatch_decl) = 0;
25182
25183 /* Build result decl and add to function_decl. */
25184 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
25185 DECL_CONTEXT (t) = decl;
25186 DECL_ARTIFICIAL (t) = 1;
25187 DECL_IGNORED_P (t) = 1;
25188 DECL_RESULT (decl) = t;
25189
25190 gimplify_function_tree (decl);
25191 push_cfun (DECL_STRUCT_FUNCTION (decl));
25192 *empty_bb = init_lowered_empty_function (decl, false,
25193 profile_count::uninitialized ());
25194
25195 cgraph_node::add_new_function (decl, true);
25196 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
25197
25198 pop_cfun ();
25199
25200 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
25201 DECL_ATTRIBUTES (dispatch_decl)
25202 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
25203
25204 cgraph_node::create_same_body_alias (dispatch_decl, decl);
25205
25206 return decl;
25207 }
25208
25209 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
25210 return a pointer to VERSION_DECL if we are running on a machine that
25211 supports the index CLONE_ISA hardware architecture bits. This function will
25212 be called during version dispatch to decide which function version to
25213 execute. It returns the basic block at the end, to which more conditions
25214 can be added. */
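/* As an illustrative sketch (not the literal GIMPLE, and assuming a
   hypothetical function "foo" with a clone name such as "arch_3_00" from
   rs6000_clone_map), the condition built here for one version behaves
   like:

       cond = __builtin_cpu_supports ("arch_3_00");
       if (cond != 0)
         return (void *) &foo.arch_3_00;
       /-* otherwise fall through to the next version's condition *-/

   where foo.arch_3_00 stands for VERSION_DECL.  */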
25215
25216 static basic_block
25217 add_condition_to_bb (tree function_decl, tree version_decl,
25218 int clone_isa, basic_block new_bb)
25219 {
25220 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
25221
25222 gcc_assert (new_bb != NULL);
25223 gimple_seq gseq = bb_seq (new_bb);
25224
25225
25226 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
25227 build_fold_addr_expr (version_decl));
25228 tree result_var = create_tmp_var (ptr_type_node);
25229 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
25230 gimple *return_stmt = gimple_build_return (result_var);
25231
25232 if (clone_isa == CLONE_DEFAULT)
25233 {
25234 gimple_seq_add_stmt (&gseq, convert_stmt);
25235 gimple_seq_add_stmt (&gseq, return_stmt);
25236 set_bb_seq (new_bb, gseq);
25237 gimple_set_bb (convert_stmt, new_bb);
25238 gimple_set_bb (return_stmt, new_bb);
25239 pop_cfun ();
25240 return new_bb;
25241 }
25242
25243 tree bool_zero = build_int_cst (bool_int_type_node, 0);
25244 tree cond_var = create_tmp_var (bool_int_type_node);
25245 tree predicate_decl = rs6000_builtin_decls[(int) RS6000_BIF_CPU_SUPPORTS];
25246 const char *arg_str = rs6000_clone_map[clone_isa].name;
25247 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
25248 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
25249 gimple_call_set_lhs (call_cond_stmt, cond_var);
25250
25251 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
25252 gimple_set_bb (call_cond_stmt, new_bb);
25253 gimple_seq_add_stmt (&gseq, call_cond_stmt);
25254
25255 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
25256 NULL_TREE, NULL_TREE);
25257 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
25258 gimple_set_bb (if_else_stmt, new_bb);
25259 gimple_seq_add_stmt (&gseq, if_else_stmt);
25260
25261 gimple_seq_add_stmt (&gseq, convert_stmt);
25262 gimple_seq_add_stmt (&gseq, return_stmt);
25263 set_bb_seq (new_bb, gseq);
25264
25265 basic_block bb1 = new_bb;
25266 edge e12 = split_block (bb1, if_else_stmt);
25267 basic_block bb2 = e12->dest;
25268 e12->flags &= ~EDGE_FALLTHRU;
25269 e12->flags |= EDGE_TRUE_VALUE;
25270
25271 edge e23 = split_block (bb2, return_stmt);
25272 gimple_set_bb (convert_stmt, bb2);
25273 gimple_set_bb (return_stmt, bb2);
25274
25275 basic_block bb3 = e23->dest;
25276 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
25277
25278 remove_edge (e23);
25279 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
25280
25281 pop_cfun ();
25282 return bb3;
25283 }
25284
25285 /* This function generates the dispatch function for multi-versioned functions.
25286 DISPATCH_DECL is the function which will contain the dispatch logic.
25287 FNDECLS points to the function choices for dispatch (actually a vector of decls).
25288 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
25289 code is generated. */
25290
25291 static int
25292 dispatch_function_versions (tree dispatch_decl,
25293 void *fndecls_p,
25294 basic_block *empty_bb)
25295 {
25296 int ix;
25297 tree ele;
25298 vec<tree> *fndecls;
25299 tree clones[CLONE_MAX];
25300
25301 if (TARGET_DEBUG_TARGET)
25302 fputs ("dispatch_function_versions, top\n", stderr);
25303
25304 gcc_assert (dispatch_decl != NULL
25305 && fndecls_p != NULL
25306 && empty_bb != NULL);
25307
25308 /* fndecls_p is actually a vector. */
25309 fndecls = static_cast<vec<tree> *> (fndecls_p);
25310
25311 /* At least one more version other than the default. */
25312 gcc_assert (fndecls->length () >= 2);
25313
25314 /* The first version in the vector is the default decl. */
25315 memset ((void *) clones, '\0', sizeof (clones));
25316 clones[CLONE_DEFAULT] = (*fndecls)[0];
25317
25318 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
25319 on the PowerPC (on x86_64, it is not a NOP). The builtin function
25320 __builtin_cpu_supports ensures that the TOC fields are set up by requiring a
25321 recent glibc. If we ever need to call __builtin_cpu_init, we would need
25322 to insert the code here to do the call. */
25323
25324 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
25325 {
25326 int priority = rs6000_clone_priority (ele);
25327 if (!clones[priority])
25328 clones[priority] = ele;
25329 }
25330
25331 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
25332 if (clones[ix])
25333 {
25334 if (TARGET_DEBUG_TARGET)
25335 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
25336 ix, get_decl_name (clones[ix]));
25337
25338 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
25339 *empty_bb);
25340 }
25341
25342 return 0;
25343 }
25344
25345 /* Generate the dispatching code body to dispatch multi-versioned function
25346 DECL. The target hook is called to process the "target" attributes and
25347 provide the code to dispatch the right function at run-time. NODE points
25348 to the dispatcher decl whose body will be created. */
25349
25350 static tree
25351 rs6000_generate_version_dispatcher_body (void *node_p)
25352 {
25353 tree resolver;
25354 basic_block empty_bb;
25355 struct cgraph_node *node = (cgraph_node *) node_p;
25356 struct cgraph_function_version_info *ninfo = node->function_version ();
25357
25358 if (ninfo->dispatcher_resolver)
25359 return ninfo->dispatcher_resolver;
25360
25361 /* node is going to be an alias, so remove the finalized bit. */
25362 node->definition = false;
25363
25364 /* The first version in the chain corresponds to the default version. */
25365 ninfo->dispatcher_resolver = resolver
25366 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
25367
25368 if (TARGET_DEBUG_TARGET)
25369 fprintf (stderr, "rs6000_get_function_versions_dispatcher, %s\n",
25370 get_decl_name (resolver));
25371
25372 push_cfun (DECL_STRUCT_FUNCTION (resolver));
25373 auto_vec<tree, 2> fn_ver_vec;
25374
25375 for (struct cgraph_function_version_info *vinfo = ninfo->next;
25376 vinfo;
25377 vinfo = vinfo->next)
25378 {
25379 struct cgraph_node *version = vinfo->this_node;
25380 /* Check for virtual functions here again, as by this time it should
25381 have been determined if this function needs a vtable index or
25382 not. This happens for methods in derived classes that override
25383 virtual methods in base classes but are not explicitly marked as
25384 virtual. */
25385 if (DECL_VINDEX (version->decl))
25386 sorry ("Virtual function multiversioning not supported");
25387
25388 fn_ver_vec.safe_push (version->decl);
25389 }
25390
25391 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
25392 cgraph_edge::rebuild_edges ();
25393 pop_cfun ();
25394 return resolver;
25395 }
25396
25397 /* Hook to decide if we need to scan function gimple statements to
25398 collect target specific information for inlining, and update the
25399 corresponding RS6000_FN_TARGET_INFO_* bit in INFO if we are able
25400 to predict which ISA feature is used at this time. Return true
25401 if we need to scan, otherwise return false. */
25402
25403 static bool
25404 rs6000_need_ipa_fn_target_info (const_tree decl,
25405 unsigned int &info ATTRIBUTE_UNUSED)
25406 {
25407 tree target = DECL_FUNCTION_SPECIFIC_TARGET (decl);
25408 if (!target)
25409 target = target_option_default_node;
25410 struct cl_target_option *opts = TREE_TARGET_OPTION (target);
25411
25412 /* See PR102059; we only handle HTM for now, so we only do the
25413 subsequent scanning when the HTM feature is enabled. */
25414 if (opts->x_rs6000_isa_flags & OPTION_MASK_HTM)
25415 return true;
25416
25417 return false;
25418 }
25419
25420 /* Hook to update target specific information INFO for inlining by
25421 checking the given STMT. Return false if we don't need to scan
25422 any more, otherwise return true. */
25423
25424 static bool
25425 rs6000_update_ipa_fn_target_info (unsigned int &info, const gimple *stmt)
25426 {
25427 /* Assume inline asm can use any instruction features. */
25428 if (gimple_code (stmt) == GIMPLE_ASM)
25429 {
25430 /* This should set any bits we are concerned with; for now
25431 OPTION_MASK_HTM is the only bit we care about. */
25432 info |= RS6000_FN_TARGET_INFO_HTM;
25433 return false;
25434 }
25435 else if (gimple_code (stmt) == GIMPLE_CALL)
25436 {
25437 tree fndecl = gimple_call_fndecl (stmt);
25438 if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD))
25439 {
25440 enum rs6000_gen_builtins fcode
25441 = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl);
25442 /* HTM bifs definitely exploit HTM insns. */
25443 if (bif_is_htm (rs6000_builtin_info[fcode]))
25444 {
25445 info |= RS6000_FN_TARGET_INFO_HTM;
25446 return false;
25447 }
25448 }
25449 }
25450
25451 return true;
25452 }
25453
25454 /* Hook to determine if one function can safely inline another. */
25455
25456 static bool
25457 rs6000_can_inline_p (tree caller, tree callee)
25458 {
25459 bool ret = false;
25460 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
25461 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
25462
25463 /* If the callee has no option attributes, then it is ok to inline. */
25464 if (!callee_tree)
25465 ret = true;
25466
25467 else
25468 {
25469 HOST_WIDE_INT caller_isa;
25470 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
25471 HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
25472 HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;
25473
25474 /* If the caller has option attributes, then use them.
25475 Otherwise, use the command line options. */
25476 if (caller_tree)
25477 caller_isa = TREE_TARGET_OPTION (caller_tree)->x_rs6000_isa_flags;
25478 else
25479 caller_isa = rs6000_isa_flags;
25480
25481 cgraph_node *callee_node = cgraph_node::get (callee);
25482 if (ipa_fn_summaries && ipa_fn_summaries->get (callee_node) != NULL)
25483 {
25484 unsigned int info = ipa_fn_summaries->get (callee_node)->target_info;
25485 if ((info & RS6000_FN_TARGET_INFO_HTM) == 0)
25486 {
25487 callee_isa &= ~OPTION_MASK_HTM;
25488 explicit_isa &= ~OPTION_MASK_HTM;
25489 }
25490 }
25491
25492 /* Ignore -mpower8-fusion and -mpower10-fusion options for inlining
25493 purposes. */
25494 callee_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
25495 explicit_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
25496
25497 /* The callee's options must be a subset of the caller's options, i.e.
25498 a vsx function may inline an altivec function, but a no-vsx function
25499 must not inline a vsx function. However, for those options that the
25500 callee has explicitly enabled or disabled, then we must enforce that
25501 the callee's and caller's options match exactly; see PR70010. */
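/* Illustrative mask check with hypothetical flag sets: if caller_isa
   contains {VSX, ALTIVEC} and callee_isa contains {ALTIVEC}, then
   (caller_isa & callee_isa) == callee_isa holds and the subset test
   below passes; a no-vsx caller and a vsx callee would fail it.  */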
25502 if (((caller_isa & callee_isa) == callee_isa)
25503 && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
25504 ret = true;
25505 }
25506
25507 if (TARGET_DEBUG_TARGET)
25508 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
25509 get_decl_name (caller), get_decl_name (callee),
25510 (ret ? "can" : "cannot"));
25511
25512 return ret;
25513 }
25514 \f
25515 /* Allocate a stack temp and fix up the address so it meets the particular
25516 memory requirements (either offsettable or REG+REG addressing). */
25517
25518 rtx
25519 rs6000_allocate_stack_temp (machine_mode mode,
25520 bool offsettable_p,
25521 bool reg_reg_p)
25522 {
25523 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
25524 rtx addr = XEXP (stack, 0);
25525 int strict_p = reload_completed;
25526
25527 if (!legitimate_indirect_address_p (addr, strict_p))
25528 {
25529 if (offsettable_p
25530 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
25531 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
25532
25533 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
25534 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
25535 }
25536
25537 return stack;
25538 }
25539
25540 /* Given a memory reference, if it is not a reg or reg+reg addressing,
25541 convert to such a form to deal with memory reference instructions
25542 like STFIWX and LDBRX that only take reg+reg addressing. */
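/* Illustrative example with schematic RTL: a reference such as
     (mem:SI (plus:DI (reg:DI r3) (const_int 12)))
   is rewritten by forcing both addends into registers, giving
     (mem:SI (plus:DI (reg:DI r3) (reg:DI tmp)))
   which satisfies the reg+reg form that STFIWX and LDBRX require.  */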
25543
25544 rtx
25545 rs6000_force_indexed_or_indirect_mem (rtx x)
25546 {
25547 machine_mode mode = GET_MODE (x);
25548
25549 gcc_assert (MEM_P (x));
25550 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
25551 {
25552 rtx addr = XEXP (x, 0);
25553 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
25554 {
25555 rtx reg = XEXP (addr, 0);
25556 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
25557 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
25558 gcc_assert (REG_P (reg));
25559 emit_insn (gen_add3_insn (reg, reg, size_rtx));
25560 addr = reg;
25561 }
25562 else if (GET_CODE (addr) == PRE_MODIFY)
25563 {
25564 rtx reg = XEXP (addr, 0);
25565 rtx expr = XEXP (addr, 1);
25566 gcc_assert (REG_P (reg));
25567 gcc_assert (GET_CODE (expr) == PLUS);
25568 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
25569 addr = reg;
25570 }
25571
25572 if (GET_CODE (addr) == PLUS)
25573 {
25574 rtx op0 = XEXP (addr, 0);
25575 rtx op1 = XEXP (addr, 1);
25576 op0 = force_reg (Pmode, op0);
25577 op1 = force_reg (Pmode, op1);
25578 x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
25579 }
25580 else
25581 x = replace_equiv_address (x, force_reg (Pmode, addr));
25582 }
25583
25584 return x;
25585 }
25586
25587 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
25588
25589 On the RS/6000, all integer constants are acceptable, although most won't
25590 be valid for particular insns. Only easy FP constants are acceptable. */
25591
25592 static bool
25593 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
25594 {
25595 if (TARGET_ELF && tls_referenced_p (x))
25596 return false;
25597
25598 if (CONST_DOUBLE_P (x))
25599 return easy_fp_constant (x, mode);
25600
25601 if (GET_CODE (x) == CONST_VECTOR)
25602 return easy_vector_constant (x, mode);
25603
25604 return true;
25605 }
25606
25607 #if TARGET_AIX_OS
25608 /* Implement TARGET_PRECOMPUTE_TLS_P.
25609
25610 On AIX, TLS symbols are in the TOC, which is maintained in the
25611 constant pool. AIX TOC TLS symbols need to be pre-computed, but
25612 must be considered legitimate constants. */
25613
25614 static bool
25615 rs6000_aix_precompute_tls_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
25616 {
25617 return tls_referenced_p (x);
25618 }
25619 #endif
25620
25621 \f
25622 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
25623
25624 static bool
25625 chain_already_loaded (rtx_insn *last)
25626 {
25627 for (; last != NULL; last = PREV_INSN (last))
25628 {
25629 if (NONJUMP_INSN_P (last))
25630 {
25631 rtx patt = PATTERN (last);
25632
25633 if (GET_CODE (patt) == SET)
25634 {
25635 rtx lhs = XEXP (patt, 0);
25636
25637 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
25638 return true;
25639 }
25640 }
25641 }
25642 return false;
25643 }
25644
25645 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
25646
25647 void
25648 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25649 {
25650 rtx func = func_desc;
25651 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
25652 rtx toc_load = NULL_RTX;
25653 rtx toc_restore = NULL_RTX;
25654 rtx func_addr;
25655 rtx abi_reg = NULL_RTX;
25656 rtx call[5];
25657 int n_call;
25658 rtx insn;
25659 bool is_pltseq_longcall;
25660
25661 if (global_tlsarg)
25662 tlsarg = global_tlsarg;
25663
25664 /* Handle longcall attributes. */
25665 is_pltseq_longcall = false;
25666 if ((INTVAL (cookie) & CALL_LONG) != 0
25667 && GET_CODE (func_desc) == SYMBOL_REF)
25668 {
25669 func = rs6000_longcall_ref (func_desc, tlsarg);
25670 if (TARGET_PLTSEQ)
25671 is_pltseq_longcall = true;
25672 }
25673
25674 /* Handle indirect calls. */
25675 if (!SYMBOL_REF_P (func)
25676 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
25677 {
25678 if (!rs6000_pcrel_p ())
25679 {
25680 /* Save the TOC into its reserved slot before the call,
25681 and prepare to restore it after the call. */
25682 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
25683 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
25684 gen_rtvec (1, stack_toc_offset),
25685 UNSPEC_TOCSLOT);
25686 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
25687
25688 /* Can we optimize saving the TOC in the prologue or
25689 do we need to do it at every call? */
25690 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
25691 cfun->machine->save_toc_in_prologue = true;
25692 else
25693 {
25694 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25695 rtx stack_toc_mem = gen_frame_mem (Pmode,
25696 gen_rtx_PLUS (Pmode, stack_ptr,
25697 stack_toc_offset));
25698 MEM_VOLATILE_P (stack_toc_mem) = 1;
25699 if (is_pltseq_longcall)
25700 {
25701 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
25702 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25703 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
25704 }
25705 else
25706 emit_move_insn (stack_toc_mem, toc_reg);
25707 }
25708 }
25709
25710 if (DEFAULT_ABI == ABI_ELFv2)
25711 {
25712 /* A function pointer in the ELFv2 ABI is just a plain address, but
25713 the ABI requires it to be loaded into r12 before the call. */
25714 func_addr = gen_rtx_REG (Pmode, 12);
25715 emit_move_insn (func_addr, func);
25716 abi_reg = func_addr;
25717 /* Indirect calls via CTR are strongly preferred over indirect
25718 calls via LR, so move the address there. Needed to mark
25719 this insn for linker plt sequence editing too. */
25720 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25721 if (is_pltseq_longcall)
25722 {
25723 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
25724 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25725 emit_insn (gen_rtx_SET (func_addr, mark_func));
25726 v = gen_rtvec (2, func_addr, func_desc);
25727 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25728 }
25729 else
25730 emit_move_insn (func_addr, abi_reg);
25731 }
25732 else
25733 {
25734 /* A function pointer under AIX is a pointer to a data area whose
25735 first word contains the actual address of the function, whose
25736 second word contains a pointer to its TOC, and whose third word
25737 contains a value to place in the static chain register (r11).
25738 Note that if we load the static chain, our "trampoline" need
25739 not have any executable code. */
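/* Schematic layout of such an AIX function descriptor, with offsets in
   units of GET_MODE_SIZE (Pmode) (8 bytes for 64-bit):
     word 0: address of the function's actual code
     word 1: TOC pointer for the callee
     word 2: static chain (environment) value for nested functions  */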
25740
25741 /* Load up address of the actual function. */
25742 func = force_reg (Pmode, func);
25743 func_addr = gen_reg_rtx (Pmode);
25744 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
25745
25746 /* Indirect calls via CTR are strongly preferred over indirect
25747 calls via LR, so move the address there. */
25748 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
25749 emit_move_insn (ctr_reg, func_addr);
25750 func_addr = ctr_reg;
25751
25752 /* Prepare to load the TOC of the called function. Note that the
25753 TOC load must happen immediately before the actual call so
25754 that unwinding the TOC registers works correctly. See the
25755 comment in frob_update_context. */
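/* A rough 64-bit sequence for this path (schematic only; register choices
   and the TOC save offset depend on the ABI configuration):
       ld 12,0(9)          # entry address from the descriptor
       mtctr 12
       ld 2,8(9)           # callee's TOC, immediately before the call
       bctrl
       ld 2,<toc-slot>(1)  # restore the caller's TOC afterwards  */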
25756 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
25757 rtx func_toc_mem = gen_rtx_MEM (Pmode,
25758 gen_rtx_PLUS (Pmode, func,
25759 func_toc_offset));
25760 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
25761
25762 /* If we have a static chain, load it up. But, if the call was
25763 originally direct, the 3rd word has not been written since no
25764 trampoline has been built, so we ought not to load it, lest we
25765 overwrite a static chain value. */
25766 if (!(GET_CODE (func_desc) == SYMBOL_REF
25767 && SYMBOL_REF_FUNCTION_P (func_desc))
25768 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
25769 && !chain_already_loaded (get_current_sequence ()->next->last))
25770 {
25771 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
25772 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
25773 rtx func_sc_mem = gen_rtx_MEM (Pmode,
25774 gen_rtx_PLUS (Pmode, func,
25775 func_sc_offset));
25776 emit_move_insn (sc_reg, func_sc_mem);
25777 abi_reg = sc_reg;
25778 }
25779 }
25780 }
25781 else
25782 {
25783 /* No TOC register needed for calls from PC-relative callers. */
25784 if (!rs6000_pcrel_p ())
25785 /* Direct calls use the TOC: for local calls, the callee will
25786 assume the TOC register is set; for non-local calls, the
25787 PLT stub needs the TOC register. */
25788 abi_reg = toc_reg;
25789 func_addr = func;
25790 }
25791
25792 /* Create the call. */
25793 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25794 if (value != NULL_RTX)
25795 call[0] = gen_rtx_SET (value, call[0]);
25796 call[1] = gen_rtx_USE (VOIDmode, cookie);
25797 n_call = 2;
25798
25799 if (toc_load)
25800 call[n_call++] = toc_load;
25801 if (toc_restore)
25802 call[n_call++] = toc_restore;
25803
25804 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25805
25806 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
25807 insn = emit_call_insn (insn);
25808
25809 /* Mention all registers defined by the ABI to hold information
25810 as uses in CALL_INSN_FUNCTION_USAGE. */
25811 if (abi_reg)
25812 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25813 }
25814
25815 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
25816
25817 void
25818 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25819 {
25820 rtx call[2];
25821 rtx insn;
25822 rtx r12 = NULL_RTX;
25823 rtx func_addr = func_desc;
25824
25825 if (global_tlsarg)
25826 tlsarg = global_tlsarg;
25827
25828 /* Handle longcall attributes. */
25829 if (INTVAL (cookie) & CALL_LONG && SYMBOL_REF_P (func_desc))
25830 {
25831 /* PCREL can do a sibling call to a longcall function
25832 because we don't need to restore the TOC register. */
25833 gcc_assert (rs6000_pcrel_p ());
25834 func_desc = rs6000_longcall_ref (func_desc, tlsarg);
25835 }
25836 else
25837 gcc_assert (INTVAL (cookie) == 0);
25838
25839 /* For ELFv2, r12 and CTR need to hold the function address
25840 for an indirect call. */
25841 if (GET_CODE (func_desc) != SYMBOL_REF && DEFAULT_ABI == ABI_ELFv2)
25842 {
25843 r12 = gen_rtx_REG (Pmode, 12);
25844 emit_move_insn (r12, func_desc);
25845 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25846 emit_move_insn (func_addr, r12);
25847 }
25848
25849 /* Create the call. */
25850 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25851 if (value != NULL_RTX)
25852 call[0] = gen_rtx_SET (value, call[0]);
25853
25854 call[1] = simple_return_rtx;
25855
25856 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
25857 insn = emit_call_insn (insn);
25858
25859 /* Note use of the TOC register. */
25860 if (!rs6000_pcrel_p ())
25861 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
25862 gen_rtx_REG (Pmode, TOC_REGNUM));
25863
25864 /* Note use of r12. */
25865 if (r12)
25866 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r12);
25867 }
25868
25869 /* Expand code to perform a call under the SYSV4 ABI. */
25870
25871 void
25872 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25873 {
25874 rtx func = func_desc;
25875 rtx func_addr;
25876 rtx call[4];
25877 rtx insn;
25878 rtx abi_reg = NULL_RTX;
25879 int n;
25880
25881 if (global_tlsarg)
25882 tlsarg = global_tlsarg;
25883
25884 /* Handle longcall attributes. */
25885 if ((INTVAL (cookie) & CALL_LONG) != 0
25886 && GET_CODE (func_desc) == SYMBOL_REF)
25887 {
25888 func = rs6000_longcall_ref (func_desc, tlsarg);
25889 /* If the longcall was implemented as an inline PLT call using
25890 PLT unspecs then func will be REG:r11. If not, func will be
25891 a pseudo reg. The inline PLT call sequence supports lazy
25892 linking (and longcalls to functions in dlopen'd libraries).
25893 The other style of longcall doesn't. The lazy linking entry
25894 to the dynamic symbol resolver requires r11 be the function
25895 address (as it is for linker generated PLT stubs). Ensure
25896 r11 stays valid to the bctrl by marking r11 used by the call. */
25897 if (TARGET_PLTSEQ)
25898 abi_reg = func;
25899 }
25900
25901 /* Handle indirect calls. */
25902 if (GET_CODE (func) != SYMBOL_REF)
25903 {
25904 func = force_reg (Pmode, func);
25905
25906 /* Indirect calls via CTR are strongly preferred over indirect
25907 calls via LR, so move the address there. That can't be left
25908 to reload because we want to mark every instruction in an
25909 inline PLT call sequence with a reloc, enabling the linker to
25910 edit the sequence back to a direct call when that makes sense. */
25911 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25912 if (abi_reg)
25913 {
25914 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
25915 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25916 emit_insn (gen_rtx_SET (func_addr, mark_func));
25917 v = gen_rtvec (2, func_addr, func_desc);
25918 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25919 }
25920 else
25921 emit_move_insn (func_addr, func);
25922 }
25923 else
25924 func_addr = func;
25925
25926 /* Create the call. */
25927 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25928 if (value != NULL_RTX)
25929 call[0] = gen_rtx_SET (value, call[0]);
25930
25931 call[1] = gen_rtx_USE (VOIDmode, cookie);
25932 n = 2;
25933 if (TARGET_SECURE_PLT
25934 && flag_pic
25935 && GET_CODE (func_addr) == SYMBOL_REF
25936 && !SYMBOL_REF_LOCAL_P (func_addr))
25937 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
25938
25939 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25940
25941 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
25942 insn = emit_call_insn (insn);
25943 if (abi_reg)
25944 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25945 }
25946
25947 /* Expand code to perform a sibling call under the SysV4 ABI. */
25948
25949 void
25950 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25951 {
25952 rtx func = func_desc;
25953 rtx func_addr;
25954 rtx call[3];
25955 rtx insn;
25956 rtx abi_reg = NULL_RTX;
25957
25958 if (global_tlsarg)
25959 tlsarg = global_tlsarg;
25960
25961 /* Handle longcall attributes. */
25962 if ((INTVAL (cookie) & CALL_LONG) != 0
25963 && GET_CODE (func_desc) == SYMBOL_REF)
25964 {
25965 func = rs6000_longcall_ref (func_desc, tlsarg);
25966 /* If the longcall was implemented as an inline PLT call using
25967 PLT unspecs then func will be REG:r11. If not, func will be
25968 a pseudo reg. The inline PLT call sequence supports lazy
25969 linking (and longcalls to functions in dlopen'd libraries).
25970 The other style of longcall doesn't. The lazy linking entry
25971 to the dynamic symbol resolver requires r11 be the function
25972 address (as it is for linker generated PLT stubs). Ensure
25973 r11 stays valid to the bctr by marking r11 used by the call. */
25974 if (TARGET_PLTSEQ)
25975 abi_reg = func;
25976 }
25977
25978 /* Handle indirect calls. */
25979 if (GET_CODE (func) != SYMBOL_REF)
25980 {
25981 func = force_reg (Pmode, func);
25982
25983 /* Indirect sibcalls must go via CTR. That can't be left to
25984 reload because we want to mark every instruction in an inline
25985 PLT call sequence with a reloc, enabling the linker to edit
25986 the sequence back to a direct call when that makes sense. */
25987 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25988 if (abi_reg)
25989 {
25990 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
25991 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25992 emit_insn (gen_rtx_SET (func_addr, mark_func));
25993 v = gen_rtvec (2, func_addr, func_desc);
25994 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25995 }
25996 else
25997 emit_move_insn (func_addr, func);
25998 }
25999 else
26000 func_addr = func;
26001
26002 /* Create the call. */
26003 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
26004 if (value != NULL_RTX)
26005 call[0] = gen_rtx_SET (value, call[0]);
26006
26007 call[1] = gen_rtx_USE (VOIDmode, cookie);
26008 call[2] = simple_return_rtx;
26009
26010 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
26011 insn = emit_call_insn (insn);
26012 if (abi_reg)
26013 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
26014 }
26015
26016 #if TARGET_MACHO
26017
26018 /* Expand code to perform a call under the Darwin ABI.
26019 Modulo handling of mlongcall, this is much the same as sysv.
26020 If/when the longcall optimisation is removed, we could drop this
26021 code and use the sysv case (taking care to avoid the tls stuff).
26022
26023 We can use this for sibcalls too, if needed. */
26024
26025 void
26026 rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
26027 rtx cookie, bool sibcall)
26028 {
26029 rtx func = func_desc;
26030 rtx func_addr;
26031 rtx call[3];
26032 rtx insn;
26033 int cookie_val = INTVAL (cookie);
26034 bool make_island = false;
26035
26036 /* Handle longcall attributes, there are two cases for Darwin:
26037 1) Newer linkers are capable of synthesising any branch islands needed.
26038 2) We need a helper branch island synthesised by the compiler.
26039 The second case has mostly been retired and we don't use it for m64.
26040 In fact, it is only an optimisation; we could just indirect as sysv
26041 does ... however, we keep it for backwards compatibility for now.
26042 If we're going to use this, then we need to keep the CALL_LONG bit set,
26043 so that we can pick up the special insn form later. */
26044 if ((cookie_val & CALL_LONG) != 0
26045 && GET_CODE (func_desc) == SYMBOL_REF)
26046 {
26047 /* FIXME: the longcall opt should not hang off this flag; it is most
26048 likely incorrect for kernel-mode code-generation. */
26049 if (darwin_symbol_stubs && TARGET_32BIT)
26050 make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */
26051 else
26052 {
26053 /* The linker is capable of doing this, but the user explicitly
26054 asked for -mlongcall, so we'll do the 'normal' version. */
26055 func = rs6000_longcall_ref (func_desc, NULL_RTX);
26056 cookie_val &= ~CALL_LONG; /* Handled, zap it. */
26057 }
26058 }
26059
26060 /* Handle indirect calls. */
26061 if (GET_CODE (func) != SYMBOL_REF)
26062 {
26063 func = force_reg (Pmode, func);
26064
26065 /* Indirect calls via CTR are strongly preferred over indirect
26066 calls via LR, and are required for indirect sibcalls, so move
26067 the address there. */
26068 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
26069 emit_move_insn (func_addr, func);
26070 }
26071 else
26072 func_addr = func;
26073
26074 /* Create the call. */
26075 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
26076 if (value != NULL_RTX)
26077 call[0] = gen_rtx_SET (value, call[0]);
26078
26079 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
26080
26081 if (sibcall)
26082 call[2] = simple_return_rtx;
26083 else
26084 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
26085
26086 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
26087 insn = emit_call_insn (insn);
26088 /* Now we have the debug info in the insn, we can set up the branch island
26089 if we're using one. */
26090 if (make_island)
26091 {
26092 tree funname = get_identifier (XSTR (func_desc, 0));
26093
26094 if (no_previous_def (funname))
26095 {
26096 rtx label_rtx = gen_label_rtx ();
26097 char *label_buf, temp_buf[256];
26098 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
26099 CODE_LABEL_NUMBER (label_rtx));
26100 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
26101 tree labelname = get_identifier (label_buf);
26102 add_compiler_branch_island (labelname, funname,
26103 insn_line ((const rtx_insn*)insn));
26104 }
26105 }
26106 }
26107 #endif
26108
26109 void
26110 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
26111 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
26112 {
26113 #if TARGET_MACHO
26114 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
26115 #else
26116 gcc_unreachable ();
26117 #endif
26118 }
26119
26120
26121 void
26122 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
26123 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
26124 {
26125 #if TARGET_MACHO
26126 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
26127 #else
26128 gcc_unreachable ();
26129 #endif
26130 }
26131
26132 /* Return whether we should generate PC-relative code for FNDECL. */
26133 bool
26134 rs6000_fndecl_pcrel_p (const_tree fndecl)
26135 {
26136 if (DEFAULT_ABI != ABI_ELFv2)
26137 return false;
26138
26139 struct cl_target_option *opts = target_opts_for_fn (fndecl);
26140
26141 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
26142 && TARGET_CMODEL == CMODEL_MEDIUM);
26143 }
26144
26145 /* Return whether we should generate PC-relative code for *FN. */
26146 bool
26147 rs6000_function_pcrel_p (struct function *fn)
26148 {
26149 if (DEFAULT_ABI != ABI_ELFv2)
26150 return false;
26151
26152 /* Optimize the usual case. */
26153 if (fn == cfun)
26154 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
26155 && TARGET_CMODEL == CMODEL_MEDIUM);
26156
26157 return rs6000_fndecl_pcrel_p (fn->decl);
26158 }
26159
26160 /* Return whether we should generate PC-relative code for the current
26161 function. */
26162 bool
26163 rs6000_pcrel_p ()
26164 {
26165 return (DEFAULT_ABI == ABI_ELFv2
26166 && (rs6000_isa_flags & OPTION_MASK_PCREL) != 0
26167 && TARGET_CMODEL == CMODEL_MEDIUM);
26168 }
26169
26170 \f
26171 /* Given an address (ADDR), a mode (MODE), and what the format of the
26172 non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
26173 for the address. */
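/* Illustrative classifications with hypothetical operands and the default
   non-prefixed format:
     (reg r3)                           -> INSN_FORM_BASE_REG
     (plus (reg r3) (reg r4))           -> INSN_FORM_X
     (plus (reg r3) (const_int 16))     -> INSN_FORM_D (or DS/DQ by mode)
     (plus (reg r3) (const_int 100000)) -> INSN_FORM_PREFIXED_NUMERIC when
                                           TARGET_PREFIXED, else bad  */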
26174
26175 enum insn_form
26176 address_to_insn_form (rtx addr,
26177 machine_mode mode,
26178 enum non_prefixed_form non_prefixed_format)
26179 {
26180 /* Single register is easy. */
26181 if (REG_P (addr) || SUBREG_P (addr))
26182 return INSN_FORM_BASE_REG;
26183
26184 /* If the non-prefixed instruction format doesn't support offset addressing,
26185 make sure only indexed addressing is allowed.
26186
26187 We special case SDmode so that the register allocator does not try to move
26188 SDmode through GPR registers, but instead uses the 32-bit integer load and
26189 store instructions for the floating point registers. */
26190 if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
26191 {
26192 if (GET_CODE (addr) != PLUS)
26193 return INSN_FORM_BAD;
26194
26195 rtx op0 = XEXP (addr, 0);
26196 rtx op1 = XEXP (addr, 1);
26197 if (!REG_P (op0) && !SUBREG_P (op0))
26198 return INSN_FORM_BAD;
26199
26200 if (!REG_P (op1) && !SUBREG_P (op1))
26201 return INSN_FORM_BAD;
26202
26203 return INSN_FORM_X;
26204 }
26205
26206 /* Deal with update forms. */
26207 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
26208 return INSN_FORM_UPDATE;
26209
26210 /* Handle PC-relative symbols and labels. Check for both local and
26211 external symbols. Assume labels are always local. TLS symbols
26212 are not PC-relative for rs6000. */
26213 if (TARGET_PCREL)
26214 {
26215 if (LABEL_REF_P (addr))
26216 return INSN_FORM_PCREL_LOCAL;
26217
26218 if (SYMBOL_REF_P (addr) && !SYMBOL_REF_TLS_MODEL (addr))
26219 {
26220 if (!SYMBOL_REF_LOCAL_P (addr))
26221 return INSN_FORM_PCREL_EXTERNAL;
26222 else
26223 return INSN_FORM_PCREL_LOCAL;
26224 }
26225 }
26226
26227 if (GET_CODE (addr) == CONST)
26228 addr = XEXP (addr, 0);
26229
26230 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
26231 if (GET_CODE (addr) == LO_SUM)
26232 return INSN_FORM_LO_SUM;
26233
26234 /* Everything below must be an offset address of some form. */
26235 if (GET_CODE (addr) != PLUS)
26236 return INSN_FORM_BAD;
26237
26238 rtx op0 = XEXP (addr, 0);
26239 rtx op1 = XEXP (addr, 1);
26240
26241 /* Check for indexed addresses. */
26242 if (REG_P (op1) || SUBREG_P (op1))
26243 {
26244 if (REG_P (op0) || SUBREG_P (op0))
26245 return INSN_FORM_X;
26246
26247 return INSN_FORM_BAD;
26248 }
26249
26250 if (!CONST_INT_P (op1))
26251 return INSN_FORM_BAD;
26252
26253 HOST_WIDE_INT offset = INTVAL (op1);
26254 if (!SIGNED_INTEGER_34BIT_P (offset))
26255 return INSN_FORM_BAD;
26256
26257 /* Check for local and external PC-relative addresses. Labels are always
26258 local. TLS symbols are not PC-relative for rs6000. */
26259 if (TARGET_PCREL)
26260 {
26261 if (LABEL_REF_P (op0))
26262 return INSN_FORM_PCREL_LOCAL;
26263
26264 if (SYMBOL_REF_P (op0) && !SYMBOL_REF_TLS_MODEL (op0))
26265 {
26266 if (!SYMBOL_REF_LOCAL_P (op0))
26267 return INSN_FORM_PCREL_EXTERNAL;
26268 else
26269 return INSN_FORM_PCREL_LOCAL;
26270 }
26271 }
26272
26273 /* If it isn't PC-relative, the address must use a base register. */
26274 if (!REG_P (op0) && !SUBREG_P (op0))
26275 return INSN_FORM_BAD;
26276
26277 /* Large offsets must be prefixed. */
26278 if (!SIGNED_INTEGER_16BIT_P (offset))
26279 {
26280 if (TARGET_PREFIXED)
26281 return INSN_FORM_PREFIXED_NUMERIC;
26282
26283 return INSN_FORM_BAD;
26284 }
26285
26286 /* We have a 16-bit offset; see what default instruction format to use. */
26287 if (non_prefixed_format == NON_PREFIXED_DEFAULT)
26288 {
26289 unsigned size = GET_MODE_SIZE (mode);
26290
26291 /* On 64-bit systems, assume 64-bit integers need to use DS form
26292 addresses (for LD/STD). VSX vectors need to use DQ form addresses
26293 (for LXV and STXV). TImode is problematic in that its normal usage
26294 is expected to be GPRs where it wants a DS instruction format, but if
26295 it goes into the vector registers, it wants a DQ instruction
26296 format. */
26297 if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
26298 non_prefixed_format = NON_PREFIXED_DS;
26299
26300 else if (TARGET_VSX && size >= 16
26301 && (VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode)))
26302 non_prefixed_format = NON_PREFIXED_DQ;
26303
26304 else
26305 non_prefixed_format = NON_PREFIXED_D;
26306 }
26307
26308 /* Classify the D/DS/DQ-form addresses. */
26309 switch (non_prefixed_format)
26310 {
26311 /* Instruction format D, all 16 bits are valid. */
26312 case NON_PREFIXED_D:
26313 return INSN_FORM_D;
26314
26315 /* Instruction format DS, bottom 2 bits must be 0. */
26316 case NON_PREFIXED_DS:
26317 if ((offset & 3) == 0)
26318 return INSN_FORM_DS;
26319
26320 else if (TARGET_PREFIXED)
26321 return INSN_FORM_PREFIXED_NUMERIC;
26322
26323 else
26324 return INSN_FORM_BAD;
26325
26326 /* Instruction format DQ, bottom 4 bits must be 0. */
26327 case NON_PREFIXED_DQ:
26328 if ((offset & 15) == 0)
26329 return INSN_FORM_DQ;
26330
26331 else if (TARGET_PREFIXED)
26332 return INSN_FORM_PREFIXED_NUMERIC;
26333
26334 else
26335 return INSN_FORM_BAD;
26336
26337 default:
26338 break;
26339 }
26340
26341 return INSN_FORM_BAD;
26342 }
26343
26344 /* Given address rtx ADDR for a load of MODE, is this legitimate for a
26345 non-prefixed D-form or X-form instruction? NON_PREFIXED_FORMAT is
26346 given NON_PREFIXED_D or NON_PREFIXED_DS to indicate whether we want
26347 a D-form or DS-form instruction. X-form and base_reg are always
26348 allowed. */
26349 bool
26350 address_is_non_pfx_d_or_x (rtx addr, machine_mode mode,
26351 enum non_prefixed_form non_prefixed_format)
26352 {
26353 enum insn_form result_form;
26354
26355 result_form = address_to_insn_form (addr, mode, non_prefixed_format);
26356
26357 switch (non_prefixed_format)
26358 {
26359 case NON_PREFIXED_D:
26360 switch (result_form)
26361 {
26362 case INSN_FORM_X:
26363 case INSN_FORM_D:
26364 case INSN_FORM_DS:
26365 case INSN_FORM_BASE_REG:
26366 return true;
26367 default:
26368 return false;
26369 }
26370 break;
26371 case NON_PREFIXED_DS:
26372 switch (result_form)
26373 {
26374 case INSN_FORM_X:
26375 case INSN_FORM_DS:
26376 case INSN_FORM_BASE_REG:
26377 return true;
26378 default:
26379 return false;
26380 }
26381 break;
26382 default:
26383 break;
26384 }
26385 return false;
26386 }
26387
26388 /* Return true if a REG with a given MODE that is loaded from or stored into
26389 a MEM location uses a non-prefixed D/DS/DQ-form address. This is used to
26390 validate the load or store with the PCREL_OPT optimization, to make sure it
26391 is an instruction that can be optimized.
26392
26393 We need to specify the MODE separately from the REG to allow for loads that
26394 include zero/sign/float extension. */
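/* For example, with schematic RTL, a sign-extending load such as
     (set (reg:DI 3) (sign_extend:DI (mem:SI ...)))
   accesses memory in SImode even though the REG is DImode, so MODE must
   be passed as SImode here.  */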
26395
26396 bool
26397 pcrel_opt_valid_mem_p (rtx reg, machine_mode mode, rtx mem)
26398 {
26399 /* If the instruction is indexed-only (like LFIWAX/LXSIWAX), we cannot do
26400 the PCREL_OPT optimization. */
26401 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mode);
26402 if (non_prefixed == NON_PREFIXED_X)
26403 return false;
26404
26405 /* Check if this is a non-prefixed D/DS/DQ-form instruction. */
26406 rtx addr = XEXP (mem, 0);
26407 enum insn_form iform = address_to_insn_form (addr, mode, non_prefixed);
26408 return (iform == INSN_FORM_BASE_REG
26409 || iform == INSN_FORM_D
26410 || iform == INSN_FORM_DS
26411 || iform == INSN_FORM_DQ);
26412 }
26413
26414 /* Helper function to see if we're potentially looking at lfs/stfs.
26415 - PARALLEL containing a SET and a CLOBBER
26416 - stfs:
26417 - SET is from UNSPEC_SI_FROM_SF to MEM:SI
26418 - CLOBBER is a V4SF
26419 - lfs:
26420 - SET is from UNSPEC_SF_FROM_SI to REG:SF
26421 - CLOBBER is a DI
26422 */
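/* Schematically, the stfs pattern being matched is:
     (parallel [(set (mem:SI ...) (unspec:SI [...] UNSPEC_SI_FROM_SF))
                (clobber (scratch:V4SF))])
   and the lfs pattern is the analogous set from UNSPEC_SF_FROM_SI to a
   reg:SF with a (scratch:DI) clobber.  */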
26423
26424 static bool
26425 is_lfs_stfs_insn (rtx_insn *insn)
26426 {
26427 rtx pattern = PATTERN (insn);
26428 if (GET_CODE (pattern) != PARALLEL)
26429 return false;
26430
26431 /* This should be a parallel with exactly one set and one clobber. */
26432 if (XVECLEN (pattern, 0) != 2)
26433 return false;
26434
26435 rtx set = XVECEXP (pattern, 0, 0);
26436 if (GET_CODE (set) != SET)
26437 return false;
26438
26439 rtx clobber = XVECEXP (pattern, 0, 1);
26440 if (GET_CODE (clobber) != CLOBBER)
26441 return false;
26442
26443 /* All we care about is that the destination of the SET is a mem:SI,
26444 the source should be an UNSPEC_SI_FROM_SF, and the clobber
26445 should be a scratch:V4SF. */
26446
26447 rtx dest = SET_DEST (set);
26448 rtx src = SET_SRC (set);
26449 rtx scratch = SET_DEST (clobber);
26450
26451 if (GET_CODE (src) != UNSPEC)
26452 return false;
26453
26454 /* stfs case. */
26455 if (XINT (src, 1) == UNSPEC_SI_FROM_SF
26456 && GET_CODE (dest) == MEM && GET_MODE (dest) == SImode
26457 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == V4SFmode)
26458 return true;
26459
26460 /* lfs case. */
26461 if (XINT (src, 1) == UNSPEC_SF_FROM_SI
26462 && GET_CODE (dest) == REG && GET_MODE (dest) == SFmode
26463 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == DImode)
26464 return true;
26465
26466 return false;
26467 }
26468
26469 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
26470 instruction format (D/DS/DQ) used for offset memory. */
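/* Illustrative results, assuming power10-style defaults: a DFmode value
   in an FPR gives NON_PREFIXED_D; a V2DImode value in an Altivec register
   with TARGET_P9_VECTOR gives NON_PREFIXED_DQ; a DImode value in a GPR on
   a 64-bit target gives NON_PREFIXED_DS.  */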
26471
26472 enum non_prefixed_form
26473 reg_to_non_prefixed (rtx reg, machine_mode mode)
26474 {
26475 /* If it isn't a register, use the defaults. */
26476 if (!REG_P (reg) && !SUBREG_P (reg))
26477 return NON_PREFIXED_DEFAULT;
26478
26479 unsigned int r = reg_or_subregno (reg);
26480
26481 /* If we have a pseudo, use the default instruction format. */
26482 if (!HARD_REGISTER_NUM_P (r))
26483 return NON_PREFIXED_DEFAULT;
26484
26485 unsigned size = GET_MODE_SIZE (mode);
26486
26487 /* FPR registers use the D format for scalars, and the DQ format for
26488 vectors, IEEE 128-bit floating point, and 128-bit integers. Before
26489 power9, only indexed addressing was available for vectors. */
26490 if (FP_REGNO_P (r))
26491 {
26492 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
26493 return NON_PREFIXED_D;
26494
26495 else if (size < 8)
26496 return NON_PREFIXED_X;
26497
26498 else if (TARGET_VSX && size >= 16
26499 && (VECTOR_MODE_P (mode)
26500 || VECTOR_ALIGNMENT_P (mode)
26501 || mode == TImode || mode == CTImode))
26502 return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X;
26503
26504 else
26505 return NON_PREFIXED_DEFAULT;
26506 }
26507
26508 /* Altivec registers use the DS format for scalars, and the DQ format for
26509 vectors, IEEE 128-bit floating point, and 128-bit integers. Before
26510 power9, only indexed addressing was available. */
26511 else if (ALTIVEC_REGNO_P (r))
26512 {
26513 if (!TARGET_P9_VECTOR)
26514 return NON_PREFIXED_X;
26515
26516 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
26517 return NON_PREFIXED_DS;
26518
26519 else if (size < 8)
26520 return NON_PREFIXED_X;
26521
26522 else if (TARGET_VSX && size >= 16
26523 && (VECTOR_MODE_P (mode)
26524 || VECTOR_ALIGNMENT_P (mode)
26525 || mode == TImode || mode == CTImode))
26526 return NON_PREFIXED_DQ;
26527
26528 else
26529 return NON_PREFIXED_DEFAULT;
26530 }
26531
26532 /* GPR registers use the DS format for 64-bit items on 64-bit systems, and
26533 the D format otherwise. Assume that any other register, such as LR, CRs,
26534 etc., will go through the GPR registers for memory operations. */
26535 else if (TARGET_POWERPC64 && size >= 8)
26536 return NON_PREFIXED_DS;
26537
26538 return NON_PREFIXED_D;
26539 }
26540
26541 \f
26542 /* Whether a load instruction is a prefixed instruction. This is called from
26543 the prefixed attribute processing. */
26544
26545 bool
26546 prefixed_load_p (rtx_insn *insn)
26547 {
26548 /* Validate the insn to make sure it is a normal load insn. */
26549 extract_insn_cached (insn);
26550 if (recog_data.n_operands < 2)
26551 return false;
26552
26553 rtx reg = recog_data.operand[0];
26554 rtx mem = recog_data.operand[1];
26555
26556 if (!REG_P (reg) && !SUBREG_P (reg))
26557 return false;
26558
26559 if (!MEM_P (mem))
26560 return false;
26561
26562 /* Prefixed load instructions do not support update or indexed forms. */
26563 if (get_attr_indexed (insn) == INDEXED_YES
26564 || get_attr_update (insn) == UPDATE_YES)
26565 return false;
26566
26567 /* LWA uses the DS format instead of the D format that LWZ uses. */
26568 enum non_prefixed_form non_prefixed;
26569 machine_mode reg_mode = GET_MODE (reg);
26570 machine_mode mem_mode = GET_MODE (mem);
26571
26572 if (mem_mode == SImode && reg_mode == DImode
26573 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
26574 non_prefixed = NON_PREFIXED_DS;
26575
26576 else
26577 non_prefixed = reg_to_non_prefixed (reg, mem_mode);
26578
26579 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
26580 return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT);
26581 else
26582 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
26583 }
26584
26585 /* Whether a store instruction is a prefixed instruction. This is called from
26586 the prefixed attribute processing. */
26587
26588 bool
26589 prefixed_store_p (rtx_insn *insn)
26590 {
26591 /* Validate the insn to make sure it is a normal store insn. */
26592 extract_insn_cached (insn);
26593 if (recog_data.n_operands < 2)
26594 return false;
26595
26596 rtx mem = recog_data.operand[0];
26597 rtx reg = recog_data.operand[1];
26598
26599 if (!REG_P (reg) && !SUBREG_P (reg))
26600 return false;
26601
26602 if (!MEM_P (mem))
26603 return false;
26604
26605 /* Prefixed store instructions do not support update or indexed forms. */
26606 if (get_attr_indexed (insn) == INDEXED_YES
26607 || get_attr_update (insn) == UPDATE_YES)
26608 return false;
26609
26610 machine_mode mem_mode = GET_MODE (mem);
26611 rtx addr = XEXP (mem, 0);
26612 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
26613
26614 /* Need to make sure we aren't looking at a stfs which doesn't look
26615 like the other things reg_to_non_prefixed/address_is_prefixed
26616 looks for. */
26617 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
26618 return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT);
26619 else
26620 return address_is_prefixed (addr, mem_mode, non_prefixed);
26621 }
26622
26623 /* Whether a load immediate or add instruction is a prefixed instruction. This
26624 is called from the prefixed attribute processing. */
26625
26626 bool
26627 prefixed_paddi_p (rtx_insn *insn)
26628 {
26629 rtx set = single_set (insn);
26630 if (!set)
26631 return false;
26632
26633 rtx dest = SET_DEST (set);
26634 rtx src = SET_SRC (set);
26635
26636 if (!REG_P (dest) && !SUBREG_P (dest))
26637 return false;
26638
26639 /* Is this a load immediate that can't be done with a simple ADDI or
26640 ADDIS? */
26641 if (CONST_INT_P (src))
26642 return (satisfies_constraint_eI (src)
26643 && !satisfies_constraint_I (src)
26644 && !satisfies_constraint_L (src));
26645
26646 /* Is this a PADDI instruction that can't be done with a simple ADDI or
26647 ADDIS? */
26648 if (GET_CODE (src) == PLUS)
26649 {
26650 rtx op1 = XEXP (src, 1);
26651
26652 return (CONST_INT_P (op1)
26653 && satisfies_constraint_eI (op1)
26654 && !satisfies_constraint_I (op1)
26655 && !satisfies_constraint_L (op1));
26656 }
26657
26658 /* If not, is it a load of a PC-relative address? */
26659 if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
26660 return false;
26661
26662 if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
26663 return false;
26664
26665 enum insn_form iform = address_to_insn_form (src, Pmode,
26666 NON_PREFIXED_DEFAULT);
26667
26668 return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
26669 }
26670
26671 /* Whether the next instruction needs a 'p' prefix issued before the
26672 instruction is printed out. */
26673 static bool prepend_p_to_next_insn;
26674
26675 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
26676 outputting the assembler code. On the PowerPC, we remember if the current
26677 insn is a prefixed insn where we need to emit a 'p' before the insn.
26678
26679 In addition, if the insn is part of a PC-relative reference to an external
26680 label optimization, this is recorded also. */
26681 void
26682 rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
26683 {
26684 prepend_p_to_next_insn = (get_attr_maybe_prefixed (insn)
26685 == MAYBE_PREFIXED_YES
26686 && get_attr_prefixed (insn) == PREFIXED_YES);
26687 return;
26688 }
26689
26690 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
26691 We use it to emit a 'p' for prefixed insns that is set in
26692 FINAL_PRESCAN_INSN. */
26693 void
26694 rs6000_asm_output_opcode (FILE *stream)
26695 {
26696 if (prepend_p_to_next_insn)
26697 {
26698 fprintf (stream, "p");
26699
26700 /* Reset the flag in the case where there are separate insn lines in the
26701 sequence, so the 'p' is only emitted for the first line. This shows up
26702 when we are doing the PCREL_OPT optimization, in that the label created
26703 with %r<n> would have a leading 'p' printed. */
26704 prepend_p_to_next_insn = false;
26705 }
26706
26707 return;
26708 }
26709
26710 /* Emit the relocation to tie the next instruction to a previous instruction
26711 that loads up an external address. This is used to do the PCREL_OPT
26712 optimization. Note, the label is generated after the PLD of the got
26713 pc-relative address to allow for the assembler to insert NOPs before the PLD
26714 instruction. The operand is a constant integer that is the label
26715 number. */
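/* For example, with label number 42 this emits:
     .reloc .Lpcrel42-8,R_PPC64_PCREL_OPT,.-(.Lpcrel42-8)
   tying the current instruction to the PLD that sits 8 bytes before the
   .Lpcrel42 label.  */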
26716
26717 void
26718 output_pcrel_opt_reloc (rtx label_num)
26719 {
26720 rtx operands[1] = { label_num };
26721 output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)",
26722 operands);
26723 }
26724
26725 /* Adjust the length of an INSN. LENGTH is the currently-computed length and
26726 should be adjusted to reflect any required changes. This macro is used when
26727 there is some systematic length adjustment required that would be difficult
26728 to express in the length attribute.
26729
26730 On the PowerPC, we use this to adjust the length of an instruction if one or
26731 more prefixed instructions are generated, using the attribute
26732 num_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the
26733 hardware requires that a prefixed instruction does not cross a 64-byte
26734 boundary. This means the compiler has to assume the length of the first
26735 prefixed instruction is 12 bytes instead of 8 bytes. Since the length is
26736 already set for the non-prefixed instruction, we just need to update for the
26737 difference. */
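/* Worked example: an insn whose base (non-prefixed) length is 4 and whose
   max_prefixed_insns attribute is 1 is adjusted to 4 + 4 * (1 + 1) = 12
   bytes, matching the 12-byte worst case described above.  */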
26738
26739 int
26740 rs6000_adjust_insn_length (rtx_insn *insn, int length)
26741 {
26742 if (TARGET_PREFIXED && NONJUMP_INSN_P (insn))
26743 {
26744 rtx pattern = PATTERN (insn);
26745 if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
26746 && get_attr_prefixed (insn) == PREFIXED_YES)
26747 {
26748 int num_prefixed = get_attr_max_prefixed_insns (insn);
26749 length += 4 * (num_prefixed + 1);
26750 }
26751 }
26752
26753 return length;
26754 }
26755
26756 \f
26757 #ifdef HAVE_GAS_HIDDEN
26758 # define USE_HIDDEN_LINKONCE 1
26759 #else
26760 # define USE_HIDDEN_LINKONCE 0
26761 #endif
26762
26763 /* Fills in the label name that should be used for a 476 link stack thunk. */
26764
26765 void
26766 get_ppc476_thunk_name (char name[32])
26767 {
26768 gcc_assert (TARGET_LINK_STACK);
26769
26770 if (USE_HIDDEN_LINKONCE)
26771 sprintf (name, "__ppc476.get_thunk");
26772 else
26773 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
26774 }
26775
26776 /* This function emits the simple thunk routine that is used to preserve
26777 the link stack on the 476 cpu. */
26778
26779 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
26780 static void
26781 rs6000_code_end (void)
26782 {
26783 char name[32];
26784 tree decl;
26785
26786 if (!TARGET_LINK_STACK)
26787 return;
26788
26789 get_ppc476_thunk_name (name);
26790
26791 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
26792 build_function_type_list (void_type_node, NULL_TREE));
26793 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
26794 NULL_TREE, void_type_node);
26795 TREE_PUBLIC (decl) = 1;
26796 TREE_STATIC (decl) = 1;
26797
26798 #if RS6000_WEAK
26799 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
26800 {
26801 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
26802 targetm.asm_out.unique_section (decl, 0);
26803 switch_to_section (get_named_section (decl, NULL, 0));
26804 DECL_WEAK (decl) = 1;
26805 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
26806 targetm.asm_out.globalize_label (asm_out_file, name);
26807 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
26808 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
26809 }
26810 else
26811 #endif
26812 {
26813 switch_to_section (text_section);
26814 ASM_OUTPUT_LABEL (asm_out_file, name);
26815 }
26816
26817 DECL_INITIAL (decl) = make_node (BLOCK);
26818 current_function_decl = decl;
26819 allocate_struct_function (decl, false);
26820 init_function_start (decl);
26821 first_function_block_is_cold = false;
26822 /* Make sure unwind info is emitted for the thunk if needed. */
26823 final_start_function (emit_barrier (), asm_out_file, 1);
26824
26825 fputs ("\tblr\n", asm_out_file);
26826
26827 final_end_function ();
26828 init_insn_lengths ();
26829 free_after_compilation (cfun);
26830 set_cfun (NULL);
26831 current_function_decl = NULL;
26832 }
26833
26834 /* Add r30 to hard reg set if the prologue sets it up and it is not
26835 pic_offset_table_rtx. */
26836
26837 static void
26838 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
26839 {
26840 if (!TARGET_SINGLE_PIC_BASE
26841 && TARGET_TOC
26842 && TARGET_MINIMAL_TOC
26843 && !constant_pool_empty_p ())
26844 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
26845 if (cfun->machine->split_stack_argp_used)
26846 add_to_hard_reg_set (&set->set, Pmode, 12);
26847
26848 /* Make sure the hard reg set doesn't include r2, which was possibly added
26849 via PIC_OFFSET_TABLE_REGNUM. */
26850 if (TARGET_TOC)
26851 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
26852 }
26853
26854 \f
26855 /* Helper function for rs6000_split_logical to emit a logical instruction after
26856 splitting the operation into single GPR registers.
26857
26858 DEST is the destination register.
26859 OP1 and OP2 are the input source registers.
26860 CODE is the base operation (AND, IOR, XOR, NOT).
26861 MODE is the machine mode.
26862 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26863 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26864 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
26865
26866 static void
26867 rs6000_split_logical_inner (rtx dest,
26868 rtx op1,
26869 rtx op2,
26870 enum rtx_code code,
26871 machine_mode mode,
26872 bool complement_final_p,
26873 bool complement_op1_p,
26874 bool complement_op2_p)
26875 {
26876 rtx bool_rtx;
26877
26878 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
26879 if (op2 && CONST_INT_P (op2)
26880 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
26881 && !complement_final_p && !complement_op1_p && !complement_op2_p)
26882 {
26883 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
26884 HOST_WIDE_INT value = INTVAL (op2) & mask;
26885
26886 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
26887 if (code == AND)
26888 {
26889 if (value == 0)
26890 {
26891 emit_insn (gen_rtx_SET (dest, const0_rtx));
26892 return;
26893 }
26894
26895 else if (value == mask)
26896 {
26897 if (!rtx_equal_p (dest, op1))
26898 emit_insn (gen_rtx_SET (dest, op1));
26899 return;
26900 }
26901 }
26902
26903 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
26904 into separate ORI/ORIS or XORI/XORIS instructions. */
26905 else if (code == IOR || code == XOR)
26906 {
26907 if (value == 0)
26908 {
26909 if (!rtx_equal_p (dest, op1))
26910 emit_insn (gen_rtx_SET (dest, op1));
26911 return;
26912 }
26913 }
26914 }
26915
26916 if (code == AND && mode == SImode
26917 && !complement_final_p && !complement_op1_p && !complement_op2_p)
26918 {
26919 emit_insn (gen_andsi3 (dest, op1, op2));
26920 return;
26921 }
26922
26923 if (complement_op1_p)
26924 op1 = gen_rtx_NOT (mode, op1);
26925
26926 if (complement_op2_p)
26927 op2 = gen_rtx_NOT (mode, op2);
26928
26929 /* For canonical RTL, if only one arm is inverted it is the first. */
26930 if (!complement_op1_p && complement_op2_p)
26931 std::swap (op1, op2);
26932
26933 bool_rtx = ((code == NOT)
26934 ? gen_rtx_NOT (mode, op1)
26935 : gen_rtx_fmt_ee (code, mode, op1, op2));
26936
26937 if (complement_final_p)
26938 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
26939
26940 emit_insn (gen_rtx_SET (dest, bool_rtx));
26941 }
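/* To illustrate the canonicalization above: an AND where only the second
   input is complemented is emitted, after the swap, as
	(set dest (and (not op2) op1))
   which is the canonical RTL form matched by the PowerPC andc pattern.  */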
26942
26943 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
26944 operations are split immediately during RTL generation to allow for more
26945 optimizations of the AND/IOR/XOR.
26946
26947 OPERANDS is an array containing the destination and two input operands.
26948 CODE is the base operation (AND, IOR, XOR, NOT).
26950 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26951 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26952 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
26955
26956 static void
26957 rs6000_split_logical_di (rtx operands[3],
26958 enum rtx_code code,
26959 bool complement_final_p,
26960 bool complement_op1_p,
26961 bool complement_op2_p)
26962 {
26963 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C (0xffffffff);
26964 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
26965 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C (0x80000000);
26966 enum hi_lo { hi = 0, lo = 1 };
26967 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
26968 size_t i;
26969
26970 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
26971 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
26972 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
26973 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
26974
26975 if (code == NOT)
26976 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
26977 else
26978 {
26979 if (!CONST_INT_P (operands[2]))
26980 {
26981 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
26982 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
26983 }
26984 else
26985 {
26986 HOST_WIDE_INT value = INTVAL (operands[2]);
26987 HOST_WIDE_INT value_hi_lo[2];
26988
26989 gcc_assert (!complement_final_p);
26990 gcc_assert (!complement_op1_p);
26991 gcc_assert (!complement_op2_p);
26992
26993 value_hi_lo[hi] = value >> 32;
26994 value_hi_lo[lo] = value & lower_32bits;
26995
26996 for (i = 0; i < 2; i++)
26997 {
26998 HOST_WIDE_INT sub_value = value_hi_lo[i];
26999
27000 if (sub_value & sign_bit)
27001 sub_value |= upper_32bits;
27002
27003 op2_hi_lo[i] = GEN_INT (sub_value);
27004
27005 /* If this is an AND instruction, check to see if we need to load
27006 the value in a register. */
27007 if (code == AND && sub_value != -1 && sub_value != 0
27008 && !and_operand (op2_hi_lo[i], SImode))
27009 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
27010 }
27011 }
27012 }
27013
27014 for (i = 0; i < 2; i++)
27015 {
27016 /* Split large IOR/XOR operations. */
27017 if ((code == IOR || code == XOR)
27018 && CONST_INT_P (op2_hi_lo[i])
27019 && !complement_final_p
27020 && !complement_op1_p
27021 && !complement_op2_p
27022 && !logical_const_operand (op2_hi_lo[i], SImode))
27023 {
27024 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
27025 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C (0xffff0000);
27026 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C (0x0000ffff);
27027 rtx tmp = gen_reg_rtx (SImode);
27028
27029 /* Make sure the constant is sign extended. */
27030 if ((hi_16bits & sign_bit) != 0)
27031 hi_16bits |= upper_32bits;
27032
27033 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
27034 code, SImode, false, false, false);
27035
27036 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
27037 code, SImode, false, false, false);
27038 }
27039 else
27040 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
27041 code, SImode, complement_final_p,
27042 complement_op1_p, complement_op2_p);
27043 }
27044
27045 return;
27046 }
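/* A worked example of the splitting above: a DImode XOR with 0x12345678 on a
   32-bit target XORs the high word with 0 (a simple move, or nothing at all
   if the registers match), and since 0x12345678 is not a 16-bit logical
   constant, the low word is split into the pair
	xoris lo,lo,0x1234
	xori  lo,lo,0x5678  */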
27047
27048 /* Split the insns that make up boolean operations operating on multiple GPR
27049 registers. The boolean MD patterns ensure that the inputs either are
27050 exactly the same as the output registers, or there is no overlap.
27051
27052 OPERANDS is an array containing the destination and two input operands.
27053 CODE is the base operation (AND, IOR, XOR, NOT).
27054 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
27055 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
27056 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
27057
27058 void
27059 rs6000_split_logical (rtx operands[3],
27060 enum rtx_code code,
27061 bool complement_final_p,
27062 bool complement_op1_p,
27063 bool complement_op2_p)
27064 {
27065 machine_mode mode = GET_MODE (operands[0]);
27066 machine_mode sub_mode;
27067 rtx op0, op1, op2;
27068 int sub_size, regno0, regno1, nregs, i;
27069
27070 /* If this is DImode, use the specialized version that can run before
27071 register allocation. */
27072 if (mode == DImode && !TARGET_POWERPC64)
27073 {
27074 rs6000_split_logical_di (operands, code, complement_final_p,
27075 complement_op1_p, complement_op2_p);
27076 return;
27077 }
27078
27079 op0 = operands[0];
27080 op1 = operands[1];
27081 op2 = (code == NOT) ? NULL_RTX : operands[2];
27082 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
27083 sub_size = GET_MODE_SIZE (sub_mode);
27084 regno0 = REGNO (op0);
27085 regno1 = REGNO (op1);
27086
27087 gcc_assert (reload_completed);
27088 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
27089 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
27090
27091 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
27092 gcc_assert (nregs > 1);
27093
27094 if (op2 && REG_P (op2))
27095 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
27096
27097 for (i = 0; i < nregs; i++)
27098 {
27099 int offset = i * sub_size;
27100 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
27101 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
27102 rtx sub_op2 = ((code == NOT)
27103 ? NULL_RTX
27104 : simplify_subreg (sub_mode, op2, mode, offset));
27105
27106 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
27107 complement_final_p, complement_op1_p,
27108 complement_op2_p);
27109 }
27110
27111 return;
27112 }
27113
27114 /* Emit instructions to move SRC to DST. Called by splitters for
27115 multi-register moves. It will emit at most one instruction for
27116 each register that is accessed; that is, it won't emit li/lis pairs
27117 (or equivalent for 64-bit code). One of SRC or DST must be a hard
27118 register. */
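/* As an illustration of the overlap handling below (register numbers are for
   exposition only): a TImode copy r4:r5 <- r3:r4 on a 64-bit target is
   emitted backwards, r5 <- r4 first and then r4 <- r3, so that the
   overlapping source register is read before it is overwritten.  */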
27119
27120 void
27121 rs6000_split_multireg_move (rtx dst, rtx src)
27122 {
27123 /* The register number of the first register being moved. */
27124 int reg;
27125 /* The mode that is to be moved. */
27126 machine_mode mode;
27127 /* The mode that the move is being done in, and its size. */
27128 machine_mode reg_mode;
27129 int reg_mode_size;
27130 /* The number of registers that will be moved. */
27131 int nregs;
27132
27133 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
27134 mode = GET_MODE (dst);
27135 nregs = hard_regno_nregs (reg, mode);
27136
27137 /* If we have a vector quad register for MMA, and this is a load or store,
27138 see if we can use vector paired load/stores. */
27139 if (mode == XOmode && TARGET_MMA
27140 && (MEM_P (dst) || MEM_P (src)))
27141 {
27142 reg_mode = OOmode;
27143 nregs /= 2;
27144 }
27145 /* If we have a vector pair/quad mode, split it into two/four separate
27146 vectors. */
27147 else if (mode == OOmode || mode == XOmode)
27148 reg_mode = V1TImode;
27149 else if (FP_REGNO_P (reg))
27150 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
27151 (TARGET_HARD_FLOAT ? DFmode : SFmode);
27152 else if (ALTIVEC_REGNO_P (reg))
27153 reg_mode = V16QImode;
27154 else
27155 reg_mode = word_mode;
27156 reg_mode_size = GET_MODE_SIZE (reg_mode);
27157
27158 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
27159
27160 /* TDmode residing in FP registers is special, since the ISA requires that
27161 the lower-numbered word of a register pair is always the most significant
27162 word, even in little-endian mode. This does not match the usual subreg
27163 semantics, so we cannot use simplify_gen_subreg in those cases. Access
27164 the appropriate constituent registers "by hand" in little-endian mode.
27165
27166 Note we do not need to check for destructive overlap here since TDmode
27167 can only reside in even/odd register pairs. */
27168 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
27169 {
27170 rtx p_src, p_dst;
27171 int i;
27172
27173 for (i = 0; i < nregs; i++)
27174 {
27175 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
27176 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
27177 else
27178 p_src = simplify_gen_subreg (reg_mode, src, mode,
27179 i * reg_mode_size);
27180
27181 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
27182 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
27183 else
27184 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
27185 i * reg_mode_size);
27186
27187 emit_insn (gen_rtx_SET (p_dst, p_src));
27188 }
27189
27190 return;
27191 }
27192
27193 /* The __vector_pair and __vector_quad modes are multi-register
27194 modes, so if we have to load or store the registers, we have to be
27195 careful to properly swap them if we're in little endian mode
27196 below. This means the last register gets the first memory
27197 location. We also need to be careful of using the right register
27198 numbers if we are splitting XO to OO. */
27199 if (mode == OOmode || mode == XOmode)
27200 {
27201 nregs = hard_regno_nregs (reg, mode);
27202 int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
27203 if (MEM_P (dst))
27204 {
27205 unsigned offset = 0;
27206 unsigned size = GET_MODE_SIZE (reg_mode);
27207
27208 /* If we are reading an accumulator register, we have to
27209 deprime it before we can access it. */
27210 if (TARGET_MMA
27211 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27212 emit_insn (gen_mma_xxmfacc (src, src));
27213
27214 for (int i = 0; i < nregs; i += reg_mode_nregs)
27215 {
27216 unsigned subreg
27217 = WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
27218 rtx dst2 = adjust_address (dst, reg_mode, offset);
27219 rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
27220 offset += size;
27221 emit_insn (gen_rtx_SET (dst2, src2));
27222 }
27223
27224 return;
27225 }
27226
27227 if (MEM_P (src))
27228 {
27229 unsigned offset = 0;
27230 unsigned size = GET_MODE_SIZE (reg_mode);
27231
27232 for (int i = 0; i < nregs; i += reg_mode_nregs)
27233 {
27234 unsigned subreg
27235 = WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
27236 rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
27237 rtx src2 = adjust_address (src, reg_mode, offset);
27238 offset += size;
27239 emit_insn (gen_rtx_SET (dst2, src2));
27240 }
27241
27242 /* If we are writing an accumulator register, we have to
27243 prime it after we've written it. */
27244 if (TARGET_MMA
27245 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27246 emit_insn (gen_mma_xxmtacc (dst, dst));
27247
27248 return;
27249 }
27250
27251 if (GET_CODE (src) == UNSPEC
27252 || GET_CODE (src) == UNSPEC_VOLATILE)
27253 {
27254 gcc_assert (XINT (src, 1) == UNSPEC_VSX_ASSEMBLE
27255 || XINT (src, 1) == UNSPECV_MMA_ASSEMBLE);
27256 gcc_assert (REG_P (dst));
27257 if (GET_MODE (src) == XOmode)
27258 gcc_assert (FP_REGNO_P (REGNO (dst)));
27259 if (GET_MODE (src) == OOmode)
27260 gcc_assert (VSX_REGNO_P (REGNO (dst)));
27261
27262 int nvecs = XVECLEN (src, 0);
27263 for (int i = 0; i < nvecs; i++)
27264 {
27265 rtx op;
27266 int regno = reg + i;
27267
27268 if (WORDS_BIG_ENDIAN)
27269 {
27270 op = XVECEXP (src, 0, i);
27271
27272 /* If we are loading an even VSX register and the memory location
27273 is adjacent to the next register's memory location (if any),
27274 then we can load them both with one LXVP instruction. */
27275 if ((regno & 1) == 0)
27276 {
27277 rtx op2 = XVECEXP (src, 0, i + 1);
27278 if (adjacent_mem_locations (op, op2) == op)
27279 {
27280 op = adjust_address (op, OOmode, 0);
27281 /* Skip the next register, since we're going to
27282 load it together with this register. */
27283 i++;
27284 }
27285 }
27286 }
27287 else
27288 {
27289 op = XVECEXP (src, 0, nvecs - i - 1);
27290
27291 /* If we are loading an even VSX register and the memory location
27292 is adjacent to the next register's memory location (if any),
27293 then we can load them both with one LXVP instruction. */
27294 if ((regno & 1) == 0)
27295 {
27296 rtx op2 = XVECEXP (src, 0, nvecs - i - 2);
27297 if (adjacent_mem_locations (op2, op) == op2)
27298 {
27299 op = adjust_address (op2, OOmode, 0);
27300 /* Skip the next register, since we're going to
27301 load it together with this register. */
27302 i++;
27303 }
27304 }
27305 }
27306
27307 rtx dst_i = gen_rtx_REG (GET_MODE (op), regno);
27308 emit_insn (gen_rtx_SET (dst_i, op));
27309 }
27310
27311 /* We are writing an accumulator register, so we have to
27312 prime it after we've written it. */
27313 if (GET_MODE (src) == XOmode)
27314 emit_insn (gen_mma_xxmtacc (dst, dst));
27315
27316 return;
27317 }
27318
27319 /* Register -> register moves can use common code. */
27320 }
27321
27322 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
27323 {
27324 /* If we are reading an accumulator register, we have to
27325 deprime it before we can access it. */
27326 if (TARGET_MMA
27327 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27328 emit_insn (gen_mma_xxmfacc (src, src));
27329
27330 /* Move register range backwards, if we might have destructive
27331 overlap. */
27332 int i;
27333 /* XO/OO are opaque so cannot use subregs. */
27334 if (mode == OOmode || mode == XOmode)
27335 {
27336 for (i = nregs - 1; i >= 0; i--)
27337 {
27338 rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i);
27339 rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i);
27340 emit_insn (gen_rtx_SET (dst_i, src_i));
27341 }
27342 }
27343 else
27344 {
27345 for (i = nregs - 1; i >= 0; i--)
27346 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
27347 i * reg_mode_size),
27348 simplify_gen_subreg (reg_mode, src, mode,
27349 i * reg_mode_size)));
27350 }
27351
27352 /* If we are writing an accumulator register, we have to
27353 prime it after we've written it. */
27354 if (TARGET_MMA
27355 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27356 emit_insn (gen_mma_xxmtacc (dst, dst));
27357 }
27358 else
27359 {
27360 int i;
27361 int j = -1;
27362 bool used_update = false;
27363 rtx restore_basereg = NULL_RTX;
27364
27365 if (MEM_P (src) && INT_REGNO_P (reg))
27366 {
27367 rtx breg;
27368
27369 if (GET_CODE (XEXP (src, 0)) == PRE_INC
27370 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
27371 {
27372 rtx delta_rtx;
27373 breg = XEXP (XEXP (src, 0), 0);
27374 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
27375 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
27376 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
27377 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
27378 src = replace_equiv_address (src, breg);
27379 }
27380 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
27381 {
27382 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
27383 {
27384 rtx basereg = XEXP (XEXP (src, 0), 0);
27385 if (TARGET_UPDATE)
27386 {
27387 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
27388 emit_insn (gen_rtx_SET (ndst,
27389 gen_rtx_MEM (reg_mode,
27390 XEXP (src, 0))));
27391 used_update = true;
27392 }
27393 else
27394 emit_insn (gen_rtx_SET (basereg,
27395 XEXP (XEXP (src, 0), 1)));
27396 src = replace_equiv_address (src, basereg);
27397 }
27398 else
27399 {
27400 rtx basereg = gen_rtx_REG (Pmode, reg);
27401 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
27402 src = replace_equiv_address (src, basereg);
27403 }
27404 }
27405
27406 breg = XEXP (src, 0);
27407 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
27408 breg = XEXP (breg, 0);
27409
27410 /* If the base register we are using to address memory is
27411 also a destination reg, then change that register last. */
27412 if (REG_P (breg)
27413 && REGNO (breg) >= REGNO (dst)
27414 && REGNO (breg) < REGNO (dst) + nregs)
27415 j = REGNO (breg) - REGNO (dst);
27416 }
27417 else if (MEM_P (dst) && INT_REGNO_P (reg))
27418 {
27419 rtx breg;
27420
27421 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
27422 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
27423 {
27424 rtx delta_rtx;
27425 breg = XEXP (XEXP (dst, 0), 0);
27426 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
27427 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
27428 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
27429
27430 /* We have to update the breg before doing the store.
27431 Use store with update, if available. */
27432
27433 if (TARGET_UPDATE)
27434 {
27435 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
27436 emit_insn (TARGET_32BIT
27437 ? (TARGET_POWERPC64
27438 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
27439 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
27440 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
27441 used_update = true;
27442 }
27443 else
27444 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
27445 dst = replace_equiv_address (dst, breg);
27446 }
27447 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
27448 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
27449 {
27450 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
27451 {
27452 rtx basereg = XEXP (XEXP (dst, 0), 0);
27453 if (TARGET_UPDATE)
27454 {
27455 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
27456 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
27457 XEXP (dst, 0)),
27458 nsrc));
27459 used_update = true;
27460 }
27461 else
27462 emit_insn (gen_rtx_SET (basereg,
27463 XEXP (XEXP (dst, 0), 1)));
27464 dst = replace_equiv_address (dst, basereg);
27465 }
27466 else
27467 {
27468 rtx basereg = XEXP (XEXP (dst, 0), 0);
27469 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
27470 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
27471 && REG_P (basereg)
27472 && REG_P (offsetreg)
27473 && REGNO (basereg) != REGNO (offsetreg));
27474 if (REGNO (basereg) == 0)
27475 {
27476 rtx tmp = offsetreg;
27477 offsetreg = basereg;
27478 basereg = tmp;
27479 }
27480 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
27481 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
27482 dst = replace_equiv_address (dst, basereg);
27483 }
27484 }
27485 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
27486 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
27487 }
27488
27489 /* If we are reading an accumulator register, we have to
27490 deprime it before we can access it. */
27491 if (TARGET_MMA && REG_P (src)
27492 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27493 emit_insn (gen_mma_xxmfacc (src, src));
27494
27495 for (i = 0; i < nregs; i++)
27496 {
27497 /* Calculate index to next subword. */
27498 ++j;
27499 if (j == nregs)
27500 j = 0;
27501
27502 /* If the compiler already emitted the move of the first word by
27503 store with update, there is no need to do anything. */
27504 if (j == 0 && used_update)
27505 continue;
27506
27507 /* XO/OO are opaque so cannot use subregs. */
27508 if (mode == OOmode || mode == XOmode)
27509 {
27510 rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
27511 rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
27512 emit_insn (gen_rtx_SET (dst_i, src_i));
27513 }
27514 else
27515 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
27516 j * reg_mode_size),
27517 simplify_gen_subreg (reg_mode, src, mode,
27518 j * reg_mode_size)));
27519 }
27520
27521 /* If we are writing an accumulator register, we have to
27522 prime it after we've written it. */
27523 if (TARGET_MMA && REG_P (dst)
27524 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27525 emit_insn (gen_mma_xxmtacc (dst, dst));
27526
27527 if (restore_basereg != NULL_RTX)
27528 emit_insn (restore_basereg);
27529 }
27530 }
27531 \f
27532 /* Return true if the peephole2 pass can combine an addis instruction and a
27533 load with an offset into a pair that can be fused together on a power8. */
27535
27536 bool
27537 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
27538 rtx addis_value, /* addis value. */
27539 rtx target, /* target register that is loaded. */
27540 rtx mem) /* bottom part of the memory addr. */
27541 {
27542 rtx addr;
27543 rtx base_reg;
27544
27545 /* Validate arguments. */
27546 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
27547 return false;
27548
27549 if (!base_reg_operand (target, GET_MODE (target)))
27550 return false;
27551
27552 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
27553 return false;
27554
27555 /* Allow sign/zero extension. */
27556 if (GET_CODE (mem) == ZERO_EXTEND
27557 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
27558 mem = XEXP (mem, 0);
27559
27560 if (!MEM_P (mem))
27561 return false;
27562
27563 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
27564 return false;
27565
27566 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
27567 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
27568 return false;
27569
27570 /* Validate that the register used to load the high value is either the
27571 register being loaded, or we can safely replace its use.
27572
27573 This function is only called from the peephole2 pass and we assume that
27574 there are 2 instructions in the peephole (addis and load), so we want to
27575 check if the target register was not used in the memory address and the
27576 register to hold the addis result is dead after the peephole. */
27577 if (REGNO (addis_reg) != REGNO (target))
27578 {
27579 if (reg_mentioned_p (target, mem))
27580 return false;
27581
27582 if (!peep2_reg_dead_p (2, addis_reg))
27583 return false;
27584
27585 /* If the target register being loaded is the stack pointer, we must
27586 avoid loading any other value into it, even temporarily. */
27587 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
27588 return false;
27589 }
27590
27591 base_reg = XEXP (addr, 0);
27592 return REGNO (addis_reg) == REGNO (base_reg);
27593 }
27594
27595 /* During the peephole2 pass, adjust and expand the insns for a load fusion
27596 sequence. We adjust the addis register to use the target register. If the
27597 load sign extends, we adjust the code to do a zero-extending load followed
27598 by an explicit sign extension, since the fusion only covers zero-extending
27599 loads.
27600
27601 The operands are:
27602 operands[0] register set with addis (to be replaced with target)
27603 operands[1] value set via addis
27604 operands[2] target register being loaded
27605 operands[3] D-form memory reference using operands[0]. */
27606
27607 void
27608 expand_fusion_gpr_load (rtx *operands)
27609 {
27610 rtx addis_value = operands[1];
27611 rtx target = operands[2];
27612 rtx orig_mem = operands[3];
27613 rtx new_addr, new_mem, orig_addr, offset;
27614 enum rtx_code plus_or_lo_sum;
27615 machine_mode target_mode = GET_MODE (target);
27616 machine_mode extend_mode = target_mode;
27617 machine_mode ptr_mode = Pmode;
27618 enum rtx_code extend = UNKNOWN;
27619
27620 if (GET_CODE (orig_mem) == ZERO_EXTEND
27621 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
27622 {
27623 extend = GET_CODE (orig_mem);
27624 orig_mem = XEXP (orig_mem, 0);
27625 target_mode = GET_MODE (orig_mem);
27626 }
27627
27628 gcc_assert (MEM_P (orig_mem));
27629
27630 orig_addr = XEXP (orig_mem, 0);
27631 plus_or_lo_sum = GET_CODE (orig_addr);
27632 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
27633
27634 offset = XEXP (orig_addr, 1);
27635 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
27636 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
27637
27638 if (extend != UNKNOWN)
27639 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
27640
27641 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
27642 UNSPEC_FUSION_GPR);
27643 emit_insn (gen_rtx_SET (target, new_mem));
27644
27645 if (extend == SIGN_EXTEND)
27646 {
27647 int sub_off = ((BYTES_BIG_ENDIAN)
27648 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
27649 : 0);
27650 rtx sign_reg
27651 = simplify_subreg (target_mode, target, extend_mode, sub_off);
27652
27653 emit_insn (gen_rtx_SET (target,
27654 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
27655 }
27656
27657 return;
27658 }
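/* A sketch of the sign-extension handling above (the register numbers and the
   symbol are illustrative): a fused sign-extending HImode load becomes a
   zero-extending load plus an explicit sign extension, roughly

	addis 9,2,sym@toc@ha
	lhz   9,sym@toc@l(9)
	extsh 9,9  */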
27659
27660 /* Emit the addis instruction that will be part of a fused instruction
27661 sequence. */
27662
27663 void
27664 emit_fusion_addis (rtx target, rtx addis_value)
27665 {
27666 rtx fuse_ops[10];
27667 const char *addis_str = NULL;
27668
27669 /* Emit the addis instruction. */
27670 fuse_ops[0] = target;
27671 if (satisfies_constraint_L (addis_value))
27672 {
27673 fuse_ops[1] = addis_value;
27674 addis_str = "lis %0,%v1";
27675 }
27676
27677 else if (GET_CODE (addis_value) == PLUS)
27678 {
27679 rtx op0 = XEXP (addis_value, 0);
27680 rtx op1 = XEXP (addis_value, 1);
27681
27682 if (REG_P (op0) && CONST_INT_P (op1)
27683 && satisfies_constraint_L (op1))
27684 {
27685 fuse_ops[1] = op0;
27686 fuse_ops[2] = op1;
27687 addis_str = "addis %0,%1,%v2";
27688 }
27689 }
27690
27691 else if (GET_CODE (addis_value) == HIGH)
27692 {
27693 rtx value = XEXP (addis_value, 0);
27694 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
27695 {
27696 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
27697 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
27698 if (TARGET_ELF)
27699 addis_str = "addis %0,%2,%1@toc@ha";
27700
27701 else if (TARGET_XCOFF)
27702 addis_str = "addis %0,%1@u(%2)";
27703
27704 else
27705 gcc_unreachable ();
27706 }
27707
27708 else if (GET_CODE (value) == PLUS)
27709 {
27710 rtx op0 = XEXP (value, 0);
27711 rtx op1 = XEXP (value, 1);
27712
27713 if (GET_CODE (op0) == UNSPEC
27714 && XINT (op0, 1) == UNSPEC_TOCREL
27715 && CONST_INT_P (op1))
27716 {
27717 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
27718 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
27719 fuse_ops[3] = op1;
27720 if (TARGET_ELF)
27721 addis_str = "addis %0,%2,%1+%3@toc@ha";
27722
27723 else if (TARGET_XCOFF)
27724 addis_str = "addis %0,%1+%3@u(%2)";
27725
27726 else
27727 gcc_unreachable ();
27728 }
27729 }
27730
27731 else if (satisfies_constraint_L (value))
27732 {
27733 fuse_ops[1] = value;
27734 addis_str = "lis %0,%v1";
27735 }
27736
27737 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
27738 {
27739 fuse_ops[1] = value;
27740 addis_str = "lis %0,%1@ha";
27741 }
27742 }
27743
27744 if (!addis_str)
27745 fatal_insn ("Could not generate addis value for fusion", addis_value);
27746
27747 output_asm_insn (addis_str, fuse_ops);
27748 }
27749
27750 /* Emit a D-form load or store instruction that is the second instruction
27751 of a fusion sequence. */
27752
27753 static void
27754 emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
27755 {
27756 rtx fuse_ops[10];
27757 char insn_template[80];
27758
27759 fuse_ops[0] = load_reg;
27760 fuse_ops[1] = addis_reg;
27761
27762 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
27763 {
27764 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
27765 fuse_ops[2] = offset;
27766 output_asm_insn (insn_template, fuse_ops);
27767 }
27768
27769 else if (GET_CODE (offset) == UNSPEC
27770 && XINT (offset, 1) == UNSPEC_TOCREL)
27771 {
27772 if (TARGET_ELF)
27773 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
27774
27775 else if (TARGET_XCOFF)
27776 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
27777
27778 else
27779 gcc_unreachable ();
27780
27781 fuse_ops[2] = XVECEXP (offset, 0, 0);
27782 output_asm_insn (insn_template, fuse_ops);
27783 }
27784
27785 else if (GET_CODE (offset) == PLUS
27786 && GET_CODE (XEXP (offset, 0)) == UNSPEC
27787 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
27788 && CONST_INT_P (XEXP (offset, 1)))
27789 {
27790 rtx tocrel_unspec = XEXP (offset, 0);
27791 if (TARGET_ELF)
27792 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
27793
27794 else if (TARGET_XCOFF)
27795 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
27796
27797 else
27798 gcc_unreachable ();
27799
27800 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
27801 fuse_ops[3] = XEXP (offset, 1);
27802 output_asm_insn (insn_template, fuse_ops);
27803 }
27804
27805 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
27806 {
27807 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
27808
27809 fuse_ops[2] = offset;
27810 output_asm_insn (insn_template, fuse_ops);
27811 }
27812
27813 else
27814 fatal_insn ("Unable to generate load/store offset for fusion", offset);
27815
27816 return;
27817 }
27818
27819 /* Given an address, convert it into the addis and load offset parts. Addresses
27820 created during the peephole2 process look like:
27821 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
27822 (unspec [(...)] UNSPEC_TOCREL)) */
27823
27824 static void
27825 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
27826 {
27827 rtx hi, lo;
27828
27829 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
27830 {
27831 hi = XEXP (addr, 0);
27832 lo = XEXP (addr, 1);
27833 }
27834 else
27835 gcc_unreachable ();
27836
27837 *p_hi = hi;
27838 *p_lo = lo;
27839 }
27840
27841 /* Return a string to fuse an addis instruction with a GPR load into the same
27842 register that the addis instruction set up. The address that is used
27843 is the logical address that was formed during peephole2:
27844 (lo_sum (high) (low-part))
27845
27846 The code is complicated, so we call output_asm_insn directly, and just
27847 return "". */
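/* For instance (an illustrative sketch with assumed operands), a TOC-relative
   SImode load on ELF emits the fused pair:

	addis 9,2,sym@toc@ha
	lwz   9,sym@toc@l(9)  */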
27848
27849 const char *
27850 emit_fusion_gpr_load (rtx target, rtx mem)
27851 {
27852 rtx addis_value;
27853 rtx addr;
27854 rtx load_offset;
27855 const char *load_str = NULL;
27856 machine_mode mode;
27857
27858 if (GET_CODE (mem) == ZERO_EXTEND)
27859 mem = XEXP (mem, 0);
27860
27861 gcc_assert (REG_P (target) && MEM_P (mem));
27862
27863 addr = XEXP (mem, 0);
27864 fusion_split_address (addr, &addis_value, &load_offset);
27865
27866 /* Now emit the load instruction to the same register. */
27867 mode = GET_MODE (mem);
27868 switch (mode)
27869 {
27870 case E_QImode:
27871 load_str = "lbz";
27872 break;
27873
27874 case E_HImode:
27875 load_str = "lhz";
27876 break;
27877
27878 case E_SImode:
27879 case E_SFmode:
27880 load_str = "lwz";
27881 break;
27882
27883 case E_DImode:
27884 case E_DFmode:
27885 gcc_assert (TARGET_POWERPC64);
27886 load_str = "ld";
27887 break;
27888
27889 default:
27890 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
27891 }
27892
27893 /* Emit the addis instruction. */
27894 emit_fusion_addis (target, addis_value);
27895
27896 /* Emit the D-form load instruction. */
27897 emit_fusion_load (target, target, load_offset, load_str);
27898
27899 return "";
27900 }
27901 \f
27902 /* This is not inside an #ifdef RS6000_GLIBC_ATOMIC_FENV because gengtype
27903 ignores it then. */
27904 static GTY(()) tree atomic_hold_decl;
27905 static GTY(()) tree atomic_clear_decl;
27906 static GTY(()) tree atomic_update_decl;
27907
27908 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
27909 static void
27910 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
27911 {
27912 if (!TARGET_HARD_FLOAT)
27913 {
27914 #ifdef RS6000_GLIBC_ATOMIC_FENV
27915 if (atomic_hold_decl == NULL_TREE)
27916 {
27917 atomic_hold_decl
27918 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
27919 get_identifier ("__atomic_feholdexcept"),
27920 build_function_type_list (void_type_node,
27921 double_ptr_type_node,
27922 NULL_TREE));
27923 TREE_PUBLIC (atomic_hold_decl) = 1;
27924 DECL_EXTERNAL (atomic_hold_decl) = 1;
27925 }
27926
27927 if (atomic_clear_decl == NULL_TREE)
27928 {
27929 atomic_clear_decl
27930 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
27931 get_identifier ("__atomic_feclearexcept"),
27932 build_function_type_list (void_type_node,
27933 NULL_TREE));
27934 TREE_PUBLIC (atomic_clear_decl) = 1;
27935 DECL_EXTERNAL (atomic_clear_decl) = 1;
27936 }
27937
27938 tree const_double = build_qualified_type (double_type_node,
27939 TYPE_QUAL_CONST);
27940 tree const_double_ptr = build_pointer_type (const_double);
27941 if (atomic_update_decl == NULL_TREE)
27942 {
27943 atomic_update_decl
27944 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
27945 get_identifier ("__atomic_feupdateenv"),
27946 build_function_type_list (void_type_node,
27947 const_double_ptr,
27948 NULL_TREE));
27949 TREE_PUBLIC (atomic_update_decl) = 1;
27950 DECL_EXTERNAL (atomic_update_decl) = 1;
27951 }
27952
27953 tree fenv_var = create_tmp_var_raw (double_type_node);
27954 TREE_ADDRESSABLE (fenv_var) = 1;
27955 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node,
27956 build4 (TARGET_EXPR, double_type_node, fenv_var,
27957 void_node, NULL_TREE, NULL_TREE));
27958
27959 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
27960 *clear = build_call_expr (atomic_clear_decl, 0);
27961 *update = build_call_expr (atomic_update_decl, 1,
27962 fold_convert (const_double_ptr, fenv_addr));
27963 #endif
27964 return;
27965 }
27966
27967 tree mffs = rs6000_builtin_decls[RS6000_BIF_MFFS];
27968 tree mtfsf = rs6000_builtin_decls[RS6000_BIF_MTFSF];
27969 tree call_mffs = build_call_expr (mffs, 0);
27970
27971 /* Generates the equivalent of feholdexcept (&fenv_var)
27972
27973 fenv_var = __builtin_mffs ();
27974 double fenv_hold;
27975 *(uint64_t*)&fenv_hold = *(uint64_t*)&fenv_var & 0xffffffff00000007LL;
27976 __builtin_mtfsf (0xff, fenv_hold); */
27977
27978 /* Mask to clear everything except for the rounding modes and non-IEEE
27979 arithmetic flag. */
27980 const unsigned HOST_WIDE_INT hold_exception_mask
27981 = HOST_WIDE_INT_C (0xffffffff00000007);
27982
27983 tree fenv_var = create_tmp_var_raw (double_type_node);
27984
27985 tree hold_mffs = build4 (TARGET_EXPR, double_type_node, fenv_var, call_mffs,
27986 NULL_TREE, NULL_TREE);
27987
27988 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
27989 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
27990 build_int_cst (uint64_type_node,
27991 hold_exception_mask));
27992
27993 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
27994 fenv_llu_and);
27995
27996 tree hold_mtfsf = build_call_expr (mtfsf, 2,
27997 build_int_cst (unsigned_type_node, 0xff),
27998 fenv_hold_mtfsf);
27999
28000 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
28001
28002 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
28003
28004 double fenv_clear = __builtin_mffs ();
28005 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
28006 __builtin_mtfsf (0xff, fenv_clear); */
28007
28008 /* Mask to clear everything, including the rounding modes and non-IEEE
28009 arithmetic flag. */
28010 const unsigned HOST_WIDE_INT clear_exception_mask
28011 = HOST_WIDE_INT_C (0xffffffff00000000);
28012
28013 tree fenv_clear = create_tmp_var_raw (double_type_node);
28014
28015 tree clear_mffs = build4 (TARGET_EXPR, double_type_node, fenv_clear,
28016 call_mffs, NULL_TREE, NULL_TREE);
28017
28018 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
28019 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
28020 fenv_clean_llu,
28021 build_int_cst (uint64_type_node,
28022 clear_exception_mask));
28023
28024 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
28025 fenv_clear_llu_and);
28026
28027 tree clear_mtfsf = build_call_expr (mtfsf, 2,
28028 build_int_cst (unsigned_type_node, 0xff),
28029 fenv_clear_mtfsf);
28030
28031 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
28032
28033 /* Generates the equivalent of feupdateenv (&fenv_var)
28034
28035 double old_fenv = __builtin_mffs ();
28036 double fenv_update;
28037 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
28038 (*(uint64_t*)&fenv_var & 0x1ff80fff);
28039 __builtin_mtfsf (0xff, fenv_update); */
28040
28041 const unsigned HOST_WIDE_INT update_exception_mask
28042 = HOST_WIDE_INT_C (0xffffffff1fffff00);
28043 const unsigned HOST_WIDE_INT new_exception_mask
28044 = HOST_WIDE_INT_C (0x1ff80fff);
28045
28046 tree old_fenv = create_tmp_var_raw (double_type_node);
28047 tree update_mffs = build4 (TARGET_EXPR, double_type_node, old_fenv,
28048 call_mffs, NULL_TREE, NULL_TREE);
28049
28050 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
28051 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
28052 build_int_cst (uint64_type_node,
28053 update_exception_mask));
28054
28055 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
28056 build_int_cst (uint64_type_node,
28057 new_exception_mask));
28058
28059 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
28060 old_llu_and, new_llu_and);
28061
28062 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
28063 new_llu_mask);
28064
28065 tree update_mtfsf = build_call_expr (mtfsf, 2,
28066 build_int_cst (unsigned_type_node, 0xff),
28067 fenv_update_mtfsf);
28068
28069 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
28070 }
28071
28072 void
28073 rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
28074 {
28075 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
28076
28077 rtx_tmp0 = gen_reg_rtx (V2DFmode);
28078 rtx_tmp1 = gen_reg_rtx (V2DFmode);
28079
28080 /* The layout of the destination of the vmrgew instruction is:
28081 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
28082 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
28083 vmrgew instruction will be correct. */
28084 if (BYTES_BIG_ENDIAN)
28085 {
28086 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
28087 GEN_INT (0)));
28088 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
28089 GEN_INT (3)));
28090 }
28091 else
28092 {
28093 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
28094 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
28095 }
28096
28097 rtx_tmp2 = gen_reg_rtx (V4SFmode);
28098 rtx_tmp3 = gen_reg_rtx (V4SFmode);
28099
28100 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
28101 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));
28102
28103 if (BYTES_BIG_ENDIAN)
28104 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
28105 else
28106 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
28107 }
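/* To illustrate the element ordering above: for src1 = {a, b} and
   src2 = {c, d}, the xxpermdi/xvcvdpsp/vmrgew sequence is intended to yield
   dst = {(float) a, (float) b, (float) c, (float) d} regardless of
   endianness.  */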
28108
28109 void
28110 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
28111 {
28112 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
28113
28114 rtx_tmp0 = gen_reg_rtx (V2DImode);
28115 rtx_tmp1 = gen_reg_rtx (V2DImode);
28116
28117 /* The layout of the destination of the vmrgew instruction is:
28118 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
28119 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
28120 vmrgew instruction will be correct. */
28121 if (BYTES_BIG_ENDIAN)
28122 {
28123 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
28124 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
28125 }
28126 else
28127 {
28128 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
28129 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
28130 }
28131
28132 rtx_tmp2 = gen_reg_rtx (V4SFmode);
28133 rtx_tmp3 = gen_reg_rtx (V4SFmode);
28134
28135 if (signed_convert)
28136 {
28137 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
28138 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
28139 }
28140 else
28141 {
28142 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
28143 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
28144 }
28145
28146 if (BYTES_BIG_ENDIAN)
28147 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
28148 else
28149 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
28150 }
28151
28152 void
28153 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
28154 rtx src2)
28155 {
28156 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
28157
28158 rtx_tmp0 = gen_reg_rtx (V2DFmode);
28159 rtx_tmp1 = gen_reg_rtx (V2DFmode);
28160
28161 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
28162 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
28163
28164 rtx_tmp2 = gen_reg_rtx (V4SImode);
28165 rtx_tmp3 = gen_reg_rtx (V4SImode);
28166
28167 if (signed_convert)
28168 {
28169 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
28170 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
28171 }
28172 else
28173 {
28174 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
28175 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
28176 }
28177
28178 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
28179 }
28180
28181 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
28182
28183 static bool
28184 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
28185 optimization_type opt_type)
28186 {
28187 switch (op)
28188 {
28189 case rsqrt_optab:
28190 return (opt_type == OPTIMIZE_FOR_SPEED
28191 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
28192
28193 default:
28194 return true;
28195 }
28196 }
28197
28198 /* Implement TARGET_CONSTANT_ALIGNMENT. */
28199
28200 static HOST_WIDE_INT
28201 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
28202 {
28203 if (TREE_CODE (exp) == STRING_CST
28204 && (STRICT_ALIGNMENT || !optimize_size))
28205 return MAX (align, BITS_PER_WORD);
28206 return align;
28207 }
28208
28209 /* Implement TARGET_STARTING_FRAME_OFFSET. */
28210
28211 static HOST_WIDE_INT
28212 rs6000_starting_frame_offset (void)
28213 {
28214 if (FRAME_GROWS_DOWNWARD)
28215 return 0;
28216 return RS6000_STARTING_FRAME_OFFSET;
28217 }
28218 \f
28219 /* Internal function to return the built-in function id for the complex
28220 multiply operation for a given mode. */
28221
28222 static inline built_in_function
28223 complex_multiply_builtin_code (machine_mode mode)
28224 {
28225 gcc_assert (IN_RANGE (mode, MIN_MODE_COMPLEX_FLOAT, MAX_MODE_COMPLEX_FLOAT));
28226 int func = BUILT_IN_COMPLEX_MUL_MIN + mode - MIN_MODE_COMPLEX_FLOAT;
28227 return (built_in_function) func;
28228 }
28229
28230 /* Internal function to return the built-in function id for the complex divide
28231 operation for a given mode. */
28232
28233 static inline built_in_function
28234 complex_divide_builtin_code (machine_mode mode)
28235 {
28236 gcc_assert (IN_RANGE (mode, MIN_MODE_COMPLEX_FLOAT, MAX_MODE_COMPLEX_FLOAT));
28237 int func = BUILT_IN_COMPLEX_DIV_MIN + mode - MIN_MODE_COMPLEX_FLOAT;
28238 return (built_in_function) func;
28239 }
28240
28241 /* On 64-bit Linux and FreeBSD systems, possibly switch the long double library
28242 function names from <foo>l to <foo>f128 if the default long double type is
28243 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
28244 include file switches the names on systems that support long double as IEEE
28245 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
28246 In the future, glibc will export names like __ieee128_sinf128 and we can
28247 switch to using those instead of using sinf128, which pollutes the user's
28248 namespace.
28249
28250 This will switch the names for Fortran math functions as well (Fortran does
28251 not use math.h). However, Fortran needs other changes to the compiler and
28252 library before you can switch the real*16 type at compile time.
28253
28254 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
28255 only do this transformation if the __float128 type is enabled. This
28256 prevents us from doing the transformation on older 32-bit ports that might
28257 have enabled using IEEE 128-bit floating point as the default long double
28258 type.
28259
28260 We also use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change the
28261 function names used for complex multiply and divide to the appropriate
28262 names. */
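/* For example (derived from the mappings below): with IEEE 128-bit long
   double, sinl is remapped to __sinieee128, printf-family names gain an
   ieee128 wrapper (vprintf becomes __vprintfieee128), and scanf-family names
   become __isoc99_<name>ieee128.  */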
28263
28264 static tree
28265 rs6000_mangle_decl_assembler_name (tree decl, tree id)
28266 {
28267 /* Handle complex multiply/divide. For IEEE 128-bit, use __mulkc3 or
28268 __divkc3 and for IBM 128-bit use __multc3 and __divtc3. */
28269 if (TARGET_FLOAT128_TYPE
28270 && TREE_CODE (decl) == FUNCTION_DECL
28271 && DECL_IS_UNDECLARED_BUILTIN (decl)
28272 && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
28273 {
28274 built_in_function id = DECL_FUNCTION_CODE (decl);
28275 const char *newname = NULL;
28276
28277 if (id == complex_multiply_builtin_code (KCmode))
28278 newname = "__mulkc3";
28279
28280 else if (id == complex_multiply_builtin_code (ICmode))
28281 newname = "__multc3";
28282
28283 else if (id == complex_multiply_builtin_code (TCmode))
28284 newname = (TARGET_IEEEQUAD) ? "__mulkc3" : "__multc3";
28285
28286 else if (id == complex_divide_builtin_code (KCmode))
28287 newname = "__divkc3";
28288
28289 else if (id == complex_divide_builtin_code (ICmode))
28290 newname = "__divtc3";
28291
28292 else if (id == complex_divide_builtin_code (TCmode))
28293 newname = (TARGET_IEEEQUAD) ? "__divkc3" : "__divtc3";
28294
28295 if (newname)
28296 {
28297 if (TARGET_DEBUG_BUILTIN)
28298 fprintf (stderr, "Map complex mul/div => %s\n", newname);
28299
28300 return get_identifier (newname);
28301 }
28302 }
28303
28304 /* Map long double built-in functions if long double is IEEE 128-bit. */
28305 if (TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
28306 && TREE_CODE (decl) == FUNCTION_DECL
28307 && DECL_IS_UNDECLARED_BUILTIN (decl)
28308 && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
28309 {
28310 size_t len = IDENTIFIER_LENGTH (id);
28311 const char *name = IDENTIFIER_POINTER (id);
28312 char *newname = NULL;
28313
28314 /* See if it is one of the built-in functions with an unusual name. */
28315 switch (DECL_FUNCTION_CODE (decl))
28316 {
28317 case BUILT_IN_DREML:
28318 newname = xstrdup ("__remainderieee128");
28319 break;
28320
28321 case BUILT_IN_GAMMAL:
28322 newname = xstrdup ("__lgammaieee128");
28323 break;
28324
28325 case BUILT_IN_GAMMAL_R:
28326 case BUILT_IN_LGAMMAL_R:
28327 newname = xstrdup ("__lgammaieee128_r");
28328 break;
28329
28330 case BUILT_IN_NEXTTOWARD:
28331 newname = xstrdup ("__nexttoward_to_ieee128");
28332 break;
28333
28334 case BUILT_IN_NEXTTOWARDF:
28335 newname = xstrdup ("__nexttowardf_to_ieee128");
28336 break;
28337
28338 case BUILT_IN_NEXTTOWARDL:
28339 newname = xstrdup ("__nexttowardieee128");
28340 break;
28341
28342 case BUILT_IN_POW10L:
28343 newname = xstrdup ("__exp10ieee128");
28344 break;
28345
28346 case BUILT_IN_SCALBL:
28347 newname = xstrdup ("__scalbieee128");
28348 break;
28349
28350 case BUILT_IN_SIGNIFICANDL:
28351 newname = xstrdup ("__significandieee128");
28352 break;
28353
28354 case BUILT_IN_SINCOSL:
28355 newname = xstrdup ("__sincosieee128");
28356 break;
28357
28358 default:
28359 break;
28360 }
28361
28362 /* Update the __builtin_*printf and __builtin_*scanf functions. */
28363 if (!newname)
28364 {
28365 size_t printf_len = strlen ("printf");
28366 size_t scanf_len = strlen ("scanf");
28367 size_t printf_chk_len = strlen ("printf_chk");
28368
28369 if (len >= printf_len
28370 && strcmp (name + len - printf_len, "printf") == 0)
28371 newname = xasprintf ("__%sieee128", name);
28372
28373 else if (len >= scanf_len
28374 && strcmp (name + len - scanf_len, "scanf") == 0)
28375 newname = xasprintf ("__isoc99_%sieee128", name);
28376
28377 else if (len >= printf_chk_len
28378 && strcmp (name + len - printf_chk_len, "printf_chk") == 0)
28379 newname = xasprintf ("%sieee128", name);
28380
28381 else if (name[len - 1] == 'l')
28382 {
28383 bool uses_ieee128_p = false;
28384 tree type = TREE_TYPE (decl);
28385 machine_mode ret_mode = TYPE_MODE (type);
28386
28387 /* See if the function returns an IEEE 128-bit floating point type or
28388 complex type. */
28389 if (ret_mode == TFmode || ret_mode == TCmode)
28390 uses_ieee128_p = true;
28391 else
28392 {
28393 function_args_iterator args_iter;
28394 tree arg;
28395
28396 /* See if the function passes an IEEE 128-bit floating point type
28397 or complex type. */
28398 FOREACH_FUNCTION_ARGS (type, arg, args_iter)
28399 {
28400 machine_mode arg_mode = TYPE_MODE (arg);
28401 if (arg_mode == TFmode || arg_mode == TCmode)
28402 {
28403 uses_ieee128_p = true;
28404 break;
28405 }
28406 }
28407 }
28408
28409 /* If we passed or returned an IEEE 128-bit floating point type,
28410 change the name. Use __<name>ieee128 instead of <name>l. */
28411 if (uses_ieee128_p)
28412 newname = xasprintf ("__%.*sieee128", (int)(len - 1), name);
28413 }
28414 }
28415
28416 if (newname)
28417 {
28418 if (TARGET_DEBUG_BUILTIN)
28419 fprintf (stderr, "Map %s => %s\n", name, newname);
28420
28421 id = get_identifier (newname);
28422 free (newname);
28423 }
28424 }
28425
28426 return id;
28427 }
28428
28429 /* Predict whether the given loop in gimple will be transformed in the RTL
28430 doloop_optimize pass. */
28431
28432 static bool
28433 rs6000_predict_doloop_p (struct loop *loop)
28434 {
28435 gcc_assert (loop);
28436
28437 /* On rs6000, targetm.can_use_doloop_p is actually
28438 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
28439 if (loop->inner != NULL)
28440 {
28441 if (dump_file && (dump_flags & TDF_DETAILS))
28442 fprintf (dump_file, "Predict doloop failure due to"
28443 " loop nesting.\n");
28444 return false;
28445 }
28446
28447 return true;
28448 }
28449
28450 /* Implement TARGET_PREFERRED_DOLOOP_MODE. */
28451
28452 static machine_mode
28453 rs6000_preferred_doloop_mode (machine_mode)
28454 {
28455 return word_mode;
28456 }
28457
28458 /* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. */
28459
28460 static bool
28461 rs6000_cannot_substitute_mem_equiv_p (rtx mem)
28462 {
28463 gcc_assert (MEM_P (mem));
28464
28465 /* curr_insn_transform()'s handling of subregs cannot handle Altivec AND-style
28466 addresses, so don't allow MEMs with those address types to be
28467 substituted as an equivalent expression. See PR93974 for details. */
28468 if (GET_CODE (XEXP (mem, 0)) == AND)
28469 return true;
28470
28471 return false;
28472 }
28473
28474 /* Implement TARGET_INVALID_CONVERSION. */
28475
28476 static const char *
28477 rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
28478 {
28479 /* Make sure we're working with the canonical types. */
28480 if (TYPE_CANONICAL (fromtype) != NULL_TREE)
28481 fromtype = TYPE_CANONICAL (fromtype);
28482 if (TYPE_CANONICAL (totype) != NULL_TREE)
28483 totype = TYPE_CANONICAL (totype);
28484
28485 machine_mode frommode = TYPE_MODE (fromtype);
28486 machine_mode tomode = TYPE_MODE (totype);
28487
28488 if (frommode != tomode)
28489 {
28490 /* Do not allow conversions to/from XOmode and OOmode types. */
28491 if (frommode == XOmode)
28492 return N_("invalid conversion from type %<__vector_quad%>");
28493 if (tomode == XOmode)
28494 return N_("invalid conversion to type %<__vector_quad%>");
28495 if (frommode == OOmode)
28496 return N_("invalid conversion from type %<__vector_pair%>");
28497 if (tomode == OOmode)
28498 return N_("invalid conversion to type %<__vector_pair%>");
28499 }
28500
28501 /* Conversion allowed. */
28502 return NULL;
28503 }
28504
28505 /* Convert a SFmode constant to the integer bit pattern. */
28506
28507 long
28508 rs6000_const_f32_to_i32 (rtx operand)
28509 {
28510 long value;
28511 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (operand);
28512
28513 gcc_assert (GET_MODE (operand) == SFmode);
28514 REAL_VALUE_TO_TARGET_SINGLE (*rv, value);
28515 return value;
28516 }
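/* For example, the SFmode constant 1.0f yields the bit pattern 0x3f800000,
   and -2.0f yields 0xc0000000.  */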
28517
28518 void
28519 rs6000_emit_xxspltidp_v2df (rtx dst, long value)
28520 {
28521 if (((value & 0x7F800000) == 0) && ((value & 0x7FFFFF) != 0))
28522 inform (input_location,
28523 "the result for the xxspltidp instruction "
28524 "is undefined for subnormal input values");
28525 emit_insn (gen_xxspltidp_v2df_inst (dst, GEN_INT (value)));
28526 }
28527
28528 /* Implement TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC. */
28529
28530 static bool
28531 rs6000_gen_pic_addr_diff_vec (void)
28532 {
28533 return rs6000_relative_jumptables;
28534 }
28535
28536 void
28537 rs6000_output_addr_vec_elt (FILE *file, int value)
28538 {
28539 const char *directive = TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t";
28540 char buf[100];
28541
28542 fprintf (file, "%s", directive);
28543 ASM_GENERATE_INTERNAL_LABEL (buf, "L", value);
28544 assemble_name (file, buf);
28545 fprintf (file, "\n");
28546 }
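/* For instance (a sketch; the exact label spelling is target-dependent),
   VALUE == 42 on a 32-bit ELF target would emit something like
	.long .L42
   while TARGET_64BIT uses DOUBLE_INT_ASM_OP instead of .long.  */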
28547
28548 \f
28549 /* Copy an integer constant to the vector constant structure. */
28550
28551 static void
28552 constant_int_to_128bit_vector (rtx op,
28553 machine_mode mode,
28554 size_t byte_num,
28555 vec_const_128bit_type *info)
28556 {
28557 unsigned HOST_WIDE_INT uvalue = UINTVAL (op);
28558 unsigned bitsize = GET_MODE_BITSIZE (mode);
28559
28560 for (int shift = bitsize - 8; shift >= 0; shift -= 8)
28561 info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
28562 }
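
/* For example, the DImode value 0x1122334455667788 with BYTE_NUM 0 fills
   info->bytes[0..7] with 11 22 33 44 55 66 77 88, most significant byte
   first.  */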
28563
28564 /* Copy a floating point constant to the vector constant structure. */
28565
28566 static void
28567 constant_fp_to_128bit_vector (rtx op,
28568 machine_mode mode,
28569 size_t byte_num,
28570 vec_const_128bit_type *info)
28571 {
28572 unsigned bitsize = GET_MODE_BITSIZE (mode);
28573 unsigned num_words = bitsize / 32;
28574 const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op);
28575 long real_words[VECTOR_128BIT_WORDS];
28576
28577 /* Make sure we don't overflow the real_words array and that it is
28578 filled completely. */
28579 gcc_assert (num_words <= VECTOR_128BIT_WORDS && (bitsize % 32) == 0);
28580
28581 real_to_target (real_words, rtype, mode);
28582
28583 /* Iterate over each 32-bit word in the floating point constant. The
28584 real_to_target function puts out words in target endian fashion. We need
28585 to arrange the order so that the bytes are written in big endian order. */
28586 for (unsigned num = 0; num < num_words; num++)
28587 {
28588 unsigned endian_num = (BYTES_BIG_ENDIAN
28589 ? num
28590 : num_words - 1 - num);
28591
28592 unsigned uvalue = real_words[endian_num];
28593 for (int shift = 32 - 8; shift >= 0; shift -= 8)
28594 info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
28595 }
28596
28597 /* Mark that this constant involves floating point. */
28598 info->fp_constant_p = true;
28599 }
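
/* For example, DFmode 1.0 has target words 0x3ff00000 and 0x00000000
   (high word first after the swap above), so info->bytes[0..7] become
   3f f0 00 00 00 00 00 00 regardless of the target's endianness.  */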
28600
28601 /* Convert a vector constant OP with mode MODE to a vector 128-bit constant
28602 structure INFO.
28603
28604 Break the constant out into bytes, half words, words, and double words.
28605 Return true if we have successfully converted the constant.
28606
28607 We handle CONST_INT, CONST_DOUBLE, CONST_VECTOR, and VEC_DUPLICATE of
28608 constants. Integer and floating point scalar constants are splatted to fill
28609 out the vector. */
28610
28611 bool
28612 vec_const_128bit_to_bytes (rtx op,
28613 machine_mode mode,
28614 vec_const_128bit_type *info)
28615 {
28616 /* Initialize the constant structure. */
28617 memset ((void *)info, 0, sizeof (vec_const_128bit_type));
28618
28619 /* Assume CONST_INTs are DImode. */
28620 if (mode == VOIDmode)
28621 mode = CONST_INT_P (op) ? DImode : GET_MODE (op);
28622
28623 if (mode == VOIDmode)
28624 return false;
28625
28626 unsigned size = GET_MODE_SIZE (mode);
28627 bool splat_p = false;
28628
28629 if (size > VECTOR_128BIT_BYTES)
28630 return false;
28631
28632 /* Set up the bits. */
28633 switch (GET_CODE (op))
28634 {
28635 /* Integer constants, default to double word. */
28636 case CONST_INT:
28637 {
28638 constant_int_to_128bit_vector (op, mode, 0, info);
28639 splat_p = true;
28640 break;
28641 }
28642
28643 /* Floating point constants. */
28644 case CONST_DOUBLE:
28645 {
28646 /* Fail if the floating point constant is the wrong mode. */
28647 if (GET_MODE (op) != mode)
28648 return false;
28649
28650 /* SFmode values stored as scalars are kept in DFmode format. */
28651 if (mode == SFmode)
28652 {
28653 mode = DFmode;
28654 size = GET_MODE_SIZE (DFmode);
28655 }
28656
28657 constant_fp_to_128bit_vector (op, mode, 0, info);
28658 splat_p = true;
28659 break;
28660 }
28661
28662 /* Vector constants, iterate over each element. On little endian
28663 systems, we have to reverse the element numbers. */
28664 case CONST_VECTOR:
28665 {
28666 /* Fail if the vector constant is the wrong mode or size. */
28667 if (GET_MODE (op) != mode
28668 || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES)
28669 return false;
28670
28671 machine_mode ele_mode = GET_MODE_INNER (mode);
28672 size_t ele_size = GET_MODE_SIZE (ele_mode);
28673 size_t nunits = GET_MODE_NUNITS (mode);
28674
28675 for (size_t num = 0; num < nunits; num++)
28676 {
28677 rtx ele = CONST_VECTOR_ELT (op, num);
28678 size_t byte_num = (BYTES_BIG_ENDIAN
28679 ? num
28680 : nunits - 1 - num) * ele_size;
28681
28682 if (CONST_INT_P (ele))
28683 constant_int_to_128bit_vector (ele, ele_mode, byte_num, info);
28684 else if (CONST_DOUBLE_P (ele))
28685 constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info);
28686 else
28687 return false;
28688 }
28689
28690 break;
28691 }
28692
28693 /* Treat VEC_DUPLICATE of a constant just like a vector constant.
28694 Since we are duplicating the element, we don't have to worry about
28695 endian issues. */
28696 case VEC_DUPLICATE:
28697 {
28698 /* Fail if the vector duplicate is the wrong mode or size. */
28699 if (GET_MODE (op) != mode
28700 || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES)
28701 return false;
28702
28703 machine_mode ele_mode = GET_MODE_INNER (mode);
28704 size_t ele_size = GET_MODE_SIZE (ele_mode);
28705 rtx ele = XEXP (op, 0);
28706 size_t nunits = GET_MODE_NUNITS (mode);
28707
28708 if (!CONST_INT_P (ele) && !CONST_DOUBLE_P (ele))
28709 return false;
28710
28711 for (size_t num = 0; num < nunits; num++)
28712 {
28713 size_t byte_num = num * ele_size;
28714
28715 if (CONST_INT_P (ele))
28716 constant_int_to_128bit_vector (ele, ele_mode, byte_num, info);
28717 else
28718 constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info);
28719 }
28720
28721 break;
28722 }
28723
28724 /* Anything else, just return failure. */
28725 default:
28726 return false;
28727 }
28728
28729 /* Splat the constant to fill 128 bits if desired. */
28730 if (splat_p && size < VECTOR_128BIT_BYTES)
28731 {
28732 if ((VECTOR_128BIT_BYTES % size) != 0)
28733 return false;
28734
28735 for (size_t offset = size;
28736 offset < VECTOR_128BIT_BYTES;
28737 offset += size)
28738 memcpy ((void *) &info->bytes[offset],
28739 (void *) &info->bytes[0],
28740 size);
28741 }
28742
28743 /* Remember original size. */
28744 info->original_size = size;
28745
28746 /* Determine if the bytes are all the same. */
28747 unsigned char first_byte = info->bytes[0];
28748 info->all_bytes_same = true;
28749 for (size_t i = 1; i < VECTOR_128BIT_BYTES; i++)
28750 if (first_byte != info->bytes[i])
28751 {
28752 info->all_bytes_same = false;
28753 break;
28754 }
28755
28756 /* Pack half words together & determine if all of the half words are the
28757 same. */
28758 for (size_t i = 0; i < VECTOR_128BIT_HALF_WORDS; i++)
28759 info->half_words[i] = ((info->bytes[i * 2] << 8)
28760 | info->bytes[(i * 2) + 1]);
28761
28762 unsigned short first_hword = info->half_words[0];
28763 info->all_half_words_same = true;
28764 for (size_t i = 1; i < VECTOR_128BIT_HALF_WORDS; i++)
28765 if (first_hword != info->half_words[i])
28766 {
28767 info->all_half_words_same = false;
28768 break;
28769 }
28770
28771 /* Pack words together & determine if all of the words are the same. */
28772 for (size_t i = 0; i < VECTOR_128BIT_WORDS; i++)
28773 info->words[i] = ((info->bytes[i * 4] << 24)
28774 | (info->bytes[(i * 4) + 1] << 16)
28775 | (info->bytes[(i * 4) + 2] << 8)
28776 | info->bytes[(i * 4) + 3]);
28777
28778 info->all_words_same
28779 = (info->words[0] == info->words[1]
28780 && info->words[0] == info->words[2]
28781 && info->words[0] == info->words[3]);
28783
28784 /* Pack double words together & determine if all of the double words are the
28785 same. */
28786 for (size_t i = 0; i < VECTOR_128BIT_DOUBLE_WORDS; i++)
28787 {
28788 unsigned HOST_WIDE_INT d_word = 0;
28789 for (size_t j = 0; j < 8; j++)
28790 d_word = (d_word << 8) | info->bytes[(i * 8) + j];
28791
28792 info->double_words[i] = d_word;
28793 }
28794
28795 info->all_double_words_same
28796 = (info->double_words[0] == info->double_words[1]);
28797
28798 return true;
28799 }
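
/* For example, (const_int 1) is treated as the DImode value 1 and then
   splatted, giving double_words[0] == double_words[1] == 1 and
   all_double_words_same set, while all_bytes_same and all_words_same
   stay clear (only bytes[7] and bytes[15] are 0x01).  */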
28800
28801 /* Determine if an IEEE 128-bit constant can be loaded with LXVKQ. Return zero
28802 if the LXVKQ instruction cannot be used. Otherwise return the immediate
28803 value to be used with the LXVKQ instruction. */
28804
28805 unsigned
28806 constant_generates_lxvkq (vec_const_128bit_type *vsx_const)
28807 {
28808 /* The instruction is only supported when generating power10 code with
28809 IEEE 128-bit floating point hardware and VSX registers available. */
28810 if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10
28811 || !TARGET_VSX)
28812 return 0;
28813
28814 /* All of the constants that can be generated by LXVKQ have the bottom
28815 three words equal to 0. */
28816 if (vsx_const->words[1] != 0
28817 || vsx_const->words[2] != 0
28818 || vsx_const->words[3] != 0)
28819 return 0;
28820
28821 /* See if we have a match for the first word. */
28822 switch (vsx_const->words[0])
28823 {
28824 case 0x3FFF0000U: return 1; /* IEEE 128-bit +1.0. */
28825 case 0x40000000U: return 2; /* IEEE 128-bit +2.0. */
28826 case 0x40008000U: return 3; /* IEEE 128-bit +3.0. */
28827 case 0x40010000U: return 4; /* IEEE 128-bit +4.0. */
28828 case 0x40014000U: return 5; /* IEEE 128-bit +5.0. */
28829 case 0x40018000U: return 6; /* IEEE 128-bit +6.0. */
28830 case 0x4001C000U: return 7; /* IEEE 128-bit +7.0. */
28831 case 0x7FFF0000U: return 8; /* IEEE 128-bit +Infinity. */
28832 case 0x7FFF8000U: return 9; /* IEEE 128-bit quiet NaN. */
28833 case 0x80000000U: return 16; /* IEEE 128-bit -0.0. */
28834 case 0xBFFF0000U: return 17; /* IEEE 128-bit -1.0. */
28835 case 0xC0000000U: return 18; /* IEEE 128-bit -2.0. */
28836 case 0xC0008000U: return 19; /* IEEE 128-bit -3.0. */
28837 case 0xC0010000U: return 20; /* IEEE 128-bit -4.0. */
28838 case 0xC0014000U: return 21; /* IEEE 128-bit -5.0. */
28839 case 0xC0018000U: return 22; /* IEEE 128-bit -6.0. */
28840 case 0xC001C000U: return 23; /* IEEE 128-bit -7.0. */
28841 case 0xFFFF0000U: return 24; /* IEEE 128-bit -Infinity. */
28842
28843 /* Anything else cannot be loaded. */
28844 default:
28845 break;
28846 }
28847
28848 return 0;
28849 }
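
/* For example, IEEE 128-bit +1.0 has words 0x3fff0000, 0, 0, 0, so this
   returns 1 and the constant can be loaded with "lxvkq <reg>,1" (the
   register choice here is illustrative).  */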
28850
28851 /* Determine if a vector constant can be loaded with XXSPLTIW. Return zero if
28852 the XXSPLTIW instruction cannot be used. Otherwise return the immediate
28853 value to be used with the XXSPLTIW instruction. */
28854
28855 unsigned
28856 constant_generates_xxspltiw (vec_const_128bit_type *vsx_const)
28857 {
28858 if (!TARGET_SPLAT_WORD_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
28859 return 0;
28860
28861 if (!vsx_const->all_words_same)
28862 return 0;
28863
28864 /* If we can use XXSPLTIB, don't generate XXSPLTIW. */
28865 if (vsx_const->all_bytes_same)
28866 return 0;
28867
28868 /* See if we can use VSPLTISH or VSPLTISW. */
28869 if (vsx_const->all_half_words_same)
28870 {
28871 short sign_h_word = vsx_const->half_words[0];
28872 if (EASY_VECTOR_15 (sign_h_word))
28873 return 0;
28874 }
28875
28876 int sign_word = vsx_const->words[0];
28877 if (EASY_VECTOR_15 (sign_word))
28878 return 0;
28879
28880 return vsx_const->words[0];
28881 }
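
/* For example, a V4SImode constant of four 0x12345678 words returns
   0x12345678 for use as the XXSPLTIW immediate, while four 0x00000005
   words return 0 because VSPLTISW can already load that value.  */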
28882
28883 /* Determine if a vector constant can be loaded with XXSPLTIDP. Return zero if
28884 the XXSPLTIDP instruction cannot be used. Otherwise return the immediate
28885 value to be used with the XXSPLTIDP instruction. */
28886
28887 unsigned
28888 constant_generates_xxspltidp (vec_const_128bit_type *vsx_const)
28889 {
28890 if (!TARGET_SPLAT_FLOAT_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
28891 return 0;
28892
28893 /* Reject if the two 64-bit segments are not the same. */
28894 if (!vsx_const->all_double_words_same)
28895 return 0;
28896
28897 /* If the bytes, half words, or words are all the same, don't use XXSPLTIDP.
28898 Use a simpler instruction (XXSPLTIB, VSPLTISB, VSPLTISH, or VSPLTISW). */
28899 if (vsx_const->all_bytes_same
28900 || vsx_const->all_half_words_same
28901 || vsx_const->all_words_same)
28902 return 0;
28903
28904 unsigned HOST_WIDE_INT value = vsx_const->double_words[0];
28905
28906 /* Avoid values that look like DFmode NaNs, except for the normal NaN bit
28907 pattern and the signaling NaN bit pattern. Recognize infinity and
28908 negative infinity. */
28909
28910 /* Bit representation of DFmode normal quiet NaN. */
28911 #define RS6000_CONST_DF_NAN HOST_WIDE_INT_UC (0x7ff8000000000000)
28912
28913 /* Bit representation of DFmode normal signaling NaN. */
28914 #define RS6000_CONST_DF_NANS HOST_WIDE_INT_UC (0x7ff4000000000000)
28915
28916 /* Bit representation of DFmode positive infinity. */
28917 #define RS6000_CONST_DF_INF HOST_WIDE_INT_UC (0x7ff0000000000000)
28918
28919 /* Bit representation of DFmode negative infinity. */
28920 #define RS6000_CONST_DF_NEG_INF HOST_WIDE_INT_UC (0xfff0000000000000)
28921
28922 if (value != RS6000_CONST_DF_NAN
28923 && value != RS6000_CONST_DF_NANS
28924 && value != RS6000_CONST_DF_INF
28925 && value != RS6000_CONST_DF_NEG_INF)
28926 {
28927 /* The IEEE 754 64-bit floating format has 1 bit for sign, 11 bits for
28928 the exponent, and 52 bits for the mantissa (not counting the hidden
28929 bit used for normal numbers). NaN values have the exponent set to all
28930 1 bits, and the mantissa non-zero (mantissa == 0 is infinity). */
28931
28932 int df_exponent = (value >> 52) & 0x7ff;
28933 unsigned HOST_WIDE_INT
28934 df_mantissa = value & ((HOST_WIDE_INT_1U << 52) - HOST_WIDE_INT_1U);
28935
28936 if (df_exponent == 0x7ff && df_mantissa != 0) /* other NaNs. */
28937 return 0;
28938
28939 /* Avoid values that are DFmode subnormal values. Subnormal numbers have
28940 the exponent all 0 bits, and the mantissa non-zero. If the value is
28941 subnormal, then the hidden bit in the mantissa is not set. */
28942 if (df_exponent == 0 && df_mantissa != 0) /* subnormal. */
28943 return 0;
28944 }
28945
28946 /* Change the representation to DFmode constant. */
28947 long df_words[2] = { vsx_const->words[0], vsx_const->words[1] };
28948
28949 /* real_from_target takes the target words in target order. */
28950 if (!BYTES_BIG_ENDIAN)
28951 std::swap (df_words[0], df_words[1]);
28952
28953 REAL_VALUE_TYPE rv_type;
28954 real_from_target (&rv_type, df_words, DFmode);
28955
28956 const REAL_VALUE_TYPE *rv = &rv_type;
28957
28958 /* Validate that the number can be stored as an SFmode value. */
28959 if (!exact_real_truncate (SFmode, rv))
28960 return 0;
28961
28962 /* Validate that the number is not a SFmode subnormal value (exponent is 0,
28963 mantissa field is non-zero) which is undefined for the XXSPLTIDP
28964 instruction. */
28965 long sf_value;
28966 real_to_target (&sf_value, rv, SFmode);
28967
28968 /* IEEE 754 32-bit values have 1 bit for the sign, 8 bits for the exponent,
28969 and 23 bits for the mantissa. Subnormal numbers have the exponent all
28970 0 bits, and the mantissa non-zero. */
28971 long sf_exponent = (sf_value >> 23) & 0xFF;
28972 long sf_mantissa = sf_value & 0x7FFFFF;
28973
28974 if (sf_exponent == 0 && sf_mantissa != 0)
28975 return 0;
28976
28977 /* Return the immediate to be used. */
28978 return sf_value;
28979 }
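
/* For example, a V2DFmode constant of two 1.0 values has both double
   words equal to 0x3ff0000000000000; 1.0 truncates exactly to SFmode,
   so this returns 0x3f800000.  Two copies of 1.0/3.0 are rejected
   because the truncation to SFmode is inexact.  */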
28980
28981 /* Now we have only two opaque types, the __vector_quad and
28982 __vector_pair built-in types. They are target specific and
28983 only available when MMA is supported. With MMA supported, this
28984 simply returns false; otherwise it checks whether the given gimple
28985 STMT is an assignment, asm or call stmt that uses either of
28986 these two opaque types unexpectedly. If so, it raises an
28987 error message and returns true, otherwise it returns false. */
28988
28989 bool
28990 rs6000_opaque_type_invalid_use_p (gimple *stmt)
28991 {
28992 if (TARGET_MMA)
28993 return false;
28994
28995 /* If the given TYPE is one MMA opaque type, emit the corresponding
28996 error messages and return true, otherwise return false. */
28997 auto check_and_error_invalid_use = [](tree type)
28998 {
28999 tree mv = TYPE_MAIN_VARIANT (type);
29000 if (mv == vector_quad_type_node)
29001 {
29002 error ("type %<__vector_quad%> requires the %qs option", "-mmma");
29003 return true;
29004 }
29005 else if (mv == vector_pair_type_node)
29006 {
29007 error ("type %<__vector_pair%> requires the %qs option", "-mmma");
29008 return true;
29009 }
29010 return false;
29011 };
29012
29013 if (stmt)
29014 {
29015 /* The usage of MMA opaque types is very limited for now,
29016 to check with gassign, gasm and gcall is enough so far. */
29017 if (gassign *ga = dyn_cast<gassign *> (stmt))
29018 {
29019 tree lhs = gimple_assign_lhs (ga);
29020 tree type = TREE_TYPE (lhs);
29021 if (check_and_error_invalid_use (type))
29022 return true;
29023 }
29024 else if (gasm *gs = dyn_cast<gasm *> (stmt))
29025 {
29026 unsigned ninputs = gimple_asm_ninputs (gs);
29027 for (unsigned i = 0; i < ninputs; i++)
29028 {
29029 tree op = gimple_asm_input_op (gs, i);
29030 tree val = TREE_VALUE (op);
29031 tree type = TREE_TYPE (val);
29032 if (check_and_error_invalid_use (type))
29033 return true;
29034 }
29035 unsigned noutputs = gimple_asm_noutputs (gs);
29036 for (unsigned i = 0; i < noutputs; i++)
29037 {
29038 tree op = gimple_asm_output_op (gs, i);
29039 tree val = TREE_VALUE (op);
29040 tree type = TREE_TYPE (val);
29041 if (check_and_error_invalid_use (type))
29042 return true;
29043 }
29044 }
29045 else if (gcall *gc = dyn_cast<gcall *> (stmt))
29046 {
29047 unsigned nargs = gimple_call_num_args (gc);
29048 for (unsigned i = 0; i < nargs; i++)
29049 {
29050 tree arg = gimple_call_arg (gc, i);
29051 tree type = TREE_TYPE (arg);
29052 if (check_and_error_invalid_use (type))
29053 return true;
29054 }
29055 }
29056 }
29057
29058 return false;
29059 }
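
/* For example, when compiling without -mmma, an assignment such as

     __vector_quad acc = *(__vector_quad *) ptr;

   reaches here as a gassign whose LHS type is __vector_quad (ptr being
   any suitably cast pointer) and is diagnosed with
   "type '__vector_quad' requires the '-mmma' option".  */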
29060
29061 struct gcc_target targetm = TARGET_INITIALIZER;
29062
29063 #include "gt-rs6000.h"