1 /* Subroutines used for code generation on IBM RS/6000.
2 Copyright (C) 1991-2015 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "regs.h"
27 #include "hard-reg-set.h"
28 #include "insn-config.h"
29 #include "conditions.h"
30 #include "insn-attr.h"
31 #include "flags.h"
32 #include "recog.h"
33 #include "obstack.h"
34 #include "hash-set.h"
35 #include "machmode.h"
36 #include "vec.h"
37 #include "double-int.h"
38 #include "input.h"
39 #include "alias.h"
40 #include "symtab.h"
41 #include "wide-int.h"
42 #include "inchash.h"
43 #include "tree.h"
44 #include "fold-const.h"
45 #include "stringpool.h"
46 #include "stor-layout.h"
47 #include "calls.h"
48 #include "print-tree.h"
49 #include "varasm.h"
50 #include "hashtab.h"
51 #include "function.h"
52 #include "statistics.h"
53 #include "real.h"
54 #include "fixed-value.h"
55 #include "expmed.h"
56 #include "dojump.h"
57 #include "explow.h"
58 #include "emit-rtl.h"
59 #include "stmt.h"
60 #include "expr.h"
61 #include "insn-codes.h"
62 #include "optabs.h"
63 #include "except.h"
64 #include "output.h"
65 #include "dbxout.h"
66 #include "predict.h"
67 #include "dominance.h"
68 #include "cfg.h"
69 #include "cfgrtl.h"
70 #include "cfganal.h"
71 #include "lcm.h"
72 #include "cfgbuild.h"
73 #include "cfgcleanup.h"
74 #include "basic-block.h"
75 #include "diagnostic-core.h"
76 #include "toplev.h"
77 #include "ggc.h"
78 #include "tm_p.h"
79 #include "target.h"
80 #include "target-def.h"
81 #include "common/common-target.h"
82 #include "langhooks.h"
83 #include "reload.h"
84 #include "cfgloop.h"
85 #include "sched-int.h"
86 #include "hash-table.h"
87 #include "tree-ssa-alias.h"
88 #include "internal-fn.h"
89 #include "gimple-fold.h"
90 #include "tree-eh.h"
91 #include "gimple-expr.h"
92 #include "is-a.h"
93 #include "gimple.h"
94 #include "gimplify.h"
95 #include "gimple-iterator.h"
96 #include "gimple-walk.h"
97 #include "intl.h"
98 #include "params.h"
99 #include "tm-constrs.h"
100 #include "ira.h"
101 #include "opts.h"
102 #include "tree-vectorizer.h"
103 #include "dumpfile.h"
104 #include "hash-map.h"
105 #include "plugin-api.h"
106 #include "ipa-ref.h"
107 #include "cgraph.h"
108 #include "target-globals.h"
109 #include "builtins.h"
110 #include "context.h"
111 #include "tree-pass.h"
112 #if TARGET_XCOFF
113 #include "xcoffout.h" /* get declarations of xcoff_*_section_name */
114 #endif
115 #if TARGET_MACHO
116 #include "gstab.h" /* for N_SLINE */
117 #endif
118
119 #ifndef TARGET_NO_PROTOTYPE
120 #define TARGET_NO_PROTOTYPE 0
121 #endif
122
123 #define min(A,B) ((A) < (B) ? (A) : (B))
124 #define max(A,B) ((A) > (B) ? (A) : (B))
125
126 /* Structure used to define the rs6000 stack */
127 typedef struct rs6000_stack {
128 int reload_completed; /* stack info won't change from here on */
129 int first_gp_reg_save; /* first callee saved GP register used */
130 int first_fp_reg_save; /* first callee saved FP register used */
131 int first_altivec_reg_save; /* first callee saved AltiVec register used */
132 int lr_save_p; /* true if the link reg needs to be saved */
133 int cr_save_p; /* true if the CR reg needs to be saved */
134 unsigned int vrsave_mask; /* mask of vec registers to save */
135 int push_p; /* true if we need to allocate stack space */
136 int calls_p; /* true if the function makes any calls */
137 int world_save_p; /* true if we're saving *everything*:
138 r13-r31, cr, f14-f31, vrsave, v20-v31 */
139 enum rs6000_abi abi; /* which ABI to use */
140 int gp_save_offset; /* offset to save GP regs from initial SP */
141 int fp_save_offset; /* offset to save FP regs from initial SP */
142 int altivec_save_offset; /* offset to save AltiVec regs from initial SP */
143 int lr_save_offset; /* offset to save LR from initial SP */
144 int cr_save_offset; /* offset to save CR from initial SP */
145 int vrsave_save_offset; /* offset to save VRSAVE from initial SP */
146 int spe_gp_save_offset; /* offset to save spe 64-bit gprs */
147 int varargs_save_offset; /* offset to save the varargs registers */
148 int ehrd_offset; /* offset to EH return data */
149 int ehcr_offset; /* offset to EH CR field data */
150 int reg_size; /* register size (4 or 8) */
151 HOST_WIDE_INT vars_size; /* variable save area size */
152 int parm_size; /* outgoing parameter size */
153 int save_size; /* save area size */
154 int fixed_size; /* fixed size of stack frame */
155 int gp_size; /* size of saved GP registers */
156 int fp_size; /* size of saved FP registers */
157 int altivec_size; /* size of saved AltiVec registers */
158 int cr_size; /* size to hold CR if not in save_size */
159 int vrsave_size; /* size to hold VRSAVE if not in save_size */
160 int altivec_padding_size; /* size of altivec alignment padding if
161 not in save_size */
162 int spe_gp_size; /* size of 64-bit GPR save area for SPE */
163 int spe_padding_size; /* size of SPE alignment padding */
164 HOST_WIDE_INT total_size; /* total bytes allocated for stack */
165 int spe_64bit_regs_used; /* nonzero if SPE 64-bit registers are used */
166 int savres_strategy; /* mask of save/restore strategy flags */
167 } rs6000_stack_t;
168
169 /* A C structure for machine-specific, per-function data.
170 This is added to the cfun structure. */
171 typedef struct GTY(()) machine_function
172 {
173 /* Whether the instruction chain has been scanned already. */
174 int insn_chain_scanned_p;
175 /* Flags if __builtin_return_address (n) with n >= 1 was used. */
176 int ra_needs_full_frame;
177 /* Flags if __builtin_return_address (0) was used. */
178 int ra_need_lr;
179 /* Cache lr_save_p after expansion of builtin_eh_return. */
180 int lr_save_state;
181 /* Whether we need to save the TOC to the reserved stack location in the
182 function prologue. */
183 bool save_toc_in_prologue;
184 /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
185 varargs save area. */
186 HOST_WIDE_INT varargs_save_offset;
187 /* Temporary stack slot to use for SDmode copies. This slot is
188 64-bits wide and is allocated early enough so that the offset
189 does not overflow the 16-bit load/store offset field. */
190 rtx sdmode_stack_slot;
191 /* Flag if r2 setup is needed with ELFv2 ABI. */
192 bool r2_setup_needed;
193 } machine_function;
194
195 /* Support targetm.vectorize.builtin_mask_for_load. */
196 static GTY(()) tree altivec_builtin_mask_for_load;
197
198 /* Set to nonzero once AIX common-mode calls have been defined. */
199 static GTY(()) int common_mode_defined;
200
201 /* Label number of the label created for -mrelocatable; we call to it to
202 obtain the address of the GOT section. */
203 static int rs6000_pic_labelno;
204
205 #ifdef USING_ELFOS_H
206 /* Counter for labels which are to be placed in .fixup. */
207 int fixuplabelno = 0;
208 #endif
209
210 /* Whether to use a variant of the AIX ABI for PowerPC64 Linux. */
211 int dot_symbols;
212
213 /* Specify the machine mode that pointers have. After generation of rtl, the
214 compiler makes no further distinction between pointers and any other objects
215 of this machine mode. The type is unsigned since not all things that
216 include rs6000.h also include machmode.h. */
217 unsigned rs6000_pmode;
218
219 /* Width in bits of a pointer. */
220 unsigned rs6000_pointer_size;
221
222 #ifdef HAVE_AS_GNU_ATTRIBUTE
223 /* Flag whether floating point values have been passed/returned. */
224 static bool rs6000_passes_float;
225 /* Flag whether vector values have been passed/returned. */
226 static bool rs6000_passes_vector;
227 /* Flag whether small (<= 8 byte) structures have been returned. */
228 static bool rs6000_returns_struct;
229 #endif
230
231 /* Value is TRUE if register/mode pair is acceptable. */
232 bool rs6000_hard_regno_mode_ok_p[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
233
234 /* Maximum number of registers needed for a given register class and mode. */
235 unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];
236
237 /* How many registers are needed for a given register and mode. */
238 unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
239
240 /* Map register number to register class. */
241 enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];
242
243 static int dbg_cost_ctrl;
244
245 /* Built in types. */
246 tree rs6000_builtin_types[RS6000_BTI_MAX];
247 tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];
248
249 /* Flag to say the TOC is initialized */
250 int toc_initialized;
251 char toc_label_name[10];
252
253 /* Cached result of rs6000_variable_issue; set by the rs6000_variable_issue
254 hook and returned from rs6000_sched_reorder2. */
255 static short cached_can_issue_more;
256
257 static GTY(()) section *read_only_data_section;
258 static GTY(()) section *private_data_section;
259 static GTY(()) section *tls_data_section;
260 static GTY(()) section *tls_private_data_section;
261 static GTY(()) section *read_only_private_data_section;
262 static GTY(()) section *sdata2_section;
263 static GTY(()) section *toc_section;
264
265 struct builtin_description
266 {
267 const HOST_WIDE_INT mask;
268 const enum insn_code icode;
269 const char *const name;
270 const enum rs6000_builtins code;
271 };
272
273 /* Describe the vector unit used for modes. */
274 enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
275 enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];
276
277 /* Register classes for various constraints that are based on the target
278 switches. */
279 enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
280
281 /* Describe the alignment of a vector. */
282 int rs6000_vector_align[NUM_MACHINE_MODES];
283
284 /* Map selected modes to types for builtins. */
285 static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];
286
287 /* What modes to automatically generate reciprocal divide estimate (fre) and
288 reciprocal sqrt (frsqrte) for. */
289 unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
290
291 /* Masks to determine which reciprocal estimate instructions to generate
292 automatically. */
293 enum rs6000_recip_mask {
294 RECIP_SF_DIV = 0x001, /* Use divide estimate */
295 RECIP_DF_DIV = 0x002,
296 RECIP_V4SF_DIV = 0x004,
297 RECIP_V2DF_DIV = 0x008,
298
299 RECIP_SF_RSQRT = 0x010, /* Use reciprocal sqrt estimate. */
300 RECIP_DF_RSQRT = 0x020,
301 RECIP_V4SF_RSQRT = 0x040,
302 RECIP_V2DF_RSQRT = 0x080,
303
304 /* Various combinations of flags for -mrecip=xxx. */
305 RECIP_NONE = 0,
306 RECIP_ALL = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
307 | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
308 | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),
309
310 RECIP_HIGH_PRECISION = RECIP_ALL,
311
312 /* On low precision machines like the power5, don't enable double precision
313 reciprocal square root estimate, since it isn't accurate enough. */
314 RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
315 };
316
317 /* -mrecip options. */
318 static struct
319 {
320 const char *string; /* option name */
321 unsigned int mask; /* mask bits to set */
322 } recip_options[] = {
323 { "all", RECIP_ALL },
324 { "none", RECIP_NONE },
325 { "div", (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
326 | RECIP_V2DF_DIV) },
327 { "divf", (RECIP_SF_DIV | RECIP_V4SF_DIV) },
328 { "divd", (RECIP_DF_DIV | RECIP_V2DF_DIV) },
329 { "rsqrt", (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
330 | RECIP_V2DF_RSQRT) },
331 { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
332 { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
333 };
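
/* As an illustration (the option spelling here is an assumption, not taken
   from this file): -mrecip=divf,rsqrtd would be matched against the table
   above, enabling RECIP_SF_DIV | RECIP_V4SF_DIV together with
   RECIP_DF_RSQRT | RECIP_V2DF_RSQRT.  */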
334
335 /* Pointer to function (in rs6000-c.c) that can define or undefine target
336 macros that have changed. Languages that don't support the preprocessor
337 don't link in rs6000-c.c, so we can't call it directly. */
338 void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
339
340 /* Simplify register classes into simpler classifications. We assume
341 GPR_REG_TYPE through FPR_REG_TYPE are ordered so that we can use a simple
342 range check for standard register classes (gpr/floating/altivec/vsx) and
343 floating/vector classes (float/altivec/vsx). */
344
345 enum rs6000_reg_type {
346 NO_REG_TYPE,
347 PSEUDO_REG_TYPE,
348 GPR_REG_TYPE,
349 VSX_REG_TYPE,
350 ALTIVEC_REG_TYPE,
351 FPR_REG_TYPE,
352 SPR_REG_TYPE,
353 CR_REG_TYPE,
354 SPE_ACC_TYPE,
355 SPEFSCR_REG_TYPE
356 };
357
358 /* Map register class to register type. */
359 static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
360
361 /* First/last register type for the 'normal' register types (i.e. general
362 purpose, floating point, altivec, and VSX registers). */
363 #define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
364
365 #define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
366
367
368 /* Register classes we care about in secondary reload or when testing for a
369 legitimate address. We only need to worry about GPR, FPR, and Altivec
370 registers here, plus an ANY field that is the OR of the three classes. */
371
372 enum rs6000_reload_reg_type {
373 RELOAD_REG_GPR, /* General purpose registers. */
374 RELOAD_REG_FPR, /* Traditional floating point regs. */
375 RELOAD_REG_VMX, /* Altivec (VMX) registers. */
376 RELOAD_REG_ANY, /* OR of GPR, FPR, Altivec masks. */
377 N_RELOAD_REG
378 };
379
380 /* For setting up register classes, loop through the 3 register classes mapping
381 into real registers, and skip the ANY class, which is just an OR of the
382 bits. */
383 #define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
384 #define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX
385
386 /* Map reload register type to a register in the register class. */
387 struct reload_reg_map_type {
388 const char *name; /* Register class name. */
389 int reg; /* Register in the register class. */
390 };
391
392 static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
393 { "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */
394 { "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */
395 { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. */
396 { "Any", -1 }, /* RELOAD_REG_ANY. */
397 };
398
399 /* Mask bits for each register class, indexed per mode. Historically the
400 compiler has been more restrictive about which types can do PRE_MODIFY
401 rather than PRE_INC and PRE_DEC, so keep separate bits for these two. */
402 typedef unsigned char addr_mask_type;
403
404 #define RELOAD_REG_VALID 0x01 /* Mode valid in register. */
405 #define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers. */
406 #define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing. */
407 #define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */
408 #define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */
409 #define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */
410 #define RELOAD_REG_AND_M16 0x40 /* AND -16 addressing. */
411
412 /* Reload insns and valid address masks, by register type, for each mode. */
413 struct rs6000_reg_addr {
414 enum insn_code reload_load; /* INSN to reload for loading. */
415 enum insn_code reload_store; /* INSN to reload for storing. */
416 enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR. */
417 enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX. */
418 enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR. */
419 addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. */
420 bool scalar_in_vmx_p; /* Scalar value can go in VMX. */
421 };
422
423 static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
424
425 /* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
426 static inline bool
427 mode_supports_pre_incdec_p (machine_mode mode)
428 {
429 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
430 != 0);
431 }
432
433 /* Helper function to say whether a mode supports PRE_MODIFY. */
434 static inline bool
435 mode_supports_pre_modify_p (machine_mode mode)
436 {
437 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
438 != 0);
439 }
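
/* A minimal sketch of how the addr_mask bits compose; the query below is
   hypothetical (not part of this port) and only illustrates the idiom used
   by the two helpers above, here for reg+reg (indexed) addressing:

   static inline bool
   mode_supports_indexed_p (machine_mode mode)
   {
     return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_INDEXED)
             != 0);
   }  */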
440
441 \f
442 /* Target cpu costs. */
443
444 struct processor_costs {
445 const int mulsi; /* cost of SImode multiplication. */
446 const int mulsi_const; /* cost of SImode multiplication by constant. */
447 const int mulsi_const9; /* cost of SImode mult by short constant. */
448 const int muldi; /* cost of DImode multiplication. */
449 const int divsi; /* cost of SImode division. */
450 const int divdi; /* cost of DImode division. */
451 const int fp; /* cost of simple SFmode and DFmode insns. */
452 const int dmul; /* cost of DFmode multiplication (and fmadd). */
453 const int sdiv; /* cost of SFmode division (fdivs). */
454 const int ddiv; /* cost of DFmode division (fdiv). */
455 const int cache_line_size; /* cache line size in bytes. */
456 const int l1_cache_size; /* size of l1 cache, in kilobytes. */
457 const int l2_cache_size; /* size of l2 cache, in kilobytes. */
458 const int simultaneous_prefetches; /* number of parallel prefetch
459 operations. */
460 };
461
462 const struct processor_costs *rs6000_cost;
463
464 /* Processor costs (relative to an add). */
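
/* COSTS_N_INSNS scales an instruction count into the units used by the rtx
   cost hooks, so an entry of COSTS_N_INSNS (4) below stands for roughly four
   times the cost of an integer add.  */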
465
466 /* Instruction size costs on 32-bit processors. */
467 static const
468 struct processor_costs size32_cost = {
469 COSTS_N_INSNS (1), /* mulsi */
470 COSTS_N_INSNS (1), /* mulsi_const */
471 COSTS_N_INSNS (1), /* mulsi_const9 */
472 COSTS_N_INSNS (1), /* muldi */
473 COSTS_N_INSNS (1), /* divsi */
474 COSTS_N_INSNS (1), /* divdi */
475 COSTS_N_INSNS (1), /* fp */
476 COSTS_N_INSNS (1), /* dmul */
477 COSTS_N_INSNS (1), /* sdiv */
478 COSTS_N_INSNS (1), /* ddiv */
479 32, /* cache line size */
480 0, /* l1 cache */
481 0, /* l2 cache */
482 0, /* streams */
483 };
484
485 /* Instruction size costs on 64-bit processors. */
486 static const
487 struct processor_costs size64_cost = {
488 COSTS_N_INSNS (1), /* mulsi */
489 COSTS_N_INSNS (1), /* mulsi_const */
490 COSTS_N_INSNS (1), /* mulsi_const9 */
491 COSTS_N_INSNS (1), /* muldi */
492 COSTS_N_INSNS (1), /* divsi */
493 COSTS_N_INSNS (1), /* divdi */
494 COSTS_N_INSNS (1), /* fp */
495 COSTS_N_INSNS (1), /* dmul */
496 COSTS_N_INSNS (1), /* sdiv */
497 COSTS_N_INSNS (1), /* ddiv */
498 128, /* cache line size */
499 0, /* l1 cache */
500 0, /* l2 cache */
501 0, /* streams */
502 };
503
504 /* Instruction costs on RS64A processors. */
505 static const
506 struct processor_costs rs64a_cost = {
507 COSTS_N_INSNS (20), /* mulsi */
508 COSTS_N_INSNS (12), /* mulsi_const */
509 COSTS_N_INSNS (8), /* mulsi_const9 */
510 COSTS_N_INSNS (34), /* muldi */
511 COSTS_N_INSNS (65), /* divsi */
512 COSTS_N_INSNS (67), /* divdi */
513 COSTS_N_INSNS (4), /* fp */
514 COSTS_N_INSNS (4), /* dmul */
515 COSTS_N_INSNS (31), /* sdiv */
516 COSTS_N_INSNS (31), /* ddiv */
517 128, /* cache line size */
518 128, /* l1 cache */
519 2048, /* l2 cache */
520 1, /* streams */
521 };
522
523 /* Instruction costs on MPCCORE processors. */
524 static const
525 struct processor_costs mpccore_cost = {
526 COSTS_N_INSNS (2), /* mulsi */
527 COSTS_N_INSNS (2), /* mulsi_const */
528 COSTS_N_INSNS (2), /* mulsi_const9 */
529 COSTS_N_INSNS (2), /* muldi */
530 COSTS_N_INSNS (6), /* divsi */
531 COSTS_N_INSNS (6), /* divdi */
532 COSTS_N_INSNS (4), /* fp */
533 COSTS_N_INSNS (5), /* dmul */
534 COSTS_N_INSNS (10), /* sdiv */
535 COSTS_N_INSNS (17), /* ddiv */
536 32, /* cache line size */
537 4, /* l1 cache */
538 16, /* l2 cache */
539 1, /* streams */
540 };
541
542 /* Instruction costs on PPC403 processors. */
543 static const
544 struct processor_costs ppc403_cost = {
545 COSTS_N_INSNS (4), /* mulsi */
546 COSTS_N_INSNS (4), /* mulsi_const */
547 COSTS_N_INSNS (4), /* mulsi_const9 */
548 COSTS_N_INSNS (4), /* muldi */
549 COSTS_N_INSNS (33), /* divsi */
550 COSTS_N_INSNS (33), /* divdi */
551 COSTS_N_INSNS (11), /* fp */
552 COSTS_N_INSNS (11), /* dmul */
553 COSTS_N_INSNS (11), /* sdiv */
554 COSTS_N_INSNS (11), /* ddiv */
555 32, /* cache line size */
556 4, /* l1 cache */
557 16, /* l2 cache */
558 1, /* streams */
559 };
560
561 /* Instruction costs on PPC405 processors. */
562 static const
563 struct processor_costs ppc405_cost = {
564 COSTS_N_INSNS (5), /* mulsi */
565 COSTS_N_INSNS (4), /* mulsi_const */
566 COSTS_N_INSNS (3), /* mulsi_const9 */
567 COSTS_N_INSNS (5), /* muldi */
568 COSTS_N_INSNS (35), /* divsi */
569 COSTS_N_INSNS (35), /* divdi */
570 COSTS_N_INSNS (11), /* fp */
571 COSTS_N_INSNS (11), /* dmul */
572 COSTS_N_INSNS (11), /* sdiv */
573 COSTS_N_INSNS (11), /* ddiv */
574 32, /* cache line size */
575 16, /* l1 cache */
576 128, /* l2 cache */
577 1, /* streams */
578 };
579
580 /* Instruction costs on PPC440 processors. */
581 static const
582 struct processor_costs ppc440_cost = {
583 COSTS_N_INSNS (3), /* mulsi */
584 COSTS_N_INSNS (2), /* mulsi_const */
585 COSTS_N_INSNS (2), /* mulsi_const9 */
586 COSTS_N_INSNS (3), /* muldi */
587 COSTS_N_INSNS (34), /* divsi */
588 COSTS_N_INSNS (34), /* divdi */
589 COSTS_N_INSNS (5), /* fp */
590 COSTS_N_INSNS (5), /* dmul */
591 COSTS_N_INSNS (19), /* sdiv */
592 COSTS_N_INSNS (33), /* ddiv */
593 32, /* cache line size */
594 32, /* l1 cache */
595 256, /* l2 cache */
596 1, /* streams */
597 };
598
599 /* Instruction costs on PPC476 processors. */
600 static const
601 struct processor_costs ppc476_cost = {
602 COSTS_N_INSNS (4), /* mulsi */
603 COSTS_N_INSNS (4), /* mulsi_const */
604 COSTS_N_INSNS (4), /* mulsi_const9 */
605 COSTS_N_INSNS (4), /* muldi */
606 COSTS_N_INSNS (11), /* divsi */
607 COSTS_N_INSNS (11), /* divdi */
608 COSTS_N_INSNS (6), /* fp */
609 COSTS_N_INSNS (6), /* dmul */
610 COSTS_N_INSNS (19), /* sdiv */
611 COSTS_N_INSNS (33), /* ddiv */
612 32, /* l1 cache line size */
613 32, /* l1 cache */
614 512, /* l2 cache */
615 1, /* streams */
616 };
617
618 /* Instruction costs on PPC601 processors. */
619 static const
620 struct processor_costs ppc601_cost = {
621 COSTS_N_INSNS (5), /* mulsi */
622 COSTS_N_INSNS (5), /* mulsi_const */
623 COSTS_N_INSNS (5), /* mulsi_const9 */
624 COSTS_N_INSNS (5), /* muldi */
625 COSTS_N_INSNS (36), /* divsi */
626 COSTS_N_INSNS (36), /* divdi */
627 COSTS_N_INSNS (4), /* fp */
628 COSTS_N_INSNS (5), /* dmul */
629 COSTS_N_INSNS (17), /* sdiv */
630 COSTS_N_INSNS (31), /* ddiv */
631 32, /* cache line size */
632 32, /* l1 cache */
633 256, /* l2 cache */
634 1, /* streams */
635 };
636
637 /* Instruction costs on PPC603 processors. */
638 static const
639 struct processor_costs ppc603_cost = {
640 COSTS_N_INSNS (5), /* mulsi */
641 COSTS_N_INSNS (3), /* mulsi_const */
642 COSTS_N_INSNS (2), /* mulsi_const9 */
643 COSTS_N_INSNS (5), /* muldi */
644 COSTS_N_INSNS (37), /* divsi */
645 COSTS_N_INSNS (37), /* divdi */
646 COSTS_N_INSNS (3), /* fp */
647 COSTS_N_INSNS (4), /* dmul */
648 COSTS_N_INSNS (18), /* sdiv */
649 COSTS_N_INSNS (33), /* ddiv */
650 32, /* cache line size */
651 8, /* l1 cache */
652 64, /* l2 cache */
653 1, /* streams */
654 };
655
656 /* Instruction costs on PPC604 processors. */
657 static const
658 struct processor_costs ppc604_cost = {
659 COSTS_N_INSNS (4), /* mulsi */
660 COSTS_N_INSNS (4), /* mulsi_const */
661 COSTS_N_INSNS (4), /* mulsi_const9 */
662 COSTS_N_INSNS (4), /* muldi */
663 COSTS_N_INSNS (20), /* divsi */
664 COSTS_N_INSNS (20), /* divdi */
665 COSTS_N_INSNS (3), /* fp */
666 COSTS_N_INSNS (3), /* dmul */
667 COSTS_N_INSNS (18), /* sdiv */
668 COSTS_N_INSNS (32), /* ddiv */
669 32, /* cache line size */
670 16, /* l1 cache */
671 512, /* l2 cache */
672 1, /* streams */
673 };
674
675 /* Instruction costs on PPC604e processors. */
676 static const
677 struct processor_costs ppc604e_cost = {
678 COSTS_N_INSNS (2), /* mulsi */
679 COSTS_N_INSNS (2), /* mulsi_const */
680 COSTS_N_INSNS (2), /* mulsi_const9 */
681 COSTS_N_INSNS (2), /* muldi */
682 COSTS_N_INSNS (20), /* divsi */
683 COSTS_N_INSNS (20), /* divdi */
684 COSTS_N_INSNS (3), /* fp */
685 COSTS_N_INSNS (3), /* dmul */
686 COSTS_N_INSNS (18), /* sdiv */
687 COSTS_N_INSNS (32), /* ddiv */
688 32, /* cache line size */
689 32, /* l1 cache */
690 1024, /* l2 cache */
691 1, /* streams */
692 };
693
694 /* Instruction costs on PPC620 processors. */
695 static const
696 struct processor_costs ppc620_cost = {
697 COSTS_N_INSNS (5), /* mulsi */
698 COSTS_N_INSNS (4), /* mulsi_const */
699 COSTS_N_INSNS (3), /* mulsi_const9 */
700 COSTS_N_INSNS (7), /* muldi */
701 COSTS_N_INSNS (21), /* divsi */
702 COSTS_N_INSNS (37), /* divdi */
703 COSTS_N_INSNS (3), /* fp */
704 COSTS_N_INSNS (3), /* dmul */
705 COSTS_N_INSNS (18), /* sdiv */
706 COSTS_N_INSNS (32), /* ddiv */
707 128, /* cache line size */
708 32, /* l1 cache */
709 1024, /* l2 cache */
710 1, /* streams */
711 };
712
713 /* Instruction costs on PPC630 processors. */
714 static const
715 struct processor_costs ppc630_cost = {
716 COSTS_N_INSNS (5), /* mulsi */
717 COSTS_N_INSNS (4), /* mulsi_const */
718 COSTS_N_INSNS (3), /* mulsi_const9 */
719 COSTS_N_INSNS (7), /* muldi */
720 COSTS_N_INSNS (21), /* divsi */
721 COSTS_N_INSNS (37), /* divdi */
722 COSTS_N_INSNS (3), /* fp */
723 COSTS_N_INSNS (3), /* dmul */
724 COSTS_N_INSNS (17), /* sdiv */
725 COSTS_N_INSNS (21), /* ddiv */
726 128, /* cache line size */
727 64, /* l1 cache */
728 1024, /* l2 cache */
729 1, /* streams */
730 };
731
732 /* Instruction costs on Cell processor. */
733 /* COSTS_N_INSNS (1) ~ one add. */
734 static const
735 struct processor_costs ppccell_cost = {
736 COSTS_N_INSNS (9/2)+2, /* mulsi */
737 COSTS_N_INSNS (6/2), /* mulsi_const */
738 COSTS_N_INSNS (6/2), /* mulsi_const9 */
739 COSTS_N_INSNS (15/2)+2, /* muldi */
740 COSTS_N_INSNS (38/2), /* divsi */
741 COSTS_N_INSNS (70/2), /* divdi */
742 COSTS_N_INSNS (10/2), /* fp */
743 COSTS_N_INSNS (10/2), /* dmul */
744 COSTS_N_INSNS (74/2), /* sdiv */
745 COSTS_N_INSNS (74/2), /* ddiv */
746 128, /* cache line size */
747 32, /* l1 cache */
748 512, /* l2 cache */
749 6, /* streams */
750 };
751
752 /* Instruction costs on PPC750 and PPC7400 processors. */
753 static const
754 struct processor_costs ppc750_cost = {
755 COSTS_N_INSNS (5), /* mulsi */
756 COSTS_N_INSNS (3), /* mulsi_const */
757 COSTS_N_INSNS (2), /* mulsi_const9 */
758 COSTS_N_INSNS (5), /* muldi */
759 COSTS_N_INSNS (17), /* divsi */
760 COSTS_N_INSNS (17), /* divdi */
761 COSTS_N_INSNS (3), /* fp */
762 COSTS_N_INSNS (3), /* dmul */
763 COSTS_N_INSNS (17), /* sdiv */
764 COSTS_N_INSNS (31), /* ddiv */
765 32, /* cache line size */
766 32, /* l1 cache */
767 512, /* l2 cache */
768 1, /* streams */
769 };
770
771 /* Instruction costs on PPC7450 processors. */
772 static const
773 struct processor_costs ppc7450_cost = {
774 COSTS_N_INSNS (4), /* mulsi */
775 COSTS_N_INSNS (3), /* mulsi_const */
776 COSTS_N_INSNS (3), /* mulsi_const9 */
777 COSTS_N_INSNS (4), /* muldi */
778 COSTS_N_INSNS (23), /* divsi */
779 COSTS_N_INSNS (23), /* divdi */
780 COSTS_N_INSNS (5), /* fp */
781 COSTS_N_INSNS (5), /* dmul */
782 COSTS_N_INSNS (21), /* sdiv */
783 COSTS_N_INSNS (35), /* ddiv */
784 32, /* cache line size */
785 32, /* l1 cache */
786 1024, /* l2 cache */
787 1, /* streams */
788 };
789
790 /* Instruction costs on PPC8540 processors. */
791 static const
792 struct processor_costs ppc8540_cost = {
793 COSTS_N_INSNS (4), /* mulsi */
794 COSTS_N_INSNS (4), /* mulsi_const */
795 COSTS_N_INSNS (4), /* mulsi_const9 */
796 COSTS_N_INSNS (4), /* muldi */
797 COSTS_N_INSNS (19), /* divsi */
798 COSTS_N_INSNS (19), /* divdi */
799 COSTS_N_INSNS (4), /* fp */
800 COSTS_N_INSNS (4), /* dmul */
801 COSTS_N_INSNS (29), /* sdiv */
802 COSTS_N_INSNS (29), /* ddiv */
803 32, /* cache line size */
804 32, /* l1 cache */
805 256, /* l2 cache */
806 1, /* prefetch streams */
807 };
808
809 /* Instruction costs on E300C2 and E300C3 cores. */
810 static const
811 struct processor_costs ppce300c2c3_cost = {
812 COSTS_N_INSNS (4), /* mulsi */
813 COSTS_N_INSNS (4), /* mulsi_const */
814 COSTS_N_INSNS (4), /* mulsi_const9 */
815 COSTS_N_INSNS (4), /* muldi */
816 COSTS_N_INSNS (19), /* divsi */
817 COSTS_N_INSNS (19), /* divdi */
818 COSTS_N_INSNS (3), /* fp */
819 COSTS_N_INSNS (4), /* dmul */
820 COSTS_N_INSNS (18), /* sdiv */
821 COSTS_N_INSNS (33), /* ddiv */
822 32, /* cache line size */
823 16, /* l1 cache */
824 16, /* l2 cache */
825 1, /* prefetch streams */
826 };
827
828 /* Instruction costs on PPCE500MC processors. */
829 static const
830 struct processor_costs ppce500mc_cost = {
831 COSTS_N_INSNS (4), /* mulsi */
832 COSTS_N_INSNS (4), /* mulsi_const */
833 COSTS_N_INSNS (4), /* mulsi_const9 */
834 COSTS_N_INSNS (4), /* muldi */
835 COSTS_N_INSNS (14), /* divsi */
836 COSTS_N_INSNS (14), /* divdi */
837 COSTS_N_INSNS (8), /* fp */
838 COSTS_N_INSNS (10), /* dmul */
839 COSTS_N_INSNS (36), /* sdiv */
840 COSTS_N_INSNS (66), /* ddiv */
841 64, /* cache line size */
842 32, /* l1 cache */
843 128, /* l2 cache */
844 1, /* prefetch streams */
845 };
846
847 /* Instruction costs on PPCE500MC64 processors. */
848 static const
849 struct processor_costs ppce500mc64_cost = {
850 COSTS_N_INSNS (4), /* mulsi */
851 COSTS_N_INSNS (4), /* mulsi_const */
852 COSTS_N_INSNS (4), /* mulsi_const9 */
853 COSTS_N_INSNS (4), /* muldi */
854 COSTS_N_INSNS (14), /* divsi */
855 COSTS_N_INSNS (14), /* divdi */
856 COSTS_N_INSNS (4), /* fp */
857 COSTS_N_INSNS (10), /* dmul */
858 COSTS_N_INSNS (36), /* sdiv */
859 COSTS_N_INSNS (66), /* ddiv */
860 64, /* cache line size */
861 32, /* l1 cache */
862 128, /* l2 cache */
863 1, /* prefetch streams */
864 };
865
866 /* Instruction costs on PPCE5500 processors. */
867 static const
868 struct processor_costs ppce5500_cost = {
869 COSTS_N_INSNS (5), /* mulsi */
870 COSTS_N_INSNS (5), /* mulsi_const */
871 COSTS_N_INSNS (4), /* mulsi_const9 */
872 COSTS_N_INSNS (5), /* muldi */
873 COSTS_N_INSNS (14), /* divsi */
874 COSTS_N_INSNS (14), /* divdi */
875 COSTS_N_INSNS (7), /* fp */
876 COSTS_N_INSNS (10), /* dmul */
877 COSTS_N_INSNS (36), /* sdiv */
878 COSTS_N_INSNS (66), /* ddiv */
879 64, /* cache line size */
880 32, /* l1 cache */
881 128, /* l2 cache */
882 1, /* prefetch streams */
883 };
884
885 /* Instruction costs on PPCE6500 processors. */
886 static const
887 struct processor_costs ppce6500_cost = {
888 COSTS_N_INSNS (5), /* mulsi */
889 COSTS_N_INSNS (5), /* mulsi_const */
890 COSTS_N_INSNS (4), /* mulsi_const9 */
891 COSTS_N_INSNS (5), /* muldi */
892 COSTS_N_INSNS (14), /* divsi */
893 COSTS_N_INSNS (14), /* divdi */
894 COSTS_N_INSNS (7), /* fp */
895 COSTS_N_INSNS (10), /* dmul */
896 COSTS_N_INSNS (36), /* sdiv */
897 COSTS_N_INSNS (66), /* ddiv */
898 64, /* cache line size */
899 32, /* l1 cache */
900 128, /* l2 cache */
901 1, /* prefetch streams */
902 };
903
904 /* Instruction costs on AppliedMicro Titan processors. */
905 static const
906 struct processor_costs titan_cost = {
907 COSTS_N_INSNS (5), /* mulsi */
908 COSTS_N_INSNS (5), /* mulsi_const */
909 COSTS_N_INSNS (5), /* mulsi_const9 */
910 COSTS_N_INSNS (5), /* muldi */
911 COSTS_N_INSNS (18), /* divsi */
912 COSTS_N_INSNS (18), /* divdi */
913 COSTS_N_INSNS (10), /* fp */
914 COSTS_N_INSNS (10), /* dmul */
915 COSTS_N_INSNS (46), /* sdiv */
916 COSTS_N_INSNS (72), /* ddiv */
917 32, /* cache line size */
918 32, /* l1 cache */
919 512, /* l2 cache */
920 1, /* prefetch streams */
921 };
922
923 /* Instruction costs on POWER4 and POWER5 processors. */
924 static const
925 struct processor_costs power4_cost = {
926 COSTS_N_INSNS (3), /* mulsi */
927 COSTS_N_INSNS (2), /* mulsi_const */
928 COSTS_N_INSNS (2), /* mulsi_const9 */
929 COSTS_N_INSNS (4), /* muldi */
930 COSTS_N_INSNS (18), /* divsi */
931 COSTS_N_INSNS (34), /* divdi */
932 COSTS_N_INSNS (3), /* fp */
933 COSTS_N_INSNS (3), /* dmul */
934 COSTS_N_INSNS (17), /* sdiv */
935 COSTS_N_INSNS (17), /* ddiv */
936 128, /* cache line size */
937 32, /* l1 cache */
938 1024, /* l2 cache */
939 8, /* prefetch streams */
940 };
941
942 /* Instruction costs on POWER6 processors. */
943 static const
944 struct processor_costs power6_cost = {
945 COSTS_N_INSNS (8), /* mulsi */
946 COSTS_N_INSNS (8), /* mulsi_const */
947 COSTS_N_INSNS (8), /* mulsi_const9 */
948 COSTS_N_INSNS (8), /* muldi */
949 COSTS_N_INSNS (22), /* divsi */
950 COSTS_N_INSNS (28), /* divdi */
951 COSTS_N_INSNS (3), /* fp */
952 COSTS_N_INSNS (3), /* dmul */
953 COSTS_N_INSNS (13), /* sdiv */
954 COSTS_N_INSNS (16), /* ddiv */
955 128, /* cache line size */
956 64, /* l1 cache */
957 2048, /* l2 cache */
958 16, /* prefetch streams */
959 };
960
961 /* Instruction costs on POWER7 processors. */
962 static const
963 struct processor_costs power7_cost = {
964 COSTS_N_INSNS (2), /* mulsi */
965 COSTS_N_INSNS (2), /* mulsi_const */
966 COSTS_N_INSNS (2), /* mulsi_const9 */
967 COSTS_N_INSNS (2), /* muldi */
968 COSTS_N_INSNS (18), /* divsi */
969 COSTS_N_INSNS (34), /* divdi */
970 COSTS_N_INSNS (3), /* fp */
971 COSTS_N_INSNS (3), /* dmul */
972 COSTS_N_INSNS (13), /* sdiv */
973 COSTS_N_INSNS (16), /* ddiv */
974 128, /* cache line size */
975 32, /* l1 cache */
976 256, /* l2 cache */
977 12, /* prefetch streams */
978 };
979
980 /* Instruction costs on POWER8 processors. */
981 static const
982 struct processor_costs power8_cost = {
983 COSTS_N_INSNS (3), /* mulsi */
984 COSTS_N_INSNS (3), /* mulsi_const */
985 COSTS_N_INSNS (3), /* mulsi_const9 */
986 COSTS_N_INSNS (3), /* muldi */
987 COSTS_N_INSNS (19), /* divsi */
988 COSTS_N_INSNS (35), /* divdi */
989 COSTS_N_INSNS (3), /* fp */
990 COSTS_N_INSNS (3), /* dmul */
991 COSTS_N_INSNS (14), /* sdiv */
992 COSTS_N_INSNS (17), /* ddiv */
993 128, /* cache line size */
994 32, /* l1 cache */
995 256, /* l2 cache */
996 12, /* prefetch streams */
997 };
998
999 /* Instruction costs on POWER A2 processors. */
1000 static const
1001 struct processor_costs ppca2_cost = {
1002 COSTS_N_INSNS (16), /* mulsi */
1003 COSTS_N_INSNS (16), /* mulsi_const */
1004 COSTS_N_INSNS (16), /* mulsi_const9 */
1005 COSTS_N_INSNS (16), /* muldi */
1006 COSTS_N_INSNS (22), /* divsi */
1007 COSTS_N_INSNS (28), /* divdi */
1008 COSTS_N_INSNS (3), /* fp */
1009 COSTS_N_INSNS (3), /* dmul */
1010 COSTS_N_INSNS (59), /* sdiv */
1011 COSTS_N_INSNS (72), /* ddiv */
1012 64, /* cache line size */
1013 16, /* l1 cache */
1014 2048, /* l2 cache */
1015 16, /* prefetch streams */
1016 };
1017
1018 \f
1019 /* Table that classifies rs6000 builtin functions (pure, const, etc.). */
1020 #undef RS6000_BUILTIN_1
1021 #undef RS6000_BUILTIN_2
1022 #undef RS6000_BUILTIN_3
1023 #undef RS6000_BUILTIN_A
1024 #undef RS6000_BUILTIN_D
1025 #undef RS6000_BUILTIN_E
1026 #undef RS6000_BUILTIN_H
1027 #undef RS6000_BUILTIN_P
1028 #undef RS6000_BUILTIN_Q
1029 #undef RS6000_BUILTIN_S
1030 #undef RS6000_BUILTIN_X
1031
1032 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
1033 { NAME, ICODE, MASK, ATTR },
1034
1035 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
1036 { NAME, ICODE, MASK, ATTR },
1037
1038 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
1039 { NAME, ICODE, MASK, ATTR },
1040
1041 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
1042 { NAME, ICODE, MASK, ATTR },
1043
1044 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
1045 { NAME, ICODE, MASK, ATTR },
1046
1047 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
1048 { NAME, ICODE, MASK, ATTR },
1049
1050 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
1051 { NAME, ICODE, MASK, ATTR },
1052
1053 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
1054 { NAME, ICODE, MASK, ATTR },
1055
1056 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
1057 { NAME, ICODE, MASK, ATTR },
1058
1059 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
1060 { NAME, ICODE, MASK, ATTR },
1061
1062 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
1063 { NAME, ICODE, MASK, ATTR },
1064
1065 struct rs6000_builtin_info_type {
1066 const char *name;
1067 const enum insn_code icode;
1068 const HOST_WIDE_INT mask;
1069 const unsigned attr;
1070 };
1071
1072 static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
1073 {
1074 #include "rs6000-builtin.def"
1075 };
1076
1077 #undef RS6000_BUILTIN_1
1078 #undef RS6000_BUILTIN_2
1079 #undef RS6000_BUILTIN_3
1080 #undef RS6000_BUILTIN_A
1081 #undef RS6000_BUILTIN_D
1082 #undef RS6000_BUILTIN_E
1083 #undef RS6000_BUILTIN_H
1084 #undef RS6000_BUILTIN_P
1085 #undef RS6000_BUILTIN_Q
1086 #undef RS6000_BUILTIN_S
1087 #undef RS6000_BUILTIN_X
1088
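/* To illustrate the X-macro scheme above: each RS6000_BUILTIN_* macro is
   redefined to emit one initializer, so a hypothetical entry in
   rs6000-builtin.def such as

     RS6000_BUILTIN_2 (FOO, "__builtin_foo", MASK, ATTR, CODE_FOR_foo)

   expands to { "__builtin_foo", CODE_FOR_foo, MASK, ATTR } inside
   rs6000_builtin_info, matching the field order of
   rs6000_builtin_info_type.  */
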
1089 /* Support for -mveclibabi=<xxx> to control which vector library to use. */
1090 static tree (*rs6000_veclib_handler) (tree, tree, tree);
1091
1092 \f
1093 static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
1094 static bool spe_func_has_64bit_regs_p (void);
1095 static struct machine_function * rs6000_init_machine_status (void);
1096 static int rs6000_ra_ever_killed (void);
1097 static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
1098 static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
1099 static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
1100 static tree rs6000_builtin_vectorized_libmass (tree, tree, tree);
1101 static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
1102 static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
1103 static bool rs6000_debug_rtx_costs (rtx, int, int, int, int *, bool);
1104 static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
1105 bool);
1106 static int rs6000_debug_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
1107 static bool is_microcoded_insn (rtx_insn *);
1108 static bool is_nonpipeline_insn (rtx_insn *);
1109 static bool is_cracked_insn (rtx_insn *);
1110 static bool is_load_insn (rtx, rtx *);
1111 static bool is_store_insn (rtx, rtx *);
1112 static bool set_to_load_agen (rtx_insn *,rtx_insn *);
1113 static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
1114 static bool insn_must_be_first_in_group (rtx_insn *);
1115 static bool insn_must_be_last_in_group (rtx_insn *);
1116 static void altivec_init_builtins (void);
1117 static tree builtin_function_type (machine_mode, machine_mode,
1118 machine_mode, machine_mode,
1119 enum rs6000_builtins, const char *name);
1120 static void rs6000_common_init_builtins (void);
1121 static void paired_init_builtins (void);
1122 static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
1123 static void spe_init_builtins (void);
1124 static void htm_init_builtins (void);
1125 static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
1126 static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
1127 static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
1128 static rs6000_stack_t *rs6000_stack_info (void);
1129 static void is_altivec_return_reg (rtx, void *);
1130 int easy_vector_constant (rtx, machine_mode);
1131 static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
1132 static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
1133 static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
1134 bool, bool);
1135 #if TARGET_MACHO
1136 static void macho_branch_islands (void);
1137 #endif
1138 static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
1139 int, int *);
1140 static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
1141 int, int, int *);
1142 static bool rs6000_mode_dependent_address (const_rtx);
1143 static bool rs6000_debug_mode_dependent_address (const_rtx);
1144 static enum reg_class rs6000_secondary_reload_class (enum reg_class,
1145 machine_mode, rtx);
1146 static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
1147 machine_mode,
1148 rtx);
1149 static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
1150 static enum reg_class rs6000_debug_preferred_reload_class (rtx,
1151 enum reg_class);
1152 static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
1153 machine_mode);
1154 static bool rs6000_debug_secondary_memory_needed (enum reg_class,
1155 enum reg_class,
1156 machine_mode);
1157 static bool rs6000_cannot_change_mode_class (machine_mode,
1158 machine_mode,
1159 enum reg_class);
1160 static bool rs6000_debug_cannot_change_mode_class (machine_mode,
1161 machine_mode,
1162 enum reg_class);
1163 static bool rs6000_save_toc_in_prologue_p (void);
1164
1165 rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
1166 int, int *)
1167 = rs6000_legitimize_reload_address;
1168
1169 static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
1170 = rs6000_mode_dependent_address;
1171
1172 enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
1173 machine_mode, rtx)
1174 = rs6000_secondary_reload_class;
1175
1176 enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
1177 = rs6000_preferred_reload_class;
1178
1179 bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
1180 machine_mode)
1181 = rs6000_secondary_memory_needed;
1182
1183 bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode,
1184 machine_mode,
1185 enum reg_class)
1186 = rs6000_cannot_change_mode_class;
1187
1188 const int INSN_NOT_AVAILABLE = -1;
1189
1190 static void rs6000_print_isa_options (FILE *, int, const char *,
1191 HOST_WIDE_INT);
1192 static void rs6000_print_builtin_options (FILE *, int, const char *,
1193 HOST_WIDE_INT);
1194
1195 static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
1196 static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
1197 enum rs6000_reg_type,
1198 machine_mode,
1199 secondary_reload_info *,
1200 bool);
1201 rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
1202
1203 /* Hash table stuff for keeping track of TOC entries. */
1204
1205 struct GTY((for_user)) toc_hash_struct
1206 {
1207 /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
1208 ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
1209 rtx key;
1210 machine_mode key_mode;
1211 int labelno;
1212 };
1213
1214 struct toc_hasher : ggc_hasher<toc_hash_struct *>
1215 {
1216 static hashval_t hash (toc_hash_struct *);
1217 static bool equal (toc_hash_struct *, toc_hash_struct *);
1218 };
1219
1220 static GTY (()) hash_table<toc_hasher> *toc_hash_table;
1221
1222 /* Hash table to keep track of the argument types for builtin functions. */
1223
1224 struct GTY((for_user)) builtin_hash_struct
1225 {
1226 tree type;
1227 machine_mode mode[4]; /* return value + 3 arguments. */
1228 unsigned char uns_p[4]; /* and whether the types are unsigned. */
1229 };
1230
1231 struct builtin_hasher : ggc_hasher<builtin_hash_struct *>
1232 {
1233 static hashval_t hash (builtin_hash_struct *);
1234 static bool equal (builtin_hash_struct *, builtin_hash_struct *);
1235 };
1236
1237 static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;
1238
1239 \f
1240 /* Default register names. */
1241 char rs6000_reg_names[][8] =
1242 {
1243 "0", "1", "2", "3", "4", "5", "6", "7",
1244 "8", "9", "10", "11", "12", "13", "14", "15",
1245 "16", "17", "18", "19", "20", "21", "22", "23",
1246 "24", "25", "26", "27", "28", "29", "30", "31",
1247 "0", "1", "2", "3", "4", "5", "6", "7",
1248 "8", "9", "10", "11", "12", "13", "14", "15",
1249 "16", "17", "18", "19", "20", "21", "22", "23",
1250 "24", "25", "26", "27", "28", "29", "30", "31",
1251 "mq", "lr", "ctr","ap",
1252 "0", "1", "2", "3", "4", "5", "6", "7",
1253 "ca",
1254 /* AltiVec registers. */
1255 "0", "1", "2", "3", "4", "5", "6", "7",
1256 "8", "9", "10", "11", "12", "13", "14", "15",
1257 "16", "17", "18", "19", "20", "21", "22", "23",
1258 "24", "25", "26", "27", "28", "29", "30", "31",
1259 "vrsave", "vscr",
1260 /* SPE registers. */
1261 "spe_acc", "spefscr",
1262 /* Soft frame pointer. */
1263 "sfp",
1264 /* HTM SPR registers. */
1265 "tfhar", "tfiar", "texasr",
1266 /* SPE High registers. */
1267 "0", "1", "2", "3", "4", "5", "6", "7",
1268 "8", "9", "10", "11", "12", "13", "14", "15",
1269 "16", "17", "18", "19", "20", "21", "22", "23",
1270 "24", "25", "26", "27", "28", "29", "30", "31"
1271 };
1272
1273 #ifdef TARGET_REGNAMES
1274 static const char alt_reg_names[][8] =
1275 {
1276 "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
1277 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
1278 "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
1279 "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
1280 "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
1281 "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
1282 "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
1283 "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
1284 "mq", "lr", "ctr", "ap",
1285 "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
1286 "ca",
1287 /* AltiVec registers. */
1288 "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
1289 "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
1290 "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
1291 "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
1292 "vrsave", "vscr",
1293 /* SPE registers. */
1294 "spe_acc", "spefscr",
1295 /* Soft frame pointer. */
1296 "sfp",
1297 /* HTM SPR registers. */
1298 "tfhar", "tfiar", "texasr",
1299 /* SPE High registers. */
1300 "%rh0", "%rh1", "%rh2", "%rh3", "%rh4", "%rh5", "%rh6", "%rh7",
1301 "%rh8", "%rh9", "%rh10", "%r11", "%rh12", "%rh13", "%rh14", "%rh15",
1302 "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
1303 "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
1304 };
1305 #endif
1306
1307 /* Table of valid machine attributes. */
1308
1309 static const struct attribute_spec rs6000_attribute_table[] =
1310 {
1311 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
1312 affects_type_identity } */
1313 { "altivec", 1, 1, false, true, false, rs6000_handle_altivec_attribute,
1314 false },
1315 { "longcall", 0, 0, false, true, true, rs6000_handle_longcall_attribute,
1316 false },
1317 { "shortcall", 0, 0, false, true, true, rs6000_handle_longcall_attribute,
1318 false },
1319 { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
1320 false },
1321 { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
1322 false },
1323 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1324 SUBTARGET_ATTRIBUTE_TABLE,
1325 #endif
1326 { NULL, 0, 0, false, false, false, NULL, false }
1327 };
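
/* For illustration, hypothetical user-code spellings of these attributes
   (not taken from this file):

     void far_func (void) __attribute__ ((longcall));
     struct pkt { char tag; int len; } __attribute__ ((ms_struct));  */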
1328 \f
1329 #ifndef TARGET_PROFILE_KERNEL
1330 #define TARGET_PROFILE_KERNEL 0
1331 #endif
1332
1333 /* The VRSAVE bitmask puts the bit for %v0 as the most significant bit. */
1334 #define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
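
/* For example, ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO) is 0x80000000, the bit
   for %v0, and ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 31) is 0x00000001, the
   bit for %v31.  */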
1335 \f
1336 /* Initialize the GCC target structure. */
1337 #undef TARGET_ATTRIBUTE_TABLE
1338 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1339 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1340 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1341 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1342 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1343
1344 #undef TARGET_ASM_ALIGNED_DI_OP
1345 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1346
1347 /* Default unaligned ops are only provided for ELF. Find the ops needed
1348 for non-ELF systems. */
1349 #ifndef OBJECT_FORMAT_ELF
1350 #if TARGET_XCOFF
1351 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1352 64-bit targets. */
1353 #undef TARGET_ASM_UNALIGNED_HI_OP
1354 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1355 #undef TARGET_ASM_UNALIGNED_SI_OP
1356 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1357 #undef TARGET_ASM_UNALIGNED_DI_OP
1358 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1359 #else
1360 /* For Darwin. */
1361 #undef TARGET_ASM_UNALIGNED_HI_OP
1362 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1363 #undef TARGET_ASM_UNALIGNED_SI_OP
1364 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1365 #undef TARGET_ASM_UNALIGNED_DI_OP
1366 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1367 #undef TARGET_ASM_ALIGNED_DI_OP
1368 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1369 #endif
1370 #endif
1371
1372 /* This hook deals with fixups for relocatable code and DI-mode objects
1373 in 64-bit code. */
1374 #undef TARGET_ASM_INTEGER
1375 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1376
1377 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1378 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1379 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1380 #endif
1381
1382 #undef TARGET_SET_UP_BY_PROLOGUE
1383 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1384
1385 #undef TARGET_HAVE_TLS
1386 #define TARGET_HAVE_TLS HAVE_AS_TLS
1387
1388 #undef TARGET_CANNOT_FORCE_CONST_MEM
1389 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1390
1391 #undef TARGET_DELEGITIMIZE_ADDRESS
1392 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1393
1394 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1395 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1396
1397 #undef TARGET_ASM_FUNCTION_PROLOGUE
1398 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1399 #undef TARGET_ASM_FUNCTION_EPILOGUE
1400 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1401
1402 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1403 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1404
1405 #undef TARGET_LEGITIMIZE_ADDRESS
1406 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1407
1408 #undef TARGET_SCHED_VARIABLE_ISSUE
1409 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1410
1411 #undef TARGET_SCHED_ISSUE_RATE
1412 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1413 #undef TARGET_SCHED_ADJUST_COST
1414 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1415 #undef TARGET_SCHED_ADJUST_PRIORITY
1416 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1417 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1418 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1419 #undef TARGET_SCHED_INIT
1420 #define TARGET_SCHED_INIT rs6000_sched_init
1421 #undef TARGET_SCHED_FINISH
1422 #define TARGET_SCHED_FINISH rs6000_sched_finish
1423 #undef TARGET_SCHED_REORDER
1424 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1425 #undef TARGET_SCHED_REORDER2
1426 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1427
1428 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1429 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1430
1431 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1432 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1433
1434 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1435 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1436 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1437 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1438 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1439 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1440 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1441 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1442
1443 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1444 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1445 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1446 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1447 rs6000_builtin_support_vector_misalignment
1448 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1449 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1450 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1451 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1452 rs6000_builtin_vectorization_cost
1453 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1454 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1455 rs6000_preferred_simd_mode
1456 #undef TARGET_VECTORIZE_INIT_COST
1457 #define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
1458 #undef TARGET_VECTORIZE_ADD_STMT_COST
1459 #define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
1460 #undef TARGET_VECTORIZE_FINISH_COST
1461 #define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
1462 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
1463 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
1464
1465 #undef TARGET_INIT_BUILTINS
1466 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1467 #undef TARGET_BUILTIN_DECL
1468 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1469
1470 #undef TARGET_EXPAND_BUILTIN
1471 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1472
1473 #undef TARGET_MANGLE_TYPE
1474 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1475
1476 #undef TARGET_INIT_LIBFUNCS
1477 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1478
1479 #if TARGET_MACHO
1480 #undef TARGET_BINDS_LOCAL_P
1481 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1482 #endif
1483
1484 #undef TARGET_MS_BITFIELD_LAYOUT_P
1485 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1486
1487 #undef TARGET_ASM_OUTPUT_MI_THUNK
1488 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1489
1490 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1491 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1492
1493 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1494 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1495
1496 #undef TARGET_REGISTER_MOVE_COST
1497 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1498 #undef TARGET_MEMORY_MOVE_COST
1499 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1500 #undef TARGET_RTX_COSTS
1501 #define TARGET_RTX_COSTS rs6000_rtx_costs
1502 #undef TARGET_ADDRESS_COST
1503 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1504
1505 #undef TARGET_DWARF_REGISTER_SPAN
1506 #define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span
1507
1508 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1509 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1510
1511 #undef TARGET_MEMBER_TYPE_FORCES_BLK
1512 #define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk
1513
1514 /* On rs6000, function arguments are promoted, as are function return
1515 values. */
1516 #undef TARGET_PROMOTE_FUNCTION_MODE
1517 #define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
1518
1519 #undef TARGET_RETURN_IN_MEMORY
1520 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1521
1522 #undef TARGET_RETURN_IN_MSB
1523 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1524
1525 #undef TARGET_SETUP_INCOMING_VARARGS
1526 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1527
1528 /* Always strict argument naming on rs6000. */
1529 #undef TARGET_STRICT_ARGUMENT_NAMING
1530 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1531 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1532 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1533 #undef TARGET_SPLIT_COMPLEX_ARG
1534 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1535 #undef TARGET_MUST_PASS_IN_STACK
1536 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1537 #undef TARGET_PASS_BY_REFERENCE
1538 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1539 #undef TARGET_ARG_PARTIAL_BYTES
1540 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1541 #undef TARGET_FUNCTION_ARG_ADVANCE
1542 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1543 #undef TARGET_FUNCTION_ARG
1544 #define TARGET_FUNCTION_ARG rs6000_function_arg
1545 #undef TARGET_FUNCTION_ARG_BOUNDARY
1546 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1547
1548 #undef TARGET_BUILD_BUILTIN_VA_LIST
1549 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1550
1551 #undef TARGET_EXPAND_BUILTIN_VA_START
1552 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1553
1554 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1555 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1556
1557 #undef TARGET_EH_RETURN_FILTER_MODE
1558 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1559
1560 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1561 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1562
1563 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1564 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1565
1566 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1567 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1568
1569 #undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
1570 #define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
1571
1572 #undef TARGET_MD_ASM_CLOBBERS
1573 #define TARGET_MD_ASM_CLOBBERS rs6000_md_asm_clobbers
1574
1575 #undef TARGET_OPTION_OVERRIDE
1576 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1577
1578 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1579 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1580 rs6000_builtin_vectorized_function
1581
1582 #if !TARGET_MACHO
1583 #undef TARGET_STACK_PROTECT_FAIL
1584 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1585 #endif
1586
1587 /* MPC604EUM 3.5.2 Weak Consistency between Multiple Processors
1588 The PowerPC architecture requires only weak consistency among
1589 processors--that is, memory accesses between processors need not be
1590 sequentially consistent and memory accesses among processors can occur
1591 in any order. The ability to order memory accesses weakly provides
1592 opportunities for more efficient use of the system bus. Unless a
1593 dependency exists, the 604e allows read operations to precede store
1594 operations. */
1595 #undef TARGET_RELAXED_ORDERING
1596 #define TARGET_RELAXED_ORDERING true
1597
1598 #ifdef HAVE_AS_TLS
1599 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1600 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1601 #endif
1602
1603 /* Use a 32-bit anchor range. This leads to sequences like:
1604
1605 addis tmp,anchor,high
1606 add dest,tmp,low
1607
1608 where tmp itself acts as an anchor, and can be shared between
1609 accesses to the same 64k page. */
1610 #undef TARGET_MIN_ANCHOR_OFFSET
1611 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1612 #undef TARGET_MAX_ANCHOR_OFFSET
1613 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
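/* A minimal sketch (illustrative only, not part of this file) of the
   high/low split such an anchored access implies: "high" rounds the
   offset to the nearest multiple of 64k, with a +0x8000 bias to
   compensate for the sign extension of the low 16 bits, so that
   high + low always equals the original offset.  */
#if 0
static inline void
split_anchor_offset (HOST_WIDE_INT offset,
                     HOST_WIDE_INT *high, HOST_WIDE_INT *low)
{
  *high = (offset + 0x8000) & ~(HOST_WIDE_INT) 0xffff;
  *low = offset - *high;        /* Always in [-0x8000, 0x7fff].  */
}
#endif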
1614 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1615 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1616 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1617 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1618
1619 #undef TARGET_BUILTIN_RECIPROCAL
1620 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1621
1622 #undef TARGET_EXPAND_TO_RTL_HOOK
1623 #define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
1624
1625 #undef TARGET_INSTANTIATE_DECLS
1626 #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
1627
1628 #undef TARGET_SECONDARY_RELOAD
1629 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1630
1631 #undef TARGET_LEGITIMATE_ADDRESS_P
1632 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1633
1634 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1635 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1636
1637 #undef TARGET_LRA_P
1638 #define TARGET_LRA_P rs6000_lra_p
1639
1640 #undef TARGET_CAN_ELIMINATE
1641 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1642
1643 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1644 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1645
1646 #undef TARGET_TRAMPOLINE_INIT
1647 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1648
1649 #undef TARGET_FUNCTION_VALUE
1650 #define TARGET_FUNCTION_VALUE rs6000_function_value
1651
1652 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1653 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1654
1655 #undef TARGET_OPTION_SAVE
1656 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1657
1658 #undef TARGET_OPTION_RESTORE
1659 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1660
1661 #undef TARGET_OPTION_PRINT
1662 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1663
1664 #undef TARGET_CAN_INLINE_P
1665 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1666
1667 #undef TARGET_SET_CURRENT_FUNCTION
1668 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1669
1670 #undef TARGET_LEGITIMATE_CONSTANT_P
1671 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1672
1673 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
1674 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
1675
1676 #undef TARGET_CAN_USE_DOLOOP_P
1677 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1678
1679 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1680 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1681 \f
1682
1683 /* Processor table. */
1684 struct rs6000_ptt
1685 {
1686 const char *const name; /* Canonical processor name. */
1687 const enum processor_type processor; /* Processor type enum value. */
1688 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1689 };
1690
1691 static struct rs6000_ptt const processor_target_table[] =
1692 {
1693 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1694 #include "rs6000-cpus.def"
1695 #undef RS6000_CPU
1696 };
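/* Each RS6000_CPU (NAME, CPU, FLAGS) line in rs6000-cpus.def expands to
   one { NAME, CPU, FLAGS } initializer of struct rs6000_ptt above, so
   an entry for, say, "power7" yields { "power7", PROCESSOR_POWER7,
   <its ISA flag mask> } (flags elided here for illustration).  */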
1697
1698 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1699 name is invalid. */
1700
1701 static int
1702 rs6000_cpu_name_lookup (const char *name)
1703 {
1704 size_t i;
1705
1706 if (name != NULL)
1707 {
1708 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1709 if (! strcmp (name, processor_target_table[i].name))
1710 return (int)i;
1711 }
1712
1713 return -1;
1714 }
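/* Illustrative use of the lookup above (a hypothetical caller, not part
   of this file); a negative result means the name was not recognized.  */
#if 0
static void
print_cpu_flags (const char *name)
{
  int idx = rs6000_cpu_name_lookup (name);
  if (idx >= 0)
    fprintf (stderr, "-mcpu=%s enables flags %#lx\n",
             processor_target_table[idx].name,
             (long) processor_target_table[idx].target_enable);
}
#endif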
1715
1716 \f
1717 /* Return number of consecutive hard regs needed starting at reg REGNO
1718 to hold something of mode MODE.
1719 This is ordinarily the length in words of a value of mode MODE
1720 but can be less for certain modes in special long registers.
1721
1722 For the SPE, GPRs are 64 bits but only 32 bits are visible in
1723 scalar instructions. The upper 32 bits are only available to the
1724 SIMD instructions.
1725
1726 POWER and PowerPC GPRs hold 32 bits worth;
1727 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1728
1729 static int
1730 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1731 {
1732 unsigned HOST_WIDE_INT reg_size;
1733
1734 /* TF/TD modes are special in that they always take 2 registers. */
1735 if (FP_REGNO_P (regno))
1736 reg_size = ((VECTOR_MEM_VSX_P (mode) && mode != TDmode && mode != TFmode)
1737 ? UNITS_PER_VSX_WORD
1738 : UNITS_PER_FP_WORD);
1739
1740 else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1741 reg_size = UNITS_PER_SPE_WORD;
1742
1743 else if (ALTIVEC_REGNO_P (regno))
1744 reg_size = UNITS_PER_ALTIVEC_WORD;
1745
1746 /* The value returned for SCmode in the E500 double case is 2 for
1747 ABI compatibility; storing an SCmode value in a single register
1748 would require function_arg and rs6000_spe_function_arg to handle
1749 SCmode so as to pass the value correctly in a pair of
1750 registers. */
1751 else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
1752 && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno))
1753 reg_size = UNITS_PER_FP_WORD;
1754
1755 else
1756 reg_size = UNITS_PER_WORD;
1757
1758 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
1759 }
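/* The return expression above is a ceiling division.  For example,
   TFmode is 16 bytes and UNITS_PER_FP_WORD is 8, so a TFmode value in
   FPRs needs (16 + 8 - 1) / 8 = 2 consecutive registers, matching the
   TF/TD comment at the top of the function.  */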
1760
1761 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1762 MODE. */
1763 static int
1764 rs6000_hard_regno_mode_ok (int regno, machine_mode mode)
1765 {
1766 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1767
1768 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1769 register combinations, and we use PTImode where we need to deal with
1770 such operations. Don't allow quad words in the argument or frame
1771 pointer registers, just registers 0..31. */
1772 if (mode == PTImode)
1773 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1774 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1775 && ((regno & 1) == 0));
1776
1777 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1778 implementations. Don't allow an item to be split between a FP register
1779 and an Altivec register. Allow TImode in all VSX registers if the user
1780 asked for it. */
1781 if (TARGET_VSX && VSX_REGNO_P (regno)
1782 && (VECTOR_MEM_VSX_P (mode)
1783 || reg_addr[mode].scalar_in_vmx_p
1784 || (TARGET_VSX_TIMODE && mode == TImode)
1785 || (TARGET_VADDUQM && mode == V1TImode)))
1786 {
1787 if (FP_REGNO_P (regno))
1788 return FP_REGNO_P (last_regno);
1789
1790 if (ALTIVEC_REGNO_P (regno))
1791 {
1792 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
1793 return 0;
1794
1795 return ALTIVEC_REGNO_P (last_regno);
1796 }
1797 }
1798
1799 /* The GPRs can hold any mode, but values bigger than one register
1800 cannot go past R31. */
1801 if (INT_REGNO_P (regno))
1802 return INT_REGNO_P (last_regno);
1803
1804 /* The float registers (except for VSX vector modes) can only hold floating
1805 modes and DImode. */
1806 if (FP_REGNO_P (regno))
1807 {
1808 if (SCALAR_FLOAT_MODE_P (mode)
1809 && (mode != TDmode || (regno % 2) == 0)
1810 && FP_REGNO_P (last_regno))
1811 return 1;
1812
1813 if (GET_MODE_CLASS (mode) == MODE_INT
1814 && GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1815 return 1;
1816
1817 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
1818 && PAIRED_VECTOR_MODE (mode))
1819 return 1;
1820
1821 return 0;
1822 }
1823
1824 /* The CR register can only hold CC modes. */
1825 if (CR_REGNO_P (regno))
1826 return GET_MODE_CLASS (mode) == MODE_CC;
1827
1828 if (CA_REGNO_P (regno))
1829 return mode == Pmode || mode == SImode;
1830
1831 /* AltiVec modes can go only in AltiVec registers. */
1832 if (ALTIVEC_REGNO_P (regno))
1833 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1834 || mode == V1TImode);
1835
1836 /* ...but GPRs can hold SIMD data on the SPE in one register. */
1837 if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1838 return 1;
1839
1840 /* We cannot put non-VSX TImode or PTImode anywhere except the general
1841 registers, and the value must fit within the register set. */
1842
1843 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
1844 }
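/* Illustrative consumer (hypothetical, not part of this file): once the
   init code below fills in rs6000_hard_regno_mode_ok_p, this predicate
   reduces to a table lookup instead of re-running the logic above.  */
#if 0
static bool
dfmode_ok_in_first_fpr (void)
{
  return rs6000_hard_regno_mode_ok_p[(int) DFmode][32 /* first FPR */];
}
#endif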
1845
1846 /* Print interesting facts about registers. */
1847 static void
1848 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
1849 {
1850 int r, m;
1851
1852 for (r = first_regno; r <= last_regno; ++r)
1853 {
1854 const char *comma = "";
1855 int len;
1856
1857 if (first_regno == last_regno)
1858 fprintf (stderr, "%s:\t", reg_name);
1859 else
1860 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
1861
1862 len = 8;
1863 for (m = 0; m < NUM_MACHINE_MODES; ++m)
1864 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
1865 {
1866 if (len > 70)
1867 {
1868 fprintf (stderr, ",\n\t");
1869 len = 8;
1870 comma = "";
1871 }
1872
1873 if (rs6000_hard_regno_nregs[m][r] > 1)
1874 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
1875 rs6000_hard_regno_nregs[m][r]);
1876 else
1877 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
1878
1879 comma = ", ";
1880 }
1881
1882 if (call_used_regs[r])
1883 {
1884 if (len > 70)
1885 {
1886 fprintf (stderr, ",\n\t");
1887 len = 8;
1888 comma = "";
1889 }
1890
1891 len += fprintf (stderr, "%s%s", comma, "call-used");
1892 comma = ", ";
1893 }
1894
1895 if (fixed_regs[r])
1896 {
1897 if (len > 70)
1898 {
1899 fprintf (stderr, ",\n\t");
1900 len = 8;
1901 comma = "";
1902 }
1903
1904 len += fprintf (stderr, "%s%s", comma, "fixed");
1905 comma = ", ";
1906 }
1907
1908 if (len > 70)
1909 {
1910 fprintf (stderr, ",\n\t");
1911 comma = "";
1912 }
1913
1914 len += fprintf (stderr, "%sreg-class = %s", comma,
1915 reg_class_names[(int)rs6000_regno_regclass[r]]);
1916 comma = ", ";
1917
1918 if (len > 70)
1919 {
1920 fprintf (stderr, ",\n\t");
1921 comma = "";
1922 }
1923
1924 fprintf (stderr, "%sregno = %d\n", comma, r);
1925 }
1926 }
1927
1928 static const char *
1929 rs6000_debug_vector_unit (enum rs6000_vector v)
1930 {
1931 const char *ret;
1932
1933 switch (v)
1934 {
1935 case VECTOR_NONE: ret = "none"; break;
1936 case VECTOR_ALTIVEC: ret = "altivec"; break;
1937 case VECTOR_VSX: ret = "vsx"; break;
1938 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
1939 case VECTOR_PAIRED: ret = "paired"; break;
1940 case VECTOR_SPE: ret = "spe"; break;
1941 case VECTOR_OTHER: ret = "other"; break;
1942 default: ret = "unknown"; break;
1943 }
1944
1945 return ret;
1946 }
1947
1948 /* Inner function printing just the address mask for a particular reload
1949 register class. */
1950 DEBUG_FUNCTION char *
1951 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
1952 {
1953 static char ret[8];
1954 char *p = ret;
1955
1956 if ((mask & RELOAD_REG_VALID) != 0)
1957 *p++ = 'v';
1958 else if (keep_spaces)
1959 *p++ = ' ';
1960
1961 if ((mask & RELOAD_REG_MULTIPLE) != 0)
1962 *p++ = 'm';
1963 else if (keep_spaces)
1964 *p++ = ' ';
1965
1966 if ((mask & RELOAD_REG_INDEXED) != 0)
1967 *p++ = 'i';
1968 else if (keep_spaces)
1969 *p++ = ' ';
1970
1971 if ((mask & RELOAD_REG_OFFSET) != 0)
1972 *p++ = 'o';
1973 else if (keep_spaces)
1974 *p++ = ' ';
1975
1976 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
1977 *p++ = '+';
1978 else if (keep_spaces)
1979 *p++ = ' ';
1980
1981 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
1982 *p++ = '+';
1983 else if (keep_spaces)
1984 *p++ = ' ';
1985
1986 if ((mask & RELOAD_REG_AND_M16) != 0)
1987 *p++ = '&';
1988 else if (keep_spaces)
1989 *p++ = ' ';
1990
1991 *p = '\0';
1992
1993 return ret;
1994 }
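/* For example, with keep_spaces a GPR mask that is valid, indexed,
   offsettable, and allows PRE_INC/PRE_DEC and PRE_MODIFY prints as
   "v io++ ": one column per flag in the order tested above, with the
   '&' column (the AND -16 Altivec form) left blank.  */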
1995
1996 /* Print the address masks in a human readable fashion. */
1997 DEBUG_FUNCTION void
1998 rs6000_debug_print_mode (ssize_t m)
1999 {
2000 ssize_t rc;
2001
2002 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2003 for (rc = 0; rc < N_RELOAD_REG; rc++)
2004 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2005 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2006
2007 if (rs6000_vector_unit[m] != VECTOR_NONE
2008 || rs6000_vector_mem[m] != VECTOR_NONE
2009 || (reg_addr[m].reload_store != CODE_FOR_nothing)
2010 || (reg_addr[m].reload_load != CODE_FOR_nothing)
2011 || reg_addr[m].scalar_in_vmx_p)
2012 {
2013 fprintf (stderr,
2014 " Vector-arith=%-10s Vector-mem=%-10s Reload=%c%c Upper=%c",
2015 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2016 rs6000_debug_vector_unit (rs6000_vector_mem[m]),
2017 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2018 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*',
2019 (reg_addr[m].scalar_in_vmx_p) ? 'y' : 'n');
2020 }
2021
2022 fputs ("\n", stderr);
2023 }
2024
2025 #define DEBUG_FMT_ID "%-32s= "
2026 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2027 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2028 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
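/* For instance, fprintf (stderr, DEBUG_FMT_S, "abi", "aix") emits the
   label left-justified in a 32-character field followed by "= aix", so
   all of the -mdebug=reg values below line up in one column.  */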
2029
2030 /* Print various interesting information with -mdebug=reg. */
2031 static void
2032 rs6000_debug_reg_global (void)
2033 {
2034 static const char *const tf[2] = { "false", "true" };
2035 const char *nl = (const char *)0;
2036 int m;
2037 size_t m1, m2, v;
2038 char costly_num[20];
2039 char nop_num[20];
2040 char flags_buffer[40];
2041 const char *costly_str;
2042 const char *nop_str;
2043 const char *trace_str;
2044 const char *abi_str;
2045 const char *cmodel_str;
2046 struct cl_target_option cl_opts;
2047
2048 /* Modes we want tieable information on. */
2049 static const machine_mode print_tieable_modes[] = {
2050 QImode,
2051 HImode,
2052 SImode,
2053 DImode,
2054 TImode,
2055 PTImode,
2056 SFmode,
2057 DFmode,
2058 TFmode,
2059 SDmode,
2060 DDmode,
2061 TDmode,
2062 V8QImode,
2063 V4HImode,
2064 V2SImode,
2065 V16QImode,
2066 V8HImode,
2067 V4SImode,
2068 V2DImode,
2069 V1TImode,
2070 V32QImode,
2071 V16HImode,
2072 V8SImode,
2073 V4DImode,
2074 V2TImode,
2075 V2SFmode,
2076 V4SFmode,
2077 V2DFmode,
2078 V8SFmode,
2079 V4DFmode,
2080 CCmode,
2081 CCUNSmode,
2082 CCEQmode,
2083 };
2084
2085 /* Virtual regs we are interested in. */
2086 static const struct {
2087 int regno; /* register number. */
2088 const char *name; /* register name. */
2089 } virtual_regs[] = {
2090 { STACK_POINTER_REGNUM, "stack pointer:" },
2091 { TOC_REGNUM, "toc: " },
2092 { STATIC_CHAIN_REGNUM, "static chain: " },
2093 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2094 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2095 { ARG_POINTER_REGNUM, "arg pointer: " },
2096 { FRAME_POINTER_REGNUM, "frame pointer:" },
2097 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2098 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2099 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2100 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2101 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2102 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2103 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2104 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2105 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2106 };
2107
2108 fputs ("\nHard register information:\n", stderr);
2109 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2110 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2111 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2112 LAST_ALTIVEC_REGNO,
2113 "vs");
2114 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2115 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2116 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2117 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2118 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2119 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2120 rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
2121 rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
2122
2123 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2124 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2125 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2126
2127 fprintf (stderr,
2128 "\n"
2129 "d reg_class = %s\n"
2130 "f reg_class = %s\n"
2131 "v reg_class = %s\n"
2132 "wa reg_class = %s\n"
2133 "wd reg_class = %s\n"
2134 "wf reg_class = %s\n"
2135 "wg reg_class = %s\n"
2136 "wh reg_class = %s\n"
2137 "wi reg_class = %s\n"
2138 "wj reg_class = %s\n"
2139 "wk reg_class = %s\n"
2140 "wl reg_class = %s\n"
2141 "wm reg_class = %s\n"
2142 "wr reg_class = %s\n"
2143 "ws reg_class = %s\n"
2144 "wt reg_class = %s\n"
2145 "wu reg_class = %s\n"
2146 "wv reg_class = %s\n"
2147 "ww reg_class = %s\n"
2148 "wx reg_class = %s\n"
2149 "wy reg_class = %s\n"
2150 "wz reg_class = %s\n"
2151 "\n",
2152 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2153 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2154 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2155 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2156 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2157 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2158 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2159 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2160 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2161 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2162 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2163 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2164 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2165 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2166 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2167 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2168 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2169 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2170 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2171 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2172 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2173 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]);
2174
2175 nl = "\n";
2176 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2177 rs6000_debug_print_mode (m);
2178
2179 fputs ("\n", stderr);
2180
2181 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2182 {
2183 machine_mode mode1 = print_tieable_modes[m1];
2184 bool first_time = true;
2185
2186 nl = (const char *)0;
2187 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2188 {
2189 machine_mode mode2 = print_tieable_modes[m2];
2190 if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
2191 {
2192 if (first_time)
2193 {
2194 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2195 nl = "\n";
2196 first_time = false;
2197 }
2198
2199 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2200 }
2201 }
2202
2203 if (!first_time)
2204 fputs ("\n", stderr);
2205 }
2206
2207 if (nl)
2208 fputs (nl, stderr);
2209
2210 if (rs6000_recip_control)
2211 {
2212 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2213
2214 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2215 if (rs6000_recip_bits[m])
2216 {
2217 fprintf (stderr,
2218 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2219 GET_MODE_NAME (m),
2220 (RS6000_RECIP_AUTO_RE_P (m)
2221 ? "auto"
2222 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2223 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2224 ? "auto"
2225 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2226 }
2227
2228 fputs ("\n", stderr);
2229 }
2230
2231 if (rs6000_cpu_index >= 0)
2232 {
2233 const char *name = processor_target_table[rs6000_cpu_index].name;
2234 HOST_WIDE_INT flags
2235 = processor_target_table[rs6000_cpu_index].target_enable;
2236
2237 sprintf (flags_buffer, "-mcpu=%s flags", name);
2238 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2239 }
2240 else
2241 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2242
2243 if (rs6000_tune_index >= 0)
2244 {
2245 const char *name = processor_target_table[rs6000_tune_index].name;
2246 HOST_WIDE_INT flags
2247 = processor_target_table[rs6000_tune_index].target_enable;
2248
2249 sprintf (flags_buffer, "-mtune=%s flags", name);
2250 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2251 }
2252 else
2253 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2254
2255 cl_target_option_save (&cl_opts, &global_options);
2256 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2257 rs6000_isa_flags);
2258
2259 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2260 rs6000_isa_flags_explicit);
2261
2262 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2263 rs6000_builtin_mask);
2264
2265 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2266
2267 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2268 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2269
2270 switch (rs6000_sched_costly_dep)
2271 {
2272 case max_dep_latency:
2273 costly_str = "max_dep_latency";
2274 break;
2275
2276 case no_dep_costly:
2277 costly_str = "no_dep_costly";
2278 break;
2279
2280 case all_deps_costly:
2281 costly_str = "all_deps_costly";
2282 break;
2283
2284 case true_store_to_load_dep_costly:
2285 costly_str = "true_store_to_load_dep_costly";
2286 break;
2287
2288 case store_to_load_dep_costly:
2289 costly_str = "store_to_load_dep_costly";
2290 break;
2291
2292 default:
2293 costly_str = costly_num;
2294 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2295 break;
2296 }
2297
2298 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2299
2300 switch (rs6000_sched_insert_nops)
2301 {
2302 case sched_finish_regroup_exact:
2303 nop_str = "sched_finish_regroup_exact";
2304 break;
2305
2306 case sched_finish_pad_groups:
2307 nop_str = "sched_finish_pad_groups";
2308 break;
2309
2310 case sched_finish_none:
2311 nop_str = "sched_finish_none";
2312 break;
2313
2314 default:
2315 nop_str = nop_num;
2316 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2317 break;
2318 }
2319
2320 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2321
2322 switch (rs6000_sdata)
2323 {
2324 default:
2325 case SDATA_NONE:
2326 break;
2327
2328 case SDATA_DATA:
2329 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2330 break;
2331
2332 case SDATA_SYSV:
2333 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2334 break;
2335
2336 case SDATA_EABI:
2337 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2338 break;
2339
2340 }
2341
2342 switch (rs6000_traceback)
2343 {
2344 case traceback_default: trace_str = "default"; break;
2345 case traceback_none: trace_str = "none"; break;
2346 case traceback_part: trace_str = "part"; break;
2347 case traceback_full: trace_str = "full"; break;
2348 default: trace_str = "unknown"; break;
2349 }
2350
2351 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2352
2353 switch (rs6000_current_cmodel)
2354 {
2355 case CMODEL_SMALL: cmodel_str = "small"; break;
2356 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2357 case CMODEL_LARGE: cmodel_str = "large"; break;
2358 default: cmodel_str = "unknown"; break;
2359 }
2360
2361 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2362
2363 switch (rs6000_current_abi)
2364 {
2365 case ABI_NONE: abi_str = "none"; break;
2366 case ABI_AIX: abi_str = "aix"; break;
2367 case ABI_ELFv2: abi_str = "ELFv2"; break;
2368 case ABI_V4: abi_str = "V4"; break;
2369 case ABI_DARWIN: abi_str = "darwin"; break;
2370 default: abi_str = "unknown"; break;
2371 }
2372
2373 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2374
2375 if (rs6000_altivec_abi)
2376 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2377
2378 if (rs6000_spe_abi)
2379 fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");
2380
2381 if (rs6000_darwin64_abi)
2382 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2383
2384 if (rs6000_float_gprs)
2385 fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
2386
2387 fprintf (stderr, DEBUG_FMT_S, "fprs",
2388 (TARGET_FPRS ? "true" : "false"));
2389
2390 fprintf (stderr, DEBUG_FMT_S, "single_float",
2391 (TARGET_SINGLE_FLOAT ? "true" : "false"));
2392
2393 fprintf (stderr, DEBUG_FMT_S, "double_float",
2394 (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2395
2396 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2397 (TARGET_SOFT_FLOAT ? "true" : "false"));
2398
2399 fprintf (stderr, DEBUG_FMT_S, "e500_single",
2400 (TARGET_E500_SINGLE ? "true" : "false"));
2401
2402 fprintf (stderr, DEBUG_FMT_S, "e500_double",
2403 (TARGET_E500_DOUBLE ? "true" : "false"));
2404
2405 if (TARGET_LINK_STACK)
2406 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2407
2408 if (targetm.lra_p ())
2409 fprintf (stderr, DEBUG_FMT_S, "lra", "true");
2410
2411 if (TARGET_P8_FUSION)
2412 fprintf (stderr, DEBUG_FMT_S, "p8 fusion",
2413 (TARGET_P8_FUSION_SIGN) ? "zero+sign" : "zero");
2414
2415 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2416 TARGET_SECURE_PLT ? "secure" : "bss");
2417 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2418 aix_struct_return ? "aix" : "sysv");
2419 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2420 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2421 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2422 tf[!!rs6000_align_branch_targets]);
2423 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2424 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2425 rs6000_long_double_type_size);
2426 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2427 (int)rs6000_sched_restricted_insns_priority);
2428 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2429 (int)END_BUILTINS);
2430 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2431 (int)RS6000_BUILTIN_COUNT);
2432
2433 if (TARGET_VSX)
2434 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2435 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2436 }
2437
2438 \f
2439 /* Update the addr mask bits in reg_addr to help secondary reload and the
2440 legitimate address support figure out the appropriate addressing to
2441 use. */
2442
2443 static void
2444 rs6000_setup_reg_addr_masks (void)
2445 {
2446 ssize_t rc, reg, m, nregs;
2447 addr_mask_type any_addr_mask, addr_mask;
2448
2449 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2450 {
2451 machine_mode m2 = (machine_mode)m;
2452
2453 /* SDmode is special in that we want to access it only via REG+REG
2454 addressing on power7 and above, since we want to use the LFIWZX and
2455 STFIWZX instructions to load and store it. */
2456 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2457
2458 any_addr_mask = 0;
2459 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2460 {
2461 addr_mask = 0;
2462 reg = reload_reg_map[rc].reg;
2463
2464 /* Can mode values go in the GPR/FPR/Altivec registers? */
2465 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2466 {
2467 nregs = rs6000_hard_regno_nregs[m][reg];
2468 addr_mask |= RELOAD_REG_VALID;
2469
2470 /* Indicate if the mode takes more than 1 physical register. If
2471 it takes a single register, indicate it can do REG+REG
2472 addressing. */
2473 if (nregs > 1 || m == BLKmode)
2474 addr_mask |= RELOAD_REG_MULTIPLE;
2475 else
2476 addr_mask |= RELOAD_REG_INDEXED;
2477
2478 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2479 addressing. Restrict addressing on SPE for 64-bit types
2480 because of the SUBREG hackery used to address 64-bit floats in
2481 '32-bit' GPRs. */
2482
2483 if (TARGET_UPDATE
2484 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2485 && GET_MODE_SIZE (m2) <= 8
2486 && !VECTOR_MODE_P (m2)
2487 && !COMPLEX_MODE_P (m2)
2488 && !indexed_only_p
2489 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (m2) == 8))
2490 {
2491 addr_mask |= RELOAD_REG_PRE_INCDEC;
2492
2493 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2494 we don't allow PRE_MODIFY for some multi-register
2495 operations. */
2496 switch (m)
2497 {
2498 default:
2499 addr_mask |= RELOAD_REG_PRE_MODIFY;
2500 break;
2501
2502 case DImode:
2503 if (TARGET_POWERPC64)
2504 addr_mask |= RELOAD_REG_PRE_MODIFY;
2505 break;
2506
2507 case DFmode:
2508 case DDmode:
2509 if (TARGET_DF_INSN)
2510 addr_mask |= RELOAD_REG_PRE_MODIFY;
2511 break;
2512 }
2513 }
2514 }
2515
2516 /* GPR and FPR registers can do REG+OFFSET addressing, except
2517 possibly for SDmode. */
2518 if ((addr_mask != 0) && !indexed_only_p
2519 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR))
2520 addr_mask |= RELOAD_REG_OFFSET;
2521
2522 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2523 addressing on 128-bit types. */
2524 if (rc == RELOAD_REG_VMX && GET_MODE_SIZE (m2) == 16
2525 && (addr_mask & RELOAD_REG_VALID) != 0)
2526 addr_mask |= RELOAD_REG_AND_M16;
2527
2528 reg_addr[m].addr_mask[rc] = addr_mask;
2529 any_addr_mask |= addr_mask;
2530 }
2531
2532 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2533 }
2534 }
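/* Illustrative consumer of the masks built above (a sketch, not part of
   this file): later addressing decisions reduce to bit tests against
   reg_addr[mode].addr_mask[reload-class].  */
#if 0
static inline bool
mode_supports_pre_incdec_in_gprs (machine_mode mode)
{
  return (reg_addr[mode].addr_mask[RELOAD_REG_GPR]
          & RELOAD_REG_PRE_INCDEC) != 0;
}
#endif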
2535
2536 \f
2537 /* Initialize the various global tables that are based on register size. */
2538 static void
2539 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2540 {
2541 ssize_t r, m, c;
2542 int align64;
2543 int align32;
2544
2545 /* Precalculate REGNO_REG_CLASS. */
2546 rs6000_regno_regclass[0] = GENERAL_REGS;
2547 for (r = 1; r < 32; ++r)
2548 rs6000_regno_regclass[r] = BASE_REGS;
2549
2550 for (r = 32; r < 64; ++r)
2551 rs6000_regno_regclass[r] = FLOAT_REGS;
2552
2553 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
2554 rs6000_regno_regclass[r] = NO_REGS;
2555
2556 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2557 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2558
2559 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2560 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2561 rs6000_regno_regclass[r] = CR_REGS;
2562
2563 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2564 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2565 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2566 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2567 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2568 rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
2569 rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
2570 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
2571 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
2572 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
2573 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2574 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2575
2576 /* Precalculate the mapping from register class to the simpler reload
2577 register class. We don't need all of the register classes that are
2578 combinations of different classes, just the simple ones that have constraint letters. */
2579 for (c = 0; c < N_REG_CLASSES; c++)
2580 reg_class_to_reg_type[c] = NO_REG_TYPE;
2581
2582 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2583 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2584 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2585 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2586 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2587 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2588 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2589 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2590 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2591 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2592 reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
2593 reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
2594
2595 if (TARGET_VSX)
2596 {
2597 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2598 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2599 }
2600 else
2601 {
2602 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2603 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2604 }
2605
2606 /* Precalculate the valid memory formats as well as the vector information;
2607 this must be set up before the rs6000_hard_regno_nregs_internal calls
2608 below. */
2609 gcc_assert ((int)VECTOR_NONE == 0);
2610 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2611 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2612
2613 gcc_assert ((int)CODE_FOR_nothing == 0);
2614 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2615
2616 gcc_assert ((int)NO_REGS == 0);
2617 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2618
2619 /* The VSX hardware allows native alignment for vectors, but we control
2620 whether the compiler believes it can use native alignment or must still use 128-bit alignment. */
2621 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2622 {
2623 align64 = 64;
2624 align32 = 32;
2625 }
2626 else
2627 {
2628 align64 = 128;
2629 align32 = 128;
2630 }
2631
2632 /* V2DF mode, VSX only. */
2633 if (TARGET_VSX)
2634 {
2635 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2636 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2637 rs6000_vector_align[V2DFmode] = align64;
2638 }
2639
2640 /* V4SF mode, either VSX or Altivec. */
2641 if (TARGET_VSX)
2642 {
2643 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2644 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2645 rs6000_vector_align[V4SFmode] = align32;
2646 }
2647 else if (TARGET_ALTIVEC)
2648 {
2649 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2650 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2651 rs6000_vector_align[V4SFmode] = align32;
2652 }
2653
2654 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2655 and stores. */
2656 if (TARGET_ALTIVEC)
2657 {
2658 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2659 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2660 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2661 rs6000_vector_align[V4SImode] = align32;
2662 rs6000_vector_align[V8HImode] = align32;
2663 rs6000_vector_align[V16QImode] = align32;
2664
2665 if (TARGET_VSX)
2666 {
2667 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2668 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2669 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2670 }
2671 else
2672 {
2673 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2674 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2675 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2676 }
2677 }
2678
2679 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
2680 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2681 if (TARGET_VSX)
2682 {
2683 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2684 rs6000_vector_unit[V2DImode]
2685 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2686 rs6000_vector_align[V2DImode] = align64;
2687
2688 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2689 rs6000_vector_unit[V1TImode]
2690 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2691 rs6000_vector_align[V1TImode] = 128;
2692 }
2693
2694 /* DFmode, see if we want to use the VSX unit. Memory is handled
2695 differently, so don't set rs6000_vector_mem. */
2696 if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
2697 {
2698 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2699 rs6000_vector_align[DFmode] = 64;
2700 }
2701
2702 /* SFmode, see if we want to use the VSX unit. */
2703 if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
2704 {
2705 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2706 rs6000_vector_align[SFmode] = 32;
2707 }
2708
2709 /* Allow TImode in VSX register and set the VSX memory macros. */
2710 if (TARGET_VSX && TARGET_VSX_TIMODE)
2711 {
2712 rs6000_vector_mem[TImode] = VECTOR_VSX;
2713 rs6000_vector_align[TImode] = align64;
2714 }
2715
2716 /* TODO add SPE and paired floating point vector support. */
2717
2718 /* Register class constraints for the constraints that depend on compile
2719 switches. When the VSX code was added, different constraints were added
2720 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2721 of the VSX registers are used. The register classes for scalar floating
2722 point types are set based on whether we allow that type into the upper
2723 (Altivec) registers. GCC has register classes to target the Altivec
2724 registers for load/store operations, to select using a VSX memory
2725 operation instead of the traditional floating point operation. The
2726 constraints are:
2727
2728 d - Register class to use with traditional DFmode instructions.
2729 f - Register class to use with traditional SFmode instructions.
2730 v - Altivec register.
2731 wa - Any VSX register.
2732 wc - Reserved to represent individual CR bits (used in LLVM).
2733 wd - Preferred register class for V2DFmode.
2734 wf - Preferred register class for V4SFmode.
2735 wg - Float register for power6x move insns.
2736 wh - FP register for direct move instructions.
2737 wi - FP or VSX register to hold 64-bit integers for VSX insns.
2738 wj - FP or VSX register to hold 64-bit integers for direct moves.
2739 wk - FP or VSX register to hold 64-bit doubles for direct moves.
2740 wl - Float register if we can do 32-bit signed int loads.
2741 wm - VSX register for ISA 2.07 direct move operations.
2742 wn - always NO_REGS.
2743 wr - GPR if 64-bit mode is permitted.
2744 ws - Register class to do ISA 2.06 DF operations.
2745 wt - VSX register for TImode in VSX registers.
2746 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
2747 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
2748 ww - Register class to do SF conversions in with VSX operations.
2749 wx - Float register if we can do 32-bit int stores.
2750 wy - Register class to do ISA 2.07 SF operations.
2751 wz - Float register if we can do 32-bit unsigned int loads. */
2752
2753 if (TARGET_HARD_FLOAT && TARGET_FPRS)
2754 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
2755
2756 if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
2757 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
2758
2759 if (TARGET_VSX)
2760 {
2761 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2762 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
2763 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
2764 rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS; /* DImode */
2765
2766 if (TARGET_VSX_TIMODE)
2767 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
2768
2769 if (TARGET_UPPER_REGS_DF) /* DFmode */
2770 {
2771 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
2772 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
2773 }
2774 else
2775 rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
2776 }
2777
2778 /* Add conditional constraints based on various options, to allow us to
2779 collapse multiple insn patterns. */
2780 if (TARGET_ALTIVEC)
2781 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2782
2783 if (TARGET_MFPGPR) /* DFmode */
2784 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
2785
2786 if (TARGET_LFIWAX)
2787 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
2788
2789 if (TARGET_DIRECT_MOVE)
2790 {
2791 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
2792 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
2793 = rs6000_constraints[RS6000_CONSTRAINT_wi];
2794 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
2795 = rs6000_constraints[RS6000_CONSTRAINT_ws];
2796 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
2797 }
2798
2799 if (TARGET_POWERPC64)
2800 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2801
2802 if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */
2803 {
2804 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
2805 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
2806 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
2807 }
2808 else if (TARGET_P8_VECTOR)
2809 {
2810 rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
2811 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
2812 }
2813 else if (TARGET_VSX)
2814 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
2815
2816 if (TARGET_STFIWX)
2817 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2818
2819 if (TARGET_LFIWZX)
2820 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
2821
2822 /* Set up the reload helper and direct move functions. */
2823 if (TARGET_VSX || TARGET_ALTIVEC)
2824 {
2825 if (TARGET_64BIT)
2826 {
2827 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2828 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2829 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2830 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2831 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2832 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
2833 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
2834 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
2835 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
2836 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
2837 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
2838 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
2839 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
2840 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
2841 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
2842 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
2843 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
2844 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
2845 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
2846 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
2847 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
2848 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
2849
2850 if (TARGET_VSX_TIMODE)
2851 {
2852 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
2853 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
2854 }
2855
2856 if (TARGET_DIRECT_MOVE)
2857 {
2858 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
2859 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
2860 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
2861 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
2862 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
2863 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
2864 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
2865 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
2866 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
2867
2868 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
2869 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
2870 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
2871 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
2872 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
2873 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
2874 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
2875 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
2876 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
2877 }
2878 }
2879 else
2880 {
2881 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
2882 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
2883 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
2884 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
2885 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
2886 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
2887 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
2888 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
2889 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
2890 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
2891 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
2892 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
2893 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
2894 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
2895 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
2896 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
2897 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
2898 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
2899 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
2900 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
2901 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
2902 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
2903
2904 if (TARGET_VSX_TIMODE)
2905 {
2906 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
2907 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
2908 }
2909
2910 if (TARGET_DIRECT_MOVE)
2911 {
2912 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
2913 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
2914 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
2915 }
2916 }
2917
2918 if (TARGET_UPPER_REGS_DF)
2919 reg_addr[DFmode].scalar_in_vmx_p = true;
2920
2921 if (TARGET_UPPER_REGS_SF)
2922 reg_addr[SFmode].scalar_in_vmx_p = true;
2923 }
2924
2925 /* Precalculate HARD_REGNO_NREGS. */
2926 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
2927 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2928 rs6000_hard_regno_nregs[m][r]
2929 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
2930
2931 /* Precalculate HARD_REGNO_MODE_OK. */
2932 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
2933 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2934 if (rs6000_hard_regno_mode_ok (r, (machine_mode)m))
2935 rs6000_hard_regno_mode_ok_p[m][r] = true;
2936
2937 /* Precalculate CLASS_MAX_NREGS sizes. */
2938 for (c = 0; c < LIM_REG_CLASSES; ++c)
2939 {
2940 int reg_size;
2941
2942 if (TARGET_VSX && VSX_REG_CLASS_P (c))
2943 reg_size = UNITS_PER_VSX_WORD;
2944
2945 else if (c == ALTIVEC_REGS)
2946 reg_size = UNITS_PER_ALTIVEC_WORD;
2947
2948 else if (c == FLOAT_REGS)
2949 reg_size = UNITS_PER_FP_WORD;
2950
2951 else
2952 reg_size = UNITS_PER_WORD;
2953
2954 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2955 {
2956 machine_mode m2 = (machine_mode)m;
2957 int reg_size2 = reg_size;
2958
2959 /* TFmode/TDmode always takes 2 registers, even in VSX. */
2960 if (TARGET_VSX && VSX_REG_CLASS_P (c)
2961 && (m == TDmode || m == TFmode))
2962 reg_size2 = UNITS_PER_FP_WORD;
2963
2964 rs6000_class_max_nregs[m][c]
2965 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
2966 }
2967 }
2968
2969 if (TARGET_E500_DOUBLE)
2970 rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
2971
2972 /* Calculate the modes for which to automatically generate code using the
2973 reciprocal divide and square root instructions. In the future, possibly
2974 automatically generate the instructions even if the user did not specify
2975 -mrecip. The older machines' double precision reciprocal sqrt estimate is
2976 not accurate enough. */
2977 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
2978 if (TARGET_FRES)
2979 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
2980 if (TARGET_FRE)
2981 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
2982 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
2983 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
2984 if (VECTOR_UNIT_VSX_P (V2DFmode))
2985 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
2986
2987 if (TARGET_FRSQRTES)
2988 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
2989 if (TARGET_FRSQRTE)
2990 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
2991 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
2992 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
2993 if (VECTOR_UNIT_VSX_P (V2DFmode))
2994 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
2995
2996 if (rs6000_recip_control)
2997 {
2998 if (!flag_finite_math_only)
2999 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
3000 if (flag_trapping_math)
3001 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
3002 if (!flag_reciprocal_math)
3003 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
3004 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3005 {
3006 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3007 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3008 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3009
3010 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3011 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3012 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3013
3014 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3015 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3016 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3017
3018 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3019 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3020 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3021
3022 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3023 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3024 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3025
3026 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3027 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3028 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3029
3030 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3031 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3032 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3033
3034 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3035 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3036 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3037 }
3038 }
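/* A sketch of what the generated code computes when -mrecip replaces a
   divide with a reciprocal estimate (illustrative only; the real
   expansion lives elsewhere and is also vector-widened): each
   Newton-Raphson step roughly doubles the number of accurate bits in
   the initial fre/fres estimate E of 1/D.  */
#if 0
static double
recip_newton_step (double d, double e)
{
  return e * (2.0 - d * e);     /* Refined approximation of 1/d.  */
}
#endif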
3039
3040 /* Update the addr mask bits in reg_addr to help secondary reload and the
3041 legitimate address support figure out the appropriate addressing to
3042 use. */
3043 rs6000_setup_reg_addr_masks ();
3044
3045 if (global_init_p || TARGET_DEBUG_TARGET)
3046 {
3047 if (TARGET_DEBUG_REG)
3048 rs6000_debug_reg_global ();
3049
3050 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3051 fprintf (stderr,
3052 "SImode variable mult cost = %d\n"
3053 "SImode constant mult cost = %d\n"
3054 "SImode short constant mult cost = %d\n"
3055 "DImode multiplication cost = %d\n"
3056 "SImode division cost = %d\n"
3057 "DImode division cost = %d\n"
3058 "Simple fp operation cost = %d\n"
3059 "DFmode multiplication cost = %d\n"
3060 "SFmode division cost = %d\n"
3061 "DFmode division cost = %d\n"
3062 "cache line size = %d\n"
3063 "l1 cache size = %d\n"
3064 "l2 cache size = %d\n"
3065 "simultaneous prefetches = %d\n"
3066 "\n",
3067 rs6000_cost->mulsi,
3068 rs6000_cost->mulsi_const,
3069 rs6000_cost->mulsi_const9,
3070 rs6000_cost->muldi,
3071 rs6000_cost->divsi,
3072 rs6000_cost->divdi,
3073 rs6000_cost->fp,
3074 rs6000_cost->dmul,
3075 rs6000_cost->sdiv,
3076 rs6000_cost->ddiv,
3077 rs6000_cost->cache_line_size,
3078 rs6000_cost->l1_cache_size,
3079 rs6000_cost->l2_cache_size,
3080 rs6000_cost->simultaneous_prefetches);
3081 }
3082 }
3083
3084 #if TARGET_MACHO
3085 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3086
3087 static void
3088 darwin_rs6000_override_options (void)
3089 {
3090 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3091 off. */
3092 rs6000_altivec_abi = 1;
3093 TARGET_ALTIVEC_VRSAVE = 1;
3094 rs6000_current_abi = ABI_DARWIN;
3095
3096 if (DEFAULT_ABI == ABI_DARWIN
3097 && TARGET_64BIT)
3098 darwin_one_byte_bool = 1;
3099
3100 if (TARGET_64BIT && ! TARGET_POWERPC64)
3101 {
3102 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3103 warning (0, "-m64 requires PowerPC64 architecture, enabling");
3104 }
3105 if (flag_mkernel)
3106 {
3107 rs6000_default_long_calls = 1;
3108 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3109 }
3110
3111 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3112 Altivec. */
3113 if (!flag_mkernel && !flag_apple_kext
3114 && TARGET_64BIT
3115 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3116 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3117
3118 /* Unless the user (not the configurer) has explicitly overridden
3119 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to the
3120 G4 unless targeting the kernel. */
3121 if (!flag_mkernel
3122 && !flag_apple_kext
3123 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3124 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3125 && ! global_options_set.x_rs6000_cpu_index)
3126 {
3127 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3128 }
3129 }
3130 #endif
3131
3132 /* If not otherwise specified by a target, make 'long double' equivalent to
3133 'double'. */
3134
3135 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3136 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3137 #endif
3138
3139 /* Return the builtin mask of the various options that affect which
3140 builtins are enabled. In the past we used target_flags, but we've run out of
3141 bits, and some options like SPE and PAIRED are no longer in
3142 target_flags. */
3143
3144 HOST_WIDE_INT
3145 rs6000_builtin_mask_calculate (void)
3146 {
3147 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3148 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3149 | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
3150 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
3151 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3152 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3153 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3154 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3155 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3156 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3157 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3158 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3159 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3160 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3161 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3162 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0));
3163 }
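/* Illustrative test of the mask (a hypothetical caller, not part of
   this file): each RS6000_BTM_* bit gates one family of builtins.  */
#if 0
static bool
altivec_builtins_enabled_p (void)
{
  return (rs6000_builtin_mask_calculate () & RS6000_BTM_ALTIVEC) != 0;
}
#endif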
3164
3165 /* Implement TARGET_MD_ASM_CLOBBERS. All asm statements are considered
3166 to clobber the XER[CA] bit because clobbering that bit without telling
3167 the compiler worked just fine with versions of GCC before GCC 5, and
3168 breaking a lot of older code in ways that are hard to track down is
3169 not such a great idea. */
3170
3171 static tree
3172 rs6000_md_asm_clobbers (tree, tree, tree clobbers)
3173 {
3174 tree s = build_string (strlen (reg_names[CA_REGNO]), reg_names[CA_REGNO]);
3175 return tree_cons (NULL_TREE, s, clobbers);
3176 }
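/* For example, an asm like the sketch below clobbers the carry bit
   (addic writes XER[CA]) without declaring it; implicitly adding
   reg_names[CA_REGNO] to every asm's clobber list keeps a
   compiler-held carry from being silently corrupted across such
   statements:

     asm ("addic %0,%1,-1\n\taddze %0,%0" : "=r" (x) : "r" (x));  */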
3177
3178 /* Override command line options. Mostly we process the processor type and
3179 sometimes adjust other TARGET_ options. */
3180
3181 static bool
3182 rs6000_option_override_internal (bool global_init_p)
3183 {
3184 bool ret = true;
3185 bool have_cpu = false;
3186
3187 /* The default cpu requested at configure time, if any. */
3188 const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;
3189
3190 HOST_WIDE_INT set_masks;
3191 int cpu_index;
3192 int tune_index;
3193 struct cl_target_option *main_target_opt
3194 = ((global_init_p || target_option_default_node == NULL)
3195 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3196
3197 /* Remember the explicit arguments. */
3198 if (global_init_p)
3199 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3200
3201 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3202 library functions, so warn about it. The flag may be useful for
3203 performance studies from time to time though, so don't disable it
3204 entirely. */
3205 if (global_options_set.x_rs6000_alignment_flags
3206 && rs6000_alignment_flags == MASK_ALIGN_POWER
3207 && DEFAULT_ABI == ABI_DARWIN
3208 && TARGET_64BIT)
3209 warning (0, "-malign-power is not supported for 64-bit Darwin;"
3210 " it is incompatible with the installed C and C++ libraries");
3211
3212 /* Numerous experiments show that IRA-based loop pressure
3213 calculation works better for RTL loop invariant motion on targets
3214 with enough (>= 32) registers. It is an expensive optimization,
3215 so it is enabled only when optimizing for peak performance. */
3216 if (optimize >= 3 && global_init_p
3217 && !global_options_set.x_flag_ira_loop_pressure)
3218 flag_ira_loop_pressure = 1;
3219
3220 /* Set the pointer size. */
3221 if (TARGET_64BIT)
3222 {
3223 rs6000_pmode = (int)DImode;
3224 rs6000_pointer_size = 64;
3225 }
3226 else
3227 {
3228 rs6000_pmode = (int)SImode;
3229 rs6000_pointer_size = 32;
3230 }
3231
3232 /* Some OSs don't support saving the high part of 64-bit registers on context
3233 switch. Other OSs don't support saving Altivec registers. On those OSs,
3234 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3235 if the user wants either, the user must explicitly specify them and we
3236 won't interfere with the user's specification. */
3237
3238 set_masks = POWERPC_MASKS;
3239 #ifdef OS_MISSING_POWERPC64
3240 if (OS_MISSING_POWERPC64)
3241 set_masks &= ~OPTION_MASK_POWERPC64;
3242 #endif
3243 #ifdef OS_MISSING_ALTIVEC
3244 if (OS_MISSING_ALTIVEC)
3245 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX);
3246 #endif
3247
3248 /* Don't override by the processor default if given explicitly. */
3249 set_masks &= ~rs6000_isa_flags_explicit;
3250
3251 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3252 the cpu in a target attribute or pragma, but did not specify a tuning
3253 option, use the cpu for the tuning option rather than the option specified
3254 with -mtune on the command line. Process a '--with-cpu' configuration
3255 request as an implicit -mcpu. */
3256 if (rs6000_cpu_index >= 0)
3257 {
3258 cpu_index = rs6000_cpu_index;
3259 have_cpu = true;
3260 }
3261 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3262 {
3263 rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
3264 have_cpu = true;
3265 }
3266 else if (implicit_cpu)
3267 {
3268 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
3269 have_cpu = true;
3270 }
3271 else
3272 {
3273 const char *default_cpu = (TARGET_POWERPC64 ? "powerpc64" : "powerpc");
3274 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
3275 have_cpu = false;
3276 }
3277
3278 gcc_assert (cpu_index >= 0);
3279
3280 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3281 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3282 with those from the cpu, except for options that were explicitly set. If
3283 we don't have a cpu, do not override the target bits set in
3284 TARGET_DEFAULT. */
3285 if (have_cpu)
3286 {
3287 rs6000_isa_flags &= ~set_masks;
3288 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3289 & set_masks);
3290 }
3291 else
3292 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3293 & ~rs6000_isa_flags_explicit);
3294
3295 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3296 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3297 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. Since we switched
3298 to using rs6000_isa_flags, we need to do the initialization here. */
3299 if (!have_cpu)
3300 rs6000_isa_flags |= (TARGET_DEFAULT & ~rs6000_isa_flags_explicit);
3301
3302 if (rs6000_tune_index >= 0)
3303 tune_index = rs6000_tune_index;
3304 else if (have_cpu)
3305 rs6000_tune_index = tune_index = cpu_index;
3306 else
3307 {
3308 size_t i;
3309 enum processor_type tune_proc
3310 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3311
3312 tune_index = -1;
3313 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3314 if (processor_target_table[i].processor == tune_proc)
3315 {
3316 rs6000_tune_index = tune_index = i;
3317 break;
3318 }
3319 }
3320
3321 gcc_assert (tune_index >= 0);
3322 rs6000_cpu = processor_target_table[tune_index].processor;
3323
3324 /* Pick defaults for SPE-related control flags. Do this early to make sure
3325 that the TARGET_ macros are representative ASAP. */
3326 {
3327 int spe_capable_cpu =
3328 (rs6000_cpu == PROCESSOR_PPC8540
3329 || rs6000_cpu == PROCESSOR_PPC8548);
3330
3331 if (!global_options_set.x_rs6000_spe_abi)
3332 rs6000_spe_abi = spe_capable_cpu;
3333
3334 if (!global_options_set.x_rs6000_spe)
3335 rs6000_spe = spe_capable_cpu;
3336
3337 if (!global_options_set.x_rs6000_float_gprs)
3338 rs6000_float_gprs =
3339 (rs6000_cpu == PROCESSOR_PPC8540 ? 1
3340 : rs6000_cpu == PROCESSOR_PPC8548 ? 2
3341 : 0);
3342 }
3343
3344 if (global_options_set.x_rs6000_spe_abi
3345 && rs6000_spe_abi
3346 && !TARGET_SPE_ABI)
3347 error ("not configured for SPE ABI");
3348
3349 if (global_options_set.x_rs6000_spe
3350 && rs6000_spe
3351 && !TARGET_SPE)
3352 error ("not configured for SPE instruction set");
3353
3354 if (main_target_opt != NULL
3355 && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
3356 || (main_target_opt->x_rs6000_spe != rs6000_spe)
3357 || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
3358 error ("target attribute or pragma changes SPE ABI");
3359
3360 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3361 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3362 || rs6000_cpu == PROCESSOR_PPCE5500)
3363 {
3364 if (TARGET_ALTIVEC)
3365 error ("AltiVec not supported in this target");
3366 if (TARGET_SPE)
3367 error ("SPE not supported in this target");
3368 }
3369 if (rs6000_cpu == PROCESSOR_PPCE6500)
3370 {
3371 if (TARGET_SPE)
3372 error ("SPE not supported in this target");
3373 }
3374
3375 /* Disable Cell microcode if we are optimizing for the Cell
3376 and not optimizing for size. */
3377 if (rs6000_gen_cell_microcode == -1)
3378 rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
3379 && !optimize_size);
3380
3381 /* If we are optimizing big endian systems for space and it's OK to
3382 use instructions that would be microcoded on the Cell, use the
3383 load/store multiple and string instructions. */
3384 if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
3385 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
3386 | OPTION_MASK_STRING);
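  /* Masking with ~rs6000_isa_flags_explicit turns on only those bits the
     user did not set explicitly either way; this idiom recurs throughout
     this function.  */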
3387
3388 /* Don't allow -mmultiple or -mstring on little endian systems
3389 unless the cpu is a 750, because the hardware doesn't support the
3390 instructions used in little endian mode, and using them causes an
3391 alignment trap. The 750 does not cause an alignment trap (except
3392 when the target is unaligned). */
3393
3394 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
3395 {
3396 if (TARGET_MULTIPLE)
3397 {
3398 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3399 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3400 warning (0, "-mmultiple is not supported on little endian systems");
3401 }
3402
3403 if (TARGET_STRING)
3404 {
3405 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3406 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
3407 warning (0, "-mstring is not supported on little endian systems");
3408 }
3409 }
3410
3411 /* If little-endian, default to -mstrict-align on older processors.
3412 Testing for htm matches power8 and later. */
3413 if (!BYTES_BIG_ENDIAN
3414 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3415 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3416
3417 /* -maltivec={le,be} implies -maltivec. */
3418 if (rs6000_altivec_element_order != 0)
3419 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3420
3421 /* Disallow -maltivec=le in big endian mode for now. This is not
3422 known to be useful for anyone. */
3423 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
3424 {
3425 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
3426 rs6000_altivec_element_order = 0;
3427 }
3428
3429 /* Add some warnings for VSX. */
3430 if (TARGET_VSX)
3431 {
3432 const char *msg = NULL;
3433 if (!TARGET_HARD_FLOAT || !TARGET_FPRS
3434 || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
3435 {
3436 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3437 msg = N_("-mvsx requires hardware floating point");
3438 else
3439 {
3440 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3441 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3442 }
3443 }
3444 else if (TARGET_PAIRED_FLOAT)
3445 msg = N_("-mvsx and -mpaired are incompatible");
3446 else if (TARGET_AVOID_XFORM > 0)
3447 msg = N_("-mvsx needs indexed addressing");
3448 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3449 & OPTION_MASK_ALTIVEC))
3450 {
3451 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3452 msg = N_("-mvsx and -mno-altivec are incompatible");
3453 else
3454 msg = N_("-mno-altivec disables vsx");
3455 }
3456
3457 if (msg)
3458 {
3459 warning (0, msg);
3460 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3461 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3462 }
3463 }
3464
3465 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3466 the -mcpu setting to enable options that conflict. */
3467 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3468 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3469 | OPTION_MASK_ALTIVEC
3470 | OPTION_MASK_VSX)) != 0)
3471 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3472 | OPTION_MASK_DIRECT_MOVE)
3473 & ~rs6000_isa_flags_explicit);
3474
3475 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3476 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3477
3478 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3479 unless the user explicitly used -mno-<option> to disable the code. */
3480 if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3481 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3482 else if (TARGET_VSX)
3483 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3484 else if (TARGET_POPCNTD)
3485 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
3486 else if (TARGET_DFP)
3487 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3488 else if (TARGET_CMPB)
3489 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
3490 else if (TARGET_FPRND)
3491 rs6000_isa_flags |= (ISA_2_4_MASKS & ~rs6000_isa_flags_explicit);
3492 else if (TARGET_POPCNTB)
3493 rs6000_isa_flags |= (ISA_2_2_MASKS & ~rs6000_isa_flags_explicit);
3494 else if (TARGET_ALTIVEC)
3495 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
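  /* Note: the ISA_*_MASKS groups are assumed to be cumulative, each
     including the earlier ISA levels, so only the highest applicable group
     needs to be ORed in above.  */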
3496
3497 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3498 {
3499 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3500 error ("-mcrypto requires -maltivec");
3501 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3502 }
3503
3504 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3505 {
3506 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3507 error ("-mdirect-move requires -mvsx");
3508 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3509 }
3510
3511 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3512 {
3513 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3514 error ("-mpower8-vector requires -maltivec");
3515 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3516 }
3517
3518 if (TARGET_P8_VECTOR && !TARGET_VSX)
3519 {
3520 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3521 error ("-mpower8-vector requires -mvsx");
3522 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3523 }
3524
3525 if (TARGET_VSX_TIMODE && !TARGET_VSX)
3526 {
3527 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
3528 error ("-mvsx-timode requires -mvsx");
3529 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
3530 }
3531
3532 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3533 {
3534 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3535 error ("-mhard-dfp requires -mhard-float");
3536 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3537 }
3538
3539 /* Allow an explicit -mupper-regs to set both -mupper-regs-df and
3540 -mupper-regs-sf, depending on the cpu, unless the user explicitly also set
3541 the individual option. */
3542 if (TARGET_UPPER_REGS > 0)
3543 {
3544 if (TARGET_VSX
3545 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
3546 {
3547 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
3548 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
3549 }
3550 if (TARGET_P8_VECTOR
3551 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
3552 {
3553 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
3554 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
3555 }
3556 }
3557 else if (TARGET_UPPER_REGS == 0)
3558 {
3559 if (TARGET_VSX
3560 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
3561 {
3562 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
3563 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
3564 }
3565 if (TARGET_P8_VECTOR
3566 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
3567 {
3568 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
3569 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
3570 }
3571 }
3572
3573 if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
3574 {
3575 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
3576 error ("-mupper-regs-df requires -mvsx");
3577 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
3578 }
3579
3580 if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
3581 {
3582 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
3583 error ("-mupper-regs-sf requires -mpower8-vector");
3584 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
3585 }
3586
3587 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
3588 silently turn off quad memory mode. */
3589 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3590 {
3591 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3592 warning (0, N_("-mquad-memory requires 64-bit mode"));
3593
3594 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3595 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
3596
3597 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3598 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3599 }
3600
3601 /* Non-atomic quad memory loads/stores are disabled for little endian, since
3602 the words are reversed, but atomic operations can still be done by
3603 swapping the words. */
3604 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3605 {
3606 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3607 warning (0, N_("-mquad-memory is not available in little endian mode"));
3608
3609 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3610 }
3611
3612 /* Assume that if the user asked for normal quad memory instructions, they
3613 want the atomic versions as well, unless they explicitly told us not to use
3614 quad word atomic instructions. */
3615 if (TARGET_QUAD_MEMORY
3616 && !TARGET_QUAD_MEMORY_ATOMIC
3617 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
3618 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
3619
3620 /* Enable power8 fusion if we are tuning for power8, even if we aren't
3621 generating power8 instructions. */
3622 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
3623 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
3624 & OPTION_MASK_P8_FUSION);
3625
3626 /* Power8 does not fuse sign-extended loads with the addis. If we are
3627 optimizing at high levels for speed, convert a sign-extended load into a
3628 zero-extending load plus an explicit sign extension. */
3629 if (TARGET_P8_FUSION
3630 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
3631 && optimize_function_for_speed_p (cfun)
3632 && optimize >= 3)
3633 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
3634
3635 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3636 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
3637
3638 /* E500mc does "better" if we inline more aggressively. Respect the
3639 user's opinion, though. */
3640 if (rs6000_block_move_inline_limit == 0
3641 && (rs6000_cpu == PROCESSOR_PPCE500MC
3642 || rs6000_cpu == PROCESSOR_PPCE500MC64
3643 || rs6000_cpu == PROCESSOR_PPCE5500
3644 || rs6000_cpu == PROCESSOR_PPCE6500))
3645 rs6000_block_move_inline_limit = 128;
3646
3647 /* store_one_arg depends on expand_block_move to handle at least the
3648 size of reg_parm_stack_space. */
3649 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
3650 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
3651
3652 if (global_init_p)
3653 {
3654 /* If the appropriate debug option is enabled, replace the target hooks
3655 with debug versions that call the real version and then print
3656 debugging information. */
3657 if (TARGET_DEBUG_COST)
3658 {
3659 targetm.rtx_costs = rs6000_debug_rtx_costs;
3660 targetm.address_cost = rs6000_debug_address_cost;
3661 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
3662 }
3663
3664 if (TARGET_DEBUG_ADDR)
3665 {
3666 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
3667 targetm.legitimize_address = rs6000_debug_legitimize_address;
3668 rs6000_secondary_reload_class_ptr
3669 = rs6000_debug_secondary_reload_class;
3670 rs6000_secondary_memory_needed_ptr
3671 = rs6000_debug_secondary_memory_needed;
3672 rs6000_cannot_change_mode_class_ptr
3673 = rs6000_debug_cannot_change_mode_class;
3674 rs6000_preferred_reload_class_ptr
3675 = rs6000_debug_preferred_reload_class;
3676 rs6000_legitimize_reload_address_ptr
3677 = rs6000_debug_legitimize_reload_address;
3678 rs6000_mode_dependent_address_ptr
3679 = rs6000_debug_mode_dependent_address;
3680 }
3681
3682 if (rs6000_veclibabi_name)
3683 {
3684 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
3685 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
3686 else
3687 {
3688 error ("unknown vectorization library ABI type (%s) for "
3689 "-mveclibabi= switch", rs6000_veclibabi_name);
3690 ret = false;
3691 }
3692 }
3693 }
3694
3695 if (!global_options_set.x_rs6000_long_double_type_size)
3696 {
3697 if (main_target_opt != NULL
3698 && (main_target_opt->x_rs6000_long_double_type_size
3699 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
3700 error ("target attribute or pragma changes long double size");
3701 else
3702 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
3703 }
3704
3705 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
3706 if (!global_options_set.x_rs6000_ieeequad)
3707 rs6000_ieeequad = 1;
3708 #endif
3709
3710 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
3711 target attribute or pragma which automatically enables both options,
3712 unless the altivec ABI was set. This is set by default for 64-bit, but
3713 not for 32-bit. */
3714 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
3715 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC)
3716 & ~rs6000_isa_flags_explicit);
3717
3718 /* Enable Altivec ABI for AIX -maltivec. */
3719 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
3720 {
3721 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
3722 error ("target attribute or pragma changes AltiVec ABI");
3723 else
3724 rs6000_altivec_abi = 1;
3725 }
3726
3727 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
3728 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
3729 be explicitly overridden in either case. */
3730 if (TARGET_ELF)
3731 {
3732 if (!global_options_set.x_rs6000_altivec_abi
3733 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
3734 {
3735 if (main_target_opt != NULL &&
3736 !main_target_opt->x_rs6000_altivec_abi)
3737 error ("target attribute or pragma changes AltiVec ABI");
3738 else
3739 rs6000_altivec_abi = 1;
3740 }
3741 }
3742
3743 /* Set the Darwin64 ABI as default for 64-bit Darwin.
3744 So far, the only darwin64 targets are also Mach-O. */
3745 if (TARGET_MACHO
3746 && DEFAULT_ABI == ABI_DARWIN
3747 && TARGET_64BIT)
3748 {
3749 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
3750 error ("target attribute or pragma changes darwin64 ABI");
3751 else
3752 {
3753 rs6000_darwin64_abi = 1;
3754 /* Default to natural alignment, for better performance. */
3755 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
3756 }
3757 }
3758
3759 /* Place FP constants in the constant pool instead of the TOC
3760 if section anchors are enabled. */
3761 if (flag_section_anchors
3762 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
3763 TARGET_NO_FP_IN_TOC = 1;
3764
3765 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3766 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
3767
3768 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3769 SUBTARGET_OVERRIDE_OPTIONS;
3770 #endif
3771 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3772 SUBSUBTARGET_OVERRIDE_OPTIONS;
3773 #endif
3774 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
3775 SUB3TARGET_OVERRIDE_OPTIONS;
3776 #endif
3777
3778 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3779 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
3780
3781 /* For the E500 family of cores, reset the single/double FP flags to let us
3782 check that they remain constant across attributes or pragmas. Also clear
3783 any request for string instructions, which are not supported on these cores
3784 and which we might have silently enabled above for -Os.
3785
3786 For other families, clear ISEL in case it was set implicitly.
3787 */
3788
3789 switch (rs6000_cpu)
3790 {
3791 case PROCESSOR_PPC8540:
3792 case PROCESSOR_PPC8548:
3793 case PROCESSOR_PPCE500MC:
3794 case PROCESSOR_PPCE500MC64:
3795 case PROCESSOR_PPCE5500:
3796 case PROCESSOR_PPCE6500:
3797
3798 rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE;
3799 rs6000_double_float = TARGET_E500_DOUBLE;
3800
3801 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3802
3803 break;
3804
3805 default:
3806
3807 if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
3808 rs6000_isa_flags &= ~OPTION_MASK_ISEL;
3809
3810 break;
3811 }
3812
3813 if (main_target_opt)
3814 {
3815 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
3816 error ("target attribute or pragma changes single precision floating "
3817 "point");
3818 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
3819 error ("target attribute or pragma changes double precision floating "
3820 "point");
3821 }
3822
3823 /* Detect invalid option combinations with E500. */
3824 CHECK_E500_OPTIONS;
3825
3826 rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
3827 && rs6000_cpu != PROCESSOR_POWER5
3828 && rs6000_cpu != PROCESSOR_POWER6
3829 && rs6000_cpu != PROCESSOR_POWER7
3830 && rs6000_cpu != PROCESSOR_POWER8
3831 && rs6000_cpu != PROCESSOR_PPCA2
3832 && rs6000_cpu != PROCESSOR_CELL
3833 && rs6000_cpu != PROCESSOR_PPC476);
3834 rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
3835 || rs6000_cpu == PROCESSOR_POWER5
3836 || rs6000_cpu == PROCESSOR_POWER7
3837 || rs6000_cpu == PROCESSOR_POWER8);
3838 rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
3839 || rs6000_cpu == PROCESSOR_POWER5
3840 || rs6000_cpu == PROCESSOR_POWER6
3841 || rs6000_cpu == PROCESSOR_POWER7
3842 || rs6000_cpu == PROCESSOR_POWER8
3843 || rs6000_cpu == PROCESSOR_PPCE500MC
3844 || rs6000_cpu == PROCESSOR_PPCE500MC64
3845 || rs6000_cpu == PROCESSOR_PPCE5500
3846 || rs6000_cpu == PROCESSOR_PPCE6500);
3847
3848 /* Allow debug switches to override the above settings. These are set to -1
3849 in rs6000.opt to indicate the user hasn't directly set the switch. */
3850 if (TARGET_ALWAYS_HINT >= 0)
3851 rs6000_always_hint = TARGET_ALWAYS_HINT;
3852
3853 if (TARGET_SCHED_GROUPS >= 0)
3854 rs6000_sched_groups = TARGET_SCHED_GROUPS;
3855
3856 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
3857 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
3858
3859 rs6000_sched_restricted_insns_priority
3860 = (rs6000_sched_groups ? 1 : 0);
3861
3862 /* Handle -msched-costly-dep option. */
3863 rs6000_sched_costly_dep
3864 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
3865
3866 if (rs6000_sched_costly_dep_str)
3867 {
3868 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
3869 rs6000_sched_costly_dep = no_dep_costly;
3870 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
3871 rs6000_sched_costly_dep = all_deps_costly;
3872 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
3873 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
3874 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
3875 rs6000_sched_costly_dep = store_to_load_dep_costly;
3876 else
3877 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
3878 atoi (rs6000_sched_costly_dep_str));
3879 }
3880
3881 /* Handle -minsert-sched-nops option. */
3882 rs6000_sched_insert_nops
3883 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
3884
3885 if (rs6000_sched_insert_nops_str)
3886 {
3887 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
3888 rs6000_sched_insert_nops = sched_finish_none;
3889 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
3890 rs6000_sched_insert_nops = sched_finish_pad_groups;
3891 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
3892 rs6000_sched_insert_nops = sched_finish_regroup_exact;
3893 else
3894 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
3895 atoi (rs6000_sched_insert_nops_str));
3896 }
3897
3898 if (global_init_p)
3899 {
3900 #ifdef TARGET_REGNAMES
3901 /* If the user desires alternate register names, copy in the
3902 alternate names now. */
3903 if (TARGET_REGNAMES)
3904 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
3905 #endif
3906
3907 /* Set aix_struct_return last, after the ABI is determined.
3908 If -maix-struct-return or -msvr4-struct-return was explicitly
3909 used, don't override with the ABI default. */
3910 if (!global_options_set.x_aix_struct_return)
3911 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
3912
3913 #if 0
3914 /* IBM XL compiler defaults to unsigned bitfields. */
3915 if (TARGET_XL_COMPAT)
3916 flag_signed_bitfields = 0;
3917 #endif
3918
3919 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
3920 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
3921
3922 if (TARGET_TOC)
3923 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
3924
3925 /* We can only guarantee the availability of DI pseudo-ops when
3926 assembling for 64-bit targets. */
3927 if (!TARGET_64BIT)
3928 {
3929 targetm.asm_out.aligned_op.di = NULL;
3930 targetm.asm_out.unaligned_op.di = NULL;
3931 }
3932
3933
3934 /* Set branch target alignment, if not optimizing for size. */
3935 if (!optimize_size)
3936 {
3937 /* Cell wants to be aligned to 8 bytes for dual issue. Titan wants to be
3938 aligned to 8 bytes to avoid misprediction by the branch predictor. */
3939 if (rs6000_cpu == PROCESSOR_TITAN
3940 || rs6000_cpu == PROCESSOR_CELL)
3941 {
3942 if (align_functions <= 0)
3943 align_functions = 8;
3944 if (align_jumps <= 0)
3945 align_jumps = 8;
3946 if (align_loops <= 0)
3947 align_loops = 8;
3948 }
3949 if (rs6000_align_branch_targets)
3950 {
3951 if (align_functions <= 0)
3952 align_functions = 16;
3953 if (align_jumps <= 0)
3954 align_jumps = 16;
3955 if (align_loops <= 0)
3956 {
3957 can_override_loop_align = 1;
3958 align_loops = 16;
3959 }
3960 }
3961 if (align_jumps_max_skip <= 0)
3962 align_jumps_max_skip = 15;
3963 if (align_loops_max_skip <= 0)
3964 align_loops_max_skip = 15;
3965 }
3966
3967 /* Arrange to save and restore machine status around nested functions. */
3968 init_machine_status = rs6000_init_machine_status;
3969
3970 /* We should always be splitting complex arguments, but we can't break
3971 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
3972 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
3973 targetm.calls.split_complex_arg = NULL;
3974 }
3975
3976 /* Initialize rs6000_cost with the appropriate target costs. */
3977 if (optimize_size)
3978 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
3979 else
3980 switch (rs6000_cpu)
3981 {
3982 case PROCESSOR_RS64A:
3983 rs6000_cost = &rs64a_cost;
3984 break;
3985
3986 case PROCESSOR_MPCCORE:
3987 rs6000_cost = &mpccore_cost;
3988 break;
3989
3990 case PROCESSOR_PPC403:
3991 rs6000_cost = &ppc403_cost;
3992 break;
3993
3994 case PROCESSOR_PPC405:
3995 rs6000_cost = &ppc405_cost;
3996 break;
3997
3998 case PROCESSOR_PPC440:
3999 rs6000_cost = &ppc440_cost;
4000 break;
4001
4002 case PROCESSOR_PPC476:
4003 rs6000_cost = &ppc476_cost;
4004 break;
4005
4006 case PROCESSOR_PPC601:
4007 rs6000_cost = &ppc601_cost;
4008 break;
4009
4010 case PROCESSOR_PPC603:
4011 rs6000_cost = &ppc603_cost;
4012 break;
4013
4014 case PROCESSOR_PPC604:
4015 rs6000_cost = &ppc604_cost;
4016 break;
4017
4018 case PROCESSOR_PPC604e:
4019 rs6000_cost = &ppc604e_cost;
4020 break;
4021
4022 case PROCESSOR_PPC620:
4023 rs6000_cost = &ppc620_cost;
4024 break;
4025
4026 case PROCESSOR_PPC630:
4027 rs6000_cost = &ppc630_cost;
4028 break;
4029
4030 case PROCESSOR_CELL:
4031 rs6000_cost = &ppccell_cost;
4032 break;
4033
4034 case PROCESSOR_PPC750:
4035 case PROCESSOR_PPC7400:
4036 rs6000_cost = &ppc750_cost;
4037 break;
4038
4039 case PROCESSOR_PPC7450:
4040 rs6000_cost = &ppc7450_cost;
4041 break;
4042
4043 case PROCESSOR_PPC8540:
4044 case PROCESSOR_PPC8548:
4045 rs6000_cost = &ppc8540_cost;
4046 break;
4047
4048 case PROCESSOR_PPCE300C2:
4049 case PROCESSOR_PPCE300C3:
4050 rs6000_cost = &ppce300c2c3_cost;
4051 break;
4052
4053 case PROCESSOR_PPCE500MC:
4054 rs6000_cost = &ppce500mc_cost;
4055 break;
4056
4057 case PROCESSOR_PPCE500MC64:
4058 rs6000_cost = &ppce500mc64_cost;
4059 break;
4060
4061 case PROCESSOR_PPCE5500:
4062 rs6000_cost = &ppce5500_cost;
4063 break;
4064
4065 case PROCESSOR_PPCE6500:
4066 rs6000_cost = &ppce6500_cost;
4067 break;
4068
4069 case PROCESSOR_TITAN:
4070 rs6000_cost = &titan_cost;
4071 break;
4072
4073 case PROCESSOR_POWER4:
4074 case PROCESSOR_POWER5:
4075 rs6000_cost = &power4_cost;
4076 break;
4077
4078 case PROCESSOR_POWER6:
4079 rs6000_cost = &power6_cost;
4080 break;
4081
4082 case PROCESSOR_POWER7:
4083 rs6000_cost = &power7_cost;
4084 break;
4085
4086 case PROCESSOR_POWER8:
4087 rs6000_cost = &power8_cost;
4088 break;
4089
4090 case PROCESSOR_PPCA2:
4091 rs6000_cost = &ppca2_cost;
4092 break;
4093
4094 default:
4095 gcc_unreachable ();
4096 }
4097
4098 if (global_init_p)
4099 {
4100 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4101 rs6000_cost->simultaneous_prefetches,
4102 global_options.x_param_values,
4103 global_options_set.x_param_values);
4104 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
4105 global_options.x_param_values,
4106 global_options_set.x_param_values);
4107 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4108 rs6000_cost->cache_line_size,
4109 global_options.x_param_values,
4110 global_options_set.x_param_values);
4111 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
4112 global_options.x_param_values,
4113 global_options_set.x_param_values);
4114
4115 /* Increase loop peeling limits based on performance analysis. */
4116 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
4117 global_options.x_param_values,
4118 global_options_set.x_param_values);
4119 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
4120 global_options.x_param_values,
4121 global_options_set.x_param_values);
4122
4123 /* If using typedef char *va_list, signal that
4124 __builtin_va_start (&ap, 0) can be optimized to
4125 ap = __builtin_next_arg (0). */
4126 if (DEFAULT_ABI != ABI_V4)
4127 targetm.expand_builtin_va_start = NULL;
4128 }
4129
4130 /* Set up single/double float flags.
4131 If TARGET_HARD_FLOAT is set, but neither single or double is set,
4132 then set both flags. */
4133 if (TARGET_HARD_FLOAT && TARGET_FPRS
4134 && rs6000_single_float == 0 && rs6000_double_float == 0)
4135 rs6000_single_float = rs6000_double_float = 1;
4136
4137 /* If not explicitly specified via option, decide whether to generate indexed
4138 load/store instructions. */
4139 if (TARGET_AVOID_XFORM == -1)
4140 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4141 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4142 need indexed accesses and the type used is the scalar type of the element
4143 being loaded or stored. */
4144 TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
4145 && !TARGET_ALTIVEC);
4146
4147 /* Set the -mrecip options. */
4148 if (rs6000_recip_name)
4149 {
4150 char *p = ASTRDUP (rs6000_recip_name);
4151 char *q;
4152 unsigned int mask, i;
4153 bool invert;
4154
4155 while ((q = strtok (p, ",")) != NULL)
4156 {
4157 p = NULL;
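          /* Pass NULL on subsequent calls so strtok continues scanning the
             same comma-separated list; e.g. an (illustrative)
             "-mrecip=div,!rsqrt" would visit "div" and then "!rsqrt".  */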
4158 if (*q == '!')
4159 {
4160 invert = true;
4161 q++;
4162 }
4163 else
4164 invert = false;
4165
4166 if (!strcmp (q, "default"))
4167 mask = ((TARGET_RECIP_PRECISION)
4168 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4169 else
4170 {
4171 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4172 if (!strcmp (q, recip_options[i].string))
4173 {
4174 mask = recip_options[i].mask;
4175 break;
4176 }
4177
4178 if (i == ARRAY_SIZE (recip_options))
4179 {
4180 error ("unknown option for -mrecip=%s", q);
4181 invert = false;
4182 mask = 0;
4183 ret = false;
4184 }
4185 }
4186
4187 if (invert)
4188 rs6000_recip_control &= ~mask;
4189 else
4190 rs6000_recip_control |= mask;
4191 }
4192 }
4193
4194 /* Set the builtin mask of the various options in use that could affect
4195 which builtins are available. In the past we used target_flags, but we've
4196 run out of bits, and some options like SPE and PAIRED are no longer in
4197 target_flags. */
4198 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4199 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4200 {
4201 fprintf (stderr,
4202 "new builtin mask = " HOST_WIDE_INT_PRINT_HEX ", ",
4203 rs6000_builtin_mask);
4204 rs6000_print_builtin_options (stderr, 0, NULL, rs6000_builtin_mask);
4205 }
4206
4207 /* Initialize all of the registers. */
4208 rs6000_init_hard_regno_mode_ok (global_init_p);
4209
4210 /* Save the initial options in case the user uses function-specific options. */
4211 if (global_init_p)
4212 target_option_default_node = target_option_current_node
4213 = build_target_option_node (&global_options);
4214
4215 /* If not explicitly specified via option, decide whether to generate the
4216 extra blr's required to preserve the link stack on some cpus (e.g., the 476). */
4217 if (TARGET_LINK_STACK == -1)
4218 SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
4219
4220 return ret;
4221 }
4222
4223 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4224 define the target cpu type. */
4225
4226 static void
4227 rs6000_option_override (void)
4228 {
4229 (void) rs6000_option_override_internal (true);
4230
4231 /* Register machine-specific passes. This needs to be done at start-up.
4232 It's convenient to do it here (like i386 does). */
4233 opt_pass *pass_analyze_swaps = make_pass_analyze_swaps (g);
4234
4235 struct register_pass_info analyze_swaps_info
4236 = { pass_analyze_swaps, "cse1", 1, PASS_POS_INSERT_BEFORE };
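  /* That is, run the swaps pass immediately before the first instance of the
     "cse1" pass.  */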
4237
4238 register_pass (&analyze_swaps_info);
4239 }
4240
4241 \f
4242 /* Implement targetm.vectorize.builtin_mask_for_load. */
4243 static tree
4244 rs6000_builtin_mask_for_load (void)
4245 {
4246 if (TARGET_ALTIVEC || TARGET_VSX)
4247 return altivec_builtin_mask_for_load;
4248 else
4249 return 0;
4250 }
4251
4252 /* Implement LOOP_ALIGN. */
4253 int
4254 rs6000_loop_align (rtx label)
4255 {
4256 basic_block bb;
4257 int ninsns;
4258
4259 /* Don't override loop alignment if -falign-loops was specified. */
4260 if (!can_override_loop_align)
4261 return align_loops_log;
4262
4263 bb = BLOCK_FOR_INSN (label);
4264 ninsns = num_loop_insns (bb->loop_father);
4265
4266 /* Align small loops to 32 bytes to fit in an icache sector; otherwise return the default. */
4267 if (ninsns > 4 && ninsns <= 8
4268 && (rs6000_cpu == PROCESSOR_POWER4
4269 || rs6000_cpu == PROCESSOR_POWER5
4270 || rs6000_cpu == PROCESSOR_POWER6
4271 || rs6000_cpu == PROCESSOR_POWER7
4272 || rs6000_cpu == PROCESSOR_POWER8))
4273 return 5;
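  /* Alignment is expressed as a log2 value here, so 5 requests a 32-byte
     boundary.  */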
4274 else
4275 return align_loops_log;
4276 }
4277
4278 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
4279 static int
4280 rs6000_loop_align_max_skip (rtx_insn *label)
4281 {
4282 return (1 << rs6000_loop_align (label)) - 1;
4283 }
4284
4285 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4286 after applying some number of iterations. This routine does not determine
4287 how many iterations are required to reach the desired alignment. */
4288
4289 static bool
4290 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4291 {
4292 if (is_packed)
4293 return false;
4294
4295 if (TARGET_32BIT)
4296 {
4297 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4298 return true;
4299
4300 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4301 return true;
4302
4303 return false;
4304 }
4305 else
4306 {
4307 if (TARGET_MACHO)
4308 return false;
4309
4310 /* Assume that all other types are naturally aligned. CHECKME! */
4311 return true;
4312 }
4313 }
4314
4315 /* Return true if the vector misalignment factor is supported by the
4316 target. */
4317 static bool
4318 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4319 const_tree type,
4320 int misalignment,
4321 bool is_packed)
4322 {
4323 if (TARGET_VSX)
4324 {
4325 /* Return if movmisalign pattern is not supported for this mode. */
4326 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4327 return false;
4328
4329 if (misalignment == -1)
4330 {
4331 /* Misalignment factor is unknown at compile time but we know
4332 it's word aligned. */
4333 if (rs6000_vector_alignment_reachable (type, is_packed))
4334 {
4335 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4336
4337 if (element_size == 64 || element_size == 32)
4338 return true;
4339 }
4340
4341 return false;
4342 }
4343
4344 /* VSX supports word-aligned vectors. */
4345 if (misalignment % 4 == 0)
4346 return true;
4347 }
4348 return false;
4349 }
4350
4351 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4352 static int
4353 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4354 tree vectype, int misalign)
4355 {
4356 unsigned elements;
4357 tree elem_type;
4358
4359 switch (type_of_cost)
4360 {
4361 case scalar_stmt:
4362 case scalar_load:
4363 case scalar_store:
4364 case vector_stmt:
4365 case vector_load:
4366 case vector_store:
4367 case vec_to_scalar:
4368 case scalar_to_vec:
4369 case cond_branch_not_taken:
4370 return 1;
4371
4372 case vec_perm:
4373 if (TARGET_VSX)
4374 return 3;
4375 else
4376 return 1;
4377
4378 case vec_promote_demote:
4379 if (TARGET_VSX)
4380 return 4;
4381 else
4382 return 1;
4383
4384 case cond_branch_taken:
4385 return 3;
4386
4387 case unaligned_load:
4388 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4389 {
4390 elements = TYPE_VECTOR_SUBPARTS (vectype);
4391 if (elements == 2)
4392 /* Double word aligned. */
4393 return 2;
4394
4395 if (elements == 4)
4396 {
4397 switch (misalign)
4398 {
4399 case 8:
4400 /* Double word aligned. */
4401 return 2;
4402
4403 case -1:
4404 /* Unknown misalignment. */
4405 case 4:
4406 case 12:
4407 /* Word aligned. */
4408 return 22;
4409
4410 default:
4411 gcc_unreachable ();
4412 }
4413 }
4414 }
4415
4416 if (TARGET_ALTIVEC)
4417 /* Misaligned loads are not supported. */
4418 gcc_unreachable ();
4419
4420 return 2;
4421
4422 case unaligned_store:
4423 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4424 {
4425 elements = TYPE_VECTOR_SUBPARTS (vectype);
4426 if (elements == 2)
4427 /* Double word aligned. */
4428 return 2;
4429
4430 if (elements == 4)
4431 {
4432 switch (misalign)
4433 {
4434 case 8:
4435 /* Double word aligned. */
4436 return 2;
4437
4438 case -1:
4439 /* Unknown misalignment. */
4440 case 4:
4441 case 12:
4442 /* Word aligned. */
4443 return 23;
4444
4445 default:
4446 gcc_unreachable ();
4447 }
4448 }
4449 }
4450
4451 if (TARGET_ALTIVEC)
4452 /* Misaligned stores are not supported. */
4453 gcc_unreachable ();
4454
4455 return 2;
4456
4457 case vec_construct:
4458 elements = TYPE_VECTOR_SUBPARTS (vectype);
4459 elem_type = TREE_TYPE (vectype);
4460 /* 32-bit floats loaded into vector registers are stored as double
4461 precision, so we need n/2 converts in addition to the usual
4462 n/2 merges to construct a vector of short floats from them. */
4463 if (SCALAR_FLOAT_TYPE_P (elem_type)
4464 && TYPE_PRECISION (elem_type) == 32)
4465 return elements + 1;
4466 else
4467 return elements / 2 + 1;
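      /* E.g. a V4SF construct costs 4 + 1 = 5 (2 merges plus 2 converts),
         while a V2DF construct costs 2 / 2 + 1 = 2.  */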
4468
4469 default:
4470 gcc_unreachable ();
4471 }
4472 }
4473
4474 /* Implement targetm.vectorize.preferred_simd_mode. */
4475
4476 static machine_mode
4477 rs6000_preferred_simd_mode (machine_mode mode)
4478 {
4479 if (TARGET_VSX)
4480 switch (mode)
4481 {
4482 case DFmode:
4483 return V2DFmode;
4484 default:;
4485 }
4486 if (TARGET_ALTIVEC || TARGET_VSX)
4487 switch (mode)
4488 {
4489 case SFmode:
4490 return V4SFmode;
4491 case TImode:
4492 return V1TImode;
4493 case DImode:
4494 return V2DImode;
4495 case SImode:
4496 return V4SImode;
4497 case HImode:
4498 return V8HImode;
4499 case QImode:
4500 return V16QImode;
4501 default:;
4502 }
4503 if (TARGET_SPE)
4504 switch (mode)
4505 {
4506 case SFmode:
4507 return V2SFmode;
4508 case SImode:
4509 return V2SImode;
4510 default:;
4511 }
4512 if (TARGET_PAIRED_FLOAT
4513 && mode == SFmode)
4514 return V2SFmode;
4515 return word_mode;
4516 }
4517
4518 typedef struct _rs6000_cost_data
4519 {
4520 struct loop *loop_info;
4521 unsigned cost[3];
4522 } rs6000_cost_data;
4523
4524 /* Test for likely overcommitment of vector hardware resources. If a
4525 loop iteration is relatively large, and too large a percentage of
4526 instructions in the loop are vectorized, the cost model may not
4527 adequately reflect delays from unavailable vector resources.
4528 Penalize the loop body cost for this case. */
4529
4530 static void
4531 rs6000_density_test (rs6000_cost_data *data)
4532 {
4533 const int DENSITY_PCT_THRESHOLD = 85;
4534 const int DENSITY_SIZE_THRESHOLD = 70;
4535 const int DENSITY_PENALTY = 10;
4536 struct loop *loop = data->loop_info;
4537 basic_block *bbs = get_loop_body (loop);
4538 int nbbs = loop->num_nodes;
4539 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
4540 int i, density_pct;
4541
4542 for (i = 0; i < nbbs; i++)
4543 {
4544 basic_block bb = bbs[i];
4545 gimple_stmt_iterator gsi;
4546
4547 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4548 {
4549 gimple stmt = gsi_stmt (gsi);
4550 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4551
4552 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4553 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
4554 not_vec_cost++;
4555 }
4556 }
4557
4558 free (bbs);
4559 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
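  /* E.g. vec_cost 90 and not_vec_cost 10 give a density of 90%; since
     90 > 85 and the total size 100 > 70, the body cost would be scaled
     by 110%.  */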
4560
4561 if (density_pct > DENSITY_PCT_THRESHOLD
4562 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
4563 {
4564 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
4565 if (dump_enabled_p ())
4566 dump_printf_loc (MSG_NOTE, vect_location,
4567 "density %d%%, cost %d exceeds threshold, penalizing "
4568 "loop body cost by %d%%", density_pct,
4569 vec_cost + not_vec_cost, DENSITY_PENALTY);
4570 }
4571 }
4572
4573 /* Implement targetm.vectorize.init_cost. */
4574
4575 static void *
4576 rs6000_init_cost (struct loop *loop_info)
4577 {
4578 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
4579 data->loop_info = loop_info;
4580 data->cost[vect_prologue] = 0;
4581 data->cost[vect_body] = 0;
4582 data->cost[vect_epilogue] = 0;
4583 return data;
4584 }
4585
4586 /* Implement targetm.vectorize.add_stmt_cost. */
4587
4588 static unsigned
4589 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4590 struct _stmt_vec_info *stmt_info, int misalign,
4591 enum vect_cost_model_location where)
4592 {
4593 rs6000_cost_data *cost_data = (rs6000_cost_data *) data;
4594 unsigned retval = 0;
4595
4596 if (flag_vect_cost_model)
4597 {
4598 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4599 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
4600 misalign);
4601 /* Statements in an inner loop relative to the loop being
4602 vectorized are weighted more heavily. The value here is
4603 arbitrary and could potentially be improved with analysis. */
4604 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4605 count *= 50; /* FIXME. */
4606
4607 retval = (unsigned) (count * stmt_cost);
4608 cost_data->cost[where] += retval;
4609 }
4610
4611 return retval;
4612 }
4613
4614 /* Implement targetm.vectorize.finish_cost. */
4615
4616 static void
4617 rs6000_finish_cost (void *data, unsigned *prologue_cost,
4618 unsigned *body_cost, unsigned *epilogue_cost)
4619 {
4620 rs6000_cost_data *cost_data = (rs6000_cost_data *) data;
4621
4622 if (cost_data->loop_info)
4623 rs6000_density_test (cost_data);
4624
4625 *prologue_cost = cost_data->cost[vect_prologue];
4626 *body_cost = cost_data->cost[vect_body];
4627 *epilogue_cost = cost_data->cost[vect_epilogue];
4628 }
4629
4630 /* Implement targetm.vectorize.destroy_cost_data. */
4631
4632 static void
4633 rs6000_destroy_cost_data (void *data)
4634 {
4635 free (data);
4636 }
4637
4638 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
4639 library with vectorized intrinsics. */
4640
4641 static tree
4642 rs6000_builtin_vectorized_libmass (tree fndecl, tree type_out, tree type_in)
4643 {
4644 char name[32];
4645 const char *suffix = NULL;
4646 tree fntype, new_fndecl, bdecl = NULL_TREE;
4647 int n_args = 1;
4648 const char *bname;
4649 machine_mode el_mode, in_mode;
4650 int n, in_n;
4651
4652 /* Libmass is suitable only for unsafe math, as it does not correctly support
4653 parts of IEEE (such as denormals) with the required precision. Only support
4654 it if we have VSX to use the simd d2 or f4 functions.
4655 XXX: Add variable length support. */
4656 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
4657 return NULL_TREE;
4658
4659 el_mode = TYPE_MODE (TREE_TYPE (type_out));
4660 n = TYPE_VECTOR_SUBPARTS (type_out);
4661 in_mode = TYPE_MODE (TREE_TYPE (type_in));
4662 in_n = TYPE_VECTOR_SUBPARTS (type_in);
4663 if (el_mode != in_mode
4664 || n != in_n)
4665 return NULL_TREE;
4666
4667 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
4668 {
4669 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
4670 switch (fn)
4671 {
4672 case BUILT_IN_ATAN2:
4673 case BUILT_IN_HYPOT:
4674 case BUILT_IN_POW:
4675 n_args = 2;
4676 /* fall through */
4677
4678 case BUILT_IN_ACOS:
4679 case BUILT_IN_ACOSH:
4680 case BUILT_IN_ASIN:
4681 case BUILT_IN_ASINH:
4682 case BUILT_IN_ATAN:
4683 case BUILT_IN_ATANH:
4684 case BUILT_IN_CBRT:
4685 case BUILT_IN_COS:
4686 case BUILT_IN_COSH:
4687 case BUILT_IN_ERF:
4688 case BUILT_IN_ERFC:
4689 case BUILT_IN_EXP2:
4690 case BUILT_IN_EXP:
4691 case BUILT_IN_EXPM1:
4692 case BUILT_IN_LGAMMA:
4693 case BUILT_IN_LOG10:
4694 case BUILT_IN_LOG1P:
4695 case BUILT_IN_LOG2:
4696 case BUILT_IN_LOG:
4697 case BUILT_IN_SIN:
4698 case BUILT_IN_SINH:
4699 case BUILT_IN_SQRT:
4700 case BUILT_IN_TAN:
4701 case BUILT_IN_TANH:
4702 bdecl = builtin_decl_implicit (fn);
4703 suffix = "d2"; /* pow -> powd2 */
4704 if (el_mode != DFmode
4705 || n != 2
4706 || !bdecl)
4707 return NULL_TREE;
4708 break;
4709
4710 case BUILT_IN_ATAN2F:
4711 case BUILT_IN_HYPOTF:
4712 case BUILT_IN_POWF:
4713 n_args = 2;
4714 /* fall through */
4715
4716 case BUILT_IN_ACOSF:
4717 case BUILT_IN_ACOSHF:
4718 case BUILT_IN_ASINF:
4719 case BUILT_IN_ASINHF:
4720 case BUILT_IN_ATANF:
4721 case BUILT_IN_ATANHF:
4722 case BUILT_IN_CBRTF:
4723 case BUILT_IN_COSF:
4724 case BUILT_IN_COSHF:
4725 case BUILT_IN_ERFF:
4726 case BUILT_IN_ERFCF:
4727 case BUILT_IN_EXP2F:
4728 case BUILT_IN_EXPF:
4729 case BUILT_IN_EXPM1F:
4730 case BUILT_IN_LGAMMAF:
4731 case BUILT_IN_LOG10F:
4732 case BUILT_IN_LOG1PF:
4733 case BUILT_IN_LOG2F:
4734 case BUILT_IN_LOGF:
4735 case BUILT_IN_SINF:
4736 case BUILT_IN_SINHF:
4737 case BUILT_IN_SQRTF:
4738 case BUILT_IN_TANF:
4739 case BUILT_IN_TANHF:
4740 bdecl = builtin_decl_implicit (fn);
4741 suffix = "4"; /* powf -> powf4 */
4742 if (el_mode != SFmode
4743 || n != 4
4744 || !bdecl)
4745 return NULL_TREE;
4746 break;
4747
4748 default:
4749 return NULL_TREE;
4750 }
4751 }
4752 else
4753 return NULL_TREE;
4754
4755 gcc_assert (suffix != NULL);
4756 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
4757 if (!bname)
4758 return NULL_TREE;
4759
4760 strcpy (name, bname + sizeof ("__builtin_") - 1);
4761 strcat (name, suffix);
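  /* E.g. "__builtin_pow" is copied as "pow" and becomes "powd2" once the
     suffix is appended.  */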
4762
4763 if (n_args == 1)
4764 fntype = build_function_type_list (type_out, type_in, NULL);
4765 else if (n_args == 2)
4766 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
4767 else
4768 gcc_unreachable ();
4769
4770 /* Build a function declaration for the vectorized function. */
4771 new_fndecl = build_decl (BUILTINS_LOCATION,
4772 FUNCTION_DECL, get_identifier (name), fntype);
4773 TREE_PUBLIC (new_fndecl) = 1;
4774 DECL_EXTERNAL (new_fndecl) = 1;
4775 DECL_IS_NOVOPS (new_fndecl) = 1;
4776 TREE_READONLY (new_fndecl) = 1;
4777
4778 return new_fndecl;
4779 }
4780
4781 /* Return a function decl for a vectorized version of the builtin function
4782 FNDECL operating on vector type TYPE_IN and producing the result vector
4783 type TYPE_OUT, or NULL_TREE if it is not available. */
4784
4785 static tree
4786 rs6000_builtin_vectorized_function (tree fndecl, tree type_out,
4787 tree type_in)
4788 {
4789 machine_mode in_mode, out_mode;
4790 int in_n, out_n;
4791
4792 if (TARGET_DEBUG_BUILTIN)
4793 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
4794 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
4795 GET_MODE_NAME (TYPE_MODE (type_out)),
4796 GET_MODE_NAME (TYPE_MODE (type_in)));
4797
4798 if (TREE_CODE (type_out) != VECTOR_TYPE
4799 || TREE_CODE (type_in) != VECTOR_TYPE
4800 || !TARGET_VECTORIZE_BUILTINS)
4801 return NULL_TREE;
4802
4803 out_mode = TYPE_MODE (TREE_TYPE (type_out));
4804 out_n = TYPE_VECTOR_SUBPARTS (type_out);
4805 in_mode = TYPE_MODE (TREE_TYPE (type_in));
4806 in_n = TYPE_VECTOR_SUBPARTS (type_in);
4807
4808 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
4809 {
4810 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
4811 switch (fn)
4812 {
4813 case BUILT_IN_CLZIMAX:
4814 case BUILT_IN_CLZLL:
4815 case BUILT_IN_CLZL:
4816 case BUILT_IN_CLZ:
4817 if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
4818 {
4819 if (out_mode == QImode && out_n == 16)
4820 return rs6000_builtin_decls[P8V_BUILTIN_VCLZB];
4821 else if (out_mode == HImode && out_n == 8)
4822 return rs6000_builtin_decls[P8V_BUILTIN_VCLZH];
4823 else if (out_mode == SImode && out_n == 4)
4824 return rs6000_builtin_decls[P8V_BUILTIN_VCLZW];
4825 else if (out_mode == DImode && out_n == 2)
4826 return rs6000_builtin_decls[P8V_BUILTIN_VCLZD];
4827 }
4828 break;
4829 case BUILT_IN_COPYSIGN:
4830 if (VECTOR_UNIT_VSX_P (V2DFmode)
4831 && out_mode == DFmode && out_n == 2
4832 && in_mode == DFmode && in_n == 2)
4833 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
4834 break;
4835 case BUILT_IN_COPYSIGNF:
4836 if (out_mode != SFmode || out_n != 4
4837 || in_mode != SFmode || in_n != 4)
4838 break;
4839 if (VECTOR_UNIT_VSX_P (V4SFmode))
4840 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
4841 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4842 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
4843 break;
4844 case BUILT_IN_POPCOUNTIMAX:
4845 case BUILT_IN_POPCOUNTLL:
4846 case BUILT_IN_POPCOUNTL:
4847 case BUILT_IN_POPCOUNT:
4848 if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
4849 {
4850 if (out_mode == QImode && out_n == 16)
4851 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTB];
4852 else if (out_mode == HImode && out_n == 8)
4853 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTH];
4854 else if (out_mode == SImode && out_n == 4)
4855 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTW];
4856 else if (out_mode == DImode && out_n == 2)
4857 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTD];
4858 }
4859 break;
4860 case BUILT_IN_SQRT:
4861 if (VECTOR_UNIT_VSX_P (V2DFmode)
4862 && out_mode == DFmode && out_n == 2
4863 && in_mode == DFmode && in_n == 2)
4864 return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTDP];
4865 break;
4866 case BUILT_IN_SQRTF:
4867 if (VECTOR_UNIT_VSX_P (V4SFmode)
4868 && out_mode == SFmode && out_n == 4
4869 && in_mode == SFmode && in_n == 4)
4870 return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTSP];
4871 break;
4872 case BUILT_IN_CEIL:
4873 if (VECTOR_UNIT_VSX_P (V2DFmode)
4874 && out_mode == DFmode && out_n == 2
4875 && in_mode == DFmode && in_n == 2)
4876 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
4877 break;
4878 case BUILT_IN_CEILF:
4879 if (out_mode != SFmode || out_n != 4
4880 || in_mode != SFmode || in_n != 4)
4881 break;
4882 if (VECTOR_UNIT_VSX_P (V4SFmode))
4883 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
4884 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4885 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
4886 break;
4887 case BUILT_IN_FLOOR:
4888 if (VECTOR_UNIT_VSX_P (V2DFmode)
4889 && out_mode == DFmode && out_n == 2
4890 && in_mode == DFmode && in_n == 2)
4891 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
4892 break;
4893 case BUILT_IN_FLOORF:
4894 if (out_mode != SFmode || out_n != 4
4895 || in_mode != SFmode || in_n != 4)
4896 break;
4897 if (VECTOR_UNIT_VSX_P (V4SFmode))
4898 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
4899 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4900 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
4901 break;
4902 case BUILT_IN_FMA:
4903 if (VECTOR_UNIT_VSX_P (V2DFmode)
4904 && out_mode == DFmode && out_n == 2
4905 && in_mode == DFmode && in_n == 2)
4906 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
4907 break;
4908 case BUILT_IN_FMAF:
4909 if (VECTOR_UNIT_VSX_P (V4SFmode)
4910 && out_mode == SFmode && out_n == 4
4911 && in_mode == SFmode && in_n == 4)
4912 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
4913 else if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
4914 && out_mode == SFmode && out_n == 4
4915 && in_mode == SFmode && in_n == 4)
4916 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
4917 break;
4918 case BUILT_IN_TRUNC:
4919 if (VECTOR_UNIT_VSX_P (V2DFmode)
4920 && out_mode == DFmode && out_n == 2
4921 && in_mode == DFmode && in_n == 2)
4922 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
4923 break;
4924 case BUILT_IN_TRUNCF:
4925 if (out_mode != SFmode || out_n != 4
4926 || in_mode != SFmode || in_n != 4)
4927 break;
4928 if (VECTOR_UNIT_VSX_P (V4SFmode))
4929 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
4930 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4931 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
4932 break;
4933 case BUILT_IN_NEARBYINT:
4934 if (VECTOR_UNIT_VSX_P (V2DFmode)
4935 && flag_unsafe_math_optimizations
4936 && out_mode == DFmode && out_n == 2
4937 && in_mode == DFmode && in_n == 2)
4938 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
4939 break;
4940 case BUILT_IN_NEARBYINTF:
4941 if (VECTOR_UNIT_VSX_P (V4SFmode)
4942 && flag_unsafe_math_optimizations
4943 && out_mode == SFmode && out_n == 4
4944 && in_mode == SFmode && in_n == 4)
4945 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
4946 break;
4947 case BUILT_IN_RINT:
4948 if (VECTOR_UNIT_VSX_P (V2DFmode)
4949 && !flag_trapping_math
4950 && out_mode == DFmode && out_n == 2
4951 && in_mode == DFmode && in_n == 2)
4952 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
4953 break;
4954 case BUILT_IN_RINTF:
4955 if (VECTOR_UNIT_VSX_P (V4SFmode)
4956 && !flag_trapping_math
4957 && out_mode == SFmode && out_n == 4
4958 && in_mode == SFmode && in_n == 4)
4959 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
4960 break;
4961 default:
4962 break;
4963 }
4964 }
4965
4966 else if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
4967 {
4968 enum rs6000_builtins fn
4969 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
4970 switch (fn)
4971 {
4972 case RS6000_BUILTIN_RSQRTF:
4973 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
4974 && out_mode == SFmode && out_n == 4
4975 && in_mode == SFmode && in_n == 4)
4976 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
4977 break;
4978 case RS6000_BUILTIN_RSQRT:
4979 if (VECTOR_UNIT_VSX_P (V2DFmode)
4980 && out_mode == DFmode && out_n == 2
4981 && in_mode == DFmode && in_n == 2)
4982 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
4983 break;
4984 case RS6000_BUILTIN_RECIPF:
4985 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
4986 && out_mode == SFmode && out_n == 4
4987 && in_mode == SFmode && in_n == 4)
4988 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
4989 break;
4990 case RS6000_BUILTIN_RECIP:
4991 if (VECTOR_UNIT_VSX_P (V2DFmode)
4992 && out_mode == DFmode && out_n == 2
4993 && in_mode == DFmode && in_n == 2)
4994 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
4995 break;
4996 default:
4997 break;
4998 }
4999 }
5000
5001 /* Generate calls to libmass if appropriate. */
5002 if (rs6000_veclib_handler)
5003 return rs6000_veclib_handler (fndecl, type_out, type_in);
5004
5005 return NULL_TREE;
5006 }
5007 \f
5008 /* Default CPU string for rs6000*_file_start functions. */
5009 static const char *rs6000_default_cpu;
5010
5011 /* Do anything needed at the start of the asm file. */
5012
5013 static void
5014 rs6000_file_start (void)
5015 {
5016 char buffer[80];
5017 const char *start = buffer;
5018 FILE *file = asm_out_file;
5019
5020 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5021
5022 default_file_start ();
5023
5024 if (flag_verbose_asm)
5025 {
5026 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5027
5028 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5029 {
5030 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5031 start = "";
5032 }
5033
5034 if (global_options_set.x_rs6000_cpu_index)
5035 {
5036 fprintf (file, "%s -mcpu=%s", start,
5037 processor_target_table[rs6000_cpu_index].name);
5038 start = "";
5039 }
5040
5041 if (global_options_set.x_rs6000_tune_index)
5042 {
5043 fprintf (file, "%s -mtune=%s", start,
5044 processor_target_table[rs6000_tune_index].name);
5045 start = "";
5046 }
5047
5048 if (PPC405_ERRATUM77)
5049 {
5050 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5051 start = "";
5052 }
5053
5054 #ifdef USING_ELFOS_H
5055 switch (rs6000_sdata)
5056 {
5057 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5058 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5059 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5060 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5061 }
5062
5063 if (rs6000_sdata && g_switch_value)
5064 {
5065 fprintf (file, "%s -G %d", start,
5066 g_switch_value);
5067 start = "";
5068 }
5069 #endif
5070
5071 if (*start == '\0')
5072 putc ('\n', file);
5073 }
5074
5075 if (DEFAULT_ABI == ABI_ELFv2)
5076 fprintf (file, "\t.abiversion 2\n");
5077
5078 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2
5079 || (TARGET_ELF && flag_pic == 2))
5080 {
5081 switch_to_section (toc_section);
5082 switch_to_section (text_section);
5083 }
5084 }
5085
5086 \f
5087 /* Return nonzero if this function is known to have a null epilogue. */
5088
5089 int
5090 direct_return (void)
5091 {
5092 if (reload_completed)
5093 {
5094 rs6000_stack_t *info = rs6000_stack_info ();
5095
5096 if (info->first_gp_reg_save == 32
5097 && info->first_fp_reg_save == 64
5098 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5099 && ! info->lr_save_p
5100 && ! info->cr_save_p
5101 && info->vrsave_mask == 0
5102 && ! info->push_p)
5103 return 1;
5104 }
5105
5106 return 0;
5107 }
5108
5109 /* Return the number of instructions it takes to form a constant in an
5110 integer register. */
5111
5112 int
5113 num_insns_constant_wide (HOST_WIDE_INT value)
5114 {
5115 /* signed constant loadable with addi */
5116 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
5117 return 1;
5118
5119 /* constant loadable with addis */
5120 else if ((value & 0xffff) == 0
5121 && (value >> 31 == -1 || value >> 31 == 0))
5122 return 1;
5123
5124 else if (TARGET_POWERPC64)
5125 {
5126 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5127 HOST_WIDE_INT high = value >> 31;
5128
5129 if (high == 0 || high == -1)
5130 return 2;
5131
5132 high >>= 1;
5133
5134 if (low == 0)
5135 return num_insns_constant_wide (high) + 1;
5136 else if (high == 0)
5137 return num_insns_constant_wide (low) + 1;
5138 else
5139 return (num_insns_constant_wide (high)
5140 + num_insns_constant_wide (low) + 1);
5141 }
5142
5143 else
5144 return 2;
5145 }
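/* A worked example (illustrative, not from the sources): 0x7fff is
   loadable with one addi and 0x12340000 with one addis. On a 64-bit
   target, 0x123456789abcdef0 needs 2 insns for the high word
   0x12345678, 2 for the sign-extended low word 0x9abcdef0, plus 1
   for the joining shift: 5 in all. */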
5146
5147 int
5148 num_insns_constant (rtx op, machine_mode mode)
5149 {
5150 HOST_WIDE_INT low, high;
5151
5152 switch (GET_CODE (op))
5153 {
5154 case CONST_INT:
5155 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
5156 && mask64_operand (op, mode))
5157 return 2;
5158 else
5159 return num_insns_constant_wide (INTVAL (op));
5160
5161 case CONST_WIDE_INT:
5162 {
5163 int i;
5164 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
5165 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5166 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
5167 return ins;
5168 }
5169
5170 case CONST_DOUBLE:
5171 if (mode == SFmode || mode == SDmode)
5172 {
5173 long l;
5174 REAL_VALUE_TYPE rv;
5175
5176 REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
5177 if (DECIMAL_FLOAT_MODE_P (mode))
5178 REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
5179 else
5180 REAL_VALUE_TO_TARGET_SINGLE (rv, l);
5181 return num_insns_constant_wide ((HOST_WIDE_INT) l);
5182 }
5183
5184 long l[2];
5185 REAL_VALUE_TYPE rv;
5186
5187 REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
5188 if (DECIMAL_FLOAT_MODE_P (mode))
5189 REAL_VALUE_TO_TARGET_DECIMAL64 (rv, l);
5190 else
5191 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
5192 high = l[WORDS_BIG_ENDIAN == 0];
5193 low = l[WORDS_BIG_ENDIAN != 0];
5194
5195 if (TARGET_32BIT)
5196 return (num_insns_constant_wide (low)
5197 + num_insns_constant_wide (high));
5198 else
5199 {
5200 if ((high == 0 && low >= 0)
5201 || (high == -1 && low < 0))
5202 return num_insns_constant_wide (low);
5203
5204 else if (mask64_operand (op, mode))
5205 return 2;
5206
5207 else if (low == 0)
5208 return num_insns_constant_wide (high) + 1;
5209
5210 else
5211 return (num_insns_constant_wide (high)
5212 + num_insns_constant_wide (low) + 1);
5213 }
5214
5215 default:
5216 gcc_unreachable ();
5217 }
5218 }
5219
5220 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5221 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5222 corresponding element of the vector, but for V4SFmode and V2SFmode,
5223 the corresponding "float" is interpreted as an SImode integer. */
5224
5225 HOST_WIDE_INT
5226 const_vector_elt_as_int (rtx op, unsigned int elt)
5227 {
5228 rtx tmp;
5229
5230 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5231 gcc_assert (GET_MODE (op) != V2DImode
5232 && GET_MODE (op) != V2DFmode);
5233
5234 tmp = CONST_VECTOR_ELT (op, elt);
5235 if (GET_MODE (op) == V4SFmode
5236 || GET_MODE (op) == V2SFmode)
5237 tmp = gen_lowpart (SImode, tmp);
5238 return INTVAL (tmp);
5239 }
5240
5241 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5242 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5243 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5244 all items are set to the same value and contain COPIES replicas of the
5245 vsplt's operand; if STEP > 1, every STEP-th element is set to the vsplt's
5246 operand and the others are set to the value of the operand's msb. */
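/* Some illustrative cases: the V8HImode constant {5,5,5,5,5,5,5,5} is a
   vspltish of 5 with STEP == 1 and COPIES == 1; the same bits seen as
   V4SImode, {0x00050005, 0x00050005, 0x00050005, 0x00050005}, are the
   COPIES == 2 case; and on a big-endian target the V8HImode constant
   {0,5,0,5,0,5,0,5} matches a vspltisw of 5 with STEP == 2, the
   in-between elements holding the operand's (zero) msb. */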
5247
5248 static bool
5249 vspltis_constant (rtx op, unsigned step, unsigned copies)
5250 {
5251 machine_mode mode = GET_MODE (op);
5252 machine_mode inner = GET_MODE_INNER (mode);
5253
5254 unsigned i;
5255 unsigned nunits;
5256 unsigned bitsize;
5257 unsigned mask;
5258
5259 HOST_WIDE_INT val;
5260 HOST_WIDE_INT splat_val;
5261 HOST_WIDE_INT msb_val;
5262
5263 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5264 return false;
5265
5266 nunits = GET_MODE_NUNITS (mode);
5267 bitsize = GET_MODE_BITSIZE (inner);
5268 mask = GET_MODE_MASK (inner);
5269
5270 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5271 splat_val = val;
5272 msb_val = val >= 0 ? 0 : -1;
5273
5274 /* Construct the value to be splatted, if possible. If not, return 0. */
5275 for (i = 2; i <= copies; i *= 2)
5276 {
5277 HOST_WIDE_INT small_val;
5278 bitsize /= 2;
5279 small_val = splat_val >> bitsize;
5280 mask >>= bitsize;
5281 if (splat_val != ((small_val << bitsize) | (small_val & mask)))
5282 return false;
5283 splat_val = small_val;
5284 }
5285
5286 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5287 if (EASY_VECTOR_15 (splat_val))
5288 ;
5289
5290 /* Also check if we can splat, and then add the result to itself. Do so if
5291 the value is positive, or if the splat instruction is using OP's mode;
5292 for splat_val < 0, the splat and the add should use the same mode. */
5293 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
5294 && (splat_val >= 0 || (step == 1 && copies == 1)))
5295 ;
5296
5297 /* Also check if we are loading up the most significant bit, which can be
5298 done by loading up -1 and shifting the value left by -1. */
5299 else if (EASY_VECTOR_MSB (splat_val, inner))
5300 ;
5301
5302 else
5303 return false;
5304
5305 /* Check if VAL is present in every STEP-th element, and the
5306 other elements are filled with its most significant bit. */
5307 for (i = 1; i < nunits; ++i)
5308 {
5309 HOST_WIDE_INT desired_val;
5310 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
5311 if ((i & (step - 1)) == 0)
5312 desired_val = val;
5313 else
5314 desired_val = msb_val;
5315
5316 if (desired_val != const_vector_elt_as_int (op, elt))
5317 return false;
5318 }
5319
5320 return true;
5321 }
5322
5323
5324 /* Return true if OP is of the given MODE and can be synthesized
5325 with a vspltisb, vspltish or vspltisw. */
5326
5327 bool
5328 easy_altivec_constant (rtx op, machine_mode mode)
5329 {
5330 unsigned step, copies;
5331
5332 if (mode == VOIDmode)
5333 mode = GET_MODE (op);
5334 else if (mode != GET_MODE (op))
5335 return false;
5336
5337 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
5338 constants. */
5339 if (mode == V2DFmode)
5340 return zero_constant (op, mode);
5341
5342 else if (mode == V2DImode)
5343 {
5344 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
5345 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
5346 return false;
5347
5348 if (zero_constant (op, mode))
5349 return true;
5350
5351 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
5352 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
5353 return true;
5354
5355 return false;
5356 }
5357
5358 /* V1TImode is a special container for TImode. Ignore for now. */
5359 else if (mode == V1TImode)
5360 return false;
5361
5362 /* Start with a vspltisw. */
5363 step = GET_MODE_NUNITS (mode) / 4;
5364 copies = 1;
5365
5366 if (vspltis_constant (op, step, copies))
5367 return true;
5368
5369 /* Then try with a vspltish. */
5370 if (step == 1)
5371 copies <<= 1;
5372 else
5373 step >>= 1;
5374
5375 if (vspltis_constant (op, step, copies))
5376 return true;
5377
5378 /* And finally a vspltisb. */
5379 if (step == 1)
5380 copies <<= 1;
5381 else
5382 step >>= 1;
5383
5384 if (vspltis_constant (op, step, copies))
5385 return true;
5386
5387 return false;
5388 }
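/* Editorial note: the three attempts above try vspltisw, vspltish and
   vspltisb in turn. STEP starts at NUNITS / 4 because vspltisw fills
   4-byte elements; each later attempt either halves STEP (the splat
   element covers fewer elements of OP) or, once STEP is 1, doubles
   COPIES (each element of OP holds several copies of the splat
   operand). */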
5389
5390 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
5391 result is OP. Abort if it is not possible. */
5392
5393 rtx
5394 gen_easy_altivec_constant (rtx op)
5395 {
5396 machine_mode mode = GET_MODE (op);
5397 int nunits = GET_MODE_NUNITS (mode);
5398 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5399 unsigned step = nunits / 4;
5400 unsigned copies = 1;
5401
5402 /* Start with a vspltisw. */
5403 if (vspltis_constant (op, step, copies))
5404 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
5405
5406 /* Then try with a vspltish. */
5407 if (step == 1)
5408 copies <<= 1;
5409 else
5410 step >>= 1;
5411
5412 if (vspltis_constant (op, step, copies))
5413 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
5414
5415 /* And finally a vspltisb. */
5416 if (step == 1)
5417 copies <<= 1;
5418 else
5419 step >>= 1;
5420
5421 if (vspltis_constant (op, step, copies))
5422 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
5423
5424 gcc_unreachable ();
5425 }
5426
5427 const char *
5428 output_vec_const_move (rtx *operands)
5429 {
5430 int cst, cst2;
5431 machine_mode mode;
5432 rtx dest, vec;
5433
5434 dest = operands[0];
5435 vec = operands[1];
5436 mode = GET_MODE (dest);
5437
5438 if (TARGET_VSX)
5439 {
5440 if (zero_constant (vec, mode))
5441 return "xxlxor %x0,%x0,%x0";
5442
5443 if ((mode == V2DImode || mode == V1TImode)
5444 && INTVAL (CONST_VECTOR_ELT (vec, 0)) == -1
5445 && INTVAL (CONST_VECTOR_ELT (vec, 1)) == -1)
5446 return "vspltisw %0,-1";
5447 }
5448
5449 if (TARGET_ALTIVEC)
5450 {
5451 rtx splat_vec;
5452 if (zero_constant (vec, mode))
5453 return "vxor %0,%0,%0";
5454
5455 splat_vec = gen_easy_altivec_constant (vec);
5456 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
5457 operands[1] = XEXP (splat_vec, 0);
5458 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
5459 return "#";
5460
5461 switch (GET_MODE (splat_vec))
5462 {
5463 case V4SImode:
5464 return "vspltisw %0,%1";
5465
5466 case V8HImode:
5467 return "vspltish %0,%1";
5468
5469 case V16QImode:
5470 return "vspltisb %0,%1";
5471
5472 default:
5473 gcc_unreachable ();
5474 }
5475 }
5476
5477 gcc_assert (TARGET_SPE);
5478
5479 /* Vector constant 0 is handled as a splitter of V2SI, and in the
5480 pattern of V1DI, V4HI, and V2SF.
5481
5482 FIXME: We should probably return # and add post reload
5483 splitters for these, but this way is so easy ;-). */
5484 cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
5485 cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
5486 operands[1] = CONST_VECTOR_ELT (vec, 0);
5487 operands[2] = CONST_VECTOR_ELT (vec, 1);
5488 if (cst == cst2)
5489 return "li %0,%1\n\tevmergelo %0,%0,%0";
5490 else if (WORDS_BIG_ENDIAN)
5491 return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
5492 else
5493 return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
5494 }
5495
5496 /* Initialize the paired-float vector TARGET to VALS. */
5497
5498 void
5499 paired_expand_vector_init (rtx target, rtx vals)
5500 {
5501 machine_mode mode = GET_MODE (target);
5502 int n_elts = GET_MODE_NUNITS (mode);
5503 int n_var = 0;
5504 rtx x, new_rtx, tmp, constant_op, op1, op2;
5505 int i;
5506
5507 for (i = 0; i < n_elts; ++i)
5508 {
5509 x = XVECEXP (vals, 0, i);
5510 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
5511 ++n_var;
5512 }
5513 if (n_var == 0)
5514 {
5515 /* Load from constant pool. */
5516 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5517 return;
5518 }
5519
5520 if (n_var == 2)
5521 {
5522 /* The vector is initialized only with non-constants. */
5523 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
5524 XVECEXP (vals, 0, 1));
5525
5526 emit_move_insn (target, new_rtx);
5527 return;
5528 }
5529
5530 /* One field is non-constant and the other one is a constant. Load the
5531 constant from the constant pool and use the ps_merge instruction to
5532 construct the whole vector. */
5533 op1 = XVECEXP (vals, 0, 0);
5534 op2 = XVECEXP (vals, 0, 1);
5535
5536 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
5537
5538 tmp = gen_reg_rtx (GET_MODE (constant_op));
5539 emit_move_insn (tmp, constant_op);
5540
5541 if (CONSTANT_P (op1))
5542 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
5543 else
5544 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
5545
5546 emit_move_insn (target, new_rtx);
5547 }
5548
5549 void
5550 paired_expand_vector_move (rtx operands[])
5551 {
5552 rtx op0 = operands[0], op1 = operands[1];
5553
5554 emit_move_insn (op0, op1);
5555 }
5556
5557 /* Emit a vector compare for code RCODE. DEST is the destination,
5558 OP1 and OP2 are the two VEC_COND_EXPR operands, and CC_OP0 and
5559 CC_OP1 are the two operands of the comparison RCODE. This is a
5560 recursive function. */
5561
5562 static void
5563 paired_emit_vector_compare (enum rtx_code rcode,
5564 rtx dest, rtx op0, rtx op1,
5565 rtx cc_op0, rtx cc_op1)
5566 {
5567 rtx tmp = gen_reg_rtx (V2SFmode);
5568 rtx tmp1, max, min;
5569
5570 gcc_assert (TARGET_PAIRED_FLOAT);
5571 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
5572
5573 switch (rcode)
5574 {
5575 case LT:
5576 case LTU:
5577 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
5578 return;
5579 case GE:
5580 case GEU:
5581 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
5582 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
5583 return;
5584 case LE:
5585 case LEU:
5586 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
5587 return;
5588 case GT:
5589 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
5590 return;
5591 case EQ:
5592 tmp1 = gen_reg_rtx (V2SFmode);
5593 max = gen_reg_rtx (V2SFmode);
5594 min = gen_reg_rtx (V2SFmode);
5596
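/* Editorial note on the trick used here: two subtract-and-select
   pairs compute per-element MAX and MIN, so MIN - MAX is zero
   exactly when the elements compare equal and negative otherwise;
   the final select then picks OP0 on equality and OP1 elsewhere. */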
5597 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
5598 emit_insn (gen_selv2sf4
5599 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
5600 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
5601 emit_insn (gen_selv2sf4
5602 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
5603 emit_insn (gen_subv2sf3 (tmp1, min, max));
5604 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
5605 return;
5606 case NE:
5607 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
5608 return;
5609 case UNLE:
5610 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
5611 return;
5612 case UNLT:
5613 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
5614 return;
5615 case UNGE:
5616 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
5617 return;
5618 case UNGT:
5619 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
5620 return;
5621 default:
5622 gcc_unreachable ();
5623 }
5624
5625 return;
5626 }
5627
5628 /* Emit vector conditional expression.
5629 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
5630 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
5631
5632 int
5633 paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5634 rtx cond, rtx cc_op0, rtx cc_op1)
5635 {
5636 enum rtx_code rcode = GET_CODE (cond);
5637
5638 if (!TARGET_PAIRED_FLOAT)
5639 return 0;
5640
5641 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
5642
5643 return 1;
5644 }
5645
5646 /* Initialize vector TARGET to VALS. */
5647
5648 void
5649 rs6000_expand_vector_init (rtx target, rtx vals)
5650 {
5651 machine_mode mode = GET_MODE (target);
5652 machine_mode inner_mode = GET_MODE_INNER (mode);
5653 int n_elts = GET_MODE_NUNITS (mode);
5654 int n_var = 0, one_var = -1;
5655 bool all_same = true, all_const_zero = true;
5656 rtx x, mem;
5657 int i;
5658
5659 for (i = 0; i < n_elts; ++i)
5660 {
5661 x = XVECEXP (vals, 0, i);
5662 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
5663 ++n_var, one_var = i;
5664 else if (x != CONST0_RTX (inner_mode))
5665 all_const_zero = false;
5666
5667 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
5668 all_same = false;
5669 }
5670
5671 if (n_var == 0)
5672 {
5673 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
5674 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
5675 if ((int_vector_p || TARGET_VSX) && all_const_zero)
5676 {
5677 /* Zero register. */
5678 emit_insn (gen_rtx_SET (VOIDmode, target,
5679 gen_rtx_XOR (mode, target, target)));
5680 return;
5681 }
5682 else if (int_vector_p && easy_vector_constant (const_vec, mode))
5683 {
5684 /* Splat immediate. */
5685 emit_insn (gen_rtx_SET (VOIDmode, target, const_vec));
5686 return;
5687 }
5688 else
5689 {
5690 /* Load from constant pool. */
5691 emit_move_insn (target, const_vec);
5692 return;
5693 }
5694 }
5695
5696 /* Double word values on VSX can use xxpermdi or lxvdsx. */
5697 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
5698 {
5699 rtx op0 = XVECEXP (vals, 0, 0);
5700 rtx op1 = XVECEXP (vals, 0, 1);
5701 if (all_same)
5702 {
5703 if (!MEM_P (op0) && !REG_P (op0))
5704 op0 = force_reg (inner_mode, op0);
5705 if (mode == V2DFmode)
5706 emit_insn (gen_vsx_splat_v2df (target, op0));
5707 else
5708 emit_insn (gen_vsx_splat_v2di (target, op0));
5709 }
5710 else
5711 {
5712 op0 = force_reg (inner_mode, op0);
5713 op1 = force_reg (inner_mode, op1);
5714 if (mode == V2DFmode)
5715 emit_insn (gen_vsx_concat_v2df (target, op0, op1));
5716 else
5717 emit_insn (gen_vsx_concat_v2di (target, op0, op1));
5718 }
5719 return;
5720 }
5721
5722 /* With single precision floating point on VSX, we know that internally
5723 single precision is actually represented as a double. Either make two
5724 V2DF vectors and convert those to single precision, or do one
5725 conversion and splat the result to the other elements. */
5726 if (mode == V4SFmode && VECTOR_MEM_VSX_P (mode))
5727 {
5728 if (all_same)
5729 {
5730 rtx freg = gen_reg_rtx (V4SFmode);
5731 rtx sreg = force_reg (SFmode, XVECEXP (vals, 0, 0));
5732 rtx cvt = ((TARGET_XSCVDPSPN)
5733 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
5734 : gen_vsx_xscvdpsp_scalar (freg, sreg));
5735
5736 emit_insn (cvt);
5737 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg, const0_rtx));
5738 }
5739 else
5740 {
5741 rtx dbl_even = gen_reg_rtx (V2DFmode);
5742 rtx dbl_odd = gen_reg_rtx (V2DFmode);
5743 rtx flt_even = gen_reg_rtx (V4SFmode);
5744 rtx flt_odd = gen_reg_rtx (V4SFmode);
5745 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
5746 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
5747 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
5748 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
5749
5750 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
5751 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
5752 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
5753 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
5754 rs6000_expand_extract_even (target, flt_even, flt_odd);
5755 }
5756 return;
5757 }
5758
5759 /* Store value to stack temp. Load vector element. Splat. However, splat
5760 of 64-bit items is not supported on Altivec. */
5761 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
5762 {
5763 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
5764 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
5765 XVECEXP (vals, 0, 0));
5766 x = gen_rtx_UNSPEC (VOIDmode,
5767 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
5768 emit_insn (gen_rtx_PARALLEL (VOIDmode,
5769 gen_rtvec (2,
5770 gen_rtx_SET (VOIDmode,
5771 target, mem),
5772 x)));
5773 x = gen_rtx_VEC_SELECT (inner_mode, target,
5774 gen_rtx_PARALLEL (VOIDmode,
5775 gen_rtvec (1, const0_rtx)));
5776 emit_insn (gen_rtx_SET (VOIDmode, target,
5777 gen_rtx_VEC_DUPLICATE (mode, x)));
5778 return;
5779 }
5780
5781 /* One field is non-constant. Load constant then overwrite
5782 varying field. */
5783 if (n_var == 1)
5784 {
5785 rtx copy = copy_rtx (vals);
5786
5787 /* Load constant part of vector, substitute neighboring value for
5788 varying element. */
5789 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
5790 rs6000_expand_vector_init (target, copy);
5791
5792 /* Insert variable. */
5793 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
5794 return;
5795 }
5796
5797 /* Construct the vector in memory one field at a time
5798 and load the whole vector. */
5799 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
5800 for (i = 0; i < n_elts; i++)
5801 emit_move_insn (adjust_address_nv (mem, inner_mode,
5802 i * GET_MODE_SIZE (inner_mode)),
5803 XVECEXP (vals, 0, i));
5804 emit_move_insn (target, mem);
5805 }
5806
5807 /* Set field ELT of TARGET to VAL. */
5808
5809 void
5810 rs6000_expand_vector_set (rtx target, rtx val, int elt)
5811 {
5812 machine_mode mode = GET_MODE (target);
5813 machine_mode inner_mode = GET_MODE_INNER (mode);
5814 rtx reg = gen_reg_rtx (mode);
5815 rtx mask, mem, x;
5816 int width = GET_MODE_SIZE (inner_mode);
5817 int i;
5818
5819 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
5820 {
5821 rtx (*set_func) (rtx, rtx, rtx, rtx)
5822 = ((mode == V2DFmode) ? gen_vsx_set_v2df : gen_vsx_set_v2di);
5823 emit_insn (set_func (target, target, val, GEN_INT (elt)));
5824 return;
5825 }
5826
5827 /* Simplify setting single element vectors like V1TImode. */
5828 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
5829 {
5830 emit_move_insn (target, gen_lowpart (mode, val));
5831 return;
5832 }
5833
5834 /* Load single variable value. */
5835 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
5836 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
5837 x = gen_rtx_UNSPEC (VOIDmode,
5838 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
5839 emit_insn (gen_rtx_PARALLEL (VOIDmode,
5840 gen_rtvec (2,
5841 gen_rtx_SET (VOIDmode,
5842 reg, mem),
5843 x)));
5844
5845 /* Linear sequence. */
5846 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
5847 for (i = 0; i < 16; ++i)
5848 XVECEXP (mask, 0, i) = GEN_INT (i);
5849
5850 /* Set permute mask to insert element into target. */
5851 for (i = 0; i < width; ++i)
5852 XVECEXP (mask, 0, elt*width + i)
5853 = GEN_INT (i + 0x10);
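/* E.g. (illustrative): inserting element 1 of a V4SImode vector
   (width == 4) rewrites mask bytes 4..7 to 0x10..0x13, so the vperm
   takes those bytes from the second input and everything else from
   the first. */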
5854 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
5855
5856 if (BYTES_BIG_ENDIAN)
5857 x = gen_rtx_UNSPEC (mode,
5858 gen_rtvec (3, target, reg,
5859 force_reg (V16QImode, x)),
5860 UNSPEC_VPERM);
5861 else
5862 {
5863 /* Invert selector. We prefer to generate VNAND on P8 so
5864 that future fusion opportunities can kick in, but must
5865 generate VNOR elsewhere. */
5866 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
5867 rtx iorx = (TARGET_P8_VECTOR
5868 ? gen_rtx_IOR (V16QImode, notx, notx)
5869 : gen_rtx_AND (V16QImode, notx, notx));
5870 rtx tmp = gen_reg_rtx (V16QImode);
5871 emit_insn (gen_rtx_SET (VOIDmode, tmp, iorx));
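/* Both forms compute ~x: by De Morgan, (ior (not x) (not x)) is
   (not (and x x)) and (and (not x) (not x)) is (not (ior x x)),
   matching the vnand and vnor patterns respectively. */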
5872
5873 /* Permute with operands reversed and adjusted selector. */
5874 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
5875 UNSPEC_VPERM);
5876 }
5877
5878 emit_insn (gen_rtx_SET (VOIDmode, target, x));
5879 }
5880
5881 /* Extract field ELT from VEC into TARGET. */
5882
5883 void
5884 rs6000_expand_vector_extract (rtx target, rtx vec, int elt)
5885 {
5886 machine_mode mode = GET_MODE (vec);
5887 machine_mode inner_mode = GET_MODE_INNER (mode);
5888 rtx mem;
5889
5890 if (VECTOR_MEM_VSX_P (mode))
5891 {
5892 switch (mode)
5893 {
5894 default:
5895 break;
5896 case V1TImode:
5897 gcc_assert (elt == 0 && inner_mode == TImode);
5898 emit_move_insn (target, gen_lowpart (TImode, vec));
5899 return;
5900 case V2DFmode:
5901 emit_insn (gen_vsx_extract_v2df (target, vec, GEN_INT (elt)));
5902 return;
5903 case V2DImode:
5904 emit_insn (gen_vsx_extract_v2di (target, vec, GEN_INT (elt)));
5905 return;
5906 case V4SFmode:
5907 emit_insn (gen_vsx_extract_v4sf (target, vec, GEN_INT (elt)));
5908 return;
5909 }
5910 }
5911
5912 /* Allocate mode-sized buffer. */
5913 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
5914
5915 emit_move_insn (mem, vec);
5916
5917 /* Add offset to field within buffer matching vector element. */
5918 mem = adjust_address_nv (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode));
5919
5920 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
5921 }
5922
5923 /* Generates shifts and masks for a pair of rldicl or rldicr insns to
5924 implement ANDing by the mask IN. */
5925 void
5926 build_mask64_2_operands (rtx in, rtx *out)
5927 {
5928 unsigned HOST_WIDE_INT c, lsb, m1, m2;
5929 int shift;
5930
5931 gcc_assert (GET_CODE (in) == CONST_INT);
5932
5933 c = INTVAL (in);
5934 if (c & 1)
5935 {
5936 /* Assume c initially something like 0x00fff000000fffff. The idea
5937 is to rotate the word so that the middle ^^^^^^ group of zeros
5938 is at the MS end and can be cleared with an rldicl mask. We then
5939 rotate back and clear off the MS ^^ group of zeros with a
5940 second rldicl. */
5941 c = ~c; /* c == 0xff000ffffff00000 */
5942 lsb = c & -c; /* lsb == 0x0000000000100000 */
5943 m1 = -lsb; /* m1 == 0xfffffffffff00000 */
5944 c = ~c; /* c == 0x00fff000000fffff */
5945 c &= -lsb; /* c == 0x00fff00000000000 */
5946 lsb = c & -c; /* lsb == 0x0000100000000000 */
5947 c = ~c; /* c == 0xff000fffffffffff */
5948 c &= -lsb; /* c == 0xff00000000000000 */
5949 shift = 0;
5950 while ((lsb >>= 1) != 0)
5951 shift++; /* shift == 44 on exit from loop */
5952 m1 <<= 64 - shift; /* m1 == 0xffffff0000000000 */
5953 m1 = ~m1; /* m1 == 0x000000ffffffffff */
5954 m2 = ~c; /* m2 == 0x00ffffffffffffff */
5955 }
5956 else
5957 {
5958 /* Assume c initially something like 0xff000f0000000000. The idea
5959 is to rotate the word so that the ^^^ middle group of zeros
5960 is at the LS end and can be cleared with an rldicr mask. We then
5961 rotate back and clear off the LS group of ^^^^^^^^^^ zeros with
5962 a second rldicr. */
5963 lsb = c & -c; /* lsb == 0x0000010000000000 */
5964 m2 = -lsb; /* m2 == 0xffffff0000000000 */
5965 c = ~c; /* c == 0x00fff0ffffffffff */
5966 c &= -lsb; /* c == 0x00fff00000000000 */
5967 lsb = c & -c; /* lsb == 0x0000100000000000 */
5968 c = ~c; /* c == 0xff000fffffffffff */
5969 c &= -lsb; /* c == 0xff00000000000000 */
5970 shift = 0;
5971 while ((lsb >>= 1) != 0)
5972 shift++; /* shift == 44 on exit from loop */
5973 m1 = ~c; /* m1 == 0x00ffffffffffffff */
5974 m1 >>= shift; /* m1 == 0x0000000000000fff */
5975 m1 = ~m1; /* m1 == 0xfffffffffffff000 */
5976 }
5977
5978 /* Note that when we only have two 0->1 and 1->0 transitions, one of the
5979 masks will be all 1's. We are guaranteed more than one transition. */
5980 out[0] = GEN_INT (64 - shift);
5981 out[1] = GEN_INT (m1);
5982 out[2] = GEN_INT (shift);
5983 out[3] = GEN_INT (m2);
5984 }
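/* Continuing the first worked example above (illustrative): for the mask
   0x00fff000000fffff we exit with shift == 44, m1 == 0x000000ffffffffff
   and m2 == 0x00ffffffffffffff, so the outputs are out[0] == 20,
   out[1] == m1, out[2] == 44 and out[3] == m2. */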
5985
5986 /* Return TRUE if OP is an invalid SUBREG operation on the e500. */
5987
5988 bool
5989 invalid_e500_subreg (rtx op, machine_mode mode)
5990 {
5991 if (TARGET_E500_DOUBLE)
5992 {
5993 /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
5994 subreg:TI and reg:TF. Decimal float modes are like integer
5995 modes (only low part of each register used) for this
5996 purpose. */
5997 if (GET_CODE (op) == SUBREG
5998 && (mode == SImode || mode == DImode || mode == TImode
5999 || mode == DDmode || mode == TDmode || mode == PTImode)
6000 && REG_P (SUBREG_REG (op))
6001 && (GET_MODE (SUBREG_REG (op)) == DFmode
6002 || GET_MODE (SUBREG_REG (op)) == TFmode))
6003 return true;
6004
6005 /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
6006 reg:TI. */
6007 if (GET_CODE (op) == SUBREG
6008 && (mode == DFmode || mode == TFmode)
6009 && REG_P (SUBREG_REG (op))
6010 && (GET_MODE (SUBREG_REG (op)) == DImode
6011 || GET_MODE (SUBREG_REG (op)) == TImode
6012 || GET_MODE (SUBREG_REG (op)) == PTImode
6013 || GET_MODE (SUBREG_REG (op)) == DDmode
6014 || GET_MODE (SUBREG_REG (op)) == TDmode))
6015 return true;
6016 }
6017
6018 if (TARGET_SPE
6019 && GET_CODE (op) == SUBREG
6020 && mode == SImode
6021 && REG_P (SUBREG_REG (op))
6022 && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
6023 return true;
6024
6025 return false;
6026 }
6027
6028 /* Return the alignment of TYPE. The existing alignment is ALIGN.
6029 HOW selects whether the alignment is ABI-mandated, optional, or
6030 both. */
6031
6032 unsigned int
6033 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
6034 {
6035 if (how != align_opt)
6036 {
6037 if (TREE_CODE (type) == VECTOR_TYPE)
6038 {
6039 if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
6040 || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
6041 {
6042 if (align < 64)
6043 align = 64;
6044 }
6045 else if (align < 128)
6046 align = 128;
6047 }
6048 else if (TARGET_E500_DOUBLE
6049 && TREE_CODE (type) == REAL_TYPE
6050 && TYPE_MODE (type) == DFmode)
6051 {
6052 if (align < 64)
6053 align = 64;
6054 }
6055 }
6056
6057 if (how != align_abi)
6058 {
6059 if (TREE_CODE (type) == ARRAY_TYPE
6060 && TYPE_MODE (TREE_TYPE (type)) == QImode)
6061 {
6062 if (align < BITS_PER_WORD)
6063 align = BITS_PER_WORD;
6064 }
6065 }
6066
6067 return align;
6068 }
6069
6070 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
6071
6072 bool
6073 rs6000_special_adjust_field_align_p (tree field, unsigned int computed)
6074 {
6075 if (TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6076 {
6077 if (computed != 128)
6078 {
6079 static bool warned;
6080 if (!warned && warn_psabi)
6081 {
6082 warned = true;
6083 inform (input_location,
6084 "the layout of aggregates containing vectors with"
6085 " %d-byte alignment has changed in GCC 5",
6086 computed / BITS_PER_UNIT);
6087 }
6088 }
6089 /* In current GCC there is no special case. */
6090 return false;
6091 }
6092
6093 return false;
6094 }
6095
6096 /* AIX increases natural record alignment to doubleword if the first
6097 field is an FP double while the FP fields remain word aligned. */
6098
6099 unsigned int
6100 rs6000_special_round_type_align (tree type, unsigned int computed,
6101 unsigned int specified)
6102 {
6103 unsigned int align = MAX (computed, specified);
6104 tree field = TYPE_FIELDS (type);
6105
6106 /* Skip all non-field decls. */
6107 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
6108 field = DECL_CHAIN (field);
6109
6110 if (field != NULL && field != type)
6111 {
6112 type = TREE_TYPE (field);
6113 while (TREE_CODE (type) == ARRAY_TYPE)
6114 type = TREE_TYPE (type);
6115
6116 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
6117 align = MAX (align, 64);
6118 }
6119
6120 return align;
6121 }
6122
6123 /* Darwin increases record alignment to the natural alignment of
6124 the first field. */
6125
6126 unsigned int
6127 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
6128 unsigned int specified)
6129 {
6130 unsigned int align = MAX (computed, specified);
6131
6132 if (TYPE_PACKED (type))
6133 return align;
6134
6135 /* Find the first field, looking down into aggregates. */
6136 do {
6137 tree field = TYPE_FIELDS (type);
6138 /* Skip all non-field decls. */
6139 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
6140 field = DECL_CHAIN (field);
6141 if (! field)
6142 break;
6143 /* A packed field does not contribute any extra alignment. */
6144 if (DECL_PACKED (field))
6145 return align;
6146 type = TREE_TYPE (field);
6147 while (TREE_CODE (type) == ARRAY_TYPE)
6148 type = TREE_TYPE (type);
6149 } while (AGGREGATE_TYPE_P (type));
6150
6151 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
6152 align = MAX (align, TYPE_ALIGN (type));
6153
6154 return align;
6155 }
6156
6157 /* Return 1 for an operand in small memory on V.4/eabi. */
6158
6159 int
6160 small_data_operand (rtx op ATTRIBUTE_UNUSED,
6161 machine_mode mode ATTRIBUTE_UNUSED)
6162 {
6163 #if TARGET_ELF
6164 rtx sym_ref;
6165
6166 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
6167 return 0;
6168
6169 if (DEFAULT_ABI != ABI_V4)
6170 return 0;
6171
6172 /* Vector and float memory instructions have a limited offset on the
6173 SPE, so using a vector or float variable directly as an operand is
6174 not useful. */
6175 if (TARGET_SPE
6176 && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
6177 return 0;
6178
6179 if (GET_CODE (op) == SYMBOL_REF)
6180 sym_ref = op;
6181
6182 else if (GET_CODE (op) != CONST
6183 || GET_CODE (XEXP (op, 0)) != PLUS
6184 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
6185 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
6186 return 0;
6187
6188 else
6189 {
6190 rtx sum = XEXP (op, 0);
6191 HOST_WIDE_INT summand;
6192
6193 /* We have to be careful here, because it is the referenced address
6194 that must be 32k from _SDA_BASE_, not just the symbol. */
6195 summand = INTVAL (XEXP (sum, 1));
6196 if (summand < 0 || summand > g_switch_value)
6197 return 0;
6198
6199 sym_ref = XEXP (sum, 0);
6200 }
6201
6202 return SYMBOL_REF_SMALL_P (sym_ref);
6203 #else
6204 return 0;
6205 #endif
6206 }
6207
6208 /* Return true if either operand is a general purpose register. */
6209
6210 bool
6211 gpr_or_gpr_p (rtx op0, rtx op1)
6212 {
6213 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
6214 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
6215 }
6216
6217 /* Return true if this is a move direct operation between GPR registers and
6218 floating point/VSX registers. */
6219
6220 bool
6221 direct_move_p (rtx op0, rtx op1)
6222 {
6223 int regno0, regno1;
6224
6225 if (!REG_P (op0) || !REG_P (op1))
6226 return false;
6227
6228 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
6229 return false;
6230
6231 regno0 = REGNO (op0);
6232 regno1 = REGNO (op1);
6233 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
6234 return false;
6235
6236 if (INT_REGNO_P (regno0))
6237 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
6238
6239 else if (INT_REGNO_P (regno1))
6240 {
6241 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
6242 return true;
6243
6244 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
6245 return true;
6246 }
6247
6248 return false;
6249 }
6250
6251 /* Return true if this is a load or store quad operation. This function does
6252 not handle the atomic quad memory instructions. */
6253
6254 bool
6255 quad_load_store_p (rtx op0, rtx op1)
6256 {
6257 bool ret;
6258
6259 if (!TARGET_QUAD_MEMORY)
6260 ret = false;
6261
6262 else if (REG_P (op0) && MEM_P (op1))
6263 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
6264 && quad_memory_operand (op1, GET_MODE (op1))
6265 && !reg_overlap_mentioned_p (op0, op1));
6266
6267 else if (MEM_P (op0) && REG_P (op1))
6268 ret = (quad_memory_operand (op0, GET_MODE (op0))
6269 && quad_int_reg_operand (op1, GET_MODE (op1)));
6270
6271 else
6272 ret = false;
6273
6274 if (TARGET_DEBUG_ADDR)
6275 {
6276 fprintf (stderr, "\n========== quad_load_store, return %s\n",
6277 ret ? "true" : "false");
6278 debug_rtx (gen_rtx_SET (VOIDmode, op0, op1));
6279 }
6280
6281 return ret;
6282 }
6283
6284 /* Given an address, return a constant offset term if one exists. */
6285
6286 static rtx
6287 address_offset (rtx op)
6288 {
6289 if (GET_CODE (op) == PRE_INC
6290 || GET_CODE (op) == PRE_DEC)
6291 op = XEXP (op, 0);
6292 else if (GET_CODE (op) == PRE_MODIFY
6293 || GET_CODE (op) == LO_SUM)
6294 op = XEXP (op, 1);
6295
6296 if (GET_CODE (op) == CONST)
6297 op = XEXP (op, 0);
6298
6299 if (GET_CODE (op) == PLUS)
6300 op = XEXP (op, 1);
6301
6302 if (CONST_INT_P (op))
6303 return op;
6304
6305 return NULL_RTX;
6306 }
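/* E.g. (illustrative): for (plus (reg) (const_int 8)) this returns the
   const_int 8, and for (lo_sum (reg) (const (plus (symbol_ref)
   (const_int 4)))) it returns the const_int 4; a plain (reg) yields
   NULL_RTX. */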
6307
6308 /* Return true if the MEM operand is a memory operand suitable for use
6309 with a (full width, possibly multiple) gpr load/store. On
6310 powerpc64 this means the offset must be divisible by 4.
6311 Implements 'Y' constraint.
6312
6313 Accept direct, indexed, offset, lo_sum and tocref. Since this is
6314 a constraint function we know the operand has satisfied a suitable
6315 memory predicate. Also accept some odd rtl generated by reload
6316 (see rs6000_legitimize_reload_address for various forms). It is
6317 important that reload rtl be accepted by appropriate constraints
6318 but not by the operand predicate.
6319
6320 Offsetting a lo_sum should not be allowed, except where we know by
6321 alignment that a 32k boundary is not crossed, but see the ???
6322 comment in rs6000_legitimize_reload_address. Note that by
6323 "offsetting" here we mean a further offset to access parts of the
6324 MEM. It's fine to have a lo_sum where the inner address is offset
6325 from a sym, since the same sym+offset will appear in the high part
6326 of the address calculation. */
6327
6328 bool
6329 mem_operand_gpr (rtx op, machine_mode mode)
6330 {
6331 unsigned HOST_WIDE_INT offset;
6332 int extra;
6333 rtx addr = XEXP (op, 0);
6334
6335 op = address_offset (addr);
6336 if (op == NULL_RTX)
6337 return true;
6338
6339 offset = INTVAL (op);
6340 if (TARGET_POWERPC64 && (offset & 3) != 0)
6341 return false;
6342
6343 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
6344 if (extra < 0)
6345 extra = 0;
6346
6347 if (GET_CODE (addr) == LO_SUM)
6348 /* For lo_sum addresses, we must allow any offset except one that
6349 causes a wrap, so test only the low 16 bits. */
6350 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
6351
6352 return offset + 0x8000 < 0x10000u - extra;
6353 }
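/* Illustrative examples (not from the sources): for a DImode access on
   powerpc64, extra is 0, so reg+32764 is accepted while reg+32766 fails
   the divisible-by-4 test; a 16-byte access has extra == 8, so the
   exclusive upper bound on the offset drops from 32768 to 32760. */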
6354 \f
6355 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
6356
6357 static bool
6358 reg_offset_addressing_ok_p (machine_mode mode)
6359 {
6360 switch (mode)
6361 {
6362 case V16QImode:
6363 case V8HImode:
6364 case V4SFmode:
6365 case V4SImode:
6366 case V2DFmode:
6367 case V2DImode:
6368 case V1TImode:
6369 case TImode:
6370 /* AltiVec/VSX vector modes. Only reg+reg addressing is valid. While
6371 TImode is not a vector mode, if we want to use the VSX registers to
6372 move it around, we need to restrict ourselves to reg+reg
6373 addressing. */
6374 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
6375 return false;
6376 break;
6377
6378 case V4HImode:
6379 case V2SImode:
6380 case V1DImode:
6381 case V2SFmode:
6382 /* Paired vector modes. Only reg+reg addressing is valid. */
6383 if (TARGET_PAIRED_FLOAT)
6384 return false;
6385 break;
6386
6387 case SDmode:
6388 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
6389 addressing for the LFIWZX and STFIWX instructions. */
6390 if (TARGET_NO_SDMODE_STACK)
6391 return false;
6392 break;
6393
6394 default:
6395 break;
6396 }
6397
6398 return true;
6399 }
6400
6401 static bool
6402 virtual_stack_registers_memory_p (rtx op)
6403 {
6404 int regnum;
6405
6406 if (GET_CODE (op) == REG)
6407 regnum = REGNO (op);
6408
6409 else if (GET_CODE (op) == PLUS
6410 && GET_CODE (XEXP (op, 0)) == REG
6411 && GET_CODE (XEXP (op, 1)) == CONST_INT)
6412 regnum = REGNO (XEXP (op, 0));
6413
6414 else
6415 return false;
6416
6417 return (regnum >= FIRST_VIRTUAL_REGISTER
6418 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
6419 }
6420
6421 /* Return true if a MODE-sized memory access to OP plus OFFSET
6422 is known not to straddle a 32k boundary. */
6423
6424 static bool
6425 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
6426 machine_mode mode)
6427 {
6428 tree decl, type;
6429 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
6430
6431 if (GET_CODE (op) != SYMBOL_REF)
6432 return false;
6433
6434 dsize = GET_MODE_SIZE (mode);
6435 decl = SYMBOL_REF_DECL (op);
6436 if (!decl)
6437 {
6438 if (dsize == 0)
6439 return false;
6440
6441 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
6442 replacing memory addresses with an anchor plus offset. We
6443 could find the decl by rummaging around in the block->objects
6444 VEC for the given offset but that seems like too much work. */
6445 dalign = BITS_PER_UNIT;
6446 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
6447 && SYMBOL_REF_ANCHOR_P (op)
6448 && SYMBOL_REF_BLOCK (op) != NULL)
6449 {
6450 struct object_block *block = SYMBOL_REF_BLOCK (op);
6451
6452 dalign = block->alignment;
6453 offset += SYMBOL_REF_BLOCK_OFFSET (op);
6454 }
6455 else if (CONSTANT_POOL_ADDRESS_P (op))
6456 {
6457 /* It would be nice to have get_pool_align(). */
6458 machine_mode cmode = get_pool_mode (op);
6459
6460 dalign = GET_MODE_ALIGNMENT (cmode);
6461 }
6462 }
6463 else if (DECL_P (decl))
6464 {
6465 dalign = DECL_ALIGN (decl);
6466
6467 if (dsize == 0)
6468 {
6469 /* Allow BLKmode when the entire object is known to not
6470 cross a 32k boundary. */
6471 if (!DECL_SIZE_UNIT (decl))
6472 return false;
6473
6474 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
6475 return false;
6476
6477 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
6478 if (dsize > 32768)
6479 return false;
6480
6481 return dalign / BITS_PER_UNIT >= dsize;
6482 }
6483 }
6484 else
6485 {
6486 type = TREE_TYPE (decl);
6487
6488 dalign = TYPE_ALIGN (type);
6489 if (CONSTANT_CLASS_P (decl))
6490 dalign = CONSTANT_ALIGNMENT (decl, dalign);
6491 else
6492 dalign = DATA_ALIGNMENT (decl, dalign);
6493
6494 if (dsize == 0)
6495 {
6496 /* BLKmode, check the entire object. */
6497 if (TREE_CODE (decl) == STRING_CST)
6498 dsize = TREE_STRING_LENGTH (decl);
6499 else if (TYPE_SIZE_UNIT (type)
6500 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (type)))
6501 dsize = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6502 else
6503 return false;
6504 if (dsize > 32768)
6505 return false;
6506
6507 return dalign / BITS_PER_UNIT >= dsize;
6508 }
6509 }
6510
6511 /* Find how many bits of the alignment we know for this access. */
6512 mask = dalign / BITS_PER_UNIT - 1;
6513 lsb = offset & -offset;
6514 mask &= lsb - 1;
6515 dalign = mask + 1;
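/* E.g. (illustrative): a decl aligned to 16 bytes accessed at offset 24
   has lsb == 8, so mask drops to 7 and only 8 bytes of alignment are
   known; an 8-byte access then passes, a 16-byte access does not. */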
6516
6517 return dalign >= dsize;
6518 }
6519
6520 static bool
6521 constant_pool_expr_p (rtx op)
6522 {
6523 rtx base, offset;
6524
6525 split_const (op, &base, &offset);
6526 return (GET_CODE (base) == SYMBOL_REF
6527 && CONSTANT_POOL_ADDRESS_P (base)
6528 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
6529 }
6530
6531 static const_rtx tocrel_base, tocrel_offset;
6532
6533 /* Return true if OP is a toc pointer relative address (the output
6534 of create_TOC_reference). If STRICT, do not match high part or
6535 non-split -mcmodel=large/medium toc pointer relative addresses. */
6536
6537 bool
6538 toc_relative_expr_p (const_rtx op, bool strict)
6539 {
6540 if (!TARGET_TOC)
6541 return false;
6542
6543 if (TARGET_CMODEL != CMODEL_SMALL)
6544 {
6545 /* Only match the low part. */
6546 if (GET_CODE (op) == LO_SUM
6547 && REG_P (XEXP (op, 0))
6548 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict))
6549 op = XEXP (op, 1);
6550 else if (strict)
6551 return false;
6552 }
6553
6554 tocrel_base = op;
6555 tocrel_offset = const0_rtx;
6556 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
6557 {
6558 tocrel_base = XEXP (op, 0);
6559 tocrel_offset = XEXP (op, 1);
6560 }
6561
6562 return (GET_CODE (tocrel_base) == UNSPEC
6563 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
6564 }
6565
6566 /* Return true if X is a constant pool address, and also for cmodel=medium
6567 if X is a toc-relative address known to be offsettable within MODE. */
6568
6569 bool
6570 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
6571 bool strict)
6572 {
6573 return (toc_relative_expr_p (x, strict)
6574 && (TARGET_CMODEL != CMODEL_MEDIUM
6575 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
6576 || mode == QImode
6577 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
6578 INTVAL (tocrel_offset), mode)));
6579 }
6580
6581 static bool
6582 legitimate_small_data_p (machine_mode mode, rtx x)
6583 {
6584 return (DEFAULT_ABI == ABI_V4
6585 && !flag_pic && !TARGET_TOC
6586 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
6587 && small_data_operand (x, mode));
6588 }
6589
6590 /* SPE offset addressing is limited to 5 bits' worth of double words. */
6591 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
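/* That is, only the multiples of 8 from 0 to 248 are accepted: any bit
   outside the 0xf8 field makes the masked value nonzero. */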
6592
6593 bool
6594 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
6595 bool strict, bool worst_case)
6596 {
6597 unsigned HOST_WIDE_INT offset;
6598 unsigned int extra;
6599
6600 if (GET_CODE (x) != PLUS)
6601 return false;
6602 if (!REG_P (XEXP (x, 0)))
6603 return false;
6604 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
6605 return false;
6606 if (!reg_offset_addressing_ok_p (mode))
6607 return virtual_stack_registers_memory_p (x);
6608 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
6609 return true;
6610 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6611 return false;
6612
6613 offset = INTVAL (XEXP (x, 1));
6614 extra = 0;
6615 switch (mode)
6616 {
6617 case V4HImode:
6618 case V2SImode:
6619 case V1DImode:
6620 case V2SFmode:
6621 /* SPE vector modes. */
6622 return SPE_CONST_OFFSET_OK (offset);
6623
6624 case DFmode:
6625 case DDmode:
6626 case DImode:
6627 /* On e500v2, we may have:
6628
6629 (subreg:DF (mem:DI (plus (reg) (const_int))) 0).
6630
6631 which is addressed with evldd instructions. */
6632 if (TARGET_E500_DOUBLE)
6633 return SPE_CONST_OFFSET_OK (offset);
6634
6635 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
6636 addressing. */
6637 if (VECTOR_MEM_VSX_P (mode))
6638 return false;
6639
6640 if (!worst_case)
6641 break;
6642 if (!TARGET_POWERPC64)
6643 extra = 4;
6644 else if (offset & 3)
6645 return false;
6646 break;
6647
6648 case TFmode:
6649 if (TARGET_E500_DOUBLE)
6650 return (SPE_CONST_OFFSET_OK (offset)
6651 && SPE_CONST_OFFSET_OK (offset + 8));
6652 /* fall through */
6653
6654 case TDmode:
6655 case TImode:
6656 case PTImode:
6657 extra = 8;
6658 if (!worst_case)
6659 break;
6660 if (!TARGET_POWERPC64)
6661 extra = 12;
6662 else if (offset & 3)
6663 return false;
6664 break;
6665
6666 default:
6667 break;
6668 }
6669
6670 offset += 0x8000;
6671 return offset < 0x10000 - extra;
6672 }
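/* Illustrative worst case: a TImode access on a 32-bit target uses
   extra == 12, so offsets from -32768 up to 32755 pass the final range
   check (offset + 0x8000 < 0x10000 - 12). */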
6673
6674 bool
6675 legitimate_indexed_address_p (rtx x, int strict)
6676 {
6677 rtx op0, op1;
6678
6679 if (GET_CODE (x) != PLUS)
6680 return false;
6681
6682 op0 = XEXP (x, 0);
6683 op1 = XEXP (x, 1);
6684
6685 /* Recognize the rtl generated by reload which we know will later be
6686 replaced with proper base and index regs. */
6687 if (!strict
6688 && reload_in_progress
6689 && (REG_P (op0) || GET_CODE (op0) == PLUS)
6690 && REG_P (op1))
6691 return true;
6692
6693 return (REG_P (op0) && REG_P (op1)
6694 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
6695 && INT_REG_OK_FOR_INDEX_P (op1, strict))
6696 || (INT_REG_OK_FOR_BASE_P (op1, strict)
6697 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
6698 }
6699
6700 bool
6701 avoiding_indexed_address_p (machine_mode mode)
6702 {
6703 /* Avoid indexed addressing for modes that have non-indexed
6704 load/store instruction forms. */
6705 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
6706 }
6707
6708 bool
6709 legitimate_indirect_address_p (rtx x, int strict)
6710 {
6711 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
6712 }
6713
6714 bool
6715 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
6716 {
6717 if (!TARGET_MACHO || !flag_pic
6718 || mode != SImode || GET_CODE (x) != MEM)
6719 return false;
6720 x = XEXP (x, 0);
6721
6722 if (GET_CODE (x) != LO_SUM)
6723 return false;
6724 if (GET_CODE (XEXP (x, 0)) != REG)
6725 return false;
6726 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
6727 return false;
6728 x = XEXP (x, 1);
6729
6730 return CONSTANT_P (x);
6731 }
6732
6733 static bool
6734 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
6735 {
6736 if (GET_CODE (x) != LO_SUM)
6737 return false;
6738 if (GET_CODE (XEXP (x, 0)) != REG)
6739 return false;
6740 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
6741 return false;
6742 /* Restrict addressing for DI because of our SUBREG hackery. */
6743 if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
6744 return false;
6745 x = XEXP (x, 1);
6746
6747 if (TARGET_ELF || TARGET_MACHO)
6748 {
6749 bool large_toc_ok;
6750
6751 if (DEFAULT_ABI == ABI_V4 && flag_pic)
6752 return false;
6753 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS, since that hook is
6754 normally called via push_reload from the reload pass.
6755 LEGITIMIZE_RELOAD_ADDRESS recognizes some LO_SUM addresses as
6756 valid although this function says the opposite. In most cases
6757 LRA's own transformations generate correct code for address
6758 reloads; it fails only for some LO_SUM cases. So here we need
6759 code analogous to that in rs6000_legitimize_reload_address for
6760 LO_SUM, saying that some such addresses are still valid. */
6761 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
6762 && small_toc_ref (x, VOIDmode));
6763 if (TARGET_TOC && ! large_toc_ok)
6764 return false;
6765 if (GET_MODE_NUNITS (mode) != 1)
6766 return false;
6767 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
6768 && !(/* ??? Assume floating point reg based on mode? */
6769 TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
6770 && (mode == DFmode || mode == DDmode)))
6771 return false;
6772
6773 return CONSTANT_P (x) || large_toc_ok;
6774 }
6775
6776 return false;
6777 }
6778
6779
6780 /* Try machine-dependent ways of modifying an illegitimate address
6781 to be legitimate. If we find one, return the new, valid address.
6782 This is used from only one place: `memory_address' in explow.c.
6783
6784 OLDX is the address as it was before break_out_memory_refs was
6785 called. In some cases it is useful to look at this to decide what
6786 needs to be done.
6787
6788 It is always safe for this function to do nothing. It exists to
6789 recognize opportunities to optimize the output.
6790
6791 On RS/6000, first check for the sum of a register with a constant
6792 integer that is out of range. If so, generate code to add the
6793 constant with the low-order 16 bits masked to the register and force
6794 this result into another register (this can be done with `cau').
6795 Then generate an address of REG+(CONST&0xffff), allowing for the
6796 possibility of bit 16 being a one.
6797
6798 Then check for the sum of a register and something not constant; try to
6799 load the non-constant part into a register and return the sum. */
6800
6801 static rtx
6802 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
6803 machine_mode mode)
6804 {
6805 unsigned int extra;
6806
6807 if (!reg_offset_addressing_ok_p (mode))
6808 {
6809 if (virtual_stack_registers_memory_p (x))
6810 return x;
6811
6812 /* In theory we should not be seeing addresses of the form reg+0,
6813 but just in case it is generated, optimize it away. */
6814 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
6815 return force_reg (Pmode, XEXP (x, 0));
6816
6817 /* For TImode with load/store quad, restrict addresses to just a single
6818 pointer, so it works with both GPRs and VSX registers. */
6819 /* Make sure both operands are registers. */
6820 else if (GET_CODE (x) == PLUS
6821 && (mode != TImode || !TARGET_QUAD_MEMORY))
6822 return gen_rtx_PLUS (Pmode,
6823 force_reg (Pmode, XEXP (x, 0)),
6824 force_reg (Pmode, XEXP (x, 1)));
6825 else
6826 return force_reg (Pmode, x);
6827 }
6828 if (GET_CODE (x) == SYMBOL_REF)
6829 {
6830 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
6831 if (model != 0)
6832 return rs6000_legitimize_tls_address (x, model);
6833 }
6834
6835 extra = 0;
6836 switch (mode)
6837 {
6838 case TFmode:
6839 case TDmode:
6840 case TImode:
6841 case PTImode:
6842 /* As in legitimate_offset_address_p we do not assume
6843 worst-case. The mode here is just a hint as to the registers
6844 used. A TImode is usually in gprs, but may actually be in
6845 fprs. Leave worst-case scenario for reload to handle via
6846 insn constraints. PTImode is only GPRs. */
6847 extra = 8;
6848 break;
6849 default:
6850 break;
6851 }
6852
6853 if (GET_CODE (x) == PLUS
6854 && GET_CODE (XEXP (x, 0)) == REG
6855 && GET_CODE (XEXP (x, 1)) == CONST_INT
6856 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
6857 >= 0x10000 - extra)
6858 && !(SPE_VECTOR_MODE (mode)
6859 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
6860 {
6861 HOST_WIDE_INT high_int, low_int;
6862 rtx sum;
6863 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
6864 if (low_int >= 0x8000 - extra)
6865 low_int = 0;
6866 high_int = INTVAL (XEXP (x, 1)) - low_int;
6867 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
6868 GEN_INT (high_int)), 0);
6869 return plus_constant (Pmode, sum, low_int);
6870 }
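/* E.g. (illustrative): for reg+0x12345 the split gives low_int == 0x2345
   and high_int == 0x10000; force_operand emits an addis of 1 into a new
   register and we return (plus new_reg 0x2345). */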
6871 else if (GET_CODE (x) == PLUS
6872 && GET_CODE (XEXP (x, 0)) == REG
6873 && GET_CODE (XEXP (x, 1)) != CONST_INT
6874 && GET_MODE_NUNITS (mode) == 1
6875 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
6876 || (/* ??? Assume floating point reg based on mode? */
6877 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
6878 && (mode == DFmode || mode == DDmode)))
6879 && !avoiding_indexed_address_p (mode))
6880 {
6881 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
6882 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
6883 }
6884 else if (SPE_VECTOR_MODE (mode)
6885 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
6886 {
6887 if (mode == DImode)
6888 return x;
6889 /* We accept [reg + reg] and [reg + OFFSET]. */
6890
6891 if (GET_CODE (x) == PLUS)
6892 {
6893 rtx op1 = XEXP (x, 0);
6894 rtx op2 = XEXP (x, 1);
6895 rtx y;
6896
6897 op1 = force_reg (Pmode, op1);
6898
6899 if (GET_CODE (op2) != REG
6900 && (GET_CODE (op2) != CONST_INT
6901 || !SPE_CONST_OFFSET_OK (INTVAL (op2))
6902 || (GET_MODE_SIZE (mode) > 8
6903 && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
6904 op2 = force_reg (Pmode, op2);
6905
6906 /* We can't always do [reg + reg] for these, because [reg +
6907 reg + offset] is not a legitimate addressing mode. */
6908 y = gen_rtx_PLUS (Pmode, op1, op2);
6909
6910 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
6911 return force_reg (Pmode, y);
6912 else
6913 return y;
6914 }
6915
6916 return force_reg (Pmode, x);
6917 }
6918 else if ((TARGET_ELF
6919 #if TARGET_MACHO
6920 || !MACHO_DYNAMIC_NO_PIC_P
6921 #endif
6922 )
6923 && TARGET_32BIT
6924 && TARGET_NO_TOC
6925 && ! flag_pic
6926 && GET_CODE (x) != CONST_INT
6927 && GET_CODE (x) != CONST_WIDE_INT
6928 && GET_CODE (x) != CONST_DOUBLE
6929 && CONSTANT_P (x)
6930 && GET_MODE_NUNITS (mode) == 1
6931 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
6932 || (/* ??? Assume floating point reg based on mode? */
6933 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
6934 && (mode == DFmode || mode == DDmode))))
6935 {
6936 rtx reg = gen_reg_rtx (Pmode);
6937 if (TARGET_ELF)
6938 emit_insn (gen_elf_high (reg, x));
6939 else
6940 emit_insn (gen_macho_high (reg, x));
6941 return gen_rtx_LO_SUM (Pmode, reg, x);
6942 }
6943 else if (TARGET_TOC
6944 && GET_CODE (x) == SYMBOL_REF
6945 && constant_pool_expr_p (x)
6946 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
6947 return create_TOC_reference (x, NULL_RTX);
6948 else
6949 return x;
6950 }
6951
6952 /* Debug version of rs6000_legitimize_address. */
6953 static rtx
6954 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
6955 {
6956 rtx ret;
6957 rtx_insn *insns;
6958
6959 start_sequence ();
6960 ret = rs6000_legitimize_address (x, oldx, mode);
6961 insns = get_insns ();
6962 end_sequence ();
6963
6964 if (ret != x)
6965 {
6966 fprintf (stderr,
6967 "\nrs6000_legitimize_address: mode %s, old code %s, "
6968 "new code %s, modified\n",
6969 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
6970 GET_RTX_NAME (GET_CODE (ret)));
6971
6972 fprintf (stderr, "Original address:\n");
6973 debug_rtx (x);
6974
6975 fprintf (stderr, "oldx:\n");
6976 debug_rtx (oldx);
6977
6978 fprintf (stderr, "New address:\n");
6979 debug_rtx (ret);
6980
6981 if (insns)
6982 {
6983 fprintf (stderr, "Insns added:\n");
6984 debug_rtx_list (insns, 20);
6985 }
6986 }
6987 else
6988 {
6989 fprintf (stderr,
6990 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
6991 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
6992
6993 debug_rtx (x);
6994 }
6995
6996 if (insns)
6997 emit_insn (insns);
6998
6999 return ret;
7000 }
7001
7002 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7003 We need to emit DTP-relative relocations. */
7004
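/* Illustrative output, assuming X is a SYMBOL_REF for `x' and size == 4:
       .long   x@dtprel+0x8000
   The 0x8000 bias matches the biased DTP offsets used by the PowerPC
   TLS ABI.  */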
7005 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
7006 static void
7007 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
7008 {
7009 switch (size)
7010 {
7011 case 4:
7012 fputs ("\t.long\t", file);
7013 break;
7014 case 8:
7015 fputs (DOUBLE_INT_ASM_OP, file);
7016 break;
7017 default:
7018 gcc_unreachable ();
7019 }
7020 output_addr_const (file, x);
7021 fputs ("@dtprel+0x8000", file);
7022 }
7023
7024 /* Return true if X is a symbol that refers to real (rather than emulated)
7025 TLS. */
7026
7027 static bool
7028 rs6000_real_tls_symbol_ref_p (rtx x)
7029 {
7030 return (GET_CODE (x) == SYMBOL_REF
7031 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
7032 }
7033
7034 /* In the name of slightly smaller debug output, and to cater to
7035 general assembler lossage, recognize various UNSPEC sequences
7036 and turn them back into a direct symbol reference. */
7037
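/* For example, an address of the form
   (plus (unspec [(symbol_ref "x") ...] UNSPEC_TOCREL) (const_int 8))
   is folded back to (plus (symbol_ref "x") (const_int 8)) below.  */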
7038 static rtx
7039 rs6000_delegitimize_address (rtx orig_x)
7040 {
7041 rtx x, y, offset;
7042
7043 orig_x = delegitimize_mem_from_attrs (orig_x);
7044 x = orig_x;
7045 if (MEM_P (x))
7046 x = XEXP (x, 0);
7047
7048 y = x;
7049 if (TARGET_CMODEL != CMODEL_SMALL
7050 && GET_CODE (y) == LO_SUM)
7051 y = XEXP (y, 1);
7052
7053 offset = NULL_RTX;
7054 if (GET_CODE (y) == PLUS
7055 && GET_MODE (y) == Pmode
7056 && CONST_INT_P (XEXP (y, 1)))
7057 {
7058 offset = XEXP (y, 1);
7059 y = XEXP (y, 0);
7060 }
7061
7062 if (GET_CODE (y) == UNSPEC
7063 && XINT (y, 1) == UNSPEC_TOCREL)
7064 {
7065 y = XVECEXP (y, 0, 0);
7066
7067 #ifdef HAVE_AS_TLS
7068 /* Do not associate thread-local symbols with the original
7069 constant pool symbol. */
7070 if (TARGET_XCOFF
7071 && GET_CODE (y) == SYMBOL_REF
7072 && CONSTANT_POOL_ADDRESS_P (y)
7073 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
7074 return orig_x;
7075 #endif
7076
7077 if (offset != NULL_RTX)
7078 y = gen_rtx_PLUS (Pmode, y, offset);
7079 if (!MEM_P (orig_x))
7080 return y;
7081 else
7082 return replace_equiv_address_nv (orig_x, y);
7083 }
7084
7085 if (TARGET_MACHO
7086 && GET_CODE (orig_x) == LO_SUM
7087 && GET_CODE (XEXP (orig_x, 1)) == CONST)
7088 {
7089 y = XEXP (XEXP (orig_x, 1), 0);
7090 if (GET_CODE (y) == UNSPEC
7091 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
7092 return XVECEXP (y, 0, 0);
7093 }
7094
7095 return orig_x;
7096 }
7097
7098 /* Return true if X shouldn't be emitted into the debug info.
7099 The linker doesn't like .toc section references from
7100 .debug_* sections, so reject .toc section symbols. */
7101
7102 static bool
7103 rs6000_const_not_ok_for_debug_p (rtx x)
7104 {
7105 if (GET_CODE (x) == SYMBOL_REF
7106 && CONSTANT_POOL_ADDRESS_P (x))
7107 {
7108 rtx c = get_pool_constant (x);
7109 machine_mode cmode = get_pool_mode (x);
7110 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
7111 return true;
7112 }
7113
7114 return false;
7115 }
7116
7117 /* Construct the SYMBOL_REF for the tls_get_addr function. */
7118
7119 static GTY(()) rtx rs6000_tls_symbol;
7120 static rtx
7121 rs6000_tls_get_addr (void)
7122 {
7123 if (!rs6000_tls_symbol)
7124 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
7125
7126 return rs6000_tls_symbol;
7127 }
7128
7129 /* Construct the SYMBOL_REF for TLS GOT references. */
7130
7131 static GTY(()) rtx rs6000_got_symbol;
7132 static rtx
7133 rs6000_got_sym (void)
7134 {
7135 if (!rs6000_got_symbol)
7136 {
7137 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
7138 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
7139 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
7140 }
7141
7142 return rs6000_got_symbol;
7143 }
7144
7145 /* AIX Thread-Local Address support. */
7146
7147 static rtx
7148 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
7149 {
7150 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
7151 const char *name;
7152 char *tlsname;
7153
7154 name = XSTR (addr, 0);
7155 /* Append the TLS CSECT qualifier, unless the symbol is already
7156 qualified or will be placed in the TLS private data section. */
7157 if (name[strlen (name) - 1] != ']'
7158 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
7159 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
7160 {
7161 tlsname = XALLOCAVEC (char, strlen (name) + 5); /* name + "[TL]" + NUL. */
7162 strcpy (tlsname, name);
7163 strcat (tlsname,
7164 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
7165 tlsaddr = copy_rtx (addr);
7166 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
7167 }
7168 else
7169 tlsaddr = addr;
7170
7171 /* Place addr into TOC constant pool. */
7172 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
7173
7174 /* Output the TOC entry and create the MEM referencing the value. */
7175 if (constant_pool_expr_p (XEXP (sym, 0))
7176 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
7177 {
7178 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
7179 mem = gen_const_mem (Pmode, tocref);
7180 set_mem_alias_set (mem, get_TOC_alias_set ());
7181 }
7182 else
7183 return sym;
7184
7185 /* Use the global-dynamic model for local-dynamic as well. */
7186 if (model == TLS_MODEL_GLOBAL_DYNAMIC
7187 || model == TLS_MODEL_LOCAL_DYNAMIC)
7188 {
7189 /* Create new TOC reference for @m symbol. */
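      /* E.g., assuming the usual "*LC..n" constant pool labels, the pool
	 symbol "*LC..0" yields the module symbol "*LCM..0" here.  */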
7190 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
7191 tlsname = XALLOCAVEC (char, strlen (name) + 2); /* "*LCM" + (name + 3) + NUL. */
7192 strcpy (tlsname, "*LCM");
7193 strcat (tlsname, name + 3);
7194 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
7195 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
7196 tocref = create_TOC_reference (modaddr, NULL_RTX);
7197 rtx modmem = gen_const_mem (Pmode, tocref);
7198 set_mem_alias_set (modmem, get_TOC_alias_set ());
7199
7200 rtx modreg = gen_reg_rtx (Pmode);
7201 emit_insn (gen_rtx_SET (VOIDmode, modreg, modmem));
7202
7203 tmpreg = gen_reg_rtx (Pmode);
7204 emit_insn (gen_rtx_SET (VOIDmode, tmpreg, mem));
7205
7206 dest = gen_reg_rtx (Pmode);
7207 if (TARGET_32BIT)
7208 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
7209 else
7210 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
7211 return dest;
7212 }
7213 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
7214 else if (TARGET_32BIT)
7215 {
7216 tlsreg = gen_reg_rtx (SImode);
7217 emit_insn (gen_tls_get_tpointer (tlsreg));
7218 }
7219 else
7220 tlsreg = gen_rtx_REG (DImode, 13);
7221
7222 /* Load the TOC value into temporary register. */
7223 tmpreg = gen_reg_rtx (Pmode);
7224 emit_insn (gen_rtx_SET (VOIDmode, tmpreg, mem));
7225 set_unique_reg_note (get_last_insn (), REG_EQUAL,
7226 gen_rtx_MINUS (Pmode, addr, tlsreg));
7227
7228 /* Add TOC symbol value to TLS pointer. */
7229 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
7230
7231 return dest;
7232 }
7233
7234 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
7235 this (thread-local) address. */
7236
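/* As an illustrative sketch (the mnemonics below are the usual PowerPC
   TLS forms, not taken from this file): the 64-bit local-exec case with
   rs6000_tls_size == 16 reduces to a single
       addi dest, r13, addr@tprel
   while the 32-bit variant uses r2 as the thread pointer.  */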
7237 static rtx
7238 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
7239 {
7240 rtx dest, insn;
7241
7242 if (TARGET_XCOFF)
7243 return rs6000_legitimize_tls_address_aix (addr, model);
7244
7245 dest = gen_reg_rtx (Pmode);
7246 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
7247 {
7248 rtx tlsreg;
7249
7250 if (TARGET_64BIT)
7251 {
7252 tlsreg = gen_rtx_REG (Pmode, 13);
7253 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
7254 }
7255 else
7256 {
7257 tlsreg = gen_rtx_REG (Pmode, 2);
7258 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
7259 }
7260 emit_insn (insn);
7261 }
7262 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
7263 {
7264 rtx tlsreg, tmp;
7265
7266 tmp = gen_reg_rtx (Pmode);
7267 if (TARGET_64BIT)
7268 {
7269 tlsreg = gen_rtx_REG (Pmode, 13);
7270 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
7271 }
7272 else
7273 {
7274 tlsreg = gen_rtx_REG (Pmode, 2);
7275 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
7276 }
7277 emit_insn (insn);
7278 if (TARGET_64BIT)
7279 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
7280 else
7281 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
7282 emit_insn (insn);
7283 }
7284 else
7285 {
7286 rtx r3, got, tga, tmp1, tmp2, call_insn;
7287
7288 /* We currently use relocations like @got@tlsgd for tls, which
7289 means the linker will handle allocation of tls entries, placing
7290 them in the .got section. So use a pointer to the .got section,
7291 not one to secondary TOC sections used by 64-bit -mminimal-toc,
7292 or to secondary GOT sections used by 32-bit -fPIC. */
7293 if (TARGET_64BIT)
7294 got = gen_rtx_REG (Pmode, 2);
7295 else
7296 {
7297 if (flag_pic == 1)
7298 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
7299 else
7300 {
7301 rtx gsym = rs6000_got_sym ();
7302 got = gen_reg_rtx (Pmode);
7303 if (flag_pic == 0)
7304 rs6000_emit_move (got, gsym, Pmode);
7305 else
7306 {
7307 rtx mem, lab, last;
7308
7309 tmp1 = gen_reg_rtx (Pmode);
7310 tmp2 = gen_reg_rtx (Pmode);
7311 mem = gen_const_mem (Pmode, tmp1);
7312 lab = gen_label_rtx ();
7313 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
7314 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
7315 if (TARGET_LINK_STACK)
7316 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
7317 emit_move_insn (tmp2, mem);
7318 last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
7319 set_unique_reg_note (last, REG_EQUAL, gsym);
7320 }
7321 }
7322 }
7323
7324 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
7325 {
7326 tga = rs6000_tls_get_addr ();
7327 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
7328 1, const0_rtx, Pmode);
7329
7330 r3 = gen_rtx_REG (Pmode, 3);
7331 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7332 {
7333 if (TARGET_64BIT)
7334 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
7335 else
7336 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
7337 }
7338 else if (DEFAULT_ABI == ABI_V4)
7339 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
7340 else
7341 gcc_unreachable ();
7342 call_insn = last_call_insn ();
7343 PATTERN (call_insn) = insn;
7344 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
7345 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
7346 pic_offset_table_rtx);
7347 }
7348 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
7349 {
7350 tga = rs6000_tls_get_addr ();
7351 tmp1 = gen_reg_rtx (Pmode);
7352 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
7353 1, const0_rtx, Pmode);
7354
7355 r3 = gen_rtx_REG (Pmode, 3);
7356 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7357 {
7358 if (TARGET_64BIT)
7359 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
7360 else
7361 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
7362 }
7363 else if (DEFAULT_ABI == ABI_V4)
7364 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
7365 else
7366 gcc_unreachable ();
7367 call_insn = last_call_insn ();
7368 PATTERN (call_insn) = insn;
7369 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
7370 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
7371 pic_offset_table_rtx);
7372
7373 if (rs6000_tls_size == 16)
7374 {
7375 if (TARGET_64BIT)
7376 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
7377 else
7378 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
7379 }
7380 else if (rs6000_tls_size == 32)
7381 {
7382 tmp2 = gen_reg_rtx (Pmode);
7383 if (TARGET_64BIT)
7384 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
7385 else
7386 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
7387 emit_insn (insn);
7388 if (TARGET_64BIT)
7389 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
7390 else
7391 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
7392 }
7393 else
7394 {
7395 tmp2 = gen_reg_rtx (Pmode);
7396 if (TARGET_64BIT)
7397 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
7398 else
7399 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
7400 emit_insn (insn);
7401 insn = gen_rtx_SET (Pmode, dest,
7402 gen_rtx_PLUS (Pmode, tmp2, tmp1));
7403 }
7404 emit_insn (insn);
7405 }
7406 else
7407 {
7408 /* IE, or 64-bit offset LE. */
7409 tmp2 = gen_reg_rtx (Pmode);
7410 if (TARGET_64BIT)
7411 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
7412 else
7413 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
7414 emit_insn (insn);
7415 if (TARGET_64BIT)
7416 insn = gen_tls_tls_64 (dest, tmp2, addr);
7417 else
7418 insn = gen_tls_tls_32 (dest, tmp2, addr);
7419 emit_insn (insn);
7420 }
7421 }
7422
7423 return dest;
7424 }
7425
7426 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7427
7428 static bool
7429 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7430 {
7431 if (GET_CODE (x) == HIGH
7432 && GET_CODE (XEXP (x, 0)) == UNSPEC)
7433 return true;
7434
7435 /* A TLS symbol in the TOC cannot contain a sum. */
7436 if (GET_CODE (x) == CONST
7437 && GET_CODE (XEXP (x, 0)) == PLUS
7438 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7439 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
7440 return true;
7441
7442 /* Do not place an ELF TLS symbol in the constant pool. */
7443 return TARGET_ELF && tls_referenced_p (x);
7444 }
7445
7446 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
7447 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
7448 can be addressed relative to the toc pointer. */
7449
7450 static bool
7451 use_toc_relative_ref (rtx sym)
7452 {
7453 return ((constant_pool_expr_p (sym)
7454 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
7455 get_pool_mode (sym)))
7456 || (TARGET_CMODEL == CMODEL_MEDIUM
7457 && SYMBOL_REF_LOCAL_P (sym)));
7458 }
7459
7460 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
7461 replace the input X, or the original X if no replacement is called for.
7462 The output parameter *WIN is 1 if the calling macro should goto WIN,
7463 0 if it should not.
7464
7465 For RS/6000, we wish to handle large displacements off a base
7466 register by splitting the addend across an addi/addis pair and the mem insn.
7467 This cuts the number of extra insns needed from 3 to 1.
7468
7469 On Darwin, we use this to generate code for floating point constants.
7470 A movsf_low is generated so we wind up with 2 instructions rather than 3.
7471 The Darwin code is inside #if TARGET_MACHO because only then are the
7472 machopic_* functions defined. */
7473 static rtx
7474 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
7475 int opnum, int type,
7476 int ind_levels ATTRIBUTE_UNUSED, int *win)
7477 {
7478 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
7479
7480 /* Nasty hack for vsx_splat_V2DF/V2DI load from mem, which takes a
7481 DFmode/DImode MEM. */
7482 if (reg_offset_p
7483 && opnum == 1
7484 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
7485 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)))
7486 reg_offset_p = false;
7487
7488 /* We must recognize output that we have already generated ourselves. */
7489 if (GET_CODE (x) == PLUS
7490 && GET_CODE (XEXP (x, 0)) == PLUS
7491 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7492 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7493 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7494 {
7495 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7496 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
7497 opnum, (enum reload_type) type);
7498 *win = 1;
7499 return x;
7500 }
7501
7502 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
7503 if (GET_CODE (x) == LO_SUM
7504 && GET_CODE (XEXP (x, 0)) == HIGH)
7505 {
7506 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7507 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7508 opnum, (enum reload_type) type);
7509 *win = 1;
7510 return x;
7511 }
7512
7513 #if TARGET_MACHO
7514 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
7515 && GET_CODE (x) == LO_SUM
7516 && GET_CODE (XEXP (x, 0)) == PLUS
7517 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
7518 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
7519 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
7520 && machopic_operand_p (XEXP (x, 1)))
7521 {
7522 /* Result of a previous invocation of this function on a Darwin
7523 floating-point constant. */
7524 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7525 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7526 opnum, (enum reload_type) type);
7527 *win = 1;
7528 return x;
7529 }
7530 #endif
7531
7532 if (TARGET_CMODEL != CMODEL_SMALL
7533 && reg_offset_p
7534 && small_toc_ref (x, VOIDmode))
7535 {
7536 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
7537 x = gen_rtx_LO_SUM (Pmode, hi, x);
7538 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7539 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7540 opnum, (enum reload_type) type);
7541 *win = 1;
7542 return x;
7543 }
7544
7545 if (GET_CODE (x) == PLUS
7546 && GET_CODE (XEXP (x, 0)) == REG
7547 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
7548 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
7549 && GET_CODE (XEXP (x, 1)) == CONST_INT
7550 && reg_offset_p
7551 && !SPE_VECTOR_MODE (mode)
7552 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
7553 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
7554 {
7555 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
7556 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
7557 HOST_WIDE_INT high
7558 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
7559
7560 /* Check for 32-bit overflow. */
7561 if (high + low != val)
7562 {
7563 *win = 0;
7564 return x;
7565 }
7566
7567 /* Reload the high part into a base reg; leave the low part
7568 in the mem directly. */
7569
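      /* For instance, val = 0x1234abcd gives low = -0x5433 and
	 high = 0x12350000 (the nearest multiple of 0x10000), so the base
	 register absorbs the high part and the remaining d-form offset
	 stays within the signed 16-bit range.  */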
7570 x = gen_rtx_PLUS (GET_MODE (x),
7571 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
7572 GEN_INT (high)),
7573 GEN_INT (low));
7574
7575 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7576 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
7577 opnum, (enum reload_type) type);
7578 *win = 1;
7579 return x;
7580 }
7581
7582 if (GET_CODE (x) == SYMBOL_REF
7583 && reg_offset_p
7584 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
7585 && !SPE_VECTOR_MODE (mode)
7586 #if TARGET_MACHO
7587 && DEFAULT_ABI == ABI_DARWIN
7588 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
7589 && machopic_symbol_defined_p (x)
7590 #else
7591 && DEFAULT_ABI == ABI_V4
7592 && !flag_pic
7593 #endif
7594 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
7595 The same goes for DImode without 64-bit gprs and DFmode and DDmode
7596 without fprs.
7597 ??? Assume floating point reg based on mode? This assumption is
7598 violated by e.g. a powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
7599 where reload ends up doing a DFmode load of a constant from
7600 mem using two gprs. Unfortunately, at this point reload
7601 hasn't yet selected regs so poking around in reload data
7602 won't help and even if we could figure out the regs reliably,
7603 we'd still want to allow this transformation when the mem is
7604 naturally aligned. Since we say the address is good here, we
7605 can't disable offsets from LO_SUMs in mem_operand_gpr.
7606 FIXME: Allow offset from lo_sum for other modes too, when
7607 mem is sufficiently aligned.
7608
7609 Also disallow this if the type can go in VMX/Altivec registers, since
7610 those registers do not have d-form (reg+offset) address modes. */
7611 && !reg_addr[mode].scalar_in_vmx_p
7612 && mode != TFmode
7613 && mode != TDmode
7614 && (mode != TImode || !TARGET_VSX_TIMODE)
7615 && mode != PTImode
7616 && (mode != DImode || TARGET_POWERPC64)
7617 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
7618 || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
7619 {
7620 #if TARGET_MACHO
7621 if (flag_pic)
7622 {
7623 rtx offset = machopic_gen_offset (x);
7624 x = gen_rtx_LO_SUM (GET_MODE (x),
7625 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
7626 gen_rtx_HIGH (Pmode, offset)), offset);
7627 }
7628 else
7629 #endif
7630 x = gen_rtx_LO_SUM (GET_MODE (x),
7631 gen_rtx_HIGH (Pmode, x), x);
7632
7633 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7634 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7635 opnum, (enum reload_type) type);
7636 *win = 1;
7637 return x;
7638 }
7639
7640 /* Reload an offset address wrapped by an AND that represents the
7641 masking of the lower bits. Strip the outer AND and let reload
7642 convert the offset address into an indirect address. For VSX,
7643 force reload to create the address with an AND in a separate
7644 register, because we can't guarantee an altivec register will
7645 be used. */
7646 if (VECTOR_MEM_ALTIVEC_P (mode)
7647 && GET_CODE (x) == AND
7648 && GET_CODE (XEXP (x, 0)) == PLUS
7649 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7650 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7651 && GET_CODE (XEXP (x, 1)) == CONST_INT
7652 && INTVAL (XEXP (x, 1)) == -16)
7653 {
7654 x = XEXP (x, 0);
7655 *win = 1;
7656 return x;
7657 }
7658
7659 if (TARGET_TOC
7660 && reg_offset_p
7661 && GET_CODE (x) == SYMBOL_REF
7662 && use_toc_relative_ref (x))
7663 {
7664 x = create_TOC_reference (x, NULL_RTX);
7665 if (TARGET_CMODEL != CMODEL_SMALL)
7666 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7667 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7668 opnum, (enum reload_type) type);
7669 *win = 1;
7670 return x;
7671 }
7672 *win = 0;
7673 return x;
7674 }
7675
7676 /* Debug version of rs6000_legitimize_reload_address. */
7677 static rtx
7678 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
7679 int opnum, int type,
7680 int ind_levels, int *win)
7681 {
7682 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
7683 ind_levels, win);
7684 fprintf (stderr,
7685 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
7686 "type = %d, ind_levels = %d, win = %d, original addr:\n",
7687 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
7688 debug_rtx (x);
7689
7690 if (x == ret)
7691 fprintf (stderr, "Same address returned\n");
7692 else if (!ret)
7693 fprintf (stderr, "NULL returned\n");
7694 else
7695 {
7696 fprintf (stderr, "New address:\n");
7697 debug_rtx (ret);
7698 }
7699
7700 return ret;
7701 }
7702
7703 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
7704 that is a valid memory address for an instruction.
7705 The MODE argument is the machine mode for the MEM expression
7706 that wants to use this address.
7707
7708 On the RS/6000, there are four valid address forms: a SYMBOL_REF that
7709 refers to a constant pool entry of an address (or the sum of it
7710 plus a constant), a short (16-bit signed) constant plus a register,
7711 the sum of two registers, or a register indirect, possibly with an
7712 auto-increment. For DFmode, DDmode and DImode with a constant plus
7713 register, we must ensure that both words are addressable or PowerPC64
7714 with offset word aligned.
7715
7716 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
7717 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
7718 because adjacent memory cells are accessed by adding word-sized offsets
7719 during assembly output. */
7720 static bool
7721 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
7722 {
7723 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
7724
7725 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
7726 if (VECTOR_MEM_ALTIVEC_P (mode)
7727 && GET_CODE (x) == AND
7728 && GET_CODE (XEXP (x, 1)) == CONST_INT
7729 && INTVAL (XEXP (x, 1)) == -16)
7730 x = XEXP (x, 0);
7731
7732 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
7733 return 0;
7734 if (legitimate_indirect_address_p (x, reg_ok_strict))
7735 return 1;
7736 if (TARGET_UPDATE
7737 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
7738 && mode_supports_pre_incdec_p (mode)
7739 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
7740 return 1;
7741 if (virtual_stack_registers_memory_p (x))
7742 return 1;
7743 if (reg_offset_p && legitimate_small_data_p (mode, x))
7744 return 1;
7745 if (reg_offset_p
7746 && legitimate_constant_pool_address_p (x, mode,
7747 reg_ok_strict || lra_in_progress))
7748 return 1;
7749 /* For TImode, if we have load/store quad and TImode in VSX registers, only
7750 allow register indirect addresses. This will allow the values to go in
7751 either GPRs or VSX registers without reloading. The vector types would
7752 tend to go into VSX registers, so we allow REG+REG, while TImode seems
7753 somewhat split, in that some uses are GPR based, and some VSX based. */
7754 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
7755 return 0;
7756 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
7757 if (! reg_ok_strict
7758 && reg_offset_p
7759 && GET_CODE (x) == PLUS
7760 && GET_CODE (XEXP (x, 0)) == REG
7761 && (XEXP (x, 0) == virtual_stack_vars_rtx
7762 || XEXP (x, 0) == arg_pointer_rtx)
7763 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7764 return 1;
7765 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
7766 return 1;
7767 if (mode != TFmode
7768 && mode != TDmode
7769 && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
7770 || TARGET_POWERPC64
7771 || (mode != DFmode && mode != DDmode)
7772 || (TARGET_E500_DOUBLE && mode != DDmode))
7773 && (TARGET_POWERPC64 || mode != DImode)
7774 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
7775 && mode != PTImode
7776 && !avoiding_indexed_address_p (mode)
7777 && legitimate_indexed_address_p (x, reg_ok_strict))
7778 return 1;
7779 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
7780 && mode_supports_pre_modify_p (mode)
7781 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
7782 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
7783 reg_ok_strict, false)
7784 || (!avoiding_indexed_address_p (mode)
7785 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
7786 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7787 return 1;
7788 if (reg_offset_p && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
7789 return 1;
7790 return 0;
7791 }
7792
7793 /* Debug version of rs6000_legitimate_address_p. */
7794 static bool
7795 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
7796 bool reg_ok_strict)
7797 {
7798 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
7799 fprintf (stderr,
7800 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
7801 "strict = %d, reload = %s, code = %s\n",
7802 ret ? "true" : "false",
7803 GET_MODE_NAME (mode),
7804 reg_ok_strict,
7805 (reload_completed
7806 ? "after"
7807 : (reload_in_progress ? "progress" : "before")),
7808 GET_RTX_NAME (GET_CODE (x)));
7809 debug_rtx (x);
7810
7811 return ret;
7812 }
7813
7814 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
7815
7816 static bool
7817 rs6000_mode_dependent_address_p (const_rtx addr,
7818 addr_space_t as ATTRIBUTE_UNUSED)
7819 {
7820 return rs6000_mode_dependent_address_ptr (addr);
7821 }
7822
7823 /* Return true if ADDR (a legitimate address expression)
7824 has an effect that depends on the machine mode it is used for.
7825
7826 On the RS/6000 this is true of all integral offsets (since AltiVec
7827 and VSX modes don't allow them) and of pre-increment or decrement addresses.
7828
7829 ??? Except that due to conceptual problems in offsettable_address_p
7830 we can't really report the problems of integral offsets. So leave
7831 this assuming that the adjustable offset must be valid for the
7832 sub-words of a TFmode operand, which is what we had before. */
7833
7834 static bool
7835 rs6000_mode_dependent_address (const_rtx addr)
7836 {
7837 switch (GET_CODE (addr))
7838 {
7839 case PLUS:
7840 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
7841 is considered a legitimate address before reload, so there
7842 are no offset restrictions in that case. Note that this
7843 condition is safe in strict mode because any address involving
7844 virtual_stack_vars_rtx or arg_pointer_rtx would already have
7845 been rejected as illegitimate. */
7846 if (XEXP (addr, 0) != virtual_stack_vars_rtx
7847 && XEXP (addr, 0) != arg_pointer_rtx
7848 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
7849 {
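	  /* E.g. with TARGET_POWERPC64, offsets in [-0x8000, 0x7ff7] are
	     treated as mode-independent: 8 bytes of slack remain so the
	     second doubleword of a 16-byte value is still addressable;
	     32-bit keeps 12 bytes of slack for the last word instead.  */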
7850 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
7851 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
7852 }
7853 break;
7854
7855 case LO_SUM:
7856 /* Anything in the constant pool is sufficiently aligned that
7857 all bytes have the same high part address. */
7858 return !legitimate_constant_pool_address_p (addr, QImode, false);
7859
7860 /* Auto-increment cases are now treated generically in recog.c. */
7861 case PRE_MODIFY:
7862 return TARGET_UPDATE;
7863
7864 /* AND is only allowed in Altivec loads. */
7865 case AND:
7866 return true;
7867
7868 default:
7869 break;
7870 }
7871
7872 return false;
7873 }
7874
7875 /* Debug version of rs6000_mode_dependent_address. */
7876 static bool
7877 rs6000_debug_mode_dependent_address (const_rtx addr)
7878 {
7879 bool ret = rs6000_mode_dependent_address (addr);
7880
7881 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
7882 ret ? "true" : "false");
7883 debug_rtx (addr);
7884
7885 return ret;
7886 }
7887
7888 /* Implement FIND_BASE_TERM. */
7889
7890 rtx
7891 rs6000_find_base_term (rtx op)
7892 {
7893 rtx base;
7894
7895 base = op;
7896 if (GET_CODE (base) == CONST)
7897 base = XEXP (base, 0);
7898 if (GET_CODE (base) == PLUS)
7899 base = XEXP (base, 0);
7900 if (GET_CODE (base) == UNSPEC)
7901 switch (XINT (base, 1))
7902 {
7903 case UNSPEC_TOCREL:
7904 case UNSPEC_MACHOPIC_OFFSET:
7905 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
7906 for aliasing purposes. */
7907 return XVECEXP (base, 0, 0);
7908 }
7909
7910 return op;
7911 }
7912
7913 /* More elaborate version of recog's offsettable_memref_p predicate
7914 that works around the ??? note of rs6000_mode_dependent_address.
7915 In particular it accepts
7916
7917 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
7918
7919 in 32-bit mode, which the recog predicate rejects. */
7920
7921 static bool
7922 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
7923 {
7924 bool worst_case;
7925
7926 if (!MEM_P (op))
7927 return false;
7928
7929 /* First mimic offsettable_memref_p. */
7930 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
7931 return true;
7932
7933 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
7934 the latter predicate knows nothing about the mode of the memory
7935 reference and, therefore, assumes that it is the largest supported
7936 mode (TFmode). As a consequence, legitimate offsettable memory
7937 references are rejected. rs6000_legitimate_offset_address_p contains
7938 the correct logic for the PLUS case of rs6000_mode_dependent_address,
7939 at least with a little bit of help here given that we know the
7940 actual registers used. */
7941 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
7942 || GET_MODE_SIZE (reg_mode) == 4);
7943 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
7944 true, worst_case);
7945 }
7946
7947 /* Change register usage conditional on target flags. */
7948 static void
7949 rs6000_conditional_register_usage (void)
7950 {
7951 int i;
7952
7953 if (TARGET_DEBUG_TARGET)
7954 fprintf (stderr, "rs6000_conditional_register_usage called\n");
7955
7956 /* Set MQ register fixed (already call_used) so that it will not be
7957 allocated. */
7958 fixed_regs[64] = 1;
7959
7960 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
7961 if (TARGET_64BIT)
7962 fixed_regs[13] = call_used_regs[13]
7963 = call_really_used_regs[13] = 1;
7964
7965 /* Conditionally disable FPRs. */
7966 if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
7967 for (i = 32; i < 64; i++)
7968 fixed_regs[i] = call_used_regs[i]
7969 = call_really_used_regs[i] = 1;
7970
7971 /* The TOC register is not killed across calls in a way that is
7972 visible to the compiler. */
7973 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7974 call_really_used_regs[2] = 0;
7975
7976 if (DEFAULT_ABI == ABI_V4
7977 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
7978 && flag_pic == 2)
7979 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
7980
7981 if (DEFAULT_ABI == ABI_V4
7982 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
7983 && flag_pic == 1)
7984 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7985 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7986 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
7987
7988 if (DEFAULT_ABI == ABI_DARWIN
7989 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
7990 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7991 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7992 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
7993
7994 if (TARGET_TOC && TARGET_MINIMAL_TOC)
7995 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7996 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
7997
7998 if (TARGET_SPE)
7999 {
8000 global_regs[SPEFSCR_REGNO] = 1;
8001 /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
8002 registers in prologues and epilogues. We no longer use r14
8003 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
8004 pool for link-compatibility with older versions of GCC. Once
8005 "old" code has died out, we can return r14 to the allocation
8006 pool. */
8007 fixed_regs[14]
8008 = call_used_regs[14]
8009 = call_really_used_regs[14] = 1;
8010 }
8011
8012 if (!TARGET_ALTIVEC && !TARGET_VSX)
8013 {
8014 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
8015 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
8016 call_really_used_regs[VRSAVE_REGNO] = 1;
8017 }
8018
8019 if (TARGET_ALTIVEC || TARGET_VSX)
8020 global_regs[VSCR_REGNO] = 1;
8021
8022 if (TARGET_ALTIVEC_ABI)
8023 {
8024 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
8025 call_used_regs[i] = call_really_used_regs[i] = 1;
8026
8027 /* AIX reserves VR20:31 in non-extended ABI mode. */
8028 if (TARGET_XCOFF)
8029 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
8030 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
8031 }
8032 }
8033
8034 \f
8035 /* Output insns to set DEST equal to the constant SOURCE as a series of
8036 lis, ori and shl instructions and return TRUE. */
8037
8038 bool
8039 rs6000_emit_set_const (rtx dest, rtx source)
8040 {
8041 machine_mode mode = GET_MODE (dest);
8042 rtx temp, set;
8043 rtx_insn *insn;
8044 HOST_WIDE_INT c;
8045
8046 gcc_checking_assert (CONST_INT_P (source));
8047 c = INTVAL (source);
8048 switch (mode)
8049 {
8050 case QImode:
8051 case HImode:
8052 emit_insn (gen_rtx_SET (VOIDmode, dest, source));
8053 return true;
8054
8055 case SImode:
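      /* E.g. c = 0x12345678 becomes
	     lis  temp, 0x1234
	     ori  dest, temp, 0x5678
	 via the two SETs below.  */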
8056 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
8057
8058 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (temp),
8059 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
8060 emit_insn (gen_rtx_SET (VOIDmode, dest,
8061 gen_rtx_IOR (SImode, copy_rtx (temp),
8062 GEN_INT (c & 0xffff))));
8063 break;
8064
8065 case DImode:
8066 if (!TARGET_POWERPC64)
8067 {
8068 rtx hi, lo;
8069
8070 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
8071 DImode);
8072 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
8073 DImode);
8074 emit_move_insn (hi, GEN_INT (c >> 32));
8075 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
8076 emit_move_insn (lo, GEN_INT (c));
8077 }
8078 else
8079 rs6000_emit_set_long_const (dest, c);
8080 break;
8081
8082 default:
8083 gcc_unreachable ();
8084 }
8085
8086 insn = get_last_insn ();
8087 set = single_set (insn);
8088 if (! CONSTANT_P (SET_SRC (set)))
8089 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
8090
8091 return true;
8092 }
8093
8094 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
8095 Output insns to set DEST equal to the constant C as a series of
8096 lis, ori and shl instructions. */
8097
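/* As a worked example, c = 0x123456789abcdef0 splits into
   ud4..ud1 = 0x1234, 0x5678, 0x9abc, 0xdef0, and the general case at the
   end of this function produces the canonical five-insn sequence
       lis   tmp, 0x1234
       ori   tmp, tmp, 0x5678
       sldi  tmp, tmp, 32
       oris  tmp, tmp, 0x9abc
       ori   tmp, tmp, 0xdef0  */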
8098 static void
8099 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
8100 {
8101 rtx temp;
8102 HOST_WIDE_INT ud1, ud2, ud3, ud4;
8103
8104 ud1 = c & 0xffff;
8105 c = c >> 16;
8106 ud2 = c & 0xffff;
8107 c = c >> 16;
8108 ud3 = c & 0xffff;
8109 c = c >> 16;
8110 ud4 = c & 0xffff;
8111
8112 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
8113 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
8114 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
8115
8116 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
8117 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
8118 {
8119 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8120
8121 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8122 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
8123 if (ud1 != 0)
8124 emit_move_insn (dest,
8125 gen_rtx_IOR (DImode, copy_rtx (temp),
8126 GEN_INT (ud1)));
8127 }
8128 else if (ud3 == 0 && ud4 == 0)
8129 {
8130 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8131
8132 gcc_assert (ud2 & 0x8000);
8133 emit_move_insn (copy_rtx (temp),
8134 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
8135 if (ud1 != 0)
8136 emit_move_insn (copy_rtx (temp),
8137 gen_rtx_IOR (DImode, copy_rtx (temp),
8138 GEN_INT (ud1)));
8139 emit_move_insn (dest,
8140 gen_rtx_ZERO_EXTEND (DImode,
8141 gen_lowpart (SImode,
8142 copy_rtx (temp))));
8143 }
8144 else if ((ud4 == 0xffff && (ud3 & 0x8000))
8145 || (ud4 == 0 && ! (ud3 & 0x8000)))
8146 {
8147 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8148
8149 emit_move_insn (copy_rtx (temp),
8150 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
8151 if (ud2 != 0)
8152 emit_move_insn (copy_rtx (temp),
8153 gen_rtx_IOR (DImode, copy_rtx (temp),
8154 GEN_INT (ud2)));
8155 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8156 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
8157 GEN_INT (16)));
8158 if (ud1 != 0)
8159 emit_move_insn (dest,
8160 gen_rtx_IOR (DImode, copy_rtx (temp),
8161 GEN_INT (ud1)));
8162 }
8163 else
8164 {
8165 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8166
8167 emit_move_insn (copy_rtx (temp),
8168 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
8169 if (ud3 != 0)
8170 emit_move_insn (copy_rtx (temp),
8171 gen_rtx_IOR (DImode, copy_rtx (temp),
8172 GEN_INT (ud3)));
8173
8174 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
8175 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
8176 GEN_INT (32)));
8177 if (ud2 != 0)
8178 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8179 gen_rtx_IOR (DImode, copy_rtx (temp),
8180 GEN_INT (ud2 << 16)));
8181 if (ud1 != 0)
8182 emit_move_insn (dest,
8183 gen_rtx_IOR (DImode, copy_rtx (temp),
8184 GEN_INT (ud1)));
8185 }
8186 }
8187
8188 /* Helper for the following function. Get rid of [r+r] memory refs
8189 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
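/* E.g. (mem:TI (plus r9 r10)) is rewritten as (mem:TI rN), rN being a
   fresh pseudo (illustrative name) holding the sum, via
   copy_addr_to_reg.  */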
8190
8191 static void
8192 rs6000_eliminate_indexed_memrefs (rtx operands[2])
8193 {
8194 if (reload_in_progress)
8195 return;
8196
8197 if (GET_CODE (operands[0]) == MEM
8198 && GET_CODE (XEXP (operands[0], 0)) != REG
8199 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
8200 GET_MODE (operands[0]), false))
8201 operands[0]
8202 = replace_equiv_address (operands[0],
8203 copy_addr_to_reg (XEXP (operands[0], 0)));
8204
8205 if (GET_CODE (operands[1]) == MEM
8206 && GET_CODE (XEXP (operands[1], 0)) != REG
8207 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
8208 GET_MODE (operands[1]), false))
8209 operands[1]
8210 = replace_equiv_address (operands[1],
8211 copy_addr_to_reg (XEXP (operands[1], 0)));
8212 }
8213
8214 /* Generate a vector of constants to permute MODE for a little-endian
8215 storage operation by swapping the two halves of a vector. */
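/* E.g. for V4SImode the selector built below is { 2, 3, 0, 1 }, and for
   V2DFmode it is { 1, 0 }, i.e. a swap of the two vector halves.  */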
8216 static rtvec
8217 rs6000_const_vec (machine_mode mode)
8218 {
8219 int i, subparts;
8220 rtvec v;
8221
8222 switch (mode)
8223 {
8224 case V1TImode:
8225 subparts = 1;
8226 break;
8227 case V2DFmode:
8228 case V2DImode:
8229 subparts = 2;
8230 break;
8231 case V4SFmode:
8232 case V4SImode:
8233 subparts = 4;
8234 break;
8235 case V8HImode:
8236 subparts = 8;
8237 break;
8238 case V16QImode:
8239 subparts = 16;
8240 break;
8241 default:
8242 gcc_unreachable();
8243 }
8244
8245 v = rtvec_alloc (subparts);
8246
8247 for (i = 0; i < subparts / 2; ++i)
8248 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
8249 for (i = subparts / 2; i < subparts; ++i)
8250 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
8251
8252 return v;
8253 }
8254
8255 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
8256 for a VSX load or store operation. */
8257 rtx
8258 rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
8259 {
8260 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
8261 return gen_rtx_VEC_SELECT (mode, source, par);
8262 }
8263
8264 /* Emit a little-endian load from vector memory location SOURCE to VSX
8265 register DEST in mode MODE. The load is done with two permuting
8266 insns that represent an lxvd2x and an xxpermdi. */
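/* Schematically, for V2DFmode (selector { 1, 0 }) this emits
       (set tmp  (vec_select:V2DF (mem ...) (parallel [1 0])))
       (set dest (vec_select:V2DF tmp       (parallel [1 0])))
   which the machine description matches as lxvd2x plus xxpermdi.  */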
8267 void
8268 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
8269 {
8270 rtx tmp, permute_mem, permute_reg;
8271
8272 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
8273 V1TImode). */
8274 if (mode == TImode || mode == V1TImode)
8275 {
8276 mode = V2DImode;
8277 dest = gen_lowpart (V2DImode, dest);
8278 source = adjust_address (source, V2DImode, 0);
8279 }
8280
8281 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
8282 permute_mem = rs6000_gen_le_vsx_permute (source, mode);
8283 permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
8284 emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_mem));
8285 emit_insn (gen_rtx_SET (VOIDmode, dest, permute_reg));
8286 }
8287
8288 /* Emit a little-endian store to vector memory location DEST from VSX
8289 register SOURCE in mode MODE. The store is done with two permuting
8290 insns that represent an xxpermdi and an stxvd2x. */
8291 void
8292 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
8293 {
8294 rtx tmp, permute_src, permute_tmp;
8295
8296 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
8297 V1TImode). */
8298 if (mode == TImode || mode == V1TImode)
8299 {
8300 mode = V2DImode;
8301 dest = adjust_address (dest, V2DImode, 0);
8302 source = gen_lowpart (V2DImode, source);
8303 }
8304
8305 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
8306 permute_src = rs6000_gen_le_vsx_permute (source, mode);
8307 permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
8308 emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_src));
8309 emit_insn (gen_rtx_SET (VOIDmode, dest, permute_tmp));
8310 }
8311
8312 /* Emit a sequence representing a little-endian VSX load or store,
8313 moving data from SOURCE to DEST in mode MODE. This is done
8314 separately from rs6000_emit_move to ensure it is called only
8315 during expand. LE VSX loads and stores introduced later are
8316 handled with a split. The expand-time RTL generation allows
8317 us to optimize away redundant pairs of register-permutes. */
8318 void
8319 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
8320 {
8321 gcc_assert (!BYTES_BIG_ENDIAN
8322 && VECTOR_MEM_VSX_P (mode)
8323 && !gpr_or_gpr_p (dest, source)
8324 && (MEM_P (source) ^ MEM_P (dest)));
8325
8326 if (MEM_P (source))
8327 {
8328 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
8329 rs6000_emit_le_vsx_load (dest, source, mode);
8330 }
8331 else
8332 {
8333 if (!REG_P (source))
8334 source = force_reg (mode, source);
8335 rs6000_emit_le_vsx_store (dest, source, mode);
8336 }
8337 }
8338
8339 /* Emit a move from SOURCE to DEST in mode MODE. */
8340 void
8341 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
8342 {
8343 rtx operands[2];
8344 operands[0] = dest;
8345 operands[1] = source;
8346
8347 if (TARGET_DEBUG_ADDR)
8348 {
8349 fprintf (stderr,
8350 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
8351 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
8352 GET_MODE_NAME (mode),
8353 reload_in_progress,
8354 reload_completed,
8355 can_create_pseudo_p ());
8356 debug_rtx (dest);
8357 fprintf (stderr, "source:\n");
8358 debug_rtx (source);
8359 }
8360
8361 /* Sanity checks. Check that we get CONST_WIDE_INT only when we should. */
8362 if (CONST_WIDE_INT_P (operands[1])
8363 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
8364 {
8365 /* This should be fixed with the introduction of CONST_WIDE_INT. */
8366 gcc_unreachable ();
8367 }
8368
8369 /* Check if GCC is setting up a block move that will end up using FP
8370 registers as temporaries. We must make sure this is acceptable. */
8371 if (GET_CODE (operands[0]) == MEM
8372 && GET_CODE (operands[1]) == MEM
8373 && mode == DImode
8374 && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
8375 || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
8376 && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
8377 ? 32 : MEM_ALIGN (operands[0])))
8378 || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
8379 ? 32
8380 : MEM_ALIGN (operands[1]))))
8381 && ! MEM_VOLATILE_P (operands [0])
8382 && ! MEM_VOLATILE_P (operands [1]))
8383 {
8384 emit_move_insn (adjust_address (operands[0], SImode, 0),
8385 adjust_address (operands[1], SImode, 0));
8386 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
8387 adjust_address (copy_rtx (operands[1]), SImode, 4));
8388 return;
8389 }
8390
8391 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
8392 && !gpc_reg_operand (operands[1], mode))
8393 operands[1] = force_reg (mode, operands[1]);
8394
8395 /* Recognize the case where operands[1] is a reference to thread-local
8396 data and load its address to a register. */
8397 if (tls_referenced_p (operands[1]))
8398 {
8399 enum tls_model model;
8400 rtx tmp = operands[1];
8401 rtx addend = NULL;
8402
8403 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
8404 {
8405 addend = XEXP (XEXP (tmp, 0), 1);
8406 tmp = XEXP (XEXP (tmp, 0), 0);
8407 }
8408
8409 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
8410 model = SYMBOL_REF_TLS_MODEL (tmp);
8411 gcc_assert (model != 0);
8412
8413 tmp = rs6000_legitimize_tls_address (tmp, model);
8414 if (addend)
8415 {
8416 tmp = gen_rtx_PLUS (mode, tmp, addend);
8417 tmp = force_operand (tmp, operands[0]);
8418 }
8419 operands[1] = tmp;
8420 }
8421
8422 /* Handle the case where reload calls us with an invalid address. */
8423 if (reload_in_progress && mode == Pmode
8424 && (! general_operand (operands[1], mode)
8425 || ! nonimmediate_operand (operands[0], mode)))
8426 goto emit_set;
8427
8428 /* 128-bit constant floating-point values on Darwin should really be loaded
8429 as two parts. However, this premature splitting is a problem when DFmode
8430 values can go into Altivec registers. */
8431 if (!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
8432 && !reg_addr[DFmode].scalar_in_vmx_p
8433 && mode == TFmode && GET_CODE (operands[1]) == CONST_DOUBLE)
8434 {
8435 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
8436 simplify_gen_subreg (DFmode, operands[1], mode, 0),
8437 DFmode);
8438 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
8439 GET_MODE_SIZE (DFmode)),
8440 simplify_gen_subreg (DFmode, operands[1], mode,
8441 GET_MODE_SIZE (DFmode)),
8442 DFmode);
8443 return;
8444 }
8445
8446 if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
8447 cfun->machine->sdmode_stack_slot =
8448 eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
8449
8450
8451 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
8452 p1:SD) if p1 is not of floating-point class and p0 is spilled,
8453 since we have no analogous movsd_store for this case. */
8454 if (lra_in_progress && mode == DDmode
8455 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
8456 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
8457 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
8458 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
8459 {
8460 enum reg_class cl;
8461 int regno = REGNO (SUBREG_REG (operands[1]));
8462
8463 if (regno >= FIRST_PSEUDO_REGISTER)
8464 {
8465 cl = reg_preferred_class (regno);
8466 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
8467 }
8468 if (regno >= 0 && ! FP_REGNO_P (regno))
8469 {
8470 mode = SDmode;
8471 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
8472 operands[1] = SUBREG_REG (operands[1]);
8473 }
8474 }
8475 if (lra_in_progress
8476 && mode == SDmode
8477 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
8478 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
8479 && (REG_P (operands[1])
8480 || (GET_CODE (operands[1]) == SUBREG
8481 && REG_P (SUBREG_REG (operands[1])))))
8482 {
8483 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
8484 ? SUBREG_REG (operands[1]) : operands[1]);
8485 enum reg_class cl;
8486
8487 if (regno >= FIRST_PSEUDO_REGISTER)
8488 {
8489 cl = reg_preferred_class (regno);
8490 gcc_assert (cl != NO_REGS);
8491 regno = ira_class_hard_regs[cl][0];
8492 }
8493 if (FP_REGNO_P (regno))
8494 {
8495 if (GET_MODE (operands[0]) != DDmode)
8496 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
8497 emit_insn (gen_movsd_store (operands[0], operands[1]));
8498 }
8499 else if (INT_REGNO_P (regno))
8500 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
8501 else
8502 gcc_unreachable();
8503 return;
8504 }
8505 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
8506 p1:DD)) if p0 is not of floating-point class and p1 is spilled,
8507 since we have no analogous movsd_load for this case. */
8508 if (lra_in_progress && mode == DDmode
8509 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
8510 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
8511 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
8512 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
8513 {
8514 enum reg_class cl;
8515 int regno = REGNO (SUBREG_REG (operands[0]));
8516
8517 if (regno >= FIRST_PSEUDO_REGISTER)
8518 {
8519 cl = reg_preferred_class (regno);
8520 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
8521 }
8522 if (regno >= 0 && ! FP_REGNO_P (regno))
8523 {
8524 mode = SDmode;
8525 operands[0] = SUBREG_REG (operands[0]);
8526 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
8527 }
8528 }
8529 if (lra_in_progress
8530 && mode == SDmode
8531 && (REG_P (operands[0])
8532 || (GET_CODE (operands[0]) == SUBREG
8533 && REG_P (SUBREG_REG (operands[0]))))
8534 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
8535 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
8536 {
8537 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
8538 ? SUBREG_REG (operands[0]) : operands[0]);
8539 enum reg_class cl;
8540
8541 if (regno >= FIRST_PSEUDO_REGISTER)
8542 {
8543 cl = reg_preferred_class (regno);
8544 gcc_assert (cl != NO_REGS);
8545 regno = ira_class_hard_regs[cl][0];
8546 }
8547 if (FP_REGNO_P (regno))
8548 {
8549 if (GET_MODE (operands[1]) != DDmode)
8550 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
8551 emit_insn (gen_movsd_load (operands[0], operands[1]));
8552 }
8553 else if (INT_REGNO_P (regno))
8554 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
8555 else
8556 gcc_unreachable();
8557 return;
8558 }
8559
8560 if (reload_in_progress
8561 && mode == SDmode
8562 && cfun->machine->sdmode_stack_slot != NULL_RTX
8563 && MEM_P (operands[0])
8564 && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
8565 && REG_P (operands[1]))
8566 {
8567 if (FP_REGNO_P (REGNO (operands[1])))
8568 {
8569 rtx mem = adjust_address_nv (operands[0], DDmode, 0);
8570 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8571 emit_insn (gen_movsd_store (mem, operands[1]));
8572 }
8573 else if (INT_REGNO_P (REGNO (operands[1])))
8574 {
8575 rtx mem = operands[0];
8576 if (BYTES_BIG_ENDIAN)
8577 mem = adjust_address_nv (mem, mode, 4);
8578 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8579 emit_insn (gen_movsd_hardfloat (mem, operands[1]));
8580 }
8581 else
8582 gcc_unreachable();
8583 return;
8584 }
8585 if (reload_in_progress
8586 && mode == SDmode
8587 && REG_P (operands[0])
8588 && MEM_P (operands[1])
8589 && cfun->machine->sdmode_stack_slot != NULL_RTX
8590 && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
8591 {
8592 if (FP_REGNO_P (REGNO (operands[0])))
8593 {
8594 rtx mem = adjust_address_nv (operands[1], DDmode, 0);
8595 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8596 emit_insn (gen_movsd_load (operands[0], mem));
8597 }
8598 else if (INT_REGNO_P (REGNO (operands[0])))
8599 {
8600 rtx mem = operands[1];
8601 if (BYTES_BIG_ENDIAN)
8602 mem = adjust_address_nv (mem, mode, 4);
8603 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8604 emit_insn (gen_movsd_hardfloat (operands[0], mem));
8605 }
8606 else
8607 gcc_unreachable();
8608 return;
8609 }
8610
8611 /* FIXME: In the long term, this switch statement should go away
8612 and be replaced by a sequence of tests based on things like
8613 mode == Pmode. */
8614 switch (mode)
8615 {
8616 case HImode:
8617 case QImode:
8618 if (CONSTANT_P (operands[1])
8619 && GET_CODE (operands[1]) != CONST_INT)
8620 operands[1] = force_const_mem (mode, operands[1]);
8621 break;
8622
8623 case TFmode:
8624 case TDmode:
8625 rs6000_eliminate_indexed_memrefs (operands);
8626 /* fall through */
8627
8628 case DFmode:
8629 case DDmode:
8630 case SFmode:
8631 case SDmode:
8632 if (CONSTANT_P (operands[1])
8633 && ! easy_fp_constant (operands[1], mode))
8634 operands[1] = force_const_mem (mode, operands[1]);
8635 break;
8636
8637 case V16QImode:
8638 case V8HImode:
8639 case V4SFmode:
8640 case V4SImode:
8641 case V4HImode:
8642 case V2SFmode:
8643 case V2SImode:
8644 case V1DImode:
8645 case V2DFmode:
8646 case V2DImode:
8647 case V1TImode:
8648 if (CONSTANT_P (operands[1])
8649 && !easy_vector_constant (operands[1], mode))
8650 operands[1] = force_const_mem (mode, operands[1]);
8651 break;
8652
8653 case SImode:
8654 case DImode:
8655 /* Use the default pattern for addresses of ELF small data. */
8656 if (TARGET_ELF
8657 && mode == Pmode
8658 && DEFAULT_ABI == ABI_V4
8659 && (GET_CODE (operands[1]) == SYMBOL_REF
8660 || GET_CODE (operands[1]) == CONST)
8661 && small_data_operand (operands[1], mode))
8662 {
8663 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8664 return;
8665 }
8666
8667 if (DEFAULT_ABI == ABI_V4
8668 && mode == Pmode && mode == SImode
8669 && flag_pic == 1 && got_operand (operands[1], mode))
8670 {
8671 emit_insn (gen_movsi_got (operands[0], operands[1]));
8672 return;
8673 }
8674
8675 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
8676 && TARGET_NO_TOC
8677 && ! flag_pic
8678 && mode == Pmode
8679 && CONSTANT_P (operands[1])
8680 && GET_CODE (operands[1]) != HIGH
8681 && GET_CODE (operands[1]) != CONST_INT)
8682 {
8683 rtx target = (!can_create_pseudo_p ()
8684 ? operands[0]
8685 : gen_reg_rtx (mode));
8686
8687 /* If this is a function address on -mcall-aixdesc,
8688 convert it to the address of the descriptor. */
8689 if (DEFAULT_ABI == ABI_AIX
8690 && GET_CODE (operands[1]) == SYMBOL_REF
8691 && XSTR (operands[1], 0)[0] == '.')
8692 {
8693 const char *name = XSTR (operands[1], 0);
8694 rtx new_ref;
8695 while (*name == '.')
8696 name++;
8697 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
8698 CONSTANT_POOL_ADDRESS_P (new_ref)
8699 = CONSTANT_POOL_ADDRESS_P (operands[1]);
8700 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
8701 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
8702 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
8703 operands[1] = new_ref;
8704 }
8705
8706 if (DEFAULT_ABI == ABI_DARWIN)
8707 {
8708 #if TARGET_MACHO
8709 if (MACHO_DYNAMIC_NO_PIC_P)
8710 {
8711 /* Take care of any required data indirection. */
8712 operands[1] = rs6000_machopic_legitimize_pic_address (
8713 operands[1], mode, operands[0]);
8714 if (operands[0] != operands[1])
8715 emit_insn (gen_rtx_SET (VOIDmode,
8716 operands[0], operands[1]));
8717 return;
8718 }
8719 #endif
8720 emit_insn (gen_macho_high (target, operands[1]));
8721 emit_insn (gen_macho_low (operands[0], target, operands[1]));
8722 return;
8723 }
8724
8725 emit_insn (gen_elf_high (target, operands[1]));
8726 emit_insn (gen_elf_low (operands[0], target, operands[1]));
8727 return;
8728 }
8729
8730 /* If this is a SYMBOL_REF that refers to a constant pool entry,
8731 and we have put it in the TOC, we just need to make a TOC-relative
8732 reference to it. */
8733 if (TARGET_TOC
8734 && GET_CODE (operands[1]) == SYMBOL_REF
8735 && use_toc_relative_ref (operands[1]))
8736 operands[1] = create_TOC_reference (operands[1], operands[0]);
8737 else if (mode == Pmode
8738 && CONSTANT_P (operands[1])
8739 && GET_CODE (operands[1]) != HIGH
8740 && ((GET_CODE (operands[1]) != CONST_INT
8741 && ! easy_fp_constant (operands[1], mode))
8742 || (GET_CODE (operands[1]) == CONST_INT
8743 && (num_insns_constant (operands[1], mode)
8744 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
8745 || (GET_CODE (operands[0]) == REG
8746 && FP_REGNO_P (REGNO (operands[0]))))
8747 && !toc_relative_expr_p (operands[1], false)
8748 && (TARGET_CMODEL == CMODEL_SMALL
8749 || can_create_pseudo_p ()
8750 || (REG_P (operands[0])
8751 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
8752 {
8753
8754 #if TARGET_MACHO
8755 /* Darwin uses a special PIC legitimizer. */
8756 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
8757 {
8758 operands[1] =
8759 rs6000_machopic_legitimize_pic_address (operands[1], mode,
8760 operands[0]);
8761 if (operands[0] != operands[1])
8762 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8763 return;
8764 }
8765 #endif
8766
8767 /* If we are to limit the number of things we put in the TOC and
8768 this is a symbol plus a constant we can add in one insn,
8769 just put the symbol in the TOC and add the constant. Don't do
8770 this if reload is in progress. */
8771 if (GET_CODE (operands[1]) == CONST
8772 && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
8773 && GET_CODE (XEXP (operands[1], 0)) == PLUS
8774 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
8775 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
8776 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
8777 && ! side_effects_p (operands[0]))
8778 {
8779 rtx sym =
8780 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
8781 rtx other = XEXP (XEXP (operands[1], 0), 1);
8782
8783 sym = force_reg (mode, sym);
8784 emit_insn (gen_add3_insn (operands[0], sym, other));
8785 return;
8786 }
8787
8788 operands[1] = force_const_mem (mode, operands[1]);
8789
8790 if (TARGET_TOC
8791 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8792 && constant_pool_expr_p (XEXP (operands[1], 0))
8793 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (
8794 get_pool_constant (XEXP (operands[1], 0)),
8795 get_pool_mode (XEXP (operands[1], 0))))
8796 {
8797 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
8798 operands[0]);
8799 operands[1] = gen_const_mem (mode, tocref);
8800 set_mem_alias_set (operands[1], get_TOC_alias_set ());
8801 }
8802 }
8803 break;
8804
8805 case TImode:
8806 if (!VECTOR_MEM_VSX_P (TImode))
8807 rs6000_eliminate_indexed_memrefs (operands);
8808 break;
8809
8810 case PTImode:
8811 rs6000_eliminate_indexed_memrefs (operands);
8812 break;
8813
8814 default:
8815 fatal_insn ("bad move", gen_rtx_SET (VOIDmode, dest, source));
8816 }
8817
8818 /* Above, we may have called force_const_mem which may have returned
8819 an invalid address. If we can, fix this up; otherwise, reload will
8820 have to deal with it. */
8821 if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
8822 operands[1] = validize_mem (operands[1]);
8823
8824 emit_set:
8825 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8826 }
8827
8828 /* Return true if a structure, union or array containing FIELD should be
8829 accessed using `BLKmode'.
8830
8831 For the SPE, SIMD types are V2SI, and GCC can be tempted to put the
8832 entire thing in a DI and use subregs to access the internals.
8833 store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
8834 back-end. Because a single GPR can hold a V2SI, but not a DI, the
8835 best thing to do is set structs to BLKmode and avoid Severe Tire
8836 Damage.
8837
8838 On e500 v2, DF and DI modes suffer from the same anomaly. DF can
8839 fit into 1, whereas DI still needs two. */
8840
8841 static bool
8842 rs6000_member_type_forces_blk (const_tree field, machine_mode mode)
8843 {
8844 return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
8845 || (TARGET_E500_DOUBLE && mode == DFmode));
8846 }
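/* Illustrative example (not from the original source): with -mspe, a
   struct containing a V2SI member, say

     struct s { int v __attribute__ ((vector_size (8))); };

   is forced to BLKmode rather than DImode, so the vector stays in a
   single 64-bit GPR instead of being accessed through a
   (subreg:DI (reg:V2SI ...)).  */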
8847 \f
8848 /* Nonzero if we can use a floating-point register to pass this arg. */
8849 #define USE_FP_FOR_ARG_P(CUM,MODE) \
8850 (SCALAR_FLOAT_MODE_P (MODE) \
8851 && (CUM)->fregno <= FP_ARG_MAX_REG \
8852 && TARGET_HARD_FLOAT && TARGET_FPRS)
8853
8854 /* Nonzero if we can use an AltiVec register to pass this arg. */
8855 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
8856 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
8857 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
8858 && TARGET_ALTIVEC_ABI \
8859 && (NAMED))
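/* For illustration: a DFmode scalar satisfies USE_FP_FOR_ARG_P while
   FPRs remain (fregno <= FP_ARG_MAX_REG) under -mhard-float, whereas a
   SImode argument never does, since SImode is not a scalar float mode.
   Likewise a named V4SImode argument satisfies USE_ALTIVEC_FOR_ARG_P
   under -mabi=altivec while vregno <= ALTIVEC_ARG_MAX_REG.  */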
8860
8861 /* Walk down the type tree of TYPE counting consecutive base elements.
8862 If *MODEP is VOIDmode, then set it to the first valid floating point
8863 or vector type. If a non-floating point or vector type is found, or
8864 if a floating point or vector type that doesn't match a non-VOIDmode
8865 *MODEP is found, then return -1, otherwise return the count in the
8866 sub-tree. */
8867
8868 static int
8869 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
8870 {
8871 machine_mode mode;
8872 HOST_WIDE_INT size;
8873
8874 switch (TREE_CODE (type))
8875 {
8876 case REAL_TYPE:
8877 mode = TYPE_MODE (type);
8878 if (!SCALAR_FLOAT_MODE_P (mode))
8879 return -1;
8880
8881 if (*modep == VOIDmode)
8882 *modep = mode;
8883
8884 if (*modep == mode)
8885 return 1;
8886
8887 break;
8888
8889 case COMPLEX_TYPE:
8890 mode = TYPE_MODE (TREE_TYPE (type));
8891 if (!SCALAR_FLOAT_MODE_P (mode))
8892 return -1;
8893
8894 if (*modep == VOIDmode)
8895 *modep = mode;
8896
8897 if (*modep == mode)
8898 return 2;
8899
8900 break;
8901
8902 case VECTOR_TYPE:
8903 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
8904 return -1;
8905
8906 /* Use V4SImode as representative of all 128-bit vector types. */
8907 size = int_size_in_bytes (type);
8908 switch (size)
8909 {
8910 case 16:
8911 mode = V4SImode;
8912 break;
8913 default:
8914 return -1;
8915 }
8916
8917 if (*modep == VOIDmode)
8918 *modep = mode;
8919
8920 /* Vector modes are considered to be opaque: two vectors are
8921 equivalent for the purposes of being homogeneous aggregates
8922 if they are the same size. */
8923 if (*modep == mode)
8924 return 1;
8925
8926 break;
8927
8928 case ARRAY_TYPE:
8929 {
8930 int count;
8931 tree index = TYPE_DOMAIN (type);
8932
8933 /* Can't handle incomplete types nor sizes that are not
8934 fixed. */
8935 if (!COMPLETE_TYPE_P (type)
8936 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
8937 return -1;
8938
8939 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
8940 if (count == -1
8941 || !index
8942 || !TYPE_MAX_VALUE (index)
8943 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
8944 || !TYPE_MIN_VALUE (index)
8945 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
8946 || count < 0)
8947 return -1;
8948
8949 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
8950 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
8951
8952 /* There must be no padding. */
8953 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
8954 return -1;
8955
8956 return count;
8957 }
8958
8959 case RECORD_TYPE:
8960 {
8961 int count = 0;
8962 int sub_count;
8963 tree field;
8964
8965 /* Can't handle incomplete types nor sizes that are not
8966 fixed. */
8967 if (!COMPLETE_TYPE_P (type)
8968 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
8969 return -1;
8970
8971 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
8972 {
8973 if (TREE_CODE (field) != FIELD_DECL)
8974 continue;
8975
8976 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
8977 if (sub_count < 0)
8978 return -1;
8979 count += sub_count;
8980 }
8981
8982 /* There must be no padding. */
8983 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
8984 return -1;
8985
8986 return count;
8987 }
8988
8989 case UNION_TYPE:
8990 case QUAL_UNION_TYPE:
8991 {
8992 /* These aren't very interesting except in a degenerate case. */
8993 int count = 0;
8994 int sub_count;
8995 tree field;
8996
8997 /* Can't handle incomplete types nor sizes that are not
8998 fixed. */
8999 if (!COMPLETE_TYPE_P (type)
9000 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9001 return -1;
9002
9003 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
9004 {
9005 if (TREE_CODE (field) != FIELD_DECL)
9006 continue;
9007
9008 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
9009 if (sub_count < 0)
9010 return -1;
9011 count = count > sub_count ? count : sub_count;
9012 }
9013
9014 /* There must be no padding. */
9015 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
9016 return -1;
9017
9018 return count;
9019 }
9020
9021 default:
9022 break;
9023 }
9024
9025 return -1;
9026 }
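/* Worked examples (illustrative only, assuming AltiVec is enabled):

     struct { double x, y; }            returns 2, *modep = DFmode
     double a[4]                        returns 4, *modep = DFmode
     struct { _Complex double z; }      returns 2, *modep = DFmode
     struct { vector int v; float f; }  returns -1 (mismatched modes)  */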
9027
9028 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
9029 float or vector aggregate that shall be passed in FP/vector registers
9030 according to the ELFv2 ABI, return the homogeneous element mode in
9031 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
9032
9033 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
9034
9035 static bool
9036 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
9037 machine_mode *elt_mode,
9038 int *n_elts)
9039 {
9040 /* Note that we do not accept complex types at the top level as
9041 homogeneous aggregates; these types are handled via the
9042 targetm.calls.split_complex_arg mechanism. Complex types
9043 can be elements of homogeneous aggregates, however. */
9044 if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
9045 {
9046 machine_mode field_mode = VOIDmode;
9047 int field_count = rs6000_aggregate_candidate (type, &field_mode);
9048
9049 if (field_count > 0)
9050 {
9051 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode)
9052 ? (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
9053
9054 /* The ELFv2 ABI allows homogeneous aggregates to occupy
9055 up to AGGR_ARG_NUM_REG registers. */
9056 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
9057 {
9058 if (elt_mode)
9059 *elt_mode = field_mode;
9060 if (n_elts)
9061 *n_elts = field_count;
9062 return true;
9063 }
9064 }
9065 }
9066
9067 if (elt_mode)
9068 *elt_mode = mode;
9069 if (n_elts)
9070 *n_elts = 1;
9071 return false;
9072 }
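/* Example (illustrative, assuming -mabi=elfv2): for

     struct { double a, b, c; }

   rs6000_aggregate_candidate finds field_mode = DFmode and
   field_count = 3; each DFmode element needs one FPR
   (n_regs = (8 + 7) >> 3 = 1), and 3 * 1 <= AGGR_ARG_NUM_REG, so
   *ELT_MODE = DFmode, *N_ELTS = 3 and the function returns true.  */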
9073
9074 /* Return a nonzero value to say to return the function value in
9075 memory, just as large structures are always returned. TYPE will be
9076 the data type of the value, and FNTYPE will be the type of the
9077 function doing the returning, or @code{NULL} for libcalls.
9078
9079 The AIX ABI for the RS/6000 specifies that all structures are
9080 returned in memory. The Darwin ABI does the same.
9081
9082 For the Darwin 64 Bit ABI, a function result can be returned in
9083 registers or in memory, depending on the size of the return data
9084 type. If it is returned in registers, the value occupies the same
9085 registers as it would if it were the first and only function
9086 argument. Otherwise, the function places its result in memory at
9087 the location pointed to by GPR3.
9088
9089 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
9090 but a draft put them in memory, and GCC used to implement the draft
9091 instead of the final standard. Therefore, aix_struct_return
9092 controls this instead of DEFAULT_ABI; V.4 targets needing backward
9093 compatibility can change DRAFT_V4_STRUCT_RET to override the
9094 default, and -m switches get the final word. See
9095 rs6000_option_override_internal for more details.
9096
9097 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
9098 long double support is enabled. These values are returned in memory.
9099
9100 int_size_in_bytes returns -1 for variable-size objects, which always
9101 go in memory. The cast to unsigned makes -1 > 8. */
9102
9103 static bool
9104 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9105 {
9106 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
9107 if (TARGET_MACHO
9108 && rs6000_darwin64_abi
9109 && TREE_CODE (type) == RECORD_TYPE
9110 && int_size_in_bytes (type) > 0)
9111 {
9112 CUMULATIVE_ARGS valcum;
9113 rtx valret;
9114
9115 valcum.words = 0;
9116 valcum.fregno = FP_ARG_MIN_REG;
9117 valcum.vregno = ALTIVEC_ARG_MIN_REG;
9118 /* Do a trial code generation as if this were going to be passed
9119 as an argument; if any part goes in memory, we return NULL. */
9120 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
9121 if (valret)
9122 return false;
9123 /* Otherwise fall through to more conventional ABI rules. */
9124 }
9125
9126 /* The ELFv2 ABI returns homogeneous float/vector aggregates in registers.  */
9127 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
9128 NULL, NULL))
9129 return false;
9130
9131 /* The ELFv2 ABI returns aggregates of up to 16 bytes in registers.  */
9132 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
9133 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
9134 return false;
9135
9136 if (AGGREGATE_TYPE_P (type)
9137 && (aix_struct_return
9138 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
9139 return true;
9140
9141 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
9142 modes only exist for GCC vector types if -maltivec. */
9143 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
9144 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
9145 return false;
9146
9147 /* Return synthetic vectors in memory. */
9148 if (TREE_CODE (type) == VECTOR_TYPE
9149 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
9150 {
9151 static bool warned_for_return_big_vectors = false;
9152 if (!warned_for_return_big_vectors)
9153 {
9154 warning (0, "GCC vector returned by reference: "
9155 "non-standard ABI extension with no compatibility guarantee");
9156 warned_for_return_big_vectors = true;
9157 }
9158 return true;
9159 }
9160
9161 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD && TYPE_MODE (type) == TFmode)
9162 return true;
9163
9164 return false;
9165 }
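/* Illustrative outcomes under -mabi=elfv2 (not from the original
   source): struct { double a, b, c; } is a homogeneous aggregate and
   is returned in FPRs (false); struct { char c[16]; } fits the
   16-byte rule (false); struct { char c[24]; } is returned in memory
   (true).  */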
9166
9167 /* Specify whether values returned in registers should be at the most
9168 significant end of a register. We want aggregates returned by
9169 value to match the way aggregates are passed to functions. */
9170
9171 static bool
9172 rs6000_return_in_msb (const_tree valtype)
9173 {
9174 return (DEFAULT_ABI == ABI_ELFv2
9175 && BYTES_BIG_ENDIAN
9176 && AGGREGATE_TYPE_P (valtype)
9177 && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
9178 }
9179
9180 #ifdef HAVE_AS_GNU_ATTRIBUTE
9181 /* Return TRUE if a call to function FNDECL may be one that
9182 potentially affects the function calling ABI of the object file. */
9183
9184 static bool
9185 call_ABI_of_interest (tree fndecl)
9186 {
9187 if (symtab->state == EXPANSION)
9188 {
9189 struct cgraph_node *c_node;
9190
9191 /* Libcalls are always interesting. */
9192 if (fndecl == NULL_TREE)
9193 return true;
9194
9195 /* Any call to an external function is interesting. */
9196 if (DECL_EXTERNAL (fndecl))
9197 return true;
9198
9199 /* Interesting functions that we are emitting in this object file. */
9200 c_node = cgraph_node::get (fndecl);
9201 c_node = c_node->ultimate_alias_target ();
9202 return !c_node->only_called_directly_p ();
9203 }
9204 return false;
9205 }
9206 #endif
9207
9208 /* Initialize a variable CUM of type CUMULATIVE_ARGS
9209 for a call to a function whose data type is FNTYPE.
9210 For a library call, FNTYPE is 0 and RETURN_MODE is the return value mode.
9211
9212 For incoming args we set the number of arguments in the prototype large
9213 enough that we never return a PARALLEL. */
9214
9215 void
9216 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
9217 rtx libname ATTRIBUTE_UNUSED, int incoming,
9218 int libcall, int n_named_args,
9219 tree fndecl ATTRIBUTE_UNUSED,
9220 machine_mode return_mode ATTRIBUTE_UNUSED)
9221 {
9222 static CUMULATIVE_ARGS zero_cumulative;
9223
9224 *cum = zero_cumulative;
9225 cum->words = 0;
9226 cum->fregno = FP_ARG_MIN_REG;
9227 cum->vregno = ALTIVEC_ARG_MIN_REG;
9228 cum->prototype = (fntype && prototype_p (fntype));
9229 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
9230 ? CALL_LIBCALL : CALL_NORMAL);
9231 cum->sysv_gregno = GP_ARG_MIN_REG;
9232 cum->stdarg = stdarg_p (fntype);
9233
9234 cum->nargs_prototype = 0;
9235 if (incoming || cum->prototype)
9236 cum->nargs_prototype = n_named_args;
9237
9238 /* Check for a longcall attribute. */
9239 if ((!fntype && rs6000_default_long_calls)
9240 || (fntype
9241 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
9242 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
9243 cum->call_cookie |= CALL_LONG;
9244
9245 if (TARGET_DEBUG_ARG)
9246 {
9247 fprintf (stderr, "\ninit_cumulative_args:");
9248 if (fntype)
9249 {
9250 tree ret_type = TREE_TYPE (fntype);
9251 fprintf (stderr, " ret code = %s,",
9252 get_tree_code_name (TREE_CODE (ret_type)));
9253 }
9254
9255 if (cum->call_cookie & CALL_LONG)
9256 fprintf (stderr, " longcall,");
9257
9258 fprintf (stderr, " proto = %d, nargs = %d\n",
9259 cum->prototype, cum->nargs_prototype);
9260 }
9261
9262 #ifdef HAVE_AS_GNU_ATTRIBUTE
9263 if (DEFAULT_ABI == ABI_V4)
9264 {
9265 cum->escapes = call_ABI_of_interest (fndecl);
9266 if (cum->escapes)
9267 {
9268 tree return_type;
9269
9270 if (fntype)
9271 {
9272 return_type = TREE_TYPE (fntype);
9273 return_mode = TYPE_MODE (return_type);
9274 }
9275 else
9276 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
9277
9278 if (return_type != NULL)
9279 {
9280 if (TREE_CODE (return_type) == RECORD_TYPE
9281 && TYPE_TRANSPARENT_AGGR (return_type))
9282 {
9283 return_type = TREE_TYPE (first_field (return_type));
9284 return_mode = TYPE_MODE (return_type);
9285 }
9286 if (AGGREGATE_TYPE_P (return_type)
9287 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
9288 <= 8))
9289 rs6000_returns_struct = true;
9290 }
9291 if (SCALAR_FLOAT_MODE_P (return_mode))
9292 rs6000_passes_float = true;
9293 else if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
9294 || SPE_VECTOR_MODE (return_mode))
9295 rs6000_passes_vector = true;
9296 }
9297 }
9298 #endif
9299
9300 if (fntype
9301 && !TARGET_ALTIVEC
9302 && TARGET_ALTIVEC_ABI
9303 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
9304 {
9305 error ("cannot return value in vector register because"
9306 " altivec instructions are disabled, use -maltivec"
9307 " to enable them");
9308 }
9309 }
9310 \f
9311 /* Return true if TYPE must be passed on the stack and not in registers. */
9312
9313 static bool
9314 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
9315 {
9316 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
9317 return must_pass_in_stack_var_size (mode, type);
9318 else
9319 return must_pass_in_stack_var_size_or_pad (mode, type);
9320 }
9321
9322 /* If defined, a C expression which determines whether, and in which
9323 direction, to pad out an argument with extra space. The value
9324 should be of type `enum direction': either `upward' to pad above
9325 the argument, `downward' to pad below, or `none' to inhibit
9326 padding.
9327
9328 For the AIX ABI structs are always stored left shifted in their
9329 argument slot. */
9330
9331 enum direction
9332 function_arg_padding (machine_mode mode, const_tree type)
9333 {
9334 #ifndef AGGREGATE_PADDING_FIXED
9335 #define AGGREGATE_PADDING_FIXED 0
9336 #endif
9337 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
9338 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
9339 #endif
9340
9341 if (!AGGREGATE_PADDING_FIXED)
9342 {
9343 /* GCC used to pass structures of the same size as integer types as
9344 if they were in fact integers, ignoring FUNCTION_ARG_PADDING.
9345 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
9346 passed padded downward, except that -mstrict-align further
9347 muddied the water in that multi-component structures of 2 and 4
9348 bytes in size were passed padded upward.
9349
9350 The following arranges for best compatibility with previous
9351 versions of gcc, but removes the -mstrict-align dependency. */
9352 if (BYTES_BIG_ENDIAN)
9353 {
9354 HOST_WIDE_INT size = 0;
9355
9356 if (mode == BLKmode)
9357 {
9358 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
9359 size = int_size_in_bytes (type);
9360 }
9361 else
9362 size = GET_MODE_SIZE (mode);
9363
9364 if (size == 1 || size == 2 || size == 4)
9365 return downward;
9366 }
9367 return upward;
9368 }
9369
9370 if (AGGREGATES_PAD_UPWARD_ALWAYS)
9371 {
9372 if (type != 0 && AGGREGATE_TYPE_P (type))
9373 return upward;
9374 }
9375
9376 /* Fall back to the default. */
9377 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
9378 }
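/* For example, on a big-endian target with the default
   !AGGREGATE_PADDING_FIXED behavior, a 2-byte struct is padded
   downward (right-justified in its slot) for compatibility with older
   GCC, while a 3-byte struct is padded upward.  */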
9379
9380 /* If defined, a C expression that gives the alignment boundary, in bits,
9381 of an argument with the specified mode and type. If it is not defined,
9382 PARM_BOUNDARY is used for all arguments.
9383
9384 V.4 wants long longs and doubles to be double word aligned. Just
9385 testing the mode size is a boneheaded way to do this as it means
9386 that other types such as complex int are also double word aligned.
9387 However, we're stuck with this because changing the ABI might break
9388 existing library interfaces.
9389
9390 Doubleword align SPE vectors.
9391 Quadword align Altivec/VSX vectors.
9392 Quadword align large synthetic vector types. */
9393
9394 static unsigned int
9395 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
9396 {
9397 machine_mode elt_mode;
9398 int n_elts;
9399
9400 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
9401
9402 if (DEFAULT_ABI == ABI_V4
9403 && (GET_MODE_SIZE (mode) == 8
9404 || (TARGET_HARD_FLOAT
9405 && TARGET_FPRS
9406 && (mode == TFmode || mode == TDmode))))
9407 return 64;
9408 else if (SPE_VECTOR_MODE (mode)
9409 || (type && TREE_CODE (type) == VECTOR_TYPE
9410 && int_size_in_bytes (type) >= 8
9411 && int_size_in_bytes (type) < 16))
9412 return 64;
9413 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
9414 || (type && TREE_CODE (type) == VECTOR_TYPE
9415 && int_size_in_bytes (type) >= 16))
9416 return 128;
9417
9418 /* Aggregate types that need > 8 byte alignment are quadword-aligned
9419 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
9420 -mcompat-align-parm is used. */
9421 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
9422 || DEFAULT_ABI == ABI_ELFv2)
9423 && type && TYPE_ALIGN (type) > 64)
9424 {
9425 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
9426 or homogeneous float/vector aggregates here. We already handled
9427 vector aggregates above, but still need to check for float here. */
9428 bool aggregate_p = (AGGREGATE_TYPE_P (type)
9429 && !SCALAR_FLOAT_MODE_P (elt_mode));
9430
9431 /* We used to check for BLKmode instead of the above aggregate type
9432 check. Warn when this results in any difference to the ABI. */
9433 if (aggregate_p != (mode == BLKmode))
9434 {
9435 static bool warned;
9436 if (!warned && warn_psabi)
9437 {
9438 warned = true;
9439 inform (input_location,
9440 "the ABI of passing aggregates with %d-byte alignment"
9441 " has changed in GCC 5",
9442 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
9443 }
9444 }
9445
9446 if (aggregate_p)
9447 return 128;
9448 }
9449
9450 /* Similar for the Darwin64 ABI. Note that for historical reasons we
9451 implement the "aggregate type" check as a BLKmode check here; this
9452 means certain aggregate types are in fact not aligned. */
9453 if (TARGET_MACHO && rs6000_darwin64_abi
9454 && mode == BLKmode
9455 && type && TYPE_ALIGN (type) > 64)
9456 return 128;
9457
9458 return PARM_BOUNDARY;
9459 }
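/* Examples (illustrative): under the SVR4 ABI a DFmode argument is
   doubleword (64-bit) aligned; any AltiVec/VSX vector argument is
   quadword (128-bit) aligned; and under ELFv2 an aggregate declared
   with 32-byte alignment is likewise quadword-aligned in the save
   area.  Everything else falls back to PARM_BOUNDARY.  */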
9460
9461 /* The offset in words to the start of the parameter save area. */
9462
9463 static unsigned int
9464 rs6000_parm_offset (void)
9465 {
9466 return (DEFAULT_ABI == ABI_V4 ? 2
9467 : DEFAULT_ABI == ABI_ELFv2 ? 4
9468 : 6);
9469 }
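/* In other words, the save area sits 2 words past the stack pointer
   under the SVR4 ABI (back chain + LR save word), 4 words under ELFv2
   (back chain, CR save, LR save, TOC save), and 6 words under
   AIX/ELFv1 (those four plus the reserved compiler and linker
   words).  */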
9470
9471 /* For a function parm of MODE and TYPE, return the starting word in
9472 the parameter area. NWORDS of the parameter area are already used. */
9473
9474 static unsigned int
9475 rs6000_parm_start (machine_mode mode, const_tree type,
9476 unsigned int nwords)
9477 {
9478 unsigned int align;
9479
9480 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
9481 return nwords + (-(rs6000_parm_offset () + nwords) & align);
9482 }
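/* Worked example (illustrative): for a 16-byte-aligned argument on a
   64-bit ELFv2 target, rs6000_function_arg_boundary returns 128 and
   PARM_BOUNDARY is 64, so align = 1.  With nwords = 3 already used and
   rs6000_parm_offset () = 4,

     start = 3 + (-(4 + 3) & 1) = 3 + 1 = 4,

   i.e. the argument begins at absolute doubleword 4 + 4 = 8, which is
   even and hence 16-byte aligned.  */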
9483
9484 /* Compute the size (in words) of a function argument. */
9485
9486 static unsigned long
9487 rs6000_arg_size (machine_mode mode, const_tree type)
9488 {
9489 unsigned long size;
9490
9491 if (mode != BLKmode)
9492 size = GET_MODE_SIZE (mode);
9493 else
9494 size = int_size_in_bytes (type);
9495
9496 if (TARGET_32BIT)
9497 return (size + 3) >> 2;
9498 else
9499 return (size + 7) >> 3;
9500 }
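/* E.g. a 13-byte BLKmode struct occupies (13 + 3) >> 2 = 4 words on a
   32-bit target and (13 + 7) >> 3 = 2 doublewords on a 64-bit target
   (illustrative arithmetic, not from the original source).  */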
9501 \f
9502 /* Use this to flush pending int fields. */
9503
9504 static void
9505 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
9506 HOST_WIDE_INT bitpos, int final)
9507 {
9508 unsigned int startbit, endbit;
9509 int intregs, intoffset;
9510 machine_mode mode;
9511
9512 /* Handle the situations where a float is taking up the first half
9513 of the GPR, and the other half is empty (typically due to
9514 alignment restrictions). We can detect this by an 8-byte-aligned
9515 int field, or by seeing that this is the final flush for this
9516 argument. Count the word and continue on. */
9517 if (cum->floats_in_gpr == 1
9518 && (cum->intoffset % 64 == 0
9519 || (cum->intoffset == -1 && final)))
9520 {
9521 cum->words++;
9522 cum->floats_in_gpr = 0;
9523 }
9524
9525 if (cum->intoffset == -1)
9526 return;
9527
9528 intoffset = cum->intoffset;
9529 cum->intoffset = -1;
9530 cum->floats_in_gpr = 0;
9531
9532 if (intoffset % BITS_PER_WORD != 0)
9533 {
9534 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
9535 MODE_INT, 0);
9536 if (mode == BLKmode)
9537 {
9538 /* We couldn't find an appropriate mode, which happens,
9539 e.g., in packed structs when there are 3 bytes to load.
9540 Move intoffset back to the beginning of the word in this
9541 case. */
9542 intoffset = intoffset & -BITS_PER_WORD;
9543 }
9544 }
9545
9546 startbit = intoffset & -BITS_PER_WORD;
9547 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
9548 intregs = (endbit - startbit) / BITS_PER_WORD;
9549 cum->words += intregs;
9550 /* words should be unsigned. */
9551 if ((unsigned) cum->words < (endbit / BITS_PER_WORD))
9552 {
9553 int pad = (endbit / BITS_PER_WORD) - cum->words;
9554 cum->words += pad;
9555 }
9556 }
9557
9558 /* The darwin64 ABI calls for us to recurse down through structs,
9559 looking for elements passed in registers. Unfortunately, we have
9560 to track int register count here also because of misalignments
9561 in powerpc alignment mode. */
9562
9563 static void
9564 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
9565 const_tree type,
9566 HOST_WIDE_INT startbitpos)
9567 {
9568 tree f;
9569
9570 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
9571 if (TREE_CODE (f) == FIELD_DECL)
9572 {
9573 HOST_WIDE_INT bitpos = startbitpos;
9574 tree ftype = TREE_TYPE (f);
9575 machine_mode mode;
9576 if (ftype == error_mark_node)
9577 continue;
9578 mode = TYPE_MODE (ftype);
9579
9580 if (DECL_SIZE (f) != 0
9581 && tree_fits_uhwi_p (bit_position (f)))
9582 bitpos += int_bit_position (f);
9583
9584 /* ??? FIXME: else assume zero offset. */
9585
9586 if (TREE_CODE (ftype) == RECORD_TYPE)
9587 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
9588 else if (USE_FP_FOR_ARG_P (cum, mode))
9589 {
9590 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
9591 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
9592 cum->fregno += n_fpregs;
9593 /* Single-precision floats present a special problem for
9594 us, because they are smaller than an 8-byte GPR, and so
9595 the structure-packing rules combined with the standard
9596 varargs behavior mean that we want to pack float/float
9597 and float/int combinations into a single register's
9598 space. This is complicated by the arg advance flushing,
9599 which works on arbitrarily large groups of int-type
9600 fields. */
9601 if (mode == SFmode)
9602 {
9603 if (cum->floats_in_gpr == 1)
9604 {
9605 /* Two floats in a word; count the word and reset
9606 the float count. */
9607 cum->words++;
9608 cum->floats_in_gpr = 0;
9609 }
9610 else if (bitpos % 64 == 0)
9611 {
9612 /* A float at the beginning of an 8-byte word;
9613 count it and put off adjusting cum->words until
9614 we see if an arg advance flush is going to do it
9615 for us. */
9616 cum->floats_in_gpr++;
9617 }
9618 else
9619 {
9620 /* The float is at the end of a word, preceded
9621 by integer fields, so the arg advance flush
9622 just above has already set cum->words and
9623 everything is taken care of. */
9624 }
9625 }
9626 else
9627 cum->words += n_fpregs;
9628 }
9629 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
9630 {
9631 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
9632 cum->vregno++;
9633 cum->words += 2;
9634 }
9635 else if (cum->intoffset == -1)
9636 cum->intoffset = bitpos;
9637 }
9638 }
9639
9640 /* Check for an item that needs to be considered specially under the
9641 Darwin 64-bit ABI. These are record types where the mode is BLKmode
9642 or the structure is 8 bytes in size. */
9643 static int
9644 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
9645 {
9646 return rs6000_darwin64_abi
9647 && ((mode == BLKmode
9648 && TREE_CODE (type) == RECORD_TYPE
9649 && int_size_in_bytes (type) > 0)
9650 || (type && TREE_CODE (type) == RECORD_TYPE
9651 && int_size_in_bytes (type) == 8)) ? 1 : 0;
9652 }
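/* Illustration (hypothetical types): under the Darwin 64-bit ABI,
   struct { char c[9]; } (BLKmode) and struct { int a, b; } (an 8-byte
   DImode record) both check true here, while struct { int a; }
   (SImode, 4 bytes) does not.  */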
9653
9654 /* Update the data in CUM to advance over an argument
9655 of mode MODE and data type TYPE.
9656 (TYPE is null for libcalls where that information may not be available.)
9657
9658 Note that for args passed by reference, function_arg will be called
9659 with MODE and TYPE set to that of the pointer to the arg, not the arg
9660 itself. */
9661
9662 static void
9663 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
9664 const_tree type, bool named, int depth)
9665 {
9666 machine_mode elt_mode;
9667 int n_elts;
9668
9669 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
9670
9671 /* Only tick off an argument if we're not recursing. */
9672 if (depth == 0)
9673 cum->nargs_prototype--;
9674
9675 #ifdef HAVE_AS_GNU_ATTRIBUTE
9676 if (DEFAULT_ABI == ABI_V4
9677 && cum->escapes)
9678 {
9679 if (SCALAR_FLOAT_MODE_P (mode))
9680 rs6000_passes_float = true;
9681 else if (named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
9682 rs6000_passes_vector = true;
9683 else if (SPE_VECTOR_MODE (mode)
9684 && !cum->stdarg
9685 && cum->sysv_gregno <= GP_ARG_MAX_REG)
9686 rs6000_passes_vector = true;
9687 }
9688 #endif
9689
9690 if (TARGET_ALTIVEC_ABI
9691 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
9692 || (type && TREE_CODE (type) == VECTOR_TYPE
9693 && int_size_in_bytes (type) == 16)))
9694 {
9695 bool stack = false;
9696
9697 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
9698 {
9699 cum->vregno += n_elts;
9700
9701 if (!TARGET_ALTIVEC)
9702 error ("cannot pass argument in vector register because"
9703 " altivec instructions are disabled, use -maltivec"
9704 " to enable them");
9705
9706 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
9707 even if it is going to be passed in a vector register.
9708 Darwin does the same for variable-argument functions. */
9709 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9710 && TARGET_64BIT)
9711 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
9712 stack = true;
9713 }
9714 else
9715 stack = true;
9716
9717 if (stack)
9718 {
9719 int align;
9720
9721 /* Vector parameters must be 16-byte aligned. In 32-bit
9722 mode this means we need to take into account the offset
9723 to the parameter save area. In 64-bit mode, they just
9724 have to start on an even word, since the parameter save
9725 area is 16-byte aligned. */
9726 if (TARGET_32BIT)
9727 align = -(rs6000_parm_offset () + cum->words) & 3;
9728 else
9729 align = cum->words & 1;
9730 cum->words += align + rs6000_arg_size (mode, type);
9731
9732 if (TARGET_DEBUG_ARG)
9733 {
9734 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
9735 cum->words, align);
9736 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
9737 cum->nargs_prototype, cum->prototype,
9738 GET_MODE_NAME (mode));
9739 }
9740 }
9741 }
9742 else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
9743 && !cum->stdarg
9744 && cum->sysv_gregno <= GP_ARG_MAX_REG)
9745 cum->sysv_gregno++;
9746
9747 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
9748 {
9749 int size = int_size_in_bytes (type);
9750 /* Variable sized types have size == -1 and are
9751 treated as if consisting entirely of ints.
9752 Pad to a 16-byte boundary if needed. */
9753 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
9754 && (cum->words % 2) != 0)
9755 cum->words++;
9756 /* For varargs, we can just go up by the size of the struct. */
9757 if (!named)
9758 cum->words += (size + 7) / 8;
9759 else
9760 {
9761 /* It is tempting to say int register count just goes up by
9762 sizeof(type)/8, but this is wrong in a case such as
9763 { int; double; int; } [powerpc alignment]. We have to
9764 grovel through the fields for these too. */
9765 cum->intoffset = 0;
9766 cum->floats_in_gpr = 0;
9767 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
9768 rs6000_darwin64_record_arg_advance_flush (cum,
9769 size * BITS_PER_UNIT, 1);
9770 }
9771 if (TARGET_DEBUG_ARG)
9772 {
9773 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
9774 cum->words, TYPE_ALIGN (type), size);
9775 fprintf (stderr,
9776 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
9777 cum->nargs_prototype, cum->prototype,
9778 GET_MODE_NAME (mode));
9779 }
9780 }
9781 else if (DEFAULT_ABI == ABI_V4)
9782 {
9783 if (TARGET_HARD_FLOAT && TARGET_FPRS
9784 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
9785 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
9786 || (mode == TFmode && !TARGET_IEEEQUAD)
9787 || mode == SDmode || mode == DDmode || mode == TDmode))
9788 {
9789 /* _Decimal128 must use an even/odd register pair. This assumes
9790 that the register number is odd when fregno is odd. */
9791 if (mode == TDmode && (cum->fregno % 2) == 1)
9792 cum->fregno++;
9793
9794 if (cum->fregno + (mode == TFmode || mode == TDmode ? 1 : 0)
9795 <= FP_ARG_V4_MAX_REG)
9796 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
9797 else
9798 {
9799 cum->fregno = FP_ARG_V4_MAX_REG + 1;
9800 if (mode == DFmode || mode == TFmode
9801 || mode == DDmode || mode == TDmode)
9802 cum->words += cum->words & 1;
9803 cum->words += rs6000_arg_size (mode, type);
9804 }
9805 }
9806 else
9807 {
9808 int n_words = rs6000_arg_size (mode, type);
9809 int gregno = cum->sysv_gregno;
9810
9811 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
9812 (r7,r8) or (r9,r10), as is any other 2-word item such
9813 as complex int, due to a historical mistake. */
9814 if (n_words == 2)
9815 gregno += (1 - gregno) & 1;
9816
9817 /* Multi-reg args are not split between registers and stack. */
9818 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
9819 {
9820 /* Long long and SPE vectors are aligned on the stack.
9821 So are other 2-word items such as complex int, due to
9822 a historical mistake. */
9823 if (n_words == 2)
9824 cum->words += cum->words & 1;
9825 cum->words += n_words;
9826 }
9827
9828 /* Note: gregno continues to advance even after arguments have
9829 started spilling to the stack; expand_builtin_saveregs relies on
9830 this to detect that spilling has begun. */
9831 cum->sysv_gregno = gregno + n_words;
9832 }
9833
9834 if (TARGET_DEBUG_ARG)
9835 {
9836 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
9837 cum->words, cum->fregno);
9838 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
9839 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
9840 fprintf (stderr, "mode = %4s, named = %d\n",
9841 GET_MODE_NAME (mode), named);
9842 }
9843 }
9844 else
9845 {
9846 int n_words = rs6000_arg_size (mode, type);
9847 int start_words = cum->words;
9848 int align_words = rs6000_parm_start (mode, type, start_words);
9849
9850 cum->words = align_words + n_words;
9851
9852 if (SCALAR_FLOAT_MODE_P (elt_mode)
9853 && TARGET_HARD_FLOAT && TARGET_FPRS)
9854 {
9855 /* _Decimal128 must be passed in an even/odd float register pair.
9856 This assumes that the register number is odd when fregno is
9857 odd. */
9858 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
9859 cum->fregno++;
9860 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
9861 }
9862
9863 if (TARGET_DEBUG_ARG)
9864 {
9865 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
9866 cum->words, cum->fregno);
9867 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
9868 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
9869 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
9870 named, align_words - start_words, depth);
9871 }
9872 }
9873 }
9874
9875 static void
9876 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
9877 const_tree type, bool named)
9878 {
9879 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
9880 0);
9881 }
9882
9883 static rtx
9884 spe_build_register_parallel (machine_mode mode, int gregno)
9885 {
9886 rtx r1, r3, r5, r7;
9887
9888 switch (mode)
9889 {
9890 case DFmode:
9891 r1 = gen_rtx_REG (DImode, gregno);
9892 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
9893 return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));
9894
9895 case DCmode:
9896 case TFmode:
9897 r1 = gen_rtx_REG (DImode, gregno);
9898 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
9899 r3 = gen_rtx_REG (DImode, gregno + 2);
9900 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
9901 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));
9902
9903 case TCmode:
9904 r1 = gen_rtx_REG (DImode, gregno);
9905 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
9906 r3 = gen_rtx_REG (DImode, gregno + 2);
9907 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
9908 r5 = gen_rtx_REG (DImode, gregno + 4);
9909 r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
9910 r7 = gen_rtx_REG (DImode, gregno + 6);
9911 r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
9912 return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));
9913
9914 default:
9915 gcc_unreachable ();
9916 }
9917 }
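/* For example (schematically, not from the original source): a DCmode
   argument starting at gregno == 5 yields

     (parallel:DC [(expr_list (reg:DI 5) (const_int 0))
                   (expr_list (reg:DI 7) (const_int 8))])

   i.e. the real part in the r5/r6 pair and the imaginary part in
   r7/r8, each half described as one 64-bit DImode piece.  */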
9918
9919 /* Determine where to put a SIMD argument on the SPE. */
9920 static rtx
9921 rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode,
9922 const_tree type)
9923 {
9924 int gregno = cum->sysv_gregno;
9925
9926 /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
9927 doubles are passed and returned in a pair of GPRs for ABI compatibility. */
9928 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
9929 || mode == DCmode || mode == TCmode))
9930 {
9931 int n_words = rs6000_arg_size (mode, type);
9932
9933 /* Doubles go in an odd/even register pair (r5/r6, etc). */
9934 if (mode == DFmode)
9935 gregno += (1 - gregno) & 1;
9936
9937 /* Multi-reg args are not split between registers and stack. */
9938 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
9939 return NULL_RTX;
9940
9941 return spe_build_register_parallel (mode, gregno);
9942 }
9943 if (cum->stdarg)
9944 {
9945 int n_words = rs6000_arg_size (mode, type);
9946
9947 /* SPE vectors are put in odd registers. */
9948 if (n_words == 2 && (gregno & 1) == 0)
9949 gregno += 1;
9950
9951 if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
9952 {
9953 rtx r1, r2;
9954 machine_mode m = SImode;
9955
9956 r1 = gen_rtx_REG (m, gregno);
9957 r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
9958 r2 = gen_rtx_REG (m, gregno + 1);
9959 r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
9960 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
9961 }
9962 else
9963 return NULL_RTX;
9964 }
9965 else
9966 {
9967 if (gregno <= GP_ARG_MAX_REG)
9968 return gen_rtx_REG (mode, gregno);
9969 else
9970 return NULL_RTX;
9971 }
9972 }
9973
9974 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
9975 structure between cum->intoffset and bitpos to integer registers. */
9976
9977 static void
9978 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
9979 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
9980 {
9981 machine_mode mode;
9982 unsigned int regno;
9983 unsigned int startbit, endbit;
9984 int this_regno, intregs, intoffset;
9985 rtx reg;
9986
9987 if (cum->intoffset == -1)
9988 return;
9989
9990 intoffset = cum->intoffset;
9991 cum->intoffset = -1;
9992
9993 /* If this is the trailing part of a word, try to load only that
9994 much into the register. Otherwise load the whole register. Note
9995 that in the latter case we may pick up unwanted bits. It's not a
9996 problem at the moment, but we may wish to revisit this. */
9997
9998 if (intoffset % BITS_PER_WORD != 0)
9999 {
10000 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
10001 MODE_INT, 0);
10002 if (mode == BLKmode)
10003 {
10004 /* We couldn't find an appropriate mode, which happens,
10005 e.g., in packed structs when there are 3 bytes to load.
10006 Move intoffset back to the beginning of the word in this
10007 case. */
10008 intoffset = intoffset & -BITS_PER_WORD;
10009 mode = word_mode;
10010 }
10011 }
10012 else
10013 mode = word_mode;
10014
10015 startbit = intoffset & -BITS_PER_WORD;
10016 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
10017 intregs = (endbit - startbit) / BITS_PER_WORD;
10018 this_regno = cum->words + intoffset / BITS_PER_WORD;
10019
10020 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
10021 cum->use_stack = 1;
10022
10023 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
10024 if (intregs <= 0)
10025 return;
10026
10027 intoffset /= BITS_PER_UNIT;
10028 do
10029 {
10030 regno = GP_ARG_MIN_REG + this_regno;
10031 reg = gen_rtx_REG (mode, regno);
10032 rvec[(*k)++]
10033 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
10034
10035 this_regno += 1;
10036 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
10037 mode = word_mode;
10038 intregs -= 1;
10039 }
10040 while (intregs > 0);
10041 }
10042
10043 /* Recursive workhorse for the following. */
10044
10045 static void
10046 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
10047 HOST_WIDE_INT startbitpos, rtx rvec[],
10048 int *k)
10049 {
10050 tree f;
10051
10052 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
10053 if (TREE_CODE (f) == FIELD_DECL)
10054 {
10055 HOST_WIDE_INT bitpos = startbitpos;
10056 tree ftype = TREE_TYPE (f);
10057 machine_mode mode;
10058 if (ftype == error_mark_node)
10059 continue;
10060 mode = TYPE_MODE (ftype);
10061
10062 if (DECL_SIZE (f) != 0
10063 && tree_fits_uhwi_p (bit_position (f)))
10064 bitpos += int_bit_position (f);
10065
10066 /* ??? FIXME: else assume zero offset. */
10067
10068 if (TREE_CODE (ftype) == RECORD_TYPE)
10069 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
10070 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
10071 {
10072 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
10073 #if 0
10074 switch (mode)
10075 {
10076 case SCmode: mode = SFmode; break;
10077 case DCmode: mode = DFmode; break;
10078 case TCmode: mode = TFmode; break;
10079 default: break;
10080 }
10081 #endif
10082 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
10083 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
10084 {
10085 gcc_assert (cum->fregno == FP_ARG_MAX_REG
10086 && (mode == TFmode || mode == TDmode));
10087 /* Long double or _Decimal128 split over regs and memory. */
10088 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
10089 cum->use_stack = 1;
10090 }
10091 rvec[(*k)++]
10092 = gen_rtx_EXPR_LIST (VOIDmode,
10093 gen_rtx_REG (mode, cum->fregno++),
10094 GEN_INT (bitpos / BITS_PER_UNIT));
10095 if (mode == TFmode || mode == TDmode)
10096 cum->fregno++;
10097 }
10098 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
10099 {
10100 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
10101 rvec[(*k)++]
10102 = gen_rtx_EXPR_LIST (VOIDmode,
10103 gen_rtx_REG (mode, cum->vregno++),
10104 GEN_INT (bitpos / BITS_PER_UNIT));
10105 }
10106 else if (cum->intoffset == -1)
10107 cum->intoffset = bitpos;
10108 }
10109 }
10110
10111 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
10112 the register(s) to be used for each field and subfield of a struct
10113 being passed by value, along with the offset of where the
10114 register's value may be found in the block. FP fields go in FP
10115 registers, vector fields go in vector registers, and everything
10116 else goes in int registers, packed as in memory.
10117
10118 This code is also used for function return values. RETVAL indicates
10119 whether this is the case.
10120
10121 Much of this is taken from the SPARC V9 port, which has a similar
10122 calling convention. */
10123
10124 static rtx
10125 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
10126 bool named, bool retval)
10127 {
10128 rtx rvec[FIRST_PSEUDO_REGISTER];
10129 int k = 1, kbase = 1;
10130 HOST_WIDE_INT typesize = int_size_in_bytes (type);
10131 /* This is a copy; modifications are not visible to our caller. */
10132 CUMULATIVE_ARGS copy_cum = *orig_cum;
10133 CUMULATIVE_ARGS *cum = &copy_cum;
10134
10135 /* Pad to a 16-byte boundary if needed. */
10136 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
10137 && (cum->words % 2) != 0)
10138 cum->words++;
10139
10140 cum->intoffset = 0;
10141 cum->use_stack = 0;
10142 cum->named = named;
10143
10144 /* Put entries into rvec[] for individual FP and vector fields, and
10145 for the chunks of memory that go in int regs. Note we start at
10146 element 1; 0 is reserved for an indication of using memory, and
10147 may or may not be filled in below. */
10148 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
10149 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
10150
10151 /* If any part of the struct went on the stack put all of it there.
10152 This hack is because the generic code for
10153 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
10154 parts of the struct are not at the beginning. */
10155 if (cum->use_stack)
10156 {
10157 if (retval)
10158 return NULL_RTX; /* Doesn't go in registers at all.  */
10159 kbase = 0;
10160 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10161 }
10162 if (k > 1 || cum->use_stack)
10163 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
10164 else
10165 return NULL_RTX;
10166 }
10167
10168 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
10169
10170 static rtx
10171 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
10172 int align_words)
10173 {
10174 int n_units;
10175 int i, k;
10176 rtx rvec[GP_ARG_NUM_REG + 1];
10177
10178 if (align_words >= GP_ARG_NUM_REG)
10179 return NULL_RTX;
10180
10181 n_units = rs6000_arg_size (mode, type);
10182
10183 /* Optimize the simple case where the arg fits in one gpr, except in
10184 the case of BLKmode due to assign_parms assuming that registers are
10185 BITS_PER_WORD wide. */
10186 if (n_units == 0
10187 || (n_units == 1 && mode != BLKmode))
10188 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10189
10190 k = 0;
10191 if (align_words + n_units > GP_ARG_NUM_REG)
10192 /* Not all of the arg fits in gprs. Say that it goes in memory too,
10193 using a magic NULL_RTX component.
10194 This is not strictly correct. Only some of the arg belongs in
10195 memory, not all of it. However, the normal scheme using
10196 function_arg_partial_nregs can result in unusual subregs, eg.
10197 (subreg:SI (reg:DF) 4), which are not handled well. The code to
10198 store the whole arg to memory is often more efficient than code
10199 to store pieces, and we know that space is available in the right
10200 place for the whole arg. */
10201 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10202
10203 i = 0;
10204 do
10205 {
10206 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
10207 rtx off = GEN_INT (i++ * 4);
10208 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10209 }
10210 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
10211
10212 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
10213 }
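/* Worked example (illustrative): a DFmode argument with
   align_words == 7 under -m32 -mpowerpc64 has n_units == 2, so
   7 + 2 > GP_ARG_NUM_REG; the result is, schematically,

     (parallel:DF [(expr_list (nil) (const_int 0))
                   (expr_list (reg:SI 10) (const_int 0))])

   i.e. the first word goes in r10 and the remainder in memory; the
   NULL_RTX component is the "goes in memory too" marker.  */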
10214
10215 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
10216 but must also be copied into the parameter save area starting at
10217 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
10218 to the GPRs and/or memory. Return the number of elements used. */
10219
10220 static int
10221 rs6000_psave_function_arg (machine_mode mode, const_tree type,
10222 int align_words, rtx *rvec)
10223 {
10224 int k = 0;
10225
10226 if (align_words < GP_ARG_NUM_REG)
10227 {
10228 int n_words = rs6000_arg_size (mode, type);
10229
10230 if (align_words + n_words > GP_ARG_NUM_REG
10231 || mode == BLKmode
10232 || (TARGET_32BIT && TARGET_POWERPC64))
10233 {
10234 /* If this is partially on the stack, then we only
10235 include the portion actually in registers here. */
10236 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
10237 int i = 0;
10238
10239 if (align_words + n_words > GP_ARG_NUM_REG)
10240 {
10241 /* Not all of the arg fits in gprs. Say that it goes in memory
10242 too, using a magic NULL_RTX component. Also see comment in
10243 rs6000_mixed_function_arg for why the normal
10244 function_arg_partial_nregs scheme doesn't work in this case. */
10245 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10246 }
10247
10248 do
10249 {
10250 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
10251 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
10252 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10253 }
10254 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
10255 }
10256 else
10257 {
10258 /* The whole arg fits in gprs. */
10259 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10260 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
10261 }
10262 }
10263 else
10264 {
10265 /* It's entirely in memory. */
10266 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10267 }
10268
10269 return k;
10270 }
10271
10272 /* RVEC is a vector of K components of an argument of mode MODE.
10273 Construct the final function_arg return value from it. */
10274
10275 static rtx
10276 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
10277 {
10278 gcc_assert (k >= 1);
10279
10280 /* Avoid returning a PARALLEL in the trivial cases. */
10281 if (k == 1)
10282 {
10283 if (XEXP (rvec[0], 0) == NULL_RTX)
10284 return NULL_RTX;
10285
10286 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
10287 return XEXP (rvec[0], 0);
10288 }
10289
10290 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
10291 }
10292
10293 /* Determine where to put an argument to a function.
10294 Value is zero to push the argument on the stack,
10295 or a hard register in which to store the argument.
10296
10297 MODE is the argument's machine mode.
10298 TYPE is the data type of the argument (as a tree).
10299 This is null for libcalls where that information may
10300 not be available.
10301 CUM is a variable of type CUMULATIVE_ARGS which gives info about
10302 the preceding args and about the function being called. It is
10303 not modified in this routine.
10304 NAMED is nonzero if this argument is a named parameter
10305 (otherwise it is an extra parameter matching an ellipsis).
10306
10307 On RS/6000 the first eight words of non-FP are normally in registers
10308 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
10309 Under V.4, the first 8 FP args are in registers.
10310
10311 If this is floating-point and no prototype is specified, we use
10312 both an FP and integer register (or possibly FP reg and stack). Library
10313 functions (when CALL_LIBCALL is set) always have the proper types for args,
10314 so we can pass the FP value just in one register. emit_library_function
10315 doesn't support PARALLEL anyway.
10316
10317 Note that for args passed by reference, function_arg will be called
10318 with MODE and TYPE set to that of the pointer to the arg, not the arg
10319 itself. */
10320
10321 static rtx
10322 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
10323 const_tree type, bool named)
10324 {
10325 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10326 enum rs6000_abi abi = DEFAULT_ABI;
10327 machine_mode elt_mode;
10328 int n_elts;
10329
10330 /* Return a marker to indicate whether the call needs to set or clear
10331 the CR1 bit that V.4 uses to say fp args were passed in registers.
10332 Assume that we don't need the marker for software floating point,
10333 or compiler generated library calls. */
10334 if (mode == VOIDmode)
10335 {
10336 if (abi == ABI_V4
10337 && (cum->call_cookie & CALL_LIBCALL) == 0
10338 && (cum->stdarg
10339 || (cum->nargs_prototype < 0
10340 && (cum->prototype || TARGET_NO_PROTOTYPE))))
10341 {
10342 /* For the SPE, we need to crxor CR6 always. */
10343 if (TARGET_SPE_ABI)
10344 return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
10345 else if (TARGET_HARD_FLOAT && TARGET_FPRS)
10346 return GEN_INT (cum->call_cookie
10347 | ((cum->fregno == FP_ARG_MIN_REG)
10348 ? CALL_V4_SET_FP_ARGS
10349 : CALL_V4_CLEAR_FP_ARGS));
10350 }
10351
10352 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
10353 }
10354
10355 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10356
10357 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10358 {
10359 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
10360 if (rslt != NULL_RTX)
10361 return rslt;
10362 /* Else fall through to usual handling. */
10363 }
10364
10365 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10366 {
10367 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
10368 rtx r, off;
10369 int i, k = 0;
10370
10371 /* Do we also need to pass this argument in the parameter
10372 save area? */
10373 if (TARGET_64BIT && ! cum->prototype)
10374 {
10375 int align_words = (cum->words + 1) & ~1;
10376 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
10377 }
10378
10379 /* Describe where this argument goes in the vector registers. */
10380 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
10381 {
10382 r = gen_rtx_REG (elt_mode, cum->vregno + i);
10383 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
10384 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10385 }
10386
10387 return rs6000_finish_function_arg (mode, rvec, k);
10388 }
10389 else if (TARGET_ALTIVEC_ABI
10390 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
10391 || (type && TREE_CODE (type) == VECTOR_TYPE
10392 && int_size_in_bytes (type) == 16)))
10393 {
10394 if (named || abi == ABI_V4)
10395 return NULL_RTX;
10396 else
10397 {
10398 /* Vector parameters to varargs functions under AIX or Darwin
10399 get passed in memory and possibly also in GPRs. */
10400 int align, align_words, n_words;
10401 machine_mode part_mode;
10402
10403 /* Vector parameters must be 16-byte aligned. In 32-bit
10404 mode this means we need to take into account the offset
10405 to the parameter save area. In 64-bit mode, they just
10406 have to start on an even word, since the parameter save
10407 area is 16-byte aligned. */
10408 if (TARGET_32BIT)
10409 align = -(rs6000_parm_offset () + cum->words) & 3;
10410 else
10411 align = cum->words & 1;
10412 align_words = cum->words + align;
10413
10414 /* Out of registers? Memory, then. */
10415 if (align_words >= GP_ARG_NUM_REG)
10416 return NULL_RTX;
10417
10418 if (TARGET_32BIT && TARGET_POWERPC64)
10419 return rs6000_mixed_function_arg (mode, type, align_words);
10420
10421 /* The vector value goes in GPRs. Only the part of the
10422 value in GPRs is reported here. */
10423 part_mode = mode;
10424 n_words = rs6000_arg_size (mode, type);
10425 if (align_words + n_words > GP_ARG_NUM_REG)
10426 /* Fortunately, there are only two possibilities: the value
10427 is either wholly in GPRs or half in GPRs and half not. */
10428 part_mode = DImode;
10429
10430 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
10431 }
10432 }
10433 else if (TARGET_SPE_ABI && TARGET_SPE
10434 && (SPE_VECTOR_MODE (mode)
10435 || (TARGET_E500_DOUBLE && (mode == DFmode
10436 || mode == DCmode
10437 || mode == TFmode
10438 || mode == TCmode))))
10439 return rs6000_spe_function_arg (cum, mode, type);
10440
10441 else if (abi == ABI_V4)
10442 {
10443 if (TARGET_HARD_FLOAT && TARGET_FPRS
10444 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
10445 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
10446 || (mode == TFmode && !TARGET_IEEEQUAD)
10447 || mode == SDmode || mode == DDmode || mode == TDmode))
10448 {
10449 /* _Decimal128 must use an even/odd register pair. This assumes
10450 that the register number is odd when fregno is odd. */
10451 if (mode == TDmode && (cum->fregno % 2) == 1)
10452 cum->fregno++;
10453
10454 if (cum->fregno + (mode == TFmode || mode == TDmode ? 1 : 0)
10455 <= FP_ARG_V4_MAX_REG)
10456 return gen_rtx_REG (mode, cum->fregno);
10457 else
10458 return NULL_RTX;
10459 }
10460 else
10461 {
10462 int n_words = rs6000_arg_size (mode, type);
10463 int gregno = cum->sysv_gregno;
10464
10465 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
10466 (r7,r8) or (r9,r10), as is any other 2-word item such
10467 as complex int, due to a historical mistake. */
10468 if (n_words == 2)
10469 gregno += (1 - gregno) & 1;
10470
10471 /* Multi-reg args are not split between registers and stack. */
10472 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
10473 return NULL_RTX;
10474
10475 if (TARGET_32BIT && TARGET_POWERPC64)
10476 return rs6000_mixed_function_arg (mode, type,
10477 gregno - GP_ARG_MIN_REG);
10478 return gen_rtx_REG (mode, gregno);
10479 }
10480 }
10481 else
10482 {
10483 int align_words = rs6000_parm_start (mode, type, cum->words);
10484
10485 /* _Decimal128 must be passed in an even/odd float register pair.
10486 This assumes that the register number is odd when fregno is odd. */
10487 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
10488 cum->fregno++;
10489
10490 if (USE_FP_FOR_ARG_P (cum, elt_mode))
10491 {
10492 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
10493 rtx r, off;
10494 int i, k = 0;
10495 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
10496 int fpr_words;
10497
10498 /* Do we also need to pass this argument in the parameter
10499 save area? */
10500 if (type && (cum->nargs_prototype <= 0
10501 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10502 && TARGET_XL_COMPAT
10503 && align_words >= GP_ARG_NUM_REG)))
10504 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
10505
10506 /* Describe where this argument goes in the fprs. */
10507 for (i = 0; i < n_elts
10508 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
10509 {
10510 /* Check if the argument is split over registers and memory.
10511 This can only ever happen for long double or _Decimal128;
10512 complex types are handled via split_complex_arg. */
10513 machine_mode fmode = elt_mode;
10514 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
10515 {
10516 gcc_assert (fmode == TFmode || fmode == TDmode);
10517 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
10518 }
10519
10520 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
10521 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
10522 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10523 }
10524
10525 /* If there were not enough FPRs to hold the argument, the rest
10526 usually goes into memory. However, if the current position
10527 is still within the register parameter area, a portion may
10528 actually have to go into GPRs.
10529
10530 Note that it may happen that the portion of the argument
10531 passed in the first "half" of the first GPR was already
10532 passed in the last FPR as well.
10533
10534 For unnamed arguments, we already set up GPRs to cover the
10535 whole argument in rs6000_psave_function_arg, so there is
10536 nothing further to do at this point. */
10537 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
10538 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
10539 && cum->nargs_prototype > 0)
10540 {
10541 static bool warned;
10542
10543 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
10544 int n_words = rs6000_arg_size (mode, type);
10545
10546 align_words += fpr_words;
10547 n_words -= fpr_words;
10548
10549 do
10550 {
10551 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
10552 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
10553 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10554 }
10555 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
10556
10557 if (!warned && warn_psabi)
10558 {
10559 warned = true;
10560 inform (input_location,
10561 "the ABI of passing homogeneous float aggregates"
10562 " has changed in GCC 5");
10563 }
10564 }
10565
10566 return rs6000_finish_function_arg (mode, rvec, k);
10567 }
10568 else if (align_words < GP_ARG_NUM_REG)
10569 {
10570 if (TARGET_32BIT && TARGET_POWERPC64)
10571 return rs6000_mixed_function_arg (mode, type, align_words);
10572
10573 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10574 }
10575 else
10576 return NULL_RTX;
10577 }
10578 }
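
/* Illustrative sketch, not part of GCC: the even/odd GPR pairing rule
   used in the ABI_V4 branch of rs6000_function_arg above.  A two-word
   argument must start in an odd-numbered register so that it lands in
   one of the pairs (r3,r4), (r5,r6), (r7,r8) or (r9,r10).  The helper
   name is hypothetical; the expression is the one used above.  */
static int
example_round_gregno_to_pair (int gregno)
{
  /* Odd GREGNO is already pair-aligned; even GREGNO is bumped up to
     the next odd register.  */
  gregno += (1 - gregno) & 1;
  return gregno;
}
/* e.g. example_round_gregno_to_pair (4) == 5: a long long arriving
   when r4 is the next free GPR skips r4 and goes in (r5,r6).  */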
10579 \f
10580 /* For an arg passed partly in registers and partly in memory, this is
10581 the number of bytes passed in registers. For args passed entirely in
10582 registers or entirely in memory, zero. When an arg is described by a
10583 PARALLEL, perhaps using more than one register type, this function
10584 returns the number of bytes used by the first element of the PARALLEL. */
10585
10586 static int
10587 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
10588 tree type, bool named)
10589 {
10590 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10591 bool passed_in_gprs = true;
10592 int ret = 0;
10593 int align_words;
10594 machine_mode elt_mode;
10595 int n_elts;
10596
10597 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10598
10599 if (DEFAULT_ABI == ABI_V4)
10600 return 0;
10601
10602 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10603 {
10604 /* If we are passing this arg in the fixed parameter save area
10605 (gprs or memory) as well as VRs, we do not use the partial
10606 bytes mechanism; instead, rs6000_function_arg will return a
10607 PARALLEL including a memory element as necessary. */
10608 if (TARGET_64BIT && ! cum->prototype)
10609 return 0;
10610
10611 /* Otherwise, we pass in VRs only. Check for partial copies. */
10612 passed_in_gprs = false;
10613 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
10614 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
10615 }
10616
10617 /* In this complicated case we just disable the partial_nregs code. */
10618 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10619 return 0;
10620
10621 align_words = rs6000_parm_start (mode, type, cum->words);
10622
10623 if (USE_FP_FOR_ARG_P (cum, elt_mode))
10624 {
10625 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
10626
10627 /* If we are passing this arg in the fixed parameter save area
10628 (gprs or memory) as well as FPRs, we do not use the partial
10629 bytes mechanism; instead, rs6000_function_arg will return a
10630 PARALLEL including a memory element as necessary. */
10631 if (type
10632 && (cum->nargs_prototype <= 0
10633 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10634 && TARGET_XL_COMPAT
10635 && align_words >= GP_ARG_NUM_REG)))
10636 return 0;
10637
10638 /* Otherwise, we pass in FPRs only. Check for partial copies. */
10639 passed_in_gprs = false;
10640 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
10641 {
10642 /* Compute number of bytes / words passed in FPRs. If there
10643 is still space available in the register parameter area
10644 *after* that amount, a part of the argument will be passed
10645 in GPRs. In that case, the total amount passed in any
10646 registers is equal to the amount that would have been passed
10647 in GPRs if everything were passed there, so we fall back to
10648 the GPR code below to compute the appropriate value. */
10649 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
10650 * MIN (8, GET_MODE_SIZE (elt_mode)));
10651 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
10652
10653 if (align_words + fpr_words < GP_ARG_NUM_REG)
10654 passed_in_gprs = true;
10655 else
10656 ret = fpr;
10657 }
10658 }
10659
10660 if (passed_in_gprs
10661 && align_words < GP_ARG_NUM_REG
10662 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
10663 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
10664
10665 if (ret != 0 && TARGET_DEBUG_ARG)
10666 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
10667
10668 return ret;
10669 }
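
/* Illustrative sketch, not part of GCC: the final GPR computation in
   rs6000_arg_partial_bytes above, as a self-contained helper.  The
   parameters are hypothetical stand-ins; GP_ARG_NUM_REG is 8 on the
   ABIs handled here, and WORD_BYTES is 4 or 8.  */
static int
example_gpr_partial_bytes (int align_words, int arg_words,
                           int gp_arg_num_reg, int word_bytes)
{
  /* Only an argument that straddles the last GPR is partial: it must
     start inside the register area and extend past its end.  */
  if (align_words < gp_arg_num_reg
      && gp_arg_num_reg < align_words + arg_words)
    return (gp_arg_num_reg - align_words) * word_bytes;
  return 0;
}
/* e.g. a 16-byte aggregate starting at word 7 on a 64-bit target:
   example_gpr_partial_bytes (7, 2, 8, 8) == 8, i.e. 8 bytes in r10
   and the rest in memory.  */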
10670 \f
10671 /* A C expression that indicates when an argument must be passed by
10672 reference. If nonzero for an argument, a copy of that argument is
10673 made in memory and a pointer to the argument is passed instead of
10674 the argument itself. The pointer is passed in whatever way is
10675 appropriate for passing a pointer to that type.
10676
10677 Under V.4, aggregates and long double are passed by reference.
10678
10679 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
10680 reference unless the AltiVec vector extension ABI is in force.
10681
10682 As an extension to all ABIs, variable sized types are passed by
10683 reference. */
10684
10685 static bool
10686 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
10687 machine_mode mode, const_tree type,
10688 bool named ATTRIBUTE_UNUSED)
10689 {
10690 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD && mode == TFmode)
10691 {
10692 if (TARGET_DEBUG_ARG)
10693 fprintf (stderr, "function_arg_pass_by_reference: V4 long double\n");
10694 return 1;
10695 }
10696
10697 if (!type)
10698 return 0;
10699
10700 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
10701 {
10702 if (TARGET_DEBUG_ARG)
10703 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
10704 return 1;
10705 }
10706
10707 if (int_size_in_bytes (type) < 0)
10708 {
10709 if (TARGET_DEBUG_ARG)
10710 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
10711 return 1;
10712 }
10713
10714 /* Allow -maltivec -mabi=no-altivec without warning. AltiVec vector
10715 modes only exist for GCC vector types when -maltivec is enabled. */
10716 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
10717 {
10718 if (TARGET_DEBUG_ARG)
10719 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
10720 return 1;
10721 }
10722
10723 /* Pass synthetic vectors in memory. */
10724 if (TREE_CODE (type) == VECTOR_TYPE
10725 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
10726 {
10727 static bool warned_for_pass_big_vectors = false;
10728 if (TARGET_DEBUG_ARG)
10729 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
10730 if (!warned_for_pass_big_vectors)
10731 {
10732 warning (0, "GCC vector passed by reference: "
10733 "non-standard ABI extension with no compatibility guarantee");
10734 warned_for_pass_big_vectors = true;
10735 }
10736 return 1;
10737 }
10738
10739 return 0;
10740 }
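
/* Illustrative sketch, not part of GCC: a user-level view of the
   by-reference rules tested above, assuming a 32-bit SVR4 target
   compiled with -maltivec -mabi=altivec.  Guarded out; the
   declarations exist only to show which arguments take which path.  */
#if 0
struct agg { int a, b, c; };                          /* V.4 aggregate: by reference */
typedef int v4si __attribute__ ((vector_size (16)));  /* 16 bytes: by value, in a VR */
typedef int v8si __attribute__ ((vector_size (32)));  /* > 16 bytes: by reference,
                                                         with the one-time warning */
extern void callee (struct agg s, v4si v, v8si big, int i);
#endif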
10741
10742 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
10743 already processed. Return true if the parameter must be passed
10744 (fully or partially) on the stack. */
10745
10746 static bool
10747 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
10748 {
10749 machine_mode mode;
10750 int unsignedp;
10751 rtx entry_parm;
10752
10753 /* Catch errors. */
10754 if (type == NULL || type == error_mark_node)
10755 return true;
10756
10757 /* Handle types with no storage requirement. */
10758 if (TYPE_MODE (type) == VOIDmode)
10759 return false;
10760
10761 /* Handle complex types: the element type is checked (and ARGS_SO_FAR
advanced past it) once for each of the real and imaginary parts. */
10762 if (TREE_CODE (type) == COMPLEX_TYPE)
10763 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
10764 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
10765
10766 /* Handle transparent aggregates. */
10767 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
10768 && TYPE_TRANSPARENT_AGGR (type))
10769 type = TREE_TYPE (first_field (type));
10770
10771 /* See if this arg was passed by invisible reference. */
10772 if (pass_by_reference (get_cumulative_args (args_so_far),
10773 TYPE_MODE (type), type, true))
10774 type = build_pointer_type (type);
10775
10776 /* Find mode as it is passed by the ABI. */
10777 unsignedp = TYPE_UNSIGNED (type);
10778 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
10779
10780 /* If we must pass in stack, we need a stack. */
10781 if (rs6000_must_pass_in_stack (mode, type))
10782 return true;
10783
10784 /* If there is no incoming register, we need a stack. */
10785 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
10786 if (entry_parm == NULL)
10787 return true;
10788
10789 /* Likewise if we need to pass both in registers and on the stack. */
10790 if (GET_CODE (entry_parm) == PARALLEL
10791 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
10792 return true;
10793
10794 /* Also true if we're partially in registers and partially not. */
10795 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
10796 return true;
10797
10798 /* Update info on where next arg arrives in registers. */
10799 rs6000_function_arg_advance (args_so_far, mode, type, true);
10800 return false;
10801 }
10802
10803 /* Return true if FUN has no prototype, has a variable argument
10804 list, or passes any parameter in memory. */
10805
10806 static bool
10807 rs6000_function_parms_need_stack (tree fun, bool incoming)
10808 {
10809 tree fntype, result;
10810 CUMULATIVE_ARGS args_so_far_v;
10811 cumulative_args_t args_so_far;
10812
10813 if (!fun)
10814 /* Must be a libcall, all of which only use reg parms. */
10815 return false;
10816
10817 fntype = fun;
10818 if (!TYPE_P (fun))
10819 fntype = TREE_TYPE (fun);
10820
10821 /* Varargs functions need the parameter save area. */
10822 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
10823 return true;
10824
10825 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
10826 args_so_far = pack_cumulative_args (&args_so_far_v);
10827
10828 /* When incoming, we will have been passed the function decl.
10829 It is necessary to use the decl to handle K&R style functions,
10830 where TYPE_ARG_TYPES may not be available. */
10831 if (incoming)
10832 {
10833 gcc_assert (DECL_P (fun));
10834 result = DECL_RESULT (fun);
10835 }
10836 else
10837 result = TREE_TYPE (fntype);
10838
10839 if (result && aggregate_value_p (result, fntype))
10840 {
10841 if (!TYPE_P (result))
10842 result = TREE_TYPE (result);
10843 result = build_pointer_type (result);
10844 rs6000_parm_needs_stack (args_so_far, result);
10845 }
10846
10847 if (incoming)
10848 {
10849 tree parm;
10850
10851 for (parm = DECL_ARGUMENTS (fun);
10852 parm && parm != void_list_node;
10853 parm = TREE_CHAIN (parm))
10854 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
10855 return true;
10856 }
10857 else
10858 {
10859 function_args_iterator args_iter;
10860 tree arg_type;
10861
10862 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
10863 if (rs6000_parm_needs_stack (args_so_far, arg_type))
10864 return true;
10865 }
10866
10867 return false;
10868 }
10869
10870 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
10871 usually a constant depending on the ABI. However, in the ELFv2 ABI
10872 the register parameter area is optional when calling a function that
10873 has a prototype in scope, has no variable argument list, and passes
10874 all parameters in registers. */
10875
10876 int
10877 rs6000_reg_parm_stack_space (tree fun, bool incoming)
10878 {
10879 int reg_parm_stack_space;
10880
10881 switch (DEFAULT_ABI)
10882 {
10883 default:
10884 reg_parm_stack_space = 0;
10885 break;
10886
10887 case ABI_AIX:
10888 case ABI_DARWIN:
10889 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
10890 break;
10891
10892 case ABI_ELFv2:
10893 /* ??? Recomputing this every time is a bit expensive. Is there
10894 a place to cache this information? */
10895 if (rs6000_function_parms_need_stack (fun, incoming))
10896 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
10897 else
10898 reg_parm_stack_space = 0;
10899 break;
10900 }
10901
10902 return reg_parm_stack_space;
10903 }
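
/* Illustrative sketch, not part of GCC: the ELFv2 sizing decision
   above as a self-contained helper.  The three flags are hypothetical
   stand-ins for what rs6000_function_parms_need_stack derives from
   the function's type or decl.  */
static int
example_elfv2_parm_save_bytes (int has_prototype, int is_vararg,
                               int all_parms_in_regs)
{
  /* The 64-byte register parameter area may be omitted only when the
     callee has a prototype in scope, takes no variable arguments, and
     passes every parameter in registers.  */
  if (has_prototype && !is_vararg && all_parms_in_regs)
    return 0;
  return 64;
}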
10904
10905 static void
10906 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
10907 {
10908 int i;
10909 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
10910
10911 if (nregs == 0)
10912 return;
10913
10914 for (i = 0; i < nregs; i++)
10915 {
10916 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
10917 if (reload_completed)
10918 {
10919 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
10920 tem = NULL_RTX;
10921 else
10922 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
10923 i * GET_MODE_SIZE (reg_mode));
10924 }
10925 else
10926 tem = replace_equiv_address (tem, XEXP (tem, 0));
10927
10928 gcc_assert (tem);
10929
10930 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
10931 }
10932 }
10933 \f
10934 /* Perform any actions needed for a function that is receiving a
10935 variable number of arguments.
10936
10937 CUM is as above.
10938
10939 MODE and TYPE are the mode and type of the current parameter.
10940
10941 PRETEND_SIZE is a variable that should be set to the amount of stack
10942 that must be pushed by the prolog to pretend that our caller pushed
10943 it.
10944
10945 Normally, this macro will push all remaining incoming registers on the
10946 stack and set PRETEND_SIZE to the length of the registers pushed. */
10947
10948 static void
10949 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
10950 tree type, int *pretend_size ATTRIBUTE_UNUSED,
10951 int no_rtl)
10952 {
10953 CUMULATIVE_ARGS next_cum;
10954 int reg_size = TARGET_32BIT ? 4 : 8;
10955 rtx save_area = NULL_RTX, mem;
10956 int first_reg_offset;
10957 alias_set_type set;
10958
10959 /* Skip the last named argument. */
10960 next_cum = *get_cumulative_args (cum);
10961 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
10962
10963 if (DEFAULT_ABI == ABI_V4)
10964 {
10965 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
10966
10967 if (! no_rtl)
10968 {
10969 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
10970 HOST_WIDE_INT offset = 0;
10971
10972 /* Try to optimize the size of the varargs save area.
10973 The ABI requires that ap.reg_save_area is doubleword
10974 aligned, but we don't need to allocate space for all
10975 the bytes, only those to which we actually will save
10976 anything. */
10977 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
10978 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
10979 if (TARGET_HARD_FLOAT && TARGET_FPRS
10980 && next_cum.fregno <= FP_ARG_V4_MAX_REG
10981 && cfun->va_list_fpr_size)
10982 {
10983 if (gpr_reg_num)
10984 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
10985 * UNITS_PER_FP_WORD;
10986 if (cfun->va_list_fpr_size
10987 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
10988 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
10989 else
10990 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
10991 * UNITS_PER_FP_WORD;
10992 }
10993 if (gpr_reg_num)
10994 {
10995 offset = -((first_reg_offset * reg_size) & ~7);
10996 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
10997 {
10998 gpr_reg_num = cfun->va_list_gpr_size;
10999 if (reg_size == 4 && (first_reg_offset & 1))
11000 gpr_reg_num++;
11001 }
11002 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
11003 }
11004 else if (fpr_size)
11005 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
11006 * UNITS_PER_FP_WORD
11007 - (int) (GP_ARG_NUM_REG * reg_size);
11008
11009 if (gpr_size + fpr_size)
11010 {
11011 rtx reg_save_area
11012 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
11013 gcc_assert (GET_CODE (reg_save_area) == MEM);
11014 reg_save_area = XEXP (reg_save_area, 0);
11015 if (GET_CODE (reg_save_area) == PLUS)
11016 {
11017 gcc_assert (XEXP (reg_save_area, 0)
11018 == virtual_stack_vars_rtx);
11019 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
11020 offset += INTVAL (XEXP (reg_save_area, 1));
11021 }
11022 else
11023 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
11024 }
11025
11026 cfun->machine->varargs_save_offset = offset;
11027 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
11028 }
11029 }
11030 else
11031 {
11032 first_reg_offset = next_cum.words;
11033 save_area = virtual_incoming_args_rtx;
11034
11035 if (targetm.calls.must_pass_in_stack (mode, type))
11036 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
11037 }
11038
11039 set = get_varargs_alias_set ();
11040 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
11041 && cfun->va_list_gpr_size)
11042 {
11043 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
11044
11045 if (va_list_gpr_counter_field)
11046 /* V4 va_list_gpr_size counts number of registers needed. */
11047 n_gpr = cfun->va_list_gpr_size;
11048 else
11049 /* char * va_list instead counts number of bytes needed. */
11050 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
11051
11052 if (nregs > n_gpr)
11053 nregs = n_gpr;
11054
11055 mem = gen_rtx_MEM (BLKmode,
11056 plus_constant (Pmode, save_area,
11057 first_reg_offset * reg_size));
11058 MEM_NOTRAP_P (mem) = 1;
11059 set_mem_alias_set (mem, set);
11060 set_mem_align (mem, BITS_PER_WORD);
11061
11062 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
11063 nregs);
11064 }
11065
11066 /* Save FP registers if needed. */
11067 if (DEFAULT_ABI == ABI_V4
11068 && TARGET_HARD_FLOAT && TARGET_FPRS
11069 && ! no_rtl
11070 && next_cum.fregno <= FP_ARG_V4_MAX_REG
11071 && cfun->va_list_fpr_size)
11072 {
11073 int fregno = next_cum.fregno, nregs;
11074 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
11075 rtx lab = gen_label_rtx ();
11076 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
11077 * UNITS_PER_FP_WORD);
11078
11079 emit_jump_insn
11080 (gen_rtx_SET (VOIDmode,
11081 pc_rtx,
11082 gen_rtx_IF_THEN_ELSE (VOIDmode,
11083 gen_rtx_NE (VOIDmode, cr1,
11084 const0_rtx),
11085 gen_rtx_LABEL_REF (VOIDmode, lab),
11086 pc_rtx)));
11087
11088 for (nregs = 0;
11089 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
11090 fregno++, off += UNITS_PER_FP_WORD, nregs++)
11091 {
11092 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11093 ? DFmode : SFmode,
11094 plus_constant (Pmode, save_area, off));
11095 MEM_NOTRAP_P (mem) = 1;
11096 set_mem_alias_set (mem, set);
11097 set_mem_align (mem, GET_MODE_ALIGNMENT (
11098 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11099 ? DFmode : SFmode));
11100 emit_move_insn (mem, gen_rtx_REG (
11101 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11102 ? DFmode : SFmode, fregno));
11103 }
11104
11105 emit_label (lab);
11106 }
11107 }
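
/* Illustrative sketch, not part of GCC: the doubleword rounding used
   when sizing the V.4 GPR save area above.  The helper name is
   hypothetical; the mask matches the one in setup_incoming_varargs,
   and the result honors the doubleword alignment the ABI requires of
   ap.reg_save_area.  */
static int
example_gpr_save_bytes (int gpr_reg_num, int reg_size)
{
  return (gpr_reg_num * reg_size + 7) & ~7;  /* round up to 8 bytes */
}
/* e.g. five 32-bit registers: example_gpr_save_bytes (5, 4) == 24.  */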
11108
11109 /* Create the va_list data type. */
11110
11111 static tree
11112 rs6000_build_builtin_va_list (void)
11113 {
11114 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
11115
11116 /* For AIX, prefer 'char *' because that's what the system
11117 header files like. */
11118 if (DEFAULT_ABI != ABI_V4)
11119 return build_pointer_type (char_type_node);
11120
11121 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
11122 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
11123 get_identifier ("__va_list_tag"), record);
11124
11125 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
11126 unsigned_char_type_node);
11127 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
11128 unsigned_char_type_node);
11129 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
11130 every user file. */
11131 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11132 get_identifier ("reserved"), short_unsigned_type_node);
11133 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11134 get_identifier ("overflow_arg_area"),
11135 ptr_type_node);
11136 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11137 get_identifier ("reg_save_area"),
11138 ptr_type_node);
11139
11140 va_list_gpr_counter_field = f_gpr;
11141 va_list_fpr_counter_field = f_fpr;
11142
11143 DECL_FIELD_CONTEXT (f_gpr) = record;
11144 DECL_FIELD_CONTEXT (f_fpr) = record;
11145 DECL_FIELD_CONTEXT (f_res) = record;
11146 DECL_FIELD_CONTEXT (f_ovf) = record;
11147 DECL_FIELD_CONTEXT (f_sav) = record;
11148
11149 TYPE_STUB_DECL (record) = type_decl;
11150 TYPE_NAME (record) = type_decl;
11151 TYPE_FIELDS (record) = f_gpr;
11152 DECL_CHAIN (f_gpr) = f_fpr;
11153 DECL_CHAIN (f_fpr) = f_res;
11154 DECL_CHAIN (f_res) = f_ovf;
11155 DECL_CHAIN (f_ovf) = f_sav;
11156
11157 layout_type (record);
11158
11159 /* The correct type is an array type of one element. */
11160 return build_array_type (record, build_index_type (size_zero_node));
11161 }
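
/* Illustrative sketch, not part of GCC: the user-visible C equivalent
   of the record built above, as specified by the V.4 ABI.  Field
   names match the FIELD_DECLs created here; guarded out so it cannot
   clash with system headers.  */
#if 0
typedef struct __va_list_tag
{
  unsigned char gpr;          /* index of the next saved GPR (0..8) */
  unsigned char fpr;          /* index of the next saved FPR (0..8) */
  unsigned short reserved;    /* named padding; keeps -Wpadded quiet */
  void *overflow_arg_area;    /* arguments passed on the stack */
  void *reg_save_area;        /* register arguments spilled at entry */
} __gnuc_va_list[1];          /* an array type of one element */
#endif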
11162
11163 /* Implement va_start. */
11164
11165 static void
11166 rs6000_va_start (tree valist, rtx nextarg)
11167 {
11168 HOST_WIDE_INT words, n_gpr, n_fpr;
11169 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
11170 tree gpr, fpr, ovf, sav, t;
11171
11172 /* Only SVR4 needs something special. */
11173 if (DEFAULT_ABI != ABI_V4)
11174 {
11175 std_expand_builtin_va_start (valist, nextarg);
11176 return;
11177 }
11178
11179 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11180 f_fpr = DECL_CHAIN (f_gpr);
11181 f_res = DECL_CHAIN (f_fpr);
11182 f_ovf = DECL_CHAIN (f_res);
11183 f_sav = DECL_CHAIN (f_ovf);
11184
11185 valist = build_simple_mem_ref (valist);
11186 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11187 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
11188 f_fpr, NULL_TREE);
11189 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
11190 f_ovf, NULL_TREE);
11191 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
11192 f_sav, NULL_TREE);
11193
11194 /* Count number of gp and fp argument registers used. */
11195 words = crtl->args.info.words;
11196 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
11197 GP_ARG_NUM_REG);
11198 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
11199 FP_ARG_NUM_REG);
11200
11201 if (TARGET_DEBUG_ARG)
11202 fprintf (stderr, "va_start: words = "HOST_WIDE_INT_PRINT_DEC", n_gpr = "
11203 HOST_WIDE_INT_PRINT_DEC", n_fpr = "HOST_WIDE_INT_PRINT_DEC"\n",
11204 words, n_gpr, n_fpr);
11205
11206 if (cfun->va_list_gpr_size)
11207 {
11208 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
11209 build_int_cst (NULL_TREE, n_gpr));
11210 TREE_SIDE_EFFECTS (t) = 1;
11211 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11212 }
11213
11214 if (cfun->va_list_fpr_size)
11215 {
11216 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
11217 build_int_cst (NULL_TREE, n_fpr));
11218 TREE_SIDE_EFFECTS (t) = 1;
11219 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11220
11221 #ifdef HAVE_AS_GNU_ATTRIBUTE
11222 if (call_ABI_of_interest (cfun->decl))
11223 rs6000_passes_float = true;
11224 #endif
11225 }
11226
11227 /* Find the overflow area. */
11228 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
11229 if (words != 0)
11230 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
11231 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
11232 TREE_SIDE_EFFECTS (t) = 1;
11233 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11234
11235 /* If there were no va_arg invocations, don't set up the register
11236 save area. */
11237 if (!cfun->va_list_gpr_size
11238 && !cfun->va_list_fpr_size
11239 && n_gpr < GP_ARG_NUM_REG
11240 && n_fpr < FP_ARG_V4_MAX_REG)
11241 return;
11242
11243 /* Find the register save area. */
11244 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
11245 if (cfun->machine->varargs_save_offset)
11246 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
11247 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
11248 TREE_SIDE_EFFECTS (t) = 1;
11249 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11250 }
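
/* Illustrative sketch, not part of GCC: the net effect of the V.4
   va_start expansion above, written as ordinary C against the
   __va_list_tag layout sketched earlier.  Every parameter is a
   hypothetical stand-in for a value the compiler derives from
   crtl->args.info or cfun->machine; guarded out.  */
#if 0
void
example_v4_va_start (struct __va_list_tag *ap, int n_gpr, int n_fpr,
                     int words, char *incoming_args, char *reg_save)
{
  ap->gpr = n_gpr;                 /* GPRs consumed by named args */
  ap->fpr = n_fpr;                 /* FPRs consumed by named args */
  ap->overflow_arg_area = incoming_args + words * 4;
  ap->reg_save_area = reg_save;    /* varargs_save_offset already applied */
}
#endif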
11251
11252 /* Implement va_arg. */
11253
11254 static tree
11255 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
11256 gimple_seq *post_p)
11257 {
11258 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
11259 tree gpr, fpr, ovf, sav, reg, t, u;
11260 int size, rsize, n_reg, sav_ofs, sav_scale;
11261 tree lab_false, lab_over, addr;
11262 int align;
11263 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
11264 int regalign = 0;
11265 gimple stmt;
11266
11267 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
11268 {
11269 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
11270 return build_va_arg_indirect_ref (t);
11271 }
11272
11273 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
11274 earlier version of gcc, with the property that it always applied alignment
11275 adjustments to the va-args (even for zero-sized types). The cheapest way
11276 to deal with this is to replicate the effect of the part of
11277 std_gimplify_va_arg_expr that carries out the align adjust, for the
11278 relevant case.
11279 We don't need to check for pass-by-reference because of the test above.
11280 We can return a simplified answer, since we know there's no offset to add. */
11281
11282 if (((TARGET_MACHO
11283 && rs6000_darwin64_abi)
11284 || DEFAULT_ABI == ABI_ELFv2
11285 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
11286 && integer_zerop (TYPE_SIZE (type)))
11287 {
11288 unsigned HOST_WIDE_INT align, boundary;
11289 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
11290 align = PARM_BOUNDARY / BITS_PER_UNIT;
11291 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
11292 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
11293 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
11294 boundary /= BITS_PER_UNIT;
11295 if (boundary > align)
11296 {
11297 tree t;
11298 /* This updates arg ptr by the amount that would be necessary
11299 to align the zero-sized (but not zero-alignment) item. */
11300 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
11301 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
11302 gimplify_and_add (t, pre_p);
11303
11304 t = fold_convert (sizetype, valist_tmp);
11305 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
11306 fold_convert (TREE_TYPE (valist),
11307 fold_build2 (BIT_AND_EXPR, sizetype, t,
11308 size_int (-boundary))));
11309 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
11310 gimplify_and_add (t, pre_p);
11311 }
11312 /* Since it is zero-sized there's no increment for the item itself. */
11313 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
11314 return build_va_arg_indirect_ref (valist_tmp);
11315 }
11316
11317 if (DEFAULT_ABI != ABI_V4)
11318 {
11319 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
11320 {
11321 tree elem_type = TREE_TYPE (type);
11322 machine_mode elem_mode = TYPE_MODE (elem_type);
11323 int elem_size = GET_MODE_SIZE (elem_mode);
11324
11325 if (elem_size < UNITS_PER_WORD)
11326 {
11327 tree real_part, imag_part;
11328 gimple_seq post = NULL;
11329
11330 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
11331 &post);
11332 /* Copy the value into a temporary, lest the formal temporary
11333 be reused out from under us. */
11334 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
11335 gimple_seq_add_seq (pre_p, post);
11336
11337 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
11338 post_p);
11339
11340 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
11341 }
11342 }
11343
11344 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
11345 }
11346
11347 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11348 f_fpr = DECL_CHAIN (f_gpr);
11349 f_res = DECL_CHAIN (f_fpr);
11350 f_ovf = DECL_CHAIN (f_res);
11351 f_sav = DECL_CHAIN (f_ovf);
11352
11353 valist = build_va_arg_indirect_ref (valist);
11354 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11355 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
11356 f_fpr, NULL_TREE);
11357 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
11358 f_ovf, NULL_TREE);
11359 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
11360 f_sav, NULL_TREE);
11361
11362 size = int_size_in_bytes (type);
11363 rsize = (size + 3) / 4;
11364 align = 1;
11365
11366 if (TARGET_HARD_FLOAT && TARGET_FPRS
11367 && ((TARGET_SINGLE_FLOAT && TYPE_MODE (type) == SFmode)
11368 || (TARGET_DOUBLE_FLOAT
11369 && (TYPE_MODE (type) == DFmode
11370 || TYPE_MODE (type) == TFmode
11371 || TYPE_MODE (type) == SDmode
11372 || TYPE_MODE (type) == DDmode
11373 || TYPE_MODE (type) == TDmode))))
11374 {
11375 /* FP args go in FP registers, if present. */
11376 reg = fpr;
11377 n_reg = (size + 7) / 8;
11378 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
11379 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
11380 if (TYPE_MODE (type) != SFmode && TYPE_MODE (type) != SDmode)
11381 align = 8;
11382 }
11383 else
11384 {
11385 /* Otherwise into GP registers. */
11386 reg = gpr;
11387 n_reg = rsize;
11388 sav_ofs = 0;
11389 sav_scale = 4;
11390 if (n_reg == 2)
11391 align = 8;
11392 }
11393
11394 /* Pull the value out of the saved registers.... */
11395
11396 lab_over = NULL;
11397 addr = create_tmp_var (ptr_type_node, "addr");
11398
11399 /* AltiVec vectors never go in registers when -mabi=altivec. */
11400 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
11401 align = 16;
11402 else
11403 {
11404 lab_false = create_artificial_label (input_location);
11405 lab_over = create_artificial_label (input_location);
11406
11407 /* Long long and SPE vectors are aligned in the registers,
11408 as is any other 2-GPR item such as complex int, due to a
11409 historical mistake. */
11410 u = reg;
11411 if (n_reg == 2 && reg == gpr)
11412 {
11413 regalign = 1;
11414 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11415 build_int_cst (TREE_TYPE (reg), n_reg - 1));
11416 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
11417 unshare_expr (reg), u);
11418 }
11419 /* _Decimal128 is passed in even/odd fpr pairs; the stored
11420 reg number is 0 for f1, so we want to make it odd. */
11421 else if (reg == fpr && TYPE_MODE (type) == TDmode)
11422 {
11423 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11424 build_int_cst (TREE_TYPE (reg), 1));
11425 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
11426 }
11427
11428 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
11429 t = build2 (GE_EXPR, boolean_type_node, u, t);
11430 u = build1 (GOTO_EXPR, void_type_node, lab_false);
11431 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
11432 gimplify_and_add (t, pre_p);
11433
11434 t = sav;
11435 if (sav_ofs)
11436 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
11437
11438 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11439 build_int_cst (TREE_TYPE (reg), n_reg));
11440 u = fold_convert (sizetype, u);
11441 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
11442 t = fold_build_pointer_plus (t, u);
11443
11444 /* _Decimal32 varargs are located in the second word of the 64-bit
11445 FP register for 32-bit binaries. */
11446 if (!TARGET_POWERPC64
11447 && TARGET_HARD_FLOAT && TARGET_FPRS
11448 && TYPE_MODE (type) == SDmode)
11449 t = fold_build_pointer_plus_hwi (t, size);
11450
11451 gimplify_assign (addr, t, pre_p);
11452
11453 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
11454
11455 stmt = gimple_build_label (lab_false);
11456 gimple_seq_add_stmt (pre_p, stmt);
11457
11458 if ((n_reg == 2 && !regalign) || n_reg > 2)
11459 {
11460 /* Ensure that we don't find any more args in regs.
11461 Alignment has been taken care of for the special cases. */
11462 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
11463 }
11464 }
11465
11466 /* ... otherwise out of the overflow area. */
11467
11468 /* Care for on-stack alignment if needed. */
11469 t = ovf;
11470 if (align != 1)
11471 {
11472 t = fold_build_pointer_plus_hwi (t, align - 1);
11473 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
11474 build_int_cst (TREE_TYPE (t), -align));
11475 }
11476 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
11477
11478 gimplify_assign (unshare_expr (addr), t, pre_p);
11479
11480 t = fold_build_pointer_plus_hwi (t, size);
11481 gimplify_assign (unshare_expr (ovf), t, pre_p);
11482
11483 if (lab_over)
11484 {
11485 stmt = gimple_build_label (lab_over);
11486 gimple_seq_add_stmt (pre_p, stmt);
11487 }
11488
11489 if (STRICT_ALIGNMENT
11490 && (TYPE_ALIGN (type)
11491 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
11492 {
11493 /* The value (of type complex double, for example) may not be
11494 aligned in memory in the saved registers, so copy via a
11495 temporary. (This is the same code as used for SPARC.) */
11496 tree tmp = create_tmp_var (type, "va_arg_tmp");
11497 tree dest_addr = build_fold_addr_expr (tmp);
11498
11499 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
11500 3, dest_addr, addr, size_int (rsize * 4));
11501
11502 gimplify_and_add (copy, pre_p);
11503 addr = dest_addr;
11504 }
11505
11506 addr = fold_convert (ptrtype, addr);
11507 return build_va_arg_indirect_ref (addr);
11508 }
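
/* Illustrative sketch, not part of GCC: the register save area
   addressing used by the va_arg expansion above, for the hard-float,
   double-float V.4 layout.  The eight 4-byte GPR slots (r3..r10)
   come first, so FPR slots begin at offset 8 * 4 == 32 and are 8
   bytes wide; those constants are the sav_ofs/sav_scale values
   computed above.  The helper name is hypothetical.  */
static char *
example_v4_reg_save_addr (char *sav, int is_fp, int reg_index)
{
  int sav_ofs = is_fp ? 8 * 4 : 0;  /* FP slots follow the GPR block */
  int sav_scale = is_fp ? 8 : 4;    /* slot size within each block */
  return sav + sav_ofs + reg_index * sav_scale;
}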
11509
11510 /* Builtins. */
11511
11512 static void
11513 def_builtin (const char *name, tree type, enum rs6000_builtins code)
11514 {
11515 tree t;
11516 unsigned classify = rs6000_builtin_info[(int)code].attr;
11517 const char *attr_string = "";
11518
11519 gcc_assert (name != NULL);
11520 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
11521
11522 if (rs6000_builtin_decls[(int)code])
11523 fatal_error ("internal error: builtin function %s already processed", name);
11524
11525 rs6000_builtin_decls[(int)code] = t =
11526 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
11527
11528 /* Set any special attributes. */
11529 if ((classify & RS6000_BTC_CONST) != 0)
11530 {
11531 /* const function, function only depends on the inputs. */
11532 TREE_READONLY (t) = 1;
11533 TREE_NOTHROW (t) = 1;
11534 attr_string = ", const";
11535 }
11536 else if ((classify & RS6000_BTC_PURE) != 0)
11537 {
11538 /* pure function, function can read global memory, but does not set any
11539 external state. */
11540 DECL_PURE_P (t) = 1;
11541 TREE_NOTHROW (t) = 1;
11542 attr_string = ", pure";
11543 }
11544 else if ((classify & RS6000_BTC_FP) != 0)
11545 {
11546 /* Function is a math function. If rounding mode is on, then treat the
11547 function as not reading global memory, but it can have arbitrary side
11548 effects. If it is off, then assume the function is a const function.
11549 This mimics the ATTR_MATHFN_FPROUNDING attribute in
11550 builtin-attribute.def that is used for the math functions. */
11551 TREE_NOTHROW (t) = 1;
11552 if (flag_rounding_math)
11553 {
11554 DECL_PURE_P (t) = 1;
11555 DECL_IS_NOVOPS (t) = 1;
11556 attr_string = ", fp, pure";
11557 }
11558 else
11559 {
11560 TREE_READONLY (t) = 1;
11561 attr_string = ", fp, const";
11562 }
11563 }
11564 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
11565 gcc_unreachable ();
11566
11567 if (TARGET_DEBUG_BUILTIN)
11568 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
11569 (int)code, name, attr_string);
11570 }
11571
11572 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
11573
11574 #undef RS6000_BUILTIN_1
11575 #undef RS6000_BUILTIN_2
11576 #undef RS6000_BUILTIN_3
11577 #undef RS6000_BUILTIN_A
11578 #undef RS6000_BUILTIN_D
11579 #undef RS6000_BUILTIN_E
11580 #undef RS6000_BUILTIN_H
11581 #undef RS6000_BUILTIN_P
11582 #undef RS6000_BUILTIN_Q
11583 #undef RS6000_BUILTIN_S
11584 #undef RS6000_BUILTIN_X
11585
11586 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11587 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11588 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
11589 { MASK, ICODE, NAME, ENUM },
11590
11591 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11592 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11593 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11594 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11595 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11596 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11597 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11598 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11599
11600 static const struct builtin_description bdesc_3arg[] =
11601 {
11602 #include "rs6000-builtin.def"
11603 };
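
/* Illustrative sketch, not part of GCC: the X-macro pattern used by
   the bdesc_* tables in this section, reduced to a self-contained
   example with hypothetical names.  rs6000-builtin.def lists every
   builtin exactly once; before each table, the RS6000_BUILTIN_*
   macro for the wanted kind is redefined to emit an initializer and
   the others to emit nothing, slicing the master list into per-kind
   tables.  */
#define EXAMPLE_LIST \
  EXAMPLE_2ARG (EX_ADD, "ex_add") \
  EXAMPLE_3ARG (EX_FMA, "ex_fma")

/* Keep only the two-operand entries.  */
#define EXAMPLE_2ARG(ENUM, NAME) { NAME },
#define EXAMPLE_3ARG(ENUM, NAME)
static const struct { const char *name; } example_2arg[] =
{
  EXAMPLE_LIST   /* expands to { "ex_add" }, */
};
#undef EXAMPLE_2ARG
#undef EXAMPLE_3ARG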
11604
11605 /* DST operations: void foo (void *, const int, const char). */
11606
11607 #undef RS6000_BUILTIN_1
11608 #undef RS6000_BUILTIN_2
11609 #undef RS6000_BUILTIN_3
11610 #undef RS6000_BUILTIN_A
11611 #undef RS6000_BUILTIN_D
11612 #undef RS6000_BUILTIN_E
11613 #undef RS6000_BUILTIN_H
11614 #undef RS6000_BUILTIN_P
11615 #undef RS6000_BUILTIN_Q
11616 #undef RS6000_BUILTIN_S
11617 #undef RS6000_BUILTIN_X
11618
11619 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11620 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11621 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11622 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11623 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
11624 { MASK, ICODE, NAME, ENUM },
11625
11626 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11627 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11628 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11629 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11630 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11631 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11632
11633 static const struct builtin_description bdesc_dst[] =
11634 {
11635 #include "rs6000-builtin.def"
11636 };
11637
11638 /* Simple binary operations: VECc = foo (VECa, VECb). */
11639
11640 #undef RS6000_BUILTIN_1
11641 #undef RS6000_BUILTIN_2
11642 #undef RS6000_BUILTIN_3
11643 #undef RS6000_BUILTIN_A
11644 #undef RS6000_BUILTIN_D
11645 #undef RS6000_BUILTIN_E
11646 #undef RS6000_BUILTIN_H
11647 #undef RS6000_BUILTIN_P
11648 #undef RS6000_BUILTIN_Q
11649 #undef RS6000_BUILTIN_S
11650 #undef RS6000_BUILTIN_X
11651
11652 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11653 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
11654 { MASK, ICODE, NAME, ENUM },
11655
11656 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11657 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11658 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11659 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11660 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11661 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11662 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11663 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11664 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11665
11666 static const struct builtin_description bdesc_2arg[] =
11667 {
11668 #include "rs6000-builtin.def"
11669 };
11670
11671 #undef RS6000_BUILTIN_1
11672 #undef RS6000_BUILTIN_2
11673 #undef RS6000_BUILTIN_3
11674 #undef RS6000_BUILTIN_A
11675 #undef RS6000_BUILTIN_D
11676 #undef RS6000_BUILTIN_E
11677 #undef RS6000_BUILTIN_H
11678 #undef RS6000_BUILTIN_P
11679 #undef RS6000_BUILTIN_Q
11680 #undef RS6000_BUILTIN_S
11681 #undef RS6000_BUILTIN_X
11682
11683 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11684 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11685 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11686 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11687 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11688 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11689 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11690 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
11691 { MASK, ICODE, NAME, ENUM },
11692
11693 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11694 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11695 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11696
11697 /* AltiVec predicates. */
11698
11699 static const struct builtin_description bdesc_altivec_preds[] =
11700 {
11701 #include "rs6000-builtin.def"
11702 };
11703
11704 /* SPE predicates. */
11705 #undef RS6000_BUILTIN_1
11706 #undef RS6000_BUILTIN_2
11707 #undef RS6000_BUILTIN_3
11708 #undef RS6000_BUILTIN_A
11709 #undef RS6000_BUILTIN_D
11710 #undef RS6000_BUILTIN_E
11711 #undef RS6000_BUILTIN_H
11712 #undef RS6000_BUILTIN_P
11713 #undef RS6000_BUILTIN_Q
11714 #undef RS6000_BUILTIN_S
11715 #undef RS6000_BUILTIN_X
11716
11717 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11718 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11719 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11720 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11721 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11722 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11723 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11724 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11725 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11726 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
11727 { MASK, ICODE, NAME, ENUM },
11728
11729 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11730
11731 static const struct builtin_description bdesc_spe_predicates[] =
11732 {
11733 #include "rs6000-builtin.def"
11734 };
11735
11736 /* SPE evsel predicates. */
11737 #undef RS6000_BUILTIN_1
11738 #undef RS6000_BUILTIN_2
11739 #undef RS6000_BUILTIN_3
11740 #undef RS6000_BUILTIN_A
11741 #undef RS6000_BUILTIN_D
11742 #undef RS6000_BUILTIN_E
11743 #undef RS6000_BUILTIN_H
11744 #undef RS6000_BUILTIN_P
11745 #undef RS6000_BUILTIN_Q
11746 #undef RS6000_BUILTIN_S
11747 #undef RS6000_BUILTIN_X
11748
11749 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11750 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11751 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11752 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11753 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11754 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
11755 { MASK, ICODE, NAME, ENUM },
11756
11757 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11758 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11759 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11760 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11761 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11762
11763 static const struct builtin_description bdesc_spe_evsel[] =
11764 {
11765 #include "rs6000-builtin.def"
11766 };
11767
11768 /* PAIRED predicates. */
11769 #undef RS6000_BUILTIN_1
11770 #undef RS6000_BUILTIN_2
11771 #undef RS6000_BUILTIN_3
11772 #undef RS6000_BUILTIN_A
11773 #undef RS6000_BUILTIN_D
11774 #undef RS6000_BUILTIN_E
11775 #undef RS6000_BUILTIN_H
11776 #undef RS6000_BUILTIN_P
11777 #undef RS6000_BUILTIN_Q
11778 #undef RS6000_BUILTIN_S
11779 #undef RS6000_BUILTIN_X
11780
11781 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11782 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11783 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11784 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11785 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11786 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11787 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11788 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11789 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
11790 { MASK, ICODE, NAME, ENUM },
11791
11792 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11793 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11794
11795 static const struct builtin_description bdesc_paired_preds[] =
11796 {
11797 #include "rs6000-builtin.def"
11798 };
11799
11800 /* ABS* operations. */
11801
11802 #undef RS6000_BUILTIN_1
11803 #undef RS6000_BUILTIN_2
11804 #undef RS6000_BUILTIN_3
11805 #undef RS6000_BUILTIN_A
11806 #undef RS6000_BUILTIN_D
11807 #undef RS6000_BUILTIN_E
11808 #undef RS6000_BUILTIN_H
11809 #undef RS6000_BUILTIN_P
11810 #undef RS6000_BUILTIN_Q
11811 #undef RS6000_BUILTIN_S
11812 #undef RS6000_BUILTIN_X
11813
11814 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11815 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11816 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11817 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
11818 { MASK, ICODE, NAME, ENUM },
11819
11820 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11821 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11822 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11823 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11824 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11825 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11826 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11827
11828 static const struct builtin_description bdesc_abs[] =
11829 {
11830 #include "rs6000-builtin.def"
11831 };
11832
11833 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
11834 foo (VECa). */
11835
11836 #undef RS6000_BUILTIN_1
11837 #undef RS6000_BUILTIN_2
11838 #undef RS6000_BUILTIN_3
11839 #undef RS6000_BUILTIN_A
11840 #undef RS6000_BUILTIN_D
11841 #undef RS6000_BUILTIN_E
11842 #undef RS6000_BUILTIN_H
11843 #undef RS6000_BUILTIN_P
11844 #undef RS6000_BUILTIN_Q
11845 #undef RS6000_BUILTIN_S
11846 #undef RS6000_BUILTIN_X
11847
11848 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
11849 { MASK, ICODE, NAME, ENUM },
11850
11851 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11852 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11853 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11854 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11855 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11856 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11857 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11858 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11859 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11860 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11861
11862 static const struct builtin_description bdesc_1arg[] =
11863 {
11864 #include "rs6000-builtin.def"
11865 };
11866
11867 /* HTM builtins. */
11868 #undef RS6000_BUILTIN_1
11869 #undef RS6000_BUILTIN_2
11870 #undef RS6000_BUILTIN_3
11871 #undef RS6000_BUILTIN_A
11872 #undef RS6000_BUILTIN_D
11873 #undef RS6000_BUILTIN_E
11874 #undef RS6000_BUILTIN_H
11875 #undef RS6000_BUILTIN_P
11876 #undef RS6000_BUILTIN_Q
11877 #undef RS6000_BUILTIN_S
11878 #undef RS6000_BUILTIN_X
11879
11880 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11881 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11882 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11883 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11884 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11885 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11886 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
11887 { MASK, ICODE, NAME, ENUM },
11888
11889 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11890 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11891 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11892 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11893
11894 static const struct builtin_description bdesc_htm[] =
11895 {
11896 #include "rs6000-builtin.def"
11897 };
11898
11899 #undef RS6000_BUILTIN_1
11900 #undef RS6000_BUILTIN_2
11901 #undef RS6000_BUILTIN_3
11902 #undef RS6000_BUILTIN_A
11903 #undef RS6000_BUILTIN_D
11904 #undef RS6000_BUILTIN_E
11905 #undef RS6000_BUILTIN_H
11906 #undef RS6000_BUILTIN_P
11907 #undef RS6000_BUILTIN_Q
11908 #undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X
11909
11910 /* Return true if a builtin function is overloaded. */
11911 bool
11912 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
11913 {
11914 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
11915 }
11916
11917 /* Expand a call to a builtin function that takes no arguments. */
11918 static rtx
11919 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
11920 {
11921 rtx pat;
11922 machine_mode tmode = insn_data[icode].operand[0].mode;
11923
11924 if (icode == CODE_FOR_nothing)
11925 /* Builtin not supported on this processor. */
11926 return 0;
11927
11928 if (target == 0
11929 || GET_MODE (target) != tmode
11930 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11931 target = gen_reg_rtx (tmode);
11932
11933 pat = GEN_FCN (icode) (target);
11934 if (! pat)
11935 return 0;
11936 emit_insn (pat);
11937
11938 return target;
11939 }
11940
11941
11942 static rtx
11943 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
11944 {
11945 rtx pat;
11946 tree arg0 = CALL_EXPR_ARG (exp, 0);
11947 tree arg1 = CALL_EXPR_ARG (exp, 1);
11948 rtx op0 = expand_normal (arg0);
11949 rtx op1 = expand_normal (arg1);
11950 machine_mode mode0 = insn_data[icode].operand[0].mode;
11951 machine_mode mode1 = insn_data[icode].operand[1].mode;
11952
11953 if (icode == CODE_FOR_nothing)
11954 /* Builtin not supported on this processor. */
11955 return 0;
11956
11957 /* If we got invalid arguments bail out before generating bad rtl. */
11958 if (arg0 == error_mark_node || arg1 == error_mark_node)
11959 return const0_rtx;
11960
11961 if (GET_CODE (op0) != CONST_INT
11962 || INTVAL (op0) > 255
11963 || INTVAL (op0) < 0)
11964 {
11965 error ("argument 1 must be an 8-bit field value");
11966 return const0_rtx;
11967 }
11968
11969 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
11970 op0 = copy_to_mode_reg (mode0, op0);
11971
11972 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11973 op1 = copy_to_mode_reg (mode1, op1);
11974
11975 pat = GEN_FCN (icode) (op0, op1);
11976 if (! pat)
11977 return const0_rtx;
11978 emit_insn (pat);
11979
11980 return NULL_RTX;
11981 }
11982
11983
11984 static rtx
11985 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
11986 {
11987 rtx pat;
11988 tree arg0 = CALL_EXPR_ARG (exp, 0);
11989 rtx op0 = expand_normal (arg0);
11990 machine_mode tmode = insn_data[icode].operand[0].mode;
11991 machine_mode mode0 = insn_data[icode].operand[1].mode;
11992
11993 if (icode == CODE_FOR_nothing)
11994 /* Builtin not supported on this processor. */
11995 return 0;
11996
11997 /* If we got invalid arguments bail out before generating bad rtl. */
11998 if (arg0 == error_mark_node)
11999 return const0_rtx;
12000
12001 if (icode == CODE_FOR_altivec_vspltisb
12002 || icode == CODE_FOR_altivec_vspltish
12003 || icode == CODE_FOR_altivec_vspltisw
12004 || icode == CODE_FOR_spe_evsplatfi
12005 || icode == CODE_FOR_spe_evsplati)
12006 {
12007 /* Only allow 5-bit *signed* literals. */
12008 if (GET_CODE (op0) != CONST_INT
12009 || INTVAL (op0) > 15
12010 || INTVAL (op0) < -16)
12011 {
12012 error ("argument 1 must be a 5-bit signed literal");
12013 return const0_rtx;
12014 }
12015 }
12016
12017 if (target == 0
12018 || GET_MODE (target) != tmode
12019 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12020 target = gen_reg_rtx (tmode);
12021
12022 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12023 op0 = copy_to_mode_reg (mode0, op0);
12024
12025 pat = GEN_FCN (icode) (target, op0);
12026 if (! pat)
12027 return 0;
12028 emit_insn (pat);
12029
12030 return target;
12031 }
12032
12033 static rtx
12034 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
12035 {
12036 rtx pat, scratch1, scratch2;
12037 tree arg0 = CALL_EXPR_ARG (exp, 0);
12038 rtx op0 = expand_normal (arg0);
12039 machine_mode tmode = insn_data[icode].operand[0].mode;
12040 machine_mode mode0 = insn_data[icode].operand[1].mode;
12041
12042 /* If we have invalid arguments, bail out before generating bad rtl. */
12043 if (arg0 == error_mark_node)
12044 return const0_rtx;
12045
12046 if (target == 0
12047 || GET_MODE (target) != tmode
12048 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12049 target = gen_reg_rtx (tmode);
12050
12051 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12052 op0 = copy_to_mode_reg (mode0, op0);
12053
12054 scratch1 = gen_reg_rtx (mode0);
12055 scratch2 = gen_reg_rtx (mode0);
12056
12057 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
12058 if (! pat)
12059 return 0;
12060 emit_insn (pat);
12061
12062 return target;
12063 }
12064
12065 static rtx
12066 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
12067 {
12068 rtx pat;
12069 tree arg0 = CALL_EXPR_ARG (exp, 0);
12070 tree arg1 = CALL_EXPR_ARG (exp, 1);
12071 rtx op0 = expand_normal (arg0);
12072 rtx op1 = expand_normal (arg1);
12073 machine_mode tmode = insn_data[icode].operand[0].mode;
12074 machine_mode mode0 = insn_data[icode].operand[1].mode;
12075 machine_mode mode1 = insn_data[icode].operand[2].mode;
12076
12077 if (icode == CODE_FOR_nothing)
12078 /* Builtin not supported on this processor. */
12079 return 0;
12080
12081 /* If we got invalid arguments, bail out before generating bad rtl. */
12082 if (arg0 == error_mark_node || arg1 == error_mark_node)
12083 return const0_rtx;
12084
12085 if (icode == CODE_FOR_altivec_vcfux
12086 || icode == CODE_FOR_altivec_vcfsx
12087 || icode == CODE_FOR_altivec_vctsxs
12088 || icode == CODE_FOR_altivec_vctuxs
12089 || icode == CODE_FOR_altivec_vspltb
12090 || icode == CODE_FOR_altivec_vsplth
12091 || icode == CODE_FOR_altivec_vspltw
12092 || icode == CODE_FOR_spe_evaddiw
12093 || icode == CODE_FOR_spe_evldd
12094 || icode == CODE_FOR_spe_evldh
12095 || icode == CODE_FOR_spe_evldw
12096 || icode == CODE_FOR_spe_evlhhesplat
12097 || icode == CODE_FOR_spe_evlhhossplat
12098 || icode == CODE_FOR_spe_evlhhousplat
12099 || icode == CODE_FOR_spe_evlwhe
12100 || icode == CODE_FOR_spe_evlwhos
12101 || icode == CODE_FOR_spe_evlwhou
12102 || icode == CODE_FOR_spe_evlwhsplat
12103 || icode == CODE_FOR_spe_evlwwsplat
12104 || icode == CODE_FOR_spe_evrlwi
12105 || icode == CODE_FOR_spe_evslwi
12106 || icode == CODE_FOR_spe_evsrwis
12107 || icode == CODE_FOR_spe_evsubifw
12108 || icode == CODE_FOR_spe_evsrwiu)
12109 {
12110 /* Only allow 5-bit unsigned literals. */
12111 STRIP_NOPS (arg1);
12112 if (TREE_CODE (arg1) != INTEGER_CST
12113 || TREE_INT_CST_LOW (arg1) & ~0x1f)
12114 {
12115 error ("argument 2 must be a 5-bit unsigned literal");
12116 return const0_rtx;
12117 }
12118 }
12119
12120 if (target == 0
12121 || GET_MODE (target) != tmode
12122 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12123 target = gen_reg_rtx (tmode);
12124
12125 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12126 op0 = copy_to_mode_reg (mode0, op0);
12127 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12128 op1 = copy_to_mode_reg (mode1, op1);
12129
12130 pat = GEN_FCN (icode) (target, op0, op1);
12131 if (! pat)
12132 return 0;
12133 emit_insn (pat);
12134
12135 return target;
12136 }
12137
12138 static rtx
12139 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
12140 {
12141 rtx pat, scratch;
12142 tree cr6_form = CALL_EXPR_ARG (exp, 0);
12143 tree arg0 = CALL_EXPR_ARG (exp, 1);
12144 tree arg1 = CALL_EXPR_ARG (exp, 2);
12145 rtx op0 = expand_normal (arg0);
12146 rtx op1 = expand_normal (arg1);
12147 machine_mode tmode = SImode;
12148 machine_mode mode0 = insn_data[icode].operand[1].mode;
12149 machine_mode mode1 = insn_data[icode].operand[2].mode;
12150 int cr6_form_int;
12151
12152 if (TREE_CODE (cr6_form) != INTEGER_CST)
12153 {
12154 error ("argument 1 of __builtin_altivec_predicate must be a constant");
12155 return const0_rtx;
12156 }
12157 else
12158 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
12159
12160 gcc_assert (mode0 == mode1);
12161
12162 /* If we have invalid arguments, bail out before generating bad rtl. */
12163 if (arg0 == error_mark_node || arg1 == error_mark_node)
12164 return const0_rtx;
12165
12166 if (target == 0
12167 || GET_MODE (target) != tmode
12168 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12169 target = gen_reg_rtx (tmode);
12170
12171 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12172 op0 = copy_to_mode_reg (mode0, op0);
12173 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12174 op1 = copy_to_mode_reg (mode1, op1);
12175
12176 scratch = gen_reg_rtx (mode0);
12177
12178 pat = GEN_FCN (icode) (scratch, op0, op1);
12179 if (! pat)
12180 return 0;
12181 emit_insn (pat);
12182
12183 /* The vec_any* and vec_all* predicates use the same opcodes for two
12184 different operations, but the bits in CR6 will be different
12185 depending on what information we want. So we have to play tricks
12186 with CR6 to get the right bits out.
12187
12188 If you think this is disgusting, look at the specs for the
12189 AltiVec predicates. */
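  /* Editorial illustration (mapping assumed from altivec.h, not part of
     the original source):

	vector int a, b;
	int all = vec_all_eq (a, b);	-- cr6_form 2 (__CR6_LT)
	int any = vec_any_eq (a, b);	-- cr6_form 1 (__CR6_EQ_REV)

     Both wrappers expand the same vcmpequw comparison; only the CR6
     test selected in the switch below differs.  */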
12190
12191 switch (cr6_form_int)
12192 {
12193 case 0:
12194 emit_insn (gen_cr6_test_for_zero (target));
12195 break;
12196 case 1:
12197 emit_insn (gen_cr6_test_for_zero_reverse (target));
12198 break;
12199 case 2:
12200 emit_insn (gen_cr6_test_for_lt (target));
12201 break;
12202 case 3:
12203 emit_insn (gen_cr6_test_for_lt_reverse (target));
12204 break;
12205 default:
12206 error ("argument 1 of __builtin_altivec_predicate is out of range");
12207 break;
12208 }
12209
12210 return target;
12211 }
12212
12213 static rtx
12214 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
12215 {
12216 rtx pat, addr;
12217 tree arg0 = CALL_EXPR_ARG (exp, 0);
12218 tree arg1 = CALL_EXPR_ARG (exp, 1);
12219 machine_mode tmode = insn_data[icode].operand[0].mode;
12220 machine_mode mode0 = Pmode;
12221 machine_mode mode1 = Pmode;
12222 rtx op0 = expand_normal (arg0);
12223 rtx op1 = expand_normal (arg1);
12224
12225 if (icode == CODE_FOR_nothing)
12226 /* Builtin not supported on this processor. */
12227 return 0;
12228
12229 /* If we got invalid arguments, bail out before generating bad rtl. */
12230 if (arg0 == error_mark_node || arg1 == error_mark_node)
12231 return const0_rtx;
12232
12233 if (target == 0
12234 || GET_MODE (target) != tmode
12235 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12236 target = gen_reg_rtx (tmode);
12237
12238 op1 = copy_to_mode_reg (mode1, op1);
12239
12240 if (op0 == const0_rtx)
12241 {
12242 addr = gen_rtx_MEM (tmode, op1);
12243 }
12244 else
12245 {
12246 op0 = copy_to_mode_reg (mode0, op0);
12247 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
12248 }
12249
12250 pat = GEN_FCN (icode) (target, addr);
12251
12252 if (! pat)
12253 return 0;
12254 emit_insn (pat);
12255
12256 return target;
12257 }
12258
12259 /* Return a constant vector for use as a little-endian permute control vector
12260 to reverse the order of elements of the given vector mode. */
12261 static rtx
12262 swap_selector_for_mode (machine_mode mode)
12263 {
12264 /* These are little endian vectors, so their elements are reversed
12265 from what you would normally expect for a permute control vector. */
12266 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
12267 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
12268 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
12269 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
12270 unsigned int *swaparray, i;
12271 rtx perm[16];
12272
12273 switch (mode)
12274 {
12275 case V2DFmode:
12276 case V2DImode:
12277 swaparray = swap2;
12278 break;
12279 case V4SFmode:
12280 case V4SImode:
12281 swaparray = swap4;
12282 break;
12283 case V8HImode:
12284 swaparray = swap8;
12285 break;
12286 case V16QImode:
12287 swaparray = swap16;
12288 break;
12289 default:
12290 gcc_unreachable ();
12291 }
12292
12293 for (i = 0; i < 16; ++i)
12294 perm[i] = GEN_INT (swaparray[i]);
12295
12296 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
12297 }
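/* Worked example (editorial note, not in the original source): for
   V4SImode the selector is {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}.
   Because the CONST_VECTOR elements above are listed in little-endian
   order while vperm numbers bytes big-endian, the permute effectively
   reads {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3}: it copies the last
   32-bit element first, reversing the order of the four elements while
   leaving the bytes within each element untouched.  */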
12298
12299 /* Generate code for an "lvx", "lvxl", or "lve*x" built-in for a little endian target
12300 with -maltivec=be specified. Issue the load followed by an element-reversing
12301 permute. */
12302 void
12303 altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12304 {
12305 rtx tmp = gen_reg_rtx (mode);
12306 rtx load = gen_rtx_SET (VOIDmode, tmp, op1);
12307 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
12308 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
12309 rtx sel = swap_selector_for_mode (mode);
12310 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
12311
12312 gcc_assert (REG_P (op0));
12313 emit_insn (par);
12314 emit_insn (gen_rtx_SET (VOIDmode, op0, vperm));
12315 }
12316
12317 /* Generate code for a "stvx" or "stvxl" built-in for a little endian target
12318 with -maltivec=be specified. Issue the store preceded by an element-reversing
12319 permute. */
12320 void
12321 altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12322 {
12323 rtx tmp = gen_reg_rtx (mode);
12324 rtx store = gen_rtx_SET (VOIDmode, op0, tmp);
12325 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
12326 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
12327 rtx sel = swap_selector_for_mode (mode);
12328 rtx vperm;
12329
12330 gcc_assert (REG_P (op1));
12331 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
12332 emit_insn (gen_rtx_SET (VOIDmode, tmp, vperm));
12333 emit_insn (par);
12334 }
12335
12336 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
12337 specified. Issue the store preceded by an element-reversing permute. */
12338 void
12339 altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12340 {
12341 machine_mode inner_mode = GET_MODE_INNER (mode);
12342 rtx tmp = gen_reg_rtx (mode);
12343 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
12344 rtx sel = swap_selector_for_mode (mode);
12345 rtx vperm;
12346
12347 gcc_assert (REG_P (op1));
12348 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
12349 emit_insn (gen_rtx_SET (VOIDmode, tmp, vperm));
12350 emit_insn (gen_rtx_SET (VOIDmode, op0, stvx));
12351 }
12352
12353 static rtx
12354 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
12355 {
12356 rtx pat, addr;
12357 tree arg0 = CALL_EXPR_ARG (exp, 0);
12358 tree arg1 = CALL_EXPR_ARG (exp, 1);
12359 machine_mode tmode = insn_data[icode].operand[0].mode;
12360 machine_mode mode0 = Pmode;
12361 machine_mode mode1 = Pmode;
12362 rtx op0 = expand_normal (arg0);
12363 rtx op1 = expand_normal (arg1);
12364
12365 if (icode == CODE_FOR_nothing)
12366 /* Builtin not supported on this processor. */
12367 return 0;
12368
12369 /* If we got invalid arguments, bail out before generating bad rtl. */
12370 if (arg0 == error_mark_node || arg1 == error_mark_node)
12371 return const0_rtx;
12372
12373 if (target == 0
12374 || GET_MODE (target) != tmode
12375 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12376 target = gen_reg_rtx (tmode);
12377
12378 op1 = copy_to_mode_reg (mode1, op1);
12379
12380 if (op0 == const0_rtx)
12381 {
12382 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
12383 }
12384 else
12385 {
12386 op0 = copy_to_mode_reg (mode0, op0);
12387 addr = gen_rtx_MEM (blk ? BLKmode : tmode, gen_rtx_PLUS (Pmode, op0, op1));
12388 }
12389
12390 pat = GEN_FCN (icode) (target, addr);
12391
12392 if (! pat)
12393 return 0;
12394 emit_insn (pat);
12395
12396 return target;
12397 }
12398
12399 static rtx
12400 spe_expand_stv_builtin (enum insn_code icode, tree exp)
12401 {
12402 tree arg0 = CALL_EXPR_ARG (exp, 0);
12403 tree arg1 = CALL_EXPR_ARG (exp, 1);
12404 tree arg2 = CALL_EXPR_ARG (exp, 2);
12405 rtx op0 = expand_normal (arg0);
12406 rtx op1 = expand_normal (arg1);
12407 rtx op2 = expand_normal (arg2);
12408 rtx pat;
12409 machine_mode mode0 = insn_data[icode].operand[0].mode;
12410 machine_mode mode1 = insn_data[icode].operand[1].mode;
12411 machine_mode mode2 = insn_data[icode].operand[2].mode;
12412
12413 /* Invalid arguments. Bail before doing anything stoopid! */
12414 if (arg0 == error_mark_node
12415 || arg1 == error_mark_node
12416 || arg2 == error_mark_node)
12417 return const0_rtx;
12418
12419 if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
12420 op0 = copy_to_mode_reg (mode2, op0);
12421 if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
12422 op1 = copy_to_mode_reg (mode0, op1);
12423 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
12424 op2 = copy_to_mode_reg (mode1, op2);
12425
12426 pat = GEN_FCN (icode) (op1, op2, op0);
12427 if (pat)
12428 emit_insn (pat);
12429 return NULL_RTX;
12430 }
12431
12432 static rtx
12433 paired_expand_stv_builtin (enum insn_code icode, tree exp)
12434 {
12435 tree arg0 = CALL_EXPR_ARG (exp, 0);
12436 tree arg1 = CALL_EXPR_ARG (exp, 1);
12437 tree arg2 = CALL_EXPR_ARG (exp, 2);
12438 rtx op0 = expand_normal (arg0);
12439 rtx op1 = expand_normal (arg1);
12440 rtx op2 = expand_normal (arg2);
12441 rtx pat, addr;
12442 machine_mode tmode = insn_data[icode].operand[0].mode;
12443 machine_mode mode1 = Pmode;
12444 machine_mode mode2 = Pmode;
12445
12446 /* Invalid arguments. Bail before doing anything stoopid! */
12447 if (arg0 == error_mark_node
12448 || arg1 == error_mark_node
12449 || arg2 == error_mark_node)
12450 return const0_rtx;
12451
12452 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
12453 op0 = copy_to_mode_reg (tmode, op0);
12454
12455 op2 = copy_to_mode_reg (mode2, op2);
12456
12457 if (op1 == const0_rtx)
12458 {
12459 addr = gen_rtx_MEM (tmode, op2);
12460 }
12461 else
12462 {
12463 op1 = copy_to_mode_reg (mode1, op1);
12464 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
12465 }
12466
12467 pat = GEN_FCN (icode) (addr, op0);
12468 if (pat)
12469 emit_insn (pat);
12470 return NULL_RTX;
12471 }
12472
12473 static rtx
12474 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
12475 {
12476 tree arg0 = CALL_EXPR_ARG (exp, 0);
12477 tree arg1 = CALL_EXPR_ARG (exp, 1);
12478 tree arg2 = CALL_EXPR_ARG (exp, 2);
12479 rtx op0 = expand_normal (arg0);
12480 rtx op1 = expand_normal (arg1);
12481 rtx op2 = expand_normal (arg2);
12482 rtx pat, addr;
12483 machine_mode tmode = insn_data[icode].operand[0].mode;
12484 machine_mode smode = insn_data[icode].operand[1].mode;
12485 machine_mode mode1 = Pmode;
12486 machine_mode mode2 = Pmode;
12487
12488 /* Invalid arguments. Bail before doing anything stoopid! */
12489 if (arg0 == error_mark_node
12490 || arg1 == error_mark_node
12491 || arg2 == error_mark_node)
12492 return const0_rtx;
12493
12494 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
12495 op0 = copy_to_mode_reg (smode, op0);
12496
12497 op2 = copy_to_mode_reg (mode2, op2);
12498
12499 if (op1 == const0_rtx)
12500 {
12501 addr = gen_rtx_MEM (tmode, op2);
12502 }
12503 else
12504 {
12505 op1 = copy_to_mode_reg (mode1, op1);
12506 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
12507 }
12508
12509 pat = GEN_FCN (icode) (addr, op0);
12510 if (pat)
12511 emit_insn (pat);
12512 return NULL_RTX;
12513 }
12514
12515 /* Return the appropriate SPR number associated with the given builtin. */
12516 static inline HOST_WIDE_INT
12517 htm_spr_num (enum rs6000_builtins code)
12518 {
12519 if (code == HTM_BUILTIN_GET_TFHAR
12520 || code == HTM_BUILTIN_SET_TFHAR)
12521 return TFHAR_SPR;
12522 else if (code == HTM_BUILTIN_GET_TFIAR
12523 || code == HTM_BUILTIN_SET_TFIAR)
12524 return TFIAR_SPR;
12525 else if (code == HTM_BUILTIN_GET_TEXASR
12526 || code == HTM_BUILTIN_SET_TEXASR)
12527 return TEXASR_SPR;
12528 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
12529 || code == HTM_BUILTIN_SET_TEXASRU);
12530 return TEXASRU_SPR;
12531 }
12532
12533 /* Return the appropriate SPR regno associated with the given builtin. */
12534 static inline HOST_WIDE_INT
12535 htm_spr_regno (enum rs6000_builtins code)
12536 {
12537 if (code == HTM_BUILTIN_GET_TFHAR
12538 || code == HTM_BUILTIN_SET_TFHAR)
12539 return TFHAR_REGNO;
12540 else if (code == HTM_BUILTIN_GET_TFIAR
12541 || code == HTM_BUILTIN_SET_TFIAR)
12542 return TFIAR_REGNO;
12543 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
12544 || code == HTM_BUILTIN_SET_TEXASR
12545 || code == HTM_BUILTIN_GET_TEXASRU
12546 || code == HTM_BUILTIN_SET_TEXASRU);
12547 return TEXASR_REGNO;
12548 }
12549
12550 /* Return the correct ICODE value depending on whether we are
12551 setting or reading the HTM SPRs. */
12552 static inline enum insn_code
12553 rs6000_htm_spr_icode (bool nonvoid)
12554 {
12555 if (nonvoid)
12556 return (TARGET_64BIT) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
12557 else
12558 return (TARGET_64BIT) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
12559 }
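/* For instance (editorial note): the builtin carrying
   HTM_BUILTIN_GET_TEXASR returns a value, so NONVOID is true and a
   64-bit target selects CODE_FOR_htm_mfspr_di; its SET counterpart is
   void and selects CODE_FOR_htm_mtspr_di instead.  */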
12560
12561 /* Expand the HTM builtin in EXP and store the result in TARGET.
12562 Store true in *EXPANDEDP if we found a builtin to expand. */
12563 static rtx
12564 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
12565 {
12566 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12567 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
12568 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
12569 const struct builtin_description *d;
12570 size_t i;
12571
12572 *expandedp = false;
12573
12574 /* Expand the HTM builtins. */
12575 d = bdesc_htm;
12576 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
12577 if (d->code == fcode)
12578 {
12579 rtx op[MAX_HTM_OPERANDS], pat;
12580 int nopnds = 0;
12581 tree arg;
12582 call_expr_arg_iterator iter;
12583 unsigned attr = rs6000_builtin_info[fcode].attr;
12584 enum insn_code icode = d->icode;
12585
12586 if (attr & RS6000_BTC_SPR)
12587 icode = rs6000_htm_spr_icode (nonvoid);
12588
12589 if (nonvoid)
12590 {
12591 machine_mode tmode = insn_data[icode].operand[0].mode;
12592 if (!target
12593 || GET_MODE (target) != tmode
12594 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
12595 target = gen_reg_rtx (tmode);
12596 op[nopnds++] = target;
12597 }
12598
12599 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
12600 {
12601 const struct insn_operand_data *insn_op;
12602
12603 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
12604 return NULL_RTX;
12605
12606 insn_op = &insn_data[icode].operand[nopnds];
12607
12608 op[nopnds] = expand_normal (arg);
12609
12610 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
12611 {
12612 if (!strcmp (insn_op->constraint, "n"))
12613 {
12614 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
12615 if (!CONST_INT_P (op[nopnds]))
12616 error ("argument %d must be an unsigned literal", arg_num);
12617 else
12618 error ("argument %d is an unsigned literal that is "
12619 "out of range", arg_num);
12620 return const0_rtx;
12621 }
12622 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
12623 }
12624
12625 nopnds++;
12626 }
12627
12628 /* Handle the builtins for extended mnemonics. These accept
12629 no arguments, but map to builtins that take arguments. */
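	  /* Editorial example: a HTM_BUILTIN_TENDALL call reaches this
	     point with no source-level arguments; appending GEN_INT (1)
	     below gives it the same operand list as an explicit
	     "tend. 1".  */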
12630 switch (fcode)
12631 {
12632 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
12633 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
12634 op[nopnds++] = GEN_INT (1);
12635 #ifdef ENABLE_CHECKING
12636 attr |= RS6000_BTC_UNARY;
12637 #endif
12638 break;
12639 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
12640 op[nopnds++] = GEN_INT (0);
12641 #ifdef ENABLE_CHECKING
12642 attr |= RS6000_BTC_UNARY;
12643 #endif
12644 break;
12645 default:
12646 break;
12647 }
12648
12649 /* If this builtin accesses SPRs, then pass in the appropriate
12650 SPR number and SPR regno as the last two operands. */
12651 if (attr & RS6000_BTC_SPR)
12652 {
12653 op[nopnds++] = gen_rtx_CONST_INT (Pmode, htm_spr_num (fcode));
12654 op[nopnds++] = gen_rtx_REG (Pmode, htm_spr_regno (fcode));
12655 }
12656
12657 #ifdef ENABLE_CHECKING
12658 int expected_nopnds = 0;
12659 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
12660 expected_nopnds = 1;
12661 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
12662 expected_nopnds = 2;
12663 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
12664 expected_nopnds = 3;
12665 if (!(attr & RS6000_BTC_VOID))
12666 expected_nopnds += 1;
12667 if (attr & RS6000_BTC_SPR)
12668 expected_nopnds += 2;
12669
12670 gcc_assert (nopnds == expected_nopnds && nopnds <= MAX_HTM_OPERANDS);
12671 #endif
12672
12673 switch (nopnds)
12674 {
12675 case 1:
12676 pat = GEN_FCN (icode) (op[0]);
12677 break;
12678 case 2:
12679 pat = GEN_FCN (icode) (op[0], op[1]);
12680 break;
12681 case 3:
12682 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
12683 break;
12684 case 4:
12685 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
12686 break;
12687 default:
12688 gcc_unreachable ();
12689 }
12690 if (!pat)
12691 return NULL_RTX;
12692 emit_insn (pat);
12693
12694 *expandedp = true;
12695 if (nonvoid)
12696 return target;
12697 return const0_rtx;
12698 }
12699
12700 return NULL_RTX;
12701 }
12702
12703 static rtx
12704 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
12705 {
12706 rtx pat;
12707 tree arg0 = CALL_EXPR_ARG (exp, 0);
12708 tree arg1 = CALL_EXPR_ARG (exp, 1);
12709 tree arg2 = CALL_EXPR_ARG (exp, 2);
12710 rtx op0 = expand_normal (arg0);
12711 rtx op1 = expand_normal (arg1);
12712 rtx op2 = expand_normal (arg2);
12713 machine_mode tmode = insn_data[icode].operand[0].mode;
12714 machine_mode mode0 = insn_data[icode].operand[1].mode;
12715 machine_mode mode1 = insn_data[icode].operand[2].mode;
12716 machine_mode mode2 = insn_data[icode].operand[3].mode;
12717
12718 if (icode == CODE_FOR_nothing)
12719 /* Builtin not supported on this processor. */
12720 return 0;
12721
12722 /* If we got invalid arguments, bail out before generating bad rtl. */
12723 if (arg0 == error_mark_node
12724 || arg1 == error_mark_node
12725 || arg2 == error_mark_node)
12726 return const0_rtx;
12727
12728 /* Check and prepare argument depending on the instruction code.
12729
12730 Note that a switch statement instead of the sequence of tests
12731 would be incorrect as many of the CODE_FOR values could be
12732 CODE_FOR_nothing and that would yield multiple alternatives
12733 with identical values. We'd never reach here at runtime in
12734 this case. */
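  /* (Editorial illustration of the note above: if two of the insn codes
     tested here were both CODE_FOR_nothing on some configuration, a
     switch would contain two case labels with equal values -- a compile
     error -- whereas this if/else chain merely compares ICODE against
     the same dead value twice, harmlessly.)  */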
12735 if (icode == CODE_FOR_altivec_vsldoi_v4sf
12736 || icode == CODE_FOR_altivec_vsldoi_v4si
12737 || icode == CODE_FOR_altivec_vsldoi_v8hi
12738 || icode == CODE_FOR_altivec_vsldoi_v16qi)
12739 {
12740 /* Only allow 4-bit unsigned literals. */
12741 STRIP_NOPS (arg2);
12742 if (TREE_CODE (arg2) != INTEGER_CST
12743 || TREE_INT_CST_LOW (arg2) & ~0xf)
12744 {
12745 error ("argument 3 must be a 4-bit unsigned literal");
12746 return const0_rtx;
12747 }
12748 }
12749 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
12750 || icode == CODE_FOR_vsx_xxpermdi_v2di
12751 || icode == CODE_FOR_vsx_xxsldwi_v16qi
12752 || icode == CODE_FOR_vsx_xxsldwi_v8hi
12753 || icode == CODE_FOR_vsx_xxsldwi_v4si
12754 || icode == CODE_FOR_vsx_xxsldwi_v4sf
12755 || icode == CODE_FOR_vsx_xxsldwi_v2di
12756 || icode == CODE_FOR_vsx_xxsldwi_v2df)
12757 {
12758 /* Only allow 2-bit unsigned literals. */
12759 STRIP_NOPS (arg2);
12760 if (TREE_CODE (arg2) != INTEGER_CST
12761 || TREE_INT_CST_LOW (arg2) & ~0x3)
12762 {
12763 error ("argument 3 must be a 2-bit unsigned literal");
12764 return const0_rtx;
12765 }
12766 }
12767 else if (icode == CODE_FOR_vsx_set_v2df
12768 || icode == CODE_FOR_vsx_set_v2di
12769 || icode == CODE_FOR_bcdadd
12770 || icode == CODE_FOR_bcdadd_lt
12771 || icode == CODE_FOR_bcdadd_eq
12772 || icode == CODE_FOR_bcdadd_gt
12773 || icode == CODE_FOR_bcdsub
12774 || icode == CODE_FOR_bcdsub_lt
12775 || icode == CODE_FOR_bcdsub_eq
12776 || icode == CODE_FOR_bcdsub_gt)
12777 {
12778 /* Only allow 1-bit unsigned literals. */
12779 STRIP_NOPS (arg2);
12780 if (TREE_CODE (arg2) != INTEGER_CST
12781 || TREE_INT_CST_LOW (arg2) & ~0x1)
12782 {
12783 error ("argument 3 must be a 1-bit unsigned literal");
12784 return const0_rtx;
12785 }
12786 }
12787 else if (icode == CODE_FOR_dfp_ddedpd_dd
12788 || icode == CODE_FOR_dfp_ddedpd_td)
12789 {
12790 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
12791 STRIP_NOPS (arg0);
12792 if (TREE_CODE (arg0) != INTEGER_CST
12793 || TREE_INT_CST_LOW (arg0) & ~0x3)
12794 {
12795 error ("argument 1 must be 0 or 2");
12796 return const0_rtx;
12797 }
12798 }
12799 else if (icode == CODE_FOR_dfp_denbcd_dd
12800 || icode == CODE_FOR_dfp_denbcd_td)
12801 {
12802 /* Only allow 1-bit unsigned literals. */
12803 STRIP_NOPS (arg0);
12804 if (TREE_CODE (arg0) != INTEGER_CST
12805 || TREE_INT_CST_LOW (arg0) & ~0x1)
12806 {
12807 error ("argument 1 must be a 1-bit unsigned literal");
12808 return const0_rtx;
12809 }
12810 }
12811 else if (icode == CODE_FOR_dfp_dscli_dd
12812 || icode == CODE_FOR_dfp_dscli_td
12813 || icode == CODE_FOR_dfp_dscri_dd
12814 || icode == CODE_FOR_dfp_dscri_td)
12815 {
12816 /* Only allow 6-bit unsigned literals. */
12817 STRIP_NOPS (arg1);
12818 if (TREE_CODE (arg1) != INTEGER_CST
12819 || TREE_INT_CST_LOW (arg1) & ~0x3f)
12820 {
12821 error ("argument 2 must be a 6-bit unsigned literal");
12822 return const0_rtx;
12823 }
12824 }
12825 else if (icode == CODE_FOR_crypto_vshasigmaw
12826 || icode == CODE_FOR_crypto_vshasigmad)
12827 {
12828 /* Check whether the 2nd and 3rd arguments are integer constants and in
12829 range and prepare arguments. */
12830 STRIP_NOPS (arg1);
12831 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2))
12832 {
12833 error ("argument 2 must be 0 or 1");
12834 return const0_rtx;
12835 }
12836
12837 STRIP_NOPS (arg2);
12838 if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16))
12839 {
12840 error ("argument 3 must be in the range 0..15");
12841 return const0_rtx;
12842 }
12843 }
12844
12845 if (target == 0
12846 || GET_MODE (target) != tmode
12847 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12848 target = gen_reg_rtx (tmode);
12849
12850 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12851 op0 = copy_to_mode_reg (mode0, op0);
12852 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12853 op1 = copy_to_mode_reg (mode1, op1);
12854 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12855 op2 = copy_to_mode_reg (mode2, op2);
12856
12857 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
12858 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
12859 else
12860 pat = GEN_FCN (icode) (target, op0, op1, op2);
12861 if (! pat)
12862 return 0;
12863 emit_insn (pat);
12864
12865 return target;
12866 }
12867
12868 /* Expand the lvx builtins. */
12869 static rtx
12870 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
12871 {
12872 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12873 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12874 tree arg0;
12875 machine_mode tmode, mode0;
12876 rtx pat, op0;
12877 enum insn_code icode;
12878
12879 switch (fcode)
12880 {
12881 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
12882 icode = CODE_FOR_vector_altivec_load_v16qi;
12883 break;
12884 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
12885 icode = CODE_FOR_vector_altivec_load_v8hi;
12886 break;
12887 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
12888 icode = CODE_FOR_vector_altivec_load_v4si;
12889 break;
12890 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
12891 icode = CODE_FOR_vector_altivec_load_v4sf;
12892 break;
12893 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
12894 icode = CODE_FOR_vector_altivec_load_v2df;
12895 break;
12896 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
12897 icode = CODE_FOR_vector_altivec_load_v2di;
 break;
12898 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
12899 icode = CODE_FOR_vector_altivec_load_v1ti;
12900 break;
12901 default:
12902 *expandedp = false;
12903 return NULL_RTX;
12904 }
12905
12906 *expandedp = true;
12907
12908 arg0 = CALL_EXPR_ARG (exp, 0);
12909 op0 = expand_normal (arg0);
12910 tmode = insn_data[icode].operand[0].mode;
12911 mode0 = insn_data[icode].operand[1].mode;
12912
12913 if (target == 0
12914 || GET_MODE (target) != tmode
12915 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12916 target = gen_reg_rtx (tmode);
12917
12918 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12919 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12920
12921 pat = GEN_FCN (icode) (target, op0);
12922 if (! pat)
12923 return 0;
12924 emit_insn (pat);
12925 return target;
12926 }
12927
12928 /* Expand the stvx builtins. */
12929 static rtx
12930 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
12931 bool *expandedp)
12932 {
12933 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12934 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12935 tree arg0, arg1;
12936 machine_mode mode0, mode1;
12937 rtx pat, op0, op1;
12938 enum insn_code icode;
12939
12940 switch (fcode)
12941 {
12942 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
12943 icode = CODE_FOR_vector_altivec_store_v16qi;
12944 break;
12945 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
12946 icode = CODE_FOR_vector_altivec_store_v8hi;
12947 break;
12948 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
12949 icode = CODE_FOR_vector_altivec_store_v4si;
12950 break;
12951 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
12952 icode = CODE_FOR_vector_altivec_store_v4sf;
12953 break;
12954 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
12955 icode = CODE_FOR_vector_altivec_store_v2df;
12956 break;
12957 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
12958 icode = CODE_FOR_vector_altivec_store_v2di;
 break;
12959 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
12960 icode = CODE_FOR_vector_altivec_store_v1ti;
12961 break;
12962 default:
12963 *expandedp = false;
12964 return NULL_RTX;
12965 }
12966
12967 arg0 = CALL_EXPR_ARG (exp, 0);
12968 arg1 = CALL_EXPR_ARG (exp, 1);
12969 op0 = expand_normal (arg0);
12970 op1 = expand_normal (arg1);
12971 mode0 = insn_data[icode].operand[0].mode;
12972 mode1 = insn_data[icode].operand[1].mode;
12973
12974 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12975 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12976 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12977 op1 = copy_to_mode_reg (mode1, op1);
12978
12979 pat = GEN_FCN (icode) (op0, op1);
12980 if (pat)
12981 emit_insn (pat);
12982
12983 *expandedp = true;
12984 return NULL_RTX;
12985 }
12986
12987 /* Expand the dst builtins. */
12988 static rtx
12989 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
12990 bool *expandedp)
12991 {
12992 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12993 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
12994 tree arg0, arg1, arg2;
12995 machine_mode mode0, mode1;
12996 rtx pat, op0, op1, op2;
12997 const struct builtin_description *d;
12998 size_t i;
12999
13000 *expandedp = false;
13001
13002 /* Handle DST variants. */
13003 d = bdesc_dst;
13004 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
13005 if (d->code == fcode)
13006 {
13007 arg0 = CALL_EXPR_ARG (exp, 0);
13008 arg1 = CALL_EXPR_ARG (exp, 1);
13009 arg2 = CALL_EXPR_ARG (exp, 2);
13010 op0 = expand_normal (arg0);
13011 op1 = expand_normal (arg1);
13012 op2 = expand_normal (arg2);
13013 mode0 = insn_data[d->icode].operand[0].mode;
13014 mode1 = insn_data[d->icode].operand[1].mode;
13015
13016 /* Invalid arguments; bail out before generating bad rtl. */
13017 if (arg0 == error_mark_node
13018 || arg1 == error_mark_node
13019 || arg2 == error_mark_node)
13020 return const0_rtx;
13021
13022 *expandedp = true;
13023 STRIP_NOPS (arg2);
13024 if (TREE_CODE (arg2) != INTEGER_CST
13025 || TREE_INT_CST_LOW (arg2) & ~0x3)
13026 {
13027 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
13028 return const0_rtx;
13029 }
13030
13031 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13032 op0 = copy_to_mode_reg (Pmode, op0);
13033 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13034 op1 = copy_to_mode_reg (mode1, op1);
13035
13036 pat = GEN_FCN (d->icode) (op0, op1, op2);
13037 if (pat != 0)
13038 emit_insn (pat);
13039
13040 return NULL_RTX;
13041 }
13042
13043 return NULL_RTX;
13044 }
13045
13046 /* Expand vec_init builtin. */
13047 static rtx
13048 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
13049 {
13050 machine_mode tmode = TYPE_MODE (type);
13051 machine_mode inner_mode = GET_MODE_INNER (tmode);
13052 int i, n_elt = GET_MODE_NUNITS (tmode);
13053
13054 gcc_assert (VECTOR_MODE_P (tmode));
13055 gcc_assert (n_elt == call_expr_nargs (exp));
13056
13057 if (!target || !register_operand (target, tmode))
13058 target = gen_reg_rtx (tmode);
13059
13060 /* If we have a vector comprised of a single element, such as V1TImode, do
13061 the initialization directly. */
13062 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
13063 {
13064 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
13065 emit_move_insn (target, gen_lowpart (tmode, x));
13066 }
13067 else
13068 {
13069 rtvec v = rtvec_alloc (n_elt);
13070
13071 for (i = 0; i < n_elt; ++i)
13072 {
13073 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
13074 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
13075 }
13076
13077 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
13078 }
13079
13080 return target;
13081 }
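/* Usage sketch (editorial; the names are internal): a four-element
   vec_init such as the builtin behind ALTIVEC_BUILTIN_VEC_INIT_V4SI
   arrives with n_elt == 4, so the else branch above gathers the four
   SImode arguments into a PARALLEL for rs6000_expand_vector_init.  */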
13082
13083 /* Return the integer constant in ARG. Constrain it to be in the range
13084 of the subparts of VEC_TYPE; issue an error if not. */
13085
13086 static int
13087 get_element_number (tree vec_type, tree arg)
13088 {
13089 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
13090
13091 if (!tree_fits_uhwi_p (arg)
13092 || (elt = tree_to_uhwi (arg), elt > max))
13093 {
13094 error ("selector must be an integer constant in the range 0..%wi", max);
13095 return 0;
13096 }
13097
13098 return elt;
13099 }
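/* Example (editorial note): for a V4SI vector TYPE_VECTOR_SUBPARTS is
   4, so MAX is 3; a selector of 5 triggers the error above and element
   0 is quietly used instead.  */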
13100
13101 /* Expand vec_set builtin. */
13102 static rtx
13103 altivec_expand_vec_set_builtin (tree exp)
13104 {
13105 machine_mode tmode, mode1;
13106 tree arg0, arg1, arg2;
13107 int elt;
13108 rtx op0, op1;
13109
13110 arg0 = CALL_EXPR_ARG (exp, 0);
13111 arg1 = CALL_EXPR_ARG (exp, 1);
13112 arg2 = CALL_EXPR_ARG (exp, 2);
13113
13114 tmode = TYPE_MODE (TREE_TYPE (arg0));
13115 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
13116 gcc_assert (VECTOR_MODE_P (tmode));
13117
13118 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
13119 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
13120 elt = get_element_number (TREE_TYPE (arg0), arg2);
13121
13122 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
13123 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
13124
13125 op0 = force_reg (tmode, op0);
13126 op1 = force_reg (mode1, op1);
13127
13128 rs6000_expand_vector_set (op0, op1, elt);
13129
13130 return op0;
13131 }
13132
13133 /* Expand vec_ext builtin. */
13134 static rtx
13135 altivec_expand_vec_ext_builtin (tree exp, rtx target)
13136 {
13137 machine_mode tmode, mode0;
13138 tree arg0, arg1;
13139 int elt;
13140 rtx op0;
13141
13142 arg0 = CALL_EXPR_ARG (exp, 0);
13143 arg1 = CALL_EXPR_ARG (exp, 1);
13144
13145 op0 = expand_normal (arg0);
13146 elt = get_element_number (TREE_TYPE (arg0), arg1);
13147
13148 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
13149 mode0 = TYPE_MODE (TREE_TYPE (arg0));
13150 gcc_assert (VECTOR_MODE_P (mode0));
13151
13152 op0 = force_reg (mode0, op0);
13153
13154 if (optimize || !target || !register_operand (target, tmode))
13155 target = gen_reg_rtx (tmode);
13156
13157 rs6000_expand_vector_extract (target, op0, elt);
13158
13159 return target;
13160 }
13161
13162 /* Expand the builtin in EXP and store the result in TARGET. Store
13163 true in *EXPANDEDP if we found a builtin to expand. */
13164 static rtx
13165 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
13166 {
13167 const struct builtin_description *d;
13168 size_t i;
13169 enum insn_code icode;
13170 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13171 tree arg0;
13172 rtx op0, pat;
13173 machine_mode tmode, mode0;
13174 enum rs6000_builtins fcode
13175 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13176
13177 if (rs6000_overloaded_builtin_p (fcode))
13178 {
13179 *expandedp = true;
13180 error ("unresolved overload for Altivec builtin %qF", fndecl);
13181
13182 /* Given it is invalid, just generate a normal call. */
13183 return expand_call (exp, target, false);
13184 }
13185
13186 target = altivec_expand_ld_builtin (exp, target, expandedp);
13187 if (*expandedp)
13188 return target;
13189
13190 target = altivec_expand_st_builtin (exp, target, expandedp);
13191 if (*expandedp)
13192 return target;
13193
13194 target = altivec_expand_dst_builtin (exp, target, expandedp);
13195 if (*expandedp)
13196 return target;
13197
13198 *expandedp = true;
13199
13200 switch (fcode)
13201 {
13202 case ALTIVEC_BUILTIN_STVX_V2DF:
13203 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df, exp);
13204 case ALTIVEC_BUILTIN_STVX_V2DI:
13205 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di, exp);
13206 case ALTIVEC_BUILTIN_STVX_V4SF:
13207 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf, exp);
13208 case ALTIVEC_BUILTIN_STVX:
13209 case ALTIVEC_BUILTIN_STVX_V4SI:
13210 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp);
13211 case ALTIVEC_BUILTIN_STVX_V8HI:
13212 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi, exp);
13213 case ALTIVEC_BUILTIN_STVX_V16QI:
13214 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi, exp);
13215 case ALTIVEC_BUILTIN_STVEBX:
13216 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
13217 case ALTIVEC_BUILTIN_STVEHX:
13218 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
13219 case ALTIVEC_BUILTIN_STVEWX:
13220 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
13221 case ALTIVEC_BUILTIN_STVXL_V2DF:
13222 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
13223 case ALTIVEC_BUILTIN_STVXL_V2DI:
13224 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
13225 case ALTIVEC_BUILTIN_STVXL_V4SF:
13226 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
13227 case ALTIVEC_BUILTIN_STVXL:
13228 case ALTIVEC_BUILTIN_STVXL_V4SI:
13229 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
13230 case ALTIVEC_BUILTIN_STVXL_V8HI:
13231 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
13232 case ALTIVEC_BUILTIN_STVXL_V16QI:
13233 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
13234
13235 case ALTIVEC_BUILTIN_STVLX:
13236 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
13237 case ALTIVEC_BUILTIN_STVLXL:
13238 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
13239 case ALTIVEC_BUILTIN_STVRX:
13240 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
13241 case ALTIVEC_BUILTIN_STVRXL:
13242 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
13243
13244 case VSX_BUILTIN_STXVD2X_V1TI:
13245 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
13246 case VSX_BUILTIN_STXVD2X_V2DF:
13247 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
13248 case VSX_BUILTIN_STXVD2X_V2DI:
13249 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
13250 case VSX_BUILTIN_STXVW4X_V4SF:
13251 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
13252 case VSX_BUILTIN_STXVW4X_V4SI:
13253 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
13254 case VSX_BUILTIN_STXVW4X_V8HI:
13255 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
13256 case VSX_BUILTIN_STXVW4X_V16QI:
13257 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
13258
13259 case ALTIVEC_BUILTIN_MFVSCR:
13260 icode = CODE_FOR_altivec_mfvscr;
13261 tmode = insn_data[icode].operand[0].mode;
13262
13263 if (target == 0
13264 || GET_MODE (target) != tmode
13265 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13266 target = gen_reg_rtx (tmode);
13267
13268 pat = GEN_FCN (icode) (target);
13269 if (! pat)
13270 return 0;
13271 emit_insn (pat);
13272 return target;
13273
13274 case ALTIVEC_BUILTIN_MTVSCR:
13275 icode = CODE_FOR_altivec_mtvscr;
13276 arg0 = CALL_EXPR_ARG (exp, 0);
13277 op0 = expand_normal (arg0);
13278 mode0 = insn_data[icode].operand[0].mode;
13279
13280 /* If we got invalid arguments, bail out before generating bad rtl. */
13281 if (arg0 == error_mark_node)
13282 return const0_rtx;
13283
13284 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13285 op0 = copy_to_mode_reg (mode0, op0);
13286
13287 pat = GEN_FCN (icode) (op0);
13288 if (pat)
13289 emit_insn (pat);
13290 return NULL_RTX;
13291
13292 case ALTIVEC_BUILTIN_DSSALL:
13293 emit_insn (gen_altivec_dssall ());
13294 return NULL_RTX;
13295
13296 case ALTIVEC_BUILTIN_DSS:
13297 icode = CODE_FOR_altivec_dss;
13298 arg0 = CALL_EXPR_ARG (exp, 0);
13299 STRIP_NOPS (arg0);
13300 op0 = expand_normal (arg0);
13301 mode0 = insn_data[icode].operand[0].mode;
13302
13303 /* If we got invalid arguments, bail out before generating bad rtl. */
13304 if (arg0 == error_mark_node)
13305 return const0_rtx;
13306
13307 if (TREE_CODE (arg0) != INTEGER_CST
13308 || TREE_INT_CST_LOW (arg0) & ~0x3)
13309 {
13310 error ("argument to dss must be a 2-bit unsigned literal");
13311 return const0_rtx;
13312 }
13313
13314 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13315 op0 = copy_to_mode_reg (mode0, op0);
13316
13317 emit_insn (gen_altivec_dss (op0));
13318 return NULL_RTX;
13319
13320 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
13321 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
13322 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
13323 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
13324 case VSX_BUILTIN_VEC_INIT_V2DF:
13325 case VSX_BUILTIN_VEC_INIT_V2DI:
13326 case VSX_BUILTIN_VEC_INIT_V1TI:
13327 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
13328
13329 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
13330 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
13331 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
13332 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
13333 case VSX_BUILTIN_VEC_SET_V2DF:
13334 case VSX_BUILTIN_VEC_SET_V2DI:
13335 case VSX_BUILTIN_VEC_SET_V1TI:
13336 return altivec_expand_vec_set_builtin (exp);
13337
13338 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
13339 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
13340 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
13341 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
13342 case VSX_BUILTIN_VEC_EXT_V2DF:
13343 case VSX_BUILTIN_VEC_EXT_V2DI:
13344 case VSX_BUILTIN_VEC_EXT_V1TI:
13345 return altivec_expand_vec_ext_builtin (exp, target);
13346
13347 default:
13348 break;
13350 }
13351
13352 /* Expand abs* operations. */
13353 d = bdesc_abs;
13354 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
13355 if (d->code == fcode)
13356 return altivec_expand_abs_builtin (d->icode, exp, target);
13357
13358 /* Expand the AltiVec predicates. */
13359 d = bdesc_altivec_preds;
13360 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
13361 if (d->code == fcode)
13362 return altivec_expand_predicate_builtin (d->icode, exp, target);
13363
13364 /* LV* are funky. They were initialized differently, so we expand them here. */
13365 switch (fcode)
13366 {
13367 case ALTIVEC_BUILTIN_LVSL:
13368 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
13369 exp, target, false);
13370 case ALTIVEC_BUILTIN_LVSR:
13371 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
13372 exp, target, false);
13373 case ALTIVEC_BUILTIN_LVEBX:
13374 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
13375 exp, target, false);
13376 case ALTIVEC_BUILTIN_LVEHX:
13377 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
13378 exp, target, false);
13379 case ALTIVEC_BUILTIN_LVEWX:
13380 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
13381 exp, target, false);
13382 case ALTIVEC_BUILTIN_LVXL_V2DF:
13383 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
13384 exp, target, false);
13385 case ALTIVEC_BUILTIN_LVXL_V2DI:
13386 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
13387 exp, target, false);
13388 case ALTIVEC_BUILTIN_LVXL_V4SF:
13389 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
13390 exp, target, false);
13391 case ALTIVEC_BUILTIN_LVXL:
13392 case ALTIVEC_BUILTIN_LVXL_V4SI:
13393 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
13394 exp, target, false);
13395 case ALTIVEC_BUILTIN_LVXL_V8HI:
13396 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
13397 exp, target, false);
13398 case ALTIVEC_BUILTIN_LVXL_V16QI:
13399 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
13400 exp, target, false);
13401 case ALTIVEC_BUILTIN_LVX_V2DF:
13402 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df,
13403 exp, target, false);
13404 case ALTIVEC_BUILTIN_LVX_V2DI:
13405 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di,
13406 exp, target, false);
13407 case ALTIVEC_BUILTIN_LVX_V4SF:
13408 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf,
13409 exp, target, false);
13410 case ALTIVEC_BUILTIN_LVX:
13411 case ALTIVEC_BUILTIN_LVX_V4SI:
13412 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si,
13413 exp, target, false);
13414 case ALTIVEC_BUILTIN_LVX_V8HI:
13415 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi,
13416 exp, target, false);
13417 case ALTIVEC_BUILTIN_LVX_V16QI:
13418 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi,
13419 exp, target, false);
13420 case ALTIVEC_BUILTIN_LVLX:
13421 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
13422 exp, target, true);
13423 case ALTIVEC_BUILTIN_LVLXL:
13424 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
13425 exp, target, true);
13426 case ALTIVEC_BUILTIN_LVRX:
13427 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
13428 exp, target, true);
13429 case ALTIVEC_BUILTIN_LVRXL:
13430 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
13431 exp, target, true);
13432 case VSX_BUILTIN_LXVD2X_V1TI:
13433 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
13434 exp, target, false);
13435 case VSX_BUILTIN_LXVD2X_V2DF:
13436 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
13437 exp, target, false);
13438 case VSX_BUILTIN_LXVD2X_V2DI:
13439 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
13440 exp, target, false);
13441 case VSX_BUILTIN_LXVW4X_V4SF:
13442 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
13443 exp, target, false);
13444 case VSX_BUILTIN_LXVW4X_V4SI:
13445 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
13446 exp, target, false);
13447 case VSX_BUILTIN_LXVW4X_V8HI:
13448 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
13449 exp, target, false);
13450 case VSX_BUILTIN_LXVW4X_V16QI:
13451 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
13452 exp, target, false);
13454 default:
13455 break;
13457 }
13458
13459 *expandedp = false;
13460 return NULL_RTX;
13461 }
13462
13463 /* Expand the builtin in EXP and store the result in TARGET. Store
13464 true in *EXPANDEDP if we found a builtin to expand. */
13465 static rtx
13466 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
13467 {
13468 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13469 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13470 const struct builtin_description *d;
13471 size_t i;
13472
13473 *expandedp = true;
13474
13475 switch (fcode)
13476 {
13477 case PAIRED_BUILTIN_STX:
13478 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
13479 case PAIRED_BUILTIN_LX:
13480 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
13481 default:
13482 break;
13484 }
13485
13486 /* Expand the paired predicates. */
13487 d = bdesc_paired_preds;
13488 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
13489 if (d->code == fcode)
13490 return paired_expand_predicate_builtin (d->icode, exp, target);
13491
13492 *expandedp = false;
13493 return NULL_RTX;
13494 }
13495
13496 /* Binops that need to be initialized manually, but can be expanded
13497 automagically by rs6000_expand_binop_builtin. */
13498 static const struct builtin_description bdesc_2arg_spe[] =
13499 {
13500 { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
13501 { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
13502 { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
13503 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
13504 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
13505 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
13506 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
13507 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
13508 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
13509 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
13510 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
13511 { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
13512 { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
13513 { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
13514 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
13515 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
13516 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
13517 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
13518 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
13519 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
13520 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
13521 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
13522 };
13523
13524 /* Expand the builtin in EXP and store the result in TARGET. Store
13525 true in *EXPANDEDP if we found a builtin to expand.
13526
13527 This expands the SPE builtins that are not simple unary and binary
13528 operations. */
13529 static rtx
13530 spe_expand_builtin (tree exp, rtx target, bool *expandedp)
13531 {
13532 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13533 tree arg1, arg0;
13534 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13535 enum insn_code icode;
13536 machine_mode tmode, mode0;
13537 rtx pat, op0;
13538 const struct builtin_description *d;
13539 size_t i;
13540
13541 *expandedp = true;
13542
13543 /* Syntax check for a 5-bit unsigned immediate. */
13544 switch (fcode)
13545 {
13546 case SPE_BUILTIN_EVSTDD:
13547 case SPE_BUILTIN_EVSTDH:
13548 case SPE_BUILTIN_EVSTDW:
13549 case SPE_BUILTIN_EVSTWHE:
13550 case SPE_BUILTIN_EVSTWHO:
13551 case SPE_BUILTIN_EVSTWWE:
13552 case SPE_BUILTIN_EVSTWWO:
13553 arg1 = CALL_EXPR_ARG (exp, 2);
13554 if (TREE_CODE (arg1) != INTEGER_CST
13555 || TREE_INT_CST_LOW (arg1) & ~0x1f)
13556 {
13557 error ("argument 2 must be a 5-bit unsigned literal");
13558 return const0_rtx;
13559 }
13560 break;
13561 default:
13562 break;
13563 }
13564
13565 /* The evsplat*i instructions are not quite generic. */
13566 switch (fcode)
13567 {
13568 case SPE_BUILTIN_EVSPLATFI:
13569 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
13570 exp, target);
13571 case SPE_BUILTIN_EVSPLATI:
13572 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
13573 exp, target);
13574 default:
13575 break;
13576 }
13577
13578 d = bdesc_2arg_spe;
13579 for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
13580 if (d->code == fcode)
13581 return rs6000_expand_binop_builtin (d->icode, exp, target);
13582
13583 d = bdesc_spe_predicates;
13584 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
13585 if (d->code == fcode)
13586 return spe_expand_predicate_builtin (d->icode, exp, target);
13587
13588 d = bdesc_spe_evsel;
13589 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
13590 if (d->code == fcode)
13591 return spe_expand_evsel_builtin (d->icode, exp, target);
13592
13593 switch (fcode)
13594 {
13595 case SPE_BUILTIN_EVSTDDX:
13596 return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
13597 case SPE_BUILTIN_EVSTDHX:
13598 return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
13599 case SPE_BUILTIN_EVSTDWX:
13600 return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
13601 case SPE_BUILTIN_EVSTWHEX:
13602 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
13603 case SPE_BUILTIN_EVSTWHOX:
13604 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
13605 case SPE_BUILTIN_EVSTWWEX:
13606 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
13607 case SPE_BUILTIN_EVSTWWOX:
13608 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
13609 case SPE_BUILTIN_EVSTDD:
13610 return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
13611 case SPE_BUILTIN_EVSTDH:
13612 return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
13613 case SPE_BUILTIN_EVSTDW:
13614 return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
13615 case SPE_BUILTIN_EVSTWHE:
13616 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
13617 case SPE_BUILTIN_EVSTWHO:
13618 return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
13619 case SPE_BUILTIN_EVSTWWE:
13620 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
13621 case SPE_BUILTIN_EVSTWWO:
13622 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
13623 case SPE_BUILTIN_MFSPEFSCR:
13624 icode = CODE_FOR_spe_mfspefscr;
13625 tmode = insn_data[icode].operand[0].mode;
13626
13627 if (target == 0
13628 || GET_MODE (target) != tmode
13629 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13630 target = gen_reg_rtx (tmode);
13631
13632 pat = GEN_FCN (icode) (target);
13633 if (! pat)
13634 return 0;
13635 emit_insn (pat);
13636 return target;
13637 case SPE_BUILTIN_MTSPEFSCR:
13638 icode = CODE_FOR_spe_mtspefscr;
13639 arg0 = CALL_EXPR_ARG (exp, 0);
13640 op0 = expand_normal (arg0);
13641 mode0 = insn_data[icode].operand[0].mode;
13642
13643 if (arg0 == error_mark_node)
13644 return const0_rtx;
13645
13646 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13647 op0 = copy_to_mode_reg (mode0, op0);
13648
13649 pat = GEN_FCN (icode) (op0);
13650 if (pat)
13651 emit_insn (pat);
13652 return NULL_RTX;
13653 default:
13654 break;
13655 }
13656
13657 *expandedp = false;
13658 return NULL_RTX;
13659 }
13660
13661 static rtx
13662 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13663 {
13664 rtx pat, scratch, tmp;
13665 tree form = CALL_EXPR_ARG (exp, 0);
13666 tree arg0 = CALL_EXPR_ARG (exp, 1);
13667 tree arg1 = CALL_EXPR_ARG (exp, 2);
13668 rtx op0 = expand_normal (arg0);
13669 rtx op1 = expand_normal (arg1);
13670 machine_mode mode0 = insn_data[icode].operand[1].mode;
13671 machine_mode mode1 = insn_data[icode].operand[2].mode;
13672 int form_int;
13673 enum rtx_code code;
13674
13675 if (TREE_CODE (form) != INTEGER_CST)
13676 {
13677 error ("argument 1 of __builtin_paired_predicate must be a constant");
13678 return const0_rtx;
13679 }
13680 else
13681 form_int = TREE_INT_CST_LOW (form);
13682
13683 gcc_assert (mode0 == mode1);
13684
13685 if (arg0 == error_mark_node || arg1 == error_mark_node)
13686 return const0_rtx;
13687
13688 if (target == 0
13689 || GET_MODE (target) != SImode
13690 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
13691 target = gen_reg_rtx (SImode);
13692 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
13693 op0 = copy_to_mode_reg (mode0, op0);
13694 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
13695 op1 = copy_to_mode_reg (mode1, op1);
13696
13697 scratch = gen_reg_rtx (CCFPmode);
13698
13699 pat = GEN_FCN (icode) (scratch, op0, op1);
13700 if (!pat)
13701 return const0_rtx;
13702
13703 emit_insn (pat);
13704
13705 switch (form_int)
13706 {
13707 /* LT bit. */
13708 case 0:
13709 code = LT;
13710 break;
13711 /* GT bit. */
13712 case 1:
13713 code = GT;
13714 break;
13715 /* EQ bit. */
13716 case 2:
13717 code = EQ;
13718 break;
13719 /* UN bit. */
13720 case 3:
13721 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
13722 return target;
13723 default:
13724 error ("argument 1 of __builtin_paired_predicate is out of range");
13725 return const0_rtx;
13726 }
13727
13728 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
13729 emit_move_insn (target, tmp);
13730 return target;
13731 }
13732
13733 static rtx
13734 spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13735 {
13736 rtx pat, scratch, tmp;
13737 tree form = CALL_EXPR_ARG (exp, 0);
13738 tree arg0 = CALL_EXPR_ARG (exp, 1);
13739 tree arg1 = CALL_EXPR_ARG (exp, 2);
13740 rtx op0 = expand_normal (arg0);
13741 rtx op1 = expand_normal (arg1);
13742 machine_mode mode0 = insn_data[icode].operand[1].mode;
13743 machine_mode mode1 = insn_data[icode].operand[2].mode;
13744 int form_int;
13745 enum rtx_code code;
13746
13747 if (TREE_CODE (form) != INTEGER_CST)
13748 {
13749 error ("argument 1 of __builtin_spe_predicate must be a constant");
13750 return const0_rtx;
13751 }
13752 else
13753 form_int = TREE_INT_CST_LOW (form);
13754
13755 gcc_assert (mode0 == mode1);
13756
13757 if (arg0 == error_mark_node || arg1 == error_mark_node)
13758 return const0_rtx;
13759
13760 if (target == 0
13761 || GET_MODE (target) != SImode
13762 || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
13763 target = gen_reg_rtx (SImode);
13764
13765 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13766 op0 = copy_to_mode_reg (mode0, op0);
13767 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13768 op1 = copy_to_mode_reg (mode1, op1);
13769
13770 scratch = gen_reg_rtx (CCmode);
13771
13772 pat = GEN_FCN (icode) (scratch, op0, op1);
13773 if (! pat)
13774 return const0_rtx;
13775 emit_insn (pat);
13776
13777 /* There are 4 variants for each predicate: _any_, _all_, _upper_,
13778 _lower_. We use one compare, but look in different bits of the
13779 CR for each variant.
13780
13781 There are 2 elements in each SPE simd type (upper/lower). The CR
13782 bits are set as follows:
13783
13784 	 BIT 0 | BIT 1 |  BIT 2  |  BIT 3
13785 U | L | (U | L) | (U & L)
13786
13787 So, for an "all" relationship, BIT 3 would be set.
13788 For an "any" relationship, BIT 2 would be set. Etc.
13789
13790 Following traditional nomenclature, these bits map to:
13791
13792 	 BIT 0 | BIT 1 | BIT 2 | BIT 3
13793 LT | GT | EQ | OV
13794
13795 	 Later, we will generate rtl to look in the OV/EQ/LT/GT bits for the all/any/upper/lower variants, respectively.
13796 */
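  /* A worked example (illustrative only): if a compare finds the upper
     elements related (U = 1) but not the lower ones (L = 0), the CR
     field holds LT/GT/EQ/OV = 1/0/1/0, so the "upper" (LT) and "any"
     (EQ) variants are true while the "lower" (GT) and "all" (OV)
     variants are false.  */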
13797
13798 switch (form_int)
13799 {
13800 /* All variant. OV bit. */
13801 case 0:
13802 /* We need to get to the OV bit, which is the ORDERED bit. We
13803 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
13804 that's ugly and will make validate_condition_mode die.
13805 So let's just use another pattern. */
13806 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
13807 return target;
13808 /* Any variant. EQ bit. */
13809 case 1:
13810 code = EQ;
13811 break;
13812 /* Upper variant. LT bit. */
13813 case 2:
13814 code = LT;
13815 break;
13816 /* Lower variant. GT bit. */
13817 case 3:
13818 code = GT;
13819 break;
13820 default:
13821 error ("argument 1 of __builtin_spe_predicate is out of range");
13822 return const0_rtx;
13823 }
13824
13825 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
13826 emit_move_insn (target, tmp);
13827
13828 return target;
13829 }
13830
13831 /* The evsel builtins look like this:
13832
13833 e = __builtin_spe_evsel_OP (a, b, c, d);
13834
13835 and work like this:
13836
13837 e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
13838 e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];
13839 */
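/* For instance (illustrative; assuming the usual SPE builtin spellings
   such as __builtin_spe_evsel_gts):

     e = __builtin_spe_evsel_gts (a, b, c, d);

   yields c[upper] when a[upper] > b[upper] (signed compare) and
   d[upper] otherwise, and likewise for the lower element.  */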
13840
13841 static rtx
13842 spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
13843 {
13844 rtx pat, scratch;
13845 tree arg0 = CALL_EXPR_ARG (exp, 0);
13846 tree arg1 = CALL_EXPR_ARG (exp, 1);
13847 tree arg2 = CALL_EXPR_ARG (exp, 2);
13848 tree arg3 = CALL_EXPR_ARG (exp, 3);
13849 rtx op0 = expand_normal (arg0);
13850 rtx op1 = expand_normal (arg1);
13851 rtx op2 = expand_normal (arg2);
13852 rtx op3 = expand_normal (arg3);
13853 machine_mode mode0 = insn_data[icode].operand[1].mode;
13854 machine_mode mode1 = insn_data[icode].operand[2].mode;
13855
13856 gcc_assert (mode0 == mode1);
13857
13858 if (arg0 == error_mark_node || arg1 == error_mark_node
13859 || arg2 == error_mark_node || arg3 == error_mark_node)
13860 return const0_rtx;
13861
13862 if (target == 0
13863 || GET_MODE (target) != mode0
13864 || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
13865 target = gen_reg_rtx (mode0);
13866
13867 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13868 op0 = copy_to_mode_reg (mode0, op0);
13869 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13870 op1 = copy_to_mode_reg (mode0, op1);
13871 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
13872 op2 = copy_to_mode_reg (mode0, op2);
13873 if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
13874 op3 = copy_to_mode_reg (mode0, op3);
13875
13876 /* Generate the compare. */
13877 scratch = gen_reg_rtx (CCmode);
13878 pat = GEN_FCN (icode) (scratch, op0, op1);
13879 if (! pat)
13880 return const0_rtx;
13881 emit_insn (pat);
13882
13883 if (mode0 == V2SImode)
13884 emit_insn (gen_spe_evsel (target, op2, op3, scratch));
13885 else
13886 emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));
13887
13888 return target;
13889 }
13890
13891 /* Raise an error message for a builtin function that is called without the
13892 appropriate target options being set. */
13893
13894 static void
13895 rs6000_invalid_builtin (enum rs6000_builtins fncode)
13896 {
13897 size_t uns_fncode = (size_t)fncode;
13898 const char *name = rs6000_builtin_info[uns_fncode].name;
13899 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
13900
13901 gcc_assert (name != NULL);
13902 if ((fnmask & RS6000_BTM_CELL) != 0)
13903 error ("Builtin function %s is only valid for the cell processor", name);
13904 else if ((fnmask & RS6000_BTM_VSX) != 0)
13905 error ("Builtin function %s requires the -mvsx option", name);
13906 else if ((fnmask & RS6000_BTM_HTM) != 0)
13907 error ("Builtin function %s requires the -mhtm option", name);
13908 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
13909 error ("Builtin function %s requires the -maltivec option", name);
13910 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
13911 error ("Builtin function %s requires the -mpaired option", name);
13912 else if ((fnmask & RS6000_BTM_SPE) != 0)
13913 error ("Builtin function %s requires the -mspe option", name);
13914 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
13915 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
13916 error ("Builtin function %s requires the -mhard-dfp and"
13917 " -mpower8-vector options", name);
13918 else if ((fnmask & RS6000_BTM_DFP) != 0)
13919 error ("Builtin function %s requires the -mhard-dfp option", name);
13920 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
13921 error ("Builtin function %s requires the -mpower8-vector option", name);
13922 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
13923 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
13924 error ("Builtin function %s requires the -mhard-float and"
13925 " -mlong-double-128 options", name);
13926 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
13927 error ("Builtin function %s requires the -mhard-float option", name);
13928 else
13929 error ("Builtin function %s is not supported with the current options",
13930 name);
13931 }
13932
13933 /* Expand an expression EXP that calls a built-in function,
13934 with result going to TARGET if that's convenient
13935 (and in mode MODE if that's convenient).
13936 SUBTARGET may be used as the target for computing one of EXP's operands.
13937 IGNORE is nonzero if the value is to be ignored. */
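/* (For example, a source-level call to __builtin_recipdiv arrives here
   as RS6000_BUILTIN_RECIP and is expanded via CODE_FOR_recipdf3 in the
   switch below.)  */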
13938
13939 static rtx
13940 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13941 machine_mode mode ATTRIBUTE_UNUSED,
13942 int ignore ATTRIBUTE_UNUSED)
13943 {
13944 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13945 enum rs6000_builtins fcode
13946 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
13947 size_t uns_fcode = (size_t)fcode;
13948 const struct builtin_description *d;
13949 size_t i;
13950 rtx ret;
13951 bool success;
13952 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
13953 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
13954
13955 if (TARGET_DEBUG_BUILTIN)
13956 {
13957 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
13958 const char *name1 = rs6000_builtin_info[uns_fcode].name;
13959 const char *name2 = ((icode != CODE_FOR_nothing)
13960 ? get_insn_name ((int)icode)
13961 : "nothing");
13962 const char *name3;
13963
13964 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
13965 {
13966 default: name3 = "unknown"; break;
13967 case RS6000_BTC_SPECIAL: name3 = "special"; break;
13968 case RS6000_BTC_UNARY: name3 = "unary"; break;
13969 case RS6000_BTC_BINARY: name3 = "binary"; break;
13970 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
13971 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
13972 case RS6000_BTC_ABS: name3 = "abs"; break;
13973 case RS6000_BTC_EVSEL: name3 = "evsel"; break;
13974 case RS6000_BTC_DST: name3 = "dst"; break;
13975 }
13976
13978 fprintf (stderr,
13979 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
13980 (name1) ? name1 : "---", fcode,
13981 (name2) ? name2 : "---", (int)icode,
13982 name3,
13983 func_valid_p ? "" : ", not valid");
13984 }
13985
13986 if (!func_valid_p)
13987 {
13988 rs6000_invalid_builtin (fcode);
13989
13990 /* Given it is invalid, just generate a normal call. */
13991 return expand_call (exp, target, ignore);
13992 }
13993
13994 switch (fcode)
13995 {
13996 case RS6000_BUILTIN_RECIP:
13997 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
13998
13999 case RS6000_BUILTIN_RECIPF:
14000 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
14001
14002 case RS6000_BUILTIN_RSQRTF:
14003 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
14004
14005 case RS6000_BUILTIN_RSQRT:
14006 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
14007
14008 case POWER7_BUILTIN_BPERMD:
14009 return rs6000_expand_binop_builtin (((TARGET_64BIT)
14010 ? CODE_FOR_bpermd_di
14011 : CODE_FOR_bpermd_si), exp, target);
14012
14013 case RS6000_BUILTIN_GET_TB:
14014 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
14015 target);
14016
14017 case RS6000_BUILTIN_MFTB:
14018 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
14019 ? CODE_FOR_rs6000_mftb_di
14020 : CODE_FOR_rs6000_mftb_si),
14021 target);
14022
14023 case RS6000_BUILTIN_MFFS:
14024 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
14025
14026 case RS6000_BUILTIN_MTFSF:
14027 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
14028
14029 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
14030 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
14031 {
14032 int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
14033 : (int) CODE_FOR_altivec_lvsl_direct);
14034 machine_mode tmode = insn_data[icode].operand[0].mode;
14035 machine_mode mode = insn_data[icode].operand[1].mode;
14036 tree arg;
14037 rtx op, addr, pat;
14038
14039 gcc_assert (TARGET_ALTIVEC);
14040
14041 arg = CALL_EXPR_ARG (exp, 0);
14042 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
14043 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
14044 addr = memory_address (mode, op);
14045 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
14046 op = addr;
14047 else
14048 {
14049 /* For the load case we need to negate the address.  */
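	     /* (Applying lvsr/lvsl to the negated address produces the
		permute control vector that the vectorizer's realignment
		sequence expects for loads.)  */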
14050 op = gen_reg_rtx (GET_MODE (addr));
14051 emit_insn (gen_rtx_SET (VOIDmode, op,
14052 gen_rtx_NEG (GET_MODE (addr), addr)));
14053 }
14054 op = gen_rtx_MEM (mode, op);
14055
14056 if (target == 0
14057 || GET_MODE (target) != tmode
14058 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14059 target = gen_reg_rtx (tmode);
14060
14061 pat = GEN_FCN (icode) (target, op);
14062 if (!pat)
14063 return 0;
14064 emit_insn (pat);
14065
14066 return target;
14067 }
14068
14069 case ALTIVEC_BUILTIN_VCFUX:
14070 case ALTIVEC_BUILTIN_VCFSX:
14071 case ALTIVEC_BUILTIN_VCTUXS:
14072 case ALTIVEC_BUILTIN_VCTSXS:
14073 /* FIXME: There's got to be a nicer way to handle this case than
14074 constructing a new CALL_EXPR. */
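	 /* These conversions take an optional scale operand; if the user
	    omitted it (e.g. vec_ctf (v) instead of vec_ctf (v, 0)),
	    rebuild the call with an explicit zero so the expanders
	    always see two arguments.  */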
14075 if (call_expr_nargs (exp) == 1)
14076 {
14077 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
14078 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
14079 }
14080 break;
14081
14082 default:
14083 break;
14084 }
14085
14086 if (TARGET_ALTIVEC)
14087 {
14088 ret = altivec_expand_builtin (exp, target, &success);
14089
14090 if (success)
14091 return ret;
14092 }
14093 if (TARGET_SPE)
14094 {
14095 ret = spe_expand_builtin (exp, target, &success);
14096
14097 if (success)
14098 return ret;
14099 }
14100 if (TARGET_PAIRED_FLOAT)
14101 {
14102 ret = paired_expand_builtin (exp, target, &success);
14103
14104 if (success)
14105 return ret;
14106 }
14107 if (TARGET_HTM)
14108 {
14109 ret = htm_expand_builtin (exp, target, &success);
14110
14111 if (success)
14112 return ret;
14113 }
14114
14115 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
14116 gcc_assert (attr == RS6000_BTC_UNARY
14117 || attr == RS6000_BTC_BINARY
14118 || attr == RS6000_BTC_TERNARY);
14119
14120 /* Handle simple unary operations. */
14121 d = bdesc_1arg;
14122 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14123 if (d->code == fcode)
14124 return rs6000_expand_unop_builtin (d->icode, exp, target);
14125
14126 /* Handle simple binary operations. */
14127 d = bdesc_2arg;
14128 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14129 if (d->code == fcode)
14130 return rs6000_expand_binop_builtin (d->icode, exp, target);
14131
14132 /* Handle simple ternary operations. */
14133 d = bdesc_3arg;
14134 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
14135 if (d->code == fcode)
14136 return rs6000_expand_ternop_builtin (d->icode, exp, target);
14137
14138 gcc_unreachable ();
14139 }
14140
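/* Implement TARGET_INIT_BUILTINS: create the vector type nodes and
   define the target-specific builtin functions for the options in
   force.  */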
14141 static void
14142 rs6000_init_builtins (void)
14143 {
14144 tree tdecl;
14145 tree ftype;
14146 machine_mode mode;
14147
14148 if (TARGET_DEBUG_BUILTIN)
14149 fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
14150 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
14151 (TARGET_SPE) ? ", spe" : "",
14152 (TARGET_ALTIVEC) ? ", altivec" : "",
14153 (TARGET_VSX) ? ", vsx" : "");
14154
14155 V2SI_type_node = build_vector_type (intSI_type_node, 2);
14156 V2SF_type_node = build_vector_type (float_type_node, 2);
14157 V2DI_type_node = build_vector_type (intDI_type_node, 2);
14158 V2DF_type_node = build_vector_type (double_type_node, 2);
14159 V4HI_type_node = build_vector_type (intHI_type_node, 4);
14160 V4SI_type_node = build_vector_type (intSI_type_node, 4);
14161 V4SF_type_node = build_vector_type (float_type_node, 4);
14162 V8HI_type_node = build_vector_type (intHI_type_node, 8);
14163 V16QI_type_node = build_vector_type (intQI_type_node, 16);
14164
14165 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
14166 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
14167 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
14168 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
14169
14170 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
14171 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
14172 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
14173 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
14174
14175 /* We use V1TI mode as a special container to hold __int128_t items that
14176 must live in VSX registers. */
14177 if (intTI_type_node)
14178 {
14179 V1TI_type_node = build_vector_type (intTI_type_node, 1);
14180 unsigned_V1TI_type_node = build_vector_type (unsigned_intTI_type_node, 1);
14181 }
14182
14183 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
14184 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
14185 'vector unsigned short'. */
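/* (For example, C++ name mangling and overload resolution must be able
   to tell a 'vector bool int' apart from a 'vector unsigned int', even
   though both are built on unsigned SImode elements.)  */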
14186
14187 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
14188 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
14189 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
14190 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
14191 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
14192
14193 long_integer_type_internal_node = long_integer_type_node;
14194 long_unsigned_type_internal_node = long_unsigned_type_node;
14195 long_long_integer_type_internal_node = long_long_integer_type_node;
14196 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
14197 intQI_type_internal_node = intQI_type_node;
14198 uintQI_type_internal_node = unsigned_intQI_type_node;
14199 intHI_type_internal_node = intHI_type_node;
14200 uintHI_type_internal_node = unsigned_intHI_type_node;
14201 intSI_type_internal_node = intSI_type_node;
14202 uintSI_type_internal_node = unsigned_intSI_type_node;
14203 intDI_type_internal_node = intDI_type_node;
14204 uintDI_type_internal_node = unsigned_intDI_type_node;
14205 intTI_type_internal_node = intTI_type_node;
14206 uintTI_type_internal_node = unsigned_intTI_type_node;
14207 float_type_internal_node = float_type_node;
14208 double_type_internal_node = double_type_node;
14209 long_double_type_internal_node = long_double_type_node;
14210 dfloat64_type_internal_node = dfloat64_type_node;
14211 dfloat128_type_internal_node = dfloat128_type_node;
14212 void_type_internal_node = void_type_node;
14213
14214 /* Initialize the modes for builtin_function_type, mapping a machine mode to
14215 tree type node. */
14216 builtin_mode_to_type[QImode][0] = integer_type_node;
14217 builtin_mode_to_type[HImode][0] = integer_type_node;
14218 builtin_mode_to_type[SImode][0] = intSI_type_node;
14219 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
14220 builtin_mode_to_type[DImode][0] = intDI_type_node;
14221 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
14222 builtin_mode_to_type[TImode][0] = intTI_type_node;
14223 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
14224 builtin_mode_to_type[SFmode][0] = float_type_node;
14225 builtin_mode_to_type[DFmode][0] = double_type_node;
14226 builtin_mode_to_type[TFmode][0] = long_double_type_node;
14227 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
14228 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
14229 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
14230 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
14231 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
14232 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
14233 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
14234 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
14235 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
14236 builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
14237 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
14238 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
14239 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
14240 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
14241 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
14242 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
14243 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
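  /* With the table populated, a (mode, unsignedp) pair can be mapped
     back to a type, e.g. builtin_mode_to_type[V4SImode][1] is
     unsigned_V4SI_type_node and builtin_mode_to_type[SFmode][0] is
     float_type_node.  */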
14244
14245 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
14246 TYPE_NAME (bool_char_type_node) = tdecl;
14247
14248 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
14249 TYPE_NAME (bool_short_type_node) = tdecl;
14250
14251 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
14252 TYPE_NAME (bool_int_type_node) = tdecl;
14253
14254 tdecl = add_builtin_type ("__pixel", pixel_type_node);
14255 TYPE_NAME (pixel_type_node) = tdecl;
14256
14257 bool_V16QI_type_node = build_vector_type (bool_char_type_node, 16);
14258 bool_V8HI_type_node = build_vector_type (bool_short_type_node, 8);
14259 bool_V4SI_type_node = build_vector_type (bool_int_type_node, 4);
14260 bool_V2DI_type_node = build_vector_type (bool_long_type_node, 2);
14261 pixel_V8HI_type_node = build_vector_type (pixel_type_node, 8);
14262
14263 tdecl = add_builtin_type ("__vector unsigned char", unsigned_V16QI_type_node);
14264 TYPE_NAME (unsigned_V16QI_type_node) = tdecl;
14265
14266 tdecl = add_builtin_type ("__vector signed char", V16QI_type_node);
14267 TYPE_NAME (V16QI_type_node) = tdecl;
14268
14269 tdecl = add_builtin_type ("__vector __bool char", bool_V16QI_type_node);
14270 TYPE_NAME ( bool_V16QI_type_node) = tdecl;
14271
14272 tdecl = add_builtin_type ("__vector unsigned short", unsigned_V8HI_type_node);
14273 TYPE_NAME (unsigned_V8HI_type_node) = tdecl;
14274
14275 tdecl = add_builtin_type ("__vector signed short", V8HI_type_node);
14276 TYPE_NAME (V8HI_type_node) = tdecl;
14277
14278 tdecl = add_builtin_type ("__vector __bool short", bool_V8HI_type_node);
14279 TYPE_NAME (bool_V8HI_type_node) = tdecl;
14280
14281 tdecl = add_builtin_type ("__vector unsigned int", unsigned_V4SI_type_node);
14282 TYPE_NAME (unsigned_V4SI_type_node) = tdecl;
14283
14284 tdecl = add_builtin_type ("__vector signed int", V4SI_type_node);
14285 TYPE_NAME (V4SI_type_node) = tdecl;
14286
14287 tdecl = add_builtin_type ("__vector __bool int", bool_V4SI_type_node);
14288 TYPE_NAME (bool_V4SI_type_node) = tdecl;
14289
14290 tdecl = add_builtin_type ("__vector float", V4SF_type_node);
14291 TYPE_NAME (V4SF_type_node) = tdecl;
14292
14293 tdecl = add_builtin_type ("__vector __pixel", pixel_V8HI_type_node);
14294 TYPE_NAME (pixel_V8HI_type_node) = tdecl;
14295
14296 tdecl = add_builtin_type ("__vector double", V2DF_type_node);
14297 TYPE_NAME (V2DF_type_node) = tdecl;
14298
14299 if (TARGET_POWERPC64)
14300 {
14301 tdecl = add_builtin_type ("__vector long", V2DI_type_node);
14302 TYPE_NAME (V2DI_type_node) = tdecl;
14303
14304 tdecl = add_builtin_type ("__vector unsigned long",
14305 unsigned_V2DI_type_node);
14306 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
14307
14308 tdecl = add_builtin_type ("__vector __bool long", bool_V2DI_type_node);
14309 TYPE_NAME (bool_V2DI_type_node) = tdecl;
14310 }
14311 else
14312 {
14313 tdecl = add_builtin_type ("__vector long long", V2DI_type_node);
14314 TYPE_NAME (V2DI_type_node) = tdecl;
14315
14316 tdecl = add_builtin_type ("__vector unsigned long long",
14317 unsigned_V2DI_type_node);
14318 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
14319
14320 tdecl = add_builtin_type ("__vector __bool long long",
14321 bool_V2DI_type_node);
14322 TYPE_NAME (bool_V2DI_type_node) = tdecl;
14323 }
14324
14325 if (V1TI_type_node)
14326 {
14327 tdecl = add_builtin_type ("__vector __int128", V1TI_type_node);
14328 TYPE_NAME (V1TI_type_node) = tdecl;
14329
14330 tdecl = add_builtin_type ("__vector unsigned __int128",
14331 unsigned_V1TI_type_node);
14332 TYPE_NAME (unsigned_V1TI_type_node) = tdecl;
14333 }
14334
14335 /* Paired and SPE builtins are only available if you build a compiler with
14336 the appropriate options, so only create those builtins with the
14337 appropriate compiler option. Create Altivec and VSX builtins on machines
14338 with at least the general purpose extensions (970 and newer) to allow the
14339 use of the target attribute. */
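  /* (E.g. a function declared with __attribute__ ((target ("altivec")))
     can then use the AltiVec builtins even when the translation unit as
     a whole is not compiled with -maltivec.)  */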
14340 if (TARGET_PAIRED_FLOAT)
14341 paired_init_builtins ();
14342 if (TARGET_SPE)
14343 spe_init_builtins ();
14344 if (TARGET_EXTRA_BUILTINS)
14345 altivec_init_builtins ();
14346 if (TARGET_HTM)
14347 htm_init_builtins ();
14348
14349 if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
14350 rs6000_common_init_builtins ();
14351
14352 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
14353 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
14354 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
14355
14356 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
14357 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
14358 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
14359
14360 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
14361 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
14362 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
14363
14364 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
14365 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
14366 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
14367
14368 mode = (TARGET_64BIT) ? DImode : SImode;
14369 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
14370 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
14371 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
14372
14373 ftype = build_function_type_list (unsigned_intDI_type_node,
14374 NULL_TREE);
14375 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
14376
14377 if (TARGET_64BIT)
14378 ftype = build_function_type_list (unsigned_intDI_type_node,
14379 NULL_TREE);
14380 else
14381 ftype = build_function_type_list (unsigned_intSI_type_node,
14382 NULL_TREE);
14383 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
14384
14385 ftype = build_function_type_list (double_type_node, NULL_TREE);
14386 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
14387
14388 ftype = build_function_type_list (void_type_node,
14389 intSI_type_node, double_type_node,
14390 NULL_TREE);
14391 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
14392
14393 #if TARGET_XCOFF
14394 /* AIX libm provides clog as __clog. */
14395 if ((tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
14396 set_user_assembler_name (tdecl, "__clog");
14397 #endif
14398
14399 #ifdef SUBTARGET_INIT_BUILTINS
14400 SUBTARGET_INIT_BUILTINS;
14401 #endif
14402 }
14403
14404 /* Returns the rs6000 builtin decl for CODE. */
14405
14406 static tree
14407 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
14408 {
14409 HOST_WIDE_INT fnmask;
14410
14411 if (code >= RS6000_BUILTIN_COUNT)
14412 return error_mark_node;
14413
14414 fnmask = rs6000_builtin_info[code].mask;
14415 if ((fnmask & rs6000_builtin_mask) != fnmask)
14416 {
14417 rs6000_invalid_builtin ((enum rs6000_builtins)code);
14418 return error_mark_node;
14419 }
14420
14421 return rs6000_builtin_decls[code];
14422 }
14423
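/* Define the builtin functions specific to the SPE instruction set.  */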
14424 static void
14425 spe_init_builtins (void)
14426 {
14427 tree puint_type_node = build_pointer_type (unsigned_type_node);
14428 tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
14429 const struct builtin_description *d;
14430 size_t i;
14431
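  /* The function-type nodes below follow a RET_ftype_ARGS naming
     convention, e.g. v2si_ftype_4_v2si returns an opaque V2SI vector
     and takes four opaque V2SI arguments.  */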
14432 tree v2si_ftype_4_v2si
14433 = build_function_type_list (opaque_V2SI_type_node,
14434 opaque_V2SI_type_node,
14435 opaque_V2SI_type_node,
14436 opaque_V2SI_type_node,
14437 opaque_V2SI_type_node,
14438 NULL_TREE);
14439
14440 tree v2sf_ftype_4_v2sf
14441 = build_function_type_list (opaque_V2SF_type_node,
14442 opaque_V2SF_type_node,
14443 opaque_V2SF_type_node,
14444 opaque_V2SF_type_node,
14445 opaque_V2SF_type_node,
14446 NULL_TREE);
14447
14448 tree int_ftype_int_v2si_v2si
14449 = build_function_type_list (integer_type_node,
14450 integer_type_node,
14451 opaque_V2SI_type_node,
14452 opaque_V2SI_type_node,
14453 NULL_TREE);
14454
14455 tree int_ftype_int_v2sf_v2sf
14456 = build_function_type_list (integer_type_node,
14457 integer_type_node,
14458 opaque_V2SF_type_node,
14459 opaque_V2SF_type_node,
14460 NULL_TREE);
14461
14462 tree void_ftype_v2si_puint_int
14463 = build_function_type_list (void_type_node,
14464 opaque_V2SI_type_node,
14465 puint_type_node,
14466 integer_type_node,
14467 NULL_TREE);
14468
14469 tree void_ftype_v2si_puint_char
14470 = build_function_type_list (void_type_node,
14471 opaque_V2SI_type_node,
14472 puint_type_node,
14473 char_type_node,
14474 NULL_TREE);
14475
14476 tree void_ftype_v2si_pv2si_int
14477 = build_function_type_list (void_type_node,
14478 opaque_V2SI_type_node,
14479 opaque_p_V2SI_type_node,
14480 integer_type_node,
14481 NULL_TREE);
14482
14483 tree void_ftype_v2si_pv2si_char
14484 = build_function_type_list (void_type_node,
14485 opaque_V2SI_type_node,
14486 opaque_p_V2SI_type_node,
14487 char_type_node,
14488 NULL_TREE);
14489
14490 tree void_ftype_int
14491 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
14492
14493 tree int_ftype_void
14494 = build_function_type_list (integer_type_node, NULL_TREE);
14495
14496 tree v2si_ftype_pv2si_int
14497 = build_function_type_list (opaque_V2SI_type_node,
14498 opaque_p_V2SI_type_node,
14499 integer_type_node,
14500 NULL_TREE);
14501
14502 tree v2si_ftype_puint_int
14503 = build_function_type_list (opaque_V2SI_type_node,
14504 puint_type_node,
14505 integer_type_node,
14506 NULL_TREE);
14507
14508 tree v2si_ftype_pushort_int
14509 = build_function_type_list (opaque_V2SI_type_node,
14510 pushort_type_node,
14511 integer_type_node,
14512 NULL_TREE);
14513
14514 tree v2si_ftype_signed_char
14515 = build_function_type_list (opaque_V2SI_type_node,
14516 signed_char_type_node,
14517 NULL_TREE);
14518
14519 add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node);
14520
14521 /* Initialize irregular SPE builtins. */
14522
14523 def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
14524 def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
14525 def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
14526 def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
14527 def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
14528 def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
14529 def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
14530 def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
14531 def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
14532 def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
14533 def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
14534 def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
14535 def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
14536 def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
14537 def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
14538 def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
14539 def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
14540 def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);
14541
14542 /* Loads. */
14543 def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
14544 def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
14545 def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
14546 def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
14547 def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
14548 def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
14549 def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
14550 def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
14551 def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
14552 def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
14553 def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
14554 def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
14555 def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
14556 def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
14557 def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
14558 def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
14559 def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
14560 def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
14561 def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
14562 def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
14563 def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
14564 def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);
14565
14566 /* Predicates. */
14567 d = bdesc_spe_predicates;
14568 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
14569 {
14570 tree type;
14571
14572 switch (insn_data[d->icode].operand[1].mode)
14573 {
14574 case V2SImode:
14575 type = int_ftype_int_v2si_v2si;
14576 break;
14577 case V2SFmode:
14578 type = int_ftype_int_v2sf_v2sf;
14579 break;
14580 default:
14581 gcc_unreachable ();
14582 }
14583
14584 def_builtin (d->name, type, d->code);
14585 }
14586
14587 /* Evsel predicates. */
14588 d = bdesc_spe_evsel;
14589 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
14590 {
14591 tree type;
14592
14593 switch (insn_data[d->icode].operand[1].mode)
14594 {
14595 case V2SImode:
14596 type = v2si_ftype_4_v2si;
14597 break;
14598 case V2SFmode:
14599 type = v2sf_ftype_4_v2sf;
14600 break;
14601 default:
14602 gcc_unreachable ();
14603 }
14604
14605 def_builtin (d->name, type, d->code);
14606 }
14607 }
14608
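/* Define the builtin functions for the paired single-precision
   floating point unit.  */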
14609 static void
14610 paired_init_builtins (void)
14611 {
14612 const struct builtin_description *d;
14613 size_t i;
14614
14615 tree int_ftype_int_v2sf_v2sf
14616 = build_function_type_list (integer_type_node,
14617 integer_type_node,
14618 V2SF_type_node,
14619 V2SF_type_node,
14620 NULL_TREE);
14621 tree pcfloat_type_node =
14622 build_pointer_type (build_qualified_type
14623 (float_type_node, TYPE_QUAL_CONST));
14624
14625 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
14626 long_integer_type_node,
14627 pcfloat_type_node,
14628 NULL_TREE);
14629 tree void_ftype_v2sf_long_pcfloat =
14630 build_function_type_list (void_type_node,
14631 V2SF_type_node,
14632 long_integer_type_node,
14633 pcfloat_type_node,
14634 NULL_TREE);
14635
14636
14637 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
14638 PAIRED_BUILTIN_LX);
14639
14640
14641 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
14642 PAIRED_BUILTIN_STX);
14643
14644 /* Predicates. */
14645 d = bdesc_paired_preds;
14646 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
14647 {
14648 tree type;
14649
14650 if (TARGET_DEBUG_BUILTIN)
14651 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
14652 (int)i, get_insn_name (d->icode), (int)d->icode,
14653 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
14654
14655 switch (insn_data[d->icode].operand[1].mode)
14656 {
14657 case V2SFmode:
14658 type = int_ftype_int_v2sf_v2sf;
14659 break;
14660 default:
14661 gcc_unreachable ();
14662 }
14663
14664 def_builtin (d->name, type, d->code);
14665 }
14666 }
14667
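/* Define the AltiVec and VSX builtin functions that need special
   handling, along with the vector predicate, abs, init, set and
   extract builtins.  */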
14668 static void
14669 altivec_init_builtins (void)
14670 {
14671 const struct builtin_description *d;
14672 size_t i;
14673 tree ftype;
14674 tree decl;
14675
14676 tree pvoid_type_node = build_pointer_type (void_type_node);
14677
14678 tree pcvoid_type_node
14679 = build_pointer_type (build_qualified_type (void_type_node,
14680 TYPE_QUAL_CONST));
14681
14682 tree int_ftype_opaque
14683 = build_function_type_list (integer_type_node,
14684 opaque_V4SI_type_node, NULL_TREE);
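  /* The "opaque" signature below is only a placeholder; overloaded
     builtins such as vec_splats and vec_promote get their real
     signatures during overload resolution in the front end, not
     here.  */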
14685 tree opaque_ftype_opaque
14686 = build_function_type_list (integer_type_node, NULL_TREE);
14687 tree opaque_ftype_opaque_int
14688 = build_function_type_list (opaque_V4SI_type_node,
14689 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
14690 tree opaque_ftype_opaque_opaque_int
14691 = build_function_type_list (opaque_V4SI_type_node,
14692 opaque_V4SI_type_node, opaque_V4SI_type_node,
14693 integer_type_node, NULL_TREE);
14694 tree int_ftype_int_opaque_opaque
14695 = build_function_type_list (integer_type_node,
14696 integer_type_node, opaque_V4SI_type_node,
14697 opaque_V4SI_type_node, NULL_TREE);
14698 tree int_ftype_int_v4si_v4si
14699 = build_function_type_list (integer_type_node,
14700 integer_type_node, V4SI_type_node,
14701 V4SI_type_node, NULL_TREE);
14702 tree int_ftype_int_v2di_v2di
14703 = build_function_type_list (integer_type_node,
14704 integer_type_node, V2DI_type_node,
14705 V2DI_type_node, NULL_TREE);
14706 tree void_ftype_v4si
14707 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
14708 tree v8hi_ftype_void
14709 = build_function_type_list (V8HI_type_node, NULL_TREE);
14710 tree void_ftype_void
14711 = build_function_type_list (void_type_node, NULL_TREE);
14712 tree void_ftype_int
14713 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
14714
14715 tree opaque_ftype_long_pcvoid
14716 = build_function_type_list (opaque_V4SI_type_node,
14717 long_integer_type_node, pcvoid_type_node,
14718 NULL_TREE);
14719 tree v16qi_ftype_long_pcvoid
14720 = build_function_type_list (V16QI_type_node,
14721 long_integer_type_node, pcvoid_type_node,
14722 NULL_TREE);
14723 tree v8hi_ftype_long_pcvoid
14724 = build_function_type_list (V8HI_type_node,
14725 long_integer_type_node, pcvoid_type_node,
14726 NULL_TREE);
14727 tree v4si_ftype_long_pcvoid
14728 = build_function_type_list (V4SI_type_node,
14729 long_integer_type_node, pcvoid_type_node,
14730 NULL_TREE);
14731 tree v4sf_ftype_long_pcvoid
14732 = build_function_type_list (V4SF_type_node,
14733 long_integer_type_node, pcvoid_type_node,
14734 NULL_TREE);
14735 tree v2df_ftype_long_pcvoid
14736 = build_function_type_list (V2DF_type_node,
14737 long_integer_type_node, pcvoid_type_node,
14738 NULL_TREE);
14739 tree v2di_ftype_long_pcvoid
14740 = build_function_type_list (V2DI_type_node,
14741 long_integer_type_node, pcvoid_type_node,
14742 NULL_TREE);
14743
14744 tree void_ftype_opaque_long_pvoid
14745 = build_function_type_list (void_type_node,
14746 opaque_V4SI_type_node, long_integer_type_node,
14747 pvoid_type_node, NULL_TREE);
14748 tree void_ftype_v4si_long_pvoid
14749 = build_function_type_list (void_type_node,
14750 V4SI_type_node, long_integer_type_node,
14751 pvoid_type_node, NULL_TREE);
14752 tree void_ftype_v16qi_long_pvoid
14753 = build_function_type_list (void_type_node,
14754 V16QI_type_node, long_integer_type_node,
14755 pvoid_type_node, NULL_TREE);
14756 tree void_ftype_v8hi_long_pvoid
14757 = build_function_type_list (void_type_node,
14758 V8HI_type_node, long_integer_type_node,
14759 pvoid_type_node, NULL_TREE);
14760 tree void_ftype_v4sf_long_pvoid
14761 = build_function_type_list (void_type_node,
14762 V4SF_type_node, long_integer_type_node,
14763 pvoid_type_node, NULL_TREE);
14764 tree void_ftype_v2df_long_pvoid
14765 = build_function_type_list (void_type_node,
14766 V2DF_type_node, long_integer_type_node,
14767 pvoid_type_node, NULL_TREE);
14768 tree void_ftype_v2di_long_pvoid
14769 = build_function_type_list (void_type_node,
14770 V2DI_type_node, long_integer_type_node,
14771 pvoid_type_node, NULL_TREE);
14772 tree int_ftype_int_v8hi_v8hi
14773 = build_function_type_list (integer_type_node,
14774 integer_type_node, V8HI_type_node,
14775 V8HI_type_node, NULL_TREE);
14776 tree int_ftype_int_v16qi_v16qi
14777 = build_function_type_list (integer_type_node,
14778 integer_type_node, V16QI_type_node,
14779 V16QI_type_node, NULL_TREE);
14780 tree int_ftype_int_v4sf_v4sf
14781 = build_function_type_list (integer_type_node,
14782 integer_type_node, V4SF_type_node,
14783 V4SF_type_node, NULL_TREE);
14784 tree int_ftype_int_v2df_v2df
14785 = build_function_type_list (integer_type_node,
14786 integer_type_node, V2DF_type_node,
14787 V2DF_type_node, NULL_TREE);
14788 tree v2di_ftype_v2di
14789 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
14790 tree v4si_ftype_v4si
14791 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
14792 tree v8hi_ftype_v8hi
14793 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
14794 tree v16qi_ftype_v16qi
14795 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
14796 tree v4sf_ftype_v4sf
14797 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
14798 tree v2df_ftype_v2df
14799 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
14800 tree void_ftype_pcvoid_int_int
14801 = build_function_type_list (void_type_node,
14802 pcvoid_type_node, integer_type_node,
14803 integer_type_node, NULL_TREE);
14804
14805 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
14806 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
14807 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
14808 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
14809 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
14810 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
14811 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
14812 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
14813 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
14814 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
14815 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
14816 ALTIVEC_BUILTIN_LVXL_V2DF);
14817 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
14818 ALTIVEC_BUILTIN_LVXL_V2DI);
14819 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
14820 ALTIVEC_BUILTIN_LVXL_V4SF);
14821 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
14822 ALTIVEC_BUILTIN_LVXL_V4SI);
14823 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
14824 ALTIVEC_BUILTIN_LVXL_V8HI);
14825 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
14826 ALTIVEC_BUILTIN_LVXL_V16QI);
14827 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
14828 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
14829 ALTIVEC_BUILTIN_LVX_V2DF);
14830 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
14831 ALTIVEC_BUILTIN_LVX_V2DI);
14832 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
14833 ALTIVEC_BUILTIN_LVX_V4SF);
14834 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
14835 ALTIVEC_BUILTIN_LVX_V4SI);
14836 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
14837 ALTIVEC_BUILTIN_LVX_V8HI);
14838 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
14839 ALTIVEC_BUILTIN_LVX_V16QI);
14840 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
14841 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
14842 ALTIVEC_BUILTIN_STVX_V2DF);
14843 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
14844 ALTIVEC_BUILTIN_STVX_V2DI);
14845 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
14846 ALTIVEC_BUILTIN_STVX_V4SF);
14847 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
14848 ALTIVEC_BUILTIN_STVX_V4SI);
14849 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
14850 ALTIVEC_BUILTIN_STVX_V8HI);
14851 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
14852 ALTIVEC_BUILTIN_STVX_V16QI);
14853 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
14854 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
14855 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
14856 ALTIVEC_BUILTIN_STVXL_V2DF);
14857 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
14858 ALTIVEC_BUILTIN_STVXL_V2DI);
14859 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
14860 ALTIVEC_BUILTIN_STVXL_V4SF);
14861 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
14862 ALTIVEC_BUILTIN_STVXL_V4SI);
14863 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
14864 ALTIVEC_BUILTIN_STVXL_V8HI);
14865 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
14866 ALTIVEC_BUILTIN_STVXL_V16QI);
14867 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
14868 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
14869 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
14870 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
14871 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
14872 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
14873 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
14874 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
14875 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
14876 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
14877 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
14878 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
14879 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
14880 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
14881 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
14882 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
14883
14884 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
14885 VSX_BUILTIN_LXVD2X_V2DF);
14886 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
14887 VSX_BUILTIN_LXVD2X_V2DI);
14888 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
14889 VSX_BUILTIN_LXVW4X_V4SF);
14890 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
14891 VSX_BUILTIN_LXVW4X_V4SI);
14892 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
14893 VSX_BUILTIN_LXVW4X_V8HI);
14894 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
14895 VSX_BUILTIN_LXVW4X_V16QI);
14896 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
14897 VSX_BUILTIN_STXVD2X_V2DF);
14898 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
14899 VSX_BUILTIN_STXVD2X_V2DI);
14900 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
14901 VSX_BUILTIN_STXVW4X_V4SF);
14902 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
14903 VSX_BUILTIN_STXVW4X_V4SI);
14904 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
14905 VSX_BUILTIN_STXVW4X_V8HI);
14906 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
14907 VSX_BUILTIN_STXVW4X_V16QI);
14908 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
14909 VSX_BUILTIN_VEC_LD);
14910 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
14911 VSX_BUILTIN_VEC_ST);
14912
14913 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
14914 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
14915 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
14916
14917 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
14918 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
14919 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
14920 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
14921 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
14922 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
14923 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
14924 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
14925 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
14926 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
14927 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
14928 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
14929
14930 /* Cell builtins. */
14931 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
14932 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
14933 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
14934 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
14935
14936 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
14937 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
14938 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
14939 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
14940
14941 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
14942 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
14943 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
14944 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
14945
14946 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
14947 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
14948 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
14949 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
14950
14951 /* Add the DST variants. */
14952 d = bdesc_dst;
14953 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
14954 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
14955
14956 /* Initialize the predicates. */
14957 d = bdesc_altivec_preds;
14958 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
14959 {
14960 machine_mode mode1;
14961 tree type;
14962
14963 if (rs6000_overloaded_builtin_p (d->code))
14964 mode1 = VOIDmode;
14965 else
14966 mode1 = insn_data[d->icode].operand[1].mode;
14967
14968 switch (mode1)
14969 {
14970 case VOIDmode:
14971 type = int_ftype_int_opaque_opaque;
14972 break;
14973 case V2DImode:
14974 type = int_ftype_int_v2di_v2di;
14975 break;
14976 case V4SImode:
14977 type = int_ftype_int_v4si_v4si;
14978 break;
14979 case V8HImode:
14980 type = int_ftype_int_v8hi_v8hi;
14981 break;
14982 case V16QImode:
14983 type = int_ftype_int_v16qi_v16qi;
14984 break;
14985 case V4SFmode:
14986 type = int_ftype_int_v4sf_v4sf;
14987 break;
14988 case V2DFmode:
14989 type = int_ftype_int_v2df_v2df;
14990 break;
14991 default:
14992 gcc_unreachable ();
14993 }
14994
14995 def_builtin (d->name, type, d->code);
14996 }
14997
14998 /* Initialize the abs* operators. */
14999 d = bdesc_abs;
15000 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
15001 {
15002 machine_mode mode0;
15003 tree type;
15004
15005 mode0 = insn_data[d->icode].operand[0].mode;
15006
15007 switch (mode0)
15008 {
15009 case V2DImode:
15010 type = v2di_ftype_v2di;
15011 break;
15012 case V4SImode:
15013 type = v4si_ftype_v4si;
15014 break;
15015 case V8HImode:
15016 type = v8hi_ftype_v8hi;
15017 break;
15018 case V16QImode:
15019 type = v16qi_ftype_v16qi;
15020 break;
15021 case V4SFmode:
15022 type = v4sf_ftype_v4sf;
15023 break;
15024 case V2DFmode:
15025 type = v2df_ftype_v2df;
15026 break;
15027 default:
15028 gcc_unreachable ();
15029 }
15030
15031 def_builtin (d->name, type, d->code);
15032 }
15033
15034 /* Initialize target builtin that implements
15035 targetm.vectorize.builtin_mask_for_load. */
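  /* (The autovectorizer calls this builtin to obtain the permute mask
     for realigning a misaligned vector load; its expansion is the
     ALTIVEC_BUILTIN_MASK_FOR_LOAD case in rs6000_expand_builtin
     above.)  */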
15036
15037 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
15038 v16qi_ftype_long_pcvoid,
15039 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
15040 BUILT_IN_MD, NULL, NULL_TREE);
15041 TREE_READONLY (decl) = 1;
15042 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
15043 altivec_builtin_mask_for_load = decl;
15044
15045 /* Access to the vec_init patterns. */
15046 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
15047 integer_type_node, integer_type_node,
15048 integer_type_node, NULL_TREE);
15049 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
15050
15051 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
15052 short_integer_type_node,
15053 short_integer_type_node,
15054 short_integer_type_node,
15055 short_integer_type_node,
15056 short_integer_type_node,
15057 short_integer_type_node,
15058 short_integer_type_node, NULL_TREE);
15059 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
15060
15061 ftype = build_function_type_list (V16QI_type_node, char_type_node,
15062 char_type_node, char_type_node,
15063 char_type_node, char_type_node,
15064 char_type_node, char_type_node,
15065 char_type_node, char_type_node,
15066 char_type_node, char_type_node,
15067 char_type_node, char_type_node,
15068 char_type_node, char_type_node,
15069 char_type_node, NULL_TREE);
15070 def_builtin ("__builtin_vec_init_v16qi", ftype,
15071 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
15072
15073 ftype = build_function_type_list (V4SF_type_node, float_type_node,
15074 float_type_node, float_type_node,
15075 float_type_node, NULL_TREE);
15076 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
15077
15078 /* VSX builtins. */
15079 ftype = build_function_type_list (V2DF_type_node, double_type_node,
15080 double_type_node, NULL_TREE);
15081 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
15082
15083 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
15084 intDI_type_node, NULL_TREE);
15085 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
15086
15087 /* Access to the vec_set patterns. */
15088 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
15089 intSI_type_node,
15090 integer_type_node, NULL_TREE);
15091 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
15092
15093 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15094 intHI_type_node,
15095 integer_type_node, NULL_TREE);
15096 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
15097
15098 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
15099 intQI_type_node,
15100 integer_type_node, NULL_TREE);
15101 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
15102
15103 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
15104 float_type_node,
15105 integer_type_node, NULL_TREE);
15106 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
15107
15108 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
15109 double_type_node,
15110 integer_type_node, NULL_TREE);
15111 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
15112
15113 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
15114 intDI_type_node,
15115 integer_type_node, NULL_TREE);
15116 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
15117
15118 /* Access to the vec_extract patterns. */
15119 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15120 integer_type_node, NULL_TREE);
15121 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
15122
15123 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15124 integer_type_node, NULL_TREE);
15125 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
15126
15127 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
15128 integer_type_node, NULL_TREE);
15129 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
15130
15131 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15132 integer_type_node, NULL_TREE);
15133 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
15134
15135 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15136 integer_type_node, NULL_TREE);
15137 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
15138
15139 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
15140 integer_type_node, NULL_TREE);
15141 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
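/* Sketch of the matching vec_extract usage: the builtins take the vector
   and an element index, e.g.

     int x = __builtin_vec_ext_v4si (v, 2);

   which reads element 2 of V. */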
15142
15143
15144 if (V1TI_type_node)
15145 {
15146 tree v1ti_ftype_long_pcvoid
15147 = build_function_type_list (V1TI_type_node,
15148 long_integer_type_node, pcvoid_type_node,
15149 NULL_TREE);
15150 tree void_ftype_v1ti_long_pvoid
15151 = build_function_type_list (void_type_node,
15152 V1TI_type_node, long_integer_type_node,
15153 pvoid_type_node, NULL_TREE);
15154 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
15155 VSX_BUILTIN_LXVD2X_V1TI);
15156 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
15157 VSX_BUILTIN_STXVD2X_V1TI);
15158 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
15159 NULL_TREE);
15160 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
15161 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
15162 intTI_type_node,
15163 integer_type_node, NULL_TREE);
15164 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
15165 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
15166 integer_type_node, NULL_TREE);
15167 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
15168 }
15169
15170 }
15171
15172 static void
15173 htm_init_builtins (void)
15174 {
15175 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
15176 const struct builtin_description *d;
15177 size_t i;
15178
15179 d = bdesc_htm;
15180 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
15181 {
15182 tree op[MAX_HTM_OPERANDS], type;
15183 HOST_WIDE_INT mask = d->mask;
15184 unsigned attr = rs6000_builtin_info[d->code].attr;
15185 bool void_func = (attr & RS6000_BTC_VOID);
15186 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
15187 int nopnds = 0;
15188 tree argtype = (attr & RS6000_BTC_SPR) ? long_unsigned_type_node
15189 : unsigned_type_node;
15190
15191 if ((mask & builtin_mask) != mask)
15192 {
15193 if (TARGET_DEBUG_BUILTIN)
15194 fprintf (stderr, "htm_builtin, skip binary %s\n", d->name);
15195 continue;
15196 }
15197
15198 if (d->name == 0)
15199 {
15200 if (TARGET_DEBUG_BUILTIN)
15201 fprintf (stderr, "htm_builtin, bdesc_htm[%ld] no name\n",
15202 (long unsigned) i);
15203 continue;
15204 }
15205
15206 op[nopnds++] = (void_func) ? void_type_node : argtype;
15207
15208 if (attr_args == RS6000_BTC_UNARY)
15209 op[nopnds++] = argtype;
15210 else if (attr_args == RS6000_BTC_BINARY)
15211 {
15212 op[nopnds++] = argtype;
15213 op[nopnds++] = argtype;
15214 }
15215 else if (attr_args == RS6000_BTC_TERNARY)
15216 {
15217 op[nopnds++] = argtype;
15218 op[nopnds++] = argtype;
15219 op[nopnds++] = argtype;
15220 }
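/* At this point op[0] holds the return type and op[1..nopnds-1] the argument
   types. As an illustration (a sketch, not tied to a specific bdesc_htm
   entry), a non-void binary SPR builtin collects
   { long_unsigned_type_node, long_unsigned_type_node, long_unsigned_type_node }
   and the switch below builds the signature
   unsigned long f (unsigned long, unsigned long). */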
15221
15222 switch (nopnds)
15223 {
15224 case 1:
15225 type = build_function_type_list (op[0], NULL_TREE);
15226 break;
15227 case 2:
15228 type = build_function_type_list (op[0], op[1], NULL_TREE);
15229 break;
15230 case 3:
15231 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
15232 break;
15233 case 4:
15234 type = build_function_type_list (op[0], op[1], op[2], op[3],
15235 NULL_TREE);
15236 break;
15237 default:
15238 gcc_unreachable ();
15239 }
15240
15241 def_builtin (d->name, type, d->code);
15242 }
15243 }
15244
15245 /* Hash function for builtin functions with up to 3 arguments and a return
15246 type. */
15247 hashval_t
15248 builtin_hasher::hash (builtin_hash_struct *bh)
15249 {
15250 unsigned ret = 0;
15251 int i;
15252
15253 for (i = 0; i < 4; i++)
15254 {
15255 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
15256 ret = (ret * 2) + bh->uns_p[i];
15257 }
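/* The loop above folds the four (mode, uns_p) pairs into one mixed-radix
   value: each iteration computes
   ret = (ret * MAX_MACHINE_MODE + mode[i]) * 2 + uns_p[i],
   so, up to unsigned overflow, distinct mode/signedness combinations map
   to distinct hash values. */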
15258
15259 return ret;
15260 }
15261
15262 /* Compare builtin hash entries H1 and H2 for equivalence. */
15263 bool
15264 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
15265 {
15266 return ((p1->mode[0] == p2->mode[0])
15267 && (p1->mode[1] == p2->mode[1])
15268 && (p1->mode[2] == p2->mode[2])
15269 && (p1->mode[3] == p2->mode[3])
15270 && (p1->uns_p[0] == p2->uns_p[0])
15271 && (p1->uns_p[1] == p2->uns_p[1])
15272 && (p1->uns_p[2] == p2->uns_p[2])
15273 && (p1->uns_p[3] == p2->uns_p[3]));
15274 }
15275
15276 /* Map types for builtin functions with an explicit return type and up to 3
15277 arguments. Functions with fewer than 3 arguments use VOIDmode as the mode
15278 of the unused arguments. */
15279 static tree
15280 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
15281 machine_mode mode_arg1, machine_mode mode_arg2,
15282 enum rs6000_builtins builtin, const char *name)
15283 {
15284 struct builtin_hash_struct h;
15285 struct builtin_hash_struct *h2;
15286 int num_args = 3;
15287 int i;
15288 tree ret_type = NULL_TREE;
15289 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
15290
15291 /* Create builtin_hash_table. */
15292 if (builtin_hash_table == NULL)
15293 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
15294
15295 h.type = NULL_TREE;
15296 h.mode[0] = mode_ret;
15297 h.mode[1] = mode_arg0;
15298 h.mode[2] = mode_arg1;
15299 h.mode[3] = mode_arg2;
15300 h.uns_p[0] = 0;
15301 h.uns_p[1] = 0;
15302 h.uns_p[2] = 0;
15303 h.uns_p[3] = 0;
15304
15305 /* If the builtin produces unsigned results or takes unsigned arguments,
15306 and it is returned as a decl for the vectorizer (such as widening
15307 multiplies or permutes), make sure the arguments and return value
15308 are type correct. */
15309 switch (builtin)
15310 {
15311 /* unsigned 1 argument functions. */
15312 case CRYPTO_BUILTIN_VSBOX:
15313 case P8V_BUILTIN_VGBBD:
15314 case MISC_BUILTIN_CDTBCD:
15315 case MISC_BUILTIN_CBCDTD:
15316 h.uns_p[0] = 1;
15317 h.uns_p[1] = 1;
15318 break;
15319
15320 /* unsigned 2 argument functions. */
15321 case ALTIVEC_BUILTIN_VMULEUB_UNS:
15322 case ALTIVEC_BUILTIN_VMULEUH_UNS:
15323 case ALTIVEC_BUILTIN_VMULOUB_UNS:
15324 case ALTIVEC_BUILTIN_VMULOUH_UNS:
15325 case CRYPTO_BUILTIN_VCIPHER:
15326 case CRYPTO_BUILTIN_VCIPHERLAST:
15327 case CRYPTO_BUILTIN_VNCIPHER:
15328 case CRYPTO_BUILTIN_VNCIPHERLAST:
15329 case CRYPTO_BUILTIN_VPMSUMB:
15330 case CRYPTO_BUILTIN_VPMSUMH:
15331 case CRYPTO_BUILTIN_VPMSUMW:
15332 case CRYPTO_BUILTIN_VPMSUMD:
15333 case CRYPTO_BUILTIN_VPMSUM:
15334 case MISC_BUILTIN_ADDG6S:
15335 case MISC_BUILTIN_DIVWEU:
15336 case MISC_BUILTIN_DIVWEUO:
15337 case MISC_BUILTIN_DIVDEU:
15338 case MISC_BUILTIN_DIVDEUO:
15339 h.uns_p[0] = 1;
15340 h.uns_p[1] = 1;
15341 h.uns_p[2] = 1;
15342 break;
15343
15344 /* unsigned 3 argument functions. */
15345 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
15346 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
15347 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
15348 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
15349 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
15350 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
15351 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
15352 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
15353 case VSX_BUILTIN_VPERM_16QI_UNS:
15354 case VSX_BUILTIN_VPERM_8HI_UNS:
15355 case VSX_BUILTIN_VPERM_4SI_UNS:
15356 case VSX_BUILTIN_VPERM_2DI_UNS:
15357 case VSX_BUILTIN_XXSEL_16QI_UNS:
15358 case VSX_BUILTIN_XXSEL_8HI_UNS:
15359 case VSX_BUILTIN_XXSEL_4SI_UNS:
15360 case VSX_BUILTIN_XXSEL_2DI_UNS:
15361 case CRYPTO_BUILTIN_VPERMXOR:
15362 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
15363 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
15364 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
15365 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
15366 case CRYPTO_BUILTIN_VSHASIGMAW:
15367 case CRYPTO_BUILTIN_VSHASIGMAD:
15368 case CRYPTO_BUILTIN_VSHASIGMA:
15369 h.uns_p[0] = 1;
15370 h.uns_p[1] = 1;
15371 h.uns_p[2] = 1;
15372 h.uns_p[3] = 1;
15373 break;
15374
15375 /* signed permute functions with unsigned char mask. */
15376 case ALTIVEC_BUILTIN_VPERM_16QI:
15377 case ALTIVEC_BUILTIN_VPERM_8HI:
15378 case ALTIVEC_BUILTIN_VPERM_4SI:
15379 case ALTIVEC_BUILTIN_VPERM_4SF:
15380 case ALTIVEC_BUILTIN_VPERM_2DI:
15381 case ALTIVEC_BUILTIN_VPERM_2DF:
15382 case VSX_BUILTIN_VPERM_16QI:
15383 case VSX_BUILTIN_VPERM_8HI:
15384 case VSX_BUILTIN_VPERM_4SI:
15385 case VSX_BUILTIN_VPERM_4SF:
15386 case VSX_BUILTIN_VPERM_2DI:
15387 case VSX_BUILTIN_VPERM_2DF:
15388 h.uns_p[3] = 1;
15389 break;
15390
15391 /* unsigned args, signed return. */
15392 case VSX_BUILTIN_XVCVUXDDP_UNS:
15393 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
15394 h.uns_p[1] = 1;
15395 break;
15396
15397 /* signed args, unsigned return. */
15398 case VSX_BUILTIN_XVCVDPUXDS_UNS:
15399 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
15400 case MISC_BUILTIN_UNPACK_TD:
15401 case MISC_BUILTIN_UNPACK_V1TI:
15402 h.uns_p[0] = 1;
15403 break;
15404
15405 /* unsigned arguments for 128-bit pack instructions. */
15406 case MISC_BUILTIN_PACK_TD:
15407 case MISC_BUILTIN_PACK_V1TI:
15408 h.uns_p[1] = 1;
15409 h.uns_p[2] = 1;
15410 break;
15411
15412 default:
15413 break;
15414 }
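/* For example, ALTIVEC_BUILTIN_VMULEUB_UNS above sets uns_p[0..2], so its
   V8HImode result and V16QImode arguments are given unsigned vector types,
   yielding vector unsigned short f (vector unsigned char,
   vector unsigned char) rather than the default signed variant (a sketch;
   the actual modes are supplied by the callers from the insn pattern). */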
15415
15416 /* Figure out how many args are present. */
15417 while (num_args > 0 && h.mode[num_args] == VOIDmode)
15418 num_args--;
15419
15420 if (num_args == 0)
15421 fatal_error ("internal error: builtin function %s had no type", name);
15422
15423 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
15424 if (!ret_type && h.uns_p[0])
15425 ret_type = builtin_mode_to_type[h.mode[0]][0];
15426
15427 if (!ret_type)
15428 fatal_error ("internal error: builtin function %s had an unexpected "
15429 "return type %s", name, GET_MODE_NAME (h.mode[0]));
15430
15431 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
15432 arg_type[i] = NULL_TREE;
15433
15434 for (i = 0; i < num_args; i++)
15435 {
15436 int m = (int) h.mode[i+1];
15437 int uns_p = h.uns_p[i+1];
15438
15439 arg_type[i] = builtin_mode_to_type[m][uns_p];
15440 if (!arg_type[i] && uns_p)
15441 arg_type[i] = builtin_mode_to_type[m][0];
15442
15443 if (!arg_type[i])
15444 fatal_error ("internal error: builtin function %s, argument %d "
15445 "had unexpected argument type %s", name, i,
15446 GET_MODE_NAME (m));
15447 }
15448
15449 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
15450 if (*found == NULL)
15451 {
15452 h2 = ggc_alloc<builtin_hash_struct> ();
15453 *h2 = h;
15454 *found = h2;
15455
15456 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
15457 arg_type[2], NULL_TREE);
15458 }
15459
15460 return (*found)->type;
15461 }
15462
15463 static void
15464 rs6000_common_init_builtins (void)
15465 {
15466 const struct builtin_description *d;
15467 size_t i;
15468
15469 tree opaque_ftype_opaque = NULL_TREE;
15470 tree opaque_ftype_opaque_opaque = NULL_TREE;
15471 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
15472 tree v2si_ftype_qi = NULL_TREE;
15473 tree v2si_ftype_v2si_qi = NULL_TREE;
15474 tree v2si_ftype_int_qi = NULL_TREE;
15475 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
15476
15477 if (!TARGET_PAIRED_FLOAT)
15478 {
15479 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
15480 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
15481 }
15482
15483 /* Paired and SPE builtins are only available if you build a compiler with
15484 the appropriate options, so only create those builtins when the
15485 corresponding compiler option is enabled. Create Altivec and VSX builtins
15486 on machines with at least the general purpose extensions (970 and newer)
15487 to allow the use of the target attribute. */
15488
15489 if (TARGET_EXTRA_BUILTINS)
15490 builtin_mask |= RS6000_BTM_COMMON;
15491
15492 /* Add the ternary operators. */
15493 d = bdesc_3arg;
15494 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
15495 {
15496 tree type;
15497 HOST_WIDE_INT mask = d->mask;
15498
15499 if ((mask & builtin_mask) != mask)
15500 {
15501 if (TARGET_DEBUG_BUILTIN)
15502 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
15503 continue;
15504 }
15505
15506 if (rs6000_overloaded_builtin_p (d->code))
15507 {
15508 if (! (type = opaque_ftype_opaque_opaque_opaque))
15509 type = opaque_ftype_opaque_opaque_opaque
15510 = build_function_type_list (opaque_V4SI_type_node,
15511 opaque_V4SI_type_node,
15512 opaque_V4SI_type_node,
15513 opaque_V4SI_type_node,
15514 NULL_TREE);
15515 }
15516 else
15517 {
15518 enum insn_code icode = d->icode;
15519 if (d->name == 0)
15520 {
15521 if (TARGET_DEBUG_BUILTIN)
15522 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%ld] no name\n",
15523 (long unsigned)i);
15524
15525 continue;
15526 }
15527
15528 if (icode == CODE_FOR_nothing)
15529 {
15530 if (TARGET_DEBUG_BUILTIN)
15531 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
15532 d->name);
15533
15534 continue;
15535 }
15536
15537 type = builtin_function_type (insn_data[icode].operand[0].mode,
15538 insn_data[icode].operand[1].mode,
15539 insn_data[icode].operand[2].mode,
15540 insn_data[icode].operand[3].mode,
15541 d->code, d->name);
15542 }
15543
15544 def_builtin (d->name, type, d->code);
15545 }
15546
15547 /* Add the binary operators. */
15548 d = bdesc_2arg;
15549 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15550 {
15551 machine_mode mode0, mode1, mode2;
15552 tree type;
15553 HOST_WIDE_INT mask = d->mask;
15554
15555 if ((mask & builtin_mask) != mask)
15556 {
15557 if (TARGET_DEBUG_BUILTIN)
15558 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
15559 continue;
15560 }
15561
15562 if (rs6000_overloaded_builtin_p (d->code))
15563 {
15564 if (! (type = opaque_ftype_opaque_opaque))
15565 type = opaque_ftype_opaque_opaque
15566 = build_function_type_list (opaque_V4SI_type_node,
15567 opaque_V4SI_type_node,
15568 opaque_V4SI_type_node,
15569 NULL_TREE);
15570 }
15571 else
15572 {
15573 enum insn_code icode = d->icode;
15574 if (d->name == 0)
15575 {
15576 if (TARGET_DEBUG_BUILTIN)
15577 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%ld] no name\n",
15578 (long unsigned)i);
15579
15580 continue;
15581 }
15582
15583 if (icode == CODE_FOR_nothing)
15584 {
15585 if (TARGET_DEBUG_BUILTIN)
15586 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
15587 d->name);
15588
15589 continue;
15590 }
15591
15592 mode0 = insn_data[icode].operand[0].mode;
15593 mode1 = insn_data[icode].operand[1].mode;
15594 mode2 = insn_data[icode].operand[2].mode;
15595
15596 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
15597 {
15598 if (! (type = v2si_ftype_v2si_qi))
15599 type = v2si_ftype_v2si_qi
15600 = build_function_type_list (opaque_V2SI_type_node,
15601 opaque_V2SI_type_node,
15602 char_type_node,
15603 NULL_TREE);
15604 }
15605
15606 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
15607 && mode2 == QImode)
15608 {
15609 if (! (type = v2si_ftype_int_qi))
15610 type = v2si_ftype_int_qi
15611 = build_function_type_list (opaque_V2SI_type_node,
15612 integer_type_node,
15613 char_type_node,
15614 NULL_TREE);
15615 }
15616
15617 else
15618 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
15619 d->code, d->name);
15620 }
15621
15622 def_builtin (d->name, type, d->code);
15623 }
15624
15625 /* Add the simple unary operators. */
15626 d = bdesc_1arg;
15627 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15628 {
15629 machine_mode mode0, mode1;
15630 tree type;
15631 HOST_WIDE_INT mask = d->mask;
15632
15633 if ((mask & builtin_mask) != mask)
15634 {
15635 if (TARGET_DEBUG_BUILTIN)
15636 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
15637 continue;
15638 }
15639
15640 if (rs6000_overloaded_builtin_p (d->code))
15641 {
15642 if (! (type = opaque_ftype_opaque))
15643 type = opaque_ftype_opaque
15644 = build_function_type_list (opaque_V4SI_type_node,
15645 opaque_V4SI_type_node,
15646 NULL_TREE);
15647 }
15648 else
15649 {
15650 enum insn_code icode = d->icode;
15651 if (d->name == 0)
15652 {
15653 if (TARGET_DEBUG_BUILTIN)
15654 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%ld] no name\n",
15655 (long unsigned)i);
15656
15657 continue;
15658 }
15659
15660 if (icode == CODE_FOR_nothing)
15661 {
15662 if (TARGET_DEBUG_BUILTIN)
15663 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
15664 d->name);
15665
15666 continue;
15667 }
15668
15669 mode0 = insn_data[icode].operand[0].mode;
15670 mode1 = insn_data[icode].operand[1].mode;
15671
15672 if (mode0 == V2SImode && mode1 == QImode)
15673 {
15674 if (! (type = v2si_ftype_qi))
15675 type = v2si_ftype_qi
15676 = build_function_type_list (opaque_V2SI_type_node,
15677 char_type_node,
15678 NULL_TREE);
15679 }
15680
15681 else
15682 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
15683 d->code, d->name);
15684 }
15685
15686 def_builtin (d->name, type, d->code);
15687 }
15688 }
15689
15690 static void
15691 rs6000_init_libfuncs (void)
15692 {
15693 if (!TARGET_IEEEQUAD)
15694 /* AIX/Darwin/64-bit Linux quad floating point routines. */
15695 if (!TARGET_XL_COMPAT)
15696 {
15697 set_optab_libfunc (add_optab, TFmode, "__gcc_qadd");
15698 set_optab_libfunc (sub_optab, TFmode, "__gcc_qsub");
15699 set_optab_libfunc (smul_optab, TFmode, "__gcc_qmul");
15700 set_optab_libfunc (sdiv_optab, TFmode, "__gcc_qdiv");
15701
15702 if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
15703 {
15704 set_optab_libfunc (neg_optab, TFmode, "__gcc_qneg");
15705 set_optab_libfunc (eq_optab, TFmode, "__gcc_qeq");
15706 set_optab_libfunc (ne_optab, TFmode, "__gcc_qne");
15707 set_optab_libfunc (gt_optab, TFmode, "__gcc_qgt");
15708 set_optab_libfunc (ge_optab, TFmode, "__gcc_qge");
15709 set_optab_libfunc (lt_optab, TFmode, "__gcc_qlt");
15710 set_optab_libfunc (le_optab, TFmode, "__gcc_qle");
15711
15712 set_conv_libfunc (sext_optab, TFmode, SFmode, "__gcc_stoq");
15713 set_conv_libfunc (sext_optab, TFmode, DFmode, "__gcc_dtoq");
15714 set_conv_libfunc (trunc_optab, SFmode, TFmode, "__gcc_qtos");
15715 set_conv_libfunc (trunc_optab, DFmode, TFmode, "__gcc_qtod");
15716 set_conv_libfunc (sfix_optab, SImode, TFmode, "__gcc_qtoi");
15717 set_conv_libfunc (ufix_optab, SImode, TFmode, "__gcc_qtou");
15718 set_conv_libfunc (sfloat_optab, TFmode, SImode, "__gcc_itoq");
15719 set_conv_libfunc (ufloat_optab, TFmode, SImode, "__gcc_utoq");
15720 }
15721
15722 if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
15723 set_optab_libfunc (unord_optab, TFmode, "__gcc_qunord");
15724 }
15725 else
15726 {
15727 set_optab_libfunc (add_optab, TFmode, "_xlqadd");
15728 set_optab_libfunc (sub_optab, TFmode, "_xlqsub");
15729 set_optab_libfunc (smul_optab, TFmode, "_xlqmul");
15730 set_optab_libfunc (sdiv_optab, TFmode, "_xlqdiv");
15731 }
15732 else
15733 {
15734 /* 32-bit SVR4 quad floating point routines. */
15735
15736 set_optab_libfunc (add_optab, TFmode, "_q_add");
15737 set_optab_libfunc (sub_optab, TFmode, "_q_sub");
15738 set_optab_libfunc (neg_optab, TFmode, "_q_neg");
15739 set_optab_libfunc (smul_optab, TFmode, "_q_mul");
15740 set_optab_libfunc (sdiv_optab, TFmode, "_q_div");
15741 if (TARGET_PPC_GPOPT)
15742 set_optab_libfunc (sqrt_optab, TFmode, "_q_sqrt");
15743
15744 set_optab_libfunc (eq_optab, TFmode, "_q_feq");
15745 set_optab_libfunc (ne_optab, TFmode, "_q_fne");
15746 set_optab_libfunc (gt_optab, TFmode, "_q_fgt");
15747 set_optab_libfunc (ge_optab, TFmode, "_q_fge");
15748 set_optab_libfunc (lt_optab, TFmode, "_q_flt");
15749 set_optab_libfunc (le_optab, TFmode, "_q_fle");
15750
15751 set_conv_libfunc (sext_optab, TFmode, SFmode, "_q_stoq");
15752 set_conv_libfunc (sext_optab, TFmode, DFmode, "_q_dtoq");
15753 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_q_qtos");
15754 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_q_qtod");
15755 set_conv_libfunc (sfix_optab, SImode, TFmode, "_q_qtoi");
15756 set_conv_libfunc (ufix_optab, SImode, TFmode, "_q_qtou");
15757 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_q_itoq");
15758 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_q_utoq");
15759 }
15760 }
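/* A sketch of the effect of the mappings above: with the default
   -mlong-double-128 IBM extended double configuration (!TARGET_IEEEQUAD,
   !TARGET_XL_COMPAT), a TFmode addition such as a + b on long double
   operands is emitted as a call to __gcc_qadd rather than as inline
   floating point code. */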
15761
15762 \f
15763 /* Expand a block clear operation, and return 1 if successful. Return 0
15764 if we should let the compiler generate normal code.
15765
15766 operands[0] is the destination
15767 operands[1] is the length
15768 operands[3] is the alignment */
15769
15770 int
15771 expand_block_clear (rtx operands[])
15772 {
15773 rtx orig_dest = operands[0];
15774 rtx bytes_rtx = operands[1];
15775 rtx align_rtx = operands[3];
15776 bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
15777 HOST_WIDE_INT align;
15778 HOST_WIDE_INT bytes;
15779 int offset;
15780 int clear_bytes;
15781 int clear_step;
15782
15783 /* If this is not a fixed size clear, just call memset. */
15784 if (! constp)
15785 return 0;
15786
15787 /* This must be a fixed size alignment. */
15788 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
15789 align = INTVAL (align_rtx) * BITS_PER_UNIT;
15790
15791 /* Anything to clear? */
15792 bytes = INTVAL (bytes_rtx);
15793 if (bytes <= 0)
15794 return 1;
15795
15796 /* Use the builtin memset after a point, to avoid huge code bloat.
15797 When optimize_size, avoid any significant code bloat; calling
15798 memset is about 4 instructions, so allow for one instruction to
15799 load zero and three to do clearing. */
15800 if (TARGET_ALTIVEC && align >= 128)
15801 clear_step = 16;
15802 else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
15803 clear_step = 8;
15804 else if (TARGET_SPE && align >= 64)
15805 clear_step = 8;
15806 else
15807 clear_step = 4;
15808
15809 if (optimize_size && bytes > 3 * clear_step)
15810 return 0;
15811 if (! optimize_size && bytes > 8 * clear_step)
15812 return 0;
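/* Worked example of the limits above: with AltiVec and 128-bit alignment,
   clear_step is 16, so blocks up to 3 * 16 = 48 bytes are cleared inline
   under -Os and up to 8 * 16 = 128 bytes otherwise; anything larger falls
   back to memset. */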
15813
15814 for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
15815 {
15816 machine_mode mode = BLKmode;
15817 rtx dest;
15818
15819 if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
15820 {
15821 clear_bytes = 16;
15822 mode = V4SImode;
15823 }
15824 else if (bytes >= 8 && TARGET_SPE && align >= 64)
15825 {
15826 clear_bytes = 8;
15827 mode = V2SImode;
15828 }
15829 else if (bytes >= 8 && TARGET_POWERPC64
15830 && (align >= 64 || !STRICT_ALIGNMENT))
15831 {
15832 clear_bytes = 8;
15833 mode = DImode;
15834 if (offset == 0 && align < 64)
15835 {
15836 rtx addr;
15837
15838 /* If the address form is reg+offset with offset not a
15839 multiple of four, reload into reg indirect form here
15840 rather than waiting for reload. This way we get one
15841 reload, not one per store. */
15842 addr = XEXP (orig_dest, 0);
15843 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
15844 && GET_CODE (XEXP (addr, 1)) == CONST_INT
15845 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
15846 {
15847 addr = copy_addr_to_reg (addr);
15848 orig_dest = replace_equiv_address (orig_dest, addr);
15849 }
15850 }
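/* Example of the fixup above (with a hypothetical base register): for a
   destination address (plus (reg 3) (const_int 6)) the offset is not a
   multiple of four, so the DS-form ld/std instructions used for DImode
   cannot encode it; copying the full address into a register first lets
   every store use the plain (reg) form. */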
15851 }
15852 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
15853 { /* move 4 bytes */
15854 clear_bytes = 4;
15855 mode = SImode;
15856 }
15857 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
15858 { /* move 2 bytes */
15859 clear_bytes = 2;
15860 mode = HImode;
15861 }
15862 else /* move 1 byte at a time */
15863 {
15864 clear_bytes = 1;
15865 mode = QImode;
15866 }
15867
15868 dest = adjust_address (orig_dest, mode, offset);
15869
15870 emit_move_insn (dest, CONST0_RTX (mode));
15871 }
15872
15873 return 1;
15874 }
15875
15876 \f
15877 /* Expand a block move operation, and return 1 if successful. Return 0
15878 if we should let the compiler generate normal code.
15879
15880 operands[0] is the destination
15881 operands[1] is the source
15882 operands[2] is the length
15883 operands[3] is the alignment */
15884
15885 #define MAX_MOVE_REG 4
15886
15887 int
15888 expand_block_move (rtx operands[])
15889 {
15890 rtx orig_dest = operands[0];
15891 rtx orig_src = operands[1];
15892 rtx bytes_rtx = operands[2];
15893 rtx align_rtx = operands[3];
15894 int constp = (GET_CODE (bytes_rtx) == CONST_INT);
15895 int align;
15896 int bytes;
15897 int offset;
15898 int move_bytes;
15899 rtx stores[MAX_MOVE_REG];
15900 int num_reg = 0;
15901
15902 /* If this is not a fixed size move, just call memcpy. */
15903 if (! constp)
15904 return 0;
15905
15906 /* This must be a fixed size alignment. */
15907 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
15908 align = INTVAL (align_rtx) * BITS_PER_UNIT;
15909
15910 /* Anything to move? */
15911 bytes = INTVAL (bytes_rtx);
15912 if (bytes <= 0)
15913 return 1;
15914
15915 if (bytes > rs6000_block_move_inline_limit)
15916 return 0;
15917
15918 for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
15919 {
15920 union {
15921 rtx (*movmemsi) (rtx, rtx, rtx, rtx);
15922 rtx (*mov) (rtx, rtx);
15923 } gen_func;
15924 machine_mode mode = BLKmode;
15925 rtx src, dest;
15926
15927 /* Altivec first, since it will be faster than a string move
15928 when it applies, and usually not significantly larger. */
15929 if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
15930 {
15931 move_bytes = 16;
15932 mode = V4SImode;
15933 gen_func.mov = gen_movv4si;
15934 }
15935 else if (TARGET_SPE && bytes >= 8 && align >= 64)
15936 {
15937 move_bytes = 8;
15938 mode = V2SImode;
15939 gen_func.mov = gen_movv2si;
15940 }
15941 else if (TARGET_STRING
15942 && bytes > 24 /* move up to 32 bytes at a time */
15943 && ! fixed_regs[5]
15944 && ! fixed_regs[6]
15945 && ! fixed_regs[7]
15946 && ! fixed_regs[8]
15947 && ! fixed_regs[9]
15948 && ! fixed_regs[10]
15949 && ! fixed_regs[11]
15950 && ! fixed_regs[12])
15951 {
15952 move_bytes = (bytes > 32) ? 32 : bytes;
15953 gen_func.movmemsi = gen_movmemsi_8reg;
15954 }
15955 else if (TARGET_STRING
15956 && bytes > 16 /* move up to 24 bytes at a time */
15957 && ! fixed_regs[5]
15958 && ! fixed_regs[6]
15959 && ! fixed_regs[7]
15960 && ! fixed_regs[8]
15961 && ! fixed_regs[9]
15962 && ! fixed_regs[10])
15963 {
15964 move_bytes = (bytes > 24) ? 24 : bytes;
15965 gen_func.movmemsi = gen_movmemsi_6reg;
15966 }
15967 else if (TARGET_STRING
15968 && bytes > 8 /* move up to 16 bytes at a time */
15969 && ! fixed_regs[5]
15970 && ! fixed_regs[6]
15971 && ! fixed_regs[7]
15972 && ! fixed_regs[8])
15973 {
15974 move_bytes = (bytes > 16) ? 16 : bytes;
15975 gen_func.movmemsi = gen_movmemsi_4reg;
15976 }
15977 else if (bytes >= 8 && TARGET_POWERPC64
15978 && (align >= 64 || !STRICT_ALIGNMENT))
15979 {
15980 move_bytes = 8;
15981 mode = DImode;
15982 gen_func.mov = gen_movdi;
15983 if (offset == 0 && align < 64)
15984 {
15985 rtx addr;
15986
15987 /* If the address form is reg+offset with offset not a
15988 multiple of four, reload into reg indirect form here
15989 rather than waiting for reload. This way we get one
15990 reload, not one per load and/or store. */
15991 addr = XEXP (orig_dest, 0);
15992 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
15993 && GET_CODE (XEXP (addr, 1)) == CONST_INT
15994 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
15995 {
15996 addr = copy_addr_to_reg (addr);
15997 orig_dest = replace_equiv_address (orig_dest, addr);
15998 }
15999 addr = XEXP (orig_src, 0);
16000 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
16001 && GET_CODE (XEXP (addr, 1)) == CONST_INT
16002 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
16003 {
16004 addr = copy_addr_to_reg (addr);
16005 orig_src = replace_equiv_address (orig_src, addr);
16006 }
16007 }
16008 }
16009 else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
16010 { /* move up to 8 bytes at a time */
16011 move_bytes = (bytes > 8) ? 8 : bytes;
16012 gen_func.movmemsi = gen_movmemsi_2reg;
16013 }
16014 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
16015 { /* move 4 bytes */
16016 move_bytes = 4;
16017 mode = SImode;
16018 gen_func.mov = gen_movsi;
16019 }
16020 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
16021 { /* move 2 bytes */
16022 move_bytes = 2;
16023 mode = HImode;
16024 gen_func.mov = gen_movhi;
16025 }
16026 else if (TARGET_STRING && bytes > 1)
16027 { /* move up to 4 bytes at a time */
16028 move_bytes = (bytes > 4) ? 4 : bytes;
16029 gen_func.movmemsi = gen_movmemsi_1reg;
16030 }
16031 else /* move 1 byte at a time */
16032 {
16033 move_bytes = 1;
16034 mode = QImode;
16035 gen_func.mov = gen_movqi;
16036 }
16037
16038 src = adjust_address (orig_src, mode, offset);
16039 dest = adjust_address (orig_dest, mode, offset);
16040
16041 if (mode != BLKmode)
16042 {
16043 rtx tmp_reg = gen_reg_rtx (mode);
16044
16045 emit_insn ((*gen_func.mov) (tmp_reg, src));
16046 stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
16047 }
16048
16049 if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
16050 {
16051 int i;
16052 for (i = 0; i < num_reg; i++)
16053 emit_insn (stores[i]);
16054 num_reg = 0;
16055 }
16056
16057 if (mode == BLKmode)
16058 {
16059 /* Move the address into scratch registers. The movmemsi
16060 patterns require zero offset. */
16061 if (!REG_P (XEXP (src, 0)))
16062 {
16063 rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
16064 src = replace_equiv_address (src, src_reg);
16065 }
16066 set_mem_size (src, move_bytes);
16067
16068 if (!REG_P (XEXP (dest, 0)))
16069 {
16070 rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
16071 dest = replace_equiv_address (dest, dest_reg);
16072 }
16073 set_mem_size (dest, move_bytes);
16074
16075 emit_insn ((*gen_func.movmemsi) (dest, src,
16076 GEN_INT (move_bytes & 31),
16077 align_rtx));
16078 }
16079 }
16080
16081 return 1;
16082 }
16083
16084 \f
16085 /* Return a string to perform a load_multiple operation.
16086 operands[0] is the vector.
16087 operands[1] is the source address.
16088 operands[2] is the first destination register. */
16089
16090 const char *
16091 rs6000_output_load_multiple (rtx operands[3])
16092 {
16093 /* We have to handle the case where the pseudo used to contain the address
16094 is assigned to one of the output registers. */
16095 int i, j;
16096 int words = XVECLEN (operands[0], 0);
16097 rtx xop[10];
16098
16099 if (words == 1)
16100 return "lwz %2,0(%1)";
16101
16102 for (i = 0; i < words; i++)
16103 if (refers_to_regno_p (REGNO (operands[2]) + i, operands[1]))
16104 {
16105 if (i == words-1)
16106 {
16107 xop[0] = GEN_INT (4 * (words-1));
16108 xop[1] = operands[1];
16109 xop[2] = operands[2];
16110 output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
16111 return "";
16112 }
16113 else if (i == 0)
16114 {
16115 xop[0] = GEN_INT (4 * (words-1));
16116 xop[1] = operands[1];
16117 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16118 output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
16119 return "";
16120 }
16121 else
16122 {
16123 for (j = 0; j < words; j++)
16124 if (j != i)
16125 {
16126 xop[0] = GEN_INT (j * 4);
16127 xop[1] = operands[1];
16128 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
16129 output_asm_insn ("lwz %2,%0(%1)", xop);
16130 }
16131 xop[0] = GEN_INT (i * 4);
16132 xop[1] = operands[1];
16133 output_asm_insn ("lwz %1,%0(%1)", xop);
16134 return "";
16135 }
16136 }
16137
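/* Illustration of the overlap handling above: normally the whole run of
   words is loaded with the single lswi below. When the address register is
   also one of the destinations, it is either reloaded last (first and last
   positions, via the lswi/lwz sequences) or, in the middle case, every
   other word is fetched with an individual lwz before the overlapping
   word, so the address is never clobbered early. */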
16138 return "lswi %2,%1,%N0";
16139 }
16140
16141 \f
16142 /* A validation routine: say whether CODE, a condition code, and MODE
16143 match. The other alternatives either don't make sense or should
16144 never be generated. */
16145
16146 void
16147 validate_condition_mode (enum rtx_code code, machine_mode mode)
16148 {
16149 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
16150 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
16151 && GET_MODE_CLASS (mode) == MODE_CC);
16152
16153 /* These don't make sense. */
16154 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
16155 || mode != CCUNSmode);
16156
16157 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
16158 || mode == CCUNSmode);
16159
16160 gcc_assert (mode == CCFPmode
16161 || (code != ORDERED && code != UNORDERED
16162 && code != UNEQ && code != LTGT
16163 && code != UNGT && code != UNLT
16164 && code != UNGE && code != UNLE));
16165
16166 /* These should never be generated except for
16167 flag_finite_math_only. */
16168 gcc_assert (mode != CCFPmode
16169 || flag_finite_math_only
16170 || (code != LE && code != GE
16171 && code != UNEQ && code != LTGT
16172 && code != UNGT && code != UNLT));
16173
16174 /* These are invalid; the information is not there. */
16175 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
16176 }
16177
16178 \f
16179 /* Return 1 if ANDOP is a mask that sets no bits outside the mask required
16180 to convert the result of a rotate insn into a shift left insn of
16181 SHIFTOP bits. Both are known to be SImode CONST_INT. */
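/* Worked example: for SHIFTOP == 4, shift_mask is 0xfffffff0, so
   ANDOP == 0x00000ff0 is accepted (all of its bits lie inside the mask)
   while ANDOP == 0x00000ff8 is rejected, because bit 3 could only come
   from data rotated around the end of the register. */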
16182
16183 int
16184 includes_lshift_p (rtx shiftop, rtx andop)
16185 {
16186 unsigned HOST_WIDE_INT shift_mask = ~(unsigned HOST_WIDE_INT) 0;
16187
16188 shift_mask <<= INTVAL (shiftop);
16189
16190 return (INTVAL (andop) & 0xffffffff & ~shift_mask) == 0;
16191 }
16192
16193 /* Similar, but for right shift. */
16194
16195 int
16196 includes_rshift_p (rtx shiftop, rtx andop)
16197 {
16198 unsigned HOST_WIDE_INT shift_mask = ~(unsigned HOST_WIDE_INT) 0;
16199
16200 shift_mask >>= INTVAL (shiftop);
16201
16202 return (INTVAL (andop) & 0xffffffff & ~shift_mask) == 0;
16203 }
16204
16205 /* Return 1 if ANDOP is a mask suitable for use with an rldic insn
16206 to perform a left shift. It must have exactly SHIFTOP least
16207 significant 0's, then one or more 1's, then zero or more 0's. */
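/* Worked example for SHIFTOP == 8: ANDOP == 0x0000ff00 qualifies (exactly
   8 low zeros, then ones, then zeros): its lowest set bit 0x100 coincides
   with the low bit of the shift mask, and above the run of ones there are
   no further transitions. ANDOP == 0x0000fe00 is rejected because its
   lowest set bit does not line up with bit 8. */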
16208
16209 int
16210 includes_rldic_lshift_p (rtx shiftop, rtx andop)
16211 {
16212 if (GET_CODE (andop) == CONST_INT)
16213 {
16214 unsigned HOST_WIDE_INT c, lsb, shift_mask;
16215
16216 c = INTVAL (andop);
16217 if (c == 0 || c == HOST_WIDE_INT_M1U)
16218 return 0;
16219
16220 shift_mask = HOST_WIDE_INT_M1U;
16221 shift_mask <<= INTVAL (shiftop);
16222
16223 /* Find the least significant one bit. */
16224 lsb = c & -c;
16225
16226 /* It must coincide with the LSB of the shift mask. */
16227 if (-lsb != shift_mask)
16228 return 0;
16229
16230 /* Invert to look for the next transition (if any). */
16231 c = ~c;
16232
16233 /* Remove the low group of ones (originally low group of zeros). */
16234 c &= -lsb;
16235
16236 /* Again find the lsb, and check we have all 1's above. */
16237 lsb = c & -c;
16238 return c == -lsb;
16239 }
16240 else
16241 return 0;
16242 }
16243
16244 /* Return 1 if ANDOP is a mask suitable for use with an rldicr insn
16245 to perform a left shift. It must have SHIFTOP or more least
16246 significant 0's, with the remainder of the word 1's. */
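/* Worked example for SHIFTOP == 8: both ANDOP == 0xffffffffffffff00
   (exactly 8 low zeros) and ANDOP == 0xffffffffffff0000 (more than 8 low
   zeros) qualify, since the ones run to the top of the word, while
   ANDOP == 0x0000000000ffff00 is rejected because the ones stop short of
   the top. */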
16247
16248 int
16249 includes_rldicr_lshift_p (rtx shiftop, rtx andop)
16250 {
16251 if (GET_CODE (andop) == CONST_INT)
16252 {
16253 unsigned HOST_WIDE_INT c, lsb, shift_mask;
16254
16255 shift_mask = HOST_WIDE_INT_M1U;
16256 shift_mask <<= INTVAL (shiftop);
16257 c = INTVAL (andop);
16258
16259 /* Find the least significant one bit. */
16260 lsb = c & -c;
16261
16262 /* It must be covered by the shift mask.
16263 This test also rejects c == 0. */
16264 if ((lsb & shift_mask) == 0)
16265 return 0;
16266
16267 /* Check we have all 1's above the transition, and reject all 1's. */
16268 return c == -lsb && lsb != 1;
16269 }
16270 else
16271 return 0;
16272 }
16273
16274 /* Return 1 if the operands will generate valid arguments to the rlwimi
16275 instruction for an insert with right shift in 64-bit mode. The mask may
16276 not start on the first bit or stop on the last bit because the wrap-around
16277 effects of the instruction do not correspond to the semantics of the RTL insn. */
16278
16279 int
16280 insvdi_rshift_rlwimi_p (rtx sizeop, rtx startop, rtx shiftop)
16281 {
16282 if (INTVAL (startop) > 32
16283 && INTVAL (startop) < 64
16284 && INTVAL (sizeop) > 1
16285 && INTVAL (sizeop) + INTVAL (startop) < 64
16286 && INTVAL (shiftop) > 0
16287 && INTVAL (sizeop) + INTVAL (shiftop) < 32
16288 && (64 - (INTVAL (shiftop) & 63)) >= INTVAL (sizeop))
16289 return 1;
16290
16291 return 0;
16292 }
16293
16294 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
16295 for lfq and stfq insns iff the registers are hard registers. */
16296
16297 int
16298 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
16299 {
16300 /* We might have been passed a SUBREG. */
16301 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
16302 return 0;
16303
16304 /* We might have been passed non floating point registers. */
16305 if (!FP_REGNO_P (REGNO (reg1))
16306 || !FP_REGNO_P (REGNO (reg2)))
16307 return 0;
16308
16309 return (REGNO (reg1) == REGNO (reg2) - 1);
16310 }
16311
16312 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
16313 addr1 and addr2 must be in consecutive memory locations
16314 (addr2 == addr1 + 8). */
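/* Example of a pair that passes (hypothetical register and offsets):
   addr1 == (plus (reg 9) (const_int 16)) and
   addr2 == (plus (reg 9) (const_int 24)) share base register 9 and differ
   by exactly 8 bytes, so the two loads or stores can be combined into a
   single lfq or stfq. */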
16315
16316 int
16317 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
16318 {
16319 rtx addr1, addr2;
16320 unsigned int reg1, reg2;
16321 int offset1, offset2;
16322
16323 /* The mems cannot be volatile. */
16324 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
16325 return 0;
16326
16327 addr1 = XEXP (mem1, 0);
16328 addr2 = XEXP (mem2, 0);
16329
16330 /* Extract an offset (if used) from the first addr. */
16331 if (GET_CODE (addr1) == PLUS)
16332 {
16333 /* If not a REG, return zero. */
16334 if (GET_CODE (XEXP (addr1, 0)) != REG)
16335 return 0;
16336 else
16337 {
16338 reg1 = REGNO (XEXP (addr1, 0));
16339 /* The offset must be constant! */
16340 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
16341 return 0;
16342 offset1 = INTVAL (XEXP (addr1, 1));
16343 }
16344 }
16345 else if (GET_CODE (addr1) != REG)
16346 return 0;
16347 else
16348 {
16349 reg1 = REGNO (addr1);
16350 /* This was a simple (mem (reg)) expression. Offset is 0. */
16351 offset1 = 0;
16352 }
16353
16354 /* And now for the second addr. */
16355 if (GET_CODE (addr2) == PLUS)
16356 {
16357 /* If not a REG, return zero. */
16358 if (GET_CODE (XEXP (addr2, 0)) != REG)
16359 return 0;
16360 else
16361 {
16362 reg2 = REGNO (XEXP (addr2, 0));
16363 /* The offset must be constant. */
16364 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
16365 return 0;
16366 offset2 = INTVAL (XEXP (addr2, 1));
16367 }
16368 }
16369 else if (GET_CODE (addr2) != REG)
16370 return 0;
16371 else
16372 {
16373 reg2 = REGNO (addr2);
16374 /* This was a simple (mem (reg)) expression. Offset is 0. */
16375 offset2 = 0;
16376 }
16377
16378 /* Both of these must have the same base register. */
16379 if (reg1 != reg2)
16380 return 0;
16381
16382 /* The offset for the second addr must be 8 more than the first addr. */
16383 if (offset2 != offset1 + 8)
16384 return 0;
16385
16386 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
16387 instructions. */
16388 return 1;
16389 }
16390 \f
16391
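/* Return a stack slot to be used as secondary memory when reloading values
   of MODE. SDmode values normally reuse the slot recorded in
   cfun->machine->sdmode_stack_slot, applying register elimination to its
   address the first time through; all other cases get a fresh stack
   local. */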
16392 rtx
16393 rs6000_secondary_memory_needed_rtx (machine_mode mode)
16394 {
16395 static bool eliminated = false;
16396 rtx ret;
16397
16398 if (mode != SDmode || TARGET_NO_SDMODE_STACK)
16399 ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16400 else
16401 {
16402 rtx mem = cfun->machine->sdmode_stack_slot;
16403 gcc_assert (mem != NULL_RTX);
16404
16405 if (!eliminated)
16406 {
16407 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
16408 cfun->machine->sdmode_stack_slot = mem;
16409 eliminated = true;
16410 }
16411 ret = mem;
16412 }
16413
16414 if (TARGET_DEBUG_ADDR)
16415 {
16416 fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
16417 GET_MODE_NAME (mode));
16418 if (!ret)
16419 fprintf (stderr, "\tNULL_RTX\n");
16420 else
16421 debug_rtx (ret);
16422 }
16423
16424 return ret;
16425 }
16426
16427 /* Return the mode to be used for memory when a secondary memory
16428 location is needed. For SDmode values we need to use DDmode, in
16429 all other cases we can use the same mode. */
16430 machine_mode
16431 rs6000_secondary_memory_needed_mode (machine_mode mode)
16432 {
16433 if (lra_in_progress && mode == SDmode)
16434 return DDmode;
16435 return mode;
16436 }
16437
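/* walk_tree callback: return the sub-tree if *TP is a decl, SSA name,
   constant or memory reference whose type has SDmode, and NULL_TREE to
   keep walking. Types themselves are not walked into. */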
16438 static tree
16439 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
16440 {
16441 /* Don't walk into types. */
16442 if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
16443 {
16444 *walk_subtrees = 0;
16445 return NULL_TREE;
16446 }
16447
16448 switch (TREE_CODE (*tp))
16449 {
16450 case VAR_DECL:
16451 case PARM_DECL:
16452 case FIELD_DECL:
16453 case RESULT_DECL:
16454 case SSA_NAME:
16455 case REAL_CST:
16456 case MEM_REF:
16457 case VIEW_CONVERT_EXPR:
16458 if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
16459 return *tp;
16460 break;
16461 default:
16462 break;
16463 }
16464
16465 return NULL_TREE;
16466 }
16467
16468 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
16469 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
16470 only work on the traditional altivec registers, note if an altivec register
16471 was chosen. */
16472
16473 static enum rs6000_reg_type
16474 register_to_reg_type (rtx reg, bool *is_altivec)
16475 {
16476 HOST_WIDE_INT regno;
16477 enum reg_class rclass;
16478
16479 if (GET_CODE (reg) == SUBREG)
16480 reg = SUBREG_REG (reg);
16481
16482 if (!REG_P (reg))
16483 return NO_REG_TYPE;
16484
16485 regno = REGNO (reg);
16486 if (regno >= FIRST_PSEUDO_REGISTER)
16487 {
16488 if (!lra_in_progress && !reload_in_progress && !reload_completed)
16489 return PSEUDO_REG_TYPE;
16490
16491 regno = true_regnum (reg);
16492 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
16493 return PSEUDO_REG_TYPE;
16494 }
16495
16496 gcc_assert (regno >= 0);
16497
16498 if (is_altivec && ALTIVEC_REGNO_P (regno))
16499 *is_altivec = true;
16500
16501 rclass = rs6000_regno_regclass[regno];
16502 return reg_class_to_reg_type[(int)rclass];
16503 }
16504
16505 /* Helper function to return the cost of adding a TOC entry address. */
16506
16507 static inline int
16508 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
16509 {
16510 int ret;
16511
16512 if (TARGET_CMODEL != CMODEL_SMALL)
16513 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
16514
16515 else
16516 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
16517
16518 return ret;
16519 }
16520
16521 /* Helper function for rs6000_secondary_reload to determine whether the memory
16522 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
16523 needs reloading. Return negative if the memory is not handled by the memory
16524 helper functions and to try a different reload method, 0 if no additional
16525 instructions are needed, and positive to give the extra cost for the
16526 memory. */
16527
16528 static int
16529 rs6000_secondary_reload_memory (rtx addr,
16530 enum reg_class rclass,
16531 machine_mode mode)
16532 {
16533 int extra_cost = 0;
16534 rtx reg, and_arg, plus_arg0, plus_arg1;
16535 addr_mask_type addr_mask;
16536 const char *type = NULL;
16537 const char *fail_msg = NULL;
16538
16539 if (GPR_REG_CLASS_P (rclass))
16540 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
16541
16542 else if (rclass == FLOAT_REGS)
16543 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
16544
16545 else if (rclass == ALTIVEC_REGS)
16546 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
16547
16548 /* For the combined VSX_REGS, turn off Altivec AND -16. */
16549 else if (rclass == VSX_REGS)
16550 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
16551 & ~RELOAD_REG_AND_M16);
16552
16553 else
16554 {
16555 if (TARGET_DEBUG_ADDR)
16556 fprintf (stderr,
16557 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
16558 "class is not GPR, FPR, VMX\n",
16559 GET_MODE_NAME (mode), reg_class_names[rclass]);
16560
16561 return -1;
16562 }
16563
16564 /* If the register isn't valid in this register class, just return now. */
16565 if ((addr_mask & RELOAD_REG_VALID) == 0)
16566 {
16567 if (TARGET_DEBUG_ADDR)
16568 fprintf (stderr,
16569 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
16570 "not valid in class\n",
16571 GET_MODE_NAME (mode), reg_class_names[rclass]);
16572
16573 return -1;
16574 }
16575
16576 switch (GET_CODE (addr))
16577 {
16578 /* Does the register class support auto update forms for this mode? We
16579 don't need a scratch register, since the PowerPC only supports
16580 PRE_INC, PRE_DEC, and PRE_MODIFY. */
16581 case PRE_INC:
16582 case PRE_DEC:
16583 reg = XEXP (addr, 0);
16584 if (!base_reg_operand (addr, GET_MODE (reg)))
16585 {
16586 fail_msg = "no base register #1";
16587 extra_cost = -1;
16588 }
16589
16590 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
16591 {
16592 extra_cost = 1;
16593 type = "update";
16594 }
16595 break;
16596
16597 case PRE_MODIFY:
16598 reg = XEXP (addr, 0);
16599 plus_arg1 = XEXP (addr, 1);
16600 if (!base_reg_operand (reg, GET_MODE (reg))
16601 || GET_CODE (plus_arg1) != PLUS
16602 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
16603 {
16604 fail_msg = "bad PRE_MODIFY";
16605 extra_cost = -1;
16606 }
16607
16608 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
16609 {
16610 extra_cost = 1;
16611 type = "update";
16612 }
16613 break;
16614
16615 /* Do we need to simulate AND -16 to clear the bottom address bits used
16616 in VMX load/stores? Only allow the AND for vector sizes. */
16617 case AND:
16618 and_arg = XEXP (addr, 0);
16619 if (GET_MODE_SIZE (mode) != 16
16620 || GET_CODE (XEXP (addr, 1)) != CONST_INT
16621 || INTVAL (XEXP (addr, 1)) != -16)
16622 {
16623 fail_msg = "bad Altivec AND #1";
16624 extra_cost = -1;
16625 }
16626
16627 else if (rclass != ALTIVEC_REGS)
16628 {
16629 if (legitimate_indirect_address_p (and_arg, false))
16630 extra_cost = 1;
16631
16632 else if (legitimate_indexed_address_p (and_arg, false))
16633 extra_cost = 2;
16634
16635 else
16636 {
16637 fail_msg = "bad Altivec AND #2";
16638 extra_cost = -1;
16639 }
16640
16641 type = "and";
16642 }
16643 break;
16644
16645 /* If this is an indirect address, make sure it is a base register. */
16646 case REG:
16647 case SUBREG:
16648 if (!legitimate_indirect_address_p (addr, false))
16649 {
16650 extra_cost = 1;
16651 type = "move";
16652 }
16653 break;
16654
16655 /* If this is an indexed address, make sure the register class can handle
16656 indexed addresses for this mode. */
16657 case PLUS:
16658 plus_arg0 = XEXP (addr, 0);
16659 plus_arg1 = XEXP (addr, 1);
16660
16661 /* (plus (plus (reg) (constant)) (constant)) is generated during
16662 push_reload processing, so handle it now. */
16663 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
16664 {
16665 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16666 {
16667 extra_cost = 1;
16668 type = "offset";
16669 }
16670 }
16671
16672 /* (plus (plus (reg) (constant)) (reg)) is also generated during
16673 push_reload processing, so handle it now. */
16674 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
16675 {
16676 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
16677 {
16678 extra_cost = 1;
16679 type = "indexed #2";
16680 }
16681 }
16682
16683 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
16684 {
16685 fail_msg = "no base register #2";
16686 extra_cost = -1;
16687 }
16688
16689 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
16690 {
16691 if ((addr_mask & RELOAD_REG_INDEXED) == 0
16692 || !legitimate_indexed_address_p (addr, false))
16693 {
16694 extra_cost = 1;
16695 type = "indexed";
16696 }
16697 }
16698
16699 /* Make sure the register class can handle offset addresses. */
16700 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
16701 {
16702 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16703 {
16704 extra_cost = 1;
16705 type = "offset";
16706 }
16707 }
16708
16709 else
16710 {
16711 fail_msg = "bad PLUS";
16712 extra_cost = -1;
16713 }
16714
16715 break;
16716
16717 case LO_SUM:
16718 if (!legitimate_lo_sum_address_p (mode, addr, false))
16719 {
16720 fail_msg = "bad LO_SUM";
16721 extra_cost = -1;
16722 }
16723
16724 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16725 {
16726 extra_cost = 1;
16727 type = "lo_sum";
16728 }
16729 break;
16730
16731 /* Static addresses need to create a TOC entry. */
16732 case CONST:
16733 case SYMBOL_REF:
16734 case LABEL_REF:
16735 type = "address";
16736 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
16737 break;
16738
16739 /* TOC references look like offsetable memory. */
16740 case UNSPEC:
16741 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
16742 {
16743 fail_msg = "bad UNSPEC";
16744 extra_cost = -1;
16745 }
16746
16747 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16748 {
16749 extra_cost = 1;
16750 type = "toc reference";
16751 }
16752 break;
16753
16754 default:
16755 {
16756 fail_msg = "bad address";
16757 extra_cost = -1;
16758 }
16759 }
16760
16761 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
16762 {
16763 if (extra_cost < 0)
16764 fprintf (stderr,
16765 "rs6000_secondary_reload_memory error: mode = %s, "
16766 "class = %s, addr_mask = '%s', %s\n",
16767 GET_MODE_NAME (mode),
16768 reg_class_names[rclass],
16769 rs6000_debug_addr_mask (addr_mask, false),
16770 (fail_msg != NULL) ? fail_msg : "<bad address>");
16771
16772 else
16773 fprintf (stderr,
16774 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
16775 "addr_mask = '%s', extra cost = %d, %s\n",
16776 GET_MODE_NAME (mode),
16777 reg_class_names[rclass],
16778 rs6000_debug_addr_mask (addr_mask, false),
16779 extra_cost,
16780 (type) ? type : "<none>");
16781
16782 debug_rtx (addr);
16783 }
16784
16785 return extra_cost;
16786 }
16787
16788 /* Helper function for rs6000_secondary_reload to return true if a move to a
16789 different register class is really a simple move. */
16790
16791 static bool
16792 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
16793 enum rs6000_reg_type from_type,
16794 machine_mode mode)
16795 {
16796 int size;
16797
16798 /* Add support for various direct moves available. In this function, we only
16799 look at cases where we don't need any extra registers, and one or more
16800 simple move insns are issued. At present, 32-bit integers are not allowed
16801 in FPR/VSX registers. Single precision binary floating is not a simple
16802 move because we need to convert to the single precision memory layout.
16803 The 4-byte SDmode can be moved. */
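/* For instance, a DImode move between a GPR and a VSX register on a
   64-bit direct-move target is a single mtvsrd or mfvsrd, so it counts as
   a simple move here, while SFmode does not, since the value must also be
   converted to or from the single precision memory layout. */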
16804 size = GET_MODE_SIZE (mode);
16805 if (TARGET_DIRECT_MOVE
16806 && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8))
16807 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16808 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
16809 return true;
16810
16811 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
16812 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
16813 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
16814 return true;
16815
16816 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
16817 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
16818 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
16819 return true;
16820
16821 return false;
16822 }
16823
16824 /* Power8 helper function for rs6000_secondary_reload; handle all of the
16825 special direct moves that involve allocating an extra register. Return
16826 true if there is such a move, recording the insn code and extra cost of
16827 the helper in SRI; return false if not. */
16828
16829 static bool
16830 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
16831 enum rs6000_reg_type from_type,
16832 machine_mode mode,
16833 secondary_reload_info *sri,
16834 bool altivec_p)
16835 {
16836 bool ret = false;
16837 enum insn_code icode = CODE_FOR_nothing;
16838 int cost = 0;
16839 int size = GET_MODE_SIZE (mode);
16840
16841 if (TARGET_POWERPC64)
16842 {
16843 if (size == 16)
16844 {
16845 /* Handle moving 128-bit values from GPRs to VSX registers on
16846 power8 when running in 64-bit mode using XXPERMDI to glue the two
16847 64-bit values back together. */
16848 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
16849 {
16850 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
16851 icode = reg_addr[mode].reload_vsx_gpr;
16852 }
16853
16854 /* Handle moving 128-bit values from VSX registers to GPRs on
16855 power8 when running in 64-bit mode using XXPERMDI to get access to the
16856 bottom 64-bit value. */
16857 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16858 {
16859 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
16860 icode = reg_addr[mode].reload_gpr_vsx;
16861 }
16862 }
16863
16864 else if (mode == SFmode)
16865 {
16866 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16867 {
16868 cost = 3; /* xscvdpspn, mfvsrd, and. */
16869 icode = reg_addr[mode].reload_gpr_vsx;
16870 }
16871
16872 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
16873 {
16874 cost = 2; /* mtvsrz, xscvspdpn. */
16875 icode = reg_addr[mode].reload_vsx_gpr;
16876 }
16877 }
16878 }
16879
16880 if (TARGET_POWERPC64 && size == 16)
16881 {
16882 /* Handle moving 128-bit values from GPRs to VSX point registers on
16883 power8 when running in 64-bit mode using XXPERMDI to glue the two
16884 64-bit values back together. */
16885 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
16886 {
16887 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
16888 icode = reg_addr[mode].reload_vsx_gpr;
16889 }
16890
16891 /* Handle moving 128-bit values from VSX point registers to GPRs on
16892 power8 when running in 64-bit mode using XXPERMDI to get access to the
16893 bottom 64-bit value. */
16894 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16895 {
16896 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
16897 icode = reg_addr[mode].reload_gpr_vsx;
16898 }
16899 }
16900
16901 else if (!TARGET_POWERPC64 && size == 8)
16902 {
16903 /* Handle moving 64-bit values from GPRs to floating point registers on
16904 power8 when running in 32-bit mode using FMRGOW to glue the two 32-bit
16905 values back together. Altivec register classes must be handled
16906 specially since a different instruction is used, and the secondary
16907 reload support requires a single instruction class in the scratch
16908 register constraint. However, right now TFmode is not allowed in
16909 Altivec registers, so the pattern will never match. */
16910 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
16911 {
16912 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
16913 icode = reg_addr[mode].reload_fpr_gpr;
16914 }
16915 }
16916
16917 if (icode != CODE_FOR_nothing)
16918 {
16919 ret = true;
16920 if (sri)
16921 {
16922 sri->icode = icode;
16923 sri->extra_cost = cost;
16924 }
16925 }
16926
16927 return ret;
16928 }
16929
16930 /* Return whether a move between two register classes can be done either
16931 directly (simple move) or via a pattern that uses a single extra temporary
16932 (using power8's direct move in this case). */
16933
16934 static bool
16935 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
16936 enum rs6000_reg_type from_type,
16937 machine_mode mode,
16938 secondary_reload_info *sri,
16939 bool altivec_p)
16940 {
16941 /* Fall back to load/store reloads if either type is not a register. */
16942 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
16943 return false;
16944
16945 /* If we haven't allocated registers yet, assume the move can be done for the
16946 standard register types. */
16947 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
16948 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
16949 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
16950 return true;
16951
16952 /* A move within the same set of registers is a simple move for
16953 non-specialized registers. */
16954 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
16955 return true;
16956
16957 /* Check whether a simple move can be done directly. */
16958 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
16959 {
16960 if (sri)
16961 {
16962 sri->icode = CODE_FOR_nothing;
16963 sri->extra_cost = 0;
16964 }
16965 return true;
16966 }
16967
16968 /* Now check if we can do it in a few steps. */
16969 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
16970 altivec_p);
16971 }
16972
16973 /* Inform reload about cases where moving X with a mode MODE to a register in
16974 RCLASS requires an extra scratch or immediate register. Return the class
16975 needed for the immediate register.
16976
16977 For VSX and Altivec, we may need a register to convert sp+offset into
16978 reg+sp.
16979
16980 For misaligned 64-bit gpr loads and stores we need a register to
16981 convert an offset address to indirect. */
16982
16983 static reg_class_t
16984 rs6000_secondary_reload (bool in_p,
16985 rtx x,
16986 reg_class_t rclass_i,
16987 machine_mode mode,
16988 secondary_reload_info *sri)
16989 {
16990 enum reg_class rclass = (enum reg_class) rclass_i;
16991 reg_class_t ret = ALL_REGS;
16992 enum insn_code icode;
16993 bool default_p = false;
16994 bool done_p = false;
16995
16996 /* Allow subreg of memory before/during reload. */
16997 bool memory_p = (MEM_P (x)
16998 || (!reload_completed && GET_CODE (x) == SUBREG
16999 && MEM_P (SUBREG_REG (x))));
17000
17001 sri->icode = CODE_FOR_nothing;
17002 sri->extra_cost = 0;
17003 icode = ((in_p)
17004 ? reg_addr[mode].reload_load
17005 : reg_addr[mode].reload_store);
17006
17007 if (REG_P (x) || register_operand (x, mode))
17008 {
17009 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
17010 bool altivec_p = (rclass == ALTIVEC_REGS);
17011 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
17012
17013 if (!in_p)
17014 {
17015 enum rs6000_reg_type exchange = to_type;
17016 to_type = from_type;
17017 from_type = exchange;
17018 }
17019
17020 /* Can we do a direct move of some sort? */
17021 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
17022 altivec_p))
17023 {
17024 icode = (enum insn_code)sri->icode;
17025 default_p = false;
17026 done_p = true;
17027 ret = NO_REGS;
17028 }
17029 }
17030
17031 /* Make sure 0.0 is not reloaded or forced into memory. */
17032 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
17033 {
17034 ret = NO_REGS;
17035 default_p = false;
17036 done_p = true;
17037 }
17038
17039 /* If this is a scalar floating point value and we want to load it into the
17040 traditional Altivec registers, move it via a traditional floating point
17041 register. Also make sure that non-zero constants use an FPR. */
17042 if (!done_p && reg_addr[mode].scalar_in_vmx_p
17043 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
17044 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
17045 {
17046 ret = FLOAT_REGS;
17047 default_p = false;
17048 done_p = true;
17049 }
17050
17051 /* Handle reloads of loads/stores if we have reload helper functions. */
17052 if (!done_p && icode != CODE_FOR_nothing && memory_p)
17053 {
17054 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
17055 mode);
17056
17057 if (extra_cost >= 0)
17058 {
17059 done_p = true;
17060 ret = NO_REGS;
17061 if (extra_cost > 0)
17062 {
17063 sri->extra_cost = extra_cost;
17064 sri->icode = icode;
17065 }
17066 }
17067 }
17068
17069 /* Handle unaligned loads and stores of integer registers. */
17070 if (!done_p && TARGET_POWERPC64
17071 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
17072 && memory_p
17073 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
17074 {
17075 rtx addr = XEXP (x, 0);
17076 rtx off = address_offset (addr);
17077
17078 if (off != NULL_RTX)
17079 {
17080 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
17081 unsigned HOST_WIDE_INT offset = INTVAL (off);
17082
17083 /* We need a secondary reload when our legitimate_address_p
17084 says the address is good (as otherwise the entire address
17085 will be reloaded), and the offset is not a multiple of
17086 four or we have an address wrap. Address wrap will only
17087 occur for LO_SUMs since legitimate_offset_address_p
17088 rejects addresses for 16-byte mems that will wrap. */
17089 if (GET_CODE (addr) == LO_SUM
17090 ? (1 /* legitimate_address_p allows any offset for lo_sum */
17091 && ((offset & 3) != 0
17092 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
17093 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
17094 && (offset & 3) != 0))
17095 {
17096 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
17097 if (in_p)
17098 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
17099 : CODE_FOR_reload_di_load);
17100 else
17101 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
17102 : CODE_FOR_reload_di_store);
17103 sri->extra_cost = 2;
17104 ret = NO_REGS;
17105 done_p = true;
17106 }
17107 else
17108 default_p = true;
17109 }
17110 else
17111 default_p = true;
17112 }
17113
17114 if (!done_p && !TARGET_POWERPC64
17115 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
17116 && memory_p
17117 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
17118 {
17119 rtx addr = XEXP (x, 0);
17120 rtx off = address_offset (addr);
17121
17122 if (off != NULL_RTX)
17123 {
17124 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
17125 unsigned HOST_WIDE_INT offset = INTVAL (off);
17126
17127 /* We need a secondary reload when our legitimate_address_p
17128 says the address is good (as otherwise the entire address
17129 will be reloaded), and we have a wrap.
17130
17131 legitimate_lo_sum_address_p allows LO_SUM addresses to
17132 have any offset so test for wrap in the low 16 bits.
17133
17134 legitimate_offset_address_p checks for the range
17135 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
17136 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
17137 [0x7ff4,0x7fff] respectively, so test for the
17138 intersection of these ranges, [0x7ffc,0x7fff] and
17139 [0x7ff4,0x7ff7] respectively.
17140
17141 Note that the address we see here may have been
17142 manipulated by legitimize_reload_address. */
17143 if (GET_CODE (addr) == LO_SUM
17144 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
17145 : offset - (0x8000 - extra) < UNITS_PER_WORD)
17146 {
17147 if (in_p)
17148 sri->icode = CODE_FOR_reload_si_load;
17149 else
17150 sri->icode = CODE_FOR_reload_si_store;
17151 sri->extra_cost = 2;
17152 ret = NO_REGS;
17153 done_p = true;
17154 }
17155 else
17156 default_p = true;
17157 }
17158 else
17159 default_p = true;
17160 }
17161
17162 if (!done_p)
17163 default_p = true;
17164
17165 if (default_p)
17166 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
17167
17168 gcc_assert (ret != ALL_REGS);
17169
17170 if (TARGET_DEBUG_ADDR)
17171 {
17172 fprintf (stderr,
17173 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
17174 "mode = %s",
17175 reg_class_names[ret],
17176 in_p ? "true" : "false",
17177 reg_class_names[rclass],
17178 GET_MODE_NAME (mode));
17179
17180 if (reload_completed)
17181 fputs (", after reload", stderr);
17182
17183 if (!done_p)
17184 fputs (", done_p not set", stderr);
17185
17186 if (default_p)
17187 fputs (", default secondary reload", stderr);
17188
17189 if (sri->icode != CODE_FOR_nothing)
17190 fprintf (stderr, ", reload func = %s, extra cost = %d",
17191 insn_data[sri->icode].name, sri->extra_cost);
17192
17193 fputs ("\n", stderr);
17194 debug_rtx (x);
17195 }
17196
17197 return ret;
17198 }
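
/* A self-contained sketch (an editor's illustration, not compiler code) of
   the displacement-wrap test used twice above.  The expression
   ((offset & 0xffff) ^ 0x8000) maps a signed 16-bit displacement onto
   0..0xffff, so comparing against 0x10000 - extra asks whether the last
   word of the access would run past the 0x7fff end of the 16-bit
   displacement range.  */
#if 0
#include <assert.h>
#include <stdint.h>

static int
dform_wraps (uint64_t offset, unsigned int extra)
{
  return ((offset & 0xffff) ^ 0x8000) >= (uint64_t) 0x10000 - extra;
}

int
main (void)
{
  /* A 16-byte access with 64-bit words: extra = 16 - 8 = 8.  */
  assert (!dform_wraps (0x7ff0, 8));	/* Ends at 0x7fff: fits.  */
  assert (dform_wraps (0x7ff8, 8));	/* Would end at 0x8007: wraps.  */
  /* Negative displacements cannot wrap past the top of the range.  */
  assert (!dform_wraps ((uint64_t) -0x8000, 8));
  return 0;
}
#endif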
17199
17200 /* Better tracing for rs6000_secondary_reload_inner. */
17201
17202 static void
17203 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
17204 bool store_p)
17205 {
17206 rtx set, clobber;
17207
17208 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
17209
17210 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
17211 store_p ? "store" : "load");
17212
17213 if (store_p)
17214 set = gen_rtx_SET (VOIDmode, mem, reg);
17215 else
17216 set = gen_rtx_SET (VOIDmode, reg, mem);
17217
17218 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
17219 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
17220 }
17221
17222 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
17223 ATTRIBUTE_NORETURN;
17224
17225 static void
17226 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
17227 bool store_p)
17228 {
17229 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
17230 gcc_unreachable ();
17231 }
17232
17233 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
17234 reload helper functions. These were identified in
17235 rs6000_secondary_reload_memory, and if reload decided to use the secondary
17236 reload, it calls the insns:
17237 reload_<RELOAD:mode>_<P:mptrsize>_store
17238 reload_<RELOAD:mode>_<P:mptrsize>_load
17239
17240 which in turn calls this function, to do whatever is necessary to create
17241 valid addresses. */
17242
17243 void
17244 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
17245 {
17246 int regno = true_regnum (reg);
17247 machine_mode mode = GET_MODE (reg);
17248 addr_mask_type addr_mask;
17249 rtx addr;
17250 rtx new_addr;
17251 rtx op_reg, op0, op1;
17252 rtx and_op;
17253 rtx cc_clobber;
17254 rtvec rv;
17255
17256 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
17257 || !base_reg_operand (scratch, GET_MODE (scratch)))
17258 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17259
17260 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
17261 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
17262
17263 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
17264 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
17265
17266 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
17267 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
17268
17269 else
17270 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17271
17272 /* Make sure the mode is valid in this register class. */
17273 if ((addr_mask & RELOAD_REG_VALID) == 0)
17274 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17275
17276 if (TARGET_DEBUG_ADDR)
17277 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
17278
17279 new_addr = addr = XEXP (mem, 0);
17280 switch (GET_CODE (addr))
17281 {
17282 /* Does the register class support auto update forms for this mode? If
17283 not, do the update now. We don't need a scratch register, since the
17284 PowerPC only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
17285 case PRE_INC:
17286 case PRE_DEC:
17287 op_reg = XEXP (addr, 0);
17288 if (!base_reg_operand (op_reg, Pmode))
17289 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17290
17291 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
17292 {
17293 emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
17294 new_addr = op_reg;
17295 }
17296 break;
17297
17298 case PRE_MODIFY:
17299 op0 = XEXP (addr, 0);
17300 op1 = XEXP (addr, 1);
17301 if (!base_reg_operand (op0, Pmode)
17302 || GET_CODE (op1) != PLUS
17303 || !rtx_equal_p (op0, XEXP (op1, 0)))
17304 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17305
17306 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
17307 {
17308 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17309 new_addr = op0;
17310 }
17311 break;
17312
17313 /* Do we need to simulate AND -16 to clear the bottom address bits used
17314 in VMX load/stores? */
17315 case AND:
17316 op0 = XEXP (addr, 0);
17317 op1 = XEXP (addr, 1);
17318 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
17319 {
17320 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
17321 op_reg = op0;
17322
17323 else if (GET_CODE (op0) == PLUS)
17324 {
17325 emit_insn (gen_rtx_SET (VOIDmode, scratch, op0));
17326 op_reg = scratch;
17327 }
17328
17329 else
17330 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17331
17332 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
17333 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
17334 rv = gen_rtvec (2, gen_rtx_SET (VOIDmode, scratch, and_op), cc_clobber);
17335 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
17336 new_addr = scratch;
17337 }
17338 break;
17339
17340 /* If this is an indirect address, make sure it is a base register. */
17341 case REG:
17342 case SUBREG:
17343 if (!base_reg_operand (addr, GET_MODE (addr)))
17344 {
17345 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17346 new_addr = scratch;
17347 }
17348 break;
17349
17350 /* If this is an indexed address, make sure the register class can handle
17351 indexed addresses for this mode. */
17352 case PLUS:
17353 op0 = XEXP (addr, 0);
17354 op1 = XEXP (addr, 1);
17355 if (!base_reg_operand (op0, Pmode))
17356 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17357
17358 else if (int_reg_operand (op1, Pmode))
17359 {
17360 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
17361 {
17362 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17363 new_addr = scratch;
17364 }
17365 }
17366
17367 /* Make sure the register class can handle offset addresses. */
17368 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
17369 {
17370 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
17371 {
17372 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17373 new_addr = scratch;
17374 }
17375 }
17376
17377 else
17378 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17379
17380 break;
17381
17382 case LO_SUM:
17383 op0 = XEXP (addr, 0);
17384 op1 = XEXP (addr, 1);
17385 if (!base_reg_operand (op0, Pmode))
17386 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17387
17388 else if (int_reg_operand (op1, Pmode))
17389 {
17390 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
17391 {
17392 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17393 new_addr = scratch;
17394 }
17395 }
17396
17397 /* Make sure the register class can handle offset addresses. */
17398 else if (legitimate_lo_sum_address_p (mode, addr, false))
17399 {
17400 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
17401 {
17402 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17403 new_addr = scratch;
17404 }
17405 }
17406
17407 else
17408 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17409
17410 break;
17411
17412 case SYMBOL_REF:
17413 case CONST:
17414 case LABEL_REF:
17415 rs6000_emit_move (scratch, addr, Pmode);
17416 new_addr = scratch;
17417 break;
17418
17419 default:
17420 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17421 }
17422
17423 /* Adjust the address if it changed. */
17424 if (addr != new_addr)
17425 {
17426 mem = replace_equiv_address_nv (mem, new_addr);
17427 if (TARGET_DEBUG_ADDR)
17428 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
17429 }
17430
17431 /* Now create the move. */
17432 if (store_p)
17433 emit_insn (gen_rtx_SET (VOIDmode, mem, reg));
17434 else
17435 emit_insn (gen_rtx_SET (VOIDmode, reg, mem));
17436
17437 return;
17438 }
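
/* A tiny standalone check (an editor's sketch, not compiler code) of the
   "AND -16" simulation above: the Altivec lvx/stvx instructions ignore the
   low four bits of the effective address, and ANDing with -16 models that
   truncation in the scratch register.  */
#if 0
#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uint64_t addr = 0x1000f;

  assert ((addr & (uint64_t) -16) == 0x10000);	/* Low bits cleared.  */
  assert ((addr & (uint64_t) -16) % 16 == 0);	/* Always 16-byte aligned.  */
  return 0;
}
#endif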
17439
17440 /* Convert reloads involving 64-bit gprs and misaligned offset
17441 addressing, or multiple 32-bit gprs and offsets that are too large,
17442 to use indirect addressing. */
17443
17444 void
17445 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
17446 {
17447 int regno = true_regnum (reg);
17448 enum reg_class rclass;
17449 rtx addr;
17450 rtx scratch_or_premodify = scratch;
17451
17452 if (TARGET_DEBUG_ADDR)
17453 {
17454 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
17455 store_p ? "store" : "load");
17456 fprintf (stderr, "reg:\n");
17457 debug_rtx (reg);
17458 fprintf (stderr, "mem:\n");
17459 debug_rtx (mem);
17460 fprintf (stderr, "scratch:\n");
17461 debug_rtx (scratch);
17462 }
17463
17464 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
17465 gcc_assert (GET_CODE (mem) == MEM);
17466 rclass = REGNO_REG_CLASS (regno);
17467 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
17468 addr = XEXP (mem, 0);
17469
17470 if (GET_CODE (addr) == PRE_MODIFY)
17471 {
17472 scratch_or_premodify = XEXP (addr, 0);
17473 gcc_assert (REG_P (scratch_or_premodify));
17474 addr = XEXP (addr, 1);
17475 }
17476 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
17477
17478 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
17479
17480 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
17481
17482 /* Now create the move. */
17483 if (store_p)
17484 emit_insn (gen_rtx_SET (VOIDmode, mem, reg));
17485 else
17486 emit_insn (gen_rtx_SET (VOIDmode, reg, mem));
17487
17488 return;
17489 }
17490
17491 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
17492 this function has any SDmode references. If we are on a power7 or later, we
17493 don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
17494 can load/store the value. */
17495
17496 static void
17497 rs6000_alloc_sdmode_stack_slot (void)
17498 {
17499 tree t;
17500 basic_block bb;
17501 gimple_stmt_iterator gsi;
17502
17503 gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
17504 /* We use a different approach for dealing with the secondary
17505 memory in LRA. */
17506 if (ira_use_lra_p)
17507 return;
17508
17509 if (TARGET_NO_SDMODE_STACK)
17510 return;
17511
17512 FOR_EACH_BB_FN (bb, cfun)
17513 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
17514 {
17515 tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
17516 if (ret)
17517 {
17518 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
17519 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
17520 SDmode, 0);
17521 return;
17522 }
17523 }
17524
17525 /* Check for any SDmode parameters of the function. */
17526 for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
17527 {
17528 if (TREE_TYPE (t) == error_mark_node)
17529 continue;
17530
17531 if (TYPE_MODE (TREE_TYPE (t)) == SDmode
17532 || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
17533 {
17534 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
17535 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
17536 SDmode, 0);
17537 return;
17538 }
17539 }
17540 }
17541
17542 static void
17543 rs6000_instantiate_decls (void)
17544 {
17545 if (cfun->machine->sdmode_stack_slot != NULL_RTX)
17546 instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
17547 }
17548
17549 /* Given an rtx X being reloaded into a reg required to be
17550 in class CLASS, return the class of reg to actually use.
17551 In general this is just CLASS; but on some machines
17552 in some cases it is preferable to use a more restrictive class.
17553
17554 On the RS/6000, we have to return NO_REGS when we want to reload a
17555 floating-point CONST_DOUBLE to force it to be copied to memory.
17556
17557 We also don't want to reload integer values into floating-point
17558 registers if we can at all help it. In fact, this can
17559 cause reload to die, if it tries to generate a reload of CTR
17560 into a FP register and discovers it doesn't have the memory location
17561 required.
17562
17563 ??? Would it be a good idea to have reload do the converse, that is
17564 try to reload floating modes into FP registers if possible?
17565 */
17566
17567 static enum reg_class
17568 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
17569 {
17570 machine_mode mode = GET_MODE (x);
17571 bool is_constant = CONSTANT_P (x);
17572
17573 /* Do VSX tests before handling traditional floating point registers. */
17574 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
17575 {
17576 if (is_constant)
17577 {
17578 /* Zero is always allowed in all VSX registers. */
17579 if (x == CONST0_RTX (mode))
17580 return rclass;
17581
17582 /* If this is a vector constant that can be formed with a few Altivec
17583 instructions, we want altivec registers. */
17584 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
17585 return ALTIVEC_REGS;
17586
17587 /* Force constant to memory. */
17588 return NO_REGS;
17589 }
17590
17591 /* If this is a scalar floating point value, prefer the traditional
17592 floating point registers so that we can use D-form (register+offset)
17593 addressing. */
17594 if (GET_MODE_SIZE (mode) < 16)
17595 return FLOAT_REGS;
17596
17597 /* Prefer the Altivec registers if Altivec is handling the vector
17598 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
17599 loads. */
17600 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
17601 || mode == V1TImode)
17602 return ALTIVEC_REGS;
17603
17604 return rclass;
17605 }
17606
17607 if (is_constant || GET_CODE (x) == PLUS)
17608 {
17609 if (reg_class_subset_p (GENERAL_REGS, rclass))
17610 return GENERAL_REGS;
17611 if (reg_class_subset_p (BASE_REGS, rclass))
17612 return BASE_REGS;
17613 return NO_REGS;
17614 }
17615
17616 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
17617 return GENERAL_REGS;
17618
17619 return rclass;
17620 }
17621
17622 /* Debug version of rs6000_preferred_reload_class. */
17623 static enum reg_class
17624 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
17625 {
17626 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
17627
17628 fprintf (stderr,
17629 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
17630 "mode = %s, x:\n",
17631 reg_class_names[ret], reg_class_names[rclass],
17632 GET_MODE_NAME (GET_MODE (x)));
17633 debug_rtx (x);
17634
17635 return ret;
17636 }
17637
17638 /* If we are copying between FP or AltiVec registers and anything else, we need
17639 a memory location. The exception is when we are targeting ppc64 and the
17640 direct FPR/GPR move instructions are available. Also, under VSX, you
17641 can copy vector registers from the FP register set to the Altivec register
17642 set and vice versa. */
17643
17644 static bool
17645 rs6000_secondary_memory_needed (enum reg_class from_class,
17646 enum reg_class to_class,
17647 machine_mode mode)
17648 {
17649 enum rs6000_reg_type from_type, to_type;
17650 bool altivec_p = ((from_class == ALTIVEC_REGS)
17651 || (to_class == ALTIVEC_REGS));
17652
17653 /* If a simple/direct move is available, we don't need secondary memory. */
17654 from_type = reg_class_to_reg_type[(int)from_class];
17655 to_type = reg_class_to_reg_type[(int)to_class];
17656
17657 if (rs6000_secondary_reload_move (to_type, from_type, mode,
17658 (secondary_reload_info *)0, altivec_p))
17659 return false;
17660
17661 /* If we have a floating point or vector register class, we need to use
17662 memory to transfer the data. */
17663 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
17664 return true;
17665
17666 return false;
17667 }
17668
17669 /* Debug version of rs6000_secondary_memory_needed. */
17670 static bool
17671 rs6000_debug_secondary_memory_needed (enum reg_class from_class,
17672 enum reg_class to_class,
17673 machine_mode mode)
17674 {
17675 bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
17676
17677 fprintf (stderr,
17678 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
17679 "to_class = %s, mode = %s\n",
17680 ret ? "true" : "false",
17681 reg_class_names[from_class],
17682 reg_class_names[to_class],
17683 GET_MODE_NAME (mode));
17684
17685 return ret;
17686 }
17687
17688 /* Return the register class of a scratch register needed to copy IN into
17689 or out of a register in RCLASS in MODE. If it can be done directly,
17690 NO_REGS is returned. */
17691
17692 static enum reg_class
17693 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
17694 rtx in)
17695 {
17696 int regno;
17697
17698 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
17699 #if TARGET_MACHO
17700 && MACHOPIC_INDIRECT
17701 #endif
17702 ))
17703 {
17704 /* We cannot copy a symbolic operand directly into anything
17705 other than BASE_REGS for TARGET_ELF. So indicate that a
17706 register from BASE_REGS is needed as an intermediate
17707 register.
17708
17709 On Darwin, pic addresses require a load from memory, which
17710 needs a base register. */
17711 if (rclass != BASE_REGS
17712 && (GET_CODE (in) == SYMBOL_REF
17713 || GET_CODE (in) == HIGH
17714 || GET_CODE (in) == LABEL_REF
17715 || GET_CODE (in) == CONST))
17716 return BASE_REGS;
17717 }
17718
17719 if (GET_CODE (in) == REG)
17720 {
17721 regno = REGNO (in);
17722 if (regno >= FIRST_PSEUDO_REGISTER)
17723 {
17724 regno = true_regnum (in);
17725 if (regno >= FIRST_PSEUDO_REGISTER)
17726 regno = -1;
17727 }
17728 }
17729 else if (GET_CODE (in) == SUBREG)
17730 {
17731 regno = true_regnum (in);
17732 if (regno >= FIRST_PSEUDO_REGISTER)
17733 regno = -1;
17734 }
17735 else
17736 regno = -1;
17737
17738 /* If we have VSX register moves, prefer moving scalar values between
17739 Altivec registers and GPR by going via an FPR (and then via memory)
17740 instead of reloading the secondary memory address for Altivec moves. */
17741 if (TARGET_VSX
17742 && GET_MODE_SIZE (mode) < 16
17743 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
17744 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
17745 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
17746 && (regno >= 0 && INT_REGNO_P (regno)))))
17747 return FLOAT_REGS;
17748
17749 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
17750 into anything. */
17751 if (rclass == GENERAL_REGS || rclass == BASE_REGS
17752 || (regno >= 0 && INT_REGNO_P (regno)))
17753 return NO_REGS;
17754
17755 /* Constants, memory, and VSX registers can go into VSX registers (both the
17756 traditional floating point and the altivec registers). */
17757 if (rclass == VSX_REGS
17758 && (regno == -1 || VSX_REGNO_P (regno)))
17759 return NO_REGS;
17760
17761 /* Constants, memory, and FP registers can go into FP registers. */
17762 if ((regno == -1 || FP_REGNO_P (regno))
17763 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
17764 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
17765
17766 /* Memory and AltiVec registers can go into AltiVec registers. */
17767 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
17768 && rclass == ALTIVEC_REGS)
17769 return NO_REGS;
17770
17771 /* We can copy among the CR registers. */
17772 if ((rclass == CR_REGS || rclass == CR0_REGS)
17773 && regno >= 0 && CR_REGNO_P (regno))
17774 return NO_REGS;
17775
17776 /* Otherwise, we need GENERAL_REGS. */
17777 return GENERAL_REGS;
17778 }
17779
17780 /* Debug version of rs6000_secondary_reload_class. */
17781 static enum reg_class
17782 rs6000_debug_secondary_reload_class (enum reg_class rclass,
17783 machine_mode mode, rtx in)
17784 {
17785 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
17786 fprintf (stderr,
17787 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
17788 "mode = %s, input rtx:\n",
17789 reg_class_names[ret], reg_class_names[rclass],
17790 GET_MODE_NAME (mode));
17791 debug_rtx (in);
17792
17793 return ret;
17794 }
17795
17796 /* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
17797
17798 static bool
17799 rs6000_cannot_change_mode_class (machine_mode from,
17800 machine_mode to,
17801 enum reg_class rclass)
17802 {
17803 unsigned from_size = GET_MODE_SIZE (from);
17804 unsigned to_size = GET_MODE_SIZE (to);
17805
17806 if (from_size != to_size)
17807 {
17808 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
17809
17810 if (reg_classes_intersect_p (xclass, rclass))
17811 {
17812 unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
17813 unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
17814
17815 /* Don't allow 64-bit types to overlap with 128-bit types that take a
17816 single register under VSX because the scalar part of the register
17817 is in the upper 64-bits, and not the lower 64-bits. Types like
17818 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
17819 IEEE floating point can't overlap, and neither can small
17820 values. */
17821
17822 if (TARGET_IEEEQUAD && (to == TFmode || from == TFmode))
17823 return true;
17824
17825 /* TDmode in floating-mode registers must always go into a register
17826 pair with the most significant word in the even-numbered register
17827 to match ISA requirements. In little-endian mode, this does not
17828 match subreg numbering, so we cannot allow subregs. */
17829 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
17830 return true;
17831
17832 if (from_size < 8 || to_size < 8)
17833 return true;
17834
17835 if (from_size == 8 && (8 * to_nregs) != to_size)
17836 return true;
17837
17838 if (to_size == 8 && (8 * from_nregs) != from_size)
17839 return true;
17840
17841 return false;
17842 }
17843 else
17844 return false;
17845 }
17846
17847 if (TARGET_E500_DOUBLE
17848 && ((((to) == DFmode) + ((from) == DFmode)) == 1
17849 || (((to) == TFmode) + ((from) == TFmode)) == 1
17850 || (((to) == DDmode) + ((from) == DDmode)) == 1
17851 || (((to) == TDmode) + ((from) == TDmode)) == 1
17852 || (((to) == DImode) + ((from) == DImode)) == 1))
17853 return true;
17854
17855 /* Since the VSX register set includes traditional floating point registers
17856 and altivec registers, just check for the size being different instead of
17857 trying to check whether the modes are vector modes. Otherwise it won't
17858 allow, say, DF and DI to change classes. For types like TFmode and TDmode
17859 that take 2 64-bit registers, rather than a single 128-bit register, don't
17860 allow subregs of those types to other 128-bit types. */
17861 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
17862 {
17863 unsigned num_regs = (from_size + 15) / 16;
17864 if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
17865 || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
17866 return true;
17867
17868 return (from_size != 8 && from_size != 16);
17869 }
17870
17871 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
17872 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
17873 return true;
17874
17875 if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
17876 && reg_classes_intersect_p (GENERAL_REGS, rclass))
17877 return true;
17878
17879 return false;
17880 }
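
/* A condensed sketch (an editor's illustration, not compiler code) of the
   size/nregs test in the from_size != to_size branch above, assuming
   DFmode occupies one register, V2DFmode occupies one VSX register, and
   TFmode occupies two FPRs.  */
#if 0
#include <assert.h>

static int
vsx_subreg_forbidden (unsigned from_size, unsigned from_nregs,
		      unsigned to_size, unsigned to_nregs)
{
  if (from_size < 8 || to_size < 8)
    return 1;			/* Small values never overlap.  */
  if (from_size == 8 && 8 * to_nregs != to_size)
    return 1;			/* 64-bit scalar vs. one-register 128-bit.  */
  if (to_size == 8 && 8 * from_nregs != from_size)
    return 1;
  return 0;
}

int
main (void)
{
  /* DFmode (8 bytes, 1 reg) <-> V2DFmode (16 bytes, 1 VSX reg): forbidden,
     since the scalar lives in the upper half of the VSX register.  */
  assert (vsx_subreg_forbidden (8, 1, 16, 1));
  /* DFmode <-> TFmode (16 bytes, 2 registers): allowed to overlap.  */
  assert (!vsx_subreg_forbidden (8, 1, 16, 2));
  /* SFmode (4 bytes) against anything here: forbidden.  */
  assert (vsx_subreg_forbidden (4, 1, 8, 1));
  return 0;
}
#endif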
17881
17882 /* Debug version of rs6000_cannot_change_mode_class. */
17883 static bool
17884 rs6000_debug_cannot_change_mode_class (machine_mode from,
17885 machine_mode to,
17886 enum reg_class rclass)
17887 {
17888 bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
17889
17890 fprintf (stderr,
17891 "rs6000_cannot_change_mode_class, return %s, from = %s, "
17892 "to = %s, rclass = %s\n",
17893 ret ? "true" : "false",
17894 GET_MODE_NAME (from), GET_MODE_NAME (to),
17895 reg_class_names[rclass]);
17896
17897 return ret;
17898 }
17899 \f
17900 /* Return a string to do a move operation of 128 bits of data. */
17901
17902 const char *
17903 rs6000_output_move_128bit (rtx operands[])
17904 {
17905 rtx dest = operands[0];
17906 rtx src = operands[1];
17907 machine_mode mode = GET_MODE (dest);
17908 int dest_regno;
17909 int src_regno;
17910 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
17911 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
17912
17913 if (REG_P (dest))
17914 {
17915 dest_regno = REGNO (dest);
17916 dest_gpr_p = INT_REGNO_P (dest_regno);
17917 dest_fp_p = FP_REGNO_P (dest_regno);
17918 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
17919 dest_vsx_p = dest_fp_p | dest_vmx_p;
17920 }
17921 else
17922 {
17923 dest_regno = -1;
17924 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
17925 }
17926
17927 if (REG_P (src))
17928 {
17929 src_regno = REGNO (src);
17930 src_gpr_p = INT_REGNO_P (src_regno);
17931 src_fp_p = FP_REGNO_P (src_regno);
17932 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
17933 src_vsx_p = src_fp_p | src_vmx_p;
17934 }
17935 else
17936 {
17937 src_regno = -1;
17938 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
17939 }
17940
17941 /* Register moves. */
17942 if (dest_regno >= 0 && src_regno >= 0)
17943 {
17944 if (dest_gpr_p)
17945 {
17946 if (src_gpr_p)
17947 return "#";
17948
17949 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
17950 return "#";
17951 }
17952
17953 else if (TARGET_VSX && dest_vsx_p)
17954 {
17955 if (src_vsx_p)
17956 return "xxlor %x0,%x1,%x1";
17957
17958 else if (TARGET_DIRECT_MOVE && src_gpr_p)
17959 return "#";
17960 }
17961
17962 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
17963 return "vor %0,%1,%1";
17964
17965 else if (dest_fp_p && src_fp_p)
17966 return "#";
17967 }
17968
17969 /* Loads. */
17970 else if (dest_regno >= 0 && MEM_P (src))
17971 {
17972 if (dest_gpr_p)
17973 {
17974 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
17975 return "lq %0,%1";
17976 else
17977 return "#";
17978 }
17979
17980 else if (TARGET_ALTIVEC && dest_vmx_p
17981 && altivec_indexed_or_indirect_operand (src, mode))
17982 return "lvx %0,%y1";
17983
17984 else if (TARGET_VSX && dest_vsx_p)
17985 {
17986 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
17987 return "lxvw4x %x0,%y1";
17988 else
17989 return "lxvd2x %x0,%y1";
17990 }
17991
17992 else if (TARGET_ALTIVEC && dest_vmx_p)
17993 return "lvx %0,%y1";
17994
17995 else if (dest_fp_p)
17996 return "#";
17997 }
17998
17999 /* Stores. */
18000 else if (src_regno >= 0 && MEM_P (dest))
18001 {
18002 if (src_gpr_p)
18003 {
18004 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
18005 return "stq %1,%0";
18006 else
18007 return "#";
18008 }
18009
18010 else if (TARGET_ALTIVEC && src_vmx_p
18011 && altivec_indexed_or_indirect_operand (src, mode))
18012 return "stvx %1,%y0";
18013
18014 else if (TARGET_VSX && src_vsx_p)
18015 {
18016 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
18017 return "stxvw4x %x1,%y0";
18018 else
18019 return "stxvd2x %x1,%y0";
18020 }
18021
18022 else if (TARGET_ALTIVEC && src_vmx_p)
18023 return "stvx %1,%y0";
18024
18025 else if (src_fp_p)
18026 return "#";
18027 }
18028
18029 /* Constants. */
18030 else if (dest_regno >= 0
18031 && (GET_CODE (src) == CONST_INT
18032 || GET_CODE (src) == CONST_WIDE_INT
18033 || GET_CODE (src) == CONST_DOUBLE
18034 || GET_CODE (src) == CONST_VECTOR))
18035 {
18036 if (dest_gpr_p)
18037 return "#";
18038
18039 else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode))
18040 return "xxlxor %x0,%x0,%x0";
18041
18042 else if (TARGET_ALTIVEC && dest_vmx_p)
18043 return output_vec_const_move (operands);
18044 }
18045
18046 if (TARGET_DEBUG_ADDR)
18047 {
18048 fprintf (stderr, "\n===== Bad 128 bit move:\n");
18049 debug_rtx (gen_rtx_SET (VOIDmode, dest, src));
18050 }
18051
18052 gcc_unreachable ();
18053 }
18054
18055 /* Validate a 128-bit move. */
18056 bool
18057 rs6000_move_128bit_ok_p (rtx operands[])
18058 {
18059 machine_mode mode = GET_MODE (operands[0]);
18060 return (gpc_reg_operand (operands[0], mode)
18061 || gpc_reg_operand (operands[1], mode));
18062 }
18063
18064 /* Return true if a 128-bit move needs to be split. */
18065 bool
18066 rs6000_split_128bit_ok_p (rtx operands[])
18067 {
18068 if (!reload_completed)
18069 return false;
18070
18071 if (!gpr_or_gpr_p (operands[0], operands[1]))
18072 return false;
18073
18074 if (quad_load_store_p (operands[0], operands[1]))
18075 return false;
18076
18077 return true;
18078 }
18079
18080 \f
18081 /* Given a comparison operation, return the bit number in CCR to test. We
18082 know this is a valid comparison.
18083
18084 SCC_P is 1 if this is for an scc. That means that %D will have been
18085 used instead of %C, so the bits will be in different places.
18086
18087 Return -1 if OP isn't a valid comparison for some reason. */
18088
18089 int
18090 ccr_bit (rtx op, int scc_p)
18091 {
18092 enum rtx_code code = GET_CODE (op);
18093 machine_mode cc_mode;
18094 int cc_regnum;
18095 int base_bit;
18096 rtx reg;
18097
18098 if (!COMPARISON_P (op))
18099 return -1;
18100
18101 reg = XEXP (op, 0);
18102
18103 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
18104
18105 cc_mode = GET_MODE (reg);
18106 cc_regnum = REGNO (reg);
18107 base_bit = 4 * (cc_regnum - CR0_REGNO);
18108
18109 validate_condition_mode (code, cc_mode);
18110
18111 /* When generating a sCOND operation, only positive conditions are
18112 allowed. */
18113 gcc_assert (!scc_p
18114 || code == EQ || code == GT || code == LT || code == UNORDERED
18115 || code == GTU || code == LTU);
18116
18117 switch (code)
18118 {
18119 case NE:
18120 return scc_p ? base_bit + 3 : base_bit + 2;
18121 case EQ:
18122 return base_bit + 2;
18123 case GT: case GTU: case UNLE:
18124 return base_bit + 1;
18125 case LT: case LTU: case UNGE:
18126 return base_bit;
18127 case ORDERED: case UNORDERED:
18128 return base_bit + 3;
18129
18130 case GE: case GEU:
18131 /* If scc, we will have done a cror to put the bit in the
18132 unordered position. So test that bit. For integer, this is ! LT
18133 unless this is an scc insn. */
18134 return scc_p ? base_bit + 3 : base_bit;
18135
18136 case LE: case LEU:
18137 return scc_p ? base_bit + 3 : base_bit + 1;
18138
18139 default:
18140 gcc_unreachable ();
18141 }
18142 }
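
/* A standalone sketch (an editor's illustration) of the CCR bit layout the
   function above relies on: condition-register field N occupies CCR bits
   4*N .. 4*N+3, ordered LT, GT, EQ, SO/UN.  */
#if 0
#include <assert.h>

enum cr_field_bit { CR_LT = 0, CR_GT = 1, CR_EQ = 2, CR_SO = 3 };

static int
ccr_bit_of (int cr_field, enum cr_field_bit bit)
{
  return 4 * cr_field + bit;
}

int
main (void)
{
  assert (ccr_bit_of (0, CR_EQ) == 2);	/* EQ of cr0.  */
  assert (ccr_bit_of (2, CR_GT) == 9);	/* GT of cr2.  */
  assert (ccr_bit_of (7, CR_SO) == 31);	/* Last bit of the 32-bit CCR.  */
  return 0;
}
#endif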
18143 \f
18144 /* Return the GOT register. */
18145
18146 rtx
18147 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
18148 {
18149 /* The second flow pass currently (June 1999) can't update
18150 regs_ever_live without disturbing other parts of the compiler, so
18151 update it here to make the prolog/epilogue code happy. */
18152 if (!can_create_pseudo_p ()
18153 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
18154 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
18155
18156 crtl->uses_pic_offset_table = 1;
18157
18158 return pic_offset_table_rtx;
18159 }
18160 \f
18161 static rs6000_stack_t stack_info;
18162
18163 /* Function to init struct machine_function.
18164 This will be called, via a pointer variable,
18165 from push_function_context. */
18166
18167 static struct machine_function *
18168 rs6000_init_machine_status (void)
18169 {
18170 stack_info.reload_completed = 0;
18171 return ggc_cleared_alloc<machine_function> ();
18172 }
18173 \f
18174 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
18175
18176 int
18177 extract_MB (rtx op)
18178 {
18179 int i;
18180 unsigned long val = INTVAL (op);
18181
18182 /* If the high bit is zero, the value is the first 1 bit we find
18183 from the left. */
18184 if ((val & 0x80000000) == 0)
18185 {
18186 gcc_assert (val & 0xffffffff);
18187
18188 i = 1;
18189 while (((val <<= 1) & 0x80000000) == 0)
18190 ++i;
18191 return i;
18192 }
18193
18194 /* If the high bit is set and the low bit is not, or the mask is all
18195 1's, the value is zero. */
18196 if ((val & 1) == 0 || (val & 0xffffffff) == 0xffffffff)
18197 return 0;
18198
18199 /* Otherwise we have a wrap-around mask. Look for the first 0 bit
18200 from the right. */
18201 i = 31;
18202 while (((val >>= 1) & 1) != 0)
18203 --i;
18204
18205 return i;
18206 }
18207
18208 int
18209 extract_ME (rtx op)
18210 {
18211 int i;
18212 unsigned long val = INTVAL (op);
18213
18214 /* If the low bit is zero, the value is the first 1 bit we find from
18215 the right. */
18216 if ((val & 1) == 0)
18217 {
18218 gcc_assert (val & 0xffffffff);
18219
18220 i = 30;
18221 while (((val >>= 1) & 1) == 0)
18222 --i;
18223
18224 return i;
18225 }
18226
18227 /* If the low bit is set and the high bit is not, or the mask is all
18228 1's, the value is 31. */
18229 if ((val & 0x80000000) == 0 || (val & 0xffffffff) == 0xffffffff)
18230 return 31;
18231
18232 /* Otherwise we have a wrap-around mask. Look for the first 0 bit
18233 from the left. */
18234 i = 0;
18235 while (((val <<= 1) & 0x80000000) != 0)
18236 ++i;
18237
18238 return i;
18239 }
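
/* A standalone cross-check (an editor's illustration) for the two
   extractors above, using IBM bit numbering (bit 0 is the MSB): build the
   rlwinm mask whose ones run from MB through ME inclusive, wrapping when
   MB > ME, and compare against worked examples.  */
#if 0
#include <assert.h>

static unsigned long
mask_from_mb_me (int mb, int me)
{
  unsigned long mask = 0;
  int i = mb;

  for (;;)
    {
      mask |= 0x80000000ul >> i;
      if (i == me)
	break;
      i = (i + 1) & 31;
    }
  return mask;
}

int
main (void)
{
  /* extract_MB/extract_ME would return 8 and 19 for this mask.  */
  assert (mask_from_mb_me (8, 19) == 0x00fff000ul);
  /* A wrap-around mask: ones at both ends, zeros in the middle;
     extract_MB/extract_ME would return 24 and 7.  */
  assert (mask_from_mb_me (24, 7) == 0xff0000fful);
  return 0;
}
#endif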
18240
18241 /* Write out a function code label. */
18242
18243 void
18244 rs6000_output_function_entry (FILE *file, const char *fname)
18245 {
18246 if (fname[0] != '.')
18247 {
18248 switch (DEFAULT_ABI)
18249 {
18250 default:
18251 gcc_unreachable ();
18252
18253 case ABI_AIX:
18254 if (DOT_SYMBOLS)
18255 putc ('.', file);
18256 else
18257 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
18258 break;
18259
18260 case ABI_ELFv2:
18261 case ABI_V4:
18262 case ABI_DARWIN:
18263 break;
18264 }
18265 }
18266
18267 RS6000_OUTPUT_BASENAME (file, fname);
18268 }
18269
18270 /* Print an operand. Recognize special options, documented below. */
18271
18272 #if TARGET_ELF
18273 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
18274 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
18275 #else
18276 #define SMALL_DATA_RELOC "sda21"
18277 #define SMALL_DATA_REG 0
18278 #endif
18279
18280 void
18281 print_operand (FILE *file, rtx x, int code)
18282 {
18283 int i;
18284 unsigned HOST_WIDE_INT uval;
18285
18286 switch (code)
18287 {
18288 /* %a is output_address. */
18289
18290 case 'b':
18291 /* If constant, low-order 16 bits of constant, unsigned.
18292 Otherwise, write normally. */
18293 if (INT_P (x))
18294 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
18295 else
18296 print_operand (file, x, 0);
18297 return;
18298
18299 case 'B':
18300 /* If the low-order bit is zero, write 'r'; otherwise, write 'l'
18301 for 64-bit mask direction. */
18302 putc (((INTVAL (x) & 1) == 0 ? 'r' : 'l'), file);
18303 return;
18304
18305 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
18306 output_operand. */
18307
18308 case 'D':
18309 /* Like 'J' but get to the GT bit only. */
18310 gcc_assert (REG_P (x));
18311
18312 /* Bit 1 is GT bit. */
18313 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
18314
18315 /* Add one for shift count in rlinm for scc. */
18316 fprintf (file, "%d", i + 1);
18317 return;
18318
18319 case 'e':
18320 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
18321 if (! INT_P (x))
18322 {
18323 output_operand_lossage ("invalid %%e value");
18324 return;
18325 }
18326
18327 uval = INTVAL (x);
18328 if ((uval & 0xffff) == 0 && uval != 0)
18329 putc ('s', file);
18330 return;
18331
18332 case 'E':
18333 /* X is a CR register. Print the number of the EQ bit of the CR. */
18334 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18335 output_operand_lossage ("invalid %%E value");
18336 else
18337 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
18338 return;
18339
18340 case 'f':
18341 /* X is a CR register. Print the shift count needed to move it
18342 to the high-order four bits. */
18343 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18344 output_operand_lossage ("invalid %%f value");
18345 else
18346 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
18347 return;
18348
18349 case 'F':
18350 /* Similar, but print the count for the rotate in the opposite
18351 direction. */
18352 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18353 output_operand_lossage ("invalid %%F value");
18354 else
18355 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
18356 return;
18357
18358 case 'G':
18359 /* X is a constant integer. If it is negative, print "m",
18360 otherwise print "z". This is to make an aze or ame insn. */
18361 if (GET_CODE (x) != CONST_INT)
18362 output_operand_lossage ("invalid %%G value");
18363 else if (INTVAL (x) >= 0)
18364 putc ('z', file);
18365 else
18366 putc ('m', file);
18367 return;
18368
18369 case 'h':
18370 /* If constant, output low-order five bits. Otherwise, write
18371 normally. */
18372 if (INT_P (x))
18373 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
18374 else
18375 print_operand (file, x, 0);
18376 return;
18377
18378 case 'H':
18379 /* If constant, output low-order six bits. Otherwise, write
18380 normally. */
18381 if (INT_P (x))
18382 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
18383 else
18384 print_operand (file, x, 0);
18385 return;
18386
18387 case 'I':
18388 /* Print `i' if this is a constant, else nothing. */
18389 if (INT_P (x))
18390 putc ('i', file);
18391 return;
18392
18393 case 'j':
18394 /* Write the bit number in CCR for jump. */
18395 i = ccr_bit (x, 0);
18396 if (i == -1)
18397 output_operand_lossage ("invalid %%j code");
18398 else
18399 fprintf (file, "%d", i);
18400 return;
18401
18402 case 'J':
18403 /* Similar, but add one for shift count in rlinm for scc and pass
18404 scc flag to `ccr_bit'. */
18405 i = ccr_bit (x, 1);
18406 if (i == -1)
18407 output_operand_lossage ("invalid %%J code");
18408 else
18409 /* If we want bit 31, write a shift count of zero, not 32. */
18410 fprintf (file, "%d", i == 31 ? 0 : i + 1);
18411 return;
18412
18413 case 'k':
18414 /* X must be a constant. Write the 1's complement of the
18415 constant. */
18416 if (! INT_P (x))
18417 output_operand_lossage ("invalid %%k value");
18418 else
18419 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
18420 return;
18421
18422 case 'K':
18423 /* X must be a symbolic constant on ELF. Write an
18424 expression suitable for an 'addi' that adds in the low 16
18425 bits of the MEM. */
18426 if (GET_CODE (x) == CONST)
18427 {
18428 if (GET_CODE (XEXP (x, 0)) != PLUS
18429 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
18430 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
18431 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
18432 output_operand_lossage ("invalid %%K value");
18433 }
18434 print_operand_address (file, x);
18435 fputs ("@l", file);
18436 return;
18437
18438 /* %l is output_asm_label. */
18439
18440 case 'L':
18441 /* Write second word of DImode or DFmode reference. Works on register
18442 or non-indexed memory only. */
18443 if (REG_P (x))
18444 fputs (reg_names[REGNO (x) + 1], file);
18445 else if (MEM_P (x))
18446 {
18447 /* Handle possible auto-increment. Since it is pre-increment and
18448 we have already done it, we can just use an offset of one word. */
18449 if (GET_CODE (XEXP (x, 0)) == PRE_INC
18450 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
18451 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
18452 UNITS_PER_WORD));
18453 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18454 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
18455 UNITS_PER_WORD));
18456 else
18457 output_address (XEXP (adjust_address_nv (x, SImode,
18458 UNITS_PER_WORD),
18459 0));
18460
18461 if (small_data_operand (x, GET_MODE (x)))
18462 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18463 reg_names[SMALL_DATA_REG]);
18464 }
18465 return;
18466
18467 case 'm':
18468 /* MB value for a mask operand. */
18469 if (! mask_operand (x, SImode))
18470 output_operand_lossage ("invalid %%m value");
18471
18472 fprintf (file, "%d", extract_MB (x));
18473 return;
18474
18475 case 'M':
18476 /* ME value for a mask operand. */
18477 if (! mask_operand (x, SImode))
18478 output_operand_lossage ("invalid %%M value");
18479
18480 fprintf (file, "%d", extract_ME (x));
18481 return;
18482
18483 /* %n outputs the negative of its operand. */
18484
18485 case 'N':
18486 /* Write the number of elements in the vector times 4. */
18487 if (GET_CODE (x) != PARALLEL)
18488 output_operand_lossage ("invalid %%N value");
18489 else
18490 fprintf (file, "%d", XVECLEN (x, 0) * 4);
18491 return;
18492
18493 case 'O':
18494 /* Similar, but subtract 1 first. */
18495 if (GET_CODE (x) != PARALLEL)
18496 output_operand_lossage ("invalid %%O value");
18497 else
18498 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
18499 return;
18500
18501 case 'p':
18502 /* X is a CONST_INT that is a power of two. Output the logarithm. */
18503 if (! INT_P (x)
18504 || INTVAL (x) < 0
18505 || (i = exact_log2 (INTVAL (x))) < 0)
18506 output_operand_lossage ("invalid %%p value");
18507 else
18508 fprintf (file, "%d", i);
18509 return;
18510
18511 case 'P':
18512 /* The operand must be an indirect memory reference. The result
18513 is the register name. */
18514 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
18515 || REGNO (XEXP (x, 0)) >= 32)
18516 output_operand_lossage ("invalid %%P value");
18517 else
18518 fputs (reg_names[REGNO (XEXP (x, 0))], file);
18519 return;
18520
18521 case 'q':
18522 /* This outputs the logical code corresponding to a boolean
18523 expression. The expression may have one or both operands
18524 negated (if one, only the first one). For condition register
18525 logical operations, it will also treat the negated
18526 CR codes as NOTs, but not handle NOTs of them. */
18527 {
18528 const char *const *t = 0;
18529 const char *s;
18530 enum rtx_code code = GET_CODE (x);
18531 static const char * const tbl[3][3] = {
18532 { "and", "andc", "nor" },
18533 { "or", "orc", "nand" },
18534 { "xor", "eqv", "xor" } };
18535
18536 if (code == AND)
18537 t = tbl[0];
18538 else if (code == IOR)
18539 t = tbl[1];
18540 else if (code == XOR)
18541 t = tbl[2];
18542 else
18543 output_operand_lossage ("invalid %%q value");
18544
18545 if (GET_CODE (XEXP (x, 0)) != NOT)
18546 s = t[0];
18547 else
18548 {
18549 if (GET_CODE (XEXP (x, 1)) == NOT)
18550 s = t[2];
18551 else
18552 s = t[1];
18553 }
18554
18555 fputs (s, file);
18556 }
18557 return;
18558
18559 case 'Q':
18560 if (! TARGET_MFCRF)
18561 return;
18562 fputc (',', file);
18563 /* FALLTHRU */
18564
18565 case 'R':
18566 /* X is a CR register. Print the mask for `mtcrf'. */
18567 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18568 output_operand_lossage ("invalid %%R value");
18569 else
18570 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
18571 return;
18572
18573 case 's':
18574 /* Low 5 bits of 32 - value. */
18575 if (! INT_P (x))
18576 output_operand_lossage ("invalid %%s value");
18577 else
18578 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
18579 return;
18580
18581 case 'S':
18582 /* PowerPC64 mask position. All 0's is excluded.
18583 CONST_INT 32-bit mask is considered sign-extended so any
18584 transition must occur within the CONST_INT, not on the boundary. */
18585 if (! mask64_operand (x, DImode))
18586 output_operand_lossage ("invalid %%S value");
18587
18588 uval = INTVAL (x);
18589
18590 if (uval & 1) /* Clear Left */
18591 {
18592 #if HOST_BITS_PER_WIDE_INT > 64
18593 uval &= ((unsigned HOST_WIDE_INT) 1 << 64) - 1;
18594 #endif
18595 i = 64;
18596 }
18597 else /* Clear Right */
18598 {
18599 uval = ~uval;
18600 #if HOST_BITS_PER_WIDE_INT > 64
18601 uval &= ((unsigned HOST_WIDE_INT) 1 << 64) - 1;
18602 #endif
18603 i = 63;
18604 }
18605 while (uval != 0)
18606 --i, uval >>= 1;
18607 gcc_assert (i >= 0);
18608 fprintf (file, "%d", i);
18609 return;
18610
18611 case 't':
18612 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
18613 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
18614
18615 /* Bit 3 is OV bit. */
18616 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
18617
18618 /* If we want bit 31, write a shift count of zero, not 32. */
18619 fprintf (file, "%d", i == 31 ? 0 : i + 1);
18620 return;
18621
18622 case 'T':
18623 /* Print the symbolic name of a branch target register. */
18624 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
18625 && REGNO (x) != CTR_REGNO))
18626 output_operand_lossage ("invalid %%T value");
18627 else if (REGNO (x) == LR_REGNO)
18628 fputs ("lr", file);
18629 else
18630 fputs ("ctr", file);
18631 return;
18632
18633 case 'u':
18634 /* High-order or low-order 16 bits of constant, whichever is non-zero,
18635 for use in unsigned operand. */
18636 if (! INT_P (x))
18637 {
18638 output_operand_lossage ("invalid %%u value");
18639 return;
18640 }
18641
18642 uval = INTVAL (x);
18643 if ((uval & 0xffff) == 0)
18644 uval >>= 16;
18645
18646 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
18647 return;
18648
18649 case 'v':
18650 /* High-order 16 bits of constant for use in signed operand. */
18651 if (! INT_P (x))
18652 output_operand_lossage ("invalid %%v value");
18653 else
18654 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
18655 (INTVAL (x) >> 16) & 0xffff);
18656 return;
18657
18658 case 'U':
18659 /* Print `u' if this has an auto-increment or auto-decrement. */
18660 if (MEM_P (x)
18661 && (GET_CODE (XEXP (x, 0)) == PRE_INC
18662 || GET_CODE (XEXP (x, 0)) == PRE_DEC
18663 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
18664 putc ('u', file);
18665 return;
18666
18667 case 'V':
18668 /* Print the trap code for this operand. */
18669 switch (GET_CODE (x))
18670 {
18671 case EQ:
18672 fputs ("eq", file); /* 4 */
18673 break;
18674 case NE:
18675 fputs ("ne", file); /* 24 */
18676 break;
18677 case LT:
18678 fputs ("lt", file); /* 16 */
18679 break;
18680 case LE:
18681 fputs ("le", file); /* 20 */
18682 break;
18683 case GT:
18684 fputs ("gt", file); /* 8 */
18685 break;
18686 case GE:
18687 fputs ("ge", file); /* 12 */
18688 break;
18689 case LTU:
18690 fputs ("llt", file); /* 2 */
18691 break;
18692 case LEU:
18693 fputs ("lle", file); /* 6 */
18694 break;
18695 case GTU:
18696 fputs ("lgt", file); /* 1 */
18697 break;
18698 case GEU:
18699 fputs ("lge", file); /* 5 */
18700 break;
18701 default:
18702 gcc_unreachable ();
18703 }
18704 break;
18705
18706 case 'w':
18707 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
18708 normally. */
18709 if (INT_P (x))
18710 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
18711 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
18712 else
18713 print_operand (file, x, 0);
18714 return;
18715
18716 case 'W':
18717 /* MB value for a PowerPC64 rldic operand. */
18718 i = clz_hwi (INTVAL (x));
18719
18720 fprintf (file, "%d", i);
18721 return;
18722
18723 case 'x':
18724 /* X is a FPR or Altivec register used in a VSX context. */
18725 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
18726 output_operand_lossage ("invalid %%x value");
18727 else
18728 {
18729 int reg = REGNO (x);
18730 int vsx_reg = (FP_REGNO_P (reg)
18731 ? reg - 32
18732 : reg - FIRST_ALTIVEC_REGNO + 32);
18733
18734 #ifdef TARGET_REGNAMES
18735 if (TARGET_REGNAMES)
18736 fprintf (file, "%%vs%d", vsx_reg);
18737 else
18738 #endif
18739 fprintf (file, "%d", vsx_reg);
18740 }
18741 return;
18742
18743 case 'X':
18744 if (MEM_P (x)
18745 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
18746 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
18747 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
18748 putc ('x', file);
18749 return;
18750
18751 case 'Y':
18752 /* Like 'L', for third word of TImode/PTImode */
18753 if (REG_P (x))
18754 fputs (reg_names[REGNO (x) + 2], file);
18755 else if (MEM_P (x))
18756 {
18757 if (GET_CODE (XEXP (x, 0)) == PRE_INC
18758 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
18759 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 8));
18760 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18761 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 8));
18762 else
18763 output_address (XEXP (adjust_address_nv (x, SImode, 8), 0));
18764 if (small_data_operand (x, GET_MODE (x)))
18765 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18766 reg_names[SMALL_DATA_REG]);
18767 }
18768 return;
18769
18770 case 'z':
18771 /* X is a SYMBOL_REF. Write out the name preceded by a
18772 period and without any trailing data in brackets. Used for function
18773 names. If we are configured for System V (or the embedded ABI) on
18774 the PowerPC, do not emit the period, since those systems do not use
18775 TOCs and the like. */
18776 gcc_assert (GET_CODE (x) == SYMBOL_REF);
18777
18778 /* For macho, check to see if we need a stub. */
18779 if (TARGET_MACHO)
18780 {
18781 const char *name = XSTR (x, 0);
18782 #if TARGET_MACHO
18783 if (darwin_emit_branch_islands
18784 && MACHOPIC_INDIRECT
18785 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
18786 name = machopic_indirection_name (x, /*stub_p=*/true);
18787 #endif
18788 assemble_name (file, name);
18789 }
18790 else if (!DOT_SYMBOLS)
18791 assemble_name (file, XSTR (x, 0));
18792 else
18793 rs6000_output_function_entry (file, XSTR (x, 0));
18794 return;
18795
18796 case 'Z':
18797 /* Like 'L', for last word of TImode/PTImode. */
18798 if (REG_P (x))
18799 fputs (reg_names[REGNO (x) + 3], file);
18800 else if (MEM_P (x))
18801 {
18802 if (GET_CODE (XEXP (x, 0)) == PRE_INC
18803 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
18804 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 12));
18805 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18806 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 12));
18807 else
18808 output_address (XEXP (adjust_address_nv (x, SImode, 12), 0));
18809 if (small_data_operand (x, GET_MODE (x)))
18810 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18811 reg_names[SMALL_DATA_REG]);
18812 }
18813 return;
18814
18815 /* Print AltiVec or SPE memory operand. */
18816 case 'y':
18817 {
18818 rtx tmp;
18819
18820 gcc_assert (MEM_P (x));
18821
18822 tmp = XEXP (x, 0);
18823
18824 /* Ugly hack because %y is overloaded. */
18825 if ((TARGET_SPE || TARGET_E500_DOUBLE)
18826 && (GET_MODE_SIZE (GET_MODE (x)) == 8
18827 || GET_MODE (x) == TFmode
18828 || GET_MODE (x) == TImode
18829 || GET_MODE (x) == PTImode))
18830 {
18831 /* Handle [reg]. */
18832 if (REG_P (tmp))
18833 {
18834 fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
18835 break;
18836 }
18837 /* Handle [reg+UIMM]. */
18838 else if (GET_CODE (tmp) == PLUS
18839 && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
18840 {
18841 int off;
18842
18843 gcc_assert (REG_P (XEXP (tmp, 0)));
18844
18845 off = INTVAL (XEXP (tmp, 1));
18846 fprintf (file, "%d(%s)", off, reg_names[REGNO (XEXP (tmp, 0))]);
18847 break;
18848 }
18849
18850 /* Fall through. Must be [reg+reg]. */
18851 }
18852 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
18853 && GET_CODE (tmp) == AND
18854 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
18855 && INTVAL (XEXP (tmp, 1)) == -16)
18856 tmp = XEXP (tmp, 0);
18857 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
18858 && GET_CODE (tmp) == PRE_MODIFY)
18859 tmp = XEXP (tmp, 1);
18860 if (REG_P (tmp))
18861 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
18862 else
18863 {
18864 if (GET_CODE (tmp) != PLUS
18865 || !REG_P (XEXP (tmp, 0))
18866 || !REG_P (XEXP (tmp, 1)))
18867 {
18868 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
18869 break;
18870 }
18871
18872 if (REGNO (XEXP (tmp, 0)) == 0)
18873 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
18874 reg_names[ REGNO (XEXP (tmp, 0)) ]);
18875 else
18876 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
18877 reg_names[ REGNO (XEXP (tmp, 1)) ]);
18878 }
18879 break;
18880 }
18881
18882 case 0:
18883 if (REG_P (x))
18884 fprintf (file, "%s", reg_names[REGNO (x)]);
18885 else if (MEM_P (x))
18886 {
18887 /* We need to handle PRE_INC and PRE_DEC here, since we need to
18888 know the width from the mode. */
18889 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
18890 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
18891 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
18892 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
18893 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
18894 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
18895 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18896 output_address (XEXP (XEXP (x, 0), 1));
18897 else
18898 output_address (XEXP (x, 0));
18899 }
18900 else
18901 {
18902 if (toc_relative_expr_p (x, false))
18903 /* This hack along with a corresponding hack in
18904 rs6000_output_addr_const_extra arranges to output addends
18905 where the assembler expects to find them. eg.
18906 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
18907 without this hack would be output as "x@toc+4". We
18908 want "x+4@toc". */
18909 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
18910 else
18911 output_addr_const (file, x);
18912 }
18913 return;
18914
18915 case '&':
18916 if (const char *name = get_some_local_dynamic_name ())
18917 assemble_name (file, name);
18918 else
18919 output_operand_lossage ("'%%&' used without any "
18920 "local dynamic TLS references");
18921 return;
18922
18923 default:
18924 output_operand_lossage ("invalid %%xn code");
18925 }
18926 }
18927 \f
18928 /* Print the address of an operand. */
18929
18930 void
18931 print_operand_address (FILE *file, rtx x)
18932 {
18933 if (REG_P (x))
18934 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
18935 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
18936 || GET_CODE (x) == LABEL_REF)
18937 {
18938 output_addr_const (file, x);
18939 if (small_data_operand (x, GET_MODE (x)))
18940 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18941 reg_names[SMALL_DATA_REG]);
18942 else
18943 gcc_assert (!TARGET_TOC);
18944 }
18945 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
18946 && REG_P (XEXP (x, 1)))
18947 {
18948 if (REGNO (XEXP (x, 0)) == 0)
18949 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
18950 reg_names[ REGNO (XEXP (x, 0)) ]);
18951 else
18952 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
18953 reg_names[ REGNO (XEXP (x, 1)) ]);
18954 }
18955 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
18956 && GET_CODE (XEXP (x, 1)) == CONST_INT)
18957 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
18958 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
18959 #if TARGET_MACHO
18960 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
18961 && CONSTANT_P (XEXP (x, 1)))
18962 {
18963 fprintf (file, "lo16(");
18964 output_addr_const (file, XEXP (x, 1));
18965 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
18966 }
18967 #endif
18968 #if TARGET_ELF
18969 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
18970 && CONSTANT_P (XEXP (x, 1)))
18971 {
18972 output_addr_const (file, XEXP (x, 1));
18973 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
18974 }
18975 #endif
18976 else if (toc_relative_expr_p (x, false))
18977 {
18978 /* This hack along with a corresponding hack in
18979 rs6000_output_addr_const_extra arranges to output addends
18980 where the assembler expects to find them. eg.
18981 (lo_sum (reg 9)
18982 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
18983 without this hack would be output as "x@toc+8@l(9)". We
18984 want "x+8@toc@l(9)". */
18985 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
18986 if (GET_CODE (x) == LO_SUM)
18987 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
18988 else
18989 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
18990 }
18991 else
18992 gcc_unreachable ();
18993 }
18994 \f
18995 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
18996
18997 static bool
18998 rs6000_output_addr_const_extra (FILE *file, rtx x)
18999 {
19000 if (GET_CODE (x) == UNSPEC)
19001 switch (XINT (x, 1))
19002 {
19003 case UNSPEC_TOCREL:
19004 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
19005 && REG_P (XVECEXP (x, 0, 1))
19006 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
19007 output_addr_const (file, XVECEXP (x, 0, 0));
19008 if (x == tocrel_base && tocrel_offset != const0_rtx)
19009 {
19010 if (INTVAL (tocrel_offset) >= 0)
19011 fprintf (file, "+");
19012 output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
19013 }
19014 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
19015 {
19016 putc ('-', file);
19017 assemble_name (file, toc_label_name);
19018 }
19019 else if (TARGET_ELF)
19020 fputs ("@toc", file);
19021 return true;
19022
19023 #if TARGET_MACHO
19024 case UNSPEC_MACHOPIC_OFFSET:
19025 output_addr_const (file, XVECEXP (x, 0, 0));
19026 putc ('-', file);
19027 machopic_output_function_base_name (file);
19028 return true;
19029 #endif
19030 }
19031 return false;
19032 }
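/* Illustrative output (the exact label is generated elsewhere): on
64-bit ELF a TOC-relative reference to "x" with addend 4 prints as
"x+4@toc", while a minimal-TOC ELF target prints "x+4-<toc label>"
so the assembler resolves it against the local TOC base. */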
19033 \f
19034 /* Target hook for assembling integer objects. The PowerPC version has
19035 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
19036 is defined. It also needs to handle DI-mode objects on 64-bit
19037 targets. */
19038
19039 static bool
19040 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
19041 {
19042 #ifdef RELOCATABLE_NEEDS_FIXUP
19043 /* Special handling for SI values. */
19044 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
19045 {
19046 static int recurse = 0;
19047
19048 /* For -mrelocatable, we mark all addresses that need to be fixed up in
19049 the .fixup section. Since the TOC section is already relocated, we
19050 don't need to mark it here. We used to skip the text section, but it
19051 should never be valid for relocated addresses to be placed in the text
19052 section. */
19053 if (TARGET_RELOCATABLE
19054 && in_section != toc_section
19055 && !recurse
19056 && !CONST_SCALAR_INT_P (x)
19057 && CONSTANT_P (x))
19058 {
19059 char buf[256];
19060
19061 recurse = 1;
19062 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
19063 fixuplabelno++;
19064 ASM_OUTPUT_LABEL (asm_out_file, buf);
19065 fprintf (asm_out_file, "\t.long\t(");
19066 output_addr_const (asm_out_file, x);
19067 fprintf (asm_out_file, ")@fixup\n");
19068 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
19069 ASM_OUTPUT_ALIGN (asm_out_file, 2);
19070 fprintf (asm_out_file, "\t.long\t");
19071 assemble_name (asm_out_file, buf);
19072 fprintf (asm_out_file, "\n\t.previous\n");
19073 recurse = 0;
19074 return true;
19075 }
19076 /* Remove initial .'s to turn a -mcall-aixdesc function
19077 address into the address of the descriptor, not the function
19078 itself. */
19079 else if (GET_CODE (x) == SYMBOL_REF
19080 && XSTR (x, 0)[0] == '.'
19081 && DEFAULT_ABI == ABI_AIX)
19082 {
19083 const char *name = XSTR (x, 0);
19084 while (*name == '.')
19085 name++;
19086
19087 fprintf (asm_out_file, "\t.long\t%s\n", name);
19088 return true;
19089 }
19090 }
19091 #endif /* RELOCATABLE_NEEDS_FIXUP */
19092 return default_assemble_integer (x, size, aligned_p);
19093 }
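/* The -mrelocatable fixup entry emitted above has this shape (sketch;
the LCP label number is assigned at compile time):
.LCPn:
.long (sym)@fixup
.section ".fixup","aw"
.align 2
.long .LCPn
.previous */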
19094
19095 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
19096 /* Emit an assembler directive to set symbol visibility for DECL to
19097 VISIBILITY_TYPE. */
19098
19099 static void
19100 rs6000_assemble_visibility (tree decl, int vis)
19101 {
19102 if (TARGET_XCOFF)
19103 return;
19104
19105 /* Functions need to have their entry point symbol visibility set as
19106 well as their descriptor symbol visibility. */
19107 if (DEFAULT_ABI == ABI_AIX
19108 && DOT_SYMBOLS
19109 && TREE_CODE (decl) == FUNCTION_DECL)
19110 {
19111 static const char * const visibility_types[] = {
19112 NULL, "internal", "hidden", "protected"
19113 };
19114
19115 const char *name, *type;
19116
19117 name = ((* targetm.strip_name_encoding)
19118 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
19119 type = visibility_types[vis];
19120
19121 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
19122 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
19123 }
19124 else
19125 default_assemble_visibility (decl, vis);
19126 }
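/* E.g. a hidden function "foo" with dot symbols gets two directives,
".hidden foo" for the descriptor and ".hidden .foo" for the code
entry point. */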
19127 #endif
19128 \f
19129 enum rtx_code
19130 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
19131 {
19132 /* Reversal of FP compares needs care -- an ordered compare
19133 becomes an unordered compare and vice versa. */
19134 if (mode == CCFPmode
19135 && (!flag_finite_math_only
19136 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
19137 || code == UNEQ || code == LTGT))
19138 return reverse_condition_maybe_unordered (code);
19139 else
19140 return reverse_condition (code);
19141 }
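/* For example, when NaNs are honored the reverse of GT must be UNLE,
not LE: if either operand is a NaN, GT and LE are both false, so LE
alone would not be the logical negation of GT. */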
19142
19143 /* Generate a compare for CODE. Return a brand-new rtx that
19144 represents the result of the compare. */
19145
19146 static rtx
19147 rs6000_generate_compare (rtx cmp, machine_mode mode)
19148 {
19149 machine_mode comp_mode;
19150 rtx compare_result;
19151 enum rtx_code code = GET_CODE (cmp);
19152 rtx op0 = XEXP (cmp, 0);
19153 rtx op1 = XEXP (cmp, 1);
19154
19155 if (FLOAT_MODE_P (mode))
19156 comp_mode = CCFPmode;
19157 else if (code == GTU || code == LTU
19158 || code == GEU || code == LEU)
19159 comp_mode = CCUNSmode;
19160 else if ((code == EQ || code == NE)
19161 && unsigned_reg_p (op0)
19162 && (unsigned_reg_p (op1)
19163 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
19164 /* These are unsigned values; perhaps there will be a later
19165 ordering compare that can be shared with this one. */
19166 comp_mode = CCUNSmode;
19167 else
19168 comp_mode = CCmode;
19169
19170 /* If we have an unsigned compare, make sure we don't have a signed value as
19171 an immediate. */
19172 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
19173 && INTVAL (op1) < 0)
19174 {
19175 op0 = copy_rtx_if_shared (op0);
19176 op1 = force_reg (GET_MODE (op0), op1);
19177 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
19178 }
19179
19180 /* First, the compare. */
19181 compare_result = gen_reg_rtx (comp_mode);
19182
19183 /* E500 FP compare instructions on the GPRs. Yuck! */
19184 if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
19185 && FLOAT_MODE_P (mode))
19186 {
19187 rtx cmp, or_result, compare_result2;
19188 machine_mode op_mode = GET_MODE (op0);
19189 bool reverse_p;
19190
19191 if (op_mode == VOIDmode)
19192 op_mode = GET_MODE (op1);
19193
19194 /* First reverse the condition codes that aren't directly supported. */
19195 switch (code)
19196 {
19197 case NE:
19198 case UNLT:
19199 case UNLE:
19200 case UNGT:
19201 case UNGE:
19202 code = reverse_condition_maybe_unordered (code);
19203 reverse_p = true;
19204 break;
19205
19206 case EQ:
19207 case LT:
19208 case LE:
19209 case GT:
19210 case GE:
19211 reverse_p = false;
19212 break;
19213
19214 default:
19215 gcc_unreachable ();
19216 }
19217
19218 /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
19219 This explains the following mess. */
19220
19221 switch (code)
19222 {
19223 case EQ:
19224 switch (op_mode)
19225 {
19226 case SFmode:
19227 cmp = (flag_finite_math_only && !flag_trapping_math)
19228 ? gen_tstsfeq_gpr (compare_result, op0, op1)
19229 : gen_cmpsfeq_gpr (compare_result, op0, op1);
19230 break;
19231
19232 case DFmode:
19233 cmp = (flag_finite_math_only && !flag_trapping_math)
19234 ? gen_tstdfeq_gpr (compare_result, op0, op1)
19235 : gen_cmpdfeq_gpr (compare_result, op0, op1);
19236 break;
19237
19238 case TFmode:
19239 cmp = (flag_finite_math_only && !flag_trapping_math)
19240 ? gen_tsttfeq_gpr (compare_result, op0, op1)
19241 : gen_cmptfeq_gpr (compare_result, op0, op1);
19242 break;
19243
19244 default:
19245 gcc_unreachable ();
19246 }
19247 break;
19248
19249 case GT:
19250 case GE:
19251 switch (op_mode)
19252 {
19253 case SFmode:
19254 cmp = (flag_finite_math_only && !flag_trapping_math)
19255 ? gen_tstsfgt_gpr (compare_result, op0, op1)
19256 : gen_cmpsfgt_gpr (compare_result, op0, op1);
19257 break;
19258
19259 case DFmode:
19260 cmp = (flag_finite_math_only && !flag_trapping_math)
19261 ? gen_tstdfgt_gpr (compare_result, op0, op1)
19262 : gen_cmpdfgt_gpr (compare_result, op0, op1);
19263 break;
19264
19265 case TFmode:
19266 cmp = (flag_finite_math_only && !flag_trapping_math)
19267 ? gen_tsttfgt_gpr (compare_result, op0, op1)
19268 : gen_cmptfgt_gpr (compare_result, op0, op1);
19269 break;
19270
19271 default:
19272 gcc_unreachable ();
19273 }
19274 break;
19275
19276 case LT:
19277 case LE:
19278 switch (op_mode)
19279 {
19280 case SFmode:
19281 cmp = (flag_finite_math_only && !flag_trapping_math)
19282 ? gen_tstsflt_gpr (compare_result, op0, op1)
19283 : gen_cmpsflt_gpr (compare_result, op0, op1);
19284 break;
19285
19286 case DFmode:
19287 cmp = (flag_finite_math_only && !flag_trapping_math)
19288 ? gen_tstdflt_gpr (compare_result, op0, op1)
19289 : gen_cmpdflt_gpr (compare_result, op0, op1);
19290 break;
19291
19292 case TFmode:
19293 cmp = (flag_finite_math_only && !flag_trapping_math)
19294 ? gen_tsttflt_gpr (compare_result, op0, op1)
19295 : gen_cmptflt_gpr (compare_result, op0, op1);
19296 break;
19297
19298 default:
19299 gcc_unreachable ();
19300 }
19301 break;
19302
19303 default:
19304 gcc_unreachable ();
19305 }
19306
19307 /* Synthesize LE and GE from LT/GT || EQ. */
19308 if (code == LE || code == GE)
19309 {
19310 emit_insn (cmp);
19311
19312 compare_result2 = gen_reg_rtx (CCFPmode);
19313
19314 /* Do the EQ. */
19315 switch (op_mode)
19316 {
19317 case SFmode:
19318 cmp = (flag_finite_math_only && !flag_trapping_math)
19319 ? gen_tstsfeq_gpr (compare_result2, op0, op1)
19320 : gen_cmpsfeq_gpr (compare_result2, op0, op1);
19321 break;
19322
19323 case DFmode:
19324 cmp = (flag_finite_math_only && !flag_trapping_math)
19325 ? gen_tstdfeq_gpr (compare_result2, op0, op1)
19326 : gen_cmpdfeq_gpr (compare_result2, op0, op1);
19327 break;
19328
19329 case TFmode:
19330 cmp = (flag_finite_math_only && !flag_trapping_math)
19331 ? gen_tsttfeq_gpr (compare_result2, op0, op1)
19332 : gen_cmptfeq_gpr (compare_result2, op0, op1);
19333 break;
19334
19335 default:
19336 gcc_unreachable ();
19337 }
19338
19339 emit_insn (cmp);
19340
19341 /* OR them together. */
19342 or_result = gen_reg_rtx (CCFPmode);
19343 cmp = gen_e500_cr_ior_compare (or_result, compare_result,
19344 compare_result2);
19345 compare_result = or_result;
19346 }
19347
19348 code = reverse_p ? NE : EQ;
19349
19350 emit_insn (cmp);
19351 }
19352 else
19353 {
19354 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
19355 CLOBBERs to match cmptf_internal2 pattern. */
19356 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
19357 && GET_MODE (op0) == TFmode
19358 && !TARGET_IEEEQUAD
19359 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128)
19360 emit_insn (gen_rtx_PARALLEL (VOIDmode,
19361 gen_rtvec (10,
19362 gen_rtx_SET (VOIDmode,
19363 compare_result,
19364 gen_rtx_COMPARE (comp_mode, op0, op1)),
19365 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19366 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19367 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19368 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19369 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19370 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19371 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19372 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19373 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
19374 else if (GET_CODE (op1) == UNSPEC
19375 && XINT (op1, 1) == UNSPEC_SP_TEST)
19376 {
19377 rtx op1b = XVECEXP (op1, 0, 0);
19378 comp_mode = CCEQmode;
19379 compare_result = gen_reg_rtx (CCEQmode);
19380 if (TARGET_64BIT)
19381 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
19382 else
19383 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
19384 }
19385 else
19386 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
19387 gen_rtx_COMPARE (comp_mode, op0, op1)));
19388 }
19389
19390 /* Some kinds of FP comparisons need an OR operation;
19391 under flag_finite_math_only we don't bother. */
19392 if (FLOAT_MODE_P (mode)
19393 && !flag_finite_math_only
19394 && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
19395 && (code == LE || code == GE
19396 || code == UNEQ || code == LTGT
19397 || code == UNGT || code == UNLT))
19398 {
19399 enum rtx_code or1, or2;
19400 rtx or1_rtx, or2_rtx, compare2_rtx;
19401 rtx or_result = gen_reg_rtx (CCEQmode);
19402
19403 switch (code)
19404 {
19405 case LE: or1 = LT; or2 = EQ; break;
19406 case GE: or1 = GT; or2 = EQ; break;
19407 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
19408 case LTGT: or1 = LT; or2 = GT; break;
19409 case UNGT: or1 = UNORDERED; or2 = GT; break;
19410 case UNLT: or1 = UNORDERED; or2 = LT; break;
19411 default: gcc_unreachable ();
19412 }
19413 validate_condition_mode (or1, comp_mode);
19414 validate_condition_mode (or2, comp_mode);
19415 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
19416 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
19417 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
19418 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
19419 const_true_rtx);
19420 emit_insn (gen_rtx_SET (VOIDmode, or_result, compare2_rtx));
19421
19422 compare_result = or_result;
19423 code = EQ;
19424 }
19425
19426 validate_condition_mode (code, GET_MODE (compare_result));
19427
19428 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
19429 }
19430
19431
19432 /* Emit the RTL for an sISEL pattern. */
19433
19434 void
19435 rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
19436 {
19437 rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
19438 }
19439
19440 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
19441 can be used as that dest register. Return the dest register. */
19442
19443 rtx
19444 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
19445 {
19446 if (op2 == const0_rtx)
19447 return op1;
19448
19449 if (GET_CODE (scratch) == SCRATCH)
19450 scratch = gen_reg_rtx (mode);
19451
19452 if (logical_operand (op2, mode))
19453 emit_insn (gen_rtx_SET (VOIDmode, scratch, gen_rtx_XOR (mode, op1, op2)));
19454 else
19455 emit_insn (gen_rtx_SET (VOIDmode, scratch,
19456 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
19457
19458 return scratch;
19459 }
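/* Sketch of the idea: for OP2 = 0x64, a valid logical_operand, this
emits "xori scratch,rOP1,0x64"; SCRATCH is then zero exactly when
OP1 equals OP2, and a later compare against zero recovers EQ/NE. */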
19460
19461 void
19462 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
19463 {
19464 rtx condition_rtx;
19465 machine_mode op_mode;
19466 enum rtx_code cond_code;
19467 rtx result = operands[0];
19468
19469 condition_rtx = rs6000_generate_compare (operands[1], mode);
19470 cond_code = GET_CODE (condition_rtx);
19471
19472 if (FLOAT_MODE_P (mode)
19473 && !TARGET_FPRS && TARGET_HARD_FLOAT)
19474 {
19475 rtx t;
19476
19477 PUT_MODE (condition_rtx, SImode);
19478 t = XEXP (condition_rtx, 0);
19479
19480 gcc_assert (cond_code == NE || cond_code == EQ);
19481
19482 if (cond_code == NE)
19483 emit_insn (gen_e500_flip_gt_bit (t, t));
19484
19485 emit_insn (gen_move_from_CR_gt_bit (result, t));
19486 return;
19487 }
19488
19489 if (cond_code == NE
19490 || cond_code == GE || cond_code == LE
19491 || cond_code == GEU || cond_code == LEU
19492 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
19493 {
19494 rtx not_result = gen_reg_rtx (CCEQmode);
19495 rtx not_op, rev_cond_rtx;
19496 machine_mode cc_mode;
19497
19498 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
19499
19500 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
19501 SImode, XEXP (condition_rtx, 0), const0_rtx);
19502 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
19503 emit_insn (gen_rtx_SET (VOIDmode, not_result, not_op));
19504 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
19505 }
19506
19507 op_mode = GET_MODE (XEXP (operands[1], 0));
19508 if (op_mode == VOIDmode)
19509 op_mode = GET_MODE (XEXP (operands[1], 1));
19510
19511 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
19512 {
19513 PUT_MODE (condition_rtx, DImode);
19514 convert_move (result, condition_rtx, 0);
19515 }
19516 else
19517 {
19518 PUT_MODE (condition_rtx, SImode);
19519 emit_insn (gen_rtx_SET (VOIDmode, result, condition_rtx));
19520 }
19521 }
19522
19523 /* Emit a conditional branch: OPERANDS[0] is the comparison and OPERANDS[3] the label to branch to. */
19524
19525 void
19526 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
19527 {
19528 rtx condition_rtx, loc_ref;
19529
19530 condition_rtx = rs6000_generate_compare (operands[0], mode);
19531 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
19532 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
19533 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
19534 loc_ref, pc_rtx)));
19535 }
19536
19537 /* Return the string to output a conditional branch to LABEL, which is
19538 the operand template of the label, or NULL if the branch is really a
19539 conditional return.
19540
19541 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
19542 condition code register and its mode specifies what kind of
19543 comparison we made.
19544
19545 REVERSED is nonzero if we should reverse the sense of the comparison.
19546
19547 INSN is the insn. */
19548
19549 char *
19550 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
19551 {
19552 static char string[64];
19553 enum rtx_code code = GET_CODE (op);
19554 rtx cc_reg = XEXP (op, 0);
19555 machine_mode mode = GET_MODE (cc_reg);
19556 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
19557 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
19558 int really_reversed = reversed ^ need_longbranch;
19559 char *s = string;
19560 const char *ccode;
19561 const char *pred;
19562 rtx note;
19563
19564 validate_condition_mode (code, mode);
19565
19566 /* Work out which way this really branches. We could use
19567 reverse_condition_maybe_unordered here always but this
19568 makes the resulting assembler clearer. */
19569 if (really_reversed)
19570 {
19571 /* Reversal of FP compares needs care -- an ordered compare
19572 becomes an unordered compare and vice versa. */
19573 if (mode == CCFPmode)
19574 code = reverse_condition_maybe_unordered (code);
19575 else
19576 code = reverse_condition (code);
19577 }
19578
19579 if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
19580 {
19581 /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
19582 to the GT bit. */
19583 switch (code)
19584 {
19585 case EQ:
19586 /* Opposite of GT. */
19587 code = GT;
19588 break;
19589
19590 case NE:
19591 code = UNLE;
19592 break;
19593
19594 default:
19595 gcc_unreachable ();
19596 }
19597 }
19598
19599 switch (code)
19600 {
19601 /* Not all of these are actually distinct opcodes, but
19602 we distinguish them for clarity of the resulting assembler. */
19603 case NE: case LTGT:
19604 ccode = "ne"; break;
19605 case EQ: case UNEQ:
19606 ccode = "eq"; break;
19607 case GE: case GEU:
19608 ccode = "ge"; break;
19609 case GT: case GTU: case UNGT:
19610 ccode = "gt"; break;
19611 case LE: case LEU:
19612 ccode = "le"; break;
19613 case LT: case LTU: case UNLT:
19614 ccode = "lt"; break;
19615 case UNORDERED: ccode = "un"; break;
19616 case ORDERED: ccode = "nu"; break;
19617 case UNGE: ccode = "nl"; break;
19618 case UNLE: ccode = "ng"; break;
19619 default:
19620 gcc_unreachable ();
19621 }
19622
19623 /* Maybe we have a guess as to how likely the branch is. */
19624 pred = "";
19625 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
19626 if (note != NULL_RTX)
19627 {
19628 /* PROB is the difference from 50%. */
19629 int prob = XINT (note, 0) - REG_BR_PROB_BASE / 2;
19630
19631 /* Only hint for highly probable/improbable branches on newer
19632 cpus as static prediction overrides processor dynamic
19633 prediction. For older cpus we may as well always hint, but
19634 assume not taken for branches that are very close to 50% as a
19635 mispredicted taken branch is more expensive than a
19636 mispredicted not-taken branch. */
19637 if (rs6000_always_hint
19638 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
19639 && br_prob_note_reliable_p (note)))
19640 {
19641 if (abs (prob) > REG_BR_PROB_BASE / 20
19642 && ((prob > 0) ^ need_longbranch))
19643 pred = "+";
19644 else
19645 pred = "-";
19646 }
19647 }
19648
19649 if (label == NULL)
19650 s += sprintf (s, "b%slr%s ", ccode, pred);
19651 else
19652 s += sprintf (s, "b%s%s ", ccode, pred);
19653
19654 /* We need to escape any '%' characters in the reg_names string.
19655 Assume they'd only be the first character.... */
19656 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
19657 *s++ = '%';
19658 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
19659
19660 if (label != NULL)
19661 {
19662 /* If the branch distance was too far, we may have to use an
19663 unconditional branch to go the distance. */
19664 if (need_longbranch)
19665 s += sprintf (s, ",$+8\n\tb %s", label);
19666 else
19667 s += sprintf (s, ",%s", label);
19668 }
19669
19670 return string;
19671 }
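/* Illustrative results: an EQ test on cr0 with a strong "taken" hint
yields "beq+ 0,.L5"; if the target is out of conditional-branch
range the sense is reversed and we get "bne 0,$+8" followed by an
unconditional "b .L5". */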
19672
19673 /* Return the string to flip the GT bit on a CR. */
19674 char *
19675 output_e500_flip_gt_bit (rtx dst, rtx src)
19676 {
19677 static char string[64];
19678 int a, b;
19679
19680 gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
19681 && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));
19682
19683 /* GT bit. */
19684 a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
19685 b = 4 * (REGNO (src) - CR0_REGNO) + 1;
19686
19687 sprintf (string, "crnot %d,%d", a, b);
19688 return string;
19689 }
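/* E.g. flipping cr2's GT bit into cr3 gives "crnot 13,9", since the
GT bit of CRn is CR bit 4*n + 1. */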
19690
19691 /* Return insn for VSX or Altivec comparisons. */
19692
19693 static rtx
19694 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
19695 {
19696 rtx mask;
19697 machine_mode mode = GET_MODE (op0);
19698
19699 switch (code)
19700 {
19701 default:
19702 break;
19703
19704 case GE:
19705 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19706 return NULL_RTX;
19707
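/* Floating-point vector GE is supported directly (e.g. vcmpgefp
on AltiVec), so fall through. */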
19708 case EQ:
19709 case GT:
19710 case GTU:
19711 case ORDERED:
19712 case UNORDERED:
19713 case UNEQ:
19714 case LTGT:
19715 mask = gen_reg_rtx (mode);
19716 emit_insn (gen_rtx_SET (VOIDmode,
19717 mask,
19718 gen_rtx_fmt_ee (code, mode, op0, op1)));
19719 return mask;
19720 }
19721
19722 return NULL_RTX;
19723 }
19724
19725 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
19726 DMODE is expected destination mode. This is a recursive function. */
19727
19728 static rtx
19729 rs6000_emit_vector_compare (enum rtx_code rcode,
19730 rtx op0, rtx op1,
19731 machine_mode dmode)
19732 {
19733 rtx mask;
19734 bool swap_operands = false;
19735 bool try_again = false;
19736
19737 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
19738 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
19739
19740 /* See if the comparison works as is. */
19741 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
19742 if (mask)
19743 return mask;
19744
19745 switch (rcode)
19746 {
19747 case LT:
19748 rcode = GT;
19749 swap_operands = true;
19750 try_again = true;
19751 break;
19752 case LTU:
19753 rcode = GTU;
19754 swap_operands = true;
19755 try_again = true;
19756 break;
19757 case NE:
19758 case UNLE:
19759 case UNLT:
19760 case UNGE:
19761 case UNGT:
19762 /* Invert condition and try again.
19763 e.g., A != B becomes ~(A==B). */
19764 {
19765 enum rtx_code rev_code;
19766 enum insn_code nor_code;
19767 rtx mask2;
19768
19769 rev_code = reverse_condition_maybe_unordered (rcode);
19770 if (rev_code == UNKNOWN)
19771 return NULL_RTX;
19772
19773 nor_code = optab_handler (one_cmpl_optab, dmode);
19774 if (nor_code == CODE_FOR_nothing)
19775 return NULL_RTX;
19776
19777 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
19778 if (!mask2)
19779 return NULL_RTX;
19780
19781 mask = gen_reg_rtx (dmode);
19782 emit_insn (GEN_FCN (nor_code) (mask, mask2));
19783 return mask;
19784 }
19785 break;
19786 case GE:
19787 case GEU:
19788 case LE:
19789 case LEU:
19790 /* Try GT/GTU/LT/LTU OR EQ */
19791 {
19792 rtx c_rtx, eq_rtx;
19793 enum insn_code ior_code;
19794 enum rtx_code new_code;
19795
19796 switch (rcode)
19797 {
19798 case GE:
19799 new_code = GT;
19800 break;
19801
19802 case GEU:
19803 new_code = GTU;
19804 break;
19805
19806 case LE:
19807 new_code = LT;
19808 break;
19809
19810 case LEU:
19811 new_code = LTU;
19812 break;
19813
19814 default:
19815 gcc_unreachable ();
19816 }
19817
19818 ior_code = optab_handler (ior_optab, dmode);
19819 if (ior_code == CODE_FOR_nothing)
19820 return NULL_RTX;
19821
19822 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
19823 if (!c_rtx)
19824 return NULL_RTX;
19825
19826 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
19827 if (!eq_rtx)
19828 return NULL_RTX;
19829
19830 mask = gen_reg_rtx (dmode);
19831 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
19832 return mask;
19833 }
19834 break;
19835 default:
19836 return NULL_RTX;
19837 }
19838
19839 if (try_again)
19840 {
19841 if (swap_operands)
19842 std::swap (op0, op1);
19843
19844 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
19845 if (mask)
19846 return mask;
19847 }
19848
19849 /* You only get two chances. */
19850 return NULL_RTX;
19851 }
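/* To illustrate the recursion for V4SI (a sketch, not the only
possible sequence): LT swaps its operands and emits GT (vcmpgtsw);
LE emits GT and EQ (vcmpgtsw, vcmpequw) and ORs the two masks with
vor; NE emits EQ and inverts the mask via one_cmpl (e.g. vnor). */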
19852
19853 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
19854 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
19855 operands for the relation operation COND. */
19856
19857 int
19858 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
19859 rtx cond, rtx cc_op0, rtx cc_op1)
19860 {
19861 machine_mode dest_mode = GET_MODE (dest);
19862 machine_mode mask_mode = GET_MODE (cc_op0);
19863 enum rtx_code rcode = GET_CODE (cond);
19864 machine_mode cc_mode = CCmode;
19865 rtx mask;
19866 rtx cond2;
19868 bool invert_move = false;
19869
19870 if (VECTOR_UNIT_NONE_P (dest_mode))
19871 return 0;
19872
19873 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
19874 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
19875
19876 switch (rcode)
19877 {
19878 /* Swap operands if we can, and fall back to doing the operation as
19879 specified, and doing a NOR to invert the test. */
19880 case NE:
19881 case UNLE:
19882 case UNLT:
19883 case UNGE:
19884 case UNGT:
19885 /* Invert condition and try again.
19886 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
19887 invert_move = true;
19888 rcode = reverse_condition_maybe_unordered (rcode);
19889 if (rcode == UNKNOWN)
19890 return 0;
19891 break;
19892
19893 /* Mark unsigned tests with CCUNSmode. */
19894 case GTU:
19895 case GEU:
19896 case LTU:
19897 case LEU:
19898 cc_mode = CCUNSmode;
19899 break;
19900
19901 default:
19902 break;
19903 }
19904
19905 /* Get the vector mask for the given relational operations. */
19906 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
19907
19908 if (!mask)
19909 return 0;
19910
19911 if (invert_move)
19912 std::swap (op_true, op_false);
19917
19918 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
19919 CONST0_RTX (dest_mode));
19920 emit_insn (gen_rtx_SET (VOIDmode,
19921 dest,
19922 gen_rtx_IF_THEN_ELSE (dest_mode,
19923 cond2,
19924 op_true,
19925 op_false)));
19926 return 1;
19927 }
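/* The IF_THEN_ELSE above is matched by the vector select patterns, so
the final instruction is typically vsel/xxsel, computing
(op_true & mask) | (op_false & ~mask) bit by bit. */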
19928
19929 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
19930 operands of the last comparison is nonzero/true, FALSE_COND if it
19931 is zero/false. Return 0 if the hardware has no such operation. */
19932
19933 int
19934 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
19935 {
19936 enum rtx_code code = GET_CODE (op);
19937 rtx op0 = XEXP (op, 0);
19938 rtx op1 = XEXP (op, 1);
19939 REAL_VALUE_TYPE c1;
19940 machine_mode compare_mode = GET_MODE (op0);
19941 machine_mode result_mode = GET_MODE (dest);
19942 rtx temp;
19943 bool is_against_zero;
19944
19945 /* These modes should always match. */
19946 if (GET_MODE (op1) != compare_mode
19947 /* In the isel case however, we can use a compare immediate, so
19948 op1 may be a small constant. */
19949 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
19950 return 0;
19951 if (GET_MODE (true_cond) != result_mode)
19952 return 0;
19953 if (GET_MODE (false_cond) != result_mode)
19954 return 0;
19955
19956 /* Don't allow using floating point comparisons for integer results for
19957 now. */
19958 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
19959 return 0;
19960
19961 /* First, work out if the hardware can do this at all, or
19962 if it's too slow.... */
19963 if (!FLOAT_MODE_P (compare_mode))
19964 {
19965 if (TARGET_ISEL)
19966 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
19967 return 0;
19968 }
19969 else if (TARGET_HARD_FLOAT && !TARGET_FPRS
19970 && SCALAR_FLOAT_MODE_P (compare_mode))
19971 return 0;
19972
19973 is_against_zero = op1 == CONST0_RTX (compare_mode);
19974
19975 /* A floating-point subtract might overflow, underflow, or produce
19976 an inexact result, thus changing the floating-point flags, so it
19977 can't be generated if we care about that. It's safe if one side
19978 of the construct is zero, since then no subtract will be
19979 generated. */
19980 if (SCALAR_FLOAT_MODE_P (compare_mode)
19981 && flag_trapping_math && ! is_against_zero)
19982 return 0;
19983
19984 /* Eliminate half of the comparisons by switching operands, this
19985 makes the remaining code simpler. */
19986 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
19987 || code == LTGT || code == LT || code == UNLE)
19988 {
19989 code = reverse_condition_maybe_unordered (code);
19990 std::swap (true_cond, false_cond);
19993 }
19994
19995 /* UNEQ and LTGT take four instructions for a comparison with zero,
19996 it'll probably be faster to use a branch here too. */
19997 if (code == UNEQ && HONOR_NANS (compare_mode))
19998 return 0;
19999
20000 if (GET_CODE (op1) == CONST_DOUBLE)
20001 REAL_VALUE_FROM_CONST_DOUBLE (c1, op1);
20002
20003 /* We're going to try to implement comparisons by performing
20004 a subtract, then comparing against zero. Unfortunately,
20005 Inf - Inf is NaN which is not zero, and so if we don't
20006 know that the operand is finite and the comparison
20007 would treat EQ differently from UNORDERED, we can't do it. */
20008 if (HONOR_INFINITIES (compare_mode)
20009 && code != GT && code != UNGE
20010 && (GET_CODE (op1) != CONST_DOUBLE || real_isinf (&c1))
20011 /* Constructs of the form (a OP b ? a : b) are safe. */
20012 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
20013 || (! rtx_equal_p (op0, true_cond)
20014 && ! rtx_equal_p (op1, true_cond))))
20015 return 0;
20016
20017 /* At this point we know we can use fsel. */
20018
20019 /* Reduce the comparison to a comparison against zero. */
20020 if (! is_against_zero)
20021 {
20022 temp = gen_reg_rtx (compare_mode);
20023 emit_insn (gen_rtx_SET (VOIDmode, temp,
20024 gen_rtx_MINUS (compare_mode, op0, op1)));
20025 op0 = temp;
20026 op1 = CONST0_RTX (compare_mode);
20027 }
20028
20029 /* If we don't care about NaNs we can reduce some of the comparisons
20030 down to faster ones. */
20031 if (! HONOR_NANS (compare_mode))
20032 switch (code)
20033 {
20034 case GT:
20035 code = LE;
20036 std::swap (true_cond, false_cond);
20039 break;
20040 case UNGE:
20041 code = GE;
20042 break;
20043 case UNEQ:
20044 code = EQ;
20045 break;
20046 default:
20047 break;
20048 }
20049
20050 /* Now, reduce everything down to a GE. */
20051 switch (code)
20052 {
20053 case GE:
20054 break;
20055
20056 case LE:
20057 temp = gen_reg_rtx (compare_mode);
20058 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
20059 op0 = temp;
20060 break;
20061
20062 case ORDERED:
20063 temp = gen_reg_rtx (compare_mode);
20064 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_ABS (compare_mode, op0)));
20065 op0 = temp;
20066 break;
20067
20068 case EQ:
20069 temp = gen_reg_rtx (compare_mode);
20070 emit_insn (gen_rtx_SET (VOIDmode, temp,
20071 gen_rtx_NEG (compare_mode,
20072 gen_rtx_ABS (compare_mode, op0))));
20073 op0 = temp;
20074 break;
20075
20076 case UNGE:
20077 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
20078 temp = gen_reg_rtx (result_mode);
20079 emit_insn (gen_rtx_SET (VOIDmode, temp,
20080 gen_rtx_IF_THEN_ELSE (result_mode,
20081 gen_rtx_GE (VOIDmode,
20082 op0, op1),
20083 true_cond, false_cond)));
20084 false_cond = true_cond;
20085 true_cond = temp;
20086
20087 temp = gen_reg_rtx (compare_mode);
20088 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
20089 op0 = temp;
20090 break;
20091
20092 case GT:
20093 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
20094 temp = gen_reg_rtx (result_mode);
20095 emit_insn (gen_rtx_SET (VOIDmode, temp,
20096 gen_rtx_IF_THEN_ELSE (result_mode,
20097 gen_rtx_GE (VOIDmode,
20098 op0, op1),
20099 true_cond, false_cond)));
20100 true_cond = false_cond;
20101 false_cond = temp;
20102
20103 temp = gen_reg_rtx (compare_mode);
20104 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
20105 op0 = temp;
20106 break;
20107
20108 default:
20109 gcc_unreachable ();
20110 }
20111
20112 emit_insn (gen_rtx_SET (VOIDmode, dest,
20113 gen_rtx_IF_THEN_ELSE (result_mode,
20114 gen_rtx_GE (VOIDmode,
20115 op0, op1),
20116 true_cond, false_cond)));
20117 return 1;
20118 }
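/* Sketch of the common case, assuming no trapping math and finite
operands: "d = (a >= b) ? x : y" in DFmode becomes a subtract plus
one fsel, roughly "fsub f0,fa,fb; fsel fd,f0,fx,fy", since fsel
selects its second input when the first is >= 0. */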
20119
20120 /* Same as above, but for ints (isel). */
20121
20122 static int
20123 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
20124 {
20125 rtx condition_rtx, cr;
20126 machine_mode mode = GET_MODE (dest);
20127 enum rtx_code cond_code;
20128 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
20129 bool signedp;
20130
20131 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
20132 return 0;
20133
20134 /* We still have to do the compare, because isel doesn't do a
20135 compare; it just looks at the CRx bits set by a previous compare
20136 instruction. */
20137 condition_rtx = rs6000_generate_compare (op, mode);
20138 cond_code = GET_CODE (condition_rtx);
20139 cr = XEXP (condition_rtx, 0);
20140 signedp = GET_MODE (cr) == CCmode;
20141
20142 isel_func = (mode == SImode
20143 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
20144 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
20145
20146 switch (cond_code)
20147 {
20148 case LT: case GT: case LTU: case GTU: case EQ:
20149 /* isel handles these directly. */
20150 break;
20151
20152 default:
20153 /* We need to swap the sense of the comparison. */
20154 {
20155 std::swap (false_cond, true_cond);
20156 PUT_CODE (condition_rtx, reverse_condition (cond_code));
20157 }
20158 break;
20159 }
20160
20161 false_cond = force_reg (mode, false_cond);
20162 if (true_cond != const0_rtx)
20163 true_cond = force_reg (mode, true_cond);
20164
20165 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
20166
20167 return 1;
20168 }
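/* The underlying instruction is "isel rD,rA,rB,BC": rD gets rA if CR
bit BC is set, else rB. Only LT/GT/LTU/GTU/EQ are handled directly
above because each of those corresponds to a single CR bit being
set. */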
20169
20170 const char *
20171 output_isel (rtx *operands)
20172 {
20173 enum rtx_code code;
20174
20175 code = GET_CODE (operands[1]);
20176
20177 if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
20178 {
20179 gcc_assert (GET_CODE (operands[2]) == REG
20180 && GET_CODE (operands[3]) == REG);
20181 PUT_CODE (operands[1], reverse_condition (code));
20182 return "isel %0,%3,%2,%j1";
20183 }
20184
20185 return "isel %0,%2,%3,%j1";
20186 }
20187
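/* Emit a min or max operation (CODE is SMIN, SMAX, UMIN or UMAX) on
operands OP0 and OP1, placing the result in DEST. */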
20188 void
20189 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20190 {
20191 machine_mode mode = GET_MODE (op0);
20192 enum rtx_code c;
20193 rtx target;
20194
20195 /* VSX/altivec have direct min/max insns. */
20196 if ((code == SMAX || code == SMIN)
20197 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
20198 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
20199 {
20200 emit_insn (gen_rtx_SET (VOIDmode,
20201 dest,
20202 gen_rtx_fmt_ee (code, mode, op0, op1)));
20203 return;
20204 }
20205
20206 if (code == SMAX || code == SMIN)
20207 c = GE;
20208 else
20209 c = GEU;
20210
20211 if (code == SMAX || code == UMAX)
20212 target = emit_conditional_move (dest, c, op0, op1, mode,
20213 op0, op1, mode, 0);
20214 else
20215 target = emit_conditional_move (dest, c, op0, op1, mode,
20216 op1, op0, mode, 0);
20217 gcc_assert (target);
20218 if (target != dest)
20219 emit_move_insn (dest, target);
20220 }
20221
20222 /* A subroutine of the atomic operation splitters. Jump to LABEL if
20223 COND is true. Mark the jump as unlikely to be taken. */
20224
20225 static void
20226 emit_unlikely_jump (rtx cond, rtx label)
20227 {
20228 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
20229 rtx x;
20230
20231 x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
20232 x = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, x));
20233 add_int_reg_note (x, REG_BR_PROB, very_unlikely);
20234 }
20235
20236 /* A subroutine of the atomic operation splitters. Emit a load-locked
20237 instruction in MODE. For QI/HImode, possibly use a pattern that includes
20238 the zero_extend operation. */
20239
20240 static void
20241 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
20242 {
20243 rtx (*fn) (rtx, rtx) = NULL;
20244
20245 switch (mode)
20246 {
20247 case QImode:
20248 fn = gen_load_lockedqi;
20249 break;
20250 case HImode:
20251 fn = gen_load_lockedhi;
20252 break;
20253 case SImode:
20254 if (GET_MODE (mem) == QImode)
20255 fn = gen_load_lockedqi_si;
20256 else if (GET_MODE (mem) == HImode)
20257 fn = gen_load_lockedhi_si;
20258 else
20259 fn = gen_load_lockedsi;
20260 break;
20261 case DImode:
20262 fn = gen_load_lockeddi;
20263 break;
20264 case TImode:
20265 fn = gen_load_lockedti;
20266 break;
20267 default:
20268 gcc_unreachable ();
20269 }
20270 emit_insn (fn (reg, mem));
20271 }
20272
20273 /* A subroutine of the atomic operation splitters. Emit a store-conditional
20274 instruction in MODE. */
20275
20276 static void
20277 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
20278 {
20279 rtx (*fn) (rtx, rtx, rtx) = NULL;
20280
20281 switch (mode)
20282 {
20283 case QImode:
20284 fn = gen_store_conditionalqi;
20285 break;
20286 case HImode:
20287 fn = gen_store_conditionalhi;
20288 break;
20289 case SImode:
20290 fn = gen_store_conditionalsi;
20291 break;
20292 case DImode:
20293 fn = gen_store_conditionaldi;
20294 break;
20295 case TImode:
20296 fn = gen_store_conditionalti;
20297 break;
20298 default:
20299 gcc_unreachable ();
20300 }
20301
20302 /* Emit sync before stwcx. to address PPC405 Erratum. */
20303 if (PPC405_ERRATUM77)
20304 emit_insn (gen_hwsync ());
20305
20306 emit_insn (fn (res, mem, val));
20307 }
20308
20309 /* Expand barriers before and after a load_locked/store_cond sequence. */
20310
20311 static rtx
20312 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
20313 {
20314 rtx addr = XEXP (mem, 0);
20315 int strict_p = (reload_in_progress || reload_completed);
20316
20317 if (!legitimate_indirect_address_p (addr, strict_p)
20318 && !legitimate_indexed_address_p (addr, strict_p))
20319 {
20320 addr = force_reg (Pmode, addr);
20321 mem = replace_equiv_address_nv (mem, addr);
20322 }
20323
20324 switch (model)
20325 {
20326 case MEMMODEL_RELAXED:
20327 case MEMMODEL_CONSUME:
20328 case MEMMODEL_ACQUIRE:
20329 break;
20330 case MEMMODEL_RELEASE:
20331 case MEMMODEL_ACQ_REL:
20332 emit_insn (gen_lwsync ());
20333 break;
20334 case MEMMODEL_SEQ_CST:
20335 emit_insn (gen_hwsync ());
20336 break;
20337 default:
20338 gcc_unreachable ();
20339 }
20340 return mem;
20341 }
20342
20343 static void
20344 rs6000_post_atomic_barrier (enum memmodel model)
20345 {
20346 switch (model)
20347 {
20348 case MEMMODEL_RELAXED:
20349 case MEMMODEL_CONSUME:
20350 case MEMMODEL_RELEASE:
20351 break;
20352 case MEMMODEL_ACQUIRE:
20353 case MEMMODEL_ACQ_REL:
20354 case MEMMODEL_SEQ_CST:
20355 emit_insn (gen_isync ());
20356 break;
20357 default:
20358 gcc_unreachable ();
20359 }
20360 }
20361
20362 /* A subroutine of the various atomic expanders. For sub-word operations,
20363 we must adjust things to operate on SImode. Given the original MEM,
20364 return a new aligned memory. Also build and return the quantities by
20365 which to shift and mask. */
20366
20367 static rtx
20368 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
20369 {
20370 rtx addr, align, shift, mask, mem;
20371 HOST_WIDE_INT shift_mask;
20372 machine_mode mode = GET_MODE (orig_mem);
20373
20374 /* For smaller modes, we have to implement this via SImode. */
20375 shift_mask = (mode == QImode ? 0x18 : 0x10);
20376
20377 addr = XEXP (orig_mem, 0);
20378 addr = force_reg (GET_MODE (addr), addr);
20379
20380 /* Aligned memory containing subword. Generate a new memory. We
20381 do not want any of the existing MEM_ATTR data, as we're now
20382 accessing memory outside the original object. */
20383 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
20384 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20385 mem = gen_rtx_MEM (SImode, align);
20386 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
20387 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
20388 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
20389
20390 /* Shift amount for subword relative to aligned word. */
20391 shift = gen_reg_rtx (SImode);
20392 addr = gen_lowpart (SImode, addr);
20393 emit_insn (gen_rlwinm (shift, addr, GEN_INT (3), GEN_INT (shift_mask)));
20394 if (BYTES_BIG_ENDIAN)
20395 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
20396 shift, 1, OPTAB_LIB_WIDEN);
20397 *pshift = shift;
20398
20399 /* Mask for insertion. */
20400 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
20401 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
20402 *pmask = mask;
20403
20404 return mem;
20405 }
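/* Worked example (QImode, big-endian): for a byte at address A with
(A & 3) == 1, the aligned word is at A & -4; the rlwinm computes
(A << 3) & 0x18 == 8, and the big-endian XOR with 0x18 turns that
into 16, since the byte occupies bits 16..23 of the loaded word.
The insertion mask is then 0xff << 16. */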
20406
20407 /* A subroutine of the various atomic expanders. For sub-word operands,
20408 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
20409
20410 static rtx
20411 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
20412 {
20413 rtx x;
20414
20415 x = gen_reg_rtx (SImode);
20416 emit_insn (gen_rtx_SET (VOIDmode, x,
20417 gen_rtx_AND (SImode,
20418 gen_rtx_NOT (SImode, mask),
20419 oldval)));
20420
20421 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
20422
20423 return x;
20424 }
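/* That is, the result is (OLDVAL & ~MASK) | NEWVAL, relying on NEWVAL
having already been shifted into position and zeroed elsewhere. */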
20425
20426 /* A subroutine of the various atomic expanders. For sub-word operands,
20427 extract WIDE to NARROW via SHIFT. */
20428
20429 static void
20430 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
20431 {
20432 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
20433 wide, 1, OPTAB_LIB_WIDEN);
20434 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
20435 }
20436
20437 /* Expand an atomic compare and swap operation. */
20438
20439 void
20440 rs6000_expand_atomic_compare_and_swap (rtx operands[])
20441 {
20442 rtx boolval, retval, mem, oldval, newval, cond;
20443 rtx label1, label2, x, mask, shift;
20444 machine_mode mode, orig_mode;
20445 enum memmodel mod_s, mod_f;
20446 bool is_weak;
20447
20448 boolval = operands[0];
20449 retval = operands[1];
20450 mem = operands[2];
20451 oldval = operands[3];
20452 newval = operands[4];
20453 is_weak = (INTVAL (operands[5]) != 0);
20454 mod_s = (enum memmodel) INTVAL (operands[6]);
20455 mod_f = (enum memmodel) INTVAL (operands[7]);
20456 orig_mode = mode = GET_MODE (mem);
20457
20458 mask = shift = NULL_RTX;
20459 if (mode == QImode || mode == HImode)
20460 {
20461 /* Before power8, we didn't have access to lbarx/lharx, so we generate
20462 lwarx and shift/mask operations. With power8, we need to do the
20463 comparison in SImode, but the store is still done in QI/HImode. */
20464 oldval = convert_modes (SImode, mode, oldval, 1);
20465
20466 if (!TARGET_SYNC_HI_QI)
20467 {
20468 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
20469
20470 /* Shift and mask OLDVAL into position within the word. */
20471 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
20472 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20473
20474 /* Shift and mask NEWVAL into position within the word. */
20475 newval = convert_modes (SImode, mode, newval, 1);
20476 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
20477 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20478 }
20479
20480 /* Prepare to adjust the return value. */
20481 retval = gen_reg_rtx (SImode);
20482 mode = SImode;
20483 }
20484 else if (reg_overlap_mentioned_p (retval, oldval))
20485 oldval = copy_to_reg (oldval);
20486
20487 mem = rs6000_pre_atomic_barrier (mem, mod_s);
20488
20489 label1 = NULL_RTX;
20490 if (!is_weak)
20491 {
20492 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
20493 emit_label (XEXP (label1, 0));
20494 }
20495 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
20496
20497 emit_load_locked (mode, retval, mem);
20498
20499 x = retval;
20500 if (mask)
20501 {
20502 x = expand_simple_binop (SImode, AND, retval, mask,
20503 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20504 }
20505
20506 cond = gen_reg_rtx (CCmode);
20507 /* If we have TImode, synthesize a comparison. */
20508 if (mode != TImode)
20509 x = gen_rtx_COMPARE (CCmode, x, oldval);
20510 else
20511 {
20512 rtx xor1_result = gen_reg_rtx (DImode);
20513 rtx xor2_result = gen_reg_rtx (DImode);
20514 rtx or_result = gen_reg_rtx (DImode);
20515 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
20516 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
20517 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
20518 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
20519
20520 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
20521 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
20522 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
20523 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
20524 }
20525
20526 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
20527
20528 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20529 emit_unlikely_jump (x, label2);
20530
20531 x = newval;
20532 if (mask)
20533 x = rs6000_mask_atomic_subword (retval, newval, mask);
20534
20535 emit_store_conditional (orig_mode, cond, mem, x);
20536
20537 if (!is_weak)
20538 {
20539 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20540 emit_unlikely_jump (x, label1);
20541 }
20542
20543 if (mod_f != MEMMODEL_RELAXED)
20544 emit_label (XEXP (label2, 0));
20545
20546 rs6000_post_atomic_barrier (mod_s);
20547
20548 if (mod_f == MEMMODEL_RELAXED)
20549 emit_label (XEXP (label2, 0));
20550
20551 if (shift)
20552 rs6000_finish_atomic_subword (operands[1], retval, shift);
20553 else if (mode != GET_MODE (operands[1]))
20554 convert_move (operands[1], retval, 1);
20555
20556 /* In all cases, CR0 contains EQ on success, and NE on failure. */
20557 x = gen_rtx_EQ (SImode, cond, const0_rtx);
20558 emit_insn (gen_rtx_SET (VOIDmode, boolval, x));
20559 }
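/* For a strong SImode compare-and-swap with SEQ_CST semantics the
expansion is roughly (sketch):
sync
.L1: lwarx r,0,mem
cmpw cr0,r,oldval
bne- cr0,.L2
stwcx. newval,0,mem
bne- cr0,.L1
.L2: isync
leaving CR0.EQ set on success as noted above. */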
20560
20561 /* Expand an atomic exchange operation. */
20562
20563 void
20564 rs6000_expand_atomic_exchange (rtx operands[])
20565 {
20566 rtx retval, mem, val, cond;
20567 machine_mode mode;
20568 enum memmodel model;
20569 rtx label, x, mask, shift;
20570
20571 retval = operands[0];
20572 mem = operands[1];
20573 val = operands[2];
20574 model = (enum memmodel) INTVAL (operands[3]);
20575 mode = GET_MODE (mem);
20576
20577 mask = shift = NULL_RTX;
20578 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
20579 {
20580 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
20581
20582 /* Shift and mask VAL into position within the word. */
20583 val = convert_modes (SImode, mode, val, 1);
20584 val = expand_simple_binop (SImode, ASHIFT, val, shift,
20585 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20586
20587 /* Prepare to adjust the return value. */
20588 retval = gen_reg_rtx (SImode);
20589 mode = SImode;
20590 }
20591
20592 mem = rs6000_pre_atomic_barrier (mem, model);
20593
20594 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
20595 emit_label (XEXP (label, 0));
20596
20597 emit_load_locked (mode, retval, mem);
20598
20599 x = val;
20600 if (mask)
20601 x = rs6000_mask_atomic_subword (retval, val, mask);
20602
20603 cond = gen_reg_rtx (CCmode);
20604 emit_store_conditional (mode, cond, mem, x);
20605
20606 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20607 emit_unlikely_jump (x, label);
20608
20609 rs6000_post_atomic_barrier (model);
20610
20611 if (shift)
20612 rs6000_finish_atomic_subword (operands[0], retval, shift);
20613 }
20614
20615 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
20616 to perform. MEM is the memory on which to operate. VAL is the second
20617 operand of the binary operator. BEFORE and AFTER are optional locations to
20618 return the value of MEM either before or after the operation. MODEL_RTX
20619 is a CONST_INT containing the memory model to use. */
20620
20621 void
20622 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
20623 rtx orig_before, rtx orig_after, rtx model_rtx)
20624 {
20625 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
20626 machine_mode mode = GET_MODE (mem);
20627 machine_mode store_mode = mode;
20628 rtx label, x, cond, mask, shift;
20629 rtx before = orig_before, after = orig_after;
20630
20631 mask = shift = NULL_RTX;
20632 /* On power8, we want to use SImode for the operation. On previous systems,
20633 do the operation in the containing SImode word and shift/mask to extract
20634 the proper byte or halfword. */
20635 if (mode == QImode || mode == HImode)
20636 {
20637 if (TARGET_SYNC_HI_QI)
20638 {
20639 val = convert_modes (SImode, mode, val, 1);
20640
20641 /* Prepare to adjust the return value. */
20642 before = gen_reg_rtx (SImode);
20643 if (after)
20644 after = gen_reg_rtx (SImode);
20645 mode = SImode;
20646 }
20647 else
20648 {
20649 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
20650
20651 /* Shift and mask VAL into position within the word. */
20652 val = convert_modes (SImode, mode, val, 1);
20653 val = expand_simple_binop (SImode, ASHIFT, val, shift,
20654 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20655
20656 switch (code)
20657 {
20658 case IOR:
20659 case XOR:
20660 /* We've already zero-extended VAL. That is sufficient to
20661 make certain that it does not affect other bits. */
20662 mask = NULL;
20663 break;
20664
20665 case AND:
20666 /* If we make certain that all of the other bits in VAL are
20667 set, that will be sufficient to not affect other bits. */
20668 x = gen_rtx_NOT (SImode, mask);
20669 x = gen_rtx_IOR (SImode, x, val);
20670 emit_insn (gen_rtx_SET (VOIDmode, val, x));
20671 mask = NULL;
20672 break;
20673
20674 case NOT:
20675 case PLUS:
20676 case MINUS:
20677 /* These will all affect bits outside the field and need
20678 adjustment via MASK within the loop. */
20679 break;
20680
20681 default:
20682 gcc_unreachable ();
20683 }
20684
20685 /* Prepare to adjust the return value. */
20686 before = gen_reg_rtx (SImode);
20687 if (after)
20688 after = gen_reg_rtx (SImode);
20689 store_mode = mode = SImode;
20690 }
20691 }
20692
20693 mem = rs6000_pre_atomic_barrier (mem, model);
20694
20695 label = gen_label_rtx ();
20696 emit_label (label);
20697 label = gen_rtx_LABEL_REF (VOIDmode, label);
20698
20699 if (before == NULL_RTX)
20700 before = gen_reg_rtx (mode);
20701
20702 emit_load_locked (mode, before, mem);
20703
20704 if (code == NOT)
20705 {
20706 x = expand_simple_binop (mode, AND, before, val,
20707 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20708 after = expand_simple_unop (mode, NOT, x, after, 1);
20709 }
20710 else
20711 {
20712 after = expand_simple_binop (mode, code, before, val,
20713 after, 1, OPTAB_LIB_WIDEN);
20714 }
20715
20716 x = after;
20717 if (mask)
20718 {
20719 x = expand_simple_binop (SImode, AND, after, mask,
20720 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20721 x = rs6000_mask_atomic_subword (before, x, mask);
20722 }
20723 else if (store_mode != mode)
20724 x = convert_modes (store_mode, mode, x, 1);
20725
20726 cond = gen_reg_rtx (CCmode);
20727 emit_store_conditional (store_mode, cond, mem, x);
20728
20729 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20730 emit_unlikely_jump (x, label);
20731
20732 rs6000_post_atomic_barrier (model);
20733
20734 if (shift)
20735 {
20736 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
20737 then do the calculations in a SImode register. */
20738 if (orig_before)
20739 rs6000_finish_atomic_subword (orig_before, before, shift);
20740 if (orig_after)
20741 rs6000_finish_atomic_subword (orig_after, after, shift);
20742 }
20743 else if (store_mode != mode)
20744 {
20745 /* QImode/HImode on machines with lbarx/lharx where we do the native
20746 operation and then do the calculations in a SImode register. */
20747 if (orig_before)
20748 convert_move (orig_before, before, 1);
20749 if (orig_after)
20750 convert_move (orig_after, after, 1);
20751 }
20752 else if (orig_after && after != orig_after)
20753 emit_move_insn (orig_after, after);
20754 }
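
/* For reference, the loop emitted above is the standard
   load-reserve/store-conditional sequence; e.g. an SImode atomic
   fetch-and-add expands to roughly the following (a sketch only --
   register numbers are illustrative and the barriers implied by
   MODEL are omitted):

	.L1:	lwarx  9,0,3		# BEFORE = *mem (with reservation)
		add    10,9,4		# AFTER = BEFORE + VAL
		stwcx. 10,0,3		# conditionally store AFTER
		bne- 0,.L1		# retry if the reservation was lost  */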
20755
20756 /* Emit instructions to move SRC to DST. Called by splitters for
20757 multi-register moves. It will emit at most one instruction for
20758 each register that is accessed; that is, it won't emit li/lis pairs
20759 (or equivalent for 64-bit code). One of SRC or DST must be a hard
20760 register. */
20761
20762 void
20763 rs6000_split_multireg_move (rtx dst, rtx src)
20764 {
20765 /* The register number of the first register being moved. */
20766 int reg;
20767 /* The mode that is to be moved. */
20768 machine_mode mode;
20769 /* The mode that the move is being done in, and its size. */
20770 machine_mode reg_mode;
20771 int reg_mode_size;
20772 /* The number of registers that will be moved. */
20773 int nregs;
20774
20775 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
20776 mode = GET_MODE (dst);
20777 nregs = hard_regno_nregs[reg][mode];
20778 if (FP_REGNO_P (reg))
20779 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
20780 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
20781 else if (ALTIVEC_REGNO_P (reg))
20782 reg_mode = V16QImode;
20783 else if (TARGET_E500_DOUBLE && mode == TFmode)
20784 reg_mode = DFmode;
20785 else
20786 reg_mode = word_mode;
20787 reg_mode_size = GET_MODE_SIZE (reg_mode);
20788
20789 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
20790
20791 /* TDmode residing in FP registers is special, since the ISA requires that
20792 the lower-numbered word of a register pair is always the most significant
20793 word, even in little-endian mode. This does not match the usual subreg
20794 semantics, so we cannot use simplify_gen_subreg in those cases. Access
20795 the appropriate constituent registers "by hand" in little-endian mode.
20796
20797 Note we do not need to check for destructive overlap here since TDmode
20798 can only reside in even/odd register pairs. */
20799 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
20800 {
20801 rtx p_src, p_dst;
20802 int i;
20803
20804 for (i = 0; i < nregs; i++)
20805 {
20806 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
20807 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
20808 else
20809 p_src = simplify_gen_subreg (reg_mode, src, mode,
20810 i * reg_mode_size);
20811
20812 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
20813 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
20814 else
20815 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
20816 i * reg_mode_size);
20817
20818 emit_insn (gen_rtx_SET (VOIDmode, p_dst, p_src));
20819 }
20820
20821 return;
20822 }
20823
20824 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
20825 {
20826 /* Move register range backwards, if we might have destructive
20827 overlap. */
20828 int i;
20829 for (i = nregs - 1; i >= 0; i--)
20830 emit_insn (gen_rtx_SET (VOIDmode,
20831 simplify_gen_subreg (reg_mode, dst, mode,
20832 i * reg_mode_size),
20833 simplify_gen_subreg (reg_mode, src, mode,
20834 i * reg_mode_size)));
20835 }
20836 else
20837 {
20838 int i;
20839 int j = -1;
20840 bool used_update = false;
20841 rtx restore_basereg = NULL_RTX;
20842
20843 if (MEM_P (src) && INT_REGNO_P (reg))
20844 {
20845 rtx breg;
20846
20847 if (GET_CODE (XEXP (src, 0)) == PRE_INC
20848 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
20849 {
20850 rtx delta_rtx;
20851 breg = XEXP (XEXP (src, 0), 0);
20852 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
20853 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
20854 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
20855 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
20856 src = replace_equiv_address (src, breg);
20857 }
20858 else if (! rs6000_offsettable_memref_p (src, reg_mode))
20859 {
20860 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
20861 {
20862 rtx basereg = XEXP (XEXP (src, 0), 0);
20863 if (TARGET_UPDATE)
20864 {
20865 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
20866 emit_insn (gen_rtx_SET (VOIDmode, ndst,
20867 gen_rtx_MEM (reg_mode, XEXP (src, 0))));
20868 used_update = true;
20869 }
20870 else
20871 emit_insn (gen_rtx_SET (VOIDmode, basereg,
20872 XEXP (XEXP (src, 0), 1)));
20873 src = replace_equiv_address (src, basereg);
20874 }
20875 else
20876 {
20877 rtx basereg = gen_rtx_REG (Pmode, reg);
20878 emit_insn (gen_rtx_SET (VOIDmode, basereg, XEXP (src, 0)));
20879 src = replace_equiv_address (src, basereg);
20880 }
20881 }
20882
20883 breg = XEXP (src, 0);
20884 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
20885 breg = XEXP (breg, 0);
20886
20887 /* If the base register we are using to address memory is
20888 also a destination reg, then change that register last. */
20889 if (REG_P (breg)
20890 && REGNO (breg) >= REGNO (dst)
20891 && REGNO (breg) < REGNO (dst) + nregs)
20892 j = REGNO (breg) - REGNO (dst);
20893 }
20894 else if (MEM_P (dst) && INT_REGNO_P (reg))
20895 {
20896 rtx breg;
20897
20898 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
20899 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
20900 {
20901 rtx delta_rtx;
20902 breg = XEXP (XEXP (dst, 0), 0);
20903 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
20904 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
20905 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
20906
20907 /* We have to update the breg before doing the store.
20908 Use store with update, if available. */
20909
20910 if (TARGET_UPDATE)
20911 {
20912 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
20913 emit_insn (TARGET_32BIT
20914 ? (TARGET_POWERPC64
20915 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
20916 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
20917 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
20918 used_update = true;
20919 }
20920 else
20921 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
20922 dst = replace_equiv_address (dst, breg);
20923 }
20924 else if (!rs6000_offsettable_memref_p (dst, reg_mode)
20925 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
20926 {
20927 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
20928 {
20929 rtx basereg = XEXP (XEXP (dst, 0), 0);
20930 if (TARGET_UPDATE)
20931 {
20932 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
20933 emit_insn (gen_rtx_SET (VOIDmode,
20934 gen_rtx_MEM (reg_mode, XEXP (dst, 0)), nsrc));
20935 used_update = true;
20936 }
20937 else
20938 emit_insn (gen_rtx_SET (VOIDmode, basereg,
20939 XEXP (XEXP (dst, 0), 1)));
20940 dst = replace_equiv_address (dst, basereg);
20941 }
20942 else
20943 {
20944 rtx basereg = XEXP (XEXP (dst, 0), 0);
20945 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
20946 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
20947 && REG_P (basereg)
20948 && REG_P (offsetreg)
20949 && REGNO (basereg) != REGNO (offsetreg));
20950 if (REGNO (basereg) == 0)
20951 {
20952 rtx tmp = offsetreg;
20953 offsetreg = basereg;
20954 basereg = tmp;
20955 }
20956 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
20957 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
20958 dst = replace_equiv_address (dst, basereg);
20959 }
20960 }
20961 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
20962 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
20963 }
20964
20965 for (i = 0; i < nregs; i++)
20966 {
20967 /* Calculate index to next subword. */
20968 ++j;
20969 if (j == nregs)
20970 j = 0;
20971
20972 /* If compiler already emitted move of first word by
20973 store with update, no need to do anything. */
20974 if (j == 0 && used_update)
20975 continue;
20976
20977 emit_insn (gen_rtx_SET (VOIDmode,
20978 simplify_gen_subreg (reg_mode, dst, mode,
20979 j * reg_mode_size),
20980 simplify_gen_subreg (reg_mode, src, mode,
20981 j * reg_mode_size)));
20982 }
20983 if (restore_basereg != NULL_RTX)
20984 emit_insn (restore_basereg);
20985 }
20986 }
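
/* As a minimal illustration (a made-up example, not from a testcase):
   a DImode register-to-register move on a 32-bit target splits into
   two SImode moves, e.g.

	(set (reg:SI 4) (reg:SI 6))
	(set (reg:SI 5) (reg:SI 7))

   with the order reversed when the register ranges overlap
   destructively, as handled above.  */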
20987
20988 \f
20989 /* This page contains routines that are used to determine what the
20990 function prologue and epilogue code will do and write them out. */
20991
20992 static inline bool
20993 save_reg_p (int r)
20994 {
20995 return !call_used_regs[r] && df_regs_ever_live_p (r);
20996 }
20997
20998 /* Return the first fixed-point register that is required to be
20999 saved. 32 if none. */
21000
21001 int
21002 first_reg_to_save (void)
21003 {
21004 int first_reg;
21005
21006 /* Find lowest numbered live register. */
21007 for (first_reg = 13; first_reg <= 31; first_reg++)
21008 if (save_reg_p (first_reg))
21009 break;
21010
21011 if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
21012 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
21013 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
21014 || (TARGET_TOC && TARGET_MINIMAL_TOC))
21015 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
21016 first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;
21017
21018 #if TARGET_MACHO
21019 if (flag_pic
21020 && crtl->uses_pic_offset_table
21021 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
21022 return RS6000_PIC_OFFSET_TABLE_REGNUM;
21023 #endif
21024
21025 return first_reg;
21026 }
21027
21028 /* Similar, for FP regs. */
21029
21030 int
21031 first_fp_reg_to_save (void)
21032 {
21033 int first_reg;
21034
21035 /* Find lowest numbered live register. */
21036 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
21037 if (save_reg_p (first_reg))
21038 break;
21039
21040 return first_reg;
21041 }
21042
21043 /* Similar, for AltiVec regs. */
21044
21045 static int
21046 first_altivec_reg_to_save (void)
21047 {
21048 int i;
21049
21050 /* Stack frame remains as is unless we are using the AltiVec ABI. */
21051 if (! TARGET_ALTIVEC_ABI)
21052 return LAST_ALTIVEC_REGNO + 1;
21053
21054 /* On Darwin, the unwind routines are compiled without
21055 TARGET_ALTIVEC, and use save_world to save/restore the
21056 altivec registers when necessary. */
21057 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
21058 && ! TARGET_ALTIVEC)
21059 return FIRST_ALTIVEC_REGNO + 20;
21060
21061 /* Find lowest numbered live register. */
21062 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
21063 if (save_reg_p (i))
21064 break;
21065
21066 return i;
21067 }
21068
21069 /* Return a 32-bit mask of the AltiVec registers we need to set in
21070 VRSAVE. Bit n of the return value is 1 if Vn is live, where bit 0
21071 is the MSB of the 32-bit word (IBM bit numbering). */
21072
21073 static unsigned int
21074 compute_vrsave_mask (void)
21075 {
21076 unsigned int i, mask = 0;
21077
21078 /* On Darwin, the unwind routines are compiled without
21079 TARGET_ALTIVEC, and use save_world to save/restore the
21080 call-saved altivec registers when necessary. */
21081 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
21082 && ! TARGET_ALTIVEC)
21083 mask |= 0xFFF;
21084
21085 /* First, find out if we use _any_ altivec registers. */
21086 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
21087 if (df_regs_ever_live_p (i))
21088 mask |= ALTIVEC_REG_BIT (i);
21089
21090 if (mask == 0)
21091 return mask;
21092
21093 /* Next, remove the argument registers from the set. These must
21094 be in the VRSAVE mask set by the caller, so we don't need to add
21095 them in again. More importantly, the mask we compute here is
21096 used to generate CLOBBERs in the set_vrsave insn, and we do not
21097 wish the argument registers to die. */
21098 for (i = crtl->args.info.vregno - 1; i >= ALTIVEC_ARG_MIN_REG; --i)
21099 mask &= ~ALTIVEC_REG_BIT (i);
21100
21101 /* Similarly, remove the return value from the set. */
21102 {
21103 bool yes = false;
21104 diddle_return_value (is_altivec_return_reg, &yes);
21105 if (yes)
21106 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
21107 }
21108
21109 return mask;
21110 }
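
/* For example, assuming the ALTIVEC_REG_BIT mapping implied above
   (V0 at the most significant bit), a function whose only live
   AltiVec register is V20 yields

	mask = 0x80000000 >> 20 = 0x00000800

   before any argument or return-value registers are removed.  */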
21111
21112 /* For a very restricted set of circumstances, we can cut down the
21113 size of prologues/epilogues by calling our own save/restore-the-world
21114 routines. */
21115
21116 static void
21117 compute_save_world_info (rs6000_stack_t *info_ptr)
21118 {
21119 info_ptr->world_save_p = 1;
21120 info_ptr->world_save_p
21121 = (WORLD_SAVE_P (info_ptr)
21122 && DEFAULT_ABI == ABI_DARWIN
21123 && !cfun->has_nonlocal_label
21124 && info_ptr->first_fp_reg_save == FIRST_SAVED_FP_REGNO
21125 && info_ptr->first_gp_reg_save == FIRST_SAVED_GP_REGNO
21126 && info_ptr->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
21127 && info_ptr->cr_save_p);
21128
21129 /* This will not work in conjunction with sibcalls. Make sure there
21130 are none. (This check is expensive, but seldom executed.) */
21131 if (WORLD_SAVE_P (info_ptr))
21132 {
21133 rtx_insn *insn;
21134 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
21135 if (CALL_P (insn) && SIBLING_CALL_P (insn))
21136 {
21137 info_ptr->world_save_p = 0;
21138 break;
21139 }
21140 }
21141
21142 if (WORLD_SAVE_P (info_ptr))
21143 {
21144 /* Even if we're not touching VRsave, make sure there's room on the
21145 stack for it, if it looks like we're calling SAVE_WORLD, which
21146 will attempt to save it. */
21147 info_ptr->vrsave_size = 4;
21148
21149 /* If we are going to save the world, we need to save the link register too. */
21150 info_ptr->lr_save_p = 1;
21151
21152 /* "Save" the VRsave register too if we're saving the world. */
21153 if (info_ptr->vrsave_mask == 0)
21154 info_ptr->vrsave_mask = compute_vrsave_mask ();
21155
21156 /* Because the Darwin register save/restore routines only handle
21157 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
21158 check. */
21159 gcc_assert (info_ptr->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
21160 && (info_ptr->first_altivec_reg_save
21161 >= FIRST_SAVED_ALTIVEC_REGNO));
21162 }
21163 return;
21164 }
21165
21166
21167 static void
21168 is_altivec_return_reg (rtx reg, void *xyes)
21169 {
21170 bool *yes = (bool *) xyes;
21171 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
21172 *yes = true;
21173 }
21174
21175 \f
21176 /* Look for user-defined global regs in the range FIRST to LAST-1.
21177 We should not restore these, and so cannot use lmw or out-of-line
21178 restore functions if there are any. We also can't save them
21179 (well, emit frame notes for them), because frame unwinding during
21180 exception handling will restore saved registers. */
21181
21182 static bool
21183 global_regs_p (unsigned first, unsigned last)
21184 {
21185 while (first < last)
21186 if (global_regs[first++])
21187 return true;
21188 return false;
21189 }
21190
21191 /* Determine the strategy for saving/restoring registers. */
21192
21193 enum {
21194 SAVRES_MULTIPLE = 0x1,
21195 SAVE_INLINE_FPRS = 0x2,
21196 SAVE_INLINE_GPRS = 0x4,
21197 REST_INLINE_FPRS = 0x8,
21198 REST_INLINE_GPRS = 0x10,
21199 SAVE_NOINLINE_GPRS_SAVES_LR = 0x20,
21200 SAVE_NOINLINE_FPRS_SAVES_LR = 0x40,
21201 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x80,
21202 SAVE_INLINE_VRS = 0x100,
21203 REST_INLINE_VRS = 0x200
21204 };
21205
21206 static int
21207 rs6000_savres_strategy (rs6000_stack_t *info,
21208 bool using_static_chain_p)
21209 {
21210 int strategy = 0;
21211 bool lr_save_p;
21212
21213 if (TARGET_MULTIPLE
21214 && !TARGET_POWERPC64
21215 && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
21216 && info->first_gp_reg_save < 31
21217 && !global_regs_p (info->first_gp_reg_save, 32))
21218 strategy |= SAVRES_MULTIPLE;
21219
21220 if (crtl->calls_eh_return
21221 || cfun->machine->ra_need_lr)
21222 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
21223 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
21224 | SAVE_INLINE_VRS | REST_INLINE_VRS);
21225
21226 if (info->first_fp_reg_save == 64
21227 /* The out-of-line FP routines use double-precision stores;
21228 we can't use those routines if we don't have such stores. */
21229 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT)
21230 || global_regs_p (info->first_fp_reg_save, 64))
21231 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
21232
21233 if (info->first_gp_reg_save == 32
21234 || (!(strategy & SAVRES_MULTIPLE)
21235 && global_regs_p (info->first_gp_reg_save, 32)))
21236 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21237
21238 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
21239 || global_regs_p (info->first_altivec_reg_save, LAST_ALTIVEC_REGNO + 1))
21240 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21241
21242 /* Define cutoff for using out-of-line functions to save registers. */
21243 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
21244 {
21245 if (!optimize_size)
21246 {
21247 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
21248 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21249 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21250 }
21251 else
21252 {
21253 /* Prefer out-of-line restore if it can also perform the function exit. */
21254 if (info->first_fp_reg_save > 61)
21255 strategy |= SAVE_INLINE_FPRS;
21256 if (info->first_gp_reg_save > 29)
21257 {
21258 if (info->first_fp_reg_save == 64)
21259 strategy |= SAVE_INLINE_GPRS;
21260 else
21261 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21262 }
21263 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
21264 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21265 }
21266 }
21267 else if (DEFAULT_ABI == ABI_DARWIN)
21268 {
21269 if (info->first_fp_reg_save > 60)
21270 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
21271 if (info->first_gp_reg_save > 29)
21272 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21273 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21274 }
21275 else
21276 {
21277 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
21278 if (info->first_fp_reg_save > 61)
21279 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
21280 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21281 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21282 }
21283
21284 /* Don't bother to try to save things out-of-line if r11 is occupied
21285 by the static chain. It would require too much fiddling and the
21286 static chain is rarely used anyway. FPRs are saved w.r.t. the stack
21287 pointer on Darwin, and AIX uses r1 or r12. */
21288 if (using_static_chain_p
21289 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
21290 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
21291 | SAVE_INLINE_GPRS
21292 | SAVE_INLINE_VRS | REST_INLINE_VRS);
21293
21294 /* We can only use the out-of-line routines to restore if we've
21295 saved all the registers from first_fp_reg_save in the prologue.
21296 Otherwise, we risk loading garbage. */
21297 if ((strategy & (SAVE_INLINE_FPRS | REST_INLINE_FPRS)) == SAVE_INLINE_FPRS)
21298 {
21299 int i;
21300
21301 for (i = info->first_fp_reg_save; i < 64; i++)
21302 if (!save_reg_p (i))
21303 {
21304 strategy |= REST_INLINE_FPRS;
21305 break;
21306 }
21307 }
21308
21309 /* If we are going to use store multiple, then don't even bother
21310 with the out-of-line routines, since the store-multiple
21311 instruction will always be smaller. */
21312 if ((strategy & SAVRES_MULTIPLE))
21313 strategy |= SAVE_INLINE_GPRS;
21314
21315 /* info->lr_save_p isn't yet set if the only reason lr needs to be
21316 saved is an out-of-line save or restore. Set up the value for
21317 the next test (excluding out-of-line gpr restore). */
21318 lr_save_p = (info->lr_save_p
21319 || !(strategy & SAVE_INLINE_GPRS)
21320 || !(strategy & SAVE_INLINE_FPRS)
21321 || !(strategy & SAVE_INLINE_VRS)
21322 || !(strategy & REST_INLINE_FPRS)
21323 || !(strategy & REST_INLINE_VRS));
21324
21325 /* The situation is more complicated with load multiple. We'd
21326 prefer to use the out-of-line routines for restores, since the
21327 "exit" out-of-line routines can handle the restore of LR and the
21328 frame teardown. However, it doesn't make sense to use the
21329 out-of-line routine if that is the only reason we'd need to save
21330 LR, and we can't use the "exit" out-of-line gpr restore if we
21331 have saved some fprs; in those cases it is advantageous to use
21332 load multiple when available. */
21333 if ((strategy & SAVRES_MULTIPLE)
21334 && (!lr_save_p
21335 || info->first_fp_reg_save != 64))
21336 strategy |= REST_INLINE_GPRS;
21337
21338 /* Saving CR interferes with the exit routines used on the SPE, so
21339 just punt here. */
21340 if (TARGET_SPE_ABI
21341 && info->spe_64bit_regs_used
21342 && info->cr_save_p)
21343 strategy |= REST_INLINE_GPRS;
21344
21345 /* We can only use load multiple or the out-of-line routines to
21346 restore if we've used store multiple or out-of-line routines
21347 in the prologue, i.e. if we've saved all the registers from
21348 first_gp_reg_save. Otherwise, we risk loading garbage. */
21349 if ((strategy & (SAVE_INLINE_GPRS | REST_INLINE_GPRS | SAVRES_MULTIPLE))
21350 == SAVE_INLINE_GPRS)
21351 {
21352 int i;
21353
21354 for (i = info->first_gp_reg_save; i < 32; i++)
21355 if (!save_reg_p (i))
21356 {
21357 strategy |= REST_INLINE_GPRS;
21358 break;
21359 }
21360 }
21361
21362 if (TARGET_ELF && TARGET_64BIT)
21363 {
21364 if (!(strategy & SAVE_INLINE_FPRS))
21365 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
21366 else if (!(strategy & SAVE_INLINE_GPRS)
21367 && info->first_fp_reg_save == 64)
21368 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
21369 }
21370 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
21371 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
21372
21373 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
21374 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
21375
21376 return strategy;
21377 }
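
/* A worked example (hypothetical, for illustration only): a V.4
   function compiled with -Os that has no live FPRs or VRs and first
   saves GPR 28 gets SAVE_INLINE_FPRS | REST_INLINE_FPRS and
   SAVE_INLINE_VRS | REST_INLINE_VRS set trivially (there is nothing
   to save), and, with neither SAVRES_MULTIPLE nor any override above
   applying, leaves the GPR bits clear so that the out-of-line GPR
   save/restore routines are used.  */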
21378
21379 /* Calculate the stack information for the current function. This is
21380 complicated by having two separate calling sequences, the AIX calling
21381 sequence and the V.4 calling sequence.
21382
21383 AIX (and Darwin/Mac OS X) stack frames look like:
21384 32-bit 64-bit
21385 SP----> +---------------------------------------+
21386 | back chain to caller | 0 0
21387 +---------------------------------------+
21388 | saved CR | 4 8 (8-11)
21389 +---------------------------------------+
21390 | saved LR | 8 16
21391 +---------------------------------------+
21392 | reserved for compilers | 12 24
21393 +---------------------------------------+
21394 | reserved for binders | 16 32
21395 +---------------------------------------+
21396 | saved TOC pointer | 20 40
21397 +---------------------------------------+
21398 | Parameter save area (P) | 24 48
21399 +---------------------------------------+
21400 | Alloca space (A) | 24+P etc.
21401 +---------------------------------------+
21402 | Local variable space (L) | 24+P+A
21403 +---------------------------------------+
21404 | Float/int conversion temporary (X) | 24+P+A+L
21405 +---------------------------------------+
21406 | Save area for AltiVec registers (W) | 24+P+A+L+X
21407 +---------------------------------------+
21408 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
21409 +---------------------------------------+
21410 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
21411 +---------------------------------------+
21412 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
21413 +---------------------------------------+
21414 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
21415 +---------------------------------------+
21416 old SP->| back chain to caller's caller |
21417 +---------------------------------------+
21418
21419 The required alignment for AIX configurations is two words (i.e., 8
21420 or 16 bytes).
21421
21422 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
21423
21424 SP----> +---------------------------------------+
21425 | Back chain to caller | 0
21426 +---------------------------------------+
21427 | Save area for CR | 8
21428 +---------------------------------------+
21429 | Saved LR | 16
21430 +---------------------------------------+
21431 | Saved TOC pointer | 24
21432 +---------------------------------------+
21433 | Parameter save area (P) | 32
21434 +---------------------------------------+
21435 | Alloca space (A) | 32+P
21436 +---------------------------------------+
21437 | Local variable space (L) | 32+P+A
21438 +---------------------------------------+
21439 | Save area for AltiVec registers (W) | 32+P+A+L
21440 +---------------------------------------+
21441 | AltiVec alignment padding (Y) | 32+P+A+L+W
21442 +---------------------------------------+
21443 | Save area for GP registers (G) | 32+P+A+L+W+Y
21444 +---------------------------------------+
21445 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
21446 +---------------------------------------+
21447 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
21448 +---------------------------------------+
21449
21450
21451 V.4 stack frames look like:
21452
21453 SP----> +---------------------------------------+
21454 | back chain to caller | 0
21455 +---------------------------------------+
21456 | caller's saved LR | 4
21457 +---------------------------------------+
21458 | Parameter save area (P) | 8
21459 +---------------------------------------+
21460 | Alloca space (A) | 8+P
21461 +---------------------------------------+
21462 | Varargs save area (V) | 8+P+A
21463 +---------------------------------------+
21464 | Local variable space (L) | 8+P+A+V
21465 +---------------------------------------+
21466 | Float/int conversion temporary (X) | 8+P+A+V+L
21467 +---------------------------------------+
21468 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
21469 +---------------------------------------+
21470 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
21471 +---------------------------------------+
21472 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
21473 +---------------------------------------+
21474 | SPE: area for 64-bit GP registers |
21475 +---------------------------------------+
21476 | SPE alignment padding |
21477 +---------------------------------------+
21478 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
21479 +---------------------------------------+
21480 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
21481 +---------------------------------------+
21482 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
21483 +---------------------------------------+
21484 old SP->| back chain to caller's caller |
21485 +---------------------------------------+
21486
21487 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
21488 given. (But note below and in sysv4.h that we require only 8 and
21489 may round up the size of our stack frame anyway. The historical
21490 reason is early versions of powerpc-linux which didn't properly
21491 align the stack at program startup. A happy side-effect is that
21492 -mno-eabi libraries can be used with -meabi programs.)
21493
21494 The EABI configuration defaults to the V.4 layout. However,
21495 the stack alignment requirements may differ. If -mno-eabi is not
21496 given, the required stack alignment is 8 bytes; if -mno-eabi is
21497 given, the required alignment is 16 bytes. (But see V.4 comment
21498 above.) */
21499
21500 #ifndef ABI_STACK_BOUNDARY
21501 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
21502 #endif
21503
21504 static rs6000_stack_t *
21505 rs6000_stack_info (void)
21506 {
21507 rs6000_stack_t *info_ptr = &stack_info;
21508 int reg_size = TARGET_32BIT ? 4 : 8;
21509 int ehrd_size;
21510 int ehcr_size;
21511 int save_align;
21512 int first_gp;
21513 HOST_WIDE_INT non_fixed_size;
21514 bool using_static_chain_p;
21515
21516 if (reload_completed && info_ptr->reload_completed)
21517 return info_ptr;
21518
21519 memset (info_ptr, 0, sizeof (*info_ptr));
21520 info_ptr->reload_completed = reload_completed;
21521
21522 if (TARGET_SPE)
21523 {
21524 /* Cache value so we don't rescan instruction chain over and over. */
21525 if (cfun->machine->insn_chain_scanned_p == 0)
21526 cfun->machine->insn_chain_scanned_p
21527 = spe_func_has_64bit_regs_p () + 1;
21528 info_ptr->spe_64bit_regs_used = cfun->machine->insn_chain_scanned_p - 1;
21529 }
21530
21531 /* Select which calling sequence. */
21532 info_ptr->abi = DEFAULT_ABI;
21533
21534 /* Calculate which registers need to be saved & save area size. */
21535 info_ptr->first_gp_reg_save = first_reg_to_save ();
21536 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
21537 even if it currently looks like we won't. Reload may need it to
21538 get at a constant; if so, it will have already created a constant
21539 pool entry for it. */
21540 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
21541 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
21542 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
21543 && crtl->uses_const_pool
21544 && info_ptr->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
21545 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
21546 else
21547 first_gp = info_ptr->first_gp_reg_save;
21548
21549 info_ptr->gp_size = reg_size * (32 - first_gp);
21550
21551 /* For the SPE, we have an additional upper 32-bits on each GPR.
21552 Ideally we should save the entire 64-bits only when the upper
21553 half is used in SIMD instructions. Since we only record
21554 registers live (not the size they are used in), this proves
21555 difficult because we'd have to traverse the instruction chain at
21556 the right time, taking reload into account. This is a real pain,
21557 so we opt to save the GPRs in 64-bits always if even one register
21558 gets used in 64-bits. Otherwise, all the registers in the frame
21559 get saved in 32-bits.
21560
21561 Consequently, when we save all GPRs (except the SP) in 64-bits, the
21562 traditional GP save area will be empty. */
21563 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21564 info_ptr->gp_size = 0;
21565
21566 info_ptr->first_fp_reg_save = first_fp_reg_to_save ();
21567 info_ptr->fp_size = 8 * (64 - info_ptr->first_fp_reg_save);
21568
21569 info_ptr->first_altivec_reg_save = first_altivec_reg_to_save ();
21570 info_ptr->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
21571 - info_ptr->first_altivec_reg_save);
21572
21573 /* Does this function call anything? */
21574 info_ptr->calls_p = (! crtl->is_leaf
21575 || cfun->machine->ra_needs_full_frame);
21576
21577 /* Determine if we need to save the condition code registers. */
21578 if (df_regs_ever_live_p (CR2_REGNO)
21579 || df_regs_ever_live_p (CR3_REGNO)
21580 || df_regs_ever_live_p (CR4_REGNO))
21581 {
21582 info_ptr->cr_save_p = 1;
21583 if (DEFAULT_ABI == ABI_V4)
21584 info_ptr->cr_size = reg_size;
21585 }
21586
21587 /* If the current function calls __builtin_eh_return, then we need
21588 to allocate stack space for registers that will hold data for
21589 the exception handler. */
21590 if (crtl->calls_eh_return)
21591 {
21592 unsigned int i;
21593 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
21594 continue;
21595
21596 /* SPE saves EH registers in 64-bits. */
21597 ehrd_size = i * (TARGET_SPE_ABI
21598 && info_ptr->spe_64bit_regs_used != 0
21599 ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
21600 }
21601 else
21602 ehrd_size = 0;
21603
21604 /* In the ELFv2 ABI, we also need to allocate space for separate
21605 CR field save areas if the function calls __builtin_eh_return. */
21606 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
21607 {
21608 /* This hard-codes that we have three call-saved CR fields. */
21609 ehcr_size = 3 * reg_size;
21610 /* We do *not* use the regular CR save mechanism. */
21611 info_ptr->cr_save_p = 0;
21612 }
21613 else
21614 ehcr_size = 0;
21615
21616 /* Determine various sizes. */
21617 info_ptr->reg_size = reg_size;
21618 info_ptr->fixed_size = RS6000_SAVE_AREA;
21619 info_ptr->vars_size = RS6000_ALIGN (get_frame_size (), 8);
21620 info_ptr->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
21621 TARGET_ALTIVEC ? 16 : 8);
21622 if (FRAME_GROWS_DOWNWARD)
21623 info_ptr->vars_size
21624 += RS6000_ALIGN (info_ptr->fixed_size + info_ptr->vars_size
21625 + info_ptr->parm_size,
21626 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
21627 - (info_ptr->fixed_size + info_ptr->vars_size
21628 + info_ptr->parm_size);
21629
21630 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21631 info_ptr->spe_gp_size = 8 * (32 - first_gp);
21632 else
21633 info_ptr->spe_gp_size = 0;
21634
21635 if (TARGET_ALTIVEC_ABI)
21636 info_ptr->vrsave_mask = compute_vrsave_mask ();
21637 else
21638 info_ptr->vrsave_mask = 0;
21639
21640 if (TARGET_ALTIVEC_VRSAVE && info_ptr->vrsave_mask)
21641 info_ptr->vrsave_size = 4;
21642 else
21643 info_ptr->vrsave_size = 0;
21644
21645 compute_save_world_info (info_ptr);
21646
21647 /* Calculate the offsets. */
21648 switch (DEFAULT_ABI)
21649 {
21650 case ABI_NONE:
21651 default:
21652 gcc_unreachable ();
21653
21654 case ABI_AIX:
21655 case ABI_ELFv2:
21656 case ABI_DARWIN:
21657 info_ptr->fp_save_offset = - info_ptr->fp_size;
21658 info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
21659
21660 if (TARGET_ALTIVEC_ABI)
21661 {
21662 info_ptr->vrsave_save_offset
21663 = info_ptr->gp_save_offset - info_ptr->vrsave_size;
21664
21665 /* Align stack so vector save area is on a quadword boundary.
21666 The padding goes above the vectors. */
21667 if (info_ptr->altivec_size != 0)
21668 info_ptr->altivec_padding_size
21669 = info_ptr->vrsave_save_offset & 0xF;
21670 else
21671 info_ptr->altivec_padding_size = 0;
21672
21673 info_ptr->altivec_save_offset
21674 = info_ptr->vrsave_save_offset
21675 - info_ptr->altivec_padding_size
21676 - info_ptr->altivec_size;
21677 gcc_assert (info_ptr->altivec_size == 0
21678 || info_ptr->altivec_save_offset % 16 == 0);
21679
21680 /* Adjust for AltiVec case. */
21681 info_ptr->ehrd_offset = info_ptr->altivec_save_offset - ehrd_size;
21682 }
21683 else
21684 info_ptr->ehrd_offset = info_ptr->gp_save_offset - ehrd_size;
21685
21686 info_ptr->ehcr_offset = info_ptr->ehrd_offset - ehcr_size;
21687 info_ptr->cr_save_offset = reg_size; /* first word when 64-bit. */
21688 info_ptr->lr_save_offset = 2*reg_size;
21689 break;
21690
21691 case ABI_V4:
21692 info_ptr->fp_save_offset = - info_ptr->fp_size;
21693 info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
21694 info_ptr->cr_save_offset = info_ptr->gp_save_offset - info_ptr->cr_size;
21695
21696 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21697 {
21698 /* Align stack so SPE GPR save area is aligned on a
21699 double-word boundary. */
21700 if (info_ptr->spe_gp_size != 0 && info_ptr->cr_save_offset != 0)
21701 info_ptr->spe_padding_size
21702 = 8 - (-info_ptr->cr_save_offset % 8);
21703 else
21704 info_ptr->spe_padding_size = 0;
21705
21706 info_ptr->spe_gp_save_offset
21707 = info_ptr->cr_save_offset
21708 - info_ptr->spe_padding_size
21709 - info_ptr->spe_gp_size;
21710
21711 /* Adjust for SPE case. */
21712 info_ptr->ehrd_offset = info_ptr->spe_gp_save_offset;
21713 }
21714 else if (TARGET_ALTIVEC_ABI)
21715 {
21716 info_ptr->vrsave_save_offset
21717 = info_ptr->cr_save_offset - info_ptr->vrsave_size;
21718
21719 /* Align stack so vector save area is on a quadword boundary. */
21720 if (info_ptr->altivec_size != 0)
21721 info_ptr->altivec_padding_size
21722 = 16 - (-info_ptr->vrsave_save_offset % 16);
21723 else
21724 info_ptr->altivec_padding_size = 0;
21725
21726 info_ptr->altivec_save_offset
21727 = info_ptr->vrsave_save_offset
21728 - info_ptr->altivec_padding_size
21729 - info_ptr->altivec_size;
21730
21731 /* Adjust for AltiVec case. */
21732 info_ptr->ehrd_offset = info_ptr->altivec_save_offset;
21733 }
21734 else
21735 info_ptr->ehrd_offset = info_ptr->cr_save_offset;
21736 info_ptr->ehrd_offset -= ehrd_size;
21737 info_ptr->lr_save_offset = reg_size;
21738 break;
21739 }
21740
21741 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
21742 info_ptr->save_size = RS6000_ALIGN (info_ptr->fp_size
21743 + info_ptr->gp_size
21744 + info_ptr->altivec_size
21745 + info_ptr->altivec_padding_size
21746 + info_ptr->spe_gp_size
21747 + info_ptr->spe_padding_size
21748 + ehrd_size
21749 + ehcr_size
21750 + info_ptr->cr_size
21751 + info_ptr->vrsave_size,
21752 save_align);
21753
21754 non_fixed_size = (info_ptr->vars_size
21755 + info_ptr->parm_size
21756 + info_ptr->save_size);
21757
21758 info_ptr->total_size = RS6000_ALIGN (non_fixed_size + info_ptr->fixed_size,
21759 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
21760
21761 /* Determine if we need to save the link register. */
21762 if (info_ptr->calls_p
21763 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
21764 && crtl->profile
21765 && !TARGET_PROFILE_KERNEL)
21766 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
21767 #ifdef TARGET_RELOCATABLE
21768 || (TARGET_RELOCATABLE && (get_pool_size () != 0))
21769 #endif
21770 || rs6000_ra_ever_killed ())
21771 info_ptr->lr_save_p = 1;
21772
21773 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
21774 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
21775 && call_used_regs[STATIC_CHAIN_REGNUM]);
21776 info_ptr->savres_strategy = rs6000_savres_strategy (info_ptr,
21777 using_static_chain_p);
21778
21779 if (!(info_ptr->savres_strategy & SAVE_INLINE_GPRS)
21780 || !(info_ptr->savres_strategy & SAVE_INLINE_FPRS)
21781 || !(info_ptr->savres_strategy & SAVE_INLINE_VRS)
21782 || !(info_ptr->savres_strategy & REST_INLINE_GPRS)
21783 || !(info_ptr->savres_strategy & REST_INLINE_FPRS)
21784 || !(info_ptr->savres_strategy & REST_INLINE_VRS))
21785 info_ptr->lr_save_p = 1;
21786
21787 if (info_ptr->lr_save_p)
21788 df_set_regs_ever_live (LR_REGNO, true);
21789
21790 /* Determine if we need to allocate any stack frame:
21791
21792 For AIX we need to push the stack if a frame pointer is needed
21793 (because the stack might be dynamically adjusted), if we are
21794 debugging, if we make calls, or if the sum of fp_save, gp_save,
21795 and local variables are more than the space needed to save all
21796 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
21797 + 18*8 = 288 (GPR13 reserved).
21798
21799 For V.4 we don't have the stack cushion that AIX uses, but assume
21800 that the debugger can handle stackless frames. */
21801
21802 if (info_ptr->calls_p)
21803 info_ptr->push_p = 1;
21804
21805 else if (DEFAULT_ABI == ABI_V4)
21806 info_ptr->push_p = non_fixed_size != 0;
21807
21808 else if (frame_pointer_needed)
21809 info_ptr->push_p = 1;
21810
21811 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
21812 info_ptr->push_p = 1;
21813
21814 else
21815 info_ptr->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
21816
21817 /* Zero offsets if we're not saving those registers. */
21818 if (info_ptr->fp_size == 0)
21819 info_ptr->fp_save_offset = 0;
21820
21821 if (info_ptr->gp_size == 0)
21822 info_ptr->gp_save_offset = 0;
21823
21824 if (! TARGET_ALTIVEC_ABI || info_ptr->altivec_size == 0)
21825 info_ptr->altivec_save_offset = 0;
21826
21827 /* Zero VRSAVE offset if not saved and restored. */
21828 if (! TARGET_ALTIVEC_VRSAVE || info_ptr->vrsave_mask == 0)
21829 info_ptr->vrsave_save_offset = 0;
21830
21831 if (! TARGET_SPE_ABI
21832 || info_ptr->spe_64bit_regs_used == 0
21833 || info_ptr->spe_gp_size == 0)
21834 info_ptr->spe_gp_save_offset = 0;
21835
21836 if (! info_ptr->lr_save_p)
21837 info_ptr->lr_save_offset = 0;
21838
21839 if (! info_ptr->cr_save_p)
21840 info_ptr->cr_save_offset = 0;
21841
21842 return info_ptr;
21843 }
21844
21845 /* Return true if the current function uses any GPRs in 64-bit SIMD
21846 mode. */
21847
21848 static bool
21849 spe_func_has_64bit_regs_p (void)
21850 {
21851 rtx_insn *insns, *insn;
21852
21853 /* Functions that save and restore all the call-saved registers will
21854 need to save/restore the registers in 64-bits. */
21855 if (crtl->calls_eh_return
21856 || cfun->calls_setjmp
21857 || crtl->has_nonlocal_goto)
21858 return true;
21859
21860 insns = get_insns ();
21861
21862 for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
21863 {
21864 if (INSN_P (insn))
21865 {
21866 rtx i;
21867
21868 /* FIXME: This should be implemented with attributes...
21869
21870 (set_attr "spe64" "true")....then,
21871 if (get_spe64(insn)) return true;
21872
21873 It's the only reliable way to do the stuff below. */
21874
21875 i = PATTERN (insn);
21876 if (GET_CODE (i) == SET)
21877 {
21878 machine_mode mode = GET_MODE (SET_SRC (i));
21879
21880 if (SPE_VECTOR_MODE (mode))
21881 return true;
21882 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode))
21883 return true;
21884 }
21885 }
21886 }
21887
21888 return false;
21889 }
21890
21891 static void
21892 debug_stack_info (rs6000_stack_t *info)
21893 {
21894 const char *abi_string;
21895
21896 if (! info)
21897 info = rs6000_stack_info ();
21898
21899 fprintf (stderr, "\nStack information for function %s:\n",
21900 ((current_function_decl && DECL_NAME (current_function_decl))
21901 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
21902 : "<unknown>"));
21903
21904 switch (info->abi)
21905 {
21906 default: abi_string = "Unknown"; break;
21907 case ABI_NONE: abi_string = "NONE"; break;
21908 case ABI_AIX: abi_string = "AIX"; break;
21909 case ABI_ELFv2: abi_string = "ELFv2"; break;
21910 case ABI_DARWIN: abi_string = "Darwin"; break;
21911 case ABI_V4: abi_string = "V.4"; break;
21912 }
21913
21914 fprintf (stderr, "\tABI = %5s\n", abi_string);
21915
21916 if (TARGET_ALTIVEC_ABI)
21917 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
21918
21919 if (TARGET_SPE_ABI)
21920 fprintf (stderr, "\tSPE ABI extensions enabled.\n");
21921
21922 if (info->first_gp_reg_save != 32)
21923 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
21924
21925 if (info->first_fp_reg_save != 64)
21926 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
21927
21928 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
21929 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
21930 info->first_altivec_reg_save);
21931
21932 if (info->lr_save_p)
21933 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
21934
21935 if (info->cr_save_p)
21936 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
21937
21938 if (info->vrsave_mask)
21939 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
21940
21941 if (info->push_p)
21942 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
21943
21944 if (info->calls_p)
21945 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
21946
21947 if (info->gp_save_offset)
21948 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
21949
21950 if (info->fp_save_offset)
21951 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
21952
21953 if (info->altivec_save_offset)
21954 fprintf (stderr, "\taltivec_save_offset = %5d\n",
21955 info->altivec_save_offset);
21956
21957 if (info->spe_gp_save_offset)
21958 fprintf (stderr, "\tspe_gp_save_offset = %5d\n",
21959 info->spe_gp_save_offset);
21960
21961 if (info->vrsave_save_offset)
21962 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
21963 info->vrsave_save_offset);
21964
21965 if (info->lr_save_offset)
21966 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
21967
21968 if (info->cr_save_offset)
21969 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
21970
21971 if (info->varargs_save_offset)
21972 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
21973
21974 if (info->total_size)
21975 fprintf (stderr, "\ttotal_size = "HOST_WIDE_INT_PRINT_DEC"\n",
21976 info->total_size);
21977
21978 if (info->vars_size)
21979 fprintf (stderr, "\tvars_size = "HOST_WIDE_INT_PRINT_DEC"\n",
21980 info->vars_size);
21981
21982 if (info->parm_size)
21983 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
21984
21985 if (info->fixed_size)
21986 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
21987
21988 if (info->gp_size)
21989 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
21990
21991 if (info->spe_gp_size)
21992 fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size);
21993
21994 if (info->fp_size)
21995 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
21996
21997 if (info->altivec_size)
21998 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
21999
22000 if (info->vrsave_size)
22001 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
22002
22003 if (info->altivec_padding_size)
22004 fprintf (stderr, "\taltivec_padding_size= %5d\n",
22005 info->altivec_padding_size);
22006
22007 if (info->spe_padding_size)
22008 fprintf (stderr, "\tspe_padding_size = %5d\n",
22009 info->spe_padding_size);
22010
22011 if (info->cr_size)
22012 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
22013
22014 if (info->save_size)
22015 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
22016
22017 if (info->reg_size != 4)
22018 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
22019
22020 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
22021
22022 fprintf (stderr, "\n");
22023 }
22024
22025 rtx
22026 rs6000_return_addr (int count, rtx frame)
22027 {
22028 /* Currently we don't optimize very well between prologue and body
22029 code, and for PIC code the result can be quite bad, so
22030 don't try to be too clever here. */
22031 if (count != 0
22032 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
22033 {
22034 cfun->machine->ra_needs_full_frame = 1;
22035
22036 return
22037 gen_rtx_MEM
22038 (Pmode,
22039 memory_address
22040 (Pmode,
22041 plus_constant (Pmode,
22042 copy_to_reg
22043 (gen_rtx_MEM (Pmode,
22044 memory_address (Pmode, frame))),
22045 RETURN_ADDRESS_OFFSET)));
22046 }
22047
22048 cfun->machine->ra_need_lr = 1;
22049 return get_hard_reg_initial_val (Pmode, LR_REGNO);
22050 }
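
/* For instance, __builtin_return_address (0) normally reaches the
   fast path above and reads the value LR held on entry (via
   get_hard_reg_initial_val), while __builtin_return_address (1)
   takes the COUNT != 0 path and walks the back chain.
   (Illustrative; the exact code also depends on the ABI and on
   -fpic, as noted above.)  */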
22051
22052 /* Say whether a function is a candidate for sibcall handling or not. */
22053
22054 static bool
22055 rs6000_function_ok_for_sibcall (tree decl, tree exp)
22056 {
22057 tree fntype;
22058
22059 if (decl)
22060 fntype = TREE_TYPE (decl);
22061 else
22062 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
22063
22064 /* We can't do it if the called function has more vector parameters
22065 than the current function; there's nowhere to put the VRsave code. */
22066 if (TARGET_ALTIVEC_ABI
22067 && TARGET_ALTIVEC_VRSAVE
22068 && !(decl && decl == current_function_decl))
22069 {
22070 function_args_iterator args_iter;
22071 tree type;
22072 int nvreg = 0;
22073
22074 /* Functions with vector parameters are required to have a
22075 prototype, so the argument type info must be available
22076 here. */
22077 FOREACH_FUNCTION_ARGS (fntype, type, args_iter)
22078 if (TREE_CODE (type) == VECTOR_TYPE
22079 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
22080 nvreg++;
22081
22082 FOREACH_FUNCTION_ARGS (TREE_TYPE (current_function_decl), type, args_iter)
22083 if (TREE_CODE (type) == VECTOR_TYPE
22084 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
22085 nvreg--;
22086
22087 if (nvreg > 0)
22088 return false;
22089 }
22090
22091 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
22092 functions, because the callee may have a different TOC pointer from
22093 the caller's, and there's no way to ensure we restore the TOC when
22094 we return. With the secure-plt SYSV ABI we can't make non-local
22095 calls when -fpic/PIC because the plt call stubs use r30. */
22096 if (DEFAULT_ABI == ABI_DARWIN
22097 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
22098 && decl
22099 && !DECL_EXTERNAL (decl)
22100 && (*targetm.binds_local_p) (decl))
22101 || (DEFAULT_ABI == ABI_V4
22102 && (!TARGET_SECURE_PLT
22103 || !flag_pic
22104 || (decl
22105 && (*targetm.binds_local_p) (decl)))))
22106 {
22107 tree attr_list = TYPE_ATTRIBUTES (fntype);
22108
22109 if (!lookup_attribute ("longcall", attr_list)
22110 || lookup_attribute ("shortcall", attr_list))
22111 return true;
22112 }
22113
22114 return false;
22115 }
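
/* A minimal sketch of the locality test (assumed source, not from a
   testcase): under the AIX-style ABIs the first tail call below can
   become a sibcall because the callee binds locally, while the
   second cannot, since an external callee may use a different TOC:

	static int local_fn (int x) { return x + 1; }
	extern int extern_fn (int x);

	int f (int x) { return local_fn (x); }	// sibcall possible
	int g (int x) { return extern_fn (x); }	// no sibcall  */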
22116
22117 static int
22118 rs6000_ra_ever_killed (void)
22119 {
22120 rtx_insn *top;
22121 rtx reg;
22122 rtx_insn *insn;
22123
22124 if (cfun->is_thunk)
22125 return 0;
22126
22127 if (cfun->machine->lr_save_state)
22128 return cfun->machine->lr_save_state - 1;
22129
22130 /* regs_ever_live has LR marked as used if any sibcalls are present,
22131 but this should not force saving and restoring in the
22132 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
22133 clobbers LR, so that is inappropriate. */
22134
22135 /* Also, the prologue can generate a store into LR that
22136 doesn't really count, like this:
22137
22138 move LR->R0
22139 bcl to set PIC register
22140 move LR->R31
22141 move R0->LR
22142
22143 When we're called from the epilogue, we need to avoid counting
22144 this as a store. */
22145
22146 push_topmost_sequence ();
22147 top = get_insns ();
22148 pop_topmost_sequence ();
22149 reg = gen_rtx_REG (Pmode, LR_REGNO);
22150
22151 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
22152 {
22153 if (INSN_P (insn))
22154 {
22155 if (CALL_P (insn))
22156 {
22157 if (!SIBLING_CALL_P (insn))
22158 return 1;
22159 }
22160 else if (find_regno_note (insn, REG_INC, LR_REGNO))
22161 return 1;
22162 else if (set_of (reg, insn) != NULL_RTX
22163 && !prologue_epilogue_contains (insn))
22164 return 1;
22165 }
22166 }
22167 return 0;
22168 }
22169 \f
22170 /* Emit instructions needed to load the TOC register.
22171 This is only needed when TARGET_TOC, TARGET_MINIMAL_TOC, and there is
22172 a constant pool; or for SVR4 -fpic. */
22173
22174 void
22175 rs6000_emit_load_toc_table (int fromprolog)
22176 {
22177 rtx dest;
22178 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
22179
22180 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
22181 {
22182 char buf[30];
22183 rtx lab, tmp1, tmp2, got;
22184
22185 lab = gen_label_rtx ();
22186 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
22187 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22188 if (flag_pic == 2)
22189 got = gen_rtx_SYMBOL_REF (Pmode, toc_label_name);
22190 else
22191 got = rs6000_got_sym ();
22192 tmp1 = tmp2 = dest;
22193 if (!fromprolog)
22194 {
22195 tmp1 = gen_reg_rtx (Pmode);
22196 tmp2 = gen_reg_rtx (Pmode);
22197 }
22198 emit_insn (gen_load_toc_v4_PIC_1 (lab));
22199 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
22200 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
22201 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
22202 }
22203 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
22204 {
22205 emit_insn (gen_load_toc_v4_pic_si ());
22206 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
22207 }
22208 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
22209 {
22210 char buf[30];
22211 rtx temp0 = (fromprolog
22212 ? gen_rtx_REG (Pmode, 0)
22213 : gen_reg_rtx (Pmode));
22214
22215 if (fromprolog)
22216 {
22217 rtx symF, symL;
22218
22219 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
22220 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22221
22222 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
22223 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22224
22225 emit_insn (gen_load_toc_v4_PIC_1 (symF));
22226 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
22227 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
22228 }
22229 else
22230 {
22231 rtx tocsym, lab;
22232
22233 tocsym = gen_rtx_SYMBOL_REF (Pmode, toc_label_name);
22234 lab = gen_label_rtx ();
22235 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
22236 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
22237 if (TARGET_LINK_STACK)
22238 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
22239 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
22240 }
22241 emit_insn (gen_addsi3 (dest, temp0, dest));
22242 }
22243 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
22244 {
22245 /* This is for AIX code running in non-PIC ELF32. */
22246 char buf[30];
22247 rtx realsym;
22248 ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
22249 realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22250
22251 emit_insn (gen_elf_high (dest, realsym));
22252 emit_insn (gen_elf_low (dest, dest, realsym));
22253 }
22254 else
22255 {
22256 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
22257
22258 if (TARGET_32BIT)
22259 emit_insn (gen_load_toc_aix_si (dest));
22260 else
22261 emit_insn (gen_load_toc_aix_di (dest));
22262 }
22263 }
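
/* For the SVR4 -fPIC case the sequence emitted above corresponds
   roughly to the following (a sketch; label names and operand syntax
   are illustrative, r30 being the V.4 PIC register):

	bcl 20,31,.LCF0
   .LCF0:
	mflr 30
	addis 30,30,.LCTOC1-.LCF0@ha
	addi 30,30,.LCTOC1-.LCF0@l  */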
22264
22265 /* Emit instructions to restore the link register after determining where
22266 its value has been stored. */
22267
22268 void
22269 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
22270 {
22271 rs6000_stack_t *info = rs6000_stack_info ();
22272 rtx operands[2];
22273
22274 operands[0] = source;
22275 operands[1] = scratch;
22276
22277 if (info->lr_save_p)
22278 {
22279 rtx frame_rtx = stack_pointer_rtx;
22280 HOST_WIDE_INT sp_offset = 0;
22281 rtx tmp;
22282
22283 if (frame_pointer_needed
22284 || cfun->calls_alloca
22285 || info->total_size > 32767)
22286 {
22287 tmp = gen_frame_mem (Pmode, frame_rtx);
22288 emit_move_insn (operands[1], tmp);
22289 frame_rtx = operands[1];
22290 }
22291 else if (info->push_p)
22292 sp_offset = info->total_size;
22293
22294 tmp = plus_constant (Pmode, frame_rtx,
22295 info->lr_save_offset + sp_offset);
22296 tmp = gen_frame_mem (Pmode, tmp);
22297 emit_move_insn (tmp, operands[0]);
22298 }
22299 else
22300 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
22301
22302 /* Freeze lr_save_p. We've just emitted rtl that depends on the
22303 state of lr_save_p so any change from here on would be a bug. In
22304 particular, stop rs6000_ra_ever_killed from considering the SET
22305 of lr we may have added just above. */
22306 cfun->machine->lr_save_state = info->lr_save_p + 1;
22307 }
22308
22309 static GTY(()) alias_set_type set = -1;
22310
22311 alias_set_type
22312 get_TOC_alias_set (void)
22313 {
22314 if (set == -1)
22315 set = new_alias_set ();
22316 return set;
22317 }
22318
22319 /* This returns nonzero if the current function uses the TOC. This is
22320 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
22321 is generated by the ABI_V4 load_toc_* patterns. */
22322 #if TARGET_ELF
22323 static int
22324 uses_TOC (void)
22325 {
22326 rtx_insn *insn;
22327
22328 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22329 if (INSN_P (insn))
22330 {
22331 rtx pat = PATTERN (insn);
22332 int i;
22333
22334 if (GET_CODE (pat) == PARALLEL)
22335 for (i = 0; i < XVECLEN (pat, 0); i++)
22336 {
22337 rtx sub = XVECEXP (pat, 0, i);
22338 if (GET_CODE (sub) == USE)
22339 {
22340 sub = XEXP (sub, 0);
22341 if (GET_CODE (sub) == UNSPEC
22342 && XINT (sub, 1) == UNSPEC_TOC)
22343 return 1;
22344 }
22345 }
22346 }
22347 return 0;
22348 }
22349 #endif
22350
22351 rtx
22352 create_TOC_reference (rtx symbol, rtx largetoc_reg)
22353 {
22354 rtx tocrel, tocreg, hi;
22355
22356 if (TARGET_DEBUG_ADDR)
22357 {
22358 if (GET_CODE (symbol) == SYMBOL_REF)
22359 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
22360 XSTR (symbol, 0));
22361 else
22362 {
22363 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
22364 GET_RTX_NAME (GET_CODE (symbol)));
22365 debug_rtx (symbol);
22366 }
22367 }
22368
22369 if (!can_create_pseudo_p ())
22370 df_set_regs_ever_live (TOC_REGISTER, true);
22371
22372 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
22373 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
22374 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
22375 return tocrel;
22376
22377 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
22378 if (largetoc_reg != NULL)
22379 {
22380 emit_move_insn (largetoc_reg, hi);
22381 hi = largetoc_reg;
22382 }
22383 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
22384 }
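
/* Under the medium/large code models the HIGH/LO_SUM pair built
   above typically assembles to (a sketch; the register choice is
   illustrative):

	addis 9,2,sym@toc@ha
	ld    9,sym@toc@l(9)

   while for CMODEL_SMALL the bare UNSPEC_TOCREL becomes a single
   TOC-relative access off r2.  */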
22385
22386 /* Issue assembly directives that create a reference to the given DWARF
22387 FRAME_TABLE_LABEL from the current function section. */
22388 void
22389 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
22390 {
22391 fprintf (asm_out_file, "\t.ref %s\n",
22392 (* targetm.strip_name_encoding) (frame_table_label));
22393 }
22394 \f
22395 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
22396 and the change to the stack pointer. */
22397
22398 static void
22399 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
22400 {
22401 rtvec p;
22402 int i;
22403 rtx regs[3];
22404
22405 i = 0;
22406 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
22407 if (hard_frame_needed)
22408 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
22409 if (!(REGNO (fp) == STACK_POINTER_REGNUM
22410 || (hard_frame_needed
22411 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
22412 regs[i++] = fp;
22413
22414 p = rtvec_alloc (i);
22415 while (--i >= 0)
22416 {
22417 rtx mem = gen_frame_mem (BLKmode, regs[i]);
22418 RTVEC_ELT (p, i) = gen_rtx_SET (VOIDmode, mem, const0_rtx);
22419 }
22420
22421 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
22422 }
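
/* The resulting insn is a PARALLEL of dummy BLKmode stores; for the
   simple one-register case it is just (a sketch)

	(set (mem/c:BLK (reg/f 1)) (const_int 0))

   whose only purpose is to present a memory dependence so that the
   scheduler and alias analysis cannot move frame accesses across the
   stack pointer update.  */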
22423
22424 /* Emit the correct code for allocating stack space, as insns.
22425 If COPY_REG, leave a copy of the old stack pointer there, adjusted by COPY_OFF.
22426 The generated code may use hard register 0 as a temporary. */
22427
22428 static void
22429 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
22430 {
22431 rtx_insn *insn;
22432 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
22433 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
22434 rtx todec = gen_int_mode (-size, Pmode);
22435 rtx par, set, mem;
22436
22437 if (INTVAL (todec) != -size)
22438 {
22439 warning (0, "stack frame too large");
22440 emit_insn (gen_trap ());
22441 return;
22442 }
22443
22444 if (crtl->limit_stack)
22445 {
22446 if (REG_P (stack_limit_rtx)
22447 && REGNO (stack_limit_rtx) > 1
22448 && REGNO (stack_limit_rtx) <= 31)
22449 {
22450 emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
22451 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
22452 const0_rtx));
22453 }
22454 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
22455 && TARGET_32BIT
22456 && DEFAULT_ABI == ABI_V4)
22457 {
22458 rtx toload = gen_rtx_CONST (VOIDmode,
22459 gen_rtx_PLUS (Pmode,
22460 stack_limit_rtx,
22461 GEN_INT (size)));
22462
22463 emit_insn (gen_elf_high (tmp_reg, toload));
22464 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
22465 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
22466 const0_rtx));
22467 }
22468 else
22469 warning (0, "stack limit expression is not supported");
22470 }
22471
22472 if (copy_reg)
22473 {
22474 if (copy_off != 0)
22475 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
22476 else
22477 emit_move_insn (copy_reg, stack_reg);
22478 }
22479
22480 if (size > 32767)
22481 {
22482 /* Need a note here so that try_split doesn't get confused. */
22483 if (get_last_insn () == NULL_RTX)
22484 emit_note (NOTE_INSN_DELETED);
22485 insn = emit_move_insn (tmp_reg, todec);
22486 try_split (PATTERN (insn), insn, 0);
22487 todec = tmp_reg;
22488 }
22489
22490 insn = emit_insn (TARGET_32BIT
22491 ? gen_movsi_update_stack (stack_reg, stack_reg,
22492 todec, stack_reg)
22493 : gen_movdi_di_update_stack (stack_reg, stack_reg,
22494 todec, stack_reg));
22495 /* Since we didn't use gen_frame_mem to generate the MEM, grab
22496 it now and set the alias set/attributes. The above gen_*_update
22497 calls will generate a PARALLEL with the MEM set being the first
22498 operation. */
22499 par = PATTERN (insn);
22500 gcc_assert (GET_CODE (par) == PARALLEL);
22501 set = XVECEXP (par, 0, 0);
22502 gcc_assert (GET_CODE (set) == SET);
22503 mem = SET_DEST (set);
22504 gcc_assert (MEM_P (mem));
22505 MEM_NOTRAP_P (mem) = 1;
22506 set_mem_alias_set (mem, get_frame_alias_set ());
22507
22508 RTX_FRAME_RELATED_P (insn) = 1;
22509 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
22510 gen_rtx_SET (VOIDmode, stack_reg,
22511 gen_rtx_PLUS (Pmode, stack_reg,
22512 GEN_INT (-size))));
22513 }
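/* Worked example (illustrative, not from the original source): for
   size == 40000 on 64-bit, -40000 does not fit in a 16-bit immediate, so
   the code above first loads -40000 into r0, splits that move, and then
   emits an stdux-style "move with stack update" using r0.  The attached
   REG_FRAME_RELATED_EXPR note still records the simple form

     (set (reg 1) (plus (reg 1) (const_int -40000)))

   so the CFI describes the net stack adjustment.  */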
22514
22515 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
22516
22517 #if PROBE_INTERVAL > 32768
22518 #error Cannot use indexed addressing mode for stack probing
22519 #endif
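/* Illustrative arithmetic: with the usual STACK_CHECK_PROBE_INTERVAL_EXP
   of 12, PROBE_INTERVAL is 1 << 12 == 4096 bytes, comfortably below the
   32768 limit imposed by the D-field addressing used for the probes.  */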
22520
22521 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
22522 inclusive. These are offsets from the current stack pointer. */
22523
22524 static void
22525 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
22526 {
22527 /* See if we have a constant small number of probes to generate. If so,
22528 that's the easy case. */
22529 if (first + size <= 32768)
22530 {
22531 HOST_WIDE_INT i;
22532
22533 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 while
22534 N * PROBE_INTERVAL is below SIZE. If only one probe is needed, the
22535 loop generates no code. Then probe at FIRST + SIZE. */
22536 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
22537 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
22538 -(first + i)));
22539
22540 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
22541 -(first + size)));
22542 }
22543
22544 /* Otherwise, do the same as above, but in a loop. Note that we must be
22545 extra careful with variables wrapping around because we might be at
22546 the very top (or the very bottom) of the address space and we have
22547 to be able to handle this case properly; in particular, we use an
22548 equality test for the loop condition. */
22549 else
22550 {
22551 HOST_WIDE_INT rounded_size;
22552 rtx r12 = gen_rtx_REG (Pmode, 12);
22553 rtx r0 = gen_rtx_REG (Pmode, 0);
22554
22555 /* Sanity check for the addressing mode we're going to use. */
22556 gcc_assert (first <= 32768);
22557
22558 /* Step 1: round SIZE to the previous multiple of the interval. */
22559
22560 rounded_size = size & -PROBE_INTERVAL;
22561
22562
22563 /* Step 2: compute initial and final value of the loop counter. */
22564
22565 /* TEST_ADDR = SP + FIRST. */
22566 emit_insn (gen_rtx_SET (VOIDmode, r12,
22567 plus_constant (Pmode, stack_pointer_rtx,
22568 -first)));
22569
22570 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
22571 if (rounded_size > 32768)
22572 {
22573 emit_move_insn (r0, GEN_INT (-rounded_size));
22574 emit_insn (gen_rtx_SET (VOIDmode, r0,
22575 gen_rtx_PLUS (Pmode, r12, r0)));
22576 }
22577 else
22578 emit_insn (gen_rtx_SET (VOIDmode, r0,
22579 plus_constant (Pmode, r12, -rounded_size)));
22580
22581
22582 /* Step 3: the loop
22583
22584 while (TEST_ADDR != LAST_ADDR)
22585 {
22586 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
22587 probe at TEST_ADDR
22588 }
22589
22590 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
22591 until it is equal to ROUNDED_SIZE. */
22592
22593 if (TARGET_64BIT)
22594 emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
22595 else
22596 emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
22597
22598
22599 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
22600 that SIZE is equal to ROUNDED_SIZE. */
22601
22602 if (size != rounded_size)
22603 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
22604 }
22605 }
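/* Worked example (assumed values): with PROBE_INTERVAL == 4096,
   FIRST == 4096 and SIZE == 10000, the small-count path above probes at
   sp-8192 and sp-12288 inside the loop (4096 and 8192 are < 10000), then
   issues the final probe at sp-(4096+10000) == sp-14096.  */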
22606
22607 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
22608 absolute addresses. */
22609
22610 const char *
22611 output_probe_stack_range (rtx reg1, rtx reg2)
22612 {
22613 static int labelno = 0;
22614 char loop_lab[32], end_lab[32];
22615 rtx xops[2];
22616
22617 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
22618 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
22619
22620 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
22621
22622 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
22623 xops[0] = reg1;
22624 xops[1] = reg2;
22625 if (TARGET_64BIT)
22626 output_asm_insn ("cmpd 0,%0,%1", xops);
22627 else
22628 output_asm_insn ("cmpw 0,%0,%1", xops);
22629
22630 fputs ("\tbeq 0,", asm_out_file);
22631 assemble_name_raw (asm_out_file, end_lab);
22632 fputc ('\n', asm_out_file);
22633
22634 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
22635 xops[1] = GEN_INT (-PROBE_INTERVAL);
22636 output_asm_insn ("addi %0,%0,%1", xops);
22637
22638 /* Probe at TEST_ADDR and branch. */
22639 xops[1] = gen_rtx_REG (Pmode, 0);
22640 output_asm_insn ("stw %1,0(%0)", xops);
22641 fprintf (asm_out_file, "\tb ");
22642 assemble_name_raw (asm_out_file, loop_lab);
22643 fputc ('\n', asm_out_file);
22644
22645 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
22646
22647 return "";
22648 }
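/* Sketch of the assembly emitted above for the 32-bit case, assuming
   reg1 == r12, reg2 == r0 and PROBE_INTERVAL == 4096 (label spelling is
   target-dependent):

       .LPSRL0:
               cmpw 0,12,0
               beq 0,.LPSRE0
               addi 12,12,-4096
               stw 0,0(12)
               b .LPSRL0
       .LPSRE0:

   i.e. walk TEST_ADDR down one interval at a time, storing to each page,
   until it meets LAST_ADDR.  */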
22649
22650 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
22651 with (plus:P (reg 1) VAL), and with REG2 replaced with RREG if REG2
22652 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
22653 deduce these equivalences by itself so it wasn't necessary to hold
22654 its hand so much. Don't be tempted to always supply d2_f_d_e with
22655 the actual CFA register, i.e. r31 when we are using a hard frame
22656 pointer. That fails when saving regs off r1, and sched moves the
22657 r31 setup past the reg saves. */
22658
22659 static rtx
22660 rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
22661 rtx reg2, rtx rreg, rtx split_reg)
22662 {
22663 rtx real, temp;
22664
22665 if (REGNO (reg) == STACK_POINTER_REGNUM && reg2 == NULL_RTX)
22666 {
22667 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
22668 int i;
22669
22670 gcc_checking_assert (val == 0);
22671 real = PATTERN (insn);
22672 if (GET_CODE (real) == PARALLEL)
22673 for (i = 0; i < XVECLEN (real, 0); i++)
22674 if (GET_CODE (XVECEXP (real, 0, i)) == SET)
22675 {
22676 rtx set = XVECEXP (real, 0, i);
22677
22678 RTX_FRAME_RELATED_P (set) = 1;
22679 }
22680 RTX_FRAME_RELATED_P (insn) = 1;
22681 return insn;
22682 }
22683
22684 /* copy_rtx will not make unique copies of registers, so we need to
22685 ensure we don't have unwanted sharing here. */
22686 if (reg == reg2)
22687 reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
22688
22689 if (reg == rreg)
22690 reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
22691
22692 real = copy_rtx (PATTERN (insn));
22693
22694 if (reg2 != NULL_RTX)
22695 real = replace_rtx (real, reg2, rreg);
22696
22697 if (REGNO (reg) == STACK_POINTER_REGNUM)
22698 gcc_checking_assert (val == 0);
22699 else
22700 real = replace_rtx (real, reg,
22701 gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode,
22702 STACK_POINTER_REGNUM),
22703 GEN_INT (val)));
22704
22705 /* We expect that 'real' is either a SET or a PARALLEL containing
22706 SETs (and possibly other stuff). In a PARALLEL, all the SETs
22707 are important so they all have to be marked RTX_FRAME_RELATED_P. */
22708
22709 if (GET_CODE (real) == SET)
22710 {
22711 rtx set = real;
22712
22713 temp = simplify_rtx (SET_SRC (set));
22714 if (temp)
22715 SET_SRC (set) = temp;
22716 temp = simplify_rtx (SET_DEST (set));
22717 if (temp)
22718 SET_DEST (set) = temp;
22719 if (GET_CODE (SET_DEST (set)) == MEM)
22720 {
22721 temp = simplify_rtx (XEXP (SET_DEST (set), 0));
22722 if (temp)
22723 XEXP (SET_DEST (set), 0) = temp;
22724 }
22725 }
22726 else
22727 {
22728 int i;
22729
22730 gcc_assert (GET_CODE (real) == PARALLEL);
22731 for (i = 0; i < XVECLEN (real, 0); i++)
22732 if (GET_CODE (XVECEXP (real, 0, i)) == SET)
22733 {
22734 rtx set = XVECEXP (real, 0, i);
22735
22736 temp = simplify_rtx (SET_SRC (set));
22737 if (temp)
22738 SET_SRC (set) = temp;
22739 temp = simplify_rtx (SET_DEST (set));
22740 if (temp)
22741 SET_DEST (set) = temp;
22742 if (GET_CODE (SET_DEST (set)) == MEM)
22743 {
22744 temp = simplify_rtx (XEXP (SET_DEST (set), 0));
22745 if (temp)
22746 XEXP (SET_DEST (set), 0) = temp;
22747 }
22748 RTX_FRAME_RELATED_P (set) = 1;
22749 }
22750 }
22751
22752 /* If a store insn has been split into multiple insns, the
22753 true source register is given by split_reg. */
22754 if (split_reg != NULL_RTX)
22755 real = gen_rtx_SET (VOIDmode, SET_DEST (real), split_reg);
22756
22757 RTX_FRAME_RELATED_P (insn) = 1;
22758 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
22759
22760 return insn;
22761 }
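/* Illustrative example: if INSN stores r0 at offset 16 from r11, and r11
   is known to be r1+32, then rs6000_frame_related (insn, r11, 32, ...)
   rewrites the copied pattern to use (plus (reg 1) 32), which simplify_rtx
   folds so the attached note reads

     (set (mem (plus (reg 1) (const_int 48))) (reg 0))

   i.e. the save is described relative to the stack pointer for DWARF.  */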
22762
22763 /* Returns an insn that has a vrsave set operation with the
22764 appropriate CLOBBERs. */
22765
22766 static rtx
22767 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
22768 {
22769 int nclobs, i;
22770 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
22771 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
22772
22773 clobs[0]
22774 = gen_rtx_SET (VOIDmode,
22775 vrsave,
22776 gen_rtx_UNSPEC_VOLATILE (SImode,
22777 gen_rtvec (2, reg, vrsave),
22778 UNSPECV_SET_VRSAVE));
22779
22780 nclobs = 1;
22781
22782 /* We need to clobber the registers in the mask so the scheduler
22783 does not move sets to VRSAVE before sets of AltiVec registers.
22784
22785 However, if the function receives nonlocal gotos, reload will set
22786 all call saved registers live. We will end up with:
22787
22788 (set (reg 999) (mem))
22789 (parallel [ (set (reg vrsave) (unspec blah))
22790 (clobber (reg 999))])
22791
22792 The clobber will cause the store into reg 999 to be dead, and
22793 flow will attempt to delete an epilogue insn. In this case, we
22794 need an unspec use/set of the register. */
22795
22796 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
22797 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
22798 {
22799 if (!epiloguep || call_used_regs [i])
22800 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
22801 gen_rtx_REG (V4SImode, i));
22802 else
22803 {
22804 rtx reg = gen_rtx_REG (V4SImode, i);
22805
22806 clobs[nclobs++]
22807 = gen_rtx_SET (VOIDmode,
22808 reg,
22809 gen_rtx_UNSPEC (V4SImode,
22810 gen_rtvec (1, reg), 27));
22811 }
22812 }
22813
22814 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
22815
22816 for (i = 0; i < nclobs; ++i)
22817 XVECEXP (insn, 0, i) = clobs[i];
22818
22819 return insn;
22820 }
22821
22822 static rtx
22823 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
22824 {
22825 rtx addr, mem;
22826
22827 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
22828 mem = gen_frame_mem (GET_MODE (reg), addr);
22829 return gen_rtx_SET (VOIDmode, store ? mem : reg, store ? reg : mem);
22830 }
22831
22832 static rtx
22833 gen_frame_load (rtx reg, rtx frame_reg, int offset)
22834 {
22835 return gen_frame_set (reg, frame_reg, offset, false);
22836 }
22837
22838 static rtx
22839 gen_frame_store (rtx reg, rtx frame_reg, int offset)
22840 {
22841 return gen_frame_set (reg, frame_reg, offset, true);
22842 }
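/* Usage sketch: gen_frame_store (gen_rtx_REG (Pmode, 31), sp_reg, 8)
   yields (set (mem (plus (reg 1) (const_int 8))) (reg 31)), and
   gen_frame_load is the mirror image with the MEM as the source.  */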
22843
22844 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
22845 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
22846
22847 static rtx
22848 emit_frame_save (rtx frame_reg, machine_mode mode,
22849 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
22850 {
22851 rtx reg, insn;
22852
22853 /* Some cases that need register indexed addressing. */
22854 gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
22855 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
22856 || (TARGET_E500_DOUBLE && mode == DFmode)
22857 || (TARGET_SPE_ABI
22858 && SPE_VECTOR_MODE (mode)
22859 && !SPE_CONST_OFFSET_OK (offset))));
22860
22861 reg = gen_rtx_REG (mode, regno);
22862 insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
22863 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
22864 NULL_RTX, NULL_RTX, NULL_RTX);
22865 }
22866
22867 /* Emit an offset memory reference suitable for a frame store, while
22868 converting to a valid addressing mode. */
22869
22870 static rtx
22871 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
22872 {
22873 rtx int_rtx, offset_rtx;
22874
22875 int_rtx = GEN_INT (offset);
22876
22877 if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset))
22878 || (TARGET_E500_DOUBLE && mode == DFmode))
22879 {
22880 offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
22881 emit_move_insn (offset_rtx, int_rtx);
22882 }
22883 else
22884 offset_rtx = int_rtx;
22885
22886 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
22887 }
22888
22889 #ifndef TARGET_FIX_AND_CONTINUE
22890 #define TARGET_FIX_AND_CONTINUE 0
22891 #endif
22892
22893 /* The first saved register is really GPR 13 or 14, FPR 14, or VR 20, depending on the class; FIRST_SAVRES_REGISTER must be the smallest of these. */
22894 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
22895 #define LAST_SAVRES_REGISTER 31
22896 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
22897
22898 enum {
22899 SAVRES_LR = 0x1,
22900 SAVRES_SAVE = 0x2,
22901 SAVRES_REG = 0x0c,
22902 SAVRES_GPR = 0,
22903 SAVRES_FPR = 4,
22904 SAVRES_VR = 8
22905 };
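/* Worked example of this encoding: SAVRES_SAVE | SAVRES_FPR | SAVRES_LR
   == 0x2 | 0x4 | 0x1 == 0x7 requests the out-of-line FPR save routine
   that also saves the link register; the SAVRES_REG field (mask 0x0c)
   holds one of GPR (0), FPR (4) or VR (8).  */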
22906
22907 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
22908
22909 /* Temporary holding space for an out-of-line register save/restore
22910 routine name. */
22911 static char savres_routine_name[30];
22912
22913 /* Return the name for an out-of-line register save/restore routine.
22914 SEL selects save vs. restore, the register class (SAVRES_GPR, SAVRES_FPR or SAVRES_VR), and whether the routine also handles LR (SAVRES_LR); REGNO is the first register saved or restored. */
22915
22916 static char *
22917 rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
22918 {
22919 const char *prefix = "";
22920 const char *suffix = "";
22921
22922 /* Different targets are supposed to define
22923 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
22924 routine name could be defined with:
22925
22926 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
22927
22928 This is a nice idea in theory, but in practice, things are
22929 complicated in several ways:
22930
22931 - ELF targets have save/restore routines for GPRs.
22932
22933 - SPE targets use different prefixes for 32/64-bit registers, and
22934 neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen.
22935
22936 - PPC64 ELF targets have routines for save/restore of GPRs that
22937 differ in what they do with the link register, so having a set
22938 prefix doesn't work. (We only use one of the save routines at
22939 the moment, though.)
22940
22941 - PPC32 ELF targets have "exit" versions of the restore routines
22942 that restore the link register and can save some extra space.
22943 These require an extra suffix. (There are also "tail" versions
22944 of the restore routines and "GOT" versions of the save routines,
22945 but we don't generate those at present. Same problems apply,
22946 though.)
22947
22948 We deal with all this by synthesizing our own prefix/suffix and
22949 using that for the simple sprintf call shown above. */
22950 if (TARGET_SPE)
22951 {
22952 /* No floating point saves on the SPE. */
22953 gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);
22954
22955 if ((sel & SAVRES_SAVE))
22956 prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
22957 else
22958 prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";
22959
22960 if ((sel & SAVRES_LR))
22961 suffix = "_x";
22962 }
22963 else if (DEFAULT_ABI == ABI_V4)
22964 {
22965 if (TARGET_64BIT)
22966 goto aix_names;
22967
22968 if ((sel & SAVRES_REG) == SAVRES_GPR)
22969 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
22970 else if ((sel & SAVRES_REG) == SAVRES_FPR)
22971 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
22972 else if ((sel & SAVRES_REG) == SAVRES_VR)
22973 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
22974 else
22975 abort ();
22976
22977 if ((sel & SAVRES_LR))
22978 suffix = "_x";
22979 }
22980 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
22981 {
22982 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
22983 /* No out-of-line save/restore routines for GPRs on AIX. */
22984 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
22985 #endif
22986
22987 aix_names:
22988 if ((sel & SAVRES_REG) == SAVRES_GPR)
22989 prefix = ((sel & SAVRES_SAVE)
22990 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
22991 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
22992 else if ((sel & SAVRES_REG) == SAVRES_FPR)
22993 {
22994 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
22995 if ((sel & SAVRES_LR))
22996 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
22997 else
22998 #endif
22999 {
23000 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
23001 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
23002 }
23003 }
23004 else if ((sel & SAVRES_REG) == SAVRES_VR)
23005 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
23006 else
23007 abort ();
23008 }
23009
23010 if (DEFAULT_ABI == ABI_DARWIN)
23011 {
23012 /* The Darwin approach is (slightly) different, in order to be
23013 compatible with code generated by the system toolchain. There is a
23014 single symbol for the start of the save sequence, and the code here
23015 embeds an offset into that code on the basis of the first register
23016 to be saved. */
23017 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
23018 if ((sel & SAVRES_REG) == SAVRES_GPR)
23019 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
23020 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
23021 (regno - 13) * 4, prefix, regno);
23022 else if ((sel & SAVRES_REG) == SAVRES_FPR)
23023 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
23024 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
23025 else if ((sel & SAVRES_REG) == SAVRES_VR)
23026 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
23027 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
23028 else
23029 abort ();
23030 }
23031 else
23032 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
23033
23034 return savres_routine_name;
23035 }
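/* Examples of the names synthesized above (illustrative): on 32-bit V.4,
   saving GPRs from r29 up together with LR gives "_savegpr_29_x"; on
   AIX-style ABIs the same request yields "_savegpr0_29", and the variant
   that leaves LR alone is "_savegpr1_29".  */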
23036
23037 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
23038 SEL selects save vs. restore, the register class, and LR handling, as for rs6000_savres_routine_name; the first saved register is taken from INFO. */
23039
23040 static rtx
23041 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
23042 {
23043 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
23044 ? info->first_gp_reg_save
23045 : (sel & SAVRES_REG) == SAVRES_FPR
23046 ? info->first_fp_reg_save - 32
23047 : (sel & SAVRES_REG) == SAVRES_VR
23048 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
23049 : -1);
23050 rtx sym;
23051 int select = sel;
23052
23053 /* On the SPE, we never have any FPRs, but we do have 32/64-bit
23054 versions of the gpr routines. */
23055 if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
23056 && info->spe_64bit_regs_used)
23057 select ^= SAVRES_FPR ^ SAVRES_GPR;
23058
23059 /* Don't generate bogus routine names. */
23060 gcc_assert (FIRST_SAVRES_REGISTER <= regno
23061 && regno <= LAST_SAVRES_REGISTER
23062 && select >= 0 && select <= 12);
23063
23064 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
23065
23066 if (sym == NULL)
23067 {
23068 char *name;
23069
23070 name = rs6000_savres_routine_name (info, regno, sel);
23071
23072 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
23073 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
23074 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
23075 }
23076
23077 return sym;
23078 }
23079
23080 /* Emit a sequence of insns, including a stack tie if needed, for
23081 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
23082 reset the stack pointer, but move the base of the frame into
23083 reg UPDT_REGNO for use by out-of-line register restore routines. */
23084
23085 static rtx
23086 rs6000_emit_stack_reset (rs6000_stack_t *info,
23087 rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
23088 unsigned updt_regno)
23089 {
23090 rtx updt_reg_rtx;
23091
23092 /* This blockage is needed so that sched doesn't decide to move
23093 the sp change before the register restores. */
23094 if (DEFAULT_ABI == ABI_V4
23095 || (TARGET_SPE_ABI
23096 && info->spe_64bit_regs_used != 0
23097 && info->first_gp_reg_save != 32))
23098 rs6000_emit_stack_tie (frame_reg_rtx, frame_pointer_needed);
23099
23100 /* If we are restoring registers out-of-line, we will be using the
23101 "exit" variants of the restore routines, which will reset the
23102 stack for us. But we do need to point updt_reg into the
23103 right place for those routines. */
23104 updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
23105
23106 if (frame_off != 0)
23107 return emit_insn (gen_add3_insn (updt_reg_rtx,
23108 frame_reg_rtx, GEN_INT (frame_off)));
23109 else if (REGNO (frame_reg_rtx) != updt_regno)
23110 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
23111
23112 return NULL_RTX;
23113 }
23114
23115 /* Return the register number used as a pointer by out-of-line
23116 save/restore functions. */
23117
23118 static inline unsigned
23119 ptr_regno_for_savres (int sel)
23120 {
23121 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23122 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
23123 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
23124 }
23125
23126 /* Construct a parallel rtx describing the effect of a call to an
23127 out-of-line register save/restore routine, and emit the insn
23128 or jump_insn as appropriate. */
23129
23130 static rtx
23131 rs6000_emit_savres_rtx (rs6000_stack_t *info,
23132 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
23133 machine_mode reg_mode, int sel)
23134 {
23135 int i;
23136 int offset, start_reg, end_reg, n_regs, use_reg;
23137 int reg_size = GET_MODE_SIZE (reg_mode);
23138 rtx sym;
23139 rtvec p;
23140 rtx par, insn;
23141
23142 offset = 0;
23143 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
23144 ? info->first_gp_reg_save
23145 : (sel & SAVRES_REG) == SAVRES_FPR
23146 ? info->first_fp_reg_save
23147 : (sel & SAVRES_REG) == SAVRES_VR
23148 ? info->first_altivec_reg_save
23149 : -1);
23150 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
23151 ? 32
23152 : (sel & SAVRES_REG) == SAVRES_FPR
23153 ? 64
23154 : (sel & SAVRES_REG) == SAVRES_VR
23155 ? LAST_ALTIVEC_REGNO + 1
23156 : -1);
23157 n_regs = end_reg - start_reg;
23158 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
23159 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
23160 + n_regs);
23161
23162 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
23163 RTVEC_ELT (p, offset++) = ret_rtx;
23164
23165 RTVEC_ELT (p, offset++)
23166 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
23167
23168 sym = rs6000_savres_routine_sym (info, sel);
23169 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
23170
23171 use_reg = ptr_regno_for_savres (sel);
23172 if ((sel & SAVRES_REG) == SAVRES_VR)
23173 {
23174 /* Vector regs are saved/restored using [reg+reg] addressing. */
23175 RTVEC_ELT (p, offset++)
23176 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
23177 RTVEC_ELT (p, offset++)
23178 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
23179 }
23180 else
23181 RTVEC_ELT (p, offset++)
23182 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
23183
23184 for (i = 0; i < end_reg - start_reg; i++)
23185 RTVEC_ELT (p, i + offset)
23186 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
23187 frame_reg_rtx, save_area_offset + reg_size * i,
23188 (sel & SAVRES_SAVE) != 0);
23189
23190 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
23191 RTVEC_ELT (p, i + offset)
23192 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
23193
23194 par = gen_rtx_PARALLEL (VOIDmode, p);
23195
23196 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
23197 {
23198 insn = emit_jump_insn (par);
23199 JUMP_LABEL (insn) = ret_rtx;
23200 }
23201 else
23202 insn = emit_insn (par);
23203 return insn;
23204 }
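/* Sketch of the PARALLEL built above for a GPR "exit" restore (SAVRES_GPR
   with SAVRES_LR, not SAVRES_SAVE) on 32-bit V.4, with
   first_gp_reg_save == 30:

     (parallel [(return)
                (clobber (reg LR))
                (use (symbol_ref "_restgpr_30_x"))
                (use (reg 11))
                (set (reg 30) (mem ...))
                (set (reg 31) (mem ...))])

   which is emitted as a jump_insn, since the routine itself returns.  */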
23205
23206 /* Emit code to store CR fields that need to be saved into REG. */
23207
23208 static void
23209 rs6000_emit_move_from_cr (rtx reg)
23210 {
23211 /* Only the ELFv2 ABI allows storing only selected fields. */
23212 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
23213 {
23214 int i, cr_reg[8], count = 0;
23215
23216 /* Collect CR fields that must be saved. */
23217 for (i = 0; i < 8; i++)
23218 if (save_reg_p (CR0_REGNO + i))
23219 cr_reg[count++] = i;
23220
23221 /* If it's just a single one, use mfcrf. */
23222 if (count == 1)
23223 {
23224 rtvec p = rtvec_alloc (1);
23225 rtvec r = rtvec_alloc (2);
23226 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
23227 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
23228 RTVEC_ELT (p, 0)
23229 = gen_rtx_SET (VOIDmode, reg,
23230 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
23231
23232 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23233 return;
23234 }
23235
23236 /* ??? It might be better to handle count == 2 / 3 cases here
23237 as well, using logical operations to combine the values. */
23238 }
23239
23240 emit_insn (gen_movesi_from_cr (reg));
23241 }
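/* Worked example for the single-field case above: if only CR2 must be
   saved, cr_reg[0] == 2 and the mask operand is 1 << (7 - 2) == 0x20,
   matching the FXM field mfcrf expects for CR field 2.  */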
23242
23243 /* Determine whether GP register REG is really used. */
23244
23245 static bool
23246 rs6000_reg_live_or_pic_offset_p (int reg)
23247 {
23248 /* If the function calls eh_return, claim used all the registers that would
23249 be checked for liveness otherwise. This is required for the PIC offset
23250 register with -mminimal-toc on AIX, as it is advertised as "fixed" for
23251 register allocation purposes in this case. */
23252
23253 return (((crtl->calls_eh_return || df_regs_ever_live_p (reg))
23254 && (!call_used_regs[reg]
23255 || (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
23256 && !TARGET_SINGLE_PIC_BASE
23257 && TARGET_TOC && TARGET_MINIMAL_TOC)))
23258 || (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
23259 && !TARGET_SINGLE_PIC_BASE
23260 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
23261 || (DEFAULT_ABI == ABI_DARWIN && flag_pic))));
23262 }
23263
23264 /* Emit function prologue as insns. */
23265
23266 void
23267 rs6000_emit_prologue (void)
23268 {
23269 rs6000_stack_t *info = rs6000_stack_info ();
23270 machine_mode reg_mode = Pmode;
23271 int reg_size = TARGET_32BIT ? 4 : 8;
23272 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
23273 rtx frame_reg_rtx = sp_reg_rtx;
23274 unsigned int cr_save_regno;
23275 rtx cr_save_rtx = NULL_RTX;
23276 rtx insn;
23277 int strategy;
23278 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
23279 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
23280 && call_used_regs[STATIC_CHAIN_REGNUM]);
23281 /* Offset to top of frame for frame_reg and sp respectively. */
23282 HOST_WIDE_INT frame_off = 0;
23283 HOST_WIDE_INT sp_off = 0;
23284
23285 #ifdef ENABLE_CHECKING
23286 /* Track and check usage of r0, r11, r12. */
23287 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
23288 #define START_USE(R) do \
23289 { \
23290 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
23291 reg_inuse |= 1 << (R); \
23292 } while (0)
23293 #define END_USE(R) do \
23294 { \
23295 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
23296 reg_inuse &= ~(1 << (R)); \
23297 } while (0)
23298 #define NOT_INUSE(R) do \
23299 { \
23300 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
23301 } while (0)
23302 #else
23303 #define START_USE(R) do {} while (0)
23304 #define END_USE(R) do {} while (0)
23305 #define NOT_INUSE(R) do {} while (0)
23306 #endif
23307
23308 if (DEFAULT_ABI == ABI_ELFv2)
23309 {
23310 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
23311
23312 /* With -mminimal-toc we may generate an extra use of r2 below. */
23313 if (!TARGET_SINGLE_PIC_BASE
23314 && TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
23315 cfun->machine->r2_setup_needed = true;
23316 }
23317
23318
23319 if (flag_stack_usage_info)
23320 current_function_static_stack_size = info->total_size;
23321
23322 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
23323 {
23324 HOST_WIDE_INT size = info->total_size;
23325
23326 if (crtl->is_leaf && !cfun->calls_alloca)
23327 {
23328 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
23329 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
23330 size - STACK_CHECK_PROTECT);
23331 }
23332 else if (size > 0)
23333 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
23334 }
23335
23336 if (TARGET_FIX_AND_CONTINUE)
23337 {
23338 /* gdb on darwin arranges to forward a function from the old
23339 address by modifying the first 5 instructions of the function
23340 to branch to the overriding function. This is necessary to
23341 permit function pointers that point to the old function to
23342 actually forward to the new function. */
23343 emit_insn (gen_nop ());
23344 emit_insn (gen_nop ());
23345 emit_insn (gen_nop ());
23346 emit_insn (gen_nop ());
23347 emit_insn (gen_nop ());
23348 }
23349
23350 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
23351 {
23352 reg_mode = V2SImode;
23353 reg_size = 8;
23354 }
23355
23356 /* Handle world saves specially here. */
23357 if (WORLD_SAVE_P (info))
23358 {
23359 int i, j, sz;
23360 rtx treg;
23361 rtvec p;
23362 rtx reg0;
23363
23364 /* save_world expects lr in r0. */
23365 reg0 = gen_rtx_REG (Pmode, 0);
23366 if (info->lr_save_p)
23367 {
23368 insn = emit_move_insn (reg0,
23369 gen_rtx_REG (Pmode, LR_REGNO));
23370 RTX_FRAME_RELATED_P (insn) = 1;
23371 }
23372
23373 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
23374 assumptions about the offsets of various bits of the stack
23375 frame. */
23376 gcc_assert (info->gp_save_offset == -220
23377 && info->fp_save_offset == -144
23378 && info->lr_save_offset == 8
23379 && info->cr_save_offset == 4
23380 && info->push_p
23381 && info->lr_save_p
23382 && (!crtl->calls_eh_return
23383 || info->ehrd_offset == -432)
23384 && info->vrsave_save_offset == -224
23385 && info->altivec_save_offset == -416);
23386
23387 treg = gen_rtx_REG (SImode, 11);
23388 emit_move_insn (treg, GEN_INT (-info->total_size));
23389
23390 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
23391 in R11. It also clobbers R12, so beware! */
23392
23393 /* Preserve CR2 for save_world prologues. */
23394 sz = 5;
23395 sz += 32 - info->first_gp_reg_save;
23396 sz += 64 - info->first_fp_reg_save;
23397 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
23398 p = rtvec_alloc (sz);
23399 j = 0;
23400 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
23401 gen_rtx_REG (SImode,
23402 LR_REGNO));
23403 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
23404 gen_rtx_SYMBOL_REF (Pmode,
23405 "*save_world"));
23406 /* We do floats first so that the instruction pattern matches
23407 properly. */
23408 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
23409 RTVEC_ELT (p, j++)
23410 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
23411 ? DFmode : SFmode,
23412 info->first_fp_reg_save + i),
23413 frame_reg_rtx,
23414 info->fp_save_offset + frame_off + 8 * i);
23415 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
23416 RTVEC_ELT (p, j++)
23417 = gen_frame_store (gen_rtx_REG (V4SImode,
23418 info->first_altivec_reg_save + i),
23419 frame_reg_rtx,
23420 info->altivec_save_offset + frame_off + 16 * i);
23421 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23422 RTVEC_ELT (p, j++)
23423 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
23424 frame_reg_rtx,
23425 info->gp_save_offset + frame_off + reg_size * i);
23426
23427 /* CR register traditionally saved as CR2. */
23428 RTVEC_ELT (p, j++)
23429 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
23430 frame_reg_rtx, info->cr_save_offset + frame_off);
23431 /* Describe the use of R0. */
23432 if (info->lr_save_p)
23433 RTVEC_ELT (p, j++)
23434 = gen_frame_store (reg0,
23435 frame_reg_rtx, info->lr_save_offset + frame_off);
23436 /* Explain what happens to the stack pointer. */
23437 {
23438 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
23439 RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, sp_reg_rtx, newval);
23440 }
23441
23442 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23443 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23444 treg, GEN_INT (-info->total_size), NULL_RTX);
23445 sp_off = frame_off = info->total_size;
23446 }
23447
23448 strategy = info->savres_strategy;
23449
23450 /* For V.4, update stack before we do any saving and set back pointer. */
23451 if (! WORLD_SAVE_P (info)
23452 && info->push_p
23453 && (DEFAULT_ABI == ABI_V4
23454 || crtl->calls_eh_return))
23455 {
23456 bool need_r11 = (TARGET_SPE
23457 ? (!(strategy & SAVE_INLINE_GPRS)
23458 && info->spe_64bit_regs_used == 0)
23459 : (!(strategy & SAVE_INLINE_FPRS)
23460 || !(strategy & SAVE_INLINE_GPRS)
23461 || !(strategy & SAVE_INLINE_VRS)));
23462 int ptr_regno = -1;
23463 rtx ptr_reg = NULL_RTX;
23464 int ptr_off = 0;
23465
23466 if (info->total_size < 32767)
23467 frame_off = info->total_size;
23468 else if (need_r11)
23469 ptr_regno = 11;
23470 else if (info->cr_save_p
23471 || info->lr_save_p
23472 || info->first_fp_reg_save < 64
23473 || info->first_gp_reg_save < 32
23474 || info->altivec_size != 0
23475 || info->vrsave_mask != 0
23476 || crtl->calls_eh_return)
23477 ptr_regno = 12;
23478 else
23479 {
23480 /* The prologue won't be saving any regs so there is no need
23481 to set up a frame register to access any frame save area.
23482 We also won't be using frame_off anywhere below, but set
23483 the correct value anyway to protect against future
23484 changes to this function. */
23485 frame_off = info->total_size;
23486 }
23487 if (ptr_regno != -1)
23488 {
23489 /* Set up the frame offset to that needed by the first
23490 out-of-line save function. */
23491 START_USE (ptr_regno);
23492 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23493 frame_reg_rtx = ptr_reg;
23494 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
23495 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
23496 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
23497 ptr_off = info->gp_save_offset + info->gp_size;
23498 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
23499 ptr_off = info->altivec_save_offset + info->altivec_size;
23500 frame_off = -ptr_off;
23501 }
23502 rs6000_emit_allocate_stack (info->total_size, ptr_reg, ptr_off);
23503 sp_off = info->total_size;
23504 if (frame_reg_rtx != sp_reg_rtx)
23505 rs6000_emit_stack_tie (frame_reg_rtx, false);
23506 }
23507
23508 /* If we use the link register, get it into r0. */
23509 if (!WORLD_SAVE_P (info) && info->lr_save_p)
23510 {
23511 rtx addr, reg, mem;
23512
23513 reg = gen_rtx_REG (Pmode, 0);
23514 START_USE (0);
23515 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
23516 RTX_FRAME_RELATED_P (insn) = 1;
23517
23518 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
23519 | SAVE_NOINLINE_FPRS_SAVES_LR)))
23520 {
23521 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
23522 GEN_INT (info->lr_save_offset + frame_off));
23523 mem = gen_rtx_MEM (Pmode, addr);
23524 /* This should not use rs6000_sr_alias_set, because the slot
23525 can be read via __builtin_return_address. */
23526
23527 insn = emit_move_insn (mem, reg);
23528 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23529 NULL_RTX, NULL_RTX, NULL_RTX);
23530 END_USE (0);
23531 }
23532 }
23533
23534 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
23535 r12 will be needed by the out-of-line GPR save routine. */
23536 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23537 && !(strategy & (SAVE_INLINE_GPRS
23538 | SAVE_NOINLINE_GPRS_SAVES_LR))
23539 ? 11 : 12);
23540 if (!WORLD_SAVE_P (info)
23541 && info->cr_save_p
23542 && REGNO (frame_reg_rtx) != cr_save_regno
23543 && !(using_static_chain_p && cr_save_regno == 11))
23544 {
23545 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
23546 START_USE (cr_save_regno);
23547 rs6000_emit_move_from_cr (cr_save_rtx);
23548 }
23549
23550 /* Do any required saving of FPRs. If only one or two need saving, do
23551 it inline. Otherwise, call an out-of-line routine. */
23552 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
23553 {
23554 int i;
23555 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
23556 if (save_reg_p (info->first_fp_reg_save + i))
23557 emit_frame_save (frame_reg_rtx,
23558 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
23559 ? DFmode : SFmode),
23560 info->first_fp_reg_save + i,
23561 info->fp_save_offset + frame_off + 8 * i,
23562 sp_off - frame_off);
23563 }
23564 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
23565 {
23566 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
23567 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
23568 unsigned ptr_regno = ptr_regno_for_savres (sel);
23569 rtx ptr_reg = frame_reg_rtx;
23570
23571 if (REGNO (frame_reg_rtx) == ptr_regno)
23572 gcc_checking_assert (frame_off == 0);
23573 else
23574 {
23575 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23576 NOT_INUSE (ptr_regno);
23577 emit_insn (gen_add3_insn (ptr_reg,
23578 frame_reg_rtx, GEN_INT (frame_off)));
23579 }
23580 insn = rs6000_emit_savres_rtx (info, ptr_reg,
23581 info->fp_save_offset,
23582 info->lr_save_offset,
23583 DFmode, sel);
23584 rs6000_frame_related (insn, ptr_reg, sp_off,
23585 NULL_RTX, NULL_RTX, NULL_RTX);
23586 if (lr)
23587 END_USE (0);
23588 }
23589
23590 /* Save GPRs. This is done as a PARALLEL if we are using
23591 the store-multiple instructions. */
23592 if (!WORLD_SAVE_P (info)
23593 && TARGET_SPE_ABI
23594 && info->spe_64bit_regs_used != 0
23595 && info->first_gp_reg_save != 32)
23596 {
23597 int i;
23598 rtx spe_save_area_ptr;
23599 HOST_WIDE_INT save_off;
23600 int ool_adjust = 0;
23601
23602 /* Determine whether we can address all of the registers that need
23603 to be saved with an offset from frame_reg_rtx that fits in
23604 the small const field for SPE memory instructions. */
23605 int spe_regs_addressable
23606 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
23607 + reg_size * (32 - info->first_gp_reg_save - 1))
23608 && (strategy & SAVE_INLINE_GPRS));
23609
23610 if (spe_regs_addressable)
23611 {
23612 spe_save_area_ptr = frame_reg_rtx;
23613 save_off = frame_off;
23614 }
23615 else
23616 {
23617 /* Make r11 point to the start of the SPE save area. We need
23618 to be careful here if r11 is holding the static chain. If
23619 it is, then temporarily save it in r0. */
23620 HOST_WIDE_INT offset;
23621
23622 if (!(strategy & SAVE_INLINE_GPRS))
23623 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
23624 offset = info->spe_gp_save_offset + frame_off - ool_adjust;
23625 spe_save_area_ptr = gen_rtx_REG (Pmode, 11);
23626 save_off = frame_off - offset;
23627
23628 if (using_static_chain_p)
23629 {
23630 rtx r0 = gen_rtx_REG (Pmode, 0);
23631
23632 START_USE (0);
23633 gcc_assert (info->first_gp_reg_save > 11);
23634
23635 emit_move_insn (r0, spe_save_area_ptr);
23636 }
23637 else if (REGNO (frame_reg_rtx) != 11)
23638 START_USE (11);
23639
23640 emit_insn (gen_addsi3 (spe_save_area_ptr,
23641 frame_reg_rtx, GEN_INT (offset)));
23642 if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11)
23643 frame_off = -info->spe_gp_save_offset + ool_adjust;
23644 }
23645
23646 if ((strategy & SAVE_INLINE_GPRS))
23647 {
23648 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23649 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
23650 emit_frame_save (spe_save_area_ptr, reg_mode,
23651 info->first_gp_reg_save + i,
23652 (info->spe_gp_save_offset + save_off
23653 + reg_size * i),
23654 sp_off - save_off);
23655 }
23656 else
23657 {
23658 insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr,
23659 info->spe_gp_save_offset + save_off,
23660 0, reg_mode,
23661 SAVRES_SAVE | SAVRES_GPR);
23662
23663 rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
23664 NULL_RTX, NULL_RTX, NULL_RTX);
23665 }
23666
23667 /* Move the static chain pointer back. */
23668 if (!spe_regs_addressable)
23669 {
23670 if (using_static_chain_p)
23671 {
23672 emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
23673 END_USE (0);
23674 }
23675 else if (REGNO (frame_reg_rtx) != 11)
23676 END_USE (11);
23677 }
23678 }
23679 else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
23680 {
23681 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
23682 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
23683 unsigned ptr_regno = ptr_regno_for_savres (sel);
23684 rtx ptr_reg = frame_reg_rtx;
23685 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
23686 int end_save = info->gp_save_offset + info->gp_size;
23687 int ptr_off;
23688
23689 if (!ptr_set_up)
23690 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23691
23692 /* Need to adjust r11 (r12) if we saved any FPRs. */
23693 if (end_save + frame_off != 0)
23694 {
23695 rtx offset = GEN_INT (end_save + frame_off);
23696
23697 if (ptr_set_up)
23698 frame_off = -end_save;
23699 else
23700 NOT_INUSE (ptr_regno);
23701 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
23702 }
23703 else if (!ptr_set_up)
23704 {
23705 NOT_INUSE (ptr_regno);
23706 emit_move_insn (ptr_reg, frame_reg_rtx);
23707 }
23708 ptr_off = -end_save;
23709 insn = rs6000_emit_savres_rtx (info, ptr_reg,
23710 info->gp_save_offset + ptr_off,
23711 info->lr_save_offset + ptr_off,
23712 reg_mode, sel);
23713 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
23714 NULL_RTX, NULL_RTX, NULL_RTX);
23715 if (lr)
23716 END_USE (0);
23717 }
23718 else if (!WORLD_SAVE_P (info) && (strategy & SAVRES_MULTIPLE))
23719 {
23720 rtvec p;
23721 int i;
23722 p = rtvec_alloc (32 - info->first_gp_reg_save);
23723 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23724 RTVEC_ELT (p, i)
23725 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
23726 frame_reg_rtx,
23727 info->gp_save_offset + frame_off + reg_size * i);
23728 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23729 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23730 NULL_RTX, NULL_RTX, NULL_RTX);
23731 }
23732 else if (!WORLD_SAVE_P (info))
23733 {
23734 int i;
23735 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23736 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
23737 emit_frame_save (frame_reg_rtx, reg_mode,
23738 info->first_gp_reg_save + i,
23739 info->gp_save_offset + frame_off + reg_size * i,
23740 sp_off - frame_off);
23741 }
23742
23743 if (crtl->calls_eh_return)
23744 {
23745 unsigned int i;
23746 rtvec p;
23747
23748 for (i = 0; ; ++i)
23749 {
23750 unsigned int regno = EH_RETURN_DATA_REGNO (i);
23751 if (regno == INVALID_REGNUM)
23752 break;
23753 }
23754
23755 p = rtvec_alloc (i);
23756
23757 for (i = 0; ; ++i)
23758 {
23759 unsigned int regno = EH_RETURN_DATA_REGNO (i);
23760 if (regno == INVALID_REGNUM)
23761 break;
23762
23763 insn
23764 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
23765 sp_reg_rtx,
23766 info->ehrd_offset + sp_off + reg_size * (int) i);
23767 RTVEC_ELT (p, i) = insn;
23768 RTX_FRAME_RELATED_P (insn) = 1;
23769 }
23770
23771 insn = emit_insn (gen_blockage ());
23772 RTX_FRAME_RELATED_P (insn) = 1;
23773 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
23774 }
23775
23776 /* In the AIX ABI we need to make sure r2 is really saved. */
23777 if (TARGET_AIX && crtl->calls_eh_return)
23778 {
23779 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
23780 rtx save_insn, join_insn, note;
23781 long toc_restore_insn;
23782
23783 tmp_reg = gen_rtx_REG (Pmode, 11);
23784 tmp_reg_si = gen_rtx_REG (SImode, 11);
23785 if (using_static_chain_p)
23786 {
23787 START_USE (0);
23788 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
23789 }
23790 else
23791 START_USE (11);
23792 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
23793 /* Peek at the instruction to which this function returns. If it's
23794 restoring r2, then we know we've already saved r2. We can't
23795 unconditionally save r2 because the value we have will already
23796 be updated if we arrived at this function via a plt call or
23797 toc adjusting stub. */
23798 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
23799 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
23800 + RS6000_TOC_SAVE_SLOT);
23801 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
23802 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
23803 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
23804 validate_condition_mode (EQ, CCUNSmode);
23805 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
23806 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
23807 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
23808 toc_save_done = gen_label_rtx ();
23809 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
23810 gen_rtx_EQ (VOIDmode, compare_result,
23811 const0_rtx),
23812 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
23813 pc_rtx);
23814 jump = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, jump));
23815 JUMP_LABEL (jump) = toc_save_done;
23816 LABEL_NUSES (toc_save_done) += 1;
23817
23818 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
23819 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
23820 sp_off - frame_off);
23821
23822 emit_label (toc_save_done);
23823
23824 /* ??? If we leave SAVE_INSN as marked as saving R2, then we'll
23825 have a CFG that has different saves along different paths.
23826 Move the note to a dummy blockage insn, which describes that
23827 R2 is unconditionally saved after the label. */
23828 /* ??? An alternate representation might be a special insn pattern
23829 containing both the branch and the store. That might give the
23830 code that minimizes the number of DW_CFA_advance opcodes more
23831 freedom in placing the annotations. */
23832 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
23833 if (note)
23834 remove_note (save_insn, note);
23835 else
23836 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
23837 copy_rtx (PATTERN (save_insn)), NULL_RTX);
23838 RTX_FRAME_RELATED_P (save_insn) = 0;
23839
23840 join_insn = emit_insn (gen_blockage ());
23841 REG_NOTES (join_insn) = note;
23842 RTX_FRAME_RELATED_P (join_insn) = 1;
23843
23844 if (using_static_chain_p)
23845 {
23846 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
23847 END_USE (0);
23848 }
23849 else
23850 END_USE (11);
23851 }
23852
23853 /* Save CR if we use any that must be preserved. */
23854 if (!WORLD_SAVE_P (info) && info->cr_save_p)
23855 {
23856 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
23857 GEN_INT (info->cr_save_offset + frame_off));
23858 rtx mem = gen_frame_mem (SImode, addr);
23859
23860 /* If we didn't copy cr before, do so now using r0. */
23861 if (cr_save_rtx == NULL_RTX)
23862 {
23863 START_USE (0);
23864 cr_save_rtx = gen_rtx_REG (SImode, 0);
23865 rs6000_emit_move_from_cr (cr_save_rtx);
23866 }
23867
23868 /* Saving CR requires a two-instruction sequence: one instruction
23869 to move the CR to a general-purpose register, and a second
23870 instruction that stores the GPR to memory.
23871
23872 We do not emit any DWARF CFI records for the first of these,
23873 because we cannot properly represent the fact that CR is saved in
23874 a register. One reason is that we cannot express that multiple
23875 CR fields are saved; another reason is that on 64-bit, the size
23876 of the CR register in DWARF (4 bytes) differs from the size of
23877 a general-purpose register.
23878
23879 This means if any intervening instruction were to clobber one of
23880 the call-saved CR fields, we'd have incorrect CFI. To prevent
23881 this from happening, we mark the store to memory as a use of
23882 those CR fields, which prevents any such instruction from being
23883 scheduled in between the two instructions. */
23884 rtx crsave_v[9];
23885 int n_crsave = 0;
23886 int i;
23887
23888 crsave_v[n_crsave++] = gen_rtx_SET (VOIDmode, mem, cr_save_rtx);
23889 for (i = 0; i < 8; i++)
23890 if (save_reg_p (CR0_REGNO + i))
23891 crsave_v[n_crsave++]
23892 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
23893
23894 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
23895 gen_rtvec_v (n_crsave, crsave_v)));
23896 END_USE (REGNO (cr_save_rtx));
23897
23898 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
23899 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
23900 so we need to construct a frame expression manually. */
23901 RTX_FRAME_RELATED_P (insn) = 1;
23902
23903 /* Update address to be stack-pointer relative, like
23904 rs6000_frame_related would do. */
23905 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
23906 GEN_INT (info->cr_save_offset + sp_off));
23907 mem = gen_frame_mem (SImode, addr);
23908
23909 if (DEFAULT_ABI == ABI_ELFv2)
23910 {
23911 /* In the ELFv2 ABI we generate separate CFI records for each
23912 CR field that was actually saved. They all point to the
23913 same 32-bit stack slot. */
23914 rtx crframe[8];
23915 int n_crframe = 0;
23916
23917 for (i = 0; i < 8; i++)
23918 if (save_reg_p (CR0_REGNO + i))
23919 {
23920 crframe[n_crframe]
23921 = gen_rtx_SET (VOIDmode, mem,
23922 gen_rtx_REG (SImode, CR0_REGNO + i));
23923
23924 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
23925 n_crframe++;
23926 }
23927
23928 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
23929 gen_rtx_PARALLEL (VOIDmode,
23930 gen_rtvec_v (n_crframe, crframe)));
23931 }
23932 else
23933 {
23934 /* In other ABIs, by convention, we use a single CR regnum to
23935 represent the fact that all call-saved CR fields are saved.
23936 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
23937 rtx set = gen_rtx_SET (VOIDmode, mem,
23938 gen_rtx_REG (SImode, CR2_REGNO));
23939 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
23940 }
23941 }
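/* Illustrative shape of the CR save emitted above: the store is wrapped
   together with USEs of the live CR fields, e.g.

     (parallel [(set (mem:SI ...) (reg:SI 12))
                (use (reg:CC 70))])      ;; CR2 live, CR0 being reg 68

   and the REG_FRAME_RELATED_EXPR note then describes the slot as holding
   CR2 (or, for ELFv2, each saved field individually).  */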
23942
23943 /* In the ELFv2 ABI we need to save all call-saved CR fields into
23944 *separate* slots if the routine calls __builtin_eh_return, so
23945 that they can be independently restored by the unwinder. */
23946 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
23947 {
23948 int i, cr_off = info->ehcr_offset;
23949 rtx crsave;
23950
23951 /* ??? We might get better performance by using multiple mfocrf
23952 instructions. */
23953 crsave = gen_rtx_REG (SImode, 0);
23954 emit_insn (gen_movesi_from_cr (crsave));
23955
23956 for (i = 0; i < 8; i++)
23957 if (!call_used_regs[CR0_REGNO + i])
23958 {
23959 rtvec p = rtvec_alloc (2);
23960 RTVEC_ELT (p, 0)
23961 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
23962 RTVEC_ELT (p, 1)
23963 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
23964
23965 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23966
23967 RTX_FRAME_RELATED_P (insn) = 1;
23968 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
23969 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
23970 sp_reg_rtx, cr_off + sp_off));
23971
23972 cr_off += reg_size;
23973 }
23974 }
23975
23976 /* Update stack and set back pointer unless this is V.4,
23977 for which it was done previously. */
23978 if (!WORLD_SAVE_P (info) && info->push_p
23979 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
23980 {
23981 rtx ptr_reg = NULL;
23982 int ptr_off = 0;
23983
23984 /* If saving altivec regs we need to be able to address all save
23985 locations using a 16-bit offset. */
23986 if ((strategy & SAVE_INLINE_VRS) == 0
23987 || (info->altivec_size != 0
23988 && (info->altivec_save_offset + info->altivec_size - 16
23989 + info->total_size - frame_off) > 32767)
23990 || (info->vrsave_size != 0
23991 && (info->vrsave_save_offset
23992 + info->total_size - frame_off) > 32767))
23993 {
23994 int sel = SAVRES_SAVE | SAVRES_VR;
23995 unsigned ptr_regno = ptr_regno_for_savres (sel);
23996
23997 if (using_static_chain_p
23998 && ptr_regno == STATIC_CHAIN_REGNUM)
23999 ptr_regno = 12;
24000 if (REGNO (frame_reg_rtx) != ptr_regno)
24001 START_USE (ptr_regno);
24002 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
24003 frame_reg_rtx = ptr_reg;
24004 ptr_off = info->altivec_save_offset + info->altivec_size;
24005 frame_off = -ptr_off;
24006 }
24007 else if (REGNO (frame_reg_rtx) == 1)
24008 frame_off = info->total_size;
24009 rs6000_emit_allocate_stack (info->total_size, ptr_reg, ptr_off);
24010 sp_off = info->total_size;
24011 if (frame_reg_rtx != sp_reg_rtx)
24012 rs6000_emit_stack_tie (frame_reg_rtx, false);
24013 }
24014
24015 /* Set frame pointer, if needed. */
24016 if (frame_pointer_needed)
24017 {
24018 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
24019 sp_reg_rtx);
24020 RTX_FRAME_RELATED_P (insn) = 1;
24021 }
24022
24023 /* Save AltiVec registers if needed. Save here because the red zone does
24024 not always include AltiVec registers. */
24025 if (!WORLD_SAVE_P (info) && TARGET_ALTIVEC_ABI
24026 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
24027 {
24028 int end_save = info->altivec_save_offset + info->altivec_size;
24029 int ptr_off;
24030 /* Oddly, the vector save/restore functions point r0 at the end
24031 of the save area, then use r11 or r12 to load offsets for
24032 [reg+reg] addressing. */
24033 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
24034 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
24035 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
24036
24037 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
24038 NOT_INUSE (0);
24039 if (end_save + frame_off != 0)
24040 {
24041 rtx offset = GEN_INT (end_save + frame_off);
24042
24043 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
24044 }
24045 else
24046 emit_move_insn (ptr_reg, frame_reg_rtx);
24047
24048 ptr_off = -end_save;
24049 insn = rs6000_emit_savres_rtx (info, scratch_reg,
24050 info->altivec_save_offset + ptr_off,
24051 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
24052 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
24053 NULL_RTX, NULL_RTX, NULL_RTX);
24054 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
24055 {
24056 /* The oddity mentioned above clobbered our frame reg. */
24057 emit_move_insn (frame_reg_rtx, ptr_reg);
24058 frame_off = ptr_off;
24059 }
24060 }
24061 else if (!WORLD_SAVE_P (info) && TARGET_ALTIVEC_ABI
24062 && info->altivec_size != 0)
24063 {
24064 int i;
24065
24066 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24067 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24068 {
24069 rtx areg, savereg, mem, split_reg;
24070 int offset;
24071
24072 offset = (info->altivec_save_offset + frame_off
24073 + 16 * (i - info->first_altivec_reg_save));
24074
24075 savereg = gen_rtx_REG (V4SImode, i);
24076
24077 NOT_INUSE (0);
24078 areg = gen_rtx_REG (Pmode, 0);
24079 emit_move_insn (areg, GEN_INT (offset));
24080
24081 /* AltiVec addressing mode is [reg+reg]. */
24082 mem = gen_frame_mem (V4SImode,
24083 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
24084
24085 insn = emit_move_insn (mem, savereg);
24086
24087 /* When we split a VSX store into two insns, we need to make
24088 sure the DWARF info knows which register we are storing.
24089 Pass it in to be used on the appropriate note. */
24090 if (!BYTES_BIG_ENDIAN
24091 && GET_CODE (PATTERN (insn)) == SET
24092 && GET_CODE (SET_SRC (PATTERN (insn))) == VEC_SELECT)
24093 split_reg = savereg;
24094 else
24095 split_reg = NULL_RTX;
24096
24097 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
24098 areg, GEN_INT (offset), split_reg);
24099 }
24100 }
24101
24102 /* VRSAVE is a bit vector representing which AltiVec registers
24103 are used. The OS uses this to determine which vector
24104 registers to save on a context switch. We need to save
24105 VRSAVE on the stack frame, add whatever AltiVec registers we
24106 used in this function, and do the corresponding magic in the
24107 epilogue. */
24108
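/* As an illustrative sketch (register choice and how the mask is
   split vary with the code below), the emitted sequence is roughly:

       mfvrsave r12              # read VRSAVE into a GPR
       stw      r12,<off>(r1)    # save the old mask in the frame
       oris     r12,r12,<mask>   # OR in the VRs this function uses
       mtvrsave r12              # install the updated mask */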
24109 if (!WORLD_SAVE_P (info)
24110 && TARGET_ALTIVEC
24111 && TARGET_ALTIVEC_VRSAVE
24112 && info->vrsave_mask != 0)
24113 {
24114 rtx reg, vrsave;
24115 int offset;
24116 int save_regno;
24117
24118 /* Get VRSAVE onto a GPR. Note that ABI_V4 and ABI_DARWIN might
24119 be using r12 as frame_reg_rtx and r11 as the static chain
24120 pointer for nested functions. */
24121 save_regno = 12;
24122 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24123 && !using_static_chain_p)
24124 save_regno = 11;
24125 else if (REGNO (frame_reg_rtx) == 12)
24126 {
24127 save_regno = 11;
24128 if (using_static_chain_p)
24129 save_regno = 0;
24130 }
24131
24132 NOT_INUSE (save_regno);
24133 reg = gen_rtx_REG (SImode, save_regno);
24134 vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
24135 if (TARGET_MACHO)
24136 emit_insn (gen_get_vrsave_internal (reg));
24137 else
24138 emit_insn (gen_rtx_SET (VOIDmode, reg, vrsave));
24139
24140 /* Save VRSAVE. */
24141 offset = info->vrsave_save_offset + frame_off;
24142 insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
24143
24144 /* Include the registers in the mask. */
24145 emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));
24146
24147 insn = emit_insn (generate_set_vrsave (reg, info, 0));
24148 }
24149
24150 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
24151 if (!TARGET_SINGLE_PIC_BASE
24152 && ((TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
24153 || (DEFAULT_ABI == ABI_V4
24154 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
24155 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
24156 {
24157 /* If emit_load_toc_table will use the link register, we need to save
24158 it. We use R12 for this purpose because emit_load_toc_table
24159 can use register 0. This allows us to use a plain 'blr' to return
24160 from the procedure more often. */
24161 int save_LR_around_toc_setup = (TARGET_ELF
24162 && DEFAULT_ABI == ABI_V4
24163 && flag_pic
24164 && ! info->lr_save_p
24165 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
24166 if (save_LR_around_toc_setup)
24167 {
24168 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
24169 rtx tmp = gen_rtx_REG (Pmode, 12);
24170
24171 insn = emit_move_insn (tmp, lr);
24172 RTX_FRAME_RELATED_P (insn) = 1;
24173
24174 rs6000_emit_load_toc_table (TRUE);
24175
24176 insn = emit_move_insn (lr, tmp);
24177 add_reg_note (insn, REG_CFA_RESTORE, lr);
24178 RTX_FRAME_RELATED_P (insn) = 1;
24179 }
24180 else
24181 rs6000_emit_load_toc_table (TRUE);
24182 }
24183
24184 #if TARGET_MACHO
24185 if (!TARGET_SINGLE_PIC_BASE
24186 && DEFAULT_ABI == ABI_DARWIN
24187 && flag_pic && crtl->uses_pic_offset_table)
24188 {
24189 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
24190 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
24191
24192 /* Save and restore LR locally around this call (in R0). */
24193 if (!info->lr_save_p)
24194 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
24195
24196 emit_insn (gen_load_macho_picbase (src));
24197
24198 emit_move_insn (gen_rtx_REG (Pmode,
24199 RS6000_PIC_OFFSET_TABLE_REGNUM),
24200 lr);
24201
24202 if (!info->lr_save_p)
24203 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
24204 }
24205 #endif
24206
24207 /* If we need to, save the TOC register after doing the stack setup.
24208 Do not emit eh frame info for this save. The unwinder wants info,
24209 conceptually attached to instructions in this function, about
24210 register values in the caller of this function. This R2 may have
24211 already been changed from the value in the caller.
24212 We don't attempt to write accurate DWARF EH frame info for R2
24213 because code emitted by gcc for a (non-pointer) function call
24214 doesn't save and restore R2. Instead, R2 is managed out-of-line
24215 by a linker generated plt call stub when the function resides in
24216 a shared library. This behaviour is costly to describe in DWARF,
24217 both in terms of the size of DWARF info and the time taken in the
24218 unwinder to interpret it. R2 changes, apart from the
24219 calls_eh_return case earlier in this function, are handled by
24220 linux-unwind.h frob_update_context. */
24221 if (rs6000_save_toc_in_prologue_p ())
24222 {
24223 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
24224 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
24225 }
24226 }
24227
24228 /* Write function prologue. */
24229
24230 static void
24231 rs6000_output_function_prologue (FILE *file,
24232 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
24233 {
24234 rs6000_stack_t *info = rs6000_stack_info ();
24235
24236 if (TARGET_DEBUG_STACK)
24237 debug_stack_info (info);
24238
24239 /* Write .extern for any function we will call to save and restore
24240 fp values. */
24241 if (info->first_fp_reg_save < 64
24242 && !TARGET_MACHO
24243 && !TARGET_ELF)
24244 {
24245 char *name;
24246 int regno = info->first_fp_reg_save - 32;
24247
24248 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
24249 {
24250 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
24251 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
24252 name = rs6000_savres_routine_name (info, regno, sel);
24253 fprintf (file, "\t.extern %s\n", name);
24254 }
24255 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
24256 {
24257 bool lr = (info->savres_strategy
24258 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
24259 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
24260 name = rs6000_savres_routine_name (info, regno, sel);
24261 fprintf (file, "\t.extern %s\n", name);
24262 }
24263 }
24264
24265 /* ELFv2 ABI r2 setup code and local entry point. This must follow
24266 immediately after the global entry point label. */
24267 if (DEFAULT_ABI == ABI_ELFv2 && cfun->machine->r2_setup_needed)
24268 {
24269 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
24270
24271 fprintf (file, "0:\taddis 2,12,.TOC.-0b@ha\n");
24272 fprintf (file, "\taddi 2,2,.TOC.-0b@l\n");
24273
24274 fputs ("\t.localentry\t", file);
24275 assemble_name (file, name);
24276 fputs (",.-", file);
24277 assemble_name (file, name);
24278 fputs ("\n", file);
24279 }
24280
24281 /* Output -mprofile-kernel code. This needs to be done here instead of
24282 in output_function_profile since it must go after the ELFv2 ABI
24283 local entry point. */
24284 if (TARGET_PROFILE_KERNEL && crtl->profile)
24285 {
24286 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
24287 gcc_assert (!TARGET_32BIT);
24288
24289 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
24290 asm_fprintf (file, "\tstd %s,16(%s)\n", reg_names[0], reg_names[1]);
24291
24292 /* In the ELFv2 ABI we have no compiler stack word. It must be
24293 the resposibility of _mcount to preserve the static chain
24294 register if required. */
24295 if (DEFAULT_ABI != ABI_ELFv2
24296 && cfun->static_chain_decl != NULL)
24297 {
24298 asm_fprintf (file, "\tstd %s,24(%s)\n",
24299 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
24300 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
24301 asm_fprintf (file, "\tld %s,24(%s)\n",
24302 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
24303 }
24304 else
24305 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
24306 }
24307
24308 rs6000_pic_labelno++;
24309 }
24310
24311 /* Non-zero if vmx regs are restored before the frame pop, zero if
24312 we restore after the pop when possible. */
24313 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
24314
24315 /* Restoring cr is a two step process: loading a reg from the frame
24316 save, then moving the reg to cr. For ABI_V4 we must let the
24317 unwinder know that the stack location is no longer valid at or
24318 before the stack deallocation, but we can't emit a cfa_restore for
24319 cr at the stack deallocation like we do for other registers.
24320 The trouble is that it is possible for the move to cr to be
24321 scheduled after the stack deallocation. So say exactly where cr
24322 is located on each of the two insns. */
24323
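/* A minimal sketch of the two insns, assuming r12 carries the reload
   and only cr2 needs restoring:

       lwz   r12,<off>(r1)       # load_cr_save: frame slot -> GPR
       mtcrf 0x20,r12            # restore_saved_cr: GPR -> cr2

   The CFA notes described above keep the unwinder's view of cr
   correct even if the scheduler separates these two insns. */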
24324 static rtx
24325 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
24326 {
24327 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
24328 rtx reg = gen_rtx_REG (SImode, regno);
24329 rtx_insn *insn = emit_move_insn (reg, mem);
24330
24331 if (!exit_func && DEFAULT_ABI == ABI_V4)
24332 {
24333 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
24334 rtx set = gen_rtx_SET (VOIDmode, reg, cr);
24335
24336 add_reg_note (insn, REG_CFA_REGISTER, set);
24337 RTX_FRAME_RELATED_P (insn) = 1;
24338 }
24339 return reg;
24340 }
24341
24342 /* Reload CR from REG. */
24343
24344 static void
24345 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
24346 {
24347 int count = 0;
24348 int i;
24349
24350 if (using_mfcr_multiple)
24351 {
24352 for (i = 0; i < 8; i++)
24353 if (save_reg_p (CR0_REGNO + i))
24354 count++;
24355 gcc_assert (count);
24356 }
24357
24358 if (using_mfcr_multiple && count > 1)
24359 {
24360 rtx_insn *insn;
24361 rtvec p;
24362 int ndx;
24363
24364 p = rtvec_alloc (count);
24365
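/* Build one SET per saved CR field; the PARALLEL matches the
   mtcrf_operation pattern, i.e. a single mtcrf whose FXM mask has
   one bit for each field restored here. */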
24366 ndx = 0;
24367 for (i = 0; i < 8; i++)
24368 if (save_reg_p (CR0_REGNO + i))
24369 {
24370 rtvec r = rtvec_alloc (2);
24371 RTVEC_ELT (r, 0) = reg;
24372 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
24373 RTVEC_ELT (p, ndx) =
24374 gen_rtx_SET (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i),
24375 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
24376 ndx++;
24377 }
24378 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
24379 gcc_assert (ndx == count);
24380
24381 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
24382 CR field separately. */
24383 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
24384 {
24385 for (i = 0; i < 8; i++)
24386 if (save_reg_p (CR0_REGNO + i))
24387 add_reg_note (insn, REG_CFA_RESTORE,
24388 gen_rtx_REG (SImode, CR0_REGNO + i));
24389
24390 RTX_FRAME_RELATED_P (insn) = 1;
24391 }
24392 }
24393 else
24394 for (i = 0; i < 8; i++)
24395 if (save_reg_p (CR0_REGNO + i))
24396 {
24397 rtx insn = emit_insn (gen_movsi_to_cr_one
24398 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
24399
24400 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
24401 CR field separately, attached to the insn that in fact
24402 restores this particular CR field. */
24403 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
24404 {
24405 add_reg_note (insn, REG_CFA_RESTORE,
24406 gen_rtx_REG (SImode, CR0_REGNO + i));
24407
24408 RTX_FRAME_RELATED_P (insn) = 1;
24409 }
24410 }
24411
24412 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
24413 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
24414 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
24415 {
24416 rtx_insn *insn = get_last_insn ();
24417 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
24418
24419 add_reg_note (insn, REG_CFA_RESTORE, cr);
24420 RTX_FRAME_RELATED_P (insn) = 1;
24421 }
24422 }
24423
24424 /* Like cr, the move to lr instruction can be scheduled after the
24425 stack deallocation, but unlike cr, its stack frame save is still
24426 valid. So we only need to emit the cfa_restore on the correct
24427 instruction. */
24428
24429 static void
24430 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
24431 {
24432 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
24433 rtx reg = gen_rtx_REG (Pmode, regno);
24434
24435 emit_move_insn (reg, mem);
24436 }
24437
24438 static void
24439 restore_saved_lr (int regno, bool exit_func)
24440 {
24441 rtx reg = gen_rtx_REG (Pmode, regno);
24442 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
24443 rtx_insn *insn = emit_move_insn (lr, reg);
24444
24445 if (!exit_func && flag_shrink_wrap)
24446 {
24447 add_reg_note (insn, REG_CFA_RESTORE, lr);
24448 RTX_FRAME_RELATED_P (insn) = 1;
24449 }
24450 }
24451
24452 static rtx
24453 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
24454 {
24455 if (DEFAULT_ABI == ABI_ELFv2)
24456 {
24457 int i;
24458 for (i = 0; i < 8; i++)
24459 if (save_reg_p (CR0_REGNO + i))
24460 {
24461 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
24462 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
24463 cfa_restores);
24464 }
24465 }
24466 else if (info->cr_save_p)
24467 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24468 gen_rtx_REG (SImode, CR2_REGNO),
24469 cfa_restores);
24470
24471 if (info->lr_save_p)
24472 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24473 gen_rtx_REG (Pmode, LR_REGNO),
24474 cfa_restores);
24475 return cfa_restores;
24476 }
24477
24478 /* Return true if OFFSET from stack pointer can be clobbered by signals.
24479 V.4 doesn't have any stack cushion; the AIX ABIs guarantee that 220
24480 (32-bit) or 288 (64-bit) bytes below the stack pointer are not clobbered by signals. */
24481
24482 static inline bool
24483 offset_below_red_zone_p (HOST_WIDE_INT offset)
24484 {
24485 return offset < (DEFAULT_ABI == ABI_V4
24486 ? 0
24487 : TARGET_32BIT ? -220 : -288);
24488 }
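/* For example, with the 64-bit 288-byte cushion, an AltiVec save slot
   at offset -320 lies below the red zone and must be reloaded before
   the frame pop, while a slot at -128 is protected and can wait. */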
24489
24490 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
24491
24492 static void
24493 emit_cfa_restores (rtx cfa_restores)
24494 {
24495 rtx_insn *insn = get_last_insn ();
24496 rtx *loc = &REG_NOTES (insn);
24497
24498 while (*loc)
24499 loc = &XEXP (*loc, 1);
24500 *loc = cfa_restores;
24501 RTX_FRAME_RELATED_P (insn) = 1;
24502 }
24503
24504 /* Emit function epilogue as insns. */
24505
24506 void
24507 rs6000_emit_epilogue (int sibcall)
24508 {
24509 rs6000_stack_t *info;
24510 int restoring_GPRs_inline;
24511 int restoring_FPRs_inline;
24512 int using_load_multiple;
24513 int using_mtcr_multiple;
24514 int use_backchain_to_restore_sp;
24515 int restore_lr;
24516 int strategy;
24517 HOST_WIDE_INT frame_off = 0;
24518 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
24519 rtx frame_reg_rtx = sp_reg_rtx;
24520 rtx cfa_restores = NULL_RTX;
24521 rtx insn;
24522 rtx cr_save_reg = NULL_RTX;
24523 machine_mode reg_mode = Pmode;
24524 int reg_size = TARGET_32BIT ? 4 : 8;
24525 int i;
24526 bool exit_func;
24527 unsigned ptr_regno;
24528
24529 info = rs6000_stack_info ();
24530
24531 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
24532 {
24533 reg_mode = V2SImode;
24534 reg_size = 8;
24535 }
24536
24537 strategy = info->savres_strategy;
24538 using_load_multiple = strategy & SAVRES_MULTIPLE;
24539 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
24540 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
24541 using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
24542 || rs6000_cpu == PROCESSOR_PPC603
24543 || rs6000_cpu == PROCESSOR_PPC750
24544 || optimize_size);
24545 /* Restore via the backchain when we have a large frame, since this
24546 is more efficient than an addis, addi pair. The second condition
24547 here will not trigger at the moment; we don't actually need a
24548 frame pointer for alloca, but the generic parts of the compiler
24549 give us one anyway. */
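/* Sketch of the two alternatives for a 64-bit frame too large for a
   16-bit displacement (r11 here is illustrative):

       ld    r11,0(r1)           # backchain: one load
   vs.
       addis r11,r1,<size>@ha    # materialize the frame size ...
       addi  r11,r11,<size>@l    # ... with an addis, addi pair */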
24550 use_backchain_to_restore_sp = (info->total_size > 32767 - info->lr_save_offset
24551 || (cfun->calls_alloca
24552 && !frame_pointer_needed));
24553 restore_lr = (info->lr_save_p
24554 && (restoring_FPRs_inline
24555 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
24556 && (restoring_GPRs_inline
24557 || info->first_fp_reg_save < 64));
24558
24559 if (WORLD_SAVE_P (info))
24560 {
24561 int i, j;
24562 char rname[30];
24563 const char *alloc_rname;
24564 rtvec p;
24565
24566 /* eh_rest_world_r10 will return to the location saved in the LR
24567 stack slot (which is not likely to be our caller).
24568 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
24569 rest_world is similar, except any R10 parameter is ignored.
24570 The exception-handling stuff that was here in 2.95 is no
24571 longer necessary. */
24572
24573 p = rtvec_alloc (9
24574 + 1
24575 + 32 - info->first_gp_reg_save
24576 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
24577 + 63 + 1 - info->first_fp_reg_save);
24578
24579 strcpy (rname, ((crtl->calls_eh_return) ?
24580 "*eh_rest_world_r10" : "*rest_world"));
24581 alloc_rname = ggc_strdup (rname);
24582
24583 j = 0;
24584 RTVEC_ELT (p, j++) = ret_rtx;
24585 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
24586 gen_rtx_REG (Pmode,
24587 LR_REGNO));
24588 RTVEC_ELT (p, j++)
24589 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
24590 /* The instruction pattern requires a clobber here;
24591 it is shared with the restVEC helper. */
24592 RTVEC_ELT (p, j++)
24593 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
24594
24595 {
24596 /* CR register traditionally saved as CR2. */
24597 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
24598 RTVEC_ELT (p, j++)
24599 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
24600 if (flag_shrink_wrap)
24601 {
24602 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24603 gen_rtx_REG (Pmode, LR_REGNO),
24604 cfa_restores);
24605 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24606 }
24607 }
24608
24609 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
24610 {
24611 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
24612 RTVEC_ELT (p, j++)
24613 = gen_frame_load (reg,
24614 frame_reg_rtx, info->gp_save_offset + reg_size * i);
24615 if (flag_shrink_wrap)
24616 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24617 }
24618 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
24619 {
24620 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
24621 RTVEC_ELT (p, j++)
24622 = gen_frame_load (reg,
24623 frame_reg_rtx, info->altivec_save_offset + 16 * i);
24624 if (flag_shrink_wrap)
24625 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24626 }
24627 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
24628 {
24629 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
24630 ? DFmode : SFmode),
24631 info->first_fp_reg_save + i);
24632 RTVEC_ELT (p, j++)
24633 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
24634 if (flag_shrink_wrap)
24635 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24636 }
24637 RTVEC_ELT (p, j++)
24638 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
24639 RTVEC_ELT (p, j++)
24640 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
24641 RTVEC_ELT (p, j++)
24642 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
24643 RTVEC_ELT (p, j++)
24644 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
24645 RTVEC_ELT (p, j++)
24646 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
24647 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
24648
24649 if (flag_shrink_wrap)
24650 {
24651 REG_NOTES (insn) = cfa_restores;
24652 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
24653 RTX_FRAME_RELATED_P (insn) = 1;
24654 }
24655 return;
24656 }
24657
24658 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
24659 if (info->push_p)
24660 frame_off = info->total_size;
24661
24662 /* Restore AltiVec registers if we must do so before adjusting the
24663 stack. */
24664 if (TARGET_ALTIVEC_ABI
24665 && info->altivec_size != 0
24666 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24667 || (DEFAULT_ABI != ABI_V4
24668 && offset_below_red_zone_p (info->altivec_save_offset))))
24669 {
24670 int i;
24671 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
24672
24673 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
24674 if (use_backchain_to_restore_sp)
24675 {
24676 int frame_regno = 11;
24677
24678 if ((strategy & REST_INLINE_VRS) == 0)
24679 {
24680 /* Of r11 and r12, select the one not clobbered by an
24681 out-of-line restore function for the frame register. */
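/* scratch_regno is asserted to be 11 or 12 above, so this
   arithmetic selects the other register of the pair. */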
24682 frame_regno = 11 + 12 - scratch_regno;
24683 }
24684 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
24685 emit_move_insn (frame_reg_rtx,
24686 gen_rtx_MEM (Pmode, sp_reg_rtx));
24687 frame_off = 0;
24688 }
24689 else if (frame_pointer_needed)
24690 frame_reg_rtx = hard_frame_pointer_rtx;
24691
24692 if ((strategy & REST_INLINE_VRS) == 0)
24693 {
24694 int end_save = info->altivec_save_offset + info->altivec_size;
24695 int ptr_off;
24696 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
24697 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
24698
24699 if (end_save + frame_off != 0)
24700 {
24701 rtx offset = GEN_INT (end_save + frame_off);
24702
24703 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
24704 }
24705 else
24706 emit_move_insn (ptr_reg, frame_reg_rtx);
24707
24708 ptr_off = -end_save;
24709 insn = rs6000_emit_savres_rtx (info, scratch_reg,
24710 info->altivec_save_offset + ptr_off,
24711 0, V4SImode, SAVRES_VR);
24712 }
24713 else
24714 {
24715 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24716 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24717 {
24718 rtx addr, areg, mem, reg;
24719
24720 areg = gen_rtx_REG (Pmode, 0);
24721 emit_move_insn
24722 (areg, GEN_INT (info->altivec_save_offset
24723 + frame_off
24724 + 16 * (i - info->first_altivec_reg_save)));
24725
24726 /* AltiVec addressing mode is [reg+reg]. */
24727 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
24728 mem = gen_frame_mem (V4SImode, addr);
24729
24730 reg = gen_rtx_REG (V4SImode, i);
24731 emit_move_insn (reg, mem);
24732 }
24733 }
24734
24735 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24736 if (((strategy & REST_INLINE_VRS) == 0
24737 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
24738 && (flag_shrink_wrap
24739 || (offset_below_red_zone_p
24740 (info->altivec_save_offset
24741 + 16 * (i - info->first_altivec_reg_save)))))
24742 {
24743 rtx reg = gen_rtx_REG (V4SImode, i);
24744 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24745 }
24746 }
24747
24748 /* Restore VRSAVE if we must do so before adjusting the stack. */
24749 if (TARGET_ALTIVEC
24750 && TARGET_ALTIVEC_VRSAVE
24751 && info->vrsave_mask != 0
24752 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24753 || (DEFAULT_ABI != ABI_V4
24754 && offset_below_red_zone_p (info->vrsave_save_offset))))
24755 {
24756 rtx reg;
24757
24758 if (frame_reg_rtx == sp_reg_rtx)
24759 {
24760 if (use_backchain_to_restore_sp)
24761 {
24762 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
24763 emit_move_insn (frame_reg_rtx,
24764 gen_rtx_MEM (Pmode, sp_reg_rtx));
24765 frame_off = 0;
24766 }
24767 else if (frame_pointer_needed)
24768 frame_reg_rtx = hard_frame_pointer_rtx;
24769 }
24770
24771 reg = gen_rtx_REG (SImode, 12);
24772 emit_insn (gen_frame_load (reg, frame_reg_rtx,
24773 info->vrsave_save_offset + frame_off));
24774
24775 emit_insn (generate_set_vrsave (reg, info, 1));
24776 }
24777
24778 insn = NULL_RTX;
24779 /* If we have a large stack frame, restore the old stack pointer
24780 using the backchain. */
24781 if (use_backchain_to_restore_sp)
24782 {
24783 if (frame_reg_rtx == sp_reg_rtx)
24784 {
24785 /* Under V.4, don't reset the stack pointer until after we're done
24786 loading the saved registers. */
24787 if (DEFAULT_ABI == ABI_V4)
24788 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
24789
24790 insn = emit_move_insn (frame_reg_rtx,
24791 gen_rtx_MEM (Pmode, sp_reg_rtx));
24792 frame_off = 0;
24793 }
24794 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24795 && DEFAULT_ABI == ABI_V4)
24796 /* frame_reg_rtx has been set up by the altivec restore. */
24797 ;
24798 else
24799 {
24800 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
24801 frame_reg_rtx = sp_reg_rtx;
24802 }
24803 }
24804 /* If we have a frame pointer, we can restore the old stack pointer
24805 from it. */
24806 else if (frame_pointer_needed)
24807 {
24808 frame_reg_rtx = sp_reg_rtx;
24809 if (DEFAULT_ABI == ABI_V4)
24810 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
24811 /* Prevent reordering memory accesses against stack pointer restore. */
24812 else if (cfun->calls_alloca
24813 || offset_below_red_zone_p (-info->total_size))
24814 rs6000_emit_stack_tie (frame_reg_rtx, true);
24815
24816 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
24817 GEN_INT (info->total_size)));
24818 frame_off = 0;
24819 }
24820 else if (info->push_p
24821 && DEFAULT_ABI != ABI_V4
24822 && !crtl->calls_eh_return)
24823 {
24824 /* Prevent reordering memory accesses against stack pointer restore. */
24825 if (cfun->calls_alloca
24826 || offset_below_red_zone_p (-info->total_size))
24827 rs6000_emit_stack_tie (frame_reg_rtx, false);
24828 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
24829 GEN_INT (info->total_size)));
24830 frame_off = 0;
24831 }
24832 if (insn && frame_reg_rtx == sp_reg_rtx)
24833 {
24834 if (cfa_restores)
24835 {
24836 REG_NOTES (insn) = cfa_restores;
24837 cfa_restores = NULL_RTX;
24838 }
24839 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
24840 RTX_FRAME_RELATED_P (insn) = 1;
24841 }
24842
24843 /* Restore AltiVec registers if we have not done so already. */
24844 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24845 && TARGET_ALTIVEC_ABI
24846 && info->altivec_size != 0
24847 && (DEFAULT_ABI == ABI_V4
24848 || !offset_below_red_zone_p (info->altivec_save_offset)))
24849 {
24850 int i;
24851
24852 if ((strategy & REST_INLINE_VRS) == 0)
24853 {
24854 int end_save = info->altivec_save_offset + info->altivec_size;
24855 int ptr_off;
24856 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
24857 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
24858 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
24859
24860 if (end_save + frame_off != 0)
24861 {
24862 rtx offset = GEN_INT (end_save + frame_off);
24863
24864 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
24865 }
24866 else
24867 emit_move_insn (ptr_reg, frame_reg_rtx);
24868
24869 ptr_off = -end_save;
24870 insn = rs6000_emit_savres_rtx (info, scratch_reg,
24871 info->altivec_save_offset + ptr_off,
24872 0, V4SImode, SAVRES_VR);
24873 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
24874 {
24875 /* Frame reg was clobbered by out-of-line save. Restore it
24876 from ptr_reg, and if we are calling out-of-line gpr or
24877 fpr restore set up the correct pointer and offset. */
24878 unsigned newptr_regno = 1;
24879 if (!restoring_GPRs_inline)
24880 {
24881 bool lr = info->gp_save_offset + info->gp_size == 0;
24882 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
24883 newptr_regno = ptr_regno_for_savres (sel);
24884 end_save = info->gp_save_offset + info->gp_size;
24885 }
24886 else if (!restoring_FPRs_inline)
24887 {
24888 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
24889 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
24890 newptr_regno = ptr_regno_for_savres (sel);
24891 end_save = info->fp_save_offset + info->fp_size;
24892 }
24893
24894 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
24895 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
24896
24897 if (end_save + ptr_off != 0)
24898 {
24899 rtx offset = GEN_INT (end_save + ptr_off);
24900
24901 frame_off = -end_save;
24902 emit_insn (gen_add3_insn (frame_reg_rtx, ptr_reg, offset));
24903 }
24904 else
24905 {
24906 frame_off = ptr_off;
24907 emit_move_insn (frame_reg_rtx, ptr_reg);
24908 }
24909 }
24910 }
24911 else
24912 {
24913 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24914 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24915 {
24916 rtx addr, areg, mem, reg;
24917
24918 areg = gen_rtx_REG (Pmode, 0);
24919 emit_move_insn
24920 (areg, GEN_INT (info->altivec_save_offset
24921 + frame_off
24922 + 16 * (i - info->first_altivec_reg_save)));
24923
24924 /* AltiVec addressing mode is [reg+reg]. */
24925 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
24926 mem = gen_frame_mem (V4SImode, addr);
24927
24928 reg = gen_rtx_REG (V4SImode, i);
24929 emit_move_insn (reg, mem);
24930 }
24931 }
24932
24933 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24934 if (((strategy & REST_INLINE_VRS) == 0
24935 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
24936 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
24937 {
24938 rtx reg = gen_rtx_REG (V4SImode, i);
24939 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24940 }
24941 }
24942
24943 /* Restore VRSAVE if we have not done so already. */
24944 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24945 && TARGET_ALTIVEC
24946 && TARGET_ALTIVEC_VRSAVE
24947 && info->vrsave_mask != 0
24948 && (DEFAULT_ABI == ABI_V4
24949 || !offset_below_red_zone_p (info->vrsave_save_offset)))
24950 {
24951 rtx reg;
24952
24953 reg = gen_rtx_REG (SImode, 12);
24954 emit_insn (gen_frame_load (reg, frame_reg_rtx,
24955 info->vrsave_save_offset + frame_off));
24956
24957 emit_insn (generate_set_vrsave (reg, info, 1));
24958 }
24959
24960 /* If we exit by an out-of-line restore function on ABI_V4 then that
24961 function will deallocate the stack, so we don't need to worry
24962 about the unwinder restoring cr from an invalid stack frame
24963 location. */
24964 exit_func = (!restoring_FPRs_inline
24965 || (!restoring_GPRs_inline
24966 && info->first_fp_reg_save == 64));
24967
24968 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
24969 *separate* slots if the routine calls __builtin_eh_return, so
24970 that they can be independently restored by the unwinder. */
24971 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
24972 {
24973 int i, cr_off = info->ehcr_offset;
24974
24975 for (i = 0; i < 8; i++)
24976 if (!call_used_regs[CR0_REGNO + i])
24977 {
24978 rtx reg = gen_rtx_REG (SImode, 0);
24979 emit_insn (gen_frame_load (reg, frame_reg_rtx,
24980 cr_off + frame_off));
24981
24982 insn = emit_insn (gen_movsi_to_cr_one
24983 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
24984
24985 if (!exit_func && flag_shrink_wrap)
24986 {
24987 add_reg_note (insn, REG_CFA_RESTORE,
24988 gen_rtx_REG (SImode, CR0_REGNO + i));
24989
24990 RTX_FRAME_RELATED_P (insn) = 1;
24991 }
24992
24993 cr_off += reg_size;
24994 }
24995 }
24996
24997 /* Get the old lr if we saved it. If we are restoring registers
24998 out-of-line, then the out-of-line routines can do this for us. */
24999 if (restore_lr && restoring_GPRs_inline)
25000 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
25001
25002 /* Get the old cr if we saved it. */
25003 if (info->cr_save_p)
25004 {
25005 unsigned cr_save_regno = 12;
25006
25007 if (!restoring_GPRs_inline)
25008 {
25009 /* Ensure we don't use the register used by the out-of-line
25010 gpr register restore below. */
25011 bool lr = info->gp_save_offset + info->gp_size == 0;
25012 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
25013 int gpr_ptr_regno = ptr_regno_for_savres (sel);
25014
25015 if (gpr_ptr_regno == 12)
25016 cr_save_regno = 11;
25017 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
25018 }
25019 else if (REGNO (frame_reg_rtx) == 12)
25020 cr_save_regno = 11;
25021
25022 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
25023 info->cr_save_offset + frame_off,
25024 exit_func);
25025 }
25026
25027 /* Set LR here to try to overlap restores below. */
25028 if (restore_lr && restoring_GPRs_inline)
25029 restore_saved_lr (0, exit_func);
25030
25031 /* Load exception handler data registers, if needed. */
25032 if (crtl->calls_eh_return)
25033 {
25034 unsigned int i, regno;
25035
25036 if (TARGET_AIX)
25037 {
25038 rtx reg = gen_rtx_REG (reg_mode, 2);
25039 emit_insn (gen_frame_load (reg, frame_reg_rtx,
25040 frame_off + RS6000_TOC_SAVE_SLOT));
25041 }
25042
25043 for (i = 0; ; ++i)
25044 {
25045 rtx mem;
25046
25047 regno = EH_RETURN_DATA_REGNO (i);
25048 if (regno == INVALID_REGNUM)
25049 break;
25050
25051 /* Note: possible use of r0 here to address SPE regs. */
25052 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
25053 info->ehrd_offset + frame_off
25054 + reg_size * (int) i);
25055
25056 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
25057 }
25058 }
25059
25060 /* Restore GPRs. This is done as a PARALLEL if we are using
25061 the load-multiple instructions. */
25062 if (TARGET_SPE_ABI
25063 && info->spe_64bit_regs_used
25064 && info->first_gp_reg_save != 32)
25065 {
25066 /* Determine whether we can address all of the registers that need
25067 to be saved with an offset from frame_reg_rtx that fits in
25068 the small const field for SPE memory instructions. */
25069 int spe_regs_addressable
25070 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
25071 + reg_size * (32 - info->first_gp_reg_save - 1))
25072 && restoring_GPRs_inline);
25073
25074 if (!spe_regs_addressable)
25075 {
25076 int ool_adjust = 0;
25077 rtx old_frame_reg_rtx = frame_reg_rtx;
25078 /* Make r11 point to the start of the SPE save area. We worried about
25079 not clobbering it when we were saving registers in the prologue.
25080 There's no need to worry here because the static chain is passed
25081 anew to every function. */
25082
25083 if (!restoring_GPRs_inline)
25084 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
25085 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
25086 emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
25087 GEN_INT (info->spe_gp_save_offset
25088 + frame_off
25089 - ool_adjust)));
25090 /* Keep the invariant that frame_reg_rtx + frame_off points
25091 at the top of the stack frame. */
25092 frame_off = -info->spe_gp_save_offset + ool_adjust;
25093 }
25094
25095 if (restoring_GPRs_inline)
25096 {
25097 HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off;
25098
25099 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25100 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
25101 {
25102 rtx offset, addr, mem, reg;
25103
25104 /* We're doing all this to ensure that the immediate offset
25105 fits into the immediate field of 'evldd'. */
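/* (SPE_CONST_OFFSET_OK accepts exactly the multiples of 8 in
   [0, 248] that evldd's scaled 5-bit offset field can encode.) */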
25106 gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));
25107
25108 offset = GEN_INT (spe_offset + reg_size * i);
25109 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
25110 mem = gen_rtx_MEM (V2SImode, addr);
25111 reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
25112
25113 emit_move_insn (reg, mem);
25114 }
25115 }
25116 else
25117 rs6000_emit_savres_rtx (info, frame_reg_rtx,
25118 info->spe_gp_save_offset + frame_off,
25119 info->lr_save_offset + frame_off,
25120 reg_mode,
25121 SAVRES_GPR | SAVRES_LR);
25122 }
25123 else if (!restoring_GPRs_inline)
25124 {
25125 /* We are jumping to an out-of-line function. */
25126 rtx ptr_reg;
25127 int end_save = info->gp_save_offset + info->gp_size;
25128 bool can_use_exit = end_save == 0;
25129 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
25130 int ptr_off;
25131
25132 /* Emit stack reset code if we need it. */
25133 ptr_regno = ptr_regno_for_savres (sel);
25134 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
25135 if (can_use_exit)
25136 rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
25137 else if (end_save + frame_off != 0)
25138 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
25139 GEN_INT (end_save + frame_off)));
25140 else if (REGNO (frame_reg_rtx) != ptr_regno)
25141 emit_move_insn (ptr_reg, frame_reg_rtx);
25142 if (REGNO (frame_reg_rtx) == ptr_regno)
25143 frame_off = -end_save;
25144
25145 if (can_use_exit && info->cr_save_p)
25146 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
25147
25148 ptr_off = -end_save;
25149 rs6000_emit_savres_rtx (info, ptr_reg,
25150 info->gp_save_offset + ptr_off,
25151 info->lr_save_offset + ptr_off,
25152 reg_mode, sel);
25153 }
25154 else if (using_load_multiple)
25155 {
25156 rtvec p;
25157 p = rtvec_alloc (32 - info->first_gp_reg_save);
25158 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25159 RTVEC_ELT (p, i)
25160 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
25161 frame_reg_rtx,
25162 info->gp_save_offset + frame_off + reg_size * i);
25163 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
25164 }
25165 else
25166 {
25167 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25168 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
25169 emit_insn (gen_frame_load
25170 (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
25171 frame_reg_rtx,
25172 info->gp_save_offset + frame_off + reg_size * i));
25173 }
25174
25175 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
25176 {
25177 /* If the frame pointer was used then we can't delay emitting
25178 a REG_CFA_DEF_CFA note. This must happen on the insn that
25179 restores the frame pointer, r31. We may have already emitted
25180 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
25181 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
25182 be harmless if emitted. */
25183 if (frame_pointer_needed)
25184 {
25185 insn = get_last_insn ();
25186 add_reg_note (insn, REG_CFA_DEF_CFA,
25187 plus_constant (Pmode, frame_reg_rtx, frame_off));
25188 RTX_FRAME_RELATED_P (insn) = 1;
25189 }
25190
25191 /* Set up cfa_restores. We always need these when
25192 shrink-wrapping. If not shrink-wrapping then we only need
25193 the cfa_restore when the stack location is no longer valid.
25194 The cfa_restores must be emitted on or before the insn that
25195 invalidates the stack, and of course must not be emitted
25196 before the insn that actually does the restore. The latter
25197 is why it is a bad idea to emit the cfa_restores as a group
25198 on the last instruction here that actually does a restore:
25199 That insn may be reordered with respect to others doing
25200 restores. */
25201 if (flag_shrink_wrap
25202 && !restoring_GPRs_inline
25203 && info->first_fp_reg_save == 64)
25204 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
25205
25206 for (i = info->first_gp_reg_save; i < 32; i++)
25207 if (!restoring_GPRs_inline
25208 || using_load_multiple
25209 || rs6000_reg_live_or_pic_offset_p (i))
25210 {
25211 rtx reg = gen_rtx_REG (reg_mode, i);
25212
25213 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
25214 }
25215 }
25216
25217 if (!restoring_GPRs_inline
25218 && info->first_fp_reg_save == 64)
25219 {
25220 /* We are jumping to an out-of-line function. */
25221 if (cfa_restores)
25222 emit_cfa_restores (cfa_restores);
25223 return;
25224 }
25225
25226 if (restore_lr && !restoring_GPRs_inline)
25227 {
25228 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
25229 restore_saved_lr (0, exit_func);
25230 }
25231
25232 /* Restore fpr's if we need to do it without calling a function. */
25233 if (restoring_FPRs_inline)
25234 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
25235 if (save_reg_p (info->first_fp_reg_save + i))
25236 {
25237 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
25238 ? DFmode : SFmode),
25239 info->first_fp_reg_save + i);
25240 emit_insn (gen_frame_load (reg, frame_reg_rtx,
25241 info->fp_save_offset + frame_off + 8 * i));
25242 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
25243 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
25244 }
25245
25246 /* If we saved cr, restore it here. Just those that were used. */
25247 if (info->cr_save_p)
25248 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
25249
25250 /* If this is V.4, unwind the stack pointer after all of the loads
25251 have been done, or set up r11 if we are restoring fp out of line. */
25252 ptr_regno = 1;
25253 if (!restoring_FPRs_inline)
25254 {
25255 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
25256 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
25257 ptr_regno = ptr_regno_for_savres (sel);
25258 }
25259
25260 insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
25261 if (REGNO (frame_reg_rtx) == ptr_regno)
25262 frame_off = 0;
25263
25264 if (insn && restoring_FPRs_inline)
25265 {
25266 if (cfa_restores)
25267 {
25268 REG_NOTES (insn) = cfa_restores;
25269 cfa_restores = NULL_RTX;
25270 }
25271 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
25272 RTX_FRAME_RELATED_P (insn) = 1;
25273 }
25274
25275 if (crtl->calls_eh_return)
25276 {
25277 rtx sa = EH_RETURN_STACKADJ_RTX;
25278 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
25279 }
25280
25281 if (!sibcall)
25282 {
25283 rtvec p;
25284 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
25285 if (! restoring_FPRs_inline)
25286 {
25287 p = rtvec_alloc (4 + 64 - info->first_fp_reg_save);
25288 RTVEC_ELT (p, 0) = ret_rtx;
25289 }
25290 else
25291 {
25292 if (cfa_restores)
25293 {
25294 /* We can't hang the cfa_restores off a simple return,
25295 since the shrink-wrap code sometimes uses an existing
25296 return. This means there might be a path from
25297 pre-prologue code to this return, and dwarf2cfi code
25298 wants the eh_frame unwinder state to be the same on
25299 all paths to any point. So we need to emit the
25300 cfa_restores before the return. For -m64 we really
25301 don't need epilogue cfa_restores at all, except for
25302 this irritating dwarf2cfi-with-shrink-wrap
25303 requirement; the stack red-zone means eh_frame info
25304 from the prologue telling the unwinder to restore
25305 from the stack is perfectly good right to the end of
25306 the function. */
25307 emit_insn (gen_blockage ());
25308 emit_cfa_restores (cfa_restores);
25309 cfa_restores = NULL_RTX;
25310 }
25311 p = rtvec_alloc (2);
25312 RTVEC_ELT (p, 0) = simple_return_rtx;
25313 }
25314
25315 RTVEC_ELT (p, 1) = ((restoring_FPRs_inline || !lr)
25316 ? gen_rtx_USE (VOIDmode,
25317 gen_rtx_REG (Pmode, LR_REGNO))
25318 : gen_rtx_CLOBBER (VOIDmode,
25319 gen_rtx_REG (Pmode, LR_REGNO)));
25320
25321 /* If we have to restore more than two FP registers, branch to the
25322 restore function. It will return to our caller. */
25323 if (! restoring_FPRs_inline)
25324 {
25325 int i;
25326 int reg;
25327 rtx sym;
25328
25329 if (flag_shrink_wrap)
25330 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
25331
25332 sym = rs6000_savres_routine_sym (info,
25333 SAVRES_FPR | (lr ? SAVRES_LR : 0));
25334 RTVEC_ELT (p, 2) = gen_rtx_USE (VOIDmode, sym);
25335 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11;
25336 RTVEC_ELT (p, 3) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
25337
25338 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
25339 {
25340 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
25341
25342 RTVEC_ELT (p, i + 4)
25343 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
25344 if (flag_shrink_wrap)
25345 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
25346 cfa_restores);
25347 }
25348 }
25349
25350 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
25351 }
25352
25353 if (cfa_restores)
25354 {
25355 if (sibcall)
25356 /* Ensure the cfa_restores are hung off an insn that won't
25357 be reordered above other restores. */
25358 emit_insn (gen_blockage ());
25359
25360 emit_cfa_restores (cfa_restores);
25361 }
25362 }
25363
25364 /* Write function epilogue. */
25365
25366 static void
25367 rs6000_output_function_epilogue (FILE *file,
25368 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
25369 {
25370 #if TARGET_MACHO
25371 macho_branch_islands ();
25372 /* Mach-O doesn't support labels at the end of objects, so if
25373 it looks like we might want one, insert a NOP. */
25374 {
25375 rtx_insn *insn = get_last_insn ();
25376 rtx_insn *deleted_debug_label = NULL;
25377 while (insn
25378 && NOTE_P (insn)
25379 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
25380 {
25381 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
25382 notes; instead set their CODE_LABEL_NUMBER to -1.
25383 Otherwise there would be code generation differences
25384 between -g and -g0. */
25385 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
25386 deleted_debug_label = insn;
25387 insn = PREV_INSN (insn);
25388 }
25389 if (insn
25390 && (LABEL_P (insn)
25391 || (NOTE_P (insn)
25392 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
25393 fputs ("\tnop\n", file);
25394 else if (deleted_debug_label)
25395 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
25396 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
25397 CODE_LABEL_NUMBER (insn) = -1;
25398 }
25399 #endif
25400
25401 /* Output a traceback table here. See /usr/include/sys/debug.h for info
25402 on its format.
25403
25404 We don't output a traceback table if -finhibit-size-directive was
25405 used. The documentation for -finhibit-size-directive reads
25406 ``don't output a @code{.size} assembler directive, or anything
25407 else that would cause trouble if the function is split in the
25408 middle, and the two halves are placed at locations far apart in
25409 memory.'' The traceback table has this property, since it
25410 includes the offset from the start of the function to the
25411 traceback table itself.
25412
25413 System V.4 PowerPC (and the embedded ABI derived from it) uses a
25414 different traceback table. */
25415 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25416 && ! flag_inhibit_size_directive
25417 && rs6000_traceback != traceback_none && !cfun->is_thunk)
25418 {
25419 const char *fname = NULL;
25420 const char *language_string = lang_hooks.name;
25421 int fixed_parms = 0, float_parms = 0, parm_info = 0;
25422 int i;
25423 int optional_tbtab;
25424 rs6000_stack_t *info = rs6000_stack_info ();
25425
25426 if (rs6000_traceback == traceback_full)
25427 optional_tbtab = 1;
25428 else if (rs6000_traceback == traceback_part)
25429 optional_tbtab = 0;
25430 else
25431 optional_tbtab = !optimize_size && !TARGET_ELF;
25432
25433 if (optional_tbtab)
25434 {
25435 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25436 while (*fname == '.') /* V.4 encodes . in the name */
25437 fname++;
25438
25439 /* Need label immediately before tbtab, so we can compute
25440 its offset from the function start. */
25441 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
25442 ASM_OUTPUT_LABEL (file, fname);
25443 }
25444
25445 /* The .tbtab pseudo-op can only be used for the first eight
25446 expressions, since it can't handle the possibly variable
25447 length fields that follow. However, if you omit the optional
25448 fields, the assembler outputs zeros for all optional fields
25449 anyway, giving each variable length field its minimum length
25450 (as defined in sys/debug.h). Thus we cannot use the .tbtab
25451 pseudo-op at all. */
25452
25453 /* An all-zero word flags the start of the tbtab, for debuggers
25454 that have to find it by searching forward from the entry
25455 point or from the current pc. */
25456 fputs ("\t.long 0\n", file);
25457
25458 /* Tbtab format type. Use format type 0. */
25459 fputs ("\t.byte 0,", file);
25460
25461 /* Language type. Unfortunately, there does not seem to be any
25462 official way to discover the language being compiled, so we
25463 use language_string.
25464 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
25465 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
25466 a number, so for now use 9. LTO and Go aren't assigned numbers
25467 either, so for now use 0. */
25468 if (lang_GNU_C ()
25469 || ! strcmp (language_string, "GNU GIMPLE")
25470 || ! strcmp (language_string, "GNU Go"))
25471 i = 0;
25472 else if (! strcmp (language_string, "GNU F77")
25473 || ! strcmp (language_string, "GNU Fortran"))
25474 i = 1;
25475 else if (! strcmp (language_string, "GNU Pascal"))
25476 i = 2;
25477 else if (! strcmp (language_string, "GNU Ada"))
25478 i = 3;
25479 else if (lang_GNU_CXX ()
25480 || ! strcmp (language_string, "GNU Objective-C++"))
25481 i = 9;
25482 else if (! strcmp (language_string, "GNU Java"))
25483 i = 13;
25484 else if (! strcmp (language_string, "GNU Objective-C"))
25485 i = 14;
25486 else
25487 gcc_unreachable ();
25488 fprintf (file, "%d,", i);
25489
25490 /* 8 single bit fields: global linkage (not set for C extern linkage,
25491 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
25492 from start of procedure stored in tbtab, internal function, function
25493 has controlled storage, function has no toc, function uses fp,
25494 function logs/aborts fp operations. */
25495 /* Assume that fp operations are used if any fp reg must be saved. */
25496 fprintf (file, "%d,",
25497 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
25498
25499 /* 6 bitfields: function is interrupt handler, name present in
25500 proc table, function calls alloca, on condition directives
25501 (controls stack walks, 3 bits), saves condition reg, saves
25502 link reg. */
25503 /* The `function calls alloca' bit seems to be set whenever reg 31 is
25504 set up as a frame pointer, even when there is no alloca call. */
25505 fprintf (file, "%d,",
25506 ((optional_tbtab << 6)
25507 | ((optional_tbtab & frame_pointer_needed) << 5)
25508 | (info->cr_save_p << 1)
25509 | (info->lr_save_p)));
25510
25511 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
25512 (6 bits). */
25513 fprintf (file, "%d,",
25514 (info->push_p << 7) | (64 - info->first_fp_reg_save));
25515
25516 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
25517 fprintf (file, "%d,", (32 - first_reg_to_save ()));
25518
25519 if (optional_tbtab)
25520 {
25521 /* Compute the parameter info from the function decl argument
25522 list. */
25523 tree decl;
25524 int next_parm_info_bit = 31;
25525
25526 for (decl = DECL_ARGUMENTS (current_function_decl);
25527 decl; decl = DECL_CHAIN (decl))
25528 {
25529 rtx parameter = DECL_INCOMING_RTL (decl);
25530 machine_mode mode = GET_MODE (parameter);
25531
25532 if (GET_CODE (parameter) == REG)
25533 {
25534 if (SCALAR_FLOAT_MODE_P (mode))
25535 {
25536 int bits;
25537
25538 float_parms++;
25539
25540 switch (mode)
25541 {
25542 case SFmode:
25543 case SDmode:
25544 bits = 0x2;
25545 break;
25546
25547 case DFmode:
25548 case DDmode:
25549 case TFmode:
25550 case TDmode:
25551 bits = 0x3;
25552 break;
25553
25554 default:
25555 gcc_unreachable ();
25556 }
25557
25558 /* If only one bit will fit, don't or in this entry. */
25559 if (next_parm_info_bit > 0)
25560 parm_info |= (bits << (next_parm_info_bit - 1));
25561 next_parm_info_bit -= 2;
25562 }
25563 else
25564 {
25565 fixed_parms += ((GET_MODE_SIZE (mode)
25566 + (UNITS_PER_WORD - 1))
25567 / UNITS_PER_WORD);
25568 next_parm_info_bit -= 1;
25569 }
25570 }
25571 }
25572 }
25573
25574 /* Number of fixed point parameters. */
25575 /* This is actually the number of words of fixed point parameters; thus
25576 an 8 byte struct counts as 2; and thus the maximum value is 8. */
25577 fprintf (file, "%d,", fixed_parms);
25578
25579 /* 2 bitfields: number of floating point parameters (7 bits), parameters
25580 all on stack. */
25581 /* This is actually the number of fp registers that hold parameters;
25582 and thus the maximum value is 13. */
25583 /* Set parameters on stack bit if parameters are not in their original
25584 registers, regardless of whether they are on the stack? Xlc
25585 seems to set the bit when not optimizing. */
25586 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
25587
25588 if (! optional_tbtab)
25589 return;
25590
25591 /* Optional fields follow. Some are variable length. */
25592
25593 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single float,
25594 11 double float. */
25595 /* There is an entry for each parameter in a register, in the order that
25596 they occur in the parameter list. Any intervening arguments on the
25597 stack are ignored. If the list overflows a long (max possible length
25598 34 bits) then completely leave off all elements that don't fit. */
25599 /* Only emit this long if there was at least one parameter. */
25600 if (fixed_parms || float_parms)
25601 fprintf (file, "\t.long %d\n", parm_info);
25602
25603 /* Offset from start of code to tb table. */
25604 fputs ("\t.long ", file);
25605 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
25606 RS6000_OUTPUT_BASENAME (file, fname);
25607 putc ('-', file);
25608 rs6000_output_function_entry (file, fname);
25609 putc ('\n', file);
25610
25611 /* Interrupt handler mask. */
25612 /* Omit this long, since we never set the interrupt handler bit
25613 above. */
25614
25615 /* Number of CTL (controlled storage) anchors. */
25616 /* Omit this long, since the has_ctl bit is never set above. */
25617
25618 /* Displacement into stack of each CTL anchor. */
25619 /* Omit this list of longs, because there are no CTL anchors. */
25620
25621 /* Length of function name. */
25622 if (*fname == '*')
25623 ++fname;
25624 fprintf (file, "\t.short %d\n", (int) strlen (fname));
25625
25626 /* Function name. */
25627 assemble_string (fname, strlen (fname));
25628
25629 /* Register for alloca automatic storage; this is always reg 31.
25630 Only emit this if the alloca bit was set above. */
25631 if (frame_pointer_needed)
25632 fputs ("\t.byte 31\n", file);
25633
25634 fputs ("\t.align 2\n", file);
25635 }
25636 }
25637 \f
25638 /* A C compound statement that outputs the assembler code for a thunk
25639 function, used to implement C++ virtual function calls with
25640 multiple inheritance. The thunk acts as a wrapper around a virtual
25641 function, adjusting the implicit object parameter before handing
25642 control off to the real function.
25643
25644 First, emit code to add the integer DELTA to the location that
25645 contains the incoming first argument. Assume that this argument
25646 contains a pointer, and is the one used to pass the `this' pointer
25647 in C++. This is the incoming argument *before* the function
25648 prologue, e.g. `%o0' on a sparc. The addition must preserve the
25649 values of all other incoming arguments.
25650
25651 After the addition, emit code to jump to FUNCTION, which is a
25652 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
25653 not touch the return address. Hence returning from FUNCTION will
25654 return to whoever called the current `thunk'.
25655
25656 The effect must be as if FUNCTION had been called directly with the
25657 adjusted first argument. This macro is responsible for emitting
25658 all of the code for a thunk function; output_function_prologue()
25659 and output_function_epilogue() are not invoked.
25660
25661 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
25662 been extracted from it.) It might possibly be useful on some
25663 targets, but probably not.
25664
25665 If you do not define this macro, the target-independent code in the
25666 C++ frontend will generate a less efficient heavyweight thunk that
25667 calls FUNCTION instead of jumping to it. The generic approach does
25668 not support varargs. */
25669
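/* For concreteness, a sketch of the emitted code on a 64-bit target
   with DELTA == 8 and a small nonzero VCALL_OFFSET v (the exact
   insns depend on the range checks below):

       addi r3,r3,8              # this += delta
       ld   r12,0(r3)            # r12 = vtable pointer
       ld   r12,v(r12)           # r12 = adjustment slot in vtable
       add  r3,r3,r12            # this += *(vptr + vcall_offset)
       b    <function>           # tail call; LR is left untouched */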
25670 static void
25671 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
25672 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
25673 tree function)
25674 {
25675 rtx this_rtx, funexp;
25676 rtx_insn *insn;
25677
25678 reload_completed = 1;
25679 epilogue_completed = 1;
25680
25681 /* Mark the end of the (empty) prologue. */
25682 emit_note (NOTE_INSN_PROLOGUE_END);
25683
25684 /* Find the "this" pointer. If the function returns a structure,
25685 the structure return pointer is in r3. */
25686 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
25687 this_rtx = gen_rtx_REG (Pmode, 4);
25688 else
25689 this_rtx = gen_rtx_REG (Pmode, 3);
25690
25691 /* Apply the constant offset, if required. */
25692 if (delta)
25693 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
25694
25695 /* Apply the offset from the vtable, if required. */
25696 if (vcall_offset)
25697 {
25698 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
25699 rtx tmp = gen_rtx_REG (Pmode, 12);
25700
25701 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
25702 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
25703 {
25704 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
25705 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
25706 }
25707 else
25708 {
25709 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
25710
25711 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
25712 }
25713 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
25714 }
25715
25716 /* Generate a tail call to the target function. */
25717 if (!TREE_USED (function))
25718 {
25719 assemble_external (function);
25720 TREE_USED (function) = 1;
25721 }
25722 funexp = XEXP (DECL_RTL (function), 0);
25723 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
25724
25725 #if TARGET_MACHO
25726 if (MACHOPIC_INDIRECT)
25727 funexp = machopic_indirect_call_target (funexp);
25728 #endif
25729
25730 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
25731 generate sibcall RTL explicitly. */
25732 insn = emit_call_insn (
25733 gen_rtx_PARALLEL (VOIDmode,
25734 gen_rtvec (4,
25735 gen_rtx_CALL (VOIDmode,
25736 funexp, const0_rtx),
25737 gen_rtx_USE (VOIDmode, const0_rtx),
25738 gen_rtx_USE (VOIDmode,
25739 gen_rtx_REG (SImode,
25740 LR_REGNO)),
25741 simple_return_rtx)));
25742 SIBLING_CALL_P (insn) = 1;
25743 emit_barrier ();
25744
25745 /* Ensure we have a global entry point for the thunk. ??? We could
25746 avoid that if the target routine doesn't need a global entry point,
25747 but we do not know whether this is the case at this point. */
25748 if (DEFAULT_ABI == ABI_ELFv2)
25749 cfun->machine->r2_setup_needed = true;
25750
25751 /* Run just enough of rest_of_compilation to get the insns emitted.
25752 There's not really enough bulk here to make other passes such as
25753 instruction scheduling worth while. Note that use_thunk calls
25754 assemble_start_function and assemble_end_function. */
25755 insn = get_insns ();
25756 shorten_branches (insn);
25757 final_start_function (insn, file, 1);
25758 final (insn, file, 1);
25759 final_end_function ();
25760
25761 reload_completed = 0;
25762 epilogue_completed = 0;
25763 }
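/* As a hedged illustration (exact output depends on the ABI, the options
   and the target symbol; "<function>" stands for whatever FUNCTION
   resolves to), a thunk with DELTA == 16 and VCALL_OFFSET == 0
   reduces to:

	addi 3,3,16
	b <function>

   A nonzero VCALL_OFFSET additionally loads the adjustment from the
   vtable through r12 before the tail branch.  */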
25764 \f
25765 /* A quick summary of the various types of 'constant-pool tables'
25766 under PowerPC:
25767
25768 Target Flags Name One table per
25769 AIX (none) AIX TOC object file
25770 AIX -mfull-toc AIX TOC object file
25771 AIX -mminimal-toc AIX minimal TOC translation unit
25772 SVR4/EABI (none) SVR4 SDATA object file
25773 SVR4/EABI -fpic SVR4 pic object file
25774 SVR4/EABI -fPIC SVR4 PIC translation unit
25775 SVR4/EABI -mrelocatable EABI TOC function
25776 SVR4/EABI -maix AIX TOC object file
25777 SVR4/EABI -maix -mminimal-toc
25778 AIX minimal TOC translation unit
25779
25780 Name Reg. Set by entries contains:
25781 made by addrs? fp? sum?
25782
25783 AIX TOC 2 crt0 as Y option option
25784 AIX minimal TOC 30 prolog gcc Y Y option
25785 SVR4 SDATA 13 crt0 gcc N Y N
25786 SVR4 pic 30 prolog ld Y not yet N
25787 SVR4 PIC 30 prolog gcc Y option option
25788 EABI TOC 30 prolog gcc Y option option
25789
25790 */
25791
25792 /* Hash functions for the hash table. */
25793
25794 static unsigned
25795 rs6000_hash_constant (rtx k)
25796 {
25797 enum rtx_code code = GET_CODE (k);
25798 machine_mode mode = GET_MODE (k);
25799 unsigned result = (code << 3) ^ mode;
25800 const char *format;
25801 int flen, fidx;
25802
25803 format = GET_RTX_FORMAT (code);
25804 flen = strlen (format);
25805 fidx = 0;
25806
25807 switch (code)
25808 {
25809 case LABEL_REF:
25810 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
25811
25812 case CONST_WIDE_INT:
25813 {
25814 int i;
25815 flen = CONST_WIDE_INT_NUNITS (k);
25816 for (i = 0; i < flen; i++)
25817 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
25818 return result;
25819 }
25820
25821 case CONST_DOUBLE:
25822 if (mode != VOIDmode)
25823 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
25824 flen = 2;
25825 break;
25826
25827 case CODE_LABEL:
25828 fidx = 3;
25829 break;
25830
25831 default:
25832 break;
25833 }
25834
25835 for (; fidx < flen; fidx++)
25836 switch (format[fidx])
25837 {
25838 case 's':
25839 {
25840 unsigned i, len;
25841 const char *str = XSTR (k, fidx);
25842 len = strlen (str);
25843 result = result * 613 + len;
25844 for (i = 0; i < len; i++)
25845 result = result * 613 + (unsigned) str[i];
25846 break;
25847 }
25848 case 'u':
25849 case 'e':
25850 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
25851 break;
25852 case 'i':
25853 case 'n':
25854 result = result * 613 + (unsigned) XINT (k, fidx);
25855 break;
25856 case 'w':
25857 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
25858 result = result * 613 + (unsigned) XWINT (k, fidx);
25859 else
25860 {
25861 size_t i;
25862 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
25863 result = result * 613 + (unsigned) (XWINT (k, fidx)
25864 >> CHAR_BIT * i);
25865 }
25866 break;
25867 case '0':
25868 break;
25869 default:
25870 gcc_unreachable ();
25871 }
25872
25873 return result;
25874 }
25875
25876 hashval_t
25877 toc_hasher::hash (toc_hash_struct *thc)
25878 {
25879 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
25880 }
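/* 613 and 1231 above are simply odd primes used to mix the bits; the
   mode is xor'ed in here so that otherwise-equal constants of
   different modes fall into different buckets.  */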
25881
25882 /* Compare H1 and H2 for equivalence. */
25883
25884 bool
25885 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
25886 {
25887 rtx r1 = h1->key;
25888 rtx r2 = h2->key;
25889
25890 if (h1->key_mode != h2->key_mode)
25891 return 0;
25892
25893 return rtx_equal_p (r1, r2);
25894 }
25895
25896 /* These are the names given by the C++ front-end to vtables, and
25897 vtable-like objects. Ideally, this logic should not be here;
25898 instead, there should be some programmatic way of inquiring as
25899 to whether or not an object is a vtable. */
25900
25901 #define VTABLE_NAME_P(NAME) \
25902 (strncmp ("_vt.", (NAME), strlen ("_vt.")) == 0 \
25903 || strncmp ("_ZTV", (NAME), strlen ("_ZTV")) == 0 \
25904 || strncmp ("_ZTT", (NAME), strlen ("_ZTT")) == 0 \
25905 || strncmp ("_ZTI", (NAME), strlen ("_ZTI")) == 0 \
25906 || strncmp ("_ZTC", (NAME), strlen ("_ZTC")) == 0)
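/* These prefixes cover the manglings GCC has used for such objects:
   "_vt." comes from the old g++ 2.x scheme, while "_ZTV", "_ZTT",
   "_ZTI" and "_ZTC" are the Itanium C++ ABI names for vtables, VTTs,
   typeinfo objects and construction vtables respectively.  */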
25907
25908 #ifdef NO_DOLLAR_IN_LABEL
25909 /* Return a GGC-allocated character string translating dollar signs in
25910 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
25911
25912 const char *
25913 rs6000_xcoff_strip_dollar (const char *name)
25914 {
25915 char *strip, *p;
25916 const char *q;
25917 size_t len;
25918
25919 q = (const char *) strchr (name, '$');
25920
25921 if (q == 0 || q == name)
25922 return name;
25923
25924 len = strlen (name);
25925 strip = XALLOCAVEC (char, len + 1);
25926 strcpy (strip, name);
25927 p = strip + (q - name);
25928 while (p)
25929 {
25930 *p = '_';
25931 p = strchr (p + 1, '$');
25932 }
25933
25934 return ggc_alloc_string (strip, len);
25935 }
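/* For example, rs6000_xcoff_strip_dollar ("foo$bar$baz") returns
   "foo_bar_baz"; names without '$', or with '$' only in the first
   position, are returned unchanged.  */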
25936 #endif
25937
25938 void
25939 rs6000_output_symbol_ref (FILE *file, rtx x)
25940 {
25941 /* Currently C++ toc references to vtables can be emitted before it
25942 is decided whether the vtable is public or private. If this is
25943 the case, then the linker will eventually complain that there is
25944 a reference to an unknown section. Thus, for vtables only,
25945 we emit the TOC reference to reference the symbol and not the
25946 section. */
25947 const char *name = XSTR (x, 0);
25948
25949 if (VTABLE_NAME_P (name))
25950 {
25951 RS6000_OUTPUT_BASENAME (file, name);
25952 }
25953 else
25954 assemble_name (file, name);
25955 }
25956
25957 /* Output a TOC entry. We derive the entry name from what is being
25958 written. */
25959
25960 void
25961 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
25962 {
25963 char buf[256];
25964 const char *name = buf;
25965 rtx base = x;
25966 HOST_WIDE_INT offset = 0;
25967
25968 gcc_assert (!TARGET_NO_TOC);
25969
25970 /* When the linker won't eliminate them, don't output duplicate
25971 TOC entries (this happens on AIX if there is any kind of TOC,
25972 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
25973 CODE_LABELs. */
25974 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
25975 {
25976 struct toc_hash_struct *h;
25977
25978 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
25979 time because GGC is not initialized at that point. */
25980 if (toc_hash_table == NULL)
25981 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
25982
25983 h = ggc_alloc<toc_hash_struct> ();
25984 h->key = x;
25985 h->key_mode = mode;
25986 h->labelno = labelno;
25987
25988 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
25989 if (*found == NULL)
25990 *found = h;
25991 else /* This is indeed a duplicate.
25992 Set this label equal to that label. */
25993 {
25994 fputs ("\t.set ", file);
25995 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
25996 fprintf (file, "%d,", labelno);
25997 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
25998 fprintf (file, "%d\n", ((*found)->labelno));
25999
26000 #ifdef HAVE_AS_TLS
26001 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
26002 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
26003 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
26004 {
26005 fputs ("\t.set ", file);
26006 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
26007 fprintf (file, "%d,", labelno);
26008 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
26009 fprintf (file, "%d\n", ((*found)->labelno));
26010 }
26011 #endif
26012 return;
26013 }
26014 }
26015
26016 /* If we're going to put a double constant in the TOC, make sure it's
26017 aligned properly when strict alignment is on. */
26018 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
26019 && STRICT_ALIGNMENT
26020 && GET_MODE_BITSIZE (mode) >= 64
26021 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
26022 ASM_OUTPUT_ALIGN (file, 3);
26024
26025 (*targetm.asm_out.internal_label) (file, "LC", labelno);
26026
26027 /* Handle FP constants specially. Note that if we have a minimal
26028 TOC, things we put here aren't actually in the TOC, so we can allow
26029 FP constants. */
26030 if (GET_CODE (x) == CONST_DOUBLE
26031 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode))
26032 {
26033 REAL_VALUE_TYPE rv;
26034 long k[4];
26035
26036 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
26037 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
26038 REAL_VALUE_TO_TARGET_DECIMAL128 (rv, k);
26039 else
26040 REAL_VALUE_TO_TARGET_LONG_DOUBLE (rv, k);
26041
26042 if (TARGET_64BIT)
26043 {
26044 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26045 fputs (DOUBLE_INT_ASM_OP, file);
26046 else
26047 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
26048 k[0] & 0xffffffff, k[1] & 0xffffffff,
26049 k[2] & 0xffffffff, k[3] & 0xffffffff);
26050 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
26051 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
26052 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
26053 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
26054 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
26055 return;
26056 }
26057 else
26058 {
26059 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26060 fputs ("\t.long ", file);
26061 else
26062 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
26063 k[0] & 0xffffffff, k[1] & 0xffffffff,
26064 k[2] & 0xffffffff, k[3] & 0xffffffff);
26065 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
26066 k[0] & 0xffffffff, k[1] & 0xffffffff,
26067 k[2] & 0xffffffff, k[3] & 0xffffffff);
26068 return;
26069 }
26070 }
26071 else if (GET_CODE (x) == CONST_DOUBLE
26072 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
26073 {
26074 REAL_VALUE_TYPE rv;
26075 long k[2];
26076
26077 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
26078
26079 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
26080 REAL_VALUE_TO_TARGET_DECIMAL64 (rv, k);
26081 else
26082 REAL_VALUE_TO_TARGET_DOUBLE (rv, k);
26083
26084 if (TARGET_64BIT)
26085 {
26086 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26087 fputs (DOUBLE_INT_ASM_OP, file);
26088 else
26089 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
26090 k[0] & 0xffffffff, k[1] & 0xffffffff);
26091 fprintf (file, "0x%lx%08lx\n",
26092 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
26093 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
26094 return;
26095 }
26096 else
26097 {
26098 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26099 fputs ("\t.long ", file);
26100 else
26101 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
26102 k[0] & 0xffffffff, k[1] & 0xffffffff);
26103 fprintf (file, "0x%lx,0x%lx\n",
26104 k[0] & 0xffffffff, k[1] & 0xffffffff);
26105 return;
26106 }
26107 }
26108 else if (GET_CODE (x) == CONST_DOUBLE
26109 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
26110 {
26111 REAL_VALUE_TYPE rv;
26112 long l;
26113
26114 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
26115 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
26116 REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
26117 else
26118 REAL_VALUE_TO_TARGET_SINGLE (rv, l);
26119
26120 if (TARGET_64BIT)
26121 {
26122 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26123 fputs (DOUBLE_INT_ASM_OP, file);
26124 else
26125 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
26126 if (WORDS_BIG_ENDIAN)
26127 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
26128 else
26129 fprintf (file, "0x%lx\n", l & 0xffffffff);
26130 return;
26131 }
26132 else
26133 {
26134 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26135 fputs ("\t.long ", file);
26136 else
26137 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
26138 fprintf (file, "0x%lx\n", l & 0xffffffff);
26139 return;
26140 }
26141 }
26142 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
26143 {
26144 unsigned HOST_WIDE_INT low;
26145 HOST_WIDE_INT high;
26146
26147 low = INTVAL (x) & 0xffffffff;
26148 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
26149
26150 /* TOC entries are always Pmode-sized, so on big-endian targets
26151 smaller integer constants in the TOC need to be padded.
26152 (This is still a win over putting the constants in
26153 a separate constant pool, because then we'd have
26154 to have both a TOC entry _and_ the actual constant.)
26155
26156 For a 32-bit target, CONST_INT values are loaded and shifted
26157 entirely within `low' and can be stored in one TOC entry. */
26158
26159 /* Modes wider than a pointer on 64-bit would be easy to support, but are not handled now. */
26160 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
26161
26162 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
26163 {
26164 low |= high << 32;
26165 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
26166 high = (HOST_WIDE_INT) low >> 32;
26167 low &= 0xffffffff;
26168 }
26169
26170 if (TARGET_64BIT)
26171 {
26172 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26173 fputs (DOUBLE_INT_ASM_OP, file);
26174 else
26175 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
26176 (long) high & 0xffffffff, (long) low & 0xffffffff);
26177 fprintf (file, "0x%lx%08lx\n",
26178 (long) high & 0xffffffff, (long) low & 0xffffffff);
26179 return;
26180 }
26181 else
26182 {
26183 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
26184 {
26185 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26186 fputs ("\t.long ", file);
26187 else
26188 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
26189 (long) high & 0xffffffff, (long) low & 0xffffffff);
26190 fprintf (file, "0x%lx,0x%lx\n",
26191 (long) high & 0xffffffff, (long) low & 0xffffffff);
26192 }
26193 else
26194 {
26195 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26196 fputs ("\t.long ", file);
26197 else
26198 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
26199 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
26200 }
26201 return;
26202 }
26203 }
26204
26205 if (GET_CODE (x) == CONST)
26206 {
26207 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
26208 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
26209
26210 base = XEXP (XEXP (x, 0), 0);
26211 offset = INTVAL (XEXP (XEXP (x, 0), 1));
26212 }
26213
26214 switch (GET_CODE (base))
26215 {
26216 case SYMBOL_REF:
26217 name = XSTR (base, 0);
26218 break;
26219
26220 case LABEL_REF:
26221 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
26222 CODE_LABEL_NUMBER (XEXP (base, 0)));
26223 break;
26224
26225 case CODE_LABEL:
26226 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
26227 break;
26228
26229 default:
26230 gcc_unreachable ();
26231 }
26232
26233 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26234 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
26235 else
26236 {
26237 fputs ("\t.tc ", file);
26238 RS6000_OUTPUT_BASENAME (file, name);
26239
26240 if (offset < 0)
26241 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
26242 else if (offset)
26243 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
26244
26245 /* Mark large TOC symbols on AIX with [TE] so they are mapped
26246 after other TOC symbols, reducing overflow of small TOC access
26247 to [TC] symbols. */
26248 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
26249 ? "[TE]," : "[TC],", file);
26250 }
26251
26252 /* Currently C++ toc references to vtables can be emitted before it
26253 is decided whether the vtable is public or private. If this is
26254 the case, then the linker will eventually complain that there is
26255 a TOC reference to an unknown section. Thus, for vtables only,
26256 we emit the TOC reference to reference the symbol and not the
26257 section. */
26258 if (VTABLE_NAME_P (name))
26259 {
26260 RS6000_OUTPUT_BASENAME (file, name);
26261 if (offset < 0)
26262 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
26263 else if (offset > 0)
26264 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
26265 }
26266 else
26267 output_addr_const (file, x);
26268
26269 #if HAVE_AS_TLS
26270 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF
26271 && SYMBOL_REF_TLS_MODEL (base) != 0)
26272 {
26273 if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_EXEC)
26274 fputs ("@le", file);
26275 else if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_INITIAL_EXEC)
26276 fputs ("@ie", file);
26277 /* Use global-dynamic for local-dynamic. */
26278 else if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_GLOBAL_DYNAMIC
26279 || SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_DYNAMIC)
26280 {
26281 putc ('\n', file);
26282 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
26283 fputs ("\t.tc .", file);
26284 RS6000_OUTPUT_BASENAME (file, name);
26285 fputs ("[TC],", file);
26286 output_addr_const (file, x);
26287 fputs ("@m", file);
26288 }
26289 }
26290 #endif
26291
26292 putc ('\n', file);
26293 }
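/* As a worked example (assuming 64-bit big-endian AIX output, not
   -mminimal-toc), the DFmode constant 1.0 has the target image
   k[] = { 0x3ff00000, 0x00000000 } and is emitted as

	.tc FD_3ff00000_0[TC],0x3ff0000000000000

   while TARGET_ELF or TARGET_MINIMAL_TOC would print just the
   DOUBLE_INT_ASM_OP form of the same value.  */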
26294 \f
26295 /* Output an assembler pseudo-op to write an ASCII string of N characters
26296 starting at P to FILE.
26297
26298 On the RS/6000, we have to do this using the .byte operation and
26299 write out special characters outside the quoted string.
26300 Also, the assembler is broken; very long strings are truncated,
26301 so we must artificially break them up early. */
26302
26303 void
26304 output_ascii (FILE *file, const char *p, int n)
26305 {
26306 char c;
26307 int i, count_string;
26308 const char *for_string = "\t.byte \"";
26309 const char *for_decimal = "\t.byte ";
26310 const char *to_close = NULL;
26311
26312 count_string = 0;
26313 for (i = 0; i < n; i++)
26314 {
26315 c = *p++;
26316 if (c >= ' ' && c < 0177)
26317 {
26318 if (for_string)
26319 fputs (for_string, file);
26320 putc (c, file);
26321
26322 /* Write two quotes to get one. */
26323 if (c == '"')
26324 {
26325 putc (c, file);
26326 ++count_string;
26327 }
26328
26329 for_string = NULL;
26330 for_decimal = "\"\n\t.byte ";
26331 to_close = "\"\n";
26332 ++count_string;
26333
26334 if (count_string >= 512)
26335 {
26336 fputs (to_close, file);
26337
26338 for_string = "\t.byte \"";
26339 for_decimal = "\t.byte ";
26340 to_close = NULL;
26341 count_string = 0;
26342 }
26343 }
26344 else
26345 {
26346 if (for_decimal)
26347 fputs (for_decimal, file);
26348 fprintf (file, "%d", c);
26349
26350 for_string = "\n\t.byte \"";
26351 for_decimal = ", ";
26352 to_close = "\n";
26353 count_string = 0;
26354 }
26355 }
26356
26357 /* Now close the string if we have written one. Then end the line. */
26358 if (to_close)
26359 fputs (to_close, file);
26360 }
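/* For instance, output_ascii (file, "hi\n", 3) emits

	.byte "hi"
	.byte 10

   Printable runs are kept inside one quoted .byte directive (closed
   every 512 characters to stay under the assembler's string limit),
   and everything else is written as decimal byte values.  */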
26361 \f
26362 /* Generate a unique section name for FILENAME for a section type
26363 represented by SECTION_DESC. Output goes into BUF.
26364
26365 SECTION_DESC can be any string, as long as it is different for each
26366 possible section type.
26367
26368 We name the section in the same manner as xlc. The name begins with an
26369 underscore followed by the filename (after stripping any leading directory
26370 names) with the last period replaced by the string SECTION_DESC. If
26371 FILENAME does not contain a period, SECTION_DESC is appended to the end of
26372 the name. */
26373
26374 void
26375 rs6000_gen_section_name (char **buf, const char *filename,
26376 const char *section_desc)
26377 {
26378 const char *q, *after_last_slash, *last_period = 0;
26379 char *p;
26380 int len;
26381
26382 after_last_slash = filename;
26383 for (q = filename; *q; q++)
26384 {
26385 if (*q == '/')
26386 after_last_slash = q + 1;
26387 else if (*q == '.')
26388 last_period = q;
26389 }
26390
26391 len = strlen (after_last_slash) + strlen (section_desc) + 2;
26392 *buf = (char *) xmalloc (len);
26393
26394 p = *buf;
26395 *p++ = '_';
26396
26397 for (q = after_last_slash; *q; q++)
26398 {
26399 if (q == last_period)
26400 {
26401 strcpy (p, section_desc);
26402 p += strlen (section_desc);
26403 break;
26404 }
26405
26406 else if (ISALNUM (*q))
26407 *p++ = *q;
26408 }
26409
26410 if (last_period == 0)
26411 strcpy (p, section_desc);
26412 else
26413 *p = '\0';
26414 }
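/* For example, given FILENAME "src/foo.c" and a SECTION_DESC of
   ".bss_" (a typical descriptor; the exact strings are chosen by the
   callers), the result is "_foo.bss_": the directory part is
   stripped, an underscore is prepended, and the trailing ".c" is
   replaced by the descriptor.  */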
26415 \f
26416 /* Emit profile function. */
26417
26418 void
26419 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
26420 {
26421 /* Non-standard profiling for kernels, which just saves LR then calls
26422 _mcount without worrying about arg saves. The idea is to change
26423 the function prologue as little as possible as it isn't easy to
26424 account for arg save/restore code added just for _mcount. */
26425 if (TARGET_PROFILE_KERNEL)
26426 return;
26427
26428 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26429 {
26430 #ifndef NO_PROFILE_COUNTERS
26431 # define NO_PROFILE_COUNTERS 0
26432 #endif
26433 if (NO_PROFILE_COUNTERS)
26434 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
26435 LCT_NORMAL, VOIDmode, 0);
26436 else
26437 {
26438 char buf[30];
26439 const char *label_name;
26440 rtx fun;
26441
26442 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
26443 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
26444 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
26445
26446 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
26447 LCT_NORMAL, VOIDmode, 1, fun, Pmode);
26448 }
26449 }
26450 else if (DEFAULT_ABI == ABI_DARWIN)
26451 {
26452 const char *mcount_name = RS6000_MCOUNT;
26453 int caller_addr_regno = LR_REGNO;
26454
26455 /* Be conservative and always set this, at least for now. */
26456 crtl->uses_pic_offset_table = 1;
26457
26458 #if TARGET_MACHO
26459 /* For PIC code, set up a stub and collect the caller's address
26460 from r0, which is where the prologue puts it. */
26461 if (MACHOPIC_INDIRECT
26462 && crtl->uses_pic_offset_table)
26463 caller_addr_regno = 0;
26464 #endif
26465 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
26466 LCT_NORMAL, VOIDmode, 1,
26467 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
26468 }
26469 }
26470
26471 /* Write function profiler code. */
26472
26473 void
26474 output_function_profiler (FILE *file, int labelno)
26475 {
26476 char buf[100];
26477
26478 switch (DEFAULT_ABI)
26479 {
26480 default:
26481 gcc_unreachable ();
26482
26483 case ABI_V4:
26484 if (!TARGET_32BIT)
26485 {
26486 warning (0, "no profiling of 64-bit code for this ABI");
26487 return;
26488 }
26489 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
26490 fprintf (file, "\tmflr %s\n", reg_names[0]);
26491 if (NO_PROFILE_COUNTERS)
26492 {
26493 asm_fprintf (file, "\tstw %s,4(%s)\n",
26494 reg_names[0], reg_names[1]);
26495 }
26496 else if (TARGET_SECURE_PLT && flag_pic)
26497 {
26498 if (TARGET_LINK_STACK)
26499 {
26500 char name[32];
26501 get_ppc476_thunk_name (name);
26502 asm_fprintf (file, "\tbl %s\n", name);
26503 }
26504 else
26505 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
26506 asm_fprintf (file, "\tstw %s,4(%s)\n",
26507 reg_names[0], reg_names[1]);
26508 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
26509 asm_fprintf (file, "\taddis %s,%s,",
26510 reg_names[12], reg_names[12]);
26511 assemble_name (file, buf);
26512 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
26513 assemble_name (file, buf);
26514 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
26515 }
26516 else if (flag_pic == 1)
26517 {
26518 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
26519 asm_fprintf (file, "\tstw %s,4(%s)\n",
26520 reg_names[0], reg_names[1]);
26521 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
26522 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
26523 assemble_name (file, buf);
26524 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
26525 }
26526 else if (flag_pic > 1)
26527 {
26528 asm_fprintf (file, "\tstw %s,4(%s)\n",
26529 reg_names[0], reg_names[1]);
26530 /* Now, we need to get the address of the label. */
26531 if (TARGET_LINK_STACK)
26532 {
26533 char name[32];
26534 get_ppc476_thunk_name (name);
26535 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
26536 assemble_name (file, buf);
26537 fputs ("-.\n1:", file);
26538 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
26539 asm_fprintf (file, "\taddi %s,%s,4\n",
26540 reg_names[11], reg_names[11]);
26541 }
26542 else
26543 {
26544 fputs ("\tbcl 20,31,1f\n\t.long ", file);
26545 assemble_name (file, buf);
26546 fputs ("-.\n1:", file);
26547 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
26548 }
26549 asm_fprintf (file, "\tlwz %s,0(%s)\n",
26550 reg_names[0], reg_names[11]);
26551 asm_fprintf (file, "\tadd %s,%s,%s\n",
26552 reg_names[0], reg_names[0], reg_names[11]);
26553 }
26554 else
26555 {
26556 asm_fprintf (file, "\tlis %s,", reg_names[12]);
26557 assemble_name (file, buf);
26558 fputs ("@ha\n", file);
26559 asm_fprintf (file, "\tstw %s,4(%s)\n",
26560 reg_names[0], reg_names[1]);
26561 asm_fprintf (file, "\tla %s,", reg_names[0]);
26562 assemble_name (file, buf);
26563 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
26564 }
26565
26566 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
26567 fprintf (file, "\tbl %s%s\n",
26568 RS6000_MCOUNT, flag_pic ? "@plt" : "");
26569 break;
26570
26571 case ABI_AIX:
26572 case ABI_ELFv2:
26573 case ABI_DARWIN:
26574 /* Don't do anything, done in output_profile_hook (). */
26575 break;
26576 }
26577 }
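/* For the simplest ABI_V4 case above (no PIC, profile counters
   enabled) the emitted sequence is roughly:

	mflr 0
	lis 12,LP<n>@ha
	stw 0,4(1)
	la 0,LP<n>@l(12)
	bl _mcount

   where LP<n> is the per-function counter label.  */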
26578
26579 \f
26580
26581 /* The last insn issued by the scheduler. */
26582
26583 static rtx last_scheduled_insn;
26584
26585 /* The following variable helps to balance issuing of load and
26586 store instructions. */
26587
26588 static int load_store_pendulum;
26589
26590 /* Power4 load update and store update instructions are cracked into a
26591 load or store and an integer insn which are executed in the same cycle.
26592 Branches have their own dispatch slot which does not count against the
26593 GCC issue rate, but it changes the program flow so there are no other
26594 instructions to issue in this cycle. */
26595
26596 static int
26597 rs6000_variable_issue_1 (rtx_insn *insn, int more)
26598 {
26599 last_scheduled_insn = insn;
26600 if (GET_CODE (PATTERN (insn)) == USE
26601 || GET_CODE (PATTERN (insn)) == CLOBBER)
26602 {
26603 cached_can_issue_more = more;
26604 return cached_can_issue_more;
26605 }
26606
26607 if (insn_terminates_group_p (insn, current_group))
26608 {
26609 cached_can_issue_more = 0;
26610 return cached_can_issue_more;
26611 }
26612
26613 /* Unrecognized insn: assume it does not affect how many more insns can issue. */
26614 if (recog_memoized (insn) < 0)
26615 return more;
26616
26617 if (rs6000_sched_groups)
26618 {
26619 if (is_microcoded_insn (insn))
26620 cached_can_issue_more = 0;
26621 else if (is_cracked_insn (insn))
26622 cached_can_issue_more = more > 2 ? more - 2 : 0;
26623 else
26624 cached_can_issue_more = more - 1;
26625
26626 return cached_can_issue_more;
26627 }
26628
26629 if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
26630 return 0;
26631
26632 cached_can_issue_more = more - 1;
26633 return cached_can_issue_more;
26634 }
26635
26636 static int
26637 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
26638 {
26639 int r = rs6000_variable_issue_1 (insn, more);
26640 if (verbose)
26641 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
26642 return r;
26643 }
26644
26645 /* Adjust the cost of a scheduling dependency. Return the new cost of
26646 a dependency LINK of INSN on DEP_INSN. COST is the current cost. */
26647
26648 static int
26649 rs6000_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26650 {
26651 enum attr_type attr_type;
26652
26653 if (! recog_memoized (insn))
26654 return 0;
26655
26656 switch (REG_NOTE_KIND (link))
26657 {
26658 case REG_DEP_TRUE:
26659 {
26660 /* Data dependency; DEP_INSN writes a register that INSN reads
26661 some cycles later. */
26662
26663 /* Separate a load from a narrower, dependent store. */
26664 if (rs6000_sched_groups
26665 && GET_CODE (PATTERN (insn)) == SET
26666 && GET_CODE (PATTERN (dep_insn)) == SET
26667 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
26668 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
26669 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
26670 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
26671 return cost + 14;
26672
26673 attr_type = get_attr_type (insn);
26674
26675 switch (attr_type)
26676 {
26677 case TYPE_JMPREG:
26678 /* Tell the first scheduling pass about the latency between
26679 a mtctr and bctr (and mtlr and br/blr). The first
26680 scheduling pass will not know about this latency since
26681 the mtctr instruction, which has the latency associated
26682 to it, will be generated by reload. */
26683 return 4;
26684 case TYPE_BRANCH:
26685 /* Leave some extra cycles between a compare and its
26686 dependent branch, to inhibit expensive mispredicts. */
26687 if ((rs6000_cpu_attr == CPU_PPC603
26688 || rs6000_cpu_attr == CPU_PPC604
26689 || rs6000_cpu_attr == CPU_PPC604E
26690 || rs6000_cpu_attr == CPU_PPC620
26691 || rs6000_cpu_attr == CPU_PPC630
26692 || rs6000_cpu_attr == CPU_PPC750
26693 || rs6000_cpu_attr == CPU_PPC7400
26694 || rs6000_cpu_attr == CPU_PPC7450
26695 || rs6000_cpu_attr == CPU_PPCE5500
26696 || rs6000_cpu_attr == CPU_PPCE6500
26697 || rs6000_cpu_attr == CPU_POWER4
26698 || rs6000_cpu_attr == CPU_POWER5
26699 || rs6000_cpu_attr == CPU_POWER7
26700 || rs6000_cpu_attr == CPU_POWER8
26701 || rs6000_cpu_attr == CPU_CELL)
26702 && recog_memoized (dep_insn)
26703 && (INSN_CODE (dep_insn) >= 0))
26704
26705 switch (get_attr_type (dep_insn))
26706 {
26707 case TYPE_CMP:
26708 case TYPE_FPCOMPARE:
26709 case TYPE_CR_LOGICAL:
26710 case TYPE_DELAYED_CR:
26711 return cost + 2;
26712 case TYPE_EXTS:
26713 case TYPE_MUL:
26714 if (get_attr_dot (dep_insn) == DOT_YES)
26715 return cost + 2;
26716 else
26717 break;
26718 case TYPE_SHIFT:
26719 if (get_attr_dot (dep_insn) == DOT_YES
26720 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
26721 return cost + 2;
26722 else
26723 break;
26724 default:
26725 break;
26726 }
26727 break;
26728
26729 case TYPE_STORE:
26730 case TYPE_FPSTORE:
26731 if ((rs6000_cpu == PROCESSOR_POWER6)
26732 && recog_memoized (dep_insn)
26733 && (INSN_CODE (dep_insn) >= 0))
26734 {
26735
26736 if (GET_CODE (PATTERN (insn)) != SET)
26737 /* If this happens, we have to extend this to schedule
26738 optimally. Return default for now. */
26739 return cost;
26740
26741 /* Adjust the cost for the case where the value written
26742 by a fixed point operation is used as the address
26743 gen value on a store. */
26744 switch (get_attr_type (dep_insn))
26745 {
26746 case TYPE_LOAD:
26747 case TYPE_CNTLZ:
26748 {
26749 if (! store_data_bypass_p (dep_insn, insn))
26750 return get_attr_sign_extend (dep_insn)
26751 == SIGN_EXTEND_YES ? 6 : 4;
26752 break;
26753 }
26754 case TYPE_SHIFT:
26755 {
26756 if (! store_data_bypass_p (dep_insn, insn))
26757 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
26758 6 : 3;
26759 break;
26760 }
26761 case TYPE_INTEGER:
26762 case TYPE_ADD:
26763 case TYPE_LOGICAL:
26764 case TYPE_EXTS:
26765 case TYPE_INSERT:
26766 {
26767 if (! store_data_bypass_p (dep_insn, insn))
26768 return 3;
26769 break;
26770 }
26771 case TYPE_STORE:
26772 case TYPE_FPLOAD:
26773 case TYPE_FPSTORE:
26774 {
26775 if (get_attr_update (dep_insn) == UPDATE_YES
26776 && ! store_data_bypass_p (dep_insn, insn))
26777 return 3;
26778 break;
26779 }
26780 case TYPE_MUL:
26781 {
26782 if (! store_data_bypass_p (dep_insn, insn))
26783 return 17;
26784 break;
26785 }
26786 case TYPE_DIV:
26787 {
26788 if (! store_data_bypass_p (dep_insn, insn))
26789 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
26790 break;
26791 }
26792 default:
26793 break;
26794 }
26795 }
26796 break;
26797
26798 case TYPE_LOAD:
26799 if ((rs6000_cpu == PROCESSOR_POWER6)
26800 && recog_memoized (dep_insn)
26801 && (INSN_CODE (dep_insn) >= 0))
26802 {
26803
26804 /* Adjust the cost for the case where the value written
26805 by a fixed point instruction is used within the address
26806 gen portion of a subsequent load(u)(x) */
26807 switch (get_attr_type (dep_insn))
26808 {
26809 case TYPE_LOAD:
26810 case TYPE_CNTLZ:
26811 {
26812 if (set_to_load_agen (dep_insn, insn))
26813 return get_attr_sign_extend (dep_insn)
26814 == SIGN_EXTEND_YES ? 6 : 4;
26815 break;
26816 }
26817 case TYPE_SHIFT:
26818 {
26819 if (set_to_load_agen (dep_insn, insn))
26820 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
26821 6 : 3;
26822 break;
26823 }
26824 case TYPE_INTEGER:
26825 case TYPE_ADD:
26826 case TYPE_LOGICAL:
26827 case TYPE_EXTS:
26828 case TYPE_INSERT:
26829 {
26830 if (set_to_load_agen (dep_insn, insn))
26831 return 3;
26832 break;
26833 }
26834 case TYPE_STORE:
26835 case TYPE_FPLOAD:
26836 case TYPE_FPSTORE:
26837 {
26838 if (get_attr_update (dep_insn) == UPDATE_YES
26839 && set_to_load_agen (dep_insn, insn))
26840 return 3;
26841 break;
26842 }
26843 case TYPE_MUL:
26844 {
26845 if (set_to_load_agen (dep_insn, insn))
26846 return 17;
26847 break;
26848 }
26849 case TYPE_DIV:
26850 {
26851 if (set_to_load_agen (dep_insn, insn))
26852 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
26853 break;
26854 }
26855 default:
26856 break;
26857 }
26858 }
26859 break;
26860
26861 case TYPE_FPLOAD:
26862 if ((rs6000_cpu == PROCESSOR_POWER6)
26863 && get_attr_update (insn) == UPDATE_NO
26864 && recog_memoized (dep_insn)
26865 && (INSN_CODE (dep_insn) >= 0)
26866 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
26867 return 2;
26868 break;
26869 default:
26870 break;
26871 }
26872
26873 /* Fall out to return default cost. */
26874 }
26875 break;
26876
26877 case REG_DEP_OUTPUT:
26878 /* Output dependency; DEP_INSN writes a register that INSN writes some
26879 cycles later. */
26880 if ((rs6000_cpu == PROCESSOR_POWER6)
26881 && recog_memoized (dep_insn)
26882 && (INSN_CODE (dep_insn) >= 0))
26883 {
26884 attr_type = get_attr_type (insn);
26885
26886 switch (attr_type)
26887 {
26888 case TYPE_FP:
26889 if (get_attr_type (dep_insn) == TYPE_FP)
26890 return 1;
26891 break;
26892 case TYPE_FPLOAD:
26893 if (get_attr_update (insn) == UPDATE_NO
26894 && get_attr_type (dep_insn) == TYPE_MFFGPR)
26895 return 2;
26896 break;
26897 default:
26898 break;
26899 }
26900 } /* Fall through.  */
26901 case REG_DEP_ANTI:
26902 /* Anti dependency; DEP_INSN reads a register that INSN writes some
26903 cycles later. */
26904 return 0;
26905
26906 default:
26907 gcc_unreachable ();
26908 }
26909
26910 return cost;
26911 }
26912
26913 /* Debug version of rs6000_adjust_cost. */
26914
26915 static int
26916 rs6000_debug_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn,
26917 int cost)
26918 {
26919 int ret = rs6000_adjust_cost (insn, link, dep_insn, cost);
26920
26921 if (ret != cost)
26922 {
26923 const char *dep;
26924
26925 switch (REG_NOTE_KIND (link))
26926 {
26927 default: dep = "unknown depencency"; break;
26928 case REG_DEP_TRUE: dep = "data dependency"; break;
26929 case REG_DEP_OUTPUT: dep = "output dependency"; break;
26930 case REG_DEP_ANTI: dep = "anti depencency"; break;
26931 }
26932
26933 fprintf (stderr,
26934 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
26935 "%s, insn:\n", ret, cost, dep);
26936
26937 debug_rtx (insn);
26938 }
26939
26940 return ret;
26941 }
26942
26943 /* Return true if INSN is microcoded by the target processor.
26944 Return false otherwise. */
26945
26946 static bool
26947 is_microcoded_insn (rtx_insn *insn)
26948 {
26949 if (!insn || !NONDEBUG_INSN_P (insn)
26950 || GET_CODE (PATTERN (insn)) == USE
26951 || GET_CODE (PATTERN (insn)) == CLOBBER)
26952 return false;
26953
26954 if (rs6000_cpu_attr == CPU_CELL)
26955 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
26956
26957 if (rs6000_sched_groups
26958 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
26959 {
26960 enum attr_type type = get_attr_type (insn);
26961 if ((type == TYPE_LOAD
26962 && get_attr_update (insn) == UPDATE_YES
26963 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
26964 || ((type == TYPE_LOAD || type == TYPE_STORE)
26965 && get_attr_update (insn) == UPDATE_YES
26966 && get_attr_indexed (insn) == INDEXED_YES)
26967 || type == TYPE_MFCR)
26968 return true;
26969 }
26970
26971 return false;
26972 }
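/* On Power4/5 this classifies, for example, load-with-update forms
   that also sign-extend or are indexed (such as lwaux) and mfcr as
   microcoded; on the Cell the per-insn "cell_micro" attribute makes
   the call instead.  */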
26973
26974 /* The function returns true if INSN is cracked into 2 instructions
26975 by the processor (and therefore occupies 2 issue slots). */
26976
26977 static bool
26978 is_cracked_insn (rtx_insn *insn)
26979 {
26980 if (!insn || !NONDEBUG_INSN_P (insn)
26981 || GET_CODE (PATTERN (insn)) == USE
26982 || GET_CODE (PATTERN (insn)) == CLOBBER)
26983 return false;
26984
26985 if (rs6000_sched_groups
26986 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
26987 {
26988 enum attr_type type = get_attr_type (insn);
26989 if ((type == TYPE_LOAD
26990 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
26991 && get_attr_update (insn) == UPDATE_NO)
26992 || (type == TYPE_LOAD
26993 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
26994 && get_attr_update (insn) == UPDATE_YES
26995 && get_attr_indexed (insn) == INDEXED_NO)
26996 || (type == TYPE_STORE
26997 && get_attr_update (insn) == UPDATE_YES
26998 && get_attr_indexed (insn) == INDEXED_NO)
26999 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
27000 && get_attr_update (insn) == UPDATE_YES)
27001 || type == TYPE_DELAYED_CR
27002 || (type == TYPE_EXTS
27003 && get_attr_dot (insn) == DOT_YES)
27004 || (type == TYPE_SHIFT
27005 && get_attr_dot (insn) == DOT_YES
27006 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
27007 || (type == TYPE_MUL
27008 && get_attr_dot (insn) == DOT_YES)
27009 || type == TYPE_DIV
27010 || (type == TYPE_INSERT
27011 && get_attr_size (insn) == SIZE_32))
27012 return true;
27013 }
27014
27015 return false;
27016 }
27017
27018 /* The function returns true if INSN can be issued only from
27019 the branch slot. */
27020
27021 static bool
27022 is_branch_slot_insn (rtx_insn *insn)
27023 {
27024 if (!insn || !NONDEBUG_INSN_P (insn)
27025 || GET_CODE (PATTERN (insn)) == USE
27026 || GET_CODE (PATTERN (insn)) == CLOBBER)
27027 return false;
27028
27029 if (rs6000_sched_groups)
27030 {
27031 enum attr_type type = get_attr_type (insn);
27032 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
27033 return true;
27034 return false;
27035 }
27036
27037 return false;
27038 }
27039
27040 /* Return true if OUT_INSN sets a value that is used in the
27041 address generation computation of IN_INSN. */
27042 static bool
27043 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
27044 {
27045 rtx out_set, in_set;
27046
27047 /* For performance reasons, only handle the simple case where
27048 both insns are a single_set. */
27049 out_set = single_set (out_insn);
27050 if (out_set)
27051 {
27052 in_set = single_set (in_insn);
27053 if (in_set)
27054 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
27055 }
27056
27057 return false;
27058 }
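/* For example, with OUT_INSN "addi 9,10,8" and IN_INSN "lwz 3,0(9)",
   the add's destination r9 appears in the load's address, so this
   returns true and the POWER6 agen penalties above apply.  */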
27059
27060 /* Try to determine base/offset/size parts of the given MEM.
27061 Return true if successful, false if any of the values could not
27062 be determined.
27063
27064 This function only looks for REG or REG+CONST address forms.
27065 REG+REG address form will return false. */
27066
27067 static bool
27068 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
27069 HOST_WIDE_INT *size)
27070 {
27071 rtx addr_rtx;
27072 if (MEM_SIZE_KNOWN_P (mem))
27073 *size = MEM_SIZE (mem);
27074 else
27075 return false;
27076
27077 if (GET_CODE (XEXP (mem, 0)) == PRE_MODIFY)
27078 addr_rtx = XEXP (XEXP (mem, 0), 1);
27079 else
27080 addr_rtx = XEXP (mem, 0);
27081
27082 if (GET_CODE (addr_rtx) == REG)
27083 {
27084 *base = addr_rtx;
27085 *offset = 0;
27086 }
27087 else if (GET_CODE (addr_rtx) == PLUS
27088 && CONST_INT_P (XEXP (addr_rtx, 1)))
27089 {
27090 *base = XEXP (addr_rtx, 0);
27091 *offset = INTVAL (XEXP (addr_rtx, 1));
27092 }
27093 else
27094 return false;
27095
27096 return true;
27097 }
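/* For instance, (mem:DI (plus (reg 9) (const_int 16))) with a known
   size gives *BASE = (reg 9), *OFFSET = 16 and *SIZE = 8, whereas an
   indexed REG+REG address makes this return false.  */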
27098
27099 /* Return true if the target storage location of MEM1 is
27100 adjacent to the target storage location of MEM2. */
27102
27103 static bool
27104 adjacent_mem_locations (rtx mem1, rtx mem2)
27105 {
27106 rtx reg1, reg2;
27107 HOST_WIDE_INT off1, size1, off2, size2;
27108
27109 if (get_memref_parts (mem1, &reg1, &off1, &size1)
27110 && get_memref_parts (mem2, &reg2, &off2, &size2))
27111 return ((REGNO (reg1) == REGNO (reg2))
27112 && ((off1 + size1 == off2)
27113 || (off2 + size2 == off1)));
27114
27115 return false;
27116 }
27117
27118 /* This function returns true if it can be determined that the two MEM
27119 locations overlap by at least 1 byte based on base reg/offset/size. */
27120
27121 static bool
27122 mem_locations_overlap (rtx mem1, rtx mem2)
27123 {
27124 rtx reg1, reg2;
27125 HOST_WIDE_INT off1, size1, off2, size2;
27126
27127 if (get_memref_parts (mem1, &reg1, &off1, &size1)
27128 && get_memref_parts (mem2, &reg2, &off2, &size2))
27129 return ((REGNO (reg1) == REGNO (reg2))
27130 && (((off1 <= off2) && (off1 + size1 > off2))
27131 || ((off2 <= off1) && (off2 + size2 > off1))));
27132
27133 return false;
27134 }
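/* Thus two 4-byte accesses at offsets 0 and 4 from the same base
   register are adjacent but do not overlap, while accesses at
   offsets 0 and 2 overlap; different base registers make both
   predicates conservatively return false.  */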
27135
27136 /* Update the integer scheduling priority INSN_PRIORITY (INSN).
27137 Increase the priority to execute INSN earlier, reduce the
27138 priority to execute INSN later. This hook only needs a
27139 nontrivial definition when the scheduling priorities of insns
27140 must be adjusted. */
27141
27142 static int
27143 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
27144 {
27145 rtx load_mem, str_mem;
27146 /* On machines (like the 750) which have asymmetric integer units,
27147 where one integer unit can do multiply and divides and the other
27148 can't, reduce the priority of multiply/divide so it is scheduled
27149 before other integer operations. */
27150
27151 #if 0
27152 if (! INSN_P (insn))
27153 return priority;
27154
27155 if (GET_CODE (PATTERN (insn)) == USE)
27156 return priority;
27157
27158 switch (rs6000_cpu_attr) {
27159 case CPU_PPC750:
27160 switch (get_attr_type (insn))
27161 {
27162 default:
27163 break;
27164
27165 case TYPE_MUL:
27166 case TYPE_DIV:
27167 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
27168 priority, priority);
27169 if (priority >= 0 && priority < 0x01000000)
27170 priority >>= 3;
27171 break;
27172 }
27173 }
27174 #endif
27175
27176 if (insn_must_be_first_in_group (insn)
27177 && reload_completed
27178 && current_sched_info->sched_max_insns_priority
27179 && rs6000_sched_restricted_insns_priority)
27180 {
27181
27182 /* Prioritize insns that can be dispatched only in the first
27183 dispatch slot. */
27184 if (rs6000_sched_restricted_insns_priority == 1)
27185 /* Attach highest priority to insn. This means that in
27186 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
27187 precede 'priority' (critical path) considerations. */
27188 return current_sched_info->sched_max_insns_priority;
27189 else if (rs6000_sched_restricted_insns_priority == 2)
27190 /* Increase priority of insn by a minimal amount. This means that in
27191 haifa-sched.c:ready_sort(), only 'priority' (critical path)
27192 considerations precede dispatch-slot restriction considerations. */
27193 return (priority + 1);
27194 }
27195
27196 if (rs6000_cpu == PROCESSOR_POWER6
27197 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
27198 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
27199 /* Attach highest priority to insn if the scheduler has just issued two
27200 stores and this instruction is a load, or two loads and this instruction
27201 is a store. Power6 wants loads and stores scheduled alternately
27202 when possible */
27203 return current_sched_info->sched_max_insns_priority;
27204
27205 return priority;
27206 }
27207
27208 /* Return true if the instruction is nonpipelined on the Cell. */
27209 static bool
27210 is_nonpipeline_insn (rtx_insn *insn)
27211 {
27212 enum attr_type type;
27213 if (!insn || !NONDEBUG_INSN_P (insn)
27214 || GET_CODE (PATTERN (insn)) == USE
27215 || GET_CODE (PATTERN (insn)) == CLOBBER)
27216 return false;
27217
27218 type = get_attr_type (insn);
27219 if (type == TYPE_MUL
27220 || type == TYPE_DIV
27221 || type == TYPE_SDIV
27222 || type == TYPE_DDIV
27223 || type == TYPE_SSQRT
27224 || type == TYPE_DSQRT
27225 || type == TYPE_MFCR
27226 || type == TYPE_MFCRF
27227 || type == TYPE_MFJMPR)
27228 {
27229 return true;
27230 }
27231 return false;
27232 }
27233
27234
27235 /* Return how many instructions the machine can issue per cycle. */
27236
27237 static int
27238 rs6000_issue_rate (void)
27239 {
27240 /* Unless scheduling for register pressure, use issue rate of 1 for
27241 first scheduling pass to decrease degradation. */
27242 if (!reload_completed && !flag_sched_pressure)
27243 return 1;
27244
27245 switch (rs6000_cpu_attr) {
27246 case CPU_RS64A:
27247 case CPU_PPC601: /* ? */
27248 case CPU_PPC7450:
27249 return 3;
27250 case CPU_PPC440:
27251 case CPU_PPC603:
27252 case CPU_PPC750:
27253 case CPU_PPC7400:
27254 case CPU_PPC8540:
27255 case CPU_PPC8548:
27256 case CPU_CELL:
27257 case CPU_PPCE300C2:
27258 case CPU_PPCE300C3:
27259 case CPU_PPCE500MC:
27260 case CPU_PPCE500MC64:
27261 case CPU_PPCE5500:
27262 case CPU_PPCE6500:
27263 case CPU_TITAN:
27264 return 2;
27265 case CPU_PPC476:
27266 case CPU_PPC604:
27267 case CPU_PPC604E:
27268 case CPU_PPC620:
27269 case CPU_PPC630:
27270 return 4;
27271 case CPU_POWER4:
27272 case CPU_POWER5:
27273 case CPU_POWER6:
27274 case CPU_POWER7:
27275 return 5;
27276 case CPU_POWER8:
27277 return 7;
27278 default:
27279 return 1;
27280 }
27281 }
27282
27283 /* Return how many instructions to look ahead for better insn
27284 scheduling. */
27285
27286 static int
27287 rs6000_use_sched_lookahead (void)
27288 {
27289 switch (rs6000_cpu_attr)
27290 {
27291 case CPU_PPC8540:
27292 case CPU_PPC8548:
27293 return 4;
27294
27295 case CPU_CELL:
27296 return (reload_completed ? 8 : 0);
27297
27298 default:
27299 return 0;
27300 }
27301 }
27302
27303 /* We are choosing insn from the ready queue. Return zero if INSN can be
27304 chosen. */
27305 static int
27306 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
27307 {
27308 if (ready_index == 0)
27309 return 0;
27310
27311 if (rs6000_cpu_attr != CPU_CELL)
27312 return 0;
27313
27314 gcc_assert (insn != NULL_RTX && INSN_P (insn));
27315
27316 if (!reload_completed
27317 || is_nonpipeline_insn (insn)
27318 || is_microcoded_insn (insn))
27319 return 1;
27320
27321 return 0;
27322 }
27323
27324 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
27325 and return true. */
27326
27327 static bool
27328 find_mem_ref (rtx pat, rtx *mem_ref)
27329 {
27330 const char * fmt;
27331 int i, j;
27332
27333 /* stack_tie does not produce any real memory traffic. */
27334 if (tie_operand (pat, VOIDmode))
27335 return false;
27336
27337 if (GET_CODE (pat) == MEM)
27338 {
27339 *mem_ref = pat;
27340 return true;
27341 }
27342
27343 /* Recursively process the pattern. */
27344 fmt = GET_RTX_FORMAT (GET_CODE (pat));
27345
27346 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
27347 {
27348 if (fmt[i] == 'e')
27349 {
27350 if (find_mem_ref (XEXP (pat, i), mem_ref))
27351 return true;
27352 }
27353 else if (fmt[i] == 'E')
27354 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
27355 {
27356 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
27357 return true;
27358 }
27359 }
27360
27361 return false;
27362 }
27363
27364 /* Determine if PAT is a PATTERN of a load insn. */
27365
27366 static bool
27367 is_load_insn1 (rtx pat, rtx *load_mem)
27368 {
27369 if (!pat)
27370 return false;
27371
27372 if (GET_CODE (pat) == SET)
27373 return find_mem_ref (SET_SRC (pat), load_mem);
27374
27375 if (GET_CODE (pat) == PARALLEL)
27376 {
27377 int i;
27378
27379 for (i = 0; i < XVECLEN (pat, 0); i++)
27380 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
27381 return true;
27382 }
27383
27384 return false;
27385 }
27386
27387 /* Determine if INSN loads from memory. */
27388
27389 static bool
27390 is_load_insn (rtx insn, rtx *load_mem)
27391 {
27392 if (!insn || !INSN_P (insn))
27393 return false;
27394
27395 if (CALL_P (insn))
27396 return false;
27397
27398 return is_load_insn1 (PATTERN (insn), load_mem);
27399 }
27400
27401 /* Determine if PAT is a PATTERN of a store insn. */
27402
27403 static bool
27404 is_store_insn1 (rtx pat, rtx *str_mem)
27405 {
27406 if (!pat)
27407 return false;
27408
27409 if (GET_CODE (pat) == SET)
27410 return find_mem_ref (SET_DEST (pat), str_mem);
27411
27412 if (GET_CODE (pat) == PARALLEL)
27413 {
27414 int i;
27415
27416 for (i = 0; i < XVECLEN (pat, 0); i++)
27417 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
27418 return true;
27419 }
27420
27421 return false;
27422 }
27423
27424 /* Determine if INSN stores to memory. */
27425
27426 static bool
27427 is_store_insn (rtx insn, rtx *str_mem)
27428 {
27429 if (!insn || !INSN_P (insn))
27430 return false;
27431
27432 return is_store_insn1 (PATTERN (insn), str_mem);
27433 }
27434
27435 /* Returns whether the dependence between INSN and NEXT is considered
27436 costly by the given target. */
27437
27438 static bool
27439 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
27440 {
27441 rtx insn;
27442 rtx next;
27443 rtx load_mem, str_mem;
27444
27445 /* If the flag is not enabled - no dependence is considered costly;
27446 allow all dependent insns in the same group.
27447 This is the most aggressive option. */
27448 if (rs6000_sched_costly_dep == no_dep_costly)
27449 return false;
27450
27451 /* If the flag is set to 1 - a dependence is always considered costly;
27452 do not allow dependent instructions in the same group.
27453 This is the most conservative option. */
27454 if (rs6000_sched_costly_dep == all_deps_costly)
27455 return true;
27456
27457 insn = DEP_PRO (dep);
27458 next = DEP_CON (dep);
27459
27460 if (rs6000_sched_costly_dep == store_to_load_dep_costly
27461 && is_load_insn (next, &load_mem)
27462 && is_store_insn (insn, &str_mem))
27463 /* Prevent load after store in the same group. */
27464 return true;
27465
27466 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
27467 && is_load_insn (next, &load_mem)
27468 && is_store_insn (insn, &str_mem)
27469 && DEP_TYPE (dep) == REG_DEP_TRUE
27470 && mem_locations_overlap(str_mem, load_mem))
27471 /* Prevent load after store in the same group if it is a true
27472 dependence. */
27473 return true;
27474
27475 /* The flag is set to X; dependences with latency >= X are considered costly,
27476 and will not be scheduled in the same group. */
27477 if (rs6000_sched_costly_dep <= max_dep_latency
27478 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
27479 return true;
27480
27481 return false;
27482 }
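/* The cases above mirror the -msched-costly-dep= option: "no" lets
   any dependent insns share a group, "all" forbids it, the two
   store-to-load settings target store queue hazards, and a numeric
   value N treats any dependence of latency >= N as costly.  */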
27483
27484 /* Return the next insn after INSN that is found before TAIL is reached,
27485 skipping any "non-active" insns - insns that will not actually occupy
27486 an issue slot. Return NULL_RTX if such an insn is not found. */
27487
27488 static rtx_insn *
27489 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
27490 {
27491 if (insn == NULL_RTX || insn == tail)
27492 return NULL;
27493
27494 while (1)
27495 {
27496 insn = NEXT_INSN (insn);
27497 if (insn == NULL_RTX || insn == tail)
27498 return NULL;
27499
27500 if (CALL_P (insn)
27501 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
27502 || (NONJUMP_INSN_P (insn)
27503 && GET_CODE (PATTERN (insn)) != USE
27504 && GET_CODE (PATTERN (insn)) != CLOBBER
27505 && INSN_CODE (insn) != CODE_FOR_stack_tie))
27506 break;
27507 }
27508 return insn;
27509 }
27510
27511 /* We are about to begin issuing insns for this clock cycle. */
27512
27513 static int
27514 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
27515 rtx_insn **ready ATTRIBUTE_UNUSED,
27516 int *pn_ready ATTRIBUTE_UNUSED,
27517 int clock_var ATTRIBUTE_UNUSED)
27518 {
27519 int n_ready = *pn_ready;
27520
27521 if (sched_verbose)
27522 fprintf (dump, "// rs6000_sched_reorder :\n");
27523
27524 /* Reorder the ready list: if the next insn to issue (the last
27525 entry) is a nonpipelined insn, swap it with its neighbor. */
27526 if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
27527 {
27528 if (is_nonpipeline_insn (ready[n_ready - 1])
27529 && (recog_memoized (ready[n_ready - 2]) > 0))
27530 /* Simply swap first two insns. */
27531 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
27532 }
27533
27534 if (rs6000_cpu == PROCESSOR_POWER6)
27535 load_store_pendulum = 0;
27536
27537 return rs6000_issue_rate ();
27538 }
27539
27540 /* Like rs6000_sched_reorder, but called after issuing each insn. */
27541
27542 static int
27543 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
27544 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
27545 {
27546 if (sched_verbose)
27547 fprintf (dump, "// rs6000_sched_reorder2 :\n");
27548
27549 /* For Power6, we need to handle some special cases to try and keep the
27550 store queue from overflowing and triggering expensive flushes.
27551
27552 This code monitors how load and store instructions are being issued
27553 and skews the ready list one way or the other to increase the likelihood
27554 that a desired instruction is issued at the proper time.
27555
27556 A couple of things are done. First, we maintain a "load_store_pendulum"
27557 to track the current state of load/store issue.
27558
27559 - If the pendulum is at zero, then no loads or stores have been
27560 issued in the current cycle so we do nothing.
27561
27562 - If the pendulum is 1, then a single load has been issued in this
27563 cycle and we attempt to locate another load in the ready list to
27564 issue with it.
27565
27566 - If the pendulum is -2, then two stores have already been
27567 issued in this cycle, so we increase the priority of the first load
27568 in the ready list to increase its likelihood of being chosen first
27569 in the next cycle.
27570
27571 - If the pendulum is -1, then a single store has been issued in this
27572 cycle and we attempt to locate another store in the ready list to
27573 issue with it, preferring a store to an adjacent memory location to
27574 facilitate store pairing in the store queue.
27575
27576 - If the pendulum is 2, then two loads have already been
27577 issued in this cycle, so we increase the priority of the first store
27578 in the ready list to increase its likelihood of being chosen first
27579 in the next cycle.
27580
27581 - If the pendulum < -2 or > 2, then do nothing.
27582
27583 Note: This code covers the most common scenarios. There exist non-
27584 load/store instructions which make use of the LSU and which
27585 would need to be accounted for to strictly model the behavior
27586 of the machine. Those instructions are currently unaccounted
27587 for to help minimize compile time overhead of this code.
27588 */
27589 if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
27590 {
27591 int pos;
27592 int i;
27593 rtx_insn *tmp;
27594 rtx load_mem, str_mem;
27595
27596 if (is_store_insn (last_scheduled_insn, &str_mem))
27597 /* Issuing a store, swing the load_store_pendulum to the left */
27598 load_store_pendulum--;
27599 else if (is_load_insn (last_scheduled_insn, &load_mem))
27600 /* Issuing a load, swing the load_store_pendulum to the right */
27601 load_store_pendulum++;
27602 else
27603 return cached_can_issue_more;
27604
27605 /* If the pendulum is balanced, or there is only one instruction on
27606 the ready list, then all is well, so return. */
27607 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
27608 return cached_can_issue_more;
27609
27610 if (load_store_pendulum == 1)
27611 {
27612 /* A load has been issued in this cycle. Scan the ready list
27613 for another load to issue with it */
27614 pos = *pn_ready-1;
27615
27616 while (pos >= 0)
27617 {
27618 if (is_load_insn (ready[pos], &load_mem))
27619 {
27620 /* Found a load. Move it to the head of the ready list,
27621 and adjust its priority so that it is more likely to
27622 stay there */
27623 tmp = ready[pos];
27624 for (i=pos; i<*pn_ready-1; i++)
27625 ready[i] = ready[i + 1];
27626 ready[*pn_ready-1] = tmp;
27627
27628 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
27629 INSN_PRIORITY (tmp)++;
27630 break;
27631 }
27632 pos--;
27633 }
27634 }
27635 else if (load_store_pendulum == -2)
27636 {
27637 /* Two stores have been issued in this cycle. Increase the
27638 priority of the first load in the ready list to favor it for
27639 issuing in the next cycle. */
27640 pos = *pn_ready-1;
27641
27642 while (pos >= 0)
27643 {
27644 if (is_load_insn (ready[pos], &load_mem)
27645 && !sel_sched_p ()
27646 && INSN_PRIORITY_KNOWN (ready[pos]))
27647 {
27648 INSN_PRIORITY (ready[pos])++;
27649
27650 /* Adjust the pendulum to account for the fact that a load
27651 was found and increased in priority. This is to prevent
27652 increasing the priority of multiple loads */
27653 load_store_pendulum--;
27654
27655 break;
27656 }
27657 pos--;
27658 }
27659 }
27660 else if (load_store_pendulum == -1)
27661 {
27662 /* A store has been issued in this cycle. Scan the ready list for
27663 another store to issue with it, preferring a store to an adjacent
27664 memory location */
27665 int first_store_pos = -1;
27666
27667 pos = *pn_ready-1;
27668
27669 while (pos >= 0)
27670 {
27671 if (is_store_insn (ready[pos], &str_mem))
27672 {
27673 rtx str_mem2;
27674 /* Maintain the index of the first store found on the
27675 list */
27676 if (first_store_pos == -1)
27677 first_store_pos = pos;
27678
27679 if (is_store_insn (last_scheduled_insn, &str_mem2)
27680 && adjacent_mem_locations (str_mem, str_mem2))
27681 {
27682 /* Found an adjacent store. Move it to the head of the
27683 ready list, and adjust its priority so that it is
27684 more likely to stay there */
27685 tmp = ready[pos];
27686 for (i=pos; i<*pn_ready-1; i++)
27687 ready[i] = ready[i + 1];
27688 ready[*pn_ready-1] = tmp;
27689
27690 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
27691 INSN_PRIORITY (tmp)++;
27692
27693 first_store_pos = -1;
27694
27695 break;
27696 }
27697 }
27698 pos--;
27699 }
27700
27701 if (first_store_pos >= 0)
27702 {
27703 /* An adjacent store wasn't found, but a non-adjacent store was,
27704 so move the non-adjacent store to the front of the ready
27705 list, and adjust its priority so that it is more likely to
27706 stay there. */
27707 tmp = ready[first_store_pos];
27708 for (i=first_store_pos; i<*pn_ready-1; i++)
27709 ready[i] = ready[i + 1];
27710 ready[*pn_ready-1] = tmp;
27711 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
27712 INSN_PRIORITY (tmp)++;
27713 }
27714 }
27715 else if (load_store_pendulum == 2)
27716 {
27717 /* Two loads have been issued in this cycle. Increase the priority
27718 of the first store in the ready list to favor it for issuing in
27719 the next cycle. */
27720 pos = *pn_ready-1;
27721
27722 while (pos >= 0)
27723 {
27724 if (is_store_insn (ready[pos], &str_mem)
27725 && !sel_sched_p ()
27726 && INSN_PRIORITY_KNOWN (ready[pos]))
27727 {
27728 INSN_PRIORITY (ready[pos])++;
27729
27730 /* Adjust the pendulum to account for the fact that a store
27731 was found and increased in priority. This is to prevent
27732 increasing the priority of multiple stores */
27733 load_store_pendulum++;
27734
27735 break;
27736 }
27737 pos--;
27738 }
27739 }
27740 }
27741
27742 return cached_can_issue_more;
27743 }
27744
27745 /* Return whether the presence of INSN causes a dispatch group termination
27746 of group WHICH_GROUP.
27747
27748 If WHICH_GROUP == current_group, this function will return true if INSN
27749 causes the termination of the current group (i.e., the dispatch group to
27750 which INSN belongs). This means that INSN will be the last insn in the
27751 group it belongs to.
27752
27753 If WHICH_GROUP == previous_group, this function will return true if INSN
27754 causes the termination of the previous group (i.e., the dispatch group
27755 that precedes the group to which INSN belongs). This means that INSN
27756 will be the first insn in the group it belongs to. */
27757
27758 static bool
27759 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
27760 {
27761 bool first, last;
27762
27763 if (! insn)
27764 return false;
27765
27766 first = insn_must_be_first_in_group (insn);
27767 last = insn_must_be_last_in_group (insn);
27768
27769 if (first && last)
27770 return true;
27771
27772 if (which_group == current_group)
27773 return last;
27774 else if (which_group == previous_group)
27775 return first;
27776
27777 return false;
27778 }
27779
27780
27781 static bool
27782 insn_must_be_first_in_group (rtx_insn *insn)
27783 {
27784 enum attr_type type;
27785
27786 if (!insn
27787 || NOTE_P (insn)
27788 || DEBUG_INSN_P (insn)
27789 || GET_CODE (PATTERN (insn)) == USE
27790 || GET_CODE (PATTERN (insn)) == CLOBBER)
27791 return false;
27792
27793 switch (rs6000_cpu)
27794 {
27795 case PROCESSOR_POWER5:
27796 if (is_cracked_insn (insn))
27797 return true;
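      /* FALLTHRU: POWER5 also applies the POWER4 checks below.  */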
27798 case PROCESSOR_POWER4:
27799 if (is_microcoded_insn (insn))
27800 return true;
27801
27802 if (!rs6000_sched_groups)
27803 return false;
27804
27805 type = get_attr_type (insn);
27806
27807 switch (type)
27808 {
27809 case TYPE_MFCR:
27810 case TYPE_MFCRF:
27811 case TYPE_MTCR:
27812 case TYPE_DELAYED_CR:
27813 case TYPE_CR_LOGICAL:
27814 case TYPE_MTJMPR:
27815 case TYPE_MFJMPR:
27816 case TYPE_DIV:
27817 case TYPE_LOAD_L:
27818 case TYPE_STORE_C:
27819 case TYPE_ISYNC:
27820 case TYPE_SYNC:
27821 return true;
27822 default:
27823 break;
27824 }
27825 break;
27826 case PROCESSOR_POWER6:
27827 type = get_attr_type (insn);
27828
27829 switch (type)
27830 {
27831 case TYPE_EXTS:
27832 case TYPE_CNTLZ:
27833 case TYPE_TRAP:
27834 case TYPE_MUL:
27835 case TYPE_INSERT:
27836 case TYPE_FPCOMPARE:
27837 case TYPE_MFCR:
27838 case TYPE_MTCR:
27839 case TYPE_MFJMPR:
27840 case TYPE_MTJMPR:
27841 case TYPE_ISYNC:
27842 case TYPE_SYNC:
27843 case TYPE_LOAD_L:
27844 case TYPE_STORE_C:
27845 return true;
27846 case TYPE_SHIFT:
27847 if (get_attr_dot (insn) == DOT_NO
27848 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
27849 return true;
27850 else
27851 break;
27852 case TYPE_DIV:
27853 if (get_attr_size (insn) == SIZE_32)
27854 return true;
27855 else
27856 break;
27857 case TYPE_LOAD:
27858 case TYPE_STORE:
27859 case TYPE_FPLOAD:
27860 case TYPE_FPSTORE:
27861 if (get_attr_update (insn) == UPDATE_YES)
27862 return true;
27863 else
27864 break;
27865 default:
27866 break;
27867 }
27868 break;
27869 case PROCESSOR_POWER7:
27870 type = get_attr_type (insn);
27871
27872 switch (type)
27873 {
27874 case TYPE_CR_LOGICAL:
27875 case TYPE_MFCR:
27876 case TYPE_MFCRF:
27877 case TYPE_MTCR:
27878 case TYPE_DIV:
27879 case TYPE_ISYNC:
27880 case TYPE_LOAD_L:
27881 case TYPE_STORE_C:
27882 case TYPE_MFJMPR:
27883 case TYPE_MTJMPR:
27884 return true;
27885 case TYPE_MUL:
27886 case TYPE_SHIFT:
27887 case TYPE_EXTS:
27888 if (get_attr_dot (insn) == DOT_YES)
27889 return true;
27890 else
27891 break;
27892 case TYPE_LOAD:
27893 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
27894 || get_attr_update (insn) == UPDATE_YES)
27895 return true;
27896 else
27897 break;
27898 case TYPE_STORE:
27899 case TYPE_FPLOAD:
27900 case TYPE_FPSTORE:
27901 if (get_attr_update (insn) == UPDATE_YES)
27902 return true;
27903 else
27904 break;
27905 default:
27906 break;
27907 }
27908 break;
27909 case PROCESSOR_POWER8:
27910 type = get_attr_type (insn);
27911
27912 switch (type)
27913 {
27914 case TYPE_CR_LOGICAL:
27915 case TYPE_DELAYED_CR:
27916 case TYPE_MFCR:
27917 case TYPE_MFCRF:
27918 case TYPE_MTCR:
27919 case TYPE_SYNC:
27920 case TYPE_ISYNC:
27921 case TYPE_LOAD_L:
27922 case TYPE_STORE_C:
27923 case TYPE_VECSTORE:
27924 case TYPE_MFJMPR:
27925 case TYPE_MTJMPR:
27926 return true;
27927 case TYPE_SHIFT:
27928 case TYPE_EXTS:
27929 case TYPE_MUL:
27930 if (get_attr_dot (insn) == DOT_YES)
27931 return true;
27932 else
27933 break;
27934 case TYPE_LOAD:
27935 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
27936 || get_attr_update (insn) == UPDATE_YES)
27937 return true;
27938 else
27939 break;
27940 case TYPE_STORE:
27941 if (get_attr_update (insn) == UPDATE_YES
27942 && get_attr_indexed (insn) == INDEXED_YES)
27943 return true;
27944 else
27945 break;
27946 default:
27947 break;
27948 }
27949 break;
27950 default:
27951 break;
27952 }
27953
27954 return false;
27955 }
27956
27957 static bool
27958 insn_must_be_last_in_group (rtx_insn *insn)
27959 {
27960 enum attr_type type;
27961
27962 if (!insn
27963 || NOTE_P (insn)
27964 || DEBUG_INSN_P (insn)
27965 || GET_CODE (PATTERN (insn)) == USE
27966 || GET_CODE (PATTERN (insn)) == CLOBBER)
27967 return false;
27968
27969 switch (rs6000_cpu)
      {
27970 case PROCESSOR_POWER4:
27971 case PROCESSOR_POWER5:
27972 if (is_microcoded_insn (insn))
27973 return true;
27974
27975 if (is_branch_slot_insn (insn))
27976 return true;
27977
27978 break;
27979 case PROCESSOR_POWER6:
27980 type = get_attr_type (insn);
27981
27982 switch (type)
27983 {
27984 case TYPE_EXTS:
27985 case TYPE_CNTLZ:
27986 case TYPE_TRAP:
27987 case TYPE_MUL:
27988 case TYPE_FPCOMPARE:
27989 case TYPE_MFCR:
27990 case TYPE_MTCR:
27991 case TYPE_MFJMPR:
27992 case TYPE_MTJMPR:
27993 case TYPE_ISYNC:
27994 case TYPE_SYNC:
27995 case TYPE_LOAD_L:
27996 case TYPE_STORE_C:
27997 return true;
27998 case TYPE_SHIFT:
27999 if (get_attr_dot (insn) == DOT_NO
28000 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
28001 return true;
28002 else
28003 break;
28004 case TYPE_DIV:
28005 if (get_attr_size (insn) == SIZE_32)
28006 return true;
28007 else
28008 break;
28009 default:
28010 break;
28011 }
28012 break;
28013 case PROCESSOR_POWER7:
28014 type = get_attr_type (insn);
28015
28016 switch (type)
28017 {
28018 case TYPE_ISYNC:
28019 case TYPE_SYNC:
28020 case TYPE_LOAD_L:
28021 case TYPE_STORE_C:
28022 return true;
28023 case TYPE_LOAD:
28024 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
28025 && get_attr_update (insn) == UPDATE_YES)
28026 return true;
28027 else
28028 break;
28029 case TYPE_STORE:
28030 if (get_attr_update (insn) == UPDATE_YES
28031 && get_attr_indexed (insn) == INDEXED_YES)
28032 return true;
28033 else
28034 break;
28035 default:
28036 break;
28037 }
28038 break;
28039 case PROCESSOR_POWER8:
28040 type = get_attr_type (insn);
28041
28042 switch (type)
28043 {
28044 case TYPE_MFCR:
28045 case TYPE_MTCR:
28046 case TYPE_ISYNC:
28047 case TYPE_SYNC:
28048 case TYPE_LOAD_L:
28049 case TYPE_STORE_C:
28050 return true;
28051 case TYPE_LOAD:
28052 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
28053 && get_attr_update (insn) == UPDATE_YES)
28054 return true;
28055 else
28056 break;
28057 case TYPE_STORE:
28058 if (get_attr_update (insn) == UPDATE_YES
28059 && get_attr_indexed (insn) == INDEXED_YES)
28060 return true;
28061 else
28062 break;
28063 default:
28064 break;
28065 }
28066 break;
28067 default:
28068 break;
28069 }
28070
28071 return false;
28072 }
28073
28074 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
28075 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
28076
28077 static bool
28078 is_costly_group (rtx *group_insns, rtx next_insn)
28079 {
28080 int i;
28081 int issue_rate = rs6000_issue_rate ();
28082
28083 for (i = 0; i < issue_rate; i++)
28084 {
28085 sd_iterator_def sd_it;
28086 dep_t dep;
28087 rtx insn = group_insns[i];
28088
28089 if (!insn)
28090 continue;
28091
28092 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
28093 {
28094 rtx next = DEP_CON (dep);
28095
28096 if (next == next_insn
28097 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
28098 return true;
28099 }
28100 }
28101
28102 return false;
28103 }
28104
28105 /* Utility function for redefine_groups.
28106 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
28107 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
28108 to keep it "far" (in a separate group) from GROUP_INSNS, following
28109 one of the following schemes, depending on the value X of the flag
28110 -minsert-sched-nops:
28111 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
28112 in order to force NEXT_INSN into a separate group.
28113 (2) X < sched_finish_regroup_exact: insert exactly X nops.
28114 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
28115 insertion (whether a group has just ended, how many vacant issue slots remain
28116 in the last group, and how many dispatch groups were encountered so far). */
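/* As a worked illustration (hypothetical numbers): with issue_rate == 4
   and two vacant slots left (can_issue_more == 2), scheme (1) emits one
   nop when NEXT_INSN is not a branch, or two when it is, so that the
   branch lands in the new group; on POWER6/7/8 a single group-ending
   nop is emitted instead.  Scheme (2) with -minsert-sched-nops=3 always
   emits exactly three nops.  */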
28117
28118 static int
28119 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
28120 rtx_insn *next_insn, bool *group_end, int can_issue_more,
28121 int *group_count)
28122 {
28123 rtx nop;
28124 bool force;
28125 int issue_rate = rs6000_issue_rate ();
28126 bool end = *group_end;
28127 int i;
28128
28129 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
28130 return can_issue_more;
28131
28132 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
28133 return can_issue_more;
28134
28135 force = is_costly_group (group_insns, next_insn);
28136 if (!force)
28137 return can_issue_more;
28138
28139 if (sched_verbose > 6)
28140 fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
28141 *group_count, can_issue_more);
28142
28143 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
28144 {
28145 if (*group_end)
28146 can_issue_more = 0;
28147
28148 /* Since only a branch can be issued in the last issue_slot, it is
28149 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
28150 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
28151 in this case the last nop will start a new group and the branch
28152 will be forced to the new group. */
28153 if (can_issue_more && !is_branch_slot_insn (next_insn))
28154 can_issue_more--;
28155
28156 /* Do we have a special group ending nop? */
28157 if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
28158 || rs6000_cpu_attr == CPU_POWER8)
28159 {
28160 nop = gen_group_ending_nop ();
28161 emit_insn_before (nop, next_insn);
28162 can_issue_more = 0;
28163 }
28164 else
28165 while (can_issue_more > 0)
28166 {
28167 nop = gen_nop ();
28168 emit_insn_before (nop, next_insn);
28169 can_issue_more--;
28170 }
28171
28172 *group_end = true;
28173 return 0;
28174 }
28175
28176 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
28177 {
28178 int n_nops = rs6000_sched_insert_nops;
28179
28180 /* Nops can't be issued from the branch slot, so the effective
28181 issue_rate for nops is 'issue_rate - 1'. */
28182 if (can_issue_more == 0)
28183 can_issue_more = issue_rate;
28184 can_issue_more--;
28185 if (can_issue_more == 0)
28186 {
28187 can_issue_more = issue_rate - 1;
28188 (*group_count)++;
28189 end = true;
28190 for (i = 0; i < issue_rate; i++)
28191 {
28192 group_insns[i] = 0;
28193 }
28194 }
28195
28196 while (n_nops > 0)
28197 {
28198 nop = gen_nop ();
28199 emit_insn_before (nop, next_insn);
28200 if (can_issue_more == issue_rate - 1) /* new group begins */
28201 end = false;
28202 can_issue_more--;
28203 if (can_issue_more == 0)
28204 {
28205 can_issue_more = issue_rate - 1;
28206 (*group_count)++;
28207 end = true;
28208 for (i = 0; i < issue_rate; i++)
28209 {
28210 group_insns[i] = 0;
28211 }
28212 }
28213 n_nops--;
28214 }
28215
28216 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
28217 can_issue_more++;
28218
28219 /* Is next_insn going to start a new group? */
28220 *group_end
28221 = (end
28222 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
28223 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
28224 || (can_issue_more < issue_rate &&
28225 insn_terminates_group_p (next_insn, previous_group)));
28226 if (*group_end && end)
28227 (*group_count)--;
28228
28229 if (sched_verbose > 6)
28230 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
28231 *group_count, can_issue_more);
28232 return can_issue_more;
28233 }
28234
28235 return can_issue_more;
28236 }
28237
28238 /* This function tries to synchronize the dispatch groups that the compiler "sees"
28239 with the dispatch groups that the processor dispatcher is expected to
28240 form in practice. It tries to achieve this synchronization by forcing the
28241 estimated processor grouping on the compiler (as opposed to the function
28242 'pad_groups', which tries to force the scheduler's grouping on the processor).
28243
28244 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
28245 examines the (estimated) dispatch groups that will be formed by the processor
28246 dispatcher. It marks these group boundaries to reflect the estimated
28247 processor grouping, overriding the grouping that the scheduler had marked.
28248 Depending on the value of the flag '-minsert-sched-nops' this function can
28249 force certain insns into separate groups or force a certain distance between
28250 them by inserting nops, for example, if there exists a "costly dependence"
28251 between the insns.
28252
28253 The function estimates the group boundaries that the processor will form as
28254 follows: It keeps track of how many vacant issue slots are available after
28255 each insn. A subsequent insn will start a new group if one of the following
28256 4 cases applies:
28257 - no more vacant issue slots remain in the current dispatch group.
28258 - only the last issue slot, which is the branch slot, is vacant, but the next
28259 insn is not a branch.
28260 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
28261 which means that a cracked insn (which occupies two issue slots) can't be
28262 issued in this group.
28263 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
28264 start a new group. */
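/* As an illustration (hypothetical 4-wide dispatch group): if three of
   the four slots are already occupied, a cracked insn is estimated to
   start a new group since it needs two slots; a branch may still be
   dispatched into the remaining (branch) slot, but any other insn may
   not.  */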
28265
28266 static int
28267 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
28268 rtx_insn *tail)
28269 {
28270 rtx_insn *insn, *next_insn;
28271 int issue_rate;
28272 int can_issue_more;
28273 int slot, i;
28274 bool group_end;
28275 int group_count = 0;
28276 rtx *group_insns;
28277
28278 /* Initialize. */
28279 issue_rate = rs6000_issue_rate ();
28280 group_insns = XALLOCAVEC (rtx, issue_rate);
28281 for (i = 0; i < issue_rate; i++)
28282 {
28283 group_insns[i] = 0;
28284 }
28285 can_issue_more = issue_rate;
28286 slot = 0;
28287 insn = get_next_active_insn (prev_head_insn, tail);
28288 group_end = false;
28289
28290 while (insn != NULL_RTX)
28291 {
28292 slot = (issue_rate - can_issue_more);
28293 group_insns[slot] = insn;
28294 can_issue_more =
28295 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
28296 if (insn_terminates_group_p (insn, current_group))
28297 can_issue_more = 0;
28298
28299 next_insn = get_next_active_insn (insn, tail);
28300 if (next_insn == NULL_RTX)
28301 return group_count + 1;
28302
28303 /* Is next_insn going to start a new group? */
28304 group_end
28305 = (can_issue_more == 0
28306 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
28307 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
28308 || (can_issue_more < issue_rate &&
28309 insn_terminates_group_p (next_insn, previous_group)));
28310
28311 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
28312 next_insn, &group_end, can_issue_more,
28313 &group_count);
28314
28315 if (group_end)
28316 {
28317 group_count++;
28318 can_issue_more = 0;
28319 for (i = 0; i < issue_rate; i++)
28320 {
28321 group_insns[i] = 0;
28322 }
28323 }
28324
28325 if (GET_MODE (next_insn) == TImode && can_issue_more)
28326 PUT_MODE (next_insn, VOIDmode);
28327 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
28328 PUT_MODE (next_insn, TImode);
28329
28330 insn = next_insn;
28331 if (can_issue_more == 0)
28332 can_issue_more = issue_rate;
28333 } /* while */
28334
28335 return group_count;
28336 }
28337
28338 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
28339 dispatch group boundaries that the scheduler had marked. Pad with nops
28340 any dispatch groups which have vacant issue slots, in order to force the
28341 scheduler's grouping on the processor dispatcher. The function
28342 returns the number of dispatch groups found. */
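/* For example (assuming a hypothetical 4-wide dispatch group): if the
   scheduler ended a group after two insns and the next insn is not a
   branch, one nop is emitted -- the last slot is reserved for branches
   -- so the hardware is forced to end the group exactly where the
   scheduler did.  */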
28343
28344 static int
28345 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
28346 rtx_insn *tail)
28347 {
28348 rtx_insn *insn, *next_insn;
28349 rtx nop;
28350 int issue_rate;
28351 int can_issue_more;
28352 int group_end;
28353 int group_count = 0;
28354
28355 /* Initialize issue_rate. */
28356 issue_rate = rs6000_issue_rate ();
28357 can_issue_more = issue_rate;
28358
28359 insn = get_next_active_insn (prev_head_insn, tail);
28360 next_insn = get_next_active_insn (insn, tail);
28361
28362 while (insn != NULL_RTX)
28363 {
28364 can_issue_more =
28365 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
28366
28367 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
28368
28369 if (next_insn == NULL_RTX)
28370 break;
28371
28372 if (group_end)
28373 {
28374 /* If the scheduler had marked group termination at this location
28375 (between insn and next_insn), and neither insn nor next_insn will
28376 force group termination, pad the group with nops to force group
28377 termination. */
28378 if (can_issue_more
28379 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
28380 && !insn_terminates_group_p (insn, current_group)
28381 && !insn_terminates_group_p (next_insn, previous_group))
28382 {
28383 if (!is_branch_slot_insn (next_insn))
28384 can_issue_more--;
28385
28386 while (can_issue_more)
28387 {
28388 nop = gen_nop ();
28389 emit_insn_before (nop, next_insn);
28390 can_issue_more--;
28391 }
28392 }
28393
28394 can_issue_more = issue_rate;
28395 group_count++;
28396 }
28397
28398 insn = next_insn;
28399 next_insn = get_next_active_insn (insn, tail);
28400 }
28401
28402 return group_count;
28403 }
28404
28405 /* We're beginning a new block. Initialize data structures as necessary. */
28406
28407 static void
28408 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
28409 int sched_verbose ATTRIBUTE_UNUSED,
28410 int max_ready ATTRIBUTE_UNUSED)
28411 {
28412 last_scheduled_insn = NULL_RTX;
28413 load_store_pendulum = 0;
28414 }
28415
28416 /* The following function is called at the end of scheduling BB.
28417 After reload, it inserts nops to enforce insn group bundling. */
28418
28419 static void
28420 rs6000_sched_finish (FILE *dump, int sched_verbose)
28421 {
28422 int n_groups;
28423
28424 if (sched_verbose)
28425 fprintf (dump, "=== Finishing schedule.\n");
28426
28427 if (reload_completed && rs6000_sched_groups)
28428 {
28429 /* Do not run sched_finish hook when selective scheduling enabled. */
28430 if (sel_sched_p ())
28431 return;
28432
28433 if (rs6000_sched_insert_nops == sched_finish_none)
28434 return;
28435
28436 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
28437 n_groups = pad_groups (dump, sched_verbose,
28438 current_sched_info->prev_head,
28439 current_sched_info->next_tail);
28440 else
28441 n_groups = redefine_groups (dump, sched_verbose,
28442 current_sched_info->prev_head,
28443 current_sched_info->next_tail);
28444
28445 if (sched_verbose >= 6)
28446 {
28447 fprintf (dump, "ngroups = %d\n", n_groups);
28448 print_rtl (dump, current_sched_info->prev_head);
28449 fprintf (dump, "Done finish_sched\n");
28450 }
28451 }
28452 }
28453
28454 struct _rs6000_sched_context
28455 {
28456 short cached_can_issue_more;
28457 rtx last_scheduled_insn;
28458 int load_store_pendulum;
28459 };
28460
28461 typedef struct _rs6000_sched_context rs6000_sched_context_def;
28462 typedef rs6000_sched_context_def *rs6000_sched_context_t;
28463
28464 /* Allocate storage for a new scheduling context. */
28465 static void *
28466 rs6000_alloc_sched_context (void)
28467 {
28468 return xmalloc (sizeof (rs6000_sched_context_def));
28469 }
28470
28471 /* Initialize _SC with clean data if CLEAN_P is true,
28472 and from the global context otherwise. */
28473 static void
28474 rs6000_init_sched_context (void *_sc, bool clean_p)
28475 {
28476 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
28477
28478 if (clean_p)
28479 {
28480 sc->cached_can_issue_more = 0;
28481 sc->last_scheduled_insn = NULL_RTX;
28482 sc->load_store_pendulum = 0;
28483 }
28484 else
28485 {
28486 sc->cached_can_issue_more = cached_can_issue_more;
28487 sc->last_scheduled_insn = last_scheduled_insn;
28488 sc->load_store_pendulum = load_store_pendulum;
28489 }
28490 }
28491
28492 /* Sets the global scheduling context to the one pointed to by _SC. */
28493 static void
28494 rs6000_set_sched_context (void *_sc)
28495 {
28496 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
28497
28498 gcc_assert (sc != NULL);
28499
28500 cached_can_issue_more = sc->cached_can_issue_more;
28501 last_scheduled_insn = sc->last_scheduled_insn;
28502 load_store_pendulum = sc->load_store_pendulum;
28503 }
28504
28505 /* Free _SC. */
28506 static void
28507 rs6000_free_sched_context (void *_sc)
28508 {
28509 gcc_assert (_sc != NULL);
28510
28511 free (_sc);
28512 }
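/* A minimal sketch of how the context hooks above fit together
   (hypothetical direct calls; the scheduler actually reaches them
   through the targetm.sched hooks):

     void *sc = rs6000_alloc_sched_context ();
     rs6000_init_sched_context (sc, false);  -- snapshot the globals
     rs6000_set_sched_context (sc);          -- restore the snapshot
     rs6000_free_sched_context (sc);
*/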
28513
28514 \f
28515 /* Length in units of the trampoline for entering a nested function. */
28516
28517 int
28518 rs6000_trampoline_size (void)
28519 {
28520 int ret = 0;
28521
28522 switch (DEFAULT_ABI)
28523 {
28524 default:
28525 gcc_unreachable ();
28526
28527 case ABI_AIX:
28528 ret = (TARGET_32BIT) ? 12 : 24;
28529 break;
28530
28531 case ABI_ELFv2:
28532 gcc_assert (!TARGET_32BIT);
28533 ret = 32;
28534 break;
28535
28536 case ABI_DARWIN:
28537 case ABI_V4:
28538 ret = (TARGET_32BIT) ? 40 : 48;
28539 break;
28540 }
28541
28542 return ret;
28543 }
28544
28545 /* Emit RTL insns to initialize the variable parts of a trampoline.
28546 FNADDR is an RTX for the address of the function's pure code.
28547 CXT is an RTX for the static chain value for the function. */
28548
28549 static void
28550 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
28551 {
28552 int regsize = (TARGET_32BIT) ? 4 : 8;
28553 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
28554 rtx ctx_reg = force_reg (Pmode, cxt);
28555 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
28556
28557 switch (DEFAULT_ABI)
28558 {
28559 default:
28560 gcc_unreachable ();
28561
28562 /* Under AIX, just build the 3-word function descriptor. */
28563 case ABI_AIX:
28564 {
28565 rtx fnmem, fn_reg, toc_reg;
28566
28567 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
28568 error ("cannot take the address of a nested function if you use "
28569 "the -mno-pointers-to-nested-functions option");
28570
28571 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
28572 fn_reg = gen_reg_rtx (Pmode);
28573 toc_reg = gen_reg_rtx (Pmode);
28574
28575 /* Macro to shorten the code expansions below. */
28576 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
28577
28578 m_tramp = replace_equiv_address (m_tramp, addr);
28579
28580 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
28581 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
28582 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
28583 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
28584 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
28585
28586 # undef MEM_PLUS
28587 }
28588 break;
28589
28590 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
28591 case ABI_ELFv2:
28592 case ABI_DARWIN:
28593 case ABI_V4:
28594 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
28595 LCT_NORMAL, VOIDmode, 4,
28596 addr, Pmode,
28597 GEN_INT (rs6000_trampoline_size ()), SImode,
28598 fnaddr, Pmode,
28599 ctx_reg, Pmode);
28600 break;
28601 }
28602 }
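/* For illustration, under ABI_AIX with 64-bit pointers (regsize == 8)
   the code above fills the trampoline with a 3-word function
   descriptor:

     offset  0: entry address, copied from FNADDR's descriptor
     offset  8: TOC pointer, copied from FNADDR's descriptor
     offset 16: static chain value CXT
*/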
28603
28604 \f
28605 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
28606 identifier as an argument, so the front end shouldn't look it up. */
28607
28608 static bool
28609 rs6000_attribute_takes_identifier_p (const_tree attr_id)
28610 {
28611 return is_attribute_p ("altivec", attr_id);
28612 }
28613
28614 /* Handle the "altivec" attribute. The attribute may have
28615 arguments as follows:
28616
28617 __attribute__((altivec(vector__)))
28618 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
28619 __attribute__((altivec(bool__))) (always followed by 'unsigned')
28620
28621 and may appear more than once (e.g., 'vector bool char') in a
28622 given declaration. */
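/* For example (hypothetical user code), a declaration that reaches this
   handler as

     __attribute__((altivec(vector__))) unsigned int v;

   arrives with ALTIVEC_TYPE 'v' and MODE SImode, so RESULT becomes
   unsigned_V4SI_type_node -- the user-level 'vector unsigned int'.  */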
28623
28624 static tree
28625 rs6000_handle_altivec_attribute (tree *node,
28626 tree name ATTRIBUTE_UNUSED,
28627 tree args,
28628 int flags ATTRIBUTE_UNUSED,
28629 bool *no_add_attrs)
28630 {
28631 tree type = *node, result = NULL_TREE;
28632 machine_mode mode;
28633 int unsigned_p;
28634 char altivec_type
28635 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
28636 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
28637 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
28638 : '?');
28639
28640 while (POINTER_TYPE_P (type)
28641 || TREE_CODE (type) == FUNCTION_TYPE
28642 || TREE_CODE (type) == METHOD_TYPE
28643 || TREE_CODE (type) == ARRAY_TYPE)
28644 type = TREE_TYPE (type);
28645
28646 mode = TYPE_MODE (type);
28647
28648 /* Check for invalid AltiVec type qualifiers. */
28649 if (type == long_double_type_node)
28650 error ("use of %<long double%> in AltiVec types is invalid");
28651 else if (type == boolean_type_node)
28652 error ("use of boolean types in AltiVec types is invalid");
28653 else if (TREE_CODE (type) == COMPLEX_TYPE)
28654 error ("use of %<complex%> in AltiVec types is invalid");
28655 else if (DECIMAL_FLOAT_MODE_P (mode))
28656 error ("use of decimal floating point types in AltiVec types is invalid");
28657 else if (!TARGET_VSX)
28658 {
28659 if (type == long_unsigned_type_node || type == long_integer_type_node)
28660 {
28661 if (TARGET_64BIT)
28662 error ("use of %<long%> in AltiVec types is invalid for "
28663 "64-bit code without -mvsx");
28664 else if (rs6000_warn_altivec_long)
28665 warning (0, "use of %<long%> in AltiVec types is deprecated; "
28666 "use %<int%>");
28667 }
28668 else if (type == long_long_unsigned_type_node
28669 || type == long_long_integer_type_node)
28670 error ("use of %<long long%> in AltiVec types is invalid without "
28671 "-mvsx");
28672 else if (type == double_type_node)
28673 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
28674 }
28675
28676 switch (altivec_type)
28677 {
28678 case 'v':
28679 unsigned_p = TYPE_UNSIGNED (type);
28680 switch (mode)
28681 {
28682 case TImode:
28683 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
28684 break;
28685 case DImode:
28686 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
28687 break;
28688 case SImode:
28689 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
28690 break;
28691 case HImode:
28692 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
28693 break;
28694 case QImode:
28695 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
28696 break;
28697 case SFmode: result = V4SF_type_node; break;
28698 case DFmode: result = V2DF_type_node; break;
28699 /* If the user says 'vector int bool', we may be handed the 'bool'
28700 attribute _before_ the 'vector' attribute, and so select the
28701 proper type in the 'b' case below. */
28702 case V4SImode: case V8HImode: case V16QImode: case V4SFmode:
28703 case V2DImode: case V2DFmode:
28704 result = type;
28705 default: break;
28706 }
28707 break;
28708 case 'b':
28709 switch (mode)
28710 {
28711 case DImode: case V2DImode: result = bool_V2DI_type_node; break;
28712 case SImode: case V4SImode: result = bool_V4SI_type_node; break;
28713 case HImode: case V8HImode: result = bool_V8HI_type_node; break;
28714 case QImode: case V16QImode: result = bool_V16QI_type_node;
28715 default: break;
28716 }
28717 break;
28718 case 'p':
28719 switch (mode)
28720 {
28721 case V8HImode: result = pixel_V8HI_type_node;
28722 default: break;
28723 }
28724 default: break;
28725 }
28726
28727 /* Propagate qualifiers attached to the element type
28728 onto the vector type. */
28729 if (result && result != type && TYPE_QUALS (type))
28730 result = build_qualified_type (result, TYPE_QUALS (type));
28731
28732 *no_add_attrs = true; /* No need to hang on to the attribute. */
28733
28734 if (result)
28735 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
28736
28737 return NULL_TREE;
28738 }
28739
28740 /* AltiVec defines four built-in scalar types that serve as vector
28741 elements; we must teach the compiler how to mangle them. */
28742
28743 static const char *
28744 rs6000_mangle_type (const_tree type)
28745 {
28746 type = TYPE_MAIN_VARIANT (type);
28747
28748 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
28749 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
28750 return NULL;
28751
28752 if (type == bool_char_type_node) return "U6__boolc";
28753 if (type == bool_short_type_node) return "U6__bools";
28754 if (type == pixel_type_node) return "u7__pixel";
28755 if (type == bool_int_type_node) return "U6__booli";
28756 if (type == bool_long_type_node) return "U6__booll";
28757
28758 /* Mangle IBM extended float long double as `g' (__float128) on
28759 powerpc*-linux where long-double-64 previously was the default. */
28760 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
28761 && TARGET_ELF
28762 && TARGET_LONG_DOUBLE_128
28763 && !TARGET_IEEEQUAD)
28764 return "g";
28765
28766 /* For all other types, use normal C++ mangling. */
28767 return NULL;
28768 }
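/* For example, the AltiVec scalar type '__pixel' mangles as "u7__pixel"
   per the table above; the enclosing vector type, if any, is mangled by
   the generic C++ mangler around that element encoding.  */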
28769
28770 /* Handle a "longcall" or "shortcall" attribute; arguments as in
28771 struct attribute_spec.handler. */
28772
28773 static tree
28774 rs6000_handle_longcall_attribute (tree *node, tree name,
28775 tree args ATTRIBUTE_UNUSED,
28776 int flags ATTRIBUTE_UNUSED,
28777 bool *no_add_attrs)
28778 {
28779 if (TREE_CODE (*node) != FUNCTION_TYPE
28780 && TREE_CODE (*node) != FIELD_DECL
28781 && TREE_CODE (*node) != TYPE_DECL)
28782 {
28783 warning (OPT_Wattributes, "%qE attribute only applies to functions",
28784 name);
28785 *no_add_attrs = true;
28786 }
28787
28788 return NULL_TREE;
28789 }
28790
28791 /* Set longcall attributes on all functions declared when
28792 rs6000_default_long_calls is true. */
28793 static void
28794 rs6000_set_default_type_attributes (tree type)
28795 {
28796 if (rs6000_default_long_calls
28797 && (TREE_CODE (type) == FUNCTION_TYPE
28798 || TREE_CODE (type) == METHOD_TYPE))
28799 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
28800 NULL_TREE,
28801 TYPE_ATTRIBUTES (type));
28802
28803 #if TARGET_MACHO
28804 darwin_set_default_type_attributes (type);
28805 #endif
28806 }
28807
28808 /* Return a reference suitable for calling a function with the
28809 longcall attribute. */
28810
28811 rtx
28812 rs6000_longcall_ref (rtx call_ref)
28813 {
28814 const char *call_name;
28815 tree node;
28816
28817 if (GET_CODE (call_ref) != SYMBOL_REF)
28818 return call_ref;
28819
28820 /* System V adds '.' to the internal name, so skip any leading dots. */
28821 call_name = XSTR (call_ref, 0);
28822 if (*call_name == '.')
28823 {
28824 while (*call_name == '.')
28825 call_name++;
28826
28827 node = get_identifier (call_name);
28828 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
28829 }
28830
28831 return force_reg (Pmode, call_ref);
28832 }
28833 \f
28834 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
28835 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
28836 #endif
28837
28838 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
28839 struct attribute_spec.handler. */
28840 static tree
28841 rs6000_handle_struct_attribute (tree *node, tree name,
28842 tree args ATTRIBUTE_UNUSED,
28843 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
28844 {
28845 tree *type = NULL;
28846 if (DECL_P (*node))
28847 {
28848 if (TREE_CODE (*node) == TYPE_DECL)
28849 type = &TREE_TYPE (*node);
28850 }
28851 else
28852 type = node;
28853
28854 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
28855 || TREE_CODE (*type) == UNION_TYPE)))
28856 {
28857 warning (OPT_Wattributes, "%qE attribute ignored", name);
28858 *no_add_attrs = true;
28859 }
28860
28861 else if ((is_attribute_p ("ms_struct", name)
28862 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
28863 || ((is_attribute_p ("gcc_struct", name)
28864 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
28865 {
28866 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
28867 name);
28868 *no_add_attrs = true;
28869 }
28870
28871 return NULL_TREE;
28872 }
28873
28874 static bool
28875 rs6000_ms_bitfield_layout_p (const_tree record_type)
28876 {
28877 return ((TARGET_USE_MS_BITFIELD_LAYOUT
28878 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
28879 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
28880 }
28881 \f
28882 #ifdef USING_ELFOS_H
28883
28884 /* A get_unnamed_section callback, used for switching to toc_section. */
28885
28886 static void
28887 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
28888 {
28889 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28890 && TARGET_MINIMAL_TOC
28891 && !TARGET_RELOCATABLE)
28892 {
28893 if (!toc_initialized)
28894 {
28895 toc_initialized = 1;
28896 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
28897 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
28898 fprintf (asm_out_file, "\t.tc ");
28899 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
28900 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
28901 fprintf (asm_out_file, "\n");
28902
28903 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
28904 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
28905 fprintf (asm_out_file, " = .+32768\n");
28906 }
28907 else
28908 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
28909 }
28910 else if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28911 && !TARGET_RELOCATABLE)
28912 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
28913 else
28914 {
28915 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
28916 if (!toc_initialized)
28917 {
28918 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
28919 fprintf (asm_out_file, " = .+32768\n");
28920 toc_initialized = 1;
28921 }
28922 }
28923 }
28924
28925 /* Implement TARGET_ASM_INIT_SECTIONS. */
28926
28927 static void
28928 rs6000_elf_asm_init_sections (void)
28929 {
28930 toc_section
28931 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
28932
28933 sdata2_section
28934 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
28935 SDATA2_SECTION_ASM_OP);
28936 }
28937
28938 /* Implement TARGET_SELECT_RTX_SECTION. */
28939
28940 static section *
28941 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
28942 unsigned HOST_WIDE_INT align)
28943 {
28944 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
28945 return toc_section;
28946 else
28947 return default_elf_select_rtx_section (mode, x, align);
28948 }
28949 \f
28950 /* For a SYMBOL_REF, set generic flags and then perform some
28951 target-specific processing.
28952
28953 When the AIX ABI is requested on a non-AIX system, replace the
28954 function name with the real name (with a leading .) rather than the
28955 function descriptor name. This saves a lot of code that would
28956 otherwise be needed to strip the prefixes. */
28957
28958 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
28959 static void
28960 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
28961 {
28962 default_encode_section_info (decl, rtl, first);
28963
28964 if (first
28965 && TREE_CODE (decl) == FUNCTION_DECL
28966 && !TARGET_AIX
28967 && DEFAULT_ABI == ABI_AIX)
28968 {
28969 rtx sym_ref = XEXP (rtl, 0);
28970 size_t len = strlen (XSTR (sym_ref, 0));
28971 char *str = XALLOCAVEC (char, len + 2);
28972 str[0] = '.';
28973 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
28974 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
28975 }
28976 }
28977
28978 static inline bool
28979 compare_section_name (const char *section, const char *templ)
28980 {
28981 int len;
28982
28983 len = strlen (templ);
28984 return (strncmp (section, templ, len) == 0
28985 && (section[len] == 0 || section[len] == '.'));
28986 }
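/* For example, compare_section_name (".sdata.foo", ".sdata") is true --
   the template matches and the following character is '.' -- whereas
   ".sdata2" does not match template ".sdata" because the next character
   is '2'.  */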
28987
28988 bool
28989 rs6000_elf_in_small_data_p (const_tree decl)
28990 {
28991 if (rs6000_sdata == SDATA_NONE)
28992 return false;
28993
28994 /* We want to merge strings, so we never consider them small data. */
28995 if (TREE_CODE (decl) == STRING_CST)
28996 return false;
28997
28998 /* Functions are never in the small data area. */
28999 if (TREE_CODE (decl) == FUNCTION_DECL)
29000 return false;
29001
29002 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
29003 {
29004 const char *section = DECL_SECTION_NAME (decl);
29005 if (compare_section_name (section, ".sdata")
29006 || compare_section_name (section, ".sdata2")
29007 || compare_section_name (section, ".gnu.linkonce.s")
29008 || compare_section_name (section, ".sbss")
29009 || compare_section_name (section, ".sbss2")
29010 || compare_section_name (section, ".gnu.linkonce.sb")
29011 || strcmp (section, ".PPC.EMB.sdata0") == 0
29012 || strcmp (section, ".PPC.EMB.sbss0") == 0)
29013 return true;
29014 }
29015 else
29016 {
29017 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
29018
29019 if (size > 0
29020 && size <= g_switch_value
29021 /* If it's not public, and we're not going to reference it there,
29022 there's no need to put it in the small data section. */
29023 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
29024 return true;
29025 }
29026
29027 return false;
29028 }
29029
29030 #endif /* USING_ELFOS_H */
29031 \f
29032 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
29033
29034 static bool
29035 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
29036 {
29037 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
29038 }
29039
29040 /* Do not place thread-local symbols refs in the object blocks. */
29041
29042 static bool
29043 rs6000_use_blocks_for_decl_p (const_tree decl)
29044 {
29045 return !DECL_THREAD_LOCAL_P (decl);
29046 }
29047 \f
29048 /* Return a REG that occurs in ADDR with coefficient 1.
29049 ADDR can be effectively incremented by incrementing REG.
29050
29051 r0 is special and we must not select it as an address
29052 register by this routine since our caller will try to
29053 increment the returned register via an "la" instruction. */
29054
29055 rtx
29056 find_addr_reg (rtx addr)
29057 {
29058 while (GET_CODE (addr) == PLUS)
29059 {
29060 if (GET_CODE (XEXP (addr, 0)) == REG
29061 && REGNO (XEXP (addr, 0)) != 0)
29062 addr = XEXP (addr, 0);
29063 else if (GET_CODE (XEXP (addr, 1)) == REG
29064 && REGNO (XEXP (addr, 1)) != 0)
29065 addr = XEXP (addr, 1);
29066 else if (CONSTANT_P (XEXP (addr, 0)))
29067 addr = XEXP (addr, 1);
29068 else if (CONSTANT_P (XEXP (addr, 1)))
29069 addr = XEXP (addr, 0);
29070 else
29071 gcc_unreachable ();
29072 }
29073 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
29074 return addr;
29075 }
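/* For example, for ADDR (plus (reg r9) (const_int 16)) this returns r9.
   r0 must never be chosen: in the "la" (load address) instruction a
   base register of r0 is read as the constant zero.  */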
29076
29077 void
29078 rs6000_fatal_bad_address (rtx op)
29079 {
29080 fatal_insn ("bad address", op);
29081 }
29082
29083 #if TARGET_MACHO
29084
29085 typedef struct branch_island_d {
29086 tree function_name;
29087 tree label_name;
29088 int line_number;
29089 } branch_island;
29090
29091
29092 static vec<branch_island, va_gc> *branch_islands;
29093
29094 /* Remember to generate a branch island for far calls to the given
29095 function. */
29096
29097 static void
29098 add_compiler_branch_island (tree label_name, tree function_name,
29099 int line_number)
29100 {
29101 branch_island bi = {function_name, label_name, line_number};
29102 vec_safe_push (branch_islands, bi);
29103 }
29104
29105 /* Generate far-jump branch islands for everything recorded in
29106 branch_islands. Invoked immediately after the last instruction of
29107 the epilogue has been emitted; the branch islands must be appended
29108 to, and contiguous with, the function body. Mach-O stubs are
29109 generated in machopic_output_stub(). */
29110
29111 static void
29112 macho_branch_islands (void)
29113 {
29114 char tmp_buf[512];
29115
29116 while (!vec_safe_is_empty (branch_islands))
29117 {
29118 branch_island *bi = &branch_islands->last ();
29119 const char *label = IDENTIFIER_POINTER (bi->label_name);
29120 const char *name = IDENTIFIER_POINTER (bi->function_name);
29121 char name_buf[512];
29122 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
29123 if (name[0] == '*' || name[0] == '&')
29124 strcpy (name_buf, name+1);
29125 else
29126 {
29127 name_buf[0] = '_';
29128 strcpy (name_buf+1, name);
29129 }
29130 strcpy (tmp_buf, "\n");
29131 strcat (tmp_buf, label);
29132 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
29133 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
29134 dbxout_stabd (N_SLINE, bi->line_number);
29135 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
29136 if (flag_pic)
29137 {
29138 if (TARGET_LINK_STACK)
29139 {
29140 char name[32];
29141 get_ppc476_thunk_name (name);
29142 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
29143 strcat (tmp_buf, name);
29144 strcat (tmp_buf, "\n");
29145 strcat (tmp_buf, label);
29146 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
29147 }
29148 else
29149 {
29150 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
29151 strcat (tmp_buf, label);
29152 strcat (tmp_buf, "_pic\n");
29153 strcat (tmp_buf, label);
29154 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
29155 }
29156
29157 strcat (tmp_buf, "\taddis r11,r11,ha16(");
29158 strcat (tmp_buf, name_buf);
29159 strcat (tmp_buf, " - ");
29160 strcat (tmp_buf, label);
29161 strcat (tmp_buf, "_pic)\n");
29162
29163 strcat (tmp_buf, "\tmtlr r0\n");
29164
29165 strcat (tmp_buf, "\taddi r12,r11,lo16(");
29166 strcat (tmp_buf, name_buf);
29167 strcat (tmp_buf, " - ");
29168 strcat (tmp_buf, label);
29169 strcat (tmp_buf, "_pic)\n");
29170
29171 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
29172 }
29173 else
29174 {
29175 strcat (tmp_buf, ":\nlis r12,hi16(");
29176 strcat (tmp_buf, name_buf);
29177 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
29178 strcat (tmp_buf, name_buf);
29179 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
29180 }
29181 output_asm_insn (tmp_buf, 0);
29182 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
29183 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
29184 dbxout_stabd (N_SLINE, bi->line_number);
29185 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
29186 branch_islands->pop ();
29187 }
29188 }
29189
29190 /* NO_PREVIOUS_DEF checks in the linked list whether the function name
29191 is already there. */
29192
29193 static int
29194 no_previous_def (tree function_name)
29195 {
29196 branch_island *bi;
29197 unsigned ix;
29198
29199 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
29200 if (function_name == bi->function_name)
29201 return 0;
29202 return 1;
29203 }
29204
29205 /* GET_PREV_LABEL gets the label name from the previous definition of
29206 the function. */
29207
29208 static tree
29209 get_prev_label (tree function_name)
29210 {
29211 branch_island *bi;
29212 unsigned ix;
29213
29214 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
29215 if (function_name == bi->function_name)
29216 return bi->label_name;
29217 return NULL_TREE;
29218 }
29219
29220 /* INSN is either a function call or a millicode call.
29221
29222 OPERANDS[DEST_OPERAND_NUMBER] is the routine we are calling;
29223 OPERANDS[COOKIE_OPERAND_NUMBER] carries the CALL_LONG cookie flag. */
29224
29225 char *
29226 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
29227 int cookie_operand_number)
29228 {
29229 static char buf[256];
29230 if (darwin_emit_branch_islands
29231 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
29232 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
29233 {
29234 tree labelname;
29235 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
29236
29237 if (no_previous_def (funname))
29238 {
29239 rtx label_rtx = gen_label_rtx ();
29240 char *label_buf, temp_buf[256];
29241 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
29242 CODE_LABEL_NUMBER (label_rtx));
29243 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
29244 labelname = get_identifier (label_buf);
29245 add_compiler_branch_island (labelname, funname, insn_line (insn));
29246 }
29247 else
29248 labelname = get_prev_label (funname);
29249
29250 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
29251 instruction will reach 'foo', otherwise link as 'bl L42'".
29252 "L42" should be a 'branch island', that will do a far jump to
29253 'foo'. Branch islands are generated in
29254 macho_branch_islands(). */
29255 sprintf (buf, "jbsr %%z%d,%.246s",
29256 dest_operand_number, IDENTIFIER_POINTER (labelname));
29257 }
29258 else
29259 sprintf (buf, "bl %%z%d", dest_operand_number);
29260 return buf;
29261 }
29262
29263 /* Generate PIC and indirect symbol stubs. */
29264
29265 void
29266 machopic_output_stub (FILE *file, const char *symb, const char *stub)
29267 {
29268 unsigned int length;
29269 char *symbol_name, *lazy_ptr_name;
29270 char *local_label_0;
29271 static int label = 0;
29272
29273 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
29274 symb = (*targetm.strip_name_encoding) (symb);
29275
29277 length = strlen (symb);
29278 symbol_name = XALLOCAVEC (char, length + 32);
29279 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
29280
29281 lazy_ptr_name = XALLOCAVEC (char, length + 32);
29282 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
29283
29284 if (flag_pic == 2)
29285 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
29286 else
29287 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
29288
29289 if (flag_pic == 2)
29290 {
29291 fprintf (file, "\t.align 5\n");
29292
29293 fprintf (file, "%s:\n", stub);
29294 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29295
29296 label++;
29297 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
29298 sprintf (local_label_0, "\"L%011d$spb\"", label);
29299
29300 fprintf (file, "\tmflr r0\n");
29301 if (TARGET_LINK_STACK)
29302 {
29303 char name[32];
29304 get_ppc476_thunk_name (name);
29305 fprintf (file, "\tbl %s\n", name);
29306 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
29307 }
29308 else
29309 {
29310 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
29311 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
29312 }
29313 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
29314 lazy_ptr_name, local_label_0);
29315 fprintf (file, "\tmtlr r0\n");
29316 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
29317 (TARGET_64BIT ? "ldu" : "lwzu"),
29318 lazy_ptr_name, local_label_0);
29319 fprintf (file, "\tmtctr r12\n");
29320 fprintf (file, "\tbctr\n");
29321 }
29322 else
29323 {
29324 fprintf (file, "\t.align 4\n");
29325
29326 fprintf (file, "%s:\n", stub);
29327 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29328
29329 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
29330 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
29331 (TARGET_64BIT ? "ldu" : "lwzu"),
29332 lazy_ptr_name);
29333 fprintf (file, "\tmtctr r12\n");
29334 fprintf (file, "\tbctr\n");
29335 }
29336
29337 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
29338 fprintf (file, "%s:\n", lazy_ptr_name);
29339 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29340 fprintf (file, "%sdyld_stub_binding_helper\n",
29341 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
29342 }
29343
29344 /* Legitimize PIC addresses. If the address is already
29345 position-independent, we return ORIG. Newly generated
29346 position-independent addresses go into a reg. This is REG if
29347 nonzero, otherwise we allocate register(s) as necessary. */
29348
29349 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
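/* SMALL_INT is true when X fits in a signed 16-bit displacement, i.e.
   -32768 <= INTVAL (X) <= 32767: e.g. (const_int 0x7fff) passes, while
   (const_int 0x12345) does not.  */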
29350
29351 rtx
29352 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
29353 rtx reg)
29354 {
29355 rtx base, offset;
29356
29357 if (reg == NULL && ! reload_in_progress && ! reload_completed)
29358 reg = gen_reg_rtx (Pmode);
29359
29360 if (GET_CODE (orig) == CONST)
29361 {
29362 rtx reg_temp;
29363
29364 if (GET_CODE (XEXP (orig, 0)) == PLUS
29365 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
29366 return orig;
29367
29368 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
29369
29370 /* Use a different reg for the intermediate value, as
29371 it will be marked UNCHANGING. */
29372 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
29373 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
29374 Pmode, reg_temp);
29375 offset =
29376 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
29377 Pmode, reg);
29378
29379 if (GET_CODE (offset) == CONST_INT)
29380 {
29381 if (SMALL_INT (offset))
29382 return plus_constant (Pmode, base, INTVAL (offset));
29383 else if (! reload_in_progress && ! reload_completed)
29384 offset = force_reg (Pmode, offset);
29385 else
29386 {
29387 rtx mem = force_const_mem (Pmode, orig);
29388 return machopic_legitimize_pic_address (mem, Pmode, reg);
29389 }
29390 }
29391 return gen_rtx_PLUS (Pmode, base, offset);
29392 }
29393
29394 /* Fall back on generic machopic code. */
29395 return machopic_legitimize_pic_address (orig, mode, reg);
29396 }
29397
29398 /* Output a .machine directive for the Darwin assembler, and call
29399 the generic start_file routine. */
29400
29401 static void
29402 rs6000_darwin_file_start (void)
29403 {
29404 static const struct
29405 {
29406 const char *arg;
29407 const char *name;
29408 HOST_WIDE_INT if_set;
29409 } mapping[] = {
29410 { "ppc64", "ppc64", MASK_64BIT },
29411 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
29412 { "power4", "ppc970", 0 },
29413 { "G5", "ppc970", 0 },
29414 { "7450", "ppc7450", 0 },
29415 { "7400", "ppc7400", MASK_ALTIVEC },
29416 { "G4", "ppc7400", 0 },
29417 { "750", "ppc750", 0 },
29418 { "740", "ppc750", 0 },
29419 { "G3", "ppc750", 0 },
29420 { "604e", "ppc604e", 0 },
29421 { "604", "ppc604", 0 },
29422 { "603e", "ppc603", 0 },
29423 { "603", "ppc603", 0 },
29424 { "601", "ppc601", 0 },
29425 { NULL, "ppc", 0 } };
29426 const char *cpu_id = "";
29427 size_t i;
29428
29429 rs6000_file_start ();
29430 darwin_file_start ();
29431
29432 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
29433
29434 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
29435 cpu_id = rs6000_default_cpu;
29436
29437 if (global_options_set.x_rs6000_cpu_index)
29438 cpu_id = processor_target_table[rs6000_cpu_index].name;
29439
29440 /* Look through the mapping array. Pick the first name that either
29441 matches the argument, has a bit set in IF_SET that is also set
29442 in the target flags, or has a NULL name. */
29443
29444 i = 0;
29445 while (mapping[i].arg != NULL
29446 && strcmp (mapping[i].arg, cpu_id) != 0
29447 && (mapping[i].if_set & rs6000_isa_flags) == 0)
29448 i++;
29449
29450 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
29451 }
29452
29453 #endif /* TARGET_MACHO */
29454
29455 #if TARGET_ELF
29456 static int
29457 rs6000_elf_reloc_rw_mask (void)
29458 {
29459 if (flag_pic)
29460 return 3;
29461 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29462 return 2;
29463 else
29464 return 0;
29465 }
29466
29467 /* Record an element in the table of global constructors. SYMBOL is
29468 a SYMBOL_REF of the function to be called; PRIORITY is a number
29469 between 0 and MAX_INIT_PRIORITY.
29470
29471 This differs from default_named_section_asm_out_constructor in
29472 that we have special handling for -mrelocatable. */
29473
29474 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
29475 static void
29476 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
29477 {
29478 const char *section = ".ctors";
29479 char buf[16];
29480
29481 if (priority != DEFAULT_INIT_PRIORITY)
29482 {
29483 sprintf (buf, ".ctors.%.5u",
29484 /* Invert the numbering so the linker puts us in the proper
29485 order; constructors are run from right to left, and the
29486 linker sorts in increasing order. */
29487 MAX_INIT_PRIORITY - priority);
29488 section = buf;
29489 }
29490
29491 switch_to_section (get_section (section, SECTION_WRITE, NULL));
29492 assemble_align (POINTER_SIZE);
29493
29494 if (TARGET_RELOCATABLE)
29495 {
29496 fputs ("\t.long (", asm_out_file);
29497 output_addr_const (asm_out_file, symbol);
29498 fputs (")@fixup\n", asm_out_file);
29499 }
29500 else
29501 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
29502 }
29503
29504 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
29505 static void
29506 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
29507 {
29508 const char *section = ".dtors";
29509 char buf[16];
29510
29511 if (priority != DEFAULT_INIT_PRIORITY)
29512 {
29513 sprintf (buf, ".dtors.%.5u",
29514 /* Invert the numbering so the linker puts us in the proper
29515 order; constructors are run from right to left, and the
29516 linker sorts in increasing order. */
29517 MAX_INIT_PRIORITY - priority);
29518 section = buf;
29519 }
29520
29521 switch_to_section (get_section (section, SECTION_WRITE, NULL));
29522 assemble_align (POINTER_SIZE);
29523
29524 if (TARGET_RELOCATABLE)
29525 {
29526 fputs ("\t.long (", asm_out_file);
29527 output_addr_const (asm_out_file, symbol);
29528 fputs (")@fixup\n", asm_out_file);
29529 }
29530 else
29531 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
29532 }
29533
29534 void
29535 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
29536 {
29537 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
29538 {
29539 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
29540 ASM_OUTPUT_LABEL (file, name);
29541 fputs (DOUBLE_INT_ASM_OP, file);
29542 rs6000_output_function_entry (file, name);
29543 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
29544 if (DOT_SYMBOLS)
29545 {
29546 fputs ("\t.size\t", file);
29547 assemble_name (file, name);
29548 fputs (",24\n\t.type\t.", file);
29549 assemble_name (file, name);
29550 fputs (",@function\n", file);
29551 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
29552 {
29553 fputs ("\t.globl\t.", file);
29554 assemble_name (file, name);
29555 putc ('\n', file);
29556 }
29557 }
29558 else
29559 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
29560 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
29561 rs6000_output_function_entry (file, name);
29562 fputs (":\n", file);
29563 return;
29564 }
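/* As a sketch, for a 64-bit ELFv1 function "foo" (assuming DOT_SYMBOLS
   is off, so the entry point label is ".L.foo") the block above emits
   roughly:

	.section ".opd","aw"
	.align 3
   foo:
	.quad	.L.foo,.TOC.@tocbase,0
	.previous

   i.e. the official function descriptor lives in .opd, and the code
   label is emitted at the entry point afterward.  */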
29565
29566 if (TARGET_RELOCATABLE
29567 && !TARGET_SECURE_PLT
29568 && (get_pool_size () != 0 || crtl->profile)
29569 && uses_TOC ())
29570 {
29571 char buf[256];
29572
29573 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
29574
29575 ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
29576 fprintf (file, "\t.long ");
29577 assemble_name (file, buf);
29578 putc ('-', file);
29579 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
29580 assemble_name (file, buf);
29581 putc ('\n', file);
29582 }
29583
29584 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
29585 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
29586
29587 if (DEFAULT_ABI == ABI_AIX)
29588 {
29589 const char *desc_name, *orig_name;
29590
29591 orig_name = (*targetm.strip_name_encoding) (name);
29592 desc_name = orig_name;
29593 while (*desc_name == '.')
29594 desc_name++;
29595
29596 if (TREE_PUBLIC (decl))
29597 fprintf (file, "\t.globl %s\n", desc_name);
29598
29599 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
29600 fprintf (file, "%s:\n", desc_name);
29601 fprintf (file, "\t.long %s\n", orig_name);
29602 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
29603 fputs ("\t.long 0\n", file);
29604 fprintf (file, "\t.previous\n");
29605 }
29606 ASM_OUTPUT_LABEL (file, name);
29607 }
29608
29609 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
29610 static void
29611 rs6000_elf_file_end (void)
29612 {
29613 #ifdef HAVE_AS_GNU_ATTRIBUTE
29614 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
29615 {
29616 if (rs6000_passes_float)
29617 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n",
29618 ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) ? 1
29619 : (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT) ? 3
29620 : 2));
29621 if (rs6000_passes_vector)
29622 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
29623 (TARGET_ALTIVEC_ABI ? 2
29624 : TARGET_SPE_ABI ? 3
29625 : 1));
29626 if (rs6000_returns_struct)
29627 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
29628 aix_struct_return ? 2 : 1);
29629 }
29630 #endif
29631 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
29632 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
29633 file_end_indicate_exec_stack ();
29634 #endif
29635 }
29636 #endif
29637
29638 #if TARGET_XCOFF
29639 static void
29640 rs6000_xcoff_asm_output_anchor (rtx symbol)
29641 {
29642 char buffer[100];
29643
29644 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
29645 SYMBOL_REF_BLOCK_OFFSET (symbol));
29646 fprintf (asm_out_file, "%s", SET_ASM_OP);
29647 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
29648 fprintf (asm_out_file, ",");
29649 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
29650 fprintf (asm_out_file, "\n");
29651 }
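/* With SET_ASM_OP conventionally "\t.set\t", a symbol "foo" placed 16
   bytes into its anchor block comes out as ".set foo,$ + 16"
   (a sketch; "$" is the current location in the XCOFF assembler).  */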
29652
29653 static void
29654 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
29655 {
29656 fputs (GLOBAL_ASM_OP, stream);
29657 RS6000_OUTPUT_BASENAME (stream, name);
29658 putc ('\n', stream);
29659 }
29660
29661 /* A get_unnamed_section callback, used for read-only sections. DIRECTIVE
29662 points to the section string variable. */
29663
29664 static void
29665 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
29666 {
29667 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
29668 *(const char *const *) directive,
29669 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
29670 }
29671
29672 /* Likewise for read-write sections. */
29673
29674 static void
29675 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
29676 {
29677 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
29678 *(const char *const *) directive,
29679 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
29680 }
29681
29682 static void
29683 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
29684 {
29685 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
29686 *(const char *const *) directive,
29687 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
29688 }
29689
29690 /* A get_unnamed_section callback, used for switching to toc_section. */
29691
29692 static void
29693 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
29694 {
29695 if (TARGET_MINIMAL_TOC)
29696 {
29697 /* toc_section is always selected at least once from
29698 rs6000_xcoff_file_start, so this is guaranteed to
29699 always be defined once and only once in each file. */
29700 if (!toc_initialized)
29701 {
29702 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
29703 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
29704 toc_initialized = 1;
29705 }
29706 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
29707 (TARGET_32BIT ? "" : ",3"));
29708 }
29709 else
29710 fputs ("\t.toc\n", asm_out_file);
29711 }
29712
29713 /* Implement TARGET_ASM_INIT_SECTIONS. */
29714
29715 static void
29716 rs6000_xcoff_asm_init_sections (void)
29717 {
29718 read_only_data_section
29719 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
29720 &xcoff_read_only_section_name);
29721
29722 private_data_section
29723 = get_unnamed_section (SECTION_WRITE,
29724 rs6000_xcoff_output_readwrite_section_asm_op,
29725 &xcoff_private_data_section_name);
29726
29727 tls_data_section
29728 = get_unnamed_section (SECTION_TLS,
29729 rs6000_xcoff_output_tls_section_asm_op,
29730 &xcoff_tls_data_section_name);
29731
29732 tls_private_data_section
29733 = get_unnamed_section (SECTION_TLS,
29734 rs6000_xcoff_output_tls_section_asm_op,
29735 &xcoff_private_data_section_name);
29736
29737 read_only_private_data_section
29738 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
29739 &xcoff_private_data_section_name);
29740
29741 toc_section
29742 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
29743
29744 readonly_data_section = read_only_data_section;
29745 exception_section = data_section;
29746 }
29747
29748 static int
29749 rs6000_xcoff_reloc_rw_mask (void)
29750 {
29751 return 3;
29752 }
29753
29754 static void
29755 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
29756 tree decl ATTRIBUTE_UNUSED)
29757 {
29758 int smclass;
29759 static const char * const suffix[4] = { "PR", "RO", "RW", "TL" };
29760
29761 if (flags & SECTION_CODE)
29762 smclass = 0;
29763 else if (flags & SECTION_TLS)
29764 smclass = 3;
29765 else if (flags & SECTION_WRITE)
29766 smclass = 2;
29767 else
29768 smclass = 1;
29769
29770 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
29771 (flags & SECTION_CODE) ? "." : "",
29772 name, suffix[smclass], flags & SECTION_ENTSIZE);
29773 }
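/* For example (assuming no alignment is encoded in the SECTION_ENTSIZE
   bits), a named code section "foo" becomes "\t.csect .foo[PR],0",
   with the leading dot, and a writable data section becomes
   "\t.csect foo[RW],0".  */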
29774
29775 #define IN_NAMED_SECTION(DECL) \
29776 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
29777 && DECL_SECTION_NAME (DECL) != NULL)
29778
29779 static section *
29780 rs6000_xcoff_select_section (tree decl, int reloc,
29781 unsigned HOST_WIDE_INT align)
29782 {
29783 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
29784 a named section. */
29785 if (align > BIGGEST_ALIGNMENT)
29786 {
29787 resolve_unique_section (decl, reloc, true);
29788 if (IN_NAMED_SECTION (decl))
29789 return get_named_section (decl, NULL, reloc);
29790 }
29791
29792 if (decl_readonly_section (decl, reloc))
29793 {
29794 if (TREE_PUBLIC (decl))
29795 return read_only_data_section;
29796 else
29797 return read_only_private_data_section;
29798 }
29799 else
29800 {
29801 #if HAVE_AS_TLS
29802 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
29803 {
29804 if (TREE_PUBLIC (decl))
29805 return tls_data_section;
29806 else if (bss_initializer_p (decl))
29807 {
29808 /* Convert to COMMON to emit in BSS. */
29809 DECL_COMMON (decl) = 1;
29810 return tls_comm_section;
29811 }
29812 else
29813 return tls_private_data_section;
29814 }
29815 else
29816 #endif
29817 if (TREE_PUBLIC (decl))
29818 return data_section;
29819 else
29820 return private_data_section;
29821 }
29822 }
29823
29824 static void
29825 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
29826 {
29827 const char *name;
29828
29829 /* Use select_section for private data and uninitialized data with
29830 alignment <= BIGGEST_ALIGNMENT. */
29831 if (!TREE_PUBLIC (decl)
29832 || DECL_COMMON (decl)
29833 || (DECL_INITIAL (decl) == NULL_TREE
29834 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
29835 || DECL_INITIAL (decl) == error_mark_node
29836 || (flag_zero_initialized_in_bss
29837 && initializer_zerop (DECL_INITIAL (decl))))
29838 return;
29839
29840 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
29841 name = (*targetm.strip_name_encoding) (name);
29842 set_decl_section_name (decl, name);
29843 }
29844
29845 /* Select section for constant in constant pool.
29846
29847 On RS/6000, all constants are in the private read-only data area.
29848 However, if this is being placed in the TOC it must be output as a
29849 toc entry. */
29850
29851 static section *
29852 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
29853 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
29854 {
29855 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
29856 return toc_section;
29857 else
29858 return read_only_private_data_section;
29859 }
29860
29861 /* Remove any trailing [DS] or the like from the symbol name. */
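/* (The bracketed suffix is assumed to be exactly four characters,
   e.g. "[DS]" or "[RO]", hence the LEN - 4 below.)  */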
29862
29863 static const char *
29864 rs6000_xcoff_strip_name_encoding (const char *name)
29865 {
29866 size_t len;
29867 if (*name == '*')
29868 name++;
29869 len = strlen (name);
29870 if (name[len - 1] == ']')
29871 return ggc_alloc_string (name, len - 4);
29872 else
29873 return name;
29874 }
29875
29876 /* Section attributes. AIX is always PIC. */
29877
29878 static unsigned int
29879 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
29880 {
29881 unsigned int align;
29882 unsigned int flags = default_section_type_flags (decl, name, reloc);
29883
29884 /* Align to at least the minimum word size. */
29885 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
29886 align = MIN_UNITS_PER_WORD;
29887 else
29888 /* Increase alignment of large objects if not already stricter. */
29889 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
29890 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
29891 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
29892
29893 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
29894 }
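/* The alignment travels in the SECTION_ENTSIZE bits as a log2: an
   8-byte-aligned object stores exact_log2 (8) = 3 there, which
   rs6000_xcoff_asm_named_section later prints after the comma.  */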
29895
29896 /* Output at beginning of assembler file.
29897
29898 Initialize the section names for the RS/6000 at this point.
29899
29900 Specify filename, including full path, to assembler.
29901
29902 We want to go into the TOC section so at least one .toc will be emitted.
29903 Also, in order to output proper .bs/.es pairs, we need at least one static
29904 [RW] section emitted.
29905
29906 Finally, declare mcount when profiling to make the assembler happy. */
29907
29908 static void
29909 rs6000_xcoff_file_start (void)
29910 {
29911 rs6000_gen_section_name (&xcoff_bss_section_name,
29912 main_input_filename, ".bss_");
29913 rs6000_gen_section_name (&xcoff_private_data_section_name,
29914 main_input_filename, ".rw_");
29915 rs6000_gen_section_name (&xcoff_read_only_section_name,
29916 main_input_filename, ".ro_");
29917 rs6000_gen_section_name (&xcoff_tls_data_section_name,
29918 main_input_filename, ".tls_");
29919 rs6000_gen_section_name (&xcoff_tbss_section_name,
29920 main_input_filename, ".tbss_[UL]");
29921
29922 fputs ("\t.file\t", asm_out_file);
29923 output_quoted_string (asm_out_file, main_input_filename);
29924 fputc ('\n', asm_out_file);
29925 if (write_symbols != NO_DEBUG)
29926 switch_to_section (private_data_section);
29927 switch_to_section (text_section);
29928 if (profile_flag)
29929 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
29930 rs6000_file_start ();
29931 }
29932
29933 /* Output at end of assembler file.
29934 On the RS/6000, referencing data should automatically pull in text. */
29935
29936 static void
29937 rs6000_xcoff_file_end (void)
29938 {
29939 switch_to_section (text_section);
29940 fputs ("_section_.text:\n", asm_out_file);
29941 switch_to_section (data_section);
29942 fputs (TARGET_32BIT
29943 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
29944 asm_out_file);
29945 }
29946
29947 struct declare_alias_data
29948 {
29949 FILE *file;
29950 bool function_descriptor;
29951 };
29952
29953 /* Declare alias N. A helper function for call_for_symbol_and_aliases. */
29954
29955 static bool
29956 rs6000_declare_alias (struct symtab_node *n, void *d)
29957 {
29958 struct declare_alias_data *data = (struct declare_alias_data *)d;
29959 /* Main symbol is output specially, because varasm machinery does part of
29960 the job for us - we do not need to declare .globl/lglobs and such. */
29961 if (!n->alias || n->weakref)
29962 return false;
29963
29964 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
29965 return false;
29966
29967 /* Prevent assemble_alias from trying to use .set pseudo operation
29968 that does not behave as expected by the middle-end. */
29969 TREE_ASM_WRITTEN (n->decl) = true;
29970
29971 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
29972 char *buffer = (char *) alloca (strlen (name) + 2);
29973 char *p;
29974 int dollar_inside = 0;
29975
29976 strcpy (buffer, name);
29977 p = strchr (buffer, '$');
29978 while (p) {
29979 *p = '_';
29980 dollar_inside++;
29981 p = strchr (p + 1, '$');
29982 }
29983 if (TREE_PUBLIC (n->decl))
29984 {
29985 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
29986 {
29987 if (dollar_inside) {
29988 if (data->function_descriptor)
29989 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
29990 else
29991 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
29992 }
29993 if (data->function_descriptor)
29994 fputs ("\t.globl .", data->file);
29995 else
29996 fputs ("\t.globl ", data->file);
29997 RS6000_OUTPUT_BASENAME (data->file, buffer);
29998 putc ('\n', data->file);
29999 }
30000 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
30001 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
30002 }
30003 else
30004 {
30005 if (dollar_inside)
30006 {
30007 if (data->function_descriptor)
30008 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
30009 else
30010 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
30011 }
30012 if (data->function_descriptor)
30013 fputs ("\t.lglobl .", data->file);
30014 else
30015 fputs ("\t.lglobl ", data->file);
30016 RS6000_OUTPUT_BASENAME (data->file, buffer);
30017 putc ('\n', data->file);
30018 }
30019 if (data->function_descriptor)
30020 fputs (".", data->file);
30021 RS6000_OUTPUT_BASENAME (data->file, buffer);
30022 fputs (":\n", data->file);
30023 return false;
30024 }
30025
30026 /* This macro produces the initial definition of a function name.
30027 On the RS/6000, we need to place an extra '.' in the function name and
30028 output the function descriptor.
30029 Dollar signs are converted to underscores.
30030
30031 The csect for the function will have already been created when
30032 text_section was selected. We do have to go back to that csect, however.
30033
30034 The third and fourth parameters to the .function pseudo-op (16 and 044)
30035 are placeholders which no longer have any use.
30036
30037 Because AIX assembler's .set command has unexpected semantics, we output
30038 all aliases as alternative labels in front of the definition. */
30039
30040 void
30041 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
30042 {
30043 char *buffer = (char *) alloca (strlen (name) + 1);
30044 char *p;
30045 int dollar_inside = 0;
30046 struct declare_alias_data data = {file, false};
30047
30048 strcpy (buffer, name);
30049 p = strchr (buffer, '$');
30050 while (p) {
30051 *p = '_';
30052 dollar_inside++;
30053 p = strchr (p + 1, '$');
30054 }
30055 if (TREE_PUBLIC (decl))
30056 {
30057 if (!RS6000_WEAK || !DECL_WEAK (decl))
30058 {
30059 if (dollar_inside) {
30060 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
30061 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
30062 }
30063 fputs ("\t.globl .", file);
30064 RS6000_OUTPUT_BASENAME (file, buffer);
30065 putc ('\n', file);
30066 }
30067 }
30068 else
30069 {
30070 if (dollar_inside) {
30071 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
30072 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
30073 }
30074 fputs ("\t.lglobl .", file);
30075 RS6000_OUTPUT_BASENAME (file, buffer);
30076 putc ('\n', file);
30077 }
30078 fputs ("\t.csect ", file);
30079 RS6000_OUTPUT_BASENAME (file, buffer);
30080 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
30081 RS6000_OUTPUT_BASENAME (file, buffer);
30082 fputs (":\n", file);
30083 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
30084 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
30085 RS6000_OUTPUT_BASENAME (file, buffer);
30086 fputs (", TOC[tc0], 0\n", file);
30087 in_section = NULL;
30088 switch_to_section (function_section (decl));
30089 putc ('.', file);
30090 RS6000_OUTPUT_BASENAME (file, buffer);
30091 fputs (":\n", file);
30092 data.function_descriptor = true;
30093 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
30094 if (write_symbols != NO_DEBUG && !DECL_IGNORED_P (decl))
30095 xcoffout_declare_function (file, decl, buffer);
30096 return;
30097 }
30098
30099 /* This macro produces the initial definition of an object (variable) name.
30100 Because AIX assembler's .set command has unexpected semantics, we output
30101 all aliases as alternative labels in front of the definition. */
30102
30103 void
30104 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
30105 {
30106 struct declare_alias_data data = {file, false};
30107 RS6000_OUTPUT_BASENAME (file, name);
30108 fputs (":\n", file);
30109 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
30110 }
30111
30112 #ifdef HAVE_AS_TLS
30113 static void
30114 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
30115 {
30116 rtx symbol;
30117 int flags;
30118
30119 default_encode_section_info (decl, rtl, first);
30120
30121 /* Careful not to prod global register variables. */
30122 if (!MEM_P (rtl))
30123 return;
30124 symbol = XEXP (rtl, 0);
30125 if (GET_CODE (symbol) != SYMBOL_REF)
30126 return;
30127
30128 flags = SYMBOL_REF_FLAGS (symbol);
30129
30130 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
30131 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
30132
30133 SYMBOL_REF_FLAGS (symbol) = flags;
30134 }
30135 #endif /* HAVE_AS_TLS */
30136 #endif /* TARGET_XCOFF */
30137
30138 /* Compute a (partial) cost for rtx X. Return true if the complete
30139 cost has been computed, and false if subexpressions should be
30140 scanned. In either case, *TOTAL contains the cost result. */
30141
30142 static bool
30143 rs6000_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
30144 int *total, bool speed)
30145 {
30146 machine_mode mode = GET_MODE (x);
30147
30148 switch (code)
30149 {
30150 /* On the RS/6000, if it is valid in the insn, it is free. */
30151 case CONST_INT:
30152 if (((outer_code == SET
30153 || outer_code == PLUS
30154 || outer_code == MINUS)
30155 && (satisfies_constraint_I (x)
30156 || satisfies_constraint_L (x)))
30157 || (outer_code == AND
30158 && (satisfies_constraint_K (x)
30159 || (mode == SImode
30160 ? satisfies_constraint_L (x)
30161 : satisfies_constraint_J (x))
30162 || mask_operand (x, mode)
30163 || (mode == DImode
30164 && mask64_operand (x, DImode))))
30165 || ((outer_code == IOR || outer_code == XOR)
30166 && (satisfies_constraint_K (x)
30167 || (mode == SImode
30168 ? satisfies_constraint_L (x)
30169 : satisfies_constraint_J (x))))
30170 || outer_code == ASHIFT
30171 || outer_code == ASHIFTRT
30172 || outer_code == LSHIFTRT
30173 || outer_code == ROTATE
30174 || outer_code == ROTATERT
30175 || outer_code == ZERO_EXTRACT
30176 || (outer_code == MULT
30177 && satisfies_constraint_I (x))
30178 || ((outer_code == DIV || outer_code == UDIV
30179 || outer_code == MOD || outer_code == UMOD)
30180 && exact_log2 (INTVAL (x)) >= 0)
30181 || (outer_code == COMPARE
30182 && (satisfies_constraint_I (x)
30183 || satisfies_constraint_K (x)))
30184 || ((outer_code == EQ || outer_code == NE)
30185 && (satisfies_constraint_I (x)
30186 || satisfies_constraint_K (x)
30187 || (mode == SImode
30188 ? satisfies_constraint_L (x)
30189 : satisfies_constraint_J (x))))
30190 || (outer_code == GTU
30191 && satisfies_constraint_I (x))
30192 || (outer_code == LTU
30193 && satisfies_constraint_P (x)))
30194 {
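	  /* E.g. (plus (reg) (const_int 12)) satisfies constraint 'I'
	     (a signed 16-bit immediate), so a single addi absorbs the
	     constant and it costs nothing extra.  */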
30195 *total = 0;
30196 return true;
30197 }
30198 else if ((outer_code == PLUS
30199 && reg_or_add_cint_operand (x, VOIDmode))
30200 || (outer_code == MINUS
30201 && reg_or_sub_cint_operand (x, VOIDmode))
30202 || ((outer_code == SET
30203 || outer_code == IOR
30204 || outer_code == XOR)
30205 && (INTVAL (x)
30206 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
30207 {
30208 *total = COSTS_N_INSNS (1);
30209 return true;
30210 }
30211 /* FALLTHRU */
30212
30213 case CONST_DOUBLE:
30214 case CONST_WIDE_INT:
30215 case CONST:
30216 case HIGH:
30217 case SYMBOL_REF:
30218 case MEM:
30219 /* When optimizing for size, MEM should be slightly more expensive
30220 than generating the address, e.g., (plus (reg) (const)).
30221 L1 cache latency is about two instructions. */
30222 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
30223 return true;
30224
30225 case LABEL_REF:
30226 *total = 0;
30227 return true;
30228
30229 case PLUS:
30230 case MINUS:
30231 if (FLOAT_MODE_P (mode))
30232 *total = rs6000_cost->fp;
30233 else
30234 *total = COSTS_N_INSNS (1);
30235 return false;
30236
30237 case MULT:
30238 if (GET_CODE (XEXP (x, 1)) == CONST_INT
30239 && satisfies_constraint_I (XEXP (x, 1)))
30240 {
30241 if (INTVAL (XEXP (x, 1)) >= -256
30242 && INTVAL (XEXP (x, 1)) <= 255)
30243 *total = rs6000_cost->mulsi_const9;
30244 else
30245 *total = rs6000_cost->mulsi_const;
30246 }
30247 else if (mode == SFmode)
30248 *total = rs6000_cost->fp;
30249 else if (FLOAT_MODE_P (mode))
30250 *total = rs6000_cost->dmul;
30251 else if (mode == DImode)
30252 *total = rs6000_cost->muldi;
30253 else
30254 *total = rs6000_cost->mulsi;
30255 return false;
30256
30257 case FMA:
30258 if (mode == SFmode)
30259 *total = rs6000_cost->fp;
30260 else
30261 *total = rs6000_cost->dmul;
30262 break;
30263
30264 case DIV:
30265 case MOD:
30266 if (FLOAT_MODE_P (mode))
30267 {
30268 *total = mode == DFmode ? rs6000_cost->ddiv
30269 : rs6000_cost->sdiv;
30270 return false;
30271 }
30272 /* FALLTHRU */
30273
30274 case UDIV:
30275 case UMOD:
30276 if (GET_CODE (XEXP (x, 1)) == CONST_INT
30277 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
30278 {
30279 if (code == DIV || code == MOD)
30280 /* Shift, addze */
30281 *total = COSTS_N_INSNS (2);
30282 else
30283 /* Shift */
30284 *total = COSTS_N_INSNS (1);
30285 }
30286 else
30287 {
30288 if (GET_MODE (XEXP (x, 1)) == DImode)
30289 *total = rs6000_cost->divdi;
30290 else
30291 *total = rs6000_cost->divsi;
30292 }
30293 /* Add in shift and subtract for MOD. */
30294 if (code == MOD || code == UMOD)
30295 *total += COSTS_N_INSNS (2);
30296 return false;
30297
30298 case CTZ:
30299 case FFS:
30300 *total = COSTS_N_INSNS (4);
30301 return false;
30302
30303 case POPCOUNT:
30304 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
30305 return false;
30306
30307 case PARITY:
30308 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
30309 return false;
30310
30311 case NOT:
30312 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
30313 {
30314 *total = 0;
30315 return false;
30316 }
30317 /* FALLTHRU */
30318
30319 case AND:
30320 case CLZ:
30321 case IOR:
30322 case XOR:
30323 case ZERO_EXTRACT:
30324 *total = COSTS_N_INSNS (1);
30325 return false;
30326
30327 case ASHIFT:
30328 case ASHIFTRT:
30329 case LSHIFTRT:
30330 case ROTATE:
30331 case ROTATERT:
30332 /* Handle mul_highpart. */
30333 if (outer_code == TRUNCATE
30334 && GET_CODE (XEXP (x, 0)) == MULT)
30335 {
30336 if (mode == DImode)
30337 *total = rs6000_cost->muldi;
30338 else
30339 *total = rs6000_cost->mulsi;
30340 return true;
30341 }
30342 else if (outer_code == AND)
30343 *total = 0;
30344 else
30345 *total = COSTS_N_INSNS (1);
30346 return false;
30347
30348 case SIGN_EXTEND:
30349 case ZERO_EXTEND:
30350 if (GET_CODE (XEXP (x, 0)) == MEM)
30351 *total = 0;
30352 else
30353 *total = COSTS_N_INSNS (1);
30354 return false;
30355
30356 case COMPARE:
30357 case NEG:
30358 case ABS:
30359 if (!FLOAT_MODE_P (mode))
30360 {
30361 *total = COSTS_N_INSNS (1);
30362 return false;
30363 }
30364 /* FALLTHRU */
30365
30366 case FLOAT:
30367 case UNSIGNED_FLOAT:
30368 case FIX:
30369 case UNSIGNED_FIX:
30370 case FLOAT_TRUNCATE:
30371 *total = rs6000_cost->fp;
30372 return false;
30373
30374 case FLOAT_EXTEND:
30375 if (mode == DFmode)
30376 *total = 0;
30377 else
30378 *total = rs6000_cost->fp;
30379 return false;
30380
30381 case UNSPEC:
30382 switch (XINT (x, 1))
30383 {
30384 case UNSPEC_FRSP:
30385 *total = rs6000_cost->fp;
30386 return true;
30387
30388 default:
30389 break;
30390 }
30391 break;
30392
30393 case CALL:
30394 case IF_THEN_ELSE:
30395 if (!speed)
30396 {
30397 *total = COSTS_N_INSNS (1);
30398 return true;
30399 }
30400 else if (FLOAT_MODE_P (mode)
30401 && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
30402 {
30403 *total = rs6000_cost->fp;
30404 return false;
30405 }
30406 break;
30407
30408 case NE:
30409 case EQ:
30410 case GTU:
30411 case LTU:
30412 /* Carry bit requires mode == Pmode.
30413 NEG or PLUS already counted so only add one. */
30414 if (mode == Pmode
30415 && (outer_code == NEG || outer_code == PLUS))
30416 {
30417 *total = COSTS_N_INSNS (1);
30418 return true;
30419 }
30420 if (outer_code == SET)
30421 {
30422 if (XEXP (x, 1) == const0_rtx)
30423 {
30424 if (TARGET_ISEL && !TARGET_MFCRF)
30425 *total = COSTS_N_INSNS (8);
30426 else
30427 *total = COSTS_N_INSNS (2);
30428 return true;
30429 }
30430 else if (mode == Pmode)
30431 {
30432 *total = COSTS_N_INSNS (3);
30433 return false;
30434 }
30435 }
30436 /* FALLTHRU */
30437
30438 case GT:
30439 case LT:
30440 case UNORDERED:
30441 if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
30442 {
30443 if (TARGET_ISEL && !TARGET_MFCRF)
30444 *total = COSTS_N_INSNS (8);
30445 else
30446 *total = COSTS_N_INSNS (2);
30447 return true;
30448 }
30449 /* CC COMPARE. */
30450 if (outer_code == COMPARE)
30451 {
30452 *total = 0;
30453 return true;
30454 }
30455 break;
30456
30457 default:
30458 break;
30459 }
30460
30461 return false;
30462 }
30463
30464 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
30465
30466 static bool
30467 rs6000_debug_rtx_costs (rtx x, int code, int outer_code, int opno, int *total,
30468 bool speed)
30469 {
30470 bool ret = rs6000_rtx_costs (x, code, outer_code, opno, total, speed);
30471
30472 fprintf (stderr,
30473 "\nrs6000_rtx_costs, return = %s, code = %s, outer_code = %s, "
30474 "opno = %d, total = %d, speed = %s, x:\n",
30475 ret ? "complete" : "scan inner",
30476 GET_RTX_NAME (code),
30477 GET_RTX_NAME (outer_code),
30478 opno,
30479 *total,
30480 speed ? "true" : "false");
30481
30482 debug_rtx (x);
30483
30484 return ret;
30485 }
30486
30487 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
30488
30489 static int
30490 rs6000_debug_address_cost (rtx x, machine_mode mode,
30491 addr_space_t as, bool speed)
30492 {
30493 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
30494
30495 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
30496 ret, speed ? "true" : "false");
30497 debug_rtx (x);
30498
30499 return ret;
30500 }
30501
30502
30503 /* A C expression returning the cost of moving data from a register of class
30504 CLASS1 to one of CLASS2. */
30505
30506 static int
30507 rs6000_register_move_cost (machine_mode mode,
30508 reg_class_t from, reg_class_t to)
30509 {
30510 int ret;
30511
30512 if (TARGET_DEBUG_COST)
30513 dbg_cost_ctrl++;
30514
30515 /* Moves from/to GENERAL_REGS. */
30516 if (reg_classes_intersect_p (to, GENERAL_REGS)
30517 || reg_classes_intersect_p (from, GENERAL_REGS))
30518 {
30519 reg_class_t rclass = from;
30520
30521 if (! reg_classes_intersect_p (to, GENERAL_REGS))
30522 rclass = to;
30523
30524 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
30525 ret = (rs6000_memory_move_cost (mode, rclass, false)
30526 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
30527
30528 /* It's more expensive to move CR_REGS than CR0_REGS because of the
30529 shift. */
30530 else if (rclass == CR_REGS)
30531 ret = 4;
30532
30533 /* For those processors that have slow LR/CTR moves, make them more
30534 expensive than memory in order to bias spills to memory. */
30535 else if ((rs6000_cpu == PROCESSOR_POWER6
30536 || rs6000_cpu == PROCESSOR_POWER7
30537 || rs6000_cpu == PROCESSOR_POWER8)
30538 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
30539 ret = 6 * hard_regno_nregs[0][mode];
30540
30541 else
30542 /* A move will cost one instruction per GPR moved. */
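	/* (E.g. a TImode move on a 64-bit target spans two GPRs and
	   therefore costs 4.)  */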
30543 ret = 2 * hard_regno_nregs[0][mode];
30544 }
30545
30546 /* If we have VSX, we can easily move between FPR or Altivec registers. */
30547 else if (VECTOR_MEM_VSX_P (mode)
30548 && reg_classes_intersect_p (to, VSX_REGS)
30549 && reg_classes_intersect_p (from, VSX_REGS))
30550 ret = 2 * hard_regno_nregs[32][mode];
30551
30552 /* Moving between two similar registers is just one instruction. */
30553 else if (reg_classes_intersect_p (to, from))
30554 ret = (mode == TFmode || mode == TDmode) ? 4 : 2;
30555
30556 /* Everything else has to go through GENERAL_REGS. */
30557 else
30558 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
30559 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
30560
30561 if (TARGET_DEBUG_COST)
30562 {
30563 if (dbg_cost_ctrl == 1)
30564 fprintf (stderr,
30565 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
30566 ret, GET_MODE_NAME (mode), reg_class_names[from],
30567 reg_class_names[to]);
30568 dbg_cost_ctrl--;
30569 }
30570
30571 return ret;
30572 }
30573
30574 /* A C expression returning the cost of moving data of MODE from a register to
30575 or from memory. */
30576
30577 static int
30578 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
30579 bool in ATTRIBUTE_UNUSED)
30580 {
30581 int ret;
30582
30583 if (TARGET_DEBUG_COST)
30584 dbg_cost_ctrl++;
30585
30586 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
30587 ret = 4 * hard_regno_nregs[0][mode];
30588 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
30589 || reg_classes_intersect_p (rclass, VSX_REGS)))
30590 ret = 4 * hard_regno_nregs[32][mode];
30591 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
30592 ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
30593 else
30594 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
30595
30596 if (TARGET_DEBUG_COST)
30597 {
30598 if (dbg_cost_ctrl == 1)
30599 fprintf (stderr,
30600 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
30601 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
30602 dbg_cost_ctrl--;
30603 }
30604
30605 return ret;
30606 }
30607
30608 /* Returns a code for a target-specific builtin that implements
30609 the reciprocal of the function, or NULL_TREE if not available. */
30610
30611 static tree
30612 rs6000_builtin_reciprocal (unsigned int fn, bool md_fn,
30613 bool sqrt ATTRIBUTE_UNUSED)
30614 {
30615 if (optimize_insn_for_size_p ())
30616 return NULL_TREE;
30617
30618 if (md_fn)
30619 switch (fn)
30620 {
30621 case VSX_BUILTIN_XVSQRTDP:
30622 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
30623 return NULL_TREE;
30624
30625 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
30626
30627 case VSX_BUILTIN_XVSQRTSP:
30628 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
30629 return NULL_TREE;
30630
30631 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
30632
30633 default:
30634 return NULL_TREE;
30635 }
30636
30637 else
30638 switch (fn)
30639 {
30640 case BUILT_IN_SQRT:
30641 if (!RS6000_RECIP_AUTO_RSQRTE_P (DFmode))
30642 return NULL_TREE;
30643
30644 return rs6000_builtin_decls[RS6000_BUILTIN_RSQRT];
30645
30646 case BUILT_IN_SQRTF:
30647 if (!RS6000_RECIP_AUTO_RSQRTE_P (SFmode))
30648 return NULL_TREE;
30649
30650 return rs6000_builtin_decls[RS6000_BUILTIN_RSQRTF];
30651
30652 default:
30653 return NULL_TREE;
30654 }
30655 }
30656
30657 /* Load up a constant. If the mode is a vector mode, splat the value across
30658 all of the vector elements. */
30659
30660 static rtx
30661 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
30662 {
30663 rtx reg;
30664
30665 if (mode == SFmode || mode == DFmode)
30666 {
30667 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, mode);
30668 reg = force_reg (mode, d);
30669 }
30670 else if (mode == V4SFmode)
30671 {
30672 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, SFmode);
30673 rtvec v = gen_rtvec (4, d, d, d, d);
30674 reg = gen_reg_rtx (mode);
30675 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
30676 }
30677 else if (mode == V2DFmode)
30678 {
30679 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, DFmode);
30680 rtvec v = gen_rtvec (2, d, d);
30681 reg = gen_reg_rtx (mode);
30682 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
30683 }
30684 else
30685 gcc_unreachable ();
30686
30687 return reg;
30688 }
30689
30690 /* Generate an FMA instruction. */
30691
30692 static void
30693 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
30694 {
30695 machine_mode mode = GET_MODE (target);
30696 rtx dst;
30697
30698 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
30699 gcc_assert (dst != NULL);
30700
30701 if (dst != target)
30702 emit_move_insn (target, dst);
30703 }
30704
30705 /* Generate a FMSUB instruction: dst = fma(m1, m2, -a). */
30706
30707 static void
30708 rs6000_emit_msub (rtx target, rtx m1, rtx m2, rtx a)
30709 {
30710 machine_mode mode = GET_MODE (target);
30711 rtx dst;
30712
30713 /* Altivec does not support fms directly;
30714 generate in terms of fma in that case. */
30715 if (optab_handler (fms_optab, mode) != CODE_FOR_nothing)
30716 dst = expand_ternary_op (mode, fms_optab, m1, m2, a, target, 0);
30717 else
30718 {
30719 a = expand_unop (mode, neg_optab, a, NULL_RTX, 0);
30720 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
30721 }
30722 gcc_assert (dst != NULL);
30723
30724 if (dst != target)
30725 emit_move_insn (target, dst);
30726 }
30727
30728 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
30729
30730 static void
30731 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
30732 {
30733 machine_mode mode = GET_MODE (dst);
30734 rtx r;
30735
30736 /* This is a tad more complicated, since the fnma_optab is for
30737 a different expression: fma(-m1, m2, a), which is the same
30738 thing except in the case of signed zeros.
30739
30740 Fortunately we know that if FMA is supported, then FNMSUB is
30741 also supported in the ISA. Just expand it directly. */
30742
30743 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
30744
30745 r = gen_rtx_NEG (mode, a);
30746 r = gen_rtx_FMA (mode, m1, m2, r);
30747 r = gen_rtx_NEG (mode, r);
30748 emit_insn (gen_rtx_SET (VOIDmode, dst, r));
30749 }
30750
30751 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
30752 add a reg_note saying that this was a division. Support both scalar and
30753 vector divide. Assumes no trapping math and finite arguments. */
30754
30755 void
30756 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
30757 {
30758 machine_mode mode = GET_MODE (dst);
30759 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
30760 int i;
30761
30762 /* Low precision estimates guarantee 5 bits of accuracy. High
30763 precision estimates guarantee 14 bits of accuracy. SFmode
30764 requires 23 bits of accuracy. DFmode requires 52 bits of
30765 accuracy. Each pass at least doubles the accuracy, leading
30766 to the following. */
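  /* Concretely: without -mrecip-precision the 5-bit estimate doubles to
     10, 20, then 40 bits, so 3 passes cover SFmode's 23 bits, and the
     extra DFmode pass reaches 80 bits for its 52.  With 14-bit estimates,
     one pass (28 bits) covers SFmode and two (56 bits) cover DFmode.  */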
30767 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
30768 if (mode == DFmode || mode == V2DFmode)
30769 passes++;
30770
30771 enum insn_code code = optab_handler (smul_optab, mode);
30772 insn_gen_fn gen_mul = GEN_FCN (code);
30773
30774 gcc_assert (code != CODE_FOR_nothing);
30775
30776 one = rs6000_load_constant_and_splat (mode, dconst1);
30777
30778 /* x0 = 1./d estimate */
30779 x0 = gen_reg_rtx (mode);
30780 emit_insn (gen_rtx_SET (VOIDmode, x0,
30781 gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
30782 UNSPEC_FRES)));
30783
30784 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
30785 if (passes > 1) {
30786
30787 /* e0 = 1. - d * x0 */
30788 e0 = gen_reg_rtx (mode);
30789 rs6000_emit_nmsub (e0, d, x0, one);
30790
30791 /* x1 = x0 + e0 * x0 */
30792 x1 = gen_reg_rtx (mode);
30793 rs6000_emit_madd (x1, e0, x0, x0);
30794
30795 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
30796 ++i, xprev = xnext, eprev = enext) {
30797
30798 /* enext = eprev * eprev */
30799 enext = gen_reg_rtx (mode);
30800 emit_insn (gen_mul (enext, eprev, eprev));
30801
30802 /* xnext = xprev + enext * xprev */
30803 xnext = gen_reg_rtx (mode);
30804 rs6000_emit_madd (xnext, enext, xprev, xprev);
30805 }
30806
30807 } else
30808 xprev = x0;
30809
30810 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
30811
30812 /* u = n * xprev */
30813 u = gen_reg_rtx (mode);
30814 emit_insn (gen_mul (u, n, xprev));
30815
30816 /* v = n - (d * u) */
30817 v = gen_reg_rtx (mode);
30818 rs6000_emit_nmsub (v, d, u, n);
30819
30820 /* dst = (v * xprev) + u */
30821 rs6000_emit_madd (dst, v, xprev, u);
30822
30823 if (note_p)
30824 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
30825 }
30826
30827 /* Newton-Raphson approximation of single/double-precision floating point
30828 rsqrt. Assumes no trapping math and finite arguments. */
30829
30830 void
30831 rs6000_emit_swrsqrt (rtx dst, rtx src)
30832 {
30833 machine_mode mode = GET_MODE (src);
30834 rtx x0 = gen_reg_rtx (mode);
30835 rtx y = gen_reg_rtx (mode);
30836
30837 /* Low precision estimates guarantee 5 bits of accuracy. High
30838 precision estimates guarantee 14 bits of accuracy. SFmode
30839 requires 23 bits of accuracy. DFmode requires 52 bits of
30840 accuracy. Each pass at least doubles the accuracy, leading
30841 to the following. */
30842 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
30843 if (mode == DFmode || mode == V2DFmode)
30844 passes++;
30845
30846 REAL_VALUE_TYPE dconst3_2;
30847 int i;
30848 rtx halfthree;
30849 enum insn_code code = optab_handler (smul_optab, mode);
30850 insn_gen_fn gen_mul = GEN_FCN (code);
30851
30852 gcc_assert (code != CODE_FOR_nothing);
30853
30854 /* Load up the constant 1.5 either as a scalar, or as a vector. */
30855 real_from_integer (&dconst3_2, VOIDmode, 3, SIGNED);
30856 SET_REAL_EXP (&dconst3_2, REAL_EXP (&dconst3_2) - 1);
30857
30858 halfthree = rs6000_load_constant_and_splat (mode, dconst3_2);
30859
30860 /* x0 = rsqrt estimate */
30861 emit_insn (gen_rtx_SET (VOIDmode, x0,
30862 gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
30863 UNSPEC_RSQRT)));
30864
30865 /* y = 0.5 * src = 1.5 * src - src -> fewer constants */
30866 rs6000_emit_msub (y, src, halfthree, src);
30867
30868 for (i = 0; i < passes; i++)
30869 {
30870 rtx x1 = gen_reg_rtx (mode);
30871 rtx u = gen_reg_rtx (mode);
30872 rtx v = gen_reg_rtx (mode);
30873
30874 /* x1 = x0 * (1.5 - y * (x0 * x0)) */
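      /* This is one Newton step on f(x) = 1/(x*x) - a, which gives
	 x' = x*(3 - a*x*x)/2 = x*(1.5 - (a/2)*x*x); y holds a/2.  */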
30875 emit_insn (gen_mul (u, x0, x0));
30876 rs6000_emit_nmsub (v, y, u, halfthree);
30877 emit_insn (gen_mul (x1, x0, v));
30878 x0 = x1;
30879 }
30880
30881 emit_move_insn (dst, x0);
30882 return;
30883 }
30884
30885 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
30886 (Power7) targets. DST is the target, and SRC is the argument operand. */
30887
30888 void
30889 rs6000_emit_popcount (rtx dst, rtx src)
30890 {
30891 machine_mode mode = GET_MODE (dst);
30892 rtx tmp1, tmp2;
30893
30894 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
30895 if (TARGET_POPCNTD)
30896 {
30897 if (mode == SImode)
30898 emit_insn (gen_popcntdsi2 (dst, src));
30899 else
30900 emit_insn (gen_popcntddi2 (dst, src));
30901 return;
30902 }
30903
30904 tmp1 = gen_reg_rtx (mode);
30905
30906 if (mode == SImode)
30907 {
30908 emit_insn (gen_popcntbsi2 (tmp1, src));
30909 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
30910 NULL_RTX, 0);
30911 tmp2 = force_reg (SImode, tmp2);
30912 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
30913 }
30914 else
30915 {
30916 emit_insn (gen_popcntbdi2 (tmp1, src));
30917 tmp2 = expand_mult (DImode, tmp1,
30918 GEN_INT ((HOST_WIDE_INT)
30919 0x01010101 << 32 | 0x01010101),
30920 NULL_RTX, 0);
30921 tmp2 = force_reg (DImode, tmp2);
30922 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
30923 }
30924 }
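/* Illustration only (a hypothetical scalar model, not used by GCC): the
   SImode fallback above works because popcntb leaves the popcount of
   each source byte in the corresponding result byte; multiplying by
   0x01010101 then sums the four counts into the top byte (the total is
   at most 32, so no byte overflows), and the shift extracts it.  */

#if 0
static unsigned int
popcount_via_byte_sums (unsigned int per_byte_counts)
{
  /* PER_BYTE_COUNTS models the popcntb result.  */
  return (per_byte_counts * 0x01010101u) >> 24;
}
#endif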
30925
30926
30927 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
30928 target, and SRC is the argument operand. */
30929
30930 void
30931 rs6000_emit_parity (rtx dst, rtx src)
30932 {
30933 machine_mode mode = GET_MODE (dst);
30934 rtx tmp;
30935
30936 tmp = gen_reg_rtx (mode);
30937
30938 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
30939 if (TARGET_CMPB)
30940 {
30941 if (mode == SImode)
30942 {
30943 emit_insn (gen_popcntbsi2 (tmp, src));
30944 emit_insn (gen_paritysi2_cmpb (dst, tmp));
30945 }
30946 else
30947 {
30948 emit_insn (gen_popcntbdi2 (tmp, src));
30949 emit_insn (gen_paritydi2_cmpb (dst, tmp));
30950 }
30951 return;
30952 }
30953
30954 if (mode == SImode)
30955 {
30956 /* Is mult+shift >= shift+xor+shift+xor? */
30957 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
30958 {
30959 rtx tmp1, tmp2, tmp3, tmp4;
30960
30961 tmp1 = gen_reg_rtx (SImode);
30962 emit_insn (gen_popcntbsi2 (tmp1, src));
30963
30964 tmp2 = gen_reg_rtx (SImode);
30965 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
30966 tmp3 = gen_reg_rtx (SImode);
30967 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
30968
30969 tmp4 = gen_reg_rtx (SImode);
30970 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
30971 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
30972 }
30973 else
30974 rs6000_emit_popcount (tmp, src);
30975 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
30976 }
30977 else
30978 {
30979 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
30980 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
30981 {
30982 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
30983
30984 tmp1 = gen_reg_rtx (DImode);
30985 emit_insn (gen_popcntbdi2 (tmp1, src));
30986
30987 tmp2 = gen_reg_rtx (DImode);
30988 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
30989 tmp3 = gen_reg_rtx (DImode);
30990 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
30991
30992 tmp4 = gen_reg_rtx (DImode);
30993 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
30994 tmp5 = gen_reg_rtx (DImode);
30995 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
30996
30997 tmp6 = gen_reg_rtx (DImode);
30998 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
30999 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
31000 }
31001 else
31002 rs6000_emit_popcount (tmp, src);
31003 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
31004 }
31005 }
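/* Illustration only (a hypothetical scalar model, not used by GCC) of
   the SImode shift+xor sequence above: xor-folding the per-byte
   popcounts preserves bit 0, which is the parity of their sum and
   hence of SRC.  */

#if 0
static unsigned int
parity_via_xor_fold (unsigned int per_byte_counts)
{
  unsigned int x = per_byte_counts;	/* models the popcntb result */
  x ^= x >> 16;				/* fold the upper halfword in */
  x ^= x >> 8;				/* fold the remaining byte in */
  return x & 1;				/* bit 0 is the parity */
}
#endif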
31006
31007 /* Expand an Altivec constant permutation for little endian mode.
31008 There are two issues: First, the two input operands must be
31009 swapped so that together they form a double-wide array in LE
31010 order. Second, the vperm instruction has surprising behavior
31011 in LE mode: it interprets the elements of the source vectors
31012 in BE mode ("left to right") and interprets the elements of
31013 the destination vector in LE mode ("right to left"). To
31014 correct for this, we must subtract each element of the permute
31015 control vector from 31.
31016
31017 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
31018 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
31019 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
31020 serve as the permute control vector. Then, in BE mode,
31021
31022 vperm 9,10,11,12
31023
31024 places the desired result in vr9. However, in LE mode the
31025 vector contents will be
31026
31027 vr10 = 00000003 00000002 00000001 00000000
31028 vr11 = 00000007 00000006 00000005 00000004
31029
31030 The result of the vperm using the same permute control vector is
31031
31032 vr9 = 05000000 07000000 01000000 03000000
31033
31034 That is, the leftmost 4 bytes of vr10 are interpreted as the
31035 source for the rightmost 4 bytes of vr9, and so on.
31036
31037 If we change the permute control vector to
31038
31039 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
31040
31041 and issue
31042
31043 vperm 9,11,10,12
31044
31045 we get the desired
31046
31047 vr9 = 00000006 00000004 00000002 00000000. */
31048
31049 void
31050 altivec_expand_vec_perm_const_le (rtx operands[4])
31051 {
31052 unsigned int i;
31053 rtx perm[16];
31054 rtx constv, unspec;
31055 rtx target = operands[0];
31056 rtx op0 = operands[1];
31057 rtx op1 = operands[2];
31058 rtx sel = operands[3];
31059
31060 /* Unpack and adjust the constant selector. */
31061 for (i = 0; i < 16; ++i)
31062 {
31063 rtx e = XVECEXP (sel, 0, i);
31064 unsigned int elt = 31 - (INTVAL (e) & 31);
31065 perm[i] = GEN_INT (elt);
31066 }
31067
31068 /* Expand to a permute, swapping the inputs and using the
31069 adjusted selector. */
31070 if (!REG_P (op0))
31071 op0 = force_reg (V16QImode, op0);
31072 if (!REG_P (op1))
31073 op1 = force_reg (V16QImode, op1);
31074
31075 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
31076 constv = force_reg (V16QImode, constv);
31077 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
31078 UNSPEC_VPERM);
31079 if (!REG_P (target))
31080 {
31081 rtx tmp = gen_reg_rtx (V16QImode);
31082 emit_move_insn (tmp, unspec);
31083 unspec = tmp;
31084 }
31085
31086 emit_move_insn (target, unspec);
31087 }
31088
31089 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
31090 permute control vector. But here it's not a constant, so we must
31091 generate a vector NAND or NOR to do the adjustment. */
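/* This works because vperm only reads the low 5 bits of each selector
   byte, and for such values 31 - e equals (~e & 31); a full bitwise
   NOT therefore performs the same subtract-from-31 adjustment.  */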
31092
31093 void
31094 altivec_expand_vec_perm_le (rtx operands[4])
31095 {
31096 rtx notx, iorx, unspec;
31097 rtx target = operands[0];
31098 rtx op0 = operands[1];
31099 rtx op1 = operands[2];
31100 rtx sel = operands[3];
31101 rtx tmp = target;
31102 rtx norreg = gen_reg_rtx (V16QImode);
31103 machine_mode mode = GET_MODE (target);
31104
31105 /* Get everything in regs so the pattern matches. */
31106 if (!REG_P (op0))
31107 op0 = force_reg (mode, op0);
31108 if (!REG_P (op1))
31109 op1 = force_reg (mode, op1);
31110 if (!REG_P (sel))
31111 sel = force_reg (V16QImode, sel);
31112 if (!REG_P (target))
31113 tmp = gen_reg_rtx (mode);
31114
31115 /* Invert the selector with a VNAND if available, else a VNOR.
31116 The VNAND is preferred for future fusion opportunities. */
31117 notx = gen_rtx_NOT (V16QImode, sel);
31118 iorx = (TARGET_P8_VECTOR
31119 ? gen_rtx_IOR (V16QImode, notx, notx)
31120 : gen_rtx_AND (V16QImode, notx, notx));
31121 emit_insn (gen_rtx_SET (VOIDmode, norreg, iorx));
31122
31123 /* Permute with operands reversed and adjusted selector. */
31124 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
31125 UNSPEC_VPERM);
31126
31127 /* Copy into target, possibly by way of a register. */
31128 if (!REG_P (target))
31129 {
31130 emit_move_insn (tmp, unspec);
31131 unspec = tmp;
31132 }
31133
31134 emit_move_insn (target, unspec);
31135 }
31136
31137 /* Expand an Altivec constant permutation. Return true if we match
31138 an efficient implementation; false to fall back to VPERM. */
31139
31140 bool
31141 altivec_expand_vec_perm_const (rtx operands[4])
31142 {
31143 struct altivec_perm_insn {
31144 HOST_WIDE_INT mask;
31145 enum insn_code impl;
31146 unsigned char perm[16];
31147 };
31148 static const struct altivec_perm_insn patterns[] = {
31149 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
31150 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
31151 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
31152 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
31153 { OPTION_MASK_ALTIVEC,
31154 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
31155 : CODE_FOR_altivec_vmrglb_direct),
31156 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
31157 { OPTION_MASK_ALTIVEC,
31158 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
31159 : CODE_FOR_altivec_vmrglh_direct),
31160 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
31161 { OPTION_MASK_ALTIVEC,
31162 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
31163 : CODE_FOR_altivec_vmrglw_direct),
31164 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
31165 { OPTION_MASK_ALTIVEC,
31166 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
31167 : CODE_FOR_altivec_vmrghb_direct),
31168 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
31169 { OPTION_MASK_ALTIVEC,
31170 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
31171 : CODE_FOR_altivec_vmrghh_direct),
31172 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
31173 { OPTION_MASK_ALTIVEC,
31174 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
31175 : CODE_FOR_altivec_vmrghw_direct),
31176 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
31177 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
31178 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
31179 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
31180 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
31181 };
31182
31183 unsigned int i, j, elt, which;
31184 unsigned char perm[16];
31185 rtx target, op0, op1, sel, x;
31186 bool one_vec;
31187
31188 target = operands[0];
31189 op0 = operands[1];
31190 op1 = operands[2];
31191 sel = operands[3];
31192
31193 /* Unpack the constant selector. */
31194 for (i = which = 0; i < 16; ++i)
31195 {
31196 rtx e = XVECEXP (sel, 0, i);
31197 elt = INTVAL (e) & 31;
31198 which |= (elt < 16 ? 1 : 2);
31199 perm[i] = elt;
31200 }
31201
31202 /* Simplify the constant selector based on operands. */
31203 switch (which)
31204 {
31205 default:
31206 gcc_unreachable ();
31207
31208 case 3:
31209 one_vec = false;
31210 if (!rtx_equal_p (op0, op1))
31211 break;
31212 /* FALLTHRU */
31213
31214 case 2:
31215 for (i = 0; i < 16; ++i)
31216 perm[i] &= 15;
31217 op0 = op1;
31218 one_vec = true;
31219 break;
31220
31221 case 1:
31222 op1 = op0;
31223 one_vec = true;
31224 break;
31225 }
31226
31227 /* Look for splat patterns. */
31228 if (one_vec)
31229 {
31230 elt = perm[0];
31231
31232 for (i = 0; i < 16; ++i)
31233 if (perm[i] != elt)
31234 break;
31235 if (i == 16)
31236 {
31237 if (!BYTES_BIG_ENDIAN)
31238 elt = 15 - elt;
31239 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
31240 return true;
31241 }
31242
31243 if (elt % 2 == 0)
31244 {
31245 for (i = 0; i < 16; i += 2)
31246 if (perm[i] != elt || perm[i + 1] != elt + 1)
31247 break;
31248 if (i == 16)
31249 {
31250 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
31251 x = gen_reg_rtx (V8HImode);
31252 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
31253 GEN_INT (field)));
31254 emit_move_insn (target, gen_lowpart (V16QImode, x));
31255 return true;
31256 }
31257 }
31258
31259 if (elt % 4 == 0)
31260 {
31261 for (i = 0; i < 16; i += 4)
31262 if (perm[i] != elt
31263 || perm[i + 1] != elt + 1
31264 || perm[i + 2] != elt + 2
31265 || perm[i + 3] != elt + 3)
31266 break;
31267 if (i == 16)
31268 {
31269 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
31270 x = gen_reg_rtx (V4SImode);
31271 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
31272 GEN_INT (field)));
31273 emit_move_insn (target, gen_lowpart (V16QImode, x));
31274 return true;
31275 }
31276 }
31277 }
31278
31279 /* Look for merge and pack patterns. */
31280 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
31281 {
31282 bool swapped;
31283
31284 if ((patterns[j].mask & rs6000_isa_flags) == 0)
31285 continue;
31286
31287 elt = patterns[j].perm[0];
31288 if (perm[0] == elt)
31289 swapped = false;
31290 else if (perm[0] == elt + 16)
31291 swapped = true;
31292 else
31293 continue;
31294 for (i = 1; i < 16; ++i)
31295 {
31296 elt = patterns[j].perm[i];
31297 if (swapped)
31298 elt = (elt >= 16 ? elt - 16 : elt + 16);
31299 else if (one_vec && elt >= 16)
31300 elt -= 16;
31301 if (perm[i] != elt)
31302 break;
31303 }
31304 if (i == 16)
31305 {
31306 enum insn_code icode = patterns[j].impl;
31307 machine_mode omode = insn_data[icode].operand[0].mode;
31308 machine_mode imode = insn_data[icode].operand[1].mode;
31309
31310 /* For little-endian, don't use vpkuwum and vpkuhum if the
31311 underlying vector type is not V4SI and V8HI, respectively.
31312 For example, using vpkuwum with a V8HI picks up the even
31313 halfwords in BE numbering (the odd halfwords in LE numbering)
31314 when the even halfwords in LE numbering are what we need. */
31315 if (!BYTES_BIG_ENDIAN
31316 && icode == CODE_FOR_altivec_vpkuwum_direct
31317 && ((GET_CODE (op0) == REG
31318 && GET_MODE (op0) != V4SImode)
31319 || (GET_CODE (op0) == SUBREG
31320 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
31321 continue;
31322 if (!BYTES_BIG_ENDIAN
31323 && icode == CODE_FOR_altivec_vpkuhum_direct
31324 && ((GET_CODE (op0) == REG
31325 && GET_MODE (op0) != V8HImode)
31326 || (GET_CODE (op0) == SUBREG
31327 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
31328 continue;
31329
31330 /* For little-endian, the two input operands must be swapped
31331 (or swapped back) to ensure proper right-to-left numbering
31332 from 0 to 2N-1. */
31333 if (swapped ^ !BYTES_BIG_ENDIAN)
31334 std::swap (op0, op1);
31335 if (imode != V16QImode)
31336 {
31337 op0 = gen_lowpart (imode, op0);
31338 op1 = gen_lowpart (imode, op1);
31339 }
31340 if (omode == V16QImode)
31341 x = target;
31342 else
31343 x = gen_reg_rtx (omode);
31344 emit_insn (GEN_FCN (icode) (x, op0, op1));
31345 if (omode != V16QImode)
31346 emit_move_insn (target, gen_lowpart (V16QImode, x));
31347 return true;
31348 }
31349 }
31350
31351 if (!BYTES_BIG_ENDIAN)
31352 {
31353 altivec_expand_vec_perm_const_le (operands);
31354 return true;
31355 }
31356
31357 return false;
31358 }
31359
31360 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
31361 Return true if we match an efficient implementation. */
31362
31363 static bool
31364 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
31365 unsigned char perm0, unsigned char perm1)
31366 {
31367 rtx x;
31368
31369 /* If both selectors come from the same operand, fold to single op. */
31370 if ((perm0 & 2) == (perm1 & 2))
31371 {
31372 if (perm0 & 2)
31373 op0 = op1;
31374 else
31375 op1 = op0;
31376 }
31377 /* If both operands are equal, fold to simpler permutation. */
31378 if (rtx_equal_p (op0, op1))
31379 {
31380 perm0 = perm0 & 1;
31381 perm1 = (perm1 & 1) + 2;
31382 }
31383 /* If the first selector comes from the second operand, swap. */
31384 else if (perm0 & 2)
31385 {
31386 if (perm1 & 2)
31387 return false;
31388 perm0 -= 2;
31389 perm1 += 2;
31390 std::swap (op0, op1);
31391 }
31392 /* If the second selector does not come from the second operand, fail. */
31393 else if ((perm1 & 2) == 0)
31394 return false;
31395
31396 /* Success! */
31397 if (target != NULL)
31398 {
31399 machine_mode vmode, dmode;
31400 rtvec v;
31401
31402 vmode = GET_MODE (target);
31403 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
31404 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
31405 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
31406 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
31407 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
31408 emit_insn (gen_rtx_SET (VOIDmode, target, x));
31409 }
31410 return true;
31411 }
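/* For example, perm0 = 0 and perm1 = 3 selects element 0 of OP0 and
   element 1 of OP1; on VSX the resulting vec_select of the
   concatenated pair typically matches a single xxpermdi.  */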
31412
31413 bool
31414 rs6000_expand_vec_perm_const (rtx operands[4])
31415 {
31416 rtx target, op0, op1, sel;
31417 unsigned char perm0, perm1;
31418
31419 target = operands[0];
31420 op0 = operands[1];
31421 op1 = operands[2];
31422 sel = operands[3];
31423
31424 /* Unpack the constant selector. */
31425 perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
31426 perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
31427
31428 return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
31429 }
31430
31431 /* Test whether a constant permutation is supported. */
31432
31433 static bool
31434 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
31435 const unsigned char *sel)
31436 {
31437 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
31438 if (TARGET_ALTIVEC)
31439 return true;
31440
31441 /* Check for ps_merge* or evmerge* insns. */
31442 if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode)
31443 || (TARGET_SPE && vmode == V2SImode))
31444 {
31445 rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
31446 rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
31447 return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
31448 }
31449
31450 return false;
31451 }
31452
31453 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
31454
31455 static void
31456 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
31457 machine_mode vmode, unsigned nelt, rtx perm[])
31458 {
31459 machine_mode imode;
31460 rtx x;
31461
31462 imode = vmode;
31463 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
31464 {
31465 imode = GET_MODE_INNER (vmode);
31466 imode = mode_for_size (GET_MODE_BITSIZE (imode), MODE_INT, 0);
31467 imode = mode_for_vector (imode, nelt);
31468 }
31469
31470 x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
31471 x = expand_vec_perm (vmode, op0, op1, x, target);
31472 if (x != target)
31473 emit_move_insn (target, x);
31474 }
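/* For example, a V4SFmode permutation is expanded with a V4SImode
selector, since the constant permutation vector must live in an integer
vector mode of the same width. */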
31475
31476 /* Expand an extract even operation. */
31477
31478 void
31479 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
31480 {
31481 machine_mode vmode = GET_MODE (target);
31482 unsigned i, nelt = GET_MODE_NUNITS (vmode);
31483 rtx perm[16];
31484
31485 for (i = 0; i < nelt; i++)
31486 perm[i] = GEN_INT (i * 2);
31487
31488 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
31489 }
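/* For example, for V4SImode the selector built above is { 0, 2, 4, 6 },
i.e. the even elements of the OP0:OP1 concatenation. */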
31490
31491 /* Expand a vector interleave operation. */
31492
31493 void
31494 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
31495 {
31496 machine_mode vmode = GET_MODE (target);
31497 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
31498 rtx perm[16];
31499
31500 high = (highp ? 0 : nelt / 2);
31501 for (i = 0; i < nelt / 2; i++)
31502 {
31503 perm[i * 2] = GEN_INT (i + high);
31504 perm[i * 2 + 1] = GEN_INT (i + nelt + high);
31505 }
31506
31507 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
31508 }
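/* For example, for V4SImode with HIGHP true the selector built above is
{ 0, 4, 1, 5 } (merge high); with HIGHP false it is { 2, 6, 3, 7 }
(merge low). */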
31509
31510 /* Scale a V2DF vector SRC by two to the power SCALE and place the result in TGT. */
31511 void
31512 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
31513 {
31514 HOST_WIDE_INT hwi_scale (scale);
31515 REAL_VALUE_TYPE r_pow;
31516 rtvec v = rtvec_alloc (2);
31517 rtx elt;
31518 rtx scale_vec = gen_reg_rtx (V2DFmode);
31519 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
31520 elt = CONST_DOUBLE_FROM_REAL_VALUE (r_pow, DFmode);
31521 RTVEC_ELT (v, 0) = elt;
31522 RTVEC_ELT (v, 1) = elt;
31523 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
31524 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
31525 }
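/* For example, rs6000_scale_v2df (tgt, src, 3) loads the constant vector
{ 8.0, 8.0 } and emits a single V2DF multiply, scaling both lanes of SRC
by 2**3. */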
31526
31527 /* Return an RTX representing where to find the function value of a
31528 function returning MODE. */
31529 static rtx
31530 rs6000_complex_function_value (machine_mode mode)
31531 {
31532 unsigned int regno;
31533 rtx r1, r2;
31534 machine_mode inner = GET_MODE_INNER (mode);
31535 unsigned int inner_bytes = GET_MODE_SIZE (inner);
31536
31537 if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
31538 regno = FP_ARG_RETURN;
31539 else
31540 {
31541 regno = GP_ARG_RETURN;
31542
31543 /* 32-bit is OK since it'll go in r3/r4. */
31544 if (TARGET_32BIT && inner_bytes >= 4)
31545 return gen_rtx_REG (mode, regno);
31546 }
31547
31548 if (inner_bytes >= 8)
31549 return gen_rtx_REG (mode, regno);
31550
31551 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
31552 const0_rtx);
31553 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
31554 GEN_INT (inner_bytes));
31555 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
31556 }
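/* For example, on a 64-bit hard-float target a complex float (SCmode)
result is returned as a PARALLEL of two SFmode registers: FP_ARG_RETURN
at byte offset 0 and FP_ARG_RETURN + 1 at byte offset 4. */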
31557
31558 /* Target hook for TARGET_FUNCTION_VALUE.
31559
31560 On the SPE, both FPs and vectors are returned in r3.
31561
31562 On RS/6000 an integer value is in r3 and a floating-point value is in
31563 fp1, unless -msoft-float. */
31564
31565 static rtx
31566 rs6000_function_value (const_tree valtype,
31567 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
31568 bool outgoing ATTRIBUTE_UNUSED)
31569 {
31570 machine_mode mode;
31571 unsigned int regno;
31572 machine_mode elt_mode;
31573 int n_elts;
31574
31575 /* Special handling for structs in darwin64. */
31576 if (TARGET_MACHO
31577 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
31578 {
31579 CUMULATIVE_ARGS valcum;
31580 rtx valret;
31581
31582 valcum.words = 0;
31583 valcum.fregno = FP_ARG_MIN_REG;
31584 valcum.vregno = ALTIVEC_ARG_MIN_REG;
31585 /* Do a trial code generation as if this were going to be passed as
31586 an argument; if any part goes in memory, we return NULL. */
31587 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
31588 if (valret)
31589 return valret;
31590 /* Otherwise fall through to standard ABI rules. */
31591 }
31592
31593 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
31594 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (valtype), valtype,
31595 &elt_mode, &n_elts))
31596 {
31597 int first_reg, n_regs, i;
31598 rtx par;
31599
31600 if (SCALAR_FLOAT_MODE_P (elt_mode))
31601 {
31602 /* _Decimal128 must use even/odd register pairs. */
31603 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
31604 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
31605 }
31606 else
31607 {
31608 first_reg = ALTIVEC_ARG_RETURN;
31609 n_regs = 1;
31610 }
31611
31612 par = gen_rtx_PARALLEL (TYPE_MODE (valtype), rtvec_alloc (n_elts));
31613 for (i = 0; i < n_elts; i++)
31614 {
31615 rtx r = gen_rtx_REG (elt_mode, first_reg + i * n_regs);
31616 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
31617 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
31618 }
31619
31620 return par;
31621 }
31622
31623 if (TARGET_32BIT && TARGET_POWERPC64 && TYPE_MODE (valtype) == DImode)
31624 {
31625 /* A long long return value needs to be split under -mpowerpc64 with the 32-bit ABI. */
31626 return gen_rtx_PARALLEL (DImode,
31627 gen_rtvec (2,
31628 gen_rtx_EXPR_LIST (VOIDmode,
31629 gen_rtx_REG (SImode, GP_ARG_RETURN),
31630 const0_rtx),
31631 gen_rtx_EXPR_LIST (VOIDmode,
31632 gen_rtx_REG (SImode,
31633 GP_ARG_RETURN + 1),
31634 GEN_INT (4))));
31635 }
31636 if (TARGET_32BIT && TARGET_POWERPC64 && TYPE_MODE (valtype) == DCmode)
31637 {
31638 return gen_rtx_PARALLEL (DCmode,
31639 gen_rtvec (4,
31640 gen_rtx_EXPR_LIST (VOIDmode,
31641 gen_rtx_REG (SImode, GP_ARG_RETURN),
31642 const0_rtx),
31643 gen_rtx_EXPR_LIST (VOIDmode,
31644 gen_rtx_REG (SImode,
31645 GP_ARG_RETURN + 1),
31646 GEN_INT (4)),
31647 gen_rtx_EXPR_LIST (VOIDmode,
31648 gen_rtx_REG (SImode,
31649 GP_ARG_RETURN + 2),
31650 GEN_INT (8)),
31651 gen_rtx_EXPR_LIST (VOIDmode,
31652 gen_rtx_REG (SImode,
31653 GP_ARG_RETURN + 3),
31654 GEN_INT (12))));
31655 }
31656
31657 mode = TYPE_MODE (valtype);
31658 if ((INTEGRAL_TYPE_P (valtype) && GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
31659 || POINTER_TYPE_P (valtype))
31660 mode = TARGET_32BIT ? SImode : DImode;
31661
31662 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
31663 /* _Decimal128 must use an even/odd register pair. */
31664 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
31665 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && TARGET_FPRS
31666 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
31667 regno = FP_ARG_RETURN;
31668 else if (TREE_CODE (valtype) == COMPLEX_TYPE
31669 && targetm.calls.split_complex_arg)
31670 return rs6000_complex_function_value (mode);
31671 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
31672 return register is used in both cases, and we won't see V2DImode/V2DFmode
31673 for pure altivec, combine the two cases. */
31674 else if (TREE_CODE (valtype) == VECTOR_TYPE
31675 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
31676 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
31677 regno = ALTIVEC_ARG_RETURN;
31678 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
31679 && (mode == DFmode || mode == DCmode
31680 || mode == TFmode || mode == TCmode))
31681 return spe_build_register_parallel (mode, GP_ARG_RETURN);
31682 else
31683 regno = GP_ARG_RETURN;
31684
31685 return gen_rtx_REG (mode, regno);
31686 }
31687
31688 /* Define how to find the value returned by a library function
31689 assuming the value has mode MODE. */
31690 rtx
31691 rs6000_libcall_value (machine_mode mode)
31692 {
31693 unsigned int regno;
31694
31695 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
31696 {
31697 /* A long long return value needs to be split under -mpowerpc64 with the 32-bit ABI. */
31698 return gen_rtx_PARALLEL (DImode,
31699 gen_rtvec (2,
31700 gen_rtx_EXPR_LIST (VOIDmode,
31701 gen_rtx_REG (SImode, GP_ARG_RETURN),
31702 const0_rtx),
31703 gen_rtx_EXPR_LIST (VOIDmode,
31704 gen_rtx_REG (SImode,
31705 GP_ARG_RETURN + 1),
31706 GEN_INT (4))));
31707 }
31708
31709 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
31710 /* _Decimal128 must use an even/odd register pair. */
31711 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
31712 else if (SCALAR_FLOAT_MODE_P (mode)
31713 && TARGET_HARD_FLOAT && TARGET_FPRS
31714 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
31715 regno = FP_ARG_RETURN;
31716 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
31717 return register is used in both cases, and we won't see V2DImode/V2DFmode
31718 for pure altivec, combine the two cases. */
31719 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
31720 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
31721 regno = ALTIVEC_ARG_RETURN;
31722 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
31723 return rs6000_complex_function_value (mode);
31724 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
31725 && (mode == DFmode || mode == DCmode
31726 || mode == TFmode || mode == TCmode))
31727 return spe_build_register_parallel (mode, GP_ARG_RETURN);
31728 else
31729 regno = GP_ARG_RETURN;
31730
31731 return gen_rtx_REG (mode, regno);
31732 }
31733
31734
31735 /* Return true if we use LRA instead of reload pass. */
31736 static bool
31737 rs6000_lra_p (void)
31738 {
31739 return rs6000_lra_flag;
31740 }
31741
31742 /* Given FROM and TO register numbers, say whether this elimination is allowed.
31743 Frame pointer elimination is automatically handled.
31744
31745 For the RS/6000, if frame pointer elimination is being done, we would like
31746 to convert ap into fp, not sp.
31747
31748 We need r30 if -mminimal-toc was specified, and there are constant pool
31749 references. */
31750
31751 static bool
31752 rs6000_can_eliminate (const int from, const int to)
31753 {
31754 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
31755 ? ! frame_pointer_needed
31756 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
31757 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC || get_pool_size () == 0
31758 : true);
31759 }
31760
31761 /* Define the offset between two registers, FROM to be eliminated and its
31762 replacement TO, at the start of a routine. */
31763 HOST_WIDE_INT
31764 rs6000_initial_elimination_offset (int from, int to)
31765 {
31766 rs6000_stack_t *info = rs6000_stack_info ();
31767 HOST_WIDE_INT offset;
31768
31769 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
31770 offset = info->push_p ? 0 : -info->total_size;
31771 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
31772 {
31773 offset = info->push_p ? 0 : -info->total_size;
31774 if (FRAME_GROWS_DOWNWARD)
31775 offset += info->fixed_size + info->vars_size + info->parm_size;
31776 }
31777 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
31778 offset = FRAME_GROWS_DOWNWARD
31779 ? info->fixed_size + info->vars_size + info->parm_size
31780 : 0;
31781 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
31782 offset = info->total_size;
31783 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
31784 offset = info->push_p ? info->total_size : 0;
31785 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
31786 offset = 0;
31787 else
31788 gcc_unreachable ();
31789
31790 return offset;
31791 }
31792
31793 static rtx
31794 rs6000_dwarf_register_span (rtx reg)
31795 {
31796 rtx parts[8];
31797 int i, words;
31798 unsigned regno = REGNO (reg);
31799 machine_mode mode = GET_MODE (reg);
31800
31801 if (TARGET_SPE
31802 && regno < 32
31803 && (SPE_VECTOR_MODE (GET_MODE (reg))
31804 || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
31805 && mode != SFmode && mode != SDmode && mode != SCmode)))
31806 ;
31807 else
31808 return NULL_RTX;
31809
31810 regno = REGNO (reg);
31811
31812 /* The duality of the SPE register size wreaks all kinds of havoc.
31813 This is a way of distinguishing r0 in 32-bits from r0 in
31814 64-bits. */
31815 words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
31816 gcc_assert (words <= 4);
31817 for (i = 0; i < words; i++, regno++)
31818 {
31819 if (BYTES_BIG_ENDIAN)
31820 {
31821 parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
31822 parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
31823 }
31824 else
31825 {
31826 parts[2 * i] = gen_rtx_REG (SImode, regno);
31827 parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
31828 }
31829 }
31830
31831 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
31832 }
31833
31834 /* Fill in sizes for SPE register high parts in table used by unwinder. */
31835
31836 static void
31837 rs6000_init_dwarf_reg_sizes_extra (tree address)
31838 {
31839 if (TARGET_SPE)
31840 {
31841 int i;
31842 machine_mode mode = TYPE_MODE (char_type_node);
31843 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
31844 rtx mem = gen_rtx_MEM (BLKmode, addr);
31845 rtx value = gen_int_mode (4, mode);
31846
31847 for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++)
31848 {
31849 int column = DWARF_REG_TO_UNWIND_COLUMN
31850 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
31851 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
31852
31853 emit_move_insn (adjust_address (mem, mode, offset), value);
31854 }
31855 }
31856
31857 if (TARGET_MACHO && ! TARGET_ALTIVEC)
31858 {
31859 int i;
31860 machine_mode mode = TYPE_MODE (char_type_node);
31861 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
31862 rtx mem = gen_rtx_MEM (BLKmode, addr);
31863 rtx value = gen_int_mode (16, mode);
31864
31865 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
31866 The unwinder still needs to know the size of Altivec registers. */
31867
31868 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
31869 {
31870 int column = DWARF_REG_TO_UNWIND_COLUMN
31871 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
31872 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
31873
31874 emit_move_insn (adjust_address (mem, mode, offset), value);
31875 }
31876 }
31877 }
31878
31879 /* Map internal gcc register numbers to debug format register numbers.
31880 FORMAT specifies the type of debug register number to use:
31881 0 -- debug information, except for frame-related sections
31882 1 -- DWARF .debug_frame section
31883 2 -- DWARF .eh_frame section */
31884
31885 unsigned int
31886 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
31887 {
31888 /* We never use the GCC internal number for SPE high registers.
31889 Those are mapped to the 1200..1231 range for all debug formats. */
31890 if (SPE_HIGH_REGNO_P (regno))
31891 return regno - FIRST_SPE_HIGH_REGNO + 1200;
31892
31893 /* Except for the above, we use the internal number for non-DWARF
31894 debug information, and also for .eh_frame. */
31895 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
31896 return regno;
31897
31898 /* On some platforms, we use the standard DWARF register
31899 numbering for .debug_info and .debug_frame. */
31900 #ifdef RS6000_USE_DWARF_NUMBERING
31901 if (regno <= 63)
31902 return regno;
31903 if (regno == LR_REGNO)
31904 return 108;
31905 if (regno == CTR_REGNO)
31906 return 109;
31907 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
31908 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
31909 The actual code emitted saves the whole of CR, so we map CR2_REGNO
31910 to the DWARF reg for CR. */
31911 if (format == 1 && regno == CR2_REGNO)
31912 return 64;
31913 if (CR_REGNO_P (regno))
31914 return regno - CR0_REGNO + 86;
31915 if (regno == CA_REGNO)
31916 return 101; /* XER */
31917 if (ALTIVEC_REGNO_P (regno))
31918 return regno - FIRST_ALTIVEC_REGNO + 1124;
31919 if (regno == VRSAVE_REGNO)
31920 return 356;
31921 if (regno == VSCR_REGNO)
31922 return 67;
31923 if (regno == SPE_ACC_REGNO)
31924 return 99;
31925 if (regno == SPEFSCR_REGNO)
31926 return 612;
31927 #endif
31928 return regno;
31929 }
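/* For example, with RS6000_USE_DWARF_NUMBERING a .debug_frame request
(FORMAT == 1) leaves GPR/FPR numbers 0-63 unchanged, maps LR to 108 and
CTR to 109, and maps the first AltiVec register to 1124. */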
31930
31931 /* target hook eh_return_filter_mode */
31932 static machine_mode
31933 rs6000_eh_return_filter_mode (void)
31934 {
31935 return TARGET_32BIT ? SImode : word_mode;
31936 }
31937
31938 /* Target hook for scalar_mode_supported_p. */
31939 static bool
31940 rs6000_scalar_mode_supported_p (machine_mode mode)
31941 {
31942 if (DECIMAL_FLOAT_MODE_P (mode))
31943 return default_decimal_float_supported_p ();
31944 else
31945 return default_scalar_mode_supported_p (mode);
31946 }
31947
31948 /* Target hook for vector_mode_supported_p. */
31949 static bool
31950 rs6000_vector_mode_supported_p (machine_mode mode)
31951 {
31952 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
31953 return true;
31954
31955 if (TARGET_SPE && SPE_VECTOR_MODE (mode))
31956 return true;
31957
31958 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
31959 return true;
31960
31961 return false;
31964 }
31965
31966 /* Target hook for invalid_arg_for_unprototyped_fn. */
31967 static const char *
31968 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
31969 {
31970 return (!rs6000_darwin64_abi
31971 && typelist == 0
31972 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
31973 && (funcdecl == NULL_TREE
31974 || (TREE_CODE (funcdecl) == FUNCTION_DECL
31975 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
31976 ? N_("AltiVec argument passed to unprototyped function")
31977 : NULL;
31978 }
31979
31980 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
31981 setup by using __stack_chk_fail_local hidden function instead of
31982 calling __stack_chk_fail directly. Otherwise it is better to call
31983 __stack_chk_fail directly. */
31984
31985 static tree ATTRIBUTE_UNUSED
31986 rs6000_stack_protect_fail (void)
31987 {
31988 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
31989 ? default_hidden_stack_protect_fail ()
31990 : default_external_stack_protect_fail ();
31991 }
31992
31993 void
31994 rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED,
31995 int num_operands ATTRIBUTE_UNUSED)
31996 {
31997 if (rs6000_warn_cell_microcode)
31998 {
31999 const char *temp;
32000 int insn_code_number = recog_memoized (insn);
32001 location_t location = INSN_LOCATION (insn);
32002
32003 /* Punt on insns we cannot recognize. */
32004 if (insn_code_number < 0)
32005 return;
32006
32007 temp = get_insn_template (insn_code_number, insn);
32008
32009 if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
32010 warning_at (location, OPT_mwarn_cell_microcode,
32011 "emitting microcode insn %s\t[%s] #%d",
32012 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
32013 else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
32014 warning_at (location, OPT_mwarn_cell_microcode,
32015 "emitting conditional microcode insn %s\t[%s] #%d",
32016 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
32017 }
32018 }
32019
32020 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
32021
32022 #if TARGET_ELF
32023 static unsigned HOST_WIDE_INT
32024 rs6000_asan_shadow_offset (void)
32025 {
32026 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
32027 }
32028 #endif
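/* The offset above feeds the usual libsanitizer shadow mapping, roughly
shadow = (addr >> 3) + offset, giving 1 << 41 for 64-bit and 1 << 29 for
32-bit targets. */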
32029 \f
32030 /* Mask options that we want to support inside of attribute((target)) and
32031 #pragma GCC target operations. Note, we do not include things like
32032 64/32-bit, endianness, hard/soft floating point, etc. that would have
32033 different calling sequences. */
32034
32035 struct rs6000_opt_mask {
32036 const char *name; /* option name */
32037 HOST_WIDE_INT mask; /* mask to set */
32038 bool invert; /* invert sense of mask */
32039 bool valid_target; /* option is a target option */
32040 };
32041
32042 static struct rs6000_opt_mask const rs6000_opt_masks[] =
32043 {
32044 { "altivec", OPTION_MASK_ALTIVEC, false, true },
32045 { "cmpb", OPTION_MASK_CMPB, false, true },
32046 { "crypto", OPTION_MASK_CRYPTO, false, true },
32047 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
32048 { "dlmzb", OPTION_MASK_DLMZB, false, true },
32049 { "fprnd", OPTION_MASK_FPRND, false, true },
32050 { "hard-dfp", OPTION_MASK_DFP, false, true },
32051 { "htm", OPTION_MASK_HTM, false, true },
32052 { "isel", OPTION_MASK_ISEL, false, true },
32053 { "mfcrf", OPTION_MASK_MFCRF, false, true },
32054 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
32055 { "mulhw", OPTION_MASK_MULHW, false, true },
32056 { "multiple", OPTION_MASK_MULTIPLE, false, true },
32057 { "popcntb", OPTION_MASK_POPCNTB, false, true },
32058 { "popcntd", OPTION_MASK_POPCNTD, false, true },
32059 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
32060 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
32061 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
32062 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
32063 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
32064 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
32065 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
32066 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
32067 { "string", OPTION_MASK_STRING, false, true },
32068 { "update", OPTION_MASK_NO_UPDATE, true , true },
32069 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, false },
32070 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, false },
32071 { "vsx", OPTION_MASK_VSX, false, true },
32072 { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
32073 #ifdef OPTION_MASK_64BIT
32074 #if TARGET_AIX_OS
32075 { "aix64", OPTION_MASK_64BIT, false, false },
32076 { "aix32", OPTION_MASK_64BIT, true, false },
32077 #else
32078 { "64", OPTION_MASK_64BIT, false, false },
32079 { "32", OPTION_MASK_64BIT, true, false },
32080 #endif
32081 #endif
32082 #ifdef OPTION_MASK_EABI
32083 { "eabi", OPTION_MASK_EABI, false, false },
32084 #endif
32085 #ifdef OPTION_MASK_LITTLE_ENDIAN
32086 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
32087 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
32088 #endif
32089 #ifdef OPTION_MASK_RELOCATABLE
32090 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
32091 #endif
32092 #ifdef OPTION_MASK_STRICT_ALIGN
32093 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
32094 #endif
32095 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
32096 { "string", OPTION_MASK_STRING, false, false },
32097 };
32098
32099 /* Builtin mask mapping for printing the flags. */
32100 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
32101 {
32102 { "altivec", RS6000_BTM_ALTIVEC, false, false },
32103 { "vsx", RS6000_BTM_VSX, false, false },
32104 { "spe", RS6000_BTM_SPE, false, false },
32105 { "paired", RS6000_BTM_PAIRED, false, false },
32106 { "fre", RS6000_BTM_FRE, false, false },
32107 { "fres", RS6000_BTM_FRES, false, false },
32108 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
32109 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
32110 { "popcntd", RS6000_BTM_POPCNTD, false, false },
32111 { "cell", RS6000_BTM_CELL, false, false },
32112 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
32113 { "crypto", RS6000_BTM_CRYPTO, false, false },
32114 { "htm", RS6000_BTM_HTM, false, false },
32115 { "hard-dfp", RS6000_BTM_DFP, false, false },
32116 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
32117 { "long-double-128", RS6000_BTM_LDBL128, false, false },
32118 };
32119
32120 /* Option variables that we want to support inside attribute((target)) and
32121 #pragma GCC target operations. */
32122
32123 struct rs6000_opt_var {
32124 const char *name; /* option name */
32125 size_t global_offset; /* offset of the option in global_options. */
32126 size_t target_offset; /* offset of the option in target options. */
32127 };
32128
32129 static struct rs6000_opt_var const rs6000_opt_vars[] =
32130 {
32131 { "friz",
32132 offsetof (struct gcc_options, x_TARGET_FRIZ),
32133 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
32134 { "avoid-indexed-addresses",
32135 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
32136 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
32137 { "paired",
32138 offsetof (struct gcc_options, x_rs6000_paired_float),
32139 offsetof (struct cl_target_option, x_rs6000_paired_float), },
32140 { "longcall",
32141 offsetof (struct gcc_options, x_rs6000_default_long_calls),
32142 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
32143 };
32144
32145 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
32146 parsing. Return true if there were no errors. */
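/* For example, the strings accepted here look like:

#pragma GCC target ("cpu=power8,htm,no-vsx")
int foo (void) __attribute__ ((__target__ ("altivec,tune=power7")));

where the option names come from rs6000_opt_masks and rs6000_opt_vars
above, optionally prefixed with "no-" to negate them. */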
32147
32148 static bool
32149 rs6000_inner_target_options (tree args, bool attr_p)
32150 {
32151 bool ret = true;
32152
32153 if (args == NULL_TREE)
32154 ;
32155
32156 else if (TREE_CODE (args) == STRING_CST)
32157 {
32158 char *p = ASTRDUP (TREE_STRING_POINTER (args));
32159 char *q;
32160
32161 while ((q = strtok (p, ",")) != NULL)
32162 {
32163 bool error_p = false;
32164 bool not_valid_p = false;
32165 const char *cpu_opt = NULL;
32166
32167 p = NULL;
32168 if (strncmp (q, "cpu=", 4) == 0)
32169 {
32170 int cpu_index = rs6000_cpu_name_lookup (q+4);
32171 if (cpu_index >= 0)
32172 rs6000_cpu_index = cpu_index;
32173 else
32174 {
32175 error_p = true;
32176 cpu_opt = q+4;
32177 }
32178 }
32179 else if (strncmp (q, "tune=", 5) == 0)
32180 {
32181 int tune_index = rs6000_cpu_name_lookup (q+5);
32182 if (tune_index >= 0)
32183 rs6000_tune_index = tune_index;
32184 else
32185 {
32186 error_p = true;
32187 cpu_opt = q+5;
32188 }
32189 }
32190 else
32191 {
32192 size_t i;
32193 bool invert = false;
32194 char *r = q;
32195
32196 error_p = true;
32197 if (strncmp (r, "no-", 3) == 0)
32198 {
32199 invert = true;
32200 r += 3;
32201 }
32202
32203 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
32204 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
32205 {
32206 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
32207
32208 if (!rs6000_opt_masks[i].valid_target)
32209 not_valid_p = true;
32210 else
32211 {
32212 error_p = false;
32213 rs6000_isa_flags_explicit |= mask;
32214
32215 /* VSX needs altivec, so -mvsx automagically sets
32216 altivec. */
32217 if (mask == OPTION_MASK_VSX && !invert)
32218 mask |= OPTION_MASK_ALTIVEC;
32219
32220 if (rs6000_opt_masks[i].invert)
32221 invert = !invert;
32222
32223 if (invert)
32224 rs6000_isa_flags &= ~mask;
32225 else
32226 rs6000_isa_flags |= mask;
32227 }
32228 break;
32229 }
32230
32231 if (error_p && !not_valid_p)
32232 {
32233 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
32234 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
32235 {
32236 size_t j = rs6000_opt_vars[i].global_offset;
32237 *((int *) ((char *)&global_options + j)) = !invert;
32238 error_p = false;
32239 break;
32240 }
32241 }
32242 }
32243
32244 if (error_p)
32245 {
32246 const char *eprefix, *esuffix;
32247
32248 ret = false;
32249 if (attr_p)
32250 {
32251 eprefix = "__attribute__((__target__(";
32252 esuffix = ")))";
32253 }
32254 else
32255 {
32256 eprefix = "#pragma GCC target ";
32257 esuffix = "";
32258 }
32259
32260 if (cpu_opt)
32261 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
32262 q, esuffix);
32263 else if (not_valid_p)
32264 error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
32265 else
32266 error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
32267 }
32268 }
32269 }
32270
32271 else if (TREE_CODE (args) == TREE_LIST)
32272 {
32273 do
32274 {
32275 tree value = TREE_VALUE (args);
32276 if (value)
32277 {
32278 bool ret2 = rs6000_inner_target_options (value, attr_p);
32279 if (!ret2)
32280 ret = false;
32281 }
32282 args = TREE_CHAIN (args);
32283 }
32284 while (args != NULL_TREE);
32285 }
32286
32287 else
32288 gcc_unreachable ();
32289
32290 return ret;
32291 }
32292
32293 /* Print out the target options as a list for -mdebug=target. */
32294
32295 static void
32296 rs6000_debug_target_options (tree args, const char *prefix)
32297 {
32298 if (args == NULL_TREE)
32299 fprintf (stderr, "%s<NULL>", prefix);
32300
32301 else if (TREE_CODE (args) == STRING_CST)
32302 {
32303 char *p = ASTRDUP (TREE_STRING_POINTER (args));
32304 char *q;
32305
32306 while ((q = strtok (p, ",")) != NULL)
32307 {
32308 p = NULL;
32309 fprintf (stderr, "%s\"%s\"", prefix, q);
32310 prefix = ", ";
32311 }
32312 }
32313
32314 else if (TREE_CODE (args) == TREE_LIST)
32315 {
32316 do
32317 {
32318 tree value = TREE_VALUE (args);
32319 if (value)
32320 {
32321 rs6000_debug_target_options (value, prefix);
32322 prefix = ", ";
32323 }
32324 args = TREE_CHAIN (args);
32325 }
32326 while (args != NULL_TREE);
32327 }
32328
32329 else
32330 gcc_unreachable ();
32331
32332 return;
32333 }
32334
32335 \f
32336 /* Hook to validate attribute((target("..."))). */
32337
32338 static bool
32339 rs6000_valid_attribute_p (tree fndecl,
32340 tree ARG_UNUSED (name),
32341 tree args,
32342 int flags)
32343 {
32344 struct cl_target_option cur_target;
32345 bool ret;
32346 tree old_optimize = build_optimization_node (&global_options);
32347 tree new_target, new_optimize;
32348 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
32349
32350 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
32351
32352 if (TARGET_DEBUG_TARGET)
32353 {
32354 tree tname = DECL_NAME (fndecl);
32355 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
32356 if (tname)
32357 fprintf (stderr, "function: %.*s\n",
32358 (int) IDENTIFIER_LENGTH (tname),
32359 IDENTIFIER_POINTER (tname));
32360 else
32361 fprintf (stderr, "function: unknown\n");
32362
32363 fprintf (stderr, "args:");
32364 rs6000_debug_target_options (args, " ");
32365 fprintf (stderr, "\n");
32366
32367 if (flags)
32368 fprintf (stderr, "flags: 0x%x\n", flags);
32369
32370 fprintf (stderr, "--------------------\n");
32371 }
32372
32373 old_optimize = build_optimization_node (&global_options);
32374 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
32375
32376 /* If the function changed the optimization levels as well as setting target
32377 options, start with the optimizations specified. */
32378 if (func_optimize && func_optimize != old_optimize)
32379 cl_optimization_restore (&global_options,
32380 TREE_OPTIMIZATION (func_optimize));
32381
32382 /* The target attributes may also change some optimization flags, so update
32383 the optimization options if necessary. */
32384 cl_target_option_save (&cur_target, &global_options);
32385 rs6000_cpu_index = rs6000_tune_index = -1;
32386 ret = rs6000_inner_target_options (args, true);
32387
32388 /* Set up any additional state. */
32389 if (ret)
32390 {
32391 ret = rs6000_option_override_internal (false);
32392 new_target = build_target_option_node (&global_options);
32393 }
32394 else
32395 new_target = NULL;
32396
32397 new_optimize = build_optimization_node (&global_options);
32398
32399 if (!new_target)
32400 ret = false;
32401
32402 else if (fndecl)
32403 {
32404 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
32405
32406 if (old_optimize != new_optimize)
32407 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
32408 }
32409
32410 cl_target_option_restore (&global_options, &cur_target);
32411
32412 if (old_optimize != new_optimize)
32413 cl_optimization_restore (&global_options,
32414 TREE_OPTIMIZATION (old_optimize));
32415
32416 return ret;
32417 }
32418
32419 \f
32420 /* Hook to validate the current #pragma GCC target and set the state, and
32421 update the macros based on what was changed. If ARGS is NULL, then
32422 POP_TARGET is used to reset the options. */
32423
32424 bool
32425 rs6000_pragma_target_parse (tree args, tree pop_target)
32426 {
32427 tree prev_tree = build_target_option_node (&global_options);
32428 tree cur_tree;
32429 struct cl_target_option *prev_opt, *cur_opt;
32430 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
32431 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
32432
32433 if (TARGET_DEBUG_TARGET)
32434 {
32435 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
32436 fprintf (stderr, "args:");
32437 rs6000_debug_target_options (args, " ");
32438 fprintf (stderr, "\n");
32439
32440 if (pop_target)
32441 {
32442 fprintf (stderr, "pop_target:\n");
32443 debug_tree (pop_target);
32444 }
32445 else
32446 fprintf (stderr, "pop_target: <NULL>\n");
32447
32448 fprintf (stderr, "--------------------\n");
32449 }
32450
32451 if (! args)
32452 {
32453 cur_tree = ((pop_target)
32454 ? pop_target
32455 : target_option_default_node);
32456 cl_target_option_restore (&global_options,
32457 TREE_TARGET_OPTION (cur_tree));
32458 }
32459 else
32460 {
32461 rs6000_cpu_index = rs6000_tune_index = -1;
32462 if (!rs6000_inner_target_options (args, false)
32463 || !rs6000_option_override_internal (false)
32464 || (cur_tree = build_target_option_node (&global_options))
32465 == NULL_TREE)
32466 {
32467 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
32468 fprintf (stderr, "invalid pragma\n");
32469
32470 return false;
32471 }
32472 }
32473
32474 target_option_current_node = cur_tree;
32475
32476 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
32477 change the macros that are defined. */
32478 if (rs6000_target_modify_macros_ptr)
32479 {
32480 prev_opt = TREE_TARGET_OPTION (prev_tree);
32481 prev_bumask = prev_opt->x_rs6000_builtin_mask;
32482 prev_flags = prev_opt->x_rs6000_isa_flags;
32483
32484 cur_opt = TREE_TARGET_OPTION (cur_tree);
32485 cur_flags = cur_opt->x_rs6000_isa_flags;
32486 cur_bumask = cur_opt->x_rs6000_builtin_mask;
32487
32488 diff_bumask = (prev_bumask ^ cur_bumask);
32489 diff_flags = (prev_flags ^ cur_flags);
32490
32491 if ((diff_flags != 0) || (diff_bumask != 0))
32492 {
32493 /* Delete old macros. */
32494 rs6000_target_modify_macros_ptr (false,
32495 prev_flags & diff_flags,
32496 prev_bumask & diff_bumask);
32497
32498 /* Define new macros. */
32499 rs6000_target_modify_macros_ptr (true,
32500 cur_flags & diff_flags,
32501 cur_bumask & diff_bumask);
32502 }
32503 }
32504
32505 return true;
32506 }
32507
32508 \f
32509 /* Remember the last target of rs6000_set_current_function. */
32510 static GTY(()) tree rs6000_previous_fndecl;
32511
32512 /* Establish appropriate back-end context for processing the function
32513 FNDECL. The argument might be NULL to indicate processing at top
32514 level, outside of any function scope. */
32515 static void
32516 rs6000_set_current_function (tree fndecl)
32517 {
32518 tree old_tree = (rs6000_previous_fndecl
32519 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
32520 : NULL_TREE);
32521
32522 tree new_tree = (fndecl
32523 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
32524 : NULL_TREE);
32525
32526 if (TARGET_DEBUG_TARGET)
32527 {
32528 bool print_final = false;
32529 fprintf (stderr, "\n==================== rs6000_set_current_function");
32530
32531 if (fndecl)
32532 fprintf (stderr, ", fndecl %s (%p)",
32533 (DECL_NAME (fndecl)
32534 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
32535 : "<unknown>"), (void *)fndecl);
32536
32537 if (rs6000_previous_fndecl)
32538 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
32539
32540 fprintf (stderr, "\n");
32541 if (new_tree)
32542 {
32543 fprintf (stderr, "\nnew fndecl target specific options:\n");
32544 debug_tree (new_tree);
32545 print_final = true;
32546 }
32547
32548 if (old_tree)
32549 {
32550 fprintf (stderr, "\nold fndecl target specific options:\n");
32551 debug_tree (old_tree);
32552 print_final = true;
32553 }
32554
32555 if (print_final)
32556 fprintf (stderr, "--------------------\n");
32557 }
32558
32559 /* Only change the context if the function changes. This hook is called
32560 several times in the course of compiling a function, and we don't want to
32561 slow things down too much or call target_reinit when it isn't safe. */
32562 if (fndecl && fndecl != rs6000_previous_fndecl)
32563 {
32564 rs6000_previous_fndecl = fndecl;
32565 if (old_tree == new_tree)
32566 ;
32567
32568 else if (new_tree)
32569 {
32570 cl_target_option_restore (&global_options,
32571 TREE_TARGET_OPTION (new_tree));
32572 if (TREE_TARGET_GLOBALS (new_tree))
32573 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
32574 else
32575 TREE_TARGET_GLOBALS (new_tree)
32576 = save_target_globals_default_opts ();
32577 }
32578
32579 else if (old_tree)
32580 {
32581 new_tree = target_option_current_node;
32582 cl_target_option_restore (&global_options,
32583 TREE_TARGET_OPTION (new_tree));
32584 if (TREE_TARGET_GLOBALS (new_tree))
32585 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
32586 else if (new_tree == target_option_default_node)
32587 restore_target_globals (&default_target_globals);
32588 else
32589 TREE_TARGET_GLOBALS (new_tree)
32590 = save_target_globals_default_opts ();
32591 }
32592 }
32593 }
32594
32595 \f
32596 /* Save the current options */
32597
32598 static void
32599 rs6000_function_specific_save (struct cl_target_option *ptr,
32600 struct gcc_options *opts)
32601 {
32602 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
32603 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
32604 }
32605
32606 /* Restore the current options */
32607
32608 static void
32609 rs6000_function_specific_restore (struct gcc_options *opts,
32610 struct cl_target_option *ptr)
32611
32612 {
32613 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
32614 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
32615 (void) rs6000_option_override_internal (false);
32616 }
32617
32618 /* Print the current options */
32619
32620 static void
32621 rs6000_function_specific_print (FILE *file, int indent,
32622 struct cl_target_option *ptr)
32623 {
32624 rs6000_print_isa_options (file, indent, "Isa options set",
32625 ptr->x_rs6000_isa_flags);
32626
32627 rs6000_print_isa_options (file, indent, "Isa options explicit",
32628 ptr->x_rs6000_isa_flags_explicit);
32629 }
32630
32631 /* Helper function to print the current isa or misc options on a line. */
32632
32633 static void
32634 rs6000_print_options_internal (FILE *file,
32635 int indent,
32636 const char *string,
32637 HOST_WIDE_INT flags,
32638 const char *prefix,
32639 const struct rs6000_opt_mask *opts,
32640 size_t num_elements)
32641 {
32642 size_t i;
32643 size_t start_column = 0;
32644 size_t cur_column;
32645 size_t max_column = 76;
32646 const char *comma = "";
32647
32648 if (indent)
32649 start_column += fprintf (file, "%*s", indent, "");
32650
32651 if (!flags)
32652 {
32653 fprintf (file, DEBUG_FMT_S, string, "<none>");
32654 return;
32655 }
32656
32657 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
32658
32659 /* Print the various mask options. */
32660 cur_column = start_column;
32661 for (i = 0; i < num_elements; i++)
32662 {
32663 if ((flags & opts[i].mask) != 0)
32664 {
32665 const char *no_str = opts[i].invert ? "no-" : "";
32666 size_t len = (strlen (comma)
32667 + strlen (prefix)
32668 + strlen (no_str)
32669 + strlen (opts[i].name));
32670
32671 cur_column += len;
32672 if (cur_column > max_column)
32673 {
32674 fprintf (file, ", \\\n%*s", (int)start_column, "");
32675 cur_column = start_column + len;
32676 comma = "";
32677 }
32678
32679 fprintf (file, "%s%s%s%s", comma, prefix, no_str,
32680 opts[i].name);
32681 flags &= ~ opts[i].mask;
32682 comma = ", ";
32683 }
32684 }
32685
32686 fputs ("\n", file);
32687 }
32688
32689 /* Helper function to print the current isa options on a line. */
32690
32691 static void
32692 rs6000_print_isa_options (FILE *file, int indent, const char *string,
32693 HOST_WIDE_INT flags)
32694 {
32695 rs6000_print_options_internal (file, indent, string, flags, "-m",
32696 &rs6000_opt_masks[0],
32697 ARRAY_SIZE (rs6000_opt_masks));
32698 }
32699
32700 static void
32701 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
32702 HOST_WIDE_INT flags)
32703 {
32704 rs6000_print_options_internal (file, indent, string, flags, "",
32705 &rs6000_builtin_mask_names[0],
32706 ARRAY_SIZE (rs6000_builtin_mask_names));
32707 }
32708
32709 \f
32710 /* Hook to determine if one function can safely inline another. */
32711
32712 static bool
32713 rs6000_can_inline_p (tree caller, tree callee)
32714 {
32715 bool ret = false;
32716 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
32717 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
32718
32719 /* If callee has no option attributes, then it is ok to inline. */
32720 if (!callee_tree)
32721 ret = true;
32722
32723 /* If caller has no option attributes, but callee does then it is not ok to
32724 inline. */
32725 else if (!caller_tree)
32726 ret = false;
32727
32728 else
32729 {
32730 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
32731 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
32732
32733 /* Callee's options should be a subset of the caller's, i.e. a vsx function
32734 can inline an altivec function but a non-vsx function can't inline a
32735 vsx function. */
32736 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
32737 == callee_opts->x_rs6000_isa_flags)
32738 ret = true;
32739 }
32740
32741 if (TARGET_DEBUG_TARGET)
32742 fprintf (stderr, "rs6000_can_inline_p: caller %s, callee %s, %s inline\n",
32743 (DECL_NAME (caller)
32744 ? IDENTIFIER_POINTER (DECL_NAME (caller))
32745 : "<unknown>"),
32746 (DECL_NAME (callee)
32747 ? IDENTIFIER_POINTER (DECL_NAME (callee))
32748 : "<unknown>"),
32749 (ret ? "can" : "cannot"));
32750
32751 return ret;
32752 }
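/* For example, a caller compiled with -mvsx can inline a callee marked
__attribute__ ((target ("altivec"))), since the callee's ISA flags are a
subset of the caller's; the reverse combination is rejected. */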
32753 \f
32754 /* Allocate a stack temp and fix up the address so it meets the particular
32755 memory requirements (either offsettable or REG+REG addressing). */
32756
32757 rtx
32758 rs6000_allocate_stack_temp (machine_mode mode,
32759 bool offsettable_p,
32760 bool reg_reg_p)
32761 {
32762 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
32763 rtx addr = XEXP (stack, 0);
32764 int strict_p = (reload_in_progress || reload_completed);
32765
32766 if (!legitimate_indirect_address_p (addr, strict_p))
32767 {
32768 if (offsettable_p
32769 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
32770 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
32771
32772 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
32773 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
32774 }
32775
32776 return stack;
32777 }
32778
32779 /* Given a memory reference, if it is not a reg or reg+reg addressing, convert
32780 to such a form to deal with memory reference instructions like STFIWX that
32781 only take reg+reg addressing. */
32782
32783 rtx
32784 rs6000_address_for_fpconvert (rtx x)
32785 {
32786 int strict_p = (reload_in_progress || reload_completed);
32787 rtx addr;
32788
32789 gcc_assert (MEM_P (x));
32790 addr = XEXP (x, 0);
32791 if (! legitimate_indirect_address_p (addr, strict_p)
32792 && ! legitimate_indexed_address_p (addr, strict_p))
32793 {
32794 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
32795 {
32796 rtx reg = XEXP (addr, 0);
32797 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
32798 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
32799 gcc_assert (REG_P (reg));
32800 emit_insn (gen_add3_insn (reg, reg, size_rtx));
32801 addr = reg;
32802 }
32803 else if (GET_CODE (addr) == PRE_MODIFY)
32804 {
32805 rtx reg = XEXP (addr, 0);
32806 rtx expr = XEXP (addr, 1);
32807 gcc_assert (REG_P (reg));
32808 gcc_assert (GET_CODE (expr) == PLUS);
32809 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
32810 addr = reg;
32811 }
32812
32813 x = replace_equiv_address (x, copy_addr_to_reg (addr));
32814 }
32815
32816 return x;
32817 }
32818
32819 /* Given a memory reference, if it is not in the form for altivec memory
32820 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
32821 convert to the altivec format. */
32822
32823 rtx
32824 rs6000_address_for_altivec (rtx x)
32825 {
32826 gcc_assert (MEM_P (x));
32827 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
32828 {
32829 rtx addr = XEXP (x, 0);
32830 int strict_p = (reload_in_progress || reload_completed);
32831
32832 if (!legitimate_indexed_address_p (addr, strict_p)
32833 && !legitimate_indirect_address_p (addr, strict_p))
32834 addr = copy_to_mode_reg (Pmode, addr);
32835
32836 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
32837 x = change_address (x, GET_MODE (x), addr);
32838 }
32839
32840 return x;
32841 }
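/* For example, (mem (plus r9 r10)) becomes (mem (and (plus r9 r10) -16)),
while an offset address such as (plus r9 (const_int 8)) is first copied
into a register and then masked, matching the implicit 16-byte alignment
of the AltiVec lvx/stvx instructions. */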
32842
32843 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
32844
32845 On the RS/6000, all integer constants are acceptable, though most won't
32846 be valid for particular insns. Only easy FP constants are acceptable. */
32847
32848 static bool
32849 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
32850 {
32851 if (TARGET_ELF && tls_referenced_p (x))
32852 return false;
32853
32854 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
32855 || GET_MODE (x) == VOIDmode
32856 || (TARGET_POWERPC64 && mode == DImode)
32857 || easy_fp_constant (x, mode)
32858 || easy_vector_constant (x, mode));
32859 }
32860
32861 \f
32862
32863 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
32864
32865 void
32866 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
32867 {
32868 const bool direct_call_p
32869 = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
32870 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
32871 rtx toc_load = NULL_RTX;
32872 rtx toc_restore = NULL_RTX;
32873 rtx func_addr;
32874 rtx abi_reg = NULL_RTX;
32875 rtx call[4];
32876 int n_call;
32877 rtx insn;
32878
32879 /* Handle longcall attributes. */
32880 if (INTVAL (cookie) & CALL_LONG)
32881 func_desc = rs6000_longcall_ref (func_desc);
32882
32883 /* Handle indirect calls. */
32884 if (GET_CODE (func_desc) != SYMBOL_REF
32885 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
32886 {
32887 /* Save the TOC into its reserved slot before the call,
32888 and prepare to restore it after the call. */
32889 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
32890 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
32891 rtx stack_toc_mem = gen_frame_mem (Pmode,
32892 gen_rtx_PLUS (Pmode, stack_ptr,
32893 stack_toc_offset));
32894 toc_restore = gen_rtx_SET (VOIDmode, toc_reg, stack_toc_mem);
32895
32896 /* Can we optimize saving the TOC in the prologue or
32897 do we need to do it at every call? */
32898 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
32899 cfun->machine->save_toc_in_prologue = true;
32900 else
32901 {
32902 MEM_VOLATILE_P (stack_toc_mem) = 1;
32903 emit_move_insn (stack_toc_mem, toc_reg);
32904 }
32905
32906 if (DEFAULT_ABI == ABI_ELFv2)
32907 {
32908 /* A function pointer in the ELFv2 ABI is just a plain address, but
32909 the ABI requires it to be loaded into r12 before the call. */
32910 func_addr = gen_rtx_REG (Pmode, 12);
32911 emit_move_insn (func_addr, func_desc);
32912 abi_reg = func_addr;
32913 }
32914 else
32915 {
32916 /* A function pointer under AIX is a pointer to a data area whose
32917 first word contains the actual address of the function, whose
32918 second word contains a pointer to its TOC, and whose third word
32919 contains a value to place in the static chain register (r11).
32920 Note that if we load the static chain, our "trampoline" need
32921 not have any executable code. */
32922
32923 /* Load up address of the actual function. */
32924 func_desc = force_reg (Pmode, func_desc);
32925 func_addr = gen_reg_rtx (Pmode);
32926 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
32927
32928 /* Prepare to load the TOC of the called function. Note that the
32929 TOC load must happen immediately before the actual call so
32930 that unwinding the TOC registers works correctly. See the
32931 comment in frob_update_context. */
32932 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
32933 rtx func_toc_mem = gen_rtx_MEM (Pmode,
32934 gen_rtx_PLUS (Pmode, func_desc,
32935 func_toc_offset));
32936 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
32937
32938 /* If we have a static chain, load it up. But, if the call was
32939 originally direct, the 3rd word has not been written since no
32940 trampoline has been built, so we ought not to load it, lest we
32941 override a static chain value. */
32942 if (!direct_call_p && TARGET_POINTERS_TO_NESTED_FUNCTIONS)
32943 {
32944 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
32945 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
32946 rtx func_sc_mem = gen_rtx_MEM (Pmode,
32947 gen_rtx_PLUS (Pmode, func_desc,
32948 func_sc_offset));
32949 emit_move_insn (sc_reg, func_sc_mem);
32950 abi_reg = sc_reg;
32951 }
32952 }
32953 }
32954 else
32955 {
32956 /* Direct calls use the TOC: for local calls, the callee will
32957 assume the TOC register is set; for non-local calls, the
32958 PLT stub needs the TOC register. */
32959 abi_reg = toc_reg;
32960 func_addr = func_desc;
32961 }
32962
32963 /* Create the call. */
32964 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
32965 if (value != NULL_RTX)
32966 call[0] = gen_rtx_SET (VOIDmode, value, call[0]);
32967 n_call = 1;
32968
32969 if (toc_load)
32970 call[n_call++] = toc_load;
32971 if (toc_restore)
32972 call[n_call++] = toc_restore;
32973
32974 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
32975
32976 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
32977 insn = emit_call_insn (insn);
32978
32979 /* Mention all registers defined by the ABI to hold information
32980 as uses in CALL_INSN_FUNCTION_USAGE. */
32981 if (abi_reg)
32982 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
32983 }
32984
32985 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
32986
32987 void
32988 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
32989 {
32990 rtx call[2];
32991 rtx insn;
32992
32993 gcc_assert (INTVAL (cookie) == 0);
32994
32995 /* Create the call. */
32996 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
32997 if (value != NULL_RTX)
32998 call[0] = gen_rtx_SET (VOIDmode, value, call[0]);
32999
33000 call[1] = simple_return_rtx;
33001
33002 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
33003 insn = emit_call_insn (insn);
33004
33005 /* Note use of the TOC register. */
33006 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
33007 /* We need to also mark a use of the link register since the function we
33008 sibling-call to will use it to return to our caller. */
33009 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, LR_REGNO));
33010 }
33011
33012 /* Return whether we need to always update the saved TOC pointer when we update
33013 the stack pointer. */
33014
33015 static bool
33016 rs6000_save_toc_in_prologue_p (void)
33017 {
33018 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
33019 }
33020
33021 #ifdef HAVE_GAS_HIDDEN
33022 # define USE_HIDDEN_LINKONCE 1
33023 #else
33024 # define USE_HIDDEN_LINKONCE 0
33025 #endif
33026
33027 /* Fills in the label name that should be used for a 476 link stack thunk. */
33028
33029 void
33030 get_ppc476_thunk_name (char name[32])
33031 {
33032 gcc_assert (TARGET_LINK_STACK);
33033
33034 if (USE_HIDDEN_LINKONCE)
33035 sprintf (name, "__ppc476.get_thunk");
33036 else
33037 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
33038 }
33039
33040 /* This function emits the simple thunk routine that is used to preserve
33041 the link stack on the 476 cpu. */
33042
33043 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
33044 static void
33045 rs6000_code_end (void)
33046 {
33047 char name[32];
33048 tree decl;
33049
33050 if (!TARGET_LINK_STACK)
33051 return;
33052
33053 get_ppc476_thunk_name (name);
33054
33055 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
33056 build_function_type_list (void_type_node, NULL_TREE));
33057 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
33058 NULL_TREE, void_type_node);
33059 TREE_PUBLIC (decl) = 1;
33060 TREE_STATIC (decl) = 1;
33061
33062 #if RS6000_WEAK
33063 if (USE_HIDDEN_LINKONCE)
33064 {
33065 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
33066 targetm.asm_out.unique_section (decl, 0);
33067 switch_to_section (get_named_section (decl, NULL, 0));
33068 DECL_WEAK (decl) = 1;
33069 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
33070 targetm.asm_out.globalize_label (asm_out_file, name);
33071 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
33072 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
33073 }
33074 else
33075 #endif
33076 {
33077 switch_to_section (text_section);
33078 ASM_OUTPUT_LABEL (asm_out_file, name);
33079 }
33080
33081 DECL_INITIAL (decl) = make_node (BLOCK);
33082 current_function_decl = decl;
33083 init_function_start (decl);
33084 first_function_block_is_cold = false;
33085 /* Make sure unwind info is emitted for the thunk if needed. */
33086 final_start_function (emit_barrier (), asm_out_file, 1);
33087
33088 fputs ("\tblr\n", asm_out_file);
33089
33090 final_end_function ();
33091 init_insn_lengths ();
33092 free_after_compilation (cfun);
33093 set_cfun (NULL);
33094 current_function_decl = NULL;
33095 }
33096
33097 /* Add r30 to hard reg set if the prologue sets it up and it is not
33098 pic_offset_table_rtx. */
33099
33100 static void
33101 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
33102 {
33103 if (!TARGET_SINGLE_PIC_BASE
33104 && TARGET_TOC
33105 && TARGET_MINIMAL_TOC
33106 && get_pool_size () != 0)
33107 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
33108 }
33109
33110 \f
33111 /* Helper function for rs6000_split_logical to emit a logical instruction after
33112 splitting the operation into individual GPR registers.
33113
33114 DEST is the destination register.
33115 OP1 and OP2 are the input source registers.
33116 CODE is the base operation (AND, IOR, XOR, NOT).
33117 MODE is the machine mode.
33118 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
33119 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
33120 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
33121
33122 static void
33123 rs6000_split_logical_inner (rtx dest,
33124 rtx op1,
33125 rtx op2,
33126 enum rtx_code code,
33127 machine_mode mode,
33128 bool complement_final_p,
33129 bool complement_op1_p,
33130 bool complement_op2_p)
33131 {
33132 rtx bool_rtx;
33133
33134 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
33135 if (op2 && GET_CODE (op2) == CONST_INT
33136 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
33137 && !complement_final_p && !complement_op1_p && !complement_op2_p)
33138 {
33139 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
33140 HOST_WIDE_INT value = INTVAL (op2) & mask;
33141
33142 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
33143 if (code == AND)
33144 {
33145 if (value == 0)
33146 {
33147 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
33148 return;
33149 }
33150
33151 else if (value == mask)
33152 {
33153 if (!rtx_equal_p (dest, op1))
33154 emit_insn (gen_rtx_SET (VOIDmode, dest, op1));
33155 return;
33156 }
33157 }
33158
33159 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
33160 into separate ORI/ORIS or XORI/XORIS instructions. */
33161 else if (code == IOR || code == XOR)
33162 {
33163 if (value == 0)
33164 {
33165 if (!rtx_equal_p (dest, op1))
33166 emit_insn (gen_rtx_SET (VOIDmode, dest, op1));
33167 return;
33168 }
33169 }
33170 }
33171
33172 if (code == AND && mode == SImode
33173 && !complement_final_p && !complement_op1_p && !complement_op2_p)
33174 {
33175 emit_insn (gen_andsi3 (dest, op1, op2));
33176 return;
33177 }
33178
33179 if (complement_op1_p)
33180 op1 = gen_rtx_NOT (mode, op1);
33181
33182 if (complement_op2_p)
33183 op2 = gen_rtx_NOT (mode, op2);
33184
33185 /* For canonical RTL, if only one arm is inverted it is the first. */
33186 if (!complement_op1_p && complement_op2_p)
33187 std::swap (op1, op2);
33188
33189 bool_rtx = ((code == NOT)
33190 ? gen_rtx_NOT (mode, op1)
33191 : gen_rtx_fmt_ee (code, mode, op1, op2));
33192
33193 if (complement_final_p)
33194 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
33195
33196 emit_insn (gen_rtx_SET (VOIDmode, dest, bool_rtx));
33197 }
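/* A minimal standalone sketch (added illustration; fold_logical_const is a
   hypothetical helper, not a GCC internal) of the constant simplifications
   performed above, for 32-bit values:

     #include <stdint.h>

     static uint32_t
     fold_logical_const (uint32_t a, uint32_t b, char code)
     {
       if (code == '&' && b == 0)
         return 0;                            // AND with 0 yields 0
       if (code == '&' && b == UINT32_MAX)
         return a;                            // AND with all-ones is a move
       if ((code == '|' || code == '^') && b == 0)
         return a;                            // IOR/XOR with 0 is a move
       return code == '&' ? (a & b)
            : code == '|' ? (a | b) : (a ^ b);
     }

   All remaining cases fall through to emitting the (possibly complemented)
   operation directly.  */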
33198
33199 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
33200 operations are split immediately during RTL generation to allow for more
33201 optimizations of the AND/IOR/XOR.
33202
33203 OPERANDS is an array containing the destination and two input operands.
33204 CODE is the base operation (AND, IOR, XOR, NOT).
33206 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
33207 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
33208 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
33211
33212 static void
33213 rs6000_split_logical_di (rtx operands[3],
33214 enum rtx_code code,
33215 bool complement_final_p,
33216 bool complement_op1_p,
33217 bool complement_op2_p)
33218 {
33219 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
33220 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
33221 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
33222 enum hi_lo { hi = 0, lo = 1 };
33223 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
33224 size_t i;
33225
33226 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
33227 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
33228 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
33229 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
33230
33231 if (code == NOT)
33232 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
33233 else
33234 {
33235 if (GET_CODE (operands[2]) != CONST_INT)
33236 {
33237 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
33238 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
33239 }
33240 else
33241 {
33242 HOST_WIDE_INT value = INTVAL (operands[2]);
33243 HOST_WIDE_INT value_hi_lo[2];
33244
33245 gcc_assert (!complement_final_p);
33246 gcc_assert (!complement_op1_p);
33247 gcc_assert (!complement_op2_p);
33248
33249 value_hi_lo[hi] = value >> 32;
33250 value_hi_lo[lo] = value & lower_32bits;
33251
33252 for (i = 0; i < 2; i++)
33253 {
33254 HOST_WIDE_INT sub_value = value_hi_lo[i];
33255
33256 if (sub_value & sign_bit)
33257 sub_value |= upper_32bits;
33258
33259 op2_hi_lo[i] = GEN_INT (sub_value);
33260
33261 /* If this is an AND instruction, check to see if we need to load
33262 the value in a register. */
33263 if (code == AND && sub_value != -1 && sub_value != 0
33264 && !and_operand (op2_hi_lo[i], SImode))
33265 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
33266 }
33267 }
33268 }
33269
33270 for (i = 0; i < 2; i++)
33271 {
33272 /* Split large IOR/XOR operations. */
33273 if ((code == IOR || code == XOR)
33274 && GET_CODE (op2_hi_lo[i]) == CONST_INT
33275 && !complement_final_p
33276 && !complement_op1_p
33277 && !complement_op2_p
33278 && !logical_const_operand (op2_hi_lo[i], SImode))
33279 {
33280 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
33281 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
33282 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
33283 rtx tmp = gen_reg_rtx (SImode);
33284
33285 /* Make sure the constant is sign extended. */
33286 if ((hi_16bits & sign_bit) != 0)
33287 hi_16bits |= upper_32bits;
33288
33289 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
33290 code, SImode, false, false, false);
33291
33292 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
33293 code, SImode, false, false, false);
33294 }
33295 else
33296 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
33297 code, SImode, complement_final_p,
33298 complement_op1_p, complement_op2_p);
33299 }
33300
33301 return;
33302 }
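/* A hedged standalone sketch (split_di_const is a hypothetical helper, not
   a GCC internal) of how the 64-bit constant is divided above for the
   32-bit case:

     #include <stdint.h>

     static void
     split_di_const (int64_t value, int64_t *hi, int64_t *lo)
     {
       *hi = value >> 32;                     // upper word, sign preserved
       *lo = value & 0xffffffff;              // lower word, zero filled
       if (*lo & 0x80000000)                  // re-extend the sign bit,
         *lo |= ~(int64_t) 0xffffffff;        // as the loop above does
     }

   A 32-bit IOR/XOR half that is not a valid 16-bit logical constant is
   further split into (value & 0xffff0000) and (value & 0x0000ffff),
   matching an ORIS/ORI (or XORIS/XORI) instruction pair.  */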
33303
33304 /* Split the insns that make up boolean operations operating on multiple GPR
33305 registers. The boolean MD patterns ensure that the inputs either are
33306 exactly the same as the output registers, or there is no overlap.
33307
33308 OPERANDS is an array containing the destination and two input operands.
33309 CODE is the base operation (AND, IOR, XOR, NOT).
33310 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
33311 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
33312 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
33313
33314 void
33315 rs6000_split_logical (rtx operands[3],
33316 enum rtx_code code,
33317 bool complement_final_p,
33318 bool complement_op1_p,
33319 bool complement_op2_p)
33320 {
33321 machine_mode mode = GET_MODE (operands[0]);
33322 machine_mode sub_mode;
33323 rtx op0, op1, op2;
33324 int sub_size, regno0, regno1, nregs, i;
33325
33326 /* If this is DImode, use the specialized version that can run before
33327 register allocation. */
33328 if (mode == DImode && !TARGET_POWERPC64)
33329 {
33330 rs6000_split_logical_di (operands, code, complement_final_p,
33331 complement_op1_p, complement_op2_p);
33332 return;
33333 }
33334
33335 op0 = operands[0];
33336 op1 = operands[1];
33337 op2 = (code == NOT) ? NULL_RTX : operands[2];
33338 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
33339 sub_size = GET_MODE_SIZE (sub_mode);
33340 regno0 = REGNO (op0);
33341 regno1 = REGNO (op1);
33342
33343 gcc_assert (reload_completed);
33344 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
33345 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
33346
33347 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
33348 gcc_assert (nregs > 1);
33349
33350 if (op2 && REG_P (op2))
33351 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
33352
33353 for (i = 0; i < nregs; i++)
33354 {
33355 int offset = i * sub_size;
33356 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
33357 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
33358 rtx sub_op2 = ((code == NOT)
33359 ? NULL_RTX
33360 : simplify_subreg (sub_mode, op2, mode, offset));
33361
33362 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
33363 complement_final_p, complement_op1_p,
33364 complement_op2_p);
33365 }
33366
33367 return;
33368 }
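/* Added illustration (register numbers are assumed): on a 64-bit target a
   TImode XOR occupying two GPR pairs is split by the loop above into one
   DImode operation per subword, e.g.

     xor 8,4,6       # high doubleword
     xor 9,5,7       # low doubleword

   each emitted through rs6000_split_logical_inner.  */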
33369
33370 \f
33371 /* Return true if the peephole2 can combine an addis instruction with a
33372 load that uses an offset, so that the two can be fused together on
33373 a power8. */
33374
33375 bool
33376 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
33377 rtx addis_value, /* addis value. */
33378 rtx target, /* target register that is loaded. */
33379 rtx mem) /* bottom part of the memory addr. */
33380 {
33381 rtx addr;
33382 rtx base_reg;
33383
33384 /* Validate arguments. */
33385 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
33386 return false;
33387
33388 if (!base_reg_operand (target, GET_MODE (target)))
33389 return false;
33390
33391 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
33392 return false;
33393
33394 /* Allow sign/zero extension. */
33395 if (GET_CODE (mem) == ZERO_EXTEND
33396 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
33397 mem = XEXP (mem, 0);
33398
33399 if (!MEM_P (mem))
33400 return false;
33401
33402 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
33403 return false;
33404
33405 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
33406 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
33407 return false;
33408
33409 /* Validate that the register used to load the high value is either the
33410 register being loaded, or we can safely replace its use.
33411
33412 This function is only called from the peephole2 pass and we assume that
33413 there are 2 instructions in the peephole (addis and load), so we want to
33414 check if the target register was not used in the memory address and the
33415 register to hold the addis result is dead after the peephole. */
33416 if (REGNO (addis_reg) != REGNO (target))
33417 {
33418 if (reg_mentioned_p (target, mem))
33419 return false;
33420
33421 if (!peep2_reg_dead_p (2, addis_reg))
33422 return false;
33423
33424 /* If the target register being loaded is the stack pointer, we must
33425 avoid loading any other value into it, even temporarily. */
33426 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
33427 return false;
33428 }
33429
33430 base_reg = XEXP (addr, 0);
33431 return REGNO (addis_reg) == REGNO (base_reg);
33432 }
33433
33434 /* During the peephole2 pass, adjust and expand the insns for a load fusion
33435 sequence. We adjust the addis register to use the target register. If the
33436 load sign extends, we adjust the code to do the zero extending load, and an
33437 explicit sign extension later since the fusion only covers zero extending
33438 loads.
33439
33440 The operands are:
33441 operands[0] register set with addis (to be replaced with target)
33442 operands[1] value set via addis
33443 operands[2] target register being loaded
33444 operands[3] D-form memory reference using operands[0]. */
33445
33446 void
33447 expand_fusion_gpr_load (rtx *operands)
33448 {
33449 rtx addis_value = operands[1];
33450 rtx target = operands[2];
33451 rtx orig_mem = operands[3];
33452 rtx new_addr, new_mem, orig_addr, offset;
33453 enum rtx_code plus_or_lo_sum;
33454 machine_mode target_mode = GET_MODE (target);
33455 machine_mode extend_mode = target_mode;
33456 machine_mode ptr_mode = Pmode;
33457 enum rtx_code extend = UNKNOWN;
33458
33459 if (GET_CODE (orig_mem) == ZERO_EXTEND
33460 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
33461 {
33462 extend = GET_CODE (orig_mem);
33463 orig_mem = XEXP (orig_mem, 0);
33464 target_mode = GET_MODE (orig_mem);
33465 }
33466
33467 gcc_assert (MEM_P (orig_mem));
33468
33469 orig_addr = XEXP (orig_mem, 0);
33470 plus_or_lo_sum = GET_CODE (orig_addr);
33471 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
33472
33473 offset = XEXP (orig_addr, 1);
33474 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
33475 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
33476
33477 if (extend != UNKNOWN)
33478 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
33479
33480 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
33481 UNSPEC_FUSION_GPR);
33482 emit_insn (gen_rtx_SET (VOIDmode, target, new_mem));
33483
33484 if (extend == SIGN_EXTEND)
33485 {
33486 int sub_off = ((BYTES_BIG_ENDIAN)
33487 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
33488 : 0);
33489 rtx sign_reg
33490 = simplify_subreg (target_mode, target, extend_mode, sub_off);
33491
33492 emit_insn (gen_rtx_SET (VOIDmode, target,
33493 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
33494 }
33495
33496 return;
33497 }
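/* Added illustration: for a fused sign-extending HImode load into a DImode
   register, the expansion above emits a zero-extending fused load followed
   by an explicit sign extension, conceptually (OFF depends on endianness):

     (set (reg:DI target)
          (unspec:DI [(zero_extend:DI (mem:HI (plus ...)))]
                     UNSPEC_FUSION_GPR))
     (set (reg:DI target)
          (sign_extend:DI (subreg:HI (reg:DI target) OFF)))

   since, as noted above, the fusion only covers zero extending loads.  */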
33498
33499 /* Return a string to fuse an addis instruction with a gpr load into the
33500 same register that the addis instruction set. The address used is the
33501 logical address that was formed during peephole2:
33502 (lo_sum (high) (low-part))
33503
33504 The code is complicated, so we call output_asm_insn directly, and just
33505 return "". */
33506
33507 const char *
33508 emit_fusion_gpr_load (rtx target, rtx mem)
33509 {
33510 rtx addis_value;
33511 rtx fuse_ops[10];
33512 rtx addr;
33513 rtx load_offset;
33514 const char *addis_str = NULL;
33515 const char *load_str = NULL;
33516 const char *mode_name = NULL;
33517 char insn_template[80];
33518 machine_mode mode;
33519 const char *comment_str = ASM_COMMENT_START;
33520
33521 if (GET_CODE (mem) == ZERO_EXTEND)
33522 mem = XEXP (mem, 0);
33523
33524 gcc_assert (REG_P (target) && MEM_P (mem));
33525
33526 if (*comment_str == ' ')
33527 comment_str++;
33528
33529 addr = XEXP (mem, 0);
33530 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
33531 gcc_unreachable ();
33532
33533 addis_value = XEXP (addr, 0);
33534 load_offset = XEXP (addr, 1);
33535
33536 /* Now emit the load instruction to the same register. */
33537 mode = GET_MODE (mem);
33538 switch (mode)
33539 {
33540 case QImode:
33541 mode_name = "char";
33542 load_str = "lbz";
33543 break;
33544
33545 case HImode:
33546 mode_name = "short";
33547 load_str = "lhz";
33548 break;
33549
33550 case SImode:
33551 mode_name = "int";
33552 load_str = "lwz";
33553 break;
33554
33555 case DImode:
33556 gcc_assert (TARGET_POWERPC64);
33557 mode_name = "long";
33558 load_str = "ld";
33559 break;
33560
33561 default:
33562 gcc_unreachable ();
33563 }
33564
33565 /* Emit the addis instruction. */
33566 fuse_ops[0] = target;
33567 if (satisfies_constraint_L (addis_value))
33568 {
33569 fuse_ops[1] = addis_value;
33570 addis_str = "lis %0,%v1";
33571 }
33572
33573 else if (GET_CODE (addis_value) == PLUS)
33574 {
33575 rtx op0 = XEXP (addis_value, 0);
33576 rtx op1 = XEXP (addis_value, 1);
33577
33578 if (REG_P (op0) && CONST_INT_P (op1)
33579 && satisfies_constraint_L (op1))
33580 {
33581 fuse_ops[1] = op0;
33582 fuse_ops[2] = op1;
33583 addis_str = "addis %0,%1,%v2";
33584 }
33585 }
33586
33587 else if (GET_CODE (addis_value) == HIGH)
33588 {
33589 rtx value = XEXP (addis_value, 0);
33590 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
33591 {
33592 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
33593 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
33594 if (TARGET_ELF)
33595 addis_str = "addis %0,%2,%1@toc@ha";
33596
33597 else if (TARGET_XCOFF)
33598 addis_str = "addis %0,%1@u(%2)";
33599
33600 else
33601 gcc_unreachable ();
33602 }
33603
33604 else if (GET_CODE (value) == PLUS)
33605 {
33606 rtx op0 = XEXP (value, 0);
33607 rtx op1 = XEXP (value, 1);
33608
33609 if (GET_CODE (op0) == UNSPEC
33610 && XINT (op0, 1) == UNSPEC_TOCREL
33611 && CONST_INT_P (op1))
33612 {
33613 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
33614 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
33615 fuse_ops[3] = op1;
33616 if (TARGET_ELF)
33617 addis_str = "addis %0,%2,%1+%3@toc@ha";
33618
33619 else if (TARGET_XCOFF)
33620 addis_str = "addis %0,%1+%3@u(%2)";
33621
33622 else
33623 gcc_unreachable ();
33624 }
33625 }
33626
33627 else if (satisfies_constraint_L (value))
33628 {
33629 fuse_ops[1] = value;
33630 addis_str = "lis %0,%v1";
33631 }
33632
33633 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
33634 {
33635 fuse_ops[1] = value;
33636 addis_str = "lis %0,%1@ha";
33637 }
33638 }
33639
33640 if (!addis_str)
33641 fatal_insn ("Could not generate addis value for fusion", addis_value);
33642
33643 sprintf (insn_template, "%s\t\t%s gpr load fusion, type %s", addis_str,
33644 comment_str, mode_name);
33645 output_asm_insn (insn_template, fuse_ops);
33646
33647 /* Emit the D-form load instruction. */
33648 if (CONST_INT_P (load_offset) && satisfies_constraint_I (load_offset))
33649 {
33650 sprintf (insn_template, "%s %%0,%%1(%%0)", load_str);
33651 fuse_ops[1] = load_offset;
33652 output_asm_insn (insn_template, fuse_ops);
33653 }
33654
33655 else if (GET_CODE (load_offset) == UNSPEC
33656 && XINT (load_offset, 1) == UNSPEC_TOCREL)
33657 {
33658 if (TARGET_ELF)
33659 sprintf (insn_template, "%s %%0,%%1@toc@l(%%0)", load_str);
33660
33661 else if (TARGET_XCOFF)
33662 sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
33663
33664 else
33665 gcc_unreachable ();
33666
33667 fuse_ops[1] = XVECEXP (load_offset, 0, 0);
33668 output_asm_insn (insn_template, fuse_ops);
33669 }
33670
33671 else if (GET_CODE (load_offset) == PLUS
33672 && GET_CODE (XEXP (load_offset, 0)) == UNSPEC
33673 && XINT (XEXP (load_offset, 0), 1) == UNSPEC_TOCREL
33674 && CONST_INT_P (XEXP (load_offset, 1)))
33675 {
33676 rtx tocrel_unspec = XEXP (load_offset, 0);
33677 if (TARGET_ELF)
33678 sprintf (insn_template, "%s %%0,%%1+%%2@toc@l(%%0)", load_str);
33679
33680 else if (TARGET_XCOFF)
33681 sprintf (insn_template, "%s %%0,%%1+%%2@l(%%0)", load_str);
33682
33683 else
33684 gcc_unreachable ();
33685
33686 fuse_ops[1] = XVECEXP (tocrel_unspec, 0, 0);
33687 fuse_ops[2] = XEXP (load_offset, 1);
33688 output_asm_insn (insn_template, fuse_ops);
33689 }
33690
33691 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (load_offset))
33692 {
33693 sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
33694
33695 fuse_ops[1] = load_offset;
33696 output_asm_insn (insn_template, fuse_ops);
33697 }
33698
33699 else
33700 fatal_insn ("Unable to generate load offset for fusion", load_offset);
33701
33702 return "";
33703 }
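/* Added illustration (symbol and register numbers are assumed): with the
   templates above, a fused TOC-relative SImode load on ELF comes out as

     addis 9,2,var@toc@ha        # gpr load fusion, type int
     lwz 9,var@toc@l(9)

   where the addis result and the loaded value share one register, which
   is what lets the power8 fuse the pair.  */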
33704 \f
33705 /* Analyze vector computations and remove unnecessary doubleword
33706 swaps (xxswapdi instructions). This pass is performed only
33707 for little-endian VSX code generation.
33708
33709 For this specific case, loads and stores of 4x32 and 2x64 vectors
33710 are inefficient. These are implemented using the lxvd2x and
33711 stxvd2x instructions, which invert the order of doublewords in
33712 a vector register. Thus the code generation inserts an xxswapdi
33713 after each such load, and prior to each such store. (For spill
33714 code after register assignment, an additional xxswapdi is inserted
33715 following each store in order to return a hard register to its
33716 unpermuted value.)
33717
33718 The extra xxswapdi instructions reduce performance. This can be
33719 particularly bad for vectorized code. The purpose of this pass
33720 is to reduce the number of xxswapdi instructions required for
33721 correctness.
33722
33723 The primary insight is that much code that operates on vectors
33724 does not care about the relative order of elements in a register,
33725 so long as the correct memory order is preserved. If we have
33726 a computation where all input values are provided by lxvd2x/xxswapdi
33727 sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
33728 and all intermediate computations are pure SIMD (independent of
33729 element order), then all the xxswapdi's associated with the loads
33730 and stores may be removed.
33731
33732 This pass uses some of the infrastructure and logical ideas from
33733 the "web" pass in web.c. We create maximal webs of computations
33734 fitting the description above using union-find. Each such web is
33735 then optimized by removing its unnecessary xxswapdi instructions.
33736
33737 The pass is placed prior to global optimization so that we can
33738 perform the optimization in the safest and simplest way possible;
33739 that is, by replacing each xxswapdi insn with a register copy insn.
33740 Subsequent forward propagation will remove copies where possible.
33741
33742 There are some operations sensitive to element order for which we
33743 can still allow the operation, provided we modify those operations.
33744 These include CONST_VECTORs, for which we must swap the first and
33745 second halves of the constant vector; and SUBREGs, for which we
33746 must adjust the byte offset to account for the swapped doublewords.
33747 A remaining opportunity would be non-immediate-form splats, for
33748 which we should adjust the selected lane of the input. We should
33749 also make code generation adjustments for sum-across operations,
33750 since this is a common vectorizer reduction.
33751
33752 Because we run prior to the first split, we can see loads and stores
33753 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
33754 vector loads and stores that have not yet been split into a permuting
33755 load/store and a swap. (One way this can happen is with a builtin
33756 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
33757 than deleting a swap, we convert the load/store into a permuting
33758 load/store (which effectively removes the swap). */
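/* Added illustration (register numbers are assumed) of the net effect of
   this pass on a tiny web:

     before:                         after:
       lxvd2x 0,0,9                    lxvd2x 0,0,9
       xxswapd 0,0                     xvadddp 0,0,0
       xvadddp 0,0,0                   stxvd2x 0,0,10
       xxswapd 0,0
       stxvd2x 0,0,10

   The add is element-order independent, so both swaps can go; as described
   above, they are first replaced by register copies, which forward
   propagation then removes.  */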
33759
33760 /* Notes on Permutes
33761
33762 We do not currently handle computations that contain permutes. There
33763 is a general transformation that can be performed correctly, but it
33764 may introduce more expensive code than it replaces. To handle these
33765 would require a cost model to determine when to perform the optimization.
33766 This commentary records how this could be done if desired.
33767
33768 The most general permute is something like this (example for V16QI):
33769
33770 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
33771 (parallel [(const_int a0) (const_int a1)
33772 ...
33773 (const_int a14) (const_int a15)]))
33774
33775 where a0,...,a15 are in [0,31] and select the elements from op1 and op2
33776 that appear in the result.
33777
33778 Regardless of mode, we can convert the PARALLEL to a mask of 16
33779 byte-element selectors. Let's call this M, with M[i] representing
33780 the ith byte-element selector value. Then if we swap doublewords
33781 throughout the computation, we can get correct behavior by replacing
33782 M with M' as follows:
33783
33784            { M[i+8]+8 : i < 8,  M[i+8] in [0,7] U [16,23]
33785 M'[i] =    { M[i+8]-8 : i < 8,  M[i+8] in [8,15] U [24,31]
33786            { M[i-8]+8 : i >= 8, M[i-8] in [0,7] U [16,23]
33787            { M[i-8]-8 : i >= 8, M[i-8] in [8,15] U [24,31]
33788
33789 This seems promising at first, since we are just replacing one mask
33790 with another. But certain masks are preferable to others. If M
33791 is a mask that matches a vmrghh pattern, for example, M' certainly
33792 will not. Instead of a single vmrghh, we would generate a load of
33793 M' and a vperm. So we would need to know how many xxswapd's we can
33794 remove as a result of this transformation to determine if it's
33795 profitable; and preferably the logic would need to be aware of all
33796 the special preferable masks.
33797
33798 Another form of permute is an UNSPEC_VPERM, in which the mask is
33799 already in a register. In some cases, this mask may be a constant
33800 that we can discover with ud-chains, in which case the above
33801 transformation is ok. However, the common usage here is for the
33802 mask to be produced by an UNSPEC_LVSL, in which case the mask
33803 cannot be known at compile time. In such a case we would have to
33804 generate several instructions to compute M' as above at run time,
33805 and a cost model is needed again. */
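/* A standalone sketch (swap_perm_mask is a hypothetical helper, not a GCC
   internal) of the mask transformation M -> M' defined above:

     static void
     swap_perm_mask (const unsigned char m[16], unsigned char mprime[16])
     {
       for (int i = 0; i < 16; i++)
         {
           unsigned char s = m[(i + 8) % 16];   // swap halves of the mask
           // Swap the doubleword halves of whichever input is selected.
           mprime[i] = (s % 16 < 8) ? s + 8 : s - 8;
         }
     }

   For example, a selector of 0 (byte 0 of op1) at mask position 12 becomes
   a selector of 8 at position 4, matching the first line of the table.  */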
33806
33807 /* This is based on the union-find logic in web.c. web_entry_base is
33808 defined in df.h. */
33809 class swap_web_entry : public web_entry_base
33810 {
33811 public:
33812 /* Pointer to the insn. */
33813 rtx_insn *insn;
33814 /* Set if insn contains a mention of a vector register. All other
33815 fields are undefined if this field is unset. */
33816 unsigned int is_relevant : 1;
33817 /* Set if insn is a load. */
33818 unsigned int is_load : 1;
33819 /* Set if insn is a store. */
33820 unsigned int is_store : 1;
33821 /* Set if insn is a doubleword swap. This can either be a register swap
33822 or a permuting load or store (test is_load and is_store for this). */
33823 unsigned int is_swap : 1;
33824 /* Set if the insn has a live-in use of a parameter register. */
33825 unsigned int is_live_in : 1;
33826 /* Set if the insn has a live-out def of a return register. */
33827 unsigned int is_live_out : 1;
33828 /* Set if the insn contains a subreg reference of a vector register. */
33829 unsigned int contains_subreg : 1;
33830 /* Set if the insn contains a 128-bit integer operand. */
33831 unsigned int is_128_int : 1;
33832 /* Set if this is a call-insn. */
33833 unsigned int is_call : 1;
33834 /* Set if this insn does not perform a vector operation for which
33835 element order matters, or if we know how to fix it up if it does.
33836 Undefined if is_swap is set. */
33837 unsigned int is_swappable : 1;
33838 /* A nonzero value indicates what kind of special handling for this
33839 insn is required if doublewords are swapped. Undefined if
33840 is_swappable is not set. */
33841 unsigned int special_handling : 3;
33842 /* Set if the web represented by this entry cannot be optimized. */
33843 unsigned int web_not_optimizable : 1;
33844 /* Set if this insn should be deleted. */
33845 unsigned int will_delete : 1;
33846 };
33847
33848 enum special_handling_values {
33849 SH_NONE = 0,
33850 SH_CONST_VECTOR,
33851 SH_SUBREG,
33852 SH_NOSWAP_LD,
33853 SH_NOSWAP_ST,
33854 SH_EXTRACT,
33855 SH_SPLAT
33856 };
33857
33858 /* Union INSN with all insns containing definitions that reach USE.
33859 Detect whether USE is live-in to the current function. */
33860 static void
33861 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
33862 {
33863 struct df_link *link = DF_REF_CHAIN (use);
33864
33865 if (!link)
33866 insn_entry[INSN_UID (insn)].is_live_in = 1;
33867
33868 while (link)
33869 {
33870 if (DF_REF_IS_ARTIFICIAL (link->ref))
33871 insn_entry[INSN_UID (insn)].is_live_in = 1;
33872
33873 if (DF_REF_INSN_INFO (link->ref))
33874 {
33875 rtx def_insn = DF_REF_INSN (link->ref);
33876 (void)unionfind_union (insn_entry + INSN_UID (insn),
33877 insn_entry + INSN_UID (def_insn));
33878 }
33879
33880 link = link->next;
33881 }
33882 }
33883
33884 /* Union INSN with all insns containing uses reached from DEF.
33885 Detect whether DEF is live-out from the current function. */
33886 static void
33887 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
33888 {
33889 struct df_link *link = DF_REF_CHAIN (def);
33890
33891 if (!link)
33892 insn_entry[INSN_UID (insn)].is_live_out = 1;
33893
33894 while (link)
33895 {
33896 /* This could be an eh use or some other artificial use;
33897 we treat these all the same (killing the optimization). */
33898 if (DF_REF_IS_ARTIFICIAL (link->ref))
33899 insn_entry[INSN_UID (insn)].is_live_out = 1;
33900
33901 if (DF_REF_INSN_INFO (link->ref))
33902 {
33903 rtx use_insn = DF_REF_INSN (link->ref);
33904 (void)unionfind_union (insn_entry + INSN_UID (insn),
33905 insn_entry + INSN_UID (use_insn));
33906 }
33907
33908 link = link->next;
33909 }
33910 }
33911
33912 /* Return 1 iff INSN is a load insn, including permuting loads that
33913 represent an lxvd2x instruction; else return 0. */
33914 static unsigned int
33915 insn_is_load_p (rtx insn)
33916 {
33917 rtx body = PATTERN (insn);
33918
33919 if (GET_CODE (body) == SET)
33920 {
33921 if (GET_CODE (SET_SRC (body)) == MEM)
33922 return 1;
33923
33924 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
33925 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
33926 return 1;
33927
33928 return 0;
33929 }
33930
33931 if (GET_CODE (body) != PARALLEL)
33932 return 0;
33933
33934 rtx set = XVECEXP (body, 0, 0);
33935
33936 if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
33937 return 1;
33938
33939 return 0;
33940 }
33941
33942 /* Return 1 iff INSN is a store insn, including permuting stores that
33943 represent an stxvd2x instruction; else return 0. */
33944 static unsigned int
33945 insn_is_store_p (rtx insn)
33946 {
33947 rtx body = PATTERN (insn);
33948 if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
33949 return 1;
33950 if (GET_CODE (body) != PARALLEL)
33951 return 0;
33952 rtx set = XVECEXP (body, 0, 0);
33953 if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
33954 return 1;
33955 return 0;
33956 }
33957
33958 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
33959 a permuting load, or a permuting store. */
33960 static unsigned int
33961 insn_is_swap_p (rtx insn)
33962 {
33963 rtx body = PATTERN (insn);
33964 if (GET_CODE (body) != SET)
33965 return 0;
33966 rtx rhs = SET_SRC (body);
33967 if (GET_CODE (rhs) != VEC_SELECT)
33968 return 0;
33969 rtx parallel = XEXP (rhs, 1);
33970 if (GET_CODE (parallel) != PARALLEL)
33971 return 0;
33972 unsigned int len = XVECLEN (parallel, 0);
33973 if (len != 2 && len != 4 && len != 8 && len != 16)
33974 return 0;
33975 for (unsigned int i = 0; i < len / 2; ++i)
33976 {
33977 rtx op = XVECEXP (parallel, 0, i);
33978 if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
33979 return 0;
33980 }
33981 for (unsigned int i = len / 2; i < len; ++i)
33982 {
33983 rtx op = XVECEXP (parallel, 0, i);
33984 if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
33985 return 0;
33986 }
33987 return 1;
33988 }
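/* Added illustration: for V2DI (len == 2), the doubleword swap recognized
   above has the shape

     (set (reg:V2DI 64)
          (vec_select:V2DI (reg:V2DI 65)
                           (parallel [(const_int 1) (const_int 0)])))

   i.e. the first half of the selector names the second half of the source
   and vice versa; for V4SI the selector would be [2 3 0 1].  */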
33989
33990 /* Return 1 iff OP is an operand that will not be affected by having
33991 vector doublewords swapped in memory. */
33992 static unsigned int
33993 rtx_is_swappable_p (rtx op, unsigned int *special)
33994 {
33995 enum rtx_code code = GET_CODE (op);
33996 int i, j;
33997 rtx parallel;
33998
33999 switch (code)
34000 {
34001 case LABEL_REF:
34002 case SYMBOL_REF:
34003 case CLOBBER:
34004 case REG:
34005 return 1;
34006
34007 case VEC_CONCAT:
34008 case ASM_INPUT:
34009 case ASM_OPERANDS:
34010 return 0;
34011
34012 case CONST_VECTOR:
34013 {
34014 *special = SH_CONST_VECTOR;
34015 return 1;
34016 }
34017
34018 case VEC_DUPLICATE:
34019 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
34020 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
34021 it represents a vector splat for which we can do special
34022 handling. */
34023 if (GET_CODE (XEXP (op, 0)) == CONST_INT)
34024 return 1;
34025 else if (GET_CODE (XEXP (op, 0)) == REG
34026 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
34027 /* This catches V2DF and V2DI splat, at a minimum. */
34028 return 1;
34029 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
34030 /* If the duplicated item is from a select, defer to the select
34031 processing to see if we can change the lane for the splat. */
34032 return rtx_is_swappable_p (XEXP (op, 0), special);
34033 else
34034 return 0;
34035
34036 case VEC_SELECT:
34037 /* A vec_extract operation is ok if we change the lane. */
34038 if (GET_CODE (XEXP (op, 0)) == REG
34039 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
34040 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
34041 && XVECLEN (parallel, 0) == 1
34042 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
34043 {
34044 *special = SH_EXTRACT;
34045 return 1;
34046 }
34047 else
34048 return 0;
34049
34050 case UNSPEC:
34051 {
34052 /* Various operations are unsafe for this optimization, at least
34053 without significant additional work. Permutes are obviously
34054 problematic, as both the permute control vector and the ordering
34055 of the target values are invalidated by doubleword swapping.
34056 Vector pack and unpack modify the number of vector lanes.
34057 Merge-high/low will not operate correctly on swapped operands.
34058 Vector shifts across element boundaries are clearly uncool,
34059 as are vector select and concatenate operations. Vector
34060 sum-across instructions define one operand with a specific
34061 order-dependent element, so additional fixup code would be
34062 needed to make those work. Vector set and non-immediate-form
34063 vector splat are element-order sensitive. A few of these
34064 cases might be workable with special handling if required. */
34065 int val = XINT (op, 1);
34066 switch (val)
34067 {
34068 default:
34069 break;
34070 case UNSPEC_VMRGH_DIRECT:
34071 case UNSPEC_VMRGL_DIRECT:
34072 case UNSPEC_VPACK_SIGN_SIGN_SAT:
34073 case UNSPEC_VPACK_SIGN_UNS_SAT:
34074 case UNSPEC_VPACK_UNS_UNS_MOD:
34075 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
34076 case UNSPEC_VPACK_UNS_UNS_SAT:
34077 case UNSPEC_VPERM:
34078 case UNSPEC_VPERM_UNS:
34079 case UNSPEC_VPERMHI:
34080 case UNSPEC_VPERMSI:
34081 case UNSPEC_VPKPX:
34082 case UNSPEC_VSLDOI:
34083 case UNSPEC_VSLO:
34084 case UNSPEC_VSRO:
34085 case UNSPEC_VSUM2SWS:
34086 case UNSPEC_VSUM4S:
34087 case UNSPEC_VSUM4UBS:
34088 case UNSPEC_VSUMSWS:
34089 case UNSPEC_VSUMSWS_DIRECT:
34090 case UNSPEC_VSX_CONCAT:
34091 case UNSPEC_VSX_SET:
34092 case UNSPEC_VSX_SLDWI:
34093 case UNSPEC_VUNPACK_HI_SIGN:
34094 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
34095 case UNSPEC_VUNPACK_LO_SIGN:
34096 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
34097 case UNSPEC_VUPKHPX:
34098 case UNSPEC_VUPKHS_V4SF:
34099 case UNSPEC_VUPKHU_V4SF:
34100 case UNSPEC_VUPKLPX:
34101 case UNSPEC_VUPKLS_V4SF:
34102 case UNSPEC_VUPKLU_V4SF:
34103 /* The following could be handled as an idiom with XXSPLTW.
34104 These place a scalar in BE element zero, but the XXSPLTW
34105 will currently expect it in BE element 2 in a swapped
34106 region. When one of these feeds an XXSPLTW with no other
34107 defs/uses either way, we can avoid the lane change for
34108 XXSPLTW and things will be correct. TBD. */
34109 case UNSPEC_VSX_CVDPSPN:
34110 case UNSPEC_VSX_CVSPDP:
34111 case UNSPEC_VSX_CVSPDPN:
34112 return 0;
34113 case UNSPEC_VSPLT_DIRECT:
34114 *special = SH_SPLAT;
34115 return 1;
34116 }
34117 }
34118
34119 default:
34120 break;
34121 }
34122
34123 const char *fmt = GET_RTX_FORMAT (code);
34124 int ok = 1;
34125
34126 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
34127 if (fmt[i] == 'e' || fmt[i] == 'u')
34128 {
34129 unsigned int special_op = SH_NONE;
34130 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
34131 /* Ensure we never have two kinds of special handling
34132 for the same insn. */
34133 if (*special != SH_NONE && special_op != SH_NONE
34134 && *special != special_op)
34135 return 0;
34136 *special = special_op;
34137 }
34138 else if (fmt[i] == 'E')
34139 for (j = 0; j < XVECLEN (op, i); ++j)
34140 {
34141 unsigned int special_op = SH_NONE;
34142 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
34143 /* Ensure we never have two kinds of special handling
34144 for the same insn. */
34145 if (*special != SH_NONE && special_op != SH_NONE
34146 && *special != special_op)
34147 return 0;
34148 *special = special_op;
34149 }
34150
34151 return ok;
34152 }
34153
34154 /* Return 1 iff INSN is an insn that will not be affected by
34155 having vector doublewords swapped in memory (in which case
34156 *SPECIAL is unchanged), or that can be modified to be correct
34157 if vector doublewords are swapped in memory (in which case
34158 *SPECIAL is changed to a value indicating how). */
34159 static unsigned int
34160 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
34161 unsigned int *special)
34162 {
34163 /* Calls are always bad. */
34164 if (GET_CODE (insn) == CALL_INSN)
34165 return 0;
34166
34167 /* Loads and stores seen here are not permuting, but we can still
34168 fix them up by converting them to permuting ones. Exceptions:
34169 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
34170 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
34171 for the SET source. */
34172 rtx body = PATTERN (insn);
34173 int i = INSN_UID (insn);
34174
34175 if (insn_entry[i].is_load)
34176 {
34177 if (GET_CODE (body) == SET)
34178 {
34179 *special = SH_NOSWAP_LD;
34180 return 1;
34181 }
34182 else
34183 return 0;
34184 }
34185
34186 if (insn_entry[i].is_store)
34187 {
34188 if (GET_CODE (body) == SET && GET_CODE (SET_SRC (body)) != UNSPEC)
34189 {
34190 *special = SH_NOSWAP_ST;
34191 return 1;
34192 }
34193 else
34194 return 0;
34195 }
34196
34197 /* Otherwise check the operands for vector lane violations. */
34198 return rtx_is_swappable_p (body, special);
34199 }
34200
34201 enum chain_purpose { FOR_LOADS, FOR_STORES };
34202
34203 /* Return true if the UD or DU chain headed by LINK is non-empty,
34204 and every entry on the chain references an insn that is a
34205 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
34206 register swap must have only permuting loads as reaching defs.
34207 If PURPOSE is FOR_STORES, each such register swap must have only
34208 register swaps or permuting stores as reached uses. */
34209 static bool
34210 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
34211 enum chain_purpose purpose)
34212 {
34213 if (!link)
34214 return false;
34215
34216 for (; link; link = link->next)
34217 {
34218 if (!VECTOR_MODE_P (GET_MODE (DF_REF_REG (link->ref))))
34219 continue;
34220
34221 if (DF_REF_IS_ARTIFICIAL (link->ref))
34222 return false;
34223
34224 rtx reached_insn = DF_REF_INSN (link->ref);
34225 unsigned uid = INSN_UID (reached_insn);
34226 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
34227
34228 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
34229 || insn_entry[uid].is_store)
34230 return false;
34231
34232 if (purpose == FOR_LOADS)
34233 {
34234 df_ref use;
34235 FOR_EACH_INSN_INFO_USE (use, insn_info)
34236 {
34237 struct df_link *swap_link = DF_REF_CHAIN (use);
34238
34239 while (swap_link)
34240 {
34241 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
34242 return false;
34243
34244 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
34245 unsigned uid2 = INSN_UID (swap_def_insn);
34246
34247 /* Only permuting loads are allowed. */
34248 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
34249 return false;
34250
34251 swap_link = swap_link->next;
34252 }
34253 }
34254 }
34255 else if (purpose == FOR_STORES)
34256 {
34257 df_ref def;
34258 FOR_EACH_INSN_INFO_DEF (def, insn_info)
34259 {
34260 struct df_link *swap_link = DF_REF_CHAIN (def);
34261
34262 while (swap_link)
34263 {
34264 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
34265 return false;
34266
34267 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
34268 unsigned uid2 = INSN_UID (swap_use_insn);
34269
34270 /* Permuting stores or register swaps are allowed. */
34271 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
34272 return false;
34273
34274 swap_link = swap_link->next;
34275 }
34276 }
34277 }
34278 }
34279
34280 return true;
34281 }
34282
34283 /* Mark the xxswapdi instructions associated with permuting loads and
34284 stores for removal. Note that we only flag them for deletion here,
34285 as there is a possibility of a swap being reached from multiple
34286 loads, etc. */
34287 static void
34288 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
34289 {
34290 rtx insn = insn_entry[i].insn;
34291 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34292
34293 if (insn_entry[i].is_load)
34294 {
34295 df_ref def;
34296 FOR_EACH_INSN_INFO_DEF (def, insn_info)
34297 {
34298 struct df_link *link = DF_REF_CHAIN (def);
34299
34300 /* We know by now that these are swaps, so we can delete
34301 them confidently. */
34302 while (link)
34303 {
34304 rtx use_insn = DF_REF_INSN (link->ref);
34305 insn_entry[INSN_UID (use_insn)].will_delete = 1;
34306 link = link->next;
34307 }
34308 }
34309 }
34310 else if (insn_entry[i].is_store)
34311 {
34312 df_ref use;
34313 FOR_EACH_INSN_INFO_USE (use, insn_info)
34314 {
34315 /* Ignore uses for addressability. */
34316 machine_mode mode = GET_MODE (DF_REF_REG (use));
34317 if (!VECTOR_MODE_P (mode))
34318 continue;
34319
34320 struct df_link *link = DF_REF_CHAIN (use);
34321
34322 /* We know by now that these are swaps, so we can delete
34323 them confidently. */
34324 while (link)
34325 {
34326 rtx def_insn = DF_REF_INSN (link->ref);
34327 insn_entry[INSN_UID (def_insn)].will_delete = 1;
34328 link = link->next;
34329 }
34330 }
34331 }
34332 }
34333
34334 /* OP is either a CONST_VECTOR or an expression containing one.
34335 Swap the first half of the vector with the second in the first
34336 case. Recurse to find it in the second. */
34337 static void
34338 swap_const_vector_halves (rtx op)
34339 {
34340 int i;
34341 enum rtx_code code = GET_CODE (op);
34342 if (GET_CODE (op) == CONST_VECTOR)
34343 {
34344 int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2;
34345 for (i = 0; i < half_units; ++i)
34346 {
34347 rtx temp = CONST_VECTOR_ELT (op, i);
34348 CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units);
34349 CONST_VECTOR_ELT (op, i + half_units) = temp;
34350 }
34351 }
34352 else
34353 {
34354 int j;
34355 const char *fmt = GET_RTX_FORMAT (code);
34356 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
34357 if (fmt[i] == 'e' || fmt[i] == 'u')
34358 swap_const_vector_halves (XEXP (op, i));
34359 else if (fmt[i] == 'E')
34360 for (j = 0; j < XVECLEN (op, i); ++j)
34361 swap_const_vector_halves (XVECEXP (op, i, j));
34362 }
34363 }
34364
34365 /* Find all subregs of a vector expression that perform a narrowing,
34366 and adjust the subreg index to account for doubleword swapping. */
34367 static void
34368 adjust_subreg_index (rtx op)
34369 {
34370 enum rtx_code code = GET_CODE (op);
34371 if (code == SUBREG
34372 && (GET_MODE_SIZE (GET_MODE (op))
34373 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
34374 {
34375 unsigned int index = SUBREG_BYTE (op);
34376 if (index < 8)
34377 index += 8;
34378 else
34379 index -= 8;
34380 SUBREG_BYTE (op) = index;
34381 }
34382
34383 const char *fmt = GET_RTX_FORMAT (code);
34384 int i,j;
34385 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
34386 if (fmt[i] == 'e' || fmt[i] == 'u')
34387 adjust_subreg_index (XEXP (op, i));
34388 else if (fmt[i] == 'E')
34389 for (j = 0; j < XVECLEN (op, i); ++j)
34390 adjust_subreg_index (XVECEXP (op, i, j));
34391 }
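/* A minimal sketch (swapped_subreg_byte is a hypothetical helper) of the
   index adjustment above, which mirrors a narrowing subreg across the
   doubleword boundary of the 16-byte register:

     static unsigned int
     swapped_subreg_byte (unsigned int index)
     {
       return index < 8 ? index + 8 : index - 8;
     }

   So a DImode subreg at byte 0 of a V2DI value becomes a subreg at byte 8,
   where that doubleword lives once the halves are swapped.  */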
34392
34393 /* Convert the non-permuting load INSN to a permuting one. */
34394 static void
34395 permute_load (rtx_insn *insn)
34396 {
34397 rtx body = PATTERN (insn);
34398 rtx mem_op = SET_SRC (body);
34399 rtx tgt_reg = SET_DEST (body);
34400 machine_mode mode = GET_MODE (tgt_reg);
34401 int n_elts = GET_MODE_NUNITS (mode);
34402 int half_elts = n_elts / 2;
34403 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
34404 int i, j;
34405 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
34406 XVECEXP (par, 0, i) = GEN_INT (j);
34407 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
34408 XVECEXP (par, 0, i) = GEN_INT (j);
34409 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
34410 SET_SRC (body) = sel;
34411 INSN_CODE (insn) = -1; /* Force re-recognition. */
34412 df_insn_rescan (insn);
34413
34414 if (dump_file)
34415 fprintf (dump_file, "Replacing load %d with permuted load\n",
34416 INSN_UID (insn));
34417 }
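/* Added illustration: for a V4SI load, the code above rewrites

     (set (reg:V4SI T) (mem:V4SI ADDR))

   into the permuting form

     (set (reg:V4SI T)
          (vec_select:V4SI (mem:V4SI ADDR)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))

   which matches the swap selector shape checked by insn_is_swap_p.  */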
34418
34419 /* Convert the non-permuting store INSN to a permuting one. */
34420 static void
34421 permute_store (rtx_insn *insn)
34422 {
34423 rtx body = PATTERN (insn);
34424 rtx src_reg = SET_SRC (body);
34425 machine_mode mode = GET_MODE (src_reg);
34426 int n_elts = GET_MODE_NUNITS (mode);
34427 int half_elts = n_elts / 2;
34428 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
34429 int i, j;
34430 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
34431 XVECEXP (par, 0, i) = GEN_INT (j);
34432 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
34433 XVECEXP (par, 0, i) = GEN_INT (j);
34434 rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
34435 SET_SRC (body) = sel;
34436 INSN_CODE (insn) = -1; /* Force re-recognition. */
34437 df_insn_rescan (insn);
34438
34439 if (dump_file)
34440 fprintf (dump_file, "Replacing store %d with permuted store\n",
34441 INSN_UID (insn));
34442 }
34443
34444 /* Given OP that contains a vector extract operation, adjust the index
34445 of the extracted lane to account for the doubleword swap. */
34446 static void
34447 adjust_extract (rtx_insn *insn)
34448 {
34449 rtx src = SET_SRC (PATTERN (insn));
34450 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
34451 account for that. */
34452 rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
34453 rtx par = XEXP (sel, 1);
34454 int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
34455 int lane = INTVAL (XVECEXP (par, 0, 0));
34456 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
34457 XVECEXP (par, 0, 0) = GEN_INT (lane);
34458 INSN_CODE (insn) = -1; /* Force re-recognition. */
34459 df_insn_rescan (insn);
34460
34461 if (dump_file)
34462 fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
34463 }
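/* A minimal sketch (mirror_lane is a hypothetical helper) of the lane
   correction above:

     static int
     mirror_lane (int lane, int half_elts)
     {
       return lane >= half_elts ? lane - half_elts : lane + half_elts;
     }

   For V4SI (half_elts == 2) this maps lanes 0<->2 and 1<->3.  */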
34464
34465 /* Given OP that contains a vector direct-splat operation, adjust the index
34466 of the source lane to account for the doubleword swap. */
34467 static void
34468 adjust_splat (rtx_insn *insn)
34469 {
34470 rtx body = PATTERN (insn);
34471 rtx unspec = XEXP (body, 1);
34472 int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
34473 int lane = INTVAL (XVECEXP (unspec, 0, 1));
34474 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
34475 XVECEXP (unspec, 0, 1) = GEN_INT (lane);
34476 INSN_CODE (insn) = -1; /* Force re-recognition. */
34477 df_insn_rescan (insn);
34478
34479 if (dump_file)
34480 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
34481 }
34482
34483 /* The insn described by INSN_ENTRY[I] can be swapped, but only
34484 with special handling. Take care of that here. */
34485 static void
34486 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
34487 {
34488 rtx_insn *insn = insn_entry[i].insn;
34489 rtx body = PATTERN (insn);
34490
34491 switch (insn_entry[i].special_handling)
34492 {
34493 default:
34494 gcc_unreachable ();
34495 case SH_CONST_VECTOR:
34496 {
34497 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
34498 gcc_assert (GET_CODE (body) == SET);
34499 rtx rhs = SET_SRC (body);
34500 swap_const_vector_halves (rhs);
34501 if (dump_file)
34502 fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
34503 break;
34504 }
34505 case SH_SUBREG:
34506 /* A subreg of the same size is already safe. For subregs that
34507 select a smaller portion of a reg, adjust the index for
34508 swapped doublewords. */
34509 adjust_subreg_index (body);
34510 if (dump_file)
34511 fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
34512 break;
34513 case SH_NOSWAP_LD:
34514 /* Convert a non-permuting load to a permuting one. */
34515 permute_load (insn);
34516 break;
34517 case SH_NOSWAP_ST:
34518 /* Convert a non-permuting store to a permuting one. */
34519 permute_store (insn);
34520 break;
34521 case SH_EXTRACT:
34522 /* Change the lane on an extract operation. */
34523 adjust_extract (insn);
34524 break;
34525 case SH_SPLAT:
34526 /* Change the lane on a direct-splat operation. */
34527 adjust_splat (insn);
34528 break;
34529 }
34530 }
34531
34532 /* Find the insn from the Ith table entry, which is known to be a
34533 register swap Y = SWAP(X). Replace it with a copy Y = X. */
34534 static void
34535 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
34536 {
34537 rtx_insn *insn = insn_entry[i].insn;
34538 rtx body = PATTERN (insn);
34539 rtx src_reg = XEXP (SET_SRC (body), 0);
34540 rtx copy = gen_rtx_SET (VOIDmode, SET_DEST (body), src_reg);
34541 rtx_insn *new_insn = emit_insn_before (copy, insn);
34542 set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
34543 df_insn_rescan (new_insn);
34544
34545 if (dump_file)
34546 {
34547 unsigned int new_uid = INSN_UID (new_insn);
34548 fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
34549 }
34550
34551 df_insn_delete (insn);
34552 remove_insn (insn);
34553 insn->set_deleted ();
34554 }
34555
34556 /* Dump the swap table to DUMP_FILE. */
34557 static void
34558 dump_swap_insn_table (swap_web_entry *insn_entry)
34559 {
34560 int e = get_max_uid ();
34561 fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
34562
34563 for (int i = 0; i < e; ++i)
34564 if (insn_entry[i].is_relevant)
34565 {
34566 swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
34567 fprintf (dump_file, "%6d %6d ", i,
34568 pred_entry && pred_entry->insn
34569 ? INSN_UID (pred_entry->insn) : 0);
34570 if (insn_entry[i].is_load)
34571 fputs ("load ", dump_file);
34572 if (insn_entry[i].is_store)
34573 fputs ("store ", dump_file);
34574 if (insn_entry[i].is_swap)
34575 fputs ("swap ", dump_file);
34576 if (insn_entry[i].is_live_in)
34577 fputs ("live-in ", dump_file);
34578 if (insn_entry[i].is_live_out)
34579 fputs ("live-out ", dump_file);
34580 if (insn_entry[i].contains_subreg)
34581 fputs ("subreg ", dump_file);
34582 if (insn_entry[i].is_128_int)
34583 fputs ("int128 ", dump_file);
34584 if (insn_entry[i].is_call)
34585 fputs ("call ", dump_file);
34586 if (insn_entry[i].is_swappable)
34587 {
34588 fputs ("swappable ", dump_file);
34589 if (insn_entry[i].special_handling == SH_CONST_VECTOR)
34590 fputs ("special:constvec ", dump_file);
34591 else if (insn_entry[i].special_handling == SH_SUBREG)
34592 fputs ("special:subreg ", dump_file);
34593 else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
34594 fputs ("special:load ", dump_file);
34595 else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
34596 fputs ("special:store ", dump_file);
34597 else if (insn_entry[i].special_handling == SH_EXTRACT)
34598 fputs ("special:extract ", dump_file);
34599 else if (insn_entry[i].special_handling == SH_SPLAT)
34600 fputs ("special:splat ", dump_file);
34601 }
34602 if (insn_entry[i].web_not_optimizable)
34603 fputs ("unoptimizable ", dump_file);
34604 if (insn_entry[i].will_delete)
34605 fputs ("delete ", dump_file);
34606 fputs ("\n", dump_file);
34607 }
34608 fputs ("\n", dump_file);
34609 }
34610
34611 /* Main entry point for this pass. */
34612 unsigned int
34613 rs6000_analyze_swaps (function *fun)
34614 {
34615 swap_web_entry *insn_entry;
34616 basic_block bb;
34617 rtx_insn *insn;
34618
34619 /* Dataflow analysis for use-def chains. */
34620 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
34621 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
34622 df_analyze ();
34623 df_set_flags (DF_DEFER_INSN_RESCAN);
34624
34625 /* Allocate structure to represent webs of insns. */
34626 insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
34627
34628 /* Walk the insns to gather basic data. */
34629 FOR_ALL_BB_FN (bb, fun)
34630 FOR_BB_INSNS (bb, insn)
34631 {
34632 unsigned int uid = INSN_UID (insn);
34633 if (NONDEBUG_INSN_P (insn))
34634 {
34635 insn_entry[uid].insn = insn;
34636
34637 if (GET_CODE (insn) == CALL_INSN)
34638 insn_entry[uid].is_call = 1;
34639
34640 /* Walk the uses and defs to see if we mention vector regs.
34641 Record any constraints on optimization of such mentions. */
34642 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34643 df_ref mention;
34644 FOR_EACH_INSN_INFO_USE (mention, insn_info)
34645 {
34646 /* We use DF_REF_REAL_REG here to get inside any subregs. */
34647 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
34648
34649 /* If a use gets its value from a call insn, it will be
34650 a hard register and will look like (reg:V4SI 3 3).
34651 The df analysis creates two mentions for GPR3 and GPR4,
34652 both DImode. We must recognize this and treat it as a
34653 vector mention to ensure the call is unioned with this
34654 use. */
34655 if (mode == DImode && DF_REF_INSN_INFO (mention))
34656 {
34657 rtx feeder = DF_REF_INSN (mention);
34658 /* FIXME: It is pretty hard to get from the df mention
34659 to the mode of the use in the insn. We arbitrarily
34660 pick a vector mode here, even though the use might
34661 be a real DImode. We can be too conservative
34662 (create a web larger than necessary) because of
34663 this, so consider eventually fixing this. */
34664 if (GET_CODE (feeder) == CALL_INSN)
34665 mode = V4SImode;
34666 }
34667
34668 if (VECTOR_MODE_P (mode))
34669 {
34670 insn_entry[uid].is_relevant = 1;
34671 if (mode == TImode || mode == V1TImode)
34672 insn_entry[uid].is_128_int = 1;
34673 if (DF_REF_INSN_INFO (mention))
34674 insn_entry[uid].contains_subreg
34675 = !rtx_equal_p (DF_REF_REG (mention),
34676 DF_REF_REAL_REG (mention));
34677 union_defs (insn_entry, insn, mention);
34678 }
34679 }
34680 FOR_EACH_INSN_INFO_DEF (mention, insn_info)
34681 {
34682 /* We use DF_REF_REAL_REG here to get inside any subregs. */
34683 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
34684
34685 /* If we're loading up a hard vector register for a call,
34686 it looks like (set (reg:V4SI 9 9) (...)). The df
34687 analysis creates two mentions for GPR9 and GPR10, both
34688 DImode. So relying on the mode from the mentions
34689 isn't sufficient to ensure we union the call into the
34690 web with the parameter setup code. */
34691 if (mode == DImode && GET_CODE (PATTERN (insn)) == SET
34692 && VECTOR_MODE_P (GET_MODE (SET_DEST (PATTERN (insn)))))
34693 mode = GET_MODE (SET_DEST (PATTERN (insn)));
34694
34695 if (VECTOR_MODE_P (mode))
34696 {
34697 insn_entry[uid].is_relevant = 1;
34698 if (mode == TImode || mode == V1TImode)
34699 insn_entry[uid].is_128_int = 1;
34700 if (DF_REF_INSN_INFO (mention))
34701 insn_entry[uid].contains_subreg
34702 = !rtx_equal_p (DF_REF_REG (mention),
34703 DF_REF_REAL_REG (mention));
34704 /* REG_FUNCTION_VALUE_P is not valid for subregs. */
34705 else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
34706 insn_entry[uid].is_live_out = 1;
34707 union_uses (insn_entry, insn, mention);
34708 }
34709 }
34710
34711 if (insn_entry[uid].is_relevant)
34712 {
34713 /* Determine if this is a load or store. */
34714 insn_entry[uid].is_load = insn_is_load_p (insn);
34715 insn_entry[uid].is_store = insn_is_store_p (insn);
34716
34717 /* Determine if this is a doubleword swap. If not,
34718 determine whether it can legally be swapped. */
34719 if (insn_is_swap_p (insn))
34720 insn_entry[uid].is_swap = 1;
34721 else
34722 {
34723 unsigned int special = SH_NONE;
34724 insn_entry[uid].is_swappable
34725 = insn_is_swappable_p (insn_entry, insn, &special);
34726 if (special != SH_NONE && insn_entry[uid].contains_subreg)
34727 insn_entry[uid].is_swappable = 0;
34728 else if (special != SH_NONE)
34729 insn_entry[uid].special_handling = special;
34730 else if (insn_entry[uid].contains_subreg)
34731 insn_entry[uid].special_handling = SH_SUBREG;
34732 }
34733 }
34734 }
34735 }
34736
34737 if (dump_file)
34738 {
34739 fprintf (dump_file, "\nSwap insn entry table when first built\n");
34740 dump_swap_insn_table (insn_entry);
34741 }
34742
  /* Record unoptimizable webs.  */
  unsigned e = get_max_uid (), i;
  for (i = 0; i < e; ++i)
    {
      if (!insn_entry[i].is_relevant)
        continue;

      swap_web_entry *root
        = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();

      if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
          || (insn_entry[i].contains_subreg
              && insn_entry[i].special_handling != SH_SUBREG)
          || insn_entry[i].is_128_int || insn_entry[i].is_call
          || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
        root->web_not_optimizable = 1;

      /* If we have loads or stores that aren't permuting then the
         optimization isn't appropriate.  */
      else if ((insn_entry[i].is_load || insn_entry[i].is_store)
               && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
        root->web_not_optimizable = 1;

      /* If we have permuting loads or stores that are not accompanied
         by a register swap, the optimization isn't appropriate.  */
      else if (insn_entry[i].is_load && insn_entry[i].is_swap)
        {
          rtx insn = insn_entry[i].insn;
          struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
          df_ref def;

          FOR_EACH_INSN_INFO_DEF (def, insn_info)
            {
              struct df_link *link = DF_REF_CHAIN (def);

              if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
                {
                  root->web_not_optimizable = 1;
                  break;
                }
            }
        }
      else if (insn_entry[i].is_store && insn_entry[i].is_swap)
        {
          rtx insn = insn_entry[i].insn;
          struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
          df_ref use;

          FOR_EACH_INSN_INFO_USE (use, insn_info)
            {
              struct df_link *link = DF_REF_CHAIN (use);

              if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
                {
                  root->web_not_optimizable = 1;
                  break;
                }
            }
        }
    }

  if (dump_file)
    {
      fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
      dump_swap_insn_table (insn_entry);
    }

  /* For each load and store in an optimizable web (which implies
     the loads and stores are permuting), find the associated
     register swaps and mark them for removal.  Due to various
     optimizations we may mark the same swap more than once.  Also
     perform special handling for swappable insns that require it.  */
  for (i = 0; i < e; ++i)
    if ((insn_entry[i].is_load || insn_entry[i].is_store)
        && insn_entry[i].is_swap)
      {
        swap_web_entry* root_entry
          = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
        if (!root_entry->web_not_optimizable)
          mark_swaps_for_removal (insn_entry, i);
      }
    else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
      {
        swap_web_entry* root_entry
          = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
        if (!root_entry->web_not_optimizable)
          handle_special_swappables (insn_entry, i);
      }

  /* Now delete the swaps marked for removal.  */
  for (i = 0; i < e; ++i)
    if (insn_entry[i].will_delete)
      replace_swap_with_copy (insn_entry, i);
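
  /* As a schematic example (register numbers hypothetical), a marked
     doubleword swap such as

       (set (reg:V2DF 32)
            (vec_select:V2DF (reg:V2DF 33) (parallel [1 0])))

     is rewritten by replace_swap_with_copy into the plain move

       (set (reg:V2DF 32) (reg:V2DF 33))

     which later passes can usually coalesce away.  */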

  /* Clean up.  */
  free (insn_entry);
  return 0;
}

const pass_data pass_data_analyze_swaps =
{
  RTL_PASS, /* type */
  "swaps", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_analyze_swaps : public rtl_opt_pass
{
public:
  pass_analyze_swaps (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_analyze_swaps, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
              && rs6000_optimize_swaps);
    }

  virtual unsigned int execute (function *fun)
    {
      return rs6000_analyze_swaps (fun);
    }

}; // class pass_analyze_swaps

rtl_opt_pass *
make_pass_analyze_swaps (gcc::context *ctxt)
{
  return new pass_analyze_swaps (ctxt);
}
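
/* A sketch of how such a target pass is typically registered from the
   target's option-override code via the generic pass manager; the
   reference pass name and position here are illustrative assumptions,
   not necessarily what rs6000 actually uses:

     struct register_pass_info analyze_swaps_info
       = { make_pass_analyze_swaps (g), "cse1", 1, PASS_POS_INSERT_BEFORE };
     register_pass (&analyze_swaps_info);

   where g is the global gcc::context declared in context.h.  */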

#ifdef RS6000_GLIBC_ATOMIC_FENV
/* Function declarations for rs6000_atomic_assign_expand_fenv.  */
static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
#endif

/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */
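
/* Background: this hook supports C11 atomic compound assignment on
   floating-point objects, e.g.

     _Atomic double d;
     d *= 2.0;

   which expands to a compare-and-exchange loop.  A failed iteration is
   retried, and the exceptions its discarded computation raised must not
   remain visible.  HOLD is emitted before the loop to save the FP
   environment and clear the exception flags, CLEAR re-clears the flags
   when an iteration fails, and UPDATE restores the saved environment
   while raising the exceptions recorded by the successful iteration.  */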

static void
rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
    {
#ifdef RS6000_GLIBC_ATOMIC_FENV
      if (atomic_hold_decl == NULL_TREE)
        {
          atomic_hold_decl
            = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                          get_identifier ("__atomic_feholdexcept"),
                          build_function_type_list (void_type_node,
                                                    double_ptr_type_node,
                                                    NULL_TREE));
          TREE_PUBLIC (atomic_hold_decl) = 1;
          DECL_EXTERNAL (atomic_hold_decl) = 1;
        }

      if (atomic_clear_decl == NULL_TREE)
        {
          atomic_clear_decl
            = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                          get_identifier ("__atomic_feclearexcept"),
                          build_function_type_list (void_type_node,
                                                    NULL_TREE));
          TREE_PUBLIC (atomic_clear_decl) = 1;
          DECL_EXTERNAL (atomic_clear_decl) = 1;
        }

      tree const_double = build_qualified_type (double_type_node,
                                                TYPE_QUAL_CONST);
      tree const_double_ptr = build_pointer_type (const_double);
      if (atomic_update_decl == NULL_TREE)
        {
          atomic_update_decl
            = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                          get_identifier ("__atomic_feupdateenv"),
                          build_function_type_list (void_type_node,
                                                    const_double_ptr,
                                                    NULL_TREE));
          TREE_PUBLIC (atomic_update_decl) = 1;
          DECL_EXTERNAL (atomic_update_decl) = 1;
        }

      tree fenv_var = create_tmp_var (double_type_node);
      mark_addressable (fenv_var);
      tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);

      *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
      *clear = build_call_expr (atomic_clear_decl, 0);
      *update = build_call_expr (atomic_update_decl, 1,
                                 fold_convert (const_double_ptr, fenv_addr));
#endif
      return;
    }

  tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
  tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
  tree call_mffs = build_call_expr (mffs, 0);

  /* Generates the equivalent of feholdexcept (&fenv_var):

       *fenv_var = __builtin_mffs ();
       double fenv_hold;
       *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
       __builtin_mtfsf (0xff, fenv_hold);  */

  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT hold_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000007);

  tree fenv_var = create_tmp_var (double_type_node);

  tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);

  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
                              build_int_cst (uint64_type_node,
                                             hold_exception_mask));

  tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
                                 fenv_llu_and);

  tree hold_mtfsf = build_call_expr (mtfsf, 2,
                                     build_int_cst (unsigned_type_node, 0xff),
                                     fenv_hold_mtfsf);

  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);

  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):

       double fenv_clear = __builtin_mffs ();
       *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
       __builtin_mtfsf (0xff, fenv_clear);  */

  /* Mask to clear the entire FPSCR image in the low word; unlike the
     hold mask above, this does not preserve the rounding modes or the
     non-IEEE arithmetic flag.  */
  const unsigned HOST_WIDE_INT clear_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000000);

  tree fenv_clear = create_tmp_var (double_type_node);

  tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);

  tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
  tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
                                    fenv_clean_llu,
                                    build_int_cst (uint64_type_node,
                                                   clear_exception_mask));

  tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
                                  fenv_clear_llu_and);

  tree clear_mtfsf = build_call_expr (mtfsf, 2,
                                      build_int_cst (unsigned_type_node, 0xff),
                                      fenv_clear_mtfsf);

  *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);

  /* Generates the equivalent of feupdateenv (&fenv_var):

       double old_fenv = __builtin_mffs ();
       double fenv_update;
       *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL)
                                  | (*(uint64_t*)fenv_var & 0x1ff80fff);
       __builtin_mtfsf (0xff, fenv_update);  */

  const unsigned HOST_WIDE_INT update_exception_mask =
    HOST_WIDE_INT_C (0xffffffff1fffff00);
  const unsigned HOST_WIDE_INT new_exception_mask =
    HOST_WIDE_INT_C (0x1ff80fff);
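
  /* Reading of the two masks, per the pseudo-code above (a hedged
     interpretation, not re-derived from the FPSCR bit definitions):
     update_exception_mask selects what survives from the interim
     environment read back by mffs (the exception status accumulated
     since the hold), while new_exception_mask selects what is restored
     from the environment saved in fenv_var.  IOR-ing the two below
     merges them, matching feupdateenv's job of installing the saved
     environment and re-raising the interim exceptions.  */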

  tree old_fenv = create_tmp_var (double_type_node);
  tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);

  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
                             build_int_cst (uint64_type_node,
                                            update_exception_mask));

  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
                             build_int_cst (uint64_type_node,
                                            new_exception_mask));

  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
                              old_llu_and, new_llu_and);

  tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
                                   new_llu_mask);

  tree update_mtfsf = build_call_expr (mtfsf, 2,
                                       build_int_cst (unsigned_type_node, 0xff),
                                       fenv_update_mtfsf);

  *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
}
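
/* The consumer of this hook (build_atomic_assign in the C front end,
   in the usual case) wraps the three sequences around the
   compare-and-exchange loop, schematically:

     HOLD;
     for (;;)
       {
         old = *addr;
         new = old OP rhs;        <- may raise FP exceptions
         if (__atomic_compare_exchange (addr, &old, &new, ...))
           break;
         CLEAR;                   <- discard the failed iteration's flags
       }
     UPDATE;                      <- restore env, raise surviving flags

   Schematic only; the real sequence is built as trees by the front
   end.  */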
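/* TARGET_INITIALIZER expands to an aggregate initializer for the
   target hook vector, picking up the TARGET_* macros redefined earlier
   in this file (e.g. TARGET_ATOMIC_ASSIGN_EXPAND_FENV is defined above
   to be rs6000_atomic_assign_expand_fenv); any hook not overridden
   falls back to its default from target.def.  */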
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-rs6000.h"