/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2017 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "dbxout.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "context.h"
#include "tree-pass.h"
#include "except.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#if TARGET_MACHO
#include "gstab.h"  /* for N_SLINE */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"

/* This file should be included last.  */
#include "target-def.h"

#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

#define min(A,B)  ((A) < (B) ? (A) : (B))
#define max(A,B)  ((A) > (B) ? (A) : (B))

static pad_direction rs6000_function_arg_padding (machine_mode, const_tree);

/* Structure used to define the rs6000 stack */
typedef struct rs6000_stack {
  int reload_completed;         /* stack info won't change from here on */
  int first_gp_reg_save;        /* first callee saved GP register used */
  int first_fp_reg_save;        /* first callee saved FP register used */
  int first_altivec_reg_save;   /* first callee saved AltiVec register used */
  int lr_save_p;                /* true if the link reg needs to be saved */
  int cr_save_p;                /* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;     /* mask of vec registers to save */
  int push_p;                   /* true if we need to allocate stack space */
  int calls_p;                  /* true if the function makes any calls */
  int world_save_p;             /* true if we're saving *everything*:
                                   r13-r31, cr, f14-f31, vrsave, v20-v31 */
  enum rs6000_abi abi;          /* which ABI to use */
  int gp_save_offset;           /* offset to save GP regs from initial SP */
  int fp_save_offset;           /* offset to save FP regs from initial SP */
  int altivec_save_offset;      /* offset to save AltiVec regs from initial SP */
  int lr_save_offset;           /* offset to save LR from initial SP */
  int cr_save_offset;           /* offset to save CR from initial SP */
  int vrsave_save_offset;       /* offset to save VRSAVE from initial SP */
  int spe_gp_save_offset;       /* offset to save spe 64-bit gprs */
  int varargs_save_offset;      /* offset to save the varargs registers */
  int ehrd_offset;              /* offset to EH return data */
  int ehcr_offset;              /* offset to EH CR field data */
  int reg_size;                 /* register size (4 or 8) */
  HOST_WIDE_INT vars_size;      /* variable save area size */
  int parm_size;                /* outgoing parameter size */
  int save_size;                /* save area size */
  int fixed_size;               /* fixed size of stack frame */
  int gp_size;                  /* size of saved GP registers */
  int fp_size;                  /* size of saved FP registers */
  int altivec_size;             /* size of saved AltiVec registers */
  int cr_size;                  /* size to hold CR if not in fixed area */
  int vrsave_size;              /* size to hold VRSAVE */
  int altivec_padding_size;     /* size of altivec alignment padding */
  int spe_gp_size;              /* size of 64-bit GPR save size for SPE */
  int spe_padding_size;         /* size of SPE save area alignment padding */
  HOST_WIDE_INT total_size;     /* total bytes allocated for stack */
  int spe_64bit_regs_used;      /* nonzero if SPE 64-bit registers are used */
  int savres_strategy;          /* mask of save/restore strategy flags */
} rs6000_stack_t;
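/* Note for readers: the current function's frame layout is computed into one
   of these records by rs6000_stack_info, declared later in this file.  */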

/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Whether the instruction chain has been scanned already.  */
  int spe_insn_chain_scanned_p;
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies.  This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field.  */
  rtx sdmode_stack_slot;
  /* Alternative internal arg pointer for -fsplit-stack.  */
  rtx split_stack_arg_pointer;
  bool split_stack_argp_used;   /* Whether the arg pointer above was used.  */
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
  /* The number of components we use for separate shrink-wrapping.  */
  int n_components;
  /* The components already handled by separate shrink-wrapping, which should
     not be considered by the prologue and epilogue.  */
  bool gpr_is_wrapped_separately[32];
  bool fpr_is_wrapped_separately[32];
  bool lr_is_wrapped_separately;
} machine_function;

/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of the label created for -mrelocatable, which we call
   to obtain the address of the GOT section.  */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
static bool rs6000_passes_float;
static bool rs6000_passes_long_double;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;       /* Apparent nesting guard for the cost
                                   debugging hooks (see
                                   rs6000_debug_rtx_costs below), keeping
                                   debug output from recursing.  */

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV          = 0x001,        /* Use divide estimate.  */
  RECIP_DF_DIV          = 0x002,
  RECIP_V4SF_DIV        = 0x004,
  RECIP_V2DF_DIV        = 0x008,

  RECIP_SF_RSQRT        = 0x010,        /* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT        = 0x020,
  RECIP_V4SF_RSQRT      = 0x040,
  RECIP_V2DF_RSQRT      = 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE            = 0,
  RECIP_ALL             = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
                           | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
                           | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION  = RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION   = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options.  */
static struct
{
  const char *string;           /* option name */
  unsigned int mask;            /* mask bits to set */
} recip_options[] = {
  { "all",      RECIP_ALL },
  { "none",     RECIP_NONE },
  { "div",      (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
                 | RECIP_V2DF_DIV) },
  { "divf",     (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",     (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",    (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
                 | RECIP_V2DF_RSQRT) },
  { "rsqrtf",   (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",   (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
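/* Usage sketch (illustrative, not part of the table): the strings above are
   matched against the argument of -mrecip=.  For instance, -mrecip=rsqrtf
   turns on RECIP_SF_RSQRT | RECIP_V4SF_RSQRT, allowing

     float f (float x) { return 1.0f / sqrtf (x); }

   to be expanded using the frsqrtes estimate plus Newton-Raphson refinement
   instead of a full square root and divide.  */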

/* Used by __builtin_cpu_is(), mapping from PLATFORM names to values.  */
static const struct
{
  const char *cpu;
  unsigned int cpuid;
} cpu_is_info[] = {
  { "power9",      PPC_PLATFORM_POWER9 },
  { "power8",      PPC_PLATFORM_POWER8 },
  { "power7",      PPC_PLATFORM_POWER7 },
  { "power6x",     PPC_PLATFORM_POWER6X },
  { "power6",      PPC_PLATFORM_POWER6 },
  { "power5+",     PPC_PLATFORM_POWER5_PLUS },
  { "power5",      PPC_PLATFORM_POWER5 },
  { "ppc970",      PPC_PLATFORM_PPC970 },
  { "power4",      PPC_PLATFORM_POWER4 },
  { "ppca2",       PPC_PLATFORM_PPCA2 },
  { "ppc476",      PPC_PLATFORM_PPC476 },
  { "ppc464",      PPC_PLATFORM_PPC464 },
  { "ppc440",      PPC_PLATFORM_PPC440 },
  { "ppc405",      PPC_PLATFORM_PPC405 },
  { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
};
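/* Usage sketch (illustrative): __builtin_cpu_is ("power8") expands to a
   comparison of the AT_PLATFORM value that a new-enough glibc caches in the
   TCB against PPC_PLATFORM_POWER8, e.g.

     if (__builtin_cpu_is ("power8"))
       run_power8_kernel ();
     else
       run_generic_kernel ();

   where run_power8_kernel/run_generic_kernel are hypothetical user
   functions.  */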

/* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks.  */
static const struct
{
  const char *hwcap;
  int mask;
  unsigned int id;
} cpu_supports_info[] = {
  /* AT_HWCAP masks.  */
  { "4xxmac",       PPC_FEATURE_HAS_4xxMAC,       0 },
  { "altivec",      PPC_FEATURE_HAS_ALTIVEC,      0 },
  { "arch_2_05",    PPC_FEATURE_ARCH_2_05,        0 },
  { "arch_2_06",    PPC_FEATURE_ARCH_2_06,        0 },
  { "archpmu",      PPC_FEATURE_PERFMON_COMPAT,   0 },
  { "booke",        PPC_FEATURE_BOOKE,            0 },
  { "cellbe",       PPC_FEATURE_CELL_BE,          0 },
  { "dfp",          PPC_FEATURE_HAS_DFP,          0 },
  { "efpdouble",    PPC_FEATURE_HAS_EFP_DOUBLE,   0 },
  { "efpsingle",    PPC_FEATURE_HAS_EFP_SINGLE,   0 },
  { "fpu",          PPC_FEATURE_HAS_FPU,          0 },
  { "ic_snoop",     PPC_FEATURE_ICACHE_SNOOP,     0 },
  { "mmu",          PPC_FEATURE_HAS_MMU,          0 },
  { "notb",         PPC_FEATURE_NO_TB,            0 },
  { "pa6t",         PPC_FEATURE_PA6T,             0 },
  { "power4",       PPC_FEATURE_POWER4,           0 },
  { "power5",       PPC_FEATURE_POWER5,           0 },
  { "power5+",      PPC_FEATURE_POWER5_PLUS,      0 },
  { "power6x",      PPC_FEATURE_POWER6_EXT,       0 },
  { "ppc32",        PPC_FEATURE_32,               0 },
  { "ppc601",       PPC_FEATURE_601_INSTR,        0 },
  { "ppc64",        PPC_FEATURE_64,               0 },
  { "ppcle",        PPC_FEATURE_PPC_LE,           0 },
  { "smt",          PPC_FEATURE_SMT,              0 },
  { "spe",          PPC_FEATURE_HAS_SPE,          0 },
  { "true_le",      PPC_FEATURE_TRUE_LE,          0 },
  { "ucache",       PPC_FEATURE_UNIFIED_CACHE,    0 },
  { "vsx",          PPC_FEATURE_HAS_VSX,          0 },

  /* AT_HWCAP2 masks.  */
  { "arch_2_07",    PPC_FEATURE2_ARCH_2_07,       1 },
  { "dscr",         PPC_FEATURE2_HAS_DSCR,        1 },
  { "ebb",          PPC_FEATURE2_HAS_EBB,         1 },
  { "htm",          PPC_FEATURE2_HAS_HTM,         1 },
  { "htm-nosc",     PPC_FEATURE2_HTM_NOSC,        1 },
  { "isel",         PPC_FEATURE2_HAS_ISEL,        1 },
  { "tar",          PPC_FEATURE2_HAS_TAR,         1 },
  { "vcrypto",      PPC_FEATURE2_HAS_VEC_CRYPTO,  1 },
  { "arch_3_00",    PPC_FEATURE2_ARCH_3_00,       1 },
  { "ieee128",      PPC_FEATURE2_HAS_IEEE128,     1 }
};
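/* Usage sketch (illustrative): the ID field selects between the AT_HWCAP
   (0) and AT_HWCAP2 (1) words cached in the TCB, and __builtin_cpu_supports
   tests the corresponding mask bit.  For example,

     if (__builtin_cpu_supports ("vsx"))
       ...

   checks PPC_FEATURE_HAS_VSX in AT_HWCAP.  */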

/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p;

/* Pointer to function (in powerpcspe-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in powerpcspe-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
  SPE_ACC_TYPE,
  SPEFSCR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)


/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the three
   register classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,               /* General purpose registers.  */
  RELOAD_REG_FPR,               /* Traditional floating point regs.  */
  RELOAD_REG_VMX,               /* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,               /* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS  RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS   RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;             /* Register class name.  */
  int reg;                      /* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",      FIRST_GPR_REGNO },      /* RELOAD_REG_GPR.  */
  { "Fpr",      FIRST_FPR_REGNO },      /* RELOAD_REG_FPR.  */
  { "VMX",      FIRST_ALTIVEC_REGNO },  /* RELOAD_REG_VMX.  */
  { "Any",      -1 },                   /* RELOAD_REG_ANY.  */
};

/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID        0x01    /* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE     0x02    /* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED      0x04    /* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET       0x08    /* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC   0x10    /* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY   0x20    /* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16      0x40    /* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET  0x80    /* quad offset is limited.  */
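/* Illustrative example: a mode whose mask is
   RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET (0x0d)
   fits in the register class and can be addressed both reg+reg
   (e.g. lwzx/ldx) and reg+offset (e.g. lwz/ld).  */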

/* Register addressing information for each mode: reload insns, plus masks
   of the valid addressing modes, per register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;           /* INSN to reload for loading.  */
  enum insn_code reload_store;          /* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;        /* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;        /* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;        /* INSN to move from VSX to GPR.  */
  enum insn_code fusion_gpr_ld;         /* INSN for fusing gpr ADDIS/loads.  */
                                        /* INSNs for fusing addi with loads
                                           or stores for each reg. class.  */
  enum insn_code fusion_addi_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addi_st[(int)N_RELOAD_REG];
                                        /* INSNs for fusing addis with loads
                                           or stores for each reg. class.  */
  enum insn_code fusion_addis_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addis_st[(int)N_RELOAD_REG];
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;                 /* Scalar value can go in VMX.  */
  bool fused_toc;                       /* Mode supports TOC fusion.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
          != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
          != 0);
}
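/* For illustration: PRE_INC, PRE_DEC and PRE_MODIFY correspond to the
   PowerPC update-form instructions; "lwzu r3,4(r9)" loads from r9+4 and
   writes the new address back into r9, and "stwu r1,-32(r1)" is the usual
   stack-push idiom.  The two helpers above report whether a given mode may
   use these forms.  */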

/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */

int
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
        {
          out_set = single_set (out_insn);
          if (!out_set)
            {
              out_pat = PATTERN (out_insn);
              if (GET_CODE (out_pat) == PARALLEL)
                {
                  for (i = 0; i < XVECLEN (out_pat, 0); i++)
                    {
                      out_exp = XVECEXP (out_pat, 0, i);
                      if ((GET_CODE (out_exp) == CLOBBER)
                          || (GET_CODE (out_exp) == USE))
                        continue;
                      else if (GET_CODE (out_exp) != SET)
                        return false;
                    }
                }
            }
        }
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
        return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
        {
          in_exp = XVECEXP (in_pat, 0, i);
          if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
            continue;
          else if (GET_CODE (in_exp) != SET)
            return false;

          if (MEM_P (SET_DEST (in_exp)))
            {
              out_set = single_set (out_insn);
              if (!out_set)
                {
                  out_pat = PATTERN (out_insn);
                  if (GET_CODE (out_pat) != PARALLEL)
                    return false;
                  for (j = 0; j < XVECLEN (out_pat, 0); j++)
                    {
                      out_exp = XVECEXP (out_pat, 0, j);
                      if ((GET_CODE (out_exp) == CLOBBER)
                          || (GET_CODE (out_exp) == USE))
                        continue;
                      else if (GET_CODE (out_exp) != SET)
                        return false;
                    }
                }
            }
        }
    }
  return store_data_bypass_p (out_insn, in_insn);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_vsx_dform_quad (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
          != 0);
}
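/* For illustration: the quad-offset restriction matches the ISA 3.0
   DQ-form instructions such as lxv/stxv, whose displacement field is
   scaled so that only multiples of 16 can be encoded.  */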


/* Target cpu costs.  */

struct processor_costs {
  const int mulsi;        /* cost of SImode multiplication.  */
  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
  const int mulsi_const9; /* cost of SImode mult by short constant.  */
  const int muldi;        /* cost of DImode multiplication.  */
  const int divsi;        /* cost of SImode division.  */
  const int divdi;        /* cost of DImode division.  */
  const int fp;           /* cost of simple SFmode and DFmode insns.  */
  const int dmul;         /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;         /* cost of SFmode division (fdivs).  */
  const int ddiv;         /* cost of DFmode division (fdiv).  */
  const int cache_line_size;    /* cache line size in bytes.  */
  const int l1_cache_size;      /* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;      /* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
                                        operations.  */
  const int sfdf_convert;       /* cost of SF->DF conversion.  */
};

const struct processor_costs *rs6000_cost;

/* Processor costs (relative to an add) */

/* Instruction size costs on 32bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  32,                   /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction size costs on 64bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  128,                  /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),   /* mulsi */
  COSTS_N_INSNS (12),   /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (34),   /* muldi */
  COSTS_N_INSNS (65),   /* divsi */
  COSTS_N_INSNS (67),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (31),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  128,                  /* cache line size */
  128,                  /* l1 cache */
  2048,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (6),    /* divsi */
  COSTS_N_INSNS (6),    /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (10),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (33),   /* divsi */
  COSTS_N_INSNS (33),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (35),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (34),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (11),   /* divsi */
  COSTS_N_INSNS (11),   /* divdi */
  COSTS_N_INSNS (6),    /* fp */
  COSTS_N_INSNS (6),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* l1 cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (36),   /* divsi */
  COSTS_N_INSNS (36),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (37),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  8,                    /* l1 cache */
  64,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (21),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),      /* mulsi_const */
  COSTS_N_INSNS (6/2),      /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),     /* divsi */
  COSTS_N_INSNS (70/2),     /* divdi */
  COSTS_N_INSNS (10/2),     /* fp */
  COSTS_N_INSNS (10/2),     /* dmul */
  COSTS_N_INSNS (74/2),     /* sdiv */
  COSTS_N_INSNS (74/2),     /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  6,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (17),   /* divsi */
  COSTS_N_INSNS (17),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (23),   /* divsi */
  COSTS_N_INSNS (23),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (21),   /* sdiv */
  COSTS_N_INSNS (35),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (29),   /* sdiv */
  COSTS_N_INSNS (29),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (8),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (18),   /* divdi */
  COSTS_N_INSNS (10),   /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (46),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  8,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),    /* mulsi */
  COSTS_N_INSNS (8),    /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (8),    /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (14),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (8),    /* divsi */
  COSTS_N_INSNS (12),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (18),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  8,                    /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),   /* mulsi */
  COSTS_N_INSNS (16),   /* mulsi_const */
  COSTS_N_INSNS (16),   /* mulsi_const9 */
  COSTS_N_INSNS (16),   /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (59),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  64,                   /* cache line size */
  16,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};


/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "powerpcspe-builtin.def"
};

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X
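/* Explanatory note: the RS6000_BUILTIN_* macros above follow the usual
   "X macro" idiom: each is temporarily defined to expand one entry of
   powerpcspe-builtin.def into a rs6000_builtin_info_type initializer, the
   .def file is #included to build the table, and the macros are then
   #undef'd so other tables can reuse the same entry names.  */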

/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);


static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static bool spe_func_has_64bit_regs_p (void);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
                                      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
                                     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
                                   machine_mode, machine_mode,
                                   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
                                       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
                                             int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
                                                   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
                                                     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
                                                           machine_mode,
                                                           rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
                                                           enum reg_class);
static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
                                            machine_mode);
static bool rs6000_debug_secondary_memory_needed (enum reg_class,
                                                  enum reg_class,
                                                  machine_mode);
static bool rs6000_cannot_change_mode_class (machine_mode,
                                             machine_mode,
                                             enum reg_class);
static bool rs6000_debug_cannot_change_mode_class (machine_mode,
                                                   machine_mode,
                                                   enum reg_class);
static bool rs6000_save_toc_in_prologue_p (void);
static rtx rs6000_internal_arg_pointer (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
                                             int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
                                                     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
                                            machine_mode)
  = rs6000_secondary_memory_needed;

bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode,
                                             machine_mode,
                                             enum reg_class)
  = rs6000_cannot_change_mode_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
                                      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
                                          HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
                                          enum rs6000_reg_type,
                                          machine_mode,
                                          secondary_reload_info *,
                                          bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused));
static tree rs6000_fold_builtin (tree, int, tree *, bool);

/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions.  */
1462 | ||
1463 | struct GTY((for_user)) builtin_hash_struct | |
1464 | { | |
1465 | tree type; | |
1466 | machine_mode mode[4]; /* return value + 3 arguments. */ | |
1467 | unsigned char uns_p[4]; /* and whether the types are unsigned. */ | |
1468 | }; | |
1469 | ||
1470 | struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct> | |
1471 | { | |
1472 | static hashval_t hash (builtin_hash_struct *); | |
1473 | static bool equal (builtin_hash_struct *, builtin_hash_struct *); | |
1474 | }; | |
1475 | ||
1476 | static GTY (()) hash_table<builtin_hasher> *builtin_hash_table; | |
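/* Editorial sketch (not from the original source): a builtin such as

	vector signed int vec_add (vector signed int, vector signed int);

   would be keyed roughly as mode[] = { V4SImode, V4SImode, V4SImode,
   VOIDmode } (return value, two arguments, with the unused slot assumed
   to be recorded as VOIDmode) and uns_p[] all zero, which lets
   builtin_function_type share one FUNCTION_TYPE between all builtins of
   the same shape.  */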
1477 | ||
1478 | \f | |
1479 | /* Default register names. */ | |
1480 | char rs6000_reg_names[][8] = | |
1481 | { | |
1482 | "0", "1", "2", "3", "4", "5", "6", "7", | |
1483 | "8", "9", "10", "11", "12", "13", "14", "15", | |
1484 | "16", "17", "18", "19", "20", "21", "22", "23", | |
1485 | "24", "25", "26", "27", "28", "29", "30", "31", | |
1486 | "0", "1", "2", "3", "4", "5", "6", "7", | |
1487 | "8", "9", "10", "11", "12", "13", "14", "15", | |
1488 | "16", "17", "18", "19", "20", "21", "22", "23", | |
1489 | "24", "25", "26", "27", "28", "29", "30", "31", | |
1490 | "mq", "lr", "ctr","ap", | |
1491 | "0", "1", "2", "3", "4", "5", "6", "7", | |
1492 | "ca", | |
1493 | /* AltiVec registers. */ | |
1494 | "0", "1", "2", "3", "4", "5", "6", "7", | |
1495 | "8", "9", "10", "11", "12", "13", "14", "15", | |
1496 | "16", "17", "18", "19", "20", "21", "22", "23", | |
1497 | "24", "25", "26", "27", "28", "29", "30", "31", | |
1498 | "vrsave", "vscr", | |
1499 | /* SPE registers. */ | |
1500 | "spe_acc", "spefscr", | |
1501 | /* Soft frame pointer. */ | |
1502 | "sfp", | |
1503 | /* HTM SPR registers. */ | |
1504 | "tfhar", "tfiar", "texasr", | |
1505 | /* SPE High registers. */ | |
1506 | "0", "1", "2", "3", "4", "5", "6", "7", | |
1507 | "8", "9", "10", "11", "12", "13", "14", "15", | |
1508 | "16", "17", "18", "19", "20", "21", "22", "23", | |
1509 | "24", "25", "26", "27", "28", "29", "30", "31" | |
1510 | }; | |
1511 | ||
1512 | #ifdef TARGET_REGNAMES | |
1513 | static const char alt_reg_names[][8] = | |
1514 | { | |
1515 | "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", | |
1516 | "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", | |
1517 | "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23", | |
1518 | "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31", | |
1519 | "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7", | |
1520 | "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15", | |
1521 | "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23", | |
1522 | "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31", | |
1523 | "mq", "lr", "ctr", "ap", | |
1524 | "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7", | |
1525 | "ca", | |
1526 | /* AltiVec registers. */ | |
1527 | "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7", | |
1528 | "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15", | |
1529 | "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23", | |
1530 | "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31", | |
1531 | "vrsave", "vscr", | |
1532 | /* SPE registers. */ | |
1533 | "spe_acc", "spefscr", | |
1534 | /* Soft frame pointer. */ | |
1535 | "sfp", | |
1536 | /* HTM SPR registers. */ | |
1537 | "tfhar", "tfiar", "texasr", | |
1538 | /* SPE High registers. */ | |
1539 | "%rh0", "%rh1", "%rh2", "%rh3", "%rh4", "%rh5", "%rh6", "%rh7", | |
1540 | "%rh8", "%rh9", "%rh10", "%r11", "%rh12", "%rh13", "%rh14", "%rh15", | |
1541 | "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23", | |
1542 | "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31" | |
1543 | }; | |
1544 | #endif | |
1545 | ||
1546 | /* Table of valid machine attributes. */ | |
1547 | ||
1548 | static const struct attribute_spec rs6000_attribute_table[] = | |
1549 | { | |
1550 | /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler, | |
1551 | affects_type_identity } */ | |
1552 | { "altivec", 1, 1, false, true, false, rs6000_handle_altivec_attribute, | |
1553 | false }, | |
1554 | { "longcall", 0, 0, false, true, true, rs6000_handle_longcall_attribute, | |
1555 | false }, | |
1556 | { "shortcall", 0, 0, false, true, true, rs6000_handle_longcall_attribute, | |
1557 | false }, | |
1558 | { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute, | |
1559 | false }, | |
1560 | { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute, | |
1561 | false }, | |
1562 | #ifdef SUBTARGET_ATTRIBUTE_TABLE | |
1563 | SUBTARGET_ATTRIBUTE_TABLE, | |
1564 | #endif | |
1565 | { NULL, 0, 0, false, false, false, NULL, false } | |
1566 | }; | |
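/* Editorial usage sketch (illustrative, not from the original source):

	void far_away (void) __attribute__ ((longcall));
	struct pkt { char tag; int len; } __attribute__ ((ms_struct));

   "longcall" forces calls to go through a register so the callee may be
   out of direct branch range; "ms_struct"/"gcc_struct" select the record
   layout convention.  The "altivec" attribute takes one identifier
   argument and is used by altivec.h to implement the vector type
   keywords.  */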
1567 | \f | |
1568 | #ifndef TARGET_PROFILE_KERNEL | |
1569 | #define TARGET_PROFILE_KERNEL 0 | |
1570 | #endif | |
1571 | ||
1572 | /* The VRSAVE bitmask puts bit %v0 as the most significant bit. */ | |
1573 | #define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO)) | |
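/* For example (editorial note), %v0 maps to the most significant bit and
   each later register shifts right by one, so
   ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO) == 0x80000000 and
   ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 2) == 0x20000000.  */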
1574 | \f | |
1575 | /* Initialize the GCC target structure. */ | |
1576 | #undef TARGET_ATTRIBUTE_TABLE | |
1577 | #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table | |
1578 | #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES | |
1579 | #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes | |
1580 | #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P | |
1581 | #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p | |
1582 | ||
1583 | #undef TARGET_ASM_ALIGNED_DI_OP | |
1584 | #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP | |
1585 | ||
1586 | /* Default unaligned ops are only provided for ELF. Find the ops needed | |
1587 | for non-ELF systems. */ | |
1588 | #ifndef OBJECT_FORMAT_ELF | |
1589 | #if TARGET_XCOFF | |
1590 | /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on | |
1591 | 64-bit targets. */ | |
1592 | #undef TARGET_ASM_UNALIGNED_HI_OP | |
1593 | #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2," | |
1594 | #undef TARGET_ASM_UNALIGNED_SI_OP | |
1595 | #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4," | |
1596 | #undef TARGET_ASM_UNALIGNED_DI_OP | |
1597 | #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8," | |
1598 | #else | |
1599 | /* For Darwin. */ | |
1600 | #undef TARGET_ASM_UNALIGNED_HI_OP | |
1601 | #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t" | |
1602 | #undef TARGET_ASM_UNALIGNED_SI_OP | |
1603 | #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t" | |
1604 | #undef TARGET_ASM_UNALIGNED_DI_OP | |
1605 | #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t" | |
1606 | #undef TARGET_ASM_ALIGNED_DI_OP | |
1607 | #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t" | |
1608 | #endif | |
1609 | #endif | |
1610 | ||
1611 | /* This hook deals with fixups for relocatable code and DI-mode objects | |
1612 | in 64-bit code. */ | |
1613 | #undef TARGET_ASM_INTEGER | |
1614 | #define TARGET_ASM_INTEGER rs6000_assemble_integer | |
1615 | ||
1616 | #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO | |
1617 | #undef TARGET_ASM_ASSEMBLE_VISIBILITY | |
1618 | #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility | |
1619 | #endif | |
1620 | ||
1621 | #undef TARGET_SET_UP_BY_PROLOGUE | |
1622 | #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue | |
1623 | ||
1624 | #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS | |
1625 | #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components | |
1626 | #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB | |
1627 | #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb | |
1628 | #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS | |
1629 | #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components | |
1630 | #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS | |
1631 | #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components | |
1632 | #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS | |
1633 | #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components | |
1634 | #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS | |
1635 | #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components | |
1636 | ||
1637 | #undef TARGET_EXTRA_LIVE_ON_ENTRY | |
1638 | #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry | |
1639 | ||
1640 | #undef TARGET_INTERNAL_ARG_POINTER | |
1641 | #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer | |
1642 | ||
1643 | #undef TARGET_HAVE_TLS | |
1644 | #define TARGET_HAVE_TLS HAVE_AS_TLS | |
1645 | ||
1646 | #undef TARGET_CANNOT_FORCE_CONST_MEM | |
1647 | #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem | |
1648 | ||
1649 | #undef TARGET_DELEGITIMIZE_ADDRESS | |
1650 | #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address | |
1651 | ||
1652 | #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P | |
1653 | #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p | |
1654 | ||
1655 | #undef TARGET_LEGITIMATE_COMBINED_INSN | |
1656 | #define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn | |
1657 | ||
1658 | #undef TARGET_ASM_FUNCTION_PROLOGUE | |
1659 | #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue | |
1660 | #undef TARGET_ASM_FUNCTION_EPILOGUE | |
1661 | #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue | |
1662 | ||
1663 | #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA | |
1664 | #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra | |
1665 | ||
1666 | #undef TARGET_LEGITIMIZE_ADDRESS | |
1667 | #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address | |
1668 | ||
1669 | #undef TARGET_SCHED_VARIABLE_ISSUE | |
1670 | #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue | |
1671 | ||
1672 | #undef TARGET_SCHED_ISSUE_RATE | |
1673 | #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate | |
1674 | #undef TARGET_SCHED_ADJUST_COST | |
1675 | #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost | |
1676 | #undef TARGET_SCHED_ADJUST_PRIORITY | |
1677 | #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority | |
1678 | #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE | |
1679 | #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence | |
1680 | #undef TARGET_SCHED_INIT | |
1681 | #define TARGET_SCHED_INIT rs6000_sched_init | |
1682 | #undef TARGET_SCHED_FINISH | |
1683 | #define TARGET_SCHED_FINISH rs6000_sched_finish | |
1684 | #undef TARGET_SCHED_REORDER | |
1685 | #define TARGET_SCHED_REORDER rs6000_sched_reorder | |
1686 | #undef TARGET_SCHED_REORDER2 | |
1687 | #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2 | |
1688 | ||
1689 | #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD | |
1690 | #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead | |
1691 | ||
1692 | #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD | |
1693 | #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard | |
1694 | ||
1695 | #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT | |
1696 | #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context | |
1697 | #undef TARGET_SCHED_INIT_SCHED_CONTEXT | |
1698 | #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context | |
1699 | #undef TARGET_SCHED_SET_SCHED_CONTEXT | |
1700 | #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context | |
1701 | #undef TARGET_SCHED_FREE_SCHED_CONTEXT | |
1702 | #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context | |
1703 | ||
1704 | #undef TARGET_SCHED_CAN_SPECULATE_INSN | |
1705 | #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn | |
1706 | ||
1707 | #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD | |
1708 | #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load | |
1709 | #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT | |
1710 | #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \ | |
1711 | rs6000_builtin_support_vector_misalignment | |
1712 | #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE | |
1713 | #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable | |
1714 | #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST | |
1715 | #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ | |
1716 | rs6000_builtin_vectorization_cost | |
1717 | #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE | |
1718 | #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \ | |
1719 | rs6000_preferred_simd_mode | |
1720 | #undef TARGET_VECTORIZE_INIT_COST | |
1721 | #define TARGET_VECTORIZE_INIT_COST rs6000_init_cost | |
1722 | #undef TARGET_VECTORIZE_ADD_STMT_COST | |
1723 | #define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost | |
1724 | #undef TARGET_VECTORIZE_FINISH_COST | |
1725 | #define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost | |
1726 | #undef TARGET_VECTORIZE_DESTROY_COST_DATA | |
1727 | #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data | |
1728 | ||
1729 | #undef TARGET_INIT_BUILTINS | |
1730 | #define TARGET_INIT_BUILTINS rs6000_init_builtins | |
1731 | #undef TARGET_BUILTIN_DECL | |
1732 | #define TARGET_BUILTIN_DECL rs6000_builtin_decl | |
1733 | ||
1734 | #undef TARGET_FOLD_BUILTIN | |
1735 | #define TARGET_FOLD_BUILTIN rs6000_fold_builtin | |
1736 | #undef TARGET_GIMPLE_FOLD_BUILTIN | |
1737 | #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin | |
1738 | ||
1739 | #undef TARGET_EXPAND_BUILTIN | |
1740 | #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin | |
1741 | ||
1742 | #undef TARGET_MANGLE_TYPE | |
1743 | #define TARGET_MANGLE_TYPE rs6000_mangle_type | |
1744 | ||
1745 | #undef TARGET_INIT_LIBFUNCS | |
1746 | #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs | |
1747 | ||
1748 | #if TARGET_MACHO | |
1749 | #undef TARGET_BINDS_LOCAL_P | |
1750 | #define TARGET_BINDS_LOCAL_P darwin_binds_local_p | |
1751 | #endif | |
1752 | ||
1753 | #undef TARGET_MS_BITFIELD_LAYOUT_P | |
1754 | #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p | |
1755 | ||
1756 | #undef TARGET_ASM_OUTPUT_MI_THUNK | |
1757 | #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk | |
1758 | ||
1759 | #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK | |
1760 | #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true | |
1761 | ||
1762 | #undef TARGET_FUNCTION_OK_FOR_SIBCALL | |
1763 | #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall | |
1764 | ||
1765 | #undef TARGET_REGISTER_MOVE_COST | |
1766 | #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost | |
1767 | #undef TARGET_MEMORY_MOVE_COST | |
1768 | #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost | |
1769 | #undef TARGET_CANNOT_COPY_INSN_P | |
1770 | #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p | |
1771 | #undef TARGET_RTX_COSTS | |
1772 | #define TARGET_RTX_COSTS rs6000_rtx_costs | |
1773 | #undef TARGET_ADDRESS_COST | |
1774 | #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0 | |
1775 | ||
1776 | #undef TARGET_DWARF_REGISTER_SPAN | |
1777 | #define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span | |
1778 | ||
1779 | #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA | |
1780 | #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra | |
1781 | ||
1782 | #undef TARGET_MEMBER_TYPE_FORCES_BLK | |
1783 | #define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk | |
1784 | ||
1785 | #undef TARGET_PROMOTE_FUNCTION_MODE | |
1786 | #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode | |
1787 | ||
1788 | #undef TARGET_RETURN_IN_MEMORY | |
1789 | #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory | |
1790 | ||
1791 | #undef TARGET_RETURN_IN_MSB | |
1792 | #define TARGET_RETURN_IN_MSB rs6000_return_in_msb | |
1793 | ||
1794 | #undef TARGET_SETUP_INCOMING_VARARGS | |
1795 | #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs | |
1796 | ||
1797 | /* Always strict argument naming on rs6000. */ | |
1798 | #undef TARGET_STRICT_ARGUMENT_NAMING | |
1799 | #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true | |
1800 | #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED | |
1801 | #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true | |
1802 | #undef TARGET_SPLIT_COMPLEX_ARG | |
1803 | #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true | |
1804 | #undef TARGET_MUST_PASS_IN_STACK | |
1805 | #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack | |
1806 | #undef TARGET_PASS_BY_REFERENCE | |
1807 | #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference | |
1808 | #undef TARGET_ARG_PARTIAL_BYTES | |
1809 | #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes | |
1810 | #undef TARGET_FUNCTION_ARG_ADVANCE | |
1811 | #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance | |
1812 | #undef TARGET_FUNCTION_ARG | |
1813 | #define TARGET_FUNCTION_ARG rs6000_function_arg | |
d7ab0e3d | 1814 | #undef TARGET_FUNCTION_ARG_PADDING |
1815 | #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding | |
01e91138 | 1816 | #undef TARGET_FUNCTION_ARG_BOUNDARY |
1817 | #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary | |
1818 | ||
1819 | #undef TARGET_BUILD_BUILTIN_VA_LIST | |
1820 | #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list | |
1821 | ||
1822 | #undef TARGET_EXPAND_BUILTIN_VA_START | |
1823 | #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start | |
1824 | ||
1825 | #undef TARGET_GIMPLIFY_VA_ARG_EXPR | |
1826 | #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg | |
1827 | ||
1828 | #undef TARGET_EH_RETURN_FILTER_MODE | |
1829 | #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode | |
1830 | ||
1831 | #undef TARGET_SCALAR_MODE_SUPPORTED_P | |
1832 | #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p | |
1833 | ||
1834 | #undef TARGET_VECTOR_MODE_SUPPORTED_P | |
1835 | #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p | |
1836 | ||
1837 | #undef TARGET_FLOATN_MODE | |
1838 | #define TARGET_FLOATN_MODE rs6000_floatn_mode | |
1839 | ||
1840 | #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN | |
1841 | #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn | |
1842 | ||
1843 | #undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP | |
1844 | #define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip | |
1845 | ||
1846 | #undef TARGET_MD_ASM_ADJUST | |
1847 | #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust | |
1848 | ||
1849 | #undef TARGET_OPTION_OVERRIDE | |
1850 | #define TARGET_OPTION_OVERRIDE rs6000_option_override | |
1851 | ||
1852 | #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION | |
1853 | #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \ | |
1854 | rs6000_builtin_vectorized_function | |
1855 | ||
1856 | #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION | |
1857 | #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \ | |
1858 | rs6000_builtin_md_vectorized_function | |
1859 | ||
1860 | #undef TARGET_STACK_PROTECT_GUARD | |
1861 | #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard | |
1862 | ||
1863 | #if !TARGET_MACHO | |
1864 | #undef TARGET_STACK_PROTECT_FAIL | |
1865 | #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail | |
1866 | #endif | |
1867 | ||
1868 | #ifdef HAVE_AS_TLS | |
1869 | #undef TARGET_ASM_OUTPUT_DWARF_DTPREL | |
1870 | #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel | |
1871 | #endif | |
1872 | ||
1873 | /* Use a 32-bit anchor range. This leads to sequences like: | |
1874 | ||
1875 | addis tmp,anchor,high | |
1876 | add dest,tmp,low | |
1877 | ||
1878 | where tmp itself acts as an anchor, and can be shared between | |
1879 | accesses to the same 64k page. */ | |
1880 | #undef TARGET_MIN_ANCHOR_OFFSET | |
1881 | #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1 | |
1882 | #undef TARGET_MAX_ANCHOR_OFFSET | |
1883 | #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff | |
1884 | #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P | |
1885 | #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p | |
1886 | #undef TARGET_USE_BLOCKS_FOR_DECL_P | |
1887 | #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p | |
1888 | ||
1889 | #undef TARGET_BUILTIN_RECIPROCAL | |
1890 | #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal | |
1891 | ||
1892 | #undef TARGET_EXPAND_TO_RTL_HOOK | |
1893 | #define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot | |
1894 | ||
1895 | #undef TARGET_INSTANTIATE_DECLS | |
1896 | #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls | |
1897 | ||
1898 | #undef TARGET_SECONDARY_RELOAD | |
1899 | #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload | |
1900 | ||
1901 | #undef TARGET_LEGITIMATE_ADDRESS_P | |
1902 | #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p | |
1903 | ||
1904 | #undef TARGET_MODE_DEPENDENT_ADDRESS_P | |
1905 | #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p | |
1906 | ||
1907 | #undef TARGET_LRA_P | |
1908 | #define TARGET_LRA_P rs6000_lra_p | |
1909 | ||
1910 | #undef TARGET_COMPUTE_PRESSURE_CLASSES | |
1911 | #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes | |
1912 | ||
1913 | #undef TARGET_CAN_ELIMINATE | |
1914 | #define TARGET_CAN_ELIMINATE rs6000_can_eliminate | |
1915 | ||
1916 | #undef TARGET_CONDITIONAL_REGISTER_USAGE | |
1917 | #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage | |
1918 | ||
1919 | #undef TARGET_SCHED_REASSOCIATION_WIDTH | |
1920 | #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width | |
1921 | ||
1922 | #undef TARGET_TRAMPOLINE_INIT | |
1923 | #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init | |
1924 | ||
1925 | #undef TARGET_FUNCTION_VALUE | |
1926 | #define TARGET_FUNCTION_VALUE rs6000_function_value | |
1927 | ||
1928 | #undef TARGET_OPTION_VALID_ATTRIBUTE_P | |
1929 | #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p | |
1930 | ||
1931 | #undef TARGET_OPTION_SAVE | |
1932 | #define TARGET_OPTION_SAVE rs6000_function_specific_save | |
1933 | ||
1934 | #undef TARGET_OPTION_RESTORE | |
1935 | #define TARGET_OPTION_RESTORE rs6000_function_specific_restore | |
1936 | ||
1937 | #undef TARGET_OPTION_PRINT | |
1938 | #define TARGET_OPTION_PRINT rs6000_function_specific_print | |
1939 | ||
1940 | #undef TARGET_CAN_INLINE_P | |
1941 | #define TARGET_CAN_INLINE_P rs6000_can_inline_p | |
1942 | ||
1943 | #undef TARGET_SET_CURRENT_FUNCTION | |
1944 | #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function | |
1945 | ||
1946 | #undef TARGET_LEGITIMATE_CONSTANT_P | |
1947 | #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p | |
1948 | ||
1949 | #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK | |
1950 | #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok | |
1951 | ||
1952 | #undef TARGET_CAN_USE_DOLOOP_P | |
1953 | #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost | |
1954 | ||
1955 | #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV | |
1956 | #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv | |
1957 | ||
1958 | #undef TARGET_LIBGCC_CMP_RETURN_MODE | |
1959 | #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode | |
1960 | #undef TARGET_LIBGCC_SHIFT_COUNT_MODE | |
1961 | #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode | |
1962 | #undef TARGET_UNWIND_WORD_MODE | |
1963 | #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode | |
1964 | ||
1965 | #undef TARGET_OFFLOAD_OPTIONS | |
1966 | #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options | |
1967 | ||
1968 | #undef TARGET_C_MODE_FOR_SUFFIX | |
1969 | #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix | |
1970 | ||
1971 | #undef TARGET_INVALID_BINARY_OP | |
1972 | #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op | |
1973 | ||
1974 | #undef TARGET_OPTAB_SUPPORTED_P | |
1975 | #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p | |
1976 | ||
1977 | #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS | |
1978 | #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1 | |
5da94e60 | 1979 | |
b395382f | 1980 | #undef TARGET_HARD_REGNO_MODE_OK |
1981 | #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok | |
1982 | ||
5f6dcf1a | 1983 | #undef TARGET_MODES_TIEABLE_P |
1984 | #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p | |
1985 | ||
5da94e60 | 1986 | #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED |
1987 | #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \ | |
1988 | rs6000_hard_regno_call_part_clobbered | |
dfdced85 | 1989 | |
1990 | #undef TARGET_SLOW_UNALIGNED_ACCESS | |
1991 | #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access | |
01e91138 | 1992 | \f |
1993 | ||
1994 | /* Processor table. */ | |
1995 | struct rs6000_ptt | |
1996 | { | |
1997 | const char *const name; /* Canonical processor name. */ | |
1998 | const enum processor_type processor; /* Processor type enum value. */ | |
1999 | const HOST_WIDE_INT target_enable; /* Target flags to enable. */ | |
2000 | }; | |
2001 | ||
2002 | static struct rs6000_ptt const processor_target_table[] = | |
2003 | { | |
2004 | #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS }, | |
2005 | #include "powerpcspe-cpus.def" | |
2006 | #undef RS6000_CPU | |
2007 | }; | |
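/* Editorial sketch of the expansion above: each line of
   powerpcspe-cpus.def has the shape

	RS6000_CPU ("power8", PROCESSOR_POWER8, MASK_POWERPC64 | ...)

   (entry shown illustratively), so the #include initializes one
   rs6000_ptt element per supported -mcpu=/-mtune= name.  */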
2008 | ||
2009 | /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the | |
2010 | name is invalid. */ | |
2011 | ||
2012 | static int | |
2013 | rs6000_cpu_name_lookup (const char *name) | |
2014 | { | |
2015 | size_t i; | |
2016 | ||
2017 | if (name != NULL) | |
2018 | { | |
2019 | for (i = 0; i < ARRAY_SIZE (processor_target_table); i++) | |
2020 | if (! strcmp (name, processor_target_table[i].name)) | |
2021 | return (int)i; | |
2022 | } | |
2023 | ||
2024 | return -1; | |
2025 | } | |
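/* Editorial usage sketch (simplified relative to the real option
   handling; assumes the rs6000_isa_flags global used elsewhere in this
   file):

	int idx = rs6000_cpu_name_lookup ("power8");
	if (idx >= 0)
	  rs6000_isa_flags |= processor_target_table[idx].target_enable;

   A negative index means the name was not a recognized CPU.  */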
2026 | ||
2027 | \f | |
2028 | /* Return number of consecutive hard regs needed starting at reg REGNO | |
2029 | to hold something of mode MODE. | |
2030 | This is ordinarily the length in words of a value of mode MODE | |
2031 | but can be less for certain modes in special long registers. | |
2032 | ||
2033 | For the SPE, GPRs are 64 bits but only 32 bits are visible in | |
2034 | scalar instructions. The upper 32 bits are only available to the | |
2035 | SIMD instructions. | |
2036 | ||
2037 | POWER and PowerPC GPRs hold 32 bits worth; | |
2038 | PowerPC64 GPRs and FPRs hold 64 bits worth. */ | 
2039 | ||
2040 | static int | |
2041 | rs6000_hard_regno_nregs_internal (int regno, machine_mode mode) | |
2042 | { | |
2043 | unsigned HOST_WIDE_INT reg_size; | |
2044 | ||
2045 | /* 128-bit floating point usually takes 2 registers, unless it is IEEE | |
2046 | 128-bit floating point that can go in vector registers, which has VSX | |
2047 | memory addressing. */ | |
2048 | if (FP_REGNO_P (regno)) | |
2049 | reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode) | |
2050 | ? UNITS_PER_VSX_WORD | |
2051 | : UNITS_PER_FP_WORD); | |
2052 | ||
2053 | else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode)) | |
2054 | reg_size = UNITS_PER_SPE_WORD; | |
2055 | ||
2056 | else if (ALTIVEC_REGNO_P (regno)) | |
2057 | reg_size = UNITS_PER_ALTIVEC_WORD; | |
2058 | ||
2059 | /* The value returned for SCmode in the E500 double case is 2 for | |
2060 | ABI compatibility; storing an SCmode value in a single register | |
2061 | would require function_arg and rs6000_spe_function_arg to handle | |
2062 | SCmode so as to pass the value correctly in a pair of | |
2063 | registers. */ | |
2064 | else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode | |
2065 | && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno)) | |
2066 | reg_size = UNITS_PER_FP_WORD; | |
2067 | ||
2068 | else | |
2069 | reg_size = UNITS_PER_WORD; | |
2070 | ||
2071 | return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size; | |
2072 | } | |
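/* Worked example (editorial, assuming the usual UNITS_PER_FP_WORD == 8
   and a 32-bit UNITS_PER_WORD == 4): the ceiling division above gives
   DFmode (8 bytes) one FPR ((8 + 8 - 1) / 8 == 1) but two 32-bit GPRs
   ((8 + 4 - 1) / 4 == 2), and V4SImode (16 bytes) a single AltiVec
   register ((16 + 16 - 1) / 16 == 1).  */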
2073 | ||
2074 | /* Value is 1 if hard register REGNO can hold a value of machine-mode | |
2075 | MODE. */ | |
2076 | static int | |
b395382f | 2077 | rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode) |
01e91138 | 2078 | { |
2079 | int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1; | |
2080 | ||
2081 | if (COMPLEX_MODE_P (mode)) | |
2082 | mode = GET_MODE_INNER (mode); | |
2083 | ||
2084 | /* PTImode can only go in GPRs. Quad word memory operations require even/odd | |
2085 | register pairs, and we use PTImode where we need to deal with those | 
2086 | operations. Don't allow quad words in the argument or frame pointer | 
2087 | registers, just registers 0..31. */ | 
2088 | if (mode == PTImode) | |
2089 | return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO) | |
2090 | && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO) | |
2091 | && ((regno & 1) == 0)); | |
2092 | ||
2093 | /* VSX registers that overlap the FPR registers are larger than for non-VSX | |
2094 | implementations. Don't allow an item to be split between a FP register | |
2095 | and an Altivec register. Allow TImode in all VSX registers if the user | |
2096 | asked for it. */ | |
2097 | if (TARGET_VSX && VSX_REGNO_P (regno) | |
2098 | && (VECTOR_MEM_VSX_P (mode) | |
2099 | || FLOAT128_VECTOR_P (mode) | |
2100 | || reg_addr[mode].scalar_in_vmx_p | |
2101 | || (TARGET_VSX_TIMODE && mode == TImode) | |
2102 | || (TARGET_VADDUQM && mode == V1TImode))) | |
2103 | { | |
2104 | if (FP_REGNO_P (regno)) | |
2105 | return FP_REGNO_P (last_regno); | |
2106 | ||
2107 | if (ALTIVEC_REGNO_P (regno)) | |
2108 | { | |
2109 | if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p) | |
2110 | return 0; | |
2111 | ||
2112 | return ALTIVEC_REGNO_P (last_regno); | |
2113 | } | |
2114 | } | |
2115 | ||
2116 | /* The GPRs can hold any mode, but values bigger than one register | |
2117 | cannot go past R31. */ | 
2118 | if (INT_REGNO_P (regno)) | |
2119 | return INT_REGNO_P (last_regno); | |
2120 | ||
2121 | /* The float registers (except for VSX vector modes) can only hold floating | |
2122 | modes and DImode. */ | |
2123 | if (FP_REGNO_P (regno)) | |
2124 | { | |
2125 | if (FLOAT128_VECTOR_P (mode)) | |
2126 | return false; | |
2127 | ||
2128 | if (SCALAR_FLOAT_MODE_P (mode) | |
2129 | && (mode != TDmode || (regno % 2) == 0) | |
2130 | && FP_REGNO_P (last_regno)) | |
2131 | return 1; | |
2132 | ||
2133 | if (GET_MODE_CLASS (mode) == MODE_INT) | |
2134 | { | |
2135 | if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD) | 
2136 | return 1; | |
2137 | ||
2138 | if (TARGET_VSX_SMALL_INTEGER) | |
2139 | { | |
2140 | if (mode == SImode) | |
2141 | return 1; | |
2142 | ||
2143 | if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode)) | |
2144 | return 1; | |
2145 | } | |
2146 | } | |
2147 | ||
2148 | if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT | |
2149 | && PAIRED_VECTOR_MODE (mode)) | |
2150 | return 1; | |
2151 | ||
2152 | return 0; | |
2153 | } | |
2154 | ||
2155 | /* The CR registers can only hold CC modes. */ | 
2156 | if (CR_REGNO_P (regno)) | |
2157 | return GET_MODE_CLASS (mode) == MODE_CC; | |
2158 | ||
2159 | if (CA_REGNO_P (regno)) | |
2160 | return mode == Pmode || mode == SImode; | |
2161 | ||
2162 | /* AltiVec only in AltiVec registers. */ | 
2163 | if (ALTIVEC_REGNO_P (regno)) | |
2164 | return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) | |
2165 | || mode == V1TImode); | |
2166 | ||
2167 | /* ...but GPRs can hold SIMD data on the SPE in one register. */ | |
2168 | if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode)) | |
2169 | return 1; | |
2170 | ||
2171 | /* We cannot put non-VSX TImode or PTImode anywhere except in a general | 
2172 | register, and the value must fit within the register set. */ | 
2173 | ||
2174 | return GET_MODE_SIZE (mode) <= UNITS_PER_WORD; | |
2175 | } | |
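/* Editorial example of the PTImode rule above: on a 64-bit target a
   PTImode value occupies two adjacent GPRs starting at an even register,
   so r10 (the r10/r11 pair) is accepted while r11 is rejected, as are
   the argument and frame pointer registers.  */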
2176 | ||
b395382f | 2177 | /* Implement TARGET_HARD_REGNO_MODE_OK. */ |
2178 | ||
2179 | static bool | |
2180 | rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode) | |
2181 | { | |
2182 | return rs6000_hard_regno_mode_ok_p[mode][regno]; | |
2183 | } | |
2184 | ||
5f6dcf1a | 2185 | /* Implement TARGET_MODES_TIEABLE_P. |
2186 | ||
2187 | PTImode cannot tie with other modes because PTImode is restricted to even | |
2188 | GPR registers, and TImode can go in any GPR as well as VSX registers (PR | |
2189 | 57744). | |
2190 | ||
2191 | The Altivec/VSX vector tests come before the scalar float tests, so IEEE | 
2192 | 128-bit floating point on VSX systems ties with the other vector modes. */ | 
2193 | ||
2194 | static bool | |
2195 | rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2) | |
2196 | { | |
2197 | if (mode1 == PTImode) | |
2198 | return mode2 == PTImode; | |
2199 | if (mode2 == PTImode) | |
2200 | return false; | |
2201 | ||
2202 | if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1)) | |
2203 | return ALTIVEC_OR_VSX_VECTOR_MODE (mode2); | |
2204 | if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2)) | |
2205 | return false; | |
2206 | ||
2207 | if (SCALAR_FLOAT_MODE_P (mode1)) | |
2208 | return SCALAR_FLOAT_MODE_P (mode2); | |
2209 | if (SCALAR_FLOAT_MODE_P (mode2)) | |
2210 | return false; | |
2211 | ||
2212 | if (GET_MODE_CLASS (mode1) == MODE_CC) | |
2213 | return GET_MODE_CLASS (mode2) == MODE_CC; | |
2214 | if (GET_MODE_CLASS (mode2) == MODE_CC) | |
2215 | return false; | |
2216 | ||
2217 | if (SPE_VECTOR_MODE (mode1)) | |
2218 | return SPE_VECTOR_MODE (mode2); | |
2219 | if (SPE_VECTOR_MODE (mode2)) | |
2220 | return false; | |
2221 | ||
2222 | return true; | |
2223 | } | |
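/* Editorial examples of the ordering above:
   rs6000_modes_tieable_p (V4SImode, V2DFmode) is true (both vector);
   rs6000_modes_tieable_p (PTImode, TImode) is false (PTImode ties only
   with itself); rs6000_modes_tieable_p (SImode, DImode) matches none of
   the special cases and falls through to the default, so it is true.  */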
2224 | ||
5da94e60 | 2225 | /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */ |
2226 | ||
2227 | static bool | |
2228 | rs6000_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode) | |
2229 | { | |
2230 | if (TARGET_32BIT | |
2231 | && TARGET_POWERPC64 | |
2232 | && GET_MODE_SIZE (mode) > 4 | |
2233 | && INT_REGNO_P (regno)) | |
2234 | return true; | |
2235 | ||
2236 | if (TARGET_VSX | |
2237 | && FP_REGNO_P (regno) | |
2238 | && GET_MODE_SIZE (mode) > 8 | |
2239 | && !FLOAT128_2REG_P (mode)) | |
2240 | return true; | |
2241 | ||
2242 | return false; | |
2243 | } | |
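/* Editorial illustration: with -m32 -mpowerpc64, a DImode value living in
   a GPR is partly clobbered by calls, because the 32-bit ABIs preserve
   only the low 32 bits of the nominally 64-bit registers; similarly,
   under VSX only the low 64 bits (the traditional FPR part) of a vector
   register survive a call.  */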
2244 | ||
01e91138 | 2245 | /* Print interesting facts about registers. */ |
2246 | static void | |
2247 | rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name) | |
2248 | { | |
2249 | int r, m; | |
2250 | ||
2251 | for (r = first_regno; r <= last_regno; ++r) | |
2252 | { | |
2253 | const char *comma = ""; | |
2254 | int len; | |
2255 | ||
2256 | if (first_regno == last_regno) | |
2257 | fprintf (stderr, "%s:\t", reg_name); | |
2258 | else | |
2259 | fprintf (stderr, "%s%d:\t", reg_name, r - first_regno); | |
2260 | ||
2261 | len = 8; | |
2262 | for (m = 0; m < NUM_MACHINE_MODES; ++m) | |
2263 | if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r]) | |
2264 | { | |
2265 | if (len > 70) | |
2266 | { | |
2267 | fprintf (stderr, ",\n\t"); | |
2268 | len = 8; | |
2269 | comma = ""; | |
2270 | } | |
2271 | ||
2272 | if (rs6000_hard_regno_nregs[m][r] > 1) | |
2273 | len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m), | |
2274 | rs6000_hard_regno_nregs[m][r]); | |
2275 | else | |
2276 | len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m)); | |
2277 | ||
2278 | comma = ", "; | |
2279 | } | |
2280 | ||
2281 | if (call_used_regs[r]) | |
2282 | { | |
2283 | if (len > 70) | |
2284 | { | |
2285 | fprintf (stderr, ",\n\t"); | |
2286 | len = 8; | |
2287 | comma = ""; | |
2288 | } | |
2289 | ||
2290 | len += fprintf (stderr, "%s%s", comma, "call-used"); | |
2291 | comma = ", "; | |
2292 | } | |
2293 | ||
2294 | if (fixed_regs[r]) | |
2295 | { | |
2296 | if (len > 70) | |
2297 | { | |
2298 | fprintf (stderr, ",\n\t"); | |
2299 | len = 8; | |
2300 | comma = ""; | |
2301 | } | |
2302 | ||
2303 | len += fprintf (stderr, "%s%s", comma, "fixed"); | |
2304 | comma = ", "; | |
2305 | } | |
2306 | ||
2307 | if (len > 70) | |
2308 | { | |
2309 | fprintf (stderr, ",\n\t"); | |
2310 | comma = ""; | |
2311 | } | |
2312 | ||
2313 | len += fprintf (stderr, "%sreg-class = %s", comma, | |
2314 | reg_class_names[(int)rs6000_regno_regclass[r]]); | |
2315 | comma = ", "; | |
2316 | ||
2317 | if (len > 70) | |
2318 | { | |
2319 | fprintf (stderr, ",\n\t"); | |
2320 | comma = ""; | |
2321 | } | |
2322 | ||
2323 | fprintf (stderr, "%sregno = %d\n", comma, r); | |
2324 | } | |
2325 | } | |
2326 | ||
2327 | static const char * | |
2328 | rs6000_debug_vector_unit (enum rs6000_vector v) | |
2329 | { | |
2330 | const char *ret; | |
2331 | ||
2332 | switch (v) | |
2333 | { | |
2334 | case VECTOR_NONE: ret = "none"; break; | |
2335 | case VECTOR_ALTIVEC: ret = "altivec"; break; | |
2336 | case VECTOR_VSX: ret = "vsx"; break; | |
2337 | case VECTOR_P8_VECTOR: ret = "p8_vector"; break; | |
2338 | case VECTOR_PAIRED: ret = "paired"; break; | |
2339 | case VECTOR_SPE: ret = "spe"; break; | |
2340 | case VECTOR_OTHER: ret = "other"; break; | |
2341 | default: ret = "unknown"; break; | |
2342 | } | |
2343 | ||
2344 | return ret; | |
2345 | } | |
2346 | ||
2347 | /* Inner function printing just the address mask for a particular reload | |
2348 | register class. */ | |
2349 | DEBUG_FUNCTION char * | |
2350 | rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces) | |
2351 | { | |
2352 | static char ret[8]; | |
2353 | char *p = ret; | |
2354 | ||
2355 | if ((mask & RELOAD_REG_VALID) != 0) | |
2356 | *p++ = 'v'; | |
2357 | else if (keep_spaces) | |
2358 | *p++ = ' '; | |
2359 | ||
2360 | if ((mask & RELOAD_REG_MULTIPLE) != 0) | |
2361 | *p++ = 'm'; | |
2362 | else if (keep_spaces) | |
2363 | *p++ = ' '; | |
2364 | ||
2365 | if ((mask & RELOAD_REG_INDEXED) != 0) | |
2366 | *p++ = 'i'; | |
2367 | else if (keep_spaces) | |
2368 | *p++ = ' '; | |
2369 | ||
2370 | if ((mask & RELOAD_REG_QUAD_OFFSET) != 0) | |
2371 | *p++ = 'O'; | |
2372 | else if ((mask & RELOAD_REG_OFFSET) != 0) | |
2373 | *p++ = 'o'; | |
2374 | else if (keep_spaces) | |
2375 | *p++ = ' '; | |
2376 | ||
2377 | if ((mask & RELOAD_REG_PRE_INCDEC) != 0) | |
2378 | *p++ = '+'; | |
2379 | else if (keep_spaces) | |
2380 | *p++ = ' '; | |
2381 | ||
2382 | if ((mask & RELOAD_REG_PRE_MODIFY) != 0) | |
2383 | *p++ = '+'; | |
2384 | else if (keep_spaces) | |
2385 | *p++ = ' '; | |
2386 | ||
2387 | if ((mask & RELOAD_REG_AND_M16) != 0) | |
2388 | *p++ = '&'; | |
2389 | else if (keep_spaces) | |
2390 | *p++ = ' '; | |
2391 | ||
2392 | *p = '\0'; | |
2393 | ||
2394 | return ret; | |
2395 | } | |
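/* Editorial example: a mask of
   RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET
   prints as "vio" (or as "v io   " when KEEP_SPACES is true), meaning the
   register class is valid for the mode and supports both reg+reg indexed
   and reg+offset addressing.  */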
2396 | ||
2397 | /* Print the address masks in a human-readable fashion. */ | 
2398 | DEBUG_FUNCTION void | |
2399 | rs6000_debug_print_mode (ssize_t m) | |
2400 | { | |
2401 | ssize_t rc; | |
2402 | int spaces = 0; | |
2403 | bool fuse_extra_p; | |
2404 | ||
2405 | fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m)); | |
2406 | for (rc = 0; rc < N_RELOAD_REG; rc++) | |
2407 | fprintf (stderr, " %s: %s", reload_reg_map[rc].name, | |
2408 | rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true)); | |
2409 | ||
2410 | if ((reg_addr[m].reload_store != CODE_FOR_nothing) | |
2411 | || (reg_addr[m].reload_load != CODE_FOR_nothing)) | |
2412 | fprintf (stderr, " Reload=%c%c", | |
2413 | (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*', | |
2414 | (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*'); | |
2415 | else | |
2416 | spaces += sizeof (" Reload=sl") - 1; | |
2417 | ||
2418 | if (reg_addr[m].scalar_in_vmx_p) | |
2419 | { | |
2420 | fprintf (stderr, "%*s Upper=y", spaces, ""); | |
2421 | spaces = 0; | |
2422 | } | |
2423 | else | |
2424 | spaces += sizeof (" Upper=y") - 1; | |
2425 | ||
2426 | fuse_extra_p = ((reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing) | |
2427 | || reg_addr[m].fused_toc); | |
2428 | if (!fuse_extra_p) | |
2429 | { | |
2430 | for (rc = 0; rc < N_RELOAD_REG; rc++) | |
2431 | { | |
2432 | if (rc != RELOAD_REG_ANY) | |
2433 | { | |
2434 | if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing | 
2435 | || reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing | 
2436 | || reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing | 
2437 | || reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing) | 
2439 | { | |
2440 | fuse_extra_p = true; | |
2441 | break; | |
2442 | } | |
2443 | } | |
2444 | } | |
2445 | } | |
2446 | ||
2447 | if (fuse_extra_p) | |
2448 | { | |
2449 | fprintf (stderr, "%*s Fuse:", spaces, ""); | |
2450 | spaces = 0; | |
2451 | ||
2452 | for (rc = 0; rc < N_RELOAD_REG; rc++) | |
2453 | { | |
2454 | if (rc != RELOAD_REG_ANY) | |
2455 | { | |
2456 | char load, store; | |
2457 | ||
2458 | if (reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing) | |
2459 | load = 'l'; | |
2460 | else if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing) | |
2461 | load = 'L'; | |
2462 | else | |
2463 | load = '-'; | |
2464 | ||
2465 | if (reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing) | |
2466 | store = 's'; | |
2467 | else if (reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing) | |
2468 | store = 'S'; | |
2469 | else | |
2470 | store = '-'; | |
2471 | ||
2472 | if (load == '-' && store == '-') | |
2473 | spaces += 5; | |
2474 | else | |
2475 | { | |
2476 | fprintf (stderr, "%*s%c=%c%c", (spaces + 1), "", | |
2477 | reload_reg_map[rc].name[0], load, store); | |
2478 | spaces = 0; | |
2479 | } | |
2480 | } | |
2481 | } | |
2482 | ||
2483 | if (reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing) | |
2484 | { | |
2485 | fprintf (stderr, "%*sP8gpr", (spaces + 1), ""); | |
2486 | spaces = 0; | |
2487 | } | |
2488 | else | |
2489 | spaces += sizeof (" P8gpr") - 1; | |
2490 | ||
2491 | if (reg_addr[m].fused_toc) | |
2492 | { | |
2493 | fprintf (stderr, "%*sToc", (spaces + 1), ""); | |
2494 | spaces = 0; | |
2495 | } | |
2496 | else | |
2497 | spaces += sizeof (" Toc") - 1; | |
2498 | } | |
2499 | else | |
2500 | spaces += sizeof (" Fuse: G=ls F=ls v=ls P8gpr Toc") - 1; | |
2501 | ||
2502 | if (rs6000_vector_unit[m] != VECTOR_NONE | |
2503 | || rs6000_vector_mem[m] != VECTOR_NONE) | |
2504 | { | |
2505 | fprintf (stderr, "%*s vector: arith=%-10s mem=%s", | |
2506 | spaces, "", | |
2507 | rs6000_debug_vector_unit (rs6000_vector_unit[m]), | |
2508 | rs6000_debug_vector_unit (rs6000_vector_mem[m])); | |
2509 | } | |
2510 | ||
2511 | fputs ("\n", stderr); | |
2512 | } | |
2513 | ||
2514 | #define DEBUG_FMT_ID "%-32s= " | |
2515 | #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n" | |
2516 | #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: " | |
2517 | #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n" | |
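/* Editorial example: DEBUG_FMT_ID left-justifies its key in 32 columns,
   so fprintf (stderr, DEBUG_FMT_S, "abi", "ELFv2") prints "abi", pads
   with spaces out to column 32, then "= ELFv2" and a newline.  */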
2518 | ||
2519 | /* Print various interesting information with -mdebug=reg. */ | |
2520 | static void | |
2521 | rs6000_debug_reg_global (void) | |
2522 | { | |
2523 | static const char *const tf[2] = { "false", "true" }; | |
2524 | const char *nl = (const char *)0; | |
2525 | int m; | |
2526 | size_t m1, m2, v; | |
2527 | char costly_num[20]; | |
2528 | char nop_num[20]; | |
2529 | char flags_buffer[40]; | |
2530 | const char *costly_str; | |
2531 | const char *nop_str; | |
2532 | const char *trace_str; | |
2533 | const char *abi_str; | |
2534 | const char *cmodel_str; | |
2535 | struct cl_target_option cl_opts; | |
2536 | ||
2537 | /* Modes we want tieable information on. */ | |
2538 | static const machine_mode print_tieable_modes[] = { | |
2539 | QImode, | |
2540 | HImode, | |
2541 | SImode, | |
2542 | DImode, | |
2543 | TImode, | |
2544 | PTImode, | |
2545 | SFmode, | |
2546 | DFmode, | |
2547 | TFmode, | |
2548 | IFmode, | |
2549 | KFmode, | |
2550 | SDmode, | |
2551 | DDmode, | |
2552 | TDmode, | |
2553 | V8QImode, | |
2554 | V4HImode, | |
2555 | V2SImode, | |
2556 | V16QImode, | |
2557 | V8HImode, | |
2558 | V4SImode, | |
2559 | V2DImode, | |
2560 | V1TImode, | |
2561 | V32QImode, | |
2562 | V16HImode, | |
2563 | V8SImode, | |
2564 | V4DImode, | |
2565 | V2TImode, | |
2566 | V2SFmode, | |
2567 | V4SFmode, | |
2568 | V2DFmode, | |
2569 | V8SFmode, | |
2570 | V4DFmode, | |
2571 | CCmode, | |
2572 | CCUNSmode, | |
2573 | CCEQmode, | |
2574 | }; | |
2575 | ||
2576 | /* Virtual regs we are interested in. */ | |
2577 | static const struct { | 
2578 | int regno; /* register number. */ | |
2579 | const char *name; /* register name. */ | |
2580 | } virtual_regs[] = { | |
2581 | { STACK_POINTER_REGNUM, "stack pointer:" }, | |
2582 | { TOC_REGNUM, "toc: " }, | |
2583 | { STATIC_CHAIN_REGNUM, "static chain: " }, | |
2584 | { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " }, | |
2585 | { HARD_FRAME_POINTER_REGNUM, "hard frame: " }, | |
2586 | { ARG_POINTER_REGNUM, "arg pointer: " }, | |
2587 | { FRAME_POINTER_REGNUM, "frame pointer:" }, | |
2588 | { FIRST_PSEUDO_REGISTER, "first pseudo: " }, | |
2589 | { FIRST_VIRTUAL_REGISTER, "first virtual:" }, | |
2590 | { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" }, | |
2591 | { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " }, | |
2592 | { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" }, | |
2593 | { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" }, | |
2594 | { VIRTUAL_CFA_REGNUM, "cfa (frame): " }, | |
2595 | { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" }, | 
2596 | { LAST_VIRTUAL_REGISTER, "last virtual: " }, | |
2597 | }; | |
2598 | ||
2599 | fputs ("\nHard register information:\n", stderr); | |
2600 | rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr"); | |
2601 | rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp"); | |
2602 | rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO, | |
2603 | LAST_ALTIVEC_REGNO, | |
2604 | "vs"); | |
2605 | rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr"); | |
2606 | rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr"); | |
2607 | rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr"); | |
2608 | rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca"); | |
2609 | rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave"); | |
2610 | rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr"); | |
2611 | rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a"); | |
2612 | rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f"); | |
2613 | ||
2614 | fputs ("\nVirtual/stack/frame registers:\n", stderr); | |
2615 | for (v = 0; v < ARRAY_SIZE (virtual_regs); v++) | |
2616 | fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno); | |
2617 | ||
2618 | fprintf (stderr, | |
2619 | "\n" | |
2620 | "d reg_class = %s\n" | |
2621 | "f reg_class = %s\n" | |
2622 | "v reg_class = %s\n" | |
2623 | "wa reg_class = %s\n" | |
2624 | "wb reg_class = %s\n" | |
2625 | "wd reg_class = %s\n" | |
2626 | "we reg_class = %s\n" | |
2627 | "wf reg_class = %s\n" | |
2628 | "wg reg_class = %s\n" | |
2629 | "wh reg_class = %s\n" | |
2630 | "wi reg_class = %s\n" | |
2631 | "wj reg_class = %s\n" | |
2632 | "wk reg_class = %s\n" | |
2633 | "wl reg_class = %s\n" | |
2634 | "wm reg_class = %s\n" | |
2635 | "wo reg_class = %s\n" | |
2636 | "wp reg_class = %s\n" | |
2637 | "wq reg_class = %s\n" | |
2638 | "wr reg_class = %s\n" | |
2639 | "ws reg_class = %s\n" | |
2640 | "wt reg_class = %s\n" | |
2641 | "wu reg_class = %s\n" | |
2642 | "wv reg_class = %s\n" | |
2643 | "ww reg_class = %s\n" | |
2644 | "wx reg_class = %s\n" | |
2645 | "wy reg_class = %s\n" | |
2646 | "wz reg_class = %s\n" | |
2647 | "wA reg_class = %s\n" | |
2648 | "wH reg_class = %s\n" | |
2649 | "wI reg_class = %s\n" | |
2650 | "wJ reg_class = %s\n" | |
2651 | "wK reg_class = %s\n" | |
2652 | "\n", | |
2653 | reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]], | |
2654 | reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]], | |
2655 | reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]], | |
2656 | reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]], | |
2657 | reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wb]], | |
2658 | reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]], | |
2659 | reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]], | |
2660 | reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]], | |
2661 | reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]], | |
2662 | reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]], | |
2663 | reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]], | |
2664 | reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]], | |
2665 | reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]], | |
2666 | reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]], | |
2667 | reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]], | |
2668 | reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wo]], | |
2669 | reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]], | |
2670 | reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]], | |
2671 | reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]], | |
2672 | reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]], | |
2673 | reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]], | |
2674 | reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]], | |
2675 | reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]], | |
2676 | reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]], | |
2677 | reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]], | |
2678 | reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]], | |
2679 | reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]], | |
2680 | reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]], | |
2681 | reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wH]], | |
2682 | reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wI]], | |
2683 | reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wJ]], | |
2684 | reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wK]]); | |
2685 | ||
2686 | nl = "\n"; | |
2687 | for (m = 0; m < NUM_MACHINE_MODES; ++m) | |
2688 | rs6000_debug_print_mode (m); | |
2689 | ||
2690 | fputs ("\n", stderr); | |
2691 | ||
2692 | for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++) | |
2693 | { | |
2694 | machine_mode mode1 = print_tieable_modes[m1]; | |
2695 | bool first_time = true; | |
2696 | ||
2697 | nl = (const char *)0; | |
2698 | for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++) | |
2699 | { | |
2700 | machine_mode mode2 = print_tieable_modes[m2]; | |
5f6dcf1a | 2701 | if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2)) |
01e91138 | 2702 | { |
2703 | if (first_time) | |
2704 | { | |
2705 | fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1)); | |
2706 | nl = "\n"; | |
2707 | first_time = false; | |
2708 | } | |
2709 | ||
2710 | fprintf (stderr, " %s", GET_MODE_NAME (mode2)); | |
2711 | } | |
2712 | } | |
2713 | ||
2714 | if (!first_time) | |
2715 | fputs ("\n", stderr); | |
2716 | } | |
2717 | ||
2718 | if (nl) | |
2719 | fputs (nl, stderr); | |
2720 | ||
2721 | if (rs6000_recip_control) | |
2722 | { | |
2723 | fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control); | |
2724 | ||
2725 | for (m = 0; m < NUM_MACHINE_MODES; ++m) | |
2726 | if (rs6000_recip_bits[m]) | |
2727 | { | |
2728 | fprintf (stderr, | |
2729 | "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n", | |
2730 | GET_MODE_NAME (m), | |
2731 | (RS6000_RECIP_AUTO_RE_P (m) | |
2732 | ? "auto" | |
2733 | : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")), | |
2734 | (RS6000_RECIP_AUTO_RSQRTE_P (m) | |
2735 | ? "auto" | |
2736 | : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none"))); | |
2737 | } | |
2738 | ||
2739 | fputs ("\n", stderr); | |
2740 | } | |
2741 | ||
2742 | if (rs6000_cpu_index >= 0) | |
2743 | { | |
2744 | const char *name = processor_target_table[rs6000_cpu_index].name; | |
2745 | HOST_WIDE_INT flags | |
2746 | = processor_target_table[rs6000_cpu_index].target_enable; | |
2747 | ||
2748 | sprintf (flags_buffer, "-mcpu=%s flags", name); | |
2749 | rs6000_print_isa_options (stderr, 0, flags_buffer, flags); | |
2750 | } | |
2751 | else | |
2752 | fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>"); | |
2753 | ||
2754 | if (rs6000_tune_index >= 0) | |
2755 | { | |
2756 | const char *name = processor_target_table[rs6000_tune_index].name; | |
2757 | HOST_WIDE_INT flags | |
2758 | = processor_target_table[rs6000_tune_index].target_enable; | |
2759 | ||
2760 | sprintf (flags_buffer, "-mtune=%s flags", name); | |
2761 | rs6000_print_isa_options (stderr, 0, flags_buffer, flags); | |
2762 | } | |
2763 | else | |
2764 | fprintf (stderr, DEBUG_FMT_S, "tune", "<none>"); | |
2765 | ||
2766 | cl_target_option_save (&cl_opts, &global_options); | |
2767 | rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags", | |
2768 | rs6000_isa_flags); | |
2769 | ||
2770 | rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit", | |
2771 | rs6000_isa_flags_explicit); | |
2772 | ||
2773 | rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask", | |
2774 | rs6000_builtin_mask); | |
2775 | ||
2776 | rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT); | |
2777 | ||
2778 | fprintf (stderr, DEBUG_FMT_S, "--with-cpu default", | |
2779 | OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>"); | |
2780 | ||
2781 | switch (rs6000_sched_costly_dep) | |
2782 | { | |
2783 | case max_dep_latency: | |
2784 | costly_str = "max_dep_latency"; | |
2785 | break; | |
2786 | ||
2787 | case no_dep_costly: | |
2788 | costly_str = "no_dep_costly"; | |
2789 | break; | |
2790 | ||
2791 | case all_deps_costly: | |
2792 | costly_str = "all_deps_costly"; | |
2793 | break; | |
2794 | ||
2795 | case true_store_to_load_dep_costly: | |
2796 | costly_str = "true_store_to_load_dep_costly"; | |
2797 | break; | |
2798 | ||
2799 | case store_to_load_dep_costly: | |
2800 | costly_str = "store_to_load_dep_costly"; | |
2801 | break; | |
2802 | ||
2803 | default: | |
2804 | costly_str = costly_num; | |
2805 | sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep); | |
2806 | break; | |
2807 | } | |
2808 | ||
2809 | fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str); | |
2810 | ||
2811 | switch (rs6000_sched_insert_nops) | |
2812 | { | |
2813 | case sched_finish_regroup_exact: | |
2814 | nop_str = "sched_finish_regroup_exact"; | |
2815 | break; | |
2816 | ||
2817 | case sched_finish_pad_groups: | |
2818 | nop_str = "sched_finish_pad_groups"; | |
2819 | break; | |
2820 | ||
2821 | case sched_finish_none: | |
2822 | nop_str = "sched_finish_none"; | |
2823 | break; | |
2824 | ||
2825 | default: | |
2826 | nop_str = nop_num; | |
2827 | sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops); | |
2828 | break; | |
2829 | } | |
2830 | ||
2831 | fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str); | |
2832 | ||
2833 | switch (rs6000_sdata) | |
2834 | { | |
2835 | default: | |
2836 | case SDATA_NONE: | |
2837 | break; | |
2838 | ||
2839 | case SDATA_DATA: | |
2840 | fprintf (stderr, DEBUG_FMT_S, "sdata", "data"); | |
2841 | break; | |
2842 | ||
2843 | case SDATA_SYSV: | |
2844 | fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv"); | |
2845 | break; | |
2846 | ||
2847 | case SDATA_EABI: | |
2848 | fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi"); | |
2849 | break; | |
2850 | ||
2851 | } | |
2852 | ||
2853 | switch (rs6000_traceback) | |
2854 | { | |
2855 | case traceback_default: trace_str = "default"; break; | |
2856 | case traceback_none: trace_str = "none"; break; | |
2857 | case traceback_part: trace_str = "part"; break; | |
2858 | case traceback_full: trace_str = "full"; break; | |
2859 | default: trace_str = "unknown"; break; | |
2860 | } | |
2861 | ||
2862 | fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str); | |
2863 | ||
2864 | switch (rs6000_current_cmodel) | |
2865 | { | |
2866 | case CMODEL_SMALL: cmodel_str = "small"; break; | |
2867 | case CMODEL_MEDIUM: cmodel_str = "medium"; break; | |
2868 | case CMODEL_LARGE: cmodel_str = "large"; break; | |
2869 | default: cmodel_str = "unknown"; break; | |
2870 | } | |
2871 | ||
2872 | fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str); | |
2873 | ||
2874 | switch (rs6000_current_abi) | |
2875 | { | |
2876 | case ABI_NONE: abi_str = "none"; break; | |
2877 | case ABI_AIX: abi_str = "aix"; break; | |
2878 | case ABI_ELFv2: abi_str = "ELFv2"; break; | |
2879 | case ABI_V4: abi_str = "V4"; break; | |
2880 | case ABI_DARWIN: abi_str = "darwin"; break; | |
2881 | default: abi_str = "unknown"; break; | |
2882 | } | |
2883 | ||
2884 | fprintf (stderr, DEBUG_FMT_S, "abi", abi_str); | |
2885 | ||
2886 | if (rs6000_altivec_abi) | |
2887 | fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true"); | |
2888 | ||
2889 | if (rs6000_spe_abi) | |
2890 | fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true"); | |
2891 | ||
2892 | if (rs6000_darwin64_abi) | |
2893 | fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true"); | |
2894 | ||
2895 | if (rs6000_float_gprs) | |
2896 | fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true"); | |
2897 | ||
2898 | fprintf (stderr, DEBUG_FMT_S, "fprs", | |
2899 | (TARGET_FPRS ? "true" : "false")); | |
2900 | ||
2901 | fprintf (stderr, DEBUG_FMT_S, "single_float", | |
2902 | (TARGET_SINGLE_FLOAT ? "true" : "false")); | |
2903 | ||
2904 | fprintf (stderr, DEBUG_FMT_S, "double_float", | |
2905 | (TARGET_DOUBLE_FLOAT ? "true" : "false")); | |
2906 | ||
2907 | fprintf (stderr, DEBUG_FMT_S, "soft_float", | |
2908 | (TARGET_SOFT_FLOAT ? "true" : "false")); | |
2909 | ||
2910 | fprintf (stderr, DEBUG_FMT_S, "e500_single", | |
2911 | (TARGET_E500_SINGLE ? "true" : "false")); | |
2912 | ||
2913 | fprintf (stderr, DEBUG_FMT_S, "e500_double", | |
2914 | (TARGET_E500_DOUBLE ? "true" : "false")); | |
2915 | ||
2916 | if (TARGET_LINK_STACK) | |
2917 | fprintf (stderr, DEBUG_FMT_S, "link_stack", "true"); | |
2918 | ||
2919 | fprintf (stderr, DEBUG_FMT_S, "lra", TARGET_LRA ? "true" : "false"); | |
2920 | ||
2921 | if (TARGET_P8_FUSION) | |
2922 | { | |
2923 | char options[80]; | |
2924 | ||
2925 | strcpy (options, (TARGET_P9_FUSION) ? "power9" : "power8"); | |
2926 | if (TARGET_TOC_FUSION) | |
2927 | strcat (options, ", toc"); | |
2928 | ||
2929 | if (TARGET_P8_FUSION_SIGN) | |
2930 | strcat (options, ", sign"); | |
2931 | ||
2932 | fprintf (stderr, DEBUG_FMT_S, "fusion", options); | |
2933 | } | |
2934 | ||
2935 | fprintf (stderr, DEBUG_FMT_S, "plt-format", | |
2936 | TARGET_SECURE_PLT ? "secure" : "bss"); | |
2937 | fprintf (stderr, DEBUG_FMT_S, "struct-return", | |
2938 | aix_struct_return ? "aix" : "sysv"); | |
2939 | fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]); | |
2940 | fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]); | |
2941 | fprintf (stderr, DEBUG_FMT_S, "align_branch", | |
2942 | tf[!!rs6000_align_branch_targets]); | |
2943 | fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size); | |
2944 | fprintf (stderr, DEBUG_FMT_D, "long_double_size", | |
2945 | rs6000_long_double_type_size); | |
2946 | fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority", | |
2947 | (int)rs6000_sched_restricted_insns_priority); | |
2948 | fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins", | |
2949 | (int)END_BUILTINS); | |
2950 | fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins", | |
2951 | (int)RS6000_BUILTIN_COUNT); | |
2952 | ||
2953 | fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX", | |
2954 | (int)TARGET_FLOAT128_ENABLE_TYPE); | |
2955 | ||
2956 | if (TARGET_VSX) | |
2957 | fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element", | |
2958 | (int)VECTOR_ELEMENT_SCALAR_64BIT); | |
2959 | ||
2960 | if (TARGET_DIRECT_MOVE_128) | |
2961 | fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element", | |
2962 | (int)VECTOR_ELEMENT_MFVSRLD_64BIT); | |
2963 | } | |
2964 | ||
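/* Illustrative usage note (a sketch, assuming the usual rs6000 -mdebug
   handling; not part of the original file): the register dump above is
   emitted when TARGET_DEBUG_REG is set, e.g.

       gcc -mcpu=power9 -mdebug=reg -S foo.c

   prints these tables to stderr while compiling foo.c.  */
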
2965 | \f | |
2966 | /* Update the addr mask bits in reg_addr to help secondary reload and the | |
2967 | GO_IF_LEGITIMATE_ADDRESS support figure out the appropriate addressing to | |
2968 | use. */ | |
2969 | ||
2970 | static void | |
2971 | rs6000_setup_reg_addr_masks (void) | |
2972 | { | |
2973 | ssize_t rc, reg, m, nregs; | |
2974 | addr_mask_type any_addr_mask, addr_mask; | |
2975 | ||
2976 | for (m = 0; m < NUM_MACHINE_MODES; ++m) | |
2977 | { | |
2978 | machine_mode m2 = (machine_mode) m; | |
2979 | bool complex_p = false; | |
2980 | bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode); | |
2981 | size_t msize; | |
2982 | ||
2983 | if (COMPLEX_MODE_P (m2)) | |
2984 | { | |
2985 | complex_p = true; | |
2986 | m2 = GET_MODE_INNER (m2); | |
2987 | } | |
2988 | ||
2989 | msize = GET_MODE_SIZE (m2); | |
2990 | ||
2991 | /* SDmode is special in that we want to access it only via REG+REG | |
2992 | addressing on power7 and above, since we want to use the LFIWZX and | |
2993 | STFIWX instructions to load and store it. */ | |
2994 | bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK); | |
2995 | ||
2996 | any_addr_mask = 0; | |
2997 | for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++) | |
2998 | { | |
2999 | addr_mask = 0; | |
3000 | reg = reload_reg_map[rc].reg; | |
3001 | ||
3002 | /* Can mode values go in the GPR/FPR/Altivec registers? */ | |
3003 | if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg]) | |
3004 | { | |
3005 | bool small_int_vsx_p = (small_int_p | |
3006 | && (rc == RELOAD_REG_FPR | |
3007 | || rc == RELOAD_REG_VMX)); | |
3008 | ||
3009 | nregs = rs6000_hard_regno_nregs[m][reg]; | |
3010 | addr_mask |= RELOAD_REG_VALID; | |
3011 | ||
3012 | /* Indicate if the mode takes more than 1 physical register. If | |
3013 | it takes a single register, indicate it can do REG+REG | |
3014 | addressing. Small integers in VSX registers can only do | |
3015 | REG+REG addressing. */ | |
3016 | if (small_int_vsx_p) | |
3017 | addr_mask |= RELOAD_REG_INDEXED; | |
3018 | else if (nregs > 1 || m == BLKmode || complex_p) | |
3019 | addr_mask |= RELOAD_REG_MULTIPLE; | |
3020 | else | |
3021 | addr_mask |= RELOAD_REG_INDEXED; | |
3022 | ||
3023 | /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY | |
3024 | addressing. Restrict addressing on SPE for 64-bit types | |
3025 | because of the SUBREG hackery used to address 64-bit floats in | |
3026 | '32-bit' GPRs. If we allow scalars into Altivec registers, | |
3027 | don't allow PRE_INC, PRE_DEC, or PRE_MODIFY. */ | |
3028 | ||
3029 | if (TARGET_UPDATE | |
3030 | && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR) | |
3031 | && msize <= 8 | |
3032 | && !VECTOR_MODE_P (m2) | |
3033 | && !FLOAT128_VECTOR_P (m2) | |
3034 | && !complex_p | |
3035 | && !small_int_vsx_p | |
3036 | && (m2 != DFmode || !TARGET_UPPER_REGS_DF) | |
3037 | && (m2 != SFmode || !TARGET_UPPER_REGS_SF) | |
3038 | && !(TARGET_E500_DOUBLE && msize == 8)) | |
3039 | { | |
3040 | addr_mask |= RELOAD_REG_PRE_INCDEC; | |
3041 | ||
3042 | /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that | |
3043 | we don't allow PRE_MODIFY for some multi-register | |
3044 | operations. */ | |
3045 | switch (m) | |
3046 | { | |
3047 | default: | |
3048 | addr_mask |= RELOAD_REG_PRE_MODIFY; | |
3049 | break; | |
3050 | ||
916ace94 | 3051 | case E_DImode: |
01e91138 | 3052 | if (TARGET_POWERPC64) |
3053 | addr_mask |= RELOAD_REG_PRE_MODIFY; | |
3054 | break; | |
3055 | ||
916ace94 | 3056 | case E_DFmode: |
3057 | case E_DDmode: | |
01e91138 | 3058 | if (TARGET_DF_INSN) |
3059 | addr_mask |= RELOAD_REG_PRE_MODIFY; | |
3060 | break; | |
3061 | } | |
3062 | } | |
3063 | } | |
3064 | ||
3065 | /* GPR and FPR registers can do REG+OFFSET addressing, except | |
3066 | possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing | |
3067 | for 64-bit scalars and 32-bit SFmode to altivec registers. */ | |
3068 | if ((addr_mask != 0) && !indexed_only_p | |
3069 | && msize <= 8 | |
3070 | && (rc == RELOAD_REG_GPR | |
3071 | || ((msize == 8 || m2 == SFmode) | |
3072 | && (rc == RELOAD_REG_FPR | |
3073 | || (rc == RELOAD_REG_VMX | |
3074 | && TARGET_P9_DFORM_SCALAR))))) | |
3075 | addr_mask |= RELOAD_REG_OFFSET; | |
3076 | ||
3077 | /* VSX registers can do REG+OFFSET addressing if ISA 3.0 | |
3078 | instructions are enabled. The offset for 128-bit VSX registers is | |
3079 | only 12 bits. While GPRs can handle the full offset range, VSX | |
3080 | registers can only handle the restricted range. */ | |
3081 | else if ((addr_mask != 0) && !indexed_only_p | |
3082 | && msize == 16 && TARGET_P9_DFORM_VECTOR | |
3083 | && (ALTIVEC_OR_VSX_VECTOR_MODE (m2) | |
3084 | || (m2 == TImode && TARGET_VSX_TIMODE))) | |
3085 | { | |
3086 | addr_mask |= RELOAD_REG_OFFSET; | |
3087 | if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX) | |
3088 | addr_mask |= RELOAD_REG_QUAD_OFFSET; | |
3089 | } | |
3090 | ||
3091 | /* VMX registers can do (REG & -16) and ((REG+REG) & -16) | |
3092 | addressing on 128-bit types. */ | |
3093 | if (rc == RELOAD_REG_VMX && msize == 16 | |
3094 | && (addr_mask & RELOAD_REG_VALID) != 0) | |
3095 | addr_mask |= RELOAD_REG_AND_M16; | |
3096 | ||
3097 | reg_addr[m].addr_mask[rc] = addr_mask; | |
3098 | any_addr_mask |= addr_mask; | |
3099 | } | |
3100 | ||
3101 | reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask; | |
3102 | } | |
3103 | } | |
3104 | ||
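/* A minimal sketch (hypothetical helper, not in the original file) of how
   the masks built above are consumed: whether a mode supports D-form
   (REG+OFFSET) addressing in a given reload register class is one bit test
   against the table filled in by rs6000_setup_reg_addr_masks.  */

static inline bool
mode_supports_offset_addressing_p (machine_mode mode,
                                   enum rs6000_reload_reg_type rt)
{
  return (reg_addr[mode].addr_mask[rt] & RELOAD_REG_OFFSET) != 0;
}
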
3105 | \f | |
3106 | /* Initialize the various global tables that are based on register size. */ | |
3107 | static void | |
3108 | rs6000_init_hard_regno_mode_ok (bool global_init_p) | |
3109 | { | |
3110 | ssize_t r, m, c; | |
3111 | int align64; | |
3112 | int align32; | |
3113 | ||
3114 | /* Precalculate REGNO_REG_CLASS. */ | |
3115 | rs6000_regno_regclass[0] = GENERAL_REGS; | |
3116 | for (r = 1; r < 32; ++r) | |
3117 | rs6000_regno_regclass[r] = BASE_REGS; | |
3118 | ||
3119 | for (r = 32; r < 64; ++r) | |
3120 | rs6000_regno_regclass[r] = FLOAT_REGS; | |
3121 | ||
3122 | for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r) | |
3123 | rs6000_regno_regclass[r] = NO_REGS; | |
3124 | ||
3125 | for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r) | |
3126 | rs6000_regno_regclass[r] = ALTIVEC_REGS; | |
3127 | ||
3128 | rs6000_regno_regclass[CR0_REGNO] = CR0_REGS; | |
3129 | for (r = CR1_REGNO; r <= CR7_REGNO; ++r) | |
3130 | rs6000_regno_regclass[r] = CR_REGS; | |
3131 | ||
3132 | rs6000_regno_regclass[LR_REGNO] = LINK_REGS; | |
3133 | rs6000_regno_regclass[CTR_REGNO] = CTR_REGS; | |
3134 | rs6000_regno_regclass[CA_REGNO] = NO_REGS; | |
3135 | rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS; | |
3136 | rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS; | |
3137 | rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS; | |
3138 | rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS; | |
3139 | rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS; | |
3140 | rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS; | |
3141 | rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS; | |
3142 | rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS; | |
3143 | rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS; | |
3144 | ||
3145 | /* Precalculate the mapping from register class to the simpler reload | |
3146 | register class. We don't need all the combination classes, just the | |
3147 | simple ones that have constraint letters. */ | |
3148 | for (c = 0; c < N_REG_CLASSES; c++) | |
3149 | reg_class_to_reg_type[c] = NO_REG_TYPE; | |
3150 | ||
3151 | reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE; | |
3152 | reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE; | |
3153 | reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE; | |
3154 | reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE; | |
3155 | reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE; | |
3156 | reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE; | |
3157 | reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE; | |
3158 | reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE; | |
3159 | reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE; | |
3160 | reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE; | |
3161 | reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE; | |
3162 | reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE; | |
3163 | ||
3164 | if (TARGET_VSX) | |
3165 | { | |
3166 | reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE; | |
3167 | reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE; | |
3168 | } | |
3169 | else | |
3170 | { | |
3171 | reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE; | |
3172 | reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE; | |
3173 | } | |
3174 | ||
3175 | /* Precalculate the valid memory formats as well as the vector information; | |
3176 | this must be set up before the rs6000_hard_regno_nregs_internal calls | |
3177 | below. */ | |
3178 | gcc_assert ((int)VECTOR_NONE == 0); | |
3179 | memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit)); | |
3180 | memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_unit)); | |
3181 | ||
3182 | gcc_assert ((int)CODE_FOR_nothing == 0); | |
3183 | memset ((void *) ®_addr[0], '\0', sizeof (reg_addr)); | |
3184 | ||
3185 | gcc_assert ((int)NO_REGS == 0); | |
3186 | memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints)); | |
3187 | ||
3188 | /* The VSX hardware allows native alignment for vectors; control whether | |
3189 | the compiler believes it can use it or must still use 128-bit alignment. */ | |
3190 | if (TARGET_VSX && !TARGET_VSX_ALIGN_128) | |
3191 | { | |
3192 | align64 = 64; | |
3193 | align32 = 32; | |
3194 | } | |
3195 | else | |
3196 | { | |
3197 | align64 = 128; | |
3198 | align32 = 128; | |
3199 | } | |
3200 | ||
3201 | /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so | |
3202 | only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */ | |
3203 | if (TARGET_FLOAT128_TYPE) | |
3204 | { | |
3205 | rs6000_vector_mem[KFmode] = VECTOR_VSX; | |
3206 | rs6000_vector_align[KFmode] = 128; | |
3207 | ||
3208 | if (FLOAT128_IEEE_P (TFmode)) | |
3209 | { | |
3210 | rs6000_vector_mem[TFmode] = VECTOR_VSX; | |
3211 | rs6000_vector_align[TFmode] = 128; | |
3212 | } | |
3213 | } | |
3214 | ||
3215 | /* V2DF mode, VSX only. */ | |
3216 | if (TARGET_VSX) | |
3217 | { | |
3218 | rs6000_vector_unit[V2DFmode] = VECTOR_VSX; | |
3219 | rs6000_vector_mem[V2DFmode] = VECTOR_VSX; | |
3220 | rs6000_vector_align[V2DFmode] = align64; | |
3221 | } | |
3222 | ||
3223 | /* V4SF mode, either VSX or Altivec. */ | |
3224 | if (TARGET_VSX) | |
3225 | { | |
3226 | rs6000_vector_unit[V4SFmode] = VECTOR_VSX; | |
3227 | rs6000_vector_mem[V4SFmode] = VECTOR_VSX; | |
3228 | rs6000_vector_align[V4SFmode] = align32; | |
3229 | } | |
3230 | else if (TARGET_ALTIVEC) | |
3231 | { | |
3232 | rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC; | |
3233 | rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC; | |
3234 | rs6000_vector_align[V4SFmode] = align32; | |
3235 | } | |
3236 | ||
3237 | /* V16QImode, V8HImode, and V4SImode are Altivec only, but may use VSX | |
3238 | loads and stores. */ | |
3239 | if (TARGET_ALTIVEC) | |
3240 | { | |
3241 | rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC; | |
3242 | rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC; | |
3243 | rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC; | |
3244 | rs6000_vector_align[V4SImode] = align32; | |
3245 | rs6000_vector_align[V8HImode] = align32; | |
3246 | rs6000_vector_align[V16QImode] = align32; | |
3247 | ||
3248 | if (TARGET_VSX) | |
3249 | { | |
3250 | rs6000_vector_mem[V4SImode] = VECTOR_VSX; | |
3251 | rs6000_vector_mem[V8HImode] = VECTOR_VSX; | |
3252 | rs6000_vector_mem[V16QImode] = VECTOR_VSX; | |
3253 | } | |
3254 | else | |
3255 | { | |
3256 | rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC; | |
3257 | rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC; | |
3258 | rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC; | |
3259 | } | |
3260 | } | |
3261 | ||
3262 | /* V2DImode; full support depends on the ISA 2.07 vector unit. Allow it under | |
3263 | VSX to do insert/splat/extract. Altivec doesn't have 64-bit integer support. */ | |
3264 | if (TARGET_VSX) | |
3265 | { | |
3266 | rs6000_vector_mem[V2DImode] = VECTOR_VSX; | |
3267 | rs6000_vector_unit[V2DImode] | |
3268 | = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE; | |
3269 | rs6000_vector_align[V2DImode] = align64; | |
3270 | ||
3271 | rs6000_vector_mem[V1TImode] = VECTOR_VSX; | |
3272 | rs6000_vector_unit[V1TImode] | |
3273 | = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE; | |
3274 | rs6000_vector_align[V1TImode] = 128; | |
3275 | } | |
3276 | ||
3277 | /* DFmode, see if we want to use the VSX unit. Memory is handled | |
3278 | differently, so don't set rs6000_vector_mem. */ | |
3279 | if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE) | |
3280 | { | |
3281 | rs6000_vector_unit[DFmode] = VECTOR_VSX; | |
3282 | rs6000_vector_align[DFmode] = 64; | |
3283 | } | |
3284 | ||
3285 | /* SFmode, see if we want to use the VSX unit. */ | |
3286 | if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT) | |
3287 | { | |
3288 | rs6000_vector_unit[SFmode] = VECTOR_VSX; | |
3289 | rs6000_vector_align[SFmode] = 32; | |
3290 | } | |
3291 | ||
3292 | /* Allow TImode in VSX registers and set the VSX memory macros. */ | |
3293 | if (TARGET_VSX && TARGET_VSX_TIMODE) | |
3294 | { | |
3295 | rs6000_vector_mem[TImode] = VECTOR_VSX; | |
3296 | rs6000_vector_align[TImode] = align64; | |
3297 | } | |
3298 | ||
3299 | /* TODO add SPE and paired floating point vector support. */ | |
3300 | ||
3301 | /* Register class constraints for the constraints that depend on compile | |
3302 | switches. When the VSX code was added, different constraints were added | |
3303 | based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all | |
3304 | of the VSX registers are used. The register classes for scalar floating | |
3305 | point types are set based on whether we allow that type into the upper | |
3306 | (Altivec) registers. GCC has register classes to target the Altivec | |
3307 | registers for load/store operations, so that it can select a VSX memory | |
3308 | operation instead of the traditional floating point operation. The | |
3309 | constraints are: | |
3310 | ||
3311 | d - Register class to use with traditional DFmode instructions. | |
3312 | f - Register class to use with traditional SFmode instructions. | |
3313 | v - Altivec register. | |
3314 | wa - Any VSX register. | |
3315 | wc - Reserved to represent individual CR bits (used in LLVM). | |
3316 | wd - Preferred register class for V2DFmode. | |
3317 | wf - Preferred register class for V4SFmode. | |
3318 | wg - Float register for power6x move insns. | |
3319 | wh - FP register for direct move instructions. | |
3320 | wi - FP or VSX register to hold 64-bit integers for VSX insns. | |
3321 | wj - FP or VSX register to hold 64-bit integers for direct moves. | |
3322 | wk - FP or VSX register to hold 64-bit doubles for direct moves. | |
3323 | wl - Float register if we can do 32-bit signed int loads. | |
3324 | wm - VSX register for ISA 2.07 direct move operations. | |
3325 | wn - always NO_REGS. | |
3326 | wr - GPR if 64-bit mode is permitted. | |
3327 | ws - Register class to do ISA 2.06 DF operations. | |
3328 | wt - VSX register for TImode in VSX registers. | |
3329 | wu - Altivec register for ISA 2.07 VSX SF/SI load/stores. | |
3330 | wv - Altivec register for ISA 2.06 VSX DF/DI load/stores. | |
3331 | ww - Register class to do SF conversions in with VSX operations. | |
3332 | wx - Float register if we can do 32-bit int stores. | |
3333 | wy - Register class to do ISA 2.07 SF operations. | |
3334 | wz - Float register if we can do 32-bit unsigned int loads. | |
3335 | wH - Altivec register if SImode is allowed in VSX registers. | |
3336 | wI - VSX register if SImode is allowed in VSX registers. | |
3337 | wJ - VSX register if QImode/HImode are allowed in VSX registers. | |
3338 | wK - Altivec register if QImode/HImode are allowed in VSX registers. */ | |
3339 | ||
3340 | if (TARGET_HARD_FLOAT && TARGET_FPRS) | |
3341 | rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */ | |
3342 | ||
3343 | if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) | |
3344 | rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */ | |
3345 | ||
3346 | if (TARGET_VSX) | |
3347 | { | |
3348 | rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS; | |
3349 | rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */ | |
3350 | rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */ | |
3351 | ||
3352 | if (TARGET_VSX_TIMODE) | |
3353 | rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */ | |
3354 | ||
3355 | if (TARGET_UPPER_REGS_DF) /* DFmode */ | |
3356 | { | |
3357 | rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS; | |
3358 | rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS; | |
3359 | } | |
3360 | else | |
3361 | rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS; | |
3362 | ||
3363 | if (TARGET_UPPER_REGS_DI) /* DImode */ | |
3364 | rs6000_constraints[RS6000_CONSTRAINT_wi] = VSX_REGS; | |
3365 | else | |
3366 | rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS; | |
3367 | } | |
3368 | ||
3369 | /* Add conditional constraints based on various options, to allow us to | |
3370 | collapse multiple insn patterns. */ | |
3371 | if (TARGET_ALTIVEC) | |
3372 | rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS; | |
3373 | ||
3374 | if (TARGET_MFPGPR) /* DFmode */ | |
3375 | rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS; | |
3376 | ||
3377 | if (TARGET_LFIWAX) | |
3378 | rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */ | |
3379 | ||
3380 | if (TARGET_DIRECT_MOVE) | |
3381 | { | |
3382 | rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS; | |
3383 | rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */ | |
3384 | = rs6000_constraints[RS6000_CONSTRAINT_wi]; | |
3385 | rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */ | |
3386 | = rs6000_constraints[RS6000_CONSTRAINT_ws]; | |
3387 | rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS; | |
3388 | } | |
3389 | ||
3390 | if (TARGET_POWERPC64) | |
3391 | { | |
3392 | rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS; | |
3393 | rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS; | |
3394 | } | |
3395 | ||
3396 | if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */ | |
3397 | { | |
3398 | rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS; | |
3399 | rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS; | |
3400 | rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS; | |
3401 | } | |
3402 | else if (TARGET_P8_VECTOR) | |
3403 | { | |
3404 | rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS; | |
3405 | rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS; | |
3406 | } | |
3407 | else if (TARGET_VSX) | |
3408 | rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS; | |
3409 | ||
3410 | if (TARGET_STFIWX) | |
3411 | rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */ | |
3412 | ||
3413 | if (TARGET_LFIWZX) | |
3414 | rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */ | |
3415 | ||
3416 | if (TARGET_FLOAT128_TYPE) | |
3417 | { | |
3418 | rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS; /* KFmode */ | |
3419 | if (FLOAT128_IEEE_P (TFmode)) | |
3420 | rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS; /* TFmode */ | |
3421 | } | |
3422 | ||
3423 | /* Support for new D-form instructions. */ | |
3424 | if (TARGET_P9_DFORM_SCALAR) | |
3425 | rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS; | |
3426 | ||
3427 | /* Support for ISA 3.0 (power9) vectors. */ | |
3428 | if (TARGET_P9_VECTOR) | |
3429 | rs6000_constraints[RS6000_CONSTRAINT_wo] = VSX_REGS; | |
3430 | ||
3431 | /* Support for new direct moves (ISA 3.0 + 64-bit). */ | |
3432 | if (TARGET_DIRECT_MOVE_128) | |
3433 | rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS; | |
3434 | ||
3435 | /* Support small integers in VSX registers. */ | |
3436 | if (TARGET_VSX_SMALL_INTEGER) | |
3437 | { | |
3438 | rs6000_constraints[RS6000_CONSTRAINT_wH] = ALTIVEC_REGS; | |
3439 | rs6000_constraints[RS6000_CONSTRAINT_wI] = FLOAT_REGS; | |
3440 | if (TARGET_P9_VECTOR) | |
3441 | { | |
3442 | rs6000_constraints[RS6000_CONSTRAINT_wJ] = FLOAT_REGS; | |
3443 | rs6000_constraints[RS6000_CONSTRAINT_wK] = ALTIVEC_REGS; | |
3444 | } | |
3445 | } | |
3446 | ||
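/* Illustrative user-level example (a sketch, not in the original file, and
   only meaningful when compiled for PowerPC with -mvsx): the constraint
   letters documented above can also appear in inline asm, e.g. "wa" for any
   VSX register.  The "%x" operand modifier prints the full VSX register
   number, and xvadddp is the VSX double-precision vector add.  */

typedef double example_v2df __attribute__ ((vector_size (16)));

static inline example_v2df
example_vsx_add (example_v2df a, example_v2df b)
{
  example_v2df r;
  __asm__ ("xvadddp %x0,%x1,%x2" : "=wa" (r) : "wa" (a), "wa" (b));
  return r;
}
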
3447 | /* Set up the reload helper and direct move functions. */ | |
3448 | if (TARGET_VSX || TARGET_ALTIVEC) | |
3449 | { | |
3450 | if (TARGET_64BIT) | |
3451 | { | |
3452 | reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store; | |
3453 | reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load; | |
3454 | reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store; | |
3455 | reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load; | |
3456 | reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store; | |
3457 | reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load; | |
3458 | reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store; | |
3459 | reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load; | |
3460 | reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store; | |
3461 | reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load; | |
3462 | reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store; | |
3463 | reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load; | |
3464 | reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store; | |
3465 | reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load; | |
3466 | reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store; | |
3467 | reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load; | |
3468 | reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store; | |
3469 | reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load; | |
3470 | reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store; | |
3471 | reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load; | |
3472 | ||
3473 | if (FLOAT128_VECTOR_P (KFmode)) | |
3474 | { | |
3475 | reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store; | |
3476 | reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load; | |
3477 | } | |
3478 | ||
3479 | if (FLOAT128_VECTOR_P (TFmode)) | |
3480 | { | |
3481 | reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store; | |
3482 | reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load; | |
3483 | } | |
3484 | ||
3485 | /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are | |
3486 | available. */ | |
3487 | if (TARGET_NO_SDMODE_STACK) | |
3488 | { | |
3489 | reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store; | |
3490 | reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load; | |
3491 | } | |
3492 | ||
3493 | if (TARGET_VSX_TIMODE) | |
3494 | { | |
3495 | reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store; | |
3496 | reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load; | |
3497 | } | |
3498 | ||
3499 | if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128) | |
3500 | { | |
3501 | reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti; | |
3502 | reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti; | |
3503 | reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df; | |
3504 | reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di; | |
3505 | reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf; | |
3506 | reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si; | |
3507 | reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi; | |
3508 | reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi; | |
3509 | reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf; | |
3510 | ||
3511 | reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti; | |
3512 | reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti; | |
3513 | reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df; | |
3514 | reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di; | |
3515 | reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf; | |
3516 | reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si; | |
3517 | reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi; | |
3518 | reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi; | |
3519 | reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf; | |
3520 | ||
3521 | if (FLOAT128_VECTOR_P (KFmode)) | |
3522 | { | |
3523 | reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf; | |
3524 | reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf; | |
3525 | } | |
3526 | ||
3527 | if (FLOAT128_VECTOR_P (TFmode)) | |
3528 | { | |
3529 | reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf; | |
3530 | reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf; | |
3531 | } | |
3532 | } | |
3533 | } | |
3534 | else | |
3535 | { | |
3536 | reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store; | |
3537 | reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load; | |
3538 | reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store; | |
3539 | reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load; | |
3540 | reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store; | |
3541 | reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load; | |
3542 | reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store; | |
3543 | reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load; | |
3544 | reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store; | |
3545 | reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load; | |
3546 | reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store; | |
3547 | reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load; | |
3548 | reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store; | |
3549 | reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load; | |
3550 | reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store; | |
3551 | reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load; | |
3552 | reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store; | |
3553 | reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load; | |
3554 | reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store; | |
3555 | reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load; | |
3556 | ||
3557 | if (FLOAT128_VECTOR_P (KFmode)) | |
3558 | { | |
3559 | reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store; | |
3560 | reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load; | |
3561 | } | |
3562 | ||
3563 | if (FLOAT128_IEEE_P (TFmode)) | |
3564 | { | |
3565 | reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store; | |
3566 | reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load; | |
3567 | } | |
3568 | ||
3569 | /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are | |
3570 | available. */ | |
3571 | if (TARGET_NO_SDMODE_STACK) | |
3572 | { | |
3573 | reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store; | |
3574 | reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load; | |
3575 | } | |
3576 | ||
3577 | if (TARGET_VSX_TIMODE) | |
3578 | { | |
3579 | reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store; | |
3580 | reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load; | |
3581 | } | |
3582 | ||
3583 | if (TARGET_DIRECT_MOVE) | |
3584 | { | |
3585 | reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi; | |
3586 | reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd; | |
3587 | reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf; | |
3588 | } | |
3589 | } | |
3590 | ||
3591 | if (TARGET_UPPER_REGS_DF) | |
3592 | reg_addr[DFmode].scalar_in_vmx_p = true; | |
3593 | ||
3594 | if (TARGET_UPPER_REGS_DI) | |
3595 | reg_addr[DImode].scalar_in_vmx_p = true; | |
3596 | ||
3597 | if (TARGET_UPPER_REGS_SF) | |
3598 | reg_addr[SFmode].scalar_in_vmx_p = true; | |
3599 | ||
3600 | if (TARGET_VSX_SMALL_INTEGER) | |
3601 | { | |
3602 | reg_addr[SImode].scalar_in_vmx_p = true; | |
3603 | if (TARGET_P9_VECTOR) | |
3604 | { | |
3605 | reg_addr[HImode].scalar_in_vmx_p = true; | |
3606 | reg_addr[QImode].scalar_in_vmx_p = true; | |
3607 | } | |
3608 | } | |
3609 | } | |
3610 | ||
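/* A minimal sketch (hypothetical helper, not in the original file) of how
   the icodes registered above get used: secondary reload asks for the
   direct-move pattern of a mode and gets CODE_FOR_nothing when none was
   registered, since reg_addr was cleared to zero earlier and
   CODE_FOR_nothing is asserted to be 0.  */

static inline enum insn_code
example_gpr_from_vsx_icode (machine_mode mode)
{
  return reg_addr[mode].reload_gpr_vsx;
}
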
3611 | /* Set up the fusion operations. */ | |
3612 | if (TARGET_P8_FUSION) | |
3613 | { | |
3614 | reg_addr[QImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_qi; | |
3615 | reg_addr[HImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_hi; | |
3616 | reg_addr[SImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_si; | |
3617 | if (TARGET_64BIT) | |
3618 | reg_addr[DImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_di; | |
3619 | } | |
3620 | ||
3621 | if (TARGET_P9_FUSION) | |
3622 | { | |
3623 | struct fuse_insns { | |
3624 | machine_mode mode; /* mode of the fused type. */ | |
3625 | machine_mode pmode; /* pointer mode. */ | |
3626 | enum rs6000_reload_reg_type rtype; /* register type. */ | |
3627 | enum insn_code load; /* load insn. */ | |
3628 | enum insn_code store; /* store insn. */ | |
3629 | }; | |
3630 | ||
3631 | static const struct fuse_insns addis_insns[] = { | |
1e0295b9 | 3632 | { E_SFmode, E_DImode, RELOAD_REG_FPR, |
01e91138 | 3633 | CODE_FOR_fusion_vsx_di_sf_load, |
3634 | CODE_FOR_fusion_vsx_di_sf_store }, | |
3635 | ||
1e0295b9 | 3636 | { E_SFmode, E_SImode, RELOAD_REG_FPR, |
01e91138 | 3637 | CODE_FOR_fusion_vsx_si_sf_load, |
3638 | CODE_FOR_fusion_vsx_si_sf_store }, | |
3639 | ||
1e0295b9 | 3640 | { E_DFmode, E_DImode, RELOAD_REG_FPR, |
01e91138 | 3641 | CODE_FOR_fusion_vsx_di_df_load, |
3642 | CODE_FOR_fusion_vsx_di_df_store }, | |
3643 | ||
1e0295b9 | 3644 | { E_DFmode, E_SImode, RELOAD_REG_FPR, |
01e91138 | 3645 | CODE_FOR_fusion_vsx_si_df_load, |
3646 | CODE_FOR_fusion_vsx_si_df_store }, | |
3647 | ||
1e0295b9 | 3648 | { E_DImode, E_DImode, RELOAD_REG_FPR, |
01e91138 | 3649 | CODE_FOR_fusion_vsx_di_di_load, |
3650 | CODE_FOR_fusion_vsx_di_di_store }, | |
3651 | ||
1e0295b9 | 3652 | { E_DImode, E_SImode, RELOAD_REG_FPR, |
01e91138 | 3653 | CODE_FOR_fusion_vsx_si_di_load, |
3654 | CODE_FOR_fusion_vsx_si_di_store }, | |
3655 | ||
1e0295b9 | 3656 | { E_QImode, E_DImode, RELOAD_REG_GPR, |
01e91138 | 3657 | CODE_FOR_fusion_gpr_di_qi_load, |
3658 | CODE_FOR_fusion_gpr_di_qi_store }, | |
3659 | ||
1e0295b9 | 3660 | { E_QImode, E_SImode, RELOAD_REG_GPR, |
01e91138 | 3661 | CODE_FOR_fusion_gpr_si_qi_load, |
3662 | CODE_FOR_fusion_gpr_si_qi_store }, | |
3663 | ||
1e0295b9 | 3664 | { E_HImode, E_DImode, RELOAD_REG_GPR, |
01e91138 | 3665 | CODE_FOR_fusion_gpr_di_hi_load, |
3666 | CODE_FOR_fusion_gpr_di_hi_store }, | |
3667 | ||
1e0295b9 | 3668 | { E_HImode, E_SImode, RELOAD_REG_GPR, |
01e91138 | 3669 | CODE_FOR_fusion_gpr_si_hi_load, |
3670 | CODE_FOR_fusion_gpr_si_hi_store }, | |
3671 | ||
1e0295b9 | 3672 | { E_SImode, E_DImode, RELOAD_REG_GPR, |
01e91138 | 3673 | CODE_FOR_fusion_gpr_di_si_load, |
3674 | CODE_FOR_fusion_gpr_di_si_store }, | |
3675 | ||
1e0295b9 | 3676 | { E_SImode, E_SImode, RELOAD_REG_GPR, |
01e91138 | 3677 | CODE_FOR_fusion_gpr_si_si_load, |
3678 | CODE_FOR_fusion_gpr_si_si_store }, | |
3679 | ||
1e0295b9 | 3680 | { E_SFmode, E_DImode, RELOAD_REG_GPR, |
01e91138 | 3681 | CODE_FOR_fusion_gpr_di_sf_load, |
3682 | CODE_FOR_fusion_gpr_di_sf_store }, | |
3683 | ||
1e0295b9 | 3684 | { E_SFmode, E_SImode, RELOAD_REG_GPR, |
01e91138 | 3685 | CODE_FOR_fusion_gpr_si_sf_load, |
3686 | CODE_FOR_fusion_gpr_si_sf_store }, | |
3687 | ||
1e0295b9 | 3688 | { E_DImode, E_DImode, RELOAD_REG_GPR, |
01e91138 | 3689 | CODE_FOR_fusion_gpr_di_di_load, |
3690 | CODE_FOR_fusion_gpr_di_di_store }, | |
3691 | ||
1e0295b9 | 3692 | { E_DFmode, E_DImode, RELOAD_REG_GPR, |
01e91138 | 3693 | CODE_FOR_fusion_gpr_di_df_load, |
3694 | CODE_FOR_fusion_gpr_di_df_store }, | |
3695 | }; | |
3696 | ||
582adad1 | 3697 | machine_mode cur_pmode = Pmode; |
01e91138 | 3698 | size_t i; |
3699 | ||
3700 | for (i = 0; i < ARRAY_SIZE (addis_insns); i++) | |
3701 | { | |
582adad1 | 3702 | machine_mode xmode = addis_insns[i].mode; |
01e91138 | 3703 | enum rs6000_reload_reg_type rtype = addis_insns[i].rtype; |
3704 | ||
3705 | if (addis_insns[i].pmode != cur_pmode) | |
3706 | continue; | |
3707 | ||
3708 | if (rtype == RELOAD_REG_FPR | |
3709 | && (!TARGET_HARD_FLOAT || !TARGET_FPRS)) | |
3710 | continue; | |
3711 | ||
3712 | reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load; | |
3713 | reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store; | |
3714 | ||
3715 | if (rtype == RELOAD_REG_FPR && TARGET_P9_DFORM_SCALAR) | |
3716 | { | |
3717 | reg_addr[xmode].fusion_addis_ld[RELOAD_REG_VMX] | |
3718 | = addis_insns[i].load; | |
3719 | reg_addr[xmode].fusion_addis_st[RELOAD_REG_VMX] | |
3720 | = addis_insns[i].store; | |
3721 | } | |
3722 | } | |
3723 | } | |
3724 | ||
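/* Illustrative PowerPC sequence (a sketch, not in the original file) of the
   addis fusion registered above: an addis that builds the high 16 bits of
   an address, immediately followed by the dependent memory access, e.g.

       addis 9,2,var@toc@ha
       lwz   3,var@toc@l(9)

   Power8/power9 cores can fuse such an adjacent pair into one internal
   operation, which is why the fusion load/store patterns above keep the two
   instructions together.  */
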
3725 | /* Note which types support fusing a TOC setup plus a memory insn. We only | |
3726 | do fused TOCs for the medium/large code models. */ | |
3727 | if (TARGET_P8_FUSION && TARGET_TOC_FUSION && TARGET_POWERPC64 | |
3728 | && (TARGET_CMODEL != CMODEL_SMALL)) | |
3729 | { | |
3730 | reg_addr[QImode].fused_toc = true; | |
3731 | reg_addr[HImode].fused_toc = true; | |
3732 | reg_addr[SImode].fused_toc = true; | |
3733 | reg_addr[DImode].fused_toc = true; | |
3734 | if (TARGET_HARD_FLOAT && TARGET_FPRS) | |
3735 | { | |
3736 | if (TARGET_SINGLE_FLOAT) | |
3737 | reg_addr[SFmode].fused_toc = true; | |
3738 | if (TARGET_DOUBLE_FLOAT) | |
3739 | reg_addr[DFmode].fused_toc = true; | |
3740 | } | |
3741 | } | |
3742 | ||
3743 | /* Precalculate HARD_REGNO_NREGS. */ | |
3744 | for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r) | |
3745 | for (m = 0; m < NUM_MACHINE_MODES; ++m) | |
3746 | rs6000_hard_regno_nregs[m][r] | |
3747 | = rs6000_hard_regno_nregs_internal (r, (machine_mode)m); | |
3748 | ||
b395382f | 3749 | /* Precalculate TARGET_HARD_REGNO_MODE_OK. */ |
01e91138 | 3750 | for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r) |
3751 | for (m = 0; m < NUM_MACHINE_MODES; ++m) | |
b395382f | 3752 | if (rs6000_hard_regno_mode_ok_uncached (r, (machine_mode)m)) |
01e91138 | 3753 | rs6000_hard_regno_mode_ok_p[m][r] = true; |
3754 | ||
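/* A minimal sketch (hypothetical helper, not in the original file): after
   the precalculation above, mode/register validity is a simple table lookup
   rather than a call to the uncached predicate.  */

static inline bool
example_regno_mode_ok_p (int regno, machine_mode mode)
{
  return rs6000_hard_regno_mode_ok_p[mode][regno];
}
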
3755 | /* Precalculate CLASS_MAX_NREGS sizes. */ | |
3756 | for (c = 0; c < LIM_REG_CLASSES; ++c) | |
3757 | { | |
3758 | int reg_size; | |
3759 | ||
3760 | if (TARGET_VSX && VSX_REG_CLASS_P (c)) | |
3761 | reg_size = UNITS_PER_VSX_WORD; | |
3762 | ||
3763 | else if (c == ALTIVEC_REGS) | |
3764 | reg_size = UNITS_PER_ALTIVEC_WORD; | |
3765 | ||
3766 | else if (c == FLOAT_REGS) | |
3767 | reg_size = UNITS_PER_FP_WORD; | |
3768 | ||
3769 | else | |
3770 | reg_size = UNITS_PER_WORD; | |
3771 | ||
3772 | for (m = 0; m < NUM_MACHINE_MODES; ++m) | |
3773 | { | |
3774 | machine_mode m2 = (machine_mode)m; | |
3775 | int reg_size2 = reg_size; | |
3776 | ||
3777 | /* TDmode & IBM 128-bit floating point always take 2 registers, even | |
3778 | in VSX. */ | |
3779 | if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m)) | |
3780 | reg_size2 = UNITS_PER_FP_WORD; | |
3781 | ||
3782 | rs6000_class_max_nregs[m][c] | |
3783 | = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2; | |
3784 | } | |
3785 | } | |
3786 | ||
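/* Worked example (illustrative, not in the original file): the expression
   above is a ceiling division, so a 16-byte V2DFmode value held in 8-byte
   FLOAT_REGS needs (16 + 8 - 1) / 8 == 2 registers.  */

static inline int
example_class_nregs (int mode_size, int reg_size)
{
  return (mode_size + reg_size - 1) / reg_size;
}
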
3787 | if (TARGET_E500_DOUBLE) | |
3788 | rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1; | |
3789 | ||
3790 | /* Calculate for which modes to automatically generate code using the | |
3791 | reciprocal divide and square root instructions. In the future, possibly | |
3792 | automatically generate the instructions even if the user did not specify | |
3793 | -mrecip. The double precision reciprocal square root estimate on the | |
3794 | older machines is not accurate enough. */ | |
3795 | memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits)); | |
3796 | if (TARGET_FRES) | |
3797 | rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE; | |
3798 | if (TARGET_FRE) | |
3799 | rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE; | |
3800 | if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)) | |
3801 | rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE; | |
3802 | if (VECTOR_UNIT_VSX_P (V2DFmode)) | |
3803 | rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE; | |
3804 | ||
3805 | if (TARGET_FRSQRTES) | |
3806 | rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE; | |
3807 | if (TARGET_FRSQRTE) | |
3808 | rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE; | |
3809 | if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)) | |
3810 | rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE; | |
3811 | if (VECTOR_UNIT_VSX_P (V2DFmode)) | |
3812 | rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE; | |
3813 | ||
3814 | if (rs6000_recip_control) | |
3815 | { | |
3816 | if (!flag_finite_math_only) | |
3817 | warning (0, "-mrecip requires -ffinite-math or -ffast-math"); | |
3818 | if (flag_trapping_math) | |
3819 | warning (0, "-mrecip requires -fno-trapping-math or -ffast-math"); | |
3820 | if (!flag_reciprocal_math) | |
3821 | warning (0, "-mrecip requires -freciprocal-math or -ffast-math"); | |
3822 | if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math) | |
3823 | { | |
3824 | if (RS6000_RECIP_HAVE_RE_P (SFmode) | |
3825 | && (rs6000_recip_control & RECIP_SF_DIV) != 0) | |
3826 | rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE; | |
3827 | ||
3828 | if (RS6000_RECIP_HAVE_RE_P (DFmode) | |
3829 | && (rs6000_recip_control & RECIP_DF_DIV) != 0) | |
3830 | rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE; | |
3831 | ||
3832 | if (RS6000_RECIP_HAVE_RE_P (V4SFmode) | |
3833 | && (rs6000_recip_control & RECIP_V4SF_DIV) != 0) | |
3834 | rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE; | |
3835 | ||
3836 | if (RS6000_RECIP_HAVE_RE_P (V2DFmode) | |
3837 | && (rs6000_recip_control & RECIP_V2DF_DIV) != 0) | |
3838 | rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE; | |
3839 | ||
3840 | if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode) | |
3841 | && (rs6000_recip_control & RECIP_SF_RSQRT) != 0) | |
3842 | rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE; | |
3843 | ||
3844 | if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode) | |
3845 | && (rs6000_recip_control & RECIP_DF_RSQRT) != 0) | |
3846 | rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE; | |
3847 | ||
3848 | if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode) | |
3849 | && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0) | |
3850 | rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE; | |
3851 | ||
3852 | if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode) | |
3853 | && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0) | |
3854 | rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE; | |
3855 | } | |
3856 | } | |
3857 | ||
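/* A minimal sketch (not in the original file) of what the RS6000_RECIP_*
   machinery generates conceptually: the hardware estimate (fres/fre) is
   refined with Newton-Raphson steps, each of which roughly doubles the
   number of accurate bits; the frsqrte estimate is refined analogously.  */

static inline float
example_refined_reciprocal (float b, float estimate /* ~ 1/b from fres */)
{
  /* One Newton-Raphson step: e' = e * (2 - b * e).  */
  return estimate * (2.0f - b * estimate);
}
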
3858 | /* Update the addr mask bits in reg_addr to help secondary reload and the | |
3859 | GO_IF_LEGITIMATE_ADDRESS support figure out the appropriate addressing to | |
3860 | use. */ | |
3861 | rs6000_setup_reg_addr_masks (); | |
3862 | ||
3863 | if (global_init_p || TARGET_DEBUG_TARGET) | |
3864 | { | |
3865 | if (TARGET_DEBUG_REG) | |
3866 | rs6000_debug_reg_global (); | |
3867 | ||
3868 | if (TARGET_DEBUG_COST || TARGET_DEBUG_REG) | |
3869 | fprintf (stderr, | |
3870 | "SImode variable mult cost = %d\n" | |
3871 | "SImode constant mult cost = %d\n" | |
3872 | "SImode short constant mult cost = %d\n" | |
3873 | "DImode multiplication cost = %d\n" | |
3874 | "SImode division cost = %d\n" | |
3875 | "DImode division cost = %d\n" | |
3876 | "Simple fp operation cost = %d\n" | |
3877 | "DFmode multiplication cost = %d\n" | |
3878 | "SFmode division cost = %d\n" | |
3879 | "DFmode division cost = %d\n" | |
3880 | "cache line size = %d\n" | |
3881 | "l1 cache size = %d\n" | |
3882 | "l2 cache size = %d\n" | |
3883 | "simultaneous prefetches = %d\n" | |
3884 | "\n", | |
3885 | rs6000_cost->mulsi, | |
3886 | rs6000_cost->mulsi_const, | |
3887 | rs6000_cost->mulsi_const9, | |
3888 | rs6000_cost->muldi, | |
3889 | rs6000_cost->divsi, | |
3890 | rs6000_cost->divdi, | |
3891 | rs6000_cost->fp, | |
3892 | rs6000_cost->dmul, | |
3893 | rs6000_cost->sdiv, | |
3894 | rs6000_cost->ddiv, | |
3895 | rs6000_cost->cache_line_size, | |
3896 | rs6000_cost->l1_cache_size, | |
3897 | rs6000_cost->l2_cache_size, | |
3898 | rs6000_cost->simultaneous_prefetches); | |
3899 | } | |
3900 | } | |
3901 | ||
3902 | #if TARGET_MACHO | |
3903 | /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */ | |
3904 | ||
3905 | static void | |
3906 | darwin_rs6000_override_options (void) | |
3907 | { | |
3908 | /* The Darwin ABI always includes AltiVec; it can't be (validly) turned | |
3909 | off. */ | |
3910 | rs6000_altivec_abi = 1; | |
3911 | TARGET_ALTIVEC_VRSAVE = 1; | |
3912 | rs6000_current_abi = ABI_DARWIN; | |
3913 | ||
3914 | if (DEFAULT_ABI == ABI_DARWIN | |
3915 | && TARGET_64BIT) | |
3916 | darwin_one_byte_bool = 1; | |
3917 | ||
3918 | if (TARGET_64BIT && ! TARGET_POWERPC64) | |
3919 | { | |
3920 | rs6000_isa_flags |= OPTION_MASK_POWERPC64; | |
3921 | warning (0, "-m64 requires PowerPC64 architecture, enabling"); | |
3922 | } | |
3923 | if (flag_mkernel) | |
3924 | { | |
3925 | rs6000_default_long_calls = 1; | |
3926 | rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT; | |
3927 | } | |
3928 | ||
3929 | /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes | |
3930 | Altivec. */ | |
3931 | if (!flag_mkernel && !flag_apple_kext | |
3932 | && TARGET_64BIT | |
3933 | && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)) | |
3934 | rs6000_isa_flags |= OPTION_MASK_ALTIVEC; | |
3935 | ||
3936 | /* Unless the user (not the configurer) has explicitly overridden | |
3937 | it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to the | |
3938 | G4 unless targeting the kernel. */ | |
3939 | if (!flag_mkernel | |
3940 | && !flag_apple_kext | |
3941 | && strverscmp (darwin_macosx_version_min, "10.5") >= 0 | |
3942 | && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC) | |
3943 | && ! global_options_set.x_rs6000_cpu_index) | |
3944 | { | |
3945 | rs6000_isa_flags |= OPTION_MASK_ALTIVEC; | |
3946 | } | |
3947 | } | |
3948 | #endif | |
3949 | ||
3950 | /* If not otherwise specified by a target, make 'long double' equivalent to | |
3951 | 'double'. */ | |
3952 | ||
3953 | #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE | |
3954 | #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64 | |
3955 | #endif | |
3956 | ||
3957 | /* Return the builtin mask of the various options used that could affect which | |
3958 | builtins were used. In the past we used target_flags, but we've run out of | |
3959 | bits, and some options like SPE and PAIRED are no longer in | |
3960 | target_flags. */ | |
3961 | ||
3962 | HOST_WIDE_INT | |
3963 | rs6000_builtin_mask_calculate (void) | |
3964 | { | |
3965 | return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0) | |
3966 | | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0) | |
3967 | | ((TARGET_VSX) ? RS6000_BTM_VSX : 0) | |
3968 | | ((TARGET_SPE) ? RS6000_BTM_SPE : 0) | |
3969 | | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0) | |
3970 | | ((TARGET_FRE) ? RS6000_BTM_FRE : 0) | |
3971 | | ((TARGET_FRES) ? RS6000_BTM_FRES : 0) | |
3972 | | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0) | |
3973 | | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0) | |
3974 | | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0) | |
3975 | | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0) | |
3976 | | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0) | |
3977 | | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0) | |
3978 | | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0) | |
3979 | | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0) | |
3980 | | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0) | |
3981 | | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0) | |
3982 | | ((TARGET_HTM) ? RS6000_BTM_HTM : 0) | |
3983 | | ((TARGET_DFP) ? RS6000_BTM_DFP : 0) | |
3984 | | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0) | |
3985 | | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0) | |
3986 | | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)); | |
3987 | } | |
3988 | ||
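/* Illustrative usage (hypothetical helper, not in the original file):
   callers test individual RS6000_BTM_* bits of the computed mask to decide
   whether a family of built-ins is currently available.  */

static inline bool
example_altivec_builtins_p (void)
{
  return (rs6000_builtin_mask_calculate () & RS6000_BTM_ALTIVEC) != 0;
}
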
3989 | /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered | |
3990 | to clobber the XER[CA] bit because clobbering that bit without telling | |
3991 | the compiler worked just fine with versions of GCC before GCC 5, and | |
3992 | breaking a lot of older code in ways that are hard to track down is | |
3993 | not such a great idea. */ | |
3994 | ||
3995 | static rtx_insn * | |
3996 | rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/, | |
3997 | vec<const char *> &/*constraints*/, | |
3998 | vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs) | |
3999 | { | |
4000 | clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO)); | |
4001 | SET_HARD_REG_BIT (clobbered_regs, CA_REGNO); | |
4002 | return NULL; | |
4003 | } | |
4004 | ||
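/* Illustrative example (a sketch, not in the original file; PowerPC-only)
   of the old code the implicit clobber above protects: "addic" sets
   XER[CA], but asm like this rarely declared that, so GCC now assumes
   every asm statement clobbers the carry bit.  */

static inline long
example_legacy_asm (long a)
{
  long r;
  __asm__ ("addic %0,%1,1" : "=r" (r) : "r" (a));  /* silently sets CA */
  return r;
}
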
4005 | /* Override command line options. | |
4006 | ||
4007 | Combine build-specific configuration information with options | |
4008 | specified on the command line to set various state variables which | |
4009 | influence code generation, optimization, and expansion of built-in | |
4010 | functions. Assure that command-line configuration preferences are | |
4011 | compatible with each other and with the build configuration; issue | |
4012 | warnings while adjusting configuration or error messages while | |
4013 | rejecting configuration. | |
4014 | ||
4015 | Upon entry to this function: | |
4016 | ||
4017 | This function is called once at the beginning of | |
4018 | compilation, and then again at the start and end of compiling | |
4019 | each section of code that has a different configuration, as | |
4020 | indicated, for example, by adding the | |
4021 | ||
4022 | __attribute__((__target__("cpu=power9"))) | |
4023 | ||
4024 | qualifier to a function definition or, for example, by bracketing | |
4025 | code between | |
4026 | ||
4027 | #pragma GCC target("altivec") | |
4028 | ||
4029 | and | |
4030 | ||
4031 | #pragma GCC reset_options | |
4032 | ||
4033 | directives. Parameter global_init_p is true for the initial | |
4034 | invocation, which initializes global variables, and false for all | |
4035 | subsequent invocations. | |
4036 | ||
4037 | ||
4038 | Various global state information is assumed to be valid. This | |
4039 | includes OPTION_TARGET_CPU_DEFAULT, representing the name of the | |
4040 | default CPU specified at build configure time, TARGET_DEFAULT, | |
4041 | representing the default set of option flags for the default | |
4042 | target, and global_options_set.x_rs6000_isa_flags, representing | |
4043 | which options were requested on the command line. | |
4044 | ||
4045 | Upon return from this function: | |
4046 | ||
4047 | rs6000_isa_flags_explicit has a non-zero bit for each flag that | |
4048 | was set by name on the command line. Additionally, if certain | |
4049 | attributes are automatically enabled or disabled by this function | |
4050 | in order to assure compatibility between options and | |
4051 | configuration, the flags associated with those attributes are | |
4052 | also set. By setting these "explicit bits", we avoid the risk | |
4053 | that other code might accidentally overwrite these particular | |
4054 | attributes with "default values". | |
4055 | ||
4056 | The various bits of rs6000_isa_flags are set to indicate the | |
4057 | target options that have been selected for the most current | |
4058 | compilation efforts. This has the effect of also turning on the | |
4059 | associated TARGET_XXX values since these are macros which are | |
4060 | generally defined to test the corresponding bit of the | |
4061 | rs6000_isa_flags variable. | |
4062 | ||
4063 | The variable rs6000_builtin_mask is set to represent the target | |
4064 | options for the most current compilation efforts, consistent with | |
4065 | the current contents of rs6000_isa_flags. This variable controls | |
4066 | expansion of built-in functions. | |
4067 | ||
4068 | Various other global variables and fields of global structures | |
4069 | (over 50 in all) are initialized to reflect the desired options | |
4070 | for the most current compilation efforts. */ | |
4071 | ||
4072 | static bool | |
4073 | rs6000_option_override_internal (bool global_init_p) | |
4074 | { | |
4075 | bool ret = true; | |
4076 | bool have_cpu = false; | |
4077 | ||
4078 | /* The default cpu requested at configure time, if any. */ | |
4079 | const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT; | |
4080 | ||
4081 | HOST_WIDE_INT set_masks; | |
4082 | HOST_WIDE_INT ignore_masks; | |
4083 | int cpu_index; | |
4084 | int tune_index; | |
4085 | struct cl_target_option *main_target_opt | |
4086 | = ((global_init_p || target_option_default_node == NULL) | |
4087 | ? NULL : TREE_TARGET_OPTION (target_option_default_node)); | |
4088 | ||
4089 | /* Print defaults. */ | |
4090 | if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p) | |
4091 | rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT); | |
4092 | ||
4093 | /* Remember the explicit arguments. */ | |
4094 | if (global_init_p) | |
4095 | rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags; | |
4096 | ||
4097 | /* On 64-bit Darwin, power alignment is ABI-incompatible with some C | |
4098 | library functions, so warn about it. The flag may be useful for | |
4099 | performance studies from time to time though, so don't disable it | |
4100 | entirely. */ | |
4101 | if (global_options_set.x_rs6000_alignment_flags | |
4102 | && rs6000_alignment_flags == MASK_ALIGN_POWER | |
4103 | && DEFAULT_ABI == ABI_DARWIN | |
4104 | && TARGET_64BIT) | |
4105 | warning (0, "-malign-power is not supported for 64-bit Darwin;" | |
4106 | " it is incompatible with the installed C and C++ libraries"); | |
4107 | ||
4108 | /* Numerous experiments show that IRA-based loop pressure | |
4109 | calculation works better for RTL loop-invariant motion on targets | |
4110 | with enough (>= 32) registers. It is an expensive optimization, | |
4111 | so it is enabled only for peak performance. */ | |
4112 | if (optimize >= 3 && global_init_p | |
4113 | && !global_options_set.x_flag_ira_loop_pressure) | |
4114 | flag_ira_loop_pressure = 1; | |
4115 | ||
4116 | /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order | |
4117 | for tracebacks to be complete, but not if an -fasynchronous-unwind-tables | |
4118 | option was already specified. */ | |
4119 | if (flag_sanitize & SANITIZE_USER_ADDRESS | |
4120 | && !global_options_set.x_flag_asynchronous_unwind_tables) | |
4121 | flag_asynchronous_unwind_tables = 1; | |
4122 | ||
4123 | /* Set the pointer size. */ | |
4124 | if (TARGET_64BIT) | |
4125 | { | |
af8303fa | 4126 | rs6000_pmode = DImode; |
01e91138 | 4127 | rs6000_pointer_size = 64; |
4128 | } | |
4129 | else | |
4130 | { | |
af8303fa | 4131 | rs6000_pmode = SImode; |
01e91138 | 4132 | rs6000_pointer_size = 32; |
4133 | } | |
4134 | ||
4135 | /* Some OSs don't support saving the high part of 64-bit registers on context | |
4136 | switch. Other OSs don't support saving Altivec registers. On those OSs, | |
4137 | we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings; | |
4138 | if the user wants either, the user must explicitly specify them and we | |
4139 | won't interfere with the user's specification. */ | |
4140 | ||
4141 | set_masks = POWERPC_MASKS; | |
4142 | #ifdef OS_MISSING_POWERPC64 | |
4143 | if (OS_MISSING_POWERPC64) | |
4144 | set_masks &= ~OPTION_MASK_POWERPC64; | |
4145 | #endif | |
4146 | #ifdef OS_MISSING_ALTIVEC | |
4147 | if (OS_MISSING_ALTIVEC) | |
4148 | set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX | |
4149 | | OTHER_VSX_VECTOR_MASKS); | |
4150 | #endif | |
4151 | ||
4152 | /* Don't let the processor default override options given explicitly. */ | |
4153 | set_masks &= ~rs6000_isa_flags_explicit; | |
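/* Worked example (hypothetical command line): if the user passed an
   explicit -mno-altivec, OPTION_MASK_ALTIVEC is recorded in
   rs6000_isa_flags_explicit, so the statement above removes it from
   set_masks and the -mcpu defaults applied below can no longer turn
   AltiVec back on.  */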
4154 | ||
4155 | /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments.  If the user changed | |
4156 | the cpu in a target attribute or pragma, but did not specify a tuning | |
4157 | option, use the cpu for the tuning option rather than the option specified | |
4158 | with -mtune on the command line. Process a '--with-cpu' configuration | |
4159 | request as an implicit -mcpu. */ | |
4160 | if (rs6000_cpu_index >= 0) | |
4161 | { | |
4162 | cpu_index = rs6000_cpu_index; | |
4163 | have_cpu = true; | |
4164 | } | |
4165 | else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0) | |
4166 | { | |
4167 | rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index; | |
4168 | have_cpu = true; | |
4169 | } | |
4170 | else if (implicit_cpu) | |
4171 | { | |
4172 | rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu); | |
4173 | have_cpu = true; | |
4174 | } | |
4175 | else | |
4176 | { | |
4177 | /* PowerPC 64-bit LE requires at least ISA 2.07. */ | |
4178 | const char *default_cpu = ((!TARGET_POWERPC64) | |
4179 | ? "powerpc" | |
4180 | : ((BYTES_BIG_ENDIAN) | |
4181 | ? "powerpc64" | |
4182 | : "powerpc64le")); | |
4183 | ||
4184 | rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu); | |
4185 | have_cpu = false; | |
4186 | } | |
4187 | ||
4188 | gcc_assert (cpu_index >= 0); | |
4189 | ||
4190 | if (have_cpu) | |
4191 | { | |
4192 | #ifndef HAVE_AS_POWER9 | |
4193 | if (processor_target_table[rs6000_cpu_index].processor | |
4194 | == PROCESSOR_POWER9) | |
4195 | { | |
4196 | have_cpu = false; | |
4197 | warning (0, "will not generate power9 instructions because " | |
4198 | "assembler lacks power9 support"); | |
4199 | } | |
4200 | #endif | |
4201 | #ifndef HAVE_AS_POWER8 | |
4202 | if (processor_target_table[rs6000_cpu_index].processor | |
4203 | == PROCESSOR_POWER8) | |
4204 | { | |
4205 | have_cpu = false; | |
4206 | warning (0, "will not generate power8 instructions because " | |
4207 | "assembler lacks power8 support"); | |
4208 | } | |
4209 | #endif | |
4210 | #ifndef HAVE_AS_POPCNTD | |
4211 | if (processor_target_table[rs6000_cpu_index].processor | |
4212 | == PROCESSOR_POWER7) | |
4213 | { | |
4214 | have_cpu = false; | |
4215 | warning (0, "will not generate power7 instructions because " | |
4216 | "assembler lacks power7 support"); | |
4217 | } | |
4218 | #endif | |
4219 | #ifndef HAVE_AS_DFP | |
4220 | if (processor_target_table[rs6000_cpu_index].processor | |
4221 | == PROCESSOR_POWER6) | |
4222 | { | |
4223 | have_cpu = false; | |
4224 | warning (0, "will not generate power6 instructions because " | |
4225 | "assembler lacks power6 support"); | |
4226 | } | |
4227 | #endif | |
4228 | #ifndef HAVE_AS_POPCNTB | |
4229 | if (processor_target_table[rs6000_cpu_index].processor | |
4230 | == PROCESSOR_POWER5) | |
4231 | { | |
4232 | have_cpu = false; | |
4233 | warning (0, "will not generate power5 instructions because " | |
4234 | "assembler lacks power5 support"); | |
4235 | } | |
4236 | #endif | |
4237 | ||
4238 | if (!have_cpu) | |
4239 | { | |
4240 | /* PowerPC 64-bit LE requires at least ISA 2.07. */ | |
4241 | const char *default_cpu = (!TARGET_POWERPC64 | |
4242 | ? "powerpc" | |
4243 | : (BYTES_BIG_ENDIAN | |
4244 | ? "powerpc64" | |
4245 | : "powerpc64le")); | |
4246 | ||
4247 | rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu); | |
4248 | } | |
4249 | } | |
4250 | ||
4251 | /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the | |
4252 | compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits | |
4253 | with those from the cpu, except for options that were explicitly set. If | |
4254 | we don't have a cpu, do not override the target bits set in | |
4255 | TARGET_DEFAULT. */ | |
4256 | if (have_cpu) | |
4257 | { | |
4258 | rs6000_isa_flags &= ~set_masks; | |
4259 | rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable | |
4260 | & set_masks); | |
4261 | } | |
4262 | else | |
4263 | { | |
4264 | /* If no -mcpu=<xxx>, inherit any default options that were cleared via | |
4265 | POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize | |
4266 | target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched | |
4267 | to using rs6000_isa_flags, we need to do the initialization here. | |
4268 | ||
4269 | If there is a TARGET_DEFAULT, use that. Otherwise fall back to using | |
4270 | -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */ | |
4271 | HOST_WIDE_INT flags = ((TARGET_DEFAULT) ? TARGET_DEFAULT | |
4272 | : processor_target_table[cpu_index].target_enable); | |
4273 | rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit); | |
4274 | } | |
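/* Net effect of the two paths above, as plain bit arithmetic: with a
   cpu, the selectable bits are replaced wholesale,
     rs6000_isa_flags = (rs6000_isa_flags & ~set_masks)
                        | (cpu_bits & set_masks);
   without one, the defaults are only OR'ed into bits the user left
   unspecified,
     rs6000_isa_flags |= defaults & ~rs6000_isa_flags_explicit;  */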
4275 | ||
4276 | if (rs6000_tune_index >= 0) | |
4277 | tune_index = rs6000_tune_index; | |
4278 | else if (have_cpu) | |
4279 | rs6000_tune_index = tune_index = cpu_index; | |
4280 | else | |
4281 | { | |
4282 | size_t i; | |
4283 | enum processor_type tune_proc | |
4284 | = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT); | |
4285 | ||
4286 | tune_index = -1; | |
4287 | for (i = 0; i < ARRAY_SIZE (processor_target_table); i++) | |
4288 | if (processor_target_table[i].processor == tune_proc) | |
4289 | { | |
4290 | rs6000_tune_index = tune_index = i; | |
4291 | break; | |
4292 | } | |
4293 | } | |
4294 | ||
4295 | gcc_assert (tune_index >= 0); | |
4296 | rs6000_cpu = processor_target_table[tune_index].processor; | |
4297 | ||
4298 | /* Pick defaults for the SPE-related control flags.  Do this early to make | |
4299 | sure that the TARGET_ macros are representative as soon as possible. */ | |
4300 | { | |
4301 | int spe_capable_cpu = | |
4302 | (rs6000_cpu == PROCESSOR_PPC8540 | |
4303 | || rs6000_cpu == PROCESSOR_PPC8548); | |
4304 | ||
4305 | if (!global_options_set.x_rs6000_spe_abi) | |
4306 | rs6000_spe_abi = spe_capable_cpu; | |
4307 | ||
4308 | if (!global_options_set.x_rs6000_spe) | |
4309 | rs6000_spe = spe_capable_cpu; | |
4310 | ||
4311 | if (!global_options_set.x_rs6000_float_gprs) | |
4312 | rs6000_float_gprs = | |
4313 | (rs6000_cpu == PROCESSOR_PPC8540 ? 1 | |
4314 | : rs6000_cpu == PROCESSOR_PPC8548 ? 2 | |
4315 | : 0); | |
4316 | } | |
4317 | ||
4318 | if (global_options_set.x_rs6000_spe_abi | |
4319 | && rs6000_spe_abi | |
4320 | && !TARGET_SPE_ABI) | |
4321 | error ("not configured for SPE ABI"); | |
4322 | ||
4323 | if (global_options_set.x_rs6000_spe | |
4324 | && rs6000_spe | |
4325 | && !TARGET_SPE) | |
4326 | error ("not configured for SPE instruction set"); | |
4327 | ||
4328 | if (main_target_opt != NULL | |
4329 | && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi) | |
4330 | || (main_target_opt->x_rs6000_spe != rs6000_spe) | |
4331 | || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs))) | |
4332 | error ("target attribute or pragma changes SPE ABI"); | |
4333 | ||
4334 | if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3 | |
4335 | || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64 | |
4336 | || rs6000_cpu == PROCESSOR_PPCE5500) | |
4337 | { | |
4338 | if (TARGET_ALTIVEC) | |
4339 | error ("AltiVec not supported in this target"); | |
4340 | if (TARGET_SPE) | |
4341 | error ("SPE not supported in this target"); | |
4342 | } | |
4343 | if (rs6000_cpu == PROCESSOR_PPCE6500) | |
4344 | { | |
4345 | if (TARGET_SPE) | |
4346 | error ("SPE not supported in this target"); | |
4347 | } | |
4348 | ||
4349 | /* Disable Cell microcode if we are optimizing for the Cell | |
4350 | and not optimizing for size. */ | |
4351 | if (rs6000_gen_cell_microcode == -1) | |
4352 | rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL | |
4353 | && !optimize_size); | |
4354 | ||
4355 | /* If we are optimizing big endian systems for space and it's OK to | |
4356 | use instructions that would be microcoded on the Cell, use the | |
4357 | load/store multiple and string instructions. */ | |
4358 | if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode) | |
4359 | rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE | |
4360 | | OPTION_MASK_STRING); | |
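/* For reference, the instructions in question are the lmw/stmw
   load/store-multiple forms and the lswi/stswi string forms, which
   trade throughput for noticeably smaller block-move sequences.  */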
4361 | ||
4362 | /* Don't allow -mmultiple or -mstring on little endian systems | |
4363 | unless the cpu is a 750, because the hardware doesn't support the | |
4364 | instructions used in little endian mode, and they cause an alignment | |
4365 | trap.  The 750 does not cause an alignment trap (except when the | |
4366 | target is unaligned). */ | |
4367 | ||
4368 | if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750) | |
4369 | { | |
4370 | if (TARGET_MULTIPLE) | |
4371 | { | |
4372 | rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE; | |
4373 | if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0) | |
4374 | warning (0, "-mmultiple is not supported on little endian systems"); | |
4375 | } | |
4376 | ||
4377 | if (TARGET_STRING) | |
4378 | { | |
4379 | rs6000_isa_flags &= ~OPTION_MASK_STRING; | |
4380 | if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0) | |
4381 | warning (0, "-mstring is not supported on little endian systems"); | |
4382 | } | |
4383 | } | |
4384 | ||
4385 | /* If little-endian, default to -mstrict-align on older processors. | |
4386 | Testing for htm matches power8 and later. */ | |
4387 | if (!BYTES_BIG_ENDIAN | |
4388 | && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM)) | |
4389 | rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN; | |
4390 | ||
4391 | /* -maltivec={le,be} implies -maltivec. */ | |
4392 | if (rs6000_altivec_element_order != 0) | |
4393 | rs6000_isa_flags |= OPTION_MASK_ALTIVEC; | |
4394 | ||
4395 | /* Disallow -maltivec=le in big endian mode for now. This is not | |
4396 | known to be useful for anyone. */ | |
4397 | if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1) | |
4398 | { | |
4399 | warning (0, N_("-maltivec=le not allowed for big-endian targets")); | |
4400 | rs6000_altivec_element_order = 0; | |
4401 | } | |
4402 | ||
4403 | /* Add some warnings for VSX. */ | |
4404 | if (TARGET_VSX) | |
4405 | { | |
4406 | const char *msg = NULL; | |
4407 | if (!TARGET_HARD_FLOAT || !TARGET_FPRS | |
4408 | || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT) | |
4409 | { | |
4410 | if (rs6000_isa_flags_explicit & OPTION_MASK_VSX) | |
4411 | msg = N_("-mvsx requires hardware floating point"); | |
4412 | else | |
4413 | { | |
4414 | rs6000_isa_flags &= ~ OPTION_MASK_VSX; | |
4415 | rs6000_isa_flags_explicit |= OPTION_MASK_VSX; | |
4416 | } | |
4417 | } | |
4418 | else if (TARGET_PAIRED_FLOAT) | |
4419 | msg = N_("-mvsx and -mpaired are incompatible"); | |
4420 | else if (TARGET_AVOID_XFORM > 0) | |
4421 | msg = N_("-mvsx needs indexed addressing"); | |
4422 | else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit | |
4423 | & OPTION_MASK_ALTIVEC)) | |
4424 | { | |
4425 | if (rs6000_isa_flags_explicit & OPTION_MASK_VSX) | |
4426 | msg = N_("-mvsx and -mno-altivec are incompatible"); | |
4427 | else | |
4428 | msg = N_("-mno-altivec disables vsx"); | |
4429 | } | |
4430 | ||
4431 | if (msg) | |
4432 | { | |
4433 | warning (0, msg); | |
4434 | rs6000_isa_flags &= ~ OPTION_MASK_VSX; | |
4435 | rs6000_isa_flags_explicit |= OPTION_MASK_VSX; | |
4436 | } | |
4437 | } | |
4438 | ||
4439 | /* If hard-float/altivec/vsx were explicitly turned off, then don't allow | |
4440 | the -mcpu setting to enable options that conflict. */ | |
4441 | if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX) | |
4442 | && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT | |
4443 | | OPTION_MASK_ALTIVEC | |
4444 | | OPTION_MASK_VSX)) != 0) | |
4445 | rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO | |
4446 | | OPTION_MASK_DIRECT_MOVE) | |
4447 | & ~rs6000_isa_flags_explicit); | |
4448 | ||
4449 | if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) | |
4450 | rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags); | |
4451 | ||
4452 | /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn | |
4453 | off all of the options that depend on those flags. */ | |
4454 | ignore_masks = rs6000_disable_incompatible_switches (); | |
4455 | ||
4456 | /* For the newer switches (vsx, dfp, etc.) set some of the older options, | |
4457 | unless the user explicitly used the -mno-<option> to disable the code. */ | |
4458 | if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_DFORM_SCALAR | |
4459 | || TARGET_P9_DFORM_VECTOR || TARGET_P9_DFORM_BOTH > 0) | |
4460 | rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks); | |
4461 | else if (TARGET_P9_MINMAX) | |
4462 | { | |
4463 | if (have_cpu) | |
4464 | { | |
4465 | if (processor_target_table[cpu_index].processor == PROCESSOR_POWER9) | |
4466 | { | |
4467 | /* Legacy behavior: allow -mcpu=power9 with certain | |
4468 | capabilities explicitly disabled. */ | |
4469 | rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks); | |
4470 | /* However, reject this automatic fix if certain | |
4471 | capabilities required for TARGET_P9_MINMAX support | |
4472 | have been explicitly disabled. */ | |
4473 | if (((OPTION_MASK_VSX | OPTION_MASK_UPPER_REGS_SF | |
4474 | | OPTION_MASK_UPPER_REGS_DF) & rs6000_isa_flags) | |
4475 | != (OPTION_MASK_VSX | OPTION_MASK_UPPER_REGS_SF | |
4476 | | OPTION_MASK_UPPER_REGS_DF)) | |
4477 | error ("-mpower9-minmax incompatible with explicitly disabled options"); | |
4478 | } | |
4479 | else | |
4480 | error ("Power9 target option is incompatible with -mcpu=<xxx> for " | |
4481 | "<xxx> less than power9"); | |
4482 | } | |
4483 | else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit) | |
4484 | != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags | |
4485 | & rs6000_isa_flags_explicit)) | |
4486 | /* Enforce that none of the ISA_3_0_MASKS_SERVER flags | |
4487 | were explicitly cleared. */ | |
4488 | error ("-mpower9-minmax incompatible with explicitly disabled options"); | |
4489 | else | |
4490 | rs6000_isa_flags |= ISA_3_0_MASKS_SERVER; | |
4491 | } | |
4492 | else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO) | |
4493 | rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks); | |
4494 | else if (TARGET_VSX) | |
4495 | rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks); | |
4496 | else if (TARGET_POPCNTD) | |
4497 | rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks); | |
4498 | else if (TARGET_DFP) | |
4499 | rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks); | |
4500 | else if (TARGET_CMPB) | |
4501 | rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks); | |
4502 | else if (TARGET_FPRND) | |
4503 | rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks); | |
4504 | else if (TARGET_POPCNTB) | |
4505 | rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks); | |
4506 | else if (TARGET_ALTIVEC) | |
4507 | rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks); | |
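/* The else-if chain above is ordered newest-to-oldest: the first
   ISA-level option found pulls in that level's whole mask set (each
   set presumably subsuming the older levels' bits), minus anything
   the user explicitly disabled via ignore_masks.  */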
4508 | ||
4509 | if (TARGET_CRYPTO && !TARGET_ALTIVEC) | |
4510 | { | |
4511 | if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO) | |
4512 | error ("-mcrypto requires -maltivec"); | |
4513 | rs6000_isa_flags &= ~OPTION_MASK_CRYPTO; | |
4514 | } | |
4515 | ||
4516 | if (TARGET_DIRECT_MOVE && !TARGET_VSX) | |
4517 | { | |
4518 | if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE) | |
4519 | error ("-mdirect-move requires -mvsx"); | |
4520 | rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE; | |
4521 | } | |
4522 | ||
4523 | if (TARGET_P8_VECTOR && !TARGET_ALTIVEC) | |
4524 | { | |
4525 | if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) | |
4526 | error ("-mpower8-vector requires -maltivec"); | |
4527 | rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR; | |
4528 | } | |
4529 | ||
4530 | if (TARGET_P8_VECTOR && !TARGET_VSX) | |
4531 | { | |
4532 | if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) | |
4533 | && (rs6000_isa_flags_explicit & OPTION_MASK_VSX)) | |
4534 | error ("-mpower8-vector requires -mvsx"); | |
4535 | else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0) | |
4536 | { | |
4537 | rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR; | |
4538 | if (rs6000_isa_flags_explicit & OPTION_MASK_VSX) | |
4539 | rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR; | |
4540 | } | |
4541 | else | |
4542 | { | |
4543 | /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is | |
4544 | not explicit. */ | |
4545 | rs6000_isa_flags |= OPTION_MASK_VSX; | |
4546 | rs6000_isa_flags_explicit |= OPTION_MASK_VSX; | |
4547 | } | |
4548 | } | |
4549 | ||
4550 | if (TARGET_VSX_TIMODE && !TARGET_VSX) | |
4551 | { | |
4552 | if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) | |
4553 | error ("-mvsx-timode requires -mvsx"); | |
4554 | rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE; | |
4555 | } | |
4556 | ||
4557 | if (TARGET_DFP && !TARGET_HARD_FLOAT) | |
4558 | { | |
4559 | if (rs6000_isa_flags_explicit & OPTION_MASK_DFP) | |
4560 | error ("-mhard-dfp requires -mhard-float"); | |
4561 | rs6000_isa_flags &= ~OPTION_MASK_DFP; | |
4562 | } | |
4563 | ||
4564 | /* Allow an explicit -mupper-regs to set -mupper-regs-df, -mupper-regs-di, | |
4565 | and -mupper-regs-sf, depending on the cpu, unless the user explicitly also | |
4566 | set the individual option. */ | |
4567 | if (TARGET_UPPER_REGS > 0) | |
4568 | { | |
4569 | if (TARGET_VSX | |
4570 | && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)) | |
4571 | { | |
4572 | rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF; | |
4573 | rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF; | |
4574 | } | |
4575 | if (TARGET_VSX | |
4576 | && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI)) | |
4577 | { | |
4578 | rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DI; | |
4579 | rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI; | |
4580 | } | |
4581 | if (TARGET_P8_VECTOR | |
4582 | && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)) | |
4583 | { | |
4584 | rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF; | |
4585 | rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF; | |
4586 | } | |
4587 | } | |
4588 | else if (TARGET_UPPER_REGS == 0) | |
4589 | { | |
4590 | if (TARGET_VSX | |
4591 | && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)) | |
4592 | { | |
4593 | rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF; | |
4594 | rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF; | |
4595 | } | |
4596 | if (TARGET_VSX | |
4597 | && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI)) | |
4598 | { | |
4599 | rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI; | |
4600 | rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI; | |
4601 | } | |
4602 | if (TARGET_P8_VECTOR | |
4603 | && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)) | |
4604 | { | |
4605 | rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF; | |
4606 | rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF; | |
4607 | } | |
4608 | } | |
4609 | ||
4610 | if (TARGET_UPPER_REGS_DF && !TARGET_VSX) | |
4611 | { | |
4612 | if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF) | |
4613 | error ("-mupper-regs-df requires -mvsx"); | |
4614 | rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF; | |
4615 | } | |
4616 | ||
4617 | if (TARGET_UPPER_REGS_DI && !TARGET_VSX) | |
4618 | { | |
4619 | if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI) | |
4620 | error ("-mupper-regs-di requires -mvsx"); | |
4621 | rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI; | |
4622 | } | |
4623 | ||
4624 | if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR) | |
4625 | { | |
4626 | if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF) | |
4627 | error ("-mupper-regs-sf requires -mpower8-vector"); | |
4628 | rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF; | |
4629 | } | |
4630 | ||
4631 | /* The quad memory instructions only work in 64-bit mode.  In 32-bit mode, | |
4632 | silently turn off quad memory mode. */ | |
4633 | if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64) | |
4634 | { | |
4635 | if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0) | |
4636 | warning (0, N_("-mquad-memory requires 64-bit mode")); | |
4637 | ||
4638 | if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0) | |
4639 | warning (0, N_("-mquad-memory-atomic requires 64-bit mode")); | |
4640 | ||
4641 | rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY | |
4642 | | OPTION_MASK_QUAD_MEMORY_ATOMIC); | |
4643 | } | |
4644 | ||
4645 | /* Non-atomic quad memory loads/stores are disabled for little endian, since | |
4646 | the words are reversed, but atomic operations can still be done by | |
4647 | swapping the words. */ | |
4648 | if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN) | |
4649 | { | |
4650 | if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0) | |
4651 | warning (0, N_("-mquad-memory is not available in little endian mode")); | |
4652 | ||
4653 | rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY; | |
4654 | } | |
4655 | ||
4656 | /* Assume if the user asked for normal quad memory instructions, they want | |
4657 | the atomic versions as well, unless they explicitly told us not to use quad | |
4658 | word atomic instructions. */ | |
4659 | if (TARGET_QUAD_MEMORY | |
4660 | && !TARGET_QUAD_MEMORY_ATOMIC | |
4661 | && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0)) | |
4662 | rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC; | |
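/* In instruction terms (for reference): the non-atomic forms are
   lq/stq, while the atomic forms are the lqarx/stqcx. pair used to
   build 128-bit atomic sequences.  */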
4663 | ||
4664 | /* Enable power8 fusion if we are tuning for power8, even if we aren't | |
4665 | generating power8 instructions. */ | |
4666 | if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)) | |
4667 | rs6000_isa_flags |= (processor_target_table[tune_index].target_enable | |
4668 | & OPTION_MASK_P8_FUSION); | |
4669 | ||
4670 | /* Setting additional fusion flags turns on base fusion. */ | |
4671 | if (!TARGET_P8_FUSION && (TARGET_P8_FUSION_SIGN || TARGET_TOC_FUSION)) | |
4672 | { | |
4673 | if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION) | |
4674 | { | |
4675 | if (TARGET_P8_FUSION_SIGN) | |
4676 | error ("-mpower8-fusion-sign requires -mpower8-fusion"); | |
4677 | ||
4678 | if (TARGET_TOC_FUSION) | |
4679 | error ("-mtoc-fusion requires -mpower8-fusion"); | |
4680 | ||
4681 | rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION; | |
4682 | } | |
4683 | else | |
4684 | rs6000_isa_flags |= OPTION_MASK_P8_FUSION; | |
4685 | } | |
4686 | ||
4687 | /* Power9 fusion is a superset of power8 fusion. */ | |
4688 | if (TARGET_P9_FUSION && !TARGET_P8_FUSION) | |
4689 | { | |
4690 | if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION) | |
4691 | { | |
4692 | /* We prefer to not mention undocumented options in | |
4693 | error messages. However, if users have managed to select | |
4694 | power9-fusion without selecting power8-fusion, they | |
4695 | already know about undocumented flags. */ | |
4696 | error ("-mpower9-fusion requires -mpower8-fusion"); | |
4697 | rs6000_isa_flags &= ~OPTION_MASK_P9_FUSION; | |
4698 | } | |
4699 | else | |
4700 | rs6000_isa_flags |= OPTION_MASK_P8_FUSION; | |
4701 | } | |
4702 | ||
4703 | /* Enable power9 fusion if we are tuning for power9, even if we aren't | |
4704 | generating power9 instructions. */ | |
4705 | if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_FUSION)) | |
4706 | rs6000_isa_flags |= (processor_target_table[tune_index].target_enable | |
4707 | & OPTION_MASK_P9_FUSION); | |
4708 | ||
4709 | /* Power8 does not fuse sign-extended loads with the addis.  If we are | |
4710 | optimizing at high levels for speed, convert a sign-extended load into a | |
4711 | zero-extending load and an explicit sign extension. */ | |
4712 | if (TARGET_P8_FUSION | |
4713 | && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN) | |
4714 | && optimize_function_for_speed_p (cfun) | |
4715 | && optimize >= 3) | |
4716 | rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN; | |
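/* Sketch of the resulting transformation: a non-fusible
   "addis; lha" (sign-extending load) pair becomes "addis; lhz"
   (zero-extending load) followed by an explicit "extsh", keeping the
   addis/load pair fusible while preserving the sign extension.  */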
4717 | ||
4718 | /* TOC fusion requires 64-bit and medium/large code model. */ | |
4719 | if (TARGET_TOC_FUSION && !TARGET_POWERPC64) | |
4720 | { | |
4721 | rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION; | |
4722 | if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0) | |
4723 | warning (0, N_("-mtoc-fusion requires 64-bit")); | |
4724 | } | |
4725 | ||
4726 | if (TARGET_TOC_FUSION && (TARGET_CMODEL == CMODEL_SMALL)) | |
4727 | { | |
4728 | rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION; | |
4729 | if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0) | |
4730 | warning (0, N_("-mtoc-fusion requires medium/large code model")); | |
4731 | } | |
4732 | ||
4733 | /* Turn on -mtoc-fusion by default if p8-fusion and 64-bit medium/large code | |
4734 | model. */ | |
4735 | if (TARGET_P8_FUSION && !TARGET_TOC_FUSION && TARGET_POWERPC64 | |
4736 | && (TARGET_CMODEL != CMODEL_SMALL) | |
4737 | && !(rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION)) | |
4738 | rs6000_isa_flags |= OPTION_MASK_TOC_FUSION; | |
4739 | ||
4740 | /* ISA 3.0 vector instructions include ISA 2.07. */ | |
4741 | if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR) | |
4742 | { | |
4743 | /* We prefer to not mention undocumented options in | |
4744 | error messages. However, if users have managed to select | |
4745 | power9-vector without selecting power8-vector, they | |
4746 | already know about undocumented flags. */ | |
4747 | if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) | |
4748 | && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)) | |
4749 | error ("-mpower9-vector requires -mpower8-vector"); | |
4750 | else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0) | |
4751 | { | |
4752 | rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR; | |
4753 | if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) | |
4754 | rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR; | |
4755 | } | |
4756 | else | |
4757 | { | |
4758 | /* OPTION_MASK_P9_VECTOR is explicit and | |
4759 | OPTION_MASK_P8_VECTOR is not explicit. */ | |
4760 | rs6000_isa_flags |= OPTION_MASK_P8_VECTOR; | |
4761 | rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR; | |
4762 | } | |
4763 | } | |
4764 | ||
4765 | /* -mpower9-dform turns on both -mpower9-dform-scalar and | |
4766 | -mpower9-dform-vector. */ | |
4767 | if (TARGET_P9_DFORM_BOTH > 0) | |
4768 | { | |
4769 | if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR)) | |
4770 | rs6000_isa_flags |= OPTION_MASK_P9_DFORM_VECTOR; | |
4771 | ||
4772 | if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR)) | |
4773 | rs6000_isa_flags |= OPTION_MASK_P9_DFORM_SCALAR; | |
4774 | } | |
4775 | else if (TARGET_P9_DFORM_BOTH == 0) | |
4776 | { | |
4777 | if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR)) | |
4778 | rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_VECTOR; | |
4779 | ||
4780 | if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR)) | |
4781 | rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR; | |
4782 | } | |
4783 | ||
4784 | /* ISA 3.0 D-form instructions require p9-vector and upper-regs. */ | |
4785 | if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR) && !TARGET_P9_VECTOR) | |
4786 | { | |
4787 | /* We prefer to not mention undocumented options in | |
4788 | error messages. However, if users have managed to select | |
4789 | power9-dform without selecting power9-vector, they | |
4790 | already know about undocumented flags. */ | |
4791 | if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) | |
4792 | && (rs6000_isa_flags_explicit & (OPTION_MASK_P9_DFORM_SCALAR | |
4793 | | OPTION_MASK_P9_DFORM_VECTOR))) | |
4794 | error ("-mpower9-dform requires -mpower9-vector"); | |
4795 | else if (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) | |
4796 | { | |
4797 | rs6000_isa_flags &= | |
4798 | ~(OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR); | |
4799 | rs6000_isa_flags_explicit |= | |
4800 | (OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR); | |
4801 | } | |
4802 | else | |
4803 | { | |
4804 | /* We know that OPTION_MASK_P9_VECTOR is not explicit and | |
4805 | OPTION_MASK_P9_DFORM_SCALAR or OPTION_MASK_P9_DFORM_VECTOR | |
4806 | may be explicit. */ | |
4807 | rs6000_isa_flags |= OPTION_MASK_P9_VECTOR; | |
4808 | rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR; | |
4809 | } | |
4810 | } | |
4811 | ||
4812 | if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR) | |
4813 | && !TARGET_DIRECT_MOVE) | |
4814 | { | |
4815 | /* We prefer to not mention undocumented options in | |
4816 | error messages. However, if users have managed to select | |
4817 | power9-dform without selecting direct-move, they | |
4818 | already know about undocumented flags. */ | |
4819 | if ((rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE) | |
4820 | && ((rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR) | |
4821 | || (rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR) | |
4822 | || (TARGET_P9_DFORM_BOTH == 1))) | |
4823 | error ("-mpower9-dform, -mpower9-dform-vector, -mpower9-dform-scalar" | |
4824 | " require -mdirect-move"); | |
4825 | else if ((rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE) == 0) | |
4826 | { | |
4827 | rs6000_isa_flags |= OPTION_MASK_DIRECT_MOVE; | |
4828 | rs6000_isa_flags_explicit |= OPTION_MASK_DIRECT_MOVE; | |
4829 | } | |
4830 | else | |
4831 | { | |
4832 | rs6000_isa_flags &= | |
4833 | ~(OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR); | |
4834 | rs6000_isa_flags_explicit |= | |
4835 | (OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR); | |
4836 | } | |
4837 | } | |
4838 | ||
4839 | if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_DF) | |
4840 | { | |
4841 | /* We prefer to not mention undocumented options in | |
4842 | error messages. However, if users have managed to select | |
4843 | power9-dform without selecting upper-regs-df, they | |
4844 | already know about undocumented flags. */ | |
4845 | if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF) | |
4846 | error ("-mpower9-dform requires -mupper-regs-df"); | |
4847 | rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR; | |
4848 | } | |
4849 | ||
4850 | if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_SF) | |
4851 | { | |
4852 | if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF) | |
4853 | error ("-mpower9-dform requires -mupper-regs-sf"); | |
4854 | rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR; | |
4855 | } | |
4856 | ||
4857 | /* Enable LRA by default. */ | |
4858 | if ((rs6000_isa_flags_explicit & OPTION_MASK_LRA) == 0) | |
4859 | rs6000_isa_flags |= OPTION_MASK_LRA; | |
4860 | ||
4861 | /* There have been bugs with -mvsx-timode that don't show up with -mlra, | |
4862 | but do show up with -mno-lra.  Given that -mlra will become the default once | |
4863 | PR 69847 is fixed, turn off the options with problems by default if | |
4864 | -mno-lra was used, and warn if the user explicitly asked for the option. | |
4865 | ||
4866 | Enable -mpower9-dform-vector by default if LRA and other power9 options. | |
4867 | Enable -mvsx-timode by default if LRA and VSX. */ | |
4868 | if (!TARGET_LRA) | |
4869 | { | |
4870 | if (TARGET_VSX_TIMODE) | |
4871 | { | |
4872 | if ((rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) != 0) | |
4873 | warning (0, "-mvsx-timode might need -mlra"); | |
4874 | ||
4875 | else | |
4876 | rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE; | |
4877 | } | |
4878 | } | |
4879 | ||
4880 | else | |
4881 | { | |
4882 | if (TARGET_VSX && !TARGET_VSX_TIMODE | |
4883 | && (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) == 0) | |
4884 | rs6000_isa_flags |= OPTION_MASK_VSX_TIMODE; | |
4885 | } | |
4886 | ||
4887 | /* Explicitly turn on -mallow-movmisalign if we have full ISA 2.07 | |
4888 | support.  If we only have ISA 2.06 support, and the user did not specify | |
4889 | the switch, leave it set to -1 so the movmisalign patterns are enabled, | |
4890 | but we don't enable the full vectorization support. */ | |
4891 | if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE) | |
4892 | TARGET_ALLOW_MOVMISALIGN = 1; | |
4893 | ||
4894 | else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX) | |
4895 | { | |
4896 | if (TARGET_ALLOW_MOVMISALIGN > 0 | |
4897 | && global_options_set.x_TARGET_ALLOW_MOVMISALIGN) | |
4898 | error ("-mallow-movmisalign requires -mvsx"); | |
4899 | ||
4900 | TARGET_ALLOW_MOVMISALIGN = 0; | |
4901 | } | |
4902 | ||
4903 | /* Determine when unaligned vector accesses are permitted, and when | |
4904 | they are preferred over masked Altivec loads. Note that if | |
4905 | TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then | |
4906 | TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is | |
4907 | not true. */ | |
4908 | if (TARGET_EFFICIENT_UNALIGNED_VSX) | |
4909 | { | |
4910 | if (!TARGET_VSX) | |
4911 | { | |
4912 | if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX) | |
4913 | error ("-mefficient-unaligned-vsx requires -mvsx"); | |
4914 | ||
4915 | rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX; | |
4916 | } | |
4917 | ||
4918 | else if (!TARGET_ALLOW_MOVMISALIGN) | |
4919 | { | |
4920 | if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX) | |
4921 | error ("-mefficient-unaligned-vsx requires -mallow-movmisalign"); | |
4922 | ||
4923 | rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX; | |
4924 | } | |
4925 | } | |
4926 | ||
4927 | /* Check whether we should allow small integers into VSX registers. We | |
4928 | require direct move to prevent the register allocator from having to move | |
4929 | variables through memory.  SImode can be used on ISA 2.07, | |
4930 | while HImode and QImode require ISA 3.0. */ | |
4931 | if (TARGET_VSX_SMALL_INTEGER | |
4932 | && (!TARGET_DIRECT_MOVE || !TARGET_P8_VECTOR || !TARGET_UPPER_REGS_DI)) | |
4933 | { | |
4934 | if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_SMALL_INTEGER) | |
4935 | error ("-mvsx-small-integer requires -mpower8-vector, " | |
4936 | "-mupper-regs-di, and -mdirect-move"); | |
4937 | ||
4938 | rs6000_isa_flags &= ~OPTION_MASK_VSX_SMALL_INTEGER; | |
4939 | } | |
4940 | ||
4941 | /* Set long double size before the IEEE 128-bit tests. */ | |
4942 | if (!global_options_set.x_rs6000_long_double_type_size) | |
4943 | { | |
4944 | if (main_target_opt != NULL | |
4945 | && (main_target_opt->x_rs6000_long_double_type_size | |
4946 | != RS6000_DEFAULT_LONG_DOUBLE_SIZE)) | |
4947 | error ("target attribute or pragma changes long double size"); | |
4948 | else | |
4949 | rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE; | |
4950 | } | |
4951 | ||
4952 | /* Set -mabi=ieeelongdouble on some old targets. Note, AIX and Darwin | |
4953 | explicitly redefine TARGET_IEEEQUAD to 0, so those systems will not | |
4954 | pick up this default. */ | |
4955 | #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD) | |
4956 | if (!global_options_set.x_rs6000_ieeequad) | |
4957 | rs6000_ieeequad = 1; | |
4958 | #endif | |
4959 | ||
4960 | /* Enable the default support for IEEE 128-bit floating point on Linux VSX | |
4961 | systems, but don't enable the __float128 keyword. */ | |
4962 | if (TARGET_VSX && TARGET_LONG_DOUBLE_128 | |
4963 | && (TARGET_FLOAT128_ENABLE_TYPE || TARGET_IEEEQUAD) | |
4964 | && ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) == 0)) | |
4965 | rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE; | |
4966 | ||
4967 | /* IEEE 128-bit floating point requires VSX support. */ | |
4968 | if (!TARGET_VSX) | |
4969 | { | |
4970 | if (TARGET_FLOAT128_KEYWORD) | |
4971 | { | |
4972 | if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0) | |
4973 | error ("-mfloat128 requires VSX support"); | |
4974 | ||
4975 | rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE | |
4976 | | OPTION_MASK_FLOAT128_KEYWORD | |
4977 | | OPTION_MASK_FLOAT128_HW); | |
4978 | } | |
4979 | ||
4980 | else if (TARGET_FLOAT128_TYPE) | |
4981 | { | |
4982 | if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) != 0) | |
4983 | error ("-mfloat128-type requires VSX support"); | |
4984 | ||
4985 | rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE | |
4986 | | OPTION_MASK_FLOAT128_KEYWORD | |
4987 | | OPTION_MASK_FLOAT128_HW); | |
4988 | } | |
4989 | } | |
4990 | ||
4991 | /* -mfloat128 and -mfloat128-hardware internally require the underlying IEEE | |
4992 | 128-bit floating point support to be enabled. */ | |
4993 | if (!TARGET_FLOAT128_TYPE) | |
4994 | { | |
4995 | if (TARGET_FLOAT128_KEYWORD) | |
4996 | { | |
4997 | if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0) | |
4998 | { | |
4999 | error ("-mfloat128 requires -mfloat128-type"); | |
5000 | rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE | |
5001 | | OPTION_MASK_FLOAT128_KEYWORD | |
5002 | | OPTION_MASK_FLOAT128_HW); | |
5003 | } | |
5004 | else | |
5005 | rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE; | |
5006 | } | |
5007 | ||
5008 | if (TARGET_FLOAT128_HW) | |
5009 | { | |
5010 | if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0) | |
5011 | { | |
5012 | error ("-mfloat128-hardware requires -mfloat128-type"); | |
5013 | rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW; | |
5014 | } | |
5015 | else | |
5016 | rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE | |
5017 | | OPTION_MASK_FLOAT128_KEYWORD | |
5018 | | OPTION_MASK_FLOAT128_HW); | |
5019 | } | |
5020 | } | |
5021 | ||
5022 | /* If we have -mfloat128-type and full ISA 3.0 support, enable | |
5023 | -mfloat128-hardware by default. However, don't enable the __float128 | |
5024 | keyword. If the user explicitly turned on -mfloat128-hardware, enable the | |
5025 | -mfloat128 option as well if it was not already set. */ | |
5026 | if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW | |
5027 | && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE | |
5028 | && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW)) | |
5029 | rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW; | |
5030 | ||
5031 | if (TARGET_FLOAT128_HW | |
5032 | && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE) | |
5033 | { | |
5034 | if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0) | |
5035 | error ("-mfloat128-hardware requires full ISA 3.0 support"); | |
5036 | ||
5037 | rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW; | |
5038 | } | |
5039 | ||
5040 | if (TARGET_FLOAT128_HW && !TARGET_64BIT) | |
5041 | { | |
5042 | if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0) | |
5043 | error ("-mfloat128-hardware requires -m64"); | |
5044 | ||
5045 | rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW; | |
5046 | } | |
5047 | ||
5048 | if (TARGET_FLOAT128_HW && !TARGET_FLOAT128_KEYWORD | |
5049 | && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0 | |
5050 | && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0) | |
5051 | rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD; | |
5052 | ||
5053 | /* Print the options after updating the defaults. */ | |
5054 | if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) | |
5055 | rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags); | |
5056 | ||
5057 | /* E500mc does "better" if we inline more aggressively. Respect the | |
5058 | user's opinion, though. */ | |
5059 | if (rs6000_block_move_inline_limit == 0 | |
5060 | && (rs6000_cpu == PROCESSOR_PPCE500MC | |
5061 | || rs6000_cpu == PROCESSOR_PPCE500MC64 | |
5062 | || rs6000_cpu == PROCESSOR_PPCE5500 | |
5063 | || rs6000_cpu == PROCESSOR_PPCE6500)) | |
5064 | rs6000_block_move_inline_limit = 128; | |
5065 | ||
5066 | /* store_one_arg depends on expand_block_move to handle at least the | |
5067 | size of reg_parm_stack_space. */ | |
5068 | if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32)) | |
5069 | rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32); | |
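/* On the ABIs that reserve a parameter save area, that space holds
   the 8 GPR argument slots, i.e. 8 * 8 = 64 bytes for 64-bit and
   8 * 4 = 32 bytes for 32-bit, which is where the floor above comes
   from.  */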
5070 | ||
5071 | if (global_init_p) | |
5072 | { | |
5073 | /* If the appropriate debug option is enabled, replace the target hooks | |
5074 | with debug versions that call the real version and then prints | |
5075 | debugging information. */ | |
5076 | if (TARGET_DEBUG_COST) | |
5077 | { | |
5078 | targetm.rtx_costs = rs6000_debug_rtx_costs; | |
5079 | targetm.address_cost = rs6000_debug_address_cost; | |
5080 | targetm.sched.adjust_cost = rs6000_debug_adjust_cost; | |
5081 | } | |
5082 | ||
5083 | if (TARGET_DEBUG_ADDR) | |
5084 | { | |
5085 | targetm.legitimate_address_p = rs6000_debug_legitimate_address_p; | |
5086 | targetm.legitimize_address = rs6000_debug_legitimize_address; | |
5087 | rs6000_secondary_reload_class_ptr | |
5088 | = rs6000_debug_secondary_reload_class; | |
5089 | rs6000_secondary_memory_needed_ptr | |
5090 | = rs6000_debug_secondary_memory_needed; | |
5091 | rs6000_cannot_change_mode_class_ptr | |
5092 | = rs6000_debug_cannot_change_mode_class; | |
5093 | rs6000_preferred_reload_class_ptr | |
5094 | = rs6000_debug_preferred_reload_class; | |
5095 | rs6000_legitimize_reload_address_ptr | |
5096 | = rs6000_debug_legitimize_reload_address; | |
5097 | rs6000_mode_dependent_address_ptr | |
5098 | = rs6000_debug_mode_dependent_address; | |
5099 | } | |
5100 | ||
5101 | if (rs6000_veclibabi_name) | |
5102 | { | |
5103 | if (strcmp (rs6000_veclibabi_name, "mass") == 0) | |
5104 | rs6000_veclib_handler = rs6000_builtin_vectorized_libmass; | |
5105 | else | |
5106 | { | |
5107 | error ("unknown vectorization library ABI type (%s) for " | |
5108 | "-mveclibabi= switch", rs6000_veclibabi_name); | |
5109 | ret = false; | |
5110 | } | |
5111 | } | |
5112 | } | |
5113 | ||
5114 | /* Disable VSX and AltiVec silently if the user switched cpus to power7 in a | |
5115 | target attribute or pragma, which automatically enables both options, | |
5116 | unless the AltiVec ABI was set.  That ABI is set by default for 64-bit, | |
5117 | but not for 32-bit. */ | |
5118 | if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi) | |
5119 | rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC | |
5120 | | OPTION_MASK_FLOAT128_TYPE | |
5121 | | OPTION_MASK_FLOAT128_KEYWORD) | |
5122 | & ~rs6000_isa_flags_explicit); | |
5123 | ||
5124 | /* Enable Altivec ABI for AIX -maltivec. */ | |
5125 | if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX)) | |
5126 | { | |
5127 | if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi) | |
5128 | error ("target attribute or pragma changes AltiVec ABI"); | |
5129 | else | |
5130 | rs6000_altivec_abi = 1; | |
5131 | } | |
5132 | ||
5133 | /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For | |
5134 | PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can | |
5135 | be explicitly overridden in either case. */ | |
5136 | if (TARGET_ELF) | |
5137 | { | |
5138 | if (!global_options_set.x_rs6000_altivec_abi | |
5139 | && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX)) | |
5140 | { | |
5141 | if (main_target_opt != NULL && | |
5142 | !main_target_opt->x_rs6000_altivec_abi) | |
5143 | error ("target attribute or pragma changes AltiVec ABI"); | |
5144 | else | |
5145 | rs6000_altivec_abi = 1; | |
5146 | } | |
5147 | } | |
5148 | ||
5149 | /* Set the Darwin64 ABI as the default for 64-bit Darwin. | |
5150 | So far, the only darwin64 targets are also Mach-O. */ | |
5151 | if (TARGET_MACHO | |
5152 | && DEFAULT_ABI == ABI_DARWIN | |
5153 | && TARGET_64BIT) | |
5154 | { | |
5155 | if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi) | |
5156 | error ("target attribute or pragma changes darwin64 ABI"); | |
5157 | else | |
5158 | { | |
5159 | rs6000_darwin64_abi = 1; | |
5160 | /* Default to natural alignment, for better performance. */ | |
5161 | rs6000_alignment_flags = MASK_ALIGN_NATURAL; | |
5162 | } | |
5163 | } | |
5164 | ||
5165 | /* Place FP constants in the constant pool instead of the TOC | |
5166 | if section anchors are enabled. */ | |
5167 | if (flag_section_anchors | |
5168 | && !global_options_set.x_TARGET_NO_FP_IN_TOC) | |
5169 | TARGET_NO_FP_IN_TOC = 1; | |
5170 | ||
5171 | if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) | |
5172 | rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags); | |
5173 | ||
5174 | #ifdef SUBTARGET_OVERRIDE_OPTIONS | |
5175 | SUBTARGET_OVERRIDE_OPTIONS; | |
5176 | #endif | |
5177 | #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS | |
5178 | SUBSUBTARGET_OVERRIDE_OPTIONS; | |
5179 | #endif | |
5180 | #ifdef SUB3TARGET_OVERRIDE_OPTIONS | |
5181 | SUB3TARGET_OVERRIDE_OPTIONS; | |
5182 | #endif | |
5183 | ||
5184 | if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) | |
5185 | rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags); | |
5186 | ||
5187 | /* For the E500 family of cores, reset the single/double FP flags to let us | |
5188 | check that they remain constant across attributes or pragmas. Also, | |
5189 | clear a possible request for string instructions, which are not supported | |
5190 | on these cores and which we might have silently enabled above for -Os. | |
5191 | ||
5192 | For other families, clear ISEL in case it was set implicitly. | |
5193 | */ | |
5194 | ||
5195 | switch (rs6000_cpu) | |
5196 | { | |
5197 | case PROCESSOR_PPC8540: | |
5198 | case PROCESSOR_PPC8548: | |
5199 | case PROCESSOR_PPCE500MC: | |
5200 | case PROCESSOR_PPCE500MC64: | |
5201 | case PROCESSOR_PPCE5500: | |
5202 | case PROCESSOR_PPCE6500: | |
5203 | ||
5204 | rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE; | |
5205 | rs6000_double_float = TARGET_E500_DOUBLE; | |
5206 | ||
5207 | rs6000_isa_flags &= ~OPTION_MASK_STRING; | |
5208 | ||
5209 | break; | |
5210 | ||
5211 | default: | |
5212 | ||
5213 | if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL)) | |
5214 | rs6000_isa_flags &= ~OPTION_MASK_ISEL; | |
5215 | ||
5216 | break; | |
5217 | } | |
5218 | ||
5219 | if (main_target_opt) | |
5220 | { | |
5221 | if (main_target_opt->x_rs6000_single_float != rs6000_single_float) | |
5222 | error ("target attribute or pragma changes single precision floating " | |
5223 | "point"); | |
5224 | if (main_target_opt->x_rs6000_double_float != rs6000_double_float) | |
5225 | error ("target attribute or pragma changes double precision floating " | |
5226 | "point"); | |
5227 | } | |
5228 | ||
5229 | /* Detect invalid option combinations with E500. */ | |
5230 | CHECK_E500_OPTIONS; | |
5231 | ||
5232 | rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4 | |
5233 | && rs6000_cpu != PROCESSOR_POWER5 | |
5234 | && rs6000_cpu != PROCESSOR_POWER6 | |
5235 | && rs6000_cpu != PROCESSOR_POWER7 | |
5236 | && rs6000_cpu != PROCESSOR_POWER8 | |
5237 | && rs6000_cpu != PROCESSOR_POWER9 | |
5238 | && rs6000_cpu != PROCESSOR_PPCA2 | |
5239 | && rs6000_cpu != PROCESSOR_CELL | |
5240 | && rs6000_cpu != PROCESSOR_PPC476); | |
5241 | rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4 | |
5242 | || rs6000_cpu == PROCESSOR_POWER5 | |
5243 | || rs6000_cpu == PROCESSOR_POWER7 | |
5244 | || rs6000_cpu == PROCESSOR_POWER8); | |
5245 | rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4 | |
5246 | || rs6000_cpu == PROCESSOR_POWER5 | |
5247 | || rs6000_cpu == PROCESSOR_POWER6 | |
5248 | || rs6000_cpu == PROCESSOR_POWER7 | |
5249 | || rs6000_cpu == PROCESSOR_POWER8 | |
5250 | || rs6000_cpu == PROCESSOR_POWER9 | |
5251 | || rs6000_cpu == PROCESSOR_PPCE500MC | |
5252 | || rs6000_cpu == PROCESSOR_PPCE500MC64 | |
5253 | || rs6000_cpu == PROCESSOR_PPCE5500 | |
5254 | || rs6000_cpu == PROCESSOR_PPCE6500); | |
5255 | ||
5256 | /* Allow debug switches to override the above settings. These are set to -1 | |
5257 | in powerpcspe.opt to indicate the user hasn't directly set the switch. */ | |
5258 | if (TARGET_ALWAYS_HINT >= 0) | |
5259 | rs6000_always_hint = TARGET_ALWAYS_HINT; | |
5260 | ||
5261 | if (TARGET_SCHED_GROUPS >= 0) | |
5262 | rs6000_sched_groups = TARGET_SCHED_GROUPS; | |
5263 | ||
5264 | if (TARGET_ALIGN_BRANCH_TARGETS >= 0) | |
5265 | rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS; | |
5266 | ||
5267 | rs6000_sched_restricted_insns_priority | |
5268 | = (rs6000_sched_groups ? 1 : 0); | |
5269 | ||
5270 | /* Handle -msched-costly-dep option. */ | |
5271 | rs6000_sched_costly_dep | |
5272 | = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly); | |
5273 | ||
5274 | if (rs6000_sched_costly_dep_str) | |
5275 | { | |
5276 | if (! strcmp (rs6000_sched_costly_dep_str, "no")) | |
5277 | rs6000_sched_costly_dep = no_dep_costly; | |
5278 | else if (! strcmp (rs6000_sched_costly_dep_str, "all")) | |
5279 | rs6000_sched_costly_dep = all_deps_costly; | |
5280 | else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load")) | |
5281 | rs6000_sched_costly_dep = true_store_to_load_dep_costly; | |
5282 | else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load")) | |
5283 | rs6000_sched_costly_dep = store_to_load_dep_costly; | |
5284 | else | |
5285 | rs6000_sched_costly_dep = ((enum rs6000_dependence_cost) | |
5286 | atoi (rs6000_sched_costly_dep_str)); | |
5287 | } | |
5288 | ||
5289 | /* Handle -minsert-sched-nops option. */ | |
5290 | rs6000_sched_insert_nops | |
5291 | = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none); | |
5292 | ||
5293 | if (rs6000_sched_insert_nops_str) | |
5294 | { | |
5295 | if (! strcmp (rs6000_sched_insert_nops_str, "no")) | |
5296 | rs6000_sched_insert_nops = sched_finish_none; | |
5297 | else if (! strcmp (rs6000_sched_insert_nops_str, "pad")) | |
5298 | rs6000_sched_insert_nops = sched_finish_pad_groups; | |
5299 | else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact")) | |
5300 | rs6000_sched_insert_nops = sched_finish_regroup_exact; | |
5301 | else | |
5302 | rs6000_sched_insert_nops = ((enum rs6000_nop_insertion) | |
5303 | atoi (rs6000_sched_insert_nops_str)); | |
5304 | } | |
5305 | ||
5306 | /* Handle the stack protector options.  */ | |
5307 | if (!global_options_set.x_rs6000_stack_protector_guard) | |
5308 | #ifdef TARGET_THREAD_SSP_OFFSET | |
5309 | rs6000_stack_protector_guard = SSP_TLS; | |
5310 | #else | |
5311 | rs6000_stack_protector_guard = SSP_GLOBAL; | |
5312 | #endif | |
5313 | ||
5314 | #ifdef TARGET_THREAD_SSP_OFFSET | |
5315 | rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET; | |
5316 | rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2; | |
5317 | #endif | |
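/* r13 is the thread pointer in the 64-bit PowerPC ELF ABI and r2 in
   the 32-bit SVR4 ABI, so a TLS-based guard is addressed as a fixed
   offset from the appropriate thread-pointer register.  */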
5318 | ||
5319 | if (global_options_set.x_rs6000_stack_protector_guard_offset_str) | |
5320 | { | |
5321 | char *endp; | |
5322 | const char *str = rs6000_stack_protector_guard_offset_str; | |
5323 | ||
5324 | errno = 0; | |
5325 | long offset = strtol (str, &endp, 0); | |
5326 | if (!*str || *endp || errno) | |
5327 | error ("%qs is not a valid number " | |
5328 | "in -mstack-protector-guard-offset=", str); | |
5329 | ||
5330 | if (!IN_RANGE (offset, -0x8000, 0x7fff) | |
5331 | || (TARGET_64BIT && (offset & 3))) | |
5332 | error ("%qs is not a valid offset " | |
5333 | "in -mstack-protector-guard-offset=", str); | |
5334 | ||
5335 | rs6000_stack_protector_guard_offset = offset; | |
5336 | } | |
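/* Usage example (hypothetical offset): -mstack-protector-guard-offset=0x28
   is accepted because it fits the signed 16-bit displacement of a
   D-form access; the additional 4-byte alignment requirement on
   64-bit matches the DS-form displacement used by the 64-bit ld/std
   instructions.  */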
5337 | ||
5338 | if (global_options_set.x_rs6000_stack_protector_guard_reg_str) | |
5339 | { | |
5340 | const char *str = rs6000_stack_protector_guard_reg_str; | |
5341 | int reg = decode_reg_name (str); | |
5342 | ||
5343 | if (!IN_RANGE (reg, 1, 31)) | |
5344 | error ("%qs is not a valid base register " | |
5345 | "in -mstack-protector-guard-reg=", str); | |
5346 | ||
5347 | rs6000_stack_protector_guard_reg = reg; | |
5348 | } | |
5349 | ||
5350 | if (rs6000_stack_protector_guard == SSP_TLS | |
5351 | && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31)) | |
5352 | error ("-mstack-protector-guard=tls needs a valid base register"); | |
5353 | ||
5354 | if (global_init_p) | |
5355 | { | |
5356 | #ifdef TARGET_REGNAMES | |
5357 | /* If the user desires alternate register names, copy in the | |
5358 | alternate names now. */ | |
5359 | if (TARGET_REGNAMES) | |
5360 | memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names)); | |
5361 | #endif | |
5362 | ||
5363 | /* Set aix_struct_return last, after the ABI is determined. | |
5364 | If -maix-struct-return or -msvr4-struct-return was explicitly | |
5365 | used, don't override with the ABI default. */ | |
5366 | if (!global_options_set.x_aix_struct_return) | |
5367 | aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET); | |
5368 | ||
5369 | #if 0 | |
5370 | /* IBM XL compiler defaults to unsigned bitfields. */ | |
5371 | if (TARGET_XL_COMPAT) | |
5372 | flag_signed_bitfields = 0; | |
5373 | #endif | |
5374 | ||
5375 | if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD) | |
5376 | REAL_MODE_FORMAT (TFmode) = &ibm_extended_format; | |
5377 | ||
5378 | ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1); | |
5379 | ||
5380 | /* We can only guarantee the availability of DI pseudo-ops when | |
5381 | assembling for 64-bit targets. */ | |
5382 | if (!TARGET_64BIT) | |
5383 | { | |
5384 | targetm.asm_out.aligned_op.di = NULL; | |
5385 | targetm.asm_out.unaligned_op.di = NULL; | |
5386 | } | |
5387 | ||
5388 | ||
5389 | /* Set branch target alignment, if not optimizing for size. */ | |
5390 | if (!optimize_size) | |
5391 | { | |
5392 | /* Cell wants to be 8-byte aligned for dual issue.  Titan wants to be | |
5393 | 8-byte aligned to avoid misprediction by the branch predictor. */ | |
5394 | if (rs6000_cpu == PROCESSOR_TITAN | |
5395 | || rs6000_cpu == PROCESSOR_CELL) | |
5396 | { | |
5397 | if (align_functions <= 0) | |
5398 | align_functions = 8; | |
5399 | if (align_jumps <= 0) | |
5400 | align_jumps = 8; | |
5401 | if (align_loops <= 0) | |
5402 | align_loops = 8; | |
5403 | } | |
5404 | if (rs6000_align_branch_targets) | |
5405 | { | |
5406 | if (align_functions <= 0) | |
5407 | align_functions = 16; | |
5408 | if (align_jumps <= 0) | |
5409 | align_jumps = 16; | |
5410 | if (align_loops <= 0) | |
5411 | { | |
5412 | can_override_loop_align = 1; | |
5413 | align_loops = 16; | |
5414 | } | |
5415 | } | |
5416 | if (align_jumps_max_skip <= 0) | |
5417 | align_jumps_max_skip = 15; | |
5418 | if (align_loops_max_skip <= 0) | |
5419 | align_loops_max_skip = 15; | |
5420 | } | |
5421 | ||
5422 | /* Arrange to save and restore machine status around nested functions. */ | |
5423 | init_machine_status = rs6000_init_machine_status; | |
5424 | ||
5425 | /* We should always be splitting complex arguments, but we can't break | |
5426 | Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */ | |
5427 | if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) | |
5428 | targetm.calls.split_complex_arg = NULL; | |
5429 | ||
5430 | /* The AIX and ELFv1 ABIs define standard function descriptors. */ | |
5431 | if (DEFAULT_ABI == ABI_AIX) | |
5432 | targetm.calls.custom_function_descriptors = 0; | |
5433 | } | |
5434 | ||
5435 | /* Initialize rs6000_cost with the appropriate target costs. */ | |
5436 | if (optimize_size) | |
5437 | rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost; | |
5438 | else | |
5439 | switch (rs6000_cpu) | |
5440 | { | |
5441 | case PROCESSOR_RS64A: | |
5442 | rs6000_cost = &rs64a_cost; | |
5443 | break; | |
5444 | ||
5445 | case PROCESSOR_MPCCORE: | |
5446 | rs6000_cost = &mpccore_cost; | |
5447 | break; | |
5448 | ||
5449 | case PROCESSOR_PPC403: | |
5450 | rs6000_cost = &ppc403_cost; | |
5451 | break; | |
5452 | ||
5453 | case PROCESSOR_PPC405: | |
5454 | rs6000_cost = &ppc405_cost; | |
5455 | break; | |
5456 | ||
5457 | case PROCESSOR_PPC440: | |
5458 | rs6000_cost = &ppc440_cost; | |
5459 | break; | |
5460 | ||
5461 | case PROCESSOR_PPC476: | |
5462 | rs6000_cost = &ppc476_cost; | |
5463 | break; | |
5464 | ||
5465 | case PROCESSOR_PPC601: | |
5466 | rs6000_cost = &ppc601_cost; | |
5467 | break; | |
5468 | ||
5469 | case PROCESSOR_PPC603: | |
5470 | rs6000_cost = &ppc603_cost; | |
5471 | break; | |
5472 | ||
5473 | case PROCESSOR_PPC604: | |
5474 | rs6000_cost = &ppc604_cost; | |
5475 | break; | |
5476 | ||
5477 | case PROCESSOR_PPC604e: | |
5478 | rs6000_cost = &ppc604e_cost; | |
5479 | break; | |
5480 | ||
5481 | case PROCESSOR_PPC620: | |
5482 | rs6000_cost = &ppc620_cost; | |
5483 | break; | |
5484 | ||
5485 | case PROCESSOR_PPC630: | |
5486 | rs6000_cost = &ppc630_cost; | |
5487 | break; | |
5488 | ||
5489 | case PROCESSOR_CELL: | |
5490 | rs6000_cost = &ppccell_cost; | |
5491 | break; | |
5492 | ||
5493 | case PROCESSOR_PPC750: | |
5494 | case PROCESSOR_PPC7400: | |
5495 | rs6000_cost = &ppc750_cost; | |
5496 | break; | |
5497 | ||
5498 | case PROCESSOR_PPC7450: | |
5499 | rs6000_cost = &ppc7450_cost; | |
5500 | break; | |
5501 | ||
5502 | case PROCESSOR_PPC8540: | |
5503 | case PROCESSOR_PPC8548: | |
5504 | rs6000_cost = &ppc8540_cost; | |
5505 | break; | |
5506 | ||
5507 | case PROCESSOR_PPCE300C2: | |
5508 | case PROCESSOR_PPCE300C3: | |
5509 | rs6000_cost = &ppce300c2c3_cost; | |
5510 | break; | |
5511 | ||
5512 | case PROCESSOR_PPCE500MC: | |
5513 | rs6000_cost = &ppce500mc_cost; | |
5514 | break; | |
5515 | ||
5516 | case PROCESSOR_PPCE500MC64: | |
5517 | rs6000_cost = &ppce500mc64_cost; | |
5518 | break; | |
5519 | ||
5520 | case PROCESSOR_PPCE5500: | |
5521 | rs6000_cost = &ppce5500_cost; | |
5522 | break; | |
5523 | ||
5524 | case PROCESSOR_PPCE6500: | |
5525 | rs6000_cost = &ppce6500_cost; | |
5526 | break; | |
5527 | ||
5528 | case PROCESSOR_TITAN: | |
5529 | rs6000_cost = &titan_cost; | |
5530 | break; | |
5531 | ||
5532 | case PROCESSOR_POWER4: | |
5533 | case PROCESSOR_POWER5: | |
5534 | rs6000_cost = &power4_cost; | |
5535 | break; | |
5536 | ||
5537 | case PROCESSOR_POWER6: | |
5538 | rs6000_cost = &power6_cost; | |
5539 | break; | |
5540 | ||
5541 | case PROCESSOR_POWER7: | |
5542 | rs6000_cost = &power7_cost; | |
5543 | break; | |
5544 | ||
5545 | case PROCESSOR_POWER8: | |
5546 | rs6000_cost = &power8_cost; | |
5547 | break; | |
5548 | ||
5549 | case PROCESSOR_POWER9: | |
5550 | rs6000_cost = &power9_cost; | |
5551 | break; | |
5552 | ||
5553 | case PROCESSOR_PPCA2: | |
5554 | rs6000_cost = &ppca2_cost; | |
5555 | break; | |
5556 | ||
5557 | default: | |
5558 | gcc_unreachable (); | |
5559 | } | |
5560 | ||
5561 | if (global_init_p) | |
5562 | { | |
5563 | maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, | |
5564 | rs6000_cost->simultaneous_prefetches, | |
5565 | global_options.x_param_values, | |
5566 | global_options_set.x_param_values); | |
5567 | maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size, | |
5568 | global_options.x_param_values, | |
5569 | global_options_set.x_param_values); | |
5570 | maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, | |
5571 | rs6000_cost->cache_line_size, | |
5572 | global_options.x_param_values, | |
5573 | global_options_set.x_param_values); | |
5574 | maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size, | |
5575 | global_options.x_param_values, | |
5576 | global_options_set.x_param_values); | |
5577 | ||
5578 | /* Increase loop peeling limits based on performance analysis. */ | |
5579 | maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400, | |
5580 | global_options.x_param_values, | |
5581 | global_options_set.x_param_values); | |
5582 | maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400, | |
5583 | global_options.x_param_values, | |
5584 | global_options_set.x_param_values); | |
5585 | ||
5586 | /* Use the 'model' -fsched-pressure algorithm by default. */ | |
5587 | maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, | |
5588 | SCHED_PRESSURE_MODEL, | |
5589 | global_options.x_param_values, | |
5590 | global_options_set.x_param_values); | |
5591 | ||
5592 | /* If using typedef char *va_list, signal that | |
5593 | __builtin_va_start (&ap, 0) can be optimized to | |
5594 | ap = __builtin_next_arg (0). */ | |
5595 | if (DEFAULT_ABI != ABI_V4) | |
5596 | targetm.expand_builtin_va_start = NULL; | |
5597 | } | |
5598 | ||
5599 | /* Set up single/double float flags. | |
5600 | If TARGET_HARD_FLOAT is set but neither single nor double is set, | |
5601 | then set both flags. */ | |
5602 | if (TARGET_HARD_FLOAT && TARGET_FPRS | |
5603 | && rs6000_single_float == 0 && rs6000_double_float == 0) | |
5604 | rs6000_single_float = rs6000_double_float = 1; | |
5605 | ||
5606 | /* If not explicitly specified via option, decide whether to generate indexed | |
5607 | load/store instructions. A value of -1 indicates that the | |
5608 | initial value of this variable has not been overwritten. During | |
5609 | compilation, TARGET_AVOID_XFORM is either 0 or 1. */ | |
5610 | if (TARGET_AVOID_XFORM == -1) | |
5611 | /* Avoid indexed addressing when targeting Power6 in order to avoid the | |
5612 | DERAT mispredict penalty. However the LVE and STVE altivec instructions | |
5613 | need indexed accesses and the type used is the scalar type of the element | |
5614 | being loaded or stored. */ | |
5615 | TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB | |
5616 | && !TARGET_ALTIVEC); | |
5617 | ||
5618 | /* Set the -mrecip options. */ | |
5619 | if (rs6000_recip_name) | |
5620 | { | |
5621 | char *p = ASTRDUP (rs6000_recip_name); | |
5622 | char *q; | |
5623 | unsigned int mask, i; | |
5624 | bool invert; | |
5625 | ||
5626 | while ((q = strtok (p, ",")) != NULL) | |
5627 | { | |
5628 | p = NULL; | |
5629 | if (*q == '!') | |
5630 | { | |
5631 | invert = true; | |
5632 | q++; | |
5633 | } | |
5634 | else | |
5635 | invert = false; | |
5636 | ||
5637 | if (!strcmp (q, "default")) | |
5638 | mask = ((TARGET_RECIP_PRECISION) | |
5639 | ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION); | |
5640 | else | |
5641 | { | |
5642 | for (i = 0; i < ARRAY_SIZE (recip_options); i++) | |
5643 | if (!strcmp (q, recip_options[i].string)) | |
5644 | { | |
5645 | mask = recip_options[i].mask; | |
5646 | break; | |
5647 | } | |
5648 | ||
5649 | if (i == ARRAY_SIZE (recip_options)) | |
5650 | { | |
5651 | error ("unknown option for -mrecip=%s", q); | |
5652 | invert = false; | |
5653 | mask = 0; | |
5654 | ret = false; | |
5655 | } | |
5656 | } | |
5657 | ||
5658 | if (invert) | |
5659 | rs6000_recip_control &= ~mask; | |
5660 | else | |
5661 | rs6000_recip_control |= mask; | |
5662 | } | |
5663 | } | |
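/* Illustrative example (assuming the usual recip_options[] entries such as
   "div" and "rsqrtd"): -mrecip=div,!rsqrtd first ORs the mask for the
   "div" reciprocal estimates into rs6000_recip_control, then clears the
   "rsqrtd" bit because of the leading '!'.  */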
5664 | ||
5665 | /* Set the builtin mask of the various options that affect which | |
5666 | builtins are available.  In the past we used target_flags, but we've run out | |
5667 | of bits, and some options like SPE and PAIRED are no longer in | |
5668 | target_flags. */ | |
5669 | rs6000_builtin_mask = rs6000_builtin_mask_calculate (); | |
5670 | if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET) | |
5671 | rs6000_print_builtin_options (stderr, 0, "builtin mask", | |
5672 | rs6000_builtin_mask); | |
5673 | ||
5674 | /* Initialize all of the registers. */ | |
5675 | rs6000_init_hard_regno_mode_ok (global_init_p); | |
5676 | ||
5677 | /* Save the initial options in case the user uses function-specific options. */ | |
5678 | if (global_init_p) | |
5679 | target_option_default_node = target_option_current_node | |
5680 | = build_target_option_node (&global_options); | |
5681 | ||
5682 | /* If not explicitly specified via option, decide whether to generate the | |
5683 | extra blr's required to preserve the link stack on some cpus (e.g. the 476). */ | |
5684 | if (TARGET_LINK_STACK == -1) | |
5685 | SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic); | |
5686 | ||
5687 | return ret; | |
5688 | } | |
5689 | ||
5690 | /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to | |
5691 | define the target cpu type. */ | |
5692 | ||
5693 | static void | |
5694 | rs6000_option_override (void) | |
5695 | { | |
5696 | (void) rs6000_option_override_internal (true); | |
5697 | } | |
5698 | ||
5699 | \f | |
5700 | /* Implement targetm.vectorize.builtin_mask_for_load. */ | |
5701 | static tree | |
5702 | rs6000_builtin_mask_for_load (void) | |
5703 | { | |
5704 | /* Don't use lvsl/vperm for P8 and similarly efficient machines. */ | |
5705 | if ((TARGET_ALTIVEC && !TARGET_VSX) | |
5706 | || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX)) | |
5707 | return altivec_builtin_mask_for_load; | |
5708 | else | |
5709 | return 0; | |
5710 | } | |
5711 | ||
5712 | /* Implement LOOP_ALIGN. */ | |
5713 | int | |
5714 | rs6000_loop_align (rtx label) | |
5715 | { | |
5716 | basic_block bb; | |
5717 | int ninsns; | |
5718 | ||
5719 | /* Don't override loop alignment if -falign-loops was specified. */ | |
5720 | if (!can_override_loop_align) | |
5721 | return align_loops_log; | |
5722 | ||
5723 | bb = BLOCK_FOR_INSN (label); | |
5724 | ninsns = num_loop_insns (bb->loop_father); | |
5725 | ||
5726 | /* Align small loops to 32 bytes to fit in an icache sector; otherwise return the default. */ | |
5727 | if (ninsns > 4 && ninsns <= 8 | |
5728 | && (rs6000_cpu == PROCESSOR_POWER4 | |
5729 | || rs6000_cpu == PROCESSOR_POWER5 | |
5730 | || rs6000_cpu == PROCESSOR_POWER6 | |
5731 | || rs6000_cpu == PROCESSOR_POWER7 | |
5732 | || rs6000_cpu == PROCESSOR_POWER8 | |
5733 | || rs6000_cpu == PROCESSOR_POWER9)) | |
5734 | return 5; | |
5735 | else | |
5736 | return align_loops_log; | |
5737 | } | |
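/* Illustrative example (assumed flags): for a 6-instruction loop on
   POWER8 with -falign-loops left at its default, this returns 5, i.e. a
   log2 alignment of 2**5 = 32 bytes; rs6000_loop_align_max_skip below
   then allows (1 << 5) - 1 = 31 bytes of padding to reach that boundary.  */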
5738 | ||
5739 | /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */ | |
5740 | static int | |
5741 | rs6000_loop_align_max_skip (rtx_insn *label) | |
5742 | { | |
5743 | return (1 << rs6000_loop_align (label)) - 1; | |
5744 | } | |
5745 | ||
5746 | /* Return true iff a data reference of TYPE can reach vector alignment (16) | |
5747 | after some number of peeling iterations.  This routine does not determine | |
5748 | how many iterations are required to reach the desired alignment. */ | |
5749 | ||
5750 | static bool | |
5751 | rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed) | |
5752 | { | |
5753 | if (is_packed) | |
5754 | return false; | |
5755 | ||
5756 | if (TARGET_32BIT) | |
5757 | { | |
5758 | if (rs6000_alignment_flags == MASK_ALIGN_NATURAL) | |
5759 | return true; | |
5760 | ||
5761 | if (rs6000_alignment_flags == MASK_ALIGN_POWER) | |
5762 | return true; | |
5763 | ||
5764 | return false; | |
5765 | } | |
5766 | else | |
5767 | { | |
5768 | if (TARGET_MACHO) | |
5769 | return false; | |
5770 | ||
5771 | /* Assume that all other types are naturally aligned.  CHECKME! */ | |
5772 | return true; | |
5773 | } | |
5774 | } | |
5775 | ||
5776 | /* Return true if the vector misalignment factor is supported by the | |
5777 | target. */ | |
5778 | static bool | |
5779 | rs6000_builtin_support_vector_misalignment (machine_mode mode, | |
5780 | const_tree type, | |
5781 | int misalignment, | |
5782 | bool is_packed) | |
5783 | { | |
5784 | if (TARGET_VSX) | |
5785 | { | |
5786 | if (TARGET_EFFICIENT_UNALIGNED_VSX) | |
5787 | return true; | |
5788 | ||
5789 | /* Return false if the movmisalign pattern is not supported for this mode. */ | |
5790 | if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing) | |
5791 | return false; | |
5792 | ||
5793 | if (misalignment == -1) | |
5794 | { | |
5795 | /* The misalignment factor is unknown at compile time, but we | |
5796 | know the reference is word aligned. */ | |
5797 | if (rs6000_vector_alignment_reachable (type, is_packed)) | |
5798 | { | |
5799 | int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type)); | |
5800 | ||
5801 | if (element_size == 64 || element_size == 32) | |
5802 | return true; | |
5803 | } | |
5804 | ||
5805 | return false; | |
5806 | } | |
5807 | ||
5808 | /* VSX supports word-aligned vector accesses. */ | |
5809 | if (misalignment % 4 == 0) | |
5810 | return true; | |
5811 | } | |
5812 | return false; | |
5813 | } | |
5814 | ||
5815 | /* Implement targetm.vectorize.builtin_vectorization_cost. */ | |
5816 | static int | |
5817 | rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, | |
5818 | tree vectype, int misalign) | |
5819 | { | |
5820 | unsigned elements; | |
5821 | tree elem_type; | |
5822 | ||
5823 | switch (type_of_cost) | |
5824 | { | |
5825 | case scalar_stmt: | |
5826 | case scalar_load: | |
5827 | case scalar_store: | |
5828 | case vector_stmt: | |
5829 | case vector_load: | |
5830 | case vector_store: | |
5831 | case vec_to_scalar: | |
5832 | case scalar_to_vec: | |
5833 | case cond_branch_not_taken: | |
5834 | return 1; | |
5835 | ||
5836 | case vec_perm: | |
5837 | if (TARGET_VSX) | |
5838 | return 3; | |
5839 | else | |
5840 | return 1; | |
5841 | ||
5842 | case vec_promote_demote: | |
5843 | if (TARGET_VSX) | |
5844 | return 4; | |
5845 | else | |
5846 | return 1; | |
5847 | ||
5848 | case cond_branch_taken: | |
5849 | return 3; | |
5850 | ||
5851 | case unaligned_load: | |
5852 | if (TARGET_P9_VECTOR) | |
5853 | return 3; | |
5854 | ||
5855 | if (TARGET_EFFICIENT_UNALIGNED_VSX) | |
5856 | return 1; | |
5857 | ||
5858 | if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN) | |
5859 | { | |
5860 | elements = TYPE_VECTOR_SUBPARTS (vectype); | |
5861 | if (elements == 2) | |
5862 | /* Double word aligned. */ | |
5863 | return 2; | |
5864 | ||
5865 | if (elements == 4) | |
5866 | { | |
5867 | switch (misalign) | |
5868 | { | |
5869 | case 8: | |
5870 | /* Double word aligned. */ | |
5871 | return 2; | |
5872 | ||
5873 | case -1: | |
5874 | /* Unknown misalignment. */ | |
5875 | case 4: | |
5876 | case 12: | |
5877 | /* Word aligned. */ | |
5878 | return 22; | |
5879 | ||
5880 | default: | |
5881 | gcc_unreachable (); | |
5882 | } | |
5883 | } | |
5884 | } | |
5885 | ||
5886 | if (TARGET_ALTIVEC) | |
5887 | /* Misaligned loads are not supported. */ | |
5888 | gcc_unreachable (); | |
5889 | ||
5890 | return 2; | |
5891 | ||
5892 | case unaligned_store: | |
5893 | if (TARGET_EFFICIENT_UNALIGNED_VSX) | |
5894 | return 1; | |
5895 | ||
5896 | if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN) | |
5897 | { | |
5898 | elements = TYPE_VECTOR_SUBPARTS (vectype); | |
5899 | if (elements == 2) | |
5900 | /* Double word aligned. */ | |
5901 | return 2; | |
5902 | ||
5903 | if (elements == 4) | |
5904 | { | |
5905 | switch (misalign) | |
5906 | { | |
5907 | case 8: | |
5908 | /* Double word aligned. */ | |
5909 | return 2; | |
5910 | ||
5911 | case -1: | |
5912 | /* Unknown misalignment. */ | |
5913 | case 4: | |
5914 | case 12: | |
5915 | /* Word aligned. */ | |
5916 | return 23; | |
5917 | ||
5918 | default: | |
5919 | gcc_unreachable (); | |
5920 | } | |
5921 | } | |
5922 | } | |
5923 | ||
5924 | if (TARGET_ALTIVEC) | |
5925 | /* Misaligned stores are not supported. */ | |
5926 | gcc_unreachable (); | |
5927 | ||
5928 | return 2; | |
5929 | ||
5930 | case vec_construct: | |
5931 | /* This is a rough approximation assuming non-constant elements | |
5932 | constructed into a vector via element insertion. FIXME: | |
5933 | vec_construct is not granular enough for uniformly good | |
5934 | decisions. If the initialization is a splat, this is | |
5935 | cheaper than we estimate. Improve this someday. */ | |
5936 | elem_type = TREE_TYPE (vectype); | |
5937 | /* 32-bit floats loaded into registers are stored as double | |
5938 | precision, so we need 2 permutes, 2 converts, and 1 merge | |
5939 | to construct a vector of short floats from them. */ | |
5940 | if (SCALAR_FLOAT_TYPE_P (elem_type) | |
5941 | && TYPE_PRECISION (elem_type) == 32) | |
5942 | return 5; | |
5943 | /* On POWER9, integer vector types are built up in GPRs and then | |
5944 | use a direct move (2 cycles). For POWER8 this is even worse, | |
5945 | as we need two direct moves and a merge, and the direct moves | |
5946 | are five cycles. */ | |
5947 | else if (INTEGRAL_TYPE_P (elem_type)) | |
5948 | { | |
5949 | if (TARGET_P9_VECTOR) | |
5950 | return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2; | |
5951 | else | |
5952 | return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 11; | |
5953 | } | |
5954 | else | |
5955 | /* V2DFmode doesn't need a direct move. */ | |
5956 | return 2; | |
5957 | ||
5958 | default: | |
5959 | gcc_unreachable (); | |
5960 | } | |
5961 | } | |
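/* Illustrative readings of the switch above: for an unaligned V4SF load,
   POWER9 returns 3; otherwise, if unaligned VSX accesses are efficient,
   the cost is 1; otherwise, with VSX and -mallow-movmisalign, a
   double-word-aligned access (misalign == 8) costs 2 while a merely
   word-aligned one (misalign == 4 or 12) costs 22.  */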
5962 | ||
5963 | /* Implement targetm.vectorize.preferred_simd_mode. */ | |
5964 | ||
5965 | static machine_mode | |
4c1a1be2 | 5966 | rs6000_preferred_simd_mode (scalar_mode mode) |
01e91138 | 5967 | { |
5968 | if (TARGET_VSX) | |
5969 | switch (mode) | |
5970 | { | |
916ace94 | 5971 | case E_DFmode: |
01e91138 | 5972 | return V2DFmode; |
5973 | default:; | |
5974 | } | |
5975 | if (TARGET_ALTIVEC || TARGET_VSX) | |
5976 | switch (mode) | |
5977 | { | |
916ace94 | 5978 | case E_SFmode: |
01e91138 | 5979 | return V4SFmode; |
916ace94 | 5980 | case E_TImode: |
01e91138 | 5981 | return V1TImode; |
916ace94 | 5982 | case E_DImode: |
01e91138 | 5983 | return V2DImode; |
916ace94 | 5984 | case E_SImode: |
01e91138 | 5985 | return V4SImode; |
916ace94 | 5986 | case E_HImode: |
01e91138 | 5987 | return V8HImode; |
916ace94 | 5988 | case E_QImode: |
01e91138 | 5989 | return V16QImode; |
5990 | default:; | |
5991 | } | |
5992 | if (TARGET_SPE) | |
5993 | switch (mode) | |
5994 | { | |
916ace94 | 5995 | case E_SFmode: |
01e91138 | 5996 | return V2SFmode; |
916ace94 | 5997 | case E_SImode: |
01e91138 | 5998 | return V2SImode; |
5999 | default:; | |
6000 | } | |
6001 | if (TARGET_PAIRED_FLOAT | |
6002 | && mode == SFmode) | |
6003 | return V2SFmode; | |
6004 | return word_mode; | |
6005 | } | |
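/* For example, restating the switch above: with VSX enabled, DFmode
   vectorizes as V2DFmode and SImode as V4SImode; with no vector unit at
   all, word_mode is returned and no vectorization is done.  */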
6006 | ||
6007 | typedef struct _rs6000_cost_data | |
6008 | { | |
6009 | struct loop *loop_info; | |
6010 | unsigned cost[3]; | |
6011 | } rs6000_cost_data; | |
6012 | ||
6013 | /* Test for likely overcommitment of vector hardware resources. If a | |
6014 | loop iteration is relatively large, and too large a percentage of | |
6015 | instructions in the loop are vectorized, the cost model may not | |
6016 | adequately reflect delays from unavailable vector resources. | |
6017 | Penalize the loop body cost for this case. */ | |
6018 | ||
6019 | static void | |
6020 | rs6000_density_test (rs6000_cost_data *data) | |
6021 | { | |
6022 | const int DENSITY_PCT_THRESHOLD = 85; | |
6023 | const int DENSITY_SIZE_THRESHOLD = 70; | |
6024 | const int DENSITY_PENALTY = 10; | |
6025 | struct loop *loop = data->loop_info; | |
6026 | basic_block *bbs = get_loop_body (loop); | |
6027 | int nbbs = loop->num_nodes; | |
6028 | int vec_cost = data->cost[vect_body], not_vec_cost = 0; | |
6029 | int i, density_pct; | |
6030 | ||
6031 | for (i = 0; i < nbbs; i++) | |
6032 | { | |
6033 | basic_block bb = bbs[i]; | |
6034 | gimple_stmt_iterator gsi; | |
6035 | ||
6036 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
6037 | { | |
6038 | gimple *stmt = gsi_stmt (gsi); | |
6039 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
6040 | ||
6041 | if (!STMT_VINFO_RELEVANT_P (stmt_info) | |
6042 | && !STMT_VINFO_IN_PATTERN_P (stmt_info)) | |
6043 | not_vec_cost++; | |
6044 | } | |
6045 | } | |
6046 | ||
6047 | free (bbs); | |
6048 | density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost); | |
6049 | ||
6050 | if (density_pct > DENSITY_PCT_THRESHOLD | |
6051 | && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD) | |
6052 | { | |
6053 | data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100; | |
6054 | if (dump_enabled_p ()) | |
6055 | dump_printf_loc (MSG_NOTE, vect_location, | |
6056 | "density %d%%, cost %d exceeds threshold, penalizing " | |
6057 | "loop body cost by %d%%", density_pct, | |
6058 | vec_cost + not_vec_cost, DENSITY_PENALTY); | |
6059 | } | |
6060 | } | |
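/* Worked example (hypothetical numbers): with vec_cost == 90 and
   not_vec_cost == 8, density_pct is (90 * 100) / 98 == 91; 91 exceeds
   DENSITY_PCT_THRESHOLD and the total size 98 exceeds
   DENSITY_SIZE_THRESHOLD, so the body cost becomes 90 * 110 / 100 == 99.  */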
6061 | ||
6062 | /* Implement targetm.vectorize.init_cost. */ | |
6063 | ||
6064 | /* For each vectorized loop, this var holds TRUE iff a non-memory vector | |
6065 | instruction is needed by the vectorization. */ | |
6066 | static bool rs6000_vect_nonmem; | |
6067 | ||
6068 | static void * | |
6069 | rs6000_init_cost (struct loop *loop_info) | |
6070 | { | |
6071 | rs6000_cost_data *data = XNEW (struct _rs6000_cost_data); | |
6072 | data->loop_info = loop_info; | |
6073 | data->cost[vect_prologue] = 0; | |
6074 | data->cost[vect_body] = 0; | |
6075 | data->cost[vect_epilogue] = 0; | |
6076 | rs6000_vect_nonmem = false; | |
6077 | return data; | |
6078 | } | |
6079 | ||
6080 | /* Implement targetm.vectorize.add_stmt_cost. */ | |
6081 | ||
6082 | static unsigned | |
6083 | rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, | |
6084 | struct _stmt_vec_info *stmt_info, int misalign, | |
6085 | enum vect_cost_model_location where) | |
6086 | { | |
6087 | rs6000_cost_data *cost_data = (rs6000_cost_data *) data; | |
6088 | unsigned retval = 0; | |
6089 | ||
6090 | if (flag_vect_cost_model) | |
6091 | { | |
6092 | tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE; | |
6093 | int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype, | |
6094 | misalign); | |
6095 | /* Statements in an inner loop relative to the loop being | |
6096 | vectorized are weighted more heavily. The value here is | |
6097 | arbitrary and could potentially be improved with analysis. */ | |
6098 | if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info)) | |
6099 | count *= 50; /* FIXME. */ | |
6100 | ||
6101 | retval = (unsigned) (count * stmt_cost); | |
6102 | cost_data->cost[where] += retval; | |
6103 | ||
6104 | /* Check whether we're doing something other than just a copy loop. | |
6105 | Not all such loops may be profitably vectorized; see | |
6106 | rs6000_finish_cost. */ | |
6107 | if ((kind == vec_to_scalar || kind == vec_perm | |
6108 | || kind == vec_promote_demote || kind == vec_construct | |
6109 | || kind == scalar_to_vec) | |
6110 | || (where == vect_body && kind == vector_stmt)) | |
6111 | rs6000_vect_nonmem = true; | |
6112 | } | |
6113 | ||
6114 | return retval; | |
6115 | } | |
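/* Worked example (hypothetical statement): a vector_stmt of count 1 in
   the body of the loop being vectorized contributes 1 * 1 == 1 to
   cost[vect_body]; the same statement inside a nested inner loop is
   weighted by 50 and contributes 50.  */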
6116 | ||
6117 | /* Implement targetm.vectorize.finish_cost. */ | |
6118 | ||
6119 | static void | |
6120 | rs6000_finish_cost (void *data, unsigned *prologue_cost, | |
6121 | unsigned *body_cost, unsigned *epilogue_cost) | |
6122 | { | |
6123 | rs6000_cost_data *cost_data = (rs6000_cost_data *) data; | |
6124 | ||
6125 | if (cost_data->loop_info) | |
6126 | rs6000_density_test (cost_data); | |
6127 | ||
6128 | /* Don't vectorize minimum-vectorization-factor, simple copy loops | |
6129 | that require versioning for any reason. The vectorization is at | |
6130 | best a wash inside the loop, and the versioning checks make | |
6131 | profitability highly unlikely and potentially quite harmful. */ | |
6132 | if (cost_data->loop_info) | |
6133 | { | |
6134 | loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info); | |
6135 | if (!rs6000_vect_nonmem | |
6136 | && LOOP_VINFO_VECT_FACTOR (vec_info) == 2 | |
6137 | && LOOP_REQUIRES_VERSIONING (vec_info)) | |
6138 | cost_data->cost[vect_body] += 10000; | |
6139 | } | |
6140 | ||
6141 | *prologue_cost = cost_data->cost[vect_prologue]; | |
6142 | *body_cost = cost_data->cost[vect_body]; | |
6143 | *epilogue_cost = cost_data->cost[vect_epilogue]; | |
6144 | } | |
6145 | ||
6146 | /* Implement targetm.vectorize.destroy_cost_data. */ | |
6147 | ||
6148 | static void | |
6149 | rs6000_destroy_cost_data (void *data) | |
6150 | { | |
6151 | free (data); | |
6152 | } | |
6153 | ||
6154 | /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a | |
6155 | library with vectorized intrinsics. */ | |
6156 | ||
6157 | static tree | |
6158 | rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out, | |
6159 | tree type_in) | |
6160 | { | |
6161 | char name[32]; | |
6162 | const char *suffix = NULL; | |
6163 | tree fntype, new_fndecl, bdecl = NULL_TREE; | |
6164 | int n_args = 1; | |
6165 | const char *bname; | |
6166 | machine_mode el_mode, in_mode; | |
6167 | int n, in_n; | |
6168 | ||
6169 | /* Libmass is suitable only for unsafe math, since it does not correctly | |
6170 | support parts of IEEE (such as denormals) with the required precision. | |
6171 | Only support it if we have VSX, to use the simd d2 or f4 functions. | |
6172 | XXX: Add variable length support. */ | |
6173 | if (!flag_unsafe_math_optimizations || !TARGET_VSX) | |
6174 | return NULL_TREE; | |
6175 | ||
6176 | el_mode = TYPE_MODE (TREE_TYPE (type_out)); | |
6177 | n = TYPE_VECTOR_SUBPARTS (type_out); | |
6178 | in_mode = TYPE_MODE (TREE_TYPE (type_in)); | |
6179 | in_n = TYPE_VECTOR_SUBPARTS (type_in); | |
6180 | if (el_mode != in_mode | |
6181 | || n != in_n) | |
6182 | return NULL_TREE; | |
6183 | ||
6184 | switch (fn) | |
6185 | { | |
6186 | CASE_CFN_ATAN2: | |
6187 | CASE_CFN_HYPOT: | |
6188 | CASE_CFN_POW: | |
6189 | n_args = 2; | |
6190 | gcc_fallthrough (); | |
6191 | ||
6192 | CASE_CFN_ACOS: | |
6193 | CASE_CFN_ACOSH: | |
6194 | CASE_CFN_ASIN: | |
6195 | CASE_CFN_ASINH: | |
6196 | CASE_CFN_ATAN: | |
6197 | CASE_CFN_ATANH: | |
6198 | CASE_CFN_CBRT: | |
6199 | CASE_CFN_COS: | |
6200 | CASE_CFN_COSH: | |
6201 | CASE_CFN_ERF: | |
6202 | CASE_CFN_ERFC: | |
6203 | CASE_CFN_EXP2: | |
6204 | CASE_CFN_EXP: | |
6205 | CASE_CFN_EXPM1: | |
6206 | CASE_CFN_LGAMMA: | |
6207 | CASE_CFN_LOG10: | |
6208 | CASE_CFN_LOG1P: | |
6209 | CASE_CFN_LOG2: | |
6210 | CASE_CFN_LOG: | |
6211 | CASE_CFN_SIN: | |
6212 | CASE_CFN_SINH: | |
6213 | CASE_CFN_SQRT: | |
6214 | CASE_CFN_TAN: | |
6215 | CASE_CFN_TANH: | |
6216 | if (el_mode == DFmode && n == 2) | |
6217 | { | |
6218 | bdecl = mathfn_built_in (double_type_node, fn); | |
6219 | suffix = "d2"; /* pow -> powd2 */ | |
6220 | } | |
6221 | else if (el_mode == SFmode && n == 4) | |
6222 | { | |
6223 | bdecl = mathfn_built_in (float_type_node, fn); | |
6224 | suffix = "4"; /* powf -> powf4 */ | |
6225 | } | |
6226 | else | |
6227 | return NULL_TREE; | |
6228 | if (!bdecl) | |
6229 | return NULL_TREE; | |
6230 | break; | |
6231 | ||
6232 | default: | |
6233 | return NULL_TREE; | |
6234 | } | |
6235 | ||
6236 | gcc_assert (suffix != NULL); | |
6237 | bname = IDENTIFIER_POINTER (DECL_NAME (bdecl)); | |
6238 | if (!bname) | |
6239 | return NULL_TREE; | |
6240 | ||
6241 | strcpy (name, bname + sizeof ("__builtin_") - 1); | |
6242 | strcat (name, suffix); | |
6243 | ||
6244 | if (n_args == 1) | |
6245 | fntype = build_function_type_list (type_out, type_in, NULL); | |
6246 | else if (n_args == 2) | |
6247 | fntype = build_function_type_list (type_out, type_in, type_in, NULL); | |
6248 | else | |
6249 | gcc_unreachable (); | |
6250 | ||
6251 | /* Build a function declaration for the vectorized function. */ | |
6252 | new_fndecl = build_decl (BUILTINS_LOCATION, | |
6253 | FUNCTION_DECL, get_identifier (name), fntype); | |
6254 | TREE_PUBLIC (new_fndecl) = 1; | |
6255 | DECL_EXTERNAL (new_fndecl) = 1; | |
6256 | DECL_IS_NOVOPS (new_fndecl) = 1; | |
6257 | TREE_READONLY (new_fndecl) = 1; | |
6258 | ||
6259 | return new_fndecl; | |
6260 | } | |
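/* Example of the name construction above: for a V2DFmode pow, BDECL is
   __builtin_pow, so NAME becomes "pow" + "d2" == "powd2", the MASS simd
   routine taking two V2DF arguments (powf -> "powf4" in the V4SF case).  */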
6261 | ||
6262 | /* Returns a function decl for a vectorized version of the builtin function | |
6263 | with builtin function code FN and the result vector type TYPE, or NULL_TREE | |
6264 | if it is not available. */ | |
6265 | ||
6266 | static tree | |
6267 | rs6000_builtin_vectorized_function (unsigned int fn, tree type_out, | |
6268 | tree type_in) | |
6269 | { | |
6270 | machine_mode in_mode, out_mode; | |
6271 | int in_n, out_n; | |
6272 | ||
6273 | if (TARGET_DEBUG_BUILTIN) | |
6274 | fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n", | |
6275 | combined_fn_name (combined_fn (fn)), | |
6276 | GET_MODE_NAME (TYPE_MODE (type_out)), | |
6277 | GET_MODE_NAME (TYPE_MODE (type_in))); | |
6278 | ||
6279 | if (TREE_CODE (type_out) != VECTOR_TYPE | |
6280 | || TREE_CODE (type_in) != VECTOR_TYPE | |
6281 | || !TARGET_VECTORIZE_BUILTINS) | |
6282 | return NULL_TREE; | |
6283 | ||
6284 | out_mode = TYPE_MODE (TREE_TYPE (type_out)); | |
6285 | out_n = TYPE_VECTOR_SUBPARTS (type_out); | |
6286 | in_mode = TYPE_MODE (TREE_TYPE (type_in)); | |
6287 | in_n = TYPE_VECTOR_SUBPARTS (type_in); | |
6288 | ||
6289 | switch (fn) | |
6290 | { | |
6291 | CASE_CFN_COPYSIGN: | |
6292 | if (VECTOR_UNIT_VSX_P (V2DFmode) | |
6293 | && out_mode == DFmode && out_n == 2 | |
6294 | && in_mode == DFmode && in_n == 2) | |
6295 | return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP]; | |
6296 | if (VECTOR_UNIT_VSX_P (V4SFmode) | |
6297 | && out_mode == SFmode && out_n == 4 | |
6298 | && in_mode == SFmode && in_n == 4) | |
6299 | return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP]; | |
6300 | if (VECTOR_UNIT_ALTIVEC_P (V4SFmode) | |
6301 | && out_mode == SFmode && out_n == 4 | |
6302 | && in_mode == SFmode && in_n == 4) | |
6303 | return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF]; | |
6304 | break; | |
6305 | CASE_CFN_CEIL: | |
6306 | if (VECTOR_UNIT_VSX_P (V2DFmode) | |
6307 | && out_mode == DFmode && out_n == 2 | |
6308 | && in_mode == DFmode && in_n == 2) | |
6309 | return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP]; | |
6310 | if (VECTOR_UNIT_VSX_P (V4SFmode) | |
6311 | && out_mode == SFmode && out_n == 4 | |
6312 | && in_mode == SFmode && in_n == 4) | |
6313 | return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP]; | |
6314 | if (VECTOR_UNIT_ALTIVEC_P (V4SFmode) | |
6315 | && out_mode == SFmode && out_n == 4 | |
6316 | && in_mode == SFmode && in_n == 4) | |
6317 | return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP]; | |
6318 | break; | |
6319 | CASE_CFN_FLOOR: | |
6320 | if (VECTOR_UNIT_VSX_P (V2DFmode) | |
6321 | && out_mode == DFmode && out_n == 2 | |
6322 | && in_mode == DFmode && in_n == 2) | |
6323 | return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM]; | |
6324 | if (VECTOR_UNIT_VSX_P (V4SFmode) | |
6325 | && out_mode == SFmode && out_n == 4 | |
6326 | && in_mode == SFmode && in_n == 4) | |
6327 | return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM]; | |
6328 | if (VECTOR_UNIT_ALTIVEC_P (V4SFmode) | |
6329 | && out_mode == SFmode && out_n == 4 | |
6330 | && in_mode == SFmode && in_n == 4) | |
6331 | return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM]; | |
6332 | break; | |
6333 | CASE_CFN_FMA: | |
6334 | if (VECTOR_UNIT_VSX_P (V2DFmode) | |
6335 | && out_mode == DFmode && out_n == 2 | |
6336 | && in_mode == DFmode && in_n == 2) | |
6337 | return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP]; | |
6338 | if (VECTOR_UNIT_VSX_P (V4SFmode) | |
6339 | && out_mode == SFmode && out_n == 4 | |
6340 | && in_mode == SFmode && in_n == 4) | |
6341 | return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP]; | |
6342 | if (VECTOR_UNIT_ALTIVEC_P (V4SFmode) | |
6343 | && out_mode == SFmode && out_n == 4 | |
6344 | && in_mode == SFmode && in_n == 4) | |
6345 | return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP]; | |
6346 | break; | |
6347 | CASE_CFN_TRUNC: | |
6348 | if (VECTOR_UNIT_VSX_P (V2DFmode) | |
6349 | && out_mode == DFmode && out_n == 2 | |
6350 | && in_mode == DFmode && in_n == 2) | |
6351 | return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ]; | |
6352 | if (VECTOR_UNIT_VSX_P (V4SFmode) | |
6353 | && out_mode == SFmode && out_n == 4 | |
6354 | && in_mode == SFmode && in_n == 4) | |
6355 | return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ]; | |
6356 | if (VECTOR_UNIT_ALTIVEC_P (V4SFmode) | |
6357 | && out_mode == SFmode && out_n == 4 | |
6358 | && in_mode == SFmode && in_n == 4) | |
6359 | return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ]; | |
6360 | break; | |
6361 | CASE_CFN_NEARBYINT: | |
6362 | if (VECTOR_UNIT_VSX_P (V2DFmode) | |
6363 | && flag_unsafe_math_optimizations | |
6364 | && out_mode == DFmode && out_n == 2 | |
6365 | && in_mode == DFmode && in_n == 2) | |
6366 | return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI]; | |
6367 | if (VECTOR_UNIT_VSX_P (V4SFmode) | |
6368 | && flag_unsafe_math_optimizations | |
6369 | && out_mode == SFmode && out_n == 4 | |
6370 | && in_mode == SFmode && in_n == 4) | |
6371 | return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI]; | |
6372 | break; | |
6373 | CASE_CFN_RINT: | |
6374 | if (VECTOR_UNIT_VSX_P (V2DFmode) | |
6375 | && !flag_trapping_math | |
6376 | && out_mode == DFmode && out_n == 2 | |
6377 | && in_mode == DFmode && in_n == 2) | |
6378 | return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC]; | |
6379 | if (VECTOR_UNIT_VSX_P (V4SFmode) | |
6380 | && !flag_trapping_math | |
6381 | && out_mode == SFmode && out_n == 4 | |
6382 | && in_mode == SFmode && in_n == 4) | |
6383 | return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC]; | |
6384 | break; | |
6385 | default: | |
6386 | break; | |
6387 | } | |
6388 | ||
6389 | /* Generate calls to libmass if appropriate. */ | |
6390 | if (rs6000_veclib_handler) | |
6391 | return rs6000_veclib_handler (combined_fn (fn), type_out, type_in); | |
6392 | ||
6393 | return NULL_TREE; | |
6394 | } | |
6395 | ||
6396 | /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */ | |
6397 | ||
6398 | static tree | |
6399 | rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out, | |
6400 | tree type_in) | |
6401 | { | |
6402 | machine_mode in_mode, out_mode; | |
6403 | int in_n, out_n; | |
6404 | ||
6405 | if (TARGET_DEBUG_BUILTIN) | |
6406 | fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n", | |
6407 | IDENTIFIER_POINTER (DECL_NAME (fndecl)), | |
6408 | GET_MODE_NAME (TYPE_MODE (type_out)), | |
6409 | GET_MODE_NAME (TYPE_MODE (type_in))); | |
6410 | ||
6411 | if (TREE_CODE (type_out) != VECTOR_TYPE | |
6412 | || TREE_CODE (type_in) != VECTOR_TYPE | |
6413 | || !TARGET_VECTORIZE_BUILTINS) | |
6414 | return NULL_TREE; | |
6415 | ||
6416 | out_mode = TYPE_MODE (TREE_TYPE (type_out)); | |
6417 | out_n = TYPE_VECTOR_SUBPARTS (type_out); | |
6418 | in_mode = TYPE_MODE (TREE_TYPE (type_in)); | |
6419 | in_n = TYPE_VECTOR_SUBPARTS (type_in); | |
6420 | ||
6421 | enum rs6000_builtins fn | |
6422 | = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl); | |
6423 | switch (fn) | |
6424 | { | |
6425 | case RS6000_BUILTIN_RSQRTF: | |
6426 | if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode) | |
6427 | && out_mode == SFmode && out_n == 4 | |
6428 | && in_mode == SFmode && in_n == 4) | |
6429 | return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP]; | |
6430 | break; | |
6431 | case RS6000_BUILTIN_RSQRT: | |
6432 | if (VECTOR_UNIT_VSX_P (V2DFmode) | |
6433 | && out_mode == DFmode && out_n == 2 | |
6434 | && in_mode == DFmode && in_n == 2) | |
6435 | return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF]; | |
6436 | break; | |
6437 | case RS6000_BUILTIN_RECIPF: | |
6438 | if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode) | |
6439 | && out_mode == SFmode && out_n == 4 | |
6440 | && in_mode == SFmode && in_n == 4) | |
6441 | return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP]; | |
6442 | break; | |
6443 | case RS6000_BUILTIN_RECIP: | |
6444 | if (VECTOR_UNIT_VSX_P (V2DFmode) | |
6445 | && out_mode == DFmode && out_n == 2 | |
6446 | && in_mode == DFmode && in_n == 2) | |
6447 | return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF]; | |
6448 | break; | |
6449 | default: | |
6450 | break; | |
6451 | } | |
6452 | return NULL_TREE; | |
6453 | } | |
6454 | \f | |
6455 | /* Default CPU string for rs6000*_file_start functions. */ | |
6456 | static const char *rs6000_default_cpu; | |
6457 | ||
6458 | /* Do anything needed at the start of the asm file. */ | |
6459 | ||
6460 | static void | |
6461 | rs6000_file_start (void) | |
6462 | { | |
6463 | char buffer[80]; | |
6464 | const char *start = buffer; | |
6465 | FILE *file = asm_out_file; | |
6466 | ||
6467 | rs6000_default_cpu = TARGET_CPU_DEFAULT; | |
6468 | ||
6469 | default_file_start (); | |
6470 | ||
6471 | if (flag_verbose_asm) | |
6472 | { | |
6473 | sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START); | |
6474 | ||
6475 | if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0') | |
6476 | { | |
6477 | fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu); | |
6478 | start = ""; | |
6479 | } | |
6480 | ||
6481 | if (global_options_set.x_rs6000_cpu_index) | |
6482 | { | |
6483 | fprintf (file, "%s -mcpu=%s", start, | |
6484 | processor_target_table[rs6000_cpu_index].name); | |
6485 | start = ""; | |
6486 | } | |
6487 | ||
6488 | if (global_options_set.x_rs6000_tune_index) | |
6489 | { | |
6490 | fprintf (file, "%s -mtune=%s", start, | |
6491 | processor_target_table[rs6000_tune_index].name); | |
6492 | start = ""; | |
6493 | } | |
6494 | ||
6495 | if (PPC405_ERRATUM77) | |
6496 | { | |
6497 | fprintf (file, "%s PPC405CR_ERRATUM77", start); | |
6498 | start = ""; | |
6499 | } | |
6500 | ||
6501 | #ifdef USING_ELFOS_H | |
6502 | switch (rs6000_sdata) | |
6503 | { | |
6504 | case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break; | |
6505 | case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break; | |
6506 | case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break; | |
6507 | case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break; | |
6508 | } | |
6509 | ||
6510 | if (rs6000_sdata && g_switch_value) | |
6511 | { | |
6512 | fprintf (file, "%s -G %d", start, | |
6513 | g_switch_value); | |
6514 | start = ""; | |
6515 | } | |
6516 | #endif | |
6517 | ||
6518 | if (*start == '\0') | |
6519 | putc ('\n', file); | |
6520 | } | |
6521 | ||
6522 | #ifdef USING_ELFOS_H | |
6523 | if (!(rs6000_default_cpu && rs6000_default_cpu[0]) | |
6524 | && !global_options_set.x_rs6000_cpu_index) | |
6525 | { | |
6526 | fputs ("\t.machine ", asm_out_file); | |
6527 | if ((rs6000_isa_flags & OPTION_MASK_MODULO) != 0) | |
6528 | fputs ("power9\n", asm_out_file); | |
6529 | else if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0) | |
6530 | fputs ("power8\n", asm_out_file); | |
6531 | else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0) | |
6532 | fputs ("power7\n", asm_out_file); | |
6533 | else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0) | |
6534 | fputs ("power6\n", asm_out_file); | |
6535 | else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0) | |
6536 | fputs ("power5\n", asm_out_file); | |
6537 | else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0) | |
6538 | fputs ("power4\n", asm_out_file); | |
6539 | else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0) | |
6540 | fputs ("ppc64\n", asm_out_file); | |
6541 | else | |
6542 | fputs ("ppc\n", asm_out_file); | |
6543 | } | |
6544 | #endif | |
6545 | ||
6546 | if (DEFAULT_ABI == ABI_ELFv2) | |
6547 | fprintf (file, "\t.abiversion 2\n"); | |
6548 | } | |
6549 | ||
6550 | \f | |
6551 | /* Return nonzero if this function is known to have a null epilogue. */ | |
6552 | ||
6553 | int | |
6554 | direct_return (void) | |
6555 | { | |
6556 | if (reload_completed) | |
6557 | { | |
6558 | rs6000_stack_t *info = rs6000_stack_info (); | |
6559 | ||
6560 | if (info->first_gp_reg_save == 32 | |
6561 | && info->first_fp_reg_save == 64 | |
6562 | && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1 | |
6563 | && ! info->lr_save_p | |
6564 | && ! info->cr_save_p | |
6565 | && info->vrsave_size == 0 | |
6566 | && ! info->push_p) | |
6567 | return 1; | |
6568 | } | |
6569 | ||
6570 | return 0; | |
6571 | } | |
6572 | ||
6573 | /* Return the number of instructions it takes to form a constant in an | |
6574 | integer register. */ | |
6575 | ||
6576 | int | |
6577 | num_insns_constant_wide (HOST_WIDE_INT value) | |
6578 | { | |
6579 | /* Signed 16-bit constant loadable with addi.  */ | |
6580 | if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000) | |
6581 | return 1; | |
6582 | ||
6583 | /* Constant with all low 16 bits zero, loadable with addis.  */ | |
6584 | else if ((value & 0xffff) == 0 | |
6585 | && (value >> 31 == -1 || value >> 31 == 0)) | |
6586 | return 1; | |
6587 | ||
6588 | else if (TARGET_POWERPC64) | |
6589 | { | |
6590 | HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000; | |
6591 | HOST_WIDE_INT high = value >> 31; | |
6592 | ||
6593 | if (high == 0 || high == -1) | |
6594 | return 2; | |
6595 | ||
6596 | high >>= 1; | |
6597 | ||
6598 | if (low == 0) | |
6599 | return num_insns_constant_wide (high) + 1; | |
6600 | else if (high == 0) | |
6601 | return num_insns_constant_wide (low) + 1; | |
6602 | else | |
6603 | return (num_insns_constant_wide (high) | |
6604 | + num_insns_constant_wide (low) + 1); | |
6605 | } | |
6606 | ||
6607 | else | |
6608 | return 2; | |
6609 | } | |
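/* Illustrative counts on a 64-bit target (derived from the rules above;
   typical instruction sequences shown):
     0x7fff              -> 1  (li)
     0x12340000          -> 1  (lis)
     0x12345678          -> 2  (lis; ori)
     0x123456789abcdef0  -> 5  (lis; ori; sldi; oris; ori)  */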
6610 | ||
6611 | int | |
6612 | num_insns_constant (rtx op, machine_mode mode) | |
6613 | { | |
6614 | HOST_WIDE_INT low, high; | |
6615 | ||
6616 | switch (GET_CODE (op)) | |
6617 | { | |
6618 | case CONST_INT: | |
6619 | if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1 | |
6620 | && rs6000_is_valid_and_mask (op, mode)) | |
6621 | return 2; | |
6622 | else | |
6623 | return num_insns_constant_wide (INTVAL (op)); | |
6624 | ||
6625 | case CONST_WIDE_INT: | |
6626 | { | |
6627 | int i; | |
6628 | int ins = CONST_WIDE_INT_NUNITS (op) - 1; | |
6629 | for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++) | |
6630 | ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i)); | |
6631 | return ins; | |
6632 | } | |
6633 | ||
6634 | case CONST_DOUBLE: | |
6635 | if (mode == SFmode || mode == SDmode) | |
6636 | { | |
6637 | long l; | |
6638 | ||
6639 | if (DECIMAL_FLOAT_MODE_P (mode)) | |
6640 | REAL_VALUE_TO_TARGET_DECIMAL32 | |
6641 | (*CONST_DOUBLE_REAL_VALUE (op), l); | |
6642 | else | |
6643 | REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), l); | |
6644 | return num_insns_constant_wide ((HOST_WIDE_INT) l); | |
6645 | } | |
6646 | ||
6647 | long l[2]; | |
6648 | if (DECIMAL_FLOAT_MODE_P (mode)) | |
6649 | REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (op), l); | |
6650 | else | |
6651 | REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), l); | |
6652 | high = l[WORDS_BIG_ENDIAN == 0]; | |
6653 | low = l[WORDS_BIG_ENDIAN != 0]; | |
6654 | ||
6655 | if (TARGET_32BIT) | |
6656 | return (num_insns_constant_wide (low) | |
6657 | + num_insns_constant_wide (high)); | |
6658 | else | |
6659 | { | |
6660 | if ((high == 0 && low >= 0) | |
6661 | || (high == -1 && low < 0)) | |
6662 | return num_insns_constant_wide (low); | |
6663 | ||
6664 | else if (rs6000_is_valid_and_mask (op, mode)) | |
6665 | return 2; | |
6666 | ||
6667 | else if (low == 0) | |
6668 | return num_insns_constant_wide (high) + 1; | |
6669 | ||
6670 | else | |
6671 | return (num_insns_constant_wide (high) | |
6672 | + num_insns_constant_wide (low) + 1); | |
6673 | } | |
6674 | ||
6675 | default: | |
6676 | gcc_unreachable (); | |
6677 | } | |
6678 | } | |
6679 | ||
6680 | /* Interpret element ELT of the CONST_VECTOR OP as an integer value. | |
6681 | If the mode of OP is MODE_VECTOR_INT, this simply returns the | |
6682 | corresponding element of the vector, but for V4SFmode and V2SFmode, | |
6683 | the corresponding "float" is interpreted as an SImode integer. */ | |
6684 | ||
6685 | HOST_WIDE_INT | |
6686 | const_vector_elt_as_int (rtx op, unsigned int elt) | |
6687 | { | |
6688 | rtx tmp; | |
6689 | ||
6690 | /* We can't handle V2DImode and V2DFmode vector constants here yet. */ | |
6691 | gcc_assert (GET_MODE (op) != V2DImode | |
6692 | && GET_MODE (op) != V2DFmode); | |
6693 | ||
6694 | tmp = CONST_VECTOR_ELT (op, elt); | |
6695 | if (GET_MODE (op) == V4SFmode | |
6696 | || GET_MODE (op) == V2SFmode) | |
6697 | tmp = gen_lowpart (SImode, tmp); | |
6698 | return INTVAL (tmp); | |
6699 | } | |
6700 | ||
6701 | /* Return true if OP can be synthesized with a particular vspltisb, vspltish | |
6702 | or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used | |
6703 | depends on STEP and COPIES, one of which will be 1. If COPIES > 1, | |
6704 | all items are set to the same value and contain COPIES replicas of the | |
6705 | vsplt's operand; if STEP > 1, one in every STEP elements is set to the vsplt's | |
6706 | operand and the others are set to the value of the operand's msb. */ | |
6707 | ||
6708 | static bool | |
6709 | vspltis_constant (rtx op, unsigned step, unsigned copies) | |
6710 | { | |
6711 | machine_mode mode = GET_MODE (op); | |
6712 | machine_mode inner = GET_MODE_INNER (mode); | |
6713 | ||
6714 | unsigned i; | |
6715 | unsigned nunits; | |
6716 | unsigned bitsize; | |
6717 | unsigned mask; | |
6718 | ||
6719 | HOST_WIDE_INT val; | |
6720 | HOST_WIDE_INT splat_val; | |
6721 | HOST_WIDE_INT msb_val; | |
6722 | ||
6723 | if (mode == V2DImode || mode == V2DFmode || mode == V1TImode) | |
6724 | return false; | |
6725 | ||
6726 | nunits = GET_MODE_NUNITS (mode); | |
6727 | bitsize = GET_MODE_BITSIZE (inner); | |
6728 | mask = GET_MODE_MASK (inner); | |
6729 | ||
6730 | val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0); | |
6731 | splat_val = val; | |
6732 | msb_val = val >= 0 ? 0 : -1; | |
6733 | ||
6734 | /* Construct the value to be splatted, if possible.  If not, return false. */ | |
6735 | for (i = 2; i <= copies; i *= 2) | |
6736 | { | |
6737 | HOST_WIDE_INT small_val; | |
6738 | bitsize /= 2; | |
6739 | small_val = splat_val >> bitsize; | |
6740 | mask >>= bitsize; | |
6741 | if (splat_val != ((HOST_WIDE_INT) | |
6742 | ((unsigned HOST_WIDE_INT) small_val << bitsize) | |
6743 | | (small_val & mask))) | |
6744 | return false; | |
6745 | splat_val = small_val; | |
6746 | } | |
6747 | ||
6748 | /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */ | |
6749 | if (EASY_VECTOR_15 (splat_val)) | |
6750 | ; | |
6751 | ||
6752 | /* Also check if we can splat, and then add the result to itself. Do so if | |
6753 | the value is positive, or if the splat instruction is using OP's mode; | |
6754 | for splat_val < 0, the splat and the add should use the same mode. */ | |
6755 | else if (EASY_VECTOR_15_ADD_SELF (splat_val) | |
6756 | && (splat_val >= 0 || (step == 1 && copies == 1))) | |
6757 | ; | |
6758 | ||
6759 | /* Also check if we are loading up the most significant bit, which can be | |
6760 | done by loading up -1 and shifting the value left by -1. */ | |
6761 | else if (EASY_VECTOR_MSB (splat_val, inner)) | |
6762 | ; | |
6763 | ||
6764 | else | |
6765 | return false; | |
6766 | ||
6767 | /* Check if VAL is present in every STEP-th element, and the | |
6768 | other elements are filled with its most significant bit. */ | |
6769 | for (i = 1; i < nunits; ++i) | |
6770 | { | |
6771 | HOST_WIDE_INT desired_val; | |
6772 | unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i; | |
6773 | if ((i & (step - 1)) == 0) | |
6774 | desired_val = val; | |
6775 | else | |
6776 | desired_val = msb_val; | |
6777 | ||
6778 | if (desired_val != const_vector_elt_as_int (op, elt)) | |
6779 | return false; | |
6780 | } | |
6781 | ||
6782 | return true; | |
6783 | } | |
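/* Illustrative cases (derived from the rules above): the V4SImode constant
   { 5, 5, 5, 5 } is accepted with STEP == 1, COPIES == 1 (vspltisw 5);
   { 0x00050005, 0x00050005, 0x00050005, 0x00050005 } is accepted with
   COPIES == 2, since each 32-bit element holds two copies of the 16-bit
   splat value (vspltish 5).  */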
6784 | ||
6785 | /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI | |
6786 | instruction, filling in the bottom elements with 0 or -1. | |
6787 | ||
6788 | Return 0 if the constant cannot be generated with VSLDOI. Return positive | |
6789 | for the number of zeroes to shift in, or negative for the number of 0xff | |
6790 | bytes to shift in. | |
6791 | ||
6792 | OP is a CONST_VECTOR. */ | |
6793 | ||
6794 | int | |
6795 | vspltis_shifted (rtx op) | |
6796 | { | |
6797 | machine_mode mode = GET_MODE (op); | |
6798 | machine_mode inner = GET_MODE_INNER (mode); | |
6799 | ||
6800 | unsigned i, j; | |
6801 | unsigned nunits; | |
6802 | unsigned mask; | |
6803 | ||
6804 | HOST_WIDE_INT val; | |
6805 | ||
6806 | if (mode != V16QImode && mode != V8HImode && mode != V4SImode) | |
6807 | return 0; | |
6808 | ||
6809 | /* We need to create pseudo registers to do the shift, so don't recognize | |
6810 | shift vector constants after reload. */ | |
6811 | if (!can_create_pseudo_p ()) | |
6812 | return 0; | |
6813 | ||
6814 | nunits = GET_MODE_NUNITS (mode); | |
6815 | mask = GET_MODE_MASK (inner); | |
6816 | ||
6817 | val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1); | |
6818 | ||
6819 | /* Check if the value can really be the operand of a vspltis[bhw]. */ | |
6820 | if (EASY_VECTOR_15 (val)) | |
6821 | ; | |
6822 | ||
6823 | /* Also check if we are loading up the most significant bit which can be done | |
6824 | by loading up -1 and shifting the value left by -1. */ | |
6825 | else if (EASY_VECTOR_MSB (val, inner)) | |
6826 | ; | |
6827 | ||
6828 | else | |
6829 | return 0; | |
6830 | ||
6831 | /* Check if VAL is present in every element until we find elements that | |
6832 | are 0 or all 1 bits. */ | |
6833 | for (i = 1; i < nunits; ++i) | |
6834 | { | |
6835 | unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i; | |
6836 | HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt); | |
6837 | ||
6838 | /* If the value isn't the splat value, check for the remaining elements | |
6839 | being 0/-1. */ | |
6840 | if (val != elt_val) | |
6841 | { | |
6842 | if (elt_val == 0) | |
6843 | { | |
6844 | for (j = i+1; j < nunits; ++j) | |
6845 | { | |
6846 | unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j; | |
6847 | if (const_vector_elt_as_int (op, elt2) != 0) | |
6848 | return 0; | |
6849 | } | |
6850 | ||
6851 | return (nunits - i) * GET_MODE_SIZE (inner); | |
6852 | } | |
6853 | ||
6854 | else if ((elt_val & mask) == mask) | |
6855 | { | |
6856 | for (j = i+1; j < nunits; ++j) | |
6857 | { | |
6858 | unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j; | |
6859 | if ((const_vector_elt_as_int (op, elt2) & mask) != mask) | |
6860 | return 0; | |
6861 | } | |
6862 | ||
6863 | return -((nunits - i) * GET_MODE_SIZE (inner)); | |
6864 | } | |
6865 | ||
6866 | else | |
6867 | return 0; | |
6868 | } | |
6869 | } | |
6870 | ||
6871 | /* If all elements are equal, we don't need to do VSLDOI. */ | |
6872 | return 0; | |
6873 | } | |
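/* Illustrative cases (derived from the rules above): on a big-endian
   target the V4SImode constant { 3, 0, 0, 0 } returns 12 (splat 3, then
   VSLDOI shifts in 12 zero bytes), while { 3, -1, -1, -1 } returns -12
   (12 bytes of 0xff shifted in).  */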
6874 | ||
6875 | ||
6876 | /* Return true if OP is of the given MODE and can be synthesized | |
6877 | with a vspltisb, vspltish or vspltisw. */ | |
6878 | ||
6879 | bool | |
6880 | easy_altivec_constant (rtx op, machine_mode mode) | |
6881 | { | |
6882 | unsigned step, copies; | |
6883 | ||
6884 | if (mode == VOIDmode) | |
6885 | mode = GET_MODE (op); | |
6886 | else if (mode != GET_MODE (op)) | |
6887 | return false; | |
6888 | ||
6889 | /* V2DI/V2DF were added with VSX.  Only allow 0 and all 1's as easy | |
6890 | constants. */ | |
6891 | if (mode == V2DFmode) | |
6892 | return zero_constant (op, mode); | |
6893 | ||
6894 | else if (mode == V2DImode) | |
6895 | { | |
6896 | if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT | |
6897 | || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT) | |
6898 | return false; | |
6899 | ||
6900 | if (zero_constant (op, mode)) | |
6901 | return true; | |
6902 | ||
6903 | if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1 | |
6904 | && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1) | |
6905 | return true; | |
6906 | ||
6907 | return false; | |
6908 | } | |
6909 | ||
6910 | /* V1TImode is a special container for TImode. Ignore for now. */ | |
6911 | else if (mode == V1TImode) | |
6912 | return false; | |
6913 | ||
6914 | /* Start with a vspltisw. */ | |
6915 | step = GET_MODE_NUNITS (mode) / 4; | |
6916 | copies = 1; | |
6917 | ||
6918 | if (vspltis_constant (op, step, copies)) | |
6919 | return true; | |
6920 | ||
6921 | /* Then try with a vspltish. */ | |
6922 | if (step == 1) | |
6923 | copies <<= 1; | |
6924 | else | |
6925 | step >>= 1; | |
6926 | ||
6927 | if (vspltis_constant (op, step, copies)) | |
6928 | return true; | |
6929 | ||
6930 | /* And finally a vspltisb. */ | |
6931 | if (step == 1) | |
6932 | copies <<= 1; | |
6933 | else | |
6934 | step >>= 1; | |
6935 | ||
6936 | if (vspltis_constant (op, step, copies)) | |
6937 | return true; | |
6938 | ||
6939 | if (vspltis_shifted (op) != 0) | |
6940 | return true; | |
6941 | ||
6942 | return false; | |
6943 | } | |
6944 | ||
6945 | /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose | |
6946 | result is OP. Abort if it is not possible. */ | |
6947 | ||
6948 | rtx | |
6949 | gen_easy_altivec_constant (rtx op) | |
6950 | { | |
6951 | machine_mode mode = GET_MODE (op); | |
6952 | int nunits = GET_MODE_NUNITS (mode); | |
6953 | rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0); | |
6954 | unsigned step = nunits / 4; | |
6955 | unsigned copies = 1; | |
6956 | ||
6957 | /* Start with a vspltisw. */ | |
6958 | if (vspltis_constant (op, step, copies)) | |
6959 | return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val)); | |
6960 | ||
6961 | /* Then try with a vspltish. */ | |
6962 | if (step == 1) | |
6963 | copies <<= 1; | |
6964 | else | |
6965 | step >>= 1; | |
6966 | ||
6967 | if (vspltis_constant (op, step, copies)) | |
6968 | return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val)); | |
6969 | ||
6970 | /* And finally a vspltisb. */ | |
6971 | if (step == 1) | |
6972 | copies <<= 1; | |
6973 | else | |
6974 | step >>= 1; | |
6975 | ||
6976 | if (vspltis_constant (op, step, copies)) | |
6977 | return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val)); | |
6978 | ||
6979 | gcc_unreachable (); | |
6980 | } | |
6981 | ||
6982 | /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0 | |
6983 | instructions (xxspltib, vupkhsb/vextsb2w/vextb2d). | |
6984 | ||
6985 | Return the number of instructions needed (1 or 2) via the address | |
6986 | pointed to by NUM_INSNS_PTR. | |
6987 | ||
6988 | Return the constant byte that is being splatted via CONSTANT_PTR. */ | |
6989 | ||
6990 | bool | |
6991 | xxspltib_constant_p (rtx op, | |
6992 | machine_mode mode, | |
6993 | int *num_insns_ptr, | |
6994 | int *constant_ptr) | |
6995 | { | |
6996 | size_t nunits = GET_MODE_NUNITS (mode); | |
6997 | size_t i; | |
6998 | HOST_WIDE_INT value; | |
6999 | rtx element; | |
7000 | ||
7001 | /* Set the returned values to out-of-bound values. */ | |
7002 | *num_insns_ptr = -1; | |
7003 | *constant_ptr = 256; | |
7004 | ||
7005 | if (!TARGET_P9_VECTOR) | |
7006 | return false; | |
7007 | ||
7008 | if (mode == VOIDmode) | |
7009 | mode = GET_MODE (op); | |
7010 | ||
7011 | else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode) | |
7012 | return false; | |
7013 | ||
7014 | /* Handle (vec_duplicate <constant>). */ | |
7015 | if (GET_CODE (op) == VEC_DUPLICATE) | |
7016 | { | |
7017 | if (mode != V16QImode && mode != V8HImode && mode != V4SImode | |
7018 | && mode != V2DImode) | |
7019 | return false; | |
7020 | ||
7021 | element = XEXP (op, 0); | |
7022 | if (!CONST_INT_P (element)) | |
7023 | return false; | |
7024 | ||
7025 | value = INTVAL (element); | |
7026 | if (!IN_RANGE (value, -128, 127)) | |
7027 | return false; | |
7028 | } | |
7029 | ||
7030 | /* Handle (const_vector [...]). */ | |
7031 | else if (GET_CODE (op) == CONST_VECTOR) | |
7032 | { | |
7033 | if (mode != V16QImode && mode != V8HImode && mode != V4SImode | |
7034 | && mode != V2DImode) | |
7035 | return false; | |
7036 | ||
7037 | element = CONST_VECTOR_ELT (op, 0); | |
7038 | if (!CONST_INT_P (element)) | |
7039 | return false; | |
7040 | ||
7041 | value = INTVAL (element); | |
7042 | if (!IN_RANGE (value, -128, 127)) | |
7043 | return false; | |
7044 | ||
7045 | for (i = 1; i < nunits; i++) | |
7046 | { | |
7047 | element = CONST_VECTOR_ELT (op, i); | |
7048 | if (!CONST_INT_P (element)) | |
7049 | return false; | |
7050 | ||
7051 | if (value != INTVAL (element)) | |
7052 | return false; | |
7053 | } | |
7054 | } | |
7055 | ||
7056 | /* Handle integer constants being loaded into the upper part of the VSX | |
7057 | register as a scalar. If the value isn't 0/-1, only allow it if the mode | |
7058 | can go in Altivec registers.  Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */ | |
7059 | else if (CONST_INT_P (op)) | |
7060 | { | |
7061 | if (!SCALAR_INT_MODE_P (mode)) | |
7062 | return false; | |
7063 | ||
7064 | value = INTVAL (op); | |
7065 | if (!IN_RANGE (value, -128, 127)) | |
7066 | return false; | |
7067 | ||
7068 | if (!IN_RANGE (value, -1, 0)) | |
7069 | { | |
7070 | if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID)) | |
7071 | return false; | |
7072 | ||
7073 | if (EASY_VECTOR_15 (value)) | |
7074 | return false; | |
7075 | } | |
7076 | } | |
7077 | ||
7078 | else | |
7079 | return false; | |
7080 | ||
7081 | /* See if we could generate vspltisw/vspltish directly instead of xxspltib + | |
7082 | sign extend. Special case 0/-1 to allow getting any VSX register instead | |
7083 | of an Altivec register. */ | |
7084 | if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0) | |
7085 | && EASY_VECTOR_15 (value)) | |
7086 | return false; | |
7087 | ||
7088 | /* Return # of instructions and the constant byte for XXSPLTIB. */ | |
7089 | if (mode == V16QImode) | |
7090 | *num_insns_ptr = 1; | |
7091 | ||
7092 | else if (IN_RANGE (value, -1, 0)) | |
7093 | *num_insns_ptr = 1; | |
7094 | ||
7095 | else | |
7096 | *num_insns_ptr = 2; | |
7097 | ||
7098 | *constant_ptr = (int) value; | |
7099 | return true; | |
7100 | } | |
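/* Illustrative cases (derived from the rules above): a V16QImode vector of
   sixteen 0x42 bytes needs 1 insn (xxspltib 66); a V4SImode vector of four
   42s needs 2 insns (xxspltib plus a vextsb2w sign extension); a V4SImode
   vector of four 5s returns false, since vspltisw 5 is preferred.  */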
7101 | ||
7102 | const char * | |
7103 | output_vec_const_move (rtx *operands) | |
7104 | { | |
7105 | int cst, cst2, shift; | |
7106 | machine_mode mode; | |
7107 | rtx dest, vec; | |
7108 | ||
7109 | dest = operands[0]; | |
7110 | vec = operands[1]; | |
7111 | mode = GET_MODE (dest); | |
7112 | ||
7113 | if (TARGET_VSX) | |
7114 | { | |
7115 | bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest)); | |
7116 | int xxspltib_value = 256; | |
7117 | int num_insns = -1; | |
7118 | ||
7119 | if (zero_constant (vec, mode)) | |
7120 | { | |
7121 | if (TARGET_P9_VECTOR) | |
7122 | return "xxspltib %x0,0"; | |
7123 | ||
7124 | else if (dest_vmx_p) | |
7125 | return "vspltisw %0,0"; | |
7126 | ||
7127 | else | |
7128 | return "xxlxor %x0,%x0,%x0"; | |
7129 | } | |
7130 | ||
7131 | if (all_ones_constant (vec, mode)) | |
7132 | { | |
7133 | if (TARGET_P9_VECTOR) | |
7134 | return "xxspltib %x0,255"; | |
7135 | ||
7136 | else if (dest_vmx_p) | |
7137 | return "vspltisw %0,-1"; | |
7138 | ||
7139 | else if (TARGET_P8_VECTOR) | |
7140 | return "xxlorc %x0,%x0,%x0"; | |
7141 | ||
7142 | else | |
7143 | gcc_unreachable (); | |
7144 | } | |
7145 | ||
7146 | if (TARGET_P9_VECTOR | |
7147 | && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value)) | |
7148 | { | |
7149 | if (num_insns == 1) | |
7150 | { | |
7151 | operands[2] = GEN_INT (xxspltib_value & 0xff); | |
7152 | return "xxspltib %x0,%2"; | |
7153 | } | |
7154 | ||
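| /* Two instructions are needed (XXSPLTIB plus a sign-extend); return | |
| "#" so a splitter expands the insn after reload. */ | |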
7155 | return "#"; | |
7156 | } | |
7157 | } | |
7158 | ||
7159 | if (TARGET_ALTIVEC) | |
7160 | { | |
7161 | rtx splat_vec; | |
7162 | ||
7163 | gcc_assert (ALTIVEC_REGNO_P (REGNO (dest))); | |
7164 | if (zero_constant (vec, mode)) | |
7165 | return "vspltisw %0,0"; | |
7166 | ||
7167 | if (all_ones_constant (vec, mode)) | |
7168 | return "vspltisw %0,-1"; | |
7169 | ||
7170 | /* Do we need to construct a value using VSLDOI? */ | |
7171 | shift = vspltis_shifted (vec); | |
7172 | if (shift != 0) | |
7173 | return "#"; | |
7174 | ||
7175 | splat_vec = gen_easy_altivec_constant (vec); | |
7176 | gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE); | |
7177 | operands[1] = XEXP (splat_vec, 0); | |
7178 | if (!EASY_VECTOR_15 (INTVAL (operands[1]))) | |
7179 | return "#"; | |
7180 | ||
7181 | switch (GET_MODE (splat_vec)) | |
7182 | { | |
916ace94 | 7183 | case E_V4SImode: |
01e91138 | 7184 | return "vspltisw %0,%1"; |
7185 | ||
916ace94 | 7186 | case E_V8HImode: |
01e91138 | 7187 | return "vspltish %0,%1"; |
7188 | ||
916ace94 | 7189 | case E_V16QImode: |
01e91138 | 7190 | return "vspltisb %0,%1"; |
7191 | ||
7192 | default: | |
7193 | gcc_unreachable (); | |
7194 | } | |
7195 | } | |
7196 | ||
7197 | gcc_assert (TARGET_SPE); | |
7198 | ||
7199 | /* Vector constant 0 is handled by a splitter for V2SI, and directly in | |
7200 | the move patterns of V1DI, V4HI, and V2SF. | |
7201 | ||
7202 | FIXME: We should probably return # and add post-reload | |
7203 | splitters for these, but this way is so easy ;-). */ | |
7204 | cst = INTVAL (CONST_VECTOR_ELT (vec, 0)); | |
7205 | cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1)); | |
7206 | operands[1] = CONST_VECTOR_ELT (vec, 0); | |
7207 | operands[2] = CONST_VECTOR_ELT (vec, 1); | |
7208 | if (cst == cst2) | |
7209 | return "li %0,%1\n\tevmergelo %0,%0,%0"; | |
7210 | else if (WORDS_BIG_ENDIAN) | |
7211 | return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2"; | |
7212 | else | |
7213 | return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1"; | |
7214 | } | |
7215 | ||
7216 | /* Initialize the paired-float vector TARGET to VALS. */ | |
7217 | ||
7218 | void | |
7219 | paired_expand_vector_init (rtx target, rtx vals) | |
7220 | { | |
7221 | machine_mode mode = GET_MODE (target); | |
7222 | int n_elts = GET_MODE_NUNITS (mode); | |
7223 | int n_var = 0; | |
7224 | rtx x, new_rtx, tmp, constant_op, op1, op2; | |
7225 | int i; | |
7226 | ||
7227 | for (i = 0; i < n_elts; ++i) | |
7228 | { | |
7229 | x = XVECEXP (vals, 0, i); | |
7230 | if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x))) | |
7231 | ++n_var; | |
7232 | } | |
7233 | if (n_var == 0) | |
7234 | { | |
7235 | /* Load from constant pool. */ | |
7236 | emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))); | |
7237 | return; | |
7238 | } | |
7239 | ||
7240 | if (n_var == 2) | |
7241 | { | |
7242 | /* The vector is initialized only with non-constants. */ | |
7243 | new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0), | |
7244 | XVECEXP (vals, 0, 1)); | |
7245 | ||
7246 | emit_move_insn (target, new_rtx); | |
7247 | return; | |
7248 | } | |
7249 | ||
7250 | /* One field is non-constant and the other one is a constant. Load the | |
7251 | constant from the constant pool and use ps_merge instruction to | |
7252 | construct the whole vector. */ | |
7253 | op1 = XVECEXP (vals, 0, 0); | |
7254 | op2 = XVECEXP (vals, 0, 1); | |
7255 | ||
7256 | constant_op = (CONSTANT_P (op1)) ? op1 : op2; | |
7257 | ||
7258 | tmp = gen_reg_rtx (GET_MODE (constant_op)); | |
7259 | emit_move_insn (tmp, constant_op); | |
7260 | ||
7261 | if (CONSTANT_P (op1)) | |
7262 | new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2); | |
7263 | else | |
7264 | new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp); | |
7265 | ||
7266 | emit_move_insn (target, new_rtx); | |
7267 | } | |
7268 | ||
7269 | void | |
7270 | paired_expand_vector_move (rtx operands[]) | |
7271 | { | |
7272 | rtx op0 = operands[0], op1 = operands[1]; | |
7273 | ||
7274 | emit_move_insn (op0, op1); | |
7275 | } | |
7276 | ||
7277 | /* Emit vector compare for code RCODE. DEST is the destination, OP0 and | |
7278 | OP1 are the two VEC_COND_EXPR operands, and CC_OP0 and CC_OP1 are the | |
7279 | two operands of the relational operation RCODE. This is a recursive | |
7280 | function. */ | |
7281 | ||
7282 | static void | |
7283 | paired_emit_vector_compare (enum rtx_code rcode, | |
7284 | rtx dest, rtx op0, rtx op1, | |
7285 | rtx cc_op0, rtx cc_op1) | |
7286 | { | |
7287 | rtx tmp = gen_reg_rtx (V2SFmode); | |
7288 | rtx tmp1, max, min; | |
7289 | ||
7290 | gcc_assert (TARGET_PAIRED_FLOAT); | |
7291 | gcc_assert (GET_MODE (op0) == GET_MODE (op1)); | |
7292 | ||
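| /* Every case below is reduced, by swapping operands and recursing, to | |
| either the GE case, which selects on the sign of CC_OP0 - CC_OP1 via | |
| selv2sf4, or the EQ case, which is built from two such selects | |
| computing min and max (the operands are equal iff min == max). */ | |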
7293 | switch (rcode) | |
7294 | { | |
7295 | case LT: | |
7296 | case LTU: | |
7297 | paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1); | |
7298 | return; | |
7299 | case GE: | |
7300 | case GEU: | |
7301 | emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1)); | |
7302 | emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode))); | |
7303 | return; | |
7304 | case LE: | |
7305 | case LEU: | |
7306 | paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0); | |
7307 | return; | |
7308 | case GT: | |
7309 | paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1); | |
7310 | return; | |
7311 | case EQ: | |
7312 | tmp1 = gen_reg_rtx (V2SFmode); | |
7313 | max = gen_reg_rtx (V2SFmode); | |
7314 | min = gen_reg_rtx (V2SFmode); | |
7316 | ||
7317 | emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1)); | |
7318 | emit_insn (gen_selv2sf4 | |
7319 | (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode))); | |
7320 | emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0)); | |
7321 | emit_insn (gen_selv2sf4 | |
7322 | (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode))); | |
7323 | emit_insn (gen_subv2sf3 (tmp1, min, max)); | |
7324 | emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode))); | |
7325 | return; | |
7326 | case NE: | |
7327 | paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1); | |
7328 | return; | |
7329 | case UNLE: | |
7330 | paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1); | |
7331 | return; | |
7332 | case UNLT: | |
7333 | paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1); | |
7334 | return; | |
7335 | case UNGE: | |
7336 | paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1); | |
7337 | return; | |
7338 | case UNGT: | |
7339 | paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1); | |
7340 | return; | |
7341 | default: | |
7342 | gcc_unreachable (); | |
7343 | } | |
7344 | ||
7345 | return; | |
7346 | } | |
7347 | ||
7348 | /* Emit vector conditional expression. | |
7349 | DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands. | |
7350 | CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */ | |
7351 | ||
7352 | int | |
7353 | paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2, | |
7354 | rtx cond, rtx cc_op0, rtx cc_op1) | |
7355 | { | |
7356 | enum rtx_code rcode = GET_CODE (cond); | |
7357 | ||
7358 | if (!TARGET_PAIRED_FLOAT) | |
7359 | return 0; | |
7360 | ||
7361 | paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1); | |
7362 | ||
7363 | return 1; | |
7364 | } | |
7365 | ||
7366 | /* Initialize vector TARGET to VALS. */ | |
7367 | ||
7368 | void | |
7369 | rs6000_expand_vector_init (rtx target, rtx vals) | |
7370 | { | |
7371 | machine_mode mode = GET_MODE (target); | |
7372 | machine_mode inner_mode = GET_MODE_INNER (mode); | |
7373 | int n_elts = GET_MODE_NUNITS (mode); | |
7374 | int n_var = 0, one_var = -1; | |
7375 | bool all_same = true, all_const_zero = true; | |
7376 | rtx x, mem; | |
7377 | int i; | |
7378 | ||
7379 | for (i = 0; i < n_elts; ++i) | |
7380 | { | |
7381 | x = XVECEXP (vals, 0, i); | |
7382 | if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x))) | |
7383 | ++n_var, one_var = i; | |
7384 | else if (x != CONST0_RTX (inner_mode)) | |
7385 | all_const_zero = false; | |
7386 | ||
7387 | if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) | |
7388 | all_same = false; | |
7389 | } | |
7390 | ||
7391 | if (n_var == 0) | |
7392 | { | |
7393 | rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)); | |
7394 | bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT); | |
7395 | if ((int_vector_p || TARGET_VSX) && all_const_zero) | |
7396 | { | |
7397 | /* Zero register. */ | |
7398 | emit_move_insn (target, CONST0_RTX (mode)); | |
7399 | return; | |
7400 | } | |
7401 | else if (int_vector_p && easy_vector_constant (const_vec, mode)) | |
7402 | { | |
7403 | /* Splat immediate. */ | |
7404 | emit_insn (gen_rtx_SET (target, const_vec)); | |
7405 | return; | |
7406 | } | |
7407 | else | |
7408 | { | |
7409 | /* Load from constant pool. */ | |
7410 | emit_move_insn (target, const_vec); | |
7411 | return; | |
7412 | } | |
7413 | } | |
7414 | ||
7415 | /* Double word values on VSX can use xxpermdi or lxvdsx. */ | |
7416 | if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode)) | |
7417 | { | |
7418 | rtx op[2]; | |
7419 | size_t i; | |
7420 | size_t num_elements = all_same ? 1 : 2; | |
7421 | for (i = 0; i < num_elements; i++) | |
7422 | { | |
7423 | op[i] = XVECEXP (vals, 0, i); | |
7424 | /* Just in case there is a SUBREG with a smaller mode, do a | |
7425 | conversion. */ | |
7426 | if (GET_MODE (op[i]) != inner_mode) | |
7427 | { | |
7428 | rtx tmp = gen_reg_rtx (inner_mode); | |
7429 | convert_move (tmp, op[i], 0); | |
7430 | op[i] = tmp; | |
7431 | } | |
7432 | /* Allow a load with a double-word splat (lxvdsx). */ | |
7433 | else if (MEM_P (op[i])) | |
7434 | { | |
7435 | if (!all_same) | |
7436 | op[i] = force_reg (inner_mode, op[i]); | |
7437 | } | |
7438 | else if (!REG_P (op[i])) | |
7439 | op[i] = force_reg (inner_mode, op[i]); | |
7440 | } | |
7441 | ||
7442 | if (all_same) | |
7443 | { | |
7444 | if (mode == V2DFmode) | |
7445 | emit_insn (gen_vsx_splat_v2df (target, op[0])); | |
7446 | else | |
7447 | emit_insn (gen_vsx_splat_v2di (target, op[0])); | |
7448 | } | |
7449 | else | |
7450 | { | |
7451 | if (mode == V2DFmode) | |
7452 | emit_insn (gen_vsx_concat_v2df (target, op[0], op[1])); | |
7453 | else | |
7454 | emit_insn (gen_vsx_concat_v2di (target, op[0], op[1])); | |
7455 | } | |
7456 | return; | |
7457 | } | |
7458 | ||
7459 | /* Special case initializing vector int if we are on 64-bit systems with | |
7460 | direct move or we have the ISA 3.0 instructions. */ | |
7461 | if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode) | |
7462 | && TARGET_DIRECT_MOVE_64BIT) | |
7463 | { | |
7464 | if (all_same) | |
7465 | { | |
7466 | rtx element0 = XVECEXP (vals, 0, 0); | |
7467 | if (MEM_P (element0)) | |
7468 | element0 = rs6000_address_for_fpconvert (element0); | |
7469 | else | |
7470 | element0 = force_reg (SImode, element0); | |
7471 | ||
7472 | if (TARGET_P9_VECTOR) | |
7473 | emit_insn (gen_vsx_splat_v4si (target, element0)); | |
7474 | else | |
7475 | { | |
7476 | rtx tmp = gen_reg_rtx (DImode); | |
7477 | emit_insn (gen_zero_extendsidi2 (tmp, element0)); | |
7478 | emit_insn (gen_vsx_splat_v4si_di (target, tmp)); | |
7479 | } | |
7480 | return; | |
7481 | } | |
7482 | else | |
7483 | { | |
7484 | rtx elements[4]; | |
7485 | size_t i; | |
7486 | ||
7487 | for (i = 0; i < 4; i++) | |
7488 | { | |
7489 | elements[i] = XVECEXP (vals, 0, i); | |
7490 | if (!CONST_INT_P (elements[i]) && !REG_P (elements[i])) | |
7491 | elements[i] = copy_to_mode_reg (SImode, elements[i]); | |
7492 | } | |
7493 | ||
7494 | emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1], | |
7495 | elements[2], elements[3])); | |
7496 | return; | |
7497 | } | |
7498 | } | |
7499 | ||
7500 | /* With single precision floating point on VSX, note that internally single | |
7501 | precision is actually represented as a double. Either make 2 V2DF | |
7502 | vectors and convert those vectors to single precision, or do one | |
7503 | conversion and splat the result to the other elements. */ | |
7504 | if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode)) | |
7505 | { | |
7506 | if (all_same) | |
7507 | { | |
7508 | rtx element0 = XVECEXP (vals, 0, 0); | |
7509 | ||
7510 | if (TARGET_P9_VECTOR) | |
7511 | { | |
7512 | if (MEM_P (element0)) | |
7513 | element0 = rs6000_address_for_fpconvert (element0); | |
7514 | ||
7515 | emit_insn (gen_vsx_splat_v4sf (target, element0)); | |
7516 | } | |
7517 | ||
7518 | else | |
7519 | { | |
7520 | rtx freg = gen_reg_rtx (V4SFmode); | |
7521 | rtx sreg = force_reg (SFmode, element0); | |
7522 | rtx cvt = (TARGET_XSCVDPSPN | |
7523 | ? gen_vsx_xscvdpspn_scalar (freg, sreg) | |
7524 | : gen_vsx_xscvdpsp_scalar (freg, sreg)); | |
7525 | ||
7526 | emit_insn (cvt); | |
7527 | emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg, | |
7528 | const0_rtx)); | |
7529 | } | |
7530 | } | |
7531 | else | |
7532 | { | |
7533 | rtx dbl_even = gen_reg_rtx (V2DFmode); | |
7534 | rtx dbl_odd = gen_reg_rtx (V2DFmode); | |
7535 | rtx flt_even = gen_reg_rtx (V4SFmode); | |
7536 | rtx flt_odd = gen_reg_rtx (V4SFmode); | |
7537 | rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0)); | |
7538 | rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1)); | |
7539 | rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2)); | |
7540 | rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3)); | |
7541 | ||
7542 | /* Use VMRGEW if we can instead of doing a permute. */ | |
7543 | if (TARGET_P8_VECTOR) | |
7544 | { | |
7545 | emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2)); | |
7546 | emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3)); | |
7547 | emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even)); | |
7548 | emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd)); | |
7549 | if (BYTES_BIG_ENDIAN) | |
7550 | emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd)); | |
7551 | else | |
7552 | emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even)); | |
7553 | } | |
7554 | else | |
7555 | { | |
7556 | emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1)); | |
7557 | emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3)); | |
7558 | emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even)); | |
7559 | emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd)); | |
7560 | rs6000_expand_extract_even (target, flt_even, flt_odd); | |
7561 | } | |
7562 | } | |
7563 | return; | |
7564 | } | |
7565 | ||
7566 | /* Special case splat initializations of vector short/char if we are on | |
7567 | 64-bit systems with direct move. */ | |
7568 | if (all_same && TARGET_DIRECT_MOVE_64BIT | |
7569 | && (mode == V16QImode || mode == V8HImode)) | |
7570 | { | |
7571 | rtx op0 = XVECEXP (vals, 0, 0); | |
7572 | rtx di_tmp = gen_reg_rtx (DImode); | |
7573 | ||
7574 | if (!REG_P (op0)) | |
7575 | op0 = force_reg (GET_MODE_INNER (mode), op0); | |
7576 | ||
7577 | if (mode == V16QImode) | |
7578 | { | |
7579 | emit_insn (gen_zero_extendqidi2 (di_tmp, op0)); | |
7580 | emit_insn (gen_vsx_vspltb_di (target, di_tmp)); | |
7581 | return; | |
7582 | } | |
7583 | ||
7584 | if (mode == V8HImode) | |
7585 | { | |
7586 | emit_insn (gen_zero_extendhidi2 (di_tmp, op0)); | |
7587 | emit_insn (gen_vsx_vsplth_di (target, di_tmp)); | |
7588 | return; | |
7589 | } | |
7590 | } | |
7591 | ||
7592 | /* Store value to stack temp. Load vector element. Splat. However, splat | |
7593 | of 64-bit items is not supported on Altivec. */ | |
7594 | if (all_same && GET_MODE_SIZE (inner_mode) <= 4) | |
7595 | { | |
7596 | mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode)); | |
7597 | emit_move_insn (adjust_address_nv (mem, inner_mode, 0), | |
7598 | XVECEXP (vals, 0, 0)); | |
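| /* Tag the load with UNSPEC_LVE so the insn matches the Altivec | |
| element-load (lve*x style) pattern rather than a full vector load. */ | |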
7599 | x = gen_rtx_UNSPEC (VOIDmode, | |
7600 | gen_rtvec (1, const0_rtx), UNSPEC_LVE); | |
7601 | emit_insn (gen_rtx_PARALLEL (VOIDmode, | |
7602 | gen_rtvec (2, | |
7603 | gen_rtx_SET (target, mem), | |
7604 | x))); | |
7605 | x = gen_rtx_VEC_SELECT (inner_mode, target, | |
7606 | gen_rtx_PARALLEL (VOIDmode, | |
7607 | gen_rtvec (1, const0_rtx))); | |
7608 | emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x))); | |
7609 | return; | |
7610 | } | |
7611 | ||
7612 | /* One field is non-constant. Load constant then overwrite | |
7613 | varying field. */ | |
7614 | if (n_var == 1) | |
7615 | { | |
7616 | rtx copy = copy_rtx (vals); | |
7617 | ||
7618 | /* Load constant part of vector, substitute neighboring value for | |
7619 | varying element. */ | |
7620 | XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts); | |
7621 | rs6000_expand_vector_init (target, copy); | |
7622 | ||
7623 | /* Insert variable. */ | |
7624 | rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var); | |
7625 | return; | |
7626 | } | |
7627 | ||
7628 | /* Construct the vector in memory one field at a time | |
7629 | and load the whole vector. */ | |
7630 | mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); | |
7631 | for (i = 0; i < n_elts; i++) | |
7632 | emit_move_insn (adjust_address_nv (mem, inner_mode, | |
7633 | i * GET_MODE_SIZE (inner_mode)), | |
7634 | XVECEXP (vals, 0, i)); | |
7635 | emit_move_insn (target, mem); | |
7636 | } | |
7637 | ||
7638 | /* Set field ELT of TARGET to VAL. */ | |
7639 | ||
7640 | void | |
7641 | rs6000_expand_vector_set (rtx target, rtx val, int elt) | |
7642 | { | |
7643 | machine_mode mode = GET_MODE (target); | |
7644 | machine_mode inner_mode = GET_MODE_INNER (mode); | |
7645 | rtx reg = gen_reg_rtx (mode); | |
7646 | rtx mask, mem, x; | |
7647 | int width = GET_MODE_SIZE (inner_mode); | |
7648 | int i; | |
7649 | ||
7650 | val = force_reg (GET_MODE (val), val); | |
7651 | ||
7652 | if (VECTOR_MEM_VSX_P (mode)) | |
7653 | { | |
7654 | rtx insn = NULL_RTX; | |
7655 | rtx elt_rtx = GEN_INT (elt); | |
7656 | ||
7657 | if (mode == V2DFmode) | |
7658 | insn = gen_vsx_set_v2df (target, target, val, elt_rtx); | |
7659 | ||
7660 | else if (mode == V2DImode) | |
7661 | insn = gen_vsx_set_v2di (target, target, val, elt_rtx); | |
7662 | ||
7663 | else if (TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER | |
7664 | && TARGET_UPPER_REGS_DI && TARGET_POWERPC64) | |
7665 | { | |
7666 | if (mode == V4SImode) | |
7667 | insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx); | |
7668 | else if (mode == V8HImode) | |
7669 | insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx); | |
7670 | else if (mode == V16QImode) | |
7671 | insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx); | |
7672 | } | |
7673 | ||
7674 | if (insn) | |
7675 | { | |
7676 | emit_insn (insn); | |
7677 | return; | |
7678 | } | |
7679 | } | |
7680 | ||
7681 | /* Simplify setting single-element vectors like V1TImode. */ | |
7682 | if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0) | |
7683 | { | |
7684 | emit_move_insn (target, gen_lowpart (mode, val)); | |
7685 | return; | |
7686 | } | |
7687 | ||
7688 | /* Load single variable value. */ | |
7689 | mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode)); | |
7690 | emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val); | |
7691 | x = gen_rtx_UNSPEC (VOIDmode, | |
7692 | gen_rtvec (1, const0_rtx), UNSPEC_LVE); | |
7693 | emit_insn (gen_rtx_PARALLEL (VOIDmode, | |
7694 | gen_rtvec (2, | |
7695 | gen_rtx_SET (reg, mem), | |
7696 | x))); | |
7697 | ||
7698 | /* Start with the identity byte selector 0..15 (select TARGET bytes). */ | |
7699 | mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16)); | |
7700 | for (i = 0; i < 16; ++i) | |
7701 | XVECEXP (mask, 0, i) = GEN_INT (i); | |
7702 | ||
7703 | /* Set permute mask to insert element into target. */ | |
7704 | for (i = 0; i < width; ++i) | |
7705 | XVECEXP (mask, 0, elt*width + i) | |
7706 | = GEN_INT (i + 0x10); | |
7707 | x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0)); | |
7708 | ||
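| /* On big endian the selector feeds VPERM directly; on little endian | |
| use VPERMR (ISA 3.0), or invert the selector and swap the VPERM | |
| operands on earlier ISAs. */ | |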
7709 | if (BYTES_BIG_ENDIAN) | |
7710 | x = gen_rtx_UNSPEC (mode, | |
7711 | gen_rtvec (3, target, reg, | |
7712 | force_reg (V16QImode, x)), | |
7713 | UNSPEC_VPERM); | |
7714 | else | |
7715 | { | |
7716 | if (TARGET_P9_VECTOR) | |
7717 | x = gen_rtx_UNSPEC (mode, | |
7718 | gen_rtvec (3, target, reg, | |
7719 | force_reg (V16QImode, x)), | |
7720 | UNSPEC_VPERMR); | |
7721 | else | |
7722 | { | |
7723 | /* Invert selector. We prefer to generate VNAND on P8 so | |
7724 | that future fusion opportunities can kick in, but must | |
7725 | generate VNOR elsewhere. */ | |
7726 | rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x)); | |
7727 | rtx iorx = (TARGET_P8_VECTOR | |
7728 | ? gen_rtx_IOR (V16QImode, notx, notx) | |
7729 | : gen_rtx_AND (V16QImode, notx, notx)); | |
7730 | rtx tmp = gen_reg_rtx (V16QImode); | |
7731 | emit_insn (gen_rtx_SET (tmp, iorx)); | |
7732 | ||
7733 | /* Permute with operands reversed and adjusted selector. */ | |
7734 | x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp), | |
7735 | UNSPEC_VPERM); | |
7736 | } | |
7737 | } | |
7738 | ||
7739 | emit_insn (gen_rtx_SET (target, x)); | |
7740 | } | |
7741 | ||
7742 | /* Extract field ELT from VEC into TARGET. */ | |
7743 | ||
7744 | void | |
7745 | rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt) | |
7746 | { | |
7747 | machine_mode mode = GET_MODE (vec); | |
7748 | machine_mode inner_mode = GET_MODE_INNER (mode); | |
7749 | rtx mem; | |
7750 | ||
7751 | if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt)) | |
7752 | { | |
7753 | switch (mode) | |
7754 | { | |
7755 | default: | |
7756 | break; | |
916ace94 | 7757 | case E_V1TImode: |
01e91138 | 7758 | gcc_assert (INTVAL (elt) == 0 && inner_mode == TImode); |
7759 | emit_move_insn (target, gen_lowpart (TImode, vec)); | |
7760 | return; | |
916ace94 | 7761 | case E_V2DFmode: |
01e91138 | 7762 | emit_insn (gen_vsx_extract_v2df (target, vec, elt)); |
7763 | return; | |
916ace94 | 7764 | case E_V2DImode: |
01e91138 | 7765 | emit_insn (gen_vsx_extract_v2di (target, vec, elt)); |
7766 | return; | |
916ace94 | 7767 | case E_V4SFmode: |
01e91138 | 7768 | emit_insn (gen_vsx_extract_v4sf (target, vec, elt)); |
7769 | return; | |
916ace94 | 7770 | case E_V16QImode: |
01e91138 | 7771 | if (TARGET_DIRECT_MOVE_64BIT) |
7772 | { | |
7773 | emit_insn (gen_vsx_extract_v16qi (target, vec, elt)); | |
7774 | return; | |
7775 | } | |
7776 | else | |
7777 | break; | |
916ace94 | 7778 | case E_V8HImode: |
01e91138 | 7779 | if (TARGET_DIRECT_MOVE_64BIT) |
7780 | { | |
7781 | emit_insn (gen_vsx_extract_v8hi (target, vec, elt)); | |
7782 | return; | |
7783 | } | |
7784 | else | |
7785 | break; | |
916ace94 | 7786 | case E_V4SImode: |
01e91138 | 7787 | if (TARGET_DIRECT_MOVE_64BIT) |
7788 | { | |
7789 | emit_insn (gen_vsx_extract_v4si (target, vec, elt)); | |
7790 | return; | |
7791 | } | |
7792 | break; | |
7793 | } | |
7794 | } | |
7795 | else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt) | |
7796 | && TARGET_DIRECT_MOVE_64BIT) | |
7797 | { | |
7798 | if (GET_MODE (elt) != DImode) | |
7799 | { | |
7800 | rtx tmp = gen_reg_rtx (DImode); | |
7801 | convert_move (tmp, elt, 0); | |
7802 | elt = tmp; | |
7803 | } | |
7804 | else if (!REG_P (elt)) | |
7805 | elt = force_reg (DImode, elt); | |
7806 | ||
7807 | switch (mode) | |
7808 | { | |
916ace94 | 7809 | case E_V2DFmode: |
01e91138 | 7810 | emit_insn (gen_vsx_extract_v2df_var (target, vec, elt)); |
7811 | return; | |
7812 | ||
916ace94 | 7813 | case E_V2DImode: |
01e91138 | 7814 | emit_insn (gen_vsx_extract_v2di_var (target, vec, elt)); |
7815 | return; | |
7816 | ||
916ace94 | 7817 | case E_V4SFmode: |
01e91138 | 7818 | emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt)); |
7819 | return; | |
7820 | ||
916ace94 | 7821 | case E_V4SImode: |
01e91138 | 7822 | emit_insn (gen_vsx_extract_v4si_var (target, vec, elt)); |
7823 | return; | |
7824 | ||
916ace94 | 7825 | case E_V8HImode: |
01e91138 | 7826 | emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt)); |
7827 | return; | |
7828 | ||
916ace94 | 7829 | case E_V16QImode: |
01e91138 | 7830 | emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt)); |
7831 | return; | |
7832 | ||
7833 | default: | |
7834 | gcc_unreachable (); | |
7835 | } | |
7836 | } | |
7837 | ||
7838 | gcc_assert (CONST_INT_P (elt)); | |
7839 | ||
7840 | /* Allocate mode-sized buffer. */ | |
7841 | mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); | |
7842 | ||
7843 | emit_move_insn (mem, vec); | |
7844 | ||
7845 | /* Add offset to field within buffer matching vector element. */ | |
7846 | mem = adjust_address_nv (mem, inner_mode, | |
7847 | INTVAL (elt) * GET_MODE_SIZE (inner_mode)); | |
7848 | ||
7849 | emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0)); | |
7850 | } | |
7851 | ||
7852 | /* Helper function to return the register number of a RTX. */ | |
7853 | static inline int | |
7854 | regno_or_subregno (rtx op) | |
7855 | { | |
7856 | if (REG_P (op)) | |
7857 | return REGNO (op); | |
7858 | else if (SUBREG_P (op)) | |
7859 | return subreg_regno (op); | |
7860 | else | |
7861 | gcc_unreachable (); | |
7862 | } | |
7863 | ||
7864 | /* Adjust a memory address (MEM) of a vector type to point to a scalar field | |
7865 | within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register | |
7866 | temporary (BASE_TMP) to fix up the address. Return the new memory address | |
7867 | that is valid for reads or writes to a given register (SCALAR_REG). */ | |
7868 | ||
7869 | rtx | |
7870 | rs6000_adjust_vec_address (rtx scalar_reg, | |
7871 | rtx mem, | |
7872 | rtx element, | |
7873 | rtx base_tmp, | |
7874 | machine_mode scalar_mode) | |
7875 | { | |
7876 | unsigned scalar_size = GET_MODE_SIZE (scalar_mode); | |
7877 | rtx addr = XEXP (mem, 0); | |
7878 | rtx element_offset; | |
7879 | rtx new_addr; | |
7880 | bool valid_addr_p; | |
7881 | ||
7882 | /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */ | |
7883 | gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC); | |
7884 | ||
7885 | /* Calculate what we need to add to the address to get the element | |
7886 | address. */ | |
7887 | if (CONST_INT_P (element)) | |
7888 | element_offset = GEN_INT (INTVAL (element) * scalar_size); | |
7889 | else | |
7890 | { | |
7891 | int byte_shift = exact_log2 (scalar_size); | |
7892 | gcc_assert (byte_shift >= 0); | |
7893 | ||
7894 | if (byte_shift == 0) | |
7895 | element_offset = element; | |
7896 | ||
7897 | else | |
7898 | { | |
7899 | if (TARGET_POWERPC64) | |
7900 | emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift))); | |
7901 | else | |
7902 | emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift))); | |
7903 | ||
7904 | element_offset = base_tmp; | |
7905 | } | |
7906 | } | |
7907 | ||
7908 | /* Create the new address pointing to the element within the vector. If we | |
7909 | are adding 0, we don't have to change the address. */ | |
7910 | if (element_offset == const0_rtx) | |
7911 | new_addr = addr; | |
7912 | ||
7913 | /* A simple indirect address can be converted into a reg + offset | |
7914 | address. */ | |
7915 | else if (REG_P (addr) || SUBREG_P (addr)) | |
7916 | new_addr = gen_rtx_PLUS (Pmode, addr, element_offset); | |
7917 | ||
7918 | /* Optimize D-FORM addresses with a constant offset and a constant element | |
7919 | number by folding the element offset into the address directly. */ | |
7920 | else if (GET_CODE (addr) == PLUS) | |
7921 | { | |
7922 | rtx op0 = XEXP (addr, 0); | |
7923 | rtx op1 = XEXP (addr, 1); | |
7924 | rtx insn; | |
7925 | ||
7926 | gcc_assert (REG_P (op0) || SUBREG_P (op0)); | |
7927 | if (CONST_INT_P (op1) && CONST_INT_P (element_offset)) | |
7928 | { | |
7929 | HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset); | |
7930 | rtx offset_rtx = GEN_INT (offset); | |
7931 | ||
7932 | if (IN_RANGE (offset, -32768, 32767) | |
7933 | && (scalar_size < 8 || (offset & 0x3) == 0)) | |
7934 | new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx); | |
7935 | else | |
7936 | { | |
7937 | emit_move_insn (base_tmp, offset_rtx); | |
7938 | new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp); | |
7939 | } | |
7940 | } | |
7941 | else | |
7942 | { | |
7943 | bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1)); | |
7944 | bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset)); | |
7945 | ||
7946 | /* Note, ADDI requires the register being added to be a base | |
7947 | register. If the register was R0, load it up into the temporary | |
7948 | and do the add. */ | |
7949 | if (op1_reg_p | |
7950 | && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO)) | |
7951 | { | |
7952 | insn = gen_add3_insn (base_tmp, op1, element_offset); | |
7953 | gcc_assert (insn != NULL_RTX); | |
7954 | emit_insn (insn); | |
7955 | } | |
7956 | ||
7957 | else if (ele_reg_p | |
7958 | && reg_or_subregno (element_offset) != FIRST_GPR_REGNO) | |
7959 | { | |
7960 | insn = gen_add3_insn (base_tmp, element_offset, op1); | |
7961 | gcc_assert (insn != NULL_RTX); | |
7962 | emit_insn (insn); | |
7963 | } | |
7964 | ||
7965 | else | |
7966 | { | |
7967 | emit_move_insn (base_tmp, op1); | |
7968 | emit_insn (gen_add2_insn (base_tmp, element_offset)); | |
7969 | } | |
7970 | ||
7971 | new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp); | |
7972 | } | |
7973 | } | |
7974 | ||
7975 | else | |
7976 | { | |
7977 | emit_move_insn (base_tmp, addr); | |
7978 | new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset); | |
7979 | } | |
7980 | ||
7981 | /* If we have a PLUS, we need to see whether the particular register class | |
7982 | allows for D-FORM or X-FORM addressing. */ | |
7983 | if (GET_CODE (new_addr) == PLUS) | |
7984 | { | |
7985 | rtx op1 = XEXP (new_addr, 1); | |
7986 | addr_mask_type addr_mask; | |
7987 | int scalar_regno = regno_or_subregno (scalar_reg); | |
7988 | ||
7989 | gcc_assert (scalar_regno < FIRST_PSEUDO_REGISTER); | |
7990 | if (INT_REGNO_P (scalar_regno)) | |
7991 | addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR]; | |
7992 | ||
7993 | else if (FP_REGNO_P (scalar_regno)) | |
7994 | addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR]; | |
7995 | ||
7996 | else if (ALTIVEC_REGNO_P (scalar_regno)) | |
7997 | addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX]; | |
7998 | ||
7999 | else | |
8000 | gcc_unreachable (); | |
8001 | ||
8002 | if (REG_P (op1) || SUBREG_P (op1)) | |
8003 | valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0; | |
8004 | else | |
8005 | valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0; | |
8006 | } | |
8007 | ||
8008 | else if (REG_P (new_addr) || SUBREG_P (new_addr)) | |
8009 | valid_addr_p = true; | |
8010 | ||
8011 | else | |
8012 | valid_addr_p = false; | |
8013 | ||
8014 | if (!valid_addr_p) | |
8015 | { | |
8016 | emit_move_insn (base_tmp, new_addr); | |
8017 | new_addr = base_tmp; | |
8018 | } | |
8019 | ||
8020 | return change_address (mem, scalar_mode, new_addr); | |
8021 | } | |
8022 | ||
8023 | /* Split a variable vec_extract operation into the component instructions. */ | |
8024 | ||
8025 | void | |
8026 | rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr, | |
8027 | rtx tmp_altivec) | |
8028 | { | |
8029 | machine_mode mode = GET_MODE (src); | |
8030 | machine_mode scalar_mode = GET_MODE (dest); | |
8031 | unsigned scalar_size = GET_MODE_SIZE (scalar_mode); | |
8032 | int byte_shift = exact_log2 (scalar_size); | |
8033 | ||
8034 | gcc_assert (byte_shift >= 0); | |
8035 | ||
8036 | /* If we are given a memory address, optimize to load just the element. We | |
8037 | don't have to adjust the vector element number on little endian | |
8038 | systems. */ | |
8039 | if (MEM_P (src)) | |
8040 | { | |
8041 | gcc_assert (REG_P (tmp_gpr)); | |
8042 | emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element, | |
8043 | tmp_gpr, scalar_mode)); | |
8044 | return; | |
8045 | } | |
8046 | ||
8047 | else if (REG_P (src) || SUBREG_P (src)) | |
8048 | { | |
8049 | int bit_shift = byte_shift + 3; | |
8050 | rtx element2; | |
8051 | int dest_regno = regno_or_subregno (dest); | |
8052 | int src_regno = regno_or_subregno (src); | |
8053 | int element_regno = regno_or_subregno (element); | |
8054 | ||
8055 | gcc_assert (REG_P (tmp_gpr)); | |
8056 | ||
8057 | /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in | |
8058 | a general purpose register. */ | |
8059 | if (TARGET_P9_VECTOR | |
8060 | && (mode == V16QImode || mode == V8HImode || mode == V4SImode) | |
8061 | && INT_REGNO_P (dest_regno) | |
8062 | && ALTIVEC_REGNO_P (src_regno) | |
8063 | && INT_REGNO_P (element_regno)) | |
8064 | { | |
8065 | rtx dest_si = gen_rtx_REG (SImode, dest_regno); | |
8066 | rtx element_si = gen_rtx_REG (SImode, element_regno); | |
8067 | ||
8068 | if (mode == V16QImode) | |
8069 | emit_insn (VECTOR_ELT_ORDER_BIG | |
8070 | ? gen_vextublx (dest_si, element_si, src) | |
8071 | : gen_vextubrx (dest_si, element_si, src)); | |
8072 | ||
8073 | else if (mode == V8HImode) | |
8074 | { | |
8075 | rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr)); | |
8076 | emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx)); | |
8077 | emit_insn (VECTOR_ELT_ORDER_BIG | |
8078 | ? gen_vextuhlx (dest_si, tmp_gpr_si, src) | |
8079 | : gen_vextuhrx (dest_si, tmp_gpr_si, src)); | |
8080 | } | |
8081 | ||
8083 | else | |
8084 | { | |
8085 | rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr)); | |
8086 | emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx)); | |
8087 | emit_insn (VECTOR_ELT_ORDER_BIG | |
8088 | ? gen_vextuwlx (dest_si, tmp_gpr_si, src) | |
8089 | : gen_vextuwrx (dest_si, tmp_gpr_si, src)); | |
8090 | } | |
8091 | ||
8092 | return; | |
8093 | } | |
8094 | ||
8096 | gcc_assert (REG_P (tmp_altivec)); | |
8097 | ||
8098 | /* For little endian, adjust element ordering. For V2DI/V2DF, we can use | |
8099 | an XOR, otherwise we need to subtract. The shift amount is so VSLO | |
8100 | will shift the element into the upper position (adding 3 to convert a | |
8101 | byte shift into a bit shift). */ | |
8102 | if (scalar_size == 8) | |
8103 | { | |
8104 | if (!VECTOR_ELT_ORDER_BIG) | |
8105 | { | |
8106 | emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx)); | |
8107 | element2 = tmp_gpr; | |
8108 | } | |
8109 | else | |
8110 | element2 = element; | |
8111 | ||
8112 | /* Shift the element number left 6 bits (each element is 64 bits) and | |
8113 | mask to the single meaningful bit; this generates one RLDIC. */ | |
8114 | emit_insn (gen_rtx_SET (tmp_gpr, | |
8115 | gen_rtx_AND (DImode, | |
8116 | gen_rtx_ASHIFT (DImode, | |
8117 | element2, | |
8118 | GEN_INT (6)), | |
8119 | GEN_INT (64)))); | |
8120 | } | |
8121 | else | |
8122 | { | |
8123 | if (!VECTOR_ELT_ORDER_BIG) | |
8124 | { | |
8125 | rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1); | |
8126 | ||
8127 | emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1)); | |
8128 | emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr)); | |
8129 | element2 = tmp_gpr; | |
8130 | } | |
8131 | else | |
8132 | element2 = element; | |
8133 | ||
8134 | emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift))); | |
8135 | } | |
8136 | ||
8137 | /* Get the value into the lower byte of the Altivec register where VSLO | |
8138 | expects it. */ | |
8139 | if (TARGET_P9_VECTOR) | |
8140 | emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr)); | |
8141 | else if (can_create_pseudo_p ()) | |
8142 | emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr)); | |
8143 | else | |
8144 | { | |
8145 | rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec)); | |
8146 | emit_move_insn (tmp_di, tmp_gpr); | |
8147 | emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di)); | |
8148 | } | |
8149 | ||
8150 | /* Do the VSLO to get the value into the final location. */ | |
8151 | switch (mode) | |
8152 | { | |
916ace94 | 8153 | case E_V2DFmode: |
01e91138 | 8154 | emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec)); |
8155 | return; | |
8156 | ||
916ace94 | 8157 | case E_V2DImode: |
01e91138 | 8158 | emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec)); |
8159 | return; | |
8160 | ||
916ace94 | 8161 | case E_V4SFmode: |
01e91138 | 8162 | { |
8163 | rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec)); | |
8164 | rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec)); | |
8165 | rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src)); | |
8166 | emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di, | |
8167 | tmp_altivec)); | |
8168 | ||
8169 | emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf)); | |
8170 | return; | |
8171 | } | |
8172 | ||
916ace94 | 8173 | case E_V4SImode: |
8174 | case E_V8HImode: | |
8175 | case E_V16QImode: | |
01e91138 | 8176 | { |
8177 | rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec)); | |
8178 | rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src)); | |
8179 | rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest)); | |
8180 | emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di, | |
8181 | tmp_altivec)); | |
8182 | emit_move_insn (tmp_gpr_di, tmp_altivec_di); | |
8183 | emit_insn (gen_ashrdi3 (tmp_gpr_di, tmp_gpr_di, | |
8184 | GEN_INT (64 - (8 * scalar_size)))); | |
8185 | return; | |
8186 | } | |
8187 | ||
8188 | default: | |
8189 | gcc_unreachable (); | |
8190 | } | |
8191 | ||
8192 | return; | |
8193 | } | |
8194 | else | |
8195 | gcc_unreachable (); | |
8196 | } | |
8197 | ||
8198 | /* Helper function for rs6000_split_v4si_init to build up a DImode value from | |
8199 | two SImode values. */ | |
8200 | ||
8201 | static void | |
8202 | rs6000_split_v4si_init_di_reg (rtx dest, rtx si1, rtx si2, rtx tmp) | |
8203 | { | |
8204 | const unsigned HOST_WIDE_INT mask_32bit = HOST_WIDE_INT_C (0xffffffff); | |
8205 | ||
8206 | if (CONST_INT_P (si1) && CONST_INT_P (si2)) | |
8207 | { | |
8208 | unsigned HOST_WIDE_INT const1 = (UINTVAL (si1) & mask_32bit) << 32; | |
8209 | unsigned HOST_WIDE_INT const2 = UINTVAL (si2) & mask_32bit; | |
8210 | ||
8211 | emit_move_insn (dest, GEN_INT (const1 | const2)); | |
8212 | return; | |
8213 | } | |
8214 | ||
8215 | /* Put si1 into upper 32-bits of dest. */ | |
8216 | if (CONST_INT_P (si1)) | |
8217 | emit_move_insn (dest, GEN_INT ((UINTVAL (si1) & mask_32bit) << 32)); | |
8218 | else | |
8219 | { | |
8220 | /* Generate RLDIC. */ | |
8221 | rtx si1_di = gen_rtx_REG (DImode, regno_or_subregno (si1)); | |
8222 | rtx shift_rtx = gen_rtx_ASHIFT (DImode, si1_di, GEN_INT (32)); | |
8223 | rtx mask_rtx = GEN_INT (mask_32bit << 32); | |
8224 | rtx and_rtx = gen_rtx_AND (DImode, shift_rtx, mask_rtx); | |
8225 | gcc_assert (!reg_overlap_mentioned_p (dest, si1)); | |
8226 | emit_insn (gen_rtx_SET (dest, and_rtx)); | |
8227 | } | |
8228 | ||
8229 | /* Put si2 into the temporary. */ | |
8230 | gcc_assert (!reg_overlap_mentioned_p (dest, tmp)); | |
8231 | if (CONST_INT_P (si2)) | |
8232 | emit_move_insn (tmp, GEN_INT (UINTVAL (si2) & mask_32bit)); | |
8233 | else | |
8234 | emit_insn (gen_zero_extendsidi2 (tmp, si2)); | |
8235 | ||
8236 | /* Combine the two parts. */ | |
8237 | emit_insn (gen_iordi3 (dest, dest, tmp)); | |
8238 | return; | |
8239 | } | |
8240 | ||
8241 | /* Split a V4SI initialization. */ | |
8242 | ||
8243 | void | |
8244 | rs6000_split_v4si_init (rtx operands[]) | |
8245 | { | |
8246 | rtx dest = operands[0]; | |
8247 | ||
8248 | /* Destination is a GPR, build up the two DImode parts in place. */ | |
8249 | if (REG_P (dest) || SUBREG_P (dest)) | |
8250 | { | |
8251 | int d_regno = regno_or_subregno (dest); | |
8252 | rtx scalar1 = operands[1]; | |
8253 | rtx scalar2 = operands[2]; | |
8254 | rtx scalar3 = operands[3]; | |
8255 | rtx scalar4 = operands[4]; | |
8256 | rtx tmp1 = operands[5]; | |
8257 | rtx tmp2 = operands[6]; | |
8258 | ||
8259 | /* Even though we only need one temporary (plus the destination, which | |
8260 | has an early clobber constraint), try to use two temporaries, one for | |
8261 | each double word created. That way the 2nd insn scheduling pass can | |
8262 | rearrange things so the two parts are done in parallel. */ | |
8263 | if (BYTES_BIG_ENDIAN) | |
8264 | { | |
8265 | rtx di_lo = gen_rtx_REG (DImode, d_regno); | |
8266 | rtx di_hi = gen_rtx_REG (DImode, d_regno + 1); | |
8267 | rs6000_split_v4si_init_di_reg (di_lo, scalar1, scalar2, tmp1); | |
8268 | rs6000_split_v4si_init_di_reg (di_hi, scalar3, scalar4, tmp2); | |
8269 | } | |
8270 | else | |
8271 | { | |
8272 | rtx di_lo = gen_rtx_REG (DImode, d_regno + 1); | |
8273 | rtx di_hi = gen_rtx_REG (DImode, d_regno); | |
8274 | gcc_assert (!VECTOR_ELT_ORDER_BIG); | |
8275 | rs6000_split_v4si_init_di_reg (di_lo, scalar4, scalar3, tmp1); | |
8276 | rs6000_split_v4si_init_di_reg (di_hi, scalar2, scalar1, tmp2); | |
8277 | } | |
8278 | return; | |
8279 | } | |
8280 | ||
8281 | else | |
8282 | gcc_unreachable (); | |
8283 | } | |
8284 | ||
8285 | /* Return TRUE if OP is an invalid SUBREG operation on the e500. */ | |
8286 | ||
8287 | bool | |
8288 | invalid_e500_subreg (rtx op, machine_mode mode) | |
8289 | { | |
8290 | if (TARGET_E500_DOUBLE) | |
8291 | { | |
8292 | /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or | |
8293 | subreg:TI and reg:TF. Decimal float modes are like integer | |
8294 | modes (only low part of each register used) for this | |
8295 | purpose. */ | |
8296 | if (GET_CODE (op) == SUBREG | |
8297 | && (mode == SImode || mode == DImode || mode == TImode | |
8298 | || mode == DDmode || mode == TDmode || mode == PTImode) | |
8299 | && REG_P (SUBREG_REG (op)) | |
8300 | && (GET_MODE (SUBREG_REG (op)) == DFmode | |
8301 | || GET_MODE (SUBREG_REG (op)) == TFmode | |
8302 | || GET_MODE (SUBREG_REG (op)) == IFmode | |
8303 | || GET_MODE (SUBREG_REG (op)) == KFmode)) | |
8304 | return true; | |
8305 | ||
8306 | /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and | |
8307 | reg:TI. */ | |
8308 | if (GET_CODE (op) == SUBREG | |
8309 | && (mode == DFmode || mode == TFmode || mode == IFmode | |
8310 | || mode == KFmode) | |
8311 | && REG_P (SUBREG_REG (op)) | |
8312 | && (GET_MODE (SUBREG_REG (op)) == DImode | |
8313 | || GET_MODE (SUBREG_REG (op)) == TImode | |
8314 | || GET_MODE (SUBREG_REG (op)) == PTImode | |
8315 | || GET_MODE (SUBREG_REG (op)) == DDmode | |
8316 | || GET_MODE (SUBREG_REG (op)) == TDmode)) | |
8317 | return true; | |
8318 | } | |
8319 | ||
8320 | if (TARGET_SPE | |
8321 | && GET_CODE (op) == SUBREG | |
8322 | && mode == SImode | |
8323 | && REG_P (SUBREG_REG (op)) | |
8324 | && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op)))) | |
8325 | return true; | |
8326 | ||
8327 | return false; | |
8328 | } | |
8329 | ||
8330 | /* Return the alignment of TYPE. The existing alignment is ALIGN. HOW | |
8331 | selects whether the alignment being applied is ABI-mandated, | |
8332 | optional, or both. */ | |
8333 | ||
8334 | unsigned int | |
8335 | rs6000_data_alignment (tree type, unsigned int align, enum data_align how) | |
8336 | { | |
8337 | if (how != align_opt) | |
8338 | { | |
8339 | if (TREE_CODE (type) == VECTOR_TYPE) | |
8340 | { | |
8341 | if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type))) | |
8342 | || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type)))) | |
8343 | { | |
8344 | if (align < 64) | |
8345 | align = 64; | |
8346 | } | |
8347 | else if (align < 128) | |
8348 | align = 128; | |
8349 | } | |
8350 | else if (TARGET_E500_DOUBLE | |
8351 | && TREE_CODE (type) == REAL_TYPE | |
8352 | && TYPE_MODE (type) == DFmode) | |
8353 | { | |
8354 | if (align < 64) | |
8355 | align = 64; | |
8356 | } | |
8357 | } | |
8358 | ||
8359 | if (how != align_abi) | |
8360 | { | |
8361 | if (TREE_CODE (type) == ARRAY_TYPE | |
8362 | && TYPE_MODE (TREE_TYPE (type)) == QImode) | |
8363 | { | |
8364 | if (align < BITS_PER_WORD) | |
8365 | align = BITS_PER_WORD; | |
8366 | } | |
8367 | } | |
8368 | ||
8369 | return align; | |
8370 | } | |
8371 | ||
dfdced85 | 8372 | /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory |
8373 | instructions simply ignore the low bits; SPE vector memory | |
8374 | instructions trap on unaligned accesses; VSX memory instructions are | |
8375 | aligned to 4 or 8 bytes. */ | |
8376 | ||
8377 | static bool | |
8378 | rs6000_slow_unaligned_access (machine_mode mode, unsigned int align) | |
8379 | { | |
8380 | return (STRICT_ALIGNMENT | |
8381 | || (!TARGET_EFFICIENT_UNALIGNED_VSX | |
8382 | && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32) | |
8383 | || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode)) | |
8384 | && (int) align < VECTOR_ALIGN (mode))))); | |
8385 | } | |
8386 | ||
01e91138 | 8387 | /* Previous GCC releases forced all vector types to have 16-byte alignment. */ |
8388 | ||
8389 | bool | |
8390 | rs6000_special_adjust_field_align_p (tree type, unsigned int computed) | |
8391 | { | |
8392 | if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE) | |
8393 | { | |
8394 | if (computed != 128) | |
8395 | { | |
8396 | static bool warned; | |
8397 | if (!warned && warn_psabi) | |
8398 | { | |
8399 | warned = true; | |
8400 | inform (input_location, | |
8401 | "the layout of aggregates containing vectors with" | |
8402 | " %d-byte alignment has changed in GCC 5", | |
8403 | computed / BITS_PER_UNIT); | |
8404 | } | |
8405 | } | |
8406 | /* In current GCC there is no special case. */ | |
8407 | return false; | |
8408 | } | |
8409 | ||
8410 | return false; | |
8411 | } | |
8412 | ||
8413 | /* AIX increases natural record alignment to doubleword if the first | |
8414 | field is an FP double while the FP fields remain word aligned. */ | |
8415 | ||
8416 | unsigned int | |
8417 | rs6000_special_round_type_align (tree type, unsigned int computed, | |
8418 | unsigned int specified) | |
8419 | { | |
8420 | unsigned int align = MAX (computed, specified); | |
8421 | tree field = TYPE_FIELDS (type); | |
8422 | ||
8423 | /* Skip all non-FIELD_DECL nodes. */ | |
8424 | while (field != NULL && TREE_CODE (field) != FIELD_DECL) | |
8425 | field = DECL_CHAIN (field); | |
8426 | ||
8427 | if (field != NULL && field != type) | |
8428 | { | |
8429 | type = TREE_TYPE (field); | |
8430 | while (TREE_CODE (type) == ARRAY_TYPE) | |
8431 | type = TREE_TYPE (type); | |
8432 | ||
8433 | if (type != error_mark_node && TYPE_MODE (type) == DFmode) | |
8434 | align = MAX (align, 64); | |
8435 | } | |
8436 | ||
8437 | return align; | |
8438 | } | |
8439 | ||
8440 | /* Darwin increases record alignment to the natural alignment of | |
8441 | the first field. */ | |
8442 | ||
8443 | unsigned int | |
8444 | darwin_rs6000_special_round_type_align (tree type, unsigned int computed, | |
8445 | unsigned int specified) | |
8446 | { | |
8447 | unsigned int align = MAX (computed, specified); | |
8448 | ||
8449 | if (TYPE_PACKED (type)) | |
8450 | return align; | |
8451 | ||
8452 | /* Find the first field, looking down into aggregates. */ | |
8453 | do { | |
8454 | tree field = TYPE_FIELDS (type); | |
8455 | /* Skip all non-FIELD_DECL nodes. */ | |
8456 | while (field != NULL && TREE_CODE (field) != FIELD_DECL) | |
8457 | field = DECL_CHAIN (field); | |
8458 | if (! field) | |
8459 | break; | |
8460 | /* A packed field does not contribute any extra alignment. */ | |
8461 | if (DECL_PACKED (field)) | |
8462 | return align; | |
8463 | type = TREE_TYPE (field); | |
8464 | while (TREE_CODE (type) == ARRAY_TYPE) | |
8465 | type = TREE_TYPE (type); | |
8466 | } while (AGGREGATE_TYPE_P (type)); | |
8467 | ||
8468 | if (! AGGREGATE_TYPE_P (type) && type != error_mark_node) | |
8469 | align = MAX (align, TYPE_ALIGN (type)); | |
8470 | ||
8471 | return align; | |
8472 | } | |
8473 | ||
8474 | /* Return 1 for an operand in small memory on V.4/eabi. */ | |
8475 | ||
8476 | int | |
8477 | small_data_operand (rtx op ATTRIBUTE_UNUSED, | |
8478 | machine_mode mode ATTRIBUTE_UNUSED) | |
8479 | { | |
8480 | #if TARGET_ELF | |
8481 | rtx sym_ref; | |
8482 | ||
8483 | if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA) | |
8484 | return 0; | |
8485 | ||
8486 | if (DEFAULT_ABI != ABI_V4) | |
8487 | return 0; | |
8488 | ||
8489 | /* Vector and float memory instructions have a limited offset on the | |
8490 | SPE, so using a vector or float variable directly as an operand is | |
8491 | not useful. */ | |
8492 | if (TARGET_SPE | |
8493 | && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode))) | |
8494 | return 0; | |
8495 | ||
8496 | if (GET_CODE (op) == SYMBOL_REF) | |
8497 | sym_ref = op; | |
8498 | ||
8499 | else if (GET_CODE (op) != CONST | |
8500 | || GET_CODE (XEXP (op, 0)) != PLUS | |
8501 | || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF | |
8502 | || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT) | |
8503 | return 0; | |
8504 | ||
8505 | else | |
8506 | { | |
8507 | rtx sum = XEXP (op, 0); | |
8508 | HOST_WIDE_INT summand; | |
8509 | ||
8510 | /* We have to be careful here, because it is the referenced address | |
8511 | that must be 32k from _SDA_BASE_, not just the symbol. */ | |
8512 | summand = INTVAL (XEXP (sum, 1)); | |
8513 | if (summand < 0 || summand > g_switch_value) | |
8514 | return 0; | |
8515 | ||
8516 | sym_ref = XEXP (sum, 0); | |
8517 | } | |
8518 | ||
8519 | return SYMBOL_REF_SMALL_P (sym_ref); | |
8520 | #else | |
8521 | return 0; | |
8522 | #endif | |
8523 | } | |
8524 | ||
8525 | /* Return true if either operand is a general purpose register. */ | |
8526 | ||
8527 | bool | |
8528 | gpr_or_gpr_p (rtx op0, rtx op1) | |
8529 | { | |
8530 | return ((REG_P (op0) && INT_REGNO_P (REGNO (op0))) | |
8531 | || (REG_P (op1) && INT_REGNO_P (REGNO (op1)))); | |
8532 | } | |
8533 | ||
8534 | /* Return true if this is a move direct operation between GPR registers and | |
8535 | floating point/VSX registers. */ | |
8536 | ||
8537 | bool | |
8538 | direct_move_p (rtx op0, rtx op1) | |
8539 | { | |
8540 | int regno0, regno1; | |
8541 | ||
8542 | if (!REG_P (op0) || !REG_P (op1)) | |
8543 | return false; | |
8544 | ||
8545 | if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR) | |
8546 | return false; | |
8547 | ||
8548 | regno0 = REGNO (op0); | |
8549 | regno1 = REGNO (op1); | |
8550 | if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER) | |
8551 | return false; | |
8552 | ||
8553 | if (INT_REGNO_P (regno0)) | |
8554 | return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1); | |
8555 | ||
8556 | else if (INT_REGNO_P (regno1)) | |
8557 | { | |
8558 | if (TARGET_MFPGPR && FP_REGNO_P (regno0)) | |
8559 | return true; | |
8560 | ||
8561 | else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0)) | |
8562 | return true; | |
8563 | } | |
8564 | ||
8565 | return false; | |
8566 | } | |
8567 | ||
8568 | /* Return true if the OFFSET is valid for the quad address instructions that | |
8569 | use d-form (register + offset) addressing. */ | |
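| /* This corresponds to the DQ instruction format: a signed 16-bit | |
| displacement whose low four bits must be zero. */ | |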
8570 | ||
8571 | static inline bool | |
8572 | quad_address_offset_p (HOST_WIDE_INT offset) | |
8573 | { | |
8574 | return (IN_RANGE (offset, -32768, 32767) && ((offset) & 0xf) == 0); | |
8575 | } | |
8576 | ||
8577 | /* Return true if the ADDR is an acceptable address for a quad memory | |
8578 | operation of mode MODE (either LQ/STQ for general purpose registers, or | |
8579 | LXV/STXV for vector registers under ISA 3.0). STRICT is true if the | |
8580 | strict form of address checking (as required after reload) should be | |
8581 | applied. */ | |
8582 | ||
8583 | bool | |
8584 | quad_address_p (rtx addr, machine_mode mode, bool strict) | |
8585 | { | |
8586 | rtx op0, op1; | |
8587 | ||
8588 | if (GET_MODE_SIZE (mode) != 16) | |
8589 | return false; | |
8590 | ||
8591 | if (legitimate_indirect_address_p (addr, strict)) | |
8592 | return true; | |
8593 | ||
8594 | if (VECTOR_MODE_P (mode) && !mode_supports_vsx_dform_quad (mode)) | |
8595 | return false; | |
8596 | ||
8597 | if (GET_CODE (addr) != PLUS) | |
8598 | return false; | |
8599 | ||
8600 | op0 = XEXP (addr, 0); | |
8601 | if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict)) | |
8602 | return false; | |
8603 | ||
8604 | op1 = XEXP (addr, 1); | |
8605 | if (!CONST_INT_P (op1)) | |
8606 | return false; | |
8607 | ||
8608 | return quad_address_offset_p (INTVAL (op1)); | |
8609 | } | |
8610 | ||
8611 | /* Return true if this is a load or store quad operation. This function does | |
8612 | not handle the atomic quad memory instructions. */ | |
8613 | ||
8614 | bool | |
8615 | quad_load_store_p (rtx op0, rtx op1) | |
8616 | { | |
8617 | bool ret; | |
8618 | ||
8619 | if (!TARGET_QUAD_MEMORY) | |
8620 | ret = false; | |
8621 | ||
8622 | else if (REG_P (op0) && MEM_P (op1)) | |
8623 | ret = (quad_int_reg_operand (op0, GET_MODE (op0)) | |
8624 | && quad_memory_operand (op1, GET_MODE (op1)) | |
8625 | && !reg_overlap_mentioned_p (op0, op1)); | |
8626 | ||
8627 | else if (MEM_P (op0) && REG_P (op1)) | |
8628 | ret = (quad_memory_operand (op0, GET_MODE (op0)) | |
8629 | && quad_int_reg_operand (op1, GET_MODE (op1))); | |
8630 | ||
8631 | else | |
8632 | ret = false; | |
8633 | ||
8634 | if (TARGET_DEBUG_ADDR) | |
8635 | { | |
8636 | fprintf (stderr, "\n========== quad_load_store, return %s\n", | |
8637 | ret ? "true" : "false"); | |
8638 | debug_rtx (gen_rtx_SET (op0, op1)); | |
8639 | } | |
8640 | ||
8641 | return ret; | |
8642 | } | |
8643 | ||
8644 | /* Given an address, return a constant offset term if one exists. */ | |
8645 | ||
8646 | static rtx | |
8647 | address_offset (rtx op) | |
8648 | { | |
8649 | if (GET_CODE (op) == PRE_INC | |
8650 | || GET_CODE (op) == PRE_DEC) | |
8651 | op = XEXP (op, 0); | |
8652 | else if (GET_CODE (op) == PRE_MODIFY | |
8653 | || GET_CODE (op) == LO_SUM) | |
8654 | op = XEXP (op, 1); | |
8655 | ||
8656 | if (GET_CODE (op) == CONST) | |
8657 | op = XEXP (op, 0); | |
8658 | ||
8659 | if (GET_CODE (op) == PLUS) | |
8660 | op = XEXP (op, 1); | |
8661 | ||
8662 | if (CONST_INT_P (op)) | |
8663 | return op; | |
8664 | ||
8665 | return NULL_RTX; | |
8666 | } | |
8667 | ||
8668 | /* Return true if the MEM operand is a memory operand suitable for use | |
8669 | with a (full width, possibly multiple) gpr load/store. On | |
8670 | powerpc64 this means the offset must be divisible by 4. | |
8671 | Implements 'Y' constraint. | |
8672 | ||
8673 | Accept direct, indexed, offset, lo_sum and tocref. Since this is | |
8674 | a constraint function we know the operand has satisfied a suitable | |
8675 | memory predicate. Also accept some odd rtl generated by reload | |
8676 | (see rs6000_legitimize_reload_address for various forms). It is | |
8677 | important that reload rtl be accepted by appropriate constraints | |
8678 | but not by the operand predicate. | |
8679 | ||
8680 | Offsetting a lo_sum should not be allowed, except where we know by | |
8681 | alignment that a 32k boundary is not crossed, but see the ??? | |
8682 | comment in rs6000_legitimize_reload_address. Note that by | |
8683 | "offsetting" here we mean a further offset to access parts of the | |
8684 | MEM. It's fine to have a lo_sum where the inner address is offset | |
8685 | from a sym, since the same sym+offset will appear in the high part | |
8686 | of the address calculation. */ | |
8687 | ||
8688 | bool | |
8689 | mem_operand_gpr (rtx op, machine_mode mode) | |
8690 | { | |
8691 | unsigned HOST_WIDE_INT offset; | |
8692 | int extra; | |
8693 | rtx addr = XEXP (op, 0); | |
8694 | ||
8695 | op = address_offset (addr); | |
8696 | if (op == NULL_RTX) | |
8697 | return true; | |
8698 | ||
8699 | offset = INTVAL (op); | |
8700 | if (TARGET_POWERPC64 && (offset & 3) != 0) | |
8701 | return false; | |
8702 | ||
8703 | extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD; | |
8704 | if (extra < 0) | |
8705 | extra = 0; | |
8706 | ||
8707 | if (GET_CODE (addr) == LO_SUM) | |
8708 | /* For lo_sum addresses, we must allow any offset except one that | |
8709 | causes a wrap, so test only the low 16 bits. */ | |
8710 | offset = ((offset & 0xffff) ^ 0x8000) - 0x8000; | |
8711 | ||
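| /* Accept the offset only if both the first and last word accessed | |
| fit within the signed 16-bit displacement range. */ | |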
8712 | return offset + 0x8000 < 0x10000u - extra; | |
8713 | } | |
8714 | ||
8715 | /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr, | |
8716 | enforce an offset divisible by 4 even for 32-bit. */ | |
8717 | ||
8718 | bool | |
8719 | mem_operand_ds_form (rtx op, machine_mode mode) | |
8720 | { | |
8721 | unsigned HOST_WIDE_INT offset; | |
8722 | int extra; | |
8723 | rtx addr = XEXP (op, 0); | |
8724 | ||
8725 | if (!offsettable_address_p (false, mode, addr)) | |
8726 | return false; | |
8727 | ||
8728 | op = address_offset (addr); | |
8729 | if (op == NULL_RTX) | |
8730 | return true; | |
8731 | ||
8732 | offset = INTVAL (op); | |
8733 | if ((offset & 3) != 0) | |
8734 | return false; | |
8735 | ||
8736 | extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD; | |
8737 | if (extra < 0) | |
8738 | extra = 0; | |
8739 | ||
8740 | if (GET_CODE (addr) == LO_SUM) | |
8741 | /* For lo_sum addresses, we must allow any offset except one that | |
8742 | causes a wrap, so test only the low 16 bits. */ | |
8743 | offset = ((offset & 0xffff) ^ 0x8000) - 0x8000; | |
8744 | ||
8745 | return offset + 0x8000 < 0x10000u - extra; | |
8746 | } | |
8747 | \f | |
8748 | /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */ | |
8749 | ||
8750 | static bool | |
8751 | reg_offset_addressing_ok_p (machine_mode mode) | |
8752 | { | |
8753 | switch (mode) | |
8754 | { | |
916ace94 | 8755 | case E_V16QImode: |
8756 | case E_V8HImode: | |
8757 | case E_V4SFmode: | |
8758 | case E_V4SImode: | |
8759 | case E_V2DFmode: | |
8760 | case E_V2DImode: | |
8761 | case E_V1TImode: | |
8762 | case E_TImode: | |
8763 | case E_TFmode: | |
8764 | case E_KFmode: | |
01e91138 | 8765 | /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the |
8766 | ISA 3.0 vector d-form addressing mode was added. While TImode is not | |
8767 | a vector mode, if we want to use the VSX registers to move it around, | |
8768 | we need to restrict ourselves to reg+reg addressing. Similarly for | |
8769 | IEEE 128-bit floating point that is passed in a single vector | |
8770 | register. */ | |
8771 | if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)) | |
8772 | return mode_supports_vsx_dform_quad (mode); | |
8773 | break; | |
8774 | ||
916ace94 | 8775 | case E_V4HImode: |
8776 | case E_V2SImode: | |
8777 | case E_V1DImode: | |
8778 | case E_V2SFmode: | |
01e91138 | 8779 | /* Paired vector modes. Only reg+reg addressing is valid. */ |
8780 | if (TARGET_PAIRED_FLOAT) | |
8781 | return false; | |
8782 | break; | |
8783 | ||
916ace94 | 8784 | case E_SDmode: |
01e91138 | 8785 | /* If we can do direct load/stores of SDmode, restrict it to reg+reg |
8786 | addressing for the LFIWZX and STFIWX instructions. */ | |
8787 | if (TARGET_NO_SDMODE_STACK) | |
8788 | return false; | |
8789 | break; | |
8790 | ||
8791 | default: | |
8792 | break; | |
8793 | } | |
8794 | ||
8795 | return true; | |
8796 | } | |
8797 | ||
8798 | static bool | |
8799 | virtual_stack_registers_memory_p (rtx op) | |
8800 | { | |
8801 | int regnum; | |
8802 | ||
8803 | if (GET_CODE (op) == REG) | |
8804 | regnum = REGNO (op); | |
8805 | ||
8806 | else if (GET_CODE (op) == PLUS | |
8807 | && GET_CODE (XEXP (op, 0)) == REG | |
8808 | && GET_CODE (XEXP (op, 1)) == CONST_INT) | |
8809 | regnum = REGNO (XEXP (op, 0)); | |
8810 | ||
8811 | else | |
8812 | return false; | |
8813 | ||
8814 | return (regnum >= FIRST_VIRTUAL_REGISTER | |
8815 | && regnum <= LAST_VIRTUAL_POINTER_REGISTER); | |
8816 | } | |
8817 | ||
8818 | /* Return true if a MODE sized memory access to OP plus OFFSET | |
8819 | is known to not straddle a 32k boundary. This function is used | |
8820 | to determine whether -mcmodel=medium code can use TOC pointer | |
8821 | relative addressing for OP. This means the alignment of the TOC | |
8822 | pointer must also be taken into account, and unfortunately that is | |
8823 | only 8 bytes. */ | |
8824 | ||
8825 | #ifndef POWERPC64_TOC_POINTER_ALIGNMENT | |
8826 | #define POWERPC64_TOC_POINTER_ALIGNMENT 8 | |
8827 | #endif | |
8828 | ||
8829 | static bool | |
8830 | offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset, | |
8831 | machine_mode mode) | |
8832 | { | |
8833 | tree decl; | |
8834 | unsigned HOST_WIDE_INT dsize, dalign, lsb, mask; | |
8835 | ||
8836 | if (GET_CODE (op) != SYMBOL_REF) | |
8837 | return false; | |
8838 | ||
8839 | /* ISA 3.0 vector d-form addressing is restricted, don't allow | |
8840 | SYMBOL_REF. */ | |
8841 | if (mode_supports_vsx_dform_quad (mode)) | |
8842 | return false; | |
8843 | ||
8844 | dsize = GET_MODE_SIZE (mode); | |
8845 | decl = SYMBOL_REF_DECL (op); | |
8846 | if (!decl) | |
8847 | { | |
8848 | if (dsize == 0) | |
8849 | return false; | |
8850 | ||
8851 | /* -fsection-anchors loses the original SYMBOL_REF_DECL when | |
8852 | replacing memory addresses with an anchor plus offset. We | |
8853 | could find the decl by rummaging around in the block->objects | |
8854 | VEC for the given offset but that seems like too much work. */ | |
8855 | dalign = BITS_PER_UNIT; | |
8856 | if (SYMBOL_REF_HAS_BLOCK_INFO_P (op) | |
8857 | && SYMBOL_REF_ANCHOR_P (op) | |
8858 | && SYMBOL_REF_BLOCK (op) != NULL) | |
8859 | { | |
8860 | struct object_block *block = SYMBOL_REF_BLOCK (op); | |
8861 | ||
8862 | dalign = block->alignment; | |
8863 | offset += SYMBOL_REF_BLOCK_OFFSET (op); | |
8864 | } | |
8865 | else if (CONSTANT_POOL_ADDRESS_P (op)) | |
8866 | { | |
8867 | /* It would be nice to have get_pool_align()...  */ | |
8868 | machine_mode cmode = get_pool_mode (op); | |
8869 | ||
8870 | dalign = GET_MODE_ALIGNMENT (cmode); | |
8871 | } | |
8872 | } | |
8873 | else if (DECL_P (decl)) | |
8874 | { | |
8875 | dalign = DECL_ALIGN (decl); | |
8876 | ||
8877 | if (dsize == 0) | |
8878 | { | |
8879 | /* Allow BLKmode when the entire object is known to not | |
8880 | cross a 32k boundary. */ | |
8881 | if (!DECL_SIZE_UNIT (decl)) | |
8882 | return false; | |
8883 | ||
8884 | if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl))) | |
8885 | return false; | |
8886 | ||
8887 | dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl)); | |
8888 | if (dsize > 32768) | |
8889 | return false; | |
8890 | ||
8891 | dalign /= BITS_PER_UNIT; | |
8892 | if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT) | |
8893 | dalign = POWERPC64_TOC_POINTER_ALIGNMENT; | |
8894 | return dalign >= dsize; | |
8895 | } | |
8896 | } | |
8897 | else | |
8898 | gcc_unreachable (); | |
8899 | ||
8900 | /* Find how many bits of the alignment we know for this access. */ | |
8901 | dalign /= BITS_PER_UNIT; | |
8902 | if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT) | |
8903 | dalign = POWERPC64_TOC_POINTER_ALIGNMENT; | |
8904 | mask = dalign - 1; | |
8905 | lsb = offset & -offset; | |
8906 | mask &= lsb - 1; | |
8907 | dalign = mask + 1; | |
8908 | ||
8909 | return dalign >= dsize; | |
8910 | } | |
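
/* Worked example of the alignment bit-twiddling above (illustrative
   only): suppose the decl is known to be 16-byte aligned, which is
   capped to the 8-byte POWERPC64_TOC_POINTER_ALIGNMENT, so mask = 7.
   For offset = 0x14 the least significant set bit is
   lsb = offset & -offset = 4, and mask &= lsb - 1 leaves 3, giving
   dalign = 4: the access is only known to be 4-byte aligned, so at
   most a 4-byte access is known not to straddle a 32k boundary.  */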
8911 | ||
8912 | static bool | |
8913 | constant_pool_expr_p (rtx op) | |
8914 | { | |
8915 | rtx base, offset; | |
8916 | ||
8917 | split_const (op, &base, &offset); | |
8918 | return (GET_CODE (base) == SYMBOL_REF | |
8919 | && CONSTANT_POOL_ADDRESS_P (base) | |
8920 | && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode)); | |
8921 | } | |
8922 | ||
8923 | static const_rtx tocrel_base, tocrel_offset; | |
8924 | ||
8925 | /* Return true if OP is a toc pointer relative address (the output | |
8926 | of create_TOC_reference). If STRICT, do not match non-split | |
8927 | -mcmodel=large/medium toc pointer relative addresses. */ | |
8928 | ||
8929 | bool | |
8930 | toc_relative_expr_p (const_rtx op, bool strict) | |
8931 | { | |
8932 | if (!TARGET_TOC) | |
8933 | return false; | |
8934 | ||
8935 | if (TARGET_CMODEL != CMODEL_SMALL) | |
8936 | { | |
8937 | /* When strict, ensure we have everything tidy. */ | |
8938 | if (strict | |
8939 | && !(GET_CODE (op) == LO_SUM | |
8940 | && REG_P (XEXP (op, 0)) | |
8941 | && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict))) | |
8942 | return false; | |
8943 | ||
8944 | /* When not strict, allow non-split TOC addresses and also allow | |
8945 | (lo_sum (high ..)) TOC addresses created during reload. */ | |
8946 | if (GET_CODE (op) == LO_SUM) | |
8947 | op = XEXP (op, 1); | |
8948 | } | |
8949 | ||
8950 | tocrel_base = op; | |
8951 | tocrel_offset = const0_rtx; | |
8952 | if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op))) | |
8953 | { | |
8954 | tocrel_base = XEXP (op, 0); | |
8955 | tocrel_offset = XEXP (op, 1); | |
8956 | } | |
8957 | ||
8958 | return (GET_CODE (tocrel_base) == UNSPEC | |
8959 | && XINT (tocrel_base, 1) == UNSPEC_TOCREL); | |
8960 | } | |
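
/* Shapes matched above (a rough sketch; the exact unspec operands
   come from create_TOC_reference):

     (unspec [(symbol_ref "x") ...] UNSPEC_TOCREL)
     (plus (unspec [...] UNSPEC_TOCREL) (const_int 8))

   and, for -mcmodel=medium/large, the same forms underneath
   (lo_sum (reg) ...).  On return tocrel_base points at the unspec
   and tocrel_offset at the constant term, if any.  */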
8961 | ||
8962 | /* Return true if X is a constant pool address, and also for cmodel=medium | |
8963 | if X is a toc-relative address known to be offsettable within MODE. */ | |
8964 | ||
8965 | bool | |
8966 | legitimate_constant_pool_address_p (const_rtx x, machine_mode mode, | |
8967 | bool strict) | |
8968 | { | |
8969 | return (toc_relative_expr_p (x, strict) | |
8970 | && (TARGET_CMODEL != CMODEL_MEDIUM | |
8971 | || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0)) | |
8972 | || mode == QImode | |
8973 | || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0), | |
8974 | INTVAL (tocrel_offset), mode))); | |
8975 | } | |
8976 | ||
8977 | static bool | |
8978 | legitimate_small_data_p (machine_mode mode, rtx x) | |
8979 | { | |
8980 | return (DEFAULT_ABI == ABI_V4 | |
8981 | && !flag_pic && !TARGET_TOC | |
8982 | && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST) | |
8983 | && small_data_operand (x, mode)); | |
8984 | } | |
8985 | ||
8986 | /* SPE offset addressing is limited to 5 bits' worth of double words. */ | |
8987 | #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0) | |
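
/* To illustrate the macro: (x & ~0xf8) == 0 holds exactly for the
   byte offsets 0, 8, 16, ..., 248, i.e. a 5-bit field scaled by the
   8-byte double word; offsets such as 250 (not a multiple of 8) or
   256 (needs a sixth bit) fail the test.  */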
8988 | ||
8989 | bool | |
8990 | rs6000_legitimate_offset_address_p (machine_mode mode, rtx x, | |
8991 | bool strict, bool worst_case) | |
8992 | { | |
8993 | unsigned HOST_WIDE_INT offset; | |
8994 | unsigned int extra; | |
8995 | ||
8996 | if (GET_CODE (x) != PLUS) | |
8997 | return false; | |
8998 | if (!REG_P (XEXP (x, 0))) | |
8999 | return false; | |
9000 | if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict)) | |
9001 | return false; | |
9002 | if (mode_supports_vsx_dform_quad (mode)) | |
9003 | return quad_address_p (x, mode, strict); | |
9004 | if (!reg_offset_addressing_ok_p (mode)) | |
9005 | return virtual_stack_registers_memory_p (x); | |
9006 | if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress)) | |
9007 | return true; | |
9008 | if (GET_CODE (XEXP (x, 1)) != CONST_INT) | |
9009 | return false; | |
9010 | ||
9011 | offset = INTVAL (XEXP (x, 1)); | |
9012 | extra = 0; | |
9013 | switch (mode) | |
9014 | { | |
916ace94 | 9015 | case E_V4HImode: |
9016 | case E_V2SImode: | |
9017 | case E_V1DImode: | |
9018 | case E_V2SFmode: | |
01e91138 | 9019 | /* SPE vector modes. */ |
9020 | return SPE_CONST_OFFSET_OK (offset); | |
9021 | ||
916ace94 | 9022 | case E_DFmode: |
9023 | case E_DDmode: | |
9024 | case E_DImode: | |
01e91138 | 9025 | /* On e500v2, we may have: |
9026 | ||
9027 | (subreg:DF (mem:DI (plus (reg) (const_int))) 0), | |
9028 | ||
9029 | which gets addressed with evldd instructions. */ | |
9030 | if (TARGET_E500_DOUBLE) | |
9031 | return SPE_CONST_OFFSET_OK (offset); | |
9032 | ||
9033 | /* If we are using VSX scalar loads, restrict ourselves to reg+reg | |
9034 | addressing. */ | |
9035 | if (VECTOR_MEM_VSX_P (mode)) | |
9036 | return false; | |
9037 | ||
9038 | if (!worst_case) | |
9039 | break; | |
9040 | if (!TARGET_POWERPC64) | |
9041 | extra = 4; | |
9042 | else if (offset & 3) | |
9043 | return false; | |
9044 | break; | |
9045 | ||
916ace94 | 9046 | case E_TFmode: |
9047 | case E_IFmode: | |
9048 | case E_KFmode: | |
9049 | case E_TDmode: | |
9050 | case E_TImode: | |
9051 | case E_PTImode: | |
01e91138 | 9052 | if (TARGET_E500_DOUBLE) |
9053 | return (SPE_CONST_OFFSET_OK (offset) | |
9054 | && SPE_CONST_OFFSET_OK (offset + 8)); | |
9055 | ||
9056 | extra = 8; | |
9057 | if (!worst_case) | |
9058 | break; | |
9059 | if (!TARGET_POWERPC64) | |
9060 | extra = 12; | |
9061 | else if (offset & 3) | |
9062 | return false; | |
9063 | break; | |
9064 | ||
9065 | default: | |
9066 | break; | |
9067 | } | |
9068 | ||
9069 | offset += 0x8000; | |
9070 | return offset < 0x10000 - extra; | |
9071 | } | |
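
/* Worked example of the final range test (illustrative only): for
   TFmode in 32-bit GPRs the worst case sets extra = 12, so

       offset + 0x8000 < 0x10000 - extra

   accepts signed offsets in [-0x8000, 0x7ff3]; the last of the four
   word-sized loads then uses offset + 12, which is still at most
   0x7fff.  */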
9072 | ||
9073 | bool | |
9074 | legitimate_indexed_address_p (rtx x, int strict) | |
9075 | { | |
9076 | rtx op0, op1; | |
9077 | ||
9078 | if (GET_CODE (x) != PLUS) | |
9079 | return false; | |
9080 | ||
9081 | op0 = XEXP (x, 0); | |
9082 | op1 = XEXP (x, 1); | |
9083 | ||
9084 | /* Recognize the rtl generated by reload which we know will later be | |
9085 | replaced with proper base and index regs. */ | |
9086 | if (!strict | |
9087 | && reload_in_progress | |
9088 | && (REG_P (op0) || GET_CODE (op0) == PLUS) | |
9089 | && REG_P (op1)) | |
9090 | return true; | |
9091 | ||
9092 | return (REG_P (op0) && REG_P (op1) | |
9093 | && ((INT_REG_OK_FOR_BASE_P (op0, strict) | |
9094 | && INT_REG_OK_FOR_INDEX_P (op1, strict)) | |
9095 | || (INT_REG_OK_FOR_BASE_P (op1, strict) | |
9096 | && INT_REG_OK_FOR_INDEX_P (op0, strict)))); | |
9097 | } | |
9098 | ||
9099 | bool | |
9100 | avoiding_indexed_address_p (machine_mode mode) | |
9101 | { | |
9102 | /* Avoid indexed addressing for modes that have non-indexed | |
9103 | load/store instruction forms. */ | |
9104 | return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode)); | |
9105 | } | |
9106 | ||
9107 | bool | |
9108 | legitimate_indirect_address_p (rtx x, int strict) | |
9109 | { | |
9110 | return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict); | |
9111 | } | |
9112 | ||
9113 | bool | |
9114 | macho_lo_sum_memory_operand (rtx x, machine_mode mode) | |
9115 | { | |
9116 | if (!TARGET_MACHO || !flag_pic | |
9117 | || mode != SImode || GET_CODE (x) != MEM) | |
9118 | return false; | |
9119 | x = XEXP (x, 0); | |
9120 | ||
9121 | if (GET_CODE (x) != LO_SUM) | |
9122 | return false; | |
9123 | if (GET_CODE (XEXP (x, 0)) != REG) | |
9124 | return false; | |
9125 | if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0)) | |
9126 | return false; | |
9127 | x = XEXP (x, 1); | |
9128 | ||
9129 | return CONSTANT_P (x); | |
9130 | } | |
9131 | ||
9132 | static bool | |
9133 | legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict) | |
9134 | { | |
9135 | if (GET_CODE (x) != LO_SUM) | |
9136 | return false; | |
9137 | if (GET_CODE (XEXP (x, 0)) != REG) | |
9138 | return false; | |
9139 | if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict)) | |
9140 | return false; | |
9141 | /* Quad word addresses are restricted, and we can't use LO_SUM. */ | |
9142 | if (mode_supports_vsx_dform_quad (mode)) | |
9143 | return false; | |
9144 | /* Restrict addressing for DI because of our SUBREG hackery. */ | |
9145 | if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD) | |
9146 | return false; | |
9147 | x = XEXP (x, 1); | |
9148 | ||
9149 | if (TARGET_ELF || TARGET_MACHO) | |
9150 | { | |
9151 | bool large_toc_ok; | |
9152 | ||
9153 | if (DEFAULT_ABI == ABI_V4 && flag_pic) | |
9154 | return false; | |
9155 | /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls | |
9156 | push_reload from the reload pass code. LEGITIMIZE_RELOAD_ADDRESS | |
9157 | recognizes some LO_SUM addresses as valid although this | |
9158 | function says the opposite. In most cases LRA can generate | |
9159 | correct code for address reloads through various transformations; | |
9160 | it cannot manage only some LO_SUM cases. So we need to add code | |
9161 | here, analogous to that in rs6000_legitimize_reload_address for | |
9162 | LO_SUM, saying that some addresses are still valid. */ | |
9163 | large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL | |
9164 | && small_toc_ref (x, VOIDmode)); | |
9165 | if (TARGET_TOC && ! large_toc_ok) | |
9166 | return false; | |
9167 | if (GET_MODE_NUNITS (mode) != 1) | |
9168 | return false; | |
9169 | if (GET_MODE_SIZE (mode) > UNITS_PER_WORD | |
9170 | && !(/* ??? Assume floating point reg based on mode? */ | |
9171 | TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT | |
9172 | && (mode == DFmode || mode == DDmode))) | |
9173 | return false; | |
9174 | ||
9175 | return CONSTANT_P (x) || large_toc_ok; | |
9176 | } | |
9177 | ||
9178 | return false; | |
9179 | } | |
9180 | ||
9181 | ||
9182 | /* Try machine-dependent ways of modifying an illegitimate address | |
9183 | to be legitimate. If we find one, return the new, valid address. | |
9184 | This is used from only one place: `memory_address' in explow.c. | |
9185 | ||
9186 | OLDX is the address as it was before break_out_memory_refs was | |
9187 | called. In some cases it is useful to look at this to decide what | |
9188 | needs to be done. | |
9189 | ||
9190 | It is always safe for this function to do nothing. It exists to | |
9191 | recognize opportunities to optimize the output. | |
9192 | ||
9193 | On RS/6000, first check for the sum of a register with a constant | |
9194 | integer that is out of range. If so, generate code to add the | |
9195 | constant with the low-order 16 bits masked to the register and force | |
9196 | this result into another register (this can be done with `cau'). | |
9197 | Then generate an address of REG+(CONST&0xffff), allowing for the | |
9198 | possibility of bit 16 being a one. | |
9199 | ||
9200 | Then check for the sum of a register and something not constant; try | |
9201 | to load the non-constant part into a register and return the sum. */ | |
9202 | ||
9203 | static rtx | |
9204 | rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, | |
9205 | machine_mode mode) | |
9206 | { | |
9207 | unsigned int extra; | |
9208 | ||
9209 | if (!reg_offset_addressing_ok_p (mode) | |
9210 | || mode_supports_vsx_dform_quad (mode)) | |
9211 | { | |
9212 | if (virtual_stack_registers_memory_p (x)) | |
9213 | return x; | |
9214 | ||
9215 | /* In theory we should not be seeing addresses of the form reg+0, | |
9216 | but just in case one is generated, optimize it away. */ | |
9217 | if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx) | |
9218 | return force_reg (Pmode, XEXP (x, 0)); | |
9219 | ||
9220 | /* For TImode with load/store quad, restrict addresses to just a single | |
9221 | pointer, so it works with both GPRs and VSX registers. */ | |
9222 | /* Make sure both operands are registers. */ | |
9223 | else if (GET_CODE (x) == PLUS | |
9224 | && (mode != TImode || !TARGET_VSX_TIMODE)) | |
9225 | return gen_rtx_PLUS (Pmode, | |
9226 | force_reg (Pmode, XEXP (x, 0)), | |
9227 | force_reg (Pmode, XEXP (x, 1))); | |
9228 | else | |
9229 | return force_reg (Pmode, x); | |
9230 | } | |
9231 | if (GET_CODE (x) == SYMBOL_REF) | |
9232 | { | |
9233 | enum tls_model model = SYMBOL_REF_TLS_MODEL (x); | |
9234 | if (model != 0) | |
9235 | return rs6000_legitimize_tls_address (x, model); | |
9236 | } | |
9237 | ||
9238 | extra = 0; | |
9239 | switch (mode) | |
9240 | { | |
916ace94 | 9241 | case E_TFmode: |
9242 | case E_TDmode: | |
9243 | case E_TImode: | |
9244 | case E_PTImode: | |
9245 | case E_IFmode: | |
9246 | case E_KFmode: | |
01e91138 | 9247 | /* As in legitimate_offset_address_p we do not assume |
9248 | worst-case. The mode here is just a hint as to the registers | |
9249 | used. A TImode is usually in gprs, but may actually be in | |
9250 | fprs. Leave worst-case scenario for reload to handle via | |
9251 | insn constraints. PTImode is only GPRs. */ | |
9252 | extra = 8; | |
9253 | break; | |
9254 | default: | |
9255 | break; | |
9256 | } | |
9257 | ||
9258 | if (GET_CODE (x) == PLUS | |
9259 | && GET_CODE (XEXP (x, 0)) == REG | |
9260 | && GET_CODE (XEXP (x, 1)) == CONST_INT | |
9261 | && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000) | |
9262 | >= 0x10000 - extra) | |
9263 | && !(SPE_VECTOR_MODE (mode) | |
9264 | || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))) | |
9265 | { | |
9266 | HOST_WIDE_INT high_int, low_int; | |
9267 | rtx sum; | |
9268 | low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000; | |
9269 | if (low_int >= 0x8000 - extra) | |
9270 | low_int = 0; | |
9271 | high_int = INTVAL (XEXP (x, 1)) - low_int; | |
9272 | sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0), | |
9273 | GEN_INT (high_int)), 0); | |
9274 | return plus_constant (Pmode, sum, low_int); | |
9275 | } | |
9276 | else if (GET_CODE (x) == PLUS | |
9277 | && GET_CODE (XEXP (x, 0)) == REG | |
9278 | && GET_CODE (XEXP (x, 1)) != CONST_INT | |
9279 | && GET_MODE_NUNITS (mode) == 1 | |
9280 | && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD | |
9281 | || (/* ??? Assume floating point reg based on mode? */ | |
9282 | (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) | |
9283 | && (mode == DFmode || mode == DDmode))) | |
9284 | && !avoiding_indexed_address_p (mode)) | |
9285 | { | |
9286 | return gen_rtx_PLUS (Pmode, XEXP (x, 0), | |
9287 | force_reg (Pmode, force_operand (XEXP (x, 1), 0))); | |
9288 | } | |
9289 | else if (SPE_VECTOR_MODE (mode) | |
9290 | || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)) | |
9291 | { | |
9292 | if (mode == DImode) | |
9293 | return x; | |
9294 | /* We accept [reg + reg] and [reg + OFFSET]. */ | |
9295 | ||
9296 | if (GET_CODE (x) == PLUS) | |
9297 | { | |
9298 | rtx op1 = XEXP (x, 0); | |
9299 | rtx op2 = XEXP (x, 1); | |
9300 | rtx y; | |
9301 | ||
9302 | op1 = force_reg (Pmode, op1); | |
9303 | ||
9304 | if (GET_CODE (op2) != REG | |
9305 | && (GET_CODE (op2) != CONST_INT | |
9306 | || !SPE_CONST_OFFSET_OK (INTVAL (op2)) | |
9307 | || (GET_MODE_SIZE (mode) > 8 | |
9308 | && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8)))) | |
9309 | op2 = force_reg (Pmode, op2); | |
9310 | ||
9311 | /* We can't always do [reg + reg] for these, because [reg + | |
9312 | reg + offset] is not a legitimate addressing mode. */ | |
9313 | y = gen_rtx_PLUS (Pmode, op1, op2); | |
9314 | ||
9315 | if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2)) | |
9316 | return force_reg (Pmode, y); | |
9317 | else | |
9318 | return y; | |
9319 | } | |
9320 | ||
9321 | return force_reg (Pmode, x); | |
9322 | } | |
9323 | else if ((TARGET_ELF | |
9324 | #if TARGET_MACHO | |
9325 | || !MACHO_DYNAMIC_NO_PIC_P | |
9326 | #endif | |
9327 | ) | |
9328 | && TARGET_32BIT | |
9329 | && TARGET_NO_TOC | |
9330 | && ! flag_pic | |
9331 | && GET_CODE (x) != CONST_INT | |
9332 | && GET_CODE (x) != CONST_WIDE_INT | |
9333 | && GET_CODE (x) != CONST_DOUBLE | |
9334 | && CONSTANT_P (x) | |
9335 | && GET_MODE_NUNITS (mode) == 1 | |
9336 | && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD | |
9337 | || (/* ??? Assume floating point reg based on mode? */ | |
9338 | (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) | |
9339 | && (mode == DFmode || mode == DDmode)))) | |
9340 | { | |
9341 | rtx reg = gen_reg_rtx (Pmode); | |
9342 | if (TARGET_ELF) | |
9343 | emit_insn (gen_elf_high (reg, x)); | |
9344 | else | |
9345 | emit_insn (gen_macho_high (reg, x)); | |
9346 | return gen_rtx_LO_SUM (Pmode, reg, x); | |
9347 | } | |
9348 | else if (TARGET_TOC | |
9349 | && GET_CODE (x) == SYMBOL_REF | |
9350 | && constant_pool_expr_p (x) | |
9351 | && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode)) | |
9352 | return create_TOC_reference (x, NULL_RTX); | |
9353 | else | |
9354 | return x; | |
9355 | } | |
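
/* Worked example of the high/low split performed above (illustrative
   only): for x = (plus (reg) (const_int 0x1c000)) we get

       low_int  = ((0xc000 ^ 0x8000) - 0x8000) = -0x4000
       high_int = 0x1c000 - (-0x4000)          =  0x20000

   so an addis adds 0x20000 (immediate 2, shifted left 16) to the
   base register, and the returned address keeps the -0x4000
   displacement, which fits the signed 16-bit D field of the memory
   insn.  */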
9356 | ||
9357 | /* Debug version of rs6000_legitimize_address. */ | |
9358 | static rtx | |
9359 | rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode) | |
9360 | { | |
9361 | rtx ret; | |
9362 | rtx_insn *insns; | |
9363 | ||
9364 | start_sequence (); | |
9365 | ret = rs6000_legitimize_address (x, oldx, mode); | |
9366 | insns = get_insns (); | |
9367 | end_sequence (); | |
9368 | ||
9369 | if (ret != x) | |
9370 | { | |
9371 | fprintf (stderr, | |
9372 | "\nrs6000_legitimize_address: mode %s, old code %s, " | |
9373 | "new code %s, modified\n", | |
9374 | GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)), | |
9375 | GET_RTX_NAME (GET_CODE (ret))); | |
9376 | ||
9377 | fprintf (stderr, "Original address:\n"); | |
9378 | debug_rtx (x); | |
9379 | ||
9380 | fprintf (stderr, "oldx:\n"); | |
9381 | debug_rtx (oldx); | |
9382 | ||
9383 | fprintf (stderr, "New address:\n"); | |
9384 | debug_rtx (ret); | |
9385 | ||
9386 | if (insns) | |
9387 | { | |
9388 | fprintf (stderr, "Insns added:\n"); | |
9389 | debug_rtx_list (insns, 20); | |
9390 | } | |
9391 | } | |
9392 | else | |
9393 | { | |
9394 | fprintf (stderr, | |
9395 | "\nrs6000_legitimize_address: mode %s, code %s, no change:\n", | |
9396 | GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x))); | |
9397 | ||
9398 | debug_rtx (x); | |
9399 | } | |
9400 | ||
9401 | if (insns) | |
9402 | emit_insn (insns); | |
9403 | ||
9404 | return ret; | |
9405 | } | |
9406 | ||
9407 | /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL. | |
9408 | We need to emit DTP-relative relocations. */ | |
9409 | ||
9410 | static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; | |
9411 | static void | |
9412 | rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x) | |
9413 | { | |
9414 | switch (size) | |
9415 | { | |
9416 | case 4: | |
9417 | fputs ("\t.long\t", file); | |
9418 | break; | |
9419 | case 8: | |
9420 | fputs (DOUBLE_INT_ASM_OP, file); | |
9421 | break; | |
9422 | default: | |
9423 | gcc_unreachable (); | |
9424 | } | |
9425 | output_addr_const (file, x); | |
9426 | if (TARGET_ELF) | |
9427 | fputs ("@dtprel+0x8000", file); | |
9428 | else if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF) | |
9429 | { | |
9430 | switch (SYMBOL_REF_TLS_MODEL (x)) | |
9431 | { | |
9432 | case 0: | |
9433 | break; | |
9434 | case TLS_MODEL_LOCAL_EXEC: | |
9435 | fputs ("@le", file); | |
9436 | break; | |
9437 | case TLS_MODEL_INITIAL_EXEC: | |
9438 | fputs ("@ie", file); | |
9439 | break; | |
9440 | case TLS_MODEL_GLOBAL_DYNAMIC: | |
9441 | case TLS_MODEL_LOCAL_DYNAMIC: | |
9442 | fputs ("@m", file); | |
9443 | break; | |
9444 | default: | |
9445 | gcc_unreachable (); | |
9446 | } | |
9447 | } | |
9448 | } | |
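
/* For example, on ELF a 4-byte entry for symbol "x" is emitted as

       .long x@dtprel+0x8000

   while on XCOFF the TLS model of the symbol instead selects an @le,
   @ie or @m suffix after the symbol name.  */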
9449 | ||
9450 | /* Return true if X is a symbol that refers to real (rather than emulated) | |
9451 | TLS. */ | |
9452 | ||
9453 | static bool | |
9454 | rs6000_real_tls_symbol_ref_p (rtx x) | |
9455 | { | |
9456 | return (GET_CODE (x) == SYMBOL_REF | |
9457 | && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL); | |
9458 | } | |
9459 | ||
9460 | /* In the name of slightly smaller debug output, and to cater to | |
9461 | general assembler lossage, recognize various UNSPEC sequences | |
9462 | and turn them back into a direct symbol reference. */ | |
9463 | ||
9464 | static rtx | |
9465 | rs6000_delegitimize_address (rtx orig_x) | |
9466 | { | |
9467 | rtx x, y, offset; | |
9468 | ||
9469 | orig_x = delegitimize_mem_from_attrs (orig_x); | |
9470 | x = orig_x; | |
9471 | if (MEM_P (x)) | |
9472 | x = XEXP (x, 0); | |
9473 | ||
9474 | y = x; | |
9475 | if (TARGET_CMODEL != CMODEL_SMALL | |
9476 | && GET_CODE (y) == LO_SUM) | |
9477 | y = XEXP (y, 1); | |
9478 | ||
9479 | offset = NULL_RTX; | |
9480 | if (GET_CODE (y) == PLUS | |
9481 | && GET_MODE (y) == Pmode | |
9482 | && CONST_INT_P (XEXP (y, 1))) | |
9483 | { | |
9484 | offset = XEXP (y, 1); | |
9485 | y = XEXP (y, 0); | |
9486 | } | |
9487 | ||
9488 | if (GET_CODE (y) == UNSPEC | |
9489 | && XINT (y, 1) == UNSPEC_TOCREL) | |
9490 | { | |
9491 | y = XVECEXP (y, 0, 0); | |
9492 | ||
9493 | #ifdef HAVE_AS_TLS | |
9494 | /* Do not associate thread-local symbols with the original | |
9495 | constant pool symbol. */ | |
9496 | if (TARGET_XCOFF | |
9497 | && GET_CODE (y) == SYMBOL_REF | |
9498 | && CONSTANT_POOL_ADDRESS_P (y) | |
9499 | && rs6000_real_tls_symbol_ref_p (get_pool_constant (y))) | |
9500 | return orig_x; | |
9501 | #endif | |
9502 | ||
9503 | if (offset != NULL_RTX) | |
9504 | y = gen_rtx_PLUS (Pmode, y, offset); | |
9505 | if (!MEM_P (orig_x)) | |
9506 | return y; | |
9507 | else | |
9508 | return replace_equiv_address_nv (orig_x, y); | |
9509 | } | |
9510 | ||
9511 | if (TARGET_MACHO | |
9512 | && GET_CODE (orig_x) == LO_SUM | |
9513 | && GET_CODE (XEXP (orig_x, 1)) == CONST) | |
9514 | { | |
9515 | y = XEXP (XEXP (orig_x, 1), 0); | |
9516 | if (GET_CODE (y) == UNSPEC | |
9517 | && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET) | |
9518 | return XVECEXP (y, 0, 0); | |
9519 | } | |
9520 | ||
9521 | return orig_x; | |
9522 | } | |
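
/* For example (illustrative), a -mcmodel=small TOC load such as

     (mem (unspec [(symbol_ref "x") ...] UNSPEC_TOCREL))

   is rewritten so the debug info references (symbol_ref "x")
   directly; for medium/large model the unspec is first dug out from
   under the LO_SUM, and any constant offset is re-applied around the
   symbol.  */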
9523 | ||
9524 | /* Return true if X shouldn't be emitted into the debug info. | |
9525 | The linker doesn't like .toc section references from | |
9526 | .debug_* sections, so reject .toc section symbols. */ | |
9527 | ||
9528 | static bool | |
9529 | rs6000_const_not_ok_for_debug_p (rtx x) | |
9530 | { | |
9531 | if (GET_CODE (x) == SYMBOL_REF | |
9532 | && CONSTANT_POOL_ADDRESS_P (x)) | |
9533 | { | |
9534 | rtx c = get_pool_constant (x); | |
9535 | machine_mode cmode = get_pool_mode (x); | |
9536 | if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode)) | |
9537 | return true; | |
9538 | } | |
9539 | ||
9540 | return false; | |
9541 | } | |
9542 | ||
9543 | ||
9544 | /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */ | |
9545 | ||
9546 | static bool | |
9547 | rs6000_legitimate_combined_insn (rtx_insn *insn) | |
9548 | { | |
9549 | int icode = INSN_CODE (insn); | |
9550 | ||
9551 | /* Reject creating doloop insns. Combine should not be allowed | |
9552 | to create these for a number of reasons: | |
9553 | 1) In a nested loop, if combine creates one of these in an | |
9554 | outer loop and the register allocator happens to allocate ctr | |
9555 | to the outer loop insn, then the inner loop can't use ctr. | |
9556 | Inner loops ought to be more highly optimized. | |
9557 | 2) Combine often wants to create one of these from what was | |
9558 | originally a three insn sequence, first combining the three | |
9559 | insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not | |
9560 | allocated ctr, the splitter takes us back to the three insn | |
9561 | sequence. It's better to stop combine at the two insn | |
9562 | sequence. | |
9563 | 3) Faced with not being able to allocate ctr for ctrsi/ctrdi | |
9564 | insns, the register allocator sometimes uses floating point | |
9565 | or vector registers for the pseudo. Since ctrsi/ctrdi is a | |
9566 | jump insn and output reloads are not implemented for jumps, | |
9567 | the ctrsi/ctrdi splitters need to handle all possible cases. | |
9568 | That's a pain, and it gets to be seriously difficult when a | |
9569 | splitter that runs after reload needs memory to transfer from | |
9570 | a gpr to an fpr. See PR70098 and PR71763, which are not fixed | |
9571 | for the difficult case. It's better to not create problems | |
9572 | in the first place. */ | |
9573 | if (icode != CODE_FOR_nothing | |
9574 | && (icode == CODE_FOR_ctrsi_internal1 | |
9575 | || icode == CODE_FOR_ctrdi_internal1 | |
9576 | || icode == CODE_FOR_ctrsi_internal2 | |
9577 | || icode == CODE_FOR_ctrdi_internal2 | |
9578 | || icode == CODE_FOR_ctrsi_internal3 | |
9579 | || icode == CODE_FOR_ctrdi_internal3 | |
9580 | || icode == CODE_FOR_ctrsi_internal4 | |
9581 | || icode == CODE_FOR_ctrdi_internal4)) | |
9582 | return false; | |
9583 | ||
9584 | return true; | |
9585 | } | |
9586 | ||
9587 | /* Construct the SYMBOL_REF for the tls_get_addr function. */ | |
9588 | ||
9589 | static GTY(()) rtx rs6000_tls_symbol; | |
9590 | static rtx | |
9591 | rs6000_tls_get_addr (void) | |
9592 | { | |
9593 | if (!rs6000_tls_symbol) | |
9594 | rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr"); | |
9595 | ||
9596 | return rs6000_tls_symbol; | |
9597 | } | |
9598 | ||
9599 | /* Construct the SYMBOL_REF for TLS GOT references. */ | |
9600 | ||
9601 | static GTY(()) rtx rs6000_got_symbol; | |
9602 | static rtx | |
9603 | rs6000_got_sym (void) | |
9604 | { | |
9605 | if (!rs6000_got_symbol) | |
9606 | { | |
9607 | rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); | |
9608 | SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL; | |
9609 | SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL; | |
9610 | } | |
9611 | ||
9612 | return rs6000_got_symbol; | |
9613 | } | |
9614 | ||
9615 | /* AIX Thread-Local Address support. */ | |
9616 | ||
9617 | static rtx | |
9618 | rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model) | |
9619 | { | |
9620 | rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr; | |
9621 | const char *name; | |
9622 | char *tlsname; | |
9623 | ||
9624 | name = XSTR (addr, 0); | |
9625 | /* Append the TLS CSECT qualifier, unless the symbol is already qualified | |
9626 | or the symbol will be in TLS private data section. */ | |
9627 | if (name[strlen (name) - 1] != ']' | |
9628 | && (TREE_PUBLIC (SYMBOL_REF_DECL (addr)) | |
9629 | || bss_initializer_p (SYMBOL_REF_DECL (addr)))) | |
9630 | { | |
9631 | tlsname = XALLOCAVEC (char, strlen (name) + 4); | |
9632 | strcpy (tlsname, name); | |
9633 | strcat (tlsname, | |
9634 | bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]"); | |
9635 | tlsaddr = copy_rtx (addr); | |
9636 | XSTR (tlsaddr, 0) = ggc_strdup (tlsname); | |
9637 | } | |
9638 | else | |
9639 | tlsaddr = addr; | |
9640 | ||
9641 | /* Place addr into TOC constant pool. */ | |
9642 | sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr); | |
9643 | ||
9644 | /* Output the TOC entry and create the MEM referencing the value. */ | |
9645 | if (constant_pool_expr_p (XEXP (sym, 0)) | |
9646 | && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode)) | |
9647 | { | |
9648 | tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX); | |
9649 | mem = gen_const_mem (Pmode, tocref); | |
9650 | set_mem_alias_set (mem, get_TOC_alias_set ()); | |
9651 | } | |
9652 | else | |
9653 | return sym; | |
9654 | ||
9655 | /* Use global-dynamic for local-dynamic. */ | |
9656 | if (model == TLS_MODEL_GLOBAL_DYNAMIC | |
9657 | || model == TLS_MODEL_LOCAL_DYNAMIC) | |
9658 | { | |
9659 | /* Create new TOC reference for @m symbol. */ | |
9660 | name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0); | |
9661 | tlsname = XALLOCAVEC (char, strlen (name) + 1); | |
9662 | strcpy (tlsname, "*LCM"); | |
9663 | strcat (tlsname, name + 3); | |
9664 | rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname)); | |
9665 | SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL; | |
9666 | tocref = create_TOC_reference (modaddr, NULL_RTX); | |
9667 | rtx modmem = gen_const_mem (Pmode, tocref); | |
9668 | set_mem_alias_set (modmem, get_TOC_alias_set ()); | |
9669 | ||
9670 | rtx modreg = gen_reg_rtx (Pmode); | |
9671 | emit_insn (gen_rtx_SET (modreg, modmem)); | |
9672 | ||
9673 | tmpreg = gen_reg_rtx (Pmode); | |
9674 | emit_insn (gen_rtx_SET (tmpreg, mem)); | |
9675 | ||
9676 | dest = gen_reg_rtx (Pmode); | |
9677 | if (TARGET_32BIT) | |
9678 | emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg)); | |
9679 | else | |
9680 | emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg)); | |
9681 | return dest; | |
9682 | } | |
9683 | /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */ | |
9684 | else if (TARGET_32BIT) | |
9685 | { | |
9686 | tlsreg = gen_reg_rtx (SImode); | |
9687 | emit_insn (gen_tls_get_tpointer (tlsreg)); | |
9688 | } | |
9689 | else | |
9690 | tlsreg = gen_rtx_REG (DImode, 13); | |
9691 | ||
9692 | /* Load the TOC value into temporary register. */ | |
9693 | tmpreg = gen_reg_rtx (Pmode); | |
9694 | emit_insn (gen_rtx_SET (tmpreg, mem)); | |
9695 | set_unique_reg_note (get_last_insn (), REG_EQUAL, | |
9696 | gen_rtx_MINUS (Pmode, addr, tlsreg)); | |
9697 | ||
9698 | /* Add TOC symbol value to TLS pointer. */ | |
9699 | dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg)); | |
9700 | ||
9701 | return dest; | |
9702 | } | |
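
/* To illustrate the mangling above: a public TLS symbol "foo" goes
   into the TOC as "foo[TL]" ("foo[UL]" for a bss initializer), and
   for the dynamic models a companion "*LCM..." module-handle TOC
   entry is created; its value and the offset loaded from the TOC are
   the two arguments passed to __tls_get_addr.  */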
9703 | ||
9704 | /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute | |
9705 | this (thread-local) address. */ | |
9706 | ||
9707 | static rtx | |
9708 | rs6000_legitimize_tls_address (rtx addr, enum tls_model model) | |
9709 | { | |
9710 | rtx dest, insn; | |
9711 | ||
9712 | if (TARGET_XCOFF) | |
9713 | return rs6000_legitimize_tls_address_aix (addr, model); | |
9714 | ||
9715 | dest = gen_reg_rtx (Pmode); | |
9716 | if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16) | |
9717 | { | |
9718 | rtx tlsreg; | |
9719 | ||
9720 | if (TARGET_64BIT) | |
9721 | { | |
9722 | tlsreg = gen_rtx_REG (Pmode, 13); | |
9723 | insn = gen_tls_tprel_64 (dest, tlsreg, addr); | |
9724 | } | |
9725 | else | |
9726 | { | |
9727 | tlsreg = gen_rtx_REG (Pmode, 2); | |
9728 | insn = gen_tls_tprel_32 (dest, tlsreg, addr); | |
9729 | } | |
9730 | emit_insn (insn); | |
9731 | } | |
9732 | else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32) | |
9733 | { | |
9734 | rtx tlsreg, tmp; | |
9735 | ||
9736 | tmp = gen_reg_rtx (Pmode); | |
9737 | if (TARGET_64BIT) | |
9738 | { | |
9739 | tlsreg = gen_rtx_REG (Pmode, 13); | |
9740 | insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr); | |
9741 | } | |
9742 | else | |
9743 | { | |
9744 | tlsreg = gen_rtx_REG (Pmode, 2); | |
9745 | insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr); | |
9746 | } | |
9747 | emit_insn (insn); | |
9748 | if (TARGET_64BIT) | |
9749 | insn = gen_tls_tprel_lo_64 (dest, tmp, addr); | |
9750 | else | |
9751 | insn = gen_tls_tprel_lo_32 (dest, tmp, addr); | |
9752 | emit_insn (insn); | |
9753 | } | |
9754 | else | |
9755 | { | |
9756 | rtx r3, got, tga, tmp1, tmp2, call_insn; | |
9757 | ||
9758 | /* We currently use relocations like @got@tlsgd for tls, which | |
9759 | means the linker will handle allocation of tls entries, placing | |
9760 | them in the .got section. So use a pointer to the .got section, | |
9761 | not one to secondary TOC sections used by 64-bit -mminimal-toc, | |
9762 | or to secondary GOT sections used by 32-bit -fPIC. */ | |
9763 | if (TARGET_64BIT) | |
9764 | got = gen_rtx_REG (Pmode, 2); | |
9765 | else | |
9766 | { | |
9767 | if (flag_pic == 1) | |
9768 | got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM); | |
9769 | else | |
9770 | { | |
9771 | rtx gsym = rs6000_got_sym (); | |
9772 | got = gen_reg_rtx (Pmode); | |
9773 | if (flag_pic == 0) | |
9774 | rs6000_emit_move (got, gsym, Pmode); | |
9775 | else | |
9776 | { | |
9777 | rtx mem, lab; | |
9778 | ||
9779 | tmp1 = gen_reg_rtx (Pmode); | |
9780 | tmp2 = gen_reg_rtx (Pmode); | |
9781 | mem = gen_const_mem (Pmode, tmp1); | |
9782 | lab = gen_label_rtx (); | |
9783 | emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab)); | |
9784 | emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO)); | |
9785 | if (TARGET_LINK_STACK) | |
9786 | emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4))); | |
9787 | emit_move_insn (tmp2, mem); | |
9788 | rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2)); | |
9789 | set_unique_reg_note (last, REG_EQUAL, gsym); | |
9790 | } | |
9791 | } | |
9792 | } | |
9793 | ||
9794 | if (model == TLS_MODEL_GLOBAL_DYNAMIC) | |
9795 | { | |
9796 | tga = rs6000_tls_get_addr (); | |
9797 | emit_library_call_value (tga, dest, LCT_CONST, Pmode, | |
9e9e5c15 | 9798 | const0_rtx, Pmode); |
01e91138 | 9799 | |
9800 | r3 = gen_rtx_REG (Pmode, 3); | |
9801 | if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) | |
9802 | { | |
9803 | if (TARGET_64BIT) | |
9804 | insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx); | |
9805 | else | |
9806 | insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx); | |
9807 | } | |
9808 | else if (DEFAULT_ABI == ABI_V4) | |
9809 | insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx); | |
9810 | else | |
9811 | gcc_unreachable (); | |
9812 | call_insn = last_call_insn (); | |
9813 | PATTERN (call_insn) = insn; | |
9814 | if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic) | |
9815 | use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), | |
9816 | pic_offset_table_rtx); | |
9817 | } | |
9818 | else if (model == TLS_MODEL_LOCAL_DYNAMIC) | |
9819 | { | |
9820 | tga = rs6000_tls_get_addr (); | |
9821 | tmp1 = gen_reg_rtx (Pmode); | |
9822 | emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, | |
9e9e5c15 | 9823 | const0_rtx, Pmode); |
01e91138 | 9824 | |
9825 | r3 = gen_rtx_REG (Pmode, 3); | |
9826 | if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) | |
9827 | { | |
9828 | if (TARGET_64BIT) | |
9829 | insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx); | |
9830 | else | |
9831 | insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx); | |
9832 | } | |
9833 | else if (DEFAULT_ABI == ABI_V4) | |
9834 | insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx); | |
9835 | else | |
9836 | gcc_unreachable (); | |
9837 | call_insn = last_call_insn (); | |
9838 | PATTERN (call_insn) = insn; | |
9839 | if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic) | |
9840 | use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), | |
9841 | pic_offset_table_rtx); | |
9842 | ||
9843 | if (rs6000_tls_size == 16) | |
9844 | { | |
9845 | if (TARGET_64BIT) | |
9846 | insn = gen_tls_dtprel_64 (dest, tmp1, addr); | |
9847 | else | |
9848 | insn = gen_tls_dtprel_32 (dest, tmp1, addr); | |
9849 | } | |
9850 | else if (rs6000_tls_size == 32) | |
9851 | { | |
9852 | tmp2 = gen_reg_rtx (Pmode); | |
9853 | if (TARGET_64BIT) | |
9854 | insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr); | |
9855 | else | |
9856 | insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr); | |
9857 | emit_insn (insn); | |
9858 | if (TARGET_64BIT) | |
9859 | insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr); | |
9860 | else | |
9861 | insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr); | |
9862 | } | |
9863 | else | |
9864 | { | |
9865 | tmp2 = gen_reg_rtx (Pmode); | |
9866 | if (TARGET_64BIT) | |
9867 | insn = gen_tls_got_dtprel_64 (tmp2, got, addr); | |
9868 | else | |
9869 | insn = gen_tls_got_dtprel_32 (tmp2, got, addr); | |
9870 | emit_insn (insn); | |
9871 | insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1)); | |
9872 | } | |
9873 | emit_insn (insn); | |
9874 | } | |
9875 | else | |
9876 | { | |
9877 | /* IE, or 64-bit offset LE. */ | |
9878 | tmp2 = gen_reg_rtx (Pmode); | |
9879 | if (TARGET_64BIT) | |
9880 | insn = gen_tls_got_tprel_64 (tmp2, got, addr); | |
9881 | else | |
9882 | insn = gen_tls_got_tprel_32 (tmp2, got, addr); | |
9883 | emit_insn (insn); | |
9884 | if (TARGET_64BIT) | |
9885 | insn = gen_tls_tls_64 (dest, tmp2, addr); | |
9886 | else | |
9887 | insn = gen_tls_tls_32 (dest, tmp2, addr); | |
9888 | emit_insn (insn); | |
9889 | } | |
9890 | } | |
9891 | ||
9892 | return dest; | |
9893 | } | |
9894 | ||
9895 | /* Only create the global variable for the stack protect guard if we are using | |
9896 | the global flavor of that guard. */ | |
9897 | static tree | |
9898 | rs6000_init_stack_protect_guard (void) | |
9899 | { | |
9900 | if (rs6000_stack_protector_guard == SSP_GLOBAL) | |
9901 | return default_stack_protect_guard (); | |
9902 | ||
9903 | return NULL_TREE; | |
9904 | } | |
9905 | ||
9906 | /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ | |
9907 | ||
9908 | static bool | |
9909 | rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) | |
9910 | { | |
9911 | if (GET_CODE (x) == HIGH | |
9912 | && GET_CODE (XEXP (x, 0)) == UNSPEC) | |
9913 | return true; | |
9914 | ||
9915 | /* A TLS symbol in the TOC cannot contain a sum. */ | |
9916 | if (GET_CODE (x) == CONST | |
9917 | && GET_CODE (XEXP (x, 0)) == PLUS | |
9918 | && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF | |
9919 | && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0) | |
9920 | return true; | |
9921 | ||
9922 | /* Do not place an ELF TLS symbol in the constant pool. */ | |
9923 | return TARGET_ELF && tls_referenced_p (x); | |
9924 | } | |
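
/* Illustrative forms rejected above:

     (high (unspec ...))
     (const (plus (symbol_ref "tls_x") (const_int 8)))

   The first is half of a HIGH/LO_SUM pair; the second is a sum
   involving a TLS symbol, which cannot be represented in the TOC.  */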
9925 | ||
9926 | /* Return true iff the given SYMBOL_REF refers to a constant pool entry | |
9927 | that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF | |
9928 | can be addressed relative to the toc pointer. */ | |
9929 | ||
9930 | static bool | |
9931 | use_toc_relative_ref (rtx sym, machine_mode mode) | |
9932 | { | |
9933 | return ((constant_pool_expr_p (sym) | |
9934 | && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym), | |
9935 | get_pool_mode (sym))) | |
9936 | || (TARGET_CMODEL == CMODEL_MEDIUM | |
9937 | && SYMBOL_REF_LOCAL_P (sym) | |
9938 | && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT)); | |
9939 | } | |
9940 | ||
9941 | /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to | |
9942 | replace the input X, or the original X if no replacement is called for. | |
9943 | The output parameter *WIN is 1 if the calling macro should goto WIN, | |
9944 | 0 if it should not. | |
9945 | ||
9946 | For RS/6000, we wish to handle large displacements off a base | |
9947 | register by splitting the addend across an addi/addis pair and the mem | |
9948 | insn. This cuts the number of extra insns needed from 3 to 1. | |
9949 | ||
9950 | On Darwin, we use this to generate code for floating point constants. | |
9951 | A movsf_low is generated so we wind up with 2 instructions rather than 3. | |
9952 | The Darwin code is inside #if TARGET_MACHO because only then are the | |
9953 | machopic_* functions defined. */ | |
9954 | static rtx | |
9955 | rs6000_legitimize_reload_address (rtx x, machine_mode mode, | |
9956 | int opnum, int type, | |
9957 | int ind_levels ATTRIBUTE_UNUSED, int *win) | |
9958 | { | |
9959 | bool reg_offset_p = reg_offset_addressing_ok_p (mode); | |
9960 | bool quad_offset_p = mode_supports_vsx_dform_quad (mode); | |
9961 | ||
9962 | /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a | |
9963 | DFmode/DImode MEM. Ditto for ISA 3.0 vsx_splat_v4sf/v4si. */ | |
9964 | if (reg_offset_p | |
9965 | && opnum == 1 | |
9966 | && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode) | |
9967 | || (mode == DImode && recog_data.operand_mode[0] == V2DImode) | |
9968 | || (mode == SFmode && recog_data.operand_mode[0] == V4SFmode | |
9969 | && TARGET_P9_VECTOR) | |
9970 | || (mode == SImode && recog_data.operand_mode[0] == V4SImode | |
9971 | && TARGET_P9_VECTOR))) | |
9972 | reg_offset_p = false; | |
9973 | ||
9974 | /* We must recognize output that we have already generated ourselves. */ | |
9975 | if (GET_CODE (x) == PLUS | |
9976 | && GET_CODE (XEXP (x, 0)) == PLUS | |
9977 | && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG | |
9978 | && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT | |
9979 | && GET_CODE (XEXP (x, 1)) == CONST_INT) | |
9980 | { | |
9981 | if (TARGET_DEBUG_ADDR) | |
9982 | { | |
9983 | fprintf (stderr, "\nlegitimize_reload_address push_reload #1:\n"); | |
9984 | debug_rtx (x); | |
9985 | } | |
9986 | push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, | |
9987 | BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0, | |
9988 | opnum, (enum reload_type) type); | |
9989 | *win = 1; | |
9990 | return x; | |
9991 | } | |
9992 | ||
9993 | /* Likewise for (lo_sum (high ...) ...) output we have generated. */ | |
9994 | if (GET_CODE (x) == LO_SUM | |
9995 | && GET_CODE (XEXP (x, 0)) == HIGH) | |
9996 | { | |
9997 | if (TARGET_DEBUG_ADDR) | |
9998 | { | |
9999 | fprintf (stderr, "\nlegitimize_reload_address push_reload #2:\n"); | |
10000 | debug_rtx (x); | |
10001 | } | |
10002 | push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, | |
10003 | BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, | |
10004 | opnum, (enum reload_type) type); | |
10005 | *win = 1; | |
10006 | return x; | |
10007 | } | |
10008 | ||
10009 | #if TARGET_MACHO | |
10010 | if (DEFAULT_ABI == ABI_DARWIN && flag_pic | |
10011 | && GET_CODE (x) == LO_SUM | |
10012 | && GET_CODE (XEXP (x, 0)) == PLUS | |
10013 | && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx | |
10014 | && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH | |
10015 | && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1) | |
10016 | && machopic_operand_p (XEXP (x, 1))) | |
10017 | { | |
10018 | /* Result of previous invocation of this function on Darwin | |
10019 | floating point constant. */ | |
10020 | push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, | |
10021 | BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, | |
10022 | opnum, (enum reload_type) type); | |
10023 | *win = 1; | |
10024 | return x; | |
10025 | } | |
10026 | #endif | |
10027 | ||
10028 | if (TARGET_CMODEL != CMODEL_SMALL | |
10029 | && reg_offset_p | |
10030 | && !quad_offset_p | |
10031 | && small_toc_ref (x, VOIDmode)) | |
10032 | { | |
10033 | rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x)); | |
10034 | x = gen_rtx_LO_SUM (Pmode, hi, x); | |
10035 | if (TARGET_DEBUG_ADDR) | |
10036 | { | |
10037 | fprintf (stderr, "\nlegitimize_reload_address push_reload #3:\n"); | |
10038 | debug_rtx (x); | |
10039 | } | |
10040 | push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, | |
10041 | BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, | |
10042 | opnum, (enum reload_type) type); | |
10043 | *win = 1; | |
10044 | return x; | |
10045 | } | |
10046 | ||
10047 | if (GET_CODE (x) == PLUS | |
10048 | && REG_P (XEXP (x, 0)) | |
10049 | && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER | |
10050 | && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1) | |
10051 | && CONST_INT_P (XEXP (x, 1)) | |
10052 | && reg_offset_p | |
10053 | && !SPE_VECTOR_MODE (mode) | |
10054 | && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD) | |
10055 | && (quad_offset_p || !VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))) | |
10056 | { | |
10057 | HOST_WIDE_INT val = INTVAL (XEXP (x, 1)); | |
10058 | HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000; | |
10059 | HOST_WIDE_INT high | |
10060 | = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000; | |
10061 | ||
10062 | /* Check for 32-bit overflow or quad addresses with one of the | |
10063 | four least significant bits set. */ | |
10064 | if (high + low != val | |
10065 | || (quad_offset_p && (low & 0xf))) | |
10066 | { | |
10067 | *win = 0; | |
10068 | return x; | |
10069 | } | |
10070 | ||
10071 | /* Reload the high part into a base reg; leave the low part | |
10072 | in the mem directly. */ | |
10073 | ||
10074 | x = gen_rtx_PLUS (GET_MODE (x), | |
10075 | gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0), | |
10076 | GEN_INT (high)), | |
10077 | GEN_INT (low)); | |
10078 | ||
10079 | if (TARGET_DEBUG_ADDR) | |
10080 | { | |
10081 | fprintf (stderr, "\nlegitimize_reload_address push_reload #4:\n"); | |
10082 | debug_rtx (x); | |
10083 | } | |
10084 | push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, | |
10085 | BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0, | |
10086 | opnum, (enum reload_type) type); | |
10087 | *win = 1; | |
10088 | return x; | |
10089 | } | |
10090 | ||
10091 | if (GET_CODE (x) == SYMBOL_REF | |
10092 | && reg_offset_p | |
10093 | && !quad_offset_p | |
10094 | && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)) | |
10095 | && !SPE_VECTOR_MODE (mode) | |
10096 | #if TARGET_MACHO | |
10097 | && DEFAULT_ABI == ABI_DARWIN | |
10098 | && (flag_pic || MACHO_DYNAMIC_NO_PIC_P) | |
10099 | && machopic_symbol_defined_p (x) | |
10100 | #else | |
10101 | && DEFAULT_ABI == ABI_V4 | |
10102 | && !flag_pic | |
10103 | #endif | |
10104 | /* Don't do this for TFmode or TDmode, since the result isn't offsettable. | |
10105 | The same goes for DImode without 64-bit gprs and DFmode and DDmode | |
10106 | without fprs. | |
10107 | ??? Assume floating point reg based on mode? This assumption is | |
10108 | violated by e.g. a powerpc-linux -m32 compile of gcc.dg/pr28796-2.c | |
10109 | where reload ends up doing a DFmode load of a constant from | |
10110 | mem using two gprs. Unfortunately, at this point reload | |
10111 | hasn't yet selected regs so poking around in reload data | |
10112 | won't help and even if we could figure out the regs reliably, | |
10113 | we'd still want to allow this transformation when the mem is | |
10114 | naturally aligned. Since we say the address is good here, we | |
10115 | can't disable offsets from LO_SUMs in mem_operand_gpr. | |
10116 | FIXME: Allow offset from lo_sum for other modes too, when | |
10117 | mem is sufficiently aligned. | |
10118 | ||
10119 | Also disallow this if the type can go in VMX/Altivec registers, since | |
10120 | those registers do not have d-form (reg+offset) address modes. */ | |
10121 | && !reg_addr[mode].scalar_in_vmx_p | |
10122 | && mode != TFmode | |
10123 | && mode != TDmode | |
10124 | && mode != IFmode | |
10125 | && mode != KFmode | |
10126 | && (mode != TImode || !TARGET_VSX_TIMODE) | |
10127 | && mode != PTImode | |
10128 | && (mode != DImode || TARGET_POWERPC64) | |
10129 | && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64 | |
10130 | || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT))) | |
10131 | { | |
10132 | #if TARGET_MACHO | |
10133 | if (flag_pic) | |
10134 | { | |
10135 | rtx offset = machopic_gen_offset (x); | |
10136 | x = gen_rtx_LO_SUM (GET_MODE (x), | |
10137 | gen_rtx_PLUS (Pmode, pic_offset_table_rtx, | |
10138 | gen_rtx_HIGH (Pmode, offset)), offset); | |
10139 | } | |
10140 | else | |
10141 | #endif | |
10142 | x = gen_rtx_LO_SUM (GET_MODE (x), | |
10143 | gen_rtx_HIGH (Pmode, x), x); | |
10144 | ||
10145 | if (TARGET_DEBUG_ADDR) | |
10146 | { | |
10147 | fprintf (stderr, "\nlegitimize_reload_address push_reload #5:\n"); | |
10148 | debug_rtx (x); | |
10149 | } | |
10150 | push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, | |
10151 | BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, | |
10152 | opnum, (enum reload_type) type); | |
10153 | *win = 1; | |
10154 | return x; | |
10155 | } | |
10156 | ||
10157 | /* Reload an offset address wrapped by an AND that represents the | |
10158 | masking of the lower bits. Strip the outer AND and let reload | |
10159 | convert the offset address into an indirect address. For VSX, | |
10160 | force reload to create the address with an AND in a separate | |
10161 | register, because we can't guarantee an altivec register will | |
10162 | be used. */ | |
10163 | if (VECTOR_MEM_ALTIVEC_P (mode) | |
10164 | && GET_CODE (x) == AND | |
10165 | && GET_CODE (XEXP (x, 0)) == PLUS | |
10166 | && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG | |
10167 | && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT | |
10168 | && GET_CODE (XEXP (x, 1)) == CONST_INT | |
10169 | && INTVAL (XEXP (x, 1)) == -16) | |
10170 | { | |
10171 | x = XEXP (x, 0); | |
10172 | *win = 1; | |
10173 | return x; | |
10174 | } | |
10175 | ||
10176 | if (TARGET_TOC | |
10177 | && reg_offset_p | |
10178 | && !quad_offset_p | |
10179 | && GET_CODE (x) == SYMBOL_REF | |
10180 | && use_toc_relative_ref (x, mode)) | |
10181 | { | |
10182 | x = create_TOC_reference (x, NULL_RTX); | |
10183 | if (TARGET_CMODEL != CMODEL_SMALL) | |
10184 | { | |
10185 | if (TARGET_DEBUG_ADDR) | |
10186 | { | |
10187 | fprintf (stderr, "\nlegitimize_reload_address push_reload #6:\n"); | |
10188 | debug_rtx (x); | |
10189 | } | |
10190 | push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, | |
10191 | BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, | |
10192 | opnum, (enum reload_type) type); | |
10193 | } | |
10194 | *win = 1; | |
10195 | return x; | |
10196 | } | |
10197 | *win = 0; | |
10198 | return x; | |
10199 | } | |
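
/* To illustrate reload case #4 above (illustrative values): for
   val = 0x12340 the split gives low = 0x2340 and high = 0x10000, so
   (reg + 0x10000) is reloaded into a base register while the mem
   keeps the 0x2340 displacement.  For a quad (DQ-form) address the
   low part must also have its bottom four bits clear, as the
   hardware displacement is a multiple of 16.  */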
10200 | ||
10201 | /* Debug version of rs6000_legitimize_reload_address. */ | |
10202 | static rtx | |
10203 | rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode, | |
10204 | int opnum, int type, | |
10205 | int ind_levels, int *win) | |
10206 | { | |
10207 | rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type, | |
10208 | ind_levels, win); | |
10209 | fprintf (stderr, | |
10210 | "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, " | |
10211 | "type = %d, ind_levels = %d, win = %d, original addr:\n", | |
10212 | GET_MODE_NAME (mode), opnum, type, ind_levels, *win); | |
10213 | debug_rtx (x); | |
10214 | ||
10215 | if (x == ret) | |
10216 | fprintf (stderr, "Same address returned\n"); | |
10217 | else if (!ret) | |
10218 | fprintf (stderr, "NULL returned\n"); | |
10219 | else | |
10220 | { | |
10221 | fprintf (stderr, "New address:\n"); | |
10222 | debug_rtx (ret); | |
10223 | } | |
10224 | ||
10225 | return ret; | |
10226 | } | |
10227 | ||
10228 | /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression | |
10229 | that is a valid memory address for an instruction. | |
10230 | The MODE argument is the machine mode for the MEM expression | |
10231 | that wants to use this address. | |
10232 | ||
10233 | On the RS/6000, there are four valid address forms: a SYMBOL_REF | |
10234 | that refers to a constant pool entry of an address (or the sum of it | |
10235 | plus a constant), a short (16-bit signed) constant plus a register, | |
10236 | the sum of two registers, or a register indirect, possibly with an | |
10237 | auto-increment. For DFmode, DDmode and DImode with a constant plus | |
10238 | register, we must ensure that both words are addressable, or on | |
10239 | PowerPC64 that the offset is word aligned. | |
10240 | ||
10241 | For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs, | |
10242 | 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used | |
10243 | because adjacent memory cells are accessed by adding word-sized offsets | |
10244 | during assembly output. */ | |
10245 | static bool | |
10246 | rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict) | |
10247 | { | |
10248 | bool reg_offset_p = reg_offset_addressing_ok_p (mode); | |
10249 | bool quad_offset_p = mode_supports_vsx_dform_quad (mode); | |
10250 | ||
10251 | /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */ | |
10252 | if (VECTOR_MEM_ALTIVEC_P (mode) | |
10253 | && GET_CODE (x) == AND | |
10254 | && GET_CODE (XEXP (x, 1)) == CONST_INT | |
10255 | && INTVAL (XEXP (x, 1)) == -16) | |
10256 | x = XEXP (x, 0); | |
10257 | ||
10258 | if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x)) | |
10259 | return 0; | |
10260 | if (legitimate_indirect_address_p (x, reg_ok_strict)) | |
10261 | return 1; | |
10262 | if (TARGET_UPDATE | |
10263 | && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC) | |
10264 | && mode_supports_pre_incdec_p (mode) | |
10265 | && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)) | |
10266 | return 1; | |
10267 | /* Handle restricted vector d-form offsets in ISA 3.0. */ | |
10268 | if (quad_offset_p) | |
10269 | { | |
10270 | if (quad_address_p (x, mode, reg_ok_strict)) | |
10271 | return 1; | |
10272 | } | |
10273 | else if (virtual_stack_registers_memory_p (x)) | |
10274 | return 1; | |
10275 | ||
10276 | else if (reg_offset_p) | |
10277 | { | |
10278 | if (legitimate_small_data_p (mode, x)) | |
10279 | return 1; | |
10280 | if (legitimate_constant_pool_address_p (x, mode, | |
10281 | reg_ok_strict || lra_in_progress)) | |
10282 | return 1; | |
10283 | if (reg_addr[mode].fused_toc && GET_CODE (x) == UNSPEC | |
10284 | && XINT (x, 1) == UNSPEC_FUSION_ADDIS) | |
10285 | return 1; | |
10286 | } | |
10287 | ||
10288 | /* For TImode, if we have TImode in VSX registers, only allow register | |
10289 | indirect addresses. This will allow the values to go in either GPRs | |
10290 | or VSX registers without reloading. The vector types would tend to | |
10291 | go into VSX registers, so we allow REG+REG, while TImode seems | |
10292 | somewhat split, in that some uses are GPR based, and some VSX based. */ | |
10293 | /* FIXME: We could loosen this by changing the following to | |
10294 | if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE) | |
10295 | but currently we cannot allow REG+REG addressing for TImode. See | |
10296 | PR72827 for complete details on how this ends up hoodwinking DSE. */ | |
10297 | if (mode == TImode && TARGET_VSX_TIMODE) | |
10298 | return 0; | |
10299 | /* If not REG_OK_STRICT (before reload) let pass any stack offset. */ | |
10300 | if (! reg_ok_strict | |
10301 | && reg_offset_p | |
10302 | && GET_CODE (x) == PLUS | |
10303 | && GET_CODE (XEXP (x, 0)) == REG | |
10304 | && (XEXP (x, 0) == virtual_stack_vars_rtx | |
10305 | || XEXP (x, 0) == arg_pointer_rtx) | |
10306 | && GET_CODE (XEXP (x, 1)) == CONST_INT) | |
10307 | return 1; | |
10308 | if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false)) | |
10309 | return 1; | |
10310 | if (!FLOAT128_2REG_P (mode) | |
10311 | && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) | |
10312 | || TARGET_POWERPC64 | |
10313 | || (mode != DFmode && mode != DDmode) | |
10314 | || (TARGET_E500_DOUBLE && mode != DDmode)) | |
10315 | && (TARGET_POWERPC64 || mode != DImode) | |
10316 | && (mode != TImode || VECTOR_MEM_VSX_P (TImode)) | |
10317 | && mode != PTImode | |
10318 | && !avoiding_indexed_address_p (mode) | |
10319 | && legitimate_indexed_address_p (x, reg_ok_strict)) | |
10320 | return 1; | |
10321 | if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY | |
10322 | && mode_supports_pre_modify_p (mode) | |
10323 | && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict) | |
10324 | && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1), | |
10325 | reg_ok_strict, false) | |
10326 | || (!avoiding_indexed_address_p (mode) | |
10327 | && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict))) | |
10328 | && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0))) | |
10329 | return 1; | |
10330 | if (reg_offset_p && !quad_offset_p | |
10331 | && legitimate_lo_sum_address_p (mode, x, reg_ok_strict)) | |
10332 | return 1; | |
10333 | return 0; | |
10334 | } | |
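
/* Editorial illustration (not from the original source): the address
   forms accepted above look roughly like this in RTL, with r9 and r10
   standing in for arbitrary base/index registers:

     (reg:DI 9)                                        ; register indirect
     (pre_inc:DI (reg:DI 9))                           ; with auto-increment
     (plus:DI (reg:DI 9) (const_int 16))               ; reg + 16-bit offset
     (plus:DI (reg:DI 9) (reg:DI 10))                  ; reg + reg (indexed)
     (lo_sum:DI (reg:DI 9) (symbol_ref:DI ("*.LC0")))  ; constant-pool ref

   The last form is the shape checked by legitimate_lo_sum_address_p
   above; the constant-pool label name is hypothetical.  */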
10335 | ||
10336 | /* Debug version of rs6000_legitimate_address_p. */ | |
10337 | static bool | |
10338 | rs6000_debug_legitimate_address_p (machine_mode mode, rtx x, | |
10339 | bool reg_ok_strict) | |
10340 | { | |
10341 | bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict); | |
10342 | fprintf (stderr, | |
10343 | "\nrs6000_legitimate_address_p: return = %s, mode = %s, " | |
10344 | "strict = %d, reload = %s, code = %s\n", | |
10345 | ret ? "true" : "false", | |
10346 | GET_MODE_NAME (mode), | |
10347 | reg_ok_strict, | |
10348 | (reload_completed | |
10349 | ? "after" | |
10350 | : (reload_in_progress ? "progress" : "before")), | |
10351 | GET_RTX_NAME (GET_CODE (x))); | |
10352 | debug_rtx (x); | |
10353 | ||
10354 | return ret; | |
10355 | } | |
10356 | ||
10357 | /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */ | |
10358 | ||
10359 | static bool | |
10360 | rs6000_mode_dependent_address_p (const_rtx addr, | |
10361 | addr_space_t as ATTRIBUTE_UNUSED) | |
10362 | { | |
10363 | return rs6000_mode_dependent_address_ptr (addr); | |
10364 | } | |
10365 | ||
10366 | /* Go to LABEL if ADDR (a legitimate address expression) | |
10367 | has an effect that depends on the machine mode it is used for. | |
10368 | ||
10369 | On the RS/6000 this is true of all integral offsets (since AltiVec | |
10370 | and VSX modes don't allow them) and of any pre-increment or decrement. | |
10371 | ||
10372 | ??? Except that due to conceptual problems in offsettable_address_p | |
10373 | we can't really report the problems of integral offsets. So leave | |
10374 | this assuming that the adjustable offset must be valid for the | |
10375 | sub-words of a TFmode operand, which is what we had before. */ | |
10376 | ||
10377 | static bool | |
10378 | rs6000_mode_dependent_address (const_rtx addr) | |
10379 | { | |
10380 | switch (GET_CODE (addr)) | |
10381 | { | |
10382 | case PLUS: | |
10383 | /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx | |
10384 | is considered a legitimate address before reload, so there | |
10385 | are no offset restrictions in that case. Note that this | |
10386 | condition is safe in strict mode because any address involving | |
10387 | virtual_stack_vars_rtx or arg_pointer_rtx would already have | |
10388 | been rejected as illegitimate. */ | |
10389 | if (XEXP (addr, 0) != virtual_stack_vars_rtx | |
10390 | && XEXP (addr, 0) != arg_pointer_rtx | |
10391 | && GET_CODE (XEXP (addr, 1)) == CONST_INT) | |
10392 | { | |
10393 | unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1)); | |
10394 | return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12); | |
10395 | } | |
10396 | break; | |
10397 | ||
10398 | case LO_SUM: | |
10399 | /* Anything in the constant pool is sufficiently aligned that | |
10400 | all bytes have the same high part address. */ | |
10401 | return !legitimate_constant_pool_address_p (addr, QImode, false); | |
10402 | ||
10403 | /* Auto-increment cases are now treated generically in recog.c. */ | |
10404 | case PRE_MODIFY: | |
10405 | return TARGET_UPDATE; | |
10406 | ||
10407 | /* AND is only allowed in AltiVec loads. */ | |
10408 | case AND: | |
10409 | return true; | |
10410 | ||
10411 | default: | |
10412 | break; | |
10413 | } | |
10414 | ||
10415 | return false; | |
10416 | } | |
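
/* Editorial worked example for the PLUS case above: in 32-bit mode the
   slop is 12, because the last word of a 16-byte (TFmode-sized) operand
   lives at offset + 12, so the offset itself must fit in
   [-0x8000, 0x7fff - 12].  Biasing by 0x8000 turns that range test into
   "val + 0x8000 >= 0x10000 - 12":

     val = 0x7fe0:  0xffe0 <  0xfff4  -> not mode dependent (0x7fe0 + 12 fits)
     val = 0x7ff8:  0xfff8 >= 0xfff4  -> mode dependent (0x7ff8 + 12 = 0x8004)

   On PowerPC64 the slop is 8, since the operand is split into two
   doublewords instead of four words.  */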
10417 | ||
10418 | /* Debug version of rs6000_mode_dependent_address. */ | |
10419 | static bool | |
10420 | rs6000_debug_mode_dependent_address (const_rtx addr) | |
10421 | { | |
10422 | bool ret = rs6000_mode_dependent_address (addr); | |
10423 | ||
10424 | fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n", | |
10425 | ret ? "true" : "false"); | |
10426 | debug_rtx (addr); | |
10427 | ||
10428 | return ret; | |
10429 | } | |
10430 | ||
10431 | /* Implement FIND_BASE_TERM. */ | |
10432 | ||
10433 | rtx | |
10434 | rs6000_find_base_term (rtx op) | |
10435 | { | |
10436 | rtx base; | |
10437 | ||
10438 | base = op; | |
10439 | if (GET_CODE (base) == CONST) | |
10440 | base = XEXP (base, 0); | |
10441 | if (GET_CODE (base) == PLUS) | |
10442 | base = XEXP (base, 0); | |
10443 | if (GET_CODE (base) == UNSPEC) | |
10444 | switch (XINT (base, 1)) | |
10445 | { | |
10446 | case UNSPEC_TOCREL: | |
10447 | case UNSPEC_MACHOPIC_OFFSET: | |
10448 | /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term | |
10449 | for aliasing purposes. */ | |
10450 | return XVECEXP (base, 0, 0); | |
10451 | } | |
10452 | ||
10453 | return op; | |
10454 | } | |
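
/* Editorial example: for a TOC-relative reference along the lines of

     (const:DI (plus:DI (unspec:DI [(symbol_ref:DI ("x")) ...]
                                   UNSPEC_TOCREL)
                        (const_int 8)))

   the code above strips the CONST and PLUS wrappers and returns the
   inner symbol_ref "x", which is the base the alias machinery should
   see.  The UNSPEC operand vector is shown schematically; its exact
   contents are defined in rs6000.md.  */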
10455 | ||
10456 | /* More elaborate version of recog's offsettable_memref_p predicate | |
10457 | that works around the ??? note of rs6000_mode_dependent_address. | |
10458 | In particular it accepts | |
10459 | ||
10460 | (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8]))) | |
10461 | ||
10462 | in 32-bit mode, which the recog predicate rejects. */ | |
10463 | ||
10464 | static bool | |
10465 | rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode) | |
10466 | { | |
10467 | bool worst_case; | |
10468 | ||
10469 | if (!MEM_P (op)) | |
10470 | return false; | |
10471 | ||
10472 | /* First mimic offsettable_memref_p. */ | |
10473 | if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0))) | |
10474 | return true; | |
10475 | ||
10476 | /* offsettable_address_p invokes rs6000_mode_dependent_address, but | |
10477 | the latter predicate knows nothing about the mode of the memory | |
10478 | reference and, therefore, assumes that it is the largest supported | |
10479 | mode (TFmode). As a consequence, legitimate offsettable memory | |
10480 | references are rejected. rs6000_legitimate_offset_address_p contains | |
10481 | the correct logic for the PLUS case of rs6000_mode_dependent_address, | |
10482 | at least with a little bit of help here given that we know the | |
10483 | actual registers used. */ | |
10484 | worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT) | |
10485 | || GET_MODE_SIZE (reg_mode) == 4); | |
10486 | return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0), | |
10487 | true, worst_case); | |
10488 | } | |
10489 | ||
10490 | /* Determine the reassociation width to be used in reassociate_bb. | |
10491 | This takes into account how many parallel operations we | |
10492 | can actually do of a given type, and also the latency. | |
10493 | P8: | |
10494 | int add/sub 6/cycle | |
10495 | mul 2/cycle | |
10496 | vect add/sub/mul 2/cycle | |
10497 | fp add/sub/mul 2/cycle | |
10498 | dfp 1/cycle | |
10499 | */ | |
10500 | ||
10501 | static int | |
10502 | rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED, | |
582adad1 | 10503 | machine_mode mode) |
01e91138 | 10504 | { |
10505 | switch (rs6000_cpu) | |
10506 | { | |
10507 | case PROCESSOR_POWER8: | |
10508 | case PROCESSOR_POWER9: | |
10509 | if (DECIMAL_FLOAT_MODE_P (mode)) | |
10510 | return 1; | |
10511 | if (VECTOR_MODE_P (mode)) | |
10512 | return 4; | |
10513 | if (INTEGRAL_MODE_P (mode)) | |
10514 | return opc == MULT_EXPR ? 4 : 6; | |
10515 | if (FLOAT_MODE_P (mode)) | |
10516 | return 4; | |
10517 | break; | |
10518 | default: | |
10519 | break; | |
10520 | } | |
10521 | return 1; | |
10522 | } | |
10523 | ||
10524 | /* Change register usage conditional on target flags. */ | |
10525 | static void | |
10526 | rs6000_conditional_register_usage (void) | |
10527 | { | |
10528 | int i; | |
10529 | ||
10530 | if (TARGET_DEBUG_TARGET) | |
10531 | fprintf (stderr, "rs6000_conditional_register_usage called\n"); | |
10532 | ||
10533 | /* Set MQ register fixed (already call_used) so that it will not be | |
10534 | allocated. */ | |
10535 | fixed_regs[64] = 1; | |
10536 | ||
10537 | /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */ | |
10538 | if (TARGET_64BIT) | |
10539 | fixed_regs[13] = call_used_regs[13] | |
10540 | = call_really_used_regs[13] = 1; | |
10541 | ||
10542 | /* Conditionally disable FPRs. */ | |
10543 | if (TARGET_SOFT_FLOAT || !TARGET_FPRS) | |
10544 | for (i = 32; i < 64; i++) | |
10545 | fixed_regs[i] = call_used_regs[i] | |
10546 | = call_really_used_regs[i] = 1; | |
10547 | ||
10548 | /* The TOC register is not killed across calls in a way that is | |
10549 | visible to the compiler. */ | |
10550 | if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) | |
10551 | call_really_used_regs[2] = 0; | |
10552 | ||
10553 | if (DEFAULT_ABI == ABI_V4 && flag_pic == 2) | |
10554 | fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1; | |
10555 | ||
10556 | if (DEFAULT_ABI == ABI_V4 && flag_pic == 1) | |
10557 | fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] | |
10558 | = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] | |
10559 | = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1; | |
10560 | ||
10561 | if (DEFAULT_ABI == ABI_DARWIN && flag_pic) | |
10562 | fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] | |
10563 | = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] | |
10564 | = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1; | |
10565 | ||
10566 | if (TARGET_TOC && TARGET_MINIMAL_TOC) | |
10567 | fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] | |
10568 | = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1; | |
10569 | ||
10570 | if (TARGET_SPE) | |
10571 | { | |
10572 | global_regs[SPEFSCR_REGNO] = 1; | |
10573 | /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit | |
10574 | registers in prologues and epilogues. We no longer use r14 | |
10575 | for FIXED_SCRATCH, but we're keeping r14 out of the allocation | |
10576 | pool for link-compatibility with older versions of GCC. Once | |
10577 | "old" code has died out, we can return r14 to the allocation | |
10578 | pool. */ | |
10579 | fixed_regs[14] | |
10580 | = call_used_regs[14] | |
10581 | = call_really_used_regs[14] = 1; | |
10582 | } | |
10583 | ||
10584 | if (!TARGET_ALTIVEC && !TARGET_VSX) | |
10585 | { | |
10586 | for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i) | |
10587 | fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1; | |
10588 | call_really_used_regs[VRSAVE_REGNO] = 1; | |
10589 | } | |
10590 | ||
10591 | if (TARGET_ALTIVEC || TARGET_VSX) | |
10592 | global_regs[VSCR_REGNO] = 1; | |
10593 | ||
10594 | if (TARGET_ALTIVEC_ABI) | |
10595 | { | |
10596 | for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i) | |
10597 | call_used_regs[i] = call_really_used_regs[i] = 1; | |
10598 | ||
10599 | /* AIX reserves VR20:31 in non-extended ABI mode. */ | |
10600 | if (TARGET_XCOFF) | |
10601 | for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i) | |
10602 | fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1; | |
10603 | } | |
10604 | } | |
10605 | ||
10606 | \f | |
10607 | /* Output insns to set DEST equal to the constant SOURCE as a series of | |
10608 | lis, ori and shl instructions and return TRUE. */ | |
10609 | ||
10610 | bool | |
10611 | rs6000_emit_set_const (rtx dest, rtx source) | |
10612 | { | |
10613 | machine_mode mode = GET_MODE (dest); | |
10614 | rtx temp, set; | |
10615 | rtx_insn *insn; | |
10616 | HOST_WIDE_INT c; | |
10617 | ||
10618 | gcc_checking_assert (CONST_INT_P (source)); | |
10619 | c = INTVAL (source); | |
10620 | switch (mode) | |
10621 | { | |
916ace94 | 10622 | case E_QImode: |
10623 | case E_HImode: | |
01e91138 | 10624 | emit_insn (gen_rtx_SET (dest, source)); |
10625 | return true; | |
10626 | ||
916ace94 | 10627 | case E_SImode: |
01e91138 | 10628 | temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode); |
10629 | ||
10630 | emit_insn (gen_rtx_SET (copy_rtx (temp), | |
10631 | GEN_INT (c & ~(HOST_WIDE_INT) 0xffff))); | |
10632 | emit_insn (gen_rtx_SET (dest, | |
10633 | gen_rtx_IOR (SImode, copy_rtx (temp), | |
10634 | GEN_INT (c & 0xffff)))); | |
10635 | break; | |
10636 | ||
916ace94 | 10637 | case E_DImode: |
01e91138 | 10638 | if (!TARGET_POWERPC64) |
10639 | { | |
10640 | rtx hi, lo; | |
10641 | ||
10642 | hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0, | |
10643 | DImode); | |
10644 | lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0, | |
10645 | DImode); | |
10646 | emit_move_insn (hi, GEN_INT (c >> 32)); | |
10647 | c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000; | |
10648 | emit_move_insn (lo, GEN_INT (c)); | |
10649 | } | |
10650 | else | |
10651 | rs6000_emit_set_long_const (dest, c); | |
10652 | break; | |
10653 | ||
10654 | default: | |
10655 | gcc_unreachable (); | |
10656 | } | |
10657 | ||
10658 | insn = get_last_insn (); | |
10659 | set = single_set (insn); | |
10660 | if (! CONSTANT_P (SET_SRC (set))) | |
10661 | set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c)); | |
10662 | ||
10663 | return true; | |
10664 | } | |
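
/* Editorial sketch: for SImode with c = 0x12345678, the code above
   emits the equivalent of

     lis  rT, 0x1234        # rT = 0x12340000
     ori  rD, rT, 0x5678    # rD = 0x12345678

   and attaches a REG_EQUAL note carrying the full constant to the
   final insn, since the IOR source is not itself constant.  */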
10665 | ||
10666 | /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode. | |
10667 | Output insns to set DEST equal to the constant C as a series of | |
10668 | lis, ori and shl instructions. */ | |
10669 | ||
10670 | static void | |
10671 | rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) | |
10672 | { | |
10673 | rtx temp; | |
10674 | HOST_WIDE_INT ud1, ud2, ud3, ud4; | |
10675 | ||
10676 | ud1 = c & 0xffff; | |
10677 | c = c >> 16; | |
10678 | ud2 = c & 0xffff; | |
10679 | c = c >> 16; | |
10680 | ud3 = c & 0xffff; | |
10681 | c = c >> 16; | |
10682 | ud4 = c & 0xffff; | |
10683 | ||
10684 | if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000)) | |
10685 | || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000))) | |
10686 | emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000)); | |
10687 | ||
10688 | else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000)) | |
10689 | || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000))) | |
10690 | { | |
10691 | temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); | |
10692 | ||
10693 | emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest, | |
10694 | GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000)); | |
10695 | if (ud1 != 0) | |
10696 | emit_move_insn (dest, | |
10697 | gen_rtx_IOR (DImode, copy_rtx (temp), | |
10698 | GEN_INT (ud1))); | |
10699 | } | |
10700 | else if (ud3 == 0 && ud4 == 0) | |
10701 | { | |
10702 | temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); | |
10703 | ||
10704 | gcc_assert (ud2 & 0x8000); | |
10705 | emit_move_insn (copy_rtx (temp), | |
10706 | GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000)); | |
10707 | if (ud1 != 0) | |
10708 | emit_move_insn (copy_rtx (temp), | |
10709 | gen_rtx_IOR (DImode, copy_rtx (temp), | |
10710 | GEN_INT (ud1))); | |
10711 | emit_move_insn (dest, | |
10712 | gen_rtx_ZERO_EXTEND (DImode, | |
10713 | gen_lowpart (SImode, | |
10714 | copy_rtx (temp)))); | |
10715 | } | |
10716 | else if ((ud4 == 0xffff && (ud3 & 0x8000)) | |
10717 | || (ud4 == 0 && ! (ud3 & 0x8000))) | |
10718 | { | |
10719 | temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); | |
10720 | ||
10721 | emit_move_insn (copy_rtx (temp), | |
10722 | GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000)); | |
10723 | if (ud2 != 0) | |
10724 | emit_move_insn (copy_rtx (temp), | |
10725 | gen_rtx_IOR (DImode, copy_rtx (temp), | |
10726 | GEN_INT (ud2))); | |
10727 | emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest, | |
10728 | gen_rtx_ASHIFT (DImode, copy_rtx (temp), | |
10729 | GEN_INT (16))); | |
10730 | if (ud1 != 0) | |
10731 | emit_move_insn (dest, | |
10732 | gen_rtx_IOR (DImode, copy_rtx (temp), | |
10733 | GEN_INT (ud1))); | |
10734 | } | |
10735 | else | |
10736 | { | |
10737 | temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); | |
10738 | ||
10739 | emit_move_insn (copy_rtx (temp), | |
10740 | GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000)); | |
10741 | if (ud3 != 0) | |
10742 | emit_move_insn (copy_rtx (temp), | |
10743 | gen_rtx_IOR (DImode, copy_rtx (temp), | |
10744 | GEN_INT (ud3))); | |
10745 | ||
10746 | emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest, | |
10747 | gen_rtx_ASHIFT (DImode, copy_rtx (temp), | |
10748 | GEN_INT (32))); | |
10749 | if (ud2 != 0) | |
10750 | emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest, | |
10751 | gen_rtx_IOR (DImode, copy_rtx (temp), | |
10752 | GEN_INT (ud2 << 16))); | |
10753 | if (ud1 != 0) | |
10754 | emit_move_insn (dest, | |
10755 | gen_rtx_IOR (DImode, copy_rtx (temp), | |
10756 | GEN_INT (ud1))); | |
10757 | } | |
10758 | } | |
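
/* Editorial sketch: the final branch above needs the full five-insn
   sequence.  For c = 0x123456789abcdef0 (ud4..ud1 = 0x1234, 0x5678,
   0x9abc, 0xdef0) the emitted RTL corresponds roughly to

     lis   rT, 0x1234       # rT = 0x12340000, sign-extended
     ori   rT, rT, 0x5678   # rT = 0x12345678
     sldi  rT, rT, 32       # rT = 0x1234567800000000
     oris  rT, rT, 0x9abc   # rT = 0x123456789abc0000
     ori   rD, rT, 0xdef0   # rD = 0x123456789abcdef0

   The earlier branches peel off constants whose high halfwords are all
   zeros or all ones, which need only one to four instructions.  */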
10759 | ||
10760 | /* Helper for rs6000_emit_move below.  Get rid of [r+r] memory refs | |
10761 | in cases where it won't work (TImode, TFmode, TDmode, PTImode). */ | |
10762 | ||
10763 | static void | |
10764 | rs6000_eliminate_indexed_memrefs (rtx operands[2]) | |
10765 | { | |
10766 | if (reload_in_progress) | |
10767 | return; | |
10768 | ||
10769 | if (GET_CODE (operands[0]) == MEM | |
10770 | && GET_CODE (XEXP (operands[0], 0)) != REG | |
10771 | && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0), | |
10772 | GET_MODE (operands[0]), false)) | |
10773 | operands[0] | |
10774 | = replace_equiv_address (operands[0], | |
10775 | copy_addr_to_reg (XEXP (operands[0], 0))); | |
10776 | ||
10777 | if (GET_CODE (operands[1]) == MEM | |
10778 | && GET_CODE (XEXP (operands[1], 0)) != REG | |
10779 | && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0), | |
10780 | GET_MODE (operands[1]), false)) | |
10781 | operands[1] | |
10782 | = replace_equiv_address (operands[1], | |
10783 | copy_addr_to_reg (XEXP (operands[1], 0))); | |
10784 | } | |
10785 | ||
10786 | /* Generate a vector of constants to permute MODE for a little-endian | |
10787 | storage operation by swapping the two halves of a vector. */ | |
10788 | static rtvec | |
10789 | rs6000_const_vec (machine_mode mode) | |
10790 | { | |
10791 | int i, subparts; | |
10792 | rtvec v; | |
10793 | ||
10794 | switch (mode) | |
10795 | { | |
916ace94 | 10796 | case E_V1TImode: |
01e91138 | 10797 | subparts = 1; |
10798 | break; | |
916ace94 | 10799 | case E_V2DFmode: |
10800 | case E_V2DImode: | |
01e91138 | 10801 | subparts = 2; |
10802 | break; | |
916ace94 | 10803 | case E_V4SFmode: |
10804 | case E_V4SImode: | |
01e91138 | 10805 | subparts = 4; |
10806 | break; | |
916ace94 | 10807 | case E_V8HImode: |
01e91138 | 10808 | subparts = 8; |
10809 | break; | |
916ace94 | 10810 | case E_V16QImode: |
01e91138 | 10811 | subparts = 16; |
10812 | break; | |
10813 | default: | |
10814 | gcc_unreachable(); | |
10815 | } | |
10816 | ||
10817 | v = rtvec_alloc (subparts); | |
10818 | ||
10819 | for (i = 0; i < subparts / 2; ++i) | |
10820 | RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2); | |
10821 | for (i = subparts / 2; i < subparts; ++i) | |
10822 | RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2); | |
10823 | ||
10824 | return v; | |
10825 | } | |
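
/* Editorial example: for V4SImode the selector is { 2, 3, 0, 1 } and
   for V2DFmode it is { 1, 0 }, i.e. the two 64-bit halves of the
   vector are swapped.  Applying the permutation twice is the identity,
   which is what allows redundant swap pairs to be optimized away at
   expand time (see rs6000_emit_le_vsx_move below).  */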
10826 | ||
10827 | /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi | |
10828 | for a VSX load or store operation. */ | |
10829 | rtx | |
10830 | rs6000_gen_le_vsx_permute (rtx source, machine_mode mode) | |
10831 | { | |
10832 | /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and | |
10833 | 128-bit integers if they are allowed in VSX registers. */ | |
10834 | if (FLOAT128_VECTOR_P (mode) || mode == TImode || mode == V1TImode) | |
10835 | return gen_rtx_ROTATE (mode, source, GEN_INT (64)); | |
10836 | else | |
10837 | { | |
10838 | rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode)); | |
10839 | return gen_rtx_VEC_SELECT (mode, source, par); | |
10840 | } | |
10841 | } | |
10842 | ||
10843 | /* Emit a little-endian load from vector memory location SOURCE to VSX | |
10844 | register DEST in mode MODE. The load is done with two permuting | |
10845 | insns that represent an lxvd2x and an xxpermdi. */ | |
10846 | void | |
10847 | rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode) | |
10848 | { | |
10849 | rtx tmp, permute_mem, permute_reg; | |
10850 | ||
10851 | /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode, | |
10852 | V1TImode). */ | |
10853 | if (mode == TImode || mode == V1TImode) | |
10854 | { | |
10855 | mode = V2DImode; | |
10856 | dest = gen_lowpart (V2DImode, dest); | |
10857 | source = adjust_address (source, V2DImode, 0); | |
10858 | } | |
10859 | ||
10860 | tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest; | |
10861 | permute_mem = rs6000_gen_le_vsx_permute (source, mode); | |
10862 | permute_reg = rs6000_gen_le_vsx_permute (tmp, mode); | |
10863 | emit_insn (gen_rtx_SET (tmp, permute_mem)); | |
10864 | emit_insn (gen_rtx_SET (dest, permute_reg)); | |
10865 | } | |
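
/* Editorial sketch: for a V2DF load the function above emits

     (set (reg:V2DF tmp)  (vec_select:V2DF (mem:V2DF addr)
                                           (parallel [(const_int 1)
                                                      (const_int 0)])))
     (set (reg:V2DF dest) (vec_select:V2DF (reg:V2DF tmp)
                                           (parallel [(const_int 1)
                                                      (const_int 0)])))

   which the backend matches as an lxvd2x followed by an xxpermdi.
   rs6000_emit_le_vsx_store below mirrors this, permuting first and
   storing second.  */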
10866 | ||
10867 | /* Emit a little-endian store to vector memory location DEST from VSX | |
10868 | register SOURCE in mode MODE. The store is done with two permuting | |
10869 | insns that represent an xxpermdi and an stxvd2x. */ | |
10870 | void | |
10871 | rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode) | |
10872 | { | |
10873 | rtx tmp, permute_src, permute_tmp; | |
10874 | ||
10875 | /* This should never be called during or after reload, because it does | |
10876 | not re-permute the source register. It is intended only for use | |
10877 | during expand. */ | |
10878 | gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed); | |
10879 | ||
10880 | /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode, | |
10881 | V1TImode). */ | |
10882 | if (mode == TImode || mode == V1TImode) | |
10883 | { | |
10884 | mode = V2DImode; | |
10885 | dest = adjust_address (dest, V2DImode, 0); | |
10886 | source = gen_lowpart (V2DImode, source); | |
10887 | } | |
10888 | ||
10889 | tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source; | |
10890 | permute_src = rs6000_gen_le_vsx_permute (source, mode); | |
10891 | permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode); | |
10892 | emit_insn (gen_rtx_SET (tmp, permute_src)); | |
10893 | emit_insn (gen_rtx_SET (dest, permute_tmp)); | |
10894 | } | |
10895 | ||
10896 | /* Emit a sequence representing a little-endian VSX load or store, | |
10897 | moving data from SOURCE to DEST in mode MODE. This is done | |
10898 | separately from rs6000_emit_move to ensure it is called only | |
10899 | during expand. LE VSX loads and stores introduced later are | |
10900 | handled with a split. The expand-time RTL generation allows | |
10901 | us to optimize away redundant pairs of register-permutes. */ | |
10902 | void | |
10903 | rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode) | |
10904 | { | |
10905 | gcc_assert (!BYTES_BIG_ENDIAN | |
10906 | && VECTOR_MEM_VSX_P (mode) | |
10907 | && !TARGET_P9_VECTOR | |
10908 | && !gpr_or_gpr_p (dest, source) | |
10909 | && (MEM_P (source) ^ MEM_P (dest))); | |
10910 | ||
10911 | if (MEM_P (source)) | |
10912 | { | |
10913 | gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG); | |
10914 | rs6000_emit_le_vsx_load (dest, source, mode); | |
10915 | } | |
10916 | else | |
10917 | { | |
10918 | if (!REG_P (source)) | |
10919 | source = force_reg (mode, source); | |
10920 | rs6000_emit_le_vsx_store (dest, source, mode); | |
10921 | } | |
10922 | } | |
10923 | ||
10924 | /* Return whether a SFmode or SImode move can be done without converting one | |
10925 | mode to another.  This arises when we have: | |
10926 | ||
10927 | (SUBREG:SF (REG:SI ...)) | |
10928 | (SUBREG:SI (REG:SF ...)) | |
10929 | ||
10930 | and one of the values is in a floating point/vector register, where SFmode | |
10931 | scalars are stored in DFmode format. */ | |
10932 | ||
10933 | bool | |
10934 | valid_sf_si_move (rtx dest, rtx src, machine_mode mode) | |
10935 | { | |
10936 | if (TARGET_ALLOW_SF_SUBREG) | |
10937 | return true; | |
10938 | ||
10939 | if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT) | |
10940 | return true; | |
10941 | ||
10942 | if (!SUBREG_P (src) || !sf_subreg_operand (src, mode)) | |
10943 | return true; | |
10944 | ||
10945 | /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */ | |
10946 | if (SUBREG_P (dest)) | |
10947 | { | |
10948 | rtx dest_subreg = SUBREG_REG (dest); | |
10949 | rtx src_subreg = SUBREG_REG (src); | |
10950 | return GET_MODE (dest_subreg) == GET_MODE (src_subreg); | |
10951 | } | |
10952 | ||
10953 | return false; | |
10954 | } | |
10955 | ||
10956 | ||
10957 | /* Helper function to change moves with: | |
10958 | ||
10959 | (SUBREG:SF (REG:SI)) and | |
10960 | (SUBREG:SI (REG:SF)) | |
10961 | ||
10962 | into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode | |
10963 | values are stored as DFmode values in the VSX registers. We need to convert | |
10964 | the bits before we can use a direct move or operate on the bits in the | |
10965 | vector register as an integer type. | |
10966 | ||
10967 | Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */ | |
10968 | ||
10969 | static bool | |
10970 | rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode) | |
10971 | { | |
10972 | if (TARGET_DIRECT_MOVE_64BIT && !reload_in_progress && !reload_completed | |
10973 | && !lra_in_progress | |
10974 | && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode)) | |
10975 | && SUBREG_P (source) && sf_subreg_operand (source, mode)) | |
10976 | { | |
10977 | rtx inner_source = SUBREG_REG (source); | |
10978 | machine_mode inner_mode = GET_MODE (inner_source); | |
10979 | ||
10980 | if (mode == SImode && inner_mode == SFmode) | |
10981 | { | |
10982 | emit_insn (gen_movsi_from_sf (dest, inner_source)); | |
10983 | return true; | |
10984 | } | |
10985 | ||
10986 | if (mode == SFmode && inner_mode == SImode) | |
10987 | { | |
10988 | emit_insn (gen_movsf_from_si (dest, inner_source)); | |
10989 | return true; | |
10990 | } | |
10991 | } | |
10992 | ||
10993 | return false; | |
10994 | } | |
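
/* Editorial example: a move such as

     (set (reg:SI r3) (subreg:SI (reg:SF f1) 0))

   is rewritten by the function above into the movsi_from_sf pattern,
   which in effect converts the DFmode representation held in the VSX
   register back to SFmode bits before moving them to the GPR; the
   register names here are only illustrative.  */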
10995 | ||
10996 | /* Emit a move from SOURCE to DEST in mode MODE. */ | |
10997 | void | |
10998 | rs6000_emit_move (rtx dest, rtx source, machine_mode mode) | |
10999 | { | |
11000 | rtx operands[2]; | |
11001 | operands[0] = dest; | |
11002 | operands[1] = source; | |
11003 | ||
11004 | if (TARGET_DEBUG_ADDR) | |
11005 | { | |
11006 | fprintf (stderr, | |
11007 | "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, " | |
11008 | "reload_completed = %d, can_create_pseudos = %d.\ndest:\n", | |
11009 | GET_MODE_NAME (mode), | |
11010 | reload_in_progress, | |
11011 | reload_completed, | |
11012 | can_create_pseudo_p ()); | |
11013 | debug_rtx (dest); | |
11014 | fprintf (stderr, "source:\n"); | |
11015 | debug_rtx (source); | |
11016 | } | |
11017 | ||
11018 | /* Sanity checks. Check that we get CONST_DOUBLE only when we should. */ | |
11019 | if (CONST_WIDE_INT_P (operands[1]) | |
11020 | && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT) | |
11021 | { | |
11022 | /* This should be fixed with the introduction of CONST_WIDE_INT. */ | |
11023 | gcc_unreachable (); | |
11024 | } | |
11025 | ||
11026 | /* See if we need to special case SImode/SFmode SUBREG moves. */ | |
11027 | if ((mode == SImode || mode == SFmode) && SUBREG_P (source) | |
11028 | && rs6000_emit_move_si_sf_subreg (dest, source, mode)) | |
11029 | return; | |
11030 | ||
11031 | /* Check if GCC is setting up a block move that will end up using FP | |
11032 | registers as temporaries. We must make sure this is acceptable. */ | |
11033 | if (GET_CODE (operands[0]) == MEM | |
11034 | && GET_CODE (operands[1]) == MEM | |
11035 | && mode == DImode | |
dfdced85 | 11036 | && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0])) |
11037 | || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1]))) | |
11038 | && ! (rs6000_slow_unaligned_access (SImode, | |
11039 | (MEM_ALIGN (operands[0]) > 32 | |
11040 | ? 32 : MEM_ALIGN (operands[0]))) | |
11041 | || rs6000_slow_unaligned_access (SImode, | |
11042 | (MEM_ALIGN (operands[1]) > 32 | |
11043 | ? 32 : MEM_ALIGN (operands[1])))) | |
01e91138 | 11044 | && ! MEM_VOLATILE_P (operands [0]) |
11045 | && ! MEM_VOLATILE_P (operands [1])) | |
11046 | { | |
11047 | emit_move_insn (adjust_address (operands[0], SImode, 0), | |
11048 | adjust_address (operands[1], SImode, 0)); | |
11049 | emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4), | |
11050 | adjust_address (copy_rtx (operands[1]), SImode, 4)); | |
11051 | return; | |
11052 | } | |
11053 | ||
11054 | if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM | |
11055 | && !gpc_reg_operand (operands[1], mode)) | |
11056 | operands[1] = force_reg (mode, operands[1]); | |
11057 | ||
11058 | /* Recognize the case where operand[1] is a reference to thread-local | |
11059 | data and load its address to a register. */ | |
11060 | if (tls_referenced_p (operands[1])) | |
11061 | { | |
11062 | enum tls_model model; | |
11063 | rtx tmp = operands[1]; | |
11064 | rtx addend = NULL; | |
11065 | ||
11066 | if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS) | |
11067 | { | |
11068 | addend = XEXP (XEXP (tmp, 0), 1); | |
11069 | tmp = XEXP (XEXP (tmp, 0), 0); | |
11070 | } | |
11071 | ||
11072 | gcc_assert (GET_CODE (tmp) == SYMBOL_REF); | |
11073 | model = SYMBOL_REF_TLS_MODEL (tmp); | |
11074 | gcc_assert (model != 0); | |
11075 | ||
11076 | tmp = rs6000_legitimize_tls_address (tmp, model); | |
11077 | if (addend) | |
11078 | { | |
11079 | tmp = gen_rtx_PLUS (mode, tmp, addend); | |
11080 | tmp = force_operand (tmp, operands[0]); | |
11081 | } | |
11082 | operands[1] = tmp; | |
11083 | } | |
11084 | ||
11085 | /* Handle the case where reload calls us with an invalid address. */ | |
11086 | if (reload_in_progress && mode == Pmode | |
11087 | && (! general_operand (operands[1], mode) | |
11088 | || ! nonimmediate_operand (operands[0], mode))) | |
11089 | goto emit_set; | |
11090 | ||
11091 | /* 128-bit constant floating-point values on Darwin should really be loaded | |
11092 | as two parts. However, this premature splitting is a problem when DFmode | |
11093 | values can go into Altivec registers. */ | |
11094 | if (FLOAT128_IBM_P (mode) && !reg_addr[DFmode].scalar_in_vmx_p | |
11095 | && GET_CODE (operands[1]) == CONST_DOUBLE) | |
11096 | { | |
11097 | rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0), | |
11098 | simplify_gen_subreg (DFmode, operands[1], mode, 0), | |
11099 | DFmode); | |
11100 | rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, | |
11101 | GET_MODE_SIZE (DFmode)), | |
11102 | simplify_gen_subreg (DFmode, operands[1], mode, | |
11103 | GET_MODE_SIZE (DFmode)), | |
11104 | DFmode); | |
11105 | return; | |
11106 | } | |
11107 | ||
11108 | if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX) | |
11109 | cfun->machine->sdmode_stack_slot = | |
11110 | eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX); | |
11111 | ||
11112 | ||
11113 | /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD), | |
11114 | p1:SD) if p1 is not of floating point class and p0 is spilled as | |
11115 | we can have no analogous movsd_store for this. */ | |
11116 | if (lra_in_progress && mode == DDmode | |
11117 | && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER | |
11118 | && reg_preferred_class (REGNO (operands[0])) == NO_REGS | |
11119 | && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1])) | |
11120 | && GET_MODE (SUBREG_REG (operands[1])) == SDmode) | |
11121 | { | |
11122 | enum reg_class cl; | |
11123 | int regno = REGNO (SUBREG_REG (operands[1])); | |
11124 | ||
11125 | if (regno >= FIRST_PSEUDO_REGISTER) | |
11126 | { | |
11127 | cl = reg_preferred_class (regno); | |
11128 | regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1]; | |
11129 | } | |
11130 | if (regno >= 0 && ! FP_REGNO_P (regno)) | |
11131 | { | |
11132 | mode = SDmode; | |
11133 | operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]); | |
11134 | operands[1] = SUBREG_REG (operands[1]); | |
11135 | } | |
11136 | } | |
11137 | if (lra_in_progress | |
11138 | && mode == SDmode | |
11139 | && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER | |
11140 | && reg_preferred_class (REGNO (operands[0])) == NO_REGS | |
11141 | && (REG_P (operands[1]) | |
11142 | || (GET_CODE (operands[1]) == SUBREG | |
11143 | && REG_P (SUBREG_REG (operands[1]))))) | |
11144 | { | |
11145 | int regno = REGNO (GET_CODE (operands[1]) == SUBREG | |
11146 | ? SUBREG_REG (operands[1]) : operands[1]); | |
11147 | enum reg_class cl; | |
11148 | ||
11149 | if (regno >= FIRST_PSEUDO_REGISTER) | |
11150 | { | |
11151 | cl = reg_preferred_class (regno); | |
11152 | gcc_assert (cl != NO_REGS); | |
11153 | regno = ira_class_hard_regs[cl][0]; | |
11154 | } | |
11155 | if (FP_REGNO_P (regno)) | |
11156 | { | |
11157 | if (GET_MODE (operands[0]) != DDmode) | |
11158 | operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0); | |
11159 | emit_insn (gen_movsd_store (operands[0], operands[1])); | |
11160 | } | |
11161 | else if (INT_REGNO_P (regno)) | |
11162 | emit_insn (gen_movsd_hardfloat (operands[0], operands[1])); | |
11163 | else | |
11164 | gcc_unreachable(); | |
11165 | return; | |
11166 | } | |
11167 | /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD | |
11168 | p1:DD)) if p0 is not of floating point class and p1 is spilled as | |
11169 | we can have no analogous movsd_load for this. */ | |
11170 | if (lra_in_progress && mode == DDmode | |
11171 | && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0])) | |
11172 | && GET_MODE (SUBREG_REG (operands[0])) == SDmode | |
11173 | && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER | |
11174 | && reg_preferred_class (REGNO (operands[1])) == NO_REGS) | |
11175 | { | |
11176 | enum reg_class cl; | |
11177 | int regno = REGNO (SUBREG_REG (operands[0])); | |
11178 | ||
11179 | if (regno >= FIRST_PSEUDO_REGISTER) | |
11180 | { | |
11181 | cl = reg_preferred_class (regno); | |
11182 | regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0]; | |
11183 | } | |
11184 | if (regno >= 0 && ! FP_REGNO_P (regno)) | |
11185 | { | |
11186 | mode = SDmode; | |
11187 | operands[0] = SUBREG_REG (operands[0]); | |
11188 | operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]); | |
11189 | } | |
11190 | } | |
11191 | if (lra_in_progress | |
11192 | && mode == SDmode | |
11193 | && (REG_P (operands[0]) | |
11194 | || (GET_CODE (operands[0]) == SUBREG | |
11195 | && REG_P (SUBREG_REG (operands[0])))) | |
11196 | && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER | |
11197 | && reg_preferred_class (REGNO (operands[1])) == NO_REGS) | |
11198 | { | |
11199 | int regno = REGNO (GET_CODE (operands[0]) == SUBREG | |
11200 | ? SUBREG_REG (operands[0]) : operands[0]); | |
11201 | enum reg_class cl; | |
11202 | ||
11203 | if (regno >= FIRST_PSEUDO_REGISTER) | |
11204 | { | |
11205 | cl = reg_preferred_class (regno); | |
11206 | gcc_assert (cl != NO_REGS); | |
11207 | regno = ira_class_hard_regs[cl][0]; | |
11208 | } | |
11209 | if (FP_REGNO_P (regno)) | |
11210 | { | |
11211 | if (GET_MODE (operands[1]) != DDmode) | |
11212 | operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0); | |
11213 | emit_insn (gen_movsd_load (operands[0], operands[1])); | |
11214 | } | |
11215 | else if (INT_REGNO_P (regno)) | |
11216 | emit_insn (gen_movsd_hardfloat (operands[0], operands[1])); | |
11217 | else | |
11218 | gcc_unreachable(); | |
11219 | return; | |
11220 | } | |
11221 | ||
11222 | if (reload_in_progress | |
11223 | && mode == SDmode | |
11224 | && cfun->machine->sdmode_stack_slot != NULL_RTX | |
11225 | && MEM_P (operands[0]) | |
11226 | && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot) | |
11227 | && REG_P (operands[1])) | |
11228 | { | |
11229 | if (FP_REGNO_P (REGNO (operands[1]))) | |
11230 | { | |
11231 | rtx mem = adjust_address_nv (operands[0], DDmode, 0); | |
11232 | mem = eliminate_regs (mem, VOIDmode, NULL_RTX); | |
11233 | emit_insn (gen_movsd_store (mem, operands[1])); | |
11234 | } | |
11235 | else if (INT_REGNO_P (REGNO (operands[1]))) | |
11236 | { | |
11237 | rtx mem = operands[0]; | |
11238 | if (BYTES_BIG_ENDIAN) | |
11239 | mem = adjust_address_nv (mem, mode, 4); | |
11240 | mem = eliminate_regs (mem, VOIDmode, NULL_RTX); | |
11241 | emit_insn (gen_movsd_hardfloat (mem, operands[1])); | |
11242 | } | |
11243 | else | |
11244 | gcc_unreachable(); | |
11245 | return; | |
11246 | } | |
11247 | if (reload_in_progress | |
11248 | && mode == SDmode | |
11249 | && REG_P (operands[0]) | |
11250 | && MEM_P (operands[1]) | |
11251 | && cfun->machine->sdmode_stack_slot != NULL_RTX | |
11252 | && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot)) | |
11253 | { | |
11254 | if (FP_REGNO_P (REGNO (operands[0]))) | |
11255 | { | |
11256 | rtx mem = adjust_address_nv (operands[1], DDmode, 0); | |
11257 | mem = eliminate_regs (mem, VOIDmode, NULL_RTX); | |
11258 | emit_insn (gen_movsd_load (operands[0], mem)); | |
11259 | } | |
11260 | else if (INT_REGNO_P (REGNO (operands[0]))) | |
11261 | { | |
11262 | rtx mem = operands[1]; | |
11263 | if (BYTES_BIG_ENDIAN) | |
11264 | mem = adjust_address_nv (mem, mode, 4); | |
11265 | mem = eliminate_regs (mem, VOIDmode, NULL_RTX); | |
11266 | emit_insn (gen_movsd_hardfloat (operands[0], mem)); | |
11267 | } | |
11268 | else | |
11269 | gcc_unreachable(); | |
11270 | return; | |
11271 | } | |
11272 | ||
11273 | /* FIXME: In the long term, this switch statement should go away | |
11274 | and be replaced by a sequence of tests based on things like | |
11275 | mode == Pmode. */ | |
11276 | switch (mode) | |
11277 | { | |
916ace94 | 11278 | case E_HImode: |
11279 | case E_QImode: | |
01e91138 | 11280 | if (CONSTANT_P (operands[1]) |
11281 | && GET_CODE (operands[1]) != CONST_INT) | |
11282 | operands[1] = force_const_mem (mode, operands[1]); | |
11283 | break; | |
11284 | ||
916ace94 | 11285 | case E_TFmode: |
11286 | case E_TDmode: | |
11287 | case E_IFmode: | |
11288 | case E_KFmode: | |
01e91138 | 11289 | if (FLOAT128_2REG_P (mode)) |
11290 | rs6000_eliminate_indexed_memrefs (operands); | |
11291 | /* fall through */ | |
11292 | ||
916ace94 | 11293 | case E_DFmode: |
11294 | case E_DDmode: | |
11295 | case E_SFmode: | |
11296 | case E_SDmode: | |
01e91138 | 11297 | if (CONSTANT_P (operands[1]) |
11298 | && ! easy_fp_constant (operands[1], mode)) | |
11299 | operands[1] = force_const_mem (mode, operands[1]); | |
11300 | break; | |
11301 | ||
916ace94 | 11302 | case E_V16QImode: |
11303 | case E_V8HImode: | |
11304 | case E_V4SFmode: | |
11305 | case E_V4SImode: | |
11306 | case E_V4HImode: | |
11307 | case E_V2SFmode: | |
11308 | case E_V2SImode: | |
11309 | case E_V1DImode: | |
11310 | case E_V2DFmode: | |
11311 | case E_V2DImode: | |
11312 | case E_V1TImode: | |
01e91138 | 11313 | if (CONSTANT_P (operands[1]) |
11314 | && !easy_vector_constant (operands[1], mode)) | |
11315 | operands[1] = force_const_mem (mode, operands[1]); | |
11316 | break; | |
11317 | ||
916ace94 | 11318 | case E_SImode: |
11319 | case E_DImode: | |
01e91138 | 11320 | /* Use the default pattern for the address of ELF small data.  */ |
11321 | if (TARGET_ELF | |
11322 | && mode == Pmode | |
11323 | && DEFAULT_ABI == ABI_V4 | |
11324 | && (GET_CODE (operands[1]) == SYMBOL_REF | |
11325 | || GET_CODE (operands[1]) == CONST) | |
11326 | && small_data_operand (operands[1], mode)) | |
11327 | { | |
11328 | emit_insn (gen_rtx_SET (operands[0], operands[1])); | |
11329 | return; | |
11330 | } | |
11331 | ||
11332 | if (DEFAULT_ABI == ABI_V4 | |
11333 | && mode == Pmode && mode == SImode | |
11334 | && flag_pic == 1 && got_operand (operands[1], mode)) | |
11335 | { | |
11336 | emit_insn (gen_movsi_got (operands[0], operands[1])); | |
11337 | return; | |
11338 | } | |
11339 | ||
11340 | if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN) | |
11341 | && TARGET_NO_TOC | |
11342 | && ! flag_pic | |
11343 | && mode == Pmode | |
11344 | && CONSTANT_P (operands[1]) | |
11345 | && GET_CODE (operands[1]) != HIGH | |
11346 | && GET_CODE (operands[1]) != CONST_INT) | |
11347 | { | |
11348 | rtx target = (!can_create_pseudo_p () | |
11349 | ? operands[0] | |
11350 | : gen_reg_rtx (mode)); | |
11351 | ||
11352 | /* If this is a function address on -mcall-aixdesc, | |
11353 | convert it to the address of the descriptor. */ | |
11354 | if (DEFAULT_ABI == ABI_AIX | |
11355 | && GET_CODE (operands[1]) == SYMBOL_REF | |
11356 | && XSTR (operands[1], 0)[0] == '.') | |
11357 | { | |
11358 | const char *name = XSTR (operands[1], 0); | |
11359 | rtx new_ref; | |
11360 | while (*name == '.') | |
11361 | name++; | |
11362 | new_ref = gen_rtx_SYMBOL_REF (Pmode, name); | |
11363 | CONSTANT_POOL_ADDRESS_P (new_ref) | |
11364 | = CONSTANT_POOL_ADDRESS_P (operands[1]); | |
11365 | SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]); | |
11366 | SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]); | |
11367 | SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]); | |
11368 | operands[1] = new_ref; | |
11369 | } | |
11370 | ||
11371 | if (DEFAULT_ABI == ABI_DARWIN) | |
11372 | { | |
11373 | #if TARGET_MACHO | |
11374 | if (MACHO_DYNAMIC_NO_PIC_P) | |
11375 | { | |
11376 | /* Take care of any required data indirection. */ | |
11377 | operands[1] = rs6000_machopic_legitimize_pic_address ( | |
11378 | operands[1], mode, operands[0]); | |
11379 | if (operands[0] != operands[1]) | |
11380 | emit_insn (gen_rtx_SET (operands[0], operands[1])); | |
11381 | return; | |
11382 | } | |
11383 | #endif | |
11384 | emit_insn (gen_macho_high (target, operands[1])); | |
11385 | emit_insn (gen_macho_low (operands[0], target, operands[1])); | |
11386 | return; | |
11387 | } | |
11388 | ||
11389 | emit_insn (gen_elf_high (target, operands[1])); | |
11390 | emit_insn (gen_elf_low (operands[0], target, operands[1])); | |
11391 | return; | |
11392 | } | |
11393 | ||
11394 | /* If this is a SYMBOL_REF that refers to a constant pool entry, | |
11395 | and we have put it in the TOC, we just need to make a TOC-relative | |
11396 | reference to it. */ | |
11397 | if (TARGET_TOC | |
11398 | && GET_CODE (operands[1]) == SYMBOL_REF | |
11399 | && use_toc_relative_ref (operands[1], mode)) | |
11400 | operands[1] = create_TOC_reference (operands[1], operands[0]); | |
11401 | else if (mode == Pmode | |
11402 | && CONSTANT_P (operands[1]) | |
11403 | && GET_CODE (operands[1]) != HIGH | |
11404 | && ((GET_CODE (operands[1]) != CONST_INT | |
11405 | && ! easy_fp_constant (operands[1], mode)) | |
11406 | || (GET_CODE (operands[1]) == CONST_INT | |
11407 | && (num_insns_constant (operands[1], mode) | |
11408 | > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2))) | |
11409 | || (GET_CODE (operands[0]) == REG | |
11410 | && FP_REGNO_P (REGNO (operands[0])))) | |
11411 | && !toc_relative_expr_p (operands[1], false) | |
11412 | && (TARGET_CMODEL == CMODEL_SMALL | |
11413 | || can_create_pseudo_p () | |
11414 | || (REG_P (operands[0]) | |
11415 | && INT_REG_OK_FOR_BASE_P (operands[0], true)))) | |
11416 | { | |
11417 | ||
11418 | #if TARGET_MACHO | |
11419 | /* Darwin uses a special PIC legitimizer. */ | |
11420 | if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT) | |
11421 | { | |
11422 | operands[1] = | |
11423 | rs6000_machopic_legitimize_pic_address (operands[1], mode, | |
11424 | operands[0]); | |
11425 | if (operands[0] != operands[1]) | |
11426 | emit_insn (gen_rtx_SET (operands[0], operands[1])); | |
11427 | return; | |
11428 | } | |
11429 | #endif | |
11430 | ||
11431 | /* If we are to limit the number of things we put in the TOC and | |
11432 | this is a symbol plus a constant we can add in one insn, | |
11433 | just put the symbol in the TOC and add the constant. Don't do | |
11434 | this if reload is in progress. */ | |
11435 | if (GET_CODE (operands[1]) == CONST | |
11436 | && TARGET_NO_SUM_IN_TOC && ! reload_in_progress | |
11437 | && GET_CODE (XEXP (operands[1], 0)) == PLUS | |
11438 | && add_operand (XEXP (XEXP (operands[1], 0), 1), mode) | |
11439 | && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF | |
11440 | || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF) | |
11441 | && ! side_effects_p (operands[0])) | |
11442 | { | |
11443 | rtx sym = | |
11444 | force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0)); | |
11445 | rtx other = XEXP (XEXP (operands[1], 0), 1); | |
11446 | ||
11447 | sym = force_reg (mode, sym); | |
11448 | emit_insn (gen_add3_insn (operands[0], sym, other)); | |
11449 | return; | |
11450 | } | |
11451 | ||
11452 | operands[1] = force_const_mem (mode, operands[1]); | |
11453 | ||
11454 | if (TARGET_TOC | |
11455 | && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF | |
11456 | && use_toc_relative_ref (XEXP (operands[1], 0), mode)) | |
11457 | { | |
11458 | rtx tocref = create_TOC_reference (XEXP (operands[1], 0), | |
11459 | operands[0]); | |
11460 | operands[1] = gen_const_mem (mode, tocref); | |
11461 | set_mem_alias_set (operands[1], get_TOC_alias_set ()); | |
11462 | } | |
11463 | } | |
11464 | break; | |
11465 | ||
916ace94 | 11466 | case E_TImode: |
01e91138 | 11467 | if (!VECTOR_MEM_VSX_P (TImode)) |
11468 | rs6000_eliminate_indexed_memrefs (operands); | |
11469 | break; | |
11470 | ||
916ace94 | 11471 | case E_PTImode: |
01e91138 | 11472 | rs6000_eliminate_indexed_memrefs (operands); |
11473 | break; | |
11474 | ||
11475 | default: | |
11476 | fatal_insn ("bad move", gen_rtx_SET (dest, source)); | |
11477 | } | |
11478 | ||
11479 | /* Above, we may have called force_const_mem which may have returned | |
11480 | an invalid address. If we can, fix this up; otherwise, reload will | |
11481 | have to deal with it. */ | |
11482 | if (GET_CODE (operands[1]) == MEM && ! reload_in_progress) | |
11483 | operands[1] = validize_mem (operands[1]); | |
11484 | ||
11485 | emit_set: | |
11486 | emit_insn (gen_rtx_SET (operands[0], operands[1])); | |
11487 | } | |
11488 | ||
11489 | /* Return true if a structure, union or array containing FIELD should be | |
11490 | accessed using BLKmode. | |
11491 | ||
11492 | For the SPE, simd types are V2SI, and gcc can be tempted to put the | |
11493 | entire thing in a DI and use subregs to access the internals. | |
11494 | store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the | |
11495 | back-end. Because a single GPR can hold a V2SI, but not a DI, the | |
11496 | best thing to do is set structs to BLKmode and avoid Severe Tire | |
11497 | Damage. | |
11498 | ||
11499 | On e500 v2, DF and DI modes suffer from the same anomaly. DF can | |
11500 | fit into 1, whereas DI still needs two. */ | |
11501 | ||
11502 | static bool | |
11503 | rs6000_member_type_forces_blk (const_tree field, machine_mode mode) | |
11504 | { | |
11505 | return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE) | |
11506 | || (TARGET_E500_DOUBLE && mode == DFmode)); | |
11507 | } | |
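
/* Editorial example: on SPE, a structure containing a V2SI field, e.g.

     typedef int v2si __attribute__ ((vector_size (8)));
     struct s { v2si v; };

   is forced to BLKmode by the function above, so the middle end never
   tries to funnel it through a DImode subreg that no single GPR could
   hold.  */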
11508 | \f | |
11509 | /* Nonzero if we can use a floating-point register to pass this arg. */ | |
11510 | #define USE_FP_FOR_ARG_P(CUM,MODE) \ | |
11511 | (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \ | |
11512 | && (CUM)->fregno <= FP_ARG_MAX_REG \ | |
11513 | && TARGET_HARD_FLOAT && TARGET_FPRS) | |
11514 | ||
11515 | /* Nonzero if we can use an AltiVec register to pass this arg. */ | |
11516 | #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \ | |
11517 | (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \ | |
11518 | && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \ | |
11519 | && TARGET_ALTIVEC_ABI \ | |
11520 | && (NAMED)) | |
11521 | ||
11522 | /* Walk down the type tree of TYPE counting consecutive base elements. | |
11523 | If *MODEP is VOIDmode, then set it to the first valid floating point | |
11524 | or vector type. If a non-floating point or vector type is found, or | |
11525 | if a floating point or vector type that doesn't match a non-VOIDmode | |
11526 | *MODEP is found, then return -1, otherwise return the count in the | |
11527 | sub-tree. */ | |
11528 | ||
11529 | static int | |
11530 | rs6000_aggregate_candidate (const_tree type, machine_mode *modep) | |
11531 | { | |
11532 | machine_mode mode; | |
11533 | HOST_WIDE_INT size; | |
11534 | ||
11535 | switch (TREE_CODE (type)) | |
11536 | { | |
11537 | case REAL_TYPE: | |
11538 | mode = TYPE_MODE (type); | |
11539 | if (!SCALAR_FLOAT_MODE_P (mode)) | |
11540 | return -1; | |
11541 | ||
11542 | if (*modep == VOIDmode) | |
11543 | *modep = mode; | |
11544 | ||
11545 | if (*modep == mode) | |
11546 | return 1; | |
11547 | ||
11548 | break; | |
11549 | ||
11550 | case COMPLEX_TYPE: | |
11551 | mode = TYPE_MODE (TREE_TYPE (type)); | |
11552 | if (!SCALAR_FLOAT_MODE_P (mode)) | |
11553 | return -1; | |
11554 | ||
11555 | if (*modep == VOIDmode) | |
11556 | *modep = mode; | |
11557 | ||
11558 | if (*modep == mode) | |
11559 | return 2; | |
11560 | ||
11561 | break; | |
11562 | ||
11563 | case VECTOR_TYPE: | |
11564 | if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC) | |
11565 | return -1; | |
11566 | ||
11567 | /* Use V4SImode as representative of all 128-bit vector types. */ | |
11568 | size = int_size_in_bytes (type); | |
11569 | switch (size) | |
11570 | { | |
11571 | case 16: | |
11572 | mode = V4SImode; | |
11573 | break; | |
11574 | default: | |
11575 | return -1; | |
11576 | } | |
11577 | ||
11578 | if (*modep == VOIDmode) | |
11579 | *modep = mode; | |
11580 | ||
11581 | /* Vector modes are considered to be opaque: two vectors are | |
11582 | equivalent for the purposes of being homogeneous aggregates | |
11583 | if they are the same size. */ | |
11584 | if (*modep == mode) | |
11585 | return 1; | |
11586 | ||
11587 | break; | |
11588 | ||
11589 | case ARRAY_TYPE: | |
11590 | { | |
11591 | int count; | |
11592 | tree index = TYPE_DOMAIN (type); | |
11593 | ||
11594 | /* Can't handle incomplete types nor sizes that are not | |
11595 | fixed. */ | |
11596 | if (!COMPLETE_TYPE_P (type) | |
11597 | || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) | |
11598 | return -1; | |
11599 | ||
11600 | count = rs6000_aggregate_candidate (TREE_TYPE (type), modep); | |
11601 | if (count == -1 | |
11602 | || !index | |
11603 | || !TYPE_MAX_VALUE (index) | |
11604 | || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index)) | |
11605 | || !TYPE_MIN_VALUE (index) | |
11606 | || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index)) | |
11607 | || count < 0) | |
11608 | return -1; | |
11609 | ||
11610 | count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index)) | |
11611 | - tree_to_uhwi (TYPE_MIN_VALUE (index))); | |
11612 | ||
11613 | /* There must be no padding. */ | |
11614 | if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep))) | |
11615 | return -1; | |
11616 | ||
11617 | return count; | |
11618 | } | |
11619 | ||
11620 | case RECORD_TYPE: | |
11621 | { | |
11622 | int count = 0; | |
11623 | int sub_count; | |
11624 | tree field; | |
11625 | ||
11626 | /* Can't handle incomplete types nor sizes that are not | |
11627 | fixed. */ | |
11628 | if (!COMPLETE_TYPE_P (type) | |
11629 | || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) | |
11630 | return -1; | |
11631 | ||
11632 | for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) | |
11633 | { | |
11634 | if (TREE_CODE (field) != FIELD_DECL) | |
11635 | continue; | |
11636 | ||
11637 | sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep); | |
11638 | if (sub_count < 0) | |
11639 | return -1; | |
11640 | count += sub_count; | |
11641 | } | |
11642 | ||
11643 | /* There must be no padding. */ | |
11644 | if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep))) | |
11645 | return -1; | |
11646 | ||
11647 | return count; | |
11648 | } | |
11649 | ||
11650 | case UNION_TYPE: | |
11651 | case QUAL_UNION_TYPE: | |
11652 | { | |
11653 | /* These aren't very interesting except in a degenerate case. */ | |
11654 | int count = 0; | |
11655 | int sub_count; | |
11656 | tree field; | |
11657 | ||
11658 | /* Can't handle incomplete types nor sizes that are not | |
11659 | fixed. */ | |
11660 | if (!COMPLETE_TYPE_P (type) | |
11661 | || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) | |
11662 | return -1; | |
11663 | ||
11664 | for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) | |
11665 | { | |
11666 | if (TREE_CODE (field) != FIELD_DECL) | |
11667 | continue; | |
11668 | ||
11669 | sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep); | |
11670 | if (sub_count < 0) | |
11671 | return -1; | |
11672 | count = count > sub_count ? count : sub_count; | |
11673 | } | |
11674 | ||
11675 | /* There must be no padding. */ | |
11676 | if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep))) | |
11677 | return -1; | |
11678 | ||
11679 | return count; | |
11680 | } | |
11681 | ||
11682 | default: | |
11683 | break; | |
11684 | } | |
11685 | ||
11686 | return -1; | |
11687 | } | |
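
/* Editorial examples of the walk above (assuming AltiVec is enabled
   for the vector case):

     struct { double x; double y; };    -> *modep = DFmode,   count 2
     struct { _Complex float z; };      -> *modep = SFmode,   count 2
     struct { vector int v[2]; };       -> *modep = V4SImode, count 2

   whereas struct { double x; float y; } yields -1, because the SFmode
   of the second field does not match the DFmode already recorded.  */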
11688 | ||
11689 | /* If an argument, whose type is described by TYPE and MODE, is a homogeneous | |
11690 | float or vector aggregate that shall be passed in FP/vector registers | |
11691 | according to the ELFv2 ABI, return the homogeneous element mode in | |
11692 | *ELT_MODE and the number of elements in *N_ELTS, and return TRUE. | |
11693 | ||
11694 | Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */ | |
11695 | ||
11696 | static bool | |
11697 | rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type, | |
11698 | machine_mode *elt_mode, | |
11699 | int *n_elts) | |
11700 | { | |
11701 | /* Note that we do not accept complex types at the top level as | |
11702 | homogeneous aggregates; these types are handled via the | |
11703 | targetm.calls.split_complex_arg mechanism. Complex types | |
11704 | can be elements of homogeneous aggregates, however. */ | |
11705 | if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type)) | |
11706 | { | |
11707 | machine_mode field_mode = VOIDmode; | |
11708 | int field_count = rs6000_aggregate_candidate (type, &field_mode); | |
11709 | ||
11710 | if (field_count > 0) | |
11711 | { | |
11712 | int n_regs = (SCALAR_FLOAT_MODE_P (field_mode) ? | |
11713 | (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1); | |
11714 | ||
11715 | /* The ELFv2 ABI allows homogeneous aggregates to occupy | |
11716 | up to AGGR_ARG_NUM_REG registers. */ | |
11717 | if (field_count * n_regs <= AGGR_ARG_NUM_REG) | |
11718 | { | |
11719 | if (elt_mode) | |
11720 | *elt_mode = field_mode; | |
11721 | if (n_elts) | |
11722 | *n_elts = field_count; | |
11723 | return true; | |
11724 | } | |
11725 | } | |
11726 | } | |
11727 | ||
11728 | if (elt_mode) | |
11729 | *elt_mode = mode; | |
11730 | if (n_elts) | |
11731 | *n_elts = 1; | |
11732 | return false; | |
11733 | } | |
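| /* Illustrative sketch, not part of the original source; the types below | |
|    are hypothetical.  Under ELFv2, | |
|      struct hfa { double x, y, z; }; | |
|    would be discovered with *elt_mode == DFmode and *n_elts == 3, and so | |
|    occupies three consecutive FPRs, while | |
|      struct mixed { double x; int i; }; | |
|    fails the rs6000_aggregate_candidate walk and is passed as an | |
|    ordinary aggregate. */ | |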
11734 | ||
11735 | /* Return a nonzero value to say to return the function value in | |
11736 | memory, just as large structures are always returned. TYPE will be | |
11737 | the data type of the value, and FNTYPE will be the type of the | |
11738 | function doing the returning, or NULL for libcalls. | |
11739 | ||
11740 | The AIX ABI for the RS/6000 specifies that all structures are | |
11741 | returned in memory. The Darwin ABI does the same. | |
11742 | ||
11743 | For the Darwin 64-bit ABI, a function result can be returned in | |
11744 | registers or in memory, depending on the size of the return data | |
11745 | type. If it is returned in registers, the value occupies the same | |
11746 | registers as it would if it were the first and only function | |
11747 | argument. Otherwise, the function places its result in memory at | |
11748 | the location pointed to by GPR3. | |
11749 | ||
11750 | The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4, | |
11751 | but a draft put them in memory, and GCC used to implement the draft | |
11752 | instead of the final standard. Therefore, aix_struct_return | |
11753 | controls this instead of DEFAULT_ABI; V.4 targets needing backward | |
11754 | compatibility can change DRAFT_V4_STRUCT_RET to override the | |
11755 | default, and -m switches get the final word. See | |
11756 | rs6000_option_override_internal for more details. | |
11757 | ||
11758 | The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit | |
11759 | long double support is enabled. These values are returned in memory. | |
11760 | ||
11761 | int_size_in_bytes returns -1 for variable size objects, which go in | |
11762 | memory always. The cast to unsigned makes -1 > 8. */ | |
11763 | ||
11764 | static bool | |
11765 | rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) | |
11766 | { | |
11767 | /* For the Darwin64 ABI, test if we can fit the return value in regs. */ | |
11768 | if (TARGET_MACHO | |
11769 | && rs6000_darwin64_abi | |
11770 | && TREE_CODE (type) == RECORD_TYPE | |
11771 | && int_size_in_bytes (type) > 0) | |
11772 | { | |
11773 | CUMULATIVE_ARGS valcum; | |
11774 | rtx valret; | |
11775 | ||
11776 | valcum.words = 0; | |
11777 | valcum.fregno = FP_ARG_MIN_REG; | |
11778 | valcum.vregno = ALTIVEC_ARG_MIN_REG; | |
11779 | /* Do a trial code generation as if this were going to be passed | |
11780 | as an argument; if any part goes in memory, we return NULL. */ | |
11781 | valret = rs6000_darwin64_record_arg (&valcum, type, true, true); | |
11782 | if (valret) | |
11783 | return false; | |
11784 | /* Otherwise fall through to more conventional ABI rules. */ | |
11785 | } | |
11786 | ||
11787 | /* The ELFv2 ABI returns homogeneous float/vector aggregates in registers. */ | |
11788 | if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type, | |
11789 | NULL, NULL)) | |
11790 | return false; | |
11791 | ||
11792 | /* The ELFv2 ABI returns aggregates of up to 16 bytes in registers. */ | |
11793 | if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type) | |
11794 | && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16) | |
11795 | return false; | |
11796 | ||
11797 | if (AGGREGATE_TYPE_P (type) | |
11798 | && (aix_struct_return | |
11799 | || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8)) | |
11800 | return true; | |
11801 | ||
11802 | /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector | |
11803 | modes only exist for GCC vector types if -maltivec. */ | |
11804 | if (TARGET_32BIT && !TARGET_ALTIVEC_ABI | |
11805 | && ALTIVEC_VECTOR_MODE (TYPE_MODE (type))) | |
11806 | return false; | |
11807 | ||
11808 | /* Return synthetic vectors in memory. */ | |
11809 | if (TREE_CODE (type) == VECTOR_TYPE | |
11810 | && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8)) | |
11811 | { | |
11812 | static bool warned_for_return_big_vectors = false; | |
11813 | if (!warned_for_return_big_vectors) | |
11814 | { | |
11815 | warning (OPT_Wpsabi, "GCC vector returned by reference: " | |
11816 | "non-standard ABI extension with no compatibility guarantee"); | |
11817 | warned_for_return_big_vectors = true; | |
11818 | } | |
11819 | return true; | |
11820 | } | |
11821 | ||
11822 | if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD | |
11823 | && FLOAT128_IEEE_P (TYPE_MODE (type))) | |
11824 | return true; | |
11825 | ||
11826 | return false; | |
11827 | } | |
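| /* A sketch with hypothetical types, for illustration only: under ELFv2, | |
|    struct { int a[4]; } is 16 bytes, so the aggregate test above lets it | |
|    return in registers, whereas struct { int a[5]; } is 20 bytes and is | |
|    returned in memory. */ | |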
11828 | ||
11829 | /* Specify whether values returned in registers should be at the most | |
11830 | significant end of a register. We want aggregates returned by | |
11831 | value to match the way aggregates are passed to functions. */ | |
11832 | ||
11833 | static bool | |
11834 | rs6000_return_in_msb (const_tree valtype) | |
11835 | { | |
11836 | return (DEFAULT_ABI == ABI_ELFv2 | |
11837 | && BYTES_BIG_ENDIAN | |
11838 | && AGGREGATE_TYPE_P (valtype) | |
d7ab0e3d | 11839 | && rs6000_function_arg_padding (TYPE_MODE (valtype), |
11840 | valtype) == PAD_UPWARD); | |
01e91138 | 11841 | } |
11842 | ||
11843 | #ifdef HAVE_AS_GNU_ATTRIBUTE | |
11844 | /* Return TRUE if a call to function FNDECL may be one that | |
11845 | potentially affects the function calling ABI of the object file. */ | |
11846 | ||
11847 | static bool | |
11848 | call_ABI_of_interest (tree fndecl) | |
11849 | { | |
11850 | if (rs6000_gnu_attr && symtab->state == EXPANSION) | |
11851 | { | |
11852 | struct cgraph_node *c_node; | |
11853 | ||
11854 | /* Libcalls are always interesting. */ | |
11855 | if (fndecl == NULL_TREE) | |
11856 | return true; | |
11857 | ||
11858 | /* Any call to an external function is interesting. */ | |
11859 | if (DECL_EXTERNAL (fndecl)) | |
11860 | return true; | |
11861 | ||
11862 | /* Interesting functions that we are emitting in this object file. */ | |
11863 | c_node = cgraph_node::get (fndecl); | |
11864 | c_node = c_node->ultimate_alias_target (); | |
11865 | return !c_node->only_called_directly_p (); | |
11866 | } | |
11867 | return false; | |
11868 | } | |
11869 | #endif | |
11870 | ||
11871 | /* Initialize a variable CUM of type CUMULATIVE_ARGS | |
11872 | for a call to a function whose data type is FNTYPE. | |
11873 | For a library call, FNTYPE is 0 and RETURN_MODE is the return value mode. | |
11874 | ||
11875 | For incoming args we set the number of arguments in the prototype large | |
11876 | so we never return a PARALLEL. */ | |
11877 | ||
11878 | void | |
11879 | init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype, | |
11880 | rtx libname ATTRIBUTE_UNUSED, int incoming, | |
11881 | int libcall, int n_named_args, | |
11882 | tree fndecl ATTRIBUTE_UNUSED, | |
11883 | machine_mode return_mode ATTRIBUTE_UNUSED) | |
11884 | { | |
11885 | static CUMULATIVE_ARGS zero_cumulative; | |
11886 | ||
11887 | *cum = zero_cumulative; | |
11888 | cum->words = 0; | |
11889 | cum->fregno = FP_ARG_MIN_REG; | |
11890 | cum->vregno = ALTIVEC_ARG_MIN_REG; | |
11891 | cum->prototype = (fntype && prototype_p (fntype)); | |
11892 | cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall) | |
11893 | ? CALL_LIBCALL : CALL_NORMAL); | |
11894 | cum->sysv_gregno = GP_ARG_MIN_REG; | |
11895 | cum->stdarg = stdarg_p (fntype); | |
11896 | cum->libcall = libcall; | |
11897 | ||
11898 | cum->nargs_prototype = 0; | |
11899 | if (incoming || cum->prototype) | |
11900 | cum->nargs_prototype = n_named_args; | |
11901 | ||
11902 | /* Check for a longcall attribute. */ | |
11903 | if ((!fntype && rs6000_default_long_calls) | |
11904 | || (fntype | |
11905 | && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype)) | |
11906 | && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype)))) | |
11907 | cum->call_cookie |= CALL_LONG; | |
11908 | ||
11909 | if (TARGET_DEBUG_ARG) | |
11910 | { | |
11911 | fprintf (stderr, "\ninit_cumulative_args:"); | |
11912 | if (fntype) | |
11913 | { | |
11914 | tree ret_type = TREE_TYPE (fntype); | |
11915 | fprintf (stderr, " ret code = %s,", | |
11916 | get_tree_code_name (TREE_CODE (ret_type))); | |
11917 | } | |
11918 | ||
11919 | if (cum->call_cookie & CALL_LONG) | |
11920 | fprintf (stderr, " longcall,"); | |
11921 | ||
11922 | fprintf (stderr, " proto = %d, nargs = %d\n", | |
11923 | cum->prototype, cum->nargs_prototype); | |
11924 | } | |
11925 | ||
11926 | #ifdef HAVE_AS_GNU_ATTRIBUTE | |
11927 | if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4)) | |
11928 | { | |
11929 | cum->escapes = call_ABI_of_interest (fndecl); | |
11930 | if (cum->escapes) | |
11931 | { | |
11932 | tree return_type; | |
11933 | ||
11934 | if (fntype) | |
11935 | { | |
11936 | return_type = TREE_TYPE (fntype); | |
11937 | return_mode = TYPE_MODE (return_type); | |
11938 | } | |
11939 | else | |
11940 | return_type = lang_hooks.types.type_for_mode (return_mode, 0); | |
11941 | ||
11942 | if (return_type != NULL) | |
11943 | { | |
11944 | if (TREE_CODE (return_type) == RECORD_TYPE | |
11945 | && TYPE_TRANSPARENT_AGGR (return_type)) | |
11946 | { | |
11947 | return_type = TREE_TYPE (first_field (return_type)); | |
11948 | return_mode = TYPE_MODE (return_type); | |
11949 | } | |
11950 | if (AGGREGATE_TYPE_P (return_type) | |
11951 | && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type) | |
11952 | <= 8)) | |
11953 | rs6000_returns_struct = true; | |
11954 | } | |
11955 | if (SCALAR_FLOAT_MODE_P (return_mode)) | |
11956 | { | |
11957 | rs6000_passes_float = true; | |
11958 | if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT) | |
11959 | && (FLOAT128_IBM_P (return_mode) | |
11960 | || FLOAT128_IEEE_P (return_mode) | |
11961 | || (return_type != NULL | |
11962 | && (TYPE_MAIN_VARIANT (return_type) | |
11963 | == long_double_type_node)))) | |
11964 | rs6000_passes_long_double = true; | |
11965 | } | |
11966 | if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode) | |
11967 | || SPE_VECTOR_MODE (return_mode)) | |
11968 | rs6000_passes_vector = true; | |
11969 | } | |
11970 | } | |
11971 | #endif | |
11972 | ||
11973 | if (fntype | |
11974 | && !TARGET_ALTIVEC | |
11975 | && TARGET_ALTIVEC_ABI | |
11976 | && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype)))) | |
11977 | { | |
11978 | error ("cannot return value in vector register because" | |
11979 | " altivec instructions are disabled, use -maltivec" | |
11980 | " to enable them"); | |
11981 | } | |
11982 | } | |
11983 | \f | |
11984 | /* The mode the ABI uses for a word. This is not the same as word_mode | |
11985 | for -m32 -mpowerpc64. This is used to implement various target hooks. */ | |
11986 | ||
f77c4496 | 11987 | static scalar_int_mode |
01e91138 | 11988 | rs6000_abi_word_mode (void) |
11989 | { | |
11990 | return TARGET_32BIT ? SImode : DImode; | |
11991 | } | |
11992 | ||
11993 | /* Implement the TARGET_OFFLOAD_OPTIONS hook. */ | |
11994 | static char * | |
11995 | rs6000_offload_options (void) | |
11996 | { | |
11997 | if (TARGET_64BIT) | |
11998 | return xstrdup ("-foffload-abi=lp64"); | |
11999 | else | |
12000 | return xstrdup ("-foffload-abi=ilp32"); | |
12001 | } | |
12002 | ||
12003 | /* On rs6000, function arguments are promoted, as are function return | |
12004 | values. */ | |
12005 | ||
12006 | static machine_mode | |
12007 | rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED, | |
12008 | machine_mode mode, | |
12009 | int *punsignedp ATTRIBUTE_UNUSED, | |
12010 | const_tree, int) | |
12011 | { | |
12012 | PROMOTE_MODE (mode, *punsignedp, type); | |
12013 | ||
12014 | return mode; | |
12015 | } | |
12016 | ||
12017 | /* Return true if TYPE must be passed on the stack and not in registers. */ | |
12018 | ||
12019 | static bool | |
12020 | rs6000_must_pass_in_stack (machine_mode mode, const_tree type) | |
12021 | { | |
12022 | if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT) | |
12023 | return must_pass_in_stack_var_size (mode, type); | |
12024 | else | |
12025 | return must_pass_in_stack_var_size_or_pad (mode, type); | |
12026 | } | |
12027 | ||
12028 | static inline bool | |
12029 | is_complex_IBM_long_double (machine_mode mode) | |
12030 | { | |
12031 | return mode == ICmode || (!TARGET_IEEEQUAD && mode == TCmode); | |
12032 | } | |
12033 | ||
12034 | /* Whether ABI_V4 passes MODE args to a function in floating point | |
12035 | registers. */ | |
12036 | ||
12037 | static bool | |
12038 | abi_v4_pass_in_fpr (machine_mode mode) | |
12039 | { | |
12040 | if (!TARGET_FPRS || !TARGET_HARD_FLOAT) | |
12041 | return false; | |
12042 | if (TARGET_SINGLE_FLOAT && mode == SFmode) | |
12043 | return true; | |
12044 | if (TARGET_DOUBLE_FLOAT && mode == DFmode) | |
12045 | return true; | |
12046 | /* ABI_V4 passes complex IBM long double in 8 gprs. | |
12047 | Stupid, but we can't change the ABI now. */ | |
12048 | if (is_complex_IBM_long_double (mode)) | |
12049 | return false; | |
12050 | if (FLOAT128_2REG_P (mode)) | |
12051 | return true; | |
12052 | if (DECIMAL_FLOAT_MODE_P (mode)) | |
12053 | return true; | |
12054 | return false; | |
12055 | } | |
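| /* Illustration (a sketch, assuming hard float with TARGET_SINGLE_FLOAT | |
|    and TARGET_DOUBLE_FLOAT): under ABI_V4, SFmode and DFmode scalars go | |
|    in FPRs, and IBM extended (FLOAT128_2REG_P) takes an FPR pair, but | |
|    complex IBM long double (ICmode/TCmode) falls through to the 8-GPR | |
|    path described above. */ | |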
12056 | ||
d7ab0e3d | 12057 | /* Implement TARGET_FUNCTION_ARG_PADDING |
01e91138 | 12058 | |
12059 | For the AIX ABI structs are always stored left shifted in their | |
12060 | argument slot. */ | |
12061 | ||
d7ab0e3d | 12062 | static pad_direction |
12063 | rs6000_function_arg_padding (machine_mode mode, const_tree type) | |
01e91138 | 12064 | { |
12065 | #ifndef AGGREGATE_PADDING_FIXED | |
12066 | #define AGGREGATE_PADDING_FIXED 0 | |
12067 | #endif | |
12068 | #ifndef AGGREGATES_PAD_UPWARD_ALWAYS | |
12069 | #define AGGREGATES_PAD_UPWARD_ALWAYS 0 | |
12070 | #endif | |
12071 | ||
12072 | if (!AGGREGATE_PADDING_FIXED) | |
12073 | { | |
12074 | /* GCC used to pass structures of the same size as integer types as | |
d7ab0e3d | 12075 | if they were in fact integers, ignoring TARGET_FUNCTION_ARG_PADDING. |
01e91138 | 12076 | i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were |
12077 | passed padded downward, except that -mstrict-align further | |
12078 | muddied the water in that multi-component structures of 2 and 4 | |
12079 | bytes in size were passed padded upward. | |
12080 | ||
12081 | The following arranges for best compatibility with previous | |
12082 | versions of gcc, but removes the -mstrict-align dependency. */ | |
12083 | if (BYTES_BIG_ENDIAN) | |
12084 | { | |
12085 | HOST_WIDE_INT size = 0; | |
12086 | ||
12087 | if (mode == BLKmode) | |
12088 | { | |
12089 | if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST) | |
12090 | size = int_size_in_bytes (type); | |
12091 | } | |
12092 | else | |
12093 | size = GET_MODE_SIZE (mode); | |
12094 | ||
12095 | if (size == 1 || size == 2 || size == 4) | |
d7ab0e3d | 12096 | return PAD_DOWNWARD; |
01e91138 | 12097 | } |
d7ab0e3d | 12098 | return PAD_UPWARD; |
01e91138 | 12099 | } |
12100 | ||
12101 | if (AGGREGATES_PAD_UPWARD_ALWAYS) | |
12102 | { | |
12103 | if (type != 0 && AGGREGATE_TYPE_P (type)) | |
d7ab0e3d | 12104 | return PAD_UPWARD; |
01e91138 | 12105 | } |
12106 | ||
12107 | /* Fall back to the default. */ | |
d7ab0e3d | 12108 | return default_function_arg_padding (mode, type); |
01e91138 | 12109 | } |
12110 | ||
12111 | /* If defined, a C expression that gives the alignment boundary, in bits, | |
12112 | of an argument with the specified mode and type. If it is not defined, | |
12113 | PARM_BOUNDARY is used for all arguments. | |
12114 | ||
12115 | V.4 wants long longs and doubles to be double word aligned. Just | |
12116 | testing the mode size is a boneheaded way to do this as it means | |
12117 | that other types such as complex int are also double word aligned. | |
12118 | However, we're stuck with this because changing the ABI might break | |
12119 | existing library interfaces. | |
12120 | ||
12121 | Doubleword align SPE vectors. | |
12122 | Quadword align Altivec/VSX vectors. | |
12123 | Quadword align large synthetic vector types. */ | |
12124 | ||
12125 | static unsigned int | |
12126 | rs6000_function_arg_boundary (machine_mode mode, const_tree type) | |
12127 | { | |
12128 | machine_mode elt_mode; | |
12129 | int n_elts; | |
12130 | ||
12131 | rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts); | |
12132 | ||
12133 | if (DEFAULT_ABI == ABI_V4 | |
12134 | && (GET_MODE_SIZE (mode) == 8 | |
12135 | || (TARGET_HARD_FLOAT | |
12136 | && TARGET_FPRS | |
12137 | && !is_complex_IBM_long_double (mode) | |
12138 | && FLOAT128_2REG_P (mode)))) | |
12139 | return 64; | |
12140 | else if (FLOAT128_VECTOR_P (mode)) | |
12141 | return 128; | |
12142 | else if (SPE_VECTOR_MODE (mode) | |
12143 | || (type && TREE_CODE (type) == VECTOR_TYPE | |
12144 | && int_size_in_bytes (type) >= 8 | |
12145 | && int_size_in_bytes (type) < 16)) | |
12146 | return 64; | |
12147 | else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode) | |
12148 | || (type && TREE_CODE (type) == VECTOR_TYPE | |
12149 | && int_size_in_bytes (type) >= 16)) | |
12150 | return 128; | |
12151 | ||
12152 | /* Aggregate types that need > 8 byte alignment are quadword-aligned | |
12153 | in the parameter area in the ELFv2 ABI, and in the AIX ABI unless | |
12154 | -mcompat-align-parm is used. */ | |
12155 | if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm) | |
12156 | || DEFAULT_ABI == ABI_ELFv2) | |
12157 | && type && TYPE_ALIGN (type) > 64) | |
12158 | { | |
12159 | /* "Aggregate" means any AGGREGATE_TYPE except for single-element | |
12160 | or homogeneous float/vector aggregates here. We already handled | |
12161 | vector aggregates above, but still need to check for float here. */ | |
12162 | bool aggregate_p = (AGGREGATE_TYPE_P (type) | |
12163 | && !SCALAR_FLOAT_MODE_P (elt_mode)); | |
12164 | ||
12165 | /* We used to check for BLKmode instead of the above aggregate type | |
12166 | check. Warn when this results in any difference to the ABI. */ | |
12167 | if (aggregate_p != (mode == BLKmode)) | |
12168 | { | |
12169 | static bool warned; | |
12170 | if (!warned && warn_psabi) | |
12171 | { | |
12172 | warned = true; | |
12173 | inform (input_location, | |
12174 | "the ABI of passing aggregates with %d-byte alignment" | |
12175 | " has changed in GCC 5", | |
12176 | (int) TYPE_ALIGN (type) / BITS_PER_UNIT); | |
12177 | } | |
12178 | } | |
12179 | ||
12180 | if (aggregate_p) | |
12181 | return 128; | |
12182 | } | |
12183 | ||
12184 | /* Similar for the Darwin64 ABI. Note that for historical reasons we | |
12185 | implement the "aggregate type" check as a BLKmode check here; this | |
12186 | means certain aggregate types are in fact not aligned. */ | |
12187 | if (TARGET_MACHO && rs6000_darwin64_abi | |
12188 | && mode == BLKmode | |
12189 | && type && TYPE_ALIGN (type) > 64) | |
12190 | return 128; | |
12191 | ||
12192 | return PARM_BOUNDARY; | |
12193 | } | |
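| /* Worked illustration, not from the original source: under ABI_V4 a | |
|    DImode argument (GET_MODE_SIZE == 8) gets a 64-bit boundary, a | |
|    16-byte AltiVec vector gets 128 bits, and most other scalars fall | |
|    back to PARM_BOUNDARY. */ | |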
12194 | ||
12195 | /* The offset in words to the start of the parameter save area. */ | |
12196 | ||
12197 | static unsigned int | |
12198 | rs6000_parm_offset (void) | |
12199 | { | |
12200 | return (DEFAULT_ABI == ABI_V4 ? 2 | |
12201 | : DEFAULT_ABI == ABI_ELFv2 ? 4 | |
12202 | : 6); | |
12203 | } | |
12204 | ||
12205 | /* For a function parm of MODE and TYPE, return the starting word in | |
12206 | the parameter area. NWORDS of the parameter area are already used. */ | |
12207 | ||
12208 | static unsigned int | |
12209 | rs6000_parm_start (machine_mode mode, const_tree type, | |
12210 | unsigned int nwords) | |
12211 | { | |
12212 | unsigned int align; | |
12213 | ||
12214 | align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1; | |
12215 | return nwords + (-(rs6000_parm_offset () + nwords) & align); | |
12216 | } | |
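| /* A worked example (a sketch; assumes 64-bit AIX, where | |
|    rs6000_parm_offset () is 6 and PARM_BOUNDARY is 64): for an argument | |
|    with a 128-bit boundary, align == 128 / 64 - 1 == 1.  With NWORDS == 3 | |
|    already used, the result is 3 + (-(6 + 3) & 1) == 4, so the argument | |
|    starts at word 4, making its overall offset 6 + 4 == 10 words, i.e. | |
|    16-byte aligned. */ | |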
12217 | ||
12218 | /* Compute the size (in words) of a function argument. */ | |
12219 | ||
12220 | static unsigned long | |
12221 | rs6000_arg_size (machine_mode mode, const_tree type) | |
12222 | { | |
12223 | unsigned long size; | |
12224 | ||
12225 | if (mode != BLKmode) | |
12226 | size = GET_MODE_SIZE (mode); | |
12227 | else | |
12228 | size = int_size_in_bytes (type); | |
12229 | ||
12230 | if (TARGET_32BIT) | |
12231 | return (size + 3) >> 2; | |
12232 | else | |
12233 | return (size + 7) >> 3; | |
12234 | } | |
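| /* For example (illustrative numbers only): a 10-byte argument occupies | |
|    (10 + 3) >> 2 == 3 words in 32-bit mode and (10 + 7) >> 3 == 2 words | |
|    in 64-bit mode. */ | |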
12235 | \f | |
12236 | /* Use this to flush pending int fields. */ | |
12237 | ||
12238 | static void | |
12239 | rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum, | |
12240 | HOST_WIDE_INT bitpos, int final) | |
12241 | { | |
12242 | unsigned int startbit, endbit; | |
12243 | int intregs, intoffset; | |
01e91138 | 12244 | |
12245 | /* Handle the situations where a float is taking up the first half | |
12246 | of the GPR, and the other half is empty (typically due to | |
12247 | alignment restrictions). We can detect this by an 8-byte-aligned | |
12248 | int field, or by seeing that this is the final flush for this | |
12249 | argument. Count the word and continue on. */ | |
12250 | if (cum->floats_in_gpr == 1 | |
12251 | && (cum->intoffset % 64 == 0 | |
12252 | || (cum->intoffset == -1 && final))) | |
12253 | { | |
12254 | cum->words++; | |
12255 | cum->floats_in_gpr = 0; | |
12256 | } | |
12257 | ||
12258 | if (cum->intoffset == -1) | |
12259 | return; | |
12260 | ||
12261 | intoffset = cum->intoffset; | |
12262 | cum->intoffset = -1; | |
12263 | cum->floats_in_gpr = 0; | |
12264 | ||
12265 | if (intoffset % BITS_PER_WORD != 0) | |
12266 | { | |
517be012 | 12267 | unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD; |
12268 | if (!int_mode_for_size (bits, 0).exists ()) | |
01e91138 | 12269 | { |
12270 | /* We couldn't find an appropriate mode, which happens, | |
12271 | e.g., in packed structs when there are 3 bytes to load. | |
12272 | Back intoffset back to the beginning of the word in this | |
12273 | case. */ | |
12274 | intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD); | |
12275 | } | |
12276 | } | |
12277 | ||
12278 | startbit = ROUND_DOWN (intoffset, BITS_PER_WORD); | |
12279 | endbit = ROUND_UP (bitpos, BITS_PER_WORD); | |
12280 | intregs = (endbit - startbit) / BITS_PER_WORD; | |
12281 | cum->words += intregs; | |
12282 | /* words should be unsigned. */ | |
12283 | if ((unsigned)cum->words < (endbit/BITS_PER_WORD)) | |
12284 | { | |
12285 | int pad = (endbit/BITS_PER_WORD) - cum->words; | |
12286 | cum->words += pad; | |
12287 | } | |
12288 | } | |
12289 | ||
12290 | /* The darwin64 ABI calls for us to recurse down through structs, | |
12291 | looking for elements passed in registers. Unfortunately, we have | |
12292 | to track int register count here also because of misalignments | |
12293 | in powerpc alignment mode. */ | |
12294 | ||
12295 | static void | |
12296 | rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum, | |
12297 | const_tree type, | |
12298 | HOST_WIDE_INT startbitpos) | |
12299 | { | |
12300 | tree f; | |
12301 | ||
12302 | for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f)) | |
12303 | if (TREE_CODE (f) == FIELD_DECL) | |
12304 | { | |
12305 | HOST_WIDE_INT bitpos = startbitpos; | |
12306 | tree ftype = TREE_TYPE (f); | |
12307 | machine_mode mode; | |
12308 | if (ftype == error_mark_node) | |
12309 | continue; | |
12310 | mode = TYPE_MODE (ftype); | |
12311 | ||
12312 | if (DECL_SIZE (f) != 0 | |
12313 | && tree_fits_uhwi_p (bit_position (f))) | |
12314 | bitpos += int_bit_position (f); | |
12315 | ||
12316 | /* ??? FIXME: else assume zero offset. */ | |
12317 | ||
12318 | if (TREE_CODE (ftype) == RECORD_TYPE) | |
12319 | rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos); | |
12320 | else if (USE_FP_FOR_ARG_P (cum, mode)) | |
12321 | { | |
12322 | unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3; | |
12323 | rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0); | |
12324 | cum->fregno += n_fpregs; | |
12325 | /* Single-precision floats present a special problem for | |
12326 | us, because they are smaller than an 8-byte GPR, and so | |
12327 | the structure-packing rules combined with the standard | |
12328 | varargs behavior mean that we want to pack float/float | |
12329 | and float/int combinations into a single register's | |
12330 | space. This is complicated by the arg advance flushing, | |
12331 | which works on arbitrarily large groups of int-type | |
12332 | fields. */ | |
12333 | if (mode == SFmode) | |
12334 | { | |
12335 | if (cum->floats_in_gpr == 1) | |
12336 | { | |
12337 | /* Two floats in a word; count the word and reset | |
12338 | the float count. */ | |
12339 | cum->words++; | |
12340 | cum->floats_in_gpr = 0; | |
12341 | } | |
12342 | else if (bitpos % 64 == 0) | |
12343 | { | |
12344 | /* A float at the beginning of an 8-byte word; | |
12345 | count it and put off adjusting cum->words until | |
12346 | we see if an arg advance flush is going to do it | |
12347 | for us. */ | |
12348 | cum->floats_in_gpr++; | |
12349 | } | |
12350 | else | |
12351 | { | |
12352 | /* The float is at the end of a word, preceded | |
12353 | by integer fields, so the arg advance flush | |
12354 | just above has already set cum->words and | |
12355 | everything is taken care of. */ | |
12356 | } | |
12357 | } | |
12358 | else | |
12359 | cum->words += n_fpregs; | |
12360 | } | |
12361 | else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1)) | |
12362 | { | |
12363 | rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0); | |
12364 | cum->vregno++; | |
12365 | cum->words += 2; | |
12366 | } | |
12367 | else if (cum->intoffset == -1) | |
12368 | cum->intoffset = bitpos; | |
12369 | } | |
12370 | } | |
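| /* Packing sketch (hypothetical type): for struct { float a, b; }, field | |
|    A starts an 8-byte word and merely bumps floats_in_gpr; field B then | |
|    sees floats_in_gpr == 1, counts the shared word once, and resets the | |
|    float count, so the pair consumes a single GPR's worth of parameter | |
|    space. */ | |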
12371 | ||
12372 | /* Check for an item that needs to be considered specially under the Darwin | |
12373 | 64-bit ABI. These are record types where the mode is BLKmode or the | |
12374 | structure is exactly 8 bytes in size. */ | |
12375 | static int | |
12376 | rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type) | |
12377 | { | |
12378 | return rs6000_darwin64_abi | |
12379 | && ((mode == BLKmode | |
12380 | && TREE_CODE (type) == RECORD_TYPE | |
12381 | && int_size_in_bytes (type) > 0) | |
12382 | || (type && TREE_CODE (type) == RECORD_TYPE | |
12383 | && int_size_in_bytes (type) == 8)) ? 1 : 0; | |
12384 | } | |
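| /* Examples (hypothetical types, assuming the modes GCC would normally | |
|    assign): struct { char c[24]; } has BLKmode and checks true, | |
|    struct { double d; } is exactly 8 bytes and also checks true, while a | |
|    4-byte struct { int i; } with SImode checks false. */ | |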
12385 | ||
12386 | /* Update the data in CUM to advance over an argument | |
12387 | of mode MODE and data type TYPE. | |
12388 | (TYPE is null for libcalls where that information may not be available.) | |
12389 | ||
12390 | Note that for args passed by reference, function_arg will be called | |
12391 | with MODE and TYPE set to that of the pointer to the arg, not the arg | |
12392 | itself. */ | |
12393 | ||
12394 | static void | |
12395 | rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode, | |
12396 | const_tree type, bool named, int depth) | |
12397 | { | |
12398 | machine_mode elt_mode; | |
12399 | int n_elts; | |
12400 | ||
12401 | rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts); | |
12402 | ||
12403 | /* Only tick off an argument if we're not recursing. */ | |
12404 | if (depth == 0) | |
12405 | cum->nargs_prototype--; | |
12406 | ||
12407 | #ifdef HAVE_AS_GNU_ATTRIBUTE | |
12408 | if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4) | |
12409 | && cum->escapes) | |
12410 | { | |
12411 | if (SCALAR_FLOAT_MODE_P (mode)) | |
12412 | { | |
12413 | rs6000_passes_float = true; | |
12414 | if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT) | |
12415 | && (FLOAT128_IBM_P (mode) | |
12416 | || FLOAT128_IEEE_P (mode) | |
12417 | || (type != NULL | |
12418 | && TYPE_MAIN_VARIANT (type) == long_double_type_node))) | |
12419 | rs6000_passes_long_double = true; | |
12420 | } | |
12421 | if ((named && ALTIVEC_OR_VSX_VECTOR_MODE (mode)) | |
12422 | || (SPE_VECTOR_MODE (mode) | |
12423 | && !cum->stdarg | |
12424 | && cum->sysv_gregno <= GP_ARG_MAX_REG)) | |
12425 | rs6000_passes_vector = true; | |
12426 | } | |
12427 | #endif | |
12428 | ||
12429 | if (TARGET_ALTIVEC_ABI | |
12430 | && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode) | |
12431 | || (type && TREE_CODE (type) == VECTOR_TYPE | |
12432 | && int_size_in_bytes (type) == 16))) | |
12433 | { | |
12434 | bool stack = false; | |
12435 | ||
12436 | if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named)) | |
12437 | { | |
12438 | cum->vregno += n_elts; | |
12439 | ||
12440 | if (!TARGET_ALTIVEC) | |
12441 | error ("cannot pass argument in vector register because" | |
12442 | " altivec instructions are disabled, use -maltivec" | |
12443 | " to enable them"); | |
12444 | ||
12445 | /* PowerPC64 Linux and AIX allocate GPRs for a vector argument | |
12446 | even if it is going to be passed in a vector register. | |
12447 | Darwin does the same for variable-argument functions. */ | |
12448 | if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) | |
12449 | && TARGET_64BIT) | |
12450 | || (cum->stdarg && DEFAULT_ABI != ABI_V4)) | |
12451 | stack = true; | |
12452 | } | |
12453 | else | |
12454 | stack = true; | |
12455 | ||
12456 | if (stack) | |
12457 | { | |
12458 | int align; | |
12459 | ||
12460 | /* Vector parameters must be 16-byte aligned. In 32-bit | |
12461 | mode this means we need to take into account the offset | |
12462 | to the parameter save area. In 64-bit mode, they just | |
12463 | have to start on an even word, since the parameter save | |
12464 | area is 16-byte aligned. */ | |
12465 | if (TARGET_32BIT) | |
12466 | align = -(rs6000_parm_offset () + cum->words) & 3; | |
12467 | else | |
12468 | align = cum->words & 1; | |
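| /* Worked example (illustrative numbers): in 32-bit V.4 mode the | |
|    save-area offset is 2 words, so with cum->words == 5 we get | |
|    align == -(2 + 5) & 3 == 1, pushing the vector to word 6, where | |
|    2 + 6 == 8 words == 32 bytes, which is 16-byte aligned. */ | |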
12469 | cum->words += align + rs6000_arg_size (mode, type); | |
12470 | ||
12471 | if (TARGET_DEBUG_ARG) | |
12472 | { | |
12473 | fprintf (stderr, "function_adv: words = %2d, align=%d, ", | |
12474 | cum->words, align); | |
12475 | fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n", | |
12476 | cum->nargs_prototype, cum->prototype, | |
12477 | GET_MODE_NAME (mode)); | |
12478 | } | |
12479 | } | |
12480 | } | |
12481 | else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode) | |
12482 | && !cum->stdarg | |
12483 | && cum->sysv_gregno <= GP_ARG_MAX_REG) | |
12484 | cum->sysv_gregno++; | |
12485 | ||
12486 | else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type)) | |
12487 | { | |
12488 | int size = int_size_in_bytes (type); | |
12489 | /* Variable sized types have size == -1 and are | |
12490 | treated as if consisting entirely of ints. | |
12491 | Pad to a 16-byte boundary if needed. */ | |
12492 | if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD | |
12493 | && (cum->words % 2) != 0) | |
12494 | cum->words++; | |
12495 | /* For varargs, we can just go up by the size of the struct. */ | |
12496 | if (!named) | |
12497 | cum->words += (size + 7) / 8; | |
12498 | else | |
12499 | { | |
12500 | /* It is tempting to say int register count just goes up by | |
12501 | sizeof(type)/8, but this is wrong in a case such as | |
12502 | { int; double; int; } [powerpc alignment]. We have to | |
12503 | grovel through the fields for these too. */ | |
12504 | cum->intoffset = 0; | |
12505 | cum->floats_in_gpr = 0; | |
12506 | rs6000_darwin64_record_arg_advance_recurse (cum, type, 0); | |
12507 | rs6000_darwin64_record_arg_advance_flush (cum, | |
12508 | size * BITS_PER_UNIT, 1); | |
12509 | } | |
12510 | if (TARGET_DEBUG_ARG) | |
12511 | { | |
12512 | fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d", | |
12513 | cum->words, TYPE_ALIGN (type), size); | |
12514 | fprintf (stderr, | |
12515 | "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n", | |
12516 | cum->nargs_prototype, cum->prototype, | |
12517 | GET_MODE_NAME (mode)); | |
12518 | } | |
12519 | } | |
12520 | else if (DEFAULT_ABI == ABI_V4) | |
12521 | { | |
12522 | if (abi_v4_pass_in_fpr (mode)) | |
12523 | { | |
12524 | /* _Decimal128 must use an even/odd register pair. This assumes | |
12525 | that the register number is odd when fregno is odd. */ | |
12526 | if (mode == TDmode && (cum->fregno % 2) == 1) | |
12527 | cum->fregno++; | |
12528 | ||
12529 | if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0) | |
12530 | <= FP_ARG_V4_MAX_REG) | |
12531 | cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3; | |
12532 | else | |
12533 | { | |
12534 | cum->fregno = FP_ARG_V4_MAX_REG + 1; | |
12535 | if (mode == DFmode || FLOAT128_IBM_P (mode) | |
12536 | || mode == DDmode || mode == TDmode) | |
12537 | cum->words += cum->words & 1; | |
12538 | cum->words += rs6000_arg_size (mode, type); | |
12539 | } | |
12540 | } | |
12541 | else | |
12542 | { | |
12543 | int n_words = rs6000_arg_size (mode, type); | |
12544 | int gregno = cum->sysv_gregno; | |
12545 | ||
12546 | /* Long long and SPE vectors are put in (r3,r4), (r5,r6), | |
12547 | (r7,r8) or (r9,r10). So is any other 2-word item, such | |
12548 | as complex int, due to a historical mistake. */ | |
12549 | if (n_words == 2) | |
12550 | gregno += (1 - gregno) & 1; | |
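| /* E.g. (a sketch): gregno == 4 (r4) becomes 5 (r5), while gregno == 3 | |
|    (r3) is already odd and stays put, keeping pairs on (r3,r4), | |
|    (r5,r6), etc. */ | |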
12551 | ||
12552 | /* Multi-reg args are not split between registers and stack. */ | |
12553 | if (gregno + n_words - 1 > GP_ARG_MAX_REG) | |
12554 | { | |
12555 | /* Long long and SPE vectors are aligned on the stack. | |
12556 | So are other 2-word items, such as complex int, due to | |
12557 | a historical mistake. */ | |
12558 | if (n_words == 2) | |
12559 | cum->words += cum->words & 1; | |
12560 | cum->words += n_words; | |
12561 | } | |
12562 | ||
12563 | /* Note: we keep accumulating gregno even after we've started | |
12564 | spilling to the stack; the overshoot is how we communicate to | |
12565 | expand_builtin_saveregs that spilling has begun. */ | |
12566 | cum->sysv_gregno = gregno + n_words; | |
12567 | } | |
12568 | ||
12569 | if (TARGET_DEBUG_ARG) | |
12570 | { | |
12571 | fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ", | |
12572 | cum->words, cum->fregno); | |
12573 | fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ", | |
12574 | cum->sysv_gregno, cum->nargs_prototype, cum->prototype); | |
12575 | fprintf (stderr, "mode = %4s, named = %d\n", | |
12576 | GET_MODE_NAME (mode), named); | |
12577 | } | |
12578 | } | |
12579 | else | |
12580 | { | |
12581 | int n_words = rs6000_arg_size (mode, type); | |
12582 | int start_words = cum->words; | |
12583 | int align_words = rs6000_parm_start (mode, type, start_words); | |
12584 | ||
12585 | cum->words = align_words + n_words; | |
12586 | ||
12587 | if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT && TARGET_FPRS) | |
12588 | { | |
12589 | /* _Decimal128 must be passed in an even/odd float register pair. | |
12590 | This assumes that the register number is odd when fregno is | |
12591 | odd. */ | |
12592 | if (elt_mode == TDmode && (cum->fregno % 2) == 1) | |
12593 | cum->fregno++; | |
12594 | cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3); | |
12595 | } | |
12596 | ||
12597 | if (TARGET_DEBUG_ARG) | |
12598 | { | |
12599 | fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ", | |
12600 | cum->words, cum->fregno); | |
12601 | fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ", | |
12602 | cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode)); | |
12603 | fprintf (stderr, "named = %d, align = %d, depth = %d\n", | |
12604 | named, align_words - start_words, depth); | |
12605 | } | |
12606 | } | |
12607 | } | |
12608 | ||
12609 | static void | |
12610 | rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode, | |
12611 | const_tree type, bool named) | |
12612 | { | |
12613 | rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named, | |
12614 | 0); | |
12615 | } | |
12616 | ||
12617 | static rtx | |
12618 | spe_build_register_parallel (machine_mode mode, int gregno) | |
12619 | { | |
12620 | rtx r1, r3, r5, r7; | |
12621 | ||
12622 | switch (mode) | |
12623 | { | |
916ace94 | 12624 | case E_DFmode: |
01e91138 | 12625 | r1 = gen_rtx_REG (DImode, gregno); |
12626 | r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx); | |
12627 | return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1)); | |
12628 | ||
916ace94 | 12629 | case E_DCmode: |
12630 | case E_TFmode: | |
01e91138 | 12631 | r1 = gen_rtx_REG (DImode, gregno); |
12632 | r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx); | |
12633 | r3 = gen_rtx_REG (DImode, gregno + 2); | |
12634 | r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8)); | |
12635 | return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3)); | |
12636 | ||
916ace94 | 12637 | case E_TCmode: |
01e91138 | 12638 | r1 = gen_rtx_REG (DImode, gregno); |
12639 | r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx); | |
12640 | r3 = gen_rtx_REG (DImode, gregno + 2); | |
12641 | r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8)); | |
12642 | r5 = gen_rtx_REG (DImode, gregno + 4); | |
12643 | r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16)); | |
12644 | r7 = gen_rtx_REG (DImode, gregno + 6); | |
12645 | r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24)); | |
12646 | return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7)); | |
12647 | ||
12648 | default: | |
12649 | gcc_unreachable (); | |
12650 | } | |
12651 | } | |
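| /* Shape sketch (not from the original source): for E_DCmode with | |
|    gregno == 5 this builds | |
|      (parallel [(expr_list (reg:DI 5) (const_int 0)) | |
|                 (expr_list (reg:DI 7) (const_int 8))]) | |
|    i.e. two DImode halves at byte offsets 0 and 8. */ | |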
12652 | ||
12653 | /* Determine where to put a SIMD argument on the SPE. */ | |
12654 | static rtx | |
12655 | rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode, | |
12656 | const_tree type) | |
12657 | { | |
12658 | int gregno = cum->sysv_gregno; | |
12659 | ||
12660 | /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but | |
12661 | are passed and returned in a pair of GPRs for ABI compatibility. */ | |
12662 | if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode | |
12663 | || mode == DCmode || mode == TCmode)) | |
12664 | { | |
12665 | int n_words = rs6000_arg_size (mode, type); | |
12666 | ||
12667 | /* Doubles go in an odd/even register pair (r5/r6, etc). */ | |
12668 | if (mode == DFmode) | |
12669 | gregno += (1 - gregno) & 1; | |
12670 | ||
12671 | /* Multi-reg args are not split between registers and stack. */ | |
12672 | if (gregno + n_words - 1 > GP_ARG_MAX_REG) | |
12673 | return NULL_RTX; | |
12674 | ||
12675 | return spe_build_register_parallel (mode, gregno); | |
12676 | } | |
12677 | if (cum->stdarg) | |
12678 | { | |
12679 | int n_words = rs6000_arg_size (mode, type); | |
12680 | ||
12681 | /* SPE vectors are put in odd registers. */ | |
12682 | if (n_words == 2 && (gregno & 1) == 0) | |
12683 | gregno += 1; | |
12684 | ||
12685 | if (gregno + n_words - 1 <= GP_ARG_MAX_REG) | |
12686 | { | |
12687 | rtx r1, r2; | |
12688 | machine_mode m = SImode; | |
12689 | ||
12690 | r1 = gen_rtx_REG (m, gregno); | |
12691 | r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx); | |
12692 | r2 = gen_rtx_REG (m, gregno + 1); | |
12693 | r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4)); | |
12694 | return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2)); | |
12695 | } | |
12696 | else | |
12697 | return NULL_RTX; | |
12698 | } | |
12699 | else | |
12700 | { | |
12701 | if (gregno <= GP_ARG_MAX_REG) | |
12702 | return gen_rtx_REG (mode, gregno); | |
12703 | else | |
12704 | return NULL_RTX; | |
12705 | } | |
12706 | } | |
12707 | ||
12708 | /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the | |
12709 | structure between cum->intoffset and bitpos to integer registers. */ | |
12710 | ||
12711 | static void | |
12712 | rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum, | |
12713 | HOST_WIDE_INT bitpos, rtx rvec[], int *k) | |
12714 | { | |
12715 | machine_mode mode; | |
12716 | unsigned int regno; | |
12717 | unsigned int startbit, endbit; | |
12718 | int this_regno, intregs, intoffset; | |
12719 | rtx reg; | |
12720 | ||
12721 | if (cum->intoffset == -1) | |
12722 | return; | |
12723 | ||
12724 | intoffset = cum->intoffset; | |
12725 | cum->intoffset = -1; | |
12726 | ||
12727 | /* If this is the trailing part of a word, try to only load that | |
12728 | much into the register. Otherwise load the whole register. Note | |
12729 | that in the latter case we may pick up unwanted bits. It's not a | |
12730 | problem at the moment, but we may wish to revisit it. */ | |
12731 | ||
12732 | if (intoffset % BITS_PER_WORD != 0) | |
12733 | { | |
517be012 | 12734 | unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD; |
12735 | if (!int_mode_for_size (bits, 0).exists (&mode)) | |
01e91138 | 12736 | { |
12737 | /* We couldn't find an appropriate mode, which happens, | |
12738 | e.g., in packed structs when there are 3 bytes to load. | |
12739 | Back intoffset back to the beginning of the word in this | |
12740 | case. */ | |
12741 | intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD); | |
12742 | mode = word_mode; | |
12743 | } | |
12744 | } | |
12745 | else | |
12746 | mode = word_mode; | |
12747 | ||
12748 | startbit = ROUND_DOWN (intoffset, BITS_PER_WORD); | |
12749 | endbit = ROUND_UP (bitpos, BITS_PER_WORD); | |
12750 | intregs = (endbit - startbit) / BITS_PER_WORD; | |
12751 | this_regno = cum->words + intoffset / BITS_PER_WORD; | |
12752 | ||
12753 | if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno) | |
12754 | cum->use_stack = 1; | |
12755 | ||
12756 | intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno); | |
12757 | if (intregs <= 0) | |
12758 | return; | |
12759 | ||
12760 | intoffset /= BITS_PER_UNIT; | |
12761 | do | |
12762 | { | |
12763 | regno = GP_ARG_MIN_REG + this_regno; | |
12764 | reg = gen_rtx_REG (mode, regno); | |
12765 | rvec[(*k)++] = | |
12766 | gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset)); | |
12767 | ||
12768 | this_regno += 1; | |
12769 | intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1; | |
12770 | mode = word_mode; | |
12771 | intregs -= 1; | |
12772 | } | |
12773 | while (intregs > 0); | |
12774 | } | |
12775 | ||
12776 | /* Recursive workhorse for the following. */ | |
12777 | ||
12778 | static void | |
12779 | rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type, | |
12780 | HOST_WIDE_INT startbitpos, rtx rvec[], | |
12781 | int *k) | |
12782 | { | |
12783 | tree f; | |
12784 | ||
12785 | for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f)) | |
12786 | if (TREE_CODE (f) == FIELD_DECL) | |
12787 | { | |
12788 | HOST_WIDE_INT bitpos = startbitpos; | |
12789 | tree ftype = TREE_TYPE (f); | |
12790 | machine_mode mode; | |
12791 | if (ftype == error_mark_node) | |
12792 | continue; | |
12793 | mode = TYPE_MODE (ftype); | |
12794 | ||
12795 | if (DECL_SIZE (f) != 0 | |
12796 | && tree_fits_uhwi_p (bit_position (f))) | |
12797 | bitpos += int_bit_position (f); | |
12798 | ||
12799 | /* ??? FIXME: else assume zero offset. */ | |
12800 | ||
12801 | if (TREE_CODE (ftype) == RECORD_TYPE) | |
12802 | rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k); | |
12803 | else if (cum->named && USE_FP_FOR_ARG_P (cum, mode)) | |
12804 | { | |
12805 | unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3; | |
12806 | #if 0 | |
12807 | switch (mode) | |
12808 | { | |
916ace94 | 12809 | case E_SCmode: mode = SFmode; break; |
12810 | case E_DCmode: mode = DFmode; break; | |
12811 | case E_TCmode: mode = TFmode; break; | |
01e91138 | 12812 | default: break; |
12813 | } | |
12814 | #endif | |
12815 | rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k); | |
12816 | if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1) | |
12817 | { | |
12818 | gcc_assert (cum->fregno == FP_ARG_MAX_REG | |
12819 | && (mode == TFmode || mode == TDmode)); | |
12820 | /* Long double or _Decimal128 split over regs and memory. */ | |
12821 | mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode; | |
12822 | cum->use_stack = 1; | |
12823 | } | |
12824 | rvec[(*k)++] | |
12825 | = gen_rtx_EXPR_LIST (VOIDmode, | |
12826 | gen_rtx_REG (mode, cum->fregno++), | |
12827 | GEN_INT (bitpos / BITS_PER_UNIT)); | |
12828 | if (FLOAT128_2REG_P (mode)) | |
12829 | cum->fregno++; | |
12830 | } | |
12831 | else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1)) | |
12832 | { | |
12833 | rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k); | |
12834 | rvec[(*k)++] | |
12835 | = gen_rtx_EXPR_LIST (VOIDmode, | |
12836 | gen_rtx_REG (mode, cum->vregno++), | |
12837 | GEN_INT (bitpos / BITS_PER_UNIT)); | |
12838 | } | |
12839 | else if (cum->intoffset == -1) | |
12840 | cum->intoffset = bitpos; | |
12841 | } | |
12842 | } | |
12843 | ||
12844 | /* For the darwin64 ABI, we want to construct a PARALLEL consisting of | |
12845 | the register(s) to be used for each field and subfield of a struct | |
12846 | being passed by value, along with the offset of where the | |
12847 | register's value may be found in the block. FP fields go in FP | |
12848 | register, vector fields go in vector registers, and everything | |
12849 | else goes in int registers, packed as in memory. | |
12850 | ||
12851 | This code is also used for function return values. RETVAL indicates | |
12852 | whether this is the case. | |
12853 | ||
12854 | Much of this is taken from the SPARC V9 port, which has a similar | |
12855 | calling convention. */ | |
12856 | ||
12857 | static rtx | |
12858 | rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type, | |
12859 | bool named, bool retval) | |
12860 | { | |
12861 | rtx rvec[FIRST_PSEUDO_REGISTER]; | |
12862 | int k = 1, kbase = 1; | |
12863 | HOST_WIDE_INT typesize = int_size_in_bytes (type); | |
12864 | /* This is a copy; modifications are not visible to our caller. */ | |
12865 | CUMULATIVE_ARGS copy_cum = *orig_cum; | |
12866 | CUMULATIVE_ARGS *cum = &copy_cum; | |
12867 | ||
12868 | /* Pad to 16 byte boundary if needed. */ | |
12869 | if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD | |
12870 | && (cum->words % 2) != 0) | |
12871 | cum->words++; | |
12872 | ||
12873 | cum->intoffset = 0; | |
12874 | cum->use_stack = 0; | |
12875 | cum->named = named; | |
12876 | ||
12877 | /* Put entries into rvec[] for individual FP and vector fields, and | |
12878 | for the chunks of memory that go in int regs. Note we start at | |
12879 | element 1; 0 is reserved for an indication of using memory, and | |
12880 | may or may not be filled in below. */ | |
12881 | rs6000_darwin64_record_arg_recurse (cum, type, /* startbitpos = */ 0, rvec, &k); | |
12882 | rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k); | |
12883 | ||
12884 | /* If any part of the struct went on the stack put all of it there. | |
12885 | This hack is because the generic code for | |
12886 | FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register | |
12887 | parts of the struct are not at the beginning. */ | |
12888 | if (cum->use_stack) | |
12889 | { | |
12890 | if (retval) | |
12891 | return NULL_RTX; /* doesn't go in registers at all */ | |
12892 | kbase = 0; | |
12893 | rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx); | |
12894 | } | |
12895 | if (k > 1 || cum->use_stack) | |
12896 | return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase])); | |
12897 | else | |
12898 | return NULL_RTX; | |
12899 | } | |
12900 | ||
12901 | /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */ | |
12902 | ||
12903 | static rtx | |
12904 | rs6000_mixed_function_arg (machine_mode mode, const_tree type, | |
12905 | int align_words) | |
12906 | { | |
12907 | int n_units; | |
12908 | int i, k; | |
12909 | rtx rvec[GP_ARG_NUM_REG + 1]; | |
12910 | ||
12911 | if (align_words >= GP_ARG_NUM_REG) | |
12912 | return NULL_RTX; | |
12913 | ||
12914 | n_units = rs6000_arg_size (mode, type); | |
12915 | ||
12916 | /* Optimize the simple case where the arg fits in one gpr, except in | |
12917 | the case of BLKmode due to assign_parms assuming that registers are | |
12918 | BITS_PER_WORD wide. */ | |
12919 | if (n_units == 0 | |
12920 | || (n_units == 1 && mode != BLKmode)) | |
12921 | return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words); | |
12922 | ||
12923 | k = 0; | |
12924 | if (align_words + n_units > GP_ARG_NUM_REG) | |
12925 | /* Not all of the arg fits in gprs. Say that it goes in memory too, | |
12926 | using a magic NULL_RTX component. | |
12927 | This is not strictly correct. Only some of the arg belongs in | |
12928 | memory, not all of it. However, the normal scheme using | |
12929 | function_arg_partial_nregs can result in unusual subregs, e.g. | |
12930 | (subreg:SI (reg:DF) 4), which are not handled well. The code to | |
12931 | store the whole arg to memory is often more efficient than code | |
12932 | to store pieces, and we know that space is available in the right | |
12933 | place for the whole arg. */ | |
12934 | rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx); | |
12935 | ||
12936 | i = 0; | |
12937 | do | |
12938 | { | |
12939 | rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words); | |
12940 | rtx off = GEN_INT (i++ * 4); | |
12941 | rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off); | |
12942 | } | |
12943 | while (++align_words < GP_ARG_NUM_REG && --n_units != 0); | |
12944 | ||
12945 | return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec)); | |
12946 | } | |
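| /* Behavior sketch (illustrative values): a BLKmode 8-byte argument at | |
|    align_words == 7 has n_units == 2 but only one GPR left, so the | |
|    result is a PARALLEL of the NULL_RTX memory marker plus (reg:SI 10) | |
|    at offset 0: the whole argument goes to memory and the first word | |
|    also travels in r10. */ | |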
12947 | ||
12948 | /* We have an argument of MODE and TYPE that goes into FPRs or VRs, | |
12949 | but must also be copied into the parameter save area starting at | |
12950 | offset ALIGN_WORDS. Fill in RVEC with the elements corresponding | |
12951 | to the GPRs and/or memory. Return the number of elements used. */ | |
12952 | ||
12953 | static int | |
12954 | rs6000_psave_function_arg (machine_mode mode, const_tree type, | |
12955 | int align_words, rtx *rvec) | |
12956 | { | |
12957 | int k = 0; | |
12958 | ||
12959 | if (align_words < GP_ARG_NUM_REG) | |
12960 | { | |
12961 | int n_words = rs6000_arg_size (mode, type); | |
12962 | ||
12963 | if (align_words + n_words > GP_ARG_NUM_REG | |
12964 | || mode == BLKmode | |
12965 | || (TARGET_32BIT && TARGET_POWERPC64)) | |
12966 | { | |
12967 | /* If this is partially on the stack, then we only | |
12968 | include the portion actually in registers here. */ | |
12969 | machine_mode rmode = TARGET_32BIT ? SImode : DImode; | |
12970 | int i = 0; | |
12971 | ||
12972 | if (align_words + n_words > GP_ARG_NUM_REG) | |
12973 | { | |
12974 | /* Not all of the arg fits in gprs. Say that it goes in memory | |
12975 | too, using a magic NULL_RTX component. Also see comment in | |
12976 | rs6000_mixed_function_arg for why the normal | |
12977 | function_arg_partial_nregs scheme doesn't work in this case. */ | |
12978 | rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx); | |
12979 | } | |
12980 | ||
12981 | do | |
12982 | { | |
12983 | rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words); | |
12984 | rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode)); | |
12985 | rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off); | |
12986 | } | |
12987 | while (++align_words < GP_ARG_NUM_REG && --n_words != 0); | |
12988 | } | |
12989 | else | |
12990 | { | |
12991 | /* The whole arg fits in gprs. */ | |
12992 | rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words); | |
12993 | rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx); | |
12994 | } | |
12995 | } | |
12996 | else | |
12997 | { | |
12998 | /* It's entirely in memory. */ | |
12999 | rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx); | |
13000 | } | |
13001 | ||
13002 | return k; | |
13003 | } | |
13004 | ||
13005 | /* RVEC is a vector of K components of an argument of mode MODE. | |
13006 | Construct the final function_arg return value from it. */ | |
13007 | ||
13008 | static rtx | |
13009 | rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k) | |
13010 | { | |
13011 | gcc_assert (k >= 1); | |
13012 | ||
13013 | /* Avoid returning a PARALLEL in the trivial cases. */ | |
13014 | if (k == 1) | |
13015 | { | |
13016 | if (XEXP (rvec[0], 0) == NULL_RTX) | |
13017 | return NULL_RTX; | |
13018 | ||
13019 | if (GET_MODE (XEXP (rvec[0], 0)) == mode) | |
13020 | return XEXP (rvec[0], 0); | |
13021 | } | |
13022 | ||
13023 | return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec)); | |
13024 | } | |
13025 | ||
13026 | /* Determine where to put an argument to a function. | |
13027 | Value is zero to push the argument on the stack, | |
13028 | or a hard register in which to store the argument. | |
13029 | ||
13030 | MODE is the argument's machine mode. | |
13031 | TYPE is the data type of the argument (as a tree). | |
13032 | This is null for libcalls where that information may | |
13033 | not be available. | |
13034 | CUM is a variable of type CUMULATIVE_ARGS which gives info about | |
13035 | the preceding args and about the function being called. It is | |
13036 | not modified in this routine. | |
13037 | NAMED is nonzero if this argument is a named parameter | |
13038 | (otherwise it is an extra parameter matching an ellipsis). | |
13039 | ||
13040 | On RS/6000 the first eight words of non-FP args are normally in registers | |
13041 | and the rest are pushed. Under AIX, the first 13 FP args are in registers. | |
13042 | Under V.4, the first 8 FP args are in registers. | |
13043 | ||
13044 | If this is floating-point and no prototype is specified, we use | |
13045 | both an FP and integer register (or possibly FP reg and stack). Library | |
13046 | functions (when CALL_LIBCALL is set) always have the proper types for args, | |
13047 | so we can pass the FP value just in one register. emit_library_function | |
13048 | doesn't support PARALLEL anyway. | |
13049 | ||
13050 | Note that for args passed by reference, function_arg will be called | |
13051 | with MODE and TYPE set to that of the pointer to the arg, not the arg | |
13052 | itself. */ | |
13053 | ||
13054 | static rtx | |
13055 | rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode, | |
13056 | const_tree type, bool named) | |
13057 | { | |
13058 | CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); | |
13059 | enum rs6000_abi abi = DEFAULT_ABI; | |
13060 | machine_mode elt_mode; | |
13061 | int n_elts; | |
13062 | ||
13063 | /* Return a marker indicating whether we need to set or clear the CR1 | |
13064 | bit that V.4 uses to say FP args were passed in registers. | |
13065 | Assume that we don't need the marker for software floating point, | |
13066 | or compiler generated library calls. */ | |
13067 | if (mode == VOIDmode) | |
13068 | { | |
13069 | if (abi == ABI_V4 | |
13070 | && (cum->call_cookie & CALL_LIBCALL) == 0 | |
13071 | && (cum->stdarg | |
13072 | || (cum->nargs_prototype < 0 | |
13073 | && (cum->prototype || TARGET_NO_PROTOTYPE)))) | |
13074 | { | |
13075 | /* For the SPE, we need to crxor CR6 always. */ | |
13076 | if (TARGET_SPE_ABI) | |
13077 | return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS); | |
13078 | else if (TARGET_HARD_FLOAT && TARGET_FPRS) | |
13079 | return GEN_INT (cum->call_cookie | |
13080 | | ((cum->fregno == FP_ARG_MIN_REG) | |
13081 | ? CALL_V4_SET_FP_ARGS | |
13082 | : CALL_V4_CLEAR_FP_ARGS)); | |
13083 | } | |
13084 | ||
13085 | return GEN_INT (cum->call_cookie & ~CALL_LIBCALL); | |
13086 | } | |
13087 | ||
13088 | rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts); | |
13089 | ||
13090 | if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type)) | |
13091 | { | |
13092 | rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false); | |
13093 | if (rslt != NULL_RTX) | |
13094 | return rslt; | |
13095 | /* Else fall through to usual handling. */ | |
13096 | } | |
13097 | ||
13098 | if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named)) | |
13099 | { | |
13100 | rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1]; | |
13101 | rtx r, off; | |
13102 | int i, k = 0; | |
13103 | ||
13104 | /* Do we also need to pass this argument in the parameter save area? | |
13105 | Library support functions for IEEE 128-bit are assumed to not need the | |
13106 | value passed both in GPRs and in vector registers. */ | |
13107 | if (TARGET_64BIT && !cum->prototype | |
13108 | && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode))) | |
13109 | { | |
13110 | int align_words = ROUND_UP (cum->words, 2); | |
13111 | k = rs6000_psave_function_arg (mode, type, align_words, rvec); | |
13112 | } | |
13113 | ||
13114 | /* Describe where this argument goes in the vector registers. */ | |
13115 | for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++) | |
13116 | { | |
13117 | r = gen_rtx_REG (elt_mode, cum->vregno + i); | |
13118 | off = GEN_INT (i * GET_MODE_SIZE (elt_mode)); | |
13119 | rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off); | |
13120 | } | |
13121 | ||
13122 | return rs6000_finish_function_arg (mode, rvec, k); | |
13123 | } | |
13124 | else if (TARGET_ALTIVEC_ABI | |
13125 | && (ALTIVEC_OR_VSX_VECTOR_MODE (mode) | |
13126 | || (type && TREE_CODE (type) == VECTOR_TYPE | |
13127 | && int_size_in_bytes (type) == 16))) | |
13128 | { | |
13129 | if (named || abi == ABI_V4) | |
13130 | return NULL_RTX; | |
13131 | else | |
13132 | { | |
13133 | /* Vector parameters to varargs functions under AIX or Darwin | |
13134 | get passed in memory and possibly also in GPRs. */ | |
13135 | int align, align_words, n_words; | |
13136 | machine_mode part_mode; | |
13137 | ||
13138 | /* Vector parameters must be 16-byte aligned. In 32-bit | |
13139 | mode this means we need to take into account the offset | |
13140 | to the parameter save area. In 64-bit mode, they just | |
13141 | have to start on an even word, since the parameter save | |
13142 | area is 16-byte aligned. */ | |
13143 | if (TARGET_32BIT) | |
13144 | align = -(rs6000_parm_offset () + cum->words) & 3; | |
13145 | else | |
13146 | align = cum->words & 1; | |
13147 | align_words = cum->words + align; | |
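| /* Worked example with assumed values: in 32-bit mode with a 2-word | |
| parameter-area offset, if cum->words is 3 then align = -(2 + 3) & 3 | |
| = 3, so align_words = 6 and the vector begins at word 8 overall, | |
| i.e. on a 16-byte boundary. */ | |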
13148 | ||
13149 | /* Out of registers? Memory, then. */ | |
13150 | if (align_words >= GP_ARG_NUM_REG) | |
13151 | return NULL_RTX; | |
13152 | ||
13153 | if (TARGET_32BIT && TARGET_POWERPC64) | |
13154 | return rs6000_mixed_function_arg (mode, type, align_words); | |
13155 | ||
13156 | /* The vector value goes in GPRs. Only the part of the | |
13157 | value in GPRs is reported here. */ | |
13158 | part_mode = mode; | |
13159 | n_words = rs6000_arg_size (mode, type); | |
13160 | if (align_words + n_words > GP_ARG_NUM_REG) | |
13161 | /* Fortunately, there are only two possibilities: the value | |
13162 | is either wholly in GPRs or half in GPRs and half not. */ | |
13163 | part_mode = DImode; | |
13164 | ||
13165 | return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words); | |
13166 | } | |
13167 | } | |
13168 | else if (TARGET_SPE_ABI && TARGET_SPE | |
13169 | && (SPE_VECTOR_MODE (mode) | |
13170 | || (TARGET_E500_DOUBLE && (mode == DFmode | |
13171 | || mode == DCmode | |
13172 | || mode == TFmode | |
13173 | || mode == TCmode)))) | |
13174 | return rs6000_spe_function_arg (cum, mode, type); | |
13175 | ||
13176 | else if (abi == ABI_V4) | |
13177 | { | |
13178 | if (abi_v4_pass_in_fpr (mode)) | |
13179 | { | |
13180 | /* _Decimal128 must use an even/odd register pair. This assumes | |
13181 | that the register number is odd when fregno is odd. */ | |
13182 | if (mode == TDmode && (cum->fregno % 2) == 1) | |
13183 | cum->fregno++; | |
13184 | ||
13185 | if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0) | |
13186 | <= FP_ARG_V4_MAX_REG) | |
13187 | return gen_rtx_REG (mode, cum->fregno); | |
13188 | else | |
13189 | return NULL_RTX; | |
13190 | } | |
13191 | else | |
13192 | { | |
13193 | int n_words = rs6000_arg_size (mode, type); | |
13194 | int gregno = cum->sysv_gregno; | |
13195 | ||
13196 | /* Long long and SPE vectors are put in (r3,r4), (r5,r6), | |
13197 | (r7,r8) or (r9,r10), as is any other 2-word item such as | |
13198 | complex int, due to a historical mistake. */ | |
13199 | if (n_words == 2) | |
13200 | gregno += (1 - gregno) & 1; | |
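| /* This rounds GREGNO up to an odd register number, where an aligned | |
| pair starts (GP_ARG_MIN_REG is r3): e.g. gregno == 4 gives | |
| (1 - 4) & 1 == 1 and bumps to r5, while gregno == 3 adds 0. */ | |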
13201 | ||
13202 | /* Multi-reg args are not split between registers and stack. */ | |
13203 | if (gregno + n_words - 1 > GP_ARG_MAX_REG) | |
13204 | return NULL_RTX; | |
13205 | ||
13206 | if (TARGET_32BIT && TARGET_POWERPC64) | |
13207 | return rs6000_mixed_function_arg (mode, type, | |
13208 | gregno - GP_ARG_MIN_REG); | |
13209 | return gen_rtx_REG (mode, gregno); | |
13210 | } | |
13211 | } | |
13212 | else | |
13213 | { | |
13214 | int align_words = rs6000_parm_start (mode, type, cum->words); | |
13215 | ||
13216 | /* _Decimal128 must be passed in an even/odd float register pair. | |
13217 | This assumes that the register number is odd when fregno is odd. */ | |
13218 | if (elt_mode == TDmode && (cum->fregno % 2) == 1) | |
13219 | cum->fregno++; | |
13220 | ||
13221 | if (USE_FP_FOR_ARG_P (cum, elt_mode)) | |
13222 | { | |
13223 | rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1]; | |
13224 | rtx r, off; | |
13225 | int i, k = 0; | |
13226 | unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3; | |
13227 | int fpr_words; | |
13228 | ||
13229 | /* Do we also need to pass this argument in the parameter | |
13230 | save area? */ | |
13231 | if (type && (cum->nargs_prototype <= 0 | |
13232 | || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) | |
13233 | && TARGET_XL_COMPAT | |
13234 | && align_words >= GP_ARG_NUM_REG))) | |
13235 | k = rs6000_psave_function_arg (mode, type, align_words, rvec); | |
13236 | ||
13237 | /* Describe where this argument goes in the fprs. */ | |
13238 | for (i = 0; i < n_elts | |
13239 | && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++) | |
13240 | { | |
13241 | /* Check if the argument is split over registers and memory. | |
13242 | This can only ever happen for long double or _Decimal128; | |
13243 | complex types are handled via split_complex_arg. */ | |
13244 | machine_mode fmode = elt_mode; | |
13245 | if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1) | |
13246 | { | |
13247 | gcc_assert (FLOAT128_2REG_P (fmode)); | |
13248 | fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode; | |
13249 | } | |
13250 | ||
13251 | r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg); | |
13252 | off = GEN_INT (i * GET_MODE_SIZE (elt_mode)); | |
13253 | rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off); | |
13254 | } | |
13255 | ||
13256 | /* If there were not enough FPRs to hold the argument, the rest | |
13257 | usually goes into memory. However, if the current position | |
13258 | is still within the register parameter area, a portion may | |
13259 | actually have to go into GPRs. | |
13260 | ||
13261 | Note that it may happen that the portion of the argument | |
13262 | passed in the first "half" of the first GPR was already | |
13263 | passed in the last FPR as well. | |
13264 | ||
13265 | For unnamed arguments, we already set up GPRs to cover the | |
13266 | whole argument in rs6000_psave_function_arg, so there is | |
13267 | nothing further to do at this point. */ | |
13268 | fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8); | |
13269 | if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG | |
13270 | && cum->nargs_prototype > 0) | |
13271 | { | |
13272 | static bool warned; | |
13273 | ||
13274 | machine_mode rmode = TARGET_32BIT ? SImode : DImode; | |
13275 | int n_words = rs6000_arg_size (mode, type); | |
13276 | ||
13277 | align_words += fpr_words; | |
13278 | n_words -= fpr_words; | |
13279 | ||
13280 | do | |
13281 | { | |
13282 | r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words); | |
13283 | off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode)); | |
13284 | rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off); | |
13285 | } | |
13286 | while (++align_words < GP_ARG_NUM_REG && --n_words != 0); | |
13287 | ||
13288 | if (!warned && warn_psabi) | |
13289 | { | |
13290 | warned = true; | |
13291 | inform (input_location, | |
13292 | "the ABI of passing homogeneous float aggregates" | |
13293 | " has changed in GCC 5"); | |
13294 | } | |
13295 | } | |
13296 | ||
13297 | return rs6000_finish_function_arg (mode, rvec, k); | |
13298 | } | |
13299 | else if (align_words < GP_ARG_NUM_REG) | |
13300 | { | |
13301 | if (TARGET_32BIT && TARGET_POWERPC64) | |
13302 | return rs6000_mixed_function_arg (mode, type, align_words); | |
13303 | ||
13304 | return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words); | |
13305 | } | |
13306 | else | |
13307 | return NULL_RTX; | |
13308 | } | |
13309 | } | |
13310 | \f | |
13311 | /* For an arg passed partly in registers and partly in memory, this is | |
13312 | the number of bytes passed in registers. For args passed entirely in | |
13313 | registers or entirely in memory, zero. When an arg is described by a | |
13314 | PARALLEL, perhaps using more than one register type, this function | |
13315 | returns the number of bytes used by the first element of the PARALLEL. */ | |
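| /* Example, assuming 64-bit figures (8 argument words of 8 bytes): a | |
| 24-byte aggregate whose first word lands at argument word 6 has only | |
| 2 GPR words left, so 16 bytes go in registers and 16 is returned. */ | |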
13316 | ||
13317 | static int | |
13318 | rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode, | |
13319 | tree type, bool named) | |
13320 | { | |
13321 | CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); | |
13322 | bool passed_in_gprs = true; | |
13323 | int ret = 0; | |
13324 | int align_words; | |
13325 | machine_mode elt_mode; | |
13326 | int n_elts; | |
13327 | ||
13328 | rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts); | |
13329 | ||
13330 | if (DEFAULT_ABI == ABI_V4) | |
13331 | return 0; | |
13332 | ||
13333 | if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named)) | |
13334 | { | |
13335 | /* If we are passing this arg in the fixed parameter save area (gprs or | |
13336 | memory) as well as VRs, we do not use the partial bytes mechanism; | |
13337 | instead, rs6000_function_arg will return a PARALLEL including a memory | |
13338 | element as necessary. Library support functions for IEEE 128-bit are | |
13339 | assumed to not need the value passed both in GPRs and in vector | |
13340 | registers. */ | |
13341 | if (TARGET_64BIT && !cum->prototype | |
13342 | && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode))) | |
13343 | return 0; | |
13344 | ||
13345 | /* Otherwise, we pass in VRs only. Check for partial copies. */ | |
13346 | passed_in_gprs = false; | |
13347 | if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1) | |
13348 | ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16; | |
13349 | } | |
13350 | ||
13351 | /* In this complicated case we just disable the partial_nregs code. */ | |
13352 | if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type)) | |
13353 | return 0; | |
13354 | ||
13355 | align_words = rs6000_parm_start (mode, type, cum->words); | |
13356 | ||
13357 | if (USE_FP_FOR_ARG_P (cum, elt_mode)) | |
13358 | { | |
13359 | unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3; | |
13360 | ||
13361 | /* If we are passing this arg in the fixed parameter save area | |
13362 | (gprs or memory) as well as FPRs, we do not use the partial | |
13363 | bytes mechanism; instead, rs6000_function_arg will return a | |
13364 | PARALLEL including a memory element as necessary. */ | |
13365 | if (type | |
13366 | && (cum->nargs_prototype <= 0 | |
13367 | || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) | |
13368 | && TARGET_XL_COMPAT | |
13369 | && align_words >= GP_ARG_NUM_REG))) | |
13370 | return 0; | |
13371 | ||
13372 | /* Otherwise, we pass in FPRs only. Check for partial copies. */ | |
13373 | passed_in_gprs = false; | |
13374 | if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1) | |
13375 | { | |
13376 | /* Compute number of bytes / words passed in FPRs. If there | |
13377 | is still space available in the register parameter area | |
13378 | *after* that amount, a part of the argument will be passed | |
13379 | in GPRs. In that case, the total amount passed in any | |
13380 | registers is equal to the amount that would have been passed | |
13381 | in GPRs if everything were passed there, so we fall back to | |
13382 | the GPR code below to compute the appropriate value. */ | |
13383 | int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno) | |
13384 | * MIN (8, GET_MODE_SIZE (elt_mode))); | |
13385 | int fpr_words = fpr / (TARGET_32BIT ? 4 : 8); | |
13386 | ||
13387 | if (align_words + fpr_words < GP_ARG_NUM_REG) | |
13388 | passed_in_gprs = true; | |
13389 | else | |
13390 | ret = fpr; | |
13391 | } | |
13392 | } | |
13393 | ||
13394 | if (passed_in_gprs | |
13395 | && align_words < GP_ARG_NUM_REG | |
13396 | && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type)) | |
13397 | ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8); | |
13398 | ||
13399 | if (ret != 0 && TARGET_DEBUG_ARG) | |
13400 | fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret); | |
13401 | ||
13402 | return ret; | |
13403 | } | |
13404 | \f | |
13405 | /* A C expression that indicates when an argument must be passed by | |
13406 | reference. If nonzero for an argument, a copy of that argument is | |
13407 | made in memory and a pointer to the argument is passed instead of | |
13408 | the argument itself. The pointer is passed in whatever way is | |
13409 | appropriate for passing a pointer to that type. | |
13410 | ||
13411 | Under V.4, aggregates and long double are passed by reference. | |
13412 | ||
13413 | As an extension to all 32-bit ABIs, AltiVec vectors are passed by | |
13414 | reference unless the AltiVec vector extension ABI is in force. | |
13415 | ||
13416 | As an extension to all ABIs, variable sized types are passed by | |
13417 | reference. */ | |
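| /* For example, under V.4 a call f (s), where s has type | |
| struct { int a[4]; }, is emitted as f (&tmp) with tmp a copy of s | |
| made in the caller. */ | |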
13418 | ||
13419 | static bool | |
13420 | rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED, | |
13421 | machine_mode mode, const_tree type, | |
13422 | bool named ATTRIBUTE_UNUSED) | |
13423 | { | |
13424 | if (!type) | |
13425 | return 0; | |
13426 | ||
13427 | if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD | |
13428 | && FLOAT128_IEEE_P (TYPE_MODE (type))) | |
13429 | { | |
13430 | if (TARGET_DEBUG_ARG) | |
13431 | fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n"); | |
13432 | return 1; | |
13433 | } | |
13434 | ||
13435 | if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type)) | |
13436 | { | |
13437 | if (TARGET_DEBUG_ARG) | |
13438 | fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n"); | |
13439 | return 1; | |
13440 | } | |
13441 | ||
13442 | if (int_size_in_bytes (type) < 0) | |
13443 | { | |
13444 | if (TARGET_DEBUG_ARG) | |
13445 | fprintf (stderr, "function_arg_pass_by_reference: variable size\n"); | |
13446 | return 1; | |
13447 | } | |
13448 | ||
13449 | /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector | |
13450 | modes only exist for GCC vector types if -maltivec. */ | |
13451 | if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode)) | |
13452 | { | |
13453 | if (TARGET_DEBUG_ARG) | |
13454 | fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n"); | |
13455 | return 1; | |
13456 | } | |
13457 | ||
13458 | /* Pass synthetic vectors in memory. */ | |
13459 | if (TREE_CODE (type) == VECTOR_TYPE | |
13460 | && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8)) | |
13461 | { | |
13462 | static bool warned_for_pass_big_vectors = false; | |
13463 | if (TARGET_DEBUG_ARG) | |
13464 | fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n"); | |
13465 | if (!warned_for_pass_big_vectors) | |
13466 | { | |
13467 | warning (OPT_Wpsabi, "GCC vector passed by reference: " | |
13468 | "non-standard ABI extension with no compatibility guarantee"); | |
13469 | warned_for_pass_big_vectors = true; | |
13470 | } | |
13471 | return 1; | |
13472 | } | |
13473 | ||
13474 | return 0; | |
13475 | } | |
13476 | ||
13477 | /* Process parameter of type TYPE after ARGS_SO_FAR parameters were | |
13478 | already processed. Return true if the parameter must be passed | |
13479 | (fully or partially) on the stack. */ | |
13480 | ||
13481 | static bool | |
13482 | rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type) | |
13483 | { | |
13484 | machine_mode mode; | |
13485 | int unsignedp; | |
13486 | rtx entry_parm; | |
13487 | ||
13488 | /* Catch errors. */ | |
13489 | if (type == NULL || type == error_mark_node) | |
13490 | return true; | |
13491 | ||
13492 | /* Handle types with no storage requirement. */ | |
13493 | if (TYPE_MODE (type) == VOIDmode) | |
13494 | return false; | |
13495 | ||
13496 | /* Handle complex types. */ | |
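| /* (A complex argument is passed as two values of its element type, | |
| so the element type is deliberately checked twice; each recursive | |
| call also advances ARGS_SO_FAR past one part.) */ | |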
13497 | if (TREE_CODE (type) == COMPLEX_TYPE) | |
13498 | return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)) | |
13499 | || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))); | |
13500 | ||
13501 | /* Handle transparent aggregates. */ | |
13502 | if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE) | |
13503 | && TYPE_TRANSPARENT_AGGR (type)) | |
13504 | type = TREE_TYPE (first_field (type)); | |
13505 | ||
13506 | /* See if this arg was passed by invisible reference. */ | |
13507 | if (pass_by_reference (get_cumulative_args (args_so_far), | |
13508 | TYPE_MODE (type), type, true)) | |
13509 | type = build_pointer_type (type); | |
13510 | ||
13511 | /* Find mode as it is passed by the ABI. */ | |
13512 | unsignedp = TYPE_UNSIGNED (type); | |
13513 | mode = promote_mode (type, TYPE_MODE (type), &unsignedp); | |
13514 | ||
13515 | /* If we must pass in stack, we need a stack. */ | |
13516 | if (rs6000_must_pass_in_stack (mode, type)) | |
13517 | return true; | |
13518 | ||
13519 | /* If there is no incoming register, we need a stack. */ | |
13520 | entry_parm = rs6000_function_arg (args_so_far, mode, type, true); | |
13521 | if (entry_parm == NULL) | |
13522 | return true; | |
13523 | ||
13524 | /* Likewise if we need to pass both in registers and on the stack. */ | |
13525 | if (GET_CODE (entry_parm) == PARALLEL | |
13526 | && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX) | |
13527 | return true; | |
13528 | ||
13529 | /* Also true if we're partially in registers and partially not. */ | |
13530 | if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0) | |
13531 | return true; | |
13532 | ||
13533 | /* Update info on where next arg arrives in registers. */ | |
13534 | rs6000_function_arg_advance (args_so_far, mode, type, true); | |
13535 | return false; | |
13536 | } | |
13537 | ||
13538 | /* Return true if FUN has no prototype, has a variable argument | |
13539 | list, or passes any parameter in memory. */ | |
13540 | ||
13541 | static bool | |
13542 | rs6000_function_parms_need_stack (tree fun, bool incoming) | |
13543 | { | |
13544 | tree fntype, result; | |
13545 | CUMULATIVE_ARGS args_so_far_v; | |
13546 | cumulative_args_t args_so_far; | |
13547 | ||
13548 | if (!fun) | |
13549 | /* Must be a libcall; libcalls use only register parms. */ | |
13550 | return false; | |
13551 | ||
13552 | fntype = fun; | |
13553 | if (!TYPE_P (fun)) | |
13554 | fntype = TREE_TYPE (fun); | |
13555 | ||
13556 | /* Varargs functions need the parameter save area. */ | |
13557 | if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype)) | |
13558 | return true; | |
13559 | ||
13560 | INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX); | |
13561 | args_so_far = pack_cumulative_args (&args_so_far_v); | |
13562 | ||
13563 | /* When incoming, we will have been passed the function decl. | |
13564 | It is necessary to use the decl to handle K&R style functions, | |
13565 | where TYPE_ARG_TYPES may not be available. */ | |
13566 | if (incoming) | |
13567 | { | |
13568 | gcc_assert (DECL_P (fun)); | |
13569 | result = DECL_RESULT (fun); | |
13570 | } | |
13571 | else | |
13572 | result = TREE_TYPE (fntype); | |
13573 | ||
13574 | if (result && aggregate_value_p (result, fntype)) | |
13575 | { | |
13576 | if (!TYPE_P (result)) | |
13577 | result = TREE_TYPE (result); | |
13578 | result = build_pointer_type (result); | |
13579 | rs6000_parm_needs_stack (args_so_far, result); | |
13580 | } | |
13581 | ||
13582 | if (incoming) | |
13583 | { | |
13584 | tree parm; | |
13585 | ||
13586 | for (parm = DECL_ARGUMENTS (fun); | |
13587 | parm && parm != void_list_node; | |
13588 | parm = TREE_CHAIN (parm)) | |
13589 | if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm))) | |
13590 | return true; | |
13591 | } | |
13592 | else | |
13593 | { | |
13594 | function_args_iterator args_iter; | |
13595 | tree arg_type; | |
13596 | ||
13597 | FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter) | |
13598 | if (rs6000_parm_needs_stack (args_so_far, arg_type)) | |
13599 | return true; | |
13600 | } | |
13601 | ||
13602 | return false; | |
13603 | } | |
13604 | ||
13605 | /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is | |
13606 | usually a constant depending on the ABI. However, in the ELFv2 ABI | |
13607 | the register parameter area is optional when calling a function that | |
13608 | has a prototype in scope, has no variable argument list, and passes | |
13609 | all parameters in registers. */ | |
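| /* For instance, in the 64-bit ELFv2 ABI a call to a prototyped | |
| int f (int, int) needs no parameter save area and this returns 0, | |
| while a varargs or unprototyped callee still gets 64 bytes. */ | |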
13610 | ||
13611 | int | |
13612 | rs6000_reg_parm_stack_space (tree fun, bool incoming) | |
13613 | { | |
13614 | int reg_parm_stack_space; | |
13615 | ||
13616 | switch (DEFAULT_ABI) | |
13617 | { | |
13618 | default: | |
13619 | reg_parm_stack_space = 0; | |
13620 | break; | |
13621 | ||
13622 | case ABI_AIX: | |
13623 | case ABI_DARWIN: | |
13624 | reg_parm_stack_space = TARGET_64BIT ? 64 : 32; | |
13625 | break; | |
13626 | ||
13627 | case ABI_ELFv2: | |
13628 | /* ??? Recomputing this every time is a bit expensive. Is there | |
13629 | a place to cache this information? */ | |
13630 | if (rs6000_function_parms_need_stack (fun, incoming)) | |
13631 | reg_parm_stack_space = TARGET_64BIT ? 64 : 32; | |
13632 | else | |
13633 | reg_parm_stack_space = 0; | |
13634 | break; | |
13635 | } | |
13636 | ||
13637 | return reg_parm_stack_space; | |
13638 | } | |
13639 | ||
13640 | static void | |
13641 | rs6000_move_block_from_reg (int regno, rtx x, int nregs) | |
13642 | { | |
13643 | int i; | |
13644 | machine_mode reg_mode = TARGET_32BIT ? SImode : DImode; | |
13645 | ||
13646 | if (nregs == 0) | |
13647 | return; | |
13648 | ||
13649 | for (i = 0; i < nregs; i++) | |
13650 | { | |
13651 | rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode)); | |
13652 | if (reload_completed) | |
13653 | { | |
13654 | if (! strict_memory_address_p (reg_mode, XEXP (tem, 0))) | |
13655 | tem = NULL_RTX; | |
13656 | else | |
13657 | tem = simplify_gen_subreg (reg_mode, x, BLKmode, | |
13658 | i * GET_MODE_SIZE (reg_mode)); | |
13659 | } | |
13660 | else | |
13661 | tem = replace_equiv_address (tem, XEXP (tem, 0)); | |
13662 | ||
13663 | gcc_assert (tem); | |
13664 | ||
13665 | emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i)); | |
13666 | } | |
13667 | } | |
13668 | \f | |
13669 | /* Perform any actions needed for a function that is receiving a | |
13670 | variable number of arguments. | |
13671 | ||
13672 | CUM is as above. | |
13673 | ||
13674 | MODE and TYPE are the mode and type of the current parameter. | |
13675 | ||
13676 | PRETEND_SIZE is a variable that should be set to the amount of stack | |
13677 | that must be pushed by the prolog to pretend that our caller pushed | |
13678 | it. | |
13679 | ||
13680 | Normally, this macro will push all remaining incoming registers on the | |
13681 | stack and set PRETEND_SIZE to the length of the registers pushed. */ | |
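| /* E.g. for int f (int a, ...) under the V.4 ABI, the code below | |
| spills r4-r10 into the register save area, and (guarded by a branch | |
| on the CR bit the caller set) f1-f8 as well, so va_arg can find | |
| them. */ | |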
13682 | ||
13683 | static void | |
13684 | setup_incoming_varargs (cumulative_args_t cum, machine_mode mode, | |
13685 | tree type, int *pretend_size ATTRIBUTE_UNUSED, | |
13686 | int no_rtl) | |
13687 | { | |
13688 | CUMULATIVE_ARGS next_cum; | |
13689 | int reg_size = TARGET_32BIT ? 4 : 8; | |
13690 | rtx save_area = NULL_RTX, mem; | |
13691 | int first_reg_offset; | |
13692 | alias_set_type set; | |
13693 | ||
13694 | /* Skip the last named argument. */ | |
13695 | next_cum = *get_cumulative_args (cum); | |
13696 | rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0); | |
13697 | ||
13698 | if (DEFAULT_ABI == ABI_V4) | |
13699 | { | |
13700 | first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG; | |
13701 | ||
13702 | if (! no_rtl) | |
13703 | { | |
13704 | int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0; | |
13705 | HOST_WIDE_INT offset = 0; | |
13706 | ||
13707 | /* Try to optimize the size of the varargs save area. | |
13708 | The ABI requires that ap.reg_save_area is doubleword | |
13709 | aligned, but we don't need to allocate space for all | |
13710 | the bytes, but only for those to which we will actually | |
13711 | save anything. */ | |
13712 | if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG) | |
13713 | gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset; | |
13714 | if (TARGET_HARD_FLOAT && TARGET_FPRS | |
13715 | && next_cum.fregno <= FP_ARG_V4_MAX_REG | |
13716 | && cfun->va_list_fpr_size) | |
13717 | { | |
13718 | if (gpr_reg_num) | |
13719 | fpr_size = (next_cum.fregno - FP_ARG_MIN_REG) | |
13720 | * UNITS_PER_FP_WORD; | |
13721 | if (cfun->va_list_fpr_size | |
13722 | < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno) | |
13723 | fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD; | |
13724 | else | |
13725 | fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno) | |
13726 | * UNITS_PER_FP_WORD; | |
13727 | } | |
13728 | if (gpr_reg_num) | |
13729 | { | |
13730 | offset = -((first_reg_offset * reg_size) & ~7); | |
13731 | if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size) | |
13732 | { | |
13733 | gpr_reg_num = cfun->va_list_gpr_size; | |
13734 | if (reg_size == 4 && (first_reg_offset & 1)) | |
13735 | gpr_reg_num++; | |
13736 | } | |
13737 | gpr_size = (gpr_reg_num * reg_size + 7) & ~7; | |
13738 | } | |
13739 | else if (fpr_size) | |
13740 | offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG) | |
13741 | * UNITS_PER_FP_WORD | |
13742 | - (int) (GP_ARG_NUM_REG * reg_size); | |
13743 | ||
13744 | if (gpr_size + fpr_size) | |
13745 | { | |
13746 | rtx reg_save_area | |
13747 | = assign_stack_local (BLKmode, gpr_size + fpr_size, 64); | |
13748 | gcc_assert (GET_CODE (reg_save_area) == MEM); | |
13749 | reg_save_area = XEXP (reg_save_area, 0); | |
13750 | if (GET_CODE (reg_save_area) == PLUS) | |
13751 | { | |
13752 | gcc_assert (XEXP (reg_save_area, 0) | |
13753 | == virtual_stack_vars_rtx); | |
13754 | gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT); | |
13755 | offset += INTVAL (XEXP (reg_save_area, 1)); | |
13756 | } | |
13757 | else | |
13758 | gcc_assert (reg_save_area == virtual_stack_vars_rtx); | |
13759 | } | |
13760 | ||
13761 | cfun->machine->varargs_save_offset = offset; | |
13762 | save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset); | |
13763 | } | |
13764 | } | |
13765 | else | |
13766 | { | |
13767 | first_reg_offset = next_cum.words; | |
13768 | save_area = crtl->args.internal_arg_pointer; | |
13769 | ||
13770 | if (targetm.calls.must_pass_in_stack (mode, type)) | |
13771 | first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type); | |
13772 | } | |
13773 | ||
13774 | set = get_varargs_alias_set (); | |
13775 | if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG | |
13776 | && cfun->va_list_gpr_size) | |
13777 | { | |
13778 | int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset; | |
13779 | ||
13780 | if (va_list_gpr_counter_field) | |
13781 | /* V4 va_list_gpr_size counts number of registers needed. */ | |
13782 | n_gpr = cfun->va_list_gpr_size; | |
13783 | else | |
13784 | /* char * va_list instead counts number of bytes needed. */ | |
13785 | n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size; | |
13786 | ||
13787 | if (nregs > n_gpr) | |
13788 | nregs = n_gpr; | |
13789 | ||
13790 | mem = gen_rtx_MEM (BLKmode, | |
13791 | plus_constant (Pmode, save_area, | |
13792 | first_reg_offset * reg_size)); | |
13793 | MEM_NOTRAP_P (mem) = 1; | |
13794 | set_mem_alias_set (mem, set); | |
13795 | set_mem_align (mem, BITS_PER_WORD); | |
13796 | ||
13797 | rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem, | |
13798 | nregs); | |
13799 | } | |
13800 | ||
13801 | /* Save FP registers if needed. */ | |
13802 | if (DEFAULT_ABI == ABI_V4 | |
13803 | && TARGET_HARD_FLOAT && TARGET_FPRS | |
13804 | && ! no_rtl | |
13805 | && next_cum.fregno <= FP_ARG_V4_MAX_REG | |
13806 | && cfun->va_list_fpr_size) | |
13807 | { | |
13808 | int fregno = next_cum.fregno, nregs; | |
13809 | rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO); | |
13810 | rtx lab = gen_label_rtx (); | |
13811 | int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG) | |
13812 | * UNITS_PER_FP_WORD); | |
13813 | ||
13814 | emit_jump_insn | |
13815 | (gen_rtx_SET (pc_rtx, | |
13816 | gen_rtx_IF_THEN_ELSE (VOIDmode, | |
13817 | gen_rtx_NE (VOIDmode, cr1, | |
13818 | const0_rtx), | |
13819 | gen_rtx_LABEL_REF (VOIDmode, lab), | |
13820 | pc_rtx))); | |
13821 | ||
13822 | for (nregs = 0; | |
13823 | fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size; | |
13824 | fregno++, off += UNITS_PER_FP_WORD, nregs++) | |
13825 | { | |
13826 | mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) | |
13827 | ? DFmode : SFmode, | |
13828 | plus_constant (Pmode, save_area, off)); | |
13829 | MEM_NOTRAP_P (mem) = 1; | |
13830 | set_mem_alias_set (mem, set); | |
13831 | set_mem_align (mem, GET_MODE_ALIGNMENT ( | |
13832 | (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) | |
13833 | ? DFmode : SFmode)); | |
13834 | emit_move_insn (mem, gen_rtx_REG ( | |
13835 | (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) | |
13836 | ? DFmode : SFmode, fregno)); | |
13837 | } | |
13838 | ||
13839 | emit_label (lab); | |
13840 | } | |
13841 | } | |
13842 | ||
13843 | /* Create the va_list data type. */ | |
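| /* The V.4 record built below corresponds to this C declaration (a | |
| sketch; the field notes are the editor's reading of the code below): | |
| typedef struct __va_list_tag { | |
| unsigned char gpr; (GPR arg registers consumed so far) | |
| unsigned char fpr; (FPR arg registers consumed so far) | |
| unsigned short reserved; (padding, named for -Wpadded) | |
| void *overflow_arg_area; (args spilled to the stack) | |
| void *reg_save_area; (register dump made by the prologue) | |
| } va_list[1]; */ | |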
13844 | ||
13845 | static tree | |
13846 | rs6000_build_builtin_va_list (void) | |
13847 | { | |
13848 | tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl; | |
13849 | ||
13850 | /* For AIX, prefer 'char *' because that's what the system | |
13851 | header files like. */ | |
13852 | if (DEFAULT_ABI != ABI_V4) | |
13853 | return build_pointer_type (char_type_node); | |
13854 | ||
13855 | record = (*lang_hooks.types.make_type) (RECORD_TYPE); | |
13856 | type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL, | |
13857 | get_identifier ("__va_list_tag"), record); | |
13858 | ||
13859 | f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"), | |
13860 | unsigned_char_type_node); | |
13861 | f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"), | |
13862 | unsigned_char_type_node); | |
13863 | /* Give the two bytes of padding a name, so that -Wpadded won't warn on | |
13864 | every user file. */ | |
13865 | f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL, | |
13866 | get_identifier ("reserved"), short_unsigned_type_node); | |
13867 | f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL, | |
13868 | get_identifier ("overflow_arg_area"), | |
13869 | ptr_type_node); | |
13870 | f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL, | |
13871 | get_identifier ("reg_save_area"), | |
13872 | ptr_type_node); | |
13873 | ||
13874 | va_list_gpr_counter_field = f_gpr; | |
13875 | va_list_fpr_counter_field = f_fpr; | |
13876 | ||
13877 | DECL_FIELD_CONTEXT (f_gpr) = record; | |
13878 | DECL_FIELD_CONTEXT (f_fpr) = record; | |
13879 | DECL_FIELD_CONTEXT (f_res) = record; | |
13880 | DECL_FIELD_CONTEXT (f_ovf) = record; | |
13881 | DECL_FIELD_CONTEXT (f_sav) = record; | |
13882 | ||
13883 | TYPE_STUB_DECL (record) = type_decl; | |
13884 | TYPE_NAME (record) = type_decl; | |
13885 | TYPE_FIELDS (record) = f_gpr; | |
13886 | DECL_CHAIN (f_gpr) = f_fpr; | |
13887 | DECL_CHAIN (f_fpr) = f_res; | |
13888 | DECL_CHAIN (f_res) = f_ovf; | |
13889 | DECL_CHAIN (f_ovf) = f_sav; | |
13890 | ||
13891 | layout_type (record); | |
13892 | ||
13893 | /* The correct type is an array type of one element. */ | |
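| /* (An array of one element makes a va_list object decay to a pointer | |
| to the record when passed to a function, matching the SVR4 ABI's | |
| expectations.) */ | |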
13894 | return build_array_type (record, build_index_type (size_zero_node)); | |
13895 | } | |
13896 | ||
13897 | /* Implement va_start. */ | |
13898 | ||
13899 | static void | |
13900 | rs6000_va_start (tree valist, rtx nextarg) | |
13901 | { | |
13902 | HOST_WIDE_INT words, n_gpr, n_fpr; | |
13903 | tree f_gpr, f_fpr, f_res, f_ovf, f_sav; | |
13904 | tree gpr, fpr, ovf, sav, t; | |
13905 | ||
13906 | /* Only SVR4 needs something special. */ | |
13907 | if (DEFAULT_ABI != ABI_V4) | |
13908 | { | |
13909 | std_expand_builtin_va_start (valist, nextarg); | |
13910 | return; | |
13911 | } | |
13912 | ||
13913 | f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node)); | |
13914 | f_fpr = DECL_CHAIN (f_gpr); | |
13915 | f_res = DECL_CHAIN (f_fpr); | |
13916 | f_ovf = DECL_CHAIN (f_res); | |
13917 | f_sav = DECL_CHAIN (f_ovf); | |
13918 | ||
13919 | valist = build_simple_mem_ref (valist); | |
13920 | gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE); | |
13921 | fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist), | |
13922 | f_fpr, NULL_TREE); | |
13923 | ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist), | |
13924 | f_ovf, NULL_TREE); | |
13925 | sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist), | |
13926 | f_sav, NULL_TREE); | |
13927 | ||
13928 | /* Count number of gp and fp argument registers used. */ | |
13929 | words = crtl->args.info.words; | |
13930 | n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG, | |
13931 | GP_ARG_NUM_REG); | |
13932 | n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG, | |
13933 | FP_ARG_NUM_REG); | |
13934 | ||
13935 | if (TARGET_DEBUG_ARG) | |
13936 | fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = " | |
13937 | HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n", | |
13938 | words, n_gpr, n_fpr); | |
13939 | ||
13940 | if (cfun->va_list_gpr_size) | |
13941 | { | |
13942 | t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, | |
13943 | build_int_cst (NULL_TREE, n_gpr)); | |
13944 | TREE_SIDE_EFFECTS (t) = 1; | |
13945 | expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); | |
13946 | } | |
13947 | ||
13948 | if (cfun->va_list_fpr_size) | |
13949 | { | |
13950 | t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, | |
13951 | build_int_cst (NULL_TREE, n_fpr)); | |
13952 | TREE_SIDE_EFFECTS (t) = 1; | |
13953 | expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); | |
13954 | ||
13955 | #ifdef HAVE_AS_GNU_ATTRIBUTE | |
13956 | if (call_ABI_of_interest (cfun->decl)) | |
13957 | rs6000_passes_float = true; | |
13958 | #endif | |
13959 | } | |
13960 | ||
13961 | /* Find the overflow area. */ | |
13962 | t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer); | |
13963 | if (words != 0) | |
13964 | t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD); | |
13965 | t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t); | |
13966 | TREE_SIDE_EFFECTS (t) = 1; | |
13967 | expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); | |
13968 | ||
13969 | /* If there were no va_arg invocations, don't set up the register | |
13970 | save area. */ | |
13971 | if (!cfun->va_list_gpr_size | |
13972 | && !cfun->va_list_fpr_size | |
13973 | && n_gpr < GP_ARG_NUM_REG | |
13974 | && n_fpr < FP_ARG_V4_MAX_REG) | |
13975 | return; | |
13976 | ||
13977 | /* Find the register save area. */ | |
13978 | t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx); | |
13979 | if (cfun->machine->varargs_save_offset) | |
13980 | t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset); | |
13981 | t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t); | |
13982 | TREE_SIDE_EFFECTS (t) = 1; | |
13983 | expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); | |
13984 | } | |
13985 | ||
13986 | /* Implement va_arg. */ | |
13987 | ||
13988 | static tree | |
13989 | rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, | |
13990 | gimple_seq *post_p) | |
13991 | { | |
13992 | tree f_gpr, f_fpr, f_res, f_ovf, f_sav; | |
13993 | tree gpr, fpr, ovf, sav, reg, t, u; | |
13994 | int size, rsize, n_reg, sav_ofs, sav_scale; | |
13995 | tree lab_false, lab_over, addr; | |
13996 | int align; | |
13997 | tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true); | |
13998 | int regalign = 0; | |
13999 | gimple *stmt; | |
14000 | ||
14001 | if (pass_by_reference (NULL, TYPE_MODE (type), type, false)) | |
14002 | { | |
14003 | t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p); | |
14004 | return build_va_arg_indirect_ref (t); | |
14005 | } | |
14006 | ||
14007 | /* We need to deal with the fact that the darwin ppc64 ABI is defined by an | |
14008 | earlier version of gcc, with the property that it always applied alignment | |
14009 | adjustments to the va-args (even for zero-sized types). The cheapest way | |
14010 | to deal with this is to replicate the effect of the part of | |
14011 | std_gimplify_va_arg_expr that carries out the align adjust, for the | |
14012 | relevant case. | |
14013 | We don't need to check for pass-by-reference because of the test above. | |
14014 | We can return a simplified answer, since we know there's no offset to add. */ | |
14015 | ||
14016 | if (((TARGET_MACHO | |
14017 | && rs6000_darwin64_abi) | |
14018 | || DEFAULT_ABI == ABI_ELFv2 | |
14019 | || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)) | |
14020 | && integer_zerop (TYPE_SIZE (type))) | |
14021 | { | |
14022 | unsigned HOST_WIDE_INT align, boundary; | |
14023 | tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL); | |
14024 | align = PARM_BOUNDARY / BITS_PER_UNIT; | |
14025 | boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type); | |
14026 | if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT) | |
14027 | boundary = MAX_SUPPORTED_STACK_ALIGNMENT; | |
14028 | boundary /= BITS_PER_UNIT; | |
14029 | if (boundary > align) | |
14030 | { | |
14031 | tree t; | |
14032 | /* This updates arg ptr by the amount that would be necessary | |
14033 | to align the zero-sized (but not zero-alignment) item. */ | |
14034 | t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp, | |
14035 | fold_build_pointer_plus_hwi (valist_tmp, boundary - 1)); | |
14036 | gimplify_and_add (t, pre_p); | |
14037 | ||
14038 | t = fold_convert (sizetype, valist_tmp); | |
14039 | t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp, | |
14040 | fold_convert (TREE_TYPE (valist), | |
14041 | fold_build2 (BIT_AND_EXPR, sizetype, t, | |
14042 | size_int (-boundary)))); | |
14043 | t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t); | |
14044 | gimplify_and_add (t, pre_p); | |
14045 | } | |
14046 | /* Since it is zero-sized there's no increment for the item itself. */ | |
14047 | valist_tmp = fold_convert (build_pointer_type (type), valist_tmp); | |
14048 | return build_va_arg_indirect_ref (valist_tmp); | |
14049 | } | |
14050 | ||
14051 | if (DEFAULT_ABI != ABI_V4) | |
14052 | { | |
14053 | if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE) | |
14054 | { | |
14055 | tree elem_type = TREE_TYPE (type); | |
14056 | machine_mode elem_mode = TYPE_MODE (elem_type); | |
14057 | int elem_size = GET_MODE_SIZE (elem_mode); | |
14058 | ||
14059 | if (elem_size < UNITS_PER_WORD) | |
14060 | { | |
14061 | tree real_part, imag_part; | |
14062 | gimple_seq post = NULL; | |
14063 | ||
14064 | real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p, | |
14065 | &post); | |
14066 | /* Copy the value into a temporary, lest the formal temporary | |
14067 | be reused out from under us. */ | |
14068 | real_part = get_initialized_tmp_var (real_part, pre_p, &post); | |
14069 | gimple_seq_add_seq (pre_p, post); | |
14070 | ||
14071 | imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p, | |
14072 | post_p); | |
14073 | ||
14074 | return build2 (COMPLEX_EXPR, type, real_part, imag_part); | |
14075 | } | |
14076 | } | |
14077 | ||
14078 | return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); | |
14079 | } | |
14080 | ||
14081 | f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node)); | |
14082 | f_fpr = DECL_CHAIN (f_gpr); | |
14083 | f_res = DECL_CHAIN (f_fpr); | |
14084 | f_ovf = DECL_CHAIN (f_res); | |
14085 | f_sav = DECL_CHAIN (f_ovf); | |
14086 | ||
14087 | gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE); | |
14088 | fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist), | |
14089 | f_fpr, NULL_TREE); | |
14090 | ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist), | |
14091 | f_ovf, NULL_TREE); | |
14092 | sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist), | |
14093 | f_sav, NULL_TREE); | |
14094 | ||
14095 | size = int_size_in_bytes (type); | |
14096 | rsize = (size + 3) / 4; | |
14097 | int pad = 4 * rsize - size; | |
14098 | align = 1; | |
14099 | ||
14100 | machine_mode mode = TYPE_MODE (type); | |
14101 | if (abi_v4_pass_in_fpr (mode)) | |
14102 | { | |
14103 | /* FP args go in FP registers, if present. */ | |
14104 | reg = fpr; | |
14105 | n_reg = (size + 7) / 8; | |
14106 | sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4; | |
14107 | sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4); | |
14108 | if (mode != SFmode && mode != SDmode) | |
14109 | align = 8; | |
14110 | } | |
14111 | else | |
14112 | { | |
14113 | /* Otherwise into GP registers. */ | |
14114 | reg = gpr; | |
14115 | n_reg = rsize; | |
14116 | sav_ofs = 0; | |
14117 | sav_scale = 4; | |
14118 | if (n_reg == 2) | |
14119 | align = 8; | |
14120 | } | |
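| /* Bookkeeping sketch, assuming V.4 hard float: for a double, n_reg | |
| is 1, sav_ofs 32 and sav_scale 8, so the value is read from | |
| reg_save_area + 32 + fpr * 8; for a long long, n_reg is 2 and | |
| sav_scale 4, addressing the GPR part of the save area. */ | |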
14121 | ||
14122 | /* Pull the value out of the saved registers.... */ | |
14123 | ||
14124 | lab_over = NULL; | |
14125 | addr = create_tmp_var (ptr_type_node, "addr"); | |
14126 | ||
14127 | /* AltiVec vectors never go in registers when -mabi=altivec. */ | |
14128 | if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode)) | |
14129 | align = 16; | |
14130 | else | |
14131 | { | |
14132 | lab_false = create_artificial_label (input_location); | |
14133 | lab_over = create_artificial_label (input_location); | |
14134 | ||
14135 | /* Long long and SPE vectors are aligned in the registers. | |
14136 | As are any other 2 gpr item such as complex int due to a | |
14137 | historical mistake. */ | |
14138 | u = reg; | |
14139 | if (n_reg == 2 && reg == gpr) | |
14140 | { | |
14141 | regalign = 1; | |
14142 | u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg), | |
14143 | build_int_cst (TREE_TYPE (reg), n_reg - 1)); | |
14144 | u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), | |
14145 | unshare_expr (reg), u); | |
14146 | } | |
14147 | /* _Decimal128 is passed in even/odd fpr pairs; the stored | |
14148 | reg number is 0 for f1, so we want to make it odd. */ | |
14149 | else if (reg == fpr && mode == TDmode) | |
14150 | { | |
14151 | t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg), | |
14152 | build_int_cst (TREE_TYPE (reg), 1)); | |
14153 | u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t); | |
14154 | } | |
14155 | ||
14156 | t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1)); | |
14157 | t = build2 (GE_EXPR, boolean_type_node, u, t); | |
14158 | u = build1 (GOTO_EXPR, void_type_node, lab_false); | |
14159 | t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE); | |
14160 | gimplify_and_add (t, pre_p); | |
14161 | ||
14162 | t = sav; | |
14163 | if (sav_ofs) | |
14164 | t = fold_build_pointer_plus_hwi (sav, sav_ofs); | |
14165 | ||
14166 | u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg), | |
14167 | build_int_cst (TREE_TYPE (reg), n_reg)); | |
14168 | u = fold_convert (sizetype, u); | |
14169 | u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale)); | |
14170 | t = fold_build_pointer_plus (t, u); | |
14171 | ||
14172 | /* _Decimal32 varargs are located in the second word of the 64-bit | |
14173 | FP register for 32-bit binaries. */ | |
14174 | if (TARGET_32BIT | |
14175 | && TARGET_HARD_FLOAT && TARGET_FPRS | |
14176 | && mode == SDmode) | |
14177 | t = fold_build_pointer_plus_hwi (t, size); | |
14178 | ||
14179 | /* Args are passed right-aligned. */ | |
14180 | if (BYTES_BIG_ENDIAN) | |
14181 | t = fold_build_pointer_plus_hwi (t, pad); | |
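| /* E.g. a 2-byte short sits in a 4-byte slot, so pad == 2 and on a | |
| big-endian target the address is advanced to the bytes that | |
| actually hold the value. */ | |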
14182 | ||
14183 | gimplify_assign (addr, t, pre_p); | |
14184 | ||
14185 | gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over)); | |
14186 | ||
14187 | stmt = gimple_build_label (lab_false); | |
14188 | gimple_seq_add_stmt (pre_p, stmt); | |
14189 | ||
14190 | if ((n_reg == 2 && !regalign) || n_reg > 2) | |
14191 | { | |
14192 | /* Ensure that we don't find any more args in regs. | |
14193 | Alignment has been taken care of for the special cases. */ | |
14194 | gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p); | |
14195 | } | |
14196 | } | |
14197 | ||
14198 | /* ... otherwise out of the overflow area. */ | |
14199 | ||
14200 | /* Care for on-stack alignment if needed. */ | |
14201 | t = ovf; | |
14202 | if (align != 1) | |
14203 | { | |
14204 | t = fold_build_pointer_plus_hwi (t, align - 1); | |
14205 | t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, | |
14206 | build_int_cst (TREE_TYPE (t), -align)); | |
14207 | } | |
14208 | ||
14209 | /* Args are passed right-aligned. */ | |
14210 | if (BYTES_BIG_ENDIAN) | |
14211 | t = fold_build_pointer_plus_hwi (t, pad); | |
14212 | ||
14213 | gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue); | |
14214 | ||
14215 | gimplify_assign (unshare_expr (addr), t, pre_p); | |
14216 | ||
14217 | t = fold_build_pointer_plus_hwi (t, size); | |
14218 | gimplify_assign (unshare_expr (ovf), t, pre_p); | |
14219 | ||
14220 | if (lab_over) | |
14221 | { | |
14222 | stmt = gimple_build_label (lab_over); | |
14223 | gimple_seq_add_stmt (pre_p, stmt); | |
14224 | } | |
14225 | ||
14226 | if (STRICT_ALIGNMENT | |
14227 | && (TYPE_ALIGN (type) | |
14228 | > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align))) | |
14229 | { | |
14230 | /* The value (of type complex double, for example) may not be | |
14231 | sufficiently aligned in the register save area, so copy it via | |
14232 | a temporary. (This is the same code as used for SPARC.) */ | |
14233 | tree tmp = create_tmp_var (type, "va_arg_tmp"); | |
14234 | tree dest_addr = build_fold_addr_expr (tmp); | |
14235 | ||
14236 | tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY), | |
14237 | 3, dest_addr, addr, size_int (rsize * 4)); | |
14238 | ||
14239 | gimplify_and_add (copy, pre_p); | |
14240 | addr = dest_addr; | |
14241 | } | |
14242 | ||
14243 | addr = fold_convert (ptrtype, addr); | |
14244 | return build_va_arg_indirect_ref (addr); | |
14245 | } | |
14246 | ||
14247 | /* Builtins. */ | |
14248 | ||
14249 | static void | |
14250 | def_builtin (const char *name, tree type, enum rs6000_builtins code) | |
14251 | { | |
14252 | tree t; | |
14253 | unsigned classify = rs6000_builtin_info[(int)code].attr; | |
14254 | const char *attr_string = ""; | |
14255 | ||
14256 | gcc_assert (name != NULL); | |
14257 | gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT)); | |
14258 | ||
14259 | if (rs6000_builtin_decls[(int)code]) | |
14260 | fatal_error (input_location, | |
14261 | "internal error: builtin function %s already processed", name); | |
14262 | ||
14263 | rs6000_builtin_decls[(int)code] = t = | |
14264 | add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE); | |
14265 | ||
14266 | /* Set any special attributes. */ | |
14267 | if ((classify & RS6000_BTC_CONST) != 0) | |
14268 | { | |
14269 | /* const function, function only depends on the inputs. */ | |
14270 | TREE_READONLY (t) = 1; | |
14271 | TREE_NOTHROW (t) = 1; | |
14272 | attr_string = ", const"; | |
14273 | } | |
14274 | else if ((classify & RS6000_BTC_PURE) != 0) | |
14275 | { | |
14276 | /* pure function, function can read global memory, but does not set any | |
14277 | external state. */ | |
14278 | DECL_PURE_P (t) = 1; | |
14279 | TREE_NOTHROW (t) = 1; | |
14280 | attr_string = ", pure"; | |
14281 | } | |
14282 | else if ((classify & RS6000_BTC_FP) != 0) | |
14283 | { | |
14284 | /* Function is a math function. If -frounding-math is in effect, | |
14285 | treat the function as not reading global memory, though it may have | |
14286 | arbitrary side effects. Otherwise, assume it is a const function. | |
14287 | This mimics the ATTR_MATHFN_FPROUNDING attribute in | |
14288 | builtin-attribute.def that is used for the math functions. */ | |
14289 | TREE_NOTHROW (t) = 1; | |
14290 | if (flag_rounding_math) | |
14291 | { | |
14292 | DECL_PURE_P (t) = 1; | |
14293 | DECL_IS_NOVOPS (t) = 1; | |
14294 | attr_string = ", fp, pure"; | |
14295 | } | |
14296 | else | |
14297 | { | |
14298 | TREE_READONLY (t) = 1; | |
14299 | attr_string = ", fp, const"; | |
14300 | } | |
14301 | } | |
14302 | else if ((classify & RS6000_BTC_ATTR_MASK) != 0) | |
14303 | gcc_unreachable (); | |
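| /* In source-level terms this is roughly analogous to declaring the | |
| builtin with __attribute__ ((const, nothrow)) for RS6000_BTC_CONST | |
| and __attribute__ ((pure, nothrow)) for RS6000_BTC_PURE. */ | |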
14304 | ||
14305 | if (TARGET_DEBUG_BUILTIN) | |
14306 | fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n", | |
14307 | (int)code, name, attr_string); | |
14308 | } | |
14309 | ||
14310 | /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */ | |
14311 | ||
14312 | #undef RS6000_BUILTIN_0 | |
14313 | #undef RS6000_BUILTIN_1 | |
14314 | #undef RS6000_BUILTIN_2 | |
14315 | #undef RS6000_BUILTIN_3 | |
14316 | #undef RS6000_BUILTIN_A | |
14317 | #undef RS6000_BUILTIN_D | |
14318 | #undef RS6000_BUILTIN_E | |
14319 | #undef RS6000_BUILTIN_H | |
14320 | #undef RS6000_BUILTIN_P | |
14321 | #undef RS6000_BUILTIN_Q | |
14322 | #undef RS6000_BUILTIN_S | |
14323 | #undef RS6000_BUILTIN_X | |
14324 | ||
14325 | #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) | |
14326 | #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) | |
14327 | #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) | |
14328 | #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \ | |
14329 | { MASK, ICODE, NAME, ENUM }, | |
14330 | ||
14331 | #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) | |
14332 | #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) | |
14333 | #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) | |
14334 | #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) | |
14335 | #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) | |
14336 | #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) | |
14337 | #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) | |
14338 | #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) | |
14339 | ||
14340 | static const struct builtin_description bdesc_3arg[] = | |
14341 | { | |
14342 | #include "powerpcspe-builtin.def" | |
14343 | }; | |
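| /* This table (and the similar ones below) uses the "X macro" idiom: | |
| every builtin appears once in powerpcspe-builtin.def as an invocation | |
| of one of the RS6000_BUILTIN_* macros, and each table re-#defines | |
| exactly one of those macros to emit an initializer while the rest | |
| expand to nothing, so each #include extracts one class of builtins. */ | |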
14344 | ||
14345 | /* DST operations: void foo (void *, const int, const char). */ | |
14346 | ||
14347 | #undef RS6000_BUILTIN_0 | |
14348 | #undef RS6000_BUILTIN_1 | |
14349 | #undef RS6000_BUILTIN_2 | |
14350 | #undef RS6000_BUILTIN_3 | |
14351 | #undef RS6000_BUILTIN_A | |
14352 | #undef RS6000_BUILTIN_D | |
14353 | #undef RS6000_BUILTIN_E | |
14354 | #undef RS6000_BUILTIN_H | |
14355 | #undef RS6000_BUILTIN_P | |
14356 | #undef RS6000_BUILTIN_Q | |
14357 | #undef RS6000_BUILTIN_S | |
14358 | #undef RS6000_BUILTIN_X | |
14359 | ||
14360 | #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) | |
14361 | #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) | |
14362 | #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) | |
14363 | #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) | |
14364 | #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) | |
14365 | #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \ | |
14366 | { MASK, ICODE, NAME, ENUM }, | |
14367 | ||
14368 | #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) | |
14369 | #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) | |
14370 | #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) | |
14371 | #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) | |
14372 | #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) | |
14373 | #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) | |
14374 | ||
14375 | static const struct builtin_description bdesc_dst[] = | |
14376 | { | |
14377 | #include "powerpcspe-builtin.def" | |
14378 | }; | |
14379 | ||
14380 | /* Simple binary operations: VECc = foo (VECa, VECb). */ | |
14381 | ||
14382 | #undef RS6000_BUILTIN_0 | |
14383 | #undef RS6000_BUILTIN_1 | |
14384 | #undef RS6000_BUILTIN_2 | |
14385 | #undef RS6000_BUILTIN_3 | |
14386 | #undef RS6000_BUILTIN_A | |
14387 | #undef RS6000_BUILTIN_D | |
14388 | #undef RS6000_BUILTIN_E | |
14389 | #undef RS6000_BUILTIN_H | |
14390 | #undef RS6000_BUILTIN_P | |
14391 | #undef RS6000_BUILTIN_Q | |
14392 | #undef RS6000_BUILTIN_S | |
14393 | #undef RS6000_BUILTIN_X | |
14394 | ||
14395 | #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) | |
14396 | #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) | |
14397 | #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \ | |
14398 | { MASK, ICODE, NAME, ENUM }, | |
14399 | ||
14400 | #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) | |
14401 | #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) | |
14402 | #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) | |
14403 | #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) | |
14404 | #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) | |
14405 | #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) | |
14406 | #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) | |
14407 | #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) | |
14408 | #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) | |
14409 | ||
14410 | static const struct builtin_description bdesc_2arg[] = | |
14411 | { | |
14412 | #include "powerpcspe-builtin.def" | |
14413 | }; | |
14414 | ||
14415 | #undef RS6000_BUILTIN_0 | |
14416 | #undef RS6000_BUILTIN_1 | |
14417 | #undef RS6000_BUILTIN_2 | |
14418 | #undef RS6000_BUILTIN_3 | |
14419 | #undef RS6000_BUILTIN_A | |
14420 | #undef RS6000_BUILTIN_D | |
14421 | #undef RS6000_BUILTIN_E | |
14422 | #undef RS6000_BUILTIN_H | |
14423 | #undef RS6000_BUILTIN_P | |
14424 | #undef RS6000_BUILTIN_Q | |
14425 | #undef RS6000_BUILTIN_S | |
14426 | #undef RS6000_BUILTIN_X | |
14427 | ||
14428 | #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) | |
14429 | #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) | |
14430 | #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) | |
14431 | #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) | |
14432 | #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) | |
14433 | #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) | |
14434 | #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) | |
14435 | #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) | |
14436 | #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \ | |
14437 | { MASK, ICODE, NAME, ENUM }, | |
14438 | ||
14439 | #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) | |
14440 | #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) | |
14441 | #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) | |
14442 | ||
14443 | /* AltiVec predicates. */ | |
14444 | ||
14445 | static const struct builtin_description bdesc_altivec_preds[] = | |
14446 | { | |
14447 | #include "powerpcspe-builtin.def" | |
14448 | }; | |
14449 | ||
14450 | /* SPE predicates. */ | |
14451 | #undef RS6000_BUILTIN_0 | |
14452 | #undef RS6000_BUILTIN_1 | |
14453 | #undef RS6000_BUILTIN_2 | |
14454 | #undef RS6000_BUILTIN_3 | |
14455 | #undef RS6000_BUILTIN_A | |
14456 | #undef RS6000_BUILTIN_D | |
14457 | #undef RS6000_BUILTIN_E | |
14458 | #undef RS6000_BUILTIN_H | |
14459 | #undef RS6000_BUILTIN_P | |
14460 | #undef RS6000_BUILTIN_Q | |
14461 | #undef RS6000_BUILTIN_S | |
14462 | #undef RS6000_BUILTIN_X | |
14463 | ||
14464 | #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) | |
14465 | #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) | |
14466 | #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) | |
14467 | #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) | |
14468 | #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) | |
14469 | #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) | |
14470 | #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) | |
14471 | #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) | |
14472 | #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) | |
14473 | #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) | |
14474 | #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \ | |
14475 | { MASK, ICODE, NAME, ENUM }, | |
14476 | ||
14477 | #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) | |
14478 | ||
14479 | static const struct builtin_description bdesc_spe_predicates[] = | |
14480 | { | |
14481 | #include "powerpcspe-builtin.def" | |
14482 | }; | |
14483 | ||
14484 | /* SPE evsel predicates. */ | |
14485 | #undef RS6000_BUILTIN_0 | |
14486 | #undef RS6000_BUILTIN_1 | |
14487 | #undef RS6000_BUILTIN_2 | |
14488 | #undef RS6000_BUILTIN_3 | |
14489 | #undef RS6000_BUILTIN_A | |
14490 | #undef RS6000_BUILTIN_D | |
14491 | #undef RS6000_BUILTIN_E | |
14492 | #undef RS6000_BUILTIN_H | |
14493 | #undef RS6000_BUILTIN_P | |
14494 | #undef RS6000_BUILTIN_Q | |
14495 | #undef RS6000_BUILTIN_S | |
14496 | #undef RS6000_BUILTIN_X | |
14497 | ||
14498 | #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) | |
14499 | #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) | |
14500 | #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) | |
14501 | #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) | |
14502 | #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) | |
14503 | #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) | |
14504 | #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \ | |
14505 | { MASK, ICODE, NAME, ENUM }, | |
14506 | ||
14507 | #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) | |
14508 | #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) | |
14509 | #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) | |
14510 | #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) | |
14511 | #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) | |
14512 | ||
14513 | static const struct builtin_description bdesc_spe_evsel[] = | |
14514 | { | |
14515 | #include "powerpcspe-builtin.def" | |
14516 | }; | |
14517 | ||
14518 | /* PAIRED predicates. */ | |
14519 | #undef RS6000_BUILTIN_0 | |
14520 | #undef RS6000_BUILTIN_1 | |
14521 | #undef RS6000_BUILTIN_2 | |
14522 | #undef RS6000_BUILTIN_3 | |
14523 | #undef RS6000_BUILTIN_A | |
14524 | #undef RS6000_BUILTIN_D | |
14525 | #undef RS6000_BUILTIN_E | |
14526 | #undef RS6000_BUILTIN_H | |
14527 | #undef RS6000_BUILTIN_P | |
14528 | #undef RS6000_BUILTIN_Q | |
14529 | #undef RS6000_BUILTIN_S | |
14530 | #undef RS6000_BUILTIN_X | |
14531 | ||
14532 | #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) | |
14533 | #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) | |
14534 | #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) | |
14535 | #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) | |
14536 | #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) | |
14537 | #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) | |
14538 | #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) | |
14539 | #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) | |
14540 | #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) | |
14541 | #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \ | |
14542 | { MASK, ICODE, NAME, ENUM }, | |
14543 | ||
14544 | #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) | |
14545 | #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) | |
14546 | ||
14547 | static const struct builtin_description bdesc_paired_preds[] = | |
14548 | { | |
14549 | #include "powerpcspe-builtin.def" | |
14550 | }; | |
14551 | ||
14552 | /* ABS* operations. */ | |
14553 | ||
14554 | #undef RS6000_BUILTIN_0 | |
14555 | #undef RS6000_BUILTIN_1 | |
14556 | #undef RS6000_BUILTIN_2 | |
14557 | #undef RS6000_BUILTIN_3 | |
14558 | #undef RS6000_BUILTIN_A | |
14559 | #undef RS6000_BUILTIN_D | |
14560 | #undef RS6000_BUILTIN_E | |
14561 | #undef RS6000_BUILTIN_H | |
14562 | #undef RS6000_BUILTIN_P | |
14563 | #undef RS6000_BUILTIN_Q | |
14564 | #undef RS6000_BUILTIN_S | |
14565 | #undef RS6000_BUILTIN_X | |
14566 | ||
14567 | #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) | |
14568 | #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) | |
14569 | #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) | |
14570 | #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) | |
14571 | #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \ | |
14572 | { MASK, ICODE, NAME, ENUM }, | |
14573 | ||
14574 | #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) | |
14575 | #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) | |
14576 | #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) | |
14577 | #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) | |
14578 | #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) | |
14579 | #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) | |
14580 | #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) | |
14581 | ||
14582 | static const struct builtin_description bdesc_abs[] = | |
14583 | { | |
14584 | #include "powerpcspe-builtin.def" | |
14585 | }; | |
14586 | ||
14587 | /* Simple unary operations: VECb = foo (unsigned literal) or VECb = | |
14588 | foo (VECa). */ | |
14589 | ||
14590 | #undef RS6000_BUILTIN_0 | |
14591 | #undef RS6000_BUILTIN_1 | |
14592 | #undef RS6000_BUILTIN_2 | |
14593 | #undef RS6000_BUILTIN_3 | |
14594 | #undef RS6000_BUILTIN_A | |
14595 | #undef RS6000_BUILTIN_D | |
14596 | #undef RS6000_BUILTIN_E | |
14597 | #undef RS6000_BUILTIN_H | |
14598 | #undef RS6000_BUILTIN_P | |
14599 | #undef RS6000_BUILTIN_Q | |
14600 | #undef RS6000_BUILTIN_S | |
14601 | #undef RS6000_BUILTIN_X | |
14602 | ||
14603 | #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) | |
14604 | #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \ | |
14605 | { MASK, ICODE, NAME, ENUM }, | |
14606 | ||
14607 | #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) | |
14608 | #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) | |
14609 | #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) | |
14610 | #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) | |
14611 | #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) | |
14612 | #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) | |
14613 | #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) | |
14614 | #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) | |
14615 | #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) | |
14616 | #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) | |
14617 | ||
14618 | static const struct builtin_description bdesc_1arg[] = | |
14619 | { | |
14620 | #include "powerpcspe-builtin.def" | |
14621 | }; | |
14622 | ||
14623 | /* Simple no-argument operations: result = __builtin_darn_32 ().  */ | |
14624 | ||
14625 | #undef RS6000_BUILTIN_0 | |
14626 | #undef RS6000_BUILTIN_1 | |
14627 | #undef RS6000_BUILTIN_2 | |
14628 | #undef RS6000_BUILTIN_3 | |
14629 | #undef RS6000_BUILTIN_A | |
14630 | #undef RS6000_BUILTIN_D | |
14631 | #undef RS6000_BUILTIN_E | |
14632 | #undef RS6000_BUILTIN_H | |
14633 | #undef RS6000_BUILTIN_P | |
14634 | #undef RS6000_BUILTIN_Q | |
14635 | #undef RS6000_BUILTIN_S | |
14636 | #undef RS6000_BUILTIN_X | |
14637 | ||
14638 | #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \ | |
14639 | { MASK, ICODE, NAME, ENUM }, | |
14640 | ||
14641 | #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) | |
14642 | #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) | |
14643 | #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) | |
14644 | #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) | |
14645 | #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) | |
14646 | #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) | |
14647 | #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) | |
14648 | #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) | |
14649 | #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) | |
14650 | #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) | |
14651 | #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) | |
14652 | ||
14653 | static const struct builtin_description bdesc_0arg[] = | |
14654 | { | |
14655 | #include "powerpcspe-builtin.def" | |
14656 | }; | |
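
/* To make the table-building scheme above concrete: a hypothetical
   powerpcspe-builtin.def entry such as

     RS6000_BUILTIN_0 (MISC_BUILTIN_DARN_32, "darn_32",
		       RS6000_BTM_MODULO, RS6000_BTC_MISC,
		       CODE_FOR_darn_32)

   (the names are illustrative only) expands to the initializer
   { RS6000_BTM_MODULO, CODE_FOR_darn_32, "darn_32",
   MISC_BUILTIN_DARN_32 }, while bdesc_0arg is being built, and to
   nothing while every other bdesc_* table is built, since only
   RS6000_BUILTIN_0 is given a non-empty definition here.  */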
14657 | ||
14658 | /* HTM builtins. */ | |
14659 | #undef RS6000_BUILTIN_0 | |
14660 | #undef RS6000_BUILTIN_1 | |
14661 | #undef RS6000_BUILTIN_2 | |
14662 | #undef RS6000_BUILTIN_3 | |
14663 | #undef RS6000_BUILTIN_A | |
14664 | #undef RS6000_BUILTIN_D | |
14665 | #undef RS6000_BUILTIN_E | |
14666 | #undef RS6000_BUILTIN_H | |
14667 | #undef RS6000_BUILTIN_P | |
14668 | #undef RS6000_BUILTIN_Q | |
14669 | #undef RS6000_BUILTIN_S | |
14670 | #undef RS6000_BUILTIN_X | |
14671 | ||
14672 | #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) | |
14673 | #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) | |
14674 | #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) | |
14675 | #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) | |
14676 | #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) | |
14677 | #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) | |
14678 | #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) | |
14679 | #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \ | |
14680 | { MASK, ICODE, NAME, ENUM }, | |
14681 | ||
14682 | #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) | |
14683 | #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) | |
14684 | #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) | |
14685 | #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) | |
14686 | ||
14687 | static const struct builtin_description bdesc_htm[] = | |
14688 | { | |
14689 | #include "powerpcspe-builtin.def" | |
14690 | }; | |
14691 | ||
14692 | #undef RS6000_BUILTIN_0 | |
14693 | #undef RS6000_BUILTIN_1 | |
14694 | #undef RS6000_BUILTIN_2 | |
14695 | #undef RS6000_BUILTIN_3 | |
14696 | #undef RS6000_BUILTIN_A | |
14697 | #undef RS6000_BUILTIN_D | |
14698 | #undef RS6000_BUILTIN_E | |
14699 | #undef RS6000_BUILTIN_H | |
14700 | #undef RS6000_BUILTIN_P | |
14701 | #undef RS6000_BUILTIN_Q | |
14702 | #undef RS6000_BUILTIN_S | |
14703 | ||
14704 | /* Return true if a builtin function is overloaded. */ | |
14705 | bool | |
14706 | rs6000_overloaded_builtin_p (enum rs6000_builtins fncode) | |
14707 | { | |
14708 | return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0; | |
14709 | } | |
14710 | ||
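/* Return the name of the builtin identified by FNCODE.  */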
14711 | const char * | |
14712 | rs6000_overloaded_builtin_name (enum rs6000_builtins fncode) | |
14713 | { | |
14714 | return rs6000_builtin_info[(int)fncode].name; | |
14715 | } | |
14716 | ||
14717 | /* Expand an expression EXP that calls a builtin without arguments. */ | |
14718 | static rtx | |
14719 | rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target) | |
14720 | { | |
14721 | rtx pat; | |
14722 | machine_mode tmode = insn_data[icode].operand[0].mode; | |
14723 | ||
14724 | if (icode == CODE_FOR_nothing) | |
14725 | /* Builtin not supported on this processor. */ | |
14726 | return 0; | |
14727 | ||
14728 | if (target == 0 | |
14729 | || GET_MODE (target) != tmode | |
14730 | || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) | |
14731 | target = gen_reg_rtx (tmode); | |
14732 | ||
14733 | pat = GEN_FCN (icode) (target); | |
14734 | if (! pat) | |
14735 | return 0; | |
14736 | emit_insn (pat); | |
14737 | ||
14738 | return target; | |
14739 | } | |
14740 | ||
14741 | ||
14742 | static rtx | |
14743 | rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp) | |
14744 | { | |
14745 | rtx pat; | |
14746 | tree arg0 = CALL_EXPR_ARG (exp, 0); | |
14747 | tree arg1 = CALL_EXPR_ARG (exp, 1); | |
14748 | rtx op0 = expand_normal (arg0); | |
14749 | rtx op1 = expand_normal (arg1); | |
14750 | machine_mode mode0 = insn_data[icode].operand[0].mode; | |
14751 | machine_mode mode1 = insn_data[icode].operand[1].mode; | |
14752 | ||
14753 | if (icode == CODE_FOR_nothing) | |
14754 | /* Builtin not supported on this processor. */ | |
14755 | return 0; | |
14756 | ||
14757 | /* If we got invalid arguments, bail out before generating bad rtl. */ | |
14758 | if (arg0 == error_mark_node || arg1 == error_mark_node) | |
14759 | return const0_rtx; | |
14760 | ||
14761 | if (GET_CODE (op0) != CONST_INT | |
14762 | || INTVAL (op0) > 255 | |
14763 | || INTVAL (op0) < 0) | |
14764 | { | |
14765 | error ("argument 1 must be an 8-bit field value"); | |
14766 | return const0_rtx; | |
14767 | } | |
14768 | ||
14769 | if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) | |
14770 | op0 = copy_to_mode_reg (mode0, op0); | |
14771 | ||
14772 | if (! (*insn_data[icode].operand[1].predicate) (op1, mode1)) | |
14773 | op1 = copy_to_mode_reg (mode1, op1); | |
14774 | ||
14775 | pat = GEN_FCN (icode) (op0, op1); | |
14776 | if (! pat) | |
14777 | return const0_rtx; | |
14778 | emit_insn (pat); | |
14779 | ||
14780 | return NULL_RTX; | |
14781 | } | |
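
/* A source-level sketch of the builtin handled above (the first
   argument must be a literal in the range 0..255):

     double fpscr_image = get_fpscr_image ();	   (hypothetical helper)
     __builtin_mtfsf (0xff, fpscr_image);

   A non-constant or out-of-range first argument is diagnosed with
   "argument 1 must be an 8-bit field value" as above.  */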
14782 | ||
14783 | static rtx | |
14784 | rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target) | |
14785 | { | |
14786 | rtx pat; | |
14787 | tree arg0 = CALL_EXPR_ARG (exp, 0); | |
14788 | rtx op0 = expand_normal (arg0); | |
14789 | machine_mode tmode = insn_data[icode].operand[0].mode; | |
14790 | machine_mode mode0 = insn_data[icode].operand[1].mode; | |
14791 | ||
14792 | if (icode == CODE_FOR_nothing) | |
14793 | /* Builtin not supported on this processor. */ | |
14794 | return 0; | |
14795 | ||
14796 | /* If we got invalid arguments, bail out before generating bad rtl. */ | |
14797 | if (arg0 == error_mark_node) | |
14798 | return const0_rtx; | |
14799 | ||
14800 | if (icode == CODE_FOR_altivec_vspltisb | |
14801 | || icode == CODE_FOR_altivec_vspltish | |
14802 | || icode == CODE_FOR_altivec_vspltisw | |
14803 | || icode == CODE_FOR_spe_evsplatfi | |
14804 | || icode == CODE_FOR_spe_evsplati) | |
14805 | { | |
14806 | /* Only allow 5-bit *signed* literals. */ | |
14807 | if (GET_CODE (op0) != CONST_INT | |
14808 | || INTVAL (op0) > 15 | |
14809 | || INTVAL (op0) < -16) | |
14810 | { | |
14811 | error ("argument 1 must be a 5-bit signed literal"); | |
14812 | return CONST0_RTX (tmode); | |
14813 | } | |
14814 | } | |
14815 | ||
14816 | if (target == 0 | |
14817 | || GET_MODE (target) != tmode | |
14818 | || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) | |
14819 | target = gen_reg_rtx (tmode); | |
14820 | ||
14821 | if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) | |
14822 | op0 = copy_to_mode_reg (mode0, op0); | |
14823 | ||
14824 | pat = GEN_FCN (icode) (target, op0); | |
14825 | if (! pat) | |
14826 | return 0; | |
14827 | emit_insn (pat); | |
14828 | ||
14829 | return target; | |
14830 | } | |
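
/* As an example of the 5-bit signed literal check above: with the
   standard AltiVec intrinsics that map onto vspltisb/vspltish/vspltisw,

     vector signed int ok  = vec_splat_s32 (-16);
     vector signed int bad = vec_splat_s32 (16);

   the first call is accepted while the second is rejected with
   "argument 1 must be a 5-bit signed literal", since only values in
   [-16, 15] are representable.  */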
14831 | ||
14832 | static rtx | |
14833 | altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target) | |
14834 | { | |
14835 | rtx pat, scratch1, scratch2; | |
14836 | tree arg0 = CALL_EXPR_ARG (exp, 0); | |
14837 | rtx op0 = expand_normal (arg0); | |
14838 | machine_mode tmode = insn_data[icode].operand[0].mode; | |
14839 | machine_mode mode0 = insn_data[icode].operand[1].mode; | |
14840 | ||
14841 | /* If we have invalid arguments, bail out before generating bad rtl. */ | |
14842 | if (arg0 == error_mark_node) | |
14843 | return const0_rtx; | |
14844 | ||
14845 | if (target == 0 | |
14846 | || GET_MODE (target) != tmode | |
14847 | || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) | |
14848 | target = gen_reg_rtx (tmode); | |
14849 | ||
14850 | if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) | |
14851 | op0 = copy_to_mode_reg (mode0, op0); | |
14852 | ||
14853 | scratch1 = gen_reg_rtx (mode0); | |
14854 | scratch2 = gen_reg_rtx (mode0); | |
14855 | ||
14856 | pat = GEN_FCN (icode) (target, op0, scratch1, scratch2); | |
14857 | if (! pat) | |
14858 | return 0; | |
14859 | emit_insn (pat); | |
14860 | ||
14861 | return target; | |
14862 | } | |
14863 | ||
14864 | static rtx | |
14865 | rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target) | |
14866 | { | |
14867 | rtx pat; | |
14868 | tree arg0 = CALL_EXPR_ARG (exp, 0); | |
14869 | tree arg1 = CALL_EXPR_ARG (exp, 1); | |
14870 | rtx op0 = expand_normal (arg0); | |
14871 | rtx op1 = expand_normal (arg1); | |
14872 | machine_mode tmode = insn_data[icode].operand[0].mode; | |
14873 | machine_mode mode0 = insn_data[icode].operand[1].mode; | |
14874 | machine_mode mode1 = insn_data[icode].operand[2].mode; | |
14875 | ||
14876 | if (icode == CODE_FOR_nothing) | |
14877 | /* Builtin not supported on this processor. */ | |
14878 | return 0; | |
14879 | ||
14880 | /* If we got invalid arguments bail out before generating bad rtl. */ | |
14881 | if (arg0 == error_mark_node || arg1 == error_mark_node) | |
14882 | return const0_rtx; | |
14883 | ||
14884 | if (icode == CODE_FOR_altivec_vcfux | |
14885 | || icode == CODE_FOR_altivec_vcfsx | |
14886 | || icode == CODE_FOR_altivec_vctsxs | |
14887 | || icode == CODE_FOR_altivec_vctuxs | |
14888 | || icode == CODE_FOR_altivec_vspltb | |
14889 | || icode == CODE_FOR_altivec_vsplth | |
14890 | || icode == CODE_FOR_altivec_vspltw | |
14891 | || icode == CODE_FOR_spe_evaddiw | |
14892 | || icode == CODE_FOR_spe_evldd | |
14893 | || icode == CODE_FOR_spe_evldh | |
14894 | || icode == CODE_FOR_spe_evldw | |
14895 | || icode == CODE_FOR_spe_evlhhesplat | |
14896 | || icode == CODE_FOR_spe_evlhhossplat | |
14897 | || icode == CODE_FOR_spe_evlhhousplat | |
14898 | || icode == CODE_FOR_spe_evlwhe | |
14899 | || icode == CODE_FOR_spe_evlwhos | |
14900 | || icode == CODE_FOR_spe_evlwhou | |
14901 | || icode == CODE_FOR_spe_evlwhsplat | |
14902 | || icode == CODE_FOR_spe_evlwwsplat | |
14903 | || icode == CODE_FOR_spe_evrlwi | |
14904 | || icode == CODE_FOR_spe_evslwi | |
14905 | || icode == CODE_FOR_spe_evsrwis | |
14906 | || icode == CODE_FOR_spe_evsubifw | |
14907 | || icode == CODE_FOR_spe_evsrwiu) | |
14908 | { | |
14909 | /* Only allow 5-bit unsigned literals. */ | |
14910 | STRIP_NOPS (arg1); | |
14911 | if (TREE_CODE (arg1) != INTEGER_CST | |
14912 | || TREE_INT_CST_LOW (arg1) & ~0x1f) | |
14913 | { | |
14914 | error ("argument 2 must be a 5-bit unsigned literal"); | |
14915 | return CONST0_RTX (tmode); | |
14916 | } | |
14917 | } | |
14918 | else if (icode == CODE_FOR_dfptstsfi_eq_dd | |
14919 | || icode == CODE_FOR_dfptstsfi_lt_dd | |
14920 | || icode == CODE_FOR_dfptstsfi_gt_dd | |
14921 | || icode == CODE_FOR_dfptstsfi_unordered_dd | |
14922 | || icode == CODE_FOR_dfptstsfi_eq_td | |
14923 | || icode == CODE_FOR_dfptstsfi_lt_td | |
14924 | || icode == CODE_FOR_dfptstsfi_gt_td | |
14925 | || icode == CODE_FOR_dfptstsfi_unordered_td) | |
14926 | { | |
14927 | /* Only allow 6-bit unsigned literals. */ | |
14928 | STRIP_NOPS (arg0); | |
14929 | if (TREE_CODE (arg0) != INTEGER_CST | |
14930 | || !IN_RANGE (TREE_INT_CST_LOW (arg0), 0, 63)) | |
14931 | { | |
14932 | error ("argument 1 must be a 6-bit unsigned literal"); | |
14933 | return CONST0_RTX (tmode); | |
14934 | } | |
14935 | } | |
14936 | else if (icode == CODE_FOR_xststdcdp | |
14937 | || icode == CODE_FOR_xststdcsp | |
14938 | || icode == CODE_FOR_xvtstdcdp | |
14939 | || icode == CODE_FOR_xvtstdcsp) | |
14940 | { | |
14941 | /* Only allow 7-bit unsigned literals. */ | |
14942 | STRIP_NOPS (arg1); | |
14943 | if (TREE_CODE (arg1) != INTEGER_CST | |
14944 | || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 127)) | |
14945 | { | |
14946 | error ("argument 2 must be a 7-bit unsigned literal"); | |
14947 | return CONST0_RTX (tmode); | |
14948 | } | |
14949 | } | |
14950 | ||
14951 | if (target == 0 | |
14952 | || GET_MODE (target) != tmode | |
14953 | || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) | |
14954 | target = gen_reg_rtx (tmode); | |
14955 | ||
14956 | if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) | |
14957 | op0 = copy_to_mode_reg (mode0, op0); | |
14958 | if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) | |
14959 | op1 = copy_to_mode_reg (mode1, op1); | |
14960 | ||
14961 | pat = GEN_FCN (icode) (target, op0, op1); | |
14962 | if (! pat) | |
14963 | return 0; | |
14964 | emit_insn (pat); | |
14965 | ||
14966 | return target; | |
14967 | } | |
14968 | ||
14969 | static rtx | |
14970 | altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target) | |
14971 | { | |
14972 | rtx pat, scratch; | |
14973 | tree cr6_form = CALL_EXPR_ARG (exp, 0); | |
14974 | tree arg0 = CALL_EXPR_ARG (exp, 1); | |
14975 | tree arg1 = CALL_EXPR_ARG (exp, 2); | |
14976 | rtx op0 = expand_normal (arg0); | |
14977 | rtx op1 = expand_normal (arg1); | |
14978 | machine_mode tmode = SImode; | |
14979 | machine_mode mode0 = insn_data[icode].operand[1].mode; | |
14980 | machine_mode mode1 = insn_data[icode].operand[2].mode; | |
14981 | int cr6_form_int; | |
14982 | ||
14983 | if (TREE_CODE (cr6_form) != INTEGER_CST) | |
14984 | { | |
14985 | error ("argument 1 of __builtin_altivec_predicate must be a constant"); | |
14986 | return const0_rtx; | |
14987 | } | |
14988 | else | |
14989 | cr6_form_int = TREE_INT_CST_LOW (cr6_form); | |
14990 | ||
14991 | gcc_assert (mode0 == mode1); | |
14992 | ||
14993 | /* If we have invalid arguments, bail out before generating bad rtl. */ | |
14994 | if (arg0 == error_mark_node || arg1 == error_mark_node) | |
14995 | return const0_rtx; | |
14996 | ||
14997 | if (target == 0 | |
14998 | || GET_MODE (target) != tmode | |
14999 | || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) | |
15000 | target = gen_reg_rtx (tmode); | |
15001 | ||
15002 | if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) | |
15003 | op0 = copy_to_mode_reg (mode0, op0); | |
15004 | if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) | |
15005 | op1 = copy_to_mode_reg (mode1, op1); | |
15006 | ||
15007 | /* Note that for many of the relevant operations (e.g. cmpne or | |
15008 | cmpeq) with float or double operands, it would make more sense | |
15009 | for the mode of the allocated scratch register to be a vector | |
15010 | of integers. But the choice to copy the mode of operand 0 was | |
15011 | made long ago and there are no plans to change it. */ | |
15012 | scratch = gen_reg_rtx (mode0); | |
15013 | ||
15014 | pat = GEN_FCN (icode) (scratch, op0, op1); | |
15015 | if (! pat) | |
15016 | return 0; | |
15017 | emit_insn (pat); | |
15018 | ||
15019 | /* The vec_any* and vec_all* predicates use the same opcodes for two | |
15020 | different operations, but the bits in CR6 will be different | |
15021 | depending on what information we want. So we have to play tricks | |
15022 | with CR6 to get the right bits out. | |
15023 | ||
15024 | If you think this is disgusting, look at the specs for the | |
15025 | AltiVec predicates. */ | |
15026 | ||
15027 | switch (cr6_form_int) | |
15028 | { | |
15029 | case 0: | |
15030 | emit_insn (gen_cr6_test_for_zero (target)); | |
15031 | break; | |
15032 | case 1: | |
15033 | emit_insn (gen_cr6_test_for_zero_reverse (target)); | |
15034 | break; | |
15035 | case 2: | |
15036 | emit_insn (gen_cr6_test_for_lt (target)); | |
15037 | break; | |
15038 | case 3: | |
15039 | emit_insn (gen_cr6_test_for_lt_reverse (target)); | |
15040 | break; | |
15041 | default: | |
15042 | error ("argument 1 of __builtin_altivec_predicate is out of range"); | |
15043 | break; | |
15044 | } | |
15045 | ||
15046 | return target; | |
15047 | } | |
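
/* For reference, the overloaded predicates in altivec.h pass the CR6
   selector as the hidden first argument.  A sketch using the usual
   __CR6_* encodings (EQ = 0, EQ_REV = 1, LT = 2, LT_REV = 3):

     vec_all_eq (a, b)  passes  __CR6_LT      (case 2 above)
     vec_any_eq (a, b)  passes  __CR6_EQ_REV  (case 1 above)  */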
15048 | ||
15049 | static rtx | |
15050 | paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target) | |
15051 | { | |
15052 | rtx pat, addr; | |
15053 | tree arg0 = CALL_EXPR_ARG (exp, 0); | |
15054 | tree arg1 = CALL_EXPR_ARG (exp, 1); | |
15055 | machine_mode tmode = insn_data[icode].operand[0].mode; | |
15056 | machine_mode mode0 = Pmode; | |
15057 | machine_mode mode1 = Pmode; | |
15058 | rtx op0 = expand_normal (arg0); | |
15059 | rtx op1 = expand_normal (arg1); | |
15060 | ||
15061 | if (icode == CODE_FOR_nothing) | |
15062 | /* Builtin not supported on this processor. */ | |
15063 | return 0; | |
15064 | ||
15065 | /* If we got invalid arguments, bail out before generating bad rtl. */ | |
15066 | if (arg0 == error_mark_node || arg1 == error_mark_node) | |
15067 | return const0_rtx; | |
15068 | ||
15069 | if (target == 0 | |
15070 | || GET_MODE (target) != tmode | |
15071 | || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) | |
15072 | target = gen_reg_rtx (tmode); | |
15073 | ||
15074 | op1 = copy_to_mode_reg (mode1, op1); | |
15075 | ||
15076 | if (op0 == const0_rtx) | |
15077 | { | |
15078 | addr = gen_rtx_MEM (tmode, op1); | |
15079 | } | |
15080 | else | |
15081 | { | |
15082 | op0 = copy_to_mode_reg (mode0, op0); | |
15083 | addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1)); | |
15084 | } | |
15085 | ||
15086 | pat = GEN_FCN (icode) (target, addr); | |
15087 | ||
15088 | if (! pat) | |
15089 | return 0; | |
15090 | emit_insn (pat); | |
15091 | ||
15092 | return target; | |
15093 | } | |
15094 | ||
15095 | /* Return a constant vector for use as a little-endian permute control vector | |
15096 | to reverse the order of elements of the given vector mode. */ | |
15097 | static rtx | |
15098 | swap_selector_for_mode (machine_mode mode) | |
15099 | { | |
15100 | /* These are little endian vectors, so their elements are reversed | |
15101 | from what you would normally expect for a permute control vector. */ | |
15102 | unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8}; | |
15103 | unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12}; | |
15104 | unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14}; | |
15105 | unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; | |
15106 | unsigned int *swaparray, i; | |
15107 | rtx perm[16]; | |
15108 | ||
15109 | switch (mode) | |
15110 | { | |
916ace94 | 15111 | case E_V2DFmode: |
15112 | case E_V2DImode: | |
01e91138 | 15113 | swaparray = swap2; |
15114 | break; | |
916ace94 | 15115 | case E_V4SFmode: |
15116 | case E_V4SImode: | |
01e91138 | 15117 | swaparray = swap4; |
15118 | break; | |
916ace94 | 15119 | case E_V8HImode: |
01e91138 | 15120 | swaparray = swap8; |
15121 | break; | |
916ace94 | 15122 | case E_V16QImode: |
01e91138 | 15123 | swaparray = swap16; |
15124 | break; | |
15125 | default: | |
15126 | gcc_unreachable (); | |
15127 | } | |
15128 | ||
15129 | for (i = 0; i < 16; ++i) | |
15130 | perm[i] = GEN_INT (swaparray[i]); | |
15131 | ||
15132 | return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm))); | |
15133 | } | |
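
/* Concretely: for V4SImode the control vector built above is the byte
   sequence 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12, so a vperm of a
   vector with itself under this control reverses the order of the four
   32-bit elements (the byte indices within each element already being
   little-endian, per the comment above).  */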
15134 | ||
15135 | /* Generate code for an "lvxl", or "lve*x" built-in for a little endian target | |
15136 | with -maltivec=be specified. Issue the load followed by an element- | |
15137 | reversing permute. */ | |
15138 | void | |
15139 | altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec) | |
15140 | { | |
15141 | rtx tmp = gen_reg_rtx (mode); | |
15142 | rtx load = gen_rtx_SET (tmp, op1); | |
15143 | rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec); | |
15144 | rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx)); | |
15145 | rtx sel = swap_selector_for_mode (mode); | |
15146 | rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM); | |
15147 | ||
15148 | gcc_assert (REG_P (op0)); | |
15149 | emit_insn (par); | |
15150 | emit_insn (gen_rtx_SET (op0, vperm)); | |
15151 | } | |
15152 | ||
15153 | /* Generate code for a "stvxl" built-in for a little endian target with | |
15154 | -maltivec=be specified. Issue the store preceded by an element-reversing | |
15155 | permute. */ | |
15156 | void | |
15157 | altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec) | |
15158 | { | |
15159 | rtx tmp = gen_reg_rtx (mode); | |
15160 | rtx store = gen_rtx_SET (op0, tmp); | |
15161 | rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec); | |
15162 | rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx)); | |
15163 | rtx sel = swap_selector_for_mode (mode); | |
15164 | rtx vperm; | |
15165 | ||
15166 | gcc_assert (REG_P (op1)); | |
15167 | vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM); | |
15168 | emit_insn (gen_rtx_SET (tmp, vperm)); | |
15169 | emit_insn (par); | |
15170 | } | |
15171 | ||
15172 | /* Generate code for a "stve*x" built-in for a little endian target | |
15173 | with -maltivec=be specified. Issue the store preceded by an | |
15173 | element-reversing permute. */ | |
15174 | void | |
15175 | altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec) | |
15176 | { | |
15177 | machine_mode inner_mode = GET_MODE_INNER (mode); | |
15178 | rtx tmp = gen_reg_rtx (mode); | |
15179 | rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec); | |
15180 | rtx sel = swap_selector_for_mode (mode); | |
15181 | rtx vperm; | |
15182 | ||
15183 | gcc_assert (REG_P (op1)); | |
15184 | vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM); | |
15185 | emit_insn (gen_rtx_SET (tmp, vperm)); | |
15186 | emit_insn (gen_rtx_SET (op0, stvx)); | |
15187 | } | |
15188 | ||
15189 | static rtx | |
15190 | altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk) | |
15191 | { | |
15192 | rtx pat, addr; | |
15193 | tree arg0 = CALL_EXPR_ARG (exp, 0); | |
15194 | tree arg1 = CALL_EXPR_ARG (exp, 1); | |
15195 | machine_mode tmode = insn_data[icode].operand[0].mode; | |
15196 | machine_mode mode0 = Pmode; | |
15197 | machine_mode mode1 = Pmode; | |
15198 | rtx op0 = expand_normal (arg0); | |
15199 | rtx op1 = expand_normal (arg1); | |
15200 | ||
15201 | if (icode == CODE_FOR_nothing) | |
15202 | /* Builtin not supported on this processor. */ | |
15203 | return 0; | |
15204 | ||
15205 | /* If we got invalid arguments, bail out before generating bad rtl. */ | |
15206 | if (arg0 == error_mark_node || arg1 == error_mark_node) | |
15207 | return const0_rtx; | |
15208 | ||
15209 | if (target == 0 | |
15210 | || GET_MODE (target) != tmode | |
15211 | || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) | |
15212 | target = gen_reg_rtx (tmode); | |
15213 | ||
15214 | op1 = copy_to_mode_reg (mode1, op1); | |
15215 | ||
15216 | /* For LVX, express the RTL accurately by ANDing the address with -16. | |
15217 | LVXL and LVE*X expand to use UNSPECs to hide their special behavior, | |
15218 | so the raw address is fine. */ | |
15219 | if (icode == CODE_FOR_altivec_lvx_v2df_2op | |
15220 | || icode == CODE_FOR_altivec_lvx_v2di_2op | |
15221 | || icode == CODE_FOR_altivec_lvx_v4sf_2op | |
15222 | || icode == CODE_FOR_altivec_lvx_v4si_2op | |
15223 | || icode == CODE_FOR_altivec_lvx_v8hi_2op | |
15224 | || icode == CODE_FOR_altivec_lvx_v16qi_2op) | |
15225 | { | |
15226 | rtx rawaddr; | |
15227 | if (op0 == const0_rtx) | |
15228 | rawaddr = op1; | |
15229 | else | |
15230 | { | |
15231 | op0 = copy_to_mode_reg (mode0, op0); | |
15232 | rawaddr = gen_rtx_PLUS (Pmode, op1, op0); | |
15233 | } | |
15234 | addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16)); | |
15235 | addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr); | |
15236 | ||
15237 | /* For -maltivec=be, emit the load and follow it up with a | |
15238 | permute to swap the elements. */ | |
15239 | if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) | |
15240 | { | |
15241 | rtx temp = gen_reg_rtx (tmode); | |
15242 | emit_insn (gen_rtx_SET (temp, addr)); | |
15243 | ||
15244 | rtx sel = swap_selector_for_mode (tmode); | |
15245 | rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, temp, temp, sel), | |
15246 | UNSPEC_VPERM); | |
15247 | emit_insn (gen_rtx_SET (target, vperm)); | |
15248 | } | |
15249 | else | |
15250 | emit_insn (gen_rtx_SET (target, addr)); | |
15251 | } | |
15252 | else | |
15253 | { | |
15254 | if (op0 == const0_rtx) | |
15255 | addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1); | |
15256 | else | |
15257 | { | |
15258 | op0 = copy_to_mode_reg (mode0, op0); | |
15259 | addr = gen_rtx_MEM (blk ? BLKmode : tmode, | |
15260 | gen_rtx_PLUS (Pmode, op1, op0)); | |
15261 | } | |
15262 | ||
15263 | pat = GEN_FCN (icode) (target, addr); | |
15264 | if (! pat) | |
15265 | return 0; | |
15266 | emit_insn (pat); | |
15267 | } | |
15268 | ||
15269 | return target; | |
15270 | } | |
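
/* Schematically, the LVX path above emits RTL of the form (mode
   annotations elided)

     (set (reg target)
	  (mem (and (plus (reg op1) (reg op0))
		    (const_int -16))))

   so the hardware's silent truncation of the address to a 16-byte
   boundary is visible to the optimizers instead of being hidden
   behind an UNSPEC.  */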
15271 | ||
15272 | static rtx | |
15273 | spe_expand_stv_builtin (enum insn_code icode, tree exp) | |
15274 | { | |
15275 | tree arg0 = CALL_EXPR_ARG (exp, 0); | |
15276 | tree arg1 = CALL_EXPR_ARG (exp, 1); | |
15277 | tree arg2 = CALL_EXPR_ARG (exp, 2); | |
15278 | rtx op0 = expand_normal (arg0); | |
15279 | rtx op1 = expand_normal (arg1); | |
15280 | rtx op2 = expand_normal (arg2); | |
15281 | rtx pat; | |
15282 | machine_mode mode0 = insn_data[icode].operand[0].mode; | |
15283 | machine_mode mode1 = insn_data[icode].operand[1].mode; | |
15284 | machine_mode mode2 = insn_data[icode].operand[2].mode; | |
15285 | ||
15286 | /* Invalid arguments. Bail before doing anything stupid! */ | |
15287 | if (arg0 == error_mark_node | |
15288 | || arg1 == error_mark_node | |
15289 | || arg2 == error_mark_node) | |
15290 | return const0_rtx; | |
15291 | ||
15292 | if (! (*insn_data[icode].operand[2].predicate) (op0, mode2)) | |
15293 | op0 = copy_to_mode_reg (mode2, op0); | |
15294 | if (! (*insn_data[icode].operand[0].predicate) (op1, mode0)) | |
15295 | op1 = copy_to_mode_reg (mode0, op1); | |
15296 | if (! (*insn_data[icode].operand[1].predicate) (op2, mode1)) | |
15297 | op2 = copy_to_mode_reg (mode1, op2); | |
15298 | ||
15299 | pat = GEN_FCN (icode) (op1, op2, op0); | |
15300 | if (pat) | |
15301 | emit_insn (pat); | |
15302 | return NULL_RTX; | |
15303 | } | |
15304 | ||
15305 | static rtx | |
15306 | paired_expand_stv_builtin (enum insn_code icode, tree exp) | |
15307 | { | |
15308 | tree arg0 = CALL_EXPR_ARG (exp, 0); | |
15309 | tree arg1 = CALL_EXPR_ARG (exp, 1); | |
15310 | tree arg2 = CALL_EXPR_ARG (exp, 2); | |
15311 | rtx op0 = expand_normal (arg0); | |
15312 | rtx op1 = expand_normal (arg1); | |
15313 | rtx op2 = expand_normal (arg2); | |
15314 | rtx pat, addr; | |
15315 | machine_mode tmode = insn_data[icode].operand[0].mode; | |
15316 | machine_mode mode1 = Pmode; | |
15317 | machine_mode mode2 = Pmode; | |
15318 | ||
15319 | /* Invalid arguments. Bail before doing anything stupid! */ | |
15320 | if (arg0 == error_mark_node | |
15321 | || arg1 == error_mark_node | |
15322 | || arg2 == error_mark_node) | |
15323 | return const0_rtx; | |
15324 | ||
15325 | if (! (*insn_data[icode].operand[1].predicate) (op0, tmode)) | |
15326 | op0 = copy_to_mode_reg (tmode, op0); | |
15327 | ||
15328 | op2 = copy_to_mode_reg (mode2, op2); | |
15329 | ||
15330 | if (op1 == const0_rtx) | |
15331 | { | |
15332 | addr = gen_rtx_MEM (tmode, op2); | |
15333 | } | |
15334 | else | |
15335 | { | |
15336 | op1 = copy_to_mode_reg (mode1, op1); | |
15337 | addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2)); | |
15338 | } | |
15339 | ||
15340 | pat = GEN_FCN (icode) (addr, op0); | |
15341 | if (pat) | |
15342 | emit_insn (pat); | |
15343 | return NULL_RTX; | |
15344 | } | |
15345 | ||
15346 | static rtx | |
15347 | altivec_expand_stxvl_builtin (enum insn_code icode, tree exp) | |
15348 | { | |
15349 | rtx pat; | |
15350 | tree arg0 = CALL_EXPR_ARG (exp, 0); | |
15351 | tree arg1 = CALL_EXPR_ARG (exp, 1); | |
15352 | tree arg2 = CALL_EXPR_ARG (exp, 2); | |
15353 | rtx op0 = expand_normal (arg0); | |
15354 | rtx op1 = expand_normal (arg1); | |
15355 | rtx op2 = expand_normal (arg2); | |
15356 | machine_mode mode0 = insn_data[icode].operand[0].mode; | |
15357 | machine_mode mode1 = insn_data[icode].operand[1].mode; | |
15358 | machine_mode mode2 = insn_data[icode].operand[2].mode; | |
15359 | ||
15360 | if (icode == CODE_FOR_nothing) | |
15361 | /* Builtin not supported on this processor. */ | |
15362 | return NULL_RTX; | |
15363 | ||
15364 | /* If we got invalid arguments, bail out before generating bad rtl. */ | |
15365 | if (arg0 == error_mark_node | |
15366 | || arg1 == error_mark_node | |
15367 | || arg2 == error_mark_node) | |
15368 | return NULL_RTX; | |
15369 | ||
15370 | if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) | |
15371 | op0 = copy_to_mode_reg (mode0, op0); | |
15372 | if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) | |
15373 | op1 = copy_to_mode_reg (mode1, op1); | |
15374 | if (! (*insn_data[icode].operand[3].predicate) (op2, mode2)) | |
15375 | op2 = copy_to_mode_reg (mode2, op2); | |
15376 | ||
15377 | pat = GEN_FCN (icode) (op0, op1, op2); | |
15378 | if (pat) | |
15379 | emit_insn (pat); | |
15380 | ||
15381 | return NULL_RTX; | |
15382 | } | |
15383 | ||
15384 | static rtx | |
15385 | altivec_expand_stv_builtin (enum insn_code icode, tree exp) | |
15386 | { | |
15387 | tree arg0 = CALL_EXPR_ARG (exp, 0); | |
15388 | tree arg1 = CALL_EXPR_ARG (exp, 1); | |
15389 | tree arg2 = CALL_EXPR_ARG (exp, 2); | |
15390 | rtx op0 = expand_normal (arg0); | |
15391 | rtx op1 = expand_normal (arg1); | |
15392 | rtx op2 = expand_normal (arg2); | |
15393 | rtx pat, addr, rawaddr; | |
15394 | machine_mode tmode = insn_data[icode].operand[0].mode; | |
15395 | machine_mode smode = insn_data[icode].operand[1].mode; | |
15396 | machine_mode mode1 = Pmode; | |
15397 | machine_mode mode2 = Pmode; | |
15398 | ||
15399 | /* Invalid arguments. Bail before doing anything stupid! */ | |
15400 | if (arg0 == error_mark_node | |
15401 | || arg1 == error_mark_node | |
15402 | || arg2 == error_mark_node) | |
15403 | return const0_rtx; | |
15404 | ||
15405 | op2 = copy_to_mode_reg (mode2, op2); | |
15406 | ||
15407 | /* For STVX, express the RTL accurately by ANDing the address with -16. | |
15408 | STVXL and STVE*X expand to use UNSPECs to hide their special behavior, | |
15409 | so the raw address is fine. */ | |
15410 | if (icode == CODE_FOR_altivec_stvx_v2df_2op | |
15411 | || icode == CODE_FOR_altivec_stvx_v2di_2op | |
15412 | || icode == CODE_FOR_altivec_stvx_v4sf_2op | |
15413 | || icode == CODE_FOR_altivec_stvx_v4si_2op | |
15414 | || icode == CODE_FOR_altivec_stvx_v8hi_2op | |
15415 | || icode == CODE_FOR_altivec_stvx_v16qi_2op) | |
15416 | { | |
15417 | if (op1 == const0_rtx) | |
15418 | rawaddr = op2; | |
15419 | else | |
15420 | { | |
15421 | op1 = copy_to_mode_reg (mode1, op1); | |
15422 | rawaddr = gen_rtx_PLUS (Pmode, op2, op1); | |
15423 | } | |
15424 | ||
15425 | addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16)); | |
15426 | addr = gen_rtx_MEM (tmode, addr); | |
15427 | ||
15428 | op0 = copy_to_mode_reg (tmode, op0); | |
15429 | ||
15430 | /* For -maltivec=be, emit a permute to swap the elements, followed | |
15431 | by the store. */ | |
15432 | if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) | |
15433 | { | |
15434 | rtx temp = gen_reg_rtx (tmode); | |
15435 | rtx sel = swap_selector_for_mode (tmode); | |
15436 | rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, op0, op0, sel), | |
15437 | UNSPEC_VPERM); | |
15438 | emit_insn (gen_rtx_SET (temp, vperm)); | |
15439 | emit_insn (gen_rtx_SET (addr, temp)); | |
15440 | } | |
15441 | else | |
15442 | emit_insn (gen_rtx_SET (addr, op0)); | |
15443 | } | |
15444 | else | |
15445 | { | |
15446 | if (! (*insn_data[icode].operand[1].predicate) (op0, smode)) | |
15447 | op0 = copy_to_mode_reg (smode, op0); | |
15448 | ||
15449 | if (op1 == const0_rtx) | |
15450 | addr = gen_rtx_MEM (tmode, op2); | |
15451 | else | |
15452 | { | |
15453 | op1 = copy_to_mode_reg (mode1, op1); | |
15454 | addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1)); | |
15455 | } | |
15456 | ||
15457 | pat = GEN_FCN (icode) (addr, op0); | |
15458 | if (pat) | |
15459 | emit_insn (pat); | |
15460 | } | |
15461 | ||
15462 | return NULL_RTX; | |
15463 | } | |
15464 | ||
15465 | /* Return the appropriate SPR number associated with the given builtin. */ | |
15466 | static inline HOST_WIDE_INT | |
15467 | htm_spr_num (enum rs6000_builtins code) | |
15468 | { | |
15469 | if (code == HTM_BUILTIN_GET_TFHAR | |
15470 | || code == HTM_BUILTIN_SET_TFHAR) | |
15471 | return TFHAR_SPR; | |
15472 | else if (code == HTM_BUILTIN_GET_TFIAR | |
15473 | || code == HTM_BUILTIN_SET_TFIAR) | |
15474 | return TFIAR_SPR; | |
15475 | else if (code == HTM_BUILTIN_GET_TEXASR | |
15476 | || code == HTM_BUILTIN_SET_TEXASR) | |
15477 | return TEXASR_SPR; | |
15478 | gcc_assert (code == HTM_BUILTIN_GET_TEXASRU | |
15479 | || code == HTM_BUILTIN_SET_TEXASRU); | |
15480 | return TEXASRU_SPR; | |
15481 | } | |
15482 | ||
15483 | /* Return the appropriate SPR regno associated with the given builtin. */ | |
15484 | static inline HOST_WIDE_INT | |
15485 | htm_spr_regno (enum rs6000_builtins code) | |
15486 | { | |
15487 | if (code == HTM_BUILTIN_GET_TFHAR | |
15488 | || code == HTM_BUILTIN_SET_TFHAR) | |
15489 | return TFHAR_REGNO; | |
15490 | else if (code == HTM_BUILTIN_GET_TFIAR | |
15491 | || code == HTM_BUILTIN_SET_TFIAR) | |
15492 | return TFIAR_REGNO; | |
15493 | gcc_assert (code == HTM_BUILTIN_GET_TEXASR | |
15494 | || code == HTM_BUILTIN_SET_TEXASR | |
15495 | || code == HTM_BUILTIN_GET_TEXASRU | |
15496 | || code == HTM_BUILTIN_SET_TEXASRU); | |
15497 | return TEXASR_REGNO; | |
15498 | } | |
15499 | ||
15500 | /* Return the correct ICODE value depending on whether we are | |
15501 | setting or reading the HTM SPRs. */ | |
15502 | static inline enum insn_code | |
15503 | rs6000_htm_spr_icode (bool nonvoid) | |
15504 | { | |
15505 | if (nonvoid) | |
15506 | return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si; | |
15507 | else | |
15508 | return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si; | |
15509 | } | |
15510 | ||
15511 | /* Expand the HTM builtin in EXP and store the result in TARGET. | |
15512 | Store true in *EXPANDEDP if we found a builtin to expand. */ | |
15513 | static rtx | |
15514 | htm_expand_builtin (tree exp, rtx target, bool * expandedp) | |
15515 | { | |
15516 | tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); | |
15517 | bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node; | |
15518 | enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl); | |
15519 | const struct builtin_description *d; | |
15520 | size_t i; | |
15521 | ||
15522 | *expandedp = true; | |
15523 | ||
15524 | if (!TARGET_POWERPC64 | |
15525 | && (fcode == HTM_BUILTIN_TABORTDC | |
15526 | || fcode == HTM_BUILTIN_TABORTDCI)) | |
15527 | { | |
15528 | size_t uns_fcode = (size_t)fcode; | |
15529 | const char *name = rs6000_builtin_info[uns_fcode].name; | |
15530 | error ("builtin %s is only valid in 64-bit mode", name); | |
15531 | return const0_rtx; | |
15532 | } | |
15533 | ||
15534 | /* Expand the HTM builtins. */ | |
15535 | d = bdesc_htm; | |
15536 | for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++) | |
15537 | if (d->code == fcode) | |
15538 | { | |
15539 | rtx op[MAX_HTM_OPERANDS], pat; | |
15540 | int nopnds = 0; | |
15541 | tree arg; | |
15542 | call_expr_arg_iterator iter; | |
15543 | unsigned attr = rs6000_builtin_info[fcode].attr; | |
15544 | enum insn_code icode = d->icode; | |
15545 | const struct insn_operand_data *insn_op; | |
15546 | bool uses_spr = (attr & RS6000_BTC_SPR); | |
15547 | rtx cr = NULL_RTX; | |
15548 | ||
15549 | if (uses_spr) | |
15550 | icode = rs6000_htm_spr_icode (nonvoid); | |
15551 | insn_op = &insn_data[icode].operand[0]; | |
15552 | ||
15553 | if (nonvoid) | |
15554 | { | |
1e0295b9 | 15555 | machine_mode tmode = (uses_spr) ? insn_op->mode : E_SImode; |
01e91138 | 15556 | if (!target |
15557 | || GET_MODE (target) != tmode | |
15558 | || (uses_spr && !(*insn_op->predicate) (target, tmode))) | |
15559 | target = gen_reg_rtx (tmode); | |
15560 | if (uses_spr) | |
15561 | op[nopnds++] = target; | |
15562 | } | |
15563 | ||
15564 | FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) | |
15565 | { | |
15566 | if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS) | |
15567 | return const0_rtx; | |
15568 | ||
15569 | insn_op = &insn_data[icode].operand[nopnds]; | |
15570 | ||
15571 | op[nopnds] = expand_normal (arg); | |
15572 | ||
15573 | if (!(*insn_op->predicate) (op[nopnds], insn_op->mode)) | |
15574 | { | |
15575 | if (!strcmp (insn_op->constraint, "n")) | |
15576 | { | |
15577 | int arg_num = (nonvoid) ? nopnds : nopnds + 1; | |
15578 | if (!CONST_INT_P (op[nopnds])) | |
15579 | error ("argument %d must be an unsigned literal", arg_num); | |
15580 | else | |
15581 | error ("argument %d is an unsigned literal that is " | |
15582 | "out of range", arg_num); | |
15583 | return const0_rtx; | |
15584 | } | |
15585 | op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]); | |
15586 | } | |
15587 | ||
15588 | nopnds++; | |
15589 | } | |
15590 | ||
15591 | /* Handle the builtins for extended mnemonics. These accept | |
15592 | no arguments, but map to builtins that take arguments. */ | |
15593 | switch (fcode) | |
15594 | { | |
15595 | case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */ | |
15596 | case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */ | |
15597 | op[nopnds++] = GEN_INT (1); | |
15598 | if (flag_checking) | |
15599 | attr |= RS6000_BTC_UNARY; | |
15600 | break; | |
15601 | case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */ | |
15602 | op[nopnds++] = GEN_INT (0); | |
15603 | if (flag_checking) | |
15604 | attr |= RS6000_BTC_UNARY; | |
15605 | break; | |
15606 | default: | |
15607 | break; | |
15608 | } | |
15609 | ||
15610 | /* If this builtin accesses SPRs, then pass in the appropriate | |
15611 | SPR number and SPR regno as the last two operands. */ | |
15612 | if (uses_spr) | |
15613 | { | |
15614 | machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode; | |
15615 | op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode)); | |
15616 | op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode)); | |
15617 | } | |
15618 | /* If this builtin accesses a CR, then pass in a scratch | |
15619 | CR as the last operand. */ | |
15620 | else if (attr & RS6000_BTC_CR) | |
15621 | { | |
15621 | cr = gen_reg_rtx (CCmode); | |
15622 | op[nopnds++] = cr; | |
15623 | } | |
15624 | ||
15625 | if (flag_checking) | |
15626 | { | |
15627 | int expected_nopnds = 0; | |
15628 | if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY) | |
15629 | expected_nopnds = 1; | |
15630 | else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY) | |
15631 | expected_nopnds = 2; | |
15632 | else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY) | |
15633 | expected_nopnds = 3; | |
15634 | if (!(attr & RS6000_BTC_VOID)) | |
15635 | expected_nopnds += 1; | |
15636 | if (uses_spr) | |
15637 | expected_nopnds += 2; | |
15638 | ||
15639 | gcc_assert (nopnds == expected_nopnds | |
15640 | && nopnds <= MAX_HTM_OPERANDS); | |
15641 | } | |
15642 | ||
15643 | switch (nopnds) | |
15644 | { | |
15645 | case 1: | |
15646 | pat = GEN_FCN (icode) (op[0]); | |
15647 | break; | |
15648 | case 2: | |
15649 | pat = GEN_FCN (icode) (op[0], op[1]); | |
15650 | break; | |
15651 | case 3: | |
15652 | pat = GEN_FCN (icode) (op[0], op[1], op[2]); | |
15653 | break; | |
15654 | case 4: | |
15655 | pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]); | |
15656 | break; | |
15657 | default: | |
15658 | gcc_unreachable (); | |
15659 | } | |
15660 | if (!pat) | |
15661 | return NULL_RTX; | |
15662 | emit_insn (pat); | |
15663 | ||
15664 | if (attr & RS6000_BTC_CR) | |
15665 | { | |
15666 | if (fcode == HTM_BUILTIN_TBEGIN) | |
15667 | { | |
15668 | /* Emit code to set TARGET to true or false depending on | |
15669 | whether the tbegin. instruction succeeded or failed | |
15670 | to start a transaction. We do this by placing the 1's | |
15671 | complement of CR's EQ bit into TARGET. */ | |
15672 | rtx scratch = gen_reg_rtx (SImode); | |
15673 | emit_insn (gen_rtx_SET (scratch, | |
15674 | gen_rtx_EQ (SImode, cr, | |
15675 | const0_rtx))); | |
15676 | emit_insn (gen_rtx_SET (target, | |
15677 | gen_rtx_XOR (SImode, scratch, | |
15678 | GEN_INT (1)))); | |
15679 | } | |
15680 | else | |
15681 | { | |
15682 | /* Emit code to copy the 4-bit condition register field | |
15683 | CR into the least significant end of register TARGET. */ | |
15684 | rtx scratch1 = gen_reg_rtx (SImode); | |
15685 | rtx scratch2 = gen_reg_rtx (SImode); | |
15686 | rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0); | |
15687 | emit_insn (gen_movcc (subreg, cr)); | |
15688 | emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28))); | |
15689 | emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf))); | |
15690 | } | |
15691 | } | |
15692 | ||
15693 | if (nonvoid) | |
15694 | return target; | |
15695 | return const0_rtx; | |
15696 | } | |
15697 | ||
15698 | *expandedp = false; | |
15699 | return NULL_RTX; | |
15700 | } | |
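
/* A typical source-level use of the tbegin handling above (a sketch;
   __builtin_tbegin returns nonzero when the transaction starts):

     if (__builtin_tbegin (0))
       {
	 ... transactional code ...
	 __builtin_tend (0);
       }
     else
       handle_tx_failure ();	   (hypothetical fallback)
*/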
15701 | ||
15702 | /* Expand the CPU builtin in FCODE and store the result in TARGET. */ | |
15703 | ||
15704 | static rtx | |
15705 | cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED, | |
15706 | rtx target) | |
15707 | { | |
15708 | /* __builtin_cpu_init () is a nop, so expand to nothing. */ | |
15709 | if (fcode == RS6000_BUILTIN_CPU_INIT) | |
15710 | return const0_rtx; | |
15711 | ||
15712 | if (target == 0 || GET_MODE (target) != SImode) | |
15713 | target = gen_reg_rtx (SImode); | |
15714 | ||
15715 | #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB | |
15716 | tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0); | |
15717 | if (TREE_CODE (arg) != STRING_CST) | |
15718 | { | |
15719 | error ("builtin %s only accepts a string argument", | |
15720 | rs6000_builtin_info[(size_t) fcode].name); | |
15721 | return const0_rtx; | |
15722 | } | |
15723 | ||
15724 | if (fcode == RS6000_BUILTIN_CPU_IS) | |
15725 | { | |
15726 | const char *cpu = TREE_STRING_POINTER (arg); | |
15727 | rtx cpuid = NULL_RTX; | |
15728 | for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++) | |
15729 | if (strcmp (cpu, cpu_is_info[i].cpu) == 0) | |
15730 | { | |
15731 | /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */ | |
15732 | cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM); | |
15733 | break; | |
15734 | } | |
15735 | if (cpuid == NULL_RTX) | |
15736 | { | |
15737 | /* Invalid CPU argument. */ | |
15738 | error ("cpu %s is an invalid argument to builtin %s", | |
15739 | cpu, rs6000_builtin_info[(size_t) fcode].name); | |
15740 | return const0_rtx; | |
15741 | } | |
15742 | ||
15743 | rtx platform = gen_reg_rtx (SImode); | |
15744 | rtx tcbmem = gen_const_mem (SImode, | |
15745 | gen_rtx_PLUS (Pmode, | |
15746 | gen_rtx_REG (Pmode, TLS_REGNUM), | |
15747 | GEN_INT (TCB_PLATFORM_OFFSET))); | |
15748 | emit_move_insn (platform, tcbmem); | |
15749 | emit_insn (gen_eqsi3 (target, platform, cpuid)); | |
15750 | } | |
15751 | else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS) | |
15752 | { | |
15753 | const char *hwcap = TREE_STRING_POINTER (arg); | |
15754 | rtx mask = NULL_RTX; | |
15755 | int hwcap_offset; | |
15756 | for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++) | |
15757 | if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0) | |
15758 | { | |
15759 | mask = GEN_INT (cpu_supports_info[i].mask); | |
15760 | hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id); | |
15761 | break; | |
15762 | } | |
15763 | if (mask == NULL_RTX) | |
15764 | { | |
15765 | /* Invalid HWCAP argument. */ | |
15766 | error ("hwcap %s is an invalid argument to builtin %s", | |
15767 | hwcap, rs6000_builtin_info[(size_t) fcode].name); | |
15768 | return const0_rtx; | |
15769 | } | |
15770 | ||
15771 | rtx tcb_hwcap = gen_reg_rtx (SImode); | |
15772 | rtx tcbmem = gen_const_mem (SImode, | |
15773 | gen_rtx_PLUS (Pmode, | |
15774 | gen_rtx_REG (Pmode, TLS_REGNUM), | |
15775 | GEN_INT (hwcap_offset))); | |
15776 | emit_move_insn (tcb_hwcap, tcbmem); | |
15777 | rtx scratch1 = gen_reg_rtx (SImode); | |
15778 | emit_insn (gen_rtx_SET (scratch1, gen_rtx_AND (SImode, tcb_hwcap, mask))); | |
15779 | rtx scratch2 = gen_reg_rtx (SImode); | |
15780 | emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx)); | |
15781 | emit_insn (gen_rtx_SET (target, gen_rtx_XOR (SImode, scratch2, const1_rtx))); | |
15782 | } | |
15783 | ||
15784 | /* Record that we have expanded a CPU builtin, so that we can later | |
15785 | emit a reference to the special symbol exported by LIBC to ensure we | |
15786 | do not link against an old LIBC that doesn't support this feature. */ | |
15787 | cpu_builtin_p = true; | |
15788 | ||
15789 | #else | |
15790 | /* For old LIBCs, always return FALSE. */ | |
15791 | emit_move_insn (target, GEN_INT (0)); | |
15792 | #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */ | |
15793 | ||
15794 | return target; | |
15795 | } | |
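
/* Source-level usage of the two builtins handled above (a sketch;
   the glibc in use must be new enough to provide the TCB fields):

     if (__builtin_cpu_is ("power9"))
       use_power9_path ();		   (hypothetical)
     if (__builtin_cpu_supports ("vsx"))
       use_vsx_path ();			   (hypothetical)

   On older C libraries both builtins simply evaluate to false, per
   the #else arm above.  */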
15796 | ||
15797 | static rtx | |
15798 | rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target) | |
15799 | { | |
15800 | rtx pat; | |
15801 | tree arg0 = CALL_EXPR_ARG (exp, 0); | |
15802 | tree arg1 = CALL_EXPR_ARG (exp, 1); | |
15803 | tree arg2 = CALL_EXPR_ARG (exp, 2); | |
15804 | rtx op0 = expand_normal (arg0); | |
15805 | rtx op1 = expand_normal (arg1); | |
15806 | rtx op2 = expand_normal (arg2); | |
15807 | machine_mode tmode = insn_data[icode].operand[0].mode; | |
15808 | machine_mode mode0 = insn_data[icode].operand[1].mode; | |
15809 | machine_mode mode1 = insn_data[icode].operand[2].mode; | |
15810 | machine_mode mode2 = insn_data[icode].operand[3].mode; | |
15811 | ||
15812 | if (icode == CODE_FOR_nothing) | |
15813 | /* Builtin not supported on this processor. */ | |
15814 | return 0; | |
15815 | ||
15816 | /* If we got invalid arguments, bail out before generating bad rtl. */ | |
15817 | if (arg0 == error_mark_node | |
15818 | || arg1 == error_mark_node | |
15819 | || arg2 == error_mark_node) | |
15820 | return const0_rtx; | |
15821 | ||
15822 | /* Check and prepare the arguments, depending on the instruction code. | |
15823 | ||
15824 | Note that a switch statement instead of the sequence of tests | |
15825 | would be incorrect, as many of the CODE_FOR values could be | |
15826 | CODE_FOR_nothing, and that would yield multiple case labels | |
15827 | with identical values. (At runtime we would never reach this | |
15828 | code for such unsupported builtins anyway.) */ | |
15829 | if (icode == CODE_FOR_altivec_vsldoi_v4sf | |
15830 | || icode == CODE_FOR_altivec_vsldoi_v2df | |
15831 | || icode == CODE_FOR_altivec_vsldoi_v4si | |
15832 | || icode == CODE_FOR_altivec_vsldoi_v8hi | |
15833 | || icode == CODE_FOR_altivec_vsldoi_v16qi) | |
15834 | { | |
15835 | /* Only allow 4-bit unsigned literals. */ | |
15836 | STRIP_NOPS (arg2); | |
15837 | if (TREE_CODE (arg2) != INTEGER_CST | |
15838 | || TREE_INT_CST_LOW (arg2) & ~0xf) | |
15839 | { | |
15840 | error ("argument 3 must be a 4-bit unsigned literal"); | |
15841 | return CONST0_RTX (tmode); | |
15842 | } | |
15843 | } | |
15844 | else if (icode == CODE_FOR_vsx_xxpermdi_v2df | |
15845 | || icode == CODE_FOR_vsx_xxpermdi_v2di | |
15846 | || icode == CODE_FOR_vsx_xxpermdi_v2df_be | |
15847 | || icode == CODE_FOR_vsx_xxpermdi_v2di_be | |
15848 | || icode == CODE_FOR_vsx_xxpermdi_v1ti | |
15849 | || icode == CODE_FOR_vsx_xxpermdi_v4sf | |
15850 | || icode == CODE_FOR_vsx_xxpermdi_v4si | |
15851 | || icode == CODE_FOR_vsx_xxpermdi_v8hi | |
15852 | || icode == CODE_FOR_vsx_xxpermdi_v16qi | |
15853 | || icode == CODE_FOR_vsx_xxsldwi_v16qi | |
15854 | || icode == CODE_FOR_vsx_xxsldwi_v8hi | |
15855 | || icode == CODE_FOR_vsx_xxsldwi_v4si | |
15856 | || icode == CODE_FOR_vsx_xxsldwi_v4sf | |
15857 | || icode == CODE_FOR_vsx_xxsldwi_v2di | |
15858 | || icode == CODE_FOR_vsx_xxsldwi_v2df) | |
15859 | { | |
15860 | /* Only allow 2-bit unsigned literals. */ | |
15861 | STRIP_NOPS (arg2); | |
15862 | if (TREE_CODE (arg2) != INTEGER_CST | |
15863 | || TREE_INT_CST_LOW (arg2) & ~0x3) | |
15864 | { | |
15865 | error ("argument 3 must be a 2-bit unsigned literal"); | |
15866 | return CONST0_RTX (tmode); | |
15867 | } | |
15868 | } | |
15869 | else if (icode == CODE_FOR_vsx_set_v2df | |
15870 | || icode == CODE_FOR_vsx_set_v2di | |
15871 | || icode == CODE_FOR_bcdadd | |
15872 | || icode == CODE_FOR_bcdadd_lt | |
15873 | || icode == CODE_FOR_bcdadd_eq | |
15874 | || icode == CODE_FOR_bcdadd_gt | |
15875 | || icode == CODE_FOR_bcdsub | |
15876 | || icode == CODE_FOR_bcdsub_lt | |
15877 | || icode == CODE_FOR_bcdsub_eq | |
15878 | || icode == CODE_FOR_bcdsub_gt) | |
15879 | { | |
15880 | /* Only allow 1-bit unsigned literals. */ | |
15881 | STRIP_NOPS (arg2); | |
15882 | if (TREE_CODE (arg2) != INTEGER_CST | |
15883 | || TREE_INT_CST_LOW (arg2) & ~0x1) | |
15884 | { | |
15885 | error ("argument 3 must be a 1-bit unsigned literal"); | |
15886 | return CONST0_RTX (tmode); | |
15887 | } | |
15888 | } | |
15889 | else if (icode == CODE_FOR_dfp_ddedpd_dd | |
15890 | || icode == CODE_FOR_dfp_ddedpd_td) | |
15891 | { | |
15892 | /* Only allow 2-bit unsigned literals where the value is 0 or 2. */ | |
15893 | STRIP_NOPS (arg0); | |
15894 | if (TREE_CODE (arg0) != INTEGER_CST | |
15895 | || TREE_INT_CST_LOW (arg0) & ~0x3) | |
15896 | { | |
15897 | error ("argument 1 must be 0 or 2"); | |
15898 | return CONST0_RTX (tmode); | |
15899 | } | |
15900 | } | |
15901 | else if (icode == CODE_FOR_dfp_denbcd_dd | |
15902 | || icode == CODE_FOR_dfp_denbcd_td) | |
15903 | { | |
15904 | /* Only allow 1-bit unsigned literals. */ | |
15905 | STRIP_NOPS (arg0); | |
15906 | if (TREE_CODE (arg0) != INTEGER_CST | |
15907 | || TREE_INT_CST_LOW (arg0) & ~0x1) | |
15908 | { | |
15909 | error ("argument 1 must be a 1-bit unsigned literal"); | |
15910 | return CONST0_RTX (tmode); | |
15911 | } | |
15912 | } | |
15913 | else if (icode == CODE_FOR_dfp_dscli_dd | |
15914 | || icode == CODE_FOR_dfp_dscli_td | |
15915 | || icode == CODE_FOR_dfp_dscri_dd | |
15916 | || icode == CODE_FOR_dfp_dscri_td) | |
15917 | { | |
15918 | /* Only allow 6-bit unsigned literals. */ | |
15919 | STRIP_NOPS (arg1); | |
15920 | if (TREE_CODE (arg1) != INTEGER_CST | |
15921 | || TREE_INT_CST_LOW (arg1) & ~0x3f) | |
15922 | { | |
15923 | error ("argument 2 must be a 6-bit unsigned literal"); | |
15924 | return CONST0_RTX (tmode); | |
15925 | } | |
15926 | } | |
15927 | else if (icode == CODE_FOR_crypto_vshasigmaw | |
15928 | || icode == CODE_FOR_crypto_vshasigmad) | |
15929 | { | |
15930 | /* Check whether the 2nd and 3rd arguments are integer constants and in | |
15931 | range and prepare arguments. */ | |
15932 | STRIP_NOPS (arg1); | |
15933 | if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2)) | |
15934 | { | |
15935 | error ("argument 2 must be 0 or 1"); | |
15936 | return CONST0_RTX (tmode); | |
15937 | } | |
15938 | ||
15939 | STRIP_NOPS (arg2); | |
15940 | if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16)) | |
15941 | { | |
15942 | error ("argument 3 must be in the range 0..15"); | |
15943 | return CONST0_RTX (tmode); | |
15944 | } | |
15945 | } | |
15946 | ||
15947 | if (target == 0 | |
15948 | || GET_MODE (target) != tmode | |
15949 | || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) | |
15950 | target = gen_reg_rtx (tmode); | |
15951 | ||
15952 | if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) | |
15953 | op0 = copy_to_mode_reg (mode0, op0); | |
15954 | if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) | |
15955 | op1 = copy_to_mode_reg (mode1, op1); | |
15956 | if (! (*insn_data[icode].operand[3].predicate) (op2, mode2)) | |
15957 | op2 = copy_to_mode_reg (mode2, op2); | |
15958 | ||
15959 | if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4) | |
15960 | pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode)); | |
15961 | else | |
15962 | pat = GEN_FCN (icode) (target, op0, op1, op2); | |
15963 | if (! pat) | |
15964 | return 0; | |
15965 | emit_insn (pat); | |
15966 | ||
15967 | return target; | |
15968 | } | |
15969 | ||
15970 | /* Expand the lvx builtins. */ | |
15971 | static rtx | |
15972 | altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp) | |
15973 | { | |
15974 | tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); | |
15975 | unsigned int fcode = DECL_FUNCTION_CODE (fndecl); | |
15976 | tree arg0; | |
15977 | machine_mode tmode, mode0; | |
15978 | rtx pat, op0; | |
15979 | enum insn_code icode; | |
15980 | ||
15981 | switch (fcode) | |
15982 | { | |
15983 | case ALTIVEC_BUILTIN_LD_INTERNAL_16qi: | |
15984 | icode = CODE_FOR_vector_altivec_load_v16qi; | |
15985 | break; | |
15986 | case ALTIVEC_BUILTIN_LD_INTERNAL_8hi: | |
15987 | icode = CODE_FOR_vector_altivec_load_v8hi; | |
15988 | break; | |
15989 | case ALTIVEC_BUILTIN_LD_INTERNAL_4si: | |
15990 | icode = CODE_FOR_vector_altivec_load_v4si; | |
15991 | break; | |
15992 | case ALTIVEC_BUILTIN_LD_INTERNAL_4sf: | |
15993 | icode = CODE_FOR_vector_altivec_load_v4sf; | |
15994 | break; | |
15995 | case ALTIVEC_BUILTIN_LD_INTERNAL_2df: | |
15996 | icode = CODE_FOR_vector_altivec_load_v2df; | |
15997 | break; | |
15998 | case ALTIVEC_BUILTIN_LD_INTERNAL_2di: | |
15999 | icode = CODE_FOR_vector_altivec_load_v2di; | |
16000 | break; | |
16001 | case ALTIVEC_BUILTIN_LD_INTERNAL_1ti: | |
16002 | icode = CODE_FOR_vector_altivec_load_v1ti; | |
16003 | break; | |
16004 | default: | |
16005 | *expandedp = false; | |
16006 | return NULL_RTX; | |
16007 | } | |
16008 | ||
16009 | *expandedp = true; | |
16010 | ||
16011 | arg0 = CALL_EXPR_ARG (exp, 0); | |
16012 | op0 = expand_normal (arg0); | |
16013 | tmode = insn_data[icode].operand[0].mode; | |
16014 | mode0 = insn_data[icode].operand[1].mode; | |
16015 | ||
16016 | if (target == 0 | |
16017 | || GET_MODE (target) != tmode | |
16018 | || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) | |
16019 | target = gen_reg_rtx (tmode); | |
16020 | ||
16021 | if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) | |
16022 | op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); | |
16023 | ||
16024 | pat = GEN_FCN (icode) (target, op0); | |
16025 | if (! pat) | |
16026 | return 0; | |
16027 | emit_insn (pat); | |
16028 | return target; | |
16029 | } | |
16030 | ||
16031 | /* Expand the stvx builtins. */ | |
16032 | static rtx | |
16033 | altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED, | |
16034 | bool *expandedp) | |
16035 | { | |
16036 | tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); | |
16037 | unsigned int fcode = DECL_FUNCTION_CODE (fndecl); | |
16038 | tree arg0, arg1; | |
16039 | machine_mode mode0, mode1; | |
16040 | rtx pat, op0, op1; | |
16041 | enum insn_code icode; | |
16042 | ||
16043 | switch (fcode) | |
16044 | { | |
16045 | case ALTIVEC_BUILTIN_ST_INTERNAL_16qi: | |
16046 | icode = CODE_FOR_vector_altivec_store_v16qi; | |
16047 | break; | |
16048 | case ALTIVEC_BUILTIN_ST_INTERNAL_8hi: | |
16049 | icode = CODE_FOR_vector_altivec_store_v8hi; | |
16050 | break; | |
16051 | case ALTIVEC_BUILTIN_ST_INTERNAL_4si: | |
16052 | icode = CODE_FOR_vector_altivec_store_v4si; | |
16053 | break; | |
16054 | case ALTIVEC_BUILTIN_ST_INTERNAL_4sf: | |
16055 | icode = CODE_FOR_vector_altivec_store_v4sf; | |
16056 | break; | |
16057 | case ALTIVEC_BUILTIN_ST_INTERNAL_2df: | |
16058 | icode = CODE_FOR_vector_altivec_store_v2df; | |
16059 | break; | |
16060 | case ALTIVEC_BUILTIN_ST_INTERNAL_2di: | |
16061 | icode = CODE_FOR_vector_altivec_store_v2di; | |
16062 | break; | |
16063 | case ALTIVEC_BUILTIN_ST_INTERNAL_1ti: | |
16064 | icode = CODE_FOR_vector_altivec_store_v1ti; | |
16065 | break; | |
16066 | default: | |
16067 | *expandedp = false; | |
16068 | return NULL_RTX; | |
16069 | } | |
16070 | ||
16071 | arg0 = CALL_EXPR_ARG (exp, 0); | |
16072 | arg1 = CALL_EXPR_ARG (exp, 1); | |
16073 | op0 = expand_normal (arg0); | |
16074 | op1 = expand_normal (arg1); | |
16075 | mode0 = insn_data[icode].operand[0].mode; | |
16076 | mode1 = insn_data[icode].operand[1].mode; | |
16077 | ||
16078 | if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) | |
16079 | op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); | |
16080 | if (! (*insn_data[icode].operand[1].predicate) (op1, mode1)) | |
16081 | op1 = copy_to_mode_reg (mode1, op1); | |
16082 | ||
16083 | pat = GEN_FCN (icode) (op0, op1); | |
16084 | if (pat) | |
16085 | emit_insn (pat); | |
16086 | ||
16087 | *expandedp = true; | |
16088 | return NULL_RTX; | |
16089 | } | |
16090 | ||
16091 | /* Expand the dst builtins. */ | |
16092 | static rtx | |
16093 | altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED, | |
16094 | bool *expandedp) | |
16095 | { | |
16096 | tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); | |
16097 | enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl); | |
16098 | tree arg0, arg1, arg2; | |
16099 | machine_mode mode0, mode1; | |
16100 | rtx pat, op0, op1, op2; | |
16101 | const struct builtin_description *d; | |
16102 | size_t i; | |
16103 | ||
16104 | *expandedp = false; | |
16105 | ||
16106 | /* Handle DST variants. */ | |
16107 | d = bdesc_dst; | |
16108 | for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++) | |
16109 | if (d->code == fcode) | |
16110 | { | |
16111 | arg0 = CALL_EXPR_ARG (exp, 0); | |
16112 | arg1 = CALL_EXPR_ARG (exp, 1); | |
16113 | arg2 = CALL_EXPR_ARG (exp, 2); | |
16114 | op0 = expand_normal (arg0); | |
16115 | op1 = expand_normal (arg1); | |
16116 | op2 = expand_normal (arg2); | |
16117 | mode0 = insn_data[d->icode].operand[0].mode; | |
16118 | mode1 = insn_data[d->icode].operand[1].mode; | |
16119 | ||
16120 | /* Invalid arguments, bail out before generating bad rtl. */ | |
16121 | if (arg0 == error_mark_node | |
16122 | || arg1 == error_mark_node | |
16123 | || arg2 == error_mark_node) | |
16124 | return const0_rtx; | |
16125 | ||
16126 | *expandedp = true; | |
16127 | STRIP_NOPS (arg2); | |
16128 | if (TREE_CODE (arg2) != INTEGER_CST | |
16129 | || TREE_INT_CST_LOW (arg2) & ~0x3) | |
16130 | { | |
16131 | error ("argument to %qs must be a 2-bit unsigned literal", d->name); | |
16132 | return const0_rtx; | |
16133 | } | |
16134 | ||
16135 | if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0)) | |
16136 | op0 = copy_to_mode_reg (Pmode, op0); | |
16137 | if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1)) | |
16138 | op1 = copy_to_mode_reg (mode1, op1); | |
16139 | ||
16140 | pat = GEN_FCN (d->icode) (op0, op1, op2); | |
16141 | if (pat != 0) | |
16142 | emit_insn (pat); | |
16143 | ||
16144 | return NULL_RTX; | |
16145 | } | |
16146 | ||
16147 | return NULL_RTX; | |
16148 | } | |
16149 | ||
16150 | /* Expand vec_init builtin. */ | |
16151 | static rtx | |
16152 | altivec_expand_vec_init_builtin (tree type, tree exp, rtx target) | |
16153 | { | |
16154 | machine_mode tmode = TYPE_MODE (type); | |
16155 | machine_mode inner_mode = GET_MODE_INNER (tmode); | |
16156 | int i, n_elt = GET_MODE_NUNITS (tmode); | |
16157 | ||
16158 | gcc_assert (VECTOR_MODE_P (tmode)); | |
16159 | gcc_assert (n_elt == call_expr_nargs (exp)); | |
16160 | ||
16161 | if (!target || !register_operand (target, tmode)) | |
16162 | target = gen_reg_rtx (tmode); | |
16163 | ||
16164 | /* If we have a vector composed of a single element, such as V1TImode, do | |
16165 | the initialization directly. */ | |
16166 | if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode)) | |
16167 | { | |
16168 | rtx x = expand_normal (CALL_EXPR_ARG (exp, 0)); | |
16169 | emit_move_insn (target, gen_lowpart (tmode, x)); | |
16170 | } | |
16171 | else | |
16172 | { | |
16173 | rtvec v = rtvec_alloc (n_elt); | |
16174 | ||
16175 | for (i = 0; i < n_elt; ++i) | |
16176 | { | |
16177 | rtx x = expand_normal (CALL_EXPR_ARG (exp, i)); | |
16178 | RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x); | |
16179 | } | |
16180 | ||
16181 | rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v)); | |
16182 | } | |
16183 | ||
16184 | return target; | |
16185 | } | |
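| /* Illustration (added): for a V4SI initializer such as | |
| (vector int){a, b, c, d}, the loop above collects the four expanded | |
| arguments into a PARALLEL and hands it to rs6000_expand_vector_init, | |
| while a single-element vector such as V1TI is simply moved into the | |
| target through gen_lowpart. */ | |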
16186 | ||
16187 | /* Return the integer constant in ARG. Constrain it to be in the range | |
16188 | of the subparts of VEC_TYPE; issue an error if not. */ | |
16189 | ||
16190 | static int | |
16191 | get_element_number (tree vec_type, tree arg) | |
16192 | { | |
16193 | unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1; | |
16194 | ||
16195 | if (!tree_fits_uhwi_p (arg) | |
16196 | || (elt = tree_to_uhwi (arg), elt > max)) | |
16197 | { | |
16198 | error ("selector must be an integer constant in the range 0..%wi", max); | |
16199 | return 0; | |
16200 | } | |
16201 | ||
16202 | return elt; | |
16203 | } | |
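| /* Illustration (added): for a vector int argument (V4SImode), | |
| TYPE_VECTOR_SUBPARTS is 4, so MAX is 3; an out-of-range selector, e.g. | |
| ||
| get_element_number (type, build_int_cst (integer_type_node, 4)) | |
| ||
| reports "selector must be an integer constant in the range 0..3" | |
| and returns 0. */ | |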
16204 | ||
16205 | /* Expand vec_set builtin. */ | |
16206 | static rtx | |
16207 | altivec_expand_vec_set_builtin (tree exp) | |
16208 | { | |
16209 | machine_mode tmode, mode1; | |
16210 | tree arg0, arg1, arg2; | |
16211 | int elt; | |
16212 | rtx op0, op1; | |
16213 | ||
16214 | arg0 = CALL_EXPR_ARG (exp, 0); | |
16215 | arg1 = CALL_EXPR_ARG (exp, 1); | |
16216 | arg2 = CALL_EXPR_ARG (exp, 2); | |
16217 | ||
16218 | tmode = TYPE_MODE (TREE_TYPE (arg0)); | |
16219 | mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); | |
16220 | gcc_assert (VECTOR_MODE_P (tmode)); | |
16221 | ||
16222 | op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL); | |
16223 | op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL); | |
16224 | elt = get_element_number (TREE_TYPE (arg0), arg2); | |
16225 | ||
16226 | if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode) | |
16227 | op1 = convert_modes (mode1, GET_MODE (op1), op1, true); | |
16228 | ||
16229 | op0 = force_reg (tmode, op0); | |
16230 | op1 = force_reg (mode1, op1); | |
16231 | ||
16232 | rs6000_expand_vector_set (op0, op1, elt); | |
16233 | ||
16234 | return op0; | |
16235 | } | |
16236 | ||
16237 | /* Expand vec_ext builtin. */ | |
16238 | static rtx | |
16239 | altivec_expand_vec_ext_builtin (tree exp, rtx target) | |
16240 | { | |
16241 | machine_mode tmode, mode0; | |
16242 | tree arg0, arg1; | |
16243 | rtx op0; | |
16244 | rtx op1; | |
16245 | ||
16246 | arg0 = CALL_EXPR_ARG (exp, 0); | |
16247 | arg1 = CALL_EXPR_ARG (exp, 1); | |
16248 | ||
16249 | op0 = expand_normal (arg0); | |
16250 | op1 = expand_normal (arg1); | |
16251 | ||
16252 | /* Call get_element_number to validate arg1 if it is a constant. */ | |
16253 | if (TREE_CODE (arg1) == INTEGER_CST) | |
16254 | (void) get_element_number (TREE_TYPE (arg0), arg1); | |
16255 | ||
16256 | tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); | |
16257 | mode0 = TYPE_MODE (TREE_TYPE (arg0)); | |
16258 | gcc_assert (VECTOR_MODE_P (mode0)); | |
16259 | ||
16260 | op0 = force_reg (mode0, op0); | |
16261 | ||
16262 | if (optimize || !target || !register_operand (target, tmode)) | |
16263 | target = gen_reg_rtx (tmode); | |
16264 | ||
16265 | rs6000_expand_vector_extract (target, op0, op1); | |
16266 | ||
16267 | return target; | |
16268 | } | |
16269 | ||
16270 | /* Expand the builtin in EXP and store the result in TARGET. Store | |
16271 | true in *EXPANDEDP if we found a builtin to expand. */ | |
16272 | static rtx | |
16273 | altivec_expand_builtin (tree exp, rtx target, bool *expandedp) | |
16274 | { | |
16275 | const struct builtin_description *d; | |
16276 | size_t i; | |
16277 | enum insn_code icode; | |
16278 | tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); | |
16279 | tree arg0, arg1, arg2; | |
16280 | rtx op0, pat; | |
16281 | machine_mode tmode, mode0; | |
16282 | enum rs6000_builtins fcode | |
16283 | = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl); | |
16284 | ||
16285 | if (rs6000_overloaded_builtin_p (fcode)) | |
16286 | { | |
16287 | *expandedp = true; | |
16288 | error ("unresolved overload for Altivec builtin %qF", fndecl); | |
16289 | ||
16290 | /* Given it is invalid, just generate a normal call. */ | |
16291 | return expand_call (exp, target, false); | |
16292 | } | |
16293 | ||
16294 | target = altivec_expand_ld_builtin (exp, target, expandedp); | |
16295 | if (*expandedp) | |
16296 | return target; | |
16297 | ||
16298 | target = altivec_expand_st_builtin (exp, target, expandedp); | |
16299 | if (*expandedp) | |
16300 | return target; | |
16301 | ||
16302 | target = altivec_expand_dst_builtin (exp, target, expandedp); | |
16303 | if (*expandedp) | |
16304 | return target; | |
16305 | ||
16306 | *expandedp = true; | |
16307 | ||
16308 | switch (fcode) | |
16309 | { | |
16310 | case ALTIVEC_BUILTIN_STVX_V2DF: | |
16311 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df_2op, exp); | |
16312 | case ALTIVEC_BUILTIN_STVX_V2DI: | |
16313 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di_2op, exp); | |
16314 | case ALTIVEC_BUILTIN_STVX_V4SF: | |
16315 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf_2op, exp); | |
16316 | case ALTIVEC_BUILTIN_STVX: | |
16317 | case ALTIVEC_BUILTIN_STVX_V4SI: | |
16318 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si_2op, exp); | |
16319 | case ALTIVEC_BUILTIN_STVX_V8HI: | |
16320 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi_2op, exp); | |
16321 | case ALTIVEC_BUILTIN_STVX_V16QI: | |
16322 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi_2op, exp); | |
16323 | case ALTIVEC_BUILTIN_STVEBX: | |
16324 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp); | |
16325 | case ALTIVEC_BUILTIN_STVEHX: | |
16326 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp); | |
16327 | case ALTIVEC_BUILTIN_STVEWX: | |
16328 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp); | |
16329 | case ALTIVEC_BUILTIN_STVXL_V2DF: | |
16330 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp); | |
16331 | case ALTIVEC_BUILTIN_STVXL_V2DI: | |
16332 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp); | |
16333 | case ALTIVEC_BUILTIN_STVXL_V4SF: | |
16334 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp); | |
16335 | case ALTIVEC_BUILTIN_STVXL: | |
16336 | case ALTIVEC_BUILTIN_STVXL_V4SI: | |
16337 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp); | |
16338 | case ALTIVEC_BUILTIN_STVXL_V8HI: | |
16339 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp); | |
16340 | case ALTIVEC_BUILTIN_STVXL_V16QI: | |
16341 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp); | |
16342 | ||
16343 | case ALTIVEC_BUILTIN_STVLX: | |
16344 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp); | |
16345 | case ALTIVEC_BUILTIN_STVLXL: | |
16346 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp); | |
16347 | case ALTIVEC_BUILTIN_STVRX: | |
16348 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp); | |
16349 | case ALTIVEC_BUILTIN_STVRXL: | |
16350 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp); | |
16351 | ||
16352 | case P9V_BUILTIN_STXVL: | |
16353 | return altivec_expand_stxvl_builtin (CODE_FOR_stxvl, exp); | |
16354 | ||
16355 | case VSX_BUILTIN_STXVD2X_V1TI: | |
16356 | return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp); | |
16357 | case VSX_BUILTIN_STXVD2X_V2DF: | |
16358 | return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp); | |
16359 | case VSX_BUILTIN_STXVD2X_V2DI: | |
16360 | return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp); | |
16361 | case VSX_BUILTIN_STXVW4X_V4SF: | |
16362 | return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp); | |
16363 | case VSX_BUILTIN_STXVW4X_V4SI: | |
16364 | return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp); | |
16365 | case VSX_BUILTIN_STXVW4X_V8HI: | |
16366 | return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp); | |
16367 | case VSX_BUILTIN_STXVW4X_V16QI: | |
16368 | return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp); | |
16369 | ||
16370 | /* For the following on big endian, it's ok to use any appropriate | |
16371 | unaligned-supporting store, so use a generic expander. For | |
16372 | little-endian, the exact element-reversing instruction must | |
16373 | be used. */ | |
16374 | case VSX_BUILTIN_ST_ELEMREV_V2DF: | |
16375 | { | |
16376 | enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df | |
16377 | : CODE_FOR_vsx_st_elemrev_v2df); | |
16378 | return altivec_expand_stv_builtin (code, exp); | |
16379 | } | |
16380 | case VSX_BUILTIN_ST_ELEMREV_V2DI: | |
16381 | { | |
16382 | enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di | |
16383 | : CODE_FOR_vsx_st_elemrev_v2di); | |
16384 | return altivec_expand_stv_builtin (code, exp); | |
16385 | } | |
16386 | case VSX_BUILTIN_ST_ELEMREV_V4SF: | |
16387 | { | |
16388 | enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf | |
16389 | : CODE_FOR_vsx_st_elemrev_v4sf); | |
16390 | return altivec_expand_stv_builtin (code, exp); | |
16391 | } | |
16392 | case VSX_BUILTIN_ST_ELEMREV_V4SI: | |
16393 | { | |
16394 | enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si | |
16395 | : CODE_FOR_vsx_st_elemrev_v4si); | |
16396 | return altivec_expand_stv_builtin (code, exp); | |
16397 | } | |
16398 | case VSX_BUILTIN_ST_ELEMREV_V8HI: | |
16399 | { | |
16400 | enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi | |
16401 | : CODE_FOR_vsx_st_elemrev_v8hi); | |
16402 | return altivec_expand_stv_builtin (code, exp); | |
16403 | } | |
16404 | case VSX_BUILTIN_ST_ELEMREV_V16QI: | |
16405 | { | |
16406 | enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi | |
16407 | : CODE_FOR_vsx_st_elemrev_v16qi); | |
16408 | return altivec_expand_stv_builtin (code, exp); | |
16409 | } | |
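| /* Illustration (added) for the ST_ELEMREV cases above: storing a V4SI | |
| on little-endian must place element 0 at the lowest address. A raw | |
| stxvd2x would leave the two doubleword halves swapped in memory, so | |
| the element-reversing vsx_st_elemrev_* pattern must be used there; | |
| on big-endian the generic vsx_store_* expander already produces the | |
| correct layout. */ | |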
16410 | ||
16411 | case ALTIVEC_BUILTIN_MFVSCR: | |
16412 | icode = CODE_FOR_altivec_mfvscr; | |
16413 | tmode = insn_data[icode].operand[0].mode; | |
16414 | ||
16415 | if (target == 0 | |
16416 | || GET_MODE (target) != tmode | |
16417 | || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) | |
16418 | target = gen_reg_rtx (tmode); | |
16419 | ||
16420 | pat = GEN_FCN (icode) (target); | |
16421 | if (! pat) | |
16422 | return 0; | |
16423 | emit_insn (pat); | |
16424 | return target; | |
16425 | ||
16426 | case ALTIVEC_BUILTIN_MTVSCR: | |
16427 | icode = CODE_FOR_altivec_mtvscr; | |
16428 | arg0 = CALL_EXPR_ARG (exp, 0); | |
16429 | op0 = expand_normal (arg0); | |
16430 | mode0 = insn_data[icode].operand[0].mode; | |
16431 | ||
16432 | /* If we got invalid arguments, bail out before generating bad rtl. */ | |
16433 | if (arg0 == error_mark_node) | |
16434 | return const0_rtx; | |
16435 | ||
16436 | if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) | |
16437 | op0 = copy_to_mode_reg (mode0, op0); | |
16438 | ||
16439 | pat = GEN_FCN (icode) (op0); | |
16440 | if (pat) | |
16441 | emit_insn (pat); | |
16442 | return NULL_RTX; | |
16443 | ||
16444 | case ALTIVEC_BUILTIN_DSSALL: | |
16445 | emit_insn (gen_altivec_dssall ()); | |
16446 | return NULL_RTX; | |
16447 | ||
16448 | case ALTIVEC_BUILTIN_DSS: | |
16449 | icode = CODE_FOR_altivec_dss; | |
16450 | arg0 = CALL_EXPR_ARG (exp, 0); | |
16451 | STRIP_NOPS (arg0); | |
16452 | op0 = expand_normal (arg0); | |
16453 | mode0 = insn_data[icode].operand[0].mode; | |
16454 | ||
16455 | /* If we got invalid arguments, bail out before generating bad rtl. */ | |
16456 | if (arg0 == error_mark_node) | |
16457 | return const0_rtx; | |
16458 | ||
16459 | if (TREE_CODE (arg0) != INTEGER_CST | |
16460 | || TREE_INT_CST_LOW (arg0) & ~0x3) | |
16461 | { | |
16462 | error ("argument to dss must be a 2-bit unsigned literal"); | |
16463 | return const0_rtx; | |
16464 | } | |
16465 | ||
16466 | if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) | |
16467 | op0 = copy_to_mode_reg (mode0, op0); | |
16468 | ||
16469 | emit_insn (gen_altivec_dss (op0)); | |
16470 | return NULL_RTX; | |
16471 | ||
16472 | case ALTIVEC_BUILTIN_VEC_INIT_V4SI: | |
16473 | case ALTIVEC_BUILTIN_VEC_INIT_V8HI: | |
16474 | case ALTIVEC_BUILTIN_VEC_INIT_V16QI: | |
16475 | case ALTIVEC_BUILTIN_VEC_INIT_V4SF: | |
16476 | case VSX_BUILTIN_VEC_INIT_V2DF: | |
16477 | case VSX_BUILTIN_VEC_INIT_V2DI: | |
16478 | case VSX_BUILTIN_VEC_INIT_V1TI: | |
16479 | return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target); | |
16480 | ||
16481 | case ALTIVEC_BUILTIN_VEC_SET_V4SI: | |
16482 | case ALTIVEC_BUILTIN_VEC_SET_V8HI: | |
16483 | case ALTIVEC_BUILTIN_VEC_SET_V16QI: | |
16484 | case ALTIVEC_BUILTIN_VEC_SET_V4SF: | |
16485 | case VSX_BUILTIN_VEC_SET_V2DF: | |
16486 | case VSX_BUILTIN_VEC_SET_V2DI: | |
16487 | case VSX_BUILTIN_VEC_SET_V1TI: | |
16488 | return altivec_expand_vec_set_builtin (exp); | |
16489 | ||
16490 | case ALTIVEC_BUILTIN_VEC_EXT_V4SI: | |
16491 | case ALTIVEC_BUILTIN_VEC_EXT_V8HI: | |
16492 | case ALTIVEC_BUILTIN_VEC_EXT_V16QI: | |
16493 | case ALTIVEC_BUILTIN_VEC_EXT_V4SF: | |
16494 | case VSX_BUILTIN_VEC_EXT_V2DF: | |
16495 | case VSX_BUILTIN_VEC_EXT_V2DI: | |
16496 | case VSX_BUILTIN_VEC_EXT_V1TI: | |
16497 | return altivec_expand_vec_ext_builtin (exp, target); | |
16498 | ||
16499 | case P9V_BUILTIN_VEXTRACT4B: | |
16500 | case P9V_BUILTIN_VEC_VEXTRACT4B: | |
16501 | arg1 = CALL_EXPR_ARG (exp, 1); | |
16502 | STRIP_NOPS (arg1); | |
16503 | ||
16504 | /* Generate a normal call if it is invalid. */ | |
16505 | if (arg1 == error_mark_node) | |
16506 | return expand_call (exp, target, false); | |
16507 | ||
16508 | if (TREE_CODE (arg1) != INTEGER_CST || TREE_INT_CST_LOW (arg1) > 12) | |
16509 | { | |
16510 | error ("second argument to vec_vextract4b must be 0..12"); | |
16511 | return expand_call (exp, target, false); | |
16512 | } | |
16513 | break; | |
16514 | ||
16515 | case P9V_BUILTIN_VINSERT4B: | |
16516 | case P9V_BUILTIN_VINSERT4B_DI: | |
16517 | case P9V_BUILTIN_VEC_VINSERT4B: | |
16518 | arg2 = CALL_EXPR_ARG (exp, 2); | |
16519 | STRIP_NOPS (arg2); | |
16520 | ||
16521 | /* Generate a normal call if it is invalid. */ | |
16522 | if (arg2 == error_mark_node) | |
16523 | return expand_call (exp, target, false); | |
16524 | ||
16525 | if (TREE_CODE (arg2) != INTEGER_CST || TREE_INT_CST_LOW (arg2) > 12) | |
16526 | { | |
16527 | error ("third argument to vec_vinsert4b must be 0..12"); | |
16528 | return expand_call (exp, target, false); | |
16529 | } | |
16530 | break; | |
16531 | ||
16532 | default: | |
16533 | break; | |
16535 | } | |
16536 | ||
16537 | /* Expand abs* operations. */ | |
16538 | d = bdesc_abs; | |
16539 | for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++) | |
16540 | if (d->code == fcode) | |
16541 | return altivec_expand_abs_builtin (d->icode, exp, target); | |
16542 | ||
16543 | /* Expand the AltiVec predicates. */ | |
16544 | d = bdesc_altivec_preds; | |
16545 | for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++) | |
16546 | if (d->code == fcode) | |
16547 | return altivec_expand_predicate_builtin (d->icode, exp, target); | |
16548 | ||
16549 | /* LV* are funky. We initialized them differently. */ | |
16550 | switch (fcode) | |
16551 | { | |
16552 | case ALTIVEC_BUILTIN_LVSL: | |
16553 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl, | |
16554 | exp, target, false); | |
16555 | case ALTIVEC_BUILTIN_LVSR: | |
16556 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr, | |
16557 | exp, target, false); | |
16558 | case ALTIVEC_BUILTIN_LVEBX: | |
16559 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx, | |
16560 | exp, target, false); | |
16561 | case ALTIVEC_BUILTIN_LVEHX: | |
16562 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx, | |
16563 | exp, target, false); | |
16564 | case ALTIVEC_BUILTIN_LVEWX: | |
16565 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx, | |
16566 | exp, target, false); | |
16567 | case ALTIVEC_BUILTIN_LVXL_V2DF: | |
16568 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df, | |
16569 | exp, target, false); | |
16570 | case ALTIVEC_BUILTIN_LVXL_V2DI: | |
16571 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di, | |
16572 | exp, target, false); | |
16573 | case ALTIVEC_BUILTIN_LVXL_V4SF: | |
16574 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf, | |
16575 | exp, target, false); | |
16576 | case ALTIVEC_BUILTIN_LVXL: | |
16577 | case ALTIVEC_BUILTIN_LVXL_V4SI: | |
16578 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si, | |
16579 | exp, target, false); | |
16580 | case ALTIVEC_BUILTIN_LVXL_V8HI: | |
16581 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi, | |
16582 | exp, target, false); | |
16583 | case ALTIVEC_BUILTIN_LVXL_V16QI: | |
16584 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi, | |
16585 | exp, target, false); | |
16586 | case ALTIVEC_BUILTIN_LVX_V2DF: | |
16587 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df_2op, | |
16588 | exp, target, false); | |
16589 | case ALTIVEC_BUILTIN_LVX_V2DI: | |
16590 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di_2op, | |
16591 | exp, target, false); | |
16592 | case ALTIVEC_BUILTIN_LVX_V4SF: | |
16593 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf_2op, | |
16594 | exp, target, false); | |
16595 | case ALTIVEC_BUILTIN_LVX: | |
16596 | case ALTIVEC_BUILTIN_LVX_V4SI: | |
16597 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si_2op, | |
16598 | exp, target, false); | |
16599 | case ALTIVEC_BUILTIN_LVX_V8HI: | |
16600 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi_2op, | |
16601 | exp, target, false); | |
16602 | case ALTIVEC_BUILTIN_LVX_V16QI: | |
16603 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi_2op, | |
16604 | exp, target, false); | |
16605 | case ALTIVEC_BUILTIN_LVLX: | |
16606 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx, | |
16607 | exp, target, true); | |
16608 | case ALTIVEC_BUILTIN_LVLXL: | |
16609 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl, | |
16610 | exp, target, true); | |
16611 | case ALTIVEC_BUILTIN_LVRX: | |
16612 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx, | |
16613 | exp, target, true); | |
16614 | case ALTIVEC_BUILTIN_LVRXL: | |
16615 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl, | |
16616 | exp, target, true); | |
16617 | case VSX_BUILTIN_LXVD2X_V1TI: | |
16618 | return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti, | |
16619 | exp, target, false); | |
16620 | case VSX_BUILTIN_LXVD2X_V2DF: | |
16621 | return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df, | |
16622 | exp, target, false); | |
16623 | case VSX_BUILTIN_LXVD2X_V2DI: | |
16624 | return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di, | |
16625 | exp, target, false); | |
16626 | case VSX_BUILTIN_LXVW4X_V4SF: | |
16627 | return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf, | |
16628 | exp, target, false); | |
16629 | case VSX_BUILTIN_LXVW4X_V4SI: | |
16630 | return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si, | |
16631 | exp, target, false); | |
16632 | case VSX_BUILTIN_LXVW4X_V8HI: | |
16633 | return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi, | |
16634 | exp, target, false); | |
16635 | case VSX_BUILTIN_LXVW4X_V16QI: | |
16636 | return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi, | |
16637 | exp, target, false); | |
16638 | /* For the following on big endian, it's ok to use any appropriate | |
16639 | unaligned-supporting load, so use a generic expander. For | |
16640 | little-endian, the exact element-reversing instruction must | |
16641 | be used. */ | |
16642 | case VSX_BUILTIN_LD_ELEMREV_V2DF: | |
16643 | { | |
16644 | enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df | |
16645 | : CODE_FOR_vsx_ld_elemrev_v2df); | |
16646 | return altivec_expand_lv_builtin (code, exp, target, false); | |
16647 | } | |
16648 | case VSX_BUILTIN_LD_ELEMREV_V2DI: | |
16649 | { | |
16650 | enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di | |
16651 | : CODE_FOR_vsx_ld_elemrev_v2di); | |
16652 | return altivec_expand_lv_builtin (code, exp, target, false); | |
16653 | } | |
16654 | case VSX_BUILTIN_LD_ELEMREV_V4SF: | |
16655 | { | |
16656 | enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf | |
16657 | : CODE_FOR_vsx_ld_elemrev_v4sf); | |
16658 | return altivec_expand_lv_builtin (code, exp, target, false); | |
16659 | } | |
16660 | case VSX_BUILTIN_LD_ELEMREV_V4SI: | |
16661 | { | |
16662 | enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si | |
16663 | : CODE_FOR_vsx_ld_elemrev_v4si); | |
16664 | return altivec_expand_lv_builtin (code, exp, target, false); | |
16665 | } | |
16666 | case VSX_BUILTIN_LD_ELEMREV_V8HI: | |
16667 | { | |
16668 | enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi | |
16669 | : CODE_FOR_vsx_ld_elemrev_v8hi); | |
16670 | return altivec_expand_lv_builtin (code, exp, target, false); | |
16671 | } | |
16672 | case VSX_BUILTIN_LD_ELEMREV_V16QI: | |
16673 | { | |
16674 | enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi | |
16675 | : CODE_FOR_vsx_ld_elemrev_v16qi); | |
16676 | return altivec_expand_lv_builtin (code, exp, target, false); | |
16677 | } | |
16679 | default: | |
16680 | break; | |
16682 | } | |
16683 | ||
16684 | *expandedp = false; | |
16685 | return NULL_RTX; | |
16686 | } | |
16687 | ||
16688 | /* Expand the builtin in EXP and store the result in TARGET. Store | |
16689 | true in *EXPANDEDP if we found a builtin to expand. */ | |
16690 | static rtx | |
16691 | paired_expand_builtin (tree exp, rtx target, bool * expandedp) | |
16692 | { | |
16693 | tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); | |
16694 | enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl); | |
16695 | const struct builtin_description *d; | |
16696 | size_t i; | |
16697 | ||
16698 | *expandedp = true; | |
16699 | ||
16700 | switch (fcode) | |
16701 | { | |
16702 | case PAIRED_BUILTIN_STX: | |
16703 | return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp); | |
16704 | case PAIRED_BUILTIN_LX: | |
16705 | return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target); | |
16706 | default: | |
16707 | break; | |
16709 | } | |
16710 | ||
16711 | /* Expand the paired predicates. */ | |
16712 | d = bdesc_paired_preds; | |
16713 | for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++) | |
16714 | if (d->code == fcode) | |
16715 | return paired_expand_predicate_builtin (d->icode, exp, target); | |
16716 | ||
16717 | *expandedp = false; | |
16718 | return NULL_RTX; | |
16719 | } | |
16720 | ||
16721 | /* Binops that need to be initialized manually, but can be expanded | |
16722 | automagically by rs6000_expand_binop_builtin. */ | |
16723 | static const struct builtin_description bdesc_2arg_spe[] = | |
16724 | { | |
16725 | { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX }, | |
16726 | { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX }, | |
16727 | { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX }, | |
16728 | { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX }, | |
16729 | { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX }, | |
16730 | { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX }, | |
16731 | { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX }, | |
16732 | { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX }, | |
16733 | { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX }, | |
16734 | { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX }, | |
16735 | { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX }, | |
16736 | { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD }, | |
16737 | { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW }, | |
16738 | { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH }, | |
16739 | { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE }, | |
16740 | { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU }, | |
16741 | { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS }, | |
16742 | { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT }, | |
16743 | { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT }, | |
16744 | { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT }, | |
16745 | { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT }, | |
16746 | { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT } | |
16747 | }; | |
16748 | ||
16749 | /* Expand the builtin in EXP and store the result in TARGET. Store | |
16750 | true in *EXPANDEDP if we found a builtin to expand. | |
16751 | ||
16752 | This expands the SPE builtins that are not simple unary and binary | |
16753 | operations. */ | |
16754 | static rtx | |
16755 | spe_expand_builtin (tree exp, rtx target, bool *expandedp) | |
16756 | { | |
16757 | tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); | |
16758 | tree arg1, arg0; | |
16759 | enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl); | |
16760 | enum insn_code icode; | |
16761 | machine_mode tmode, mode0; | |
16762 | rtx pat, op0; | |
16763 | const struct builtin_description *d; | |
16764 | size_t i; | |
16765 | ||
16766 | *expandedp = true; | |
16767 | ||
16768 | /* Syntax check for a 5-bit unsigned immediate. */ | |
16769 | switch (fcode) | |
16770 | { | |
16771 | case SPE_BUILTIN_EVSTDD: | |
16772 | case SPE_BUILTIN_EVSTDH: | |
16773 | case SPE_BUILTIN_EVSTDW: | |
16774 | case SPE_BUILTIN_EVSTWHE: | |
16775 | case SPE_BUILTIN_EVSTWHO: | |
16776 | case SPE_BUILTIN_EVSTWWE: | |
16777 | case SPE_BUILTIN_EVSTWWO: | |
16778 | arg1 = CALL_EXPR_ARG (exp, 2); | |
16779 | if (TREE_CODE (arg1) != INTEGER_CST | |
16780 | || TREE_INT_CST_LOW (arg1) & ~0x1f) | |
16781 | { | |
16782 | error ("argument 2 must be a 5-bit unsigned literal"); | |
16783 | return const0_rtx; | |
16784 | } | |
16785 | break; | |
16786 | default: | |
16787 | break; | |
16788 | } | |
16789 | ||
16790 | /* The evsplat*i instructions are not quite generic. */ | |
16791 | switch (fcode) | |
16792 | { | |
16793 | case SPE_BUILTIN_EVSPLATFI: | |
16794 | return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi, | |
16795 | exp, target); | |
16796 | case SPE_BUILTIN_EVSPLATI: | |
16797 | return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati, | |
16798 | exp, target); | |
16799 | default: | |
16800 | break; | |
16801 | } | |
16802 | ||
16803 | d = bdesc_2arg_spe; | |
16804 | for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d) | |
16805 | if (d->code == fcode) | |
16806 | return rs6000_expand_binop_builtin (d->icode, exp, target); | |
16807 | ||
16808 | d = bdesc_spe_predicates; | |
16809 | for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d) | |
16810 | if (d->code == fcode) | |
16811 | return spe_expand_predicate_builtin (d->icode, exp, target); | |
16812 | ||
16813 | d = bdesc_spe_evsel; | |
16814 | for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d) | |
16815 | if (d->code == fcode) | |
16816 | return spe_expand_evsel_builtin (d->icode, exp, target); | |
16817 | ||
16818 | switch (fcode) | |
16819 | { | |
16820 | case SPE_BUILTIN_EVSTDDX: | |
16821 | return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp); | |
16822 | case SPE_BUILTIN_EVSTDHX: | |
16823 | return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp); | |
16824 | case SPE_BUILTIN_EVSTDWX: | |
16825 | return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp); | |
16826 | case SPE_BUILTIN_EVSTWHEX: | |
16827 | return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp); | |
16828 | case SPE_BUILTIN_EVSTWHOX: | |
16829 | return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp); | |
16830 | case SPE_BUILTIN_EVSTWWEX: | |
16831 | return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp); | |
16832 | case SPE_BUILTIN_EVSTWWOX: | |
16833 | return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp); | |
16834 | case SPE_BUILTIN_EVSTDD: | |
16835 | return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp); | |
16836 | case SPE_BUILTIN_EVSTDH: | |
16837 | return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp); | |
16838 | case SPE_BUILTIN_EVSTDW: | |
16839 | return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp); | |
16840 | case SPE_BUILTIN_EVSTWHE: | |
16841 | return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp); | |
16842 | case SPE_BUILTIN_EVSTWHO: | |
16843 | return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp); | |
16844 | case SPE_BUILTIN_EVSTWWE: | |
16845 | return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp); | |
16846 | case SPE_BUILTIN_EVSTWWO: | |
16847 | return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp); | |
16848 | case SPE_BUILTIN_MFSPEFSCR: | |
16849 | icode = CODE_FOR_spe_mfspefscr; | |
16850 | tmode = insn_data[icode].operand[0].mode; | |
16851 | ||
16852 | if (target == 0 | |
16853 | || GET_MODE (target) != tmode | |
16854 | || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) | |
16855 | target = gen_reg_rtx (tmode); | |
16856 | ||
16857 | pat = GEN_FCN (icode) (target); | |
16858 | if (! pat) | |
16859 | return 0; | |
16860 | emit_insn (pat); | |
16861 | return target; | |
16862 | case SPE_BUILTIN_MTSPEFSCR: | |
16863 | icode = CODE_FOR_spe_mtspefscr; | |
16864 | arg0 = CALL_EXPR_ARG (exp, 0); | |
16865 | op0 = expand_normal (arg0); | |
16866 | mode0 = insn_data[icode].operand[0].mode; | |
16867 | ||
16868 | if (arg0 == error_mark_node) | |
16869 | return const0_rtx; | |
16870 | ||
16871 | if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) | |
16872 | op0 = copy_to_mode_reg (mode0, op0); | |
16873 | ||
16874 | pat = GEN_FCN (icode) (op0); | |
16875 | if (pat) | |
16876 | emit_insn (pat); | |
16877 | return NULL_RTX; | |
16878 | default: | |
16879 | break; | |
16880 | } | |
16881 | ||
16882 | *expandedp = false; | |
16883 | return NULL_RTX; | |
16884 | } | |
16885 | ||
16886 | static rtx | |
16887 | paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target) | |
16888 | { | |
16889 | rtx pat, scratch, tmp; | |
16890 | tree form = CALL_EXPR_ARG (exp, 0); | |
16891 | tree arg0 = CALL_EXPR_ARG (exp, 1); | |
16892 | tree arg1 = CALL_EXPR_ARG (exp, 2); | |
16893 | rtx op0 = expand_normal (arg0); | |
16894 | rtx op1 = expand_normal (arg1); | |
16895 | machine_mode mode0 = insn_data[icode].operand[1].mode; | |
16896 | machine_mode mode1 = insn_data[icode].operand[2].mode; | |
16897 | int form_int; | |
16898 | enum rtx_code code; | |
16899 | ||
16900 | if (TREE_CODE (form) != INTEGER_CST) | |
16901 | { | |
16902 | error ("argument 1 of __builtin_paired_predicate must be a constant"); | |
16903 | return const0_rtx; | |
16904 | } | |
16905 | else | |
16906 | form_int = TREE_INT_CST_LOW (form); | |
16907 | ||
16908 | gcc_assert (mode0 == mode1); | |
16909 | ||
16910 | if (arg0 == error_mark_node || arg1 == error_mark_node) | |
16911 | return const0_rtx; | |
16912 | ||
16913 | if (target == 0 | |
16914 | || GET_MODE (target) != SImode | |
16915 | || !(*insn_data[icode].operand[0].predicate) (target, SImode)) | |
16916 | target = gen_reg_rtx (SImode); | |
16917 | if (!(*insn_data[icode].operand[1].predicate) (op0, mode0)) | |
16918 | op0 = copy_to_mode_reg (mode0, op0); | |
16919 | if (!(*insn_data[icode].operand[2].predicate) (op1, mode1)) | |
16920 | op1 = copy_to_mode_reg (mode1, op1); | |
16921 | ||
16922 | scratch = gen_reg_rtx (CCFPmode); | |
16923 | ||
16924 | pat = GEN_FCN (icode) (scratch, op0, op1); | |
16925 | if (!pat) | |
16926 | return const0_rtx; | |
16927 | ||
16928 | emit_insn (pat); | |
16929 | ||
16930 | switch (form_int) | |
16931 | { | |
16932 | /* LT bit. */ | |
16933 | case 0: | |
16934 | code = LT; | |
16935 | break; | |
16936 | /* GT bit. */ | |
16937 | case 1: | |
16938 | code = GT; | |
16939 | break; | |
16940 | /* EQ bit. */ | |
16941 | case 2: | |
16942 | code = EQ; | |
16943 | break; | |
16944 | /* UN bit. */ | |
16945 | case 3: | |
16946 | emit_insn (gen_move_from_CR_ov_bit (target, scratch)); | |
16947 | return target; | |
16948 | default: | |
16949 | error ("argument 1 of __builtin_paired_predicate is out of range"); | |
16950 | return const0_rtx; | |
16951 | } | |
16952 | ||
16953 | tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx); | |
16954 | emit_move_insn (target, tmp); | |
16955 | return target; | |
16956 | } | |
16957 | ||
16958 | static rtx | |
16959 | spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target) | |
16960 | { | |
16961 | rtx pat, scratch, tmp; | |
16962 | tree form = CALL_EXPR_ARG (exp, 0); | |
16963 | tree arg0 = CALL_EXPR_ARG (exp, 1); | |
16964 | tree arg1 = CALL_EXPR_ARG (exp, 2); | |
16965 | rtx op0 = expand_normal (arg0); | |
16966 | rtx op1 = expand_normal (arg1); | |
16967 | machine_mode mode0 = insn_data[icode].operand[1].mode; | |
16968 | machine_mode mode1 = insn_data[icode].operand[2].mode; | |
16969 | int form_int; | |
16970 | enum rtx_code code; | |
16971 | ||
16972 | if (TREE_CODE (form) != INTEGER_CST) | |
16973 | { | |
16974 | error ("argument 1 of __builtin_spe_predicate must be a constant"); | |
16975 | return const0_rtx; | |
16976 | } | |
16977 | else | |
16978 | form_int = TREE_INT_CST_LOW (form); | |
16979 | ||
16980 | gcc_assert (mode0 == mode1); | |
16981 | ||
16982 | if (arg0 == error_mark_node || arg1 == error_mark_node) | |
16983 | return const0_rtx; | |
16984 | ||
16985 | if (target == 0 | |
16986 | || GET_MODE (target) != SImode | |
16987 | || ! (*insn_data[icode].operand[0].predicate) (target, SImode)) | |
16988 | target = gen_reg_rtx (SImode); | |
16989 | ||
16990 | if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) | |
16991 | op0 = copy_to_mode_reg (mode0, op0); | |
16992 | if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) | |
16993 | op1 = copy_to_mode_reg (mode1, op1); | |
16994 | ||
16995 | scratch = gen_reg_rtx (CCmode); | |
16996 | ||
16997 | pat = GEN_FCN (icode) (scratch, op0, op1); | |
16998 | if (! pat) | |
16999 | return const0_rtx; | |
17000 | emit_insn (pat); | |
17001 | ||
17002 | /* There are 4 variants for each predicate: _any_, _all_, _upper_, | |
17003 | _lower_. We use one compare, but look in different bits of the | |
17004 | CR for each variant. | |
17005 | ||
17006 | There are 2 elements in each SPE simd type (upper/lower). The CR | |
17007 | bits are set as follows: | |
17008 | ||
17009 | BIT0 | BIT 1 | BIT 2 | BIT 3 | |
17010 | U | L | (U | L) | (U & L) | |
17011 | ||
17012 | So, for an "all" relationship, BIT 3 would be set. | |
17013 | For an "any" relationship, BIT 2 would be set. Etc. | |
17014 | ||
17015 | Following traditional nomenclature, these bits map to: | |
17016 | ||
17017 | BIT0 | BIT 1 | BIT 2 | BIT 3 | |
17018 | LT | GT | EQ | OV | |
17019 | ||
17020 | Later, we will generate rtl to look in the LT/GT/EQ/OV bits. | |
17021 | */ | |
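| /* Worked example (added): if the upper element satisfies the | |
| predicate and the lower does not (U=1, L=0), the CR field holds | |
| ||
| BIT0 | BIT 1 | BIT 2 | BIT 3 | |
| 1 | 0 | 1 | 0 | |
| ||
| so the "any" form (BIT 2, EQ) is true while the "all" form | |
| (BIT 3, OV) is false. */ | |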
17022 | ||
17023 | switch (form_int) | |
17024 | { | |
17025 | /* All variant. OV bit. */ | |
17026 | case 0: | |
17027 | /* We need to get to the OV bit, which is the ORDERED bit. We | |
17028 | could generate (ordered:SI (reg:CC xx) (const_int 0)), but | |
17029 | that's ugly and will make validate_condition_mode die. | |
17030 | So let's just use another pattern. */ | |
17031 | emit_insn (gen_move_from_CR_ov_bit (target, scratch)); | |
17032 | return target; | |
17033 | /* Any variant. EQ bit. */ | |
17034 | case 1: | |
17035 | code = EQ; | |
17036 | break; | |
17037 | /* Upper variant. LT bit. */ | |
17038 | case 2: | |
17039 | code = LT; | |
17040 | break; | |
17041 | /* Lower variant. GT bit. */ | |
17042 | case 3: | |
17043 | code = GT; | |
17044 | break; | |
17045 | default: | |
17046 | error ("argument 1 of __builtin_spe_predicate is out of range"); | |
17047 | return const0_rtx; | |
17048 | } | |
17049 | ||
17050 | tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx); | |
17051 | emit_move_insn (target, tmp); | |
17052 | ||
17053 | return target; | |
17054 | } | |
17055 | ||
17056 | /* The evsel builtins look like this: | |
17057 | ||
17058 | e = __builtin_spe_evsel_OP (a, b, c, d); | |
17059 | ||
17060 | and work like this: | |
17061 | ||
17062 | e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper]; | |
17063 | e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower]; | |
17064 | */ | |
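| /* Usage sketch (added), assuming the greater-than-signed variant of | |
| the builtin exists under this name: | |
| ||
| e = __builtin_spe_evsel_gts (a, b, c, d); | |
| ||
| selects c[i] where a[i] > b[i] and d[i] otherwise, independently for | |
| the upper and lower halves. */ | |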
17065 | ||
17066 | static rtx | |
17067 | spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target) | |
17068 | { | |
17069 | rtx pat, scratch; | |
17070 | tree arg0 = CALL_EXPR_ARG (exp, 0); | |
17071 | tree arg1 = CALL_EXPR_ARG (exp, 1); | |
17072 | tree arg2 = CALL_EXPR_ARG (exp, 2); | |
17073 | tree arg3 = CALL_EXPR_ARG (exp, 3); | |
17074 | rtx op0 = expand_normal (arg0); | |
17075 | rtx op1 = expand_normal (arg1); | |
17076 | rtx op2 = expand_normal (arg2); | |
17077 | rtx op3 = expand_normal (arg3); | |
17078 | machine_mode mode0 = insn_data[icode].operand[1].mode; | |
17079 | machine_mode mode1 = insn_data[icode].operand[2].mode; | |
17080 | ||
17081 | gcc_assert (mode0 == mode1); | |
17082 | ||
17083 | if (arg0 == error_mark_node || arg1 == error_mark_node | |
17084 | || arg2 == error_mark_node || arg3 == error_mark_node) | |
17085 | return const0_rtx; | |
17086 | ||
17087 | if (target == 0 | |
17088 | || GET_MODE (target) != mode0 | |
17089 | || ! (*insn_data[icode].operand[0].predicate) (target, mode0)) | |
17090 | target = gen_reg_rtx (mode0); | |
17091 | ||
17092 | if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) | |
17093 | op0 = copy_to_mode_reg (mode0, op0); | |
17094 | if (! (*insn_data[icode].operand[1].predicate) (op1, mode1)) | |
17095 | op1 = copy_to_mode_reg (mode0, op1); | |
17096 | if (! (*insn_data[icode].operand[1].predicate) (op2, mode1)) | |
17097 | op2 = copy_to_mode_reg (mode0, op2); | |
17098 | if (! (*insn_data[icode].operand[1].predicate) (op3, mode1)) | |
17099 | op3 = copy_to_mode_reg (mode0, op3); | |
17100 | ||
17101 | /* Generate the compare. */ | |
17102 | scratch = gen_reg_rtx (CCmode); | |
17103 | pat = GEN_FCN (icode) (scratch, op0, op1); | |
17104 | if (! pat) | |
17105 | return const0_rtx; | |
17106 | emit_insn (pat); | |
17107 | ||
17108 | if (mode0 == V2SImode) | |
17109 | emit_insn (gen_spe_evsel (target, op2, op3, scratch)); | |
17110 | else | |
17111 | emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch)); | |
17112 | ||
17113 | return target; | |
17114 | } | |
17115 | ||
17116 | /* Raise an error message for a builtin function that is called without the | |
17117 | appropriate target options being set. */ | |
17118 | ||
17119 | static void | |
17120 | rs6000_invalid_builtin (enum rs6000_builtins fncode) | |
17121 | { | |
17122 | size_t uns_fncode = (size_t)fncode; | |
17123 | const char *name = rs6000_builtin_info[uns_fncode].name; | |
17124 | HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask; | |
17125 | ||
17126 | gcc_assert (name != NULL); | |
17127 | if ((fnmask & RS6000_BTM_CELL) != 0) | |
17128 | error ("Builtin function %s is only valid for the cell processor", name); | |
17129 | else if ((fnmask & RS6000_BTM_VSX) != 0) | |
17130 | error ("Builtin function %s requires the -mvsx option", name); | |
17131 | else if ((fnmask & RS6000_BTM_HTM) != 0) | |
17132 | error ("Builtin function %s requires the -mhtm option", name); | |
17133 | else if ((fnmask & RS6000_BTM_ALTIVEC) != 0) | |
17134 | error ("Builtin function %s requires the -maltivec option", name); | |
17135 | else if ((fnmask & RS6000_BTM_PAIRED) != 0) | |
17136 | error ("Builtin function %s requires the -mpaired option", name); | |
17137 | else if ((fnmask & RS6000_BTM_SPE) != 0) | |
17138 | error ("Builtin function %s requires the -mspe option", name); | |
17139 | else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR)) | |
17140 | == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR)) | |
17141 | error ("Builtin function %s requires the -mhard-dfp and" | |
17142 | " -mpower8-vector options", name); | |
17143 | else if ((fnmask & RS6000_BTM_DFP) != 0) | |
17144 | error ("Builtin function %s requires the -mhard-dfp option", name); | |
17145 | else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0) | |
17146 | error ("Builtin function %s requires the -mpower8-vector option", name); | |
17147 | else if ((fnmask & (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT)) | |
17148 | == (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT)) | |
17149 | error ("Builtin function %s requires the -mcpu=power9 and" | |
17150 | " -m64 options", name); | |
17151 | else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0) | |
17152 | error ("Builtin function %s requires the -mcpu=power9 option", name); | |
17153 | else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT)) | |
17154 | == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT)) | |
17155 | error ("Builtin function %s requires the -mcpu=power9 and" | |
17156 | " -m64 options", name); | |
17157 | else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC) | |
17158 | error ("Builtin function %s requires the -mcpu=power9 option", name); | |
17159 | else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128)) | |
17160 | == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128)) | |
17161 | error ("Builtin function %s requires the -mhard-float and" | |
17162 | " -mlong-double-128 options", name); | |
17163 | else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0) | |
17164 | error ("Builtin function %s requires the -mhard-float option", name); | |
17165 | else if ((fnmask & RS6000_BTM_FLOAT128) != 0) | |
17166 | error ("Builtin function %s requires the -mfloat128 option", name); | |
17167 | else | |
17168 | error ("Builtin function %s is not supported with the current options", | |
17169 | name); | |
17170 | } | |
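| /* Illustration (added): compiling a call to a VSX builtin without | |
| -mvsx lands here and emits, e.g., | |
| "Builtin function __builtin_vsx_xvadddp requires the -mvsx option". */ | |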
17171 | ||
17172 | /* Target hook for early folding of built-ins, shamelessly stolen | |
17173 | from ia64.c. */ | |
17174 | ||
17175 | static tree | |
17176 | rs6000_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, | |
17177 | tree *args, bool ignore ATTRIBUTE_UNUSED) | |
17178 | { | |
17179 | if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD) | |
17180 | { | |
17181 | enum rs6000_builtins fn_code | |
17182 | = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl); | |
17183 | switch (fn_code) | |
17184 | { | |
17185 | case RS6000_BUILTIN_NANQ: | |
17186 | case RS6000_BUILTIN_NANSQ: | |
17187 | { | |
17188 | tree type = TREE_TYPE (TREE_TYPE (fndecl)); | |
17189 | const char *str = c_getstr (*args); | |
17190 | int quiet = fn_code == RS6000_BUILTIN_NANQ; | |
17191 | REAL_VALUE_TYPE real; | |
17192 | ||
17193 | if (str && real_nan (&real, str, quiet, TYPE_MODE (type))) | |
17194 | return build_real (type, real); | |
17195 | return NULL_TREE; | |
17196 | } | |
17197 | case RS6000_BUILTIN_INFQ: | |
17198 | case RS6000_BUILTIN_HUGE_VALQ: | |
17199 | { | |
17200 | tree type = TREE_TYPE (TREE_TYPE (fndecl)); | |
17201 | REAL_VALUE_TYPE inf; | |
17202 | real_inf (&inf); | |
17203 | return build_real (type, inf); | |
17204 | } | |
17205 | default: | |
17206 | break; | |
17207 | } | |
17208 | } | |
17209 | #ifdef SUBTARGET_FOLD_BUILTIN | |
17210 | return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore); | |
17211 | #else | |
17212 | return NULL_TREE; | |
17213 | #endif | |
17214 | } | |
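| /* Illustration (added): through this hook, | |
| ||
| __float128 x = __builtin_infq (); | |
| __float128 y = __builtin_nanq (""); | |
| ||
| fold to +Inf and quiet-NaN REAL_CST constants at compile time rather | |
| than surviving as calls. */ | |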
17215 | ||
17216 | /* Fold a machine-dependent built-in in GIMPLE. (For folding into | |
17217 | a constant, use rs6000_fold_builtin.) */ | |
17218 | ||
17219 | bool | |
17220 | rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) | |
17221 | { | |
17222 | gimple *stmt = gsi_stmt (*gsi); | |
17223 | tree fndecl = gimple_call_fndecl (stmt); | |
17224 | gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD); | |
17225 | enum rs6000_builtins fn_code | |
17226 | = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl); | |
17227 | tree arg0, arg1, lhs; | |
17228 | ||
17229 | switch (fn_code) | |
17230 | { | |
17231 | /* Flavors of vec_add. We deliberately don't expand | |
17232 | P8V_BUILTIN_VADDUQM as it gets lowered from V1TImode to | |
17233 | TImode, resulting in much poorer code generation. */ | |
17234 | case ALTIVEC_BUILTIN_VADDUBM: | |
17235 | case ALTIVEC_BUILTIN_VADDUHM: | |
17236 | case ALTIVEC_BUILTIN_VADDUWM: | |
17237 | case P8V_BUILTIN_VADDUDM: | |
17238 | case ALTIVEC_BUILTIN_VADDFP: | |
17239 | case VSX_BUILTIN_XVADDDP: | |
17240 | { | |
17241 | arg0 = gimple_call_arg (stmt, 0); | |
17242 | arg1 = gimple_call_arg (stmt, 1); | |
17243 | lhs = gimple_call_lhs (stmt); | |
17244 | gimple *g = gimple_build_assign (lhs, PLUS_EXPR, arg0, arg1); | |
17245 | gimple_set_location (g, gimple_location (stmt)); | |
17246 | gsi_replace (gsi, g, true); | |
17247 | return true; | |
17248 | } | |
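| /* Illustration (added): after this fold, a call such as vec_add (x, y) | |
| on two vector signed int operands is replaced in the GIMPLE stream by | |
| the equivalent of lhs = x + y (a PLUS_EXPR), so later passes see it | |
| as ordinary vector arithmetic. */ | |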
17249 | /* Flavors of vec_sub. We deliberately don't expand | |
17250 | P8V_BUILTIN_VSUBUQM. */ | |
17251 | case ALTIVEC_BUILTIN_VSUBUBM: | |
17252 | case ALTIVEC_BUILTIN_VSUBUHM: | |
17253 | case ALTIVEC_BUILTIN_VSUBUWM: | |
17254 | case P8V_BUILTIN_VSUBUDM: | |
17255 | case ALTIVEC_BUILTIN_VSUBFP: | |
17256 | case VSX_BUILTIN_XVSUBDP: | |
17257 | { | |
17258 | arg0 = gimple_call_arg (stmt, 0); | |
17259 | arg1 = gimple_call_arg (stmt, 1); | |
17260 | lhs = gimple_call_lhs (stmt); | |
17261 | gimple *g = gimple_build_assign (lhs, MINUS_EXPR, arg0, arg1); | |
17262 | gimple_set_location (g, gimple_location (stmt)); | |
17263 | gsi_replace (gsi, g, true); | |
17264 | return true; | |
17265 | } | |
17266 | case VSX_BUILTIN_XVMULSP: | |
17267 | case VSX_BUILTIN_XVMULDP: | |
17268 | { | |
17269 | arg0 = gimple_call_arg (stmt, 0); | |
17270 | arg1 = gimple_call_arg (stmt, 1); | |
17271 | lhs = gimple_call_lhs (stmt); | |
17272 | gimple *g = gimple_build_assign (lhs, MULT_EXPR, arg0, arg1); | |
17273 | gimple_set_location (g, gimple_location (stmt)); | |
17274 | gsi_replace (gsi, g, true); | |
17275 | return true; | |
17276 | } | |
17277 | /* Even element flavors of vec_mul (signed). */ | |
17278 | case ALTIVEC_BUILTIN_VMULESB: | |
17279 | case ALTIVEC_BUILTIN_VMULESH: | |
17280 | /* Even element flavors of vec_mul (unsigned). */ | |
17281 | case ALTIVEC_BUILTIN_VMULEUB: | |
17282 | case ALTIVEC_BUILTIN_VMULEUH: | |
17283 | { | |
17284 | arg0 = gimple_call_arg (stmt, 0); | |
17285 | arg1 = gimple_call_arg (stmt, 1); | |
17286 | lhs = gimple_call_lhs (stmt); | |
17287 | gimple *g = gimple_build_assign (lhs, VEC_WIDEN_MULT_EVEN_EXPR, arg0, arg1); | |
17288 | gimple_set_location (g, gimple_location (stmt)); | |
17289 | gsi_replace (gsi, g, true); | |
17290 | return true; | |
17291 | } | |
17292 | /* Odd element flavors of vec_mul (signed). */ | |
17293 | case ALTIVEC_BUILTIN_VMULOSB: | |
17294 | case ALTIVEC_BUILTIN_VMULOSH: | |
17295 | /* Odd element flavors of vec_mul (unsigned). */ | |
17296 | case ALTIVEC_BUILTIN_VMULOUB: | |
17297 | case ALTIVEC_BUILTIN_VMULOUH: | |
17298 | { | |
17299 | arg0 = gimple_call_arg (stmt, 0); | |
17300 | arg1 = gimple_call_arg (stmt, 1); | |
17301 | lhs = gimple_call_lhs (stmt); | |
17302 | gimple *g = gimple_build_assign (lhs, VEC_WIDEN_MULT_ODD_EXPR, arg0, arg1); | |
17303 | gimple_set_location (g, gimple_location (stmt)); | |
17304 | gsi_replace (gsi, g, true); | |
17305 | return true; | |
17306 | } | |
17307 | /* Flavors of vec_div (Integer). */ | |
17308 | case VSX_BUILTIN_DIV_V2DI: | |
17309 | case VSX_BUILTIN_UDIV_V2DI: | |
17310 | { | |
17311 | arg0 = gimple_call_arg (stmt, 0); | |
17312 | arg1 = gimple_call_arg (stmt, 1); | |
17313 | lhs = gimple_call_lhs (stmt); | |
17314 | gimple *g = gimple_build_assign (lhs, TRUNC_DIV_EXPR, arg0, arg1); | |
17315 | gimple_set_location (g, gimple_location (stmt)); | |
17316 | gsi_replace (gsi, g, true); | |
17317 | return true; | |
17318 | } | |
17319 | /* Flavors of vec_div (Float). */ | |
17320 | case VSX_BUILTIN_XVDIVSP: | |
17321 | case VSX_BUILTIN_XVDIVDP: | |
17322 | { | |
17323 | arg0 = gimple_call_arg (stmt, 0); | |
17324 | arg1 = gimple_call_arg (stmt, 1); | |
17325 | lhs = gimple_call_lhs (stmt); | |
17326 | gimple *g = gimple_build_assign (lhs, RDIV_EXPR, arg0, arg1); | |
17327 | gimple_set_location (g, gimple_location (stmt)); | |
17328 | gsi_replace (gsi, g, true); | |
17329 | return true; | |
17330 | } | |
17331 | /* Flavors of vec_and. */ | |
17332 | case ALTIVEC_BUILTIN_VAND: | |
17333 | { | |
17334 | arg0 = gimple_call_arg (stmt, 0); | |
17335 | arg1 = gimple_call_arg (stmt, 1); | |
17336 | lhs = gimple_call_lhs (stmt); | |
17337 | gimple *g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, arg1); | |
17338 | gimple_set_location (g, gimple_location (stmt)); | |
17339 | gsi_replace (gsi, g, true); | |
17340 | return true; | |
17341 | } | |
17342 | /* Flavors of vec_andc. */ | |
17343 | case ALTIVEC_BUILTIN_VANDC: | |
17344 | { | |
17345 | arg0 = gimple_call_arg (stmt, 0); | |
17346 | arg1 = gimple_call_arg (stmt, 1); | |
17347 | lhs = gimple_call_lhs (stmt); | |
17348 | tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1)); | |
17349 | gimple *g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1); | |
17350 | gimple_set_location (g, gimple_location (stmt)); | |
17351 | gsi_insert_before (gsi, g, GSI_SAME_STMT); | |
17352 | g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, temp); | |
17353 | gimple_set_location (g, gimple_location (stmt)); | |
17354 | gsi_replace (gsi, g, true); | |
17355 | return true; | |
17356 | } | |
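    /* The vec_andc fold above needs two GIMPLE statements, since
       GIMPLE has no single and-with-complement operation.  The
       replacement for lhs = vec_andc (a, b) is, in effect:

	   temp = ~b;
	   lhs = a & temp;

       with the BIT_NOT_EXPR inserted before the call and the call
       itself replaced by the BIT_AND_EXPR.  */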
17357 | /* Flavors of vec_nand. */ | |
17358 | case P8V_BUILTIN_VEC_NAND: | |
17359 | case P8V_BUILTIN_NAND_V16QI: | |
17360 | case P8V_BUILTIN_NAND_V8HI: | |
17361 | case P8V_BUILTIN_NAND_V4SI: | |
17362 | case P8V_BUILTIN_NAND_V4SF: | |
17363 | case P8V_BUILTIN_NAND_V2DF: | |
17364 | case P8V_BUILTIN_NAND_V2DI: | |
17365 | { | |
17366 | arg0 = gimple_call_arg (stmt, 0); | |
17367 | arg1 = gimple_call_arg (stmt, 1); | |
17368 | lhs = gimple_call_lhs (stmt); | |
17369 | tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1)); | |
17370 | gimple *g = gimple_build_assign (temp, BIT_AND_EXPR, arg0, arg1); | |
17371 | gimple_set_location (g, gimple_location (stmt)); | |
17372 | gsi_insert_before (gsi, g, GSI_SAME_STMT); | |
17373 | g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp); | |
17374 | gimple_set_location (g, gimple_location (stmt)); | |
17375 | gsi_replace (gsi, g, true); | |
17376 | return true; | |
17377 | } | |
17378 | /* Flavors of vec_or. */ | |
17379 | case ALTIVEC_BUILTIN_VOR: | |
17380 | { | |
17381 | arg0 = gimple_call_arg (stmt, 0); | |
17382 | arg1 = gimple_call_arg (stmt, 1); | |
17383 | lhs = gimple_call_lhs (stmt); | |
17384 | gimple *g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, arg1); | |
17385 | gimple_set_location (g, gimple_location (stmt)); | |
17386 | gsi_replace (gsi, g, true); | |
17387 | return true; | |
17388 | } | |
17389 | /* Flavors of vec_orc. */ | |
17390 | case P8V_BUILTIN_ORC_V16QI: | |
17391 | case P8V_BUILTIN_ORC_V8HI: | |
17392 | case P8V_BUILTIN_ORC_V4SI: | |
17393 | case P8V_BUILTIN_ORC_V4SF: | |
17394 | case P8V_BUILTIN_ORC_V2DF: | |
17395 | case P8V_BUILTIN_ORC_V2DI: | |
17396 | { | |
17397 | arg0 = gimple_call_arg (stmt, 0); | |
17398 | arg1 = gimple_call_arg (stmt, 1); | |
17399 | lhs = gimple_call_lhs (stmt); | |
17400 | tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1)); | |
17401 | gimple *g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1); | |
17402 | gimple_set_location (g, gimple_location (stmt)); | |
17403 | gsi_insert_before (gsi, g, GSI_SAME_STMT); | |
17404 | g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, temp); | |
17405 | gimple_set_location (g, gimple_location (stmt)); | |
17406 | gsi_replace (gsi, g, true); | |
17407 | return true; | |
17408 | } | |
17409 | /* Flavors of vec_xor. */ | |
17410 | case ALTIVEC_BUILTIN_VXOR: | |
17411 | { | |
17412 | arg0 = gimple_call_arg (stmt, 0); | |
17413 | arg1 = gimple_call_arg (stmt, 1); | |
17414 | lhs = gimple_call_lhs (stmt); | |
17415 | gimple *g = gimple_build_assign (lhs, BIT_XOR_EXPR, arg0, arg1); | |
17416 | gimple_set_location (g, gimple_location (stmt)); | |
17417 | gsi_replace (gsi, g, true); | |
17418 | return true; | |
17419 | } | |
17420 | /* Flavors of vec_nor. */ | |
17421 | case ALTIVEC_BUILTIN_VNOR: | |
17422 | { | |
17423 | arg0 = gimple_call_arg (stmt, 0); | |
17424 | arg1 = gimple_call_arg (stmt, 1); | |
17425 | lhs = gimple_call_lhs (stmt); | |
17426 | tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1)); | |
17427 | gimple *g = gimple_build_assign (temp, BIT_IOR_EXPR, arg0, arg1); | |
17428 | gimple_set_location (g, gimple_location (stmt)); | |
17429 | gsi_insert_before (gsi, g, GSI_SAME_STMT); | |
17430 | g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp); | |
17431 | gimple_set_location (g, gimple_location (stmt)); | |
17432 | gsi_replace (gsi, g, true); | |
17433 | return true; | |
17434 | } | |
17435 | default: | |
17436 | break; | |
17437 | } | |
17438 | ||
17439 | return false; | |
17440 | } | |
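/* As an example of the folding above, a call such as
   lhs = __builtin_altivec_vsubuwm (a, b) is rewritten in place to the
   ordinary GIMPLE assignment lhs = a - b (MINUS_EXPR), which later
   passes can simplify, combine, or constant-fold like any other
   subtraction.  */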
17441 | ||
17442 | /* Expand an expression EXP that calls a built-in function, | |
17443 | with result going to TARGET if that's convenient | |
17444 | (and in mode MODE if that's convenient). | |
17445 | SUBTARGET may be used as the target for computing one of EXP's operands. | |
17446 | IGNORE is nonzero if the value is to be ignored. */ | |
17447 | ||
17448 | static rtx | |
17449 | rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, | |
17450 | machine_mode mode ATTRIBUTE_UNUSED, | |
17451 | int ignore ATTRIBUTE_UNUSED) | |
17452 | { | |
17453 | tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); | |
17454 | enum rs6000_builtins fcode | |
17455 | = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl); | |
17456 | size_t uns_fcode = (size_t)fcode; | |
17457 | const struct builtin_description *d; | |
17458 | size_t i; | |
17459 | rtx ret; | |
17460 | bool success; | |
17461 | HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask; | |
17462 | bool func_valid_p = ((rs6000_builtin_mask & mask) == mask); | |
17463 | ||
17464 | if (TARGET_DEBUG_BUILTIN) | |
17465 | { | |
17466 | enum insn_code icode = rs6000_builtin_info[uns_fcode].icode; | |
17467 | const char *name1 = rs6000_builtin_info[uns_fcode].name; | |
17468 | const char *name2 = ((icode != CODE_FOR_nothing) | |
17469 | ? get_insn_name ((int)icode) | |
17470 | : "nothing"); | |
17471 | const char *name3; | |
17472 | ||
17473 | switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK) | |
17474 | { | |
17475 | default: name3 = "unknown"; break; | |
17476 | case RS6000_BTC_SPECIAL: name3 = "special"; break; | |
17477 | case RS6000_BTC_UNARY: name3 = "unary"; break; | |
17478 | case RS6000_BTC_BINARY: name3 = "binary"; break; | |
17479 | case RS6000_BTC_TERNARY: name3 = "ternary"; break; | |
17480 | case RS6000_BTC_PREDICATE: name3 = "predicate"; break; | |
17481 | case RS6000_BTC_ABS: name3 = "abs"; break; | |
17482 | case RS6000_BTC_EVSEL: name3 = "evsel"; break; | |
17483 | case RS6000_BTC_DST: name3 = "dst"; break; | |
17484 | } | |
17485 | ||
17487 | fprintf (stderr, | |
17488 | "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n", | |
17489 | (name1) ? name1 : "---", fcode, | |
17490 | (name2) ? name2 : "---", (int)icode, | |
17491 | name3, | |
17492 | func_valid_p ? "" : ", not valid"); | |
17493 | } | |
17494 | ||
17495 | if (!func_valid_p) | |
17496 | { | |
17497 | rs6000_invalid_builtin (fcode); | |
17498 | ||
17499 | /* Given it is invalid, just generate a normal call. */ | |
17500 | return expand_call (exp, target, ignore); | |
17501 | } | |
17502 | ||
17503 | switch (fcode) | |
17504 | { | |
17505 | case RS6000_BUILTIN_RECIP: | |
17506 | return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target); | |
17507 | ||
17508 | case RS6000_BUILTIN_RECIPF: | |
17509 | return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target); | |
17510 | ||
17511 | case RS6000_BUILTIN_RSQRTF: | |
17512 | return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target); | |
17513 | ||
17514 | case RS6000_BUILTIN_RSQRT: | |
17515 | return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target); | |
17516 | ||
17517 | case POWER7_BUILTIN_BPERMD: | |
17518 | return rs6000_expand_binop_builtin (((TARGET_64BIT) | |
17519 | ? CODE_FOR_bpermd_di | |
17520 | : CODE_FOR_bpermd_si), exp, target); | |
17521 | ||
17522 | case RS6000_BUILTIN_GET_TB: | |
17523 | return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase, | |
17524 | target); | |
17525 | ||
17526 | case RS6000_BUILTIN_MFTB: | |
17527 | return rs6000_expand_zeroop_builtin (((TARGET_64BIT) | |
17528 | ? CODE_FOR_rs6000_mftb_di | |
17529 | : CODE_FOR_rs6000_mftb_si), | |
17530 | target); | |
17531 | ||
17532 | case RS6000_BUILTIN_MFFS: | |
17533 | return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target); | |
17534 | ||
17535 | case RS6000_BUILTIN_MTFSF: | |
17536 | return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp); | |
17537 | ||
17538 | case RS6000_BUILTIN_CPU_INIT: | |
17539 | case RS6000_BUILTIN_CPU_IS: | |
17540 | case RS6000_BUILTIN_CPU_SUPPORTS: | |
17541 | return cpu_expand_builtin (fcode, exp, target); | |
17542 | ||
17543 | case ALTIVEC_BUILTIN_MASK_FOR_LOAD: | |
17544 | case ALTIVEC_BUILTIN_MASK_FOR_STORE: | |
17545 | { | |
17546 | int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct | |
17547 | : (int) CODE_FOR_altivec_lvsl_direct); | |
17548 | machine_mode tmode = insn_data[icode].operand[0].mode; | |
17549 | machine_mode mode = insn_data[icode].operand[1].mode; | |
17550 | tree arg; | |
17551 | rtx op, addr, pat; | |
17552 | ||
17553 | gcc_assert (TARGET_ALTIVEC); | |
17554 | ||
17555 | arg = CALL_EXPR_ARG (exp, 0); | |
17556 | gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg))); | |
17557 | op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL); | |
17558 | addr = memory_address (mode, op); | |
17559 | if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE) | |
17560 | op = addr; | |
17561 | else | |
17562 | { | |
17563 | /* For the load case we need to negate the address. */ | |
17564 | op = gen_reg_rtx (GET_MODE (addr)); | |
17565 | emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr))); | |
17566 | } | |
17567 | op = gen_rtx_MEM (mode, op); | |
17568 | ||
17569 | if (target == 0 | |
17570 | || GET_MODE (target) != tmode | |
17571 | || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) | |
17572 | target = gen_reg_rtx (tmode); | |
17573 | ||
17574 | pat = GEN_FCN (icode) (target, op); | |
17575 | if (!pat) | |
17576 | return 0; | |
17577 | emit_insn (pat); | |
17578 | ||
17579 | return target; | |
17580 | } | |
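    /* A note on the case above, assuming the usual realignment scheme
       of merging two aligned loads with vperm: lvsl/lvsr derive their
       permute control vector from the low four bits of the effective
       address, so only the misalignment matters; negating the address
       for the load form selects the complementary shift that scheme
       needs.  */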
17581 | ||
17582 | case ALTIVEC_BUILTIN_VCFUX: | |
17583 | case ALTIVEC_BUILTIN_VCFSX: | |
17584 | case ALTIVEC_BUILTIN_VCTUXS: | |
17585 | case ALTIVEC_BUILTIN_VCTSXS: | |
17586 | /* FIXME: There's got to be a nicer way to handle this case than | |
17587 | constructing a new CALL_EXPR. */ | |
17588 | if (call_expr_nargs (exp) == 1) | |
17589 | { | |
17590 | exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp), | |
17591 | 2, CALL_EXPR_ARG (exp, 0), integer_zero_node); | |
17592 | } | |
17593 | break; | |
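      /* E.g. a one-argument call vec_ctf (v) is treated here as if it
	 had been written vec_ctf (v, 0), since the underlying
	 instructions always take an explicit scale operand.  */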
17594 | ||
17595 | default: | |
17596 | break; | |
17597 | } | |
17598 | ||
17599 | if (TARGET_ALTIVEC) | |
17600 | { | |
17601 | ret = altivec_expand_builtin (exp, target, &success); | |
17602 | ||
17603 | if (success) | |
17604 | return ret; | |
17605 | } | |
17606 | if (TARGET_SPE) | |
17607 | { | |
17608 | ret = spe_expand_builtin (exp, target, &success); | |
17609 | ||
17610 | if (success) | |
17611 | return ret; | |
17612 | } | |
17613 | if (TARGET_PAIRED_FLOAT) | |
17614 | { | |
17615 | ret = paired_expand_builtin (exp, target, &success); | |
17616 | ||
17617 | if (success) | |
17618 | return ret; | |
17619 | } | |
17620 | if (TARGET_HTM) | |
17621 | { | |
17622 | ret = htm_expand_builtin (exp, target, &success); | |
17623 | ||
17624 | if (success) | |
17625 | return ret; | |
17626 | } | |
17627 | ||
17628 | unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK; | |
17629 | /* RS6000_BTC_SPECIAL represents no-operand operators. */ | |
17630 | gcc_assert (attr == RS6000_BTC_UNARY | |
17631 | || attr == RS6000_BTC_BINARY | |
17632 | || attr == RS6000_BTC_TERNARY | |
17633 | || attr == RS6000_BTC_SPECIAL); | |
17634 | ||
17635 | /* Handle simple unary operations. */ | |
17636 | d = bdesc_1arg; | |
17637 | for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++) | |
17638 | if (d->code == fcode) | |
17639 | return rs6000_expand_unop_builtin (d->icode, exp, target); | |
17640 | ||
17641 | /* Handle simple binary operations. */ | |
17642 | d = bdesc_2arg; | |
17643 | for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++) | |
17644 | if (d->code == fcode) | |
17645 | return rs6000_expand_binop_builtin (d->icode, exp, target); | |
17646 | ||
17647 | /* Handle simple ternary operations. */ | |
17648 | d = bdesc_3arg; | |
17649 | for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++) | |
17650 | if (d->code == fcode) | |
17651 | return rs6000_expand_ternop_builtin (d->icode, exp, target); | |
17652 | ||
17653 | /* Handle simple no-argument operations. */ | |
17654 | d = bdesc_0arg; | |
17655 | for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++) | |
17656 | if (d->code == fcode) | |
17657 | return rs6000_expand_zeroop_builtin (d->icode, target); | |
17658 | ||
17659 | gcc_unreachable (); | |
17660 | } | |
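/* To illustrate the fall-through above: any builtin not caught by the
   special cases is looked up linearly in the bdesc_* tables, so a
   two-operand builtin found in bdesc_2arg is expanded through
   rs6000_expand_binop_builtin using the icode recorded in its table
   entry, and likewise for the 0-, 1-, and 3-operand tables.  */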
17661 | ||
17662 | /* Create a builtin vector type with a name, taking care not to give | |
17663 | the canonical type a name. */ | |
17664 | ||
17665 | static tree | |
17666 | rs6000_vector_type (const char *name, tree elt_type, unsigned num_elts) | |
17667 | { | |
17668 | tree result = build_vector_type (elt_type, num_elts); | |
17669 | ||
17670 | /* Copy so we don't give the canonical type a name. */ | |
17671 | result = build_variant_type_copy (result); | |
17672 | ||
17673 | add_builtin_type (name, result); | |
17674 | ||
17675 | return result; | |
17676 | } | |
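/* Example use (mirroring the initialization below):

     V4SF_type_node = rs6000_vector_type ("__vector float",
					  float_type_node, 4);

   This attaches the name "__vector float" to a variant copy, leaving
   the canonical V4SF vector type itself unnamed.  */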
17677 | ||
17678 | static void | |
17679 | rs6000_init_builtins (void) | |
17680 | { | |
17681 | tree tdecl; | |
17682 | tree ftype; | |
17683 | machine_mode mode; | |
17684 | ||
17685 | if (TARGET_DEBUG_BUILTIN) | |
17686 | fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n", | |
17687 | (TARGET_PAIRED_FLOAT) ? ", paired" : "", | |
17688 | (TARGET_SPE) ? ", spe" : "", | |
17689 | (TARGET_ALTIVEC) ? ", altivec" : "", | |
17690 | (TARGET_VSX) ? ", vsx" : ""); | |
17691 | ||
17692 | V2SI_type_node = build_vector_type (intSI_type_node, 2); | |
17693 | V2SF_type_node = build_vector_type (float_type_node, 2); | |
17694 | V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64 ? "__vector long" | |
17695 | : "__vector long long", | |
17696 | intDI_type_node, 2); | |
17697 | V2DF_type_node = rs6000_vector_type ("__vector double", double_type_node, 2); | |
17698 | V4HI_type_node = build_vector_type (intHI_type_node, 4); | |
17699 | V4SI_type_node = rs6000_vector_type ("__vector signed int", | |
17700 | intSI_type_node, 4); | |
17701 | V4SF_type_node = rs6000_vector_type ("__vector float", float_type_node, 4); | |
17702 | V8HI_type_node = rs6000_vector_type ("__vector signed short", | |
17703 | intHI_type_node, 8); | |
17704 | V16QI_type_node = rs6000_vector_type ("__vector signed char", | |
17705 | intQI_type_node, 16); | |
17706 | ||
17707 | unsigned_V16QI_type_node = rs6000_vector_type ("__vector unsigned char", | |
17708 | unsigned_intQI_type_node, 16); | |
17709 | unsigned_V8HI_type_node = rs6000_vector_type ("__vector unsigned short", | |
17710 | unsigned_intHI_type_node, 8); | |
17711 | unsigned_V4SI_type_node = rs6000_vector_type ("__vector unsigned int", | |
17712 | unsigned_intSI_type_node, 4); | |
17713 | unsigned_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64 | |
17714 | ? "__vector unsigned long" | |
17715 | : "__vector unsigned long long", | |
17716 | unsigned_intDI_type_node, 2); | |
17717 | ||
17718 | opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2); | |
17719 | opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2); | |
17720 | opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node); | |
17721 | opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4); | |
17722 | ||
17723 | const_str_type_node | |
17724 | = build_pointer_type (build_qualified_type (char_type_node, | |
17725 | TYPE_QUAL_CONST)); | |
17726 | ||
17727 | /* We use V1TI mode as a special container to hold __int128_t items that | |
17728 | must live in VSX registers. */ | |
17729 | if (intTI_type_node) | |
17730 | { | |
17731 | V1TI_type_node = rs6000_vector_type ("__vector __int128", | |
17732 | intTI_type_node, 1); | |
17733 | unsigned_V1TI_type_node | |
17734 | = rs6000_vector_type ("__vector unsigned __int128", | |
17735 | unsigned_intTI_type_node, 1); | |
17736 | } | |
17737 | ||
17738 | /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...' | |
17739 | types, especially in C++ land. Similarly, 'vector pixel' is distinct from | |
17740 | 'vector unsigned short'. */ | |
17741 | ||
17742 | bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node); | |
17743 | bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node); | |
17744 | bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node); | |
17745 | bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node); | |
17746 | pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node); | |
17747 | ||
17748 | long_integer_type_internal_node = long_integer_type_node; | |
17749 | long_unsigned_type_internal_node = long_unsigned_type_node; | |
17750 | long_long_integer_type_internal_node = long_long_integer_type_node; | |
17751 | long_long_unsigned_type_internal_node = long_long_unsigned_type_node; | |
17752 | intQI_type_internal_node = intQI_type_node; | |
17753 | uintQI_type_internal_node = unsigned_intQI_type_node; | |
17754 | intHI_type_internal_node = intHI_type_node; | |
17755 | uintHI_type_internal_node = unsigned_intHI_type_node; | |
17756 | intSI_type_internal_node = intSI_type_node; | |
17757 | uintSI_type_internal_node = unsigned_intSI_type_node; | |
17758 | intDI_type_internal_node = intDI_type_node; | |
17759 | uintDI_type_internal_node = unsigned_intDI_type_node; | |
17760 | intTI_type_internal_node = intTI_type_node; | |
17761 | uintTI_type_internal_node = unsigned_intTI_type_node; | |
17762 | float_type_internal_node = float_type_node; | |
17763 | double_type_internal_node = double_type_node; | |
17764 | long_double_type_internal_node = long_double_type_node; | |
17765 | dfloat64_type_internal_node = dfloat64_type_node; | |
17766 | dfloat128_type_internal_node = dfloat128_type_node; | |
17767 | void_type_internal_node = void_type_node; | |
17768 | ||
17769 | /* 128-bit floating point support. KFmode is IEEE 128-bit floating point. | |
17770 | IFmode is the IBM extended 128-bit format that is a pair of doubles. | |
17771 | TFmode will be either IEEE 128-bit floating point or the IBM double-double | |
17772 | format that uses a pair of doubles, depending on the switches and | |
17773 | defaults. | |
17774 | ||
17775 | We do not enable the actual __float128 keyword unless the user explicitly | |
17776 | asks for it, because the library support is not yet complete. | |
17777 | ||
17778 | If we don't support either 128-bit IBM double-double or IEEE 128-bit | |
17779 | floating point, we need to make sure the type is non-zero or else the | |
17780 | self-test fails during bootstrap. | |
17781 | ||
17782 | We don't register a built-in type for __ibm128 if the type is the same as | |
17783 | long double. Instead, rs6000_cpu_cpp_builtins adds a #define mapping | |
17784 | __ibm128 to long double. */ | |
17785 | if (TARGET_LONG_DOUBLE_128 && FLOAT128_IEEE_P (TFmode)) | |
17786 | { | |
17787 | ibm128_float_type_node = make_node (REAL_TYPE); | |
17788 | TYPE_PRECISION (ibm128_float_type_node) = 128; | |
17789 | SET_TYPE_MODE (ibm128_float_type_node, IFmode); | |
17790 | layout_type (ibm128_float_type_node); | |
17791 | ||
17792 | lang_hooks.types.register_builtin_type (ibm128_float_type_node, | |
17793 | "__ibm128"); | |
17794 | } | |
17795 | else | |
17796 | ibm128_float_type_node = long_double_type_node; | |
17797 | ||
17798 | if (TARGET_FLOAT128_KEYWORD) | |
17799 | { | |
17800 | ieee128_float_type_node = float128_type_node; | |
17801 | lang_hooks.types.register_builtin_type (ieee128_float_type_node, | |
17802 | "__float128"); | |
17803 | } | |
17804 | ||
17805 | else if (TARGET_FLOAT128_TYPE) | |
17806 | { | |
17807 | ieee128_float_type_node = make_node (REAL_TYPE); | |
17808 | TYPE_PRECISION (ieee128_float_type_node) = 128; | |
17809 | SET_TYPE_MODE (ieee128_float_type_node, KFmode); | |
17810 | layout_type (ieee128_float_type_node); | |
17811 | ||
17812 | /* If we are not exporting the __float128/_Float128 keywords, we need a | |
17813 | keyword to get the types created. Use __ieee128 as the dummy | |
17814 | keyword. */ | |
17815 | lang_hooks.types.register_builtin_type (ieee128_float_type_node, | |
17816 | "__ieee128"); | |
17817 | } | |
17818 | ||
17819 | else | |
17820 | ieee128_float_type_node = long_double_type_node; | |
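/* Summary of the setup above: __ibm128 gets its own IFmode node only
   when long double itself is IEEE 128-bit; otherwise it aliases long
   double.  TARGET_FLOAT128_KEYWORD exposes __float128 directly;
   TARGET_FLOAT128_TYPE alone registers a KFmode node under the
   internal name __ieee128; with neither, ieee128_float_type_node also
   falls back to long double.  */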
17821 | ||
17822 | /* Initialize the modes for builtin_function_type, mapping a machine mode to | |
17823 | tree type node. */ | |
17824 | builtin_mode_to_type[QImode][0] = integer_type_node; | |
17825 | builtin_mode_to_type[HImode][0] = integer_type_node; | |
17826 | builtin_mode_to_type[SImode][0] = intSI_type_node; | |
17827 | builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node; | |
17828 | builtin_mode_to_type[DImode][0] = intDI_type_node; | |
17829 | builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node; | |
17830 | builtin_mode_to_type[TImode][0] = intTI_type_node; | |
17831 | builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node; | |
17832 | builtin_mode_to_type[SFmode][0] = float_type_node; | |
17833 | builtin_mode_to_type[DFmode][0] = double_type_node; | |
17834 | builtin_mode_to_type[IFmode][0] = ibm128_float_type_node; | |
17835 | builtin_mode_to_type[KFmode][0] = ieee128_float_type_node; | |
17836 | builtin_mode_to_type[TFmode][0] = long_double_type_node; | |
17837 | builtin_mode_to_type[DDmode][0] = dfloat64_type_node; | |
17838 | builtin_mode_to_type[TDmode][0] = dfloat128_type_node; | |
17839 | builtin_mode_to_type[V1TImode][0] = V1TI_type_node; | |
17840 | builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node; | |
17841 | builtin_mode_to_type[V2SImode][0] = V2SI_type_node; | |
17842 | builtin_mode_to_type[V2SFmode][0] = V2SF_type_node; | |
17843 | builtin_mode_to_type[V2DImode][0] = V2DI_type_node; | |
17844 | builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node; | |
17845 | builtin_mode_to_type[V2DFmode][0] = V2DF_type_node; | |
17846 | builtin_mode_to_type[V4HImode][0] = V4HI_type_node; | |
17847 | builtin_mode_to_type[V4SImode][0] = V4SI_type_node; | |
17848 | builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node; | |
17849 | builtin_mode_to_type[V4SFmode][0] = V4SF_type_node; | |
17850 | builtin_mode_to_type[V8HImode][0] = V8HI_type_node; | |
17851 | builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node; | |
17852 | builtin_mode_to_type[V16QImode][0] = V16QI_type_node; | |
17853 | builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node; | |
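/* builtin_function_type uses this table to map a (machine mode,
   unsignedness) pair back to a tree type when building builtin
   signatures; e.g. (V4SImode, 1) yields unsigned_V4SI_type_node and
   (SFmode, 0) yields float_type_node.  */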
17854 | ||
17855 | tdecl = add_builtin_type ("__bool char", bool_char_type_node); | |
17856 | TYPE_NAME (bool_char_type_node) = tdecl; | |
17857 | ||
17858 | tdecl = add_builtin_type ("__bool short", bool_short_type_node); | |
17859 | TYPE_NAME (bool_short_type_node) = tdecl; | |
17860 | ||
17861 | tdecl = add_builtin_type ("__bool int", bool_int_type_node); | |
17862 | TYPE_NAME (bool_int_type_node) = tdecl; | |
17863 | ||
17864 | tdecl = add_builtin_type ("__pixel", pixel_type_node); | |
17865 | TYPE_NAME (pixel_type_node) = tdecl; | |
17866 | ||
17867 | bool_V16QI_type_node = rs6000_vector_type ("__vector __bool char", | |
17868 | bool_char_type_node, 16); | |
17869 | bool_V8HI_type_node = rs6000_vector_type ("__vector __bool short", | |
17870 | bool_short_type_node, 8); | |
17871 | bool_V4SI_type_node = rs6000_vector_type ("__vector __bool int", | |
17872 | bool_int_type_node, 4); | |
17873 | bool_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64 | |
17874 | ? "__vector __bool long" | |
17875 | : "__vector __bool long long", | |
17876 | bool_long_type_node, 2); | |
17877 | pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel", | |
17878 | pixel_type_node, 8); | |
17879 | ||
17880 | /* Paired and SPE builtins are only available if the compiler was built | |
17881 | with the corresponding options, so only create those builtins when the | |
17882 | option is enabled. Create AltiVec and VSX builtins on machines with at | |
17883 | least the general purpose extensions (970 and newer) to allow the use | |
17884 | of the target attribute. */ | |
17885 | if (TARGET_PAIRED_FLOAT) | |
17886 | paired_init_builtins (); | |
17887 | if (TARGET_SPE) | |
17888 | spe_init_builtins (); | |
17889 | if (TARGET_EXTRA_BUILTINS) | |
17890 | altivec_init_builtins (); | |
17891 | if (TARGET_HTM) | |
17892 | htm_init_builtins (); | |
17893 | ||
17894 | if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT) | |
17895 | rs6000_common_init_builtins (); | |
17896 | ||
17897 | ftype = build_function_type_list (ieee128_float_type_node, | |
17898 | const_str_type_node, NULL_TREE); | |
17899 | def_builtin ("__builtin_nanq", ftype, RS6000_BUILTIN_NANQ); | |
17900 | def_builtin ("__builtin_nansq", ftype, RS6000_BUILTIN_NANSQ); | |
17901 | ||
17902 | ftype = build_function_type_list (ieee128_float_type_node, NULL_TREE); | |
17903 | def_builtin ("__builtin_infq", ftype, RS6000_BUILTIN_INFQ); | |
17904 | def_builtin ("__builtin_huge_valq", ftype, RS6000_BUILTIN_HUGE_VALQ); | |
17905 | ||
17906 | ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode, | |
17907 | RS6000_BUILTIN_RECIP, "__builtin_recipdiv"); | |
17908 | def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP); | |
17909 | ||
17910 | ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode, | |
17911 | RS6000_BUILTIN_RECIPF, "__builtin_recipdivf"); | |
17912 | def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF); | |
17913 | ||
17914 | ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode, | |
17915 | RS6000_BUILTIN_RSQRT, "__builtin_rsqrt"); | |
17916 | def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT); | |
17917 | ||
17918 | ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode, | |
17919 | RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf"); | |
17920 | def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF); | |
17921 | ||
17922 | mode = (TARGET_64BIT) ? DImode : SImode; | |
17923 | ftype = builtin_function_type (mode, mode, mode, VOIDmode, | |
17924 | POWER7_BUILTIN_BPERMD, "__builtin_bpermd"); | |
17925 | def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD); | |
17926 | ||
17927 | ftype = build_function_type_list (unsigned_intDI_type_node, | |
17928 | NULL_TREE); | |
17929 | def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB); | |
17930 | ||
17931 | if (TARGET_64BIT) | |
17932 | ftype = build_function_type_list (unsigned_intDI_type_node, | |
17933 | NULL_TREE); | |
17934 | else | |
17935 | ftype = build_function_type_list (unsigned_intSI_type_node, | |
17936 | NULL_TREE); | |
17937 | def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB); | |
17938 | ||
17939 | ftype = build_function_type_list (double_type_node, NULL_TREE); | |
17940 | def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS); | |
17941 | ||
17942 | ftype = build_function_type_list (void_type_node, | |
17943 | intSI_type_node, double_type_node, | |
17944 | NULL_TREE); | |
17945 | def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF); | |
17946 | ||
17947 | ftype = build_function_type_list (void_type_node, NULL_TREE); | |
17948 | def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT); | |
17949 | ||
17950 | ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node, | |
17951 | NULL_TREE); | |
17952 | def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS); | |
17953 | def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS); | |
17954 | ||
17955 | /* AIX libm provides clog as __clog. */ | |
17956 | if (TARGET_XCOFF | |
17957 | && (tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE) | |
17958 | set_user_assembler_name (tdecl, "__clog"); | |
17959 | ||
17960 | #ifdef SUBTARGET_INIT_BUILTINS | |
17961 | SUBTARGET_INIT_BUILTINS; | |
17962 | #endif | |
17963 | } | |
17964 | ||
17965 | /* Returns the rs6000 builtin decl for CODE. */ | |
17966 | ||
17967 | static tree | |
17968 | rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) | |
17969 | { | |
17970 | HOST_WIDE_INT fnmask; | |
17971 | ||
17972 | if (code >= RS6000_BUILTIN_COUNT) | |
17973 | return error_mark_node; | |
17974 | ||
17975 | fnmask = rs6000_builtin_info[code].mask; | |
17976 | if ((fnmask & rs6000_builtin_mask) != fnmask) | |
17977 | { | |
17978 | rs6000_invalid_builtin ((enum rs6000_builtins)code); | |
17979 | return error_mark_node; | |
17980 | } | |
17981 | ||
17982 | return rs6000_builtin_decls[code]; | |
17983 | } | |
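/* For example, querying the decl of an AltiVec builtin while the
   required option is not enabled takes the mask-mismatch path above:
   rs6000_invalid_builtin diagnoses it and error_mark_node is returned
   instead of a decl.  */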
17984 | ||
17985 | static void | |
17986 | spe_init_builtins (void) | |
17987 | { | |
17988 | tree puint_type_node = build_pointer_type (unsigned_type_node); | |
17989 | tree pushort_type_node = build_pointer_type (short_unsigned_type_node); | |
17990 | const struct builtin_description *d; | |
17991 | size_t i; | |
17992 | HOST_WIDE_INT builtin_mask = rs6000_builtin_mask; | |
17993 | ||
17994 | tree v2si_ftype_4_v2si | |
17995 | = build_function_type_list (opaque_V2SI_type_node, | |
17996 | opaque_V2SI_type_node, | |
17997 | opaque_V2SI_type_node, | |
17998 | opaque_V2SI_type_node, | |
17999 | opaque_V2SI_type_node, | |
18000 | NULL_TREE); | |
18001 | ||
18002 | tree v2sf_ftype_4_v2sf | |
18003 | = build_function_type_list (opaque_V2SF_type_node, | |
18004 | opaque_V2SF_type_node, | |
18005 | opaque_V2SF_type_node, | |
18006 | opaque_V2SF_type_node, | |
18007 | opaque_V2SF_type_node, | |
18008 | NULL_TREE); | |
18009 | ||
18010 | tree int_ftype_int_v2si_v2si | |
18011 | = build_function_type_list (integer_type_node, | |
18012 | integer_type_node, | |
18013 | opaque_V2SI_type_node, | |
18014 | opaque_V2SI_type_node, | |
18015 | NULL_TREE); | |
18016 | ||
18017 | tree int_ftype_int_v2sf_v2sf | |
18018 | = build_function_type_list (integer_type_node, | |
18019 | integer_type_node, | |
18020 | opaque_V2SF_type_node, | |
18021 | opaque_V2SF_type_node, | |
18022 | NULL_TREE); | |
18023 | ||
18024 | tree void_ftype_v2si_puint_int | |
18025 | = build_function_type_list (void_type_node, | |
18026 | opaque_V2SI_type_node, | |
18027 | puint_type_node, | |
18028 | integer_type_node, | |
18029 | NULL_TREE); | |
18030 | ||
18031 | tree void_ftype_v2si_puint_char | |
18032 | = build_function_type_list (void_type_node, | |
18033 | opaque_V2SI_type_node, | |
18034 | puint_type_node, | |
18035 | char_type_node, | |
18036 | NULL_TREE); | |
18037 | ||
18038 | tree void_ftype_v2si_pv2si_int | |
18039 | = build_function_type_list (void_type_node, | |
18040 | opaque_V2SI_type_node, | |
18041 | opaque_p_V2SI_type_node, | |
18042 | integer_type_node, | |
18043 | NULL_TREE); | |
18044 | ||
18045 | tree void_ftype_v2si_pv2si_char | |
18046 | = build_function_type_list (void_type_node, | |
18047 | opaque_V2SI_type_node, | |
18048 | opaque_p_V2SI_type_node, | |
18049 | char_type_node, | |
18050 | NULL_TREE); | |
18051 | ||
18052 | tree void_ftype_int | |
18053 | = build_function_type_list (void_type_node, integer_type_node, NULL_TREE); | |
18054 | ||
18055 | tree int_ftype_void | |
18056 | = build_function_type_list (integer_type_node, NULL_TREE); | |
18057 | ||
18058 | tree v2si_ftype_pv2si_int | |
18059 | = build_function_type_list (opaque_V2SI_type_node, | |
18060 | opaque_p_V2SI_type_node, | |
18061 | integer_type_node, | |
18062 | NULL_TREE); | |
18063 | ||
18064 | tree v2si_ftype_puint_int | |
18065 | = build_function_type_list (opaque_V2SI_type_node, | |
18066 | puint_type_node, | |
18067 | integer_type_node, | |
18068 | NULL_TREE); | |
18069 | ||
18070 | tree v2si_ftype_pushort_int | |
18071 | = build_function_type_list (opaque_V2SI_type_node, | |
18072 | pushort_type_node, | |
18073 | integer_type_node, | |
18074 | NULL_TREE); | |
18075 | ||
18076 | tree v2si_ftype_signed_char | |
18077 | = build_function_type_list (opaque_V2SI_type_node, | |
18078 | signed_char_type_node, | |
18079 | NULL_TREE); | |
18080 | ||
18081 | add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node); | |
18082 | ||
18083 | /* Initialize irregular SPE builtins. */ | |
18084 | ||
18085 | def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR); | |
18086 | def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR); | |
18087 | def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX); | |
18088 | def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX); | |
18089 | def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX); | |
18090 | def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX); | |
18091 | def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX); | |
18092 | def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX); | |
18093 | def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX); | |
18094 | def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD); | |
18095 | def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH); | |
18096 | def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW); | |
18097 | def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE); | |
18098 | def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO); | |
18099 | def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE); | |
18100 | def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO); | |
18101 | def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI); | |
18102 | def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI); | |
18103 | ||
18104 | /* Loads. */ | |
18105 | def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX); | |
18106 | def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX); | |
18107 | def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX); | |
18108 | def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX); | |
18109 | def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX); | |
18110 | def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX); | |
18111 | def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX); | |
18112 | def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX); | |
18113 | def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX); | |
18114 | def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX); | |
18115 | def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX); | |
18116 | def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD); | |
18117 | def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW); | |
18118 | def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH); | |
18119 | def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT); | |
18120 | def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT); | |
18121 | def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT); | |
18122 | def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE); | |
18123 | def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS); | |
18124 | def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU); | |
18125 | def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT); | |
18126 | def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT); | |
18127 | ||
18128 | /* Predicates. */ | |
18129 | d = bdesc_spe_predicates; | |
18130 | for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++) | |
18131 | { | |
18132 | tree type; | |
18133 | HOST_WIDE_INT mask = d->mask; | |
18134 | ||
18135 | if ((mask & builtin_mask) != mask) | |
18136 | { | |
18137 | if (TARGET_DEBUG_BUILTIN) | |
18138 | fprintf (stderr, "spe_init_builtins, skip predicate %s\n", | |
18139 | d->name); | |
18140 | continue; | |
18141 | } | |
18142 | ||
18143 | /* Cannot define builtin if the instruction is disabled. */ | |
18144 | gcc_assert (d->icode != CODE_FOR_nothing); | |
18145 | switch (insn_data[d->icode].operand[1].mode) | |
18146 | { | |
916ace94 | 18147 | case E_V2SImode: |
01e91138 | 18148 | type = int_ftype_int_v2si_v2si; |
18149 | break; | |
916ace94 | 18150 | case E_V2SFmode: |
01e91138 | 18151 | type = int_ftype_int_v2sf_v2sf; |
18152 | break; | |
18153 | default: | |
18154 | gcc_unreachable (); | |
18155 | } | |
18156 | ||
18157 | def_builtin (d->name, type, d->code); | |
18158 | } | |
18159 | ||
18160 | /* Evsel predicates. */ | |
18161 | d = bdesc_spe_evsel; | |
18162 | for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++) | |
18163 | { | |
18164 | tree type; | |
18165 | HOST_WIDE_INT mask = d->mask; | |
18166 | ||
18167 | if ((mask & builtin_mask) != mask) | |
18168 | { | |
18169 | if (TARGET_DEBUG_BUILTIN) | |
18170 | fprintf (stderr, "spe_init_builtins, skip evsel %s\n", | |
18171 | d->name); | |
18172 | continue; | |
18173 | } | |
18174 | ||
18175 | /* Cannot define builtin if the instruction is disabled. */ | |
18176 | gcc_assert (d->icode != CODE_FOR_nothing); | |
18177 | switch (insn_data[d->icode].operand[1].mode) | |
18178 | { | |
916ace94 | 18179 | case E_V2SImode: |
01e91138 | 18180 | type = v2si_ftype_4_v2si; |
18181 | break; | |
916ace94 | 18182 | case E_V2SFmode: |
01e91138 | 18183 | type = v2sf_ftype_4_v2sf; |
18184 | break; | |
18185 | default: | |
18186 | gcc_unreachable (); | |
18187 | } | |
18188 | ||
18189 | def_builtin (d->name, type, d->code); | |
18190 | } | |
18191 | } | |
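/* Both loops above pick each builtin's signature from the mode of
   operand 1 of its instruction pattern; e.g. an evsel pattern whose
   comparison operands are V2SFmode gets the v2sf_ftype_4_v2sf
   type.  */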
18192 | ||
18193 | static void | |
18194 | paired_init_builtins (void) | |
18195 | { | |
18196 | const struct builtin_description *d; | |
18197 | size_t i; | |
18198 | HOST_WIDE_INT builtin_mask = rs6000_builtin_mask; | |
18199 | ||
18200 | tree int_ftype_int_v2sf_v2sf | |
18201 | = build_function_type_list (integer_type_node, | |
18202 | integer_type_node, | |
18203 | V2SF_type_node, | |
18204 | V2SF_type_node, | |
18205 | NULL_TREE); | |
18206 | tree pcfloat_type_node = | |
18207 | build_pointer_type (build_qualified_type | |
18208 | (float_type_node, TYPE_QUAL_CONST)); | |
18209 | ||
18210 | tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node, | |
18211 | long_integer_type_node, | |
18212 | pcfloat_type_node, | |
18213 | NULL_TREE); | |
18214 | tree void_ftype_v2sf_long_pcfloat = | |
18215 | build_function_type_list (void_type_node, | |
18216 | V2SF_type_node, | |
18217 | long_integer_type_node, | |
18218 | pcfloat_type_node, | |
18219 | NULL_TREE); | |
18220 | ||
18222 | def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat, | |
18223 | PAIRED_BUILTIN_LX); | |
18224 | ||
18226 | def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat, | |
18227 | PAIRED_BUILTIN_STX); | |
18228 | ||
18229 | /* Predicates. */ | |
18230 | d = bdesc_paired_preds; | |
18231 | for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++) | |
18232 | { | |
18233 | tree type; | |
18234 | HOST_WIDE_INT mask = d->mask; | |
18235 | ||
18236 | if ((mask & builtin_mask) != mask) | |
18237 | { | |
18238 | if (TARGET_DEBUG_BUILTIN) | |
18239 | fprintf (stderr, "paired_init_builtins, skip predicate %s\n", | |
18240 | d->name); | |
18241 | continue; | |
18242 | } | |
18243 | ||
18244 | /* Cannot define builtin if the instruction is disabled. */ | |
18245 | gcc_assert (d->icode != CODE_FOR_nothing); | |
18246 | ||
18247 | if (TARGET_DEBUG_BUILTIN) | |
18248 | fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n", | |
18249 | (int)i, get_insn_name (d->icode), (int)d->icode, | |
18250 | GET_MODE_NAME (insn_data[d->icode].operand[1].mode)); | |
18251 | ||
18252 | switch (insn_data[d->icode].operand[1].mode) | |
18253 | { | |
916ace94 | 18254 | case E_V2SFmode: |
01e91138 | 18255 | type = int_ftype_int_v2sf_v2sf; |
18256 | break; | |
18257 | default: | |
18258 | gcc_unreachable (); | |
18259 | } | |
18260 | ||
18261 | def_builtin (d->name, type, d->code); | |
18262 | } | |
18263 | } | |
18264 | ||
18265 | static void | |
18266 | altivec_init_builtins (void) | |
18267 | { | |
18268 | const struct builtin_description *d; | |
18269 | size_t i; | |
18270 | tree ftype; | |
18271 | tree decl; | |
18272 | HOST_WIDE_INT builtin_mask = rs6000_builtin_mask; | |
18273 | ||
18274 | tree pvoid_type_node = build_pointer_type (void_type_node); | |
18275 | ||
18276 | tree pcvoid_type_node | |
18277 | = build_pointer_type (build_qualified_type (void_type_node, | |
18278 | TYPE_QUAL_CONST)); | |
18279 | ||
18280 | tree int_ftype_opaque | |
18281 | = build_function_type_list (integer_type_node, | |
18282 | opaque_V4SI_type_node, NULL_TREE); | |
18283 | tree opaque_ftype_opaque | |
18284 | = build_function_type_list (integer_type_node, NULL_TREE); | |
18285 | tree opaque_ftype_opaque_int | |
18286 | = build_function_type_list (opaque_V4SI_type_node, | |
18287 | opaque_V4SI_type_node, integer_type_node, NULL_TREE); | |
18288 | tree opaque_ftype_opaque_opaque_int | |
18289 | = build_function_type_list (opaque_V4SI_type_node, | |
18290 | opaque_V4SI_type_node, opaque_V4SI_type_node, | |
18291 | integer_type_node, NULL_TREE); | |
18292 | tree opaque_ftype_opaque_opaque_opaque | |
18293 | = build_function_type_list (opaque_V4SI_type_node, | |
18294 | opaque_V4SI_type_node, opaque_V4SI_type_node, | |
18295 | opaque_V4SI_type_node, NULL_TREE); | |
18296 | tree opaque_ftype_opaque_opaque | |
18297 | = build_function_type_list (opaque_V4SI_type_node, | |
18298 | opaque_V4SI_type_node, opaque_V4SI_type_node, | |
18299 | NULL_TREE); | |
18300 | tree int_ftype_int_opaque_opaque | |
18301 | = build_function_type_list (integer_type_node, | |
18302 | integer_type_node, opaque_V4SI_type_node, | |
18303 | opaque_V4SI_type_node, NULL_TREE); | |
18304 | tree int_ftype_int_v4si_v4si | |
18305 | = build_function_type_list (integer_type_node, | |
18306 | integer_type_node, V4SI_type_node, | |
18307 | V4SI_type_node, NULL_TREE); | |
18308 | tree int_ftype_int_v2di_v2di | |
18309 | = build_function_type_list (integer_type_node, | |
18310 | integer_type_node, V2DI_type_node, | |
18311 | V2DI_type_node, NULL_TREE); | |
18312 | tree void_ftype_v4si | |
18313 | = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE); | |
18314 | tree v8hi_ftype_void | |
18315 | = build_function_type_list (V8HI_type_node, NULL_TREE); | |
18316 | tree void_ftype_void | |
18317 | = build_function_type_list (void_type_node, NULL_TREE); | |
18318 | tree void_ftype_int | |
18319 | = build_function_type_list (void_type_node, integer_type_node, NULL_TREE); | |
18320 | ||
18321 | tree opaque_ftype_long_pcvoid | |
18322 | = build_function_type_list (opaque_V4SI_type_node, | |
18323 | long_integer_type_node, pcvoid_type_node, | |
18324 | NULL_TREE); | |
18325 | tree v16qi_ftype_long_pcvoid | |
18326 | = build_function_type_list (V16QI_type_node, | |
18327 | long_integer_type_node, pcvoid_type_node, | |
18328 | NULL_TREE); | |
18329 | tree v8hi_ftype_long_pcvoid | |
18330 | = build_function_type_list (V8HI_type_node, | |
18331 | long_integer_type_node, pcvoid_type_node, | |
18332 | NULL_TREE); | |
18333 | tree v4si_ftype_long_pcvoid | |
18334 | = build_function_type_list (V4SI_type_node, | |
18335 | long_integer_type_node, pcvoid_type_node, | |
18336 | NULL_TREE); | |
18337 | tree v4sf_ftype_long_pcvoid | |
18338 | = build_function_type_list (V4SF_type_node, | |
18339 | long_integer_type_node, pcvoid_type_node, | |
18340 | NULL_TREE); | |
18341 | tree v2df_ftype_long_pcvoid | |
18342 | = build_function_type_list (V2DF_type_node, | |
18343 | long_integer_type_node, pcvoid_type_node, | |
18344 | NULL_TREE); | |
18345 | tree v2di_ftype_long_pcvoid | |
18346 | = build_function_type_list (V2DI_type_node, | |
18347 | long_integer_type_node, pcvoid_type_node, | |
18348 | NULL_TREE); | |
18349 | ||
18350 | tree void_ftype_opaque_long_pvoid | |
18351 | = build_function_type_list (void_type_node, | |
18352 | opaque_V4SI_type_node, long_integer_type_node, | |
18353 | pvoid_type_node, NULL_TREE); | |
18354 | tree void_ftype_v4si_long_pvoid | |
18355 | = build_function_type_list (void_type_node, | |
18356 | V4SI_type_node, long_integer_type_node, | |
18357 | pvoid_type_node, NULL_TREE); | |
18358 | tree void_ftype_v16qi_long_pvoid | |
18359 | = build_function_type_list (void_type_node, | |
18360 | V16QI_type_node, long_integer_type_node, | |
18361 | pvoid_type_node, NULL_TREE); | |
18362 | ||
18363 | tree void_ftype_v16qi_pvoid_long | |
18364 | = build_function_type_list (void_type_node, | |
18365 | V16QI_type_node, pvoid_type_node, | |
18366 | long_integer_type_node, NULL_TREE); | |
18367 | ||
18368 | tree void_ftype_v8hi_long_pvoid | |
18369 | = build_function_type_list (void_type_node, | |
18370 | V8HI_type_node, long_integer_type_node, | |
18371 | pvoid_type_node, NULL_TREE); | |
18372 | tree void_ftype_v4sf_long_pvoid | |
18373 | = build_function_type_list (void_type_node, | |
18374 | V4SF_type_node, long_integer_type_node, | |
18375 | pvoid_type_node, NULL_TREE); | |
18376 | tree void_ftype_v2df_long_pvoid | |
18377 | = build_function_type_list (void_type_node, | |
18378 | V2DF_type_node, long_integer_type_node, | |
18379 | pvoid_type_node, NULL_TREE); | |
18380 | tree void_ftype_v2di_long_pvoid | |
18381 | = build_function_type_list (void_type_node, | |
18382 | V2DI_type_node, long_integer_type_node, | |
18383 | pvoid_type_node, NULL_TREE); | |
18384 | tree int_ftype_int_v8hi_v8hi | |
18385 | = build_function_type_list (integer_type_node, | |
18386 | integer_type_node, V8HI_type_node, | |
18387 | V8HI_type_node, NULL_TREE); | |
18388 | tree int_ftype_int_v16qi_v16qi | |
18389 | = build_function_type_list (integer_type_node, | |
18390 | integer_type_node, V16QI_type_node, | |
18391 | V16QI_type_node, NULL_TREE); | |
18392 | tree int_ftype_int_v4sf_v4sf | |
18393 | = build_function_type_list (integer_type_node, | |
18394 | integer_type_node, V4SF_type_node, | |
18395 | V4SF_type_node, NULL_TREE); | |
18396 | tree int_ftype_int_v2df_v2df | |
18397 | = build_function_type_list (integer_type_node, | |
18398 | integer_type_node, V2DF_type_node, | |
18399 | V2DF_type_node, NULL_TREE); | |
18400 | tree v2di_ftype_v2di | |
18401 | = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE); | |
18402 | tree v4si_ftype_v4si | |
18403 | = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE); | |
18404 | tree v8hi_ftype_v8hi | |
18405 | = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE); | |
18406 | tree v16qi_ftype_v16qi | |
18407 | = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE); | |
18408 | tree v4sf_ftype_v4sf | |
18409 | = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE); | |
18410 | tree v2df_ftype_v2df | |
18411 | = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE); | |
18412 | tree void_ftype_pcvoid_int_int | |
18413 | = build_function_type_list (void_type_node, | |
18414 | pcvoid_type_node, integer_type_node, | |
18415 | integer_type_node, NULL_TREE); | |
18416 | ||
18417 | def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR); | |
18418 | def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR); | |
18419 | def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL); | |
18420 | def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS); | |
18421 | def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL); | |
18422 | def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR); | |
18423 | def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX); | |
18424 | def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX); | |
18425 | def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX); | |
18426 | def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL); | |
18427 | def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid, | |
18428 | ALTIVEC_BUILTIN_LVXL_V2DF); | |
18429 | def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid, | |
18430 | ALTIVEC_BUILTIN_LVXL_V2DI); | |
18431 | def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid, | |
18432 | ALTIVEC_BUILTIN_LVXL_V4SF); | |
18433 | def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid, | |
18434 | ALTIVEC_BUILTIN_LVXL_V4SI); | |
18435 | def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid, | |
18436 | ALTIVEC_BUILTIN_LVXL_V8HI); | |
18437 | def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid, | |
18438 | ALTIVEC_BUILTIN_LVXL_V16QI); | |
18439 | def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX); | |
18440 | def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid, | |
18441 | ALTIVEC_BUILTIN_LVX_V2DF); | |
18442 | def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid, | |
18443 | ALTIVEC_BUILTIN_LVX_V2DI); | |
18444 | def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid, | |
18445 | ALTIVEC_BUILTIN_LVX_V4SF); | |
18446 | def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid, | |
18447 | ALTIVEC_BUILTIN_LVX_V4SI); | |
18448 | def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid, | |
18449 | ALTIVEC_BUILTIN_LVX_V8HI); | |
18450 | def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid, | |
18451 | ALTIVEC_BUILTIN_LVX_V16QI); | |
18452 | def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX); | |
18453 | def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid, | |
18454 | ALTIVEC_BUILTIN_STVX_V2DF); | |
18455 | def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid, | |
18456 | ALTIVEC_BUILTIN_STVX_V2DI); | |
18457 | def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid, | |
18458 | ALTIVEC_BUILTIN_STVX_V4SF); | |
18459 | def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid, | |
18460 | ALTIVEC_BUILTIN_STVX_V4SI); | |
18461 | def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid, | |
18462 | ALTIVEC_BUILTIN_STVX_V8HI); | |
18463 | def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid, | |
18464 | ALTIVEC_BUILTIN_STVX_V16QI); | |
18465 | def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX); | |
18466 | def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL); | |
18467 | def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid, | |
18468 | ALTIVEC_BUILTIN_STVXL_V2DF); | |
18469 | def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid, | |
18470 | ALTIVEC_BUILTIN_STVXL_V2DI); | |
18471 | def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid, | |
18472 | ALTIVEC_BUILTIN_STVXL_V4SF); | |
18473 | def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid, | |
18474 | ALTIVEC_BUILTIN_STVXL_V4SI); | |
18475 | def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid, | |
18476 | ALTIVEC_BUILTIN_STVXL_V8HI); | |
18477 | def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid, | |
18478 | ALTIVEC_BUILTIN_STVXL_V16QI); | |
18479 | def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX); | |
18480 | def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX); | |
18481 | def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD); | |
18482 | def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE); | |
18483 | def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL); | |
18484 | def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL); | |
18485 | def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR); | |
18486 | def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX); | |
18487 | def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX); | |
18488 | def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX); | |
18489 | def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST); | |
18490 | def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE); | |
18491 | def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL); | |
18492 | def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX); | |
18493 | def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX); | |
18494 | def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX); | |
18495 | ||
18496 | def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid, | |
18497 | VSX_BUILTIN_LXVD2X_V2DF); | |
18498 | def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid, | |
18499 | VSX_BUILTIN_LXVD2X_V2DI); | |
18500 | def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid, | |
18501 | VSX_BUILTIN_LXVW4X_V4SF); | |
18502 | def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid, | |
18503 | VSX_BUILTIN_LXVW4X_V4SI); | |
18504 | def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid, | |
18505 | VSX_BUILTIN_LXVW4X_V8HI); | |
18506 | def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid, | |
18507 | VSX_BUILTIN_LXVW4X_V16QI); | |
18508 | def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid, | |
18509 | VSX_BUILTIN_STXVD2X_V2DF); | |
18510 | def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid, | |
18511 | VSX_BUILTIN_STXVD2X_V2DI); | |
18512 | def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid, | |
18513 | VSX_BUILTIN_STXVW4X_V4SF); | |
18514 | def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid, | |
18515 | VSX_BUILTIN_STXVW4X_V4SI); | |
18516 | def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid, | |
18517 | VSX_BUILTIN_STXVW4X_V8HI); | |
18518 | def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid, | |
18519 | VSX_BUILTIN_STXVW4X_V16QI); | |
18520 | ||
18521 | def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid, | |
18522 | VSX_BUILTIN_LD_ELEMREV_V2DF); | |
18523 | def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid, | |
18524 | VSX_BUILTIN_LD_ELEMREV_V2DI); | |
18525 | def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid, | |
18526 | VSX_BUILTIN_LD_ELEMREV_V4SF); | |
18527 | def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid, | |
18528 | VSX_BUILTIN_LD_ELEMREV_V4SI); | |
18529 | def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid, | |
18530 | VSX_BUILTIN_ST_ELEMREV_V2DF); | |
18531 | def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid, | |
18532 | VSX_BUILTIN_ST_ELEMREV_V2DI); | |
18533 | def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid, | |
18534 | VSX_BUILTIN_ST_ELEMREV_V4SF); | |
18535 | def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid, | |
18536 | VSX_BUILTIN_ST_ELEMREV_V4SI); | |
18537 | ||
18538 | if (TARGET_P9_VECTOR) | |
18539 | { | |
18540 | def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid, | |
18541 | VSX_BUILTIN_LD_ELEMREV_V8HI); | |
18542 | def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid, | |
18543 | VSX_BUILTIN_LD_ELEMREV_V16QI); | |
18544 | def_builtin ("__builtin_vsx_st_elemrev_v8hi", | |
18545 | void_ftype_v8hi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V8HI); | |
18546 | def_builtin ("__builtin_vsx_st_elemrev_v16qi", | |
18547 | void_ftype_v16qi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V16QI); | |
18548 | } | |
18549 | else | |
18550 | { | |
18551 | rs6000_builtin_decls[(int) VSX_BUILTIN_LD_ELEMREV_V8HI] | |
18552 | = rs6000_builtin_decls[(int) VSX_BUILTIN_LXVW4X_V8HI]; | |
18553 | rs6000_builtin_decls[(int) VSX_BUILTIN_LD_ELEMREV_V16QI] | |
18554 | = rs6000_builtin_decls[(int) VSX_BUILTIN_LXVW4X_V16QI]; | |
18555 | rs6000_builtin_decls[(int) VSX_BUILTIN_ST_ELEMREV_V8HI] | |
18556 | = rs6000_builtin_decls[(int) VSX_BUILTIN_STXVW4X_V8HI]; | |
18557 | rs6000_builtin_decls[(int) VSX_BUILTIN_ST_ELEMREV_V16QI] | |
18558 | = rs6000_builtin_decls[(int) VSX_BUILTIN_STXVW4X_V16QI]; | |
18559 | } | |
18560 | ||
18561 | def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid, | |
18562 | VSX_BUILTIN_VEC_LD); | |
18563 | def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid, | |
18564 | VSX_BUILTIN_VEC_ST); | |
18565 | def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid, | |
18566 | VSX_BUILTIN_VEC_XL); | |
18567 | def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid, | |
18568 | VSX_BUILTIN_VEC_XST); | |
18569 | ||
18570 | def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP); | |
18571 | def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS); | |
18572 | def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE); | |
18573 | ||
18574 | def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD); | |
18575 | def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT); | |
18576 | def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT); | |
18577 | def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT); | |
18578 | def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW); | |
18579 | def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH); | |
18580 | def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB); | |
18581 | def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF); | |
18582 | def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX); | |
18583 | def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX); | |
18584 | def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS); | |
18585 | def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU); | |
18586 | ||
18587 | def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque, | |
18588 | ALTIVEC_BUILTIN_VEC_ADDE); | |
18589 | def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque, | |
18590 | ALTIVEC_BUILTIN_VEC_ADDEC); | |
18591 | def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque, | |
18592 | ALTIVEC_BUILTIN_VEC_CMPNE); | |
18593 | def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque, | |
18594 | ALTIVEC_BUILTIN_VEC_MUL); | |
18595 | ||
18596 | /* Cell builtins. */ | |
18597 | def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX); | |
18598 | def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL); | |
18599 | def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX); | |
18600 | def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL); | |
18601 | ||
18602 | def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX); | |
18603 | def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL); | |
18604 | def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX); | |
18605 | def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL); | |
18606 | ||
18607 | def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX); | |
18608 | def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL); | |
18609 | def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX); | |
18610 | def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL); | |
18611 | ||
18612 | def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX); | |
18613 | def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL); | |
18614 | def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX); | |
18615 | def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL); | |
18616 | ||
18617 | if (TARGET_P9_VECTOR) | |
18618 | def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long, | |
18619 | P9V_BUILTIN_STXVL); | |
18620 | ||
18621 | /* Add the DST variants. */ | |
18622 | d = bdesc_dst; | |
18623 | for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++) | |
18624 | { | |
18625 | HOST_WIDE_INT mask = d->mask; | |
18626 | ||
18627 | /* It is expected that these dst built-in functions may have | |
18628 | d->icode equal to CODE_FOR_nothing. */ | |
18629 | if ((mask & builtin_mask) != mask) | |
18630 | { | |
18631 | if (TARGET_DEBUG_BUILTIN) | |
18632 | fprintf (stderr, "altivec_init_builtins, skip dst %s\n", | |
18633 | d->name); | |
18634 | continue; | |
18635 | } | |
18636 | def_builtin (d->name, void_ftype_pcvoid_int_int, d->code); | |
18637 | } | |
18638 | ||
18639 | /* Initialize the predicates. */ | |
18640 | d = bdesc_altivec_preds; | |
18641 | for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++) | |
18642 | { | |
18643 | machine_mode mode1; | |
18644 | tree type; | |
18645 | HOST_WIDE_INT mask = d->mask; | |
18646 | ||
18647 | if ((mask & builtin_mask) != mask) | |
18648 | { | |
18649 | if (TARGET_DEBUG_BUILTIN) | |
18650 | fprintf (stderr, "altivec_init_builtins, skip predicate %s\n", | |
18651 | d->name); | |
18652 | continue; | |
18653 | } | |
18654 | ||
18655 | if (rs6000_overloaded_builtin_p (d->code)) | |
18656 | mode1 = VOIDmode; | |
18657 | else | |
18658 | { | |
18659 | /* Cannot define builtin if the instruction is disabled. */ | |
18660 | gcc_assert (d->icode != CODE_FOR_nothing); | |
18661 | mode1 = insn_data[d->icode].operand[1].mode; | |
18662 | } | |
18663 | ||
18664 | switch (mode1) | |
18665 | { | |
916ace94 | 18666 | case E_VOIDmode: |
01e91138 | 18667 | type = int_ftype_int_opaque_opaque; |
18668 | break; | |
916ace94 | 18669 | case E_V2DImode: |
01e91138 | 18670 | type = int_ftype_int_v2di_v2di; |
18671 | break; | |
916ace94 | 18672 | case E_V4SImode: |
01e91138 | 18673 | type = int_ftype_int_v4si_v4si; |
18674 | break; | |
916ace94 | 18675 | case E_V8HImode: |
01e91138 | 18676 | type = int_ftype_int_v8hi_v8hi; |
18677 | break; | |
916ace94 | 18678 | case E_V16QImode: |
01e91138 | 18679 | type = int_ftype_int_v16qi_v16qi; |
18680 | break; | |
916ace94 | 18681 | case E_V4SFmode: |
01e91138 | 18682 | type = int_ftype_int_v4sf_v4sf; |
18683 | break; | |
916ace94 | 18684 | case E_V2DFmode: |
01e91138 | 18685 | type = int_ftype_int_v2df_v2df; |
18686 | break; | |
18687 | default: | |
18688 | gcc_unreachable (); | |
18689 | } | |
18690 | ||
18691 | def_builtin (d->name, type, d->code); | |
18692 | } | |
18693 | ||
18694 | /* Initialize the abs* operators. */ | |
18695 | d = bdesc_abs; | |
18696 | for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++) | |
18697 | { | |
18698 | machine_mode mode0; | |
18699 | tree type; | |
18700 | HOST_WIDE_INT mask = d->mask; | |
18701 | ||
18702 | if ((mask & builtin_mask) != mask) | |
18703 | { | |
18704 | if (TARGET_DEBUG_BUILTIN) | |
18705 | fprintf (stderr, "altivec_init_builtins, skip abs %s\n", | |
18706 | d->name); | |
18707 | continue; | |
18708 | } | |
18709 | ||
18710 | /* Cannot define builtin if the instruction is disabled. */ | |
18711 | gcc_assert (d->icode != CODE_FOR_nothing); | |
18712 | mode0 = insn_data[d->icode].operand[0].mode; | |
18713 | ||
18714 | switch (mode0) | |
18715 | { | |
916ace94 | 18716 | case E_V2DImode: |
01e91138 | 18717 | type = v2di_ftype_v2di; |
18718 | break; | |
916ace94 | 18719 | case E_V4SImode: |
01e91138 | 18720 | type = v4si_ftype_v4si; |
18721 | break; | |
916ace94 | 18722 | case E_V8HImode: |
01e91138 | 18723 | type = v8hi_ftype_v8hi; |
18724 | break; | |
916ace94 | 18725 | case E_V16QImode: |
01e91138 | 18726 | type = v16qi_ftype_v16qi; |
18727 | break; | |
916ace94 | 18728 | case E_V4SFmode: |
01e91138 | 18729 | type = v4sf_ftype_v4sf; |
18730 | break; | |
916ace94 | 18731 | case E_V2DFmode: |
01e91138 | 18732 | type = v2df_ftype_v2df; |
18733 | break; | |
18734 | default: | |
18735 | gcc_unreachable (); | |
18736 | } | |
18737 | ||
18738 | def_builtin (d->name, type, d->code); | |
18739 | } | |
18740 | ||
18741 | /* Initialize target builtin that implements | |
18742 | targetm.vectorize.builtin_mask_for_load. */ | |
18743 | ||
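| /* A sketch of the intended use (assumed usage, not from this file): for a | |
| misaligned access the vectorizer loads the two aligned vectors around | |
| it and merges them with a REALIGN_LOAD, using the permute control | |
| vector returned by __builtin_altivec_mask_for_load (addr), much like | |
| an lvsl result. */ | |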
18744 | decl = add_builtin_function ("__builtin_altivec_mask_for_load", | |
18745 | v16qi_ftype_long_pcvoid, | |
18746 | ALTIVEC_BUILTIN_MASK_FOR_LOAD, | |
18747 | BUILT_IN_MD, NULL, NULL_TREE); | |
18748 | TREE_READONLY (decl) = 1; | |
18749 | /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */ | |
18750 | altivec_builtin_mask_for_load = decl; | |
18751 | ||
18752 | /* Access to the vec_init patterns. */ | |
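| /* Usage sketch (not original code): build a vector element by element, | |
| __vector signed int v = __builtin_vec_init_v4si (1, 2, 3, 4); */ | |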
18753 | ftype = build_function_type_list (V4SI_type_node, integer_type_node, | |
18754 | integer_type_node, integer_type_node, | |
18755 | integer_type_node, NULL_TREE); | |
18756 | def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI); | |
18757 | ||
18758 | ftype = build_function_type_list (V8HI_type_node, short_integer_type_node, | |
18759 | short_integer_type_node, | |
18760 | short_integer_type_node, | |
18761 | short_integer_type_node, | |
18762 | short_integer_type_node, | |
18763 | short_integer_type_node, | |
18764 | short_integer_type_node, | |
18765 | short_integer_type_node, NULL_TREE); | |
18766 | def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI); | |
18767 | ||
18768 | ftype = build_function_type_list (V16QI_type_node, char_type_node, | |
18769 | char_type_node, char_type_node, | |
18770 | char_type_node, char_type_node, | |
18771 | char_type_node, char_type_node, | |
18772 | char_type_node, char_type_node, | |
18773 | char_type_node, char_type_node, | |
18774 | char_type_node, char_type_node, | |
18775 | char_type_node, char_type_node, | |
18776 | char_type_node, NULL_TREE); | |
18777 | def_builtin ("__builtin_vec_init_v16qi", ftype, | |
18778 | ALTIVEC_BUILTIN_VEC_INIT_V16QI); | |
18779 | ||
18780 | ftype = build_function_type_list (V4SF_type_node, float_type_node, | |
18781 | float_type_node, float_type_node, | |
18782 | float_type_node, NULL_TREE); | |
18783 | def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF); | |
18784 | ||
18785 | /* VSX builtins. */ | |
18786 | ftype = build_function_type_list (V2DF_type_node, double_type_node, | |
18787 | double_type_node, NULL_TREE); | |
18788 | def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF); | |
18789 | ||
18790 | ftype = build_function_type_list (V2DI_type_node, intDI_type_node, | |
18791 | intDI_type_node, NULL_TREE); | |
18792 | def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI); | |
18793 | ||
18794 | /* Access to the vec_set patterns. */ | |
18795 | ftype = build_function_type_list (V4SI_type_node, V4SI_type_node, | |
18796 | intSI_type_node, | |
18797 | integer_type_node, NULL_TREE); | |
18798 | def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI); | |
18799 | ||
18800 | ftype = build_function_type_list (V8HI_type_node, V8HI_type_node, | |
18801 | intHI_type_node, | |
18802 | integer_type_node, NULL_TREE); | |
18803 | def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI); | |
18804 | ||
18805 | ftype = build_function_type_list (V16QI_type_node, V16QI_type_node, | |
18806 | intQI_type_node, | |
18807 | integer_type_node, NULL_TREE); | |
18808 | def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI); | |
18809 | ||
18810 | ftype = build_function_type_list (V4SF_type_node, V4SF_type_node, | |
18811 | float_type_node, | |
18812 | integer_type_node, NULL_TREE); | |
18813 | def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF); | |
18814 | ||
18815 | ftype = build_function_type_list (V2DF_type_node, V2DF_type_node, | |
18816 | double_type_node, | |
18817 | integer_type_node, NULL_TREE); | |
18818 | def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF); | |
18819 | ||
18820 | ftype = build_function_type_list (V2DI_type_node, V2DI_type_node, | |
18821 | intDI_type_node, | |
18822 | integer_type_node, NULL_TREE); | |
18823 | def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI); | |
18824 | ||
18825 | /* Access to the vec_extract patterns. */ | |
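| /* Usage sketch (not original code): extract element 2 of a vector with | |
| int x = __builtin_vec_ext_v4si (v, 2); */ | |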
18826 | ftype = build_function_type_list (intSI_type_node, V4SI_type_node, | |
18827 | integer_type_node, NULL_TREE); | |
18828 | def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI); | |
18829 | ||
18830 | ftype = build_function_type_list (intHI_type_node, V8HI_type_node, | |
18831 | integer_type_node, NULL_TREE); | |
18832 | def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI); | |
18833 | ||
18834 | ftype = build_function_type_list (intQI_type_node, V16QI_type_node, | |
18835 | integer_type_node, NULL_TREE); | |
18836 | def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI); | |
18837 | ||
18838 | ftype = build_function_type_list (float_type_node, V4SF_type_node, | |
18839 | integer_type_node, NULL_TREE); | |
18840 | def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF); | |
18841 | ||
18842 | ftype = build_function_type_list (double_type_node, V2DF_type_node, | |
18843 | integer_type_node, NULL_TREE); | |
18844 | def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF); | |
18845 | ||
18846 | ftype = build_function_type_list (intDI_type_node, V2DI_type_node, | |
18847 | integer_type_node, NULL_TREE); | |
18848 | def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI); | |
18849 | ||
18851 | if (V1TI_type_node) | |
18852 | { | |
18853 | tree v1ti_ftype_long_pcvoid | |
18854 | = build_function_type_list (V1TI_type_node, | |
18855 | long_integer_type_node, pcvoid_type_node, | |
18856 | NULL_TREE); | |
18857 | tree void_ftype_v1ti_long_pvoid | |
18858 | = build_function_type_list (void_type_node, | |
18859 | V1TI_type_node, long_integer_type_node, | |
18860 | pvoid_type_node, NULL_TREE); | |
18861 | def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid, | |
18862 | VSX_BUILTIN_LXVD2X_V1TI); | |
18863 | def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid, | |
18864 | VSX_BUILTIN_STXVD2X_V1TI); | |
18865 | ftype = build_function_type_list (V1TI_type_node, intTI_type_node, | |
18866 | NULL_TREE, NULL_TREE); | |
18867 | def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI); | |
18868 | ftype = build_function_type_list (V1TI_type_node, V1TI_type_node, | |
18869 | intTI_type_node, | |
18870 | integer_type_node, NULL_TREE); | |
18871 | def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI); | |
18872 | ftype = build_function_type_list (intTI_type_node, V1TI_type_node, | |
18873 | integer_type_node, NULL_TREE); | |
18874 | def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI); | |
18875 | } | |
18876 | ||
18877 | } | |
18878 | ||
18879 | static void | |
18880 | htm_init_builtins (void) | |
18881 | { | |
18882 | HOST_WIDE_INT builtin_mask = rs6000_builtin_mask; | |
18883 | const struct builtin_description *d; | |
18884 | size_t i; | |
18885 | ||
18886 | d = bdesc_htm; | |
18887 | for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++) | |
18888 | { | |
18889 | tree op[MAX_HTM_OPERANDS], type; | |
18890 | HOST_WIDE_INT mask = d->mask; | |
18891 | unsigned attr = rs6000_builtin_info[d->code].attr; | |
18892 | bool void_func = (attr & RS6000_BTC_VOID); | |
18893 | int attr_args = (attr & RS6000_BTC_TYPE_MASK); | |
18894 | int nopnds = 0; | |
18895 | tree gpr_type_node; | |
18896 | tree rettype; | |
18897 | tree argtype; | |
18898 | ||
18899 | /* It is expected that these htm built-in functions may have | |
18900 | d->icode equal to CODE_FOR_nothing. */ | |
18901 | ||
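| /* With -m32 -mpowerpc64 the GPRs (and 64-bit HTM SPRs such as TEXASR) | |
| are wider than long, hence the long long choice below. */ | |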
18902 | if (TARGET_32BIT && TARGET_POWERPC64) | |
18903 | gpr_type_node = long_long_unsigned_type_node; | |
18904 | else | |
18905 | gpr_type_node = long_unsigned_type_node; | |
18906 | ||
18907 | if (attr & RS6000_BTC_SPR) | |
18908 | { | |
18909 | rettype = gpr_type_node; | |
18910 | argtype = gpr_type_node; | |
18911 | } | |
18912 | else if (d->code == HTM_BUILTIN_TABORTDC | |
18913 | || d->code == HTM_BUILTIN_TABORTDCI) | |
18914 | { | |
18915 | rettype = unsigned_type_node; | |
18916 | argtype = gpr_type_node; | |
18917 | } | |
18918 | else | |
18919 | { | |
18920 | rettype = unsigned_type_node; | |
18921 | argtype = unsigned_type_node; | |
18922 | } | |
18923 | ||
18924 | if ((mask & builtin_mask) != mask) | |
18925 | { | |
18926 | if (TARGET_DEBUG_BUILTIN) | |
18927 | fprintf (stderr, "htm_builtin, skip htm %s\n", d->name); | |
18928 | continue; | |
18929 | } | |
18930 | ||
18931 | if (d->name == 0) | |
18932 | { | |
18933 | if (TARGET_DEBUG_BUILTIN) | |
18934 | fprintf (stderr, "htm_builtin, bdesc_htm[%lu] no name\n", | |
18935 | (long unsigned) i); | |
18936 | continue; | |
18937 | } | |
18938 | ||
18939 | op[nopnds++] = (void_func) ? void_type_node : rettype; | |
18940 | ||
18941 | if (attr_args == RS6000_BTC_UNARY) | |
18942 | op[nopnds++] = argtype; | |
18943 | else if (attr_args == RS6000_BTC_BINARY) | |
18944 | { | |
18945 | op[nopnds++] = argtype; | |
18946 | op[nopnds++] = argtype; | |
18947 | } | |
18948 | else if (attr_args == RS6000_BTC_TERNARY) | |
18949 | { | |
18950 | op[nopnds++] = argtype; | |
18951 | op[nopnds++] = argtype; | |
18952 | op[nopnds++] = argtype; | |
18953 | } | |
18954 | ||
18955 | switch (nopnds) | |
18956 | { | |
18957 | case 1: | |
18958 | type = build_function_type_list (op[0], NULL_TREE); | |
18959 | break; | |
18960 | case 2: | |
18961 | type = build_function_type_list (op[0], op[1], NULL_TREE); | |
18962 | break; | |
18963 | case 3: | |
18964 | type = build_function_type_list (op[0], op[1], op[2], NULL_TREE); | |
18965 | break; | |
18966 | case 4: | |
18967 | type = build_function_type_list (op[0], op[1], op[2], op[3], | |
18968 | NULL_TREE); | |
18969 | break; | |
18970 | default: | |
18971 | gcc_unreachable (); | |
18972 | } | |
18973 | ||
18974 | def_builtin (d->name, type, d->code); | |
18975 | } | |
18976 | } | |
18977 | ||
18978 | /* Hash function for builtin functions with up to 3 arguments and a return | |
18979 | type. */ | |
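| /* The loop below treats the four (mode, unsignedness) pairs as digits of | |
| a mixed-radix number, so any two distinct signatures hash differently | |
| up to unsigned overflow. */ | |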
18980 | hashval_t | |
18981 | builtin_hasher::hash (builtin_hash_struct *bh) | |
18982 | { | |
18983 | unsigned ret = 0; | |
18984 | int i; | |
18985 | ||
18986 | for (i = 0; i < 4; i++) | |
18987 | { | |
18988 | ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]); | |
18989 | ret = (ret * 2) + bh->uns_p[i]; | |
18990 | } | |
18991 | ||
18992 | return ret; | |
18993 | } | |
18994 | ||
18995 | /* Compare builtin hash entries H1 and H2 for equivalence. */ | |
18996 | bool | |
18997 | builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2) | |
18998 | { | |
18999 | return ((p1->mode[0] == p2->mode[0]) | |
19000 | && (p1->mode[1] == p2->mode[1]) | |
19001 | && (p1->mode[2] == p2->mode[2]) | |
19002 | && (p1->mode[3] == p2->mode[3]) | |
19003 | && (p1->uns_p[0] == p2->uns_p[0]) | |
19004 | && (p1->uns_p[1] == p2->uns_p[1]) | |
19005 | && (p1->uns_p[2] == p2->uns_p[2]) | |
19006 | && (p1->uns_p[3] == p2->uns_p[3])); | |
19007 | } | |
19008 | ||
19009 | /* Map types for builtin functions with an explicit return type and up to 3 | |
19010 | arguments. Functions with fewer than 3 arguments use VOIDmode as the type | |
19011 | of the unused arguments. */ | |
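| /* Illustrative example: builtin_function_type (V4SImode, V4SImode, | |
| V4SImode, VOIDmode, code, name) yields the type v4si f (v4si, v4si) | |
| and caches it, so later builtins with the same signature reuse the | |
| same tree. */ | |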
19012 | static tree | |
19013 | builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0, | |
19014 | machine_mode mode_arg1, machine_mode mode_arg2, | |
19015 | enum rs6000_builtins builtin, const char *name) | |
19016 | { | |
19017 | struct builtin_hash_struct h; | |
19018 | struct builtin_hash_struct *h2; | |
19019 | int num_args = 3; | |
19020 | int i; | |
19021 | tree ret_type = NULL_TREE; | |
19022 | tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE }; | |
19023 | ||
19024 | /* Create builtin_hash_table. */ | |
19025 | if (builtin_hash_table == NULL) | |
19026 | builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500); | |
19027 | ||
19028 | h.type = NULL_TREE; | |
19029 | h.mode[0] = mode_ret; | |
19030 | h.mode[1] = mode_arg0; | |
19031 | h.mode[2] = mode_arg1; | |
19032 | h.mode[3] = mode_arg2; | |
19033 | h.uns_p[0] = 0; | |
19034 | h.uns_p[1] = 0; | |
19035 | h.uns_p[2] = 0; | |
19036 | h.uns_p[3] = 0; | |
19037 | ||
19038 | /* If the builtin produces unsigned results or takes unsigned | |
19039 | arguments, and it is returned as a decl for the vectorizer (such as | |
19040 | widening multiplies or permute), make sure the arguments and return | |
19041 | value are type correct. */ | |
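| /* E.g. vmuleub multiplies the even unsigned char elements into unsigned | |
| short results, so its return value and both arguments are flagged | |
| unsigned in the case list below. */ | |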
19042 | switch (builtin) | |
19043 | { | |
19044 | /* unsigned 1 argument functions. */ | |
19045 | case CRYPTO_BUILTIN_VSBOX: | |
19046 | case P8V_BUILTIN_VGBBD: | |
19047 | case MISC_BUILTIN_CDTBCD: | |
19048 | case MISC_BUILTIN_CBCDTD: | |
19049 | h.uns_p[0] = 1; | |
19050 | h.uns_p[1] = 1; | |
19051 | break; | |
19052 | ||
19053 | /* unsigned 2 argument functions. */ | |
19054 | case ALTIVEC_BUILTIN_VMULEUB: | |
19055 | case ALTIVEC_BUILTIN_VMULEUH: | |
19056 | case ALTIVEC_BUILTIN_VMULOUB: | |
19057 | case ALTIVEC_BUILTIN_VMULOUH: | |
19058 | case CRYPTO_BUILTIN_VCIPHER: | |
19059 | case CRYPTO_BUILTIN_VCIPHERLAST: | |
19060 | case CRYPTO_BUILTIN_VNCIPHER: | |
19061 | case CRYPTO_BUILTIN_VNCIPHERLAST: | |
19062 | case CRYPTO_BUILTIN_VPMSUMB: | |
19063 | case CRYPTO_BUILTIN_VPMSUMH: | |
19064 | case CRYPTO_BUILTIN_VPMSUMW: | |
19065 | case CRYPTO_BUILTIN_VPMSUMD: | |
19066 | case CRYPTO_BUILTIN_VPMSUM: | |
19067 | case MISC_BUILTIN_ADDG6S: | |
19068 | case MISC_BUILTIN_DIVWEU: | |
19069 | case MISC_BUILTIN_DIVWEUO: | |
19070 | case MISC_BUILTIN_DIVDEU: | |
19071 | case MISC_BUILTIN_DIVDEUO: | |
19072 | case VSX_BUILTIN_UDIV_V2DI: | |
19073 | h.uns_p[0] = 1; | |
19074 | h.uns_p[1] = 1; | |
19075 | h.uns_p[2] = 1; | |
19076 | break; | |
19077 | ||
19078 | /* unsigned 3 argument functions. */ | |
19079 | case ALTIVEC_BUILTIN_VPERM_16QI_UNS: | |
19080 | case ALTIVEC_BUILTIN_VPERM_8HI_UNS: | |
19081 | case ALTIVEC_BUILTIN_VPERM_4SI_UNS: | |
19082 | case ALTIVEC_BUILTIN_VPERM_2DI_UNS: | |
19083 | case ALTIVEC_BUILTIN_VSEL_16QI_UNS: | |
19084 | case ALTIVEC_BUILTIN_VSEL_8HI_UNS: | |
19085 | case ALTIVEC_BUILTIN_VSEL_4SI_UNS: | |
19086 | case ALTIVEC_BUILTIN_VSEL_2DI_UNS: | |
19087 | case VSX_BUILTIN_VPERM_16QI_UNS: | |
19088 | case VSX_BUILTIN_VPERM_8HI_UNS: | |
19089 | case VSX_BUILTIN_VPERM_4SI_UNS: | |
19090 | case VSX_BUILTIN_VPERM_2DI_UNS: | |
19091 | case VSX_BUILTIN_XXSEL_16QI_UNS: | |
19092 | case VSX_BUILTIN_XXSEL_8HI_UNS: | |
19093 | case VSX_BUILTIN_XXSEL_4SI_UNS: | |
19094 | case VSX_BUILTIN_XXSEL_2DI_UNS: | |
19095 | case CRYPTO_BUILTIN_VPERMXOR: | |
19096 | case CRYPTO_BUILTIN_VPERMXOR_V2DI: | |
19097 | case CRYPTO_BUILTIN_VPERMXOR_V4SI: | |
19098 | case CRYPTO_BUILTIN_VPERMXOR_V8HI: | |
19099 | case CRYPTO_BUILTIN_VPERMXOR_V16QI: | |
19100 | case CRYPTO_BUILTIN_VSHASIGMAW: | |
19101 | case CRYPTO_BUILTIN_VSHASIGMAD: | |
19102 | case CRYPTO_BUILTIN_VSHASIGMA: | |
19103 | h.uns_p[0] = 1; | |
19104 | h.uns_p[1] = 1; | |
19105 | h.uns_p[2] = 1; | |
19106 | h.uns_p[3] = 1; | |
19107 | break; | |
19108 | ||
19109 | /* signed permute functions with unsigned char mask. */ | |
19110 | case ALTIVEC_BUILTIN_VPERM_16QI: | |
19111 | case ALTIVEC_BUILTIN_VPERM_8HI: | |
19112 | case ALTIVEC_BUILTIN_VPERM_4SI: | |
19113 | case ALTIVEC_BUILTIN_VPERM_4SF: | |
19114 | case ALTIVEC_BUILTIN_VPERM_2DI: | |
19115 | case ALTIVEC_BUILTIN_VPERM_2DF: | |
19116 | case VSX_BUILTIN_VPERM_16QI: | |
19117 | case VSX_BUILTIN_VPERM_8HI: | |
19118 | case VSX_BUILTIN_VPERM_4SI: | |
19119 | case VSX_BUILTIN_VPERM_4SF: | |
19120 | case VSX_BUILTIN_VPERM_2DI: | |
19121 | case VSX_BUILTIN_VPERM_2DF: | |
19122 | h.uns_p[3] = 1; | |
19123 | break; | |
19124 | ||
19125 | /* unsigned args, signed return. */ | |
19126 | case VSX_BUILTIN_XVCVUXDSP: | |
19127 | case VSX_BUILTIN_XVCVUXDDP_UNS: | |
19128 | case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF: | |
19129 | h.uns_p[1] = 1; | |
19130 | break; | |
19131 | ||
19132 | /* signed args, unsigned return. */ | |
19133 | case VSX_BUILTIN_XVCVDPUXDS_UNS: | |
19134 | case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI: | |
19135 | case MISC_BUILTIN_UNPACK_TD: | |
19136 | case MISC_BUILTIN_UNPACK_V1TI: | |
19137 | h.uns_p[0] = 1; | |
19138 | break; | |
19139 | ||
19140 | /* unsigned arguments for 128-bit pack instructions. */ | |
19141 | case MISC_BUILTIN_PACK_TD: | |
19142 | case MISC_BUILTIN_PACK_V1TI: | |
19143 | h.uns_p[1] = 1; | |
19144 | h.uns_p[2] = 1; | |
19145 | break; | |
19146 | ||
19147 | default: | |
19148 | break; | |
19149 | } | |
19150 | ||
19151 | /* Figure out how many args are present. */ | |
19152 | while (num_args > 0 && h.mode[num_args] == VOIDmode) | |
19153 | num_args--; | |
19154 | ||
19155 | ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]]; | |
19156 | if (!ret_type && h.uns_p[0]) | |
19157 | ret_type = builtin_mode_to_type[h.mode[0]][0]; | |
19158 | ||
19159 | if (!ret_type) | |
19160 | fatal_error (input_location, | |
19161 | "internal error: builtin function %s had an unexpected " | |
19162 | "return type %s", name, GET_MODE_NAME (h.mode[0])); | |
19163 | ||
19164 | for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++) | |
19165 | arg_type[i] = NULL_TREE; | |
19166 | ||
19167 | for (i = 0; i < num_args; i++) | |
19168 | { | |
19169 | int m = (int) h.mode[i+1]; | |
19170 | int uns_p = h.uns_p[i+1]; | |
19171 | ||
19172 | arg_type[i] = builtin_mode_to_type[m][uns_p]; | |
19173 | if (!arg_type[i] && uns_p) | |
19174 | arg_type[i] = builtin_mode_to_type[m][0]; | |
19175 | ||
19176 | if (!arg_type[i]) | |
19177 | fatal_error (input_location, | |
19178 | "internal error: builtin function %s, argument %d " | |
19179 | "had unexpected argument type %s", name, i, | |
19180 | GET_MODE_NAME (m)); | |
19181 | } | |
19182 | ||
19183 | builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT); | |
19184 | if (*found == NULL) | |
19185 | { | |
19186 | h2 = ggc_alloc<builtin_hash_struct> (); | |
19187 | *h2 = h; | |
19188 | *found = h2; | |
19189 | ||
19190 | h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1], | |
19191 | arg_type[2], NULL_TREE); | |
19192 | } | |
19193 | ||
19194 | return (*found)->type; | |
19195 | } | |
19196 | ||
19197 | static void | |
19198 | rs6000_common_init_builtins (void) | |
19199 | { | |
19200 | const struct builtin_description *d; | |
19201 | size_t i; | |
19202 | ||
19203 | tree opaque_ftype_opaque = NULL_TREE; | |
19204 | tree opaque_ftype_opaque_opaque = NULL_TREE; | |
19205 | tree opaque_ftype_opaque_opaque_opaque = NULL_TREE; | |
19206 | tree v2si_ftype = NULL_TREE; | |
19207 | tree v2si_ftype_qi = NULL_TREE; | |
19208 | tree v2si_ftype_v2si_qi = NULL_TREE; | |
19209 | tree v2si_ftype_int_qi = NULL_TREE; | |
19210 | HOST_WIDE_INT builtin_mask = rs6000_builtin_mask; | |
19211 | ||
19212 | if (!TARGET_PAIRED_FLOAT) | |
19213 | { | |
19214 | builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node; | |
19215 | builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node; | |
19216 | } | |
19217 | ||
19218 | /* Paired and SPE builtins are only available if you build a compiler with | |
19219 | the appropriate options, so only create those builtins with the | |
19220 | appropriate compiler option. Create Altivec and VSX builtins on machines | |
19221 | with at least the general purpose extensions (970 and newer) to allow the | |
19222 | use of the target attribute. */ | |
19223 | ||
19224 | if (TARGET_EXTRA_BUILTINS) | |
19225 | builtin_mask |= RS6000_BTM_COMMON; | |
19226 | ||
19227 | /* Add the ternary operators. */ | |
19228 | d = bdesc_3arg; | |
19229 | for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++) | |
19230 | { | |
19231 | tree type; | |
19232 | HOST_WIDE_INT mask = d->mask; | |
19233 | ||
19234 | if ((mask & builtin_mask) != mask) | |
19235 | { | |
19236 | if (TARGET_DEBUG_BUILTIN) | |
19237 | fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name); | |
19238 | continue; | |
19239 | } | |
19240 | ||
19241 | if (rs6000_overloaded_builtin_p (d->code)) | |
19242 | { | |
19243 | if (! (type = opaque_ftype_opaque_opaque_opaque)) | |
19244 | type = opaque_ftype_opaque_opaque_opaque | |
19245 | = build_function_type_list (opaque_V4SI_type_node, | |
19246 | opaque_V4SI_type_node, | |
19247 | opaque_V4SI_type_node, | |
19248 | opaque_V4SI_type_node, | |
19249 | NULL_TREE); | |
19250 | } | |
19251 | else | |
19252 | { | |
19253 | enum insn_code icode = d->icode; | |
19254 | if (d->name == 0) | |
19255 | { | |
19256 | if (TARGET_DEBUG_BUILTIN) | |
19257 | fprintf (stderr, "rs6000_builtin, bdesc_3arg[%lu] no name\n", | |
19258 | (long unsigned)i); | |
19259 | ||
19260 | continue; | |
19261 | } | |
19262 | ||
19263 | if (icode == CODE_FOR_nothing) | |
19264 | { | |
19265 | if (TARGET_DEBUG_BUILTIN) | |
19266 | fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n", | |
19267 | d->name); | |
19268 | ||
19269 | continue; | |
19270 | } | |
19271 | ||
19272 | type = builtin_function_type (insn_data[icode].operand[0].mode, | |
19273 | insn_data[icode].operand[1].mode, | |
19274 | insn_data[icode].operand[2].mode, | |
19275 | insn_data[icode].operand[3].mode, | |
19276 | d->code, d->name); | |
19277 | } | |
19278 | ||
19279 | def_builtin (d->name, type, d->code); | |
19280 | } | |
19281 | ||
19282 | /* Add the binary operators. */ | |
19283 | d = bdesc_2arg; | |
19284 | for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++) | |
19285 | { | |
19286 | machine_mode mode0, mode1, mode2; | |
19287 | tree type; | |
19288 | HOST_WIDE_INT mask = d->mask; | |
19289 | ||
19290 | if ((mask & builtin_mask) != mask) | |
19291 | { | |
19292 | if (TARGET_DEBUG_BUILTIN) | |
19293 | fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name); | |
19294 | continue; | |
19295 | } | |
19296 | ||
19297 | if (rs6000_overloaded_builtin_p (d->code)) | |
19298 | { | |
19299 | if (! (type = opaque_ftype_opaque_opaque)) | |
19300 | type = opaque_ftype_opaque_opaque | |
19301 | = build_function_type_list (opaque_V4SI_type_node, | |
19302 | opaque_V4SI_type_node, | |
19303 | opaque_V4SI_type_node, | |
19304 | NULL_TREE); | |
19305 | } | |
19306 | else | |
19307 | { | |
19308 | enum insn_code icode = d->icode; | |
19309 | if (d->name == 0) | |
19310 | { | |
19311 | if (TARGET_DEBUG_BUILTIN) | |
19312 | fprintf (stderr, "rs6000_builtin, bdesc_2arg[%lu] no name\n", | |
19313 | (long unsigned)i); | |
19314 | ||
19315 | continue; | |
19316 | } | |
19317 | ||
19318 | if (icode == CODE_FOR_nothing) | |
19319 | { | |
19320 | if (TARGET_DEBUG_BUILTIN) | |
19321 | fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n", | |
19322 | d->name); | |
19323 | ||
19324 | continue; | |
19325 | } | |
19326 | ||
19327 | mode0 = insn_data[icode].operand[0].mode; | |
19328 | mode1 = insn_data[icode].operand[1].mode; | |
19329 | mode2 = insn_data[icode].operand[2].mode; | |
19330 | ||
19331 | if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode) | |
19332 | { | |
19333 | if (! (type = v2si_ftype_v2si_qi)) | |
19334 | type = v2si_ftype_v2si_qi | |
19335 | = build_function_type_list (opaque_V2SI_type_node, | |
19336 | opaque_V2SI_type_node, | |
19337 | char_type_node, | |
19338 | NULL_TREE); | |
19339 | } | |
19340 | ||
19341 | else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT | |
19342 | && mode2 == QImode) | |
19343 | { | |
19344 | if (! (type = v2si_ftype_int_qi)) | |
19345 | type = v2si_ftype_int_qi | |
19346 | = build_function_type_list (opaque_V2SI_type_node, | |
19347 | integer_type_node, | |
19348 | char_type_node, | |
19349 | NULL_TREE); | |
19350 | } | |
19351 | ||
19352 | else | |
19353 | type = builtin_function_type (mode0, mode1, mode2, VOIDmode, | |
19354 | d->code, d->name); | |
19355 | } | |
19356 | ||
19357 | def_builtin (d->name, type, d->code); | |
19358 | } | |
19359 | ||
19360 | /* Add the simple unary operators. */ | |
19361 | d = bdesc_1arg; | |
19362 | for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++) | |
19363 | { | |
19364 | machine_mode mode0, mode1; | |
19365 | tree type; | |
19366 | HOST_WIDE_INT mask = d->mask; | |
19367 | ||
19368 | if ((mask & builtin_mask) != mask) | |
19369 | { | |
19370 | if (TARGET_DEBUG_BUILTIN) | |
19371 | fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name); | |
19372 | continue; | |
19373 | } | |
19374 | ||
19375 | if (rs6000_overloaded_builtin_p (d->code)) | |
19376 | { | |
19377 | if (! (type = opaque_ftype_opaque)) | |
19378 | type = opaque_ftype_opaque | |
19379 | = build_function_type_list (opaque_V4SI_type_node, | |
19380 | opaque_V4SI_type_node, | |
19381 | NULL_TREE); | |
19382 | } | |
19383 | else | |
19384 | { | |
19385 | enum insn_code icode = d->icode; | |
19386 | if (d->name == 0) | |
19387 | { | |
19388 | if (TARGET_DEBUG_BUILTIN) | |
19389 | fprintf (stderr, "rs6000_builtin, bdesc_1arg[%lu] no name\n", | |
19390 | (long unsigned)i); | |
19391 | ||
19392 | continue; | |
19393 | } | |
19394 | ||
19395 | if (icode == CODE_FOR_nothing) | |
19396 | { | |
19397 | if (TARGET_DEBUG_BUILTIN) | |
19398 | fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n", | |
19399 | d->name); | |
19400 | ||
19401 | continue; | |
19402 | } | |
19403 | ||
19404 | mode0 = insn_data[icode].operand[0].mode; | |
19405 | mode1 = insn_data[icode].operand[1].mode; | |
19406 | ||
19407 | if (mode0 == V2SImode && mode1 == QImode) | |
19408 | { | |
19409 | if (! (type = v2si_ftype_qi)) | |
19410 | type = v2si_ftype_qi | |
19411 | = build_function_type_list (opaque_V2SI_type_node, | |
19412 | char_type_node, | |
19413 | NULL_TREE); | |
19414 | } | |
19415 | ||
19416 | else | |
19417 | type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode, | |
19418 | d->code, d->name); | |
19419 | } | |
19420 | ||
19421 | def_builtin (d->name, type, d->code); | |
19422 | } | |
19423 | ||
19424 | /* Add the simple no-argument operators. */ | |
19425 | d = bdesc_0arg; | |
19426 | for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++) | |
19427 | { | |
19428 | machine_mode mode0; | |
19429 | tree type; | |
19430 | HOST_WIDE_INT mask = d->mask; | |
19431 | ||
19432 | if ((mask & builtin_mask) != mask) | |
19433 | { | |
19434 | if (TARGET_DEBUG_BUILTIN) | |
19435 | fprintf (stderr, "rs6000_builtin, skip no-argument %s\n", d->name); | |
19436 | continue; | |
19437 | } | |
19438 | if (rs6000_overloaded_builtin_p (d->code)) | |
19439 | { | |
19440 | if (!opaque_ftype_opaque) | |
19441 | opaque_ftype_opaque | |
19442 | = build_function_type_list (opaque_V4SI_type_node, NULL_TREE); | |
19443 | type = opaque_ftype_opaque; | |
19444 | } | |
19445 | else | |
19446 | { | |
19447 | enum insn_code icode = d->icode; | |
19448 | if (d->name == 0) | |
19449 | { | |
19450 | if (TARGET_DEBUG_BUILTIN) | |
19451 | fprintf (stderr, "rs6000_builtin, bdesc_0arg[%lu] no name\n", | |
19452 | (long unsigned) i); | |
19453 | continue; | |
19454 | } | |
19455 | if (icode == CODE_FOR_nothing) | |
19456 | { | |
19457 | if (TARGET_DEBUG_BUILTIN) | |
19458 | fprintf (stderr, | |
19459 | "rs6000_builtin, skip no-argument %s (no code)\n", | |
19460 | d->name); | |
19461 | continue; | |
19462 | } | |
19463 | mode0 = insn_data[icode].operand[0].mode; | |
19464 | if (mode0 == V2SImode) | |
19465 | { | |
19466 | /* SPE no-argument built-ins return an opaque V2SI. */ | |
19467 | if (! (type = v2si_ftype)) | |
19468 | { | |
19469 | v2si_ftype | |
19470 | = build_function_type_list (opaque_V2SI_type_node, | |
19471 | NULL_TREE); | |
19472 | type = v2si_ftype; | |
19473 | } | |
19474 | } | |
19475 | else | |
19476 | type = builtin_function_type (mode0, VOIDmode, VOIDmode, VOIDmode, | |
19477 | d->code, d->name); | |
19478 | } | |
19479 | def_builtin (d->name, type, d->code); | |
19480 | } | |
19481 | } | |
19482 | ||
19483 | /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */ | |
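| /* This 128-bit format is IBM extended ("double-double", the sum of two | |
| 64-bit doubles), so arithmetic and comparisons go through software | |
| routines (__gcc_q* here, or _xlq* under -mxl-compat). */ | |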
19484 | static void | |
19485 | init_float128_ibm (machine_mode mode) | |
19486 | { | |
19487 | if (!TARGET_XL_COMPAT) | |
19488 | { | |
19489 | set_optab_libfunc (add_optab, mode, "__gcc_qadd"); | |
19490 | set_optab_libfunc (sub_optab, mode, "__gcc_qsub"); | |
19491 | set_optab_libfunc (smul_optab, mode, "__gcc_qmul"); | |
19492 | set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv"); | |
19493 | ||
19494 | if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE))) | |
19495 | { | |
19496 | set_optab_libfunc (neg_optab, mode, "__gcc_qneg"); | |
19497 | set_optab_libfunc (eq_optab, mode, "__gcc_qeq"); | |
19498 | set_optab_libfunc (ne_optab, mode, "__gcc_qne"); | |
19499 | set_optab_libfunc (gt_optab, mode, "__gcc_qgt"); | |
19500 | set_optab_libfunc (ge_optab, mode, "__gcc_qge"); | |
19501 | set_optab_libfunc (lt_optab, mode, "__gcc_qlt"); | |
19502 | set_optab_libfunc (le_optab, mode, "__gcc_qle"); | |
19503 | ||
19504 | set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq"); | |
19505 | set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq"); | |
19506 | set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos"); | |
19507 | set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod"); | |
19508 | set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi"); | |
19509 | set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou"); | |
19510 | set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq"); | |
19511 | set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq"); | |
19512 | } | |
19513 | ||
19514 | if (!(TARGET_HARD_FLOAT && TARGET_FPRS)) | |
19515 | set_optab_libfunc (unord_optab, mode, "__gcc_qunord"); | |
19516 | } | |
19517 | else | |
19518 | { | |
19519 | set_optab_libfunc (add_optab, mode, "_xlqadd"); | |
19520 | set_optab_libfunc (sub_optab, mode, "_xlqsub"); | |
19521 | set_optab_libfunc (smul_optab, mode, "_xlqmul"); | |
19522 | set_optab_libfunc (sdiv_optab, mode, "_xlqdiv"); | |
19523 | } | |
19524 | ||
19525 | /* Add various conversions for IFmode to use the traditional TFmode | |
19526 | names. */ | |
19527 | if (mode == IFmode) | |
19528 | { | |
19529 | set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf2"); | |
19530 | set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf2"); | |
19531 | set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctftd2"); | |
19532 | set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd2"); | |
19533 | set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd2"); | |
19534 | set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdtf2"); | |
19535 | ||
19536 | if (TARGET_POWERPC64) | |
19537 | { | |
19538 | set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti"); | |
19539 | set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti"); | |
19540 | set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf"); | |
19541 | set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf"); | |
19542 | } | |
19543 | } | |
19544 | } | |
19545 | ||
19546 | /* Set up IEEE 128-bit floating point routines. Use different names if the | |
19547 | arguments can be passed in a vector register. The historical PowerPC | |
19548 | implementation of IEEE 128-bit floating point used _q_<op> for the names, so | |
19549 | continue to use that if we aren't using vector registers to pass IEEE | |
19550 | 128-bit floating point. */ | |
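| /* For example, with KFmode a __float128 addition lowers to a libgcc call | |
| to __addkf3 below; the fallback in the else branch would use _q_add | |
| instead. */ | |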
19551 | ||
19552 | static void | |
19553 | init_float128_ieee (machine_mode mode) | |
19554 | { | |
19555 | if (FLOAT128_VECTOR_P (mode)) | |
19556 | { | |
19557 | set_optab_libfunc (add_optab, mode, "__addkf3"); | |
19558 | set_optab_libfunc (sub_optab, mode, "__subkf3"); | |
19559 | set_optab_libfunc (neg_optab, mode, "__negkf2"); | |
19560 | set_optab_libfunc (smul_optab, mode, "__mulkf3"); | |
19561 | set_optab_libfunc (sdiv_optab, mode, "__divkf3"); | |
19562 | set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2"); | |
19563 | set_optab_libfunc (abs_optab, mode, "__abskf2"); | |
19564 | ||
19565 | set_optab_libfunc (eq_optab, mode, "__eqkf2"); | |
19566 | set_optab_libfunc (ne_optab, mode, "__nekf2"); | |
19567 | set_optab_libfunc (gt_optab, mode, "__gtkf2"); | |
19568 | set_optab_libfunc (ge_optab, mode, "__gekf2"); | |
19569 | set_optab_libfunc (lt_optab, mode, "__ltkf2"); | |
19570 | set_optab_libfunc (le_optab, mode, "__lekf2"); | |
19571 | set_optab_libfunc (unord_optab, mode, "__unordkf2"); | |
19572 | ||
19573 | set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2"); | |
19574 | set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2"); | |
19575 | set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2"); | |
19576 | set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2"); | |
19577 | ||
19578 | set_conv_libfunc (sext_optab, mode, IFmode, "__extendtfkf2"); | |
19579 | if (mode != TFmode && FLOAT128_IBM_P (TFmode)) | |
19580 | set_conv_libfunc (sext_optab, mode, TFmode, "__extendtfkf2"); | |
19581 | ||
19582 | set_conv_libfunc (trunc_optab, IFmode, mode, "__trunckftf2"); | |
19583 | if (mode != TFmode && FLOAT128_IBM_P (TFmode)) | |
19584 | set_conv_libfunc (trunc_optab, TFmode, mode, "__trunckftf2"); | |
19585 | ||
19586 | set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf2"); | |
19587 | set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf2"); | |
19588 | set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunckftd2"); | |
19589 | set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd2"); | |
19590 | set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd2"); | |
19591 | set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdkf2"); | |
19592 | ||
19593 | set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi"); | |
19594 | set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi"); | |
19595 | set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi"); | |
19596 | set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi"); | |
19597 | ||
19598 | set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf"); | |
19599 | set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf"); | |
19600 | set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf"); | |
19601 | set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf"); | |
19602 | ||
19603 | if (TARGET_POWERPC64) | |
19604 | { | |
19605 | set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti"); | |
19606 | set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti"); | |
19607 | set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf"); | |
19608 | set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf"); | |
19609 | } | |
19610 | } | |
19611 | ||
19612 | else | |
19613 | { | |
19614 | set_optab_libfunc (add_optab, mode, "_q_add"); | |
19615 | set_optab_libfunc (sub_optab, mode, "_q_sub"); | |
19616 | set_optab_libfunc (neg_optab, mode, "_q_neg"); | |
19617 | set_optab_libfunc (smul_optab, mode, "_q_mul"); | |
19618 | set_optab_libfunc (sdiv_optab, mode, "_q_div"); | |
19619 | if (TARGET_PPC_GPOPT) | |
19620 | set_optab_libfunc (sqrt_optab, mode, "_q_sqrt"); | |
19621 | ||
19622 | set_optab_libfunc (eq_optab, mode, "_q_feq"); | |
19623 | set_optab_libfunc (ne_optab, mode, "_q_fne"); | |
19624 | set_optab_libfunc (gt_optab, mode, "_q_fgt"); | |
19625 | set_optab_libfunc (ge_optab, mode, "_q_fge"); | |
19626 | set_optab_libfunc (lt_optab, mode, "_q_flt"); | |
19627 | set_optab_libfunc (le_optab, mode, "_q_fle"); | |
19628 | ||
19629 | set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq"); | |
19630 | set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq"); | |
19631 | set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos"); | |
19632 | set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod"); | |
19633 | set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi"); | |
19634 | set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou"); | |
19635 | set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq"); | |
19636 | set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq"); | |
19637 | } | |
19638 | } | |
19639 | ||
19640 | static void | |
19641 | rs6000_init_libfuncs (void) | |
19642 | { | |
19643 | /* __float128 support. */ | |
19644 | if (TARGET_FLOAT128_TYPE) | |
19645 | { | |
19646 | init_float128_ibm (IFmode); | |
19647 | init_float128_ieee (KFmode); | |
19648 | } | |
19649 | ||
19650 | /* AIX/Darwin/64-bit Linux quad floating point routines. */ | |
19651 | if (TARGET_LONG_DOUBLE_128) | |
19652 | { | |
19653 | if (!TARGET_IEEEQUAD) | |
19654 | init_float128_ibm (TFmode); | |
19655 | ||
19656 | /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */ | |
19657 | else | |
19658 | init_float128_ieee (TFmode); | |
19659 | } | |
19660 | } | |
19661 | ||
19662 | \f | |
19663 | /* Expand a block clear operation, and return 1 if successful. Return 0 | |
19664 | if we should let the compiler generate normal code. | |
19665 | ||
19666 | operands[0] is the destination | |
19667 | operands[1] is the length | |
19668 | operands[3] is the alignment. */ | |
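| /* Note (an assumption from the setmem expander, not shown here): | |
| operands[2], the fill value, is omitted above because this routine | |
| is only expected to be used when that value is zero. */ | |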
19669 | ||
19670 | int | |
19671 | expand_block_clear (rtx operands[]) | |
19672 | { | |
19673 | rtx orig_dest = operands[0]; | |
19674 | rtx bytes_rtx = operands[1]; | |
19675 | rtx align_rtx = operands[3]; | |
19676 | bool constp = (GET_CODE (bytes_rtx) == CONST_INT); | |
19677 | HOST_WIDE_INT align; | |
19678 | HOST_WIDE_INT bytes; | |
19679 | int offset; | |
19680 | int clear_bytes; | |
19681 | int clear_step; | |
19682 | ||
19683 | /* If this is not a fixed size clear, just call memset. */ | |
19684 | if (! constp) | |
19685 | return 0; | |
19686 | ||
19687 | /* This must be a fixed size alignment. */ | |
19688 | gcc_assert (GET_CODE (align_rtx) == CONST_INT); | |
19689 | align = INTVAL (align_rtx) * BITS_PER_UNIT; | |
19690 | ||
19691 | /* Anything to clear? */ | |
19692 | bytes = INTVAL (bytes_rtx); | |
19693 | if (bytes <= 0) | |
19694 | return 1; | |
19695 | ||
19696 | /* Use the builtin memset after a point, to avoid huge code bloat. | |
19697 | When optimize_size, avoid any significant code bloat; calling | |
19698 | memset is about 4 instructions, so allow for one instruction to | |
19699 | load zero and three to do clearing. */ | |
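| /* Worked example: with AltiVec (clear_step = 16) the expansion punts to | |
| memset above 3*16 = 48 bytes when optimizing for size, or above | |
| 8*16 = 128 bytes otherwise. */ | |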
19700 | if (TARGET_ALTIVEC && align >= 128) | |
19701 | clear_step = 16; | |
19702 | else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT)) | |
19703 | clear_step = 8; | |
19704 | else if (TARGET_SPE && align >= 64) | |
19705 | clear_step = 8; | |
19706 | else | |
19707 | clear_step = 4; | |
19708 | ||
19709 | if (optimize_size && bytes > 3 * clear_step) | |
19710 | return 0; | |
19711 | if (! optimize_size && bytes > 8 * clear_step) | |
19712 | return 0; | |
19713 | ||
19714 | for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes) | |
19715 | { | |
19716 | machine_mode mode = BLKmode; | |
19717 | rtx dest; | |
19718 | ||
19719 | if (bytes >= 16 && TARGET_ALTIVEC && align >= 128) | |
19720 | { | |
19721 | clear_bytes = 16; | |
19722 | mode = V4SImode; | |
19723 | } | |
19724 | else if (bytes >= 8 && TARGET_SPE && align >= 64) | |
19725 | { | |
19726 | clear_bytes = 8; | |
19727 | mode = V2SImode; | |
19728 | } | |
19729 | else if (bytes >= 8 && TARGET_POWERPC64 | |
19730 | && (align >= 64 || !STRICT_ALIGNMENT)) | |
19731 | { | |
19732 | clear_bytes = 8; | |
19733 | mode = DImode; | |
19734 | if (offset == 0 && align < 64) | |
19735 | { | |
19736 | rtx addr; | |
19737 | ||
19738 | /* If the address form is reg+offset with offset not a | |
19739 | multiple of four, reload into reg indirect form here | |
19740 | rather than waiting for reload. This way we get one | |
19741 | reload, not one per store. */ | |
19742 | addr = XEXP (orig_dest, 0); | |
19743 | if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM) | |
19744 | && GET_CODE (XEXP (addr, 1)) == CONST_INT | |
19745 | && (INTVAL (XEXP (addr, 1)) & 3) != 0) | |
19746 | { | |
19747 | addr = copy_addr_to_reg (addr); | |
19748 | orig_dest = replace_equiv_address (orig_dest, addr); | |
19749 | } | |
19750 | } | |
19751 | } | |
19752 | else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT)) | |
19753 | { /* clear 4 bytes */ | |
19754 | clear_bytes = 4; | |
19755 | mode = SImode; | |
19756 | } | |
19757 | else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT)) | |
19758 | { /* clear 2 bytes */ | |
19759 | clear_bytes = 2; | |
19760 | mode = HImode; | |
19761 | } | |
19762 | else /* clear 1 byte at a time */ | |
19763 | { | |
19764 | clear_bytes = 1; | |
19765 | mode = QImode; | |
19766 | } | |
19767 | ||
19768 | dest = adjust_address (orig_dest, mode, offset); | |
19769 | ||
19770 | emit_move_insn (dest, CONST0_RTX (mode)); | |
19771 | } | |
19772 | ||
19773 | return 1; | |
19774 | } | |
19775 | ||
19776 | /* Emit a potentially record-form instruction, setting DST from SRC. | |
19777 | If DOT is 0, that is all; otherwise, set CCREG to the result of the | |
19778 | signed comparison of DST with zero. If DOT is 1, the generated RTL | |
19779 | doesn't care about the DST result; if DOT is 2, it does. If CCREG | |
19780 | is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and | |
19781 | a separate COMPARE. */ | |
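| /* Shape of the emitted RTL, for illustration: dot == 1 gives | |
| (parallel [(set ccreg (compare src 0)) (clobber dst)]) | |
| and dot == 2 gives | |
| (parallel [(set ccreg (compare src 0)) (set dst src)]) | |
| matching record-form ("dot") instructions such as and. rT,rA,rB. */ | |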
19782 | ||
19783 | static void | |
19784 | rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg) | |
19785 | { | |
19786 | if (dot == 0) | |
19787 | { | |
19788 | emit_move_insn (dst, src); | |
19789 | return; | |
19790 | } | |
19791 | ||
19792 | if (cc_reg_not_cr0_operand (ccreg, CCmode)) | |
19793 | { | |
19794 | emit_move_insn (dst, src); | |
19795 | emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx)); | |
19796 | return; | |
19797 | } | |
19798 | ||
19799 | rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx)); | |
19800 | if (dot == 1) | |
19801 | { | |
19802 | rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst); | |
19803 | emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber))); | |
19804 | } | |
19805 | else | |
19806 | { | |
19807 | rtx set = gen_rtx_SET (dst, src); | |
19808 | emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set))); | |
19809 | } | |
19810 | } | |
19811 | ||
19812 | /* Figure out the correct instructions to generate to load data for | |
19813 | block compare. MODE is used for the read from memory, and | |
19814 | data is zero extended if REG is wider than MODE. If LE code | |
19815 | is being generated, bswap loads are used. | |
19816 | ||
19817 | REG is the destination register to move the data into. | |
19818 | MEM is the memory block being read. | |
19819 | MODE is the mode of memory to use for the read. */ | |
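| /* E.g. on little-endian, an HImode chunk loaded into a DImode reg below | |
| becomes a byte-reversed load (typically lhbrx) plus a zero extension, | |
| so the comparison sees the bytes in memory order. */ | |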
19820 | static void | |
19821 | do_load_for_compare (rtx reg, rtx mem, machine_mode mode) | |
19822 | { | |
19823 | switch (GET_MODE (reg)) | |
19824 | { | |
916ace94 | 19825 | case E_DImode: |
01e91138 | 19826 | switch (mode) |
19827 | { | |
916ace94 | 19828 | case E_QImode: |
01e91138 | 19829 | emit_insn (gen_zero_extendqidi2 (reg, mem)); |
19830 | break; | |
916ace94 | 19831 | case E_HImode: |
01e91138 | 19832 | { |
19833 | rtx src = mem; | |
19834 | if (!BYTES_BIG_ENDIAN) | |
19835 | { | |
19836 | src = gen_reg_rtx (HImode); | |
19837 | emit_insn (gen_bswaphi2 (src, mem)); | |
19838 | } | |
19839 | emit_insn (gen_zero_extendhidi2 (reg, src)); | |
19840 | break; | |
19841 | } | |
916ace94 | 19842 | case E_SImode: |
01e91138 | 19843 | { |
19844 | rtx src = mem; | |
19845 | if (!BYTES_BIG_ENDIAN) | |
19846 | { | |
19847 | src = gen_reg_rtx (SImode); | |
19848 | emit_insn (gen_bswapsi2 (src, mem)); | |
19849 | } | |
19850 | emit_insn (gen_zero_extendsidi2 (reg, src)); | |
19851 | } | |
19852 | break; | |
916ace94 | 19853 | case E_DImode: |
01e91138 | 19854 | if (!BYTES_BIG_ENDIAN) |
19855 | emit_insn (gen_bswapdi2 (reg, mem)); | |
19856 | else | |
19857 | emit_insn (gen_movdi (reg, mem)); | |
19858 | break; | |
19859 | default: | |
19860 | gcc_unreachable (); | |
19861 | } | |
19862 | break; | |
19863 | ||
916ace94 | 19864 | case E_SImode: |
01e91138 | 19865 | switch (mode) |
19866 | { | |
916ace94 | 19867 | case E_QImode: |
01e91138 | 19868 | emit_insn (gen_zero_extendqisi2 (reg, mem)); |
19869 | break; | |
916ace94 | 19870 | case E_HImode: |
01e91138 | 19871 | { |
19872 | rtx src = mem; | |
19873 | if (!BYTES_BIG_ENDIAN) | |
19874 | { | |
19875 | src = gen_reg_rtx (HImode); | |
19876 | emit_insn (gen_bswaphi2 (src, mem)); | |
19877 | } | |
19878 | emit_insn (gen_zero_extendhisi2 (reg, src)); | |
19879 | break; | |
19880 | } | |
916ace94 | 19881 | case E_SImode: |
01e91138 | 19882 | if (!BYTES_BIG_ENDIAN) |
19883 | emit_insn (gen_bswapsi2 (reg, mem)); | |
19884 | else | |
19885 | emit_insn (gen_movsi (reg, mem)); | |
19886 | break; | |
916ace94 | 19887 | case E_DImode: |
01e91138 | 19888 | /* DImode is larger than the destination reg so is not expected. */ |
19889 | gcc_unreachable (); | |
19890 | break; | |
19891 | default: | |
19892 | gcc_unreachable (); | |
19893 | } | |
19894 | break; | |
19895 | default: | |
19896 | gcc_unreachable (); | |
19897 | break; | |
19898 | } | |
19899 | } | |
19900 | ||
19901 | /* Select the mode to be used for reading the next chunk of bytes | |
19902 | in the compare. | |
19903 | ||
19904 | OFFSET is the current read offset from the beginning of the block. | |
19905 | BYTES is the number of bytes remaining to be read. | |
19906 | ALIGN is the minimum alignment of the memory blocks being compared in bytes. | |
19907 | WORD_MODE_OK indicates using WORD_MODE is allowed, else SImode is | |
19908 | the largest allowable mode. */ | |
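| /* Worked example: on a 64-bit target (UNITS_PER_WORD == 8), 7 bytes left | |
| at offset 0 with 8-byte alignment give maxread == 8, so one word_mode | |
| load, with a shift to discard the extra byte, covers the tail. */ | |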
19909 | static machine_mode | |
19910 | select_block_compare_mode (unsigned HOST_WIDE_INT offset, | |
19911 | unsigned HOST_WIDE_INT bytes, | |
19912 | unsigned HOST_WIDE_INT align, bool word_mode_ok) | |
19913 | { | |
19914 | /* First see if we can do a whole load unit | |
19915 | as that will be more efficient than a larger load + shift. */ | |
19916 | ||
19917 | /* If big, use biggest chunk. | |
19918 | If exactly chunk size, use that size. | |
19919 | If remainder can be done in one piece with shifting, do that. | |
19920 | Do largest chunk possible without violating alignment rules. */ | |
19921 | ||
19922 | /* The most we can read without potential page crossing. */ | |
19923 | unsigned HOST_WIDE_INT maxread = ROUND_UP (bytes, align); | |
19924 | ||
19925 | if (word_mode_ok && bytes >= UNITS_PER_WORD) | |
19926 | return word_mode; | |
19927 | else if (bytes == GET_MODE_SIZE (SImode)) | |
19928 | return SImode; | |
19929 | else if (bytes == GET_MODE_SIZE (HImode)) | |
19930 | return HImode; | |
19931 | else if (bytes == GET_MODE_SIZE (QImode)) | |
19932 | return QImode; | |
19933 | else if (bytes < GET_MODE_SIZE (SImode) | |
19934 | && offset >= GET_MODE_SIZE (SImode) - bytes) | |
19935 | /* This matches the case where we have SImode and 3 bytes | |
19936 | and offset >= 1 and permits us to move back one and overlap | |
19937 | with the previous read, thus avoiding having to shift | |
19938 | unwanted bytes off the input. */ | |
19939 | return SImode; | |
19940 | else if (word_mode_ok && bytes < UNITS_PER_WORD | |
19941 | && offset >= UNITS_PER_WORD-bytes) | |
19942 | /* Similarly, if we can use DImode it will get matched here and | |
19943 | can do an overlapping read that ends at the end of the block. */ | |
19944 | return word_mode; | |
19945 | else if (word_mode_ok && maxread >= UNITS_PER_WORD) | |
19946 | /* It is safe to do all remaining in one load of largest size, | |
19947 | possibly with a shift to get rid of unwanted bytes. */ | |
19948 | return word_mode; | |
19949 | else if (maxread >= GET_MODE_SIZE (SImode)) | |
19950 | /* It is safe to do all remaining in one SImode load, | |
19951 | possibly with a shift to get rid of unwanted bytes. */ | |
19952 | return SImode; | |
19953 | else if (bytes > GET_MODE_SIZE (SImode)) | |
19954 | return SImode; | |
19955 | else if (bytes > GET_MODE_SIZE (HImode)) | |
19956 | return HImode; | |
19957 | ||
19958 | /* Final fallback: do one byte at a time. */ | |
19959 | return QImode; | |
19960 | } | |
19961 | ||
19962 | /* Compute the alignment of pointer+OFFSET where the original alignment | |
19963 | of pointer was BASE_ALIGN. */ | |
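| /* offset & -offset isolates the lowest set bit of OFFSET: e.g. a 16-byte | |
| aligned base accessed at offset 4 is only guaranteed 4-byte alignment, | |
| min (16, 4 & -4) == 4. */ | |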
19964 | static unsigned HOST_WIDE_INT | |
19965 | compute_current_alignment (unsigned HOST_WIDE_INT base_align, | |
19966 | unsigned HOST_WIDE_INT offset) | |
19967 | { | |
19968 | if (offset == 0) | |
19969 | return base_align; | |
19970 | return min (base_align, offset & -offset); | |
19971 | } | |
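/* For example, compute_current_alignment (8, 0) is 8,
   compute_current_alignment (8, 4) is 4, and
   compute_current_alignment (8, 6) is 2: OFFSET & -OFFSET isolates the
   lowest set bit of OFFSET, i.e. the largest power of two dividing it,
   which is the best alignment that can be guaranteed at
   pointer+OFFSET. */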
19972 | ||
19973 | /* Expand a block compare operation, and return true if successful. | |
19974 | Return false if we should let the compiler generate normal code, | |
19975 | probably a memcmp call. | |
19976 | ||
19977 | OPERANDS[0] is the target (result). | |
19978 | OPERANDS[1] is the first source. | |
19979 | OPERANDS[2] is the second source. | |
19980 | OPERANDS[3] is the length. | |
19981 | OPERANDS[4] is the alignment. */ | |
19982 | bool | |
19983 | expand_block_compare (rtx operands[]) | |
19984 | { | |
19985 | rtx target = operands[0]; | |
19986 | rtx orig_src1 = operands[1]; | |
19987 | rtx orig_src2 = operands[2]; | |
19988 | rtx bytes_rtx = operands[3]; | |
19989 | rtx align_rtx = operands[4]; | |
19990 | HOST_WIDE_INT cmp_bytes = 0; | |
19991 | rtx src1 = orig_src1; | |
19992 | rtx src2 = orig_src2; | |
19993 | ||
19994 | /* This case is complicated to handle because the subtract | |
19995 | with carry instructions do not generate the 64-bit | |
19996 | carry and so we must emit code to calculate it ourselves. | |
19997 | We choose not to implement this yet. */ | |
19998 | if (TARGET_32BIT && TARGET_POWERPC64) | |
19999 | return false; | |
20000 | ||
20001 | /* If this is not a fixed size compare, just call memcmp. */ | |
20002 | if (!CONST_INT_P (bytes_rtx)) | |
20003 | return false; | |
20004 | ||
20005 | /* This must be a fixed size alignment. */ | |
20006 | if (!CONST_INT_P (align_rtx)) | |
20007 | return false; | |
20008 | ||
20009 | unsigned int base_align = UINTVAL (align_rtx) / BITS_PER_UNIT; | |
20010 | ||
/* If unaligned accesses are slow on this target, don't generate an unaligned block compare. */
20012 | if (rs6000_slow_unaligned_access (word_mode, MEM_ALIGN (orig_src1)) | |
20013 | || rs6000_slow_unaligned_access (word_mode, MEM_ALIGN (orig_src2))) | |
01e91138 | 20014 | return false; |
20015 | ||
20016 | gcc_assert (GET_MODE (target) == SImode); | |
20017 | ||
/* Anything to compare? */
20019 | unsigned HOST_WIDE_INT bytes = UINTVAL (bytes_rtx); | |
20020 | if (bytes == 0) | |
20021 | return true; | |
20022 | ||
/* The code generated for P7 and older is not faster than glibc
   memcmp if the alignment is small and the length is not short, so bail
   out to avoid those conditions. */
20026 | if (!TARGET_EFFICIENT_OVERLAPPING_UNALIGNED | |
20027 | && ((base_align == 1 && bytes > 16) | |
20028 | || (base_align == 2 && bytes > 32))) | |
20029 | return false; | |
20030 | ||
20031 | rtx tmp_reg_src1 = gen_reg_rtx (word_mode); | |
20032 | rtx tmp_reg_src2 = gen_reg_rtx (word_mode); | |
/* P7/P8 code uses cond for the record-form subfc., but P9 uses
   it for cmpld, which needs CCUNSmode. */
20035 | rtx cond; | |
20036 | if (TARGET_P9_MISC) | |
20037 | cond = gen_reg_rtx (CCUNSmode); | |
20038 | else | |
20039 | cond = gen_reg_rtx (CCmode); | |
20040 | ||
20041 | /* If we have an LE target without ldbrx and word_mode is DImode, | |
20042 | then we must avoid using word_mode. */ | |
20043 | int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX | |
20044 | && word_mode == DImode); | |
20045 | ||
20046 | /* Strategy phase. How many ops will this take and should we expand it? */ | |
20047 | ||
20048 | unsigned HOST_WIDE_INT offset = 0; | |
20049 | machine_mode load_mode = | |
20050 | select_block_compare_mode (offset, bytes, base_align, word_mode_ok); | |
20051 | unsigned int load_mode_size = GET_MODE_SIZE (load_mode); | |
20052 | ||
20053 | /* We don't want to generate too much code. */ | |
20054 | unsigned HOST_WIDE_INT max_bytes = | |
20055 | load_mode_size * (unsigned HOST_WIDE_INT) rs6000_block_compare_inline_limit; | |
20056 | if (!IN_RANGE (bytes, 1, max_bytes)) | |
20057 | return false; | |
20058 | ||
20059 | bool generate_6432_conversion = false; | |
20060 | rtx convert_label = NULL; | |
20061 | rtx final_label = NULL; | |
20062 | ||
/* Example of generated code for a block of 18 bytes with 1-byte alignment.
20064 | Compiled with -fno-reorder-blocks for clarity. | |
20065 | ldbrx 10,31,8 | |
20066 | ldbrx 9,7,8 | |
20067 | subfc. 9,9,10 | |
20068 | bne 0,.L6487 | |
20069 | addi 9,12,8 | |
20070 | addi 5,11,8 | |
20071 | ldbrx 10,0,9 | |
20072 | ldbrx 9,0,5 | |
20073 | subfc. 9,9,10 | |
20074 | bne 0,.L6487 | |
20075 | addi 9,12,16 | |
20076 | lhbrx 10,0,9 | |
20077 | addi 9,11,16 | |
20078 | lhbrx 9,0,9 | |
20079 | subf 9,9,10 | |
20080 | b .L6488 | |
20081 | .p2align 4,,15 | |
20082 | .L6487: #convert_label | |
20083 | popcntd 9,9 | |
20084 | subfe 10,10,10 | |
20085 | or 9,9,10 | |
20086 | .L6488: #final_label | |
20087 | extsw 10,9 | |
20088 | ||
20089 | We start off with DImode for two blocks that jump to the DI->SI conversion | |
20090 | if the difference is found there, then a final block of HImode that skips | |
20091 | the DI->SI conversion. */ | |
20092 | ||
20093 | while (bytes > 0) | |
20094 | { | |
20095 | unsigned int align = compute_current_alignment (base_align, offset); | |
20096 | if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED) | |
20097 | load_mode = select_block_compare_mode (offset, bytes, align, | |
20098 | word_mode_ok); | |
20099 | else | |
20100 | load_mode = select_block_compare_mode (0, bytes, align, word_mode_ok); | |
20101 | load_mode_size = GET_MODE_SIZE (load_mode); | |
20102 | if (bytes >= load_mode_size) | |
20103 | cmp_bytes = load_mode_size; | |
20104 | else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED) | |
20105 | { | |
20106 | /* Move this load back so it doesn't go past the end. | |
20107 | P8/P9 can do this efficiently. */ | |
20108 | unsigned int extra_bytes = load_mode_size - bytes; | |
20109 | cmp_bytes = bytes; | |
20110 | if (extra_bytes < offset) | |
20111 | { | |
20112 | offset -= extra_bytes; | |
20113 | cmp_bytes = load_mode_size; | |
20114 | bytes = cmp_bytes; | |
20115 | } | |
20116 | } | |
20117 | else | |
20118 | /* P7 and earlier can't do the overlapping load trick fast, | |
20119 | so this forces a non-overlapping load and a shift to get | |
20120 | rid of the extra bytes. */ | |
20121 | cmp_bytes = bytes; | |
20122 | ||
20123 | src1 = adjust_address (orig_src1, load_mode, offset); | |
20124 | src2 = adjust_address (orig_src2, load_mode, offset); | |
20125 | ||
20126 | if (!REG_P (XEXP (src1, 0))) | |
20127 | { | |
20128 | rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0)); | |
20129 | src1 = replace_equiv_address (src1, src1_reg); | |
20130 | } | |
20131 | set_mem_size (src1, cmp_bytes); | |
20132 | ||
20133 | if (!REG_P (XEXP (src2, 0))) | |
20134 | { | |
20135 | rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0)); | |
20136 | src2 = replace_equiv_address (src2, src2_reg); | |
20137 | } | |
20138 | set_mem_size (src2, cmp_bytes); | |
20139 | ||
20140 | do_load_for_compare (tmp_reg_src1, src1, load_mode); | |
20141 | do_load_for_compare (tmp_reg_src2, src2, load_mode); | |
20142 | ||
20143 | if (cmp_bytes < load_mode_size) | |
20144 | { | |
20145 | /* Shift unneeded bytes off. */ | |
20146 | rtx sh = GEN_INT (BITS_PER_UNIT * (load_mode_size - cmp_bytes)); | |
20147 | if (word_mode == DImode) | |
20148 | { | |
20149 | emit_insn (gen_lshrdi3 (tmp_reg_src1, tmp_reg_src1, sh)); | |
20150 | emit_insn (gen_lshrdi3 (tmp_reg_src2, tmp_reg_src2, sh)); | |
20151 | } | |
20152 | else | |
20153 | { | |
20154 | emit_insn (gen_lshrsi3 (tmp_reg_src1, tmp_reg_src1, sh)); | |
20155 | emit_insn (gen_lshrsi3 (tmp_reg_src2, tmp_reg_src2, sh)); | |
20156 | } | |
20157 | } | |
20158 | ||
20159 | int remain = bytes - cmp_bytes; | |
20160 | if (GET_MODE_SIZE (GET_MODE (target)) > GET_MODE_SIZE (load_mode)) | |
20161 | { | |
20162 | /* Target is larger than load size so we don't need to | |
20163 | reduce result size. */ | |
20164 | ||
/* We previously did a block that needed 64->32 conversion but
20166 | the current block does not, so a label is needed to jump | |
20167 | to the end. */ | |
20168 | if (generate_6432_conversion && !final_label) | |
20169 | final_label = gen_label_rtx (); | |
20170 | ||
20171 | if (remain > 0) | |
20172 | { | |
20173 | /* This is not the last block, branch to the end if the result | |
20174 | of this subtract is not zero. */ | |
20175 | if (!final_label) | |
20176 | final_label = gen_label_rtx (); | |
20177 | rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label); | |
20178 | rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2); | |
20179 | rtx cr = gen_reg_rtx (CCmode); | |
20180 | rs6000_emit_dot_insn (tmp_reg_src2, tmp, 2, cr); | |
20181 | emit_insn (gen_movsi (target, | |
20182 | gen_lowpart (SImode, tmp_reg_src2))); | |
20183 | rtx ne_rtx = gen_rtx_NE (VOIDmode, cr, const0_rtx); | |
20184 | rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx, | |
20185 | fin_ref, pc_rtx); | |
20186 | rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); | |
20187 | JUMP_LABEL (j) = final_label; | |
20188 | LABEL_NUSES (final_label) += 1; | |
20189 | } | |
20190 | else | |
20191 | { | |
20192 | if (word_mode == DImode) | |
20193 | { | |
20194 | emit_insn (gen_subdi3 (tmp_reg_src2, tmp_reg_src1, | |
20195 | tmp_reg_src2)); | |
20196 | emit_insn (gen_movsi (target, | |
20197 | gen_lowpart (SImode, tmp_reg_src2))); | |
20198 | } | |
20199 | else | |
20200 | emit_insn (gen_subsi3 (target, tmp_reg_src1, tmp_reg_src2)); | |
20201 | ||
20202 | if (final_label) | |
20203 | { | |
20204 | rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label); | |
20205 | rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref)); | |
JUMP_LABEL (j) = final_label;
20207 | LABEL_NUSES (final_label) += 1; | |
20208 | emit_barrier (); | |
20209 | } | |
20210 | } | |
20211 | } | |
20212 | else | |
20213 | { | |
20214 | /* Do we need a 64->32 conversion block? We need the 64->32 | |
20215 | conversion even if target size == load_mode size because | |
the subtract can generate one extra bit (the borrow). */
20217 | generate_6432_conversion = true; | |
20218 | ||
20219 | if (remain > 0) | |
20220 | { | |
20221 | if (!convert_label) | |
20222 | convert_label = gen_label_rtx (); | |
20223 | ||
20224 | /* Compare to zero and branch to convert_label if not zero. */ | |
20225 | rtx cvt_ref = gen_rtx_LABEL_REF (VOIDmode, convert_label); | |
20226 | if (TARGET_P9_MISC) | |
20227 | { | |
20228 | /* Generate a compare, and convert with a setb later. */ | |
20229 | rtx cmp = gen_rtx_COMPARE (CCUNSmode, tmp_reg_src1, | |
20230 | tmp_reg_src2); | |
20231 | emit_insn (gen_rtx_SET (cond, cmp)); | |
20232 | } | |
20233 | else | |
20234 | /* Generate a subfc. and use the longer | |
20235 | sequence for conversion. */ | |
20236 | if (TARGET_64BIT) | |
20237 | emit_insn (gen_subfdi3_carry_dot2 (tmp_reg_src2, tmp_reg_src2, | |
20238 | tmp_reg_src1, cond)); | |
20239 | else | |
20240 | emit_insn (gen_subfsi3_carry_dot2 (tmp_reg_src2, tmp_reg_src2, | |
20241 | tmp_reg_src1, cond)); | |
20242 | rtx ne_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx); | |
20243 | rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx, | |
20244 | cvt_ref, pc_rtx); | |
20245 | rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); | |
JUMP_LABEL (j) = convert_label;
20247 | LABEL_NUSES (convert_label) += 1; | |
20248 | } | |
20249 | else | |
20250 | { | |
20251 | /* Just do the subtract/compare. Since this is the last block | |
20252 | the convert code will be generated immediately following. */ | |
20253 | if (TARGET_P9_MISC) | |
20254 | { | |
20255 | rtx cmp = gen_rtx_COMPARE (CCUNSmode, tmp_reg_src1, | |
20256 | tmp_reg_src2); | |
20257 | emit_insn (gen_rtx_SET (cond, cmp)); | |
20258 | } | |
20259 | else | |
20260 | if (TARGET_64BIT) | |
20261 | emit_insn (gen_subfdi3_carry (tmp_reg_src2, tmp_reg_src2, | |
20262 | tmp_reg_src1)); | |
20263 | else | |
20264 | emit_insn (gen_subfsi3_carry (tmp_reg_src2, tmp_reg_src2, | |
20265 | tmp_reg_src1)); | |
20266 | } | |
20267 | } | |
20268 | ||
20269 | offset += cmp_bytes; | |
20270 | bytes -= cmp_bytes; | |
20271 | } | |
20272 | ||
20273 | if (generate_6432_conversion) | |
20274 | { | |
20275 | if (convert_label) | |
20276 | emit_label (convert_label); | |
20277 | ||
20278 | /* We need to produce DI result from sub, then convert to target SI | |
20279 | while maintaining <0 / ==0 / >0 properties. This sequence works: | |
20280 | subfc L,A,B | |
20281 | subfe H,H,H | |
20282 | popcntd L,L | |
20283 | rldimi L,H,6,0 | |
20284 | ||
20285 | This is an alternate one Segher cooked up if somebody | |
20286 | wants to expand this for something that doesn't have popcntd: | |
20287 | subfc L,a,b | |
20288 | subfe H,x,x | |
20289 | addic t,L,-1 | |
20290 | subfe v,t,L | |
20291 | or z,v,H | |
20292 | ||
And finally, P9 can just do this:
20294 | cmpld A,B | |
20295 | setb r */ | |
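/* Informally, why the popcntd sequence works: the subfc in the
   compare block leaves src1 - src2 and sets CA iff there was no
   borrow, i.e. iff src1 >= src2 unsigned.  subfe H,H,H then gives
   H = CA - 1: 0 when src1 >= src2 and -1 when src1 < src2.  popcntd
   maps a zero difference to 0 and a nonzero difference to a positive
   count, so the final OR is 0 for equal chunks, positive when
   src1 > src2, and -1 when src1 < src2, as memcmp requires. */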
20296 | ||
20297 | if (TARGET_P9_MISC) | |
20298 | { | |
20299 | emit_insn (gen_setb_unsigned (target, cond)); | |
20300 | } | |
20301 | else | |
20302 | { | |
20303 | if (TARGET_64BIT) | |
20304 | { | |
20305 | rtx tmp_reg_ca = gen_reg_rtx (DImode); | |
20306 | emit_insn (gen_subfdi3_carry_in_xx (tmp_reg_ca)); | |
20307 | emit_insn (gen_popcntddi2 (tmp_reg_src2, tmp_reg_src2)); | |
20308 | emit_insn (gen_iordi3 (tmp_reg_src2, tmp_reg_src2, tmp_reg_ca)); | |
20309 | emit_insn (gen_movsi (target, gen_lowpart (SImode, tmp_reg_src2))); | |
20310 | } | |
20311 | else | |
20312 | { | |
20313 | rtx tmp_reg_ca = gen_reg_rtx (SImode); | |
20314 | emit_insn (gen_subfsi3_carry_in_xx (tmp_reg_ca)); | |
20315 | emit_insn (gen_popcntdsi2 (tmp_reg_src2, tmp_reg_src2)); | |
20316 | emit_insn (gen_iorsi3 (target, tmp_reg_src2, tmp_reg_ca)); | |
20317 | } | |
20318 | } | |
20319 | } | |
20320 | ||
20321 | if (final_label) | |
20322 | emit_label (final_label); | |
20323 | ||
20324 | gcc_assert (bytes == 0); | |
20325 | return true; | |
20326 | } | |
20327 | ||
20328 | /* Generate alignment check and branch code to set up for | |
20329 | strncmp when we don't have DI alignment. | |
20330 | STRNCMP_LABEL is the label to branch if there is a page crossing. | |
20331 | SRC is the string pointer to be examined. | |
20332 | BYTES is the max number of bytes to compare. */ | |
20333 | static void | |
20334 | expand_strncmp_align_check (rtx strncmp_label, rtx src, HOST_WIDE_INT bytes) | |
20335 | { | |
20336 | rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, strncmp_label); | |
20337 | rtx src_check = copy_addr_to_reg (XEXP (src, 0)); | |
20338 | if (GET_MODE (src_check) == SImode) | |
20339 | emit_insn (gen_andsi3 (src_check, src_check, GEN_INT (0xfff))); | |
20340 | else | |
20341 | emit_insn (gen_anddi3 (src_check, src_check, GEN_INT (0xfff))); | |
20342 | rtx cond = gen_reg_rtx (CCmode); | |
20343 | emit_move_insn (cond, gen_rtx_COMPARE (CCmode, src_check, | |
20344 | GEN_INT (4096 - bytes))); | |
20345 | ||
20346 | rtx cmp_rtx = gen_rtx_LT (VOIDmode, cond, const0_rtx); | |
20347 | ||
20348 | rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx, | |
20349 | pc_rtx, lab_ref); | |
20350 | rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); | |
20351 | JUMP_LABEL (j) = strncmp_label; | |
20352 | LABEL_NUSES (strncmp_label) += 1; | |
20353 | } | |
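/* For example, with BYTES == 16 a pointer whose low 12 bits are 0xfe0
   falls through (0xfe0 < 4096 - 16), while one at 0xff0 or above
   branches to STRNCMP_LABEL, since a 16-byte read from there might
   cross into the next 4K page.  The test is conservative: it also
   branches when the read would end exactly at the page boundary. */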
20354 | ||
20355 | /* Expand a string compare operation with length, and return | |
20356 | true if successful. Return false if we should let the | |
20357 | compiler generate normal code, probably a strncmp call. | |
20358 | ||
20359 | OPERANDS[0] is the target (result). | |
20360 | OPERANDS[1] is the first source. | |
20361 | OPERANDS[2] is the second source. | |
20362 | If NO_LENGTH is zero, then: | |
20363 | OPERANDS[3] is the length. | |
20364 | OPERANDS[4] is the alignment in bytes. | |
20365 | If NO_LENGTH is nonzero, then: | |
20366 | OPERANDS[3] is the alignment in bytes. */ | |
20367 | bool | |
20368 | expand_strn_compare (rtx operands[], int no_length) | |
20369 | { | |
20370 | rtx target = operands[0]; | |
20371 | rtx orig_src1 = operands[1]; | |
20372 | rtx orig_src2 = operands[2]; | |
20373 | rtx bytes_rtx, align_rtx; | |
20374 | if (no_length) | |
20375 | { | |
20376 | bytes_rtx = NULL; | |
20377 | align_rtx = operands[3]; | |
20378 | } | |
20379 | else | |
20380 | { | |
20381 | bytes_rtx = operands[3]; | |
20382 | align_rtx = operands[4]; | |
20383 | } | |
20384 | unsigned HOST_WIDE_INT cmp_bytes = 0; | |
20385 | rtx src1 = orig_src1; | |
20386 | rtx src2 = orig_src2; | |
20387 | ||
20388 | /* If we have a length, it must be constant. This simplifies things | |
20389 | a bit as we don't have to generate code to check if we've exceeded | |
20390 | the length. Later this could be expanded to handle this case. */ | |
20391 | if (!no_length && !CONST_INT_P (bytes_rtx)) | |
20392 | return false; | |
20393 | ||
20394 | /* This must be a fixed size alignment. */ | |
20395 | if (!CONST_INT_P (align_rtx)) | |
20396 | return false; | |
20397 | ||
20398 | unsigned int base_align = UINTVAL (align_rtx); | |
20399 | int align1 = MEM_ALIGN (orig_src1) / BITS_PER_UNIT; | |
20400 | int align2 = MEM_ALIGN (orig_src2) / BITS_PER_UNIT; | |
20401 | ||
/* If unaligned accesses are slow on this target, don't generate an unaligned string compare. */
20403 | if (rs6000_slow_unaligned_access (word_mode, align1) | |
20404 | || rs6000_slow_unaligned_access (word_mode, align2)) | |
01e91138 | 20405 | return false; |
20406 | ||
20407 | gcc_assert (GET_MODE (target) == SImode); | |
20408 | ||
20409 | /* If we have an LE target without ldbrx and word_mode is DImode, | |
20410 | then we must avoid using word_mode. */ | |
20411 | int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX | |
20412 | && word_mode == DImode); | |
20413 | ||
20414 | unsigned int word_mode_size = GET_MODE_SIZE (word_mode); | |
20415 | ||
20416 | unsigned HOST_WIDE_INT offset = 0; | |
20417 | unsigned HOST_WIDE_INT bytes; /* N from the strncmp args if available. */ | |
20418 | unsigned HOST_WIDE_INT compare_length; /* How much to compare inline. */ | |
20419 | if (no_length) | |
/* Use this as a stand-in to determine the mode to use. */
20421 | bytes = rs6000_string_compare_inline_limit * word_mode_size; | |
20422 | else | |
20423 | bytes = UINTVAL (bytes_rtx); | |
20424 | ||
20425 | machine_mode load_mode = | |
20426 | select_block_compare_mode (offset, bytes, base_align, word_mode_ok); | |
20427 | unsigned int load_mode_size = GET_MODE_SIZE (load_mode); | |
20428 | compare_length = rs6000_string_compare_inline_limit * load_mode_size; | |
20429 | ||
20430 | /* If we have equality at the end of the last compare and we have not | |
20431 | found the end of the string, we need to call strcmp/strncmp to | |
20432 | compare the remainder. */ | |
20433 | bool equality_compare_rest = false; | |
20434 | ||
20435 | if (no_length) | |
20436 | { | |
20437 | bytes = compare_length; | |
20438 | equality_compare_rest = true; | |
20439 | } | |
20440 | else | |
20441 | { | |
20442 | if (bytes <= compare_length) | |
20443 | compare_length = bytes; | |
20444 | else | |
20445 | equality_compare_rest = true; | |
20446 | } | |
20447 | ||
20448 | rtx result_reg = gen_reg_rtx (word_mode); | |
20449 | rtx final_move_label = gen_label_rtx (); | |
20450 | rtx final_label = gen_label_rtx (); | |
20451 | rtx begin_compare_label = NULL; | |
20452 | ||
20453 | if (base_align < 8) | |
20454 | { | |
20455 | /* Generate code that checks distance to 4k boundary for this case. */ | |
20456 | begin_compare_label = gen_label_rtx (); | |
20457 | rtx strncmp_label = gen_label_rtx (); | |
20458 | rtx jmp; | |
20459 | ||
20460 | /* Strncmp for power8 in glibc does this: | |
20461 | rldicl r8,r3,0,52 | |
20462 | cmpldi cr7,r8,4096-16 | |
20463 | bgt cr7,L(pagecross) */ | |
20464 | ||
20465 | /* Make sure that the length we use for the alignment test and | |
20466 | the subsequent code generation are in agreement so we do not | |
20467 | go past the length we tested for a 4k boundary crossing. */ | |
20468 | unsigned HOST_WIDE_INT align_test = compare_length; | |
20469 | if (align_test < 8) | |
20470 | { | |
20471 | align_test = HOST_WIDE_INT_1U << ceil_log2 (align_test); | |
20472 | base_align = align_test; | |
20473 | } | |
20474 | else | |
20475 | { | |
20476 | align_test = ROUND_UP (align_test, 8); | |
20477 | base_align = 8; | |
20478 | } | |
20479 | ||
20480 | if (align1 < 8) | |
20481 | expand_strncmp_align_check (strncmp_label, src1, align_test); | |
20482 | if (align2 < 8) | |
20483 | expand_strncmp_align_check (strncmp_label, src2, align_test); | |
20484 | ||
20485 | /* Now generate the following sequence: | |
20486 | - branch to begin_compare | |
20487 | - strncmp_label | |
20488 | - call to strncmp | |
20489 | - branch to final_label | |
20490 | - begin_compare_label */ | |
20491 | ||
20492 | rtx cmp_ref = gen_rtx_LABEL_REF (VOIDmode, begin_compare_label); | |
20493 | jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, cmp_ref)); | |
20494 | JUMP_LABEL (jmp) = begin_compare_label; | |
20495 | LABEL_NUSES (begin_compare_label) += 1; | |
20496 | emit_barrier (); | |
20497 | ||
20498 | emit_label (strncmp_label); | |
20499 | ||
20500 | if (!REG_P (XEXP (src1, 0))) | |
20501 | { | |
20502 | rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0)); | |
20503 | src1 = replace_equiv_address (src1, src1_reg); | |
20504 | } | |
20505 | ||
20506 | if (!REG_P (XEXP (src2, 0))) | |
20507 | { | |
20508 | rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0)); | |
20509 | src2 = replace_equiv_address (src2, src2_reg); | |
20510 | } | |
20511 | ||
20512 | if (no_length) | |
20513 | { | |
20514 | tree fun = builtin_decl_explicit (BUILT_IN_STRCMP); | |
20515 | emit_library_call_value (XEXP (DECL_RTL (fun), 0), | |
9e9e5c15 | 20516 | target, LCT_NORMAL, GET_MODE (target), |
01e91138 | 20517 | force_reg (Pmode, XEXP (src1, 0)), Pmode, |
20518 | force_reg (Pmode, XEXP (src2, 0)), Pmode); | |
20519 | } | |
20520 | else | |
20521 | { | |
20522 | /* -m32 -mpowerpc64 results in word_mode being DImode even | |
though otherwise it is 32-bit.  The length arg to strncmp
   is a size_t, which will be the same size as pointers. */
20525 | rtx len_rtx; | |
20526 | if (TARGET_64BIT) | |
20527 | len_rtx = gen_reg_rtx (DImode); | |
20528 | else | |
20529 | len_rtx = gen_reg_rtx (SImode); | |
20530 | ||
20531 | emit_move_insn (len_rtx, bytes_rtx); | |
20532 | ||
20533 | tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP); | |
20534 | emit_library_call_value (XEXP (DECL_RTL (fun), 0), | |
9e9e5c15 | 20535 | target, LCT_NORMAL, GET_MODE (target), |
01e91138 | 20536 | force_reg (Pmode, XEXP (src1, 0)), Pmode, |
20537 | force_reg (Pmode, XEXP (src2, 0)), Pmode, | |
20538 | len_rtx, GET_MODE (len_rtx)); | |
20539 | } | |
20540 | ||
20541 | rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label); | |
20542 | jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref)); | |
20543 | JUMP_LABEL (jmp) = final_label; | |
20544 | LABEL_NUSES (final_label) += 1; | |
20545 | emit_barrier (); | |
20546 | emit_label (begin_compare_label); | |
20547 | } | |
20548 | ||
20549 | rtx cleanup_label = NULL; | |
20550 | rtx tmp_reg_src1 = gen_reg_rtx (word_mode); | |
20551 | rtx tmp_reg_src2 = gen_reg_rtx (word_mode); | |
20552 | ||
20553 | /* Generate sequence of ld/ldbrx, cmpb to compare out | |
20554 | to the length specified. */ | |
20555 | unsigned HOST_WIDE_INT bytes_to_compare = compare_length; | |
20556 | while (bytes_to_compare > 0) | |
20557 | { | |
20558 | /* Compare sequence: | |
20559 | check each 8B with: ld/ld cmpd bne | |
20560 | If equal, use rldicr/cmpb to check for zero byte. | |
20561 | cleanup code at end: | |
20562 | cmpb get byte that differs | |
20563 | cmpb look for zero byte | |
20564 | orc combine | |
20565 | cntlzd get bit of first zero/diff byte | |
20566 | subfic convert for rldcl use | |
20567 | rldcl rldcl extract diff/zero byte | |
20568 | subf subtract for final result | |
20569 | ||
20570 | The last compare can branch around the cleanup code if the | |
20571 | result is zero because the strings are exactly equal. */ | |
20572 | unsigned int align = compute_current_alignment (base_align, offset); | |
20573 | if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED) | |
20574 | load_mode = select_block_compare_mode (offset, bytes_to_compare, align, | |
20575 | word_mode_ok); | |
20576 | else | |
20577 | load_mode = select_block_compare_mode (0, bytes_to_compare, align, | |
20578 | word_mode_ok); | |
20579 | load_mode_size = GET_MODE_SIZE (load_mode); | |
20580 | if (bytes_to_compare >= load_mode_size) | |
20581 | cmp_bytes = load_mode_size; | |
20582 | else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED) | |
20583 | { | |
20584 | /* Move this load back so it doesn't go past the end. | |
20585 | P8/P9 can do this efficiently. */ | |
20586 | unsigned int extra_bytes = load_mode_size - bytes_to_compare; | |
20587 | cmp_bytes = bytes_to_compare; | |
20588 | if (extra_bytes < offset) | |
20589 | { | |
20590 | offset -= extra_bytes; | |
20591 | cmp_bytes = load_mode_size; | |
20592 | bytes_to_compare = cmp_bytes; | |
20593 | } | |
20594 | } | |
20595 | else | |
20596 | /* P7 and earlier can't do the overlapping load trick fast, | |
20597 | so this forces a non-overlapping load and a shift to get | |
20598 | rid of the extra bytes. */ | |
20599 | cmp_bytes = bytes_to_compare; | |
20600 | ||
20601 | src1 = adjust_address (orig_src1, load_mode, offset); | |
20602 | src2 = adjust_address (orig_src2, load_mode, offset); | |
20603 | ||
20604 | if (!REG_P (XEXP (src1, 0))) | |
20605 | { | |
20606 | rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0)); | |
20607 | src1 = replace_equiv_address (src1, src1_reg); | |
20608 | } | |
20609 | set_mem_size (src1, cmp_bytes); | |
20610 | ||
20611 | if (!REG_P (XEXP (src2, 0))) | |
20612 | { | |
20613 | rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0)); | |
20614 | src2 = replace_equiv_address (src2, src2_reg); | |
20615 | } | |
20616 | set_mem_size (src2, cmp_bytes); | |
20617 | ||
20618 | do_load_for_compare (tmp_reg_src1, src1, load_mode); | |
20619 | do_load_for_compare (tmp_reg_src2, src2, load_mode); | |
20620 | ||
20621 | /* We must always left-align the data we read, and | |
20622 | clear any bytes to the right that are beyond the string. | |
20623 | Otherwise the cmpb sequence won't produce the correct | |
20624 | results. The beginning of the compare will be done | |
with word_mode, so it will not need any extra shifts or
   clear-right operations. */
20627 | ||
20628 | if (load_mode_size < word_mode_size) | |
20629 | { | |
20630 | /* Rotate left first. */ | |
20631 | rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size)); | |
20632 | if (word_mode == DImode) | |
20633 | { | |
20634 | emit_insn (gen_rotldi3 (tmp_reg_src1, tmp_reg_src1, sh)); | |
20635 | emit_insn (gen_rotldi3 (tmp_reg_src2, tmp_reg_src2, sh)); | |
20636 | } | |
20637 | else | |
20638 | { | |
20639 | emit_insn (gen_rotlsi3 (tmp_reg_src1, tmp_reg_src1, sh)); | |
20640 | emit_insn (gen_rotlsi3 (tmp_reg_src2, tmp_reg_src2, sh)); | |
20641 | } | |
20642 | } | |
20643 | ||
20644 | if (cmp_bytes < word_mode_size) | |
20645 | { | |
20646 | /* Now clear right. This plus the rotate can be | |
20647 | turned into a rldicr instruction. */ | |
20648 | HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes); | |
20649 | rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb); | |
20650 | if (word_mode == DImode) | |
20651 | { | |
20652 | emit_insn (gen_anddi3_mask (tmp_reg_src1, tmp_reg_src1, mask)); | |
20653 | emit_insn (gen_anddi3_mask (tmp_reg_src2, tmp_reg_src2, mask)); | |
20654 | } | |
20655 | else | |
20656 | { | |
20657 | emit_insn (gen_andsi3_mask (tmp_reg_src1, tmp_reg_src1, mask)); | |
20658 | emit_insn (gen_andsi3_mask (tmp_reg_src2, tmp_reg_src2, mask)); | |
20659 | } | |
20660 | } | |
20661 | ||
20662 | /* Cases to handle. A and B are chunks of the two strings. | |
20663 | 1: Not end of comparison: | |
20664 | A != B: branch to cleanup code to compute result. | |
20665 | A == B: check for 0 byte, next block if not found. | |
20666 | 2: End of the inline comparison: | |
20667 | A != B: branch to cleanup code to compute result. | |
20668 | A == B: check for 0 byte, call strcmp/strncmp | |
3: Compared the requested N bytes:
20670 | A == B: branch to result 0. | |
20671 | A != B: cleanup code to compute result. */ | |
20672 | ||
20673 | unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes; | |
20674 | ||
20675 | rtx dst_label; | |
20676 | if (remain > 0 || equality_compare_rest) | |
20677 | { | |
20678 | /* Branch to cleanup code, otherwise fall through to do | |
20679 | more compares. */ | |
20680 | if (!cleanup_label) | |
20681 | cleanup_label = gen_label_rtx (); | |
20682 | dst_label = cleanup_label; | |
20683 | } | |
20684 | else | |
20685 | /* Branch to end and produce result of 0. */ | |
20686 | dst_label = final_move_label; | |
20687 | ||
20688 | rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label); | |
20689 | rtx cond = gen_reg_rtx (CCmode); | |
20690 | ||
/* Always produce the 0 result; it is needed if
20692 | cmpb finds a 0 byte in this chunk. */ | |
20693 | rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2); | |
20694 | rs6000_emit_dot_insn (result_reg, tmp, 1, cond); | |
20695 | ||
20696 | rtx cmp_rtx; | |
20697 | if (remain == 0 && !equality_compare_rest) | |
20698 | cmp_rtx = gen_rtx_EQ (VOIDmode, cond, const0_rtx); | |
20699 | else | |
20700 | cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx); | |
20701 | ||
20702 | rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx, | |
20703 | lab_ref, pc_rtx); | |
20704 | rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); | |
20705 | JUMP_LABEL (j) = dst_label; | |
20706 | LABEL_NUSES (dst_label) += 1; | |
20707 | ||
20708 | if (remain > 0 || equality_compare_rest) | |
20709 | { | |
20710 | /* Generate a cmpb to test for a 0 byte and branch | |
20711 | to final result if found. */ | |
20712 | rtx cmpb_zero = gen_reg_rtx (word_mode); | |
20713 | rtx lab_ref_fin = gen_rtx_LABEL_REF (VOIDmode, final_move_label); | |
20714 | rtx condz = gen_reg_rtx (CCmode); | |
20715 | rtx zero_reg = gen_reg_rtx (word_mode); | |
20716 | if (word_mode == SImode) | |
20717 | { | |
20718 | emit_insn (gen_movsi (zero_reg, GEN_INT (0))); | |
20719 | emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg)); | |
20720 | if (cmp_bytes < word_mode_size) | |
20721 | { | |
20722 | /* Don't want to look at zero bytes past end. */ | |
20723 | HOST_WIDE_INT mb = | |
20724 | BITS_PER_UNIT * (word_mode_size - cmp_bytes); | |
20725 | rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb); | |
20726 | emit_insn (gen_andsi3_mask (cmpb_zero, cmpb_zero, mask)); | |
20727 | } | |
20728 | } | |
20729 | else | |
20730 | { | |
20731 | emit_insn (gen_movdi (zero_reg, GEN_INT (0))); | |
20732 | emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg)); | |
20733 | if (cmp_bytes < word_mode_size) | |
20734 | { | |
20735 | /* Don't want to look at zero bytes past end. */ | |
20736 | HOST_WIDE_INT mb = | |
20737 | BITS_PER_UNIT * (word_mode_size - cmp_bytes); | |
20738 | rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb); | |
20739 | emit_insn (gen_anddi3_mask (cmpb_zero, cmpb_zero, mask)); | |
20740 | } | |
20741 | } | |
20742 | ||
20743 | emit_move_insn (condz, gen_rtx_COMPARE (CCmode, cmpb_zero, zero_reg)); | |
20744 | rtx cmpnz_rtx = gen_rtx_NE (VOIDmode, condz, const0_rtx); | |
20745 | rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmpnz_rtx, | |
20746 | lab_ref_fin, pc_rtx); | |
20747 | rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); | |
20748 | JUMP_LABEL (j2) = final_move_label; | |
20749 | LABEL_NUSES (final_move_label) += 1; | |
}
20752 | ||
20753 | offset += cmp_bytes; | |
20754 | bytes_to_compare -= cmp_bytes; | |
20755 | } | |
20756 | ||
20757 | if (equality_compare_rest) | |
20758 | { | |
20759 | /* Update pointers past what has been compared already. */ | |
20760 | src1 = adjust_address (orig_src1, load_mode, offset); | |
20761 | src2 = adjust_address (orig_src2, load_mode, offset); | |
20762 | ||
20763 | if (!REG_P (XEXP (src1, 0))) | |
20764 | { | |
20765 | rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0)); | |
20766 | src1 = replace_equiv_address (src1, src1_reg); | |
20767 | } | |
20768 | set_mem_size (src1, cmp_bytes); | |
20769 | ||
20770 | if (!REG_P (XEXP (src2, 0))) | |
20771 | { | |
20772 | rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0)); | |
20773 | src2 = replace_equiv_address (src2, src2_reg); | |
20774 | } | |
20775 | set_mem_size (src2, cmp_bytes); | |
20776 | ||
20777 | /* Construct call to strcmp/strncmp to compare the rest of the string. */ | |
20778 | if (no_length) | |
20779 | { | |
20780 | tree fun = builtin_decl_explicit (BUILT_IN_STRCMP); | |
20781 | emit_library_call_value (XEXP (DECL_RTL (fun), 0), | |
9e9e5c15 | 20782 | target, LCT_NORMAL, GET_MODE (target), |
01e91138 | 20783 | force_reg (Pmode, XEXP (src1, 0)), Pmode, |
20784 | force_reg (Pmode, XEXP (src2, 0)), Pmode); | |
20785 | } | |
20786 | else | |
20787 | { | |
20788 | rtx len_rtx; | |
20789 | if (TARGET_64BIT) | |
20790 | len_rtx = gen_reg_rtx (DImode); | |
20791 | else | |
20792 | len_rtx = gen_reg_rtx (SImode); | |
20793 | ||
20794 | emit_move_insn (len_rtx, GEN_INT (bytes - compare_length)); | |
20795 | tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP); | |
20796 | emit_library_call_value (XEXP (DECL_RTL (fun), 0), | |
9e9e5c15 | 20797 | target, LCT_NORMAL, GET_MODE (target), |
01e91138 | 20798 | force_reg (Pmode, XEXP (src1, 0)), Pmode, |
20799 | force_reg (Pmode, XEXP (src2, 0)), Pmode, | |
20800 | len_rtx, GET_MODE (len_rtx)); | |
20801 | } | |
20802 | ||
20803 | rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label); | |
20804 | rtx jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref)); | |
20805 | JUMP_LABEL (jmp) = final_label; | |
20806 | LABEL_NUSES (final_label) += 1; | |
20807 | emit_barrier (); | |
20808 | } | |
20809 | ||
20810 | if (cleanup_label) | |
20811 | emit_label (cleanup_label); | |
20812 | ||
20813 | /* Generate the final sequence that identifies the differing | |
20814 | byte and generates the final result, taking into account | |
20815 | zero bytes: | |
20816 | ||
20817 | cmpb cmpb_result1, src1, src2 | |
20818 | cmpb cmpb_result2, src1, zero | |
orc cmpb_result1, cmpb_result1, cmpb_result2
20820 | cntlzd get bit of first zero/diff byte | |
20821 | addi convert for rldcl use | |
20822 | rldcl rldcl extract diff/zero byte | |
20823 | subf subtract for final result | |
20824 | */ | |
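/* Worked example with illustrative values: comparing "ABCDX..."
   against "ABCDY..." in DImode, the first four bytes are equal, so the
   combined cmpb result has its first nonzero byte at byte 4 and cntlzd
   returns 32.  Adding 8 gives a rotate count of 40, which brings byte 4
   of each operand into the low-order byte; the AND with 0xff and the
   subtract then yield 'X' - 'Y' with the sign strcmp requires.  If a
   terminating zero had appeared before the first difference, the zero
   byte would be selected from both operands and the result would
   be 0. */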
20825 | ||
20826 | rtx cmpb_diff = gen_reg_rtx (word_mode); | |
20827 | rtx cmpb_zero = gen_reg_rtx (word_mode); | |
20828 | rtx rot_amt = gen_reg_rtx (word_mode); | |
20829 | rtx zero_reg = gen_reg_rtx (word_mode); | |
20830 | ||
20831 | rtx rot1_1 = gen_reg_rtx (word_mode); | |
20832 | rtx rot1_2 = gen_reg_rtx (word_mode); | |
20833 | rtx rot2_1 = gen_reg_rtx (word_mode); | |
20834 | rtx rot2_2 = gen_reg_rtx (word_mode); | |
20835 | ||
20836 | if (word_mode == SImode) | |
20837 | { | |
20838 | emit_insn (gen_cmpbsi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2)); | |
20839 | emit_insn (gen_movsi (zero_reg, GEN_INT (0))); | |
20840 | emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg)); | |
emit_insn (gen_one_cmplsi2 (cmpb_diff, cmpb_diff));
20842 | emit_insn (gen_iorsi3 (cmpb_diff, cmpb_diff, cmpb_zero)); | |
20843 | emit_insn (gen_clzsi2 (rot_amt, cmpb_diff)); | |
20844 | emit_insn (gen_addsi3 (rot_amt, rot_amt, GEN_INT (8))); | |
20845 | emit_insn (gen_rotlsi3 (rot1_1, tmp_reg_src1, | |
20846 | gen_lowpart (SImode, rot_amt))); | |
20847 | emit_insn (gen_andsi3_mask (rot1_2, rot1_1, GEN_INT (0xff))); | |
20848 | emit_insn (gen_rotlsi3 (rot2_1, tmp_reg_src2, | |
20849 | gen_lowpart (SImode, rot_amt))); | |
20850 | emit_insn (gen_andsi3_mask (rot2_2, rot2_1, GEN_INT (0xff))); | |
20851 | emit_insn (gen_subsi3 (result_reg, rot1_2, rot2_2)); | |
20852 | } | |
20853 | else | |
20854 | { | |
20855 | emit_insn (gen_cmpbdi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2)); | |
20856 | emit_insn (gen_movdi (zero_reg, GEN_INT (0))); | |
20857 | emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg)); | |
emit_insn (gen_one_cmpldi2 (cmpb_diff, cmpb_diff));
20859 | emit_insn (gen_iordi3 (cmpb_diff, cmpb_diff, cmpb_zero)); | |
20860 | emit_insn (gen_clzdi2 (rot_amt, cmpb_diff)); | |
20861 | emit_insn (gen_adddi3 (rot_amt, rot_amt, GEN_INT (8))); | |
20862 | emit_insn (gen_rotldi3 (rot1_1, tmp_reg_src1, | |
20863 | gen_lowpart (SImode, rot_amt))); | |
20864 | emit_insn (gen_anddi3_mask (rot1_2, rot1_1, GEN_INT (0xff))); | |
20865 | emit_insn (gen_rotldi3 (rot2_1, tmp_reg_src2, | |
20866 | gen_lowpart (SImode, rot_amt))); | |
20867 | emit_insn (gen_anddi3_mask (rot2_2, rot2_1, GEN_INT (0xff))); | |
20868 | emit_insn (gen_subdi3 (result_reg, rot1_2, rot2_2)); | |
20869 | } | |
20870 | ||
20871 | emit_label (final_move_label); | |
20872 | emit_insn (gen_movsi (target, | |
20873 | gen_lowpart (SImode, result_reg))); | |
20874 | emit_label (final_label); | |
20875 | return true; | |
20876 | } | |
20877 | ||
20878 | /* Expand a block move operation, and return 1 if successful. Return 0 | |
20879 | if we should let the compiler generate normal code. | |
20880 | ||
20881 | operands[0] is the destination | |
20882 | operands[1] is the source | |
20883 | operands[2] is the length | |
20884 | operands[3] is the alignment */ | |
20885 | ||
20886 | #define MAX_MOVE_REG 4 | |
20887 | ||
20888 | int | |
20889 | expand_block_move (rtx operands[]) | |
20890 | { | |
20891 | rtx orig_dest = operands[0]; | |
20892 | rtx orig_src = operands[1]; | |
20893 | rtx bytes_rtx = operands[2]; | |
20894 | rtx align_rtx = operands[3]; | |
20895 | int constp = (GET_CODE (bytes_rtx) == CONST_INT); | |
20896 | int align; | |
20897 | int bytes; | |
20898 | int offset; | |
20899 | int move_bytes; | |
20900 | rtx stores[MAX_MOVE_REG]; | |
20901 | int num_reg = 0; | |
20902 | ||
/* If this is not a fixed size move, just call memcpy. */
20904 | if (! constp) | |
20905 | return 0; | |
20906 | ||
/* This must be a fixed size alignment. */
20908 | gcc_assert (GET_CODE (align_rtx) == CONST_INT); | |
20909 | align = INTVAL (align_rtx) * BITS_PER_UNIT; | |
20910 | ||
20911 | /* Anything to move? */ | |
20912 | bytes = INTVAL (bytes_rtx); | |
20913 | if (bytes <= 0) | |
20914 | return 1; | |
20915 | ||
20916 | if (bytes > rs6000_block_move_inline_limit) | |
20917 | return 0; | |
20918 | ||
20919 | for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes) | |
20920 | { | |
20921 | union { | |
20922 | rtx (*movmemsi) (rtx, rtx, rtx, rtx); | |
20923 | rtx (*mov) (rtx, rtx); | |
20924 | } gen_func; | |
20925 | machine_mode mode = BLKmode; | |
20926 | rtx src, dest; | |
20927 | ||
20928 | /* Altivec first, since it will be faster than a string move | |
20929 | when it applies, and usually not significantly larger. */ | |
20930 | if (TARGET_ALTIVEC && bytes >= 16 && align >= 128) | |
20931 | { | |
20932 | move_bytes = 16; | |
20933 | mode = V4SImode; | |
20934 | gen_func.mov = gen_movv4si; | |
20935 | } | |
20936 | else if (TARGET_SPE && bytes >= 8 && align >= 64) | |
20937 | { | |
20938 | move_bytes = 8; | |
20939 | mode = V2SImode; | |
20940 | gen_func.mov = gen_movv2si; | |
20941 | } | |
20942 | else if (TARGET_STRING | |
20943 | && bytes > 24 /* move up to 32 bytes at a time */ | |
20944 | && ! fixed_regs[5] | |
20945 | && ! fixed_regs[6] | |
20946 | && ! fixed_regs[7] | |
20947 | && ! fixed_regs[8] | |
20948 | && ! fixed_regs[9] | |
20949 | && ! fixed_regs[10] | |
20950 | && ! fixed_regs[11] | |
20951 | && ! fixed_regs[12]) | |
20952 | { | |
20953 | move_bytes = (bytes > 32) ? 32 : bytes; | |
20954 | gen_func.movmemsi = gen_movmemsi_8reg; | |
20955 | } | |
20956 | else if (TARGET_STRING | |
20957 | && bytes > 16 /* move up to 24 bytes at a time */ | |
20958 | && ! fixed_regs[5] | |
20959 | && ! fixed_regs[6] | |
20960 | && ! fixed_regs[7] | |
20961 | && ! fixed_regs[8] | |
20962 | && ! fixed_regs[9] | |
20963 | && ! fixed_regs[10]) | |
20964 | { | |
20965 | move_bytes = (bytes > 24) ? 24 : bytes; | |
20966 | gen_func.movmemsi = gen_movmemsi_6reg; | |
20967 | } | |
20968 | else if (TARGET_STRING | |
20969 | && bytes > 8 /* move up to 16 bytes at a time */ | |
20970 | && ! fixed_regs[5] | |
20971 | && ! fixed_regs[6] | |
20972 | && ! fixed_regs[7] | |
20973 | && ! fixed_regs[8]) | |
20974 | { | |
20975 | move_bytes = (bytes > 16) ? 16 : bytes; | |
20976 | gen_func.movmemsi = gen_movmemsi_4reg; | |
20977 | } | |
20978 | else if (bytes >= 8 && TARGET_POWERPC64 | |
20979 | && (align >= 64 || !STRICT_ALIGNMENT)) | |
20980 | { | |
20981 | move_bytes = 8; | |
20982 | mode = DImode; | |
20983 | gen_func.mov = gen_movdi; | |
20984 | if (offset == 0 && align < 64) | |
20985 | { | |
20986 | rtx addr; | |
20987 | ||
20988 | /* If the address form is reg+offset with offset not a | |
20989 | multiple of four, reload into reg indirect form here | |
20990 | rather than waiting for reload. This way we get one | |
20991 | reload, not one per load and/or store. */ | |
20992 | addr = XEXP (orig_dest, 0); | |
20993 | if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM) | |
20994 | && GET_CODE (XEXP (addr, 1)) == CONST_INT | |
20995 | && (INTVAL (XEXP (addr, 1)) & 3) != 0) | |
20996 | { | |
20997 | addr = copy_addr_to_reg (addr); | |
20998 | orig_dest = replace_equiv_address (orig_dest, addr); | |
20999 | } | |
21000 | addr = XEXP (orig_src, 0); | |
21001 | if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM) | |
21002 | && GET_CODE (XEXP (addr, 1)) == CONST_INT | |
21003 | && (INTVAL (XEXP (addr, 1)) & 3) != 0) | |
21004 | { | |
21005 | addr = copy_addr_to_reg (addr); | |
21006 | orig_src = replace_equiv_address (orig_src, addr); | |
21007 | } | |
21008 | } | |
21009 | } | |
21010 | else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64) | |
21011 | { /* move up to 8 bytes at a time */ | |
21012 | move_bytes = (bytes > 8) ? 8 : bytes; | |
21013 | gen_func.movmemsi = gen_movmemsi_2reg; | |
21014 | } | |
21015 | else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT)) | |
21016 | { /* move 4 bytes */ | |
21017 | move_bytes = 4; | |
21018 | mode = SImode; | |
21019 | gen_func.mov = gen_movsi; | |
21020 | } | |
21021 | else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT)) | |
21022 | { /* move 2 bytes */ | |
21023 | move_bytes = 2; | |
21024 | mode = HImode; | |
21025 | gen_func.mov = gen_movhi; | |
21026 | } | |
21027 | else if (TARGET_STRING && bytes > 1) | |
21028 | { /* move up to 4 bytes at a time */ | |
21029 | move_bytes = (bytes > 4) ? 4 : bytes; | |
21030 | gen_func.movmemsi = gen_movmemsi_1reg; | |
21031 | } | |
21032 | else /* move 1 byte at a time */ | |
21033 | { | |
21034 | move_bytes = 1; | |
21035 | mode = QImode; | |
21036 | gen_func.mov = gen_movqi; | |
21037 | } | |
21038 | ||
21039 | src = adjust_address (orig_src, mode, offset); | |
21040 | dest = adjust_address (orig_dest, mode, offset); | |
21041 | ||
21042 | if (mode != BLKmode) | |
21043 | { | |
21044 | rtx tmp_reg = gen_reg_rtx (mode); | |
21045 | ||
21046 | emit_insn ((*gen_func.mov) (tmp_reg, src)); | |
21047 | stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg); | |
21048 | } | |
21049 | ||
21050 | if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes) | |
21051 | { | |
21052 | int i; | |
21053 | for (i = 0; i < num_reg; i++) | |
21054 | emit_insn (stores[i]); | |
21055 | num_reg = 0; | |
21056 | } | |
21057 | ||
21058 | if (mode == BLKmode) | |
21059 | { | |
21060 | /* Move the address into scratch registers. The movmemsi | |
21061 | patterns require zero offset. */ | |
21062 | if (!REG_P (XEXP (src, 0))) | |
21063 | { | |
21064 | rtx src_reg = copy_addr_to_reg (XEXP (src, 0)); | |
21065 | src = replace_equiv_address (src, src_reg); | |
21066 | } | |
21067 | set_mem_size (src, move_bytes); | |
21068 | ||
21069 | if (!REG_P (XEXP (dest, 0))) | |
21070 | { | |
21071 | rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0)); | |
21072 | dest = replace_equiv_address (dest, dest_reg); | |
21073 | } | |
21074 | set_mem_size (dest, move_bytes); | |
21075 | ||
21076 | emit_insn ((*gen_func.movmemsi) (dest, src, | |
21077 | GEN_INT (move_bytes & 31), | |
21078 | align_rtx)); | |
21079 | } | |
21080 | } | |
21081 | ||
21082 | return 1; | |
21083 | } | |
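/* Illustrative strategy trace, assuming a 64-bit target without
   -mstring: a 13-byte copy with 8-byte alignment is split into one
   DImode move, one SImode move and one QImode move.  Loads are emitted
   immediately while the stores are buffered in STORES and flushed
   together, so up to MAX_MOVE_REG loads can be scheduled ahead of the
   corresponding stores. */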
21084 | ||
21085 | \f | |
21086 | /* Return a string to perform a load_multiple operation. | |
21087 | operands[0] is the vector. | |
21088 | operands[1] is the source address. | |
21089 | operands[2] is the first destination register. */ | |
21090 | ||
21091 | const char * | |
21092 | rs6000_output_load_multiple (rtx operands[3]) | |
21093 | { | |
21094 | /* We have to handle the case where the pseudo used to contain the address | |
21095 | is assigned to one of the output registers. */ | |
21096 | int i, j; | |
21097 | int words = XVECLEN (operands[0], 0); | |
21098 | rtx xop[10]; | |
21099 | ||
21100 | if (XVECLEN (operands[0], 0) == 1) | |
21101 | return "lwz %2,0(%1)"; | |
21102 | ||
21103 | for (i = 0; i < words; i++) | |
21104 | if (refers_to_regno_p (REGNO (operands[2]) + i, operands[1])) | |
21105 | { | |
21106 | if (i == words-1) | |
21107 | { | |
21108 | xop[0] = GEN_INT (4 * (words-1)); | |
21109 | xop[1] = operands[1]; | |
21110 | xop[2] = operands[2]; | |
21111 | output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop); | |
21112 | return ""; | |
21113 | } | |
21114 | else if (i == 0) | |
21115 | { | |
21116 | xop[0] = GEN_INT (4 * (words-1)); | |
21117 | xop[1] = operands[1]; | |
21118 | xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1); | |
21119 | output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop); | |
21120 | return ""; | |
21121 | } | |
21122 | else | |
21123 | { | |
21124 | for (j = 0; j < words; j++) | |
21125 | if (j != i) | |
21126 | { | |
21127 | xop[0] = GEN_INT (j * 4); | |
21128 | xop[1] = operands[1]; | |
21129 | xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j); | |
21130 | output_asm_insn ("lwz %2,%0(%1)", xop); | |
21131 | } | |
21132 | xop[0] = GEN_INT (i * 4); | |
21133 | xop[1] = operands[1]; | |
21134 | output_asm_insn ("lwz %1,%0(%1)", xop); | |
21135 | return ""; | |
21136 | } | |
21137 | } | |
21138 | ||
21139 | return "lswi %2,%1,%N0"; | |
21140 | } | |
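/* For example, loading 4 words into r8..r11 from the address held in
   r10 hits the fallback case above, where the address register is
   neither the first nor the last destination: the words destined for
   r8, r9 and r11 are loaded first with individual lwz insns, and r10
   itself is overwritten last with "lwz r10,8(r10)". */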
21141 | ||
21142 | \f | |
21143 | /* A validation routine: say whether CODE, a condition code, and MODE | |
21144 | match. The other alternatives either don't make sense or should | |
21145 | never be generated. */ | |
21146 | ||
21147 | void | |
21148 | validate_condition_mode (enum rtx_code code, machine_mode mode) | |
21149 | { | |
21150 | gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE | |
21151 | || GET_RTX_CLASS (code) == RTX_COMM_COMPARE) | |
21152 | && GET_MODE_CLASS (mode) == MODE_CC); | |
21153 | ||
21154 | /* These don't make sense. */ | |
21155 | gcc_assert ((code != GT && code != LT && code != GE && code != LE) | |
21156 | || mode != CCUNSmode); | |
21157 | ||
21158 | gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU) | |
21159 | || mode == CCUNSmode); | |
21160 | ||
21161 | gcc_assert (mode == CCFPmode | |
21162 | || (code != ORDERED && code != UNORDERED | |
21163 | && code != UNEQ && code != LTGT | |
21164 | && code != UNGT && code != UNLT | |
21165 | && code != UNGE && code != UNLE)); | |
21166 | ||
21167 | /* These should never be generated except for | |
21168 | flag_finite_math_only. */ | |
21169 | gcc_assert (mode != CCFPmode | |
21170 | || flag_finite_math_only | |
21171 | || (code != LE && code != GE | |
21172 | && code != UNEQ && code != LTGT | |
21173 | && code != UNGT && code != UNLT)); | |
21174 | ||
21175 | /* These are invalid; the information is not there. */ | |
21176 | gcc_assert (mode != CCEQmode || code == EQ || code == NE); | |
21177 | } | |
21178 | ||
21179 | \f | |
21180 | /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, | |
rldicl, rldicr, or rldic instruction in mode MODE.  If so, and if E is
   nonnull, store there the bit offset (counted from the right) where
21183 | the single stretch of 1 bits begins; and similarly for B, the bit | |
21184 | offset where it ends. */ | |
21185 | ||
21186 | bool | |
21187 | rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode) | |
21188 | { | |
21189 | unsigned HOST_WIDE_INT val = INTVAL (mask); | |
21190 | unsigned HOST_WIDE_INT bit; | |
21191 | int nb, ne; | |
21192 | int n = GET_MODE_PRECISION (mode); | |
21193 | ||
21194 | if (mode != DImode && mode != SImode) | |
21195 | return false; | |
21196 | ||
21197 | if (INTVAL (mask) >= 0) | |
21198 | { | |
21199 | bit = val & -val; | |
21200 | ne = exact_log2 (bit); | |
21201 | nb = exact_log2 (val + bit); | |
21202 | } | |
21203 | else if (val + 1 == 0) | |
21204 | { | |
21205 | nb = n; | |
21206 | ne = 0; | |
21207 | } | |
21208 | else if (val & 1) | |
21209 | { | |
21210 | val = ~val; | |
21211 | bit = val & -val; | |
21212 | nb = exact_log2 (bit); | |
21213 | ne = exact_log2 (val + bit); | |
21214 | } | |
21215 | else | |
21216 | { | |
21217 | bit = val & -val; | |
21218 | ne = exact_log2 (bit); | |
21219 | if (val + bit == 0) | |
21220 | nb = n; | |
21221 | else | |
21222 | nb = 0; | |
21223 | } | |
21224 | ||
21225 | nb--; | |
21226 | ||
21227 | if (nb < 0 || ne < 0 || nb >= n || ne >= n) | |
21228 | return false; | |
21229 | ||
21230 | if (b) | |
21231 | *b = nb; | |
21232 | if (e) | |
21233 | *e = ne; | |
21234 | ||
21235 | return true; | |
21236 | } | |
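/* Illustrative SImode examples: 0x00ffff00 is valid, with *E == 8 and
   *B == 23 (a single run of ones in bits 8..23); the wrap-around mask
   0xff0000ff is also valid, with *E == 24 and *B == 7; 0x0f0f0f0f has
   more than one run of ones and is rejected. */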
21237 | ||
21238 | /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl, | |
21239 | or rldicr instruction, to implement an AND with it in mode MODE. */ | |
21240 | ||
21241 | bool | |
21242 | rs6000_is_valid_and_mask (rtx mask, machine_mode mode) | |
21243 | { | |
21244 | int nb, ne; | |
21245 | ||
21246 | if (!rs6000_is_valid_mask (mask, &nb, &ne, mode)) | |
21247 | return false; | |
21248 | ||
/* For DImode, we need a rldicl, rldicr, or a rlwinm with a mask that
21250 | does not wrap. */ | |
21251 | if (mode == DImode) | |
21252 | return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb)); | |
21253 | ||
21254 | /* For SImode, rlwinm can do everything. */ | |
21255 | if (mode == SImode) | |
21256 | return (nb < 32 && ne < 32); | |
21257 | ||
21258 | return false; | |
21259 | } | |
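/* Illustrative DImode examples: 0xffff is accepted (rldicl can do it),
   a mask of the 16 highest bits is accepted (rldicr), and 0xff0 is
   accepted (rlwinm); a wrap-around mask such as 0xff000000000000ff is
   rejected, since no single rl* insn can perform that AND. */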
21260 | ||
21261 | /* Return the instruction template for an AND with mask in mode MODE, with | |
21262 | operands OPERANDS. If DOT is true, make it a record-form instruction. */ | |
21263 | ||
21264 | const char * | |
21265 | rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot) | |
21266 | { | |
21267 | int nb, ne; | |
21268 | ||
21269 | if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode)) | |
21270 | gcc_unreachable (); | |
21271 | ||
21272 | if (mode == DImode && ne == 0) | |
21273 | { | |
21274 | operands[3] = GEN_INT (63 - nb); | |
21275 | if (dot) | |
21276 | return "rldicl. %0,%1,0,%3"; | |
21277 | return "rldicl %0,%1,0,%3"; | |
21278 | } | |
21279 | ||
21280 | if (mode == DImode && nb == 63) | |
21281 | { | |
21282 | operands[3] = GEN_INT (63 - ne); | |
21283 | if (dot) | |
21284 | return "rldicr. %0,%1,0,%3"; | |
21285 | return "rldicr %0,%1,0,%3"; | |
21286 | } | |
21287 | ||
21288 | if (nb < 32 && ne < 32) | |
21289 | { | |
21290 | operands[3] = GEN_INT (31 - nb); | |
21291 | operands[4] = GEN_INT (31 - ne); | |
21292 | if (dot) | |
21293 | return "rlwinm. %0,%1,0,%3,%4"; | |
21294 | return "rlwinm %0,%1,0,%3,%4"; | |
21295 | } | |
21296 | ||
21297 | gcc_unreachable (); | |
21298 | } | |
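/* For example, an AND with mask 0xff0 (NB == 11, NE == 4) produces
   "rlwinm %0,%1,0,20,27", and a DImode AND with the 32 highest bits
   set (NB == 63, NE == 32) produces "rldicr %0,%1,0,31". */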
21299 | ||
21300 | /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm, | |
21301 | rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with | |
21302 | shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */ | |
21303 | ||
21304 | bool | |
21305 | rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode) | |
21306 | { | |
21307 | int nb, ne; | |
21308 | ||
21309 | if (!rs6000_is_valid_mask (mask, &nb, &ne, mode)) | |
21310 | return false; | |
21311 | ||
21312 | int n = GET_MODE_PRECISION (mode); | |
21313 | int sh = -1; | |
21314 | ||
21315 | if (CONST_INT_P (XEXP (shift, 1))) | |
21316 | { | |
21317 | sh = INTVAL (XEXP (shift, 1)); | |
21318 | if (sh < 0 || sh >= n) | |
21319 | return false; | |
21320 | } | |
21321 | ||
21322 | rtx_code code = GET_CODE (shift); | |
21323 | ||
/* Convert any shift by 0 to a rotate, to simplify the code below. */
21325 | if (sh == 0) | |
21326 | code = ROTATE; | |
21327 | ||
21328 | /* Convert rotate to simple shift if we can, to make analysis simpler. */ | |
21329 | if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh) | |
21330 | code = ASHIFT; | |
21331 | if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh) | |
21332 | { | |
21333 | code = LSHIFTRT; | |
21334 | sh = n - sh; | |
21335 | } | |
21336 | ||
21337 | /* DImode rotates need rld*. */ | |
21338 | if (mode == DImode && code == ROTATE) | |
21339 | return (nb == 63 || ne == 0 || ne == sh); | |
21340 | ||
21341 | /* SImode rotates need rlw*. */ | |
21342 | if (mode == SImode && code == ROTATE) | |
21343 | return (nb < 32 && ne < 32 && sh < 32); | |
21344 | ||
21345 | /* Wrap-around masks are only okay for rotates. */ | |
21346 | if (ne > nb) | |
21347 | return false; | |
21348 | ||
21349 | /* Variable shifts are only okay for rotates. */ | |
21350 | if (sh < 0) | |
21351 | return false; | |
21352 | ||
21353 | /* Don't allow ASHIFT if the mask is wrong for that. */ | |
21354 | if (code == ASHIFT && ne < sh) | |
21355 | return false; | |
21356 | ||
21357 | /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT | |
21358 | if the mask is wrong for that. */ | |
21359 | if (nb < 32 && ne < 32 && sh < 32 | |
21360 | && !(code == LSHIFTRT && nb >= 32 - sh)) | |
21361 | return true; | |
21362 | ||
21363 | /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT | |
21364 | if the mask is wrong for that. */ | |
21365 | if (code == LSHIFTRT) | |
21366 | sh = 64 - sh; | |
21367 | if (nb == 63 || ne == 0 || ne == sh) | |
21368 | return !(code == LSHIFTRT && nb >= sh); | |
21369 | ||
21370 | return false; | |
21371 | } | |
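/* Illustrative SImode examples: an ASHIFT by 3 under the mask
   0xfffffff8 is accepted (a single rlwinm handles it), while the same
   shift under the mask 0xff is rejected: the shift leaves zeros in
   bits 0..2, but a rotate-and-mask would put rotated-in high bits
   there instead. */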
21372 | ||
21373 | /* Return the instruction template for a shift with mask in mode MODE, with | |
21374 | operands OPERANDS. If DOT is true, make it a record-form instruction. */ | |
21375 | ||
21376 | const char * | |
21377 | rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot) | |
21378 | { | |
21379 | int nb, ne; | |
21380 | ||
21381 | if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode)) | |
21382 | gcc_unreachable (); | |
21383 | ||
21384 | if (mode == DImode && ne == 0) | |
21385 | { | |
21386 | if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2])) | |
21387 | operands[2] = GEN_INT (64 - INTVAL (operands[2])); | |
21388 | operands[3] = GEN_INT (63 - nb); | |
21389 | if (dot) | |
21390 | return "rld%I2cl. %0,%1,%2,%3"; | |
21391 | return "rld%I2cl %0,%1,%2,%3"; | |
21392 | } | |
21393 | ||
21394 | if (mode == DImode && nb == 63) | |
21395 | { | |
21396 | operands[3] = GEN_INT (63 - ne); | |
21397 | if (dot) | |
21398 | return "rld%I2cr. %0,%1,%2,%3"; | |
21399 | return "rld%I2cr %0,%1,%2,%3"; | |
21400 | } | |
21401 | ||
21402 | if (mode == DImode | |
21403 | && GET_CODE (operands[4]) != LSHIFTRT | |
21404 | && CONST_INT_P (operands[2]) | |
21405 | && ne == INTVAL (operands[2])) | |
21406 | { | |
21407 | operands[3] = GEN_INT (63 - nb); | |
21408 | if (dot) | |
21409 | return "rld%I2c. %0,%1,%2,%3"; | |
21410 | return "rld%I2c %0,%1,%2,%3"; | |
21411 | } | |
21412 | ||
21413 | if (nb < 32 && ne < 32) | |
21414 | { | |
21415 | if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2])) | |
21416 | operands[2] = GEN_INT (32 - INTVAL (operands[2])); | |
21417 | operands[3] = GEN_INT (31 - nb); | |
21418 | operands[4] = GEN_INT (31 - ne); | |
/* This insn can also be a 64-bit rotate with a mask that really makes
21420 | it just a shift right (with mask); the %h below are to adjust for | |
21421 | that situation (shift count is >= 32 in that case). */ | |
21422 | if (dot) | |
21423 | return "rlw%I2nm. %0,%1,%h2,%3,%4"; | |
21424 | return "rlw%I2nm %0,%1,%h2,%3,%4"; | |
21425 | } | |
21426 | ||
21427 | gcc_unreachable (); | |
21428 | } | |
21429 | ||
21430 | /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or | |
21431 | rldimi instruction, to implement an insert with shift SHIFT (a ROTATE, | |
21432 | ASHIFT, or LSHIFTRT) in mode MODE. */ | |
21433 | ||
21434 | bool | |
21435 | rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode) | |
21436 | { | |
21437 | int nb, ne; | |
21438 | ||
21439 | if (!rs6000_is_valid_mask (mask, &nb, &ne, mode)) | |
21440 | return false; | |
21441 | ||
21442 | int n = GET_MODE_PRECISION (mode); | |
21443 | ||
21444 | int sh = INTVAL (XEXP (shift, 1)); | |
21445 | if (sh < 0 || sh >= n) | |
21446 | return false; | |
21447 | ||
21448 | rtx_code code = GET_CODE (shift); | |
21449 | ||
/* Convert any shift by 0 to a rotate, to simplify the code below. */
21451 | if (sh == 0) | |
21452 | code = ROTATE; | |
21453 | ||
21454 | /* Convert rotate to simple shift if we can, to make analysis simpler. */ | |
21455 | if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh) | |
21456 | code = ASHIFT; | |
21457 | if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh) | |
21458 | { | |
21459 | code = LSHIFTRT; | |
21460 | sh = n - sh; | |
21461 | } | |
21462 | ||
21463 | /* DImode rotates need rldimi. */ | |
21464 | if (mode == DImode && code == ROTATE) | |
21465 | return (ne == sh); | |
21466 | ||
21467 | /* SImode rotates need rlwimi. */ | |
21468 | if (mode == SImode && code == ROTATE) | |
21469 | return (nb < 32 && ne < 32 && sh < 32); | |
21470 | ||
21471 | /* Wrap-around masks are only okay for rotates. */ | |
21472 | if (ne > nb) | |
21473 | return false; | |
21474 | ||
21475 | /* Don't allow ASHIFT if the mask is wrong for that. */ | |
21476 | if (code == ASHIFT && ne < sh) | |
21477 | return false; | |
21478 | ||
21479 | /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT | |
21480 | if the mask is wrong for that. */ | |
21481 | if (nb < 32 && ne < 32 && sh < 32 | |
21482 | && !(code == LSHIFTRT && nb >= 32 - sh)) | |
21483 | return true; | |
21484 | ||
21485 | /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT | |
21486 | if the mask is wrong for that. */ | |
21487 | if (code == LSHIFTRT) | |
21488 | sh = 64 - sh; | |
21489 | if (ne == sh) | |
21490 | return !(code == LSHIFTRT && nb >= sh); | |
21491 | ||
21492 | return false; | |
21493 | } | |
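/* For example (an illustration only): in SImode, inserting the low 16 bits
   of one register into the top half of another is an IOR of (and x 0xffff)
   with (ashift y 16) under the mask 0xffff0000; since ne == sh == 16, a
   single rlwimi can perform the insert.  */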
21494 | ||
21495 | /* Return the instruction template for an insert with mask in mode MODE, with | |
21496 | operands OPERANDS. If DOT is true, make it a record-form instruction. */ | |
21497 | ||
21498 | const char * | |
21499 | rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot) | |
21500 | { | |
21501 | int nb, ne; | |
21502 | ||
21503 | if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode)) | |
21504 | gcc_unreachable (); | |
21505 | ||
21506 | /* Prefer rldimi because rlwimi is cracked. */ | |
21507 | if (TARGET_POWERPC64 | |
21508 | && (!dot || mode == DImode) | |
21509 | && GET_CODE (operands[4]) != LSHIFTRT | |
21510 | && ne == INTVAL (operands[2])) | |
21511 | { | |
21512 | operands[3] = GEN_INT (63 - nb); | |
21513 | if (dot) | |
21514 | return "rldimi. %0,%1,%2,%3"; | |
21515 | return "rldimi %0,%1,%2,%3"; | |
21516 | } | |
21517 | ||
21518 | if (nb < 32 && ne < 32) | |
21519 | { | |
21520 | if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2])) | |
21521 | operands[2] = GEN_INT (32 - INTVAL (operands[2])); | |
21522 | operands[3] = GEN_INT (31 - nb); | |
21523 | operands[4] = GEN_INT (31 - ne); | |
21524 | if (dot) | |
21525 | return "rlwimi. %0,%1,%2,%3,%4"; | |
21526 | return "rlwimi %0,%1,%2,%3,%4"; | |
21527 | } | |
21528 | ||
21529 | gcc_unreachable (); | |
21530 | } | |
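/* A minimal stand-alone sketch of the semantics the rlwimi template above
   implements (hypothetical host-side helper, not part of the compiler;
   assumes SH is in [0,31] and MASK is the expanded MB..ME mask):  */
#if 0
static unsigned int
rlwimi_semantics (unsigned int ra, unsigned int rs, int sh, unsigned int mask)
{
  /* Rotate RS left by SH (the "& 31" avoids an undefined shift by 32).  */
  unsigned int rot = (rs << sh) | (rs >> ((32 - sh) & 31));
  /* Keep RA outside the mask, insert the rotated RS inside the mask.  */
  return (ra & ~mask) | (rot & mask);
}
#endif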
21531 | ||
21532 | /* Return whether an AND with C (a CONST_INT) in mode MODE can be done | |
21533 | using two machine instructions. */ | |
21534 | ||
21535 | bool | |
21536 | rs6000_is_valid_2insn_and (rtx c, machine_mode mode) | |
21537 | { | |
21538 | /* There are two kinds of AND we can handle with two insns: | |
21539 | 1) those we can do with two rl* insns; | |
21540 | 2) ori[s];xori[s]. | |
21541 | ||
21542 | We do not handle that last case yet. */ | |
21543 | ||
21544 | /* If there is just one stretch of ones, we can do it. */ | |
21545 | if (rs6000_is_valid_mask (c, NULL, NULL, mode)) | |
21546 | return true; | |
21547 | ||
21548 | /* Otherwise, fill in the lowest "hole"; if we can do the result with | |
21549 | one insn, we can do the whole thing with two. */ | |
21550 | unsigned HOST_WIDE_INT val = INTVAL (c); | |
21551 | unsigned HOST_WIDE_INT bit1 = val & -val; | |
21552 | unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val; | |
21553 | unsigned HOST_WIDE_INT val1 = (val + bit1) & val; | |
21554 | unsigned HOST_WIDE_INT bit3 = val1 & -val1; | |
21555 | return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode); | |
21556 | } | |
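/* Worked example of the hole-filling arithmetic above, for
   val = 0b11100111 (values chosen for illustration only):
     bit1 = val & -val          = 0b00000001   (lowest set bit)
     bit2 = (val + bit1) & ~val = 0b00001000   (lowest bit of the hole)
     val1 = (val + bit1) & val  = 0b11100000   (val minus its lowest run)
     bit3 = val1 & -val1        = 0b00100000   (first bit above the hole)
   so val + bit3 - bit2 = 0b11111111: the lowest hole is filled, and one
   rs6000_is_valid_and_mask test on that value decides the two-insn case.  */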
21557 | ||
21558 | /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS. | |
21559 | If EXPAND is true, split rotate-and-mask instructions we generate to | |
21560 | their constituent parts as well (this is used during expand); if DOT | |
21561 | is 1, make the last insn a record-form instruction clobbering the | |
21562 | destination GPR and setting the CC reg (from operands[3]); if 2, set | |
21563 | that GPR as well as the CC reg. */ | |
21564 | ||
21565 | void | |
21566 | rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot) | |
21567 | { | |
21568 | gcc_assert (!(expand && dot)); | |
21569 | ||
21570 | unsigned HOST_WIDE_INT val = INTVAL (operands[2]); | |
21571 | ||
21572 | /* If it is one stretch of ones, it is DImode; shift left, mask, then | |
21573 | shift right. This generates better code than doing the masks without | |
21574 | shifts, or shifting first right and then left. */ | |
21575 | int nb, ne; | |
21576 | if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne) | |
21577 | { | |
21578 | gcc_assert (mode == DImode); | |
21579 | ||
21580 | int shift = 63 - nb; | |
21581 | if (expand) | |
21582 | { | |
21583 | rtx tmp1 = gen_reg_rtx (DImode); | |
21584 | rtx tmp2 = gen_reg_rtx (DImode); | |
21585 | emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift))); | |
21586 | emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift))); | |
21587 | emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift))); | |
21588 | } | |
21589 | else | |
21590 | { | |
21591 | rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift)); | |
21592 | tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift)); | |
21593 | emit_move_insn (operands[0], tmp); | |
21594 | tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift)); | |
21595 | rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0); | |
21596 | } | |
21597 | return; | |
21598 | } | |
21599 | ||
21600 | /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1 | |
21601 | that does the rest. */ | |
21602 | unsigned HOST_WIDE_INT bit1 = val & -val; | |
21603 | unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val; | |
21604 | unsigned HOST_WIDE_INT val1 = (val + bit1) & val; | |
21605 | unsigned HOST_WIDE_INT bit3 = val1 & -val1; | |
21606 | ||
21607 | unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1; | |
21608 | unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2; | |
21609 | ||
21610 | gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode)); | |
21611 | ||
21612 | /* Two "no-rotate"-and-mask instructions, for SImode. */ | |
21613 | if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode)) | |
21614 | { | |
21615 | gcc_assert (mode == SImode); | |
21616 | ||
21617 | rtx reg = expand ? gen_reg_rtx (mode) : operands[0]; | |
21618 | rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1)); | |
21619 | emit_move_insn (reg, tmp); | |
21620 | tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2)); | |
21621 | rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0); | |
21622 | return; | |
21623 | } | |
21624 | ||
21625 | gcc_assert (mode == DImode); | |
21626 | ||
21627 | /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm | |
21628 | insns; we have to do the first in SImode, because it wraps. */ | |
21629 | if (mask2 <= 0xffffffff | |
21630 | && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode)) | |
21631 | { | |
21632 | rtx reg = expand ? gen_reg_rtx (mode) : operands[0]; | |
21633 | rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]), | |
21634 | GEN_INT (mask1)); | |
21635 | rtx reg_low = gen_lowpart (SImode, reg); | |
21636 | emit_move_insn (reg_low, tmp); | |
21637 | tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2)); | |
21638 | rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0); | |
21639 | return; | |
21640 | } | |
21641 | ||
21642 | /* Two rld* insns: rotate, clear the hole in the middle (which now is | |
21643 | at the top end), rotate back and clear the other hole. */ | |
21644 | int right = exact_log2 (bit3); | |
21645 | int left = 64 - right; | |
21646 | ||
21647 | /* Rotate the mask too. */ | |
21648 | mask1 = (mask1 >> right) | ((bit2 - 1) << left); | |
21649 | ||
21650 | if (expand) | |
21651 | { | |
21652 | rtx tmp1 = gen_reg_rtx (DImode); | |
21653 | rtx tmp2 = gen_reg_rtx (DImode); | |
21654 | rtx tmp3 = gen_reg_rtx (DImode); | |
21655 | emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left))); | |
21656 | emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1))); | |
21657 | emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right))); | |
21658 | emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2))); | |
21659 | } | |
21660 | else | |
21661 | { | |
21662 | rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left)); | |
21663 | tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1)); | |
21664 | emit_move_insn (operands[0], tmp); | |
21665 | tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right)); | |
21666 | tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2)); | |
21667 | rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0); | |
21668 | } | |
21669 | } | |
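/* Example of the two-rlwinm case above (an illustration; masks written
   out rather than as MB/ME bit numbers): for an SImode AND with
   0x00ff00ff, the code derives mask1 = 0xffff00ff (a wrap-around rlwinm
   mask) and mask2 = 0x00ffffff, and indeed
        (x & 0xffff00ff) & 0x00ffffff == x & 0x00ff00ff.  */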
21670 | \f | |
21671 | /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates | |
21672 | for lfq and stfq insns iff the registers are hard registers. */ | |
21673 | ||
21674 | int | |
21675 | registers_ok_for_quad_peep (rtx reg1, rtx reg2) | |
21676 | { | |
21677 | /* We might have been passed a SUBREG. */ | |
21678 | if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG) | |
21679 | return 0; | |
21680 | ||
21681 | /* We might have been passed non-floating-point registers. */ | |
21682 | if (!FP_REGNO_P (REGNO (reg1)) | |
21683 | || !FP_REGNO_P (REGNO (reg2))) | |
21684 | return 0; | |
21685 | ||
21686 | return (REGNO (reg1) == REGNO (reg2) - 1); | |
21687 | } | |
21688 | ||
21689 | /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn. | |
21690 | addr1 and addr2 must be in consecutive memory locations | |
21691 | (addr2 == addr1 + 8). */ | |
21692 | ||
21693 | int | |
21694 | mems_ok_for_quad_peep (rtx mem1, rtx mem2) | |
21695 | { | |
21696 | rtx addr1, addr2; | |
21697 | unsigned int reg1, reg2; | |
21698 | int offset1, offset2; | |
21699 | ||
21700 | /* The mems cannot be volatile. */ | |
21701 | if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2)) | |
21702 | return 0; | |
21703 | ||
21704 | addr1 = XEXP (mem1, 0); | |
21705 | addr2 = XEXP (mem2, 0); | |
21706 | ||
21707 | /* Extract an offset (if used) from the first addr. */ | |
21708 | if (GET_CODE (addr1) == PLUS) | |
21709 | { | |
21710 | /* If not a REG, return zero. */ | |
21711 | if (GET_CODE (XEXP (addr1, 0)) != REG) | |
21712 | return 0; | |
21713 | else | |
21714 | { | |
21715 | reg1 = REGNO (XEXP (addr1, 0)); | |
21716 | /* The offset must be constant! */ | |
21717 | if (GET_CODE (XEXP (addr1, 1)) != CONST_INT) | |
21718 | return 0; | |
21719 | offset1 = INTVAL (XEXP (addr1, 1)); | |
21720 | } | |
21721 | } | |
21722 | else if (GET_CODE (addr1) != REG) | |
21723 | return 0; | |
21724 | else | |
21725 | { | |
21726 | reg1 = REGNO (addr1); | |
21727 | /* This was a simple (mem (reg)) expression. Offset is 0. */ | |
21728 | offset1 = 0; | |
21729 | } | |
21730 | ||
21731 | /* And now for the second addr. */ | |
21732 | if (GET_CODE (addr2) == PLUS) | |
21733 | { | |
21734 | /* If not a REG, return zero. */ | |
21735 | if (GET_CODE (XEXP (addr2, 0)) != REG) | |
21736 | return 0; | |
21737 | else | |
21738 | { | |
21739 | reg2 = REGNO (XEXP (addr2, 0)); | |
21740 | /* The offset must be constant. */ | |
21741 | if (GET_CODE (XEXP (addr2, 1)) != CONST_INT) | |
21742 | return 0; | |
21743 | offset2 = INTVAL (XEXP (addr2, 1)); | |
21744 | } | |
21745 | } | |
21746 | else if (GET_CODE (addr2) != REG) | |
21747 | return 0; | |
21748 | else | |
21749 | { | |
21750 | reg2 = REGNO (addr2); | |
21751 | /* This was a simple (mem (reg)) expression. Offset is 0. */ | |
21752 | offset2 = 0; | |
21753 | } | |
21754 | ||
21755 | /* Both of these must have the same base register. */ | |
21756 | if (reg1 != reg2) | |
21757 | return 0; | |
21758 | ||
21759 | /* The offset for the second addr must be 8 more than the first addr. */ | |
21760 | if (offset2 != offset1 + 8) | |
21761 | return 0; | |
21762 | ||
21763 | /* All the tests passed. addr1 and addr2 are valid for lfq or stfq | |
21764 | instructions. */ | |
21765 | return 1; | |
21766 | } | |
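/* For example, the MEM pair
        (mem:DF (plus (reg 3) (const_int 8)))
        (mem:DF (plus (reg 3) (const_int 16)))
   qualifies: neither is volatile, both use the same base register, and
   the second offset is exactly 8 more than the first.  */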
21767 | \f | |
21768 | ||
21769 | rtx | |
21770 | rs6000_secondary_memory_needed_rtx (machine_mode mode) | |
21771 | { | |
21772 | static bool eliminated = false; | |
21773 | rtx ret; | |
21774 | ||
21775 | if (mode != SDmode || TARGET_NO_SDMODE_STACK) | |
21776 | ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0); | |
21777 | else | |
21778 | { | |
21779 | rtx mem = cfun->machine->sdmode_stack_slot; | |
21780 | gcc_assert (mem != NULL_RTX); | |
21781 | ||
21782 | if (!eliminated) | |
21783 | { | |
21784 | mem = eliminate_regs (mem, VOIDmode, NULL_RTX); | |
21785 | cfun->machine->sdmode_stack_slot = mem; | |
21786 | eliminated = true; | |
21787 | } | |
21788 | ret = mem; | |
21789 | } | |
21790 | ||
21791 | if (TARGET_DEBUG_ADDR) | |
21792 | { | |
21793 | fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n", | |
21794 | GET_MODE_NAME (mode)); | |
21795 | if (!ret) | |
21796 | fprintf (stderr, "\tNULL_RTX\n"); | |
21797 | else | |
21798 | debug_rtx (ret); | |
21799 | } | |
21800 | ||
21801 | return ret; | |
21802 | } | |
21803 | ||
21804 | /* Return the mode to be used for memory when a secondary memory | |
21805 | location is needed. For SDmode values we need to use DDmode; in | |
21806 | all other cases we can use the same mode. */ | |
21807 | machine_mode | |
21808 | rs6000_secondary_memory_needed_mode (machine_mode mode) | |
21809 | { | |
21810 | if (lra_in_progress && mode == SDmode) | |
21811 | return DDmode; | |
21812 | return mode; | |
21813 | } | |
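/* That is, under LRA an SDmode value is spilled to (and reloaded from) an
   8-byte DDmode stack slot, so the 64-bit FPR load/store instructions can
   be used to access it.  */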
21814 | ||
21815 | static tree | |
21816 | rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED) | |
21817 | { | |
21818 | /* Don't walk into types. */ | |
21819 | if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp)) | |
21820 | { | |
21821 | *walk_subtrees = 0; | |
21822 | return NULL_TREE; | |
21823 | } | |
21824 | ||
21825 | switch (TREE_CODE (*tp)) | |
21826 | { | |
21827 | case VAR_DECL: | |
21828 | case PARM_DECL: | |
21829 | case FIELD_DECL: | |
21830 | case RESULT_DECL: | |
21831 | case SSA_NAME: | |
21832 | case REAL_CST: | |
21833 | case MEM_REF: | |
21834 | case VIEW_CONVERT_EXPR: | |
21835 | if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode) | |
21836 | return *tp; | |
21837 | break; | |
21838 | default: | |
21839 | break; | |
21840 | } | |
21841 | ||
21842 | return NULL_TREE; | |
21843 | } | |
21844 | ||
21845 | /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work | |
21846 | on traditional floating point registers, and the VMRGOW/VMRGEW instructions | |
21847 | only work on the traditional altivec registers, note if an altivec register | |
21848 | was chosen. */ | |
21849 | ||
21850 | static enum rs6000_reg_type | |
21851 | register_to_reg_type (rtx reg, bool *is_altivec) | |
21852 | { | |
21853 | HOST_WIDE_INT regno; | |
21854 | enum reg_class rclass; | |
21855 | ||
21856 | if (GET_CODE (reg) == SUBREG) | |
21857 | reg = SUBREG_REG (reg); | |
21858 | ||
21859 | if (!REG_P (reg)) | |
21860 | return NO_REG_TYPE; | |
21861 | ||
21862 | regno = REGNO (reg); | |
21863 | if (regno >= FIRST_PSEUDO_REGISTER) | |
21864 | { | |
21865 | if (!lra_in_progress && !reload_in_progress && !reload_completed) | |
21866 | return PSEUDO_REG_TYPE; | |
21867 | ||
21868 | regno = true_regnum (reg); | |
21869 | if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER) | |
21870 | return PSEUDO_REG_TYPE; | |
21871 | } | |
21872 | ||
21873 | gcc_assert (regno >= 0); | |
21874 | ||
21875 | if (is_altivec && ALTIVEC_REGNO_P (regno)) | |
21876 | *is_altivec = true; | |
21877 | ||
21878 | rclass = rs6000_regno_regclass[regno]; | |
21879 | return reg_class_to_reg_type[(int)rclass]; | |
21880 | } | |
21881 | ||
21882 | /* Helper function to return the cost of adding a TOC entry address. */ | |
21883 | ||
21884 | static inline int | |
21885 | rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask) | |
21886 | { | |
21887 | int ret; | |
21888 | ||
21889 | if (TARGET_CMODEL != CMODEL_SMALL) | |
21890 | ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2; | |
21891 | ||
21892 | else | |
21893 | ret = (TARGET_MINIMAL_TOC) ? 6 : 3; | |
21894 | ||
21895 | return ret; | |
21896 | } | |
21897 | ||
21898 | /* Helper function for rs6000_secondary_reload to determine whether the memory | |
21899 | address (ADDR) with a given register class (RCLASS) and machine mode (MODE) | |
21900 | needs reloading. Return negative if the memory is not handled by the | |
21901 | memory helper functions (so the caller should try a different reload | |
21902 | method), 0 if no additional instructions are needed, and positive to | |
21903 | give the extra cost of reloading the memory. */ | |
21904 | ||
21905 | static int | |
21906 | rs6000_secondary_reload_memory (rtx addr, | |
21907 | enum reg_class rclass, | |
21908 | machine_mode mode) | |
21909 | { | |
21910 | int extra_cost = 0; | |
21911 | rtx reg, and_arg, plus_arg0, plus_arg1; | |
21912 | addr_mask_type addr_mask; | |
21913 | const char *type = NULL; | |
21914 | const char *fail_msg = NULL; | |
21915 | ||
21916 | if (GPR_REG_CLASS_P (rclass)) | |
21917 | addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR]; | |
21918 | ||
21919 | else if (rclass == FLOAT_REGS) | |
21920 | addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR]; | |
21921 | ||
21922 | else if (rclass == ALTIVEC_REGS) | |
21923 | addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX]; | |
21924 | ||
21925 | /* For the combined VSX_REGS, turn off Altivec AND -16. */ | |
21926 | else if (rclass == VSX_REGS) | |
21927 | addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX] | |
21928 | & ~RELOAD_REG_AND_M16); | |
21929 | ||
21930 | /* If the register allocator hasn't made up its mind yet on the register | |
21931 | class to use, settle on sensible defaults. */ | |
21932 | else if (rclass == NO_REGS) | |
21933 | { | |
21934 | addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY] | |
21935 | & ~RELOAD_REG_AND_M16); | |
21936 | ||
21937 | if ((addr_mask & RELOAD_REG_MULTIPLE) != 0) | |
21938 | addr_mask &= ~(RELOAD_REG_INDEXED | |
21939 | | RELOAD_REG_PRE_INCDEC | |
21940 | | RELOAD_REG_PRE_MODIFY); | |
21941 | } | |
21942 | ||
21943 | else | |
21944 | addr_mask = 0; | |
21945 | ||
21946 | /* If the mode isn't valid in this register class, just return now. */ | |
21947 | if ((addr_mask & RELOAD_REG_VALID) == 0) | |
21948 | { | |
21949 | if (TARGET_DEBUG_ADDR) | |
21950 | { | |
21951 | fprintf (stderr, | |
21952 | "rs6000_secondary_reload_memory: mode = %s, class = %s, " | |
21953 | "not valid in class\n", | |
21954 | GET_MODE_NAME (mode), reg_class_names[rclass]); | |
21955 | debug_rtx (addr); | |
21956 | } | |
21957 | ||
21958 | return -1; | |
21959 | } | |
21960 | ||
21961 | switch (GET_CODE (addr)) | |
21962 | { | |
21963 | /* Does the register class support auto update forms for this mode? We | |
21964 | don't need a scratch register, since the powerpc only supports | |
21965 | PRE_INC, PRE_DEC, and PRE_MODIFY. */ | |
21966 | case PRE_INC: | |
21967 | case PRE_DEC: | |
21968 | reg = XEXP (addr, 0); | |
21969 | if (!base_reg_operand (reg, GET_MODE (reg))) | |
21970 | { | |
21971 | fail_msg = "no base register #1"; | |
21972 | extra_cost = -1; | |
21973 | } | |
21974 | ||
21975 | else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0) | |
21976 | { | |
21977 | extra_cost = 1; | |
21978 | type = "update"; | |
21979 | } | |
21980 | break; | |
21981 | ||
21982 | case PRE_MODIFY: | |
21983 | reg = XEXP (addr, 0); | |
21984 | plus_arg1 = XEXP (addr, 1); | |
21985 | if (!base_reg_operand (reg, GET_MODE (reg)) | |
21986 | || GET_CODE (plus_arg1) != PLUS | |
21987 | || !rtx_equal_p (reg, XEXP (plus_arg1, 0))) | |
21988 | { | |
21989 | fail_msg = "bad PRE_MODIFY"; | |
21990 | extra_cost = -1; | |
21991 | } | |
21992 | ||
21993 | else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0) | |
21994 | { | |
21995 | extra_cost = 1; | |
21996 | type = "update"; | |
21997 | } | |
21998 | break; | |
21999 | ||
22000 | /* Do we need to simulate AND -16 to clear the bottom address bits used | |
22001 | in VMX load/stores? Only allow the AND for vector sizes. */ | |
22002 | case AND: | |
22003 | and_arg = XEXP (addr, 0); | |
22004 | if (GET_MODE_SIZE (mode) != 16 | |
22005 | || GET_CODE (XEXP (addr, 1)) != CONST_INT | |
22006 | || INTVAL (XEXP (addr, 1)) != -16) | |
22007 | { | |
22008 | fail_msg = "bad Altivec AND #1"; | |
22009 | extra_cost = -1; | |
22010 | } | |
22011 | ||
22012 | else if (rclass != ALTIVEC_REGS) | |
22013 | { | |
22014 | if (legitimate_indirect_address_p (and_arg, false)) | |
22015 | extra_cost = 1; | |
22016 | ||
22017 | else if (legitimate_indexed_address_p (and_arg, false)) | |
22018 | extra_cost = 2; | |
22019 | ||
22020 | else | |
22021 | { | |
22022 | fail_msg = "bad Altivec AND #2"; | |
22023 | extra_cost = -1; | |
22024 | } | |
22025 | ||
22026 | type = "and"; | |
22027 | } | |
22028 | break; | |
22029 | ||
22030 | /* If this is an indirect address, make sure it is a base register. */ | |
22031 | case REG: | |
22032 | case SUBREG: | |
22033 | if (!legitimate_indirect_address_p (addr, false)) | |
22034 | { | |
22035 | extra_cost = 1; | |
22036 | type = "move"; | |
22037 | } | |
22038 | break; | |
22039 | ||
22040 | /* If this is an indexed address, make sure the register class can handle | |
22041 | indexed addresses for this mode. */ | |
22042 | case PLUS: | |
22043 | plus_arg0 = XEXP (addr, 0); | |
22044 | plus_arg1 = XEXP (addr, 1); | |
22045 | ||
22046 | /* (plus (plus (reg) (constant)) (constant)) is generated during | |
22047 | push_reload processing, so handle it now. */ | |
22048 | if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1)) | |
22049 | { | |
22050 | if ((addr_mask & RELOAD_REG_OFFSET) == 0) | |
22051 | { | |
22052 | extra_cost = 1; | |
22053 | type = "offset"; | |
22054 | } | |
22055 | } | |
22056 | ||
22057 | /* (plus (plus (reg) (constant)) (reg)) is also generated during | |
22058 | push_reload processing, so handle it now. */ | |
22059 | else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1)) | |
22060 | { | |
22061 | if ((addr_mask & RELOAD_REG_INDEXED) == 0) | |
22062 | { | |
22063 | extra_cost = 1; | |
22064 | type = "indexed #2"; | |
22065 | } | |
22066 | } | |
22067 | ||
22068 | else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0))) | |
22069 | { | |
22070 | fail_msg = "no base register #2"; | |
22071 | extra_cost = -1; | |
22072 | } | |
22073 | ||
22074 | else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1))) | |
22075 | { | |
22076 | if ((addr_mask & RELOAD_REG_INDEXED) == 0 | |
22077 | || !legitimate_indexed_address_p (addr, false)) | |
22078 | { | |
22079 | extra_cost = 1; | |
22080 | type = "indexed"; | |
22081 | } | |
22082 | } | |
22083 | ||
22084 | else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0 | |
22085 | && CONST_INT_P (plus_arg1)) | |
22086 | { | |
22087 | if (!quad_address_offset_p (INTVAL (plus_arg1))) | |
22088 | { | |
22089 | extra_cost = 1; | |
22090 | type = "vector d-form offset"; | |
22091 | } | |
22092 | } | |
22093 | ||
22094 | /* Make sure the register class can handle offset addresses. */ | |
22095 | else if (rs6000_legitimate_offset_address_p (mode, addr, false, true)) | |
22096 | { | |
22097 | if ((addr_mask & RELOAD_REG_OFFSET) == 0) | |
22098 | { | |
22099 | extra_cost = 1; | |
22100 | type = "offset #2"; | |
22101 | } | |
22102 | } | |
22103 | ||
22104 | else | |
22105 | { | |
22106 | fail_msg = "bad PLUS"; | |
22107 | extra_cost = -1; | |
22108 | } | |
22109 | ||
22110 | break; | |
22111 | ||
22112 | case LO_SUM: | |
22113 | /* Quad offsets are restricted and can't handle normal addresses. */ | |
22114 | if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0) | |
22115 | { | |
22116 | extra_cost = -1; | |
22117 | type = "vector d-form lo_sum"; | |
22118 | } | |
22119 | ||
22120 | else if (!legitimate_lo_sum_address_p (mode, addr, false)) | |
22121 | { | |
22122 | fail_msg = "bad LO_SUM"; | |
22123 | extra_cost = -1; | |
22124 | } | |
22125 | ||
22126 | else if ((addr_mask & RELOAD_REG_OFFSET) == 0) | |
22127 | { | |
22128 | extra_cost = 1; | |
22129 | type = "lo_sum"; | |
22130 | } | |
22131 | break; | |
22132 | ||
22133 | /* Static addresses need to create a TOC entry. */ | |
22134 | case CONST: | |
22135 | case SYMBOL_REF: | |
22136 | case LABEL_REF: | |
22137 | if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0) | |
22138 | { | |
22139 | extra_cost = -1; | |
22140 | type = "vector d-form lo_sum #2"; | |
22141 | } | |
22142 | ||
22143 | else | |
22144 | { | |
22145 | type = "address"; | |
22146 | extra_cost = rs6000_secondary_reload_toc_costs (addr_mask); | |
22147 | } | |
22148 | break; | |
22149 | ||
22150 | /* TOC references look like offsettable memory. */ | |
22151 | case UNSPEC: | |
22152 | if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL) | |
22153 | { | |
22154 | fail_msg = "bad UNSPEC"; | |
22155 | extra_cost = -1; | |
22156 | } | |
22157 | ||
22158 | else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0) | |
22159 | { | |
22160 | extra_cost = -1; | |
22161 | type = "vector d-form lo_sum #3"; | |
22162 | } | |
22163 | ||
22164 | else if ((addr_mask & RELOAD_REG_OFFSET) == 0) | |
22165 | { | |
22166 | extra_cost = 1; | |
22167 | type = "toc reference"; | |
22168 | } | |
22169 | break; | |
22170 | ||
22171 | default: | |
22172 | { | |
22173 | fail_msg = "bad address"; | |
22174 | extra_cost = -1; | |
22175 | } | |
22176 | } | |
22177 | ||
22178 | if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */) | |
22179 | { | |
22180 | if (extra_cost < 0) | |
22181 | fprintf (stderr, | |
22182 | "rs6000_secondary_reload_memory error: mode = %s, " | |
22183 | "class = %s, addr_mask = '%s', %s\n", | |
22184 | GET_MODE_NAME (mode), | |
22185 | reg_class_names[rclass], | |
22186 | rs6000_debug_addr_mask (addr_mask, false), | |
22187 | (fail_msg != NULL) ? fail_msg : "<bad address>"); | |
22188 | ||
22189 | else | |
22190 | fprintf (stderr, | |
22191 | "rs6000_secondary_reload_memory: mode = %s, class = %s, " | |
22192 | "addr_mask = '%s', extra cost = %d, %s\n", | |
22193 | GET_MODE_NAME (mode), | |
22194 | reg_class_names[rclass], | |
22195 | rs6000_debug_addr_mask (addr_mask, false), | |
22196 | extra_cost, | |
22197 | (type) ? type : "<none>"); | |
22198 | ||
22199 | debug_rtx (addr); | |
22200 | } | |
22201 | ||
22202 | return extra_cost; | |
22203 | } | |
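/* The caller (rs6000_secondary_reload below) interprets the result as:
     ret < 0   address not handled here; fall back to another strategy;
     ret == 0  address is already valid; no scratch register is needed;
     ret > 0   a reload helper is used, at a cost of RET extra insns.  */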
22204 | ||
22205 | /* Helper function for rs6000_secondary_reload to return true if a move to a | |
22206 | different register class is really a simple move. */ | |
22207 | ||
22208 | static bool | |
22209 | rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type, | |
22210 | enum rs6000_reg_type from_type, | |
22211 | machine_mode mode) | |
22212 | { | |
22213 | int size = GET_MODE_SIZE (mode); | |
22214 | ||
22215 | /* Handle the various direct moves available. In this function, we only | |
22216 | look at cases where we don't need any extra registers, and one or more | |
22217 | simple move insns are issued. Small integers are not normally allowed | |
22218 | in FPR/VSX registers. Single precision binary floating point is not a | |
22219 | simple move because we need to convert to the single precision memory | |
22220 | layout. The 4-byte SDmode can be moved. TDmode values are disallowed | |
22221 | since they need special direct move handling, which we do not support yet. */ | |
22222 | if (TARGET_DIRECT_MOVE | |
22223 | && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) | |
22224 | || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE))) | |
22225 | { | |
22226 | if (TARGET_POWERPC64) | |
22227 | { | |
22228 | /* ISA 2.07: MTVSRD or MFVSRD. */ | |
22229 | if (size == 8) | |
22230 | return true; | |
22231 | ||
22232 | /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */ | |
22233 | if (size == 16 && TARGET_P9_VECTOR && mode != TDmode) | |
22234 | return true; | |
22235 | } | |
22236 | ||
22237 | /* ISA 2.07: MTVSRWZ or MFVSRWZ. */ | |
22238 | if (TARGET_VSX_SMALL_INTEGER) | |
22239 | { | |
22240 | if (mode == SImode) | |
22241 | return true; | |
22242 | ||
22243 | if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode)) | |
22244 | return true; | |
22245 | } | |
22246 | ||
22247 | /* ISA 2.07: MTVSRWZ or MFVSRWZ. */ | |
22248 | if (mode == SDmode) | |
22249 | return true; | |
22250 | } | |
22251 | ||
22252 | /* Power6+: MFTGPR or MFFGPR. */ | |
22253 | else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8 | |
22254 | && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE) | |
22255 | || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE))) | |
22256 | return true; | |
22257 | ||
22258 | /* Move to/from SPR. */ | |
22259 | else if ((size == 4 || (TARGET_POWERPC64 && size == 8)) | |
22260 | && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE) | |
22261 | || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE))) | |
22262 | return true; | |
22263 | ||
22264 | return false; | |
22265 | } | |
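/* For example, on a 64-bit ISA 2.07 target, a DImode move between a GPR
   and a VSX register is "simple": a single mtvsrd or mfvsrd does it, so
   no extra register or helper pattern is needed.  */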
22266 | ||
22267 | /* Direct move helper function for rs6000_secondary_reload. Handle all of | |
22268 | the special direct moves that involve allocating an extra register. If a | |
22269 | helper insn exists, record its insn code and cost in SRI and return true; | |
22270 | otherwise return false. */ | |
22271 | ||
22272 | static bool | |
22273 | rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type, | |
22274 | enum rs6000_reg_type from_type, | |
22275 | machine_mode mode, | |
22276 | secondary_reload_info *sri, | |
22277 | bool altivec_p) | |
22278 | { | |
22279 | bool ret = false; | |
22280 | enum insn_code icode = CODE_FOR_nothing; | |
22281 | int cost = 0; | |
22282 | int size = GET_MODE_SIZE (mode); | |
22283 | ||
22284 | if (TARGET_POWERPC64 && size == 16) | |
22285 | { | |
22286 | /* Handle moving 128-bit values from GPRs to VSX registers on | |
22287 | ISA 2.07 (power8, power9) when running in 64-bit mode using | |
22288 | XXPERMDI to glue the two 64-bit values back together. */ | |
22289 | if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE) | |
22290 | { | |
22291 | cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */ | |
22292 | icode = reg_addr[mode].reload_vsx_gpr; | |
22293 | } | |
22294 | ||
22295 | /* Handle moving 128-bit values from VSX registers to GPRs on | |
22296 | ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the | |
22297 | bottom 64-bit value. */ | |
22298 | else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) | |
22299 | { | |
22300 | cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */ | |
22301 | icode = reg_addr[mode].reload_gpr_vsx; | |
22302 | } | |
22303 | } | |
22304 | ||
22305 | else if (TARGET_POWERPC64 && mode == SFmode) | |
22306 | { | |
22307 | if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) | |
22308 | { | |
22309 | cost = 3; /* xscvdpspn, mfvsrd, and. */ | |
22310 | icode = reg_addr[mode].reload_gpr_vsx; | |
22311 | } | |
22312 | ||
22313 | else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE) | |
22314 | { | |
22315 | cost = 2; /* mtvsrwz, xscvspdpn. */ | |
22316 | icode = reg_addr[mode].reload_vsx_gpr; | |
22317 | } | |
22318 | } | |
22319 | ||
22320 | else if (!TARGET_POWERPC64 && size == 8) | |
22321 | { | |
22322 | /* Handle moving 64-bit values from GPRs to floating point registers on | |
22323 | ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two | |
22324 | 32-bit values back together. Altivec register classes must be handled | |
22325 | specially since a different instruction is used, and the secondary | |
22326 | reload support requires a single instruction class in the scratch | |
22327 | register constraint. However, right now TFmode is not allowed in | |
22328 | Altivec registers, so the pattern will never match. */ | |
22329 | if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p) | |
22330 | { | |
22331 | cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */ | |
22332 | icode = reg_addr[mode].reload_fpr_gpr; | |
22333 | } | |
22334 | } | |
22335 | ||
22336 | if (icode != CODE_FOR_nothing) | |
22337 | { | |
22338 | ret = true; | |
22339 | if (sri) | |
22340 | { | |
22341 | sri->icode = icode; | |
22342 | sri->extra_cost = cost; | |
22343 | } | |
22344 | } | |
22345 | ||
22346 | return ret; | |
22347 | } | |
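/* For example, moving a TImode value from two GPRs into a VSX register on
   a 64-bit ISA 2.07 target uses the reload_vsx_gpr helper: two mtvsrd
   insns plus one xxpermdi, reported back with an extra cost of 3.  */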
22348 | ||
22349 | /* Return whether a move between two register classes can be done either | |
22350 | directly (simple move) or via a pattern that uses a single extra temporary | |
22351 | (using ISA 2.07's direct move in this case). */ | |
22352 | ||
22353 | static bool | |
22354 | rs6000_secondary_reload_move (enum rs6000_reg_type to_type, | |
22355 | enum rs6000_reg_type from_type, | |
22356 | machine_mode mode, | |
22357 | secondary_reload_info *sri, | |
22358 | bool altivec_p) | |
22359 | { | |
22360 | /* Fall back to load/store reloads if either type is not a register. */ | |
22361 | if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE) | |
22362 | return false; | |
22363 | ||
22364 | /* If we haven't allocated registers yet, assume the move can be done for the | |
22365 | standard register types. */ | |
22366 | if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE) | |
22367 | || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type)) | |
22368 | || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type))) | |
22369 | return true; | |
22370 | ||
22371 | /* A move within the same set of registers is a simple move for | |
22372 | non-specialized registers. */ | |
22373 | if (to_type == from_type && IS_STD_REG_TYPE (to_type)) | |
22374 | return true; | |
22375 | ||
22376 | /* Check whether a simple move can be done directly. */ | |
22377 | if (rs6000_secondary_reload_simple_move (to_type, from_type, mode)) | |
22378 | { | |
22379 | if (sri) | |
22380 | { | |
22381 | sri->icode = CODE_FOR_nothing; | |
22382 | sri->extra_cost = 0; | |
22383 | } | |
22384 | return true; | |
22385 | } | |
22386 | ||
22387 | /* Now check if we can do it in a few steps. */ | |
22388 | return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri, | |
22389 | altivec_p); | |
22390 | } | |
22391 | ||
22392 | /* Inform reload about cases where moving X with a mode MODE to a register in | |
22393 | RCLASS requires an extra scratch or immediate register. Return the class | |
22394 | needed for the immediate register. | |
22395 | ||
22396 | For VSX and Altivec, we may need a register to convert sp+offset into | |
22397 | reg+sp. | |
22398 | ||
22399 | For misaligned 64-bit gpr loads and stores we need a register to | |
22400 | convert an offset address to indirect. */ | |
22401 | ||
22402 | static reg_class_t | |
22403 | rs6000_secondary_reload (bool in_p, | |
22404 | rtx x, | |
22405 | reg_class_t rclass_i, | |
22406 | machine_mode mode, | |
22407 | secondary_reload_info *sri) | |
22408 | { | |
22409 | enum reg_class rclass = (enum reg_class) rclass_i; | |
22410 | reg_class_t ret = ALL_REGS; | |
22411 | enum insn_code icode; | |
22412 | bool default_p = false; | |
22413 | bool done_p = false; | |
22414 | ||
22415 | /* Allow subreg of memory before/during reload. */ | |
22416 | bool memory_p = (MEM_P (x) | |
22417 | || (!reload_completed && GET_CODE (x) == SUBREG | |
22418 | && MEM_P (SUBREG_REG (x)))); | |
22419 | ||
22420 | sri->icode = CODE_FOR_nothing; | |
22421 | sri->t_icode = CODE_FOR_nothing; | |
22422 | sri->extra_cost = 0; | |
22423 | icode = ((in_p) | |
22424 | ? reg_addr[mode].reload_load | |
22425 | : reg_addr[mode].reload_store); | |
22426 | ||
22427 | if (REG_P (x) || register_operand (x, mode)) | |
22428 | { | |
22429 | enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass]; | |
22430 | bool altivec_p = (rclass == ALTIVEC_REGS); | |
22431 | enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p); | |
22432 | ||
22433 | if (!in_p) | |
22434 | std::swap (to_type, from_type); | |
22435 | ||
22436 | /* Can we do a direct move of some sort? */ | |
22437 | if (rs6000_secondary_reload_move (to_type, from_type, mode, sri, | |
22438 | altivec_p)) | |
22439 | { | |
22440 | icode = (enum insn_code)sri->icode; | |
22441 | default_p = false; | |
22442 | done_p = true; | |
22443 | ret = NO_REGS; | |
22444 | } | |
22445 | } | |
22446 | ||
22447 | /* Make sure 0.0 is not reloaded or forced into memory. */ | |
22448 | if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass)) | |
22449 | { | |
22450 | ret = NO_REGS; | |
22451 | default_p = false; | |
22452 | done_p = true; | |
22453 | } | |
22454 | ||
22455 | /* If this is a scalar floating point value and we want to load it into the | |
22456 | traditional Altivec registers, move it via a traditional floating | |
22457 | point register, unless we have D-form addressing. Also make sure that | |
22458 | non-zero constants use an FPR. */ | |
22459 | if (!done_p && reg_addr[mode].scalar_in_vmx_p | |
22460 | && !mode_supports_vmx_dform (mode) | |
22461 | && (rclass == VSX_REGS || rclass == ALTIVEC_REGS) | |
22462 | && (memory_p || (GET_CODE (x) == CONST_DOUBLE))) | |
22463 | { | |
22464 | ret = FLOAT_REGS; | |
22465 | default_p = false; | |
22466 | done_p = true; | |
22467 | } | |
22468 | ||
22469 | /* Handle reload of load/stores if we have reload helper functions. */ | |
22470 | if (!done_p && icode != CODE_FOR_nothing && memory_p) | |
22471 | { | |
22472 | int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass, | |
22473 | mode); | |
22474 | ||
22475 | if (extra_cost >= 0) | |
22476 | { | |
22477 | done_p = true; | |
22478 | ret = NO_REGS; | |
22479 | if (extra_cost > 0) | |
22480 | { | |
22481 | sri->extra_cost = extra_cost; | |
22482 | sri->icode = icode; | |
22483 | } | |
22484 | } | |
22485 | } | |
22486 | ||
22487 | /* Handle unaligned loads and stores of integer registers. */ | |
22488 | if (!done_p && TARGET_POWERPC64 | |
22489 | && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE | |
22490 | && memory_p | |
22491 | && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD) | |
22492 | { | |
22493 | rtx addr = XEXP (x, 0); | |
22494 | rtx off = address_offset (addr); | |
22495 | ||
22496 | if (off != NULL_RTX) | |
22497 | { | |
22498 | unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD; | |
22499 | unsigned HOST_WIDE_INT offset = INTVAL (off); | |
22500 | ||
22501 | /* We need a secondary reload when our legitimate_address_p | |
22502 | says the address is good (as otherwise the entire address | |
22503 | will be reloaded), and the offset is not a multiple of | |
22504 | four or we have an address wrap. Address wrap will only | |
22505 | occur for LO_SUMs since legitimate_offset_address_p | |
22506 | rejects addresses for 16-byte mems that will wrap. */ | |
22507 | if (GET_CODE (addr) == LO_SUM | |
22508 | ? (1 /* legitimate_address_p allows any offset for lo_sum */ | |
22509 | && ((offset & 3) != 0 | |
22510 | || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra)) | |
22511 | : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */ | |
22512 | && (offset & 3) != 0)) | |
22513 | { | |
22514 | /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */ | |
22515 | if (in_p) | |
22516 | sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load | |
22517 | : CODE_FOR_reload_di_load); | |
22518 | else | |
22519 | sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store | |
22520 | : CODE_FOR_reload_di_store); | |
22521 | sri->extra_cost = 2; | |
22522 | ret = NO_REGS; | |
22523 | done_p = true; | |
22524 | } | |
22525 | else | |
22526 | default_p = true; | |
22527 | } | |
22528 | else | |
22529 | default_p = true; | |
22530 | } | |
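/* For instance (a sketch of the effect, not literal emitted RTL): a
   DImode access at offset 9 from a base register cannot use the DS-form
   ld/std instructions, whose displacement must be a multiple of 4, so
   the secondary reload above arranges for the equivalent of
        addi scratch,base,9
        ld   reg,0(scratch)
   via the reload_di_load/reload_di_store patterns.  */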
22531 | ||
22532 | if (!done_p && !TARGET_POWERPC64 | |
22533 | && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE | |
22534 | && memory_p | |
22535 | && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD) | |
22536 | { | |
22537 | rtx addr = XEXP (x, 0); | |
22538 | rtx off = address_offset (addr); | |
22539 | ||
22540 | if (off != NULL_RTX) | |
22541 | { | |
22542 | unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD; | |
22543 | unsigned HOST_WIDE_INT offset = INTVAL (off); | |
22544 | ||
22545 | /* We need a secondary reload when our legitimate_address_p | |
22546 | says the address is good (as otherwise the entire address | |
22547 | will be reloaded), and we have a wrap. | |
22548 | ||
22549 | legitimate_lo_sum_address_p allows LO_SUM addresses to | |
22550 | have any offset so test for wrap in the low 16 bits. | |
22551 | ||
22552 | legitimate_offset_address_p checks for the range | |
22553 | [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7] | |
22554 | for mode size of 16. We wrap at [0x7ffc,0x7fff] and | |
22555 | [0x7ff4,0x7fff] respectively, so test for the | |
22556 | intersection of these ranges, [0x7ffc,0x7fff] and | |
22557 | [0x7ff4,0x7ff7] respectively. | |
22558 | ||
22559 | Note that the address we see here may have been | |
22560 | manipulated by legitimize_reload_address. */ | |
22561 | if (GET_CODE (addr) == LO_SUM | |
22562 | ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra | |
22563 | : offset - (0x8000 - extra) < UNITS_PER_WORD) | |
22564 | { | |
22565 | if (in_p) | |
22566 | sri->icode = CODE_FOR_reload_si_load; | |
22567 | else | |
22568 | sri->icode = CODE_FOR_reload_si_store; | |
22569 | sri->extra_cost = 2; | |
22570 | ret = NO_REGS; | |
22571 | done_p = true; | |
22572 | } | |
22573 | else | |
22574 | default_p = true; | |
22575 | } | |
22576 | else | |
22577 | default_p = true; | |
22578 | } | |
22579 | ||
22580 | if (!done_p) | |
22581 | default_p = true; | |
22582 | ||
22583 | if (default_p) | |
22584 | ret = default_secondary_reload (in_p, x, rclass, mode, sri); | |
22585 | ||
22586 | gcc_assert (ret != ALL_REGS); | |
22587 | ||
22588 | if (TARGET_DEBUG_ADDR) | |
22589 | { | |
22590 | fprintf (stderr, | |
22591 | "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, " | |
22592 | "mode = %s", | |
22593 | reg_class_names[ret], | |
22594 | in_p ? "true" : "false", | |
22595 | reg_class_names[rclass], | |
22596 | GET_MODE_NAME (mode)); | |
22597 | ||
22598 | if (reload_completed) | |
22599 | fputs (", after reload", stderr); | |
22600 | ||
22601 | if (!done_p) | |
22602 | fputs (", done_p not set", stderr); | |
22603 | ||
22604 | if (default_p) | |
22605 | fputs (", default secondary reload", stderr); | |
22606 | ||
22607 | if (sri->icode != CODE_FOR_nothing) | |
22608 | fprintf (stderr, ", reload func = %s, extra cost = %d", | |
22609 | insn_data[sri->icode].name, sri->extra_cost); | |
22610 | ||
22611 | else if (sri->extra_cost > 0) | |
22612 | fprintf (stderr, ", extra cost = %d", sri->extra_cost); | |
22613 | ||
22614 | fputs ("\n", stderr); | |
22615 | debug_rtx (x); | |
22616 | } | |
22617 | ||
22618 | return ret; | |
22619 | } | |
22620 | ||
22621 | /* Better tracing for rs6000_secondary_reload_inner. */ | |
22622 | ||
22623 | static void | |
22624 | rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch, | |
22625 | bool store_p) | |
22626 | { | |
22627 | rtx set, clobber; | |
22628 | ||
22629 | gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX); | |
22630 | ||
22631 | fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line, | |
22632 | store_p ? "store" : "load"); | |
22633 | ||
22634 | if (store_p) | |
22635 | set = gen_rtx_SET (mem, reg); | |
22636 | else | |
22637 | set = gen_rtx_SET (reg, mem); | |
22638 | ||
22639 | clobber = gen_rtx_CLOBBER (VOIDmode, scratch); | |
22640 | debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber))); | |
22641 | } | |
22642 | ||
22643 | static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool) | |
22644 | ATTRIBUTE_NORETURN; | |
22645 | ||
22646 | static void | |
22647 | rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch, | |
22648 | bool store_p) | |
22649 | { | |
22650 | rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p); | |
22651 | gcc_unreachable (); | |
22652 | } | |
22653 | ||
22654 | /* Fix up reload addresses for values in GPR, FPR, and VMX registers that have | |
22655 | reload helper functions. These were identified in | |
22656 | rs6000_secondary_reload_memory, and if reload decided to use the secondary | |
22657 | reload, it calls the insns: | |
22658 | reload_<RELOAD:mode>_<P:mptrsize>_store | |
22659 | reload_<RELOAD:mode>_<P:mptrsize>_load | |
22660 | ||
22661 | which in turn calls this function, to do whatever is necessary to create | |
22662 | valid addresses. */ | |
22663 | ||
22664 | void | |
22665 | rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p) | |
22666 | { | |
22667 | int regno = true_regnum (reg); | |
22668 | machine_mode mode = GET_MODE (reg); | |
22669 | addr_mask_type addr_mask; | |
22670 | rtx addr; | |
22671 | rtx new_addr; | |
22672 | rtx op_reg, op0, op1; | |
22673 | rtx and_op; | |
22674 | rtx cc_clobber; | |
22675 | rtvec rv; | |
22676 | ||
22677 | if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem) | |
22678 | || !base_reg_operand (scratch, GET_MODE (scratch))) | |
22679 | rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); | |
22680 | ||
22681 | if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)) | |
22682 | addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR]; | |
22683 | ||
22684 | else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO)) | |
22685 | addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR]; | |
22686 | ||
22687 | else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO)) | |
22688 | addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX]; | |
22689 | ||
22690 | else | |
22691 | rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); | |
22692 | ||
22693 | /* Make sure the mode is valid in this register class. */ | |
22694 | if ((addr_mask & RELOAD_REG_VALID) == 0) | |
22695 | rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); | |
22696 | ||
22697 | if (TARGET_DEBUG_ADDR) | |
22698 | rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p); | |
22699 | ||
22700 | new_addr = addr = XEXP (mem, 0); | |
22701 | switch (GET_CODE (addr)) | |
22702 | { | |
22703 | /* Does the register class support auto update forms for this mode? If | |
22704 | not, do the update now. We don't need a scratch register, since the | |
22705 | powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */ | |
22706 | case PRE_INC: | |
22707 | case PRE_DEC: | |
22708 | op_reg = XEXP (addr, 0); | |
22709 | if (!base_reg_operand (op_reg, Pmode)) | |
22710 | rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); | |
22711 | ||
22712 | if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0) | |
22713 | { | |
22714 | emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode)))); | |
22715 | new_addr = op_reg; | |
22716 | } | |
22717 | break; | |
22718 | ||
22719 | case PRE_MODIFY: | |
22720 | op0 = XEXP (addr, 0); | |
22721 | op1 = XEXP (addr, 1); | |
22722 | if (!base_reg_operand (op0, Pmode) | |
22723 | || GET_CODE (op1) != PLUS | |
22724 | || !rtx_equal_p (op0, XEXP (op1, 0))) | |
22725 | rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); | |
22726 | ||
22727 | if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0) | |
22728 | { | |
22729 | emit_insn (gen_rtx_SET (op0, op1)); | |
22730 | new_addr = op0; | |
22731 | } | |
22732 | break; | |
22733 | ||
22734 | /* Do we need to simulate AND -16 to clear the bottom address bits used | |
22735 | in VMX load/stores? */ | |
22736 | case AND: | |
22737 | op0 = XEXP (addr, 0); | |
22738 | op1 = XEXP (addr, 1); | |
22739 | if ((addr_mask & RELOAD_REG_AND_M16) == 0) | |
22740 | { | |
22741 | if (REG_P (op0) || GET_CODE (op0) == SUBREG) | |
22742 | op_reg = op0; | |
22743 | ||
22744 | else if (GET_CODE (op0) == PLUS) | |
22745 | { | |
22746 | emit_insn (gen_rtx_SET (scratch, op0)); | |
22747 | op_reg = scratch; | |
22748 | } | |
22749 | ||
22750 | else | |
22751 | rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); | |
22752 | ||
22753 | and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1); | |
22754 | cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode)); | |
22755 | rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber); | |
22756 | emit_insn (gen_rtx_PARALLEL (VOIDmode, rv)); | |
22757 | new_addr = scratch; | |
22758 | } | |
22759 | break; | |
22760 | ||
22761 | /* If this is an indirect address, make sure it is a base register. */ | |
22762 | case REG: | |
22763 | case SUBREG: | |
22764 | if (!base_reg_operand (addr, GET_MODE (addr))) | |
22765 | { | |
22766 | emit_insn (gen_rtx_SET (scratch, addr)); | |
22767 | new_addr = scratch; | |
22768 | } | |
22769 | break; | |
22770 | ||
22771 | /* If this is an indexed address, make sure the register class can handle | |
22772 | indexed addresses for this mode. */ | |
22773 | case PLUS: | |
22774 | op0 = XEXP (addr, 0); | |
22775 | op1 = XEXP (addr, 1); | |
22776 | if (!base_reg_operand (op0, Pmode)) | |
22777 | rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); | |
22778 | ||
22779 | else if (int_reg_operand (op1, Pmode)) | |
22780 | { | |
22781 | if ((addr_mask & RELOAD_REG_INDEXED) == 0) | |
22782 | { | |
22783 | emit_insn (gen_rtx_SET (scratch, addr)); | |
22784 | new_addr = scratch; | |
22785 | } | |
22786 | } | |
22787 | ||
22788 | else if (mode_supports_vsx_dform_quad (mode) && CONST_INT_P (op1)) | |
22789 | { | |
22790 | if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0) | |
22791 | || !quad_address_p (addr, mode, false)) | |
22792 | { | |
22793 | emit_insn (gen_rtx_SET (scratch, addr)); | |
22794 | new_addr = scratch; | |
22795 | } | |
22796 | } | |
22797 | ||
22798 | /* Make sure the register class can handle offset addresses. */ | |
22799 | else if (rs6000_legitimate_offset_address_p (mode, addr, false, true)) | |
22800 | { | |
22801 | if ((addr_mask & RELOAD_REG_OFFSET) == 0) | |
22802 | { | |
22803 | emit_insn (gen_rtx_SET (scratch, addr)); | |
22804 | new_addr = scratch; | |
22805 | } | |
22806 | } | |
22807 | ||
22808 | else | |
22809 | rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); | |
22810 | ||
22811 | break; | |
22812 | ||
22813 | case LO_SUM: | |
22814 | op0 = XEXP (addr, 0); | |
22815 | op1 = XEXP (addr, 1); | |
22816 | if (!base_reg_operand (op0, Pmode)) | |
22817 | rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); | |
22818 | ||
22819 | else if (int_reg_operand (op1, Pmode)) | |
22820 | { | |
22821 | if ((addr_mask & RELOAD_REG_INDEXED) == 0) | |
22822 | { | |
22823 | emit_insn (gen_rtx_SET (scratch, addr)); | |
22824 | new_addr = scratch; | |
22825 | } | |
22826 | } | |
22827 | ||
22828 | /* Quad offsets are restricted and can't handle normal addresses. */ | |
22829 | else if (mode_supports_vsx_dform_quad (mode)) | |
22830 | { | |
22831 | emit_insn (gen_rtx_SET (scratch, addr)); | |
22832 | new_addr = scratch; | |
22833 | } | |
22834 | ||
22835 | /* Make sure the register class can handle offset addresses. */ | |
22836 | else if (legitimate_lo_sum_address_p (mode, addr, false)) | |
22837 | { | |
22838 | if ((addr_mask & RELOAD_REG_OFFSET) == 0) | |
22839 | { | |
22840 | emit_insn (gen_rtx_SET (scratch, addr)); | |
22841 | new_addr = scratch; | |
22842 | } | |
22843 | } | |
22844 | ||
22845 | else | |
22846 | rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); | |
22847 | ||
22848 | break; | |
22849 | ||
22850 | case SYMBOL_REF: | |
22851 | case CONST: | |
22852 | case LABEL_REF: | |
22853 | rs6000_emit_move (scratch, addr, Pmode); | |
22854 | new_addr = scratch; | |
22855 | break; | |
22856 | ||
22857 | default: | |
22858 | rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); | |
22859 | } | |
22860 | ||
22861 | /* Adjust the address if it changed. */ | |
22862 | if (addr != new_addr) | |
22863 | { | |
22864 | mem = replace_equiv_address_nv (mem, new_addr); | |
22865 | if (TARGET_DEBUG_ADDR) | |
22866 | fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n"); | |
22867 | } | |
22868 | ||
22869 | /* Now create the move. */ | |
22870 | if (store_p) | |
22871 | emit_insn (gen_rtx_SET (mem, reg)); | |
22872 | else | |
22873 | emit_insn (gen_rtx_SET (reg, mem)); | |
22874 | ||
22875 | return; | |
22876 | } | |
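/* Example of the AND -16 handling above (a sketch): for the address
   (and:DI (plus:DI r3 r4) (const_int -16)) in a register class without
   the implicit Altivec masking, the code emits the equivalent of
        scratch = r3 + r4
        scratch = scratch & -16
   and then performs the load or store through (mem (scratch)).  */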
22877 | ||
22878 | /* Convert reloads involving 64-bit gprs and misaligned offset | |
22879 | addressing, or multiple 32-bit gprs and offsets that are too large, | |
22880 | to use indirect addressing. */ | |
22881 | ||
22882 | void | |
22883 | rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p) | |
22884 | { | |
22885 | int regno = true_regnum (reg); | |
22886 | enum reg_class rclass; | |
22887 | rtx addr; | |
22888 | rtx scratch_or_premodify = scratch; | |
22889 | ||
22890 | if (TARGET_DEBUG_ADDR) | |
22891 | { | |
22892 | fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n", | |
22893 | store_p ? "store" : "load"); | |
22894 | fprintf (stderr, "reg:\n"); | |
22895 | debug_rtx (reg); | |
22896 | fprintf (stderr, "mem:\n"); | |
22897 | debug_rtx (mem); | |
22898 | fprintf (stderr, "scratch:\n"); | |
22899 | debug_rtx (scratch); | |
22900 | } | |
22901 | ||
22902 | gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER); | |
22903 | gcc_assert (GET_CODE (mem) == MEM); | |
22904 | rclass = REGNO_REG_CLASS (regno); | |
22905 | gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS); | |
22906 | addr = XEXP (mem, 0); | |
22907 | ||
22908 | if (GET_CODE (addr) == PRE_MODIFY) | |
22909 | { | |
22910 | gcc_assert (REG_P (XEXP (addr, 0)) | |
22911 | && GET_CODE (XEXP (addr, 1)) == PLUS | |
22912 | && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0)); | |
22913 | scratch_or_premodify = XEXP (addr, 0); | |
22914 | if (!HARD_REGISTER_P (scratch_or_premodify)) | |
22915 | /* If we have a pseudo here then reload will have arranged | |
22916 | to have it replaced, but only in the original insn. | |
22917 | Use the replacement here too. */ | |
22918 | scratch_or_premodify = find_replacement (&XEXP (addr, 0)); | |
22919 | ||
22920 | /* RTL emitted by rs6000_secondary_reload_gpr uses RTL | |
22921 | expressions from the original insn, without unsharing them. | |
22922 | Any RTL that points into the original insn will of course | |
22923 | have register replacements applied. That is why we don't | |
22924 | need to look for replacements under the PLUS. */ | |
22925 | addr = XEXP (addr, 1); | |
22926 | } | |
22927 | gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM); | |
22928 | ||
22929 | rs6000_emit_move (scratch_or_premodify, addr, Pmode); | |
22930 | ||
22931 | mem = replace_equiv_address_nv (mem, scratch_or_premodify); | |
22932 | ||
22933 | /* Now create the move. */ | |
22934 | if (store_p) | |
22935 | emit_insn (gen_rtx_SET (mem, reg)); | |
22936 | else | |
22937 | emit_insn (gen_rtx_SET (reg, mem)); | |
22938 | ||
22939 | return; | |
22940 | } | |
22941 | ||
22942 | /* Allocate a 64-bit stack slot to be used for copying SDmode values through if | |
22943 | this function has any SDmode references. If we are on a power7 or later, we | |
22944 | don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions | |
22945 | can load/store the value. */ | |
22946 | ||
22947 | static void | |
22948 | rs6000_alloc_sdmode_stack_slot (void) | |
22949 | { | |
22950 | tree t; | |
22951 | basic_block bb; | |
22952 | gimple_stmt_iterator gsi; | |
22953 | ||
22954 | gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX); | |
22955 | /* We use a different approach for dealing with the secondary | |
22956 | memory in LRA. */ | |
22957 | if (ira_use_lra_p) | |
22958 | return; | |
22959 | ||
22960 | if (TARGET_NO_SDMODE_STACK) | |
22961 | return; | |
22962 | ||
22963 | FOR_EACH_BB_FN (bb, cfun) | |
22964 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
22965 | { | |
22966 | tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL); | |
22967 | if (ret) | |
22968 | { | |
22969 | rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0); | |
22970 | cfun->machine->sdmode_stack_slot = adjust_address_nv (stack, | |
22971 | SDmode, 0); | |
22972 | return; | |
22973 | } | |
22974 | } | |
22975 | ||
22976 | /* Check for any SDmode parameters of the function. */ | |
22977 | for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t)) | |
22978 | { | |
22979 | if (TREE_TYPE (t) == error_mark_node) | |
22980 | continue; | |
22981 | ||
22982 | if (TYPE_MODE (TREE_TYPE (t)) == SDmode | |
22983 | || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode) | |
22984 | { | |
22985 | rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0); | |
22986 | cfun->machine->sdmode_stack_slot = adjust_address_nv (stack, | |
22987 | SDmode, 0); | |
22988 | return; | |
22989 | } | |
22990 | } | |
22991 | } | |
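/* An editorial sketch (not part of the original source) of the wide-slot
   idiom used above: the slot is allocated in the 8-byte DDmode, matching
   what pre-power7 FPR loads and stores can move, and the same memory is
   then viewed in the 4-byte SDmode:

     rtx slot = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
     rtx sd_view = adjust_address_nv (slot, SDmode, 0);

   adjust_address_nv changes only the access mode, so sd_view aliases the
   start of the DDmode slot.  */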
22992 | ||
22993 | static void | |
22994 | rs6000_instantiate_decls (void) | |
22995 | { | |
22996 | if (cfun->machine->sdmode_stack_slot != NULL_RTX) | |
22997 | instantiate_decl_rtl (cfun->machine->sdmode_stack_slot); | |
22998 | } | |
22999 | ||
23000 | /* Given an rtx X being reloaded into a reg required to be | |
23001 | in class CLASS, return the class of reg to actually use. | |
23002 | In general this is just CLASS; but on some machines | |
23003 | in some cases it is preferable to use a more restrictive class. | |
23004 | ||
23005 | On the RS/6000, we have to return NO_REGS when we want to reload a | |
23006 | floating-point CONST_DOUBLE to force it to be copied to memory. | |
23007 | ||
23008 | We also don't want to reload integer values into floating-point | |
23009 | registers if we can at all help it. In fact, this can | |
23010 | cause reload to die, if it tries to generate a reload of CTR | |
23011 | into a FP register and discovers it doesn't have the memory location | |
23012 | required. | |
23013 | ||
23014 | ??? Would it be a good idea to have reload do the converse, that is | |
23015 | try to reload floating modes into FP registers if possible? | |
23016 | */ | |
23017 | ||
23018 | static enum reg_class | |
23019 | rs6000_preferred_reload_class (rtx x, enum reg_class rclass) | |
23020 | { | |
23021 | machine_mode mode = GET_MODE (x); | |
23022 | bool is_constant = CONSTANT_P (x); | |
23023 | ||
23024 | /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred | |
23025 | reload class for it. */ | |
23026 | if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS) | |
23027 | && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0) | |
23028 | return NO_REGS; | |
23029 | ||
23030 | if ((rclass == FLOAT_REGS || rclass == VSX_REGS) | |
23031 | && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0) | |
23032 | return NO_REGS; | |
23033 | ||
23034 | /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow | |
23035 | the reloading of address expressions using PLUS into floating point | |
23036 | registers. */ | |
23037 | if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS) | |
23038 | { | |
23039 | if (is_constant) | |
23040 | { | |
23041 | /* Zero is always allowed in all VSX registers. */ | |
23042 | if (x == CONST0_RTX (mode)) | |
23043 | return rclass; | |
23044 | ||
23045 | /* If this is a vector constant that can be formed with a few Altivec | |
23046 | instructions, we want altivec registers. */ | |
23047 | if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode)) | |
23048 | return ALTIVEC_REGS; | |
23049 | ||
23050 | /* If this is an integer constant that can easily be loaded into | |
23051 | vector registers, allow it. */ | |
23052 | if (CONST_INT_P (x)) | |
23053 | { | |
23054 | HOST_WIDE_INT value = INTVAL (x); | |
23055 | ||
23056 | /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA | |
23057 | 2.06 can generate it in the Altivec registers with | |
23058 | VSPLTI<x>. */ | |
23059 | if (value == -1) | |
23060 | { | |
23061 | if (TARGET_P8_VECTOR) | |
23062 | return rclass; | |
23063 | else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS) | |
23064 | return ALTIVEC_REGS; | |
23065 | else | |
23066 | return NO_REGS; | |
23067 | } | |
23068 | ||
23069 | /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and | |
23070 | a sign extend in the Altivec registers. */ | |
23071 | if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR | |
23072 | && TARGET_VSX_SMALL_INTEGER | |
23073 | && (rclass == ALTIVEC_REGS || rclass == VSX_REGS)) | |
23074 | return ALTIVEC_REGS; | |
23075 | } | |
23076 | ||
23077 | /* Force constant to memory. */ | |
23078 | return NO_REGS; | |
23079 | } | |
23080 | ||
23081 | /* D-form addressing can easily reload the value. */ | |
23082 | if (mode_supports_vmx_dform (mode) | |
23083 | || mode_supports_vsx_dform_quad (mode)) | |
23084 | return rclass; | |
23085 | ||
23086 | /* If this is a scalar floating point value and we don't have D-form | |
23087 | addressing, prefer the traditional floating point registers so that we | |
23088 | can use D-form (register+offset) addressing. */ | |
23089 | if (rclass == VSX_REGS | |
23090 | && (mode == SFmode || GET_MODE_SIZE (mode) == 8)) | |
23091 | return FLOAT_REGS; | |
23092 | ||
23093 | /* Prefer the Altivec registers if Altivec is handling the vector | |
23094 | operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec | |
23095 | loads. */ | |
23096 | if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode) | |
23097 | || mode == V1TImode) | |
23098 | return ALTIVEC_REGS; | |
23099 | ||
23100 | return rclass; | |
23101 | } | |
23102 | ||
23103 | if (is_constant || GET_CODE (x) == PLUS) | |
23104 | { | |
23105 | if (reg_class_subset_p (GENERAL_REGS, rclass)) | |
23106 | return GENERAL_REGS; | |
23107 | if (reg_class_subset_p (BASE_REGS, rclass)) | |
23108 | return BASE_REGS; | |
23109 | return NO_REGS; | |
23110 | } | |
23111 | ||
23112 | if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS) | |
23113 | return GENERAL_REGS; | |
23114 | ||
23115 | return rclass; | |
23116 | } | |
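/* Worked examples for the function above (an editorial sketch assuming a
   VSX target; not part of the original source):

     x = CONST0_RTX (V4SImode), rclass = VSX_REGS -> VSX_REGS
         (zero can be formed directly in any VSX register);
     x = constm1_rtx in DImode, rclass = VSX_REGS
         -> VSX_REGS on ISA 2.07 (XXLORC forms all ones),
            ALTIVEC_REGS on ISA 2.06 (VSPLTI<x>);
     x = a CONST_VECTOR that is not an easy vector constant,
         rclass = VSX_REGS -> NO_REGS (the constant goes to memory);
     x = (plus (reg) (const_int)), rclass = GENERAL_REGS
         -> GENERAL_REGS (PLUS addresses never go to FP registers).  */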
23117 | ||
23118 | /* Debug version of rs6000_preferred_reload_class. */ | |
23119 | static enum reg_class | |
23120 | rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass) | |
23121 | { | |
23122 | enum reg_class ret = rs6000_preferred_reload_class (x, rclass); | |
23123 | ||
23124 | fprintf (stderr, | |
23125 | "\nrs6000_preferred_reload_class, return %s, rclass = %s, " | |
23126 | "mode = %s, x:\n", | |
23127 | reg_class_names[ret], reg_class_names[rclass], | |
23128 | GET_MODE_NAME (GET_MODE (x))); | |
23129 | debug_rtx (x); | |
23130 | ||
23131 | return ret; | |
23132 | } | |
23133 | ||
23134 | /* If we are copying between FP or AltiVec registers and anything else, we need | |
23135 | a memory location. The exceptions are when we are targeting ppc64 and the | |
23136 | direct-move instructions between the FPRs and GPRs are available. Also, under VSX, you | |
23137 | can copy vector registers from the FP register set to the Altivec register | |
23138 | set and vice versa. */ | |
23139 | ||
23140 | static bool | |
23141 | rs6000_secondary_memory_needed (enum reg_class from_class, | |
23142 | enum reg_class to_class, | |
23143 | machine_mode mode) | |
23144 | { | |
23145 | enum rs6000_reg_type from_type, to_type; | |
23146 | bool altivec_p = ((from_class == ALTIVEC_REGS) | |
23147 | || (to_class == ALTIVEC_REGS)); | |
23148 | ||
23149 | /* If a simple/direct move is available, we don't need secondary memory. */ | |
23150 | from_type = reg_class_to_reg_type[(int)from_class]; | |
23151 | to_type = reg_class_to_reg_type[(int)to_class]; | |
23152 | ||
23153 | if (rs6000_secondary_reload_move (to_type, from_type, mode, | |
23154 | (secondary_reload_info *)0, altivec_p)) | |
23155 | return false; | |
23156 | ||
23157 | /* If we have a floating point or vector register class, we need to use | |
23158 | memory to transfer the data. */ | |
23159 | if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type)) | |
23160 | return true; | |
23161 | ||
23162 | return false; | |
23163 | } | |
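/* Illustration (an editorial sketch, not part of the original source):
   copying a DImode value between GENERAL_REGS and FLOAT_REGS.  On a
   power8 target the direct-move instructions (mtvsrd/mfvsrd) make
   rs6000_secondary_reload_move succeed, so the function returns false;
   without direct moves the FP side satisfies IS_FP_VECT_REG_TYPE and the
   copy has to bounce through a stack slot, so it returns true.  */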
23164 | ||
23165 | /* Debug version of rs6000_secondary_memory_needed. */ | |
23166 | static bool | |
23167 | rs6000_debug_secondary_memory_needed (enum reg_class from_class, | |
23168 | enum reg_class to_class, | |
23169 | machine_mode mode) | |
23170 | { | |
23171 | bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode); | |
23172 | ||
23173 | fprintf (stderr, | |
23174 | "rs6000_secondary_memory_needed, return: %s, from_class = %s, " | |
23175 | "to_class = %s, mode = %s\n", | |
23176 | ret ? "true" : "false", | |
23177 | reg_class_names[from_class], | |
23178 | reg_class_names[to_class], | |
23179 | GET_MODE_NAME (mode)); | |
23180 | ||
23181 | return ret; | |
23182 | } | |
23183 | ||
23184 | /* Return the register class of a scratch register needed to copy IN into | |
23185 | or out of a register in RCLASS in MODE. If it can be done directly, | |
23186 | NO_REGS is returned. */ | |
23187 | ||
23188 | static enum reg_class | |
23189 | rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode, | |
23190 | rtx in) | |
23191 | { | |
23192 | int regno; | |
23193 | ||
23194 | if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN | |
23195 | #if TARGET_MACHO | |
23196 | && MACHOPIC_INDIRECT | |
23197 | #endif | |
23198 | )) | |
23199 | { | |
23200 | /* We cannot copy a symbolic operand directly into anything | |
23201 | other than BASE_REGS for TARGET_ELF. So indicate that a | |
23202 | register from BASE_REGS is needed as an intermediate | |
23203 | register. | |
23204 | ||
23205 | On Darwin, pic addresses require a load from memory, which | |
23206 | needs a base register. */ | |
23207 | if (rclass != BASE_REGS | |
23208 | && (GET_CODE (in) == SYMBOL_REF | |
23209 | || GET_CODE (in) == HIGH | |
23210 | || GET_CODE (in) == LABEL_REF | |
23211 | || GET_CODE (in) == CONST)) | |
23212 | return BASE_REGS; | |
23213 | } | |
23214 | ||
23215 | if (GET_CODE (in) == REG) | |
23216 | { | |
23217 | regno = REGNO (in); | |
23218 | if (regno >= FIRST_PSEUDO_REGISTER) | |
23219 | { | |
23220 | regno = true_regnum (in); | |
23221 | if (regno >= FIRST_PSEUDO_REGISTER) | |
23222 | regno = -1; | |
23223 | } | |
23224 | } | |
23225 | else if (GET_CODE (in) == SUBREG) | |
23226 | { | |
23227 | regno = true_regnum (in); | |
23228 | if (regno >= FIRST_PSEUDO_REGISTER) | |
23229 | regno = -1; | |
23230 | } | |
23231 | else | |
23232 | regno = -1; | |
23233 | ||
23234 | /* If we have VSX register moves, prefer moving scalar values between | |
23235 | Altivec registers and GPR by going via an FPR (and then via memory) | |
23236 | instead of reloading the secondary memory address for Altivec moves. */ | |
23237 | if (TARGET_VSX | |
23238 | && GET_MODE_SIZE (mode) < 16 | |
23239 | && !mode_supports_vmx_dform (mode) | |
23240 | && (((rclass == GENERAL_REGS || rclass == BASE_REGS) | |
23241 | && (regno >= 0 && ALTIVEC_REGNO_P (regno))) | |
23242 | || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS) | |
23243 | && (regno >= 0 && INT_REGNO_P (regno))))) | |
23244 | return FLOAT_REGS; | |
23245 | ||
23246 | /* We can place anything into GENERAL_REGS and can put GENERAL_REGS | |
23247 | into anything. */ | |
23248 | if (rclass == GENERAL_REGS || rclass == BASE_REGS | |
23249 | || (regno >= 0 && INT_REGNO_P (regno))) | |
23250 | return NO_REGS; | |
23251 | ||
23252 | /* Constants, memory, and VSX registers can go into VSX registers (both the | |
23253 | traditional floating point and the altivec registers). */ | |
23254 | if (rclass == VSX_REGS | |
23255 | && (regno == -1 || VSX_REGNO_P (regno))) | |
23256 | return NO_REGS; | |
23257 | ||
23258 | /* Constants, memory, and FP registers can go into FP registers. */ | |
23259 | if ((regno == -1 || FP_REGNO_P (regno)) | |
23260 | && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS)) | |
23261 | return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS; | |
23262 | ||
23263 | /* Memory, and AltiVec registers can go into AltiVec registers. */ | |
23264 | if ((regno == -1 || ALTIVEC_REGNO_P (regno)) | |
23265 | && rclass == ALTIVEC_REGS) | |
23266 | return NO_REGS; | |
23267 | ||
23268 | /* We can copy among the CR registers. */ | |
23269 | if ((rclass == CR_REGS || rclass == CR0_REGS) | |
23270 | && regno >= 0 && CR_REGNO_P (regno)) | |
23271 | return NO_REGS; | |
23272 | ||
23273 | /* Otherwise, we need GENERAL_REGS. */ | |
23274 | return GENERAL_REGS; | |
23275 | } | |
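/* Worked examples for the function above (an editorial sketch, not part
   of the original source).  On TARGET_ELF, loading a SYMBOL_REF into a
   floating-point register:

     rs6000_secondary_reload_class (FLOAT_REGS, DFmode, sym) -> BASE_REGS

   because the symbolic address must first be formed in a base register.
   A GPR-to-GPR copy needs no scratch at all:

     rs6000_secondary_reload_class (GENERAL_REGS, DImode, reg) -> NO_REGS  */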
23276 | ||
23277 | /* Debug version of rs6000_secondary_reload_class. */ | |
23278 | static enum reg_class | |
23279 | rs6000_debug_secondary_reload_class (enum reg_class rclass, | |
23280 | machine_mode mode, rtx in) | |
23281 | { | |
23282 | enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in); | |
23283 | fprintf (stderr, | |
23284 | "\nrs6000_secondary_reload_class, return %s, rclass = %s, " | |
23285 | "mode = %s, input rtx:\n", | |
23286 | reg_class_names[ret], reg_class_names[rclass], | |
23287 | GET_MODE_NAME (mode)); | |
23288 | debug_rtx (in); | |
23289 | ||
23290 | return ret; | |
23291 | } | |
23292 | ||
23293 | /* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */ | |
23294 | ||
23295 | static bool | |
23296 | rs6000_cannot_change_mode_class (machine_mode from, | |
23297 | machine_mode to, | |
23298 | enum reg_class rclass) | |
23299 | { | |
23300 | unsigned from_size = GET_MODE_SIZE (from); | |
23301 | unsigned to_size = GET_MODE_SIZE (to); | |
23302 | ||
23303 | if (from_size != to_size) | |
23304 | { | |
23305 | enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS; | |
23306 | ||
23307 | if (reg_classes_intersect_p (xclass, rclass)) | |
23308 | { | |
92d2aec3 | 23309 | unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to); |
23310 | unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from); | |
01e91138 | 23311 | bool to_float128_vector_p = FLOAT128_VECTOR_P (to); |
23312 | bool from_float128_vector_p = FLOAT128_VECTOR_P (from); | |
23313 | ||
23314 | /* Don't allow 64-bit types to overlap with 128-bit types that take a | |
23315 | single register under VSX because the scalar part of the register | |
23316 | is in the upper 64-bits, and not the lower 64-bits. Types like | |
23317 | TFmode/TDmode that take 2 scalar registers can overlap. 128-bit | |
23318 | IEEE floating point can't overlap, and neither can small | |
23319 | values. */ | |
23320 | ||
23321 | if (to_float128_vector_p && from_float128_vector_p) | |
23322 | return false; | |
23323 | ||
23324 | else if (to_float128_vector_p || from_float128_vector_p) | |
23325 | return true; | |
23326 | ||
23327 | /* TDmode in floating-mode registers must always go into a register | |
23328 | pair with the most significant word in the even-numbered register | |
23329 | to match ISA requirements. In little-endian mode, this does not | |
23330 | match subreg numbering, so we cannot allow subregs. */ | |
23331 | if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode)) | |
23332 | return true; | |
23333 | ||
23334 | if (from_size < 8 || to_size < 8) | |
23335 | return true; | |
23336 | ||
23337 | if (from_size == 8 && (8 * to_nregs) != to_size) | |
23338 | return true; | |
23339 | ||
23340 | if (to_size == 8 && (8 * from_nregs) != from_size) | |
23341 | return true; | |
23342 | ||
23343 | return false; | |
23344 | } | |
23345 | else | |
23346 | return false; | |
23347 | } | |
23348 | ||
23349 | if (TARGET_E500_DOUBLE | |
23350 | && ((((to) == DFmode) + ((from) == DFmode)) == 1 | |
23351 | || (((to) == TFmode) + ((from) == TFmode)) == 1 | |
23352 | || (((to) == IFmode) + ((from) == IFmode)) == 1 | |
23353 | || (((to) == KFmode) + ((from) == KFmode)) == 1 | |
23354 | || (((to) == DDmode) + ((from) == DDmode)) == 1 | |
23355 | || (((to) == TDmode) + ((from) == TDmode)) == 1 | |
23356 | || (((to) == DImode) + ((from) == DImode)) == 1)) | |
23357 | return true; | |
23358 | ||
23359 | /* Since the VSX register set includes traditional floating point registers | |
23360 | and altivec registers, just check for the size being different instead of | |
23361 | trying to check whether the modes are vector modes. Otherwise it won't | |
23362 | allow say DF and DI to change classes. For types like TFmode and TDmode | |
23363 | that take 2 64-bit registers, rather than a single 128-bit register, don't | |
23364 | allow subregs of those types to other 128 bit types. */ | |
23365 | if (TARGET_VSX && VSX_REG_CLASS_P (rclass)) | |
23366 | { | |
23367 | unsigned num_regs = (from_size + 15) / 16; | |
92d2aec3 | 23368 | if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs |
23369 | || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs) | |
01e91138 | 23370 | return true; |
23371 | ||
23372 | return (from_size != 8 && from_size != 16); | |
23373 | } | |
23374 | ||
23375 | if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS | |
23376 | && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1) | |
23377 | return true; | |
23378 | ||
23379 | if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1 | |
23380 | && reg_classes_intersect_p (GENERAL_REGS, rclass)) | |
23381 | return true; | |
23382 | ||
23383 | return false; | |
23384 | } | |
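/* Worked examples for the function above with rclass = VSX_REGS (an
   editorial sketch, not part of the original source):

     DFmode <-> DImode: both are 8 bytes, so the first block is skipped;
     in the VSX block num_regs = (8 + 15) / 16 = 1, each mode occupies a
     single FPR, and from_size == 8, so the result is false (the class
     change is allowed).

     SFmode -> DFmode: the sizes differ (4 vs. 8) and from_size < 8, so
     the result is true; small scalars live in the upper 64 bits of a VSX
     register and cannot be subreg'd across sizes.  */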
23385 | ||
23386 | /* Debug version of rs6000_cannot_change_mode_class. */ | |
23387 | static bool | |
23388 | rs6000_debug_cannot_change_mode_class (machine_mode from, | |
23389 | machine_mode to, | |
23390 | enum reg_class rclass) | |
23391 | { | |
23392 | bool ret = rs6000_cannot_change_mode_class (from, to, rclass); | |
23393 | ||
23394 | fprintf (stderr, | |
23395 | "rs6000_cannot_change_mode_class, return %s, from = %s, " | |
23396 | "to = %s, rclass = %s\n", | |
23397 | ret ? "true" : "false", | |
23398 | GET_MODE_NAME (from), GET_MODE_NAME (to), | |
23399 | reg_class_names[rclass]); | |
23400 | ||
23401 | return ret; | |
23402 | } | |
23403 | \f | |
23404 | /* Return a string to do a move operation of 128 bits of data. */ | |
23405 | ||
23406 | const char * | |
23407 | rs6000_output_move_128bit (rtx operands[]) | |
23408 | { | |
23409 | rtx dest = operands[0]; | |
23410 | rtx src = operands[1]; | |
23411 | machine_mode mode = GET_MODE (dest); | |
23412 | int dest_regno; | |
23413 | int src_regno; | |
23414 | bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p; | |
23415 | bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p; | |
23416 | ||
23417 | if (REG_P (dest)) | |
23418 | { | |
23419 | dest_regno = REGNO (dest); | |
23420 | dest_gpr_p = INT_REGNO_P (dest_regno); | |
23421 | dest_fp_p = FP_REGNO_P (dest_regno); | |
23422 | dest_vmx_p = ALTIVEC_REGNO_P (dest_regno); | |
23423 | dest_vsx_p = dest_fp_p | dest_vmx_p; | |
23424 | } | |
23425 | else | |
23426 | { | |
23427 | dest_regno = -1; | |
23428 | dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false; | |
23429 | } | |
23430 | ||
23431 | if (REG_P (src)) | |
23432 | { | |
23433 | src_regno = REGNO (src); | |
23434 | src_gpr_p = INT_REGNO_P (src_regno); | |
23435 | src_fp_p = FP_REGNO_P (src_regno); | |
23436 | src_vmx_p = ALTIVEC_REGNO_P (src_regno); | |
23437 | src_vsx_p = src_fp_p | src_vmx_p; | |
23438 | } | |
23439 | else | |
23440 | { | |
23441 | src_regno = -1; | |
23442 | src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false; | |
23443 | } | |
23444 | ||
23445 | /* Register moves. */ | |
23446 | if (dest_regno >= 0 && src_regno >= 0) | |
23447 | { | |
23448 | if (dest_gpr_p) | |
23449 | { | |
23450 | if (src_gpr_p) | |
23451 | return "#"; | |
23452 | ||
23453 | if (TARGET_DIRECT_MOVE_128 && src_vsx_p) | |
23454 | return (WORDS_BIG_ENDIAN | |
23455 | ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1" | |
23456 | : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1"); | |
23457 | ||
23458 | else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p) | |
23459 | return "#"; | |
23460 | } | |
23461 | ||
23462 | else if (TARGET_VSX && dest_vsx_p) | |
23463 | { | |
23464 | if (src_vsx_p) | |
23465 | return "xxlor %x0,%x1,%x1"; | |
23466 | ||
23467 | else if (TARGET_DIRECT_MOVE_128 && src_gpr_p) | |
23468 | return (WORDS_BIG_ENDIAN | |
23469 | ? "mtvsrdd %x0,%1,%L1" | |
23470 | : "mtvsrdd %x0,%L1,%1"); | |
23471 | ||
23472 | else if (TARGET_DIRECT_MOVE && src_gpr_p) | |
23473 | return "#"; | |
23474 | } | |
23475 | ||
23476 | else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p) | |
23477 | return "vor %0,%1,%1"; | |
23478 | ||
23479 | else if (dest_fp_p && src_fp_p) | |
23480 | return "#"; | |
23481 | } | |
23482 | ||
23483 | /* Loads. */ | |
23484 | else if (dest_regno >= 0 && MEM_P (src)) | |
23485 | { | |
23486 | if (dest_gpr_p) | |
23487 | { | |
23488 | if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src)) | |
23489 | return "lq %0,%1"; | |
23490 | else | |
23491 | return "#"; | |
23492 | } | |
23493 | ||
23494 | else if (TARGET_ALTIVEC && dest_vmx_p | |
23495 | && altivec_indexed_or_indirect_operand (src, mode)) | |
23496 | return "lvx %0,%y1"; | |
23497 | ||
23498 | else if (TARGET_VSX && dest_vsx_p) | |
23499 | { | |
23500 | if (mode_supports_vsx_dform_quad (mode) | |
23501 | && quad_address_p (XEXP (src, 0), mode, true)) | |
23502 | return "lxv %x0,%1"; | |
23503 | ||
23504 | else if (TARGET_P9_VECTOR) | |
23505 | return "lxvx %x0,%y1"; | |
23506 | ||
23507 | else if (mode == V16QImode || mode == V8HImode || mode == V4SImode) | |
23508 | return "lxvw4x %x0,%y1"; | |
23509 | ||
23510 | else | |
23511 | return "lxvd2x %x0,%y1"; | |
23512 | } | |
23513 | ||
23514 | else if (TARGET_ALTIVEC && dest_vmx_p) | |
23515 | return "lvx %0,%y1"; | |
23516 | ||
23517 | else if (dest_fp_p) | |
23518 | return "#"; | |
23519 | } | |
23520 | ||
23521 | /* Stores. */ | |
23522 | else if (src_regno >= 0 && MEM_P (dest)) | |
23523 | { | |
23524 | if (src_gpr_p) | |
23525 | { | |
23526 | if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src)) | |
23527 | return "stq %1,%0"; | |
23528 | else | |
23529 | return "#"; | |
23530 | } | |
23531 | ||
23532 | else if (TARGET_ALTIVEC && src_vmx_p | |
23533 | && altivec_indexed_or_indirect_operand (dest, mode)) | |
23534 | return "stvx %1,%y0"; | |
23535 | ||
23536 | else if (TARGET_VSX && src_vsx_p) | |
23537 | { | |
23538 | if (mode_supports_vsx_dform_quad (mode) | |
23539 | && quad_address_p (XEXP (dest, 0), mode, true)) | |
23540 | return "stxv %x1,%0"; | |
23541 | ||
23542 | else if (TARGET_P9_VECTOR) | |
23543 | return "stxvx %x1,%y0"; | |
23544 | ||
23545 | else if (mode == V16QImode || mode == V8HImode || mode == V4SImode) | |
23546 | return "stxvw4x %x1,%y0"; | |
23547 | ||
23548 | else | |
23549 | return "stxvd2x %x1,%y0"; | |
23550 | } | |
23551 | ||
23552 | else if (TARGET_ALTIVEC && src_vmx_p) | |
23553 | return "stvx %1,%y0"; | |
23554 | ||
23555 | else if (src_fp_p) | |
23556 | return "#"; | |
23557 | } | |
23558 | ||
23559 | /* Constants. */ | |
23560 | else if (dest_regno >= 0 | |
23561 | && (GET_CODE (src) == CONST_INT | |
23562 | || GET_CODE (src) == CONST_WIDE_INT | |
23563 | || GET_CODE (src) == CONST_DOUBLE | |
23564 | || GET_CODE (src) == CONST_VECTOR)) | |
23565 | { | |
23566 | if (dest_gpr_p) | |
23567 | return "#"; | |
23568 | ||
23569 | else if ((dest_vmx_p && TARGET_ALTIVEC) | |
23570 | || (dest_vsx_p && TARGET_VSX)) | |
23571 | return output_vec_const_move (operands); | |
23572 | } | |
23573 | ||
23574 | fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src)); | |
23575 | } | |
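/* Examples of the templates selected above (an editorial sketch assuming
   a power9 target with TARGET_DIRECT_MOVE_128; not part of the original
   source):

     VSX reg  <- VSX reg : "xxlor %x0,%x1,%x1"
     GPR pair <- VSX reg : "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1" (big endian)
     VSX reg  <- memory  : "lxv %x0,%1" for a quad D-form address,
                           otherwise "lxvx %x0,%y1"
     GPR pair <- GPR pair: "#", i.e. the move is split after reload.  */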
23576 | ||
23577 | /* Validate a 128-bit move. */ | |
23578 | bool | |
23579 | rs6000_move_128bit_ok_p (rtx operands[]) | |
23580 | { | |
23581 | machine_mode mode = GET_MODE (operands[0]); | |
23582 | return (gpc_reg_operand (operands[0], mode) | |
23583 | || gpc_reg_operand (operands[1], mode)); | |
23584 | } | |
23585 | ||
23586 | /* Return true if a 128-bit move needs to be split. */ | |
23587 | bool | |
23588 | rs6000_split_128bit_ok_p (rtx operands[]) | |
23589 | { | |
23590 | if (!reload_completed) | |
23591 | return false; | |
23592 | ||
23593 | if (!gpr_or_gpr_p (operands[0], operands[1])) | |
23594 | return false; | |
23595 | ||
23596 | if (quad_load_store_p (operands[0], operands[1])) | |
23597 | return false; | |
23598 | ||
23599 | return true; | |
23600 | } | |
23601 | ||
23602 | \f | |
23603 | /* Given a comparison operation, return the bit number in CCR to test. We | |
23604 | know this is a valid comparison. | |
23605 | ||
23606 | SCC_P is 1 if this is for an scc. That means that %D will have been | |
23607 | used instead of %C, so the bits will be in different places. | |
23608 | ||
23609 | Return -1 if OP isn't a valid comparison for some reason. */ | |
23610 | ||
23611 | int | |
23612 | ccr_bit (rtx op, int scc_p) | |
23613 | { | |
23614 | enum rtx_code code = GET_CODE (op); | |
23615 | machine_mode cc_mode; | |
23616 | int cc_regnum; | |
23617 | int base_bit; | |
23618 | rtx reg; | |
23619 | ||
23620 | if (!COMPARISON_P (op)) | |
23621 | return -1; | |
23622 | ||
23623 | reg = XEXP (op, 0); | |
23624 | ||
23625 | gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg))); | |
23626 | ||
23627 | cc_mode = GET_MODE (reg); | |
23628 | cc_regnum = REGNO (reg); | |
23629 | base_bit = 4 * (cc_regnum - CR0_REGNO); | |
23630 | ||
23631 | validate_condition_mode (code, cc_mode); | |
23632 | ||
23633 | /* When generating a sCOND operation, only positive conditions are | |
23634 | allowed. */ | |
23635 | gcc_assert (!scc_p | |
23636 | || code == EQ || code == GT || code == LT || code == UNORDERED | |
23637 | || code == GTU || code == LTU); | |
23638 | ||
23639 | switch (code) | |
23640 | { | |
23641 | case NE: | |
23642 | return scc_p ? base_bit + 3 : base_bit + 2; | |
23643 | case EQ: | |
23644 | return base_bit + 2; | |
23645 | case GT: case GTU: case UNLE: | |
23646 | return base_bit + 1; | |
23647 | case LT: case LTU: case UNGE: | |
23648 | return base_bit; | |
23649 | case ORDERED: case UNORDERED: | |
23650 | return base_bit + 3; | |
23651 | ||
23652 | case GE: case GEU: | |
23653 | /* If scc, we will have done a cror to put the bit in the | |
23654 | unordered position. So test that bit. For integer, this is ! LT | |
23655 | unless this is an scc insn. */ | |
23656 | return scc_p ? base_bit + 3 : base_bit; | |
23657 | ||
23658 | case LE: case LEU: | |
23659 | return scc_p ? base_bit + 3 : base_bit + 1; | |
23660 | ||
23661 | default: | |
23662 | gcc_unreachable (); | |
23663 | } | |
23664 | } | |
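/* Worked example (an editorial sketch, not part of the original source):
   a GT comparison whose result lives in CR6.  Then

     base_bit = 4 * (cc_regnum - CR0_REGNO) = 4 * 6 = 24
     GT tests base_bit + 1 = 25;  EQ would test base_bit + 2 = 26.

   With SCC_P set, GE instead tests the "unordered" slot base_bit + 3 =
   27, because the cror emitted for scc has copied the result there.  */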
23665 | \f | |
23666 | /* Return the GOT register. */ | |
23667 | ||
23668 | rtx | |
23669 | rs6000_got_register (rtx value ATTRIBUTE_UNUSED) | |
23670 | { | |
23671 | /* The second flow pass currently (June 1999) can't update | |
23672 | regs_ever_live without disturbing other parts of the compiler, so | |
23673 | update it here to make the prolog/epilogue code happy. */ | |
23674 | if (!can_create_pseudo_p () | |
23675 | && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM)) | |
23676 | df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true); | |
23677 | ||
23678 | crtl->uses_pic_offset_table = 1; | |
23679 | ||
23680 | return pic_offset_table_rtx; | |
23681 | } | |
23682 | \f | |
23683 | static rs6000_stack_t stack_info; | |
23684 | ||
23685 | /* Function to init struct machine_function. | |
23686 | This will be called, via a pointer variable, | |
23687 | from push_function_context. */ | |
23688 | ||
23689 | static struct machine_function * | |
23690 | rs6000_init_machine_status (void) | |
23691 | { | |
23692 | stack_info.reload_completed = 0; | |
23693 | return ggc_cleared_alloc<machine_function> (); | |
23694 | } | |
23695 | \f | |
23696 | #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode) | |
23697 | ||
23698 | /* Write out a function code label. */ | |
23699 | ||
23700 | void | |
23701 | rs6000_output_function_entry (FILE *file, const char *fname) | |
23702 | { | |
23703 | if (fname[0] != '.') | |
23704 | { | |
23705 | switch (DEFAULT_ABI) | |
23706 | { | |
23707 | default: | |
23708 | gcc_unreachable (); | |
23709 | ||
23710 | case ABI_AIX: | |
23711 | if (DOT_SYMBOLS) | |
23712 | putc ('.', file); | |
23713 | else | |
23714 | ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L."); | |
23715 | break; | |
23716 | ||
23717 | case ABI_ELFv2: | |
23718 | case ABI_V4: | |
23719 | case ABI_DARWIN: | |
23720 | break; | |
23721 | } | |
23722 | } | |
23723 | ||
23724 | RS6000_OUTPUT_BASENAME (file, fname); | |
23725 | } | |
23726 | ||
23727 | /* Print an operand. Recognize special options, documented below. */ | |
23728 | ||
23729 | #if TARGET_ELF | |
23730 | #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel") | |
23731 | #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13) | |
23732 | #else | |
23733 | #define SMALL_DATA_RELOC "sda21" | |
23734 | #define SMALL_DATA_REG 0 | |
23735 | #endif | |
23736 | ||
23737 | void | |
23738 | print_operand (FILE *file, rtx x, int code) | |
23739 | { | |
23740 | int i; | |
23741 | unsigned HOST_WIDE_INT uval; | |
23742 | ||
23743 | switch (code) | |
23744 | { | |
23745 | /* %a is output_address. */ | |
23746 | ||
23747 | /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise | |
23748 | output_operand. */ | |
23749 | ||
23750 | case 'D': | |
23751 | /* Like 'J' but get to the GT bit only. */ | |
23752 | gcc_assert (REG_P (x)); | |
23753 | ||
23754 | /* Bit 1 is GT bit. */ | |
23755 | i = 4 * (REGNO (x) - CR0_REGNO) + 1; | |
23756 | ||
23757 | /* Add one for shift count in rlinm for scc. */ | |
23758 | fprintf (file, "%d", i + 1); | |
23759 | return; | |
23760 | ||
23761 | case 'e': | |
23762 | /* If the low 16 bits are 0, but some other bit is set, write 's'. */ | |
23763 | if (! INT_P (x)) | |
23764 | { | |
23765 | output_operand_lossage ("invalid %%e value"); | |
23766 | return; | |
23767 | } | |
23768 | ||
23769 | uval = INTVAL (x); | |
23770 | if ((uval & 0xffff) == 0 && uval != 0) | |
23771 | putc ('s', file); | |
23772 | return; | |
23773 | ||
23774 | case 'E': | |
23775 | /* X is a CR register. Print the number of the EQ bit of the CR. */ | |
23776 | if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x))) | |
23777 | output_operand_lossage ("invalid %%E value"); | |
23778 | else | |
23779 | fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2); | |
23780 | return; | |
23781 | ||
23782 | case 'f': | |
23783 | /* X is a CR register. Print the shift count needed to move it | |
23784 | to the high-order four bits. */ | |
23785 | if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x))) | |
23786 | output_operand_lossage ("invalid %%f value"); | |
23787 | else | |
23788 | fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO)); | |
23789 | return; | |
23790 | ||
23791 | case 'F': | |
23792 | /* Similar, but print the count for the rotate in the opposite | |
23793 | direction. */ | |
23794 | if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x))) | |
23795 | output_operand_lossage ("invalid %%F value"); | |
23796 | else | |
23797 | fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO)); | |
23798 | return; | |
23799 | ||
23800 | case 'G': | |
23801 | /* X is a constant integer. If it is negative, print "m", | |
23802 | otherwise print "z". This is to make an aze or ame insn. */ | |
23803 | if (GET_CODE (x) != CONST_INT) | |
23804 | output_operand_lossage ("invalid %%G value"); | |
23805 | else if (INTVAL (x) >= 0) | |
23806 | putc ('z', file); | |
23807 | else | |
23808 | putc ('m', file); | |
23809 | return; | |
23810 | ||
23811 | case 'h': | |
23812 | /* If constant, output low-order five bits. Otherwise, write | |
23813 | normally. */ | |
23814 | if (INT_P (x)) | |
23815 | fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31); | |
23816 | else | |
23817 | print_operand (file, x, 0); | |
23818 | return; | |
23819 | ||
23820 | case 'H': | |
23821 | /* If constant, output low-order six bits. Otherwise, write | |
23822 | normally. */ | |
23823 | if (INT_P (x)) | |
23824 | fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63); | |
23825 | else | |
23826 | print_operand (file, x, 0); | |
23827 | return; | |
23828 | ||
23829 | case 'I': | |
23830 | /* Print `i' if this is a constant, else nothing. */ | |
23831 | if (INT_P (x)) | |
23832 | putc ('i', file); | |
23833 | return; | |
23834 | ||
23835 | case 'j': | |
23836 | /* Write the bit number in CCR for jump. */ | |
23837 | i = ccr_bit (x, 0); | |
23838 | if (i == -1) | |
23839 | output_operand_lossage ("invalid %%j code"); | |
23840 | else | |
23841 | fprintf (file, "%d", i); | |
23842 | return; | |
23843 | ||
23844 | case 'J': | |
23845 | /* Similar, but add one for shift count in rlinm for scc and pass | |
23846 | scc flag to `ccr_bit'. */ | |
23847 | i = ccr_bit (x, 1); | |
23848 | if (i == -1) | |
23849 | output_operand_lossage ("invalid %%J code"); | |
23850 | else | |
23851 | /* If we want bit 31, write a shift count of zero, not 32. */ | |
23852 | fprintf (file, "%d", i == 31 ? 0 : i + 1); | |
23853 | return; | |
23854 | ||
23855 | case 'k': | |
23856 | /* X must be a constant. Write the 1's complement of the | |
23857 | constant. */ | |
23858 | if (! INT_P (x)) | |
23859 | output_operand_lossage ("invalid %%k value"); | |
23860 | else | |
23861 | fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x)); | |
23862 | return; | |
23863 | ||
23864 | case 'K': | |
23865 | /* X must be a symbolic constant on ELF. Write an | |
23866 | expression suitable for an 'addi' that adds in the low 16 | |
23867 | bits of the MEM. */ | |
23868 | if (GET_CODE (x) == CONST) | |
23869 | { | |
23870 | if (GET_CODE (XEXP (x, 0)) != PLUS | |
23871 | || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF | |
23872 | && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF) | |
23873 | || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT) | |
23874 | output_operand_lossage ("invalid %%K value"); | |
23875 | } | |
23876 | print_operand_address (file, x); | |
23877 | fputs ("@l", file); | |
23878 | return; | |
23879 | ||
23880 | /* %l is output_asm_label. */ | |
23881 | ||
23882 | case 'L': | |
23883 | /* Write second word of DImode or DFmode reference. Works on register | |
23884 | or non-indexed memory only. */ | |
23885 | if (REG_P (x)) | |
23886 | fputs (reg_names[REGNO (x) + 1], file); | |
23887 | else if (MEM_P (x)) | |
23888 | { | |
23889 | machine_mode mode = GET_MODE (x); | |
23890 | /* Handle possible auto-increment. Since it is pre-increment and | |
23891 | we have already done it, we can just use an offset of word. */ | |
23892 | if (GET_CODE (XEXP (x, 0)) == PRE_INC | |
23893 | || GET_CODE (XEXP (x, 0)) == PRE_DEC) | |
23894 | output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0), | |
23895 | UNITS_PER_WORD)); | |
23896 | else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY) | |
23897 | output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0), | |
23898 | UNITS_PER_WORD)); | |
23899 | else | |
23900 | output_address (mode, XEXP (adjust_address_nv (x, SImode, | |
23901 | UNITS_PER_WORD), | |
23902 | 0)); | |
23903 | ||
23904 | if (small_data_operand (x, GET_MODE (x))) | |
23905 | fprintf (file, "@%s(%s)", SMALL_DATA_RELOC, | |
23906 | reg_names[SMALL_DATA_REG]); | |
23907 | } | |
23908 | return; | |
23909 | ||
23910 | case 'N': | |
23911 | /* Write the number of elements in the vector times 4. */ | |
23912 | if (GET_CODE (x) != PARALLEL) | |
23913 | output_operand_lossage ("invalid %%N value"); | |
23914 | else | |
23915 | fprintf (file, "%d", XVECLEN (x, 0) * 4); | |
23916 | return; | |
23917 | ||
23918 | case 'O': | |
23919 | /* Similar, but subtract 1 first. */ | |
23920 | if (GET_CODE (x) != PARALLEL) | |
23921 | output_operand_lossage ("invalid %%O value"); | |
23922 | else | |
23923 | fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4); | |
23924 | return; | |
23925 | ||
23926 | case 'p': | |
23927 | /* X is a CONST_INT that is a power of two. Output the logarithm. */ | |
23928 | if (! INT_P (x) | |
23929 | || INTVAL (x) < 0 | |
23930 | || (i = exact_log2 (INTVAL (x))) < 0) | |
23931 | output_operand_lossage ("invalid %%p value"); | |
23932 | else | |
23933 | fprintf (file, "%d", i); | |
23934 | return; | |
23935 | ||
23936 | case 'P': | |
23937 | /* The operand must be an indirect memory reference. The result | |
23938 | is the register name. */ | |
23939 | if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG | |
23940 | || REGNO (XEXP (x, 0)) >= 32) | |
23941 | output_operand_lossage ("invalid %%P value"); | |
23942 | else | |
23943 | fputs (reg_names[REGNO (XEXP (x, 0))], file); | |
23944 | return; | |
23945 | ||
23946 | case 'q': | |
23947 | /* This outputs the logical code corresponding to a boolean | |
23948 | expression. The expression may have one or both operands | |
23949 | negated (if one, only the first one). For condition register | |
23950 | logical operations, it will also treat the negated | |
23951 | CR codes as NOTs, but not handle NOTs of them. */ | |
23952 | { | |
23953 | const char *const *t = 0; | |
23954 | const char *s; | |
23955 | enum rtx_code code = GET_CODE (x); | |
23956 | static const char * const tbl[3][3] = { | |
23957 | { "and", "andc", "nor" }, | |
23958 | { "or", "orc", "nand" }, | |
23959 | { "xor", "eqv", "xor" } }; | |
23960 | ||
23961 | if (code == AND) | |
23962 | t = tbl[0]; | |
23963 | else if (code == IOR) | |
23964 | t = tbl[1]; | |
23965 | else if (code == XOR) | |
23966 | t = tbl[2]; | |
23967 | else | |
23968 | output_operand_lossage ("invalid %%q value"); | |
23969 | ||
23970 | if (GET_CODE (XEXP (x, 0)) != NOT) | |
23971 | s = t[0]; | |
23972 | else | |
23973 | { | |
23974 | if (GET_CODE (XEXP (x, 1)) == NOT) | |
23975 | s = t[2]; | |
23976 | else | |
23977 | s = t[1]; | |
23978 | } | |
23979 | ||
23980 | fputs (s, file); | |
23981 | } | |
23982 | return; | |
23983 | ||
23984 | case 'Q': | |
23985 | if (! TARGET_MFCRF) | |
23986 | return; | |
23987 | fputc (',', file); | |
23988 | /* FALLTHRU */ | |
23989 | ||
23990 | case 'R': | |
23991 | /* X is a CR register. Print the mask for `mtcrf'. */ | |
23992 | if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x))) | |
23993 | output_operand_lossage ("invalid %%R value"); | |
23994 | else | |
23995 | fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO)); | |
23996 | return; | |
23997 | ||
23998 | case 's': | |
23999 | /* Low 5 bits of 32 - value */ | |
24000 | if (! INT_P (x)) | |
24001 | output_operand_lossage ("invalid %%s value"); | |
24002 | else | |
24003 | fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31); | |
24004 | return; | |
24005 | ||
24006 | case 't': | |
24007 | /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */ | |
24008 | gcc_assert (REG_P (x) && GET_MODE (x) == CCmode); | |
24009 | ||
24010 | /* Bit 3 is OV bit. */ | |
24011 | i = 4 * (REGNO (x) - CR0_REGNO) + 3; | |
24012 | ||
24013 | /* If we want bit 31, write a shift count of zero, not 32. */ | |
24014 | fprintf (file, "%d", i == 31 ? 0 : i + 1); | |
24015 | return; | |
24016 | ||
24017 | case 'T': | |
24018 | /* Print the symbolic name of a branch target register. */ | |
24019 | if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO | |
24020 | && REGNO (x) != CTR_REGNO)) | |
24021 | output_operand_lossage ("invalid %%T value"); | |
24022 | else if (REGNO (x) == LR_REGNO) | |
24023 | fputs ("lr", file); | |
24024 | else | |
24025 | fputs ("ctr", file); | |
24026 | return; | |
24027 | ||
24028 | case 'u': | |
24029 | /* High-order or low-order 16 bits of constant, whichever is non-zero, | |
24030 | for use in unsigned operand. */ | |
24031 | if (! INT_P (x)) | |
24032 | { | |
24033 | output_operand_lossage ("invalid %%u value"); | |
24034 | return; | |
24035 | } | |
24036 | ||
24037 | uval = INTVAL (x); | |
24038 | if ((uval & 0xffff) == 0) | |
24039 | uval >>= 16; | |
24040 | ||
24041 | fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff); | |
24042 | return; | |
24043 | ||
24044 | case 'v': | |
24045 | /* High-order 16 bits of constant for use in signed operand. */ | |
24046 | if (! INT_P (x)) | |
24047 | output_operand_lossage ("invalid %%v value"); | |
24048 | else | |
24049 | fprintf (file, HOST_WIDE_INT_PRINT_HEX, | |
24050 | (INTVAL (x) >> 16) & 0xffff); | |
24051 | return; | |
24052 | ||
24053 | case 'U': | |
24054 | /* Print `u' if this has an auto-increment or auto-decrement. */ | |
24055 | if (MEM_P (x) | |
24056 | && (GET_CODE (XEXP (x, 0)) == PRE_INC | |
24057 | || GET_CODE (XEXP (x, 0)) == PRE_DEC | |
24058 | || GET_CODE (XEXP (x, 0)) == PRE_MODIFY)) | |
24059 | putc ('u', file); | |
24060 | return; | |
24061 | ||
24062 | case 'V': | |
24063 | /* Print the trap code for this operand. */ | |
24064 | switch (GET_CODE (x)) | |
24065 | { | |
24066 | case EQ: | |
24067 | fputs ("eq", file); /* 4 */ | |
24068 | break; | |
24069 | case NE: | |
24070 | fputs ("ne", file); /* 24 */ | |
24071 | break; | |
24072 | case LT: | |
24073 | fputs ("lt", file); /* 16 */ | |
24074 | break; | |
24075 | case LE: | |
24076 | fputs ("le", file); /* 20 */ | |
24077 | break; | |
24078 | case GT: | |
24079 | fputs ("gt", file); /* 8 */ | |
24080 | break; | |
24081 | case GE: | |
24082 | fputs ("ge", file); /* 12 */ | |
24083 | break; | |
24084 | case LTU: | |
24085 | fputs ("llt", file); /* 2 */ | |
24086 | break; | |
24087 | case LEU: | |
24088 | fputs ("lle", file); /* 6 */ | |
24089 | break; | |
24090 | case GTU: | |
24091 | fputs ("lgt", file); /* 1 */ | |
24092 | break; | |
24093 | case GEU: | |
24094 | fputs ("lge", file); /* 5 */ | |
24095 | break; | |
24096 | default: | |
24097 | gcc_unreachable (); | |
24098 | } | |
24099 | break; | |
24100 | ||
24101 | case 'w': | |
24102 | /* If constant, low-order 16 bits of constant, signed. Otherwise, write | |
24103 | normally. */ | |
24104 | if (INT_P (x)) | |
24105 | fprintf (file, HOST_WIDE_INT_PRINT_DEC, | |
24106 | ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000); | |
24107 | else | |
24108 | print_operand (file, x, 0); | |
24109 | return; | |
24110 | ||
24111 | case 'x': | |
24112 | /* X is a FPR or Altivec register used in a VSX context. */ | |
24113 | if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x))) | |
24114 | output_operand_lossage ("invalid %%x value"); | |
24115 | else | |
24116 | { | |
24117 | int reg = REGNO (x); | |
24118 | int vsx_reg = (FP_REGNO_P (reg) | |
24119 | ? reg - 32 | |
24120 | : reg - FIRST_ALTIVEC_REGNO + 32); | |
24121 | ||
24122 | #ifdef TARGET_REGNAMES | |
24123 | if (TARGET_REGNAMES) | |
24124 | fprintf (file, "%%vs%d", vsx_reg); | |
24125 | else | |
24126 | #endif | |
24127 | fprintf (file, "%d", vsx_reg); | |
24128 | } | |
24129 | return; | |
24130 | ||
24131 | case 'X': | |
24132 | if (MEM_P (x) | |
24133 | && (legitimate_indexed_address_p (XEXP (x, 0), 0) | |
24134 | || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY | |
24135 | && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0)))) | |
24136 | putc ('x', file); | |
24137 | return; | |
24138 | ||
24139 | case 'Y': | |
24140 | /* Like 'L', for third word of TImode/PTImode */ | |
24141 | if (REG_P (x)) | |
24142 | fputs (reg_names[REGNO (x) + 2], file); | |
24143 | else if (MEM_P (x)) | |
24144 | { | |
24145 | machine_mode mode = GET_MODE (x); | |
24146 | if (GET_CODE (XEXP (x, 0)) == PRE_INC | |
24147 | || GET_CODE (XEXP (x, 0)) == PRE_DEC) | |
24148 | output_address (mode, plus_constant (Pmode, | |
24149 | XEXP (XEXP (x, 0), 0), 8)); | |
24150 | else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY) | |
24151 | output_address (mode, plus_constant (Pmode, | |
24152 | XEXP (XEXP (x, 0), 0), 8)); | |
24153 | else | |
24154 | output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0)); | |
24155 | if (small_data_operand (x, GET_MODE (x))) | |
24156 | fprintf (file, "@%s(%s)", SMALL_DATA_RELOC, | |
24157 | reg_names[SMALL_DATA_REG]); | |
24158 | } | |
24159 | return; | |
24160 | ||
24161 | case 'z': | |
24162 | /* X is a SYMBOL_REF. Write out the name preceded by a | |
24163 | period and without any trailing data in brackets. Used for function | |
24164 | names. If we are configured for System V (or the embedded ABI) on | |
24165 | the PowerPC, do not emit the period, since those systems do not use | |
24166 | TOCs and the like. */ | |
24167 | gcc_assert (GET_CODE (x) == SYMBOL_REF); | |
24168 | ||
24169 | /* For macho, check to see if we need a stub. */ | |
24170 | if (TARGET_MACHO) | |
24171 | { | |
24172 | const char *name = XSTR (x, 0); | |
24173 | #if TARGET_MACHO | |
24174 | if (darwin_emit_branch_islands | |
24175 | && MACHOPIC_INDIRECT | |
24176 | && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION) | |
24177 | name = machopic_indirection_name (x, /*stub_p=*/true); | |
24178 | #endif | |
24179 | assemble_name (file, name); | |
24180 | } | |
24181 | else if (!DOT_SYMBOLS) | |
24182 | assemble_name (file, XSTR (x, 0)); | |
24183 | else | |
24184 | rs6000_output_function_entry (file, XSTR (x, 0)); | |
24185 | return; | |
24186 | ||
24187 | case 'Z': | |
24188 | /* Like 'L', for last word of TImode/PTImode. */ | |
24189 | if (REG_P (x)) | |
24190 | fputs (reg_names[REGNO (x) + 3], file); | |
24191 | else if (MEM_P (x)) | |
24192 | { | |
24193 | machine_mode mode = GET_MODE (x); | |
24194 | if (GET_CODE (XEXP (x, 0)) == PRE_INC | |
24195 | || GET_CODE (XEXP (x, 0)) == PRE_DEC) | |
24196 | output_address (mode, plus_constant (Pmode, | |
24197 | XEXP (XEXP (x, 0), 0), 12)); | |
24198 | else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY) | |
24199 | output_address (mode, plus_constant (Pmode, | |
24200 | XEXP (XEXP (x, 0), 0), 12)); | |
24201 | else | |
24202 | output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0)); | |
24203 | if (small_data_operand (x, GET_MODE (x))) | |
24204 | fprintf (file, "@%s(%s)", SMALL_DATA_RELOC, | |
24205 | reg_names[SMALL_DATA_REG]); | |
24206 | } | |
24207 | return; | |
24208 | ||
24209 | /* Print AltiVec or SPE memory operand. */ | |
24210 | case 'y': | |
24211 | { | |
24212 | rtx tmp; | |
24213 | ||
24214 | gcc_assert (MEM_P (x)); | |
24215 | ||
24216 | tmp = XEXP (x, 0); | |
24217 | ||
24218 | /* Ugly hack because %y is overloaded. */ | |
24219 | if ((TARGET_SPE || TARGET_E500_DOUBLE) | |
24220 | && (GET_MODE_SIZE (GET_MODE (x)) == 8 | |
24221 | || FLOAT128_2REG_P (GET_MODE (x)) | |
24222 | || GET_MODE (x) == TImode | |
24223 | || GET_MODE (x) == PTImode)) | |
24224 | { | |
24225 | /* Handle [reg]. */ | |
24226 | if (REG_P (tmp)) | |
24227 | { | |
24228 | fprintf (file, "0(%s)", reg_names[REGNO (tmp)]); | |
24229 | break; | |
24230 | } | |
24231 | /* Handle [reg+UIMM]. */ | |
24232 | else if (GET_CODE (tmp) == PLUS && | |
24233 | GET_CODE (XEXP (tmp, 1)) == CONST_INT) | |
24234 | { | |
24235 | int x; | |
24236 | ||
24237 | gcc_assert (REG_P (XEXP (tmp, 0))); | |
24238 | ||
24239 | x = INTVAL (XEXP (tmp, 1)); | |
24240 | fprintf (file, "%d(%s)", x, reg_names[REGNO (XEXP (tmp, 0))]); | |
24241 | break; | |
24242 | } | |
24243 | ||
24244 | /* Fall through. Must be [reg+reg]. */ | |
24245 | } | |
24246 | if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x)) | |
24247 | && GET_CODE (tmp) == AND | |
24248 | && GET_CODE (XEXP (tmp, 1)) == CONST_INT | |
24249 | && INTVAL (XEXP (tmp, 1)) == -16) | |
24250 | tmp = XEXP (tmp, 0); | |
24251 | else if (VECTOR_MEM_VSX_P (GET_MODE (x)) | |
24252 | && GET_CODE (tmp) == PRE_MODIFY) | |
24253 | tmp = XEXP (tmp, 1); | |
24254 | if (REG_P (tmp)) | |
24255 | fprintf (file, "0,%s", reg_names[REGNO (tmp)]); | |
24256 | else | |
24257 | { | |
24258 | if (GET_CODE (tmp) != PLUS | |
24259 | || !REG_P (XEXP (tmp, 0)) | |
24260 | || !REG_P (XEXP (tmp, 1))) | |
24261 | { | |
24262 | output_operand_lossage ("invalid %%y value, try using the 'Z' constraint"); | |
24263 | break; | |
24264 | } | |
24265 | ||
24266 | if (REGNO (XEXP (tmp, 0)) == 0) | |
24267 | fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ], | |
24268 | reg_names[ REGNO (XEXP (tmp, 0)) ]); | |
24269 | else | |
24270 | fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ], | |
24271 | reg_names[ REGNO (XEXP (tmp, 1)) ]); | |
24272 | } | |
24273 | break; | |
24274 | } | |
24275 | ||
24276 | case 0: | |
24277 | if (REG_P (x)) | |
24278 | fprintf (file, "%s", reg_names[REGNO (x)]); | |
24279 | else if (MEM_P (x)) | |
24280 | { | |
24281 | /* We need to handle PRE_INC and PRE_DEC here, since we need to | |
24282 | know the width from the mode. */ | |
24283 | if (GET_CODE (XEXP (x, 0)) == PRE_INC) | |
24284 | fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)), | |
24285 | reg_names[REGNO (XEXP (XEXP (x, 0), 0))]); | |
24286 | else if (GET_CODE (XEXP (x, 0)) == PRE_DEC) | |
24287 | fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)), | |
24288 | reg_names[REGNO (XEXP (XEXP (x, 0), 0))]); | |
24289 | else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY) | |
24290 | output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1)); | |
24291 | else | |
24292 | output_address (GET_MODE (x), XEXP (x, 0)); | |
24293 | } | |
24294 | else | |
24295 | { | |
24296 | if (toc_relative_expr_p (x, false)) | |
24297 | /* This hack along with a corresponding hack in | |
24298 | rs6000_output_addr_const_extra arranges to output addends | |
24299 | where the assembler expects to find them. E.g. | |
24300 | (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4) | |
24301 | without this hack would be output as "x@toc+4". We | |
24302 | want "x+4@toc". */ | |
24303 | output_addr_const (file, CONST_CAST_RTX (tocrel_base)); | |
24304 | else | |
24305 | output_addr_const (file, x); | |
24306 | } | |
24307 | return; | |
24308 | ||
24309 | case '&': | |
24310 | if (const char *name = get_some_local_dynamic_name ()) | |
24311 | assemble_name (file, name); | |
24312 | else | |
24313 | output_operand_lossage ("'%%&' used without any " | |
24314 | "local dynamic TLS references"); | |
24315 | return; | |
24316 | ||
24317 | default: | |
24318 | output_operand_lossage ("invalid %%xn code"); | |
24319 | } | |
24320 | } | |
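/* Self-contained demonstration (editorial, not part of the original
   source) of the sign-extension idiom used by the 'w' case above.  */
#include <stdio.h>

int
main (void)
{
  long vals[] = { 0x12345678L, 0x1234ffffL };
  for (int i = 0; i < 2; i++)
    {
      /* Keep the low 16 bits, then sign-extend them portably.  */
      long w = ((vals[i] & 0xffff) ^ 0x8000) - 0x8000;
      printf ("low 16 bits of %#lx, signed: %ld\n", vals[i], w);
    }
  return 0;
}
/* Prints 22136 (0x5678) and -1: values 0x0000..0x7fff map to themselves,
   while 0x8000..0xffff map to -0x8000..-1.  */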
24321 | \f | |
24322 | /* Print the address of an operand. */ | |
24323 | ||
24324 | void | |
24325 | print_operand_address (FILE *file, rtx x) | |
24326 | { | |
24327 | if (REG_P (x)) | |
24328 | fprintf (file, "0(%s)", reg_names[ REGNO (x) ]); | |
24329 | else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST | |
24330 | || GET_CODE (x) == LABEL_REF) | |
24331 | { | |
24332 | output_addr_const (file, x); | |
24333 | if (small_data_operand (x, GET_MODE (x))) | |
24334 | fprintf (file, "@%s(%s)", SMALL_DATA_RELOC, | |
24335 | reg_names[SMALL_DATA_REG]); | |
24336 | else | |
24337 | gcc_assert (!TARGET_TOC); | |
24338 | } | |
24339 | else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) | |
24340 | && REG_P (XEXP (x, 1))) | |
24341 | { | |
24342 | if (REGNO (XEXP (x, 0)) == 0) | |
24343 | fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ], | |
24344 | reg_names[ REGNO (XEXP (x, 0)) ]); | |
24345 | else | |
24346 | fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ], | |
24347 | reg_names[ REGNO (XEXP (x, 1)) ]); | |
24348 | } | |
24349 | else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) | |
24350 | && GET_CODE (XEXP (x, 1)) == CONST_INT) | |
24351 | fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)", | |
24352 | INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]); | |
24353 | #if TARGET_MACHO | |
24354 | else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0)) | |
24355 | && CONSTANT_P (XEXP (x, 1))) | |
24356 | { | |
24357 | fprintf (file, "lo16("); | |
24358 | output_addr_const (file, XEXP (x, 1)); | |
24359 | fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]); | |
24360 | } | |
24361 | #endif | |
24362 | #if TARGET_ELF | |
24363 | else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0)) | |
24364 | && CONSTANT_P (XEXP (x, 1))) | |
24365 | { | |
24366 | output_addr_const (file, XEXP (x, 1)); | |
24367 | fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]); | |
24368 | } | |
24369 | #endif | |
24370 | else if (toc_relative_expr_p (x, false)) | |
24371 | { | |
24372 | /* This hack along with a corresponding hack in | |
24373 | rs6000_output_addr_const_extra arranges to output addends | |
24374 | where the assembler expects to find them. E.g. | |
24375 | (lo_sum (reg 9) | |
24376 | . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8)) | |
24377 | without this hack would be output as "x@toc+8@l(9)". We | |
24378 | want "x+8@toc@l(9)". */ | |
24379 | output_addr_const (file, CONST_CAST_RTX (tocrel_base)); | |
24380 | if (GET_CODE (x) == LO_SUM) | |
24381 | fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]); | |
24382 | else | |
24383 | fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]); | |
24384 | } | |
24385 | else | |
24386 | gcc_unreachable (); | |
24387 | } | |
24388 | \f | |
24389 | /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */ | |
24390 | ||
24391 | static bool | |
24392 | rs6000_output_addr_const_extra (FILE *file, rtx x) | |
24393 | { | |
24394 | if (GET_CODE (x) == UNSPEC) | |
24395 | switch (XINT (x, 1)) | |
24396 | { | |
24397 | case UNSPEC_TOCREL: | |
24398 | gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF | |
24399 | && REG_P (XVECEXP (x, 0, 1)) | |
24400 | && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER); | |
24401 | output_addr_const (file, XVECEXP (x, 0, 0)); | |
24402 | if (x == tocrel_base && tocrel_offset != const0_rtx) | |
24403 | { | |
24404 | if (INTVAL (tocrel_offset) >= 0) | |
24405 | fprintf (file, "+"); | |
24406 | output_addr_const (file, CONST_CAST_RTX (tocrel_offset)); | |
24407 | } | |
24408 | if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC)) | |
24409 | { | |
24410 | putc ('-', file); | |
24411 | assemble_name (file, toc_label_name); | |
24412 | need_toc_init = 1; | |
24413 | } | |
24414 | else if (TARGET_ELF) | |
24415 | fputs ("@toc", file); | |
24416 | return true; | |
24417 | ||
24418 | #if TARGET_MACHO | |
24419 | case UNSPEC_MACHOPIC_OFFSET: | |
24420 | output_addr_const (file, XVECEXP (x, 0, 0)); | |
24421 | putc ('-', file); | |
24422 | machopic_output_function_base_name (file); | |
24423 | return true; | |
24424 | #endif | |
24425 | } | |
24426 | return false; | |
24427 | } | |
24428 | \f | |
24429 | /* Target hook for assembling integer objects. The PowerPC version has | |
24430 | to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP | |
24431 | is defined. It also needs to handle DI-mode objects on 64-bit | |
24432 | targets. */ | |
24433 | ||
24434 | static bool | |
24435 | rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p) | |
24436 | { | |
24437 | #ifdef RELOCATABLE_NEEDS_FIXUP | |
24438 | /* Special handling for SI values. */ | |
24439 | if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p) | |
24440 | { | |
24441 | static int recurse = 0; | |
24442 | ||
24443 | /* For -mrelocatable, we mark all addresses that need to be fixed up in | |
24444 | the .fixup section. Since the TOC section is already relocated, we | |
24445 | don't need to mark it here. We used to skip the text section, but it | |
24446 | should never be valid for relocated addresses to be placed in the text | |
24447 | section. */ | |
24448 | if (DEFAULT_ABI == ABI_V4 | |
24449 | && (TARGET_RELOCATABLE || flag_pic > 1) | |
24450 | && in_section != toc_section | |
24451 | && !recurse | |
24452 | && !CONST_SCALAR_INT_P (x) | |
24453 | && CONSTANT_P (x)) | |
24454 | { | |
24455 | char buf[256]; | |
24456 | ||
24457 | recurse = 1; | |
24458 | ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno); | |
24459 | fixuplabelno++; | |
24460 | ASM_OUTPUT_LABEL (asm_out_file, buf); | |
24461 | fprintf (asm_out_file, "\t.long\t("); | |
24462 | output_addr_const (asm_out_file, x); | |
24463 | fprintf (asm_out_file, ")@fixup\n"); | |
24464 | fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n"); | |
24465 | ASM_OUTPUT_ALIGN (asm_out_file, 2); | |
24466 | fprintf (asm_out_file, "\t.long\t"); | |
24467 | assemble_name (asm_out_file, buf); | |
24468 | fprintf (asm_out_file, "\n\t.previous\n"); | |
24469 | recurse = 0; | |
24470 | return true; | |
24471 | } | |
24472 | /* Remove initial .'s to turn a -mcall-aixdesc function | |
24473 | address into the address of the descriptor, not the function | |
24474 | itself. */ | |
24475 | else if (GET_CODE (x) == SYMBOL_REF | |
24476 | && XSTR (x, 0)[0] == '.' | |
24477 | && DEFAULT_ABI == ABI_AIX) | |
24478 | { | |
24479 | const char *name = XSTR (x, 0); | |
24480 | while (*name == '.') | |
24481 | name++; | |
24482 | ||
24483 | fprintf (asm_out_file, "\t.long\t%s\n", name); | |
24484 | return true; | |
24485 | } | |
24486 | } | |
24487 | #endif /* RELOCATABLE_NEEDS_FIXUP */ | |
24488 | return default_assemble_integer (x, size, aligned_p); | |
24489 | } | |
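/* Sketch of the assembly emitted by the fixup path above for one
   relocatable word X (an editorial illustration; the exact label
   spelling follows the target's internal-label scheme):

     .LCP0:
             .long   (X)@fixup
             .section ".fixup","aw"
             .align  2
             .long   .LCP0
             .previous

   The word itself stays where it belongs; only its address is recorded
   in the .fixup section so that startup code can relocate it.  */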
24490 | ||
24491 | #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO | |
24492 | /* Emit an assembler directive to set symbol visibility for DECL to | |
24493 | VISIBILITY_TYPE. */ | |
24494 | ||
24495 | static void | |
24496 | rs6000_assemble_visibility (tree decl, int vis) | |
24497 | { | |
24498 | if (TARGET_XCOFF) | |
24499 | return; | |
24500 | ||
24501 | /* Functions need to have their entry point symbol visibility set as | |
24502 | well as their descriptor symbol visibility. */ | |
24503 | if (DEFAULT_ABI == ABI_AIX | |
24504 | && DOT_SYMBOLS | |
24505 | && TREE_CODE (decl) == FUNCTION_DECL) | |
24506 | { | |
24507 | static const char * const visibility_types[] = { | |
24508 | NULL, "protected", "hidden", "internal" | |
24509 | }; | |
24510 | ||
24511 | const char *name, *type; | |
24512 | ||
24513 | name = ((* targetm.strip_name_encoding) | |
24514 | (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)))); | |
24515 | type = visibility_types[vis]; | |
24516 | ||
24517 | fprintf (asm_out_file, "\t.%s\t%s\n", type, name); | |
24518 | fprintf (asm_out_file, "\t.%s\t.%s\n", type, name); | |
24519 | } | |
24520 | else | |
24521 | default_assemble_visibility (decl, vis); | |
24522 | } | |
24523 | #endif | |
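/* For a hidden-visibility AIX function `foo', the branch above emits
   (editorial illustration, not part of the original source):

     .hidden foo
     .hidden .foo

   covering both the function descriptor and the dot-prefixed code entry
   point.  */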
\f
enum rtx_code
rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
{
  /* Reversal of FP compares needs care -- an ordered compare
     becomes an unordered compare and vice versa.  */
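
  /* For instance, reversing GT must yield UNLE rather than LE, so that
     the reversed test is also true when either operand is a NaN.  */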
  if (mode == CCFPmode
      && (!flag_finite_math_only
          || code == UNLT || code == UNLE || code == UNGT || code == UNGE
          || code == UNEQ || code == LTGT))
    return reverse_condition_maybe_unordered (code);
  else
    return reverse_condition (code);
}

/* Generate a compare for CODE.  Return a brand-new rtx that
   represents the result of the compare.  */

static rtx
rs6000_generate_compare (rtx cmp, machine_mode mode)
{
  machine_mode comp_mode;
  rtx compare_result;
  enum rtx_code code = GET_CODE (cmp);
  rtx op0 = XEXP (cmp, 0);
  rtx op1 = XEXP (cmp, 1);

  if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
    comp_mode = CCmode;
  else if (FLOAT_MODE_P (mode))
    comp_mode = CCFPmode;
  else if (code == GTU || code == LTU
           || code == GEU || code == LEU)
    comp_mode = CCUNSmode;
  else if ((code == EQ || code == NE)
           && unsigned_reg_p (op0)
           && (unsigned_reg_p (op1)
               || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
    /* These are unsigned values; perhaps there will be a later
       ordering compare that can be shared with this one.  */
    comp_mode = CCUNSmode;
  else
    comp_mode = CCmode;

  /* If we have an unsigned compare, make sure we don't have a signed value as
     an immediate.  */
  if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
      && INTVAL (op1) < 0)
    {
      op0 = copy_rtx_if_shared (op0);
      op1 = force_reg (GET_MODE (op0), op1);
      cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
    }

  /* First, the compare.  */
  compare_result = gen_reg_rtx (comp_mode);

  /* E500 FP compare instructions on the GPRs.  Yuck!  */
  if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
      && FLOAT_MODE_P (mode))
    {
      rtx cmp, or_result, compare_result2;
      machine_mode op_mode = GET_MODE (op0);
      bool reverse_p;

      if (op_mode == VOIDmode)
        op_mode = GET_MODE (op1);

      /* First reverse the condition codes that aren't directly supported.  */
      switch (code)
        {
        case NE:
        case UNLT:
        case UNLE:
        case UNGT:
        case UNGE:
          code = reverse_condition_maybe_unordered (code);
          reverse_p = true;
          break;

        case EQ:
        case LT:
        case LE:
        case GT:
        case GE:
          reverse_p = false;
          break;

        default:
          gcc_unreachable ();
        }

      /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
         This explains the following mess.  */
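
      /* A sketch of the scheme: each supported code maps to a tst or cmp
         pattern that sets only the GT bit; LE and GE are synthesized below
         as LT || EQ and GT || EQ with a CR OR, and the codes reversed above
         are compensated for by testing NE instead of EQ at the end.  */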

      switch (code)
        {
        case EQ:
          switch (op_mode)
            {
            case E_SFmode:
              cmp = (flag_finite_math_only && !flag_trapping_math)
                ? gen_tstsfeq_gpr (compare_result, op0, op1)
                : gen_cmpsfeq_gpr (compare_result, op0, op1);
              break;

            case E_DFmode:
              cmp = (flag_finite_math_only && !flag_trapping_math)
                ? gen_tstdfeq_gpr (compare_result, op0, op1)
                : gen_cmpdfeq_gpr (compare_result, op0, op1);
              break;

            case E_TFmode:
            case E_IFmode:
            case E_KFmode:
              cmp = (flag_finite_math_only && !flag_trapping_math)
                ? gen_tsttfeq_gpr (compare_result, op0, op1)
                : gen_cmptfeq_gpr (compare_result, op0, op1);
              break;

            default:
              gcc_unreachable ();
            }
          break;

        case GT:
        case GE:
          switch (op_mode)
            {
            case E_SFmode:
              cmp = (flag_finite_math_only && !flag_trapping_math)
                ? gen_tstsfgt_gpr (compare_result, op0, op1)
                : gen_cmpsfgt_gpr (compare_result, op0, op1);
              break;

            case E_DFmode:
              cmp = (flag_finite_math_only && !flag_trapping_math)
                ? gen_tstdfgt_gpr (compare_result, op0, op1)
                : gen_cmpdfgt_gpr (compare_result, op0, op1);
              break;

            case E_TFmode:
            case E_IFmode:
            case E_KFmode:
              cmp = (flag_finite_math_only && !flag_trapping_math)
                ? gen_tsttfgt_gpr (compare_result, op0, op1)
                : gen_cmptfgt_gpr (compare_result, op0, op1);
              break;

            default:
              gcc_unreachable ();
            }
          break;

        case LT:
        case LE:
          switch (op_mode)
            {
            case E_SFmode:
              cmp = (flag_finite_math_only && !flag_trapping_math)
                ? gen_tstsflt_gpr (compare_result, op0, op1)
                : gen_cmpsflt_gpr (compare_result, op0, op1);
              break;

            case E_DFmode:
              cmp = (flag_finite_math_only && !flag_trapping_math)
                ? gen_tstdflt_gpr (compare_result, op0, op1)
                : gen_cmpdflt_gpr (compare_result, op0, op1);
              break;

            case E_TFmode:
            case E_IFmode:
            case E_KFmode:
              cmp = (flag_finite_math_only && !flag_trapping_math)
                ? gen_tsttflt_gpr (compare_result, op0, op1)
                : gen_cmptflt_gpr (compare_result, op0, op1);
              break;

            default:
              gcc_unreachable ();
            }
          break;

        default:
          gcc_unreachable ();
        }

      /* Synthesize LE and GE from LT/GT || EQ.  */
      if (code == LE || code == GE)
        {
          emit_insn (cmp);

          compare_result2 = gen_reg_rtx (CCFPmode);

          /* Do the EQ.  */
          switch (op_mode)
            {
            case E_SFmode:
              cmp = (flag_finite_math_only && !flag_trapping_math)
                ? gen_tstsfeq_gpr (compare_result2, op0, op1)
                : gen_cmpsfeq_gpr (compare_result2, op0, op1);
              break;

            case E_DFmode:
              cmp = (flag_finite_math_only && !flag_trapping_math)
                ? gen_tstdfeq_gpr (compare_result2, op0, op1)
                : gen_cmpdfeq_gpr (compare_result2, op0, op1);
              break;

            case E_TFmode:
            case E_IFmode:
            case E_KFmode:
              cmp = (flag_finite_math_only && !flag_trapping_math)
                ? gen_tsttfeq_gpr (compare_result2, op0, op1)
                : gen_cmptfeq_gpr (compare_result2, op0, op1);
              break;

            default:
              gcc_unreachable ();
            }

          emit_insn (cmp);

          /* OR them together.  */
          or_result = gen_reg_rtx (CCFPmode);
          cmp = gen_e500_cr_ior_compare (or_result, compare_result,
                                         compare_result2);
          compare_result = or_result;
        }

      code = reverse_p ? NE : EQ;

      emit_insn (cmp);
    }

  /* IEEE 128-bit support in VSX registers when we do not have hardware
     support.  */
  else if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
    {
      rtx libfunc = NULL_RTX;
      bool check_nan = false;
      rtx dest;

      switch (code)
        {
        case EQ:
        case NE:
          libfunc = optab_libfunc (eq_optab, mode);
          break;

        case GT:
        case GE:
          libfunc = optab_libfunc (ge_optab, mode);
          break;

        case LT:
        case LE:
          libfunc = optab_libfunc (le_optab, mode);
          break;

        case UNORDERED:
        case ORDERED:
          libfunc = optab_libfunc (unord_optab, mode);
          code = (code == UNORDERED) ? NE : EQ;
          break;

        case UNGE:
        case UNGT:
          check_nan = true;
          libfunc = optab_libfunc (ge_optab, mode);
          code = (code == UNGE) ? GE : GT;
          break;

        case UNLE:
        case UNLT:
          check_nan = true;
          libfunc = optab_libfunc (le_optab, mode);
          code = (code == UNLE) ? LE : LT;
          break;

        case UNEQ:
        case LTGT:
          check_nan = true;
          libfunc = optab_libfunc (eq_optab, mode);
          code = (code == UNEQ) ? EQ : NE;
          break;

        default:
          gcc_unreachable ();
        }

      gcc_assert (libfunc);

      if (!check_nan)
        dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
                                        SImode, op0, mode, op1, mode);

      /* The library signals an exception for signalling NaNs, so we need to
         handle isgreater, etc. by first checking isordered.  */
      else
        {
          rtx ne_rtx, normal_dest, unord_dest;
          rtx unord_func = optab_libfunc (unord_optab, mode);
          rtx join_label = gen_label_rtx ();
          rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
          rtx unord_cmp = gen_reg_rtx (comp_mode);

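          /* A sketch of the UNGT case (libcall names follow the usual
             soft-float convention and are illustrative): the result is
             true if __unordkf2 (a, b) reports unordered, and otherwise
             comes from __gekf2 (a, b) > 0.  */
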
          /* Test for either value being a NaN.  */
          gcc_assert (unord_func);
          unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
                                                SImode, op0, mode, op1, mode);

          /* Set the result to 1 if either value is a NaN, and jump to the
             join label.  */
          dest = gen_reg_rtx (SImode);
          emit_move_insn (dest, const1_rtx);
          emit_insn (gen_rtx_SET (unord_cmp,
                                  gen_rtx_COMPARE (comp_mode, unord_dest,
                                                   const0_rtx)));

          ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
          emit_jump_insn (gen_rtx_SET (pc_rtx,
                                       gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
                                                             join_ref,
                                                             pc_rtx)));

          /* Do the normal comparison, knowing that the values are not
             NaNs.  */
          normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
                                                 SImode, op0, mode, op1, mode);

          emit_insn (gen_cstoresi4 (dest,
                                    gen_rtx_fmt_ee (code, SImode, normal_dest,
                                                    const0_rtx),
                                    normal_dest, const0_rtx));

          /* Join the NaN and non-NaN paths.  Compare dest against 0.  */
          emit_label (join_label);
          code = NE;
        }

      emit_insn (gen_rtx_SET (compare_result,
                              gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
    }

  else
    {
      /* Generate XLC-compatible TFmode compare as PARALLEL with extra
         CLOBBERs to match cmptf_internal2 pattern.  */
      if (comp_mode == CCFPmode && TARGET_XL_COMPAT
          && FLOAT128_IBM_P (GET_MODE (op0))
          && TARGET_HARD_FLOAT && TARGET_FPRS)
        emit_insn (gen_rtx_PARALLEL (VOIDmode,
          gen_rtvec (10,
                     gen_rtx_SET (compare_result,
                                  gen_rtx_COMPARE (comp_mode, op0, op1)),
                     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
                     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
                     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
                     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
                     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
                     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
                     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
                     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
                     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
      else if (GET_CODE (op1) == UNSPEC
               && XINT (op1, 1) == UNSPEC_SP_TEST)
        {
          rtx op1b = XVECEXP (op1, 0, 0);
          comp_mode = CCEQmode;
          compare_result = gen_reg_rtx (CCEQmode);
          if (TARGET_64BIT)
            emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
          else
            emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
        }
      else
        emit_insn (gen_rtx_SET (compare_result,
                                gen_rtx_COMPARE (comp_mode, op0, op1)));
    }

  /* Some kinds of FP comparisons need an OR operation;
     under flag_finite_math_only we don't bother.  */
  if (FLOAT_MODE_P (mode)
      && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
      && !flag_finite_math_only
      && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
      && (code == LE || code == GE
          || code == UNEQ || code == LTGT
          || code == UNGT || code == UNLT))
    {
      enum rtx_code or1, or2;
      rtx or1_rtx, or2_rtx, compare2_rtx;
      rtx or_result = gen_reg_rtx (CCEQmode);

      switch (code)
        {
        case LE: or1 = LT; or2 = EQ; break;
        case GE: or1 = GT; or2 = EQ; break;
        case UNEQ: or1 = UNORDERED; or2 = EQ; break;
        case LTGT: or1 = LT; or2 = GT; break;
        case UNGT: or1 = UNORDERED; or2 = GT; break;
        case UNLT: or1 = UNORDERED; or2 = LT; break;
        default: gcc_unreachable ();
        }
      validate_condition_mode (or1, comp_mode);
      validate_condition_mode (or2, comp_mode);
      or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
      or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
      compare2_rtx = gen_rtx_COMPARE (CCEQmode,
                                      gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
                                      const_true_rtx);
      emit_insn (gen_rtx_SET (or_result, compare2_rtx));

      compare_result = or_result;
      code = EQ;
    }

  validate_condition_mode (code, GET_MODE (compare_result));

  return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
}

\f
/* Return the diagnostic message string if the binary operation OP is
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */

static const char*
rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
                          const_tree type1,
                          const_tree type2)
{
  machine_mode mode1 = TYPE_MODE (type1);
  machine_mode mode2 = TYPE_MODE (type2);

  /* For complex modes, use the inner type.  */
  if (COMPLEX_MODE_P (mode1))
    mode1 = GET_MODE_INNER (mode1);

  if (COMPLEX_MODE_P (mode2))
    mode2 = GET_MODE_INNER (mode2);

  /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
     double to intermix unless -mfloat128-convert.  */
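
  /* For example (declarations illustrative), given "__float128 a;
     __ibm128 b;" the expression "a + b" draws the first error below
     unless -mfloat128-convert is used.  */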
  if (mode1 == mode2)
    return NULL;

  if (!TARGET_FLOAT128_CVT)
    {
      if ((mode1 == KFmode && mode2 == IFmode)
          || (mode1 == IFmode && mode2 == KFmode))
        return N_("__float128 and __ibm128 cannot be used in the same "
                  "expression");

      if (TARGET_IEEEQUAD
          && ((mode1 == IFmode && mode2 == TFmode)
              || (mode1 == TFmode && mode2 == IFmode)))
        return N_("__ibm128 and long double cannot be used in the same "
                  "expression");

      if (!TARGET_IEEEQUAD
          && ((mode1 == KFmode && mode2 == TFmode)
              || (mode1 == TFmode && mode2 == KFmode)))
        return N_("__float128 and long double cannot be used in the same "
                  "expression");
    }

  return NULL;
}

\f
/* Expand floating point conversion to/from __float128 and __ibm128.  */

void
rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
{
  machine_mode dest_mode = GET_MODE (dest);
  machine_mode src_mode = GET_MODE (src);
  convert_optab cvt = unknown_optab;
  bool do_move = false;
  rtx libfunc = NULL_RTX;
  rtx dest2;
  typedef rtx (*rtx_2func_t) (rtx, rtx);
  rtx_2func_t hw_convert = (rtx_2func_t)0;
  size_t kf_or_tf;

  struct hw_conv_t {
    rtx_2func_t from_df;
    rtx_2func_t from_sf;
    rtx_2func_t from_si_sign;
    rtx_2func_t from_si_uns;
    rtx_2func_t from_di_sign;
    rtx_2func_t from_di_uns;
    rtx_2func_t to_df;
    rtx_2func_t to_sf;
    rtx_2func_t to_si_sign;
    rtx_2func_t to_si_uns;
    rtx_2func_t to_di_sign;
    rtx_2func_t to_di_uns;
  } hw_conversions[2] = {
    /* conversions to/from KFmode */
    {
      gen_extenddfkf2_hw,      /* KFmode <- DFmode.  */
      gen_extendsfkf2_hw,      /* KFmode <- SFmode.  */
      gen_float_kfsi2_hw,      /* KFmode <- SImode (signed).  */
      gen_floatuns_kfsi2_hw,   /* KFmode <- SImode (unsigned).  */
      gen_float_kfdi2_hw,      /* KFmode <- DImode (signed).  */
      gen_floatuns_kfdi2_hw,   /* KFmode <- DImode (unsigned).  */
      gen_trunckfdf2_hw,       /* DFmode <- KFmode.  */
      gen_trunckfsf2_hw,       /* SFmode <- KFmode.  */
      gen_fix_kfsi2_hw,        /* SImode <- KFmode (signed).  */
      gen_fixuns_kfsi2_hw,     /* SImode <- KFmode (unsigned).  */
      gen_fix_kfdi2_hw,        /* DImode <- KFmode (signed).  */
      gen_fixuns_kfdi2_hw,     /* DImode <- KFmode (unsigned).  */
    },

    /* conversions to/from TFmode */
    {
      gen_extenddftf2_hw,      /* TFmode <- DFmode.  */
      gen_extendsftf2_hw,      /* TFmode <- SFmode.  */
      gen_float_tfsi2_hw,      /* TFmode <- SImode (signed).  */
      gen_floatuns_tfsi2_hw,   /* TFmode <- SImode (unsigned).  */
      gen_float_tfdi2_hw,      /* TFmode <- DImode (signed).  */
      gen_floatuns_tfdi2_hw,   /* TFmode <- DImode (unsigned).  */
      gen_trunctfdf2_hw,       /* DFmode <- TFmode.  */
      gen_trunctfsf2_hw,       /* SFmode <- TFmode.  */
      gen_fix_tfsi2_hw,        /* SImode <- TFmode (signed).  */
      gen_fixuns_tfsi2_hw,     /* SImode <- TFmode (unsigned).  */
      gen_fix_tfdi2_hw,        /* DImode <- TFmode (signed).  */
      gen_fixuns_tfdi2_hw,     /* DImode <- TFmode (unsigned).  */
    },
  };
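
  /* Usage sketch: converting a signed DImode value to KFmode picks
     hw_conversions[0].from_di_sign (gen_float_kfdi2_hw) when the hardware
     instructions are available (TARGET_FLOAT128_HW below); otherwise the
     corresponding sfloat_optab libcall is used instead.  */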

  if (dest_mode == src_mode)
    gcc_unreachable ();

  /* Eliminate memory operations.  */
  if (MEM_P (src))
    src = force_reg (src_mode, src);

  if (MEM_P (dest))
    {
      rtx tmp = gen_reg_rtx (dest_mode);
      rs6000_expand_float128_convert (tmp, src, unsigned_p);
      rs6000_emit_move (dest, tmp, dest_mode);
      return;
    }

  /* Convert to IEEE 128-bit floating point.  */
  if (FLOAT128_IEEE_P (dest_mode))
    {
      if (dest_mode == KFmode)
        kf_or_tf = 0;
      else if (dest_mode == TFmode)
        kf_or_tf = 1;
      else
        gcc_unreachable ();

      switch (src_mode)
        {
        case E_DFmode:
          cvt = sext_optab;
          hw_convert = hw_conversions[kf_or_tf].from_df;
          break;

        case E_SFmode:
          cvt = sext_optab;
          hw_convert = hw_conversions[kf_or_tf].from_sf;
          break;

        case E_KFmode:
        case E_IFmode:
        case E_TFmode:
          if (FLOAT128_IBM_P (src_mode))
            cvt = sext_optab;
          else
            do_move = true;
          break;

        case E_SImode:
          if (unsigned_p)
            {
              cvt = ufloat_optab;
              hw_convert = hw_conversions[kf_or_tf].from_si_uns;
            }
          else
            {
              cvt = sfloat_optab;
              hw_convert = hw_conversions[kf_or_tf].from_si_sign;
            }
          break;

        case E_DImode:
          if (unsigned_p)
            {
              cvt = ufloat_optab;
              hw_convert = hw_conversions[kf_or_tf].from_di_uns;
            }
          else
            {
              cvt = sfloat_optab;
              hw_convert = hw_conversions[kf_or_tf].from_di_sign;
            }
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* Convert from IEEE 128-bit floating point.  */
  else if (FLOAT128_IEEE_P (src_mode))
    {
      if (src_mode == KFmode)
        kf_or_tf = 0;
      else if (src_mode == TFmode)
        kf_or_tf = 1;
      else
        gcc_unreachable ();

      switch (dest_mode)
        {
        case E_DFmode:
          cvt = trunc_optab;
          hw_convert = hw_conversions[kf_or_tf].to_df;
          break;

        case E_SFmode:
          cvt = trunc_optab;
          hw_convert = hw_conversions[kf_or_tf].to_sf;
          break;

        case E_KFmode:
        case E_IFmode:
        case E_TFmode:
          if (FLOAT128_IBM_P (dest_mode))
            cvt = trunc_optab;
          else
            do_move = true;
          break;

        case E_SImode:
          if (unsigned_p)
            {
              cvt = ufix_optab;
              hw_convert = hw_conversions[kf_or_tf].to_si_uns;
            }
          else
            {
              cvt = sfix_optab;
              hw_convert = hw_conversions[kf_or_tf].to_si_sign;
            }
          break;

        case E_DImode:
          if (unsigned_p)
            {
              cvt = ufix_optab;
              hw_convert = hw_conversions[kf_or_tf].to_di_uns;
            }
          else
            {
              cvt = sfix_optab;
              hw_convert = hw_conversions[kf_or_tf].to_di_sign;
            }
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* Both IBM format.  */
  else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
    do_move = true;

  else
    gcc_unreachable ();

  /* Handle conversion between TFmode/KFmode.  */
  if (do_move)
    emit_move_insn (dest, gen_lowpart (dest_mode, src));

  /* Handle conversion if we have hardware support.  */
  else if (TARGET_FLOAT128_HW && hw_convert)
    emit_insn ((hw_convert) (dest, src));

  /* Call an external function to do the conversion.  */
  else if (cvt != unknown_optab)
    {
      libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
      gcc_assert (libfunc != NULL_RTX);

      dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
                                       src, src_mode);

      gcc_assert (dest2 != NULL_RTX);
      if (!rtx_equal_p (dest, dest2))
        emit_move_insn (dest, dest2);
    }

  else
    gcc_unreachable ();

  return;
}

\f
/* Emit the RTL for an sISEL pattern.  */

void
rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
{
  rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
}

/* Emit RTL that sets a register to zero if OP1 and OP2 are equal.  SCRATCH
   can be used as that dest register.  Return the dest register.  */

rtx
rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
{
  if (op2 == const0_rtx)
    return op1;

  if (GET_CODE (scratch) == SCRATCH)
    scratch = gen_reg_rtx (mode);

  if (logical_operand (op2, mode))
    emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
  else
    emit_insn (gen_rtx_SET (scratch,
                            gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));

  return scratch;
}

void
rs6000_emit_sCOND (machine_mode mode, rtx operands[])
{
  rtx condition_rtx;
  machine_mode op_mode;
  enum rtx_code cond_code;
  rtx result = operands[0];

  condition_rtx = rs6000_generate_compare (operands[1], mode);
  cond_code = GET_CODE (condition_rtx);

  if (FLOAT_MODE_P (mode)
      && !TARGET_FPRS && TARGET_HARD_FLOAT)
    {
      rtx t;

      PUT_MODE (condition_rtx, SImode);
      t = XEXP (condition_rtx, 0);

      gcc_assert (cond_code == NE || cond_code == EQ);

      if (cond_code == NE)
        emit_insn (gen_e500_flip_gt_bit (t, t));

      emit_insn (gen_move_from_CR_gt_bit (result, t));
      return;
    }

  if (cond_code == NE
      || cond_code == GE || cond_code == LE
      || cond_code == GEU || cond_code == LEU
      || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
    {
      rtx not_result = gen_reg_rtx (CCEQmode);
      rtx not_op, rev_cond_rtx;
      machine_mode cc_mode;

      cc_mode = GET_MODE (XEXP (condition_rtx, 0));

      rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
                                     SImode, XEXP (condition_rtx, 0), const0_rtx);
      not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
      emit_insn (gen_rtx_SET (not_result, not_op));
      condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
    }

  op_mode = GET_MODE (XEXP (operands[1], 0));
  if (op_mode == VOIDmode)
    op_mode = GET_MODE (XEXP (operands[1], 1));

  if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
    {
      PUT_MODE (condition_rtx, DImode);
      convert_move (result, condition_rtx, 0);
    }
  else
    {
      PUT_MODE (condition_rtx, SImode);
      emit_insn (gen_rtx_SET (result, condition_rtx));
    }
}

/* Emit a conditional branch to label OPERANDS[3], testing the comparison
   OPERANDS[0] in mode MODE.  */

void
rs6000_emit_cbranch (machine_mode mode, rtx operands[])
{
  rtx condition_rtx, loc_ref;

  condition_rtx = rs6000_generate_compare (operands[0], mode);
  loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
  emit_jump_insn (gen_rtx_SET (pc_rtx,
                               gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
                                                     loc_ref, pc_rtx)));
}

/* Return the string to output a conditional branch to LABEL, which is
   the operand template of the label, or NULL if the branch is really a
   conditional return.

   OP is the conditional expression.  XEXP (OP, 0) is assumed to be a
   condition code register and its mode specifies what kind of
   comparison we made.

   REVERSED is nonzero if we should reverse the sense of the comparison.

   INSN is the insn.  */

char *
output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
{
  static char string[64];
  enum rtx_code code = GET_CODE (op);
  rtx cc_reg = XEXP (op, 0);
  machine_mode mode = GET_MODE (cc_reg);
  int cc_regno = REGNO (cc_reg) - CR0_REGNO;
  int need_longbranch = label != NULL && get_attr_length (insn) == 8;
  int really_reversed = reversed ^ need_longbranch;
  char *s = string;
  const char *ccode;
  const char *pred;
  rtx note;

  validate_condition_mode (code, mode);

  /* Work out which way this really branches.  We could use
     reverse_condition_maybe_unordered here always but this
     makes the resulting assembler clearer.  */
  if (really_reversed)
    {
      /* Reversal of FP compares needs care -- an ordered compare
         becomes an unordered compare and vice versa.  */
      if (mode == CCFPmode)
        code = reverse_condition_maybe_unordered (code);
      else
        code = reverse_condition (code);
    }

  if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
    {
      /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
         to the GT bit.  */
      switch (code)
        {
        case EQ:
          /* Opposite of GT.  */
          code = GT;
          break;

        case NE:
          code = UNLE;
          break;

        default:
          gcc_unreachable ();
        }
    }

  switch (code)
    {
      /* Not all of these are actually distinct opcodes, but
         we distinguish them for clarity of the resulting assembler.  */
    case NE: case LTGT:
      ccode = "ne"; break;
    case EQ: case UNEQ:
      ccode = "eq"; break;
    case GE: case GEU:
      ccode = "ge"; break;
    case GT: case GTU: case UNGT:
      ccode = "gt"; break;
    case LE: case LEU:
      ccode = "le"; break;
    case LT: case LTU: case UNLT:
      ccode = "lt"; break;
    case UNORDERED: ccode = "un"; break;
    case ORDERED: ccode = "nu"; break;
    case UNGE: ccode = "nl"; break;
    case UNLE: ccode = "ng"; break;
    default:
      gcc_unreachable ();
    }

  /* Maybe we have a guess as to how likely the branch is.  */
  pred = "";
  note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
  if (note != NULL_RTX)
    {
      /* PROB is the difference from 50%.  */
      int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
                   .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;

      /* Only hint for highly probable/improbable branches on newer cpus when
         we have real profile data, as static prediction overrides processor
         dynamic prediction.  For older cpus we may as well always hint, but
         assume not taken for branches that are very close to 50% as a
         mispredicted taken branch is more expensive than a
         mispredicted not-taken branch.  */
      if (rs6000_always_hint
          || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
              && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
              && br_prob_note_reliable_p (note)))
        {
          if (abs (prob) > REG_BR_PROB_BASE / 20
              && ((prob > 0) ^ need_longbranch))
            pred = "+";
          else
            pred = "-";
        }
    }
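
  /* Putting it together (CR field and label illustrative): a likely
     equality branch on cr0 prints as "beq+ 0,.L42", while a reversed
     out-of-range branch becomes "bne- 0,$+8" followed by "b .L42".  */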

  if (label == NULL)
    s += sprintf (s, "b%slr%s ", ccode, pred);
  else
    s += sprintf (s, "b%s%s ", ccode, pred);

  /* We need to escape any '%' characters in the reg_names string.
     Assume they'd only be the first character....  */
  if (reg_names[cc_regno + CR0_REGNO][0] == '%')
    *s++ = '%';
  s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);

  if (label != NULL)
    {
      /* If the branch distance was too far, we may have to use an
         unconditional branch to go the distance.  */
      if (need_longbranch)
        s += sprintf (s, ",$+8\n\tb %s", label);
      else
        s += sprintf (s, ",%s", label);
    }

  return string;
}

/* Return the string to flip the GT bit on a CR.  */
char *
output_e500_flip_gt_bit (rtx dst, rtx src)
{
  static char string[64];
  int a, b;

  gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
              && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));

  /* GT bit.  */
  a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
  b = 4 * (REGNO (src) - CR0_REGNO) + 1;
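
  /* For example (register choice illustrative), flipping cr2's GT bit
     into cr3 gives a = 4*3+1 = 13 and b = 4*2+1 = 9, i.e. "crnot 13,9".  */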

  sprintf (string, "crnot %d,%d", a, b);
  return string;
}

/* Return insn for VSX or Altivec comparisons.  */

static rtx
rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
{
  rtx mask;
  machine_mode mode = GET_MODE (op0);

  switch (code)
    {
    default:
      break;

    case GE:
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        return NULL_RTX;
      /* FALLTHRU */

    case EQ:
    case GT:
    case GTU:
    case ORDERED:
    case UNORDERED:
    case UNEQ:
    case LTGT:
      mask = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
      return mask;
    }

  return NULL_RTX;
}

/* Emit vector compare for operands OP0 and OP1 using code RCODE.
   DMODE is expected destination mode.  This is a recursive function.  */

static rtx
rs6000_emit_vector_compare (enum rtx_code rcode,
                            rtx op0, rtx op1,
                            machine_mode dmode)
{
  rtx mask;
  bool swap_operands = false;
  bool try_again = false;

  gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
  gcc_assert (GET_MODE (op0) == GET_MODE (op1));

  /* See if the comparison works as is.  */
  mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
  if (mask)
    return mask;

  switch (rcode)
    {
    case LT:
      rcode = GT;
      swap_operands = true;
      try_again = true;
      break;
    case LTU:
      rcode = GTU;
      swap_operands = true;
      try_again = true;
      break;
    case NE:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
      /* Invert condition and try again.
         e.g., A != B becomes ~(A==B).  */
      {
        enum rtx_code rev_code;
        enum insn_code nor_code;
        rtx mask2;

        rev_code = reverse_condition_maybe_unordered (rcode);
        if (rev_code == UNKNOWN)
          return NULL_RTX;

        nor_code = optab_handler (one_cmpl_optab, dmode);
        if (nor_code == CODE_FOR_nothing)
          return NULL_RTX;

        mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
        if (!mask2)
          return NULL_RTX;

        mask = gen_reg_rtx (dmode);
        emit_insn (GEN_FCN (nor_code) (mask, mask2));
        return mask;
      }
      break;
    case GE:
    case GEU:
    case LE:
    case LEU:
      /* Try GT/GTU/LT/LTU OR EQ */
      {
        rtx c_rtx, eq_rtx;
        enum insn_code ior_code;
        enum rtx_code new_code;

        switch (rcode)
          {
          case GE:
            new_code = GT;
            break;

          case GEU:
            new_code = GTU;
            break;

          case LE:
            new_code = LT;
            break;

          case LEU:
            new_code = LTU;
            break;

          default:
            gcc_unreachable ();
          }

        ior_code = optab_handler (ior_optab, dmode);
        if (ior_code == CODE_FOR_nothing)
          return NULL_RTX;

        c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
        if (!c_rtx)
          return NULL_RTX;

        eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
        if (!eq_rtx)
          return NULL_RTX;

        mask = gen_reg_rtx (dmode);
        emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
        return mask;
      }
      break;
    default:
      return NULL_RTX;
    }

  if (try_again)
    {
      if (swap_operands)
        std::swap (op0, op1);

      mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
      if (mask)
        return mask;
    }

  /* You only get two chances.  */
  return NULL_RTX;
}

/* Emit vector conditional expression.  DEST is destination.  OP_TRUE and
   OP_FALSE are two VEC_COND_EXPR operands.  CC_OP0 and CC_OP1 are the two
   operands for the relation operation COND.  */

int
rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
                              rtx cond, rtx cc_op0, rtx cc_op1)
{
  machine_mode dest_mode = GET_MODE (dest);
  machine_mode mask_mode = GET_MODE (cc_op0);
  enum rtx_code rcode = GET_CODE (cond);
  machine_mode cc_mode = CCmode;
  rtx mask;
  rtx cond2;
  bool invert_move = false;

  if (VECTOR_UNIT_NONE_P (dest_mode))
    return 0;

  gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
              && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));

  switch (rcode)
    {
      /* Swap operands if we can, and fall back to doing the operation as
         specified, and doing a NOR to invert the test.  */
    case NE:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
      /* Invert condition and try again.
         e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D.  */
      invert_move = true;
      rcode = reverse_condition_maybe_unordered (rcode);
      if (rcode == UNKNOWN)
        return 0;
      break;

    case GE:
    case LE:
      if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
        {
          /* Invert condition to avoid compound test.  */
          invert_move = true;
          rcode = reverse_condition (rcode);
        }
      break;

    case GTU:
    case GEU:
    case LTU:
    case LEU:
      /* Mark unsigned tests with CCUNSmode.  */
      cc_mode = CCUNSmode;

      /* Invert condition to avoid compound test if necessary.  */
      if (rcode == GEU || rcode == LEU)
        {
          invert_move = true;
          rcode = reverse_condition (rcode);
        }
      break;

    default:
      break;
    }

  /* Get the vector mask for the given relational operations.  */
  mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);

  if (!mask)
    return 0;

  if (invert_move)
    std::swap (op_true, op_false);

  /* Optimize vec1 == vec2, knowing that the mask generates -1/0.  */
  if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
      && (GET_CODE (op_true) == CONST_VECTOR
          || GET_CODE (op_false) == CONST_VECTOR))
    {
      rtx constant_0 = CONST0_RTX (dest_mode);
      rtx constant_m1 = CONSTM1_RTX (dest_mode);

      if (op_true == constant_m1 && op_false == constant_0)
        {
          emit_move_insn (dest, mask);
          return 1;
        }

      else if (op_true == constant_0 && op_false == constant_m1)
        {
          emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
          return 1;
        }

      /* If we can't use the vector comparison directly, perhaps we can use
         the mask for the true or false fields, instead of loading up a
         constant.  */
      if (op_true == constant_m1)
        op_true = mask;

      if (op_false == constant_0)
        op_false = mask;
    }

  if (!REG_P (op_true) && !SUBREG_P (op_true))
    op_true = force_reg (dest_mode, op_true);

  if (!REG_P (op_false) && !SUBREG_P (op_false))
    op_false = force_reg (dest_mode, op_false);

  cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
                          CONST0_RTX (dest_mode));
  emit_insn (gen_rtx_SET (dest,
                          gen_rtx_IF_THEN_ELSE (dest_mode,
                                                cond2,
                                                op_true,
                                                op_false)));
  return 1;
}

/* ISA 3.0 (power9) minmax subcase to emit a XSMAXCDP or XSMINCDP instruction
   for SF/DF scalars.  Move TRUE_COND to DEST if OP of the operands of the last
   comparison is nonzero/true, FALSE_COND if it is zero/false.  Return 0 if the
   hardware has no such operation.  */

static int
rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
{
  enum rtx_code code = GET_CODE (op);
  rtx op0 = XEXP (op, 0);
  rtx op1 = XEXP (op, 1);
  machine_mode compare_mode = GET_MODE (op0);
  machine_mode result_mode = GET_MODE (dest);
  bool max_p = false;

  if (result_mode != compare_mode)
    return 0;

  if (code == GE || code == GT)
    max_p = true;
  else if (code == LE || code == LT)
    max_p = false;
  else
    return 0;

  if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
    ;

  else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
    max_p = !max_p;

  else
    return 0;

  rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
  return 1;
}

/* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
   XXSEL instructions for SF/DF scalars.  Move TRUE_COND to DEST if OP of the
   operands of the last comparison is nonzero/true, FALSE_COND if it is
   zero/false.  Return 0 if the hardware has no such operation.  */

static int
rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
{
  enum rtx_code code = GET_CODE (op);
  rtx op0 = XEXP (op, 0);
  rtx op1 = XEXP (op, 1);
  machine_mode result_mode = GET_MODE (dest);
  rtx compare_rtx;
  rtx cmove_rtx;
  rtx clobber_rtx;

  if (!can_create_pseudo_p ())
    return 0;

  switch (code)
    {
    case EQ:
    case GE:
    case GT:
      break;

    case NE:
    case LT:
    case LE:
      code = swap_condition (code);
      std::swap (op0, op1);
      break;

    default:
      return 0;
    }

  /* Generate: [(parallel [(set (dest)
                                (if_then_else (op (cmp1) (cmp2))
                                              (true)
                                              (false)))
                           (clobber (scratch))])].  */

  compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
  cmove_rtx = gen_rtx_SET (dest,
                           gen_rtx_IF_THEN_ELSE (result_mode,
                                                 compare_rtx,
                                                 true_cond,
                                                 false_cond));

  clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
  emit_insn (gen_rtx_PARALLEL (VOIDmode,
                               gen_rtvec (2, cmove_rtx, clobber_rtx)));

  return 1;
}

/* Emit a conditional move: move TRUE_COND to DEST if OP of the
   operands of the last comparison is nonzero/true, FALSE_COND if it
   is zero/false.  Return 0 if the hardware has no such operation.  */

int
rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
{
  enum rtx_code code = GET_CODE (op);
  rtx op0 = XEXP (op, 0);
  rtx op1 = XEXP (op, 1);
  machine_mode compare_mode = GET_MODE (op0);
  machine_mode result_mode = GET_MODE (dest);
  rtx temp;
  bool is_against_zero;

  /* These modes should always match.  */
  if (GET_MODE (op1) != compare_mode
      /* In the isel case however, we can use a compare immediate, so
         op1 may be a small constant.  */
      && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
    return 0;
  if (GET_MODE (true_cond) != result_mode)
    return 0;
  if (GET_MODE (false_cond) != result_mode)
    return 0;

  /* See if we can use the ISA 3.0 (power9) min/max/compare functions.  */
  if (TARGET_P9_MINMAX
      && (compare_mode == SFmode || compare_mode == DFmode)
      && (result_mode == SFmode || result_mode == DFmode))
    {
      if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
        return 1;

      if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
        return 1;
    }

  /* Don't allow using floating point comparisons for integer results for
     now.  */
  if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
    return 0;

  /* First, work out if the hardware can do this at all, or
     if it's too slow....  */
  if (!FLOAT_MODE_P (compare_mode))
    {
      if (TARGET_ISEL)
        return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
      return 0;
    }
  else if (TARGET_HARD_FLOAT && !TARGET_FPRS
           && SCALAR_FLOAT_MODE_P (compare_mode))
    return 0;

  is_against_zero = op1 == CONST0_RTX (compare_mode);

  /* A floating-point subtract might overflow, underflow, or produce
     an inexact result, thus changing the floating-point flags, so it
     can't be generated if we care about that.  It's safe if one side
     of the construct is zero, since then no subtract will be
     generated.  */
  if (SCALAR_FLOAT_MODE_P (compare_mode)
      && flag_trapping_math && ! is_against_zero)
    return 0;

  /* Eliminate half of the comparisons by switching operands; this
     makes the remaining code simpler.  */
  if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
      || code == LTGT || code == LT || code == UNLE)
    {
      code = reverse_condition_maybe_unordered (code);
      temp = true_cond;
      true_cond = false_cond;
      false_cond = temp;
    }

  /* UNEQ and LTGT take four instructions for a comparison with zero;
     it'll probably be faster to use a branch here too.  */
  if (code == UNEQ && HONOR_NANS (compare_mode))
    return 0;

  /* We're going to try to implement comparisons by performing
     a subtract, then comparing against zero.  Unfortunately,
     Inf - Inf is NaN which is not zero, and so if we don't
     know that the operand is finite and the comparison
     would treat EQ different to UNORDERED, we can't do it.  */
  if (HONOR_INFINITIES (compare_mode)
      && code != GT && code != UNGE
      && (GET_CODE (op1) != CONST_DOUBLE
          || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
      /* Constructs of the form (a OP b ? a : b) are safe.  */
      && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
          || (! rtx_equal_p (op0, true_cond)
              && ! rtx_equal_p (op1, true_cond))))
    return 0;

  /* At this point we know we can use fsel.  */

  /* Reduce the comparison to a comparison against zero.  */
  if (! is_against_zero)
    {
      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
      op0 = temp;
      op1 = CONST0_RTX (compare_mode);
    }

  /* If we don't care about NaNs we can reduce some of the comparisons
     down to faster ones.  */
  if (! HONOR_NANS (compare_mode))
    switch (code)
      {
      case GT:
        code = LE;
        temp = true_cond;
        true_cond = false_cond;
        false_cond = temp;
        break;
      case UNGE:
        code = GE;
        break;
      case UNEQ:
        code = EQ;
        break;
      default:
        break;
      }

  /* Now, reduce everything down to a GE.  */
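
  /* For instance, EQ is implemented below as -fabs (a) >= 0, which can
     only hold when a is zero, and LE becomes -a >= 0.  */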
  switch (code)
    {
    case GE:
      break;

    case LE:
      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
      op0 = temp;
      break;

    case ORDERED:
      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
      op0 = temp;
      break;

    case EQ:
      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp,
                              gen_rtx_NEG (compare_mode,
                                           gen_rtx_ABS (compare_mode, op0))));
      op0 = temp;
      break;

    case UNGE:
      /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
      temp = gen_reg_rtx (result_mode);
      emit_insn (gen_rtx_SET (temp,
                              gen_rtx_IF_THEN_ELSE (result_mode,
                                                    gen_rtx_GE (VOIDmode,
                                                                op0, op1),
                                                    true_cond, false_cond)));
      false_cond = true_cond;
      true_cond = temp;

      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
      op0 = temp;
      break;

    case GT:
      /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
      temp = gen_reg_rtx (result_mode);
      emit_insn (gen_rtx_SET (temp,
                              gen_rtx_IF_THEN_ELSE (result_mode,
                                                    gen_rtx_GE (VOIDmode,
                                                                op0, op1),
                                                    true_cond, false_cond)));
      true_cond = false_cond;
      false_cond = temp;

      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
      op0 = temp;
      break;

    default:
      gcc_unreachable ();
    }

  emit_insn (gen_rtx_SET (dest,
                          gen_rtx_IF_THEN_ELSE (result_mode,
                                                gen_rtx_GE (VOIDmode,
                                                            op0, op1),
                                                true_cond, false_cond)));
  return 1;
}

/* Same as above, but for ints (isel).  */

static int
rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
{
  rtx condition_rtx, cr;
  machine_mode mode = GET_MODE (dest);
  enum rtx_code cond_code;
  rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
  bool signedp;

  if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
    return 0;

  /* We still have to do the compare, because isel doesn't do a
     compare, it just looks at the CRx bits set by a previous compare
     instruction.  */
  condition_rtx = rs6000_generate_compare (op, mode);
  cond_code = GET_CODE (condition_rtx);
  cr = XEXP (condition_rtx, 0);
  signedp = GET_MODE (cr) == CCmode;

  isel_func = (mode == SImode
               ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
               : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));

  switch (cond_code)
    {
    case LT: case GT: case LTU: case GTU: case EQ:
      /* isel handles these directly.  */
      break;

    default:
      /* We need to swap the sense of the comparison.  */
      {
        std::swap (false_cond, true_cond);
        PUT_CODE (condition_rtx, reverse_condition (cond_code));
      }
      break;
    }

  false_cond = force_reg (mode, false_cond);
  if (true_cond != const0_rtx)
    true_cond = force_reg (mode, true_cond);

  emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));

  return 1;
}

const char *
output_isel (rtx *operands)
{
  enum rtx_code code;

  code = GET_CODE (operands[1]);

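  /* isel has no reversed forms; for GE, GEU, LE, LEU and NE we flip the
     condition and swap the two source operands instead, so e.g. a GE
     test is emitted as an LT isel with operands 2 and 3 exchanged.  */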
  if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
    {
      gcc_assert (GET_CODE (operands[2]) == REG
                  && GET_CODE (operands[3]) == REG);
      PUT_CODE (operands[1], reverse_condition (code));
      return "isel %0,%3,%2,%j1";
    }

  return "isel %0,%2,%3,%j1";
}

void
rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
{
  machine_mode mode = GET_MODE (op0);
  enum rtx_code c;
  rtx target;

  /* VSX/altivec have direct min/max insns.  */
  if ((code == SMAX || code == SMIN)
      && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
          || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
    {
      emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
      return;
    }

  if (code == SMAX || code == SMIN)
    c = GE;
  else
    c = GEU;

26161 | if (code == SMAX || code == UMAX) | |
26162 | target = emit_conditional_move (dest, c, op0, op1, mode, | |
26163 | op0, op1, mode, 0); | |
26164 | else | |
26165 | target = emit_conditional_move (dest, c, op0, op1, mode, | |
26166 | op1, op0, mode, 0); | |
26167 | gcc_assert (target); | |
26168 | if (target != dest) | |
26169 | emit_move_insn (dest, target); | |
26170 | } | |
26171 | ||
26172 | /* Split a signbit operation on 64-bit machines with direct move. Also allow | |
26173 | for the value to come from memory or to be already loaded into a GPR. */ | |
26174 | ||
26175 | void | |
26176 | rs6000_split_signbit (rtx dest, rtx src) | |
26177 | { | |
26178 | machine_mode d_mode = GET_MODE (dest); | |
26179 | machine_mode s_mode = GET_MODE (src); | |
26180 | rtx dest_di = (d_mode == DImode) ? dest : gen_lowpart (DImode, dest); | |
26181 | rtx shift_reg = dest_di; | |
26182 | ||
26183 | gcc_assert (FLOAT128_IEEE_P (s_mode) && TARGET_POWERPC64); | |
26184 | ||
26185 | if (MEM_P (src)) | |
26186 | { | |
26187 | rtx mem = (WORDS_BIG_ENDIAN | |
26188 | ? adjust_address (src, DImode, 0) | |
26189 | : adjust_address (src, DImode, 8)); | |
26190 | emit_insn (gen_rtx_SET (dest_di, mem)); | |
26191 | } | |
26192 | ||
26193 | else | |
26194 | { | |
26195 | unsigned int r = reg_or_subregno (src); | |
26196 | ||
26197 | if (INT_REGNO_P (r)) | |
26198 | shift_reg = gen_rtx_REG (DImode, r + (BYTES_BIG_ENDIAN == 0)); | |
26199 | ||
26200 | else | |
26201 | { | |
26202 | /* Generate the special mfvsrd instruction to get it in a GPR. */ | |
26203 | gcc_assert (VSX_REGNO_P (r)); | |
26204 | if (s_mode == KFmode) | |
26205 | emit_insn (gen_signbitkf2_dm2 (dest_di, src)); | |
26206 | else | |
26207 | emit_insn (gen_signbittf2_dm2 (dest_di, src)); | |
26208 | } | |
26209 | } | |
26210 | ||
26211 | emit_insn (gen_lshrdi3 (dest_di, shift_reg, GEN_INT (63))); | |
26212 | return; | |
26213 | } | |
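
/* Illustrative sketch: the split above computes, in effect,

     signbit (x) = HIGH_DOUBLEWORD (x) >> 63

   For a value in memory on a little-endian system the high doubleword
   sits at offset 8, so the emitted sequence is roughly

     ld   r3,8(rA)     # doubleword holding the sign bit
     srdi r3,r3,63     # move the sign down to bit 0

   (registers are hypothetical; the actual insns come from the machine
   description patterns used above).  */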
26214 | ||
26215 | /* A subroutine of the atomic operation splitters. Jump to LABEL if | |
26216 | COND is true. Mark the jump as unlikely to be taken. */ | |
26217 | ||
26218 | static void | |
26219 | emit_unlikely_jump (rtx cond, rtx label) | |
26220 | { | |
01e91138 | 26221 | rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx); |
26222 | rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x)); | |
61cb1816 | 26223 | add_reg_br_prob_note (insn, profile_probability::very_unlikely ()); |
01e91138 | 26224 | } |
26225 | ||
26226 | /* A subroutine of the atomic operation splitters. Emit a load-locked | |
26227 | instruction in MODE. For QI/HImode, possibly use a pattern that includes | |
26228 | the zero_extend operation. */ | |
26229 | ||
26230 | static void | |
26231 | emit_load_locked (machine_mode mode, rtx reg, rtx mem) | |
26232 | { | |
26233 | rtx (*fn) (rtx, rtx) = NULL; | |
26234 | ||
26235 | switch (mode) | |
26236 | { | |
916ace94 | 26237 | case E_QImode: |
01e91138 | 26238 | fn = gen_load_lockedqi; |
26239 | break; | |
916ace94 | 26240 | case E_HImode: |
01e91138 | 26241 | fn = gen_load_lockedhi; |
26242 | break; | |
916ace94 | 26243 | case E_SImode: |
01e91138 | 26244 | if (GET_MODE (mem) == QImode) |
26245 | fn = gen_load_lockedqi_si; | |
26246 | else if (GET_MODE (mem) == HImode) | |
26247 | fn = gen_load_lockedhi_si; | |
26248 | else | |
26249 | fn = gen_load_lockedsi; | |
26250 | break; | |
916ace94 | 26251 | case E_DImode: |
01e91138 | 26252 | fn = gen_load_lockeddi; |
26253 | break; | |
916ace94 | 26254 | case E_TImode: |
01e91138 | 26255 | fn = gen_load_lockedti; |
26256 | break; | |
26257 | default: | |
26258 | gcc_unreachable (); | |
26259 | } | |
26260 | emit_insn (fn (reg, mem)); | |
26261 | } | |
26262 | ||
26263 | /* A subroutine of the atomic operation splitters. Emit a store-conditional | |
26264 | instruction in MODE. */ | |
26265 | ||
26266 | static void | |
26267 | emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val) | |
26268 | { | |
26269 | rtx (*fn) (rtx, rtx, rtx) = NULL; | |
26270 | ||
26271 | switch (mode) | |
26272 | { | |
916ace94 | 26273 | case E_QImode: |
01e91138 | 26274 | fn = gen_store_conditionalqi; |
26275 | break; | |
916ace94 | 26276 | case E_HImode: |
01e91138 | 26277 | fn = gen_store_conditionalhi; |
26278 | break; | |
916ace94 | 26279 | case E_SImode: |
01e91138 | 26280 | fn = gen_store_conditionalsi; |
26281 | break; | |
916ace94 | 26282 | case E_DImode: |
01e91138 | 26283 | fn = gen_store_conditionaldi; |
26284 | break; | |
916ace94 | 26285 | case E_TImode: |
01e91138 | 26286 | fn = gen_store_conditionalti; |
26287 | break; | |
26288 | default: | |
26289 | gcc_unreachable (); | |
26290 | } | |
26291 | ||
26292 | /* Emit sync before stwcx. to address PPC405 Erratum 77. */ | |
26293 | if (PPC405_ERRATUM77) | |
26294 | emit_insn (gen_hwsync ()); | |
26295 | ||
26296 | emit_insn (fn (res, mem, val)); | |
26297 | } | |
26298 | ||
26299 | /* Expand barriers before and after a load_locked/store_cond sequence. */ | |
26300 | ||
26301 | static rtx | |
26302 | rs6000_pre_atomic_barrier (rtx mem, enum memmodel model) | |
26303 | { | |
26304 | rtx addr = XEXP (mem, 0); | |
26305 | int strict_p = (reload_in_progress || reload_completed); | |
26306 | ||
26307 | if (!legitimate_indirect_address_p (addr, strict_p) | |
26308 | && !legitimate_indexed_address_p (addr, strict_p)) | |
26309 | { | |
26310 | addr = force_reg (Pmode, addr); | |
26311 | mem = replace_equiv_address_nv (mem, addr); | |
26312 | } | |
26313 | ||
26314 | switch (model) | |
26315 | { | |
26316 | case MEMMODEL_RELAXED: | |
26317 | case MEMMODEL_CONSUME: | |
26318 | case MEMMODEL_ACQUIRE: | |
26319 | break; | |
26320 | case MEMMODEL_RELEASE: | |
26321 | case MEMMODEL_ACQ_REL: | |
26322 | emit_insn (gen_lwsync ()); | |
26323 | break; | |
26324 | case MEMMODEL_SEQ_CST: | |
26325 | emit_insn (gen_hwsync ()); | |
26326 | break; | |
26327 | default: | |
26328 | gcc_unreachable (); | |
26329 | } | |
26330 | return mem; | |
26331 | } | |
26332 | ||
26333 | static void | |
26334 | rs6000_post_atomic_barrier (enum memmodel model) | |
26335 | { | |
26336 | switch (model) | |
26337 | { | |
26338 | case MEMMODEL_RELAXED: | |
26339 | case MEMMODEL_CONSUME: | |
26340 | case MEMMODEL_RELEASE: | |
26341 | break; | |
26342 | case MEMMODEL_ACQUIRE: | |
26343 | case MEMMODEL_ACQ_REL: | |
26344 | case MEMMODEL_SEQ_CST: | |
26345 | emit_insn (gen_isync ()); | |
26346 | break; | |
26347 | default: | |
26348 | gcc_unreachable (); | |
26349 | } | |
26350 | } | |
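
/* Taken together, the two routines above bracket a load-locked/
   store-conditional sequence with the fences below (a summary of the
   switch statements, not additional behavior):

     relaxed, consume:  no leading fence, no trailing fence
     acquire:           no leading fence, trailing isync
     release:           leading lwsync,   no trailing fence
     acq_rel:           leading lwsync,   trailing isync
     seq_cst:           leading hwsync,   trailing isync

   The trailing isync gives acquire semantics only because it follows
   the conditional branch of the retry loop.  */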
26351 | ||
26352 | /* A subroutine of the various atomic expanders. For sub-word operations, | |
26353 | we must adjust things to operate on SImode. Given the original MEM, | |
26354 | return a new aligned memory. Also build and return the quantities by | |
26355 | which to shift and mask. */ | |
26356 | ||
26357 | static rtx | |
26358 | rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask) | |
26359 | { | |
26360 | rtx addr, align, shift, mask, mem; | |
26361 | HOST_WIDE_INT shift_mask; | |
26362 | machine_mode mode = GET_MODE (orig_mem); | |
26363 | ||
26364 | /* For smaller modes, we have to implement this via SImode. */ | |
26365 | shift_mask = (mode == QImode ? 0x18 : 0x10); | |
26366 | ||
26367 | addr = XEXP (orig_mem, 0); | |
26368 | addr = force_reg (GET_MODE (addr), addr); | |
26369 | ||
26370 | /* Aligned memory containing subword. Generate a new memory. We | |
26371 | do not want any of the existing MEM_ATTR data, as we're now | |
26372 | accessing memory outside the original object. */ | |
26373 | align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4), | |
26374 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
26375 | mem = gen_rtx_MEM (SImode, align); | |
26376 | MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem); | |
26377 | if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER) | |
26378 | set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER); | |
26379 | ||
26380 | /* Shift amount for subword relative to aligned word. */ | |
26381 | shift = gen_reg_rtx (SImode); | |
26382 | addr = gen_lowpart (SImode, addr); | |
26383 | rtx tmp = gen_reg_rtx (SImode); | |
26384 | emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3))); | |
26385 | emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask))); | |
26386 | if (BYTES_BIG_ENDIAN) | |
26387 | shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask), | |
26388 | shift, 1, OPTAB_LIB_WIDEN); | |
26389 | *pshift = shift; | |
26390 | ||
26391 | /* Mask for insertion. */ | |
26392 | mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)), | |
26393 | shift, NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
26394 | *pmask = mask; | |
26395 | ||
26396 | return mem; | |
26397 | } | |
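
/* Worked example (big-endian, QImode): for a byte at address ADDR the
   code above computes

     align = ADDR & -4                     -- containing aligned word
     shift = ((ADDR << 3) & 0x18) ^ 0x18   -- bit offset within the word
     mask  = 0xff << shift                 -- the byte's bits in the word

   so ADDR % 4 == 0 selects the most significant byte (shift 24) and
   ADDR % 4 == 3 the least significant (shift 0); on little-endian the
   XOR is skipped and the mapping is reversed.  */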
26398 | ||
26399 | /* A subroutine of the various atomic expanders. For sub-word operands, | |
26400 | combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */ | |
26401 | ||
26402 | static rtx | |
26403 | rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask) | |
26404 | { | |
26405 | rtx x; | |
26406 | ||
26407 | x = gen_reg_rtx (SImode); | |
26408 | emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode, | |
26409 | gen_rtx_NOT (SImode, mask), | |
26410 | oldval))); | |
26411 | ||
26412 | x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN); | |
26413 | ||
26414 | return x; | |
26415 | } | |
26416 | ||
26417 | /* A subroutine of the various atomic expanders. For sub-word operands, | |
26418 | extract WIDE to NARROW via SHIFT. */ | |
26419 | ||
26420 | static void | |
26421 | rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift) | |
26422 | { | |
26423 | wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift, | |
26424 | wide, 1, OPTAB_LIB_WIDEN); | |
26425 | emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide)); | |
26426 | } | |
26427 | ||
26428 | /* Expand an atomic compare and swap operation. */ | |
26429 | ||
26430 | void | |
26431 | rs6000_expand_atomic_compare_and_swap (rtx operands[]) | |
26432 | { | |
26433 | rtx boolval, retval, mem, oldval, newval, cond; | |
26434 | rtx label1, label2, x, mask, shift; | |
26435 | machine_mode mode, orig_mode; | |
26436 | enum memmodel mod_s, mod_f; | |
26437 | bool is_weak; | |
26438 | ||
26439 | boolval = operands[0]; | |
26440 | retval = operands[1]; | |
26441 | mem = operands[2]; | |
26442 | oldval = operands[3]; | |
26443 | newval = operands[4]; | |
26444 | is_weak = (INTVAL (operands[5]) != 0); | |
26445 | mod_s = memmodel_base (INTVAL (operands[6])); | |
26446 | mod_f = memmodel_base (INTVAL (operands[7])); | |
26447 | orig_mode = mode = GET_MODE (mem); | |
26448 | ||
26449 | mask = shift = NULL_RTX; | |
26450 | if (mode == QImode || mode == HImode) | |
26451 | { | |
26452 | /* Before power8, we didn't have access to lbarx/lharx, so we generate | |
26453 | lwarx and shift/mask operations. With power8, we need to do the | |
26454 | comparison in SImode, but the store is still done in QI/HImode. */ | |
26455 | oldval = convert_modes (SImode, mode, oldval, 1); | |
26456 | ||
26457 | if (!TARGET_SYNC_HI_QI) | |
26458 | { | |
26459 | mem = rs6000_adjust_atomic_subword (mem, &shift, &mask); | |
26460 | ||
26461 | /* Shift and mask OLDVAL into position within the word. */ | |
26462 | oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift, | |
26463 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
26464 | ||
26465 | /* Shift and mask NEWVAL into position within the word. */ | |
26466 | newval = convert_modes (SImode, mode, newval, 1); | |
26467 | newval = expand_simple_binop (SImode, ASHIFT, newval, shift, | |
26468 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
26469 | } | |
26470 | ||
26471 | /* Prepare to adjust the return value. */ | |
26472 | retval = gen_reg_rtx (SImode); | |
26473 | mode = SImode; | |
26474 | } | |
26475 | else if (reg_overlap_mentioned_p (retval, oldval)) | |
26476 | oldval = copy_to_reg (oldval); | |
26477 | ||
26478 | if (mode != TImode && !reg_or_short_operand (oldval, mode)) | |
26479 | oldval = copy_to_mode_reg (mode, oldval); | |
26480 | ||
26481 | if (reg_overlap_mentioned_p (retval, newval)) | |
26482 | newval = copy_to_reg (newval); | |
26483 | ||
26484 | mem = rs6000_pre_atomic_barrier (mem, mod_s); | |
26485 | ||
26486 | label1 = NULL_RTX; | |
26487 | if (!is_weak) | |
26488 | { | |
26489 | label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ()); | |
26490 | emit_label (XEXP (label1, 0)); | |
26491 | } | |
26492 | label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ()); | |
26493 | ||
26494 | emit_load_locked (mode, retval, mem); | |
26495 | ||
26496 | x = retval; | |
26497 | if (mask) | |
26498 | x = expand_simple_binop (SImode, AND, retval, mask, | |
26499 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
26500 | ||
26501 | cond = gen_reg_rtx (CCmode); | |
26502 | /* If we have TImode, synthesize a comparison. */ | |
26503 | if (mode != TImode) | |
26504 | x = gen_rtx_COMPARE (CCmode, x, oldval); | |
26505 | else | |
26506 | { | |
26507 | rtx xor1_result = gen_reg_rtx (DImode); | |
26508 | rtx xor2_result = gen_reg_rtx (DImode); | |
26509 | rtx or_result = gen_reg_rtx (DImode); | |
26510 | rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0); | |
26511 | rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8); | |
26512 | rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0); | |
26513 | rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8); | |
26514 | ||
26515 | emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0)); | |
26516 | emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1)); | |
26517 | emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result)); | |
26518 | x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx); | |
26519 | } | |
26520 | ||
26521 | emit_insn (gen_rtx_SET (cond, x)); | |
26522 | ||
26523 | x = gen_rtx_NE (VOIDmode, cond, const0_rtx); | |
26524 | emit_unlikely_jump (x, label2); | |
26525 | ||
26526 | x = newval; | |
26527 | if (mask) | |
26528 | x = rs6000_mask_atomic_subword (retval, newval, mask); | |
26529 | ||
26530 | emit_store_conditional (orig_mode, cond, mem, x); | |
26531 | ||
26532 | if (!is_weak) | |
26533 | { | |
26534 | x = gen_rtx_NE (VOIDmode, cond, const0_rtx); | |
26535 | emit_unlikely_jump (x, label1); | |
26536 | } | |
26537 | ||
26538 | if (!is_mm_relaxed (mod_f)) | |
26539 | emit_label (XEXP (label2, 0)); | |
26540 | ||
26541 | rs6000_post_atomic_barrier (mod_s); | |
26542 | ||
26543 | if (is_mm_relaxed (mod_f)) | |
26544 | emit_label (XEXP (label2, 0)); | |
26545 | ||
26546 | if (shift) | |
26547 | rs6000_finish_atomic_subword (operands[1], retval, shift); | |
26548 | else if (mode != GET_MODE (operands[1])) | |
26549 | convert_move (operands[1], retval, 1); | |
26550 | ||
26551 | /* In all cases, CR0 contains EQ on success, and NE on failure. */ | |
26552 | x = gen_rtx_EQ (SImode, cond, const0_rtx); | |
26553 | emit_insn (gen_rtx_SET (boolval, x)); | |
26554 | } | |
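
/* Illustrative sketch of what the expander above produces for a strong
   word-sized compare-and-swap (labels and registers are hypothetical):

     .L1: lwarx  r9,0,r3       # retval = load-locked *mem
          cmpw   0,r9,r4       # compare against OLDVAL
          bne-   0,.L2         # mismatch: fail
          stwcx. r5,0,r3       # try to store NEWVAL
          bne-   0,.L1         # reservation lost: retry
     .L2:                      # CR0 has EQ on success, NE on failure

   bracketed by the barriers selected from the memory model, with the
   shift/mask dance added for sub-word operands as above.  */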
26555 | ||
26556 | /* Expand an atomic exchange operation. */ | |
26557 | ||
26558 | void | |
26559 | rs6000_expand_atomic_exchange (rtx operands[]) | |
26560 | { | |
26561 | rtx retval, mem, val, cond; | |
26562 | machine_mode mode; | |
26563 | enum memmodel model; | |
26564 | rtx label, x, mask, shift; | |
26565 | ||
26566 | retval = operands[0]; | |
26567 | mem = operands[1]; | |
26568 | val = operands[2]; | |
26569 | model = memmodel_base (INTVAL (operands[3])); | |
26570 | mode = GET_MODE (mem); | |
26571 | ||
26572 | mask = shift = NULL_RTX; | |
26573 | if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode)) | |
26574 | { | |
26575 | mem = rs6000_adjust_atomic_subword (mem, &shift, &mask); | |
26576 | ||
26577 | /* Shift and mask VAL into position within the word. */ | |
26578 | val = convert_modes (SImode, mode, val, 1); | |
26579 | val = expand_simple_binop (SImode, ASHIFT, val, shift, | |
26580 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
26581 | ||
26582 | /* Prepare to adjust the return value. */ | |
26583 | retval = gen_reg_rtx (SImode); | |
26584 | mode = SImode; | |
26585 | } | |
26586 | ||
26587 | mem = rs6000_pre_atomic_barrier (mem, model); | |
26588 | ||
26589 | label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ()); | |
26590 | emit_label (XEXP (label, 0)); | |
26591 | ||
26592 | emit_load_locked (mode, retval, mem); | |
26593 | ||
26594 | x = val; | |
26595 | if (mask) | |
26596 | x = rs6000_mask_atomic_subword (retval, val, mask); | |
26597 | ||
26598 | cond = gen_reg_rtx (CCmode); | |
26599 | emit_store_conditional (mode, cond, mem, x); | |
26600 | ||
26601 | x = gen_rtx_NE (VOIDmode, cond, const0_rtx); | |
26602 | emit_unlikely_jump (x, label); | |
26603 | ||
26604 | rs6000_post_atomic_barrier (model); | |
26605 | ||
26606 | if (shift) | |
26607 | rs6000_finish_atomic_subword (operands[0], retval, shift); | |
26608 | } | |
26609 | ||
26610 | /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation | |
26611 | to perform. MEM is the memory on which to operate. VAL is the second | |
26612 | operand of the binary operator. BEFORE and AFTER are optional locations to | |
26613 | return the value of MEM either before or after the operation. MODEL_RTX | |
26614 | is a CONST_INT containing the memory model to use. */ | |
26615 | ||
26616 | void | |
26617 | rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val, | |
26618 | rtx orig_before, rtx orig_after, rtx model_rtx) | |
26619 | { | |
26620 | enum memmodel model = memmodel_base (INTVAL (model_rtx)); | |
26621 | machine_mode mode = GET_MODE (mem); | |
26622 | machine_mode store_mode = mode; | |
26623 | rtx label, x, cond, mask, shift; | |
26624 | rtx before = orig_before, after = orig_after; | |
26625 | ||
26626 | mask = shift = NULL_RTX; | |
26627 | /* On power8, we want to use SImode for the operation. On previous systems, | |
26628 | do the operation on a full word and shift/mask to get the proper byte or | |
26629 | halfword. */ | |
26630 | if (mode == QImode || mode == HImode) | |
26631 | { | |
26632 | if (TARGET_SYNC_HI_QI) | |
26633 | { | |
26634 | val = convert_modes (SImode, mode, val, 1); | |
26635 | ||
26636 | /* Prepare to adjust the return value. */ | |
26637 | before = gen_reg_rtx (SImode); | |
26638 | if (after) | |
26639 | after = gen_reg_rtx (SImode); | |
26640 | mode = SImode; | |
26641 | } | |
26642 | else | |
26643 | { | |
26644 | mem = rs6000_adjust_atomic_subword (mem, &shift, &mask); | |
26645 | ||
26646 | /* Shift and mask VAL into position within the word. */ | |
26647 | val = convert_modes (SImode, mode, val, 1); | |
26648 | val = expand_simple_binop (SImode, ASHIFT, val, shift, | |
26649 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
26650 | ||
26651 | switch (code) | |
26652 | { | |
26653 | case IOR: | |
26654 | case XOR: | |
26655 | /* We've already zero-extended VAL. That is sufficient to | |
26656 | make certain that it does not affect other bits. */ | |
26657 | mask = NULL; | |
26658 | break; | |
26659 | ||
26660 | case AND: | |
26661 | /* If we make certain that all of the other bits in VAL are | |
26662 | set, that will be sufficient to not affect other bits. */ | |
26663 | x = gen_rtx_NOT (SImode, mask); | |
26664 | x = gen_rtx_IOR (SImode, x, val); | |
26665 | emit_insn (gen_rtx_SET (val, x)); | |
26666 | mask = NULL; | |
26667 | break; | |
26668 | ||
26669 | case NOT: | |
26670 | case PLUS: | |
26671 | case MINUS: | |
26672 | /* These will all affect bits outside the field and need | |
26673 | adjustment via MASK within the loop. */ | |
26674 | break; | |
26675 | ||
26676 | default: | |
26677 | gcc_unreachable (); | |
26678 | } | |
26679 | ||
26680 | /* Prepare to adjust the return value. */ | |
26681 | before = gen_reg_rtx (SImode); | |
26682 | if (after) | |
26683 | after = gen_reg_rtx (SImode); | |
26684 | store_mode = mode = SImode; | |
26685 | } | |
26686 | } | |
26687 | ||
26688 | mem = rs6000_pre_atomic_barrier (mem, model); | |
26689 | ||
26690 | label = gen_label_rtx (); | |
26691 | emit_label (label); | |
26692 | label = gen_rtx_LABEL_REF (VOIDmode, label); | |
26693 | ||
26694 | if (before == NULL_RTX) | |
26695 | before = gen_reg_rtx (mode); | |
26696 | ||
26697 | emit_load_locked (mode, before, mem); | |
26698 | ||
26699 | if (code == NOT) | |
26700 | { | |
26701 | x = expand_simple_binop (mode, AND, before, val, | |
26702 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
26703 | after = expand_simple_unop (mode, NOT, x, after, 1); | |
26704 | } | |
26705 | else | |
26706 | { | |
26707 | after = expand_simple_binop (mode, code, before, val, | |
26708 | after, 1, OPTAB_LIB_WIDEN); | |
26709 | } | |
26710 | ||
26711 | x = after; | |
26712 | if (mask) | |
26713 | { | |
26714 | x = expand_simple_binop (SImode, AND, after, mask, | |
26715 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
26716 | x = rs6000_mask_atomic_subword (before, x, mask); | |
26717 | } | |
26718 | else if (store_mode != mode) | |
26719 | x = convert_modes (store_mode, mode, x, 1); | |
26720 | ||
26721 | cond = gen_reg_rtx (CCmode); | |
26722 | emit_store_conditional (store_mode, cond, mem, x); | |
26723 | ||
26724 | x = gen_rtx_NE (VOIDmode, cond, const0_rtx); | |
26725 | emit_unlikely_jump (x, label); | |
26726 | ||
26727 | rs6000_post_atomic_barrier (model); | |
26728 | ||
26729 | if (shift) | |
26730 | { | |
26731 | /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and | |
26732 | then do the calculations in an SImode register. */ | |
26733 | if (orig_before) | |
26734 | rs6000_finish_atomic_subword (orig_before, before, shift); | |
26735 | if (orig_after) | |
26736 | rs6000_finish_atomic_subword (orig_after, after, shift); | |
26737 | } | |
26738 | else if (store_mode != mode) | |
26739 | { | |
26740 | /* QImode/HImode on machines with lbarx/lharx where we do the native | |
26741 | operation and then do the calculations in an SImode register. */ | |
26742 | if (orig_before) | |
26743 | convert_move (orig_before, before, 1); | |
26744 | if (orig_after) | |
26745 | convert_move (orig_after, after, 1); | |
26746 | } | |
26747 | else if (orig_after && after != orig_after) | |
26748 | emit_move_insn (orig_after, after); | |
26749 | } | |
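
/* Illustrative sketch for a word-sized fetch-and-add (registers and
   labels are hypothetical):

     .L1: lwarx  r9,0,r3       # BEFORE = load-locked *mem
          add    r10,r9,r4     # AFTER = BEFORE + VAL
          stwcx. r10,0,r3      # try to store AFTER
          bne-   0,.L1         # reservation lost: retry

   For code == NOT the middle step is an AND followed by a NOT,
   i.e. the NAND semantics of __atomic_fetch_nand.  */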
26750 | ||
26751 | /* Emit instructions to move SRC to DST. Called by splitters for | |
26752 | multi-register moves. It will emit at most one instruction for | |
26753 | each register that is accessed; that is, it won't emit li/lis pairs | |
26754 | (or equivalent for 64-bit code). One of SRC or DST must be a hard | |
26755 | register. */ | |
26756 | ||
26757 | void | |
26758 | rs6000_split_multireg_move (rtx dst, rtx src) | |
26759 | { | |
26760 | /* The register number of the first register being moved. */ | |
26761 | int reg; | |
26762 | /* The mode that is to be moved. */ | |
26763 | machine_mode mode; | |
26764 | /* The mode that the move is being done in, and its size. */ | |
26765 | machine_mode reg_mode; | |
26766 | int reg_mode_size; | |
26767 | /* The number of registers that will be moved. */ | |
26768 | int nregs; | |
26769 | ||
26770 | reg = REG_P (dst) ? REGNO (dst) : REGNO (src); | |
26771 | mode = GET_MODE (dst); | |
92d2aec3 | 26772 | nregs = hard_regno_nregs (reg, mode); |
01e91138 | 26773 | if (FP_REGNO_P (reg)) |
26774 | reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : | |
26775 | ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode); | |
26776 | else if (ALTIVEC_REGNO_P (reg)) | |
26777 | reg_mode = V16QImode; | |
26778 | else if (TARGET_E500_DOUBLE && FLOAT128_2REG_P (mode)) | |
26779 | reg_mode = DFmode; | |
26780 | else | |
26781 | reg_mode = word_mode; | |
26782 | reg_mode_size = GET_MODE_SIZE (reg_mode); | |
26783 | ||
26784 | gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode)); | |
26785 | ||
26786 | /* TDmode residing in FP registers is special, since the ISA requires that | |
26787 | the lower-numbered word of a register pair is always the most significant | |
26788 | word, even in little-endian mode. This does not match the usual subreg | |
26789 | semantics, so we cannot use simplify_gen_subreg in those cases. Access | |
26790 | the appropriate constituent registers "by hand" in little-endian mode. | |
26791 | ||
26792 | Note we do not need to check for destructive overlap here since TDmode | |
26793 | can only reside in even/odd register pairs. */ | |
26794 | if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN) | |
26795 | { | |
26796 | rtx p_src, p_dst; | |
26797 | int i; | |
26798 | ||
26799 | for (i = 0; i < nregs; i++) | |
26800 | { | |
26801 | if (REG_P (src) && FP_REGNO_P (REGNO (src))) | |
26802 | p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i); | |
26803 | else | |
26804 | p_src = simplify_gen_subreg (reg_mode, src, mode, | |
26805 | i * reg_mode_size); | |
26806 | ||
26807 | if (REG_P (dst) && FP_REGNO_P (REGNO (dst))) | |
26808 | p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i); | |
26809 | else | |
26810 | p_dst = simplify_gen_subreg (reg_mode, dst, mode, | |
26811 | i * reg_mode_size); | |
26812 | ||
26813 | emit_insn (gen_rtx_SET (p_dst, p_src)); | |
26814 | } | |
26815 | ||
26816 | return; | |
26817 | } | |
26818 | ||
26819 | if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst))) | |
26820 | { | |
26821 | /* Move register range backwards, if we might have destructive | |
26822 | overlap. */ | |
26823 | int i; | |
26824 | for (i = nregs - 1; i >= 0; i--) | |
26825 | emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode, | |
26826 | i * reg_mode_size), | |
26827 | simplify_gen_subreg (reg_mode, src, mode, | |
26828 | i * reg_mode_size))); | |
26829 | } | |
26830 | else | |
26831 | { | |
26832 | int i; | |
26833 | int j = -1; | |
26834 | bool used_update = false; | |
26835 | rtx restore_basereg = NULL_RTX; | |
26836 | ||
26837 | if (MEM_P (src) && INT_REGNO_P (reg)) | |
26838 | { | |
26839 | rtx breg; | |
26840 | ||
26841 | if (GET_CODE (XEXP (src, 0)) == PRE_INC | |
26842 | || GET_CODE (XEXP (src, 0)) == PRE_DEC) | |
26843 | { | |
26844 | rtx delta_rtx; | |
26845 | breg = XEXP (XEXP (src, 0), 0); | |
26846 | delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC | |
26847 | ? GEN_INT (GET_MODE_SIZE (GET_MODE (src))) | |
26848 | : GEN_INT (-GET_MODE_SIZE (GET_MODE (src)))); | |
26849 | emit_insn (gen_add3_insn (breg, breg, delta_rtx)); | |
26850 | src = replace_equiv_address (src, breg); | |
26851 | } | |
26852 | else if (! rs6000_offsettable_memref_p (src, reg_mode)) | |
26853 | { | |
26854 | if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY) | |
26855 | { | |
26856 | rtx basereg = XEXP (XEXP (src, 0), 0); | |
26857 | if (TARGET_UPDATE) | |
26858 | { | |
26859 | rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0); | |
26860 | emit_insn (gen_rtx_SET (ndst, | |
26861 | gen_rtx_MEM (reg_mode, | |
26862 | XEXP (src, 0)))); | |
26863 | used_update = true; | |
26864 | } | |
26865 | else | |
26866 | emit_insn (gen_rtx_SET (basereg, | |
26867 | XEXP (XEXP (src, 0), 1))); | |
26868 | src = replace_equiv_address (src, basereg); | |
26869 | } | |
26870 | else | |
26871 | { | |
26872 | rtx basereg = gen_rtx_REG (Pmode, reg); | |
26873 | emit_insn (gen_rtx_SET (basereg, XEXP (src, 0))); | |
26874 | src = replace_equiv_address (src, basereg); | |
26875 | } | |
26876 | } | |
26877 | ||
26878 | breg = XEXP (src, 0); | |
26879 | if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM) | |
26880 | breg = XEXP (breg, 0); | |
26881 | ||
26882 | /* If the base register we are using to address memory is | |
26883 | also a destination reg, then change that register last. */ | |
26884 | if (REG_P (breg) | |
26885 | && REGNO (breg) >= REGNO (dst) | |
26886 | && REGNO (breg) < REGNO (dst) + nregs) | |
26887 | j = REGNO (breg) - REGNO (dst); | |
26888 | } | |
26889 | else if (MEM_P (dst) && INT_REGNO_P (reg)) | |
26890 | { | |
26891 | rtx breg; | |
26892 | ||
26893 | if (GET_CODE (XEXP (dst, 0)) == PRE_INC | |
26894 | || GET_CODE (XEXP (dst, 0)) == PRE_DEC) | |
26895 | { | |
26896 | rtx delta_rtx; | |
26897 | breg = XEXP (XEXP (dst, 0), 0); | |
26898 | delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC | |
26899 | ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst))) | |
26900 | : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst)))); | |
26901 | ||
26902 | /* We have to update the breg before doing the store. | |
26903 | Use store with update, if available. */ | |
26904 | ||
26905 | if (TARGET_UPDATE) | |
26906 | { | |
26907 | rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0); | |
26908 | emit_insn (TARGET_32BIT | |
26909 | ? (TARGET_POWERPC64 | |
26910 | ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc) | |
26911 | : gen_movsi_update (breg, breg, delta_rtx, nsrc)) | |
26912 | : gen_movdi_di_update (breg, breg, delta_rtx, nsrc)); | |
26913 | used_update = true; | |
26914 | } | |
26915 | else | |
26916 | emit_insn (gen_add3_insn (breg, breg, delta_rtx)); | |
26917 | dst = replace_equiv_address (dst, breg); | |
26918 | } | |
26919 | else if (!rs6000_offsettable_memref_p (dst, reg_mode) | |
26920 | && GET_CODE (XEXP (dst, 0)) != LO_SUM) | |
26921 | { | |
26922 | if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY) | |
26923 | { | |
26924 | rtx basereg = XEXP (XEXP (dst, 0), 0); | |
26925 | if (TARGET_UPDATE) | |
26926 | { | |
26927 | rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0); | |
26928 | emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode, | |
26929 | XEXP (dst, 0)), | |
26930 | nsrc)); | |
26931 | used_update = true; | |
26932 | } | |
26933 | else | |
26934 | emit_insn (gen_rtx_SET (basereg, | |
26935 | XEXP (XEXP (dst, 0), 1))); | |
26936 | dst = replace_equiv_address (dst, basereg); | |
26937 | } | |
26938 | else | |
26939 | { | |
26940 | rtx basereg = XEXP (XEXP (dst, 0), 0); | |
26941 | rtx offsetreg = XEXP (XEXP (dst, 0), 1); | |
26942 | gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS | |
26943 | && REG_P (basereg) | |
26944 | && REG_P (offsetreg) | |
26945 | && REGNO (basereg) != REGNO (offsetreg)); | |
26946 | if (REGNO (basereg) == 0) | |
26947 | { | |
26948 | rtx tmp = offsetreg; | |
26949 | offsetreg = basereg; | |
26950 | basereg = tmp; | |
26951 | } | |
26952 | emit_insn (gen_add3_insn (basereg, basereg, offsetreg)); | |
26953 | restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg); | |
26954 | dst = replace_equiv_address (dst, basereg); | |
26955 | } | |
26956 | } | |
26957 | else if (GET_CODE (XEXP (dst, 0)) != LO_SUM) | |
26958 | gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode)); | |
26959 | } | |
26960 | ||
26961 | for (i = 0; i < nregs; i++) | |
26962 | { | |
26963 | /* Calculate index to next subword. */ | |
26964 | ++j; | |
26965 | if (j == nregs) | |
26966 | j = 0; | |
26967 | ||
26968 | /* If compiler already emitted move of first word by | |
26969 | store with update, no need to do anything. */ | |
26970 | if (j == 0 && used_update) | |
26971 | continue; | |
26972 | ||
26973 | emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode, | |
26974 | j * reg_mode_size), | |
26975 | simplify_gen_subreg (reg_mode, src, mode, | |
26976 | j * reg_mode_size))); | |
26977 | } | |
26978 | if (restore_basereg != NULL_RTX) | |
26979 | emit_insn (restore_basereg); | |
26980 | } | |
26981 | } | |
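
/* Illustrative sketch: splitting a TImode register-to-register move on
   a 64-bit target yields two DImode moves, emitted high-to-low when
   REGNO (src) < REGNO (dst) to avoid destructive overlap:

     (set (reg:DI d+1) (reg:DI s+1))
     (set (reg:DI d)   (reg:DI s))

   and low-to-high otherwise.  The j index in the final loop rotates
   the order so that a base register that is also one of the
   destination registers is loaded last.  */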
26982 | ||
26983 | \f | |
26984 | /* This page contains routines that are used to determine what the | |
26985 | function prologue and epilogue code will do and write them out. */ | |
26986 | ||
26987 | static inline bool | |
26988 | save_reg_p (int r) | |
26989 | { | |
26990 | return !call_used_regs[r] && df_regs_ever_live_p (r); | |
26991 | } | |
26992 | ||
26993 | /* Determine whether the gp REG is really used. */ | |
26994 | ||
26995 | static bool | |
26996 | rs6000_reg_live_or_pic_offset_p (int reg) | |
26997 | { | |
26998 | /* We need to mark the PIC offset register live for the same conditions | |
26999 | as it is set up, or otherwise it won't be saved before we clobber it. */ | |
27000 | ||
27001 | if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE) | |
27002 | { | |
27003 | if (TARGET_TOC && TARGET_MINIMAL_TOC | |
27004 | && (crtl->calls_eh_return | |
27005 | || df_regs_ever_live_p (reg) | |
27006 | || !constant_pool_empty_p ())) | |
27007 | return true; | |
27008 | ||
27009 | if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) | |
27010 | && flag_pic) | |
27011 | return true; | |
27012 | } | |
27013 | ||
27014 | /* If the function calls eh_return, treat as used all the registers | |
27015 | that would otherwise be checked for liveness. */ | |
27016 | ||
27017 | return ((crtl->calls_eh_return || df_regs_ever_live_p (reg)) | |
27018 | && !call_used_regs[reg]); | |
27019 | } | |
27020 | ||
27021 | /* Return the first fixed-point register that is required to be | |
27022 | saved. 32 if none. */ | |
27023 | ||
27024 | int | |
27025 | first_reg_to_save (void) | |
27026 | { | |
27027 | int first_reg; | |
27028 | ||
27029 | /* Find lowest numbered live register. */ | |
27030 | for (first_reg = 13; first_reg <= 31; first_reg++) | |
27031 | if (save_reg_p (first_reg)) | |
27032 | break; | |
27033 | ||
27034 | if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM | |
27035 | && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0) | |
27036 | || (DEFAULT_ABI == ABI_DARWIN && flag_pic) | |
27037 | || (TARGET_TOC && TARGET_MINIMAL_TOC)) | |
27038 | && rs6000_reg_live_or_pic_offset_p (RS6000_PIC_OFFSET_TABLE_REGNUM)) | |
27039 | first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM; | |
27040 | ||
27041 | #if TARGET_MACHO | |
27042 | if (flag_pic | |
27043 | && crtl->uses_pic_offset_table | |
27044 | && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM) | |
27045 | return RS6000_PIC_OFFSET_TABLE_REGNUM; | |
27046 | #endif | |
27047 | ||
27048 | return first_reg; | |
27049 | } | |
27050 | ||
27051 | /* Similar, for FP regs. */ | |
27052 | ||
27053 | int | |
27054 | first_fp_reg_to_save (void) | |
27055 | { | |
27056 | int first_reg; | |
27057 | ||
27058 | /* Find lowest numbered live register. */ | |
27059 | for (first_reg = 14 + 32; first_reg <= 63; first_reg++) | |
27060 | if (save_reg_p (first_reg)) | |
27061 | break; | |
27062 | ||
27063 | return first_reg; | |
27064 | } | |
27065 | ||
27066 | /* Similar, for AltiVec regs. */ | |
27067 | ||
27068 | static int | |
27069 | first_altivec_reg_to_save (void) | |
27070 | { | |
27071 | int i; | |
27072 | ||
27073 | /* Stack frame remains as is unless we are in AltiVec ABI. */ | |
27074 | if (! TARGET_ALTIVEC_ABI) | |
27075 | return LAST_ALTIVEC_REGNO + 1; | |
27076 | ||
27077 | /* On Darwin, the unwind routines are compiled without | |
27078 | TARGET_ALTIVEC, and use save_world to save/restore the | |
27079 | altivec registers when necessary. */ | |
27080 | if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return | |
27081 | && ! TARGET_ALTIVEC) | |
27082 | return FIRST_ALTIVEC_REGNO + 20; | |
27083 | ||
27084 | /* Find lowest numbered live register. */ | |
27085 | for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i) | |
27086 | if (save_reg_p (i)) | |
27087 | break; | |
27088 | ||
27089 | return i; | |
27090 | } | |
27091 | ||
27092 | /* Return a 32-bit mask of the AltiVec registers we need to set in | |
27093 | VRSAVE. Bit n of the return value is 1 if Vn is live, where bit 0 | |
27094 | is the MSB of the 32-bit word (IBM bit numbering). */ | |
27095 | ||
27096 | static unsigned int | |
27097 | compute_vrsave_mask (void) | |
27098 | { | |
27099 | unsigned int i, mask = 0; | |
27100 | ||
27101 | /* On Darwin, the unwind routines are compiled without | |
27102 | TARGET_ALTIVEC, and use save_world to save/restore the | |
27103 | call-saved altivec registers when necessary. */ | |
27104 | if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return | |
27105 | && ! TARGET_ALTIVEC) | |
27106 | mask |= 0xFFF; | |
27107 | ||
27108 | /* First, find out if we use _any_ altivec registers. */ | |
27109 | for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i) | |
27110 | if (df_regs_ever_live_p (i)) | |
27111 | mask |= ALTIVEC_REG_BIT (i); | |
27112 | ||
27113 | if (mask == 0) | |
27114 | return mask; | |
27115 | ||
27116 | /* Next, remove the argument registers from the set. These must | |
27117 | be in the VRSAVE mask set by the caller, so we don't need to add | |
27118 | them in again. More importantly, the mask we compute here is | |
27119 | used to generate CLOBBERs in the set_vrsave insn, and we do not | |
27120 | wish the argument registers to die. */ | |
27121 | for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++) | |
27122 | mask &= ~ALTIVEC_REG_BIT (i); | |
27123 | ||
27124 | /* Similarly, remove the return value from the set. */ | |
27125 | { | |
27126 | bool yes = false; | |
27127 | diddle_return_value (is_altivec_return_reg, &yes); | |
27128 | if (yes) | |
27129 | mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN); | |
27130 | } | |
27131 | ||
27132 | return mask; | |
27133 | } | |
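
/* Example (using the bit numbering noted above, V0 at the MSB): a
   function where only V20 and V21 are live would get

     mask = ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 20)
          | ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 21)
          = (1 << 11) | (1 << 10) = 0x00000c00

   before argument and return-value registers are cleared from it.  */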
27134 | ||
27135 | /* For a very restricted set of circumstances, we can cut down the | |
27136 | size of prologues/epilogues by calling our own save/restore-the-world | |
27137 | routines. */ | |
27138 | ||
27139 | static void | |
27140 | compute_save_world_info (rs6000_stack_t *info) | |
27141 | { | |
27142 | info->world_save_p = 1; | |
27143 | info->world_save_p | |
27144 | = (WORLD_SAVE_P (info) | |
27145 | && DEFAULT_ABI == ABI_DARWIN | |
27146 | && !cfun->has_nonlocal_label | |
27147 | && info->first_fp_reg_save == FIRST_SAVED_FP_REGNO | |
27148 | && info->first_gp_reg_save == FIRST_SAVED_GP_REGNO | |
27149 | && info->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO | |
27150 | && info->cr_save_p); | |
27151 | ||
27152 | /* This will not work in conjunction with sibcalls. Make sure there | |
27153 | are none. (This check is expensive, but seldom executed.) */ | |
27154 | if (WORLD_SAVE_P (info)) | |
27155 | { | |
27156 | rtx_insn *insn; | |
27157 | for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn)) | |
27158 | if (CALL_P (insn) && SIBLING_CALL_P (insn)) | |
27159 | { | |
27160 | info->world_save_p = 0; | |
27161 | break; | |
27162 | } | |
27163 | } | |
27164 | ||
27165 | if (WORLD_SAVE_P (info)) | |
27166 | { | |
27167 | /* Even if we're not touching VRsave, make sure there's room on the | |
27168 | stack for it, if it looks like we're calling SAVE_WORLD, which | |
27169 | will attempt to save it. */ | |
27170 | info->vrsave_size = 4; | |
27171 | ||
27172 | /* If we are going to save the world, we need to save the link register too. */ | |
27173 | info->lr_save_p = 1; | |
27174 | ||
27175 | /* "Save" the VRsave register too if we're saving the world. */ | |
27176 | if (info->vrsave_mask == 0) | |
27177 | info->vrsave_mask = compute_vrsave_mask (); | |
27178 | ||
27179 | /* Because the Darwin register save/restore routines only handle | |
27180 | F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency | |
27181 | check. */ | |
27182 | gcc_assert (info->first_fp_reg_save >= FIRST_SAVED_FP_REGNO | |
27183 | && (info->first_altivec_reg_save | |
27184 | >= FIRST_SAVED_ALTIVEC_REGNO)); | |
27185 | } | |
27186 | ||
27187 | return; | |
27188 | } | |
27189 | ||
27190 | ||
27191 | static void | |
27192 | is_altivec_return_reg (rtx reg, void *xyes) | |
27193 | { | |
27194 | bool *yes = (bool *) xyes; | |
27195 | if (REGNO (reg) == ALTIVEC_ARG_RETURN) | |
27196 | *yes = true; | |
27197 | } | |
27198 | ||
27199 | \f | |
27200 | /* Return whether REG is a global user reg or has been specified by | |
27201 | -ffixed-REG. We should not restore these, and so cannot use | |
27202 | lmw or out-of-line restore functions if there are any. We also | |
27203 | can't save them (well, emit frame notes for them), because frame | |
27204 | unwinding during exception handling will restore saved registers. */ | |
27205 | ||
27206 | static bool | |
27207 | fixed_reg_p (int reg) | |
27208 | { | |
27209 | /* Ignore fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] when the | |
27210 | backend sets it, overriding anything the user might have given. */ | |
27211 | if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM | |
27212 | && ((DEFAULT_ABI == ABI_V4 && flag_pic) | |
27213 | || (DEFAULT_ABI == ABI_DARWIN && flag_pic) | |
27214 | || (TARGET_TOC && TARGET_MINIMAL_TOC))) | |
27215 | return false; | |
27216 | ||
27217 | return fixed_regs[reg]; | |
27218 | } | |
27219 | ||
27220 | /* Determine the strategy for saving/restoring registers. */ | |
27221 | ||
27222 | enum { | |
27223 | SAVE_MULTIPLE = 0x1, | |
27224 | SAVE_INLINE_GPRS = 0x2, | |
27225 | SAVE_INLINE_FPRS = 0x4, | |
27226 | SAVE_NOINLINE_GPRS_SAVES_LR = 0x8, | |
27227 | SAVE_NOINLINE_FPRS_SAVES_LR = 0x10, | |
27228 | SAVE_INLINE_VRS = 0x20, | |
27229 | REST_MULTIPLE = 0x100, | |
27230 | REST_INLINE_GPRS = 0x200, | |
27231 | REST_INLINE_FPRS = 0x400, | |
27232 | REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x800, | |
27233 | REST_INLINE_VRS = 0x1000 | |
27234 | }; | |
27235 | ||
27236 | static int | |
27237 | rs6000_savres_strategy (rs6000_stack_t *info, | |
27238 | bool using_static_chain_p) | |
27239 | { | |
27240 | int strategy = 0; | |
27241 | ||
27242 | /* Select between in-line and out-of-line save and restore of regs. | |
27243 | First, all the obvious cases where we don't use out-of-line. */ | |
27244 | if (crtl->calls_eh_return | |
27245 | || cfun->machine->ra_need_lr) | |
27246 | strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS | |
27247 | | SAVE_INLINE_GPRS | REST_INLINE_GPRS | |
27248 | | SAVE_INLINE_VRS | REST_INLINE_VRS); | |
27249 | ||
27250 | if (info->first_gp_reg_save == 32) | |
27251 | strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS; | |
27252 | ||
27253 | if (info->first_fp_reg_save == 64 | |
27254 | /* The out-of-line FP routines use double-precision stores; | |
27255 | we can't use those routines if we don't have such stores. */ | |
27256 | || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT)) | |
27257 | strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS; | |
27258 | ||
27259 | if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1) | |
27260 | strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS; | |
27261 | ||
27262 | /* Define cutoff for using out-of-line functions to save registers. */ | |
27263 | if (DEFAULT_ABI == ABI_V4 || TARGET_ELF) | |
27264 | { | |
27265 | if (!optimize_size) | |
27266 | { | |
27267 | strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS; | |
27268 | strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS; | |
27269 | strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS; | |
27270 | } | |
27271 | else | |
27272 | { | |
27273 | /* Prefer out-of-line restore if it will exit. */ | |
27274 | if (info->first_fp_reg_save > 61) | |
27275 | strategy |= SAVE_INLINE_FPRS; | |
27276 | if (info->first_gp_reg_save > 29) | |
27277 | { | |
27278 | if (info->first_fp_reg_save == 64) | |
27279 | strategy |= SAVE_INLINE_GPRS; | |
27280 | else | |
27281 | strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS; | |
27282 | } | |
27283 | if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO) | |
27284 | strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS; | |
27285 | } | |
27286 | } | |
27287 | else if (DEFAULT_ABI == ABI_DARWIN) | |
27288 | { | |
27289 | if (info->first_fp_reg_save > 60) | |
27290 | strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS; | |
27291 | if (info->first_gp_reg_save > 29) | |
27292 | strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS; | |
27293 | strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS; | |
27294 | } | |
27295 | else | |
27296 | { | |
27297 | gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2); | |
27298 | if ((flag_shrink_wrap_separate && optimize_function_for_speed_p (cfun)) | |
27299 | || info->first_fp_reg_save > 61) | |
27300 | strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS; | |
27301 | strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS; | |
27302 | strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS; | |
27303 | } | |
27304 | ||
27305 | /* Don't bother to try to save things out-of-line if r11 is occupied | |
27306 | by the static chain. It would require too much fiddling and the | |
27307 | static chain is rarely used anyway. FPRs are saved w.r.t the stack | |
27308 | pointer on Darwin, and AIX uses r1 or r12. */ | |
27309 | if (using_static_chain_p | |
27310 | && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)) | |
27311 | strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS) | |
27312 | | SAVE_INLINE_GPRS | |
27313 | | SAVE_INLINE_VRS); | |
27314 | ||
27315 | /* Saving CR interferes with the exit routines used on the SPE, so | |
27316 | just punt here. */ | |
27317 | if (TARGET_SPE_ABI | |
27318 | && info->spe_64bit_regs_used | |
27319 | && info->cr_save_p) | |
27320 | strategy |= REST_INLINE_GPRS; | |
27321 | ||
27322 | /* We can only use the out-of-line routines to restore fprs if we've | |
27323 | saved all the registers from first_fp_reg_save in the prologue. | |
27324 | Otherwise, we risk loading garbage. Of course, if we have saved | |
27325 | out-of-line then we know we haven't skipped any fprs. */ | |
27326 | if ((strategy & SAVE_INLINE_FPRS) | |
27327 | && !(strategy & REST_INLINE_FPRS)) | |
27328 | { | |
27329 | int i; | |
27330 | ||
27331 | for (i = info->first_fp_reg_save; i < 64; i++) | |
27332 | if (fixed_regs[i] || !save_reg_p (i)) | |
27333 | { | |
27334 | strategy |= REST_INLINE_FPRS; | |
27335 | break; | |
27336 | } | |
27337 | } | |
27338 | ||
27339 | /* Similarly, for altivec regs. */ | |
27340 | if ((strategy & SAVE_INLINE_VRS) | |
27341 | && !(strategy & REST_INLINE_VRS)) | |
27342 | { | |
27343 | int i; | |
27344 | ||
27345 | for (i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++) | |
27346 | if (fixed_regs[i] || !save_reg_p (i)) | |
27347 | { | |
27348 | strategy |= REST_INLINE_VRS; | |
27349 | break; | |
27350 | } | |
27351 | } | |
27352 | ||
27353 | /* info->lr_save_p isn't yet set if the only reason lr needs to be | |
27354 | saved is an out-of-line save or restore. Set up the value for | |
27355 | the next test (excluding out-of-line gprs). */ | |
27356 | bool lr_save_p = (info->lr_save_p | |
27357 | || !(strategy & SAVE_INLINE_FPRS) | |
27358 | || !(strategy & SAVE_INLINE_VRS) | |
27359 | || !(strategy & REST_INLINE_FPRS) | |
27360 | || !(strategy & REST_INLINE_VRS)); | |
27361 | ||
27362 | if (TARGET_MULTIPLE | |
27363 | && !TARGET_POWERPC64 | |
27364 | && !(TARGET_SPE_ABI && info->spe_64bit_regs_used) | |
27365 | && info->first_gp_reg_save < 31 | |
27366 | && !(flag_shrink_wrap | |
27367 | && flag_shrink_wrap_separate | |
27368 | && optimize_function_for_speed_p (cfun))) | |
27369 | { | |
27370 | /* Prefer store multiple for saves over out-of-line routines, | |
27371 | since the store-multiple instruction will always be smaller. */ | |
27372 | strategy |= SAVE_INLINE_GPRS | SAVE_MULTIPLE; | |
27373 | ||
27374 | /* The situation is more complicated with load multiple. We'd | |
27375 | prefer to use the out-of-line routines for restores, since the | |
27376 | "exit" out-of-line routines can handle the restore of LR and the | |
27377 | frame teardown. However, it doesn't make sense to use the | |
27378 | out-of-line routine if that is the only reason we'd need to save | |
27379 | LR, and we can't use the "exit" out-of-line gpr restore if we | |
27380 | have saved some fprs; in those cases it is advantageous to use | |
27381 | load multiple when available. */ | |
27382 | if (info->first_fp_reg_save != 64 || !lr_save_p) | |
27383 | strategy |= REST_INLINE_GPRS | REST_MULTIPLE; | |
27384 | } | |
27385 | ||
27386 | /* Using the "exit" out-of-line routine does not improve code size | |
27387 | if using it would require lr to be saved and if only saving one | |
27388 | or two gprs. */ | |
27389 | else if (!lr_save_p && info->first_gp_reg_save > 29) | |
27390 | strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS; | |
27391 | ||
27392 | /* We can only use load multiple or the out-of-line routines to | |
27393 | restore gprs if we've saved all the registers from | |
27394 | first_gp_reg_save. Otherwise, we risk loading garbage. | |
27395 | Of course, if we have saved out-of-line or used stmw then we know | |
27396 | we haven't skipped any gprs. */ | |
27397 | if ((strategy & (SAVE_INLINE_GPRS | SAVE_MULTIPLE)) == SAVE_INLINE_GPRS | |
27398 | && (strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS) | |
27399 | { | |
27400 | int i; | |
27401 | ||
27402 | for (i = info->first_gp_reg_save; i < 32; i++) | |
27403 | if (fixed_reg_p (i) || !save_reg_p (i)) | |
27404 | { | |
27405 | strategy |= REST_INLINE_GPRS; | |
27406 | strategy &= ~REST_MULTIPLE; | |
27407 | break; | |
27408 | } | |
27409 | } | |
27410 | ||
27411 | if (TARGET_ELF && TARGET_64BIT) | |
27412 | { | |
27413 | if (!(strategy & SAVE_INLINE_FPRS)) | |
27414 | strategy |= SAVE_NOINLINE_FPRS_SAVES_LR; | |
27415 | else if (!(strategy & SAVE_INLINE_GPRS) | |
27416 | && info->first_fp_reg_save == 64) | |
27417 | strategy |= SAVE_NOINLINE_GPRS_SAVES_LR; | |
27418 | } | |
27419 | else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS)) | |
27420 | strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR; | |
27421 | ||
27422 | if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS)) | |
27423 | strategy |= SAVE_NOINLINE_FPRS_SAVES_LR; | |
27424 | ||
27425 | return strategy; | |
27426 | } | |
27427 | ||
27428 | /* Calculate the stack information for the current function. This is | |
27429 | complicated by having two separate calling sequences, the AIX calling | |
27430 | sequence and the V.4 calling sequence. | |
27431 | ||
27432 | AIX (and Darwin/Mac OS X) stack frames look like: | |
27433 | 32-bit 64-bit | |
27434 | SP----> +---------------------------------------+ | |
27435 | | back chain to caller | 0 0 | |
27436 | +---------------------------------------+ | |
27437 | | saved CR | 4 8 (8-11) | |
27438 | +---------------------------------------+ | |
27439 | | saved LR | 8 16 | |
27440 | +---------------------------------------+ | |
27441 | | reserved for compilers | 12 24 | |
27442 | +---------------------------------------+ | |
27443 | | reserved for binders | 16 32 | |
27444 | +---------------------------------------+ | |
27445 | | saved TOC pointer | 20 40 | |
27446 | +---------------------------------------+ | |
27447 | | Parameter save area (+padding*) (P) | 24 48 | |
27448 | +---------------------------------------+ | |
27449 | | Alloca space (A) | 24+P etc. | |
27450 | +---------------------------------------+ | |
27451 | | Local variable space (L) | 24+P+A | |
27452 | +---------------------------------------+ | |
27453 | | Float/int conversion temporary (X) | 24+P+A+L | |
27454 | +---------------------------------------+ | |
27455 | | Save area for AltiVec registers (W) | 24+P+A+L+X | |
27456 | +---------------------------------------+ | |
27457 | | AltiVec alignment padding (Y) | 24+P+A+L+X+W | |
27458 | +---------------------------------------+ | |
27459 | | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y | |
27460 | +---------------------------------------+ | |
27461 | | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z | |
27462 | +---------------------------------------+ | |
27463 | | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G | |
27464 | +---------------------------------------+ | |
27465 | old SP->| back chain to caller's caller | | |
27466 | +---------------------------------------+ | |
27467 | ||
27468 | * If the alloca area is present, the parameter save area is | |
27469 | padded so that the former starts 16-byte aligned. | |
27470 | ||
27471 | The required alignment for AIX configurations is two words (i.e., 8 | |
27472 | or 16 bytes). | |
27473 | ||
27474 | The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like: | |
27475 | ||
27476 | SP----> +---------------------------------------+ | |
27477 | | Back chain to caller | 0 | |
27478 | +---------------------------------------+ | |
27479 | | Save area for CR | 8 | |
27480 | +---------------------------------------+ | |
27481 | | Saved LR | 16 | |
27482 | +---------------------------------------+ | |
27483 | | Saved TOC pointer | 24 | |
27484 | +---------------------------------------+ | |
27485 | | Parameter save area (+padding*) (P) | 32 | |
27486 | +---------------------------------------+ | |
27487 | | Alloca space (A) | 32+P | |
27488 | +---------------------------------------+ | |
27489 | | Local variable space (L) | 32+P+A | |
27490 | +---------------------------------------+ | |
27491 | | Save area for AltiVec registers (W) | 32+P+A+L | |
27492 | +---------------------------------------+ | |
27493 | | AltiVec alignment padding (Y) | 32+P+A+L+W | |
27494 | +---------------------------------------+ | |
27495 | | Save area for GP registers (G) | 32+P+A+L+W+Y | |
27496 | +---------------------------------------+ | |
27497 | | Save area for FP registers (F) | 32+P+A+L+W+Y+G | |
27498 | +---------------------------------------+ | |
27499 | old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F | |
27500 | +---------------------------------------+ | |
27501 | ||
27502 | * If the alloca area is present, the parameter save area is | |
27503 | padded so that the former starts 16-byte aligned. | |
27504 | ||
27505 | V.4 stack frames look like: | |
27506 | ||
27507 | SP----> +---------------------------------------+ | |
27508 | | back chain to caller | 0 | |
27509 | +---------------------------------------+ | |
27510 | | caller's saved LR | 4 | |
27511 | +---------------------------------------+ | |
27512 | | Parameter save area (+padding*) (P) | 8 | |
27513 | +---------------------------------------+ | |
27514 | | Alloca space (A) | 8+P | |
27515 | +---------------------------------------+ | |
27516 | | Varargs save area (V) | 8+P+A | |
27517 | +---------------------------------------+ | |
27518 | | Local variable space (L) | 8+P+A+V | |
27519 | +---------------------------------------+ | |
27520 | | Float/int conversion temporary (X) | 8+P+A+V+L | |
27521 | +---------------------------------------+ | |
27522 | | Save area for AltiVec registers (W) | 8+P+A+V+L+X | |
27523 | +---------------------------------------+ | |
27524 | | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W | |
27525 | +---------------------------------------+ | |
27526 | | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y | |
27527 | +---------------------------------------+ | |
27528 | | SPE: area for 64-bit GP registers | | |
27529 | +---------------------------------------+ | |
27530 | | SPE alignment padding | | |
27531 | +---------------------------------------+ | |
27532 | | saved CR (C) | 8+P+A+V+L+X+W+Y+Z | |
27533 | +---------------------------------------+ | |
27534 | | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C | |
27535 | +---------------------------------------+ | |
27536 | | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G | |
27537 | +---------------------------------------+ | |
27538 | old SP->| back chain to caller's caller | | |
27539 | +---------------------------------------+ | |
27540 | ||
27541 | * If the alloca area is present and the required alignment is | |
27542 | 16 bytes, the parameter save area is padded so that the | |
27543 | alloca area starts 16-byte aligned. | |
27544 | ||
27545 | The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is | |
27546 | given. (But note below and in sysv4.h that we require only 8 and | |
27547 | may round up the size of our stack frame anyway. The historical | |
27548 | reason is early versions of powerpc-linux which didn't properly | |
27549 | align the stack at program startup. A happy side-effect is that | |
27550 | -mno-eabi libraries can be used with -meabi programs.) | |
27551 | ||
27552 | The EABI configuration defaults to the V.4 layout. However, | |
27553 | the stack alignment requirements may differ. If -mno-eabi is not | |
27554 | given, the required stack alignment is 8 bytes; if -mno-eabi is | |
27555 | given, the required alignment is 16 bytes. (But see V.4 comment | |
27556 | above.) */ | |
27557 | ||
27558 | #ifndef ABI_STACK_BOUNDARY | |
27559 | #define ABI_STACK_BOUNDARY STACK_BOUNDARY | |
27560 | #endif | |
27561 | ||
27562 | static rs6000_stack_t * | |
27563 | rs6000_stack_info (void) | |
27564 | { | |
27565 | /* We should never be called for thunks, we are not set up for that. */ | |
27566 | gcc_assert (!cfun->is_thunk); | |
27567 | ||
27568 | rs6000_stack_t *info = &stack_info; | |
27569 | int reg_size = TARGET_32BIT ? 4 : 8; | |
27570 | int ehrd_size; | |
27571 | int ehcr_size; | |
27572 | int save_align; | |
27573 | int first_gp; | |
27574 | HOST_WIDE_INT non_fixed_size; | |
27575 | bool using_static_chain_p; | |
27576 | ||
27577 | if (reload_completed && info->reload_completed) | |
27578 | return info; | |
27579 | ||
27580 | memset (info, 0, sizeof (*info)); | |
27581 | info->reload_completed = reload_completed; | |
27582 | ||
27583 | if (TARGET_SPE) | |
27584 | { | |
27585 | /* Cache value so we don't rescan instruction chain over and over. */ | |
27586 | if (cfun->machine->spe_insn_chain_scanned_p == 0) | |
27587 | cfun->machine->spe_insn_chain_scanned_p | |
27588 | = spe_func_has_64bit_regs_p () + 1; | |
27589 | info->spe_64bit_regs_used = cfun->machine->spe_insn_chain_scanned_p - 1; | |
27590 | } | |
27591 | ||
27592 | /* Select which calling sequence. */ | |
27593 | info->abi = DEFAULT_ABI; | |
27594 | ||
27595 | /* Calculate which registers need to be saved & save area size. */ | |
27596 | info->first_gp_reg_save = first_reg_to_save (); | |
27597 | /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM, | |
27598 | even if it currently looks like we won't. Reload may need it to | |
27599 | get at a constant; if so, it will have already created a constant | |
27600 | pool entry for it. */ | |
27601 | if (((TARGET_TOC && TARGET_MINIMAL_TOC) | |
27602 | || (flag_pic == 1 && DEFAULT_ABI == ABI_V4) | |
27603 | || (flag_pic && DEFAULT_ABI == ABI_DARWIN)) | |
27604 | && crtl->uses_const_pool | |
27605 | && info->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM) | |
27606 | first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM; | |
27607 | else | |
27608 | first_gp = info->first_gp_reg_save; | |
27609 | ||
27610 | info->gp_size = reg_size * (32 - first_gp); | |
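  /* For example, if the first callee-saved GPR in use is r14, this
     reserves 4 * (32 - 14) = 72 bytes on a 32-bit target, or
     8 * (32 - 14) = 144 bytes on a 64-bit target.  */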
27611 | ||
27612 | /* For the SPE, we have an additional upper 32 bits on each GPR. | |
27613 | Ideally we should save the entire 64 bits only when the upper | |
27614 | half is used in SIMD instructions. Since we only record which | |
27615 | registers are live (not the size at which they are used), this | |
27616 | proves difficult because we'd have to traverse the instruction | |
27617 | chain at the right time, taking reload into account. This is a | |
27618 | real pain, so we opt to save all of the GPRs in 64 bits whenever | |
27619 | even one register is used in 64 bits. Otherwise, all the | |
27620 | registers in the frame get saved in 32 bits. | |
27621 | ||
27622 | So, when we save all GPRs (except the SP) in 64 bits, the | |
27623 | traditional GP save area will be empty. */ | |
27624 | if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0) | |
27625 | info->gp_size = 0; | |
27626 | ||
27627 | info->first_fp_reg_save = first_fp_reg_to_save (); | |
27628 | info->fp_size = 8 * (64 - info->first_fp_reg_save); | |
27629 | ||
27630 | info->first_altivec_reg_save = first_altivec_reg_to_save (); | |
27631 | info->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1 | |
27632 | - info->first_altivec_reg_save); | |
27633 | ||
27634 | /* Does this function call anything? */ | |
27635 | info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame); | |
27636 | ||
27637 | /* Determine if we need to save the condition code registers. */ | |
27638 | if (save_reg_p (CR2_REGNO) | |
27639 | || save_reg_p (CR3_REGNO) | |
27640 | || save_reg_p (CR4_REGNO)) | |
27641 | { | |
27642 | info->cr_save_p = 1; | |
27643 | if (DEFAULT_ABI == ABI_V4) | |
27644 | info->cr_size = reg_size; | |
27645 | } | |
27646 | ||
27647 | /* If the current function calls __builtin_eh_return, then we need | |
27648 | to allocate stack space for registers that will hold data for | |
27649 | the exception handler. */ | |
27650 | if (crtl->calls_eh_return) | |
27651 | { | |
27652 | unsigned int i; | |
27653 | for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i) | |
27654 | continue; | |
27655 | ||
27656 | /* SPE saves EH registers in 64-bits. */ | |
27657 | ehrd_size = i * (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0 | |
27658 | ? UNITS_PER_SPE_WORD : UNITS_PER_WORD); | |
27659 | } | |
27660 | else | |
27661 | ehrd_size = 0; | |
27662 | ||
27663 | /* In the ELFv2 ABI, we also need to allocate space for separate | |
27664 | CR field save areas if the function calls __builtin_eh_return. */ | |
27665 | if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return) | |
27666 | { | |
27667 | /* This hard-codes that we have three call-saved CR fields. */ | |
27668 | ehcr_size = 3 * reg_size; | |
27669 | /* We do *not* use the regular CR save mechanism. */ | |
27670 | info->cr_save_p = 0; | |
27671 | } | |
27672 | else | |
27673 | ehcr_size = 0; | |
27674 | ||
27675 | /* Determine various sizes. */ | |
27676 | info->reg_size = reg_size; | |
27677 | info->fixed_size = RS6000_SAVE_AREA; | |
27678 | info->vars_size = RS6000_ALIGN (get_frame_size (), 8); | |
27679 | if (cfun->calls_alloca) | |
27680 | info->parm_size = | |
27681 | RS6000_ALIGN (crtl->outgoing_args_size + info->fixed_size, | |
27682 | STACK_BOUNDARY / BITS_PER_UNIT) - info->fixed_size; | |
27683 | else | |
27684 | info->parm_size = RS6000_ALIGN (crtl->outgoing_args_size, | |
27685 | TARGET_ALTIVEC ? 16 : 8); | |
27686 | if (FRAME_GROWS_DOWNWARD) | |
27687 | info->vars_size | |
27688 | += RS6000_ALIGN (info->fixed_size + info->vars_size + info->parm_size, | |
27689 | ABI_STACK_BOUNDARY / BITS_PER_UNIT) | |
27690 | - (info->fixed_size + info->vars_size + info->parm_size); | |
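  /* A small worked example of the adjustment above, with hypothetical
     sizes: fixed_size = 48, vars_size = 40 and parm_size = 64, with a
     16-byte boundary (ABI_STACK_BOUNDARY of 128 bits), give a sum of
     152, which rounds up to 160, so the 8 bytes of padding are folded
     into vars_size.  */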
27691 | ||
27692 | if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0) | |
27693 | info->spe_gp_size = 8 * (32 - first_gp); | |
27694 | ||
27695 | if (TARGET_ALTIVEC_ABI) | |
27696 | info->vrsave_mask = compute_vrsave_mask (); | |
27697 | ||
27698 | if (TARGET_ALTIVEC_VRSAVE && info->vrsave_mask) | |
27699 | info->vrsave_size = 4; | |
27700 | ||
27701 | compute_save_world_info (info); | |
27702 | ||
27703 | /* Calculate the offsets. */ | |
27704 | switch (DEFAULT_ABI) | |
27705 | { | |
27706 | case ABI_NONE: | |
27707 | default: | |
27708 | gcc_unreachable (); | |
27709 | ||
27710 | case ABI_AIX: | |
27711 | case ABI_ELFv2: | |
27712 | case ABI_DARWIN: | |
27713 | info->fp_save_offset = -info->fp_size; | |
27714 | info->gp_save_offset = info->fp_save_offset - info->gp_size; | |
27715 | ||
27716 | if (TARGET_ALTIVEC_ABI) | |
27717 | { | |
27718 | info->vrsave_save_offset = info->gp_save_offset - info->vrsave_size; | |
27719 | ||
27720 | /* Align stack so vector save area is on a quadword boundary. | |
27721 | The padding goes above the vectors. */ | |
27722 | if (info->altivec_size != 0) | |
27723 | info->altivec_padding_size = info->vrsave_save_offset & 0xF; | |
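	  /* For example, a vrsave_save_offset of -20 gives
	     -20 & 0xF == 12 bytes of padding, so altivec_save_offset
	     becomes -32 - altivec_size, which is 16-byte aligned
	     because altivec_size is a multiple of 16.  */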
27724 | ||
27725 | info->altivec_save_offset = info->vrsave_save_offset | |
27726 | - info->altivec_padding_size | |
27727 | - info->altivec_size; | |
27728 | gcc_assert (info->altivec_size == 0 | |
27729 | || info->altivec_save_offset % 16 == 0); | |
27730 | ||
27731 | /* Adjust for AltiVec case. */ | |
27732 | info->ehrd_offset = info->altivec_save_offset - ehrd_size; | |
27733 | } | |
27734 | else | |
27735 | info->ehrd_offset = info->gp_save_offset - ehrd_size; | |
27736 | ||
27737 | info->ehcr_offset = info->ehrd_offset - ehcr_size; | |
27738 | info->cr_save_offset = reg_size; /* first word when 64-bit. */ | |
27739 | info->lr_save_offset = 2*reg_size; | |
27740 | break; | |
27741 | ||
27742 | case ABI_V4: | |
27743 | info->fp_save_offset = -info->fp_size; | |
27744 | info->gp_save_offset = info->fp_save_offset - info->gp_size; | |
27745 | info->cr_save_offset = info->gp_save_offset - info->cr_size; | |
27746 | ||
27747 | if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0) | |
27748 | { | |
27749 | /* Align stack so SPE GPR save area is aligned on a | |
27750 | double-word boundary. */ | |
27751 | if (info->spe_gp_size != 0 && info->cr_save_offset != 0) | |
27752 | info->spe_padding_size = 8 - (-info->cr_save_offset % 8); | |
27753 | else | |
27754 | info->spe_padding_size = 0; | |
27755 | ||
27756 | info->spe_gp_save_offset = info->cr_save_offset | |
27757 | - info->spe_padding_size | |
27758 | - info->spe_gp_size; | |
27759 | ||
27760 | /* Adjust for SPE case. */ | |
27761 | info->ehrd_offset = info->spe_gp_save_offset; | |
27762 | } | |
27763 | else if (TARGET_ALTIVEC_ABI) | |
27764 | { | |
27765 | info->vrsave_save_offset = info->cr_save_offset - info->vrsave_size; | |
27766 | ||
27767 | /* Align stack so vector save area is on a quadword boundary. */ | |
27768 | if (info->altivec_size != 0) | |
27769 | info->altivec_padding_size = 16 - (-info->vrsave_save_offset % 16); | |
27770 | ||
27771 | info->altivec_save_offset = info->vrsave_save_offset | |
27772 | - info->altivec_padding_size | |
27773 | - info->altivec_size; | |
27774 | ||
27775 | /* Adjust for AltiVec case. */ | |
27776 | info->ehrd_offset = info->altivec_save_offset; | |
27777 | } | |
27778 | else | |
27779 | info->ehrd_offset = info->cr_save_offset; | |
27780 | ||
27781 | info->ehrd_offset -= ehrd_size; | |
27782 | info->lr_save_offset = reg_size; | |
27783 | } | |
27784 | ||
27785 | save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8; | |
27786 | info->save_size = RS6000_ALIGN (info->fp_size | |
27787 | + info->gp_size | |
27788 | + info->altivec_size | |
27789 | + info->altivec_padding_size | |
27790 | + info->spe_gp_size | |
27791 | + info->spe_padding_size | |
27792 | + ehrd_size | |
27793 | + ehcr_size | |
27794 | + info->cr_size | |
27795 | + info->vrsave_size, | |
27796 | save_align); | |
27797 | ||
27798 | non_fixed_size = info->vars_size + info->parm_size + info->save_size; | |
27799 | ||
27800 | info->total_size = RS6000_ALIGN (non_fixed_size + info->fixed_size, | |
27801 | ABI_STACK_BOUNDARY / BITS_PER_UNIT); | |
27802 | ||
27803 | /* Determine if we need to save the link register. */ | |
27804 | if (info->calls_p | |
27805 | || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) | |
27806 | && crtl->profile | |
27807 | && !TARGET_PROFILE_KERNEL) | |
27808 | || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca) | |
27809 | #ifdef TARGET_RELOCATABLE | |
27810 | || (DEFAULT_ABI == ABI_V4 | |
27811 | && (TARGET_RELOCATABLE || flag_pic > 1) | |
27812 | && !constant_pool_empty_p ()) | |
27813 | #endif | |
27814 | || rs6000_ra_ever_killed ()) | |
27815 | info->lr_save_p = 1; | |
27816 | ||
27817 | using_static_chain_p = (cfun->static_chain_decl != NULL_TREE | |
27818 | && df_regs_ever_live_p (STATIC_CHAIN_REGNUM) | |
27819 | && call_used_regs[STATIC_CHAIN_REGNUM]); | |
27820 | info->savres_strategy = rs6000_savres_strategy (info, using_static_chain_p); | |
27821 | ||
27822 | if (!(info->savres_strategy & SAVE_INLINE_GPRS) | |
27823 | || !(info->savres_strategy & SAVE_INLINE_FPRS) | |
27824 | || !(info->savres_strategy & SAVE_INLINE_VRS) | |
27825 | || !(info->savres_strategy & REST_INLINE_GPRS) | |
27826 | || !(info->savres_strategy & REST_INLINE_FPRS) | |
27827 | || !(info->savres_strategy & REST_INLINE_VRS)) | |
27828 | info->lr_save_p = 1; | |
27829 | ||
27830 | if (info->lr_save_p) | |
27831 | df_set_regs_ever_live (LR_REGNO, true); | |
27832 | ||
27833 | /* Determine if we need to allocate any stack frame: | |
27834 | ||
27835 | For AIX we need to push the stack if a frame pointer is needed | |
27836 | (because the stack might be dynamically adjusted), if we are | |
27837 | debugging, if we make calls, or if the sum of fp_save, gp_save, | |
27838 | and local variables is more than the space needed to save all | |
27839 | non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8 | |
27840 | + 18*8 = 288 (GPR13 reserved). | |
27841 | ||
27842 | For V.4 we don't have the stack cushion that AIX uses, but assume | |
27843 | that the debugger can handle stackless frames. */ | |
27844 | ||
27845 | if (info->calls_p) | |
27846 | info->push_p = 1; | |
27847 | ||
27848 | else if (DEFAULT_ABI == ABI_V4) | |
27849 | info->push_p = non_fixed_size != 0; | |
27850 | ||
27851 | else if (frame_pointer_needed) | |
27852 | info->push_p = 1; | |
27853 | ||
27854 | else if (TARGET_XCOFF && write_symbols != NO_DEBUG) | |
27855 | info->push_p = 1; | |
27856 | ||
27857 | else | |
27858 | info->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288); | |
27859 | ||
27860 | return info; | |
27861 | } | |
27862 | ||
27863 | /* Return true if the current function uses any GPRs in 64-bit SIMD | |
27864 | mode. */ | |
27865 | ||
27866 | static bool | |
27867 | spe_func_has_64bit_regs_p (void) | |
27868 | { | |
27869 | rtx_insn *insns, *insn; | |
27870 | ||
27871 | /* Functions that save and restore all the call-saved registers will | |
27872 | need to save/restore the registers in 64-bits. */ | |
27873 | if (crtl->calls_eh_return | |
27874 | || cfun->calls_setjmp | |
27875 | || crtl->has_nonlocal_goto) | |
27876 | return true; | |
27877 | ||
27878 | insns = get_insns (); | |
27879 | ||
27880 | for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn)) | |
27881 | { | |
27882 | if (INSN_P (insn)) | |
27883 | { | |
27884 | rtx i; | |
27885 | ||
27886 | /* FIXME: This should be implemented with attributes... | |
27887 | ||
27888 | (set_attr "spe64" "true") ... then | |
27889 | if (get_spe64 (insn)) return true; | |
27890 | ||
27891 | It's the only reliable way to do the stuff below. */ | |
27892 | ||
27893 | i = PATTERN (insn); | |
27894 | if (GET_CODE (i) == SET) | |
27895 | { | |
27896 | machine_mode mode = GET_MODE (SET_SRC (i)); | |
27897 | ||
27898 | if (SPE_VECTOR_MODE (mode)) | |
27899 | return true; | |
27900 | if (TARGET_E500_DOUBLE | |
27901 | && (mode == DFmode || FLOAT128_2REG_P (mode))) | |
27902 | return true; | |
27903 | } | |
27904 | } | |
27905 | } | |
27906 | ||
27907 | return false; | |
27908 | } | |
27909 | ||
27910 | static void | |
27911 | debug_stack_info (rs6000_stack_t *info) | |
27912 | { | |
27913 | const char *abi_string; | |
27914 | ||
27915 | if (! info) | |
27916 | info = rs6000_stack_info (); | |
27917 | ||
27918 | fprintf (stderr, "\nStack information for function %s:\n", | |
27919 | ((current_function_decl && DECL_NAME (current_function_decl)) | |
27920 | ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl)) | |
27921 | : "<unknown>")); | |
27922 | ||
27923 | switch (info->abi) | |
27924 | { | |
27925 | default: abi_string = "Unknown"; break; | |
27926 | case ABI_NONE: abi_string = "NONE"; break; | |
27927 | case ABI_AIX: abi_string = "AIX"; break; | |
27928 | case ABI_ELFv2: abi_string = "ELFv2"; break; | |
27929 | case ABI_DARWIN: abi_string = "Darwin"; break; | |
27930 | case ABI_V4: abi_string = "V.4"; break; | |
27931 | } | |
27932 | ||
27933 | fprintf (stderr, "\tABI = %5s\n", abi_string); | |
27934 | ||
27935 | if (TARGET_ALTIVEC_ABI) | |
27936 | fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n"); | |
27937 | ||
27938 | if (TARGET_SPE_ABI) | |
27939 | fprintf (stderr, "\tSPE ABI extensions enabled.\n"); | |
27940 | ||
27941 | if (info->first_gp_reg_save != 32) | |
27942 | fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save); | |
27943 | ||
27944 | if (info->first_fp_reg_save != 64) | |
27945 | fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save); | |
27946 | ||
27947 | if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO) | |
27948 | fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n", | |
27949 | info->first_altivec_reg_save); | |
27950 | ||
27951 | if (info->lr_save_p) | |
27952 | fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p); | |
27953 | ||
27954 | if (info->cr_save_p) | |
27955 | fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p); | |
27956 | ||
27957 | if (info->vrsave_mask) | |
27958 | fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask); | |
27959 | ||
27960 | if (info->push_p) | |
27961 | fprintf (stderr, "\tpush_p = %5d\n", info->push_p); | |
27962 | ||
27963 | if (info->calls_p) | |
27964 | fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p); | |
27965 | ||
27966 | if (info->gp_size) | |
27967 | fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset); | |
27968 | ||
27969 | if (info->fp_size) | |
27970 | fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset); | |
27971 | ||
27972 | if (info->altivec_size) | |
27973 | fprintf (stderr, "\taltivec_save_offset = %5d\n", | |
27974 | info->altivec_save_offset); | |
27975 | ||
27976 | if (info->spe_gp_size) | |
27977 | fprintf (stderr, "\tspe_gp_save_offset = %5d\n", | |
27978 | info->spe_gp_save_offset); | |
27979 | ||
27980 | if (info->vrsave_size) | |
27981 | fprintf (stderr, "\tvrsave_save_offset = %5d\n", | |
27982 | info->vrsave_save_offset); | |
27983 | ||
27984 | if (info->lr_save_p) | |
27985 | fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset); | |
27986 | ||
27987 | if (info->cr_save_p) | |
27988 | fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset); | |
27989 | ||
27990 | if (info->varargs_save_offset) | |
27991 | fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset); | |
27992 | ||
27993 | if (info->total_size) | |
27994 | fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC"\n", | |
27995 | info->total_size); | |
27996 | ||
27997 | if (info->vars_size) | |
27998 | fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC"\n", | |
27999 | info->vars_size); | |
28000 | ||
28001 | if (info->parm_size) | |
28002 | fprintf (stderr, "\tparm_size = %5d\n", info->parm_size); | |
28003 | ||
28004 | if (info->fixed_size) | |
28005 | fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size); | |
28006 | ||
28007 | if (info->gp_size) | |
28008 | fprintf (stderr, "\tgp_size = %5d\n", info->gp_size); | |
28009 | ||
28010 | if (info->spe_gp_size) | |
28011 | fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size); | |
28012 | ||
28013 | if (info->fp_size) | |
28014 | fprintf (stderr, "\tfp_size = %5d\n", info->fp_size); | |
28015 | ||
28016 | if (info->altivec_size) | |
28017 | fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size); | |
28018 | ||
28019 | if (info->vrsave_size) | |
28020 | fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size); | |
28021 | ||
28022 | if (info->altivec_padding_size) | |
28023 | fprintf (stderr, "\taltivec_padding_size= %5d\n", | |
28024 | info->altivec_padding_size); | |
28025 | ||
28026 | if (info->spe_padding_size) | |
28027 | fprintf (stderr, "\tspe_padding_size = %5d\n", | |
28028 | info->spe_padding_size); | |
28029 | ||
28030 | if (info->cr_size) | |
28031 | fprintf (stderr, "\tcr_size = %5d\n", info->cr_size); | |
28032 | ||
28033 | if (info->save_size) | |
28034 | fprintf (stderr, "\tsave_size = %5d\n", info->save_size); | |
28035 | ||
28036 | if (info->reg_size != 4) | |
28037 | fprintf (stderr, "\treg_size = %5d\n", info->reg_size); | |
28038 | ||
28039 | fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy); | |
28040 | ||
28041 | fprintf (stderr, "\n"); | |
28042 | } | |
28043 | ||
28044 | rtx | |
28045 | rs6000_return_addr (int count, rtx frame) | |
28046 | { | |
28047 | /* Currently we don't optimize very well between prologue and body | |
28048 | code, and for PIC the generated code can actually be quite bad, | |
28049 | so don't try to be too clever here. */ | |
28050 | if (count != 0 | |
28051 | || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic)) | |
28052 | { | |
28053 | cfun->machine->ra_needs_full_frame = 1; | |
28054 | ||
28055 | return | |
28056 | gen_rtx_MEM | |
28057 | (Pmode, | |
28058 | memory_address | |
28059 | (Pmode, | |
28060 | plus_constant (Pmode, | |
28061 | copy_to_reg | |
28062 | (gen_rtx_MEM (Pmode, | |
28063 | memory_address (Pmode, frame))), | |
28064 | RETURN_ADDRESS_OFFSET))); | |
28065 | } | |
28066 | ||
28067 | cfun->machine->ra_need_lr = 1; | |
28068 | return get_hard_reg_initial_val (Pmode, LR_REGNO); | |
28069 | } | |
28070 | ||
28071 | /* Say whether a function is a candidate for sibcall handling or not. */ | |
28072 | ||
28073 | static bool | |
28074 | rs6000_function_ok_for_sibcall (tree decl, tree exp) | |
28075 | { | |
28076 | tree fntype; | |
28077 | ||
28078 | if (decl) | |
28079 | fntype = TREE_TYPE (decl); | |
28080 | else | |
28081 | fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp))); | |
28082 | ||
28083 | /* We can't do it if the called function has more vector parameters | |
28084 | than the current function; there's nowhere to put the VRsave code. */ | |
28085 | if (TARGET_ALTIVEC_ABI | |
28086 | && TARGET_ALTIVEC_VRSAVE | |
28087 | && !(decl && decl == current_function_decl)) | |
28088 | { | |
28089 | function_args_iterator args_iter; | |
28090 | tree type; | |
28091 | int nvreg = 0; | |
28092 | ||
28093 | /* Functions with vector parameters are required to have a | |
28094 | prototype, so the argument type info must be available | |
28095 | here. */ | |
28096 | FOREACH_FUNCTION_ARGS(fntype, type, args_iter) | |
28097 | if (TREE_CODE (type) == VECTOR_TYPE | |
28098 | && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type))) | |
28099 | nvreg++; | |
28100 | ||
28101 | FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter) | |
28102 | if (TREE_CODE (type) == VECTOR_TYPE | |
28103 | && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type))) | |
28104 | nvreg--; | |
28105 | ||
28106 | if (nvreg > 0) | |
28107 | return false; | |
28108 | } | |
28109 | ||
28110 | /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local | |
28111 | functions, because the callee may have a different TOC pointer | |
28112 | from the caller's, and there's no way to ensure we restore the TOC when | |
28113 | we return. With the secure-plt SYSV ABI we can't make non-local | |
28114 | calls when -fpic/PIC because the plt call stubs use r30. */ | |
28115 | if (DEFAULT_ABI == ABI_DARWIN | |
28116 | || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) | |
28117 | && decl | |
28118 | && !DECL_EXTERNAL (decl) | |
28119 | && !DECL_WEAK (decl) | |
28120 | && (*targetm.binds_local_p) (decl)) | |
28121 | || (DEFAULT_ABI == ABI_V4 | |
28122 | && (!TARGET_SECURE_PLT | |
28123 | || !flag_pic | |
28124 | || (decl | |
28125 | && (*targetm.binds_local_p) (decl))))) | |
28126 | { | |
28127 | tree attr_list = TYPE_ATTRIBUTES (fntype); | |
28128 | ||
28129 | if (!lookup_attribute ("longcall", attr_list) | |
28130 | || lookup_attribute ("shortcall", attr_list)) | |
28131 | return true; | |
28132 | } | |
28133 | ||
28134 | return false; | |
28135 | } | |
28136 | ||
28137 | static int | |
28138 | rs6000_ra_ever_killed (void) | |
28139 | { | |
28140 | rtx_insn *top; | |
28141 | rtx reg; | |
28142 | rtx_insn *insn; | |
28143 | ||
28144 | if (cfun->is_thunk) | |
28145 | return 0; | |
28146 | ||
28147 | if (cfun->machine->lr_save_state) | |
28148 | return cfun->machine->lr_save_state - 1; | |
28149 | ||
28150 | /* regs_ever_live has LR marked as used if any sibcalls are present, | |
28151 | but this should not force saving and restoring in the | |
28152 | pro/epilogue. Likewise, reg_set_between_p thinks a sibcall | |
28153 | clobbers LR, so that is inappropriate. */ | |
28154 | ||
28155 | /* Also, the prologue can generate a store into LR that | |
28156 | doesn't really count, like this: | |
28157 | ||
28158 | move LR->R0 | |
28159 | bcl to set PIC register | |
28160 | move LR->R31 | |
28161 | move R0->LR | |
28162 | ||
28163 | When we're called from the epilogue, we need to avoid counting | |
28164 | this as a store. */ | |
28165 | ||
28166 | push_topmost_sequence (); | |
28167 | top = get_insns (); | |
28168 | pop_topmost_sequence (); | |
28169 | reg = gen_rtx_REG (Pmode, LR_REGNO); | |
28170 | ||
28171 | for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn)) | |
28172 | { | |
28173 | if (INSN_P (insn)) | |
28174 | { | |
28175 | if (CALL_P (insn)) | |
28176 | { | |
28177 | if (!SIBLING_CALL_P (insn)) | |
28178 | return 1; | |
28179 | } | |
28180 | else if (find_regno_note (insn, REG_INC, LR_REGNO)) | |
28181 | return 1; | |
28182 | else if (set_of (reg, insn) != NULL_RTX | |
28183 | && !prologue_epilogue_contains (insn)) | |
28184 | return 1; | |
28185 | } | |
28186 | } | |
28187 | return 0; | |
28188 | } | |
28189 | \f | |
28190 | /* Emit instructions needed to load the TOC register. | |
28191 | This is needed only when TARGET_TOC and TARGET_MINIMAL_TOC are set | |
28192 | and there is a constant pool, or for SVR4 -fpic. */ | |
28193 | ||
28194 | void | |
28195 | rs6000_emit_load_toc_table (int fromprolog) | |
28196 | { | |
28197 | rtx dest; | |
28198 | dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM); | |
28199 | ||
28200 | if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic) | |
28201 | { | |
28202 | char buf[30]; | |
28203 | rtx lab, tmp1, tmp2, got; | |
28204 | ||
28205 | lab = gen_label_rtx (); | |
28206 | ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab)); | |
28207 | lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf)); | |
28208 | if (flag_pic == 2) | |
28209 | { | |
28210 | got = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name)); | |
28211 | need_toc_init = 1; | |
28212 | } | |
28213 | else | |
28214 | got = rs6000_got_sym (); | |
28215 | tmp1 = tmp2 = dest; | |
28216 | if (!fromprolog) | |
28217 | { | |
28218 | tmp1 = gen_reg_rtx (Pmode); | |
28219 | tmp2 = gen_reg_rtx (Pmode); | |
28220 | } | |
28221 | emit_insn (gen_load_toc_v4_PIC_1 (lab)); | |
28222 | emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO)); | |
28223 | emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab)); | |
28224 | emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab)); | |
28225 | } | |
28226 | else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1) | |
28227 | { | |
28228 | emit_insn (gen_load_toc_v4_pic_si ()); | |
28229 | emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO)); | |
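      /* A sketch of what this branch typically assembles to (the
	 exact text comes from the load_toc_v4_pic_si pattern in
	 rs6000.md):

	     bl _GLOBAL_OFFSET_TABLE_@local-4
	     mflr 30

	 The SVR4 ABI keeps a blrl at _GLOBAL_OFFSET_TABLE_-4, so the
	 bl returns at once with the GOT address in LR.  */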
28230 | } | |
28231 | else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2) | |
28232 | { | |
28233 | char buf[30]; | |
28234 | rtx temp0 = (fromprolog | |
28235 | ? gen_rtx_REG (Pmode, 0) | |
28236 | : gen_reg_rtx (Pmode)); | |
28237 | ||
28238 | if (fromprolog) | |
28239 | { | |
28240 | rtx symF, symL; | |
28241 | ||
28242 | ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno); | |
28243 | symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf)); | |
28244 | ||
28245 | ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno); | |
28246 | symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf)); | |
28247 | ||
28248 | emit_insn (gen_load_toc_v4_PIC_1 (symF)); | |
28249 | emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO)); | |
28250 | emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF)); | |
28251 | } | |
28252 | else | |
28253 | { | |
28254 | rtx tocsym, lab; | |
28255 | ||
28256 | tocsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name)); | |
28257 | need_toc_init = 1; | |
28258 | lab = gen_label_rtx (); | |
28259 | emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab)); | |
28260 | emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO)); | |
28261 | if (TARGET_LINK_STACK) | |
28262 | emit_insn (gen_addsi3 (dest, dest, GEN_INT (4))); | |
28263 | emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest)); | |
28264 | } | |
28265 | emit_insn (gen_addsi3 (dest, temp0, dest)); | |
28266 | } | |
28267 | else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC) | |
28268 | { | |
28269 | /* This is for AIX code running in non-PIC ELF32. */ | |
28270 | rtx realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name)); | |
28271 | ||
28272 | need_toc_init = 1; | |
28273 | emit_insn (gen_elf_high (dest, realsym)); | |
28274 | emit_insn (gen_elf_low (dest, dest, realsym)); | |
28275 | } | |
28276 | else | |
28277 | { | |
28278 | gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2); | |
28279 | ||
28280 | if (TARGET_32BIT) | |
28281 | emit_insn (gen_load_toc_aix_si (dest)); | |
28282 | else | |
28283 | emit_insn (gen_load_toc_aix_di (dest)); | |
28284 | } | |
28285 | } | |
28286 | ||
28287 | /* Emit instructions to restore the link register after determining where | |
28288 | its value has been stored. */ | |
28289 | ||
28290 | void | |
28291 | rs6000_emit_eh_reg_restore (rtx source, rtx scratch) | |
28292 | { | |
28293 | rs6000_stack_t *info = rs6000_stack_info (); | |
28294 | rtx operands[2]; | |
28295 | ||
28296 | operands[0] = source; | |
28297 | operands[1] = scratch; | |
28298 | ||
28299 | if (info->lr_save_p) | |
28300 | { | |
28301 | rtx frame_rtx = stack_pointer_rtx; | |
28302 | HOST_WIDE_INT sp_offset = 0; | |
28303 | rtx tmp; | |
28304 | ||
28305 | if (frame_pointer_needed | |
28306 | || cfun->calls_alloca | |
28307 | || info->total_size > 32767) | |
28308 | { | |
28309 | tmp = gen_frame_mem (Pmode, frame_rtx); | |
28310 | emit_move_insn (operands[1], tmp); | |
28311 | frame_rtx = operands[1]; | |
28312 | } | |
28313 | else if (info->push_p) | |
28314 | sp_offset = info->total_size; | |
28315 | ||
28316 | tmp = plus_constant (Pmode, frame_rtx, | |
28317 | info->lr_save_offset + sp_offset); | |
28318 | tmp = gen_frame_mem (Pmode, tmp); | |
28319 | emit_move_insn (tmp, operands[0]); | |
28320 | } | |
28321 | else | |
28322 | emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]); | |
28323 | ||
28324 | /* Freeze lr_save_p. We've just emitted rtl that depends on the | |
28325 | state of lr_save_p so any change from here on would be a bug. In | |
28326 | particular, stop rs6000_ra_ever_killed from considering the SET | |
28327 | of lr we may have added just above. */ | |
28328 | cfun->machine->lr_save_state = info->lr_save_p + 1; | |
28329 | } | |
28330 | ||
28331 | static GTY(()) alias_set_type set = -1; | |
28332 | ||
28333 | alias_set_type | |
28334 | get_TOC_alias_set (void) | |
28335 | { | |
28336 | if (set == -1) | |
28337 | set = new_alias_set (); | |
28338 | return set; | |
28339 | } | |
28340 | ||
28341 | /* This returns nonzero if the current function uses the TOC. This is | |
28342 | determined by the presence of (use (unspec ... UNSPEC_TOC)), which | |
28343 | is generated by the ABI_V4 load_toc_* patterns. */ | |
28344 | #if TARGET_ELF | |
28345 | static int | |
28346 | uses_TOC (void) | |
28347 | { | |
28348 | rtx_insn *insn; | |
28349 | ||
28350 | for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) | |
28351 | if (INSN_P (insn)) | |
28352 | { | |
28353 | rtx pat = PATTERN (insn); | |
28354 | int i; | |
28355 | ||
28356 | if (GET_CODE (pat) == PARALLEL) | |
28357 | for (i = 0; i < XVECLEN (pat, 0); i++) | |
28358 | { | |
28359 | rtx sub = XVECEXP (pat, 0, i); | |
28360 | if (GET_CODE (sub) == USE) | |
28361 | { | |
28362 | sub = XEXP (sub, 0); | |
28363 | if (GET_CODE (sub) == UNSPEC | |
28364 | && XINT (sub, 1) == UNSPEC_TOC) | |
28365 | return 1; | |
28366 | } | |
28367 | } | |
28368 | } | |
28369 | return 0; | |
28370 | } | |
28371 | #endif | |
28372 | ||
28373 | rtx | |
28374 | create_TOC_reference (rtx symbol, rtx largetoc_reg) | |
28375 | { | |
28376 | rtx tocrel, tocreg, hi; | |
28377 | ||
28378 | if (TARGET_DEBUG_ADDR) | |
28379 | { | |
28380 | if (GET_CODE (symbol) == SYMBOL_REF) | |
28381 | fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n", | |
28382 | XSTR (symbol, 0)); | |
28383 | else | |
28384 | { | |
28385 | fprintf (stderr, "\ncreate_TOC_reference, code %s:\n", | |
28386 | GET_RTX_NAME (GET_CODE (symbol))); | |
28387 | debug_rtx (symbol); | |
28388 | } | |
28389 | } | |
28390 | ||
28391 | if (!can_create_pseudo_p ()) | |
28392 | df_set_regs_ever_live (TOC_REGISTER, true); | |
28393 | ||
28394 | tocreg = gen_rtx_REG (Pmode, TOC_REGISTER); | |
28395 | tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL); | |
28396 | if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ()) | |
28397 | return tocrel; | |
28398 | ||
28399 | hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel)); | |
28400 | if (largetoc_reg != NULL) | |
28401 | { | |
28402 | emit_move_insn (largetoc_reg, hi); | |
28403 | hi = largetoc_reg; | |
28404 | } | |
28405 | return gen_rtx_LO_SUM (Pmode, hi, tocrel); | |
28406 | } | |
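/* For reference, a small-model TOC reference created above typically
   assembles to a single TOC-relative load such as

       ld 3,sym@toc(2)

   while the HIGH/LO_SUM pair used for the larger code models becomes

       addis 9,2,sym@toc@ha
       ld 3,sym@toc@l(9)

   (Register numbers are illustrative; the @toc relocations are the
   standard ELF ones.)  */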
28407 | ||
28408 | /* Issue assembly directives that create a reference to the given DWARF | |
28409 | FRAME_TABLE_LABEL from the current function section. */ | |
28410 | void | |
28411 | rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label) | |
28412 | { | |
28413 | fprintf (asm_out_file, "\t.ref %s\n", | |
28414 | (* targetm.strip_name_encoding) (frame_table_label)); | |
28415 | } | |
28416 | \f | |
28417 | /* This ties together stack memory (MEM with an alias set of frame_alias_set) | |
28418 | and the change to the stack pointer. */ | |
28419 | ||
28420 | static void | |
28421 | rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed) | |
28422 | { | |
28423 | rtvec p; | |
28424 | int i; | |
28425 | rtx regs[3]; | |
28426 | ||
28427 | i = 0; | |
28428 | regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); | |
28429 | if (hard_frame_needed) | |
28430 | regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM); | |
28431 | if (!(REGNO (fp) == STACK_POINTER_REGNUM | |
28432 | || (hard_frame_needed | |
28433 | && REGNO (fp) == HARD_FRAME_POINTER_REGNUM))) | |
28434 | regs[i++] = fp; | |
28435 | ||
28436 | p = rtvec_alloc (i); | |
28437 | while (--i >= 0) | |
28438 | { | |
28439 | rtx mem = gen_frame_mem (BLKmode, regs[i]); | |
28440 | RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx); | |
28441 | } | |
28442 | ||
28443 | emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p))); | |
28444 | } | |
28445 | ||
28446 | /* Emit the correct code for allocating stack space, as insns. | |
28447 | If COPY_REG, make sure a copy of the old frame is left in it. | |
28448 | The generated code may use hard register 0 as a temporary. */ | |
28449 | ||
28450 | static rtx_insn * | |
28451 | rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off) | |
28452 | { | |
28453 | rtx_insn *insn; | |
28454 | rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); | |
28455 | rtx tmp_reg = gen_rtx_REG (Pmode, 0); | |
28456 | rtx todec = gen_int_mode (-size, Pmode); | |
28457 | rtx par, set, mem; | |
28458 | ||
28459 | if (INTVAL (todec) != -size) | |
28460 | { | |
28461 | warning (0, "stack frame too large"); | |
28462 | emit_insn (gen_trap ()); | |
28463 | return 0; | |
28464 | } | |
28465 | ||
28466 | if (crtl->limit_stack) | |
28467 | { | |
28468 | if (REG_P (stack_limit_rtx) | |
28469 | && REGNO (stack_limit_rtx) > 1 | |
28470 | && REGNO (stack_limit_rtx) <= 31) | |
28471 | { | |
28472 | emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size))); | |
28473 | emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg, | |
28474 | const0_rtx)); | |
28475 | } | |
28476 | else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF | |
28477 | && TARGET_32BIT | |
28478 | && DEFAULT_ABI == ABI_V4 | |
28479 | && !flag_pic) | |
28480 | { | |
28481 | rtx toload = gen_rtx_CONST (VOIDmode, | |
28482 | gen_rtx_PLUS (Pmode, | |
28483 | stack_limit_rtx, | |
28484 | GEN_INT (size))); | |
28485 | ||
28486 | emit_insn (gen_elf_high (tmp_reg, toload)); | |
28487 | emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload)); | |
28488 | emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg, | |
28489 | const0_rtx)); | |
28490 | } | |
28491 | else | |
28492 | warning (0, "stack limit expression is not supported"); | |
28493 | } | |
28494 | ||
28495 | if (copy_reg) | |
28496 | { | |
28497 | if (copy_off != 0) | |
28498 | emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off))); | |
28499 | else | |
28500 | emit_move_insn (copy_reg, stack_reg); | |
28501 | } | |
28502 | ||
28503 | if (size > 32767) | |
28504 | { | |
28505 | /* Need a note here so that try_split doesn't get confused. */ | |
28506 | if (get_last_insn () == NULL_RTX) | |
28507 | emit_note (NOTE_INSN_DELETED); | |
28508 | insn = emit_move_insn (tmp_reg, todec); | |
28509 | try_split (PATTERN (insn), insn, 0); | |
28510 | todec = tmp_reg; | |
28511 | } | |
28512 | ||
28513 | insn = emit_insn (TARGET_32BIT | |
28514 | ? gen_movsi_update_stack (stack_reg, stack_reg, | |
28515 | todec, stack_reg) | |
28516 | : gen_movdi_di_update_stack (stack_reg, stack_reg, | |
28517 | todec, stack_reg)); | |
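  /* For a typical small frame this is a single store-with-update,
     e.g. "stwu 1,-64(1)" (or "stdu" on 64-bit); for frames larger
     than 32767 bytes the amount was loaded into r0 above and an
     indexed update form such as "stwux 1,1,0" is used instead.  */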
28518 | /* Since we didn't use gen_frame_mem to generate the MEM, grab | |
28519 | it now and set the alias set/attributes. The above gen_*_update | |
28520 | calls will generate a PARALLEL with the MEM set being the first | |
28521 | operation. */ | |
28522 | par = PATTERN (insn); | |
28523 | gcc_assert (GET_CODE (par) == PARALLEL); | |
28524 | set = XVECEXP (par, 0, 0); | |
28525 | gcc_assert (GET_CODE (set) == SET); | |
28526 | mem = SET_DEST (set); | |
28527 | gcc_assert (MEM_P (mem)); | |
28528 | MEM_NOTRAP_P (mem) = 1; | |
28529 | set_mem_alias_set (mem, get_frame_alias_set ()); | |
28530 | ||
28531 | RTX_FRAME_RELATED_P (insn) = 1; | |
28532 | add_reg_note (insn, REG_FRAME_RELATED_EXPR, | |
28533 | gen_rtx_SET (stack_reg, gen_rtx_PLUS (Pmode, stack_reg, | |
28534 | GEN_INT (-size)))); | |
28535 | return insn; | |
28536 | } | |
28537 | ||
28538 | #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP) | |
28539 | ||
28540 | #if PROBE_INTERVAL > 32768 | |
28541 | #error Cannot use indexed addressing mode for stack probing | |
28542 | #endif | |
28543 | ||
28544 | /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE, | |
28545 | inclusive. These are offsets from the current stack pointer. */ | |
28546 | ||
28547 | static void | |
28548 | rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size) | |
28549 | { | |
28550 | /* See if we have a constant small number of probes to generate. If so, | |
28551 | that's the easy case. */ | |
28552 | if (first + size <= 32768) | |
28553 | { | |
28554 | HOST_WIDE_INT i; | |
28555 | ||
28556 | /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until | |
28557 | it exceeds SIZE. If only one probe is needed, this will not | |
28558 | generate any code. Then probe at FIRST + SIZE. */ | |
28559 | for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL) | |
28560 | emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, | |
28561 | -(first + i))); | |
28562 | ||
28563 | emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, | |
28564 | -(first + size))); | |
28565 | } | |
28566 | ||
28567 | /* Otherwise, do the same as above, but in a loop. Note that we must be | |
28568 | extra careful with variables wrapping around because we might be at | |
28569 | the very top (or the very bottom) of the address space and we have | |
28570 | to be able to handle this case properly; in particular, we use an | |
28571 | equality test for the loop condition. */ | |
28572 | else | |
28573 | { | |
28574 | HOST_WIDE_INT rounded_size; | |
28575 | rtx r12 = gen_rtx_REG (Pmode, 12); | |
28576 | rtx r0 = gen_rtx_REG (Pmode, 0); | |
28577 | ||
28578 | /* Sanity check for the addressing mode we're going to use. */ | |
28579 | gcc_assert (first <= 32768); | |
28580 | ||
28581 | /* Step 1: round SIZE to the previous multiple of the interval. */ | |
28582 | ||
28583 | rounded_size = ROUND_DOWN (size, PROBE_INTERVAL); | |
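      /* E.g. with the default PROBE_INTERVAL of 4096 and SIZE 10000,
	 rounded_size is 8192: the loop below probes at FIRST + 4096
	 and FIRST + 8192, and step 4 adds the residual probe at
	 FIRST + 10000.  */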
28584 | ||
28585 | ||
28586 | /* Step 2: compute initial and final value of the loop counter. */ | |
28587 | ||
28588 | /* TEST_ADDR = SP + FIRST. */ | |
28589 | emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx, | |
28590 | -first))); | |
28591 | ||
28592 | /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */ | |
28593 | if (rounded_size > 32768) | |
28594 | { | |
28595 | emit_move_insn (r0, GEN_INT (-rounded_size)); | |
28596 | emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0))); | |
28597 | } | |
28598 | else | |
28599 | emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12, | |
28600 | -rounded_size))); | |
28601 | ||
28602 | ||
28603 | /* Step 3: the loop | |
28604 | ||
28605 | do | |
28606 | { | |
28607 | TEST_ADDR = TEST_ADDR + PROBE_INTERVAL | |
28608 | probe at TEST_ADDR | |
28609 | } | |
28610 | while (TEST_ADDR != LAST_ADDR) | |
28611 | ||
28612 | probes at FIRST + N * PROBE_INTERVAL for values of N from 1 | |
28613 | until it is equal to ROUNDED_SIZE. */ | |
28614 | ||
28615 | if (TARGET_64BIT) | |
28616 | emit_insn (gen_probe_stack_rangedi (r12, r12, r0)); | |
28617 | else | |
28618 | emit_insn (gen_probe_stack_rangesi (r12, r12, r0)); | |
28619 | ||
28620 | ||
28621 | /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time | |
28622 | that SIZE is equal to ROUNDED_SIZE. */ | |
28623 | ||
28624 | if (size != rounded_size) | |
28625 | emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size)); | |
28626 | } | |
28627 | } | |
28628 | ||
28629 | /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are | |
28630 | absolute addresses. */ | |
28631 | ||
28632 | const char * | |
28633 | output_probe_stack_range (rtx reg1, rtx reg2) | |
28634 | { | |
28635 | static int labelno = 0; | |
28636 | char loop_lab[32]; | |
28637 | rtx xops[2]; | |
28638 | ||
28639 | ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++); | |
28640 | ||
28641 | /* Loop. */ | |
28642 | ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); | |
28643 | ||
28644 | /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */ | |
28645 | xops[0] = reg1; | |
28646 | xops[1] = GEN_INT (-PROBE_INTERVAL); | |
28647 | output_asm_insn ("addi %0,%0,%1", xops); | |
28648 | ||
28649 | /* Probe at TEST_ADDR. */ | |
28650 | xops[1] = gen_rtx_REG (Pmode, 0); | |
28651 | output_asm_insn ("stw %1,0(%0)", xops); | |
28652 | ||
28653 | /* Test if TEST_ADDR == LAST_ADDR. */ | |
28654 | xops[1] = reg2; | |
28655 | if (TARGET_64BIT) | |
28656 | output_asm_insn ("cmpd 0,%0,%1", xops); | |
28657 | else | |
28658 | output_asm_insn ("cmpw 0,%0,%1", xops); | |
28659 | ||
28660 | /* Branch. */ | |
28661 | fputs ("\tbne 0,", asm_out_file); | |
28662 | assemble_name_raw (asm_out_file, loop_lab); | |
28663 | fputc ('\n', asm_out_file); | |
28664 | ||
28665 | return ""; | |
28666 | } | |
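/* For example, with PROBE_INTERVAL == 4096 and REG1/REG2 being r12
   and r0, the routine above emits a loop along the lines of:

   .LPSRL0:
	   addi 12,12,-4096
	   stw 0,0(12)
	   cmpw 0,12,0
	   bne 0,.LPSRL0

   (cmpd instead of cmpw on 64-bit; register numbers illustrative.)  */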
28667 | ||
28668 | /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced | |
28669 | with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2 | |
28670 | is not NULL. It would be nice if dwarf2out_frame_debug_expr could | |
28671 | deduce these equivalences by itself so it wasn't necessary to hold | |
28672 | its hand so much. Don't be tempted to always supply d2_f_d_e with | |
28673 | the actual cfa register, i.e. r31 when we are using a hard frame | |
28674 | pointer. That fails when saving regs off r1, and sched moves the | |
28675 | r31 setup past the reg saves. */ | |
28676 | ||
28677 | static rtx_insn * | |
28678 | rs6000_frame_related (rtx_insn *insn, rtx reg, HOST_WIDE_INT val, | |
28679 | rtx reg2, rtx repl2) | |
28680 | { | |
28681 | rtx repl; | |
28682 | ||
28683 | if (REGNO (reg) == STACK_POINTER_REGNUM) | |
28684 | { | |
28685 | gcc_checking_assert (val == 0); | |
28686 | repl = NULL_RTX; | |
28687 | } | |
28688 | else | |
28689 | repl = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM), | |
28690 | GEN_INT (val)); | |
28691 | ||
28692 | rtx pat = PATTERN (insn); | |
28693 | if (!repl && !reg2) | |
28694 | { | |
28695 | /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */ | |
28696 | if (GET_CODE (pat) == PARALLEL) | |
28697 | for (int i = 0; i < XVECLEN (pat, 0); i++) | |
28698 | if (GET_CODE (XVECEXP (pat, 0, i)) == SET) | |
28699 | { | |
28700 | rtx set = XVECEXP (pat, 0, i); | |
28701 | ||
28702 | /* If this PARALLEL has been emitted for out-of-line | |
28703 | register save functions, or store multiple, then omit | |
28704 | eh_frame info for any user-defined global regs. If | |
28705 | eh_frame info is supplied, frame unwinding will | |
28706 | restore a user reg. */ | |
28707 | if (!REG_P (SET_SRC (set)) | |
28708 | || !fixed_reg_p (REGNO (SET_SRC (set)))) | |
28709 | RTX_FRAME_RELATED_P (set) = 1; | |
28710 | } | |
28711 | RTX_FRAME_RELATED_P (insn) = 1; | |
28712 | return insn; | |
28713 | } | |
28714 | ||
28715 | /* We expect that 'pat' is either a SET or a PARALLEL containing | |
28716 | SETs (and possibly other stuff). In a PARALLEL, all the SETs | |
28717 | are important so they all have to be marked RTX_FRAME_RELATED_P. | |
28718 | Call simplify_replace_rtx on the SETs rather than the whole insn | |
28719 | so as to leave the other stuff alone (for example USE of r12). */ | |
28720 | ||
28721 | set_used_flags (pat); | |
28722 | if (GET_CODE (pat) == SET) | |
28723 | { | |
28724 | if (repl) | |
28725 | pat = simplify_replace_rtx (pat, reg, repl); | |
28726 | if (reg2) | |
28727 | pat = simplify_replace_rtx (pat, reg2, repl2); | |
28728 | } | |
28729 | else if (GET_CODE (pat) == PARALLEL) | |
28730 | { | |
28731 | pat = shallow_copy_rtx (pat); | |
28732 | XVEC (pat, 0) = shallow_copy_rtvec (XVEC (pat, 0)); | |
28733 | ||
28734 | for (int i = 0; i < XVECLEN (pat, 0); i++) | |
28735 | if (GET_CODE (XVECEXP (pat, 0, i)) == SET) | |
28736 | { | |
28737 | rtx set = XVECEXP (pat, 0, i); | |
28738 | ||
28739 | if (repl) | |
28740 | set = simplify_replace_rtx (set, reg, repl); | |
28741 | if (reg2) | |
28742 | set = simplify_replace_rtx (set, reg2, repl2); | |
28743 | XVECEXP (pat, 0, i) = set; | |
28744 | ||
28745 | /* Omit eh_frame info for any user-defined global regs. */ | |
28746 | if (!REG_P (SET_SRC (set)) | |
28747 | || !fixed_reg_p (REGNO (SET_SRC (set)))) | |
28748 | RTX_FRAME_RELATED_P (set) = 1; | |
28749 | } | |
28750 | } | |
28751 | else | |
28752 | gcc_unreachable (); | |
28753 | ||
28754 | RTX_FRAME_RELATED_P (insn) = 1; | |
28755 | add_reg_note (insn, REG_FRAME_RELATED_EXPR, copy_rtx_if_shared (pat)); | |
28756 | ||
28757 | return insn; | |
28758 | } | |
28759 | ||
28760 | /* Returns an insn that has a vrsave set operation with the | |
28761 | appropriate CLOBBERs. */ | |
28762 | ||
28763 | static rtx | |
28764 | generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep) | |
28765 | { | |
28766 | int nclobs, i; | |
28767 | rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1]; | |
28768 | rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO); | |
28769 | ||
28770 | clobs[0] | |
28771 | = gen_rtx_SET (vrsave, | |
28772 | gen_rtx_UNSPEC_VOLATILE (SImode, | |
28773 | gen_rtvec (2, reg, vrsave), | |
28774 | UNSPECV_SET_VRSAVE)); | |
28775 | ||
28776 | nclobs = 1; | |
28777 | ||
28778 | /* We need to clobber the registers in the mask so the scheduler | |
28779 | does not move sets to VRSAVE before sets of AltiVec registers. | |
28780 | ||
28781 | However, if the function receives nonlocal gotos, reload will set | |
28782 | all call saved registers live. We will end up with: | |
28783 | ||
28784 | (set (reg 999) (mem)) | |
28785 | (parallel [ (set (reg vrsave) (unspec blah)) | |
28786 | (clobber (reg 999))]) | |
28787 | ||
28788 | The clobber will cause the store into reg 999 to be dead, and | |
28789 | flow will attempt to delete an epilogue insn. In this case, we | |
28790 | need an unspec use/set of the register. */ | |
28791 | ||
28792 | for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i) | |
28793 | if (info->vrsave_mask & ALTIVEC_REG_BIT (i)) | |
28794 | { | |
28795 | if (!epiloguep || call_used_regs [i]) | |
28796 | clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode, | |
28797 | gen_rtx_REG (V4SImode, i)); | |
28798 | else | |
28799 | { | |
28800 | rtx reg = gen_rtx_REG (V4SImode, i); | |
28801 | ||
28802 | clobs[nclobs++] | |
28803 | = gen_rtx_SET (reg, | |
28804 | gen_rtx_UNSPEC (V4SImode, | |
28805 | gen_rtvec (1, reg), 27)); | |
28806 | } | |
28807 | } | |
28808 | ||
28809 | insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs)); | |
28810 | ||
28811 | for (i = 0; i < nclobs; ++i) | |
28812 | XVECEXP (insn, 0, i) = clobs[i]; | |
28813 | ||
28814 | return insn; | |
28815 | } | |
28816 | ||
28817 | static rtx | |
28818 | gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store) | |
28819 | { | |
28820 | rtx addr, mem; | |
28821 | ||
28822 | addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset)); | |
28823 | mem = gen_frame_mem (GET_MODE (reg), addr); | |
28824 | return gen_rtx_SET (store ? mem : reg, store ? reg : mem); | |
28825 | } | |
28826 | ||
28827 | static rtx | |
28828 | gen_frame_load (rtx reg, rtx frame_reg, int offset) | |
28829 | { | |
28830 | return gen_frame_set (reg, frame_reg, offset, false); | |
28831 | } | |
28832 | ||
28833 | static rtx | |
28834 | gen_frame_store (rtx reg, rtx frame_reg, int offset) | |
28835 | { | |
28836 | return gen_frame_set (reg, frame_reg, offset, true); | |
28837 | } | |
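/* So, for example, gen_frame_store (r30, sp, 8) on a 32-bit target
   yields (set (mem:SI (plus:SI (reg:SI 1) (const_int 8))) (reg:SI 30)),
   ready to be emitted directly or wrapped in a PARALLEL.  */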
28838 | ||
28839 | /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes. | |
28840 | Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */ | |
28841 | ||
28842 | static rtx_insn * | |
28843 | emit_frame_save (rtx frame_reg, machine_mode mode, | |
28844 | unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp) | |
28845 | { | |
28846 | rtx reg; | |
28847 | ||
28848 | /* Some cases that need register indexed addressing. */ | |
28849 | gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode)) | |
28850 | || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode)) | |
28851 | || (TARGET_E500_DOUBLE && mode == DFmode) | |
28852 | || (TARGET_SPE_ABI | |
28853 | && SPE_VECTOR_MODE (mode) | |
28854 | && !SPE_CONST_OFFSET_OK (offset)))); | |
28855 | ||
28856 | reg = gen_rtx_REG (mode, regno); | |
28857 | rtx_insn *insn = emit_insn (gen_frame_store (reg, frame_reg, offset)); | |
28858 | return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp, | |
28859 | NULL_RTX, NULL_RTX); | |
28860 | } | |
28861 | ||
28862 | /* Emit an offset memory reference suitable for a frame store, while | |
28863 | converting to a valid addressing mode. */ | |
28864 | ||
28865 | static rtx | |
28866 | gen_frame_mem_offset (machine_mode mode, rtx reg, int offset) | |
28867 | { | |
28868 | rtx int_rtx, offset_rtx; | |
28869 | ||
28870 | int_rtx = GEN_INT (offset); | |
28871 | ||
28872 | if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset)) | |
28873 | || (TARGET_E500_DOUBLE && mode == DFmode)) | |
28874 | { | |
28875 | offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH); | |
28876 | emit_move_insn (offset_rtx, int_rtx); | |
28877 | } | |
28878 | else | |
28879 | offset_rtx = int_rtx; | |
28880 | ||
28881 | return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx)); | |
28882 | } | |
28883 | ||
28884 | #ifndef TARGET_FIX_AND_CONTINUE | |
28885 | #define TARGET_FIX_AND_CONTINUE 0 | |
28886 | #endif | |
28887 | ||
28888 | /* The first saved register is really GPR 13 or 14, FPR 14, or VR 20; we need the smallest. */ | |
28889 | #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO | |
28890 | #define LAST_SAVRES_REGISTER 31 | |
28891 | #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1) | |
28892 | ||
28893 | enum { | |
28894 | SAVRES_LR = 0x1, | |
28895 | SAVRES_SAVE = 0x2, | |
28896 | SAVRES_REG = 0x0c, | |
28897 | SAVRES_GPR = 0, | |
28898 | SAVRES_FPR = 4, | |
28899 | SAVRES_VR = 8 | |
28900 | }; | |
28901 | ||
28902 | static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12]; | |
28903 | ||
28904 | /* Temporary holding space for an out-of-line register save/restore | |
28905 | routine name. */ | |
28906 | static char savres_routine_name[30]; | |
28907 | ||
28908 | /* Return the name for an out-of-line register save/restore routine. | |
28909 | SEL encodes the register class and whether we save or restore. */ | |
28910 | ||
28911 | static char * | |
28912 | rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel) | |
28913 | { | |
28914 | const char *prefix = ""; | |
28915 | const char *suffix = ""; | |
28916 | ||
28917 | /* Different targets are supposed to define | |
28918 | {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed | |
28919 | routine name could be defined with: | |
28920 | ||
28921 | sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX) | |
28922 | ||
28923 | This is a nice idea in theory, but in reality, things are | |
28924 | complicated in several ways: | |
28925 | ||
28926 | - ELF targets have save/restore routines for GPRs. | |
28927 | ||
28928 | - SPE targets use different prefixes for 32/64-bit registers, and | |
28929 | neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen. | |
28930 | ||
28931 | - PPC64 ELF targets have routines for save/restore of GPRs that | |
28932 | differ in what they do with the link register, so having a set | |
28933 | prefix doesn't work. (We only use one of the save routines at | |
28934 | the moment, though.) | |
28935 | ||
28936 | - PPC32 ELF targets have "exit" versions of the restore routines | |
28937 | that restore the link register and can save some extra space. | |
28938 | These require an extra suffix. (There are also "tail" versions | |
28939 | of the restore routines and "GOT" versions of the save routines, | |
28940 | but we don't generate those at present. Same problems apply, | |
28941 | though.) | |
28942 | ||
28943 | We deal with all this by synthesizing our own prefix/suffix and | |
28944 | using that for the simple sprintf call shown above. */ | |
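  /* The names this produces look like, e.g., "_restgpr_29_x" (32-bit
     V.4 restore of r29-r31 that also reloads the saved LR) or
     "_savegpr0_14" (64-bit ELF save of r14-r31 together with LR).  */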
28945 | if (TARGET_SPE) | |
28946 | { | |
28947 | /* No floating point saves on the SPE. */ | |
28948 | gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR); | |
28949 | ||
28950 | if ((sel & SAVRES_SAVE)) | |
28951 | prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_"; | |
28952 | else | |
28953 | prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_"; | |
28954 | ||
28955 | if ((sel & SAVRES_LR)) | |
28956 | suffix = "_x"; | |
28957 | } | |
28958 | else if (DEFAULT_ABI == ABI_V4) | |
28959 | { | |
28960 | if (TARGET_64BIT) | |
28961 | goto aix_names; | |
28962 | ||
28963 | if ((sel & SAVRES_REG) == SAVRES_GPR) | |
28964 | prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_"; | |
28965 | else if ((sel & SAVRES_REG) == SAVRES_FPR) | |
28966 | prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_"; | |
28967 | else if ((sel & SAVRES_REG) == SAVRES_VR) | |
28968 | prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_"; | |
28969 | else | |
28970 | abort (); | |
28971 | ||
28972 | if ((sel & SAVRES_LR)) | |
28973 | suffix = "_x"; | |
28974 | } | |
28975 | else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) | |
28976 | { | |
28977 | #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD) | |
28978 | /* No out-of-line save/restore routines for GPRs on AIX. */ | |
28979 | gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR); | |
28980 | #endif | |
28981 | ||
28982 | aix_names: | |
28983 | if ((sel & SAVRES_REG) == SAVRES_GPR) | |
28984 | prefix = ((sel & SAVRES_SAVE) | |
28985 | ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_") | |
28986 | : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_")); | |
28987 | else if ((sel & SAVRES_REG) == SAVRES_FPR) | |
28988 | { | |
28989 | #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD) | |
28990 | if ((sel & SAVRES_LR)) | |
28991 | prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_"); | |
28992 | else | |
28993 | #endif | |
28994 | { | |
28995 | prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX; | |
28996 | suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX; | |
28997 | } | |
28998 | } | |
28999 | else if ((sel & SAVRES_REG) == SAVRES_VR) | |
29000 | prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_"; | |
29001 | else | |
29002 | abort (); | |
29003 | } | |
29004 | ||
29005 | if (DEFAULT_ABI == ABI_DARWIN) | |
29006 | { | |
29007 | /* The Darwin approach is (slightly) different, in order to be | |
29008 | compatible with code generated by the system toolchain. There is a | |
29009 | single symbol for the start of the save sequence, and the code here | |
29010 | embeds an offset into that code on the basis of the first register | |
29011 | to be saved. */ | |
29012 | prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ; | |
29013 | if ((sel & SAVRES_REG) == SAVRES_GPR) | |
29014 | sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix, | |
29015 | ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"), | |
29016 | (regno - 13) * 4, prefix, regno); | |
29017 | else if ((sel & SAVRES_REG) == SAVRES_FPR) | |
29018 | sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix, | |
29019 | (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno); | |
29020 | else if ((sel & SAVRES_REG) == SAVRES_VR) | |
29021 | sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix, | |
29022 | (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno); | |
29023 | else | |
29024 | abort (); | |
29025 | } | |
29026 | else | |
29027 | sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix); | |
29028 | ||
29029 | return savres_routine_name; | |
29030 | } | |
29031 | ||
29032 | /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine. | |
29033 | SEL encodes the register class and whether we save or restore. */ | |
29034 | ||
29035 | static rtx | |
29036 | rs6000_savres_routine_sym (rs6000_stack_t *info, int sel) | |
29037 | { | |
29038 | int regno = ((sel & SAVRES_REG) == SAVRES_GPR | |
29039 | ? info->first_gp_reg_save | |
29040 | : (sel & SAVRES_REG) == SAVRES_FPR | |
29041 | ? info->first_fp_reg_save - 32 | |
29042 | : (sel & SAVRES_REG) == SAVRES_VR | |
29043 | ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO | |
29044 | : -1); | |
29045 | rtx sym; | |
29046 | int select = sel; | |
29047 | ||
29048 | /* On the SPE, we never have any FPRs, but we do have 32/64-bit | |
29049 | versions of the gpr routines. */ | |
29050 | if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR | |
29051 | && info->spe_64bit_regs_used) | |
29052 | select ^= SAVRES_FPR ^ SAVRES_GPR; | |
29053 | ||
29054 | /* Don't generate bogus routine names. */ | |
29055 | gcc_assert (FIRST_SAVRES_REGISTER <= regno | |
29056 | && regno <= LAST_SAVRES_REGISTER | |
29057 | && select >= 0 && select <= 12); | |
29058 | ||
29059 | sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]; | |
29060 | ||
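| /* Create the SYMBOL_REF lazily and cache it, so that all uses of a | |
| given routine share a single rtx. */ | |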
29061 | if (sym == NULL) | |
29062 | { | |
29063 | char *name; | |
29064 | ||
29065 | name = rs6000_savres_routine_name (info, regno, sel); | |
29066 | ||
29067 | sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select] | |
29068 | = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); | |
29069 | SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION; | |
29070 | } | |
29071 | ||
29072 | return sym; | |
29073 | } | |
29074 | ||
29075 | /* Emit a sequence of insns, including a stack tie if needed, for | |
29076 | resetting the stack pointer. If UPDT_REGNO is not 1, then don't | |
29077 | reset the stack pointer, but move the base of the frame into | |
29078 | reg UPDT_REGNO for use by out-of-line register restore routines. */ | |
29079 | ||
29080 | static rtx | |
29081 | rs6000_emit_stack_reset (rs6000_stack_t *info, | |
29082 | rtx frame_reg_rtx, HOST_WIDE_INT frame_off, | |
29083 | unsigned updt_regno) | |
29084 | { | |
29085 | /* If there is nothing to do, don't do anything. */ | |
29086 | if (frame_off == 0 && REGNO (frame_reg_rtx) == updt_regno) | |
29087 | return NULL_RTX; | |
29088 | ||
29089 | rtx updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno); | |
29090 | ||
29091 | /* This blockage is needed so that sched doesn't decide to move | |
29092 | the sp change before the register restores. */ | |
29093 | if (DEFAULT_ABI == ABI_V4 | |
29094 | || (TARGET_SPE_ABI | |
29095 | && info->spe_64bit_regs_used != 0 | |
29096 | && info->first_gp_reg_save != 32)) | |
29097 | return emit_insn (gen_stack_restore_tie (updt_reg_rtx, frame_reg_rtx, | |
29098 | GEN_INT (frame_off))); | |
29099 | ||
29100 | /* If we are restoring registers out-of-line, we will be using the | |
29101 | "exit" variants of the restore routines, which will reset the | |
29102 | stack for us. But we do need to point updt_reg into the | |
29103 | right place for those routines. */ | |
29104 | if (frame_off != 0) | |
29105 | return emit_insn (gen_add3_insn (updt_reg_rtx, | |
29106 | frame_reg_rtx, GEN_INT (frame_off))); | |
29107 | else | |
29108 | return emit_move_insn (updt_reg_rtx, frame_reg_rtx); | |
29111 | } | |
29112 | ||
29113 | /* Return the register number used as a pointer by out-of-line | |
29114 | save/restore functions. */ | |
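| /* In brief: for AIX/ELFv2 the FPR routines and all LR variants take | |
| the pointer in r1 and the rest take it in r12; on Darwin the FPR | |
| routines use r1 and the rest r11; for V.4 it is always r11. */ | |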
29115 | ||
29116 | static inline unsigned | |
29117 | ptr_regno_for_savres (int sel) | |
29118 | { | |
29119 | if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) | |
29120 | return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12; | |
29121 | return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11; | |
29122 | } | |
29123 | ||
29124 | /* Construct a parallel rtx describing the effect of a call to an | |
29125 | out-of-line register save/restore routine, and emit the insn | |
29126 | or jump_insn as appropriate. */ | |
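| /* As a rough sketch, a GPR restore that also returns through the | |
| routine produces a PARALLEL of the form: | |
| (parallel [(return) | |
| (clobber (reg LR)) | |
| (use (symbol_ref "_restgpr0_N")) | |
| (use (reg 1)) | |
| (set (reg N) (mem ...)) ...]) */ | |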
29127 | ||
29128 | static rtx_insn * | |
29129 | rs6000_emit_savres_rtx (rs6000_stack_t *info, | |
29130 | rtx frame_reg_rtx, int save_area_offset, int lr_offset, | |
29131 | machine_mode reg_mode, int sel) | |
29132 | { | |
29133 | int i; | |
29134 | int offset, start_reg, end_reg, n_regs, use_reg; | |
29135 | int reg_size = GET_MODE_SIZE (reg_mode); | |
29136 | rtx sym; | |
29137 | rtvec p; | |
29138 | rtx par; | |
29139 | rtx_insn *insn; | |
29140 | ||
29141 | offset = 0; | |
29142 | start_reg = ((sel & SAVRES_REG) == SAVRES_GPR | |
29143 | ? info->first_gp_reg_save | |
29144 | : (sel & SAVRES_REG) == SAVRES_FPR | |
29145 | ? info->first_fp_reg_save | |
29146 | : (sel & SAVRES_REG) == SAVRES_VR | |
29147 | ? info->first_altivec_reg_save | |
29148 | : -1); | |
29149 | end_reg = ((sel & SAVRES_REG) == SAVRES_GPR | |
29150 | ? 32 | |
29151 | : (sel & SAVRES_REG) == SAVRES_FPR | |
29152 | ? 64 | |
29153 | : (sel & SAVRES_REG) == SAVRES_VR | |
29154 | ? LAST_ALTIVEC_REGNO + 1 | |
29155 | : -1); | |
29156 | n_regs = end_reg - start_reg; | |
29157 | p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0) | |
29158 | + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0) | |
29159 | + n_regs); | |
29160 | ||
29161 | if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR)) | |
29162 | RTVEC_ELT (p, offset++) = ret_rtx; | |
29163 | ||
29164 | RTVEC_ELT (p, offset++) | |
29165 | = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO)); | |
29166 | ||
29167 | sym = rs6000_savres_routine_sym (info, sel); | |
29168 | RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym); | |
29169 | ||
29170 | use_reg = ptr_regno_for_savres (sel); | |
29171 | if ((sel & SAVRES_REG) == SAVRES_VR) | |
29172 | { | |
29173 | /* Vector regs are saved/restored using [reg+reg] addressing. */ | |
29174 | RTVEC_ELT (p, offset++) | |
29175 | = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg)); | |
29176 | RTVEC_ELT (p, offset++) | |
29177 | = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0)); | |
29178 | } | |
29179 | else | |
29180 | RTVEC_ELT (p, offset++) | |
29181 | = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg)); | |
29182 | ||
29183 | for (i = 0; i < end_reg - start_reg; i++) | |
29184 | RTVEC_ELT (p, i + offset) | |
29185 | = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i), | |
29186 | frame_reg_rtx, save_area_offset + reg_size * i, | |
29187 | (sel & SAVRES_SAVE) != 0); | |
29188 | ||
29189 | if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR)) | |
29190 | RTVEC_ELT (p, i + offset) | |
29191 | = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset); | |
29192 | ||
29193 | par = gen_rtx_PARALLEL (VOIDmode, p); | |
29194 | ||
29195 | if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR)) | |
29196 | { | |
29197 | insn = emit_jump_insn (par); | |
29198 | JUMP_LABEL (insn) = ret_rtx; | |
29199 | } | |
29200 | else | |
29201 | insn = emit_insn (par); | |
29202 | return insn; | |
29203 | } | |
29204 | ||
29205 | /* Emit code to store CR fields that need to be saved into REG. */ | |
29206 | ||
29207 | static void | |
29208 | rs6000_emit_move_from_cr (rtx reg) | |
29209 | { | |
29210 | /* Only the ELFv2 ABI allows storing only selected fields. */ | |
29211 | if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF) | |
29212 | { | |
29213 | int i, cr_reg[8], count = 0; | |
29214 | ||
29215 | /* Collect CR fields that must be saved. */ | |
29216 | for (i = 0; i < 8; i++) | |
29217 | if (save_reg_p (CR0_REGNO + i)) | |
29218 | cr_reg[count++] = i; | |
29219 | ||
29220 | /* If it's just a single one, use mfcrf. */ | |
29221 | if (count == 1) | |
29222 | { | |
29223 | rtvec p = rtvec_alloc (1); | |
29224 | rtvec r = rtvec_alloc (2); | |
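| /* Operand 1 is the FXM field mask for mfcrf: CR field 0 maps to the | |
| high-order bit 0x80, so field N is selected by 1 << (7 - N), | |
| e.g. 0x20 for CR2. */ | |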
29225 | RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]); | |
29226 | RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0])); | |
29227 | RTVEC_ELT (p, 0) | |
29228 | = gen_rtx_SET (reg, | |
29229 | gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR)); | |
29230 | ||
29231 | emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); | |
29232 | return; | |
29233 | } | |
29234 | ||
29235 | /* ??? It might be better to handle count == 2 / 3 cases here | |
29236 | as well, using logical operations to combine the values. */ | |
29237 | } | |
29238 | ||
29239 | emit_insn (gen_movesi_from_cr (reg)); | |
29240 | } | |
29241 | ||
29242 | /* Return whether the split-stack arg pointer (r12) is used. */ | |
29243 | ||
29244 | static bool | |
29245 | split_stack_arg_pointer_used_p (void) | |
29246 | { | |
29247 | /* If the pseudo holding the arg pointer is no longer a pseudo, | |
29248 | then the arg pointer is used. */ | |
29249 | if (cfun->machine->split_stack_arg_pointer != NULL_RTX | |
29250 | && (!REG_P (cfun->machine->split_stack_arg_pointer) | |
29251 | || (REGNO (cfun->machine->split_stack_arg_pointer) | |
29252 | < FIRST_PSEUDO_REGISTER))) | |
29253 | return true; | |
29254 | ||
29255 | /* Unfortunately we also need to do some code scanning, since | |
29256 | r12 may have been substituted for the pseudo. */ | |
29257 | rtx_insn *insn; | |
29258 | basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb; | |
29259 | FOR_BB_INSNS (bb, insn) | |
29260 | if (NONDEBUG_INSN_P (insn)) | |
29261 | { | |
29262 | /* A call destroys r12. */ | |
29263 | if (CALL_P (insn)) | |
29264 | return false; | |
29265 | ||
29266 | df_ref use; | |
29267 | FOR_EACH_INSN_USE (use, insn) | |
29268 | { | |
29269 | rtx x = DF_REF_REG (use); | |
29270 | if (REG_P (x) && REGNO (x) == 12) | |
29271 | return true; | |
29272 | } | |
29273 | df_ref def; | |
29274 | FOR_EACH_INSN_DEF (def, insn) | |
29275 | { | |
29276 | rtx x = DF_REF_REG (def); | |
29277 | if (REG_P (x) && REGNO (x) == 12) | |
29278 | return false; | |
29279 | } | |
29280 | } | |
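| /* No explicit use or definition of r12 was seen; fall back on | |
| liveness: r12 counts as used if it is live on exit from the block. */ | |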
29281 | return bitmap_bit_p (DF_LR_OUT (bb), 12); | |
29282 | } | |
29283 | ||
29284 | /* Return whether we need to emit an ELFv2 global entry point prologue. */ | |
29285 | ||
29286 | static bool | |
29287 | rs6000_global_entry_point_needed_p (void) | |
29288 | { | |
29289 | /* Only needed for the ELFv2 ABI. */ | |
29290 | if (DEFAULT_ABI != ABI_ELFv2) | |
29291 | return false; | |
29292 | ||
29293 | /* With -msingle-pic-base, we assume the whole program shares the same | |
29294 | TOC, so no global entry point prologues are needed anywhere. */ | |
29295 | if (TARGET_SINGLE_PIC_BASE) | |
29296 | return false; | |
29297 | ||
29298 | /* Ensure we have a global entry point for thunks. ??? We could | |
29299 | avoid that if the target routine doesn't need a global entry point, | |
29300 | but we do not know whether this is the case at this point. */ | |
29301 | if (cfun->is_thunk) | |
29302 | return true; | |
29303 | ||
29304 | /* For regular functions, rs6000_emit_prologue sets this flag if the | |
29305 | routine ever uses the TOC pointer. */ | |
29306 | return cfun->machine->r2_setup_needed; | |
29307 | } | |
29308 | ||
29309 | /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */ | |
29310 | static sbitmap | |
29311 | rs6000_get_separate_components (void) | |
29312 | { | |
29313 | rs6000_stack_t *info = rs6000_stack_info (); | |
29314 | ||
29315 | if (WORLD_SAVE_P (info)) | |
29316 | return NULL; | |
29317 | ||
29318 | if (TARGET_SPE_ABI) | |
29319 | return NULL; | |
29320 | ||
29321 | gcc_assert (!(info->savres_strategy & SAVE_MULTIPLE) | |
29322 | && !(info->savres_strategy & REST_MULTIPLE)); | |
29323 | ||
29324 | /* Component 0 is the save/restore of LR (done via GPR0). | |
29325 | Components 13..31 are the save/restore of GPR13..GPR31. | |
29326 | Components 46..63 are the save/restore of FPR14..FPR31. */ | |
29327 | ||
29328 | cfun->machine->n_components = 64; | |
29329 | ||
29330 | sbitmap components = sbitmap_alloc (cfun->machine->n_components); | |
29331 | bitmap_clear (components); | |
29332 | ||
29333 | int reg_size = TARGET_32BIT ? 4 : 8; | |
29334 | int fp_reg_size = 8; | |
29335 | ||
29336 | /* The GPRs that need to be saved to the frame. */ | |
29337 | if ((info->savres_strategy & SAVE_INLINE_GPRS) | |
29338 | && (info->savres_strategy & REST_INLINE_GPRS)) | |
29339 | { | |
29340 | int offset = info->gp_save_offset; | |
29341 | if (info->push_p) | |
29342 | offset += info->total_size; | |
29343 | ||
29344 | for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++) | |
29345 | { | |
29346 | if (IN_RANGE (offset, -0x8000, 0x7fff) | |
29347 | && rs6000_reg_live_or_pic_offset_p (regno)) | |
29348 | bitmap_set_bit (components, regno); | |
29349 | ||
29350 | offset += reg_size; | |
29351 | } | |
29352 | } | |
29353 | ||
29354 | /* Don't mess with the hard frame pointer. */ | |
29355 | if (frame_pointer_needed) | |
29356 | bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM); | |
29357 | ||
29358 | /* Don't mess with the fixed TOC register. */ | |
29359 | if ((TARGET_TOC && TARGET_MINIMAL_TOC) | |
29360 | || (flag_pic == 1 && DEFAULT_ABI == ABI_V4) | |
29361 | || (flag_pic && DEFAULT_ABI == ABI_DARWIN)) | |
29362 | bitmap_clear_bit (components, RS6000_PIC_OFFSET_TABLE_REGNUM); | |
29363 | ||
29364 | /* The FPRs that need to be saved to the frame. */ | |
29365 | if ((info->savres_strategy & SAVE_INLINE_FPRS) | |
29366 | && (info->savres_strategy & REST_INLINE_FPRS)) | |
29367 | { | |
29368 | int offset = info->fp_save_offset; | |
29369 | if (info->push_p) | |
29370 | offset += info->total_size; | |
29371 | ||
29372 | for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++) | |
29373 | { | |
29374 | if (IN_RANGE (offset, -0x8000, 0x7fff) && save_reg_p (regno)) | |
29375 | bitmap_set_bit (components, regno); | |
29376 | ||
29377 | offset += fp_reg_size; | |
29378 | } | |
29379 | } | |
29380 | ||
29381 | /* Optimize LR save and restore if we can. This is component 0. Any | |
29382 | out-of-line register save/restore routines need LR. */ | |
29383 | if (info->lr_save_p | |
29384 | && !(flag_pic && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)) | |
29385 | && (info->savres_strategy & SAVE_INLINE_GPRS) | |
29386 | && (info->savres_strategy & REST_INLINE_GPRS) | |
29387 | && (info->savres_strategy & SAVE_INLINE_FPRS) | |
29388 | && (info->savres_strategy & REST_INLINE_FPRS) | |
29389 | && (info->savres_strategy & SAVE_INLINE_VRS) | |
29390 | && (info->savres_strategy & REST_INLINE_VRS)) | |
29391 | { | |
29392 | int offset = info->lr_save_offset; | |
29393 | if (info->push_p) | |
29394 | offset += info->total_size; | |
29395 | if (IN_RANGE (offset, -0x8000, 0x7fff)) | |
29396 | bitmap_set_bit (components, 0); | |
29397 | } | |
29398 | ||
29399 | return components; | |
29400 | } | |
29401 | ||
29402 | /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */ | |
29403 | static sbitmap | |
29404 | rs6000_components_for_bb (basic_block bb) | |
29405 | { | |
29406 | rs6000_stack_t *info = rs6000_stack_info (); | |
29407 | ||
29408 | bitmap in = DF_LIVE_IN (bb); | |
29409 | bitmap gen = &DF_LIVE_BB_INFO (bb)->gen; | |
29410 | bitmap kill = &DF_LIVE_BB_INFO (bb)->kill; | |
29411 | ||
29412 | sbitmap components = sbitmap_alloc (cfun->machine->n_components); | |
29413 | bitmap_clear (components); | |
29414 | ||
29415 | /* A register is used in a bb if it is in the IN, GEN, or KILL sets. */ | |
29416 | ||
29417 | /* GPRs. */ | |
29418 | for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++) | |
29419 | if (bitmap_bit_p (in, regno) | |
29420 | || bitmap_bit_p (gen, regno) | |
29421 | || bitmap_bit_p (kill, regno)) | |
29422 | bitmap_set_bit (components, regno); | |
29423 | ||
29424 | /* FPRs. */ | |
29425 | for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++) | |
29426 | if (bitmap_bit_p (in, regno) | |
29427 | || bitmap_bit_p (gen, regno) | |
29428 | || bitmap_bit_p (kill, regno)) | |
29429 | bitmap_set_bit (components, regno); | |
29430 | ||
29431 | /* The link register. */ | |
29432 | if (bitmap_bit_p (in, LR_REGNO) | |
29433 | || bitmap_bit_p (gen, LR_REGNO) | |
29434 | || bitmap_bit_p (kill, LR_REGNO)) | |
29435 | bitmap_set_bit (components, 0); | |
29436 | ||
29437 | return components; | |
29438 | } | |
29439 | ||
29440 | /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */ | |
29441 | static void | |
29442 | rs6000_disqualify_components (sbitmap components, edge e, | |
29443 | sbitmap edge_components, bool /*is_prologue*/) | |
29444 | { | |
29445 | /* Our LR pro/epilogue code moves LR via R0, so R0 had better not be | |
29446 | live where we want to place that code. */ | |
29447 | if (bitmap_bit_p (edge_components, 0) | |
29448 | && bitmap_bit_p (DF_LIVE_IN (e->dest), 0)) | |
29449 | { | |
29450 | if (dump_file) | |
29451 | fprintf (dump_file, "Disqualifying LR because GPR0 is live " | |
29452 | "on entry to bb %d\n", e->dest->index); | |
29453 | bitmap_clear_bit (components, 0); | |
29454 | } | |
29455 | } | |
29456 | ||
29457 | /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */ | |
29458 | static void | |
29459 | rs6000_emit_prologue_components (sbitmap components) | |
29460 | { | |
29461 | rs6000_stack_t *info = rs6000_stack_info (); | |
29462 | rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed | |
29463 | ? HARD_FRAME_POINTER_REGNUM | |
29464 | : STACK_POINTER_REGNUM); | |
29465 | ||
29466 | machine_mode reg_mode = Pmode; | |
29467 | int reg_size = TARGET_32BIT ? 4 : 8; | |
29468 | machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) | |
29469 | ? DFmode : SFmode; | |
29470 | int fp_reg_size = 8; | |
29471 | ||
29472 | /* Prologue for LR. */ | |
29473 | if (bitmap_bit_p (components, 0)) | |
29474 | { | |
29475 | rtx reg = gen_rtx_REG (reg_mode, 0); | |
29476 | rtx_insn *insn = emit_move_insn (reg, gen_rtx_REG (reg_mode, LR_REGNO)); | |
29477 | RTX_FRAME_RELATED_P (insn) = 1; | |
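| /* A REG_CFA_REGISTER note with a NULL argument tells dwarf2cfi to | |
| take the rule from the insn's own SET: LR now lives in r0. */ | |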
29478 | add_reg_note (insn, REG_CFA_REGISTER, NULL); | |
29479 | ||
29480 | int offset = info->lr_save_offset; | |
29481 | if (info->push_p) | |
29482 | offset += info->total_size; | |
29483 | ||
29484 | insn = emit_insn (gen_frame_store (reg, ptr_reg, offset)); | |
29485 | RTX_FRAME_RELATED_P (insn) = 1; | |
29486 | rtx lr = gen_rtx_REG (reg_mode, LR_REGNO); | |
29487 | rtx mem = copy_rtx (SET_DEST (single_set (insn))); | |
29488 | add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, lr)); | |
29489 | } | |
29490 | ||
29491 | /* Prologue for the GPRs. */ | |
29492 | int offset = info->gp_save_offset; | |
29493 | if (info->push_p) | |
29494 | offset += info->total_size; | |
29495 | ||
29496 | for (int i = info->first_gp_reg_save; i < 32; i++) | |
29497 | { | |
29498 | if (bitmap_bit_p (components, i)) | |
29499 | { | |
29500 | rtx reg = gen_rtx_REG (reg_mode, i); | |
29501 | rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset)); | |
29502 | RTX_FRAME_RELATED_P (insn) = 1; | |
29503 | rtx set = copy_rtx (single_set (insn)); | |
29504 | add_reg_note (insn, REG_CFA_OFFSET, set); | |
29505 | } | |
29506 | ||
29507 | offset += reg_size; | |
29508 | } | |
29509 | ||
29510 | /* Prologue for the FPRs. */ | |
29511 | offset = info->fp_save_offset; | |
29512 | if (info->push_p) | |
29513 | offset += info->total_size; | |
29514 | ||
29515 | for (int i = info->first_fp_reg_save; i < 64; i++) | |
29516 | { | |
29517 | if (bitmap_bit_p (components, i)) | |
29518 | { | |
29519 | rtx reg = gen_rtx_REG (fp_reg_mode, i); | |
29520 | rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset)); | |
29521 | RTX_FRAME_RELATED_P (insn) = 1; | |
29522 | rtx set = copy_rtx (single_set (insn)); | |
29523 | add_reg_note (insn, REG_CFA_OFFSET, set); | |
29524 | } | |
29525 | ||
29526 | offset += fp_reg_size; | |
29527 | } | |
29528 | } | |
29529 | ||
29530 | /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */ | |
29531 | static void | |
29532 | rs6000_emit_epilogue_components (sbitmap components) | |
29533 | { | |
29534 | rs6000_stack_t *info = rs6000_stack_info (); | |
29535 | rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed | |
29536 | ? HARD_FRAME_POINTER_REGNUM | |
29537 | : STACK_POINTER_REGNUM); | |
29538 | ||
29539 | machine_mode reg_mode = Pmode; | |
29540 | int reg_size = TARGET_32BIT ? 4 : 8; | |
29541 | ||
29542 | machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) | |
29543 | ? DFmode : SFmode; | |
29544 | int fp_reg_size = 8; | |
29545 | ||
29546 | /* Epilogue for the FPRs. */ | |
29547 | int offset = info->fp_save_offset; | |
29548 | if (info->push_p) | |
29549 | offset += info->total_size; | |
29550 | ||
29551 | for (int i = info->first_fp_reg_save; i < 64; i++) | |
29552 | { | |
29553 | if (bitmap_bit_p (components, i)) | |
29554 | { | |
29555 | rtx reg = gen_rtx_REG (fp_reg_mode, i); | |
29556 | rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset)); | |
29557 | RTX_FRAME_RELATED_P (insn) = 1; | |
29558 | add_reg_note (insn, REG_CFA_RESTORE, reg); | |
29559 | } | |
29560 | ||
29561 | offset += fp_reg_size; | |
29562 | } | |
29563 | ||
29564 | /* Epilogue for the GPRs. */ | |
29565 | offset = info->gp_save_offset; | |
29566 | if (info->push_p) | |
29567 | offset += info->total_size; | |
29568 | ||
29569 | for (int i = info->first_gp_reg_save; i < 32; i++) | |
29570 | { | |
29571 | if (bitmap_bit_p (components, i)) | |
29572 | { | |
29573 | rtx reg = gen_rtx_REG (reg_mode, i); | |
29574 | rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset)); | |
29575 | RTX_FRAME_RELATED_P (insn) = 1; | |
29576 | add_reg_note (insn, REG_CFA_RESTORE, reg); | |
29577 | } | |
29578 | ||
29579 | offset += reg_size; | |
29580 | } | |
29581 | ||
29582 | /* Epilogue for LR. */ | |
29583 | if (bitmap_bit_p (components, 0)) | |
29584 | { | |
29585 | int offset = info->lr_save_offset; | |
29586 | if (info->push_p) | |
29587 | offset += info->total_size; | |
29588 | ||
29589 | rtx reg = gen_rtx_REG (reg_mode, 0); | |
29590 | rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset)); | |
29591 | ||
29592 | rtx lr = gen_rtx_REG (Pmode, LR_REGNO); | |
29593 | insn = emit_move_insn (lr, reg); | |
29594 | RTX_FRAME_RELATED_P (insn) = 1; | |
29595 | add_reg_note (insn, REG_CFA_RESTORE, lr); | |
29596 | } | |
29597 | } | |
29598 | ||
29599 | /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */ | |
29600 | static void | |
29601 | rs6000_set_handled_components (sbitmap components) | |
29602 | { | |
29603 | rs6000_stack_t *info = rs6000_stack_info (); | |
29604 | ||
29605 | for (int i = info->first_gp_reg_save; i < 32; i++) | |
29606 | if (bitmap_bit_p (components, i)) | |
29607 | cfun->machine->gpr_is_wrapped_separately[i] = true; | |
29608 | ||
29609 | for (int i = info->first_fp_reg_save; i < 64; i++) | |
29610 | if (bitmap_bit_p (components, i)) | |
29611 | cfun->machine->fpr_is_wrapped_separately[i - 32] = true; | |
29612 | ||
29613 | if (bitmap_bit_p (components, 0)) | |
29614 | cfun->machine->lr_is_wrapped_separately = true; | |
29615 | } | |
29616 | ||
29617 | /* Emit function prologue as insns. */ | |
29618 | ||
29619 | void | |
29620 | rs6000_emit_prologue (void) | |
29621 | { | |
29622 | rs6000_stack_t *info = rs6000_stack_info (); | |
29623 | machine_mode reg_mode = Pmode; | |
29624 | int reg_size = TARGET_32BIT ? 4 : 8; | |
29625 | machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) | |
29626 | ? DFmode : SFmode; | |
29627 | int fp_reg_size = 8; | |
29628 | rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); | |
29629 | rtx frame_reg_rtx = sp_reg_rtx; | |
29630 | unsigned int cr_save_regno; | |
29631 | rtx cr_save_rtx = NULL_RTX; | |
29632 | rtx_insn *insn; | |
29633 | int strategy; | |
29634 | int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE | |
29635 | && df_regs_ever_live_p (STATIC_CHAIN_REGNUM) | |
29636 | && call_used_regs[STATIC_CHAIN_REGNUM]); | |
29637 | int using_split_stack = (flag_split_stack | |
29638 | && (lookup_attribute ("no_split_stack", | |
29639 | DECL_ATTRIBUTES (cfun->decl)) | |
29640 | == NULL)); | |
29641 | ||
29642 | /* Offset to top of frame for frame_reg and sp respectively. */ | |
29643 | HOST_WIDE_INT frame_off = 0; | |
29644 | HOST_WIDE_INT sp_off = 0; | |
29645 | /* sp_adjust is the stack adjusting instruction, tracked so that the | |
29646 | insn setting up the split-stack arg pointer can be emitted just | |
29647 | prior to it, when r12 is not used here for other purposes. */ | |
29648 | rtx_insn *sp_adjust = 0; | |
29649 | ||
29650 | #if CHECKING_P | |
29651 | /* Track and check usage of r0, r11, r12. */ | |
29652 | int reg_inuse = using_static_chain_p ? 1 << 11 : 0; | |
29653 | #define START_USE(R) do \ | |
29654 | { \ | |
29655 | gcc_assert ((reg_inuse & (1 << (R))) == 0); \ | |
29656 | reg_inuse |= 1 << (R); \ | |
29657 | } while (0) | |
29658 | #define END_USE(R) do \ | |
29659 | { \ | |
29660 | gcc_assert ((reg_inuse & (1 << (R))) != 0); \ | |
29661 | reg_inuse &= ~(1 << (R)); \ | |
29662 | } while (0) | |
29663 | #define NOT_INUSE(R) do \ | |
29664 | { \ | |
29665 | gcc_assert ((reg_inuse & (1 << (R))) == 0); \ | |
29666 | } while (0) | |
29667 | #else | |
29668 | #define START_USE(R) do {} while (0) | |
29669 | #define END_USE(R) do {} while (0) | |
29670 | #define NOT_INUSE(R) do {} while (0) | |
29671 | #endif | |
29672 | ||
29673 | if (DEFAULT_ABI == ABI_ELFv2 | |
29674 | && !TARGET_SINGLE_PIC_BASE) | |
29675 | { | |
29676 | cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM); | |
29677 | ||
29678 | /* With -mminimal-toc we may generate an extra use of r2 below. */ | |
29679 | if (TARGET_TOC && TARGET_MINIMAL_TOC | |
29680 | && !constant_pool_empty_p ()) | |
29681 | cfun->machine->r2_setup_needed = true; | |
29682 | } | |
29683 | ||
29684 | ||
29685 | if (flag_stack_usage_info) | |
29686 | current_function_static_stack_size = info->total_size; | |
29687 | ||
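| /* With static builtin stack checking (-fstack-check), probe the | |
| pages of the new frame. A leaf function that never calls alloca | |
| only needs to probe beyond the STACK_CHECK_PROTECT guard area. */ | |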
29688 | if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK) | |
29689 | { | |
29690 | HOST_WIDE_INT size = info->total_size; | |
29691 | ||
29692 | if (crtl->is_leaf && !cfun->calls_alloca) | |
29693 | { | |
29694 | if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT) | |
29695 | rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, | |
29696 | size - STACK_CHECK_PROTECT); | |
29697 | } | |
29698 | else if (size > 0) | |
29699 | rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size); | |
29700 | } | |
29701 | ||
29702 | if (TARGET_FIX_AND_CONTINUE) | |
29703 | { | |
29704 | /* gdb on darwin arranges to forward a function from the old | |
29705 | address by modifying the first 5 instructions of the function | |
29706 | to branch to the overriding function. This is necessary to | |
29707 | permit function pointers that point to the old function to | |
29708 | actually forward to the new function. */ | |
29709 | emit_insn (gen_nop ()); | |
29710 | emit_insn (gen_nop ()); | |
29711 | emit_insn (gen_nop ()); | |
29712 | emit_insn (gen_nop ()); | |
29713 | emit_insn (gen_nop ()); | |
29714 | } | |
29715 | ||
29716 | if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0) | |
29717 | { | |
29718 | reg_mode = V2SImode; | |
29719 | reg_size = 8; | |
29720 | } | |
29721 | ||
29722 | /* Handle world saves specially here. */ | |
29723 | if (WORLD_SAVE_P (info)) | |
29724 | { | |
29725 | int i, j, sz; | |
29726 | rtx treg; | |
29727 | rtvec p; | |
29728 | rtx reg0; | |
29729 | ||
29730 | /* save_world expects lr in r0. */ | |
29731 | reg0 = gen_rtx_REG (Pmode, 0); | |
29732 | if (info->lr_save_p) | |
29733 | { | |
29734 | insn = emit_move_insn (reg0, | |
29735 | gen_rtx_REG (Pmode, LR_REGNO)); | |
29736 | RTX_FRAME_RELATED_P (insn) = 1; | |
29737 | } | |
29738 | ||
29739 | /* The SAVE_WORLD and RESTORE_WORLD routines make a number of | |
29740 | assumptions about the offsets of various bits of the stack | |
29741 | frame. */ | |
29742 | gcc_assert (info->gp_save_offset == -220 | |
29743 | && info->fp_save_offset == -144 | |
29744 | && info->lr_save_offset == 8 | |
29745 | && info->cr_save_offset == 4 | |
29746 | && info->push_p | |
29747 | && info->lr_save_p | |
29748 | && (!crtl->calls_eh_return | |
29749 | || info->ehrd_offset == -432) | |
29750 | && info->vrsave_save_offset == -224 | |
29751 | && info->altivec_save_offset == -416); | |
29752 | ||
29753 | treg = gen_rtx_REG (SImode, 11); | |
29754 | emit_move_insn (treg, GEN_INT (-info->total_size)); | |
29755 | ||
29756 | /* SAVE_WORLD takes the caller's LR in R0 and the frame size | |
29757 | in R11. It also clobbers R12, so beware! */ | |
29758 | ||
29759 | /* Preserve CR2 for save_world prologues.  */ | |
29760 | sz = 5; | |
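| /* Five fixed elements: the LR clobber, the use of save_world, the | |
| CR2 and LR stores, and the stack pointer update below. */ | |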
29761 | sz += 32 - info->first_gp_reg_save; | |
29762 | sz += 64 - info->first_fp_reg_save; | |
29763 | sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1; | |
29764 | p = rtvec_alloc (sz); | |
29765 | j = 0; | |
29766 | RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode, | |
29767 | gen_rtx_REG (SImode, | |
29768 | LR_REGNO)); | |
29769 | RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode, | |
29770 | gen_rtx_SYMBOL_REF (Pmode, | |
29771 | "*save_world")); | |
29772 | /* We do floats first so that the instruction pattern matches | |
29773 | properly. */ | |
29774 | for (i = 0; i < 64 - info->first_fp_reg_save; i++) | |
29775 | RTVEC_ELT (p, j++) | |
29776 | = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT | |
29777 | ? DFmode : SFmode, | |
29778 | info->first_fp_reg_save + i), | |
29779 | frame_reg_rtx, | |
29780 | info->fp_save_offset + frame_off + 8 * i); | |
29781 | for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++) | |
29782 | RTVEC_ELT (p, j++) | |
29783 | = gen_frame_store (gen_rtx_REG (V4SImode, | |
29784 | info->first_altivec_reg_save + i), | |
29785 | frame_reg_rtx, | |
29786 | info->altivec_save_offset + frame_off + 16 * i); | |
29787 | for (i = 0; i < 32 - info->first_gp_reg_save; i++) | |
29788 | RTVEC_ELT (p, j++) | |
29789 | = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i), | |
29790 | frame_reg_rtx, | |
29791 | info->gp_save_offset + frame_off + reg_size * i); | |
29792 | ||
29793 | /* The CR register is traditionally saved as CR2. */ | |
29794 | RTVEC_ELT (p, j++) | |
29795 | = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO), | |
29796 | frame_reg_rtx, info->cr_save_offset + frame_off); | |
29797 | /* Record the store of R0, which holds the saved LR. */ | |
29798 | if (info->lr_save_p) | |
29799 | RTVEC_ELT (p, j++) | |
29800 | = gen_frame_store (reg0, | |
29801 | frame_reg_rtx, info->lr_save_offset + frame_off); | |
29802 | /* Explain what happens to the stack pointer. */ | |
29803 | { | |
29804 | rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg); | |
29805 | RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval); | |
29806 | } | |
29807 | ||
29808 | insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); | |
29809 | rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off, | |
29810 | treg, GEN_INT (-info->total_size)); | |
29811 | sp_off = frame_off = info->total_size; | |
29812 | } | |
29813 | ||
29814 | strategy = info->savres_strategy; | |
29815 | ||
29816 | /* For V.4, update the stack before we do any saving and set the back pointer. */ | |
29817 | if (! WORLD_SAVE_P (info) | |
29818 | && info->push_p | |
29819 | && (DEFAULT_ABI == ABI_V4 | |
29820 | || crtl->calls_eh_return)) | |
29821 | { | |
29822 | bool need_r11 = (TARGET_SPE | |
29823 | ? (!(strategy & SAVE_INLINE_GPRS) | |
29824 | && info->spe_64bit_regs_used == 0) | |
29825 | : (!(strategy & SAVE_INLINE_FPRS) | |
29826 | || !(strategy & SAVE_INLINE_GPRS) | |
29827 | || !(strategy & SAVE_INLINE_VRS))); | |
29828 | int ptr_regno = -1; | |
29829 | rtx ptr_reg = NULL_RTX; | |
29830 | int ptr_off = 0; | |
29831 | ||
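| /* A frame smaller than 32768 bytes can be addressed from the stack | |
| pointer with the 16-bit signed displacement of d-form memory | |
| insns, so no separate frame register is needed. */ | |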
29832 | if (info->total_size < 32767) | |
29833 | frame_off = info->total_size; | |
29834 | else if (need_r11) | |
29835 | ptr_regno = 11; | |
29836 | else if (info->cr_save_p | |
29837 | || info->lr_save_p | |
29838 | || info->first_fp_reg_save < 64 | |
29839 | || info->first_gp_reg_save < 32 | |
29840 | || info->altivec_size != 0 | |
29841 | || info->vrsave_size != 0 | |
29842 | || crtl->calls_eh_return) | |
29843 | ptr_regno = 12; | |
29844 | else | |
29845 | { | |
29846 | /* The prologue won't be saving any regs so there is no need | |
29847 | to set up a frame register to access any frame save area. | |
29848 | We also won't be using frame_off anywhere below, but set | |
29849 | the correct value anyway to protect against future | |
29850 | changes to this function. */ | |
29851 | frame_off = info->total_size; | |
29852 | } | |
29853 | if (ptr_regno != -1) | |
29854 | { | |
29855 | /* Set up the frame offset to that needed by the first | |
29856 | out-of-line save function. */ | |
29857 | START_USE (ptr_regno); | |
29858 | ptr_reg = gen_rtx_REG (Pmode, ptr_regno); | |
29859 | frame_reg_rtx = ptr_reg; | |
29860 | if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0) | |
29861 | gcc_checking_assert (info->fp_save_offset + info->fp_size == 0); | |
29862 | else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32) | |
29863 | ptr_off = info->gp_save_offset + info->gp_size; | |
29864 | else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0) | |
29865 | ptr_off = info->altivec_save_offset + info->altivec_size; | |
29866 | frame_off = -ptr_off; | |
29867 | } | |
29868 | sp_adjust = rs6000_emit_allocate_stack (info->total_size, | |
29869 | ptr_reg, ptr_off); | |
29870 | if (REGNO (frame_reg_rtx) == 12) | |
29871 | sp_adjust = 0; | |
29872 | sp_off = info->total_size; | |
29873 | if (frame_reg_rtx != sp_reg_rtx) | |
29874 | rs6000_emit_stack_tie (frame_reg_rtx, false); | |
29875 | } | |
29876 | ||
29877 | /* If we use the link register, get it into r0. */ | |
29878 | if (!WORLD_SAVE_P (info) && info->lr_save_p | |
29879 | && !cfun->machine->lr_is_wrapped_separately) | |
29880 | { | |
29881 | rtx addr, reg, mem; | |
29882 | ||
29883 | reg = gen_rtx_REG (Pmode, 0); | |
29884 | START_USE (0); | |
29885 | insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO)); | |
29886 | RTX_FRAME_RELATED_P (insn) = 1; | |
29887 | ||
29888 | if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR | |
29889 | | SAVE_NOINLINE_FPRS_SAVES_LR))) | |
29890 | { | |
29891 | addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, | |
29892 | GEN_INT (info->lr_save_offset + frame_off)); | |
29893 | mem = gen_rtx_MEM (Pmode, addr); | |
29894 | /* This store must not use rs6000_sr_alias_set, because | |
29895 | __builtin_return_address also reads this slot. */ | |
29896 | ||
29897 | insn = emit_move_insn (mem, reg); | |
29898 | rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off, | |
29899 | NULL_RTX, NULL_RTX); | |
29900 | END_USE (0); | |
29901 | } | |
29902 | } | |
29903 | ||
29904 | /* If we need to save CR, put it into r12 or r11. Choose r12 except when | |
29905 | r12 will be needed by out-of-line gpr restore. */ | |
29906 | cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) | |
29907 | && !(strategy & (SAVE_INLINE_GPRS | |
29908 | | SAVE_NOINLINE_GPRS_SAVES_LR)) | |
29909 | ? 11 : 12); | |
29910 | if (!WORLD_SAVE_P (info) | |
29911 | && info->cr_save_p | |
29912 | && REGNO (frame_reg_rtx) != cr_save_regno | |
29913 | && !(using_static_chain_p && cr_save_regno == 11) | |
29914 | && !(using_split_stack && cr_save_regno == 12 && sp_adjust)) | |
29915 | { | |
29916 | cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno); | |
29917 | START_USE (cr_save_regno); | |
29918 | rs6000_emit_move_from_cr (cr_save_rtx); | |
29919 | } | |
29920 | ||
29921 | /* Do any required saving of FPRs. If the strategy is to save | |
29922 | inline, do it ourselves; otherwise, call an out-of-line routine. */ | |
29923 | if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS)) | |
29924 | { | |
29925 | int offset = info->fp_save_offset + frame_off; | |
29926 | for (int i = info->first_fp_reg_save; i < 64; i++) | |
29927 | { | |
29928 | if (save_reg_p (i) | |
29929 | && !cfun->machine->fpr_is_wrapped_separately[i - 32]) | |
29930 | emit_frame_save (frame_reg_rtx, fp_reg_mode, i, offset, | |
29931 | sp_off - frame_off); | |
29932 | ||
29933 | offset += fp_reg_size; | |
29934 | } | |
29935 | } | |
29936 | else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64) | |
29937 | { | |
29938 | bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0; | |
29939 | int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0); | |
29940 | unsigned ptr_regno = ptr_regno_for_savres (sel); | |
29941 | rtx ptr_reg = frame_reg_rtx; | |
29942 | ||
29943 | if (REGNO (frame_reg_rtx) == ptr_regno) | |
29944 | gcc_checking_assert (frame_off == 0); | |
29945 | else | |
29946 | { | |
29947 | ptr_reg = gen_rtx_REG (Pmode, ptr_regno); | |
29948 | NOT_INUSE (ptr_regno); | |
29949 | emit_insn (gen_add3_insn (ptr_reg, | |
29950 | frame_reg_rtx, GEN_INT (frame_off))); | |
29951 | } | |
29952 | insn = rs6000_emit_savres_rtx (info, ptr_reg, | |
29953 | info->fp_save_offset, | |
29954 | info->lr_save_offset, | |
29955 | DFmode, sel); | |
29956 | rs6000_frame_related (insn, ptr_reg, sp_off, | |
29957 | NULL_RTX, NULL_RTX); | |
29958 | if (lr) | |
29959 | END_USE (0); | |
29960 | } | |
29961 | ||
29962 | /* Save GPRs. This is done as a PARALLEL if we are using | |
29963 | the store-multiple instructions. */ | |
29964 | if (!WORLD_SAVE_P (info) | |
29965 | && TARGET_SPE_ABI | |
29966 | && info->spe_64bit_regs_used != 0 | |
29967 | && info->first_gp_reg_save != 32) | |
29968 | { | |
29969 | int i; | |
29970 | rtx spe_save_area_ptr; | |
29971 | HOST_WIDE_INT save_off; | |
29972 | int ool_adjust = 0; | |
29973 | ||
29974 | /* Determine whether we can address all of the registers that need | |
29975 | to be saved with an offset from frame_reg_rtx that fits in | |
29976 | the small const field for SPE memory instructions. */ | |
29977 | int spe_regs_addressable | |
29978 | = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off | |
29979 | + reg_size * (32 - info->first_gp_reg_save - 1)) | |
29980 | && (strategy & SAVE_INLINE_GPRS)); | |
29981 | ||
29982 | if (spe_regs_addressable) | |
29983 | { | |
29984 | spe_save_area_ptr = frame_reg_rtx; | |
29985 | save_off = frame_off; | |
29986 | } | |
29987 | else | |
29988 | { | |
29989 | /* Make r11 point to the start of the SPE save area. We need | |
29990 | to be careful here if r11 is holding the static chain. If | |
29991 | it is, then temporarily save it in r0. */ | |
29992 | HOST_WIDE_INT offset; | |
29993 | ||
29994 | if (!(strategy & SAVE_INLINE_GPRS)) | |
29995 | ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO); | |
29996 | offset = info->spe_gp_save_offset + frame_off - ool_adjust; | |
29997 | spe_save_area_ptr = gen_rtx_REG (Pmode, 11); | |
29998 | save_off = frame_off - offset; | |
29999 | ||
30000 | if (using_static_chain_p) | |
30001 | { | |
30002 | rtx r0 = gen_rtx_REG (Pmode, 0); | |
30003 | ||
30004 | START_USE (0); | |
30005 | gcc_assert (info->first_gp_reg_save > 11); | |
30006 | ||
30007 | emit_move_insn (r0, spe_save_area_ptr); | |
30008 | } | |
30009 | else if (REGNO (frame_reg_rtx) != 11) | |
30010 | START_USE (11); | |
30011 | ||
30012 | emit_insn (gen_addsi3 (spe_save_area_ptr, | |
30013 | frame_reg_rtx, GEN_INT (offset))); | |
30014 | if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11) | |
30015 | frame_off = -info->spe_gp_save_offset + ool_adjust; | |
30016 | } | |
30017 | ||
30018 | if ((strategy & SAVE_INLINE_GPRS)) | |
30019 | { | |
30020 | for (i = 0; i < 32 - info->first_gp_reg_save; i++) | |
30021 | if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i)) | |
30022 | emit_frame_save (spe_save_area_ptr, reg_mode, | |
30023 | info->first_gp_reg_save + i, | |
30024 | (info->spe_gp_save_offset + save_off | |
30025 | + reg_size * i), | |
30026 | sp_off - save_off); | |
30027 | } | |
30028 | else | |
30029 | { | |
30030 | insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr, | |
30031 | info->spe_gp_save_offset + save_off, | |
30032 | 0, reg_mode, | |
30033 | SAVRES_SAVE | SAVRES_GPR); | |
30034 | ||
30035 | rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off, | |
30036 | NULL_RTX, NULL_RTX); | |
30037 | } | |
30038 | ||
30039 | /* Move the static chain pointer back. */ | |
30040 | if (!spe_regs_addressable) | |
30041 | { | |
30042 | if (using_static_chain_p) | |
30043 | { | |
30044 | emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0)); | |
30045 | END_USE (0); | |
30046 | } | |
30047 | else if (REGNO (frame_reg_rtx) != 11) | |
30048 | END_USE (11); | |
30049 | } | |
30050 | } | |
30051 | else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS)) | |
30052 | { | |
30053 | bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0; | |
30054 | int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0); | |
30055 | unsigned ptr_regno = ptr_regno_for_savres (sel); | |
30056 | rtx ptr_reg = frame_reg_rtx; | |
30057 | bool ptr_set_up = REGNO (ptr_reg) == ptr_regno; | |
30058 | int end_save = info->gp_save_offset + info->gp_size; | |
30059 | int ptr_off; | |
30060 | ||
30061 | if (ptr_regno == 12) | |
30062 | sp_adjust = 0; | |
30063 | if (!ptr_set_up) | |
30064 | ptr_reg = gen_rtx_REG (Pmode, ptr_regno); | |
30065 | ||
30066 | /* Need to adjust r11 (r12) if we saved any FPRs. */ | |
30067 | if (end_save + frame_off != 0) | |
30068 | { | |
30069 | rtx offset = GEN_INT (end_save + frame_off); | |
30070 | ||
30071 | if (ptr_set_up) | |
30072 | frame_off = -end_save; | |
30073 | else | |
30074 | NOT_INUSE (ptr_regno); | |
30075 | emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset)); | |
30076 | } | |
30077 | else if (!ptr_set_up) | |
30078 | { | |
30079 | NOT_INUSE (ptr_regno); | |
30080 | emit_move_insn (ptr_reg, frame_reg_rtx); | |
30081 | } | |
30082 | ptr_off = -end_save; | |
30083 | insn = rs6000_emit_savres_rtx (info, ptr_reg, | |
30084 | info->gp_save_offset + ptr_off, | |
30085 | info->lr_save_offset + ptr_off, | |
30086 | reg_mode, sel); | |
30087 | rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off, | |
30088 | NULL_RTX, NULL_RTX); | |
30089 | if (lr) | |
30090 | END_USE (0); | |
30091 | } | |
30092 | else if (!WORLD_SAVE_P (info) && (strategy & SAVE_MULTIPLE)) | |
30093 | { | |
30094 | rtvec p; | |
30095 | int i; | |
30096 | p = rtvec_alloc (32 - info->first_gp_reg_save); | |
30097 | for (i = 0; i < 32 - info->first_gp_reg_save; i++) | |
30098 | RTVEC_ELT (p, i) | |
30099 | = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i), | |
30100 | frame_reg_rtx, | |
30101 | info->gp_save_offset + frame_off + reg_size * i); | |
30102 | insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); | |
30103 | rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off, | |
30104 | NULL_RTX, NULL_RTX); | |
30105 | } | |
30106 | else if (!WORLD_SAVE_P (info)) | |
30107 | { | |
30108 | int offset = info->gp_save_offset + frame_off; | |
30109 | for (int i = info->first_gp_reg_save; i < 32; i++) | |
30110 | { | |
30111 | if (rs6000_reg_live_or_pic_offset_p (i) | |
30112 | && !cfun->machine->gpr_is_wrapped_separately[i]) | |
30113 | emit_frame_save (frame_reg_rtx, reg_mode, i, offset, | |
30114 | sp_off - frame_off); | |
30115 | ||
30116 | offset += reg_size; | |
30117 | } | |
30118 | } | |
30119 | ||
30120 | if (crtl->calls_eh_return) | |
30121 | { | |
30122 | unsigned int i; | |
30123 | rtvec p; | |
30124 | ||
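| /* First count the EH data registers, then build a PARALLEL | |
| describing their save slots. */ | |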
30125 | for (i = 0; ; ++i) | |
30126 | { | |
30127 | unsigned int regno = EH_RETURN_DATA_REGNO (i); | |
30128 | if (regno == INVALID_REGNUM) | |
30129 | break; | |
30130 | } | |
30131 | ||
30132 | p = rtvec_alloc (i); | |
30133 | ||
30134 | for (i = 0; ; ++i) | |
30135 | { | |
30136 | unsigned int regno = EH_RETURN_DATA_REGNO (i); | |
30137 | if (regno == INVALID_REGNUM) | |
30138 | break; | |
30139 | ||
30140 | rtx set | |
30141 | = gen_frame_store (gen_rtx_REG (reg_mode, regno), | |
30142 | sp_reg_rtx, | |
30143 | info->ehrd_offset + sp_off + reg_size * (int) i); | |
30144 | RTVEC_ELT (p, i) = set; | |
30145 | RTX_FRAME_RELATED_P (set) = 1; | |
30146 | } | |
30147 | ||
30148 | insn = emit_insn (gen_blockage ()); | |
30149 | RTX_FRAME_RELATED_P (insn) = 1; | |
30150 | add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p)); | |
30151 | } | |
30152 | ||
30153 | /* In the AIX ABI we need to make sure r2 is really saved. */ | |
30154 | if (TARGET_AIX && crtl->calls_eh_return) | |
30155 | { | |
30156 | rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump; | |
30157 | rtx join_insn, note; | |
30158 | rtx_insn *save_insn; | |
30159 | long toc_restore_insn; | |
30160 | ||
30161 | tmp_reg = gen_rtx_REG (Pmode, 11); | |
30162 | tmp_reg_si = gen_rtx_REG (SImode, 11); | |
30163 | if (using_static_chain_p) | |
30164 | { | |
30165 | START_USE (0); | |
30166 | emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg); | |
30167 | } | |
30168 | else | |
30169 | START_USE (11); | |
30170 | emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO)); | |
30171 | /* Peek at the instruction to which this function returns. If it | |
30172 | restores r2, then we know r2 has already been saved. We can't | |
30173 | save r2 unconditionally because the value we hold will already | |
30174 | have been updated if we arrived at this function via a PLT call | |
30175 | or a TOC-adjusting stub. */ | |
30176 | emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg)); | |
30177 | toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000) | |
30178 | + RS6000_TOC_SAVE_SLOT); | |
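| /* That is, the encoding of "lwz r2,SLOT(r1)" (32-bit) or | |
| "ld r2,SLOT(r1)" (64-bit), where SLOT is RS6000_TOC_SAVE_SLOT. */ | |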
30179 | hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode); | |
30180 | emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi)); | |
30181 | compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO); | |
30182 | validate_condition_mode (EQ, CCUNSmode); | |
30183 | lo = gen_int_mode (toc_restore_insn & 0xffff, SImode); | |
30184 | emit_insn (gen_rtx_SET (compare_result, | |
30185 | gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo))); | |
30186 | toc_save_done = gen_label_rtx (); | |
30187 | jump = gen_rtx_IF_THEN_ELSE (VOIDmode, | |
30188 | gen_rtx_EQ (VOIDmode, compare_result, | |
30189 | const0_rtx), | |
30190 | gen_rtx_LABEL_REF (VOIDmode, toc_save_done), | |
30191 | pc_rtx); | |
30192 | jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump)); | |
30193 | JUMP_LABEL (jump) = toc_save_done; | |
30194 | LABEL_NUSES (toc_save_done) += 1; | |
30195 | ||
30196 | save_insn = emit_frame_save (frame_reg_rtx, reg_mode, | |
30197 | TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT, | |
30198 | sp_off - frame_off); | |
30199 | ||
30200 | emit_label (toc_save_done); | |
30201 | ||
30202 | /* ??? If we leave SAVE_INSN marked as saving R2, then we'll | |
30203 | have a CFG that has different saves along different paths. | |
30204 | Move the note to a dummy blockage insn, which describes that | |
30205 | R2 is unconditionally saved after the label. */ | |
30206 | /* ??? An alternate representation might be a special insn pattern | |
30207 | containing both the branch and the store. That might give the | |
30208 | code that minimizes the number of DW_CFA_advance opcodes more | |
30209 | freedom in placing the annotations. */ | |
30210 | note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL); | |
30211 | if (note) | |
30212 | remove_note (save_insn, note); | |
30213 | else | |
30214 | note = alloc_reg_note (REG_FRAME_RELATED_EXPR, | |
30215 | copy_rtx (PATTERN (save_insn)), NULL_RTX); | |
30216 | RTX_FRAME_RELATED_P (save_insn) = 0; | |
30217 | ||
30218 | join_insn = emit_insn (gen_blockage ()); | |
30219 | REG_NOTES (join_insn) = note; | |
30220 | RTX_FRAME_RELATED_P (join_insn) = 1; | |
30221 | ||
30222 | if (using_static_chain_p) | |
30223 | { | |
30224 | emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0)); | |
30225 | END_USE (0); | |
30226 | } | |
30227 | else | |
30228 | END_USE (11); | |
30229 | } | |
30230 | ||
30231 | /* Save CR if we use any that must be preserved. */ | |
30232 | if (!WORLD_SAVE_P (info) && info->cr_save_p) | |
30233 | { | |
30234 | rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, | |
30235 | GEN_INT (info->cr_save_offset + frame_off)); | |
30236 | rtx mem = gen_frame_mem (SImode, addr); | |
30237 | ||
30238 | /* If we didn't copy CR before, do so now using r0. */ | |
30239 | if (cr_save_rtx == NULL_RTX) | |
30240 | { | |
30241 | START_USE (0); | |
30242 | cr_save_rtx = gen_rtx_REG (SImode, 0); | |
30243 | rs6000_emit_move_from_cr (cr_save_rtx); | |
30244 | } | |
30245 | ||
30246 | /* Saving CR requires a two-instruction sequence: one instruction | |
30247 | to move the CR to a general-purpose register, and a second | |
30248 | instruction that stores the GPR to memory. | |
30249 | ||
30250 | We do not emit any DWARF CFI records for the first of these, | |
30251 | because we cannot properly represent the fact that CR is saved in | |
30252 | a register. One reason is that we cannot express that multiple | |
30253 | CR fields are saved; another reason is that on 64-bit, the size | |
30254 | of the CR register in DWARF (4 bytes) differs from the size of | |
30255 | a general-purpose register. | |
30256 | ||
30257 | This means if any intervening instruction were to clobber one of | |
30258 | the call-saved CR fields, we'd have incorrect CFI. To prevent | |
30259 | this from happening, we mark the store to memory as a use of | |
30260 | those CR fields, which prevents any such instruction from being | |
30261 | scheduled in between the two instructions. */ | |
30262 | rtx crsave_v[9]; | |
30263 | int n_crsave = 0; | |
30264 | int i; | |
30265 | ||
30266 | crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx); | |
30267 | for (i = 0; i < 8; i++) | |
30268 | if (save_reg_p (CR0_REGNO + i)) | |
30269 | crsave_v[n_crsave++] | |
30270 | = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i)); | |
30271 | ||
30272 | insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, | |
30273 | gen_rtvec_v (n_crsave, crsave_v))); | |
30274 | END_USE (REGNO (cr_save_rtx)); | |
30275 | ||
30276 | /* Now, there's no way that dwarf2out_frame_debug_expr is going to | |
30277 | understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)', | |
30278 | so we need to construct a frame expression manually. */ | |
30279 | RTX_FRAME_RELATED_P (insn) = 1; | |
30280 | ||
30281 | /* Update address to be stack-pointer relative, like | |
30282 | rs6000_frame_related would do. */ | |
30283 | addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM), | |
30284 | GEN_INT (info->cr_save_offset + sp_off)); | |
30285 | mem = gen_frame_mem (SImode, addr); | |
30286 | ||
30287 | if (DEFAULT_ABI == ABI_ELFv2) | |
30288 | { | |
30289 | /* In the ELFv2 ABI we generate separate CFI records for each | |
30290 | CR field that was actually saved. They all point to the | |
30291 | same 32-bit stack slot. */ | |
30292 | rtx crframe[8]; | |
30293 | int n_crframe = 0; | |
30294 | ||
30295 | for (i = 0; i < 8; i++) | |
30296 | if (save_reg_p (CR0_REGNO + i)) | |
30297 | { | |
30298 | crframe[n_crframe] | |
30299 | = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i)); | |
30300 | ||
30301 | RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1; | |
30302 | n_crframe++; | |
30303 | } | |
30304 | ||
30305 | add_reg_note (insn, REG_FRAME_RELATED_EXPR, | |
30306 | gen_rtx_PARALLEL (VOIDmode, | |
30307 | gen_rtvec_v (n_crframe, crframe))); | |
30308 | } | |
30309 | else | |
30310 | { | |
30311 | /* In other ABIs, by convention, we use a single CR regnum to | |
30312 | represent the fact that all call-saved CR fields are saved. | |
30313 | We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */ | |
30314 | rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO)); | |
30315 | add_reg_note (insn, REG_FRAME_RELATED_EXPR, set); | |
30316 | } | |
30317 | } | |
30318 | ||
30319 | /* In the ELFv2 ABI we need to save all call-saved CR fields into | |
30320 | *separate* slots if the routine calls __builtin_eh_return, so | |
30321 | that they can be independently restored by the unwinder. */ | |
30322 | if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return) | |
30323 | { | |
30324 | int i, cr_off = info->ehcr_offset; | |
30325 | rtx crsave; | |
30326 | ||
30327 | /* ??? We might get better performance by using multiple mfocrf | |
30328 | instructions. */ | |
30329 | crsave = gen_rtx_REG (SImode, 0); | |
30330 | emit_insn (gen_movesi_from_cr (crsave)); | |
30331 | ||
30332 | for (i = 0; i < 8; i++) | |
30333 | if (!call_used_regs[CR0_REGNO + i]) | |
30334 | { | |
30335 | rtvec p = rtvec_alloc (2); | |
30336 | RTVEC_ELT (p, 0) | |
30337 | = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off); | |
30338 | RTVEC_ELT (p, 1) | |
30339 | = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i)); | |
30340 | ||
30341 | insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); | |
30342 | ||
30343 | RTX_FRAME_RELATED_P (insn) = 1; | |
30344 | add_reg_note (insn, REG_FRAME_RELATED_EXPR, | |
30345 | gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i), | |
30346 | sp_reg_rtx, cr_off + sp_off)); | |
30347 | ||
30348 | cr_off += reg_size; | |
30349 | } | |
30350 | } | |
30351 | ||
30352 | /* Update stack and set back pointer unless this is V.4, | |
30353 | for which it was done previously. */ | |
30354 | if (!WORLD_SAVE_P (info) && info->push_p | |
30355 | && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return)) | |
30356 | { | |
30357 | rtx ptr_reg = NULL; | |
30358 | int ptr_off = 0; | |
30359 | ||
30360 | /* If saving altivec regs we need to be able to address all save | |
30361 | locations using a 16-bit offset. */ | |
30362 | if ((strategy & SAVE_INLINE_VRS) == 0 | |
30363 | || (info->altivec_size != 0 | |
30364 | && (info->altivec_save_offset + info->altivec_size - 16 | |
30365 | + info->total_size - frame_off) > 32767) | |
30366 | || (info->vrsave_size != 0 | |
30367 | && (info->vrsave_save_offset | |
30368 | + info->total_size - frame_off) > 32767)) | |
30369 | { | |
30370 | int sel = SAVRES_SAVE | SAVRES_VR; | |
30371 | unsigned ptr_regno = ptr_regno_for_savres (sel); | |
30372 | ||
30373 | if (using_static_chain_p | |
30374 | && ptr_regno == STATIC_CHAIN_REGNUM) | |
30375 | ptr_regno = 12; | |
30376 | if (REGNO (frame_reg_rtx) != ptr_regno) | |
30377 | START_USE (ptr_regno); | |
30378 | ptr_reg = gen_rtx_REG (Pmode, ptr_regno); | |
30379 | frame_reg_rtx = ptr_reg; | |
30380 | ptr_off = info->altivec_save_offset + info->altivec_size; | |
30381 | frame_off = -ptr_off; | |
30382 | } | |
30383 | else if (REGNO (frame_reg_rtx) == 1) | |
30384 | frame_off = info->total_size; | |
30385 | sp_adjust = rs6000_emit_allocate_stack (info->total_size, | |
30386 | ptr_reg, ptr_off); | |
30387 | if (REGNO (frame_reg_rtx) == 12) | |
30388 | sp_adjust = 0; | |
30389 | sp_off = info->total_size; | |
30390 | if (frame_reg_rtx != sp_reg_rtx) | |
30391 | rs6000_emit_stack_tie (frame_reg_rtx, false); | |
30392 | } | |
30393 | ||
30394 | /* Set frame pointer, if needed. */ | |
30395 | if (frame_pointer_needed) | |
30396 | { | |
30397 | insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM), | |
30398 | sp_reg_rtx); | |
30399 | RTX_FRAME_RELATED_P (insn) = 1; | |
30400 | } | |
30401 | ||
30402 | /* Save AltiVec registers if needed. Save here because the red zone does | |
30403 | not always include AltiVec registers. */ | |
30404 | if (!WORLD_SAVE_P (info) | |
30405 | && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0) | |
30406 | { | |
30407 | int end_save = info->altivec_save_offset + info->altivec_size; | |
30408 | int ptr_off; | |
30409 | /* Oddly, the vector save/restore functions point r0 at the end | |
30410 | of the save area, then use r11 or r12 to load offsets for | |
30411 | [reg+reg] addressing. */ | |
30412 | rtx ptr_reg = gen_rtx_REG (Pmode, 0); | |
30413 | int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR); | |
30414 | rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno); | |
30415 | ||
30416 | gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12); | |
30417 | NOT_INUSE (0); | |
30418 | if (scratch_regno == 12) | |
30419 | sp_adjust = 0; | |
30420 | if (end_save + frame_off != 0) | |
30421 | { | |
30422 | rtx offset = GEN_INT (end_save + frame_off); | |
30423 | ||
30424 | emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset)); | |
30425 | } | |
30426 | else | |
30427 | emit_move_insn (ptr_reg, frame_reg_rtx); | |
30428 | ||
30429 | ptr_off = -end_save; | |
30430 | insn = rs6000_emit_savres_rtx (info, scratch_reg, | |
30431 | info->altivec_save_offset + ptr_off, | |
30432 | 0, V4SImode, SAVRES_SAVE | SAVRES_VR); | |
30433 | rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off, | |
30434 | NULL_RTX, NULL_RTX); | |
30435 | if (REGNO (frame_reg_rtx) == REGNO (scratch_reg)) | |
30436 | { | |
30437 | /* The oddity mentioned above clobbered our frame reg. */ | |
30438 | emit_move_insn (frame_reg_rtx, ptr_reg); | |
30439 | frame_off = ptr_off; | |
30440 | } | |
30441 | } | |
30442 | else if (!WORLD_SAVE_P (info) | |
30443 | && info->altivec_size != 0) | |
30444 | { | |
30445 | int i; | |
30446 | ||
30447 | for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i) | |
30448 | if (info->vrsave_mask & ALTIVEC_REG_BIT (i)) | |
30449 | { | |
30450 | rtx areg, savereg, mem; | |
30451 | HOST_WIDE_INT offset; | |
30452 | ||
30453 | offset = (info->altivec_save_offset + frame_off | |
30454 | + 16 * (i - info->first_altivec_reg_save)); | |
30455 | ||
30456 | savereg = gen_rtx_REG (V4SImode, i); | |
30457 | ||
30458 | if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset)) | |
30459 | { | |
30460 | mem = gen_frame_mem (V4SImode, | |
30461 | gen_rtx_PLUS (Pmode, frame_reg_rtx, | |
30462 | GEN_INT (offset))); | |
30463 | insn = emit_insn (gen_rtx_SET (mem, savereg)); | |
30464 | areg = NULL_RTX; | |
30465 | } | |
30466 | else | |
30467 | { | |
30468 | NOT_INUSE (0); | |
30469 | areg = gen_rtx_REG (Pmode, 0); | |
30470 | emit_move_insn (areg, GEN_INT (offset)); | |
30471 | ||
30472 | /* AltiVec addressing mode is [reg+reg]. */ | |
30473 | mem = gen_frame_mem (V4SImode, | |
30474 | gen_rtx_PLUS (Pmode, frame_reg_rtx, areg)); | |
30475 | ||
30476 | /* Rather than emitting a generic move, force use of the stvx | |
30477 | instruction, which we always want on ISA 2.07 (power8) systems. | |
30478 | In particular we don't want xxpermdi/stxvd2x for little | |
30479 | endian. */ | |
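| /* (On little endian, stxvd2x stores the two doublewords in | |
| big-endian order, so a generic V4SImode store would first need | |
| an xxpermdi to swap them; stvx stores the vector in its natural | |
| element order.) */ | |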
30480 | insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg)); | |
30481 | } | |
30482 | ||
30483 | rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off, | |
30484 | areg, GEN_INT (offset)); | |
30485 | } | |
30486 | } | |
30487 | ||
30488 | /* VRSAVE is a bit vector representing which AltiVec registers | |
30489 | are used. The OS uses this to determine which vector | |
30490 | registers to save on a context switch. We need to save | |
30491 | VRSAVE on the stack frame, add whatever AltiVec registers we | |
30492 | used in this function, and do the corresponding magic in the | |
30493 | epilogue. */ | |
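| /* A sketch of what the code below emits (mnemonics are | |
| illustrative): mfvrsave rN; stw rN,<vrsave_save_offset>(frame); | |
| ori/oris rN,rN,<vrsave_mask>; mtvrsave rN. Note the old value | |
| is stored before the mask is OR'd in. */ | |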
30494 | ||
30495 | if (!WORLD_SAVE_P (info) | |
30496 | && info->vrsave_size != 0) | |
30497 | { | |
30498 | rtx reg, vrsave; | |
30499 | int offset; | |
30500 | int save_regno; | |
30501 | ||
30502 | /* Get VRSAVE into a GPR. Note that ABI_V4 and ABI_DARWIN might | |
30503 | be using r12 as frame_reg_rtx and r11 as the static chain | |
30504 | pointer for nested functions. */ | |
30505 | save_regno = 12; | |
30506 | if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) | |
30507 | && !using_static_chain_p) | |
30508 | save_regno = 11; | |
30509 | else if (using_split_stack || REGNO (frame_reg_rtx) == 12) | |
30510 | { | |
30511 | save_regno = 11; | |
30512 | if (using_static_chain_p) | |
30513 | save_regno = 0; | |
30514 | } | |
30515 | ||
30516 | NOT_INUSE (save_regno); | |
30517 | reg = gen_rtx_REG (SImode, save_regno); | |
30518 | vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO); | |
30519 | if (TARGET_MACHO) | |
30520 | emit_insn (gen_get_vrsave_internal (reg)); | |
30521 | else | |
30522 | emit_insn (gen_rtx_SET (reg, vrsave)); | |
30523 | ||
30524 | /* Save VRSAVE. */ | |
30525 | offset = info->vrsave_save_offset + frame_off; | |
30526 | insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset)); | |
30527 | ||
30528 | /* Include the registers in the mask. */ | |
30529 | emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask))); | |
30530 | ||
30531 | insn = emit_insn (generate_set_vrsave (reg, info, 0)); | |
30532 | } | |
30533 | ||
30534 | /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */ | |
30535 | if (!TARGET_SINGLE_PIC_BASE | |
30536 | && ((TARGET_TOC && TARGET_MINIMAL_TOC | |
30537 | && !constant_pool_empty_p ()) | |
30538 | || (DEFAULT_ABI == ABI_V4 | |
30539 | && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT)) | |
30540 | && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM)))) | |
30541 | { | |
30542 | /* If emit_load_toc_table will use the link register, we need to save | |
30543 | it. We use R12 for this purpose because emit_load_toc_table | |
30544 | can use register 0. This allows us to use a plain 'blr' to return | |
30545 | from the procedure more often. */ | |
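| /* When that save is needed, the sequence is roughly: mflr 12; | |
| <TOC setup, which may clobber LR>; mtlr 12 -- with a | |
| REG_CFA_RESTORE on the final move so the unwinder knows LR is | |
| current again. */ | |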
30546 | int save_LR_around_toc_setup = (TARGET_ELF | |
30547 | && DEFAULT_ABI == ABI_V4 | |
30548 | && flag_pic | |
30549 | && ! info->lr_save_p | |
30550 | && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0); | |
30551 | if (save_LR_around_toc_setup) | |
30552 | { | |
30553 | rtx lr = gen_rtx_REG (Pmode, LR_REGNO); | |
30554 | rtx tmp = gen_rtx_REG (Pmode, 12); | |
30555 | ||
30556 | sp_adjust = 0; | |
30557 | insn = emit_move_insn (tmp, lr); | |
30558 | RTX_FRAME_RELATED_P (insn) = 1; | |
30559 | ||
30560 | rs6000_emit_load_toc_table (TRUE); | |
30561 | ||
30562 | insn = emit_move_insn (lr, tmp); | |
30563 | add_reg_note (insn, REG_CFA_RESTORE, lr); | |
30564 | RTX_FRAME_RELATED_P (insn) = 1; | |
30565 | } | |
30566 | else | |
30567 | rs6000_emit_load_toc_table (TRUE); | |
30568 | } | |
30569 | ||
30570 | #if TARGET_MACHO | |
30571 | if (!TARGET_SINGLE_PIC_BASE | |
30572 | && DEFAULT_ABI == ABI_DARWIN | |
30573 | && flag_pic && crtl->uses_pic_offset_table) | |
30574 | { | |
30575 | rtx lr = gen_rtx_REG (Pmode, LR_REGNO); | |
30576 | rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME); | |
30577 | ||
30578 | /* Save and restore LR locally around this call (in R0). */ | |
30579 | if (!info->lr_save_p) | |
30580 | emit_move_insn (gen_rtx_REG (Pmode, 0), lr); | |
30581 | ||
30582 | emit_insn (gen_load_macho_picbase (src)); | |
30583 | ||
30584 | emit_move_insn (gen_rtx_REG (Pmode, | |
30585 | RS6000_PIC_OFFSET_TABLE_REGNUM), | |
30586 | lr); | |
30587 | ||
30588 | if (!info->lr_save_p) | |
30589 | emit_move_insn (lr, gen_rtx_REG (Pmode, 0)); | |
30590 | } | |
30591 | #endif | |
30592 | ||
30593 | /* If we need to, save the TOC register after doing the stack setup. | |
30594 | Do not emit eh frame info for this save. The unwinder wants info, | |
30595 | conceptually attached to instructions in this function, about | |
30596 | register values in the caller of this function. This R2 may have | |
30597 | already been changed from the value in the caller. | |
30598 | We don't attempt to write accurate DWARF EH frame info for R2 | |
30599 | because code emitted by gcc for a (non-pointer) function call | |
30600 | doesn't save and restore R2. Instead, R2 is managed out-of-line | |
30601 | by a linker generated plt call stub when the function resides in | |
30602 | a shared library. This behavior is costly to describe in DWARF, | |
30603 | both in terms of the size of DWARF info and the time taken in the | |
30604 | unwinder to interpret it. R2 changes, apart from the | |
30605 | calls_eh_return case earlier in this function, are handled by | |
30606 | linux-unwind.h frob_update_context. */ | |
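| /* The store below is in effect "std 2,24(1)" for ELFv2, with the | |
| slot offset (RS6000_TOC_SAVE_SLOT) differing per ABI, e.g. 40 | |
| under ELFv1/AIX. */ | |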
30607 | if (rs6000_save_toc_in_prologue_p ()) | |
30608 | { | |
30609 | rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM); | |
30610 | emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT)); | |
30611 | } | |
30612 | ||
30613 | if (using_split_stack && split_stack_arg_pointer_used_p ()) | |
30614 | { | |
30615 | /* Set up the arg pointer (r12) for -fsplit-stack code. If | |
30616 | __morestack was called, it left the arg pointer to the old | |
30617 | stack in r29. Otherwise, the arg pointer is the top of the | |
30618 | current frame. */ | |
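| /* The cr7 test further below assumes the split-stack prologue's | |
| stack-limit comparison is still live in cr7: GEU means | |
| __morestack was not called, so the r12 value set up here stands; | |
| otherwise r12 is copied from r29. */ | |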
30619 | cfun->machine->split_stack_argp_used = true; | |
30620 | if (sp_adjust) | |
30621 | { | |
30622 | rtx r12 = gen_rtx_REG (Pmode, 12); | |
30623 | rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx); | |
30624 | emit_insn_before (set_r12, sp_adjust); | |
30625 | } | |
30626 | else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12) | |
30627 | { | |
30628 | rtx r12 = gen_rtx_REG (Pmode, 12); | |
30629 | if (frame_off == 0) | |
30630 | emit_move_insn (r12, frame_reg_rtx); | |
30631 | else | |
30632 | emit_insn (gen_add3_insn (r12, frame_reg_rtx, GEN_INT (frame_off))); | |
30633 | } | |
30634 | if (info->push_p) | |
30635 | { | |
30636 | rtx r12 = gen_rtx_REG (Pmode, 12); | |
30637 | rtx r29 = gen_rtx_REG (Pmode, 29); | |
30638 | rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO); | |
30639 | rtx not_more = gen_label_rtx (); | |
30640 | rtx jump; | |
30641 | ||
30642 | jump = gen_rtx_IF_THEN_ELSE (VOIDmode, | |
30643 | gen_rtx_GEU (VOIDmode, cr7, const0_rtx), | |
30644 | gen_rtx_LABEL_REF (VOIDmode, not_more), | |
30645 | pc_rtx); | |
30646 | jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump)); | |
30647 | JUMP_LABEL (jump) = not_more; | |
30648 | LABEL_NUSES (not_more) += 1; | |
30649 | emit_move_insn (r12, r29); | |
30650 | emit_label (not_more); | |
30651 | } | |
30652 | } | |
30653 | } | |
30654 | ||
30655 | /* Output .extern statements for the save/restore routines we use. */ | |
30656 | ||
30657 | static void | |
30658 | rs6000_output_savres_externs (FILE *file) | |
30659 | { | |
30660 | rs6000_stack_t *info = rs6000_stack_info (); | |
30661 | ||
30662 | if (TARGET_DEBUG_STACK) | |
30663 | debug_stack_info (info); | |
30664 | ||
30665 | /* Write .extern for any function we will call to save and restore | |
30666 | fp values. */ | |
30667 | if (info->first_fp_reg_save < 64 | |
30668 | && !TARGET_MACHO | |
30669 | && !TARGET_ELF) | |
30670 | { | |
30671 | char *name; | |
30672 | int regno = info->first_fp_reg_save - 32; | |
30673 | ||
30674 | if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0) | |
30675 | { | |
30676 | bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0; | |
30677 | int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0); | |
30678 | name = rs6000_savres_routine_name (info, regno, sel); | |
30679 | fprintf (file, "\t.extern %s\n", name); | |
30680 | } | |
30681 | if ((info->savres_strategy & REST_INLINE_FPRS) == 0) | |
30682 | { | |
30683 | bool lr = (info->savres_strategy | |
30684 | & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0; | |
30685 | int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0); | |
30686 | name = rs6000_savres_routine_name (info, regno, sel); | |
30687 | fprintf (file, "\t.extern %s\n", name); | |
30688 | } | |
30689 | } | |
30690 | } | |
30691 | ||
30692 | /* Write function prologue. */ | |
30693 | ||
30694 | static void | |
718e6d56 | 30695 | rs6000_output_function_prologue (FILE *file) |
01e91138 | 30696 | { |
30697 | if (!cfun->is_thunk) | |
30698 | rs6000_output_savres_externs (file); | |
30699 | ||
30700 | /* ELFv2 ABI r2 setup code and local entry point. This must follow | |
30701 | immediately after the global entry point label. */ | |
30702 | if (rs6000_global_entry_point_needed_p ()) | |
30703 | { | |
30704 | const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0); | |
30705 | ||
30706 | (*targetm.asm_out.internal_label) (file, "LCF", rs6000_pic_labelno); | |
30707 | ||
30708 | if (TARGET_CMODEL != CMODEL_LARGE) | |
30709 | { | |
30710 | /* In the small and medium code models, we assume the TOC is less | |
30711 | than 2 GB away from the text section, so it can be computed via the | |
30712 | following two-instruction sequence. */ | |
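| /* That is, with r12 holding the global entry address (== <LCF>), | |
| the net effect of | |
| 0: addis 2,12,.TOC.-<LCF>@ha | |
| addi 2,2,.TOC.-<LCF>@l | |
| is r2 = .TOC. */ | |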
30713 | char buf[256]; | |
30714 | ||
30715 | ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno); | |
30716 | fprintf (file, "0:\taddis 2,12,.TOC.-"); | |
30717 | assemble_name (file, buf); | |
30718 | fprintf (file, "@ha\n"); | |
30719 | fprintf (file, "\taddi 2,2,.TOC.-"); | |
30720 | assemble_name (file, buf); | |
30721 | fprintf (file, "@l\n"); | |
30722 | } | |
30723 | else | |
30724 | { | |
30725 | /* In the large code model, we allow arbitrary offsets between the | |
30726 | TOC and the text section, so we have to load the offset from | |
30727 | memory. The data field is emitted directly before the global | |
30728 | entry point in rs6000_elf_declare_function_name. */ | |
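| /* The resulting sequence is, in effect: | |
| ld 2,<LCL>-<LCF>(12) | |
| add 2,2,12 | |
| where the doubleword at <LCL> holds .TOC.-<LCF>, so r2 again | |
| ends up as .TOC. */ | |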
30729 | char buf[256]; | |
30730 | ||
30731 | #ifdef HAVE_AS_ENTRY_MARKERS | |
30732 | /* If supported by the linker, emit a marker relocation. If the | |
30733 | total code size of the final executable or shared library | |
30734 | happens to fit into 2 GB after all, the linker will replace | |
30735 | this code sequence with the sequence for the small or medium | |
30736 | code model. */ | |
30737 | fprintf (file, "\t.reloc .,R_PPC64_ENTRY\n"); | |
30738 | #endif | |
30739 | fprintf (file, "\tld 2,"); | |
30740 | ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno); | |
30741 | assemble_name (file, buf); | |
30742 | fprintf (file, "-"); | |
30743 | ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno); | |
30744 | assemble_name (file, buf); | |
30745 | fprintf (file, "(12)\n"); | |
30746 | fprintf (file, "\tadd 2,2,12\n"); | |
30747 | } | |
30748 | ||
30749 | fputs ("\t.localentry\t", file); | |
30750 | assemble_name (file, name); | |
30751 | fputs (",.-", file); | |
30752 | assemble_name (file, name); | |
30753 | fputs ("\n", file); | |
30754 | } | |
30755 | ||
30756 | /* Output -mprofile-kernel code. This needs to be done here instead of | |
30757 | in output_function_profile since it must go after the ELFv2 ABI | |
30758 | local entry point. */ | |
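| /* The emitted sequence is roughly: mflr 0; bl _mcount -- with the | |
| static chain register saved and reloaded around the call for | |
| ABIs that have a stack word for it (see below). */ | |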
30759 | if (TARGET_PROFILE_KERNEL && crtl->profile) | |
30760 | { | |
30761 | gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2); | |
30762 | gcc_assert (!TARGET_32BIT); | |
30763 | ||
30764 | asm_fprintf (file, "\tmflr %s\n", reg_names[0]); | |
30765 | ||
30766 | /* In the ELFv2 ABI we have no compiler stack word. It must be | |
30767 | the responsibility of _mcount to preserve the static chain | |
30768 | register if required. */ | |
30769 | if (DEFAULT_ABI != ABI_ELFv2 | |
30770 | && cfun->static_chain_decl != NULL) | |
30771 | { | |
30772 | asm_fprintf (file, "\tstd %s,24(%s)\n", | |
30773 | reg_names[STATIC_CHAIN_REGNUM], reg_names[1]); | |
30774 | fprintf (file, "\tbl %s\n", RS6000_MCOUNT); | |
30775 | asm_fprintf (file, "\tld %s,24(%s)\n", | |
30776 | reg_names[STATIC_CHAIN_REGNUM], reg_names[1]); | |
30777 | } | |
30778 | else | |
30779 | fprintf (file, "\tbl %s\n", RS6000_MCOUNT); | |
30780 | } | |
30781 | ||
30782 | rs6000_pic_labelno++; | |
30783 | } | |
30784 | ||
30785 | /* -mprofile-kernel code calls mcount before the function prolog, | |
30786 | so a profiled leaf function should stay a leaf function. */ | |
30787 | static bool | |
30788 | rs6000_keep_leaf_when_profiled () | |
30789 | { | |
30790 | return TARGET_PROFILE_KERNEL; | |
30791 | } | |
30792 | ||
30793 | /* Non-zero if vmx regs are restored before the frame pop, zero if | |
30794 | we restore after the pop when possible. */ | |
30795 | #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0 | |
30796 | ||
30797 | /* Restoring cr is a two step process: loading a reg from the frame | |
30798 | save, then moving the reg to cr. For ABI_V4 we must let the | |
30799 | unwinder know that the stack location is no longer valid at or | |
30800 | before the stack deallocation, but we can't emit a cfa_restore for | |
30801 | cr at the stack deallocation like we do for other registers. | |
30802 | The trouble is that it is possible for the move to cr to be | |
30803 | scheduled after the stack deallocation. So say exactly where cr | |
30804 | is located on each of the two insns. */ | |
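| /* Concretely, the two insns are e.g. a "lwz rN,<offset>(frame)" | |
| emitted here and one or more mtcrf insns emitted later by | |
| restore_saved_cr; the CFA notes described above attach to those | |
| two places. */ | |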
30805 | ||
30806 | static rtx | |
30807 | load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func) | |
30808 | { | |
30809 | rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset); | |
30810 | rtx reg = gen_rtx_REG (SImode, regno); | |
30811 | rtx_insn *insn = emit_move_insn (reg, mem); | |
30812 | ||
30813 | if (!exit_func && DEFAULT_ABI == ABI_V4) | |
30814 | { | |
30815 | rtx cr = gen_rtx_REG (SImode, CR2_REGNO); | |
30816 | rtx set = gen_rtx_SET (reg, cr); | |
30817 | ||
30818 | add_reg_note (insn, REG_CFA_REGISTER, set); | |
30819 | RTX_FRAME_RELATED_P (insn) = 1; | |
30820 | } | |
30821 | return reg; | |
30822 | } | |
30823 | ||
30824 | /* Reload CR from REG. */ | |
30825 | ||
30826 | static void | |
30827 | restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func) | |
30828 | { | |
30829 | int count = 0; | |
30830 | int i; | |
30831 | ||
30832 | if (using_mfcr_multiple) | |
30833 | { | |
30834 | for (i = 0; i < 8; i++) | |
30835 | if (save_reg_p (CR0_REGNO + i)) | |
30836 | count++; | |
30837 | gcc_assert (count); | |
30838 | } | |
30839 | ||
30840 | if (using_mfcr_multiple && count > 1) | |
30841 | { | |
30842 | rtx_insn *insn; | |
30843 | rtvec p; | |
30844 | int ndx; | |
30845 | ||
30846 | p = rtvec_alloc (count); | |
30847 | ||
30848 | ndx = 0; | |
30849 | for (i = 0; i < 8; i++) | |
30850 | if (save_reg_p (CR0_REGNO + i)) | |
30851 | { | |
30852 | rtvec r = rtvec_alloc (2); | |
30853 | RTVEC_ELT (r, 0) = reg; | |
30854 | RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i)); | |
30855 | RTVEC_ELT (p, ndx) = | |
30856 | gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i), | |
30857 | gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR)); | |
30858 | ndx++; | |
30859 | } | |
30860 | insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); | |
30861 | gcc_assert (ndx == count); | |
30862 | ||
30863 | /* For the ELFv2 ABI we generate a CFA_RESTORE for each | |
30864 | CR field separately. */ | |
30865 | if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap) | |
30866 | { | |
30867 | for (i = 0; i < 8; i++) | |
30868 | if (save_reg_p (CR0_REGNO + i)) | |
30869 | add_reg_note (insn, REG_CFA_RESTORE, | |
30870 | gen_rtx_REG (SImode, CR0_REGNO + i)); | |
30871 | ||
30872 | RTX_FRAME_RELATED_P (insn) = 1; | |
30873 | } | |
30874 | } | |
30875 | else | |
30876 | for (i = 0; i < 8; i++) | |
30877 | if (save_reg_p (CR0_REGNO + i)) | |
30878 | { | |
30879 | rtx insn = emit_insn (gen_movsi_to_cr_one | |
30880 | (gen_rtx_REG (CCmode, CR0_REGNO + i), reg)); | |
30881 | ||
30882 | /* For the ELFv2 ABI we generate a CFA_RESTORE for each | |
30883 | CR field separately, attached to the insn that in fact | |
30884 | restores this particular CR field. */ | |
30885 | if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap) | |
30886 | { | |
30887 | add_reg_note (insn, REG_CFA_RESTORE, | |
30888 | gen_rtx_REG (SImode, CR0_REGNO + i)); | |
30889 | ||
30890 | RTX_FRAME_RELATED_P (insn) = 1; | |
30891 | } | |
30892 | } | |
30893 | ||
30894 | /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */ | |
30895 | if (!exit_func && DEFAULT_ABI != ABI_ELFv2 | |
30896 | && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)) | |
30897 | { | |
30898 | rtx_insn *insn = get_last_insn (); | |
30899 | rtx cr = gen_rtx_REG (SImode, CR2_REGNO); | |
30900 | ||
30901 | add_reg_note (insn, REG_CFA_RESTORE, cr); | |
30902 | RTX_FRAME_RELATED_P (insn) = 1; | |
30903 | } | |
30904 | } | |
30905 | ||
30906 | /* Like cr, the move to lr instruction can be scheduled after the | |
30907 | stack deallocation, but unlike cr, its stack frame save is still | |
30908 | valid. So we only need to emit the cfa_restore on the correct | |
30909 | instruction. */ | |
30910 | ||
30911 | static void | |
30912 | load_lr_save (int regno, rtx frame_reg_rtx, int offset) | |
30913 | { | |
30914 | rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset); | |
30915 | rtx reg = gen_rtx_REG (Pmode, regno); | |
30916 | ||
30917 | emit_move_insn (reg, mem); | |
30918 | } | |
30919 | ||
30920 | static void | |
30921 | restore_saved_lr (int regno, bool exit_func) | |
30922 | { | |
30923 | rtx reg = gen_rtx_REG (Pmode, regno); | |
30924 | rtx lr = gen_rtx_REG (Pmode, LR_REGNO); | |
30925 | rtx_insn *insn = emit_move_insn (lr, reg); | |
30926 | ||
30927 | if (!exit_func && flag_shrink_wrap) | |
30928 | { | |
30929 | add_reg_note (insn, REG_CFA_RESTORE, lr); | |
30930 | RTX_FRAME_RELATED_P (insn) = 1; | |
30931 | } | |
30932 | } | |
30933 | ||
30934 | static rtx | |
30935 | add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores) | |
30936 | { | |
30937 | if (DEFAULT_ABI == ABI_ELFv2) | |
30938 | { | |
30939 | int i; | |
30940 | for (i = 0; i < 8; i++) | |
30941 | if (save_reg_p (CR0_REGNO + i)) | |
30942 | { | |
30943 | rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i); | |
30944 | cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr, | |
30945 | cfa_restores); | |
30946 | } | |
30947 | } | |
30948 | else if (info->cr_save_p) | |
30949 | cfa_restores = alloc_reg_note (REG_CFA_RESTORE, | |
30950 | gen_rtx_REG (SImode, CR2_REGNO), | |
30951 | cfa_restores); | |
30952 | ||
30953 | if (info->lr_save_p) | |
30954 | cfa_restores = alloc_reg_note (REG_CFA_RESTORE, | |
30955 | gen_rtx_REG (Pmode, LR_REGNO), | |
30956 | cfa_restores); | |
30957 | return cfa_restores; | |
30958 | } | |
30959 | ||
30960 | /* Return true if OFFSET from stack pointer can be clobbered by signals. | |
30961 | V.4 doesn't have any stack cushion; the AIX ABIs have 220 or 288 bytes | |
30962 | below the stack pointer that are not clobbered by signals. */ | |
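| /* E.g. for 64-bit AIX/ELF, an offset of -288 is still inside the | |
| cushion (returns false) while -289 is below it (returns true); | |
| under V.4 any offset below the stack pointer may be clobbered. */ | |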
30963 | ||
30964 | static inline bool | |
30965 | offset_below_red_zone_p (HOST_WIDE_INT offset) | |
30966 | { | |
30967 | return offset < (DEFAULT_ABI == ABI_V4 | |
30968 | ? 0 | |
30969 | : TARGET_32BIT ? -220 : -288); | |
30970 | } | |
30971 | ||
30972 | /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */ | |
30973 | ||
30974 | static void | |
30975 | emit_cfa_restores (rtx cfa_restores) | |
30976 | { | |
30977 | rtx_insn *insn = get_last_insn (); | |
30978 | rtx *loc = ®_NOTES (insn); | |
30979 | ||
30980 | while (*loc) | |
30981 | loc = &XEXP (*loc, 1); | |
30982 | *loc = cfa_restores; | |
30983 | RTX_FRAME_RELATED_P (insn) = 1; | |
30984 | } | |
30985 | ||
30986 | /* Emit function epilogue as insns. */ | |
30987 | ||
30988 | void | |
30989 | rs6000_emit_epilogue (int sibcall) | |
30990 | { | |
30991 | rs6000_stack_t *info; | |
30992 | int restoring_GPRs_inline; | |
30993 | int restoring_FPRs_inline; | |
30994 | int using_load_multiple; | |
30995 | int using_mtcr_multiple; | |
30996 | int use_backchain_to_restore_sp; | |
30997 | int restore_lr; | |
30998 | int strategy; | |
30999 | HOST_WIDE_INT frame_off = 0; | |
31000 | rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1); | |
31001 | rtx frame_reg_rtx = sp_reg_rtx; | |
31002 | rtx cfa_restores = NULL_RTX; | |
31003 | rtx insn; | |
31004 | rtx cr_save_reg = NULL_RTX; | |
31005 | machine_mode reg_mode = Pmode; | |
31006 | int reg_size = TARGET_32BIT ? 4 : 8; | |
31007 | machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) | |
31008 | ? DFmode : SFmode; | |
31009 | int fp_reg_size = 8; | |
31010 | int i; | |
31011 | bool exit_func; | |
31012 | unsigned ptr_regno; | |
31013 | ||
31014 | info = rs6000_stack_info (); | |
31015 | ||
31016 | if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0) | |
31017 | { | |
31018 | reg_mode = V2SImode; | |
31019 | reg_size = 8; | |
31020 | } | |
31021 | ||
31022 | strategy = info->savres_strategy; | |
31023 | using_load_multiple = strategy & REST_MULTIPLE; | |
31024 | restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS); | |
31025 | restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS); | |
31026 | using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601 | |
31027 | || rs6000_cpu == PROCESSOR_PPC603 | |
31028 | || rs6000_cpu == PROCESSOR_PPC750 | |
31029 | || optimize_size); | |
31030 | /* Restore via the backchain when we have a large frame, since this | |
31031 | is more efficient than an addis, addi pair. The second condition | |
31032 | here will not trigger at the moment; we don't actually need a | |
31033 | frame pointer for alloca, but the generic parts of the compiler | |
31034 | give us one anyway. */ | |
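| /* I.e. when the pop amount will not fit the 16-bit signed | |
| immediate of a single addi, r1 is instead reloaded from the | |
| backchain word at offset 0 of the current frame. */ | |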
31035 | use_backchain_to_restore_sp = (info->total_size + (info->lr_save_p | |
31036 | ? info->lr_save_offset | |
31037 | : 0) > 32767 | |
31038 | || (cfun->calls_alloca | |
31039 | && !frame_pointer_needed)); | |
31040 | restore_lr = (info->lr_save_p | |
31041 | && (restoring_FPRs_inline | |
31042 | || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR)) | |
31043 | && (restoring_GPRs_inline | |
31044 | || info->first_fp_reg_save < 64) | |
31045 | && !cfun->machine->lr_is_wrapped_separately); | |
31046 | ||
31047 | ||
31048 | if (WORLD_SAVE_P (info)) | |
31049 | { | |
31050 | int i, j; | |
31051 | char rname[30]; | |
31052 | const char *alloc_rname; | |
31053 | rtvec p; | |
31054 | ||
31055 | /* eh_rest_world_r10 will return to the location saved in the LR | |
31056 | stack slot (which is not likely to be our caller). | |
31057 | Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8. | |
31058 | rest_world is similar, except any R10 parameter is ignored. | |
31059 | The exception-handling stuff that was here in 2.95 is no | |
31060 | longer necessary. */ | |
31061 | ||
31062 | p = rtvec_alloc (9 | |
31063 | + 32 - info->first_gp_reg_save | |
31064 | + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save | |
31065 | + 63 + 1 - info->first_fp_reg_save); | |
31066 | ||
31067 | strcpy (rname, ((crtl->calls_eh_return) ? | |
31068 | "*eh_rest_world_r10" : "*rest_world")); | |
31069 | alloc_rname = ggc_strdup (rname); | |
31070 | ||
31071 | j = 0; | |
31072 | RTVEC_ELT (p, j++) = ret_rtx; | |
31073 | RTVEC_ELT (p, j++) | |
31074 | = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname)); | |
31075 | /* The instruction pattern requires a clobber here; | |
31076 | it is shared with the restVEC helper. */ | |
31077 | RTVEC_ELT (p, j++) | |
31078 | = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11)); | |
31079 | ||
31080 | { | |
31081 | /* CR register traditionally saved as CR2. */ | |
31082 | rtx reg = gen_rtx_REG (SImode, CR2_REGNO); | |
31083 | RTVEC_ELT (p, j++) | |
31084 | = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset); | |
31085 | if (flag_shrink_wrap) | |
31086 | { | |
31087 | cfa_restores = alloc_reg_note (REG_CFA_RESTORE, | |
31088 | gen_rtx_REG (Pmode, LR_REGNO), | |
31089 | cfa_restores); | |
31090 | cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); | |
31091 | } | |
31092 | } | |
31093 | ||
31094 | for (i = 0; i < 32 - info->first_gp_reg_save; i++) | |
31095 | { | |
31096 | rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i); | |
31097 | RTVEC_ELT (p, j++) | |
31098 | = gen_frame_load (reg, | |
31099 | frame_reg_rtx, info->gp_save_offset + reg_size * i); | |
31100 | if (flag_shrink_wrap) | |
31101 | cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); | |
31102 | } | |
31103 | for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++) | |
31104 | { | |
31105 | rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i); | |
31106 | RTVEC_ELT (p, j++) | |
31107 | = gen_frame_load (reg, | |
31108 | frame_reg_rtx, info->altivec_save_offset + 16 * i); | |
31109 | if (flag_shrink_wrap) | |
31110 | cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); | |
31111 | } | |
31112 | for (i = 0; info->first_fp_reg_save + i <= 63; i++) | |
31113 | { | |
31114 | rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT | |
31115 | ? DFmode : SFmode), | |
31116 | info->first_fp_reg_save + i); | |
31117 | RTVEC_ELT (p, j++) | |
31118 | = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i); | |
31119 | if (flag_shrink_wrap) | |
31120 | cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); | |
31121 | } | |
31122 | RTVEC_ELT (p, j++) | |
31123 | = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0)); | |
31124 | RTVEC_ELT (p, j++) | |
31125 | = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12)); | |
31126 | RTVEC_ELT (p, j++) | |
31127 | = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7)); | |
31128 | RTVEC_ELT (p, j++) | |
31129 | = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8)); | |
31130 | RTVEC_ELT (p, j++) | |
31131 | = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10)); | |
31132 | insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p)); | |
31133 | ||
31134 | if (flag_shrink_wrap) | |
31135 | { | |
31136 | REG_NOTES (insn) = cfa_restores; | |
31137 | add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx); | |
31138 | RTX_FRAME_RELATED_P (insn) = 1; | |
31139 | } | |
31140 | return; | |
31141 | } | |
31142 | ||
31143 | /* frame_reg_rtx + frame_off points to the top of this stack frame. */ | |
31144 | if (info->push_p) | |
31145 | frame_off = info->total_size; | |
31146 | ||
31147 | /* Restore AltiVec registers if we must do so before adjusting the | |
31148 | stack. */ | |
31149 | if (info->altivec_size != 0 | |
31150 | && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP | |
31151 | || (DEFAULT_ABI != ABI_V4 | |
31152 | && offset_below_red_zone_p (info->altivec_save_offset)))) | |
31153 | { | |
31154 | int i; | |
31155 | int scratch_regno = ptr_regno_for_savres (SAVRES_VR); | |
31156 | ||
31157 | gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12); | |
31158 | if (use_backchain_to_restore_sp) | |
31159 | { | |
31160 | int frame_regno = 11; | |
31161 | ||
31162 | if ((strategy & REST_INLINE_VRS) == 0) | |
31163 | { | |
31164 | /* Of r11 and r12, select the one not clobbered by an | |
31165 | out-of-line restore function for the frame register. */ | |
31166 | frame_regno = 11 + 12 - scratch_regno; | |
31167 | } | |
31168 | frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno); | |
31169 | emit_move_insn (frame_reg_rtx, | |
31170 | gen_rtx_MEM (Pmode, sp_reg_rtx)); | |
31171 | frame_off = 0; | |
31172 | } | |
31173 | else if (frame_pointer_needed) | |
31174 | frame_reg_rtx = hard_frame_pointer_rtx; | |
31175 | ||
31176 | if ((strategy & REST_INLINE_VRS) == 0) | |
31177 | { | |
31178 | int end_save = info->altivec_save_offset + info->altivec_size; | |
31179 | int ptr_off; | |
31180 | rtx ptr_reg = gen_rtx_REG (Pmode, 0); | |
31181 | rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno); | |
31182 | ||
31183 | if (end_save + frame_off != 0) | |
31184 | { | |
31185 | rtx offset = GEN_INT (end_save + frame_off); | |
31186 | ||
31187 | emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset)); | |
31188 | } | |
31189 | else | |
31190 | emit_move_insn (ptr_reg, frame_reg_rtx); | |
31191 | ||
31192 | ptr_off = -end_save; | |
31193 | insn = rs6000_emit_savres_rtx (info, scratch_reg, | |
31194 | info->altivec_save_offset + ptr_off, | |
31195 | 0, V4SImode, SAVRES_VR); | |
31196 | } | |
31197 | else | |
31198 | { | |
31199 | for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i) | |
31200 | if (info->vrsave_mask & ALTIVEC_REG_BIT (i)) | |
31201 | { | |
31202 | rtx addr, areg, mem, insn; | |
31203 | rtx reg = gen_rtx_REG (V4SImode, i); | |
31204 | HOST_WIDE_INT offset | |
31205 | = (info->altivec_save_offset + frame_off | |
31206 | + 16 * (i - info->first_altivec_reg_save)); | |
31207 | ||
31208 | if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset)) | |
31209 | { | |
31210 | mem = gen_frame_mem (V4SImode, | |
31211 | gen_rtx_PLUS (Pmode, frame_reg_rtx, | |
31212 | GEN_INT (offset))); | |
31213 | insn = gen_rtx_SET (reg, mem); | |
31214 | } | |
31215 | else | |
31216 | { | |
31217 | areg = gen_rtx_REG (Pmode, 0); | |
31218 | emit_move_insn (areg, GEN_INT (offset)); | |
31219 | ||
31220 | /* AltiVec addressing mode is [reg+reg]. */ | |
31221 | addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg); | |
31222 | mem = gen_frame_mem (V4SImode, addr); | |
31223 | ||
31224 | /* Rather than emitting a generic move, force use of the | |
31225 | lvx instruction, which we always want. In particular we | |
31226 | don't want lxvd2x/xxpermdi for little endian. */ | |
31227 | insn = gen_altivec_lvx_v4si_internal (reg, mem); | |
31228 | } | |
31229 | ||
31230 | (void) emit_insn (insn); | |
31231 | } | |
31232 | } | |
31233 | ||
31234 | for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i) | |
31235 | if (((strategy & REST_INLINE_VRS) == 0 | |
31236 | || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0) | |
31237 | && (flag_shrink_wrap | |
31238 | || (offset_below_red_zone_p | |
31239 | (info->altivec_save_offset | |
31240 | + 16 * (i - info->first_altivec_reg_save))))) | |
31241 | { | |
31242 | rtx reg = gen_rtx_REG (V4SImode, i); | |
31243 | cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); | |
31244 | } | |
31245 | } | |
31246 | ||
31247 | /* Restore VRSAVE if we must do so before adjusting the stack. */ | |
31248 | if (info->vrsave_size != 0 | |
31249 | && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP | |
31250 | || (DEFAULT_ABI != ABI_V4 | |
31251 | && offset_below_red_zone_p (info->vrsave_save_offset)))) | |
31252 | { | |
31253 | rtx reg; | |
31254 | ||
31255 | if (frame_reg_rtx == sp_reg_rtx) | |
31256 | { | |
31257 | if (use_backchain_to_restore_sp) | |
31258 | { | |
31259 | frame_reg_rtx = gen_rtx_REG (Pmode, 11); | |
31260 | emit_move_insn (frame_reg_rtx, | |
31261 | gen_rtx_MEM (Pmode, sp_reg_rtx)); | |
31262 | frame_off = 0; | |
31263 | } | |
31264 | else if (frame_pointer_needed) | |
31265 | frame_reg_rtx = hard_frame_pointer_rtx; | |
31266 | } | |
31267 | ||
31268 | reg = gen_rtx_REG (SImode, 12); | |
31269 | emit_insn (gen_frame_load (reg, frame_reg_rtx, | |
31270 | info->vrsave_save_offset + frame_off)); | |
31271 | ||
31272 | emit_insn (generate_set_vrsave (reg, info, 1)); | |
31273 | } | |
31274 | ||
31275 | insn = NULL_RTX; | |
31276 | /* If we have a large stack frame, restore the old stack pointer | |
31277 | using the backchain. */ | |
31278 | if (use_backchain_to_restore_sp) | |
31279 | { | |
31280 | if (frame_reg_rtx == sp_reg_rtx) | |
31281 | { | |
31282 | /* Under V.4, don't reset the stack pointer until after we're done | |
31283 | loading the saved registers. */ | |
31284 | if (DEFAULT_ABI == ABI_V4) | |
31285 | frame_reg_rtx = gen_rtx_REG (Pmode, 11); | |
31286 | ||
31287 | insn = emit_move_insn (frame_reg_rtx, | |
31288 | gen_rtx_MEM (Pmode, sp_reg_rtx)); | |
31289 | frame_off = 0; | |
31290 | } | |
31291 | else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP | |
31292 | && DEFAULT_ABI == ABI_V4) | |
31293 | /* frame_reg_rtx has been set up by the altivec restore. */ | |
31294 | ; | |
31295 | else | |
31296 | { | |
31297 | insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx); | |
31298 | frame_reg_rtx = sp_reg_rtx; | |
31299 | } | |
31300 | } | |
31301 | /* If we have a frame pointer, we can restore the old stack pointer | |
31302 | from it. */ | |
31303 | else if (frame_pointer_needed) | |
31304 | { | |
31305 | frame_reg_rtx = sp_reg_rtx; | |
31306 | if (DEFAULT_ABI == ABI_V4) | |
31307 | frame_reg_rtx = gen_rtx_REG (Pmode, 11); | |
31308 | /* Prevent reordering memory accesses against stack pointer restore. */ | |
31309 | else if (cfun->calls_alloca | |
31310 | || offset_below_red_zone_p (-info->total_size)) | |
31311 | rs6000_emit_stack_tie (frame_reg_rtx, true); | |
31312 | ||
31313 | insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx, | |
31314 | GEN_INT (info->total_size))); | |
31315 | frame_off = 0; | |
31316 | } | |
31317 | else if (info->push_p | |
31318 | && DEFAULT_ABI != ABI_V4 | |
31319 | && !crtl->calls_eh_return) | |
31320 | { | |
31321 | /* Prevent reordering memory accesses against stack pointer restore. */ | |
31322 | if (cfun->calls_alloca | |
31323 | || offset_below_red_zone_p (-info->total_size)) | |
31324 | rs6000_emit_stack_tie (frame_reg_rtx, false); | |
31325 | insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, | |
31326 | GEN_INT (info->total_size))); | |
31327 | frame_off = 0; | |
31328 | } | |
31329 | if (insn && frame_reg_rtx == sp_reg_rtx) | |
31330 | { | |
31331 | if (cfa_restores) | |
31332 | { | |
31333 | REG_NOTES (insn) = cfa_restores; | |
31334 | cfa_restores = NULL_RTX; | |
31335 | } | |
31336 | add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx); | |
31337 | RTX_FRAME_RELATED_P (insn) = 1; | |
31338 | } | |
31339 | ||
31340 | /* Restore AltiVec registers if we have not done so already. */ | |
31341 | if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP | |
31342 | && info->altivec_size != 0 | |
31343 | && (DEFAULT_ABI == ABI_V4 | |
31344 | || !offset_below_red_zone_p (info->altivec_save_offset))) | |
31345 | { | |
31346 | int i; | |
31347 | ||
31348 | if ((strategy & REST_INLINE_VRS) == 0) | |
31349 | { | |
31350 | int end_save = info->altivec_save_offset + info->altivec_size; | |
31351 | int ptr_off; | |
31352 | rtx ptr_reg = gen_rtx_REG (Pmode, 0); | |
31353 | int scratch_regno = ptr_regno_for_savres (SAVRES_VR); | |
31354 | rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno); | |
31355 | ||
31356 | if (end_save + frame_off != 0) | |
31357 | { | |
31358 | rtx offset = GEN_INT (end_save + frame_off); | |
31359 | ||
31360 | emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset)); | |
31361 | } | |
31362 | else | |
31363 | emit_move_insn (ptr_reg, frame_reg_rtx); | |
31364 | ||
31365 | ptr_off = -end_save; | |
31366 | insn = rs6000_emit_savres_rtx (info, scratch_reg, | |
31367 | info->altivec_save_offset + ptr_off, | |
31368 | 0, V4SImode, SAVRES_VR); | |
31369 | if (REGNO (frame_reg_rtx) == REGNO (scratch_reg)) | |
31370 | { | |
31371 | /* Frame reg was clobbered by out-of-line save. Restore it | |
31372 | from ptr_reg, and if we are calling out-of-line gpr or | |
31373 | fpr restore set up the correct pointer and offset. */ | |
31374 | unsigned newptr_regno = 1; | |
31375 | if (!restoring_GPRs_inline) | |
31376 | { | |
31377 | bool lr = info->gp_save_offset + info->gp_size == 0; | |
31378 | int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0); | |
31379 | newptr_regno = ptr_regno_for_savres (sel); | |
31380 | end_save = info->gp_save_offset + info->gp_size; | |
31381 | } | |
31382 | else if (!restoring_FPRs_inline) | |
31383 | { | |
31384 | bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR); | |
31385 | int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0); | |
31386 | newptr_regno = ptr_regno_for_savres (sel); | |
31387 | end_save = info->fp_save_offset + info->fp_size; | |
31388 | } | |
31389 | ||
31390 | if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno) | |
31391 | frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno); | |
31392 | ||
31393 | if (end_save + ptr_off != 0) | |
31394 | { | |
31395 | rtx offset = GEN_INT (end_save + ptr_off); | |
31396 | ||
31397 | frame_off = -end_save; | |
31398 | if (TARGET_32BIT) | |
31399 | emit_insn (gen_addsi3_carry (frame_reg_rtx, | |
31400 | ptr_reg, offset)); | |
31401 | else | |
31402 | emit_insn (gen_adddi3_carry (frame_reg_rtx, | |
31403 | ptr_reg, offset)); | |
31404 | } | |
31405 | else | |
31406 | { | |
31407 | frame_off = ptr_off; | |
31408 | emit_move_insn (frame_reg_rtx, ptr_reg); | |
31409 | } | |
31410 | } | |
31411 | } | |
31412 | else | |
31413 | { | |
31414 | for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i) | |
31415 | if (info->vrsave_mask & ALTIVEC_REG_BIT (i)) | |
31416 | { | |
31417 | rtx addr, areg, mem, insn; | |
31418 | rtx reg = gen_rtx_REG (V4SImode, i); | |
31419 | HOST_WIDE_INT offset | |
31420 | = (info->altivec_save_offset + frame_off | |
31421 | + 16 * (i - info->first_altivec_reg_save)); | |
31422 | ||
31423 | if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset)) | |
31424 | { | |
31425 | mem = gen_frame_mem (V4SImode, | |
31426 | gen_rtx_PLUS (Pmode, frame_reg_rtx, | |
31427 | GEN_INT (offset))); | |
31428 | insn = gen_rtx_SET (reg, mem); | |
31429 | } | |
31430 | else | |
31431 | { | |
31432 | areg = gen_rtx_REG (Pmode, 0); | |
31433 | emit_move_insn (areg, GEN_INT (offset)); | |
31434 | ||
31435 | /* AltiVec addressing mode is [reg+reg]. */ | |
31436 | addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg); | |
31437 | mem = gen_frame_mem (V4SImode, addr); | |
31438 | ||
31439 | /* Rather than emitting a generic move, force use of the | |
31440 | lvx instruction, which we always want. In particular we | |
31441 | don't want lxvd2x/xxpermdi for little endian. */ | |
31442 | insn = gen_altivec_lvx_v4si_internal (reg, mem); | |
31443 | } | |
31444 | ||
31445 | (void) emit_insn (insn); | |
31446 | } | |
31447 | } | |
31448 | ||
31449 | for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i) | |
31450 | if (((strategy & REST_INLINE_VRS) == 0 | |
31451 | || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0) | |
31452 | && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)) | |
31453 | { | |
31454 | rtx reg = gen_rtx_REG (V4SImode, i); | |
31455 | cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); | |
31456 | } | |
31457 | } | |
31458 | ||
31459 | /* Restore VRSAVE if we have not done so already. */ | |
31460 | if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP | |
31461 | && info->vrsave_size != 0 | |
31462 | && (DEFAULT_ABI == ABI_V4 | |
31463 | || !offset_below_red_zone_p (info->vrsave_save_offset))) | |
31464 | { | |
31465 | rtx reg; | |
31466 | ||
31467 | reg = gen_rtx_REG (SImode, 12); | |
31468 | emit_insn (gen_frame_load (reg, frame_reg_rtx, | |
31469 | info->vrsave_save_offset + frame_off)); | |
31470 | ||
31471 | emit_insn (generate_set_vrsave (reg, info, 1)); | |
31472 | } | |
31473 | ||
31474 | /* If we exit by an out-of-line restore function on ABI_V4 then that | |
31475 | function will deallocate the stack, so we don't need to worry | |
31476 | about the unwinder restoring cr from an invalid stack frame | |
31477 | location. */ | |
31478 | exit_func = (!restoring_FPRs_inline | |
31479 | || (!restoring_GPRs_inline | |
31480 | && info->first_fp_reg_save == 64)); | |
31481 | ||
31482 | /* In the ELFv2 ABI we need to restore all call-saved CR fields from | |
31483 | *separate* slots if the routine calls __builtin_eh_return, so | |
31484 | that they can be independently restored by the unwinder. */ | |
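| /* Each such CR field is reloaded below with its own frame load | |
| into r0 followed by a single-field move to that CR field, so a | |
| separate CFA_RESTORE can be attached to each. */ | |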
31485 | if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return) | |
31486 | { | |
31487 | int i, cr_off = info->ehcr_offset; | |
31488 | ||
31489 | for (i = 0; i < 8; i++) | |
31490 | if (!call_used_regs[CR0_REGNO + i]) | |
31491 | { | |
31492 | rtx reg = gen_rtx_REG (SImode, 0); | |
31493 | emit_insn (gen_frame_load (reg, frame_reg_rtx, | |
31494 | cr_off + frame_off)); | |
31495 | ||
31496 | insn = emit_insn (gen_movsi_to_cr_one | |
31497 | (gen_rtx_REG (CCmode, CR0_REGNO + i), reg)); | |
31498 | ||
31499 | if (!exit_func && flag_shrink_wrap) | |
31500 | { | |
31501 | add_reg_note (insn, REG_CFA_RESTORE, | |
31502 | gen_rtx_REG (SImode, CR0_REGNO + i)); | |
31503 | ||
31504 | RTX_FRAME_RELATED_P (insn) = 1; | |
31505 | } | |
31506 | ||
31507 | cr_off += reg_size; | |
31508 | } | |
31509 | } | |
31510 | ||
31511 | /* Get the old lr if we saved it. If we are restoring registers | |
31512 | out-of-line, then the out-of-line routines can do this for us. */ | |
31513 | if (restore_lr && restoring_GPRs_inline) | |
31514 | load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off); | |
31515 | ||
31516 | /* Get the old cr if we saved it. */ | |
31517 | if (info->cr_save_p) | |
31518 | { | |
31519 | unsigned cr_save_regno = 12; | |
31520 | ||
31521 | if (!restoring_GPRs_inline) | |
31522 | { | |
31523 | /* Ensure we don't use the register used by the out-of-line | |
31524 | gpr register restore below. */ | |
31525 | bool lr = info->gp_save_offset + info->gp_size == 0; | |
31526 | int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0); | |
31527 | int gpr_ptr_regno = ptr_regno_for_savres (sel); | |
31528 | ||
31529 | if (gpr_ptr_regno == 12) | |
31530 | cr_save_regno = 11; | |
31531 | gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno); | |
31532 | } | |
31533 | else if (REGNO (frame_reg_rtx) == 12) | |
31534 | cr_save_regno = 11; | |
31535 | ||
31536 | cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx, | |
31537 | info->cr_save_offset + frame_off, | |
31538 | exit_func); | |
31539 | } | |
31540 | ||
31541 | /* Set LR here to try to overlap restores below. */ | |
31542 | if (restore_lr && restoring_GPRs_inline) | |
31543 | restore_saved_lr (0, exit_func); | |
31544 | ||
31545 | /* Load exception handler data registers, if needed. */ | |
31546 | if (crtl->calls_eh_return) | |
31547 | { | |
31548 | unsigned int i, regno; | |
31549 | ||
31550 | if (TARGET_AIX) | |
31551 | { | |
31552 | rtx reg = gen_rtx_REG (reg_mode, 2); | |
31553 | emit_insn (gen_frame_load (reg, frame_reg_rtx, | |
31554 | frame_off + RS6000_TOC_SAVE_SLOT)); | |
31555 | } | |
31556 | ||
31557 | for (i = 0; ; ++i) | |
31558 | { | |
31559 | rtx mem; | |
31560 | ||
31561 | regno = EH_RETURN_DATA_REGNO (i); | |
31562 | if (regno == INVALID_REGNUM) | |
31563 | break; | |
31564 | ||
31565 | /* Note: possible use of r0 here to address SPE regs. */ | |
31566 | mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx, | |
31567 | info->ehrd_offset + frame_off | |
31568 | + reg_size * (int) i); | |
31569 | ||
31570 | emit_move_insn (gen_rtx_REG (reg_mode, regno), mem); | |
31571 | } | |
31572 | } | |
31573 | ||
31574 | /* Restore GPRs. This is done as a PARALLEL if we are using | |
31575 | the load-multiple instructions. */ | |
31576 | if (TARGET_SPE_ABI | |
31577 | && info->spe_64bit_regs_used | |
31578 | && info->first_gp_reg_save != 32) | |
31579 | { | |
31580 | /* Determine whether we can address all of the registers that need | |
31581 | to be saved with an offset from frame_reg_rtx that fits in | |
31582 | the small const field for SPE memory instructions. */ | |
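| /* SPE loads such as evldd encode only a small unsigned scaled | |
| displacement (0..248 in steps of 8 for doubleword ops), which is | |
| what SPE_CONST_OFFSET_OK checks -- hence the r11 fallback below | |
| when the last register's offset would not fit. */ | |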
31583 | int spe_regs_addressable | |
31584 | = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off | |
31585 | + reg_size * (32 - info->first_gp_reg_save - 1)) | |
31586 | && restoring_GPRs_inline); | |
31587 | ||
31588 | if (!spe_regs_addressable) | |
31589 | { | |
31590 | int ool_adjust = 0; | |
31591 | rtx old_frame_reg_rtx = frame_reg_rtx; | |
31592 | /* Make r11 point to the start of the SPE save area. We worried about | |
31593 | not clobbering it when we were saving registers in the prologue. | |
31594 | There's no need to worry here because the static chain is passed | |
31595 | anew to every function. */ | |
31596 | ||
31597 | if (!restoring_GPRs_inline) | |
31598 | ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO); | |
31599 | frame_reg_rtx = gen_rtx_REG (Pmode, 11); | |
31600 | emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx, | |
31601 | GEN_INT (info->spe_gp_save_offset | |
31602 | + frame_off | |
31603 | - ool_adjust))); | |
31604 | /* Keep the invariant that frame_reg_rtx + frame_off points | |
31605 | at the top of the stack frame. */ | |
31606 | frame_off = -info->spe_gp_save_offset + ool_adjust; | |
31607 | } | |
31608 | ||
31609 | if (restoring_GPRs_inline) | |
31610 | { | |
31611 | HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off; | |
31612 | ||
31613 | for (i = 0; i < 32 - info->first_gp_reg_save; i++) | |
31614 | if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i)) | |
31615 | { | |
31616 | rtx offset, addr, mem, reg; | |
31617 | ||
31618 | /* We're doing all this to ensure that the immediate offset | |
31619 | fits into the immediate field of 'evldd'. */ | |
31620 | gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i)); | |
31621 | ||
31622 | offset = GEN_INT (spe_offset + reg_size * i); | |
31623 | addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset); | |
31624 | mem = gen_rtx_MEM (V2SImode, addr); | |
31625 | reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i); | |
31626 | ||
31627 | emit_move_insn (reg, mem); | |
31628 | } | |
31629 | } | |
31630 | else | |
31631 | rs6000_emit_savres_rtx (info, frame_reg_rtx, | |
31632 | info->spe_gp_save_offset + frame_off, | |
31633 | info->lr_save_offset + frame_off, | |
31634 | reg_mode, | |
31635 | SAVRES_GPR | SAVRES_LR); | |
31636 | } | |
31637 | else if (!restoring_GPRs_inline) | |
31638 | { | |
31639 | /* We are jumping to an out-of-line function. */ | |
31640 | rtx ptr_reg; | |
31641 | int end_save = info->gp_save_offset + info->gp_size; | |
31642 | bool can_use_exit = end_save == 0; | |
31643 | int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0); | |
31644 | int ptr_off; | |
31645 | ||
31646 | /* Emit stack reset code if we need it. */ | |
31647 | ptr_regno = ptr_regno_for_savres (sel); | |
31648 | ptr_reg = gen_rtx_REG (Pmode, ptr_regno); | |
31649 | if (can_use_exit) | |
31650 | rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno); | |
31651 | else if (end_save + frame_off != 0) | |
31652 | emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, | |
31653 | GEN_INT (end_save + frame_off))); | |
31654 | else if (REGNO (frame_reg_rtx) != ptr_regno) | |
31655 | emit_move_insn (ptr_reg, frame_reg_rtx); | |
31656 | if (REGNO (frame_reg_rtx) == ptr_regno) | |
31657 | frame_off = -end_save; | |
31658 | ||
31659 | if (can_use_exit && info->cr_save_p) | |
31660 | restore_saved_cr (cr_save_reg, using_mtcr_multiple, true); | |
31661 | ||
31662 | ptr_off = -end_save; | |
31663 | rs6000_emit_savres_rtx (info, ptr_reg, | |
31664 | info->gp_save_offset + ptr_off, | |
31665 | info->lr_save_offset + ptr_off, | |
31666 | reg_mode, sel); | |
31667 | } | |
31668 | else if (using_load_multiple) | |
31669 | { | |
31670 | rtvec p; | |
31671 | p = rtvec_alloc (32 - info->first_gp_reg_save); | |
31672 | for (i = 0; i < 32 - info->first_gp_reg_save; i++) | |
31673 | RTVEC_ELT (p, i) | |
31674 | = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i), | |
31675 | frame_reg_rtx, | |
31676 | info->gp_save_offset + frame_off + reg_size * i); | |
31677 | emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); | |
31678 | } | |
31679 | else | |
31680 | { | |
31681 | int offset = info->gp_save_offset + frame_off; | |
31682 | for (i = info->first_gp_reg_save; i < 32; i++) | |
31683 | { | |
31684 | if (rs6000_reg_live_or_pic_offset_p (i) | |
31685 | && !cfun->machine->gpr_is_wrapped_separately[i]) | |
31686 | { | |
31687 | rtx reg = gen_rtx_REG (reg_mode, i); | |
31688 | emit_insn (gen_frame_load (reg, frame_reg_rtx, offset)); | |
31689 | } | |
31690 | ||
31691 | offset += reg_size; | |
31692 | } | |
31693 | } | |
31694 | ||
31695 | if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap) | |
31696 | { | |
31697 | /* If the frame pointer was used then we can't delay emitting | |
31698 | a REG_CFA_DEF_CFA note. This must happen on the insn that | |
31699 | restores the frame pointer, r31. We may have already emitted | |
31700 | a REG_CFA_DEF_CFA note, but that's OK; a duplicate is | |
31701 | discarded by dwarf2cfi.c/dwarf2out.c, and in any case would | |
31702 | be harmless if emitted. */ | |
31703 | if (frame_pointer_needed) | |
31704 | { | |
31705 | insn = get_last_insn (); | |
31706 | add_reg_note (insn, REG_CFA_DEF_CFA, | |
31707 | plus_constant (Pmode, frame_reg_rtx, frame_off)); | |
31708 | RTX_FRAME_RELATED_P (insn) = 1; | |
31709 | } | |
31710 | ||
31711 | /* Set up cfa_restores. We always need these when | |
31712 | shrink-wrapping. If not shrink-wrapping then we only need | |
31713 | the cfa_restore when the stack location is no longer valid. | |
31714 | The cfa_restores must be emitted on or before the insn that | |
31715 | invalidates the stack, and of course must not be emitted | |
31716 | before the insn that actually does the restore. The latter | |
31717 | is why it is a bad idea to emit the cfa_restores as a group | |
31718 | on the last instruction here that actually does a restore: | |
31719 | That insn may be reordered with respect to others doing | |
31720 | restores. */ | |
31721 | if (flag_shrink_wrap | |
31722 | && !restoring_GPRs_inline | |
31723 | && info->first_fp_reg_save == 64) | |
31724 | cfa_restores = add_crlr_cfa_restore (info, cfa_restores); | |
31725 | ||
31726 | for (i = info->first_gp_reg_save; i < 32; i++) | |
31727 | if (!restoring_GPRs_inline | |
31728 | || using_load_multiple | |
31729 | || rs6000_reg_live_or_pic_offset_p (i)) | |
31730 | { | |
31731 | if (cfun->machine->gpr_is_wrapped_separately[i]) | |
31732 | continue; | |
31733 | ||
31734 | rtx reg = gen_rtx_REG (reg_mode, i); | |
31735 | cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); | |
31736 | } | |
31737 | } | |
31738 | ||
31739 | if (!restoring_GPRs_inline | |
31740 | && info->first_fp_reg_save == 64) | |
31741 | { | |
31742 | /* We are jumping to an out-of-line function. */ | |
31743 | if (cfa_restores) | |
31744 | emit_cfa_restores (cfa_restores); | |
31745 | return; | |
31746 | } | |
31747 | ||
31748 | if (restore_lr && !restoring_GPRs_inline) | |
31749 | { | |
31750 | load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off); | |
31751 | restore_saved_lr (0, exit_func); | |
31752 | } | |
31753 | ||
31754 | /* Restore fpr's if we need to do it without calling a function. */ | |
31755 | if (restoring_FPRs_inline) | |
31756 | { | |
31757 | int offset = info->fp_save_offset + frame_off; | |
31758 | for (i = info->first_fp_reg_save; i < 64; i++) | |
31759 | { | |
31760 | if (save_reg_p (i) | |
31761 | && !cfun->machine->fpr_is_wrapped_separately[i - 32]) | |
31762 | { | |
31763 | rtx reg = gen_rtx_REG (fp_reg_mode, i); | |
31764 | emit_insn (gen_frame_load (reg, frame_reg_rtx, offset)); | |
31765 | if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap) | |
31766 | cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, | |
31767 | cfa_restores); | |
31768 | } | |
31769 | ||
31770 | offset += fp_reg_size; | |
31771 | } | |
31772 | } | |
31773 | ||
31774 | /* If we saved cr, restore it here. Just those that were used. */ | |
31775 | if (info->cr_save_p) | |
31776 | restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func); | |
31777 | ||
31778 | /* If this is V.4, unwind the stack pointer after all of the loads | |
31779 | have been done, or set up r11 if we are restoring fp out of line. */ | |
31780 | ptr_regno = 1; | |
31781 | if (!restoring_FPRs_inline) | |
31782 | { | |
31783 | bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0; | |
31784 | int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0); | |
31785 | ptr_regno = ptr_regno_for_savres (sel); | |
31786 | } | |
31787 | ||
31788 | insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno); | |
31789 | if (REGNO (frame_reg_rtx) == ptr_regno) | |
31790 | frame_off = 0; | |
31791 | ||
31792 | if (insn && restoring_FPRs_inline) | |
31793 | { | |
31794 | if (cfa_restores) | |
31795 | { | |
31796 | REG_NOTES (insn) = cfa_restores; | |
31797 | cfa_restores = NULL_RTX; | |
31798 | } | |
31799 | add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx); | |
31800 | RTX_FRAME_RELATED_P (insn) = 1; | |
31801 | } | |
31802 | ||
31803 | if (crtl->calls_eh_return) | |
31804 | { | |
31805 | rtx sa = EH_RETURN_STACKADJ_RTX; | |
31806 | emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa)); | |
31807 | } | |
31808 | ||
31809 | if (!sibcall && restoring_FPRs_inline) | |
31810 | { | |
31811 | if (cfa_restores) | |
31812 | { | |
31813 | /* We can't hang the cfa_restores off a simple return, | |
31814 | since the shrink-wrap code sometimes uses an existing | |
31815 | return. This means there might be a path from | |
31816 | pre-prologue code to this return, and dwarf2cfi code | |
31817 | wants the eh_frame unwinder state to be the same on | |
31818 | all paths to any point. So we need to emit the | |
31819 | cfa_restores before the return. For -m64 we really | |
31820 | don't need epilogue cfa_restores at all, except for | |
31821 | this irritating dwarf2cfi-with-shrink-wrap | |
31822 | requirement; the stack red-zone means eh_frame info | |
31823 | from the prologue telling the unwinder to restore | |
31824 | from the stack is perfectly good right to the end of | |
31825 | the function. */ | |
31826 | emit_insn (gen_blockage ()); | |
31827 | emit_cfa_restores (cfa_restores); | |
31828 | cfa_restores = NULL_RTX; | |
31829 | } | |
31830 | ||
31831 | emit_jump_insn (targetm.gen_simple_return ()); | |
31832 | } | |
31833 | ||
31834 | if (!sibcall && !restoring_FPRs_inline) | |
31835 | { | |
31836 | bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0; | |
31837 | rtvec p = rtvec_alloc (3 + !!lr + 64 - info->first_fp_reg_save); | |
31838 | int elt = 0; | |
31839 | RTVEC_ELT (p, elt++) = ret_rtx; | |
31840 | if (lr) | |
31841 | RTVEC_ELT (p, elt++) | |
31842 | = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO)); | |
31843 | ||
31844 | /* We have to restore more than two FP registers, so branch to the | |
31845 | restore function. It will return to our caller. */ | |
31846 | int i; | |
31847 | int reg; | |
31848 | rtx sym; | |
31849 | ||
31850 | if (flag_shrink_wrap) | |
31851 | cfa_restores = add_crlr_cfa_restore (info, cfa_restores); | |
31852 | ||
31853 | sym = rs6000_savres_routine_sym (info, SAVRES_FPR | (lr ? SAVRES_LR : 0)); | |
31854 | RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, sym); | |
31855 | reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11; | |
31856 | RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg)); | |
31857 | ||
31858 | for (i = 0; i < 64 - info->first_fp_reg_save; i++) | |
31859 | { | |
31860 | rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i); | |
31861 | ||
31862 | RTVEC_ELT (p, elt++) | |
31863 | = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i); | |
31864 | if (flag_shrink_wrap) | |
31865 | cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); | |
31866 | } | |
31867 | ||
31868 | emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p)); | |
31869 | } | |
31870 | ||
31871 | if (cfa_restores) | |
31872 | { | |
31873 | if (sibcall) | |
31874 | /* Ensure the cfa_restores are hung off an insn that won't | |
31875 | be reordered above other restores. */ | |
31876 | emit_insn (gen_blockage ()); | |
31877 | ||
31878 | emit_cfa_restores (cfa_restores); | |
31879 | } | |
31880 | } | |
31881 | ||
31882 | /* Write function epilogue. */ | |
31883 | ||
31884 | static void | |
718e6d56 | 31885 | rs6000_output_function_epilogue (FILE *file) |
01e91138 | 31886 | { |
31887 | #if TARGET_MACHO | |
31888 | macho_branch_islands (); | |
31889 | ||
31890 | { | |
31891 | rtx_insn *insn = get_last_insn (); | |
31892 | rtx_insn *deleted_debug_label = NULL; | |
31893 | ||
31894 | /* Mach-O doesn't support labels at the end of objects, so if | |
31895 | it looks like we might want one, take special action. | |
31896 | ||
31897 | First, collect any sequence of deleted debug labels. */ | |
31898 | while (insn | |
31899 | && NOTE_P (insn) | |
31900 | && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL) | |
31901 | { | |
31902 | /* For NOTE_INSN_DELETED_DEBUG_LABEL notes only, don't insert | |
31903 | a nop; instead set their CODE_LABEL_NUMBER to -1. Otherwise | |
31904 | there would be code generation differences between -g | |
31905 | and -g0. */ | |
31906 | if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL) | |
31907 | deleted_debug_label = insn; | |
31908 | insn = PREV_INSN (insn); | |
31909 | } | |
31910 | ||
31911 | /* Second, if we have: | |
31912 | label: | |
31913 | barrier | |
31914 | then this needs to be detected, so skip past the barrier. */ | |
31915 | ||
31916 | if (insn && BARRIER_P (insn)) | |
31917 | insn = PREV_INSN (insn); | |
31918 | ||
31919 | /* Up to now we've only seen notes or barriers. */ | |
31920 | if (insn) | |
31921 | { | |
31922 | if (LABEL_P (insn) | |
31923 | || (NOTE_P (insn) | |
31924 | && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)) | |
31925 | /* Trailing label: <barrier>. */ | |
31926 | fputs ("\tnop\n", file); | |
31927 | else | |
31928 | { | |
31929 | /* Lastly, see if we have a completely empty function body. */ | |
31930 | while (insn && ! INSN_P (insn)) | |
31931 | insn = PREV_INSN (insn); | |
31932 | /* If we don't find any insns, we've got an empty function body; | |
31933 | i.e. completely empty, without a return or branch. This is | |
31934 | taken as the case where a function body has been removed | |
31935 | because it contains an inline __builtin_unreachable(). GCC | |
31936 | states that reaching __builtin_unreachable() means UB so we're | |
31937 | not obliged to do anything special; however, we want | |
31938 | non-zero-sized function bodies. To meet this, and help the | |
31939 | user out, let's trap the case. */ | |
31940 | if (insn == NULL) | |
31941 | fputs ("\ttrap\n", file); | |
31942 | } | |
31943 | } | |
31944 | else if (deleted_debug_label) | |
31945 | for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn)) | |
31946 | if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL) | |
31947 | CODE_LABEL_NUMBER (insn) = -1; | |
31948 | } | |
31949 | #endif | |
31950 | ||
31951 | /* Output a traceback table here. See /usr/include/sys/debug.h for info | |
31952 | on its format. | |
31953 | ||
31954 | We don't output a traceback table if -finhibit-size-directive was | |
31955 | used. The documentation for -finhibit-size-directive reads | |
31956 | ``don't output a @code{.size} assembler directive, or anything | |
31957 | else that would cause trouble if the function is split in the | |
31958 | middle, and the two halves are placed at locations far apart in | |
31959 | memory.'' The traceback table has this property, since it | |
31960 | includes the offset from the start of the function to the | |
31961 | traceback table itself. | |
31962 | ||
31963 | System V.4 PowerPC (and the embedded ABI derived from it) uses a | |
31964 | different traceback table. */ | |
31965 | if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) | |
31966 | && ! flag_inhibit_size_directive | |
31967 | && rs6000_traceback != traceback_none && !cfun->is_thunk) | |
31968 | { | |
31969 | const char *fname = NULL; | |
31970 | const char *language_string = lang_hooks.name; | |
31971 | int fixed_parms = 0, float_parms = 0, parm_info = 0; | |
31972 | int i; | |
31973 | int optional_tbtab; | |
31974 | rs6000_stack_t *info = rs6000_stack_info (); | |
31975 | ||
31976 | if (rs6000_traceback == traceback_full) | |
31977 | optional_tbtab = 1; | |
31978 | else if (rs6000_traceback == traceback_part) | |
31979 | optional_tbtab = 0; | |
31980 | else | |
31981 | optional_tbtab = !optimize_size && !TARGET_ELF; | |
31982 | ||
31983 | if (optional_tbtab) | |
31984 | { | |
31985 | fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0); | |
31986 | while (*fname == '.') /* V.4 encodes . in the name */ | |
31987 | fname++; | |
31988 | ||
31989 | /* Need label immediately before tbtab, so we can compute | |
31990 | its offset from the function start. */ | |
31991 | ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT"); | |
31992 | ASM_OUTPUT_LABEL (file, fname); | |
31993 | } | |
31994 | ||
31995 | /* The .tbtab pseudo-op can only be used for the first eight | |
31996 | expressions, since it can't handle the possibly variable | |
31997 | length fields that follow. However, if you omit the optional | |
31998 | fields, the assembler outputs zeros for all optional fields | |
31999 | anyway, giving each variable length field its minimum length | |
32000 | (as defined in sys/debug.h). Thus we cannot use the .tbtab | |
32001 | pseudo-op at all. */ | |
32002 | ||
32003 | /* An all-zero word flags the start of the tbtab, for debuggers | |
32004 | that have to find it by searching forward from the entry | |
32005 | point or from the current pc. */ | |
32006 | fputs ("\t.long 0\n", file); | |
32007 | ||
32008 | /* Tbtab format type. Use format type 0. */ | |
32009 | fputs ("\t.byte 0,", file); | |
32010 | ||
32011 | /* Language type. Unfortunately, there does not seem to be any | |
32012 | official way to discover the language being compiled, so we | |
32013 | use language_string. | |
32014 | C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9. | |
32015 | Java is 13. Objective-C is 14. Objective-C++ isn't assigned | |
32016 | a number, so for now use 9. LTO, Go and JIT aren't assigned numbers | |
32017 | either, so for now use 0. */ | |
32018 | if (lang_GNU_C () | |
32019 | || ! strcmp (language_string, "GNU GIMPLE") | |
32020 | || ! strcmp (language_string, "GNU Go") | |
32021 | || ! strcmp (language_string, "libgccjit")) | |
32022 | i = 0; | |
32023 | else if (! strcmp (language_string, "GNU F77") | |
32024 | || lang_GNU_Fortran ()) | |
32025 | i = 1; | |
32026 | else if (! strcmp (language_string, "GNU Pascal")) | |
32027 | i = 2; | |
32028 | else if (! strcmp (language_string, "GNU Ada")) | |
32029 | i = 3; | |
32030 | else if (lang_GNU_CXX () | |
32031 | || ! strcmp (language_string, "GNU Objective-C++")) | |
32032 | i = 9; | |
32033 | else if (! strcmp (language_string, "GNU Java")) | |
32034 | i = 13; | |
32035 | else if (! strcmp (language_string, "GNU Objective-C")) | |
32036 | i = 14; | |
32037 | else | |
32038 | gcc_unreachable (); | |
32039 | fprintf (file, "%d,", i); | |
32040 | ||
32041 | /* 8 single bit fields: global linkage (not set for C extern linkage, | |
32042 | apparently a PL/I convention?), out-of-line epilogue/prologue, offset | |
32043 | from start of procedure stored in tbtab, internal function, function | |
32044 | has controlled storage, function has no toc, function uses fp, | |
32045 | function logs/aborts fp operations. */ | |
32046 | /* Assume that fp operations are used if any fp reg must be saved. */ | |
32047 | fprintf (file, "%d,", | |
32048 | (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1)); | |
32049 | ||
32050 | /* 6 bitfields: function is interrupt handler, name present in | |
32051 | proc table, function calls alloca, on condition directives | |
32052 | (controls stack walks, 3 bits), saves condition reg, saves | |
32053 | link reg. */ | |
32054 | /* The `function calls alloca' bit seems to be set whenever reg 31 is | |
32055 | set up as a frame pointer, even when there is no alloca call. */ | |
32056 | fprintf (file, "%d,", | |
32057 | ((optional_tbtab << 6) | |
32058 | | ((optional_tbtab & frame_pointer_needed) << 5) | |
32059 | | (info->cr_save_p << 1) | |
32060 | | (info->lr_save_p))); | |
32061 | ||
32062 | /* 3 bitfields: saves backchain, fixup code, number of fpr saved | |
32063 | (6 bits). */ | |
32064 | fprintf (file, "%d,", | |
32065 | (info->push_p << 7) | (64 - info->first_fp_reg_save)); | |
32066 | ||
32067 | /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */ | |
32068 | fprintf (file, "%d,", (32 - first_reg_to_save ())); | |
32069 | ||
32070 | if (optional_tbtab) | |
32071 | { | |
32072 | /* Compute the parameter info from the function decl argument | |
32073 | list. */ | |
32074 | tree decl; | |
32075 | int next_parm_info_bit = 31; | |
32076 | ||
32077 | for (decl = DECL_ARGUMENTS (current_function_decl); | |
32078 | decl; decl = DECL_CHAIN (decl)) | |
32079 | { | |
32080 | rtx parameter = DECL_INCOMING_RTL (decl); | |
32081 | machine_mode mode = GET_MODE (parameter); | |
32082 | ||
32083 | if (GET_CODE (parameter) == REG) | |
32084 | { | |
32085 | if (SCALAR_FLOAT_MODE_P (mode)) | |
32086 | { | |
32087 | int bits; | |
32088 | ||
32089 | float_parms++; | |
32090 | ||
32091 | switch (mode) | |
32092 | { | |
916ace94 | 32093 | case E_SFmode: |
32094 | case E_SDmode: | |
01e91138 | 32095 | bits = 0x2; |
32096 | break; | |
32097 | ||
916ace94 | 32098 | case E_DFmode: |
32099 | case E_DDmode: | |
32100 | case E_TFmode: | |
32101 | case E_TDmode: | |
32102 | case E_IFmode: | |
32103 | case E_KFmode: | |
01e91138 | 32104 | bits = 0x3; |
32105 | break; | |
32106 | ||
32107 | default: | |
32108 | gcc_unreachable (); | |
32109 | } | |
32110 | ||
32111 | /* If only one bit will fit, don't OR in this entry. */ | |
32112 | if (next_parm_info_bit > 0) | |
32113 | parm_info |= (bits << (next_parm_info_bit - 1)); | |
32114 | next_parm_info_bit -= 2; | |
32115 | } | |
32116 | else | |
32117 | { | |
32118 | fixed_parms += ((GET_MODE_SIZE (mode) | |
32119 | + (UNITS_PER_WORD - 1)) | |
32120 | / UNITS_PER_WORD); | |
32121 | next_parm_info_bit -= 1; | |
32122 | } | |
32123 | } | |
32124 | } | |
32125 | } | |
32126 | ||
32127 | /* Number of fixed point parameters. */ | |
32128 | /* This is actually the number of words of fixed point parameters; thus | |
32129 | an 8-byte struct counts as 2, and thus the maximum value is 8. */ | |
32130 | fprintf (file, "%d,", fixed_parms); | |
32131 | ||
32132 | /* 2 bitfields: number of floating point parameters (7 bits), parameters | |
32133 | all on stack. */ | |
32134 | /* This is actually the number of fp registers that hold parameters; | |
32135 | and thus the maximum value is 13. */ | |
32136 | /* Set parameters on stack bit if parameters are not in their original | |
32137 | registers, regardless of whether they are on the stack? Xlc | |
32138 | seems to set the bit when not optimizing. */ | |
32139 | fprintf (file, "%d\n", ((float_parms << 1) | (! optimize))); | |
32140 | ||
32141 | if (optional_tbtab) | |
32142 | { | |
32143 | /* Optional fields follow. Some are variable length. */ | |
32144 | ||
32145 | /* Parameter types, left adjusted bit fields: 0 fixed, 10 single | |
32146 | float, 11 double float. */ | |
32147 | /* There is an entry for each parameter in a register, in the order | |
32148 | that they occur in the parameter list. Any intervening arguments | |
32149 | on the stack are ignored. If the list would overflow the 32-bit | |
32150 | long (the encoding can need up to 34 bits) then completely leave | |
32151 | off all elements that don't fit. */ | |
32152 | /* Only emit this long if there was at least one parameter. */ | |
32153 | if (fixed_parms || float_parms) | |
32154 | fprintf (file, "\t.long %d\n", parm_info); | |
32155 | ||
32156 | /* Offset from start of code to tb table. */ | |
32157 | fputs ("\t.long ", file); | |
32158 | ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT"); | |
32159 | RS6000_OUTPUT_BASENAME (file, fname); | |
32160 | putc ('-', file); | |
32161 | rs6000_output_function_entry (file, fname); | |
32162 | putc ('\n', file); | |
32163 | ||
32164 | /* Interrupt handler mask. */ | |
32165 | /* Omit this long, since we never set the interrupt handler bit | |
32166 | above. */ | |
32167 | ||
32168 | /* Number of CTL (controlled storage) anchors. */ | |
32169 | /* Omit this long, since the has_ctl bit is never set above. */ | |
32170 | ||
32171 | /* Displacement into stack of each CTL anchor. */ | |
32172 | /* Omit this list of longs, because there are no CTL anchors. */ | |
32173 | ||
32174 | /* Length of function name. */ | |
32175 | if (*fname == '*') | |
32176 | ++fname; | |
32177 | fprintf (file, "\t.short %d\n", (int) strlen (fname)); | |
32178 | ||
32179 | /* Function name. */ | |
32180 | assemble_string (fname, strlen (fname)); | |
32181 | ||
32182 | /* Register for alloca automatic storage; this is always reg 31. | |
32183 | Only emit this if the alloca bit was set above. */ | |
32184 | if (frame_pointer_needed) | |
32185 | fputs ("\t.byte 31\n", file); | |
32186 | ||
32187 | fputs ("\t.align 2\n", file); | |
32188 | } | |
32189 | } | |
32190 | ||
32191 | /* Arrange to define .LCTOC1 label, if not already done. */ | |
32192 | if (need_toc_init) | |
32193 | { | |
32194 | need_toc_init = 0; | |
32195 | if (!toc_initialized) | |
32196 | { | |
32197 | switch_to_section (toc_section); | |
32198 | switch_to_section (current_function_section ()); | |
32199 | } | |
32200 | } | |
32201 | } | |
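/* Illustrative sketch (not part of the original source): the parm_info
   word assembled above, worked for a hypothetical signature
   f (int, double, float).  A fixed parm contributes one 0 bit, a
   double-float parm the pair 11, a single-float parm the pair 10,
   all left-adjusted from bit 31.  */

static unsigned
parm_info_example (void)
{
  unsigned parm_info = 0;
  int next_parm_info_bit = 31;

  /* int: one word of fixed parm; a single, implicitly zero, bit.  */
  next_parm_info_bit -= 1;

  /* double: bits 0x3 stored at bits 30..29.  */
  parm_info |= 0x3u << (next_parm_info_bit - 1);
  next_parm_info_bit -= 2;

  /* float: bits 0x2 stored at bits 28..27.  */
  parm_info |= 0x2u << (next_parm_info_bit - 1);
  next_parm_info_bit -= 2;

  return parm_info;		/* 0x70000000 */
}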
32202 | ||
32203 | /* -fsplit-stack support. */ | |
32204 | ||
32205 | /* A SYMBOL_REF for __morestack. */ | |
32206 | static GTY(()) rtx morestack_ref; | |
32207 | ||
32208 | static rtx | |
32209 | gen_add3_const (rtx rt, rtx ra, long c) | |
32210 | { | |
32211 | if (TARGET_64BIT) | |
32212 | return gen_adddi3 (rt, ra, GEN_INT (c)); | |
32213 | else | |
32214 | return gen_addsi3 (rt, ra, GEN_INT (c)); | |
32215 | } | |
32216 | ||
32217 | /* Emit -fsplit-stack prologue, which goes before the regular function | |
32218 | prologue (at local entry point in the case of ELFv2). */ | |
32219 | ||
32220 | void | |
32221 | rs6000_expand_split_stack_prologue (void) | |
32222 | { | |
32223 | rs6000_stack_t *info = rs6000_stack_info (); | |
32224 | unsigned HOST_WIDE_INT allocate; | |
32225 | long alloc_hi, alloc_lo; | |
32226 | rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage; | |
32227 | rtx_insn *insn; | |
32228 | ||
32229 | gcc_assert (flag_split_stack && reload_completed); | |
32230 | ||
32231 | if (!info->push_p) | |
32232 | return; | |
32233 | ||
32234 | if (global_regs[29]) | |
32235 | { | |
32236 | error ("-fsplit-stack uses register r29"); | |
32237 | inform (DECL_SOURCE_LOCATION (global_regs_decl[29]), | |
32238 | "conflicts with %qD", global_regs_decl[29]); | |
32239 | } | |
32240 | ||
32241 | allocate = info->total_size; | |
32242 | if (allocate > (unsigned HOST_WIDE_INT) 1 << 31) | |
32243 | { | |
32244 | sorry ("Stack frame larger than 2G is not supported for -fsplit-stack"); | |
32245 | return; | |
32246 | } | |
32247 | if (morestack_ref == NULL_RTX) | |
32248 | { | |
32249 | morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack"); | |
32250 | SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL | |
32251 | | SYMBOL_FLAG_FUNCTION); | |
32252 | } | |
32253 | ||
32254 | r0 = gen_rtx_REG (Pmode, 0); | |
32255 | r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); | |
32256 | r12 = gen_rtx_REG (Pmode, 12); | |
32257 | emit_insn (gen_load_split_stack_limit (r0)); | |
32258 | /* Always emit two insns here to calculate the requested stack, | |
32259 | so that the linker can edit them when adjusting size for calling | |
32260 | non-split-stack code. */ | |
32261 | alloc_hi = (-allocate + 0x8000) & ~0xffffL; | |
32262 | alloc_lo = -allocate - alloc_hi; | |
32263 | if (alloc_hi != 0) | |
32264 | { | |
32265 | emit_insn (gen_add3_const (r12, r1, alloc_hi)); | |
32266 | if (alloc_lo != 0) | |
32267 | emit_insn (gen_add3_const (r12, r12, alloc_lo)); | |
32268 | else | |
32269 | emit_insn (gen_nop ()); | |
32270 | } | |
32271 | else | |
32272 | { | |
32273 | emit_insn (gen_add3_const (r12, r1, alloc_lo)); | |
32274 | emit_insn (gen_nop ()); | |
32275 | } | |
32276 | ||
32277 | compare = gen_rtx_REG (CCUNSmode, CR7_REGNO); | |
32278 | emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0))); | |
32279 | ok_label = gen_label_rtx (); | |
32280 | jump = gen_rtx_IF_THEN_ELSE (VOIDmode, | |
32281 | gen_rtx_GEU (VOIDmode, compare, const0_rtx), | |
32282 | gen_rtx_LABEL_REF (VOIDmode, ok_label), | |
32283 | pc_rtx); | |
32284 | insn = emit_jump_insn (gen_rtx_SET (pc_rtx, jump)); | |
32285 | JUMP_LABEL (insn) = ok_label; | |
32286 | /* Mark the jump as very likely to be taken. */ | |
61cb1816 | 32287 | add_reg_br_prob_note (insn, profile_probability::very_likely ()); |
01e91138 | 32288 | |
32289 | lr = gen_rtx_REG (Pmode, LR_REGNO); | |
32290 | insn = emit_move_insn (r0, lr); | |
32291 | RTX_FRAME_RELATED_P (insn) = 1; | |
32292 | insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset)); | |
32293 | RTX_FRAME_RELATED_P (insn) = 1; | |
32294 | ||
32295 | insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref), | |
32296 | const0_rtx, const0_rtx)); | |
32297 | call_fusage = NULL_RTX; | |
32298 | use_reg (&call_fusage, r12); | |
32299 | /* Say the call uses r0, even though it doesn't, to stop regrename | |
32300 | from twiddling with the insns saving lr, trashing args for cfun. | |
32301 | The insns restoring lr are similarly protected by making | |
32302 | split_stack_return use r0. */ | |
32303 | use_reg (&call_fusage, r0); | |
32304 | add_function_usage_to (insn, call_fusage); | |
32305 | /* Indicate that this function can't jump to non-local gotos. */ | |
32306 | make_reg_eh_region_note_nothrow_nononlocal (insn); | |
32307 | emit_insn (gen_frame_load (r0, r1, info->lr_save_offset)); | |
32308 | insn = emit_move_insn (lr, r0); | |
32309 | add_reg_note (insn, REG_CFA_RESTORE, lr); | |
32310 | RTX_FRAME_RELATED_P (insn) = 1; | |
32311 | emit_insn (gen_split_stack_return ()); | |
32312 | ||
32313 | emit_label (ok_label); | |
32314 | LABEL_NUSES (ok_label) = 1; | |
32315 | } | |
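/* Illustrative sketch (not part of the original source): the arithmetic
   behind the two add insns emitted above.  ALLOC_HI is -ALLOCATE rounded
   to the nearest multiple of 0x10000, so ALLOC_LO always fits the signed
   16-bit immediate of the low add.  Assumes a 64-bit long.  */

#include <assert.h>

static void
split_alloc_example (unsigned long allocate)
{
  long alloc_hi = (-(long) allocate + 0x8000) & ~0xffffL;
  long alloc_lo = -(long) allocate - alloc_hi;

  assert (alloc_lo >= -0x8000 && alloc_lo <= 0x7fff);	/* addi range */
  assert (alloc_hi + alloc_lo == -(long) allocate);	/* recomposes */
}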
32316 | ||
32317 | /* Return the internal arg pointer used for function incoming | |
32318 | arguments. When -fsplit-stack, the arg pointer is r12 so we need | |
32319 | to copy it to a pseudo in order for it to be preserved over calls | |
32320 | and suchlike. We'd really like to use a pseudo here for the | |
32321 | internal arg pointer but data-flow analysis is not prepared to | |
32322 | accept pseudos as live at the beginning of a function. */ | |
32323 | ||
32324 | static rtx | |
32325 | rs6000_internal_arg_pointer (void) | |
32326 | { | |
32327 | if (flag_split_stack | |
32328 | && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl)) | |
32329 | == NULL)) | |
32331 | { | |
32332 | if (cfun->machine->split_stack_arg_pointer == NULL_RTX) | |
32333 | { | |
32334 | rtx pat; | |
32335 | ||
32336 | cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode); | |
32337 | REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1; | |
32338 | ||
32339 | /* Put the pseudo initialization right after the note at the | |
32340 | beginning of the function. */ | |
32341 | pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer, | |
32342 | gen_rtx_REG (Pmode, 12)); | |
32343 | push_topmost_sequence (); | |
32344 | emit_insn_after (pat, get_insns ()); | |
32345 | pop_topmost_sequence (); | |
32346 | } | |
32347 | return plus_constant (Pmode, cfun->machine->split_stack_arg_pointer, | |
32348 | FIRST_PARM_OFFSET (current_function_decl)); | |
32349 | } | |
32350 | return virtual_incoming_args_rtx; | |
32351 | } | |
32352 | ||
32353 | /* We may have to tell the dataflow pass that the split stack prologue | |
32354 | is initializing a register. */ | |
32355 | ||
32356 | static void | |
32357 | rs6000_live_on_entry (bitmap regs) | |
32358 | { | |
32359 | if (flag_split_stack) | |
32360 | bitmap_set_bit (regs, 12); | |
32361 | } | |
32362 | ||
32363 | /* Emit -fsplit-stack dynamic stack allocation space check. */ | |
32364 | ||
32365 | void | |
32366 | rs6000_split_stack_space_check (rtx size, rtx label) | |
32367 | { | |
32368 | rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); | |
32369 | rtx limit = gen_reg_rtx (Pmode); | |
32370 | rtx requested = gen_reg_rtx (Pmode); | |
32371 | rtx cmp = gen_reg_rtx (CCUNSmode); | |
32372 | rtx jump; | |
32373 | ||
32374 | emit_insn (gen_load_split_stack_limit (limit)); | |
32375 | if (CONST_INT_P (size)) | |
32376 | emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size)))); | |
32377 | else | |
32378 | { | |
32379 | size = force_reg (Pmode, size); | |
32380 | emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size)); | |
32381 | } | |
32382 | emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit))); | |
32383 | jump = gen_rtx_IF_THEN_ELSE (VOIDmode, | |
32384 | gen_rtx_GEU (VOIDmode, cmp, const0_rtx), | |
32385 | gen_rtx_LABEL_REF (VOIDmode, label), | |
32386 | pc_rtx); | |
32387 | jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump)); | |
32388 | JUMP_LABEL (jump) = label; | |
32389 | } | |
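/* Illustrative sketch (not part of the original source): the test the
   RTL above encodes, as plain C.  CCUNSmode makes it an unsigned
   comparison; the branch to LABEL is taken when the requested stack
   pointer still lies at or above the split-stack limit.  */

static int
split_stack_space_ok_example (unsigned long sp, unsigned long limit,
			      unsigned long size)
{
  unsigned long requested = sp - size;
  return requested >= limit;	/* nonzero: branch to LABEL */
}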
32390 | \f | |
32391 | /* A C compound statement that outputs the assembler code for a thunk | |
32392 | function, used to implement C++ virtual function calls with | |
32393 | multiple inheritance. The thunk acts as a wrapper around a virtual | |
32394 | function, adjusting the implicit object parameter before handing | |
32395 | control off to the real function. | |
32396 | ||
32397 | First, emit code to add the integer DELTA to the location that | |
32398 | contains the incoming first argument. Assume that this argument | |
32399 | contains a pointer, and is the one used to pass the `this' pointer | |
32400 | in C++. This is the incoming argument *before* the function | |
32401 | prologue, e.g. `%o0' on a sparc. The addition must preserve the | |
32402 | values of all other incoming arguments. | |
32403 | ||
32404 | After the addition, emit code to jump to FUNCTION, which is a | |
32405 | `FUNCTION_DECL'. This is a direct pure jump, not a call, and does | |
32406 | not touch the return address. Hence returning from FUNCTION will | |
32407 | return to whoever called the current `thunk'. | |
32408 | ||
32409 | The effect must be as if FUNCTION had been called directly with the | |
32410 | adjusted first argument. This macro is responsible for emitting | |
32411 | all of the code for a thunk function; output_function_prologue() | |
32412 | and output_function_epilogue() are not invoked. | |
32413 | ||
32414 | The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already | |
32415 | been extracted from it.) It might possibly be useful on some | |
32416 | targets, but probably not. | |
32417 | ||
32418 | If you do not define this macro, the target-independent code in the | |
32419 | C++ frontend will generate a less efficient heavyweight thunk that | |
32420 | calls FUNCTION instead of jumping to it. The generic approach does | |
32421 | not support varargs. */ | |
32422 | ||
32423 | static void | |
32424 | rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, | |
32425 | HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, | |
32426 | tree function) | |
32427 | { | |
32428 | rtx this_rtx, funexp; | |
32429 | rtx_insn *insn; | |
32430 | ||
32431 | reload_completed = 1; | |
32432 | epilogue_completed = 1; | |
32433 | ||
32434 | /* Mark the end of the (empty) prologue. */ | |
32435 | emit_note (NOTE_INSN_PROLOGUE_END); | |
32436 | ||
32437 | /* Find the "this" pointer. If the function returns a structure, | |
32438 | the structure return pointer is in r3. */ | |
32439 | if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) | |
32440 | this_rtx = gen_rtx_REG (Pmode, 4); | |
32441 | else | |
32442 | this_rtx = gen_rtx_REG (Pmode, 3); | |
32443 | ||
32444 | /* Apply the constant offset, if required. */ | |
32445 | if (delta) | |
32446 | emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta))); | |
32447 | ||
32448 | /* Apply the offset from the vtable, if required. */ | |
32449 | if (vcall_offset) | |
32450 | { | |
32451 | rtx vcall_offset_rtx = GEN_INT (vcall_offset); | |
32452 | rtx tmp = gen_rtx_REG (Pmode, 12); | |
32453 | ||
32454 | emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx)); | |
32455 | if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000) | |
32456 | { | |
32457 | emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx)); | |
32458 | emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp)); | |
32459 | } | |
32460 | else | |
32461 | { | |
32462 | rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx); | |
32463 | ||
32464 | emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc)); | |
32465 | } | |
32466 | emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp)); | |
32467 | } | |
32468 | ||
32469 | /* Generate a tail call to the target function. */ | |
32470 | if (!TREE_USED (function)) | |
32471 | { | |
32472 | assemble_external (function); | |
32473 | TREE_USED (function) = 1; | |
32474 | } | |
32475 | funexp = XEXP (DECL_RTL (function), 0); | |
32476 | funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); | |
32477 | ||
32478 | #if TARGET_MACHO | |
32479 | if (MACHOPIC_INDIRECT) | |
32480 | funexp = machopic_indirect_call_target (funexp); | |
32481 | #endif | |
32482 | ||
32483 | /* gen_sibcall expects reload to convert scratch pseudo to LR so we must | |
32484 | generate sibcall RTL explicitly. */ | |
32485 | insn = emit_call_insn ( | |
32486 | gen_rtx_PARALLEL (VOIDmode, | |
32487 | gen_rtvec (3, | |
32488 | gen_rtx_CALL (VOIDmode, | |
32489 | funexp, const0_rtx), | |
32490 | gen_rtx_USE (VOIDmode, const0_rtx), | |
32491 | simple_return_rtx))); | |
32492 | SIBLING_CALL_P (insn) = 1; | |
32493 | emit_barrier (); | |
32494 | ||
32495 | /* Run just enough of rest_of_compilation to get the insns emitted. | |
32496 | There's not really enough bulk here to make other passes such as | |
32497 | instruction scheduling worthwhile. Note that use_thunk calls | |
32498 | assemble_start_function and assemble_end_function. */ | |
32499 | insn = get_insns (); | |
32500 | shorten_branches (insn); | |
32501 | final_start_function (insn, file, 1); | |
32502 | final (insn, file, 1); | |
32503 | final_end_function (); | |
32504 | ||
32505 | reload_completed = 0; | |
32506 | epilogue_completed = 0; | |
32507 | } | |
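/* Illustrative sketch (not part of the original source): the `this'
   adjustment performed by the thunk above, written as plain C with
   hypothetical types.  DELTA is applied first; VCALL_OFFSET then
   indexes the vtable that the adjusted pointer points at.  */

static char *
thunk_this_adjust_example (char *this_ptr, long delta, long vcall_offset)
{
  this_ptr += delta;				/* constant adjustment */
  if (vcall_offset)
    {
      char *vtable = *(char **) this_ptr;	/* load the vptr */
      this_ptr += *(long *) (vtable + vcall_offset);
    }
  return this_ptr;		/* then jump (not call) to FUNCTION */
}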
32508 | \f | |
32509 | /* A quick summary of the various types of 'constant-pool tables' | |
32510 | under PowerPC: | |
32511 | ||
32512 | Target         Flags            Name             One table per | |
32513 | AIX            (none)           AIX TOC          object file | |
32514 | AIX            -mfull-toc       AIX TOC          object file | |
32515 | AIX            -mminimal-toc    AIX minimal TOC  translation unit | |
32516 | SVR4/EABI      (none)           SVR4 SDATA       object file | |
32517 | SVR4/EABI      -fpic            SVR4 pic         object file | |
32518 | SVR4/EABI      -fPIC            SVR4 PIC         translation unit | |
32519 | SVR4/EABI      -mrelocatable    EABI TOC         function | |
32520 | SVR4/EABI      -maix            AIX TOC          object file | |
32521 | SVR4/EABI      -maix -mminimal-toc | |
32522 |                                 AIX minimal TOC  translation unit | |
32523 | |
32524 | Name              Reg.  Set by  entries          contains: | |
32525 |                                 made by  addrs?   fp?      sum? | |
32526 | |
32527 | AIX TOC           2     crt0    as       Y        option   option | |
32528 | AIX minimal TOC   30    prolog  gcc      Y        Y        option | |
32529 | SVR4 SDATA        13    crt0    gcc      N        Y        N | |
32530 | SVR4 pic          30    prolog  ld       Y        not yet  N | |
32531 | SVR4 PIC          30    prolog  gcc      Y        option   option | |
32532 | EABI TOC          30    prolog  gcc      Y        option   option | |
32533 | ||
32534 | */ | |
32535 | ||
32536 | /* Hash functions for the hash table. */ | |
32537 | ||
32538 | static unsigned | |
32539 | rs6000_hash_constant (rtx k) | |
32540 | { | |
32541 | enum rtx_code code = GET_CODE (k); | |
32542 | machine_mode mode = GET_MODE (k); | |
32543 | unsigned result = (code << 3) ^ mode; | |
32544 | const char *format; | |
32545 | int flen, fidx; | |
32546 | ||
32547 | format = GET_RTX_FORMAT (code); | |
32548 | flen = strlen (format); | |
32549 | fidx = 0; | |
32550 | ||
32551 | switch (code) | |
32552 | { | |
32553 | case LABEL_REF: | |
32554 | return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0)); | |
32555 | ||
32556 | case CONST_WIDE_INT: | |
32557 | { | |
32558 | int i; | |
32559 | flen = CONST_WIDE_INT_NUNITS (k); | |
32560 | for (i = 0; i < flen; i++) | |
32561 | result = result * 613 + CONST_WIDE_INT_ELT (k, i); | |
32562 | return result; | |
32563 | } | |
32564 | ||
32565 | case CONST_DOUBLE: | |
32566 | if (mode != VOIDmode) | |
32567 | return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result; | |
32568 | flen = 2; | |
32569 | break; | |
32570 | ||
32571 | case CODE_LABEL: | |
32572 | fidx = 3; | |
32573 | break; | |
32574 | ||
32575 | default: | |
32576 | break; | |
32577 | } | |
32578 | ||
32579 | for (; fidx < flen; fidx++) | |
32580 | switch (format[fidx]) | |
32581 | { | |
32582 | case 's': | |
32583 | { | |
32584 | unsigned i, len; | |
32585 | const char *str = XSTR (k, fidx); | |
32586 | len = strlen (str); | |
32587 | result = result * 613 + len; | |
32588 | for (i = 0; i < len; i++) | |
32589 | result = result * 613 + (unsigned) str[i]; | |
32590 | break; | |
32591 | } | |
32592 | case 'u': | |
32593 | case 'e': | |
32594 | result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx)); | |
32595 | break; | |
32596 | case 'i': | |
32597 | case 'n': | |
32598 | result = result * 613 + (unsigned) XINT (k, fidx); | |
32599 | break; | |
32600 | case 'w': | |
32601 | if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT)) | |
32602 | result = result * 613 + (unsigned) XWINT (k, fidx); | |
32603 | else | |
32604 | { | |
32605 | size_t i; | |
32606 | for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++) | |
32607 | result = result * 613 + (unsigned) (XWINT (k, fidx) | |
32608 | >> CHAR_BIT * i); | |
32609 | } | |
32610 | break; | |
32611 | case '0': | |
32612 | break; | |
32613 | default: | |
32614 | gcc_unreachable (); | |
32615 | } | |
32616 | ||
32617 | return result; | |
32618 | } | |
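/* Illustrative sketch (not part of the original source): the 's' case
   of the hash above in isolation -- a multiplicative hash with
   multiplier 613 that mixes in the string length first.  */

#include <string.h>

static unsigned
hash_string_example (unsigned result, const char *str)
{
  size_t len = strlen (str);
  result = result * 613 + (unsigned) len;
  for (size_t i = 0; i < len; i++)
    result = result * 613 + (unsigned) str[i];
  return result;
}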
32619 | ||
32620 | hashval_t | |
32621 | toc_hasher::hash (toc_hash_struct *thc) | |
32622 | { | |
32623 | return rs6000_hash_constant (thc->key) ^ thc->key_mode; | |
32624 | } | |
32625 | ||
32626 | /* Compare H1 and H2 for equivalence. */ | |
32627 | ||
32628 | bool | |
32629 | toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2) | |
32630 | { | |
32631 | rtx r1 = h1->key; | |
32632 | rtx r2 = h2->key; | |
32633 | ||
32634 | if (h1->key_mode != h2->key_mode) | |
32635 | return 0; | |
32636 | ||
32637 | return rtx_equal_p (r1, r2); | |
32638 | } | |
32639 | ||
32640 | /* These are the names given by the C++ front-end to vtables, and | |
32641 | vtable-like objects. Ideally, this logic should not be here; | |
32642 | instead, there should be some programmatic way of inquiring as | |
32643 | to whether or not an object is a vtable. */ | |
32644 | ||
32645 | #define VTABLE_NAME_P(NAME) \ | |
32646 | (strncmp ("_vt.", NAME, strlen ("_vt.")) == 0 \ | |
32647 | || strncmp ("_ZTV", NAME, strlen ("_ZTV")) == 0 \ | |
32648 | || strncmp ("_ZTT", NAME, strlen ("_ZTT")) == 0 \ | |
32649 | || strncmp ("_ZTI", NAME, strlen ("_ZTI")) == 0 \ | |
32650 | || strncmp ("_ZTC", NAME, strlen ("_ZTC")) == 0) | |
32651 | ||
32652 | #ifdef NO_DOLLAR_IN_LABEL | |
32653 | /* Return a GGC-allocated character string translating dollar signs in | |
32654 | input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */ | |
32655 | ||
32656 | const char * | |
32657 | rs6000_xcoff_strip_dollar (const char *name) | |
32658 | { | |
32659 | char *strip, *p; | |
32660 | const char *q; | |
32661 | size_t len; | |
32662 | ||
32663 | q = (const char *) strchr (name, '$'); | |
32664 | ||
32665 | if (q == 0 || q == name) | |
32666 | return name; | |
32667 | ||
32668 | len = strlen (name); | |
32669 | strip = XALLOCAVEC (char, len + 1); | |
32670 | strcpy (strip, name); | |
32671 | p = strip + (q - name); | |
32672 | while (p) | |
32673 | { | |
32674 | *p = '_'; | |
32675 | p = strchr (p + 1, '$'); | |
32676 | } | |
32677 | ||
32678 | return ggc_alloc_string (strip, len); | |
32679 | } | |
32680 | #endif | |
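/* Usage sketch (hypothetical, assuming NO_DOLLAR_IN_LABEL): only
   dollar signs after the first character are translated, since the
   early return above fires when the first '$' starts NAME.  */

#include <assert.h>
#include <string.h>

static void
strip_dollar_example (void)
{
  assert (strcmp (rs6000_xcoff_strip_dollar ("a$b$c"), "a_b_c") == 0);
  assert (strcmp (rs6000_xcoff_strip_dollar ("$abc"), "$abc") == 0);
}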
32681 | ||
32682 | void | |
32683 | rs6000_output_symbol_ref (FILE *file, rtx x) | |
32684 | { | |
32685 | const char *name = XSTR (x, 0); | |
32686 | ||
32687 | /* Currently C++ toc references to vtables can be emitted before it | |
32688 | is decided whether the vtable is public or private. If this is | |
32689 | the case, then the linker will eventually complain that there is | |
32690 | a reference to an unknown section. Thus, for vtables only, | |
32691 | we emit the TOC reference to reference the identifier and not the | |
32692 | symbol. */ | |
32693 | if (VTABLE_NAME_P (name)) | |
32694 | { | |
32695 | RS6000_OUTPUT_BASENAME (file, name); | |
32696 | } | |
32697 | else | |
32698 | assemble_name (file, name); | |
32699 | } | |
32700 | ||
32701 | /* Output a TOC entry. We derive the entry name from what is being | |
32702 | written. */ | |
32703 | ||
32704 | void | |
32705 | output_toc (FILE *file, rtx x, int labelno, machine_mode mode) | |
32706 | { | |
32707 | char buf[256]; | |
32708 | const char *name = buf; | |
32709 | rtx base = x; | |
32710 | HOST_WIDE_INT offset = 0; | |
32711 | ||
32712 | gcc_assert (!TARGET_NO_TOC); | |
32713 | ||
32714 | /* When the linker won't eliminate them, don't output duplicate | |
32715 | TOC entries (this happens on AIX if there is any kind of TOC, | |
32716 | and on SVR4 under -fPIC or -mrelocatable). Don't do this for | |
32717 | CODE_LABELs. */ | |
32718 | if (TARGET_TOC && GET_CODE (x) != LABEL_REF) | |
32719 | { | |
32720 | struct toc_hash_struct *h; | |
32721 | ||
32722 | /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE | |
32723 | time because GGC is not initialized at that point. */ | |
32724 | if (toc_hash_table == NULL) | |
32725 | toc_hash_table = hash_table<toc_hasher>::create_ggc (1021); | |
32726 | ||
32727 | h = ggc_alloc<toc_hash_struct> (); | |
32728 | h->key = x; | |
32729 | h->key_mode = mode; | |
32730 | h->labelno = labelno; | |
32731 | ||
32732 | toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT); | |
32733 | if (*found == NULL) | |
32734 | *found = h; | |
32735 | else /* This is indeed a duplicate. | |
32736 | Set this label equal to that label. */ | |
32737 | { | |
32738 | fputs ("\t.set ", file); | |
32739 | ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC"); | |
32740 | fprintf (file, "%d,", labelno); | |
32741 | ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC"); | |
32742 | fprintf (file, "%d\n", ((*found)->labelno)); | |
32743 | ||
32744 | #ifdef HAVE_AS_TLS | |
32745 | if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF | |
32746 | && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC | |
32747 | || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)) | |
32748 | { | |
32749 | fputs ("\t.set ", file); | |
32750 | ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM"); | |
32751 | fprintf (file, "%d,", labelno); | |
32752 | ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM"); | |
32753 | fprintf (file, "%d\n", ((*found)->labelno)); | |
32754 | } | |
32755 | #endif | |
32756 | return; | |
32757 | } | |
32758 | } | |
32759 | ||
32760 | /* If we're going to put a double constant in the TOC, make sure it's | |
32761 | aligned properly when strict alignment is on. */ | |
32762 | if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x)) | |
32763 | && STRICT_ALIGNMENT | |
32764 | && GET_MODE_BITSIZE (mode) >= 64 | |
32765 | && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) | |
32766 | ASM_OUTPUT_ALIGN (file, 3); | |
32768 | ||
32769 | (*targetm.asm_out.internal_label) (file, "LC", labelno); | |
32770 | ||
32771 | /* Handle FP constants specially. Note that if we have a minimal | |
32772 | TOC, things we put here aren't actually in the TOC, so we can allow | |
32773 | FP constants. */ | |
32774 | if (GET_CODE (x) == CONST_DOUBLE | |
32775 | && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode | |
32776 | || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode)) | |
32777 | { | |
32778 | long k[4]; | |
32779 | ||
32780 | if (DECIMAL_FLOAT_MODE_P (GET_MODE (x))) | |
32781 | REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k); | |
32782 | else | |
32783 | REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k); | |
32784 | ||
32785 | if (TARGET_64BIT) | |
32786 | { | |
32787 | if (TARGET_ELF || TARGET_MINIMAL_TOC) | |
32788 | fputs (DOUBLE_INT_ASM_OP, file); | |
32789 | else | |
32790 | fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],", | |
32791 | k[0] & 0xffffffff, k[1] & 0xffffffff, | |
32792 | k[2] & 0xffffffff, k[3] & 0xffffffff); | |
32793 | fprintf (file, "0x%lx%08lx,0x%lx%08lx\n", | |
32794 | k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff, | |
32795 | k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff, | |
32796 | k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff, | |
32797 | k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff); | |
32798 | return; | |
32799 | } | |
32800 | else | |
32801 | { | |
32802 | if (TARGET_ELF || TARGET_MINIMAL_TOC) | |
32803 | fputs ("\t.long ", file); | |
32804 | else | |
32805 | fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],", | |
32806 | k[0] & 0xffffffff, k[1] & 0xffffffff, | |
32807 | k[2] & 0xffffffff, k[3] & 0xffffffff); | |
32808 | fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n", | |
32809 | k[0] & 0xffffffff, k[1] & 0xffffffff, | |
32810 | k[2] & 0xffffffff, k[3] & 0xffffffff); | |
32811 | return; | |
32812 | } | |
32813 | } | |
32814 | else if (GET_CODE (x) == CONST_DOUBLE | |
32815 | && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode)) | |
32816 | { | |
32817 | long k[2]; | |
32818 | ||
32819 | if (DECIMAL_FLOAT_MODE_P (GET_MODE (x))) | |
32820 | REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k); | |
32821 | else | |
32822 | REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k); | |
32823 | ||
32824 | if (TARGET_64BIT) | |
32825 | { | |
32826 | if (TARGET_ELF || TARGET_MINIMAL_TOC) | |
32827 | fputs (DOUBLE_INT_ASM_OP, file); | |
32828 | else | |
32829 | fprintf (file, "\t.tc FD_%lx_%lx[TC],", | |
32830 | k[0] & 0xffffffff, k[1] & 0xffffffff); | |
32831 | fprintf (file, "0x%lx%08lx\n", | |
32832 | k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff, | |
32833 | k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff); | |
32834 | return; | |
32835 | } | |
32836 | else | |
32837 | { | |
32838 | if (TARGET_ELF || TARGET_MINIMAL_TOC) | |
32839 | fputs ("\t.long ", file); | |
32840 | else | |
32841 | fprintf (file, "\t.tc FD_%lx_%lx[TC],", | |
32842 | k[0] & 0xffffffff, k[1] & 0xffffffff); | |
32843 | fprintf (file, "0x%lx,0x%lx\n", | |
32844 | k[0] & 0xffffffff, k[1] & 0xffffffff); | |
32845 | return; | |
32846 | } | |
32847 | } | |
32848 | else if (GET_CODE (x) == CONST_DOUBLE | |
32849 | && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode)) | |
32850 | { | |
32851 | long l; | |
32852 | ||
32853 | if (DECIMAL_FLOAT_MODE_P (GET_MODE (x))) | |
32854 | REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l); | |
32855 | else | |
32856 | REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l); | |
32857 | ||
32858 | if (TARGET_64BIT) | |
32859 | { | |
32860 | if (TARGET_ELF || TARGET_MINIMAL_TOC) | |
32861 | fputs (DOUBLE_INT_ASM_OP, file); | |
32862 | else | |
32863 | fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff); | |
32864 | if (WORDS_BIG_ENDIAN) | |
32865 | fprintf (file, "0x%lx00000000\n", l & 0xffffffff); | |
32866 | else | |
32867 | fprintf (file, "0x%lx\n", l & 0xffffffff); | |
32868 | return; | |
32869 | } | |
32870 | else | |
32871 | { | |
32872 | if (TARGET_ELF || TARGET_MINIMAL_TOC) | |
32873 | fputs ("\t.long ", file); | |
32874 | else | |
32875 | fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff); | |
32876 | fprintf (file, "0x%lx\n", l & 0xffffffff); | |
32877 | return; | |
32878 | } | |
32879 | } | |
32880 | else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT) | |
32881 | { | |
32882 | unsigned HOST_WIDE_INT low; | |
32883 | HOST_WIDE_INT high; | |
32884 | ||
32885 | low = INTVAL (x) & 0xffffffff; | |
32886 | high = (HOST_WIDE_INT) INTVAL (x) >> 32; | |
32887 | ||
32888 | /* TOC entries are always Pmode-sized, so on big-endian targets | |
32889 | smaller integer constants in the TOC need to be padded. | |
32890 | (This is still a win over putting the constants in | |
32891 | a separate constant pool, because then we'd have | |
32892 | to have both a TOC entry _and_ the actual constant.) | |
32893 | ||
32894 | For a 32-bit target, CONST_INT values are loaded and shifted | |
32895 | entirely within `low' and can be stored in one TOC entry. */ | |
32896 | ||
32897 | /* It would be easy to make this work, but it doesn't now. */ | |
32898 | gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode)); | |
32899 | ||
32900 | if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode)) | |
32901 | { | |
32902 | low |= high << 32; | |
32903 | low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode); | |
32904 | high = (HOST_WIDE_INT) low >> 32; | |
32905 | low &= 0xffffffff; | |
32906 | } | |
32907 | ||
32908 | if (TARGET_64BIT) | |
32909 | { | |
32910 | if (TARGET_ELF || TARGET_MINIMAL_TOC) | |
32911 | fputs (DOUBLE_INT_ASM_OP, file); | |
32912 | else | |
32913 | fprintf (file, "\t.tc ID_%lx_%lx[TC],", | |
32914 | (long) high & 0xffffffff, (long) low & 0xffffffff); | |
32915 | fprintf (file, "0x%lx%08lx\n", | |
32916 | (long) high & 0xffffffff, (long) low & 0xffffffff); | |
32917 | return; | |
32918 | } | |
32919 | else | |
32920 | { | |
32921 | if (POINTER_SIZE < GET_MODE_BITSIZE (mode)) | |
32922 | { | |
32923 | if (TARGET_ELF || TARGET_MINIMAL_TOC) | |
32924 | fputs ("\t.long ", file); | |
32925 | else | |
32926 | fprintf (file, "\t.tc ID_%lx_%lx[TC],", | |
32927 | (long) high & 0xffffffff, (long) low & 0xffffffff); | |
32928 | fprintf (file, "0x%lx,0x%lx\n", | |
32929 | (long) high & 0xffffffff, (long) low & 0xffffffff); | |
32930 | } | |
32931 | else | |
32932 | { | |
32933 | if (TARGET_ELF || TARGET_MINIMAL_TOC) | |
32934 | fputs ("\t.long ", file); | |
32935 | else | |
32936 | fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff); | |
32937 | fprintf (file, "0x%lx\n", (long) low & 0xffffffff); | |
32938 | } | |
32939 | return; | |
32940 | } | |
32941 | } | |
32942 | ||
32943 | if (GET_CODE (x) == CONST) | |
32944 | { | |
32945 | gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS | |
32946 | && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT); | |
32947 | ||
32948 | base = XEXP (XEXP (x, 0), 0); | |
32949 | offset = INTVAL (XEXP (XEXP (x, 0), 1)); | |
32950 | } | |
32951 | ||
32952 | switch (GET_CODE (base)) | |
32953 | { | |
32954 | case SYMBOL_REF: | |
32955 | name = XSTR (base, 0); | |
32956 | break; | |
32957 | ||
32958 | case LABEL_REF: | |
32959 | ASM_GENERATE_INTERNAL_LABEL (buf, "L", | |
32960 | CODE_LABEL_NUMBER (XEXP (base, 0))); | |
32961 | break; | |
32962 | ||
32963 | case CODE_LABEL: | |
32964 | ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base)); | |
32965 | break; | |
32966 | ||
32967 | default: | |
32968 | gcc_unreachable (); | |
32969 | } | |
32970 | ||
32971 | if (TARGET_ELF || TARGET_MINIMAL_TOC) | |
32972 | fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file); | |
32973 | else | |
32974 | { | |
32975 | fputs ("\t.tc ", file); | |
32976 | RS6000_OUTPUT_BASENAME (file, name); | |
32977 | ||
32978 | if (offset < 0) | |
32979 | fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset); | |
32980 | else if (offset) | |
32981 | fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset); | |
32982 | ||
32983 | /* Mark large TOC symbols on AIX with [TE] so they are mapped | |
32984 | after other TOC symbols, reducing overflow of small TOC access | |
32985 | to [TC] symbols. */ | |
32986 | fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL | |
32987 | ? "[TE]," : "[TC],", file); | |
32988 | } | |
32989 | ||
32990 | /* Currently C++ toc references to vtables can be emitted before it | |
32991 | is decided whether the vtable is public or private. If this is | |
32992 | the case, then the linker will eventually complain that there is | |
32993 | a TOC reference to an unknown section. Thus, for vtables only, | |
32994 | we emit the TOC reference to reference the symbol and not the | |
32995 | section. */ | |
32996 | if (VTABLE_NAME_P (name)) | |
32997 | { | |
32998 | RS6000_OUTPUT_BASENAME (file, name); | |
32999 | if (offset < 0) | |
33000 | fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset); | |
33001 | else if (offset > 0) | |
33002 | fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset); | |
33003 | } | |
33004 | else | |
33005 | output_addr_const (file, x); | |
33006 | ||
33007 | #if HAVE_AS_TLS | |
33008 | if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF) | |
33009 | { | |
33010 | switch (SYMBOL_REF_TLS_MODEL (base)) | |
33011 | { | |
33012 | case 0: | |
33013 | break; | |
33014 | case TLS_MODEL_LOCAL_EXEC: | |
33015 | fputs ("@le", file); | |
33016 | break; | |
33017 | case TLS_MODEL_INITIAL_EXEC: | |
33018 | fputs ("@ie", file); | |
33019 | break; | |
33020 | /* Use global-dynamic for local-dynamic. */ | |
33021 | case TLS_MODEL_GLOBAL_DYNAMIC: | |
33022 | case TLS_MODEL_LOCAL_DYNAMIC: | |
33023 | putc ('\n', file); | |
33024 | (*targetm.asm_out.internal_label) (file, "LCM", labelno); | |
33025 | fputs ("\t.tc .", file); | |
33026 | RS6000_OUTPUT_BASENAME (file, name); | |
33027 | fputs ("[TC],", file); | |
33028 | output_addr_const (file, x); | |
33029 | fputs ("@m", file); | |
33030 | break; | |
33031 | default: | |
33032 | gcc_unreachable (); | |
33033 | } | |
33034 | } | |
33035 | #endif | |
33036 | ||
33037 | putc ('\n', file); | |
33038 | } | |
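/* Illustrative sketch (not part of the original source): the
   big-endian padding above, worked for an SImode 0x12345678 in a
   64-bit TOC entry.  Assumes an LP64 host, much as the code it
   mirrors assumes a 64-bit HOST_WIDE_INT.  */

#include <assert.h>

static void
toc_pad_example (void)
{
  unsigned long low = 0x12345678UL & 0xffffffff;
  long high = 0;			/* sign/high bits of the value */

  low |= (unsigned long) high << 32;
  low <<= 64 - 32;			/* left-justify the 32-bit value */
  high = (long) low >> 32;		/* high word: 0x12345678 */
  low &= 0xffffffff;			/* low word: 0 */

  assert (high == 0x12345678 && low == 0);
}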
33039 | \f | |
33040 | /* Output an assembler pseudo-op to write an ASCII string of N characters | |
33041 | starting at P to FILE. | |
33042 | ||
33043 | On the RS/6000, we have to do this using the .byte operation and | |
33044 | write out special characters outside the quoted string. | |
33045 | Also, the assembler is broken; very long strings are truncated, | |
33046 | so we must artificially break them up early. */ | |
33047 | ||
33048 | void | |
33049 | output_ascii (FILE *file, const char *p, int n) | |
33050 | { | |
33051 | char c; | |
33052 | int i, count_string; | |
33053 | const char *for_string = "\t.byte \""; | |
33054 | const char *for_decimal = "\t.byte "; | |
33055 | const char *to_close = NULL; | |
33056 | ||
33057 | count_string = 0; | |
33058 | for (i = 0; i < n; i++) | |
33059 | { | |
33060 | c = *p++; | |
33061 | if (c >= ' ' && c < 0177) | |
33062 | { | |
33063 | if (for_string) | |
33064 | fputs (for_string, file); | |
33065 | putc (c, file); | |
33066 | ||
33067 | /* Write two quotes to get one. */ | |
33068 | if (c == '"') | |
33069 | { | |
33070 | putc (c, file); | |
33071 | ++count_string; | |
33072 | } | |
33073 | ||
33074 | for_string = NULL; | |
33075 | for_decimal = "\"\n\t.byte "; | |
33076 | to_close = "\"\n"; | |
33077 | ++count_string; | |
33078 | ||
33079 | if (count_string >= 512) | |
33080 | { | |
33081 | fputs (to_close, file); | |
33082 | ||
33083 | for_string = "\t.byte \""; | |
33084 | for_decimal = "\t.byte "; | |
33085 | to_close = NULL; | |
33086 | count_string = 0; | |
33087 | } | |
33088 | } | |
33089 | else | |
33090 | { | |
33091 | if (for_decimal) | |
33092 | fputs (for_decimal, file); | |
33093 | fprintf (file, "%d", c); | |
33094 | ||
33095 | for_string = "\n\t.byte \""; | |
33096 | for_decimal = ", "; | |
33097 | to_close = "\n"; | |
33098 | count_string = 0; | |
33099 | } | |
33100 | } | |
33101 | ||
33102 | /* Now close the string if we have written one. Then end the line. */ | |
33103 | if (to_close) | |
33104 | fputs (to_close, file); | |
33105 | } | |
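/* Usage sketch for the routine above: printable characters accumulate
   in one quoted .byte string with quotes doubled, and each unprintable
   byte is flushed as a decimal .byte value.  The call below emits:

	.byte "say ""hi"""
	.byte 10
 */

static void
output_ascii_example (FILE *f)
{
  output_ascii (f, "say \"hi\"\n", 9);
}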
33106 | \f | |
33107 | /* Generate a unique section name for FILENAME for a section type | |
33108 | represented by SECTION_DESC. Output goes into BUF. | |
33109 | ||
33110 | SECTION_DESC can be any string, as long as it is different for each | |
33111 | possible section type. | |
33112 | ||
33113 | We name the section in the same manner as xlc. The name begins with an | |
33114 | underscore followed by the filename (after stripping any leading directory | |
33115 | names) with the last period replaced by the string SECTION_DESC. If | |
33116 | FILENAME does not contain a period, SECTION_DESC is appended to the end of | |
33117 | the name. */ | |
33118 | ||
33119 | void | |
33120 | rs6000_gen_section_name (char **buf, const char *filename, | |
33121 | const char *section_desc) | |
33122 | { | |
33123 | const char *q, *after_last_slash, *last_period = 0; | |
33124 | char *p; | |
33125 | int len; | |
33126 | ||
33127 | after_last_slash = filename; | |
33128 | for (q = filename; *q; q++) | |
33129 | { | |
33130 | if (*q == '/') | |
33131 | after_last_slash = q + 1; | |
33132 | else if (*q == '.') | |
33133 | last_period = q; | |
33134 | } | |
33135 | ||
33136 | len = strlen (after_last_slash) + strlen (section_desc) + 2; | |
33137 | *buf = (char *) xmalloc (len); | |
33138 | ||
33139 | p = *buf; | |
33140 | *p++ = '_'; | |
33141 | ||
33142 | for (q = after_last_slash; *q; q++) | |
33143 | { | |
33144 | if (q == last_period) | |
33145 | { | |
33146 | strcpy (p, section_desc); | |
33147 | p += strlen (section_desc); | |
33148 | break; | |
33149 | } | |
33150 | ||
33151 | else if (ISALNUM (*q)) | |
33152 | *p++ = *q; | |
33153 | } | |
33154 | ||
33155 | if (last_period == 0) | |
33156 | strcpy (p, section_desc); | |
33157 | else | |
33158 | *p = '\0'; | |
33159 | } | |
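/* Usage sketch with a hypothetical SECTION_DESC: "src/foo.c" with
   ".bss." yields "_foo.bss." -- a leading underscore, directories
   stripped, and the last period plus suffix replaced by the
   descriptor.  */

#include <assert.h>
#include <stdlib.h>
#include <string.h>

static void
gen_section_name_example (void)
{
  char *buf;

  rs6000_gen_section_name (&buf, "src/foo.c", ".bss.");
  assert (strcmp (buf, "_foo.bss.") == 0);
  free (buf);
}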
33160 | \f | |
33161 | /* Emit profile function. */ | |
33162 | ||
33163 | void | |
33164 | output_profile_hook (int labelno ATTRIBUTE_UNUSED) | |
33165 | { | |
33166 | /* Non-standard profiling for kernels, which just saves LR then calls | |
33167 | _mcount without worrying about arg saves. The idea is to change | |
33168 | the function prologue as little as possible as it isn't easy to | |
33169 | account for arg save/restore code added just for _mcount. */ | |
33170 | if (TARGET_PROFILE_KERNEL) | |
33171 | return; | |
33172 | ||
33173 | if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) | |
33174 | { | |
33175 | #ifndef NO_PROFILE_COUNTERS | |
33176 | # define NO_PROFILE_COUNTERS 0 | |
33177 | #endif | |
33178 | if (NO_PROFILE_COUNTERS) | |
33179 | emit_library_call (init_one_libfunc (RS6000_MCOUNT), | |
9e9e5c15 | 33180 | LCT_NORMAL, VOIDmode); |
01e91138 | 33181 | else |
33182 | { | |
33183 | char buf[30]; | |
33184 | const char *label_name; | |
33185 | rtx fun; | |
33186 | ||
33187 | ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno); | |
33188 | label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf)); | |
33189 | fun = gen_rtx_SYMBOL_REF (Pmode, label_name); | |
33190 | ||
33191 | emit_library_call (init_one_libfunc (RS6000_MCOUNT), | |
9e9e5c15 | 33192 | LCT_NORMAL, VOIDmode, fun, Pmode); |
01e91138 | 33193 | } |
33194 | } | |
33195 | else if (DEFAULT_ABI == ABI_DARWIN) | |
33196 | { | |
33197 | const char *mcount_name = RS6000_MCOUNT; | |
33198 | int caller_addr_regno = LR_REGNO; | |
33199 | ||
33200 | /* Be conservative and always set this, at least for now. */ | |
33201 | crtl->uses_pic_offset_table = 1; | |
33202 | ||
33203 | #if TARGET_MACHO | |
33204 | /* For PIC code, set up a stub and collect the caller's address | |
33205 | from r0, which is where the prologue puts it. */ | |
33206 | if (MACHOPIC_INDIRECT | |
33207 | && crtl->uses_pic_offset_table) | |
33208 | caller_addr_regno = 0; | |
33209 | #endif | |
33210 | emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name), | |
9e9e5c15 | 33211 | LCT_NORMAL, VOIDmode, |
01e91138 | 33212 | gen_rtx_REG (Pmode, caller_addr_regno), Pmode); |
33213 | } | |
33214 | } | |
33215 | ||
33216 | /* Write function profiler code. */ | |
33217 | ||
33218 | void | |
33219 | output_function_profiler (FILE *file, int labelno) | |
33220 | { | |
33221 | char buf[100]; | |
33222 | ||
33223 | switch (DEFAULT_ABI) | |
33224 | { | |
33225 | default: | |
33226 | gcc_unreachable (); | |
33227 | ||
33228 | case ABI_V4: | |
33229 | if (!TARGET_32BIT) | |
33230 | { | |
33231 | warning (0, "no profiling of 64-bit code for this ABI"); | |
33232 | return; | |
33233 | } | |
33234 | ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno); | |
33235 | fprintf (file, "\tmflr %s\n", reg_names[0]); | |
33236 | if (NO_PROFILE_COUNTERS) | |
33237 | { | |
33238 | asm_fprintf (file, "\tstw %s,4(%s)\n", | |
33239 | reg_names[0], reg_names[1]); | |
33240 | } | |
33241 | else if (TARGET_SECURE_PLT && flag_pic) | |
33242 | { | |
33243 | if (TARGET_LINK_STACK) | |
33244 | { | |
33245 | char name[32]; | |
33246 | get_ppc476_thunk_name (name); | |
33247 | asm_fprintf (file, "\tbl %s\n", name); | |
33248 | } | |
33249 | else | |
33250 | asm_fprintf (file, "\tbcl 20,31,1f\n1:\n"); | |
33251 | asm_fprintf (file, "\tstw %s,4(%s)\n", | |
33252 | reg_names[0], reg_names[1]); | |
33253 | asm_fprintf (file, "\tmflr %s\n", reg_names[12]); | |
33254 | asm_fprintf (file, "\taddis %s,%s,", | |
33255 | reg_names[12], reg_names[12]); | |
33256 | assemble_name (file, buf); | |
33257 | asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]); | |
33258 | assemble_name (file, buf); | |
33259 | asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]); | |
33260 | } | |
33261 | else if (flag_pic == 1) | |
33262 | { | |
33263 | fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file); | |
33264 | asm_fprintf (file, "\tstw %s,4(%s)\n", | |
33265 | reg_names[0], reg_names[1]); | |
33266 | asm_fprintf (file, "\tmflr %s\n", reg_names[12]); | |
33267 | asm_fprintf (file, "\tlwz %s,", reg_names[0]); | |
33268 | assemble_name (file, buf); | |
33269 | asm_fprintf (file, "@got(%s)\n", reg_names[12]); | |
33270 | } | |
33271 | else if (flag_pic > 1) | |
33272 | { | |
33273 | asm_fprintf (file, "\tstw %s,4(%s)\n", | |
33274 | reg_names[0], reg_names[1]); | |
33275 | /* Now, we need to get the address of the label. */ | |
33276 | if (TARGET_LINK_STACK) | |
33277 | { | |
33278 | char name[32]; | |
33279 | get_ppc476_thunk_name (name); | |
33280 | asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name); | |
33281 | assemble_name (file, buf); | |
33282 | fputs ("-.\n1:", file); | |
33283 | asm_fprintf (file, "\tmflr %s\n", reg_names[11]); | |
33284 | asm_fprintf (file, "\taddi %s,%s,4\n", | |
33285 | reg_names[11], reg_names[11]); | |
33286 | } | |
33287 | else | |
33288 | { | |
33289 | fputs ("\tbcl 20,31,1f\n\t.long ", file); | |
33290 | assemble_name (file, buf); | |
33291 | fputs ("-.\n1:", file); | |
33292 | asm_fprintf (file, "\tmflr %s\n", reg_names[11]); | |
33293 | } | |
33294 | asm_fprintf (file, "\tlwz %s,0(%s)\n", | |
33295 | reg_names[0], reg_names[11]); | |
33296 | asm_fprintf (file, "\tadd %s,%s,%s\n", | |
33297 | reg_names[0], reg_names[0], reg_names[11]); | |
33298 | } | |
33299 | else | |
33300 | { | |
33301 | asm_fprintf (file, "\tlis %s,", reg_names[12]); | |
33302 | assemble_name (file, buf); | |
33303 | fputs ("@ha\n", file); | |
33304 | asm_fprintf (file, "\tstw %s,4(%s)\n", | |
33305 | reg_names[0], reg_names[1]); | |
33306 | asm_fprintf (file, "\tla %s,", reg_names[0]); | |
33307 | assemble_name (file, buf); | |
33308 | asm_fprintf (file, "@l(%s)\n", reg_names[12]); | |
33309 | } | |
33310 | ||
33311 | /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */ | |
33312 | fprintf (file, "\tbl %s%s\n", | |
33313 | RS6000_MCOUNT, flag_pic ? "@plt" : ""); | |
33314 | break; | |
33315 | ||
33316 | case ABI_AIX: | |
33317 | case ABI_ELFv2: | |
33318 | case ABI_DARWIN: | |
33319 | /* Don't do anything, done in output_profile_hook (). */ | |
33320 | break; | |
33321 | } | |
33322 | } | |
33323 | ||
33324 | \f | |
33325 | ||
33326 | /* The following variable holds the last issued insn. */ | |
33327 | ||
33328 | static rtx_insn *last_scheduled_insn; | |
33329 | ||
33330 | /* The following variable helps to balance issuing of load and | |
33331 | store instructions. */ | |
33332 | ||
33333 | static int load_store_pendulum; | |
33334 | ||
33335 | /* The following variable helps pair divide insns during scheduling. */ | |
33336 | static int divide_cnt; | |
33337 | /* The following variable helps pair and alternate vector and vector load | |
33338 | insns during scheduling. */ | |
33339 | static int vec_pairing; | |
33340 | ||
33341 | ||
33342 | /* Power4 load update and store update instructions are cracked into a | |
33343 | load or store and an integer insn which are executed in the same cycle. | |
33344 | Branches have their own dispatch slot which does not count against the | |
33345 | GCC issue rate, but it changes the program flow so there are no other | |
33346 | instructions to issue in this cycle. */ | |
33347 | ||
33348 | static int | |
33349 | rs6000_variable_issue_1 (rtx_insn *insn, int more) | |
33350 | { | |
33351 | last_scheduled_insn = insn; | |
33352 | if (GET_CODE (PATTERN (insn)) == USE | |
33353 | || GET_CODE (PATTERN (insn)) == CLOBBER) | |
33354 | { | |
33355 | cached_can_issue_more = more; | |
33356 | return cached_can_issue_more; | |
33357 | } | |
33358 | ||
33359 | if (insn_terminates_group_p (insn, current_group)) | |
33360 | { | |
33361 | cached_can_issue_more = 0; | |
33362 | return cached_can_issue_more; | |
33363 | } | |
33364 | ||
33365 | /* If the insn has no reservation but we reach here anyway, don't count it. */ | |
33366 | if (recog_memoized (insn) < 0) | |
33367 | return more; | |
33368 | ||
33369 | if (rs6000_sched_groups) | |
33370 | { | |
33371 | if (is_microcoded_insn (insn)) | |
33372 | cached_can_issue_more = 0; | |
33373 | else if (is_cracked_insn (insn)) | |
33374 | cached_can_issue_more = more > 2 ? more - 2 : 0; | |
33375 | else | |
33376 | cached_can_issue_more = more - 1; | |
33377 | ||
33378 | return cached_can_issue_more; | |
33379 | } | |
33380 | ||
33381 | if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn)) | |
33382 | return 0; | |
33383 | ||
33384 | cached_can_issue_more = more - 1; | |
33385 | return cached_can_issue_more; | |
33386 | } | |
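/* Illustrative sketch (not part of the original source): the
   dispatch-group bookkeeping above in isolation.  Microcoded insns
   end the group, cracked insns occupy two issue slots, and everything
   else occupies one.  */

static int
issue_slots_left_example (int more, int microcoded, int cracked)
{
  if (microcoded)
    return 0;
  if (cracked)
    return more > 2 ? more - 2 : 0;
  return more - 1;
}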
33387 | ||
33388 | static int | |
33389 | rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more) | |
33390 | { | |
33391 | int r = rs6000_variable_issue_1 (insn, more); | |
33392 | if (verbose) | |
33393 | fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r); | |
33394 | return r; | |
33395 | } | |
33396 | ||
33397 | /* Adjust the cost of a scheduling dependency. Return the new cost of | |
33398 | a dependency LINK or INSN on DEP_INSN. COST is the current cost. */ | |
33399 | ||
33400 | static int | |
33401 | rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, | |
33402 | unsigned int) | |
33403 | { | |
33404 | enum attr_type attr_type; | |
33405 | ||
33406 | if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0) | |
33407 | return cost; | |
33408 | ||
33409 | switch (dep_type) | |
33410 | { | |
33411 | case REG_DEP_TRUE: | |
33412 | { | |
33413 | /* Data dependency; DEP_INSN writes a register that INSN reads | |
33414 | some cycles later. */ | |
33415 | ||
33416 | /* Separate a load from a narrower, dependent store. */ | |
33417 | if ((rs6000_sched_groups || rs6000_cpu_attr == CPU_POWER9) | |
33418 | && GET_CODE (PATTERN (insn)) == SET | |
33419 | && GET_CODE (PATTERN (dep_insn)) == SET | |
33420 | && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM | |
33421 | && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM | |
33422 | && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1))) | |
33423 | > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0))))) | |
33424 | return cost + 14; | |
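| /* The constant 14 above is a heuristic: keeping the load well | |
| away from the narrower store it depends on avoids the penalty | |
| taken when store forwarding cannot satisfy the wider load. */ | |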
33425 | ||
33426 | attr_type = get_attr_type (insn); | |
33427 | ||
33428 | switch (attr_type) | |
33429 | { | |
33430 | case TYPE_JMPREG: | |
33431 | /* Tell the first scheduling pass about the latency between | |
33432 | a mtctr and bctr (and mtlr and br/blr). The first | |
33433 | scheduling pass will not know about this latency since | |
33434 | the mtctr instruction, which has the latency associated | |
33435 | to it, will be generated by reload. */ | |
33436 | return 4; | |
33437 | case TYPE_BRANCH: | |
33438 | /* Leave some extra cycles between a compare and its | |
33439 | dependent branch, to inhibit expensive mispredicts. */ | |
33440 | if ((rs6000_cpu_attr == CPU_PPC603 | |
33441 | || rs6000_cpu_attr == CPU_PPC604 | |
33442 | || rs6000_cpu_attr == CPU_PPC604E | |
33443 | || rs6000_cpu_attr == CPU_PPC620 | |
33444 | || rs6000_cpu_attr == CPU_PPC630 | |
33445 | || rs6000_cpu_attr == CPU_PPC750 | |
33446 | || rs6000_cpu_attr == CPU_PPC7400 | |
33447 | || rs6000_cpu_attr == CPU_PPC7450 | |
33448 | || rs6000_cpu_attr == CPU_PPCE5500 | |
33449 | || rs6000_cpu_attr == CPU_PPCE6500 | |
33450 | || rs6000_cpu_attr == CPU_POWER4 | |
33451 | || rs6000_cpu_attr == CPU_POWER5 | |
33452 | || rs6000_cpu_attr == CPU_POWER7 | |
33453 | || rs6000_cpu_attr == CPU_POWER8 | |
33454 | || rs6000_cpu_attr == CPU_POWER9 | |
33455 | || rs6000_cpu_attr == CPU_CELL) | |
33456 | && recog_memoized (dep_insn) | |
33457 | && (INSN_CODE (dep_insn) >= 0)) | |
33458 | ||
33459 | switch (get_attr_type (dep_insn)) | |
33460 | { | |
33461 | case TYPE_CMP: | |
33462 | case TYPE_FPCOMPARE: | |
33463 | case TYPE_CR_LOGICAL: | |
33464 | case TYPE_DELAYED_CR: | |
33465 | return cost + 2; | |
33466 | case TYPE_EXTS: | |
33467 | case TYPE_MUL: | |
33468 | if (get_attr_dot (dep_insn) == DOT_YES) | |
33469 | return cost + 2; | |
33470 | else | |
33471 | break; | |
33472 | case TYPE_SHIFT: | |
33473 | if (get_attr_dot (dep_insn) == DOT_YES | |
33474 | && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO) | |
33475 | return cost + 2; | |
33476 | else | |
33477 | break; | |
33478 | default: | |
33479 | break; | |
33480 | } | |
33481 | break; | |
33482 | ||
33483 | case TYPE_STORE: | |
33484 | case TYPE_FPSTORE: | |
33485 | if ((rs6000_cpu == PROCESSOR_POWER6) | |
33486 | && recog_memoized (dep_insn) | |
33487 | && (INSN_CODE (dep_insn) >= 0)) | |
33488 | { | |
33490 | if (GET_CODE (PATTERN (insn)) != SET) | |
33491 | /* If this happens, we have to extend this to schedule | |
33492 | optimally. Return default for now. */ | |
33493 | return cost; | |
33494 | ||
33495 | /* Adjust the cost for the case where the value written | |
33496 | by a fixed point operation is used as the address | |
33497 | gen value on a store. */ | |
33498 | switch (get_attr_type (dep_insn)) | |
33499 | { | |
33500 | case TYPE_LOAD: | |
33501 | case TYPE_CNTLZ: | |
33502 | { | |
33503 | if (! rs6000_store_data_bypass_p (dep_insn, insn)) | |
33504 | return get_attr_sign_extend (dep_insn) | |
33505 | == SIGN_EXTEND_YES ? 6 : 4; | |
33506 | break; | |
33507 | } | |
33508 | case TYPE_SHIFT: | |
33509 | { | |
33510 | if (! rs6000_store_data_bypass_p (dep_insn, insn)) | |
33511 | return (get_attr_var_shift (dep_insn) == VAR_SHIFT_YES | |
33512 | ? 6 : 3); | |
33513 | break; | |
33514 | } | |
33515 | case TYPE_INTEGER: | |
33516 | case TYPE_ADD: | |
33517 | case TYPE_LOGICAL: | |
33518 | case TYPE_EXTS: | |
33519 | case TYPE_INSERT: | |
33520 | { | |
33521 | if (! rs6000_store_data_bypass_p (dep_insn, insn)) | |
33522 | return 3; | |
33523 | break; | |
33524 | } | |
33525 | case TYPE_STORE: | |
33526 | case TYPE_FPLOAD: | |
33527 | case TYPE_FPSTORE: | |
33528 | { | |
33529 | if (get_attr_update (dep_insn) == UPDATE_YES | |
33530 | && ! rs6000_store_data_bypass_p (dep_insn, insn)) | |
33531 | return 3; | |
33532 | break; | |
33533 | } | |
33534 | case TYPE_MUL: | |
33535 | { | |
33536 | if (! rs6000_store_data_bypass_p (dep_insn, insn)) | |
33537 | return 17; | |
33538 | break; | |
33539 | } | |
33540 | case TYPE_DIV: | |
33541 | { | |
33542 | if (! rs6000_store_data_bypass_p (dep_insn, insn)) | |
33543 | return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57; | |
33544 | break; | |
33545 | } | |
33546 | default: | |
33547 | break; | |
33548 | } | |
33549 | } | |
33550 | break; | |
33551 | ||
33552 | case TYPE_LOAD: | |
33553 | if ((rs6000_cpu == PROCESSOR_POWER6) | |
33554 | && recog_memoized (dep_insn) | |
33555 | && (INSN_CODE (dep_insn) >= 0)) | |
33556 | { | |
33558 | /* Adjust the cost for the case where the value written | |
33559 | by a fixed point instruction is used within the address | |
33560 | gen portion of a subsequent load(u)(x) */ | |
33561 | switch (get_attr_type (dep_insn)) | |
33562 | { | |
33563 | case TYPE_LOAD: | |
33564 | case TYPE_CNTLZ: | |
33565 | { | |
33566 | if (set_to_load_agen (dep_insn, insn)) | |
33567 | return get_attr_sign_extend (dep_insn) | |
33568 | == SIGN_EXTEND_YES ? 6 : 4; | |
33569 | break; | |
33570 | } | |
33571 | case TYPE_SHIFT: | |
33572 | { | |
33573 | if (set_to_load_agen (dep_insn, insn)) | |
33574 | return (get_attr_var_shift (dep_insn) == VAR_SHIFT_YES | |
33575 | ? 6 : 3); | |
33576 | break; | |
33577 | } | |
33578 | case TYPE_INTEGER: | |
33579 | case TYPE_ADD: | |
33580 | case TYPE_LOGICAL: | |
33581 | case TYPE_EXTS: | |
33582 | case TYPE_INSERT: | |
33583 | { | |
33584 | if (set_to_load_agen (dep_insn, insn)) | |
33585 | return 3; | |
33586 | break; | |
33587 | } | |
33588 | case TYPE_STORE: | |
33589 | case TYPE_FPLOAD: | |
33590 | case TYPE_FPSTORE: | |
33591 | { | |
33592 | if (get_attr_update (dep_insn) == UPDATE_YES | |
33593 | && set_to_load_agen (dep_insn, insn)) | |
33594 | return 3; | |
33595 | break; | |
33596 | } | |
33597 | case TYPE_MUL: | |
33598 | { | |
33599 | if (set_to_load_agen (dep_insn, insn)) | |
33600 | return 17; | |
33601 | break; | |
33602 | } | |
33603 | case TYPE_DIV: | |
33604 | { | |
33605 | if (set_to_load_agen (dep_insn, insn)) | |
33606 | return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57; | |
33607 | break; | |
33608 | } | |
33609 | default: | |
33610 | break; | |
33611 | } | |
33612 | } | |
33613 | break; | |
33614 | ||
33615 | case TYPE_FPLOAD: | |
33616 | if ((rs6000_cpu == PROCESSOR_POWER6) | |
33617 | && get_attr_update (insn) == UPDATE_NO | |
33618 | && recog_memoized (dep_insn) | |
33619 | && (INSN_CODE (dep_insn) >= 0) | |
33620 | && (get_attr_type (dep_insn) == TYPE_MFFGPR)) | |
33621 | return 2; | |
33622 | ||
33623 | default: | |
33624 | break; | |
33625 | } | |
33626 | ||
33627 | /* Fall out to return default cost. */ | |
33628 | } | |
33629 | break; | |
33630 | ||
33631 | case REG_DEP_OUTPUT: | |
33632 | /* Output dependency; DEP_INSN writes a register that INSN writes some | |
33633 | cycles later. */ | |
33634 | if ((rs6000_cpu == PROCESSOR_POWER6) | |
33635 | && recog_memoized (dep_insn) | |
33636 | && (INSN_CODE (dep_insn) >= 0)) | |
33637 | { | |
33638 | attr_type = get_attr_type (insn); | |
33639 | ||
33640 | switch (attr_type) | |
33641 | { | |
33642 | case TYPE_FP: | |
33643 | case TYPE_FPSIMPLE: | |
33644 | if (get_attr_type (dep_insn) == TYPE_FP | |
33645 | || get_attr_type (dep_insn) == TYPE_FPSIMPLE) | |
33646 | return 1; | |
33647 | break; | |
33648 | case TYPE_FPLOAD: | |
33649 | if (get_attr_update (insn) == UPDATE_NO | |
33650 | && get_attr_type (dep_insn) == TYPE_MFFGPR) | |
33651 | return 2; | |
33652 | break; | |
33653 | default: | |
33654 | break; | |
33655 | } | |
33656 | } | |
33657 | /* Fall through, no cost for output dependency. */ | |
33658 | /* FALLTHRU */ | |
33659 | ||
33660 | case REG_DEP_ANTI: | |
33661 | /* Anti dependency; DEP_INSN reads a register that INSN writes some | |
33662 | cycles later. */ | |
33663 | return 0; | |
33664 | ||
33665 | default: | |
33666 | gcc_unreachable (); | |
33667 | } | |
33668 | ||
33669 | return cost; | |
33670 | } | |
33671 | ||
33672 | /* Debug version of rs6000_adjust_cost. */ | |
33673 | ||
33674 | static int | |
33675 | rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, | |
33676 | int cost, unsigned int dw) | |
33677 | { | |
33678 | int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw); | |
33679 | ||
33680 | if (ret != cost) | |
33681 | { | |
33682 | const char *dep; | |
33683 | ||
33684 | switch (dep_type) | |
33685 | { | |
33686 | default: dep = "unknown dependency"; break; | |
33687 | case REG_DEP_TRUE: dep = "data dependency"; break; | |
33688 | case REG_DEP_OUTPUT: dep = "output dependency"; break; | |
33689 | case REG_DEP_ANTI: dep = "anti dependency"; break; | |
33690 | } | |
33691 | ||
33692 | fprintf (stderr, | |
33693 | "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, " | |
33694 | "%s, insn:\n", ret, cost, dep); | |
33695 | ||
33696 | debug_rtx (insn); | |
33697 | } | |
33698 | ||
33699 | return ret; | |
33700 | } | |
33701 | ||
33702 | /* Return true if INSN is microcoded, | |
33703 | false otherwise. */ | |
33704 | ||
33705 | static bool | |
33706 | is_microcoded_insn (rtx_insn *insn) | |
33707 | { | |
33708 | if (!insn || !NONDEBUG_INSN_P (insn) | |
33709 | || GET_CODE (PATTERN (insn)) == USE | |
33710 | || GET_CODE (PATTERN (insn)) == CLOBBER) | |
33711 | return false; | |
33712 | ||
33713 | if (rs6000_cpu_attr == CPU_CELL) | |
33714 | return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS; | |
33715 | ||
33716 | if (rs6000_sched_groups | |
33717 | && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5)) | |
33718 | { | |
33719 | enum attr_type type = get_attr_type (insn); | |
33720 | if ((type == TYPE_LOAD | |
33721 | && get_attr_update (insn) == UPDATE_YES | |
33722 | && get_attr_sign_extend (insn) == SIGN_EXTEND_YES) | |
33723 | || ((type == TYPE_LOAD || type == TYPE_STORE) | |
33724 | && get_attr_update (insn) == UPDATE_YES | |
33725 | && get_attr_indexed (insn) == INDEXED_YES) | |
33726 | || type == TYPE_MFCR) | |
33727 | return true; | |
33728 | } | |
33729 | ||
33730 | return false; | |
33731 | } | |
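| /* For example, on Power4/5 an indexed load with update such as | |
| lwzux matches the second clause above and is treated as | |
| microcoded, so it occupies a dispatch group by itself. */ | |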
33732 | ||
33733 | /* Return true if INSN is cracked into two instructions | |
33734 | by the processor (and therefore occupies two issue slots). */ | |
33735 | ||
33736 | static bool | |
33737 | is_cracked_insn (rtx_insn *insn) | |
33738 | { | |
33739 | if (!insn || !NONDEBUG_INSN_P (insn) | |
33740 | || GET_CODE (PATTERN (insn)) == USE | |
33741 | || GET_CODE (PATTERN (insn)) == CLOBBER) | |
33742 | return false; | |
33743 | ||
33744 | if (rs6000_sched_groups | |
33745 | && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5)) | |
33746 | { | |
33747 | enum attr_type type = get_attr_type (insn); | |
33748 | if ((type == TYPE_LOAD | |
33749 | && get_attr_sign_extend (insn) == SIGN_EXTEND_YES | |
33750 | && get_attr_update (insn) == UPDATE_NO) | |
33751 | || (type == TYPE_LOAD | |
33752 | && get_attr_sign_extend (insn) == SIGN_EXTEND_NO | |
33753 | && get_attr_update (insn) == UPDATE_YES | |
33754 | && get_attr_indexed (insn) == INDEXED_NO) | |
33755 | || (type == TYPE_STORE | |
33756 | && get_attr_update (insn) == UPDATE_YES | |
33757 | && get_attr_indexed (insn) == INDEXED_NO) | |
33758 | || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE) | |
33759 | && get_attr_update (insn) == UPDATE_YES) | |
33760 | || type == TYPE_DELAYED_CR | |
33761 | || (type == TYPE_EXTS | |
33762 | && get_attr_dot (insn) == DOT_YES) | |
33763 | || (type == TYPE_SHIFT | |
33764 | && get_attr_dot (insn) == DOT_YES | |
33765 | && get_attr_var_shift (insn) == VAR_SHIFT_NO) | |
33766 | || (type == TYPE_MUL | |
33767 | && get_attr_dot (insn) == DOT_YES) | |
33768 | || type == TYPE_DIV | |
33769 | || (type == TYPE_INSERT | |
33770 | && get_attr_size (insn) == SIZE_32)) | |
33771 | return true; | |
33772 | } | |
33773 | ||
33774 | return false; | |
33775 | } | |
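| /* For example, lbzu (load byte and zero with update, non-indexed) | |
| matches the second clause above: the processor splits it into | |
| the load itself and the base-register update. */ | |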
33776 | ||
33777 | /* Return true if INSN can be issued only from | |
33778 | the branch slot. */ | |
33779 | ||
33780 | static bool | |
33781 | is_branch_slot_insn (rtx_insn *insn) | |
33782 | { | |
33783 | if (!insn || !NONDEBUG_INSN_P (insn) | |
33784 | || GET_CODE (PATTERN (insn)) == USE | |
33785 | || GET_CODE (PATTERN (insn)) == CLOBBER) | |
33786 | return false; | |
33787 | ||
33788 | if (rs6000_sched_groups) | |
33789 | { | |
33790 | enum attr_type type = get_attr_type (insn); | |
33791 | if (type == TYPE_BRANCH || type == TYPE_JMPREG) | |
33792 | return true; | |
33793 | return false; | |
33794 | } | |
33795 | ||
33796 | return false; | |
33797 | } | |
33798 | ||
33799 | /* Return true if OUT_INSN sets a value that is | |
33800 | used in the address generation computation of IN_INSN. */ | |
33801 | static bool | |
33802 | set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn) | |
33803 | { | |
33804 | rtx out_set, in_set; | |
33805 | ||
33806 | /* For performance reasons, only handle the simple case where | |
33807 | both loads are a single_set. */ | |
33808 | out_set = single_set (out_insn); | |
33809 | if (out_set) | |
33810 | { | |
33811 | in_set = single_set (in_insn); | |
33812 | if (in_set) | |
33813 | return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set)); | |
33814 | } | |
33815 | ||
33816 | return false; | |
33817 | } | |
33818 | ||
33819 | /* Try to determine base/offset/size parts of the given MEM. | |
33820 | Return true if successful, false if the values cannot | |
33821 | all be determined. | |
33822 | ||
33823 | This function only looks for REG or REG+CONST address forms. | |
33824 | REG+REG address form will return false. */ | |
33825 | ||
33826 | static bool | |
33827 | get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset, | |
33828 | HOST_WIDE_INT *size) | |
33829 | { | |
33830 | rtx addr_rtx; | |
33831 | if (MEM_SIZE_KNOWN_P (mem)) | |
33832 | *size = MEM_SIZE (mem); | |
33833 | else | |
33834 | return false; | |
33835 | ||
33836 | addr_rtx = (XEXP (mem, 0)); | |
33837 | if (GET_CODE (addr_rtx) == PRE_MODIFY) | |
33838 | addr_rtx = XEXP (addr_rtx, 1); | |
33839 | ||
33840 | *offset = 0; | |
33841 | while (GET_CODE (addr_rtx) == PLUS | |
33842 | && CONST_INT_P (XEXP (addr_rtx, 1))) | |
33843 | { | |
33844 | *offset += INTVAL (XEXP (addr_rtx, 1)); | |
33845 | addr_rtx = XEXP (addr_rtx, 0); | |
33846 | } | |
33847 | if (!REG_P (addr_rtx)) | |
33848 | return false; | |
33849 | ||
33850 | *base = addr_rtx; | |
33851 | return true; | |
33852 | } | |
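| /* For example, a 4-byte MEM whose address is | |
| (plus (plus (reg 9) (const_int 16)) (const_int 8)) yields | |
| *base = (reg 9), *offset = 24, *size = 4, while a reg+reg | |
| (indexed) address makes the function return false. */ | |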
33853 | ||
33854 | /* Return true if the target storage location of MEM1 is | |
33855 | adjacent to the target storage location of MEM2. */ | |
33857 | ||
33858 | static bool | |
33859 | adjacent_mem_locations (rtx mem1, rtx mem2) | |
33860 | { | |
33861 | rtx reg1, reg2; | |
33862 | HOST_WIDE_INT off1, size1, off2, size2; | |
33863 | ||
33864 | if (get_memref_parts (mem1, &reg1, &off1, &size1) | |
33865 | && get_memref_parts (mem2, &reg2, &off2, &size2)) | |
33866 | return ((REGNO (reg1) == REGNO (reg2)) | |
33867 | && ((off1 + size1 == off2) | |
33868 | || (off2 + size2 == off1))); | |
33869 | ||
33870 | return false; | |
33871 | } | |
33872 | ||
33873 | /* This function returns true if it can be determined that the two MEM | |
33874 | locations overlap by at least 1 byte based on base reg/offset/size. */ | |
33875 | ||
33876 | static bool | |
33877 | mem_locations_overlap (rtx mem1, rtx mem2) | |
33878 | { | |
33879 | rtx reg1, reg2; | |
33880 | HOST_WIDE_INT off1, size1, off2, size2; | |
33881 | ||
33882 | if (get_memref_parts (mem1, &reg1, &off1, &size1) | |
33883 | && get_memref_parts (mem2, &reg2, &off2, &size2)) | |
33884 | return ((REGNO (reg1) == REGNO (reg2)) | |
33885 | && (((off1 <= off2) && (off1 + size1 > off2)) | |
33886 | || ((off2 <= off1) && (off2 + size2 > off1)))); | |
33887 | ||
33888 | return false; | |
33889 | } | |
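| /* For example, with the same base register a 4-byte access at | |
| offset 0 and another at offset 4 are adjacent but do not | |
| overlap, whereas an 8-byte access at offset 0 overlaps a 4-byte | |
| access at offset 4. */ | |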
33890 | ||
33891 | /* A C statement (sans semicolon) to update the integer scheduling | |
33892 | priority INSN_PRIORITY (INSN). Increase the priority to execute the | |
33893 | INSN earlier, reduce the priority to execute INSN later. Do not | |
33894 | define this macro if you do not need to adjust the scheduling | |
33895 | priorities of insns. */ | |
33896 | ||
33897 | static int | |
33898 | rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority) | |
33899 | { | |
33900 | rtx load_mem, str_mem; | |
33901 | /* On machines (like the 750) which have asymmetric integer units, | |
33902 | where one integer unit can do multiply and divides and the other | |
33903 | can't, reduce the priority of multiply/divide so it is scheduled | |
33904 | before other integer operations. */ | |
33905 | ||
33906 | #if 0 | |
33907 | if (! INSN_P (insn)) | |
33908 | return priority; | |
33909 | ||
33910 | if (GET_CODE (PATTERN (insn)) == USE) | |
33911 | return priority; | |
33912 | ||
33913 | switch (rs6000_cpu_attr) { | |
33914 | case CPU_PPC750: | |
33915 | switch (get_attr_type (insn)) | |
33916 | { | |
33917 | default: | |
33918 | break; | |
33919 | ||
33920 | case TYPE_MUL: | |
33921 | case TYPE_DIV: | |
33922 | fprintf (stderr, "priority was %#x (%d) before adjustment\n", | |
33923 | priority, priority); | |
33924 | if (priority >= 0 && priority < 0x01000000) | |
33925 | priority >>= 3; | |
33926 | break; | |
33927 | } | |
33928 | } | |
33929 | #endif | |
33930 | ||
33931 | if (insn_must_be_first_in_group (insn) | |
33932 | && reload_completed | |
33933 | && current_sched_info->sched_max_insns_priority | |
33934 | && rs6000_sched_restricted_insns_priority) | |
33935 | { | |
33937 | /* Prioritize insns that can be dispatched only in the first | |
33938 | dispatch slot. */ | |
33939 | if (rs6000_sched_restricted_insns_priority == 1) | |
33940 | /* Attach highest priority to insn. This means that in | |
33941 | haifa-sched.c:ready_sort(), dispatch-slot restriction considerations | |
33942 | precede 'priority' (critical path) considerations. */ | |
33943 | return current_sched_info->sched_max_insns_priority; | |
33944 | else if (rs6000_sched_restricted_insns_priority == 2) | |
33945 | /* Increase priority of insn by a minimal amount. This means that in | |
33946 | haifa-sched.c:ready_sort(), only 'priority' (critical path) | |
33947 | considerations precede dispatch-slot restriction considerations. */ | |
33948 | return (priority + 1); | |
33949 | } | |
33950 | ||
33951 | if (rs6000_cpu == PROCESSOR_POWER6 | |
33952 | && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem)) | |
33953 | || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem)))) | |
33954 | /* Attach highest priority to insn if the scheduler has just issued two | |
33955 | stores and this instruction is a load, or two loads and this instruction | |
33956 | is a store. Power6 wants loads and stores scheduled alternately | |
33957 | when possible. */ | |
33958 | return current_sched_info->sched_max_insns_priority; | |
33959 | ||
33960 | return priority; | |
33961 | } | |
33962 | ||
33963 | /* Return true if the instruction is nonpipelined on the Cell. */ | |
33964 | static bool | |
33965 | is_nonpipeline_insn (rtx_insn *insn) | |
33966 | { | |
33967 | enum attr_type type; | |
33968 | if (!insn || !NONDEBUG_INSN_P (insn) | |
33969 | || GET_CODE (PATTERN (insn)) == USE | |
33970 | || GET_CODE (PATTERN (insn)) == CLOBBER) | |
33971 | return false; | |
33972 | ||
33973 | type = get_attr_type (insn); | |
33974 | if (type == TYPE_MUL | |
33975 | || type == TYPE_DIV | |
33976 | || type == TYPE_SDIV | |
33977 | || type == TYPE_DDIV | |
33978 | || type == TYPE_SSQRT | |
33979 | || type == TYPE_DSQRT | |
33980 | || type == TYPE_MFCR | |
33981 | || type == TYPE_MFCRF | |
33982 | || type == TYPE_MFJMPR) | |
33983 | { | |
33984 | return true; | |
33985 | } | |
33986 | return false; | |
33987 | } | |
33988 | ||
33989 | ||
33990 | /* Return how many instructions the machine can issue per cycle. */ | |
33991 | ||
33992 | static int | |
33993 | rs6000_issue_rate (void) | |
33994 | { | |
33995 | /* Unless scheduling for register pressure, use issue rate of 1 for | |
33996 | first scheduling pass to decrease degradation. */ | |
33997 | if (!reload_completed && !flag_sched_pressure) | |
33998 | return 1; | |
33999 | ||
34000 | switch (rs6000_cpu_attr) { | |
34001 | case CPU_RS64A: | |
34002 | case CPU_PPC601: /* ? */ | |
34003 | case CPU_PPC7450: | |
34004 | return 3; | |
34005 | case CPU_PPC440: | |
34006 | case CPU_PPC603: | |
34007 | case CPU_PPC750: | |
34008 | case CPU_PPC7400: | |
34009 | case CPU_PPC8540: | |
34010 | case CPU_PPC8548: | |
34011 | case CPU_CELL: | |
34012 | case CPU_PPCE300C2: | |
34013 | case CPU_PPCE300C3: | |
34014 | case CPU_PPCE500MC: | |
34015 | case CPU_PPCE500MC64: | |
34016 | case CPU_PPCE5500: | |
34017 | case CPU_PPCE6500: | |
34018 | case CPU_TITAN: | |
34019 | return 2; | |
34020 | case CPU_PPC476: | |
34021 | case CPU_PPC604: | |
34022 | case CPU_PPC604E: | |
34023 | case CPU_PPC620: | |
34024 | case CPU_PPC630: | |
34025 | return 4; | |
34026 | case CPU_POWER4: | |
34027 | case CPU_POWER5: | |
34028 | case CPU_POWER6: | |
34029 | case CPU_POWER7: | |
34030 | return 5; | |
34031 | case CPU_POWER8: | |
34032 | return 7; | |
34033 | case CPU_POWER9: | |
34034 | return 6; | |
34035 | default: | |
34036 | return 1; | |
34037 | } | |
34038 | } | |
34039 | ||
34040 | /* Return how many instructions to look ahead for better insn | |
34041 | scheduling. */ | |
34042 | ||
34043 | static int | |
34044 | rs6000_use_sched_lookahead (void) | |
34045 | { | |
34046 | switch (rs6000_cpu_attr) | |
34047 | { | |
34048 | case CPU_PPC8540: | |
34049 | case CPU_PPC8548: | |
34050 | return 4; | |
34051 | ||
34052 | case CPU_CELL: | |
34053 | return (reload_completed ? 8 : 0); | |
34054 | ||
34055 | default: | |
34056 | return 0; | |
34057 | } | |
34058 | } | |
34059 | ||
34060 | /* We are choosing an insn from the ready queue. Return zero if INSN can be | |
34061 | chosen. */ | |
34062 | static int | |
34063 | rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index) | |
34064 | { | |
34065 | if (ready_index == 0) | |
34066 | return 0; | |
34067 | ||
34068 | if (rs6000_cpu_attr != CPU_CELL) | |
34069 | return 0; | |
34070 | ||
34071 | gcc_assert (insn != NULL_RTX && INSN_P (insn)); | |
34072 | ||
34073 | if (!reload_completed | |
34074 | || is_nonpipeline_insn (insn) | |
34075 | || is_microcoded_insn (insn)) | |
34076 | return 1; | |
34077 | ||
34078 | return 0; | |
34079 | } | |
34080 | ||
34081 | /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx | |
34082 | and return true. */ | |
34083 | ||
34084 | static bool | |
34085 | find_mem_ref (rtx pat, rtx *mem_ref) | |
34086 | { | |
34087 | const char * fmt; | |
34088 | int i, j; | |
34089 | ||
34090 | /* stack_tie does not produce any real memory traffic. */ | |
34091 | if (tie_operand (pat, VOIDmode)) | |
34092 | return false; | |
34093 | ||
34094 | if (GET_CODE (pat) == MEM) | |
34095 | { | |
34096 | *mem_ref = pat; | |
34097 | return true; | |
34098 | } | |
34099 | ||
34100 | /* Recursively process the pattern. */ | |
34101 | fmt = GET_RTX_FORMAT (GET_CODE (pat)); | |
34102 | ||
34103 | for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--) | |
34104 | { | |
34105 | if (fmt[i] == 'e') | |
34106 | { | |
34107 | if (find_mem_ref (XEXP (pat, i), mem_ref)) | |
34108 | return true; | |
34109 | } | |
34110 | else if (fmt[i] == 'E') | |
34111 | for (j = XVECLEN (pat, i) - 1; j >= 0; j--) | |
34112 | { | |
34113 | if (find_mem_ref (XVECEXP (pat, i, j), mem_ref)) | |
34114 | return true; | |
34115 | } | |
34116 | } | |
34117 | ||
34118 | return false; | |
34119 | } | |
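| /* Note that the walk above stops at the first MEM found in | |
| depth-first order, so when a pattern contains several MEMs only | |
| one of them is reported back through MEM_REF. */ | |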
34120 | ||
34121 | /* Determine if PAT is a PATTERN of a load insn. */ | |
34122 | ||
34123 | static bool | |
34124 | is_load_insn1 (rtx pat, rtx *load_mem) | |
34125 | { | |
34126 | if (!pat) | |
34127 | return false; | |
34128 | ||
34129 | if (GET_CODE (pat) == SET) | |
34130 | return find_mem_ref (SET_SRC (pat), load_mem); | |
34131 | ||
34132 | if (GET_CODE (pat) == PARALLEL) | |
34133 | { | |
34134 | int i; | |
34135 | ||
34136 | for (i = 0; i < XVECLEN (pat, 0); i++) | |
34137 | if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem)) | |
34138 | return true; | |
34139 | } | |
34140 | ||
34141 | return false; | |
34142 | } | |
34143 | ||
34144 | /* Determine if INSN loads from memory. */ | |
34145 | ||
34146 | static bool | |
34147 | is_load_insn (rtx insn, rtx *load_mem) | |
34148 | { | |
34149 | if (!insn || !INSN_P (insn)) | |
34150 | return false; | |
34151 | ||
34152 | if (CALL_P (insn)) | |
34153 | return false; | |
34154 | ||
34155 | return is_load_insn1 (PATTERN (insn), load_mem); | |
34156 | } | |
34157 | ||
34158 | /* Determine if PAT is a PATTERN of a store insn. */ | |
34159 | ||
34160 | static bool | |
34161 | is_store_insn1 (rtx pat, rtx *str_mem) | |
34162 | { | |
34163 | if (!pat) | |
34164 | return false; | |
34165 | ||
34166 | if (GET_CODE (pat) == SET) | |
34167 | return find_mem_ref (SET_DEST (pat), str_mem); | |
34168 | ||
34169 | if (GET_CODE (pat) == PARALLEL) | |
34170 | { | |
34171 | int i; | |
34172 | ||
34173 | for (i = 0; i < XVECLEN (pat, 0); i++) | |
34174 | if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem)) | |
34175 | return true; | |
34176 | } | |
34177 | ||
34178 | return false; | |
34179 | } | |
34180 | ||
34181 | /* Determine if INSN stores to memory. */ | |
34182 | ||
34183 | static bool | |
34184 | is_store_insn (rtx insn, rtx *str_mem) | |
34185 | { | |
34186 | if (!insn || !INSN_P (insn)) | |
34187 | return false; | |
34188 | ||
34189 | return is_store_insn1 (PATTERN (insn), str_mem); | |
34190 | } | |
34191 | ||
34192 | /* Return whether TYPE is a Power9 pairable vector instruction type. */ | |
34193 | ||
34194 | static bool | |
34195 | is_power9_pairable_vec_type (enum attr_type type) | |
34196 | { | |
34197 | switch (type) | |
34198 | { | |
34199 | case TYPE_VECSIMPLE: | |
34200 | case TYPE_VECCOMPLEX: | |
34201 | case TYPE_VECDIV: | |
34202 | case TYPE_VECCMP: | |
34203 | case TYPE_VECPERM: | |
34204 | case TYPE_VECFLOAT: | |
34205 | case TYPE_VECFDIV: | |
34206 | case TYPE_VECDOUBLE: | |
34207 | return true; | |
34208 | default: | |
34209 | break; | |
34210 | } | |
34211 | return false; | |
34212 | } | |
34213 | ||
34214 | /* Return whether the dependence between INSN and NEXT is considered | |
34215 | costly by the given target. */ | |
34216 | ||
34217 | static bool | |
34218 | rs6000_is_costly_dependence (dep_t dep, int cost, int distance) | |
34219 | { | |
34220 | rtx insn; | |
34221 | rtx next; | |
34222 | rtx load_mem, str_mem; | |
34223 | ||
34224 | /* If the flag is not enabled - no dependence is considered costly; | |
34225 | allow all dependent insns in the same group. | |
34226 | This is the most aggressive option. */ | |
34227 | if (rs6000_sched_costly_dep == no_dep_costly) | |
34228 | return false; | |
34229 | ||
34230 | /* If the flag is set to 1 - a dependence is always considered costly; | |
34231 | do not allow dependent instructions in the same group. | |
34232 | This is the most conservative option. */ | |
34233 | if (rs6000_sched_costly_dep == all_deps_costly) | |
34234 | return true; | |
34235 | ||
34236 | insn = DEP_PRO (dep); | |
34237 | next = DEP_CON (dep); | |
34238 | ||
34239 | if (rs6000_sched_costly_dep == store_to_load_dep_costly | |
34240 | && is_load_insn (next, &load_mem) | |
34241 | && is_store_insn (insn, &str_mem)) | |
34242 | /* Prevent load after store in the same group. */ | |
34243 | return true; | |
34244 | ||
34245 | if (rs6000_sched_costly_dep == true_store_to_load_dep_costly | |
34246 | && is_load_insn (next, &load_mem) | |
34247 | && is_store_insn (insn, &str_mem) | |
34248 | && DEP_TYPE (dep) == REG_DEP_TRUE | |
34249 | && mem_locations_overlap (str_mem, load_mem)) | |
34250 | /* Prevent load after store in the same group if it is a true | |
34251 | dependence. */ | |
34252 | return true; | |
34253 | ||
34254 | /* The flag is set to X; dependences with latency >= X are considered costly, | |
34255 | and will not be scheduled in the same group. */ | |
34256 | if (rs6000_sched_costly_dep <= max_dep_latency | |
34257 | && ((cost - distance) >= (int)rs6000_sched_costly_dep)) | |
34258 | return true; | |
34259 | ||
34260 | return false; | |
34261 | } | |
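| /* For example, with -msched-costly-dep=5 a dependence whose | |
| adjusted latency (cost - distance) is at least 5 is kept out of | |
| the dispatch group, while cheaper dependences may share it. */ | |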
34262 | ||
34263 | /* Return the next insn after INSN that is found before TAIL is reached, | |
34264 | skipping any "non-active" insns - insns that will not actually occupy | |
34265 | an issue slot. Return NULL_RTX if such an insn is not found. */ | |
34266 | ||
34267 | static rtx_insn * | |
34268 | get_next_active_insn (rtx_insn *insn, rtx_insn *tail) | |
34269 | { | |
34270 | if (insn == NULL_RTX || insn == tail) | |
34271 | return NULL; | |
34272 | ||
34273 | while (1) | |
34274 | { | |
34275 | insn = NEXT_INSN (insn); | |
34276 | if (insn == NULL_RTX || insn == tail) | |
34277 | return NULL; | |
34278 | ||
34279 | if (CALL_P (insn) | |
34280 | || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) | |
34281 | || (NONJUMP_INSN_P (insn) | |
34282 | && GET_CODE (PATTERN (insn)) != USE | |
34283 | && GET_CODE (PATTERN (insn)) != CLOBBER | |
34284 | && INSN_CODE (insn) != CODE_FOR_stack_tie)) | |
34285 | break; | |
34286 | } | |
34287 | return insn; | |
34288 | } | |
34289 | ||
34290 | /* Do Power9 specific sched_reorder2 reordering of ready list. */ | |
34291 | ||
34292 | static int | |
34293 | power9_sched_reorder2 (rtx_insn **ready, int lastpos) | |
34294 | { | |
34295 | int pos; | |
34296 | int i; | |
34297 | rtx_insn *tmp; | |
34298 | enum attr_type type, type2; | |
34299 | ||
34300 | type = get_attr_type (last_scheduled_insn); | |
34301 | ||
34302 | /* Try to issue fixed point divides back-to-back in pairs so they will be | |
34303 | routed to separate execution units and execute in parallel. */ | |
34304 | if (type == TYPE_DIV && divide_cnt == 0) | |
34305 | { | |
34306 | /* First divide has been scheduled. */ | |
34307 | divide_cnt = 1; | |
34308 | ||
34309 | /* Scan the ready list looking for another divide, if found move it | |
34310 | to the end of the list so it is chosen next. */ | |
34311 | pos = lastpos; | |
34312 | while (pos >= 0) | |
34313 | { | |
34314 | if (recog_memoized (ready[pos]) >= 0 | |
34315 | && get_attr_type (ready[pos]) == TYPE_DIV) | |
34316 | { | |
34317 | tmp = ready[pos]; | |
34318 | for (i = pos; i < lastpos; i++) | |
34319 | ready[i] = ready[i + 1]; | |
34320 | ready[lastpos] = tmp; | |
34321 | break; | |
34322 | } | |
34323 | pos--; | |
34324 | } | |
34325 | } | |
34326 | else | |
34327 | { | |
34328 | /* Last insn was the 2nd divide or not a divide, reset the counter. */ | |
34329 | divide_cnt = 0; | |
34330 | ||
34331 | /* The best dispatch throughput for vector and vector load insns can be | |
34332 | achieved by interleaving a vector and vector load such that they'll | |
34333 | dispatch to the same superslice. If this pairing cannot be achieved | |
34334 | then it is best to pair vector insns together and vector load insns | |
34335 | together. | |
34336 | ||
34337 | To aid in this pairing, vec_pairing maintains the current state with | |
34338 | the following values: | |
34339 | ||
34340 | 0 : Initial state, no vecload/vector pairing has been started. | |
34341 | ||
34342 | 1 : A vecload or vector insn has been issued and a candidate for | |
34343 | pairing has been found and moved to the end of the ready | |
34344 | list. */ | |
34345 | if (type == TYPE_VECLOAD) | |
34346 | { | |
34347 | /* Issued a vecload. */ | |
34348 | if (vec_pairing == 0) | |
34349 | { | |
34350 | int vecload_pos = -1; | |
34351 | /* We issued a single vecload, look for a vector insn to pair it | |
34352 | with. If one isn't found, try to pair another vecload. */ | |
34353 | pos = lastpos; | |
34354 | while (pos >= 0) | |
34355 | { | |
34356 | if (recog_memoized (ready[pos]) >= 0) | |
34357 | { | |
34358 | type2 = get_attr_type (ready[pos]); | |
34359 | if (is_power9_pairable_vec_type (type2)) | |
34360 | { | |
34361 | /* Found a vector insn to pair with, move it to the | |
34362 | end of the ready list so it is scheduled next. */ | |
34363 | tmp = ready[pos]; | |
34364 | for (i = pos; i < lastpos; i++) | |
34365 | ready[i] = ready[i + 1]; | |
34366 | ready[lastpos] = tmp; | |
34367 | vec_pairing = 1; | |
34368 | return cached_can_issue_more; | |
34369 | } | |
34370 | else if (type2 == TYPE_VECLOAD && vecload_pos == -1) | |
34371 | /* Remember position of first vecload seen. */ | |
34372 | vecload_pos = pos; | |
34373 | } | |
34374 | pos--; | |
34375 | } | |
34376 | if (vecload_pos >= 0) | |
34377 | { | |
34378 | /* Didn't find a vector to pair with but did find a vecload, | |
34379 | move it to the end of the ready list. */ | |
34380 | tmp = ready[vecload_pos]; | |
34381 | for (i = vecload_pos; i < lastpos; i++) | |
34382 | ready[i] = ready[i + 1]; | |
34383 | ready[lastpos] = tmp; | |
34384 | vec_pairing = 1; | |
34385 | return cached_can_issue_more; | |
34386 | } | |
34387 | } | |
34388 | } | |
34389 | else if (is_power9_pairable_vec_type (type)) | |
34390 | { | |
34391 | /* Issued a vector operation. */ | |
34392 | if (vec_pairing == 0) | |
34393 | { | |
34394 | int vec_pos = -1; | |
34395 | /* We issued a single vector insn, look for a vecload to pair it | |
34396 | with. If one isn't found, try to pair another vector. */ | |
34397 | pos = lastpos; | |
34398 | while (pos >= 0) | |
34399 | { | |
34400 | if (recog_memoized (ready[pos]) >= 0) | |
34401 | { | |
34402 | type2 = get_attr_type (ready[pos]); | |
34403 | if (type2 == TYPE_VECLOAD) | |
34404 | { | |
34405 | /* Found a vecload insn to pair with, move it to the | |
34406 | end of the ready list so it is scheduled next. */ | |
34407 | tmp = ready[pos]; | |
34408 | for (i = pos; i < lastpos; i++) | |
34409 | ready[i] = ready[i + 1]; | |
34410 | ready[lastpos] = tmp; | |
34411 | vec_pairing = 1; | |
34412 | return cached_can_issue_more; | |
34413 | } | |
34414 | else if (is_power9_pairable_vec_type (type2) | |
34415 | && vec_pos == -1) | |
34416 | /* Remember position of first vector insn seen. */ | |
34417 | vec_pos = pos; | |
34418 | } | |
34419 | pos--; | |
34420 | } | |
34421 | if (vec_pos >= 0) | |
34422 | { | |
34423 | /* Didn't find a vecload to pair with but did find a vector | |
34424 | insn, move it to the end of the ready list. */ | |
34425 | tmp = ready[vec_pos]; | |
34426 | for (i = vec_pos; i < lastpos; i++) | |
34427 | ready[i] = ready[i + 1]; | |
34428 | ready[lastpos] = tmp; | |
34429 | vec_pairing = 1; | |
34430 | return cached_can_issue_more; | |
34431 | } | |
34432 | } | |
34433 | } | |
34434 | ||
34435 | /* We've either finished a vec/vecload pair, couldn't find an insn to | |
34436 | continue the current pair, or the last insn had nothing to do with | |
34437 | pairing. In any case, reset the state. */ | |
34438 | vec_pairing = 0; | |
34439 | } | |
34440 | ||
34441 | return cached_can_issue_more; | |
34442 | } | |
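| /* A sketch of the intended pattern: after a vecload issues with | |
| vec_pairing == 0, the first pairable vector insn found on the | |
| ready list is rotated to the end so it is chosen next and the | |
| two insns can dispatch to the same superslice; failing that, | |
| another vecload is paired instead, and the mirror case holds | |
| for vector insns looking for a vecload. */ | |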
34443 | ||
34444 | /* We are about to begin issuing insns for this clock cycle. */ | |
34445 | ||
34446 | static int | |
34447 | rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose, | |
34448 | rtx_insn **ready ATTRIBUTE_UNUSED, | |
34449 | int *pn_ready ATTRIBUTE_UNUSED, | |
34450 | int clock_var ATTRIBUTE_UNUSED) | |
34451 | { | |
34452 | int n_ready = *pn_ready; | |
34453 | ||
34454 | if (sched_verbose) | |
34455 | fprintf (dump, "// rs6000_sched_reorder :\n"); | |
34456 | ||
34457 | /* Reorder the ready list if the next insn to be issued | |
34458 | is a nonpipelined insn. */ | |
34459 | if (rs6000_cpu_attr == CPU_CELL && n_ready > 1) | |
34460 | { | |
34461 | if (is_nonpipeline_insn (ready[n_ready - 1]) | |
34462 | && (recog_memoized (ready[n_ready - 2]) > 0)) | |
34463 | /* Simply swap first two insns. */ | |
34464 | std::swap (ready[n_ready - 1], ready[n_ready - 2]); | |
34465 | } | |
34466 | ||
34467 | if (rs6000_cpu == PROCESSOR_POWER6) | |
34468 | load_store_pendulum = 0; | |
34469 | ||
34470 | return rs6000_issue_rate (); | |
34471 | } | |
34472 | ||
34473 | /* Like rs6000_sched_reorder, but called after issuing each insn. */ | |
34474 | ||
34475 | static int | |
34476 | rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready, | |
34477 | int *pn_ready, int clock_var ATTRIBUTE_UNUSED) | |
34478 | { | |
34479 | if (sched_verbose) | |
34480 | fprintf (dump, "// rs6000_sched_reorder2 :\n"); | |
34481 | ||
34482 | /* For Power6, we need to handle some special cases to try and keep the | |
34483 | store queue from overflowing and triggering expensive flushes. | |
34484 | ||
34485 | This code monitors how load and store instructions are being issued | |
34486 | and skews the ready list one way or the other to increase the likelihood | |
34487 | that a desired instruction is issued at the proper time. | |
34488 | ||
34489 | A couple of things are done. First, we maintain a "load_store_pendulum" | |
34490 | to track the current state of load/store issue. | |
34491 | ||
34492 | - If the pendulum is at zero, then no loads or stores have been | |
34493 | issued in the current cycle so we do nothing. | |
34494 | ||
34495 | - If the pendulum is 1, then a single load has been issued in this | |
34496 | cycle and we attempt to locate another load in the ready list to | |
34497 | issue with it. | |
34498 | ||
34499 | - If the pendulum is -2, then two stores have already been | |
34500 | issued in this cycle, so we increase the priority of the first load | |
34501 | in the ready list to increase its likelihood of being chosen first | |
34502 | in the next cycle. | |
34503 | ||
34504 | - If the pendulum is -1, then a single store has been issued in this | |
34505 | cycle and we attempt to locate another store in the ready list to | |
34506 | issue with it, preferring a store to an adjacent memory location to | |
34507 | facilitate store pairing in the store queue. | |
34508 | ||
34509 | - If the pendulum is 2, then two loads have already been | |
34510 | issued in this cycle, so we increase the priority of the first store | |
34511 | in the ready list to increase its likelihood of being chosen first | |
34512 | in the next cycle. | |
34513 | ||
34514 | - If the pendulum < -2 or > 2, then do nothing. | |
34515 | ||
34516 | Note: This code covers the most common scenarios. There exist | |
34517 | non-load/store instructions which make use of the LSU and which | |
34518 | would need to be accounted for to strictly model the behavior | |
34519 | of the machine. Those instructions are currently unaccounted | |
34520 | for to help minimize compile time overhead of this code. | |
34521 | */ | |
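| /* A short trace: after two stores issue in a cycle the pendulum | |
| sits at -2, so the first load on the ready list gets a priority | |
| bump and the pendulum moves to -3, ensuring only one load is | |
| favored per cycle. */ | |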
34522 | if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn) | |
34523 | { | |
34524 | int pos; | |
34525 | int i; | |
34526 | rtx_insn *tmp; | |
34527 | rtx load_mem, str_mem; | |
34528 | ||
34529 | if (is_store_insn (last_scheduled_insn, &str_mem)) | |
34530 | /* Issuing a store, swing the load_store_pendulum to the left */ | |
34531 | load_store_pendulum--; | |
34532 | else if (is_load_insn (last_scheduled_insn, &load_mem)) | |
34533 | /* Issuing a load, swing the load_store_pendulum to the right */ | |
34534 | load_store_pendulum++; | |
34535 | else | |
34536 | return cached_can_issue_more; | |
34537 | ||
34538 | /* If the pendulum is balanced, or there is only one instruction on | |
34539 | the ready list, then all is well, so return. */ | |
34540 | if ((load_store_pendulum == 0) || (*pn_ready <= 1)) | |
34541 | return cached_can_issue_more; | |
34542 | ||
34543 | if (load_store_pendulum == 1) | |
34544 | { | |
34545 | /* A load has been issued in this cycle. Scan the ready list | |
34546 | for another load to issue with it */ | |
34547 | pos = *pn_ready - 1; | |
34548 | ||
34549 | while (pos >= 0) | |
34550 | { | |
34551 | if (is_load_insn (ready[pos], &load_mem)) | |
34552 | { | |
34553 | /* Found a load. Move it to the head of the ready list, | |
34554 | and adjust its priority so that it is more likely to | |
34555 | stay there */ | |
34556 | tmp = ready[pos]; | |
34557 | for (i = pos; i < *pn_ready - 1; i++) | |
34558 | ready[i] = ready[i + 1]; | |
34559 | ready[*pn_ready - 1] = tmp; | |
34560 | ||
34561 | if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp)) | |
34562 | INSN_PRIORITY (tmp)++; | |
34563 | break; | |
34564 | } | |
34565 | pos--; | |
34566 | } | |
34567 | } | |
34568 | else if (load_store_pendulum == -2) | |
34569 | { | |
34570 | /* Two stores have been issued in this cycle. Increase the | |
34571 | priority of the first load in the ready list to favor it for | |
34572 | issuing in the next cycle. */ | |
34573 | pos = *pn_ready - 1; | |
34574 | ||
34575 | while (pos >= 0) | |
34576 | { | |
34577 | if (is_load_insn (ready[pos], &load_mem) | |
34578 | && !sel_sched_p () | |
34579 | && INSN_PRIORITY_KNOWN (ready[pos])) | |
34580 | { | |
34581 | INSN_PRIORITY (ready[pos])++; | |
34582 | ||
34583 | /* Adjust the pendulum to account for the fact that a load | |
34584 | was found and increased in priority. This is to prevent | |
34585 | increasing the priority of multiple loads */ | |
34586 | load_store_pendulum--; | |
34587 | ||
34588 | break; | |
34589 | } | |
34590 | pos--; | |
34591 | } | |
34592 | } | |
34593 | else if (load_store_pendulum == -1) | |
34594 | { | |
34595 | /* A store has been issued in this cycle. Scan the ready list for | |
34596 | another store to issue with it, preferring a store to an adjacent | |
34597 | memory location */ | |
34598 | int first_store_pos = -1; | |
34599 | ||
34600 | pos = *pn_ready - 1; | |
34601 | ||
34602 | while (pos >= 0) | |
34603 | { | |
34604 | if (is_store_insn (ready[pos], &str_mem)) | |
34605 | { | |
34606 | rtx str_mem2; | |
34607 | /* Maintain the index of the first store found on the | |
34608 | list */ | |
34609 | if (first_store_pos == -1) | |
34610 | first_store_pos = pos; | |
34611 | ||
34612 | if (is_store_insn (last_scheduled_insn, &str_mem2) | |
34613 | && adjacent_mem_locations (str_mem, str_mem2)) | |
34614 | { | |
34615 | /* Found an adjacent store. Move it to the head of the | |
34616 | ready list, and adjust its priority so that it is | |
34617 | more likely to stay there */ | |
34618 | tmp = ready[pos]; | |
34619 | for (i = pos; i < *pn_ready - 1; i++) | |
34620 | ready[i] = ready[i + 1]; | |
34621 | ready[*pn_ready - 1] = tmp; | |
34622 | ||
34623 | if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp)) | |
34624 | INSN_PRIORITY (tmp)++; | |
34625 | ||
34626 | first_store_pos = -1; | |
34627 | ||
34628 | break; | |
34629 | } | |
34630 | } | |
34631 | pos--; | |
34632 | } | |
34633 | ||
34634 | if (first_store_pos >= 0) | |
34635 | { | |
34636 | /* An adjacent store wasn't found, but a non-adjacent store was, | |
34637 | so move the non-adjacent store to the front of the ready | |
34638 | list, and adjust its priority so that it is more likely to | |
34639 | stay there. */ | |
34640 | tmp = ready[first_store_pos]; | |
34641 | for (i = first_store_pos; i < *pn_ready - 1; i++) | |
34642 | ready[i] = ready[i + 1]; | |
34643 | ready[*pn_ready - 1] = tmp; | |
34644 | if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp)) | |
34645 | INSN_PRIORITY (tmp)++; | |
34646 | } | |
34647 | } | |
34648 | else if (load_store_pendulum == 2) | |
34649 | { | |
34650 | /* Two loads have been issued in this cycle. Increase the priority | |
34651 | of the first store in the ready list to favor it for issuing in | |
34652 | the next cycle. */ | |
34653 | pos = *pn_ready - 1; | |
34654 | ||
34655 | while (pos >= 0) | |
34656 | { | |
34657 | if (is_store_insn (ready[pos], &str_mem) | |
34658 | && !sel_sched_p () | |
34659 | && INSN_PRIORITY_KNOWN (ready[pos])) | |
34660 | { | |
34661 | INSN_PRIORITY (ready[pos])++; | |
34662 | ||
34663 | /* Adjust the pendulum to account for the fact that a store | |
34664 | was found and increased in priority. This is to prevent | |
34665 | increasing the priority of multiple stores */ | |
34666 | load_store_pendulum++; | |
34667 | ||
34668 | break; | |
34669 | } | |
34670 | pos--; | |
34671 | } | |
34672 | } | |
34673 | } | |
34674 | ||
34675 | /* Do Power9 dependent reordering if necessary. */ | |
34676 | if (rs6000_cpu == PROCESSOR_POWER9 && last_scheduled_insn | |
34677 | && recog_memoized (last_scheduled_insn) >= 0) | |
34678 | return power9_sched_reorder2 (ready, *pn_ready - 1); | |
34679 | ||
34680 | return cached_can_issue_more; | |
34681 | } | |
34682 | ||
34683 | /* Return whether the presence of INSN causes a dispatch group termination | |
34684 | of group WHICH_GROUP. | |
34685 | ||
34686 | If WHICH_GROUP == current_group, this function will return true if INSN | |
34687 | causes the termination of the current group (i.e., the dispatch group to | |
34688 | which INSN belongs). This means that INSN will be the last insn in the | |
34689 | group it belongs to. | |
34690 | ||
34691 | If WHICH_GROUP == previous_group, this function will return true if INSN | |
34692 | causes the termination of the previous group (i.e., the dispatch group that | |
34693 | precedes the group to which INSN belongs). This means that INSN will be | |
34694 | the first insn in the group it belongs to. */ | |
34695 | ||
34696 | static bool | |
34697 | insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group) | |
34698 | { | |
34699 | bool first, last; | |
34700 | ||
34701 | if (! insn) | |
34702 | return false; | |
34703 | ||
34704 | first = insn_must_be_first_in_group (insn); | |
34705 | last = insn_must_be_last_in_group (insn); | |
34706 | ||
34707 | if (first && last) | |
34708 | return true; | |
34709 | ||
34710 | if (which_group == current_group) | |
34711 | return last; | |
34712 | else if (which_group == previous_group) | |
34713 | return first; | |
34714 | ||
34715 | return false; | |
34716 | } | |
34717 | ||
34718 | ||
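| /* Return true if INSN must be the first insn in a dispatch | |
| group, i.e. it can only be dispatched in the first slot of a | |
| group on the current processor. */ | |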
34719 | static bool | |
34720 | insn_must_be_first_in_group (rtx_insn *insn) | |
34721 | { | |
34722 | enum attr_type type; | |
34723 | ||
34724 | if (!insn | |
34725 | || NOTE_P (insn) | |
34726 | || DEBUG_INSN_P (insn) | |
34727 | || GET_CODE (PATTERN (insn)) == USE | |
34728 | || GET_CODE (PATTERN (insn)) == CLOBBER) | |
34729 | return false; | |
34730 | ||
34731 | switch (rs6000_cpu) | |
34732 | { | |
34733 | case PROCESSOR_POWER5: | |
34734 | if (is_cracked_insn (insn)) | |
34735 | return true; | |
34736 | /* FALLTHRU */ | |
34737 | case PROCESSOR_POWER4: | |
34738 | if (is_microcoded_insn (insn)) | |
34739 | return true; | |
34740 | ||
34741 | if (!rs6000_sched_groups) | |
34742 | return false; | |
34743 | ||
34744 | type = get_attr_type (insn); | |
34745 | ||
34746 | switch (type) | |
34747 | { | |
34748 | case TYPE_MFCR: | |
34749 | case TYPE_MFCRF: | |
34750 | case TYPE_MTCR: | |
34751 | case TYPE_DELAYED_CR: | |
34752 | case TYPE_CR_LOGICAL: | |
34753 | case TYPE_MTJMPR: | |
34754 | case TYPE_MFJMPR: | |
34755 | case TYPE_DIV: | |
34756 | case TYPE_LOAD_L: | |
34757 | case TYPE_STORE_C: | |
34758 | case TYPE_ISYNC: | |
34759 | case TYPE_SYNC: | |
34760 | return true; | |
34761 | default: | |
34762 | break; | |
34763 | } | |
34764 | break; | |
34765 | case PROCESSOR_POWER6: | |
34766 | type = get_attr_type (insn); | |
34767 | ||
34768 | switch (type) | |
34769 | { | |
34770 | case TYPE_EXTS: | |
34771 | case TYPE_CNTLZ: | |
34772 | case TYPE_TRAP: | |
34773 | case TYPE_MUL: | |
34774 | case TYPE_INSERT: | |
34775 | case TYPE_FPCOMPARE: | |
34776 | case TYPE_MFCR: | |
34777 | case TYPE_MTCR: | |
34778 | case TYPE_MFJMPR: | |
34779 | case TYPE_MTJMPR: | |
34780 | case TYPE_ISYNC: | |
34781 | case TYPE_SYNC: | |
34782 | case TYPE_LOAD_L: | |
34783 | case TYPE_STORE_C: | |
34784 | return true; | |
34785 | case TYPE_SHIFT: | |
34786 | if (get_attr_dot (insn) == DOT_NO | |
34787 | || get_attr_var_shift (insn) == VAR_SHIFT_NO) | |
34788 | return true; | |
34789 | else | |
34790 | break; | |
34791 | case TYPE_DIV: | |
34792 | if (get_attr_size (insn) == SIZE_32) | |
34793 | return true; | |
34794 | else | |
34795 | break; | |
34796 | case TYPE_LOAD: | |
34797 | case TYPE_STORE: | |
34798 | case TYPE_FPLOAD: | |
34799 | case TYPE_FPSTORE: | |
34800 | if (get_attr_update (insn) == UPDATE_YES) | |
34801 | return true; | |
34802 | else | |
34803 | break; | |
34804 | default: | |
34805 | break; | |
34806 | } | |
34807 | break; | |
34808 | case PROCESSOR_POWER7: | |
34809 | type = get_attr_type (insn); | |
34810 | ||
34811 | switch (type) | |
34812 | { | |
34813 | case TYPE_CR_LOGICAL: | |
34814 | case TYPE_MFCR: | |
34815 | case TYPE_MFCRF: | |
34816 | case TYPE_MTCR: | |
34817 | case TYPE_DIV: | |
34818 | case TYPE_ISYNC: | |
34819 | case TYPE_LOAD_L: | |
34820 | case TYPE_STORE_C: | |
34821 | case TYPE_MFJMPR: | |
34822 | case TYPE_MTJMPR: | |
34823 | return true; | |
34824 | case TYPE_MUL: | |
34825 | case TYPE_SHIFT: | |
34826 | case TYPE_EXTS: | |
34827 | if (get_attr_dot (insn) == DOT_YES) | |
34828 | return true; | |
34829 | else | |
34830 | break; | |
34831 | case TYPE_LOAD: | |
34832 | if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES | |
34833 | || get_attr_update (insn) == UPDATE_YES) | |
34834 | return true; | |
34835 | else | |
34836 | break; | |
34837 | case TYPE_STORE: | |
34838 | case TYPE_FPLOAD: | |
34839 | case TYPE_FPSTORE: | |
34840 | if (get_attr_update (insn) == UPDATE_YES) | |
34841 | return true; | |
34842 | else | |
34843 | break; | |
34844 | default: | |
34845 | break; | |
34846 | } | |
34847 | break; | |
34848 | case PROCESSOR_POWER8: | |
34849 | type = get_attr_type (insn); | |
34850 | ||
34851 | switch (type) | |
34852 | { | |
34853 | case TYPE_CR_LOGICAL: | |
34854 | case TYPE_DELAYED_CR: | |
34855 | case TYPE_MFCR: | |
34856 | case TYPE_MFCRF: | |
34857 | case TYPE_MTCR: | |
34858 | case TYPE_SYNC: | |
34859 | case TYPE_ISYNC: | |
34860 | case TYPE_LOAD_L: | |
34861 | case TYPE_STORE_C: | |
34862 | case TYPE_VECSTORE: | |
34863 | case TYPE_MFJMPR: | |
34864 | case TYPE_MTJMPR: | |
34865 | return true; | |
34866 | case TYPE_SHIFT: | |
34867 | case TYPE_EXTS: | |
34868 | case TYPE_MUL: | |
34869 | if (get_attr_dot (insn) == DOT_YES) | |
34870 | return true; | |
34871 | else | |
34872 | break; | |
34873 | case TYPE_LOAD: | |
34874 | if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES | |
34875 | || get_attr_update (insn) == UPDATE_YES) | |
34876 | return true; | |
34877 | else | |
34878 | break; | |
34879 | case TYPE_STORE: | |
34880 | if (get_attr_update (insn) == UPDATE_YES | |
34881 | && get_attr_indexed (insn) == INDEXED_YES) | |
34882 | return true; | |
34883 | else | |
34884 | break; | |
34885 | default: | |
34886 | break; | |
34887 | } | |
34888 | break; | |
34889 | default: | |
34890 | break; | |
34891 | } | |
34892 | ||
34893 | return false; | |
34894 | } | |
34895 | ||
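| /* Return true if INSN must be the last insn in a dispatch group, | |
| i.e. no further insns can be dispatched in the same group after | |
| it on the current processor. */ | |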
34896 | static bool | |
34897 | insn_must_be_last_in_group (rtx_insn *insn) | |
34898 | { | |
34899 | enum attr_type type; | |
34900 | ||
34901 | if (!insn | |
34902 | || NOTE_P (insn) | |
34903 | || DEBUG_INSN_P (insn) | |
34904 | || GET_CODE (PATTERN (insn)) == USE | |
34905 | || GET_CODE (PATTERN (insn)) == CLOBBER) | |
34906 | return false; | |
34907 | ||
34908 | switch (rs6000_cpu) { | |
34909 | case PROCESSOR_POWER4: | |
34910 | case PROCESSOR_POWER5: | |
34911 | if (is_microcoded_insn (insn)) | |
34912 | return true; | |
34913 | ||
34914 | if (is_branch_slot_insn (insn)) | |
34915 | return true; | |
34916 | ||
34917 | break; | |
34918 | case PROCESSOR_POWER6: | |
34919 | type = get_attr_type (insn); | |
34920 | ||
34921 | switch (type) | |
34922 | { | |
34923 | case TYPE_EXTS: | |
34924 | case TYPE_CNTLZ: | |
34925 | case TYPE_TRAP: | |
34926 | case TYPE_MUL: | |
34927 | case TYPE_FPCOMPARE: | |
34928 | case TYPE_MFCR: | |
34929 | case TYPE_MTCR: | |
34930 | case TYPE_MFJMPR: | |
34931 | case TYPE_MTJMPR: | |
34932 | case TYPE_ISYNC: | |
34933 | case TYPE_SYNC: | |
34934 | case TYPE_LOAD_L: | |
34935 | case TYPE_STORE_C: | |
34936 | return true; | |
34937 | case TYPE_SHIFT: | |
34938 | if (get_attr_dot (insn) == DOT_NO | |
34939 | || get_attr_var_shift (insn) == VAR_SHIFT_NO) | |
34940 | return true; | |
34941 | else | |
34942 | break; | |
34943 | case TYPE_DIV: | |
34944 | if (get_attr_size (insn) == SIZE_32) | |
34945 | return true; | |
34946 | else | |
34947 | break; | |
34948 | default: | |
34949 | break; | |
34950 | } | |
34951 | break; | |
34952 | case PROCESSOR_POWER7: | |
34953 | type = get_attr_type (insn); | |
34954 | ||
34955 | switch (type) | |
34956 | { | |
34957 | case TYPE_ISYNC: | |
34958 | case TYPE_SYNC: | |
34959 | case TYPE_LOAD_L: | |
34960 | case TYPE_STORE_C: | |
34961 | return true; | |
34962 | case TYPE_LOAD: | |
34963 | if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES | |
34964 | && get_attr_update (insn) == UPDATE_YES) | |
34965 | return true; | |
34966 | else | |
34967 | break; | |
34968 | case TYPE_STORE: | |
34969 | if (get_attr_update (insn) == UPDATE_YES | |
34970 | && get_attr_indexed (insn) == INDEXED_YES) | |
34971 | return true; | |
34972 | else | |
34973 | break; | |
34974 | default: | |
34975 | break; | |
34976 | } | |
34977 | break; | |
34978 | case PROCESSOR_POWER8: | |
34979 | type = get_attr_type (insn); | |
34980 | ||
34981 | switch (type) | |
34982 | { | |
34983 | case TYPE_MFCR: | |
34984 | case TYPE_MTCR: | |
34985 | case TYPE_ISYNC: | |
34986 | case TYPE_SYNC: | |
34987 | case TYPE_LOAD_L: | |
34988 | case TYPE_STORE_C: | |
34989 | return true; | |
34990 | case TYPE_LOAD: | |
34991 | if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES | |
34992 | && get_attr_update (insn) == UPDATE_YES) | |
34993 | return true; | |
34994 | else | |
34995 | break; | |
34996 | case TYPE_STORE: | |
34997 | if (get_attr_update (insn) == UPDATE_YES | |
34998 | && get_attr_indexed (insn) == INDEXED_YES) | |
34999 | return true; | |
35000 | else | |
35001 | break; | |
35002 | default: | |
35003 | break; | |
35004 | } | |
35005 | break; | |
35006 | default: | |
35007 | break; | |
35008 | } | |
35009 | ||
35010 | return false; | |
35011 | } | |
35012 | ||
35013 | /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate | |
35014 | dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */ | |
35015 | ||
35016 | static bool | |
35017 | is_costly_group (rtx *group_insns, rtx next_insn) | |
35018 | { | |
35019 | int i; | |
35020 | int issue_rate = rs6000_issue_rate (); | |
35021 | ||
35022 | for (i = 0; i < issue_rate; i++) | |
35023 | { | |
35024 | sd_iterator_def sd_it; | |
35025 | dep_t dep; | |
35026 | rtx insn = group_insns[i]; | |
35027 | ||
35028 | if (!insn) | |
35029 | continue; | |
35030 | ||
35031 | FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep) | |
35032 | { | |
35033 | rtx next = DEP_CON (dep); | |
35034 | ||
35035 | if (next == next_insn | |
35036 | && rs6000_is_costly_dependence (dep, dep_cost (dep), 0)) | |
35037 | return true; | |
35038 | } | |
35039 | } | |
35040 | ||
35041 | return false; | |
35042 | } | |
35043 | ||
35044 | /* Utility of the function redefine_groups. | |
35045 | Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS | |
35046 | in the same dispatch group. If so, insert nops before NEXT_INSN, in order | |
35047 | to keep it "far" (in a separate group) from GROUP_INSNS, following | |
35048 | one of the following schemes, depending on the value of the flag | |
35049 | -minsert-sched-nops = X: | |
35050 | (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed | |
35051 | in order to force NEXT_INSN into a separate group. | |
35052 | (2) X < sched_finish_regroup_exact: insert exactly X nops. | |
35053 | GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop | |
35054 | insertion (has a group just ended, how many vacant issue slots remain in the | |
35055 | last group, and how many dispatch groups were encountered so far). */ | |
35056 | ||
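| /* For example, with -minsert-sched-nops=2 exactly two nops are | |
| emitted before a costly NEXT_INSN, whereas with 'regroup_exact' | |
| just enough nops are emitted to push NEXT_INSN into a fresh | |
| dispatch group. */ | |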
35057 | static int | |
35058 | force_new_group (int sched_verbose, FILE *dump, rtx *group_insns, | |
35059 | rtx_insn *next_insn, bool *group_end, int can_issue_more, | |
35060 | int *group_count) | |
35061 | { | |
35062 | rtx nop; | |
35063 | bool force; | |
35064 | int issue_rate = rs6000_issue_rate (); | |
35065 | bool end = *group_end; | |
35066 | int i; | |
35067 | ||
35068 | if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn)) | |
35069 | return can_issue_more; | |
35070 | ||
35071 | if (rs6000_sched_insert_nops > sched_finish_regroup_exact) | |
35072 | return can_issue_more; | |
35073 | ||
35074 | force = is_costly_group (group_insns, next_insn); | |
35075 | if (!force) | |
35076 | return can_issue_more; | |
35077 | ||
35078 | if (sched_verbose > 6) | |
35079 | fprintf (dump,"force: group count = %d, can_issue_more = %d\n", | |
35080 | *group_count ,can_issue_more); | |
35081 | ||
35082 | if (rs6000_sched_insert_nops == sched_finish_regroup_exact) | |
35083 | { | |
35084 | if (*group_end) | |
35085 | can_issue_more = 0; | |
35086 | ||
35087 | /* Since only a branch can be issued in the last issue_slot, it is | |
35088 | sufficient to insert 'can_issue_more - 1' nops if next_insn is not | |
35089 | a branch. If next_insn is a branch, we insert 'can_issue_more' nops; | |
35090 | in this case the last nop will start a new group and the branch | |
35091 | will be forced to the new group. */ | |
35092 | if (can_issue_more && !is_branch_slot_insn (next_insn)) | |
35093 | can_issue_more--; | |
35094 | ||
35095 | /* Do we have a special group ending nop? */ | |
35096 | if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7 | |
35097 | || rs6000_cpu_attr == CPU_POWER8) | |
35098 | { | |
35099 | nop = gen_group_ending_nop (); | |
35100 | emit_insn_before (nop, next_insn); | |
35101 | can_issue_more = 0; | |
35102 | } | |
35103 | else | |
35104 | while (can_issue_more > 0) | |
35105 | { | |
35106 | nop = gen_nop (); | |
35107 | emit_insn_before (nop, next_insn); | |
35108 | can_issue_more--; | |
35109 | } | |
35110 | ||
35111 | *group_end = true; | |
35112 | return 0; | |
35113 | } | |
35114 | ||
35115 | if (rs6000_sched_insert_nops < sched_finish_regroup_exact) | |
35116 | { | |
35117 | int n_nops = rs6000_sched_insert_nops; | |
35118 | ||
35119 | /* Nops can't be issued from the branch slot, so the effective | |
35120 | issue_rate for nops is 'issue_rate - 1'. */ | |
35121 | if (can_issue_more == 0) | |
35122 | can_issue_more = issue_rate; | |
35123 | can_issue_more--; | |
35124 | if (can_issue_more == 0) | |
35125 | { | |
35126 | can_issue_more = issue_rate - 1; | |
35127 | (*group_count)++; | |
35128 | end = true; | |
35129 | for (i = 0; i < issue_rate; i++) | |
35130 | { | |
35131 | group_insns[i] = 0; | |
35132 | } | |
35133 | } | |
35134 | ||
35135 | while (n_nops > 0) | |
35136 | { | |
35137 | nop = gen_nop (); | |
35138 | emit_insn_before (nop, next_insn); | |
35139 | if (can_issue_more == issue_rate - 1) /* new group begins */ | |
35140 | end = false; | |
35141 | can_issue_more--; | |
35142 | if (can_issue_more == 0) | |
35143 | { | |
35144 | can_issue_more = issue_rate - 1; | |
35145 | (*group_count)++; | |
35146 | end = true; | |
35147 | for (i = 0; i < issue_rate; i++) | |
35148 | { | |
35149 | group_insns[i] = 0; | |
35150 | } | |
35151 | } | |
35152 | n_nops--; | |
35153 | } | |
35154 | ||
35155 | /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */ | |
35156 | can_issue_more++; | |
35157 | ||
35158 | /* Is next_insn going to start a new group? */ | |
35159 | *group_end | |
35160 | = (end | |
35161 | || (can_issue_more == 1 && !is_branch_slot_insn (next_insn)) | |
35162 | || (can_issue_more <= 2 && is_cracked_insn (next_insn)) | |
35163 | || (can_issue_more < issue_rate | |
35164 | && insn_terminates_group_p (next_insn, previous_group))); | |
35165 | if (*group_end && end) | |
35166 | (*group_count)--; | |
35167 | ||
35168 | if (sched_verbose > 6) | |
35169 | fprintf (dump, "done force: group count = %d, can_issue_more = %d\n", | |
35170 | *group_count, can_issue_more); | |
35171 | return can_issue_more; | |
35172 | } | |
35173 | ||
35174 | return can_issue_more; | |
35175 | } | |
35176 | ||
35177 | /* This function tries to synch the dispatch groups that the compiler "sees" | |
35178 | with the dispatch groups that the processor dispatcher is expected to | |
35179 | form in practice. It tries to achieve this synchronization by forcing the | |
35180 | estimated processor grouping on the compiler (as opposed to the function | |
35181 | 'pad_groups', which tries to force the scheduler's grouping on the processor). | |
35182 | ||
35183 | The function scans the insn sequence between PREV_HEAD_INSN and TAIL and | |
35184 | examines the (estimated) dispatch groups that will be formed by the processor | |
35185 | dispatcher. It marks these group boundaries to reflect the estimated | |
35186 | processor grouping, overriding the grouping that the scheduler had marked. | |
35187 | Depending on the value of the flag '-minsert-sched-nops' this function can | |
35188 | force certain insns into separate groups or force a certain distance between | |
35189 | them by inserting nops, for example, if there exists a "costly dependence" | |
35190 | between the insns. | |
35191 | ||
35192 | The function estimates the group boundaries that the processor will form as | |
35193 | follows: It keeps track of how many vacant issue slots are available after | |
35194 | each insn. A subsequent insn will start a new group if one of the following | |
35195 | 4 cases applies: | |
35196 | - no more vacant issue slots remain in the current dispatch group. | |
35197 | - only the last issue slot, which is the branch slot, is vacant, but the next | |
35198 | insn is not a branch. | |
35199 | - only the last 2 or fewer issue slots, including the branch slot, are vacant, | |
35200 | which means that a cracked insn (which occupies two issue slots) can't be | |
35201 | issued in this group. | |
35202 | - fewer than 'issue_rate' slots are vacant, and the next insn always needs to | |
35203 | start a new group. */ | |
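/* Concrete reading of the four cases above (issue_rate == 4 assumed
   for illustration): after three insns can_issue_more == 1, so only
   the branch slot is free and a following non-branch starts a new
   group (case 2); after two insns can_issue_more == 2, so a cracked
   insn, which needs two non-branch slots, cannot fit and likewise
   starts a new group (case 3).  */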
35204 | ||
35205 | static int | |
35206 | redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn, | |
35207 | rtx_insn *tail) | |
35208 | { | |
35209 | rtx_insn *insn, *next_insn; | |
35210 | int issue_rate; | |
35211 | int can_issue_more; | |
35212 | int slot, i; | |
35213 | bool group_end; | |
35214 | int group_count = 0; | |
35215 | rtx *group_insns; | |
35216 | ||
35217 | /* Initialize. */ | |
35218 | issue_rate = rs6000_issue_rate (); | |
35219 | group_insns = XALLOCAVEC (rtx, issue_rate); | |
35220 | for (i = 0; i < issue_rate; i++) | |
35221 | { | |
35222 | group_insns[i] = 0; | |
35223 | } | |
35224 | can_issue_more = issue_rate; | |
35225 | slot = 0; | |
35226 | insn = get_next_active_insn (prev_head_insn, tail); | |
35227 | group_end = false; | |
35228 | ||
35229 | while (insn != NULL_RTX) | |
35230 | { | |
35231 | slot = (issue_rate - can_issue_more); | |
35232 | group_insns[slot] = insn; | |
35233 | can_issue_more = | |
35234 | rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more); | |
35235 | if (insn_terminates_group_p (insn, current_group)) | |
35236 | can_issue_more = 0; | |
35237 | ||
35238 | next_insn = get_next_active_insn (insn, tail); | |
35239 | if (next_insn == NULL_RTX) | |
35240 | return group_count + 1; | |
35241 | ||
35242 | /* Is next_insn going to start a new group? */ | |
35243 | group_end | |
35244 | = (can_issue_more == 0 | |
35245 | || (can_issue_more == 1 && !is_branch_slot_insn (next_insn)) | |
35246 | || (can_issue_more <= 2 && is_cracked_insn (next_insn)) | |
35247 | || (can_issue_more < issue_rate | |
35248 | && insn_terminates_group_p (next_insn, previous_group))); | |
35249 | ||
35250 | can_issue_more = force_new_group (sched_verbose, dump, group_insns, | |
35251 | next_insn, &group_end, can_issue_more, | |
35252 | &group_count); | |
35253 | ||
35254 | if (group_end) | |
35255 | { | |
35256 | group_count++; | |
35257 | can_issue_more = 0; | |
35258 | for (i = 0; i < issue_rate; i++) | |
35259 | { | |
35260 | group_insns[i] = 0; | |
35261 | } | |
35262 | } | |
35263 | ||
35264 | if (GET_MODE (next_insn) == TImode && can_issue_more) | |
35265 | PUT_MODE (next_insn, VOIDmode); | |
35266 | else if (!can_issue_more && GET_MODE (next_insn) != TImode) | |
35267 | PUT_MODE (next_insn, TImode); | |
35268 | ||
35269 | insn = next_insn; | |
35270 | if (can_issue_more == 0) | |
35271 | can_issue_more = issue_rate; | |
35272 | } /* while */ | |
35273 | ||
35274 | return group_count; | |
35275 | } | |
35276 | ||
35277 | /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the | |
35278 | dispatch group boundaries that the scheduler had marked. Pad with nops | |
35279 | any dispatch groups which have vacant issue slots, in order to force the | |
35280 | scheduler's grouping on the processor dispatcher. The function | |
35281 | returns the number of dispatch groups found. */ | |
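/* Example of the padding performed below (illustrative): the scheduler
   marked a group boundary after I2 while two issue slots were still
   vacant and I3 is not a branch.  One nop is emitted (a nop cannot go
   in the branch slot), leaving only the branch slot free, which forces
   the dispatcher to end the group where the scheduler did:
     I1 I2 | I3 ...  ->  I1 I2 nop | I3 ...  */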
35282 | ||
35283 | static int | |
35284 | pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn, | |
35285 | rtx_insn *tail) | |
35286 | { | |
35287 | rtx_insn *insn, *next_insn; | |
35288 | rtx nop; | |
35289 | int issue_rate; | |
35290 | int can_issue_more; | |
35291 | int group_end; | |
35292 | int group_count = 0; | |
35293 | ||
35294 | /* Initialize issue_rate. */ | |
35295 | issue_rate = rs6000_issue_rate (); | |
35296 | can_issue_more = issue_rate; | |
35297 | ||
35298 | insn = get_next_active_insn (prev_head_insn, tail); | |
35299 | next_insn = get_next_active_insn (insn, tail); | |
35300 | ||
35301 | while (insn != NULL_RTX) | |
35302 | { | |
35303 | can_issue_more = | |
35304 | rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more); | |
35305 | ||
35306 | group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode); | |
35307 | ||
35308 | if (next_insn == NULL_RTX) | |
35309 | break; | |
35310 | ||
35311 | if (group_end) | |
35312 | { | |
35313 | /* If the scheduler had marked group termination at this location | |
35314 | (between insn and next_insn), and neither insn nor next_insn will | |
35315 | force group termination, pad the group with nops to force group | |
35316 | termination. */ | |
35317 | if (can_issue_more | |
35318 | && (rs6000_sched_insert_nops == sched_finish_pad_groups) | |
35319 | && !insn_terminates_group_p (insn, current_group) | |
35320 | && !insn_terminates_group_p (next_insn, previous_group)) | |
35321 | { | |
35322 | if (!is_branch_slot_insn (next_insn)) | |
35323 | can_issue_more--; | |
35324 | ||
35325 | while (can_issue_more) | |
35326 | { | |
35327 | nop = gen_nop (); | |
35328 | emit_insn_before (nop, next_insn); | |
35329 | can_issue_more--; | |
35330 | } | |
35331 | } | |
35332 | ||
35333 | can_issue_more = issue_rate; | |
35334 | group_count++; | |
35335 | } | |
35336 | ||
35337 | insn = next_insn; | |
35338 | next_insn = get_next_active_insn (insn, tail); | |
35339 | } | |
35340 | ||
35341 | return group_count; | |
35342 | } | |
35343 | ||
35344 | /* We're beginning a new block. Initialize data structures as necessary. */ | |
35345 | ||
35346 | static void | |
35347 | rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED, | |
35348 | int sched_verbose ATTRIBUTE_UNUSED, | |
35349 | int max_ready ATTRIBUTE_UNUSED) | |
35350 | { | |
35351 | last_scheduled_insn = NULL; | |
35352 | load_store_pendulum = 0; | |
35353 | divide_cnt = 0; | |
35354 | vec_pairing = 0; | |
35355 | } | |
35356 | ||
35357 | /* The following function is called at the end of scheduling a BB. | |
35358 | After reload, it inserts nops to enforce the desired insn group bundling. */ | |
35359 | ||
35360 | static void | |
35361 | rs6000_sched_finish (FILE *dump, int sched_verbose) | |
35362 | { | |
35363 | int n_groups; | |
35364 | ||
35365 | if (sched_verbose) | |
35366 | fprintf (dump, "=== Finishing schedule.\n"); | |
35367 | ||
35368 | if (reload_completed && rs6000_sched_groups) | |
35369 | { | |
35370 | /* Do not run the sched_finish hook when selective scheduling is enabled. */ | |
35371 | if (sel_sched_p ()) | |
35372 | return; | |
35373 | ||
35374 | if (rs6000_sched_insert_nops == sched_finish_none) | |
35375 | return; | |
35376 | ||
35377 | if (rs6000_sched_insert_nops == sched_finish_pad_groups) | |
35378 | n_groups = pad_groups (dump, sched_verbose, | |
35379 | current_sched_info->prev_head, | |
35380 | current_sched_info->next_tail); | |
35381 | else | |
35382 | n_groups = redefine_groups (dump, sched_verbose, | |
35383 | current_sched_info->prev_head, | |
35384 | current_sched_info->next_tail); | |
35385 | ||
35386 | if (sched_verbose >= 6) | |
35387 | { | |
35388 | fprintf (dump, "ngroups = %d\n", n_groups); | |
35389 | print_rtl (dump, current_sched_info->prev_head); | |
35390 | fprintf (dump, "Done finish_sched\n"); | |
35391 | } | |
35392 | } | |
35393 | } | |
35394 | ||
35395 | struct rs6000_sched_context | |
35396 | { | |
35397 | short cached_can_issue_more; | |
35398 | rtx_insn *last_scheduled_insn; | |
35399 | int load_store_pendulum; | |
35400 | int divide_cnt; | |
35401 | int vec_pairing; | |
35402 | }; | |
35403 | ||
35404 | typedef struct rs6000_sched_context rs6000_sched_context_def; | |
35405 | typedef rs6000_sched_context_def *rs6000_sched_context_t; | |
35406 | ||
35407 | /* Allocate storage for a new scheduling context. */ | |
35408 | static void * | |
35409 | rs6000_alloc_sched_context (void) | |
35410 | { | |
35411 | return xmalloc (sizeof (rs6000_sched_context_def)); | |
35412 | } | |
35413 | ||
35414 | /* If CLEAN_P is true, initialize _SC with clean data; | |
35415 | otherwise initialize it from the global context. */ | |
35416 | static void | |
35417 | rs6000_init_sched_context (void *_sc, bool clean_p) | |
35418 | { | |
35419 | rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc; | |
35420 | ||
35421 | if (clean_p) | |
35422 | { | |
35423 | sc->cached_can_issue_more = 0; | |
35424 | sc->last_scheduled_insn = NULL; | |
35425 | sc->load_store_pendulum = 0; | |
35426 | sc->divide_cnt = 0; | |
35427 | sc->vec_pairing = 0; | |
35428 | } | |
35429 | else | |
35430 | { | |
35431 | sc->cached_can_issue_more = cached_can_issue_more; | |
35432 | sc->last_scheduled_insn = last_scheduled_insn; | |
35433 | sc->load_store_pendulum = load_store_pendulum; | |
35434 | sc->divide_cnt = divide_cnt; | |
35435 | sc->vec_pairing = vec_pairing; | |
35436 | } | |
35437 | } | |
35438 | ||
35439 | /* Sets the global scheduling context to the one pointed to by _SC. */ | |
35440 | static void | |
35441 | rs6000_set_sched_context (void *_sc) | |
35442 | { | |
35443 | rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc; | |
35444 | ||
35445 | gcc_assert (sc != NULL); | |
35446 | ||
35447 | cached_can_issue_more = sc->cached_can_issue_more; | |
35448 | last_scheduled_insn = sc->last_scheduled_insn; | |
35449 | load_store_pendulum = sc->load_store_pendulum; | |
35450 | divide_cnt = sc->divide_cnt; | |
35451 | vec_pairing = sc->vec_pairing; | |
35452 | } | |
35453 | ||
35454 | /* Free _SC. */ | |
35455 | static void | |
35456 | rs6000_free_sched_context (void *_sc) | |
35457 | { | |
35458 | gcc_assert (_sc != NULL); | |
35459 | ||
35460 | free (_sc); | |
35461 | } | |
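/* A minimal sketch of how these four hooks are driven (hypothetical
   caller shown for orientation only; the real sequencing lives in the
   selective scheduler, not in this file):

     void *sc = rs6000_alloc_sched_context ();
     rs6000_init_sched_context (sc, true);   // start from clean state
     ... schedule one region ...
     rs6000_set_sched_context (sc);          // restore saved globals
     rs6000_free_sched_context (sc);  */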
35462 | ||
35463 | static bool | |
35464 | rs6000_sched_can_speculate_insn (rtx_insn *insn) | |
35465 | { | |
35466 | switch (get_attr_type (insn)) | |
35467 | { | |
35468 | case TYPE_DIV: | |
35469 | case TYPE_SDIV: | |
35470 | case TYPE_DDIV: | |
35471 | case TYPE_VECDIV: | |
35472 | case TYPE_SSQRT: | |
35473 | case TYPE_DSQRT: | |
35474 | return false; | |
35475 | ||
35476 | default: | |
35477 | return true; | |
35478 | } | |
35479 | } | |
35480 | \f | |
35481 | /* Length in units of the trampoline for entering a nested function. */ | |
35482 | ||
35483 | int | |
35484 | rs6000_trampoline_size (void) | |
35485 | { | |
35486 | int ret = 0; | |
35487 | ||
35488 | switch (DEFAULT_ABI) | |
35489 | { | |
35490 | default: | |
35491 | gcc_unreachable (); | |
35492 | ||
35493 | case ABI_AIX: | |
35494 | ret = (TARGET_32BIT) ? 12 : 24; | |
35495 | break; | |
35496 | ||
35497 | case ABI_ELFv2: | |
35498 | gcc_assert (!TARGET_32BIT); | |
35499 | ret = 32; | |
35500 | break; | |
35501 | ||
35502 | case ABI_DARWIN: | |
35503 | case ABI_V4: | |
35504 | ret = (TARGET_32BIT) ? 40 : 48; | |
35505 | break; | |
35506 | } | |
35507 | ||
35508 | return ret; | |
35509 | } | |
35510 | ||
35511 | /* Emit RTL insns to initialize the variable parts of a trampoline. | |
35512 | M_TRAMP is the trampoline memory, FNDECL is the decl of the nested | |
35513 | function, and CXT is an RTX for its static chain value. */ | |
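/* Layout of the AIX-style descriptor built below, assuming
   regsize == 8 (64-bit; halve the offsets for 32-bit):
     offset 0:          function entry address
     offset regsize:    TOC pointer
     offset 2*regsize:  static chain (CXT)  */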
35514 | ||
35515 | static void | |
35516 | rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt) | |
35517 | { | |
35518 | int regsize = (TARGET_32BIT) ? 4 : 8; | |
35519 | rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); | |
35520 | rtx ctx_reg = force_reg (Pmode, cxt); | |
35521 | rtx addr = force_reg (Pmode, XEXP (m_tramp, 0)); | |
35522 | ||
35523 | switch (DEFAULT_ABI) | |
35524 | { | |
35525 | default: | |
35526 | gcc_unreachable (); | |
35527 | ||
35528 | /* Under AIX, just build the 3-word function descriptor. */ | |
35529 | case ABI_AIX: | |
35530 | { | |
35531 | rtx fnmem, fn_reg, toc_reg; | |
35532 | ||
35533 | if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS) | |
35534 | error ("You cannot take the address of a nested function if you use " | |
35535 | "the -mno-pointers-to-nested-functions option."); | |
35536 | ||
35537 | fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr)); | |
35538 | fn_reg = gen_reg_rtx (Pmode); | |
35539 | toc_reg = gen_reg_rtx (Pmode); | |
35540 | ||
35541 | /* Macro to shorten the code expansions below. */ | |
35542 | # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET) | |
35543 | ||
35544 | m_tramp = replace_equiv_address (m_tramp, addr); | |
35545 | ||
35546 | emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0)); | |
35547 | emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize)); | |
35548 | emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg); | |
35549 | emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg); | |
35550 | emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg); | |
35551 | ||
35552 | # undef MEM_PLUS | |
35553 | } | |
35554 | break; | |
35555 | ||
35556 | /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */ | |
35557 | case ABI_ELFv2: | |
35558 | case ABI_DARWIN: | |
35559 | case ABI_V4: | |
35560 | emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"), | |
9e9e5c15 | 35561 | LCT_NORMAL, VOIDmode, |
01e91138 | 35562 | addr, Pmode, |
35563 | GEN_INT (rs6000_trampoline_size ()), SImode, | |
35564 | fnaddr, Pmode, | |
35565 | ctx_reg, Pmode); | |
35566 | break; | |
35567 | } | |
35568 | } | |
35569 | ||
35570 | \f | |
35571 | /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain | |
35572 | identifier as an argument, so the front end shouldn't look it up. */ | |
35573 | ||
35574 | static bool | |
35575 | rs6000_attribute_takes_identifier_p (const_tree attr_id) | |
35576 | { | |
35577 | return is_attribute_p ("altivec", attr_id); | |
35578 | } | |
35579 | ||
35580 | /* Handle the "altivec" attribute. The attribute may have | |
35581 | arguments as follows: | |
35582 | ||
35583 | __attribute__((altivec(vector__))) | |
35584 | __attribute__((altivec(pixel__))) (always followed by 'unsigned short') | |
35585 | __attribute__((altivec(bool__))) (always followed by 'unsigned') | |
35586 | ||
35587 | and may appear more than once (e.g., 'vector bool char') in a | |
35588 | given declaration. */ | |
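/* For instance, 'vector unsigned int x;' reaches this handler as
   altivec(vector__) applied to 'unsigned int' (E_SImode), and the 'v'
   case below rewrites the declaration to unsigned_V4SI_type_node.  */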
35589 | ||
35590 | static tree | |
35591 | rs6000_handle_altivec_attribute (tree *node, | |
35592 | tree name ATTRIBUTE_UNUSED, | |
35593 | tree args, | |
35594 | int flags ATTRIBUTE_UNUSED, | |
35595 | bool *no_add_attrs) | |
35596 | { | |
35597 | tree type = *node, result = NULL_TREE; | |
35598 | machine_mode mode; | |
35599 | int unsigned_p; | |
35600 | char altivec_type | |
35601 | = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args) | |
35602 | && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE) | |
35603 | ? *IDENTIFIER_POINTER (TREE_VALUE (args)) | |
35604 | : '?'); | |
35605 | ||
35606 | while (POINTER_TYPE_P (type) | |
35607 | || TREE_CODE (type) == FUNCTION_TYPE | |
35608 | || TREE_CODE (type) == METHOD_TYPE | |
35609 | || TREE_CODE (type) == ARRAY_TYPE) | |
35610 | type = TREE_TYPE (type); | |
35611 | ||
35612 | mode = TYPE_MODE (type); | |
35613 | ||
35614 | /* Check for invalid AltiVec type qualifiers. */ | |
35615 | if (type == long_double_type_node) | |
35616 | error ("use of %<long double%> in AltiVec types is invalid"); | |
35617 | else if (type == boolean_type_node) | |
35618 | error ("use of boolean types in AltiVec types is invalid"); | |
35619 | else if (TREE_CODE (type) == COMPLEX_TYPE) | |
35620 | error ("use of %<complex%> in AltiVec types is invalid"); | |
35621 | else if (DECIMAL_FLOAT_MODE_P (mode)) | |
35622 | error ("use of decimal floating point types in AltiVec types is invalid"); | |
35623 | else if (!TARGET_VSX) | |
35624 | { | |
35625 | if (type == long_unsigned_type_node || type == long_integer_type_node) | |
35626 | { | |
35627 | if (TARGET_64BIT) | |
35628 | error ("use of %<long%> in AltiVec types is invalid for " | |
35629 | "64-bit code without -mvsx"); | |
35630 | else if (rs6000_warn_altivec_long) | |
35631 | warning (0, "use of %<long%> in AltiVec types is deprecated; " | |
35632 | "use %<int%>"); | |
35633 | } | |
35634 | else if (type == long_long_unsigned_type_node | |
35635 | || type == long_long_integer_type_node) | |
35636 | error ("use of %<long long%> in AltiVec types is invalid without " | |
35637 | "-mvsx"); | |
35638 | else if (type == double_type_node) | |
35639 | error ("use of %<double%> in AltiVec types is invalid without -mvsx"); | |
35640 | } | |
35641 | ||
35642 | switch (altivec_type) | |
35643 | { | |
35644 | case 'v': | |
35645 | unsigned_p = TYPE_UNSIGNED (type); | |
35646 | switch (mode) | |
35647 | { | |
916ace94 | 35648 | case E_TImode: |
01e91138 | 35649 | result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node); |
35650 | break; | |
916ace94 | 35651 | case E_DImode: |
01e91138 | 35652 | result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node); |
35653 | break; | |
916ace94 | 35654 | case E_SImode: |
01e91138 | 35655 | result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node); |
35656 | break; | |
916ace94 | 35657 | case E_HImode: |
01e91138 | 35658 | result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node); |
35659 | break; | |
916ace94 | 35660 | case E_QImode: |
01e91138 | 35661 | result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node); |
35662 | break; | |
916ace94 | 35663 | case E_SFmode: result = V4SF_type_node; break; |
35664 | case E_DFmode: result = V2DF_type_node; break; | |
01e91138 | 35665 | /* If the user says 'vector int bool', we may be handed the 'bool' |
35666 | attribute _before_ the 'vector' attribute, and so select the | |
35667 | proper type in the 'b' case below. */ | |
916ace94 | 35668 | case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode: |
35669 | case E_V2DImode: case E_V2DFmode: | |
01e91138 | 35670 | result = type; |
35671 | default: break; | |
35672 | } | |
35673 | break; | |
35674 | case 'b': | |
35675 | switch (mode) | |
35676 | { | |
916ace94 | 35677 | case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break; |
35678 | case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break; | |
35679 | case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break; | |
35680 | case E_QImode: case E_V16QImode: result = bool_V16QI_type_node; | |
01e91138 | 35681 | default: break; |
35682 | } | |
35683 | break; | |
35684 | case 'p': | |
35685 | switch (mode) | |
35686 | { | |
916ace94 | 35687 | case E_V8HImode: result = pixel_V8HI_type_node; |
01e91138 | 35688 | default: break; |
35689 | } | |
35690 | default: break; | |
35691 | } | |
35692 | ||
35693 | /* Propagate qualifiers attached to the element type | |
35694 | onto the vector type. */ | |
35695 | if (result && result != type && TYPE_QUALS (type)) | |
35696 | result = build_qualified_type (result, TYPE_QUALS (type)); | |
35697 | ||
35698 | *no_add_attrs = true; /* No need to hang on to the attribute. */ | |
35699 | ||
35700 | if (result) | |
35701 | *node = lang_hooks.types.reconstruct_complex_type (*node, result); | |
35702 | ||
35703 | return NULL_TREE; | |
35704 | } | |
35705 | ||
35706 | /* AltiVec defines four built-in scalar types that serve as vector | |
35707 | elements; we must teach the compiler how to mangle them. */ | |
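/* For example, a C++ parameter of type 'vector bool int' has its
   element type mangled via the "U6__booli" string returned below; the
   surrounding vector encoding is produced by the generic mangler
   (illustrative only).  */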
35708 | ||
35709 | static const char * | |
35710 | rs6000_mangle_type (const_tree type) | |
35711 | { | |
35712 | type = TYPE_MAIN_VARIANT (type); | |
35713 | ||
35714 | if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE | |
35715 | && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE) | |
35716 | return NULL; | |
35717 | ||
35718 | if (type == bool_char_type_node) return "U6__boolc"; | |
35719 | if (type == bool_short_type_node) return "U6__bools"; | |
35720 | if (type == pixel_type_node) return "u7__pixel"; | |
35721 | if (type == bool_int_type_node) return "U6__booli"; | |
35722 | if (type == bool_long_type_node) return "U6__booll"; | |
35723 | ||
35724 | /* Use a unique name for __float128 rather than trying to use "e" or "g". Use | |
35725 | "g" for IBM extended double, no matter whether it is long double (using | |
35726 | -mabi=ibmlongdouble) or the distinct __ibm128 type. */ | |
35727 | if (TARGET_FLOAT128_TYPE) | |
35728 | { | |
35729 | if (type == ieee128_float_type_node) | |
35730 | return "U10__float128"; | |
35731 | ||
35732 | if (type == ibm128_float_type_node) | |
35733 | return "g"; | |
35734 | ||
35735 | if (type == long_double_type_node && TARGET_LONG_DOUBLE_128) | |
35736 | return (TARGET_IEEEQUAD) ? "U10__float128" : "g"; | |
35737 | } | |
35738 | ||
35739 | /* Mangle IBM extended float long double as `g' (__float128) on | |
35740 | powerpc*-linux where long-double-64 previously was the default. */ | |
35741 | if (TYPE_MAIN_VARIANT (type) == long_double_type_node | |
35742 | && TARGET_ELF | |
35743 | && TARGET_LONG_DOUBLE_128 | |
35744 | && !TARGET_IEEEQUAD) | |
35745 | return "g"; | |
35746 | ||
35747 | /* For all other types, use normal C++ mangling. */ | |
35748 | return NULL; | |
35749 | } | |
35750 | ||
35751 | /* Handle a "longcall" or "shortcall" attribute; arguments as in | |
35752 | struct attribute_spec.handler. */ | |
35753 | ||
35754 | static tree | |
35755 | rs6000_handle_longcall_attribute (tree *node, tree name, | |
35756 | tree args ATTRIBUTE_UNUSED, | |
35757 | int flags ATTRIBUTE_UNUSED, | |
35758 | bool *no_add_attrs) | |
35759 | { | |
35760 | if (TREE_CODE (*node) != FUNCTION_TYPE | |
35761 | && TREE_CODE (*node) != FIELD_DECL | |
35762 | && TREE_CODE (*node) != TYPE_DECL) | |
35763 | { | |
35764 | warning (OPT_Wattributes, "%qE attribute only applies to functions", | |
35765 | name); | |
35766 | *no_add_attrs = true; | |
35767 | } | |
35768 | ||
35769 | return NULL_TREE; | |
35770 | } | |
35771 | ||
35772 | /* Set longcall attributes on all functions declared when | |
35773 | rs6000_default_long_calls is true. */ | |
35774 | static void | |
35775 | rs6000_set_default_type_attributes (tree type) | |
35776 | { | |
35777 | if (rs6000_default_long_calls | |
35778 | && (TREE_CODE (type) == FUNCTION_TYPE | |
35779 | || TREE_CODE (type) == METHOD_TYPE)) | |
35780 | TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"), | |
35781 | NULL_TREE, | |
35782 | TYPE_ATTRIBUTES (type)); | |
35783 | ||
35784 | #if TARGET_MACHO | |
35785 | darwin_set_default_type_attributes (type); | |
35786 | #endif | |
35787 | } | |
35788 | ||
35789 | /* Return a reference suitable for calling a function with the | |
35790 | longcall attribute. */ | |
35791 | ||
35792 | rtx | |
35793 | rs6000_longcall_ref (rtx call_ref) | |
35794 | { | |
35795 | const char *call_name; | |
35796 | tree node; | |
35797 | ||
35798 | if (GET_CODE (call_ref) != SYMBOL_REF) | |
35799 | return call_ref; | |
35800 | ||
35801 | /* System V adds '.' to the internal name, so skip all leading '.' characters. */ | |
35802 | call_name = XSTR (call_ref, 0); | |
35803 | if (*call_name == '.') | |
35804 | { | |
35805 | while (*call_name == '.') | |
35806 | call_name++; | |
35807 | ||
35808 | node = get_identifier (call_name); | |
35809 | call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node)); | |
35810 | } | |
35811 | ||
35812 | return force_reg (Pmode, call_ref); | |
35813 | } | |
35814 | \f | |
35815 | #ifndef TARGET_USE_MS_BITFIELD_LAYOUT | |
35816 | #define TARGET_USE_MS_BITFIELD_LAYOUT 0 | |
35817 | #endif | |
35818 | ||
35819 | /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in | |
35820 | struct attribute_spec.handler. */ | |
35821 | static tree | |
35822 | rs6000_handle_struct_attribute (tree *node, tree name, | |
35823 | tree args ATTRIBUTE_UNUSED, | |
35824 | int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) | |
35825 | { | |
35826 | tree *type = NULL; | |
35827 | if (DECL_P (*node)) | |
35828 | { | |
35829 | if (TREE_CODE (*node) == TYPE_DECL) | |
35830 | type = &TREE_TYPE (*node); | |
35831 | } | |
35832 | else | |
35833 | type = node; | |
35834 | ||
35835 | if (!(type && (TREE_CODE (*type) == RECORD_TYPE | |
35836 | || TREE_CODE (*type) == UNION_TYPE))) | |
35837 | { | |
35838 | warning (OPT_Wattributes, "%qE attribute ignored", name); | |
35839 | *no_add_attrs = true; | |
35840 | } | |
35841 | ||
35842 | else if ((is_attribute_p ("ms_struct", name) | |
35843 | && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type))) | |
35844 | || ((is_attribute_p ("gcc_struct", name) | |
35845 | && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type))))) | |
35846 | { | |
35847 | warning (OPT_Wattributes, "%qE incompatible attribute ignored", | |
35848 | name); | |
35849 | *no_add_attrs = true; | |
35850 | } | |
35851 | ||
35852 | return NULL_TREE; | |
35853 | } | |
35854 | ||
35855 | static bool | |
35856 | rs6000_ms_bitfield_layout_p (const_tree record_type) | |
35857 | { | |
35858 | return (TARGET_USE_MS_BITFIELD_LAYOUT | |
35859 | && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type))) | |
35860 | || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)); | |
35861 | } | |
35862 | \f | |
35863 | #ifdef USING_ELFOS_H | |
35864 | ||
35865 | /* A get_unnamed_section callback, used for switching to toc_section. */ | |
35866 | ||
35867 | static void | |
35868 | rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED) | |
35869 | { | |
35870 | if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) | |
35871 | && TARGET_MINIMAL_TOC) | |
35872 | { | |
35873 | if (!toc_initialized) | |
35874 | { | |
35875 | fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP); | |
35876 | ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2); | |
35877 | (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0); | |
35878 | fprintf (asm_out_file, "\t.tc "); | |
35879 | ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],"); | |
35880 | ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1"); | |
35881 | fprintf (asm_out_file, "\n"); | |
35882 | ||
35883 | fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP); | |
35884 | ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2); | |
35885 | ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1"); | |
35886 | fprintf (asm_out_file, " = .+32768\n"); | |
35887 | toc_initialized = 1; | |
35888 | } | |
35889 | else | |
35890 | fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP); | |
35891 | } | |
35892 | else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) | |
35893 | { | |
35894 | fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP); | |
35895 | if (!toc_initialized) | |
35896 | { | |
35897 | ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2); | |
35898 | toc_initialized = 1; | |
35899 | } | |
35900 | } | |
35901 | else | |
35902 | { | |
35903 | fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP); | |
35904 | if (!toc_initialized) | |
35905 | { | |
35906 | ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2); | |
35907 | ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1"); | |
35908 | fprintf (asm_out_file, " = .+32768\n"); | |
35909 | toc_initialized = 1; | |
35910 | } | |
35911 | } | |
35912 | } | |
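/* Roughly, the first 64-bit -mminimal-toc switch to the TOC section
   emits something like the following (illustrative sketch; the exact
   strings come from TOC_SECTION_ASM_OP, MINIMAL_TOC_SECTION_ASM_OP and
   the internal-label macros):

	.section ".toc","aw"
	.align 3
   .LCTOC0:
	.tc .LCTOC1[TC],.LCTOC1
	.section ".toc1","aw"
	.align 3
   .LCTOC1 = .+32768  */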
35913 | ||
35914 | /* Implement TARGET_ASM_INIT_SECTIONS. */ | |
35915 | ||
35916 | static void | |
35917 | rs6000_elf_asm_init_sections (void) | |
35918 | { | |
35919 | toc_section | |
35920 | = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL); | |
35921 | ||
35922 | sdata2_section | |
35923 | = get_unnamed_section (SECTION_WRITE, output_section_asm_op, | |
35924 | SDATA2_SECTION_ASM_OP); | |
35925 | } | |
35926 | ||
35927 | /* Implement TARGET_SELECT_RTX_SECTION. */ | |
35928 | ||
35929 | static section * | |
35930 | rs6000_elf_select_rtx_section (machine_mode mode, rtx x, | |
35931 | unsigned HOST_WIDE_INT align) | |
35932 | { | |
35933 | if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode)) | |
35934 | return toc_section; | |
35935 | else | |
35936 | return default_elf_select_rtx_section (mode, x, align); | |
35937 | } | |
35938 | \f | |
35939 | /* For a SYMBOL_REF, set generic flags and then perform some | |
35940 | target-specific processing. | |
35941 | ||
35942 | When the AIX ABI is requested on a non-AIX system, replace the | |
35943 | function name with the real name (with a leading .) rather than the | |
35944 | function descriptor name. This saves a lot of overriding code to | |
35945 | read the prefixes. */ | |
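/* E.g. when -mabi=aix is used on an ELF (non-AIX) target, the symbol
   for function 'bar' is rewritten here to '.bar', so references go
   straight to the code entry point rather than the descriptor.  */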
35946 | ||
35947 | static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED; | |
35948 | static void | |
35949 | rs6000_elf_encode_section_info (tree decl, rtx rtl, int first) | |
35950 | { | |
35951 | default_encode_section_info (decl, rtl, first); | |
35952 | ||
35953 | if (first | |
35954 | && TREE_CODE (decl) == FUNCTION_DECL | |
35955 | && !TARGET_AIX | |
35956 | && DEFAULT_ABI == ABI_AIX) | |
35957 | { | |
35958 | rtx sym_ref = XEXP (rtl, 0); | |
35959 | size_t len = strlen (XSTR (sym_ref, 0)); | |
35960 | char *str = XALLOCAVEC (char, len + 2); | |
35961 | str[0] = '.'; | |
35962 | memcpy (str + 1, XSTR (sym_ref, 0), len + 1); | |
35963 | XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1); | |
35964 | } | |
35965 | } | |
35966 | ||
35967 | static inline bool | |
35968 | compare_section_name (const char *section, const char *templ) | |
35969 | { | |
35970 | int len; | |
35971 | ||
35972 | len = strlen (templ); | |
35973 | return (strncmp (section, templ, len) == 0 | |
35974 | && (section[len] == 0 || section[len] == '.')); | |
35975 | } | |
35976 | ||
35977 | bool | |
35978 | rs6000_elf_in_small_data_p (const_tree decl) | |
35979 | { | |
35980 | if (rs6000_sdata == SDATA_NONE) | |
35981 | return false; | |
35982 | ||
35983 | /* We want to merge strings, so we never consider them small data. */ | |
35984 | if (TREE_CODE (decl) == STRING_CST) | |
35985 | return false; | |
35986 | ||
35987 | /* Functions are never in the small data area. */ | |
35988 | if (TREE_CODE (decl) == FUNCTION_DECL) | |
35989 | return false; | |
35990 | ||
35991 | if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl)) | |
35992 | { | |
35993 | const char *section = DECL_SECTION_NAME (decl); | |
35994 | if (compare_section_name (section, ".sdata") | |
35995 | || compare_section_name (section, ".sdata2") | |
35996 | || compare_section_name (section, ".gnu.linkonce.s") | |
35997 | || compare_section_name (section, ".sbss") | |
35998 | || compare_section_name (section, ".sbss2") | |
35999 | || compare_section_name (section, ".gnu.linkonce.sb") | |
36000 | || strcmp (section, ".PPC.EMB.sdata0") == 0 | |
36001 | || strcmp (section, ".PPC.EMB.sbss0") == 0) | |
36002 | return true; | |
36003 | } | |
36004 | else | |
36005 | { | |
36006 | HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl)); | |
36007 | ||
36008 | if (size > 0 | |
36009 | && size <= g_switch_value | |
36010 | /* If it's not public, and we're not going to reference it there, | |
36011 | there's no need to put it in the small data section. */ | |
36012 | && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl))) | |
36013 | return true; | |
36014 | } | |
36015 | ||
36016 | return false; | |
36017 | } | |
36018 | ||
36019 | #endif /* USING_ELFOS_H */ | |
36020 | \f | |
36021 | /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */ | |
36022 | ||
36023 | static bool | |
36024 | rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x) | |
36025 | { | |
36026 | return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode); | |
36027 | } | |
36028 | ||
36029 | /* Do not place thread-local symbol refs in the object blocks. */ | |
36030 | ||
36031 | static bool | |
36032 | rs6000_use_blocks_for_decl_p (const_tree decl) | |
36033 | { | |
36034 | return !DECL_THREAD_LOCAL_P (decl); | |
36035 | } | |
36036 | \f | |
36037 | /* Return a REG that occurs in ADDR with coefficient 1. | |
36038 | ADDR can be effectively incremented by incrementing REG. | |
36039 | ||
36040 | r0 is special and we must not select it as an address | |
36041 | register by this routine since our caller will try to | |
36042 | increment the returned register via an "la" instruction. */ | |
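/* Example: for ADDR == (plus (reg r9) (const_int 8)) this returns r9;
   for (plus (plus (reg r9) (reg r10)) (const_int 8)) the constant arm
   is discarded first and the walk then returns r9, the first suitable
   (non-r0) register operand found.  */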
36043 | ||
36044 | rtx | |
36045 | find_addr_reg (rtx addr) | |
36046 | { | |
36047 | while (GET_CODE (addr) == PLUS) | |
36048 | { | |
36049 | if (GET_CODE (XEXP (addr, 0)) == REG | |
36050 | && REGNO (XEXP (addr, 0)) != 0) | |
36051 | addr = XEXP (addr, 0); | |
36052 | else if (GET_CODE (XEXP (addr, 1)) == REG | |
36053 | && REGNO (XEXP (addr, 1)) != 0) | |
36054 | addr = XEXP (addr, 1); | |
36055 | else if (CONSTANT_P (XEXP (addr, 0))) | |
36056 | addr = XEXP (addr, 1); | |
36057 | else if (CONSTANT_P (XEXP (addr, 1))) | |
36058 | addr = XEXP (addr, 0); | |
36059 | else | |
36060 | gcc_unreachable (); | |
36061 | } | |
36062 | gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0); | |
36063 | return addr; | |
36064 | } | |
36065 | ||
36066 | void | |
36067 | rs6000_fatal_bad_address (rtx op) | |
36068 | { | |
36069 | fatal_insn ("bad address", op); | |
36070 | } | |
36071 | ||
36072 | #if TARGET_MACHO | |
36073 | ||
36074 | typedef struct branch_island_d { | |
36075 | tree function_name; | |
36076 | tree label_name; | |
36077 | int line_number; | |
36078 | } branch_island; | |
36079 | ||
36080 | ||
36081 | static vec<branch_island, va_gc> *branch_islands; | |
36082 | ||
36083 | /* Remember to generate a branch island for far calls to the given | |
36084 | function. */ | |
36085 | ||
36086 | static void | |
36087 | add_compiler_branch_island (tree label_name, tree function_name, | |
36088 | int line_number) | |
36089 | { | |
36090 | branch_island bi = {function_name, label_name, line_number}; | |
36091 | vec_safe_push (branch_islands, bi); | |
36092 | } | |
36093 | ||
36094 | /* Generate far-jump branch islands for everything recorded in | |
36095 | branch_islands. Invoked immediately after the last instruction of | |
36096 | the epilogue has been emitted; the branch islands must be appended | |
36097 | to, and contiguous with, the function body. Mach-O stubs are | |
36098 | generated in machopic_output_stub(). */ | |
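/* An island for 'foo' in non-PIC code comes out roughly as (sketch of
   the !flag_pic branch below):
   L42:
	lis r12,hi16(_foo)
	ori r12,r12,lo16(_foo)
	mtctr r12
	bctr  */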
36099 | ||
36100 | static void | |
36101 | macho_branch_islands (void) | |
36102 | { | |
36103 | char tmp_buf[512]; | |
36104 | ||
36105 | while (!vec_safe_is_empty (branch_islands)) | |
36106 | { | |
36107 | branch_island *bi = &branch_islands->last (); | |
36108 | const char *label = IDENTIFIER_POINTER (bi->label_name); | |
36109 | const char *name = IDENTIFIER_POINTER (bi->function_name); | |
36110 | char name_buf[512]; | |
36111 | /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */ | |
36112 | if (name[0] == '*' || name[0] == '&') | |
36113 | strcpy (name_buf, name+1); | |
36114 | else | |
36115 | { | |
36116 | name_buf[0] = '_'; | |
36117 | strcpy (name_buf+1, name); | |
36118 | } | |
36119 | strcpy (tmp_buf, "\n"); | |
36120 | strcat (tmp_buf, label); | |
36121 | #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO) | |
36122 | if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG) | |
36123 | dbxout_stabd (N_SLINE, bi->line_number); | |
36124 | #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */ | |
36125 | if (flag_pic) | |
36126 | { | |
36127 | if (TARGET_LINK_STACK) | |
36128 | { | |
36129 | char name[32]; | |
36130 | get_ppc476_thunk_name (name); | |
36131 | strcat (tmp_buf, ":\n\tmflr r0\n\tbl "); | |
36132 | strcat (tmp_buf, name); | |
36133 | strcat (tmp_buf, "\n"); | |
36134 | strcat (tmp_buf, label); | |
36135 | strcat (tmp_buf, "_pic:\n\tmflr r11\n"); | |
36136 | } | |
36137 | else | |
36138 | { | |
36139 | strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,"); | |
36140 | strcat (tmp_buf, label); | |
36141 | strcat (tmp_buf, "_pic\n"); | |
36142 | strcat (tmp_buf, label); | |
36143 | strcat (tmp_buf, "_pic:\n\tmflr r11\n"); | |
36144 | } | |
36145 | ||
36146 | strcat (tmp_buf, "\taddis r11,r11,ha16("); | |
36147 | strcat (tmp_buf, name_buf); | |
36148 | strcat (tmp_buf, " - "); | |
36149 | strcat (tmp_buf, label); | |
36150 | strcat (tmp_buf, "_pic)\n"); | |
36151 | ||
36152 | strcat (tmp_buf, "\tmtlr r0\n"); | |
36153 | ||
36154 | strcat (tmp_buf, "\taddi r12,r11,lo16("); | |
36155 | strcat (tmp_buf, name_buf); | |
36156 | strcat (tmp_buf, " - "); | |
36157 | strcat (tmp_buf, label); | |
36158 | strcat (tmp_buf, "_pic)\n"); | |
36159 | ||
36160 | strcat (tmp_buf, "\tmtctr r12\n\tbctr\n"); | |
36161 | } | |
36162 | else | |
36163 | { | |
36164 | strcat (tmp_buf, ":\nlis r12,hi16("); | |
36165 | strcat (tmp_buf, name_buf); | |
36166 | strcat (tmp_buf, ")\n\tori r12,r12,lo16("); | |
36167 | strcat (tmp_buf, name_buf); | |
36168 | strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr"); | |
36169 | } | |
36170 | output_asm_insn (tmp_buf, 0); | |
36171 | #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO) | |
36172 | if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG) | |
36173 | dbxout_stabd (N_SLINE, bi->line_number); | |
36174 | #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */ | |
36175 | branch_islands->pop (); | |
36176 | } | |
36177 | } | |
36178 | ||
36179 | /* NO_PREVIOUS_DEF checks whether the function name is already in the | |
36180 | list of branch islands. */ | |
36181 | ||
36182 | static int | |
36183 | no_previous_def (tree function_name) | |
36184 | { | |
36185 | branch_island *bi; | |
36186 | unsigned ix; | |
36187 | ||
36188 | FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi) | |
36189 | if (function_name == bi->function_name) | |
36190 | return 0; | |
36191 | return 1; | |
36192 | } | |
36193 | ||
36194 | /* GET_PREV_LABEL gets the label name from the previous definition of | |
36195 | the function. */ | |
36196 | ||
36197 | static tree | |
36198 | get_prev_label (tree function_name) | |
36199 | { | |
36200 | branch_island *bi; | |
36201 | unsigned ix; | |
36202 | ||
36203 | FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi) | |
36204 | if (function_name == bi->function_name) | |
36205 | return bi->label_name; | |
36206 | return NULL_TREE; | |
36207 | } | |
36208 | ||
36209 | /* INSN is either a function call or a millicode call. | |
36210 | ||
36211 | OPERANDS[DEST_OPERAND_NUMBER] is the routine we are calling, and | |
36212 | OPERANDS[COOKIE_OPERAND_NUMBER] holds the call cookie flags. */ | |
36213 | ||
36214 | char * | |
36215 | output_call (rtx_insn *insn, rtx *operands, int dest_operand_number, | |
36216 | int cookie_operand_number) | |
36217 | { | |
36218 | static char buf[256]; | |
36219 | if (darwin_emit_branch_islands | |
36220 | && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF | |
36221 | && (INTVAL (operands[cookie_operand_number]) & CALL_LONG)) | |
36222 | { | |
36223 | tree labelname; | |
36224 | tree funname = get_identifier (XSTR (operands[dest_operand_number], 0)); | |
36225 | ||
36226 | if (no_previous_def (funname)) | |
36227 | { | |
36228 | rtx label_rtx = gen_label_rtx (); | |
36229 | char *label_buf, temp_buf[256]; | |
36230 | ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L", | |
36231 | CODE_LABEL_NUMBER (label_rtx)); | |
36232 | label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf; | |
36233 | labelname = get_identifier (label_buf); | |
36234 | add_compiler_branch_island (labelname, funname, insn_line (insn)); | |
36235 | } | |
36236 | else | |
36237 | labelname = get_prev_label (funname); | |
36238 | ||
36239 | /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl' | |
36240 | instruction will reach 'foo', otherwise link as 'bl L42'". | |
36241 | "L42" should be a 'branch island', that will do a far jump to | |
36242 | 'foo'. Branch islands are generated in | |
36243 | macho_branch_islands(). */ | |
36244 | sprintf (buf, "jbsr %%z%d,%.246s", | |
36245 | dest_operand_number, IDENTIFIER_POINTER (labelname)); | |
36246 | } | |
36247 | else | |
36248 | sprintf (buf, "bl %%z%d", dest_operand_number); | |
36249 | return buf; | |
36250 | } | |
36251 | ||
36252 | /* Generate PIC and indirect symbol stubs. */ | |
36253 | ||
36254 | void | |
36255 | machopic_output_stub (FILE *file, const char *symb, const char *stub) | |
36256 | { | |
36257 | unsigned int length; | |
36258 | char *symbol_name, *lazy_ptr_name; | |
36259 | char *local_label_0; | |
36260 | static int label = 0; | |
36261 | ||
36262 | /* Lose our funky encoding stuff so it doesn't contaminate the stub. */ | |
36263 | symb = (*targetm.strip_name_encoding) (symb); | |
36264 | ||
36265 | ||
36266 | length = strlen (symb); | |
36267 | symbol_name = XALLOCAVEC (char, length + 32); | |
36268 | GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length); | |
36269 | ||
36270 | lazy_ptr_name = XALLOCAVEC (char, length + 32); | |
36271 | GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length); | |
36272 | ||
36273 | if (flag_pic == 2) | |
36274 | switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]); | |
36275 | else | |
36276 | switch_to_section (darwin_sections[machopic_symbol_stub1_section]); | |
36277 | ||
36278 | if (flag_pic == 2) | |
36279 | { | |
36280 | fprintf (file, "\t.align 5\n"); | |
36281 | ||
36282 | fprintf (file, "%s:\n", stub); | |
36283 | fprintf (file, "\t.indirect_symbol %s\n", symbol_name); | |
36284 | ||
36285 | label++; | |
36286 | local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\"")); | |
36287 | sprintf (local_label_0, "\"L%011d$spb\"", label); | |
36288 | ||
36289 | fprintf (file, "\tmflr r0\n"); | |
36290 | if (TARGET_LINK_STACK) | |
36291 | { | |
36292 | char name[32]; | |
36293 | get_ppc476_thunk_name (name); | |
36294 | fprintf (file, "\tbl %s\n", name); | |
36295 | fprintf (file, "%s:\n\tmflr r11\n", local_label_0); | |
36296 | } | |
36297 | else | |
36298 | { | |
36299 | fprintf (file, "\tbcl 20,31,%s\n", local_label_0); | |
36300 | fprintf (file, "%s:\n\tmflr r11\n", local_label_0); | |
36301 | } | |
36302 | fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n", | |
36303 | lazy_ptr_name, local_label_0); | |
36304 | fprintf (file, "\tmtlr r0\n"); | |
36305 | fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n", | |
36306 | (TARGET_64BIT ? "ldu" : "lwzu"), | |
36307 | lazy_ptr_name, local_label_0); | |
36308 | fprintf (file, "\tmtctr r12\n"); | |
36309 | fprintf (file, "\tbctr\n"); | |
36310 | } | |
36311 | else | |
36312 | { | |
36313 | fprintf (file, "\t.align 4\n"); | |
36314 | ||
36315 | fprintf (file, "%s:\n", stub); | |
36316 | fprintf (file, "\t.indirect_symbol %s\n", symbol_name); | |
36317 | ||
36318 | fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name); | |
36319 | fprintf (file, "\t%s r12,lo16(%s)(r11)\n", | |
36320 | (TARGET_64BIT ? "ldu" : "lwzu"), | |
36321 | lazy_ptr_name); | |
36322 | fprintf (file, "\tmtctr r12\n"); | |
36323 | fprintf (file, "\tbctr\n"); | |
36324 | } | |
36325 | ||
36326 | switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]); | |
36327 | fprintf (file, "%s:\n", lazy_ptr_name); | |
36328 | fprintf (file, "\t.indirect_symbol %s\n", symbol_name); | |
36329 | fprintf (file, "%sdyld_stub_binding_helper\n", | |
36330 | (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t")); | |
36331 | } | |
36332 | ||
36333 | /* Legitimize PIC addresses. If the address is already | |
36334 | position-independent, we return ORIG. Newly generated | |
36335 | position-independent addresses go into a reg. This is REG if nonzero; | |
36336 | otherwise we allocate register(s) as necessary. */ | |
36337 | ||
36338 | #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000) | |
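/* SMALL_INT accepts offsets in [-0x8000, 0x7fff].  So, illustratively,
   ORIG == (const (plus (symbol_ref "x") (const_int 8))) folds to
   (plus BASE 8) below, while an offset such as 0x12345 is instead
   forced into a register (or pushed to the constant pool during
   reload).  */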
36339 | ||
36340 | rtx | |
36341 | rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode, | |
36342 | rtx reg) | |
36343 | { | |
36344 | rtx base, offset; | |
36345 | ||
36346 | if (reg == NULL && ! reload_in_progress && ! reload_completed) | |
36347 | reg = gen_reg_rtx (Pmode); | |
36348 | ||
36349 | if (GET_CODE (orig) == CONST) | |
36350 | { | |
36351 | rtx reg_temp; | |
36352 | ||
36353 | if (GET_CODE (XEXP (orig, 0)) == PLUS | |
36354 | && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx) | |
36355 | return orig; | |
36356 | ||
36357 | gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS); | |
36358 | ||
36359 | /* Use a different reg for the intermediate value, as | |
36360 | it will be marked UNCHANGING. */ | |
36361 | reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode); | |
36362 | base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), | |
36363 | Pmode, reg_temp); | |
36364 | offset = | |
36365 | rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1), | |
36366 | Pmode, reg); | |
36367 | ||
36368 | if (GET_CODE (offset) == CONST_INT) | |
36369 | { | |
36370 | if (SMALL_INT (offset)) | |
36371 | return plus_constant (Pmode, base, INTVAL (offset)); | |
36372 | else if (! reload_in_progress && ! reload_completed) | |
36373 | offset = force_reg (Pmode, offset); | |
36374 | else | |
36375 | { | |
36376 | rtx mem = force_const_mem (Pmode, orig); | |
36377 | return machopic_legitimize_pic_address (mem, Pmode, reg); | |
36378 | } | |
36379 | } | |
36380 | return gen_rtx_PLUS (Pmode, base, offset); | |
36381 | } | |
36382 | ||
36383 | /* Fall back on generic machopic code. */ | |
36384 | return machopic_legitimize_pic_address (orig, mode, reg); | |
36385 | } | |
36386 | ||
36387 | /* Output a .machine directive for the Darwin assembler, and call | |
36388 | the generic start_file routine. */ | |
36389 | ||
36390 | static void | |
36391 | rs6000_darwin_file_start (void) | |
36392 | { | |
36393 | static const struct | |
36394 | { | |
36395 | const char *arg; | |
36396 | const char *name; | |
36397 | HOST_WIDE_INT if_set; | |
36398 | } mapping[] = { | |
36399 | { "ppc64", "ppc64", MASK_64BIT }, | |
36400 | { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 }, | |
36401 | { "power4", "ppc970", 0 }, | |
36402 | { "G5", "ppc970", 0 }, | |
36403 | { "7450", "ppc7450", 0 }, | |
36404 | { "7400", "ppc7400", MASK_ALTIVEC }, | |
36405 | { "G4", "ppc7400", 0 }, | |
36406 | { "750", "ppc750", 0 }, | |
36407 | { "740", "ppc750", 0 }, | |
36408 | { "G3", "ppc750", 0 }, | |
36409 | { "604e", "ppc604e", 0 }, | |
36410 | { "604", "ppc604", 0 }, | |
36411 | { "603e", "ppc603", 0 }, | |
36412 | { "603", "ppc603", 0 }, | |
36413 | { "601", "ppc601", 0 }, | |
36414 | { NULL, "ppc", 0 } }; | |
36415 | const char *cpu_id = ""; | |
36416 | size_t i; | |
36417 | ||
36418 | rs6000_file_start (); | |
36419 | darwin_file_start (); | |
36420 | ||
36421 | /* Determine the argument to -mcpu=. Default to G3 if not specified. */ | |
36422 | ||
36423 | if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0') | |
36424 | cpu_id = rs6000_default_cpu; | |
36425 | ||
36426 | if (global_options_set.x_rs6000_cpu_index) | |
36427 | cpu_id = processor_target_table[rs6000_cpu_index].name; | |
36428 | ||
36429 | /* Look through the mapping array. Pick the first name that either | |
36430 | matches the argument, has a bit set in IF_SET that is also set | |
36431 | in the target flags, or has a NULL name. */ | |
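/* E.g. -mcpu=G4 stops at the "G4" entry and emits ".machine ppc7400";
   a 64-bit compile with no matching -mcpu argument still stops at the
   first entry via MASK_64BIT; anything else falls through to the
   terminating "ppc" entry (illustrative).  */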
36432 | ||
36433 | i = 0; | |
36434 | while (mapping[i].arg != NULL | |
36435 | && strcmp (mapping[i].arg, cpu_id) != 0 | |
36436 | && (mapping[i].if_set & rs6000_isa_flags) == 0) | |
36437 | i++; | |
36438 | ||
36439 | fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name); | |
36440 | } | |
36441 | ||
36442 | #endif /* TARGET_MACHO */ | |
36443 | ||
36444 | #if TARGET_ELF | |
36445 | static int | |
36446 | rs6000_elf_reloc_rw_mask (void) | |
36447 | { | |
36448 | if (flag_pic) | |
36449 | return 3; | |
36450 | else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) | |
36451 | return 2; | |
36452 | else | |
36453 | return 0; | |
36454 | } | |
36455 | ||
36456 | /* Record an element in the table of global constructors. SYMBOL is | |
36457 | a SYMBOL_REF of the function to be called; PRIORITY is a number | |
36458 | between 0 and MAX_INIT_PRIORITY. | |
36459 | ||
36460 | This differs from default_named_section_asm_out_constructor in | |
36461 | that we have special handling for -mrelocatable. */ | |
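/* With MAX_INIT_PRIORITY == 65535, priority 100 lands in
   ".ctors.65435" and priority 65520 in ".ctors.00015"; the linker's
   ascending sort plus right-to-left execution of .ctors then runs the
   priority-100 constructor first, as intended (illustrative).  */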
36462 | ||
36463 | static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED; | |
36464 | static void | |
36465 | rs6000_elf_asm_out_constructor (rtx symbol, int priority) | |
36466 | { | |
36467 | const char *section = ".ctors"; | |
36468 | char buf[18]; | |
36469 | ||
36470 | if (priority != DEFAULT_INIT_PRIORITY) | |
36471 | { | |
36472 | sprintf (buf, ".ctors.%.5u", | |
36473 | /* Invert the numbering so the linker puts us in the proper | |
36474 | order; constructors are run from right to left, and the | |
36475 | linker sorts in increasing order. */ | |
36476 | MAX_INIT_PRIORITY - priority); | |
36477 | section = buf; | |
36478 | } | |
36479 | ||
36480 | switch_to_section (get_section (section, SECTION_WRITE, NULL)); | |
36481 | assemble_align (POINTER_SIZE); | |
36482 | ||
36483 | if (DEFAULT_ABI == ABI_V4 | |
36484 | && (TARGET_RELOCATABLE || flag_pic > 1)) | |
36485 | { | |
36486 | fputs ("\t.long (", asm_out_file); | |
36487 | output_addr_const (asm_out_file, symbol); | |
36488 | fputs (")@fixup\n", asm_out_file); | |
36489 | } | |
36490 | else | |
36491 | assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1); | |
36492 | } | |
36493 | ||
36494 | static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED; | |
36495 | static void | |
36496 | rs6000_elf_asm_out_destructor (rtx symbol, int priority) | |
36497 | { | |
36498 | const char *section = ".dtors"; | |
36499 | char buf[18]; | |
36500 | ||
36501 | if (priority != DEFAULT_INIT_PRIORITY) | |
36502 | { | |
36503 | sprintf (buf, ".dtors.%.5u", | |
36504 | /* Invert the numbering so the linker puts us in the proper | |
36505 | order; constructors are run from right to left, and the | |
36506 | linker sorts in increasing order. */ | |
36507 | MAX_INIT_PRIORITY - priority); | |
36508 | section = buf; | |
36509 | } | |
36510 | ||
36511 | switch_to_section (get_section (section, SECTION_WRITE, NULL)); | |
36512 | assemble_align (POINTER_SIZE); | |
36513 | ||
36514 | if (DEFAULT_ABI == ABI_V4 | |
36515 | && (TARGET_RELOCATABLE || flag_pic > 1)) | |
36516 | { | |
36517 | fputs ("\t.long (", asm_out_file); | |
36518 | output_addr_const (asm_out_file, symbol); | |
36519 | fputs (")@fixup\n", asm_out_file); | |
36520 | } | |
36521 | else | |
36522 | assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1); | |
36523 | } | |
36524 | ||
36525 | void | |
36526 | rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl) | |
36527 | { | |
36528 | if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2) | |
36529 | { | |
36530 | fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file); | |
36531 | ASM_OUTPUT_LABEL (file, name); | |
36532 | fputs (DOUBLE_INT_ASM_OP, file); | |
36533 | rs6000_output_function_entry (file, name); | |
36534 | fputs (",.TOC.@tocbase,0\n\t.previous\n", file); | |
36535 | if (DOT_SYMBOLS) | |
36536 | { | |
36537 | fputs ("\t.size\t", file); | |
36538 | assemble_name (file, name); | |
36539 | fputs (",24\n\t.type\t.", file); | |
36540 | assemble_name (file, name); | |
36541 | fputs (",@function\n", file); | |
36542 | if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl)) | |
36543 | { | |
36544 | fputs ("\t.globl\t.", file); | |
36545 | assemble_name (file, name); | |
36546 | putc ('\n', file); | |
36547 | } | |
36548 | } | |
36549 | else | |
36550 | ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function"); | |
36551 | ASM_DECLARE_RESULT (file, DECL_RESULT (decl)); | |
36552 | rs6000_output_function_entry (file, name); | |
36553 | fputs (":\n", file); | |
36554 | return; | |
36555 | } | |
36556 | ||
36557 | if (DEFAULT_ABI == ABI_V4 | |
36558 | && (TARGET_RELOCATABLE || flag_pic > 1) | |
36559 | && !TARGET_SECURE_PLT | |
36560 | && (!constant_pool_empty_p () || crtl->profile) | |
36561 | && uses_TOC ()) | |
36562 | { | |
36563 | char buf[256]; | |
36564 | ||
36565 | (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno); | |
36566 | ||
36567 | fprintf (file, "\t.long "); | |
36568 | assemble_name (file, toc_label_name); | |
36569 | need_toc_init = 1; | |
36570 | putc ('-', file); | |
36571 | ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno); | |
36572 | assemble_name (file, buf); | |
36573 | putc ('\n', file); | |
36574 | } | |
36575 | ||
36576 | ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function"); | |
36577 | ASM_DECLARE_RESULT (file, DECL_RESULT (decl)); | |
36578 | ||
36579 | if (TARGET_CMODEL == CMODEL_LARGE && rs6000_global_entry_point_needed_p ()) | |
36580 | { | |
36581 | char buf[256]; | |
36582 | ||
36583 | (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno); | |
36584 | ||
36585 | fprintf (file, "\t.quad .TOC.-"); | |
36586 | ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno); | |
36587 | assemble_name (file, buf); | |
36588 | putc ('\n', file); | |
36589 | } | |
36590 | ||
36591 | if (DEFAULT_ABI == ABI_AIX) | |
36592 | { | |
36593 | const char *desc_name, *orig_name; | |
36594 | ||
36595 | orig_name = (*targetm.strip_name_encoding) (name); | |
36596 | desc_name = orig_name; | |
36597 | while (*desc_name == '.') | |
36598 | desc_name++; | |
36599 | ||
36600 | if (TREE_PUBLIC (decl)) | |
36601 | fprintf (file, "\t.globl %s\n", desc_name); | |
36602 | ||
36603 | fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP); | |
36604 | fprintf (file, "%s:\n", desc_name); | |
36605 | fprintf (file, "\t.long %s\n", orig_name); | |
36606 | fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file); | |
36607 | fputs ("\t.long 0\n", file); | |
36608 | fprintf (file, "\t.previous\n"); | |
36609 | } | |
36610 | ASM_OUTPUT_LABEL (file, name); | |
36611 | } | |
36612 | ||
36613 | static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED; | |
36614 | static void | |
36615 | rs6000_elf_file_end (void) | |
36616 | { | |
36617 | #ifdef HAVE_AS_GNU_ATTRIBUTE | |
36618 | /* ??? The value emitted depends on options active at file end. | |
36619 | Assume anyone using #pragma or attributes that might change | |
36620 | options knows what they are doing. */ | |
36621 | if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4) | |
36622 | && rs6000_passes_float) | |
36623 | { | |
36624 | int fp; | |
36625 | ||
36626 | if (TARGET_DF_FPR | TARGET_DF_SPE) | |
36627 | fp = 1; | |
36628 | else if (TARGET_SF_FPR | TARGET_SF_SPE) | |
36629 | fp = 3; | |
36630 | else | |
36631 | fp = 2; | |
36632 | if (rs6000_passes_long_double) | |
36633 | { | |
36634 | if (!TARGET_LONG_DOUBLE_128) | |
36635 | fp |= 2 * 4; | |
36636 | else if (TARGET_IEEEQUAD) | |
36637 | fp |= 3 * 4; | |
36638 | else | |
36639 | fp |= 1 * 4; | |
36640 | } | |
36641 | fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp); | |
36642 | } | |
36643 | if (TARGET_32BIT && DEFAULT_ABI == ABI_V4) | |
36644 | { | |
36645 | if (rs6000_passes_vector) | |
36646 | fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n", | |
36647 | (TARGET_ALTIVEC_ABI ? 2 | |
36648 | : TARGET_SPE_ABI ? 3 | |
36649 | : 1)); | |
36650 | if (rs6000_returns_struct) | |
36651 | fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n", | |
36652 | aix_struct_return ? 2 : 1); | |
36653 | } | |
36654 | #endif | |
36655 | #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD) | |
36656 | if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2) | |
36657 | file_end_indicate_exec_stack (); | |
36658 | #endif | |
36659 | ||
36660 | if (flag_split_stack) | |
36661 | file_end_indicate_split_stack (); | |
36662 | ||
36663 | if (cpu_builtin_p) | |
36664 | { | |
36665 | /* We have expanded a CPU builtin, so we need to emit a reference to | |
36666 | the special symbol that LIBC uses to declare that it supports the | |
36667 | AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. */ | |
36668 | switch_to_section (data_section); | |
36669 | fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3); | |
36670 | fprintf (asm_out_file, "\t%s %s\n", | |
36671 | TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol); | |
36672 | } | |
36673 | } | |
36674 | #endif | |
36675 | ||
36676 | #if TARGET_XCOFF | |
36677 | ||
36678 | #ifndef HAVE_XCOFF_DWARF_EXTRAS | |
36679 | #define HAVE_XCOFF_DWARF_EXTRAS 0 | |
36680 | #endif | |
36681 | ||
36682 | static enum unwind_info_type | |
36683 | rs6000_xcoff_debug_unwind_info (void) | |
36684 | { | |
36685 | return UI_NONE; | |
36686 | } | |
36687 | ||
36688 | static void | |
36689 | rs6000_xcoff_asm_output_anchor (rtx symbol) | |
36690 | { | |
36691 | char buffer[100]; | |
36692 | ||
36693 | sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC, | |
36694 | SYMBOL_REF_BLOCK_OFFSET (symbol)); | |
36695 | fprintf (asm_out_file, "%s", SET_ASM_OP); | |
36696 | RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0)); | |
36697 | fprintf (asm_out_file, ","); | |
36698 | RS6000_OUTPUT_BASENAME (asm_out_file, buffer); | |
36699 | fprintf (asm_out_file, "\n"); | |
36700 | } | |
36701 | ||
36702 | static void | |
36703 | rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name) | |
36704 | { | |
36705 | fputs (GLOBAL_ASM_OP, stream); | |
36706 | RS6000_OUTPUT_BASENAME (stream, name); | |
36707 | putc ('\n', stream); | |
36708 | } | |
36709 | ||
36710 | /* A get_unnamed_decl callback, used for read-only sections. DIRECTIVE | |
36711 | points to the section string variable. */ | |
36712 | ||
36713 | static void | |
36714 | rs6000_xcoff_output_readonly_section_asm_op (const void *directive) | |
36715 | { | |
36716 | fprintf (asm_out_file, "\t.csect %s[RO],%s\n", | |
36717 | *(const char *const *) directive, | |
36718 | XCOFF_CSECT_DEFAULT_ALIGNMENT_STR); | |
36719 | } | |
36720 | ||
36721 | /* Likewise for read-write sections. */ | |
36722 | ||
36723 | static void | |
36724 | rs6000_xcoff_output_readwrite_section_asm_op (const void *directive) | |
36725 | { | |
36726 | fprintf (asm_out_file, "\t.csect %s[RW],%s\n", | |
36727 | *(const char *const *) directive, | |
36728 | XCOFF_CSECT_DEFAULT_ALIGNMENT_STR); | |
36729 | } | |
36730 | ||
36731 | static void | |
36732 | rs6000_xcoff_output_tls_section_asm_op (const void *directive) | |
36733 | { | |
36734 | fprintf (asm_out_file, "\t.csect %s[TL],%s\n", | |
36735 | *(const char *const *) directive, | |
36736 | XCOFF_CSECT_DEFAULT_ALIGNMENT_STR); | |
36737 | } | |
36738 | ||
36739 | /* A get_unnamed_section callback, used for switching to toc_section. */ | |
36740 | ||
36741 | static void | |
36742 | rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED) | |
36743 | { | |
36744 | if (TARGET_MINIMAL_TOC) | |
36745 | { | |
36746 | /* toc_section is always selected at least once from | |
36747 | rs6000_xcoff_file_start, so this is guaranteed to | |
36748 | be defined exactly once in each file. */ | |
36749 | if (!toc_initialized) | |
36750 | { | |
36751 | fputs ("\t.toc\nLCTOC..1:\n", asm_out_file); | |
36752 | fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file); | |
36753 | toc_initialized = 1; | |
36754 | } | |
36755 | fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n", | |
36756 | (TARGET_32BIT ? "" : ",3")); | |
36757 | } | |
36758 | else | |
36759 | fputs ("\t.toc\n", asm_out_file); | |
36760 | } | |
36761 | ||
36762 | /* Implement TARGET_ASM_INIT_SECTIONS. */ | |
36763 | ||
36764 | static void | |
36765 | rs6000_xcoff_asm_init_sections (void) | |
36766 | { | |
36767 | read_only_data_section | |
36768 | = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op, | |
36769 | &xcoff_read_only_section_name); | |
36770 | ||
36771 | private_data_section | |
36772 | = get_unnamed_section (SECTION_WRITE, | |
36773 | rs6000_xcoff_output_readwrite_section_asm_op, | |
36774 | &xcoff_private_data_section_name); | |
36775 | ||
36776 | tls_data_section | |
36777 | = get_unnamed_section (SECTION_TLS, | |
36778 | rs6000_xcoff_output_tls_section_asm_op, | |
36779 | &xcoff_tls_data_section_name); | |
36780 | ||
36781 | tls_private_data_section | |
36782 | = get_unnamed_section (SECTION_TLS, | |
36783 | rs6000_xcoff_output_tls_section_asm_op, | |
36784 | &xcoff_private_data_section_name); | |
36785 | ||
36786 | read_only_private_data_section | |
36787 | = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op, | |
36788 | &xcoff_private_data_section_name); | |
36789 | ||
36790 | toc_section | |
36791 | = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL); | |
36792 | ||
36793 | readonly_data_section = read_only_data_section; | |
36794 | } | |
36795 | ||
36796 | static int | |
36797 | rs6000_xcoff_reloc_rw_mask (void) | |
36798 | { | |
36799 | return 3; | |
36800 | } | |
36801 | ||
36802 | static void | |
36803 | rs6000_xcoff_asm_named_section (const char *name, unsigned int flags, | |
36804 | tree decl ATTRIBUTE_UNUSED) | |
36805 | { | |
36806 | int smclass; | |
36807 | static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" }; | |
36808 | ||
36809 | if (flags & SECTION_EXCLUDE) | |
36810 | smclass = 4; | |
36811 | else if (flags & SECTION_DEBUG) | |
36812 | { | |
36813 | fprintf (asm_out_file, "\t.dwsect %s\n", name); | |
36814 | return; | |
36815 | } | |
36816 | else if (flags & SECTION_CODE) | |
36817 | smclass = 0; | |
36818 | else if (flags & SECTION_TLS) | |
36819 | smclass = 3; | |
36820 | else if (flags & SECTION_WRITE) | |
36821 | smclass = 2; | |
36822 | else | |
36823 | smclass = 1; | |
36824 | ||
36825 | fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n", | |
36826 | (flags & SECTION_CODE) ? "." : "", | |
36827 | name, suffix[smclass], flags & SECTION_ENTSIZE); | |
36828 | } | |
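/* Illustratively, a writable named section "mydata" whose entsize bits
   encode an alignment of 3 comes out as ".csect mydata[RW],3", and a
   code section "hot" as ".csect .hot[PR],2"; the alignment encoding
   itself is produced by rs6000_xcoff_section_type_flags below.  */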
36829 | ||
36830 | #define IN_NAMED_SECTION(DECL) \ | |
36831 | ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \ | |
36832 | && DECL_SECTION_NAME (DECL) != NULL) | |
36833 | ||
36834 | static section * | |
36835 | rs6000_xcoff_select_section (tree decl, int reloc, | |
36836 | unsigned HOST_WIDE_INT align) | |
36837 | { | |
36838 | /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into | |
36839 | a named section. */ | |
36840 | if (align > BIGGEST_ALIGNMENT) | |
36841 | { | |
36842 | resolve_unique_section (decl, reloc, true); | |
36843 | if (IN_NAMED_SECTION (decl)) | |
36844 | return get_named_section (decl, NULL, reloc); | |
36845 | } | |
36846 | ||
36847 | if (decl_readonly_section (decl, reloc)) | |
36848 | { | |
36849 | if (TREE_PUBLIC (decl)) | |
36850 | return read_only_data_section; | |
36851 | else | |
36852 | return read_only_private_data_section; | |
36853 | } | |
36854 | else | |
36855 | { | |
36856 | #if HAVE_AS_TLS | |
36857 | if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl)) | |
36858 | { | |
36859 | if (TREE_PUBLIC (decl)) | |
36860 | return tls_data_section; | |
36861 | else if (bss_initializer_p (decl)) | |
36862 | { | |
36863 | /* Convert to COMMON to emit in BSS. */ | |
36864 | DECL_COMMON (decl) = 1; | |
36865 | return tls_comm_section; | |
36866 | } | |
36867 | else | |
36868 | return tls_private_data_section; | |
36869 | } | |
36870 | else | |
36871 | #endif | |
36872 | if (TREE_PUBLIC (decl)) | |
36873 | return data_section; | |
36874 | else | |
36875 | return private_data_section; | |
36876 | } | |
36877 | } | |
36878 | ||
36879 | static void | |
36880 | rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED) | |
36881 | { | |
36882 | const char *name; | |
36883 | ||
36884 | /* Use select_section for private data and uninitialized data with | |
36885 | alignment <= BIGGEST_ALIGNMENT. */ | |
36886 | if (!TREE_PUBLIC (decl) | |
36887 | || DECL_COMMON (decl) | |
36888 | || (DECL_INITIAL (decl) == NULL_TREE | |
36889 | && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT) | |
36890 | || DECL_INITIAL (decl) == error_mark_node | |
36891 | || (flag_zero_initialized_in_bss | |
36892 | && initializer_zerop (DECL_INITIAL (decl)))) | |
36893 | return; | |
36894 | ||
36895 | name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); | |
36896 | name = (*targetm.strip_name_encoding) (name); | |
36897 | set_decl_section_name (decl, name); | |
36898 | } | |
36899 | ||
36900 | /* Select the section for a constant in the constant pool. | |
36901 | ||
36902 | On RS/6000, all constants are in the private read-only data area. | |
36903 | However, if this is being placed in the TOC it must be output as a | |
36904 | toc entry. */ | |
36905 | ||
36906 | static section * | |
36907 | rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x, | |
36908 | unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED) | |
36909 | { | |
36910 | if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode)) | |
36911 | return toc_section; | |
36912 | else | |
36913 | return read_only_private_data_section; | |
36914 | } | |
36915 | ||
36916 | /* Remove any trailing [DS] or the like from the symbol name. */ | |
36917 | ||
36918 | static const char * | |
36919 | rs6000_xcoff_strip_name_encoding (const char *name) | |
36920 | { | |
36921 | size_t len; | |
36922 | if (*name == '*') | |
36923 | name++; | |
36924 | len = strlen (name); | |
36925 | if (name[len - 1] == ']') | |
36926 | return ggc_alloc_string (name, len - 4); | |
36927 | else | |
36928 | return name; | |
36929 | } | |
36930 | ||
36931 | /* Section attributes. AIX is always PIC. */ | |
36932 | ||
36933 | static unsigned int | |
36934 | rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc) | |
36935 | { | |
36936 | unsigned int align; | |
36937 | unsigned int flags = default_section_type_flags (decl, name, reloc); | |
36938 | ||
36939 | /* Align to at least UNIT size. */ | |
36940 | if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl)) | |
36941 | align = MIN_UNITS_PER_WORD; | |
36942 | else | |
36943 | /* Increase alignment of large objects if not already stricter. */ | |
36944 | align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT), | |
36945 | int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD | |
36946 | ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD); | |
36947 | ||
36948 | return flags | (exact_log2 (align) & SECTION_ENTSIZE); | |
36949 | } | |
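/* For example, a code section, or a section with no DECL, is given
   align = MIN_UNITS_PER_WORD (4 bytes on a 32-bit target), so
   exact_log2 (4) = 2 ends up in the SECTION_ENTSIZE bits, while a
   16-byte-aligned object stores exact_log2 (16) = 4 there instead.  */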
36950 | ||
36951 | /* Output at beginning of assembler file. | |
36952 | ||
36953 | Initialize the section names for the RS/6000 at this point. | |
36954 | ||
36955 | Specify filename, including full path, to assembler. | |
36956 | ||
36957 | We want to go into the TOC section so at least one .toc will be emitted. | |
36958 | Also, in order to output proper .bs/.es pairs, we need at least one static | |
36959 | [RW] section emitted. | |
36960 | ||
36961 | Finally, declare mcount when profiling to make the assembler happy. */ | |
36962 | ||
36963 | static void | |
36964 | rs6000_xcoff_file_start (void) | |
36965 | { | |
36966 | rs6000_gen_section_name (&xcoff_bss_section_name, | |
36967 | main_input_filename, ".bss_"); | |
36968 | rs6000_gen_section_name (&xcoff_private_data_section_name, | |
36969 | main_input_filename, ".rw_"); | |
36970 | rs6000_gen_section_name (&xcoff_read_only_section_name, | |
36971 | main_input_filename, ".ro_"); | |
36972 | rs6000_gen_section_name (&xcoff_tls_data_section_name, | |
36973 | main_input_filename, ".tls_"); | |
36974 | rs6000_gen_section_name (&xcoff_tbss_section_name, | |
36975 | main_input_filename, ".tbss_[UL]"); | |
36976 | ||
36977 | fputs ("\t.file\t", asm_out_file); | |
36978 | output_quoted_string (asm_out_file, main_input_filename); | |
36979 | fputc ('\n', asm_out_file); | |
36980 | if (write_symbols != NO_DEBUG) | |
36981 | switch_to_section (private_data_section); | |
36982 | switch_to_section (toc_section); | |
36983 | switch_to_section (text_section); | |
36984 | if (profile_flag) | |
36985 | fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT); | |
36986 | rs6000_file_start (); | |
36987 | } | |
36988 | ||
36989 | /* Output at end of assembler file. | |
36990 | On the RS/6000, referencing data should automatically pull in text. */ | |
36991 | ||
36992 | static void | |
36993 | rs6000_xcoff_file_end (void) | |
36994 | { | |
36995 | switch_to_section (text_section); | |
36996 | fputs ("_section_.text:\n", asm_out_file); | |
36997 | switch_to_section (data_section); | |
36998 | fputs (TARGET_32BIT | |
36999 | ? "\t.long _section_.text\n" : "\t.llong _section_.text\n", | |
37000 | asm_out_file); | |
37001 | } | |
37002 | ||
37003 | struct declare_alias_data | |
37004 | { | |
37005 | FILE *file; | |
37006 | bool function_descriptor; | |
37007 | }; | |
37008 | ||
37009 | /* Declare alias N. A helper callback for call_for_symbol_and_aliases. */ | |
37010 | ||
37011 | static bool | |
37012 | rs6000_declare_alias (struct symtab_node *n, void *d) | |
37013 | { | |
37014 | struct declare_alias_data *data = (struct declare_alias_data *)d; | |
37015 | /* The main symbol is output specially, because the varasm machinery does | |
37016 | part of the job for us; we do not need to emit .globl/.lglobl for it. */ | |
37017 | if (!n->alias || n->weakref) | |
37018 | return false; | |
37019 | ||
37020 | if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl))) | |
37021 | return false; | |
37022 | ||
37023 | /* Prevent assemble_alias from trying to use the .set pseudo-op, | |
37024 | which does not behave as expected by the middle-end. */ | |
37025 | TREE_ASM_WRITTEN (n->decl) = true; | |
37026 | ||
37027 | const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl)); | |
37028 | char *buffer = (char *) alloca (strlen (name) + 2); | |
37029 | char *p; | |
37030 | int dollar_inside = 0; | |
37031 | ||
37032 | strcpy (buffer, name); | |
37033 | p = strchr (buffer, '$'); | |
37034 | while (p) { | |
37035 | *p = '_'; | |
37036 | dollar_inside++; | |
37037 | p = strchr (p + 1, '$'); | |
37038 | } | |
37039 | if (TREE_PUBLIC (n->decl)) | |
37040 | { | |
37041 | if (!RS6000_WEAK || !DECL_WEAK (n->decl)) | |
37042 | { | |
37043 | if (dollar_inside) { | |
37044 | if (data->function_descriptor) | |
37045 | fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name); | |
37046 | fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name); | |
37047 | } | |
37048 | if (data->function_descriptor) | |
37049 | { | |
37050 | fputs ("\t.globl .", data->file); | |
37051 | RS6000_OUTPUT_BASENAME (data->file, buffer); | |
37052 | putc ('\n', data->file); | |
37053 | } | |
37054 | fputs ("\t.globl ", data->file); | |
37055 | RS6000_OUTPUT_BASENAME (data->file, buffer); | |
37056 | putc ('\n', data->file); | |
37057 | } | |
37058 | #ifdef ASM_WEAKEN_DECL | |
37059 | else if (DECL_WEAK (n->decl) && !data->function_descriptor) | |
37060 | ASM_WEAKEN_DECL (data->file, n->decl, name, NULL); | |
37061 | #endif | |
37062 | } | |
37063 | else | |
37064 | { | |
37065 | if (dollar_inside) | |
37066 | { | |
37067 | if (data->function_descriptor) | |
37068 | fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name); | |
37069 | fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name); | |
37070 | } | |
37071 | if (data->function_descriptor) | |
37072 | { | |
37073 | fputs ("\t.lglobl .", data->file); | |
37074 | RS6000_OUTPUT_BASENAME (data->file, buffer); | |
37075 | putc ('\n', data->file); | |
37076 | } | |
37077 | fputs ("\t.lglobl ", data->file); | |
37078 | RS6000_OUTPUT_BASENAME (data->file, buffer); | |
37079 | putc ('\n', data->file); | |
37080 | } | |
37081 | if (data->function_descriptor) | |
37082 | fputs (".", data->file); | |
37083 | RS6000_OUTPUT_BASENAME (data->file, buffer); | |
37084 | fputs (":\n", data->file); | |
37085 | return false; | |
37086 | } | |
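/* As an illustration of the '$' handling above: for a public, non-weak
   alias named "foo$bar", the loop rewrites the buffer to "foo_bar" and
   the emitted assembly is

	.rename foo_bar,"foo$bar"
	.globl foo_bar
   foo_bar:

   so the assembler only ever sees a '$'-free label while the object file
   keeps the original name.  */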
37087 | ||
37088 | ||
37089 | #ifdef HAVE_GAS_HIDDEN | |
37090 | /* Helper function to calculate visibility of a DECL | |
37091 | and return the value as a const string. */ | |
37092 | ||
37093 | static const char * | |
37094 | rs6000_xcoff_visibility (tree decl) | |
37095 | { | |
37096 | static const char * const visibility_types[] = { | |
37097 | "", ",protected", ",hidden", ",internal" | |
37098 | }; | |
37099 | ||
37100 | enum symbol_visibility vis = DECL_VISIBILITY (decl); | |
37101 | ||
37102 | if (TREE_CODE (decl) == FUNCTION_DECL | |
37103 | && cgraph_node::get (decl) | |
37104 | && cgraph_node::get (decl)->instrumentation_clone | |
37105 | && cgraph_node::get (decl)->instrumented_version) | |
37106 | vis = DECL_VISIBILITY (cgraph_node::get (decl)->instrumented_version->decl); | |
37107 | ||
37108 | return visibility_types[vis]; | |
37109 | } | |
37110 | #endif | |
37111 | ||
37112 | ||
37113 | /* This macro produces the initial definition of a function name. | |
37114 | On the RS/6000, we need to place an extra '.' in the function name and | |
37115 | output the function descriptor. | |
37116 | Dollar signs are converted to underscores. | |
37117 | ||
37118 | The csect for the function will have already been created when | |
37119 | text_section was selected. We do have to go back to that csect, however. | |
37120 | ||
37121 | The third and fourth parameters to the .function pseudo-op (16 and 044) | |
37122 | are placeholders which no longer have any use. | |
37123 | ||
37124 | Because the AIX assembler's .set command has unexpected semantics, we output | |
37125 | all aliases as alternative labels in front of the definition. */ | |
37126 | ||
37127 | void | |
37128 | rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl) | |
37129 | { | |
37130 | char *buffer = (char *) alloca (strlen (name) + 1); | |
37131 | char *p; | |
37132 | int dollar_inside = 0; | |
37133 | struct declare_alias_data data = {file, false}; | |
37134 | ||
37135 | strcpy (buffer, name); | |
37136 | p = strchr (buffer, '$'); | |
37137 | while (p) { | |
37138 | *p = '_'; | |
37139 | dollar_inside++; | |
37140 | p = strchr (p + 1, '$'); | |
37141 | } | |
37142 | if (TREE_PUBLIC (decl)) | |
37143 | { | |
37144 | if (!RS6000_WEAK || !DECL_WEAK (decl)) | |
37145 | { | |
37146 | if (dollar_inside) { | |
37147 | fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name); | |
37148 | fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name); | |
37149 | } | |
37150 | fputs ("\t.globl .", file); | |
37151 | RS6000_OUTPUT_BASENAME (file, buffer); | |
37152 | #ifdef HAVE_GAS_HIDDEN | |
37153 | fputs (rs6000_xcoff_visibility (decl), file); | |
37154 | #endif | |
37155 | putc ('\n', file); | |
37156 | } | |
37157 | } | |
37158 | else | |
37159 | { | |
37160 | if (dollar_inside) { | |
37161 | fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name); | |
37162 | fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name); | |
37163 | } | |
37164 | fputs ("\t.lglobl .", file); | |
37165 | RS6000_OUTPUT_BASENAME (file, buffer); | |
37166 | putc ('\n', file); | |
37167 | } | |
37168 | fputs ("\t.csect ", file); | |
37169 | RS6000_OUTPUT_BASENAME (file, buffer); | |
37170 | fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file); | |
37171 | RS6000_OUTPUT_BASENAME (file, buffer); | |
37172 | fputs (":\n", file); | |
37173 | symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, | |
37174 | &data, true); | |
37175 | fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file); | |
37176 | RS6000_OUTPUT_BASENAME (file, buffer); | |
37177 | fputs (", TOC[tc0], 0\n", file); | |
37178 | in_section = NULL; | |
37179 | switch_to_section (function_section (decl)); | |
37180 | putc ('.', file); | |
37181 | RS6000_OUTPUT_BASENAME (file, buffer); | |
37182 | fputs (":\n", file); | |
37183 | data.function_descriptor = true; | |
37184 | symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, | |
37185 | &data, true); | |
37186 | if (!DECL_IGNORED_P (decl)) | |
37187 | { | |
37188 | if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG) | |
37189 | xcoffout_declare_function (file, decl, buffer); | |
37190 | else if (write_symbols == DWARF2_DEBUG) | |
37191 | { | |
37192 | name = (*targetm.strip_name_encoding) (name); | |
37193 | fprintf (file, "\t.function .%s,.%s,2,0\n", name, name); | |
37194 | } | |
37195 | } | |
37196 | return; | |
37197 | } | |
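/* For reference, a plausible 64-bit expansion of the above for a public,
   non-weak function "foo" (the exact text csect depends on
   function_section):

	.globl .foo
	.csect foo[DS],3
   foo:
	.llong .foo, TOC[tc0], 0
	.csect .text[PR]
   .foo:

   i.e. "foo" labels the three-word function descriptor and ".foo" labels
   the code itself.  */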
37198 | ||
37199 | ||
37200 | /* Output assembly language to globalize a symbol from a DECL, | |
37201 | possibly with visibility. */ | |
37202 | ||
37203 | void | |
37204 | rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl) | |
37205 | { | |
37206 | const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0); | |
37207 | fputs (GLOBAL_ASM_OP, stream); | |
37208 | RS6000_OUTPUT_BASENAME (stream, name); | |
37209 | #ifdef HAVE_GAS_HIDDEN | |
37210 | fputs (rs6000_xcoff_visibility (decl), stream); | |
37211 | #endif | |
37212 | putc ('\n', stream); | |
37213 | } | |
37214 | ||
37215 | /* Output assembly language to define a symbol as COMMON from a DECL, | |
37216 | possibly with visibility. */ | |
37217 | ||
37218 | void | |
37219 | rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream, | |
37220 | tree decl ATTRIBUTE_UNUSED, | |
37221 | const char *name, | |
37222 | unsigned HOST_WIDE_INT size, | |
37223 | unsigned HOST_WIDE_INT align) | |
37224 | { | |
37225 | unsigned HOST_WIDE_INT align2 = 2; | |
37226 | ||
37227 | if (align > 32) | |
37228 | align2 = floor_log2 (align / BITS_PER_UNIT); | |
37229 | else if (size > 4) | |
37230 | align2 = 3; | |
37231 | ||
37232 | fputs (COMMON_ASM_OP, stream); | |
37233 | RS6000_OUTPUT_BASENAME (stream, name); | |
37234 | ||
37235 | fprintf (stream, | |
37236 | "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED, | |
37237 | size, align2); | |
37238 | ||
37239 | #ifdef HAVE_GAS_HIDDEN | |
37240 | fputs (rs6000_xcoff_visibility (decl), stream); | |
37241 | #endif | |
37242 | putc ('\n', stream); | |
37243 | } | |
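/* Worked example of the alignment encoding above: a 16-byte-aligned
   common block has ALIGN == 128 bits, so align2 = floor_log2 (128 / 8)
   = 4 and we emit ".comm name,size,4"; a block of more than four bytes
   with a small alignment still gets align2 = 3 (a doubleword boundary),
   and anything else the default of 2.  */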
37244 | ||
37245 | /* This macro produces the initial definition of an object (variable) name. | |
37246 | Because the AIX assembler's .set command has unexpected semantics, we output | |
37247 | all aliases as alternative labels in front of the definition. */ | |
37248 | ||
37249 | void | |
37250 | rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl) | |
37251 | { | |
37252 | struct declare_alias_data data = {file, false}; | |
37253 | RS6000_OUTPUT_BASENAME (file, name); | |
37254 | fputs (":\n", file); | |
37255 | symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, | |
37256 | &data, true); | |
37257 | } | |
37258 | ||
37259 | /* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'. */ | |
37260 | ||
37261 | void | |
37262 | rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label) | |
37263 | { | |
37264 | fputs (integer_asm_op (size, FALSE), file); | |
37265 | assemble_name (file, label); | |
37266 | fputs ("-$", file); | |
37267 | } | |
37268 | ||
37269 | /* Output a symbol offset relative to the dbase for the current object. | |
37270 | We use __gcc_unwind_dbase as an arbitrary base for dbase and assume | |
37271 | signed offsets. | |
37272 | ||
37273 | __gcc_unwind_dbase is embedded in all executables/libraries through | |
37274 | libgcc/config/rs6000/crtdbase.S. */ | |
37275 | ||
37276 | void | |
37277 | rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label) | |
37278 | { | |
37279 | fputs (integer_asm_op (size, FALSE), file); | |
37280 | assemble_name (file, label); | |
37281 | fputs("-__gcc_unwind_dbase", file); | |
37282 | } | |
37283 | ||
37284 | #ifdef HAVE_AS_TLS | |
37285 | static void | |
37286 | rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first) | |
37287 | { | |
37288 | rtx symbol; | |
37289 | int flags; | |
37290 | const char *symname; | |
37291 | ||
37292 | default_encode_section_info (decl, rtl, first); | |
37293 | ||
37294 | /* Careful not to prod global register variables. */ | |
37295 | if (!MEM_P (rtl)) | |
37296 | return; | |
37297 | symbol = XEXP (rtl, 0); | |
37298 | if (GET_CODE (symbol) != SYMBOL_REF) | |
37299 | return; | |
37300 | ||
37301 | flags = SYMBOL_REF_FLAGS (symbol); | |
37302 | ||
37303 | if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl)) | |
37304 | flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO; | |
37305 | ||
37306 | SYMBOL_REF_FLAGS (symbol) = flags; | |
37307 | ||
37308 | /* Append mapping class to extern decls. */ | |
37309 | symname = XSTR (symbol, 0); | |
37310 | if (decl /* sync condition with assemble_external () */ | |
37311 | && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl) | |
37312 | && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl)) | |
37313 | || TREE_CODE (decl) == FUNCTION_DECL) | |
37314 | && symname[strlen (symname) - 1] != ']') | |
37315 | { | |
37316 | char *newname = (char *) alloca (strlen (symname) + 5); | |
37317 | strcpy (newname, symname); | |
37318 | strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL | |
37319 | ? "[DS]" : "[UA]")); | |
37320 | XSTR (symbol, 0) = ggc_strdup (newname); | |
37321 | } | |
37322 | } | |
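/* For example, an external function "bar" is renamed "bar[DS]" and an
   external non-TLS variable "baz" becomes "baz[UA]", giving the AIX
   assembler the mapping class it expects for undefined symbols.  */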
37323 | #endif /* HAVE_AS_TLS */ | |
37324 | #endif /* TARGET_XCOFF */ | |
37325 | ||
37326 | void | |
37327 | rs6000_asm_weaken_decl (FILE *stream, tree decl, | |
37328 | const char *name, const char *val) | |
37329 | { | |
37330 | fputs ("\t.weak\t", stream); | |
37331 | RS6000_OUTPUT_BASENAME (stream, name); | |
37332 | if (decl && TREE_CODE (decl) == FUNCTION_DECL | |
37333 | && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS) | |
37334 | { | |
37335 | if (TARGET_XCOFF) | |
37336 | fputs ("[DS]", stream); | |
37337 | #if TARGET_XCOFF && HAVE_GAS_HIDDEN | |
37338 | if (TARGET_XCOFF) | |
37339 | fputs (rs6000_xcoff_visibility (decl), stream); | |
37340 | #endif | |
37341 | fputs ("\n\t.weak\t.", stream); | |
37342 | RS6000_OUTPUT_BASENAME (stream, name); | |
37343 | } | |
37344 | #if TARGET_XCOFF && HAVE_GAS_HIDDEN | |
37345 | if (TARGET_XCOFF) | |
37346 | fputs (rs6000_xcoff_visibility (decl), stream); | |
37347 | #endif | |
37348 | fputc ('\n', stream); | |
37349 | if (val) | |
37350 | { | |
37351 | #ifdef ASM_OUTPUT_DEF | |
37352 | ASM_OUTPUT_DEF (stream, name, val); | |
37353 | #endif | |
37354 | if (decl && TREE_CODE (decl) == FUNCTION_DECL | |
37355 | && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS) | |
37356 | { | |
37357 | fputs ("\t.set\t.", stream); | |
37358 | RS6000_OUTPUT_BASENAME (stream, name); | |
37359 | fputs (",.", stream); | |
37360 | RS6000_OUTPUT_BASENAME (stream, val); | |
37361 | fputc ('\n', stream); | |
37362 | } | |
37363 | } | |
37364 | } | |
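/* Illustratively, on AIX with dot symbols a weak function "foo" defined
   as an alias of "bar" produces (assuming the usual ASM_OUTPUT_DEF
   expansion):

	.weak foo[DS]
	.weak .foo
	.set foo,bar
	.set .foo,.bar
 */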
37365 | ||
37366 | ||
37367 | /* Return true if INSN should not be copied. */ | |
37368 | ||
37369 | static bool | |
37370 | rs6000_cannot_copy_insn_p (rtx_insn *insn) | |
37371 | { | |
37372 | return recog_memoized (insn) >= 0 | |
37373 | && get_attr_cannot_copy (insn); | |
37374 | } | |
37375 | ||
37376 | /* Compute a (partial) cost for rtx X. Return true if the complete | |
37377 | cost has been computed, and false if subexpressions should be | |
37378 | scanned. In either case, *TOTAL contains the cost result. */ | |
37379 | ||
37380 | static bool | |
37381 | rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code, | |
37382 | int opno ATTRIBUTE_UNUSED, int *total, bool speed) | |
37383 | { | |
37384 | int code = GET_CODE (x); | |
37385 | ||
37386 | switch (code) | |
37387 | { | |
37388 | /* On the RS/6000, if it is valid in the insn, it is free. */ | |
37389 | case CONST_INT: | |
37390 | if (((outer_code == SET | |
37391 | || outer_code == PLUS | |
37392 | || outer_code == MINUS) | |
37393 | && (satisfies_constraint_I (x) | |
37394 | || satisfies_constraint_L (x))) | |
37395 | || (outer_code == AND | |
37396 | && (satisfies_constraint_K (x) | |
37397 | || (mode == SImode | |
37398 | ? satisfies_constraint_L (x) | |
37399 | : satisfies_constraint_J (x)))) | |
37400 | || ((outer_code == IOR || outer_code == XOR) | |
37401 | && (satisfies_constraint_K (x) | |
37402 | || (mode == SImode | |
37403 | ? satisfies_constraint_L (x) | |
37404 | : satisfies_constraint_J (x)))) | |
37405 | || outer_code == ASHIFT | |
37406 | || outer_code == ASHIFTRT | |
37407 | || outer_code == LSHIFTRT | |
37408 | || outer_code == ROTATE | |
37409 | || outer_code == ROTATERT | |
37410 | || outer_code == ZERO_EXTRACT | |
37411 | || (outer_code == MULT | |
37412 | && satisfies_constraint_I (x)) | |
37413 | || ((outer_code == DIV || outer_code == UDIV | |
37414 | || outer_code == MOD || outer_code == UMOD) | |
37415 | && exact_log2 (INTVAL (x)) >= 0) | |
37416 | || (outer_code == COMPARE | |
37417 | && (satisfies_constraint_I (x) | |
37418 | || satisfies_constraint_K (x))) | |
37419 | || ((outer_code == EQ || outer_code == NE) | |
37420 | && (satisfies_constraint_I (x) | |
37421 | || satisfies_constraint_K (x) | |
37422 | || (mode == SImode | |
37423 | ? satisfies_constraint_L (x) | |
37424 | : satisfies_constraint_J (x)))) | |
37425 | || (outer_code == GTU | |
37426 | && satisfies_constraint_I (x)) | |
37427 | || (outer_code == LTU | |
37428 | && satisfies_constraint_P (x))) | |
37429 | { | |
37430 | *total = 0; | |
37431 | return true; | |
37432 | } | |
37433 | else if ((outer_code == PLUS | |
37434 | && reg_or_add_cint_operand (x, VOIDmode)) | |
37435 | || (outer_code == MINUS | |
37436 | && reg_or_sub_cint_operand (x, VOIDmode)) | |
37437 | || ((outer_code == SET | |
37438 | || outer_code == IOR | |
37439 | || outer_code == XOR) | |
37440 | && (INTVAL (x) | |
37441 | & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0)) | |
37442 | { | |
37443 | *total = COSTS_N_INSNS (1); | |
37444 | return true; | |
37445 | } | |
37446 | /* FALLTHRU */ | |
37447 | ||
37448 | case CONST_DOUBLE: | |
37449 | case CONST_WIDE_INT: | |
37450 | case CONST: | |
37451 | case HIGH: | |
37452 | case SYMBOL_REF: | |
37453 | *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2); | |
37454 | return true; | |
37455 | ||
37456 | case MEM: | |
37457 | /* When optimizing for size, MEM should be slightly more expensive | |
37458 | than generating the address, e.g., (plus (reg) (const)). | |
37459 | L1 cache latency is about two instructions. */ | |
37460 | *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2); | |
dfdced85 | 37461 | if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x))) |
01e91138 | 37462 | *total += COSTS_N_INSNS (100); |
37463 | return true; | |
37464 | ||
37465 | case LABEL_REF: | |
37466 | *total = 0; | |
37467 | return true; | |
37468 | ||
37469 | case PLUS: | |
37470 | case MINUS: | |
37471 | if (FLOAT_MODE_P (mode)) | |
37472 | *total = rs6000_cost->fp; | |
37473 | else | |
37474 | *total = COSTS_N_INSNS (1); | |
37475 | return false; | |
37476 | ||
37477 | case MULT: | |
37478 | if (GET_CODE (XEXP (x, 1)) == CONST_INT | |
37479 | && satisfies_constraint_I (XEXP (x, 1))) | |
37480 | { | |
37481 | if (INTVAL (XEXP (x, 1)) >= -256 | |
37482 | && INTVAL (XEXP (x, 1)) <= 255) | |
37483 | *total = rs6000_cost->mulsi_const9; | |
37484 | else | |
37485 | *total = rs6000_cost->mulsi_const; | |
37486 | } | |
37487 | else if (mode == SFmode) | |
37488 | *total = rs6000_cost->fp; | |
37489 | else if (FLOAT_MODE_P (mode)) | |
37490 | *total = rs6000_cost->dmul; | |
37491 | else if (mode == DImode) | |
37492 | *total = rs6000_cost->muldi; | |
37493 | else | |
37494 | *total = rs6000_cost->mulsi; | |
37495 | return false; | |
37496 | ||
37497 | case FMA: | |
37498 | if (mode == SFmode) | |
37499 | *total = rs6000_cost->fp; | |
37500 | else | |
37501 | *total = rs6000_cost->dmul; | |
37502 | break; | |
37503 | ||
37504 | case DIV: | |
37505 | case MOD: | |
37506 | if (FLOAT_MODE_P (mode)) | |
37507 | { | |
37508 | *total = mode == DFmode ? rs6000_cost->ddiv | |
37509 | : rs6000_cost->sdiv; | |
37510 | return false; | |
37511 | } | |
37512 | /* FALLTHRU */ | |
37513 | ||
37514 | case UDIV: | |
37515 | case UMOD: | |
37516 | if (GET_CODE (XEXP (x, 1)) == CONST_INT | |
37517 | && exact_log2 (INTVAL (XEXP (x, 1))) >= 0) | |
37518 | { | |
37519 | if (code == DIV || code == MOD) | |
37520 | /* Shift, addze */ | |
37521 | *total = COSTS_N_INSNS (2); | |
37522 | else | |
37523 | /* Shift */ | |
37524 | *total = COSTS_N_INSNS (1); | |
37525 | } | |
37526 | else | |
37527 | { | |
37528 | if (GET_MODE (XEXP (x, 1)) == DImode) | |
37529 | *total = rs6000_cost->divdi; | |
37530 | else | |
37531 | *total = rs6000_cost->divsi; | |
37532 | } | |
37533 | /* Add in shift and subtract for MOD unless we have a mod instruction. */ | |
37534 | if (!TARGET_MODULO && (code == MOD || code == UMOD)) | |
37535 | *total += COSTS_N_INSNS (2); | |
37536 | return false; | |
37537 | ||
37538 | case CTZ: | |
37539 | *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4); | |
37540 | return false; | |
37541 | ||
37542 | case FFS: | |
37543 | *total = COSTS_N_INSNS (4); | |
37544 | return false; | |
37545 | ||
37546 | case POPCOUNT: | |
37547 | *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6); | |
37548 | return false; | |
37549 | ||
37550 | case PARITY: | |
37551 | *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6); | |
37552 | return false; | |
37553 | ||
37554 | case NOT: | |
37555 | if (outer_code == AND || outer_code == IOR || outer_code == XOR) | |
37556 | *total = 0; | |
37557 | else | |
37558 | *total = COSTS_N_INSNS (1); | |
37559 | return false; | |
37560 | ||
37561 | case AND: | |
37562 | if (CONST_INT_P (XEXP (x, 1))) | |
37563 | { | |
37564 | rtx left = XEXP (x, 0); | |
37565 | rtx_code left_code = GET_CODE (left); | |
37566 | ||
37567 | /* rotate-and-mask: 1 insn. */ | |
37568 | if ((left_code == ROTATE | |
37569 | || left_code == ASHIFT | |
37570 | || left_code == LSHIFTRT) | |
37571 | && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode)) | |
37572 | { | |
37573 | *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed); | |
37574 | if (!CONST_INT_P (XEXP (left, 1))) | |
37575 | *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed); | |
37576 | *total += COSTS_N_INSNS (1); | |
37577 | return true; | |
37578 | } | |
37579 | ||
37580 | /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */ | |
37581 | HOST_WIDE_INT val = INTVAL (XEXP (x, 1)); | |
37582 | if (rs6000_is_valid_and_mask (XEXP (x, 1), mode) | |
37583 | || (val & 0xffff) == val | |
37584 | || (val & 0xffff0000) == val | |
37585 | || ((val & 0xffff) == 0 && mode == SImode)) | |
37586 | { | |
37587 | *total = rtx_cost (left, mode, AND, 0, speed); | |
37588 | *total += COSTS_N_INSNS (1); | |
37589 | return true; | |
37590 | } | |
37591 | ||
37592 | /* 2 insns. */ | |
37593 | if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode)) | |
37594 | { | |
37595 | *total = rtx_cost (left, mode, AND, 0, speed); | |
37596 | *total += COSTS_N_INSNS (2); | |
37597 | return true; | |
37598 | } | |
37599 | } | |
37600 | ||
37601 | *total = COSTS_N_INSNS (1); | |
37602 | return false; | |
37603 | ||
37604 | case IOR: | |
37605 | /* FIXME */ | |
37606 | *total = COSTS_N_INSNS (1); | |
37607 | return true; | |
37608 | ||
37609 | case CLZ: | |
37610 | case XOR: | |
37611 | case ZERO_EXTRACT: | |
37612 | *total = COSTS_N_INSNS (1); | |
37613 | return false; | |
37614 | ||
37615 | case ASHIFT: | |
37616 | /* The EXTSWSLI instruction is a combined instruction. Don't count | |
37617 | the sign extend and the shift separately within the insn. */ | |
37618 | if (TARGET_EXTSWSLI && mode == DImode | |
37619 | && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND | |
37620 | && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode) | |
37621 | { | |
37622 | *total = 0; | |
37623 | return false; | |
37624 | } | |
37625 | /* fall through */ | |
37626 | ||
37627 | case ASHIFTRT: | |
37628 | case LSHIFTRT: | |
37629 | case ROTATE: | |
37630 | case ROTATERT: | |
37631 | /* Handle mul_highpart. */ | |
37632 | if (outer_code == TRUNCATE | |
37633 | && GET_CODE (XEXP (x, 0)) == MULT) | |
37634 | { | |
37635 | if (mode == DImode) | |
37636 | *total = rs6000_cost->muldi; | |
37637 | else | |
37638 | *total = rs6000_cost->mulsi; | |
37639 | return true; | |
37640 | } | |
37641 | else if (outer_code == AND) | |
37642 | *total = 0; | |
37643 | else | |
37644 | *total = COSTS_N_INSNS (1); | |
37645 | return false; | |
37646 | ||
37647 | case SIGN_EXTEND: | |
37648 | case ZERO_EXTEND: | |
37649 | if (GET_CODE (XEXP (x, 0)) == MEM) | |
37650 | *total = 0; | |
37651 | else | |
37652 | *total = COSTS_N_INSNS (1); | |
37653 | return false; | |
37654 | ||
37655 | case COMPARE: | |
37656 | case NEG: | |
37657 | case ABS: | |
37658 | if (!FLOAT_MODE_P (mode)) | |
37659 | { | |
37660 | *total = COSTS_N_INSNS (1); | |
37661 | return false; | |
37662 | } | |
37663 | /* FALLTHRU */ | |
37664 | ||
37665 | case FLOAT: | |
37666 | case UNSIGNED_FLOAT: | |
37667 | case FIX: | |
37668 | case UNSIGNED_FIX: | |
37669 | case FLOAT_TRUNCATE: | |
37670 | *total = rs6000_cost->fp; | |
37671 | return false; | |
37672 | ||
37673 | case FLOAT_EXTEND: | |
37674 | if (mode == DFmode) | |
37675 | *total = rs6000_cost->sfdf_convert; | |
37676 | else | |
37677 | *total = rs6000_cost->fp; | |
37678 | return false; | |
37679 | ||
37680 | case UNSPEC: | |
37681 | switch (XINT (x, 1)) | |
37682 | { | |
37683 | case UNSPEC_FRSP: | |
37684 | *total = rs6000_cost->fp; | |
37685 | return true; | |
37686 | ||
37687 | default: | |
37688 | break; | |
37689 | } | |
37690 | break; | |
37691 | ||
37692 | case CALL: | |
37693 | case IF_THEN_ELSE: | |
37694 | if (!speed) | |
37695 | { | |
37696 | *total = COSTS_N_INSNS (1); | |
37697 | return true; | |
37698 | } | |
37699 | else if (FLOAT_MODE_P (mode) | |
37700 | && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS) | |
37701 | { | |
37702 | *total = rs6000_cost->fp; | |
37703 | return false; | |
37704 | } | |
37705 | break; | |
37706 | ||
37707 | case NE: | |
37708 | case EQ: | |
37709 | case GTU: | |
37710 | case LTU: | |
37711 | /* Carry bit requires mode == Pmode. | |
37712 | NEG or PLUS already counted so only add one. */ | |
37713 | if (mode == Pmode | |
37714 | && (outer_code == NEG || outer_code == PLUS)) | |
37715 | { | |
37716 | *total = COSTS_N_INSNS (1); | |
37717 | return true; | |
37718 | } | |
37719 | if (outer_code == SET) | |
37720 | { | |
37721 | if (XEXP (x, 1) == const0_rtx) | |
37722 | { | |
37723 | if (TARGET_ISEL && !TARGET_MFCRF) | |
37724 | *total = COSTS_N_INSNS (8); | |
37725 | else | |
37726 | *total = COSTS_N_INSNS (2); | |
37727 | return true; | |
37728 | } | |
37729 | else | |
37730 | { | |
37731 | *total = COSTS_N_INSNS (3); | |
37732 | return false; | |
37733 | } | |
37734 | } | |
37735 | /* FALLTHRU */ | |
37736 | ||
37737 | case GT: | |
37738 | case LT: | |
37739 | case UNORDERED: | |
37740 | if (outer_code == SET && (XEXP (x, 1) == const0_rtx)) | |
37741 | { | |
37742 | if (TARGET_ISEL && !TARGET_MFCRF) | |
37743 | *total = COSTS_N_INSNS (8); | |
37744 | else | |
37745 | *total = COSTS_N_INSNS (2); | |
37746 | return true; | |
37747 | } | |
37748 | /* CC COMPARE. */ | |
37749 | if (outer_code == COMPARE) | |
37750 | { | |
37751 | *total = 0; | |
37752 | return true; | |
37753 | } | |
37754 | break; | |
37755 | ||
37756 | default: | |
37757 | break; | |
37758 | } | |
37759 | ||
37760 | return false; | |
37761 | } | |
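/* A worked example of the CONST_INT handling above: in
   (plus (reg) (const_int 100)) the constant satisfies constraint "I"
   (a signed 16-bit immediate), so it is costed at zero and the PLUS
   contributes COSTS_N_INSNS (1); the whole expression prices out as the
   single addi it will become.  */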
37762 | ||
37763 | /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */ | |
37764 | ||
37765 | static bool | |
37766 | rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code, | |
37767 | int opno, int *total, bool speed) | |
37768 | { | |
37769 | bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed); | |
37770 | ||
37771 | fprintf (stderr, | |
37772 | "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, " | |
37773 | "opno = %d, total = %d, speed = %s, x:\n", | |
37774 | ret ? "complete" : "scan inner", | |
37775 | GET_MODE_NAME (mode), | |
37776 | GET_RTX_NAME (outer_code), | |
37777 | opno, | |
37778 | *total, | |
37779 | speed ? "true" : "false"); | |
37780 | ||
37781 | debug_rtx (x); | |
37782 | ||
37783 | return ret; | |
37784 | } | |
37785 | ||
37786 | /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */ | |
37787 | ||
37788 | static int | |
37789 | rs6000_debug_address_cost (rtx x, machine_mode mode, | |
37790 | addr_space_t as, bool speed) | |
37791 | { | |
37792 | int ret = TARGET_ADDRESS_COST (x, mode, as, speed); | |
37793 | ||
37794 | fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n", | |
37795 | ret, speed ? "true" : "false"); | |
37796 | debug_rtx (x); | |
37797 | ||
37798 | return ret; | |
37799 | } | |
37800 | ||
37801 | ||
37802 | /* A C expression returning the cost of moving data from a register of class | |
37803 | FROM to one of class TO. */ | |
37804 | ||
37805 | static int | |
37806 | rs6000_register_move_cost (machine_mode mode, | |
37807 | reg_class_t from, reg_class_t to) | |
37808 | { | |
37809 | int ret; | |
37810 | ||
37811 | if (TARGET_DEBUG_COST) | |
37812 | dbg_cost_ctrl++; | |
37813 | ||
37814 | /* Moves from/to GENERAL_REGS. */ | |
37815 | if (reg_classes_intersect_p (to, GENERAL_REGS) | |
37816 | || reg_classes_intersect_p (from, GENERAL_REGS)) | |
37817 | { | |
37818 | reg_class_t rclass = from; | |
37819 | ||
37820 | if (! reg_classes_intersect_p (to, GENERAL_REGS)) | |
37821 | rclass = to; | |
37822 | ||
37823 | if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS) | |
37824 | ret = (rs6000_memory_move_cost (mode, rclass, false) | |
37825 | + rs6000_memory_move_cost (mode, GENERAL_REGS, false)); | |
37826 | ||
37827 | /* It's more expensive to move CR_REGS than CR0_REGS because of the | |
37828 | shift. */ | |
37829 | else if (rclass == CR_REGS) | |
37830 | ret = 4; | |
37831 | ||
37832 | /* For those processors that have slow LR/CTR moves, make them more | |
37833 | expensive than memory in order to bias spills to memory. */ | |
37834 | else if ((rs6000_cpu == PROCESSOR_POWER6 | |
37835 | || rs6000_cpu == PROCESSOR_POWER7 | |
37836 | || rs6000_cpu == PROCESSOR_POWER8 | |
37837 | || rs6000_cpu == PROCESSOR_POWER9) | |
37838 | && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS)) | |
92d2aec3 | 37839 | ret = 6 * hard_regno_nregs (0, mode); |
01e91138 | 37840 | |
37841 | else | |
37842 | /* A move will cost one instruction per GPR moved. */ | |
92d2aec3 | 37843 | ret = 2 * hard_regno_nregs (0, mode); |
01e91138 | 37844 | } |
37845 | ||
37846 | /* If we have VSX, we can easily move between FPR or Altivec registers. */ | |
37847 | else if (VECTOR_MEM_VSX_P (mode) | |
37848 | && reg_classes_intersect_p (to, VSX_REGS) | |
37849 | && reg_classes_intersect_p (from, VSX_REGS)) | |
92d2aec3 | 37850 | ret = 2 * hard_regno_nregs (FIRST_FPR_REGNO, mode); |
01e91138 | 37851 | |
37852 | /* Moving between two similar registers is just one instruction. */ | |
37853 | else if (reg_classes_intersect_p (to, from)) | |
37854 | ret = (FLOAT128_2REG_P (mode)) ? 4 : 2; | |
37855 | ||
37856 | /* Everything else has to go through GENERAL_REGS. */ | |
37857 | else | |
37858 | ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to) | |
37859 | + rs6000_register_move_cost (mode, from, GENERAL_REGS)); | |
37860 | ||
37861 | if (TARGET_DEBUG_COST) | |
37862 | { | |
37863 | if (dbg_cost_ctrl == 1) | |
37864 | fprintf (stderr, | |
37865 | "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n", | |
37866 | ret, GET_MODE_NAME (mode), reg_class_names[from], | |
37867 | reg_class_names[to]); | |
37868 | dbg_cost_ctrl--; | |
37869 | } | |
37870 | ||
37871 | return ret; | |
37872 | } | |
37873 | ||
37874 | /* A C expression returning the cost of moving data of MODE from a register | |
37875 | to or from memory. */ | |
37876 | ||
37877 | static int | |
37878 | rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass, | |
37879 | bool in ATTRIBUTE_UNUSED) | |
37880 | { | |
37881 | int ret; | |
37882 | ||
37883 | if (TARGET_DEBUG_COST) | |
37884 | dbg_cost_ctrl++; | |
37885 | ||
37886 | if (reg_classes_intersect_p (rclass, GENERAL_REGS)) | |
92d2aec3 | 37887 | ret = 4 * hard_regno_nregs (0, mode); |
01e91138 | 37888 | else if ((reg_classes_intersect_p (rclass, FLOAT_REGS) |
37889 | || reg_classes_intersect_p (rclass, VSX_REGS))) | |
92d2aec3 | 37890 | ret = 4 * hard_regno_nregs (32, mode); |
01e91138 | 37891 | else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS)) |
92d2aec3 | 37892 | ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode); |
01e91138 | 37893 | else |
37894 | ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS); | |
37895 | ||
37896 | if (TARGET_DEBUG_COST) | |
37897 | { | |
37898 | if (dbg_cost_ctrl == 1) | |
37899 | fprintf (stderr, | |
37900 | "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n", | |
37901 | ret, GET_MODE_NAME (mode), reg_class_names[rclass], in); | |
37902 | dbg_cost_ctrl--; | |
37903 | } | |
37904 | ||
37905 | return ret; | |
37906 | } | |
37907 | ||
37908 | /* Return the decl of a target-specific builtin that implements the | |
37909 | reciprocal of the function, or NULL_TREE if not available. */ | |
37910 | ||
37911 | static tree | |
37912 | rs6000_builtin_reciprocal (tree fndecl) | |
37913 | { | |
37914 | switch (DECL_FUNCTION_CODE (fndecl)) | |
37915 | { | |
37916 | case VSX_BUILTIN_XVSQRTDP: | |
37917 | if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode)) | |
37918 | return NULL_TREE; | |
37919 | ||
37920 | return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF]; | |
37921 | ||
37922 | case VSX_BUILTIN_XVSQRTSP: | |
37923 | if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode)) | |
37924 | return NULL_TREE; | |
37925 | ||
37926 | return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF]; | |
37927 | ||
37928 | default: | |
37929 | return NULL_TREE; | |
37930 | } | |
37931 | } | |
37932 | ||
37933 | /* Load up a constant. If the mode is a vector mode, splat the value across | |
37934 | all of the vector elements. */ | |
37935 | ||
37936 | static rtx | |
37937 | rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst) | |
37938 | { | |
37939 | rtx reg; | |
37940 | ||
37941 | if (mode == SFmode || mode == DFmode) | |
37942 | { | |
37943 | rtx d = const_double_from_real_value (dconst, mode); | |
37944 | reg = force_reg (mode, d); | |
37945 | } | |
37946 | else if (mode == V4SFmode) | |
37947 | { | |
37948 | rtx d = const_double_from_real_value (dconst, SFmode); | |
37949 | rtvec v = gen_rtvec (4, d, d, d, d); | |
37950 | reg = gen_reg_rtx (mode); | |
37951 | rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v)); | |
37952 | } | |
37953 | else if (mode == V2DFmode) | |
37954 | { | |
37955 | rtx d = const_double_from_real_value (dconst, DFmode); | |
37956 | rtvec v = gen_rtvec (2, d, d); | |
37957 | reg = gen_reg_rtx (mode); | |
37958 | rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v)); | |
37959 | } | |
37960 | else | |
37961 | gcc_unreachable (); | |
37962 | ||
37963 | return reg; | |
37964 | } | |
37965 | ||
37966 | /* Generate an FMA instruction. */ | |
37967 | ||
37968 | static void | |
37969 | rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a) | |
37970 | { | |
37971 | machine_mode mode = GET_MODE (target); | |
37972 | rtx dst; | |
37973 | ||
37974 | dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0); | |
37975 | gcc_assert (dst != NULL); | |
37976 | ||
37977 | if (dst != target) | |
37978 | emit_move_insn (target, dst); | |
37979 | } | |
37980 | ||
37981 | /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */ | |
37982 | ||
37983 | static void | |
37984 | rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a) | |
37985 | { | |
37986 | machine_mode mode = GET_MODE (dst); | |
37987 | rtx r; | |
37988 | ||
37989 | /* This is a tad more complicated, since the fnma_optab is for | |
37990 | a different expression: fma(-m1, m2, a), which is the same | |
37991 | thing except in the case of signed zeros. | |
37992 | ||
37993 | Fortunately we know that if FMA is supported, FNMSUB is | |
37994 | also supported in the ISA. Just expand it directly. */ | |
37995 | ||
37996 | gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing); | |
37997 | ||
37998 | r = gen_rtx_NEG (mode, a); | |
37999 | r = gen_rtx_FMA (mode, m1, m2, r); | |
38000 | r = gen_rtx_NEG (mode, r); | |
38001 | emit_insn (gen_rtx_SET (dst, r)); | |
38002 | } | |
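/* Spelled out, the sequence above computes
     dst = -fma (m1, m2, -a) = -(m1*m2 - a) = a - m1*m2,
   which is exactly FNMSUB.  fma (-m1, m2, a) has the same value except
   when m1*m2 == a exactly, where the two forms can produce zeros of
   opposite sign.  */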
38003 | ||
38004 | /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P, | |
38005 | add a reg_note saying that this was a division. Support both scalar and | |
38006 | vector divide. Assumes no trapping math and finite arguments. */ | |
38007 | ||
38008 | void | |
38009 | rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p) | |
38010 | { | |
38011 | machine_mode mode = GET_MODE (dst); | |
38012 | rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v; | |
38013 | int i; | |
38014 | ||
38015 | /* Low precision estimates guarantee 5 bits of accuracy. High | |
38016 | precision estimates guarantee 14 bits of accuracy. SFmode | |
38017 | requires 23 bits of accuracy. DFmode requires 52 bits of | |
38018 | accuracy. Each pass at least doubles the accuracy, leading | |
38019 | to the following. */ | |
38020 | int passes = (TARGET_RECIP_PRECISION) ? 1 : 3; | |
38021 | if (mode == DFmode || mode == V2DFmode) | |
38022 | passes++; | |
38023 | ||
38024 | enum insn_code code = optab_handler (smul_optab, mode); | |
38025 | insn_gen_fn gen_mul = GEN_FCN (code); | |
38026 | ||
38027 | gcc_assert (code != CODE_FOR_nothing); | |
38028 | ||
38029 | one = rs6000_load_constant_and_splat (mode, dconst1); | |
38030 | ||
38031 | /* x0 = 1./d estimate */ | |
38032 | x0 = gen_reg_rtx (mode); | |
38033 | emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d), | |
38034 | UNSPEC_FRES))); | |
38035 | ||
38036 | /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */ | |
38037 | if (passes > 1) { | |
38038 | ||
38039 | /* e0 = 1. - d * x0 */ | |
38040 | e0 = gen_reg_rtx (mode); | |
38041 | rs6000_emit_nmsub (e0, d, x0, one); | |
38042 | ||
38043 | /* x1 = x0 + e0 * x0 */ | |
38044 | x1 = gen_reg_rtx (mode); | |
38045 | rs6000_emit_madd (x1, e0, x0, x0); | |
38046 | ||
38047 | for (i = 0, xprev = x1, eprev = e0; i < passes - 2; | |
38048 | ++i, xprev = xnext, eprev = enext) { | |
38049 | ||
38050 | /* enext = eprev * eprev */ | |
38051 | enext = gen_reg_rtx (mode); | |
38052 | emit_insn (gen_mul (enext, eprev, eprev)); | |
38053 | ||
38054 | /* xnext = xprev + enext * xprev */ | |
38055 | xnext = gen_reg_rtx (mode); | |
38056 | rs6000_emit_madd (xnext, enext, xprev, xprev); | |
38057 | } | |
38058 | ||
38059 | } else | |
38060 | xprev = x0; | |
38061 | ||
38062 | /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */ | |
38063 | ||
38064 | /* u = n * xprev */ | |
38065 | u = gen_reg_rtx (mode); | |
38066 | emit_insn (gen_mul (u, n, xprev)); | |
38067 | ||
38068 | /* v = n - (d * u) */ | |
38069 | v = gen_reg_rtx (mode); | |
38070 | rs6000_emit_nmsub (v, d, u, n); | |
38071 | ||
38072 | /* dst = (v * xprev) + u */ | |
38073 | rs6000_emit_madd (dst, v, xprev, u); | |
38074 | ||
38075 | if (note_p) | |
38076 | add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d)); | |
38077 | } | |
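/* A quick check of the pass counts above: a 5-bit estimate doubles to
   10, 20 and 40 bits after three passes (enough for SFmode's 23), and to
   80 after four (enough for DFmode's 52); a 14-bit TARGET_RECIP_PRECISION
   estimate needs only one pass for 28 bits, or two for 56.

   A minimal scalar model of the same scheme, using a hypothetical
   low-precision estimate() helper; illustrative only, not part of the
   compiler:  */
#if 0
static double
swdiv_model (double n, double d, int passes)
{
  double x = estimate (d);		/* x0 ~= 1/d, a few good bits.  */
  for (int i = 0; i < passes - 1; i++)
    x = x + x * (1.0 - d * x);		/* x_(i+1) = x_i * (2 - d*x_i).  */
  double u = n * x;			/* The last pass folds in N:  */
  return u + x * (n - d * u);		/* dst = n * x * (2 - d*x).  */
}
#endif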
38078 | ||
38079 | /* Goldschmidt's Algorithm for single/double-precision floating point | |
38080 | sqrt and rsqrt. Assumes no trapping math and finite arguments. */ | |
38081 | ||
38082 | void | |
38083 | rs6000_emit_swsqrt (rtx dst, rtx src, bool recip) | |
38084 | { | |
38085 | machine_mode mode = GET_MODE (src); | |
38086 | rtx e = gen_reg_rtx (mode); | |
38087 | rtx g = gen_reg_rtx (mode); | |
38088 | rtx h = gen_reg_rtx (mode); | |
38089 | ||
38090 | /* Low precision estimates guarantee 5 bits of accuracy. High | |
38091 | precision estimates guarantee 14 bits of accuracy. SFmode | |
38092 | requires 23 bits of accuracy. DFmode requires 52 bits of | |
38093 | accuracy. Each pass at least doubles the accuracy, leading | |
38094 | to the following. */ | |
38095 | int passes = (TARGET_RECIP_PRECISION) ? 1 : 3; | |
38096 | if (mode == DFmode || mode == V2DFmode) | |
38097 | passes++; | |
38098 | ||
38099 | int i; | |
38100 | rtx mhalf; | |
38101 | enum insn_code code = optab_handler (smul_optab, mode); | |
38102 | insn_gen_fn gen_mul = GEN_FCN (code); | |
38103 | ||
38104 | gcc_assert (code != CODE_FOR_nothing); | |
38105 | ||
38106 | mhalf = rs6000_load_constant_and_splat (mode, dconsthalf); | |
38107 | ||
38108 | /* e = rsqrt estimate */ | |
38109 | emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src), | |
38110 | UNSPEC_RSQRT))); | |
38111 | ||
38112 | /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */ | |
38113 | if (!recip) | |
38114 | { | |
38115 | rtx zero = force_reg (mode, CONST0_RTX (mode)); | |
38116 | ||
38117 | if (mode == SFmode) | |
38118 | { | |
38119 | rtx target = emit_conditional_move (e, GT, src, zero, mode, | |
38120 | e, zero, mode, 0); | |
38121 | if (target != e) | |
38122 | emit_move_insn (e, target); | |
38123 | } | |
38124 | else | |
38125 | { | |
38126 | rtx cond = gen_rtx_GT (VOIDmode, e, zero); | |
38127 | rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero); | |
38128 | } | |
38129 | } | |
38130 | ||
38131 | /* g = sqrt estimate. */ | |
38132 | emit_insn (gen_mul (g, e, src)); | |
38133 | /* h = 1/(2*sqrt) estimate. */ | |
38134 | emit_insn (gen_mul (h, e, mhalf)); | |
38135 | ||
38136 | if (recip) | |
38137 | { | |
38138 | if (passes == 1) | |
38139 | { | |
38140 | rtx t = gen_reg_rtx (mode); | |
38141 | rs6000_emit_nmsub (t, g, h, mhalf); | |
38142 | /* Apply correction directly to 1/rsqrt estimate. */ | |
38143 | rs6000_emit_madd (dst, e, t, e); | |
38144 | } | |
38145 | else | |
38146 | { | |
38147 | for (i = 0; i < passes; i++) | |
38148 | { | |
38149 | rtx t1 = gen_reg_rtx (mode); | |
38150 | rtx g1 = gen_reg_rtx (mode); | |
38151 | rtx h1 = gen_reg_rtx (mode); | |
38152 | ||
38153 | rs6000_emit_nmsub (t1, g, h, mhalf); | |
38154 | rs6000_emit_madd (g1, g, t1, g); | |
38155 | rs6000_emit_madd (h1, h, t1, h); | |
38156 | ||
38157 | g = g1; | |
38158 | h = h1; | |
38159 | } | |
38160 | /* h approximates 1/(2*sqrt), so multiply by 2 to get 1/sqrt. */ | |
38161 | emit_insn (gen_add3_insn (dst, h, h)); | |
38162 | } | |
38163 | } | |
38164 | else | |
38165 | { | |
38166 | rtx t = gen_reg_rtx (mode); | |
38167 | rs6000_emit_nmsub (t, g, h, mhalf); | |
38168 | rs6000_emit_madd (dst, g, t, g); | |
38169 | } | |
38170 | ||
38171 | return; | |
38172 | } | |
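
/* Illustrative recurrence for the refinement loop above (exact
   arithmetic sketch, not emitted code): with g ~= sqrt(x) and
   h ~= 1/(2*sqrt(x)), each pass computes

     t  = 1/2 - g*h     (via nmsub; tends to 0 as the estimates converge)
     g' = g + g*t       (via madd; refined sqrt estimate)
     h' = h + h*t       (via madd; refined 1/(2*sqrt(x)) estimate)

   and the relative error is roughly squared on every pass.  */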
38173 | ||
38174 | /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD | |
38175 | (Power7) targets. DST is the target, and SRC is the argument operand. */ | |
38176 | ||
38177 | void | |
38178 | rs6000_emit_popcount (rtx dst, rtx src) | |
38179 | { | |
38180 | machine_mode mode = GET_MODE (dst); | |
38181 | rtx tmp1, tmp2; | |
38182 | ||
38183 | /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */ | |
38184 | if (TARGET_POPCNTD) | |
38185 | { | |
38186 | if (mode == SImode) | |
38187 | emit_insn (gen_popcntdsi2 (dst, src)); | |
38188 | else | |
38189 | emit_insn (gen_popcntddi2 (dst, src)); | |
38190 | return; | |
38191 | } | |
38192 | ||
38193 | tmp1 = gen_reg_rtx (mode); | |
38194 | ||
38195 | if (mode == SImode) | |
38196 | { | |
38197 | emit_insn (gen_popcntbsi2 (tmp1, src)); | |
38198 | tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101), | |
38199 | NULL_RTX, 0); | |
38200 | tmp2 = force_reg (SImode, tmp2); | |
38201 | emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24))); | |
38202 | } | |
38203 | else | |
38204 | { | |
38205 | emit_insn (gen_popcntbdi2 (tmp1, src)); | |
38206 | tmp2 = expand_mult (DImode, tmp1, | |
38207 | GEN_INT ((HOST_WIDE_INT) | |
38208 | 0x01010101 << 32 | 0x01010101), | |
38209 | NULL_RTX, 0); | |
38210 | tmp2 = force_reg (DImode, tmp2); | |
38211 | emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56))); | |
38212 | } | |
38213 | } | |
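
/* Worked example for the popcntb path above (illustrative): for
   src = 0xFF00FF00 in SImode, popcntb yields the per-byte counts
   0x08000800; multiplying by 0x01010101 gives 0x10080800, whose most
   significant byte (0x10) is the sum of the four byte counts; the
   shift right by 24 then extracts the popcount, 16.  */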
38214 | ||
38215 | ||
38216 | /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the | |
38217 | target, and SRC is the argument operand. */ | |
38218 | ||
38219 | void | |
38220 | rs6000_emit_parity (rtx dst, rtx src) | |
38221 | { | |
38222 | machine_mode mode = GET_MODE (dst); | |
38223 | rtx tmp; | |
38224 | ||
38225 | tmp = gen_reg_rtx (mode); | |
38226 | ||
38227 | /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */ | |
38228 | if (TARGET_CMPB) | |
38229 | { | |
38230 | if (mode == SImode) | |
38231 | { | |
38232 | emit_insn (gen_popcntbsi2 (tmp, src)); | |
38233 | emit_insn (gen_paritysi2_cmpb (dst, tmp)); | |
38234 | } | |
38235 | else | |
38236 | { | |
38237 | emit_insn (gen_popcntbdi2 (tmp, src)); | |
38238 | emit_insn (gen_paritydi2_cmpb (dst, tmp)); | |
38239 | } | |
38240 | return; | |
38241 | } | |
38242 | ||
38243 | if (mode == SImode) | |
38244 | { | |
38245 | /* Is mult+shift >= shift+xor+shift+xor? */ | |
38246 | if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3)) | |
38247 | { | |
38248 | rtx tmp1, tmp2, tmp3, tmp4; | |
38249 | ||
38250 | tmp1 = gen_reg_rtx (SImode); | |
38251 | emit_insn (gen_popcntbsi2 (tmp1, src)); | |
38252 | ||
38253 | tmp2 = gen_reg_rtx (SImode); | |
38254 | emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16))); | |
38255 | tmp3 = gen_reg_rtx (SImode); | |
38256 | emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2)); | |
38257 | ||
38258 | tmp4 = gen_reg_rtx (SImode); | |
38259 | emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8))); | |
38260 | emit_insn (gen_xorsi3 (tmp, tmp3, tmp4)); | |
38261 | } | |
38262 | else | |
38263 | rs6000_emit_popcount (tmp, src); | |
38264 | emit_insn (gen_andsi3 (dst, tmp, const1_rtx)); | |
38265 | } | |
38266 | else | |
38267 | { | |
38268 | /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */ | |
38269 | if (rs6000_cost->muldi >= COSTS_N_INSNS (5)) | |
38270 | { | |
38271 | rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; | |
38272 | ||
38273 | tmp1 = gen_reg_rtx (DImode); | |
38274 | emit_insn (gen_popcntbdi2 (tmp1, src)); | |
38275 | ||
38276 | tmp2 = gen_reg_rtx (DImode); | |
38277 | emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32))); | |
38278 | tmp3 = gen_reg_rtx (DImode); | |
38279 | emit_insn (gen_xordi3 (tmp3, tmp1, tmp2)); | |
38280 | ||
38281 | tmp4 = gen_reg_rtx (DImode); | |
38282 | emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16))); | |
38283 | tmp5 = gen_reg_rtx (DImode); | |
38284 | emit_insn (gen_xordi3 (tmp5, tmp3, tmp4)); | |
38285 | ||
38286 | tmp6 = gen_reg_rtx (DImode); | |
38287 | emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8))); | |
38288 | emit_insn (gen_xordi3 (tmp, tmp5, tmp6)); | |
38289 | } | |
38290 | else | |
38291 | rs6000_emit_popcount (tmp, src); | |
38292 | emit_insn (gen_anddi3 (dst, tmp, const1_rtx)); | |
38293 | } | |
38294 | } | |
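
/* Host-side sketch of the SImode shift/xor reduction above
   (hypothetical C, for illustration only; popcntb8 stands in for the
   per-byte population counts that the popcntb instruction computes):

     unsigned int parity32 (unsigned int x)
     {
       unsigned int t = popcntb8 (x); // per-byte popcounts
       t ^= t >> 16;                  // XOR-fold high halfword into low
       t ^= t >> 8;                   // low byte = XOR of all byte counts
       return t & 1;                  // LSB = bit count mod 2 = parity
     }

   The DImode variant simply starts with one more fold, t ^= t >> 32.  */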
38295 | ||
38296 | /* Expand an Altivec constant permutation for little endian mode. | |
38297 | There are two issues: First, the two input operands must be | |
38298 | swapped so that together they form a double-wide array in LE | |
38299 | order. Second, the vperm instruction has surprising behavior | |
38300 | in LE mode: it interprets the elements of the source vectors | |
38301 | in BE mode ("left to right") and interprets the elements of | |
38302 | the destination vector in LE mode ("right to left"). To | |
38303 | correct for this, we must subtract each element of the permute | |
38304 | control vector from 31. | |
38305 | ||
38306 | For example, suppose we want to concatenate vr10 = {0, 1, 2, 3} | |
38307 | with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm. | |
38308 | We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to | |
38309 | serve as the permute control vector. Then, in BE mode, | |
38310 | ||
38311 | vperm 9,10,11,12 | |
38312 | ||
38313 | places the desired result in vr9. However, in LE mode the | |
38314 | vector contents will be | |
38315 | ||
38316 | vr10 = 00000003 00000002 00000001 00000000 | |
38317 | vr11 = 00000007 00000006 00000005 00000004 | |
38318 | ||
38319 | The result of the vperm using the same permute control vector is | |
38320 | ||
38321 | vr9 = 05000000 07000000 01000000 03000000 | |
38322 | ||
38323 | That is, the leftmost 4 bytes of vr10 are interpreted as the | |
38324 | source for the rightmost 4 bytes of vr9, and so on. | |
38325 | ||
38326 | If we change the permute control vector to | |
38327 | ||
38328 | vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4} | |
38329 | ||
38330 | and issue | |
38331 | ||
38332 | vperm 9,11,10,12 | |
38333 | ||
38334 | we get the desired | |
38335 | ||
38336 | vr9 = 00000006 00000004 00000002 00000000. */ | |
38337 | ||
38338 | void | |
38339 | altivec_expand_vec_perm_const_le (rtx operands[4]) | |
38340 | { | |
38341 | unsigned int i; | |
38342 | rtx perm[16]; | |
38343 | rtx constv, unspec; | |
38344 | rtx target = operands[0]; | |
38345 | rtx op0 = operands[1]; | |
38346 | rtx op1 = operands[2]; | |
38347 | rtx sel = operands[3]; | |
38348 | ||
38349 | /* Unpack and adjust the constant selector. */ | |
38350 | for (i = 0; i < 16; ++i) | |
38351 | { | |
38352 | rtx e = XVECEXP (sel, 0, i); | |
38353 | unsigned int elt = 31 - (INTVAL (e) & 31); | |
38354 | perm[i] = GEN_INT (elt); | |
38355 | } | |
38356 | ||
38357 | /* Expand to a permute, swapping the inputs and using the | |
38358 | adjusted selector. */ | |
38359 | if (!REG_P (op0)) | |
38360 | op0 = force_reg (V16QImode, op0); | |
38361 | if (!REG_P (op1)) | |
38362 | op1 = force_reg (V16QImode, op1); | |
38363 | ||
38364 | constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)); | |
38365 | constv = force_reg (V16QImode, constv); | |
38366 | unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv), | |
38367 | UNSPEC_VPERM); | |
38368 | if (!REG_P (target)) | |
38369 | { | |
38370 | rtx tmp = gen_reg_rtx (V16QImode); | |
38371 | emit_move_insn (tmp, unspec); | |
38372 | unspec = tmp; | |
38373 | } | |
38374 | ||
38375 | emit_move_insn (target, unspec); | |
38376 | } | |
38377 | ||
38378 | /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the | |
38379 | permute control vector. But here it's not a constant, so we must | |
38380 | generate a vector NAND or NOR to do the adjustment. Since vperm uses | |
38380 | only the low five bits of each selector byte, ~x equals 31 - x there, | |
38380 | so a bitwise NOT performs the subtract-from-31 adjustment. */ | |
38381 | ||
38382 | void | |
38383 | altivec_expand_vec_perm_le (rtx operands[4]) | |
38384 | { | |
38385 | rtx notx, iorx, unspec; | |
38386 | rtx target = operands[0]; | |
38387 | rtx op0 = operands[1]; | |
38388 | rtx op1 = operands[2]; | |
38389 | rtx sel = operands[3]; | |
38390 | rtx tmp = target; | |
38391 | rtx norreg = gen_reg_rtx (V16QImode); | |
38392 | machine_mode mode = GET_MODE (target); | |
38393 | ||
38394 | /* Get everything in regs so the pattern matches. */ | |
38395 | if (!REG_P (op0)) | |
38396 | op0 = force_reg (mode, op0); | |
38397 | if (!REG_P (op1)) | |
38398 | op1 = force_reg (mode, op1); | |
38399 | if (!REG_P (sel)) | |
38400 | sel = force_reg (V16QImode, sel); | |
38401 | if (!REG_P (target)) | |
38402 | tmp = gen_reg_rtx (mode); | |
38403 | ||
38404 | if (TARGET_P9_VECTOR) | |
38405 | { | |
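/* ISA 3.0's vpermr indexes from the right, so it needs neither the
   selector inversion nor the operand swap used below.  */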
38406 | unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op0, op1, sel), | |
38407 | UNSPEC_VPERMR); | |
38408 | } | |
38409 | else | |
38410 | { | |
38411 | /* Invert the selector with a VNAND if available, else a VNOR. | |
38412 | The VNAND is preferred for future fusion opportunities. */ | |
38413 | notx = gen_rtx_NOT (V16QImode, sel); | |
38414 | iorx = (TARGET_P8_VECTOR | |
38415 | ? gen_rtx_IOR (V16QImode, notx, notx) | |
38416 | : gen_rtx_AND (V16QImode, notx, notx)); | |
38417 | emit_insn (gen_rtx_SET (norreg, iorx)); | |
38418 | ||
38419 | /* Permute with operands reversed and adjusted selector. */ | |
38420 | unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg), | |
38421 | UNSPEC_VPERM); | |
38422 | } | |
38423 | ||
38424 | /* Copy into target, possibly by way of a register. */ | |
38425 | if (!REG_P (target)) | |
38426 | { | |
38427 | emit_move_insn (tmp, unspec); | |
38428 | unspec = tmp; | |
38429 | } | |
38430 | ||
38431 | emit_move_insn (target, unspec); | |
38432 | } | |
38433 | ||
38434 | /* Expand an Altivec constant permutation. Return true if we match | |
38435 | an efficient implementation; false to fall back to VPERM. */ | |
38436 | ||
38437 | bool | |
38438 | altivec_expand_vec_perm_const (rtx operands[4]) | |
38439 | { | |
38440 | struct altivec_perm_insn { | |
38441 | HOST_WIDE_INT mask; | |
38442 | enum insn_code impl; | |
38443 | unsigned char perm[16]; | |
38444 | }; | |
38445 | static const struct altivec_perm_insn patterns[] = { | |
38446 | { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct, | |
38447 | { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } }, | |
38448 | { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct, | |
38449 | { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } }, | |
38450 | { OPTION_MASK_ALTIVEC, | |
38451 | (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct | |
38452 | : CODE_FOR_altivec_vmrglb_direct), | |
38453 | { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } }, | |
38454 | { OPTION_MASK_ALTIVEC, | |
38455 | (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct | |
38456 | : CODE_FOR_altivec_vmrglh_direct), | |
38457 | { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } }, | |
38458 | { OPTION_MASK_ALTIVEC, | |
38459 | (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct | |
38460 | : CODE_FOR_altivec_vmrglw_direct), | |
38461 | { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } }, | |
38462 | { OPTION_MASK_ALTIVEC, | |
38463 | (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct | |
38464 | : CODE_FOR_altivec_vmrghb_direct), | |
38465 | { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } }, | |
38466 | { OPTION_MASK_ALTIVEC, | |
38467 | (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct | |
38468 | : CODE_FOR_altivec_vmrghh_direct), | |
38469 | { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } }, | |
38470 | { OPTION_MASK_ALTIVEC, | |
38471 | (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct | |
38472 | : CODE_FOR_altivec_vmrghw_direct), | |
38473 | { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } }, | |
38474 | { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew, | |
38475 | { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } }, | |
38476 | { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow, | |
38477 | { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } } | |
38478 | }; | |
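
/* For example (illustrative): on a big-endian target the selector
   {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23} matches
   the vmrghb entry above, so the permutation is emitted as a single
   merge instruction instead of a vperm with a loaded control
   vector.  */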
38479 | ||
38480 | unsigned int i, j, elt, which; | |
38481 | unsigned char perm[16]; | |
38482 | rtx target, op0, op1, sel, x; | |
38483 | bool one_vec; | |
38484 | ||
38485 | target = operands[0]; | |
38486 | op0 = operands[1]; | |
38487 | op1 = operands[2]; | |
38488 | sel = operands[3]; | |
38489 | ||
38490 | /* Unpack the constant selector. */ | |
38491 | for (i = which = 0; i < 16; ++i) | |
38492 | { | |
38493 | rtx e = XVECEXP (sel, 0, i); | |
38494 | elt = INTVAL (e) & 31; | |
38495 | which |= (elt < 16 ? 1 : 2); | |
38496 | perm[i] = elt; | |
38497 | } | |
38498 | ||
38499 | /* Simplify the constant selector based on operands. */ | |
38500 | switch (which) | |
38501 | { | |
38502 | default: | |
38503 | gcc_unreachable (); | |
38504 | ||
38505 | case 3: | |
38506 | one_vec = false; | |
38507 | if (!rtx_equal_p (op0, op1)) | |
38508 | break; | |
38509 | /* FALLTHRU */ | |
38510 | ||
38511 | case 2: | |
38512 | for (i = 0; i < 16; ++i) | |
38513 | perm[i] &= 15; | |
38514 | op0 = op1; | |
38515 | one_vec = true; | |
38516 | break; | |
38517 | ||
38518 | case 1: | |
38519 | op1 = op0; | |
38520 | one_vec = true; | |
38521 | break; | |
38522 | } | |
38523 | ||
38524 | /* Look for splat patterns. */ | |
38525 | if (one_vec) | |
38526 | { | |
38527 | elt = perm[0]; | |
38528 | ||
38529 | for (i = 0; i < 16; ++i) | |
38530 | if (perm[i] != elt) | |
38531 | break; | |
38532 | if (i == 16) | |
38533 | { | |
38534 | if (!BYTES_BIG_ENDIAN) | |
38535 | elt = 15 - elt; | |
38536 | emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt))); | |
38537 | return true; | |
38538 | } | |
38539 | ||
38540 | if (elt % 2 == 0) | |
38541 | { | |
38542 | for (i = 0; i < 16; i += 2) | |
38543 | if (perm[i] != elt || perm[i + 1] != elt + 1) | |
38544 | break; | |
38545 | if (i == 16) | |
38546 | { | |
38547 | int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2; | |
38548 | x = gen_reg_rtx (V8HImode); | |
38549 | emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0), | |
38550 | GEN_INT (field))); | |
38551 | emit_move_insn (target, gen_lowpart (V16QImode, x)); | |
38552 | return true; | |
38553 | } | |
38554 | } | |
38555 | ||
38556 | if (elt % 4 == 0) | |
38557 | { | |
38558 | for (i = 0; i < 16; i += 4) | |
38559 | if (perm[i] != elt | |
38560 | || perm[i + 1] != elt + 1 | |
38561 | || perm[i + 2] != elt + 2 | |
38562 | || perm[i + 3] != elt + 3) | |
38563 | break; | |
38564 | if (i == 16) | |
38565 | { | |
38566 | int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4; | |
38567 | x = gen_reg_rtx (V4SImode); | |
38568 | emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0), | |
38569 | GEN_INT (field))); | |
38570 | emit_move_insn (target, gen_lowpart (V16QImode, x)); | |
38571 | return true; | |
38572 | } | |
38573 | } | |
38574 | } | |
38575 | ||
38576 | /* Look for merge and pack patterns. */ | |
38577 | for (j = 0; j < ARRAY_SIZE (patterns); ++j) | |
38578 | { | |
38579 | bool swapped; | |
38580 | ||
38581 | if ((patterns[j].mask & rs6000_isa_flags) == 0) | |
38582 | continue; | |
38583 | ||
38584 | elt = patterns[j].perm[0]; | |
38585 | if (perm[0] == elt) | |
38586 | swapped = false; | |
38587 | else if (perm[0] == elt + 16) | |
38588 | swapped = true; | |
38589 | else | |
38590 | continue; | |
38591 | for (i = 1; i < 16; ++i) | |
38592 | { | |
38593 | elt = patterns[j].perm[i]; | |
38594 | if (swapped) | |
38595 | elt = (elt >= 16 ? elt - 16 : elt + 16); | |
38596 | else if (one_vec && elt >= 16) | |
38597 | elt -= 16; | |
38598 | if (perm[i] != elt) | |
38599 | break; | |
38600 | } | |
38601 | if (i == 16) | |
38602 | { | |
38603 | enum insn_code icode = patterns[j].impl; | |
38604 | machine_mode omode = insn_data[icode].operand[0].mode; | |
38605 | machine_mode imode = insn_data[icode].operand[1].mode; | |
38606 | ||
38607 | /* For little-endian, don't use vpkuwum and vpkuhum if the | |
38608 | underlying vector type is not V4SI and V8HI, respectively. | |
38609 | For example, using vpkuwum with a V8HI picks up the even | |
38610 | halfwords (BE numbering) when the even halfwords (LE | |
38611 | numbering) are what we need. */ | |
38612 | if (!BYTES_BIG_ENDIAN | |
38613 | && icode == CODE_FOR_altivec_vpkuwum_direct | |
38614 | && ((GET_CODE (op0) == REG | |
38615 | && GET_MODE (op0) != V4SImode) | |
38616 | || (GET_CODE (op0) == SUBREG | |
38617 | && GET_MODE (XEXP (op0, 0)) != V4SImode))) | |
38618 | continue; | |
38619 | if (!BYTES_BIG_ENDIAN | |
38620 | && icode == CODE_FOR_altivec_vpkuhum_direct | |
38621 | && ((GET_CODE (op0) == REG | |
38622 | && GET_MODE (op0) != V8HImode) | |
38623 | || (GET_CODE (op0) == SUBREG | |
38624 | && GET_MODE (XEXP (op0, 0)) != V8HImode))) | |
38625 | continue; | |
38626 | ||
38627 | /* For little-endian, the two input operands must be swapped | |
38628 | (or swapped back) to ensure proper right-to-left numbering | |
38629 | from 0 to 2N-1. */ | |
38630 | if (swapped ^ !BYTES_BIG_ENDIAN) | |
38631 | std::swap (op0, op1); | |
38632 | if (imode != V16QImode) | |
38633 | { | |
38634 | op0 = gen_lowpart (imode, op0); | |
38635 | op1 = gen_lowpart (imode, op1); | |
38636 | } | |
38637 | if (omode == V16QImode) | |
38638 | x = target; | |
38639 | else | |
38640 | x = gen_reg_rtx (omode); | |
38641 | emit_insn (GEN_FCN (icode) (x, op0, op1)); | |
38642 | if (omode != V16QImode) | |
38643 | emit_move_insn (target, gen_lowpart (V16QImode, x)); | |
38644 | return true; | |
38645 | } | |
38646 | } | |
38647 | ||
38648 | if (!BYTES_BIG_ENDIAN) | |
38649 | { | |
38650 | altivec_expand_vec_perm_const_le (operands); | |
38651 | return true; | |
38652 | } | |
38653 | ||
38654 | return false; | |
38655 | } | |
38656 | ||
38657 | /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation. | |
38658 | Return true if we match an efficient implementation. */ | |
38659 | ||
38660 | static bool | |
38661 | rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1, | |
38662 | unsigned char perm0, unsigned char perm1) | |
38663 | { | |
38664 | rtx x; | |
38665 | ||
38666 | /* If both selectors come from the same operand, fold to single op. */ | |
38667 | if ((perm0 & 2) == (perm1 & 2)) | |
38668 | { | |
38669 | if (perm0 & 2) | |
38670 | op0 = op1; | |
38671 | else | |
38672 | op1 = op0; | |
38673 | } | |
38674 | /* If both operands are equal, fold to simpler permutation. */ | |
38675 | if (rtx_equal_p (op0, op1)) | |
38676 | { | |
38677 | perm0 = perm0 & 1; | |
38678 | perm1 = (perm1 & 1) + 2; | |
38679 | } | |
38680 | /* If the first selector comes from the second operand, swap. */ | |
38681 | else if (perm0 & 2) | |
38682 | { | |
38683 | if (perm1 & 2) | |
38684 | return false; | |
38685 | perm0 -= 2; | |
38686 | perm1 += 2; | |
38687 | std::swap (op0, op1); | |
38688 | } | |
38689 | /* If the second selector does not come from the second operand, fail. */ | |
38690 | else if ((perm1 & 2) == 0) | |
38691 | return false; | |
38692 | ||
38693 | /* Success! */ | |
38694 | if (target != NULL) | |
38695 | { | |
38696 | machine_mode vmode, dmode; | |
38697 | rtvec v; | |
38698 | ||
38699 | vmode = GET_MODE (target); | |
38700 | gcc_assert (GET_MODE_NUNITS (vmode) == 2); | |
ab53cba7 | 38701 | dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require (); |
01e91138 | 38702 | x = gen_rtx_VEC_CONCAT (dmode, op0, op1); |
38703 | v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1)); | |
38704 | x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v)); | |
38705 | emit_insn (gen_rtx_SET (target, x)); | |
38706 | } | |
38707 | return true; | |
38708 | } | |
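
/* Worked example (illustrative): a V2DF selector {1, 2} gives
   perm0 = 1 (element 1 of op0) and perm1 = 2 (element 0 of op1), so
   the code above emits a vec_select of indices {1, 2} from the
   four-element concatenation of op0 and op1, i.e. the vector
   { op0[1], op1[0] }.  */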
38709 | ||
38710 | bool | |
38711 | rs6000_expand_vec_perm_const (rtx operands[4]) | |
38712 | { | |
38713 | rtx target, op0, op1, sel; | |
38714 | unsigned char perm0, perm1; | |
38715 | ||
38716 | target = operands[0]; | |
38717 | op0 = operands[1]; | |
38718 | op1 = operands[2]; | |
38719 | sel = operands[3]; | |
38720 | ||
38721 | /* Unpack the constant selector. */ | |
38722 | perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3; | |
38723 | perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3; | |
38724 | ||
38725 | return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1); | |
38726 | } | |
38727 | ||
38728 | /* Test whether a constant permutation is supported. */ | |
38729 | ||
38730 | static bool | |
38731 | rs6000_vectorize_vec_perm_const_ok (machine_mode vmode, | |
38732 | const unsigned char *sel) | |
38733 | { | |
38734 | /* AltiVec (and thus VSX) can handle arbitrary permutations. */ | |
38735 | if (TARGET_ALTIVEC) | |
38736 | return true; | |
38737 | ||
38738 | /* Check for ps_merge* or evmerge* insns. */ | |
38739 | if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode) | |
38740 | || (TARGET_SPE && vmode == V2SImode)) | |
38741 | { | |
38742 | rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1); | |
38743 | rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2); | |
38744 | return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]); | |
38745 | } | |
38746 | ||
38747 | return false; | |
38748 | } | |
38749 | ||
38750 | /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */ | |
38751 | ||
38752 | static void | |
38753 | rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1, | |
38754 | machine_mode vmode, unsigned nelt, rtx perm[]) | |
38755 | { | |
38756 | machine_mode imode; | |
38757 | rtx x; | |
38758 | ||
38759 | imode = vmode; | |
38760 | if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT) | |
12bdf7c0 | 38761 | imode = mode_for_int_vector (vmode).require (); |
01e91138 | 38762 | |
38763 | x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm)); | |
38764 | x = expand_vec_perm (vmode, op0, op1, x, target); | |
38765 | if (x != target) | |
38766 | emit_move_insn (target, x); | |
38767 | } | |
38768 | ||
38769 | /* Expand an extract even operation. */ | |
38770 | ||
38771 | void | |
38772 | rs6000_expand_extract_even (rtx target, rtx op0, rtx op1) | |
38773 | { | |
38774 | machine_mode vmode = GET_MODE (target); | |
38775 | unsigned i, nelt = GET_MODE_NUNITS (vmode); | |
38776 | rtx perm[16]; | |
38777 | ||
38778 | for (i = 0; i < nelt; i++) | |
38779 | perm[i] = GEN_INT (i * 2); | |
38780 | ||
38781 | rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm); | |
38782 | } | |
38783 | ||
38784 | /* Expand a vector interleave operation. */ | |
38785 | ||
38786 | void | |
38787 | rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp) | |
38788 | { | |
38789 | machine_mode vmode = GET_MODE (target); | |
38790 | unsigned i, high, nelt = GET_MODE_NUNITS (vmode); | |
38791 | rtx perm[16]; | |
38792 | ||
38793 | high = (highp ? 0 : nelt / 2); | |
38794 | for (i = 0; i < nelt / 2; i++) | |
38795 | { | |
38796 | perm[i * 2] = GEN_INT (i + high); | |
38797 | perm[i * 2 + 1] = GEN_INT (i + nelt + high); | |
38798 | } | |
38799 | ||
38800 | rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm); | |
38801 | } | |
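
/* Example (illustrative): for V4SImode with HIGHP set, the selector
   built above is {0, 4, 1, 5}, interleaving the first (BE-order high)
   halves of op0 and op1; with HIGHP clear it is {2, 6, 3, 7}.  */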
38802 | ||
38803 | /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */ | |
38804 | void | |
38805 | rs6000_scale_v2df (rtx tgt, rtx src, int scale) | |
38806 | { | |
38807 | HOST_WIDE_INT hwi_scale (scale); | |
38808 | REAL_VALUE_TYPE r_pow; | |
38809 | rtvec v = rtvec_alloc (2); | |
38810 | rtx elt; | |
38811 | rtx scale_vec = gen_reg_rtx (V2DFmode); | |
38812 | (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale); | |
38813 | elt = const_double_from_real_value (r_pow, DFmode); | |
38814 | RTVEC_ELT (v, 0) = elt; | |
38815 | RTVEC_ELT (v, 1) = elt; | |
38816 | rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v)); | |
38817 | emit_insn (gen_mulv2df3 (tgt, src, scale_vec)); | |
38818 | } | |
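
/* For instance (illustrative), rs6000_scale_v2df (tgt, src, 3) splats
   2^3 = 8.0 into both lanes of the scale vector and multiplies, so
   both lanes of SRC are scaled by 8.0.  */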
38819 | ||
38820 | /* Return an RTX representing where to find the function value of a | |
38821 | function returning MODE. */ | |
38822 | static rtx | |
38823 | rs6000_complex_function_value (machine_mode mode) | |
38824 | { | |
38825 | unsigned int regno; | |
38826 | rtx r1, r2; | |
38827 | machine_mode inner = GET_MODE_INNER (mode); | |
38828 | unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode); | |
38829 | ||
38830 | if (TARGET_FLOAT128_TYPE | |
38831 | && (mode == KCmode | |
38832 | || (mode == TCmode && TARGET_IEEEQUAD))) | |
38833 | regno = ALTIVEC_ARG_RETURN; | |
38834 | ||
38835 | else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS) | |
38836 | regno = FP_ARG_RETURN; | |
38837 | ||
38838 | else | |
38839 | { | |
38840 | regno = GP_ARG_RETURN; | |
38841 | ||
38842 | /* 32-bit is OK since it'll go in r3/r4. */ | |
38843 | if (TARGET_32BIT && inner_bytes >= 4) | |
38844 | return gen_rtx_REG (mode, regno); | |
38845 | } | |
38846 | ||
38847 | if (inner_bytes >= 8) | |
38848 | return gen_rtx_REG (mode, regno); | |
38849 | ||
38850 | r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno), | |
38851 | const0_rtx); | |
38852 | r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1), | |
38853 | GEN_INT (inner_bytes)); | |
38854 | return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2)); | |
38855 | } | |
38856 | ||
38857 | /* Return an rtx describing a return value of MODE as a PARALLEL | |
38858 | in N_ELTS registers, each of mode ELT_MODE, starting at REGNO, | |
38859 | stride REG_STRIDE. */ | |
38860 | ||
38861 | static rtx | |
38862 | rs6000_parallel_return (machine_mode mode, | |
38863 | int n_elts, machine_mode elt_mode, | |
38864 | unsigned int regno, unsigned int reg_stride) | |
38865 | { | |
38866 | rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts)); | |
38867 | ||
38868 | int i; | |
38869 | for (i = 0; i < n_elts; i++) | |
38870 | { | |
38871 | rtx r = gen_rtx_REG (elt_mode, regno); | |
38872 | rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode)); | |
38873 | XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off); | |
38874 | regno += reg_stride; | |
38875 | } | |
38876 | ||
38877 | return par; | |
38878 | } | |
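
/* Example (illustrative): rs6000_parallel_return (DImode, 2, SImode,
   GP_ARG_RETURN, 1) describes a DImode value returned in r3 and r4 as
   two SImode pieces at byte offsets 0 and 4.  */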
38879 | ||
38880 | /* Target hook for TARGET_FUNCTION_VALUE. | |
38881 | ||
38882 | On the SPE, both FPs and vectors are returned in r3. | |
38883 | ||
38884 | On RS/6000 an integer value is in r3 and a floating-point value is in | |
38885 | fp1, unless -msoft-float. */ | |
38886 | ||
38887 | static rtx | |
38888 | rs6000_function_value (const_tree valtype, | |
38889 | const_tree fn_decl_or_type ATTRIBUTE_UNUSED, | |
38890 | bool outgoing ATTRIBUTE_UNUSED) | |
38891 | { | |
38892 | machine_mode mode; | |
38893 | unsigned int regno; | |
38894 | machine_mode elt_mode; | |
38895 | int n_elts; | |
38896 | ||
38897 | /* Special handling for structs in darwin64. */ | |
38898 | if (TARGET_MACHO | |
38899 | && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype)) | |
38900 | { | |
38901 | CUMULATIVE_ARGS valcum; | |
38902 | rtx valret; | |
38903 | ||
38904 | valcum.words = 0; | |
38905 | valcum.fregno = FP_ARG_MIN_REG; | |
38906 | valcum.vregno = ALTIVEC_ARG_MIN_REG; | |
38907 | /* Do a trial code generation as if this were going to be passed as | |
38908 | an argument; if any part goes in memory, we return NULL. */ | |
38909 | valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true); | |
38910 | if (valret) | |
38911 | return valret; | |
38912 | /* Otherwise fall through to standard ABI rules. */ | |
38913 | } | |
38914 | ||
38915 | mode = TYPE_MODE (valtype); | |
38916 | ||
38917 | /* The ELFv2 ABI returns homogeneous floating-point and vector aggregates in registers. */ | |
38918 | if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts)) | |
38919 | { | |
38920 | int first_reg, n_regs; | |
38921 | ||
38922 | if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode)) | |
38923 | { | |
38924 | /* _Decimal128 must use even/odd register pairs. */ | |
38925 | first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN; | |
38926 | n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3; | |
38927 | } | |
38928 | else | |
38929 | { | |
38930 | first_reg = ALTIVEC_ARG_RETURN; | |
38931 | n_regs = 1; | |
38932 | } | |
38933 | ||
38934 | return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs); | |
38935 | } | |
38936 | ||
38937 | /* Some return value types need to be split for the 32-bit ABI with -mpowerpc64. */ | |
38938 | if (TARGET_32BIT && TARGET_POWERPC64) | |
38939 | switch (mode) | |
38940 | { | |
38941 | default: | |
38942 | break; | |
916ace94 | 38943 | case E_DImode: |
38944 | case E_SCmode: | |
38945 | case E_DCmode: | |
38946 | case E_TCmode: | |
01e91138 | 38947 | int count = GET_MODE_SIZE (mode) / 4; |
38948 | return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1); | |
38949 | } | |
38950 | ||
38951 | if ((INTEGRAL_TYPE_P (valtype) | |
38952 | && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64)) | |
38953 | || POINTER_TYPE_P (valtype)) | |
38954 | mode = TARGET_32BIT ? SImode : DImode; | |
38955 | ||
38956 | if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS) | |
38957 | /* _Decimal128 must use an even/odd register pair. */ | |
38958 | regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN; | |
38959 | else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && TARGET_FPRS | |
38960 | && !FLOAT128_VECTOR_P (mode) | |
38961 | && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT)) | |
38962 | regno = FP_ARG_RETURN; | |
38963 | else if (TREE_CODE (valtype) == COMPLEX_TYPE | |
38964 | && targetm.calls.split_complex_arg) | |
38965 | return rs6000_complex_function_value (mode); | |
38966 | /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same | |
38967 | return register is used in both cases, and we won't see V2DImode/V2DFmode | |
38968 | for pure altivec, combine the two cases. */ | |
38969 | else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode)) | |
38970 | && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI | |
38971 | && ALTIVEC_OR_VSX_VECTOR_MODE (mode)) | |
38972 | regno = ALTIVEC_ARG_RETURN; | |
38973 | else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT | |
38974 | && (mode == DFmode || mode == DCmode | |
38975 | || FLOAT128_IBM_P (mode) || mode == TCmode)) | |
38976 | return spe_build_register_parallel (mode, GP_ARG_RETURN); | |
38977 | else | |
38978 | regno = GP_ARG_RETURN; | |
38979 | ||
38980 | return gen_rtx_REG (mode, regno); | |
38981 | } | |
38982 | ||
38983 | /* Define how to find the value returned by a library function | |
38984 | assuming the value has mode MODE. */ | |
38985 | rtx | |
38986 | rs6000_libcall_value (machine_mode mode) | |
38987 | { | |
38988 | unsigned int regno; | |
38989 | ||
38990 | /* A long long return value needs to be split for the 32-bit ABI with -mpowerpc64. */ | |
38991 | if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode) | |
38992 | return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1); | |
38993 | ||
38994 | if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS) | |
38995 | /* _Decimal128 must use an even/odd register pair. */ | |
38996 | regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN; | |
38997 | else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) | |
38998 | && TARGET_HARD_FLOAT && TARGET_FPRS | |
38999 | && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT)) | |
39000 | regno = FP_ARG_RETURN; | |
39001 | /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same | |
39002 | return register is used in both cases, and we won't see V2DImode/V2DFmode | |
39003 | for pure altivec, combine the two cases. */ | |
39004 | else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) | |
39005 | && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI) | |
39006 | regno = ALTIVEC_ARG_RETURN; | |
39007 | else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg) | |
39008 | return rs6000_complex_function_value (mode); | |
39009 | else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT | |
39010 | && (mode == DFmode || mode == DCmode | |
39011 | || FLOAT128_IBM_P (mode) || mode == TCmode)) | |
39012 | return spe_build_register_parallel (mode, GP_ARG_RETURN); | |
39013 | else | |
39014 | regno = GP_ARG_RETURN; | |
39015 | ||
39016 | return gen_rtx_REG (mode, regno); | |
39017 | } | |
39018 | ||
39019 | ||
39020 | /* Return true if we use LRA instead of the reload pass. */ | |
39021 | static bool | |
39022 | rs6000_lra_p (void) | |
39023 | { | |
39024 | return TARGET_LRA; | |
39025 | } | |
39026 | ||
39027 | /* Compute register pressure classes. We implement the target hook to avoid | |
39028 | IRA picking something like NON_SPECIAL_REGS as a pressure class, which can | |
39029 | lead to incorrect estimates of the number of available registers and | |
39030 | therefore increased register pressure/spill. */ | |
39031 | static int | |
39032 | rs6000_compute_pressure_classes (enum reg_class *pressure_classes) | |
39033 | { | |
39034 | int n; | |
39035 | ||
39036 | n = 0; | |
39037 | pressure_classes[n++] = GENERAL_REGS; | |
39038 | if (TARGET_VSX) | |
39039 | pressure_classes[n++] = VSX_REGS; | |
39040 | else | |
39041 | { | |
39042 | if (TARGET_ALTIVEC) | |
39043 | pressure_classes[n++] = ALTIVEC_REGS; | |
39044 | if (TARGET_HARD_FLOAT && TARGET_FPRS) | |
39045 | pressure_classes[n++] = FLOAT_REGS; | |
39046 | } | |
39047 | pressure_classes[n++] = CR_REGS; | |
39048 | pressure_classes[n++] = SPECIAL_REGS; | |
39049 | ||
39050 | return n; | |
39051 | } | |
39052 | ||
39053 | /* Given FROM and TO register numbers, say whether this elimination is allowed. | |
39054 | Frame pointer elimination is automatically handled. | |
39055 | ||
39056 | For the RS/6000, if frame pointer elimination is being done, we would like | |
39057 | to convert ap into fp, not sp. | |
39058 | ||
39059 | We need r30 if -mminimal-toc was specified and there are constant | |
39060 | pool references. */ | |
39061 | ||
39062 | static bool | |
39063 | rs6000_can_eliminate (const int from, const int to) | |
39064 | { | |
39065 | return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM | |
39066 | ? ! frame_pointer_needed | |
39067 | : from == RS6000_PIC_OFFSET_TABLE_REGNUM | |
39068 | ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC | |
39069 | || constant_pool_empty_p () | |
39070 | : true); | |
39071 | } | |
39072 | ||
39073 | /* Define the offset between two registers, FROM to be eliminated and its | |
39074 | replacement TO, at the start of a routine. */ | |
39075 | HOST_WIDE_INT | |
39076 | rs6000_initial_elimination_offset (int from, int to) | |
39077 | { | |
39078 | rs6000_stack_t *info = rs6000_stack_info (); | |
39079 | HOST_WIDE_INT offset; | |
39080 | ||
39081 | if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM) | |
39082 | offset = info->push_p ? 0 : -info->total_size; | |
39083 | else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM) | |
39084 | { | |
39085 | offset = info->push_p ? 0 : -info->total_size; | |
39086 | if (FRAME_GROWS_DOWNWARD) | |
39087 | offset += info->fixed_size + info->vars_size + info->parm_size; | |
39088 | } | |
39089 | else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) | |
39090 | offset = FRAME_GROWS_DOWNWARD | |
39091 | ? info->fixed_size + info->vars_size + info->parm_size | |
39092 | : 0; | |
39093 | else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) | |
39094 | offset = info->total_size; | |
39095 | else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM) | |
39096 | offset = info->push_p ? info->total_size : 0; | |
39097 | else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM) | |
39098 | offset = 0; | |
39099 | else | |
39100 | gcc_unreachable (); | |
39101 | ||
39102 | return offset; | |
39103 | } | |
39104 | ||
39105 | static rtx | |
39106 | rs6000_dwarf_register_span (rtx reg) | |
39107 | { | |
39108 | rtx parts[8]; | |
39109 | int i, words; | |
39110 | unsigned regno = REGNO (reg); | |
39111 | machine_mode mode = GET_MODE (reg); | |
39112 | ||
39113 | if (TARGET_SPE | |
39114 | && regno < 32 | |
39115 | && (SPE_VECTOR_MODE (GET_MODE (reg)) | |
39116 | || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) | |
39117 | && mode != SFmode && mode != SDmode && mode != SCmode))) | |
39118 | ; | |
39119 | else | |
39120 | return NULL_RTX; | |
39121 | ||
39122 | regno = REGNO (reg); | |
39123 | ||
39124 | /* The duality of the SPE register size wreaks all kinds of havoc. | |
39125 | This is a way of distinguishing r0 in 32-bits from r0 in | |
39126 | 64-bits. */ | |
39127 | words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD; | |
39128 | gcc_assert (words <= 4); | |
39129 | for (i = 0; i < words; i++, regno++) | |
39130 | { | |
39131 | if (BYTES_BIG_ENDIAN) | |
39132 | { | |
39133 | parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO); | |
39134 | parts[2 * i + 1] = gen_rtx_REG (SImode, regno); | |
39135 | } | |
39136 | else | |
39137 | { | |
39138 | parts[2 * i] = gen_rtx_REG (SImode, regno); | |
39139 | parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO); | |
39140 | } | |
39141 | } | |
39142 | ||
39143 | return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts)); | |
39144 | } | |
39145 | ||
39146 | /* Fill in sizes for SPE register high parts in table used by unwinder. */ | |
39147 | ||
39148 | static void | |
39149 | rs6000_init_dwarf_reg_sizes_extra (tree address) | |
39150 | { | |
39151 | if (TARGET_SPE) | |
39152 | { | |
39153 | int i; | |
39154 | machine_mode mode = TYPE_MODE (char_type_node); | |
39155 | rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL); | |
39156 | rtx mem = gen_rtx_MEM (BLKmode, addr); | |
39157 | rtx value = gen_int_mode (4, mode); | |
39158 | ||
39159 | for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++) | |
39160 | { | |
39161 | int column = DWARF_REG_TO_UNWIND_COLUMN | |
39162 | (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true)); | |
39163 | HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode); | |
39164 | ||
39165 | emit_move_insn (adjust_address (mem, mode, offset), value); | |
39166 | } | |
39167 | } | |
39168 | ||
39169 | if (TARGET_MACHO && ! TARGET_ALTIVEC) | |
39170 | { | |
39171 | int i; | |
39172 | machine_mode mode = TYPE_MODE (char_type_node); | |
39173 | rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL); | |
39174 | rtx mem = gen_rtx_MEM (BLKmode, addr); | |
39175 | rtx value = gen_int_mode (16, mode); | |
39176 | ||
39177 | /* On Darwin, libgcc may be built to run on both G3 and G4/5. | |
39178 | The unwinder still needs to know the size of Altivec registers. */ | |
39179 | ||
39180 | for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++) | |
39181 | { | |
39182 | int column = DWARF_REG_TO_UNWIND_COLUMN | |
39183 | (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true)); | |
39184 | HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode); | |
39185 | ||
39186 | emit_move_insn (adjust_address (mem, mode, offset), value); | |
39187 | } | |
39188 | } | |
39189 | } | |
39190 | ||
39191 | /* Map internal gcc register numbers to debug format register numbers. | |
39192 | FORMAT specifies the type of debug register number to use: | |
39193 | 0 -- debug information, except for frame-related sections | |
39194 | 1 -- DWARF .debug_frame section | |
39195 | 2 -- DWARF .eh_frame section */ | |
39196 | ||
39197 | unsigned int | |
39198 | rs6000_dbx_register_number (unsigned int regno, unsigned int format) | |
39199 | { | |
39200 | /* We never use the GCC internal number for SPE high registers. | |
39201 | Those are mapped to the 1200..1231 range for all debug formats. */ | |
39202 | if (SPE_HIGH_REGNO_P (regno)) | |
39203 | return regno - FIRST_SPE_HIGH_REGNO + 1200; | |
39204 | ||
39205 | /* Except for the above, we use the internal number for non-DWARF | |
39206 | debug information, and also for .eh_frame. */ | |
39207 | if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2) | |
39208 | return regno; | |
39209 | ||
39210 | /* On some platforms, we use the standard DWARF register | |
39211 | numbering for .debug_info and .debug_frame. */ | |
39212 | #ifdef RS6000_USE_DWARF_NUMBERING | |
39213 | if (regno <= 63) | |
39214 | return regno; | |
39215 | if (regno == LR_REGNO) | |
39216 | return 108; | |
39217 | if (regno == CTR_REGNO) | |
39218 | return 109; | |
39219 | /* Special handling for CR for .debug_frame: rs6000_emit_prologue has | |
39220 | translated any combination of CR2, CR3, CR4 saves to a save of CR2. | |
39221 | The actual code emitted saves the whole of CR, so we map CR2_REGNO | |
39222 | to the DWARF reg for CR. */ | |
39223 | if (format == 1 && regno == CR2_REGNO) | |
39224 | return 64; | |
39225 | if (CR_REGNO_P (regno)) | |
39226 | return regno - CR0_REGNO + 86; | |
39227 | if (regno == CA_REGNO) | |
39228 | return 101; /* XER */ | |
39229 | if (ALTIVEC_REGNO_P (regno)) | |
39230 | return regno - FIRST_ALTIVEC_REGNO + 1124; | |
39231 | if (regno == VRSAVE_REGNO) | |
39232 | return 356; | |
39233 | if (regno == VSCR_REGNO) | |
39234 | return 67; | |
39235 | if (regno == SPE_ACC_REGNO) | |
39236 | return 99; | |
39237 | if (regno == SPEFSCR_REGNO) | |
39238 | return 612; | |
39239 | #endif | |
39240 | return regno; | |
39241 | } | |
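
/* For example (illustrative, assuming RS6000_USE_DWARF_NUMBERING is
   defined): for .debug_frame the link register maps to 108 and vr0 to
   1124, while .eh_frame (format 2) keeps the internal numbers, apart
   from the SPE high registers handled above.  */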
39242 | ||
39243 | /* target hook eh_return_filter_mode */ | |
f77c4496 | 39244 | static scalar_int_mode |
01e91138 | 39245 | rs6000_eh_return_filter_mode (void) |
39246 | { | |
39247 | return TARGET_32BIT ? SImode : word_mode; | |
39248 | } | |
39249 | ||
39250 | /* Target hook for scalar_mode_supported_p. */ | |
39251 | static bool | |
8aec1ebb | 39252 | rs6000_scalar_mode_supported_p (scalar_mode mode) |
01e91138 | 39253 | { |
39254 | /* -m32 does not support TImode. This is the default, from | |
39255 | default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the | |
39256 | same ABI as for -m32. But default_scalar_mode_supported_p allows | |
39257 | integer modes of precision 2 * BITS_PER_WORD, which matches TImode | |
39258 | for -mpowerpc64. */ | |
39259 | if (TARGET_32BIT && mode == TImode) | |
39260 | return false; | |
39261 | ||
39262 | if (DECIMAL_FLOAT_MODE_P (mode)) | |
39263 | return default_decimal_float_supported_p (); | |
39264 | else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode)) | |
39265 | return true; | |
39266 | else | |
39267 | return default_scalar_mode_supported_p (mode); | |
39268 | } | |
39269 | ||
39270 | /* Target hook for vector_mode_supported_p. */ | |
39271 | static bool | |
39272 | rs6000_vector_mode_supported_p (machine_mode mode) | |
39273 | { | |
39275 | if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode)) | |
39276 | return true; | |
39277 | ||
39278 | if (TARGET_SPE && SPE_VECTOR_MODE (mode)) | |
39279 | return true; | |
39280 | ||
39281 | /* There is no vector form for IEEE 128-bit. If we return true for IEEE | |
39282 | 128-bit, the compiler might try to widen IEEE 128-bit to IBM | |
39283 | double-double. */ | |
39284 | else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode)) | |
39285 | return true; | |
39286 | ||
39287 | else | |
39288 | return false; | |
39289 | } | |
39290 | ||
39291 | /* Target hook for floatn_mode. */ | |
a15787d8 | 39292 | static opt_scalar_float_mode |
01e91138 | 39293 | rs6000_floatn_mode (int n, bool extended) |
39294 | { | |
39295 | if (extended) | |
39296 | { | |
39297 | switch (n) | |
39298 | { | |
39299 | case 32: | |
39300 | return DFmode; | |
39301 | ||
39302 | case 64: | |
39303 | if (TARGET_FLOAT128_KEYWORD) | |
39304 | return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode; | |
39305 | else | |
a15787d8 | 39306 | return opt_scalar_float_mode (); |
01e91138 | 39307 | |
39308 | case 128: | |
a15787d8 | 39309 | return opt_scalar_float_mode (); |
01e91138 | 39310 | |
39311 | default: | |
39312 | /* Those are the only valid _FloatNx types. */ | |
39313 | gcc_unreachable (); | |
39314 | } | |
39315 | } | |
39316 | else | |
39317 | { | |
39318 | switch (n) | |
39319 | { | |
39320 | case 32: | |
39321 | return SFmode; | |
39322 | ||
39323 | case 64: | |
39324 | return DFmode; | |
39325 | ||
39326 | case 128: | |
39327 | if (TARGET_FLOAT128_KEYWORD) | |
39328 | return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode; | |
39329 | else | |
a15787d8 | 39330 | return opt_scalar_float_mode (); |
01e91138 | 39331 | |
39332 | default: | |
a15787d8 | 39333 | return opt_scalar_float_mode (); |
01e91138 | 39334 | } |
39335 | } | |
39337 | } | |
39338 | ||
39339 | /* Target hook for c_mode_for_suffix. */ | |
39340 | static machine_mode | |
39341 | rs6000_c_mode_for_suffix (char suffix) | |
39342 | { | |
39343 | if (TARGET_FLOAT128_TYPE) | |
39344 | { | |
39345 | if (suffix == 'q' || suffix == 'Q') | |
39346 | return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode; | |
39347 | ||
39348 | /* At the moment, we are not defining a suffix for IBM extended double. | |
39349 | If/when the default for -mabi=ieeelongdouble is changed, and we want | |
39350 | to support __ibm128 constants in legacy library code, we may need to | |
39351 | re-evaluate this decision. Currently, c-lex.c only supports 'w' and | |
39352 | 'q' as machine dependent suffixes. The x86_64 port uses 'w' for | |
39353 | __float80 constants. */ | |
39354 | } | |
39355 | ||
39356 | return VOIDmode; | |
39357 | } | |
39358 | ||
39359 | /* Target hook for invalid_arg_for_unprototyped_fn. */ | |
39360 | static const char * | |
39361 | invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val) | |
39362 | { | |
39363 | return (!rs6000_darwin64_abi | |
39364 | && typelist == 0 | |
39365 | && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE | |
39366 | && (funcdecl == NULL_TREE | |
39367 | || (TREE_CODE (funcdecl) == FUNCTION_DECL | |
39368 | && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD))) | |
39369 | ? N_("AltiVec argument passed to unprototyped function") | |
39370 | : NULL; | |
39371 | } | |
39372 | ||
39373 | /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register | |
39374 | setup by using __stack_chk_fail_local hidden function instead of | |
39375 | calling __stack_chk_fail directly. Otherwise it is better to call | |
39376 | __stack_chk_fail directly. */ | |
39377 | ||
39378 | static tree ATTRIBUTE_UNUSED | |
39379 | rs6000_stack_protect_fail (void) | |
39380 | { | |
39381 | return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic) | |
39382 | ? default_hidden_stack_protect_fail () | |
39383 | : default_external_stack_protect_fail (); | |
39384 | } | |
39385 | ||
39386 | void | |
39387 | rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED, | |
39388 | int num_operands ATTRIBUTE_UNUSED) | |
39389 | { | |
39390 | if (rs6000_warn_cell_microcode) | |
39391 | { | |
39392 | const char *temp; | |
39393 | int insn_code_number = recog_memoized (insn); | |
39394 | location_t location = INSN_LOCATION (insn); | |
39395 | ||
39396 | /* Punt on insns we cannot recognize. */ | |
39397 | if (insn_code_number < 0) | |
39398 | return; | |
39399 | ||
39400 | /* get_insn_template can modify recog_data, so save and restore it. */ | |
39401 | struct recog_data_d recog_data_save = recog_data; | |
39402 | for (int i = 0; i < recog_data.n_operands; i++) | |
39403 | recog_data.operand[i] = copy_rtx (recog_data.operand[i]); | |
39404 | temp = get_insn_template (insn_code_number, insn); | |
39405 | recog_data = recog_data_save; | |
39406 | ||
39407 | if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS) | |
39408 | warning_at (location, OPT_mwarn_cell_microcode, | |
39409 | "emitting microcode insn %s\t[%s] #%d", | |
39410 | temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn)); | |
39411 | else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL) | |
39412 | warning_at (location, OPT_mwarn_cell_microcode, | |
39413 | "emitting conditional microcode insn %s\t[%s] #%d", | |
39414 | temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn)); | |
39415 | } | |
39416 | } | |
39417 | ||
39418 | /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. AddressSanitizer forms shadow addresses as (addr >> 3) + this offset. */ | |
39419 | ||
39420 | #if TARGET_ELF | |
39421 | static unsigned HOST_WIDE_INT | |
39422 | rs6000_asan_shadow_offset (void) | |
39423 | { | |
39424 | return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29); | |
39425 | } | |
39426 | #endif | |
39427 | \f | |
39428 | /* Mask options that we want to support inside of attribute((target)) and | |
39429 | #pragma GCC target operations. Note, we do not include things like | |
39430 | 64/32-bit, endianness, hard/soft floating point, etc. that would have | |
39431 | different calling sequences. */ | |
39432 | ||
39433 | struct rs6000_opt_mask { | |
39434 | const char *name; /* option name */ | |
39435 | HOST_WIDE_INT mask; /* mask to set */ | |
39436 | bool invert; /* invert sense of mask */ | |
39437 | bool valid_target; /* option is a target option */ | |
39438 | }; | |
39439 | ||
39440 | static struct rs6000_opt_mask const rs6000_opt_masks[] = | |
39441 | { | |
39442 | { "altivec", OPTION_MASK_ALTIVEC, false, true }, | |
39443 | { "cmpb", OPTION_MASK_CMPB, false, true }, | |
39444 | { "crypto", OPTION_MASK_CRYPTO, false, true }, | |
39445 | { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true }, | |
39446 | { "dlmzb", OPTION_MASK_DLMZB, false, true }, | |
39447 | { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX, | |
39448 | false, true }, | |
39449 | { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, false }, | |
39450 | { "float128-type", OPTION_MASK_FLOAT128_TYPE, false, false }, | |
39451 | { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, false }, | |
39452 | { "fprnd", OPTION_MASK_FPRND, false, true }, | |
39453 | { "hard-dfp", OPTION_MASK_DFP, false, true }, | |
39454 | { "htm", OPTION_MASK_HTM, false, true }, | |
39455 | { "isel", OPTION_MASK_ISEL, false, true }, | |
39456 | { "mfcrf", OPTION_MASK_MFCRF, false, true }, | |
39457 | { "mfpgpr", OPTION_MASK_MFPGPR, false, true }, | |
39458 | { "modulo", OPTION_MASK_MODULO, false, true }, | |
39459 | { "mulhw", OPTION_MASK_MULHW, false, true }, | |
39460 | { "multiple", OPTION_MASK_MULTIPLE, false, true }, | |
39461 | { "popcntb", OPTION_MASK_POPCNTB, false, true }, | |
39462 | { "popcntd", OPTION_MASK_POPCNTD, false, true }, | |
39463 | { "power8-fusion", OPTION_MASK_P8_FUSION, false, true }, | |
39464 | { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true }, | |
39465 | { "power8-vector", OPTION_MASK_P8_VECTOR, false, true }, | |
39466 | { "power9-dform-scalar", OPTION_MASK_P9_DFORM_SCALAR, false, true }, | |
39467 | { "power9-dform-vector", OPTION_MASK_P9_DFORM_VECTOR, false, true }, | |
39468 | { "power9-fusion", OPTION_MASK_P9_FUSION, false, true }, | |
39469 | { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true }, | |
39470 | { "power9-misc", OPTION_MASK_P9_MISC, false, true }, | |
39471 | { "power9-vector", OPTION_MASK_P9_VECTOR, false, true }, | |
39472 | { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true }, | |
39473 | { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true }, | |
39474 | { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true }, | |
39475 | { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true }, | |
39476 | { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true }, | |
39477 | { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true }, | |
39478 | { "string", OPTION_MASK_STRING, false, true }, | |
39479 | { "toc-fusion", OPTION_MASK_TOC_FUSION, false, true }, | |
39480 | { "update", OPTION_MASK_NO_UPDATE, true, true }, | |
39481 | { "upper-regs-di", OPTION_MASK_UPPER_REGS_DI, false, true }, | |
39482 | { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, true }, | |
39483 | { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, true }, | |
39484 | { "vsx", OPTION_MASK_VSX, false, true }, | |
39485 | { "vsx-small-integer", OPTION_MASK_VSX_SMALL_INTEGER, false, true }, | |
39486 | { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true }, | |
39487 | #ifdef OPTION_MASK_64BIT | |
39488 | #if TARGET_AIX_OS | |
39489 | { "aix64", OPTION_MASK_64BIT, false, false }, | |
39490 | { "aix32", OPTION_MASK_64BIT, true, false }, | |
39491 | #else | |
39492 | { "64", OPTION_MASK_64BIT, false, false }, | |
39493 | { "32", OPTION_MASK_64BIT, true, false }, | |
39494 | #endif | |
39495 | #endif | |
39496 | #ifdef OPTION_MASK_EABI | |
39497 | { "eabi", OPTION_MASK_EABI, false, false }, | |
39498 | #endif | |
39499 | #ifdef OPTION_MASK_LITTLE_ENDIAN | |
39500 | { "little", OPTION_MASK_LITTLE_ENDIAN, false, false }, | |
39501 | { "big", OPTION_MASK_LITTLE_ENDIAN, true, false }, | |
39502 | #endif | |
39503 | #ifdef OPTION_MASK_RELOCATABLE | |
39504 | { "relocatable", OPTION_MASK_RELOCATABLE, false, false }, | |
39505 | #endif | |
39506 | #ifdef OPTION_MASK_STRICT_ALIGN | |
39507 | { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false }, | |
39508 | #endif | |
39509 | { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false }, | |
39510 | { "string", OPTION_MASK_STRING, false, false }, | |
39511 | }; | |
39512 | ||
39513 | /* Builtin mask mapping for printing the flags. */ | |
39514 | static struct rs6000_opt_mask const rs6000_builtin_mask_names[] = | |
39515 | { | |
39516 | { "altivec", RS6000_BTM_ALTIVEC, false, false }, | |
39517 | { "vsx", RS6000_BTM_VSX, false, false }, | |
39518 | { "spe", RS6000_BTM_SPE, false, false }, | |
39519 | { "paired", RS6000_BTM_PAIRED, false, false }, | |
39520 | { "fre", RS6000_BTM_FRE, false, false }, | |
39521 | { "fres", RS6000_BTM_FRES, false, false }, | |
39522 | { "frsqrte", RS6000_BTM_FRSQRTE, false, false }, | |
39523 | { "frsqrtes", RS6000_BTM_FRSQRTES, false, false }, | |
39524 | { "popcntd", RS6000_BTM_POPCNTD, false, false }, | |
39525 | { "cell", RS6000_BTM_CELL, false, false }, | |
39526 | { "power8-vector", RS6000_BTM_P8_VECTOR, false, false }, | |
39527 | { "power9-vector", RS6000_BTM_P9_VECTOR, false, false }, | |
39528 | { "power9-misc", RS6000_BTM_P9_MISC, false, false }, | |
39529 | { "crypto", RS6000_BTM_CRYPTO, false, false }, | |
39530 | { "htm", RS6000_BTM_HTM, false, false }, | |
39531 | { "hard-dfp", RS6000_BTM_DFP, false, false }, | |
39532 | { "hard-float", RS6000_BTM_HARD_FLOAT, false, false }, | |
39533 | { "long-double-128", RS6000_BTM_LDBL128, false, false }, | |
39534 | { "float128", RS6000_BTM_FLOAT128, false, false }, | |
39535 | }; | |
39536 | ||
39537 | /* Option variables that we want to support inside attribute((target)) and | |
39538 | #pragma GCC target operations. */ | |
39539 | ||
39540 | struct rs6000_opt_var { | |
39541 | const char *name; /* option name */ | |
39542 | size_t global_offset; /* offset of the option in global_options. */ | |
39543 | size_t target_offset; /* offset of the option in target options. */ | |
39544 | }; | |
39545 | ||
39546 | static struct rs6000_opt_var const rs6000_opt_vars[] = | |
39547 | { | |
39548 | { "friz", | |
39549 | offsetof (struct gcc_options, x_TARGET_FRIZ), | |
39550 | offsetof (struct cl_target_option, x_TARGET_FRIZ), }, | |
39551 | { "avoid-indexed-addresses", | |
39552 | offsetof (struct gcc_options, x_TARGET_AVOID_XFORM), | |
39553 | offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) }, | |
39554 | { "paired", | |
39555 | offsetof (struct gcc_options, x_rs6000_paired_float), | |
39556 | offsetof (struct cl_target_option, x_rs6000_paired_float), }, | |
39557 | { "longcall", | |
39558 | offsetof (struct gcc_options, x_rs6000_default_long_calls), | |
39559 | offsetof (struct cl_target_option, x_rs6000_default_long_calls), }, | |
39560 | { "optimize-swaps", | |
39561 | offsetof (struct gcc_options, x_rs6000_optimize_swaps), | |
39562 | offsetof (struct cl_target_option, x_rs6000_optimize_swaps), }, | |
39563 | { "allow-movmisalign", | |
39564 | offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN), | |
39565 | offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), }, | |
39566 | { "allow-df-permute", | |
39567 | offsetof (struct gcc_options, x_TARGET_ALLOW_DF_PERMUTE), | |
39568 | offsetof (struct cl_target_option, x_TARGET_ALLOW_DF_PERMUTE), }, | |
39569 | { "sched-groups", | |
39570 | offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS), | |
39571 | offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), }, | |
39572 | { "always-hint", | |
39573 | offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT), | |
39574 | offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), }, | |
39575 | { "align-branch-targets", | |
39576 | offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS), | |
39577 | offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), }, | |
39578 | { "vectorize-builtins", | |
39579 | offsetof (struct gcc_options, x_TARGET_VECTORIZE_BUILTINS), | |
39580 | offsetof (struct cl_target_option, x_TARGET_VECTORIZE_BUILTINS), }, | |
39581 | { "tls-markers", | |
39582 | offsetof (struct gcc_options, x_tls_markers), | |
39583 | offsetof (struct cl_target_option, x_tls_markers), }, | |
39584 | { "sched-prolog", | |
39585 | offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG), | |
39586 | offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), }, | |
39587 | { "sched-epilog", | |
39588 | offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG), | |
39589 | offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), }, | |
39590 | { "gen-cell-microcode", | |
39591 | offsetof (struct gcc_options, x_rs6000_gen_cell_microcode), | |
39592 | offsetof (struct cl_target_option, x_rs6000_gen_cell_microcode), }, | |
39593 | { "warn-cell-microcode", | |
39594 | offsetof (struct gcc_options, x_rs6000_warn_cell_microcode), | |
39595 | offsetof (struct cl_target_option, x_rs6000_warn_cell_microcode), }, | |
39596 | }; | |
39597 | ||
39598 | /* Inner function to handle attribute((target("..."))) and #pragma GCC target | |
39599 | parsing. Return true if there were no errors. */ | |
39600 | ||
39601 | static bool | |
39602 | rs6000_inner_target_options (tree args, bool attr_p) | |
39603 | { | |
39604 | bool ret = true; | |
39605 | ||
39606 | if (args == NULL_TREE) | |
39607 | ; | |
39608 | ||
39609 | else if (TREE_CODE (args) == STRING_CST) | |
39610 | { | |
39611 | char *p = ASTRDUP (TREE_STRING_POINTER (args)); | |
39612 | char *q; | |
39613 | ||
39614 | while ((q = strtok (p, ",")) != NULL) | |
39615 | { | |
39616 | bool error_p = false; | |
39617 | bool not_valid_p = false; | |
39618 | const char *cpu_opt = NULL; | |
39619 | ||
39620 | p = NULL; | |
39621 | if (strncmp (q, "cpu=", 4) == 0) | |
39622 | { | |
39623 | int cpu_index = rs6000_cpu_name_lookup (q+4); | |
39624 | if (cpu_index >= 0) | |
39625 | rs6000_cpu_index = cpu_index; | |
39626 | else | |
39627 | { | |
39628 | error_p = true; | |
39629 | cpu_opt = q+4; | |
39630 | } | |
39631 | } | |
39632 | else if (strncmp (q, "tune=", 5) == 0) | |
39633 | { | |
39634 | int tune_index = rs6000_cpu_name_lookup (q+5); | |
39635 | if (tune_index >= 0) | |
39636 | rs6000_tune_index = tune_index; | |
39637 | else | |
39638 | { | |
39639 | error_p = true; | |
39640 | cpu_opt = q+5; | |
39641 | } | |
39642 | } | |
39643 | else | |
39644 | { | |
39645 | size_t i; | |
39646 | bool invert = false; | |
39647 | char *r = q; | |
39648 | ||
39649 | error_p = true; | |
39650 | if (strncmp (r, "no-", 3) == 0) | |
39651 | { | |
39652 | invert = true; | |
39653 | r += 3; | |
39654 | } | |
39655 | ||
39656 | for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++) | |
39657 | if (strcmp (r, rs6000_opt_masks[i].name) == 0) | |
39658 | { | |
39659 | HOST_WIDE_INT mask = rs6000_opt_masks[i].mask; | |
39660 | ||
39661 | if (!rs6000_opt_masks[i].valid_target) | |
39662 | not_valid_p = true; | |
39663 | else | |
39664 | { | |
39665 | error_p = false; | |
39666 | rs6000_isa_flags_explicit |= mask; | |
39667 | ||
39668 | /* VSX needs Altivec, so -mvsx automatically sets | |
39669 | -maltivec and disables -mavoid-indexed-addresses. */ | |
39670 | if (!invert) | |
39671 | { | |
39672 | if (mask == OPTION_MASK_VSX) | |
39673 | { | |
39674 | mask |= OPTION_MASK_ALTIVEC; | |
39675 | TARGET_AVOID_XFORM = 0; | |
39676 | } | |
39677 | } | |
39678 | ||
39679 | if (rs6000_opt_masks[i].invert) | |
39680 | invert = !invert; | |
39681 | ||
39682 | if (invert) | |
39683 | rs6000_isa_flags &= ~mask; | |
39684 | else | |
39685 | rs6000_isa_flags |= mask; | |
39686 | } | |
39687 | break; | |
39688 | } | |
39689 | ||
39690 | if (error_p && !not_valid_p) | |
39691 | { | |
39692 | for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++) | |
39693 | if (strcmp (r, rs6000_opt_vars[i].name) == 0) | |
39694 | { | |
39695 | size_t j = rs6000_opt_vars[i].global_offset; | |
39696 | *((int *) ((char *)&global_options + j)) = !invert; | |
39697 | error_p = false; | |
39698 | not_valid_p = false; | |
39699 | break; | |
39700 | } | |
39701 | } | |
39702 | } | |
39703 | ||
39704 | if (error_p) | |
39705 | { | |
39706 | const char *eprefix, *esuffix; | |
39707 | ||
39708 | ret = false; | |
39709 | if (attr_p) | |
39710 | { | |
39711 | eprefix = "__attribute__((__target__("; | |
39712 | esuffix = ")))"; | |
39713 | } | |
39714 | else | |
39715 | { | |
39716 | eprefix = "#pragma GCC target "; | |
39717 | esuffix = ""; | |
39718 | } | |
39719 | ||
39720 | if (cpu_opt) | |
39721 | error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix, | |
39722 | q, esuffix); | |
39723 | else if (not_valid_p) | |
39724 | error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix); | |
39725 | else | |
39726 | error ("%s\"%s\"%s is invalid", eprefix, q, esuffix); | |
39727 | } | |
39728 | } | |
39729 | } | |
39730 | ||
39731 | else if (TREE_CODE (args) == TREE_LIST) | |
39732 | { | |
39733 | do | |
39734 | { | |
39735 | tree value = TREE_VALUE (args); | |
39736 | if (value) | |
39737 | { | |
39738 | bool ret2 = rs6000_inner_target_options (value, attr_p); | |
39739 | if (!ret2) | |
39740 | ret = false; | |
39741 | } | |
39742 | args = TREE_CHAIN (args); | |
39743 | } | |
39744 | while (args != NULL_TREE); | |
39745 | } | |
39746 | ||
39747 | else | |
39748 | { | |
39749 | error ("attribute %<target%> argument not a string"); | |
39750 | return false; | |
39751 | } | |
39752 | ||
39753 | return ret; | |
39754 | } | |
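| /* As an illustrative sketch (not from the original source), the forms | |
|    accepted above correspond to user code such as: | |
| | |
|      #pragma GCC target ("cpu=power8,tune=power9,no-vsx") | |
|      __attribute__((__target__("altivec,friz"))) void f (void); | |
| | |
|    Each comma-separated token is consumed by one strtok iteration: "cpu=" | |
|    and "tune=" set the index variables, a "no-" prefix inverts the mask or | |
|    variable, and anything else is looked up first in rs6000_opt_masks and | |
|    then in rs6000_opt_vars. */ | |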
39755 | ||
39756 | /* Print out the target options as a list for -mdebug=target. */ | |
39757 | ||
39758 | static void | |
39759 | rs6000_debug_target_options (tree args, const char *prefix) | |
39760 | { | |
39761 | if (args == NULL_TREE) | |
39762 | fprintf (stderr, "%s<NULL>", prefix); | |
39763 | ||
39764 | else if (TREE_CODE (args) == STRING_CST) | |
39765 | { | |
39766 | char *p = ASTRDUP (TREE_STRING_POINTER (args)); | |
39767 | char *q; | |
39768 | ||
39769 | while ((q = strtok (p, ",")) != NULL) | |
39770 | { | |
39771 | p = NULL; | |
39772 | fprintf (stderr, "%s\"%s\"", prefix, q); | |
39773 | prefix = ", "; | |
39774 | } | |
39775 | } | |
39776 | ||
39777 | else if (TREE_CODE (args) == TREE_LIST) | |
39778 | { | |
39779 | do | |
39780 | { | |
39781 | tree value = TREE_VALUE (args); | |
39782 | if (value) | |
39783 | { | |
39784 | rs6000_debug_target_options (value, prefix); | |
39785 | prefix = ", "; | |
39786 | } | |
39787 | args = TREE_CHAIN (args); | |
39788 | } | |
39789 | while (args != NULL_TREE); | |
39790 | } | |
39791 | ||
39792 | else | |
39793 | gcc_unreachable (); | |
39794 | ||
39795 | return; | |
39796 | } | |
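| /* Hypothetical example of the output produced above: for an attribute | |
|    argument list containing "cpu=power8" and "no-vsx", -mdebug=target | |
|    would print roughly | |
| | |
|      args: "cpu=power8", "no-vsx" | |
| | |
|    with the comma separators coming from the prefix variable. */ | |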
39797 | ||
39798 | \f | |
39799 | /* Hook to validate attribute((target("..."))). */ | |
39800 | ||
39801 | static bool | |
39802 | rs6000_valid_attribute_p (tree fndecl, | |
39803 | tree ARG_UNUSED (name), | |
39804 | tree args, | |
39805 | int flags) | |
39806 | { | |
39807 | struct cl_target_option cur_target; | |
39808 | bool ret; | |
39809 | tree old_optimize = build_optimization_node (&global_options); | |
39810 | tree new_target, new_optimize; | |
39811 | tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl); | |
39812 | ||
39813 | gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE)); | |
39814 | ||
39815 | if (TARGET_DEBUG_TARGET) | |
39816 | { | |
39817 | tree tname = DECL_NAME (fndecl); | |
39818 | fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n"); | |
39819 | if (tname) | |
39820 | fprintf (stderr, "function: %.*s\n", | |
39821 | (int) IDENTIFIER_LENGTH (tname), | |
39822 | IDENTIFIER_POINTER (tname)); | |
39823 | else | |
39824 | fprintf (stderr, "function: unknown\n"); | |
39825 | ||
39826 | fprintf (stderr, "args:"); | |
39827 | rs6000_debug_target_options (args, " "); | |
39828 | fprintf (stderr, "\n"); | |
39829 | ||
39830 | if (flags) | |
39831 | fprintf (stderr, "flags: 0x%x\n", flags); | |
39832 | ||
39833 | fprintf (stderr, "--------------------\n"); | |
39834 | } | |
39835 | ||
39836 | old_optimize = build_optimization_node (&global_options); | |
39837 | func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl); | |
39838 | ||
39839 | /* If the function changed the optimization levels as well as setting target | |
39840 | options, start with the optimizations specified. */ | |
39841 | if (func_optimize && func_optimize != old_optimize) | |
39842 | cl_optimization_restore (&global_options, | |
39843 | TREE_OPTIMIZATION (func_optimize)); | |
39844 | ||
39845 | /* The target attributes may also change some optimization flags, so update | |
39846 | the optimization options if necessary. */ | |
39847 | cl_target_option_save (&cur_target, &global_options); | |
39848 | rs6000_cpu_index = rs6000_tune_index = -1; | |
39849 | ret = rs6000_inner_target_options (args, true); | |
39850 | ||
39851 | /* Set up any additional state. */ | |
39852 | if (ret) | |
39853 | { | |
39854 | ret = rs6000_option_override_internal (false); | |
39855 | new_target = build_target_option_node (&global_options); | |
39856 | } | |
39857 | else | |
39858 | new_target = NULL; | |
39859 | ||
39860 | new_optimize = build_optimization_node (&global_options); | |
39861 | ||
39862 | if (!new_target) | |
39863 | ret = false; | |
39864 | ||
39865 | else if (fndecl) | |
39866 | { | |
39867 | DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target; | |
39868 | ||
39869 | if (old_optimize != new_optimize) | |
39870 | DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize; | |
39871 | } | |
39872 | ||
39873 | cl_target_option_restore (&global_options, &cur_target); | |
39874 | ||
39875 | if (old_optimize != new_optimize) | |
39876 | cl_optimization_restore (&global_options, | |
39877 | TREE_OPTIMIZATION (old_optimize)); | |
39878 | ||
39879 | return ret; | |
39880 | } | |
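| /* Minimal user-level example that exercises this hook (assumed usage, not | |
|    part of the original source): | |
| | |
|      __attribute__((__target__("cpu=power7,no-vsx"))) | |
|      int add (int a, int b) { return a + b; } | |
| | |
|    The attribute string is parsed by rs6000_inner_target_options, and on | |
|    success rs6000_option_override_internal reruns so that the resulting | |
|    flags are captured in the function's DECL_FUNCTION_SPECIFIC_TARGET. */ | |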
39881 | ||
39882 | \f | |
39883 | /* Hook to validate the current #pragma GCC target and set the state, and | |
39884 | update the macros based on what was changed. If ARGS is NULL, then | |
39885 | POP_TARGET is used to reset the options. */ | |
39886 | ||
39887 | bool | |
39888 | rs6000_pragma_target_parse (tree args, tree pop_target) | |
39889 | { | |
39890 | tree prev_tree = build_target_option_node (&global_options); | |
39891 | tree cur_tree; | |
39892 | struct cl_target_option *prev_opt, *cur_opt; | |
39893 | HOST_WIDE_INT prev_flags, cur_flags, diff_flags; | |
39894 | HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask; | |
39895 | ||
39896 | if (TARGET_DEBUG_TARGET) | |
39897 | { | |
39898 | fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n"); | |
39899 | fprintf (stderr, "args:"); | |
39900 | rs6000_debug_target_options (args, " "); | |
39901 | fprintf (stderr, "\n"); | |
39902 | ||
39903 | if (pop_target) | |
39904 | { | |
39905 | fprintf (stderr, "pop_target:\n"); | |
39906 | debug_tree (pop_target); | |
39907 | } | |
39908 | else | |
39909 | fprintf (stderr, "pop_target: <NULL>\n"); | |
39910 | ||
39911 | fprintf (stderr, "--------------------\n"); | |
39912 | } | |
39913 | ||
39914 | if (! args) | |
39915 | { | |
39916 | cur_tree = ((pop_target) | |
39917 | ? pop_target | |
39918 | : target_option_default_node); | |
39919 | cl_target_option_restore (&global_options, | |
39920 | TREE_TARGET_OPTION (cur_tree)); | |
39921 | } | |
39922 | else | |
39923 | { | |
39924 | rs6000_cpu_index = rs6000_tune_index = -1; | |
39925 | if (!rs6000_inner_target_options (args, false) | |
39926 | || !rs6000_option_override_internal (false) | |
39927 | || (cur_tree = build_target_option_node (&global_options)) | |
39928 | == NULL_TREE) | |
39929 | { | |
39930 | if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET) | |
39931 | fprintf (stderr, "invalid pragma\n"); | |
39932 | ||
39933 | return false; | |
39934 | } | |
39935 | } | |
39936 | ||
39937 | target_option_current_node = cur_tree; | |
39938 | ||
39939 | /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly | |
39940 | change the macros that are defined. */ | |
39941 | if (rs6000_target_modify_macros_ptr) | |
39942 | { | |
39943 | prev_opt = TREE_TARGET_OPTION (prev_tree); | |
39944 | prev_bumask = prev_opt->x_rs6000_builtin_mask; | |
39945 | prev_flags = prev_opt->x_rs6000_isa_flags; | |
39946 | ||
39947 | cur_opt = TREE_TARGET_OPTION (cur_tree); | |
39948 | cur_flags = cur_opt->x_rs6000_isa_flags; | |
39949 | cur_bumask = cur_opt->x_rs6000_builtin_mask; | |
39950 | ||
39951 | diff_bumask = (prev_bumask ^ cur_bumask); | |
39952 | diff_flags = (prev_flags ^ cur_flags); | |
39953 | ||
39954 | if ((diff_flags != 0) || (diff_bumask != 0)) | |
39955 | { | |
39956 | /* Delete old macros. */ | |
39957 | rs6000_target_modify_macros_ptr (false, | |
39958 | prev_flags & diff_flags, | |
39959 | prev_bumask & diff_bumask); | |
39960 | ||
39961 | /* Define new macros. */ | |
39962 | rs6000_target_modify_macros_ptr (true, | |
39963 | cur_flags & diff_flags, | |
39964 | cur_bumask & diff_bumask); | |
39965 | } | |
39966 | } | |
39967 | ||
39968 | return true; | |
39969 | } | |
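| /* Typical usage that reaches this hook (illustrative only): | |
| | |
|      #pragma GCC push_options | |
|      #pragma GCC target ("vsx") | |
|      ... code compiled with VSX enabled; __VSX__ is defined here ... | |
|      #pragma GCC pop_options | |
| | |
|    The pop arrives here with ARGS == NULL and the pushed node as | |
|    POP_TARGET, and the macro callback then undefines __VSX__ again if the | |
|    flag actually changed. */ | |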
39970 | ||
39971 | \f | |
39972 | /* Remember the last target of rs6000_set_current_function. */ | |
39973 | static GTY(()) tree rs6000_previous_fndecl; | |
39974 | ||
39975 | /* Establish appropriate back-end context for processing the function | |
39976 | FNDECL. The argument might be NULL to indicate processing at top | |
39977 | level, outside of any function scope. */ | |
39978 | static void | |
39979 | rs6000_set_current_function (tree fndecl) | |
39980 | { | |
39981 | tree old_tree = (rs6000_previous_fndecl | |
39982 | ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl) | |
39983 | : NULL_TREE); | |
39984 | ||
39985 | tree new_tree = (fndecl | |
39986 | ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl) | |
39987 | : NULL_TREE); | |
39988 | ||
39989 | if (TARGET_DEBUG_TARGET) | |
39990 | { | |
39991 | bool print_final = false; | |
39992 | fprintf (stderr, "\n==================== rs6000_set_current_function"); | |
39993 | ||
39994 | if (fndecl) | |
39995 | fprintf (stderr, ", fndecl %s (%p)", | |
39996 | (DECL_NAME (fndecl) | |
39997 | ? IDENTIFIER_POINTER (DECL_NAME (fndecl)) | |
39998 | : "<unknown>"), (void *)fndecl); | |
39999 | ||
40000 | if (rs6000_previous_fndecl) | |
40001 | fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl); | |
40002 | ||
40003 | fprintf (stderr, "\n"); | |
40004 | if (new_tree) | |
40005 | { | |
40006 | fprintf (stderr, "\nnew fndecl target specific options:\n"); | |
40007 | debug_tree (new_tree); | |
40008 | print_final = true; | |
40009 | } | |
40010 | ||
40011 | if (old_tree) | |
40012 | { | |
40013 | fprintf (stderr, "\nold fndecl target specific options:\n"); | |
40014 | debug_tree (old_tree); | |
40015 | print_final = true; | |
40016 | } | |
40017 | ||
40018 | if (print_final) | |
40019 | fprintf (stderr, "--------------------\n"); | |
40020 | } | |
40021 | ||
40022 | /* Only change the context if the function changes. This hook is called | |
40023 | several times in the course of compiling a function, and we don't want to | |
40024 | slow things down too much or call target_reinit when it isn't safe. */ | |
40025 | if (fndecl && fndecl != rs6000_previous_fndecl) | |
40026 | { | |
40027 | rs6000_previous_fndecl = fndecl; | |
40028 | if (old_tree == new_tree) | |
40029 | ; | |
40030 | ||
40031 | else if (new_tree && new_tree != target_option_default_node) | |
40032 | { | |
40033 | cl_target_option_restore (&global_options, | |
40034 | TREE_TARGET_OPTION (new_tree)); | |
40035 | if (TREE_TARGET_GLOBALS (new_tree)) | |
40036 | restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); | |
40037 | else | |
40038 | TREE_TARGET_GLOBALS (new_tree) | |
40039 | = save_target_globals_default_opts (); | |
40040 | } | |
40041 | ||
40042 | else if (old_tree && old_tree != target_option_default_node) | |
40043 | { | |
40044 | new_tree = target_option_current_node; | |
40045 | cl_target_option_restore (&global_options, | |
40046 | TREE_TARGET_OPTION (new_tree)); | |
40047 | if (TREE_TARGET_GLOBALS (new_tree)) | |
40048 | restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); | |
40049 | else if (new_tree == target_option_default_node) | |
40050 | restore_target_globals (&default_target_globals); | |
40051 | else | |
40052 | TREE_TARGET_GLOBALS (new_tree) | |
40053 | = save_target_globals_default_opts (); | |
40054 | } | |
40055 | } | |
40056 | } | |
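| /* Sketch of the situation this hook handles (assumed example): two | |
|    functions with different target attributes in one translation unit, | |
| | |
|      __attribute__((target("no-vsx"))) void a (void) { } | |
|      __attribute__((target("vsx"))) void b (void) { } | |
| | |
|    Moving from compiling A to compiling B must restore B's saved | |
|    cl_target_option and the matching target_globals, which is what the | |
|    branches above do. */ | |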
40057 | ||
40058 | \f | |
40059 | /* Save the current options */ | |
40060 | ||
40061 | static void | |
40062 | rs6000_function_specific_save (struct cl_target_option *ptr, | |
40063 | struct gcc_options *opts) | |
40064 | { | |
40065 | ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags; | |
40066 | ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit; | |
40067 | } | |
40068 | ||
40069 | /* Restore the current options */ | |
40070 | ||
40071 | static void | |
40072 | rs6000_function_specific_restore (struct gcc_options *opts, | |
40073 | struct cl_target_option *ptr) | |
40074 | ||
40075 | { | |
40076 | opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags; | |
40077 | opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit; | |
40078 | (void) rs6000_option_override_internal (false); | |
40079 | } | |
40080 | ||
40081 | /* Print the current options */ | |
40082 | ||
40083 | static void | |
40084 | rs6000_function_specific_print (FILE *file, int indent, | |
40085 | struct cl_target_option *ptr) | |
40086 | { | |
40087 | rs6000_print_isa_options (file, indent, "Isa options set", | |
40088 | ptr->x_rs6000_isa_flags); | |
40089 | ||
40090 | rs6000_print_isa_options (file, indent, "Isa options explicit", | |
40091 | ptr->x_rs6000_isa_flags_explicit); | |
40092 | } | |
40093 | ||
40094 | /* Helper function to print the current ISA or misc options on a line. */ | |
40095 | ||
40096 | static void | |
40097 | rs6000_print_options_internal (FILE *file, | |
40098 | int indent, | |
40099 | const char *string, | |
40100 | HOST_WIDE_INT flags, | |
40101 | const char *prefix, | |
40102 | const struct rs6000_opt_mask *opts, | |
40103 | size_t num_elements) | |
40104 | { | |
40105 | size_t i; | |
40106 | size_t start_column = 0; | |
40107 | size_t cur_column; | |
40108 | size_t max_column = 120; | |
40109 | size_t prefix_len = strlen (prefix); | |
40110 | size_t comma_len = 0; | |
40111 | const char *comma = ""; | |
40112 | ||
40113 | if (indent) | |
40114 | start_column += fprintf (file, "%*s", indent, ""); | |
40115 | ||
40116 | if (!flags) | |
40117 | { | |
40118 | fprintf (stderr, DEBUG_FMT_S, string, "<none>"); | |
40119 | return; | |
40120 | } | |
40121 | ||
40122 | start_column += fprintf (file, DEBUG_FMT_WX, string, flags); | |
40123 | ||
40124 | /* Print the various mask options. */ | |
40125 | cur_column = start_column; | |
40126 | for (i = 0; i < num_elements; i++) | |
40127 | { | |
40128 | bool invert = opts[i].invert; | |
40129 | const char *name = opts[i].name; | |
40130 | const char *no_str = ""; | |
40131 | HOST_WIDE_INT mask = opts[i].mask; | |
40132 | size_t len = comma_len + prefix_len + strlen (name); | |
40133 | ||
40134 | if (!invert) | |
40135 | { | |
40136 | if ((flags & mask) == 0) | |
40137 | { | |
40138 | no_str = "no-"; | |
40139 | len += sizeof ("no-") - 1; | |
40140 | } | |
40141 | ||
40142 | flags &= ~mask; | |
40143 | } | |
40144 | ||
40145 | else | |
40146 | { | |
40147 | if ((flags & mask) != 0) | |
40148 | { | |
40149 | no_str = "no-"; | |
40150 | len += sizeof ("no-") - 1; | |
40151 | } | |
40152 | ||
40153 | flags |= mask; | |
40154 | } | |
40155 | ||
40156 | cur_column += len; | |
40157 | if (cur_column > max_column) | |
40158 | { | |
40159 | fprintf (stderr, ", \\\n%*s", (int)start_column, ""); | |
40160 | cur_column = start_column + len; | |
40161 | comma = ""; | |
40162 | } | |
40163 | ||
40164 | fprintf (file, "%s%s%s%s", comma, prefix, no_str, name); | |
40165 | comma = ", "; | |
40166 | comma_len = sizeof (", ") - 1; | |
40167 | } | |
40168 | ||
40169 | fputs ("\n", file); | |
40170 | } | |
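| /* A hypothetical line produced by the loop above for a flags word with | |
|    ALTIVEC set but VSX clear, using the "-m" prefix: | |
| | |
|      Isa options set = 0x...: -maltivec, -mno-vsx, ... | |
| | |
|    Lines longer than max_column are continued with a trailing backslash | |
|    and re-indented to start_column. */ | |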
40171 | ||
40172 | /* Helper function to print the current ISA options on a line. */ | |
40173 | ||
40174 | static void | |
40175 | rs6000_print_isa_options (FILE *file, int indent, const char *string, | |
40176 | HOST_WIDE_INT flags) | |
40177 | { | |
40178 | rs6000_print_options_internal (file, indent, string, flags, "-m", | |
40179 | &rs6000_opt_masks[0], | |
40180 | ARRAY_SIZE (rs6000_opt_masks)); | |
40181 | } | |
40182 | ||
40183 | static void | |
40184 | rs6000_print_builtin_options (FILE *file, int indent, const char *string, | |
40185 | HOST_WIDE_INT flags) | |
40186 | { | |
40187 | rs6000_print_options_internal (file, indent, string, flags, "", | |
40188 | &rs6000_builtin_mask_names[0], | |
40189 | ARRAY_SIZE (rs6000_builtin_mask_names)); | |
40190 | } | |
40191 | ||
40192 | /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06, | |
40193 | 2.07, and 3.0 options that relate to the vector unit (-mdirect-move, | |
40194 | -mvsx-timode, -mupper-regs-df). | |
40195 | ||
40196 | If the user used -mno-power8-vector, we need to turn off all of the implicit | |
40197 | ISA 2.07 and 3.0 options that relate to the vector unit. | |
40198 | ||
40199 | If the user used -mno-power9-vector, we need to turn off all of the implicit | |
40200 | ISA 3.0 options that relate to the vector unit. | |
40201 | ||
40202 | This function does not handle explicit options such as the user specifying | |
40203 | -mdirect-move. These are handled in rs6000_option_override_internal, and | |
40204 | the appropriate error is given if needed. | |
40205 | ||
40206 | We return a mask of all of the implicit options that should not be enabled | |
40207 | by default. */ | |
40208 | ||
40209 | static HOST_WIDE_INT | |
40210 | rs6000_disable_incompatible_switches (void) | |
40211 | { | |
40212 | HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit; | |
40213 | size_t i, j; | |
40214 | ||
40215 | static const struct { | |
40216 | const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */ | |
40217 | const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */ | |
40218 | const char *const name; /* name of the switch. */ | |
40219 | } flags[] = { | |
40220 | { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" }, | |
40221 | { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" }, | |
40222 | { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" }, | |
40223 | }; | |
40224 | ||
40225 | for (i = 0; i < ARRAY_SIZE (flags); i++) | |
40226 | { | |
40227 | HOST_WIDE_INT no_flag = flags[i].no_flag; | |
40228 | ||
40229 | if ((rs6000_isa_flags & no_flag) == 0 | |
40230 | && (rs6000_isa_flags_explicit & no_flag) != 0) | |
40231 | { | |
40232 | HOST_WIDE_INT dep_flags = flags[i].dep_flags; | |
40233 | HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit | |
40234 | & rs6000_isa_flags | |
40235 | & dep_flags); | |
40236 | ||
40237 | if (set_flags) | |
40238 | { | |
40239 | for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++) | |
40240 | if ((set_flags & rs6000_opt_masks[j].mask) != 0) | |
40241 | { | |
40242 | set_flags &= ~rs6000_opt_masks[j].mask; | |
40243 | error ("-mno-%s turns off -m%s", | |
40244 | flags[i].name, | |
40245 | rs6000_opt_masks[j].name); | |
40246 | } | |
40247 | ||
40248 | gcc_assert (!set_flags); | |
40249 | } | |
40250 | ||
40251 | rs6000_isa_flags &= ~dep_flags; | |
40252 | ignore_masks |= no_flag | dep_flags; | |
40253 | } | |
40254 | } | |
40255 | ||
40256 | if (!TARGET_P9_VECTOR | |
40257 | && (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) != 0 | |
40258 | && TARGET_P9_DFORM_BOTH > 0) | |
40259 | { | |
40260 | error ("-mno-power9-vector turns off -mpower9-dform"); | |
40261 | TARGET_P9_DFORM_BOTH = 0; | |
40262 | } | |
40263 | ||
40264 | return ignore_masks; | |
40265 | } | |
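| /* Worked example (illustrative): given -mcpu=power9 -mno-vsx, the loop | |
|    above clears OTHER_VSX_VECTOR_MASKS from rs6000_isa_flags and folds | |
|    them into the returned ignore mask, so implicit options such as | |
|    -mpower9-vector are silently dropped. An explicit -mno-vsx | |
|    -mpower8-vector combination instead reports the error | |
|    "-mno-vsx turns off -mpower8-vector". */ | |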
40266 | ||
40267 | \f | |
40268 | /* Hook to determine if one function can safely inline another. */ | |
40269 | ||
40270 | static bool | |
40271 | rs6000_can_inline_p (tree caller, tree callee) | |
40272 | { | |
40273 | bool ret = false; | |
40274 | tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller); | |
40275 | tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee); | |
40276 | ||
40277 | /* If callee has no option attributes, then it is ok to inline. */ | |
40278 | if (!callee_tree) | |
40279 | ret = true; | |
40280 | ||
40281 | /* If caller has no option attributes, but callee does then it is not ok to | |
40282 | inline. */ | |
40283 | else if (!caller_tree) | |
40284 | ret = false; | |
40285 | ||
40286 | else | |
40287 | { | |
40288 | struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree); | |
40289 | struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree); | |
40290 | ||
40291 | /* Callee's options should be a subset of the caller's, i.e. a vsx function | |
40292 | can inline an altivec function but a non-vsx function can't inline a | |
40293 | vsx function. */ | |
40294 | if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags) | |
40295 | == callee_opts->x_rs6000_isa_flags) | |
40296 | ret = true; | |
40297 | } | |
40298 | ||
40299 | if (TARGET_DEBUG_TARGET) | |
40300 | fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n", | |
40301 | (DECL_NAME (caller) | |
40302 | ? IDENTIFIER_POINTER (DECL_NAME (caller)) | |
40303 | : "<unknown>"), | |
40304 | (DECL_NAME (callee) | |
40305 | ? IDENTIFIER_POINTER (DECL_NAME (callee)) | |
40306 | : "<unknown>"), | |
40307 | (ret ? "can" : "cannot")); | |
40308 | ||
40309 | return ret; | |
40310 | } | |
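| /* Illustrative consequence of the subset test above (assumed example): | |
| | |
|      __attribute__((target("altivec"))) static int av (void) { return 1; } | |
|      __attribute__((target("vsx"))) int vs (void) { return av (); } | |
| | |
|    av's ISA flags are a subset of vs's (VSX implies Altivec), so av may be | |
|    inlined into vs; inlining in the opposite direction is rejected. */ | |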
40311 | \f | |
40312 | /* Allocate a stack temp and fix up the address so it meets the particular | |
40313 | memory requirements (either offsettable or REG+REG addressing). */ | |
40314 | ||
40315 | rtx | |
40316 | rs6000_allocate_stack_temp (machine_mode mode, | |
40317 | bool offsettable_p, | |
40318 | bool reg_reg_p) | |
40319 | { | |
40320 | rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode)); | |
40321 | rtx addr = XEXP (stack, 0); | |
40322 | int strict_p = (reload_in_progress || reload_completed); | |
40323 | ||
40324 | if (!legitimate_indirect_address_p (addr, strict_p)) | |
40325 | { | |
40326 | if (offsettable_p | |
40327 | && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true)) | |
40328 | stack = replace_equiv_address (stack, copy_addr_to_reg (addr)); | |
40329 | ||
40330 | else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p)) | |
40331 | stack = replace_equiv_address (stack, copy_addr_to_reg (addr)); | |
40332 | } | |
40333 | ||
40334 | return stack; | |
40335 | } | |
40336 | ||
40337 | /* Given a memory reference, if it is not a reg or reg+reg addressing, convert | |
40338 | to such a form to deal with memory reference instructions like STFIWX that | |
40339 | only take reg+reg addressing. */ | |
40340 | ||
40341 | rtx | |
40342 | rs6000_address_for_fpconvert (rtx x) | |
40343 | { | |
40344 | int strict_p = (reload_in_progress || reload_completed); | |
40345 | rtx addr; | |
40346 | ||
40347 | gcc_assert (MEM_P (x)); | |
40348 | addr = XEXP (x, 0); | |
40349 | if (! legitimate_indirect_address_p (addr, strict_p) | |
40350 | && ! legitimate_indexed_address_p (addr, strict_p)) | |
40351 | { | |
40352 | if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC) | |
40353 | { | |
40354 | rtx reg = XEXP (addr, 0); | |
40355 | HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x)); | |
40356 | rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size); | |
40357 | gcc_assert (REG_P (reg)); | |
40358 | emit_insn (gen_add3_insn (reg, reg, size_rtx)); | |
40359 | addr = reg; | |
40360 | } | |
40361 | else if (GET_CODE (addr) == PRE_MODIFY) | |
40362 | { | |
40363 | rtx reg = XEXP (addr, 0); | |
40364 | rtx expr = XEXP (addr, 1); | |
40365 | gcc_assert (REG_P (reg)); | |
40366 | gcc_assert (GET_CODE (expr) == PLUS); | |
40367 | emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1))); | |
40368 | addr = reg; | |
40369 | } | |
40370 | ||
40371 | x = replace_equiv_address (x, copy_addr_to_reg (addr)); | |
40372 | } | |
40373 | ||
40374 | return x; | |
40375 | } | |
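| /* Sketch of the rewrite performed above: a PRE_DEC reference such as | |
| | |
|      (mem:SI (pre_dec:P (reg:P p))) | |
| | |
|    is turned into an explicit "p = p - size" add followed by a plain | |
|    (mem:SI (reg:P p)), so instructions like stfiwx that accept only reg | |
|    or reg+reg addresses can still be used. */ | |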
40376 | ||
40377 | /* Given a memory reference, if it is not in the form for altivec memory | |
40378 | reference instructions (i.e. reg or reg+reg addressing with AND of -16), | |
40379 | convert to the altivec format. */ | |
40380 | ||
40381 | rtx | |
40382 | rs6000_address_for_altivec (rtx x) | |
40383 | { | |
40384 | gcc_assert (MEM_P (x)); | |
40385 | if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x))) | |
40386 | { | |
40387 | rtx addr = XEXP (x, 0); | |
40388 | int strict_p = (reload_in_progress || reload_completed); | |
40389 | ||
40390 | if (!legitimate_indexed_address_p (addr, strict_p) | |
40391 | && !legitimate_indirect_address_p (addr, strict_p)) | |
40392 | addr = copy_to_mode_reg (Pmode, addr); | |
40393 | ||
40394 | addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16)); | |
40395 | x = change_address (x, GET_MODE (x), addr); | |
40396 | } | |
40397 | ||
40398 | return x; | |
40399 | } | |
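| /* For instance (illustrative), an address headed for an lvx/stvx pattern | |
|    is rewritten roughly as | |
| | |
|      addr -> (and:P (reg:P tmp) (const_int -16)) | |
| | |
|    mirroring the way those instructions ignore the low four bits of the | |
|    effective address. */ | |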
40400 | ||
40401 | /* Implement TARGET_LEGITIMATE_CONSTANT_P. | |
40402 | ||
40403 | On the RS/6000, all integer constants are acceptable, most won't be valid | |
40404 | for particular insns, though. Only easy FP constants are acceptable. */ | |
40405 | ||
40406 | static bool | |
40407 | rs6000_legitimate_constant_p (machine_mode mode, rtx x) | |
40408 | { | |
40409 | if (TARGET_ELF && tls_referenced_p (x)) | |
40410 | return false; | |
40411 | ||
40412 | return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR) | |
40413 | || GET_MODE (x) == VOIDmode | |
40414 | || (TARGET_POWERPC64 && mode == DImode) | |
40415 | || easy_fp_constant (x, mode) | |
40416 | || easy_vector_constant (x, mode)); | |
40417 | } | |
40418 | ||
40419 | \f | |
40420 | /* Return TRUE iff the sequence ending in LAST sets the static chain. */ | |
40421 | ||
40422 | static bool | |
40423 | chain_already_loaded (rtx_insn *last) | |
40424 | { | |
40425 | for (; last != NULL; last = PREV_INSN (last)) | |
40426 | { | |
40427 | if (NONJUMP_INSN_P (last)) | |
40428 | { | |
40429 | rtx patt = PATTERN (last); | |
40430 | ||
40431 | if (GET_CODE (patt) == SET) | |
40432 | { | |
40433 | rtx lhs = XEXP (patt, 0); | |
40434 | ||
40435 | if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM) | |
40436 | return true; | |
40437 | } | |
40438 | } | |
40439 | } | |
40440 | return false; | |
40441 | } | |
40442 | ||
40443 | /* Expand code to perform a call under the AIX or ELFv2 ABI. */ | |
40444 | ||
40445 | void | |
40446 | rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie) | |
40447 | { | |
40448 | const bool direct_call_p | |
40449 | = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc); | |
40450 | rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM); | |
40451 | rtx toc_load = NULL_RTX; | |
40452 | rtx toc_restore = NULL_RTX; | |
40453 | rtx func_addr; | |
40454 | rtx abi_reg = NULL_RTX; | |
40455 | rtx call[4]; | |
40456 | int n_call; | |
40457 | rtx insn; | |
40458 | ||
40459 | /* Handle longcall attributes. */ | |
40460 | if (INTVAL (cookie) & CALL_LONG) | |
40461 | func_desc = rs6000_longcall_ref (func_desc); | |
40462 | ||
40463 | /* Handle indirect calls. */ | |
40464 | if (GET_CODE (func_desc) != SYMBOL_REF | |
40465 | || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc))) | |
40466 | { | |
40467 | /* Save the TOC into its reserved slot before the call, | |
40468 | and prepare to restore it after the call. */ | |
40469 | rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); | |
40470 | rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT); | |
40471 | rtx stack_toc_mem = gen_frame_mem (Pmode, | |
40472 | gen_rtx_PLUS (Pmode, stack_ptr, | |
40473 | stack_toc_offset)); | |
40474 | rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode, | |
40475 | gen_rtvec (1, stack_toc_offset), | |
40476 | UNSPEC_TOCSLOT); | |
40477 | toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec); | |
40478 | ||
40479 | /* Can we optimize saving the TOC in the prologue or | |
40480 | do we need to do it at every call? */ | |
40481 | if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca) | |
40482 | cfun->machine->save_toc_in_prologue = true; | |
40483 | else | |
40484 | { | |
40485 | MEM_VOLATILE_P (stack_toc_mem) = 1; | |
40486 | emit_move_insn (stack_toc_mem, toc_reg); | |
40487 | } | |
40488 | ||
40489 | if (DEFAULT_ABI == ABI_ELFv2) | |
40490 | { | |
40491 | /* A function pointer in the ELFv2 ABI is just a plain address, but | |
40492 | the ABI requires it to be loaded into r12 before the call. */ | |
40493 | func_addr = gen_rtx_REG (Pmode, 12); | |
40494 | emit_move_insn (func_addr, func_desc); | |
40495 | abi_reg = func_addr; | |
40496 | } | |
40497 | else | |
40498 | { | |
40499 | /* A function pointer under AIX is a pointer to a data area whose | |
40500 | first word contains the actual address of the function, whose | |
40501 | second word contains a pointer to its TOC, and whose third word | |
40502 | contains a value to place in the static chain register (r11). | |
40503 | Note that if we load the static chain, our "trampoline" need | |
40504 | not have any executable code. */ | |
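| /* Laid out as a C struct, the descriptor is morally (sketch only; each | |
|    field is GET_MODE_SIZE (Pmode) bytes wide): | |
| | |
|      struct aix_func_desc { | |
|        void *entry;          first word: code address | |
|        void *toc;            second word: callee's TOC | |
|        void *static_chain;   third word: r11 value | |
|      }; | |
| */ | |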
40505 | ||
40506 | /* Load up address of the actual function. */ | |
40507 | func_desc = force_reg (Pmode, func_desc); | |
40508 | func_addr = gen_reg_rtx (Pmode); | |
40509 | emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc)); | |
40510 | ||
40511 | /* Prepare to load the TOC of the called function. Note that the | |
40512 | TOC load must happen immediately before the actual call so | |
40513 | that unwinding the TOC registers works correctly. See the | |
40514 | comment in frob_update_context. */ | |
40515 | rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode)); | |
40516 | rtx func_toc_mem = gen_rtx_MEM (Pmode, | |
40517 | gen_rtx_PLUS (Pmode, func_desc, | |
40518 | func_toc_offset)); | |
40519 | toc_load = gen_rtx_USE (VOIDmode, func_toc_mem); | |
40520 | ||
40521 | /* If we have a static chain, load it up. But, if the call was | |
40522 | originally direct, the 3rd word has not been written since no | |
40523 | trampoline has been built, so we ought not to load it, lest we | |
40524 | overwrite a valid static chain value. */ | |
40525 | if (!direct_call_p | |
40526 | && TARGET_POINTERS_TO_NESTED_FUNCTIONS | |
40527 | && !chain_already_loaded (get_current_sequence ()->next->last)) | |
40528 | { | |
40529 | rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM); | |
40530 | rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode)); | |
40531 | rtx func_sc_mem = gen_rtx_MEM (Pmode, | |
40532 | gen_rtx_PLUS (Pmode, func_desc, | |
40533 | func_sc_offset)); | |
40534 | emit_move_insn (sc_reg, func_sc_mem); | |
40535 | abi_reg = sc_reg; | |
40536 | } | |
40537 | } | |
40538 | } | |
40539 | else | |
40540 | { | |
40541 | /* Direct calls use the TOC: for local calls, the callee will | |
40542 | assume the TOC register is set; for non-local calls, the | |
40543 | PLT stub needs the TOC register. */ | |
40544 | abi_reg = toc_reg; | |
40545 | func_addr = func_desc; | |
40546 | } | |
40547 | ||
40548 | /* Create the call. */ | |
40549 | call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag); | |
40550 | if (value != NULL_RTX) | |
40551 | call[0] = gen_rtx_SET (value, call[0]); | |
40552 | n_call = 1; | |
40553 | ||
40554 | if (toc_load) | |
40555 | call[n_call++] = toc_load; | |
40556 | if (toc_restore) | |
40557 | call[n_call++] = toc_restore; | |
40558 | ||
40559 | call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO)); | |
40560 | ||
40561 | insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call)); | |
40562 | insn = emit_call_insn (insn); | |
40563 | ||
40564 | /* Mention all registers defined by the ABI to hold information | |
40565 | as uses in CALL_INSN_FUNCTION_USAGE. */ | |
40566 | if (abi_reg) | |
40567 | use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg); | |
40568 | } | |
40569 | ||
40570 | /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */ | |
40571 | ||
40572 | void | |
40573 | rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie) | |
40574 | { | |
40575 | rtx call[2]; | |
40576 | rtx insn; | |
40577 | ||
40578 | gcc_assert (INTVAL (cookie) == 0); | |
40579 | ||
40580 | /* Create the call. */ | |
40581 | call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag); | |
40582 | if (value != NULL_RTX) | |
40583 | call[0] = gen_rtx_SET (value, call[0]); | |
40584 | ||
40585 | call[1] = simple_return_rtx; | |
40586 | ||
40587 | insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call)); | |
40588 | insn = emit_call_insn (insn); | |
40589 | ||
40590 | /* Note use of the TOC register. */ | |
40591 | use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM)); | |
40592 | } | |
40593 | ||
40594 | /* Return whether we need to always update the saved TOC pointer when we update | |
40595 | the stack pointer. */ | |
40596 | ||
40597 | static bool | |
40598 | rs6000_save_toc_in_prologue_p (void) | |
40599 | { | |
40600 | return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue); | |
40601 | } | |
40602 | ||
40603 | #ifdef HAVE_GAS_HIDDEN | |
40604 | # define USE_HIDDEN_LINKONCE 1 | |
40605 | #else | |
40606 | # define USE_HIDDEN_LINKONCE 0 | |
40607 | #endif | |
40608 | ||
40609 | /* Fills in the label name that should be used for a 476 link stack thunk. */ | |
40610 | ||
40611 | void | |
40612 | get_ppc476_thunk_name (char name[32]) | |
40613 | { | |
40614 | gcc_assert (TARGET_LINK_STACK); | |
40615 | ||
40616 | if (USE_HIDDEN_LINKONCE) | |
40617 | sprintf (name, "__ppc476.get_thunk"); | |
40618 | else | |
40619 | ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0); | |
40620 | } | |
40621 | ||
40622 | /* This function emits the simple thunk routine that is used to preserve | |
40623 | the link stack on the 476 cpu. */ | |
40624 | ||
40625 | static void rs6000_code_end (void) ATTRIBUTE_UNUSED; | |
40626 | static void | |
40627 | rs6000_code_end (void) | |
40628 | { | |
40629 | char name[32]; | |
40630 | tree decl; | |
40631 | ||
40632 | if (!TARGET_LINK_STACK) | |
40633 | return; | |
40634 | ||
40635 | get_ppc476_thunk_name (name); | |
40636 | ||
40637 | decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name), | |
40638 | build_function_type_list (void_type_node, NULL_TREE)); | |
40639 | DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL, | |
40640 | NULL_TREE, void_type_node); | |
40641 | TREE_PUBLIC (decl) = 1; | |
40642 | TREE_STATIC (decl) = 1; | |
40643 | ||
40644 | #if RS6000_WEAK | |
40645 | if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF) | |
40646 | { | |
40647 | cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl)); | |
40648 | targetm.asm_out.unique_section (decl, 0); | |
40649 | switch_to_section (get_named_section (decl, NULL, 0)); | |
40650 | DECL_WEAK (decl) = 1; | |
40651 | ASM_WEAKEN_DECL (asm_out_file, decl, name, 0); | |
40652 | targetm.asm_out.globalize_label (asm_out_file, name); | |
40653 | targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN); | |
40654 | ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl); | |
40655 | } | |
40656 | else | |
40657 | #endif | |
40658 | { | |
40659 | switch_to_section (text_section); | |
40660 | ASM_OUTPUT_LABEL (asm_out_file, name); | |
40661 | } | |
40662 | ||
40663 | DECL_INITIAL (decl) = make_node (BLOCK); | |
40664 | current_function_decl = decl; | |
40665 | allocate_struct_function (decl, false); | |
40666 | init_function_start (decl); | |
40667 | first_function_block_is_cold = false; | |
40668 | /* Make sure unwind info is emitted for the thunk if needed. */ | |
40669 | final_start_function (emit_barrier (), asm_out_file, 1); | |
40670 | ||
40671 | fputs ("\tblr\n", asm_out_file); | |
40672 | ||
40673 | final_end_function (); | |
40674 | init_insn_lengths (); | |
40675 | free_after_compilation (cfun); | |
40676 | set_cfun (NULL); | |
40677 | current_function_decl = NULL; | |
40678 | } | |
40679 | ||
40680 | /* Add r30 to hard reg set if the prologue sets it up and it is not | |
40681 | pic_offset_table_rtx. */ | |
40682 | ||
40683 | static void | |
40684 | rs6000_set_up_by_prologue (struct hard_reg_set_container *set) | |
40685 | { | |
40686 | if (!TARGET_SINGLE_PIC_BASE | |
40687 | && TARGET_TOC | |
40688 | && TARGET_MINIMAL_TOC | |
40689 | && !constant_pool_empty_p ()) | |
40690 | add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM); | |
40691 | if (cfun->machine->split_stack_argp_used) | |
40692 | add_to_hard_reg_set (&set->set, Pmode, 12); | |
40693 | } | |
40694 | ||
40695 | \f | |
40696 | /* Helper function for rs6000_split_logical to emit a logical instruction after | |
40697 | splitting the operation into single GPR registers. | |
40698 | ||
40699 | DEST is the destination register. | |
40700 | OP1 and OP2 are the input source registers. | |
40701 | CODE is the base operation (AND, IOR, XOR, NOT). | |
40702 | MODE is the machine mode. | |
40703 | If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT. | |
40704 | If COMPLEMENT_OP1_P is true, wrap operand1 with NOT. | |
40705 | If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */ | |
40706 | ||
40707 | static void | |
40708 | rs6000_split_logical_inner (rtx dest, | |
40709 | rtx op1, | |
40710 | rtx op2, | |
40711 | enum rtx_code code, | |
40712 | machine_mode mode, | |
40713 | bool complement_final_p, | |
40714 | bool complement_op1_p, | |
40715 | bool complement_op2_p) | |
40716 | { | |
40717 | rtx bool_rtx; | |
40718 | ||
40719 | /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */ | |
40720 | if (op2 && GET_CODE (op2) == CONST_INT | |
40721 | && (mode == SImode || (mode == DImode && TARGET_POWERPC64)) | |
40722 | && !complement_final_p && !complement_op1_p && !complement_op2_p) | |
40723 | { | |
40724 | HOST_WIDE_INT mask = GET_MODE_MASK (mode); | |
40725 | HOST_WIDE_INT value = INTVAL (op2) & mask; | |
40726 | ||
40727 | /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */ | |
40728 | if (code == AND) | |
40729 | { | |
40730 | if (value == 0) | |
40731 | { | |
40732 | emit_insn (gen_rtx_SET (dest, const0_rtx)); | |
40733 | return; | |
40734 | } | |
40735 | ||
40736 | else if (value == mask) | |
40737 | { | |
40738 | if (!rtx_equal_p (dest, op1)) | |
40739 | emit_insn (gen_rtx_SET (dest, op1)); | |
40740 | return; | |
40741 | } | |
40742 | } | |
40743 | ||
40744 | /* Optimize IOR/XOR of 0 to be a simple move. Split large operations | |
40745 | into separate ORI/ORIS or XORI/XORIS instructions. */ | |
40746 | else if (code == IOR || code == XOR) | |
40747 | { | |
40748 | if (value == 0) | |
40749 | { | |
40750 | if (!rtx_equal_p (dest, op1)) | |
40751 | emit_insn (gen_rtx_SET (dest, op1)); | |
40752 | return; | |
40753 | } | |
40754 | } | |
40755 | } | |
40756 | ||
40757 | if (code == AND && mode == SImode | |
40758 | && !complement_final_p && !complement_op1_p && !complement_op2_p) | |
40759 | { | |
40760 | emit_insn (gen_andsi3 (dest, op1, op2)); | |
40761 | return; | |
40762 | } | |
40763 | ||
40764 | if (complement_op1_p) | |
40765 | op1 = gen_rtx_NOT (mode, op1); | |
40766 | ||
40767 | if (complement_op2_p) | |
40768 | op2 = gen_rtx_NOT (mode, op2); | |
40769 | ||
40770 | /* For canonical RTL, if only one arm is inverted it is the first. */ | |
40771 | if (!complement_op1_p && complement_op2_p) | |
40772 | std::swap (op1, op2); | |
40773 | ||
40774 | bool_rtx = ((code == NOT) | |
40775 | ? gen_rtx_NOT (mode, op1) | |
40776 | : gen_rtx_fmt_ee (code, mode, op1, op2)); | |
40777 | ||
40778 | if (complement_final_p) | |
40779 | bool_rtx = gen_rtx_NOT (mode, bool_rtx); | |
40780 | ||
40781 | emit_insn (gen_rtx_SET (dest, bool_rtx)); | |
40782 | } | |
40783 | ||
40784 | /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These | |
40785 | operations are split immediately during RTL generation to allow for more | |
40786 | optimizations of the AND/IOR/XOR. | |
40787 | ||
40788 | OPERANDS is an array containing the destination and two input operands. | |
40789 | CODE is the base operation (AND, IOR, XOR, NOT). | |
40790 | MODE is the machine mode. | |
40791 | If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT. | |
40792 | If COMPLEMENT_OP1_P is true, wrap operand1 with NOT. | |
40793 | If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. | |
40794 | CLOBBER_REG is either NULL or a scratch register of type CC to allow | |
40795 | formation of the AND instructions. */ | |
40796 | ||
40797 | static void | |
40798 | rs6000_split_logical_di (rtx operands[3], | |
40799 | enum rtx_code code, | |
40800 | bool complement_final_p, | |
40801 | bool complement_op1_p, | |
40802 | bool complement_op2_p) | |
40803 | { | |
40804 | const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff); | |
40805 | const HOST_WIDE_INT upper_32bits = ~ lower_32bits; | |
40806 | const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000); | |
40807 | enum hi_lo { hi = 0, lo = 1 }; | |
40808 | rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2]; | |
40809 | size_t i; | |
40810 | ||
40811 | op0_hi_lo[hi] = gen_highpart (SImode, operands[0]); | |
40812 | op1_hi_lo[hi] = gen_highpart (SImode, operands[1]); | |
40813 | op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]); | |
40814 | op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]); | |
40815 | ||
40816 | if (code == NOT) | |
40817 | op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX; | |
40818 | else | |
40819 | { | |
40820 | if (GET_CODE (operands[2]) != CONST_INT) | |
40821 | { | |
40822 | op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]); | |
40823 | op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]); | |
40824 | } | |
40825 | else | |
40826 | { | |
40827 | HOST_WIDE_INT value = INTVAL (operands[2]); | |
40828 | HOST_WIDE_INT value_hi_lo[2]; | |
40829 | ||
40830 | gcc_assert (!complement_final_p); | |
40831 | gcc_assert (!complement_op1_p); | |
40832 | gcc_assert (!complement_op2_p); | |
40833 | ||
40834 | value_hi_lo[hi] = value >> 32; | |
40835 | value_hi_lo[lo] = value & lower_32bits; | |
40836 | ||
40837 | for (i = 0; i < 2; i++) | |
40838 | { | |
40839 | HOST_WIDE_INT sub_value = value_hi_lo[i]; | |
40840 | ||
40841 | if (sub_value & sign_bit) | |
40842 | sub_value |= upper_32bits; | |
40843 | ||
40844 | op2_hi_lo[i] = GEN_INT (sub_value); | |
40845 | ||
40846 | /* If this is an AND instruction, check to see if we need to load | |
40847 | the value in a register. */ | |
40848 | if (code == AND && sub_value != -1 && sub_value != 0 | |
40849 | && !and_operand (op2_hi_lo[i], SImode)) | |
40850 | op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]); | |
40851 | } | |
40852 | } | |
40853 | } | |
40854 | ||
40855 | for (i = 0; i < 2; i++) | |
40856 | { | |
40857 | /* Split large IOR/XOR operations. */ | |
40858 | if ((code == IOR || code == XOR) | |
40859 | && GET_CODE (op2_hi_lo[i]) == CONST_INT | |
40860 | && !complement_final_p | |
40861 | && !complement_op1_p | |
40862 | && !complement_op2_p | |
40863 | && !logical_const_operand (op2_hi_lo[i], SImode)) | |
40864 | { | |
40865 | HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]); | |
40866 | HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000); | |
40867 | HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff); | |
40868 | rtx tmp = gen_reg_rtx (SImode); | |
40869 | ||
40870 | /* Make sure the constant is sign extended. */ | |
40871 | if ((hi_16bits & sign_bit) != 0) | |
40872 | hi_16bits |= upper_32bits; | |
40873 | ||
40874 | rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits), | |
40875 | code, SImode, false, false, false); | |
40876 | ||
40877 | rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits), | |
40878 | code, SImode, false, false, false); | |
40879 | } | |
40880 | else | |
40881 | rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i], | |
40882 | code, SImode, complement_final_p, | |
40883 | complement_op1_p, complement_op2_p); | |
40884 | } | |
40885 | ||
40886 | return; | |
40887 | } | |
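| /* Worked example (illustrative): on a 32-bit target, a DImode XOR with | |
|    the constant 0x12345678 leaves the high SImode half alone (XOR with 0 | |
|    is just a move), while the low half's constant fails | |
|    logical_const_operand and is split into two inner operations: | |
| | |
|      tmp = src ^ 0x12340000   (xoris) | |
|      dst = tmp ^ 0x00005678   (xori) | |
| */ | |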
40888 | ||
40889 | /* Split the insns that make up boolean operations operating on multiple GPR | |
40890 | registers. The boolean MD patterns ensure that the inputs either are | |
40891 | exactly the same as the output registers, or there is no overlap. | |
40892 | ||
40893 | OPERANDS is an array containing the destination and two input operands. | |
40894 | CODE is the base operation (AND, IOR, XOR, NOT). | |
40895 | If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT. | |
40896 | If COMPLEMENT_OP1_P is true, wrap operand1 with NOT. | |
40897 | If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */ | |
40898 | ||
40899 | void | |
40900 | rs6000_split_logical (rtx operands[3], | |
40901 | enum rtx_code code, | |
40902 | bool complement_final_p, | |
40903 | bool complement_op1_p, | |
40904 | bool complement_op2_p) | |
40905 | { | |
40906 | machine_mode mode = GET_MODE (operands[0]); | |
40907 | machine_mode sub_mode; | |
40908 | rtx op0, op1, op2; | |
40909 | int sub_size, regno0, regno1, nregs, i; | |
40910 | ||
40911 | /* If this is DImode, use the specialized version that can run before | |
40912 | register allocation. */ | |
40913 | if (mode == DImode && !TARGET_POWERPC64) | |
40914 | { | |
40915 | rs6000_split_logical_di (operands, code, complement_final_p, | |
40916 | complement_op1_p, complement_op2_p); | |
40917 | return; | |
40918 | } | |
40919 | ||
40920 | op0 = operands[0]; | |
40921 | op1 = operands[1]; | |
40922 | op2 = (code == NOT) ? NULL_RTX : operands[2]; | |
40923 | sub_mode = (TARGET_POWERPC64) ? DImode : SImode; | |
40924 | sub_size = GET_MODE_SIZE (sub_mode); | |
40925 | regno0 = REGNO (op0); | |
40926 | regno1 = REGNO (op1); | |
40927 | ||
40928 | gcc_assert (reload_completed); | |
40929 | gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO)); | |
40930 | gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO)); | |
40931 | ||
40932 | nregs = rs6000_hard_regno_nregs[(int)mode][regno0]; | |
40933 | gcc_assert (nregs > 1); | |
40934 | ||
40935 | if (op2 && REG_P (op2)) | |
40936 | gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO)); | |
40937 | ||
40938 | for (i = 0; i < nregs; i++) | |
40939 | { | |
40940 | int offset = i * sub_size; | |
40941 | rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset); | |
40942 | rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset); | |
40943 | rtx sub_op2 = ((code == NOT) | |
40944 | ? NULL_RTX | |
40945 | : simplify_subreg (sub_mode, op2, mode, offset)); | |
40946 | ||
40947 | rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode, | |
40948 | complement_final_p, complement_op1_p, | |
40949 | complement_op2_p); | |
40950 | } | |
40951 | ||
40952 | return; | |
40953 | } | |
40954 | ||
40955 | \f | |
40956 | /* Return true if the peephole2 pass can combine an addis instruction with | |
40957 | a D-form load whose offset allows the two instructions to be fused | |
40958 | together on a power8. */ | |
40959 | ||
40960 | bool | |
40961 | fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */ | |
40962 | rtx addis_value, /* addis value. */ | |
40963 | rtx target, /* target register that is loaded. */ | |
40964 | rtx mem) /* bottom part of the memory addr. */ | |
40965 | { | |
40966 | rtx addr; | |
40967 | rtx base_reg; | |
40968 | ||
40969 | /* Validate arguments. */ | |
40970 | if (!base_reg_operand (addis_reg, GET_MODE (addis_reg))) | |
40971 | return false; | |
40972 | ||
40973 | if (!base_reg_operand (target, GET_MODE (target))) | |
40974 | return false; | |
40975 | ||
40976 | if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value))) | |
40977 | return false; | |
40978 | ||
40979 | /* Allow sign/zero extension. */ | |
40980 | if (GET_CODE (mem) == ZERO_EXTEND | |
40981 | || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN)) | |
40982 | mem = XEXP (mem, 0); | |
40983 | ||
40984 | if (!MEM_P (mem)) | |
40985 | return false; | |
40986 | ||
40987 | if (!fusion_gpr_mem_load (mem, GET_MODE (mem))) | |
40988 | return false; | |
40989 | ||
40990 | addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */ | |
40991 | if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM) | |
40992 | return false; | |
40993 | ||
40994 | /* Validate that the register used to load the high value is either the | |
40995 | register being loaded, or we can safely replace its use. | |
40996 | ||
40997 | This function is only called from the peephole2 pass and we assume that | |
40998 | there are 2 instructions in the peephole (addis and load), so we want to | |
40999 | check that the target register is not used in the memory address and that | |
41000 | the register holding the addis result is dead after the peephole. */ | |
41001 | if (REGNO (addis_reg) != REGNO (target)) | |
41002 | { | |
41003 | if (reg_mentioned_p (target, mem)) | |
41004 | return false; | |
41005 | ||
41006 | if (!peep2_reg_dead_p (2, addis_reg)) | |
41007 | return false; | |
41008 | ||
41009 | /* If the target register being loaded is the stack pointer, we must | |
41010 | avoid loading any other value into it, even temporarily. */ | |
41011 | if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM) | |
41012 | return false; | |
41013 | } | |
41014 | ||
41015 | base_reg = XEXP (addr, 0); | |
41016 | return REGNO (addis_reg) == REGNO (base_reg); | |
41017 | } | |
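| /* The shape being matched is, in (illustrative) assembly terms, the pair | |
| | |
|      addis rA,rBase,sym@toc@ha | |
|      lwz   rT,sym@toc@l(rA) | |
| | |
|    which a power8 can fuse when rA feeds directly into the load and is | |
|    otherwise dead. */ | |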
41018 | ||
41019 | /* During the peephole2 pass, adjust and expand the insns for a load fusion | |
41020 | sequence. We adjust the addis register to use the target register. If the | |
41021 | load sign extends, we adjust the code to do a zero-extending load followed | |
41022 | by an explicit sign extension, since the fusion only covers zero-extending | |
41023 | loads. | |
41024 | ||
41025 | The operands are: | |
41026 | operands[0] register set with addis (to be replaced with target) | |
41027 | operands[1] value set via addis | |
41028 | operands[2] target register being loaded | |
41029 | operands[3] D-form memory reference using operands[0]. */ | |
41030 | ||
41031 | void | |
41032 | expand_fusion_gpr_load (rtx *operands) | |
41033 | { | |
41034 | rtx addis_value = operands[1]; | |
41035 | rtx target = operands[2]; | |
41036 | rtx orig_mem = operands[3]; | |
41037 | rtx new_addr, new_mem, orig_addr, offset; | |
41038 | enum rtx_code plus_or_lo_sum; | |
41039 | machine_mode target_mode = GET_MODE (target); | |
41040 | machine_mode extend_mode = target_mode; | |
41041 | machine_mode ptr_mode = Pmode; | |
41042 | enum rtx_code extend = UNKNOWN; | |
41043 | ||
41044 | if (GET_CODE (orig_mem) == ZERO_EXTEND | |
41045 | || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND)) | |
41046 | { | |
41047 | extend = GET_CODE (orig_mem); | |
41048 | orig_mem = XEXP (orig_mem, 0); | |
41049 | target_mode = GET_MODE (orig_mem); | |
41050 | } | |
41051 | ||
41052 | gcc_assert (MEM_P (orig_mem)); | |
41053 | ||
41054 | orig_addr = XEXP (orig_mem, 0); | |
41055 | plus_or_lo_sum = GET_CODE (orig_addr); | |
41056 | gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM); | |
41057 | ||
41058 | offset = XEXP (orig_addr, 1); | |
41059 | new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset); | |
41060 | new_mem = replace_equiv_address_nv (orig_mem, new_addr, false); | |
41061 | ||
41062 | if (extend != UNKNOWN) | |
41063 | new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem); | |
41064 | ||
41065 | new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem), | |
41066 | UNSPEC_FUSION_GPR); | |
41067 | emit_insn (gen_rtx_SET (target, new_mem)); | |
41068 | ||
41069 | if (extend == SIGN_EXTEND) | |
41070 | { | |
41071 | int sub_off = ((BYTES_BIG_ENDIAN) | |
41072 | ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode) | |
41073 | : 0); | |
41074 | rtx sign_reg | |
41075 | = simplify_subreg (target_mode, target, extend_mode, sub_off); | |
41076 | ||
41077 | emit_insn (gen_rtx_SET (target, | |
41078 | gen_rtx_SIGN_EXTEND (extend_mode, sign_reg))); | |
41079 | } | |
41080 | ||
41081 | return; | |
41082 | } | |
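| /* Illustrative expansion for a sign-extending fused load on a big-endian | |
|    DImode target: the fused UNSPEC load zero-extends, and the sign bit is | |
|    then restored with a second insn, roughly | |
| | |
|      (set (reg:DI rT) (unspec:DI [(zero_extend:DI (mem:SI ...))] ...)) | |
|      (set (reg:DI rT) (sign_extend:DI (subreg:SI (reg:DI rT) 4))) | |
| */ | |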
41083 | ||
41084 | /* Emit the addis instruction that will be part of a fused instruction | |
41085 | sequence. */ | |
41086 | ||
41087 | void | |
41088 | emit_fusion_addis (rtx target, rtx addis_value, const char *comment, | |
41089 | const char *mode_name) | |
41090 | { | |
41091 | rtx fuse_ops[10]; | |
41092 | char insn_template[80]; | |
41093 | const char *addis_str = NULL; | |
41094 | const char *comment_str = ASM_COMMENT_START; | |
41095 | ||
41096 | if (*comment_str == ' ') | |
41097 | comment_str++; | |
41098 | ||
41099 | /* Emit the addis instruction. */ | |
41100 | fuse_ops[0] = target; | |
41101 | if (satisfies_constraint_L (addis_value)) | |
41102 | { | |
41103 | fuse_ops[1] = addis_value; | |
41104 | addis_str = "lis %0,%v1"; | |
41105 | } | |
41106 | ||
41107 | else if (GET_CODE (addis_value) == PLUS) | |
41108 | { | |
41109 | rtx op0 = XEXP (addis_value, 0); | |
41110 | rtx op1 = XEXP (addis_value, 1); | |
41111 | ||
41112 | if (REG_P (op0) && CONST_INT_P (op1) | |
41113 | && satisfies_constraint_L (op1)) | |
41114 | { | |
41115 | fuse_ops[1] = op0; | |
41116 | fuse_ops[2] = op1; | |
41117 | addis_str = "addis %0,%1,%v2"; | |
41118 | } | |
41119 | } | |
41120 | ||
41121 | else if (GET_CODE (addis_value) == HIGH) | |
41122 | { | |
41123 | rtx value = XEXP (addis_value, 0); | |
41124 | if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL) | |
41125 | { | |
41126 | fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */ | |
41127 | fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */ | |
41128 | if (TARGET_ELF) | |
41129 | addis_str = "addis %0,%2,%1@toc@ha"; | |
41130 | ||
41131 | else if (TARGET_XCOFF) | |
41132 | addis_str = "addis %0,%1@u(%2)"; | |
41133 | ||
41134 | else | |
41135 | gcc_unreachable (); | |
41136 | } | |
41137 | ||
41138 | else if (GET_CODE (value) == PLUS) | |
41139 | { | |
41140 | rtx op0 = XEXP (value, 0); | |
41141 | rtx op1 = XEXP (value, 1); | |
41142 | ||
41143 | if (GET_CODE (op0) == UNSPEC | |
41144 | && XINT (op0, 1) == UNSPEC_TOCREL | |
41145 | && CONST_INT_P (op1)) | |
41146 | { | |
41147 | fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */ | |
41148 | fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */ | |
41149 | fuse_ops[3] = op1; | |
41150 | if (TARGET_ELF) | |
41151 | addis_str = "addis %0,%2,%1+%3@toc@ha"; | |
41152 | ||
41153 | else if (TARGET_XCOFF) | |
41154 | addis_str = "addis %0,%1+%3@u(%2)"; | |
41155 | ||
41156 | else | |
41157 | gcc_unreachable (); | |
41158 | } | |
41159 | } | |
41160 | ||
41161 | else if (satisfies_constraint_L (value)) | |
41162 | { | |
41163 | fuse_ops[1] = value; | |
41164 | addis_str = "lis %0,%v1"; | |
41165 | } | |
41166 | ||
41167 | else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value)) | |
41168 | { | |
41169 | fuse_ops[1] = value; | |
41170 | addis_str = "lis %0,%1@ha"; | |
41171 | } | |
41172 | } | |
41173 | ||
41174 | if (!addis_str) | |
41175 | fatal_insn ("Could not generate addis value for fusion", addis_value); | |
41176 | ||
41177 | sprintf (insn_template, "%s\t\t%s %s, type %s", addis_str, comment_str, | |
41178 | comment, mode_name); | |
41179 | output_asm_insn (insn_template, fuse_ops); | |
41180 | } | |
41181 | ||
41182 | /* Emit a D-form load or store instruction that is the second instruction | |
41183 | of a fusion sequence. */ | |
41184 | ||
41185 | void | |
41186 | emit_fusion_load_store (rtx load_store_reg, rtx addis_reg, rtx offset, | |
41187 | const char *insn_str) | |
41188 | { | |
41189 | rtx fuse_ops[10]; | |
41190 | char insn_template[80]; | |
41191 | ||
41192 | fuse_ops[0] = load_store_reg; | |
41193 | fuse_ops[1] = addis_reg; | |
41194 | ||
41195 | if (CONST_INT_P (offset) && satisfies_constraint_I (offset)) | |
41196 | { | |
41197 | sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str); | |
41198 | fuse_ops[2] = offset; | |
41199 | output_asm_insn (insn_template, fuse_ops); | |
41200 | } | |
41201 | ||
41202 | else if (GET_CODE (offset) == UNSPEC | |
41203 | && XINT (offset, 1) == UNSPEC_TOCREL) | |
41204 | { | |
41205 | if (TARGET_ELF) | |
41206 | sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str); | |
41207 | ||
41208 | else if (TARGET_XCOFF) | |
41209 | sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str); | |
41210 | ||
41211 | else | |
41212 | gcc_unreachable (); | |
41213 | ||
41214 | fuse_ops[2] = XVECEXP (offset, 0, 0); | |
41215 | output_asm_insn (insn_template, fuse_ops); | |
41216 | } | |
41217 | ||
41218 | else if (GET_CODE (offset) == PLUS | |
41219 | && GET_CODE (XEXP (offset, 0)) == UNSPEC | |
41220 | && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL | |
41221 | && CONST_INT_P (XEXP (offset, 1))) | |
41222 | { | |
41223 | rtx tocrel_unspec = XEXP (offset, 0); | |
41224 | if (TARGET_ELF) | |
41225 | sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str); | |
41226 | ||
41227 | else if (TARGET_XCOFF) | |
41228 | sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str); | |
41229 | ||
41230 | else | |
41231 | gcc_unreachable (); | |
41232 | ||
41233 | fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0); | |
41234 | fuse_ops[3] = XEXP (offset, 1); | |
41235 | output_asm_insn (insn_template, fuse_ops); | |
41236 | } | |
41237 | ||
41238 | else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset)) | |
41239 | { | |
41240 | sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str); | |
41241 | ||
41242 | fuse_ops[2] = offset; | |
41243 | output_asm_insn (insn_template, fuse_ops); | |
41244 | } | |
41245 | ||
41246 | else | |
41247 | fatal_insn ("Unable to generate load/store offset for fusion", offset); | |
41248 | ||
41249 | return; | |
41250 | } | |
41251 | ||
41252 | /* Wrap a TOC address that can be fused to indicate that special fusion | |
41253 | processing is needed. */ | |
41254 | ||
41255 | rtx | |
41256 | fusion_wrap_memory_address (rtx old_mem) | |
41257 | { | |
41258 | rtx old_addr = XEXP (old_mem, 0); | |
41259 | rtvec v = gen_rtvec (1, old_addr); | |
41260 | rtx new_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_FUSION_ADDIS); | |
41261 | return replace_equiv_address_nv (old_mem, new_addr, false); | |
41262 | } | |
41263 | ||
41264 | /* Given an address, convert it into the addis and load offset parts. Addresses | |
41265 | created during the peephole2 process look like: | |
41266 | (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL)) | |
41267 | (unspec [(...)] UNSPEC_TOCREL)) | |
41268 | ||
41269 | Addresses created via TOC fusion look like: | |
41270 | (unspec [(unspec [(...)] UNSPEC_TOCREL)] UNSPEC_FUSION_ADDIS) */ | |
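/* For example (an editor's note): for the PLUS/LO_SUM forms the two
   operands are returned directly in *P_HI and *P_LO, while for the
   wrapped TOC form *P_LO is the stored address and *P_HI is a HIGH
   synthesized around it.  */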
41271 | ||
41272 | static void | |
41273 | fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo) | |
41274 | { | |
41275 | rtx hi, lo; | |
41276 | ||
41277 | if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_FUSION_ADDIS) | |
41278 | { | |
41279 | lo = XVECEXP (addr, 0, 0); | |
41280 | hi = gen_rtx_HIGH (Pmode, lo); | |
41281 | } | |
41282 | else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM) | |
41283 | { | |
41284 | hi = XEXP (addr, 0); | |
41285 | lo = XEXP (addr, 1); | |
41286 | } | |
41287 | else | |
41288 | gcc_unreachable (); | |
41289 | ||
41290 | *p_hi = hi; | |
41291 | *p_lo = lo; | |
41292 | } | |
41293 | ||
41294 | /* Return a string to fuse an addis instruction with a GPR load into the same | |
41295 | register that the addis instruction set. The address that is used | |
41296 | is the logical address that was formed during peephole2: | |
41297 | (lo_sum (high) (low-part)) | |
41298 | ||
41299 | Or the address is the TOC address that is wrapped before register allocation: | |
41300 | (unspec [(addr)] UNSPEC_FUSION_ADDIS) | |
41301 | ||
41302 | The code is complicated, so we call output_asm_insn directly, and just | |
41303 | return "". */ | |
41304 | ||
41305 | const char * | |
41306 | emit_fusion_gpr_load (rtx target, rtx mem) | |
41307 | { | |
41308 | rtx addis_value; | |
41309 | rtx addr; | |
41310 | rtx load_offset; | |
41311 | const char *load_str = NULL; | |
41312 | const char *mode_name = NULL; | |
41313 | machine_mode mode; | |
41314 | ||
41315 | if (GET_CODE (mem) == ZERO_EXTEND) | |
41316 | mem = XEXP (mem, 0); | |
41317 | ||
41318 | gcc_assert (REG_P (target) && MEM_P (mem)); | |
41319 | ||
41320 | addr = XEXP (mem, 0); | |
41321 | fusion_split_address (addr, &addis_value, &load_offset); | |
41322 | ||
41323 | /* Now emit the load instruction to the same register. */ | |
41324 | mode = GET_MODE (mem); | |
41325 | switch (mode) | |
41326 | { | |
916ace94 | 41327 | case E_QImode: |
01e91138 | 41328 | mode_name = "char"; |
41329 | load_str = "lbz"; | |
41330 | break; | |
41331 | ||
916ace94 | 41332 | case E_HImode: |
01e91138 | 41333 | mode_name = "short"; |
41334 | load_str = "lhz"; | |
41335 | break; | |
41336 | ||
916ace94 | 41337 | case E_SImode: |
41338 | case E_SFmode: | |
01e91138 | 41339 | mode_name = (mode == SFmode) ? "float" : "int"; |
41340 | load_str = "lwz"; | |
41341 | break; | |
41342 | ||
916ace94 | 41343 | case E_DImode: |
41344 | case E_DFmode: | |
01e91138 | 41345 | gcc_assert (TARGET_POWERPC64); |
41346 | mode_name = (mode == DFmode) ? "double" : "long"; | |
41347 | load_str = "ld"; | |
41348 | break; | |
41349 | ||
41350 | default: | |
41351 | fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem)); | |
41352 | } | |
41353 | ||
41354 | /* Emit the addis instruction. */ | |
41355 | emit_fusion_addis (target, addis_value, "gpr load fusion", mode_name); | |
41356 | ||
41357 | /* Emit the D-form load instruction. */ | |
41358 | emit_fusion_load_store (target, target, load_offset, load_str); | |
41359 | ||
41360 | return ""; | |
41361 | } | |
41362 | \f | |
41363 | ||
41364 | /* Return true if the peephole2 pass can combine a load/store involving a | |
41365 | combination of an addis instruction and the memory operation. This form | |
41366 | of fusion was added in ISA 3.0 (power9) hardware. */ | |
41367 | ||
41368 | bool | |
41369 | fusion_p9_p (rtx addis_reg, /* register set via addis. */ | |
41370 | rtx addis_value, /* addis value. */ | |
41371 | rtx dest, /* destination (memory or register). */ | |
41372 | rtx src) /* source (register or memory). */ | |
41373 | { | |
41374 | rtx addr, mem, offset; | |
582adad1 | 41375 | machine_mode mode = GET_MODE (src); |
01e91138 | 41376 | |
41377 | /* Validate arguments. */ | |
41378 | if (!base_reg_operand (addis_reg, GET_MODE (addis_reg))) | |
41379 | return false; | |
41380 | ||
41381 | if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value))) | |
41382 | return false; | |
41383 | ||
41384 | /* Ignore extend operations that are part of the load. */ | |
41385 | if (GET_CODE (src) == FLOAT_EXTEND || GET_CODE (src) == ZERO_EXTEND) | |
41386 | src = XEXP (src, 0); | |
41387 | ||
41388 | /* Test for memory<-register or register<-memory. */ | |
41389 | if (fpr_reg_operand (src, mode) || int_reg_operand (src, mode)) | |
41390 | { | |
41391 | if (!MEM_P (dest)) | |
41392 | return false; | |
41393 | ||
41394 | mem = dest; | |
41395 | } | |
41396 | ||
41397 | else if (MEM_P (src)) | |
41398 | { | |
41399 | if (!fpr_reg_operand (dest, mode) && !int_reg_operand (dest, mode)) | |
41400 | return false; | |
41401 | ||
41402 | mem = src; | |
41403 | } | |
41404 | ||
41405 | else | |
41406 | return false; | |
41407 | ||
41408 | addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */ | |
41409 | if (GET_CODE (addr) == PLUS) | |
41410 | { | |
41411 | if (!rtx_equal_p (addis_reg, XEXP (addr, 0))) | |
41412 | return false; | |
41413 | ||
41414 | return satisfies_constraint_I (XEXP (addr, 1)); | |
41415 | } | |
41416 | ||
41417 | else if (GET_CODE (addr) == LO_SUM) | |
41418 | { | |
41419 | if (!rtx_equal_p (addis_reg, XEXP (addr, 0))) | |
41420 | return false; | |
41421 | ||
41422 | offset = XEXP (addr, 1); | |
41423 | if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64)) | |
41424 | return small_toc_ref (offset, GET_MODE (offset)); | |
41425 | ||
41426 | else if (TARGET_ELF && !TARGET_POWERPC64) | |
41427 | return CONSTANT_P (offset); | |
41428 | } | |
41429 | ||
41430 | return false; | |
41431 | } | |
41432 | ||
41433 | /* During the peephole2 pass, adjust and expand the insns for an extended fusion | |
41434 | load sequence. | |
41435 | ||
41436 | The operands are: | |
41437 | operands[0] register set with addis | |
41438 | operands[1] value set via addis | |
41439 | operands[2] target register being loaded | |
41440 | operands[3] D-form memory reference using operands[0]. | |
41441 | ||
41442 | This is similar to the fusion introduced with power8, except it extends to | |
41443 | both loads and stores and does not require the result register to be the | |
41444 | same as the base register. At the moment, we only do this if the register | |
41445 | set by the addis is dead. */ | |
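/* An editor's sketch of the intended result (illustrative only):

	addis tmp,base,offset@ha
	lwz target,offset@l(tmp)

   Unlike the power8 form above, TARGET need not equal TMP; TMP is
   clobbered instead, which is why the addis register must be dead.  */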
41446 | ||
41447 | void | |
41448 | expand_fusion_p9_load (rtx *operands) | |
41449 | { | |
41450 | rtx tmp_reg = operands[0]; | |
41451 | rtx addis_value = operands[1]; | |
41452 | rtx target = operands[2]; | |
41453 | rtx orig_mem = operands[3]; | |
41454 | rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn; | |
41455 | enum rtx_code plus_or_lo_sum; | |
41456 | machine_mode target_mode = GET_MODE (target); | |
41457 | machine_mode extend_mode = target_mode; | |
41458 | machine_mode ptr_mode = Pmode; | |
41459 | enum rtx_code extend = UNKNOWN; | |
41460 | ||
41461 | if (GET_CODE (orig_mem) == FLOAT_EXTEND || GET_CODE (orig_mem) == ZERO_EXTEND) | |
41462 | { | |
41463 | extend = GET_CODE (orig_mem); | |
41464 | orig_mem = XEXP (orig_mem, 0); | |
41465 | target_mode = GET_MODE (orig_mem); | |
41466 | } | |
41467 | ||
41468 | gcc_assert (MEM_P (orig_mem)); | |
41469 | ||
41470 | orig_addr = XEXP (orig_mem, 0); | |
41471 | plus_or_lo_sum = GET_CODE (orig_addr); | |
41472 | gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM); | |
41473 | ||
41474 | offset = XEXP (orig_addr, 1); | |
41475 | new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset); | |
41476 | new_mem = replace_equiv_address_nv (orig_mem, new_addr, false); | |
41477 | ||
41478 | if (extend != UNKNOWN) | |
41479 | new_mem = gen_rtx_fmt_e (extend, extend_mode, new_mem); | |
41480 | ||
41481 | new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem), | |
41482 | UNSPEC_FUSION_P9); | |
41483 | ||
41484 | set = gen_rtx_SET (target, new_mem); | |
41485 | clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg); | |
41486 | insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)); | |
41487 | emit_insn (insn); | |
41488 | ||
41489 | return; | |
41490 | } | |
41491 | ||
41492 | /* During the peephole2 pass, adjust and expand the insns for an extended fusion | |
41493 | store sequence. | |
41494 | ||
41495 | The operands are: | |
41496 | operands[0] register set with addis | |
41497 | operands[1] value set via addis | |
41498 | operands[2] target D-form memory being stored to | |
41499 | operands[3] register being stored | |
41500 | ||
41501 | This is similar to the fusion introduced with power8, except it extends to | |
41502 | both loads and stores and does not require the result register to be the | |
41503 | same as the base register. At the moment, we only do this if the register | |
41504 | set by the addis is dead. */ | |
41505 | ||
41506 | void | |
41507 | expand_fusion_p9_store (rtx *operands) | |
41508 | { | |
41509 | rtx tmp_reg = operands[0]; | |
41510 | rtx addis_value = operands[1]; | |
41511 | rtx orig_mem = operands[2]; | |
41512 | rtx src = operands[3]; | |
41513 | rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn, new_src; | |
41514 | enum rtx_code plus_or_lo_sum; | |
41515 | machine_mode target_mode = GET_MODE (orig_mem); | |
41516 | machine_mode ptr_mode = Pmode; | |
41517 | ||
41518 | gcc_assert (MEM_P (orig_mem)); | |
41519 | ||
41520 | orig_addr = XEXP (orig_mem, 0); | |
41521 | plus_or_lo_sum = GET_CODE (orig_addr); | |
41522 | gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM); | |
41523 | ||
41524 | offset = XEXP (orig_addr, 1); | |
41525 | new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset); | |
41526 | new_mem = replace_equiv_address_nv (orig_mem, new_addr, false); | |
41527 | ||
41528 | new_src = gen_rtx_UNSPEC (target_mode, gen_rtvec (1, src), | |
41529 | UNSPEC_FUSION_P9); | |
41530 | ||
41531 | set = gen_rtx_SET (new_mem, new_src); | |
41532 | clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg); | |
41533 | insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)); | |
41534 | emit_insn (insn); | |
41535 | ||
41536 | return; | |
41537 | } | |
41538 | ||
41539 | /* Return a string to fuse an addis instruction with a load using extended | |
41540 | fusion. The address that is used is the logical address that was formed | |
41541 | during peephole2: (lo_sum (high) (low-part)) | |
41542 | ||
41543 | The code is complicated, so we call output_asm_insn directly, and just | |
41544 | return "". */ | |
41545 | ||
41546 | const char * | |
41547 | emit_fusion_p9_load (rtx reg, rtx mem, rtx tmp_reg) | |
41548 | { | |
582adad1 | 41549 | machine_mode mode = GET_MODE (reg); |
01e91138 | 41550 | rtx hi; |
41551 | rtx lo; | |
41552 | rtx addr; | |
41553 | const char *load_string; | |
41554 | int r; | |
41555 | ||
41556 | if (GET_CODE (mem) == FLOAT_EXTEND || GET_CODE (mem) == ZERO_EXTEND) | |
41557 | { | |
41558 | mem = XEXP (mem, 0); | |
41559 | mode = GET_MODE (mem); | |
41560 | } | |
41561 | ||
41562 | if (GET_CODE (reg) == SUBREG) | |
41563 | { | |
41564 | gcc_assert (SUBREG_BYTE (reg) == 0); | |
41565 | reg = SUBREG_REG (reg); | |
41566 | } | |
41567 | ||
41568 | if (!REG_P (reg)) | |
41569 | fatal_insn ("emit_fusion_p9_load, bad reg #1", reg); | |
41570 | ||
41571 | r = REGNO (reg); | |
41572 | if (FP_REGNO_P (r)) | |
41573 | { | |
41574 | if (mode == SFmode) | |
41575 | load_string = "lfs"; | |
41576 | else if (mode == DFmode || mode == DImode) | |
41577 | load_string = "lfd"; | |
41578 | else | |
41579 | gcc_unreachable (); | |
41580 | } | |
41581 | else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR) | |
41582 | { | |
41583 | if (mode == SFmode) | |
41584 | load_string = "lxssp"; | |
41585 | else if (mode == DFmode || mode == DImode) | |
41586 | load_string = "lxsd"; | |
41587 | else | |
41588 | gcc_unreachable (); | |
41589 | } | |
41590 | else if (INT_REGNO_P (r)) | |
41591 | { | |
41592 | switch (mode) | |
41593 | { | |
916ace94 | 41594 | case E_QImode: |
01e91138 | 41595 | load_string = "lbz"; |
41596 | break; | |
916ace94 | 41597 | case E_HImode: |
01e91138 | 41598 | load_string = "lhz"; |
41599 | break; | |
916ace94 | 41600 | case E_SImode: |
41601 | case E_SFmode: | |
01e91138 | 41602 | load_string = "lwz"; |
41603 | break; | |
916ace94 | 41604 | case E_DImode: |
41605 | case E_DFmode: | |
01e91138 | 41606 | if (!TARGET_POWERPC64) |
41607 | gcc_unreachable (); | |
41608 | load_string = "ld"; | |
41609 | break; | |
41610 | default: | |
41611 | gcc_unreachable (); | |
41612 | } | |
41613 | } | |
41614 | else | |
41615 | fatal_insn ("emit_fusion_p9_load, bad reg #2", reg); | |
41616 | ||
41617 | if (!MEM_P (mem)) | |
41618 | fatal_insn ("emit_fusion_p9_load not MEM", mem); | |
41619 | ||
41620 | addr = XEXP (mem, 0); | |
41621 | fusion_split_address (addr, &hi, &lo); | |
41622 | ||
41623 | /* Emit the addis instruction. */ | |
41624 | emit_fusion_addis (tmp_reg, hi, "power9 load fusion", GET_MODE_NAME (mode)); | |
41625 | ||
41626 | /* Emit the D-form load instruction. */ | |
41627 | emit_fusion_load_store (reg, tmp_reg, lo, load_string); | |
41628 | ||
41629 | return ""; | |
41630 | } | |
41631 | ||
41632 | /* Return a string to fuse an addis instruction with a store using extended | |
41633 | fusion. The address that is used is the logical address that was formed | |
41634 | during peephole2: (lo_sum (high) (low-part)) | |
41635 | ||
41636 | The code is complicated, so we call output_asm_insn directly, and just | |
41637 | return "". */ | |
41638 | ||
41639 | const char * | |
41640 | emit_fusion_p9_store (rtx mem, rtx reg, rtx tmp_reg) | |
41641 | { | |
582adad1 | 41642 | machine_mode mode = GET_MODE (reg); |
01e91138 | 41643 | rtx hi; |
41644 | rtx lo; | |
41645 | rtx addr; | |
41646 | const char *store_string; | |
41647 | int r; | |
41648 | ||
41649 | if (GET_CODE (reg) == SUBREG) | |
41650 | { | |
41651 | gcc_assert (SUBREG_BYTE (reg) == 0); | |
41652 | reg = SUBREG_REG (reg); | |
41653 | } | |
41654 | ||
41655 | if (!REG_P (reg)) | |
41656 | fatal_insn ("emit_fusion_p9_store, bad reg #1", reg); | |
41657 | ||
41658 | r = REGNO (reg); | |
41659 | if (FP_REGNO_P (r)) | |
41660 | { | |
41661 | if (mode == SFmode) | |
41662 | store_string = "stfs"; | |
41663 | else if (mode == DFmode) | |
41664 | store_string = "stfd"; | |
41665 | else | |
41666 | gcc_unreachable (); | |
41667 | } | |
41668 | else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR) | |
41669 | { | |
41670 | if (mode == SFmode) | |
41671 | store_string = "stxssp"; | |
41672 | else if (mode == DFmode || mode == DImode) | |
41673 | store_string = "stxsd"; | |
41674 | else | |
41675 | gcc_unreachable (); | |
41676 | } | |
41677 | else if (INT_REGNO_P (r)) | |
41678 | { | |
41679 | switch (mode) | |
41680 | { | |
916ace94 | 41681 | case E_QImode: |
01e91138 | 41682 | store_string = "stb"; |
41683 | break; | |
916ace94 | 41684 | case E_HImode: |
01e91138 | 41685 | store_string = "sth"; |
41686 | break; | |
916ace94 | 41687 | case E_SImode: |
41688 | case E_SFmode: | |
01e91138 | 41689 | store_string = "stw"; |
41690 | break; | |
916ace94 | 41691 | case E_DImode: |
41692 | case E_DFmode: | |
01e91138 | 41693 | if (!TARGET_POWERPC64) |
41694 | gcc_unreachable (); | |
41695 | store_string = "std"; | |
41696 | break; | |
41697 | default: | |
41698 | gcc_unreachable (); | |
41699 | } | |
41700 | } | |
41701 | else | |
41702 | fatal_insn ("emit_fusion_p9_store, bad reg #2", reg); | |
41703 | ||
41704 | if (!MEM_P (mem)) | |
41705 | fatal_insn ("emit_fusion_p9_store not MEM", mem); | |
41706 | ||
41707 | addr = XEXP (mem, 0); | |
41708 | fusion_split_address (addr, &hi, &lo); | |
41709 | ||
41710 | /* Emit the addis instruction. */ | |
41711 | emit_fusion_addis (tmp_reg, hi, "power9 store fusion", GET_MODE_NAME (mode)); | |
41712 | ||
41713 | /* Emit the D-form store instruction. */ | |
41714 | emit_fusion_load_store (reg, tmp_reg, lo, store_string); | |
41715 | ||
41716 | return ""; | |
41717 | } | |
41718 | ||
41719 | \f | |
41720 | /* Analyze vector computations and remove unnecessary doubleword | |
41721 | swaps (xxswapdi instructions). This pass is performed only | |
41722 | for little-endian VSX code generation. | |
41723 | ||
41724 | For this specific case, loads and stores of 4x32-bit and 2x64-bit | |
41725 | vectors are inefficient. These are implemented using the lxvd2x and | |
41726 | stxvd2x instructions, which invert the order of doublewords in | |
41727 | a vector register. Thus the code generation inserts an xxswapdi | |
41728 | after each such load, and prior to each such store. (For spill | |
41729 | code after register assignment, an additional xxswapdi is inserted | |
41730 | following each store in order to return a hard register to its | |
41731 | unpermuted value.) | |
41732 | ||
41733 | The extra xxswapdi instructions reduce performance. This can be | |
41734 | particularly bad for vectorized code. The purpose of this pass | |
41735 | is to reduce the number of xxswapdi instructions required for | |
41736 | correctness. | |
41737 | ||
41738 | The primary insight is that much code that operates on vectors | |
41739 | does not care about the relative order of elements in a register, | |
41740 | so long as the correct memory order is preserved. If we have | |
41741 | a computation where all input values are provided by lxvd2x/xxswapdi | |
41742 | sequences, all outputs are stored using xxswapdi/stxvd2x sequences, | |
41743 | and all intermediate computations are pure SIMD (independent of | |
41744 | element order), then all the xxswapdi's associated with the loads | |
41745 | and stores may be removed. | |
41746 | ||
41747 | This pass uses some of the infrastructure and logical ideas from | |
41748 | the "web" pass in web.c. We create maximal webs of computations | |
41749 | fitting the description above using union-find. Each such web is | |
41750 | then optimized by removing its unnecessary xxswapdi instructions. | |
41751 | ||
41752 | The pass is placed prior to global optimization so that we can | |
41753 | perform the optimization in the safest and simplest way possible; | |
41754 | that is, by replacing each xxswapdi insn with a register copy insn. | |
41755 | Subsequent forward propagation will remove copies where possible. | |
41756 | ||
41757 | There are some operations sensitive to element order for which we | |
41758 | can still allow the operation, provided we modify those operations. | |
41759 | These include CONST_VECTORs, for which we must swap the first and | |
41760 | second halves of the constant vector; and SUBREGs, for which we | |
41761 | must adjust the byte offset to account for the swapped doublewords. | |
41762 | A remaining opportunity would be non-immediate-form splats, for | |
41763 | which we should adjust the selected lane of the input. We should | |
41764 | also make code generation adjustments for sum-across operations, | |
41765 | since this is a common vectorizer reduction. | |
41766 | ||
41767 | Because we run prior to the first split, we can see loads and stores | |
41768 | here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla | |
41769 | vector loads and stores that have not yet been split into a permuting | |
41770 | load/store and a swap. (One way this can happen is with a builtin | |
41771 | call to vec_vsx_{ld,st}.) We can handle these as well, but rather | |
41772 | than deleting a swap, we convert the load/store into a permuting | |
41773 | load/store (which effectively removes the swap). */ | |
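/* A small before/after illustration (an editor's sketch, not part of
   the original commentary).  For c[i] = a[i] + b[i] on V2DF data, the
   little-endian code initially looks like

	lxvd2x 0,0,8		# load a[i],a[i+1], doublewords reversed
	xxswapd 0,0
	lxvd2x 1,0,9		# load b[i],b[i+1], doublewords reversed
	xxswapd 1,1
	xvadddp 0,0,1		# add is independent of element order
	xxswapd 0,0
	stxvd2x 0,0,10		# store c[i],c[i+1]

   Because xvadddp is lane-insensitive, this pass turns all three
   xxswapd (xxswapdi) instructions into register copies, which forward
   propagation then removes.  */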
41774 | ||
41775 | /* Notes on Permutes | |
41776 | ||
41777 | We do not currently handle computations that contain permutes. There | |
41778 | is a general transformation that can be performed correctly, but it | |
41779 | may introduce more expensive code than it replaces. To handle these | |
41780 | would require a cost model to determine when to perform the optimization. | |
41781 | This commentary records how this could be done if desired. | |
41782 | ||
41783 | The most general permute is something like this (example for V16QI): | |
41784 | ||
41785 | (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI)) | |
41786 | (parallel [(const_int a0) (const_int a1) | |
41787 | ... | |
41788 | (const_int a14) (const_int a15)])) | |
41789 | ||
41790 | where a0,...,a15 are in [0,31] and select elements from op1 and op2 | |
41791 | to produce in the result. | |
41792 | ||
41793 | Regardless of mode, we can convert the PARALLEL to a mask of 16 | |
41794 | byte-element selectors. Let's call this M, with M[i] representing | |
41795 | the ith byte-element selector value. Then if we swap doublewords | |
41796 | throughout the computation, we can get correct behavior by replacing | |
41797 | M with M' as follows: | |
41798 | ||
41799 | M'[i] = { (M[i]+8)%16 : M[i] in [0,15] | |
41800 | { ((M[i]+8)%16)+16 : M[i] in [16,31] | |
41801 | ||
41802 | This seems promising at first, since we are just replacing one mask | |
41803 | with another. But certain masks are preferable to others. If M | |
41804 | is a mask that matches a vmrghh pattern, for example, M' certainly | |
41805 | will not. Instead of a single vmrghh, we would generate a load of | |
41806 | M' and a vperm. So we would need to know how many xxswapd's we can | |
41807 | remove as a result of this transformation to determine if it's | |
41808 | profitable; and preferably the logic would need to be aware of all | |
41809 | the special preferable masks. | |
41810 | ||
41811 | Another form of permute is an UNSPEC_VPERM, in which the mask is | |
41812 | already in a register. In some cases, this mask may be a constant | |
41813 | that we can discover with ud-chains, in which case the above | |
41814 | transformation is ok. However, the common usage here is for the | |
41815 | mask to be produced by an UNSPEC_LVSL, in which case the mask | |
41816 | cannot be known at compile time. In such a case we would have to | |
41817 | generate several instructions to compute M' as above at run time, | |
41818 | and a cost model is needed again. | |
41819 | ||
41820 | However, when the mask M for an UNSPEC_VPERM is loaded from the | |
41821 | constant pool, we can replace M with M' as above at no cost | |
41822 | beyond adding a constant pool entry. */ | |
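/* A minimal C sketch of the M -> M' remapping above (an editor's
   illustration; no such helper exists in this file):

     static unsigned char
     swap_selector_elt (unsigned char m)
     {
       return (m < 16) ? (m + 8) % 16 : ((m + 8) % 16) + 16;
     }

   Selectors into op1 (0..15) stay in 0..15 and selectors into op2
   (16..31) stay in 16..31; within each source operand the two
   doublewords are exchanged.  For example M = { 0, 1, ..., 15 }
   (copy op1) maps to M' = { 8, 9, ..., 15, 0, 1, ..., 7 }.  */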
41823 | ||
41824 | /* This is based on the union-find logic in web.c. web_entry_base is | |
41825 | defined in df.h. */ | |
41826 | class swap_web_entry : public web_entry_base | |
41827 | { | |
41828 | public: | |
41829 | /* Pointer to the insn. */ | |
41830 | rtx_insn *insn; | |
41831 | /* Set if insn contains a mention of a vector register. All other | |
41832 | fields are undefined if this field is unset. */ | |
41833 | unsigned int is_relevant : 1; | |
41834 | /* Set if insn is a load. */ | |
41835 | unsigned int is_load : 1; | |
41836 | /* Set if insn is a store. */ | |
41837 | unsigned int is_store : 1; | |
41838 | /* Set if insn is a doubleword swap. This can either be a register swap | |
41839 | or a permuting load or store (test is_load and is_store for this). */ | |
41840 | unsigned int is_swap : 1; | |
41841 | /* Set if the insn has a live-in use of a parameter register. */ | |
41842 | unsigned int is_live_in : 1; | |
41843 | /* Set if the insn has a live-out def of a return register. */ | |
41844 | unsigned int is_live_out : 1; | |
41845 | /* Set if the insn contains a subreg reference of a vector register. */ | |
41846 | unsigned int contains_subreg : 1; | |
41847 | /* Set if the insn contains a 128-bit integer operand. */ | |
41848 | unsigned int is_128_int : 1; | |
41849 | /* Set if this is a call-insn. */ | |
41850 | unsigned int is_call : 1; | |
41851 | /* Set if this insn does not perform a vector operation for which | |
41852 | element order matters, or if we know how to fix it up if it does. | |
41853 | Undefined if is_swap is set. */ | |
41854 | unsigned int is_swappable : 1; | |
41855 | /* A nonzero value indicates what kind of special handling for this | |
41856 | insn is required if doublewords are swapped. Undefined if | |
41857 | is_swappable is not set. */ | |
41858 | unsigned int special_handling : 4; | |
41859 | /* Set if the web represented by this entry cannot be optimized. */ | |
41860 | unsigned int web_not_optimizable : 1; | |
41861 | /* Set if this insn should be deleted. */ | |
41862 | unsigned int will_delete : 1; | |
41863 | }; | |
41864 | ||
41865 | enum special_handling_values { | |
41866 | SH_NONE = 0, | |
41867 | SH_CONST_VECTOR, | |
41868 | SH_SUBREG, | |
41869 | SH_NOSWAP_LD, | |
41870 | SH_NOSWAP_ST, | |
41871 | SH_EXTRACT, | |
41872 | SH_SPLAT, | |
41873 | SH_XXPERMDI, | |
41874 | SH_CONCAT, | |
41875 | SH_VPERM | |
41876 | }; | |
41877 | ||
41878 | /* Union INSN with all insns containing definitions that reach USE. | |
41879 | Detect whether USE is live-in to the current function. */ | |
41880 | static void | |
41881 | union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use) | |
41882 | { | |
41883 | struct df_link *link = DF_REF_CHAIN (use); | |
41884 | ||
41885 | if (!link) | |
41886 | insn_entry[INSN_UID (insn)].is_live_in = 1; | |
41887 | ||
41888 | while (link) | |
41889 | { | |
41890 | if (DF_REF_IS_ARTIFICIAL (link->ref)) | |
41891 | insn_entry[INSN_UID (insn)].is_live_in = 1; | |
41892 | ||
41893 | if (DF_REF_INSN_INFO (link->ref)) | |
41894 | { | |
41895 | rtx def_insn = DF_REF_INSN (link->ref); | |
41896 | (void)unionfind_union (insn_entry + INSN_UID (insn), | |
41897 | insn_entry + INSN_UID (def_insn)); | |
41898 | } | |
41899 | ||
41900 | link = link->next; | |
41901 | } | |
41902 | } | |
41903 | ||
41904 | /* Union INSN with all insns containing uses reached from DEF. | |
41905 | Detect whether DEF is live-out from the current function. */ | |
41906 | static void | |
41907 | union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def) | |
41908 | { | |
41909 | struct df_link *link = DF_REF_CHAIN (def); | |
41910 | ||
41911 | if (!link) | |
41912 | insn_entry[INSN_UID (insn)].is_live_out = 1; | |
41913 | ||
41914 | while (link) | |
41915 | { | |
41916 | /* This could be an eh use or some other artificial use; | |
41917 | we treat these all the same (killing the optimization). */ | |
41918 | if (DF_REF_IS_ARTIFICIAL (link->ref)) | |
41919 | insn_entry[INSN_UID (insn)].is_live_out = 1; | |
41920 | ||
41921 | if (DF_REF_INSN_INFO (link->ref)) | |
41922 | { | |
41923 | rtx use_insn = DF_REF_INSN (link->ref); | |
41924 | (void)unionfind_union (insn_entry + INSN_UID (insn), | |
41925 | insn_entry + INSN_UID (use_insn)); | |
41926 | } | |
41927 | ||
41928 | link = link->next; | |
41929 | } | |
41930 | } | |
41931 | ||
41932 | /* Return 1 iff INSN is a load insn, including permuting loads that | |
41933 | represent an lxvd2x instruction; else return 0. */ | |
41934 | static unsigned int | |
41935 | insn_is_load_p (rtx insn) | |
41936 | { | |
41937 | rtx body = PATTERN (insn); | |
41938 | ||
41939 | if (GET_CODE (body) == SET) | |
41940 | { | |
41941 | if (GET_CODE (SET_SRC (body)) == MEM) | |
41942 | return 1; | |
41943 | ||
41944 | if (GET_CODE (SET_SRC (body)) == VEC_SELECT | |
41945 | && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM) | |
41946 | return 1; | |
41947 | ||
41948 | return 0; | |
41949 | } | |
41950 | ||
41951 | if (GET_CODE (body) != PARALLEL) | |
41952 | return 0; | |
41953 | ||
41954 | rtx set = XVECEXP (body, 0, 0); | |
41955 | ||
41956 | if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM) | |
41957 | return 1; | |
41958 | ||
41959 | return 0; | |
41960 | } | |
41961 | ||
41962 | /* Return 1 iff INSN is a store insn, including permuting stores that | |
41963 | represent an stxvd2x instruction; else return 0. */ | |
41964 | static unsigned int | |
41965 | insn_is_store_p (rtx insn) | |
41966 | { | |
41967 | rtx body = PATTERN (insn); | |
41968 | if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM) | |
41969 | return 1; | |
41970 | if (GET_CODE (body) != PARALLEL) | |
41971 | return 0; | |
41972 | rtx set = XVECEXP (body, 0, 0); | |
41973 | if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM) | |
41974 | return 1; | |
41975 | return 0; | |
41976 | } | |
41977 | ||
41978 | /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap, | |
41979 | a permuting load, or a permuting store. */ | |
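/* For example (an editor's note): a V2DI register swap has the shape
   (set (reg:V2DI d) (vec_select:V2DI (reg:V2DI s) (parallel [1 0])))
   and a V4SI swap uses (parallel [2 3 0 1]).  */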
41980 | static unsigned int | |
41981 | insn_is_swap_p (rtx insn) | |
41982 | { | |
41983 | rtx body = PATTERN (insn); | |
41984 | if (GET_CODE (body) != SET) | |
41985 | return 0; | |
41986 | rtx rhs = SET_SRC (body); | |
41987 | if (GET_CODE (rhs) != VEC_SELECT) | |
41988 | return 0; | |
41989 | rtx parallel = XEXP (rhs, 1); | |
41990 | if (GET_CODE (parallel) != PARALLEL) | |
41991 | return 0; | |
41992 | unsigned int len = XVECLEN (parallel, 0); | |
41993 | if (len != 2 && len != 4 && len != 8 && len != 16) | |
41994 | return 0; | |
41995 | for (unsigned int i = 0; i < len / 2; ++i) | |
41996 | { | |
41997 | rtx op = XVECEXP (parallel, 0, i); | |
41998 | if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i) | |
41999 | return 0; | |
42000 | } | |
42001 | for (unsigned int i = len / 2; i < len; ++i) | |
42002 | { | |
42003 | rtx op = XVECEXP (parallel, 0, i); | |
42004 | if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2) | |
42005 | return 0; | |
42006 | } | |
42007 | return 1; | |
42008 | } | |
42009 | ||
42010 | /* Return TRUE if INSN is a swap fed by a load from the constant pool. */ | |
42011 | static bool | |
42012 | const_load_sequence_p (swap_web_entry *insn_entry, rtx insn) | |
42013 | { | |
42014 | unsigned uid = INSN_UID (insn); | |
42015 | if (!insn_entry[uid].is_swap || insn_entry[uid].is_load) | |
42016 | return false; | |
42017 | ||
42018 | /* Find the unique use in the swap and locate its def. If the def | |
42019 | isn't unique, punt. */ | |
42020 | struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); | |
42021 | df_ref use; | |
42022 | FOR_EACH_INSN_INFO_USE (use, insn_info) | |
42023 | { | |
42024 | struct df_link *def_link = DF_REF_CHAIN (use); | |
42025 | if (!def_link || def_link->next) | |
42026 | return false; | |
42027 | ||
42028 | rtx def_insn = DF_REF_INSN (def_link->ref); | |
42029 | unsigned uid2 = INSN_UID (def_insn); | |
42030 | if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap) | |
42031 | return false; | |
42032 | ||
42033 | rtx body = PATTERN (def_insn); | |
42034 | if (GET_CODE (body) != SET | |
42035 | || GET_CODE (SET_SRC (body)) != VEC_SELECT | |
42036 | || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM) | |
42037 | return false; | |
42038 | ||
42039 | rtx mem = XEXP (SET_SRC (body), 0); | |
42040 | rtx base_reg = XEXP (mem, 0); | |
42041 | ||
42042 | df_ref base_use; | |
42043 | insn_info = DF_INSN_INFO_GET (def_insn); | |
42044 | FOR_EACH_INSN_INFO_USE (base_use, insn_info) | |
42045 | { | |
42046 | if (!rtx_equal_p (DF_REF_REG (base_use), base_reg)) | |
42047 | continue; | |
42048 | ||
42049 | struct df_link *base_def_link = DF_REF_CHAIN (base_use); | |
42050 | if (!base_def_link || base_def_link->next) | |
42051 | return false; | |
42052 | ||
42053 | rtx tocrel_insn = DF_REF_INSN (base_def_link->ref); | |
42054 | rtx tocrel_body = PATTERN (tocrel_insn); | |
42055 | rtx base, offset; | |
42056 | if (GET_CODE (tocrel_body) != SET) | |
42057 | return false; | |
42058 | /* There is an extra level of indirection for small/large | |
42059 | code models. */ | |
42060 | rtx tocrel_expr = SET_SRC (tocrel_body); | |
42061 | if (GET_CODE (tocrel_expr) == MEM) | |
42062 | tocrel_expr = XEXP (tocrel_expr, 0); | |
42063 | if (!toc_relative_expr_p (tocrel_expr, false)) | |
42064 | return false; | |
42065 | split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset); | |
42066 | if (GET_CODE (base) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (base)) | |
42067 | return false; | |
42068 | } | |
42069 | } | |
42070 | return true; | |
42071 | } | |
42072 | ||
42073 | /* Return TRUE iff OP matches a V2DF reduction pattern. See the | |
42074 | definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md. */ | |
42075 | static bool | |
42076 | v2df_reduction_p (rtx op) | |
42077 | { | |
42078 | if (GET_MODE (op) != V2DFmode) | |
42079 | return false; | |
42080 | ||
42081 | enum rtx_code code = GET_CODE (op); | |
42082 | if (code != PLUS && code != SMIN && code != SMAX) | |
42083 | return false; | |
42084 | ||
42085 | rtx concat = XEXP (op, 0); | |
42086 | if (GET_CODE (concat) != VEC_CONCAT) | |
42087 | return false; | |
42088 | ||
42089 | rtx select0 = XEXP (concat, 0); | |
42090 | rtx select1 = XEXP (concat, 1); | |
42091 | if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT) | |
42092 | return false; | |
42093 | ||
42094 | rtx reg0 = XEXP (select0, 0); | |
42095 | rtx reg1 = XEXP (select1, 0); | |
42096 | if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0)) | |
42097 | return false; | |
42098 | ||
42099 | rtx parallel0 = XEXP (select0, 1); | |
42100 | rtx parallel1 = XEXP (select1, 1); | |
42101 | if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL) | |
42102 | return false; | |
42103 | ||
42104 | if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx) | |
42105 | || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx)) | |
42106 | return false; | |
42107 | ||
42108 | return true; | |
42109 | } | |
42110 | ||
42111 | /* Return 1 iff OP is an operand that will not be affected by having | |
42112 | vector doublewords swapped in memory. */ | |
42113 | static unsigned int | |
42114 | rtx_is_swappable_p (rtx op, unsigned int *special) | |
42115 | { | |
42116 | enum rtx_code code = GET_CODE (op); | |
42117 | int i, j; | |
42118 | rtx parallel; | |
42119 | ||
42120 | switch (code) | |
42121 | { | |
42122 | case LABEL_REF: | |
42123 | case SYMBOL_REF: | |
42124 | case CLOBBER: | |
42125 | case REG: | |
42126 | return 1; | |
42127 | ||
42128 | case VEC_CONCAT: | |
42129 | case ASM_INPUT: | |
42130 | case ASM_OPERANDS: | |
42131 | return 0; | |
42132 | ||
42133 | case CONST_VECTOR: | |
42134 | { | |
42135 | *special = SH_CONST_VECTOR; | |
42136 | return 1; | |
42137 | } | |
42138 | ||
42139 | case VEC_DUPLICATE: | |
42140 | /* Opportunity: If XEXP (op, 0) has the same mode as the result, | |
42141 | and XEXP (op, 1) is a PARALLEL with a single QImode const int, | |
42142 | it represents a vector splat for which we can do special | |
42143 | handling. */ | |
42144 | if (GET_CODE (XEXP (op, 0)) == CONST_INT) | |
42145 | return 1; | |
42146 | else if (REG_P (XEXP (op, 0)) | |
42147 | && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0))) | |
42148 | /* This catches V2DF and V2DI splat, at a minimum. */ | |
42149 | return 1; | |
42150 | else if (GET_CODE (XEXP (op, 0)) == TRUNCATE | |
42151 | && REG_P (XEXP (XEXP (op, 0), 0)) | |
42152 | && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0))) | |
42153 | /* This catches splat of a truncated value. */ | |
42154 | return 1; | |
42155 | else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT) | |
42156 | /* If the duplicated item is from a select, defer to the select | |
42157 | processing to see if we can change the lane for the splat. */ | |
42158 | return rtx_is_swappable_p (XEXP (op, 0), special); | |
42159 | else | |
42160 | return 0; | |
42161 | ||
42162 | case VEC_SELECT: | |
42163 | /* A vec_extract operation is ok if we change the lane. */ | |
42164 | if (GET_CODE (XEXP (op, 0)) == REG | |
42165 | && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op) | |
42166 | && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL | |
42167 | && XVECLEN (parallel, 0) == 1 | |
42168 | && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT) | |
42169 | { | |
42170 | *special = SH_EXTRACT; | |
42171 | return 1; | |
42172 | } | |
42173 | /* An XXPERMDI is ok if we adjust the lanes. Note that if the | |
42174 | XXPERMDI is a swap operation, it will be identified by | |
42175 | insn_is_swap_p and therefore we won't get here. */ | |
42176 | else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT | |
42177 | && (GET_MODE (XEXP (op, 0)) == V4DFmode | |
42178 | || GET_MODE (XEXP (op, 0)) == V4DImode) | |
42179 | && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL | |
42180 | && XVECLEN (parallel, 0) == 2 | |
42181 | && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT | |
42182 | && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT) | |
42183 | { | |
42184 | *special = SH_XXPERMDI; | |
42185 | return 1; | |
42186 | } | |
42187 | else if (v2df_reduction_p (op)) | |
42188 | return 1; | |
42189 | else | |
42190 | return 0; | |
42191 | ||
42192 | case UNSPEC: | |
42193 | { | |
42194 | /* Various operations are unsafe for this optimization, at least | |
42195 | without significant additional work. Permutes are obviously | |
42196 | problematic, as both the permute control vector and the ordering | |
42197 | of the target values are invalidated by doubleword swapping. | |
42198 | Vector pack and unpack modify the number of vector lanes. | |
42199 | Merge-high/low will not operate correctly on swapped operands. | |
42200 | Vector shifts across element boundaries are clearly uncool, | |
42201 | as are vector select and concatenate operations. Vector | |
42202 | sum-across instructions define one operand with a specific | |
42203 | order-dependent element, so additional fixup code would be | |
42204 | needed to make those work. Vector set and non-immediate-form | |
42205 | vector splat are element-order sensitive. A few of these | |
42206 | cases might be workable with special handling if required. | |
42207 | Adding cost modeling would be appropriate in some cases. */ | |
42208 | int val = XINT (op, 1); | |
42209 | switch (val) | |
42210 | { | |
42211 | default: | |
42212 | break; | |
42213 | case UNSPEC_VMRGH_DIRECT: | |
42214 | case UNSPEC_VMRGL_DIRECT: | |
42215 | case UNSPEC_VPACK_SIGN_SIGN_SAT: | |
42216 | case UNSPEC_VPACK_SIGN_UNS_SAT: | |
42217 | case UNSPEC_VPACK_UNS_UNS_MOD: | |
42218 | case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT: | |
42219 | case UNSPEC_VPACK_UNS_UNS_SAT: | |
42220 | case UNSPEC_VPERM: | |
42221 | case UNSPEC_VPERM_UNS: | |
42222 | case UNSPEC_VPERMHI: | |
42223 | case UNSPEC_VPERMSI: | |
42224 | case UNSPEC_VPKPX: | |
42225 | case UNSPEC_VSLDOI: | |
42226 | case UNSPEC_VSLO: | |
42227 | case UNSPEC_VSRO: | |
42228 | case UNSPEC_VSUM2SWS: | |
42229 | case UNSPEC_VSUM4S: | |
42230 | case UNSPEC_VSUM4UBS: | |
42231 | case UNSPEC_VSUMSWS: | |
42232 | case UNSPEC_VSUMSWS_DIRECT: | |
42233 | case UNSPEC_VSX_CONCAT: | |
42234 | case UNSPEC_VSX_SET: | |
42235 | case UNSPEC_VSX_SLDWI: | |
42236 | case UNSPEC_VUNPACK_HI_SIGN: | |
42237 | case UNSPEC_VUNPACK_HI_SIGN_DIRECT: | |
42238 | case UNSPEC_VUNPACK_LO_SIGN: | |
42239 | case UNSPEC_VUNPACK_LO_SIGN_DIRECT: | |
42240 | case UNSPEC_VUPKHPX: | |
42241 | case UNSPEC_VUPKHS_V4SF: | |
42242 | case UNSPEC_VUPKHU_V4SF: | |
42243 | case UNSPEC_VUPKLPX: | |
42244 | case UNSPEC_VUPKLS_V4SF: | |
42245 | case UNSPEC_VUPKLU_V4SF: | |
42246 | case UNSPEC_VSX_CVDPSPN: | |
42247 | case UNSPEC_VSX_CVSPDP: | |
42248 | case UNSPEC_VSX_CVSPDPN: | |
42249 | case UNSPEC_VSX_EXTRACT: | |
42250 | case UNSPEC_VSX_VSLO: | |
42251 | case UNSPEC_VSX_VEC_INIT: | |
42252 | return 0; | |
42253 | case UNSPEC_VSPLT_DIRECT: | |
42254 | case UNSPEC_VSX_XXSPLTD: | |
42255 | *special = SH_SPLAT; | |
42256 | return 1; | |
42257 | case UNSPEC_REDUC_PLUS: | |
42258 | case UNSPEC_REDUC: | |
42259 | return 1; | |
42260 | } | |
42261 | } | |
42262 | ||
42263 | default: | |
42264 | break; | |
42265 | } | |
42266 | ||
42267 | const char *fmt = GET_RTX_FORMAT (code); | |
42268 | int ok = 1; | |
42269 | ||
42270 | for (i = 0; i < GET_RTX_LENGTH (code); ++i) | |
42271 | if (fmt[i] == 'e' || fmt[i] == 'u') | |
42272 | { | |
42273 | unsigned int special_op = SH_NONE; | |
42274 | ok &= rtx_is_swappable_p (XEXP (op, i), &special_op); | |
42275 | if (special_op == SH_NONE) | |
42276 | continue; | |
42277 | /* Ensure we never have two kinds of special handling | |
42278 | for the same insn. */ | |
42279 | if (*special != SH_NONE && *special != special_op) | |
42280 | return 0; | |
42281 | *special = special_op; | |
42282 | } | |
42283 | else if (fmt[i] == 'E') | |
42284 | for (j = 0; j < XVECLEN (op, i); ++j) | |
42285 | { | |
42286 | unsigned int special_op = SH_NONE; | |
42287 | ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op); | |
42288 | if (special_op == SH_NONE) | |
42289 | continue; | |
42290 | /* Ensure we never have two kinds of special handling | |
42291 | for the same insn. */ | |
42292 | if (*special != SH_NONE && *special != special_op) | |
42293 | return 0; | |
42294 | *special = special_op; | |
42295 | } | |
42296 | ||
42297 | return ok; | |
42298 | } | |
42299 | ||
42300 | /* Return 1 iff INSN is an operand that will not be affected by | |
42301 | having vector doublewords swapped in memory (in which case | |
42302 | *SPECIAL is unchanged), or that can be modified to be correct | |
42303 | if vector doublewords are swapped in memory (in which case | |
42304 | *SPECIAL is changed to a value indicating how). */ | |
42305 | static unsigned int | |
42306 | insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn, | |
42307 | unsigned int *special) | |
42308 | { | |
42309 | /* Calls are always bad. */ | |
42310 | if (GET_CODE (insn) == CALL_INSN) | |
42311 | return 0; | |
42312 | ||
42313 | /* Loads and stores seen here are not permuting, but we can still | |
42314 | fix them up by converting them to permuting ones. Exceptions: | |
42315 | UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL | |
42316 | body instead of a SET; and UNSPEC_STVE, which has an UNSPEC | |
42317 | for the SET source. Also we must now make an exception for lvx | |
42318 | and stvx when they are not in the UNSPEC_LVX/STVX form (with the | |
42319 | explicit "& -16") since this leads to unrecognizable insns. */ | |
42320 | rtx body = PATTERN (insn); | |
42321 | int i = INSN_UID (insn); | |
42322 | ||
42323 | if (insn_entry[i].is_load) | |
42324 | { | |
42325 | if (GET_CODE (body) == SET) | |
42326 | { | |
42327 | rtx rhs = SET_SRC (body); | |
42328 | /* Even without a swap, the RHS might be a vec_select for, say, | |
42329 | a byte-reversing load. */ | |
42330 | if (GET_CODE (rhs) != MEM) | |
42331 | return 0; | |
42332 | if (GET_CODE (XEXP (rhs, 0)) == AND) | |
42333 | return 0; | |
42334 | ||
42335 | *special = SH_NOSWAP_LD; | |
42336 | return 1; | |
42337 | } | |
42338 | else | |
42339 | return 0; | |
42340 | } | |
42341 | ||
42342 | if (insn_entry[i].is_store) | |
42343 | { | |
42344 | if (GET_CODE (body) == SET | |
42345 | && GET_CODE (SET_SRC (body)) != UNSPEC) | |
42346 | { | |
42347 | rtx lhs = SET_DEST (body); | |
42348 | /* Even without a swap, the LHS might be a vec_select for, say, | |
42349 | a byte-reversing store. */ | |
42350 | if (GET_CODE (lhs) != MEM) | |
42351 | return 0; | |
42352 | if (GET_CODE (XEXP (lhs, 0)) == AND) | |
42353 | return 0; | |
42354 | ||
42355 | *special = SH_NOSWAP_ST; | |
42356 | return 1; | |
42357 | } | |
42358 | else | |
42359 | return 0; | |
42360 | } | |
42361 | ||
42362 | /* A convert to single precision can be left as is provided that | |
42363 | all of its uses are in xxspltw instructions that splat BE element | |
42364 | zero. */ | |
42365 | if (GET_CODE (body) == SET | |
42366 | && GET_CODE (SET_SRC (body)) == UNSPEC | |
42367 | && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN) | |
42368 | { | |
42369 | df_ref def; | |
42370 | struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); | |
42371 | ||
42372 | FOR_EACH_INSN_INFO_DEF (def, insn_info) | |
42373 | { | |
42374 | struct df_link *link = DF_REF_CHAIN (def); | |
42375 | if (!link) | |
42376 | return 0; | |
42377 | ||
42378 | for (; link; link = link->next) { | |
42379 | rtx use_insn = DF_REF_INSN (link->ref); | |
42380 | rtx use_body = PATTERN (use_insn); | |
42381 | if (GET_CODE (use_body) != SET | |
42382 | || GET_CODE (SET_SRC (use_body)) != UNSPEC | |
42383 | || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW | |
42384 | || XVECEXP (SET_SRC (use_body), 0, 1) != const0_rtx) | |
42385 | return 0; | |
42386 | } | |
42387 | } | |
42388 | ||
42389 | return 1; | |
42390 | } | |
42391 | ||
42392 | /* A concatenation of two doublewords is ok if we reverse the | |
42393 | order of the inputs. */ | |
42394 | if (GET_CODE (body) == SET | |
42395 | && GET_CODE (SET_SRC (body)) == VEC_CONCAT | |
42396 | && (GET_MODE (SET_SRC (body)) == V2DFmode | |
42397 | || GET_MODE (SET_SRC (body)) == V2DImode)) | |
42398 | { | |
42399 | *special = SH_CONCAT; | |
42400 | return 1; | |
42401 | } | |
42402 | ||
42403 | /* V2DF reductions are always swappable. */ | |
42404 | if (GET_CODE (body) == PARALLEL) | |
42405 | { | |
42406 | rtx expr = XVECEXP (body, 0, 0); | |
42407 | if (GET_CODE (expr) == SET | |
42408 | && v2df_reduction_p (SET_SRC (expr))) | |
42409 | return 1; | |
42410 | } | |
42411 | ||
42412 | /* An UNSPEC_VPERM is ok if the mask operand is loaded from the | |
42413 | constant pool. */ | |
42414 | if (GET_CODE (body) == SET | |
42415 | && GET_CODE (SET_SRC (body)) == UNSPEC | |
42416 | && XINT (SET_SRC (body), 1) == UNSPEC_VPERM | |
42417 | && XVECLEN (SET_SRC (body), 0) == 3 | |
42418 | && GET_CODE (XVECEXP (SET_SRC (body), 0, 2)) == REG) | |
42419 | { | |
42420 | rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2); | |
42421 | struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); | |
42422 | df_ref use; | |
42423 | FOR_EACH_INSN_INFO_USE (use, insn_info) | |
42424 | if (rtx_equal_p (DF_REF_REG (use), mask_reg)) | |
42425 | { | |
42426 | struct df_link *def_link = DF_REF_CHAIN (use); | |
42427 | /* Punt if multiple definitions for this reg. */ | |
42428 | if (def_link && !def_link->next | |
42429 | && const_load_sequence_p (insn_entry, | |
42430 | DF_REF_INSN (def_link->ref))) | |
42431 | { | |
42432 | *special = SH_VPERM; | |
42433 | return 1; | |
42434 | } | |
42435 | } | |
42436 | } | |
42437 | ||
42438 | /* Otherwise check the operands for vector lane violations. */ | |
42439 | return rtx_is_swappable_p (body, special); | |
42440 | } | |
42441 | ||
42442 | enum chain_purpose { FOR_LOADS, FOR_STORES }; | |
42443 | ||
42444 | /* Return true if the UD or DU chain headed by LINK is non-empty, | |
42445 | and every entry on the chain references an insn that is a | |
42446 | register swap. Furthermore, if PURPOSE is FOR_LOADS, each such | |
42447 | register swap must have only permuting loads as reaching defs. | |
42448 | If PURPOSE is FOR_STORES, each such register swap must have only | |
42449 | register swaps or permuting stores as reached uses. */ | |
42450 | static bool | |
42451 | chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link, | |
42452 | enum chain_purpose purpose) | |
42453 | { | |
42454 | if (!link) | |
42455 | return false; | |
42456 | ||
42457 | for (; link; link = link->next) | |
42458 | { | |
42459 | if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref)))) | |
42460 | continue; | |
42461 | ||
42462 | if (DF_REF_IS_ARTIFICIAL (link->ref)) | |
42463 | return false; | |
42464 | ||
42465 | rtx reached_insn = DF_REF_INSN (link->ref); | |
42466 | unsigned uid = INSN_UID (reached_insn); | |
42467 | struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn); | |
42468 | ||
42469 | if (!insn_entry[uid].is_swap || insn_entry[uid].is_load | |
42470 | || insn_entry[uid].is_store) | |
42471 | return false; | |
42472 | ||
42473 | if (purpose == FOR_LOADS) | |
42474 | { | |
42475 | df_ref use; | |
42476 | FOR_EACH_INSN_INFO_USE (use, insn_info) | |
42477 | { | |
42478 | struct df_link *swap_link = DF_REF_CHAIN (use); | |
42479 | ||
42480 | while (swap_link) | |
42481 | { | |
42482 | if (DF_REF_IS_ARTIFICIAL (swap_link->ref)) | |
42483 | return false; | |
42484 | ||
42485 | rtx swap_def_insn = DF_REF_INSN (swap_link->ref); | |
42486 | unsigned uid2 = INSN_UID (swap_def_insn); | |
42487 | ||
42488 | /* Only permuting loads are allowed. */ | |
42489 | if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load) | |
42490 | return false; | |
42491 | ||
42492 | swap_link = swap_link->next; | |
42493 | } | |
42494 | } | |
42495 | } | |
42496 | else if (purpose == FOR_STORES) | |
42497 | { | |
42498 | df_ref def; | |
42499 | FOR_EACH_INSN_INFO_DEF (def, insn_info) | |
42500 | { | |
42501 | struct df_link *swap_link = DF_REF_CHAIN (def); | |
42502 | ||
42503 | while (swap_link) | |
42504 | { | |
42505 | if (DF_REF_IS_ARTIFICIAL (swap_link->ref)) | |
42506 | return false; | |
42507 | ||
42508 | rtx swap_use_insn = DF_REF_INSN (swap_link->ref); | |
42509 | unsigned uid2 = INSN_UID (swap_use_insn); | |
42510 | ||
42511 | /* Permuting stores or register swaps are allowed. */ | |
42512 | if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load) | |
42513 | return false; | |
42514 | ||
42515 | swap_link = swap_link->next; | |
42516 | } | |
42517 | } | |
42518 | } | |
42519 | } | |
42520 | ||
42521 | return true; | |
42522 | } | |
42523 | ||
42524 | /* Mark the xxswapdi instructions associated with permuting loads and | |
42525 | stores for removal. Note that we only flag them for deletion here, | |
42526 | as there is a possibility of a swap being reached from multiple | |
42527 | loads, etc. */ | |
42528 | static void | |
42529 | mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i) | |
42530 | { | |
42531 | rtx insn = insn_entry[i].insn; | |
42532 | struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); | |
42533 | ||
42534 | if (insn_entry[i].is_load) | |
42535 | { | |
42536 | df_ref def; | |
42537 | FOR_EACH_INSN_INFO_DEF (def, insn_info) | |
42538 | { | |
42539 | struct df_link *link = DF_REF_CHAIN (def); | |
42540 | ||
42541 | /* We know by now that these are swaps, so we can delete | |
42542 | them confidently. */ | |
42543 | while (link) | |
42544 | { | |
42545 | rtx use_insn = DF_REF_INSN (link->ref); | |
42546 | insn_entry[INSN_UID (use_insn)].will_delete = 1; | |
42547 | link = link->next; | |
42548 | } | |
42549 | } | |
42550 | } | |
42551 | else if (insn_entry[i].is_store) | |
42552 | { | |
42553 | df_ref use; | |
42554 | FOR_EACH_INSN_INFO_USE (use, insn_info) | |
42555 | { | |
42556 | /* Ignore uses for addressability. */ | |
42557 | machine_mode mode = GET_MODE (DF_REF_REG (use)); | |
42558 | if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode)) | |
42559 | continue; | |
42560 | ||
42561 | struct df_link *link = DF_REF_CHAIN (use); | |
42562 | ||
42563 | /* We know by now that these are swaps, so we can delete | |
42564 | them confidently. */ | |
42565 | while (link) | |
42566 | { | |
42567 | rtx def_insn = DF_REF_INSN (link->ref); | |
42568 | insn_entry[INSN_UID (def_insn)].will_delete = 1; | |
42569 | link = link->next; | |
42570 | } | |
42571 | } | |
42572 | } | |
42573 | } | |
42574 | ||
42575 | /* OP is either a CONST_VECTOR or an expression containing one. | |
42576 | Swap the first half of the vector with the second in the first | |
42577 | case. Recurse to find it in the second. */ | |
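/* E.g. (illustration): the V4SI constant { 0, 1, 2, 3 } becomes
   { 2, 3, 0, 1 }, so the swapped in-register layout still presents
   the original element values.  */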
42578 | static void | |
42579 | swap_const_vector_halves (rtx op) | |
42580 | { | |
42581 | int i; | |
42582 | enum rtx_code code = GET_CODE (op); | |
42583 | if (GET_CODE (op) == CONST_VECTOR) | |
42584 | { | |
42585 | int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2; | |
42586 | for (i = 0; i < half_units; ++i) | |
42587 | { | |
42588 | rtx temp = CONST_VECTOR_ELT (op, i); | |
42589 | CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units); | |
42590 | CONST_VECTOR_ELT (op, i + half_units) = temp; | |
42591 | } | |
42592 | } | |
42593 | else | |
42594 | { | |
42595 | int j; | |
42596 | const char *fmt = GET_RTX_FORMAT (code); | |
42597 | for (i = 0; i < GET_RTX_LENGTH (code); ++i) | |
42598 | if (fmt[i] == 'e' || fmt[i] == 'u') | |
42599 | swap_const_vector_halves (XEXP (op, i)); | |
42600 | else if (fmt[i] == 'E') | |
42601 | for (j = 0; j < XVECLEN (op, i); ++j) | |
42602 | swap_const_vector_halves (XVECEXP (op, i, j)); | |
42603 | } | |
42604 | } | |
42605 | ||
42606 | /* Find all subregs of a vector expression that perform a narrowing, | |
42607 | and adjust the subreg index to account for doubleword swapping. */ | |
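| /* For example, (subreg:DI (reg:V2DI) 0) names the first doubleword of | |
| the V2DI value; once the doublewords are swapped, that data lives at | |
| byte offset 8, so offsets below 8 gain 8 and offsets of 8 or more | |
| lose 8. */ | |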
42608 | static void | |
42609 | adjust_subreg_index (rtx op) | |
42610 | { | |
42611 | enum rtx_code code = GET_CODE (op); | |
42612 | if (code == SUBREG | |
42613 | && (GET_MODE_SIZE (GET_MODE (op)) | |
42614 | < GET_MODE_SIZE (GET_MODE (XEXP (op, 0))))) | |
42615 | { | |
42616 | unsigned int index = SUBREG_BYTE (op); | |
42617 | if (index < 8) | |
42618 | index += 8; | |
42619 | else | |
42620 | index -= 8; | |
42621 | SUBREG_BYTE (op) = index; | |
42622 | } | |
42623 | ||
42624 | const char *fmt = GET_RTX_FORMAT (code); | |
42625 | int i, j; | |
42626 | for (i = 0; i < GET_RTX_LENGTH (code); ++i) | |
42627 | if (fmt[i] == 'e' || fmt[i] == 'u') | |
42628 | adjust_subreg_index (XEXP (op, i)); | |
42629 | else if (fmt[i] == 'E') | |
42630 | for (j = 0; j < XVECLEN (op, i); ++j) | |
42631 | adjust_subreg_index (XVECEXP (op, i, j)); | |
42632 | } | |
42633 | ||
42634 | /* Convert the non-permuting load INSN to a permuting one. */ | |
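| /* For a V4SI load, n_elts = 4 and half_elts = 2, so the selector built | |
| below is (parallel [2 3 0 1]) and the new source becomes | |
| (vec_select:V4SI (mem ...) (parallel [2 3 0 1])), i.e. a load that | |
| permutes the two doublewords as the hardware permuting load does. */ | |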
42635 | static void | |
42636 | permute_load (rtx_insn *insn) | |
42637 | { | |
42638 | rtx body = PATTERN (insn); | |
42639 | rtx mem_op = SET_SRC (body); | |
42640 | rtx tgt_reg = SET_DEST (body); | |
42641 | machine_mode mode = GET_MODE (tgt_reg); | |
42642 | int n_elts = GET_MODE_NUNITS (mode); | |
42643 | int half_elts = n_elts / 2; | |
42644 | rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts)); | |
42645 | int i, j; | |
42646 | for (i = 0, j = half_elts; i < half_elts; ++i, ++j) | |
42647 | XVECEXP (par, 0, i) = GEN_INT (j); | |
42648 | for (i = half_elts, j = 0; j < half_elts; ++i, ++j) | |
42649 | XVECEXP (par, 0, i) = GEN_INT (j); | |
42650 | rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par); | |
42651 | SET_SRC (body) = sel; | |
42652 | INSN_CODE (insn) = -1; /* Force re-recognition. */ | |
42653 | df_insn_rescan (insn); | |
42654 | ||
42655 | if (dump_file) | |
42656 | fprintf (dump_file, "Replacing load %d with permuted load\n", | |
42657 | INSN_UID (insn)); | |
42658 | } | |
42659 | ||
42660 | /* Convert the non-permuting store INSN to a permuting one. */ | |
42661 | static void | |
42662 | permute_store (rtx_insn *insn) | |
42663 | { | |
42664 | rtx body = PATTERN (insn); | |
42665 | rtx src_reg = SET_SRC (body); | |
42666 | machine_mode mode = GET_MODE (src_reg); | |
42667 | int n_elts = GET_MODE_NUNITS (mode); | |
42668 | int half_elts = n_elts / 2; | |
42669 | rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts)); | |
42670 | int i, j; | |
42671 | for (i = 0, j = half_elts; i < half_elts; ++i, ++j) | |
42672 | XVECEXP (par, 0, i) = GEN_INT (j); | |
42673 | for (i = half_elts, j = 0; j < half_elts; ++i, ++j) | |
42674 | XVECEXP (par, 0, i) = GEN_INT (j); | |
42675 | rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par); | |
42676 | SET_SRC (body) = sel; | |
42677 | INSN_CODE (insn) = -1; /* Force re-recognition. */ | |
42678 | df_insn_rescan (insn); | |
42679 | ||
42680 | if (dump_file) | |
42681 | fprintf (dump_file, "Replacing store %d with permuted store\n", | |
42682 | INSN_UID (insn)); | |
42683 | } | |
42684 | ||
42685 | /* Given INSN that contains a vector extract operation, adjust the index | |
42686 | of the extracted lane to account for the doubleword swap. */ | |
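| /* E.g., for a V4SI source half_elts is 2, so extracting lane 0 becomes | |
| extracting lane 2 and vice versa: the doubleword that held the lane | |
| now sits in the other half of the register. */ | |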
42687 | static void | |
42688 | adjust_extract (rtx_insn *insn) | |
42689 | { | |
42690 | rtx pattern = PATTERN (insn); | |
42691 | if (GET_CODE (pattern) == PARALLEL) | |
42692 | pattern = XVECEXP (pattern, 0, 0); | |
42693 | rtx src = SET_SRC (pattern); | |
42694 | /* The vec_select may be wrapped in a vec_duplicate for a splat, so | |
42695 | account for that. */ | |
42696 | rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src; | |
42697 | rtx par = XEXP (sel, 1); | |
42698 | int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1; | |
42699 | int lane = INTVAL (XVECEXP (par, 0, 0)); | |
42700 | lane = lane >= half_elts ? lane - half_elts : lane + half_elts; | |
42701 | XVECEXP (par, 0, 0) = GEN_INT (lane); | |
42702 | INSN_CODE (insn) = -1; /* Force re-recognition. */ | |
42703 | df_insn_rescan (insn); | |
42704 | ||
42705 | if (dump_file) | |
42706 | fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn)); | |
42707 | } | |
42708 | ||
42709 | /* Given INSN that contains a vector direct-splat operation, adjust the index | |
42710 | of the source lane to account for the doubleword swap. */ | |
42711 | static void | |
42712 | adjust_splat (rtx_insn *insn) | |
42713 | { | |
42714 | rtx body = PATTERN (insn); | |
42715 | rtx unspec = XEXP (body, 1); | |
42716 | int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1; | |
42717 | int lane = INTVAL (XVECEXP (unspec, 0, 1)); | |
42718 | lane = lane >= half_elts ? lane - half_elts : lane + half_elts; | |
42719 | XVECEXP (unspec, 0, 1) = GEN_INT (lane); | |
42720 | INSN_CODE (insn) = -1; /* Force re-recognition. */ | |
42721 | df_insn_rescan (insn); | |
42722 | ||
42723 | if (dump_file) | |
42724 | fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn)); | |
42725 | } | |
42726 | ||
42727 | /* Given INSN that contains an XXPERMDI operation (that is not a doubleword | |
42728 | swap), reverse the order of the source operands and adjust the indices | |
42729 | of the source lanes to account for doubleword reversal. */ | |
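| /* The two lanes index the four doublewords of the concatenated sources. | |
| For example, lanes {0, 2} (the first doubleword of each source) become | |
| {3 - 2, 3 - 0} = {1, 3} once the operands are exchanged and each index | |
| is reflected within the reversed concatenation. */ | |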
42730 | static void | |
42731 | adjust_xxpermdi (rtx_insn *insn) | |
42732 | { | |
42733 | rtx set = PATTERN (insn); | |
42734 | rtx select = XEXP (set, 1); | |
42735 | rtx concat = XEXP (select, 0); | |
42736 | rtx src0 = XEXP (concat, 0); | |
42737 | XEXP (concat, 0) = XEXP (concat, 1); | |
42738 | XEXP (concat, 1) = src0; | |
42739 | rtx parallel = XEXP (select, 1); | |
42740 | int lane0 = INTVAL (XVECEXP (parallel, 0, 0)); | |
42741 | int lane1 = INTVAL (XVECEXP (parallel, 0, 1)); | |
42742 | int new_lane0 = 3 - lane1; | |
42743 | int new_lane1 = 3 - lane0; | |
42744 | XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0); | |
42745 | XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1); | |
42746 | INSN_CODE (insn) = -1; /* Force re-recognition. */ | |
42747 | df_insn_rescan (insn); | |
42748 | ||
42749 | if (dump_file) | |
42750 | fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn)); | |
42751 | } | |
42752 | ||
42753 | /* Given INSN that contains a VEC_CONCAT operation of two doublewords, | |
42754 | reverse the order of those inputs. */ | |
42755 | static void | |
42756 | adjust_concat (rtx_insn *insn) | |
42757 | { | |
42758 | rtx set = PATTERN (insn); | |
42759 | rtx concat = XEXP (set, 1); | |
42760 | rtx src0 = XEXP (concat, 0); | |
42761 | XEXP (concat, 0) = XEXP (concat, 1); | |
42762 | XEXP (concat, 1) = src0; | |
42763 | INSN_CODE (insn) = -1; /* Force re-recognition. */ | |
42764 | df_insn_rescan (insn); | |
42765 | ||
42766 | if (dump_file) | |
42767 | fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn)); | |
42768 | } | |
42769 | ||
42770 | /* Given an UNSPEC_VPERM insn, modify the mask loaded from the | |
42771 | constant pool to reflect swapped doublewords. */ | |
42772 | static void | |
42773 | adjust_vperm (rtx_insn *insn) | |
42774 | { | |
42775 | /* We previously determined that the UNSPEC_VPERM was fed by a | |
42776 | swap of a swapping load of a TOC-relative constant pool symbol. | |
42777 | Find the MEM in the swapping load and replace it with a MEM for | |
42778 | the adjusted mask constant. */ | |
42779 | rtx set = PATTERN (insn); | |
42780 | rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2); | |
42781 | ||
42782 | /* Find the swap. */ | |
42783 | struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); | |
42784 | df_ref use; | |
42785 | rtx_insn *swap_insn = 0; | |
42786 | FOR_EACH_INSN_INFO_USE (use, insn_info) | |
42787 | if (rtx_equal_p (DF_REF_REG (use), mask_reg)) | |
42788 | { | |
42789 | struct df_link *def_link = DF_REF_CHAIN (use); | |
42790 | gcc_assert (def_link && !def_link->next); | |
42791 | swap_insn = DF_REF_INSN (def_link->ref); | |
42792 | break; | |
42793 | } | |
42794 | gcc_assert (swap_insn); | |
42795 | ||
42796 | /* Find the load. */ | |
42797 | insn_info = DF_INSN_INFO_GET (swap_insn); | |
42798 | rtx_insn *load_insn = 0; | |
42799 | FOR_EACH_INSN_INFO_USE (use, insn_info) | |
42800 | { | |
42801 | struct df_link *def_link = DF_REF_CHAIN (use); | |
42802 | gcc_assert (def_link && !def_link->next); | |
42803 | load_insn = DF_REF_INSN (def_link->ref); | |
42804 | break; | |
42805 | } | |
42806 | gcc_assert (load_insn); | |
42807 | ||
42808 | /* Find the TOC-relative symbol access. */ | |
42809 | insn_info = DF_INSN_INFO_GET (load_insn); | |
42810 | rtx_insn *tocrel_insn = 0; | |
42811 | FOR_EACH_INSN_INFO_USE (use, insn_info) | |
42812 | { | |
42813 | struct df_link *def_link = DF_REF_CHAIN (use); | |
42814 | gcc_assert (def_link && !def_link->next); | |
42815 | tocrel_insn = DF_REF_INSN (def_link->ref); | |
42816 | break; | |
42817 | } | |
42818 | gcc_assert (tocrel_insn); | |
42819 | ||
42820 | /* Find the embedded CONST_VECTOR. We must call toc_relative_expr_p | |
42821 | for its side effect of setting tocrel_base; we have already | |
42822 | established that it will return true. */ | |
42823 | rtx base, offset; | |
42824 | rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn)); | |
42825 | /* There is an extra level of indirection for small/large code models. */ | |
42826 | if (GET_CODE (tocrel_expr) == MEM) | |
42827 | tocrel_expr = XEXP (tocrel_expr, 0); | |
42828 | if (!toc_relative_expr_p (tocrel_expr, false)) | |
42829 | gcc_unreachable (); | |
42830 | split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset); | |
42831 | rtx const_vector = get_pool_constant (base); | |
42832 | /* With the extra indirection, get_pool_constant can return the | |
42833 | SYMBOL_REF recorded in the reg_equal expression rather than the | |
42834 | constant itself; look through it to reach the real constant. */ | |
42835 | if (GET_CODE (const_vector) == SYMBOL_REF) | |
42836 | const_vector = get_pool_constant (const_vector); | |
42837 | gcc_assert (GET_CODE (const_vector) == CONST_VECTOR); | |
42838 | ||
42839 | /* Create an adjusted mask from the initial mask. */ | |
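| /* Each selector byte indexes one of the 32 bytes of the concatenated | |
| inputs; adding 8 modulo 16 within each 16-byte half accounts for the | |
| doubleword swap. E.g., 0 -> 8, 8 -> 0, 15 -> 7, 16 -> 24, 31 -> 23. */ | |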
42840 | unsigned int new_mask[16], i, val; | |
42841 | for (i = 0; i < 16; ++i) | |
| { | |
42842 | val = INTVAL (XVECEXP (const_vector, 0, i)); | |
42843 | if (val < 16) | |
42844 | new_mask[i] = (val + 8) % 16; | |
42845 | else | |
42846 | new_mask[i] = ((val + 8) % 16) + 16; | |
42847 | } | |
42848 | ||
42849 | /* Create a new CONST_VECTOR and a MEM that references it. */ | |
42850 | rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16)); | |
42851 | for (i = 0; i < 16; ++i) | |
42852 | XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]); | |
42853 | rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0)); | |
42854 | rtx new_mem = force_const_mem (V16QImode, new_const_vector); | |
42855 | /* This gives us a MEM whose base operand is a SYMBOL_REF, which we | |
42856 | can't recognize. Force the SYMBOL_REF into a register. */ | |
42857 | if (!REG_P (XEXP (new_mem, 0))) | |
| { | |
42858 | rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0)); | |
42859 | XEXP (new_mem, 0) = base_reg; | |
42860 | /* Move the newly created insn ahead of the load insn. */ | |
42861 | rtx_insn *force_insn = get_last_insn (); | |
42862 | remove_insn (force_insn); | |
42863 | rtx_insn *before_load_insn = PREV_INSN (load_insn); | |
42864 | add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn)); | |
42865 | df_insn_rescan (before_load_insn); | |
42866 | df_insn_rescan (force_insn); | |
42867 | } | |
42868 | ||
42869 | /* Replace the MEM in the load instruction and rescan it. */ | |
42870 | XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem; | |
42871 | INSN_CODE (load_insn) = -1; /* Force re-recognition. */ | |
42872 | df_insn_rescan (load_insn); | |
42873 | ||
42874 | if (dump_file) | |
42875 | fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn)); | |
42876 | } | |
42877 | ||
42878 | /* The insn described by INSN_ENTRY[I] can be swapped, but only | |
42879 | with special handling. Take care of that here. */ | |
42880 | static void | |
42881 | handle_special_swappables (swap_web_entry *insn_entry, unsigned i) | |
42882 | { | |
42883 | rtx_insn *insn = insn_entry[i].insn; | |
42884 | rtx body = PATTERN (insn); | |
42885 | ||
42886 | switch (insn_entry[i].special_handling) | |
42887 | { | |
42888 | default: | |
42889 | gcc_unreachable (); | |
42890 | case SH_CONST_VECTOR: | |
42891 | { | |
42892 | /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */ | |
42893 | gcc_assert (GET_CODE (body) == SET); | |
42894 | rtx rhs = SET_SRC (body); | |
42895 | swap_const_vector_halves (rhs); | |
42896 | if (dump_file) | |
42897 | fprintf (dump_file, "Swapping constant halves in insn %d\n", i); | |
42898 | break; | |
42899 | } | |
42900 | case SH_SUBREG: | |
42901 | /* A subreg of the same size is already safe. For subregs that | |
42902 | select a smaller portion of a reg, adjust the index for | |
42903 | swapped doublewords. */ | |
42904 | adjust_subreg_index (body); | |
42905 | if (dump_file) | |
42906 | fprintf (dump_file, "Adjusting subreg in insn %d\n", i); | |
42907 | break; | |
42908 | case SH_NOSWAP_LD: | |
42909 | /* Convert a non-permuting load to a permuting one. */ | |
42910 | permute_load (insn); | |
42911 | break; | |
42912 | case SH_NOSWAP_ST: | |
42913 | /* Convert a non-permuting store to a permuting one. */ | |
42914 | permute_store (insn); | |
42915 | break; | |
42916 | case SH_EXTRACT: | |
42917 | /* Change the lane on an extract operation. */ | |
42918 | adjust_extract (insn); | |
42919 | break; | |
42920 | case SH_SPLAT: | |
42921 | /* Change the lane on a direct-splat operation. */ | |
42922 | adjust_splat (insn); | |
42923 | break; | |
42924 | case SH_XXPERMDI: | |
42925 | /* Change the lanes on an XXPERMDI operation. */ | |
42926 | adjust_xxpermdi (insn); | |
42927 | break; | |
42928 | case SH_CONCAT: | |
42929 | /* Reverse the order of a concatenation operation. */ | |
42930 | adjust_concat (insn); | |
42931 | break; | |
42932 | case SH_VPERM: | |
42933 | /* Change the mask loaded from the constant pool for a VPERM. */ | |
42934 | adjust_vperm (insn); | |
42935 | break; | |
42936 | } | |
42937 | } | |
42938 | ||
42939 | /* Find the insn from the Ith table entry, which is known to be a | |
42940 | register swap Y = SWAP(X). Replace it with a copy Y = X. */ | |
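| /* For instance, (set (reg:V2DI y) (vec_select:V2DI (reg:V2DI x) | |
| (parallel [1 0]))) simply becomes (set (reg:V2DI y) (reg:V2DI x)). */ | |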
42941 | static void | |
42942 | replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i) | |
42943 | { | |
42944 | rtx_insn *insn = insn_entry[i].insn; | |
42945 | rtx body = PATTERN (insn); | |
42946 | rtx src_reg = XEXP (SET_SRC (body), 0); | |
42947 | rtx copy = gen_rtx_SET (SET_DEST (body), src_reg); | |
42948 | rtx_insn *new_insn = emit_insn_before (copy, insn); | |
42949 | set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn)); | |
42950 | df_insn_rescan (new_insn); | |
42951 | ||
42952 | if (dump_file) | |
42953 | { | |
42954 | unsigned int new_uid = INSN_UID (new_insn); | |
42955 | fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid); | |
42956 | } | |
42957 | ||
42958 | df_insn_delete (insn); | |
42959 | remove_insn (insn); | |
42960 | insn->set_deleted (); | |
42961 | } | |
42962 | ||
42963 | /* Dump the swap table to DUMP_FILE. */ | |
42964 | static void | |
42965 | dump_swap_insn_table (swap_web_entry *insn_entry) | |
42966 | { | |
42967 | int e = get_max_uid (); | |
42968 | fprintf (dump_file, "\nRelevant insns with their flag settings\n\n"); | |
42969 | ||
42970 | for (int i = 0; i < e; ++i) | |
42971 | if (insn_entry[i].is_relevant) | |
42972 | { | |
42973 | swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred (); | |
42974 | fprintf (dump_file, "%6d %6d ", i, | |
42975 | pred_entry && pred_entry->insn | |
42976 | ? INSN_UID (pred_entry->insn) : 0); | |
42977 | if (insn_entry[i].is_load) | |
42978 | fputs ("load ", dump_file); | |
42979 | if (insn_entry[i].is_store) | |
42980 | fputs ("store ", dump_file); | |
42981 | if (insn_entry[i].is_swap) | |
42982 | fputs ("swap ", dump_file); | |
42983 | if (insn_entry[i].is_live_in) | |
42984 | fputs ("live-in ", dump_file); | |
42985 | if (insn_entry[i].is_live_out) | |
42986 | fputs ("live-out ", dump_file); | |
42987 | if (insn_entry[i].contains_subreg) | |
42988 | fputs ("subreg ", dump_file); | |
42989 | if (insn_entry[i].is_128_int) | |
42990 | fputs ("int128 ", dump_file); | |
42991 | if (insn_entry[i].is_call) | |
42992 | fputs ("call ", dump_file); | |
42993 | if (insn_entry[i].is_swappable) | |
42994 | { | |
42995 | fputs ("swappable ", dump_file); | |
42996 | if (insn_entry[i].special_handling == SH_CONST_VECTOR) | |
42997 | fputs ("special:constvec ", dump_file); | |
42998 | else if (insn_entry[i].special_handling == SH_SUBREG) | |
42999 | fputs ("special:subreg ", dump_file); | |
43000 | else if (insn_entry[i].special_handling == SH_NOSWAP_LD) | |
43001 | fputs ("special:load ", dump_file); | |
43002 | else if (insn_entry[i].special_handling == SH_NOSWAP_ST) | |
43003 | fputs ("special:store ", dump_file); | |
43004 | else if (insn_entry[i].special_handling == SH_EXTRACT) | |
43005 | fputs ("special:extract ", dump_file); | |
43006 | else if (insn_entry[i].special_handling == SH_SPLAT) | |
43007 | fputs ("special:splat ", dump_file); | |
43008 | else if (insn_entry[i].special_handling == SH_XXPERMDI) | |
43009 | fputs ("special:xxpermdi ", dump_file); | |
43010 | else if (insn_entry[i].special_handling == SH_CONCAT) | |
43011 | fputs ("special:concat ", dump_file); | |
43012 | else if (insn_entry[i].special_handling == SH_VPERM) | |
43013 | fputs ("special:vperm ", dump_file); | |
43014 | } | |
43015 | if (insn_entry[i].web_not_optimizable) | |
43016 | fputs ("unoptimizable ", dump_file); | |
43017 | if (insn_entry[i].will_delete) | |
43018 | fputs ("delete ", dump_file); | |
43019 | fputs ("\n", dump_file); | |
43020 | } | |
43021 | fputs ("\n", dump_file); | |
43022 | } | |
43023 | ||
43024 | /* Return ALIGN with its address canonicalized to (reg) or (plus reg reg). | |
43025 | Here ALIGN is an (and addr (const_int -16)). Always return a new copy | |
43026 | to avoid problems with combine. */ | |
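| /* E.g., (and (plus (reg) (const_int 32)) (const_int -16)) becomes | |
| (and (plus (reg) (reg)) (const_int -16)) after the constant term is | |
| forced into a register, matching the reg and reg+reg address forms | |
| that lvx/stvx accept. */ | |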
43027 | static rtx | |
43028 | alignment_with_canonical_addr (rtx align) | |
43029 | { | |
43030 | rtx canon; | |
43031 | rtx addr = XEXP (align, 0); | |
43032 | ||
43033 | if (REG_P (addr)) | |
43034 | canon = addr; | |
43035 | ||
43036 | else if (GET_CODE (addr) == PLUS) | |
43037 | { | |
43038 | rtx addrop0 = XEXP (addr, 0); | |
43039 | rtx addrop1 = XEXP (addr, 1); | |
43040 | ||
43041 | if (!REG_P (addrop0)) | |
43042 | addrop0 = force_reg (GET_MODE (addrop0), addrop0); | |
43043 | ||
43044 | if (!REG_P (addrop1)) | |
43045 | addrop1 = force_reg (GET_MODE (addrop1), addrop1); | |
43046 | ||
43047 | canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1); | |
43048 | } | |
43049 | ||
43050 | else | |
43051 | canon = force_reg (GET_MODE (addr), addr); | |
43052 | ||
43053 | return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16)); | |
43054 | } | |
43055 | ||
43056 | /* Check whether INSN computes an alignment mask (an AND of an address | |
43057 | with -16), and if so, return a fully-expanded rtx for the masking | |
43058 | operation. */ | |
43058 | static rtx | |
43059 | alignment_mask (rtx_insn *insn) | |
43060 | { | |
43061 | rtx body = PATTERN (insn); | |
43062 | ||
43063 | if (GET_CODE (body) != SET | |
43064 | || GET_CODE (SET_SRC (body)) != AND | |
43065 | || !REG_P (XEXP (SET_SRC (body), 0))) | |
43066 | return 0; | |
43067 | ||
43068 | rtx mask = XEXP (SET_SRC (body), 1); | |
43069 | ||
43070 | if (GET_CODE (mask) == CONST_INT) | |
43071 | { | |
43072 | if (INTVAL (mask) == -16) | |
43073 | return alignment_with_canonical_addr (SET_SRC (body)); | |
43074 | else | |
43075 | return 0; | |
43076 | } | |
43077 | ||
43078 | if (!REG_P (mask)) | |
43079 | return 0; | |
43080 | ||
43081 | struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); | |
43082 | df_ref use; | |
43083 | rtx real_mask = 0; | |
43084 | ||
43085 | FOR_EACH_INSN_INFO_USE (use, insn_info) | |
43086 | { | |
43087 | if (!rtx_equal_p (DF_REF_REG (use), mask)) | |
43088 | continue; | |
43089 | ||
43090 | struct df_link *def_link = DF_REF_CHAIN (use); | |
43091 | if (!def_link || def_link->next) | |
43092 | return 0; | |
43093 | ||
43094 | rtx_insn *const_insn = DF_REF_INSN (def_link->ref); | |
43095 | rtx const_body = PATTERN (const_insn); | |
43096 | if (GET_CODE (const_body) != SET) | |
43097 | return 0; | |
43098 | ||
43099 | real_mask = SET_SRC (const_body); | |
43100 | ||
43101 | if (GET_CODE (real_mask) != CONST_INT | |
43102 | || INTVAL (real_mask) != -16) | |
43103 | return 0; | |
43104 | } | |
43105 | ||
43106 | if (real_mask == 0) | |
43107 | return 0; | |
43108 | ||
43109 | return alignment_with_canonical_addr (SET_SRC (body)); | |
43110 | } | |
43111 | ||
43112 | /* Given INSN that's a load or store based at BASE_REG, look for a | |
43113 | feeding computation that aligns its address on a 16-byte boundary. */ | |
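| /* That is, walk the use-def chain for BASE_REG to its unique defining | |
| insn and check whether that insn has the alignment_mask shape above, | |
| e.g. (set (reg X) (and (reg Y) (const_int -16))). */ | |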
43114 | static rtx | |
43115 | find_alignment_op (rtx_insn *insn, rtx base_reg) | |
43116 | { | |
43117 | df_ref base_use; | |
43118 | struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); | |
43119 | rtx and_operation = 0; | |
43120 | ||
43121 | FOR_EACH_INSN_INFO_USE (base_use, insn_info) | |
43122 | { | |
43123 | if (!rtx_equal_p (DF_REF_REG (base_use), base_reg)) | |
43124 | continue; | |
43125 | ||
43126 | struct df_link *base_def_link = DF_REF_CHAIN (base_use); | |
43127 | if (!base_def_link || base_def_link->next) | |
43128 | break; | |
43129 | ||
43130 | /* With stack-protector code enabled, and possibly in other | |
43131 | circumstances, there may not be an associated insn for | |
43132 | the def. */ | |
43133 | if (DF_REF_IS_ARTIFICIAL (base_def_link->ref)) | |
43134 | break; | |
43135 | ||
43136 | rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref); | |
43137 | and_operation = alignment_mask (and_insn); | |
43138 | if (and_operation != 0) | |
43139 | break; | |
43140 | } | |
43141 | ||
43142 | return and_operation; | |
43143 | } | |
43144 | ||
43145 | struct del_info { bool replace; rtx_insn *replace_insn; }; | |
43146 | ||
43147 | /* If INSN is the load for an lvx pattern, put it in canonical form. */ | |
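| /* A rough sketch of the shape being matched (register names are | |
| illustrative only): | |
| (set (reg T) (and (reg A) (const_int -16))) ;; alignment | |
| (set (reg V) (vec_select (mem (reg T)) ...)) ;; swapping load | |
| (set (reg W) (vec_select (reg V) ...)) ;; register swap | |
| The two swaps cancel, so the AND is propagated into the load's | |
| address and the trailing swap is turned into a plain copy, which | |
| together match the lvx pattern. */ | |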
43148 | static void | |
43149 | recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete) | |
43150 | { | |
43151 | rtx body = PATTERN (insn); | |
43152 | gcc_assert (GET_CODE (body) == SET | |
43153 | && GET_CODE (SET_SRC (body)) == VEC_SELECT | |
43154 | && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM); | |
43155 | ||
43156 | rtx mem = XEXP (SET_SRC (body), 0); | |
43157 | rtx base_reg = XEXP (mem, 0); | |
43158 | ||
43159 | rtx and_operation = find_alignment_op (insn, base_reg); | |
43160 | ||
43161 | if (and_operation != 0) | |
43162 | { | |
43163 | df_ref def; | |
43164 | struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); | |
43165 | FOR_EACH_INSN_INFO_DEF (def, insn_info) | |
43166 | { | |
43167 | struct df_link *link = DF_REF_CHAIN (def); | |
43168 | if (!link || link->next) | |
43169 | break; | |
43170 | ||
43171 | rtx_insn *swap_insn = DF_REF_INSN (link->ref); | |
43172 | if (!insn_is_swap_p (swap_insn) | |
43173 | || insn_is_load_p (swap_insn) | |
43174 | || insn_is_store_p (swap_insn)) | |
43175 | break; | |
43176 | ||
43177 | /* Expected lvx pattern found. Change the swap to | |
43178 | a copy, and propagate the AND operation into the | |
43179 | load. */ | |
43180 | to_delete[INSN_UID (swap_insn)].replace = true; | |
43181 | to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn; | |
43182 | ||
43183 | XEXP (mem, 0) = and_operation; | |
43184 | SET_SRC (body) = mem; | |
43185 | INSN_CODE (insn) = -1; /* Force re-recognition. */ | |
43186 | df_insn_rescan (insn); | |
43187 | ||
43188 | if (dump_file) | |
43189 | fprintf (dump_file, "lvx opportunity found at %d\n", | |
43190 | INSN_UID (insn)); | |
43191 | } | |
43192 | } | |
43193 | } | |
43194 | ||
43195 | /* If INSN is the store for an stvx pattern, put it in canonical form. */ | |
43196 | static void | |
43197 | recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete) | |
43198 | { | |
43199 | rtx body = PATTERN (insn); | |
43200 | gcc_assert (GET_CODE (body) == SET | |
43201 | && GET_CODE (SET_DEST (body)) == MEM | |
43202 | && GET_CODE (SET_SRC (body)) == VEC_SELECT); | |
43203 | rtx mem = SET_DEST (body); | |
43204 | rtx base_reg = XEXP (mem, 0); | |
43205 | ||
43206 | rtx and_operation = find_alignment_op (insn, base_reg); | |
43207 | ||
43208 | if (and_operation != 0) | |
43209 | { | |
43210 | rtx src_reg = XEXP (SET_SRC (body), 0); | |
43211 | df_ref src_use; | |
43212 | struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); | |
43213 | FOR_EACH_INSN_INFO_USE (src_use, insn_info) | |
43214 | { | |
43215 | if (!rtx_equal_p (DF_REF_REG (src_use), src_reg)) | |
43216 | continue; | |
43217 | ||
43218 | struct df_link *link = DF_REF_CHAIN (src_use); | |
43219 | if (!link || link->next) | |
43220 | break; | |
43221 | ||
43222 | rtx_insn *swap_insn = DF_REF_INSN (link->ref); | |
43223 | if (!insn_is_swap_p (swap_insn) | |
43224 | || insn_is_load_p (swap_insn) | |
43225 | || insn_is_store_p (swap_insn)) | |
43226 | break; | |
43227 | ||
43228 | /* Expected stvx pattern found. Change the swap to | |
43229 | a copy, and propagate the AND operation into the | |
43230 | store. */ | |
43231 | to_delete[INSN_UID (swap_insn)].replace = true; | |
43232 | to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn; | |
43233 | ||
43234 | XEXP (mem, 0) = and_operation; | |
43235 | SET_SRC (body) = src_reg; | |
43236 | INSN_CODE (insn) = -1; /* Force re-recognition. */ | |
43237 | df_insn_rescan (insn); | |
43238 | ||
43239 | if (dump_file) | |
43240 | fprintf (dump_file, "stvx opportunity found at %d\n", | |
43241 | INSN_UID (insn)); | |
43242 | } | |
43243 | } | |
43244 | } | |
43245 | ||
43246 | /* Look for patterns created from builtin lvx and stvx calls, and | |
43247 | canonicalize them to be properly recognized as such. */ | |
43248 | static void | |
43249 | recombine_lvx_stvx_patterns (function *fun) | |
43250 | { | |
43251 | int i; | |
43252 | basic_block bb; | |
43253 | rtx_insn *insn; | |
43254 | ||
43255 | int num_insns = get_max_uid (); | |
43256 | del_info *to_delete = XCNEWVEC (del_info, num_insns); | |
43257 | ||
43258 | FOR_ALL_BB_FN (bb, fun) | |
43259 | FOR_BB_INSNS (bb, insn) | |
43260 | { | |
43261 | if (!NONDEBUG_INSN_P (insn)) | |
43262 | continue; | |
43263 | ||
43264 | if (insn_is_load_p (insn) && insn_is_swap_p (insn)) | |
43265 | recombine_lvx_pattern (insn, to_delete); | |
43266 | else if (insn_is_store_p (insn) && insn_is_swap_p (insn)) | |
43267 | recombine_stvx_pattern (insn, to_delete); | |
43268 | } | |
43269 | ||
43270 | /* Turning swaps into copies is delayed until now, to avoid problems | |
43271 | with deleting instructions during the insn walk. */ | |
43272 | for (i = 0; i < num_insns; i++) | |
43273 | if (to_delete[i].replace) | |
43274 | { | |
43275 | rtx swap_body = PATTERN (to_delete[i].replace_insn); | |
43276 | rtx src_reg = XEXP (SET_SRC (swap_body), 0); | |
43277 | rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg); | |
43278 | rtx_insn *new_insn = emit_insn_before (copy, | |
43279 | to_delete[i].replace_insn); | |
43280 | set_block_for_insn (new_insn, | |
43281 | BLOCK_FOR_INSN (to_delete[i].replace_insn)); | |
43282 | df_insn_rescan (new_insn); | |
43283 | df_insn_delete (to_delete[i].replace_insn); | |
43284 | remove_insn (to_delete[i].replace_insn); | |
43285 | to_delete[i].replace_insn->set_deleted (); | |
43286 | } | |
43287 | ||
43288 | free (to_delete); | |
43289 | } | |
43290 | ||
43291 | /* Main entry point for this pass. */ | |
43292 | unsigned int | |
43293 | rs6000_analyze_swaps (function *fun) | |
43294 | { | |
43295 | swap_web_entry *insn_entry; | |
43296 | basic_block bb; | |
43297 | rtx_insn *insn, *curr_insn = 0; | |
43298 | ||
43299 | /* Dataflow analysis for use-def chains. */ | |
43300 | df_set_flags (DF_RD_PRUNE_DEAD_DEFS); | |
43301 | df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN); | |
43302 | df_analyze (); | |
43303 | df_set_flags (DF_DEFER_INSN_RESCAN); | |
43304 | ||
43305 | /* Pre-pass to recombine lvx and stvx patterns so we don't lose info. */ | |
43306 | recombine_lvx_stvx_patterns (fun); | |
43307 | ||
43308 | /* Allocate structure to represent webs of insns. */ | |
43309 | insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ()); | |
43310 | ||
43311 | /* Walk the insns to gather basic data. */ | |
43312 | FOR_ALL_BB_FN (bb, fun) | |
43313 | FOR_BB_INSNS_SAFE (bb, insn, curr_insn) | |
43314 | { | |
43315 | unsigned int uid = INSN_UID (insn); | |
43316 | if (NONDEBUG_INSN_P (insn)) | |
43317 | { | |
43318 | insn_entry[uid].insn = insn; | |
43319 | ||
43320 | if (GET_CODE (insn) == CALL_INSN) | |
43321 | insn_entry[uid].is_call = 1; | |
43322 | ||
43323 | /* Walk the uses and defs to see if we mention vector regs. | |
43324 | Record any constraints on optimization of such mentions. */ | |
43325 | struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); | |
43326 | df_ref mention; | |
43327 | FOR_EACH_INSN_INFO_USE (mention, insn_info) | |
43328 | { | |
43329 | /* We use DF_REF_REAL_REG here to get inside any subregs. */ | |
43330 | machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention)); | |
43331 | ||
43332 | /* If a use gets its value from a call insn, it will be | |
43333 | a hard register and will look like (reg:V4SI 3 3). | |
43334 | The df analysis creates two mentions for GPR3 and GPR4, | |
43335 | both DImode. We must recognize this and treat it as a | |
43336 | vector mention to ensure the call is unioned with this | |
43337 | use. */ | |
43338 | if (mode == DImode && DF_REF_INSN_INFO (mention)) | |
43339 | { | |
43340 | rtx feeder = DF_REF_INSN (mention); | |
43341 | /* FIXME: It is pretty hard to get from the df mention | |
43342 | to the mode of the use in the insn. We arbitrarily | |
43343 | pick a vector mode here, even though the use might | |
43344 | be a real DImode. We can be too conservative | |
43345 | (create a web larger than necessary) because of | |
43346 | this, so consider eventually fixing this. */ | |
43347 | if (GET_CODE (feeder) == CALL_INSN) | |
43348 | mode = V4SImode; | |
43349 | } | |
43350 | ||
43351 | if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode) | |
43352 | { | |
43353 | insn_entry[uid].is_relevant = 1; | |
43354 | if (mode == TImode || mode == V1TImode | |
43355 | || FLOAT128_VECTOR_P (mode)) | |
43356 | insn_entry[uid].is_128_int = 1; | |
43357 | if (DF_REF_INSN_INFO (mention)) | |
43358 | insn_entry[uid].contains_subreg | |
43359 | = !rtx_equal_p (DF_REF_REG (mention), | |
43360 | DF_REF_REAL_REG (mention)); | |
43361 | union_defs (insn_entry, insn, mention); | |
43362 | } | |
43363 | } | |
43364 | FOR_EACH_INSN_INFO_DEF (mention, insn_info) | |
43365 | { | |
43366 | /* We use DF_REF_REAL_REG here to get inside any subregs. */ | |
43367 | machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention)); | |
43368 | ||
43369 | /* If we're loading up a hard vector register for a call, | |
43370 | it looks like (set (reg:V4SI 9 9) (...)). The df | |
43371 | analysis creates two mentions for GPR9 and GPR10, both | |
43372 | DImode. So relying on the mode from the mentions | |
43373 | isn't sufficient to ensure we union the call into the | |
43374 | web with the parameter setup code. */ | |
43375 | if (mode == DImode && GET_CODE (PATTERN (insn)) == SET | |
43376 | && ALTIVEC_OR_VSX_VECTOR_MODE | |
43377 | (GET_MODE (SET_DEST (PATTERN (insn))))) | |
43378 | mode = GET_MODE (SET_DEST (PATTERN (insn))); | |
43378 | ||
43379 | if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode) | |
43380 | { | |
43381 | insn_entry[uid].is_relevant = 1; | |
43382 | if (mode == TImode || mode == V1TImode | |
43383 | || FLOAT128_VECTOR_P (mode)) | |
43384 | insn_entry[uid].is_128_int = 1; | |
43385 | if (DF_REF_INSN_INFO (mention)) | |
43386 | insn_entry[uid].contains_subreg | |
43387 | = !rtx_equal_p (DF_REF_REG (mention), | |
43388 | DF_REF_REAL_REG (mention)); | |
43389 | /* REG_FUNCTION_VALUE_P is not valid for subregs. */ | |
43390 | else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention))) | |
43391 | insn_entry[uid].is_live_out = 1; | |
43392 | union_uses (insn_entry, insn, mention); | |
43393 | } | |
43394 | } | |
43395 | ||
43396 | if (insn_entry[uid].is_relevant) | |
43397 | { | |
43398 | /* Determine if this is a load or store. */ | |
43399 | insn_entry[uid].is_load = insn_is_load_p (insn); | |
43400 | insn_entry[uid].is_store = insn_is_store_p (insn); | |
43401 | ||
43402 | /* Determine if this is a doubleword swap. If not, | |
43403 | determine whether it can legally be swapped. */ | |
43404 | if (insn_is_swap_p (insn)) | |
43405 | insn_entry[uid].is_swap = 1; | |
43406 | else | |
43407 | { | |
43408 | unsigned int special = SH_NONE; | |
43409 | insn_entry[uid].is_swappable | |
43410 | = insn_is_swappable_p (insn_entry, insn, &special); | |
43411 | if (special != SH_NONE && insn_entry[uid].contains_subreg) | |
43412 | insn_entry[uid].is_swappable = 0; | |
43413 | else if (special != SH_NONE) | |
43414 | insn_entry[uid].special_handling = special; | |
43415 | else if (insn_entry[uid].contains_subreg) | |
43416 | insn_entry[uid].special_handling = SH_SUBREG; | |
43417 | } | |
43418 | } | |
43419 | } | |
43420 | } | |
43421 | ||
43422 | if (dump_file) | |
43423 | { | |
43424 | fprintf (dump_file, "\nSwap insn entry table when first built\n"); | |
43425 | dump_swap_insn_table (insn_entry); | |
43426 | } | |
43427 | ||
43428 | /* Record unoptimizable webs. */ | |
43429 | unsigned e = get_max_uid (), i; | |
43430 | for (i = 0; i < e; ++i) | |
43431 | { | |
43432 | if (!insn_entry[i].is_relevant) | |
43433 | continue; | |
43434 | ||
43435 | swap_web_entry *root | |
43436 | = (swap_web_entry*)(&insn_entry[i])->unionfind_root (); | |
43437 | ||
43438 | if (insn_entry[i].is_live_in || insn_entry[i].is_live_out | |
43439 | || (insn_entry[i].contains_subreg | |
43440 | && insn_entry[i].special_handling != SH_SUBREG) | |
43441 | || insn_entry[i].is_128_int || insn_entry[i].is_call | |
43442 | || !(insn_entry[i].is_swappable || insn_entry[i].is_swap)) | |
43443 | root->web_not_optimizable = 1; | |
43444 | ||
43445 | /* If we have loads or stores that aren't permuting then the | |
43446 | optimization isn't appropriate. */ | |
43447 | else if ((insn_entry[i].is_load || insn_entry[i].is_store) | |
43448 | && !insn_entry[i].is_swap && !insn_entry[i].is_swappable) | |
43449 | root->web_not_optimizable = 1; | |
43450 | ||
43451 | /* If we have permuting loads or stores that are not accompanied | |
43452 | by a register swap, the optimization isn't appropriate. */ | |
43453 | else if (insn_entry[i].is_load && insn_entry[i].is_swap) | |
43454 | { | |
43455 | rtx insn = insn_entry[i].insn; | |
43456 | struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); | |
43457 | df_ref def; | |
43458 | ||
43459 | FOR_EACH_INSN_INFO_DEF (def, insn_info) | |
43460 | { | |
43461 | struct df_link *link = DF_REF_CHAIN (def); | |
43462 | ||
43463 | if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS)) | |
43464 | { | |
43465 | root->web_not_optimizable = 1; | |
43466 | break; | |
43467 | } | |
43468 | } | |
43469 | } | |
43470 | else if (insn_entry[i].is_store && insn_entry[i].is_swap) | |
43471 | { | |
43472 | rtx insn = insn_entry[i].insn; | |
43473 | struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); | |
43474 | df_ref use; | |
43475 | ||
43476 | FOR_EACH_INSN_INFO_USE (use, insn_info) | |
43477 | { | |
43478 | struct df_link *link = DF_REF_CHAIN (use); | |
43479 | ||
43480 | if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES)) | |
43481 | { | |
43482 | root->web_not_optimizable = 1; | |
43483 | break; | |
43484 | } | |
43485 | } | |
43486 | } | |
43487 | } | |
43488 | ||
43489 | if (dump_file) | |
43490 | { | |
43491 | fprintf (dump_file, "\nSwap insn entry table after web analysis\n"); | |
43492 | dump_swap_insn_table (insn_entry); | |
43493 | } | |
43494 | ||
43495 | /* For each load and store in an optimizable web (which implies | |
43496 | the loads and stores are permuting), find the associated | |
43497 | register swaps and mark them for removal. Due to various | |
43498 | optimizations we may mark the same swap more than once. Also | |
43499 | perform special handling for swappable insns that require it. */ | |
43500 | for (i = 0; i < e; ++i) | |
43501 | if ((insn_entry[i].is_load || insn_entry[i].is_store) | |
43502 | && insn_entry[i].is_swap) | |
43503 | { | |
43504 | swap_web_entry* root_entry | |
43505 | = (swap_web_entry*)((&insn_entry[i])->unionfind_root ()); | |
43506 | if (!root_entry->web_not_optimizable) | |
43507 | mark_swaps_for_removal (insn_entry, i); | |
43508 | } | |
43509 | else if (insn_entry[i].is_swappable && insn_entry[i].special_handling) | |
43510 | { | |
43511 | swap_web_entry* root_entry | |
43512 | = (swap_web_entry*)((&insn_entry[i])->unionfind_root ()); | |
43513 | if (!root_entry->web_not_optimizable) | |
43514 | handle_special_swappables (insn_entry, i); | |
43515 | } | |
43516 | ||
43517 | /* Now delete the swaps marked for removal. */ | |
43518 | for (i = 0; i < e; ++i) | |
43519 | if (insn_entry[i].will_delete) | |
43520 | replace_swap_with_copy (insn_entry, i); | |
43521 | ||
43522 | /* Clean up. */ | |
43523 | free (insn_entry); | |
43524 | return 0; | |
43525 | } | |
43526 | ||
43527 | const pass_data pass_data_analyze_swaps = | |
43528 | { | |
43529 | RTL_PASS, /* type */ | |
43530 | "swaps", /* name */ | |
43531 | OPTGROUP_NONE, /* optinfo_flags */ | |
43532 | TV_NONE, /* tv_id */ | |
43533 | 0, /* properties_required */ | |
43534 | 0, /* properties_provided */ | |
43535 | 0, /* properties_destroyed */ | |
43536 | 0, /* todo_flags_start */ | |
43537 | TODO_df_finish, /* todo_flags_finish */ | |
43538 | }; | |
43539 | ||
43540 | class pass_analyze_swaps : public rtl_opt_pass | |
43541 | { | |
43542 | public: | |
43543 | pass_analyze_swaps(gcc::context *ctxt) | |
43544 | : rtl_opt_pass(pass_data_analyze_swaps, ctxt) | |
43545 | {} | |
43546 | ||
43547 | /* opt_pass methods: */ | |
43548 | virtual bool gate (function *) | |
43549 | { | |
43550 | return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX | |
43551 | && !TARGET_P9_VECTOR && rs6000_optimize_swaps); | |
43552 | } | |
43553 | ||
43554 | virtual unsigned int execute (function *fun) | |
43555 | { | |
43556 | return rs6000_analyze_swaps (fun); | |
43557 | } | |
43558 | ||
43559 | opt_pass *clone () | |
43560 | { | |
43561 | return new pass_analyze_swaps (m_ctxt); | |
43562 | } | |
43563 | ||
43564 | }; // class pass_analyze_swaps | |
43565 | ||
43566 | rtl_opt_pass * | |
43567 | make_pass_analyze_swaps (gcc::context *ctxt) | |
43568 | { | |
43569 | return new pass_analyze_swaps (ctxt); | |
43570 | } | |
43571 | ||
43572 | #ifdef RS6000_GLIBC_ATOMIC_FENV | |
43573 | /* Function declarations for rs6000_atomic_assign_expand_fenv. */ | |
43574 | static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl; | |
43575 | #endif | |
43576 | ||
43577 | /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */ | |
43578 | ||
43579 | static void | |
43580 | rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) | |
43581 | { | |
43582 | if (!TARGET_HARD_FLOAT || !TARGET_FPRS) | |
43583 | { | |
43584 | #ifdef RS6000_GLIBC_ATOMIC_FENV | |
43585 | if (atomic_hold_decl == NULL_TREE) | |
43586 | { | |
43587 | atomic_hold_decl | |
43588 | = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, | |
43589 | get_identifier ("__atomic_feholdexcept"), | |
43590 | build_function_type_list (void_type_node, | |
43591 | double_ptr_type_node, | |
43592 | NULL_TREE)); | |
43593 | TREE_PUBLIC (atomic_hold_decl) = 1; | |
43594 | DECL_EXTERNAL (atomic_hold_decl) = 1; | |
43595 | } | |
43596 | ||
43597 | if (atomic_clear_decl == NULL_TREE) | |
43598 | { | |
43599 | atomic_clear_decl | |
43600 | = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, | |
43601 | get_identifier ("__atomic_feclearexcept"), | |
43602 | build_function_type_list (void_type_node, | |
43603 | NULL_TREE)); | |
43604 | TREE_PUBLIC (atomic_clear_decl) = 1; | |
43605 | DECL_EXTERNAL (atomic_clear_decl) = 1; | |
43606 | } | |
43607 | ||
43608 | tree const_double = build_qualified_type (double_type_node, | |
43609 | TYPE_QUAL_CONST); | |
43610 | tree const_double_ptr = build_pointer_type (const_double); | |
43611 | if (atomic_update_decl == NULL_TREE) | |
43612 | { | |
43613 | atomic_update_decl | |
43614 | = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, | |
43615 | get_identifier ("__atomic_feupdateenv"), | |
43616 | build_function_type_list (void_type_node, | |
43617 | const_double_ptr, | |
43618 | NULL_TREE)); | |
43619 | TREE_PUBLIC (atomic_update_decl) = 1; | |
43620 | DECL_EXTERNAL (atomic_update_decl) = 1; | |
43621 | } | |
43622 | ||
43623 | tree fenv_var = create_tmp_var_raw (double_type_node); | |
43624 | TREE_ADDRESSABLE (fenv_var) = 1; | |
43625 | tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var); | |
43626 | ||
43627 | *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr); | |
43628 | *clear = build_call_expr (atomic_clear_decl, 0); | |
43629 | *update = build_call_expr (atomic_update_decl, 1, | |
43630 | fold_convert (const_double_ptr, fenv_addr)); | |
43631 | #endif | |
43632 | return; | |
43633 | } | |
43634 | ||
43635 | tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS]; | |
43636 | tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF]; | |
43637 | tree call_mffs = build_call_expr (mffs, 0); | |
43638 | ||
43639 | /* Generates the equivalent of feholdexcept (&fenv_var) | |
43640 | ||
43641 | *fenv_var = __builtin_mffs (); | |
43642 | double fenv_hold; | |
43643 | *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL; | |
43644 | __builtin_mtfsf (0xff, fenv_hold); */ | |
43645 | ||
43646 | /* Mask to clear everything except for the rounding modes and non-IEEE | |
43647 | arithmetic flag. */ | |
43648 | const unsigned HOST_WIDE_INT hold_exception_mask = | |
43649 | HOST_WIDE_INT_C (0xffffffff00000007); | |
43650 | ||
43651 | tree fenv_var = create_tmp_var_raw (double_type_node); | |
43652 | ||
43653 | tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs); | |
43654 | ||
43655 | tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var); | |
43656 | tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu, | |
43657 | build_int_cst (uint64_type_node, | |
43658 | hold_exception_mask)); | |
43659 | ||
43660 | tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node, | |
43661 | fenv_llu_and); | |
43662 | ||
43663 | tree hold_mtfsf = build_call_expr (mtfsf, 2, | |
43664 | build_int_cst (unsigned_type_node, 0xff), | |
43665 | fenv_hold_mtfsf); | |
43666 | ||
43667 | *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf); | |
43668 | ||
43669 | /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT): | |
43670 | ||
43671 | double fenv_clear = __builtin_mffs (); | |
43672 | *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL; | |
43673 | __builtin_mtfsf (0xff, fenv_clear); */ | |
43674 | ||
43675 | /* Mask to clear the entire lower FPSCR word, including all exception | |
43676 | flags and enables. */ | |
43677 | const unsigned HOST_WIDE_INT clear_exception_mask = | |
43678 | HOST_WIDE_INT_C (0xffffffff00000000); | |
43679 | ||
43680 | tree fenv_clear = create_tmp_var_raw (double_type_node); | |
43681 | ||
43682 | tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs); | |
43683 | ||
43684 | tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear); | |
43685 | tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, | |
43686 | fenv_clean_llu, | |
43687 | build_int_cst (uint64_type_node, | |
43688 | clear_exception_mask)); | |
43689 | ||
43690 | tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node, | |
43691 | fenv_clear_llu_and); | |
43692 | ||
43693 | tree clear_mtfsf = build_call_expr (mtfsf, 2, | |
43694 | build_int_cst (unsigned_type_node, 0xff), | |
43695 | fenv_clear_mtfsf); | |
43696 | ||
43697 | *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf); | |
43698 | ||
43699 | /* Generates the equivalent of feupdateenv (&fenv_var) | |
43700 | ||
43701 | double old_fenv = __builtin_mffs (); | |
43702 | double fenv_update; | |
43703 | *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) | | |
43704 | (*(uint64_t*)fenv_var & 0x1ff80fff); | |
43705 | __builtin_mtfsf (0xff, fenv_update); */ | |
43706 | ||
43707 | const unsigned HOST_WIDE_INT update_exception_mask = | |
43708 | HOST_WIDE_INT_C (0xffffffff1fffff00); | |
43709 | const unsigned HOST_WIDE_INT new_exception_mask = | |
43710 | HOST_WIDE_INT_C (0x1ff80fff); | |
43711 | ||
43712 | tree old_fenv = create_tmp_var_raw (double_type_node); | |
43713 | tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs); | |
43714 | ||
43715 | tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv); | |
43716 | tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu, | |
43717 | build_int_cst (uint64_type_node, | |
43718 | update_exception_mask)); | |
43719 | ||
43720 | tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu, | |
43721 | build_int_cst (uint64_type_node, | |
43722 | new_exception_mask)); | |
43723 | ||
43724 | tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node, | |
43725 | old_llu_and, new_llu_and); | |
43726 | ||
43727 | tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node, | |
43728 | new_llu_mask); | |
43729 | ||
43730 | tree update_mtfsf = build_call_expr (mtfsf, 2, | |
43731 | build_int_cst (unsigned_type_node, 0xff), | |
43732 | fenv_update_mtfsf); | |
43733 | ||
43734 | *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf); | |
43735 | } | |
43736 | ||
43737 | /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */ | |
43738 | ||
43739 | static bool | |
43740 | rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode, | |
43741 | optimization_type opt_type) | |
43742 | { | |
43743 | switch (op) | |
43744 | { | |
43745 | case rsqrt_optab: | |
43746 | return (opt_type == OPTIMIZE_FOR_SPEED | |
43747 | && RS6000_RECIP_AUTO_RSQRTE_P (mode1)); | |
43748 | ||
43749 | default: | |
43750 | return true; | |
43751 | } | |
43752 | } | |
43753 | \f | |
43754 | struct gcc_target targetm = TARGET_INITIALIZER; | |
43755 | ||
43756 | #include "gt-powerpcspe.h" |