/* Target-specific built-in function support for the Power architecture.
   See also rs6000-c.cc, rs6000-gen-builtins.cc, rs6000-builtins.def, and
   rs6000-overloads.def.
   Note that "normal" builtins (generic math functions, etc.) are handled
   in rs6000.cc.

   Copyright (C) 2002-2024 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "target.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "tm_p.h"
#include "optabs.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "langhooks.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "ssa.h"
#include "tree-ssa-propagate.h"
#include "builtins.h"
#include "tree-vector-builder.h"
#include "ppc-auxv.h"
#include "rs6000-internal.h"

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];

/* Support targetm.vectorize.builtin_mask_for_load.  */
tree altivec_builtin_mask_for_load;

/* **** General support functions **** */

/* Raise an error message for a builtin function that is called without the
   appropriate target options being set.  */

void
rs6000_invalid_builtin (enum rs6000_gen_builtins fncode)
{
  size_t j = (size_t) fncode;
  const char *name = rs6000_builtin_info[j].bifname;

  switch (rs6000_builtin_info[j].enable)
    {
    case ENB_P5:
      error ("%qs requires the %qs option", name, "-mcpu=power5");
      break;
    case ENB_P6:
      error ("%qs requires the %qs option", name, "-mcpu=power6");
      break;
    case ENB_P6_64:
      error ("%qs requires the %qs option and either the %qs or %qs option",
	     name, "-mcpu=power6", "-m64", "-mpowerpc64");
      break;
    case ENB_ALTIVEC:
      error ("%qs requires the %qs option", name, "-maltivec");
      break;
    case ENB_CELL:
      error ("%qs requires the %qs option", name, "-mcpu=cell");
      break;
    case ENB_VSX:
      error ("%qs requires the %qs option", name, "-mvsx");
      break;
    case ENB_P7:
      error ("%qs requires the %qs option", name, "-mcpu=power7");
      break;
    case ENB_P7_64:
      error ("%qs requires the %qs option and either the %qs or %qs option",
	     name, "-mcpu=power7", "-m64", "-mpowerpc64");
      break;
    case ENB_P8:
      error ("%qs requires the %qs option", name, "-mcpu=power8");
      break;
    case ENB_P8V:
      error ("%qs requires the %qs and %qs options", name, "-mcpu=power8",
	     "-mvsx");
      break;
    case ENB_P9:
      error ("%qs requires the %qs option", name, "-mcpu=power9");
      break;
    case ENB_P9_64:
      error ("%qs requires the %qs option and either the %qs or %qs option",
	     name, "-mcpu=power9", "-m64", "-mpowerpc64");
      break;
    case ENB_P9V:
      error ("%qs requires the %qs and %qs options", name, "-mcpu=power9",
	     "-mvsx");
      break;
    case ENB_IEEE128_HW:
      error ("%qs requires quad-precision floating-point arithmetic", name);
      break;
    case ENB_DFP:
      error ("%qs requires the %qs option", name, "-mhard-dfp");
      break;
    case ENB_CRYPTO:
      error ("%qs requires the %qs option", name, "-mcrypto");
      break;
    case ENB_HTM:
      error ("%qs requires the %qs option", name, "-mhtm");
      break;
    case ENB_P10:
      error ("%qs requires the %qs option", name, "-mcpu=power10");
      break;
    case ENB_P10_64:
      error ("%qs requires the %qs option and either the %qs or %qs option",
	     name, "-mcpu=power10", "-m64", "-mpowerpc64");
      break;
    case ENB_MMA:
      error ("%qs requires the %qs option", name, "-mmma");
      break;
    default:
    case ENB_ALWAYS:
      gcc_unreachable ();
    }
}
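
/* An illustrative sketch (hypothetical invocation): compiling a call to
   __builtin_darn with only -mcpu=power8 in effect would, assuming that
   builtin is classed as ENB_P9, reach the ENB_P9 arm above and emit
   roughly:
     error: '__builtin_darn' requires the '-mcpu=power9' option  */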

/* Check whether a builtin function is supported in this target
   configuration.  */
bool
rs6000_builtin_is_supported (enum rs6000_gen_builtins fncode)
{
  switch (rs6000_builtin_info[(size_t) fncode].enable)
    {
    case ENB_ALWAYS:
      return true;
    case ENB_P5:
      return TARGET_POPCNTB;
    case ENB_P6:
      return TARGET_CMPB;
    case ENB_P6_64:
      return TARGET_CMPB && TARGET_POWERPC64;
    case ENB_P7:
      return TARGET_POPCNTD;
    case ENB_P7_64:
      return TARGET_POPCNTD && TARGET_POWERPC64;
    case ENB_P8:
      return TARGET_DIRECT_MOVE;
    case ENB_P8V:
      return TARGET_P8_VECTOR;
    case ENB_P9:
      return TARGET_MODULO;
    case ENB_P9_64:
      return TARGET_MODULO && TARGET_POWERPC64;
    case ENB_P9V:
      return TARGET_P9_VECTOR;
    case ENB_P10:
      return TARGET_POWER10;
    case ENB_P10_64:
      return TARGET_POWER10 && TARGET_POWERPC64;
    case ENB_ALTIVEC:
      return TARGET_ALTIVEC;
    case ENB_VSX:
      return TARGET_VSX;
    case ENB_CELL:
      return TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL;
    case ENB_IEEE128_HW:
      return TARGET_FLOAT128_HW;
    case ENB_DFP:
      return TARGET_DFP;
    case ENB_CRYPTO:
      return TARGET_CRYPTO;
    case ENB_HTM:
      return TARGET_HTM;
    case ENB_MMA:
      return TARGET_MMA;
    default:
      gcc_unreachable ();
    }
  gcc_unreachable ();
}
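
/* As an illustrative note: each ENB_* class tests the feature macro that
   the corresponding CPU level first guarantees, so under -mcpu=power9 an
   ENB_P8 builtin is reported as supported via TARGET_DIRECT_MOVE and an
   ENB_P9 one via TARGET_MODULO.  */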

/* Target hook for early folding of built-ins, shamelessly stolen
   from ia64.cc.  */

tree
rs6000_fold_builtin (tree fndecl ATTRIBUTE_UNUSED,
		     int n_args ATTRIBUTE_UNUSED,
		     tree *args ATTRIBUTE_UNUSED,
		     bool ignore ATTRIBUTE_UNUSED)
{
#ifdef SUBTARGET_FOLD_BUILTIN
  return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
#else
  return NULL_TREE;
#endif
}

tree
rs6000_builtin_decl (unsigned code, bool /* initialize_p */)
{
  rs6000_gen_builtins fcode = (rs6000_gen_builtins) code;

  if (fcode >= RS6000_OVLD_MAX)
    return error_mark_node;

  return rs6000_builtin_decls[code];
}

/* Implement targetm.vectorize.builtin_mask_for_load.  */
tree
rs6000_builtin_mask_for_load (void)
{
  /* Don't use lvsl/vperm for P8 and similarly efficient machines.  */
  if ((TARGET_ALTIVEC && !TARGET_VSX)
      || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
    return altivec_builtin_mask_for_load;
  else
    return 0;
}

/* Implement targetm.vectorize.builtin_md_vectorized_function.  */

tree
rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
				       tree type_in)
{
  machine_mode in_mode, out_mode;
  int in_n, out_n;

  if (TARGET_DEBUG_BUILTIN)
    fprintf (stderr,
	     "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
	     IDENTIFIER_POINTER (DECL_NAME (fndecl)),
	     GET_MODE_NAME (TYPE_MODE (type_out)),
	     GET_MODE_NAME (TYPE_MODE (type_in)));

  /* TODO: Should this be gcc_assert?  */
  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

  enum rs6000_gen_builtins fn
    = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl);
  switch (fn)
    {
    case RS6000_BIF_RSQRTF:
      if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[RS6000_BIF_VRSQRTFP];
      break;
    case RS6000_BIF_RSQRT:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[RS6000_BIF_RSQRT_2DF];
      break;
    case RS6000_BIF_RECIPF:
      if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[RS6000_BIF_VRECIPFP];
      break;
    case RS6000_BIF_RECIP:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[RS6000_BIF_RECIP_V2DF];
      break;
    default:
      break;
    }

  machine_mode in_vmode = TYPE_MODE (type_in);
  machine_mode out_vmode = TYPE_MODE (type_out);

  /* Power10 supported vectorized built-in functions.  */
  if (TARGET_POWER10
      && in_vmode == out_vmode
      && VECTOR_UNIT_ALTIVEC_OR_VSX_P (in_vmode))
    {
      machine_mode exp_mode = DImode;
      machine_mode exp_vmode = V2DImode;
      enum rs6000_gen_builtins bif;
      switch (fn)
	{
	case RS6000_BIF_DIVWE:
	case RS6000_BIF_DIVWEU:
	  exp_mode = SImode;
	  exp_vmode = V4SImode;
	  if (fn == RS6000_BIF_DIVWE)
	    bif = RS6000_BIF_VDIVESW;
	  else
	    bif = RS6000_BIF_VDIVEUW;
	  break;
	case RS6000_BIF_DIVDE:
	case RS6000_BIF_DIVDEU:
	  if (fn == RS6000_BIF_DIVDE)
	    bif = RS6000_BIF_VDIVESD;
	  else
	    bif = RS6000_BIF_VDIVEUD;
	  break;
	case RS6000_BIF_CFUGED:
	  bif = RS6000_BIF_VCFUGED;
	  break;
	case RS6000_BIF_CNTLZDM:
	  bif = RS6000_BIF_VCLZDM;
	  break;
	case RS6000_BIF_CNTTZDM:
	  bif = RS6000_BIF_VCTZDM;
	  break;
	case RS6000_BIF_PDEPD:
	  bif = RS6000_BIF_VPDEPD;
	  break;
	case RS6000_BIF_PEXTD:
	  bif = RS6000_BIF_VPEXTD;
	  break;
	default:
	  return NULL_TREE;
	}

      if (in_mode == exp_mode && in_vmode == exp_vmode)
	return rs6000_builtin_decls[bif];
    }

  return NULL_TREE;
}
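
/* An illustrative sketch of the mapping performed above: if the
   vectorizer asks about __builtin_cntlzdm with V2DI input and output
   (two unsigned long long lanes), the hook returns the decl for
   RS6000_BIF_VCLZDM, so the loop can use the vector vclzdm form of the
   scalar cntlzdm operation.  */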

/* Return the decl of a target-specific builtin that implements the
   reciprocal of the function FNDECL, or NULL_TREE if not available.  */

tree
rs6000_builtin_reciprocal (tree fndecl)
{
  switch (DECL_MD_FUNCTION_CODE (fndecl))
    {
    case RS6000_BIF_XVSQRTDP:
      if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
	return NULL_TREE;

      return rs6000_builtin_decls[RS6000_BIF_RSQRT_2DF];

    case RS6000_BIF_XVSQRTSP:
      if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
	return NULL_TREE;

      return rs6000_builtin_decls[RS6000_BIF_RSQRT_4SF];

    default:
      return NULL_TREE;
    }
}
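
/* A sketch of how this hook is used (assuming -mrecip enables the
   automatic estimates): when a computation of the form
     1.0 / __builtin_vsx_xvsqrtdp (x)
   is seen, the caller may substitute RS6000_BIF_RSQRT_2DF, i.e. the
   reciprocal square root estimate-and-refine sequence, for the explicit
   square root followed by a divide.  */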

/* **** Initialization support **** */

/* Create a builtin vector type with a name, taking care not to give
   the canonical type a name.  */

static tree
rs6000_vector_type (const char *name, tree elt_type, unsigned num_elts)
{
  tree result = build_vector_type (elt_type, num_elts);

  /* Copy so we don't give the canonical type a name.  */
  result = build_variant_type_copy (result);

  add_builtin_type (name, result);

  return result;
}

/* Debug utility to translate a type node to a single textual token.  */
static const char *
rs6000_type_string (tree type_node)
{
  if (type_node == NULL_TREE)
    return "**NULL**";
  else if (type_node == void_type_node)
    return "void";
  else if (type_node == long_integer_type_node)
    return "long";
  else if (type_node == long_unsigned_type_node)
    return "ulong";
  else if (type_node == long_long_integer_type_node)
    return "longlong";
  else if (type_node == long_long_unsigned_type_node)
    return "ulonglong";
  else if (type_node == bool_V2DI_type_node)
    return "vbll";
  else if (type_node == bool_V4SI_type_node)
    return "vbi";
  else if (type_node == bool_V8HI_type_node)
    return "vbs";
  else if (type_node == bool_V16QI_type_node)
    return "vbc";
  else if (type_node == bool_int_type_node)
    return "bool";
  else if (type_node == dfloat64_type_node)
    return "_Decimal64";
  else if (type_node == double_type_node)
    return "double";
  else if (type_node == intDI_type_node)
    return "sll";
  else if (type_node == intHI_type_node)
    return "ss";
  else if (type_node == ibm128_float_type_node)
    return "__ibm128";
  else if (type_node == ieee128_float_type_node)
    return "__ieee128";
  else if (type_node == opaque_V4SI_type_node)
    return "opaque";
  else if (POINTER_TYPE_P (type_node))
    return "void*";
  else if (type_node == intQI_type_node || type_node == char_type_node)
    return "sc";
  else if (type_node == dfloat32_type_node)
    return "_Decimal32";
  else if (type_node == float_type_node)
    return "float";
  else if (type_node == intSI_type_node || type_node == integer_type_node)
    return "si";
  else if (type_node == dfloat128_type_node)
    return "_Decimal128";
  else if (type_node == long_double_type_node)
    return "longdouble";
  else if (type_node == intTI_type_node)
    return "sq";
  else if (type_node == unsigned_intDI_type_node)
    return "ull";
  else if (type_node == unsigned_intHI_type_node)
    return "us";
  else if (type_node == unsigned_intQI_type_node)
    return "uc";
  else if (type_node == unsigned_intSI_type_node)
    return "ui";
  else if (type_node == unsigned_intTI_type_node)
    return "uq";
  else if (type_node == unsigned_V1TI_type_node)
    return "vuq";
  else if (type_node == unsigned_V2DI_type_node)
    return "vull";
  else if (type_node == unsigned_V4SI_type_node)
    return "vui";
  else if (type_node == unsigned_V8HI_type_node)
    return "vus";
  else if (type_node == unsigned_V16QI_type_node)
    return "vuc";
  else if (type_node == V16QI_type_node)
    return "vsc";
  else if (type_node == V1TI_type_node)
    return "vsq";
  else if (type_node == V2DF_type_node)
    return "vd";
  else if (type_node == V2DI_type_node)
    return "vsll";
  else if (type_node == V4SF_type_node)
    return "vf";
  else if (type_node == V4SI_type_node)
    return "vsi";
  else if (type_node == V8HI_type_node)
    return "vss";
  else if (type_node == pixel_V8HI_type_node)
    return "vp";
  else if (type_node == pcvoid_type_node)
    return "voidc*";
  else if (type_node == float128_type_node)
    return "_Float128";
  else if (type_node == vector_pair_type_node)
    return "__vector_pair";
  else if (type_node == vector_quad_type_node)
    return "__vector_quad";

  return "unknown";
}
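
/* For reference, an illustrative line of -mdebug=builtin output built
   from these tokens might read:
     vsi __builtin_altivec_vadduwm (vsi, vsi);
   where "vsi" abbreviates vector signed int.  */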

void
rs6000_init_builtins (void)
{
  tree tdecl;
  tree t;

  if (TARGET_DEBUG_BUILTIN)
    fprintf (stderr, "rs6000_init_builtins%s%s\n",
	     (TARGET_ALTIVEC) ? ", altivec" : "",
	     (TARGET_VSX) ? ", vsx" : "");

  V2DI_type_node = rs6000_vector_type ("__vector long long",
				       long_long_integer_type_node, 2);
  ptr_V2DI_type_node
    = build_pointer_type (build_qualified_type (V2DI_type_node,
						TYPE_QUAL_CONST));

  V2DF_type_node = rs6000_vector_type ("__vector double", double_type_node, 2);
  ptr_V2DF_type_node
    = build_pointer_type (build_qualified_type (V2DF_type_node,
						TYPE_QUAL_CONST));

  V4SI_type_node = rs6000_vector_type ("__vector signed int",
				       intSI_type_node, 4);
  ptr_V4SI_type_node
    = build_pointer_type (build_qualified_type (V4SI_type_node,
						TYPE_QUAL_CONST));

  V4SF_type_node = rs6000_vector_type ("__vector float", float_type_node, 4);
  ptr_V4SF_type_node
    = build_pointer_type (build_qualified_type (V4SF_type_node,
						TYPE_QUAL_CONST));

  V8HI_type_node = rs6000_vector_type ("__vector signed short",
				       intHI_type_node, 8);
  ptr_V8HI_type_node
    = build_pointer_type (build_qualified_type (V8HI_type_node,
						TYPE_QUAL_CONST));

  V16QI_type_node = rs6000_vector_type ("__vector signed char",
					intQI_type_node, 16);
  ptr_V16QI_type_node
    = build_pointer_type (build_qualified_type (V16QI_type_node,
						TYPE_QUAL_CONST));

  unsigned_V16QI_type_node = rs6000_vector_type ("__vector unsigned char",
						 unsigned_intQI_type_node, 16);
  ptr_unsigned_V16QI_type_node
    = build_pointer_type (build_qualified_type (unsigned_V16QI_type_node,
						TYPE_QUAL_CONST));

  unsigned_V8HI_type_node = rs6000_vector_type ("__vector unsigned short",
						unsigned_intHI_type_node, 8);
  ptr_unsigned_V8HI_type_node
    = build_pointer_type (build_qualified_type (unsigned_V8HI_type_node,
						TYPE_QUAL_CONST));

  unsigned_V4SI_type_node = rs6000_vector_type ("__vector unsigned int",
						unsigned_intSI_type_node, 4);
  ptr_unsigned_V4SI_type_node
    = build_pointer_type (build_qualified_type (unsigned_V4SI_type_node,
						TYPE_QUAL_CONST));

  unsigned_V2DI_type_node
    = rs6000_vector_type ("__vector unsigned long long",
			  long_long_unsigned_type_node, 2);

  ptr_unsigned_V2DI_type_node
    = build_pointer_type (build_qualified_type (unsigned_V2DI_type_node,
						TYPE_QUAL_CONST));

  opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);

  const_str_type_node
    = build_pointer_type (build_qualified_type (char_type_node,
						TYPE_QUAL_CONST));

  /* We use V1TI mode as a special container to hold __int128_t items that
     must live in VSX registers.  */
  if (intTI_type_node)
    {
      V1TI_type_node = rs6000_vector_type ("__vector __int128",
					   intTI_type_node, 1);
      ptr_V1TI_type_node
	= build_pointer_type (build_qualified_type (V1TI_type_node,
						    TYPE_QUAL_CONST));
      unsigned_V1TI_type_node
	= rs6000_vector_type ("__vector unsigned __int128",
			      unsigned_intTI_type_node, 1);
      ptr_unsigned_V1TI_type_node
	= build_pointer_type (build_qualified_type (unsigned_V1TI_type_node,
						    TYPE_QUAL_CONST));
    }

  /* The 'vector bool ...' types must be kept distinct from 'vector unsigned
     ...' types, especially in C++ land.  Similarly, 'vector pixel' is
     distinct from 'vector unsigned short'.  */

  bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
  bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
  bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
  bool_long_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
  pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);

  long_integer_type_internal_node = long_integer_type_node;
  long_unsigned_type_internal_node = long_unsigned_type_node;
  long_long_integer_type_internal_node = long_long_integer_type_node;
  long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
  intQI_type_internal_node = intQI_type_node;
  uintQI_type_internal_node = unsigned_intQI_type_node;
  intHI_type_internal_node = intHI_type_node;
  uintHI_type_internal_node = unsigned_intHI_type_node;
  intSI_type_internal_node = intSI_type_node;
  uintSI_type_internal_node = unsigned_intSI_type_node;
  intDI_type_internal_node = intDI_type_node;
  uintDI_type_internal_node = unsigned_intDI_type_node;
  intTI_type_internal_node = intTI_type_node;
  uintTI_type_internal_node = unsigned_intTI_type_node;
  float_type_internal_node = float_type_node;
  double_type_internal_node = double_type_node;
  long_double_type_internal_node = long_double_type_node;
  dfloat64_type_internal_node = dfloat64_type_node;
  dfloat128_type_internal_node = dfloat128_type_node;
  void_type_internal_node = void_type_node;

  ptr_intQI_type_node
    = build_pointer_type (build_qualified_type (intQI_type_internal_node,
						TYPE_QUAL_CONST));
  ptr_uintQI_type_node
    = build_pointer_type (build_qualified_type (uintQI_type_internal_node,
						TYPE_QUAL_CONST));
  ptr_intHI_type_node
    = build_pointer_type (build_qualified_type (intHI_type_internal_node,
						TYPE_QUAL_CONST));
  ptr_uintHI_type_node
    = build_pointer_type (build_qualified_type (uintHI_type_internal_node,
						TYPE_QUAL_CONST));
  ptr_intSI_type_node
    = build_pointer_type (build_qualified_type (intSI_type_internal_node,
						TYPE_QUAL_CONST));
  ptr_uintSI_type_node
    = build_pointer_type (build_qualified_type (uintSI_type_internal_node,
						TYPE_QUAL_CONST));
  ptr_intDI_type_node
    = build_pointer_type (build_qualified_type (intDI_type_internal_node,
						TYPE_QUAL_CONST));
  ptr_uintDI_type_node
    = build_pointer_type (build_qualified_type (uintDI_type_internal_node,
						TYPE_QUAL_CONST));
  ptr_intTI_type_node
    = build_pointer_type (build_qualified_type (intTI_type_internal_node,
						TYPE_QUAL_CONST));
  ptr_uintTI_type_node
    = build_pointer_type (build_qualified_type (uintTI_type_internal_node,
						TYPE_QUAL_CONST));

  t = build_qualified_type (long_integer_type_internal_node, TYPE_QUAL_CONST);
  ptr_long_integer_type_node = build_pointer_type (t);

  t = build_qualified_type (long_unsigned_type_internal_node, TYPE_QUAL_CONST);
  ptr_long_unsigned_type_node = build_pointer_type (t);

  ptr_float_type_node
    = build_pointer_type (build_qualified_type (float_type_internal_node,
						TYPE_QUAL_CONST));
  ptr_double_type_node
    = build_pointer_type (build_qualified_type (double_type_internal_node,
						TYPE_QUAL_CONST));
  ptr_long_double_type_node
    = build_pointer_type (build_qualified_type (long_double_type_internal_node,
						TYPE_QUAL_CONST));
  if (dfloat64_type_node)
    {
      t = build_qualified_type (dfloat64_type_internal_node, TYPE_QUAL_CONST);
      ptr_dfloat64_type_node = build_pointer_type (t);
    }
  else
    ptr_dfloat64_type_node = NULL;

  if (dfloat128_type_node)
    {
      t = build_qualified_type (dfloat128_type_internal_node, TYPE_QUAL_CONST);
      ptr_dfloat128_type_node = build_pointer_type (t);
    }
  else
    ptr_dfloat128_type_node = NULL;

  t = build_qualified_type (long_long_integer_type_internal_node,
			    TYPE_QUAL_CONST);
  ptr_long_long_integer_type_node = build_pointer_type (t);

  t = build_qualified_type (long_long_unsigned_type_internal_node,
			    TYPE_QUAL_CONST);
  ptr_long_long_unsigned_type_node = build_pointer_type (t);

  /* 128-bit floating point support.  KFmode is IEEE 128-bit floating point.
     IFmode is the IBM extended 128-bit format that is a pair of doubles.
     TFmode will be either IEEE 128-bit floating point or the IBM
     double-double format that uses a pair of doubles, depending on the
     switches and defaults.

     If we don't support either 128-bit IBM double double or IEEE 128-bit
     floating point, we need to make sure the type is non-zero or else the
     self-test fails during bootstrap.

     Always create __ibm128 as a separate type, even if the current long
     double format is IBM extended double.

     For IEEE 128-bit floating point, always create the type __ieee128.  If
     the user used -mfloat128, rs6000-c.cc will create a define from
     __float128 to __ieee128.  */
  if (TARGET_LONG_DOUBLE_128 && (!TARGET_IEEEQUAD || TARGET_FLOAT128_TYPE))
    {
      if (!TARGET_IEEEQUAD)
	ibm128_float_type_node = long_double_type_node;
      else
	{
	  ibm128_float_type_node = make_node (REAL_TYPE);
	  TYPE_PRECISION (ibm128_float_type_node) = 128;
	  SET_TYPE_MODE (ibm128_float_type_node, IFmode);
	  layout_type (ibm128_float_type_node);
	}
      t = build_qualified_type (ibm128_float_type_node, TYPE_QUAL_CONST);
      lang_hooks.types.register_builtin_type (ibm128_float_type_node,
					      "__ibm128");
    }
  else
    ibm128_float_type_node = NULL_TREE;

  if (TARGET_FLOAT128_TYPE)
    {
      if (TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128)
	ieee128_float_type_node = long_double_type_node;
      else
	{
	  /* For C we only need to register the __ieee128 name for
	     it.  For C++, we create a distinct type which will mangle
	     differently (u9__ieee128) vs. _Float128 (DF128_) and behave
	     backwards compatibly.  */
	  if (float128t_type_node == NULL_TREE)
	    {
	      float128t_type_node = make_node (REAL_TYPE);
	      TYPE_PRECISION (float128t_type_node)
		= TYPE_PRECISION (float128_type_node);
	      layout_type (float128t_type_node);
	      SET_TYPE_MODE (float128t_type_node,
			     TYPE_MODE (float128_type_node));
	    }
	  ieee128_float_type_node = float128t_type_node;
	}
      t = build_qualified_type (ieee128_float_type_node, TYPE_QUAL_CONST);
      lang_hooks.types.register_builtin_type (ieee128_float_type_node,
					      "__ieee128");
    }
  else
    ieee128_float_type_node = NULL_TREE;

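  /* An illustrative summary of the result on a typical powerpc64
     configuration: with IBM long double, __ibm128 aliases long double and
     __ieee128 is the distinct float128t_type_node (KFmode); with
     -mabi=ieeelongdouble, __ieee128 aliases long double and __ibm128 is a
     freshly laid out IFmode REAL_TYPE.  */
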
  /* Vector pair and vector quad support.  */
  vector_pair_type_node = make_node (OPAQUE_TYPE);
  SET_TYPE_MODE (vector_pair_type_node, OOmode);
  TYPE_SIZE (vector_pair_type_node) = bitsize_int (GET_MODE_BITSIZE (OOmode));
  TYPE_PRECISION (vector_pair_type_node) = GET_MODE_BITSIZE (OOmode);
  TYPE_SIZE_UNIT (vector_pair_type_node) = size_int (GET_MODE_SIZE (OOmode));
  SET_TYPE_ALIGN (vector_pair_type_node, 256);
  TYPE_USER_ALIGN (vector_pair_type_node) = 0;
  lang_hooks.types.register_builtin_type (vector_pair_type_node,
					  "__vector_pair");
  t = build_qualified_type (vector_pair_type_node, TYPE_QUAL_CONST);
  ptr_vector_pair_type_node = build_pointer_type (t);

  vector_quad_type_node = make_node (OPAQUE_TYPE);
  SET_TYPE_MODE (vector_quad_type_node, XOmode);
  TYPE_SIZE (vector_quad_type_node) = bitsize_int (GET_MODE_BITSIZE (XOmode));
  TYPE_PRECISION (vector_quad_type_node) = GET_MODE_BITSIZE (XOmode);
  TYPE_SIZE_UNIT (vector_quad_type_node) = size_int (GET_MODE_SIZE (XOmode));
  SET_TYPE_ALIGN (vector_quad_type_node, 512);
  TYPE_USER_ALIGN (vector_quad_type_node) = 0;
  lang_hooks.types.register_builtin_type (vector_quad_type_node,
					  "__vector_quad");
  t = build_qualified_type (vector_quad_type_node, TYPE_QUAL_CONST);
  ptr_vector_quad_type_node = build_pointer_type (t);

  tdecl = add_builtin_type ("__bool char", bool_char_type_node);
  TYPE_NAME (bool_char_type_node) = tdecl;

  tdecl = add_builtin_type ("__bool short", bool_short_type_node);
  TYPE_NAME (bool_short_type_node) = tdecl;

  tdecl = add_builtin_type ("__bool int", bool_int_type_node);
  TYPE_NAME (bool_int_type_node) = tdecl;

  tdecl = add_builtin_type ("__pixel", pixel_type_node);
  TYPE_NAME (pixel_type_node) = tdecl;

  bool_V16QI_type_node = rs6000_vector_type ("__vector __bool char",
					     bool_char_type_node, 16);
  ptr_bool_V16QI_type_node
    = build_pointer_type (build_qualified_type (bool_V16QI_type_node,
						TYPE_QUAL_CONST));

  bool_V8HI_type_node = rs6000_vector_type ("__vector __bool short",
					    bool_short_type_node, 8);
  ptr_bool_V8HI_type_node
    = build_pointer_type (build_qualified_type (bool_V8HI_type_node,
						TYPE_QUAL_CONST));

  bool_V4SI_type_node = rs6000_vector_type ("__vector __bool int",
					    bool_int_type_node, 4);
  ptr_bool_V4SI_type_node
    = build_pointer_type (build_qualified_type (bool_V4SI_type_node,
						TYPE_QUAL_CONST));

  bool_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
					    ? "__vector __bool long"
					    : "__vector __bool long long",
					    bool_long_long_type_node, 2);
  ptr_bool_V2DI_type_node
    = build_pointer_type (build_qualified_type (bool_V2DI_type_node,
						TYPE_QUAL_CONST));

  bool_V1TI_type_node = rs6000_vector_type ("__vector __bool __int128",
					    intTI_type_node, 1);
  ptr_bool_V1TI_type_node
    = build_pointer_type (build_qualified_type (bool_V1TI_type_node,
						TYPE_QUAL_CONST));

  pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel",
					     pixel_type_node, 8);
  ptr_pixel_V8HI_type_node
    = build_pointer_type (build_qualified_type (pixel_V8HI_type_node,
						TYPE_QUAL_CONST));
  pcvoid_type_node
    = build_pointer_type (build_qualified_type (void_type_node,
						TYPE_QUAL_CONST));

  /* Execute the autogenerated initialization code for builtins.  */
  rs6000_init_generated_builtins ();

  if (TARGET_DEBUG_BUILTIN)
    {
      fprintf (stderr, "\nAutogenerated built-in functions:\n\n");
      for (int i = 1; i < (int) RS6000_BIF_MAX; i++)
	{
	  enum rs6000_gen_builtins fn_code = (enum rs6000_gen_builtins) i;
	  if (!rs6000_builtin_is_supported (fn_code))
	    continue;
	  tree fntype = rs6000_builtin_info[i].fntype;
	  tree t = TREE_TYPE (fntype);
	  fprintf (stderr, "%s %s (", rs6000_type_string (t),
		   rs6000_builtin_info[i].bifname);
	  t = TYPE_ARG_TYPES (fntype);
	  while (t && TREE_VALUE (t) != void_type_node)
	    {
	      fprintf (stderr, "%s",
		       rs6000_type_string (TREE_VALUE (t)));
	      t = TREE_CHAIN (t);
	      if (t && TREE_VALUE (t) != void_type_node)
		fprintf (stderr, ", ");
	    }
	  fprintf (stderr, "); %s [%4d]\n",
		   rs6000_builtin_info[i].attr_string, (int) i);
	}
      fprintf (stderr, "\nEnd autogenerated built-in functions.\n\n\n");
    }

  if (TARGET_XCOFF)
    {
      /* AIX libm provides clog as __clog.  */
      if ((tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
	set_user_assembler_name (tdecl, "__clog");

      /* When long double is 64 bit, some long double builtins of libc
	 functions (like __builtin_frexpl) must call the double version
	 (frexp) not the long double version (frexpl) that expects a 128 bit
	 argument.  */
      if (! TARGET_LONG_DOUBLE_128)
	{
	  if ((tdecl = builtin_decl_explicit (BUILT_IN_FMODL)) != NULL_TREE)
	    set_user_assembler_name (tdecl, "fmod");
	  if ((tdecl = builtin_decl_explicit (BUILT_IN_FREXPL)) != NULL_TREE)
	    set_user_assembler_name (tdecl, "frexp");
	  if ((tdecl = builtin_decl_explicit (BUILT_IN_LDEXPL)) != NULL_TREE)
	    set_user_assembler_name (tdecl, "ldexp");
	  if ((tdecl = builtin_decl_explicit (BUILT_IN_MODFL)) != NULL_TREE)
	    set_user_assembler_name (tdecl, "modf");
	}
    }

  altivec_builtin_mask_for_load
    = rs6000_builtin_decls[RS6000_BIF_MASK_FOR_LOAD];

#ifdef SUBTARGET_INIT_BUILTINS
  SUBTARGET_INIT_BUILTINS;
#endif

  return;
}

/* **** GIMPLE folding support **** */

/* Helper function to handle the gimple folding of a vector compare
   operation.  This sets up true/false vectors, and uses the
   VEC_COND_EXPR operation.
   CODE indicates which comparison is to be made.  (EQ, GT, ...).
   TYPE indicates the type of the result.
   Code is inserted before GSI.  */
static tree
fold_build_vec_cmp (tree_code code, tree type, tree arg0, tree arg1,
		    gimple_stmt_iterator *gsi)
{
  tree cmp_type = truth_type_for (type);
  tree zero_vec = build_zero_cst (type);
  tree minus_one_vec = build_minus_one_cst (type);
  tree temp = create_tmp_reg_or_ssa_name (cmp_type);
  gimple *g = gimple_build_assign (temp, code, arg0, arg1);
  gsi_insert_before (gsi, g, GSI_SAME_STMT);
  return fold_build3 (VEC_COND_EXPR, type, temp, minus_one_vec, zero_vec);
}
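
/* A sketch of the GIMPLE produced for a vector-int comparison (SSA names
   are illustrative):
     cmp_1 = a_2 > b_3;
     lhs_4 = VEC_COND_EXPR <cmp_1, { -1, -1, -1, -1 }, { 0, 0, 0, 0 }>;
   so true lanes become all-ones and false lanes become all-zeros.  */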

/* Helper function to handle the in-between steps for the
   vector compare built-ins.  */
static void
fold_compare_helper (gimple_stmt_iterator *gsi, tree_code code, gimple *stmt)
{
  tree arg0 = gimple_call_arg (stmt, 0);
  tree arg1 = gimple_call_arg (stmt, 1);
  tree lhs = gimple_call_lhs (stmt);
  tree cmp = fold_build_vec_cmp (code, TREE_TYPE (lhs), arg0, arg1, gsi);
  gimple *g = gimple_build_assign (lhs, cmp);
  gimple_set_location (g, gimple_location (stmt));
  gsi_replace (gsi, g, true);
}

/* Helper function to map V2DF and V4SF types to their
   integral equivalents (V2DI and V4SI).  */
tree
map_to_integral_tree_type (tree input_tree_type)
{
  if (INTEGRAL_TYPE_P (TREE_TYPE (input_tree_type)))
    return input_tree_type;
  else
    {
      if (types_compatible_p (TREE_TYPE (input_tree_type),
			      TREE_TYPE (V2DF_type_node)))
	return V2DI_type_node;
      else if (types_compatible_p (TREE_TYPE (input_tree_type),
				   TREE_TYPE (V4SF_type_node)))
	return V4SI_type_node;
      else
	gcc_unreachable ();
    }
}
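
/* For example, a V2DF (vector double) input maps to V2DI here, so a
   permute selector for a double vector is built over 64-bit integer
   lanes; integral inputs such as V4SI are returned unchanged.  */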

/* Helper function to handle the vector merge[hl] built-ins.  The
   implementation difference between the h and l versions for this code is
   in the values used when building the permute vector for the high-word
   versus low-word merge.  The variance is keyed off the use_high
   parameter.  */
static void
fold_mergehl_helper (gimple_stmt_iterator *gsi, gimple *stmt, int use_high)
{
  tree arg0 = gimple_call_arg (stmt, 0);
  tree arg1 = gimple_call_arg (stmt, 1);
  tree lhs = gimple_call_lhs (stmt);
  tree lhs_type = TREE_TYPE (lhs);
  int n_elts = TYPE_VECTOR_SUBPARTS (lhs_type);
  int midpoint = n_elts / 2;
  int offset = 0;

  if (use_high == 1)
    offset = midpoint;

  /* The permute_type will match the lhs for integral types.  For double and
     float types, the permute type needs to map to the V2 or V4 type that
     matches size.  */
  tree permute_type;
  permute_type = map_to_integral_tree_type (lhs_type);
  tree_vector_builder elts (permute_type, VECTOR_CST_NELTS (arg0), 1);

  for (int i = 0; i < midpoint; i++)
    {
      elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
				     offset + i));
      elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
				     offset + n_elts + i));
    }

  tree permute = elts.build ();

  gimple *g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute);
  gimple_set_location (g, gimple_location (stmt));
  gsi_replace (gsi, g, true);
}
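
/* Worked example for a V4SI merge: with use_high == 1 (n_elts == 4,
   offset == 2) the loop above builds the selector {2, 6, 3, 7}, and with
   use_high == 0 it builds {0, 4, 1, 5}, interleaving one half of arg0
   with the corresponding half of arg1.  */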

/* Helper function to handle the vector merge[eo] built-ins.  */
static void
fold_mergeeo_helper (gimple_stmt_iterator *gsi, gimple *stmt, int use_odd)
{
  tree arg0 = gimple_call_arg (stmt, 0);
  tree arg1 = gimple_call_arg (stmt, 1);
  tree lhs = gimple_call_lhs (stmt);
  tree lhs_type = TREE_TYPE (lhs);
  int n_elts = TYPE_VECTOR_SUBPARTS (lhs_type);

  /* The permute_type will match the lhs for integral types.  For double and
     float types, the permute type needs to map to the V2 or V4 type that
     matches size.  */
  tree permute_type;
  permute_type = map_to_integral_tree_type (lhs_type);

  tree_vector_builder elts (permute_type, VECTOR_CST_NELTS (arg0), 1);

  /* Build the permute vector.  */
  for (int i = 0; i < n_elts / 2; i++)
    {
      elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
				     2*i + use_odd));
      elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
				     2*i + use_odd + n_elts));
    }

  tree permute = elts.build ();

  gimple *g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute);
  gimple_set_location (g, gimple_location (stmt));
  gsi_replace (gsi, g, true);
}
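
/* Worked example for a V4SI merge: with use_odd == 0 the selector built
   above is {0, 4, 2, 6} (even lanes of arg0 and arg1 interleaved), and
   with use_odd == 1 it is {1, 5, 3, 7}.  */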

/* Helper function to sort out which built-ins may be valid without having
   an LHS.  */
static bool
rs6000_builtin_valid_without_lhs (enum rs6000_gen_builtins fn_code,
				  tree fndecl)
{
  if (TREE_TYPE (TREE_TYPE (fndecl)) == void_type_node)
    return true;

  switch (fn_code)
    {
    case RS6000_BIF_STVX_V16QI:
    case RS6000_BIF_STVX_V8HI:
    case RS6000_BIF_STVX_V4SI:
    case RS6000_BIF_STVX_V4SF:
    case RS6000_BIF_STVX_V2DI:
    case RS6000_BIF_STVX_V2DF:
    case RS6000_BIF_STXVW4X_V16QI:
    case RS6000_BIF_STXVW4X_V8HI:
    case RS6000_BIF_STXVW4X_V4SF:
    case RS6000_BIF_STXVW4X_V4SI:
    case RS6000_BIF_STXVD2X_V2DF:
    case RS6000_BIF_STXVD2X_V2DI:
      return true;
    default:
      return false;
    }
}

/* Expand the MMA built-ins early, so that we can convert the pass-by-reference
   __vector_quad arguments into pass-by-value arguments, leading to more
   efficient code generation.  */
static bool
rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi,
				rs6000_gen_builtins fn_code)
{
  gimple *stmt = gsi_stmt (*gsi);
  size_t fncode = (size_t) fn_code;

  if (!bif_is_mma (rs6000_builtin_info[fncode]))
    return false;

  /* Each call that can be gimple-expanded has an associated built-in
     function that it will expand into.  If this one doesn't, we have
     already expanded it!  Exceptions: lxvp and stxvp.  */
  if (rs6000_builtin_info[fncode].assoc_bif == RS6000_BIF_NONE
      && fncode != RS6000_BIF_LXVP
      && fncode != RS6000_BIF_STXVP)
    return false;

  bifdata *bd = &rs6000_builtin_info[fncode];
  unsigned nopnds = bd->nargs;
  gimple_seq new_seq = NULL;
  gimple *new_call;
  tree new_decl;

  /* Compatibility built-ins; we used to call these
     __builtin_mma_{dis,}assemble_pair, but now we call them
     __builtin_vsx_{dis,}assemble_pair.  Handle the old versions.  */
  if (fncode == RS6000_BIF_ASSEMBLE_PAIR)
    fncode = RS6000_BIF_ASSEMBLE_PAIR_V;
  else if (fncode == RS6000_BIF_DISASSEMBLE_PAIR)
    fncode = RS6000_BIF_DISASSEMBLE_PAIR_V;

  if (fncode == RS6000_BIF_DISASSEMBLE_ACC
      || fncode == RS6000_BIF_DISASSEMBLE_PAIR_V)
    {
      /* This is an MMA disassemble built-in function.  */
      push_gimplify_context (true);
      unsigned nvec = (fncode == RS6000_BIF_DISASSEMBLE_ACC) ? 4 : 2;
      tree dst_ptr = gimple_call_arg (stmt, 0);
      tree src_ptr = gimple_call_arg (stmt, 1);
      tree src_type = (fncode == RS6000_BIF_DISASSEMBLE_ACC)
		      ? build_pointer_type (vector_quad_type_node)
		      : build_pointer_type (vector_pair_type_node);
      if (TREE_TYPE (src_ptr) != src_type)
	src_ptr = build1 (NOP_EXPR, src_type, src_ptr);

      tree src = create_tmp_reg_or_ssa_name (TREE_TYPE (src_type));
      gimplify_assign (src, build_simple_mem_ref (src_ptr), &new_seq);

      /* If we are not disassembling an accumulator/pair or our destination is
	 another accumulator/pair, then just copy the entire thing as is.  */
      if ((fncode == RS6000_BIF_DISASSEMBLE_ACC
	   && TREE_TYPE (TREE_TYPE (dst_ptr)) == vector_quad_type_node)
	  || (fncode == RS6000_BIF_DISASSEMBLE_PAIR_V
	      && TREE_TYPE (TREE_TYPE (dst_ptr)) == vector_pair_type_node))
	{
	  tree dst = build_simple_mem_ref (build1 (NOP_EXPR,
						   src_type, dst_ptr));
	  gimplify_assign (dst, src, &new_seq);
	  pop_gimplify_context (NULL);
	  gsi_replace_with_seq (gsi, new_seq, true);
	  return true;
	}

      /* If we're disassembling an accumulator into a different type, we need
	 to emit an xxmfacc instruction now, since we cannot do it later.  */
      if (fncode == RS6000_BIF_DISASSEMBLE_ACC)
	{
	  new_decl = rs6000_builtin_decls[RS6000_BIF_XXMFACC_INTERNAL];
	  new_call = gimple_build_call (new_decl, 1, src);
	  src = create_tmp_reg_or_ssa_name (vector_quad_type_node);
	  gimple_call_set_lhs (new_call, src);
	  gimple_seq_add_stmt (&new_seq, new_call);
	}

      /* Copy the accumulator/pair vector by vector.  */
      new_decl
	= rs6000_builtin_decls[rs6000_builtin_info[fncode].assoc_bif];
      tree dst_type = build_pointer_type_for_mode (unsigned_V16QI_type_node,
						   ptr_mode, true);
      tree dst_base = build1 (NOP_EXPR, dst_type, dst_ptr);
      for (unsigned i = 0; i < nvec; i++)
	{
	  unsigned index = WORDS_BIG_ENDIAN ? i : nvec - 1 - i;
	  tree dst = build2 (MEM_REF, unsigned_V16QI_type_node, dst_base,
			     build_int_cst (dst_type, index * 16));
	  tree dstssa = create_tmp_reg_or_ssa_name (unsigned_V16QI_type_node);
	  new_call = gimple_build_call (new_decl, 2, src,
					build_int_cstu (uint16_type_node, i));
	  gimple_call_set_lhs (new_call, dstssa);
	  gimple_seq_add_stmt (&new_seq, new_call);
	  gimplify_assign (dst, dstssa, &new_seq);
	}
      pop_gimplify_context (NULL);
      gsi_replace_with_seq (gsi, new_seq, true);
      return true;
    }

  /* TODO: Do some factoring on these two chunks.  */
  if (fncode == RS6000_BIF_LXVP)
    {
      push_gimplify_context (true);
      tree offset = gimple_call_arg (stmt, 0);
      tree ptr = gimple_call_arg (stmt, 1);
      tree lhs = gimple_call_lhs (stmt);
      if (TREE_TYPE (TREE_TYPE (ptr)) != vector_pair_type_node)
	ptr = build1 (NOP_EXPR,
		      build_pointer_type (vector_pair_type_node), ptr);
      tree mem = build_simple_mem_ref (build2 (POINTER_PLUS_EXPR,
					       TREE_TYPE (ptr), ptr, offset));
      gimplify_assign (lhs, mem, &new_seq);
      pop_gimplify_context (NULL);
      gsi_replace_with_seq (gsi, new_seq, true);
      return true;
    }

  if (fncode == RS6000_BIF_STXVP)
    {
      push_gimplify_context (true);
      tree src = gimple_call_arg (stmt, 0);
      tree offset = gimple_call_arg (stmt, 1);
      tree ptr = gimple_call_arg (stmt, 2);
      if (TREE_TYPE (TREE_TYPE (ptr)) != vector_pair_type_node)
	ptr = build1 (NOP_EXPR,
		      build_pointer_type (vector_pair_type_node), ptr);
      tree mem = build_simple_mem_ref (build2 (POINTER_PLUS_EXPR,
					       TREE_TYPE (ptr), ptr, offset));
      gimplify_assign (mem, src, &new_seq);
      pop_gimplify_context (NULL);
      gsi_replace_with_seq (gsi, new_seq, true);
      return true;
    }

  /* Convert this built-in into an internal version that uses pass-by-value
     arguments.  The internal built-in is found in the assoc_bif field.  */
  new_decl = rs6000_builtin_decls[rs6000_builtin_info[fncode].assoc_bif];
  tree lhs, op[MAX_MMA_OPERANDS];
  tree acc = gimple_call_arg (stmt, 0);
  push_gimplify_context (true);

  if (bif_is_quad (*bd))
    {
      /* This built-in has a pass-by-reference accumulator input, so load it
	 into a temporary accumulator for use as a pass-by-value input.  */
      op[0] = create_tmp_reg_or_ssa_name (vector_quad_type_node);
      for (unsigned i = 1; i < nopnds; i++)
	op[i] = gimple_call_arg (stmt, i);
      gimplify_assign (op[0], build_simple_mem_ref (acc), &new_seq);
    }
  else
    {
      /* This built-in does not use its pass-by-reference accumulator argument
	 as an input argument, so remove it from the input list.  */
      nopnds--;
      for (unsigned i = 0; i < nopnds; i++)
	op[i] = gimple_call_arg (stmt, i + 1);
    }

  switch (nopnds)
    {
    case 0:
      new_call = gimple_build_call (new_decl, 0);
      break;
    case 1:
      new_call = gimple_build_call (new_decl, 1, op[0]);
      break;
    case 2:
      new_call = gimple_build_call (new_decl, 2, op[0], op[1]);
      break;
    case 3:
      new_call = gimple_build_call (new_decl, 3, op[0], op[1], op[2]);
      break;
    case 4:
      new_call = gimple_build_call (new_decl, 4, op[0], op[1], op[2], op[3]);
      break;
    case 5:
      new_call = gimple_build_call (new_decl, 5, op[0], op[1], op[2], op[3],
				    op[4]);
      break;
    case 6:
      new_call = gimple_build_call (new_decl, 6, op[0], op[1], op[2], op[3],
				    op[4], op[5]);
      break;
    case 7:
      new_call = gimple_build_call (new_decl, 7, op[0], op[1], op[2], op[3],
				    op[4], op[5], op[6]);
      break;
    default:
      gcc_unreachable ();
    }

  if (fncode == RS6000_BIF_BUILD_PAIR || fncode == RS6000_BIF_ASSEMBLE_PAIR_V)
    lhs = create_tmp_reg_or_ssa_name (vector_pair_type_node);
  else
    lhs = create_tmp_reg_or_ssa_name (vector_quad_type_node);
  gimple_call_set_lhs (new_call, lhs);
  gimple_seq_add_stmt (&new_seq, new_call);
  gimplify_assign (build_simple_mem_ref (acc), lhs, &new_seq);
  pop_gimplify_context (NULL);
  gsi_replace_with_seq (gsi, new_seq, true);

  return true;
}
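
/* A sketch of the transformation (names illustrative): a call such as
     __builtin_mma_xvf32gerpp (&acc, x, y);
   is rewritten into roughly
     _1 = *acc_ptr;
     _2 = <internal xvf32gerpp bif> (_1, x, y);
     *acc_ptr = _2;
   so the accumulator travels by value and can stay in registers between
   statements.  */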
1263 | ||
1264 | /* Fold a machine-dependent built-in in GIMPLE. (For folding into | |
1265 | a constant, use rs6000_fold_builtin.) */ | |
1266 | bool | |
1267 | rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) | |
1268 | { | |
1269 | gimple *stmt = gsi_stmt (*gsi); | |
1270 | tree fndecl = gimple_call_fndecl (stmt); | |
1271 | gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD); | |
1272 | enum rs6000_gen_builtins fn_code | |
1273 | = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl); | |
1274 | tree arg0, arg1, lhs, temp; | |
1275 | enum tree_code bcode; | |
1276 | gimple *g; | |
1277 | ||
94504c9a KL |
1278 | /* For an unresolved overloaded builtin, return early here since there |
1279 | is no builtin info for it and we are unable to fold it. */ | |
1280 | if (fn_code > RS6000_OVLD_NONE) | |
1281 | return false; | |
1282 | ||
eecee223 BS |
1283 | size_t uns_fncode = (size_t) fn_code; |
1284 | enum insn_code icode = rs6000_builtin_info[uns_fncode].icode; | |
1285 | const char *fn_name1 = rs6000_builtin_info[uns_fncode].bifname; | |
1286 | const char *fn_name2 = (icode != CODE_FOR_nothing) | |
1287 | ? get_insn_name ((int) icode) | |
1288 | : "nothing"; | |
1289 | ||
1290 | if (TARGET_DEBUG_BUILTIN) | |
1291 | fprintf (stderr, "rs6000_gimple_fold_builtin %d %s %s\n", | |
1292 | fn_code, fn_name1, fn_name2); | |
1293 | ||
eecee223 BS |
1294 | /* Prevent gimple folding for code that does not have a LHS, unless it is |
1295 | allowed per the rs6000_builtin_valid_without_lhs helper function. */ | |
1296 | if (!gimple_call_lhs (stmt) | |
1297 | && !rs6000_builtin_valid_without_lhs (fn_code, fndecl)) | |
1298 | return false; | |
1299 | ||
1300 | /* Don't fold invalid builtins, let rs6000_expand_builtin diagnose it. */ | |
1301 | if (!rs6000_builtin_is_supported (fn_code)) | |
1302 | return false; | |
1303 | ||
1304 | if (rs6000_gimple_fold_mma_builtin (gsi, fn_code)) | |
1305 | return true; | |
1306 | ||
1307 | switch (fn_code) | |
1308 | { | |
1309 | /* Flavors of vec_add. We deliberately don't expand | |
1310 | RS6000_BIF_VADDUQM as it gets lowered from V1TImode to | |
1311 | TImode, resulting in much poorer code generation. */ | |
1312 | case RS6000_BIF_VADDUBM: | |
1313 | case RS6000_BIF_VADDUHM: | |
1314 | case RS6000_BIF_VADDUWM: | |
1315 | case RS6000_BIF_VADDUDM: | |
1316 | case RS6000_BIF_VADDFP: | |
1317 | case RS6000_BIF_XVADDDP: | |
1318 | case RS6000_BIF_XVADDSP: | |
1319 | bcode = PLUS_EXPR; | |
1320 | do_binary: | |
1321 | arg0 = gimple_call_arg (stmt, 0); | |
1322 | arg1 = gimple_call_arg (stmt, 1); | |
1323 | lhs = gimple_call_lhs (stmt); | |
1324 | if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (lhs))) | |
1325 | && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (lhs)))) | |
1326 | { | |
1327 | /* Ensure the binary operation is performed in a type | |
1328 | that wraps if it is integral type. */ | |
1329 | gimple_seq stmts = NULL; | |
1330 | tree type = unsigned_type_for (TREE_TYPE (lhs)); | |
1331 | tree uarg0 = gimple_build (&stmts, VIEW_CONVERT_EXPR, | |
1332 | type, arg0); | |
1333 | tree uarg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR, | |
1334 | type, arg1); | |
1335 | tree res = gimple_build (&stmts, gimple_location (stmt), bcode, | |
1336 | type, uarg0, uarg1); | |
1337 | gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); | |
1338 | g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, | |
1339 | build1 (VIEW_CONVERT_EXPR, | |
1340 | TREE_TYPE (lhs), res)); | |
1341 | gsi_replace (gsi, g, true); | |
1342 | return true; | |
1343 | } | |
1344 | g = gimple_build_assign (lhs, bcode, arg0, arg1); | |
1345 | gimple_set_location (g, gimple_location (stmt)); | |
1346 | gsi_replace (gsi, g, true); | |
1347 | return true; | |
1348 | /* Flavors of vec_sub. We deliberately don't expand | |
1349 | RS6000_BIF_VSUBUQM. */ | |
1350 | case RS6000_BIF_VSUBUBM: | |
1351 | case RS6000_BIF_VSUBUHM: | |
1352 | case RS6000_BIF_VSUBUWM: | |
1353 | case RS6000_BIF_VSUBUDM: | |
1354 | case RS6000_BIF_VSUBFP: | |
1355 | case RS6000_BIF_XVSUBDP: | |
1356 | case RS6000_BIF_XVSUBSP: | |
1357 | bcode = MINUS_EXPR; | |
1358 | goto do_binary; | |
1359 | case RS6000_BIF_XVMULSP: | |
1360 | case RS6000_BIF_XVMULDP: | |
1361 | arg0 = gimple_call_arg (stmt, 0); | |
1362 | arg1 = gimple_call_arg (stmt, 1); | |
1363 | lhs = gimple_call_lhs (stmt); | |
1364 | g = gimple_build_assign (lhs, MULT_EXPR, arg0, arg1); | |
1365 | gimple_set_location (g, gimple_location (stmt)); | |
1366 | gsi_replace (gsi, g, true); | |
1367 | return true; | |
1368 | /* Even element flavors of vec_mul (signed). */ | |
1369 | case RS6000_BIF_VMULESB: | |
1370 | case RS6000_BIF_VMULESH: | |
1371 | case RS6000_BIF_VMULESW: | |
1372 | /* Even element flavors of vec_mul (unsigned). */ | |
1373 | case RS6000_BIF_VMULEUB: | |
1374 | case RS6000_BIF_VMULEUH: | |
1375 | case RS6000_BIF_VMULEUW: | |
1376 | arg0 = gimple_call_arg (stmt, 0); | |
1377 | arg1 = gimple_call_arg (stmt, 1); | |
1378 | lhs = gimple_call_lhs (stmt); | |
1379 | g = gimple_build_assign (lhs, VEC_WIDEN_MULT_EVEN_EXPR, arg0, arg1); | |
1380 | gimple_set_location (g, gimple_location (stmt)); | |
1381 | gsi_replace (gsi, g, true); | |
1382 | return true; | |
1383 | /* Odd element flavors of vec_mul (signed). */ | |
1384 | case RS6000_BIF_VMULOSB: | |
1385 | case RS6000_BIF_VMULOSH: | |
1386 | case RS6000_BIF_VMULOSW: | |
1387 | /* Odd element flavors of vec_mul (unsigned). */ | |
1388 | case RS6000_BIF_VMULOUB: | |
1389 | case RS6000_BIF_VMULOUH: | |
1390 | case RS6000_BIF_VMULOUW: | |
1391 | arg0 = gimple_call_arg (stmt, 0); | |
1392 | arg1 = gimple_call_arg (stmt, 1); | |
1393 | lhs = gimple_call_lhs (stmt); | |
1394 | g = gimple_build_assign (lhs, VEC_WIDEN_MULT_ODD_EXPR, arg0, arg1); | |
1395 | gimple_set_location (g, gimple_location (stmt)); | |
1396 | gsi_replace (gsi, g, true); | |
1397 | return true; | |
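/* VEC_WIDEN_MULT_EVEN_EXPR and VEC_WIDEN_MULT_ODD_EXPR yield half as
   many results at twice the element width.  A scalar model of the
   even V8HI -> V4SI case (hypothetical helper; lane numbering follows
   GIMPLE element order, which the middle end reconciles with the
   endian-dependent instructions):

     typedef short v8hi __attribute__ ((vector_size (16)));
     typedef int v4si __attribute__ ((vector_size (16)));

     v4si
     widen_mult_even_sketch (v8hi a, v8hi b)
     {
       v4si r;
       for (int i = 0; i < 4; i++)
         r[i] = (int) a[2 * i] * (int) b[2 * i];   // widen, multiply
       return r;
     }  */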
1398 | /* Flavors of vec_div (Integer). */ | |
1399 | case RS6000_BIF_DIV_V2DI: | |
1400 | case RS6000_BIF_UDIV_V2DI: | |
1401 | arg0 = gimple_call_arg (stmt, 0); | |
1402 | arg1 = gimple_call_arg (stmt, 1); | |
1403 | lhs = gimple_call_lhs (stmt); | |
1404 | g = gimple_build_assign (lhs, TRUNC_DIV_EXPR, arg0, arg1); | |
1405 | gimple_set_location (g, gimple_location (stmt)); | |
1406 | gsi_replace (gsi, g, true); | |
1407 | return true; | |
1408 | /* Flavors of vec_div (Float). */ | |
1409 | case RS6000_BIF_XVDIVSP: | |
1410 | case RS6000_BIF_XVDIVDP: | |
1411 | arg0 = gimple_call_arg (stmt, 0); | |
1412 | arg1 = gimple_call_arg (stmt, 1); | |
1413 | lhs = gimple_call_lhs (stmt); | |
1414 | g = gimple_build_assign (lhs, RDIV_EXPR, arg0, arg1); | |
1415 | gimple_set_location (g, gimple_location (stmt)); | |
1416 | gsi_replace (gsi, g, true); | |
1417 | return true; | |
1418 | /* Flavors of vec_and. */ | |
1419 | case RS6000_BIF_VAND_V16QI_UNS: | |
1420 | case RS6000_BIF_VAND_V16QI: | |
1421 | case RS6000_BIF_VAND_V8HI_UNS: | |
1422 | case RS6000_BIF_VAND_V8HI: | |
1423 | case RS6000_BIF_VAND_V4SI_UNS: | |
1424 | case RS6000_BIF_VAND_V4SI: | |
1425 | case RS6000_BIF_VAND_V2DI_UNS: | |
1426 | case RS6000_BIF_VAND_V2DI: | |
1427 | case RS6000_BIF_VAND_V4SF: | |
1428 | case RS6000_BIF_VAND_V2DF: | |
1429 | arg0 = gimple_call_arg (stmt, 0); | |
1430 | arg1 = gimple_call_arg (stmt, 1); | |
1431 | lhs = gimple_call_lhs (stmt); | |
1432 | g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, arg1); | |
1433 | gimple_set_location (g, gimple_location (stmt)); | |
1434 | gsi_replace (gsi, g, true); | |
1435 | return true; | |
1436 | /* Flavors of vec_andc. */ | |
1437 | case RS6000_BIF_VANDC_V16QI_UNS: | |
1438 | case RS6000_BIF_VANDC_V16QI: | |
1439 | case RS6000_BIF_VANDC_V8HI_UNS: | |
1440 | case RS6000_BIF_VANDC_V8HI: | |
1441 | case RS6000_BIF_VANDC_V4SI_UNS: | |
1442 | case RS6000_BIF_VANDC_V4SI: | |
1443 | case RS6000_BIF_VANDC_V2DI_UNS: | |
1444 | case RS6000_BIF_VANDC_V2DI: | |
1445 | case RS6000_BIF_VANDC_V4SF: | |
1446 | case RS6000_BIF_VANDC_V2DF: | |
1447 | arg0 = gimple_call_arg (stmt, 0); | |
1448 | arg1 = gimple_call_arg (stmt, 1); | |
1449 | lhs = gimple_call_lhs (stmt); | |
1450 | temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1)); | |
1451 | g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1); | |
1452 | gimple_set_location (g, gimple_location (stmt)); | |
1453 | gsi_insert_before (gsi, g, GSI_SAME_STMT); | |
1454 | g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, temp); | |
1455 | gimple_set_location (g, gimple_location (stmt)); | |
1456 | gsi_replace (gsi, g, true); | |
1457 | return true; | |
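/* vec_andc (a, b) computes a & ~b, so this fold needs two statements:
   the BIT_NOT_EXPR goes into a fresh temporary before the call is
   replaced by the BIT_AND_EXPR.  The nand, orc, nor, and eqv cases
   below follow the same two-statement shape.  As a sketch
   (hypothetical helper name):

     typedef int v4si __attribute__ ((vector_size (16)));

     v4si
     andc_sketch (v4si a, v4si b)
     {
       v4si t = ~b;    // BIT_NOT_EXPR inserted before the call
       return a & t;   // BIT_AND_EXPR replaces the call itself
     }  */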
1458 | /* Flavors of vec_nand. */ | |
1459 | case RS6000_BIF_NAND_V16QI_UNS: | |
1460 | case RS6000_BIF_NAND_V16QI: | |
1461 | case RS6000_BIF_NAND_V8HI_UNS: | |
1462 | case RS6000_BIF_NAND_V8HI: | |
1463 | case RS6000_BIF_NAND_V4SI_UNS: | |
1464 | case RS6000_BIF_NAND_V4SI: | |
1465 | case RS6000_BIF_NAND_V2DI_UNS: | |
1466 | case RS6000_BIF_NAND_V2DI: | |
1467 | case RS6000_BIF_NAND_V4SF: | |
1468 | case RS6000_BIF_NAND_V2DF: | |
1469 | arg0 = gimple_call_arg (stmt, 0); | |
1470 | arg1 = gimple_call_arg (stmt, 1); | |
1471 | lhs = gimple_call_lhs (stmt); | |
1472 | temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1)); | |
1473 | g = gimple_build_assign (temp, BIT_AND_EXPR, arg0, arg1); | |
1474 | gimple_set_location (g, gimple_location (stmt)); | |
1475 | gsi_insert_before (gsi, g, GSI_SAME_STMT); | |
1476 | g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp); | |
1477 | gimple_set_location (g, gimple_location (stmt)); | |
1478 | gsi_replace (gsi, g, true); | |
1479 | return true; | |
1480 | /* Flavors of vec_or. */ | |
1481 | case RS6000_BIF_VOR_V16QI_UNS: | |
1482 | case RS6000_BIF_VOR_V16QI: | |
1483 | case RS6000_BIF_VOR_V8HI_UNS: | |
1484 | case RS6000_BIF_VOR_V8HI: | |
1485 | case RS6000_BIF_VOR_V4SI_UNS: | |
1486 | case RS6000_BIF_VOR_V4SI: | |
1487 | case RS6000_BIF_VOR_V2DI_UNS: | |
1488 | case RS6000_BIF_VOR_V2DI: | |
1489 | case RS6000_BIF_VOR_V4SF: | |
1490 | case RS6000_BIF_VOR_V2DF: | |
1491 | arg0 = gimple_call_arg (stmt, 0); | |
1492 | arg1 = gimple_call_arg (stmt, 1); | |
1493 | lhs = gimple_call_lhs (stmt); | |
1494 | g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, arg1); | |
1495 | gimple_set_location (g, gimple_location (stmt)); | |
1496 | gsi_replace (gsi, g, true); | |
1497 | return true; | |
1498 | /* Flavors of vec_orc. */ | |
1499 | case RS6000_BIF_ORC_V16QI_UNS: | |
1500 | case RS6000_BIF_ORC_V16QI: | |
1501 | case RS6000_BIF_ORC_V8HI_UNS: | |
1502 | case RS6000_BIF_ORC_V8HI: | |
1503 | case RS6000_BIF_ORC_V4SI_UNS: | |
1504 | case RS6000_BIF_ORC_V4SI: | |
1505 | case RS6000_BIF_ORC_V2DI_UNS: | |
1506 | case RS6000_BIF_ORC_V2DI: | |
1507 | case RS6000_BIF_ORC_V4SF: | |
1508 | case RS6000_BIF_ORC_V2DF: | |
1509 | arg0 = gimple_call_arg (stmt, 0); | |
1510 | arg1 = gimple_call_arg (stmt, 1); | |
1511 | lhs = gimple_call_lhs (stmt); | |
1512 | temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1)); | |
1513 | g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1); | |
1514 | gimple_set_location (g, gimple_location (stmt)); | |
1515 | gsi_insert_before (gsi, g, GSI_SAME_STMT); | |
1516 | g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, temp); | |
1517 | gimple_set_location (g, gimple_location (stmt)); | |
1518 | gsi_replace (gsi, g, true); | |
1519 | return true; | |
1520 | /* Flavors of vec_xor. */ | |
1521 | case RS6000_BIF_VXOR_V16QI_UNS: | |
1522 | case RS6000_BIF_VXOR_V16QI: | |
1523 | case RS6000_BIF_VXOR_V8HI_UNS: | |
1524 | case RS6000_BIF_VXOR_V8HI: | |
1525 | case RS6000_BIF_VXOR_V4SI_UNS: | |
1526 | case RS6000_BIF_VXOR_V4SI: | |
1527 | case RS6000_BIF_VXOR_V2DI_UNS: | |
1528 | case RS6000_BIF_VXOR_V2DI: | |
1529 | case RS6000_BIF_VXOR_V4SF: | |
1530 | case RS6000_BIF_VXOR_V2DF: | |
1531 | arg0 = gimple_call_arg (stmt, 0); | |
1532 | arg1 = gimple_call_arg (stmt, 1); | |
1533 | lhs = gimple_call_lhs (stmt); | |
1534 | g = gimple_build_assign (lhs, BIT_XOR_EXPR, arg0, arg1); | |
1535 | gimple_set_location (g, gimple_location (stmt)); | |
1536 | gsi_replace (gsi, g, true); | |
1537 | return true; | |
1538 | /* Flavors of vec_nor. */ | |
1539 | case RS6000_BIF_VNOR_V16QI_UNS: | |
1540 | case RS6000_BIF_VNOR_V16QI: | |
1541 | case RS6000_BIF_VNOR_V8HI_UNS: | |
1542 | case RS6000_BIF_VNOR_V8HI: | |
1543 | case RS6000_BIF_VNOR_V4SI_UNS: | |
1544 | case RS6000_BIF_VNOR_V4SI: | |
1545 | case RS6000_BIF_VNOR_V2DI_UNS: | |
1546 | case RS6000_BIF_VNOR_V2DI: | |
1547 | case RS6000_BIF_VNOR_V4SF: | |
1548 | case RS6000_BIF_VNOR_V2DF: | |
1549 | arg0 = gimple_call_arg (stmt, 0); | |
1550 | arg1 = gimple_call_arg (stmt, 1); | |
1551 | lhs = gimple_call_lhs (stmt); | |
1552 | temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1)); | |
1553 | g = gimple_build_assign (temp, BIT_IOR_EXPR, arg0, arg1); | |
1554 | gimple_set_location (g, gimple_location (stmt)); | |
1555 | gsi_insert_before (gsi, g, GSI_SAME_STMT); | |
1556 | g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp); | |
1557 | gimple_set_location (g, gimple_location (stmt)); | |
1558 | gsi_replace (gsi, g, true); | |
1559 | return true; | |
1560 | /* Flavors of vec_abs. */ | |
1561 | case RS6000_BIF_ABS_V16QI: | |
1562 | case RS6000_BIF_ABS_V8HI: | |
1563 | case RS6000_BIF_ABS_V4SI: | |
1564 | case RS6000_BIF_ABS_V4SF: | |
1565 | case RS6000_BIF_ABS_V2DI: | |
1566 | case RS6000_BIF_XVABSDP: | |
1567 | case RS6000_BIF_XVABSSP: | |
1568 | arg0 = gimple_call_arg (stmt, 0); | |
1569 | if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (arg0))) | |
1570 | && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (arg0)))) | |
1571 | return false; | |
1572 | lhs = gimple_call_lhs (stmt); | |
1573 | g = gimple_build_assign (lhs, ABS_EXPR, arg0); | |
1574 | gimple_set_location (g, gimple_location (stmt)); | |
1575 | gsi_replace (gsi, g, true); | |
1576 | return true; | |
1577 | /* Fold into MIN_EXPR when fast-math is set. */ | |
1578 | case RS6000_BIF_XSMINDP: | |
1579 | /* Flavors of vec_min. */ | |
1580 | case RS6000_BIF_XVMINDP: | |
1581 | case RS6000_BIF_XVMINSP: | |
1582 | case RS6000_BIF_VMINFP: | |
1583 | { | |
1584 | lhs = gimple_call_lhs (stmt); | |
1585 | tree type = TREE_TYPE (lhs); | |
1586 | if (HONOR_NANS (type)) | |
1587 | return false; | |
1588 | gcc_fallthrough (); | |
1589 | } | |
1590 | case RS6000_BIF_VMINSD: | |
1591 | case RS6000_BIF_VMINUD: | |
1592 | case RS6000_BIF_VMINSB: | |
1593 | case RS6000_BIF_VMINSH: | |
1594 | case RS6000_BIF_VMINSW: | |
1595 | case RS6000_BIF_VMINUB: | |
1596 | case RS6000_BIF_VMINUH: | |
1597 | case RS6000_BIF_VMINUW: | |
1598 | arg0 = gimple_call_arg (stmt, 0); | |
1599 | arg1 = gimple_call_arg (stmt, 1); | |
1600 | lhs = gimple_call_lhs (stmt); | |
1601 | g = gimple_build_assign (lhs, MIN_EXPR, arg0, arg1); | |
1602 | gimple_set_location (g, gimple_location (stmt)); | |
1603 | gsi_replace (gsi, g, true); | |
1604 | return true; | |
1605 | /* Fold into MAX_EXPR when fast-math is set. */ | |
1606 | case RS6000_BIF_XSMAXDP: | |
1607 | /* Flavors of vec_max. */ | |
1608 | case RS6000_BIF_XVMAXDP: | |
1609 | case RS6000_BIF_XVMAXSP: | |
1610 | case RS6000_BIF_VMAXFP: | |
1611 | { | |
1612 | lhs = gimple_call_lhs (stmt); | |
1613 | tree type = TREE_TYPE (lhs); | |
1614 | if (HONOR_NANS (type)) | |
1615 | return false; | |
1616 | gcc_fallthrough (); | |
1617 | } | |
1618 | case RS6000_BIF_VMAXSD: | |
1619 | case RS6000_BIF_VMAXUD: | |
1620 | case RS6000_BIF_VMAXSB: | |
1621 | case RS6000_BIF_VMAXSH: | |
1622 | case RS6000_BIF_VMAXSW: | |
1623 | case RS6000_BIF_VMAXUB: | |
1624 | case RS6000_BIF_VMAXUH: | |
1625 | case RS6000_BIF_VMAXUW: | |
1626 | arg0 = gimple_call_arg (stmt, 0); | |
1627 | arg1 = gimple_call_arg (stmt, 1); | |
1628 | lhs = gimple_call_lhs (stmt); | |
1629 | g = gimple_build_assign (lhs, MAX_EXPR, arg0, arg1); | |
1630 | gimple_set_location (g, gimple_location (stmt)); | |
1631 | gsi_replace (gsi, g, true); | |
1632 | return true; | |
1633 | /* Flavors of vec_eqv. */ | |
1634 | case RS6000_BIF_EQV_V16QI: | |
1635 | case RS6000_BIF_EQV_V8HI: | |
1636 | case RS6000_BIF_EQV_V4SI: | |
1637 | case RS6000_BIF_EQV_V4SF: | |
1638 | case RS6000_BIF_EQV_V2DF: | |
1639 | case RS6000_BIF_EQV_V2DI: | |
1640 | arg0 = gimple_call_arg (stmt, 0); | |
1641 | arg1 = gimple_call_arg (stmt, 1); | |
1642 | lhs = gimple_call_lhs (stmt); | |
1643 | temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1)); | |
1644 | g = gimple_build_assign (temp, BIT_XOR_EXPR, arg0, arg1); | |
1645 | gimple_set_location (g, gimple_location (stmt)); | |
1646 | gsi_insert_before (gsi, g, GSI_SAME_STMT); | |
1647 | g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp); | |
1648 | gimple_set_location (g, gimple_location (stmt)); | |
1649 | gsi_replace (gsi, g, true); | |
1650 | return true; | |
1651 | /* Flavors of vec_rotate_left. */ | |
1652 | case RS6000_BIF_VRLB: | |
1653 | case RS6000_BIF_VRLH: | |
1654 | case RS6000_BIF_VRLW: | |
1655 | case RS6000_BIF_VRLD: | |
1656 | arg0 = gimple_call_arg (stmt, 0); | |
1657 | arg1 = gimple_call_arg (stmt, 1); | |
1658 | lhs = gimple_call_lhs (stmt); | |
1659 | g = gimple_build_assign (lhs, LROTATE_EXPR, arg0, arg1); | |
1660 | gimple_set_location (g, gimple_location (stmt)); | |
1661 | gsi_replace (gsi, g, true); | |
1662 | return true; | |
1663 | /* Flavors of vector shift right algebraic. | |
1664 | vec_sra{b,h,w} -> vsra{b,h,w}. */ | |
1665 | case RS6000_BIF_VSRAB: | |
1666 | case RS6000_BIF_VSRAH: | |
1667 | case RS6000_BIF_VSRAW: | |
1668 | case RS6000_BIF_VSRAD: | |
1669 | { | |
1670 | arg0 = gimple_call_arg (stmt, 0); | |
1671 | arg1 = gimple_call_arg (stmt, 1); | |
1672 | lhs = gimple_call_lhs (stmt); | |
1673 | tree arg1_type = TREE_TYPE (arg1); | |
1674 | tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1)); | |
1675 | tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type)); | |
1676 | location_t loc = gimple_location (stmt); | |
1677 | /* Force arg1 into the valid range for the arg0 type. */ | |
1678 | /* Build a vector consisting of the max valid bit-size values. */ | |
1679 | int n_elts = VECTOR_CST_NELTS (arg1); | |
1680 | tree element_size = build_int_cst (unsigned_element_type, | |
1681 | 128 / n_elts); | |
1682 | tree_vector_builder elts (unsigned_arg1_type, n_elts, 1); | |
1683 | for (int i = 0; i < n_elts; i++) | |
1684 | elts.safe_push (element_size); | |
1685 | tree modulo_tree = elts.build (); | |
1686 | /* Modulo the provided shift value against that vector. */ | |
1687 | gimple_seq stmts = NULL; | |
1688 | tree unsigned_arg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR, | |
1689 | unsigned_arg1_type, arg1); | |
1690 | tree new_arg1 = gimple_build (&stmts, loc, TRUNC_MOD_EXPR, | |
1691 | unsigned_arg1_type, unsigned_arg1, | |
1692 | modulo_tree); | |
1693 | gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); | |
1694 | /* And finally, do the shift. */ | |
1695 | g = gimple_build_assign (lhs, RSHIFT_EXPR, arg0, new_arg1); | |
1696 | gimple_set_location (g, loc); | |
1697 | gsi_replace (gsi, g, true); | |
1698 | return true; | |
1699 | } | |
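/* The modulo vector exists because the hardware uses only the low
   log2 (element-bits) bits of each shift count, while an unreduced
   count would be undefined for RSHIFT_EXPR.  A sketch of the V4SI
   case (hypothetical helper, GCC vector extensions):

     typedef int v4si __attribute__ ((vector_size (16)));
     typedef unsigned int v4su __attribute__ ((vector_size (16)));

     v4si
     vsraw_sketch (v4si a, v4su count)
     {
       v4su n = count % 32;      // TRUNC_MOD_EXPR by {32,32,32,32}
       return a >> (v4si) n;     // now a well-defined RSHIFT_EXPR
     }  */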
1700 | /* Flavors of vector shift left. | |
1701 | builtin_altivec_vsl{b,h,w} -> vsl{b,h,w}. */ | |
1702 | case RS6000_BIF_VSLB: | |
1703 | case RS6000_BIF_VSLH: | |
1704 | case RS6000_BIF_VSLW: | |
1705 | case RS6000_BIF_VSLD: | |
1706 | { | |
1707 | location_t loc; | |
1708 | gimple_seq stmts = NULL; | |
1709 | arg0 = gimple_call_arg (stmt, 0); | |
1710 | tree arg0_type = TREE_TYPE (arg0); | |
1711 | if (INTEGRAL_TYPE_P (TREE_TYPE (arg0_type)) | |
1712 | && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (arg0_type))) | |
1713 | return false; | |
1714 | arg1 = gimple_call_arg (stmt, 1); | |
1715 | tree arg1_type = TREE_TYPE (arg1); | |
1716 | tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1)); | |
1717 | tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type)); | |
1718 | loc = gimple_location (stmt); | |
1719 | lhs = gimple_call_lhs (stmt); | |
1720 | /* Force arg1 into the valid range for the arg0 type. */ | |
1721 | /* Build a vector consisting of the max valid bit-size values. */ | |
1722 | int n_elts = VECTOR_CST_NELTS (arg1); | |
1723 | int tree_size_in_bits = TREE_INT_CST_LOW (size_in_bytes (arg1_type)) | |
1724 | * BITS_PER_UNIT; | |
1725 | tree element_size = build_int_cst (unsigned_element_type, | |
1726 | tree_size_in_bits / n_elts); | |
1727 | tree_vector_builder elts (unsigned_type_for (arg1_type), n_elts, 1); | |
1728 | for (int i = 0; i < n_elts; i++) | |
1729 | elts.safe_push (element_size); | |
1730 | tree modulo_tree = elts.build (); | |
1731 | /* Modulo the provided shift value against that vector. */ | |
1732 | tree unsigned_arg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR, | |
1733 | unsigned_arg1_type, arg1); | |
1734 | tree new_arg1 = gimple_build (&stmts, loc, TRUNC_MOD_EXPR, | |
1735 | unsigned_arg1_type, unsigned_arg1, | |
1736 | modulo_tree); | |
1737 | gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); | |
1738 | /* And finally, do the shift. */ | |
1739 | g = gimple_build_assign (lhs, LSHIFT_EXPR, arg0, new_arg1); | |
1740 | gimple_set_location (g, gimple_location (stmt)); | |
1741 | gsi_replace (gsi, g, true); | |
1742 | return true; | |
1743 | } | |
1744 | /* Flavors of vector shift right. */ | |
1745 | case RS6000_BIF_VSRB: | |
1746 | case RS6000_BIF_VSRH: | |
1747 | case RS6000_BIF_VSRW: | |
1748 | case RS6000_BIF_VSRD: | |
1749 | { | |
1750 | arg0 = gimple_call_arg (stmt, 0); | |
1751 | arg1 = gimple_call_arg (stmt, 1); | |
1752 | lhs = gimple_call_lhs (stmt); | |
1753 | tree arg1_type = TREE_TYPE (arg1); | |
1754 | tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1)); | |
1755 | tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type)); | |
1756 | location_t loc = gimple_location (stmt); | |
1757 | gimple_seq stmts = NULL; | |
1758 | /* Convert arg0 to unsigned. */ | |
1759 | tree arg0_unsigned | |
1760 | = gimple_build (&stmts, VIEW_CONVERT_EXPR, | |
1761 | unsigned_type_for (TREE_TYPE (arg0)), arg0); | |
1762 | /* Force arg1 into the valid range for the arg0 type. */ | |
1763 | /* Build a vector consisting of the max valid bit-size values. */ | |
1764 | int n_elts = VECTOR_CST_NELTS (arg1); | |
1765 | tree element_size = build_int_cst (unsigned_element_type, | |
1766 | 128 / n_elts); | |
1767 | tree_vector_builder elts (unsigned_arg1_type, n_elts, 1); | |
1768 | for (int i = 0; i < n_elts; i++) | |
1769 | elts.safe_push (element_size); | |
1770 | tree modulo_tree = elts.build (); | |
1771 | /* Modulo the provided shift value against that vector. */ | |
1772 | tree unsigned_arg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR, | |
1773 | unsigned_arg1_type, arg1); | |
1774 | tree new_arg1 = gimple_build (&stmts, loc, TRUNC_MOD_EXPR, | |
1775 | unsigned_arg1_type, unsigned_arg1, | |
1776 | modulo_tree); | |
1777 | /* Do the shift. */ | |
1778 | tree res | |
1779 | = gimple_build (&stmts, RSHIFT_EXPR, | |
1780 | TREE_TYPE (arg0_unsigned), arg0_unsigned, new_arg1); | |
1781 | /* Convert result back to the lhs type. */ | |
1782 | res = gimple_build (&stmts, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), res); | |
1783 | gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); | |
1784 | replace_call_with_value (gsi, res); | |
1785 | return true; | |
1786 | } | |
1787 | /* Vector loads. */ | |
1788 | case RS6000_BIF_LVX_V16QI: | |
1789 | case RS6000_BIF_LVX_V8HI: | |
1790 | case RS6000_BIF_LVX_V4SI: | |
1791 | case RS6000_BIF_LVX_V4SF: | |
1792 | case RS6000_BIF_LVX_V2DI: | |
1793 | case RS6000_BIF_LVX_V2DF: | |
1794 | case RS6000_BIF_LVX_V1TI: | |
1795 | { | |
1796 | arg0 = gimple_call_arg (stmt, 0); // offset | |
1797 | arg1 = gimple_call_arg (stmt, 1); // address | |
1798 | lhs = gimple_call_lhs (stmt); | |
1799 | location_t loc = gimple_location (stmt); | |
1800 | /* Since arg1 may be cast to a different type, just use ptr_type_node | |
1801 | here instead of trying to enforce TBAA on pointer types. */ | |
1802 | tree arg1_type = ptr_type_node; | |
1803 | tree lhs_type = TREE_TYPE (lhs); | |
1804 | /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create | |
1805 | the tree using the value from arg0. The resulting type will match | |
1806 | the type of arg1. */ | |
1807 | gimple_seq stmts = NULL; | |
1808 | tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0); | |
1809 | tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR, | |
1810 | arg1_type, arg1, temp_offset); | |
1811 | /* Mask off any lower bits from the address. */ | |
1812 | tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR, | |
1813 | arg1_type, temp_addr, | |
1814 | build_int_cst (arg1_type, -16)); | |
1815 | gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); | |
1816 | if (!is_gimple_mem_ref_addr (aligned_addr)) | |
1817 | { | |
1818 | tree t = make_ssa_name (TREE_TYPE (aligned_addr)); | |
1819 | gimple *g = gimple_build_assign (t, aligned_addr); | |
1820 | gsi_insert_before (gsi, g, GSI_SAME_STMT); | |
1821 | aligned_addr = t; | |
1822 | } | |
1823 | /* Use the build2 helper to set up the mem_ref. The MEM_REF could also | |
1824 | take an offset, but since we've already incorporated the offset | |
1825 | above, here we just pass in a zero. */ | |
1826 | gimple *g | |
1827 | = gimple_build_assign (lhs, build2 (MEM_REF, lhs_type, aligned_addr, | |
1828 | build_int_cst (arg1_type, 0))); | |
1829 | gimple_set_location (g, loc); | |
1830 | gsi_replace (gsi, g, true); | |
1831 | return true; | |
1832 | } | |
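/* lvx ignores the low four bits of the effective address; masking
   with -16 makes that explicit in GIMPLE so later passes see the real
   access.  A pointer-level sketch of the resulting load (hypothetical
   helper name, assuming <stdint.h>):

     typedef int v4si __attribute__ ((vector_size (16)));

     v4si
     lvx_sketch (long offset, const void *addr)
     {
       uintptr_t ea = ((uintptr_t) addr + offset) & (uintptr_t) -16;
       return *(const v4si *) ea;   // aligned 16-byte load
     }  */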
1833 | /* Vector stores. */ | |
1834 | case RS6000_BIF_STVX_V16QI: | |
1835 | case RS6000_BIF_STVX_V8HI: | |
1836 | case RS6000_BIF_STVX_V4SI: | |
1837 | case RS6000_BIF_STVX_V4SF: | |
1838 | case RS6000_BIF_STVX_V2DI: | |
1839 | case RS6000_BIF_STVX_V2DF: | |
1840 | { | |
1841 | arg0 = gimple_call_arg (stmt, 0); /* Value to be stored. */ | |
1842 | arg1 = gimple_call_arg (stmt, 1); /* Offset. */ | |
1843 | tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address. */ | |
1844 | location_t loc = gimple_location (stmt); | |
1845 | tree arg0_type = TREE_TYPE (arg0); | |
1846 | /* Use ptr_type_node (no TBAA) for the arg2_type. | |
1847 | FIXME: (Richard) "A proper fix would be to transition this type as | |
1848 | seen from the frontend to GIMPLE, for example in a similar way we | |
1849 | do for MEM_REFs by piggy-backing that on an extra argument, a | |
1850 | constant zero pointer of the alias pointer type to use (which would | |
1851 | also serve as a type indicator of the store itself). I'd use a | |
1852 | target specific internal function for this (not sure if we can have | |
1853 | those target specific, but I guess if it's folded away then that's | |
1854 | fine) and get away with the overload set." */ | |
1855 | tree arg2_type = ptr_type_node; | |
1856 | /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create | |
1857 | the tree using the value from arg0. The resulting type will match | |
1858 | the type of arg2. */ | |
1859 | gimple_seq stmts = NULL; | |
1860 | tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1); | |
1861 | tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR, | |
1862 | arg2_type, arg2, temp_offset); | |
1863 | /* Mask off any lower bits from the address. */ | |
1864 | tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR, | |
1865 | arg2_type, temp_addr, | |
1866 | build_int_cst (arg2_type, -16)); | |
1867 | gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); | |
1868 | if (!is_gimple_mem_ref_addr (aligned_addr)) | |
1869 | { | |
1870 | tree t = make_ssa_name (TREE_TYPE (aligned_addr)); | |
1871 | gimple *g = gimple_build_assign (t, aligned_addr); | |
1872 | gsi_insert_before (gsi, g, GSI_SAME_STMT); | |
1873 | aligned_addr = t; | |
1874 | } | |
1875 | /* The desired gimple result should be similar to: | |
1876 | MEM[(__vector floatD.1407 *)_1] = vf1D.2697; */ | |
1877 | gimple *g | |
1878 | = gimple_build_assign (build2 (MEM_REF, arg0_type, aligned_addr, | |
1879 | build_int_cst (arg2_type, 0)), arg0); | |
1880 | gimple_set_location (g, loc); | |
1881 | gsi_replace (gsi, g, true); | |
1882 | return true; | |
1883 | } | |
1884 | ||
1885 | /* Unaligned vector loads. */ | |
1886 | case RS6000_BIF_LXVW4X_V16QI: | |
1887 | case RS6000_BIF_LXVW4X_V8HI: | |
1888 | case RS6000_BIF_LXVW4X_V4SF: | |
1889 | case RS6000_BIF_LXVW4X_V4SI: | |
1890 | case RS6000_BIF_LXVD2X_V2DF: | |
1891 | case RS6000_BIF_LXVD2X_V2DI: | |
1892 | { | |
1893 | arg0 = gimple_call_arg (stmt, 0); // offset | |
1894 | arg1 = gimple_call_arg (stmt, 1); // address | |
1895 | lhs = gimple_call_lhs (stmt); | |
1896 | location_t loc = gimple_location (stmt); | |
1897 | /* Since arg1 may be cast to a different type, just use ptr_type_node | |
1898 | here instead of trying to enforce TBAA on pointer types. */ | |
1899 | tree arg1_type = ptr_type_node; | |
1900 | tree lhs_type = TREE_TYPE (lhs); | |
1901 | /* In GIMPLE the type of the MEM_REF specifies the alignment. The | |
1902 | required alignment (power) is 4 bytes regardless of data type. */ | |
1903 | tree align_ltype = build_aligned_type (lhs_type, 4); | |
1904 | /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create | |
1905 | the tree using the value from arg0. The resulting type will match | |
1906 | the type of arg1. */ | |
1907 | gimple_seq stmts = NULL; | |
1908 | tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0); | |
1909 | tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR, | |
1910 | arg1_type, arg1, temp_offset); | |
1911 | gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); | |
1912 | if (!is_gimple_mem_ref_addr (temp_addr)) | |
1913 | { | |
1914 | tree t = make_ssa_name (TREE_TYPE (temp_addr)); | |
1915 | gimple *g = gimple_build_assign (t, temp_addr); | |
1916 | gsi_insert_before (gsi, g, GSI_SAME_STMT); | |
1917 | temp_addr = t; | |
1918 | } | |
1919 | /* Use the build2 helper to set up the mem_ref. The MEM_REF could also | |
1920 | take an offset, but since we've already incorporated the offset | |
1921 | above, here we just pass in a zero. */ | |
1922 | gimple *g; | |
1923 | g = gimple_build_assign (lhs, build2 (MEM_REF, align_ltype, temp_addr, | |
1924 | build_int_cst (arg1_type, 0))); | |
1925 | gimple_set_location (g, loc); | |
1926 | gsi_replace (gsi, g, true); | |
1927 | return true; | |
1928 | } | |
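/* In GIMPLE the MEM_REF type carries the alignment, so accessing
   through build_aligned_type (lhs_type, 4) is what licenses the
   expander to emit an unaligned-capable load rather than assume
   16-byte alignment.  The same effect at the source level, as a
   sketch (hypothetical typedef and helper):

     typedef int v4si __attribute__ ((vector_size (16)));
     typedef v4si v4si_a4 __attribute__ ((aligned (4)));

     v4si
     lxvw4x_sketch (long offset, const void *addr)
     {
       // The pointed-to type claims only 4-byte alignment.
       return *(const v4si_a4 *) ((const char *) addr + offset);
     }  */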
1929 | ||
1930 | /* Unaligned vector stores. */ | |
1931 | case RS6000_BIF_STXVW4X_V16QI: | |
1932 | case RS6000_BIF_STXVW4X_V8HI: | |
1933 | case RS6000_BIF_STXVW4X_V4SF: | |
1934 | case RS6000_BIF_STXVW4X_V4SI: | |
1935 | case RS6000_BIF_STXVD2X_V2DF: | |
1936 | case RS6000_BIF_STXVD2X_V2DI: | |
1937 | { | |
1938 | arg0 = gimple_call_arg (stmt, 0); /* Value to be stored. */ | |
1939 | arg1 = gimple_call_arg (stmt, 1); /* Offset. */ | |
1940 | tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address. */ | |
1941 | location_t loc = gimple_location (stmt); | |
1942 | tree arg0_type = TREE_TYPE (arg0); | |
1943 | /* Use ptr_type_node (no TBAA) for the arg2_type. */ | |
1944 | tree arg2_type = ptr_type_node; | |
1945 | /* In GIMPLE the type of the MEM_REF specifies the alignment. The | |
1946 | required alignment (power) is 4 bytes regardless of data type. */ | |
1947 | tree align_stype = build_aligned_type (arg0_type, 4); | |
1948 | /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create | |
1949 | the tree using the value from arg1. */ | |
1950 | gimple_seq stmts = NULL; | |
1951 | tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1); | |
1952 | tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR, | |
1953 | arg2_type, arg2, temp_offset); | |
1954 | gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); | |
1955 | if (!is_gimple_mem_ref_addr (temp_addr)) | |
1956 | { | |
1957 | tree t = make_ssa_name (TREE_TYPE (temp_addr)); | |
1958 | gimple *g = gimple_build_assign (t, temp_addr); | |
1959 | gsi_insert_before (gsi, g, GSI_SAME_STMT); | |
1960 | temp_addr = t; | |
1961 | } | |
1962 | gimple *g; | |
1963 | g = gimple_build_assign (build2 (MEM_REF, align_stype, temp_addr, | |
1964 | build_int_cst (arg2_type, 0)), arg0); | |
1965 | gimple_set_location (g, loc); | |
1966 | gsi_replace (gsi, g, true); | |
1967 | return true; | |
1968 | } | |
1969 | ||
1970 | /* Vector Fused multiply-add (fma). */ | |
1971 | case RS6000_BIF_VMADDFP: | |
1972 | case RS6000_BIF_XVMADDDP: | |
1973 | case RS6000_BIF_XVMADDSP: | |
1974 | case RS6000_BIF_VMLADDUHM: | |
1975 | { | |
1976 | arg0 = gimple_call_arg (stmt, 0); | |
1977 | arg1 = gimple_call_arg (stmt, 1); | |
1978 | tree arg2 = gimple_call_arg (stmt, 2); | |
1979 | lhs = gimple_call_lhs (stmt); | |
1980 | gcall *g = gimple_build_call_internal (IFN_FMA, 3, arg0, arg1, arg2); | |
1981 | gimple_call_set_lhs (g, lhs); | |
1982 | gimple_call_set_nothrow (g, true); | |
1983 | gimple_set_location (g, gimple_location (stmt)); | |
1984 | gsi_replace (gsi, g, true); | |
1985 | return true; | |
1986 | } | |
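/* IFN_FMA is the internal function behind __builtin_fma: for the
   float flavors, one multiply-add per lane with a single rounding.
   A per-lane scalar model (hypothetical helper, using fmaf from
   <math.h>):

     typedef float v4sf __attribute__ ((vector_size (16)));

     v4sf
     vmaddfp_sketch (v4sf a, v4sf b, v4sf c)
     {
       v4sf r;
       for (int i = 0; i < 4; i++)
         r[i] = fmaf (a[i], b[i], c[i]);   // one rounding per lane
       return r;
     }  */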
1987 | ||
1988 | /* Vector compares; EQ, NE, GE, GT, LE. */ | |
1989 | case RS6000_BIF_VCMPEQUB: | |
1990 | case RS6000_BIF_VCMPEQUH: | |
1991 | case RS6000_BIF_VCMPEQUW: | |
1992 | case RS6000_BIF_VCMPEQUD: | |
1993 | case RS6000_BIF_VCMPEQUT: | |
1994 | fold_compare_helper (gsi, EQ_EXPR, stmt); |
1995 | return true; | |
1996 | ||
1997 | case RS6000_BIF_VCMPNEB: | |
1998 | case RS6000_BIF_VCMPNEH: | |
1999 | case RS6000_BIF_VCMPNEW: | |
2000 | case RS6000_BIF_VCMPNET: | |
2001 | fold_compare_helper (gsi, NE_EXPR, stmt); |
2002 | return true; | |
2003 | ||
2004 | case RS6000_BIF_CMPGE_16QI: | |
2005 | case RS6000_BIF_CMPGE_U16QI: | |
2006 | case RS6000_BIF_CMPGE_8HI: | |
2007 | case RS6000_BIF_CMPGE_U8HI: | |
2008 | case RS6000_BIF_CMPGE_4SI: | |
2009 | case RS6000_BIF_CMPGE_U4SI: | |
2010 | case RS6000_BIF_CMPGE_2DI: | |
2011 | case RS6000_BIF_CMPGE_U2DI: | |
2012 | case RS6000_BIF_CMPGE_1TI: | |
2013 | case RS6000_BIF_CMPGE_U1TI: | |
2014 | fold_compare_helper (gsi, GE_EXPR, stmt); |
2015 | return true; | |
2016 | ||
2017 | case RS6000_BIF_VCMPGTSB: | |
2018 | case RS6000_BIF_VCMPGTUB: | |
2019 | case RS6000_BIF_VCMPGTSH: | |
2020 | case RS6000_BIF_VCMPGTUH: | |
2021 | case RS6000_BIF_VCMPGTSW: | |
2022 | case RS6000_BIF_VCMPGTUW: | |
2023 | case RS6000_BIF_VCMPGTUD: | |
2024 | case RS6000_BIF_VCMPGTSD: | |
2025 | case RS6000_BIF_VCMPGTUT: | |
2026 | case RS6000_BIF_VCMPGTST: | |
2027 | fold_compare_helper (gsi, GT_EXPR, stmt); |
2028 | return true; | |
2029 | ||
2030 | case RS6000_BIF_CMPLE_16QI: | |
2031 | case RS6000_BIF_CMPLE_U16QI: | |
2032 | case RS6000_BIF_CMPLE_8HI: | |
2033 | case RS6000_BIF_CMPLE_U8HI: | |
2034 | case RS6000_BIF_CMPLE_4SI: | |
2035 | case RS6000_BIF_CMPLE_U4SI: | |
2036 | case RS6000_BIF_CMPLE_2DI: | |
2037 | case RS6000_BIF_CMPLE_U2DI: | |
2038 | case RS6000_BIF_CMPLE_1TI: | |
2039 | case RS6000_BIF_CMPLE_U1TI: | |
2040 | fold_compare_helper (gsi, LE_EXPR, stmt); |
2041 | return true; | |
2042 | ||
2043 | /* Flavors of vec_splat_[us]{8,16,32}. */ | |
2044 | case RS6000_BIF_VSPLTISB: | |
2045 | case RS6000_BIF_VSPLTISH: | |
2046 | case RS6000_BIF_VSPLTISW: | |
2047 | { | |
2048 | arg0 = gimple_call_arg (stmt, 0); | |
2049 | lhs = gimple_call_lhs (stmt); | |
2050 | ||
2051 | /* Only fold the vec_splat_*() if the low-order bits of arg0 form a | |
2052 | 5-bit signed constant in the range -16 to +15. */ | |
2053 | if (TREE_CODE (arg0) != INTEGER_CST | |
2054 | || !IN_RANGE (TREE_INT_CST_LOW (arg0), -16, 15)) | |
2055 | return false; | |
2056 | gimple_seq stmts = NULL; | |
2057 | location_t loc = gimple_location (stmt); | |
2058 | tree splat_value = gimple_convert (&stmts, loc, | |
2059 | TREE_TYPE (TREE_TYPE (lhs)), arg0); | |
2060 | gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); | |
2061 | tree splat_tree = build_vector_from_val (TREE_TYPE (lhs), splat_value); | |
2062 | g = gimple_build_assign (lhs, splat_tree); | |
2063 | gimple_set_location (g, gimple_location (stmt)); | |
2064 | gsi_replace (gsi, g, true); | |
2065 | return true; | |
2066 | } | |
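/* When the argument is a literal in [-16, 15] the call collapses to
   a constant vector via build_vector_from_val; for example,
   vec_splat_s32 (5) becomes the GIMPLE equivalent of (hypothetical
   helper):

     typedef int v4si __attribute__ ((vector_size (16)));

     v4si
     spltisw_sketch (void)
     {
       return (v4si) { 5, 5, 5, 5 };   // splat of the constant
     }  */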
2067 | ||
2068 | /* Flavors of vec_splat. */ | |
2069 | /* a = vec_splat (b, 0x3) becomes a = { b[3],b[3],b[3],...}; */ | |
2070 | case RS6000_BIF_VSPLTB: | |
2071 | case RS6000_BIF_VSPLTH: | |
2072 | case RS6000_BIF_VSPLTW: | |
2073 | case RS6000_BIF_XXSPLTD_V2DI: | |
2074 | case RS6000_BIF_XXSPLTD_V2DF: | |
2075 | { | |
2076 | arg0 = gimple_call_arg (stmt, 0); /* input vector. */ | |
2077 | arg1 = gimple_call_arg (stmt, 1); /* index into arg0. */ | |
2078 | /* Only fold the vec_splat_*() if arg1 is both a constant value and | |
2079 | a valid index into the arg0 vector. */ | |
2080 | unsigned int n_elts = VECTOR_CST_NELTS (arg0); | |
2081 | if (TREE_CODE (arg1) != INTEGER_CST | |
2082 | || TREE_INT_CST_LOW (arg1) > (n_elts - 1)) | |
2083 | return false; | |
2084 | lhs = gimple_call_lhs (stmt); | |
2085 | tree lhs_type = TREE_TYPE (lhs); | |
2086 | tree arg0_type = TREE_TYPE (arg0); | |
2087 | tree splat; | |
2088 | if (TREE_CODE (arg0) == VECTOR_CST) | |
2089 | splat = VECTOR_CST_ELT (arg0, TREE_INT_CST_LOW (arg1)); | |
2090 | else | |
2091 | { | |
2092 | /* Determine (in bits) the length and start location of the | |
2093 | splat value for a call to the tree_vec_extract helper. */ | |
2094 | int splat_elem_size = TREE_INT_CST_LOW (size_in_bytes (arg0_type)) | |
2095 | * BITS_PER_UNIT / n_elts; | |
2096 | int splat_start_bit = TREE_INT_CST_LOW (arg1) * splat_elem_size; | |
2097 | tree len = build_int_cst (bitsizetype, splat_elem_size); | |
2098 | tree start = build_int_cst (bitsizetype, splat_start_bit); | |
2099 | splat = tree_vec_extract (gsi, TREE_TYPE (lhs_type), arg0, | |
2100 | len, start); | |
2101 | } | |
2102 | /* And finally, build the new vector. */ | |
2103 | tree splat_tree = build_vector_from_val (lhs_type, splat); | |
2104 | g = gimple_build_assign (lhs, splat_tree); | |
2105 | gimple_set_location (g, gimple_location (stmt)); | |
2106 | gsi_replace (gsi, g, true); | |
2107 | return true; | |
2108 | } | |
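/* For a constant index the splat is one element read followed by a
   broadcast; vec_splat (b, 3) on V4SI behaves like this sketch
   (hypothetical helper):

     typedef int v4si __attribute__ ((vector_size (16)));

     v4si
     vspltw_sketch (v4si b)
     {
       int e = b[3];                   // tree_vec_extract above
       return (v4si) { e, e, e, e };   // build_vector_from_val
     }  */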
2109 | ||
2110 | /* vec_mergel (integrals). */ | |
2111 | case RS6000_BIF_VMRGLH: | |
2112 | case RS6000_BIF_VMRGLW: | |
2113 | case RS6000_BIF_XXMRGLW_4SI: | |
2114 | case RS6000_BIF_VMRGLB: | |
2115 | case RS6000_BIF_VEC_MERGEL_V2DI: | |
2116 | case RS6000_BIF_XXMRGLW_4SF: | |
2117 | case RS6000_BIF_VEC_MERGEL_V2DF: | |
2118 | fold_mergehl_helper (gsi, stmt, 1); | |
2119 | return true; | |
2120 | /* vec_mergeh (integrals). */ | |
2121 | case RS6000_BIF_VMRGHH: | |
2122 | case RS6000_BIF_VMRGHW: | |
2123 | case RS6000_BIF_XXMRGHW_4SI: | |
2124 | case RS6000_BIF_VMRGHB: | |
2125 | case RS6000_BIF_VEC_MERGEH_V2DI: | |
2126 | case RS6000_BIF_XXMRGHW_4SF: | |
2127 | case RS6000_BIF_VEC_MERGEH_V2DF: | |
2128 | fold_mergehl_helper (gsi, stmt, 0); | |
2129 | return true; | |
2130 | ||
2131 | /* Flavors of vec_mergee. */ | |
2132 | case RS6000_BIF_VMRGEW_V4SI: | |
2133 | case RS6000_BIF_VMRGEW_V2DI: | |
2134 | case RS6000_BIF_VMRGEW_V4SF: | |
2135 | case RS6000_BIF_VMRGEW_V2DF: | |
2136 | fold_mergeeo_helper (gsi, stmt, 0); | |
2137 | return true; | |
2138 | /* Flavors of vec_mergeo. */ | |
2139 | case RS6000_BIF_VMRGOW_V4SI: | |
2140 | case RS6000_BIF_VMRGOW_V2DI: | |
2141 | case RS6000_BIF_VMRGOW_V4SF: | |
2142 | case RS6000_BIF_VMRGOW_V2DF: | |
2143 | fold_mergeeo_helper (gsi, stmt, 1); | |
2144 | return true; | |
2145 | ||
2146 | /* d = vec_pack (a, b) */ | |
2147 | case RS6000_BIF_VPKUDUM: | |
2148 | case RS6000_BIF_VPKUHUM: | |
2149 | case RS6000_BIF_VPKUWUM: | |
2150 | { | |
2151 | arg0 = gimple_call_arg (stmt, 0); | |
2152 | arg1 = gimple_call_arg (stmt, 1); | |
2153 | lhs = gimple_call_lhs (stmt); | |
2154 | gimple *g = gimple_build_assign (lhs, VEC_PACK_TRUNC_EXPR, arg0, arg1); | |
2155 | gimple_set_location (g, gimple_location (stmt)); | |
2156 | gsi_replace (gsi, g, true); | |
2157 | return true; | |
2158 | } | |
2159 | ||
2160 | /* d = vec_unpackh (a) */ | |
2161 | /* Note that the UNPACK_{HI,LO}_EXPR used in the gimple_build_assign call | |
2162 | in this code is sensitive to endianness, and needs to be inverted to | |
2163 | handle both LE and BE targets. */ | |
2164 | case RS6000_BIF_VUPKHSB: | |
2165 | case RS6000_BIF_VUPKHSH: | |
2166 | case RS6000_BIF_VUPKHSW: | |
2167 | { | |
2168 | arg0 = gimple_call_arg (stmt, 0); | |
2169 | lhs = gimple_call_lhs (stmt); | |
2170 | if (BYTES_BIG_ENDIAN) | |
2171 | g = gimple_build_assign (lhs, VEC_UNPACK_HI_EXPR, arg0); | |
2172 | else | |
2173 | g = gimple_build_assign (lhs, VEC_UNPACK_LO_EXPR, arg0); | |
2174 | gimple_set_location (g, gimple_location (stmt)); | |
2175 | gsi_replace (gsi, g, true); | |
2176 | return true; | |
2177 | } | |
2178 | /* d = vec_unpackl (a) */ | |
2179 | case RS6000_BIF_VUPKLSB: | |
2180 | case RS6000_BIF_VUPKLSH: | |
2181 | case RS6000_BIF_VUPKLSW: | |
2182 | { | |
2183 | arg0 = gimple_call_arg (stmt, 0); | |
2184 | lhs = gimple_call_lhs (stmt); | |
2185 | if (BYTES_BIG_ENDIAN) | |
2186 | g = gimple_build_assign (lhs, VEC_UNPACK_LO_EXPR, arg0); | |
2187 | else | |
2188 | g = gimple_build_assign (lhs, VEC_UNPACK_HI_EXPR, arg0); | |
2189 | gimple_set_location (g, gimple_location (stmt)); | |
2190 | gsi_replace (gsi, g, true); | |
2191 | return true; | |
2192 | } | |
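/* Semantically, vec_unpackh sign-extends one half of the input
   vector and vec_unpackl the other; which half corresponds to
   VEC_UNPACK_HI_EXPR versus VEC_UNPACK_LO_EXPR flips with the target
   endianness, as noted above.  A scalar model of the big-endian
   vec_unpackh case (hypothetical helper):

     typedef short v8hi __attribute__ ((vector_size (16)));
     typedef int v4si __attribute__ ((vector_size (16)));

     v4si
     vupkhsh_sketch_be (v8hi a)
     {
       v4si r;
       for (int i = 0; i < 4; i++)
         r[i] = a[i];   // sign-extend each element of one half
       return r;
     }  */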
2193 | /* There is no gimple type corresponding with pixel, so just return. */ | |
2194 | case RS6000_BIF_VUPKHPX: | |
2195 | case RS6000_BIF_VUPKLPX: | |
2196 | return false; | |
2197 | ||
2198 | /* vec_perm. */ | |
2199 | case RS6000_BIF_VPERM_16QI: | |
2200 | case RS6000_BIF_VPERM_8HI: | |
2201 | case RS6000_BIF_VPERM_4SI: | |
2202 | case RS6000_BIF_VPERM_2DI: | |
2203 | case RS6000_BIF_VPERM_4SF: | |
2204 | case RS6000_BIF_VPERM_2DF: | |
2205 | case RS6000_BIF_VPERM_16QI_UNS: | |
2206 | case RS6000_BIF_VPERM_8HI_UNS: | |
2207 | case RS6000_BIF_VPERM_4SI_UNS: | |
2208 | case RS6000_BIF_VPERM_2DI_UNS: | |
2209 | { | |
2210 | arg0 = gimple_call_arg (stmt, 0); | |
2211 | arg1 = gimple_call_arg (stmt, 1); | |
2212 | tree permute = gimple_call_arg (stmt, 2); | |
2213 | lhs = gimple_call_lhs (stmt); | |
2214 | location_t loc = gimple_location (stmt); | |
2215 | gimple_seq stmts = NULL; | |
2216 | // convert arg0 and arg1 to match the type of the permute | |
2217 | // for the VEC_PERM_EXPR operation. | |
2218 | tree permute_type = (TREE_TYPE (permute)); | |
2219 | tree arg0_ptype = gimple_build (&stmts, loc, VIEW_CONVERT_EXPR, | |
2220 | permute_type, arg0); | |
2221 | tree arg1_ptype = gimple_build (&stmts, loc, VIEW_CONVERT_EXPR, | |
2222 | permute_type, arg1); | |
2223 | tree lhs_ptype = gimple_build (&stmts, loc, VEC_PERM_EXPR, | |
2224 | permute_type, arg0_ptype, arg1_ptype, | |
2225 | permute); | |
2226 | // Convert the result back to the desired lhs type upon completion. | |
2227 | tree temp = gimple_build (&stmts, loc, VIEW_CONVERT_EXPR, | |
2228 | TREE_TYPE (lhs), lhs_ptype); | |
2229 | gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); | |
2230 | g = gimple_build_assign (lhs, temp); | |
2231 | gimple_set_location (g, loc); | |
2232 | gsi_replace (gsi, g, true); | |
2233 | return true; | |
2234 | } | |
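/* VEC_PERM_EXPR requires the data operands to have the permute
   vector's type, hence the VIEW_CONVERT_EXPRs on both sides.  With
   GCC vector extensions the produced GIMPLE corresponds roughly to
   __builtin_shuffle (hypothetical helper):

     typedef unsigned char v16qu __attribute__ ((vector_size (16)));
     typedef int v4si __attribute__ ((vector_size (16)));

     v4si
     vperm_sketch (v4si a, v4si b, v16qu sel)
     {
       v16qu ba = (v16qu) a;                       // view-convert
       v16qu bb = (v16qu) b;
       v16qu r = __builtin_shuffle (ba, bb, sel);  // VEC_PERM_EXPR
       return (v4si) r;                            // convert back
     }  */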
2235 | ||
2236 | default: | |
2237 | if (TARGET_DEBUG_BUILTIN) | |
2238 | fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n", | |
2239 | fn_code, fn_name1, fn_name2); | |
2240 | break; | |
2241 | } | |
2242 | ||
2243 | return false; | |
2244 | } | |
2245 | ||
2246 | /* **** Expansion support **** */ | |
2247 | ||
2248 | static rtx | |
2249 | altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target) | |
2250 | { | |
2251 | rtx pat, scratch; | |
2252 | tree cr6_form = CALL_EXPR_ARG (exp, 0); | |
2253 | tree arg0 = CALL_EXPR_ARG (exp, 1); | |
2254 | tree arg1 = CALL_EXPR_ARG (exp, 2); | |
2255 | rtx op0 = expand_normal (arg0); | |
2256 | rtx op1 = expand_normal (arg1); | |
2257 | machine_mode tmode = SImode; | |
2258 | machine_mode mode0 = insn_data[icode].operand[1].mode; | |
2259 | machine_mode mode1 = insn_data[icode].operand[2].mode; | |
2260 | int cr6_form_int; | |
2261 | ||
2262 | if (TREE_CODE (cr6_form) != INTEGER_CST) | |
2263 | { | |
2264 | error ("argument 1 of %qs must be a constant", | |
2265 | "__builtin_altivec_predicate"); | |
2266 | return const0_rtx; | |
2267 | } | |
2268 | else | |
2269 | cr6_form_int = TREE_INT_CST_LOW (cr6_form); | |
2270 | ||
2271 | gcc_assert (mode0 == mode1); | |
2272 | ||
2273 | /* If we have invalid arguments, bail out before generating bad rtl. */ | |
2274 | if (arg0 == error_mark_node || arg1 == error_mark_node) | |
2275 | return const0_rtx; | |
2276 | ||
2277 | if (target == 0 | |
2278 | || GET_MODE (target) != tmode | |
2279 | || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) | |
2280 | target = gen_reg_rtx (tmode); | |
2281 | ||
2282 | if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) | |
2283 | op0 = copy_to_mode_reg (mode0, op0); | |
2284 | if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) | |
2285 | op1 = copy_to_mode_reg (mode1, op1); | |
2286 | ||
2287 | /* Note that for many of the relevant operations (e.g. cmpne or | |
2288 | cmpeq) with float or double operands, it makes more sense for the | |
2289 | mode of the allocated scratch register to select a vector of | |
2290 | integer. But the choice to copy the mode of operand 0 was made | |
2291 | long ago and there are no plans to change it. */ | |
2292 | scratch = gen_reg_rtx (mode0); | |
2293 | ||
2294 | pat = GEN_FCN (icode) (scratch, op0, op1); | |
2295 | if (! pat) | |
2296 | return 0; | |
2297 | emit_insn (pat); | |
2298 | ||
2299 | /* The vec_any* and vec_all* predicates use the same opcodes for two | |
2300 | different operations, but the bits in CR6 will be different | |
2301 | depending on what information we want. So we have to play tricks | |
2302 | with CR6 to get the right bits out. | |
2303 | ||
2304 | If you think this is disgusting, look at the specs for the | |
2305 | AltiVec predicates. */ | |
2306 | ||
2307 | switch (cr6_form_int) | |
2308 | { | |
2309 | case 0: | |
2310 | emit_insn (gen_cr6_test_for_zero (target)); | |
2311 | break; | |
2312 | case 1: | |
2313 | emit_insn (gen_cr6_test_for_zero_reverse (target)); | |
2314 | break; | |
2315 | case 2: | |
2316 | emit_insn (gen_cr6_test_for_lt (target)); | |
2317 | break; | |
2318 | case 3: | |
2319 | emit_insn (gen_cr6_test_for_lt_reverse (target)); | |
2320 | break; | |
2321 | default: | |
2322 | error ("argument 1 of %qs is out of range", | |
2323 | "__builtin_altivec_predicate"); | |
2324 | break; | |
2325 | } | |
2326 | ||
2327 | return target; | |
2328 | } | |
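/* At the source level, the four cr6_form values are what separate
   the vec_all_* and vec_any_* predicates sharing one compare opcode.
   A usage illustration (assumes <altivec.h> and a VMX-enabled target;
   this shows the builtins, not the expansion itself):

     #include <altivec.h>

     int
     all_equal (vector int a, vector int b)
     {
       return vec_all_eq (a, b);   // same vcmpequw., one CR6 test
     }

     int
     any_equal (vector int a, vector int b)
     {
       return vec_any_eq (a, b);   // same vcmpequw., another test
     }  */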
2329 | ||
2330 | /* Expand vec_init builtin. */ | |
2331 | static rtx | |
2332 | altivec_expand_vec_init_builtin (tree type, tree exp, rtx target) | |
2333 | { | |
2334 | machine_mode tmode = TYPE_MODE (type); | |
2335 | machine_mode inner_mode = GET_MODE_INNER (tmode); | |
2336 | int i, n_elt = GET_MODE_NUNITS (tmode); | |
2337 | ||
2338 | gcc_assert (VECTOR_MODE_P (tmode)); | |
2339 | gcc_assert (n_elt == call_expr_nargs (exp)); | |
2340 | ||
2341 | if (!target || !register_operand (target, tmode)) | |
2342 | target = gen_reg_rtx (tmode); | |
2343 | ||
2344 | /* If we have a vector comprised of a single element, such as V1TImode, do | |
2345 | the initialization directly. */ | |
2346 | if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode)) | |
2347 | { | |
2348 | rtx x = expand_normal (CALL_EXPR_ARG (exp, 0)); | |
2349 | emit_move_insn (target, gen_lowpart (tmode, x)); | |
2350 | } | |
2351 | else | |
2352 | { | |
2353 | rtvec v = rtvec_alloc (n_elt); | |
2354 | ||
2355 | for (i = 0; i < n_elt; ++i) | |
2356 | { | |
2357 | rtx x = expand_normal (CALL_EXPR_ARG (exp, i)); | |
2358 | RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x); | |
2359 | } | |
2360 | ||
2361 | rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v)); | |
2362 | } | |
2363 | ||
2364 | return target; | |
2365 | } | |
2366 | ||
2367 | /* Return the integer constant in ARG. Constrain it to be in the range | |
2368 | of the subparts of VEC_TYPE; issue an error if not. */ | |
2369 | ||
2370 | static int | |
2371 | get_element_number (tree vec_type, tree arg) | |
2372 | { | |
2373 | unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1; | |
2374 | ||
2375 | if (!tree_fits_uhwi_p (arg) | |
2376 | || (elt = tree_to_uhwi (arg), elt > max)) | |
2377 | { | |
2378 | error ("selector must be an integer constant in the range [0, %wi]", max); | |
2379 | return 0; | |
2380 | } | |
2381 | ||
2382 | return elt; | |
2383 | } | |
2384 | ||
2385 | /* Expand vec_set builtin. */ | |
2386 | static rtx | |
2387 | altivec_expand_vec_set_builtin (tree exp) | |
2388 | { | |
2389 | machine_mode tmode, mode1; | |
2390 | tree arg0, arg1, arg2; | |
2391 | int elt; | |
2392 | rtx op0, op1; | |
2393 | ||
2394 | arg0 = CALL_EXPR_ARG (exp, 0); | |
2395 | arg1 = CALL_EXPR_ARG (exp, 1); | |
2396 | arg2 = CALL_EXPR_ARG (exp, 2); | |
2397 | ||
2398 | tmode = TYPE_MODE (TREE_TYPE (arg0)); | |
2399 | mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); | |
2400 | gcc_assert (VECTOR_MODE_P (tmode)); | |
2401 | ||
2402 | op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL); | |
2403 | op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL); | |
2404 | elt = get_element_number (TREE_TYPE (arg0), arg2); | |
2405 | ||
2406 | if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode) | |
2407 | op1 = convert_modes (mode1, GET_MODE (op1), op1, true); | |
2408 | ||
2409 | op0 = force_reg (tmode, op0); | |
2410 | op1 = force_reg (mode1, op1); | |
2411 | ||
2412 | rs6000_expand_vector_set (op0, op1, GEN_INT (elt)); | |
2413 | ||
2414 | return op0; | |
2415 | } | |
2416 | ||
2417 | /* Expand vec_ext builtin. */ | |
2418 | static rtx | |
2419 | altivec_expand_vec_ext_builtin (tree exp, rtx target) | |
2420 | { | |
2421 | machine_mode tmode, mode0; | |
2422 | tree arg0, arg1; | |
2423 | rtx op0; | |
2424 | rtx op1; | |
2425 | ||
2426 | arg0 = CALL_EXPR_ARG (exp, 0); | |
2427 | arg1 = CALL_EXPR_ARG (exp, 1); | |
2428 | ||
2429 | op0 = expand_normal (arg0); | |
2430 | op1 = expand_normal (arg1); | |
2431 | ||
2432 | if (TREE_CODE (arg1) == INTEGER_CST) | |
2433 | { | |
2434 | unsigned HOST_WIDE_INT elt; | |
2435 | unsigned HOST_WIDE_INT size = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); | |
2436 | unsigned int truncated_selector; | |
2437 | /* Even if !tree_fits_uhwi_p (arg1), TREE_INT_CST_LOW (arg1) | |
2438 | returns the low-order bits of the INTEGER_CST for modulo indexing. */ | |
2439 | elt = TREE_INT_CST_LOW (arg1); | |
2440 | truncated_selector = elt % size; | |
2441 | op1 = GEN_INT (truncated_selector); | |
2442 | } | |
2443 | ||
2444 | tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); | |
2445 | mode0 = TYPE_MODE (TREE_TYPE (arg0)); | |
2446 | gcc_assert (VECTOR_MODE_P (mode0)); | |
2447 | ||
2448 | op0 = force_reg (mode0, op0); | |
2449 | ||
2450 | if (optimize || !target || !register_operand (target, tmode)) | |
2451 | target = gen_reg_rtx (tmode); | |
2452 | ||
2453 | rs6000_expand_vector_extract (target, op0, op1); | |
2454 | ||
2455 | return target; | |
2456 | } | |
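/* Note the modulo semantics above: a constant selector no smaller
   than the element count is reduced rather than rejected, so on a
   V4SI vector an index of 5 selects element 1.  For example (assumes
   <altivec.h>):

     #include <altivec.h>

     int
     extract_wraps (vector int v)
     {
       // The selector is truncated modulo the subpart count.
       return vec_extract (v, 5) == vec_extract (v, 1);   // 1
     }  */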
2457 | ||
2458 | /* Expand ALTIVEC_BUILTIN_MASK_FOR_LOAD. */ | |
2459 | rtx | |
2460 | rs6000_expand_ldst_mask (rtx target, tree arg0) | |
2461 | { | |
2462 | int icode2 = BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct | |
2463 | : (int) CODE_FOR_altivec_lvsl_direct; | |
2464 | machine_mode tmode = insn_data[icode2].operand[0].mode; | |
2465 | machine_mode mode = insn_data[icode2].operand[1].mode; | |
2466 | ||
2467 | gcc_assert (TARGET_ALTIVEC); | |
2468 | ||
2469 | gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg0))); | |
2470 | rtx op = expand_expr (arg0, NULL_RTX, Pmode, EXPAND_NORMAL); | |
2471 | rtx addr = memory_address (mode, op); | |
2472 | /* We need to negate the address. */ | |
2473 | op = gen_reg_rtx (GET_MODE (addr)); | |
2474 | emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr))); | |
2475 | op = gen_rtx_MEM (mode, op); | |
2476 | ||
2477 | if (target == 0 | |
2478 | || GET_MODE (target) != tmode | |
2479 | || !insn_data[icode2].operand[0].predicate (target, tmode)) | |
2480 | target = gen_reg_rtx (tmode); | |
2481 | ||
2482 | rtx pat = GEN_FCN (icode2) (target, op); | |
2483 | if (!pat) | |
2484 | return 0; | |
2485 | emit_insn (pat); | |
2486 | ||
2487 | return target; | |
2488 | } | |
2489 | ||
2490 | /* Used by __builtin_cpu_is(), mapping from PLATFORM names to values. */ | |
2491 | static const struct | |
2492 | { | |
2493 | const char *cpu; | |
2494 | unsigned int cpuid; | |
2495 | } cpu_is_info[] = { | |
2496 | { "power10", PPC_PLATFORM_POWER10 }, | |
2497 | { "power9", PPC_PLATFORM_POWER9 }, | |
2498 | { "power8", PPC_PLATFORM_POWER8 }, | |
2499 | { "power7", PPC_PLATFORM_POWER7 }, | |
2500 | { "power6x", PPC_PLATFORM_POWER6X }, | |
2501 | { "power6", PPC_PLATFORM_POWER6 }, | |
2502 | { "power5+", PPC_PLATFORM_POWER5_PLUS }, | |
2503 | { "power5", PPC_PLATFORM_POWER5 }, | |
2504 | { "ppc970", PPC_PLATFORM_PPC970 }, | |
2505 | { "power4", PPC_PLATFORM_POWER4 }, | |
2506 | { "ppca2", PPC_PLATFORM_PPCA2 }, | |
2507 | { "ppc476", PPC_PLATFORM_PPC476 }, | |
2508 | { "ppc464", PPC_PLATFORM_PPC464 }, | |
2509 | { "ppc440", PPC_PLATFORM_PPC440 }, | |
2510 | { "ppc405", PPC_PLATFORM_PPC405 }, | |
2511 | { "ppc-cell-be", PPC_PLATFORM_CELL_BE } | |
2512 | }; | |
2513 | ||
2514 | /* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks. */ | |
2515 | static const struct | |
2516 | { | |
2517 | const char *hwcap; | |
2518 | int mask; | |
2519 | unsigned int id; | |
2520 | } cpu_supports_info[] = { | |
2521 | /* AT_HWCAP masks. */ | |
2522 | { "4xxmac", PPC_FEATURE_HAS_4xxMAC, 0 }, | |
2523 | { "altivec", PPC_FEATURE_HAS_ALTIVEC, 0 }, | |
2524 | { "arch_2_05", PPC_FEATURE_ARCH_2_05, 0 }, | |
2525 | { "arch_2_06", PPC_FEATURE_ARCH_2_06, 0 }, | |
2526 | { "archpmu", PPC_FEATURE_PERFMON_COMPAT, 0 }, | |
2527 | { "booke", PPC_FEATURE_BOOKE, 0 }, | |
2528 | { "cellbe", PPC_FEATURE_CELL_BE, 0 }, | |
2529 | { "dfp", PPC_FEATURE_HAS_DFP, 0 }, | |
2530 | { "efpdouble", PPC_FEATURE_HAS_EFP_DOUBLE, 0 }, | |
2531 | { "efpsingle", PPC_FEATURE_HAS_EFP_SINGLE, 0 }, | |
2532 | { "fpu", PPC_FEATURE_HAS_FPU, 0 }, | |
2533 | { "ic_snoop", PPC_FEATURE_ICACHE_SNOOP, 0 }, | |
2534 | { "mmu", PPC_FEATURE_HAS_MMU, 0 }, | |
2535 | { "notb", PPC_FEATURE_NO_TB, 0 }, | |
2536 | { "pa6t", PPC_FEATURE_PA6T, 0 }, | |
2537 | { "power4", PPC_FEATURE_POWER4, 0 }, | |
2538 | { "power5", PPC_FEATURE_POWER5, 0 }, | |
2539 | { "power5+", PPC_FEATURE_POWER5_PLUS, 0 }, | |
2540 | { "power6x", PPC_FEATURE_POWER6_EXT, 0 }, | |
2541 | { "ppc32", PPC_FEATURE_32, 0 }, | |
2542 | { "ppc601", PPC_FEATURE_601_INSTR, 0 }, | |
2543 | { "ppc64", PPC_FEATURE_64, 0 }, | |
2544 | { "ppcle", PPC_FEATURE_PPC_LE, 0 }, | |
2545 | { "smt", PPC_FEATURE_SMT, 0 }, | |
2546 | { "spe", PPC_FEATURE_HAS_SPE, 0 }, | |
2547 | { "true_le", PPC_FEATURE_TRUE_LE, 0 }, | |
2548 | { "ucache", PPC_FEATURE_UNIFIED_CACHE, 0 }, | |
2549 | { "vsx", PPC_FEATURE_HAS_VSX, 0 }, | |
2550 | ||
2551 | /* AT_HWCAP2 masks. */ | |
2552 | { "arch_2_07", PPC_FEATURE2_ARCH_2_07, 1 }, | |
2553 | { "dscr", PPC_FEATURE2_HAS_DSCR, 1 }, | |
2554 | { "ebb", PPC_FEATURE2_HAS_EBB, 1 }, | |
2555 | { "htm", PPC_FEATURE2_HAS_HTM, 1 }, | |
2556 | { "htm-nosc", PPC_FEATURE2_HTM_NOSC, 1 }, | |
2557 | { "htm-no-suspend", PPC_FEATURE2_HTM_NO_SUSPEND, 1 }, | |
2558 | { "isel", PPC_FEATURE2_HAS_ISEL, 1 }, | |
2559 | { "tar", PPC_FEATURE2_HAS_TAR, 1 }, | |
2560 | { "vcrypto", PPC_FEATURE2_HAS_VEC_CRYPTO, 1 }, | |
2561 | { "arch_3_00", PPC_FEATURE2_ARCH_3_00, 1 }, | |
2562 | { "ieee128", PPC_FEATURE2_HAS_IEEE128, 1 }, | |
2563 | { "darn", PPC_FEATURE2_DARN, 1 }, | |
2564 | { "scv", PPC_FEATURE2_SCV, 1 }, | |
2565 | { "arch_3_1", PPC_FEATURE2_ARCH_3_1, 1 }, | |
2566 | { "mma", PPC_FEATURE2_MMA, 1 }, | |
2567 | }; | |
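/* The strings accepted by __builtin_cpu_is and __builtin_cpu_supports
   are exactly the keys in the two tables above.  Typical use for
   runtime dispatch (requires a glibc that exports the HWCAP bits in
   the TCB, per the expansion below):

     #include <stdio.h>

     int
     main (void)
     {
       __builtin_cpu_init ();   // a nop on Power; see below
       if (__builtin_cpu_is ("power10"))
         puts ("running on a POWER10 platform");
       if (__builtin_cpu_supports ("vsx"))
         puts ("VSX is available");
       return 0;
     }  */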
2568 | ||
2569 | /* Expand the CPU builtin in FCODE and store the result in TARGET. */ | |
2570 | static rtx | |
2571 | cpu_expand_builtin (enum rs6000_gen_builtins fcode, | |
2572 | tree exp ATTRIBUTE_UNUSED, rtx target) | |
2573 | { | |
2574 | /* __builtin_cpu_init () is a nop, so expand to nothing. */ | |
2575 | if (fcode == RS6000_BIF_CPU_INIT) | |
2576 | return const0_rtx; | |
2577 | ||
2578 | if (target == 0 || GET_MODE (target) != SImode) | |
2579 | target = gen_reg_rtx (SImode); | |
2580 | ||
2581 | /* TODO: Factor the #ifdef'd code into a separate function. */ | |
2582 | #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB | |
2583 | tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0); | |
2584 | /* Target clones create an ARRAY_REF instead of a STRING_CST; convert it back | |
2585 | to a STRING_CST. */ | |
2586 | if (TREE_CODE (arg) == ARRAY_REF | |
2587 | && TREE_CODE (TREE_OPERAND (arg, 0)) == STRING_CST | |
2588 | && TREE_CODE (TREE_OPERAND (arg, 1)) == INTEGER_CST | |
2589 | && compare_tree_int (TREE_OPERAND (arg, 1), 0) == 0) | |
2590 | arg = TREE_OPERAND (arg, 0); | |
2591 | ||
2592 | if (TREE_CODE (arg) != STRING_CST) | |
2593 | { | |
2594 | error ("builtin %qs only accepts a string argument", | |
2595 | rs6000_builtin_info[(size_t) fcode].bifname); | |
2596 | return const0_rtx; | |
2597 | } | |
2598 | ||
2599 | if (fcode == RS6000_BIF_CPU_IS) | |
2600 | { | |
2601 | const char *cpu = TREE_STRING_POINTER (arg); | |
2602 | rtx cpuid = NULL_RTX; | |
2603 | for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++) | |
2604 | if (strcmp (cpu, cpu_is_info[i].cpu) == 0) | |
2605 | { | |
2606 | /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */ | |
2607 | cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM); | |
2608 | break; | |
2609 | } | |
2610 | if (cpuid == NULL_RTX) | |
2611 | { | |
2612 | /* Invalid CPU argument. */ | |
2613 | error ("cpu %qs is an invalid argument to builtin %qs", | |
2614 | cpu, rs6000_builtin_info[(size_t) fcode].bifname); | |
2615 | return const0_rtx; | |
2616 | } | |
2617 | ||
2618 | rtx platform = gen_reg_rtx (SImode); | |
2619 | rtx address = gen_rtx_PLUS (Pmode, | |
2620 | gen_rtx_REG (Pmode, TLS_REGNUM), | |
2621 | GEN_INT (TCB_PLATFORM_OFFSET)); | |
2622 | rtx tcbmem = gen_const_mem (SImode, address); | |
2623 | emit_move_insn (platform, tcbmem); | |
2624 | emit_insn (gen_eqsi3 (target, platform, cpuid)); | |
2625 | } | |
2626 | else if (fcode == RS6000_BIF_CPU_SUPPORTS) | |
2627 | { | |
2628 | const char *hwcap = TREE_STRING_POINTER (arg); | |
2629 | rtx mask = NULL_RTX; | |
2630 | int hwcap_offset; | |
2631 | for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++) | |
2632 | if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0) | |
2633 | { | |
2634 | mask = GEN_INT (cpu_supports_info[i].mask); | |
2635 | hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id); | |
2636 | break; | |
2637 | } | |
2638 | if (mask == NULL_RTX) | |
2639 | { | |
2640 | /* Invalid HWCAP argument. */ | |
2641 | error ("%s %qs is an invalid argument to builtin %qs", | |
2642 | "hwcap", hwcap, | |
2643 | rs6000_builtin_info[(size_t) fcode].bifname); | |
2644 | return const0_rtx; | |
2645 | } | |
2646 | ||
2647 | rtx tcb_hwcap = gen_reg_rtx (SImode); | |
2648 | rtx address = gen_rtx_PLUS (Pmode, | |
2649 | gen_rtx_REG (Pmode, TLS_REGNUM), | |
2650 | GEN_INT (hwcap_offset)); | |
2651 | rtx tcbmem = gen_const_mem (SImode, address); | |
2652 | emit_move_insn (tcb_hwcap, tcbmem); | |
2653 | rtx scratch1 = gen_reg_rtx (SImode); | |
2654 | emit_insn (gen_rtx_SET (scratch1, | |
2655 | gen_rtx_AND (SImode, tcb_hwcap, mask))); | |
2656 | rtx scratch2 = gen_reg_rtx (SImode); | |
2657 | emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx)); | |
2658 | emit_insn (gen_rtx_SET (target, | |
2659 | gen_rtx_XOR (SImode, scratch2, const1_rtx))); | |
2660 | } | |
2661 | else | |
2662 | gcc_unreachable (); | |
2663 | ||
2664 | /* Record that we have expanded a CPU builtin, so that we can later | |
2665 | emit a reference to the special symbol exported by LIBC to ensure we | |
2666 | do not link against an old LIBC that doesn't support this feature. */ | |
2667 | cpu_builtin_p = true; | |
2668 | ||
2669 | #else | |
2670 | warning (0, "builtin %qs needs GLIBC (2.23 and newer) that exports hardware " | |
2671 | "capability bits", rs6000_builtin_info[(size_t) fcode].bifname); | |
2672 | ||
2673 | /* For old LIBCs, always return FALSE. */ | |
2674 | emit_move_insn (target, GEN_INT (0)); | |
2675 | #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */ | |
2676 | ||
2677 | return target; | |
2678 | } | |

/* For the element-reversing load/store built-ins, produce the correct
   insn_code depending on the target endianness.  */
static insn_code
elemrev_icode (rs6000_gen_builtins fcode)
{
  switch (fcode)
    {
    case RS6000_BIF_ST_ELEMREV_V1TI:
      return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v1ti
			      : CODE_FOR_vsx_st_elemrev_v1ti;

    case RS6000_BIF_ST_ELEMREV_V2DF:
      return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
			      : CODE_FOR_vsx_st_elemrev_v2df;

    case RS6000_BIF_ST_ELEMREV_V2DI:
      return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
			      : CODE_FOR_vsx_st_elemrev_v2di;

    case RS6000_BIF_ST_ELEMREV_V4SF:
      return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
			      : CODE_FOR_vsx_st_elemrev_v4sf;

    case RS6000_BIF_ST_ELEMREV_V4SI:
      return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
			      : CODE_FOR_vsx_st_elemrev_v4si;

    case RS6000_BIF_ST_ELEMREV_V8HI:
      return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
			      : CODE_FOR_vsx_st_elemrev_v8hi;

    case RS6000_BIF_ST_ELEMREV_V16QI:
      return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
			      : CODE_FOR_vsx_st_elemrev_v16qi;

    case RS6000_BIF_LD_ELEMREV_V2DF:
      return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
			      : CODE_FOR_vsx_ld_elemrev_v2df;

    case RS6000_BIF_LD_ELEMREV_V1TI:
      return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v1ti
			      : CODE_FOR_vsx_ld_elemrev_v1ti;

    case RS6000_BIF_LD_ELEMREV_V2DI:
      return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
			      : CODE_FOR_vsx_ld_elemrev_v2di;

    case RS6000_BIF_LD_ELEMREV_V4SF:
      return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
			      : CODE_FOR_vsx_ld_elemrev_v4sf;

    case RS6000_BIF_LD_ELEMREV_V4SI:
      return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
			      : CODE_FOR_vsx_ld_elemrev_v4si;

    case RS6000_BIF_LD_ELEMREV_V8HI:
      return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
			      : CODE_FOR_vsx_ld_elemrev_v8hi;

    case RS6000_BIF_LD_ELEMREV_V16QI:
      return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
			      : CODE_FOR_vsx_ld_elemrev_v16qi;
    default:
      ;
    }

  gcc_unreachable ();
}
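
/* Illustrative note, not normative: on a little-endian target, an
   element-order load such as vec_xl on vector double reaches this
   function as RS6000_BIF_LD_ELEMREV_V2DF and gets the element-reversing
   CODE_FOR_vsx_ld_elemrev_v2df, while on big-endian the same builtin
   degenerates to the plain CODE_FOR_vsx_load_v2df, since register
   element order already matches memory order there.  */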

/* Expand an AltiVec vector load builtin, and return the expanded rtx.  */
static rtx
ldv_expand_builtin (rtx target, insn_code icode, rtx *op, machine_mode tmode)
{
  if (target == 0
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  op[1] = copy_to_mode_reg (Pmode, op[1]);

  /* These CELL built-ins use BLKmode instead of tmode for historical
     (i.e., unknown) reasons.  TODO: Is this necessary?  */
  bool blk = (icode == CODE_FOR_altivec_lvlx
	      || icode == CODE_FOR_altivec_lvlxl
	      || icode == CODE_FOR_altivec_lvrx
	      || icode == CODE_FOR_altivec_lvrxl);

  /* For LVX, express the RTL accurately by ANDing the address with -16.
     LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
     so the raw address is fine.  */
  /* TODO: That statement seems wrong, as the UNSPECs don't surround the
     memory expression, so a latent bug may lie here.  The &-16 is likely
     needed for all VMX-style loads.  */
  if (icode == CODE_FOR_altivec_lvx_v1ti
      || icode == CODE_FOR_altivec_lvx_v2df
      || icode == CODE_FOR_altivec_lvx_v2di
      || icode == CODE_FOR_altivec_lvx_v4sf
      || icode == CODE_FOR_altivec_lvx_v4si
      || icode == CODE_FOR_altivec_lvx_v8hi
      || icode == CODE_FOR_altivec_lvx_v16qi)
    {
      rtx rawaddr;
      if (op[0] == const0_rtx)
	rawaddr = op[1];
      else
	{
	  op[0] = copy_to_mode_reg (Pmode, op[0]);
	  rawaddr = gen_rtx_PLUS (Pmode, op[1], op[0]);
	}
      rtx addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
      addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);

      emit_insn (gen_rtx_SET (target, addr));
    }
  else
    {
      rtx addr;
      if (op[0] == const0_rtx)
	addr = gen_rtx_MEM (blk ? BLKmode : tmode, op[1]);
      else
	{
	  op[0] = copy_to_mode_reg (Pmode, op[0]);
	  addr = gen_rtx_MEM (blk ? BLKmode : tmode,
			      gen_rtx_PLUS (Pmode, op[1], op[0]));
	}

      rtx pat = GEN_FCN (icode) (target, addr);
      if (!pat)
	return 0;
      emit_insn (pat);
    }

  return target;
}
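
/* For reference, a sketch of the RTL emitted above for an LVX-style
   load with a nonzero offset, assuming 64-bit Pmode and V4SI:

     (set (reg:V4SI target)
	  (mem:V4SI (and:DI (plus:DI (reg:DI op1) (reg:DI op0))
			    (const_int -16))))

   The AND with -16 makes explicit the hardware's silent truncation of
   the effective address to a 16-byte boundary.  */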

/* Expand a builtin function that loads a scalar into a vector register
   with sign extension, and return the expanded rtx.  */
static rtx
lxvrse_expand_builtin (rtx target, insn_code icode, rtx *op,
		       machine_mode tmode, machine_mode smode)
{
  rtx pat, addr;
  op[1] = copy_to_mode_reg (Pmode, op[1]);

  if (op[0] == const0_rtx)
    addr = gen_rtx_MEM (tmode, op[1]);
  else
    {
      op[0] = copy_to_mode_reg (Pmode, op[0]);
      addr = gen_rtx_MEM (smode,
			  gen_rtx_PLUS (Pmode, op[1], op[0]));
    }

  rtx discratch = gen_reg_rtx (V2DImode);
  rtx tiscratch = gen_reg_rtx (TImode);

  /* Emit the lxvr*x insn.  */
  pat = GEN_FCN (icode) (tiscratch, addr);
  if (!pat)
    return 0;
  emit_insn (pat);

  /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI.  */
  rtx temp1;
  if (icode == CODE_FOR_vsx_lxvrbx)
    {
      temp1 = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
      emit_insn (gen_vsx_sign_extend_v16qi_v2di (discratch, temp1));
    }
  else if (icode == CODE_FOR_vsx_lxvrhx)
    {
      temp1 = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
      emit_insn (gen_vsx_sign_extend_v8hi_v2di (discratch, temp1));
    }
  else if (icode == CODE_FOR_vsx_lxvrwx)
    {
      temp1 = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
      emit_insn (gen_vsx_sign_extend_v4si_v2di (discratch, temp1));
    }
  else if (icode == CODE_FOR_vsx_lxvrdx)
    discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
  else
    gcc_unreachable ();

  /* Emit the sign extension from V2DI (double) to TI (quad).  */
  rtx temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
  emit_insn (gen_extendditi2_vector (target, temp2));

  return target;
}
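
/* Usage sketch (assumed example): on Power10,
   vec_xl_sext (0, (signed char *) p) loads one byte with lxvrbx and is
   then widened here via V16QI -> V2DI -> TI, producing a vector signed
   __int128 holding the sign-extended byte.  */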

/* Expand a builtin function that loads a scalar into a vector register
   with zero extension, and return the expanded rtx.  */
static rtx
lxvrze_expand_builtin (rtx target, insn_code icode, rtx *op,
		       machine_mode tmode, machine_mode smode)
{
  rtx pat, addr;
  op[1] = copy_to_mode_reg (Pmode, op[1]);

  if (op[0] == const0_rtx)
    addr = gen_rtx_MEM (tmode, op[1]);
  else
    {
      op[0] = copy_to_mode_reg (Pmode, op[0]);
      addr = gen_rtx_MEM (smode,
			  gen_rtx_PLUS (Pmode, op[1], op[0]));
    }

  pat = GEN_FCN (icode) (target, addr);
  if (!pat)
    return 0;
  emit_insn (pat);
  return target;
}
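
/* Sketch: the zero-extending variant needs no extension sequence,
   because the lxvr*x instructions already clear the rest of the
   register; e.g. a vec_xl_zext (0, (unsigned int *) p) call (assumed
   example) becomes a single lxvrwx.  */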

/* Expand an AltiVec vector store builtin, and return the expanded rtx.  */
static rtx
stv_expand_builtin (insn_code icode, rtx *op,
		    machine_mode tmode, machine_mode smode)
{
  op[2] = copy_to_mode_reg (Pmode, op[2]);

  /* For STVX, express the RTL accurately by ANDing the address with -16.
     STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
     so the raw address is fine.  */
  /* TODO: That statement seems wrong, as the UNSPECs don't surround the
     memory expression, so a latent bug may lie here.  The &-16 is likely
     needed for all VMX-style stores.  */
  if (icode == CODE_FOR_altivec_stvx_v2df
      || icode == CODE_FOR_altivec_stvx_v2di
      || icode == CODE_FOR_altivec_stvx_v4sf
      || icode == CODE_FOR_altivec_stvx_v4si
      || icode == CODE_FOR_altivec_stvx_v8hi
      || icode == CODE_FOR_altivec_stvx_v16qi)
    {
      rtx rawaddr;
      if (op[1] == const0_rtx)
	rawaddr = op[2];
      else
	{
	  op[1] = copy_to_mode_reg (Pmode, op[1]);
	  rawaddr = gen_rtx_PLUS (Pmode, op[2], op[1]);
	}

      rtx addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
      addr = gen_rtx_MEM (tmode, addr);
      op[0] = copy_to_mode_reg (tmode, op[0]);
      emit_insn (gen_rtx_SET (addr, op[0]));
    }
  else if (icode == CODE_FOR_vsx_stxvrbx
	   || icode == CODE_FOR_vsx_stxvrhx
	   || icode == CODE_FOR_vsx_stxvrwx
	   || icode == CODE_FOR_vsx_stxvrdx)
    {
      rtx truncrtx = gen_rtx_TRUNCATE (tmode, op[0]);
      op[0] = copy_to_mode_reg (E_TImode, truncrtx);

      rtx addr;
      if (op[1] == const0_rtx)
	addr = gen_rtx_MEM (tmode, op[2]);
      else
	{
	  op[1] = copy_to_mode_reg (Pmode, op[1]);
	  addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op[2], op[1]));
	}
      rtx pat = GEN_FCN (icode) (addr, op[0]);
      if (pat)
	emit_insn (pat);
    }
  else
    {
      if (!insn_data[icode].operand[1].predicate (op[0], smode))
	op[0] = copy_to_mode_reg (smode, op[0]);

      rtx addr;
      if (op[1] == const0_rtx)
	addr = gen_rtx_MEM (tmode, op[2]);
      else
	{
	  op[1] = copy_to_mode_reg (Pmode, op[1]);
	  addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op[2], op[1]));
	}

      rtx pat = GEN_FCN (icode) (addr, op[0]);
      if (pat)
	emit_insn (pat);
    }

  return NULL_RTX;
}
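
/* For reference, the STVX-style path above emits RTL of roughly this
   shape (sketch, assuming 64-bit Pmode and V4SI):

     (set (mem:V4SI (and:DI (plus:DI (reg:DI op2) (reg:DI op1))
			    (const_int -16)))
	  (reg:V4SI op0))

   mirroring the load case: the AND with -16 models the 16-byte
   alignment the hardware applies to the effective address.  */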

/* Expand the MMA built-in in EXP, and return it.  */
static rtx
mma_expand_builtin (tree exp, rtx target, insn_code icode,
		    rs6000_gen_builtins fcode)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  bool void_func = TREE_TYPE (TREE_TYPE (fndecl)) == void_type_node;
  machine_mode tmode = VOIDmode;
  rtx op[MAX_MMA_OPERANDS];
  unsigned nopnds = 0;

  if (!void_func)
    {
      tmode = insn_data[icode].operand[0].mode;
      if (!(target
	    && GET_MODE (target) == tmode
	    && insn_data[icode].operand[0].predicate (target, tmode)))
	target = gen_reg_rtx (tmode);
      op[nopnds++] = target;
    }
  else
    target = const0_rtx;

  call_expr_arg_iterator iter;
  tree arg;
  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
    {
      if (arg == error_mark_node)
	return const0_rtx;

      rtx opnd;
      const struct insn_operand_data *insn_op;
      insn_op = &insn_data[icode].operand[nopnds];
      if (TREE_CODE (arg) == ADDR_EXPR
	  && MEM_P (DECL_RTL (TREE_OPERAND (arg, 0))))
	opnd = DECL_RTL (TREE_OPERAND (arg, 0));
      else
	opnd = expand_normal (arg);

      if (!insn_op->predicate (opnd, insn_op->mode))
	{
	  /* TODO: This use of constraints needs explanation.  */
	  if (!strcmp (insn_op->constraint, "n"))
	    {
	      if (!CONST_INT_P (opnd))
		error ("argument %d must be an unsigned literal", nopnds);
	      else
		error ("argument %d is an unsigned literal that is "
		       "out of range", nopnds);
	      return const0_rtx;
	    }
	  opnd = copy_to_mode_reg (insn_op->mode, opnd);
	}

      /* Some MMA instructions have INOUT accumulator operands, so force
	 their target register to be the same as their input register.  */
      if (!void_func
	  && nopnds == 1
	  && !strcmp (insn_op->constraint, "0")
	  && insn_op->mode == tmode
	  && REG_P (opnd)
	  && insn_data[icode].operand[0].predicate (opnd, tmode))
	target = op[0] = opnd;

      op[nopnds++] = opnd;
    }

  rtx pat;
  switch (nopnds)
    {
    case 1:
      pat = GEN_FCN (icode) (op[0]);
      break;
    case 2:
      pat = GEN_FCN (icode) (op[0], op[1]);
      break;
    case 3:
      /* The ASSEMBLE builtin source operands are reversed in little-endian
	 mode, so reorder them.  */
      if (fcode == RS6000_BIF_ASSEMBLE_PAIR_V_INTERNAL && !WORDS_BIG_ENDIAN)
	std::swap (op[1], op[2]);
      pat = GEN_FCN (icode) (op[0], op[1], op[2]);
      break;
    case 4:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
      break;
    case 5:
      /* The ASSEMBLE builtin source operands are reversed in little-endian
	 mode, so reorder them.  */
      if (fcode == RS6000_BIF_ASSEMBLE_ACC_INTERNAL && !WORDS_BIG_ENDIAN)
	{
	  std::swap (op[1], op[4]);
	  std::swap (op[2], op[3]);
	}
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
      break;
    case 6:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
      break;
    case 7:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5], op[6]);
      break;
    default:
      gcc_unreachable ();
    }

  if (!pat)
    return NULL_RTX;

  emit_insn (pat);
  return target;
}
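
/* Usage sketch (assumed example, not part of this file):

     __vector_quad acc;
     __builtin_mma_xvf32gerpp (&acc, va, vb);

   The accumulator is passed by address; since the underlying insn
   declares an inout accumulator operand (constraint "0"), the loop
   above forces the output register to be the same as that input.  */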

/* Return the correct ICODE value depending on whether we are
   setting or reading the HTM SPRs.  */
static inline enum insn_code
rs6000_htm_spr_icode (bool nonvoid)
{
  if (nonvoid)
    return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
  else
    return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
}

/* Return the appropriate SPR number associated with the given builtin.  */
static inline HOST_WIDE_INT
htm_spr_num (enum rs6000_gen_builtins code)
{
  if (code == RS6000_BIF_GET_TFHAR
      || code == RS6000_BIF_SET_TFHAR)
    return TFHAR_SPR;
  else if (code == RS6000_BIF_GET_TFIAR
	   || code == RS6000_BIF_SET_TFIAR)
    return TFIAR_SPR;
  else if (code == RS6000_BIF_GET_TEXASR
	   || code == RS6000_BIF_SET_TEXASR)
    return TEXASR_SPR;
  gcc_assert (code == RS6000_BIF_GET_TEXASRU
	      || code == RS6000_BIF_SET_TEXASRU);
  return TEXASRU_SPR;
}
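
/* For example, __builtin_get_texasr () maps to RS6000_BIF_GET_TEXASR,
   hence to TEXASR_SPR here, and is expanded through the mfspr pattern
   selected by rs6000_htm_spr_icode above.  */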

/* Expand the HTM builtin in EXP and store the result in TARGET.
   Return the expanded rtx.  */
static rtx
htm_expand_builtin (bifdata *bifaddr, rs6000_gen_builtins fcode,
		    tree exp, rtx target)
{
  if (!TARGET_POWERPC64
      && (fcode == RS6000_BIF_TABORTDC
	  || fcode == RS6000_BIF_TABORTDCI))
    {
      error ("builtin %qs is only valid in 64-bit mode", bifaddr->bifname);
      return const0_rtx;
    }

  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
  bool uses_spr = bif_is_htmspr (*bifaddr);
  insn_code icode = bifaddr->icode;

  if (uses_spr)
    icode = rs6000_htm_spr_icode (nonvoid);

  rtx op[MAX_HTM_OPERANDS];
  int nopnds = 0;
  const insn_operand_data *insn_op = &insn_data[icode].operand[0];

  if (nonvoid)
    {
      machine_mode tmode = (uses_spr) ? insn_op->mode : E_SImode;
      if (!target
	  || GET_MODE (target) != tmode
	  || (uses_spr && !insn_op->predicate (target, tmode)))
	target = gen_reg_rtx (tmode);
      if (uses_spr)
	op[nopnds++] = target;
    }

  tree arg;
  call_expr_arg_iterator iter;

  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
    {
      if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
	return const0_rtx;

      insn_op = &insn_data[icode].operand[nopnds];
      op[nopnds] = expand_normal (arg);

      if (!insn_op->predicate (op[nopnds], insn_op->mode))
	{
	  /* TODO: This use of constraints could use explanation.
	     This happens a couple of places, perhaps make that a
	     function to document what's happening.  */
	  if (!strcmp (insn_op->constraint, "n"))
	    {
	      int arg_num = nonvoid ? nopnds : nopnds + 1;
	      if (!CONST_INT_P (op[nopnds]))
		error ("argument %d must be an unsigned literal", arg_num);
	      else
		error ("argument %d is an unsigned literal that is "
		       "out of range", arg_num);
	      return const0_rtx;
	    }
	  op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
	}

      nopnds++;
    }

  /* Handle the builtins for extended mnemonics.  These accept
     no arguments, but map to builtins that take arguments.  */
  switch (fcode)
    {
    case RS6000_BIF_TENDALL:  /* Alias for: tend. 1  */
    case RS6000_BIF_TRESUME:  /* Alias for: tsr. 1  */
      op[nopnds++] = GEN_INT (1);
      break;
    case RS6000_BIF_TSUSPEND: /* Alias for: tsr. 0  */
      op[nopnds++] = GEN_INT (0);
      break;
    default:
      break;
    }

  /* If this builtin accesses SPRs, then pass in the appropriate
     SPR number and SPR regno as the last two operands.  */
  rtx cr = NULL_RTX;
  if (uses_spr)
    {
      machine_mode mode = TARGET_POWERPC64 ? DImode : SImode;
      op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
    }
  /* If this builtin accesses a CR field, then pass in a scratch
     CR field as the last operand.  */
  else if (bif_is_htmcr (*bifaddr))
    {
      cr = gen_reg_rtx (CCmode);
      op[nopnds++] = cr;
    }

  rtx pat;
  switch (nopnds)
    {
    case 1:
      pat = GEN_FCN (icode) (op[0]);
      break;
    case 2:
      pat = GEN_FCN (icode) (op[0], op[1]);
      break;
    case 3:
      pat = GEN_FCN (icode) (op[0], op[1], op[2]);
      break;
    case 4:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
      break;
    default:
      gcc_unreachable ();
    }
  if (!pat)
    return NULL_RTX;
  emit_insn (pat);

  if (bif_is_htmcr (*bifaddr))
    {
      if (fcode == RS6000_BIF_TBEGIN)
	{
	  /* Emit code to set TARGET to true or false depending on
	     whether the tbegin. instruction succeeded or failed
	     to start a transaction.  We do this by placing the 1's
	     complement of CR's EQ bit into TARGET.  */
	  rtx scratch = gen_reg_rtx (SImode);
	  emit_insn (gen_rtx_SET (scratch,
				  gen_rtx_EQ (SImode, cr,
					      const0_rtx)));
	  emit_insn (gen_rtx_SET (target,
				  gen_rtx_XOR (SImode, scratch,
					       GEN_INT (1))));
	}
      else
	{
	  /* Emit code to copy the 4-bit condition register field
	     CR into the least significant end of register TARGET.  */
	  rtx scratch1 = gen_reg_rtx (SImode);
	  rtx scratch2 = gen_reg_rtx (SImode);
	  rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
	  emit_insn (gen_movcc (subreg, cr));
	  emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
	  emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
	}
    }

  if (nonvoid)
    return target;
  return const0_rtx;
}
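
/* Usage sketch of the CR handling above (illustrative user code only):

     if (__builtin_tbegin (0))
       {
	 ... transactional code ...
	 __builtin_tend (0);
       }
     else
       handle_failure ();

   For tbegin, the complement of the CR EQ bit is copied into the
   target, so a successfully started transaction yields 1.  */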

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.
   Use the new builtin infrastructure.  */
rtx
rs6000_expand_builtin (tree exp, rtx target, rtx /* subtarget */,
		       machine_mode /* mode */, int ignore)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  enum rs6000_gen_builtins fcode
    = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl);

  /* Emit error message if it's an unresolved overloaded builtin.  */
  if (fcode > RS6000_OVLD_NONE)
    {
      error ("unresolved overload for builtin %qF", fndecl);
      return const0_rtx;
    }

  size_t uns_fcode = (size_t) fcode;
  enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;

  /* TODO: The following commentary and code is inherited from the original
     builtin processing code.  The commentary is a bit confusing, with the
     intent being that KFmode is always IEEE-128, IFmode is always IBM
     double-double, and TFmode is the current long double.  The code is
     confusing in that it converts from KFmode to TFmode pattern names,
     when the other direction is more intuitive.  Try to address this.  */

  /* We have two different modes (KFmode, TFmode) that are the IEEE
     128-bit floating point type, depending on whether long double is the
     IBM extended double (KFmode) or long double is IEEE 128-bit (TFmode).
     It is simpler if we only define one variant of the built-in function,
     and switch the code when defining it, rather than defining two built-
     ins and using the overload table in rs6000-c.cc to switch between the
     two.  If we don't have the proper assembler, don't do this switch
     because CODE_FOR_*kf* and CODE_FOR_*tf* will be CODE_FOR_nothing.  */
  if (FLOAT128_IEEE_P (TFmode))
    switch (icode)
      {
      case CODE_FOR_sqrtkf2_odd:
	icode = CODE_FOR_sqrttf2_odd;
	break;
      case CODE_FOR_trunckfdf2_odd:
	icode = CODE_FOR_trunctfdf2_odd;
	break;
      case CODE_FOR_addkf3_odd:
	icode = CODE_FOR_addtf3_odd;
	break;
      case CODE_FOR_subkf3_odd:
	icode = CODE_FOR_subtf3_odd;
	break;
      case CODE_FOR_mulkf3_odd:
	icode = CODE_FOR_multf3_odd;
	break;
      case CODE_FOR_divkf3_odd:
	icode = CODE_FOR_divtf3_odd;
	break;
      case CODE_FOR_fmakf4_odd:
	icode = CODE_FOR_fmatf4_odd;
	break;
      case CODE_FOR_xsxexpqp_kf_di:
	icode = CODE_FOR_xsxexpqp_tf_di;
	break;
      case CODE_FOR_xsxexpqp_kf_v2di:
	icode = CODE_FOR_xsxexpqp_tf_v2di;
	break;
      case CODE_FOR_xsxsigqp_kf_ti:
	icode = CODE_FOR_xsxsigqp_tf_ti;
	break;
      case CODE_FOR_xsxsigqp_kf_v1ti:
	icode = CODE_FOR_xsxsigqp_tf_v1ti;
	break;
      case CODE_FOR_xststdcnegqp_kf:
	icode = CODE_FOR_xststdcnegqp_tf;
	break;
      case CODE_FOR_xsiexpqp_kf_di:
	icode = CODE_FOR_xsiexpqp_tf_di;
	break;
      case CODE_FOR_xsiexpqp_kf_v2di:
	icode = CODE_FOR_xsiexpqp_tf_v2di;
	break;
      case CODE_FOR_xsiexpqpf_kf:
	icode = CODE_FOR_xsiexpqpf_tf;
	break;
      case CODE_FOR_xststdcqp_kf:
	icode = CODE_FOR_xststdcqp_tf;
	break;
      case CODE_FOR_xscmpexpqp_eq_kf:
	icode = CODE_FOR_xscmpexpqp_eq_tf;
	break;
      case CODE_FOR_xscmpexpqp_lt_kf:
	icode = CODE_FOR_xscmpexpqp_lt_tf;
	break;
      case CODE_FOR_xscmpexpqp_gt_kf:
	icode = CODE_FOR_xscmpexpqp_gt_tf;
	break;
      case CODE_FOR_xscmpexpqp_unordered_kf:
	icode = CODE_FOR_xscmpexpqp_unordered_tf;
	break;
      default:
	break;
      }
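
/* For example, under -mabi=ieeelongdouble, TFmode is IEEE 128-bit, so a
   builtin whose pattern was registered as sqrtkf2_odd must instead
   expand via sqrttf2_odd, which is what the switch above arranges.  */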

  /* In case of "#pragma target" changes, we initialize all builtins
     but check for actual availability now, during expand time.  For
     invalid builtins, generate a normal call.  */
  bifdata *bifaddr = &rs6000_builtin_info[uns_fcode];

  if (!rs6000_builtin_is_supported (fcode))
    {
      rs6000_invalid_builtin (fcode);
      return expand_call (exp, target, ignore);
    }

  if (bif_is_nosoft (*bifaddr)
      && rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
    {
      error ("%qs not supported with %<-msoft-float%>",
	     bifaddr->bifname);
      return const0_rtx;
    }

  if (bif_is_no32bit (*bifaddr) && TARGET_32BIT)
    {
      error ("%qs is not supported in 32-bit mode", bifaddr->bifname);
      return const0_rtx;
    }

  if (bif_is_ibmld (*bifaddr) && !FLOAT128_2REG_P (TFmode))
    {
      error ("%qs requires %<long double%> to be IBM 128-bit format",
	     bifaddr->bifname);
      return const0_rtx;
    }

  if (bif_is_ibm128 (*bifaddr) && !ibm128_float_type_node)
    {
      error ("%qs requires %<__ibm128%> type support",
	     bifaddr->bifname);
      return const0_rtx;
    }

  if (bif_is_cpu (*bifaddr))
    return cpu_expand_builtin (fcode, exp, target);

  if (bif_is_init (*bifaddr))
    return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);

  if (bif_is_set (*bifaddr))
    return altivec_expand_vec_set_builtin (exp);

  if (bif_is_extract (*bifaddr))
    return altivec_expand_vec_ext_builtin (exp, target);

  if (bif_is_predicate (*bifaddr))
    return altivec_expand_predicate_builtin (icode, exp, target);

  if (bif_is_htm (*bifaddr))
    return htm_expand_builtin (bifaddr, fcode, exp, target);

  if (bif_is_32bit (*bifaddr) && TARGET_32BIT)
    {
      if (fcode == RS6000_BIF_MFTB)
	icode = CODE_FOR_rs6000_mftb_si;
      else if (fcode == RS6000_BIF_BPERMD)
	icode = CODE_FOR_bpermd_si;
      else if (fcode == RS6000_BIF_DARN)
	icode = CODE_FOR_darn_64_si;
      else if (fcode == RS6000_BIF_DARN_32)
	icode = CODE_FOR_darn_32_si;
      else if (fcode == RS6000_BIF_DARN_RAW)
	icode = CODE_FOR_darn_raw_si;
      else
	gcc_unreachable ();
    }

  if (bif_is_endian (*bifaddr) && BYTES_BIG_ENDIAN)
    {
      if (fcode == RS6000_BIF_LD_ELEMREV_V1TI)
	icode = CODE_FOR_vsx_load_v1ti;
      else if (fcode == RS6000_BIF_LD_ELEMREV_V2DF)
	icode = CODE_FOR_vsx_load_v2df;
      else if (fcode == RS6000_BIF_LD_ELEMREV_V2DI)
	icode = CODE_FOR_vsx_load_v2di;
      else if (fcode == RS6000_BIF_LD_ELEMREV_V4SF)
	icode = CODE_FOR_vsx_load_v4sf;
      else if (fcode == RS6000_BIF_LD_ELEMREV_V4SI)
	icode = CODE_FOR_vsx_load_v4si;
      else if (fcode == RS6000_BIF_LD_ELEMREV_V8HI)
	icode = CODE_FOR_vsx_load_v8hi;
      else if (fcode == RS6000_BIF_LD_ELEMREV_V16QI)
	icode = CODE_FOR_vsx_load_v16qi;
      else if (fcode == RS6000_BIF_ST_ELEMREV_V1TI)
	icode = CODE_FOR_vsx_store_v1ti;
      else if (fcode == RS6000_BIF_ST_ELEMREV_V2DF)
	icode = CODE_FOR_vsx_store_v2df;
      else if (fcode == RS6000_BIF_ST_ELEMREV_V2DI)
	icode = CODE_FOR_vsx_store_v2di;
      else if (fcode == RS6000_BIF_ST_ELEMREV_V4SF)
	icode = CODE_FOR_vsx_store_v4sf;
      else if (fcode == RS6000_BIF_ST_ELEMREV_V4SI)
	icode = CODE_FOR_vsx_store_v4si;
      else if (fcode == RS6000_BIF_ST_ELEMREV_V8HI)
	icode = CODE_FOR_vsx_store_v8hi;
      else if (fcode == RS6000_BIF_ST_ELEMREV_V16QI)
	icode = CODE_FOR_vsx_store_v16qi;
      else if (fcode == RS6000_BIF_VCLZLSBB_V16QI)
	icode = CODE_FOR_vclzlsbb_v16qi;
      else if (fcode == RS6000_BIF_VCLZLSBB_V4SI)
	icode = CODE_FOR_vclzlsbb_v4si;
      else if (fcode == RS6000_BIF_VCLZLSBB_V8HI)
	icode = CODE_FOR_vclzlsbb_v8hi;
      else if (fcode == RS6000_BIF_VCTZLSBB_V16QI)
	icode = CODE_FOR_vctzlsbb_v16qi;
      else if (fcode == RS6000_BIF_VCTZLSBB_V4SI)
	icode = CODE_FOR_vctzlsbb_v4si;
      else if (fcode == RS6000_BIF_VCTZLSBB_V8HI)
	icode = CODE_FOR_vctzlsbb_v8hi;
      else
	gcc_unreachable ();
    }

  if (bif_is_ibm128 (*bifaddr) && TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
    {
      if (fcode == RS6000_BIF_PACK_IF)
	{
	  icode = CODE_FOR_packtf;
	  fcode = RS6000_BIF_PACK_TF;
	  uns_fcode = (size_t) fcode;
	}
      else if (fcode == RS6000_BIF_UNPACK_IF)
	{
	  icode = CODE_FOR_unpacktf;
	  fcode = RS6000_BIF_UNPACK_TF;
	  uns_fcode = (size_t) fcode;
	}
    }

  /* TRUE iff the built-in function returns void.  */
  bool void_func = TREE_TYPE (TREE_TYPE (fndecl)) == void_type_node;
  /* Position of first argument (0 for void-returning functions, else 1).  */
  int k;
  /* Modes for the return value, if any, and arguments.  */
  const int MAX_BUILTIN_ARGS = 6;
  machine_mode mode[MAX_BUILTIN_ARGS + 1];

  if (void_func)
    k = 0;
  else
    {
      k = 1;
      mode[0] = insn_data[icode].operand[0].mode;
    }

  /* Tree expressions for each argument.  */
  tree arg[MAX_BUILTIN_ARGS];
  /* RTL expressions for each argument.  */
  rtx op[MAX_BUILTIN_ARGS];

  int nargs = bifaddr->nargs;
  gcc_assert (nargs <= MAX_BUILTIN_ARGS);

  for (int i = 0; i < nargs; i++)
    {
      arg[i] = CALL_EXPR_ARG (exp, i);
      if (arg[i] == error_mark_node)
	return const0_rtx;
      STRIP_NOPS (arg[i]);
      op[i] = expand_normal (arg[i]);
      /* We have a couple of pesky patterns that don't specify the mode...  */
      mode[i+k] = insn_data[icode].operand[i+k].mode;
      if (!mode[i+k])
	mode[i+k] = Pmode;
    }

  /* Check for restricted constant arguments.  */
  for (int i = 0; i < 2; i++)
    {
      switch (bifaddr->restr[i])
	{
	case RES_BITS:
	  {
	    size_t mask = 1;
	    mask <<= bifaddr->restr_val1[i];
	    mask--;
	    tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
	    STRIP_NOPS (restr_arg);
	    if (!(TREE_CODE (restr_arg) == INTEGER_CST
		  && (TREE_INT_CST_LOW (restr_arg) & ~mask) == 0))
	      {
		unsigned p = (1U << bifaddr->restr_val1[i]) - 1;
		error ("argument %d must be a literal between 0 and %d,"
		       " inclusive",
		       bifaddr->restr_opnd[i], p);
		return CONST0_RTX (mode[0]);
	      }
	    break;
	  }
	case RES_RANGE:
	  {
	    tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
	    STRIP_NOPS (restr_arg);
	    if (!(TREE_CODE (restr_arg) == INTEGER_CST
		  && IN_RANGE (tree_to_shwi (restr_arg),
			       bifaddr->restr_val1[i],
			       bifaddr->restr_val2[i])))
	      {
		error ("argument %d must be a literal between %d and %d,"
		       " inclusive",
		       bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
		       bifaddr->restr_val2[i]);
		return CONST0_RTX (mode[0]);
	      }
	    break;
	  }
	case RES_VAR_RANGE:
	  {
	    tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
	    STRIP_NOPS (restr_arg);
	    if (TREE_CODE (restr_arg) == INTEGER_CST
		&& !IN_RANGE (tree_to_shwi (restr_arg),
			      bifaddr->restr_val1[i],
			      bifaddr->restr_val2[i]))
	      {
		error ("argument %d must be a variable or a literal "
		       "between %d and %d, inclusive",
		       bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
		       bifaddr->restr_val2[i]);
		return CONST0_RTX (mode[0]);
	      }
	    break;
	  }
	case RES_VALUES:
	  {
	    tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
	    STRIP_NOPS (restr_arg);
	    if (!(TREE_CODE (restr_arg) == INTEGER_CST
		  && (tree_to_shwi (restr_arg) == bifaddr->restr_val1[i]
		      || tree_to_shwi (restr_arg) == bifaddr->restr_val2[i])))
	      {
		error ("argument %d must be either a literal %d or a "
		       "literal %d",
		       bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
		       bifaddr->restr_val2[i]);
		return CONST0_RTX (mode[0]);
	      }
	    break;
	  }
	default:
	case RES_NONE:
	  break;
	}
    }
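
/* As an illustration of these restrictions (assumed example): a builtin
   such as vec_splat on a four-element vector requires its selector to
   be a literal in [0,3]; with RES_BITS and a restr_val1 of 2 (a 2-bit
   mask), any non-literal or out-of-mask argument is rejected with the
   "must be a literal" diagnostic above.  */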

  if (bif_is_ldstmask (*bifaddr))
    return rs6000_expand_ldst_mask (target, arg[0]);

  if (bif_is_stvec (*bifaddr))
    {
      if (bif_is_reve (*bifaddr))
	icode = elemrev_icode (fcode);
      return stv_expand_builtin (icode, op, mode[0], mode[1]);
    }

  if (bif_is_ldvec (*bifaddr))
    {
      if (bif_is_reve (*bifaddr))
	icode = elemrev_icode (fcode);
      return ldv_expand_builtin (target, icode, op, mode[0]);
    }

  if (bif_is_lxvrse (*bifaddr))
    return lxvrse_expand_builtin (target, icode, op, mode[0], mode[1]);

  if (bif_is_lxvrze (*bifaddr))
    return lxvrze_expand_builtin (target, icode, op, mode[0], mode[1]);

  if (bif_is_mma (*bifaddr))
    return mma_expand_builtin (exp, target, icode, fcode);

  if (TREE_TYPE (TREE_TYPE (fndecl)) == void_type_node)
    target = NULL_RTX;
  else if (target == 0
	   || GET_MODE (target) != mode[0]
	   || !insn_data[icode].operand[0].predicate (target, mode[0]))
    target = gen_reg_rtx (mode[0]);

  for (int i = 0; i < nargs; i++)
    if (!insn_data[icode].operand[i+k].predicate (op[i], mode[i+k]))
      op[i] = copy_to_mode_reg (mode[i+k], op[i]);

  rtx pat;

  switch (nargs)
    {
    case 0:
      pat = (void_func
	     ? GEN_FCN (icode) ()
	     : GEN_FCN (icode) (target));
      break;
    case 1:
      pat = (void_func
	     ? GEN_FCN (icode) (op[0])
	     : GEN_FCN (icode) (target, op[0]));
      break;
    case 2:
      pat = (void_func
	     ? GEN_FCN (icode) (op[0], op[1])
	     : GEN_FCN (icode) (target, op[0], op[1]));
      break;
    case 3:
      pat = (void_func
	     ? GEN_FCN (icode) (op[0], op[1], op[2])
	     : GEN_FCN (icode) (target, op[0], op[1], op[2]));
      break;
    case 4:
      pat = (void_func
	     ? GEN_FCN (icode) (op[0], op[1], op[2], op[3])
	     : GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]));
      break;
    case 5:
      pat = (void_func
	     ? GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4])
	     : GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]));
      break;
    case 6:
      pat = (void_func
	     ? GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5])
	     : GEN_FCN (icode) (target, op[0], op[1],
				op[2], op[3], op[4], op[5]));
      break;
    default:
      gcc_assert (MAX_BUILTIN_ARGS == 6);
      gcc_unreachable ();
    }

  if (!pat)
    return 0;

  emit_insn (pat);
  return target;
}