]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/rs6000/rs6000-builtin.cc
3ce729c1e6de81a4eaf37b931c74bbfd34761a05
[thirdparty/gcc.git] / gcc / config / rs6000 / rs6000-builtin.cc
1 /* Target-specific built-in function support for the Power architecture.
2 See also rs6000-c.c, rs6000-gen-builtins.c, rs6000-builtins.def, and
3 rs6000-overloads.def.
4 Note that "normal" builtins (generic math functions, etc.) are handled
5 in rs6000.c.
6
7 Copyright (C) 2002-2022 Free Software Foundation, Inc.
8
9 This file is part of GCC.
10
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
15
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
24
25 #define IN_TARGET_CODE 1
26
27 #include "config.h"
28 #include "system.h"
29 #include "coretypes.h"
30 #include "target.h"
31 #include "backend.h"
32 #include "rtl.h"
33 #include "tree.h"
34 #include "memmodel.h"
35 #include "gimple.h"
36 #include "tm_p.h"
37 #include "optabs.h"
38 #include "recog.h"
39 #include "diagnostic-core.h"
40 #include "fold-const.h"
41 #include "stor-layout.h"
42 #include "calls.h"
43 #include "varasm.h"
44 #include "explow.h"
45 #include "expr.h"
46 #include "langhooks.h"
47 #include "gimplify.h"
48 #include "gimple-iterator.h"
49 #include "gimple-fold.h"
50 #include "ssa.h"
51 #include "tree-ssa-propagate.h"
52 #include "builtins.h"
53 #include "tree-vector-builder.h"
54 #include "ppc-auxv.h"
55 #include "rs6000-internal.h"
56
57 /* Built in types. */
58 tree rs6000_builtin_types[RS6000_BTI_MAX];
59
60 /* Support targetm.vectorize.builtin_mask_for_load. */
61 tree altivec_builtin_mask_for_load;
62
63 /* **** General support functions **** */
64
65 /* Raise an error message for a builtin function that is called without the
66 appropriate target options being set. */
67
68 void
69 rs6000_invalid_builtin (enum rs6000_gen_builtins fncode)
70 {
71 size_t j = (size_t) fncode;
72 const char *name = rs6000_builtin_info[j].bifname;
73
74 switch (rs6000_builtin_info[j].enable)
75 {
76 case ENB_P5:
77 error ("%qs requires the %qs option", name, "-mcpu=power5");
78 break;
79 case ENB_P6:
80 error ("%qs requires the %qs option", name, "-mcpu=power6");
81 break;
82 case ENB_P6_64:
83 error ("%qs requires the %qs option and either the %qs or %qs option",
84 name, "-mcpu=power6", "-m64", "-mpowerpc64");
85 break;
86 case ENB_ALTIVEC:
87 error ("%qs requires the %qs option", name, "-maltivec");
88 break;
89 case ENB_CELL:
90 error ("%qs requires the %qs option", name, "-mcpu=cell");
91 break;
92 case ENB_VSX:
93 error ("%qs requires the %qs option", name, "-mvsx");
94 break;
95 case ENB_P7:
96 error ("%qs requires the %qs option", name, "-mcpu=power7");
97 break;
98 case ENB_P7_64:
99 error ("%qs requires the %qs option and either the %qs or %qs option",
100 name, "-mcpu=power7", "-m64", "-mpowerpc64");
101 break;
102 case ENB_P8:
103 error ("%qs requires the %qs option", name, "-mcpu=power8");
104 break;
105 case ENB_P8V:
106 error ("%qs requires the %qs and %qs options", name, "-mcpu=power8",
107 "-mvsx");
108 break;
109 case ENB_P9:
110 error ("%qs requires the %qs option", name, "-mcpu=power9");
111 break;
112 case ENB_P9_64:
113 error ("%qs requires the %qs option and either the %qs or %qs option",
114 name, "-mcpu=power9", "-m64", "-mpowerpc64");
115 break;
116 case ENB_P9V:
117 error ("%qs requires the %qs and %qs options", name, "-mcpu=power9",
118 "-mvsx");
119 break;
120 case ENB_IEEE128_HW:
121 error ("%qs requires quad-precision floating-point arithmetic", name);
122 break;
123 case ENB_DFP:
124 error ("%qs requires the %qs option", name, "-mhard-dfp");
125 break;
126 case ENB_CRYPTO:
127 error ("%qs requires the %qs option", name, "-mcrypto");
128 break;
129 case ENB_HTM:
130 error ("%qs requires the %qs option", name, "-mhtm");
131 break;
132 case ENB_P10:
133 error ("%qs requires the %qs option", name, "-mcpu=power10");
134 break;
135 case ENB_P10_64:
136 error ("%qs requires the %qs option and either the %qs or %qs option",
137 name, "-mcpu=power10", "-m64", "-mpowerpc64");
138 break;
139 case ENB_MMA:
140 error ("%qs requires the %qs option", name, "-mmma");
141 break;
142 default:
143 case ENB_ALWAYS:
144 gcc_unreachable ();
145 }
146 }
147
148 /* Check whether a builtin function is supported in this target
149 configuration. */
150 bool
151 rs6000_builtin_is_supported (enum rs6000_gen_builtins fncode)
152 {
153 switch (rs6000_builtin_info[(size_t) fncode].enable)
154 {
155 case ENB_ALWAYS:
156 return true;
157 case ENB_P5:
158 return TARGET_POPCNTB;
159 case ENB_P6:
160 return TARGET_CMPB;
161 case ENB_P6_64:
162 return TARGET_CMPB && TARGET_POWERPC64;
163 case ENB_P7:
164 return TARGET_POPCNTD;
165 case ENB_P7_64:
166 return TARGET_POPCNTD && TARGET_POWERPC64;
167 case ENB_P8:
168 return TARGET_DIRECT_MOVE;
169 case ENB_P8V:
170 return TARGET_P8_VECTOR;
171 case ENB_P9:
172 return TARGET_MODULO;
173 case ENB_P9_64:
174 return TARGET_MODULO && TARGET_POWERPC64;
175 case ENB_P9V:
176 return TARGET_P9_VECTOR;
177 case ENB_P10:
178 return TARGET_POWER10;
179 case ENB_P10_64:
180 return TARGET_POWER10 && TARGET_POWERPC64;
181 case ENB_ALTIVEC:
182 return TARGET_ALTIVEC;
183 case ENB_VSX:
184 return TARGET_VSX;
185 case ENB_CELL:
186 return TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL;
187 case ENB_IEEE128_HW:
188 return TARGET_FLOAT128_HW;
189 case ENB_DFP:
190 return TARGET_DFP;
191 case ENB_CRYPTO:
192 return TARGET_CRYPTO;
193 case ENB_HTM:
194 return TARGET_HTM;
195 case ENB_MMA:
196 return TARGET_MMA;
197 default:
198 gcc_unreachable ();
199 }
200 gcc_unreachable ();
201 }
202
203 /* Target hook for early folding of built-ins, shamelessly stolen
204 from ia64.cc. */
205
206 tree
207 rs6000_fold_builtin (tree fndecl ATTRIBUTE_UNUSED,
208 int n_args ATTRIBUTE_UNUSED,
209 tree *args ATTRIBUTE_UNUSED,
210 bool ignore ATTRIBUTE_UNUSED)
211 {
212 #ifdef SUBTARGET_FOLD_BUILTIN
213 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
214 #else
215 return NULL_TREE;
216 #endif
217 }
218
219 tree
220 rs6000_builtin_decl (unsigned code, bool /* initialize_p */)
221 {
222 rs6000_gen_builtins fcode = (rs6000_gen_builtins) code;
223
224 if (fcode >= RS6000_OVLD_MAX)
225 return error_mark_node;
226
227 return rs6000_builtin_decls[code];
228 }
229
230 /* Implement targetm.vectorize.builtin_mask_for_load. */
231 tree
232 rs6000_builtin_mask_for_load (void)
233 {
234 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
235 if ((TARGET_ALTIVEC && !TARGET_VSX)
236 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
237 return altivec_builtin_mask_for_load;
238 else
239 return 0;
240 }
241
242 /* Implement targetm.vectorize.builtin_md_vectorized_function. */
243
244 tree
245 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
246 tree type_in)
247 {
248 machine_mode in_mode, out_mode;
249 int in_n, out_n;
250
251 if (TARGET_DEBUG_BUILTIN)
252 fprintf (stderr,
253 "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
254 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
255 GET_MODE_NAME (TYPE_MODE (type_out)),
256 GET_MODE_NAME (TYPE_MODE (type_in)));
257
258 /* TODO: Should this be gcc_assert? */
259 if (TREE_CODE (type_out) != VECTOR_TYPE
260 || TREE_CODE (type_in) != VECTOR_TYPE)
261 return NULL_TREE;
262
263 out_mode = TYPE_MODE (TREE_TYPE (type_out));
264 out_n = TYPE_VECTOR_SUBPARTS (type_out);
265 in_mode = TYPE_MODE (TREE_TYPE (type_in));
266 in_n = TYPE_VECTOR_SUBPARTS (type_in);
267
268 enum rs6000_gen_builtins fn
269 = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl);
270 switch (fn)
271 {
272 case RS6000_BIF_RSQRTF:
273 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
274 && out_mode == SFmode && out_n == 4
275 && in_mode == SFmode && in_n == 4)
276 return rs6000_builtin_decls[RS6000_BIF_VRSQRTFP];
277 break;
278 case RS6000_BIF_RSQRT:
279 if (VECTOR_UNIT_VSX_P (V2DFmode)
280 && out_mode == DFmode && out_n == 2
281 && in_mode == DFmode && in_n == 2)
282 return rs6000_builtin_decls[RS6000_BIF_RSQRT_2DF];
283 break;
284 case RS6000_BIF_RECIPF:
285 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
286 && out_mode == SFmode && out_n == 4
287 && in_mode == SFmode && in_n == 4)
288 return rs6000_builtin_decls[RS6000_BIF_VRECIPFP];
289 break;
290 case RS6000_BIF_RECIP:
291 if (VECTOR_UNIT_VSX_P (V2DFmode)
292 && out_mode == DFmode && out_n == 2
293 && in_mode == DFmode && in_n == 2)
294 return rs6000_builtin_decls[RS6000_BIF_RECIP_V2DF];
295 break;
296 default:
297 break;
298 }
299
300 machine_mode in_vmode = TYPE_MODE (type_in);
301 machine_mode out_vmode = TYPE_MODE (type_out);
302
303 /* Power10 supported vectorized built-in functions. */
304 if (TARGET_POWER10
305 && in_vmode == out_vmode
306 && VECTOR_UNIT_ALTIVEC_OR_VSX_P (in_vmode))
307 {
308 machine_mode exp_mode = DImode;
309 machine_mode exp_vmode = V2DImode;
310 enum rs6000_gen_builtins bif;
311 switch (fn)
312 {
313 case RS6000_BIF_DIVWE:
314 case RS6000_BIF_DIVWEU:
315 exp_mode = SImode;
316 exp_vmode = V4SImode;
317 if (fn == RS6000_BIF_DIVWE)
318 bif = RS6000_BIF_VDIVESW;
319 else
320 bif = RS6000_BIF_VDIVEUW;
321 break;
322 case RS6000_BIF_DIVDE:
323 case RS6000_BIF_DIVDEU:
324 if (fn == RS6000_BIF_DIVDE)
325 bif = RS6000_BIF_VDIVESD;
326 else
327 bif = RS6000_BIF_VDIVEUD;
328 break;
329 case RS6000_BIF_CFUGED:
330 bif = RS6000_BIF_VCFUGED;
331 break;
332 case RS6000_BIF_CNTLZDM:
333 bif = RS6000_BIF_VCLZDM;
334 break;
335 case RS6000_BIF_CNTTZDM:
336 bif = RS6000_BIF_VCTZDM;
337 break;
338 case RS6000_BIF_PDEPD:
339 bif = RS6000_BIF_VPDEPD;
340 break;
341 case RS6000_BIF_PEXTD:
342 bif = RS6000_BIF_VPEXTD;
343 break;
344 default:
345 return NULL_TREE;
346 }
347
348 if (in_mode == exp_mode && in_vmode == exp_vmode)
349 return rs6000_builtin_decls[bif];
350 }
351
352 return NULL_TREE;
353 }
354
355 /* Returns a code for a target-specific builtin that implements
356 reciprocal of the function, or NULL_TREE if not available. */
357
358 tree
359 rs6000_builtin_reciprocal (tree fndecl)
360 {
361 switch (DECL_MD_FUNCTION_CODE (fndecl))
362 {
363 case RS6000_BIF_XVSQRTDP:
364 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
365 return NULL_TREE;
366
367 return rs6000_builtin_decls[RS6000_BIF_RSQRT_2DF];
368
369 case RS6000_BIF_XVSQRTSP:
370 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
371 return NULL_TREE;
372
373 return rs6000_builtin_decls[RS6000_BIF_RSQRT_4SF];
374
375 default:
376 return NULL_TREE;
377 }
378 }
379
380 /* **** Initialization support **** */
381
382 /* Create a builtin vector type with a name. Taking care not to give
383 the canonical type a name. */
384
385 static tree
386 rs6000_vector_type (const char *name, tree elt_type, unsigned num_elts)
387 {
388 tree result = build_vector_type (elt_type, num_elts);
389
390 /* Copy so we don't give the canonical type a name. */
391 result = build_variant_type_copy (result);
392
393 add_builtin_type (name, result);
394
395 return result;
396 }
397
398 /* Debug utility to translate a type node to a single textual token. */
399 static
400 const char *rs6000_type_string (tree type_node)
401 {
402 if (type_node == NULL_TREE)
403 return "**NULL**";
404 else if (type_node == void_type_node)
405 return "void";
406 else if (type_node == long_integer_type_node)
407 return "long";
408 else if (type_node == long_unsigned_type_node)
409 return "ulong";
410 else if (type_node == long_long_integer_type_node)
411 return "longlong";
412 else if (type_node == long_long_unsigned_type_node)
413 return "ulonglong";
414 else if (type_node == bool_V2DI_type_node)
415 return "vbll";
416 else if (type_node == bool_V4SI_type_node)
417 return "vbi";
418 else if (type_node == bool_V8HI_type_node)
419 return "vbs";
420 else if (type_node == bool_V16QI_type_node)
421 return "vbc";
422 else if (type_node == bool_int_type_node)
423 return "bool";
424 else if (type_node == dfloat64_type_node)
425 return "_Decimal64";
426 else if (type_node == double_type_node)
427 return "double";
428 else if (type_node == intDI_type_node)
429 return "sll";
430 else if (type_node == intHI_type_node)
431 return "ss";
432 else if (type_node == ibm128_float_type_node)
433 return "__ibm128";
434 else if (type_node == ieee128_float_type_node)
435 return "__ieee128";
436 else if (type_node == opaque_V4SI_type_node)
437 return "opaque";
438 else if (POINTER_TYPE_P (type_node))
439 return "void*";
440 else if (type_node == intQI_type_node || type_node == char_type_node)
441 return "sc";
442 else if (type_node == dfloat32_type_node)
443 return "_Decimal32";
444 else if (type_node == float_type_node)
445 return "float";
446 else if (type_node == intSI_type_node || type_node == integer_type_node)
447 return "si";
448 else if (type_node == dfloat128_type_node)
449 return "_Decimal128";
450 else if (type_node == long_double_type_node)
451 return "longdouble";
452 else if (type_node == intTI_type_node)
453 return "sq";
454 else if (type_node == unsigned_intDI_type_node)
455 return "ull";
456 else if (type_node == unsigned_intHI_type_node)
457 return "us";
458 else if (type_node == unsigned_intQI_type_node)
459 return "uc";
460 else if (type_node == unsigned_intSI_type_node)
461 return "ui";
462 else if (type_node == unsigned_intTI_type_node)
463 return "uq";
464 else if (type_node == unsigned_V1TI_type_node)
465 return "vuq";
466 else if (type_node == unsigned_V2DI_type_node)
467 return "vull";
468 else if (type_node == unsigned_V4SI_type_node)
469 return "vui";
470 else if (type_node == unsigned_V8HI_type_node)
471 return "vus";
472 else if (type_node == unsigned_V16QI_type_node)
473 return "vuc";
474 else if (type_node == V16QI_type_node)
475 return "vsc";
476 else if (type_node == V1TI_type_node)
477 return "vsq";
478 else if (type_node == V2DF_type_node)
479 return "vd";
480 else if (type_node == V2DI_type_node)
481 return "vsll";
482 else if (type_node == V4SF_type_node)
483 return "vf";
484 else if (type_node == V4SI_type_node)
485 return "vsi";
486 else if (type_node == V8HI_type_node)
487 return "vss";
488 else if (type_node == pixel_V8HI_type_node)
489 return "vp";
490 else if (type_node == pcvoid_type_node)
491 return "voidc*";
492 else if (type_node == float128_type_node)
493 return "_Float128";
494 else if (type_node == vector_pair_type_node)
495 return "__vector_pair";
496 else if (type_node == vector_quad_type_node)
497 return "__vector_quad";
498
499 return "unknown";
500 }
501
502 void
503 rs6000_init_builtins (void)
504 {
505 tree tdecl;
506 tree t;
507
508 if (TARGET_DEBUG_BUILTIN)
509 fprintf (stderr, "rs6000_init_builtins%s%s\n",
510 (TARGET_ALTIVEC) ? ", altivec" : "",
511 (TARGET_VSX) ? ", vsx" : "");
512
513 V2DI_type_node = rs6000_vector_type ("__vector long long",
514 long_long_integer_type_node, 2);
515 ptr_V2DI_type_node
516 = build_pointer_type (build_qualified_type (V2DI_type_node,
517 TYPE_QUAL_CONST));
518
519 V2DF_type_node = rs6000_vector_type ("__vector double", double_type_node, 2);
520 ptr_V2DF_type_node
521 = build_pointer_type (build_qualified_type (V2DF_type_node,
522 TYPE_QUAL_CONST));
523
524 V4SI_type_node = rs6000_vector_type ("__vector signed int",
525 intSI_type_node, 4);
526 ptr_V4SI_type_node
527 = build_pointer_type (build_qualified_type (V4SI_type_node,
528 TYPE_QUAL_CONST));
529
530 V4SF_type_node = rs6000_vector_type ("__vector float", float_type_node, 4);
531 ptr_V4SF_type_node
532 = build_pointer_type (build_qualified_type (V4SF_type_node,
533 TYPE_QUAL_CONST));
534
535 V8HI_type_node = rs6000_vector_type ("__vector signed short",
536 intHI_type_node, 8);
537 ptr_V8HI_type_node
538 = build_pointer_type (build_qualified_type (V8HI_type_node,
539 TYPE_QUAL_CONST));
540
541 V16QI_type_node = rs6000_vector_type ("__vector signed char",
542 intQI_type_node, 16);
543 ptr_V16QI_type_node
544 = build_pointer_type (build_qualified_type (V16QI_type_node,
545 TYPE_QUAL_CONST));
546
547 unsigned_V16QI_type_node = rs6000_vector_type ("__vector unsigned char",
548 unsigned_intQI_type_node, 16);
549 ptr_unsigned_V16QI_type_node
550 = build_pointer_type (build_qualified_type (unsigned_V16QI_type_node,
551 TYPE_QUAL_CONST));
552
553 unsigned_V8HI_type_node = rs6000_vector_type ("__vector unsigned short",
554 unsigned_intHI_type_node, 8);
555 ptr_unsigned_V8HI_type_node
556 = build_pointer_type (build_qualified_type (unsigned_V8HI_type_node,
557 TYPE_QUAL_CONST));
558
559 unsigned_V4SI_type_node = rs6000_vector_type ("__vector unsigned int",
560 unsigned_intSI_type_node, 4);
561 ptr_unsigned_V4SI_type_node
562 = build_pointer_type (build_qualified_type (unsigned_V4SI_type_node,
563 TYPE_QUAL_CONST));
564
565 unsigned_V2DI_type_node
566 = rs6000_vector_type ("__vector unsigned long long",
567 long_long_unsigned_type_node, 2);
568
569 ptr_unsigned_V2DI_type_node
570 = build_pointer_type (build_qualified_type (unsigned_V2DI_type_node,
571 TYPE_QUAL_CONST));
572
573 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
574
575 const_str_type_node
576 = build_pointer_type (build_qualified_type (char_type_node,
577 TYPE_QUAL_CONST));
578
579 /* We use V1TI mode as a special container to hold __int128_t items that
580 must live in VSX registers. */
581 if (intTI_type_node)
582 {
583 V1TI_type_node = rs6000_vector_type ("__vector __int128",
584 intTI_type_node, 1);
585 ptr_V1TI_type_node
586 = build_pointer_type (build_qualified_type (V1TI_type_node,
587 TYPE_QUAL_CONST));
588 unsigned_V1TI_type_node
589 = rs6000_vector_type ("__vector unsigned __int128",
590 unsigned_intTI_type_node, 1);
591 ptr_unsigned_V1TI_type_node
592 = build_pointer_type (build_qualified_type (unsigned_V1TI_type_node,
593 TYPE_QUAL_CONST));
594 }
595
596 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
597 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
598 'vector unsigned short'. */
599
600 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
601 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
602 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
603 bool_long_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
604 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
605
606 long_integer_type_internal_node = long_integer_type_node;
607 long_unsigned_type_internal_node = long_unsigned_type_node;
608 long_long_integer_type_internal_node = long_long_integer_type_node;
609 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
610 intQI_type_internal_node = intQI_type_node;
611 uintQI_type_internal_node = unsigned_intQI_type_node;
612 intHI_type_internal_node = intHI_type_node;
613 uintHI_type_internal_node = unsigned_intHI_type_node;
614 intSI_type_internal_node = intSI_type_node;
615 uintSI_type_internal_node = unsigned_intSI_type_node;
616 intDI_type_internal_node = intDI_type_node;
617 uintDI_type_internal_node = unsigned_intDI_type_node;
618 intTI_type_internal_node = intTI_type_node;
619 uintTI_type_internal_node = unsigned_intTI_type_node;
620 float_type_internal_node = float_type_node;
621 double_type_internal_node = double_type_node;
622 long_double_type_internal_node = long_double_type_node;
623 dfloat64_type_internal_node = dfloat64_type_node;
624 dfloat128_type_internal_node = dfloat128_type_node;
625 void_type_internal_node = void_type_node;
626
627 ptr_intQI_type_node
628 = build_pointer_type (build_qualified_type (intQI_type_internal_node,
629 TYPE_QUAL_CONST));
630 ptr_uintQI_type_node
631 = build_pointer_type (build_qualified_type (uintQI_type_internal_node,
632 TYPE_QUAL_CONST));
633 ptr_intHI_type_node
634 = build_pointer_type (build_qualified_type (intHI_type_internal_node,
635 TYPE_QUAL_CONST));
636 ptr_uintHI_type_node
637 = build_pointer_type (build_qualified_type (uintHI_type_internal_node,
638 TYPE_QUAL_CONST));
639 ptr_intSI_type_node
640 = build_pointer_type (build_qualified_type (intSI_type_internal_node,
641 TYPE_QUAL_CONST));
642 ptr_uintSI_type_node
643 = build_pointer_type (build_qualified_type (uintSI_type_internal_node,
644 TYPE_QUAL_CONST));
645 ptr_intDI_type_node
646 = build_pointer_type (build_qualified_type (intDI_type_internal_node,
647 TYPE_QUAL_CONST));
648 ptr_uintDI_type_node
649 = build_pointer_type (build_qualified_type (uintDI_type_internal_node,
650 TYPE_QUAL_CONST));
651 ptr_intTI_type_node
652 = build_pointer_type (build_qualified_type (intTI_type_internal_node,
653 TYPE_QUAL_CONST));
654 ptr_uintTI_type_node
655 = build_pointer_type (build_qualified_type (uintTI_type_internal_node,
656 TYPE_QUAL_CONST));
657
658 t = build_qualified_type (long_integer_type_internal_node, TYPE_QUAL_CONST);
659 ptr_long_integer_type_node = build_pointer_type (t);
660
661 t = build_qualified_type (long_unsigned_type_internal_node, TYPE_QUAL_CONST);
662 ptr_long_unsigned_type_node = build_pointer_type (t);
663
664 ptr_float_type_node
665 = build_pointer_type (build_qualified_type (float_type_internal_node,
666 TYPE_QUAL_CONST));
667 ptr_double_type_node
668 = build_pointer_type (build_qualified_type (double_type_internal_node,
669 TYPE_QUAL_CONST));
670 ptr_long_double_type_node
671 = build_pointer_type (build_qualified_type (long_double_type_internal_node,
672 TYPE_QUAL_CONST));
673 if (dfloat64_type_node)
674 {
675 t = build_qualified_type (dfloat64_type_internal_node, TYPE_QUAL_CONST);
676 ptr_dfloat64_type_node = build_pointer_type (t);
677 }
678 else
679 ptr_dfloat64_type_node = NULL;
680
681 if (dfloat128_type_node)
682 {
683 t = build_qualified_type (dfloat128_type_internal_node, TYPE_QUAL_CONST);
684 ptr_dfloat128_type_node = build_pointer_type (t);
685 }
686 else
687 ptr_dfloat128_type_node = NULL;
688
689 t = build_qualified_type (long_long_integer_type_internal_node,
690 TYPE_QUAL_CONST);
691 ptr_long_long_integer_type_node = build_pointer_type (t);
692
693 t = build_qualified_type (long_long_unsigned_type_internal_node,
694 TYPE_QUAL_CONST);
695 ptr_long_long_unsigned_type_node = build_pointer_type (t);
696
697 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
698 IFmode is the IBM extended 128-bit format that is a pair of doubles.
699 TFmode will be either IEEE 128-bit floating point or the IBM double-double
700 format that uses a pair of doubles, depending on the switches and
701 defaults.
702
703 If we don't support for either 128-bit IBM double double or IEEE 128-bit
704 floating point, we need make sure the type is non-zero or else self-test
705 fails during bootstrap.
706
707 Always create __ibm128 as a separate type, even if the current long double
708 format is IBM extended double.
709
710 For IEEE 128-bit floating point, always create the type __ieee128. If the
711 user used -mfloat128, rs6000-c.cc will create a define from __float128 to
712 __ieee128. */
713 if (TARGET_LONG_DOUBLE_128 && (!TARGET_IEEEQUAD || TARGET_FLOAT128_TYPE))
714 {
715 if (!TARGET_IEEEQUAD)
716 ibm128_float_type_node = long_double_type_node;
717 else
718 {
719 ibm128_float_type_node = make_node (REAL_TYPE);
720 TYPE_PRECISION (ibm128_float_type_node) = 128;
721 SET_TYPE_MODE (ibm128_float_type_node, IFmode);
722 layout_type (ibm128_float_type_node);
723 }
724 t = build_qualified_type (ibm128_float_type_node, TYPE_QUAL_CONST);
725 lang_hooks.types.register_builtin_type (ibm128_float_type_node,
726 "__ibm128");
727 }
728 else
729 ibm128_float_type_node = NULL_TREE;
730
731 if (TARGET_FLOAT128_TYPE)
732 {
733 if (TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128)
734 ieee128_float_type_node = long_double_type_node;
735 else
736 ieee128_float_type_node = float128_type_node;
737 t = build_qualified_type (ieee128_float_type_node, TYPE_QUAL_CONST);
738 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
739 "__ieee128");
740 }
741 else
742 ieee128_float_type_node = NULL_TREE;
743
744 /* Vector pair and vector quad support. */
745 vector_pair_type_node = make_node (OPAQUE_TYPE);
746 SET_TYPE_MODE (vector_pair_type_node, OOmode);
747 TYPE_SIZE (vector_pair_type_node) = bitsize_int (GET_MODE_BITSIZE (OOmode));
748 TYPE_PRECISION (vector_pair_type_node) = GET_MODE_BITSIZE (OOmode);
749 TYPE_SIZE_UNIT (vector_pair_type_node) = size_int (GET_MODE_SIZE (OOmode));
750 SET_TYPE_ALIGN (vector_pair_type_node, 256);
751 TYPE_USER_ALIGN (vector_pair_type_node) = 0;
752 lang_hooks.types.register_builtin_type (vector_pair_type_node,
753 "__vector_pair");
754 t = build_qualified_type (vector_pair_type_node, TYPE_QUAL_CONST);
755 ptr_vector_pair_type_node = build_pointer_type (t);
756
757 vector_quad_type_node = make_node (OPAQUE_TYPE);
758 SET_TYPE_MODE (vector_quad_type_node, XOmode);
759 TYPE_SIZE (vector_quad_type_node) = bitsize_int (GET_MODE_BITSIZE (XOmode));
760 TYPE_PRECISION (vector_quad_type_node) = GET_MODE_BITSIZE (XOmode);
761 TYPE_SIZE_UNIT (vector_quad_type_node) = size_int (GET_MODE_SIZE (XOmode));
762 SET_TYPE_ALIGN (vector_quad_type_node, 512);
763 TYPE_USER_ALIGN (vector_quad_type_node) = 0;
764 lang_hooks.types.register_builtin_type (vector_quad_type_node,
765 "__vector_quad");
766 t = build_qualified_type (vector_quad_type_node, TYPE_QUAL_CONST);
767 ptr_vector_quad_type_node = build_pointer_type (t);
768
769 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
770 TYPE_NAME (bool_char_type_node) = tdecl;
771
772 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
773 TYPE_NAME (bool_short_type_node) = tdecl;
774
775 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
776 TYPE_NAME (bool_int_type_node) = tdecl;
777
778 tdecl = add_builtin_type ("__pixel", pixel_type_node);
779 TYPE_NAME (pixel_type_node) = tdecl;
780
781 bool_V16QI_type_node = rs6000_vector_type ("__vector __bool char",
782 bool_char_type_node, 16);
783 ptr_bool_V16QI_type_node
784 = build_pointer_type (build_qualified_type (bool_V16QI_type_node,
785 TYPE_QUAL_CONST));
786
787 bool_V8HI_type_node = rs6000_vector_type ("__vector __bool short",
788 bool_short_type_node, 8);
789 ptr_bool_V8HI_type_node
790 = build_pointer_type (build_qualified_type (bool_V8HI_type_node,
791 TYPE_QUAL_CONST));
792
793 bool_V4SI_type_node = rs6000_vector_type ("__vector __bool int",
794 bool_int_type_node, 4);
795 ptr_bool_V4SI_type_node
796 = build_pointer_type (build_qualified_type (bool_V4SI_type_node,
797 TYPE_QUAL_CONST));
798
799 bool_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
800 ? "__vector __bool long"
801 : "__vector __bool long long",
802 bool_long_long_type_node, 2);
803 ptr_bool_V2DI_type_node
804 = build_pointer_type (build_qualified_type (bool_V2DI_type_node,
805 TYPE_QUAL_CONST));
806
807 bool_V1TI_type_node = rs6000_vector_type ("__vector __bool __int128",
808 intTI_type_node, 1);
809 ptr_bool_V1TI_type_node
810 = build_pointer_type (build_qualified_type (bool_V1TI_type_node,
811 TYPE_QUAL_CONST));
812
813 pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel",
814 pixel_type_node, 8);
815 ptr_pixel_V8HI_type_node
816 = build_pointer_type (build_qualified_type (pixel_V8HI_type_node,
817 TYPE_QUAL_CONST));
818 pcvoid_type_node
819 = build_pointer_type (build_qualified_type (void_type_node,
820 TYPE_QUAL_CONST));
821
822 /* Execute the autogenerated initialization code for builtins. */
823 rs6000_init_generated_builtins ();
824
825 if (TARGET_DEBUG_BUILTIN)
826 {
827 fprintf (stderr, "\nAutogenerated built-in functions:\n\n");
828 for (int i = 1; i < (int) RS6000_BIF_MAX; i++)
829 {
830 enum rs6000_gen_builtins fn_code = (enum rs6000_gen_builtins) i;
831 if (!rs6000_builtin_is_supported (fn_code))
832 continue;
833 tree fntype = rs6000_builtin_info[i].fntype;
834 tree t = TREE_TYPE (fntype);
835 fprintf (stderr, "%s %s (", rs6000_type_string (t),
836 rs6000_builtin_info[i].bifname);
837 t = TYPE_ARG_TYPES (fntype);
838 while (t && TREE_VALUE (t) != void_type_node)
839 {
840 fprintf (stderr, "%s",
841 rs6000_type_string (TREE_VALUE (t)));
842 t = TREE_CHAIN (t);
843 if (t && TREE_VALUE (t) != void_type_node)
844 fprintf (stderr, ", ");
845 }
846 fprintf (stderr, "); %s [%4d]\n",
847 rs6000_builtin_info[i].attr_string, (int) i);
848 }
849 fprintf (stderr, "\nEnd autogenerated built-in functions.\n\n\n");
850 }
851
852 if (TARGET_XCOFF)
853 {
854 /* AIX libm provides clog as __clog. */
855 if ((tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
856 set_user_assembler_name (tdecl, "__clog");
857
858 /* When long double is 64 bit, some long double builtins of libc
859 functions (like __builtin_frexpl) must call the double version
860 (frexp) not the long double version (frexpl) that expects a 128 bit
861 argument. */
862 if (! TARGET_LONG_DOUBLE_128)
863 {
864 if ((tdecl = builtin_decl_explicit (BUILT_IN_FMODL)) != NULL_TREE)
865 set_user_assembler_name (tdecl, "fmod");
866 if ((tdecl = builtin_decl_explicit (BUILT_IN_FREXPL)) != NULL_TREE)
867 set_user_assembler_name (tdecl, "frexp");
868 if ((tdecl = builtin_decl_explicit (BUILT_IN_LDEXPL)) != NULL_TREE)
869 set_user_assembler_name (tdecl, "ldexp");
870 if ((tdecl = builtin_decl_explicit (BUILT_IN_MODFL)) != NULL_TREE)
871 set_user_assembler_name (tdecl, "modf");
872 }
873 }
874
875 altivec_builtin_mask_for_load
876 = rs6000_builtin_decls[RS6000_BIF_MASK_FOR_LOAD];
877
878 #ifdef SUBTARGET_INIT_BUILTINS
879 SUBTARGET_INIT_BUILTINS;
880 #endif
881
882 return;
883 }
884
885 /* **** GIMPLE folding support **** */
886
887 /* Helper function to handle the gimple folding of a vector compare
888 operation. This sets up true/false vectors, and uses the
889 VEC_COND_EXPR operation.
890 CODE indicates which comparison is to be made. (EQ, GT, ...).
891 TYPE indicates the type of the result.
892 Code is inserted before GSI. */
893 static tree
894 fold_build_vec_cmp (tree_code code, tree type, tree arg0, tree arg1,
895 gimple_stmt_iterator *gsi)
896 {
897 tree cmp_type = truth_type_for (type);
898 tree zero_vec = build_zero_cst (type);
899 tree minus_one_vec = build_minus_one_cst (type);
900 tree temp = create_tmp_reg_or_ssa_name (cmp_type);
901 gimple *g = gimple_build_assign (temp, code, arg0, arg1);
902 gsi_insert_before (gsi, g, GSI_SAME_STMT);
903 return fold_build3 (VEC_COND_EXPR, type, temp, minus_one_vec, zero_vec);
904 }
905
906 /* Helper function to handle the in-between steps for the
907 vector compare built-ins. */
908 static void
909 fold_compare_helper (gimple_stmt_iterator *gsi, tree_code code, gimple *stmt)
910 {
911 tree arg0 = gimple_call_arg (stmt, 0);
912 tree arg1 = gimple_call_arg (stmt, 1);
913 tree lhs = gimple_call_lhs (stmt);
914 tree cmp = fold_build_vec_cmp (code, TREE_TYPE (lhs), arg0, arg1, gsi);
915 gimple *g = gimple_build_assign (lhs, cmp);
916 gimple_set_location (g, gimple_location (stmt));
917 gsi_replace (gsi, g, true);
918 }
919
920 /* Helper function to map V2DF and V4SF types to their
921 integral equivalents (V2DI and V4SI). */
922 tree map_to_integral_tree_type (tree input_tree_type)
923 {
924 if (INTEGRAL_TYPE_P (TREE_TYPE (input_tree_type)))
925 return input_tree_type;
926 else
927 {
928 if (types_compatible_p (TREE_TYPE (input_tree_type),
929 TREE_TYPE (V2DF_type_node)))
930 return V2DI_type_node;
931 else if (types_compatible_p (TREE_TYPE (input_tree_type),
932 TREE_TYPE (V4SF_type_node)))
933 return V4SI_type_node;
934 else
935 gcc_unreachable ();
936 }
937 }
938
939 /* Helper function to handle the vector merge[hl] built-ins. The
940 implementation difference between h and l versions for this code are in
941 the values used when building of the permute vector for high word versus
942 low word merge. The variance is keyed off the use_high parameter. */
943 static void
944 fold_mergehl_helper (gimple_stmt_iterator *gsi, gimple *stmt, int use_high)
945 {
946 tree arg0 = gimple_call_arg (stmt, 0);
947 tree arg1 = gimple_call_arg (stmt, 1);
948 tree lhs = gimple_call_lhs (stmt);
949 tree lhs_type = TREE_TYPE (lhs);
950 int n_elts = TYPE_VECTOR_SUBPARTS (lhs_type);
951 int midpoint = n_elts / 2;
952 int offset = 0;
953
954 if (use_high == 1)
955 offset = midpoint;
956
957 /* The permute_type will match the lhs for integral types. For double and
958 float types, the permute type needs to map to the V2 or V4 type that
959 matches size. */
960 tree permute_type;
961 permute_type = map_to_integral_tree_type (lhs_type);
962 tree_vector_builder elts (permute_type, VECTOR_CST_NELTS (arg0), 1);
963
964 for (int i = 0; i < midpoint; i++)
965 {
966 elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
967 offset + i));
968 elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
969 offset + n_elts + i));
970 }
971
972 tree permute = elts.build ();
973
974 gimple *g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute);
975 gimple_set_location (g, gimple_location (stmt));
976 gsi_replace (gsi, g, true);
977 }
978
979 /* Helper function to handle the vector merge[eo] built-ins. */
980 static void
981 fold_mergeeo_helper (gimple_stmt_iterator *gsi, gimple *stmt, int use_odd)
982 {
983 tree arg0 = gimple_call_arg (stmt, 0);
984 tree arg1 = gimple_call_arg (stmt, 1);
985 tree lhs = gimple_call_lhs (stmt);
986 tree lhs_type = TREE_TYPE (lhs);
987 int n_elts = TYPE_VECTOR_SUBPARTS (lhs_type);
988
989 /* The permute_type will match the lhs for integral types. For double and
990 float types, the permute type needs to map to the V2 or V4 type that
991 matches size. */
992 tree permute_type;
993 permute_type = map_to_integral_tree_type (lhs_type);
994
995 tree_vector_builder elts (permute_type, VECTOR_CST_NELTS (arg0), 1);
996
997 /* Build the permute vector. */
998 for (int i = 0; i < n_elts / 2; i++)
999 {
1000 elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
1001 2*i + use_odd));
1002 elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
1003 2*i + use_odd + n_elts));
1004 }
1005
1006 tree permute = elts.build ();
1007
1008 gimple *g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute);
1009 gimple_set_location (g, gimple_location (stmt));
1010 gsi_replace (gsi, g, true);
1011 }
1012
1013 /* Helper function to sort out which built-ins may be valid without having
1014 a LHS. */
1015 static bool
1016 rs6000_builtin_valid_without_lhs (enum rs6000_gen_builtins fn_code,
1017 tree fndecl)
1018 {
1019 if (TREE_TYPE (TREE_TYPE (fndecl)) == void_type_node)
1020 return true;
1021
1022 switch (fn_code)
1023 {
1024 case RS6000_BIF_STVX_V16QI:
1025 case RS6000_BIF_STVX_V8HI:
1026 case RS6000_BIF_STVX_V4SI:
1027 case RS6000_BIF_STVX_V4SF:
1028 case RS6000_BIF_STVX_V2DI:
1029 case RS6000_BIF_STVX_V2DF:
1030 case RS6000_BIF_STXVW4X_V16QI:
1031 case RS6000_BIF_STXVW4X_V8HI:
1032 case RS6000_BIF_STXVW4X_V4SF:
1033 case RS6000_BIF_STXVW4X_V4SI:
1034 case RS6000_BIF_STXVD2X_V2DF:
1035 case RS6000_BIF_STXVD2X_V2DI:
1036 return true;
1037 default:
1038 return false;
1039 }
1040 }
1041
1042 /* Expand the MMA built-ins early, so that we can convert the pass-by-reference
1043 __vector_quad arguments into pass-by-value arguments, leading to more
1044 efficient code generation. */
1045 static bool
1046 rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi,
1047 rs6000_gen_builtins fn_code)
1048 {
1049 gimple *stmt = gsi_stmt (*gsi);
1050 size_t fncode = (size_t) fn_code;
1051
1052 if (!bif_is_mma (rs6000_builtin_info[fncode]))
1053 return false;
1054
1055 /* Each call that can be gimple-expanded has an associated built-in
1056 function that it will expand into. If this one doesn't, we have
1057 already expanded it! Exceptions: lxvp and stxvp. */
1058 if (rs6000_builtin_info[fncode].assoc_bif == RS6000_BIF_NONE
1059 && fncode != RS6000_BIF_LXVP
1060 && fncode != RS6000_BIF_STXVP)
1061 return false;
1062
1063 bifdata *bd = &rs6000_builtin_info[fncode];
1064 unsigned nopnds = bd->nargs;
1065 gimple_seq new_seq = NULL;
1066 gimple *new_call;
1067 tree new_decl;
1068
1069 /* Compatibility built-ins; we used to call these
1070 __builtin_mma_{dis,}assemble_pair, but now we call them
1071 __builtin_vsx_{dis,}assemble_pair. Handle the old versions. */
1072 if (fncode == RS6000_BIF_ASSEMBLE_PAIR)
1073 fncode = RS6000_BIF_ASSEMBLE_PAIR_V;
1074 else if (fncode == RS6000_BIF_DISASSEMBLE_PAIR)
1075 fncode = RS6000_BIF_DISASSEMBLE_PAIR_V;
1076
1077 if (fncode == RS6000_BIF_DISASSEMBLE_ACC
1078 || fncode == RS6000_BIF_DISASSEMBLE_PAIR_V)
1079 {
1080 /* This is an MMA disassemble built-in function. */
1081 push_gimplify_context (true);
1082 unsigned nvec = (fncode == RS6000_BIF_DISASSEMBLE_ACC) ? 4 : 2;
1083 tree dst_ptr = gimple_call_arg (stmt, 0);
1084 tree src_ptr = gimple_call_arg (stmt, 1);
1085 tree src_type = (fncode == RS6000_BIF_DISASSEMBLE_ACC)
1086 ? build_pointer_type (vector_quad_type_node)
1087 : build_pointer_type (vector_pair_type_node);
1088 if (TREE_TYPE (src_ptr) != src_type)
1089 src_ptr = build1 (NOP_EXPR, src_type, src_ptr);
1090
1091 tree src = create_tmp_reg_or_ssa_name (TREE_TYPE (src_type));
1092 gimplify_assign (src, build_simple_mem_ref (src_ptr), &new_seq);
1093
1094 /* If we are not disassembling an accumulator/pair or our destination is
1095 another accumulator/pair, then just copy the entire thing as is. */
1096 if ((fncode == RS6000_BIF_DISASSEMBLE_ACC
1097 && TREE_TYPE (TREE_TYPE (dst_ptr)) == vector_quad_type_node)
1098 || (fncode == RS6000_BIF_DISASSEMBLE_PAIR_V
1099 && TREE_TYPE (TREE_TYPE (dst_ptr)) == vector_pair_type_node))
1100 {
1101 tree dst = build_simple_mem_ref (build1 (NOP_EXPR,
1102 src_type, dst_ptr));
1103 gimplify_assign (dst, src, &new_seq);
1104 pop_gimplify_context (NULL);
1105 gsi_replace_with_seq (gsi, new_seq, true);
1106 return true;
1107 }
1108
1109 /* If we're disassembling an accumulator into a different type, we need
1110 to emit a xxmfacc instruction now, since we cannot do it later. */
1111 if (fncode == RS6000_BIF_DISASSEMBLE_ACC)
1112 {
1113 new_decl = rs6000_builtin_decls[RS6000_BIF_XXMFACC_INTERNAL];
1114 new_call = gimple_build_call (new_decl, 1, src);
1115 src = create_tmp_reg_or_ssa_name (vector_quad_type_node);
1116 gimple_call_set_lhs (new_call, src);
1117 gimple_seq_add_stmt (&new_seq, new_call);
1118 }
1119
1120 /* Copy the accumulator/pair vector by vector. */
1121 new_decl
1122 = rs6000_builtin_decls[rs6000_builtin_info[fncode].assoc_bif];
1123 tree dst_type = build_pointer_type_for_mode (unsigned_V16QI_type_node,
1124 ptr_mode, true);
1125 tree dst_base = build1 (NOP_EXPR, dst_type, dst_ptr);
1126 for (unsigned i = 0; i < nvec; i++)
1127 {
1128 unsigned index = WORDS_BIG_ENDIAN ? i : nvec - 1 - i;
1129 tree dst = build2 (MEM_REF, unsigned_V16QI_type_node, dst_base,
1130 build_int_cst (dst_type, index * 16));
1131 tree dstssa = create_tmp_reg_or_ssa_name (unsigned_V16QI_type_node);
1132 new_call = gimple_build_call (new_decl, 2, src,
1133 build_int_cstu (uint16_type_node, i));
1134 gimple_call_set_lhs (new_call, dstssa);
1135 gimple_seq_add_stmt (&new_seq, new_call);
1136 gimplify_assign (dst, dstssa, &new_seq);
1137 }
1138 pop_gimplify_context (NULL);
1139 gsi_replace_with_seq (gsi, new_seq, true);
1140 return true;
1141 }
1142
1143 /* TODO: Do some factoring on these two chunks. */
1144 if (fncode == RS6000_BIF_LXVP)
1145 {
1146 push_gimplify_context (true);
1147 tree offset = gimple_call_arg (stmt, 0);
1148 tree ptr = gimple_call_arg (stmt, 1);
1149 tree lhs = gimple_call_lhs (stmt);
1150 if (TREE_TYPE (TREE_TYPE (ptr)) != vector_pair_type_node)
1151 ptr = build1 (NOP_EXPR,
1152 build_pointer_type (vector_pair_type_node), ptr);
1153 tree mem = build_simple_mem_ref (build2 (POINTER_PLUS_EXPR,
1154 TREE_TYPE (ptr), ptr, offset));
1155 gimplify_assign (lhs, mem, &new_seq);
1156 pop_gimplify_context (NULL);
1157 gsi_replace_with_seq (gsi, new_seq, true);
1158 return true;
1159 }
1160
1161 if (fncode == RS6000_BIF_STXVP)
1162 {
1163 push_gimplify_context (true);
1164 tree src = gimple_call_arg (stmt, 0);
1165 tree offset = gimple_call_arg (stmt, 1);
1166 tree ptr = gimple_call_arg (stmt, 2);
1167 if (TREE_TYPE (TREE_TYPE (ptr)) != vector_pair_type_node)
1168 ptr = build1 (NOP_EXPR,
1169 build_pointer_type (vector_pair_type_node), ptr);
1170 tree mem = build_simple_mem_ref (build2 (POINTER_PLUS_EXPR,
1171 TREE_TYPE (ptr), ptr, offset));
1172 gimplify_assign (mem, src, &new_seq);
1173 pop_gimplify_context (NULL);
1174 gsi_replace_with_seq (gsi, new_seq, true);
1175 return true;
1176 }
1177
1178 /* Convert this built-in into an internal version that uses pass-by-value
1179 arguments. The internal built-in is found in the assoc_bif field. */
1180 new_decl = rs6000_builtin_decls[rs6000_builtin_info[fncode].assoc_bif];
1181 tree lhs, op[MAX_MMA_OPERANDS];
1182 tree acc = gimple_call_arg (stmt, 0);
1183 push_gimplify_context (true);
1184
1185 if (bif_is_quad (*bd))
1186 {
1187 /* This built-in has a pass-by-reference accumulator input, so load it
1188 into a temporary accumulator for use as a pass-by-value input. */
1189 op[0] = create_tmp_reg_or_ssa_name (vector_quad_type_node);
1190 for (unsigned i = 1; i < nopnds; i++)
1191 op[i] = gimple_call_arg (stmt, i);
1192 gimplify_assign (op[0], build_simple_mem_ref (acc), &new_seq);
1193 }
1194 else
1195 {
1196 /* This built-in does not use its pass-by-reference accumulator argument
1197 as an input argument, so remove it from the input list. */
1198 nopnds--;
1199 for (unsigned i = 0; i < nopnds; i++)
1200 op[i] = gimple_call_arg (stmt, i + 1);
1201 }
1202
1203 switch (nopnds)
1204 {
1205 case 0:
1206 new_call = gimple_build_call (new_decl, 0);
1207 break;
1208 case 1:
1209 new_call = gimple_build_call (new_decl, 1, op[0]);
1210 break;
1211 case 2:
1212 new_call = gimple_build_call (new_decl, 2, op[0], op[1]);
1213 break;
1214 case 3:
1215 new_call = gimple_build_call (new_decl, 3, op[0], op[1], op[2]);
1216 break;
1217 case 4:
1218 new_call = gimple_build_call (new_decl, 4, op[0], op[1], op[2], op[3]);
1219 break;
1220 case 5:
1221 new_call = gimple_build_call (new_decl, 5, op[0], op[1], op[2], op[3],
1222 op[4]);
1223 break;
1224 case 6:
1225 new_call = gimple_build_call (new_decl, 6, op[0], op[1], op[2], op[3],
1226 op[4], op[5]);
1227 break;
1228 case 7:
1229 new_call = gimple_build_call (new_decl, 7, op[0], op[1], op[2], op[3],
1230 op[4], op[5], op[6]);
1231 break;
1232 default:
1233 gcc_unreachable ();
1234 }
1235
1236 if (fncode == RS6000_BIF_BUILD_PAIR || fncode == RS6000_BIF_ASSEMBLE_PAIR_V)
1237 lhs = create_tmp_reg_or_ssa_name (vector_pair_type_node);
1238 else
1239 lhs = create_tmp_reg_or_ssa_name (vector_quad_type_node);
1240 gimple_call_set_lhs (new_call, lhs);
1241 gimple_seq_add_stmt (&new_seq, new_call);
1242 gimplify_assign (build_simple_mem_ref (acc), lhs, &new_seq);
1243 pop_gimplify_context (NULL);
1244 gsi_replace_with_seq (gsi, new_seq, true);
1245
1246 return true;
1247 }
1248
1249 /* Fold a machine-dependent built-in in GIMPLE. (For folding into
1250 a constant, use rs6000_fold_builtin.) */
1251 bool
1252 rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
1253 {
1254 gimple *stmt = gsi_stmt (*gsi);
1255 tree fndecl = gimple_call_fndecl (stmt);
1256 gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
1257 enum rs6000_gen_builtins fn_code
1258 = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl);
1259 tree arg0, arg1, lhs, temp;
1260 enum tree_code bcode;
1261 gimple *g;
1262
1263 /* For an unresolved overloaded builtin, return early here since there
1264 is no builtin info for it and we are unable to fold it. */
1265 if (fn_code > RS6000_OVLD_NONE)
1266 return false;
1267
1268 size_t uns_fncode = (size_t) fn_code;
1269 enum insn_code icode = rs6000_builtin_info[uns_fncode].icode;
1270 const char *fn_name1 = rs6000_builtin_info[uns_fncode].bifname;
1271 const char *fn_name2 = (icode != CODE_FOR_nothing)
1272 ? get_insn_name ((int) icode)
1273 : "nothing";
1274
1275 if (TARGET_DEBUG_BUILTIN)
1276 fprintf (stderr, "rs6000_gimple_fold_builtin %d %s %s\n",
1277 fn_code, fn_name1, fn_name2);
1278
1279 /* Prevent gimple folding for code that does not have a LHS, unless it is
1280 allowed per the rs6000_builtin_valid_without_lhs helper function. */
1281 if (!gimple_call_lhs (stmt)
1282 && !rs6000_builtin_valid_without_lhs (fn_code, fndecl))
1283 return false;
1284
1285 /* Don't fold invalid builtins, let rs6000_expand_builtin diagnose it. */
1286 if (!rs6000_builtin_is_supported (fn_code))
1287 return false;
1288
1289 if (rs6000_gimple_fold_mma_builtin (gsi, fn_code))
1290 return true;
1291
1292 switch (fn_code)
1293 {
1294 /* Flavors of vec_add. We deliberately don't expand
1295 RS6000_BIF_VADDUQM as it gets lowered from V1TImode to
1296 TImode, resulting in much poorer code generation. */
1297 case RS6000_BIF_VADDUBM:
1298 case RS6000_BIF_VADDUHM:
1299 case RS6000_BIF_VADDUWM:
1300 case RS6000_BIF_VADDUDM:
1301 case RS6000_BIF_VADDFP:
1302 case RS6000_BIF_XVADDDP:
1303 case RS6000_BIF_XVADDSP:
1304 bcode = PLUS_EXPR;
1305 do_binary:
1306 arg0 = gimple_call_arg (stmt, 0);
1307 arg1 = gimple_call_arg (stmt, 1);
1308 lhs = gimple_call_lhs (stmt);
1309 if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (lhs)))
1310 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (lhs))))
1311 {
1312 /* Ensure the binary operation is performed in a type
1313 that wraps if it is integral type. */
1314 gimple_seq stmts = NULL;
1315 tree type = unsigned_type_for (TREE_TYPE (lhs));
1316 tree uarg0 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1317 type, arg0);
1318 tree uarg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1319 type, arg1);
1320 tree res = gimple_build (&stmts, gimple_location (stmt), bcode,
1321 type, uarg0, uarg1);
1322 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1323 g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR,
1324 build1 (VIEW_CONVERT_EXPR,
1325 TREE_TYPE (lhs), res));
1326 gsi_replace (gsi, g, true);
1327 return true;
1328 }
1329 g = gimple_build_assign (lhs, bcode, arg0, arg1);
1330 gimple_set_location (g, gimple_location (stmt));
1331 gsi_replace (gsi, g, true);
1332 return true;
1333 /* Flavors of vec_sub. We deliberately don't expand
1334 RS6000_BIF_VSUBUQM. */
1335 case RS6000_BIF_VSUBUBM:
1336 case RS6000_BIF_VSUBUHM:
1337 case RS6000_BIF_VSUBUWM:
1338 case RS6000_BIF_VSUBUDM:
1339 case RS6000_BIF_VSUBFP:
1340 case RS6000_BIF_XVSUBDP:
1341 case RS6000_BIF_XVSUBSP:
1342 bcode = MINUS_EXPR;
1343 goto do_binary;
1344 case RS6000_BIF_XVMULSP:
1345 case RS6000_BIF_XVMULDP:
1346 arg0 = gimple_call_arg (stmt, 0);
1347 arg1 = gimple_call_arg (stmt, 1);
1348 lhs = gimple_call_lhs (stmt);
1349 g = gimple_build_assign (lhs, MULT_EXPR, arg0, arg1);
1350 gimple_set_location (g, gimple_location (stmt));
1351 gsi_replace (gsi, g, true);
1352 return true;
1353 /* Even element flavors of vec_mul (signed). */
1354 case RS6000_BIF_VMULESB:
1355 case RS6000_BIF_VMULESH:
1356 case RS6000_BIF_VMULESW:
1357 /* Even element flavors of vec_mul (unsigned). */
1358 case RS6000_BIF_VMULEUB:
1359 case RS6000_BIF_VMULEUH:
1360 case RS6000_BIF_VMULEUW:
1361 arg0 = gimple_call_arg (stmt, 0);
1362 arg1 = gimple_call_arg (stmt, 1);
1363 lhs = gimple_call_lhs (stmt);
1364 g = gimple_build_assign (lhs, VEC_WIDEN_MULT_EVEN_EXPR, arg0, arg1);
1365 gimple_set_location (g, gimple_location (stmt));
1366 gsi_replace (gsi, g, true);
1367 return true;
1368 /* Odd element flavors of vec_mul (signed). */
1369 case RS6000_BIF_VMULOSB:
1370 case RS6000_BIF_VMULOSH:
1371 case RS6000_BIF_VMULOSW:
1372 /* Odd element flavors of vec_mul (unsigned). */
1373 case RS6000_BIF_VMULOUB:
1374 case RS6000_BIF_VMULOUH:
1375 case RS6000_BIF_VMULOUW:
1376 arg0 = gimple_call_arg (stmt, 0);
1377 arg1 = gimple_call_arg (stmt, 1);
1378 lhs = gimple_call_lhs (stmt);
1379 g = gimple_build_assign (lhs, VEC_WIDEN_MULT_ODD_EXPR, arg0, arg1);
1380 gimple_set_location (g, gimple_location (stmt));
1381 gsi_replace (gsi, g, true);
1382 return true;
1383 /* Flavors of vec_div (Integer). */
1384 case RS6000_BIF_DIV_V2DI:
1385 case RS6000_BIF_UDIV_V2DI:
1386 arg0 = gimple_call_arg (stmt, 0);
1387 arg1 = gimple_call_arg (stmt, 1);
1388 lhs = gimple_call_lhs (stmt);
1389 g = gimple_build_assign (lhs, TRUNC_DIV_EXPR, arg0, arg1);
1390 gimple_set_location (g, gimple_location (stmt));
1391 gsi_replace (gsi, g, true);
1392 return true;
1393 /* Flavors of vec_div (Float). */
1394 case RS6000_BIF_XVDIVSP:
1395 case RS6000_BIF_XVDIVDP:
1396 arg0 = gimple_call_arg (stmt, 0);
1397 arg1 = gimple_call_arg (stmt, 1);
1398 lhs = gimple_call_lhs (stmt);
1399 g = gimple_build_assign (lhs, RDIV_EXPR, arg0, arg1);
1400 gimple_set_location (g, gimple_location (stmt));
1401 gsi_replace (gsi, g, true);
1402 return true;
1403 /* Flavors of vec_and. */
1404 case RS6000_BIF_VAND_V16QI_UNS:
1405 case RS6000_BIF_VAND_V16QI:
1406 case RS6000_BIF_VAND_V8HI_UNS:
1407 case RS6000_BIF_VAND_V8HI:
1408 case RS6000_BIF_VAND_V4SI_UNS:
1409 case RS6000_BIF_VAND_V4SI:
1410 case RS6000_BIF_VAND_V2DI_UNS:
1411 case RS6000_BIF_VAND_V2DI:
1412 case RS6000_BIF_VAND_V4SF:
1413 case RS6000_BIF_VAND_V2DF:
1414 arg0 = gimple_call_arg (stmt, 0);
1415 arg1 = gimple_call_arg (stmt, 1);
1416 lhs = gimple_call_lhs (stmt);
1417 g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, arg1);
1418 gimple_set_location (g, gimple_location (stmt));
1419 gsi_replace (gsi, g, true);
1420 return true;
1421 /* Flavors of vec_andc. */
1422 case RS6000_BIF_VANDC_V16QI_UNS:
1423 case RS6000_BIF_VANDC_V16QI:
1424 case RS6000_BIF_VANDC_V8HI_UNS:
1425 case RS6000_BIF_VANDC_V8HI:
1426 case RS6000_BIF_VANDC_V4SI_UNS:
1427 case RS6000_BIF_VANDC_V4SI:
1428 case RS6000_BIF_VANDC_V2DI_UNS:
1429 case RS6000_BIF_VANDC_V2DI:
1430 case RS6000_BIF_VANDC_V4SF:
1431 case RS6000_BIF_VANDC_V2DF:
1432 arg0 = gimple_call_arg (stmt, 0);
1433 arg1 = gimple_call_arg (stmt, 1);
1434 lhs = gimple_call_lhs (stmt);
1435 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
1436 g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
1437 gimple_set_location (g, gimple_location (stmt));
1438 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1439 g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, temp);
1440 gimple_set_location (g, gimple_location (stmt));
1441 gsi_replace (gsi, g, true);
1442 return true;
1443 /* Flavors of vec_nand. */
1444 case RS6000_BIF_NAND_V16QI_UNS:
1445 case RS6000_BIF_NAND_V16QI:
1446 case RS6000_BIF_NAND_V8HI_UNS:
1447 case RS6000_BIF_NAND_V8HI:
1448 case RS6000_BIF_NAND_V4SI_UNS:
1449 case RS6000_BIF_NAND_V4SI:
1450 case RS6000_BIF_NAND_V2DI_UNS:
1451 case RS6000_BIF_NAND_V2DI:
1452 case RS6000_BIF_NAND_V4SF:
1453 case RS6000_BIF_NAND_V2DF:
1454 arg0 = gimple_call_arg (stmt, 0);
1455 arg1 = gimple_call_arg (stmt, 1);
1456 lhs = gimple_call_lhs (stmt);
1457 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
1458 g = gimple_build_assign (temp, BIT_AND_EXPR, arg0, arg1);
1459 gimple_set_location (g, gimple_location (stmt));
1460 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1461 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
1462 gimple_set_location (g, gimple_location (stmt));
1463 gsi_replace (gsi, g, true);
1464 return true;
1465 /* Flavors of vec_or. */
1466 case RS6000_BIF_VOR_V16QI_UNS:
1467 case RS6000_BIF_VOR_V16QI:
1468 case RS6000_BIF_VOR_V8HI_UNS:
1469 case RS6000_BIF_VOR_V8HI:
1470 case RS6000_BIF_VOR_V4SI_UNS:
1471 case RS6000_BIF_VOR_V4SI:
1472 case RS6000_BIF_VOR_V2DI_UNS:
1473 case RS6000_BIF_VOR_V2DI:
1474 case RS6000_BIF_VOR_V4SF:
1475 case RS6000_BIF_VOR_V2DF:
1476 arg0 = gimple_call_arg (stmt, 0);
1477 arg1 = gimple_call_arg (stmt, 1);
1478 lhs = gimple_call_lhs (stmt);
1479 g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, arg1);
1480 gimple_set_location (g, gimple_location (stmt));
1481 gsi_replace (gsi, g, true);
1482 return true;
1483 /* flavors of vec_orc. */
1484 case RS6000_BIF_ORC_V16QI_UNS:
1485 case RS6000_BIF_ORC_V16QI:
1486 case RS6000_BIF_ORC_V8HI_UNS:
1487 case RS6000_BIF_ORC_V8HI:
1488 case RS6000_BIF_ORC_V4SI_UNS:
1489 case RS6000_BIF_ORC_V4SI:
1490 case RS6000_BIF_ORC_V2DI_UNS:
1491 case RS6000_BIF_ORC_V2DI:
1492 case RS6000_BIF_ORC_V4SF:
1493 case RS6000_BIF_ORC_V2DF:
1494 arg0 = gimple_call_arg (stmt, 0);
1495 arg1 = gimple_call_arg (stmt, 1);
1496 lhs = gimple_call_lhs (stmt);
1497 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
1498 g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
1499 gimple_set_location (g, gimple_location (stmt));
1500 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1501 g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, temp);
1502 gimple_set_location (g, gimple_location (stmt));
1503 gsi_replace (gsi, g, true);
1504 return true;
1505 /* Flavors of vec_xor. */
1506 case RS6000_BIF_VXOR_V16QI_UNS:
1507 case RS6000_BIF_VXOR_V16QI:
1508 case RS6000_BIF_VXOR_V8HI_UNS:
1509 case RS6000_BIF_VXOR_V8HI:
1510 case RS6000_BIF_VXOR_V4SI_UNS:
1511 case RS6000_BIF_VXOR_V4SI:
1512 case RS6000_BIF_VXOR_V2DI_UNS:
1513 case RS6000_BIF_VXOR_V2DI:
1514 case RS6000_BIF_VXOR_V4SF:
1515 case RS6000_BIF_VXOR_V2DF:
1516 arg0 = gimple_call_arg (stmt, 0);
1517 arg1 = gimple_call_arg (stmt, 1);
1518 lhs = gimple_call_lhs (stmt);
1519 g = gimple_build_assign (lhs, BIT_XOR_EXPR, arg0, arg1);
1520 gimple_set_location (g, gimple_location (stmt));
1521 gsi_replace (gsi, g, true);
1522 return true;
1523 /* Flavors of vec_nor. */
1524 case RS6000_BIF_VNOR_V16QI_UNS:
1525 case RS6000_BIF_VNOR_V16QI:
1526 case RS6000_BIF_VNOR_V8HI_UNS:
1527 case RS6000_BIF_VNOR_V8HI:
1528 case RS6000_BIF_VNOR_V4SI_UNS:
1529 case RS6000_BIF_VNOR_V4SI:
1530 case RS6000_BIF_VNOR_V2DI_UNS:
1531 case RS6000_BIF_VNOR_V2DI:
1532 case RS6000_BIF_VNOR_V4SF:
1533 case RS6000_BIF_VNOR_V2DF:
1534 arg0 = gimple_call_arg (stmt, 0);
1535 arg1 = gimple_call_arg (stmt, 1);
1536 lhs = gimple_call_lhs (stmt);
1537 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
1538 g = gimple_build_assign (temp, BIT_IOR_EXPR, arg0, arg1);
1539 gimple_set_location (g, gimple_location (stmt));
1540 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1541 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
1542 gimple_set_location (g, gimple_location (stmt));
1543 gsi_replace (gsi, g, true);
1544 return true;
1545 /* flavors of vec_abs. */
1546 case RS6000_BIF_ABS_V16QI:
1547 case RS6000_BIF_ABS_V8HI:
1548 case RS6000_BIF_ABS_V4SI:
1549 case RS6000_BIF_ABS_V4SF:
1550 case RS6000_BIF_ABS_V2DI:
1551 case RS6000_BIF_XVABSDP:
1552 case RS6000_BIF_XVABSSP:
1553 arg0 = gimple_call_arg (stmt, 0);
1554 if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (arg0)))
1555 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (arg0))))
1556 return false;
1557 lhs = gimple_call_lhs (stmt);
1558 g = gimple_build_assign (lhs, ABS_EXPR, arg0);
1559 gimple_set_location (g, gimple_location (stmt));
1560 gsi_replace (gsi, g, true);
1561 return true;
1562 /* flavors of vec_min. */
1563 case RS6000_BIF_XVMINDP:
1564 case RS6000_BIF_XVMINSP:
1565 case RS6000_BIF_VMINFP:
1566 {
1567 lhs = gimple_call_lhs (stmt);
1568 tree type = TREE_TYPE (lhs);
1569 if (HONOR_NANS (type))
1570 return false;
1571 gcc_fallthrough ();
1572 }
1573 case RS6000_BIF_VMINSD:
1574 case RS6000_BIF_VMINUD:
1575 case RS6000_BIF_VMINSB:
1576 case RS6000_BIF_VMINSH:
1577 case RS6000_BIF_VMINSW:
1578 case RS6000_BIF_VMINUB:
1579 case RS6000_BIF_VMINUH:
1580 case RS6000_BIF_VMINUW:
1581 arg0 = gimple_call_arg (stmt, 0);
1582 arg1 = gimple_call_arg (stmt, 1);
1583 lhs = gimple_call_lhs (stmt);
1584 g = gimple_build_assign (lhs, MIN_EXPR, arg0, arg1);
1585 gimple_set_location (g, gimple_location (stmt));
1586 gsi_replace (gsi, g, true);
1587 return true;
1588 /* flavors of vec_max. */
1589 case RS6000_BIF_XVMAXDP:
1590 case RS6000_BIF_XVMAXSP:
1591 case RS6000_BIF_VMAXFP:
1592 {
1593 lhs = gimple_call_lhs (stmt);
1594 tree type = TREE_TYPE (lhs);
1595 if (HONOR_NANS (type))
1596 return false;
1597 gcc_fallthrough ();
1598 }
1599 case RS6000_BIF_VMAXSD:
1600 case RS6000_BIF_VMAXUD:
1601 case RS6000_BIF_VMAXSB:
1602 case RS6000_BIF_VMAXSH:
1603 case RS6000_BIF_VMAXSW:
1604 case RS6000_BIF_VMAXUB:
1605 case RS6000_BIF_VMAXUH:
1606 case RS6000_BIF_VMAXUW:
1607 arg0 = gimple_call_arg (stmt, 0);
1608 arg1 = gimple_call_arg (stmt, 1);
1609 lhs = gimple_call_lhs (stmt);
1610 g = gimple_build_assign (lhs, MAX_EXPR, arg0, arg1);
1611 gimple_set_location (g, gimple_location (stmt));
1612 gsi_replace (gsi, g, true);
1613 return true;
1614 /* Flavors of vec_eqv. */
1615 case RS6000_BIF_EQV_V16QI:
1616 case RS6000_BIF_EQV_V8HI:
1617 case RS6000_BIF_EQV_V4SI:
1618 case RS6000_BIF_EQV_V4SF:
1619 case RS6000_BIF_EQV_V2DF:
1620 case RS6000_BIF_EQV_V2DI:
1621 arg0 = gimple_call_arg (stmt, 0);
1622 arg1 = gimple_call_arg (stmt, 1);
1623 lhs = gimple_call_lhs (stmt);
1624 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
1625 g = gimple_build_assign (temp, BIT_XOR_EXPR, arg0, arg1);
1626 gimple_set_location (g, gimple_location (stmt));
1627 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1628 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
1629 gimple_set_location (g, gimple_location (stmt));
1630 gsi_replace (gsi, g, true);
1631 return true;
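      /* Analogous sketch for the fold above: vec_eqv (a, b) becomes
	 ~(a ^ b), i.e. a BIT_XOR_EXPR into a temporary followed by a
	 BIT_NOT_EXPR into the lhs.  */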
1632 /* Flavors of vec_rotate_left. */
1633 case RS6000_BIF_VRLB:
1634 case RS6000_BIF_VRLH:
1635 case RS6000_BIF_VRLW:
1636 case RS6000_BIF_VRLD:
1637 arg0 = gimple_call_arg (stmt, 0);
1638 arg1 = gimple_call_arg (stmt, 1);
1639 lhs = gimple_call_lhs (stmt);
1640 g = gimple_build_assign (lhs, LROTATE_EXPR, arg0, arg1);
1641 gimple_set_location (g, gimple_location (stmt));
1642 gsi_replace (gsi, g, true);
1643 return true;
1644 /* Flavors of vector shift right algebraic.
1645 vec_sra{b,h,w} -> vsra{b,h,w}. */
1646 case RS6000_BIF_VSRAB:
1647 case RS6000_BIF_VSRAH:
1648 case RS6000_BIF_VSRAW:
1649 case RS6000_BIF_VSRAD:
1650 {
1651 arg0 = gimple_call_arg (stmt, 0);
1652 arg1 = gimple_call_arg (stmt, 1);
1653 lhs = gimple_call_lhs (stmt);
1654 tree arg1_type = TREE_TYPE (arg1);
1655 tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1));
1656 tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type));
1657 location_t loc = gimple_location (stmt);
1658         /* Force arg1 into the valid range matching the arg0 type.  */
1659 /* Build a vector consisting of the max valid bit-size values. */
1660 int n_elts = VECTOR_CST_NELTS (arg1);
1661 tree element_size = build_int_cst (unsigned_element_type,
1662 128 / n_elts);
1663 tree_vector_builder elts (unsigned_arg1_type, n_elts, 1);
1664 for (int i = 0; i < n_elts; i++)
1665 elts.safe_push (element_size);
1666 tree modulo_tree = elts.build ();
1667 /* Modulo the provided shift value against that vector. */
1668 gimple_seq stmts = NULL;
1669 tree unsigned_arg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1670 unsigned_arg1_type, arg1);
1671 tree new_arg1 = gimple_build (&stmts, loc, TRUNC_MOD_EXPR,
1672 unsigned_arg1_type, unsigned_arg1,
1673 modulo_tree);
1674 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1675 /* And finally, do the shift. */
1676 g = gimple_build_assign (lhs, RSHIFT_EXPR, arg0, new_arg1);
1677 gimple_set_location (g, loc);
1678 gsi_replace (gsi, g, true);
1679 return true;
1680 }
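      /* Illustrative example (source-level view, assuming V4SI operands;
	 the names are not actual SSA names):
	   d = vec_sra (a, b);
	 folds roughly to
	   b' = (vector unsigned int) b % { 32, 32, 32, 32 };
	   d  = a >> b';
	 since each element's shift count is only defined modulo the
	 element size, here 128 / 4 = 32 bits.  */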
1681 /* Flavors of vector shift left.
1682 builtin_altivec_vsl{b,h,w} -> vsl{b,h,w}. */
1683 case RS6000_BIF_VSLB:
1684 case RS6000_BIF_VSLH:
1685 case RS6000_BIF_VSLW:
1686 case RS6000_BIF_VSLD:
1687 {
1688 location_t loc;
1689 gimple_seq stmts = NULL;
1690 arg0 = gimple_call_arg (stmt, 0);
1691 tree arg0_type = TREE_TYPE (arg0);
1692 if (INTEGRAL_TYPE_P (TREE_TYPE (arg0_type))
1693 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (arg0_type)))
1694 return false;
1695 arg1 = gimple_call_arg (stmt, 1);
1696 tree arg1_type = TREE_TYPE (arg1);
1697 tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1));
1698 tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type));
1699 loc = gimple_location (stmt);
1700 lhs = gimple_call_lhs (stmt);
1701         /* Force arg1 into the valid range matching the arg0 type.  */
1702 /* Build a vector consisting of the max valid bit-size values. */
1703 int n_elts = VECTOR_CST_NELTS (arg1);
1704 int tree_size_in_bits = TREE_INT_CST_LOW (size_in_bytes (arg1_type))
1705 * BITS_PER_UNIT;
1706 tree element_size = build_int_cst (unsigned_element_type,
1707 tree_size_in_bits / n_elts);
1708 tree_vector_builder elts (unsigned_type_for (arg1_type), n_elts, 1);
1709 for (int i = 0; i < n_elts; i++)
1710 elts.safe_push (element_size);
1711 tree modulo_tree = elts.build ();
1712 /* Modulo the provided shift value against that vector. */
1713 tree unsigned_arg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1714 unsigned_arg1_type, arg1);
1715 tree new_arg1 = gimple_build (&stmts, loc, TRUNC_MOD_EXPR,
1716 unsigned_arg1_type, unsigned_arg1,
1717 modulo_tree);
1718 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1719 /* And finally, do the shift. */
1720 g = gimple_build_assign (lhs, LSHIFT_EXPR, arg0, new_arg1);
1721 gimple_set_location (g, gimple_location (stmt));
1722 gsi_replace (gsi, g, true);
1723 return true;
1724 }
1725 /* Flavors of vector shift right. */
1726 case RS6000_BIF_VSRB:
1727 case RS6000_BIF_VSRH:
1728 case RS6000_BIF_VSRW:
1729 case RS6000_BIF_VSRD:
1730 {
1731 arg0 = gimple_call_arg (stmt, 0);
1732 arg1 = gimple_call_arg (stmt, 1);
1733 lhs = gimple_call_lhs (stmt);
1734 tree arg1_type = TREE_TYPE (arg1);
1735 tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1));
1736 tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type));
1737 location_t loc = gimple_location (stmt);
1738 gimple_seq stmts = NULL;
1739 /* Convert arg0 to unsigned. */
1740 tree arg0_unsigned
1741 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1742 unsigned_type_for (TREE_TYPE (arg0)), arg0);
1743         /* Force arg1 into the valid range matching the arg0 type.  */
1744 /* Build a vector consisting of the max valid bit-size values. */
1745 int n_elts = VECTOR_CST_NELTS (arg1);
1746 tree element_size = build_int_cst (unsigned_element_type,
1747 128 / n_elts);
1748 tree_vector_builder elts (unsigned_arg1_type, n_elts, 1);
1749 for (int i = 0; i < n_elts; i++)
1750 elts.safe_push (element_size);
1751 tree modulo_tree = elts.build ();
1752 /* Modulo the provided shift value against that vector. */
1753 tree unsigned_arg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1754 unsigned_arg1_type, arg1);
1755 tree new_arg1 = gimple_build (&stmts, loc, TRUNC_MOD_EXPR,
1756 unsigned_arg1_type, unsigned_arg1,
1757 modulo_tree);
1758 /* Do the shift. */
1759 tree res
1760 = gimple_build (&stmts, RSHIFT_EXPR,
1761 TREE_TYPE (arg0_unsigned), arg0_unsigned, new_arg1);
1762 /* Convert result back to the lhs type. */
1763 res = gimple_build (&stmts, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), res);
1764 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1765 replace_call_with_value (gsi, res);
1766 return true;
1767 }
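      /* Sketch of the logical-right-shift fold just performed (element
	 type assumed to be unsigned int purely for illustration):
	   a' = VIEW_CONVERT <vector unsigned int> (a);
	   b' = (vector unsigned int) b % { 32, 32, 32, 32 };
	   d  = VIEW_CONVERT <type of d> (a' >> b');
	 Converting arg0 to an unsigned vector guarantees a logical rather
	 than arithmetic shift regardless of the signedness of the input.  */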
1768 /* Vector loads. */
1769 case RS6000_BIF_LVX_V16QI:
1770 case RS6000_BIF_LVX_V8HI:
1771 case RS6000_BIF_LVX_V4SI:
1772 case RS6000_BIF_LVX_V4SF:
1773 case RS6000_BIF_LVX_V2DI:
1774 case RS6000_BIF_LVX_V2DF:
1775 case RS6000_BIF_LVX_V1TI:
1776 {
1777 arg0 = gimple_call_arg (stmt, 0); // offset
1778 arg1 = gimple_call_arg (stmt, 1); // address
1779 lhs = gimple_call_lhs (stmt);
1780 location_t loc = gimple_location (stmt);
1781 /* Since arg1 may be cast to a different type, just use ptr_type_node
1782 here instead of trying to enforce TBAA on pointer types. */
1783 tree arg1_type = ptr_type_node;
1784 tree lhs_type = TREE_TYPE (lhs);
1785 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
1786 the tree using the value from arg0. The resulting type will match
1787 the type of arg1. */
1788 gimple_seq stmts = NULL;
1789 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
1790 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
1791 arg1_type, arg1, temp_offset);
1792 /* Mask off any lower bits from the address. */
1793 tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
1794 arg1_type, temp_addr,
1795 build_int_cst (arg1_type, -16));
1796 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1797 if (!is_gimple_mem_ref_addr (aligned_addr))
1798 {
1799 tree t = make_ssa_name (TREE_TYPE (aligned_addr));
1800 gimple *g = gimple_build_assign (t, aligned_addr);
1801 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1802 aligned_addr = t;
1803 }
1804 /* Use the build2 helper to set up the mem_ref. The MEM_REF could also
1805 take an offset, but since we've already incorporated the offset
1806 above, here we just pass in a zero. */
1807 gimple *g
1808 = gimple_build_assign (lhs, build2 (MEM_REF, lhs_type, aligned_addr,
1809 build_int_cst (arg1_type, 0)));
1810 gimple_set_location (g, loc);
1811 gsi_replace (gsi, g, true);
1812 return true;
1813 }
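      /* Sketch of the GIMPLE generated for an lvx-style load (names are
	 illustrative, not actual SSA names):
	   addr_1 = (void *) arg1 + (sizetype) arg0;
	   addr_2 = addr_1 & -16;   /* force 16-byte alignment  */
	   lhs    = MEM_REF <vector type> (addr_2, 0);
	 The & -16 mirrors the hardware behavior of lvx, which ignores the
	 low four bits of the effective address.  */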
1814 /* Vector stores. */
1815 case RS6000_BIF_STVX_V16QI:
1816 case RS6000_BIF_STVX_V8HI:
1817 case RS6000_BIF_STVX_V4SI:
1818 case RS6000_BIF_STVX_V4SF:
1819 case RS6000_BIF_STVX_V2DI:
1820 case RS6000_BIF_STVX_V2DF:
1821 {
1822 arg0 = gimple_call_arg (stmt, 0); /* Value to be stored. */
1823 arg1 = gimple_call_arg (stmt, 1); /* Offset. */
1824 tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address. */
1825 location_t loc = gimple_location (stmt);
1826 tree arg0_type = TREE_TYPE (arg0);
1827 /* Use ptr_type_node (no TBAA) for the arg2_type.
1828 FIXME: (Richard) "A proper fix would be to transition this type as
1829 seen from the frontend to GIMPLE, for example in a similar way we
1830 do for MEM_REFs by piggy-backing that on an extra argument, a
1831 constant zero pointer of the alias pointer type to use (which would
1832 also serve as a type indicator of the store itself). I'd use a
1833 target specific internal function for this (not sure if we can have
1834 those target specific, but I guess if it's folded away then that's
1835 fine) and get away with the overload set." */
1836 tree arg2_type = ptr_type_node;
1837 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
1838            the tree using the value from arg1.  The resulting type will match
1839 the type of arg2. */
1840 gimple_seq stmts = NULL;
1841 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
1842 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
1843 arg2_type, arg2, temp_offset);
1844 /* Mask off any lower bits from the address. */
1845 tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
1846 arg2_type, temp_addr,
1847 build_int_cst (arg2_type, -16));
1848 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1849 if (!is_gimple_mem_ref_addr (aligned_addr))
1850 {
1851 tree t = make_ssa_name (TREE_TYPE (aligned_addr));
1852 gimple *g = gimple_build_assign (t, aligned_addr);
1853 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1854 aligned_addr = t;
1855 }
1856 /* The desired gimple result should be similar to:
1857 MEM[(__vector floatD.1407 *)_1] = vf1D.2697; */
1858 gimple *g
1859 = gimple_build_assign (build2 (MEM_REF, arg0_type, aligned_addr,
1860 build_int_cst (arg2_type, 0)), arg0);
1861 gimple_set_location (g, loc);
1862 gsi_replace (gsi, g, true);
1863 return true;
1864 }
1865
1866     /* Unaligned vector loads.  */
1867 case RS6000_BIF_LXVW4X_V16QI:
1868 case RS6000_BIF_LXVW4X_V8HI:
1869 case RS6000_BIF_LXVW4X_V4SF:
1870 case RS6000_BIF_LXVW4X_V4SI:
1871 case RS6000_BIF_LXVD2X_V2DF:
1872 case RS6000_BIF_LXVD2X_V2DI:
1873 {
1874 arg0 = gimple_call_arg (stmt, 0); // offset
1875 arg1 = gimple_call_arg (stmt, 1); // address
1876 lhs = gimple_call_lhs (stmt);
1877 location_t loc = gimple_location (stmt);
1878 /* Since arg1 may be cast to a different type, just use ptr_type_node
1879 here instead of trying to enforce TBAA on pointer types. */
1880 tree arg1_type = ptr_type_node;
1881 tree lhs_type = TREE_TYPE (lhs);
1882 /* In GIMPLE the type of the MEM_REF specifies the alignment. The
1883 required alignment (power) is 4 bytes regardless of data type. */
1884 tree align_ltype = build_aligned_type (lhs_type, 4);
1885 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
1886 the tree using the value from arg0. The resulting type will match
1887 the type of arg1. */
1888 gimple_seq stmts = NULL;
1889 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
1890 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
1891 arg1_type, arg1, temp_offset);
1892 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1893 if (!is_gimple_mem_ref_addr (temp_addr))
1894 {
1895 tree t = make_ssa_name (TREE_TYPE (temp_addr));
1896 gimple *g = gimple_build_assign (t, temp_addr);
1897 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1898 temp_addr = t;
1899 }
1900 /* Use the build2 helper to set up the mem_ref. The MEM_REF could also
1901 take an offset, but since we've already incorporated the offset
1902 above, here we just pass in a zero. */
1903 gimple *g;
1904 g = gimple_build_assign (lhs, build2 (MEM_REF, align_ltype, temp_addr,
1905 build_int_cst (arg1_type, 0)));
1906 gimple_set_location (g, loc);
1907 gsi_replace (gsi, g, true);
1908 return true;
1909 }
1910
1911     /* Unaligned vector stores.  */
1912 case RS6000_BIF_STXVW4X_V16QI:
1913 case RS6000_BIF_STXVW4X_V8HI:
1914 case RS6000_BIF_STXVW4X_V4SF:
1915 case RS6000_BIF_STXVW4X_V4SI:
1916 case RS6000_BIF_STXVD2X_V2DF:
1917 case RS6000_BIF_STXVD2X_V2DI:
1918 {
1919 arg0 = gimple_call_arg (stmt, 0); /* Value to be stored. */
1920 arg1 = gimple_call_arg (stmt, 1); /* Offset. */
1921 tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address. */
1922 location_t loc = gimple_location (stmt);
1923 tree arg0_type = TREE_TYPE (arg0);
1924 /* Use ptr_type_node (no TBAA) for the arg2_type. */
1925 tree arg2_type = ptr_type_node;
1926 /* In GIMPLE the type of the MEM_REF specifies the alignment. The
1927 required alignment (power) is 4 bytes regardless of data type. */
1928 tree align_stype = build_aligned_type (arg0_type, 4);
1929 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
1930 the tree using the value from arg1. */
1931 gimple_seq stmts = NULL;
1932 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
1933 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
1934 arg2_type, arg2, temp_offset);
1935 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1936 if (!is_gimple_mem_ref_addr (temp_addr))
1937 {
1938 tree t = make_ssa_name (TREE_TYPE (temp_addr));
1939 gimple *g = gimple_build_assign (t, temp_addr);
1940 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1941 temp_addr = t;
1942 }
1943 gimple *g;
1944 g = gimple_build_assign (build2 (MEM_REF, align_stype, temp_addr,
1945 build_int_cst (arg2_type, 0)), arg0);
1946 gimple_set_location (g, loc);
1947 gsi_replace (gsi, g, true);
1948 return true;
1949 }
1950
1951 /* Vector Fused multiply-add (fma). */
1952 case RS6000_BIF_VMADDFP:
1953 case RS6000_BIF_XVMADDDP:
1954 case RS6000_BIF_XVMADDSP:
1955 case RS6000_BIF_VMLADDUHM:
1956 {
1957 arg0 = gimple_call_arg (stmt, 0);
1958 arg1 = gimple_call_arg (stmt, 1);
1959 tree arg2 = gimple_call_arg (stmt, 2);
1960 lhs = gimple_call_lhs (stmt);
1961 gcall *g = gimple_build_call_internal (IFN_FMA, 3, arg0, arg1, arg2);
1962 gimple_call_set_lhs (g, lhs);
1963 gimple_call_set_nothrow (g, true);
1964 gimple_set_location (g, gimple_location (stmt));
1965 gsi_replace (gsi, g, true);
1966 return true;
1967 }
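    /* For example, vec_madd (a, b, c) (and the xvmadd{dp,sp} forms) is
       replaced by a call to the internal function IFN_FMA, i.e.
	 lhs = .FMA (a, b, c);
       which later expands to a fused multiply-add.  The integer
       vmladduhm case is handled the same way, where the operation
       reduces to a * b + c on each halfword element.  */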
1968
1969 /* Vector compares; EQ, NE, GE, GT, LE. */
1970 case RS6000_BIF_VCMPEQUB:
1971 case RS6000_BIF_VCMPEQUH:
1972 case RS6000_BIF_VCMPEQUW:
1973 case RS6000_BIF_VCMPEQUD:
1974 case RS6000_BIF_VCMPEQUT:
1975 fold_compare_helper (gsi, EQ_EXPR, stmt);
1976 return true;
1977
1978 case RS6000_BIF_VCMPNEB:
1979 case RS6000_BIF_VCMPNEH:
1980 case RS6000_BIF_VCMPNEW:
1981 case RS6000_BIF_VCMPNET:
1982 fold_compare_helper (gsi, NE_EXPR, stmt);
1983 return true;
1984
1985 case RS6000_BIF_CMPGE_16QI:
1986 case RS6000_BIF_CMPGE_U16QI:
1987 case RS6000_BIF_CMPGE_8HI:
1988 case RS6000_BIF_CMPGE_U8HI:
1989 case RS6000_BIF_CMPGE_4SI:
1990 case RS6000_BIF_CMPGE_U4SI:
1991 case RS6000_BIF_CMPGE_2DI:
1992 case RS6000_BIF_CMPGE_U2DI:
1993 case RS6000_BIF_CMPGE_1TI:
1994 case RS6000_BIF_CMPGE_U1TI:
1995 fold_compare_helper (gsi, GE_EXPR, stmt);
1996 return true;
1997
1998 case RS6000_BIF_VCMPGTSB:
1999 case RS6000_BIF_VCMPGTUB:
2000 case RS6000_BIF_VCMPGTSH:
2001 case RS6000_BIF_VCMPGTUH:
2002 case RS6000_BIF_VCMPGTSW:
2003 case RS6000_BIF_VCMPGTUW:
2004 case RS6000_BIF_VCMPGTUD:
2005 case RS6000_BIF_VCMPGTSD:
2006 case RS6000_BIF_VCMPGTUT:
2007 case RS6000_BIF_VCMPGTST:
2008 fold_compare_helper (gsi, GT_EXPR, stmt);
2009 return true;
2010
2011 case RS6000_BIF_CMPLE_16QI:
2012 case RS6000_BIF_CMPLE_U16QI:
2013 case RS6000_BIF_CMPLE_8HI:
2014 case RS6000_BIF_CMPLE_U8HI:
2015 case RS6000_BIF_CMPLE_4SI:
2016 case RS6000_BIF_CMPLE_U4SI:
2017 case RS6000_BIF_CMPLE_2DI:
2018 case RS6000_BIF_CMPLE_U2DI:
2019 case RS6000_BIF_CMPLE_1TI:
2020 case RS6000_BIF_CMPLE_U1TI:
2021 fold_compare_helper (gsi, LE_EXPR, stmt);
2022 return true;
2023
2024     /* Flavors of vec_splat_[us]{8,16,32}.  */
2025 case RS6000_BIF_VSPLTISB:
2026 case RS6000_BIF_VSPLTISH:
2027 case RS6000_BIF_VSPLTISW:
2028 {
2029 arg0 = gimple_call_arg (stmt, 0);
2030 lhs = gimple_call_lhs (stmt);
2031
2032         /* Only fold the vec_splat_*() if the lower bits of arg0 are a
2033            5-bit signed constant in the range -16 to +15.  */
2034 if (TREE_CODE (arg0) != INTEGER_CST
2035 || !IN_RANGE (TREE_INT_CST_LOW (arg0), -16, 15))
2036 return false;
2037 gimple_seq stmts = NULL;
2038 location_t loc = gimple_location (stmt);
2039 tree splat_value = gimple_convert (&stmts, loc,
2040 TREE_TYPE (TREE_TYPE (lhs)), arg0);
2041 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
2042 tree splat_tree = build_vector_from_val (TREE_TYPE (lhs), splat_value);
2043 g = gimple_build_assign (lhs, splat_tree);
2044 gimple_set_location (g, gimple_location (stmt));
2045 gsi_replace (gsi, g, true);
2046 return true;
2047 }
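    /* For example, d = vec_splat_s32 (7) is folded to the constant vector
	 d = { 7, 7, 7, 7 };
       built with build_vector_from_val; non-constant or out-of-range
       arguments are left to the normal expansion path.  */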
2048
2049 /* Flavors of vec_splat. */
2050 /* a = vec_splat (b, 0x3) becomes a = { b[3],b[3],b[3],...}; */
2051 case RS6000_BIF_VSPLTB:
2052 case RS6000_BIF_VSPLTH:
2053 case RS6000_BIF_VSPLTW:
2054 case RS6000_BIF_XXSPLTD_V2DI:
2055 case RS6000_BIF_XXSPLTD_V2DF:
2056 {
2057 arg0 = gimple_call_arg (stmt, 0); /* input vector. */
2058 arg1 = gimple_call_arg (stmt, 1); /* index into arg0. */
2059         /* Only fold the vec_splat () if arg1 is both a constant value and
2060            a valid index into the arg0 vector.  */
2061 unsigned int n_elts = VECTOR_CST_NELTS (arg0);
2062 if (TREE_CODE (arg1) != INTEGER_CST
2063             || TREE_INT_CST_LOW (arg1) > (n_elts - 1))
2064 return false;
2065 lhs = gimple_call_lhs (stmt);
2066 tree lhs_type = TREE_TYPE (lhs);
2067 tree arg0_type = TREE_TYPE (arg0);
2068 tree splat;
2069 if (TREE_CODE (arg0) == VECTOR_CST)
2070 splat = VECTOR_CST_ELT (arg0, TREE_INT_CST_LOW (arg1));
2071 else
2072 {
2073 /* Determine (in bits) the length and start location of the
2074 splat value for a call to the tree_vec_extract helper. */
2075 int splat_elem_size = TREE_INT_CST_LOW (size_in_bytes (arg0_type))
2076 * BITS_PER_UNIT / n_elts;
2077 int splat_start_bit = TREE_INT_CST_LOW (arg1) * splat_elem_size;
2078 tree len = build_int_cst (bitsizetype, splat_elem_size);
2079 tree start = build_int_cst (bitsizetype, splat_start_bit);
2080 splat = tree_vec_extract (gsi, TREE_TYPE (lhs_type), arg0,
2081 len, start);
2082 }
2083 /* And finally, build the new vector. */
2084 tree splat_tree = build_vector_from_val (lhs_type, splat);
2085 g = gimple_build_assign (lhs, splat_tree);
2086 gimple_set_location (g, gimple_location (stmt));
2087 gsi_replace (gsi, g, true);
2088 return true;
2089 }
2090
2091 /* vec_mergel (integrals). */
2092 case RS6000_BIF_VMRGLH:
2093 case RS6000_BIF_VMRGLW:
2094 case RS6000_BIF_XXMRGLW_4SI:
2095 case RS6000_BIF_VMRGLB:
2096 case RS6000_BIF_VEC_MERGEL_V2DI:
2097 case RS6000_BIF_XXMRGLW_4SF:
2098 case RS6000_BIF_VEC_MERGEL_V2DF:
2099 fold_mergehl_helper (gsi, stmt, 1);
2100 return true;
2101 /* vec_mergeh (integrals). */
2102 case RS6000_BIF_VMRGHH:
2103 case RS6000_BIF_VMRGHW:
2104 case RS6000_BIF_XXMRGHW_4SI:
2105 case RS6000_BIF_VMRGHB:
2106 case RS6000_BIF_VEC_MERGEH_V2DI:
2107 case RS6000_BIF_XXMRGHW_4SF:
2108 case RS6000_BIF_VEC_MERGEH_V2DF:
2109 fold_mergehl_helper (gsi, stmt, 0);
2110 return true;
2111
2112 /* Flavors of vec_mergee. */
2113 case RS6000_BIF_VMRGEW_V4SI:
2114 case RS6000_BIF_VMRGEW_V2DI:
2115 case RS6000_BIF_VMRGEW_V4SF:
2116 case RS6000_BIF_VMRGEW_V2DF:
2117 fold_mergeeo_helper (gsi, stmt, 0);
2118 return true;
2119 /* Flavors of vec_mergeo. */
2120 case RS6000_BIF_VMRGOW_V4SI:
2121 case RS6000_BIF_VMRGOW_V2DI:
2122 case RS6000_BIF_VMRGOW_V4SF:
2123 case RS6000_BIF_VMRGOW_V2DF:
2124 fold_mergeeo_helper (gsi, stmt, 1);
2125 return true;
2126
2127 /* d = vec_pack (a, b) */
2128 case RS6000_BIF_VPKUDUM:
2129 case RS6000_BIF_VPKUHUM:
2130 case RS6000_BIF_VPKUWUM:
2131 {
2132 arg0 = gimple_call_arg (stmt, 0);
2133 arg1 = gimple_call_arg (stmt, 1);
2134 lhs = gimple_call_lhs (stmt);
2135 gimple *g = gimple_build_assign (lhs, VEC_PACK_TRUNC_EXPR, arg0, arg1);
2136 gimple_set_location (g, gimple_location (stmt));
2137 gsi_replace (gsi, g, true);
2138 return true;
2139 }
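    /* Sketch: with a and b of type vector unsigned int (V4SI),
	 d = vec_pack (a, b);
       becomes a single VEC_PACK_TRUNC_EXPR, i.e. each 32-bit element is
       truncated to 16 bits and the two inputs are concatenated into one
       V8HI result.  */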
2140
2141 /* d = vec_unpackh (a) */
2142 /* Note that the UNPACK_{HI,LO}_EXPR used in the gimple_build_assign call
2143        in this code is sensitive to endianness, and needs to be inverted to
2144 handle both LE and BE targets. */
2145 case RS6000_BIF_VUPKHSB:
2146 case RS6000_BIF_VUPKHSH:
2147 case RS6000_BIF_VUPKHSW:
2148 {
2149 arg0 = gimple_call_arg (stmt, 0);
2150 lhs = gimple_call_lhs (stmt);
2151 if (BYTES_BIG_ENDIAN)
2152 g = gimple_build_assign (lhs, VEC_UNPACK_HI_EXPR, arg0);
2153 else
2154 g = gimple_build_assign (lhs, VEC_UNPACK_LO_EXPR, arg0);
2155 gimple_set_location (g, gimple_location (stmt));
2156 gsi_replace (gsi, g, true);
2157 return true;
2158 }
2159 /* d = vec_unpackl (a) */
2160 case RS6000_BIF_VUPKLSB:
2161 case RS6000_BIF_VUPKLSH:
2162 case RS6000_BIF_VUPKLSW:
2163 {
2164 arg0 = gimple_call_arg (stmt, 0);
2165 lhs = gimple_call_lhs (stmt);
2166 if (BYTES_BIG_ENDIAN)
2167 g = gimple_build_assign (lhs, VEC_UNPACK_LO_EXPR, arg0);
2168 else
2169 g = gimple_build_assign (lhs, VEC_UNPACK_HI_EXPR, arg0);
2170 gimple_set_location (g, gimple_location (stmt));
2171 gsi_replace (gsi, g, true);
2172 return true;
2173 }
2174     /* There is no gimple type corresponding to pixel, so just return.  */
2175 case RS6000_BIF_VUPKHPX:
2176 case RS6000_BIF_VUPKLPX:
2177 return false;
2178
2179 /* vec_perm. */
2180 case RS6000_BIF_VPERM_16QI:
2181 case RS6000_BIF_VPERM_8HI:
2182 case RS6000_BIF_VPERM_4SI:
2183 case RS6000_BIF_VPERM_2DI:
2184 case RS6000_BIF_VPERM_4SF:
2185 case RS6000_BIF_VPERM_2DF:
2186 case RS6000_BIF_VPERM_16QI_UNS:
2187 case RS6000_BIF_VPERM_8HI_UNS:
2188 case RS6000_BIF_VPERM_4SI_UNS:
2189 case RS6000_BIF_VPERM_2DI_UNS:
2190 {
2191 arg0 = gimple_call_arg (stmt, 0);
2192 arg1 = gimple_call_arg (stmt, 1);
2193 tree permute = gimple_call_arg (stmt, 2);
2194 lhs = gimple_call_lhs (stmt);
2195 location_t loc = gimple_location (stmt);
2196 gimple_seq stmts = NULL;
2197         // Convert arg0 and arg1 to match the type of the permute
2198         // for the VEC_PERM_EXPR operation.
2199 tree permute_type = (TREE_TYPE (permute));
2200 tree arg0_ptype = gimple_build (&stmts, loc, VIEW_CONVERT_EXPR,
2201 permute_type, arg0);
2202 tree arg1_ptype = gimple_build (&stmts, loc, VIEW_CONVERT_EXPR,
2203 permute_type, arg1);
2204 tree lhs_ptype = gimple_build (&stmts, loc, VEC_PERM_EXPR,
2205 permute_type, arg0_ptype, arg1_ptype,
2206 permute);
2207 // Convert the result back to the desired lhs type upon completion.
2208 tree temp = gimple_build (&stmts, loc, VIEW_CONVERT_EXPR,
2209 TREE_TYPE (lhs), lhs_ptype);
2210 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
2211 g = gimple_build_assign (lhs, temp);
2212 gimple_set_location (g, loc);
2213 gsi_replace (gsi, g, true);
2214 return true;
2215 }
2216
2217 default:
2218 if (TARGET_DEBUG_BUILTIN)
2219 fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
2220 fn_code, fn_name1, fn_name2);
2221 break;
2222 }
2223
2224 return false;
2225 }
2226
2227 /* **** Expansion support **** */
2228
2229 static rtx
2230 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
2231 {
2232 rtx pat, scratch;
2233 tree cr6_form = CALL_EXPR_ARG (exp, 0);
2234 tree arg0 = CALL_EXPR_ARG (exp, 1);
2235 tree arg1 = CALL_EXPR_ARG (exp, 2);
2236 rtx op0 = expand_normal (arg0);
2237 rtx op1 = expand_normal (arg1);
2238 machine_mode tmode = SImode;
2239 machine_mode mode0 = insn_data[icode].operand[1].mode;
2240 machine_mode mode1 = insn_data[icode].operand[2].mode;
2241 int cr6_form_int;
2242
2243 if (TREE_CODE (cr6_form) != INTEGER_CST)
2244 {
2245 error ("argument 1 of %qs must be a constant",
2246 "__builtin_altivec_predicate");
2247 return const0_rtx;
2248 }
2249 else
2250 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
2251
2252 gcc_assert (mode0 == mode1);
2253
2254 /* If we have invalid arguments, bail out before generating bad rtl. */
2255 if (arg0 == error_mark_node || arg1 == error_mark_node)
2256 return const0_rtx;
2257
2258 if (target == 0
2259 || GET_MODE (target) != tmode
2260 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
2261 target = gen_reg_rtx (tmode);
2262
2263 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
2264 op0 = copy_to_mode_reg (mode0, op0);
2265 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
2266 op1 = copy_to_mode_reg (mode1, op1);
2267
2268 /* Note that for many of the relevant operations (e.g. cmpne or
2269 cmpeq) with float or double operands, it makes more sense for the
2270 mode of the allocated scratch register to select a vector of
2271 integer. But the choice to copy the mode of operand 0 was made
2272 long ago and there are no plans to change it. */
2273 scratch = gen_reg_rtx (mode0);
2274
2275 pat = GEN_FCN (icode) (scratch, op0, op1);
2276 if (! pat)
2277 return 0;
2278 emit_insn (pat);
2279
2280 /* The vec_any* and vec_all* predicates use the same opcodes for two
2281 different operations, but the bits in CR6 will be different
2282 depending on what information we want. So we have to play tricks
2283 with CR6 to get the right bits out.
2284
2285 If you think this is disgusting, look at the specs for the
2286 AltiVec predicates. */
2287
2288 switch (cr6_form_int)
2289 {
2290 case 0:
2291 emit_insn (gen_cr6_test_for_zero (target));
2292 break;
2293 case 1:
2294 emit_insn (gen_cr6_test_for_zero_reverse (target));
2295 break;
2296 case 2:
2297 emit_insn (gen_cr6_test_for_lt (target));
2298 break;
2299 case 3:
2300 emit_insn (gen_cr6_test_for_lt_reverse (target));
2301 break;
2302 default:
2303 error ("argument 1 of %qs is out of range",
2304 "__builtin_altivec_predicate");
2305 break;
2306 }
2307
2308 return target;
2309 }
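/* For reference, the cr6_form values 0..3 correspond to the __CR6_EQ,
   __CR6_EQ_REV, __CR6_LT and __CR6_LT_REV selectors used by altivec.h,
   e.g. (illustrative, not verbatim from altivec.h):
     vec_all_eq (a, b)  ->  __builtin_altivec_vcmpequw_p (__CR6_LT, a, b)
     vec_any_eq (a, b)  ->  __builtin_altivec_vcmpequw_p (__CR6_EQ_REV, a, b)
   so the same compare instruction serves both the "all" and "any"
   predicates, distinguished only by which CR6 bit is tested.  */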
2310
2311 /* Expand vec_init builtin. */
2312 static rtx
2313 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
2314 {
2315 machine_mode tmode = TYPE_MODE (type);
2316 machine_mode inner_mode = GET_MODE_INNER (tmode);
2317 int i, n_elt = GET_MODE_NUNITS (tmode);
2318
2319 gcc_assert (VECTOR_MODE_P (tmode));
2320 gcc_assert (n_elt == call_expr_nargs (exp));
2321
2322 if (!target || !register_operand (target, tmode))
2323 target = gen_reg_rtx (tmode);
2324
2325   /* If we have a vector composed of a single element, such as V1TImode, do
2326      the initialization directly.  */
2327 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
2328 {
2329 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
2330 emit_move_insn (target, gen_lowpart (tmode, x));
2331 }
2332 else
2333 {
2334 rtvec v = rtvec_alloc (n_elt);
2335
2336 for (i = 0; i < n_elt; ++i)
2337 {
2338 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
2339 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
2340 }
2341
2342 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
2343 }
2344
2345 return target;
2346 }
2347
2348 /* Return the integer constant in ARG. Constrain it to be in the range
2349 of the subparts of VEC_TYPE; issue an error if not. */
2350
2351 static int
2352 get_element_number (tree vec_type, tree arg)
2353 {
2354 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
2355
2356 if (!tree_fits_uhwi_p (arg)
2357 || (elt = tree_to_uhwi (arg), elt > max))
2358 {
2359 error ("selector must be an integer constant in the range [0, %wi]", max);
2360 return 0;
2361 }
2362
2363 return elt;
2364 }
2365
2366 /* Expand vec_set builtin. */
2367 static rtx
2368 altivec_expand_vec_set_builtin (tree exp)
2369 {
2370 machine_mode tmode, mode1;
2371 tree arg0, arg1, arg2;
2372 int elt;
2373 rtx op0, op1;
2374
2375 arg0 = CALL_EXPR_ARG (exp, 0);
2376 arg1 = CALL_EXPR_ARG (exp, 1);
2377 arg2 = CALL_EXPR_ARG (exp, 2);
2378
2379 tmode = TYPE_MODE (TREE_TYPE (arg0));
2380 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
2381 gcc_assert (VECTOR_MODE_P (tmode));
2382
2383 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
2384 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
2385 elt = get_element_number (TREE_TYPE (arg0), arg2);
2386
2387 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
2388 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
2389
2390 op0 = force_reg (tmode, op0);
2391 op1 = force_reg (mode1, op1);
2392
2393 rs6000_expand_vector_set (op0, op1, GEN_INT (elt));
2394
2395 return op0;
2396 }
2397
2398 /* Expand vec_ext builtin. */
2399 static rtx
2400 altivec_expand_vec_ext_builtin (tree exp, rtx target)
2401 {
2402 machine_mode tmode, mode0;
2403 tree arg0, arg1;
2404 rtx op0;
2405 rtx op1;
2406
2407 arg0 = CALL_EXPR_ARG (exp, 0);
2408 arg1 = CALL_EXPR_ARG (exp, 1);
2409
2410 op0 = expand_normal (arg0);
2411 op1 = expand_normal (arg1);
2412
2413 if (TREE_CODE (arg1) == INTEGER_CST)
2414 {
2415 unsigned HOST_WIDE_INT elt;
2416 unsigned HOST_WIDE_INT size = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
2417 unsigned int truncated_selector;
2418       /* Even if !tree_fits_uhwi_p (arg1), TREE_INT_CST_LOW (arg1)
2419          returns the low-order bits of the INTEGER_CST for modulo indexing.  */
2420 elt = TREE_INT_CST_LOW (arg1);
2421 truncated_selector = elt % size;
2422 op1 = GEN_INT (truncated_selector);
2423 }
2424
2425 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
2426 mode0 = TYPE_MODE (TREE_TYPE (arg0));
2427 gcc_assert (VECTOR_MODE_P (mode0));
2428
2429 op0 = force_reg (mode0, op0);
2430
2431 if (optimize || !target || !register_operand (target, tmode))
2432 target = gen_reg_rtx (tmode);
2433
2434 rs6000_expand_vector_extract (target, op0, op1);
2435
2436 return target;
2437 }
2438
2439 /* Expand ALTIVEC_BUILTIN_MASK_FOR_LOAD. */
2440 rtx
2441 rs6000_expand_ldst_mask (rtx target, tree arg0)
2442 {
2443 int icode2 = BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
2444 : (int) CODE_FOR_altivec_lvsl_direct;
2445 machine_mode tmode = insn_data[icode2].operand[0].mode;
2446 machine_mode mode = insn_data[icode2].operand[1].mode;
2447
2448 gcc_assert (TARGET_ALTIVEC);
2449
2450 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg0)));
2451 rtx op = expand_expr (arg0, NULL_RTX, Pmode, EXPAND_NORMAL);
2452 rtx addr = memory_address (mode, op);
2453 /* We need to negate the address. */
2454 op = gen_reg_rtx (GET_MODE (addr));
2455 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
2456 op = gen_rtx_MEM (mode, op);
2457
2458 if (target == 0
2459 || GET_MODE (target) != tmode
2460 || !insn_data[icode2].operand[0].predicate (target, tmode))
2461 target = gen_reg_rtx (tmode);
2462
2463 rtx pat = GEN_FCN (icode2) (target, op);
2464 if (!pat)
2465 return 0;
2466 emit_insn (pat);
2467
2468 return target;
2469 }
2470
2471 /* Used by __builtin_cpu_is(), mapping from PLATFORM names to values. */
2472 static const struct
2473 {
2474 const char *cpu;
2475 unsigned int cpuid;
2476 } cpu_is_info[] = {
2477 { "power10", PPC_PLATFORM_POWER10 },
2478 { "power9", PPC_PLATFORM_POWER9 },
2479 { "power8", PPC_PLATFORM_POWER8 },
2480 { "power7", PPC_PLATFORM_POWER7 },
2481 { "power6x", PPC_PLATFORM_POWER6X },
2482 { "power6", PPC_PLATFORM_POWER6 },
2483 { "power5+", PPC_PLATFORM_POWER5_PLUS },
2484 { "power5", PPC_PLATFORM_POWER5 },
2485 { "ppc970", PPC_PLATFORM_PPC970 },
2486 { "power4", PPC_PLATFORM_POWER4 },
2487 { "ppca2", PPC_PLATFORM_PPCA2 },
2488 { "ppc476", PPC_PLATFORM_PPC476 },
2489 { "ppc464", PPC_PLATFORM_PPC464 },
2490 { "ppc440", PPC_PLATFORM_PPC440 },
2491 { "ppc405", PPC_PLATFORM_PPC405 },
2492 { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
2493 };
2494
2495 /* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks. */
2496 static const struct
2497 {
2498 const char *hwcap;
2499 int mask;
2500 unsigned int id;
2501 } cpu_supports_info[] = {
2502 /* AT_HWCAP masks. */
2503 { "4xxmac", PPC_FEATURE_HAS_4xxMAC, 0 },
2504 { "altivec", PPC_FEATURE_HAS_ALTIVEC, 0 },
2505 { "arch_2_05", PPC_FEATURE_ARCH_2_05, 0 },
2506 { "arch_2_06", PPC_FEATURE_ARCH_2_06, 0 },
2507 { "archpmu", PPC_FEATURE_PERFMON_COMPAT, 0 },
2508 { "booke", PPC_FEATURE_BOOKE, 0 },
2509 { "cellbe", PPC_FEATURE_CELL_BE, 0 },
2510 { "dfp", PPC_FEATURE_HAS_DFP, 0 },
2511 { "efpdouble", PPC_FEATURE_HAS_EFP_DOUBLE, 0 },
2512 { "efpsingle", PPC_FEATURE_HAS_EFP_SINGLE, 0 },
2513 { "fpu", PPC_FEATURE_HAS_FPU, 0 },
2514 { "ic_snoop", PPC_FEATURE_ICACHE_SNOOP, 0 },
2515 { "mmu", PPC_FEATURE_HAS_MMU, 0 },
2516 { "notb", PPC_FEATURE_NO_TB, 0 },
2517 { "pa6t", PPC_FEATURE_PA6T, 0 },
2518 { "power4", PPC_FEATURE_POWER4, 0 },
2519 { "power5", PPC_FEATURE_POWER5, 0 },
2520 { "power5+", PPC_FEATURE_POWER5_PLUS, 0 },
2521 { "power6x", PPC_FEATURE_POWER6_EXT, 0 },
2522 { "ppc32", PPC_FEATURE_32, 0 },
2523 { "ppc601", PPC_FEATURE_601_INSTR, 0 },
2524 { "ppc64", PPC_FEATURE_64, 0 },
2525 { "ppcle", PPC_FEATURE_PPC_LE, 0 },
2526 { "smt", PPC_FEATURE_SMT, 0 },
2527 { "spe", PPC_FEATURE_HAS_SPE, 0 },
2528 { "true_le", PPC_FEATURE_TRUE_LE, 0 },
2529 { "ucache", PPC_FEATURE_UNIFIED_CACHE, 0 },
2530 { "vsx", PPC_FEATURE_HAS_VSX, 0 },
2531
2532 /* AT_HWCAP2 masks. */
2533 { "arch_2_07", PPC_FEATURE2_ARCH_2_07, 1 },
2534 { "dscr", PPC_FEATURE2_HAS_DSCR, 1 },
2535 { "ebb", PPC_FEATURE2_HAS_EBB, 1 },
2536 { "htm", PPC_FEATURE2_HAS_HTM, 1 },
2537 { "htm-nosc", PPC_FEATURE2_HTM_NOSC, 1 },
2538 { "htm-no-suspend", PPC_FEATURE2_HTM_NO_SUSPEND, 1 },
2539 { "isel", PPC_FEATURE2_HAS_ISEL, 1 },
2540 { "tar", PPC_FEATURE2_HAS_TAR, 1 },
2541 { "vcrypto", PPC_FEATURE2_HAS_VEC_CRYPTO, 1 },
2542 { "arch_3_00", PPC_FEATURE2_ARCH_3_00, 1 },
2543 { "ieee128", PPC_FEATURE2_HAS_IEEE128, 1 },
2544 { "darn", PPC_FEATURE2_DARN, 1 },
2545 { "scv", PPC_FEATURE2_SCV, 1 },
2546 { "arch_3_1", PPC_FEATURE2_ARCH_3_1, 1 },
2547 { "mma", PPC_FEATURE2_MMA, 1 },
2548 };
2549
2550 /* Expand the CPU builtin in FCODE and store the result in TARGET. */
2551 static rtx
2552 cpu_expand_builtin (enum rs6000_gen_builtins fcode,
2553 tree exp ATTRIBUTE_UNUSED, rtx target)
2554 {
2555 /* __builtin_cpu_init () is a nop, so expand to nothing. */
2556 if (fcode == RS6000_BIF_CPU_INIT)
2557 return const0_rtx;
2558
2559 if (target == 0 || GET_MODE (target) != SImode)
2560 target = gen_reg_rtx (SImode);
2561
2562 /* TODO: Factor the #ifdef'd code into a separate function. */
2563 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
2564 tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
2565   /* Target clones create an ARRAY_REF instead of a STRING_CST; convert it back
2566      to a STRING_CST.  */
2567 if (TREE_CODE (arg) == ARRAY_REF
2568 && TREE_CODE (TREE_OPERAND (arg, 0)) == STRING_CST
2569 && TREE_CODE (TREE_OPERAND (arg, 1)) == INTEGER_CST
2570 && compare_tree_int (TREE_OPERAND (arg, 1), 0) == 0)
2571 arg = TREE_OPERAND (arg, 0);
2572
2573 if (TREE_CODE (arg) != STRING_CST)
2574 {
2575 error ("builtin %qs only accepts a string argument",
2576 rs6000_builtin_info[(size_t) fcode].bifname);
2577 return const0_rtx;
2578 }
2579
2580 if (fcode == RS6000_BIF_CPU_IS)
2581 {
2582 const char *cpu = TREE_STRING_POINTER (arg);
2583 rtx cpuid = NULL_RTX;
2584 for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
2585 if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
2586 {
2587 /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */
2588 cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
2589 break;
2590 }
2591 if (cpuid == NULL_RTX)
2592 {
2593 /* Invalid CPU argument. */
2594 error ("cpu %qs is an invalid argument to builtin %qs",
2595 cpu, rs6000_builtin_info[(size_t) fcode].bifname);
2596 return const0_rtx;
2597 }
2598
2599 rtx platform = gen_reg_rtx (SImode);
2600 rtx address = gen_rtx_PLUS (Pmode,
2601 gen_rtx_REG (Pmode, TLS_REGNUM),
2602 GEN_INT (TCB_PLATFORM_OFFSET));
2603 rtx tcbmem = gen_const_mem (SImode, address);
2604 emit_move_insn (platform, tcbmem);
2605 emit_insn (gen_eqsi3 (target, platform, cpuid));
2606 }
2607 else if (fcode == RS6000_BIF_CPU_SUPPORTS)
2608 {
2609 const char *hwcap = TREE_STRING_POINTER (arg);
2610 rtx mask = NULL_RTX;
2611 int hwcap_offset;
2612 for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
2613 if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
2614 {
2615 mask = GEN_INT (cpu_supports_info[i].mask);
2616 hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
2617 break;
2618 }
2619 if (mask == NULL_RTX)
2620 {
2621 /* Invalid HWCAP argument. */
2622 error ("%s %qs is an invalid argument to builtin %qs",
2623 "hwcap", hwcap,
2624 rs6000_builtin_info[(size_t) fcode].bifname);
2625 return const0_rtx;
2626 }
2627
2628 rtx tcb_hwcap = gen_reg_rtx (SImode);
2629 rtx address = gen_rtx_PLUS (Pmode,
2630 gen_rtx_REG (Pmode, TLS_REGNUM),
2631 GEN_INT (hwcap_offset));
2632 rtx tcbmem = gen_const_mem (SImode, address);
2633 emit_move_insn (tcb_hwcap, tcbmem);
2634 rtx scratch1 = gen_reg_rtx (SImode);
2635 emit_insn (gen_rtx_SET (scratch1,
2636 gen_rtx_AND (SImode, tcb_hwcap, mask)));
2637 rtx scratch2 = gen_reg_rtx (SImode);
2638 emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
2639 emit_insn (gen_rtx_SET (target,
2640 gen_rtx_XOR (SImode, scratch2, const1_rtx)));
2641 }
2642 else
2643 gcc_unreachable ();
2644
2645 /* Record that we have expanded a CPU builtin, so that we can later
2646 emit a reference to the special symbol exported by LIBC to ensure we
2647 do not link against an old LIBC that doesn't support this feature. */
2648 cpu_builtin_p = true;
2649
2650 #else
2651 warning (0, "builtin %qs needs GLIBC (2.23 and newer) that exports hardware "
2652 "capability bits", rs6000_builtin_info[(size_t) fcode].bifname);
2653
2654 /* For old LIBCs, always return FALSE. */
2655 emit_move_insn (target, GEN_INT (0));
2656 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
2657
2658 return target;
2659 }
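/* Typical uses of the two builtins expanded above look like:
     if (__builtin_cpu_is ("power9"))
       ...
     if (__builtin_cpu_supports ("vsx"))
       ...
   With a new enough glibc, the first compares the TCB platform word
   against the matching cpu_is_info[].cpuid, and the second ANDs the
   cached AT_HWCAP/AT_HWCAP2 word with the matching mask; both leave a
   0/1 value in TARGET.  */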
2660
2661 /* For the element-reversing load/store built-ins, produce the correct
2662 insn_code depending on the target endianness. */
2663 static insn_code
2664 elemrev_icode (rs6000_gen_builtins fcode)
2665 {
2666 switch (fcode)
2667 {
2668 case RS6000_BIF_ST_ELEMREV_V1TI:
2669 return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v1ti
2670 : CODE_FOR_vsx_st_elemrev_v1ti;
2671
2672 case RS6000_BIF_ST_ELEMREV_V2DF:
2673 return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
2674 : CODE_FOR_vsx_st_elemrev_v2df;
2675
2676 case RS6000_BIF_ST_ELEMREV_V2DI:
2677 return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
2678 : CODE_FOR_vsx_st_elemrev_v2di;
2679
2680 case RS6000_BIF_ST_ELEMREV_V4SF:
2681 return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
2682 : CODE_FOR_vsx_st_elemrev_v4sf;
2683
2684 case RS6000_BIF_ST_ELEMREV_V4SI:
2685 return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
2686 : CODE_FOR_vsx_st_elemrev_v4si;
2687
2688 case RS6000_BIF_ST_ELEMREV_V8HI:
2689 return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
2690 : CODE_FOR_vsx_st_elemrev_v8hi;
2691
2692 case RS6000_BIF_ST_ELEMREV_V16QI:
2693 return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
2694 : CODE_FOR_vsx_st_elemrev_v16qi;
2695
2696 case RS6000_BIF_LD_ELEMREV_V2DF:
2697 return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
2698 : CODE_FOR_vsx_ld_elemrev_v2df;
2699
2700 case RS6000_BIF_LD_ELEMREV_V1TI:
2701 return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v1ti
2702 : CODE_FOR_vsx_ld_elemrev_v1ti;
2703
2704 case RS6000_BIF_LD_ELEMREV_V2DI:
2705 return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
2706 : CODE_FOR_vsx_ld_elemrev_v2di;
2707
2708 case RS6000_BIF_LD_ELEMREV_V4SF:
2709 return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
2710 : CODE_FOR_vsx_ld_elemrev_v4sf;
2711
2712 case RS6000_BIF_LD_ELEMREV_V4SI:
2713 return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
2714 : CODE_FOR_vsx_ld_elemrev_v4si;
2715
2716 case RS6000_BIF_LD_ELEMREV_V8HI:
2717 return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
2718 : CODE_FOR_vsx_ld_elemrev_v8hi;
2719
2720 case RS6000_BIF_LD_ELEMREV_V16QI:
2721 return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
2722 : CODE_FOR_vsx_ld_elemrev_v16qi;
2723 default:
2724 ;
2725 }
2726
2727 gcc_unreachable ();
2728 }
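/* In other words, on a big-endian target the "element-reversing" form
   degenerates to a plain VSX load or store (memory element order already
   matches register element order), e.g. RS6000_BIF_LD_ELEMREV_V4SI maps
   to CODE_FOR_vsx_load_v4si, while on little-endian the corresponding
   *_elemrev_* pattern is used to restore big-endian element numbering.  */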
2729
2730 /* Expand an AltiVec vector load builtin, and return the expanded rtx. */
2731 static rtx
2732 ldv_expand_builtin (rtx target, insn_code icode, rtx *op, machine_mode tmode)
2733 {
2734 if (target == 0
2735 || GET_MODE (target) != tmode
2736 || !insn_data[icode].operand[0].predicate (target, tmode))
2737 target = gen_reg_rtx (tmode);
2738
2739 op[1] = copy_to_mode_reg (Pmode, op[1]);
2740
2741 /* These CELL built-ins use BLKmode instead of tmode for historical
2742 (i.e., unknown) reasons. TODO: Is this necessary? */
2743 bool blk = (icode == CODE_FOR_altivec_lvlx
2744 || icode == CODE_FOR_altivec_lvlxl
2745 || icode == CODE_FOR_altivec_lvrx
2746 || icode == CODE_FOR_altivec_lvrxl);
2747
2748 /* For LVX, express the RTL accurately by ANDing the address with -16.
2749 LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
2750 so the raw address is fine. */
2751 /* TODO: That statement seems wrong, as the UNSPECs don't surround the
2752 memory expression, so a latent bug may lie here. The &-16 is likely
2753 needed for all VMX-style loads. */
2754 if (icode == CODE_FOR_altivec_lvx_v1ti
2755 || icode == CODE_FOR_altivec_lvx_v2df
2756 || icode == CODE_FOR_altivec_lvx_v2di
2757 || icode == CODE_FOR_altivec_lvx_v4sf
2758 || icode == CODE_FOR_altivec_lvx_v4si
2759 || icode == CODE_FOR_altivec_lvx_v8hi
2760 || icode == CODE_FOR_altivec_lvx_v16qi)
2761 {
2762 rtx rawaddr;
2763 if (op[0] == const0_rtx)
2764 rawaddr = op[1];
2765 else
2766 {
2767 op[0] = copy_to_mode_reg (Pmode, op[0]);
2768 rawaddr = gen_rtx_PLUS (Pmode, op[1], op[0]);
2769 }
2770 rtx addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
2771 addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
2772
2773 emit_insn (gen_rtx_SET (target, addr));
2774 }
2775 else
2776 {
2777 rtx addr;
2778 if (op[0] == const0_rtx)
2779 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op[1]);
2780 else
2781 {
2782 op[0] = copy_to_mode_reg (Pmode, op[0]);
2783 addr = gen_rtx_MEM (blk ? BLKmode : tmode,
2784 gen_rtx_PLUS (Pmode, op[1], op[0]));
2785 }
2786
2787 rtx pat = GEN_FCN (icode) (target, addr);
2788 if (!pat)
2789 return 0;
2790 emit_insn (pat);
2791 }
2792
2793 return target;
2794 }
2795
2796 /* Expand a builtin function that loads a scalar into a vector register
2797 with sign extension, and return the expanded rtx. */
2798 static rtx
2799 lxvrse_expand_builtin (rtx target, insn_code icode, rtx *op,
2800 machine_mode tmode, machine_mode smode)
2801 {
2802 rtx pat, addr;
2803 op[1] = copy_to_mode_reg (Pmode, op[1]);
2804
2805 if (op[0] == const0_rtx)
2806 addr = gen_rtx_MEM (tmode, op[1]);
2807 else
2808 {
2809 op[0] = copy_to_mode_reg (Pmode, op[0]);
2810 addr = gen_rtx_MEM (smode,
2811 gen_rtx_PLUS (Pmode, op[1], op[0]));
2812 }
2813
2814 rtx discratch = gen_reg_rtx (V2DImode);
2815 rtx tiscratch = gen_reg_rtx (TImode);
2816
2817 /* Emit the lxvr*x insn. */
2818 pat = GEN_FCN (icode) (tiscratch, addr);
2819 if (!pat)
2820 return 0;
2821 emit_insn (pat);
2822
2823 /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI. */
2824 rtx temp1;
2825 if (icode == CODE_FOR_vsx_lxvrbx)
2826 {
2827 temp1 = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
2828 emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
2829 }
2830 else if (icode == CODE_FOR_vsx_lxvrhx)
2831 {
2832 temp1 = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
2833 emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
2834 }
2835 else if (icode == CODE_FOR_vsx_lxvrwx)
2836 {
2837 temp1 = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
2838 emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
2839 }
2840 else if (icode == CODE_FOR_vsx_lxvrdx)
2841 discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
2842 else
2843 gcc_unreachable ();
2844
2845 /* Emit the sign extension from V2DI (double) to TI (quad). */
2846 rtx temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
2847 emit_insn (gen_extendditi2_vector (target, temp2));
2848
2849 return target;
2850 }
2851
2852 /* Expand a builtin function that loads a scalar into a vector register
2853 with zero extension, and return the expanded rtx. */
2854 static rtx
2855 lxvrze_expand_builtin (rtx target, insn_code icode, rtx *op,
2856 machine_mode tmode, machine_mode smode)
2857 {
2858 rtx pat, addr;
2859 op[1] = copy_to_mode_reg (Pmode, op[1]);
2860
2861 if (op[0] == const0_rtx)
2862 addr = gen_rtx_MEM (tmode, op[1]);
2863 else
2864 {
2865 op[0] = copy_to_mode_reg (Pmode, op[0]);
2866 addr = gen_rtx_MEM (smode,
2867 gen_rtx_PLUS (Pmode, op[1], op[0]));
2868 }
2869
2870 pat = GEN_FCN (icode) (target, addr);
2871 if (!pat)
2872 return 0;
2873 emit_insn (pat);
2874 return target;
2875 }
2876
2877 /* Expand an AltiVec vector store builtin, and return the expanded rtx. */
2878 static rtx
2879 stv_expand_builtin (insn_code icode, rtx *op,
2880 machine_mode tmode, machine_mode smode)
2881 {
2882 op[2] = copy_to_mode_reg (Pmode, op[2]);
2883
2884 /* For STVX, express the RTL accurately by ANDing the address with -16.
2885 STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
2886 so the raw address is fine. */
2887 /* TODO: That statement seems wrong, as the UNSPECs don't surround the
2888 memory expression, so a latent bug may lie here. The &-16 is likely
2889 needed for all VMX-style stores. */
2890 if (icode == CODE_FOR_altivec_stvx_v2df
2891 || icode == CODE_FOR_altivec_stvx_v2di
2892 || icode == CODE_FOR_altivec_stvx_v4sf
2893 || icode == CODE_FOR_altivec_stvx_v4si
2894 || icode == CODE_FOR_altivec_stvx_v8hi
2895 || icode == CODE_FOR_altivec_stvx_v16qi)
2896 {
2897 rtx rawaddr;
2898 if (op[1] == const0_rtx)
2899 rawaddr = op[2];
2900 else
2901 {
2902 op[1] = copy_to_mode_reg (Pmode, op[1]);
2903 rawaddr = gen_rtx_PLUS (Pmode, op[2], op[1]);
2904 }
2905
2906 rtx addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
2907 addr = gen_rtx_MEM (tmode, addr);
2908 op[0] = copy_to_mode_reg (tmode, op[0]);
2909 emit_insn (gen_rtx_SET (addr, op[0]));
2910 }
2911 else if (icode == CODE_FOR_vsx_stxvrbx
2912 || icode == CODE_FOR_vsx_stxvrhx
2913 || icode == CODE_FOR_vsx_stxvrwx
2914 || icode == CODE_FOR_vsx_stxvrdx)
2915 {
2916 rtx truncrtx = gen_rtx_TRUNCATE (tmode, op[0]);
2917 op[0] = copy_to_mode_reg (E_TImode, truncrtx);
2918
2919 rtx addr;
2920 if (op[1] == const0_rtx)
2921 addr = gen_rtx_MEM (Pmode, op[2]);
2922 else
2923 {
2924 op[1] = copy_to_mode_reg (Pmode, op[1]);
2925 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op[2], op[1]));
2926 }
2927 rtx pat = GEN_FCN (icode) (addr, op[0]);
2928 if (pat)
2929 emit_insn (pat);
2930 }
2931 else
2932 {
2933 if (!insn_data[icode].operand[1].predicate (op[0], smode))
2934 op[0] = copy_to_mode_reg (smode, op[0]);
2935
2936 rtx addr;
2937 if (op[1] == const0_rtx)
2938 addr = gen_rtx_MEM (tmode, op[2]);
2939 else
2940 {
2941 op[1] = copy_to_mode_reg (Pmode, op[1]);
2942 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op[2], op[1]));
2943 }
2944
2945 rtx pat = GEN_FCN (icode) (addr, op[0]);
2946 if (pat)
2947 emit_insn (pat);
2948 }
2949
2950 return NULL_RTX;
2951 }
2952
2953 /* Expand the MMA built-in in EXP, and return it. */
2954 static rtx
2955 mma_expand_builtin (tree exp, rtx target, insn_code icode,
2956 rs6000_gen_builtins fcode)
2957 {
2958 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
2959 bool void_func = TREE_TYPE (TREE_TYPE (fndecl)) == void_type_node;
2960 machine_mode tmode = VOIDmode;
2961 rtx op[MAX_MMA_OPERANDS];
2962 unsigned nopnds = 0;
2963
2964 if (!void_func)
2965 {
2966 tmode = insn_data[icode].operand[0].mode;
2967 if (!(target
2968 && GET_MODE (target) == tmode
2969 && insn_data[icode].operand[0].predicate (target, tmode)))
2970 target = gen_reg_rtx (tmode);
2971 op[nopnds++] = target;
2972 }
2973 else
2974 target = const0_rtx;
2975
2976 call_expr_arg_iterator iter;
2977 tree arg;
2978 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
2979 {
2980 if (arg == error_mark_node)
2981 return const0_rtx;
2982
2983 rtx opnd;
2984 const struct insn_operand_data *insn_op;
2985 insn_op = &insn_data[icode].operand[nopnds];
2986 if (TREE_CODE (arg) == ADDR_EXPR
2987 && MEM_P (DECL_RTL (TREE_OPERAND (arg, 0))))
2988 opnd = DECL_RTL (TREE_OPERAND (arg, 0));
2989 else
2990 opnd = expand_normal (arg);
2991
2992 if (!insn_op->predicate (opnd, insn_op->mode))
2993 {
2994 /* TODO: This use of constraints needs explanation. */
2995 if (!strcmp (insn_op->constraint, "n"))
2996 {
2997 if (!CONST_INT_P (opnd))
2998 error ("argument %d must be an unsigned literal", nopnds);
2999 else
3000 error ("argument %d is an unsigned literal that is "
3001 "out of range", nopnds);
3002 return const0_rtx;
3003 }
3004 opnd = copy_to_mode_reg (insn_op->mode, opnd);
3005 }
3006
3007 /* Some MMA instructions have INOUT accumulator operands, so force
3008 their target register to be the same as their input register. */
3009 if (!void_func
3010 && nopnds == 1
3011 && !strcmp (insn_op->constraint, "0")
3012 && insn_op->mode == tmode
3013 && REG_P (opnd)
3014 && insn_data[icode].operand[0].predicate (opnd, tmode))
3015 target = op[0] = opnd;
3016
3017 op[nopnds++] = opnd;
3018 }
3019
3020 rtx pat;
3021 switch (nopnds)
3022 {
3023 case 1:
3024 pat = GEN_FCN (icode) (op[0]);
3025 break;
3026 case 2:
3027 pat = GEN_FCN (icode) (op[0], op[1]);
3028 break;
3029 case 3:
3030 /* The ASSEMBLE builtin source operands are reversed in little-endian
3031 mode, so reorder them. */
3032 if (fcode == RS6000_BIF_ASSEMBLE_PAIR_V_INTERNAL && !WORDS_BIG_ENDIAN)
3033 std::swap (op[1], op[2]);
3034 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
3035 break;
3036 case 4:
3037 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
3038 break;
3039 case 5:
3040 /* The ASSEMBLE builtin source operands are reversed in little-endian
3041 mode, so reorder them. */
3042 if (fcode == RS6000_BIF_ASSEMBLE_ACC_INTERNAL && !WORDS_BIG_ENDIAN)
3043 {
3044 std::swap (op[1], op[4]);
3045 std::swap (op[2], op[3]);
3046 }
3047 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
3048 break;
3049 case 6:
3050 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
3051 break;
3052 case 7:
3053 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5], op[6]);
3054 break;
3055 default:
3056 gcc_unreachable ();
3057 }
3058
3059 if (!pat)
3060 return NULL_RTX;
3061
3062 emit_insn (pat);
3063 return target;
3064 }
3065
3066 /* Return the correct ICODE value depending on whether we are
3067 setting or reading the HTM SPRs. */
3068 static inline enum insn_code
3069 rs6000_htm_spr_icode (bool nonvoid)
3070 {
3071 if (nonvoid)
3072 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
3073 else
3074 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
3075 }
3076
3077 /* Return the appropriate SPR number associated with the given builtin. */
3078 static inline HOST_WIDE_INT
3079 htm_spr_num (enum rs6000_gen_builtins code)
3080 {
3081 if (code == RS6000_BIF_GET_TFHAR
3082 || code == RS6000_BIF_SET_TFHAR)
3083 return TFHAR_SPR;
3084 else if (code == RS6000_BIF_GET_TFIAR
3085 || code == RS6000_BIF_SET_TFIAR)
3086 return TFIAR_SPR;
3087 else if (code == RS6000_BIF_GET_TEXASR
3088 || code == RS6000_BIF_SET_TEXASR)
3089 return TEXASR_SPR;
3090 gcc_assert (code == RS6000_BIF_GET_TEXASRU
3091 || code == RS6000_BIF_SET_TEXASRU);
3092 return TEXASRU_SPR;
3093 }
3094
3095 /* Expand the HTM builtin in EXP and store the result in TARGET.
3096 Return the expanded rtx. */
3097 static rtx
3098 htm_expand_builtin (bifdata *bifaddr, rs6000_gen_builtins fcode,
3099 tree exp, rtx target)
3100 {
3101 if (!TARGET_POWERPC64
3102 && (fcode == RS6000_BIF_TABORTDC
3103 || fcode == RS6000_BIF_TABORTDCI))
3104 {
3105 error ("builtin %qs is only valid in 64-bit mode", bifaddr->bifname);
3106 return const0_rtx;
3107 }
3108
3109 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
3110 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
3111 bool uses_spr = bif_is_htmspr (*bifaddr);
3112 insn_code icode = bifaddr->icode;
3113
3114 if (uses_spr)
3115 icode = rs6000_htm_spr_icode (nonvoid);
3116
3117 rtx op[MAX_HTM_OPERANDS];
3118 int nopnds = 0;
3119 const insn_operand_data *insn_op = &insn_data[icode].operand[0];
3120
3121 if (nonvoid)
3122 {
3123 machine_mode tmode = (uses_spr) ? insn_op->mode : E_SImode;
3124 if (!target
3125 || GET_MODE (target) != tmode
3126 || (uses_spr && !insn_op->predicate (target, tmode)))
3127 target = gen_reg_rtx (tmode);
3128 if (uses_spr)
3129 op[nopnds++] = target;
3130 }
3131
3132 tree arg;
3133 call_expr_arg_iterator iter;
3134
3135 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
3136 {
3137 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
3138 return const0_rtx;
3139
3140 insn_op = &insn_data[icode].operand[nopnds];
3141 op[nopnds] = expand_normal (arg);
3142
3143 if (!insn_op->predicate (op[nopnds], insn_op->mode))
3144 {
3145 /* TODO: This use of constraints could use explanation.
3146              This happens in a couple of places; perhaps make that a
3147 function to document what's happening. */
3148 if (!strcmp (insn_op->constraint, "n"))
3149 {
3150 int arg_num = nonvoid ? nopnds : nopnds + 1;
3151 if (!CONST_INT_P (op[nopnds]))
3152 error ("argument %d must be an unsigned literal", arg_num);
3153 else
3154 error ("argument %d is an unsigned literal that is "
3155 "out of range", arg_num);
3156 return const0_rtx;
3157 }
3158 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
3159 }
3160
3161 nopnds++;
3162 }
3163
3164 /* Handle the builtins for extended mnemonics. These accept
3165 no arguments, but map to builtins that take arguments. */
3166 switch (fcode)
3167 {
3168 case RS6000_BIF_TENDALL: /* Alias for: tend. 1 */
3169 case RS6000_BIF_TRESUME: /* Alias for: tsr. 1 */
3170 op[nopnds++] = GEN_INT (1);
3171 break;
3172 case RS6000_BIF_TSUSPEND: /* Alias for: tsr. 0 */
3173 op[nopnds++] = GEN_INT (0);
3174 break;
3175 default:
3176 break;
3177 }
3178
3179 /* If this builtin accesses SPRs, then pass in the appropriate
3180 SPR number and SPR regno as the last two operands. */
3181 rtx cr = NULL_RTX;
3182 if (uses_spr)
3183 {
3184 machine_mode mode = TARGET_POWERPC64 ? DImode : SImode;
3185 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
3186 }
3187 /* If this builtin accesses a CR field, then pass in a scratch
3188 CR field as the last operand. */
3189 else if (bif_is_htmcr (*bifaddr))
3190 {
3191 cr = gen_reg_rtx (CCmode);
3192 op[nopnds++] = cr;
3193 }
3194
3195 rtx pat;
3196 switch (nopnds)
3197 {
3198 case 1:
3199 pat = GEN_FCN (icode) (op[0]);
3200 break;
3201 case 2:
3202 pat = GEN_FCN (icode) (op[0], op[1]);
3203 break;
3204 case 3:
3205 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
3206 break;
3207 case 4:
3208 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
3209 break;
3210 default:
3211 gcc_unreachable ();
3212 }
3213 if (!pat)
3214 return NULL_RTX;
3215 emit_insn (pat);
3216
3217 if (bif_is_htmcr (*bifaddr))
3218 {
3219 if (fcode == RS6000_BIF_TBEGIN)
3220 {
3221 /* Emit code to set TARGET to true or false depending on
3222 whether the tbegin. instruction succeeded or failed
3223 to start a transaction. We do this by placing the 1's
3224 complement of CR's EQ bit into TARGET. */
3225 rtx scratch = gen_reg_rtx (SImode);
3226 emit_insn (gen_rtx_SET (scratch,
3227 gen_rtx_EQ (SImode, cr,
3228 const0_rtx)));
3229 emit_insn (gen_rtx_SET (target,
3230 gen_rtx_XOR (SImode, scratch,
3231 GEN_INT (1))));
3232 }
3233 else
3234 {
3235 /* Emit code to copy the 4-bit condition register field
3236 CR into the least significant end of register TARGET. */
3237 rtx scratch1 = gen_reg_rtx (SImode);
3238 rtx scratch2 = gen_reg_rtx (SImode);
3239 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
3240 emit_insn (gen_movcc (subreg, cr));
3241 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
3242 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
3243 }
3244 }
3245
3246 if (nonvoid)
3247 return target;
3248 return const0_rtx;
3249 }
3250
3251 /* Expand an expression EXP that calls a built-in function,
3252 with result going to TARGET if that's convenient
3253 (and in mode MODE if that's convenient).
3254 SUBTARGET may be used as the target for computing one of EXP's operands.
3255 IGNORE is nonzero if the value is to be ignored.
3256 Use the new builtin infrastructure. */
3257 rtx
3258 rs6000_expand_builtin (tree exp, rtx target, rtx /* subtarget */,
3259 machine_mode /* mode */, int ignore)
3260 {
3261 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
3262 enum rs6000_gen_builtins fcode
3263 = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl);
3264
3265 /* Emit error message if it's an unresolved overloaded builtin. */
3266 if (fcode > RS6000_OVLD_NONE)
3267 {
3268 error ("unresolved overload for builtin %qF", fndecl);
3269 return const0_rtx;
3270 }
3271
3272 size_t uns_fcode = (size_t)fcode;
3273 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
3274
3275   /* TODO: The following commentary and code are inherited from the original
3276 builtin processing code. The commentary is a bit confusing, with the
3277 intent being that KFmode is always IEEE-128, IFmode is always IBM
3278 double-double, and TFmode is the current long double. The code is
3279 confusing in that it converts from KFmode to TFmode pattern names,
3280 when the other direction is more intuitive. Try to address this. */
3281
3282   /* Two different modes can represent the IEEE 128-bit floating point
3283      type: KFmode, when long double is IBM extended double, and TFmode,
3284      when long double is itself IEEE 128-bit.  It is simpler to define a
3285      single variant of each built-in function and switch the pattern code
3286      here than to define two built-ins and use the overload table in
3287      rs6000-c.cc to choose between them.  If we don't have the proper
3288      assembler, don't do this switch, because CODE_FOR_*kf* and
3289      CODE_FOR_*tf* will both be CODE_FOR_nothing.  */
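  /* For example, when TFmode is IEEE 128-bit (e.g. with
     -mabi=ieeelongdouble), a built-in whose table entry names the KFmode
     pattern CODE_FOR_sqrtkf2_odd is expanded through CODE_FOR_sqrttf2_odd
     instead, so a call such as __builtin_sqrtf128_round_to_odd works with
     either long double format.  */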
3290 if (FLOAT128_IEEE_P (TFmode))
3291 switch (icode)
3292 {
3293 case CODE_FOR_sqrtkf2_odd:
3294 icode = CODE_FOR_sqrttf2_odd;
3295 break;
3296 case CODE_FOR_trunckfdf2_odd:
3297 icode = CODE_FOR_trunctfdf2_odd;
3298 break;
3299 case CODE_FOR_addkf3_odd:
3300 icode = CODE_FOR_addtf3_odd;
3301 break;
3302 case CODE_FOR_subkf3_odd:
3303 icode = CODE_FOR_subtf3_odd;
3304 break;
3305 case CODE_FOR_mulkf3_odd:
3306 icode = CODE_FOR_multf3_odd;
3307 break;
3308 case CODE_FOR_divkf3_odd:
3309 icode = CODE_FOR_divtf3_odd;
3310 break;
3311 case CODE_FOR_fmakf4_odd:
3312 icode = CODE_FOR_fmatf4_odd;
3313 break;
3314 case CODE_FOR_xsxexpqp_kf:
3315 icode = CODE_FOR_xsxexpqp_tf;
3316 break;
3317 case CODE_FOR_xsxsigqp_kf:
3318 icode = CODE_FOR_xsxsigqp_tf;
3319 break;
3320 case CODE_FOR_xststdcnegqp_kf:
3321 icode = CODE_FOR_xststdcnegqp_tf;
3322 break;
3323 case CODE_FOR_xsiexpqp_kf:
3324 icode = CODE_FOR_xsiexpqp_tf;
3325 break;
3326 case CODE_FOR_xsiexpqpf_kf:
3327 icode = CODE_FOR_xsiexpqpf_tf;
3328 break;
3329 case CODE_FOR_xststdcqp_kf:
3330 icode = CODE_FOR_xststdcqp_tf;
3331 break;
3332 case CODE_FOR_xscmpexpqp_eq_kf:
3333 icode = CODE_FOR_xscmpexpqp_eq_tf;
3334 break;
3335 case CODE_FOR_xscmpexpqp_lt_kf:
3336 icode = CODE_FOR_xscmpexpqp_lt_tf;
3337 break;
3338 case CODE_FOR_xscmpexpqp_gt_kf:
3339 icode = CODE_FOR_xscmpexpqp_gt_tf;
3340 break;
3341 case CODE_FOR_xscmpexpqp_unordered_kf:
3342 icode = CODE_FOR_xscmpexpqp_unordered_tf;
3343 break;
3344 default:
3345 break;
3346 }
3347
3348 /* In case of "#pragma target" changes, we initialize all builtins
3349 but check for actual availability now, during expand time. For
3350 invalid builtins, generate a normal call. */
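  /* For example, a function compiled under a #pragma GCC target with a
     lower cpu level still sees the declarations of newer built-ins; a call
     to one of them is diagnosed here and expanded as an ordinary call.  */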
3351 bifdata *bifaddr = &rs6000_builtin_info[uns_fcode];
3352
3353 if (!rs6000_builtin_is_supported (fcode))
3354 {
3355 rs6000_invalid_builtin (fcode);
3356 return expand_call (exp, target, ignore);
3357 }
3358
3359 if (bif_is_nosoft (*bifaddr)
3360 && rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
3361 {
3362 error ("%qs not supported with %<-msoft-float%>",
3363 bifaddr->bifname);
3364 return const0_rtx;
3365 }
3366
3367 if (bif_is_no32bit (*bifaddr) && TARGET_32BIT)
3368 {
3369 error ("%qs is not supported in 32-bit mode", bifaddr->bifname);
3370 return const0_rtx;
3371 }
3372
3373 if (bif_is_ibmld (*bifaddr) && !FLOAT128_2REG_P (TFmode))
3374 {
3375 error ("%qs requires %<long double%> to be IBM 128-bit format",
3376 bifaddr->bifname);
3377 return const0_rtx;
3378 }
3379
3380 if (bif_is_ibm128 (*bifaddr) && !ibm128_float_type_node)
3381 {
3382 error ("%qs requires %<__ibm128%> type support",
3383 bifaddr->bifname);
3384 return const0_rtx;
3385 }
3386
3387 if (bif_is_cpu (*bifaddr))
3388 return cpu_expand_builtin (fcode, exp, target);
3389
3390 if (bif_is_init (*bifaddr))
3391 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
3392
3393 if (bif_is_set (*bifaddr))
3394 return altivec_expand_vec_set_builtin (exp);
3395
3396 if (bif_is_extract (*bifaddr))
3397 return altivec_expand_vec_ext_builtin (exp, target);
3398
3399 if (bif_is_predicate (*bifaddr))
3400 return altivec_expand_predicate_builtin (icode, exp, target);
3401
3402 if (bif_is_htm (*bifaddr))
3403 return htm_expand_builtin (bifaddr, fcode, exp, target);
3404
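  /* A few built-ins need different (SImode) patterns when generating
     32-bit code; substitute those icodes here.  */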
3405 if (bif_is_32bit (*bifaddr) && TARGET_32BIT)
3406 {
3407 if (fcode == RS6000_BIF_MFTB)
3408 icode = CODE_FOR_rs6000_mftb_si;
3409 else if (fcode == RS6000_BIF_BPERMD)
3410 icode = CODE_FOR_bpermd_si;
3411 else if (fcode == RS6000_BIF_DARN)
3412 icode = CODE_FOR_darn_64_si;
3413 else if (fcode == RS6000_BIF_DARN_32)
3414 icode = CODE_FOR_darn_32_si;
3415 else if (fcode == RS6000_BIF_DARN_RAW)
3416 icode = CODE_FOR_darn_raw_si;
3417 else
3418 gcc_unreachable ();
3419 }
3420
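  /* Some built-ins map to different patterns on big-endian targets; in
     particular, the element-reversing VSX loads and stores reduce to
     plain VSX loads and stores there.  */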
3421 if (bif_is_endian (*bifaddr) && BYTES_BIG_ENDIAN)
3422 {
3423 if (fcode == RS6000_BIF_LD_ELEMREV_V1TI)
3424 icode = CODE_FOR_vsx_load_v1ti;
3425 else if (fcode == RS6000_BIF_LD_ELEMREV_V2DF)
3426 icode = CODE_FOR_vsx_load_v2df;
3427 else if (fcode == RS6000_BIF_LD_ELEMREV_V2DI)
3428 icode = CODE_FOR_vsx_load_v2di;
3429 else if (fcode == RS6000_BIF_LD_ELEMREV_V4SF)
3430 icode = CODE_FOR_vsx_load_v4sf;
3431 else if (fcode == RS6000_BIF_LD_ELEMREV_V4SI)
3432 icode = CODE_FOR_vsx_load_v4si;
3433 else if (fcode == RS6000_BIF_LD_ELEMREV_V8HI)
3434 icode = CODE_FOR_vsx_load_v8hi;
3435 else if (fcode == RS6000_BIF_LD_ELEMREV_V16QI)
3436 icode = CODE_FOR_vsx_load_v16qi;
3437 else if (fcode == RS6000_BIF_ST_ELEMREV_V1TI)
3438 icode = CODE_FOR_vsx_store_v1ti;
3439 else if (fcode == RS6000_BIF_ST_ELEMREV_V2DF)
3440 icode = CODE_FOR_vsx_store_v2df;
3441 else if (fcode == RS6000_BIF_ST_ELEMREV_V2DI)
3442 icode = CODE_FOR_vsx_store_v2di;
3443 else if (fcode == RS6000_BIF_ST_ELEMREV_V4SF)
3444 icode = CODE_FOR_vsx_store_v4sf;
3445 else if (fcode == RS6000_BIF_ST_ELEMREV_V4SI)
3446 icode = CODE_FOR_vsx_store_v4si;
3447 else if (fcode == RS6000_BIF_ST_ELEMREV_V8HI)
3448 icode = CODE_FOR_vsx_store_v8hi;
3449 else if (fcode == RS6000_BIF_ST_ELEMREV_V16QI)
3450 icode = CODE_FOR_vsx_store_v16qi;
3451 else if (fcode == RS6000_BIF_VCLZLSBB_V16QI)
3452 icode = CODE_FOR_vclzlsbb_v16qi;
3453 else if (fcode == RS6000_BIF_VCLZLSBB_V4SI)
3454 icode = CODE_FOR_vclzlsbb_v4si;
3455 else if (fcode == RS6000_BIF_VCLZLSBB_V8HI)
3456 icode = CODE_FOR_vclzlsbb_v8hi;
3457 else if (fcode == RS6000_BIF_VCTZLSBB_V16QI)
3458 icode = CODE_FOR_vctzlsbb_v16qi;
3459 else if (fcode == RS6000_BIF_VCTZLSBB_V4SI)
3460 icode = CODE_FOR_vctzlsbb_v4si;
3461 else if (fcode == RS6000_BIF_VCTZLSBB_V8HI)
3462 icode = CODE_FOR_vctzlsbb_v8hi;
3463 else
3464 gcc_unreachable ();
3465 }
3466
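  /* When long double is the IBM 128-bit format, TFmode and IFmode share
     the same representation, so the __ibm128 pack/unpack built-ins can be
     expanded with the TFmode patterns.  */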
3467 if (bif_is_ibm128 (*bifaddr) && TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
3468 {
3469 if (fcode == RS6000_BIF_PACK_IF)
3470 {
3471 icode = CODE_FOR_packtf;
3472 fcode = RS6000_BIF_PACK_TF;
3473 uns_fcode = (size_t) fcode;
3474 }
3475 else if (fcode == RS6000_BIF_UNPACK_IF)
3476 {
3477 icode = CODE_FOR_unpacktf;
3478 fcode = RS6000_BIF_UNPACK_TF;
3479 uns_fcode = (size_t) fcode;
3480 }
3481 }
3482
3483 /* TRUE iff the built-in function returns void. */
3484 bool void_func = TREE_TYPE (TREE_TYPE (fndecl)) == void_type_node;
3485 /* Position of first argument (0 for void-returning functions, else 1). */
3486 int k;
3487 /* Modes for the return value, if any, and arguments. */
3488 const int MAX_BUILTIN_ARGS = 6;
3489 machine_mode mode[MAX_BUILTIN_ARGS + 1];
3490
3491 if (void_func)
3492 k = 0;
3493 else
3494 {
3495 k = 1;
3496 mode[0] = insn_data[icode].operand[0].mode;
3497 }
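  /* With this numbering, call argument I corresponds to insn operand
     I + K in the loops below.  */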
3498
3499 /* Tree expressions for each argument. */
3500 tree arg[MAX_BUILTIN_ARGS];
3501 /* RTL expressions for each argument. */
3502 rtx op[MAX_BUILTIN_ARGS];
3503
3504 int nargs = bifaddr->nargs;
3505 gcc_assert (nargs <= MAX_BUILTIN_ARGS);
3506
3507
3508 for (int i = 0; i < nargs; i++)
3509 {
3510 arg[i] = CALL_EXPR_ARG (exp, i);
3511 if (arg[i] == error_mark_node)
3512 return const0_rtx;
3513 STRIP_NOPS (arg[i]);
3514 op[i] = expand_normal (arg[i]);
3515 /* We have a couple of pesky patterns that don't specify the mode... */
3516 mode[i+k] = insn_data[icode].operand[i+k].mode;
3517 if (!mode[i+k])
3518 mode[i+k] = Pmode;
3519 }
3520
3521 /* Check for restricted constant arguments. */
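  /* For instance (values are illustrative), a descriptor with restr[0]
     equal to RES_BITS and restr_val1[0] equal to 2 requires operand
     restr_opnd[0] to be an integer literal in the range [0, 3], while
     RES_VALUES requires one of exactly two literal values.  */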
3522 for (int i = 0; i < 2; i++)
3523 {
3524 switch (bifaddr->restr[i])
3525 {
3526 case RES_BITS:
3527 {
3528 size_t mask = 1;
3529 mask <<= bifaddr->restr_val1[i];
3530 mask--;
3531 tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
3532 STRIP_NOPS (restr_arg);
3533 if (!(TREE_CODE (restr_arg) == INTEGER_CST
3534 && (TREE_INT_CST_LOW (restr_arg) & ~mask) == 0))
3535 {
3536 unsigned p = (1U << bifaddr->restr_val1[i]) - 1;
3537 error ("argument %d must be a literal between 0 and %d,"
3538 " inclusive",
3539 bifaddr->restr_opnd[i], p);
3540 return CONST0_RTX (mode[0]);
3541 }
3542 break;
3543 }
3544 case RES_RANGE:
3545 {
3546 tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
3547 STRIP_NOPS (restr_arg);
3548 if (!(TREE_CODE (restr_arg) == INTEGER_CST
3549 && IN_RANGE (tree_to_shwi (restr_arg),
3550 bifaddr->restr_val1[i],
3551 bifaddr->restr_val2[i])))
3552 {
3553 error ("argument %d must be a literal between %d and %d,"
3554 " inclusive",
3555 bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
3556 bifaddr->restr_val2[i]);
3557 return CONST0_RTX (mode[0]);
3558 }
3559 break;
3560 }
3561 case RES_VAR_RANGE:
3562 {
3563 tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
3564 STRIP_NOPS (restr_arg);
3565 if (TREE_CODE (restr_arg) == INTEGER_CST
3566 && !IN_RANGE (tree_to_shwi (restr_arg),
3567 bifaddr->restr_val1[i],
3568 bifaddr->restr_val2[i]))
3569 {
3570 error ("argument %d must be a variable or a literal "
3571 "between %d and %d, inclusive",
3572 bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
3573 bifaddr->restr_val2[i]);
3574 return CONST0_RTX (mode[0]);
3575 }
3576 break;
3577 }
3578 case RES_VALUES:
3579 {
3580 tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
3581 STRIP_NOPS (restr_arg);
3582 if (!(TREE_CODE (restr_arg) == INTEGER_CST
3583 && (tree_to_shwi (restr_arg) == bifaddr->restr_val1[i]
3584 || tree_to_shwi (restr_arg) == bifaddr->restr_val2[i])))
3585 {
3586 error ("argument %d must be either a literal %d or a "
3587 "literal %d",
3588 bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
3589 bifaddr->restr_val2[i]);
3590 return CONST0_RTX (mode[0]);
3591 }
3592 break;
3593 }
3594 default:
3595 case RES_NONE:
3596 break;
3597 }
3598 }
3599
3600 if (bif_is_ldstmask (*bifaddr))
3601 return rs6000_expand_ldst_mask (target, arg[0]);
3602
3603 if (bif_is_stvec (*bifaddr))
3604 {
3605 if (bif_is_reve (*bifaddr))
3606 icode = elemrev_icode (fcode);
3607 return stv_expand_builtin (icode, op, mode[0], mode[1]);
3608 }
3609
3610 if (bif_is_ldvec (*bifaddr))
3611 {
3612 if (bif_is_reve (*bifaddr))
3613 icode = elemrev_icode (fcode);
3614 return ldv_expand_builtin (target, icode, op, mode[0]);
3615 }
3616
3617 if (bif_is_lxvrse (*bifaddr))
3618 return lxvrse_expand_builtin (target, icode, op, mode[0], mode[1]);
3619
3620 if (bif_is_lxvrze (*bifaddr))
3621 return lxvrze_expand_builtin (target, icode, op, mode[0], mode[1]);
3622
3623 if (bif_is_mma (*bifaddr))
3624 return mma_expand_builtin (exp, target, icode, fcode);
3625
3626 if (TREE_TYPE (TREE_TYPE (fndecl)) == void_type_node)
3627 target = NULL_RTX;
3628 else if (target == 0
3629 || GET_MODE (target) != mode[0]
3630 || !insn_data[icode].operand[0].predicate (target, mode[0]))
3631 target = gen_reg_rtx (mode[0]);
3632
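  /* If an expanded operand doesn't satisfy its insn predicate (for
     example, a constant or memory reference where the pattern wants a
     register), copy it into a register of the expected mode.  */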
3633 for (int i = 0; i < nargs; i++)
3634 if (!insn_data[icode].operand[i+k].predicate (op[i], mode[i+k]))
3635 op[i] = copy_to_mode_reg (mode[i+k], op[i]);
3636
3637 rtx pat;
3638
3639 switch (nargs)
3640 {
3641 case 0:
3642 pat = (void_func
3643 ? GEN_FCN (icode) ()
3644 : GEN_FCN (icode) (target));
3645 break;
3646 case 1:
3647 pat = (void_func
3648 ? GEN_FCN (icode) (op[0])
3649 : GEN_FCN (icode) (target, op[0]));
3650 break;
3651 case 2:
3652 pat = (void_func
3653 ? GEN_FCN (icode) (op[0], op[1])
3654 : GEN_FCN (icode) (target, op[0], op[1]));
3655 break;
3656 case 3:
3657 pat = (void_func
3658 ? GEN_FCN (icode) (op[0], op[1], op[2])
3659 : GEN_FCN (icode) (target, op[0], op[1], op[2]));
3660 break;
3661 case 4:
3662 pat = (void_func
3663 ? GEN_FCN (icode) (op[0], op[1], op[2], op[3])
3664 : GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]));
3665 break;
3666 case 5:
3667 pat = (void_func
3668 ? GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4])
3669 : GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]));
3670 break;
3671 case 6:
3672 pat = (void_func
3673 ? GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5])
3674 : GEN_FCN (icode) (target, op[0], op[1],
3675 op[2], op[3], op[4], op[5]));
3676 break;
3677 default:
3678 gcc_assert (MAX_BUILTIN_ARGS == 6);
3679 gcc_unreachable ();
3680 }
3681
3682 if (!pat)
3683 return 0;
3684
3685 emit_insn (pat);
3686 return target;
3687 }