1 /* Target-specific built-in function support for the Power architecture.
2 See also rs6000-c.cc, rs6000-gen-builtins.cc, rs6000-builtins.def, and
3 rs6000-overloads.def.
4 Note that "normal" builtins (generic math functions, etc.) are handled
5 in rs6000.cc.
6
7 Copyright (C) 2002-2024 Free Software Foundation, Inc.
8
9 This file is part of GCC.
10
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
15
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
24
25 #define IN_TARGET_CODE 1
26
27 #include "config.h"
28 #include "system.h"
29 #include "coretypes.h"
30 #include "target.h"
31 #include "backend.h"
32 #include "rtl.h"
33 #include "tree.h"
34 #include "memmodel.h"
35 #include "gimple.h"
36 #include "tm_p.h"
37 #include "optabs.h"
38 #include "recog.h"
39 #include "diagnostic-core.h"
40 #include "fold-const.h"
41 #include "stor-layout.h"
42 #include "calls.h"
43 #include "varasm.h"
44 #include "explow.h"
45 #include "expr.h"
46 #include "langhooks.h"
47 #include "gimplify.h"
48 #include "gimple-iterator.h"
49 #include "gimple-fold.h"
50 #include "ssa.h"
51 #include "tree-ssa-propagate.h"
52 #include "builtins.h"
53 #include "tree-vector-builder.h"
54 #include "ppc-auxv.h"
55 #include "rs6000-internal.h"
56
57 /* Built in types. */
58 tree rs6000_builtin_types[RS6000_BTI_MAX];
59
60 /* Support targetm.vectorize.builtin_mask_for_load. */
61 tree altivec_builtin_mask_for_load;
62
63 /* **** General support functions **** */
64
65 /* Raise an error message for a builtin function that is called without the
66 appropriate target options being set. */
67
68 void
69 rs6000_invalid_builtin (enum rs6000_gen_builtins fncode)
70 {
71 size_t j = (size_t) fncode;
72 const char *name = rs6000_builtin_info[j].bifname;
73
74 switch (rs6000_builtin_info[j].enable)
75 {
76 case ENB_P5:
77 error ("%qs requires the %qs option", name, "-mcpu=power5");
78 break;
79 case ENB_P6:
80 error ("%qs requires the %qs option", name, "-mcpu=power6");
81 break;
82 case ENB_P6_64:
83 error ("%qs requires the %qs option and either the %qs or %qs option",
84 name, "-mcpu=power6", "-m64", "-mpowerpc64");
85 break;
86 case ENB_ALTIVEC:
87 error ("%qs requires the %qs option", name, "-maltivec");
88 break;
89 case ENB_CELL:
90 error ("%qs requires the %qs option", name, "-mcpu=cell");
91 break;
92 case ENB_VSX:
93 error ("%qs requires the %qs option", name, "-mvsx");
94 break;
95 case ENB_P7:
96 error ("%qs requires the %qs option", name, "-mcpu=power7");
97 break;
98 case ENB_P7_64:
99 error ("%qs requires the %qs option and either the %qs or %qs option",
100 name, "-mcpu=power7", "-m64", "-mpowerpc64");
101 break;
102 case ENB_P8:
103 error ("%qs requires the %qs option", name, "-mcpu=power8");
104 break;
105 case ENB_P8V:
106 error ("%qs requires the %qs and %qs options", name, "-mcpu=power8",
107 "-mvsx");
108 break;
109 case ENB_P9:
110 error ("%qs requires the %qs option", name, "-mcpu=power9");
111 break;
112 case ENB_P9_64:
113 error ("%qs requires the %qs option and either the %qs or %qs option",
114 name, "-mcpu=power9", "-m64", "-mpowerpc64");
115 break;
116 case ENB_P9V:
117 error ("%qs requires the %qs and %qs options", name, "-mcpu=power9",
118 "-mvsx");
119 break;
120 case ENB_IEEE128_HW:
121 error ("%qs requires quad-precision floating-point arithmetic", name);
122 break;
123 case ENB_DFP:
124 error ("%qs requires the %qs option", name, "-mhard-dfp");
125 break;
126 case ENB_CRYPTO:
127 error ("%qs requires the %qs option", name, "-mcrypto");
128 break;
129 case ENB_HTM:
130 error ("%qs requires the %qs option", name, "-mhtm");
131 break;
132 case ENB_P10:
133 error ("%qs requires the %qs option", name, "-mcpu=power10");
134 break;
135 case ENB_P10_64:
136 error ("%qs requires the %qs option and either the %qs or %qs option",
137 name, "-mcpu=power10", "-m64", "-mpowerpc64");
138 break;
139 case ENB_MMA:
140 error ("%qs requires the %qs option", name, "-mmma");
141 break;
142 default:
143 case ENB_ALWAYS:
144 gcc_unreachable ();
145 }
146 }
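/* Illustrative sketch (not part of the build; the particular built-in and
   diagnostic text are an assumed example): source such as

     vector int add (vector int a, vector int b)
     {
       return __builtin_altivec_vadduwm (a, b);
     }

   compiled without -maltivec reaches this function with ENB_ALTIVEC and
   reports

     error: '__builtin_altivec_vadduwm' requires the '-maltivec' option  */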
147
148 /* Check whether a builtin function is supported in this target
149 configuration. */
150 bool
151 rs6000_builtin_is_supported (enum rs6000_gen_builtins fncode)
152 {
153 switch (rs6000_builtin_info[(size_t) fncode].enable)
154 {
155 case ENB_ALWAYS:
156 return true;
157 case ENB_P5:
158 return TARGET_POPCNTB;
159 case ENB_P6:
160 return TARGET_CMPB;
161 case ENB_P6_64:
162 return TARGET_CMPB && TARGET_POWERPC64;
163 case ENB_P7:
164 return TARGET_POPCNTD;
165 case ENB_P7_64:
166 return TARGET_POPCNTD && TARGET_POWERPC64;
167 case ENB_P8:
168 return TARGET_POWER8;
169 case ENB_P8V:
170 return TARGET_P8_VECTOR;
171 case ENB_P9:
172 return TARGET_MODULO;
173 case ENB_P9_64:
174 return TARGET_MODULO && TARGET_POWERPC64;
175 case ENB_P9V:
176 return TARGET_P9_VECTOR;
177 case ENB_P10:
178 return TARGET_POWER10;
179 case ENB_P10_64:
180 return TARGET_POWER10 && TARGET_POWERPC64;
181 case ENB_ALTIVEC:
182 return TARGET_ALTIVEC;
183 case ENB_VSX:
184 return TARGET_VSX;
185 case ENB_CELL:
186 return TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL;
187 case ENB_IEEE128_HW:
188 return TARGET_FLOAT128_HW;
189 case ENB_DFP:
190 return TARGET_DFP;
191 case ENB_CRYPTO:
192 return TARGET_CRYPTO;
193 case ENB_HTM:
194 return TARGET_HTM;
195 case ENB_MMA:
196 return TARGET_MMA;
197 default:
198 gcc_unreachable ();
199 }
200 gcc_unreachable ();
201 }
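/* A minimal sketch of how the two routines above are meant to pair up
   (assumed caller shape, not the actual expansion path):

     if (!rs6000_builtin_is_supported (fcode))
       {
	 rs6000_invalid_builtin (fcode);
	 return expand_call (exp, target, ignore);
       }

   i.e. the support check gates use of the built-in in the current target
   configuration, and the error routine produces the user-facing
   diagnostic.  */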
202
203 /* Target hook for early folding of built-ins, shamelessly stolen
204 from ia64.cc. */
205
206 tree
207 rs6000_fold_builtin (tree fndecl ATTRIBUTE_UNUSED,
208 int n_args ATTRIBUTE_UNUSED,
209 tree *args ATTRIBUTE_UNUSED,
210 bool ignore ATTRIBUTE_UNUSED)
211 {
212 #ifdef SUBTARGET_FOLD_BUILTIN
213 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
214 #else
215 return NULL_TREE;
216 #endif
217 }
218
219 tree
220 rs6000_builtin_decl (unsigned code, bool /* initialize_p */)
221 {
222 rs6000_gen_builtins fcode = (rs6000_gen_builtins) code;
223
224 if (fcode >= RS6000_OVLD_MAX)
225 return error_mark_node;
226
227 return rs6000_builtin_decls[code];
228 }
229
230 /* Implement targetm.vectorize.builtin_mask_for_load. */
231 tree
232 rs6000_builtin_mask_for_load (void)
233 {
234 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
235 if ((TARGET_ALTIVEC && !TARGET_VSX)
236 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
237 return altivec_builtin_mask_for_load;
238 else
239 return 0;
240 }
241
242 /* Implement targetm.vectorize.builtin_md_vectorized_function. */
243
244 tree
245 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
246 tree type_in)
247 {
248 machine_mode in_mode, out_mode;
249 int in_n, out_n;
250
251 if (TARGET_DEBUG_BUILTIN)
252 fprintf (stderr,
253 "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
254 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
255 GET_MODE_NAME (TYPE_MODE (type_out)),
256 GET_MODE_NAME (TYPE_MODE (type_in)));
257
258 /* TODO: Should this be gcc_assert? */
259 if (TREE_CODE (type_out) != VECTOR_TYPE
260 || TREE_CODE (type_in) != VECTOR_TYPE)
261 return NULL_TREE;
262
263 out_mode = TYPE_MODE (TREE_TYPE (type_out));
264 out_n = TYPE_VECTOR_SUBPARTS (type_out);
265 in_mode = TYPE_MODE (TREE_TYPE (type_in));
266 in_n = TYPE_VECTOR_SUBPARTS (type_in);
267
268 enum rs6000_gen_builtins fn
269 = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl);
270 switch (fn)
271 {
272 case RS6000_BIF_RSQRTF:
273 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
274 && out_mode == SFmode && out_n == 4
275 && in_mode == SFmode && in_n == 4)
276 return rs6000_builtin_decls[RS6000_BIF_VRSQRTFP];
277 break;
278 case RS6000_BIF_RSQRT:
279 if (VECTOR_UNIT_VSX_P (V2DFmode)
280 && out_mode == DFmode && out_n == 2
281 && in_mode == DFmode && in_n == 2)
282 return rs6000_builtin_decls[RS6000_BIF_RSQRT_2DF];
283 break;
284 case RS6000_BIF_RECIPF:
285 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
286 && out_mode == SFmode && out_n == 4
287 && in_mode == SFmode && in_n == 4)
288 return rs6000_builtin_decls[RS6000_BIF_VRECIPFP];
289 break;
290 case RS6000_BIF_RECIP:
291 if (VECTOR_UNIT_VSX_P (V2DFmode)
292 && out_mode == DFmode && out_n == 2
293 && in_mode == DFmode && in_n == 2)
294 return rs6000_builtin_decls[RS6000_BIF_RECIP_V2DF];
295 break;
296 default:
297 break;
298 }
299
300 machine_mode in_vmode = TYPE_MODE (type_in);
301 machine_mode out_vmode = TYPE_MODE (type_out);
302
303 /* Power10 supported vectorized built-in functions. */
304 if (TARGET_POWER10
305 && in_vmode == out_vmode
306 && VECTOR_UNIT_ALTIVEC_OR_VSX_P (in_vmode))
307 {
308 machine_mode exp_mode = DImode;
309 machine_mode exp_vmode = V2DImode;
310 enum rs6000_gen_builtins bif;
311 switch (fn)
312 {
313 case RS6000_BIF_DIVWE:
314 case RS6000_BIF_DIVWEU:
315 exp_mode = SImode;
316 exp_vmode = V4SImode;
317 if (fn == RS6000_BIF_DIVWE)
318 bif = RS6000_BIF_VDIVESW;
319 else
320 bif = RS6000_BIF_VDIVEUW;
321 break;
322 case RS6000_BIF_DIVDE:
323 case RS6000_BIF_DIVDEU:
324 if (fn == RS6000_BIF_DIVDE)
325 bif = RS6000_BIF_VDIVESD;
326 else
327 bif = RS6000_BIF_VDIVEUD;
328 break;
329 case RS6000_BIF_CFUGED:
330 bif = RS6000_BIF_VCFUGED;
331 break;
332 case RS6000_BIF_CNTLZDM:
333 bif = RS6000_BIF_VCLZDM;
334 break;
335 case RS6000_BIF_CNTTZDM:
336 bif = RS6000_BIF_VCTZDM;
337 break;
338 case RS6000_BIF_PDEPD:
339 bif = RS6000_BIF_VPDEPD;
340 break;
341 case RS6000_BIF_PEXTD:
342 bif = RS6000_BIF_VPEXTD;
343 break;
344 default:
345 return NULL_TREE;
346 }
347
348 if (in_mode == exp_mode && in_vmode == exp_vmode)
349 return rs6000_builtin_decls[bif];
350 }
351
352 return NULL_TREE;
353 }
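/* Illustrative sketch, assuming -mcpu=power10 and V4SImode vectorization:
   when the loop body contains a call to the scalar built-in behind
   RS6000_BIF_DIVWE, the vectorizer queries the hook above with
   type_in == type_out == vector int, the expected mode checks (SImode
   elements, V4SImode vector) succeed, and the decl for RS6000_BIF_VDIVESW
   is returned, so the loop is vectorized with the vector divide-extended
   built-in rather than left scalar.  The option and types are assumed for
   the example.  */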
354
355 /* Return the decl for a target-specific builtin that implements the
356 reciprocal of the function, or NULL_TREE if not available. */
357
358 tree
359 rs6000_builtin_reciprocal (tree fndecl)
360 {
361 switch (DECL_MD_FUNCTION_CODE (fndecl))
362 {
363 case RS6000_BIF_XVSQRTDP:
364 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
365 return NULL_TREE;
366
367 return rs6000_builtin_decls[RS6000_BIF_RSQRT_2DF];
368
369 case RS6000_BIF_XVSQRTSP:
370 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
371 return NULL_TREE;
372
373 return rs6000_builtin_decls[RS6000_BIF_RSQRT_4SF];
374
375 default:
376 return NULL_TREE;
377 }
378 }
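/* Sketch of the intended use (flags assumed): when RS6000_RECIP_AUTO_RSQRTE_P
   holds for V2DFmode (e.g. under a suitable -mrecip setting together with
   -ffast-math), a reciprocal of the RS6000_BIF_XVSQRTDP built-in can be
   replaced by the RS6000_BIF_RSQRT_2DF decl returned above, turning
   1.0 / sqrt into a reciprocal-square-root estimate sequence.  */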
379
380 /* **** Initialization support **** */
381
382 /* Create a builtin vector type with a name, taking care not to give
383 the canonical type a name. */
384
385 static tree
386 rs6000_vector_type (const char *name, tree elt_type, unsigned num_elts)
387 {
388 tree result = build_vector_type (elt_type, num_elts);
389
390 /* Copy so we don't give the canonical type a name. */
391 result = build_variant_type_copy (result);
392
393 add_builtin_type (name, result);
394
395 return result;
396 }
397
398 /* Debug utility to translate a type node to a single textual token. */
399 static
400 const char *rs6000_type_string (tree type_node)
401 {
402 if (type_node == NULL_TREE)
403 return "**NULL**";
404 else if (type_node == void_type_node)
405 return "void";
406 else if (type_node == long_integer_type_node)
407 return "long";
408 else if (type_node == long_unsigned_type_node)
409 return "ulong";
410 else if (type_node == long_long_integer_type_node)
411 return "longlong";
412 else if (type_node == long_long_unsigned_type_node)
413 return "ulonglong";
414 else if (type_node == bool_V2DI_type_node)
415 return "vbll";
416 else if (type_node == bool_V4SI_type_node)
417 return "vbi";
418 else if (type_node == bool_V8HI_type_node)
419 return "vbs";
420 else if (type_node == bool_V16QI_type_node)
421 return "vbc";
422 else if (type_node == bool_int_type_node)
423 return "bool";
424 else if (type_node == dfloat64_type_node)
425 return "_Decimal64";
426 else if (type_node == double_type_node)
427 return "double";
428 else if (type_node == intDI_type_node)
429 return "sll";
430 else if (type_node == intHI_type_node)
431 return "ss";
432 else if (type_node == ibm128_float_type_node)
433 return "__ibm128";
434 else if (type_node == ieee128_float_type_node)
435 return "__ieee128";
436 else if (type_node == opaque_V4SI_type_node)
437 return "opaque";
438 else if (POINTER_TYPE_P (type_node))
439 return "void*";
440 else if (type_node == intQI_type_node || type_node == char_type_node)
441 return "sc";
442 else if (type_node == dfloat32_type_node)
443 return "_Decimal32";
444 else if (type_node == float_type_node)
445 return "float";
446 else if (type_node == intSI_type_node || type_node == integer_type_node)
447 return "si";
448 else if (type_node == dfloat128_type_node)
449 return "_Decimal128";
450 else if (type_node == long_double_type_node)
451 return "longdouble";
452 else if (type_node == intTI_type_node)
453 return "sq";
454 else if (type_node == unsigned_intDI_type_node)
455 return "ull";
456 else if (type_node == unsigned_intHI_type_node)
457 return "us";
458 else if (type_node == unsigned_intQI_type_node)
459 return "uc";
460 else if (type_node == unsigned_intSI_type_node)
461 return "ui";
462 else if (type_node == unsigned_intTI_type_node)
463 return "uq";
464 else if (type_node == unsigned_V1TI_type_node)
465 return "vuq";
466 else if (type_node == unsigned_V2DI_type_node)
467 return "vull";
468 else if (type_node == unsigned_V4SI_type_node)
469 return "vui";
470 else if (type_node == unsigned_V8HI_type_node)
471 return "vus";
472 else if (type_node == unsigned_V16QI_type_node)
473 return "vuc";
474 else if (type_node == V16QI_type_node)
475 return "vsc";
476 else if (type_node == V1TI_type_node)
477 return "vsq";
478 else if (type_node == V2DF_type_node)
479 return "vd";
480 else if (type_node == V2DI_type_node)
481 return "vsll";
482 else if (type_node == V4SF_type_node)
483 return "vf";
484 else if (type_node == V4SI_type_node)
485 return "vsi";
486 else if (type_node == V8HI_type_node)
487 return "vss";
488 else if (type_node == pixel_V8HI_type_node)
489 return "vp";
490 else if (type_node == pcvoid_type_node)
491 return "voidc*";
492 else if (type_node == float128_type_node)
493 return "_Float128";
494 else if (type_node == vector_pair_type_node)
495 return "__vector_pair";
496 else if (type_node == vector_quad_type_node)
497 return "__vector_quad";
498
499 return "unknown";
500 }
501
502 void
503 rs6000_init_builtins (void)
504 {
505 tree tdecl;
506 tree t;
507
508 if (TARGET_DEBUG_BUILTIN)
509 fprintf (stderr, "rs6000_init_builtins%s%s\n",
510 (TARGET_ALTIVEC) ? ", altivec" : "",
511 (TARGET_VSX) ? ", vsx" : "");
512
513 V2DI_type_node = rs6000_vector_type ("__vector long long",
514 long_long_integer_type_node, 2);
515 ptr_V2DI_type_node
516 = build_pointer_type (build_qualified_type (V2DI_type_node,
517 TYPE_QUAL_CONST));
518
519 V2DF_type_node = rs6000_vector_type ("__vector double", double_type_node, 2);
520 ptr_V2DF_type_node
521 = build_pointer_type (build_qualified_type (V2DF_type_node,
522 TYPE_QUAL_CONST));
523
524 V4SI_type_node = rs6000_vector_type ("__vector signed int",
525 intSI_type_node, 4);
526 ptr_V4SI_type_node
527 = build_pointer_type (build_qualified_type (V4SI_type_node,
528 TYPE_QUAL_CONST));
529
530 V4SF_type_node = rs6000_vector_type ("__vector float", float_type_node, 4);
531 ptr_V4SF_type_node
532 = build_pointer_type (build_qualified_type (V4SF_type_node,
533 TYPE_QUAL_CONST));
534
535 V8HI_type_node = rs6000_vector_type ("__vector signed short",
536 intHI_type_node, 8);
537 ptr_V8HI_type_node
538 = build_pointer_type (build_qualified_type (V8HI_type_node,
539 TYPE_QUAL_CONST));
540
541 V16QI_type_node = rs6000_vector_type ("__vector signed char",
542 intQI_type_node, 16);
543 ptr_V16QI_type_node
544 = build_pointer_type (build_qualified_type (V16QI_type_node,
545 TYPE_QUAL_CONST));
546
547 unsigned_V16QI_type_node = rs6000_vector_type ("__vector unsigned char",
548 unsigned_intQI_type_node, 16);
549 ptr_unsigned_V16QI_type_node
550 = build_pointer_type (build_qualified_type (unsigned_V16QI_type_node,
551 TYPE_QUAL_CONST));
552
553 unsigned_V8HI_type_node = rs6000_vector_type ("__vector unsigned short",
554 unsigned_intHI_type_node, 8);
555 ptr_unsigned_V8HI_type_node
556 = build_pointer_type (build_qualified_type (unsigned_V8HI_type_node,
557 TYPE_QUAL_CONST));
558
559 unsigned_V4SI_type_node = rs6000_vector_type ("__vector unsigned int",
560 unsigned_intSI_type_node, 4);
561 ptr_unsigned_V4SI_type_node
562 = build_pointer_type (build_qualified_type (unsigned_V4SI_type_node,
563 TYPE_QUAL_CONST));
564
565 unsigned_V2DI_type_node
566 = rs6000_vector_type ("__vector unsigned long long",
567 long_long_unsigned_type_node, 2);
568
569 ptr_unsigned_V2DI_type_node
570 = build_pointer_type (build_qualified_type (unsigned_V2DI_type_node,
571 TYPE_QUAL_CONST));
572
573 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
574
575 const_str_type_node
576 = build_pointer_type (build_qualified_type (char_type_node,
577 TYPE_QUAL_CONST));
578
579 /* We use V1TI mode as a special container to hold __int128_t items that
580 must live in VSX registers. */
581 if (intTI_type_node)
582 {
583 V1TI_type_node = rs6000_vector_type ("__vector __int128",
584 intTI_type_node, 1);
585 ptr_V1TI_type_node
586 = build_pointer_type (build_qualified_type (V1TI_type_node,
587 TYPE_QUAL_CONST));
588 unsigned_V1TI_type_node
589 = rs6000_vector_type ("__vector unsigned __int128",
590 unsigned_intTI_type_node, 1);
591 ptr_unsigned_V1TI_type_node
592 = build_pointer_type (build_qualified_type (unsigned_V1TI_type_node,
593 TYPE_QUAL_CONST));
594 }
595
596 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
597 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
598 'vector unsigned short'. */
599
600 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
601 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
602 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
603 bool_long_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
604 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
605
606 long_integer_type_internal_node = long_integer_type_node;
607 long_unsigned_type_internal_node = long_unsigned_type_node;
608 long_long_integer_type_internal_node = long_long_integer_type_node;
609 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
610 intQI_type_internal_node = intQI_type_node;
611 uintQI_type_internal_node = unsigned_intQI_type_node;
612 intHI_type_internal_node = intHI_type_node;
613 uintHI_type_internal_node = unsigned_intHI_type_node;
614 intSI_type_internal_node = intSI_type_node;
615 uintSI_type_internal_node = unsigned_intSI_type_node;
616 intDI_type_internal_node = intDI_type_node;
617 uintDI_type_internal_node = unsigned_intDI_type_node;
618 intTI_type_internal_node = intTI_type_node;
619 uintTI_type_internal_node = unsigned_intTI_type_node;
620 float_type_internal_node = float_type_node;
621 double_type_internal_node = double_type_node;
622 long_double_type_internal_node = long_double_type_node;
623 dfloat64_type_internal_node = dfloat64_type_node;
624 dfloat128_type_internal_node = dfloat128_type_node;
625 void_type_internal_node = void_type_node;
626
627 ptr_intQI_type_node
628 = build_pointer_type (build_qualified_type (intQI_type_internal_node,
629 TYPE_QUAL_CONST));
630 ptr_uintQI_type_node
631 = build_pointer_type (build_qualified_type (uintQI_type_internal_node,
632 TYPE_QUAL_CONST));
633 ptr_intHI_type_node
634 = build_pointer_type (build_qualified_type (intHI_type_internal_node,
635 TYPE_QUAL_CONST));
636 ptr_uintHI_type_node
637 = build_pointer_type (build_qualified_type (uintHI_type_internal_node,
638 TYPE_QUAL_CONST));
639 ptr_intSI_type_node
640 = build_pointer_type (build_qualified_type (intSI_type_internal_node,
641 TYPE_QUAL_CONST));
642 ptr_uintSI_type_node
643 = build_pointer_type (build_qualified_type (uintSI_type_internal_node,
644 TYPE_QUAL_CONST));
645 ptr_intDI_type_node
646 = build_pointer_type (build_qualified_type (intDI_type_internal_node,
647 TYPE_QUAL_CONST));
648 ptr_uintDI_type_node
649 = build_pointer_type (build_qualified_type (uintDI_type_internal_node,
650 TYPE_QUAL_CONST));
651 ptr_intTI_type_node
652 = build_pointer_type (build_qualified_type (intTI_type_internal_node,
653 TYPE_QUAL_CONST));
654 ptr_uintTI_type_node
655 = build_pointer_type (build_qualified_type (uintTI_type_internal_node,
656 TYPE_QUAL_CONST));
657
658 t = build_qualified_type (long_integer_type_internal_node, TYPE_QUAL_CONST);
659 ptr_long_integer_type_node = build_pointer_type (t);
660
661 t = build_qualified_type (long_unsigned_type_internal_node, TYPE_QUAL_CONST);
662 ptr_long_unsigned_type_node = build_pointer_type (t);
663
664 ptr_float_type_node
665 = build_pointer_type (build_qualified_type (float_type_internal_node,
666 TYPE_QUAL_CONST));
667 ptr_double_type_node
668 = build_pointer_type (build_qualified_type (double_type_internal_node,
669 TYPE_QUAL_CONST));
670 ptr_long_double_type_node
671 = build_pointer_type (build_qualified_type (long_double_type_internal_node,
672 TYPE_QUAL_CONST));
673 if (dfloat64_type_node)
674 {
675 t = build_qualified_type (dfloat64_type_internal_node, TYPE_QUAL_CONST);
676 ptr_dfloat64_type_node = build_pointer_type (t);
677 }
678 else
679 ptr_dfloat64_type_node = NULL;
680
681 if (dfloat128_type_node)
682 {
683 t = build_qualified_type (dfloat128_type_internal_node, TYPE_QUAL_CONST);
684 ptr_dfloat128_type_node = build_pointer_type (t);
685 }
686 else
687 ptr_dfloat128_type_node = NULL;
688
689 t = build_qualified_type (long_long_integer_type_internal_node,
690 TYPE_QUAL_CONST);
691 ptr_long_long_integer_type_node = build_pointer_type (t);
692
693 t = build_qualified_type (long_long_unsigned_type_internal_node,
694 TYPE_QUAL_CONST);
695 ptr_long_long_unsigned_type_node = build_pointer_type (t);
696
697 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
698 IFmode is the IBM extended 128-bit format that is a pair of doubles.
699 TFmode will be either IEEE 128-bit floating point or the IBM double-double
700 format that uses a pair of doubles, depending on the switches and
701 defaults.
702
703 If we don't have support for either 128-bit IBM double-double or IEEE
704 128-bit floating point, we need to make sure the type is non-zero, or
705 else the self-test fails during bootstrap.
706
707 Always create __ibm128 as a separate type, even if the current long double
708 format is IBM extended double.
709
710 For IEEE 128-bit floating point, always create the type __ieee128. If the
711 user used -mfloat128, rs6000-c.cc will create a define from __float128 to
712 __ieee128. */
713 if (TARGET_LONG_DOUBLE_128 && (!TARGET_IEEEQUAD || TARGET_FLOAT128_TYPE))
714 {
715 if (!TARGET_IEEEQUAD)
716 ibm128_float_type_node = long_double_type_node;
717 else
718 {
719 ibm128_float_type_node = make_node (REAL_TYPE);
720 TYPE_PRECISION (ibm128_float_type_node) = 128;
721 SET_TYPE_MODE (ibm128_float_type_node, IFmode);
722 layout_type (ibm128_float_type_node);
723 }
724 t = build_qualified_type (ibm128_float_type_node, TYPE_QUAL_CONST);
725 lang_hooks.types.register_builtin_type (ibm128_float_type_node,
726 "__ibm128");
727 }
728 else
729 ibm128_float_type_node = NULL_TREE;
730
731 if (TARGET_FLOAT128_TYPE)
732 {
733 if (TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128)
734 ieee128_float_type_node = long_double_type_node;
735 else
736 {
737 /* For C we only need to register the __ieee128 name for
738 it. For C++, we create a distinct type which will mangle
739 differently (u9__ieee128) vs. _Float128 (DF128_) and behave
740 backwards compatibly. */
741 if (float128t_type_node == NULL_TREE)
742 {
743 float128t_type_node = make_node (REAL_TYPE);
744 TYPE_PRECISION (float128t_type_node)
745 = TYPE_PRECISION (float128_type_node);
746 layout_type (float128t_type_node);
747 SET_TYPE_MODE (float128t_type_node,
748 TYPE_MODE (float128_type_node));
749 }
750 ieee128_float_type_node = float128t_type_node;
751 }
752 t = build_qualified_type (ieee128_float_type_node, TYPE_QUAL_CONST);
753 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
754 "__ieee128");
755 }
756 else
757 ieee128_float_type_node = NULL_TREE;
758
759 /* Vector pair and vector quad support. */
760 vector_pair_type_node = make_node (OPAQUE_TYPE);
761 SET_TYPE_MODE (vector_pair_type_node, OOmode);
762 TYPE_SIZE (vector_pair_type_node) = bitsize_int (GET_MODE_BITSIZE (OOmode));
763 TYPE_PRECISION (vector_pair_type_node) = GET_MODE_BITSIZE (OOmode);
764 TYPE_SIZE_UNIT (vector_pair_type_node) = size_int (GET_MODE_SIZE (OOmode));
765 SET_TYPE_ALIGN (vector_pair_type_node, 256);
766 TYPE_USER_ALIGN (vector_pair_type_node) = 0;
767 lang_hooks.types.register_builtin_type (vector_pair_type_node,
768 "__vector_pair");
769 t = build_qualified_type (vector_pair_type_node, TYPE_QUAL_CONST);
770 ptr_vector_pair_type_node = build_pointer_type (t);
771
772 vector_quad_type_node = make_node (OPAQUE_TYPE);
773 SET_TYPE_MODE (vector_quad_type_node, XOmode);
774 TYPE_SIZE (vector_quad_type_node) = bitsize_int (GET_MODE_BITSIZE (XOmode));
775 TYPE_PRECISION (vector_quad_type_node) = GET_MODE_BITSIZE (XOmode);
776 TYPE_SIZE_UNIT (vector_quad_type_node) = size_int (GET_MODE_SIZE (XOmode));
777 SET_TYPE_ALIGN (vector_quad_type_node, 512);
778 TYPE_USER_ALIGN (vector_quad_type_node) = 0;
779 lang_hooks.types.register_builtin_type (vector_quad_type_node,
780 "__vector_quad");
781 t = build_qualified_type (vector_quad_type_node, TYPE_QUAL_CONST);
782 ptr_vector_quad_type_node = build_pointer_type (t);
783
784 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
785 TYPE_NAME (bool_char_type_node) = tdecl;
786
787 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
788 TYPE_NAME (bool_short_type_node) = tdecl;
789
790 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
791 TYPE_NAME (bool_int_type_node) = tdecl;
792
793 tdecl = add_builtin_type ("__pixel", pixel_type_node);
794 TYPE_NAME (pixel_type_node) = tdecl;
795
796 bool_V16QI_type_node = rs6000_vector_type ("__vector __bool char",
797 bool_char_type_node, 16);
798 ptr_bool_V16QI_type_node
799 = build_pointer_type (build_qualified_type (bool_V16QI_type_node,
800 TYPE_QUAL_CONST));
801
802 bool_V8HI_type_node = rs6000_vector_type ("__vector __bool short",
803 bool_short_type_node, 8);
804 ptr_bool_V8HI_type_node
805 = build_pointer_type (build_qualified_type (bool_V8HI_type_node,
806 TYPE_QUAL_CONST));
807
808 bool_V4SI_type_node = rs6000_vector_type ("__vector __bool int",
809 bool_int_type_node, 4);
810 ptr_bool_V4SI_type_node
811 = build_pointer_type (build_qualified_type (bool_V4SI_type_node,
812 TYPE_QUAL_CONST));
813
814 bool_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
815 ? "__vector __bool long"
816 : "__vector __bool long long",
817 bool_long_long_type_node, 2);
818 ptr_bool_V2DI_type_node
819 = build_pointer_type (build_qualified_type (bool_V2DI_type_node,
820 TYPE_QUAL_CONST));
821
822 bool_V1TI_type_node = rs6000_vector_type ("__vector __bool __int128",
823 intTI_type_node, 1);
824 ptr_bool_V1TI_type_node
825 = build_pointer_type (build_qualified_type (bool_V1TI_type_node,
826 TYPE_QUAL_CONST));
827
828 pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel",
829 pixel_type_node, 8);
830 ptr_pixel_V8HI_type_node
831 = build_pointer_type (build_qualified_type (pixel_V8HI_type_node,
832 TYPE_QUAL_CONST));
833 pcvoid_type_node
834 = build_pointer_type (build_qualified_type (void_type_node,
835 TYPE_QUAL_CONST));
836
837 /* Execute the autogenerated initialization code for builtins. */
838 rs6000_init_generated_builtins (rs6000_builtin_info_fntype,
839 rs6000_instance_info_fntype,
840 rs6000_overload_info,
841 rs6000_builtin_decls);
842
843 if (TARGET_DEBUG_BUILTIN)
844 {
845 fprintf (stderr, "\nAutogenerated built-in functions:\n\n");
846 for (int i = 1; i < (int) RS6000_BIF_MAX; i++)
847 {
848 enum rs6000_gen_builtins fn_code = (enum rs6000_gen_builtins) i;
849 if (!rs6000_builtin_is_supported (fn_code))
850 continue;
851 tree fntype = rs6000_builtin_info_fntype[i];
852 tree t = TREE_TYPE (fntype);
853 fprintf (stderr, "%s %s (", rs6000_type_string (t),
854 rs6000_builtin_info[i].bifname);
855 t = TYPE_ARG_TYPES (fntype);
856 while (t && TREE_VALUE (t) != void_type_node)
857 {
858 fprintf (stderr, "%s",
859 rs6000_type_string (TREE_VALUE (t)));
860 t = TREE_CHAIN (t);
861 if (t && TREE_VALUE (t) != void_type_node)
862 fprintf (stderr, ", ");
863 }
864 fprintf (stderr, "); %s [%4d]\n",
865 rs6000_builtin_info[i].attr_string, (int) i);
866 }
867 fprintf (stderr, "\nEnd autogenerated built-in functions.\n\n\n");
868 }
869
870 if (TARGET_XCOFF)
871 {
872 /* AIX libm provides clog as __clog. */
873 if ((tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
874 set_user_assembler_name (tdecl, "__clog");
875
876 /* When long double is 64 bit, some long double builtins of libc
877 functions (like __builtin_frexpl) must call the double version
878 (frexp) not the long double version (frexpl) that expects a 128 bit
879 argument. */
880 if (! TARGET_LONG_DOUBLE_128)
881 {
882 if ((tdecl = builtin_decl_explicit (BUILT_IN_FMODL)) != NULL_TREE)
883 set_user_assembler_name (tdecl, "fmod");
884 if ((tdecl = builtin_decl_explicit (BUILT_IN_FREXPL)) != NULL_TREE)
885 set_user_assembler_name (tdecl, "frexp");
886 if ((tdecl = builtin_decl_explicit (BUILT_IN_LDEXPL)) != NULL_TREE)
887 set_user_assembler_name (tdecl, "ldexp");
888 if ((tdecl = builtin_decl_explicit (BUILT_IN_MODFL)) != NULL_TREE)
889 set_user_assembler_name (tdecl, "modf");
890 }
891 }
892
893 altivec_builtin_mask_for_load
894 = rs6000_builtin_decls[RS6000_BIF_MASK_FOR_LOAD];
895
896 #ifdef SUBTARGET_INIT_BUILTINS
897 SUBTARGET_INIT_BUILTINS;
898 #endif
899
900 return;
901 }
902
903 /* **** GIMPLE folding support **** */
904
905 /* Helper function to handle the gimple folding of a vector compare
906 operation. This sets up true/false vectors, and uses the
907 VEC_COND_EXPR operation.
908 CODE indicates which comparison is to be made (EQ, GT, ...).
909 TYPE indicates the type of the result.
910 Code is inserted before GSI. */
911 static tree
912 fold_build_vec_cmp (tree_code code, tree type, tree arg0, tree arg1,
913 gimple_stmt_iterator *gsi)
914 {
915 tree cmp_type = truth_type_for (type);
916 tree zero_vec = build_zero_cst (type);
917 tree minus_one_vec = build_minus_one_cst (type);
918 tree temp = create_tmp_reg_or_ssa_name (cmp_type);
919 gimple *g = gimple_build_assign (temp, code, arg0, arg1);
920 gsi_insert_before (gsi, g, GSI_SAME_STMT);
921 return fold_build3 (VEC_COND_EXPR, type, temp, minus_one_vec, zero_vec);
922 }
923
924 /* Helper function to handle the in-between steps for the
925 vector compare built-ins. */
926 static void
927 fold_compare_helper (gimple_stmt_iterator *gsi, tree_code code, gimple *stmt)
928 {
929 tree arg0 = gimple_call_arg (stmt, 0);
930 tree arg1 = gimple_call_arg (stmt, 1);
931 tree lhs = gimple_call_lhs (stmt);
932 tree cmp = fold_build_vec_cmp (code, TREE_TYPE (lhs), arg0, arg1, gsi);
933 gimple *g = gimple_build_assign (lhs, cmp);
934 gimple_set_location (g, gimple_location (stmt));
935 gsi_replace (gsi, g, true);
936 }
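/* Roughly, for a vector compare built-in such as vec_cmpeq on two vector
   ints, the two helpers above rewrite the call into GIMPLE of the form
   (sketch only; the temporary name is invented):

     tmp = a == b;				   -- boolean vector compare
     lhs = VEC_COND_EXPR <tmp, {-1,...}, {0,...}>;

   which matches the AltiVec convention of all-ones elements for true and
   all-zeros elements for false.  */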
937
938 /* Helper function to map V2DF and V4SF types to their
939 integral equivalents (V2DI and V4SI). */
940 tree map_to_integral_tree_type (tree input_tree_type)
941 {
942 if (INTEGRAL_TYPE_P (TREE_TYPE (input_tree_type)))
943 return input_tree_type;
944 else
945 {
946 if (types_compatible_p (TREE_TYPE (input_tree_type),
947 TREE_TYPE (V2DF_type_node)))
948 return V2DI_type_node;
949 else if (types_compatible_p (TREE_TYPE (input_tree_type),
950 TREE_TYPE (V4SF_type_node)))
951 return V4SI_type_node;
952 else
953 gcc_unreachable ();
954 }
955 }
956
957 /* Helper function to handle the vector merge[hl] built-ins. The
958 implementation difference between the h and l versions for this code is
959 in the values used when building the permute vector for high-word versus
960 low-word merge. The variance is keyed off the use_high parameter. */
961 static void
962 fold_mergehl_helper (gimple_stmt_iterator *gsi, gimple *stmt, int use_high)
963 {
964 tree arg0 = gimple_call_arg (stmt, 0);
965 tree arg1 = gimple_call_arg (stmt, 1);
966 tree lhs = gimple_call_lhs (stmt);
967 tree lhs_type = TREE_TYPE (lhs);
968 int n_elts = TYPE_VECTOR_SUBPARTS (lhs_type);
969 int midpoint = n_elts / 2;
970 int offset = 0;
971
972 if (use_high == 1)
973 offset = midpoint;
974
975 /* The permute_type will match the lhs for integral types. For double and
976 float types, the permute type needs to map to the V2 or V4 type that
977 matches size. */
978 tree permute_type;
979 permute_type = map_to_integral_tree_type (lhs_type);
980 tree_vector_builder elts (permute_type, VECTOR_CST_NELTS (arg0), 1);
981
982 for (int i = 0; i < midpoint; i++)
983 {
984 elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
985 offset + i));
986 elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
987 offset + n_elts + i));
988 }
989
990 tree permute = elts.build ();
991
992 gimple *g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute);
993 gimple_set_location (g, gimple_location (stmt));
994 gsi_replace (gsi, g, true);
995 }
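/* Worked example (follows directly from the loop above): for a 4-element
   result, use_high == 0 yields the permute constant {0, 4, 1, 5} and
   use_high == 1 yields {2, 6, 3, 7}, interleaving the first or second
   halves of the two input vectors element by element.  */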
996
997 /* Helper function to handle the vector merge[eo] built-ins. */
998 static void
999 fold_mergeeo_helper (gimple_stmt_iterator *gsi, gimple *stmt, int use_odd)
1000 {
1001 tree arg0 = gimple_call_arg (stmt, 0);
1002 tree arg1 = gimple_call_arg (stmt, 1);
1003 tree lhs = gimple_call_lhs (stmt);
1004 tree lhs_type = TREE_TYPE (lhs);
1005 int n_elts = TYPE_VECTOR_SUBPARTS (lhs_type);
1006
1007 /* The permute_type will match the lhs for integral types. For double and
1008 float types, the permute type needs to map to the V2 or V4 type that
1009 matches size. */
1010 tree permute_type;
1011 permute_type = map_to_integral_tree_type (lhs_type);
1012
1013 tree_vector_builder elts (permute_type, VECTOR_CST_NELTS (arg0), 1);
1014
1015 /* Build the permute vector. */
1016 for (int i = 0; i < n_elts / 2; i++)
1017 {
1018 elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
1019 2*i + use_odd));
1020 elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
1021 2*i + use_odd + n_elts));
1022 }
1023
1024 tree permute = elts.build ();
1025
1026 gimple *g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute);
1027 gimple_set_location (g, gimple_location (stmt));
1028 gsi_replace (gsi, g, true);
1029 }
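/* Worked example (follows directly from the loop above): for a 4-element
   result, use_odd == 0 builds the permute constant {0, 4, 2, 6} (the even
   elements of both inputs) and use_odd == 1 builds {1, 5, 3, 7} (the odd
   elements of both inputs).  */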
1030
1031 /* Helper function to sort out which built-ins may be valid without having
1032 a LHS. */
1033 static bool
1034 rs6000_builtin_valid_without_lhs (enum rs6000_gen_builtins fn_code,
1035 tree fndecl)
1036 {
1037 if (TREE_TYPE (TREE_TYPE (fndecl)) == void_type_node)
1038 return true;
1039
1040 switch (fn_code)
1041 {
1042 case RS6000_BIF_STVX_V16QI:
1043 case RS6000_BIF_STVX_V8HI:
1044 case RS6000_BIF_STVX_V4SI:
1045 case RS6000_BIF_STVX_V4SF:
1046 case RS6000_BIF_STVX_V2DI:
1047 case RS6000_BIF_STVX_V2DF:
1048 case RS6000_BIF_STXVW4X_V16QI:
1049 case RS6000_BIF_STXVW4X_V8HI:
1050 case RS6000_BIF_STXVW4X_V4SF:
1051 case RS6000_BIF_STXVW4X_V4SI:
1052 case RS6000_BIF_STXVD2X_V2DF:
1053 case RS6000_BIF_STXVD2X_V2DI:
1054 return true;
1055 default:
1056 return false;
1057 }
1058 }
1059
1060 /* Expand the MMA built-ins early, so that we can convert the pass-by-reference
1061 __vector_quad arguments into pass-by-value arguments, leading to more
1062 efficient code generation. */
1063 static bool
1064 rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi,
1065 rs6000_gen_builtins fn_code)
1066 {
1067 gimple *stmt = gsi_stmt (*gsi);
1068 size_t fncode = (size_t) fn_code;
1069
1070 if (!bif_is_mma (rs6000_builtin_info[fncode]))
1071 return false;
1072
1073 /* Each call that can be gimple-expanded has an associated built-in
1074 function that it will expand into. If this one doesn't, we have
1075 already expanded it! Exceptions: lxvp and stxvp. */
1076 if (rs6000_builtin_info[fncode].assoc_bif == RS6000_BIF_NONE
1077 && fncode != RS6000_BIF_LXVP
1078 && fncode != RS6000_BIF_STXVP)
1079 return false;
1080
1081 bifdata *bd = &rs6000_builtin_info[fncode];
1082 unsigned nopnds = bd->nargs;
1083 gimple_seq new_seq = NULL;
1084 gimple *new_call;
1085 tree new_decl;
1086
1087 /* Compatibility built-ins; we used to call these
1088 __builtin_mma_{dis,}assemble_pair, but now we call them
1089 __builtin_vsx_{dis,}assemble_pair. Handle the old versions. */
1090 if (fncode == RS6000_BIF_ASSEMBLE_PAIR)
1091 fncode = RS6000_BIF_ASSEMBLE_PAIR_V;
1092 else if (fncode == RS6000_BIF_DISASSEMBLE_PAIR)
1093 fncode = RS6000_BIF_DISASSEMBLE_PAIR_V;
1094
1095 if (fncode == RS6000_BIF_DISASSEMBLE_ACC
1096 || fncode == RS6000_BIF_DISASSEMBLE_PAIR_V)
1097 {
1098 /* This is an MMA disassemble built-in function. */
1099 push_gimplify_context (true);
1100 unsigned nvec = (fncode == RS6000_BIF_DISASSEMBLE_ACC) ? 4 : 2;
1101 tree dst_ptr = gimple_call_arg (stmt, 0);
1102 tree src_ptr = gimple_call_arg (stmt, 1);
1103 tree src_type = (fncode == RS6000_BIF_DISASSEMBLE_ACC)
1104 ? build_pointer_type (vector_quad_type_node)
1105 : build_pointer_type (vector_pair_type_node);
1106 if (TREE_TYPE (src_ptr) != src_type)
1107 src_ptr = build1 (NOP_EXPR, src_type, src_ptr);
1108
1109 tree src = create_tmp_reg_or_ssa_name (TREE_TYPE (src_type));
1110 gimplify_assign (src, build_simple_mem_ref (src_ptr), &new_seq);
1111
1112 /* If we are not disassembling an accumulator/pair or our destination is
1113 another accumulator/pair, then just copy the entire thing as is. */
1114 if ((fncode == RS6000_BIF_DISASSEMBLE_ACC
1115 && TREE_TYPE (TREE_TYPE (dst_ptr)) == vector_quad_type_node)
1116 || (fncode == RS6000_BIF_DISASSEMBLE_PAIR_V
1117 && TREE_TYPE (TREE_TYPE (dst_ptr)) == vector_pair_type_node))
1118 {
1119 tree dst = build_simple_mem_ref (build1 (NOP_EXPR,
1120 src_type, dst_ptr));
1121 gimplify_assign (dst, src, &new_seq);
1122 pop_gimplify_context (NULL);
1123 gsi_replace_with_seq (gsi, new_seq, true);
1124 return true;
1125 }
1126
1127 /* If we're disassembling an accumulator into a different type, we need
1128 to emit a xxmfacc instruction now, since we cannot do it later. */
1129 if (fncode == RS6000_BIF_DISASSEMBLE_ACC)
1130 {
1131 new_decl = rs6000_builtin_decls[RS6000_BIF_XXMFACC_INTERNAL];
1132 new_call = gimple_build_call (new_decl, 1, src);
1133 src = create_tmp_reg_or_ssa_name (vector_quad_type_node);
1134 gimple_call_set_lhs (new_call, src);
1135 gimple_seq_add_stmt (&new_seq, new_call);
1136 }
1137
1138 /* Copy the accumulator/pair vector by vector. */
1139 new_decl
1140 = rs6000_builtin_decls[rs6000_builtin_info[fncode].assoc_bif];
1141 tree dst_type = build_pointer_type_for_mode (unsigned_V16QI_type_node,
1142 ptr_mode, true);
1143 tree dst_base = build1 (NOP_EXPR, dst_type, dst_ptr);
1144 for (unsigned i = 0; i < nvec; i++)
1145 {
1146 unsigned index = WORDS_BIG_ENDIAN ? i : nvec - 1 - i;
1147 tree dst = build2 (MEM_REF, unsigned_V16QI_type_node, dst_base,
1148 build_int_cst (dst_type, index * 16));
1149 tree dstssa = create_tmp_reg_or_ssa_name (unsigned_V16QI_type_node);
1150 new_call = gimple_build_call (new_decl, 2, src,
1151 build_int_cstu (uint16_type_node, i));
1152 gimple_call_set_lhs (new_call, dstssa);
1153 gimple_seq_add_stmt (&new_seq, new_call);
1154 gimplify_assign (dst, dstssa, &new_seq);
1155 }
1156 pop_gimplify_context (NULL);
1157 gsi_replace_with_seq (gsi, new_seq, true);
1158 return true;
1159 }
1160
1161 /* TODO: Do some factoring on these two chunks. */
1162 if (fncode == RS6000_BIF_LXVP)
1163 {
1164 push_gimplify_context (true);
1165 tree offset = gimple_call_arg (stmt, 0);
1166 tree ptr = gimple_call_arg (stmt, 1);
1167 tree lhs = gimple_call_lhs (stmt);
1168 if (TREE_TYPE (TREE_TYPE (ptr)) != vector_pair_type_node)
1169 ptr = build1 (NOP_EXPR,
1170 build_pointer_type (vector_pair_type_node), ptr);
1171 tree mem = build_simple_mem_ref (build2 (POINTER_PLUS_EXPR,
1172 TREE_TYPE (ptr), ptr, offset));
1173 gimplify_assign (lhs, mem, &new_seq);
1174 pop_gimplify_context (NULL);
1175 gsi_replace_with_seq (gsi, new_seq, true);
1176 return true;
1177 }
1178
1179 if (fncode == RS6000_BIF_STXVP)
1180 {
1181 push_gimplify_context (true);
1182 tree src = gimple_call_arg (stmt, 0);
1183 tree offset = gimple_call_arg (stmt, 1);
1184 tree ptr = gimple_call_arg (stmt, 2);
1185 if (TREE_TYPE (TREE_TYPE (ptr)) != vector_pair_type_node)
1186 ptr = build1 (NOP_EXPR,
1187 build_pointer_type (vector_pair_type_node), ptr);
1188 tree mem = build_simple_mem_ref (build2 (POINTER_PLUS_EXPR,
1189 TREE_TYPE (ptr), ptr, offset));
1190 gimplify_assign (mem, src, &new_seq);
1191 pop_gimplify_context (NULL);
1192 gsi_replace_with_seq (gsi, new_seq, true);
1193 return true;
1194 }
1195
1196 /* Convert this built-in into an internal version that uses pass-by-value
1197 arguments. The internal built-in is found in the assoc_bif field. */
1198 new_decl = rs6000_builtin_decls[rs6000_builtin_info[fncode].assoc_bif];
1199 tree lhs, op[MAX_MMA_OPERANDS];
1200 tree acc = gimple_call_arg (stmt, 0);
1201 push_gimplify_context (true);
1202
1203 if (bif_is_quad (*bd))
1204 {
1205 /* This built-in has a pass-by-reference accumulator input, so load it
1206 into a temporary accumulator for use as a pass-by-value input. */
1207 op[0] = create_tmp_reg_or_ssa_name (vector_quad_type_node);
1208 for (unsigned i = 1; i < nopnds; i++)
1209 op[i] = gimple_call_arg (stmt, i);
1210 gimplify_assign (op[0], build_simple_mem_ref (acc), &new_seq);
1211 }
1212 else
1213 {
1214 /* This built-in does not use its pass-by-reference accumulator argument
1215 as an input argument, so remove it from the input list. */
1216 nopnds--;
1217 for (unsigned i = 0; i < nopnds; i++)
1218 op[i] = gimple_call_arg (stmt, i + 1);
1219 }
1220
1221 switch (nopnds)
1222 {
1223 case 0:
1224 new_call = gimple_build_call (new_decl, 0);
1225 break;
1226 case 1:
1227 new_call = gimple_build_call (new_decl, 1, op[0]);
1228 break;
1229 case 2:
1230 new_call = gimple_build_call (new_decl, 2, op[0], op[1]);
1231 break;
1232 case 3:
1233 new_call = gimple_build_call (new_decl, 3, op[0], op[1], op[2]);
1234 break;
1235 case 4:
1236 new_call = gimple_build_call (new_decl, 4, op[0], op[1], op[2], op[3]);
1237 break;
1238 case 5:
1239 new_call = gimple_build_call (new_decl, 5, op[0], op[1], op[2], op[3],
1240 op[4]);
1241 break;
1242 case 6:
1243 new_call = gimple_build_call (new_decl, 6, op[0], op[1], op[2], op[3],
1244 op[4], op[5]);
1245 break;
1246 case 7:
1247 new_call = gimple_build_call (new_decl, 7, op[0], op[1], op[2], op[3],
1248 op[4], op[5], op[6]);
1249 break;
1250 default:
1251 gcc_unreachable ();
1252 }
1253
1254 if (fncode == RS6000_BIF_BUILD_PAIR || fncode == RS6000_BIF_ASSEMBLE_PAIR_V)
1255 lhs = create_tmp_reg_or_ssa_name (vector_pair_type_node);
1256 else
1257 lhs = create_tmp_reg_or_ssa_name (vector_quad_type_node);
1258 gimple_call_set_lhs (new_call, lhs);
1259 gimple_seq_add_stmt (&new_seq, new_call);
1260 gimplify_assign (build_simple_mem_ref (acc), lhs, &new_seq);
1261 pop_gimplify_context (NULL);
1262 gsi_replace_with_seq (gsi, new_seq, true);
1263
1264 return true;
1265 }
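/* Rough sketch of the effect of the folding above (temporary names are
   invented and the internal built-in name is abbreviated): a call such as

     __builtin_mma_xvf32ger (&acc, x, y);

   which passes the accumulator by reference, becomes approximately

     _1 = <xvf32ger internal pass-by-value bif> (x, y);
     acc = _1;

   so later passes see the accumulator as an ordinary __vector_quad value,
   which is what enables the better code generation mentioned above.  */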
1266
1267 /* Fold a machine-dependent built-in in GIMPLE. (For folding into
1268 a constant, use rs6000_fold_builtin.) */
1269 bool
1270 rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
1271 {
1272 gimple *stmt = gsi_stmt (*gsi);
1273 tree fndecl = gimple_call_fndecl (stmt);
1274 gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
1275 enum rs6000_gen_builtins fn_code
1276 = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl);
1277 tree arg0, arg1, lhs, temp;
1278 enum tree_code bcode;
1279 gimple *g;
1280
1281 /* For an unresolved overloaded builtin, return early here since there
1282 is no builtin info for it and we are unable to fold it. */
1283 if (fn_code > RS6000_OVLD_NONE)
1284 return false;
1285
1286 size_t uns_fncode = (size_t) fn_code;
1287 enum insn_code icode = rs6000_builtin_info[uns_fncode].icode;
1288 const char *fn_name1 = rs6000_builtin_info[uns_fncode].bifname;
1289 const char *fn_name2 = (icode != CODE_FOR_nothing)
1290 ? get_insn_name ((int) icode)
1291 : "nothing";
1292
1293 if (TARGET_DEBUG_BUILTIN)
1294 fprintf (stderr, "rs6000_gimple_fold_builtin %d %s %s\n",
1295 fn_code, fn_name1, fn_name2);
1296
1297 /* Prevent gimple folding for code that does not have a LHS, unless it is
1298 allowed per the rs6000_builtin_valid_without_lhs helper function. */
1299 if (!gimple_call_lhs (stmt)
1300 && !rs6000_builtin_valid_without_lhs (fn_code, fndecl))
1301 return false;
1302
1303 /* Don't fold invalid builtins, let rs6000_expand_builtin diagnose it. */
1304 if (!rs6000_builtin_is_supported (fn_code))
1305 return false;
1306
1307 if (rs6000_gimple_fold_mma_builtin (gsi, fn_code))
1308 return true;
1309
1310 switch (fn_code)
1311 {
1312 /* Flavors of vec_add. We deliberately don't expand
1313 RS6000_BIF_VADDUQM as it gets lowered from V1TImode to
1314 TImode, resulting in much poorer code generation. */
1315 case RS6000_BIF_VADDUBM:
1316 case RS6000_BIF_VADDUHM:
1317 case RS6000_BIF_VADDUWM:
1318 case RS6000_BIF_VADDUDM:
1319 case RS6000_BIF_VADDFP:
1320 case RS6000_BIF_XVADDDP:
1321 case RS6000_BIF_XVADDSP:
1322 bcode = PLUS_EXPR;
1323 do_binary:
1324 arg0 = gimple_call_arg (stmt, 0);
1325 arg1 = gimple_call_arg (stmt, 1);
1326 lhs = gimple_call_lhs (stmt);
1327 if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (lhs)))
1328 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (lhs))))
1329 {
1330 /* Ensure the binary operation is performed in a type
1331 that wraps if it is an integral type. */
1332 gimple_seq stmts = NULL;
1333 tree type = unsigned_type_for (TREE_TYPE (lhs));
1334 tree uarg0 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1335 type, arg0);
1336 tree uarg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1337 type, arg1);
1338 tree res = gimple_build (&stmts, gimple_location (stmt), bcode,
1339 type, uarg0, uarg1);
1340 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1341 g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR,
1342 build1 (VIEW_CONVERT_EXPR,
1343 TREE_TYPE (lhs), res));
1344 gsi_replace (gsi, g, true);
1345 return true;
1346 }
1347 g = gimple_build_assign (lhs, bcode, arg0, arg1);
1348 gimple_set_location (g, gimple_location (stmt));
1349 gsi_replace (gsi, g, true);
1350 return true;
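      /* For example (sketch), folding vec_add on two vector signed ints
	 whose element type does not wrap on overflow takes the
	 VIEW_CONVERT_EXPR path above: both operands are viewed as vector
	 unsigned int, added there (where overflow is well defined), and
	 the sum is viewed back as vector signed int for the lhs.  */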
1351 /* Flavors of vec_sub. We deliberately don't expand
1352 RS6000_BIF_VSUBUQM. */
1353 case RS6000_BIF_VSUBUBM:
1354 case RS6000_BIF_VSUBUHM:
1355 case RS6000_BIF_VSUBUWM:
1356 case RS6000_BIF_VSUBUDM:
1357 case RS6000_BIF_VSUBFP:
1358 case RS6000_BIF_XVSUBDP:
1359 case RS6000_BIF_XVSUBSP:
1360 bcode = MINUS_EXPR;
1361 goto do_binary;
1362 case RS6000_BIF_XVMULSP:
1363 case RS6000_BIF_XVMULDP:
1364 arg0 = gimple_call_arg (stmt, 0);
1365 arg1 = gimple_call_arg (stmt, 1);
1366 lhs = gimple_call_lhs (stmt);
1367 g = gimple_build_assign (lhs, MULT_EXPR, arg0, arg1);
1368 gimple_set_location (g, gimple_location (stmt));
1369 gsi_replace (gsi, g, true);
1370 return true;
1371 /* Even element flavors of vec_mul (signed). */
1372 case RS6000_BIF_VMULESB:
1373 case RS6000_BIF_VMULESH:
1374 case RS6000_BIF_VMULESW:
1375 /* Even element flavors of vec_mul (unsigned). */
1376 case RS6000_BIF_VMULEUB:
1377 case RS6000_BIF_VMULEUH:
1378 case RS6000_BIF_VMULEUW:
1379 arg0 = gimple_call_arg (stmt, 0);
1380 arg1 = gimple_call_arg (stmt, 1);
1381 lhs = gimple_call_lhs (stmt);
1382 g = gimple_build_assign (lhs, VEC_WIDEN_MULT_EVEN_EXPR, arg0, arg1);
1383 gimple_set_location (g, gimple_location (stmt));
1384 gsi_replace (gsi, g, true);
1385 return true;
1386 /* Odd element flavors of vec_mul (signed). */
1387 case RS6000_BIF_VMULOSB:
1388 case RS6000_BIF_VMULOSH:
1389 case RS6000_BIF_VMULOSW:
1390 /* Odd element flavors of vec_mul (unsigned). */
1391 case RS6000_BIF_VMULOUB:
1392 case RS6000_BIF_VMULOUH:
1393 case RS6000_BIF_VMULOUW:
1394 arg0 = gimple_call_arg (stmt, 0);
1395 arg1 = gimple_call_arg (stmt, 1);
1396 lhs = gimple_call_lhs (stmt);
1397 g = gimple_build_assign (lhs, VEC_WIDEN_MULT_ODD_EXPR, arg0, arg1);
1398 gimple_set_location (g, gimple_location (stmt));
1399 gsi_replace (gsi, g, true);
1400 return true;
1401 /* Flavors of vec_div (Integer). */
1402 case RS6000_BIF_DIV_V2DI:
1403 case RS6000_BIF_UDIV_V2DI:
1404 arg0 = gimple_call_arg (stmt, 0);
1405 arg1 = gimple_call_arg (stmt, 1);
1406 lhs = gimple_call_lhs (stmt);
1407 g = gimple_build_assign (lhs, TRUNC_DIV_EXPR, arg0, arg1);
1408 gimple_set_location (g, gimple_location (stmt));
1409 gsi_replace (gsi, g, true);
1410 return true;
1411 /* Flavors of vec_div (Float). */
1412 case RS6000_BIF_XVDIVSP:
1413 case RS6000_BIF_XVDIVDP:
1414 arg0 = gimple_call_arg (stmt, 0);
1415 arg1 = gimple_call_arg (stmt, 1);
1416 lhs = gimple_call_lhs (stmt);
1417 g = gimple_build_assign (lhs, RDIV_EXPR, arg0, arg1);
1418 gimple_set_location (g, gimple_location (stmt));
1419 gsi_replace (gsi, g, true);
1420 return true;
1421 /* Flavors of vec_and. */
1422 case RS6000_BIF_VAND_V16QI_UNS:
1423 case RS6000_BIF_VAND_V16QI:
1424 case RS6000_BIF_VAND_V8HI_UNS:
1425 case RS6000_BIF_VAND_V8HI:
1426 case RS6000_BIF_VAND_V4SI_UNS:
1427 case RS6000_BIF_VAND_V4SI:
1428 case RS6000_BIF_VAND_V2DI_UNS:
1429 case RS6000_BIF_VAND_V2DI:
1430 case RS6000_BIF_VAND_V4SF:
1431 case RS6000_BIF_VAND_V2DF:
1432 arg0 = gimple_call_arg (stmt, 0);
1433 arg1 = gimple_call_arg (stmt, 1);
1434 lhs = gimple_call_lhs (stmt);
1435 g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, arg1);
1436 gimple_set_location (g, gimple_location (stmt));
1437 gsi_replace (gsi, g, true);
1438 return true;
1439 /* Flavors of vec_andc. */
1440 case RS6000_BIF_VANDC_V16QI_UNS:
1441 case RS6000_BIF_VANDC_V16QI:
1442 case RS6000_BIF_VANDC_V8HI_UNS:
1443 case RS6000_BIF_VANDC_V8HI:
1444 case RS6000_BIF_VANDC_V4SI_UNS:
1445 case RS6000_BIF_VANDC_V4SI:
1446 case RS6000_BIF_VANDC_V2DI_UNS:
1447 case RS6000_BIF_VANDC_V2DI:
1448 case RS6000_BIF_VANDC_V4SF:
1449 case RS6000_BIF_VANDC_V2DF:
1450 arg0 = gimple_call_arg (stmt, 0);
1451 arg1 = gimple_call_arg (stmt, 1);
1452 lhs = gimple_call_lhs (stmt);
1453 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
1454 g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
1455 gimple_set_location (g, gimple_location (stmt));
1456 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1457 g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, temp);
1458 gimple_set_location (g, gimple_location (stmt));
1459 gsi_replace (gsi, g, true);
1460 return true;
1461 /* Flavors of vec_nand. */
1462 case RS6000_BIF_NAND_V16QI_UNS:
1463 case RS6000_BIF_NAND_V16QI:
1464 case RS6000_BIF_NAND_V8HI_UNS:
1465 case RS6000_BIF_NAND_V8HI:
1466 case RS6000_BIF_NAND_V4SI_UNS:
1467 case RS6000_BIF_NAND_V4SI:
1468 case RS6000_BIF_NAND_V2DI_UNS:
1469 case RS6000_BIF_NAND_V2DI:
1470 case RS6000_BIF_NAND_V4SF:
1471 case RS6000_BIF_NAND_V2DF:
1472 arg0 = gimple_call_arg (stmt, 0);
1473 arg1 = gimple_call_arg (stmt, 1);
1474 lhs = gimple_call_lhs (stmt);
1475 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
1476 g = gimple_build_assign (temp, BIT_AND_EXPR, arg0, arg1);
1477 gimple_set_location (g, gimple_location (stmt));
1478 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1479 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
1480 gimple_set_location (g, gimple_location (stmt));
1481 gsi_replace (gsi, g, true);
1482 return true;
1483 /* Flavors of vec_or. */
1484 case RS6000_BIF_VOR_V16QI_UNS:
1485 case RS6000_BIF_VOR_V16QI:
1486 case RS6000_BIF_VOR_V8HI_UNS:
1487 case RS6000_BIF_VOR_V8HI:
1488 case RS6000_BIF_VOR_V4SI_UNS:
1489 case RS6000_BIF_VOR_V4SI:
1490 case RS6000_BIF_VOR_V2DI_UNS:
1491 case RS6000_BIF_VOR_V2DI:
1492 case RS6000_BIF_VOR_V4SF:
1493 case RS6000_BIF_VOR_V2DF:
1494 arg0 = gimple_call_arg (stmt, 0);
1495 arg1 = gimple_call_arg (stmt, 1);
1496 lhs = gimple_call_lhs (stmt);
1497 g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, arg1);
1498 gimple_set_location (g, gimple_location (stmt));
1499 gsi_replace (gsi, g, true);
1500 return true;
1501 /* flavors of vec_orc. */
1502 case RS6000_BIF_ORC_V16QI_UNS:
1503 case RS6000_BIF_ORC_V16QI:
1504 case RS6000_BIF_ORC_V8HI_UNS:
1505 case RS6000_BIF_ORC_V8HI:
1506 case RS6000_BIF_ORC_V4SI_UNS:
1507 case RS6000_BIF_ORC_V4SI:
1508 case RS6000_BIF_ORC_V2DI_UNS:
1509 case RS6000_BIF_ORC_V2DI:
1510 case RS6000_BIF_ORC_V4SF:
1511 case RS6000_BIF_ORC_V2DF:
1512 arg0 = gimple_call_arg (stmt, 0);
1513 arg1 = gimple_call_arg (stmt, 1);
1514 lhs = gimple_call_lhs (stmt);
1515 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
1516 g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
1517 gimple_set_location (g, gimple_location (stmt));
1518 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1519 g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, temp);
1520 gimple_set_location (g, gimple_location (stmt));
1521 gsi_replace (gsi, g, true);
1522 return true;
1523 /* Flavors of vec_xor. */
1524 case RS6000_BIF_VXOR_V16QI_UNS:
1525 case RS6000_BIF_VXOR_V16QI:
1526 case RS6000_BIF_VXOR_V8HI_UNS:
1527 case RS6000_BIF_VXOR_V8HI:
1528 case RS6000_BIF_VXOR_V4SI_UNS:
1529 case RS6000_BIF_VXOR_V4SI:
1530 case RS6000_BIF_VXOR_V2DI_UNS:
1531 case RS6000_BIF_VXOR_V2DI:
1532 case RS6000_BIF_VXOR_V4SF:
1533 case RS6000_BIF_VXOR_V2DF:
1534 arg0 = gimple_call_arg (stmt, 0);
1535 arg1 = gimple_call_arg (stmt, 1);
1536 lhs = gimple_call_lhs (stmt);
1537 g = gimple_build_assign (lhs, BIT_XOR_EXPR, arg0, arg1);
1538 gimple_set_location (g, gimple_location (stmt));
1539 gsi_replace (gsi, g, true);
1540 return true;
1541 /* Flavors of vec_nor. */
1542 case RS6000_BIF_VNOR_V16QI_UNS:
1543 case RS6000_BIF_VNOR_V16QI:
1544 case RS6000_BIF_VNOR_V8HI_UNS:
1545 case RS6000_BIF_VNOR_V8HI:
1546 case RS6000_BIF_VNOR_V4SI_UNS:
1547 case RS6000_BIF_VNOR_V4SI:
1548 case RS6000_BIF_VNOR_V2DI_UNS:
1549 case RS6000_BIF_VNOR_V2DI:
1550 case RS6000_BIF_VNOR_V4SF:
1551 case RS6000_BIF_VNOR_V2DF:
1552 arg0 = gimple_call_arg (stmt, 0);
1553 arg1 = gimple_call_arg (stmt, 1);
1554 lhs = gimple_call_lhs (stmt);
1555 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
1556 g = gimple_build_assign (temp, BIT_IOR_EXPR, arg0, arg1);
1557 gimple_set_location (g, gimple_location (stmt));
1558 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1559 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
1560 gimple_set_location (g, gimple_location (stmt));
1561 gsi_replace (gsi, g, true);
1562 return true;
1563 /* flavors of vec_abs. */
1564 case RS6000_BIF_ABS_V16QI:
1565 case RS6000_BIF_ABS_V8HI:
1566 case RS6000_BIF_ABS_V4SI:
1567 case RS6000_BIF_ABS_V4SF:
1568 case RS6000_BIF_ABS_V2DI:
1569 case RS6000_BIF_XVABSDP:
1570 case RS6000_BIF_XVABSSP:
1571 arg0 = gimple_call_arg (stmt, 0);
1572 if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (arg0)))
1573 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (arg0))))
1574 return false;
1575 lhs = gimple_call_lhs (stmt);
1576 g = gimple_build_assign (lhs, ABS_EXPR, arg0);
1577 gimple_set_location (g, gimple_location (stmt));
1578 gsi_replace (gsi, g, true);
1579 return true;
1580 /* fold into MIN_EXPR when fast-math is set. */
1581 case RS6000_BIF_XSMINDP:
1582 /* flavors of vec_min. */
1583 case RS6000_BIF_XVMINDP:
1584 case RS6000_BIF_XVMINSP:
1585 case RS6000_BIF_VMINFP:
1586 {
1587 lhs = gimple_call_lhs (stmt);
1588 tree type = TREE_TYPE (lhs);
1589 if (HONOR_NANS (type))
1590 return false;
1591 gcc_fallthrough ();
1592 }
1593 case RS6000_BIF_VMINSD:
1594 case RS6000_BIF_VMINUD:
1595 case RS6000_BIF_VMINSB:
1596 case RS6000_BIF_VMINSH:
1597 case RS6000_BIF_VMINSW:
1598 case RS6000_BIF_VMINUB:
1599 case RS6000_BIF_VMINUH:
1600 case RS6000_BIF_VMINUW:
1601 arg0 = gimple_call_arg (stmt, 0);
1602 arg1 = gimple_call_arg (stmt, 1);
1603 lhs = gimple_call_lhs (stmt);
1604 g = gimple_build_assign (lhs, MIN_EXPR, arg0, arg1);
1605 gimple_set_location (g, gimple_location (stmt));
1606 gsi_replace (gsi, g, true);
1607 return true;
1608 /* fold into MAX_EXPR when fast-math is set. */
1609 case RS6000_BIF_XSMAXDP:
1610 /* flavors of vec_max. */
1611 case RS6000_BIF_XVMAXDP:
1612 case RS6000_BIF_XVMAXSP:
1613 case RS6000_BIF_VMAXFP:
1614 {
1615 lhs = gimple_call_lhs (stmt);
1616 tree type = TREE_TYPE (lhs);
1617 if (HONOR_NANS (type))
1618 return false;
1619 gcc_fallthrough ();
1620 }
1621 case RS6000_BIF_VMAXSD:
1622 case RS6000_BIF_VMAXUD:
1623 case RS6000_BIF_VMAXSB:
1624 case RS6000_BIF_VMAXSH:
1625 case RS6000_BIF_VMAXSW:
1626 case RS6000_BIF_VMAXUB:
1627 case RS6000_BIF_VMAXUH:
1628 case RS6000_BIF_VMAXUW:
1629 arg0 = gimple_call_arg (stmt, 0);
1630 arg1 = gimple_call_arg (stmt, 1);
1631 lhs = gimple_call_lhs (stmt);
1632 g = gimple_build_assign (lhs, MAX_EXPR, arg0, arg1);
1633 gimple_set_location (g, gimple_location (stmt));
1634 gsi_replace (gsi, g, true);
1635 return true;
1636 /* Flavors of vec_eqv. */
1637 case RS6000_BIF_EQV_V16QI:
1638 case RS6000_BIF_EQV_V8HI:
1639 case RS6000_BIF_EQV_V4SI:
1640 case RS6000_BIF_EQV_V4SF:
1641 case RS6000_BIF_EQV_V2DF:
1642 case RS6000_BIF_EQV_V2DI:
1643 arg0 = gimple_call_arg (stmt, 0);
1644 arg1 = gimple_call_arg (stmt, 1);
1645 lhs = gimple_call_lhs (stmt);
1646 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
1647 g = gimple_build_assign (temp, BIT_XOR_EXPR, arg0, arg1);
1648 gimple_set_location (g, gimple_location (stmt));
1649 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1650 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
1651 gimple_set_location (g, gimple_location (stmt));
1652 gsi_replace (gsi, g, true);
1653 return true;
1654 /* Flavors of vec_rotate_left. */
1655 case RS6000_BIF_VRLB:
1656 case RS6000_BIF_VRLH:
1657 case RS6000_BIF_VRLW:
1658 case RS6000_BIF_VRLD:
1659 arg0 = gimple_call_arg (stmt, 0);
1660 arg1 = gimple_call_arg (stmt, 1);
1661 lhs = gimple_call_lhs (stmt);
1662 g = gimple_build_assign (lhs, LROTATE_EXPR, arg0, arg1);
1663 gimple_set_location (g, gimple_location (stmt));
1664 gsi_replace (gsi, g, true);
1665 return true;
1666 /* Flavors of vector shift right algebraic.
1667 vec_sra{b,h,w,d} -> vsra{b,h,w,d}. */
1668 case RS6000_BIF_VSRAB:
1669 case RS6000_BIF_VSRAH:
1670 case RS6000_BIF_VSRAW:
1671 case RS6000_BIF_VSRAD:
1672 {
1673 arg0 = gimple_call_arg (stmt, 0);
1674 arg1 = gimple_call_arg (stmt, 1);
1675 lhs = gimple_call_lhs (stmt);
1676 tree arg1_type = TREE_TYPE (arg1);
1677 tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1));
1678 tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type));
1679 location_t loc = gimple_location (stmt);
1680 /* Force arg1 into the valid range of shift amounts for the arg0 element type. */
1681 /* Build a vector consisting of the max valid bit-size values. */
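/* E.g. (illustrative): for V4SI, n_elts is 4, so each element of the
   modulo vector is 128 / 4 = 32, and every shift amount is reduced
   modulo the 32-bit element width before the RSHIFT_EXPR below.  */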
1682 int n_elts = VECTOR_CST_NELTS (arg1);
1683 tree element_size = build_int_cst (unsigned_element_type,
1684 128 / n_elts);
1685 tree_vector_builder elts (unsigned_arg1_type, n_elts, 1);
1686 for (int i = 0; i < n_elts; i++)
1687 elts.safe_push (element_size);
1688 tree modulo_tree = elts.build ();
1689 /* Modulo the provided shift value against that vector. */
1690 gimple_seq stmts = NULL;
1691 tree unsigned_arg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1692 unsigned_arg1_type, arg1);
1693 tree new_arg1 = gimple_build (&stmts, loc, TRUNC_MOD_EXPR,
1694 unsigned_arg1_type, unsigned_arg1,
1695 modulo_tree);
1696 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1697 /* And finally, do the shift. */
1698 g = gimple_build_assign (lhs, RSHIFT_EXPR, arg0, new_arg1);
1699 gimple_set_location (g, loc);
1700 gsi_replace (gsi, g, true);
1701 return true;
1702 }
1703 /* Flavors of vector shift left.
1704 builtin_altivec_vsl{b,h,w,d} -> vsl{b,h,w,d}. */
1705 case RS6000_BIF_VSLB:
1706 case RS6000_BIF_VSLH:
1707 case RS6000_BIF_VSLW:
1708 case RS6000_BIF_VSLD:
1709 {
1710 location_t loc;
1711 gimple_seq stmts = NULL;
1712 arg0 = gimple_call_arg (stmt, 0);
1713 tree arg0_type = TREE_TYPE (arg0);
1714 if (INTEGRAL_TYPE_P (TREE_TYPE (arg0_type))
1715 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (arg0_type)))
1716 return false;
1717 arg1 = gimple_call_arg (stmt, 1);
1718 tree arg1_type = TREE_TYPE (arg1);
1719 tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1));
1720 tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type));
1721 loc = gimple_location (stmt);
1722 lhs = gimple_call_lhs (stmt);
1723 /* Force arg1 into the valid range of shift amounts for the arg0 element type. */
1724 /* Build a vector consisting of the max valid bit-size values. */
1725 int n_elts = VECTOR_CST_NELTS (arg1);
1726 int tree_size_in_bits = TREE_INT_CST_LOW (size_in_bytes (arg1_type))
1727 * BITS_PER_UNIT;
1728 tree element_size = build_int_cst (unsigned_element_type,
1729 tree_size_in_bits / n_elts);
1730 tree_vector_builder elts (unsigned_type_for (arg1_type), n_elts, 1);
1731 for (int i = 0; i < n_elts; i++)
1732 elts.safe_push (element_size);
1733 tree modulo_tree = elts.build ();
1734 /* Modulo the provided shift value against that vector. */
1735 tree unsigned_arg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1736 unsigned_arg1_type, arg1);
1737 tree new_arg1 = gimple_build (&stmts, loc, TRUNC_MOD_EXPR,
1738 unsigned_arg1_type, unsigned_arg1,
1739 modulo_tree);
1740 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1741 /* And finally, do the shift. */
1742 g = gimple_build_assign (lhs, LSHIFT_EXPR, arg0, new_arg1);
1743 gimple_set_location (g, gimple_location (stmt));
1744 gsi_replace (gsi, g, true);
1745 return true;
1746 }
1747 /* Flavors of vector shift right. */
1748 case RS6000_BIF_VSRB:
1749 case RS6000_BIF_VSRH:
1750 case RS6000_BIF_VSRW:
1751 case RS6000_BIF_VSRD:
1752 {
1753 arg0 = gimple_call_arg (stmt, 0);
1754 arg1 = gimple_call_arg (stmt, 1);
1755 lhs = gimple_call_lhs (stmt);
1756 tree arg1_type = TREE_TYPE (arg1);
1757 tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1));
1758 tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type));
1759 location_t loc = gimple_location (stmt);
1760 gimple_seq stmts = NULL;
1761 /* Convert arg0 to unsigned. */
1762 tree arg0_unsigned
1763 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1764 unsigned_type_for (TREE_TYPE (arg0)), arg0);
1765 /* Force arg1 into the valid range of shift amounts for the arg0 element type. */
1766 /* Build a vector consisting of the max valid bit-size values. */
1767 int n_elts = VECTOR_CST_NELTS (arg1);
1768 tree element_size = build_int_cst (unsigned_element_type,
1769 128 / n_elts);
1770 tree_vector_builder elts (unsigned_arg1_type, n_elts, 1);
1771 for (int i = 0; i < n_elts; i++)
1772 elts.safe_push (element_size);
1773 tree modulo_tree = elts.build ();
1774 /* Modulo the provided shift value against that vector. */
1775 tree unsigned_arg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1776 unsigned_arg1_type, arg1);
1777 tree new_arg1 = gimple_build (&stmts, loc, TRUNC_MOD_EXPR,
1778 unsigned_arg1_type, unsigned_arg1,
1779 modulo_tree);
1780 /* Do the shift. */
1781 tree res
1782 = gimple_build (&stmts, RSHIFT_EXPR,
1783 TREE_TYPE (arg0_unsigned), arg0_unsigned, new_arg1);
1784 /* Convert result back to the lhs type. */
1785 res = gimple_build (&stmts, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), res);
1786 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1787 replace_call_with_value (gsi, res);
1788 return true;
1789 }
1790 /* Vector loads. */
1791 case RS6000_BIF_LVX_V16QI:
1792 case RS6000_BIF_LVX_V8HI:
1793 case RS6000_BIF_LVX_V4SI:
1794 case RS6000_BIF_LVX_V4SF:
1795 case RS6000_BIF_LVX_V2DI:
1796 case RS6000_BIF_LVX_V2DF:
1797 case RS6000_BIF_LVX_V1TI:
1798 {
1799 arg0 = gimple_call_arg (stmt, 0); // offset
1800 arg1 = gimple_call_arg (stmt, 1); // address
1801 lhs = gimple_call_lhs (stmt);
1802 location_t loc = gimple_location (stmt);
1803 /* Since arg1 may be cast to a different type, just use ptr_type_node
1804 here instead of trying to enforce TBAA on pointer types. */
1805 tree arg1_type = ptr_type_node;
1806 tree lhs_type = TREE_TYPE (lhs);
1807 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
1808 the tree using the value from arg0. The resulting type will match
1809 the type of arg1. */
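/* Roughly (illustrative), the resulting GIMPLE is
     lhs = MEM[(arg1 p+ (sizetype) arg0) & -16];
   i.e. a vector load from the 16-byte-aligned (masked) address.  */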
1810 gimple_seq stmts = NULL;
1811 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
1812 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
1813 arg1_type, arg1, temp_offset);
1814 /* Mask off any lower bits from the address. */
1815 tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
1816 arg1_type, temp_addr,
1817 build_int_cst (arg1_type, -16));
1818 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1819 if (!is_gimple_mem_ref_addr (aligned_addr))
1820 {
1821 tree t = make_ssa_name (TREE_TYPE (aligned_addr));
1822 gimple *g = gimple_build_assign (t, aligned_addr);
1823 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1824 aligned_addr = t;
1825 }
1826 /* Use the build2 helper to set up the mem_ref. The MEM_REF could also
1827 take an offset, but since we've already incorporated the offset
1828 above, here we just pass in a zero. */
1829 gimple *g
1830 = gimple_build_assign (lhs, build2 (MEM_REF, lhs_type, aligned_addr,
1831 build_int_cst (arg1_type, 0)));
1832 gimple_set_location (g, loc);
1833 gsi_replace (gsi, g, true);
1834 return true;
1835 }
1836 /* Vector stores. */
1837 case RS6000_BIF_STVX_V16QI:
1838 case RS6000_BIF_STVX_V8HI:
1839 case RS6000_BIF_STVX_V4SI:
1840 case RS6000_BIF_STVX_V4SF:
1841 case RS6000_BIF_STVX_V2DI:
1842 case RS6000_BIF_STVX_V2DF:
1843 {
1844 arg0 = gimple_call_arg (stmt, 0); /* Value to be stored. */
1845 arg1 = gimple_call_arg (stmt, 1); /* Offset. */
1846 tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address. */
1847 location_t loc = gimple_location (stmt);
1848 tree arg0_type = TREE_TYPE (arg0);
1849 /* Use ptr_type_node (no TBAA) for the arg2_type.
1850 FIXME: (Richard) "A proper fix would be to transition this type as
1851 seen from the frontend to GIMPLE, for example in a similar way we
1852 do for MEM_REFs by piggy-backing that on an extra argument, a
1853 constant zero pointer of the alias pointer type to use (which would
1854 also serve as a type indicator of the store itself). I'd use a
1855 target specific internal function for this (not sure if we can have
1856 those target specific, but I guess if it's folded away then that's
1857 fine) and get away with the overload set." */
1858 tree arg2_type = ptr_type_node;
1859 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
1860 the tree using the value from arg1. The resulting type will match
1861 the type of arg2. */
1862 gimple_seq stmts = NULL;
1863 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
1864 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
1865 arg2_type, arg2, temp_offset);
1866 /* Mask off any lower bits from the address. */
1867 tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
1868 arg2_type, temp_addr,
1869 build_int_cst (arg2_type, -16));
1870 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1871 if (!is_gimple_mem_ref_addr (aligned_addr))
1872 {
1873 tree t = make_ssa_name (TREE_TYPE (aligned_addr));
1874 gimple *g = gimple_build_assign (t, aligned_addr);
1875 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1876 aligned_addr = t;
1877 }
1878 /* The desired gimple result should be similar to:
1879 MEM[(__vector floatD.1407 *)_1] = vf1D.2697; */
1880 gimple *g
1881 = gimple_build_assign (build2 (MEM_REF, arg0_type, aligned_addr,
1882 build_int_cst (arg2_type, 0)), arg0);
1883 gimple_set_location (g, loc);
1884 gsi_replace (gsi, g, true);
1885 return true;
1886 }
1887
1888 /* Unaligned vector loads. */
1889 case RS6000_BIF_LXVW4X_V16QI:
1890 case RS6000_BIF_LXVW4X_V8HI:
1891 case RS6000_BIF_LXVW4X_V4SF:
1892 case RS6000_BIF_LXVW4X_V4SI:
1893 case RS6000_BIF_LXVD2X_V2DF:
1894 case RS6000_BIF_LXVD2X_V2DI:
1895 {
1896 arg0 = gimple_call_arg (stmt, 0); // offset
1897 arg1 = gimple_call_arg (stmt, 1); // address
1898 lhs = gimple_call_lhs (stmt);
1899 location_t loc = gimple_location (stmt);
1900 /* Since arg1 may be cast to a different type, just use ptr_type_node
1901 here instead of trying to enforce TBAA on pointer types. */
1902 tree arg1_type = ptr_type_node;
1903 tree lhs_type = TREE_TYPE (lhs);
1904 /* In GIMPLE the type of the MEM_REF specifies the alignment. The
1905 required alignment on Power is 4 bytes regardless of data type. */
1906 tree align_ltype = build_aligned_type (lhs_type, 32);
1907 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
1908 the tree using the value from arg0. The resulting type will match
1909 the type of arg1. */
1910 gimple_seq stmts = NULL;
1911 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
1912 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
1913 arg1_type, arg1, temp_offset);
1914 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1915 if (!is_gimple_mem_ref_addr (temp_addr))
1916 {
1917 tree t = make_ssa_name (TREE_TYPE (temp_addr));
1918 gimple *g = gimple_build_assign (t, temp_addr);
1919 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1920 temp_addr = t;
1921 }
1922 /* Use the build2 helper to set up the mem_ref. The MEM_REF could also
1923 take an offset, but since we've already incorporated the offset
1924 above, here we just pass in a zero. */
1925 gimple *g;
1926 g = gimple_build_assign (lhs, build2 (MEM_REF, align_ltype, temp_addr,
1927 build_int_cst (arg1_type, 0)));
1928 gimple_set_location (g, loc);
1929 gsi_replace (gsi, g, true);
1930 return true;
1931 }
1932
1933 /* Unaligned vector stores. */
1934 case RS6000_BIF_STXVW4X_V16QI:
1935 case RS6000_BIF_STXVW4X_V8HI:
1936 case RS6000_BIF_STXVW4X_V4SF:
1937 case RS6000_BIF_STXVW4X_V4SI:
1938 case RS6000_BIF_STXVD2X_V2DF:
1939 case RS6000_BIF_STXVD2X_V2DI:
1940 {
1941 arg0 = gimple_call_arg (stmt, 0); /* Value to be stored. */
1942 arg1 = gimple_call_arg (stmt, 1); /* Offset. */
1943 tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address. */
1944 location_t loc = gimple_location (stmt);
1945 tree arg0_type = TREE_TYPE (arg0);
1946 /* Use ptr_type_node (no TBAA) for the arg2_type. */
1947 tree arg2_type = ptr_type_node;
1948 /* In GIMPLE the type of the MEM_REF specifies the alignment. The
1949 required alignment on Power is 4 bytes regardless of data type. */
1950 tree align_stype = build_aligned_type (arg0_type, 32);
1951 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
1952 the tree using the value from arg1. */
1953 gimple_seq stmts = NULL;
1954 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
1955 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
1956 arg2_type, arg2, temp_offset);
1957 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1958 if (!is_gimple_mem_ref_addr (temp_addr))
1959 {
1960 tree t = make_ssa_name (TREE_TYPE (temp_addr));
1961 gimple *g = gimple_build_assign (t, temp_addr);
1962 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1963 temp_addr = t;
1964 }
1965 gimple *g;
1966 g = gimple_build_assign (build2 (MEM_REF, align_stype, temp_addr,
1967 build_int_cst (arg2_type, 0)), arg0);
1968 gimple_set_location (g, loc);
1969 gsi_replace (gsi, g, true);
1970 return true;
1971 }
1972
1973 /* Vector fused multiply-add (fma). */
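/* E.g. (illustrative): vec_madd (a, b, c) becomes the internal call
   IFN_FMA (a, b, c), computing a * b + c elementwise.  */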
1974 case RS6000_BIF_VMADDFP:
1975 case RS6000_BIF_XVMADDDP:
1976 case RS6000_BIF_XVMADDSP:
1977 case RS6000_BIF_VMLADDUHM:
1978 {
1979 arg0 = gimple_call_arg (stmt, 0);
1980 arg1 = gimple_call_arg (stmt, 1);
1981 tree arg2 = gimple_call_arg (stmt, 2);
1982 lhs = gimple_call_lhs (stmt);
1983 gcall *g = gimple_build_call_internal (IFN_FMA, 3, arg0, arg1, arg2);
1984 gimple_call_set_lhs (g, lhs);
1985 gimple_call_set_nothrow (g, true);
1986 gimple_set_location (g, gimple_location (stmt));
1987 gsi_replace (gsi, g, true);
1988 return true;
1989 }
1990
1991 /* Vector compares; EQ, NE, GE, GT, LE. */
1992 case RS6000_BIF_VCMPEQUB:
1993 case RS6000_BIF_VCMPEQUH:
1994 case RS6000_BIF_VCMPEQUW:
1995 case RS6000_BIF_VCMPEQUD:
1996 case RS6000_BIF_VCMPEQUT:
1997 fold_compare_helper (gsi, EQ_EXPR, stmt);
1998 return true;
1999
2000 case RS6000_BIF_VCMPNEB:
2001 case RS6000_BIF_VCMPNEH:
2002 case RS6000_BIF_VCMPNEW:
2003 case RS6000_BIF_VCMPNET:
2004 fold_compare_helper (gsi, NE_EXPR, stmt);
2005 return true;
2006
2007 case RS6000_BIF_CMPGE_16QI:
2008 case RS6000_BIF_CMPGE_U16QI:
2009 case RS6000_BIF_CMPGE_8HI:
2010 case RS6000_BIF_CMPGE_U8HI:
2011 case RS6000_BIF_CMPGE_4SI:
2012 case RS6000_BIF_CMPGE_U4SI:
2013 case RS6000_BIF_CMPGE_2DI:
2014 case RS6000_BIF_CMPGE_U2DI:
2015 case RS6000_BIF_CMPGE_1TI:
2016 case RS6000_BIF_CMPGE_U1TI:
2017 fold_compare_helper (gsi, GE_EXPR, stmt);
2018 return true;
2019
2020 case RS6000_BIF_VCMPGTSB:
2021 case RS6000_BIF_VCMPGTUB:
2022 case RS6000_BIF_VCMPGTSH:
2023 case RS6000_BIF_VCMPGTUH:
2024 case RS6000_BIF_VCMPGTSW:
2025 case RS6000_BIF_VCMPGTUW:
2026 case RS6000_BIF_VCMPGTUD:
2027 case RS6000_BIF_VCMPGTSD:
2028 case RS6000_BIF_VCMPGTUT:
2029 case RS6000_BIF_VCMPGTST:
2030 fold_compare_helper (gsi, GT_EXPR, stmt);
2031 return true;
2032
2033 /* Flavors of vec_splat_[us]{8,16,32}. */
2034 case RS6000_BIF_VSPLTISB:
2035 case RS6000_BIF_VSPLTISH:
2036 case RS6000_BIF_VSPLTISW:
2037 {
2038 arg0 = gimple_call_arg (stmt, 0);
2039 lhs = gimple_call_lhs (stmt);
2040
2041 /* Only fold the vec_splat_*() if the low-order bits of arg 0 form a
2042 5-bit signed constant in the range -16 to +15. */
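/* E.g. (illustrative): vec_splat_s32 (5) folds to the constant
   vector { 5, 5, 5, 5 }.  */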
2043 if (TREE_CODE (arg0) != INTEGER_CST
2044 || !IN_RANGE (TREE_INT_CST_LOW (arg0), -16, 15))
2045 return false;
2046 gimple_seq stmts = NULL;
2047 location_t loc = gimple_location (stmt);
2048 tree splat_value = gimple_convert (&stmts, loc,
2049 TREE_TYPE (TREE_TYPE (lhs)), arg0);
2050 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
2051 tree splat_tree = build_vector_from_val (TREE_TYPE (lhs), splat_value);
2052 g = gimple_build_assign (lhs, splat_tree);
2053 gimple_set_location (g, gimple_location (stmt));
2054 gsi_replace (gsi, g, true);
2055 return true;
2056 }
2057
2058 /* Flavors of vec_splat. */
2059 /* a = vec_splat (b, 0x3) becomes a = { b[3],b[3],b[3],...}; */
2060 case RS6000_BIF_VSPLTB:
2061 case RS6000_BIF_VSPLTH:
2062 case RS6000_BIF_VSPLTW:
2063 case RS6000_BIF_XXSPLTD_V2DI:
2064 case RS6000_BIF_XXSPLTD_V2DF:
2065 {
2066 arg0 = gimple_call_arg (stmt, 0); /* input vector. */
2067 arg1 = gimple_call_arg (stmt, 1); /* index into arg0. */
2068 /* Only fold vec_splat () if arg1 is both a constant value and
2069 a valid index into the arg0 vector. */
2070 unsigned int n_elts = VECTOR_CST_NELTS (arg0);
2071 if (TREE_CODE (arg1) != INTEGER_CST
2072 || TREE_INT_CST_LOW (arg1) > (n_elts - 1))
2073 return false;
2074 lhs = gimple_call_lhs (stmt);
2075 tree lhs_type = TREE_TYPE (lhs);
2076 tree arg0_type = TREE_TYPE (arg0);
2077 tree splat;
2078 if (TREE_CODE (arg0) == VECTOR_CST)
2079 splat = VECTOR_CST_ELT (arg0, TREE_INT_CST_LOW (arg1));
2080 else
2081 {
2082 /* Determine (in bits) the length and start location of the
2083 splat value for a call to the tree_vec_extract helper. */
2084 int splat_elem_size = TREE_INT_CST_LOW (size_in_bytes (arg0_type))
2085 * BITS_PER_UNIT / n_elts;
2086 int splat_start_bit = TREE_INT_CST_LOW (arg1) * splat_elem_size;
2087 tree len = build_int_cst (bitsizetype, splat_elem_size);
2088 tree start = build_int_cst (bitsizetype, splat_start_bit);
2089 splat = tree_vec_extract (gsi, TREE_TYPE (lhs_type), arg0,
2090 len, start);
2091 }
2092 /* And finally, build the new vector. */
2093 tree splat_tree = build_vector_from_val (lhs_type, splat);
2094 g = gimple_build_assign (lhs, splat_tree);
2095 gimple_set_location (g, gimple_location (stmt));
2096 gsi_replace (gsi, g, true);
2097 return true;
2098 }
2099
2100 /* vec_mergel (integrals). */
2101 case RS6000_BIF_VMRGLH:
2102 case RS6000_BIF_VMRGLW:
2103 case RS6000_BIF_VMRGLB:
2104 case RS6000_BIF_VEC_MERGEL_V2DI:
2105 case RS6000_BIF_VEC_MERGEL_V2DF:
2106 fold_mergehl_helper (gsi, stmt, 1);
2107 return true;
2108 /* vec_mergeh (integrals). */
2109 case RS6000_BIF_VMRGHH:
2110 case RS6000_BIF_VMRGHW:
2111 case RS6000_BIF_VMRGHB:
2112 case RS6000_BIF_VEC_MERGEH_V2DI:
2113 case RS6000_BIF_VEC_MERGEH_V2DF:
2114 fold_mergehl_helper (gsi, stmt, 0);
2115 return true;
2116
2117 /* Flavors of vec_mergee. */
2118 case RS6000_BIF_VMRGEW_V4SI:
2119 case RS6000_BIF_VMRGEW_V2DI:
2120 case RS6000_BIF_VMRGEW_V4SF:
2121 case RS6000_BIF_VMRGEW_V2DF:
2122 fold_mergeeo_helper (gsi, stmt, 0);
2123 return true;
2124 /* Flavors of vec_mergeo. */
2125 case RS6000_BIF_VMRGOW_V4SI:
2126 case RS6000_BIF_VMRGOW_V2DI:
2127 case RS6000_BIF_VMRGOW_V4SF:
2128 case RS6000_BIF_VMRGOW_V2DF:
2129 fold_mergeeo_helper (gsi, stmt, 1);
2130 return true;
2131
2132 /* d = vec_pack (a, b) */
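/* E.g. (illustrative): packing two V4SI operands yields a V8HI result,
   with each 32-bit element truncated to its low 16 bits
   (VEC_PACK_TRUNC_EXPR).  */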
2133 case RS6000_BIF_VPKUDUM:
2134 case RS6000_BIF_VPKUHUM:
2135 case RS6000_BIF_VPKUWUM:
2136 {
2137 arg0 = gimple_call_arg (stmt, 0);
2138 arg1 = gimple_call_arg (stmt, 1);
2139 lhs = gimple_call_lhs (stmt);
2140 gimple *g = gimple_build_assign (lhs, VEC_PACK_TRUNC_EXPR, arg0, arg1);
2141 gimple_set_location (g, gimple_location (stmt));
2142 gsi_replace (gsi, g, true);
2143 return true;
2144 }
2145
2146 /* d = vec_unpackh (a) */
2147 /* Note that the UNPACK_{HI,LO}_EXPR used in the gimple_build_assign call
2148 in this code is sensitive to endianness, and needs to be inverted to
2149 handle both LE and BE targets. */
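/* E.g. (illustrative): on a little-endian target the elements selected
   by vec_unpackh live in the half that VEC_UNPACK_LO_EXPR extracts, so
   the HI/LO expressions are swapped relative to big-endian.  */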
2150 case RS6000_BIF_VUPKHSB:
2151 case RS6000_BIF_VUPKHSH:
2152 case RS6000_BIF_VUPKHSW:
2153 {
2154 arg0 = gimple_call_arg (stmt, 0);
2155 lhs = gimple_call_lhs (stmt);
2156 if (BYTES_BIG_ENDIAN)
2157 g = gimple_build_assign (lhs, VEC_UNPACK_HI_EXPR, arg0);
2158 else
2159 g = gimple_build_assign (lhs, VEC_UNPACK_LO_EXPR, arg0);
2160 gimple_set_location (g, gimple_location (stmt));
2161 gsi_replace (gsi, g, true);
2162 return true;
2163 }
2164 /* d = vec_unpackl (a) */
2165 case RS6000_BIF_VUPKLSB:
2166 case RS6000_BIF_VUPKLSH:
2167 case RS6000_BIF_VUPKLSW:
2168 {
2169 arg0 = gimple_call_arg (stmt, 0);
2170 lhs = gimple_call_lhs (stmt);
2171 if (BYTES_BIG_ENDIAN)
2172 g = gimple_build_assign (lhs, VEC_UNPACK_LO_EXPR, arg0);
2173 else
2174 g = gimple_build_assign (lhs, VEC_UNPACK_HI_EXPR, arg0);
2175 gimple_set_location (g, gimple_location (stmt));
2176 gsi_replace (gsi, g, true);
2177 return true;
2178 }
2179 /* There is no gimple type corresponding to pixel, so just return. */
2180 case RS6000_BIF_VUPKHPX:
2181 case RS6000_BIF_VUPKLPX:
2182 return false;
2183
2184 /* vec_perm. */
2185 case RS6000_BIF_VPERM_16QI:
2186 case RS6000_BIF_VPERM_8HI:
2187 case RS6000_BIF_VPERM_4SI:
2188 case RS6000_BIF_VPERM_2DI:
2189 case RS6000_BIF_VPERM_4SF:
2190 case RS6000_BIF_VPERM_2DF:
2191 case RS6000_BIF_VPERM_16QI_UNS:
2192 case RS6000_BIF_VPERM_8HI_UNS:
2193 case RS6000_BIF_VPERM_4SI_UNS:
2194 case RS6000_BIF_VPERM_2DI_UNS:
2195 {
2196 arg0 = gimple_call_arg (stmt, 0);
2197 arg1 = gimple_call_arg (stmt, 1);
2198 tree permute = gimple_call_arg (stmt, 2);
2199 lhs = gimple_call_lhs (stmt);
2200 location_t loc = gimple_location (stmt);
2201 gimple_seq stmts = NULL;
2202 // Convert arg0 and arg1 to match the type of the permute
2203 // for the VEC_PERM_EXPR operation.
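// Roughly (illustrative): VEC_PERM_EXPR (v0, v1, sel) picks element
// sel[i] (taken modulo 2 * nelts) from the concatenation of v0 and v1
// for each result element i.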
2204 tree permute_type = (TREE_TYPE (permute));
2205 tree arg0_ptype = gimple_build (&stmts, loc, VIEW_CONVERT_EXPR,
2206 permute_type, arg0);
2207 tree arg1_ptype = gimple_build (&stmts, loc, VIEW_CONVERT_EXPR,
2208 permute_type, arg1);
2209 tree lhs_ptype = gimple_build (&stmts, loc, VEC_PERM_EXPR,
2210 permute_type, arg0_ptype, arg1_ptype,
2211 permute);
2212 // Convert the result back to the desired lhs type upon completion.
2213 tree temp = gimple_build (&stmts, loc, VIEW_CONVERT_EXPR,
2214 TREE_TYPE (lhs), lhs_ptype);
2215 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
2216 g = gimple_build_assign (lhs, temp);
2217 gimple_set_location (g, loc);
2218 gsi_replace (gsi, g, true);
2219 return true;
2220 }
2221
2222 default:
2223 if (TARGET_DEBUG_BUILTIN)
2224 fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
2225 fn_code, fn_name1, fn_name2);
2226 break;
2227 }
2228
2229 return false;
2230 }
2231
2232 /* **** Expansion support **** */
2233
2234 static rtx
2235 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
2236 {
2237 rtx pat, scratch;
2238 tree cr6_form = CALL_EXPR_ARG (exp, 0);
2239 tree arg0 = CALL_EXPR_ARG (exp, 1);
2240 tree arg1 = CALL_EXPR_ARG (exp, 2);
2241 rtx op0 = expand_normal (arg0);
2242 rtx op1 = expand_normal (arg1);
2243 machine_mode tmode = SImode;
2244 machine_mode mode0 = insn_data[icode].operand[1].mode;
2245 machine_mode mode1 = insn_data[icode].operand[2].mode;
2246 int cr6_form_int;
2247
2248 if (TREE_CODE (cr6_form) != INTEGER_CST)
2249 {
2250 error ("argument 1 of %qs must be a constant",
2251 "__builtin_altivec_predicate");
2252 return const0_rtx;
2253 }
2254 else
2255 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
2256
2257 gcc_assert (mode0 == mode1);
2258
2259 /* If we have invalid arguments, bail out before generating bad rtl. */
2260 if (arg0 == error_mark_node || arg1 == error_mark_node)
2261 return const0_rtx;
2262
2263 if (target == 0
2264 || GET_MODE (target) != tmode
2265 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
2266 target = gen_reg_rtx (tmode);
2267
2268 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
2269 op0 = copy_to_mode_reg (mode0, op0);
2270 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
2271 op1 = copy_to_mode_reg (mode1, op1);
2272
2273 /* Note that for many of the relevant operations (e.g. cmpne or
2274 cmpeq) with float or double operands, it makes more sense for the
2275 mode of the allocated scratch register to select a vector of
2276 integer. But the choice to copy the mode of operand 0 was made
2277 long ago and there are no plans to change it. */
2278 scratch = gen_reg_rtx (mode0);
2279
2280 pat = GEN_FCN (icode) (scratch, op0, op1);
2281 if (! pat)
2282 return 0;
2283 emit_insn (pat);
2284
2285 /* The vec_any* and vec_all* predicates use the same opcodes for two
2286 different operations, but the bits in CR6 will be different
2287 depending on what information we want. So we have to play tricks
2288 with CR6 to get the right bits out.
2289
2290 If you think this is disgusting, look at the specs for the
2291 AltiVec predicates. */
2292
2293 switch (cr6_form_int)
2294 {
2295 case 0:
2296 emit_insn (gen_cr6_test_for_zero (target));
2297 break;
2298 case 1:
2299 emit_insn (gen_cr6_test_for_zero_reverse (target));
2300 break;
2301 case 2:
2302 emit_insn (gen_cr6_test_for_lt (target));
2303 break;
2304 case 3:
2305 emit_insn (gen_cr6_test_for_lt_reverse (target));
2306 break;
2307 default:
2308 error ("argument 1 of %qs is out of range",
2309 "__builtin_altivec_predicate");
2310 break;
2311 }
2312
2313 return target;
2314 }
2315
2316 /* Expand vec_ext builtin. */
2317 static rtx
2318 altivec_expand_vec_ext_builtin (tree exp, rtx target)
2319 {
2320 machine_mode tmode, mode0;
2321 tree arg0, arg1;
2322 rtx op0;
2323 rtx op1;
2324
2325 arg0 = CALL_EXPR_ARG (exp, 0);
2326 arg1 = CALL_EXPR_ARG (exp, 1);
2327
2328 op0 = expand_normal (arg0);
2329 op1 = expand_normal (arg1);
2330
2331 if (TREE_CODE (arg1) == INTEGER_CST)
2332 {
2333 unsigned HOST_WIDE_INT elt;
2334 unsigned HOST_WIDE_INT size = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
2335 unsigned int truncated_selector;
2336 /* Even if !tree_fits_uhwi_p (arg1), TREE_INT_CST_LOW (arg1)
2337 returns the low-order bits of the INTEGER_CST for modulo indexing. */
2338 elt = TREE_INT_CST_LOW (arg1);
2339 truncated_selector = elt % size;
2340 op1 = GEN_INT (truncated_selector);
2341 }
2342
2343 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
2344 mode0 = TYPE_MODE (TREE_TYPE (arg0));
2345 gcc_assert (VECTOR_MODE_P (mode0));
2346
2347 op0 = force_reg (mode0, op0);
2348
2349 if (optimize || !target || !register_operand (target, tmode))
2350 target = gen_reg_rtx (tmode);
2351
2352 rs6000_expand_vector_extract (target, op0, op1);
2353
2354 return target;
2355 }
2356
2357 /* Expand ALTIVEC_BUILTIN_MASK_FOR_LOAD. */
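/* The result is the permute control vector the vectorizer uses to
   realign a misaligned vector load; here it is computed (roughly) as
   lvsr of the negated address on big-endian targets and lvsl of the
   negated address on little-endian targets.  */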
2358 rtx
2359 rs6000_expand_ldst_mask (rtx target, tree arg0)
2360 {
2361 int icode2 = BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
2362 : (int) CODE_FOR_altivec_lvsl_direct;
2363 machine_mode tmode = insn_data[icode2].operand[0].mode;
2364 machine_mode mode = insn_data[icode2].operand[1].mode;
2365
2366 gcc_assert (TARGET_ALTIVEC);
2367
2368 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg0)));
2369 rtx op = expand_expr (arg0, NULL_RTX, Pmode, EXPAND_NORMAL);
2370 rtx addr = memory_address (mode, op);
2371 /* We need to negate the address. */
2372 op = gen_reg_rtx (GET_MODE (addr));
2373 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
2374 op = gen_rtx_MEM (mode, op);
2375
2376 if (target == 0
2377 || GET_MODE (target) != tmode
2378 || !insn_data[icode2].operand[0].predicate (target, tmode))
2379 target = gen_reg_rtx (tmode);
2380
2381 rtx pat = GEN_FCN (icode2) (target, op);
2382 if (!pat)
2383 return 0;
2384 emit_insn (pat);
2385
2386 return target;
2387 }
2388
2389 /* Used by __builtin_cpu_is(), mapping from PLATFORM names to values. */
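/* Illustrative use (do_power10_work is a hypothetical placeholder):
     if (__builtin_cpu_is ("power10"))
       do_power10_work ();
   expands, when glibc provides the TCB fields, to a comparison of the
   TCB platform word against PPC_PLATFORM_POWER10 + _DL_FIRST_PLATFORM.  */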
2390 static const struct
2391 {
2392 const char *cpu;
2393 unsigned int cpuid;
2394 } cpu_is_info[] = {
2395 { "power11", PPC_PLATFORM_POWER11 },
2396 { "power10", PPC_PLATFORM_POWER10 },
2397 { "power9", PPC_PLATFORM_POWER9 },
2398 { "power8", PPC_PLATFORM_POWER8 },
2399 { "power7", PPC_PLATFORM_POWER7 },
2400 { "power6x", PPC_PLATFORM_POWER6X },
2401 { "power6", PPC_PLATFORM_POWER6 },
2402 { "power5+", PPC_PLATFORM_POWER5_PLUS },
2403 { "power5", PPC_PLATFORM_POWER5 },
2404 { "ppc970", PPC_PLATFORM_PPC970 },
2405 { "power4", PPC_PLATFORM_POWER4 },
2406 { "ppca2", PPC_PLATFORM_PPCA2 },
2407 { "ppc476", PPC_PLATFORM_PPC476 },
2408 { "ppc464", PPC_PLATFORM_PPC464 },
2409 { "ppc440", PPC_PLATFORM_PPC440 },
2410 { "ppc405", PPC_PLATFORM_PPC405 },
2411 { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
2412 };
2413
2414 /* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks. */
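/* Illustrative use:
     if (__builtin_cpu_supports ("vsx"))
       ...
   expands to a test of PPC_FEATURE_HAS_VSX against the AT_HWCAP word
   cached in the TCB.  */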
2415 static const struct
2416 {
2417 const char *hwcap;
2418 int mask;
2419 unsigned int id;
2420 } cpu_supports_info[] = {
2421 /* AT_HWCAP masks. */
2422 { "4xxmac", PPC_FEATURE_HAS_4xxMAC, 0 },
2423 { "altivec", PPC_FEATURE_HAS_ALTIVEC, 0 },
2424 { "arch_2_05", PPC_FEATURE_ARCH_2_05, 0 },
2425 { "arch_2_06", PPC_FEATURE_ARCH_2_06, 0 },
2426 { "archpmu", PPC_FEATURE_PERFMON_COMPAT, 0 },
2427 { "booke", PPC_FEATURE_BOOKE, 0 },
2428 { "cellbe", PPC_FEATURE_CELL_BE, 0 },
2429 { "dfp", PPC_FEATURE_HAS_DFP, 0 },
2430 { "efpdouble", PPC_FEATURE_HAS_EFP_DOUBLE, 0 },
2431 { "efpsingle", PPC_FEATURE_HAS_EFP_SINGLE, 0 },
2432 { "fpu", PPC_FEATURE_HAS_FPU, 0 },
2433 { "ic_snoop", PPC_FEATURE_ICACHE_SNOOP, 0 },
2434 { "mmu", PPC_FEATURE_HAS_MMU, 0 },
2435 { "notb", PPC_FEATURE_NO_TB, 0 },
2436 { "pa6t", PPC_FEATURE_PA6T, 0 },
2437 { "power4", PPC_FEATURE_POWER4, 0 },
2438 { "power5", PPC_FEATURE_POWER5, 0 },
2439 { "power5+", PPC_FEATURE_POWER5_PLUS, 0 },
2440 { "power6x", PPC_FEATURE_POWER6_EXT, 0 },
2441 { "ppc32", PPC_FEATURE_32, 0 },
2442 { "ppc601", PPC_FEATURE_601_INSTR, 0 },
2443 { "ppc64", PPC_FEATURE_64, 0 },
2444 { "ppcle", PPC_FEATURE_PPC_LE, 0 },
2445 { "smt", PPC_FEATURE_SMT, 0 },
2446 { "spe", PPC_FEATURE_HAS_SPE, 0 },
2447 { "true_le", PPC_FEATURE_TRUE_LE, 0 },
2448 { "ucache", PPC_FEATURE_UNIFIED_CACHE, 0 },
2449 { "vsx", PPC_FEATURE_HAS_VSX, 0 },
2450
2451 /* AT_HWCAP2 masks. */
2452 { "arch_2_07", PPC_FEATURE2_ARCH_2_07, 1 },
2453 { "dscr", PPC_FEATURE2_HAS_DSCR, 1 },
2454 { "ebb", PPC_FEATURE2_HAS_EBB, 1 },
2455 { "htm", PPC_FEATURE2_HAS_HTM, 1 },
2456 { "htm-nosc", PPC_FEATURE2_HTM_NOSC, 1 },
2457 { "htm-no-suspend", PPC_FEATURE2_HTM_NO_SUSPEND, 1 },
2458 { "isel", PPC_FEATURE2_HAS_ISEL, 1 },
2459 { "tar", PPC_FEATURE2_HAS_TAR, 1 },
2460 { "vcrypto", PPC_FEATURE2_HAS_VEC_CRYPTO, 1 },
2461 { "arch_3_00", PPC_FEATURE2_ARCH_3_00, 1 },
2462 { "ieee128", PPC_FEATURE2_HAS_IEEE128, 1 },
2463 { "darn", PPC_FEATURE2_DARN, 1 },
2464 { "scv", PPC_FEATURE2_SCV, 1 },
2465 { "arch_3_1", PPC_FEATURE2_ARCH_3_1, 1 },
2466 { "mma", PPC_FEATURE2_MMA, 1 },
2467 };
2468
2469 /* Expand the CPU builtin in FCODE and store the result in TARGET. */
2470 static rtx
2471 cpu_expand_builtin (enum rs6000_gen_builtins fcode,
2472 tree exp ATTRIBUTE_UNUSED, rtx target)
2473 {
2474 /* __builtin_cpu_init () is a nop, so expand to nothing. */
2475 if (fcode == RS6000_BIF_CPU_INIT)
2476 return const0_rtx;
2477
2478 if (target == 0 || GET_MODE (target) != SImode)
2479 target = gen_reg_rtx (SImode);
2480
2481 /* TODO: Factor the #ifdef'd code into a separate function. */
2482 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
2483 tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
2484 /* Target clones create an ARRAY_REF instead of a STRING_CST; convert it back
2485 to a STRING_CST. */
2486 if (TREE_CODE (arg) == ARRAY_REF
2487 && TREE_CODE (TREE_OPERAND (arg, 0)) == STRING_CST
2488 && TREE_CODE (TREE_OPERAND (arg, 1)) == INTEGER_CST
2489 && compare_tree_int (TREE_OPERAND (arg, 1), 0) == 0)
2490 arg = TREE_OPERAND (arg, 0);
2491
2492 if (TREE_CODE (arg) != STRING_CST)
2493 {
2494 error ("builtin %qs only accepts a string argument",
2495 rs6000_builtin_info[(size_t) fcode].bifname);
2496 return const0_rtx;
2497 }
2498
2499 if (fcode == RS6000_BIF_CPU_IS)
2500 {
2501 const char *cpu = TREE_STRING_POINTER (arg);
2502 rtx cpuid = NULL_RTX;
2503 for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
2504 if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
2505 {
2506 /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */
2507 cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
2508 break;
2509 }
2510 if (cpuid == NULL_RTX)
2511 {
2512 /* Invalid CPU argument. */
2513 error ("cpu %qs is an invalid argument to builtin %qs",
2514 cpu, rs6000_builtin_info[(size_t) fcode].bifname);
2515 return const0_rtx;
2516 }
2517
2518 rtx platform = gen_reg_rtx (SImode);
2519 rtx address = gen_rtx_PLUS (Pmode,
2520 gen_rtx_REG (Pmode, TLS_REGNUM),
2521 GEN_INT (TCB_PLATFORM_OFFSET));
2522 rtx tcbmem = gen_const_mem (SImode, address);
2523 emit_move_insn (platform, tcbmem);
2524 emit_insn (gen_eqsi3 (target, platform, cpuid));
2525 }
2526 else if (fcode == RS6000_BIF_CPU_SUPPORTS)
2527 {
2528 const char *hwcap = TREE_STRING_POINTER (arg);
2529 rtx mask = NULL_RTX;
2530 int hwcap_offset;
2531 for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
2532 if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
2533 {
2534 mask = GEN_INT (cpu_supports_info[i].mask);
2535 hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
2536 break;
2537 }
2538 if (mask == NULL_RTX)
2539 {
2540 /* Invalid HWCAP argument. */
2541 error ("%s %qs is an invalid argument to builtin %qs",
2542 "hwcap", hwcap,
2543 rs6000_builtin_info[(size_t) fcode].bifname);
2544 return const0_rtx;
2545 }
2546
2547 rtx tcb_hwcap = gen_reg_rtx (SImode);
2548 rtx address = gen_rtx_PLUS (Pmode,
2549 gen_rtx_REG (Pmode, TLS_REGNUM),
2550 GEN_INT (hwcap_offset));
2551 rtx tcbmem = gen_const_mem (SImode, address);
2552 emit_move_insn (tcb_hwcap, tcbmem);
2553 rtx scratch1 = gen_reg_rtx (SImode);
2554 emit_insn (gen_rtx_SET (scratch1,
2555 gen_rtx_AND (SImode, tcb_hwcap, mask)));
2556 rtx scratch2 = gen_reg_rtx (SImode);
2557 emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
2558 emit_insn (gen_rtx_SET (target,
2559 gen_rtx_XOR (SImode, scratch2, const1_rtx)));
2560 }
2561 else
2562 gcc_unreachable ();
2563
2564 /* Record that we have expanded a CPU builtin, so that we can later
2565 emit a reference to the special symbol exported by LIBC to ensure we
2566 do not link against an old LIBC that doesn't support this feature. */
2567 cpu_builtin_p = true;
2568
2569 #else
2570 warning (0, "builtin %qs needs GLIBC (2.23 and newer) that exports hardware "
2571 "capability bits", rs6000_builtin_info[(size_t) fcode].bifname);
2572
2573 /* For old LIBCs, always return FALSE. */
2574 emit_move_insn (target, GEN_INT (0));
2575 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
2576
2577 return target;
2578 }
2579
2580 /* For the element-reversing load/store built-ins, produce the correct
2581 insn_code depending on the target endianness. */
2582 static insn_code
2583 elemrev_icode (rs6000_gen_builtins fcode)
2584 {
2585 switch (fcode)
2586 {
2587 case RS6000_BIF_ST_ELEMREV_V1TI:
2588 return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v1ti
2589 : CODE_FOR_vsx_st_elemrev_v1ti;
2590
2591 case RS6000_BIF_ST_ELEMREV_V2DF:
2592 return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
2593 : CODE_FOR_vsx_st_elemrev_v2df;
2594
2595 case RS6000_BIF_ST_ELEMREV_V2DI:
2596 return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
2597 : CODE_FOR_vsx_st_elemrev_v2di;
2598
2599 case RS6000_BIF_ST_ELEMREV_V4SF:
2600 return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
2601 : CODE_FOR_vsx_st_elemrev_v4sf;
2602
2603 case RS6000_BIF_ST_ELEMREV_V4SI:
2604 return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
2605 : CODE_FOR_vsx_st_elemrev_v4si;
2606
2607 case RS6000_BIF_ST_ELEMREV_V8HI:
2608 return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
2609 : CODE_FOR_vsx_st_elemrev_v8hi;
2610
2611 case RS6000_BIF_ST_ELEMREV_V16QI:
2612 return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
2613 : CODE_FOR_vsx_st_elemrev_v16qi;
2614
2615 case RS6000_BIF_LD_ELEMREV_V2DF:
2616 return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
2617 : CODE_FOR_vsx_ld_elemrev_v2df;
2618
2619 case RS6000_BIF_LD_ELEMREV_V1TI:
2620 return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v1ti
2621 : CODE_FOR_vsx_ld_elemrev_v1ti;
2622
2623 case RS6000_BIF_LD_ELEMREV_V2DI:
2624 return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
2625 : CODE_FOR_vsx_ld_elemrev_v2di;
2626
2627 case RS6000_BIF_LD_ELEMREV_V4SF:
2628 return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
2629 : CODE_FOR_vsx_ld_elemrev_v4sf;
2630
2631 case RS6000_BIF_LD_ELEMREV_V4SI:
2632 return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
2633 : CODE_FOR_vsx_ld_elemrev_v4si;
2634
2635 case RS6000_BIF_LD_ELEMREV_V8HI:
2636 return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
2637 : CODE_FOR_vsx_ld_elemrev_v8hi;
2638
2639 case RS6000_BIF_LD_ELEMREV_V16QI:
2640 return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
2641 : CODE_FOR_vsx_ld_elemrev_v16qi;
2642 default:
2643 ;
2644 }
2645
2646 gcc_unreachable ();
2647 }
2648
2649 /* Expand an AltiVec vector load builtin, and return the expanded rtx. */
2650 static rtx
2651 ldv_expand_builtin (rtx target, insn_code icode, rtx *op, machine_mode tmode)
2652 {
2653 if (target == 0
2654 || GET_MODE (target) != tmode
2655 || !insn_data[icode].operand[0].predicate (target, tmode))
2656 target = gen_reg_rtx (tmode);
2657
2658 op[1] = copy_to_mode_reg (Pmode, op[1]);
2659
2660 /* These CELL built-ins use BLKmode instead of tmode for historical
2661 (i.e., unknown) reasons. TODO: Is this necessary? */
2662 bool blk = (icode == CODE_FOR_altivec_lvlx
2663 || icode == CODE_FOR_altivec_lvlxl
2664 || icode == CODE_FOR_altivec_lvrx
2665 || icode == CODE_FOR_altivec_lvrxl);
2666
2667 /* For LVX, express the RTL accurately by ANDing the address with -16.
2668 LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
2669 so the raw address is fine. */
2670 /* TODO: That statement seems wrong, as the UNSPECs don't surround the
2671 memory expression, so a latent bug may lie here. The &-16 is likely
2672 needed for all VMX-style loads. */
2673 if (icode == CODE_FOR_altivec_lvx_v1ti
2674 || icode == CODE_FOR_altivec_lvx_v2df
2675 || icode == CODE_FOR_altivec_lvx_v2di
2676 || icode == CODE_FOR_altivec_lvx_v4sf
2677 || icode == CODE_FOR_altivec_lvx_v4si
2678 || icode == CODE_FOR_altivec_lvx_v8hi
2679 || icode == CODE_FOR_altivec_lvx_v16qi)
2680 {
2681 rtx rawaddr;
2682 if (op[0] == const0_rtx)
2683 rawaddr = op[1];
2684 else
2685 {
2686 op[0] = copy_to_mode_reg (Pmode, op[0]);
2687 rawaddr = gen_rtx_PLUS (Pmode, op[1], op[0]);
2688 }
2689 rtx addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
2690 addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
2691
2692 emit_insn (gen_rtx_SET (target, addr));
2693 }
2694 else
2695 {
2696 rtx addr;
2697 if (op[0] == const0_rtx)
2698 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op[1]);
2699 else
2700 {
2701 op[0] = copy_to_mode_reg (Pmode, op[0]);
2702 addr = gen_rtx_MEM (blk ? BLKmode : tmode,
2703 gen_rtx_PLUS (Pmode, op[1], op[0]));
2704 }
2705
2706 rtx pat = GEN_FCN (icode) (target, addr);
2707 if (!pat)
2708 return 0;
2709 emit_insn (pat);
2710 }
2711
2712 return target;
2713 }
2714
2715 /* Expand a builtin function that loads a scalar into a vector register
2716 with sign extension, and return the expanded rtx. */
2717 static rtx
2718 lxvrse_expand_builtin (rtx target, insn_code icode, rtx *op,
2719 machine_mode tmode, machine_mode smode)
2720 {
2721 rtx pat, addr;
2722 op[1] = copy_to_mode_reg (Pmode, op[1]);
2723
2724 if (op[0] == const0_rtx)
2725 addr = gen_rtx_MEM (tmode, op[1]);
2726 else
2727 {
2728 op[0] = copy_to_mode_reg (Pmode, op[0]);
2729 addr = gen_rtx_MEM (smode,
2730 gen_rtx_PLUS (Pmode, op[1], op[0]));
2731 }
2732
2733 rtx discratch = gen_reg_rtx (V2DImode);
2734 rtx tiscratch = gen_reg_rtx (TImode);
2735
2736 /* Emit the lxvr*x insn. */
2737 pat = GEN_FCN (icode) (tiscratch, addr);
2738 if (!pat)
2739 return 0;
2740 emit_insn (pat);
2741
2742 /* Emit a sign extension from V16QI, V8HI, or V4SI to V2DI. */
2743 rtx temp1;
2744 if (icode == CODE_FOR_vsx_lxvrbx)
2745 {
2746 temp1 = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
2747 emit_insn (gen_vsx_sign_extend_v16qi_v2di (discratch, temp1));
2748 }
2749 else if (icode == CODE_FOR_vsx_lxvrhx)
2750 {
2751 temp1 = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
2752 emit_insn (gen_vsx_sign_extend_v8hi_v2di (discratch, temp1));
2753 }
2754 else if (icode == CODE_FOR_vsx_lxvrwx)
2755 {
2756 temp1 = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
2757 emit_insn (gen_vsx_sign_extend_v4si_v2di (discratch, temp1));
2758 }
2759 else if (icode == CODE_FOR_vsx_lxvrdx)
2760 discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
2761 else
2762 gcc_unreachable ();
2763
2764 /* Emit the sign extension from V2DI (double) to TI (quad). */
2765 rtx temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
2766 emit_insn (gen_extendditi2_vector (target, temp2));
2767
2768 return target;
2769 }
2770
2771 /* Expand a builtin function that loads a scalar into a vector register
2772 with zero extension, and return the expanded rtx. */
2773 static rtx
2774 lxvrze_expand_builtin (rtx target, insn_code icode, rtx *op,
2775 machine_mode tmode, machine_mode smode)
2776 {
2777 rtx pat, addr;
2778 op[1] = copy_to_mode_reg (Pmode, op[1]);
2779
2780 if (op[0] == const0_rtx)
2781 addr = gen_rtx_MEM (tmode, op[1]);
2782 else
2783 {
2784 op[0] = copy_to_mode_reg (Pmode, op[0]);
2785 addr = gen_rtx_MEM (smode,
2786 gen_rtx_PLUS (Pmode, op[1], op[0]));
2787 }
2788
2789 pat = GEN_FCN (icode) (target, addr);
2790 if (!pat)
2791 return 0;
2792 emit_insn (pat);
2793 return target;
2794 }
2795
2796 /* Expand an AltiVec vector store builtin, and return the expanded rtx. */
2797 static rtx
2798 stv_expand_builtin (insn_code icode, rtx *op,
2799 machine_mode tmode, machine_mode smode)
2800 {
2801 op[2] = copy_to_mode_reg (Pmode, op[2]);
2802
2803 /* For STVX, express the RTL accurately by ANDing the address with -16.
2804 STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
2805 so the raw address is fine. */
2806 /* TODO: That statement seems wrong, as the UNSPECs don't surround the
2807 memory expression, so a latent bug may lie here. The &-16 is likely
2808 needed for all VMX-style stores. */
2809 if (icode == CODE_FOR_altivec_stvx_v2df
2810 || icode == CODE_FOR_altivec_stvx_v2di
2811 || icode == CODE_FOR_altivec_stvx_v4sf
2812 || icode == CODE_FOR_altivec_stvx_v4si
2813 || icode == CODE_FOR_altivec_stvx_v8hi
2814 || icode == CODE_FOR_altivec_stvx_v16qi)
2815 {
2816 rtx rawaddr;
2817 if (op[1] == const0_rtx)
2818 rawaddr = op[2];
2819 else
2820 {
2821 op[1] = copy_to_mode_reg (Pmode, op[1]);
2822 rawaddr = gen_rtx_PLUS (Pmode, op[2], op[1]);
2823 }
2824
2825 rtx addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
2826 addr = gen_rtx_MEM (tmode, addr);
2827 op[0] = copy_to_mode_reg (tmode, op[0]);
2828 emit_insn (gen_rtx_SET (addr, op[0]));
2829 }
2830 else if (icode == CODE_FOR_vsx_stxvrbx
2831 || icode == CODE_FOR_vsx_stxvrhx
2832 || icode == CODE_FOR_vsx_stxvrwx
2833 || icode == CODE_FOR_vsx_stxvrdx)
2834 {
2835 rtx truncrtx = gen_rtx_TRUNCATE (tmode, op[0]);
2836 op[0] = copy_to_mode_reg (E_TImode, truncrtx);
2837
2838 rtx addr;
2839 if (op[1] == const0_rtx)
2840 addr = gen_rtx_MEM (tmode, op[2]);
2841 else
2842 {
2843 op[1] = copy_to_mode_reg (Pmode, op[1]);
2844 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op[2], op[1]));
2845 }
2846 rtx pat = GEN_FCN (icode) (addr, op[0]);
2847 if (pat)
2848 emit_insn (pat);
2849 }
2850 else
2851 {
2852 if (!insn_data[icode].operand[1].predicate (op[0], smode))
2853 op[0] = copy_to_mode_reg (smode, op[0]);
2854
2855 rtx addr;
2856 if (op[1] == const0_rtx)
2857 addr = gen_rtx_MEM (tmode, op[2]);
2858 else
2859 {
2860 op[1] = copy_to_mode_reg (Pmode, op[1]);
2861 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op[2], op[1]));
2862 }
2863
2864 rtx pat = GEN_FCN (icode) (addr, op[0]);
2865 if (pat)
2866 emit_insn (pat);
2867 }
2868
2869 return NULL_RTX;
2870 }
2871
2872 /* Expand the MMA built-in in EXP, and return the expanded rtx. */
2873 static rtx
2874 mma_expand_builtin (tree exp, rtx target, insn_code icode,
2875 rs6000_gen_builtins fcode)
2876 {
2877 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
2878 bool void_func = TREE_TYPE (TREE_TYPE (fndecl)) == void_type_node;
2879 machine_mode tmode = VOIDmode;
2880 rtx op[MAX_MMA_OPERANDS];
2881 unsigned nopnds = 0;
2882
2883 if (!void_func)
2884 {
2885 tmode = insn_data[icode].operand[0].mode;
2886 if (!(target
2887 && GET_MODE (target) == tmode
2888 && insn_data[icode].operand[0].predicate (target, tmode)))
2889 target = gen_reg_rtx (tmode);
2890 op[nopnds++] = target;
2891 }
2892 else
2893 target = const0_rtx;
2894
2895 call_expr_arg_iterator iter;
2896 tree arg;
2897 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
2898 {
2899 if (arg == error_mark_node)
2900 return const0_rtx;
2901
2902 rtx opnd;
2903 const struct insn_operand_data *insn_op;
2904 insn_op = &insn_data[icode].operand[nopnds];
2905 if (TREE_CODE (arg) == ADDR_EXPR
2906 && MEM_P (DECL_RTL (TREE_OPERAND (arg, 0))))
2907 opnd = DECL_RTL (TREE_OPERAND (arg, 0));
2908 else
2909 opnd = expand_normal (arg);
2910
2911 if (!insn_op->predicate (opnd, insn_op->mode))
2912 {
2913 /* TODO: This use of constraints needs explanation. */
2914 if (!strcmp (insn_op->constraint, "n"))
2915 {
2916 if (!CONST_INT_P (opnd))
2917 error ("argument %d must be an unsigned literal", nopnds);
2918 else
2919 error ("argument %d is an unsigned literal that is "
2920 "out of range", nopnds);
2921 return const0_rtx;
2922 }
2923 opnd = copy_to_mode_reg (insn_op->mode, opnd);
2924 }
2925
2926 /* Some MMA instructions have INOUT accumulator operands, so force
2927 their target register to be the same as their input register. */
2928 if (!void_func
2929 && nopnds == 1
2930 && !strcmp (insn_op->constraint, "0")
2931 && insn_op->mode == tmode
2932 && REG_P (opnd)
2933 && insn_data[icode].operand[0].predicate (opnd, tmode))
2934 target = op[0] = opnd;
2935
2936 op[nopnds++] = opnd;
2937 }
2938
2939 rtx pat;
2940 switch (nopnds)
2941 {
2942 case 1:
2943 pat = GEN_FCN (icode) (op[0]);
2944 break;
2945 case 2:
2946 pat = GEN_FCN (icode) (op[0], op[1]);
2947 break;
2948 case 3:
2949 /* The ASSEMBLE builtin source operands are reversed in little-endian
2950 mode, so reorder them. */
2951 if (fcode == RS6000_BIF_ASSEMBLE_PAIR_V_INTERNAL && !WORDS_BIG_ENDIAN)
2952 std::swap (op[1], op[2]);
2953 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
2954 break;
2955 case 4:
2956 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
2957 break;
2958 case 5:
2959 /* The ASSEMBLE builtin source operands are reversed in little-endian
2960 mode, so reorder them. */
2961 if (fcode == RS6000_BIF_ASSEMBLE_ACC_INTERNAL && !WORDS_BIG_ENDIAN)
2962 {
2963 std::swap (op[1], op[4]);
2964 std::swap (op[2], op[3]);
2965 }
2966 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
2967 break;
2968 case 6:
2969 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
2970 break;
2971 case 7:
2972 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5], op[6]);
2973 break;
2974 default:
2975 gcc_unreachable ();
2976 }
2977
2978 if (!pat)
2979 return NULL_RTX;
2980
2981 emit_insn (pat);
2982 return target;
2983 }
2984
2985 /* Return the correct ICODE value depending on whether we are
2986 setting or reading the HTM SPRs. */
2987 static inline enum insn_code
2988 rs6000_htm_spr_icode (bool nonvoid)
2989 {
2990 if (nonvoid)
2991 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
2992 else
2993 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
2994 }
2995
2996 /* Return the appropriate SPR number associated with the given builtin. */
2997 static inline HOST_WIDE_INT
2998 htm_spr_num (enum rs6000_gen_builtins code)
2999 {
3000 if (code == RS6000_BIF_GET_TFHAR
3001 || code == RS6000_BIF_SET_TFHAR)
3002 return TFHAR_SPR;
3003 else if (code == RS6000_BIF_GET_TFIAR
3004 || code == RS6000_BIF_SET_TFIAR)
3005 return TFIAR_SPR;
3006 else if (code == RS6000_BIF_GET_TEXASR
3007 || code == RS6000_BIF_SET_TEXASR)
3008 return TEXASR_SPR;
3009 gcc_assert (code == RS6000_BIF_GET_TEXASRU
3010 || code == RS6000_BIF_SET_TEXASRU);
3011 return TEXASRU_SPR;
3012 }
3013
3014 /* Expand the HTM builtin in EXP and store the result in TARGET.
3015 Return the expanded rtx. */
3016 static rtx
3017 htm_expand_builtin (bifdata *bifaddr, rs6000_gen_builtins fcode,
3018 tree exp, rtx target)
3019 {
3020 if (!TARGET_POWERPC64
3021 && (fcode == RS6000_BIF_TABORTDC
3022 || fcode == RS6000_BIF_TABORTDCI))
3023 {
3024 error ("builtin %qs is only valid in 64-bit mode", bifaddr->bifname);
3025 return const0_rtx;
3026 }
3027
3028 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
3029 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
3030 bool uses_spr = bif_is_htmspr (*bifaddr);
3031 insn_code icode = bifaddr->icode;
3032
3033 if (uses_spr)
3034 icode = rs6000_htm_spr_icode (nonvoid);
3035
3036 rtx op[MAX_HTM_OPERANDS];
3037 int nopnds = 0;
3038 const insn_operand_data *insn_op = &insn_data[icode].operand[0];
3039
3040 if (nonvoid)
3041 {
3042 machine_mode tmode = (uses_spr) ? insn_op->mode : E_SImode;
3043 if (!target
3044 || GET_MODE (target) != tmode
3045 || (uses_spr && !insn_op->predicate (target, tmode)))
3046 target = gen_reg_rtx (tmode);
3047 if (uses_spr)
3048 op[nopnds++] = target;
3049 }
3050
3051 tree arg;
3052 call_expr_arg_iterator iter;
3053
3054 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
3055 {
3056 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
3057 return const0_rtx;
3058
3059 insn_op = &insn_data[icode].operand[nopnds];
3060 op[nopnds] = expand_normal (arg);
3061
3062 if (!insn_op->predicate (op[nopnds], insn_op->mode))
3063 {
3064 /* TODO: This use of constraints could use explanation.
3065 This happens in a couple of places; perhaps make it a
3066 function to document what's happening. */
3067 if (!strcmp (insn_op->constraint, "n"))
3068 {
3069 int arg_num = nonvoid ? nopnds : nopnds + 1;
3070 if (!CONST_INT_P (op[nopnds]))
3071 error ("argument %d must be an unsigned literal", arg_num);
3072 else
3073 error ("argument %d is an unsigned literal that is "
3074 "out of range", arg_num);
3075 return const0_rtx;
3076 }
3077 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
3078 }
3079
3080 nopnds++;
3081 }
3082
3083 /* Handle the builtins for extended mnemonics. These accept
3084 no arguments, but map to builtins that take arguments. */
3085 switch (fcode)
3086 {
3087 case RS6000_BIF_TENDALL: /* Alias for: tend. 1 */
3088 case RS6000_BIF_TRESUME: /* Alias for: tsr. 1 */
3089 op[nopnds++] = GEN_INT (1);
3090 break;
3091 case RS6000_BIF_TSUSPEND: /* Alias for: tsr. 0 */
3092 op[nopnds++] = GEN_INT (0);
3093 break;
3094 default:
3095 break;
3096 }
3097
3098 /* If this builtin accesses an SPR, then pass in the appropriate
3099 SPR number as the final operand. */
3100 rtx cr = NULL_RTX;
3101 if (uses_spr)
3102 {
3103 machine_mode mode = TARGET_POWERPC64 ? DImode : SImode;
3104 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
3105 }
3106 /* If this builtin accesses a CR field, then pass in a scratch
3107 CR field as the last operand. */
3108 else if (bif_is_htmcr (*bifaddr))
3109 {
3110 cr = gen_reg_rtx (CCmode);
3111 op[nopnds++] = cr;
3112 }
3113
3114 rtx pat;
3115 switch (nopnds)
3116 {
3117 case 1:
3118 pat = GEN_FCN (icode) (op[0]);
3119 break;
3120 case 2:
3121 pat = GEN_FCN (icode) (op[0], op[1]);
3122 break;
3123 case 3:
3124 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
3125 break;
3126 case 4:
3127 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
3128 break;
3129 default:
3130 gcc_unreachable ();
3131 }
3132 if (!pat)
3133 return NULL_RTX;
3134 emit_insn (pat);
3135
3136 if (bif_is_htmcr (*bifaddr))
3137 {
3138 if (fcode == RS6000_BIF_TBEGIN)
3139 {
3140 /* Emit code to set TARGET to true or false depending on
3141 whether the tbegin. instruction succeeded or failed
3142 to start a transaction. We do this by placing the 1's
3143 complement of CR's EQ bit into TARGET. */
3144 rtx scratch = gen_reg_rtx (SImode);
3145 emit_insn (gen_rtx_SET (scratch,
3146 gen_rtx_EQ (SImode, cr,
3147 const0_rtx)));
3148 emit_insn (gen_rtx_SET (target,
3149 gen_rtx_XOR (SImode, scratch,
3150 GEN_INT (1))));
3151 }
3152 else
3153 {
3154 /* Emit code to copy the 4-bit condition register field
3155 CR into the least significant end of register TARGET. */
3156 rtx scratch1 = gen_reg_rtx (SImode);
3157 rtx scratch2 = gen_reg_rtx (SImode);
3158 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
3159 emit_insn (gen_movcc (subreg, cr));
3160 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
3161 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
3162 }
3163 }
3164
3165 if (nonvoid)
3166 return target;
3167 return const0_rtx;
3168 }
3169
3170 /* Expand an expression EXP that calls a built-in function,
3171 with result going to TARGET if that's convenient
3172 (and in mode MODE if that's convenient).
3173 SUBTARGET may be used as the target for computing one of EXP's operands.
3174 IGNORE is nonzero if the value is to be ignored.
3175 Use the new builtin infrastructure. */
3176 rtx
3177 rs6000_expand_builtin (tree exp, rtx target, rtx /* subtarget */,
3178 machine_mode /* mode */, int ignore)
3179 {
3180 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
3181 enum rs6000_gen_builtins fcode
3182 = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl);
3183
3184 /* Emit error message if it's an unresolved overloaded builtin. */
3185 if (fcode > RS6000_OVLD_NONE)
3186 {
3187 error ("unresolved overload for builtin %qF", fndecl);
3188 return const0_rtx;
3189 }
3190
3191 size_t uns_fcode = (size_t)fcode;
3192 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
3193
3194 /* TODO: The following commentary and code are inherited from the original
3195 builtin processing code. The commentary is a bit confusing, with the
3196 intent being that KFmode is always IEEE-128, IFmode is always IBM
3197 double-double, and TFmode is the current long double. The code is
3198 confusing in that it converts from KFmode to TFmode pattern names,
3199 when the other direction is more intuitive. Try to address this. */
3200
3201 /* We have two different modes (KFmode, TFmode) that are the IEEE
3202 128-bit floating point type, depending on whether long double is the
3203 IBM extended double (KFmode) or long double is IEEE 128-bit (TFmode).
3204 It is simpler if we only define one variant of the built-in function,
3205 and switch the code when defining it, rather than defining two built-
3206 ins and using the overload table in rs6000-c.cc to switch between the
3207 two. If we don't have the proper assembler, don't do this switch
3208 because CODE_FOR_*kf* and CODE_FOR_*tf* will be CODE_FOR_nothing. */
3209 if (FLOAT128_IEEE_P (TFmode))
3210 switch (icode)
3211 {
3212 case CODE_FOR_sqrtkf2_odd:
3213 icode = CODE_FOR_sqrttf2_odd;
3214 break;
3215 case CODE_FOR_trunckfdf2_odd:
3216 icode = CODE_FOR_trunctfdf2_odd;
3217 break;
3218 case CODE_FOR_addkf3_odd:
3219 icode = CODE_FOR_addtf3_odd;
3220 break;
3221 case CODE_FOR_subkf3_odd:
3222 icode = CODE_FOR_subtf3_odd;
3223 break;
3224 case CODE_FOR_mulkf3_odd:
3225 icode = CODE_FOR_multf3_odd;
3226 break;
3227 case CODE_FOR_divkf3_odd:
3228 icode = CODE_FOR_divtf3_odd;
3229 break;
3230 case CODE_FOR_fmakf4_odd:
3231 icode = CODE_FOR_fmatf4_odd;
3232 break;
3233 case CODE_FOR_xsxexpqp_kf_di:
3234 icode = CODE_FOR_xsxexpqp_tf_di;
3235 break;
3236 case CODE_FOR_xsxexpqp_kf_v2di:
3237 icode = CODE_FOR_xsxexpqp_tf_v2di;
3238 break;
3239 case CODE_FOR_xsxsigqp_kf_ti:
3240 icode = CODE_FOR_xsxsigqp_tf_ti;
3241 break;
3242 case CODE_FOR_xsxsigqp_kf_v1ti:
3243 icode = CODE_FOR_xsxsigqp_tf_v1ti;
3244 break;
3245 case CODE_FOR_xststdcnegqp_kf:
3246 icode = CODE_FOR_xststdcnegqp_tf;
3247 break;
3248 case CODE_FOR_xsiexpqp_kf_di:
3249 icode = CODE_FOR_xsiexpqp_tf_di;
3250 break;
3251 case CODE_FOR_xsiexpqp_kf_v2di:
3252 icode = CODE_FOR_xsiexpqp_tf_v2di;
3253 break;
3254 case CODE_FOR_xsiexpqpf_kf:
3255 icode = CODE_FOR_xsiexpqpf_tf;
3256 break;
3257 case CODE_FOR_xststdc_kf:
3258 icode = CODE_FOR_xststdc_tf;
3259 break;
3260 case CODE_FOR_xscmpexpqp_eq_kf:
3261 icode = CODE_FOR_xscmpexpqp_eq_tf;
3262 break;
3263 case CODE_FOR_xscmpexpqp_lt_kf:
3264 icode = CODE_FOR_xscmpexpqp_lt_tf;
3265 break;
3266 case CODE_FOR_xscmpexpqp_gt_kf:
3267 icode = CODE_FOR_xscmpexpqp_gt_tf;
3268 break;
3269 case CODE_FOR_xscmpexpqp_unordered_kf:
3270 icode = CODE_FOR_xscmpexpqp_unordered_tf;
3271 break;
3272 default:
3273 break;
3274 }
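  /* Illustrative example (not part of the expansion logic): with
     -mabi=ieeelongdouble, TFmode is IEEE 128-bit, so a call such as

	 __float128 f (__float128 x)
	 {
	   return __builtin_sqrtf128_round_to_odd (x);
	 }

     is expanded with CODE_FOR_sqrttf2_odd selected above; the builtin
     name the user writes does not change.  */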
3275
3276 /* Because "#pragma GCC target" may change options, all builtins are
3277 initialized up front; actual availability is checked here, at expand
3278 time. For unsupported builtins, report an error and emit a normal call. */
3279 bifdata *bifaddr = &rs6000_builtin_info[uns_fcode];
3280
3281 if (!rs6000_builtin_is_supported (fcode))
3282 {
3283 rs6000_invalid_builtin (fcode);
3284 return expand_call (exp, target, ignore);
3285 }
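  /* Illustrative example: the builtin is registered even when the current
     options do not enable it, so e.g.

	 #pragma GCC target ("cpu=power8")
	 unsigned long long f (void) { return __builtin_darn (); }

     parses normally, and the missing -mcpu=power9 error is only issued
     here, at expansion time, before falling back to an ordinary call.  */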
3286
3287 if (bif_is_nosoft (*bifaddr)
3288 && rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
3289 {
3290 error ("%qs not supported with %<-msoft-float%>",
3291 bifaddr->bifname);
3292 return const0_rtx;
3293 }
3294
3295 if (bif_is_no32bit (*bifaddr) && TARGET_32BIT)
3296 {
3297 error ("%qs is not supported in 32-bit mode", bifaddr->bifname);
3298 return const0_rtx;
3299 }
3300
3301 if (bif_is_ibmld (*bifaddr) && !FLOAT128_2REG_P (TFmode))
3302 {
3303 error ("%qs requires %<long double%> to be IBM 128-bit format",
3304 bifaddr->bifname);
3305 return const0_rtx;
3306 }
3307
3308 if (bif_is_ibm128 (*bifaddr) && !ibm128_float_type_node)
3309 {
3310 error ("%qs requires %<__ibm128%> type support",
3311 bifaddr->bifname);
3312 return const0_rtx;
3313 }
3314
3315 if (bif_is_cpu (*bifaddr))
3316 return cpu_expand_builtin (fcode, exp, target);
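  /* The "cpu" builtins (__builtin_cpu_init, __builtin_cpu_is,
     __builtin_cpu_supports) read the platform/hwcap words that glibc
     keeps in the TCB, e.g. (illustrative)

	 if (__builtin_cpu_is ("power10") || __builtin_cpu_supports ("mma"))
	   ...

     so they bypass the generic operand handling below.  */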
3317
3318 if (bif_is_extract (*bifaddr))
3319 return altivec_expand_vec_ext_builtin (exp, target);
3320
3321 if (bif_is_predicate (*bifaddr))
3322 return altivec_expand_predicate_builtin (icode, exp, target);
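  /* Predicate builtins underlie the vector comparison intrinsics that
     return a scalar truth value, e.g. (illustrative)

	 if (vec_all_eq (a, b) || vec_any_gt (x, y))
	   ...

     The expansion emits the vector compare and reads the result back
     from CR6, hence the dedicated helper.  */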
3323
3324 if (bif_is_htm (*bifaddr))
3325 return htm_expand_builtin (bifaddr, fcode, exp, target);
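  /* HTM builtins, e.g. (illustrative, requires -mhtm)

	 if (__builtin_tbegin (0))
	   {
	     ...
	     __builtin_tend (0);
	   }

     have condition-register results and special operand rules, so they
     are expanded by htm_expand_builtin rather than by the code below.  */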
3326
3327 if (bif_is_32bit (*bifaddr) && TARGET_32BIT)
3328 {
3329 if (fcode == RS6000_BIF_MFTB)
3330 icode = CODE_FOR_rs6000_mftb_si;
3331 else if (fcode == RS6000_BIF_BPERMD)
3332 icode = CODE_FOR_bpermd_si;
3333 else if (fcode == RS6000_BIF_DARN)
3334 icode = CODE_FOR_darn_64_si;
3335 else if (fcode == RS6000_BIF_DARN_32)
3336 icode = CODE_FOR_darn_32_si;
3337 else if (fcode == RS6000_BIF_DARN_RAW)
3338 icode = CODE_FOR_darn_raw_si;
3339 else
3340 gcc_unreachable ();
3341 }
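  /* Illustrative example: with -m32,

	 unsigned long t = __builtin_ppc_mftb ();

     expands through CODE_FOR_rs6000_mftb_si selected above rather than
     the DImode pattern used on 64-bit targets.  */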
3342
3343 if (bif_is_endian (*bifaddr) && BYTES_BIG_ENDIAN)
3344 {
3345 if (fcode == RS6000_BIF_LD_ELEMREV_V1TI)
3346 icode = CODE_FOR_vsx_load_v1ti;
3347 else if (fcode == RS6000_BIF_LD_ELEMREV_V2DF)
3348 icode = CODE_FOR_vsx_load_v2df;
3349 else if (fcode == RS6000_BIF_LD_ELEMREV_V2DI)
3350 icode = CODE_FOR_vsx_load_v2di;
3351 else if (fcode == RS6000_BIF_LD_ELEMREV_V4SF)
3352 icode = CODE_FOR_vsx_load_v4sf;
3353 else if (fcode == RS6000_BIF_LD_ELEMREV_V4SI)
3354 icode = CODE_FOR_vsx_load_v4si;
3355 else if (fcode == RS6000_BIF_LD_ELEMREV_V8HI)
3356 icode = CODE_FOR_vsx_load_v8hi;
3357 else if (fcode == RS6000_BIF_LD_ELEMREV_V16QI)
3358 icode = CODE_FOR_vsx_load_v16qi;
3359 else if (fcode == RS6000_BIF_ST_ELEMREV_V1TI)
3360 icode = CODE_FOR_vsx_store_v1ti;
3361 else if (fcode == RS6000_BIF_ST_ELEMREV_V2DF)
3362 icode = CODE_FOR_vsx_store_v2df;
3363 else if (fcode == RS6000_BIF_ST_ELEMREV_V2DI)
3364 icode = CODE_FOR_vsx_store_v2di;
3365 else if (fcode == RS6000_BIF_ST_ELEMREV_V4SF)
3366 icode = CODE_FOR_vsx_store_v4sf;
3367 else if (fcode == RS6000_BIF_ST_ELEMREV_V4SI)
3368 icode = CODE_FOR_vsx_store_v4si;
3369 else if (fcode == RS6000_BIF_ST_ELEMREV_V8HI)
3370 icode = CODE_FOR_vsx_store_v8hi;
3371 else if (fcode == RS6000_BIF_ST_ELEMREV_V16QI)
3372 icode = CODE_FOR_vsx_store_v16qi;
3373 else if (fcode == RS6000_BIF_VCLZLSBB_V16QI)
3374 icode = CODE_FOR_vclzlsbb_v16qi;
3375 else if (fcode == RS6000_BIF_VCLZLSBB_V4SI)
3376 icode = CODE_FOR_vclzlsbb_v4si;
3377 else if (fcode == RS6000_BIF_VCLZLSBB_V8HI)
3378 icode = CODE_FOR_vclzlsbb_v8hi;
3379 else if (fcode == RS6000_BIF_VCTZLSBB_V16QI)
3380 icode = CODE_FOR_vctzlsbb_v16qi;
3381 else if (fcode == RS6000_BIF_VCTZLSBB_V4SI)
3382 icode = CODE_FOR_vctzlsbb_v4si;
3383 else if (fcode == RS6000_BIF_VCTZLSBB_V8HI)
3384 icode = CODE_FOR_vctzlsbb_v8hi;
3385 else
3386 gcc_unreachable ();
3387 }
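  /* Illustrative example: the element-reversing builtins underlie
     vec_xl/vec_xst, so on a big-endian target

	 vector int v = vec_xl (0, p);

     (p assumed to point to int) needs no element reversal and uses the
     plain vsx_load_v4si pattern chosen above.  */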
3388
3389 if (bif_is_ibm128 (*bifaddr) && TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
3390 {
3391 if (fcode == RS6000_BIF_PACK_IF)
3392 {
3393 icode = CODE_FOR_packtf;
3394 fcode = RS6000_BIF_PACK_TF;
3395 uns_fcode = (size_t) fcode;
3396 }
3397 else if (fcode == RS6000_BIF_UNPACK_IF)
3398 {
3399 icode = CODE_FOR_unpacktf;
3400 fcode = RS6000_BIF_UNPACK_TF;
3401 uns_fcode = (size_t) fcode;
3402 }
3403 }
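  /* Illustrative example: when long double is IBM double-double, __ibm128
     and long double share the same (TFmode) representation, so

	 __ibm128 x = __builtin_pack_ibm128 (hi, lo);

     is expanded with the TFmode pack pattern chosen above instead of an
     IFmode one.  */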
3404
3405 /* TRUE iff the built-in function returns void. */
3406 bool void_func = TREE_TYPE (TREE_TYPE (fndecl)) == void_type_node;
3407 /* Position of first argument (0 for void-returning functions, else 1). */
3408 int k;
3409 /* Modes for the return value, if any, and arguments. */
3410 const int MAX_BUILTIN_ARGS = 6;
3411 machine_mode mode[MAX_BUILTIN_ARGS + 1];
3412
3413 if (void_func)
3414 k = 0;
3415 else
3416 {
3417 k = 1;
3418 mode[0] = insn_data[icode].operand[0].mode;
3419 }
3420
3421 /* Tree expressions for each argument. */
3422 tree arg[MAX_BUILTIN_ARGS];
3423 /* RTL expressions for each argument. */
3424 rtx op[MAX_BUILTIN_ARGS];
3425
3426 int nargs = bifaddr->nargs;
3427 gcc_assert (nargs <= MAX_BUILTIN_ARGS);
3428
3429
3430 for (int i = 0; i < nargs; i++)
3431 {
3432 arg[i] = CALL_EXPR_ARG (exp, i);
3433 if (arg[i] == error_mark_node)
3434 return const0_rtx;
3435 STRIP_NOPS (arg[i]);
3436 op[i] = expand_normal (arg[i]);
3437 /* A few pesky patterns omit an operand's mode; fall back to Pmode. */
3438 mode[i+k] = insn_data[icode].operand[i+k].mode;
3439 if (!mode[i+k])
3440 mode[i+k] = Pmode;
3441 }
3442
3443 /* Check for restricted constant arguments. */
3444 for (int i = 0; i < 2; i++)
3445 {
3446 switch (bifaddr->restr[i])
3447 {
3448 case RES_BITS:
3449 {
3450 size_t mask = 1;
3451 mask <<= bifaddr->restr_val1[i];
3452 mask--;
3453 tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
3454 STRIP_NOPS (restr_arg);
3455 if (!(TREE_CODE (restr_arg) == INTEGER_CST
3456 && (TREE_INT_CST_LOW (restr_arg) & ~mask) == 0))
3457 {
3458 unsigned p = (1U << bifaddr->restr_val1[i]) - 1;
3459 error ("argument %d must be a literal between 0 and %d,"
3460 " inclusive",
3461 bifaddr->restr_opnd[i], p);
3462 return CONST0_RTX (mode[0]);
3463 }
3464 break;
3465 }
3466 case RES_RANGE:
3467 {
3468 tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
3469 STRIP_NOPS (restr_arg);
3470 if (!(TREE_CODE (restr_arg) == INTEGER_CST
3471 && IN_RANGE (tree_to_shwi (restr_arg),
3472 bifaddr->restr_val1[i],
3473 bifaddr->restr_val2[i])))
3474 {
3475 error ("argument %d must be a literal between %d and %d,"
3476 " inclusive",
3477 bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
3478 bifaddr->restr_val2[i]);
3479 return CONST0_RTX (mode[0]);
3480 }
3481 break;
3482 }
3483 case RES_VAR_RANGE:
3484 {
3485 tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
3486 STRIP_NOPS (restr_arg);
3487 if (TREE_CODE (restr_arg) == INTEGER_CST
3488 && !IN_RANGE (tree_to_shwi (restr_arg),
3489 bifaddr->restr_val1[i],
3490 bifaddr->restr_val2[i]))
3491 {
3492 error ("argument %d must be a variable or a literal "
3493 "between %d and %d, inclusive",
3494 bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
3495 bifaddr->restr_val2[i]);
3496 return CONST0_RTX (mode[0]);
3497 }
3498 break;
3499 }
3500 case RES_VALUES:
3501 {
3502 tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
3503 STRIP_NOPS (restr_arg);
3504 if (!(TREE_CODE (restr_arg) == INTEGER_CST
3505 && (tree_to_shwi (restr_arg) == bifaddr->restr_val1[i]
3506 || tree_to_shwi (restr_arg) == bifaddr->restr_val2[i])))
3507 {
3508 error ("argument %d must be either a literal %d or a "
3509 "literal %d",
3510 bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
3511 bifaddr->restr_val2[i]);
3512 return CONST0_RTX (mode[0]);
3513 }
3514 break;
3515 }
3516 default:
3517 case RES_NONE:
3518 break;
3519 }
3520 }
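  /* Illustrative example of a restricted operand: with <altivec.h>, the
     final argument of vec_sld must be an integer literal in 0..15, so

	 vector int bad (vector int a, vector int b, int n)
	 {
	   return vec_sld (a, b, n);
	 }

     is rejected by one of the checks above because n is not a literal.  */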
3521
3522 if (bif_is_ldstmask (*bifaddr))
3523 return rs6000_expand_ldst_mask (target, arg[0]);
3524
3525 if (bif_is_stvec (*bifaddr))
3526 {
3527 if (bif_is_reve (*bifaddr))
3528 icode = elemrev_icode (fcode);
3529 return stv_expand_builtin (icode, op, mode[0], mode[1]);
3530 }
3531
3532 if (bif_is_ldvec (*bifaddr))
3533 {
3534 if (bif_is_reve (*bifaddr))
3535 icode = elemrev_icode (fcode);
3536 return ldv_expand_builtin (target, icode, op, mode[0]);
3537 }
3538
3539 if (bif_is_lxvrse (*bifaddr))
3540 return lxvrse_expand_builtin (target, icode, op, mode[0], mode[1]);
3541
3542 if (bif_is_lxvrze (*bifaddr))
3543 return lxvrze_expand_builtin (target, icode, op, mode[0], mode[1]);
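  /* These handle the ISA 3.1 rightmost-element loads with sign or zero
     extension, e.g. (illustrative; see the PVIPR for exact prototypes)

	 vector signed __int128 v = vec_xl_sext (0, (signed char *) p);

     which need an extra extend step after the load.  */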
3544
3545 if (bif_is_mma (*bifaddr))
3546 return mma_expand_builtin (exp, target, icode, fcode);
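  /* MMA builtins carry a __vector_quad accumulator operand, e.g.
     (illustrative, with a and b being vector unsigned char operands)

	 __vector_quad acc;
	 __builtin_mma_xxsetaccz (&acc);
	 __builtin_mma_xvf32gerpp (&acc, a, b);

     and need the pointer/accumulator handling in mma_expand_builtin.  */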
3547
3548 if (void_func)
3549 target = NULL_RTX;
3550 else if (target == 0
3551 || GET_MODE (target) != mode[0]
3552 || !insn_data[icode].operand[0].predicate (target, mode[0]))
3553 target = gen_reg_rtx (mode[0]);
3554
3555 for (int i = 0; i < nargs; i++)
3556 if (!insn_data[icode].operand[i+k].predicate (op[i], mode[i+k]))
3557 op[i] = copy_to_mode_reg (mode[i+k], op[i]);
3558
3559 rtx pat;
3560
3561 switch (nargs)
3562 {
3563 case 0:
3564 pat = (void_func
3565 ? GEN_FCN (icode) ()
3566 : GEN_FCN (icode) (target));
3567 break;
3568 case 1:
3569 pat = (void_func
3570 ? GEN_FCN (icode) (op[0])
3571 : GEN_FCN (icode) (target, op[0]));
3572 break;
3573 case 2:
3574 pat = (void_func
3575 ? GEN_FCN (icode) (op[0], op[1])
3576 : GEN_FCN (icode) (target, op[0], op[1]));
3577 break;
3578 case 3:
3579 pat = (void_func
3580 ? GEN_FCN (icode) (op[0], op[1], op[2])
3581 : GEN_FCN (icode) (target, op[0], op[1], op[2]));
3582 break;
3583 case 4:
3584 pat = (void_func
3585 ? GEN_FCN (icode) (op[0], op[1], op[2], op[3])
3586 : GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]));
3587 break;
3588 case 5:
3589 pat = (void_func
3590 ? GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4])
3591 : GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]));
3592 break;
3593 case 6:
3594 pat = (void_func
3595 ? GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5])
3596 : GEN_FCN (icode) (target, op[0], op[1],
3597 op[2], op[3], op[4], op[5]));
3598 break;
3599 default:
3600 gcc_assert (MAX_BUILTIN_ARGS == 6);
3601 gcc_unreachable ();
3602 }
3603
3604 if (!pat)
3605 return 0;
3606
3607 emit_insn (pat);
3608 return target;
3609 }