;; Copyright (C) 2009-2020 Free Software Foundation, Inc.
;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>

;; This file is part of GCC.

;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.

;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
;; License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; Iterator for comparison types
(define_code_iterator CMP_TEST [eq lt gt unordered])

;; Mode attribute for vector floate and floato conversions
(define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])

;; Iterator for both scalar and vector floating point types supported by VSX
(define_mode_iterator VSX_B [DF V4SF V2DF])

;; Iterator for the 2 64-bit vector types
(define_mode_iterator VSX_D [V2DF V2DI])

;; Mode iterator to handle swapping words on little endian for the 128-bit
;; types that go in a single vector register.
(define_mode_iterator VSX_LE_128 [(KF "FLOAT128_VECTOR_P (KFmode)")
                                  (TF "FLOAT128_VECTOR_P (TFmode)")

;; Iterator for 128-bit integer types that go in a single vector register.
(define_mode_iterator VSX_TI [TI V1TI])

;; Iterator for the 2 32-bit vector types
(define_mode_iterator VSX_W [V4SF V4SI])

;; Iterator for the DF types
(define_mode_iterator VSX_DF [V2DF DF])

;; Iterator for vector floating point types supported by VSX
(define_mode_iterator VSX_F [V4SF V2DF])

;; Iterator for logical types supported by VSX
(define_mode_iterator VSX_L [V16QI
                             (KF "FLOAT128_VECTOR_P (KFmode)")
                             (TF "FLOAT128_VECTOR_P (TFmode)")])

;; Iterator for memory moves.
(define_mode_iterator VSX_M [V16QI
                             (KF "FLOAT128_VECTOR_P (KFmode)")
                             (TF "FLOAT128_VECTOR_P (TFmode)")

(define_mode_attr VSX_XXBR [(V8HI "h")

;; Map into the appropriate load/store name based on the type
(define_mode_attr VSm [(V16QI "vw4")

;; Map the register class used
(define_mode_attr VSr [(V16QI "v")

;; What value we need in the "isa" field to make the IEEE QP float work.
(define_mode_attr VSisa [(V16QI "*")

;; A mode attribute to disparage use of GPR registers, except for scalar
;; integer modes.
(define_mode_attr ??r [(V16QI "??r")

;; A mode attribute used for 128-bit constant values.
(define_mode_attr nW [(V16QI "W")

;; Same size integer type for floating point data
(define_mode_attr VSi [(V4SF "v4si")

(define_mode_attr VSI [(V4SF "V4SI")

;; Word size for same size conversion
(define_mode_attr VSc [(V4SF "w")

;; Map into either s or v, depending on whether this is a scalar or vector
;; operation.
(define_mode_attr VSv [(V16QI "v")

;; Appropriate type for add ops (and other simple FP ops)
(define_mode_attr VStype_simple [(V2DF "vecdouble")

;; Appropriate type for multiply ops
(define_mode_attr VStype_mul [(V2DF "vecdouble")

;; Appropriate type for divide ops.
(define_mode_attr VStype_div [(V2DF "vecdiv")

;; Map the scalar mode for a vector type
(define_mode_attr VS_scalar [(V1TI "TI")

;; Map to a double-sized vector mode
(define_mode_attr VS_double [(V4SI "V8SI")

;; Iterators for loading constants with xxspltib
(define_mode_iterator VSINT_84 [V4SI V2DI DI SI])
(define_mode_iterator VSINT_842 [V8HI V4SI V2DI])

;; Vector reverse byte modes
(define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])

;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
;; done on ISA 2.07 and not just ISA 3.0.
(define_mode_iterator VSX_EXTRACT_I [V16QI V8HI V4SI])
(define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])
(define_mode_iterator VSX_EXTRACT_I4 [V16QI V8HI V4SI V2DI])

(define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")

;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
;; insert to validate the operand number.
(define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
                                         (V8HI "const_0_to_7_operand")
                                         (V4SI "const_0_to_3_operand")])

;; Mode attribute to give the constraint for vector extract and insert
(define_mode_attr VSX_EX [(V16QI "v")

;; Mode iterator for binary floating types other than double, used to
;; optimize conversions to that floating point type from an extract
;; of an integer type
(define_mode_iterator VSX_EXTRACT_FL [SF
                                      (IF "FLOAT128_2REG_P (IFmode)")
                                      (KF "TARGET_FLOAT128_HW")
                                      (TF "FLOAT128_2REG_P (TFmode)
                                           || (FLOAT128_IEEE_P (TFmode)
                                               && TARGET_FLOAT128_HW)")])

;; Mode iterator for binary floating types that have a direct conversion
;; from 64-bit integer to floating point
(define_mode_iterator FL_CONV [SF
                               (KF "TARGET_FLOAT128_HW")
                               (TF "TARGET_FLOAT128_HW
                                    && FLOAT128_IEEE_P (TFmode)")])

;; Iterator for the 2 short vector types to do a splat from an integer
(define_mode_iterator VSX_SPLAT_I [V16QI V8HI])

;; Mode attribute to give the count for the splat instruction to splat
;; the value in the 64-bit integer slot
(define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])

;; Mode attribute to give the suffix for the splat instruction
(define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])

;; Constants for creating unspecs
(define_c_enum "unspec"
   UNSPEC_VSX_UNS_FLOAT2
   UNSPEC_VSX_UNS_FLOATE
   UNSPEC_VSX_UNS_FLOATO
   UNSPEC_VSX_SIGN_EXTEND
   UNSPEC_VSX_XVCVBF16SP
   UNSPEC_VSX_XVCVSPBF16
   UNSPEC_VSX_XVCVSPSXDS
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
   UNSPEC_VSX_FIRST_MATCH_INDEX
   UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX

(define_int_iterator XVCVBF16 [UNSPEC_VSX_XVCVSPBF16
                               UNSPEC_VSX_XVCVBF16SP])

(define_int_attr xvcvbf16 [(UNSPEC_VSX_XVCVSPBF16 "xvcvspbf16")
                           (UNSPEC_VSX_XVCVBF16SP "xvcvbf16sp")])

;; The patterns for LE permuted loads and stores come before the general
;; VSX moves so they match first.
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
          (parallel [(const_int 1) (const_int 0)])))
          (parallel [(const_int 1) (const_int 0)])))]
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[0]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          /* Replace the source memory address with a masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches the lvx
             instruction, so leave it alone.  */
      /* Otherwise, fall through to transform into a swapping load.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])
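
;; Hedged illustration (not part of the original file): on a pre-POWER9
;; little-endian target, a plain C vector load is expected to go through the
;; pattern above.  A minimal sketch, assuming GCC's generic vector
;; extensions and -mcpu=power8 on powerpc64le:
;;
;;   typedef double v2df __attribute__ ((vector_size (16)));
;;   v2df
;;   load_v2df (const v2df *p)
;;   {
;;     return *p;   /* Either lvx, when the 128-bit alignment rewrite in
;;                     the preparation code above applies, or lxvd2x
;;                     followed by xxpermdi ...,2 to undo the doubleword
;;                     swap that lxvd2x performs on little endian.  */
;;   }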
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
        (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_P (operands[0])
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          /* Replace the source memory address with a masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches the lvx
             instruction, so leave it alone.  */
      /* Otherwise, fall through to transform into a swapping load.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_P (operands[0])
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          /* Replace the source memory address with a masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches the lvx
             instruction, so leave it alone.  */
      /* Otherwise, fall through to transform into a swapping load.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_P (operands[0])
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          /* Replace the source memory address with a masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches the lvx
             instruction, so leave it alone.  */
      /* Otherwise, fall through to transform into a swapping load.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
          (parallel [(const_int 1) (const_int 0)])))
          (parallel [(const_int 1) (const_int 0)])))]
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches the stvx
             instruction, so leave it alone.  */
      /* Otherwise, fall through to transform into a swapping store.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
          (parallel [(const_int 1) (const_int 0)])))
          (parallel [(const_int 1) (const_int 0)])))
          (parallel [(const_int 1) (const_int 0)])))]
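
;; Hedged sketch of why the post-reload split permutes three times: the
;; source register is swapped in place, stored with a doubleword-reversing
;; store, then swapped back so a still-live source keeps its value.
;; Assuming GCC vector extensions and a pre-POWER9 LE target where the
;; stvx rewrite above does not apply:
;;
;;   typedef double v2df __attribute__ ((vector_size (16)));
;;   v2df
;;   store_and_reuse (v2df *p, v2df v)
;;   {
;;     *p = v;      /* xxpermdi vN,vN,vN,2 ; stxvd2x vN,...  */
;;     return v;    /* ...xxpermdi vN,vN,vN,2 restores v, which is still
;;                     live here.  */
;;   }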
(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_W 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches the stvx
             instruction, so leave it alone.  */
      /* Otherwise, fall through to transform into a swapping store.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]

(define_insn "*vsx_le_perm_store_v8hi"
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches the stvx
             instruction, so leave it alone.  */
      /* Otherwise, fall through to transform into a swapping store.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]

(define_insn "*vsx_le_perm_store_v16qi"
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches the stvx
             instruction, so leave it alone.  */
      /* Otherwise, fall through to transform into a swapping store.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]

;; Little endian word swapping for 128-bit types that are either scalars or
;; the special V1TI container class, for which it is not appropriate to use
;; vec_select.
(define_insn "*vsx_le_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=wa,wa,Z,&r,&r,Q")
          (match_operand:VSX_TI 1 "input_operand" "wa,Z,wa,r,Q,r")
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   xxpermdi %x0,%x1,%x1,2
   ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
   std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
  [(set_attr "length" "*,*,*,8,8,8")
   (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])

(define_insn_and_split "*vsx_le_undo_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=wa,wa")
          (match_operand:VSX_TI 1 "vsx_register_operand" "0,wa")
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  [(set (match_dup 0) (match_dup 1))]
  if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
      emit_note (NOTE_INSN_DELETED);
  [(set_attr "length" "0,4")
   (set_attr "type" "veclogical")])

(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=wa,r")
        (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  rtx tmp = (can_create_pseudo_p ()
             ? gen_reg_rtx_and_attrs (operands[0])
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
  [(set_attr "type" "vecload,load")
   (set_attr "length" "8,8")
   (set_attr "isa" "<VSisa>,*")])

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
        (match_operand:VSX_LE_128 1 "vsx_register_operand" "+wa,r"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore,store")
   (set_attr "length" "12,8")
   (set_attr "isa" "<VSisa>,*")])

  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
  rtx tmp = (can_create_pseudo_p ()
             ? gen_reg_rtx_and_attrs (operands[0])
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);

;; Peepholes to catch loads and stores for TImode if TImode landed in
;; GPR registers on a little endian system.
  [(set (match_operand:VSX_TI 0 "int_reg_operand")
        (rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand")
   (set (match_operand:VSX_TI 2 "int_reg_operand")
        (rotate:VSX_TI (match_dup 0)
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
  [(set (match_dup 2) (match_dup 1))])

  [(set (match_operand:VSX_TI 0 "int_reg_operand")
        (rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand")
   (set (match_operand:VSX_TI 2 "memory_operand")
        (rotate:VSX_TI (match_dup 0)
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && peep2_reg_dead_p (2, operands[0])"
  [(set (match_dup 2) (match_dup 1))])

;; Peephole to catch memory to memory transfers for TImode if TImode landed
;; in VSX registers on a little endian system.  The vector types and IEEE
;; 128-bit floating point are handled by the more generic swap elimination
;; pass.
  [(set (match_operand:TI 0 "vsx_register_operand")
        (rotate:TI (match_operand:TI 1 "vsx_register_operand")
   (set (match_operand:TI 2 "vsx_register_operand")
        (rotate:TI (match_dup 0)
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
  [(set (match_dup 2) (match_dup 1))])

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);

;; Vector constants that can be generated with XXSPLTIB, which was added in
;; ISA 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are
;; recognized.
(define_insn "xxspltib_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
  operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
  return "xxspltib %x0,%2";
  [(set_attr "type" "vecperm")])
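
;; Hedged example of a constant byte splat that can match the pattern above
;; via vec_splats from <altivec.h> (exact code generation may differ):
;;
;;   #include <altivec.h>
;;   vector signed char
;;   splat_m5 (void)
;;   {
;;     return vec_splats ((signed char) -5);   /* xxspltib %vsN,251 with
;;                                                ISA 3.0 (-mcpu=power9);
;;                                                251 is -5 & 0xff.  */
;;   }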
(define_insn "xxspltib_<mode>_nosplit"
  [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
        (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
  rtx op1 = operands[1];

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)

  operands[2] = GEN_INT (value & 0xff);
  return "xxspltib %x0,%2";
  [(set_attr "type" "vecperm")])

(define_insn_and_split "*xxspltib_<mode>_split"
  [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
        (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp = ((can_create_pseudo_p ())
             ? gen_reg_rtx (V16QImode)
             : gen_lowpart (V16QImode, op0));

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)

  emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));

  if (<MODE>mode == V2DImode)
    emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));

  else if (<MODE>mode == V4SImode)
    emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));

  else if (<MODE>mode == V8HImode)
    emit_insn (gen_altivec_vupkhsb (op0, tmp));

  [(set_attr "type" "vecperm")
   (set_attr "length" "8")])
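
;; Hedged example of the split form: a halfword splat whose value fits in a
;; signed byte may be built as a byte splat followed by a sign-extending
;; unpack, per the split above.  Assuming GCC vector extensions:
;;
;;   typedef short v8hi __attribute__ ((vector_size (16)));
;;   v8hi
;;   splat_hi_m5 (void)
;;   {
;;     /* Expected with -mcpu=power9: xxspltib then vupkhsb, turning
;;        sixteen -5 bytes into eight -5 halfwords.  */
;;     return (v8hi) { -5, -5, -5, -5, -5, -5, -5, -5 };
;;   }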
;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTIB
;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's
;; or all 1's, since the machine does not have to wait for the previous
;; instruction using the register being set (such as a store waiting on a slow
;; instruction).  But generate XXLXOR/XXLORC if it will avoid a register move.

;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
;;              VSX 0/-1   VMX const  GPR const LVX (VMX)  STVX (VMX)
(define_insn "vsx_mov<mode>_64bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      wa,        wa,       r,         we,        ?wQ,
                ?&r,       ??r,       ??Y,      <??r>,     wa,        v,
                ?wa,       v,         <??r>,    wZ,        v")
        (match_operand:VSX_M 1 "input_operand"
               "wa,        ZwO,       wa,       we,        r,         r,
                wQ,        Y,         r,        r,         wE,        jwM,
                ?jwM,      W,         <nW>,     v,         wZ"))]
  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  return rs6000_output_move_128bit (operands);
       "vecstore,  vecload,   vecsimple, mffgpr,    mftgpr,    load,
        store,     load,      store,     *,         vecsimple, vecsimple,
        vecsimple, *,         *,         vecstore,  vecload")
   (set_attr "num_insns"
   (set_attr "max_prefixed_insns"
       "<VSisa>,   <VSisa>,   <VSisa>,   *,         *,         *,
        <VSisa>,   *,         *,         *,         *")])

;;              VSX store  VSX load   VSX move  GPR load   GPR store  GPR move
;;              XXSPLTIB   VSPLTISW   VSX 0/-1  VMX const  GPR const
;;              LVX (VMX)  STVX (VMX)
(define_insn "*vsx_mov<mode>_32bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      wa,        wa,       ??r,       ??Y,       <??r>,
                wa,        v,         ?wa,      v,         <??r>,
        (match_operand:VSX_M 1 "input_operand"
               "wa,        ZwO,       wa,       Y,         r,         r,
                wE,        jwM,       ?jwM,     W,         <nW>,
  "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  return rs6000_output_move_128bit (operands);
       "vecstore,  vecload,   vecsimple, load,      store,     *,
        vecsimple, vecsimple, vecsimple, *,         *,
       "*,         *,         *,         16,        16,        16,
       "<VSisa>,   <VSisa>,   <VSisa>,   *,         *,         *,
        p9v,       *,         <VSisa>,   *,         *,

;; Explicit load/store expanders for the builtin functions
(define_expand "vsx_load_<mode>"
  [(set (match_operand:VSX_M 0 "vsx_register_operand")
        (match_operand:VSX_M 1 "memory_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);

(define_expand "vsx_store_<mode>"
  [(set (match_operand:VSX_M 0 "memory_operand")
        (match_operand:VSX_M 1 "vsx_register_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
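
;; Hedged usage example: the vec_vsx_ld/vec_vsx_st built-ins from
;; <altivec.h> are expected to go through these expanders, which insert the
;; LE swaps up front so the later swap-optimization pass can clean them up:
;;
;;   #include <altivec.h>
;;   void
;;   copy_v4si (vector int *dst, const vector int *src)
;;   {
;;     vec_vsx_st (vec_vsx_ld (0, src), 0, dst);
;;   }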
;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
;; when you really want their element-reversing behavior.
(define_insn "vsx_ld_elemrev_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
          (match_operand:V2DI 1 "memory_operand" "Z")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
          (match_operand:V1TI 1 "memory_operand" "Z")
          (parallel [(const_int 0)])))]
  "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
  return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2";
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v2df"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
          (match_operand:V2DF 1 "memory_operand" "Z")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
          (match_operand:V4SI 1 "memory_operand" "Z")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v4sf"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
          (match_operand:V4SF 1 "memory_operand" "Z")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
  [(set_attr "type" "vecload")])
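
;; Hedged example: these element-reversing loads back the
;; __builtin_vsx_ld_elemrev_* built-ins used by vec_xl_be, which loads
;; elements in big-endian element order regardless of target endianness:
;;
;;   #include <altivec.h>
;;   vector float
;;   load_be_order (const float *p)
;;   {
;;     return vec_xl_be (0, p);   /* expected to use lxvw4x on LE POWER8;
;;                                   a plain lxvx would not reverse the
;;                                   element order.  */
;;   }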
(define_expand "vsx_ld_elemrev_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
          (match_operand:V8HI 1 "memory_operand" "Z")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
  if (!TARGET_P9_VECTOR)
      rtx tmp = gen_reg_rtx (V4SImode);
      rtx subreg, subreg2, perm[16], pcv;
      /* 2 is leftmost element in register */
      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};

      subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
      emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
      subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,

(define_insn "*vsx_ld_elemrev_v8hi_internal"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
          (match_operand:V8HI 1 "memory_operand" "Z")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  [(set_attr "type" "vecload")])

(define_expand "vsx_ld_elemrev_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
          (match_operand:V16QI 1 "memory_operand" "Z")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int 9) (const_int 8)
                     (const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
  if (!TARGET_P9_VECTOR)
      rtx tmp = gen_reg_rtx (V4SImode);
      rtx subreg, subreg2, perm[16], pcv;
      /* 3 is leftmost element in register */
      unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};

      subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0);
      emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
      subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0);

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2,

(define_insn "vsx_ld_elemrev_v16qi_internal"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
          (match_operand:V16QI 1 "memory_operand" "Z")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int 9) (const_int 8)
                     (const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  [(set_attr "type" "vecload")])

(define_insn "vsx_st_elemrev_v1ti"
  [(set (match_operand:V1TI 0 "memory_operand" "=Z")
          (match_operand:V1TI 1 "vsx_register_operand" "+wa")
          (parallel [(const_int 0)])))
   (clobber (match_dup 1))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0";
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v2df"
  [(set (match_operand:V2DF 0 "memory_operand" "=Z")
          (match_operand:V2DF 1 "vsx_register_operand" "wa")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v2di"
  [(set (match_operand:V2DI 0 "memory_operand" "=Z")
          (match_operand:V2DI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v4sf"
  [(set (match_operand:V4SF 0 "memory_operand" "=Z")
          (match_operand:V4SF 1 "vsx_register_operand" "wa")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v4si"
  [(set (match_operand:V4SI 0 "memory_operand" "=Z")
          (match_operand:V4SI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
  [(set_attr "type" "vecstore")])

(define_expand "vsx_st_elemrev_v8hi"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
          (match_operand:V8HI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
  if (!TARGET_P9_VECTOR)
      rtx mem_subreg, subreg, perm[16], pcv;
      rtx tmp = gen_reg_rtx (V8HImode);
      /* 2 is leftmost element in register */
      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
      subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
      mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
      emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));

(define_insn "*vsx_st_elemrev_v2di_internal"
  [(set (match_operand:V2DI 0 "memory_operand" "=Z")
          (match_operand:V2DI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_st_elemrev_v8hi_internal"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
          (match_operand:V8HI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore")])

(define_expand "vsx_st_elemrev_v16qi"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
          (match_operand:V16QI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int 9) (const_int 8)
                     (const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
  if (!TARGET_P9_VECTOR)
      rtx mem_subreg, subreg, perm[16], pcv;
      rtx tmp = gen_reg_rtx (V16QImode);
      /* 3 is leftmost element in register */
      unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1],
      subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0);
      mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0);
      emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));

(define_insn "*vsx_st_elemrev_v16qi_internal"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
          (match_operand:V16QI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int 9) (const_int 8)
                     (const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore")])

;; VSX vector floating point arithmetic instructions.  The VSX scalar
;; instructions are now combined with the insn for the traditional floating
;; point unit.
(define_insn "*vsx_add<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                    (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvadd<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_sub<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                     (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvsub<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_mul<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                    (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmul<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

;; Emulate vector with scalar for vec_mul in V2DImode
(define_insn_and_split "vsx_mul_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_muldi3 (op5, op3, op4));

      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
      emit_move_insn (op5, ret);

  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_muldi3 (op3, op3, op4));

      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
      emit_move_insn (op3, ret);

  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  [(set_attr "type" "mul")])
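
;; Hedged example of the scalarized V2DI multiply: each doubleword is
;; extracted, multiplied (mulld on 64-bit targets), and the two results are
;; concatenated.  Assuming GCC vector extensions:
;;
;;   typedef long long v2di __attribute__ ((vector_size (16)));
;;   v2di
;;   mul_v2di (v2di a, v2di b)
;;   {
;;     return a * b;   /* no vector doubleword multiply before ISA 3.1  */
;;   }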
(define_insn "*vsx_div<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                   (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvdiv<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_div>")])

;; Emulate vector with scalar for vec_div in V2DImode
(define_insn_and_split "vsx_div_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_divdi3 (op5, op3, op4));

      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op5, LCT_NORMAL, DImode,

      emit_move_insn (op5, target);

  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_divdi3 (op3, op3, op4));

      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op3, LCT_NORMAL, DImode,

      emit_move_insn (op3, target);

  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  [(set_attr "type" "div")])

(define_insn_and_split "vsx_udiv_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_udivdi3 (op5, op3, op4));

      rtx libfunc = optab_libfunc (udiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op5, LCT_NORMAL, DImode,

      emit_move_insn (op5, target);

  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_udivdi3 (op3, op3, op4));

      rtx libfunc = optab_libfunc (udiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op3, LCT_NORMAL, DImode,

      emit_move_insn (op3, target);

  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  [(set_attr "type" "div")])
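
;; Hedged example: V2DI division is likewise scalarized; on 32-bit targets
;; each doubleword division becomes a libgcc call (__udivdi3 for the
;; unsigned case).  Assuming GCC vector extensions:
;;
;;   typedef unsigned long long v2du __attribute__ ((vector_size (16)));
;;   v2du
;;   udiv_v2du (v2du a, v2du b)
;;   {
;;     return a / b;
;;   }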
;; *tdiv* instruction returning the FG flag
(define_expand "vsx_tdiv<mode>3_fg"
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
                      (match_operand:VSX_B 2 "vsx_register_operand")]
   (set (match_operand:SI 0 "gpc_reg_operand")
        (gt:SI (match_dup 3)
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  operands[3] = gen_reg_rtx (CCFPmode);

;; *tdiv* instruction returning the FE flag
(define_expand "vsx_tdiv<mode>3_fe"
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
                      (match_operand:VSX_B 2 "vsx_register_operand")]
   (set (match_operand:SI 0 "gpc_reg_operand")
        (eq:SI (match_dup 3)
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  operands[3] = gen_reg_rtx (CCFPmode);

(define_insn "*vsx_tdiv<mode>3_internal"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")
                      (match_operand:VSX_B 2 "vsx_register_operand" "wa")]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>tdiv<sd>p %0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
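
;; Hedged example: the test-for-software-divide built-ins map onto these
;; expanders.  The built-in name below is assumed from the expander naming
;; convention (vsx_tdivdf3_fg -> __builtin_vsx_xvtdivdp_fg) and may differ:
;;
;;   #include <altivec.h>
;;   int
;;   div_needs_software_fallback (vector double a, vector double b)
;;   {
;;     return __builtin_vsx_xvtdivdp_fg (a, b);   /* tests the FG flag  */
;;   }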
(define_insn "vsx_fre<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_neg<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvneg<sd>p %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_abs<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvabs<sd>p %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_nabs<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
            (match_operand:VSX_F 1 "vsx_register_operand" "wa"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvnabs<sd>p %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_smax<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                    (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmax<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_smin<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                    (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmin<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_sqrt<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvsqrt<sd>p %x0,%x1"
  [(set_attr "type" "<sd>sqrt")])

(define_insn "*vsx_rsqrte<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvrsqrte<sd>p %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

;; *tsqrt* returning the FG flag
(define_expand "vsx_tsqrt<mode>2_fg"
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
   (set (match_operand:SI 0 "gpc_reg_operand")
        (gt:SI (match_dup 2)
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  operands[2] = gen_reg_rtx (CCFPmode);

;; *tsqrt* returning the FE flag
(define_expand "vsx_tsqrt<mode>2_fe"
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
   (set (match_operand:SI 0 "gpc_reg_operand")
        (eq:SI (match_dup 2)
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  operands[2] = gen_reg_rtx (CCFPmode);

(define_insn "*vsx_tsqrt<mode>2_internal"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>tsqrt<sd>p %0,%x1"
  [(set_attr "type" "<VStype_simple>")])

;; Fused vector multiply/add instructions.  Support the classical Altivec
;; versions of fma, which allows the target to be a separate register from the
;; 3 inputs.  Under VSX, the target must be either the addend or the first
;; multiplicand.
(define_insn "*vsx_fmav4sf4"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v")
          (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v")
          (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v")
          (match_operand:V4SF 3 "vsx_register_operand" "0,wa,v")))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
   xvmaddasp %x0,%x1,%x2
   xvmaddmsp %x0,%x1,%x3
   vmaddfp %0,%1,%2,%3"
  [(set_attr "type" "vecfloat")])
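
;; Hedged example: vec_madd from <altivec.h> can use either VSX form; the
;; register allocator picks xvmaddasp or xvmaddmsp depending on whether the
;; target overlaps the addend or a multiplicand (or vmaddfp for Altivec):
;;
;;   #include <altivec.h>
;;   vector float
;;   fma_v4sf (vector float a, vector float b, vector float c)
;;   {
;;     return vec_madd (a, b, c);   /* a * b + c  */
;;   }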
(define_insn "*vsx_fmav2df4"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
          (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa")
          (match_operand:V2DF 2 "vsx_register_operand" "wa,0")
          (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
   xvmaddadp %x0,%x1,%x2
   xvmaddmdp %x0,%x1,%x3"
  [(set_attr "type" "vecdouble")])

(define_insn "*vsx_fms<mode>4"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa")
          (match_operand:VSX_F 1 "vsx_register_operand" "%wa,wa")
          (match_operand:VSX_F 2 "vsx_register_operand" "wa,0")
            (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
   xvmsuba<sd>p %x0,%x1,%x2
   xvmsubm<sd>p %x0,%x1,%x3"
  [(set_attr "type" "<VStype_mul>")])

(define_insn "*vsx_nfma<mode>4"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa")
            (match_operand:VSX_F 1 "vsx_register_operand" "wa,wa")
            (match_operand:VSX_F 2 "vsx_register_operand" "wa,0")
            (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
   xvnmadda<sd>p %x0,%x1,%x2
   xvnmaddm<sd>p %x0,%x1,%x3"
  [(set_attr "type" "<VStype_mul>")])

(define_insn "*vsx_nfmsv4sf4"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v")
            (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v")
            (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v")
              (match_operand:V4SF 3 "vsx_register_operand" "0,wa,v")))))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
   xvnmsubasp %x0,%x1,%x2
   xvnmsubmsp %x0,%x1,%x3
   vnmsubfp %0,%1,%2,%3"
  [(set_attr "type" "vecfloat")])

(define_insn "*vsx_nfmsv2df4"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
            (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa")
            (match_operand:V2DF 2 "vsx_register_operand" "wa,0")
              (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
   xvnmsubadp %x0,%x1,%x2
   xvnmsubmdp %x0,%x1,%x3"
  [(set_attr "type" "vecdouble")])

;; Vector conditional expressions (no scalar version for these instructions)
(define_insn "vsx_eq<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                  (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpeq<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_gt<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                  (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpgt<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_ge<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                  (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpge<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

;; Compare vectors producing a vector result and a predicate, setting CR6 to
;; indicate a combined status
(define_insn "*vsx_eq_<mode>_p"
  [(set (reg:CC CR6_REGNO)
          [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                  (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (eq:VSX_F (match_dup 1)
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpeq<sd>p. %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_gt_<mode>_p"
  [(set (reg:CC CR6_REGNO)
          [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                  (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (gt:VSX_F (match_dup 1)
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpgt<sd>p. %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_ge_<mode>_p"
  [(set (reg:CC CR6_REGNO)
          [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                  (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (ge:VSX_F (match_dup 1)
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpge<sd>p. %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
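
;; Hedged example: the dotted predicate forms back vec_all_*/vec_any_* from
;; <altivec.h>, which test CR6 after the recording compare:
;;
;;   #include <altivec.h>
;;   int
;;   all_greater (vector double a, vector double b)
;;   {
;;     return vec_all_gt (a, b);   /* xvcmpgtdp. followed by a CR6 test  */
;;   }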
1994 (define_insn "*vsx_xxsel<mode>"
1995 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
1997 (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
1998 (match_operand:VSX_L 4 "zero_constant" ""))
1999 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
2000 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
2001 "VECTOR_MEM_VSX_P (<MODE>mode)"
2002 "xxsel %x0,%x3,%x2,%x1"
2003 [(set_attr "type" "vecmove")
2004 (set_attr "isa" "<VSisa>")])
2006 (define_insn "*vsx_xxsel<mode>_uns"
2007 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
2009 (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
2010 (match_operand:VSX_L 4 "zero_constant" ""))
2011 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
2012 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
2013 "VECTOR_MEM_VSX_P (<MODE>mode)"
2014 "xxsel %x0,%x3,%x2,%x1"
2015 [(set_attr "type" "vecmove")
2016 (set_attr "isa" "<VSisa>")])
2019 (define_insn "vsx_copysign<mode>3"
2020 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2021 (unspec:VSX_F
2022 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
2023 (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
2024 UNSPEC_COPYSIGN))]
2025 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2026 "xvcpsgn<sd>p %x0,%x2,%x1"
2027 [(set_attr "type" "<VStype_simple>")])
2029 ;; For the conversions, limit the register class for the integer value to the
2030 ;; FPRs, because we don't want to add the altivec registers to movdi/movsi.
2031 ;; For the unsigned tests, there isn't a generic double -> unsigned conversion
2032 ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
2033 ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
2034 ;; in allowing virtual registers.
2035 (define_insn "vsx_float<VSi><mode>2"
2036 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
2037 (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
2038 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2039 "xvcvsx<VSc><sd>p %x0,%x1"
2040 [(set_attr "type" "<VStype_simple>")])
2042 (define_insn "vsx_floatuns<VSi><mode>2"
2043 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
2044 (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
2045 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2046 "xvcvux<VSc><sd>p %x0,%x1"
2047 [(set_attr "type" "<VStype_simple>")])
2049 (define_insn "vsx_fix_trunc<mode><VSi>2"
2050 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
2051 (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
2052 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2053 "x<VSv>cv<sd>psx<VSc>s %x0,%x1"
2054 [(set_attr "type" "<VStype_simple>")])
2056 (define_insn "vsx_fixuns_trunc<mode><VSi>2"
2057 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
2058 (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
2059 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2060 "x<VSv>cv<sd>pux<VSc>s %x0,%x1"
2061 [(set_attr "type" "<VStype_simple>")])
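;; Illustrative only: the four conversion patterns above are normally reached
;; from generic vector code rather than built directly; for example, with
;; GCC's vector extension:
;;
;;   typedef long long v2di __attribute__ ((vector_size (16)));
;;   typedef double v2df __attribute__ ((vector_size (16)));
;;   v2df to_double (v2di x)
;;   {
;;     return __builtin_convertvector (x, v2df);  /* xvcvsxddp */
;;   }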
2063 ;; Math rounding functions
2064 (define_insn "vsx_x<VSv>r<sd>pi"
2065 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2066 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2067 UNSPEC_VSX_ROUND_I))]
2068 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2069 "x<VSv>r<sd>pi %x0,%x1"
2070 [(set_attr "type" "<VStype_simple>")])
2072 (define_insn "vsx_x<VSv>r<sd>pic"
2073 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2074 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2075 UNSPEC_VSX_ROUND_IC))]
2076 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2077 "x<VSv>r<sd>pic %x0,%x1"
2078 [(set_attr "type" "<VStype_simple>")])
2080 (define_insn "vsx_btrunc<mode>2"
2081 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2082 (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
2083 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2084 "xvr<sd>piz %x0,%x1"
2085 [(set_attr "type" "<VStype_simple>")])
2087 (define_insn "*vsx_b2trunc<mode>2"
2088 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2089 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2090 UNSPEC_FRIZ))]
2091 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2092 "x<VSv>r<sd>piz %x0,%x1"
2093 [(set_attr "type" "<VStype_simple>")])
2095 (define_insn "vsx_floor<mode>2"
2096 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2097 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
2098 UNSPEC_FRIM))]
2099 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2100 "xvr<sd>pim %x0,%x1"
2101 [(set_attr "type" "<VStype_simple>")])
2103 (define_insn "vsx_ceil<mode>2"
2104 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2105 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
2106 UNSPEC_FRIP))]
2107 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2108 "xvr<sd>pip %x0,%x1"
2109 [(set_attr "type" "<VStype_simple>")])
2112 ;; VSX convert to/from double vector
2114 ;; Convert between single and double precision
2115 ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
2116 ;; scalar single precision instructions internally use the double format.
2117 ;; Prefer the altivec registers, since we likely will need to do a vperm
2118 (define_insn "vsx_xscvdpsp"
2119 [(set (match_operand:V4SF 0 "vsx_register_operand" "=f,?wa")
2120 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "f,wa")]
2121 UNSPEC_VSX_CVSPDP))]
2122 "VECTOR_UNIT_VSX_P (DFmode)"
2124 [(set_attr "type" "fp")])
2126 (define_insn "vsx_xvcvspdp_be"
2127 [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
2128 (float_extend:V2DF
2129 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2130 (parallel [(const_int 0) (const_int 2)]))))]
2131 "VECTOR_UNIT_VSX_P (V4SFmode) && BYTES_BIG_ENDIAN"
2133 [(set_attr "type" "vecdouble")])
2135 (define_insn "vsx_xvcvspdp_le"
2136 [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
2137 (float_extend:V2DF
2138 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2139 (parallel [(const_int 1) (const_int 3)]))))]
2140 "VECTOR_UNIT_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
2142 [(set_attr "type" "vecdouble")])
2144 (define_expand "vsx_xvcvspdp"
2145 [(match_operand:V2DF 0 "vsx_register_operand")
2146 (match_operand:V4SF 1 "vsx_register_operand")]
2147 "VECTOR_UNIT_VSX_P (V4SFmode)"
2149 if (BYTES_BIG_ENDIAN)
2150 emit_insn (gen_vsx_xvcvspdp_be (operands[0], operands[1]));
2151 else
2152 emit_insn (gen_vsx_xvcvspdp_le (operands[0], operands[1]));
2153 DONE;
2154 })
2156 (define_insn "vsx_xvcvdpsp"
2157 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,?wa")
2158 (unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "v,wa")]
2159 UNSPEC_VSX_CVSPDP))]
2160 "VECTOR_UNIT_VSX_P (V2DFmode)"
2162 [(set_attr "type" "vecdouble")])
2164 ;; xscvspdp, represent the scalar SF type as V4SF
2165 (define_insn "vsx_xscvspdp"
2166 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2167 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2168 UNSPEC_VSX_CVSPDP))]
2169 "VECTOR_UNIT_VSX_P (V4SFmode)"
2171 [(set_attr "type" "fp")])
2173 ;; Same as vsx_xscvspdp, but use SF as the type
2174 (define_insn "vsx_xscvspdp_scalar2"
2175 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2176 (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2177 UNSPEC_VSX_CVSPDP))]
2178 "VECTOR_UNIT_VSX_P (V4SFmode)"
2180 [(set_attr "type" "fp")])
2182 ;; Generate xvcvhpsp instruction
2183 (define_insn "vsx_xvcvhpsp"
2184 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2185 (unspec:V4SF [(match_operand: V16QI 1 "vsx_register_operand" "wa")]
2186 UNSPEC_VSX_CVHPSP))]
2187 "TARGET_P9_VECTOR"
2188 "xvcvhpsp %x0,%x1"
2189 [(set_attr "type" "vecfloat")])
2191 ;; Generate xvcvsphp
2192 (define_insn "vsx_xvcvsphp"
2193 [(set (match_operand:V4SI 0 "register_operand" "=wa")
2194 (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2195 UNSPEC_VSX_XVCVSPHP))]
2196 "TARGET_P9_VECTOR"
2197 "xvcvsphp %x0,%x1"
2198 [(set_attr "type" "vecfloat")])
2200 ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
2201 ;; format of scalars is actually DF.
2202 (define_insn "vsx_xscvdpsp_scalar"
2203 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2204 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2205 UNSPEC_VSX_CVSPDP))]
2206 "VECTOR_UNIT_VSX_P (V4SFmode)"
2208 [(set_attr "type" "fp")])
2210 ;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs
2211 (define_insn "vsx_xscvdpspn"
2212 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2213 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wa")]
2214 UNSPEC_VSX_CVDPSPN))]
2215 "TARGET_XSCVDPSPN"
2216 "xscvdpspn %x0,%x1"
2217 [(set_attr "type" "fp")])
2219 (define_insn "vsx_xscvspdpn"
2220 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2221 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2222 UNSPEC_VSX_CVSPDPN))]
2223 "TARGET_XSCVSPDPN"
2224 "xscvspdpn %x0,%x1"
2225 [(set_attr "type" "fp")])
2227 (define_insn "vsx_xscvdpspn_scalar"
2228 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2229 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2230 UNSPEC_VSX_CVDPSPN))]
2231 "TARGET_XSCVDPSPN"
2232 "xscvdpspn %x0,%x1"
2233 [(set_attr "type" "fp")])
2235 ;; Used by direct move to move a SFmode value from GPR to VSX register
2236 (define_insn "vsx_xscvspdpn_directmove"
2237 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2238 (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2239 UNSPEC_VSX_CVSPDPN))]
2240 "TARGET_XSCVSPDPN"
2241 "xscvspdpn %x0,%x1"
2242 [(set_attr "type" "fp")])
2244 ;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
2246 (define_insn "vsx_xvcv<su>xwsp"
2247 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2248 (any_float:V4SF (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
2249 "VECTOR_UNIT_VSX_P (V4SFmode)"
2250 "xvcv<su>xwsp %x0,%x1"
2251 [(set_attr "type" "vecfloat")])
2253 (define_insn "vsx_xvcv<su>xddp"
2254 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2255 (any_float:V2DF (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
2256 "VECTOR_UNIT_VSX_P (V2DFmode)"
2257 "xvcv<su>xddp %x0,%x1"
2258 [(set_attr "type" "vecdouble")])
2260 (define_insn "vsx_xvcvsp<su>xws"
2261 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
2262 (any_fix:V4SI (match_operand:V4SF 1 "vsx_register_operand" "wa")))]
2263 "VECTOR_UNIT_VSX_P (V4SFmode)"
2264 "xvcvsp<su>xws %x0,%x1"
2265 [(set_attr "type" "vecfloat")])
2267 (define_insn "vsx_xvcvdp<su>xds"
2268 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
2269 (any_fix:V2DI (match_operand:V2DF 1 "vsx_register_operand" "wa")))]
2270 "VECTOR_UNIT_VSX_P (V2DFmode)"
2271 "xvcvdp<su>xds %x0,%x1"
2272 [(set_attr "type" "vecdouble")])
2274 (define_expand "vsx_xvcvsxddp_scale"
2275 [(match_operand:V2DF 0 "vsx_register_operand")
2276 (match_operand:V2DI 1 "vsx_register_operand")
2277 (match_operand:QI 2 "immediate_operand")]
2278 "VECTOR_UNIT_VSX_P (V2DFmode)"
2280 rtx op0 = operands[0];
2281 rtx op1 = operands[1];
2282 int scale = INTVAL(operands[2]);
2283 emit_insn (gen_vsx_xvcvsxddp (op0, op1));
2284 if (scale != 0)
2285 rs6000_scale_v2df (op0, op0, -scale);
2286 DONE;
2287 })
2289 (define_expand "vsx_xvcvuxddp_scale"
2290 [(match_operand:V2DF 0 "vsx_register_operand")
2291 (match_operand:V2DI 1 "vsx_register_operand")
2292 (match_operand:QI 2 "immediate_operand")]
2293 "VECTOR_UNIT_VSX_P (V2DFmode)"
2295 rtx op0 = operands[0];
2296 rtx op1 = operands[1];
2297 int scale = INTVAL(operands[2]);
2298 emit_insn (gen_vsx_xvcvuxddp (op0, op1));
2299 if (scale != 0)
2300 rs6000_scale_v2df (op0, op0, -scale);
2301 DONE;
2302 })
2304 (define_expand "vsx_xvcvdpsxds_scale"
2305 [(match_operand:V2DI 0 "vsx_register_operand")
2306 (match_operand:V2DF 1 "vsx_register_operand")
2307 (match_operand:QI 2 "immediate_operand")]
2308 "VECTOR_UNIT_VSX_P (V2DFmode)"
2310 rtx op0 = operands[0];
2311 rtx op1 = operands[1];
2312 rtx tmp;
2313 int scale = INTVAL (operands[2]);
2314 if (scale == 0)
2315 tmp = op1;
2316 else
2317 {
2318 tmp = gen_reg_rtx (V2DFmode);
2319 rs6000_scale_v2df (tmp, op1, scale);
2320 }
2321 emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
2322 DONE;
2323 })
2325 ;; convert vector of 64-bit floating point numbers to vector of
2326 ;; 64-bit unsigned integer
2327 (define_expand "vsx_xvcvdpuxds_scale"
2328 [(match_operand:V2DI 0 "vsx_register_operand")
2329 (match_operand:V2DF 1 "vsx_register_operand")
2330 (match_operand:QI 2 "immediate_operand")]
2331 "VECTOR_UNIT_VSX_P (V2DFmode)"
2333 rtx op0 = operands[0];
2334 rtx op1 = operands[1];
2335 rtx tmp;
2336 int scale = INTVAL (operands[2]);
2337 if (scale == 0)
2338 tmp = op1;
2339 else
2340 {
2341 tmp = gen_reg_rtx (V2DFmode);
2342 rs6000_scale_v2df (tmp, op1, scale);
2343 }
2344 emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
2345 DONE;
2346 })
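;; A hedged sketch of the scaling semantics the four *_scale expanders above
;; implement for vec_ctf/vec_cts/vec_ctu (the second builtin argument is the
;; power-of-two scale factor):
;;
;;   #include <altivec.h>
;;   vector double ctf4 (vector signed long long x)
;;   {
;;     /* result[i] = (double) x[i] * 2**-4; xvcvsxddp followed by the
;;        multiply emitted by rs6000_scale_v2df.  */
;;     return vec_ctf (x, 4);
;;   }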
2348 ;; Convert from 64-bit to 32-bit types
2349 ;; Note, favor the Altivec registers since the usual use of these instructions
2350 ;; is in vector converts and we need to use the Altivec vperm instruction.
2352 (define_insn "vsx_xvcvdpsxws"
2353 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2354 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
2355 UNSPEC_VSX_CVDPSXWS))]
2356 "VECTOR_UNIT_VSX_P (V2DFmode)"
2357 "xvcvdpsxws %x0,%x1"
2358 [(set_attr "type" "vecdouble")])
2360 (define_insn "vsx_xvcvdpuxws"
2361 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2362 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
2363 UNSPEC_VSX_CVDPUXWS))]
2364 "VECTOR_UNIT_VSX_P (V2DFmode)"
2365 "xvcvdpuxws %x0,%x1"
2366 [(set_attr "type" "vecdouble")])
2368 (define_insn "vsx_xvcvsxdsp"
2369 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2370 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2371 UNSPEC_VSX_CVSXDSP))]
2372 "VECTOR_UNIT_VSX_P (V2DFmode)"
2374 [(set_attr "type" "vecfloat")])
2376 (define_insn "vsx_xvcvuxdsp"
2377 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2378 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2379 UNSPEC_VSX_CVUXDSP))]
2380 "VECTOR_UNIT_VSX_P (V2DFmode)"
2382 [(set_attr "type" "vecdouble")])
2384 ;; Convert vector of 32-bit signed/unsigned integers to vector of
2385 ;; 64-bit floating point numbers.
2386 (define_insn "vsx_xvcv<su>xwdp_be"
2387 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2388 (any_float:V2DF
2389 (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa")
2390 (parallel [(const_int 0) (const_int 2)]))))]
2391 "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN"
2392 "xvcv<su>xwdp %x0,%x1"
2393 [(set_attr "type" "vecdouble")])
2395 (define_insn "vsx_xvcv<su>xwdp_le"
2396 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2397 (any_float:V2DF
2398 (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa")
2399 (parallel [(const_int 1) (const_int 3)]))))]
2400 "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
2401 "xvcv<su>xwdp %x0,%x1"
2402 [(set_attr "type" "vecdouble")])
2404 (define_expand "vsx_xvcv<su>xwdp"
2405 [(match_operand:V2DF 0 "vsx_register_operand")
2406 (match_operand:V4SI 1 "vsx_register_operand")
2407 (any_float (pc))]
2408 "VECTOR_UNIT_VSX_P (V2DFmode)"
2410 if (BYTES_BIG_ENDIAN)
2411 emit_insn (gen_vsx_xvcv<su>xwdp_be (operands[0], operands[1]));
2412 else
2413 emit_insn (gen_vsx_xvcv<su>xwdp_le (operands[0], operands[1]));
2414 DONE;
2415 })
2417 (define_insn "vsx_xvcvsxwdp_df"
2418 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2419 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2420 UNSPEC_VSX_CVSXWDP))]
2421 "TARGET_VSX"
2422 "xvcvsxwdp %x0,%x1"
2423 [(set_attr "type" "vecdouble")])
2425 (define_insn "vsx_xvcvuxwdp_df"
2426 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2427 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2428 UNSPEC_VSX_CVUXWDP))]
2429 "TARGET_VSX"
2430 "xvcvuxwdp %x0,%x1"
2431 [(set_attr "type" "vecdouble")])
2433 ;; Convert vector of 32-bit floating point numbers to vector of
2434 ;; 64-bit signed/unsigned integers.
2435 (define_insn "vsx_xvcvsp<su>xds_be"
2436 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2437 (any_fix:V2DI
2438 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2439 (parallel [(const_int 0) (const_int 2)]))))]
2440 "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN"
2441 "xvcvsp<su>xds %x0,%x1"
2442 [(set_attr "type" "vecdouble")])
2444 (define_insn "vsx_xvcvsp<su>xds_le"
2445 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2446 (any_fix:V2DI
2447 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2448 (parallel [(const_int 1) (const_int 3)]))))]
2449 "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
2450 "xvcvsp<su>xds %x0,%x1"
2451 [(set_attr "type" "vecdouble")])
2453 (define_expand "vsx_xvcvsp<su>xds"
2454 [(match_operand:V2DI 0 "vsx_register_operand")
2455 (match_operand:V4SF 1 "vsx_register_operand")
2456 (any_fix (pc))]
2457 "VECTOR_UNIT_VSX_P (V2DFmode)"
2459 if (BYTES_BIG_ENDIAN)
2460 emit_insn (gen_vsx_xvcvsp<su>xds_be (operands[0], operands[1]));
2461 else
2462 emit_insn (gen_vsx_xvcvsp<su>xds_le (operands[0], operands[1]));
2463 DONE;
2464 })
2466 ;; Generate float2 double
2467 ;; convert two double to float
2468 (define_expand "float2_v2df"
2469 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2470 (use (match_operand:V2DF 1 "register_operand" "wa"))
2471 (use (match_operand:V2DF 2 "register_operand" "wa"))]
2472 "VECTOR_UNIT_VSX_P (V4SFmode)"
2474 rtx rtx_src1, rtx_src2, rtx_dst;
2476 rtx_dst = operands[0];
2477 rtx_src1 = operands[1];
2478 rtx_src2 = operands[2];
2480 rs6000_generate_float2_double_code (rtx_dst, rtx_src1, rtx_src2);
2481 DONE;
2482 })
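;; Illustrative usage, assuming the vec_float2 builtin from <altivec.h>:
;;
;;   #include <altivec.h>
;;   vector float pack (vector double a, vector double b)
;;   {
;;     /* result = { (float) a[0], (float) a[1], (float) b[0], (float) b[1] } */
;;     return vec_float2 (a, b);
;;   }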
2485 ;; convert two long long signed ints to float
2486 (define_expand "float2_v2di"
2487 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2488 (use (match_operand:V2DI 1 "register_operand" "wa"))
2489 (use (match_operand:V2DI 2 "register_operand" "wa"))]
2490 "VECTOR_UNIT_VSX_P (V4SFmode)"
2492 rtx rtx_src1, rtx_src2, rtx_dst;
2494 rtx_dst = operands[0];
2495 rtx_src1 = operands[1];
2496 rtx_src2 = operands[2];
2498 rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
2499 DONE;
2500 })
2502 ;; Generate uns_float2
2503 ;; convert two long long unsigned ints to float
2504 (define_expand "uns_float2_v2di"
2505 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2506 (use (match_operand:V2DI 1 "register_operand" "wa"))
2507 (use (match_operand:V2DI 2 "register_operand" "wa"))]
2508 "VECTOR_UNIT_VSX_P (V4SFmode)"
2510 rtx rtx_src1, rtx_src2, rtx_dst;
2512 rtx_dst = operands[0];
2513 rtx_src1 = operands[1];
2514 rtx_src2 = operands[2];
2516 rs6000_generate_float2_code (false, rtx_dst, rtx_src1, rtx_src2);
2517 DONE;
2518 })
2520 ;; Generate floate
2521 ;; convert double or long long signed to float
2522 ;; (Only even words are valid, BE numbering)
2523 (define_expand "floate<mode>"
2524 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2525 (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2526 "VECTOR_UNIT_VSX_P (V4SFmode)"
2528 if (BYTES_BIG_ENDIAN)
2529 {
2530 /* Shift left one word to put even word correct location */
2531 rtx rtx_tmp;
2532 rtx rtx_val = GEN_INT (4);
2534 rtx_tmp = gen_reg_rtx (V4SFmode);
2535 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2536 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2537 rtx_tmp, rtx_tmp, rtx_val));
2538 }
2539 else
2540 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2542 DONE;
2543 })
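;; Hedged illustration of the even-word semantics implemented above,
;; assuming the vec_floate builtin from <altivec.h>:
;;
;;   #include <altivec.h>
;;   vector float even (vector double d)
;;   {
;;     /* Converted floats land in the even words (BE numbering); the odd
;;        words are undefined.  */
;;     return vec_floate (d);
;;   }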
2545 ;; Generate uns_floate
2546 ;; convert long long unsigned to float
2547 ;; (Only even words are valid, BE numbering)
2548 (define_expand "unsfloatev2di"
2549 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2550 (use (match_operand:V2DI 1 "register_operand" "wa"))]
2551 "VECTOR_UNIT_VSX_P (V4SFmode)"
2553 if (BYTES_BIG_ENDIAN)
2554 {
2555 /* Shift left one word to put even word correct location */
2556 rtx rtx_tmp;
2557 rtx rtx_val = GEN_INT (4);
2559 rtx_tmp = gen_reg_rtx (V4SFmode);
2560 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2561 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2562 rtx_tmp, rtx_tmp, rtx_val));
2563 }
2564 else
2565 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2567 DONE;
2568 })
2570 ;; Generate floato
2571 ;; convert double or long long signed to float
2572 ;; (Only odd words are valid, BE numbering)
2573 (define_expand "floato<mode>"
2574 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2575 (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2576 "VECTOR_UNIT_VSX_P (V4SFmode)"
2578 if (BYTES_BIG_ENDIAN)
2579 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2580 else
2581 {
2582 /* Shift left one word to put odd word correct location */
2583 rtx rtx_tmp;
2584 rtx rtx_val = GEN_INT (4);
2586 rtx_tmp = gen_reg_rtx (V4SFmode);
2587 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2588 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2589 rtx_tmp, rtx_tmp, rtx_val));
2590 }
2591 DONE;
2592 })
2594 ;; Generate uns_floato
2595 ;; convert long long unsigned to float
2596 ;; (Only odd words are valid, BE numbering)
2597 (define_expand "unsfloatov2di"
2598 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2599 (use (match_operand:V2DI 1 "register_operand" "wa"))]
2600 "VECTOR_UNIT_VSX_P (V4SFmode)"
2602 if (BYTES_BIG_ENDIAN)
2603 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2604 else
2605 {
2606 /* Shift left one word to put odd word correct location */
2607 rtx rtx_tmp;
2608 rtx rtx_val = GEN_INT (4);
2610 rtx_tmp = gen_reg_rtx (V4SFmode);
2611 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2612 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2613 rtx_tmp, rtx_tmp, rtx_val));
2614 }
2615 DONE;
2616 })
2618 ;; Generate vsigned2
2619 ;; convert two double float vectors to a vector of single precision ints
2620 (define_expand "vsigned2_v2df"
2621 [(match_operand:V4SI 0 "register_operand" "=wa")
2622 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa")
2623 (match_operand:V2DF 2 "register_operand" "wa")]
2624 UNSPEC_VSX_VSIGNED2)]
2625 "TARGET_VSX"
2626 {
2627 rtx rtx_src1, rtx_src2, rtx_dst;
2628 bool signed_convert=true;
2630 rtx_dst = operands[0];
2631 rtx_src1 = operands[1];
2632 rtx_src2 = operands[2];
2634 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2635 DONE;
2636 })
2638 ;; Generate vsignedo_v2df
2639 ;; signed double float to int convert odd word
2640 (define_expand "vsignedo_v2df"
2641 [(set (match_operand:V4SI 0 "register_operand" "=wa")
2642 (match_operand:V2DF 1 "register_operand" "wa"))]
2643 "TARGET_VSX"
2644 {
2645 if (BYTES_BIG_ENDIAN)
2646 {
2647 rtx rtx_tmp;
2648 rtx rtx_val = GEN_INT (12);
2649 rtx_tmp = gen_reg_rtx (V4SImode);
2651 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2653 /* Big endian word numbering for words in operand is 0 1 2 3.
2654 Take (operand[1] operand[1]) and shift left one word
2655 0 1 2 3 0 1 2 3 => 1 2 3 0
2656 Words 1 and 3 are now where they need to be for the result. */
2658 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2659 rtx_tmp, rtx_val));
2660 }
2661 else
2662 /* Little endian word numbering for operand is 3 2 1 0.
2663 Result words 3 and 1 are where they need to be. */
2664 emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2666 DONE;
2667 }
2668 [(set_attr "type" "veccomplex")])
2670 ;; Generate vsignede_v2df
2671 ;; signed double float to int even word
2672 (define_expand "vsignede_v2df"
2673 [(set (match_operand:V4SI 0 "register_operand" "=v")
2674 (match_operand:V2DF 1 "register_operand" "v"))]
2675 "TARGET_VSX"
2676 {
2677 if (BYTES_BIG_ENDIAN)
2678 /* Big endian word numbering for words in operand is 0 1 2 3.
2679 Result words 0 and 2 are already where they need to be. */
2680 emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2682 else
2683 {
2684 rtx rtx_tmp;
2685 rtx rtx_val = GEN_INT (12);
2686 rtx_tmp = gen_reg_rtx (V4SImode);
2688 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2690 /* Little endian word numbering for operand is 3 2 1 0.
2691 take (operand[1] operand[1]) and shift left three words
2692 0 1 2 3 0 1 2 3 => 3 0 1 2
2693 Words 0 and 2 are now where they need to be for the result. */
2694 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2695 rtx_tmp, rtx_val));
2696 }
2697 DONE;
2698 }
2699 [(set_attr "type" "veccomplex")])
2701 ;; Generate vunsigned2
2702 ;; convert two double float vectors to a vector of single precision
2703 ;; unsigned ints
2704 (define_expand "vunsigned2_v2df"
2705 [(match_operand:V4SI 0 "register_operand" "=v")
2706 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v")
2707 (match_operand:V2DF 2 "register_operand" "v")]
2708 UNSPEC_VSX_VSIGNED2)]
2709 "TARGET_VSX"
2710 {
2711 rtx rtx_src1, rtx_src2, rtx_dst;
2712 bool signed_convert=false;
2714 rtx_dst = operands[0];
2715 rtx_src1 = operands[1];
2716 rtx_src2 = operands[2];
2718 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2719 DONE;
2720 })
2722 ;; Generate vunsignedo_v2df
2723 ;; unsigned double float to int convert odd word
2724 (define_expand "vunsignedo_v2df"
2725 [(set (match_operand:V4SI 0 "register_operand" "=v")
2726 (match_operand:V2DF 1 "register_operand" "v"))]
2727 "TARGET_VSX"
2728 {
2729 if (BYTES_BIG_ENDIAN)
2730 {
2731 rtx rtx_tmp;
2732 rtx rtx_val = GEN_INT (12);
2733 rtx_tmp = gen_reg_rtx (V4SImode);
2735 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2737 /* Big endian word numbering for words in operand is 0 1 2 3.
2738 Take (operand[1] operand[1]) and shift left one word
2739 0 1 2 3 0 1 2 3 => 1 2 3 0
2740 Words 1 and 3 are now where they need to be for the result. */
2742 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2743 rtx_tmp, rtx_val));
2744 }
2745 else
2746 /* Little endian word numbering for operand is 3 2 1 0.
2747 Result words 3 and 1 are where they need to be. */
2748 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2750 DONE;
2751 }
2752 [(set_attr "type" "veccomplex")])
2754 ;; Generate vunsignede_v2df
2755 ;; unsigned double float to int even word
2756 (define_expand "vunsignede_v2df"
2757 [(set (match_operand:V4SI 0 "register_operand" "=v")
2758 (match_operand:V2DF 1 "register_operand" "v"))]
2759 "TARGET_VSX"
2760 {
2761 if (BYTES_BIG_ENDIAN)
2762 /* Big endian word numbering for words in operand is 0 1 2 3.
2763 Result words 0 and 2 are already where they need to be. */
2764 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2766 else
2767 {
2768 rtx rtx_tmp;
2769 rtx rtx_val = GEN_INT (12);
2770 rtx_tmp = gen_reg_rtx (V4SImode);
2772 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2774 /* Little endian word numbering for operand is 3 2 1 0.
2775 take (operand[1] operand[1]) and shift left three words
2776 0 1 2 3 0 1 2 3 => 3 0 1 2
2777 Words 0 and 2 are now where they need to be for the result. */
2778 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2779 rtx_tmp, rtx_val));
2780 }
2781 DONE;
2782 }
2783 [(set_attr "type" "veccomplex")])
2785 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
2786 ;; the xvrdpiz instruction does not truncate the value if the floating
2787 ;; point value is < LONG_MIN or > LONG_MAX.
2788 (define_insn "*vsx_float_fix_v2df2"
2789 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,?wa")
2790 (float:V2DF
2791 (fix:V2DI
2792 (match_operand:V2DF 1 "vsx_register_operand" "wa,?wa"))))]
2793 "TARGET_HARD_FLOAT
2794 && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
2795 && !flag_trapping_math && TARGET_FRIZ"
2797 [(set_attr "type" "vecdouble")])
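;; For reference, a hedged C sketch of the (float (fix x)) idiom the pattern
;; above matches; it only collapses to a single xvrdpiz under -ffast-math,
;; because xvrdpiz does not clamp inputs outside the long long range the way
;; the separate convert-to-integer step would:
;;
;;   typedef double v2df __attribute__ ((vector_size (16)));
;;   v2df chop (v2df x)
;;   {
;;     return (v2df) { (double) (long long) x[0], (double) (long long) x[1] };
;;   }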
2800 ;; Permute operations
2802 ;; Build a V2DF/V2DI vector from two scalars
2803 (define_insn "vsx_concat_<mode>"
2804 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
2805 (vec_concat:VSX_D
2806 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
2807 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
2808 "VECTOR_MEM_VSX_P (<MODE>mode)"
2810 if (which_alternative == 0)
2811 return (BYTES_BIG_ENDIAN
2812 ? "xxpermdi %x0,%x1,%x2,0"
2813 : "xxpermdi %x0,%x2,%x1,0");
2815 else if (which_alternative == 1)
2816 return (BYTES_BIG_ENDIAN
2817 ? "mtvsrdd %x0,%1,%2"
2818 : "mtvsrdd %x0,%2,%1");
2823 [(set_attr "type" "vecperm")])
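;; Illustrative C-level source of the pattern above (hedged): initializing a
;; vector from two scalars.
;;
;;   #include <altivec.h>
;;   vector double make (double hi, double lo)
;;   {
;;     /* xxpermdi for VSX-register inputs, mtvsrdd for GPR inputs; the
;;        operand order in the asm flips between BE and LE as above.  */
;;     return (vector double) { hi, lo };
;;   }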
2825 ;; Combiner patterns to allow creating XXPERMDI's to access either double
2826 ;; word element in a vector register.
2827 (define_insn "*vsx_concat_<mode>_1"
2828 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2829 (vec_concat:VSX_D
2830 (vec_select:<VS_scalar>
2831 (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2832 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2833 (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))]
2834 "VECTOR_MEM_VSX_P (<MODE>mode)"
2836 HOST_WIDE_INT dword = INTVAL (operands[2]);
2837 if (BYTES_BIG_ENDIAN)
2838 {
2839 operands[4] = GEN_INT (2*dword);
2840 return "xxpermdi %x0,%x1,%x3,%4";
2844 operands[4] = GEN_INT (!dword);
2845 return "xxpermdi %x0,%x3,%x1,%4";
2848 [(set_attr "type" "vecperm")])
2850 (define_insn "*vsx_concat_<mode>_2"
2851 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2852 (vec_concat:VSX_D
2853 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa")
2854 (vec_select:<VS_scalar>
2855 (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
2856 (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
2857 "VECTOR_MEM_VSX_P (<MODE>mode)"
2859 HOST_WIDE_INT dword = INTVAL (operands[3]);
2860 if (BYTES_BIG_ENDIAN)
2861 {
2862 operands[4] = GEN_INT (dword);
2863 return "xxpermdi %x0,%x1,%x2,%4";
2867 operands[4] = GEN_INT (2 * !dword);
2868 return "xxpermdi %x0,%x2,%x1,%4";
2871 [(set_attr "type" "vecperm")])
2873 (define_insn "*vsx_concat_<mode>_3"
2874 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2875 (vec_concat:VSX_D
2876 (vec_select:<VS_scalar>
2877 (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2878 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2879 (vec_select:<VS_scalar>
2880 (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
2881 (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
2882 "VECTOR_MEM_VSX_P (<MODE>mode)"
2884 HOST_WIDE_INT dword1 = INTVAL (operands[2]);
2885 HOST_WIDE_INT dword2 = INTVAL (operands[4]);
2886 if (BYTES_BIG_ENDIAN)
2887 {
2888 operands[5] = GEN_INT ((2 * dword1) + dword2);
2889 return "xxpermdi %x0,%x1,%x3,%5";
2893 operands[5] = GEN_INT ((2 * !dword2) + !dword1);
2894 return "xxpermdi %x0,%x3,%x1,%5";
2897 [(set_attr "type" "vecperm")])
2899 ;; Special purpose concat using xxpermdi to glue two single precision values
2900 ;; together, relying on the fact that internally scalar floats are represented
2901 ;; as doubles. This is used to initialize a V4SF vector with 4 floats
2902 (define_insn "vsx_concat_v2sf"
2903 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2904 (unspec:V2DF
2905 [(match_operand:SF 1 "vsx_register_operand" "wa")
2906 (match_operand:SF 2 "vsx_register_operand" "wa")]
2907 UNSPEC_VSX_CONCAT))]
2908 "VECTOR_MEM_VSX_P (V2DFmode)"
2910 if (BYTES_BIG_ENDIAN)
2911 return "xxpermdi %x0,%x1,%x2,0";
2913 return "xxpermdi %x0,%x2,%x1,0";
2915 [(set_attr "type" "vecperm")])
2917 ;; Concatenate 4 SImode elements into a V4SImode reg.
2918 (define_expand "vsx_init_v4si"
2919 [(use (match_operand:V4SI 0 "gpc_reg_operand"))
2920 (use (match_operand:SI 1 "gpc_reg_operand"))
2921 (use (match_operand:SI 2 "gpc_reg_operand"))
2922 (use (match_operand:SI 3 "gpc_reg_operand"))
2923 (use (match_operand:SI 4 "gpc_reg_operand"))]
2924 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
2926 rtx a = gen_reg_rtx (DImode);
2927 rtx b = gen_reg_rtx (DImode);
2928 rtx c = gen_reg_rtx (DImode);
2929 rtx d = gen_reg_rtx (DImode);
2930 emit_insn (gen_zero_extendsidi2 (a, operands[1]));
2931 emit_insn (gen_zero_extendsidi2 (b, operands[2]));
2932 emit_insn (gen_zero_extendsidi2 (c, operands[3]));
2933 emit_insn (gen_zero_extendsidi2 (d, operands[4]));
2934 if (!BYTES_BIG_ENDIAN)
2935 {
2936 std::swap (a, b);
2937 std::swap (c, d);
2938 }
2940 rtx aa = gen_reg_rtx (DImode);
2941 rtx ab = gen_reg_rtx (DImode);
2942 rtx cc = gen_reg_rtx (DImode);
2943 rtx cd = gen_reg_rtx (DImode);
2944 emit_insn (gen_ashldi3 (aa, a, GEN_INT (32)));
2945 emit_insn (gen_ashldi3 (cc, c, GEN_INT (32)));
2946 emit_insn (gen_iordi3 (ab, aa, b));
2947 emit_insn (gen_iordi3 (cd, cc, d));
2949 rtx abcd = gen_reg_rtx (V2DImode);
2950 emit_insn (gen_vsx_concat_v2di (abcd, ab, cd));
2951 emit_move_insn (operands[0], gen_lowpart (V4SImode, abcd));
2952 DONE;
2953 })
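;; What the expander above does for one doubleword, sketched as scalar C
;; (hedged; the function name is illustrative only):
;;
;;   unsigned long long pack2 (unsigned int hi, unsigned int lo)
;;   {
;;     /* The zero_extendsidi2, ashldi3 and iordi3 steps.  */
;;     return ((unsigned long long) hi << 32) | lo;
;;   }
;;
;; The two packed doublewords are then glued with vsx_concat_v2di and the
;; V2DI result is retyped to V4SI.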
2955 ;; xxpermdi for little endian loads and stores. We need several of
2956 ;; these since the form of the PARALLEL differs by mode.
2957 (define_insn "*vsx_xxpermdi2_le_<mode>"
2958 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2959 (vec_select:VSX_D
2960 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
2961 (parallel [(const_int 1) (const_int 0)])))]
2962 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
2963 "xxpermdi %x0,%x1,%x1,2"
2964 [(set_attr "type" "vecperm")])
2966 (define_insn "xxswapd_v16qi"
2967 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
2968 (vec_select:V16QI
2969 (match_operand:V16QI 1 "vsx_register_operand" "wa")
2970 (parallel [(const_int 8) (const_int 9)
2971 (const_int 10) (const_int 11)
2972 (const_int 12) (const_int 13)
2973 (const_int 14) (const_int 15)
2974 (const_int 0) (const_int 1)
2975 (const_int 2) (const_int 3)
2976 (const_int 4) (const_int 5)
2977 (const_int 6) (const_int 7)])))]
2978 "TARGET_VSX"
2979 ;; AIX does not support the extended mnemonic xxswapd. Use the basic
2980 ;; mnemonic xxpermdi instead.
2981 "xxpermdi %x0,%x1,%x1,2"
2982 [(set_attr "type" "vecperm")])
2984 (define_insn "xxswapd_v8hi"
2985 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
2986 (vec_select:V8HI
2987 (match_operand:V8HI 1 "vsx_register_operand" "wa")
2988 (parallel [(const_int 4) (const_int 5)
2989 (const_int 6) (const_int 7)
2990 (const_int 0) (const_int 1)
2991 (const_int 2) (const_int 3)])))]
2992 "TARGET_VSX"
2993 ;; AIX does not support the extended mnemonic xxswapd. Use the basic
2994 ;; mnemonic xxpermdi instead.
2995 "xxpermdi %x0,%x1,%x1,2"
2996 [(set_attr "type" "vecperm")])
2998 (define_insn "xxswapd_<mode>"
2999 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
3000 (vec_select:VSX_W
3001 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
3002 (parallel [(const_int 2) (const_int 3)
3003 (const_int 0) (const_int 1)])))]
3004 "TARGET_VSX"
3005 ;; AIX does not support extended mnemonic xxswapd. Use the basic
3006 ;; mnemonic xxpermdi instead.
3007 "xxpermdi %x0,%x1,%x1,2"
3008 [(set_attr "type" "vecperm")])
3010 (define_insn "xxswapd_<mode>"
3011 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3012 (vec_select:VSX_D
3013 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3014 (parallel [(const_int 1) (const_int 0)])))]
3015 "TARGET_VSX"
3016 ;; AIX does not support extended mnemonic xxswapd. Use the basic
3017 ;; mnemonic xxpermdi instead.
3018 "xxpermdi %x0,%x1,%x1,2"
3019 [(set_attr "type" "vecperm")])
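;; Hedged example of the doubleword swap that all of the xxswapd_* patterns
;; above perform (only the element grouping differs by mode):
;;
;;   typedef unsigned int v4si __attribute__ ((vector_size (16)));
;;   v4si swapd (v4si x)
;;   {
;;     v4si mask = { 2, 3, 0, 1 };
;;     return __builtin_shuffle (x, mask);  /* xxpermdi %x0,%x1,%x1,2 */
;;   }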
3021 (define_insn "xxgenpcvm_<mode>_internal"
3022 [(set (match_operand:VSX_EXTRACT_I4 0 "altivec_register_operand" "=wa")
3023 (unspec:VSX_EXTRACT_I4
3024 [(match_operand:VSX_EXTRACT_I4 1 "altivec_register_operand" "v")
3025 (match_operand:QI 2 "const_0_to_3_operand" "n")]
3026 UNSPEC_XXGENPCV))]
3027 "TARGET_POWER10 && TARGET_64BIT"
3028 "xxgenpcv<wd>m %x0,%1,%2"
3029 [(set_attr "type" "vecsimple")])
3031 (define_expand "xxgenpcvm_<mode>"
3032 [(use (match_operand:VSX_EXTRACT_I4 0 "register_operand"))
3033 (use (match_operand:VSX_EXTRACT_I4 1 "register_operand"))
3034 (use (match_operand:QI 2 "immediate_operand"))]
3035 "TARGET_POWER10"
3036 {
3037 if (!BYTES_BIG_ENDIAN)
3038 {
3039 /* gen_xxgenpcvm assumes Big Endian order. If LE,
3040 swap the upper and lower double words. */
3041 rtx tmp = gen_reg_rtx (<MODE>mode);
3043 emit_insn (gen_xxswapd_<mode> (tmp, operands[1]));
3044 operands[1] = tmp;
3045 }
3046 emit_insn (gen_xxgenpcvm_<mode>_internal (operands[0], operands[1],
3047 operands[2]));
3048 DONE;
3049 })
3051 ;; lxvd2x for little endian loads. We need several of
3052 ;; these since the form of the PARALLEL differs by mode.
3053 (define_insn "*vsx_lxvd2x2_le_<mode>"
3054 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3055 (vec_select:VSX_D
3056 (match_operand:VSX_D 1 "memory_operand" "Z")
3057 (parallel [(const_int 1) (const_int 0)])))]
3058 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3060 [(set_attr "type" "vecload")])
3062 (define_insn "*vsx_lxvd2x4_le_<mode>"
3063 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
3064 (vec_select:VSX_W
3065 (match_operand:VSX_W 1 "memory_operand" "Z")
3066 (parallel [(const_int 2) (const_int 3)
3067 (const_int 0) (const_int 1)])))]
3068 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3070 [(set_attr "type" "vecload")])
3072 (define_insn "*vsx_lxvd2x8_le_V8HI"
3073 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3074 (vec_select:V8HI
3075 (match_operand:V8HI 1 "memory_operand" "Z")
3076 (parallel [(const_int 4) (const_int 5)
3077 (const_int 6) (const_int 7)
3078 (const_int 0) (const_int 1)
3079 (const_int 2) (const_int 3)])))]
3080 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3082 [(set_attr "type" "vecload")])
3084 (define_insn "*vsx_lxvd2x16_le_V16QI"
3085 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3086 (vec_select:V16QI
3087 (match_operand:V16QI 1 "memory_operand" "Z")
3088 (parallel [(const_int 8) (const_int 9)
3089 (const_int 10) (const_int 11)
3090 (const_int 12) (const_int 13)
3091 (const_int 14) (const_int 15)
3092 (const_int 0) (const_int 1)
3093 (const_int 2) (const_int 3)
3094 (const_int 4) (const_int 5)
3095 (const_int 6) (const_int 7)])))]
3096 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3098 [(set_attr "type" "vecload")])
3100 ;; stxvd2x for little endian stores. We need several of
3101 ;; these since the form of the PARALLEL differs by mode.
3102 (define_insn "*vsx_stxvd2x2_le_<mode>"
3103 [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
3105 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3106 (parallel [(const_int 1) (const_int 0)])))]
3107 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3109 [(set_attr "type" "vecstore")])
3111 (define_insn "*vsx_stxvd2x4_le_<mode>"
3112 [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
3113 (vec_select:VSX_W
3114 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
3115 (parallel [(const_int 2) (const_int 3)
3116 (const_int 0) (const_int 1)])))]
3117 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3119 [(set_attr "type" "vecstore")])
3121 (define_insn "*vsx_stxvd2x8_le_V8HI"
3122 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
3123 (vec_select:V8HI
3124 (match_operand:V8HI 1 "vsx_register_operand" "wa")
3125 (parallel [(const_int 4) (const_int 5)
3126 (const_int 6) (const_int 7)
3127 (const_int 0) (const_int 1)
3128 (const_int 2) (const_int 3)])))]
3129 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3131 [(set_attr "type" "vecstore")])
3133 (define_insn "*vsx_stxvd2x16_le_V16QI"
3134 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
3135 (vec_select:V16QI
3136 (match_operand:V16QI 1 "vsx_register_operand" "wa")
3137 (parallel [(const_int 8) (const_int 9)
3138 (const_int 10) (const_int 11)
3139 (const_int 12) (const_int 13)
3140 (const_int 14) (const_int 15)
3141 (const_int 0) (const_int 1)
3142 (const_int 2) (const_int 3)
3143 (const_int 4) (const_int 5)
3144 (const_int 6) (const_int 7)])))]
3145 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3147 [(set_attr "type" "vecstore")])
3149 ;; Convert a TImode value into V1TImode
3150 (define_expand "vsx_set_v1ti"
3151 [(match_operand:V1TI 0 "nonimmediate_operand")
3152 (match_operand:V1TI 1 "nonimmediate_operand")
3153 (match_operand:TI 2 "input_operand")
3154 (match_operand:QI 3 "u5bit_cint_operand")]
3155 "VECTOR_MEM_VSX_P (V1TImode)"
3157 if (operands[3] != const0_rtx)
3158 gcc_unreachable ();
3160 emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[2]));
3161 DONE;
3162 })
3164 ;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
3165 (define_expand "vsx_set_<mode>"
3166 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
3167 (use (match_operand:VSX_D 1 "vsx_register_operand"))
3168 (use (match_operand:<VS_scalar> 2 "gpc_reg_operand"))
3169 (use (match_operand:QI 3 "const_0_to_1_operand"))]
3170 "VECTOR_MEM_VSX_P (<MODE>mode)"
3172 rtx dest = operands[0];
3173 rtx vec_reg = operands[1];
3174 rtx value = operands[2];
3175 rtx ele = operands[3];
3176 rtx tmp = gen_reg_rtx (<VS_scalar>mode);
3178 if (ele == const0_rtx)
3179 {
3180 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
3181 emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
3182 DONE;
3183 }
3184 else if (ele == const1_rtx)
3185 {
3186 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
3187 emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
3188 DONE;
3189 }
3190 else
3191 gcc_unreachable ();
3192 })
3194 ;; Extract a DF/DI element from V2DF/V2DI
3195 ;; Optimize cases where we can do a simple or direct move.
3196 ;; Or see if we can avoid doing the move at all
3198 ;; There are some unresolved problems with reload that show up if an Altivec
3199 ;; register was picked. Limit the scalar value to FPRs for now.
3201 (define_insn "vsx_extract_<mode>"
3202 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d, d, wr, wr")
3203 (vec_select:<VS_scalar>
3204 (match_operand:VSX_D 1 "gpc_reg_operand" "wa, wa, wa, wa")
3205 (parallel
3206 [(match_operand:QI 2 "const_0_to_1_operand" "wD, n, wD, n")])))]
3207 "VECTOR_MEM_VSX_P (<MODE>mode)"
3209 int element = INTVAL (operands[2]);
3210 int op0_regno = REGNO (operands[0]);
3211 int op1_regno = REGNO (operands[1]);
3212 int fldDM;
3214 gcc_assert (IN_RANGE (element, 0, 1));
3215 gcc_assert (VSX_REGNO_P (op1_regno));
3217 if (element == VECTOR_ELEMENT_SCALAR_64BIT)
3218 {
3219 if (op0_regno == op1_regno)
3220 return ASM_COMMENT_START " vec_extract to same register";
3222 else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
3223 && TARGET_POWERPC64)
3224 return "mfvsrd %0,%x1";
3226 else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
3227 return "fmr %0,%1";
3229 else if (VSX_REGNO_P (op0_regno))
3230 return "xxlor %x0,%x1,%x1";
3236 else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
3237 && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
3238 return "mfvsrld %0,%x1";
3240 else if (VSX_REGNO_P (op0_regno))
3241 {
3242 fldDM = element << 1;
3243 if (!BYTES_BIG_ENDIAN)
3244 fldDM = 3 - fldDM;
3245 operands[3] = GEN_INT (fldDM);
3246 return "xxpermdi %x0,%x1,%x1,%3";
3252 [(set_attr "type" "veclogical,mftgpr,mftgpr,vecperm")
3253 (set_attr "isa" "*,*,p8v,p9v")])
3255 ;; Optimize extracting a single scalar element from memory.
3256 (define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
3257 [(set (match_operand:<VS_scalar> 0 "register_operand" "=wa,wr")
3258 (vec_select:<VSX_D:VS_scalar>
3259 (match_operand:VSX_D 1 "memory_operand" "m,m")
3260 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
3261 (clobber (match_scratch:P 3 "=&b,&b"))]
3262 "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
3264 "&& reload_completed"
3265 [(set (match_dup 0) (match_dup 4))]
3266 {
3267 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3268 operands[3], <VSX_D:VS_scalar>mode);
3269 }
3270 [(set_attr "type" "fpload,load")
3271 (set_attr "length" "8")])
3273 ;; Optimize storing a single scalar element that is the right location to
3274 ;; memory
3275 (define_insn "*vsx_extract_<mode>_store"
3276 [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY")
3277 (vec_select:<VS_scalar>
3278 (match_operand:VSX_D 1 "register_operand" "d,v,v")
3279 (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
3280 "VECTOR_MEM_VSX_P (<MODE>mode)"
3285 [(set_attr "type" "fpstore")
3286 (set_attr "isa" "*,p7v,p9v")])
3288 ;; Variable V2DI/V2DF extract shift
3289 (define_insn "vsx_vslo_<mode>"
3290 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
3291 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
3292 (match_operand:V2DI 2 "gpc_reg_operand" "v")]
3293 UNSPEC_VSX_VSLO))]
3294 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3296 [(set_attr "type" "vecperm")])
3298 ;; Variable V2DI/V2DF extract from a register
3299 (define_insn_and_split "vsx_extract_<mode>_var"
3300 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
3301 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
3302 (match_operand:DI 2 "gpc_reg_operand" "r")]
3303 UNSPEC_VSX_EXTRACT))
3304 (clobber (match_scratch:DI 3 "=r"))
3305 (clobber (match_scratch:V2DI 4 "=&v"))]
3306 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3308 "&& reload_completed"
3311 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3312 operands[3], operands[4]);
3313 DONE;
3314 })
3316 ;; Variable V2DI/V2DF extract from memory
3317 (define_insn_and_split "*vsx_extract_<mode>_var_load"
3318 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=wa,r")
3319 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "memory_operand" "Q,Q")
3320 (match_operand:DI 2 "gpc_reg_operand" "r,r")]
3321 UNSPEC_VSX_EXTRACT))
3322 (clobber (match_scratch:DI 3 "=&b,&b"))]
3323 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3325 "&& reload_completed"
3326 [(set (match_dup 0) (match_dup 4))]
3327 {
3328 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3329 operands[3], <VS_scalar>mode);
3330 }
3331 [(set_attr "type" "fpload,load")])
3333 ;; Extract a SF element from V4SF
3334 (define_insn_and_split "vsx_extract_v4sf"
3335 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
3336 (vec_select:SF
3337 (match_operand:V4SF 1 "vsx_register_operand" "wa")
3338 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
3339 (clobber (match_scratch:V4SF 3 "=0"))]
3340 "VECTOR_UNIT_VSX_P (V4SFmode)"
3345 rtx op0 = operands[0];
3346 rtx op1 = operands[1];
3347 rtx op2 = operands[2];
3348 rtx op3 = operands[3];
3349 rtx tmp;
3350 HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
3352 if (ele == 0)
3353 tmp = op1;
3354 else
3355 {
3356 if (GET_CODE (op3) == SCRATCH)
3357 op3 = gen_reg_rtx (V4SFmode);
3358 emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
3359 tmp = op3;
3360 }
3361 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
3362 DONE;
3363 }
3364 [(set_attr "length" "8")
3365 (set_attr "type" "fp")])
3367 (define_insn_and_split "*vsx_extract_v4sf_<mode>_load"
3368 [(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
3369 (vec_select:SF
3370 (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
3371 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
3372 (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
3373 "VECTOR_MEM_VSX_P (V4SFmode)"
3375 "&& reload_completed"
3376 [(set (match_dup 0) (match_dup 4))]
3377 {
3378 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3379 operands[3], SFmode);
3380 }
3381 [(set_attr "type" "fpload,fpload,fpload,load")
3382 (set_attr "length" "8")
3383 (set_attr "isa" "*,p7v,p9v,*")])
3385 ;; Variable V4SF extract from a register
3386 (define_insn_and_split "vsx_extract_v4sf_var"
3387 [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
3388 (unspec:SF [(match_operand:V4SF 1 "gpc_reg_operand" "v")
3389 (match_operand:DI 2 "gpc_reg_operand" "r")]
3390 UNSPEC_VSX_EXTRACT))
3391 (clobber (match_scratch:DI 3 "=r"))
3392 (clobber (match_scratch:V2DI 4 "=&v"))]
3393 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
3395 "&& reload_completed"
3398 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3399 operands[3], operands[4]);
3400 DONE;
3401 })
3403 ;; Variable V4SF extract from memory
3404 (define_insn_and_split "*vsx_extract_v4sf_var_load"
3405 [(set (match_operand:SF 0 "gpc_reg_operand" "=wa,?r")
3406 (unspec:SF [(match_operand:V4SF 1 "memory_operand" "Q,Q")
3407 (match_operand:DI 2 "gpc_reg_operand" "r,r")]
3408 UNSPEC_VSX_EXTRACT))
3409 (clobber (match_scratch:DI 3 "=&b,&b"))]
3410 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
3412 "&& reload_completed"
3413 [(set (match_dup 0) (match_dup 4))]
3414 {
3415 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3416 operands[3], SFmode);
3417 }
3418 [(set_attr "type" "fpload,load")])
3420 ;; Expand the builtin form of xxpermdi to canonical rtl.
3421 (define_expand "vsx_xxpermdi_<mode>"
3422 [(match_operand:VSX_L 0 "vsx_register_operand")
3423 (match_operand:VSX_L 1 "vsx_register_operand")
3424 (match_operand:VSX_L 2 "vsx_register_operand")
3425 (match_operand:QI 3 "u5bit_cint_operand")]
3426 "VECTOR_MEM_VSX_P (<MODE>mode)"
3428 rtx target = operands[0];
3429 rtx op0 = operands[1];
3430 rtx op1 = operands[2];
3431 int mask = INTVAL (operands[3]);
3432 rtx perm0 = GEN_INT ((mask >> 1) & 1);
3433 rtx perm1 = GEN_INT ((mask & 1) + 2);
3434 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3436 if (<MODE>mode == V2DFmode)
3437 gen = gen_vsx_xxpermdi2_v2df_1;
3438 else
3439 {
3440 gen = gen_vsx_xxpermdi2_v2di_1;
3441 if (<MODE>mode != V2DImode)
3442 {
3443 target = gen_lowpart (V2DImode, target);
3444 op0 = gen_lowpart (V2DImode, op0);
3445 op1 = gen_lowpart (V2DImode, op1);
3446 }
3447 }
3448 emit_insn (gen (target, op0, op1, perm0, perm1));
3449 DONE;
3450 })
3452 ;; Special version of xxpermdi that retains big-endian semantics.
3453 (define_expand "vsx_xxpermdi_<mode>_be"
3454 [(match_operand:VSX_L 0 "vsx_register_operand")
3455 (match_operand:VSX_L 1 "vsx_register_operand")
3456 (match_operand:VSX_L 2 "vsx_register_operand")
3457 (match_operand:QI 3 "u5bit_cint_operand")]
3458 "VECTOR_MEM_VSX_P (<MODE>mode)"
3460 rtx target = operands[0];
3461 rtx op0 = operands[1];
3462 rtx op1 = operands[2];
3463 int mask = INTVAL (operands[3]);
3464 rtx perm0 = GEN_INT ((mask >> 1) & 1);
3465 rtx perm1 = GEN_INT ((mask & 1) + 2);
3466 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3468 if (<MODE>mode == V2DFmode)
3469 gen = gen_vsx_xxpermdi2_v2df_1;
3470 else
3471 {
3472 gen = gen_vsx_xxpermdi2_v2di_1;
3473 if (<MODE>mode != V2DImode)
3474 {
3475 target = gen_lowpart (V2DImode, target);
3476 op0 = gen_lowpart (V2DImode, op0);
3477 op1 = gen_lowpart (V2DImode, op1);
3478 }
3479 }
3480 /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
3481 transformation we don't want; it is necessary for
3482 rs6000_expand_vec_perm_const_1 but not for this use. So we
3483 prepare for that by reversing the transformation here. */
3484 if (BYTES_BIG_ENDIAN)
3485 emit_insn (gen (target, op0, op1, perm0, perm1));
3486 else
3487 {
3488 rtx p0 = GEN_INT (3 - INTVAL (perm1));
3489 rtx p1 = GEN_INT (3 - INTVAL (perm0));
3490 emit_insn (gen (target, op1, op0, p0, p1));
3491 }
3492 DONE;
3493 })
3495 (define_insn "vsx_xxpermdi2_<mode>_1"
3496 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3497 (vec_select:VSX_D
3498 (vec_concat:<VS_double>
3499 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3500 (match_operand:VSX_D 2 "vsx_register_operand" "wa"))
3501 (parallel [(match_operand 3 "const_0_to_1_operand" "")
3502 (match_operand 4 "const_2_to_3_operand" "")])))]
3503 "VECTOR_MEM_VSX_P (<MODE>mode)"
3507 /* For little endian, swap operands and invert/swap selectors
3508 to get the correct xxpermdi. The operand swap sets up the
3509 inputs as a little endian array. The selectors are swapped
3510 because they are defined to use big endian ordering. The
3511 selectors are inverted to get the correct doublewords for
3512 little endian ordering. */
3513 if (BYTES_BIG_ENDIAN)
3514 {
3515 op3 = INTVAL (operands[3]);
3516 op4 = INTVAL (operands[4]);
3517 }
3518 else
3519 {
3520 op3 = 3 - INTVAL (operands[4]);
3521 op4 = 3 - INTVAL (operands[3]);
3522 }
3524 mask = (op3 << 1) | (op4 - 2);
3525 operands[3] = GEN_INT (mask);
3527 if (BYTES_BIG_ENDIAN)
3528 return "xxpermdi %x0,%x1,%x2,%3";
3530 return "xxpermdi %x0,%x2,%x1,%3";
3532 [(set_attr "type" "vecperm")])
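;; Worked example of the selector arithmetic above (for illustration): with
;; BE selectors op3 = 1 and op4 = 2, the immediate is (1 << 1) | (2 - 2) = 2,
;; giving "xxpermdi %x0,%x1,%x2,2".  On LE the same RTL computes
;; op3 = 3 - 2 = 1 and op4 = 3 - 1 = 2, so the immediate is again 2, but the
;; source operands are swapped in the emitted xxpermdi, selecting the same
;; doublewords.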
3534 ;; Extraction of a single element in a small integer vector. Until ISA 3.0,
3535 ;; none of the small types were allowed in a vector register, so we had to
3536 ;; extract to a DImode and either do a direct move or store.
3537 (define_expand "vsx_extract_<mode>"
3538 [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand")
3539 (vec_select:<VS_scalar>
3540 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
3541 (parallel [(match_operand:QI 2 "const_int_operand")])))
3542 (clobber (match_scratch:VSX_EXTRACT_I 3))])]
3543 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3545 /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}. */
3546 if (TARGET_P9_VECTOR)
3547 {
3548 emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
3549 operands[2]));
3550 DONE;
3551 }
3552 })
3554 (define_insn "vsx_extract_<mode>_p9"
3555 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>")
3556 (vec_select:<VS_scalar>
3557 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
3558 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
3559 (clobber (match_scratch:SI 3 "=r,X"))]
3560 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3562 if (which_alternative == 0)
3563 return "#";
3565 else
3566 {
3567 HOST_WIDE_INT elt = INTVAL (operands[2]);
3568 HOST_WIDE_INT elt_adj = (!BYTES_BIG_ENDIAN
3569 ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
3570 : elt);
3572 HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
3573 HOST_WIDE_INT offset = unit_size * elt_adj;
3575 operands[2] = GEN_INT (offset);
3576 if (unit_size == 4)
3577 return "xxextractuw %x0,%x1,%2";
3579 return "vextractu<wd> %0,%1,%2";
3582 [(set_attr "type" "vecsimple")
3583 (set_attr "isa" "p9v,*")])
3586 [(set (match_operand:<VS_scalar> 0 "int_reg_operand")
3587 (vec_select:<VS_scalar>
3588 (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
3589 (parallel [(match_operand:QI 2 "const_int_operand")])))
3590 (clobber (match_operand:SI 3 "int_reg_operand"))]
3591 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
3594 rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
3595 rtx op1 = operands[1];
3596 rtx op2 = operands[2];
3597 rtx op3 = operands[3];
3598 HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);
3600 emit_move_insn (op3, GEN_INT (offset));
3601 if (BYTES_BIG_ENDIAN)
3602 emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
3603 else
3604 emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
3605 DONE;
3606 })
3608 ;; Optimize zero extracts to eliminate the AND after the extract.
3609 (define_insn_and_split "*vsx_extract_<mode>_di_p9"
3610 [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
3611 (zero_extend:DI
3612 (vec_select:<VS_scalar>
3613 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
3614 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
3615 (clobber (match_scratch:SI 3 "=r,X"))]
3616 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3618 "&& reload_completed"
3619 [(parallel [(set (match_dup 4)
3620 (vec_select:<VS_scalar>
3621 (match_dup 1)
3622 (parallel [(match_dup 2)])))
3623 (clobber (match_dup 3))])]
3624 {
3625 operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0]));
3626 }
3627 [(set_attr "isa" "p9v,*")])
3629 ;; Optimize stores to use the ISA 3.0 scalar store instructions
3630 (define_insn_and_split "*vsx_extract_<mode>_store_p9"
3631 [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m")
3632 (vec_select:<VS_scalar>
3633 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
3634 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
3635 (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r"))
3636 (clobber (match_scratch:SI 4 "=X,&r"))]
3637 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3639 "&& reload_completed"
3640 [(parallel [(set (match_dup 3)
3641 (vec_select:<VS_scalar>
3642 (match_dup 1)
3643 (parallel [(match_dup 2)])))
3644 (clobber (match_dup 4))])
3645 (set (match_dup 0)
3646 (match_dup 3))])
3648 (define_insn_and_split "*vsx_extract_si"
3649 [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z")
3650 (vec_select:SI
3651 (match_operand:V4SI 1 "gpc_reg_operand" "v,v,v")
3652 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
3653 (clobber (match_scratch:V4SI 3 "=v,v,v"))]
3654 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
3656 "&& reload_completed"
3659 rtx dest = operands[0];
3660 rtx src = operands[1];
3661 rtx element = operands[2];
3662 rtx vec_tmp = operands[3];
3663 int value;
3665 if (!BYTES_BIG_ENDIAN)
3666 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3668 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3669 instruction. */
3670 value = INTVAL (element);
3671 if (value != 1)
3672 emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
3673 else
3674 vec_tmp = src;
3676 if (MEM_P (operands[0]))
3677 {
3678 if (can_create_pseudo_p ())
3679 dest = rs6000_force_indexed_or_indirect_mem (dest);
3681 if (TARGET_P8_VECTOR)
3682 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3683 else
3684 emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
3685 }
3687 else if (TARGET_P8_VECTOR)
3688 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3689 else
3690 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3691 gen_rtx_REG (DImode, REGNO (vec_tmp)));
3693 DONE;
3694 }
3695 [(set_attr "type" "mftgpr,vecperm,fpstore")
3696 (set_attr "length" "8")
3697 (set_attr "isa" "*,p8v,*")])
3699 (define_insn_and_split "*vsx_extract_<mode>_p8"
3700 [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r")
3701 (vec_select:<VS_scalar>
3702 (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
3703 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3704 (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
3705 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3706 && !TARGET_P9_VECTOR"
3708 "&& reload_completed"
3711 rtx dest = operands[0];
3712 rtx src = operands[1];
3713 rtx element = operands[2];
3714 rtx vec_tmp = operands[3];
3715 int value;
3717 if (!BYTES_BIG_ENDIAN)
3718 element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));
3720 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3721 instruction. */
3722 value = INTVAL (element);
3723 if (<MODE>mode == V16QImode)
3724 {
3725 if (value != 7)
3726 emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
3727 else
3728 vec_tmp = src;
3729 }
3730 else if (<MODE>mode == V8HImode)
3731 {
3732 if (value != 3)
3733 emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
3734 else
3735 vec_tmp = src;
3736 }
3737 else
3738 gcc_unreachable ();
3740 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3741 gen_rtx_REG (DImode, REGNO (vec_tmp)));
3742 DONE;
3743 }
3744 [(set_attr "type" "mftgpr")])
3746 ;; Optimize extracting a single scalar element from memory.
3747 (define_insn_and_split "*vsx_extract_<mode>_load"
3748 [(set (match_operand:<VS_scalar> 0 "register_operand" "=r")
3749 (vec_select:<VS_scalar>
3750 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
3751 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3752 (clobber (match_scratch:DI 3 "=&b"))]
3753 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3755 "&& reload_completed"
3756 [(set (match_dup 0) (match_dup 4))]
3757 {
3758 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3759 operands[3], <VS_scalar>mode);
3760 }
3761 [(set_attr "type" "load")
3762 (set_attr "length" "8")])
3764 ;; Variable V16QI/V8HI/V4SI extract from a register
3765 (define_insn_and_split "vsx_extract_<mode>_var"
3766 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r")
3767 (unspec:<VS_scalar>
3768 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,v")
3769 (match_operand:DI 2 "gpc_reg_operand" "r,r")]
3770 UNSPEC_VSX_EXTRACT))
3771 (clobber (match_scratch:DI 3 "=r,r"))
3772 (clobber (match_scratch:V2DI 4 "=X,&v"))]
3773 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3775 "&& reload_completed"
3778 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3779 operands[3], operands[4]);
3780 DONE;
3781 })
3782 [(set_attr "isa" "p9v,*")])
3784 ;; Variable V16QI/V8HI/V4SI extract from memory
3785 (define_insn_and_split "*vsx_extract_<mode>_var_load"
3786 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r")
3787 (unspec:<VS_scalar>
3788 [(match_operand:VSX_EXTRACT_I 1 "memory_operand" "Q")
3789 (match_operand:DI 2 "gpc_reg_operand" "r")]
3790 UNSPEC_VSX_EXTRACT))
3791 (clobber (match_scratch:DI 3 "=&b"))]
3792 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3794 "&& reload_completed"
3795 [(set (match_dup 0) (match_dup 4))]
3796 {
3797 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3798 operands[3], <VS_scalar>mode);
3799 }
3800 [(set_attr "type" "load")])
3802 ;; VSX_EXTRACT optimizations
3803 ;; Optimize double d = (double) vec_extract (vi, <n>)
3804 ;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
3805 (define_insn_and_split "*vsx_extract_si_<uns>float_df"
3806 [(set (match_operand:DF 0 "gpc_reg_operand" "=wa")
3807 (any_float:DF
3808 (vec_select:SI
3809 (match_operand:V4SI 1 "gpc_reg_operand" "v")
3810 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3811 (clobber (match_scratch:V4SI 3 "=v"))]
3812 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3817 rtx dest = operands[0];
3818 rtx src = operands[1];
3819 rtx element = operands[2];
3820 rtx v4si_tmp = operands[3];
3823 if (!BYTES_BIG_ENDIAN)
3824 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3826 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3828 value = INTVAL (element);
3831 if (GET_CODE (v4si_tmp) == SCRATCH)
3832 v4si_tmp = gen_reg_rtx (V4SImode);
3833 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
3838 emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
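
;; A hedged sketch of the source idiom this pattern optimizes (illustrative;
;; the expected sequence is a splat to move the element into position
;; followed by XVCVSXWDP/XVCVUXWDP, with no trip through the GPRs):
;;
;;   #include <altivec.h>
;;   double conv (vector int vi)
;;   {
;;     return (double) vec_extract (vi, 2);
;;   }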

;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
;; where <type> is a floating point type supported by the hardware that is
;; not double.  First convert the value to double, and then to the desired
;; type.
(define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
  [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=wa")
        (any_float:VSX_EXTRACT_FL
         (vec_select:SI
          (match_operand:V4SI 1 "gpc_reg_operand" "v")
          (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
   (clobber (match_scratch:V4SI 3 "=v"))
   (clobber (match_scratch:DF 4 "=wa"))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx src = operands[1];
  rtx element = operands[2];
  rtx v4si_tmp = operands[3];
  rtx df_tmp = operands[4];
  int value;

  if (!BYTES_BIG_ENDIAN)
    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));

  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
  value = INTVAL (element);
  if (value != 0)
    {
      if (GET_CODE (v4si_tmp) == SCRATCH)
        v4si_tmp = gen_reg_rtx (V4SImode);
      emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
    }
  else
    v4si_tmp = src;

  if (GET_CODE (df_tmp) == SCRATCH)
    df_tmp = gen_reg_rtx (DFmode);

  emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));

  if (<MODE>mode == SFmode)
    emit_insn (gen_truncdfsf2 (dest, df_tmp));
  else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
    emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
  else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
           && TARGET_FLOAT128_HW)
    emit_insn (gen_extenddftf2_hw (dest, df_tmp));
  else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
    emit_insn (gen_extenddfif2 (dest, df_tmp));
  else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
    emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
  else
    gcc_unreachable ();

  DONE;
})
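
;; A hedged sketch (illustrative): the same idea for a narrower or wider
;; destination type goes through DFmode first.
;;
;;   #include <altivec.h>
;;   float convf (vector int vi)
;;   {
;;     return (float) vec_extract (vi, 1);
;;   }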

;; Optimize <type> f = (<ftype>) vec_extract (<vtype>, <n>)
;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE
;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
;; vector short or vector unsigned short.
(define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>"
  [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa")
        (float:FL_CONV
         (vec_select:<VSX_EXTRACT_I:VS_scalar>
          (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
          (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
   (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
  "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
   && TARGET_P9_VECTOR"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 3)
                   (vec_select:<VSX_EXTRACT_I:VS_scalar>
                    (match_dup 1)
                    (parallel [(match_dup 2)])))
              (clobber (scratch:SI))])
   (set (match_dup 4)
        (sign_extend:DI (match_dup 3)))
   (set (match_dup 0)
        (float:<FL_CONV:MODE> (match_dup 4)))]
{
  operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
}
  [(set_attr "isa" "<FL_CONV:VSisa>")])

(define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>"
  [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa")
        (unsigned_float:FL_CONV
         (vec_select:<VSX_EXTRACT_I:VS_scalar>
          (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
          (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
   (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
  "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
   && TARGET_P9_VECTOR"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 3)
                   (vec_select:<VSX_EXTRACT_I:VS_scalar>
                    (match_dup 1)
                    (parallel [(match_dup 2)])))
              (clobber (scratch:SI))])
   (set (match_dup 0)
        (float:<FL_CONV:MODE> (match_dup 4)))]
{
  operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
}
  [(set_attr "isa" "<FL_CONV:VSisa>")])

;; V4SI/V8HI/V16QI set operation on ISA 3.0
(define_insn "vsx_set_<mode>_p9"
  [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
        (unspec:VSX_EXTRACT_I
         [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
          (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>")
          (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
         UNSPEC_VSX_SET))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
{
  int ele = INTVAL (operands[3]);
  int nunits = GET_MODE_NUNITS (<MODE>mode);

  if (!BYTES_BIG_ENDIAN)
    ele = nunits - 1 - ele;

  operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * ele);
  if (<MODE>mode == V4SImode)
    return "xxinsertw %x0,%x2,%3";
  else
    return "vinsert<wd> %0,%2,%3";
}
  [(set_attr "type" "vecperm")])
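
;; A hedged sketch of the source idiom (illustrative):
;;
;;   #include <altivec.h>
;;   vector int set2 (vector int v, int x)
;;   {
;;     return vec_insert (x, v, 2);
;;   }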

(define_insn_and_split "vsx_set_v4sf_p9"
  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
        (unspec:V4SF
         [(match_operand:V4SF 1 "gpc_reg_operand" "0")
          (match_operand:SF 2 "gpc_reg_operand" "wa")
          (match_operand:QI 3 "const_0_to_3_operand" "n")]
         UNSPEC_VSX_SET))
   (clobber (match_scratch:SI 4 "=&wa"))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
  "#"
  "&& reload_completed"
  [(set (match_dup 5)
        (unspec:V4SF [(match_dup 2)]
                     UNSPEC_VSX_CVDPSPN))
   (parallel [(set (match_dup 4)
                   (vec_select:SI (match_dup 6)
                                  (parallel [(match_dup 7)])))
              (clobber (scratch:SI))])
   (set (match_dup 8)
        (unspec:V4SI [(match_dup 8)
                      (match_dup 4)
                      (match_dup 3)]
                     UNSPEC_VSX_SET))]
{
  unsigned int tmp_regno = reg_or_subregno (operands[4]);

  operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
  operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
  operands[7] = GEN_INT (BYTES_BIG_ENDIAN ? 0 : 3);
  operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
}
  [(set_attr "type" "vecperm")
   (set_attr "length" "12")
   (set_attr "isa" "p9v")])

;; Special case setting 0.0f to a V4SF element
(define_insn_and_split "*vsx_set_v4sf_p9_zero"
  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
        (unspec:V4SF
         [(match_operand:V4SF 1 "gpc_reg_operand" "0")
          (match_operand:SF 2 "zero_fp_constant" "j")
          (match_operand:QI 3 "const_0_to_3_operand" "n")]
         UNSPEC_VSX_SET))
   (clobber (match_scratch:SI 4 "=&wa"))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
  "#"
  "&& reload_completed"
  [(set (match_dup 4)
        (const_int 0))
   (set (match_dup 5)
        (unspec:V4SI [(match_dup 5)
                      (match_dup 4)
                      (match_dup 3)]
                     UNSPEC_VSX_SET))]
{
  operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
}
  [(set_attr "type" "vecperm")
   (set_attr "length" "8")
   (set_attr "isa" "p9v")])

;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
;; that is in the default scalar position (1 for big endian, 2 for little
;; endian).  We just need to do an xxinsertw since the element is in the
;; correct location.

(define_insn "*vsx_insert_extract_v4sf_p9"
  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
        (unspec:V4SF
         [(match_operand:V4SF 1 "gpc_reg_operand" "0")
          (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
                         (parallel
                          [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
          (match_operand:QI 4 "const_0_to_3_operand" "n")]
         UNSPEC_VSX_SET))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64
   && (INTVAL (operands[3]) == (BYTES_BIG_ENDIAN ? 1 : 2))"
{
  int ele = INTVAL (operands[4]);

  if (!BYTES_BIG_ENDIAN)
    ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele;

  operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
  return "xxinsertw %x0,%x2,%4";
}
  [(set_attr "type" "vecperm")])

;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
;; that is in the default scalar position (1 for big endian, 2 for little
;; endian).  Convert the insert/extract to int and avoid doing the conversion.

(define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
        (unspec:V4SF
         [(match_operand:V4SF 1 "gpc_reg_operand" "0")
          (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
                         (parallel
                          [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
          (match_operand:QI 4 "const_0_to_3_operand" "n")]
         UNSPEC_VSX_SET))
   (clobber (match_scratch:SI 5 "=&wa"))]
  "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
   && TARGET_P9_VECTOR && TARGET_POWERPC64
   && (INTVAL (operands[3]) != (BYTES_BIG_ENDIAN ? 1 : 2))"
  "#"
  "&& 1"
  [(parallel [(set (match_dup 5)
                   (vec_select:SI (match_dup 6)
                                  (parallel [(match_dup 3)])))
              (clobber (scratch:SI))])
   (set (match_dup 7)
        (unspec:V4SI [(match_dup 8)
                      (match_dup 5)
                      (match_dup 4)]
                     UNSPEC_VSX_SET))]
{
  if (GET_CODE (operands[5]) == SCRATCH)
    operands[5] = gen_reg_rtx (SImode);

  operands[6] = gen_lowpart (V4SImode, operands[2]);
  operands[7] = gen_lowpart (V4SImode, operands[0]);
  operands[8] = gen_lowpart (V4SImode, operands[1]);
}
  [(set_attr "type" "vecperm")
   (set_attr "isa" "p9v")])
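
;; A hedged sketch of the combined insert/extract idiom both patterns
;; above target (illustrative):
;;
;;   #include <altivec.h>
;;   vector float ins (vector float v1, vector float v2)
;;   {
;;     return vec_insert (vec_extract (v2, 1), v1, 0);
;;   }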

;; Expanders for builtins
(define_expand "vsx_mergel_<mode>"
  [(use (match_operand:VSX_D 0 "vsx_register_operand"))
   (use (match_operand:VSX_D 1 "vsx_register_operand"))
   (use (match_operand:VSX_D 2 "vsx_register_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtvec v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
  rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
  x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
  emit_insn (gen_rtx_SET (operands[0], x));
  DONE;
})

(define_expand "vsx_mergeh_<mode>"
  [(use (match_operand:VSX_D 0 "vsx_register_operand"))
   (use (match_operand:VSX_D 1 "vsx_register_operand"))
   (use (match_operand:VSX_D 2 "vsx_register_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtvec v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
  rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
  x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
  emit_insn (gen_rtx_SET (operands[0], x));
  DONE;
})
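
;; A hedged sketch of the corresponding built-ins (illustrative):
;;
;;   #include <altivec.h>
;;   vector double mh (vector double a, vector double b)
;;   {
;;     return vec_mergeh (a, b);
;;   }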

;; We separate the register splat insn from the memory splat insn to force the
;; register allocator to generate the indexed form of the SPLAT when it is
;; given an offsettable memory reference.  Otherwise, if the register and
;; memory insns were combined into a single insn, the register allocator will
;; load the value into a register, and then do a double word permute.
(define_expand "vsx_splat_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand")
        (vec_duplicate:VSX_D
         (match_operand:<VS_scalar> 1 "input_operand")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx op1 = operands[1];
  if (MEM_P (op1))
    operands[1] = rs6000_force_indexed_or_indirect_mem (op1);
  else if (!REG_P (op1))
    op1 = force_reg (<VSX_D:VS_scalar>mode, op1);
})

(define_insn "vsx_splat_<mode>_reg"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
        (vec_duplicate:VSX_D
         (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "@
   xxpermdi %x0,%x1,%x1,0
   mtvsrdd %x0,%1,%1"
  [(set_attr "type" "vecperm")])

(define_insn "vsx_splat_<mode>_mem"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (vec_duplicate:VSX_D
         (match_operand:<VSX_D:VS_scalar> 1 "memory_operand" "Z")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "lxvdsx %x0,%y1"
  [(set_attr "type" "vecload")])
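
;; A hedged sketch (illustrative): splatting from a register and from
;; memory start from the same source idiom.
;;
;;   #include <altivec.h>
;;   vector double dup_reg (double x)  { return vec_splats (x); }
;;   vector double dup_mem (double *p) { return vec_splats (*p); }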

;; V4SI splat support
(define_insn "vsx_splat_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")
        (vec_duplicate:V4SI
         (match_operand:SI 1 "splat_input_operand" "r,Z")))]
  "TARGET_P9_VECTOR"
  "@
   mtvsrws %x0,%1
   lxvwsx %x0,%y1"
  [(set_attr "type" "vecperm,vecload")])

;; SImode is not currently allowed in vector registers.  This pattern
;; allows us to use direct move to get the value in a vector register
;; so that we can use XXSPLTW.
(define_insn "vsx_splat_v4si_di"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
        (vec_duplicate:V4SI
         (truncate:SI
          (match_operand:DI 1 "gpc_reg_operand" "wa,r"))))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
  "@
   xxspltw %x0,%x1,1
   mtvsrws %x0,%1"
  [(set_attr "type" "vecperm")
   (set_attr "isa" "p8v,*")])

;; V4SF splat (ISA 3.0)
(define_insn_and_split "vsx_splat_v4sf"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
        (vec_duplicate:V4SF
         (match_operand:SF 1 "splat_input_operand" "Z,wa,r")))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR"
  "@
   lxvwsx %x0,%y1
   #
   mtvsrws %x0,%1"
  "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
  [(set (match_dup 0)
        (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
   (set (match_dup 0)
        (unspec:V4SF [(match_dup 0)
                      (const_int 0)] UNSPEC_VSX_XXSPLTW))]
  ""
  [(set_attr "type" "vecload,vecperm,mftgpr")
   (set_attr "length" "*,8,*")
   (set_attr "isa" "*,p8v,*")])

;; V4SF/V4SI splat from a vector element
(define_insn "vsx_xxspltw_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
        (vec_duplicate:VSX_W
         (vec_select:<VS_scalar>
          (match_operand:VSX_W 1 "vsx_register_operand" "wa")
          (parallel
           [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (!BYTES_BIG_ENDIAN)
    operands[2] = GEN_INT (3 - INTVAL (operands[2]));

  return "xxspltw %x0,%x1,%2";
}
  [(set_attr "type" "vecperm")])

(define_insn "vsx_xxspltw_<mode>_direct"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
        (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wa")
                       (match_operand:QI 2 "u5bit_cint_operand" "i")]
                      UNSPEC_VSX_XXSPLTW))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxspltw %x0,%x1,%2"
  [(set_attr "type" "vecperm")])

;; V16QI/V8HI splat support on ISA 2.07
(define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
  [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
        (vec_duplicate:VSX_SPLAT_I
         (truncate:<VS_scalar>
          (match_operand:DI 1 "altivec_register_operand" "v"))))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
  [(set_attr "type" "vecperm")])

;; V2DF/V2DI splat for use by vec_splat builtin
(define_insn "vsx_xxspltd_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
                       (match_operand:QI 2 "u5bit_cint_operand" "i")]
                      UNSPEC_VSX_XXSPLTD))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
      || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
    return "xxpermdi %x0,%x1,%x1,0";
  else
    return "xxpermdi %x0,%x1,%x1,3";
}
  [(set_attr "type" "vecperm")])
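
;; A hedged sketch of the built-in this supports (illustrative):
;;
;;   #include <altivec.h>
;;   vector double lane1 (vector double v)
;;   {
;;     return vec_splat (v, 1);
;;   }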

;; V4SF/V4SI interleave
(define_insn "vsx_xxmrghw_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
        (vec_select:VSX_W
         (vec_concat:<VS_double>
          (match_operand:VSX_W 1 "vsx_register_operand" "wa")
          (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
         (parallel [(const_int 0) (const_int 4)
                    (const_int 1) (const_int 5)])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (BYTES_BIG_ENDIAN)
    return "xxmrghw %x0,%x1,%x2";
  else
    return "xxmrglw %x0,%x2,%x1";
}
  [(set_attr "type" "vecperm")])

(define_insn "vsx_xxmrglw_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
        (vec_select:VSX_W
         (vec_concat:<VS_double>
          (match_operand:VSX_W 1 "vsx_register_operand" "wa")
          (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
         (parallel [(const_int 2) (const_int 6)
                    (const_int 3) (const_int 7)])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (BYTES_BIG_ENDIAN)
    return "xxmrglw %x0,%x1,%x2";
  else
    return "xxmrghw %x0,%x2,%x1";
}
  [(set_attr "type" "vecperm")])

;; Shift left double by word immediate
(define_insn "vsx_xxsldwi_<mode>"
  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=wa")
        (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "wa")
                       (match_operand:VSX_L 2 "vsx_register_operand" "wa")
                       (match_operand:QI 3 "u5bit_cint_operand" "i")]
                      UNSPEC_VSX_SLDWI))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxsldwi %x0,%x1,%x2,%3"
  [(set_attr "type" "vecperm")
   (set_attr "isa" "<VSisa>")])

;; Vector reduction insns and splitters

(define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
  [(set (match_operand:V2DF 0 "vfloat_operand" "=&wa,wa")
        (VEC_reduc:V2DF
         (vec_concat:V2DF
          (vec_select:DF
           (match_operand:V2DF 1 "vfloat_operand" "wa,wa")
           (parallel [(const_int 1)]))
          (vec_select:DF
           (match_dup 1)
           (parallel [(const_int 0)])))
         (match_dup 1)))
   (clobber (match_scratch:V2DF 2 "=0,&wa"))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
             ? gen_reg_rtx (V2DFmode)
             : operands[2];
  emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
  emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "veccomplex")])
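
;; A hedged sketch of the sequence the splitter emits for an add
;; reduction (illustrative mnemonics):
;;
;;   xxsldwi tmp,v,v,2    ;; swap the two doublewords of v
;;   xvadddp dst,tmp,v    ;; both lanes of dst now hold v[0] + v[1]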

(define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
  [(set (match_operand:V4SF 0 "vfloat_operand" "=wa")
        (VEC_reduc:V4SF
         (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
         (match_operand:V4SF 1 "vfloat_operand" "wa")))
   (clobber (match_scratch:V4SF 2 "=&wa"))
   (clobber (match_scratch:V4SF 3 "=&wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp2, tmp3, tmp4;

  if (can_create_pseudo_p ())
    {
      tmp2 = gen_reg_rtx (V4SFmode);
      tmp3 = gen_reg_rtx (V4SFmode);
      tmp4 = gen_reg_rtx (V4SFmode);
    }
  else
    {
      tmp2 = operands[2];
      tmp3 = operands[3];
      tmp4 = tmp2;
    }

  emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
  emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
  DONE;
}
  [(set_attr "length" "16")
   (set_attr "type" "veccomplex")])

;; Combiner patterns with the vector reduction patterns that know we can get
;; to the top element of the V2DF array without doing an extract.

(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
  [(set (match_operand:DF 0 "vfloat_operand" "=&wa,wa")
        (vec_select:DF
         (VEC_reduc:V2DF
          (vec_concat:V2DF
           (vec_select:DF
            (match_operand:V2DF 1 "vfloat_operand" "wa,wa")
            (parallel [(const_int 1)]))
           (vec_select:DF
            (match_dup 1)
            (parallel [(const_int 0)])))
          (match_dup 1))
         (parallel [(const_int 1)])))
   (clobber (match_scratch:DF 2 "=0,&wa"))]
  "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V2DFmode)"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx hi = gen_highpart (DFmode, operands[1]);
  rtx lo = (GET_CODE (operands[2]) == SCRATCH)
            ? gen_reg_rtx (DFmode)
            : operands[2];

  emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
  emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "veccomplex")])

(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
  [(set (match_operand:SF 0 "vfloat_operand" "=f")
        (vec_select:SF
         (VEC_reduc:V4SF
          (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
          (match_operand:V4SF 1 "vfloat_operand" "wa"))
         (parallel [(const_int 3)])))
   (clobber (match_scratch:V4SF 2 "=&wa"))
   (clobber (match_scratch:V4SF 3 "=&wa"))
   (clobber (match_scratch:V4SF 4 "=0"))]
  "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V4SFmode)"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp2, tmp3, tmp4, tmp5;

  if (can_create_pseudo_p ())
    {
      tmp2 = gen_reg_rtx (V4SFmode);
      tmp3 = gen_reg_rtx (V4SFmode);
      tmp4 = gen_reg_rtx (V4SFmode);
      tmp5 = gen_reg_rtx (V4SFmode);
    }
  else
    {
      tmp2 = operands[2];
      tmp3 = operands[3];
      tmp4 = tmp2;
      tmp5 = operands[4];
    }

  emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
  emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
  emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
  DONE;
}
  [(set_attr "length" "20")
   (set_attr "type" "veccomplex")])

;; Power8 Vector fusion.  The fused ops must be physically adjacent.
(define_peephole
  [(set (match_operand:P 0 "base_reg_operand")
        (match_operand:P 1 "short_cint_operand"))
   (set (match_operand:VSX_M 2 "vsx_register_operand")
        (mem:VSX_M (plus:P (match_dup 0)
                           (match_operand:P 3 "int_reg_operand"))))]
  "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
  "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
  [(set_attr "length" "8")
   (set_attr "type" "vecload")])

(define_peephole
  [(set (match_operand:P 0 "base_reg_operand")
        (match_operand:P 1 "short_cint_operand"))
   (set (match_operand:VSX_M 2 "vsx_register_operand")
        (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand")
                           (match_dup 0))))]
  "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
  "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
  [(set_attr "length" "8")
   (set_attr "type" "vecload")])

;; ISA 3.0 vector extend sign support

(define_insn "vsx_sign_extend_qi_<mode>"
  [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
        (unspec:VSINT_84
         [(match_operand:V16QI 1 "vsx_register_operand" "v")]
         UNSPEC_VSX_SIGN_EXTEND))]
  "TARGET_P9_VECTOR"
  "vextsb2<wd> %0,%1"
  [(set_attr "type" "vecexts")])

(define_insn "vsx_sign_extend_hi_<mode>"
  [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
        (unspec:VSINT_84
         [(match_operand:V8HI 1 "vsx_register_operand" "v")]
         UNSPEC_VSX_SIGN_EXTEND))]
  "TARGET_P9_VECTOR"
  "vextsh2<wd> %0,%1"
  [(set_attr "type" "vecexts")])

(define_insn "*vsx_sign_extend_si_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
        (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
                     UNSPEC_VSX_SIGN_EXTEND))]
  "TARGET_P9_VECTOR"
  "vextsw2d %0,%1"
  [(set_attr "type" "vecexts")])

;; ISA 3.0 Binary Floating-Point Support

;; VSX Scalar Extract Exponent Quad-Precision
(define_insn "xsxexpqp_<mode>"
  [(set (match_operand:DI 0 "altivec_register_operand" "=v")
        (unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
                   UNSPEC_VSX_SXEXPDP))]
  "TARGET_P9_VECTOR"
  "xsxexpqp %0,%1"
  [(set_attr "type" "vecmove")])

;; VSX Scalar Extract Exponent Double-Precision
(define_insn "xsxexpdp"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_SXEXPDP))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "xsxexpdp %0,%x1"
  [(set_attr "type" "integer")])

;; VSX Scalar Extract Significand Quad-Precision
(define_insn "xsxsigqp_<mode>"
  [(set (match_operand:TI 0 "altivec_register_operand" "=v")
        (unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
                   UNSPEC_VSX_SXSIG))]
  "TARGET_P9_VECTOR"
  "xsxsigqp %0,%1"
  [(set_attr "type" "vecmove")])

;; VSX Scalar Extract Significand Double-Precision
(define_insn "xsxsigdp"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_SXSIG))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "xsxsigdp %0,%x1"
  [(set_attr "type" "integer")])
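
;; A hedged sketch using GCC's documented ISA 3.0 scalar built-ins
;; (illustrative; compile for power9):
;;
;;   unsigned int exp_bits (double d)
;;   {
;;     return scalar_extract_exp (d);
;;   }
;;   unsigned long long sig_bits (double d)
;;   {
;;     return scalar_extract_sig (d);
;;   }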

;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
(define_insn "xsiexpqpf_<mode>"
  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
        (unspec:IEEE128
         [(match_operand:IEEE128 1 "altivec_register_operand" "v")
          (match_operand:DI 2 "altivec_register_operand" "v")]
         UNSPEC_VSX_SIEXPQP))]
  "TARGET_P9_VECTOR"
  "xsiexpqp %0,%1,%2"
  [(set_attr "type" "vecmove")])

;; VSX Scalar Insert Exponent Quad-Precision
(define_insn "xsiexpqp_<mode>"
  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
        (unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v")
                         (match_operand:DI 2 "altivec_register_operand" "v")]
                        UNSPEC_VSX_SIEXPQP))]
  "TARGET_P9_VECTOR"
  "xsiexpqp %0,%1,%2"
  [(set_attr "type" "vecmove")])

;; VSX Scalar Insert Exponent Double-Precision
(define_insn "xsiexpdp"
  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
        (unspec:DF [(match_operand:DI 1 "register_operand" "r")
                    (match_operand:DI 2 "register_operand" "r")]
                   UNSPEC_VSX_SIEXPDP))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "xsiexpdp %x0,%1,%2"
  [(set_attr "type" "fpsimple")])

;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
(define_insn "xsiexpdpf"
  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
        (unspec:DF [(match_operand:DF 1 "register_operand" "r")
                    (match_operand:DI 2 "register_operand" "r")]
                   UNSPEC_VSX_SIEXPDP))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "xsiexpdp %x0,%1,%2"
  [(set_attr "type" "fpsimple")])
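
;; A hedged sketch (documented built-in; argument interpretation per the
;; GCC manual, illustrative):
;;
;;   double rebuild (unsigned long long sig, unsigned long long exp)
;;   {
;;     return scalar_insert_exp (sig, exp);
;;   }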

;; VSX Scalar Compare Exponents Double-Precision
(define_expand "xscmpexpdp_<code>"
  [(set (match_dup 3)
        (compare:CCFP
         (unspec:DF
          [(match_operand:DF 1 "vsx_register_operand" "wa")
           (match_operand:DF 2 "vsx_register_operand" "wa")]
          UNSPEC_VSX_SCMPEXPDP)
         (const_int 0)))
   (set (match_operand:SI 0 "register_operand" "=r")
        (CMP_TEST:SI (match_dup 3)
                     (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  if (<CODE> == UNORDERED && !HONOR_NANS (DFmode))
    {
      emit_move_insn (operands[0], const0_rtx);
      DONE;
    }

  operands[3] = gen_reg_rtx (CCFPmode);
})

(define_insn "*xscmpexpdp"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
        (compare:CCFP
         (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
                     (match_operand:DF 2 "vsx_register_operand" "wa")]
                    UNSPEC_VSX_SCMPEXPDP)
         (match_operand:SI 3 "zero_constant" "j")))]
  "TARGET_P9_VECTOR"
  "xscmpexpdp %0,%x1,%x2"
  [(set_attr "type" "fpcompare")])

;; VSX Scalar Compare Exponents Quad-Precision
(define_expand "xscmpexpqp_<code>_<mode>"
  [(set (match_dup 3)
        (compare:CCFP
         (unspec:IEEE128
          [(match_operand:IEEE128 1 "vsx_register_operand" "v")
           (match_operand:IEEE128 2 "vsx_register_operand" "v")]
          UNSPEC_VSX_SCMPEXPQP)
         (const_int 0)))
   (set (match_operand:SI 0 "register_operand" "=r")
        (CMP_TEST:SI (match_dup 3)
                     (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  if (<CODE> == UNORDERED && !HONOR_NANS (<MODE>mode))
    {
      emit_move_insn (operands[0], const0_rtx);
      DONE;
    }

  operands[3] = gen_reg_rtx (CCFPmode);
})

(define_insn "*xscmpexpqp"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
        (compare:CCFP
         (unspec:IEEE128 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
                          (match_operand:IEEE128 2 "altivec_register_operand" "v")]
                         UNSPEC_VSX_SCMPEXPQP)
         (match_operand:SI 3 "zero_constant" "j")))]
  "TARGET_P9_VECTOR"
  "xscmpexpqp %0,%1,%2"
  [(set_attr "type" "fpcompare")])

;; VSX Scalar Test Data Class Quad-Precision
;; (Expansion for scalar_test_data_class (__ieee128, int))
;; (Has side effect of setting the lt bit if operand 1 is negative,
;; setting the eq bit if any of the conditions tested by operand 2
;; are satisfied, and clearing the gt and unordered bits to zero.)
(define_expand "xststdcqp_<mode>"
  [(set (match_dup 3)
        (compare:CCFP
         (unspec:SI
          [(match_operand:IEEE128 1 "altivec_register_operand" "v")
           (match_operand:SI 2 "u7bit_cint_operand" "n")]
          UNSPEC_VSX_STSTDC)
         (const_int 0)))
   (set (match_operand:SI 0 "register_operand" "=r")
        (eq:SI (match_dup 3)
               (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})

;; VSX Scalar Test Data Class Double- and Single-Precision
;; (The lt bit is set if operand 1 is negative.  The eq bit is set
;; if any of the conditions tested by operand 2 are satisfied.
;; The gt and unordered bits are cleared to zero.)
(define_expand "xststdc<sd>p"
  [(set (match_dup 3)
        (compare:CCFP
         (unspec:SI
          [(match_operand:SFDF 1 "vsx_register_operand" "wa")
           (match_operand:SI 2 "u7bit_cint_operand" "n")]
          UNSPEC_VSX_STSTDC)
         (match_dup 4)))
   (set (match_operand:SI 0 "register_operand" "=r")
        (eq:SI (match_dup 3)
               (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  operands[3] = gen_reg_rtx (CCFPmode);
  operands[4] = CONST0_RTX (SImode);
})
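
;; A hedged sketch (documented built-in; the 7-bit class-selector value
;; below is illustrative):
;;
;;   int is_nan_or_inf (double d)
;;   {
;;     return scalar_test_data_class (d, 0x70);
;;   }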

;; The VSX Scalar Test Negative Quad-Precision
(define_expand "xststdcnegqp_<mode>"
  [(set (match_dup 2)
        (compare:CCFP
         (unspec:SI
          [(match_operand:IEEE128 1 "altivec_register_operand" "v")
           (const_int 0)]
          UNSPEC_VSX_STSTDC)
         (const_int 0)))
   (set (match_operand:SI 0 "register_operand" "=r")
        (lt:SI (match_dup 2)
               (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  operands[2] = gen_reg_rtx (CCFPmode);
})

;; The VSX Scalar Test Negative Double- and Single-Precision
(define_expand "xststdcneg<sd>p"
  [(set (match_dup 2)
        (compare:CCFP
         (unspec:SI
          [(match_operand:SFDF 1 "vsx_register_operand" "wa")
           (const_int 0)]
          UNSPEC_VSX_STSTDC)
         (match_dup 3)))
   (set (match_operand:SI 0 "register_operand" "=r")
        (lt:SI (match_dup 2)
               (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  operands[2] = gen_reg_rtx (CCFPmode);
  operands[3] = CONST0_RTX (SImode);
})
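
;; A hedged sketch (documented built-in):
;;
;;   int is_negative (double d)
;;   {
;;     return scalar_test_neg (d);
;;   }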

(define_insn "*xststdcqp_<mode>"
  [(set (match_operand:CCFP 0 "" "=y")
        (compare:CCFP
         (unspec:SI
          [(match_operand:IEEE128 1 "altivec_register_operand" "v")
           (match_operand:SI 2 "u7bit_cint_operand" "n")]
          UNSPEC_VSX_STSTDC)
         (const_int 0)))]
  "TARGET_P9_VECTOR"
  "xststdcqp %0,%1,%2"
  [(set_attr "type" "fpcompare")])

(define_insn "*xststdc<sd>p"
  [(set (match_operand:CCFP 0 "" "=y")
        (compare:CCFP
         (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
                       (match_operand:SI 2 "u7bit_cint_operand" "n")]
                      UNSPEC_VSX_STSTDC)
         (match_operand:SI 3 "zero_constant" "j")))]
  "TARGET_P9_VECTOR"
  "xststdc<sd>p %0,%x1,%2"
  [(set_attr "type" "fpcompare")])

;; VSX Vector Extract Exponent Double and Single Precision
(define_insn "xvxexp<sd>p"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (unspec:VSX_F
         [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
         UNSPEC_VSX_VXEXP))]
  "TARGET_P9_VECTOR"
  "xvxexp<sd>p %x0,%x1"
  [(set_attr "type" "vecsimple")])

;; VSX Vector Extract Significand Double and Single Precision
(define_insn "xvxsig<sd>p"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (unspec:VSX_F
         [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
         UNSPEC_VSX_VXSIG))]
  "TARGET_P9_VECTOR"
  "xvxsig<sd>p %x0,%x1"
  [(set_attr "type" "vecsimple")])

;; VSX Vector Insert Exponent Double and Single Precision
(define_insn "xviexp<sd>p"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (unspec:VSX_F
         [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
          (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
         UNSPEC_VSX_VIEXP))]
  "TARGET_P9_VECTOR"
  "xviexp<sd>p %x0,%x1,%x2"
  [(set_attr "type" "vecsimple")])

;; VSX Vector Test Data Class Double and Single Precision
;; The corresponding elements of the result vector are all ones
;; if any of the conditions tested by operand 2 are satisfied.
(define_insn "xvtstdc<sd>p"
  [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
        (unspec:<VSI>
         [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
          (match_operand:SI 2 "u7bit_cint_operand" "n")]
         UNSPEC_VSX_VTSTDC))]
  "TARGET_P9_VECTOR"
  "xvtstdc<sd>p %x0,%x1,%2"
  [(set_attr "type" "vecsimple")])

;; ISA 3.0 String Operations Support

;; Compare vectors producing a vector result and a predicate, setting CR6
;; to indicate a combined status.  This pattern matches v16qi, v8hi, and
;; v4si modes.  It does not match v4sf, v2df, or v2di modes; there is no
;; need to, because those are expanded to use Power8 instructions.
(define_insn "*vsx_ne_<mode>_p"
  [(set (reg:CC CR6_REGNO)
        (unspec:CC
         [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
                 (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))]
         UNSPEC_PREDICATE))
   (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v")
        (ne:VSX_EXTRACT_I (match_dup 1)
                          (match_dup 2)))]
  "TARGET_P9_VECTOR"
  "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2"
  [(set_attr "type" "vecsimple")])

(define_insn "*vector_nez_<mode>_p"
  [(set (reg:CC CR6_REGNO)
        (unspec:CC [(unspec:VI
                     [(match_operand:VI 1 "gpc_reg_operand" "v")
                      (match_operand:VI 2 "gpc_reg_operand" "v")]
                     UNSPEC_NEZ_P)]
                   UNSPEC_PREDICATE))
   (set (match_operand:VI 0 "gpc_reg_operand" "=v")
        (unspec:VI [(match_dup 1)
                    (match_dup 2)]
                   UNSPEC_NEZ_P))]
  "TARGET_P9_VECTOR"
  "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Return first position of match between vectors using natural element
;; order for both LE and BE execution modes.
(define_expand "first_match_index_<mode>"
  [(match_operand:SI 0 "register_operand")
   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
               (match_operand:VSX_EXTRACT_I 2 "register_operand")]
              UNSPEC_VSX_FIRST_MATCH_INDEX)]
  "TARGET_P9_VECTOR"
{
  int sh;
  rtx cmp_result = gen_reg_rtx (<MODE>mode);
  rtx not_result = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
                                            operands[2]));
  emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result));

  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;

  if (<MODE>mode == V16QImode)
    {
      if (!BYTES_BIG_ENDIAN)
        emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result));
      else
        emit_insn (gen_vclzlsbb_<mode> (operands[0], not_result));
    }
  else
    {
      rtx tmp = gen_reg_rtx (SImode);
      if (!BYTES_BIG_ENDIAN)
        emit_insn (gen_vctzlsbb_<mode> (tmp, not_result));
      else
        emit_insn (gen_vclzlsbb_<mode> (tmp, not_result));
      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
    }
  DONE;
})
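
;; A hedged sketch using the documented Power9 built-in (illustrative):
;;
;;   #include <altivec.h>
;;   int first_eq (vector unsigned char a, vector unsigned char b)
;;   {
;;     return vec_first_match_index (a, b);
;;   }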

;; Return first position of match between vectors or end of string (EOS) using
;; natural element order for both LE and BE execution modes.
(define_expand "first_match_or_eos_index_<mode>"
  [(match_operand:SI 0 "register_operand")
   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
               (match_operand:VSX_EXTRACT_I 2 "register_operand")]
              UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)]
  "TARGET_P9_VECTOR"
{
  int sh;
  rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
  rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
  rtx cmpz_result = gen_reg_rtx (<MODE>mode);
  rtx and_result = gen_reg_rtx (<MODE>mode);
  rtx result = gen_reg_rtx (<MODE>mode);
  rtx vzero = gen_reg_rtx (<MODE>mode);

  /* Vector with zeros in elements that correspond to zeros in operands.  */
  emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
  emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));

  /* Vector with ones in elements that do not match.  */
  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
                                             operands[2]));

  /* Create vector with ones in elements where there was a zero in one of
     the source elements or the elements that match.  */
  emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result));
  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;

  if (<MODE>mode == V16QImode)
    {
      if (!BYTES_BIG_ENDIAN)
        emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
      else
        emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
    }
  else
    {
      rtx tmp = gen_reg_rtx (SImode);
      if (!BYTES_BIG_ENDIAN)
        emit_insn (gen_vctzlsbb_<mode> (tmp, result));
      else
        emit_insn (gen_vclzlsbb_<mode> (tmp, result));
      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
    }
  DONE;
})

;; Return first position of mismatch between vectors using natural
;; element order for both LE and BE execution modes.
(define_expand "first_mismatch_index_<mode>"
  [(match_operand:SI 0 "register_operand")
   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
               (match_operand:VSX_EXTRACT_I 2 "register_operand")]
              UNSPEC_VSX_FIRST_MISMATCH_INDEX)]
  "TARGET_P9_VECTOR"
{
  int sh;
  rtx cmp_result = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
                                            operands[2]));
  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;

  if (<MODE>mode == V16QImode)
    {
      if (!BYTES_BIG_ENDIAN)
        emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result));
      else
        emit_insn (gen_vclzlsbb_<mode> (operands[0], cmp_result));
    }
  else
    {
      rtx tmp = gen_reg_rtx (SImode);
      if (!BYTES_BIG_ENDIAN)
        emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result));
      else
        emit_insn (gen_vclzlsbb_<mode> (tmp, cmp_result));
      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
    }
  DONE;
})

;; Return first position of mismatch between vectors or end of string (EOS)
;; using natural element order for both LE and BE execution modes.
(define_expand "first_mismatch_or_eos_index_<mode>"
  [(match_operand:SI 0 "register_operand")
   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
               (match_operand:VSX_EXTRACT_I 2 "register_operand")]
              UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)]
  "TARGET_P9_VECTOR"
{
  int sh;
  rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
  rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
  rtx cmpz_result = gen_reg_rtx (<MODE>mode);
  rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
  rtx and_result = gen_reg_rtx (<MODE>mode);
  rtx result = gen_reg_rtx (<MODE>mode);
  rtx vzero = gen_reg_rtx (<MODE>mode);

  /* Vector with zeros in elements that correspond to zeros in operands.  */
  emit_move_insn (vzero, CONST0_RTX (<MODE>mode));

  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
  emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));

  /* Vector with ones in elements that match.  */
  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
                                             operands[2]));
  emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));

  /* Create vector with ones in elements where there was a zero in one of
     the source elements or the elements did not match.  */
  emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;

  if (<MODE>mode == V16QImode)
    {
      if (!BYTES_BIG_ENDIAN)
        emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
      else
        emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
    }
  else
    {
      rtx tmp = gen_reg_rtx (SImode);
      if (!BYTES_BIG_ENDIAN)
        emit_insn (gen_vctzlsbb_<mode> (tmp, result));
      else
        emit_insn (gen_vclzlsbb_<mode> (tmp, result));
      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
    }
  DONE;
})

;; Load VSX Vector with Length
(define_expand "lxvl"
  [(set (match_dup 3)
        (ashift:DI (match_operand:DI 2 "register_operand")
                   (const_int 56)))
   (set (match_operand:V16QI 0 "vsx_register_operand")
        (unspec:V16QI
         [(match_operand:DI 1 "gpc_reg_operand")
          (mem:V16QI (match_dup 1))
          (match_dup 3)]
         UNSPEC_LXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
{
  operands[3] = gen_reg_rtx (DImode);
})

(define_insn "*lxvl"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (unspec:V16QI
         [(match_operand:DI 1 "gpc_reg_operand" "b")
          (mem:V16QI (match_dup 1))
          (match_operand:DI 2 "register_operand" "r")]
         UNSPEC_LXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "lxvl %x0,%1,%2"
  [(set_attr "type" "vecload")])
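
;; A hedged sketch using the documented built-in (illustrative):
;;
;;   #include <altivec.h>
;;   vector unsigned char head (unsigned char *p, unsigned long n)
;;   {
;;     return vec_xl_len (p, n);
;;   }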

(define_insn "lxvll"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
                       (mem:V16QI (match_dup 1))
                       (match_operand:DI 2 "register_operand" "r")]
                      UNSPEC_LXVLL))]
  "TARGET_P9_VECTOR"
  "lxvll %x0,%1,%2"
  [(set_attr "type" "vecload")])

;; Expand for builtin xl_len_r
(define_expand "xl_len_r"
  [(match_operand:V16QI 0 "vsx_register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:DI 2 "register_operand")]
  "TARGET_P9_VECTOR && TARGET_64BIT"
{
  rtx shift_mask = gen_reg_rtx (V16QImode);
  rtx rtx_vtmp = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (DImode);

  emit_insn (gen_altivec_lvsl_reg (shift_mask, operands[2]));
  emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
  emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
  emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
                                          shift_mask));
  DONE;
})

(define_insn "stxvll"
  [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
        (unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
                       (mem:V16QI (match_dup 1))
                       (match_operand:DI 2 "register_operand" "r")]
                      UNSPEC_STXVLL))]
  "TARGET_P9_VECTOR"
  "stxvll %x0,%1,%2"
  [(set_attr "type" "vecstore")])

;; Store VSX Vector with Length
(define_expand "stxvl"
  [(set (match_dup 3)
        (ashift:DI (match_operand:DI 2 "register_operand")
                   (const_int 56)))
   (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
        (unspec:V16QI
         [(match_operand:V16QI 0 "vsx_register_operand")
          (mem:V16QI (match_dup 1))
          (match_dup 3)]
         UNSPEC_STXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
{
  operands[3] = gen_reg_rtx (DImode);
})

(define_insn "*stxvl"
  [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
        (unspec:V16QI
         [(match_operand:V16QI 0 "vsx_register_operand" "wa")
          (mem:V16QI (match_dup 1))
          (match_operand:DI 2 "register_operand" "r")]
         UNSPEC_STXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "stxvl %x0,%1,%2"
  [(set_attr "type" "vecstore")])

;; Expand for builtin xst_len_r
(define_expand "xst_len_r"
  [(match_operand:V16QI 0 "vsx_register_operand" "=wa")
   (match_operand:DI 1 "register_operand" "b")
   (match_operand:DI 2 "register_operand" "r")]
  "TARGET_P9_VECTOR && TARGET_64BIT"
{
  rtx shift_mask = gen_reg_rtx (V16QImode);
  rtx rtx_vtmp = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (DImode);

  emit_insn (gen_altivec_lvsr_reg (shift_mask, operands[2]));
  emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
                                          shift_mask));
  emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
  emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
  DONE;
})
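
;; A hedged sketch of the paired store built-in (illustrative):
;;
;;   #include <altivec.h>
;;   void tail (vector unsigned char v, unsigned char *p, unsigned long n)
;;   {
;;     vec_xst_len (v, p, n);
;;   }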

;; Vector Compare Not Equal Byte (specified/not+eq:)
(define_insn "vcmpneb"
  [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
        (not:V16QI
         (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v")
                   (match_operand:V16QI 2 "altivec_register_operand" "v"))))]
  "TARGET_P9_VECTOR"
  "vcmpneb %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal or Zero Byte
(define_insn "vcmpnezb"
  [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
        (unspec:V16QI
         [(match_operand:V16QI 1 "altivec_register_operand" "v")
          (match_operand:V16QI 2 "altivec_register_operand" "v")]
         UNSPEC_VCMPNEZB))]
  "TARGET_P9_VECTOR"
  "vcmpnezb %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal or Zero Byte predicate or record-form
(define_insn "vcmpnezb_p"
  [(set (reg:CC CR6_REGNO)
        (unspec:CC [(unspec:V16QI
                     [(match_operand:V16QI 1 "altivec_register_operand" "v")
                      (match_operand:V16QI 2 "altivec_register_operand" "v")]
                     UNSPEC_VCMPNEZB)]
                   UNSPEC_PREDICATE))
   (set (match_operand:V16QI 0 "altivec_register_operand" "=v")
        (unspec:V16QI [(match_dup 1)
                       (match_dup 2)]
                      UNSPEC_VCMPNEZB))]
  "TARGET_P9_VECTOR"
  "vcmpnezb. %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal Half Word (specified/not+eq:)
(define_insn "vcmpneh"
  [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
        (not:V8HI
         (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v")
                  (match_operand:V8HI 2 "altivec_register_operand" "v"))))]
  "TARGET_P9_VECTOR"
  "vcmpneh %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal or Zero Half Word
(define_insn "vcmpnezh"
  [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
        (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
                      (match_operand:V8HI 2 "altivec_register_operand" "v")]
                     UNSPEC_VCMPNEZH))]
  "TARGET_P9_VECTOR"
  "vcmpnezh %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal Word (specified/not+eq:)
(define_insn "vcmpnew"
  [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
        (not:V4SI
         (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v")
                  (match_operand:V4SI 2 "altivec_register_operand" "v"))))]
  "TARGET_P9_VECTOR"
  "vcmpnew %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal or Zero Word
(define_insn "vcmpnezw"
  [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
        (unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
                      (match_operand:V4SI 2 "altivec_register_operand" "v")]
                     UNSPEC_VCMPNEZW))]
  "TARGET_P9_VECTOR"
  "vcmpnezw %0,%1,%2"
  [(set_attr "type" "vecsimple")])
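
;; A hedged sketch (documented built-ins, illustrative):
;;
;;   #include <altivec.h>
;;   vector bool char ne (vector signed char a, vector signed char b)
;;   {
;;     return vec_cmpne (a, b);
;;   }
;;   vector bool char nez (vector signed char a, vector signed char b)
;;   {
;;     return vec_cmpnez (a, b);   /* also true where a or b has a zero byte */
;;   }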

;; Vector Count Leading Zero Least-Significant Bits Byte
(define_insn "vclzlsbb_<mode>"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
         [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
         UNSPEC_VCLZLSBB))]
  "TARGET_P9_VECTOR"
  "vclzlsbb %0,%1"
  [(set_attr "type" "vecsimple")])

;; Vector Count Trailing Zero Least-Significant Bits Byte
(define_insn "vctzlsbb_<mode>"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
         [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
         UNSPEC_VCTZLSBB))]
  "TARGET_P9_VECTOR"
  "vctzlsbb %0,%1"
  [(set_attr "type" "vecsimple")])
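
;; A hedged sketch (documented built-ins, illustrative):
;;
;;   #include <altivec.h>
;;   int lead  (vector unsigned char v) { return vec_cntlz_lsbb (v); }
;;   int trail (vector unsigned char v) { return vec_cnttz_lsbb (v); }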

;; Vector Extract Unsigned Byte Left-Indexed
(define_insn "vextublx"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
         [(match_operand:SI 1 "register_operand" "r")
          (match_operand:V16QI 2 "altivec_register_operand" "v")]
         UNSPEC_VEXTUBLX))]
  "TARGET_P9_VECTOR"
  "vextublx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Byte Right-Indexed
(define_insn "vextubrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
         [(match_operand:SI 1 "register_operand" "r")
          (match_operand:V16QI 2 "altivec_register_operand" "v")]
         UNSPEC_VEXTUBRX))]
  "TARGET_P9_VECTOR"
  "vextubrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Half Word Left-Indexed
(define_insn "vextuhlx"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
         [(match_operand:SI 1 "register_operand" "r")
          (match_operand:V8HI 2 "altivec_register_operand" "v")]
         UNSPEC_VEXTUHLX))]
  "TARGET_P9_VECTOR"
  "vextuhlx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Half Word Right-Indexed
(define_insn "vextuhrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
         [(match_operand:SI 1 "register_operand" "r")
          (match_operand:V8HI 2 "altivec_register_operand" "v")]
         UNSPEC_VEXTUHRX))]
  "TARGET_P9_VECTOR"
  "vextuhrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Word Left-Indexed
(define_insn "vextuwlx"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
         [(match_operand:SI 1 "register_operand" "r")
          (match_operand:V4SI 2 "altivec_register_operand" "v")]
         UNSPEC_VEXTUWLX))]
  "TARGET_P9_VECTOR"
  "vextuwlx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Word Right-Indexed
(define_insn "vextuwrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
         [(match_operand:SI 1 "register_operand" "r")
          (match_operand:V4SI 2 "altivec_register_operand" "v")]
         UNSPEC_VEXTUWRX))]
  "TARGET_P9_VECTOR"
  "vextuwrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector insert/extract word at arbitrary byte values.  Note, the little
;; endian version needs to adjust the byte number, and the V4SI element in
;; the vector.
(define_insn "extract4b"
  [(set (match_operand:V2DI 0 "vsx_register_operand")
        (unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
                      (match_operand:QI 2 "const_0_to_12_operand" "n")]
                     UNSPEC_XXEXTRACTUW))]
  "TARGET_P9_VECTOR"
{
  if (!BYTES_BIG_ENDIAN)
    operands[2] = GEN_INT (12 - INTVAL (operands[2]));

  return "xxextractuw %x0,%x1,%2";
})

(define_expand "insert4b"
  [(set (match_operand:V16QI 0 "vsx_register_operand")
        (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
                       (match_operand:V16QI 2 "vsx_register_operand")
                       (match_operand:QI 3 "const_0_to_12_operand")]
                      UNSPEC_XXINSERTW))]
  "TARGET_P9_VECTOR"
{
  if (!BYTES_BIG_ENDIAN)
    {
      rtx op1 = operands[1];
      rtx v4si_tmp = gen_reg_rtx (V4SImode);
      emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
      operands[1] = v4si_tmp;
      operands[3] = GEN_INT (12 - INTVAL (operands[3]));
    }
})

(define_insn "*insert4b_internal"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
                       (match_operand:V16QI 2 "vsx_register_operand" "0")
                       (match_operand:QI 3 "const_0_to_12_operand" "n")]
                      UNSPEC_XXINSERTW))]
  "TARGET_P9_VECTOR"
  "xxinsertw %x0,%x1,%3"
  [(set_attr "type" "vecperm")])
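
;; A hedged sketch (vec_extract4b/vec_insert4b are documented ISA 3.0
;; built-ins, though their exact prototypes vary by compiler version; the
;; use below is illustrative):
;;
;;   #include <altivec.h>
;;   vector unsigned long long grab (vector unsigned char v)
;;   {
;;     return vec_extract4b (v, 4);
;;   }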

;; Generate vector extract four float 32 values from left four elements
;; of eight element vector of float 16 values.
(define_expand "vextract_fp_from_shorth"
  [(set (match_operand:V4SF 0 "register_operand" "=wa")
        (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
                     UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
  "TARGET_P9_VECTOR"
{
  int i;
  int vals_le[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
  int vals_be[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};

  rtx rvals[16];
  rtx mask = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (V16QImode);
  rtvec v;

  for (i = 0; i < 16; i++)
    if (!BYTES_BIG_ENDIAN)
      rvals[i] = GEN_INT (vals_le[i]);
    else
      rvals[i] = GEN_INT (vals_be[i]);

  /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
     inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
     src half words 0,1,2,3 (LE), src half words 4,5,6,7 (BE) for the
     conversion instruction.  */
  v = gen_rtvec_v (16, rvals);
  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
  emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
                                          operands[1], mask));
  emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
  DONE;
})

;; Generate vector extract four float 32 values from right four elements
;; of eight element vector of float 16 values.
(define_expand "vextract_fp_from_shortl"
  [(set (match_operand:V4SF 0 "register_operand" "=wa")
        (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
                     UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
  "TARGET_P9_VECTOR"
{
  int i;
  int vals_le[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
  int vals_be[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};

  rtx rvals[16];
  rtx mask = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (V16QImode);
  rtvec v;

  for (i = 0; i < 16; i++)
    if (!BYTES_BIG_ENDIAN)
      rvals[i] = GEN_INT (vals_le[i]);
    else
      rvals[i] = GEN_INT (vals_be[i]);

  /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
     inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
     src half words 4,5,6,7 (LE), src half words 0,1,2,3 (BE) for the
     conversion instruction.  */
  v = gen_rtvec_v (16, rvals);
  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
  emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
                                          operands[1], mask));
  emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
  DONE;
})

;; Support for ISA 3.0 vector byte reverse

;; Swap all bytes within a vector
(define_insn "p9_xxbrq_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
        (bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrq %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrq_v16qi"
  [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V1TImode);
  rtx op1 = gen_lowpart (V1TImode, operands[1]);
  emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V16QImode, op0));
  DONE;
})

;; Swap all bytes in each 64-bit element
(define_insn "p9_xxbrd_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrd %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrd_v2df"
  [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V2DImode);
  rtx op1 = gen_lowpart (V2DImode, operands[1]);
  emit_insn (gen_p9_xxbrd_v2di (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0));
  DONE;
})

;; Swap all bytes in each 32-bit element
(define_insn "p9_xxbrw_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
        (bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrw %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrw_v4sf"
  [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V4SImode);
  rtx op1 = gen_lowpart (V4SImode, operands[1]);
  emit_insn (gen_p9_xxbrw_v4si (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0));
  DONE;
})

;; Swap all bytes in each element of the vector
(define_expand "revb_<mode>"
  [(use (match_operand:VEC_REVB 0 "vsx_register_operand"))
   (use (match_operand:VEC_REVB 1 "vsx_register_operand"))]
  ""
{
  if (TARGET_P9_VECTOR)
    emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
  else
    {
      /* Want to have the elements in reverse order relative
         to the endian mode in use, i.e. in LE mode, put elements
         in BE order.  */
      rtx sel = swap_endian_selector_for_mode (<MODE>mode);
      emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
                                           operands[1], sel));
    }

  DONE;
})

;; Reversing bytes in vector char is just a NOP.
(define_expand "revb_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand")
        (bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
  ""
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
})

;; Swap all bytes in each 16-bit element
(define_insn "p9_xxbrh_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrh %x0,%x1"
  [(set_attr "type" "vecperm")])
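
;; A hedged sketch (documented built-in, illustrative):
;;
;;   #include <altivec.h>
;;   vector unsigned int byteswap (vector unsigned int v)
;;   {
;;     return vec_revb (v);   /* xxbrw on ISA 3.0, vperm otherwise */
;;   }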

;; Operand numbers for the following peephole2
(define_constants
  [(SFBOOL_TMP_GPR       0)   ;; GPR temporary
   (SFBOOL_TMP_VSX       1)   ;; vector temporary
   (SFBOOL_MFVSR_D       2)   ;; move to gpr dest
   (SFBOOL_MFVSR_A       3)   ;; move to gpr src
   (SFBOOL_BOOL_D        4)   ;; and/ior/xor dest
   (SFBOOL_BOOL_A1       5)   ;; and/ior/xor arg1
   (SFBOOL_BOOL_A2       6)   ;; and/ior/xor arg2
   (SFBOOL_SHL_D         7)   ;; shift left dest
   (SFBOOL_SHL_A         8)   ;; shift left arg
   (SFBOOL_MTVSR_D       9)   ;; move to vector dest
   (SFBOOL_MFVSR_A_V4SF  10)  ;; SFBOOL_MFVSR_A as V4SFmode
   (SFBOOL_BOOL_A_DI     11)  ;; SFBOOL_BOOL_A1/A2 as DImode
   (SFBOOL_TMP_VSX_DI    12)  ;; SFBOOL_TMP_VSX as DImode
   (SFBOOL_MTVSR_D_V4SF  13)]) ;; SFBOOL_MTVSR_D as V4SFmode

;; Attempt to optimize some common GLIBC operations using logical operations to
;; pick apart SFmode operations.  For example, there is code from e_powf.c
;; after macro expansion that looks like:
;;
;;	typedef union
;;	{
;;	  float value;
;;	  unsigned int word;
;;	} ieee_float_shape_type;
;;
;;	float t1;
;;	int32_t is;
;;
;;	do {
;;	  ieee_float_shape_type gf_u;
;;	  gf_u.value = (t1);
;;	  (is) = gf_u.word;
;;	} while (0);
;;
;;	do {
;;	  ieee_float_shape_type sf_u;
;;	  sf_u.word = (is & 0xfffff000);
;;	  (t1) = sf_u.value;
;;	} while (0);
;;
;; This would result in two direct move operations (convert to memory format,
;; direct move to GPR, do the AND operation, direct move to VSX, convert to
;; scalar format).  With this peephole, we eliminate the direct move to the
;; GPR, and instead move the integer mask value to the vector register after a
;; shift and do the VSX logical operation.
;;
;; The insns for dealing with SFmode in GPR registers look like:
;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
;;
;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
;;
;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
;;
;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
;;
;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
;;
;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))

(define_peephole2
  [(match_scratch:DI SFBOOL_TMP_GPR "r")
   (match_scratch:V4SF SFBOOL_TMP_VSX "wa")

   ;; MFVSRWZ (aka zero_extend)
   (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
        (zero_extend:DI
         (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))

   ;; AND/IOR/XOR operation on int
   (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
        (and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
                        (match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))

   ;; SLDI
   (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
        (ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
                   (const_int 32)))

   ;; MTVSRD
   (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
        (unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]

  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
   /* The REG_P (xxx) tests prevent SUBREGs, which allows us to use REGNO
      to compare registers, when the mode is different.  */
   && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
   && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
   && REG_P (operands[SFBOOL_SHL_A]) && REG_P (operands[SFBOOL_MTVSR_D])
   && (REG_P (operands[SFBOOL_BOOL_A2])
       || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
   && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
       || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
   && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
       || (REG_P (operands[SFBOOL_BOOL_A2])
           && REGNO (operands[SFBOOL_MFVSR_D])
                == REGNO (operands[SFBOOL_BOOL_A2])))
   && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
   && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
       || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
   && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
  [(set (match_dup SFBOOL_TMP_GPR)
        (ashift:DI (match_dup SFBOOL_BOOL_A_DI)
                   (const_int 32)))

   (set (match_dup SFBOOL_TMP_VSX_DI)
        (match_dup SFBOOL_TMP_GPR))

   (set (match_dup SFBOOL_MTVSR_D_V4SF)
        (and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
                          (match_dup SFBOOL_TMP_VSX)))]
{
  rtx bool_a1 = operands[SFBOOL_BOOL_A1];
  rtx bool_a2 = operands[SFBOOL_BOOL_A2];
  int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
  int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
  int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
  int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);

  if (CONST_INT_P (bool_a2))
    {
      rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
      emit_move_insn (tmp_gpr, bool_a2);
      operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
    }
  else
    {
      int regno_bool_a1 = REGNO (bool_a1);
      int regno_bool_a2 = REGNO (bool_a2);
      int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
                          ? regno_bool_a2 : regno_bool_a1);
      operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
    }

  operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
  operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
  operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
})

;; Support signed/unsigned long long to float conversion vectorization.
;; Note that any_float (pc) here is just for code attribute <su>.
(define_expand "vec_pack<su>_float_v2di"
  [(match_operand:V4SF 0 "vfloat_operand")
   (match_operand:V2DI 1 "vint_operand")
   (match_operand:V2DI 2 "vint_operand")
   (any_float (pc))]
  "TARGET_VSX"
{
  rtx r1 = gen_reg_rtx (V4SFmode);
  rtx r2 = gen_reg_rtx (V4SFmode);
  emit_insn (gen_vsx_xvcv<su>xdsp (r1, operands[1]));
  emit_insn (gen_vsx_xvcv<su>xdsp (r2, operands[2]));
  rs6000_expand_extract_even (operands[0], r1, r2);
  DONE;
})
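
;; A hedged sketch of a loop the auto-vectorizer can map onto this
;; expander (illustrative):
;;
;;   void pack (float *restrict out, long long *restrict in, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       out[i] = (float) in[i];
;;   }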

;; Support float to signed/unsigned long long conversion vectorization.
;; Note that any_fix (pc) here is just for code attribute <su>.
(define_expand "vec_unpack_<su>fix_trunc_hi_v4sf"
  [(match_operand:V2DI 0 "vint_operand")
   (match_operand:V4SF 1 "vfloat_operand")
   (any_fix (pc))]
  "TARGET_VSX"
{
  rtx reg = gen_reg_rtx (V4SFmode);
  rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN);
  emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg));
  DONE;
})

;; Note that any_fix (pc) here is just for code attribute <su>.
(define_expand "vec_unpack_<su>fix_trunc_lo_v4sf"
  [(match_operand:V2DI 0 "vint_operand")
   (match_operand:V4SF 1 "vfloat_operand")
   (any_fix (pc))]
  "TARGET_VSX"
{
  rtx reg = gen_reg_rtx (V4SFmode);
  rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN);
  emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg));
  DONE;
})

(define_insn "vsx_<xvcvbf16>"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (unspec:V16QI [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
                      XVCVBF16))]
  "TARGET_FUTURE"
  "<xvcvbf16> %x0,%x1"
  [(set_attr "type" "vecfloat")])