;; VSX patterns.
;; Copyright (C) 2009-2020 Free Software Foundation, Inc.
;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>

;; This file is part of GCC.

;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.

;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
;; License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; Iterator for comparison types
(define_code_iterator CMP_TEST [eq lt gt unordered])

;; Mode attribute for vector floate and floato conversions
(define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])

;; Iterator for both scalar and vector floating point types supported by VSX
(define_mode_iterator VSX_B [DF V4SF V2DF])

;; Iterator for the 2 64-bit vector types
(define_mode_iterator VSX_D [V2DF V2DI])

;; Mode iterator to handle swapping words on little endian for the 128-bit
;; types that go in a single vector register.
(define_mode_iterator VSX_LE_128 [(KF "FLOAT128_VECTOR_P (KFmode)")
                                  (TF "FLOAT128_VECTOR_P (TFmode)")
                                  TI
                                  V1TI])

;; Iterator for 128-bit integer types that go in a single vector register.
(define_mode_iterator VSX_TI [TI V1TI])

;; Iterator for the 2 32-bit vector types
(define_mode_iterator VSX_W [V4SF V4SI])

;; Iterator for the DF types
(define_mode_iterator VSX_DF [V2DF DF])

;; Iterator for vector floating point types supported by VSX
(define_mode_iterator VSX_F [V4SF V2DF])

;; Iterator for logical types supported by VSX
(define_mode_iterator VSX_L [V16QI
                             V8HI
                             V4SI
                             V2DI
                             V4SF
                             V2DF
                             V1TI
                             TI
                             (KF "FLOAT128_VECTOR_P (KFmode)")
                             (TF "FLOAT128_VECTOR_P (TFmode)")])

;; Iterator for memory moves.
(define_mode_iterator VSX_M [V16QI
                             V8HI
                             V4SI
                             V2DI
                             V4SF
                             V2DF
                             V1TI
                             (KF "FLOAT128_VECTOR_P (KFmode)")
                             (TF "FLOAT128_VECTOR_P (TFmode)")
                             TI])

(define_mode_attr VSX_XXBR [(V8HI "h")
                            (V4SI "w")
                            (V4SF "w")
                            (V2DF "d")
                            (V2DI "d")
                            (V1TI "q")])

;; Map into the appropriate load/store name based on the type
(define_mode_attr VSm [(V16QI "vw4")
                       (V8HI  "vw4")
                       (V4SI  "vw4")
                       (V4SF  "vw4")
                       (V2DF  "vd2")
                       (V2DI  "vd2")
                       (DF    "d")
                       (TF    "vd2")
                       (KF    "vd2")
                       (V1TI  "vd2")
                       (TI    "vd2")])

;; Map the register class used
(define_mode_attr VSr [(V16QI "v")
                       (V8HI  "v")
                       (V4SI  "v")
                       (V4SF  "wa")
                       (V2DI  "wa")
                       (V2DF  "wa")
                       (DI    "wa")
                       (DF    "wa")
                       (SF    "wa")
                       (TF    "wa")
                       (KF    "wa")
                       (V1TI  "v")
                       (TI    "wa")])

;; What value we need in the "isa" field, to make the IEEE QP float work.
(define_mode_attr VSisa [(V16QI "*")
                         (V8HI  "*")
                         (V4SI  "*")
                         (V4SF  "*")
                         (V2DI  "*")
                         (V2DF  "*")
                         (DI    "*")
                         (DF    "*")
                         (SF    "*")
                         (V1TI  "*")
                         (TI    "*")
                         (TF    "p9tf")
                         (KF    "p9kf")])

;; A mode attribute to disparage use of GPR registers, except for scalar
;; integer modes.
(define_mode_attr ??r [(V16QI "??r")
                       (V8HI  "??r")
                       (V4SI  "??r")
                       (V4SF  "??r")
                       (V2DI  "??r")
                       (V2DF  "??r")
                       (V1TI  "??r")
                       (KF    "??r")
                       (TF    "??r")
                       (TI    "r")])

;; A mode attribute used for 128-bit constant values.
(define_mode_attr nW [(V16QI "W")
                      (V8HI  "W")
                      (V4SI  "W")
                      (V4SF  "W")
                      (V2DI  "W")
                      (V2DF  "W")
                      (V1TI  "W")
                      (KF    "W")
                      (TF    "W")
                      (TI    "n")])

;; Same size integer type for floating point data
(define_mode_attr VSi [(V4SF "v4si")
                       (V2DF "v2di")
                       (DF   "di")])

(define_mode_attr VSI [(V4SF "V4SI")
                       (V2DF "V2DI")
                       (DF   "DI")])

;; Word size for same size conversion
(define_mode_attr VSc [(V4SF "w")
                       (V2DF "d")
                       (DF   "d")])

;; Map into either s or v, depending on whether this is a scalar or vector
;; operation
(define_mode_attr VSv [(V16QI "v")
                       (V8HI  "v")
                       (V4SI  "v")
                       (V4SF  "v")
                       (V2DI  "v")
                       (V2DF  "v")
                       (V1TI  "v")
                       (DF    "s")
                       (KF    "v")])

;; Appropriate type for add ops (and other simple FP ops)
(define_mode_attr VStype_simple [(V2DF "vecdouble")
                                 (V4SF "vecfloat")
                                 (DF   "fp")])

;; Appropriate type for multiply ops
(define_mode_attr VStype_mul [(V2DF "vecdouble")
                              (V4SF "vecfloat")
                              (DF   "dmul")])

;; Appropriate type for divide ops.
(define_mode_attr VStype_div [(V2DF "vecdiv")
                              (V4SF "vecfdiv")
                              (DF   "ddiv")])

;; Map the scalar mode for a vector type
(define_mode_attr VS_scalar [(V1TI  "TI")
                             (V2DF  "DF")
                             (V2DI  "DI")
                             (V4SF  "SF")
                             (V4SI  "SI")
                             (V8HI  "HI")
                             (V16QI "QI")])

;; Map to a double-sized vector mode
(define_mode_attr VS_double [(V4SI "V8SI")
                             (V4SF "V8SF")
                             (V2DI "V4DI")
                             (V2DF "V4DF")
                             (V1TI "V2TI")])

;; Iterators for loading constants with xxspltib
(define_mode_iterator VSINT_84  [V4SI V2DI DI SI])
(define_mode_iterator VSINT_842 [V8HI V4SI V2DI])

;; Vector reverse byte modes
(define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])
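
;; As an illustration, VEC_REVB together with the VSX_XXBR suffix
;; attribute above is what selects among the ISA 3.0 xxbrh/xxbrw/
;; xxbrd/xxbrq byte-reverse forms.  A minimal sketch, assuming
;; <altivec.h> and -mcpu=power9:
;;
;;   #include <altivec.h>
;;
;;   vector unsigned int
;;   revb_words (vector unsigned int v)
;;   {
;;     return vec_revb (v);	/* expected to use the "w" form, xxbrw */
;;   }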

;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
;; done on ISA 2.07 and not just ISA 3.0.
(define_mode_iterator VSX_EXTRACT_I  [V16QI V8HI V4SI])
(define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])
(define_mode_iterator VSX_EXTRACT_I4 [V16QI V8HI V4SI V2DI])

(define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")
                                     (V8HI "h")
                                     (V4SI "w")])

;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
;; insert to validate the operand number.
(define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
                                         (V8HI  "const_0_to_7_operand")
                                         (V4SI  "const_0_to_3_operand")])

;; Mode attribute to give the constraint for vector extract and insert
;; operations.
(define_mode_attr VSX_EX [(V16QI "v")
                          (V8HI  "v")
                          (V4SI  "wa")])

;; Mode iterator for binary floating types other than double, used to
;; optimize conversions to those floating point types from an extract
;; of an integer type
(define_mode_iterator VSX_EXTRACT_FL [SF
                                      (IF "FLOAT128_2REG_P (IFmode)")
                                      (KF "TARGET_FLOAT128_HW")
                                      (TF "FLOAT128_2REG_P (TFmode)
                                           || (FLOAT128_IEEE_P (TFmode)
                                               && TARGET_FLOAT128_HW)")])

;; Mode iterator for binary floating types that have a direct conversion
;; from 64-bit integer to floating point
(define_mode_iterator FL_CONV [SF
                               DF
                               (KF "TARGET_FLOAT128_HW")
                               (TF "TARGET_FLOAT128_HW
                                    && FLOAT128_IEEE_P (TFmode)")])

;; Iterator for the 2 short vector types to do a splat from an integer
(define_mode_iterator VSX_SPLAT_I [V16QI V8HI])

;; Mode attribute to give the count for the splat instruction to splat
;; the value in the 64-bit integer slot
(define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])

;; Mode attribute to give the suffix for the splat instruction
(define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])
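
;; As an illustration, the two splat attributes above combine into
;; templates of the shape "vsplt<suffix> %0,%1,<count>", i.e.
;; "vspltb ...,7" for V16QI and "vsplth ...,3" for V8HI, picking the
;; element that overlaps the 64-bit integer slot.  A minimal sketch,
;; assuming <altivec.h> and -mcpu=power9:
;;
;;   #include <altivec.h>
;;
;;   vector signed short
;;   splat_h (short x)
;;   {
;;     /* Typically a move-to-VSR followed by vsplth with count 3.  */
;;     return vec_splats (x);
;;   }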

;; Constants for creating unspecs
(define_c_enum "unspec"
  [UNSPEC_VSX_CONCAT
   UNSPEC_VSX_CVDPSXWS
   UNSPEC_VSX_CVDPUXWS
   UNSPEC_VSX_CVSPDP
   UNSPEC_VSX_CVHPSP
   UNSPEC_VSX_CVSPDPN
   UNSPEC_VSX_CVDPSPN
   UNSPEC_VSX_CVSXWDP
   UNSPEC_VSX_CVUXWDP
   UNSPEC_VSX_CVSXDSP
   UNSPEC_VSX_CVUXDSP
   UNSPEC_VSX_FLOAT2
   UNSPEC_VSX_UNS_FLOAT2
   UNSPEC_VSX_FLOATE
   UNSPEC_VSX_UNS_FLOATE
   UNSPEC_VSX_FLOATO
   UNSPEC_VSX_UNS_FLOATO
   UNSPEC_VSX_TDIV
   UNSPEC_VSX_TSQRT
   UNSPEC_VSX_SET
   UNSPEC_VSX_ROUND_I
   UNSPEC_VSX_ROUND_IC
   UNSPEC_VSX_SLDWI
   UNSPEC_VSX_XXPERM

   UNSPEC_VSX_XXSPLTW
   UNSPEC_VSX_XXSPLTD
   UNSPEC_VSX_DIVSD
   UNSPEC_VSX_DIVUD
   UNSPEC_VSX_MULSD
   UNSPEC_VSX_SIGN_EXTEND
   UNSPEC_VSX_XVCVBF16SP
   UNSPEC_VSX_XVCVSPBF16
   UNSPEC_VSX_XVCVSPSXDS
   UNSPEC_VSX_XVCVSPHP
   UNSPEC_VSX_VSLO
   UNSPEC_VSX_EXTRACT
   UNSPEC_VSX_SXEXPDP
   UNSPEC_VSX_SXSIG
   UNSPEC_VSX_SIEXPDP
   UNSPEC_VSX_SIEXPQP
   UNSPEC_VSX_SCMPEXPDP
   UNSPEC_VSX_SCMPEXPQP
   UNSPEC_VSX_STSTDC
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
   UNSPEC_VSX_VXEXP
   UNSPEC_VSX_VXSIG
   UNSPEC_VSX_VIEXP
   UNSPEC_VSX_VTSTDC
   UNSPEC_VSX_VSIGNED2

   UNSPEC_LXVL
   UNSPEC_LXVLL
   UNSPEC_LVSL_REG
   UNSPEC_LVSR_REG
   UNSPEC_STXVL
   UNSPEC_STXVLL
   UNSPEC_XL_LEN_R
   UNSPEC_XST_LEN_R

   UNSPEC_VCLZLSBB
   UNSPEC_VCTZLSBB
   UNSPEC_VEXTUBLX
   UNSPEC_VEXTUHLX
   UNSPEC_VEXTUWLX
   UNSPEC_VEXTUBRX
   UNSPEC_VEXTUHRX
   UNSPEC_VEXTUWRX
   UNSPEC_VCMPNEB
   UNSPEC_VCMPNEZB
   UNSPEC_VCMPNEH
   UNSPEC_VCMPNEZH
   UNSPEC_VCMPNEW
   UNSPEC_VCMPNEZW
   UNSPEC_XXEXTRACTUW
   UNSPEC_XXINSERTW
   UNSPEC_VSX_FIRST_MATCH_INDEX
   UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX
   UNSPEC_XXGENPCV
])

(define_int_iterator XVCVBF16 [UNSPEC_VSX_XVCVSPBF16
                               UNSPEC_VSX_XVCVBF16SP])

(define_int_attr xvcvbf16 [(UNSPEC_VSX_XVCVSPBF16 "xvcvspbf16")
                           (UNSPEC_VSX_XVCVBF16SP "xvcvbf16sp")])

;; VSX moves

;; The patterns for LE permuted loads and stores come before the general
;; VSX moves so they match first.
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 1) (const_int 0)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[0]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])
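
;; As an illustration, on little-endian POWER8 (no TARGET_P9_VECTOR) an
;; unaligned V2DF load normally matches the pattern above and splits
;; into an lxvd2x plus an xxpermdi doubleword swap; when a 128-bit
;; alignment is provable, the preparation code instead rewrites it into
;; an lvx with a masked address.  A minimal sketch, assuming
;; <altivec.h> and -mcpu=power8:
;;
;;   #include <altivec.h>
;;
;;   vector double
;;   load_v2df (const double *p)
;;   {
;;     return vec_xl (0, p);	/* typically lxvd2x + xxpermdi on LE */
;;   }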

(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
        (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[0]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 2)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[0]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 2)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[0]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 1) (const_int 0)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})
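
;; As an illustration, the matching store direction.  A minimal sketch,
;; assuming <altivec.h> and -mcpu=power8 little endian:
;;
;;   #include <altivec.h>
;;
;;   void
;;   store_v2df (double *q, vector double v)
;;   {
;;     vec_xst (v, 0, q);	/* typically xxpermdi + stxvd2x; stvx if
;;				   16-byte alignment is known */
;;   }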

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))]
  "")

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_W 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
  "")

(define_insn "*vsx_le_perm_store_v8hi"
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 2)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 1)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "")

(define_insn "*vsx_le_perm_store_v16qi"
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 2)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 1)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "")

;; Little endian word swapping for 128-bit types that are either scalars or the
;; special V1TI container class, for which it is not appropriate to use
;; vec_select.
(define_insn "*vsx_le_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=wa,wa,Z,&r,&r,Q")
        (rotate:VSX_TI
         (match_operand:VSX_TI 1 "input_operand" "wa,Z,wa,r,Q,r")
         (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "@
   xxpermdi %x0,%x1,%x1,2
   lxvd2x %x0,%y1
   stxvd2x %x1,%y0
   mr %0,%L1\;mr %L0,%1
   ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
   std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
  [(set_attr "length" "*,*,*,8,8,8")
   (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])

(define_insn_and_split "*vsx_le_undo_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=wa,wa")
        (rotate:VSX_TI
         (rotate:VSX_TI
          (match_operand:VSX_TI 1 "vsx_register_operand" "0,wa")
          (const_int 64))
         (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  "@
   #
   xxlor %x0,%x1"
  ""
  [(set (match_dup 0) (match_dup 1))]
{
  if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
    {
      emit_note (NOTE_INSN_DELETED);
      DONE;
    }
}
  [(set_attr "length" "0,4")
   (set_attr "type" "veclogical")])

(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=wa,r")
        (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "@
   #
   #"
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(const_int 0)]
{
  rtx tmp = (can_create_pseudo_p ()
             ? gen_reg_rtx_and_attrs (operands[0])
             : operands[0]);
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
  DONE;
}
  [(set_attr "type" "vecload,load")
   (set_attr "length" "8,8")
   (set_attr "isa" "<VSisa>,*")])

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
        (match_operand:VSX_LE_128 1 "vsx_register_operand" "+wa,r"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "@
   #
   #"
  [(set_attr "type" "vecstore,store")
   (set_attr "length" "12,8")
   (set_attr "isa" "<VSisa>,*")])

(define_split
  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
  [(const_int 0)]
{
  rtx tmp = (can_create_pseudo_p ()
             ? gen_reg_rtx_and_attrs (operands[0])
             : operands[0]);
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
  DONE;
})

;; Peepholes to catch loads and stores for TImode if TImode landed in
;; GPR registers on a little endian system.
(define_peephole2
  [(set (match_operand:VSX_TI 0 "int_reg_operand")
        (rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand")
                       (const_int 64)))
   (set (match_operand:VSX_TI 2 "int_reg_operand")
        (rotate:VSX_TI (match_dup 0)
                       (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
  [(set (match_dup 2) (match_dup 1))])

(define_peephole2
  [(set (match_operand:VSX_TI 0 "int_reg_operand")
        (rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand")
                       (const_int 64)))
   (set (match_operand:VSX_TI 2 "memory_operand")
        (rotate:VSX_TI (match_dup 0)
                       (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && peep2_reg_dead_p (2, operands[0])"
  [(set (match_dup 2) (match_dup 1))])

;; Peephole to catch memory to memory transfers for TImode if TImode landed in
;; VSX registers on a little endian system.  The vector types and IEEE 128-bit
;; floating point are handled by the more generic swap elimination pass.
(define_peephole2
  [(set (match_operand:TI 0 "vsx_register_operand")
        (rotate:TI (match_operand:TI 1 "vsx_register_operand")
                   (const_int 64)))
   (set (match_operand:TI 2 "vsx_register_operand")
        (rotate:TI (match_dup 0)
                   (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
  [(set (match_dup 2) (match_dup 1))])
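
;; As an illustration, when a TImode value is copied through GPRs on
;; little endian, the back-to-back 64-bit rotates produced by the
;; patterns above cancel, and the peepholes reduce the sequence to a
;; plain load or store.  A minimal sketch:
;;
;;   __int128
;;   copy_ti (__int128 *p)
;;   {
;;     return *p;	/* expected to end up as a simple ld/ld pair */
;;   }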

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
  [(const_int 0)]
{
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
  DONE;
})

;; Vector constants that can be generated with XXSPLTIB that was added in ISA
;; 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.
(define_insn "xxspltib_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
  "TARGET_P9_VECTOR"
{
  operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
  return "xxspltib %x0,%2";
}
  [(set_attr "type" "vecperm")])
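
;; As an illustration, a splat of a byte constant in the -128..127
;; range fits xxspltib directly.  A minimal sketch, assuming
;; <altivec.h> and -mcpu=power9:
;;
;;   #include <altivec.h>
;;
;;   vector signed char
;;   splat_m5 (void)
;;   {
;;     return vec_splats ((signed char) -5);	/* xxspltib with 251 */
;;   }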

(define_insn "xxspltib_<mode>_nosplit"
  [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
        (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
  "TARGET_P9_VECTOR"
{
  rtx op1 = operands[1];
  int value = 256;
  int num_insns = -1;

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
      || num_insns != 1)
    gcc_unreachable ();

  operands[2] = GEN_INT (value & 0xff);
  return "xxspltib %x0,%2";
}
  [(set_attr "type" "vecperm")])

(define_insn_and_split "*xxspltib_<mode>_split"
  [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
        (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
  "TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(const_int 0)]
{
  int value = 256;
  int num_insns = -1;
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp = ((can_create_pseudo_p ())
             ? gen_reg_rtx (V16QImode)
             : gen_lowpart (V16QImode, op0));

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
      || num_insns != 2)
    gcc_unreachable ();

  emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));

  if (<MODE>mode == V2DImode)
    emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));

  else if (<MODE>mode == V4SImode)
    emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));

  else if (<MODE>mode == V8HImode)
    emit_insn (gen_altivec_vupkhsb (op0, tmp));

  else
    gcc_unreachable ();

  DONE;
}
  [(set_attr "type" "vecperm")
   (set_attr "length" "8")])
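
;; As an illustration, the split form above handles the wider element
;; types by splatting the byte and then widening it, e.g. xxspltib
;; followed by vextsb2w for V4SI or vextsb2d for V2DI.  A minimal
;; sketch, assuming <altivec.h> and -mcpu=power9:
;;
;;   #include <altivec.h>
;;
;;   vector signed int
;;   splat_int_m5 (void)
;;   {
;;     return vec_splats (-5);	/* expected: xxspltib + vextsb2w */
;;   }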


;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTIB
;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
;; all 1's, since the machine does not have to wait for the previous
;; instruction using the register being set (such as a store waiting on a slow
;; instruction).  But generate XXLXOR/XXLORC if it will avoid a register move.

;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
;;              VSX 0/-1   VMX const  GPR const LVX (VMX)  STVX (VMX)
(define_insn "vsx_mov<mode>_64bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      wa,        wa,       r,         we,        ?wQ,
                ?&r,       ??r,       ??Y,      <??r>,     wa,        v,
                ?wa,       v,         <??r>,    wZ,        v")

        (match_operand:VSX_M 1 "input_operand"
               "wa,        ZwO,       wa,       we,        r,         r,
                wQ,        Y,         r,        r,         wE,        jwM,
                ?jwM,      W,         <nW>,     v,         wZ"))]

  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  return rs6000_output_move_128bit (operands);
}
  [(set_attr "type"
               "vecstore,  vecload,   vecsimple, mffgpr,   mftgpr,    load,
                store,     load,      store,     *,        vecsimple, vecsimple,
                vecsimple, *,         *,         vecstore, vecload")
   (set_attr "num_insns"
               "*,         *,         *,         2,        *,         2,
                2,         2,         2,         2,        *,         *,
                *,         5,         2,         *,        *")
   (set_attr "max_prefixed_insns"
               "*,         *,         *,         *,        *,         2,
                2,         2,         2,         2,        *,         *,
                *,         *,         *,         *,        *")
   (set_attr "length"
               "*,         *,         *,         8,        *,         8,
                8,         8,         8,         8,        *,         *,
                *,         20,        8,         *,        *")
   (set_attr "isa"
               "<VSisa>,   <VSisa>,   <VSisa>,   *,        *,         *,
                *,         *,         *,         *,        p9v,       *,
                <VSisa>,   *,         *,         *,        *")])

;;              VSX store  VSX load   VSX move   GPR load  GPR store GPR move
;;              XXSPLTIB   VSPLTISW   VSX 0/-1   VMX const GPR const
;;              LVX (VMX)  STVX (VMX)
(define_insn "*vsx_mov<mode>_32bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      wa,        wa,        ??r,      ??Y,      <??r>,
                wa,        v,         ?wa,       v,        <??r>,
                wZ,        v")

        (match_operand:VSX_M 1 "input_operand"
               "wa,        ZwO,       wa,        Y,        r,        r,
                wE,        jwM,       ?jwM,      W,        <nW>,
                v,         wZ"))]

  "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  return rs6000_output_move_128bit (operands);
}
  [(set_attr "type"
               "vecstore,  vecload,   vecsimple, load,     store,    *,
                vecsimple, vecsimple, vecsimple, *,        *,
                vecstore,  vecload")
   (set_attr "length"
               "*,         *,         *,         16,       16,       16,
                *,         *,         *,         20,       16,
                *,         *")
   (set_attr "isa"
               "<VSisa>,   <VSisa>,   <VSisa>,   *,        *,        *,
                p9v,       *,         <VSisa>,   *,        *,
                *,         *")])

;; Explicit load/store expanders for the builtin functions
(define_expand "vsx_load_<mode>"
  [(set (match_operand:VSX_M 0 "vsx_register_operand")
        (match_operand:VSX_M 1 "memory_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
    {
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
      DONE;
    }
})

(define_expand "vsx_store_<mode>"
  [(set (match_operand:VSX_M 0 "memory_operand")
        (match_operand:VSX_M 1 "vsx_register_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
    {
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
      DONE;
    }
})
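
;; As an illustration, these expanders back the vec_xl/vec_xst
;; built-ins; on little endian without TARGET_P9_VECTOR they emit the
;; explicit swap sequences up front so the later swap-optimization pass
;; can clean them up.  A minimal sketch, assuming <altivec.h>:
;;
;;   #include <altivec.h>
;;
;;   void
;;   copy16 (const unsigned char *src, unsigned char *dst)
;;   {
;;     vector unsigned char v = vec_xl (0, src);
;;     vec_xst (v, 0, dst);
;;   }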

;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
;; when you really want their element-reversing behavior.
(define_insn "vsx_ld_elemrev_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (vec_select:V2DI
          (match_operand:V2DI 1 "memory_operand" "Z")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
        (vec_select:V1TI
          (match_operand:V1TI 1 "memory_operand" "Z")
          (parallel [(const_int 0)])))]
  "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
{
  return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2";
}
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v2df"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (vec_select:V2DF
          (match_operand:V2DF 1 "memory_operand" "Z")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
        (vec_select:V4SI
          (match_operand:V4SI 1 "memory_operand" "Z")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
  "lxvw4x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v4sf"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (vec_select:V4SF
          (match_operand:V4SF 1 "memory_operand" "Z")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
  "lxvw4x %x0,%y1"
  [(set_attr "type" "vecload")])
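
;; As an illustration, the element-reversing loads are what vec_xl_be
;; expands to on little endian, leaving the elements in big-endian
;; order within the register.  A minimal sketch, assuming <altivec.h>:
;;
;;   #include <altivec.h>
;;
;;   vector signed int
;;   load_be_order (const signed int *p)
;;   {
;;     return vec_xl_be (0, p);	/* lxvw4x on LE */
;;   }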

(define_expand "vsx_ld_elemrev_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
          (match_operand:V8HI 1 "memory_operand" "Z")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx tmp = gen_reg_rtx (V4SImode);
      rtx subreg, subreg2, perm[16], pcv;
      /* 2 is leftmost element in register */
      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
      int i;

      subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
      emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
      subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,
                                                subreg2, pcv));
      DONE;
    }
})

(define_insn "*vsx_ld_elemrev_v8hi_internal"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
          (match_operand:V8HI 1 "memory_operand" "Z")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "lxvh8x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_expand "vsx_ld_elemrev_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_select:V16QI
          (match_operand:V16QI 1 "memory_operand" "Z")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int  9) (const_int  8)
                     (const_int  7) (const_int  6)
                     (const_int  5) (const_int  4)
                     (const_int  3) (const_int  2)
                     (const_int  1) (const_int  0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx tmp = gen_reg_rtx (V4SImode);
      rtx subreg, subreg2, perm[16], pcv;
      /* 3 is leftmost element in register */
      unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
      int i;

      subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0);
      emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
      subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0);

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2,
                                                 subreg2, pcv));
      DONE;
    }
})

(define_insn "vsx_ld_elemrev_v16qi_internal"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_select:V16QI
          (match_operand:V16QI 1 "memory_operand" "Z")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int  9) (const_int  8)
                     (const_int  7) (const_int  6)
                     (const_int  5) (const_int  4)
                     (const_int  3) (const_int  2)
                     (const_int  1) (const_int  0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "lxvb16x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_st_elemrev_v1ti"
  [(set (match_operand:V1TI 0 "memory_operand" "=Z")
        (vec_select:V1TI
          (match_operand:V1TI 1 "vsx_register_operand" "+wa")
          (parallel [(const_int 0)])))
   (clobber (match_dup 1))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
{
  return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0";
}
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v2df"
  [(set (match_operand:V2DF 0 "memory_operand" "=Z")
        (vec_select:V2DF
          (match_operand:V2DF 1 "vsx_register_operand" "wa")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v2di"
  [(set (match_operand:V2DI 0 "memory_operand" "=Z")
        (vec_select:V2DI
          (match_operand:V2DI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v4sf"
  [(set (match_operand:V4SF 0 "memory_operand" "=Z")
        (vec_select:V4SF
          (match_operand:V4SF 1 "vsx_register_operand" "wa")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
  "stxvw4x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v4si"
  [(set (match_operand:V4SI 0 "memory_operand" "=Z")
        (vec_select:V4SI
          (match_operand:V4SI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
  "stxvw4x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_expand "vsx_st_elemrev_v8hi"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
        (vec_select:V8HI
          (match_operand:V8HI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx mem_subreg, subreg, perm[16], pcv;
      rtx tmp = gen_reg_rtx (V8HImode);
      /* 2 is leftmost element in register */
      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
      int i;

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
                                                operands[1], pcv));
      subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
      mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
      emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
      DONE;
    }
})

(define_insn "*vsx_st_elemrev_v2di_internal"
  [(set (match_operand:V2DI 0 "memory_operand" "=Z")
        (vec_select:V2DI
          (match_operand:V2DI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_st_elemrev_v8hi_internal"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
        (vec_select:V8HI
          (match_operand:V8HI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "stxvh8x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_expand "vsx_st_elemrev_v16qi"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
        (vec_select:V16QI
          (match_operand:V16QI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int  9) (const_int  8)
                     (const_int  7) (const_int  6)
                     (const_int  5) (const_int  4)
                     (const_int  3) (const_int  2)
                     (const_int  1) (const_int  0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx mem_subreg, subreg, perm[16], pcv;
      rtx tmp = gen_reg_rtx (V16QImode);
      /* 3 is leftmost element in register */
      unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
      int i;

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1],
                                                 operands[1], pcv));
      subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0);
      mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0);
      emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
      DONE;
    }
})

(define_insn "*vsx_st_elemrev_v16qi_internal"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
        (vec_select:V16QI
          (match_operand:V16QI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int  9) (const_int  8)
                     (const_int  7) (const_int  6)
                     (const_int  5) (const_int  4)
                     (const_int  3) (const_int  2)
                     (const_int  1) (const_int  0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "stxvb16x %x1,%y0"
  [(set_attr "type" "vecstore")])


;; VSX vector floating point arithmetic instructions.  The VSX scalar
;; instructions are now combined with the insn for the traditional floating
;; point unit.
(define_insn "*vsx_add<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                    (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvadd<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
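
;; As an illustration, ordinary vector arithmetic maps directly onto
;; these insns.  A minimal sketch, assuming <altivec.h> and -mvsx:
;;
;;   #include <altivec.h>
;;
;;   vector double
;;   add_v2df (vector double a, vector double b)
;;   {
;;     return a + b;	/* xvadddp */
;;   }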

(define_insn "*vsx_sub<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                     (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvsub<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_mul<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                    (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmul<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

; Emulate vector with scalar for vec_mul in V2DImode
(define_insn_and_split "vsx_mul_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_MULSD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_muldi3 (op5, op3, op4));
  else
    {
      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
      emit_move_insn (op5, ret);
    }
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_muldi3 (op3, op3, op4));
  else
    {
      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
      emit_move_insn (op3, ret);
    }
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  DONE;
}
  [(set_attr "type" "mul")])
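
;; As an illustration, there is no V2DI multiply instruction here, so
;; the insn-and-split above scalarizes through DImode: two extracts per
;; operand, two 64-bit multiplies (mulld on 64-bit targets), and a
;; concat.  A minimal sketch, assuming <altivec.h>:
;;
;;   #include <altivec.h>
;;
;;   vector long long
;;   mul_v2di (vector long long a, vector long long b)
;;   {
;;     return a * b;	/* expected to expand via vsx_mul_v2di */
;;   }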

(define_insn "*vsx_div<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                   (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvdiv<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_div>")])

; Emulate vector with scalar for vec_div in V2DImode
(define_insn_and_split "vsx_div_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_DIVSD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_divdi3 (op5, op3, op4));
  else
    {
      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op5, LCT_NORMAL, DImode,
                                            op3, DImode,
                                            op4, DImode);
      emit_move_insn (op5, target);
    }
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_divdi3 (op3, op3, op4));
  else
    {
      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op3, LCT_NORMAL, DImode,
                                            op3, DImode,
                                            op4, DImode);
      emit_move_insn (op3, target);
    }
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  DONE;
}
  [(set_attr "type" "div")])

(define_insn_and_split "vsx_udiv_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_DIVUD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_udivdi3 (op5, op3, op4));
  else
    {
      rtx libfunc = optab_libfunc (udiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op5, LCT_NORMAL, DImode,
                                            op3, DImode,
                                            op4, DImode);
      emit_move_insn (op5, target);
    }
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_udivdi3 (op3, op3, op4));
  else
    {
      rtx libfunc = optab_libfunc (udiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op3, LCT_NORMAL, DImode,
                                            op3, DImode,
                                            op4, DImode);
      emit_move_insn (op3, target);
    }
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  DONE;
}
  [(set_attr "type" "div")])
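
;; As an illustration, the signed and unsigned V2DI divides above
;; scalarize the same way, using divd/divdu on 64-bit targets or
;; libgcc calls on 32-bit ones.  A minimal sketch, assuming
;; <altivec.h>:
;;
;;   #include <altivec.h>
;;
;;   vector unsigned long long
;;   div_v2di (vector unsigned long long a, vector unsigned long long b)
;;   {
;;     return vec_div (a, b);	/* expected to expand via vsx_udiv_v2di */
;;   }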

;; *tdiv* instruction returning the FG flag
(define_expand "vsx_tdiv<mode>3_fg"
  [(set (match_dup 3)
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
                      (match_operand:VSX_B 2 "vsx_register_operand")]
                     UNSPEC_VSX_TDIV))
   (set (match_operand:SI 0 "gpc_reg_operand")
        (gt:SI (match_dup 3)
               (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})

;; *tdiv* instruction returning the FE flag
(define_expand "vsx_tdiv<mode>3_fe"
  [(set (match_dup 3)
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
                      (match_operand:VSX_B 2 "vsx_register_operand")]
                     UNSPEC_VSX_TDIV))
   (set (match_operand:SI 0 "gpc_reg_operand")
        (eq:SI (match_dup 3)
               (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})

(define_insn "*vsx_tdiv<mode>3_internal"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")
                      (match_operand:VSX_B 2 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_TDIV))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>tdiv<sd>p %0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_fre<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
                      UNSPEC_FRES))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvre<sd>p %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_neg<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvneg<sd>p %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_abs<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvabs<sd>p %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_nabs<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (neg:VSX_F
         (abs:VSX_F
          (match_operand:VSX_F 1 "vsx_register_operand" "wa"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvnabs<sd>p %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_smax<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                    (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmax<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_smin<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                    (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmin<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_sqrt<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvsqrt<sd>p %x0,%x1"
  [(set_attr "type" "<sd>sqrt")])

(define_insn "*vsx_rsqrte<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
                      UNSPEC_RSQRT))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvrsqrte<sd>p %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

1808 ;; *tsqrt* returning the fg flag
1809 (define_expand "vsx_tsqrt<mode>2_fg"
1810 [(set (match_dup 2)
1811 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1812 UNSPEC_VSX_TSQRT))
1813 (set (match_operand:SI 0 "gpc_reg_operand")
1814 (gt:SI (match_dup 2)
1815 (const_int 0)))]
1816 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1817 {
1818 operands[2] = gen_reg_rtx (CCFPmode);
1819 })
1820
1821 ;; *tsqrt* returning the fe flag
1822 (define_expand "vsx_tsqrt<mode>2_fe"
1823 [(set (match_dup 2)
1824 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1825 UNSPEC_VSX_TSQRT))
1826 (set (match_operand:SI 0 "gpc_reg_operand")
1827 (eq:SI (match_dup 2)
1828 (const_int 0)))]
1829 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1830 {
1831 operands[2] = gen_reg_rtx (CCFPmode);
1832 })
1833
1834 (define_insn "*vsx_tsqrt<mode>2_internal"
1835 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
1836 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
1837 UNSPEC_VSX_TSQRT))]
1838 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1839 "x<VSv>tsqrt<sd>p %0,%x1"
1840 [(set_attr "type" "<VStype_simple>")])
1841
1842 ;; Fused vector multiply/add instructions. Support the classical Altivec
1843 ;; versions of fma, which allow the target to be a separate register from the
1844 ;; 3 inputs. Under VSX, the target must be either the addend or the first
1845 ;; multiply input.
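;;
;; A minimal source-level sketch (assumes the classic vec_madd built-in
;; from altivec.h):
;;   #include <altivec.h>
;;   vector float
;;   fma4 (vector float a, vector float b, vector float c)
;;   {
;;     return vec_madd (a, b, c);  /* xvmaddasp, xvmaddmsp, or vmaddfp */
;;   }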
1846
1847 (define_insn "*vsx_fmav4sf4"
1848 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v")
1849 (fma:V4SF
1850 (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v")
1851 (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v")
1852 (match_operand:V4SF 3 "vsx_register_operand" "0,wa,v")))]
1853 "VECTOR_UNIT_VSX_P (V4SFmode)"
1854 "@
1855 xvmaddasp %x0,%x1,%x2
1856 xvmaddmsp %x0,%x1,%x3
1857 vmaddfp %0,%1,%2,%3"
1858 [(set_attr "type" "vecfloat")])
1859
1860 (define_insn "*vsx_fmav2df4"
1861 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
1862 (fma:V2DF
1863 (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa")
1864 (match_operand:V2DF 2 "vsx_register_operand" "wa,0")
1865 (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))]
1866 "VECTOR_UNIT_VSX_P (V2DFmode)"
1867 "@
1868 xvmaddadp %x0,%x1,%x2
1869 xvmaddmdp %x0,%x1,%x3"
1870 [(set_attr "type" "vecdouble")])
1871
1872 (define_insn "*vsx_fms<mode>4"
1873 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa")
1874 (fma:VSX_F
1875 (match_operand:VSX_F 1 "vsx_register_operand" "%wa,wa")
1876 (match_operand:VSX_F 2 "vsx_register_operand" "wa,0")
1877 (neg:VSX_F
1878 (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))]
1879 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1880 "@
1881 xvmsuba<sd>p %x0,%x1,%x2
1882 xvmsubm<sd>p %x0,%x1,%x3"
1883 [(set_attr "type" "<VStype_mul>")])
1884
1885 (define_insn "*vsx_nfma<mode>4"
1886 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa")
1887 (neg:VSX_F
1888 (fma:VSX_F
1889 (match_operand:VSX_F 1 "vsx_register_operand" "wa,wa")
1890 (match_operand:VSX_F 2 "vsx_register_operand" "wa,0")
1891 (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))]
1892 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1893 "@
1894 xvnmadda<sd>p %x0,%x1,%x2
1895 xvnmaddm<sd>p %x0,%x1,%x3"
1896 [(set_attr "type" "<VStype_mul>")])
1897
1898 (define_insn "*vsx_nfmsv4sf4"
1899 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v")
1900 (neg:V4SF
1901 (fma:V4SF
1902 (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v")
1903 (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v")
1904 (neg:V4SF
1905 (match_operand:V4SF 3 "vsx_register_operand" "0,wa,v")))))]
1906 "VECTOR_UNIT_VSX_P (V4SFmode)"
1907 "@
1908 xvnmsubasp %x0,%x1,%x2
1909 xvnmsubmsp %x0,%x1,%x3
1910 vnmsubfp %0,%1,%2,%3"
1911 [(set_attr "type" "vecfloat")])
1912
1913 (define_insn "*vsx_nfmsv2df4"
1914 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
1915 (neg:V2DF
1916 (fma:V2DF
1917 (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa")
1918 (match_operand:V2DF 2 "vsx_register_operand" "wa,0")
1919 (neg:V2DF
1920 (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))))]
1921 "VECTOR_UNIT_VSX_P (V2DFmode)"
1922 "@
1923 xvnmsubadp %x0,%x1,%x2
1924 xvnmsubmdp %x0,%x1,%x3"
1925 [(set_attr "type" "vecdouble")])
1926
1927 ;; Vector conditional expressions (no scalar version for these instructions)
1928 (define_insn "vsx_eq<mode>"
1929 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1930 (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1931 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1932 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1933 "xvcmpeq<sd>p %x0,%x1,%x2"
1934 [(set_attr "type" "<VStype_simple>")])
1935
1936 (define_insn "vsx_gt<mode>"
1937 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1938 (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1939 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1940 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1941 "xvcmpgt<sd>p %x0,%x1,%x2"
1942 [(set_attr "type" "<VStype_simple>")])
1943
1944 (define_insn "*vsx_ge<mode>"
1945 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1946 (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1947 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1948 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1949 "xvcmpge<sd>p %x0,%x1,%x2"
1950 [(set_attr "type" "<VStype_simple>")])
1951
1952 ;; Compare vectors producing a vector result and a predicate, setting CR6 to
1953 ;; indicate a combined status
1954 (define_insn "*vsx_eq_<mode>_p"
1955 [(set (reg:CC CR6_REGNO)
1956 (unspec:CC
1957 [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1958 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
1959 UNSPEC_PREDICATE))
1960 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1961 (eq:VSX_F (match_dup 1)
1962 (match_dup 2)))]
1963 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1964 "xvcmpeq<sd>p. %x0,%x1,%x2"
1965 [(set_attr "type" "<VStype_simple>")])
1966
1967 (define_insn "*vsx_gt_<mode>_p"
1968 [(set (reg:CC CR6_REGNO)
1969 (unspec:CC
1970 [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1971 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
1972 UNSPEC_PREDICATE))
1973 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1974 (gt:VSX_F (match_dup 1)
1975 (match_dup 2)))]
1976 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1977 "xvcmpgt<sd>p. %x0,%x1,%x2"
1978 [(set_attr "type" "<VStype_simple>")])
1979
1980 (define_insn "*vsx_ge_<mode>_p"
1981 [(set (reg:CC CR6_REGNO)
1982 (unspec:CC
1983 [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1984 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
1985 UNSPEC_PREDICATE))
1986 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1987 (ge:VSX_F (match_dup 1)
1988 (match_dup 2)))]
1989 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1990 "xvcmpge<sd>p. %x0,%x1,%x2"
1991 [(set_attr "type" "<VStype_simple>")])
1992
1993 ;; Vector select
1994 (define_insn "*vsx_xxsel<mode>"
1995 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
1996 (if_then_else:VSX_L
1997 (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
1998 (match_operand:VSX_L 4 "zero_constant" ""))
1999 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
2000 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
2001 "VECTOR_MEM_VSX_P (<MODE>mode)"
2002 "xxsel %x0,%x3,%x2,%x1"
2003 [(set_attr "type" "vecmove")
2004 (set_attr "isa" "<VSisa>")])
2005
2006 (define_insn "*vsx_xxsel<mode>_uns"
2007 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
2008 (if_then_else:VSX_L
2009 (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
2010 (match_operand:VSX_L 4 "zero_constant" ""))
2011 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
2012 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
2013 "VECTOR_MEM_VSX_P (<MODE>mode)"
2014 "xxsel %x0,%x3,%x2,%x1"
2015 [(set_attr "type" "vecmove")
2016 (set_attr "isa" "<VSisa>")])
2017
2018 ;; Copy sign
2019 (define_insn "vsx_copysign<mode>3"
2020 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2021 (unspec:VSX_F
2022 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
2023 (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
2024 UNSPEC_COPYSIGN))]
2025 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2026 "xvcpsgn<sd>p %x0,%x2,%x1"
2027 [(set_attr "type" "<VStype_simple>")])
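
;; Source-level sketch (a hypothetical example, not from the testsuite):
;; with -O2 -ftree-vectorize on a VSX target this loop can map to xvcpsgndp;
;; the template above swaps the operands because the instruction takes the
;; sign from its first input.
;;   void
;;   cpsgn (double *r, const double *x, const double *y)
;;   {
;;     for (int i = 0; i < 1024; i++)
;;       r[i] = __builtin_copysign (x[i], y[i]);
;;   }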
2028
2029 ;; For the conversions, limit the register class for the integer value to be
2030 ;; the fprs because we don't want to add the altivec registers to movdi/movsi.
2031 ;; For the unsigned tests, there isn't a generic double -> unsigned conversion
2032 ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
2033 ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
2034 ;; in allowing virtual registers.
2035 (define_insn "vsx_float<VSi><mode>2"
2036 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
2037 (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
2038 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2039 "xvcvsx<VSc><sd>p %x0,%x1"
2040 [(set_attr "type" "<VStype_simple>")])
2041
2042 (define_insn "vsx_floatuns<VSi><mode>2"
2043 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
2044 (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
2045 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2046 "xvcvux<VSc><sd>p %x0,%x1"
2047 [(set_attr "type" "<VStype_simple>")])
2048
2049 (define_insn "vsx_fix_trunc<mode><VSi>2"
2050 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
2051 (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
2052 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2053 "x<VSv>cv<sd>psx<VSc>s %x0,%x1"
2054 [(set_attr "type" "<VStype_simple>")])
2055
2056 (define_insn "vsx_fixuns_trunc<mode><VSi>2"
2057 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
2058 (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
2059 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2060 "x<VSv>cv<sd>pux<VSc>s %x0,%x1"
2061 [(set_attr "type" "<VStype_simple>")])
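
;; Source-level sketch: a plain conversion loop can vectorize to these
;; patterns, e.g. xvcvdpsxds for double -> signed long long (a hypothetical
;; example, assuming -O2 -ftree-vectorize on a VSX target):
;;   void
;;   d2ll (long long *r, const double *x)
;;   {
;;     for (int i = 0; i < 1024; i++)
;;       r[i] = (long long) x[i];  /* expected to use the fix_trunc pattern */
;;   }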
2062
2063 ;; Math rounding functions
2064 (define_insn "vsx_x<VSv>r<sd>pi"
2065 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2066 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2067 UNSPEC_VSX_ROUND_I))]
2068 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2069 "x<VSv>r<sd>pi %x0,%x1"
2070 [(set_attr "type" "<VStype_simple>")])
2071
2072 (define_insn "vsx_x<VSv>r<sd>pic"
2073 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2074 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2075 UNSPEC_VSX_ROUND_IC))]
2076 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2077 "x<VSv>r<sd>pic %x0,%x1"
2078 [(set_attr "type" "<VStype_simple>")])
2079
2080 (define_insn "vsx_btrunc<mode>2"
2081 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2082 (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
2083 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2084 "xvr<sd>piz %x0,%x1"
2085 [(set_attr "type" "<VStype_simple>")])
2086
2087 (define_insn "*vsx_b2trunc<mode>2"
2088 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2089 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2090 UNSPEC_FRIZ))]
2091 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2092 "x<VSv>r<sd>piz %x0,%x1"
2093 [(set_attr "type" "<VStype_simple>")])
2094
2095 (define_insn "vsx_floor<mode>2"
2096 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2097 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
2098 UNSPEC_FRIM))]
2099 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2100 "xvr<sd>pim %x0,%x1"
2101 [(set_attr "type" "<VStype_simple>")])
2102
2103 (define_insn "vsx_ceil<mode>2"
2104 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2105 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
2106 UNSPEC_FRIP))]
2107 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2108 "xvr<sd>pip %x0,%x1"
2109 [(set_attr "type" "<VStype_simple>")])
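
;; Source-level sketch (assumes vec_floor/vec_ceil/vec_trunc from
;; altivec.h):
;;   #include <altivec.h>
;;   vector double
;;   round_down (vector double x)
;;   {
;;     return vec_floor (x);  /* xvrdpim */
;;   }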
2110
2111 \f
2112 ;; VSX convert to/from double vector
2113
2114 ;; Convert between single and double precision
2115 ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
2116 ;; scalar single precision instructions internally use the double format.
2117 ;; Prefer the altivec registers, since we likely will need to do a vperm
2118 (define_insn "vsx_xscvdpsp"
2119 [(set (match_operand:V4SF 0 "vsx_register_operand" "=f,?wa")
2120 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "f,wa")]
2121 UNSPEC_VSX_CVSPDP))]
2122 "VECTOR_UNIT_VSX_P (DFmode)"
2123 "xscvdpsp %x0,%x1"
2124 [(set_attr "type" "fp")])
2125
2126 (define_insn "vsx_xvcvspdp_be"
2127 [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
2128 (float_extend:V2DF
2129 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2130 (parallel [(const_int 0) (const_int 2)]))))]
2131 "VECTOR_UNIT_VSX_P (V4SFmode) && BYTES_BIG_ENDIAN"
2132 "xvcvspdp %x0,%x1"
2133 [(set_attr "type" "vecdouble")])
2134
2135 (define_insn "vsx_xvcvspdp_le"
2136 [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
2137 (float_extend:V2DF
2138 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2139 (parallel [(const_int 1) (const_int 3)]))))]
2140 "VECTOR_UNIT_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
2141 "xvcvspdp %x0,%x1"
2142 [(set_attr "type" "vecdouble")])
2143
2144 (define_expand "vsx_xvcvspdp"
2145 [(match_operand:V2DF 0 "vsx_register_operand")
2146 (match_operand:V4SF 1 "vsx_register_operand")]
2147 "VECTOR_UNIT_VSX_P (V4SFmode)"
2148 {
2149 if (BYTES_BIG_ENDIAN)
2150 emit_insn (gen_vsx_xvcvspdp_be (operands[0], operands[1]));
2151 else
2152 emit_insn (gen_vsx_xvcvspdp_le (operands[0], operands[1]));
2153 DONE;
2154 })
2155
2156 (define_insn "vsx_xvcvdpsp"
2157 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,?wa")
2158 (unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "v,wa")]
2159 UNSPEC_VSX_CVSPDP))]
2160 "VECTOR_UNIT_VSX_P (V2DFmode)"
2161 "xvcvdpsp %x0,%x1"
2162 [(set_attr "type" "vecdouble")])
2163
2164 ;; xscvspdp, represent the scalar SF type as V4SF
2165 (define_insn "vsx_xscvspdp"
2166 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2167 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2168 UNSPEC_VSX_CVSPDP))]
2169 "VECTOR_UNIT_VSX_P (V4SFmode)"
2170 "xscvspdp %x0,%x1"
2171 [(set_attr "type" "fp")])
2172
2173 ;; Same as vsx_xscvspdp, but use SF as the type
2174 (define_insn "vsx_xscvspdp_scalar2"
2175 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2176 (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2177 UNSPEC_VSX_CVSPDP))]
2178 "VECTOR_UNIT_VSX_P (V4SFmode)"
2179 "xscvspdp %x0,%x1"
2180 [(set_attr "type" "fp")])
2181
2182 ;; Generate xvcvhpsp instruction
2183 (define_insn "vsx_xvcvhpsp"
2184 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2185 (unspec:V4SF [(match_operand: V16QI 1 "vsx_register_operand" "wa")]
2186 UNSPEC_VSX_CVHPSP))]
2187 "TARGET_P9_VECTOR"
2188 "xvcvhpsp %x0,%x1"
2189 [(set_attr "type" "vecfloat")])
2190
2191 ;; Generate xvcvsphp
2192 (define_insn "vsx_xvcvsphp"
2193 [(set (match_operand:V4SI 0 "register_operand" "=wa")
2194 (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2195 UNSPEC_VSX_XVCVSPHP))]
2196 "TARGET_P9_VECTOR"
2197 "xvcvsphp %x0,%x1"
2198 [(set_attr "type" "vecfloat")])
2199
2200 ;; xscvdpsp used for splatting a scalar to V4SF, knowing that the internal SF
2201 ;; format of scalars is actually DF.
2202 (define_insn "vsx_xscvdpsp_scalar"
2203 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2204 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2205 UNSPEC_VSX_CVSPDP))]
2206 "VECTOR_UNIT_VSX_P (V4SFmode)"
2207 "xscvdpsp %x0,%x1"
2208 [(set_attr "type" "fp")])
2209
2210 ;; ISA 2.07 xscvdpspn/xscvspdpn that do not raise an error on signalling NaNs
2211 (define_insn "vsx_xscvdpspn"
2212 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2213 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wa")]
2214 UNSPEC_VSX_CVDPSPN))]
2215 "TARGET_XSCVDPSPN"
2216 "xscvdpspn %x0,%x1"
2217 [(set_attr "type" "fp")])
2218
2219 (define_insn "vsx_xscvspdpn"
2220 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2221 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2222 UNSPEC_VSX_CVSPDPN))]
2223 "TARGET_XSCVSPDPN"
2224 "xscvspdpn %x0,%x1"
2225 [(set_attr "type" "fp")])
2226
2227 (define_insn "vsx_xscvdpspn_scalar"
2228 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2229 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2230 UNSPEC_VSX_CVDPSPN))]
2231 "TARGET_XSCVDPSPN"
2232 "xscvdpspn %x0,%x1"
2233 [(set_attr "type" "fp")])
2234
2235 ;; Used by direct move to move a SFmode value from GPR to VSX register
2236 (define_insn "vsx_xscvspdpn_directmove"
2237 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2238 (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2239 UNSPEC_VSX_CVSPDPN))]
2240 "TARGET_XSCVSPDPN"
2241 "xscvspdpn %x0,%x1"
2242 [(set_attr "type" "fp")])
2243
2244 ;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
2245
2246 (define_insn "vsx_xvcv<su>xwsp"
2247 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2248 (any_float:V4SF (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
2249 "VECTOR_UNIT_VSX_P (V4SFmode)"
2250 "xvcv<su>xwsp %x0,%x1"
2251 [(set_attr "type" "vecfloat")])
2252
2253 (define_insn "vsx_xvcv<su>xddp"
2254 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2255 (any_float:V2DF (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
2256 "VECTOR_UNIT_VSX_P (V2DFmode)"
2257 "xvcv<su>xddp %x0,%x1"
2258 [(set_attr "type" "vecdouble")])
2259
2260 (define_insn "vsx_xvcvsp<su>xws"
2261 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
2262 (any_fix:V4SI (match_operand:V4SF 1 "vsx_register_operand" "wa")))]
2263 "VECTOR_UNIT_VSX_P (V4SFmode)"
2264 "xvcvsp<su>xws %x0,%x1"
2265 [(set_attr "type" "vecfloat")])
2266
2267 (define_insn "vsx_xvcvdp<su>xds"
2268 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
2269 (any_fix:V2DI (match_operand:V2DF 1 "vsx_register_operand" "wa")))]
2270 "VECTOR_UNIT_VSX_P (V2DFmode)"
2271 "xvcvdp<su>xds %x0,%x1"
2272 [(set_attr "type" "vecdouble")])
2273
2274 (define_expand "vsx_xvcvsxddp_scale"
2275 [(match_operand:V2DF 0 "vsx_register_operand")
2276 (match_operand:V2DI 1 "vsx_register_operand")
2277 (match_operand:QI 2 "immediate_operand")]
2278 "VECTOR_UNIT_VSX_P (V2DFmode)"
2279 {
2280 rtx op0 = operands[0];
2281 rtx op1 = operands[1];
2282 int scale = INTVAL (operands[2]);
2283 emit_insn (gen_vsx_xvcvsxddp (op0, op1));
2284 if (scale != 0)
2285 rs6000_scale_v2df (op0, op0, -scale);
2286 DONE;
2287 })
2288
2289 (define_expand "vsx_xvcvuxddp_scale"
2290 [(match_operand:V2DF 0 "vsx_register_operand")
2291 (match_operand:V2DI 1 "vsx_register_operand")
2292 (match_operand:QI 2 "immediate_operand")]
2293 "VECTOR_UNIT_VSX_P (V2DFmode)"
2294 {
2295 rtx op0 = operands[0];
2296 rtx op1 = operands[1];
2297 int scale = INTVAL (operands[2]);
2298 emit_insn (gen_vsx_xvcvuxddp (op0, op1));
2299 if (scale != 0)
2300 rs6000_scale_v2df (op0, op0, -scale);
2301 DONE;
2302 })
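
;; Usage sketch (assumes the vec_ctf built-in from altivec.h): the scale
;; operand divides the converted value by 2**scale, which is why the
;; expanders above pass the negated scale:
;;   #include <altivec.h>
;;   vector double
;;   ctf5 (vector signed long long a)
;;   {
;;     return vec_ctf (a, 5);  /* xvcvsxddp, then scale by 2**-5 */
;;   }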
2303
2304 (define_expand "vsx_xvcvdpsxds_scale"
2305 [(match_operand:V2DI 0 "vsx_register_operand")
2306 (match_operand:V2DF 1 "vsx_register_operand")
2307 (match_operand:QI 2 "immediate_operand")]
2308 "VECTOR_UNIT_VSX_P (V2DFmode)"
2309 {
2310 rtx op0 = operands[0];
2311 rtx op1 = operands[1];
2312 rtx tmp;
2313 int scale = INTVAL (operands[2]);
2314 if (scale == 0)
2315 tmp = op1;
2316 else
2317 {
2318 tmp = gen_reg_rtx (V2DFmode);
2319 rs6000_scale_v2df (tmp, op1, scale);
2320 }
2321 emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
2322 DONE;
2323 })
2324
2325 ;; convert vector of 64-bit floating point numbers to vector of
2326 ;; 64-bit unsigned integer
2327 (define_expand "vsx_xvcvdpuxds_scale"
2328 [(match_operand:V2DI 0 "vsx_register_operand")
2329 (match_operand:V2DF 1 "vsx_register_operand")
2330 (match_operand:QI 2 "immediate_operand")]
2331 "VECTOR_UNIT_VSX_P (V2DFmode)"
2332 {
2333 rtx op0 = operands[0];
2334 rtx op1 = operands[1];
2335 rtx tmp;
2336 int scale = INTVAL (operands[2]);
2337 if (scale == 0)
2338 tmp = op1;
2339 else
2340 {
2341 tmp = gen_reg_rtx (V2DFmode);
2342 rs6000_scale_v2df (tmp, op1, scale);
2343 }
2344 emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
2345 DONE;
2346 })
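
;; Usage sketch (assumes the vec_cts built-in for vector double): here the
;; input is scaled by 2**scale before the float -> integer conversion:
;;   #include <altivec.h>
;;   vector signed long long
;;   cts3 (vector double a)
;;   {
;;     return vec_cts (a, 3);  /* scale by 2**3, then xvcvdpsxds */
;;   }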
2347
2348 ;; Convert from 64-bit to 32-bit types
2349 ;; Note, favor the Altivec registers since the usual use of these instructions
2350 ;; is in vector converts and we need to use the Altivec vperm instruction.
2351
2352 (define_insn "vsx_xvcvdpsxws"
2353 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2354 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
2355 UNSPEC_VSX_CVDPSXWS))]
2356 "VECTOR_UNIT_VSX_P (V2DFmode)"
2357 "xvcvdpsxws %x0,%x1"
2358 [(set_attr "type" "vecdouble")])
2359
2360 (define_insn "vsx_xvcvdpuxws"
2361 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2362 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
2363 UNSPEC_VSX_CVDPUXWS))]
2364 "VECTOR_UNIT_VSX_P (V2DFmode)"
2365 "xvcvdpuxws %x0,%x1"
2366 [(set_attr "type" "vecdouble")])
2367
2368 (define_insn "vsx_xvcvsxdsp"
2369 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2370 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2371 UNSPEC_VSX_CVSXDSP))]
2372 "VECTOR_UNIT_VSX_P (V2DFmode)"
2373 "xvcvsxdsp %x0,%x1"
2374 [(set_attr "type" "vecfloat")])
2375
2376 (define_insn "vsx_xvcvuxdsp"
2377 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2378 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2379 UNSPEC_VSX_CVUXDSP))]
2380 "VECTOR_UNIT_VSX_P (V2DFmode)"
2381 "xvcvuxdsp %x0,%x1"
2382 [(set_attr "type" "vecdouble")])
2383
2384 ;; Convert vector of 32-bit signed/unsigned integers to vector of
2385 ;; 64-bit floating point numbers.
2386 (define_insn "vsx_xvcv<su>xwdp_be"
2387 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2388 (any_float:V2DF
2389 (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa")
2390 (parallel [(const_int 0) (const_int 2)]))))]
2391 "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN"
2392 "xvcv<su>xwdp %x0,%x1"
2393 [(set_attr "type" "vecdouble")])
2394
2395 (define_insn "vsx_xvcv<su>xwdp_le"
2396 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2397 (any_float:V2DF
2398 (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa")
2399 (parallel [(const_int 1) (const_int 3)]))))]
2400 "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
2401 "xvcv<su>xwdp %x0,%x1"
2402 [(set_attr "type" "vecdouble")])
2403
2404 (define_expand "vsx_xvcv<su>xwdp"
2405 [(match_operand:V2DF 0 "vsx_register_operand")
2406 (match_operand:V4SI 1 "vsx_register_operand")
2407 (any_float (pc))]
2408 "VECTOR_UNIT_VSX_P (V2DFmode)"
2409 {
2410 if (BYTES_BIG_ENDIAN)
2411 emit_insn (gen_vsx_xvcv<su>xwdp_be (operands[0], operands[1]));
2412 else
2413 emit_insn (gen_vsx_xvcv<su>xwdp_le (operands[0], operands[1]));
2414 DONE;
2415 })
2416
2417 (define_insn "vsx_xvcvsxwdp_df"
2418 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2419 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2420 UNSPEC_VSX_CVSXWDP))]
2421 "TARGET_VSX"
2422 "xvcvsxwdp %x0,%x1"
2423 [(set_attr "type" "vecdouble")])
2424
2425 (define_insn "vsx_xvcvuxwdp_df"
2426 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2427 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2428 UNSPEC_VSX_CVUXWDP))]
2429 "TARGET_VSX"
2430 "xvcvuxwdp %x0,%x1"
2431 [(set_attr "type" "vecdouble")])
2432
2433 ;; Convert vector of 32-bit floating point numbers to vector of
2434 ;; 64-bit signed/unsigned integers.
2435 (define_insn "vsx_xvcvsp<su>xds_be"
2436 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2437 (any_fix:V2DI
2438 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2439 (parallel [(const_int 0) (const_int 2)]))))]
2440 "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN"
2441 "xvcvsp<su>xds %x0,%x1"
2442 [(set_attr "type" "vecdouble")])
2443
2444 (define_insn "vsx_xvcvsp<su>xds_le"
2445 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2446 (any_fix:V2DI
2447 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2448 (parallel [(const_int 1) (const_int 3)]))))]
2449 "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
2450 "xvcvsp<su>xds %x0,%x1"
2451 [(set_attr "type" "vecdouble")])
2452
2453 (define_expand "vsx_xvcvsp<su>xds"
2454 [(match_operand:V2DI 0 "vsx_register_operand")
2455 (match_operand:V4SF 1 "vsx_register_operand")
2456 (any_fix (pc))]
2457 "VECTOR_UNIT_VSX_P (V2DFmode)"
2458 {
2459 if (BYTES_BIG_ENDIAN)
2460 emit_insn (gen_vsx_xvcvsp<su>xds_be (operands[0], operands[1]));
2461 else
2462 emit_insn (gen_vsx_xvcvsp<su>xds_le (operands[0], operands[1]));
2463 DONE;
2464 })
2465
2466 ;; Generate float2_v2df
2467 ;; convert two vectors of double to a vector of float
2468 (define_expand "float2_v2df"
2469 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2470 (use (match_operand:V2DF 1 "register_operand" "wa"))
2471 (use (match_operand:V2DF 2 "register_operand" "wa"))]
2472 "VECTOR_UNIT_VSX_P (V4SFmode)"
2473 {
2474 rtx rtx_src1, rtx_src2, rtx_dst;
2475
2476 rtx_dst = operands[0];
2477 rtx_src1 = operands[1];
2478 rtx_src2 = operands[2];
2479
2480 rs6000_generate_float2_double_code (rtx_dst, rtx_src1, rtx_src2);
2481 DONE;
2482 })
2483
2484 ;; Generate float2
2485 ;; convert two long long signed ints to float
2486 (define_expand "float2_v2di"
2487 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2488 (use (match_operand:V2DI 1 "register_operand" "wa"))
2489 (use (match_operand:V2DI 2 "register_operand" "wa"))]
2490 "VECTOR_UNIT_VSX_P (V4SFmode)"
2491 {
2492 rtx rtx_src1, rtx_src2, rtx_dst;
2493
2494 rtx_dst = operands[0];
2495 rtx_src1 = operands[1];
2496 rtx_src2 = operands[2];
2497
2498 rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
2499 DONE;
2500 })
2501
2502 ;; Generate uns_float2
2503 ;; convert two long long unsigned ints to float
2504 (define_expand "uns_float2_v2di"
2505 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2506 (use (match_operand:V2DI 1 "register_operand" "wa"))
2507 (use (match_operand:V2DI 2 "register_operand" "wa"))]
2508 "VECTOR_UNIT_VSX_P (V4SFmode)"
2509 {
2510 rtx rtx_src1, rtx_src2, rtx_dst;
2511
2512 rtx_dst = operands[0];
2513 rtx_src1 = operands[1];
2514 rtx_src2 = operands[2];
2515
2516 rs6000_generate_float2_code (false, rtx_dst, rtx_src1, rtx_src2);
2517 DONE;
2518 })
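
;; Usage sketch (assumes the overloaded vec_float2 built-in): the two
;; expanders above pack the converted results of two V2DI inputs into one
;; V4SF result:
;;   #include <altivec.h>
;;   vector float
;;   f2 (vector signed long long a, vector signed long long b)
;;   {
;;     return vec_float2 (a, b);
;;   }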
2519
2520 ;; Generate floate
2521 ;; convert double or long long signed to float
2522 ;; (Only even words are valid, BE numbering)
2523 (define_expand "floate<mode>"
2524 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2525 (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2526 "VECTOR_UNIT_VSX_P (V4SFmode)"
2527 {
2528 if (BYTES_BIG_ENDIAN)
2529 {
2530 /* Shift left one word to put the even word in the correct location.  */
2531 rtx rtx_tmp;
2532 rtx rtx_val = GEN_INT (4);
2533
2534 rtx_tmp = gen_reg_rtx (V4SFmode);
2535 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2536 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2537 rtx_tmp, rtx_tmp, rtx_val));
2538 }
2539 else
2540 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2541
2542 DONE;
2543 })
2544
2545 ;; Generate uns_floate
2546 ;; convert long long unsigned to float
2547 ;; (Only even words are valid, BE numbering)
2548 (define_expand "unsfloatev2di"
2549 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2550 (use (match_operand:V2DI 1 "register_operand" "wa"))]
2551 "VECTOR_UNIT_VSX_P (V4SFmode)"
2552 {
2553 if (BYTES_BIG_ENDIAN)
2554 {
2555 /* Shift left one word to put the even word in the correct location.  */
2556 rtx rtx_tmp;
2557 rtx rtx_val = GEN_INT (4);
2558
2559 rtx_tmp = gen_reg_rtx (V4SFmode);
2560 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2561 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2562 rtx_tmp, rtx_tmp, rtx_val));
2563 }
2564 else
2565 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2566
2567 DONE;
2568 })
2569
2570 ;; Generate floato
2571 ;; convert double or long long signed to float
2572 ;; (Only odd words are valid, BE numbering)
2573 (define_expand "floato<mode>"
2574 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2575 (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2576 "VECTOR_UNIT_VSX_P (V4SFmode)"
2577 {
2578 if (BYTES_BIG_ENDIAN)
2579 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2580 else
2581 {
2582 /* Shift left one word to put the odd word in the correct location.  */
2583 rtx rtx_tmp;
2584 rtx rtx_val = GEN_INT (4);
2585
2586 rtx_tmp = gen_reg_rtx (V4SFmode);
2587 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2588 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2589 rtx_tmp, rtx_tmp, rtx_val));
2590 }
2591 DONE;
2592 })
2593
2594 ;; Generate uns_floato
2595 ;; convert long long unsigned to float
2596 ;; (Only odd words are valid, BE numbering)
2597 (define_expand "unsfloatov2di"
2598 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2599 (use (match_operand:V2DI 1 "register_operand" "wa"))]
2600 "VECTOR_UNIT_VSX_P (V4SFmode)"
2601 {
2602 if (BYTES_BIG_ENDIAN)
2603 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2604 else
2605 {
2606 /* Shift left one word to put the odd word in the correct location.  */
2607 rtx rtx_tmp;
2608 rtx rtx_val = GEN_INT (4);
2609
2610 rtx_tmp = gen_reg_rtx (V4SFmode);
2611 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2612 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2613 rtx_tmp, rtx_tmp, rtx_val));
2614 }
2615 DONE;
2616 })
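
;; Usage sketch (assumes the vec_floate/vec_floato built-ins): only half of
;; the result words carry converted values, and the expanders above rotate
;; them into the requested even or odd positions:
;;   #include <altivec.h>
;;   vector float fe (vector double a) { return vec_floate (a); }
;;   vector float fo (vector double a) { return vec_floato (a); }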
2617
2618 ;; Generate vsigned2
2619 ;; convert two double float vectors to a vector of single precision ints
2620 (define_expand "vsigned2_v2df"
2621 [(match_operand:V4SI 0 "register_operand" "=wa")
2622 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa")
2623 (match_operand:V2DF 2 "register_operand" "wa")]
2624 UNSPEC_VSX_VSIGNED2)]
2625 "TARGET_VSX"
2626 {
2627 rtx rtx_src1, rtx_src2, rtx_dst;
2628 bool signed_convert = true;
2629
2630 rtx_dst = operands[0];
2631 rtx_src1 = operands[1];
2632 rtx_src2 = operands[2];
2633
2634 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2635 DONE;
2636 })
2637
2638 ;; Generate vsignedo_v2df
2639 ;; signed double float to int convert odd word
2640 (define_expand "vsignedo_v2df"
2641 [(set (match_operand:V4SI 0 "register_operand" "=wa")
2642 (match_operand:V2DF 1 "register_operand" "wa"))]
2643 "TARGET_VSX"
2644 {
2645 if (BYTES_BIG_ENDIAN)
2646 {
2647 rtx rtx_tmp;
2648 rtx rtx_val = GEN_INT (12);
2649 rtx_tmp = gen_reg_rtx (V4SImode);
2650
2651 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2652
2653 /* Big endian word numbering for words in operand is 0 1 2 3.
2654 Take (operand[1] operand[1]) and shift left one word
2655 0 1 2 3 0 1 2 3 => 1 2 3 0.
2656 Words 1 and 3 are now where they need to be for the result. */
2657
2658 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2659 rtx_tmp, rtx_val));
2660 }
2661 else
2662 /* Little endian word numbering for operand is 3 2 1 0.
2663 Result words 3 and 1 are where they need to be. */
2664 emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2665
2666 DONE;
2667 }
2668 [(set_attr "type" "veccomplex")])
2669
2670 ;; Generate vsignede_v2df
2671 ;; signed double float to int even word
2672 (define_expand "vsignede_v2df"
2673 [(set (match_operand:V4SI 0 "register_operand" "=v")
2674 (match_operand:V2DF 1 "register_operand" "v"))]
2675 "TARGET_VSX"
2676 {
2677 if (BYTES_BIG_ENDIAN)
2678 /* Big endian word numbering for words in the result is 0 1 2 3.
2679 Result words 0 and 2 are already where they need to be. */
2680 emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2681
2682 else
2683 {
2684 rtx rtx_tmp;
2685 rtx rtx_val = GEN_INT (12);
2686 rtx_tmp = gen_reg_rtx (V4SImode);
2687
2688 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2689
2690 /* Little endian word numbering for operand is 3 2 1 0.
2691 Take (operand[1] operand[1]) and shift left three words
2692 0 1 2 3 0 1 2 3 => 3 0 1 2.
2693 Words 0 and 2 are now where they need to be for the result. */
2694 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2695 rtx_tmp, rtx_val));
2696 }
2697 DONE;
2698 }
2699 [(set_attr "type" "veccomplex")])
2700
2701 ;; Generate vunsigned2
2702 ;; convert two double float vectors to a vector of single precision
2703 ;; unsigned ints
2704 (define_expand "vunsigned2_v2df"
2705 [(match_operand:V4SI 0 "register_operand" "=v")
2706 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v")
2707 (match_operand:V2DF 2 "register_operand" "v")]
2708 UNSPEC_VSX_VSIGNED2)]
2709 "TARGET_VSX"
2710 {
2711 rtx rtx_src1, rtx_src2, rtx_dst;
2712 bool signed_convert = false;
2713
2714 rtx_dst = operands[0];
2715 rtx_src1 = operands[1];
2716 rtx_src2 = operands[2];
2717
2718 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2719 DONE;
2720 })
2721
2722 ;; Generate vunsignedo_v2df
2723 ;; unsigned double float to int convert odd word
2724 (define_expand "vunsignedo_v2df"
2725 [(set (match_operand:V4SI 0 "register_operand" "=v")
2726 (match_operand:V2DF 1 "register_operand" "v"))]
2727 "TARGET_VSX"
2728 {
2729 if (BYTES_BIG_ENDIAN)
2730 {
2731 rtx rtx_tmp;
2732 rtx rtx_val = GEN_INT (12);
2733 rtx_tmp = gen_reg_rtx (V4SImode);
2734
2735 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2736
2737 /* Big endian word numbering for words in operand is 0 1 2 3.
2738 Take (operand[1] operand[1]) and shift left one word
2739 0 1 2 3 0 1 2 3 => 1 2 3 0.
2740 Words 1 and 3 are now where they need to be for the result. */
2741
2742 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2743 rtx_tmp, rtx_val));
2744 }
2745 else
2746 /* Little endian word numbering for operand is 3 2 1 0.
2747 Result words 3 and 1 are where they need to be. */
2748 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2749
2750 DONE;
2751 }
2752 [(set_attr "type" "veccomplex")])
2753
2754 ;; Generate vunsignede_v2df
2755 ;; unsigned double float to int even word
2756 (define_expand "vunsignede_v2df"
2757 [(set (match_operand:V4SI 0 "register_operand" "=v")
2758 (match_operand:V2DF 1 "register_operand" "v"))]
2759 "TARGET_VSX"
2760 {
2761 if (BYTES_BIG_ENDIAN)
2762 /* Big endian word numbering for words in the result is 0 1 2 3.
2763 Result words 0 and 2 are already where they need to be. */
2764 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2765
2766 else
2767 {
2768 rtx rtx_tmp;
2769 rtx rtx_val = GEN_INT (12);
2770 rtx_tmp = gen_reg_rtx (V4SImode);
2771
2772 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2773
2774 /* Little endian word numbering for operand is 3 2 1 0.
2775 Take (operand[1] operand[1]) and shift left three words
2776 0 1 2 3 0 1 2 3 => 3 0 1 2.
2777 Words 0 and 2 are now where they need to be for the result. */
2778 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2779 rtx_tmp, rtx_val));
2780 }
2781 DONE;
2782 }
2783 [(set_attr "type" "veccomplex")])
2784
2785 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
2786 ;; the xvrdpiz instruction does not truncate the value if the floating
2787 ;; point value is < LONG_MIN or > LONG_MAX.
2788 (define_insn "*vsx_float_fix_v2df2"
2789 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,?wa")
2790 (float:V2DF
2791 (fix:V2DI
2792 (match_operand:V2DF 1 "vsx_register_operand" "wa,?wa"))))]
2793 "TARGET_HARD_FLOAT
2794 && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
2795 && !flag_trapping_math && TARGET_FRIZ"
2796 "xvrdpiz %x0,%x1"
2797 [(set_attr "type" "vecdouble")])
2798
2799 \f
2800 ;; Permute operations
2801
2802 ;; Build a V2DF/V2DI vector from two scalars
2803 (define_insn "vsx_concat_<mode>"
2804 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
2805 (vec_concat:VSX_D
2806 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
2807 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
2808 "VECTOR_MEM_VSX_P (<MODE>mode)"
2809 {
2810 if (which_alternative == 0)
2811 return (BYTES_BIG_ENDIAN
2812 ? "xxpermdi %x0,%x1,%x2,0"
2813 : "xxpermdi %x0,%x2,%x1,0");
2814
2815 else if (which_alternative == 1)
2816 return (BYTES_BIG_ENDIAN
2817 ? "mtvsrdd %x0,%1,%2"
2818 : "mtvsrdd %x0,%2,%1");
2819
2820 else
2821 gcc_unreachable ();
2822 }
2823 [(set_attr "type" "vecperm")])
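
;; Source-level sketch (hypothetical example): initializing a V2DF vector
;; from two scalars goes through this pattern:
;;   #include <altivec.h>
;;   vector double
;;   mk2 (double a, double b)
;;   {
;;     return (vector double) {a, b};  /* xxpermdi or mtvsrdd */
;;   }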
2824
2825 ;; Combiner patterns to allow creating XXPERMDI's to access either double
2826 ;; word element in a vector register.
2827 (define_insn "*vsx_concat_<mode>_1"
2828 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2829 (vec_concat:VSX_D
2830 (vec_select:<VS_scalar>
2831 (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2832 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2833 (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))]
2834 "VECTOR_MEM_VSX_P (<MODE>mode)"
2835 {
2836 HOST_WIDE_INT dword = INTVAL (operands[2]);
2837 if (BYTES_BIG_ENDIAN)
2838 {
2839 operands[4] = GEN_INT (2 * dword);
2840 return "xxpermdi %x0,%x1,%x3,%4";
2841 }
2842 else
2843 {
2844 operands[4] = GEN_INT (!dword);
2845 return "xxpermdi %x0,%x3,%x1,%4";
2846 }
2847 }
2848 [(set_attr "type" "vecperm")])
2849
2850 (define_insn "*vsx_concat_<mode>_2"
2851 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2852 (vec_concat:VSX_D
2853 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa")
2854 (vec_select:<VS_scalar>
2855 (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
2856 (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
2857 "VECTOR_MEM_VSX_P (<MODE>mode)"
2858 {
2859 HOST_WIDE_INT dword = INTVAL (operands[3]);
2860 if (BYTES_BIG_ENDIAN)
2861 {
2862 operands[4] = GEN_INT (dword);
2863 return "xxpermdi %x0,%x1,%x2,%4";
2864 }
2865 else
2866 {
2867 operands[4] = GEN_INT (2 * !dword);
2868 return "xxpermdi %x0,%x2,%x1,%4";
2869 }
2870 }
2871 [(set_attr "type" "vecperm")])
2872
2873 (define_insn "*vsx_concat_<mode>_3"
2874 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2875 (vec_concat:VSX_D
2876 (vec_select:<VS_scalar>
2877 (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2878 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2879 (vec_select:<VS_scalar>
2880 (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
2881 (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
2882 "VECTOR_MEM_VSX_P (<MODE>mode)"
2883 {
2884 HOST_WIDE_INT dword1 = INTVAL (operands[2]);
2885 HOST_WIDE_INT dword2 = INTVAL (operands[4]);
2886 if (BYTES_BIG_ENDIAN)
2887 {
2888 operands[5] = GEN_INT ((2 * dword1) + dword2);
2889 return "xxpermdi %x0,%x1,%x3,%5";
2890 }
2891 else
2892 {
2893 operands[5] = GEN_INT ((2 * !dword2) + !dword1);
2894 return "xxpermdi %x0,%x3,%x1,%5";
2895 }
2896 }
2897 [(set_attr "type" "vecperm")])
2898
2899 ;; Special purpose concat using xxpermdi to glue two single precision values
2900 ;; together, relying on the fact that internally scalar floats are represented
2901 ;; as doubles. This is used to initialize a V4SF vector with 4 floats
2902 (define_insn "vsx_concat_v2sf"
2903 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2904 (unspec:V2DF
2905 [(match_operand:SF 1 "vsx_register_operand" "wa")
2906 (match_operand:SF 2 "vsx_register_operand" "wa")]
2907 UNSPEC_VSX_CONCAT))]
2908 "VECTOR_MEM_VSX_P (V2DFmode)"
2909 {
2910 if (BYTES_BIG_ENDIAN)
2911 return "xxpermdi %x0,%x1,%x2,0";
2912 else
2913 return "xxpermdi %x0,%x2,%x1,0";
2914 }
2915 [(set_attr "type" "vecperm")])
2916
2917 ;; Concatenate 4 SImode elements into a V4SImode reg.
2918 (define_expand "vsx_init_v4si"
2919 [(use (match_operand:V4SI 0 "gpc_reg_operand"))
2920 (use (match_operand:SI 1 "gpc_reg_operand"))
2921 (use (match_operand:SI 2 "gpc_reg_operand"))
2922 (use (match_operand:SI 3 "gpc_reg_operand"))
2923 (use (match_operand:SI 4 "gpc_reg_operand"))]
2924 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
2925 {
2926 rtx a = gen_reg_rtx (DImode);
2927 rtx b = gen_reg_rtx (DImode);
2928 rtx c = gen_reg_rtx (DImode);
2929 rtx d = gen_reg_rtx (DImode);
2930 emit_insn (gen_zero_extendsidi2 (a, operands[1]));
2931 emit_insn (gen_zero_extendsidi2 (b, operands[2]));
2932 emit_insn (gen_zero_extendsidi2 (c, operands[3]));
2933 emit_insn (gen_zero_extendsidi2 (d, operands[4]));
2934 if (!BYTES_BIG_ENDIAN)
2935 {
2936 std::swap (a, b);
2937 std::swap (c, d);
2938 }
2939
2940 rtx aa = gen_reg_rtx (DImode);
2941 rtx ab = gen_reg_rtx (DImode);
2942 rtx cc = gen_reg_rtx (DImode);
2943 rtx cd = gen_reg_rtx (DImode);
2944 emit_insn (gen_ashldi3 (aa, a, GEN_INT (32)));
2945 emit_insn (gen_ashldi3 (cc, c, GEN_INT (32)));
2946 emit_insn (gen_iordi3 (ab, aa, b));
2947 emit_insn (gen_iordi3 (cd, cc, d));
2948
2949 rtx abcd = gen_reg_rtx (V2DImode);
2950 emit_insn (gen_vsx_concat_v2di (abcd, ab, cd));
2951 emit_move_insn (operands[0], gen_lowpart (V4SImode, abcd));
2952 DONE;
2953 })
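
;; Source-level sketch (hypothetical example): the expander above builds a
;; V4SI value from four GPR ints by pairing them into two DImode values and
;; gluing those with vsx_concat_v2di:
;;   #include <altivec.h>
;;   vector int
;;   mk4 (int a, int b, int c, int d)
;;   {
;;     return (vector int) {a, b, c, d};
;;   }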
2954
2955 ;; xxpermdi for little endian loads and stores. We need several of
2956 ;; these since the form of the PARALLEL differs by mode.
2957 (define_insn "*vsx_xxpermdi2_le_<mode>"
2958 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2959 (vec_select:VSX_D
2960 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
2961 (parallel [(const_int 1) (const_int 0)])))]
2962 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
2963 "xxpermdi %x0,%x1,%x1,2"
2964 [(set_attr "type" "vecperm")])
2965
2966 (define_insn "xxswapd_v16qi"
2967 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
2968 (vec_select:V16QI
2969 (match_operand:V16QI 1 "vsx_register_operand" "wa")
2970 (parallel [(const_int 8) (const_int 9)
2971 (const_int 10) (const_int 11)
2972 (const_int 12) (const_int 13)
2973 (const_int 14) (const_int 15)
2974 (const_int 0) (const_int 1)
2975 (const_int 2) (const_int 3)
2976 (const_int 4) (const_int 5)
2977 (const_int 6) (const_int 7)])))]
2978 "TARGET_VSX"
2979 ;; AIX does not support the extended mnemonic xxswapd. Use the basic
2980 ;; mnemonic xxpermdi instead.
2981 "xxpermdi %x0,%x1,%x1,2"
2982 [(set_attr "type" "vecperm")])
2983
2984 (define_insn "xxswapd_v8hi"
2985 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
2986 (vec_select:V8HI
2987 (match_operand:V8HI 1 "vsx_register_operand" "wa")
2988 (parallel [(const_int 4) (const_int 5)
2989 (const_int 6) (const_int 7)
2990 (const_int 0) (const_int 1)
2991 (const_int 2) (const_int 3)])))]
2992 "TARGET_VSX"
2993 ;; AIX does not support the extended mnemonic xxswapd. Use the basic
2994 ;; mnemonic xxpermdi instead.
2995 "xxpermdi %x0,%x1,%x1,2"
2996 [(set_attr "type" "vecperm")])
2997
2998 (define_insn "xxswapd_<mode>"
2999 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
3000 (vec_select:VSX_W
3001 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
3002 (parallel [(const_int 2) (const_int 3)
3003 (const_int 0) (const_int 1)])))]
3004 "TARGET_VSX"
3005 ;; AIX does not support the extended mnemonic xxswapd. Use the basic
3006 ;; mnemonic xxpermdi instead.
3007 "xxpermdi %x0,%x1,%x1,2"
3008 [(set_attr "type" "vecperm")])
3009
3010 (define_insn "xxswapd_<mode>"
3011 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3012 (vec_select:VSX_D
3013 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3014 (parallel [(const_int 1) (const_int 0)])))]
3015 "TARGET_VSX"
3016 ;; AIX does not support the extended mnemonic xxswapd. Use the basic
3017 ;; mnemonic xxpermdi instead.
3018 "xxpermdi %x0,%x1,%x1,2"
3019 [(set_attr "type" "vecperm")])
3020
3021 (define_insn "xxgenpcvm_<mode>_internal"
3022 [(set (match_operand:VSX_EXTRACT_I4 0 "altivec_register_operand" "=wa")
3023 (unspec:VSX_EXTRACT_I4
3024 [(match_operand:VSX_EXTRACT_I4 1 "altivec_register_operand" "v")
3025 (match_operand:QI 2 "const_0_to_3_operand" "n")]
3026 UNSPEC_XXGENPCV))]
3027 "TARGET_POWER10 && TARGET_64BIT"
3028 "xxgenpcv<wd>m %x0,%1,%2"
3029 [(set_attr "type" "vecsimple")])
3030
3031 (define_expand "xxgenpcvm_<mode>"
3032 [(use (match_operand:VSX_EXTRACT_I4 0 "register_operand"))
3033 (use (match_operand:VSX_EXTRACT_I4 1 "register_operand"))
3034 (use (match_operand:QI 2 "immediate_operand"))]
3035 "TARGET_POWER10"
3036 {
3037 if (!BYTES_BIG_ENDIAN)
3038 {
3039 /* gen_xxgenpcvm assumes Big Endian order. If LE,
3040 swap the upper and lower double words. */
3041 rtx tmp = gen_reg_rtx (<MODE>mode);
3042
3043 emit_insn (gen_xxswapd_<mode> (tmp, operands[1]));
3044 operands[1] = tmp;
3045 }
3046 emit_insn (gen_xxgenpcvm_<mode>_internal (operands[0], operands[1],
3047 operands[2]));
3048 DONE;
3049 })
3050
3051 ;; lxvd2x for little endian loads. We need several of
3052 ;; these since the form of the PARALLEL differs by mode.
3053 (define_insn "*vsx_lxvd2x2_le_<mode>"
3054 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3055 (vec_select:VSX_D
3056 (match_operand:VSX_D 1 "memory_operand" "Z")
3057 (parallel [(const_int 1) (const_int 0)])))]
3058 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3059 "lxvd2x %x0,%y1"
3060 [(set_attr "type" "vecload")])
3061
3062 (define_insn "*vsx_lxvd2x4_le_<mode>"
3063 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
3064 (vec_select:VSX_W
3065 (match_operand:VSX_W 1 "memory_operand" "Z")
3066 (parallel [(const_int 2) (const_int 3)
3067 (const_int 0) (const_int 1)])))]
3068 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3069 "lxvd2x %x0,%y1"
3070 [(set_attr "type" "vecload")])
3071
3072 (define_insn "*vsx_lxvd2x8_le_V8HI"
3073 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3074 (vec_select:V8HI
3075 (match_operand:V8HI 1 "memory_operand" "Z")
3076 (parallel [(const_int 4) (const_int 5)
3077 (const_int 6) (const_int 7)
3078 (const_int 0) (const_int 1)
3079 (const_int 2) (const_int 3)])))]
3080 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3081 "lxvd2x %x0,%y1"
3082 [(set_attr "type" "vecload")])
3083
3084 (define_insn "*vsx_lxvd2x16_le_V16QI"
3085 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3086 (vec_select:V16QI
3087 (match_operand:V16QI 1 "memory_operand" "Z")
3088 (parallel [(const_int 8) (const_int 9)
3089 (const_int 10) (const_int 11)
3090 (const_int 12) (const_int 13)
3091 (const_int 14) (const_int 15)
3092 (const_int 0) (const_int 1)
3093 (const_int 2) (const_int 3)
3094 (const_int 4) (const_int 5)
3095 (const_int 6) (const_int 7)])))]
3096 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3097 "lxvd2x %x0,%y1"
3098 [(set_attr "type" "vecload")])
3099
3100 ;; stxvd2x for little endian stores. We need several of
3101 ;; these since the form of the PARALLEL differs by mode.
3102 (define_insn "*vsx_stxvd2x2_le_<mode>"
3103 [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
3104 (vec_select:VSX_D
3105 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3106 (parallel [(const_int 1) (const_int 0)])))]
3107 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3108 "stxvd2x %x1,%y0"
3109 [(set_attr "type" "vecstore")])
3110
3111 (define_insn "*vsx_stxvd2x4_le_<mode>"
3112 [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
3113 (vec_select:VSX_W
3114 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
3115 (parallel [(const_int 2) (const_int 3)
3116 (const_int 0) (const_int 1)])))]
3117 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3118 "stxvd2x %x1,%y0"
3119 [(set_attr "type" "vecstore")])
3120
3121 (define_insn "*vsx_stxvd2x8_le_V8HI"
3122 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
3123 (vec_select:V8HI
3124 (match_operand:V8HI 1 "vsx_register_operand" "wa")
3125 (parallel [(const_int 4) (const_int 5)
3126 (const_int 6) (const_int 7)
3127 (const_int 0) (const_int 1)
3128 (const_int 2) (const_int 3)])))]
3129 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3130 "stxvd2x %x1,%y0"
3131 [(set_attr "type" "vecstore")])
3132
3133 (define_insn "*vsx_stxvd2x16_le_V16QI"
3134 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
3135 (vec_select:V16QI
3136 (match_operand:V16QI 1 "vsx_register_operand" "wa")
3137 (parallel [(const_int 8) (const_int 9)
3138 (const_int 10) (const_int 11)
3139 (const_int 12) (const_int 13)
3140 (const_int 14) (const_int 15)
3141 (const_int 0) (const_int 1)
3142 (const_int 2) (const_int 3)
3143 (const_int 4) (const_int 5)
3144 (const_int 6) (const_int 7)])))]
3145 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3146 "stxvd2x %x1,%y0"
3147 [(set_attr "type" "vecstore")])
3148
3149 ;; Convert a TImode value into V1TImode
3150 (define_expand "vsx_set_v1ti"
3151 [(match_operand:V1TI 0 "nonimmediate_operand")
3152 (match_operand:V1TI 1 "nonimmediate_operand")
3153 (match_operand:TI 2 "input_operand")
3154 (match_operand:QI 3 "u5bit_cint_operand")]
3155 "VECTOR_MEM_VSX_P (V1TImode)"
3156 {
3157 if (operands[3] != const0_rtx)
3158 gcc_unreachable ();
3159
3160 emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[2]));
3161 DONE;
3162 })
3163
3164 ;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
3165 (define_expand "vsx_set_<mode>"
3166 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
3167 (use (match_operand:VSX_D 1 "vsx_register_operand"))
3168 (use (match_operand:<VS_scalar> 2 "gpc_reg_operand"))
3169 (use (match_operand:QI 3 "const_0_to_1_operand"))]
3170 "VECTOR_MEM_VSX_P (<MODE>mode)"
3171 {
3172 rtx dest = operands[0];
3173 rtx vec_reg = operands[1];
3174 rtx value = operands[2];
3175 rtx ele = operands[3];
3176 rtx tmp = gen_reg_rtx (<VS_scalar>mode);
3177
3178 if (ele == const0_rtx)
3179 {
3180 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
3181 emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
3182 DONE;
3183 }
3184 else if (ele == const1_rtx)
3185 {
3186 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
3187 emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
3188 DONE;
3189 }
3190 else
3191 gcc_unreachable ();
3192 })
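
;; Usage sketch (assumes the vec_insert built-in from altivec.h):
;;   #include <altivec.h>
;;   vector double
;;   set0 (vector double v, double x)
;;   {
;;     return vec_insert (x, v, 0);  /* extract element 1, then concat */
;;   }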
3193
3194 ;; Extract a DF/DI element from V2DF/V2DI
3195 ;; Optimize cases where we can do a simple or direct move,
3196 ;; or see if we can avoid doing the move at all.
3197
3198 ;; There are some unresolved problems with reload that show up if an Altivec
3199 ;; register was picked. Limit the scalar value to FPRs for now.
3200
3201 (define_insn "vsx_extract_<mode>"
3202 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d, d, wr, wr")
3203 (vec_select:<VS_scalar>
3204 (match_operand:VSX_D 1 "gpc_reg_operand" "wa, wa, wa, wa")
3205 (parallel
3206 [(match_operand:QI 2 "const_0_to_1_operand" "wD, n, wD, n")])))]
3207 "VECTOR_MEM_VSX_P (<MODE>mode)"
3208 {
3209 int element = INTVAL (operands[2]);
3210 int op0_regno = REGNO (operands[0]);
3211 int op1_regno = REGNO (operands[1]);
3212 int fldDM;
3213
3214 gcc_assert (IN_RANGE (element, 0, 1));
3215 gcc_assert (VSX_REGNO_P (op1_regno));
3216
3217 if (element == VECTOR_ELEMENT_SCALAR_64BIT)
3218 {
3219 if (op0_regno == op1_regno)
3220 return ASM_COMMENT_START " vec_extract to same register";
3221
3222 else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
3223 && TARGET_POWERPC64)
3224 return "mfvsrd %0,%x1";
3225
3226 else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
3227 return "fmr %0,%1";
3228
3229 else if (VSX_REGNO_P (op0_regno))
3230 return "xxlor %x0,%x1,%x1";
3231
3232 else
3233 gcc_unreachable ();
3234 }
3235
3236 else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
3237 && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
3238 return "mfvsrld %0,%x1";
3239
3240 else if (VSX_REGNO_P (op0_regno))
3241 {
3242 fldDM = element << 1;
3243 if (!BYTES_BIG_ENDIAN)
3244 fldDM = 3 - fldDM;
3245 operands[3] = GEN_INT (fldDM);
3246 return "xxpermdi %x0,%x1,%x1,%3";
3247 }
3248
3249 else
3250 gcc_unreachable ();
3251 }
3252 [(set_attr "type" "veclogical,mftgpr,mftgpr,vecperm")
3253 (set_attr "isa" "*,*,p8v,p9v")])
3254
3255 ;; Optimize extracting a single scalar element from memory.
3256 (define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
3257 [(set (match_operand:<VS_scalar> 0 "register_operand" "=wa,wr")
3258 (vec_select:<VSX_D:VS_scalar>
3259 (match_operand:VSX_D 1 "memory_operand" "m,m")
3260 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
3261 (clobber (match_scratch:P 3 "=&b,&b"))]
3262 "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
3263 "#"
3264 "&& reload_completed"
3265 [(set (match_dup 0) (match_dup 4))]
3266 {
3267 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3268 operands[3], <VSX_D:VS_scalar>mode);
3269 }
3270 [(set_attr "type" "fpload,load")
3271 (set_attr "length" "8")])
3272
3273 ;; Optimize storing a single scalar element that is already in the right
3274 ;; location in memory
3275 (define_insn "*vsx_extract_<mode>_store"
3276 [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY")
3277 (vec_select:<VS_scalar>
3278 (match_operand:VSX_D 1 "register_operand" "d,v,v")
3279 (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
3280 "VECTOR_MEM_VSX_P (<MODE>mode)"
3281 "@
3282 stfd%U0%X0 %1,%0
3283 stxsdx %x1,%y0
3284 stxsd %1,%0"
3285 [(set_attr "type" "fpstore")
3286 (set_attr "isa" "*,p7v,p9v")])
3287
3288 ;; Variable V2DI/V2DF extract shift
3289 (define_insn "vsx_vslo_<mode>"
3290 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
3291 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
3292 (match_operand:V2DI 2 "gpc_reg_operand" "v")]
3293 UNSPEC_VSX_VSLO))]
3294 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3295 "vslo %0,%1,%2"
3296 [(set_attr "type" "vecperm")])
3297
3298 ;; Variable V2DI/V2DF extract from a register
3299 (define_insn_and_split "vsx_extract_<mode>_var"
3300 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
3301 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
3302 (match_operand:DI 2 "gpc_reg_operand" "r")]
3303 UNSPEC_VSX_EXTRACT))
3304 (clobber (match_scratch:DI 3 "=r"))
3305 (clobber (match_scratch:V2DI 4 "=&v"))]
3306 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3307 "#"
3308 "&& reload_completed"
3309 [(const_int 0)]
3310 {
3311 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3312 operands[3], operands[4]);
3313 DONE;
3314 })
3315
3316 ;; Variable V2DI/V2DF extract from memory
3317 (define_insn_and_split "*vsx_extract_<mode>_var_load"
3318 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=wa,r")
3319 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "memory_operand" "Q,Q")
3320 (match_operand:DI 2 "gpc_reg_operand" "r,r")]
3321 UNSPEC_VSX_EXTRACT))
3322 (clobber (match_scratch:DI 3 "=&b,&b"))]
3323 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3324 "#"
3325 "&& reload_completed"
3326 [(set (match_dup 0) (match_dup 4))]
3327 {
3328 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3329 operands[3], <VS_scalar>mode);
3330 }
3331 [(set_attr "type" "fpload,load")])
3332
3333 ;; Extract a SF element from V4SF
3334 (define_insn_and_split "vsx_extract_v4sf"
3335 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
3336 (vec_select:SF
3337 (match_operand:V4SF 1 "vsx_register_operand" "wa")
3338 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
3339 (clobber (match_scratch:V4SF 3 "=0"))]
3340 "VECTOR_UNIT_VSX_P (V4SFmode)"
3341 "#"
3342 "&& 1"
3343 [(const_int 0)]
3344 {
3345 rtx op0 = operands[0];
3346 rtx op1 = operands[1];
3347 rtx op2 = operands[2];
3348 rtx op3 = operands[3];
3349 rtx tmp;
3350 HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
3351
3352 if (ele == 0)
3353 tmp = op1;
3354 else
3355 {
3356 if (GET_CODE (op3) == SCRATCH)
3357 op3 = gen_reg_rtx (V4SFmode);
3358 emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
3359 tmp = op3;
3360 }
3361 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
3362 DONE;
3363 }
3364 [(set_attr "length" "8")
3365 (set_attr "type" "fp")])
3366
3367 (define_insn_and_split "*vsx_extract_v4sf_<mode>_load"
3368 [(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
3369 (vec_select:SF
3370 (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
3371 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
3372 (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
3373 "VECTOR_MEM_VSX_P (V4SFmode)"
3374 "#"
3375 "&& reload_completed"
3376 [(set (match_dup 0) (match_dup 4))]
3377 {
3378 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3379 operands[3], SFmode);
3380 }
3381 [(set_attr "type" "fpload,fpload,fpload,load")
3382 (set_attr "length" "8")
3383 (set_attr "isa" "*,p7v,p9v,*")])
3384
3385 ;; Variable V4SF extract from a register
3386 (define_insn_and_split "vsx_extract_v4sf_var"
3387 [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
3388 (unspec:SF [(match_operand:V4SF 1 "gpc_reg_operand" "v")
3389 (match_operand:DI 2 "gpc_reg_operand" "r")]
3390 UNSPEC_VSX_EXTRACT))
3391 (clobber (match_scratch:DI 3 "=r"))
3392 (clobber (match_scratch:V2DI 4 "=&v"))]
3393 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
3394 "#"
3395 "&& reload_completed"
3396 [(const_int 0)]
3397 {
3398 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3399 operands[3], operands[4]);
3400 DONE;
3401 })
3402
3403 ;; Variable V4SF extract from memory
3404 (define_insn_and_split "*vsx_extract_v4sf_var_load"
3405 [(set (match_operand:SF 0 "gpc_reg_operand" "=wa,?r")
3406 (unspec:SF [(match_operand:V4SF 1 "memory_operand" "Q,Q")
3407 (match_operand:DI 2 "gpc_reg_operand" "r,r")]
3408 UNSPEC_VSX_EXTRACT))
3409 (clobber (match_scratch:DI 3 "=&b,&b"))]
3410 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
3411 "#"
3412 "&& reload_completed"
3413 [(set (match_dup 0) (match_dup 4))]
3414 {
3415 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3416 operands[3], SFmode);
3417 }
3418 [(set_attr "type" "fpload,load")])
3419
3420 ;; Expand the builtin form of xxpermdi to canonical rtl.
3421 (define_expand "vsx_xxpermdi_<mode>"
3422 [(match_operand:VSX_L 0 "vsx_register_operand")
3423 (match_operand:VSX_L 1 "vsx_register_operand")
3424 (match_operand:VSX_L 2 "vsx_register_operand")
3425 (match_operand:QI 3 "u5bit_cint_operand")]
3426 "VECTOR_MEM_VSX_P (<MODE>mode)"
3427 {
3428 rtx target = operands[0];
3429 rtx op0 = operands[1];
3430 rtx op1 = operands[2];
3431 int mask = INTVAL (operands[3]);
3432 rtx perm0 = GEN_INT ((mask >> 1) & 1);
3433 rtx perm1 = GEN_INT ((mask & 1) + 2);
3434 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3435
3436 if (<MODE>mode == V2DFmode)
3437 gen = gen_vsx_xxpermdi2_v2df_1;
3438 else
3439 {
3440 gen = gen_vsx_xxpermdi2_v2di_1;
3441 if (<MODE>mode != V2DImode)
3442 {
3443 target = gen_lowpart (V2DImode, target);
3444 op0 = gen_lowpart (V2DImode, op0);
3445 op1 = gen_lowpart (V2DImode, op1);
3446 }
3447 }
3448 emit_insn (gen (target, op0, op1, perm0, perm1));
3449 DONE;
3450 })
3451
3452 ;; Special version of xxpermdi that retains big-endian semantics.
3453 (define_expand "vsx_xxpermdi_<mode>_be"
3454 [(match_operand:VSX_L 0 "vsx_register_operand")
3455 (match_operand:VSX_L 1 "vsx_register_operand")
3456 (match_operand:VSX_L 2 "vsx_register_operand")
3457 (match_operand:QI 3 "u5bit_cint_operand")]
3458 "VECTOR_MEM_VSX_P (<MODE>mode)"
3459 {
3460 rtx target = operands[0];
3461 rtx op0 = operands[1];
3462 rtx op1 = operands[2];
3463 int mask = INTVAL (operands[3]);
3464 rtx perm0 = GEN_INT ((mask >> 1) & 1);
3465 rtx perm1 = GEN_INT ((mask & 1) + 2);
3466 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3467
3468 if (<MODE>mode == V2DFmode)
3469 gen = gen_vsx_xxpermdi2_v2df_1;
3470 else
3471 {
3472 gen = gen_vsx_xxpermdi2_v2di_1;
3473 if (<MODE>mode != V2DImode)
3474 {
3475 target = gen_lowpart (V2DImode, target);
3476 op0 = gen_lowpart (V2DImode, op0);
3477 op1 = gen_lowpart (V2DImode, op1);
3478 }
3479 }
3480 /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
3481 transformation we don't want; it is necessary for
3482 rs6000_expand_vec_perm_const_1 but not for this use. So we
3483 prepare for that by reversing the transformation here. */
3484 if (BYTES_BIG_ENDIAN)
3485 emit_insn (gen (target, op0, op1, perm0, perm1));
3486 else
3487 {
3488 rtx p0 = GEN_INT (3 - INTVAL (perm1));
3489 rtx p1 = GEN_INT (3 - INTVAL (perm0));
3490 emit_insn (gen (target, op1, op0, p0, p1));
3491 }
3492 DONE;
3493 })
3494
3495 (define_insn "vsx_xxpermdi2_<mode>_1"
3496 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3497 (vec_select:VSX_D
3498 (vec_concat:<VS_double>
3499 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3500 (match_operand:VSX_D 2 "vsx_register_operand" "wa"))
3501 (parallel [(match_operand 3 "const_0_to_1_operand" "")
3502 (match_operand 4 "const_2_to_3_operand" "")])))]
3503 "VECTOR_MEM_VSX_P (<MODE>mode)"
3504 {
3505 int op3, op4, mask;
3506
3507 /* For little endian, swap operands and invert/swap selectors
3508 to get the correct xxpermdi. The operand swap sets up the
3509 inputs as a little endian array. The selectors are swapped
3510 because they are defined to use big endian ordering. The
3511 selectors are inverted to get the correct doublewords for
3512 little endian ordering. */
3513 if (BYTES_BIG_ENDIAN)
3514 {
3515 op3 = INTVAL (operands[3]);
3516 op4 = INTVAL (operands[4]);
3517 }
3518 else
3519 {
3520 op3 = 3 - INTVAL (operands[4]);
3521 op4 = 3 - INTVAL (operands[3]);
3522 }
3523
3524 mask = (op3 << 1) | (op4 - 2);
3525 operands[3] = GEN_INT (mask);
3526
3527 if (BYTES_BIG_ENDIAN)
3528 return "xxpermdi %x0,%x1,%x2,%3";
3529 else
3530 return "xxpermdi %x0,%x2,%x1,%3";
3531 }
3532 [(set_attr "type" "vecperm")])
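
;; Worked example of the selector-to-immediate mapping above, on big endian:
;; selectors {0,2} -> (0<<1)|(2-2) = 0, {0,3} -> 1, {1,2} -> 2, {1,3} -> 3.
;; On little endian, selectors {0,3} first become {3-3, 3-0} = {0,3} and the
;; operands are swapped, so the same formula drives the swapped xxpermdi.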
3533
3534 ;; Extraction of a single element in a small integer vector. Until ISA 3.0,
3535 ;; none of the small types were allowed in a vector register, so we had to
3536 ;; extract to a DImode and either do a direct move or store.
3537 (define_expand "vsx_extract_<mode>"
3538 [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand")
3539 (vec_select:<VS_scalar>
3540 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
3541 (parallel [(match_operand:QI 2 "const_int_operand")])))
3542 (clobber (match_scratch:VSX_EXTRACT_I 3))])]
3543 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3544 {
3545 /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}. */
3546 if (TARGET_P9_VECTOR)
3547 {
3548 emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
3549 operands[2]));
3550 DONE;
3551 }
3552 })
3553
3554 (define_insn "vsx_extract_<mode>_p9"
3555 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>")
3556 (vec_select:<VS_scalar>
3557 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
3558 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
3559 (clobber (match_scratch:SI 3 "=r,X"))]
3560 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3561 {
3562 if (which_alternative == 0)
3563 return "#";
3564
3565 else
3566 {
3567 HOST_WIDE_INT elt = INTVAL (operands[2]);
3568 HOST_WIDE_INT elt_adj = (!BYTES_BIG_ENDIAN
3569 ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
3570 : elt);
3571
3572 HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
3573 HOST_WIDE_INT offset = unit_size * elt_adj;
3574
3575 operands[2] = GEN_INT (offset);
3576 if (unit_size == 4)
3577 return "xxextractuw %x0,%x1,%2";
3578 else
3579 return "vextractu<wd> %0,%1,%2";
3580 }
3581 }
3582 [(set_attr "type" "vecsimple")
3583 (set_attr "isa" "p9v,*")])
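
;; Worked example of the offset computation above: extracting element 5 of a
;; V8HI on little endian gives elt_adj = 8 - 1 - 5 = 2 and a byte offset of
;; 2 * 2 = 4, so the insn emits "vextractuh %0,%1,4"; on big endian the same
;; extract uses byte offset 5 * 2 = 10.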
3584
3585 (define_split
3586 [(set (match_operand:<VS_scalar> 0 "int_reg_operand")
3587 (vec_select:<VS_scalar>
3588 (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
3589 (parallel [(match_operand:QI 2 "const_int_operand")])))
3590 (clobber (match_operand:SI 3 "int_reg_operand"))]
3591 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
3592 [(const_int 0)]
3593 {
3594 rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
3595 rtx op1 = operands[1];
3596 rtx op2 = operands[2];
3597 rtx op3 = operands[3];
3598 HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);
3599
3600 emit_move_insn (op3, GEN_INT (offset));
3601 if (BYTES_BIG_ENDIAN)
3602 emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
3603 else
3604 emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
3605 DONE;
3606 })
3607
3608 ;; Optimize zero extracts to eliminate the AND after the extract.
3609 (define_insn_and_split "*vsx_extract_<mode>_di_p9"
3610 [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
3611 (zero_extend:DI
3612 (vec_select:<VS_scalar>
3613 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
3614 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
3615 (clobber (match_scratch:SI 3 "=r,X"))]
3616 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3617 "#"
3618 "&& reload_completed"
3619 [(parallel [(set (match_dup 4)
3620 (vec_select:<VS_scalar>
3621 (match_dup 1)
3622 (parallel [(match_dup 2)])))
3623 (clobber (match_dup 3))])]
3624 {
3625 operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0]));
3626 }
3627 [(set_attr "isa" "p9v,*")])
3628
3629 ;; Optimize stores to use the ISA 3.0 scalar store instructions
3630 (define_insn_and_split "*vsx_extract_<mode>_store_p9"
3631 [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m")
3632 (vec_select:<VS_scalar>
3633 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
3634 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
3635 (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r"))
3636 (clobber (match_scratch:SI 4 "=X,&r"))]
3637 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3638 "#"
3639 "&& reload_completed"
3640 [(parallel [(set (match_dup 3)
3641 (vec_select:<VS_scalar>
3642 (match_dup 1)
3643 (parallel [(match_dup 2)])))
3644 (clobber (match_dup 4))])
3645 (set (match_dup 0)
3646 (match_dup 3))])
3647
3648 (define_insn_and_split "*vsx_extract_si"
3649 [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z")
3650 (vec_select:SI
3651 (match_operand:V4SI 1 "gpc_reg_operand" "v,v,v")
3652 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
3653 (clobber (match_scratch:V4SI 3 "=v,v,v"))]
3654 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
3655 "#"
3656 "&& reload_completed"
3657 [(const_int 0)]
3658 {
3659 rtx dest = operands[0];
3660 rtx src = operands[1];
3661 rtx element = operands[2];
3662 rtx vec_tmp = operands[3];
3663 int value;
3664
3665 if (!BYTES_BIG_ENDIAN)
3666 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3667
3668 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3669 instruction. */
3670 value = INTVAL (element);
3671 if (value != 1)
3672 emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
3673 else
3674 vec_tmp = src;
3675
3676 if (MEM_P (operands[0]))
3677 {
3678 if (can_create_pseudo_p ())
3679 dest = rs6000_force_indexed_or_indirect_mem (dest);
3680
3681 if (TARGET_P8_VECTOR)
3682 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3683 else
3684 emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
3685 }
3686
3687 else if (TARGET_P8_VECTOR)
3688 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3689 else
3690 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3691 gen_rtx_REG (DImode, REGNO (vec_tmp)));
3692
3693 DONE;
3694 }
3695 [(set_attr "type" "mftgpr,vecperm,fpstore")
3696 (set_attr "length" "8")
3697 (set_attr "isa" "*,p8v,*")])
3698
3699 (define_insn_and_split "*vsx_extract_<mode>_p8"
3700 [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r")
3701 (vec_select:<VS_scalar>
3702 (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
3703 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3704 (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
3705 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3706 && !TARGET_P9_VECTOR"
3707 "#"
3708 "&& reload_completed"
3709 [(const_int 0)]
3710 {
3711 rtx dest = operands[0];
3712 rtx src = operands[1];
3713 rtx element = operands[2];
3714 rtx vec_tmp = operands[3];
3715 int value;
3716
3717 if (!BYTES_BIG_ENDIAN)
3718 element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));
3719
3720 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3721 instruction. */
3722 value = INTVAL (element);
3723 if (<MODE>mode == V16QImode)
3724 {
3725 if (value != 7)
3726 emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
3727 else
3728 vec_tmp = src;
3729 }
3730 else if (<MODE>mode == V8HImode)
3731 {
3732 if (value != 3)
3733 emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
3734 else
3735 vec_tmp = src;
3736 }
3737 else
3738 gcc_unreachable ();
3739
3740 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3741 gen_rtx_REG (DImode, REGNO (vec_tmp)));
3742 DONE;
3743 }
3744 [(set_attr "type" "mftgpr")])
3745
3746 ;; Optimize extracting a single scalar element from memory.
3747 (define_insn_and_split "*vsx_extract_<mode>_load"
3748 [(set (match_operand:<VS_scalar> 0 "register_operand" "=r")
3749 (vec_select:<VS_scalar>
3750 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
3751 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3752 (clobber (match_scratch:DI 3 "=&b"))]
3753 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3754 "#"
3755 "&& reload_completed"
3756 [(set (match_dup 0) (match_dup 4))]
3757 {
3758 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3759 operands[3], <VS_scalar>mode);
3760 }
3761 [(set_attr "type" "load")
3762 (set_attr "length" "8")])
3763
3764 ;; Variable V16QI/V8HI/V4SI extract from a register
3765 (define_insn_and_split "vsx_extract_<mode>_var"
3766 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r")
3767 (unspec:<VS_scalar>
3768 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,v")
3769 (match_operand:DI 2 "gpc_reg_operand" "r,r")]
3770 UNSPEC_VSX_EXTRACT))
3771 (clobber (match_scratch:DI 3 "=r,r"))
3772 (clobber (match_scratch:V2DI 4 "=X,&v"))]
3773 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3774 "#"
3775 "&& reload_completed"
3776 [(const_int 0)]
3777 {
3778 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3779 operands[3], operands[4]);
3780 DONE;
3781 }
3782 [(set_attr "isa" "p9v,*")])
3783
3784 ;; Variable V16QI/V8HI/V4SI extract from memory
3785 (define_insn_and_split "*vsx_extract_<mode>_var_load"
3786 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r")
3787 (unspec:<VS_scalar>
3788 [(match_operand:VSX_EXTRACT_I 1 "memory_operand" "Q")
3789 (match_operand:DI 2 "gpc_reg_operand" "r")]
3790 UNSPEC_VSX_EXTRACT))
3791 (clobber (match_scratch:DI 3 "=&b"))]
3792 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3793 "#"
3794 "&& reload_completed"
3795 [(set (match_dup 0) (match_dup 4))]
3796 {
3797 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3798 operands[3], <VS_scalar>mode);
3799 }
3800 [(set_attr "type" "load")])
3801
3802 ;; VSX_EXTRACT optimizations
3803 ;; Optimize double d = (double) vec_extract (vi, <n>)
3804 ;; Get the element into the top position and use XVCVSXWDP/XVCVUXWDP.
3805 (define_insn_and_split "*vsx_extract_si_<uns>float_df"
3806 [(set (match_operand:DF 0 "gpc_reg_operand" "=wa")
3807 (any_float:DF
3808 (vec_select:SI
3809 (match_operand:V4SI 1 "gpc_reg_operand" "v")
3810 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3811 (clobber (match_scratch:V4SI 3 "=v"))]
3812 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3813 "#"
3814 "&& 1"
3815 [(const_int 0)]
3816 {
3817 rtx dest = operands[0];
3818 rtx src = operands[1];
3819 rtx element = operands[2];
3820 rtx v4si_tmp = operands[3];
3821 int value;
3822
3823 if (!BYTES_BIG_ENDIAN)
3824 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3825
3826 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3827 instruction. */
3828 value = INTVAL (element);
3829 if (value != 0)
3830 {
3831 if (GET_CODE (v4si_tmp) == SCRATCH)
3832 v4si_tmp = gen_reg_rtx (V4SImode);
3833 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
3834 }
3835 else
3836 v4si_tmp = src;
3837
3838 emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
3839 DONE;
3840 })
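
;; A sketch of the sequence the split above emits when element <n> is not
;; already in the convertible slot (mnemonics follow the insns generated
;; above):
;;
;;   vspltw    tmp,vi,n'    ; n' = endian-adjusted element number
;;   xvcvsxwdp dst,tmp      ; xvcvuxwdp for the unsigned variant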
3841
3842 ;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
3843 ;; where <type> is a floating point type supported by the hardware that is
3844 ;; not double. First convert the value to double, and then to the desired
3845 ;; type.
3846 (define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
3847 [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=wa")
3848 (any_float:VSX_EXTRACT_FL
3849 (vec_select:SI
3850 (match_operand:V4SI 1 "gpc_reg_operand" "v")
3851 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3852 (clobber (match_scratch:V4SI 3 "=v"))
3853 (clobber (match_scratch:DF 4 "=wa"))]
3854 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3855 "#"
3856 "&& 1"
3857 [(const_int 0)]
3858 {
3859 rtx dest = operands[0];
3860 rtx src = operands[1];
3861 rtx element = operands[2];
3862 rtx v4si_tmp = operands[3];
3863 rtx df_tmp = operands[4];
3864 int value;
3865
3866 if (!BYTES_BIG_ENDIAN)
3867 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3868
3869 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3870 instruction. */
3871 value = INTVAL (element);
3872 if (value != 0)
3873 {
3874 if (GET_CODE (v4si_tmp) == SCRATCH)
3875 v4si_tmp = gen_reg_rtx (V4SImode);
3876 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
3877 }
3878 else
3879 v4si_tmp = src;
3880
3881 if (GET_CODE (df_tmp) == SCRATCH)
3882 df_tmp = gen_reg_rtx (DFmode);
3883
3884 emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));
3885
3886 if (<MODE>mode == SFmode)
3887 emit_insn (gen_truncdfsf2 (dest, df_tmp));
3888 else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
3889 emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
3890 else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
3891 && TARGET_FLOAT128_HW)
3892 emit_insn (gen_extenddftf2_hw (dest, df_tmp));
3893 else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
3894 emit_insn (gen_extenddfif2 (dest, df_tmp));
3895 else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
3896 emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
3897 else
3898 gcc_unreachable ();
3899
3900 DONE;
3901 })
3902
3903 ;; Optimize <ftype> f = (<ftype>) vec_extract (<vtype>, <n>)
3904 ;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE
3905 ;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
3906 ;; vector short or vector unsigned short.
3907 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>"
3908 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa")
3909 (float:FL_CONV
3910 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3911 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3912 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3913 (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
3914 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3915 && TARGET_P9_VECTOR"
3916 "#"
3917 "&& reload_completed"
3918 [(parallel [(set (match_dup 3)
3919 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3920 (match_dup 1)
3921 (parallel [(match_dup 2)])))
3922 (clobber (scratch:SI))])
3923 (set (match_dup 4)
3924 (sign_extend:DI (match_dup 3)))
3925 (set (match_dup 0)
3926 (float:<FL_CONV:MODE> (match_dup 4)))]
3927 {
3928 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
3929 }
3930 [(set_attr "isa" "<FL_CONV:VSisa>")])
3931
3932 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>"
3933 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa")
3934 (unsigned_float:FL_CONV
3935 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3936 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3937 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3938 (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
3939 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3940 && TARGET_P9_VECTOR"
3941 "#"
3942 "&& reload_completed"
3943 [(parallel [(set (match_dup 3)
3944 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3945 (match_dup 1)
3946 (parallel [(match_dup 2)])))
3947 (clobber (scratch:SI))])
3948 (set (match_dup 0)
3949 (float:<FL_CONV:MODE> (match_dup 4)))]
3950 {
3951 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
3952 }
3953 [(set_attr "isa" "<FL_CONV:VSisa>")])
3954
3955 ;; V4SI/V8HI/V16QI set operation on ISA 3.0
3956 (define_insn "vsx_set_<mode>_p9"
3957 [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
3958 (unspec:VSX_EXTRACT_I
3959 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
3960 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>")
3961 (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
3962 UNSPEC_VSX_SET))]
3963 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3964 {
3965 int ele = INTVAL (operands[3]);
3966 int nunits = GET_MODE_NUNITS (<MODE>mode);
3967
3968 if (!BYTES_BIG_ENDIAN)
3969 ele = nunits - 1 - ele;
3970
3971 operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * ele);
3972 if (<MODE>mode == V4SImode)
3973 return "xxinsertw %x0,%x2,%3";
3974 else
3975 return "vinsert<wd> %0,%2,%3";
3976 }
3977 [(set_attr "type" "vecperm")])
3978
3979 (define_insn_and_split "vsx_set_v4sf_p9"
3980 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3981 (unspec:V4SF
3982 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3983 (match_operand:SF 2 "gpc_reg_operand" "wa")
3984 (match_operand:QI 3 "const_0_to_3_operand" "n")]
3985 UNSPEC_VSX_SET))
3986 (clobber (match_scratch:SI 4 "=&wa"))]
3987 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3988 "#"
3989 "&& reload_completed"
3990 [(set (match_dup 5)
3991 (unspec:V4SF [(match_dup 2)]
3992 UNSPEC_VSX_CVDPSPN))
3993 (parallel [(set (match_dup 4)
3994 (vec_select:SI (match_dup 6)
3995 (parallel [(match_dup 7)])))
3996 (clobber (scratch:SI))])
3997 (set (match_dup 8)
3998 (unspec:V4SI [(match_dup 8)
3999 (match_dup 4)
4000 (match_dup 3)]
4001 UNSPEC_VSX_SET))]
4002 {
4003 unsigned int tmp_regno = reg_or_subregno (operands[4]);
4004
4005 operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
4006 operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
4007 operands[7] = GEN_INT (BYTES_BIG_ENDIAN ? 0 : 3);
4008 operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
4009 }
4010 [(set_attr "type" "vecperm")
4011 (set_attr "length" "12")
4012 (set_attr "isa" "p9v")])
4013
4014 ;; Special case setting 0.0f to a V4SF element
4015 (define_insn_and_split "*vsx_set_v4sf_p9_zero"
4016 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4017 (unspec:V4SF
4018 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4019 (match_operand:SF 2 "zero_fp_constant" "j")
4020 (match_operand:QI 3 "const_0_to_3_operand" "n")]
4021 UNSPEC_VSX_SET))
4022 (clobber (match_scratch:SI 4 "=&wa"))]
4023 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
4024 "#"
4025 "&& reload_completed"
4026 [(set (match_dup 4)
4027 (const_int 0))
4028 (set (match_dup 5)
4029 (unspec:V4SI [(match_dup 5)
4030 (match_dup 4)
4031 (match_dup 3)]
4032 UNSPEC_VSX_SET))]
4033 {
4034 operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
4035 }
4036 [(set_attr "type" "vecperm")
4037 (set_attr "length" "8")
4038 (set_attr "isa" "p9v")])
4039
4040 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
4041 ;; that is in the default scalar position (1 for big endian, 2 for little
4042 ;; endian). We just need to do an xxinsertw since the element is in the
4043 ;; correct location.
4044
4045 (define_insn "*vsx_insert_extract_v4sf_p9"
4046 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4047 (unspec:V4SF
4048 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4049 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4050 (parallel
4051 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4052 (match_operand:QI 4 "const_0_to_3_operand" "n")]
4053 UNSPEC_VSX_SET))]
4054 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64
4055 && (INTVAL (operands[3]) == (BYTES_BIG_ENDIAN ? 1 : 2))"
4056 {
4057 int ele = INTVAL (operands[4]);
4058
4059 if (!BYTES_BIG_ENDIAN)
4060 ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele;
4061
4062 operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
4063 return "xxinsertw %x0,%x2,%4";
4064 }
4065 [(set_attr "type" "vecperm")])
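
;; Worked example of the byte-offset computation above: inserting at element
;; m = 3 on little endian gives ele = 4 - 1 - 3 = 0 and byte offset
;; 4 * 0 = 0, so the insn emits "xxinsertw %x0,%x2,0"; on big endian m = 3
;; stays 3 and the offset is 4 * 3 = 12.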
4066
4067 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
4068 ;; that is in the default scalar position (1 for big endian, 2 for little
4069 ;; endian). Convert the insert/extract to int and avoid doing the conversion.
4070
4071 (define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
4072 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4073 (unspec:V4SF
4074 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4075 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4076 (parallel
4077 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4078 (match_operand:QI 4 "const_0_to_3_operand" "n")]
4079 UNSPEC_VSX_SET))
4080 (clobber (match_scratch:SI 5 "=&wa"))]
4081 "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
4082 && TARGET_P9_VECTOR && TARGET_POWERPC64
4083 && (INTVAL (operands[3]) != (BYTES_BIG_ENDIAN ? 1 : 2))"
4084 "#"
4085 "&& 1"
4086 [(parallel [(set (match_dup 5)
4087 (vec_select:SI (match_dup 6)
4088 (parallel [(match_dup 3)])))
4089 (clobber (scratch:SI))])
4090 (set (match_dup 7)
4091 (unspec:V4SI [(match_dup 8)
4092 (match_dup 5)
4093 (match_dup 4)]
4094 UNSPEC_VSX_SET))]
4095 {
4096 if (GET_CODE (operands[5]) == SCRATCH)
4097 operands[5] = gen_reg_rtx (SImode);
4098
4099 operands[6] = gen_lowpart (V4SImode, operands[2]);
4100 operands[7] = gen_lowpart (V4SImode, operands[0]);
4101 operands[8] = gen_lowpart (V4SImode, operands[1]);
4102 }
4103 [(set_attr "type" "vecperm")
4104 (set_attr "isa" "p9v")])
4105
4106 ;; Expanders for builtins
4107 (define_expand "vsx_mergel_<mode>"
4108 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4109 (use (match_operand:VSX_D 1 "vsx_register_operand"))
4110 (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4111 "VECTOR_MEM_VSX_P (<MODE>mode)"
4112 {
4113 rtvec v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
4114 rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4115 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4116 emit_insn (gen_rtx_SET (operands[0], x));
4117 DONE;
4118 })
4119
4120 (define_expand "vsx_mergeh_<mode>"
4121 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4122 (use (match_operand:VSX_D 1 "vsx_register_operand"))
4123 (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4124 "VECTOR_MEM_VSX_P (<MODE>mode)"
4125 {
4126 rtvec v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
4127 rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4128 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4129 emit_insn (gen_rtx_SET (operands[0], x));
4130 DONE;
4131 })
4132
4133 ;; V2DF/V2DI splat
4134 ;; We separate the register splat insn from the memory splat insn to force the
4135 ;; register allocator to generate the indexed form of the SPLAT when it is
4136 ;; given an offsettable memory reference. Otherwise, if the register and
4137 ;; memory insns were combined into a single insn, the register allocator will
4138 ;; load the value into a register, and then do a double word permute.
4139 (define_expand "vsx_splat_<mode>"
4140 [(set (match_operand:VSX_D 0 "vsx_register_operand")
4141 (vec_duplicate:VSX_D
4142 (match_operand:<VS_scalar> 1 "input_operand")))]
4143 "VECTOR_MEM_VSX_P (<MODE>mode)"
4144 {
4145 rtx op1 = operands[1];
4146 if (MEM_P (op1))
4147 operands[1] = rs6000_force_indexed_or_indirect_mem (op1);
4148 else if (!REG_P (op1))
4149 operands[1] = force_reg (<VSX_D:VS_scalar>mode, op1);
4150 })
4151
4152 (define_insn "vsx_splat_<mode>_reg"
4153 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
4154 (vec_duplicate:VSX_D
4155 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")))]
4156 "VECTOR_MEM_VSX_P (<MODE>mode)"
4157 "@
4158 xxpermdi %x0,%x1,%x1,0
4159 mtvsrdd %x0,%1,%1"
4160 [(set_attr "type" "vecperm")])
4161
4162 (define_insn "vsx_splat_<mode>_mem"
4163 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4164 (vec_duplicate:VSX_D
4165 (match_operand:<VSX_D:VS_scalar> 1 "memory_operand" "Z")))]
4166 "VECTOR_MEM_VSX_P (<MODE>mode)"
4167 "lxvdsx %x0,%y1"
4168 [(set_attr "type" "vecload")])
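
;; A minimal usage sketch in C (hypothetical function, assuming the standard
;; altivec.h vec_splats interface) of the splat patterns above; splatting
;; straight from memory should select the lxvdsx form:
;;
;;   #include <altivec.h>
;;
;;   vector double
;;   splat_mem (const double *p)
;;   {
;;     return vec_splats (*p);   /* ideally a single lxvdsx */
;;   }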
4169
4170 ;; V4SI splat support
4171 (define_insn "vsx_splat_v4si"
4172 [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")
4173 (vec_duplicate:V4SI
4174 (match_operand:SI 1 "splat_input_operand" "r,Z")))]
4175 "TARGET_P9_VECTOR"
4176 "@
4177 mtvsrws %x0,%1
4178 lxvwsx %x0,%y1"
4179 [(set_attr "type" "vecperm,vecload")])
4180
4181 ;; SImode is not currently allowed in vector registers. This pattern
4182 ;; allows us to use direct move to get the value in a vector register
4183 ;; so that we can use XXSPLTW.
4184 (define_insn "vsx_splat_v4si_di"
4185 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
4186 (vec_duplicate:V4SI
4187 (truncate:SI
4188 (match_operand:DI 1 "gpc_reg_operand" "wa,r"))))]
4189 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4190 "@
4191 xxspltw %x0,%x1,1
4192 mtvsrws %x0,%1"
4193 [(set_attr "type" "vecperm")
4194 (set_attr "isa" "p8v,*")])
4195
4196 ;; V4SF splat (ISA 3.0)
4197 (define_insn_and_split "vsx_splat_v4sf"
4198 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
4199 (vec_duplicate:V4SF
4200 (match_operand:SF 1 "splat_input_operand" "Z,wa,r")))]
4201 "TARGET_P9_VECTOR"
4202 "@
4203 lxvwsx %x0,%y1
4204 #
4205 mtvsrws %x0,%1"
4206 "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
4207 [(set (match_dup 0)
4208 (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
4209 (set (match_dup 0)
4210 (unspec:V4SF [(match_dup 0)
4211 (const_int 0)] UNSPEC_VSX_XXSPLTW))]
4212 ""
4213 [(set_attr "type" "vecload,vecperm,mftgpr")
4214 (set_attr "length" "*,8,*")
4215 (set_attr "isa" "*,p8v,*")])
4216
4217 ;; V4SF/V4SI splat from a vector element
4218 (define_insn "vsx_xxspltw_<mode>"
4219 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4220 (vec_duplicate:VSX_W
4221 (vec_select:<VS_scalar>
4222 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4223 (parallel
4224 [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
4225 "VECTOR_MEM_VSX_P (<MODE>mode)"
4226 {
4227 if (!BYTES_BIG_ENDIAN)
4228 operands[2] = GEN_INT (3 - INTVAL (operands[2]));
4229
4230 return "xxspltw %x0,%x1,%2";
4231 }
4232 [(set_attr "type" "vecperm")])
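
;; Worked example of the endian adjustment above: xxspltw numbers words from
;; the other end of the register on little endian, so splatting element 1 of
;; the array-order numbering becomes immediate 3 - 1 = 2 in the emitted
;; "xxspltw %x0,%x1,2".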
4233
4234 (define_insn "vsx_xxspltw_<mode>_direct"
4235 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4236 (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wa")
4237 (match_operand:QI 2 "u5bit_cint_operand" "i")]
4238 UNSPEC_VSX_XXSPLTW))]
4239 "VECTOR_MEM_VSX_P (<MODE>mode)"
4240 "xxspltw %x0,%x1,%2"
4241 [(set_attr "type" "vecperm")])
4242
4243 ;; V16QI/V8HI splat support on ISA 2.07
4244 (define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
4245 [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
4246 (vec_duplicate:VSX_SPLAT_I
4247 (truncate:<VS_scalar>
4248 (match_operand:DI 1 "altivec_register_operand" "v"))))]
4249 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
4250 "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
4251 [(set_attr "type" "vecperm")])
4252
4253 ;; V2DF/V2DI splat for use by vec_splat builtin
4254 (define_insn "vsx_xxspltd_<mode>"
4255 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4256 (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
4257 (match_operand:QI 2 "u5bit_cint_operand" "i")]
4258 UNSPEC_VSX_XXSPLTD))]
4259 "VECTOR_MEM_VSX_P (<MODE>mode)"
4260 {
4261 if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
4262 || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
4263 return "xxpermdi %x0,%x1,%x1,0";
4264 else
4265 return "xxpermdi %x0,%x1,%x1,3";
4266 }
4267 [(set_attr "type" "vecperm")])
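
;; Worked example: splatting doubleword 0 uses immediate 0 on big endian but
;; immediate 3 on little endian, since element 0 of the vec_splat numbering
;; is doubleword 1 of the hardware (big-endian) numbering there.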
4268
4269 ;; V4SF/V4SI interleave
4270 (define_insn "vsx_xxmrghw_<mode>"
4271 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4272 (vec_select:VSX_W
4273 (vec_concat:<VS_double>
4274 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4275 (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
4276 (parallel [(const_int 0) (const_int 4)
4277 (const_int 1) (const_int 5)])))]
4278 "VECTOR_MEM_VSX_P (<MODE>mode)"
4279 {
4280 if (BYTES_BIG_ENDIAN)
4281 return "xxmrghw %x0,%x1,%x2";
4282 else
4283 return "xxmrglw %x0,%x2,%x1";
4284 }
4285 [(set_attr "type" "vecperm")])
4286
4287 (define_insn "vsx_xxmrglw_<mode>"
4288 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4289 (vec_select:VSX_W
4290 (vec_concat:<VS_double>
4291 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4292 (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
4293 (parallel [(const_int 2) (const_int 6)
4294 (const_int 3) (const_int 7)])))]
4295 "VECTOR_MEM_VSX_P (<MODE>mode)"
4296 {
4297 if (BYTES_BIG_ENDIAN)
4298 return "xxmrglw %x0,%x1,%x2";
4299 else
4300 return "xxmrghw %x0,%x2,%x1";
4301 }
4302 [(set_attr "type" "vecperm")])
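
;; Worked example of the endian handling above: on big endian, word selectors
;; {2,6,3,7} of the concatenation are exactly xxmrglw; on little endian the
;; same RTL selection is the high merge of the swapped inputs, hence
;; "xxmrghw %x0,%x2,%x1".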
4303
4304 ;; Shift left double by word immediate
4305 (define_insn "vsx_xxsldwi_<mode>"
4306 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=wa")
4307 (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "wa")
4308 (match_operand:VSX_L 2 "vsx_register_operand" "wa")
4309 (match_operand:QI 3 "u5bit_cint_operand" "i")]
4310 UNSPEC_VSX_SLDWI))]
4311 "VECTOR_MEM_VSX_P (<MODE>mode)"
4312 "xxsldwi %x0,%x1,%x2,%3"
4313 [(set_attr "type" "vecperm")
4314 (set_attr "isa" "<VSisa>")])
4315
4316 \f
4317 ;; Vector reduction insns and splitters
4318
4319 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
4320 [(set (match_operand:V2DF 0 "vfloat_operand" "=&wa,wa")
4321 (VEC_reduc:V2DF
4322 (vec_concat:V2DF
4323 (vec_select:DF
4324 (match_operand:V2DF 1 "vfloat_operand" "wa,wa")
4325 (parallel [(const_int 1)]))
4326 (vec_select:DF
4327 (match_dup 1)
4328 (parallel [(const_int 0)])))
4329 (match_dup 1)))
4330 (clobber (match_scratch:V2DF 2 "=0,&wa"))]
4331 "VECTOR_UNIT_VSX_P (V2DFmode)"
4332 "#"
4333 ""
4334 [(const_int 0)]
4335 {
4336 rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
4337 ? gen_reg_rtx (V2DFmode)
4338 : operands[2];
4339 emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
4340 emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
4341 DONE;
4342 }
4343 [(set_attr "length" "8")
4344 (set_attr "type" "veccomplex")])
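
;; For reference, the split above turns a V2DF horizontal reduction into a
;; two-insn sequence; e.g. for an add reduction (sketch of the generated
;; assembly):
;;
;;   xxsldwi tmp,x,x,2    ; rotate by one doubleword
;;   xvadddp dst,tmp,x    ; both halves of dst now hold the horizontal sum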
4345
4346 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
4347 [(set (match_operand:V4SF 0 "vfloat_operand" "=wa")
4348 (VEC_reduc:V4SF
4349 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4350 (match_operand:V4SF 1 "vfloat_operand" "wa")))
4351 (clobber (match_scratch:V4SF 2 "=&wa"))
4352 (clobber (match_scratch:V4SF 3 "=&wa"))]
4353 "VECTOR_UNIT_VSX_P (V4SFmode)"
4354 "#"
4355 ""
4356 [(const_int 0)]
4357 {
4358 rtx op0 = operands[0];
4359 rtx op1 = operands[1];
4360 rtx tmp2, tmp3, tmp4;
4361
4362 if (can_create_pseudo_p ())
4363 {
4364 tmp2 = gen_reg_rtx (V4SFmode);
4365 tmp3 = gen_reg_rtx (V4SFmode);
4366 tmp4 = gen_reg_rtx (V4SFmode);
4367 }
4368 else
4369 {
4370 tmp2 = operands[2];
4371 tmp3 = operands[3];
4372 tmp4 = tmp2;
4373 }
4374
4375 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4376 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4377 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4378 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
4379 DONE;
4380 }
4381 [(set_attr "length" "16")
4382 (set_attr "type" "veccomplex")])
4383
4384 ;; Combiner patterns with the vector reduction patterns that know we can get
4385 ;; to the top element of the V2DF array without doing an extract.
4386
4387 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
4388 [(set (match_operand:DF 0 "vfloat_operand" "=&wa,wa")
4389 (vec_select:DF
4390 (VEC_reduc:V2DF
4391 (vec_concat:V2DF
4392 (vec_select:DF
4393 (match_operand:V2DF 1 "vfloat_operand" "wa,wa")
4394 (parallel [(const_int 1)]))
4395 (vec_select:DF
4396 (match_dup 1)
4397 (parallel [(const_int 0)])))
4398 (match_dup 1))
4399 (parallel [(const_int 1)])))
4400 (clobber (match_scratch:DF 2 "=0,&wa"))]
4401 "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V2DFmode)"
4402 "#"
4403 ""
4404 [(const_int 0)]
4405 {
4406 rtx hi = gen_highpart (DFmode, operands[1]);
4407 rtx lo = (GET_CODE (operands[2]) == SCRATCH)
4408 ? gen_reg_rtx (DFmode)
4409 : operands[2];
4410
4411 emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
4412 emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
4413 DONE;
4414 }
4415 [(set_attr "length" "8")
4416 (set_attr "type" "veccomplex")])
4417
4418 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
4419 [(set (match_operand:SF 0 "vfloat_operand" "=f")
4420 (vec_select:SF
4421 (VEC_reduc:V4SF
4422 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4423 (match_operand:V4SF 1 "vfloat_operand" "wa"))
4424 (parallel [(const_int 3)])))
4425 (clobber (match_scratch:V4SF 2 "=&wa"))
4426 (clobber (match_scratch:V4SF 3 "=&wa"))
4427 (clobber (match_scratch:V4SF 4 "=0"))]
4428 "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V4SFmode)"
4429 "#"
4430 ""
4431 [(const_int 0)]
4432 {
4433 rtx op0 = operands[0];
4434 rtx op1 = operands[1];
4435 rtx tmp2, tmp3, tmp4, tmp5;
4436
4437 if (can_create_pseudo_p ())
4438 {
4439 tmp2 = gen_reg_rtx (V4SFmode);
4440 tmp3 = gen_reg_rtx (V4SFmode);
4441 tmp4 = gen_reg_rtx (V4SFmode);
4442 tmp5 = gen_reg_rtx (V4SFmode);
4443 }
4444 else
4445 {
4446 tmp2 = operands[2];
4447 tmp3 = operands[3];
4448 tmp4 = tmp2;
4449 tmp5 = operands[4];
4450 }
4451
4452 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4453 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4454 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4455 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
4456 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
4457 DONE;
4458 }
4459 [(set_attr "length" "20")
4460 (set_attr "type" "veccomplex")])
4461
4462 \f
4463 ;; Power8 Vector fusion. The fused ops must be physically adjacent.
4464 (define_peephole
4465 [(set (match_operand:P 0 "base_reg_operand")
4466 (match_operand:P 1 "short_cint_operand"))
4467 (set (match_operand:VSX_M 2 "vsx_register_operand")
4468 (mem:VSX_M (plus:P (match_dup 0)
4469 (match_operand:P 3 "int_reg_operand"))))]
4470 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4471 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4472 [(set_attr "length" "8")
4473 (set_attr "type" "vecload")])
4474
4475 (define_peephole
4476 [(set (match_operand:P 0 "base_reg_operand")
4477 (match_operand:P 1 "short_cint_operand"))
4478 (set (match_operand:VSX_M 2 "vsx_register_operand")
4479 (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand")
4480 (match_dup 0))))]
4481 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4482 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4483 [(set_attr "length" "8")
4484 (set_attr "type" "vecload")])
4485
4486 \f
4487 ;; ISA 3.0 vector extend sign support
4488
4489 (define_insn "vsx_sign_extend_qi_<mode>"
4490 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4491 (unspec:VSINT_84
4492 [(match_operand:V16QI 1 "vsx_register_operand" "v")]
4493 UNSPEC_VSX_SIGN_EXTEND))]
4494 "TARGET_P9_VECTOR"
4495 "vextsb2<wd> %0,%1"
4496 [(set_attr "type" "vecexts")])
4497
4498 (define_insn "vsx_sign_extend_hi_<mode>"
4499 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4500 (unspec:VSINT_84
4501 [(match_operand:V8HI 1 "vsx_register_operand" "v")]
4502 UNSPEC_VSX_SIGN_EXTEND))]
4503 "TARGET_P9_VECTOR"
4504 "vextsh2<wd> %0,%1"
4505 [(set_attr "type" "vecexts")])
4506
4507 (define_insn "*vsx_sign_extend_si_v2di"
4508 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
4509 (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
4510 UNSPEC_VSX_SIGN_EXTEND))]
4511 "TARGET_P9_VECTOR"
4512 "vextsw2d %0,%1"
4513 [(set_attr "type" "vecexts")])
4514
4515 \f
4516 ;; ISA 3.0 Binary Floating-Point Support
4517
4518 ;; VSX Scalar Extract Exponent Quad-Precision
4519 (define_insn "xsxexpqp_<mode>"
4520 [(set (match_operand:DI 0 "altivec_register_operand" "=v")
4521 (unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4522 UNSPEC_VSX_SXEXPDP))]
4523 "TARGET_P9_VECTOR"
4524 "xsxexpqp %0,%1"
4525 [(set_attr "type" "vecmove")])
4526
4527 ;; VSX Scalar Extract Exponent Double-Precision
4528 (define_insn "xsxexpdp"
4529 [(set (match_operand:DI 0 "register_operand" "=r")
4530 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4531 UNSPEC_VSX_SXEXPDP))]
4532 "TARGET_P9_VECTOR && TARGET_64BIT"
4533 "xsxexpdp %0,%x1"
4534 [(set_attr "type" "integer")])
4535
4536 ;; VSX Scalar Extract Significand Quad-Precision
4537 (define_insn "xsxsigqp_<mode>"
4538 [(set (match_operand:TI 0 "altivec_register_operand" "=v")
4539 (unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4540 UNSPEC_VSX_SXSIG))]
4541 "TARGET_P9_VECTOR"
4542 "xsxsigqp %0,%1"
4543 [(set_attr "type" "vecmove")])
4544
4545 ;; VSX Scalar Extract Significand Double-Precision
4546 (define_insn "xsxsigdp"
4547 [(set (match_operand:DI 0 "register_operand" "=r")
4548 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4549 UNSPEC_VSX_SXSIG))]
4550 "TARGET_P9_VECTOR && TARGET_64BIT"
4551 "xsxsigdp %0,%x1"
4552 [(set_attr "type" "integer")])
4553
4554 ;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
4555 (define_insn "xsiexpqpf_<mode>"
4556 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4557 (unspec:IEEE128
4558 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4559 (match_operand:DI 2 "altivec_register_operand" "v")]
4560 UNSPEC_VSX_SIEXPQP))]
4561 "TARGET_P9_VECTOR"
4562 "xsiexpqp %0,%1,%2"
4563 [(set_attr "type" "vecmove")])
4564
4565 ;; VSX Scalar Insert Exponent Quad-Precision
4566 (define_insn "xsiexpqp_<mode>"
4567 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4568 (unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v")
4569 (match_operand:DI 2 "altivec_register_operand" "v")]
4570 UNSPEC_VSX_SIEXPQP))]
4571 "TARGET_P9_VECTOR"
4572 "xsiexpqp %0,%1,%2"
4573 [(set_attr "type" "vecmove")])
4574
4575 ;; VSX Scalar Insert Exponent Double-Precision
4576 (define_insn "xsiexpdp"
4577 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4578 (unspec:DF [(match_operand:DI 1 "register_operand" "r")
4579 (match_operand:DI 2 "register_operand" "r")]
4580 UNSPEC_VSX_SIEXPDP))]
4581 "TARGET_P9_VECTOR && TARGET_64BIT"
4582 "xsiexpdp %x0,%1,%2"
4583 [(set_attr "type" "fpsimple")])
4584
4585 ;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
4586 (define_insn "xsiexpdpf"
4587 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4588 (unspec:DF [(match_operand:DF 1 "register_operand" "r")
4589 (match_operand:DI 2 "register_operand" "r")]
4590 UNSPEC_VSX_SIEXPDP))]
4591 "TARGET_P9_VECTOR && TARGET_64BIT"
4592 "xsiexpdp %x0,%1,%2"
4593 [(set_attr "type" "fpsimple")])
4594
4595 ;; VSX Scalar Compare Exponents Double-Precision
4596 (define_expand "xscmpexpdp_<code>"
4597 [(set (match_dup 3)
4598 (compare:CCFP
4599 (unspec:DF
4600 [(match_operand:DF 1 "vsx_register_operand" "wa")
4601 (match_operand:DF 2 "vsx_register_operand" "wa")]
4602 UNSPEC_VSX_SCMPEXPDP)
4603 (const_int 0)))
4604 (set (match_operand:SI 0 "register_operand" "=r")
4605 (CMP_TEST:SI (match_dup 3)
4606 (const_int 0)))]
4607 "TARGET_P9_VECTOR"
4608 {
4609 if (<CODE> == UNORDERED && !HONOR_NANS (DFmode))
4610 {
4611 emit_move_insn (operands[0], const0_rtx);
4612 DONE;
4613 }
4614
4615 operands[3] = gen_reg_rtx (CCFPmode);
4616 })
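
;; A minimal usage sketch in C (hypothetical function; assumes the ISA 3.0
;; scalar_cmp_exp_* builtins exposed via altivec.h with -mcpu=power9) of the
;; expander above:
;;
;;   #include <altivec.h>
;;
;;   int
;;   exps_equal (double a, double b)
;;   {
;;     /* Compares only the biased exponents, via xscmpexpdp.  */
;;     return scalar_cmp_exp_eq (a, b);
;;   }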
4617
4618 (define_insn "*xscmpexpdp"
4619 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4620 (compare:CCFP
4621 (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
4622 (match_operand:DF 2 "vsx_register_operand" "wa")]
4623 UNSPEC_VSX_SCMPEXPDP)
4624 (match_operand:SI 3 "zero_constant" "j")))]
4625 "TARGET_P9_VECTOR"
4626 "xscmpexpdp %0,%x1,%x2"
4627 [(set_attr "type" "fpcompare")])
4628
4629 ;; VSX Scalar Compare Exponents Quad-Precision
4630 (define_expand "xscmpexpqp_<code>_<mode>"
4631 [(set (match_dup 3)
4632 (compare:CCFP
4633 (unspec:IEEE128
4634 [(match_operand:IEEE128 1 "vsx_register_operand" "v")
4635 (match_operand:IEEE128 2 "vsx_register_operand" "v")]
4636 UNSPEC_VSX_SCMPEXPQP)
4637 (const_int 0)))
4638 (set (match_operand:SI 0 "register_operand" "=r")
4639 (CMP_TEST:SI (match_dup 3)
4640 (const_int 0)))]
4641 "TARGET_P9_VECTOR"
4642 {
4643 if (<CODE> == UNORDERED && !HONOR_NANS (<MODE>mode))
4644 {
4645 emit_move_insn (operands[0], const0_rtx);
4646 DONE;
4647 }
4648
4649 operands[3] = gen_reg_rtx (CCFPmode);
4650 })
4651
4652 (define_insn "*xscmpexpqp"
4653 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4654 (compare:CCFP
4655 (unspec:IEEE128 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4656 (match_operand:IEEE128 2 "altivec_register_operand" "v")]
4657 UNSPEC_VSX_SCMPEXPQP)
4658 (match_operand:SI 3 "zero_constant" "j")))]
4659 "TARGET_P9_VECTOR"
4660 "xscmpexpqp %0,%1,%2"
4661 [(set_attr "type" "fpcompare")])
4662
4663 ;; VSX Scalar Test Data Class Quad-Precision
4664 ;; (Expansion for scalar_test_data_class (__ieee128, int))
4665 ;; (Has side effect of setting the lt bit if operand 1 is negative,
4666 ;; setting the eq bit if any of the conditions tested by operand 2
4667 ;; are satisfied, and clearing the gt and unordered bits to zero.)
4668 (define_expand "xststdcqp_<mode>"
4669 [(set (match_dup 3)
4670 (compare:CCFP
4671 (unspec:IEEE128
4672 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4673 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4674 UNSPEC_VSX_STSTDC)
4675 (const_int 0)))
4676 (set (match_operand:SI 0 "register_operand" "=r")
4677 (eq:SI (match_dup 3)
4678 (const_int 0)))]
4679 "TARGET_P9_VECTOR"
4680 {
4681 operands[3] = gen_reg_rtx (CCFPmode);
4682 })
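
;; A minimal usage sketch in C (hypothetical function, using the
;; scalar_test_data_class interface named in the comment above; the DCMX
;; mask value 0x0c is assumed here to select +/- zero and is illustrative
;; only):
;;
;;   int
;;   is_zero_qp (__ieee128 x)
;;   {
;;     return scalar_test_data_class (x, 0x0c);   /* assumed mask: +/-0 */
;;   }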
4683
4684 ;; VSX Scalar Test Data Class Double- and Single-Precision
4685 ;; (The lt bit is set if operand 1 is negative. The eq bit is set
4686 ;; if any of the conditions tested by operand 2 are satisfied.
4687 ;; The gt and unordered bits are cleared to zero.)
4688 (define_expand "xststdc<sd>p"
4689 [(set (match_dup 3)
4690 (compare:CCFP
4691 (unspec:SFDF
4692 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4693 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4694 UNSPEC_VSX_STSTDC)
4695 (match_dup 4)))
4696 (set (match_operand:SI 0 "register_operand" "=r")
4697 (eq:SI (match_dup 3)
4698 (const_int 0)))]
4699 "TARGET_P9_VECTOR"
4700 {
4701 operands[3] = gen_reg_rtx (CCFPmode);
4702 operands[4] = CONST0_RTX (SImode);
4703 })
4704
4705 ;; The VSX Scalar Test Negative Quad-Precision
4706 (define_expand "xststdcnegqp_<mode>"
4707 [(set (match_dup 2)
4708 (compare:CCFP
4709 (unspec:IEEE128
4710 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4711 (const_int 0)]
4712 UNSPEC_VSX_STSTDC)
4713 (const_int 0)))
4714 (set (match_operand:SI 0 "register_operand" "=r")
4715 (lt:SI (match_dup 2)
4716 (const_int 0)))]
4717 "TARGET_P9_VECTOR"
4718 {
4719 operands[2] = gen_reg_rtx (CCFPmode);
4720 })
4721
4722 ;; The VSX Scalar Test Negative Double- and Single-Precision
4723 (define_expand "xststdcneg<sd>p"
4724 [(set (match_dup 2)
4725 (compare:CCFP
4726 (unspec:SFDF
4727 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4728 (const_int 0)]
4729 UNSPEC_VSX_STSTDC)
4730 (match_dup 3)))
4731 (set (match_operand:SI 0 "register_operand" "=r")
4732 (lt:SI (match_dup 2)
4733 (const_int 0)))]
4734 "TARGET_P9_VECTOR"
4735 {
4736 operands[2] = gen_reg_rtx (CCFPmode);
4737 operands[3] = CONST0_RTX (SImode);
4738 })
4739
4740 (define_insn "*xststdcqp_<mode>"
4741 [(set (match_operand:CCFP 0 "" "=y")
4742 (compare:CCFP
4743 (unspec:IEEE128
4744 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4745 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4746 UNSPEC_VSX_STSTDC)
4747 (const_int 0)))]
4748 "TARGET_P9_VECTOR"
4749 "xststdcqp %0,%1,%2"
4750 [(set_attr "type" "fpcompare")])
4751
4752 (define_insn "*xststdc<sd>p"
4753 [(set (match_operand:CCFP 0 "" "=y")
4754 (compare:CCFP
4755 (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4756 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4757 UNSPEC_VSX_STSTDC)
4758 (match_operand:SI 3 "zero_constant" "j")))]
4759 "TARGET_P9_VECTOR"
4760 "xststdc<sd>p %0,%x1,%2"
4761 [(set_attr "type" "fpcompare")])
4762
4763 ;; VSX Vector Extract Exponent Double and Single Precision
4764 (define_insn "xvxexp<sd>p"
4765 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4766 (unspec:VSX_F
4767 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4768 UNSPEC_VSX_VXEXP))]
4769 "TARGET_P9_VECTOR"
4770 "xvxexp<sd>p %x0,%x1"
4771 [(set_attr "type" "vecsimple")])
4772
4773 ;; VSX Vector Extract Significand Double and Single Precision
4774 (define_insn "xvxsig<sd>p"
4775 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4776 (unspec:VSX_F
4777 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4778 UNSPEC_VSX_VXSIG))]
4779 "TARGET_P9_VECTOR"
4780 "xvxsig<sd>p %x0,%x1"
4781 [(set_attr "type" "vecsimple")])
4782
4783 ;; VSX Vector Insert Exponent Double and Single Precision
4784 (define_insn "xviexp<sd>p"
4785 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4786 (unspec:VSX_F
4787 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4788 (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
4789 UNSPEC_VSX_VIEXP))]
4790 "TARGET_P9_VECTOR"
4791 "xviexp<sd>p %x0,%x1,%x2"
4792 [(set_attr "type" "vecsimple")])
4793
4794 ;; VSX Vector Test Data Class Double and Single Precision
4795 ;; The corresponding elements of the result vector are all ones
4796 ;; if any of the conditions tested by operand 3 are satisfied.
4797 (define_insn "xvtstdc<sd>p"
4798 [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
4799 (unspec:<VSI>
4800 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4801 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4802 UNSPEC_VSX_VTSTDC))]
4803 "TARGET_P9_VECTOR"
4804 "xvtstdc<sd>p %x0,%x1,%2"
4805 [(set_attr "type" "vecsimple")])
4806
4807 ;; ISA 3.0 String Operations Support
4808
4809 ;; Compare vectors producing a vector result and a predicate, setting CR6
4810 ;; to indicate a combined status.  This pattern matches v16qi, v8hi, and
4811 ;; v4si modes.  There's no need for it to match v2df, v4sf, or v2di
4812 ;; modes, because those comparisons are expanded to use Power8
4813 ;; instructions.
4814 (define_insn "*vsx_ne_<mode>_p"
4815 [(set (reg:CC CR6_REGNO)
4816 (unspec:CC
4817 [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4818 (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))]
4819 UNSPEC_PREDICATE))
4820 (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v")
4821 (ne:VSX_EXTRACT_I (match_dup 1)
4822 (match_dup 2)))]
4823 "TARGET_P9_VECTOR"
4824 "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4825 [(set_attr "type" "vecsimple")])
4826
4827 (define_insn "*vector_nez_<mode>_p"
4828 [(set (reg:CC CR6_REGNO)
4829 (unspec:CC [(unspec:VI
4830 [(match_operand:VI 1 "gpc_reg_operand" "v")
4831 (match_operand:VI 2 "gpc_reg_operand" "v")]
4832 UNSPEC_NEZ_P)]
4833 UNSPEC_PREDICATE))
4834 (set (match_operand:VI 0 "gpc_reg_operand" "=v")
4835 (unspec:VI [(match_dup 1)
4836 (match_dup 2)]
4837 UNSPEC_NEZ_P))]
4838 "TARGET_P9_VECTOR"
4839 "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4840 [(set_attr "type" "vecsimple")])
4841
4842 ;; Return first position of match between vectors using natural element order
4843 ;; for both LE and BE execution modes.
4844 (define_expand "first_match_index_<mode>"
4845 [(match_operand:SI 0 "register_operand")
4846 (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4847 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4848 UNSPEC_VSX_FIRST_MATCH_INDEX)]
4849 "TARGET_P9_VECTOR"
4850 {
4851 int sh;
4852
4853 rtx cmp_result = gen_reg_rtx (<MODE>mode);
4854 rtx not_result = gen_reg_rtx (<MODE>mode);
4855
4856 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
4857 operands[2]));
4858 emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result));
4859
4860 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4861
4862 if (<MODE>mode == V16QImode)
4863 {
4864 if (!BYTES_BIG_ENDIAN)
4865 emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result));
4866 else
4867 emit_insn (gen_vclzlsbb_<mode> (operands[0], not_result));
4868 }
4869 else
4870 {
4871 rtx tmp = gen_reg_rtx (SImode);
4872 if (!BYTES_BIG_ENDIAN)
4873 emit_insn (gen_vctzlsbb_<mode> (tmp, not_result));
4874 else
4875 emit_insn (gen_vclzlsbb_<mode> (tmp, not_result));
4876 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4877 }
4878 DONE;
4879 })
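
;; Worked example of the index adjustment above: vctzlsbb/vclzlsbb produce a
;; *byte* index, so for V8HI sh = 2/2 = 1 and the byte index is shifted
;; right by 1 (divide by the 2-byte element size); for V4SI, sh = 4/2 = 2
;; (divide by 4).  V16QI needs no shift and takes the direct path.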
4880
4881 ;; Return first position of match between vectors or end of string (EOS) using
4882 ;; natural element order for both LE and BE execution modes.
4883 (define_expand "first_match_or_eos_index_<mode>"
4884 [(match_operand:SI 0 "register_operand")
4885 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4886 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4887 UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)]
4888 "TARGET_P9_VECTOR"
4889 {
4890 int sh;
4891 rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
4892 rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
4893 rtx cmpz_result = gen_reg_rtx (<MODE>mode);
4894 rtx and_result = gen_reg_rtx (<MODE>mode);
4895 rtx result = gen_reg_rtx (<MODE>mode);
4896 rtx vzero = gen_reg_rtx (<MODE>mode);
4897
4898 /* Vector with zeros in elements that correspond to zeros in operands. */
4899 emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
4900 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
4901 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
4902 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
4903
4904 /* Vector with ones in elements that do not match.  */
4905 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
4906 operands[2]));
4907
4908 /* Create vector with ones in elements where there was a zero in one of
4909 the source elements or where the elements match.  */
4910 emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result));
4911 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4912
4913 if (<MODE>mode == V16QImode)
4914 {
4915 if (!BYTES_BIG_ENDIAN)
4916 emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
4917 else
4918 emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
4919 }
4920 else
4921 {
4922 rtx tmp = gen_reg_rtx (SImode);
4923 if (!BYTES_BIG_ENDIAN)
4924 emit_insn (gen_vctzlsbb_<mode> (tmp, result));
4925 else
4926 emit_insn (gen_vclzlsbb_<mode> (tmp, result));
4927 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4928 }
4929 DONE;
4930 })
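;; Element-wise, the three vector operations above combine to (a hedged
;; sketch of the logic in C-style notation):
;;
;;   and_result[i]  = (a[i] != 0) && (b[i] != 0)    /* neither is EOS */
;;   cmpz_result[i] = (a[i] != b[i]) || (a[i] == 0) || (b[i] == 0)
;;   result[i]      = !(and_result[i] && cmpz_result[i])
;;                  = (a[i] == b[i]) || (a[i] == 0) || (b[i] == 0)
;;
;; so counting the leading (BE) or trailing (LE) zero least-significant bits
;; finds the first match or end-of-string, whichever comes first.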
4931
4932 ;; Return first position of mismatch between vectors using natural
4933 ;; element order for both LE and BE execution modes.
4934 (define_expand "first_mismatch_index_<mode>"
4935 [(match_operand:SI 0 "register_operand")
4936 (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4937 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4938 UNSPEC_VSX_FIRST_MISMATCH_INDEX)]
4939 "TARGET_P9_VECTOR"
4940 {
4941 int sh;
4942 rtx cmp_result = gen_reg_rtx (<MODE>mode);
4943
4944 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
4945 operands[2]));
4946 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4947
4948 if (<MODE>mode == V16QImode)
4949 {
4950 if (!BYTES_BIG_ENDIAN)
4951 emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result));
4952 else
4953 emit_insn (gen_vclzlsbb_<mode> (operands[0], cmp_result));
4954 }
4955 else
4956 {
4957 rtx tmp = gen_reg_rtx (SImode);
4958 if (!BYTES_BIG_ENDIAN)
4959 emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result));
4960 else
4961 emit_insn (gen_vclzlsbb_<mode> (tmp, cmp_result));
4962 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4963 }
4964 DONE;
4965 })
4966
4967 ;; Return first position of mismatch between vectors or end of string (EOS)
4968 ;; using natural element order for both LE and BE execution modes.
4969 (define_expand "first_mismatch_or_eos_index_<mode>"
4970 [(match_operand:SI 0 "register_operand")
4971 (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4972 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4973 UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)]
4974 "TARGET_P9_VECTOR"
4975 {
4976 int sh;
4977 rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
4978 rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
4979 rtx cmpz_result = gen_reg_rtx (<MODE>mode);
4980 rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
4981 rtx and_result = gen_reg_rtx (<MODE>mode);
4982 rtx result = gen_reg_rtx (<MODE>mode);
4983 rtx vzero = gen_reg_rtx (<MODE>mode);
4984
4985 /* Vector with zeros in elements that correspond to zeros in operands. */
4986 emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
4987
4988 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
4989 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
4990 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
4991
4992 /* Vector with ones in elements that match (and are not zero). */
4993 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
4994 operands[2]));
4995 emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));
4996
4997 /* Create vector with ones in elements where there was a zero in one of
4998 the source elements or where the elements did not match. */
4999 emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
5000 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5001
5002 if (<MODE>mode == V16QImode)
5003 {
5004 if (!BYTES_BIG_ENDIAN)
5005 emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
5006 else
5007 emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
5008 }
5009 else
5010 {
5011 rtx tmp = gen_reg_rtx (SImode);
5012 if (!BYTES_BIG_ENDIAN)
5013 emit_insn (gen_vctzlsbb_<mode> (tmp, result));
5014 else
5015 emit_insn (gen_vclzlsbb_<mode> (tmp, result));
5016 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
5017 }
5018 DONE;
5019 })
5020
5021 ;; Load VSX Vector with Length
5022 (define_expand "lxvl"
5023 [(set (match_dup 3)
5024 (ashift:DI (match_operand:DI 2 "register_operand")
5025 (const_int 56)))
5026 (set (match_operand:V16QI 0 "vsx_register_operand")
5027 (unspec:V16QI
5028 [(match_operand:DI 1 "gpc_reg_operand")
5029 (mem:V16QI (match_dup 1))
5030 (match_dup 3)]
5031 UNSPEC_LXVL))]
5032 "TARGET_P9_VECTOR && TARGET_64BIT"
5033 {
5034 operands[3] = gen_reg_rtx (DImode);
5035 })
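;; lxvl and stxvl take the number of bytes to transfer in bits 0:7 of the
;; length register, which is why the expanders shift the count left by 56.
;; A minimal usage sketch via the vec_xl_len intrinsic (assuming the
;; altivec.h interface; the function and pointer names are illustrative):
;;
;;   #include <altivec.h>
;;   vector unsigned char
;;   load_prefix (const unsigned char *p, size_t len)  /* len <= 16 */
;;   {
;;     return vec_xl_len ((unsigned char *) p, len);   /* emits lxvl */
;;   }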
5036
5037 (define_insn "*lxvl"
5038 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5039 (unspec:V16QI
5040 [(match_operand:DI 1 "gpc_reg_operand" "b")
5041 (mem:V16QI (match_dup 1))
5042 (match_operand:DI 2 "register_operand" "r")]
5043 UNSPEC_LXVL))]
5044 "TARGET_P9_VECTOR && TARGET_64BIT"
5045 "lxvl %x0,%1,%2"
5046 [(set_attr "type" "vecload")])
5047
5048 (define_insn "lxvll"
5049 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5050 (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
5051 (mem:V16QI (match_dup 1))
5052 (match_operand:DI 2 "register_operand" "r")]
5053 UNSPEC_LXVLL))]
5054 "TARGET_P9_VECTOR"
5055 "lxvll %x0,%1,%2"
5056 [(set_attr "type" "vecload")])
5057
5058 ;; Expand for builtin xl_len_r
5059 (define_expand "xl_len_r"
5060 [(match_operand:V16QI 0 "vsx_register_operand")
5061 (match_operand:DI 1 "register_operand")
5062 (match_operand:DI 2 "register_operand")]
5063 ""
5064 {
5065 rtx shift_mask = gen_reg_rtx (V16QImode);
5066 rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5067 rtx tmp = gen_reg_rtx (DImode);
5068
5069 emit_insn (gen_altivec_lvsl_reg (shift_mask, operands[2]));
5070 emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5071 emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
5072 emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
5073 shift_mask));
5074 DONE;
5075 })
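;; Note: lxvll left-justifies the loaded bytes in the vector register; the
;; vperm with the lvsl-generated control then rotates them so that xl_len_r
;; returns its bytes right-justified.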
5076
5077 (define_insn "stxvll"
5078 [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5079 (unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5080 (mem:V16QI (match_dup 1))
5081 (match_operand:DI 2 "register_operand" "r")]
5082 UNSPEC_STXVLL))]
5083 "TARGET_P9_VECTOR"
5084 "stxvll %x0,%1,%2"
5085 [(set_attr "type" "vecstore")])
5086
5087 ;; Store VSX Vector with Length
5088 (define_expand "stxvl"
5089 [(set (match_dup 3)
5090 (ashift:DI (match_operand:DI 2 "register_operand")
5091 (const_int 56)))
5092 (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
5093 (unspec:V16QI
5094 [(match_operand:V16QI 0 "vsx_register_operand")
5095 (mem:V16QI (match_dup 1))
5096 (match_dup 3)]
5097 UNSPEC_STXVL))]
5098 "TARGET_P9_VECTOR && TARGET_64BIT"
5099 {
5100 operands[3] = gen_reg_rtx (DImode);
5101 })
5102
5103 (define_insn "*stxvl"
5104 [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5105 (unspec:V16QI
5106 [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5107 (mem:V16QI (match_dup 1))
5108 (match_operand:DI 2 "register_operand" "r")]
5109 UNSPEC_STXVL))]
5110 "TARGET_P9_VECTOR && TARGET_64BIT"
5111 "stxvl %x0,%1,%2"
5112 [(set_attr "type" "vecstore")])
5113
5114 ;; Expand for builtin xst_len_r
5115 (define_expand "xst_len_r"
5116 [(match_operand:V16QI 0 "vsx_register_operand")
5117 (match_operand:DI 1 "register_operand")
5118 (match_operand:DI 2 "register_operand")]
5119 ""
5120 {
5121 rtx shift_mask = gen_reg_rtx (V16QImode);
5122 rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5123 rtx tmp = gen_reg_rtx (DImode);
5124
5125 emit_insn (gen_altivec_lvsr_reg (shift_mask, operands[2]));
5126 emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
5127 shift_mask));
5128 emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5129 emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
5130 DONE;
5131 })
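;; Note: the mirror image of xl_len_r above; the source bytes are
;; right-justified in the register, so the lvsr-generated control rotates
;; them into the left-justified position that stxvll stores from.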
5132
5133 ;; Vector Compare Not Equal Byte (expressed as not+eq:)
5134 (define_insn "vcmpneb"
5135 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5136 (not:V16QI
5137 (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v")
5138 (match_operand:V16QI 2 "altivec_register_operand" "v"))))]
5139 "TARGET_P9_VECTOR"
5140 "vcmpneb %0,%1,%2"
5141 [(set_attr "type" "vecsimple")])
5142
5143 ;; Vector Compare Not Equal or Zero Byte
5144 (define_insn "vcmpnezb"
5145 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5146 (unspec:V16QI
5147 [(match_operand:V16QI 1 "altivec_register_operand" "v")
5148 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5149 UNSPEC_VCMPNEZB))]
5150 "TARGET_P9_VECTOR"
5151 "vcmpnezb %0,%1,%2"
5152 [(set_attr "type" "vecsimple")])
5153
5154 ;; Vector Compare Not Equal or Zero Byte predicate (record form)
5155 (define_insn "vcmpnezb_p"
5156 [(set (reg:CC CR6_REGNO)
5157 (unspec:CC
5158 [(match_operand:V16QI 1 "altivec_register_operand" "v")
5159 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5160 UNSPEC_VCMPNEZB))
5161 (set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5162 (unspec:V16QI
5163 [(match_dup 1)
5164 (match_dup 2)]
5165 UNSPEC_VCMPNEZB))]
5166 "TARGET_P9_VECTOR"
5167 "vcmpnezb. %0,%1,%2"
5168 [(set_attr "type" "vecsimple")])
5169
5170 ;; Vector Compare Not Equal Half Word (expressed as not+eq:)
5171 (define_insn "vcmpneh"
5172 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5173 (not:V8HI
5174 (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v")
5175 (match_operand:V8HI 2 "altivec_register_operand" "v"))))]
5176 "TARGET_P9_VECTOR"
5177 "vcmpneh %0,%1,%2"
5178 [(set_attr "type" "vecsimple")])
5179
5180 ;; Vector Compare Not Equal or Zero Half Word
5181 (define_insn "vcmpnezh"
5182 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5183 (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
5184 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5185 UNSPEC_VCMPNEZH))]
5186 "TARGET_P9_VECTOR"
5187 "vcmpnezh %0,%1,%2"
5188 [(set_attr "type" "vecsimple")])
5189
5190 ;; Vector Compare Not Equal Word (expressed as not+eq:)
5191 (define_insn "vcmpnew"
5192 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5193 (not:V4SI
5194 (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v")
5195 (match_operand:V4SI 2 "altivec_register_operand" "v"))))]
5196 "TARGET_P9_VECTOR"
5197 "vcmpnew %0,%1,%2"
5198 [(set_attr "type" "vecsimple")])
5199
5200 ;; Vector Compare Not Equal or Zero Word
5201 (define_insn "vcmpnezw"
5202 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5203 (unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
5204 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5205 UNSPEC_VCMPNEZW))]
5206 "TARGET_P9_VECTOR"
5207 "vcmpnezw %0,%1,%2"
5208 [(set_attr "type" "vecsimple")])
5209
5210 ;; Vector Count Leading Zero Least-Significant Bits Byte
5211 (define_insn "vclzlsbb_<mode>"
5212 [(set (match_operand:SI 0 "register_operand" "=r")
5213 (unspec:SI
5214 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5215 UNSPEC_VCLZLSBB))]
5216 "TARGET_P9_VECTOR"
5217 "vclzlsbb %0,%1"
5218 [(set_attr "type" "vecsimple")])
5219
5220 ;; Vector Count Trailing Zero Least-Significant Bits Byte
5221 (define_insn "vctzlsbb_<mode>"
5222 [(set (match_operand:SI 0 "register_operand" "=r")
5223 (unspec:SI
5224 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5225 UNSPEC_VCTZLSBB))]
5226 "TARGET_P9_VECTOR"
5227 "vctzlsbb %0,%1"
5228 [(set_attr "type" "vecsimple")])
5229
5230 ;; Vector Extract Unsigned Byte Left-Indexed
5231 (define_insn "vextublx"
5232 [(set (match_operand:SI 0 "register_operand" "=r")
5233 (unspec:SI
5234 [(match_operand:SI 1 "register_operand" "r")
5235 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5236 UNSPEC_VEXTUBLX))]
5237 "TARGET_P9_VECTOR"
5238 "vextublx %0,%1,%2"
5239 [(set_attr "type" "vecsimple")])
5240
5241 ;; Vector Extract Unsigned Byte Right-Indexed
5242 (define_insn "vextubrx"
5243 [(set (match_operand:SI 0 "register_operand" "=r")
5244 (unspec:SI
5245 [(match_operand:SI 1 "register_operand" "r")
5246 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5247 UNSPEC_VEXTUBRX))]
5248 "TARGET_P9_VECTOR"
5249 "vextubrx %0,%1,%2"
5250 [(set_attr "type" "vecsimple")])
5251
5252 ;; Vector Extract Unsigned Half Word Left-Indexed
5253 (define_insn "vextuhlx"
5254 [(set (match_operand:SI 0 "register_operand" "=r")
5255 (unspec:SI
5256 [(match_operand:SI 1 "register_operand" "r")
5257 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5258 UNSPEC_VEXTUHLX))]
5259 "TARGET_P9_VECTOR"
5260 "vextuhlx %0,%1,%2"
5261 [(set_attr "type" "vecsimple")])
5262
5263 ;; Vector Extract Unsigned Half Word Right-Indexed
5264 (define_insn "vextuhrx"
5265 [(set (match_operand:SI 0 "register_operand" "=r")
5266 (unspec:SI
5267 [(match_operand:SI 1 "register_operand" "r")
5268 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5269 UNSPEC_VEXTUHRX))]
5270 "TARGET_P9_VECTOR"
5271 "vextuhrx %0,%1,%2"
5272 [(set_attr "type" "vecsimple")])
5273
5274 ;; Vector Extract Unsigned Word Left-Indexed
5275 (define_insn "vextuwlx"
5276 [(set (match_operand:SI 0 "register_operand" "=r")
5277 (unspec:SI
5278 [(match_operand:SI 1 "register_operand" "r")
5279 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5280 UNSPEC_VEXTUWLX))]
5281 "TARGET_P9_VECTOR"
5282 "vextuwlx %0,%1,%2"
5283 [(set_attr "type" "vecsimple")])
5284
5285 ;; Vector Extract Unsigned Word Right-Indexed
5286 (define_insn "vextuwrx"
5287 [(set (match_operand:SI 0 "register_operand" "=r")
5288 (unspec:SI
5289 [(match_operand:SI 1 "register_operand" "r")
5290 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5291 UNSPEC_VEXTUWRX))]
5292 "TARGET_P9_VECTOR"
5293 "vextuwrx %0,%1,%2"
5294 [(set_attr "type" "vecsimple")])
5295
5296 ;; Vector insert/extract word at arbitrary byte values. Note that the
5297 ;; little endian version needs to adjust the byte number and the V4SI
5298 ;; element order used by insert4b.
5299 (define_insn "extract4b"
5300 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
5301 (unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
5302 (match_operand:QI 2 "const_0_to_12_operand" "n")]
5303 UNSPEC_XXEXTRACTUW))]
5304 "TARGET_P9_VECTOR"
5305 {
5306 if (!BYTES_BIG_ENDIAN)
5307 operands[2] = GEN_INT (12 - INTVAL (operands[2]));
5308
5309 return "xxextractuw %x0,%x1,%2";
5310 })
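;; For example (illustrative values), extracting the word at byte offset 4
;; on little endian rewrites the immediate to 12 - 4 = 8 before emitting
;; xxextractuw, since element numbering is reversed.  insert4b below applies
;; the same 12 - n rewrite and additionally swaps the two doublewords of its
;; V4SI input with xxpermdi.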
5311
5312 (define_expand "insert4b"
5313 [(set (match_operand:V16QI 0 "vsx_register_operand")
5314 (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
5315 (match_operand:V16QI 2 "vsx_register_operand")
5316 (match_operand:QI 3 "const_0_to_12_operand")]
5317 UNSPEC_XXINSERTW))]
5318 "TARGET_P9_VECTOR"
5319 {
5320 if (!BYTES_BIG_ENDIAN)
5321 {
5322 rtx op1 = operands[1];
5323 rtx v4si_tmp = gen_reg_rtx (V4SImode);
5324 emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
5325 operands[1] = v4si_tmp;
5326 operands[3] = GEN_INT (12 - INTVAL (operands[3]));
5327 }
5328 })
5329
5330 (define_insn "*insert4b_internal"
5331 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5332 (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
5333 (match_operand:V16QI 2 "vsx_register_operand" "0")
5334 (match_operand:QI 3 "const_0_to_12_operand" "n")]
5335 UNSPEC_XXINSERTW))]
5336 "TARGET_P9_VECTOR"
5337 "xxinsertw %x0,%x1,%3"
5338 [(set_attr "type" "vecperm")])
5339
5340
5341 ;; Extract four float 32 values from the left four elements of an
5342 ;; eight-element vector of float 16 values.
5343 (define_expand "vextract_fp_from_shorth"
5344 [(set (match_operand:V4SF 0 "register_operand" "=wa")
5345 (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5346 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
5347 "TARGET_P9_VECTOR"
5348 {
5349 int i;
5350 int vals_le[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
5351 int vals_be[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
5352
5353 rtx rvals[16];
5354 rtx mask = gen_reg_rtx (V16QImode);
5355 rtx tmp = gen_reg_rtx (V16QImode);
5356 rtvec v;
5357
5358 for (i = 0; i < 16; i++)
5359 if (!BYTES_BIG_ENDIAN)
5360 rvals[i] = GEN_INT (vals_le[i]);
5361 else
5362 rvals[i] = GEN_INT (vals_be[i]);
5363
5364 /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5365 inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move
5366 src half words 0,1,2,3 (LE), src half words 4,5,6,7 (BE) for the
5367 conversion instruction. */
5368 v = gen_rtvec_v (16, rvals);
5369 emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5370 emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5371 operands[1], mask));
5372 emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5373 DONE;
5374 })
5375
5376 ;; Extract four float 32 values from the right four elements of an
5377 ;; eight-element vector of float 16 values.
5378 (define_expand "vextract_fp_from_shortl"
5379 [(set (match_operand:V4SF 0 "register_operand" "=wa")
5380 (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5381 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
5382 "TARGET_P9_VECTOR"
5383 {
5384 int vals_le[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
5385 int vals_be[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
5386
5387 int i;
5388 rtx rvals[16];
5389 rtx mask = gen_reg_rtx (V16QImode);
5390 rtx tmp = gen_reg_rtx (V16QImode);
5391 rtvec v;
5392
5393 for (i = 0; i < 16; i++)
5394 if (!BYTES_BIG_ENDIAN)
5395 rvals[i] = GEN_INT (vals_le[i]);
5396 else
5397 rvals[i] = GEN_INT (vals_be[i]);
5398
5399 /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5400 inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move
5401 src half words 4,5,6,7 (LE), src half words 0,1,2,3 (BE) for the
5402 conversion instruction. */
5403 v = gen_rtvec_v (16, rvals);
5404 emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5405 emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5406 operands[1], mask));
5407 emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5408 DONE;
5409 })
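;; In both expanders above, the mask entries written as 0 fall in the half
;; words that xvcvhpsp ignores (only half words 1,3,5,7 are converted), so
;; they are don't-cares; 0 is simply a convenient filler index.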
5410
5411 ;; Support for ISA 3.0 vector byte reverse
5412
5413 ;; Swap all bytes within a vector
5414 (define_insn "p9_xxbrq_v1ti"
5415 [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
5416 (bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
5417 "TARGET_P9_VECTOR"
5418 "xxbrq %x0,%x1"
5419 [(set_attr "type" "vecperm")])
5420
5421 (define_expand "p9_xxbrq_v16qi"
5422 [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
5423 (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]
5424 "TARGET_P9_VECTOR"
5425 {
5426 rtx op0 = gen_reg_rtx (V1TImode);
5427 rtx op1 = gen_lowpart (V1TImode, operands[1]);
5428 emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
5429 emit_move_insn (operands[0], gen_lowpart (V16QImode, op0));
5430 DONE;
5431 })
5432
5433 ;; Swap all bytes in each 64-bit element
5434 (define_insn "p9_xxbrd_v2di"
5435 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
5436 (bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
5437 "TARGET_P9_VECTOR"
5438 "xxbrd %x0,%x1"
5439 [(set_attr "type" "vecperm")])
5440
5441 (define_expand "p9_xxbrd_v2df"
5442 [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa"))
5443 (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))]
5444 "TARGET_P9_VECTOR"
5445 {
5446 rtx op0 = gen_reg_rtx (V2DImode);
5447 rtx op1 = gen_lowpart (V2DImode, operands[1]);
5448 emit_insn (gen_p9_xxbrd_v2di (op0, op1));
5449 emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0));
5450 DONE;
5451 })
5452
5453 ;; Swap all bytes in each 32-bit element
5454 (define_insn "p9_xxbrw_v4si"
5455 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
5456 (bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
5457 "TARGET_P9_VECTOR"
5458 "xxbrw %x0,%x1"
5459 [(set_attr "type" "vecperm")])
5460
5461 (define_expand "p9_xxbrw_v4sf"
5462 [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa"))
5463 (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))]
5464 "TARGET_P9_VECTOR"
5465 {
5466 rtx op0 = gen_reg_rtx (V4SImode);
5467 rtx op1 = gen_lowpart (V4SImode, operands[1]);
5468 emit_insn (gen_p9_xxbrw_v4si (op0, op1));
5469 emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0));
5470 DONE;
5471 })
5472
5473 ;; Swap all bytes in each element of vector
5474 (define_expand "revb_<mode>"
5475 [(use (match_operand:VEC_REVB 0 "vsx_register_operand"))
5476 (use (match_operand:VEC_REVB 1 "vsx_register_operand"))]
5477 ""
5478 {
5479 if (TARGET_P9_VECTOR)
5480 emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
5481 else
5482 {
5483 /* We want to have the elements in reverse order relative
5484 to the endian mode in use, i.e. in LE mode, put the elements
5485 in BE order. */
5486 rtx sel = swap_endian_selector_for_mode (<MODE>mode);
5487 emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
5488 operands[1], sel));
5489 }
5490
5491 DONE;
5492 })
5493
5494 ;; Reversing bytes in vector char is just a NOP.
5495 (define_expand "revb_v16qi"
5496 [(set (match_operand:V16QI 0 "vsx_register_operand")
5497 (bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
5498 ""
5499 {
5500 emit_move_insn (operands[0], operands[1]);
5501 DONE;
5502 })
5503
5504 ;; Swap all bytes in each 16-bit element
5505 (define_insn "p9_xxbrh_v8hi"
5506 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
5507 (bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
5508 "TARGET_P9_VECTOR"
5509 "xxbrh %x0,%x1"
5510 [(set_attr "type" "vecperm")])
5511 \f
5512
5513 ;; Operand numbers for the following peephole2
5514 (define_constants
5515 [(SFBOOL_TMP_GPR 0) ;; GPR temporary
5516 (SFBOOL_TMP_VSX 1) ;; vector temporary
5517 (SFBOOL_MFVSR_D 2) ;; move to gpr dest
5518 (SFBOOL_MFVSR_A 3) ;; move to gpr src
5519 (SFBOOL_BOOL_D 4) ;; and/ior/xor dest
5520 (SFBOOL_BOOL_A1 5) ;; and/ior/xor arg1
5521 (SFBOOL_BOOL_A2 6) ;; and/ior/xor arg2
5522 (SFBOOL_SHL_D 7) ;; shift left dest
5523 (SFBOOL_SHL_A 8) ;; shift left arg
5524 (SFBOOL_MTVSR_D 9) ;; move to vector dest
5525 (SFBOOL_MFVSR_A_V4SF 10) ;; SFBOOL_MFVSR_A as V4SFmode
5526 (SFBOOL_BOOL_A_DI 11) ;; SFBOOL_BOOL_A1/A2 as DImode
5527 (SFBOOL_TMP_VSX_DI 12) ;; SFBOOL_TMP_VSX as DImode
5528 (SFBOOL_MTVSR_D_V4SF 13)]) ;; SFBOOL_MTVSR_D as V4SFmode
5529
5530 ;; Attempt to optimize some common GLIBC operations using logical operations to
5531 ;; pick apart SFmode operations. For example, there is code from e_powf.c
5532 ;; after macro expansion that looks like:
5533 ;;
5534 ;; typedef union {
5535 ;; float value;
5536 ;; uint32_t word;
5537 ;; } ieee_float_shape_type;
5538 ;;
5539 ;; float t1;
5540 ;; int32_t is;
5541 ;;
5542 ;; do {
5543 ;; ieee_float_shape_type gf_u;
5544 ;; gf_u.value = (t1);
5545 ;; (is) = gf_u.word;
5546 ;; } while (0);
5547 ;;
5548 ;; do {
5549 ;; ieee_float_shape_type sf_u;
5550 ;; sf_u.word = (is & 0xfffff000);
5551 ;; (t1) = sf_u.value;
5552 ;; } while (0);
5553 ;;
5554 ;;
5555 ;; This would result in two direct move operations (convert to memory format,
5556 ;; direct move to GPR, do the AND operation, direct move to VSX, convert to
5557 ;; scalar format). With this peephole, we eliminate the direct move to the
5558 ;; GPR, and instead move the integer mask value to the vector register after a
5559 ;; shift and do the VSX logical operation.
5560
5561 ;; The insns for dealing with SFmode in GPR registers looks like:
5562 ;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
5563 ;;
5564 ;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
5565 ;;
5566 ;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
5567 ;;
5568 ;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
5569 ;;
5570 ;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
5571 ;;
5572 ;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
5573
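;; After the peephole, the matched insns are replaced by, roughly (a sketch;
;; register names are placeholders, and the constant-mask case first loads
;; the mask into the GPR temporary):
;;
;; (set (reg:DI tmp_gpr) (ashift:DI (reg:DI bool_arg) (const_int 32)))
;;
;; (set (reg:DI tmp_vsx) (reg:DI tmp_gpr)) ;; mtvsrd
;;
;; (set (reg:V4SF reg6) (and:V4SF (reg:V4SF reg2) (reg:V4SF tmp_vsx)))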
5574 (define_peephole2
5575 [(match_scratch:DI SFBOOL_TMP_GPR "r")
5576 (match_scratch:V4SF SFBOOL_TMP_VSX "wa")
5577
5578 ;; MFVSRWZ (aka zero_extend)
5579 (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
5580 (zero_extend:DI
5581 (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))
5582
5583 ;; AND/IOR/XOR operation on int
5584 (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
5585 (and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
5586 (match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))
5587
5588 ;; SLDI
5589 (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
5590 (ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
5591 (const_int 32)))
5592
5593 ;; MTVSRD
5594 (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
5595 (unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]
5596
5597 "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
5598 /* The REG_P (xxx) tests prevent SUBREGs, which allows us to use REGNO
5599 to compare registers when the modes are different. */
5600 && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
5601 && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
5602 && REG_P (operands[SFBOOL_SHL_A]) && REG_P (operands[SFBOOL_MTVSR_D])
5603 && (REG_P (operands[SFBOOL_BOOL_A2])
5604 || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
5605 && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
5606 || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
5607 && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
5608 || (REG_P (operands[SFBOOL_BOOL_A2])
5609 && REGNO (operands[SFBOOL_MFVSR_D])
5610 == REGNO (operands[SFBOOL_BOOL_A2])))
5611 && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
5612 && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
5613 || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
5614 && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
5615 [(set (match_dup SFBOOL_TMP_GPR)
5616 (ashift:DI (match_dup SFBOOL_BOOL_A_DI)
5617 (const_int 32)))
5618
5619 (set (match_dup SFBOOL_TMP_VSX_DI)
5620 (match_dup SFBOOL_TMP_GPR))
5621
5622 (set (match_dup SFBOOL_MTVSR_D_V4SF)
5623 (and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
5624 (match_dup SFBOOL_TMP_VSX)))]
5625 {
5626 rtx bool_a1 = operands[SFBOOL_BOOL_A1];
5627 rtx bool_a2 = operands[SFBOOL_BOOL_A2];
5628 int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
5629 int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
5630 int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
5631 int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);
5632
5633 if (CONST_INT_P (bool_a2))
5634 {
5635 rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
5636 emit_move_insn (tmp_gpr, bool_a2);
5637 operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
5638 }
5639 else
5640 {
5641 int regno_bool_a1 = REGNO (bool_a1);
5642 int regno_bool_a2 = REGNO (bool_a2);
5643 int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
5644 ? regno_bool_a2 : regno_bool_a1);
5645 operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
5646 }
5647
5648 operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
5649 operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
5650 operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
5651 })
5652
5653 ;; Support signed/unsigned long long to float conversion vectorization.
5654 ;; Note that any_float (pc) here is just for code attribute <su>.
5655 (define_expand "vec_pack<su>_float_v2di"
5656 [(match_operand:V4SF 0 "vfloat_operand")
5657 (match_operand:V2DI 1 "vint_operand")
5658 (match_operand:V2DI 2 "vint_operand")
5659 (any_float (pc))]
5660 "TARGET_VSX"
5661 {
5662 rtx r1 = gen_reg_rtx (V4SFmode);
5663 rtx r2 = gen_reg_rtx (V4SFmode);
5664 emit_insn (gen_vsx_xvcv<su>xdsp (r1, operands[1]));
5665 emit_insn (gen_vsx_xvcv<su>xdsp (r2, operands[2]));
5666 rs6000_expand_extract_even (operands[0], r1, r2);
5667 DONE;
5668 })
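;; This implements the vectorizer's pack-and-convert idiom, roughly (a
;; hedged C sketch; the names are illustrative):
;;
;;   void f (float *restrict out, long long *restrict in, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       out[i] = (float) in[i];
;;   }
;;
;; xvcv<su>xdsp leaves its two results in the even word elements of each
;; V4SF, so rs6000_expand_extract_even merges the two partial vectors.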
5669
5670 ;; Support float to signed/unsigned long long conversion vectorization.
5671 ;; Note that any_fix (pc) here is just for code attribute <su>.
5672 (define_expand "vec_unpack_<su>fix_trunc_hi_v4sf"
5673 [(match_operand:V2DI 0 "vint_operand")
5674 (match_operand:V4SF 1 "vfloat_operand")
5675 (any_fix (pc))]
5676 "TARGET_VSX"
5677 {
5678 rtx reg = gen_reg_rtx (V4SFmode);
5679 rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN);
5680 emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg));
5681 DONE;
5682 })
5683
5684 ;; Note that any_fix (pc) here is just for code attribute <su>.
5685 (define_expand "vec_unpack_<su>fix_trunc_lo_v4sf"
5686 [(match_operand:V2DI 0 "vint_operand")
5687 (match_operand:V4SF 1 "vfloat_operand")
5688 (any_fix (pc))]
5689 "TARGET_VSX"
5690 {
5691 rtx reg = gen_reg_rtx (V4SFmode);
5692 rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN);
5693 emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg));
5694 DONE;
5695 })
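;; The two unpack expanders above go the other way: rs6000_expand_interleave
;; duplicates the high (or low) pair of floats across the V4SF temporary so
;; they occupy the word elements that xvcvsp<su>xds converts, producing one
;; V2DI per half.  Roughly, for a four-element input:
;;
;;   out_hi[i] = (long long) in[i];      /* i = 0, 1 */
;;   out_lo[i] = (long long) in[i + 2];  /* i = 0, 1 */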
5696
5697 (define_insn "vsx_<xvcvbf16>"
5698 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5699 (unspec:V16QI [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
5700 XVCVBF16))]
5701 "TARGET_POWER10"
5702 "<xvcvbf16> %x0,%x1"
5703 [(set_attr "type" "vecfloat")])