;; VSX patterns.
;; Copyright (C) 2009-2020 Free Software Foundation, Inc.
;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>

;; This file is part of GCC.

;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.

;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
;; License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; Iterator for comparison types
(define_code_iterator CMP_TEST [eq lt gt unordered])

;; Mode attribute for vector floate and floato conversions
(define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])

;; Iterator for both scalar and vector floating point types supported by VSX
(define_mode_iterator VSX_B [DF V4SF V2DF])

;; Iterator for the 2 64-bit vector types
(define_mode_iterator VSX_D [V2DF V2DI])

;; Mode iterator to handle swapping words on little endian for the 128-bit
;; types that go in a single vector register.
(define_mode_iterator VSX_LE_128 [(KF "FLOAT128_VECTOR_P (KFmode)")
                                  (TF "FLOAT128_VECTOR_P (TFmode)")
                                  TI
                                  V1TI])

;; Iterator for 128-bit integer types that go in a single vector register.
(define_mode_iterator VSX_TI [TI V1TI])

;; Iterator for the 2 32-bit vector types
(define_mode_iterator VSX_W [V4SF V4SI])

;; Iterator for the DF types
(define_mode_iterator VSX_DF [V2DF DF])

;; Iterator for vector floating point types supported by VSX
(define_mode_iterator VSX_F [V4SF V2DF])

;; Iterator for logical types supported by VSX
(define_mode_iterator VSX_L [V16QI
                             V8HI
                             V4SI
                             V2DI
                             V4SF
                             V2DF
                             V1TI
                             TI
                             (KF "FLOAT128_VECTOR_P (KFmode)")
                             (TF "FLOAT128_VECTOR_P (TFmode)")])

;; Iterator for memory moves.
(define_mode_iterator VSX_M [V16QI
                             V8HI
                             V4SI
                             V2DI
                             V4SF
                             V2DF
                             V1TI
                             (KF "FLOAT128_VECTOR_P (KFmode)")
                             (TF "FLOAT128_VECTOR_P (TFmode)")
                             TI])

(define_mode_attr VSX_XXBR [(V8HI "h")
                            (V4SI "w")
                            (V4SF "w")
                            (V2DF "d")
                            (V2DI "d")
                            (V1TI "q")])

;; Map into the appropriate load/store name based on the type
(define_mode_attr VSm [(V16QI "vw4")
                       (V8HI  "vw4")
                       (V4SI  "vw4")
                       (V4SF  "vw4")
                       (V2DF  "vd2")
                       (V2DI  "vd2")
                       (DF    "d")
                       (TF    "vd2")
                       (KF    "vd2")
                       (V1TI  "vd2")
                       (TI    "vd2")])

;; Map the register class used
(define_mode_attr VSr [(V16QI "v")
                       (V8HI  "v")
                       (V4SI  "v")
                       (V4SF  "wa")
                       (V2DI  "wa")
                       (V2DF  "wa")
                       (DI    "wa")
                       (DF    "wa")
                       (SF    "wa")
                       (TF    "wa")
                       (KF    "wa")
                       (V1TI  "v")
                       (TI    "wa")])

;; What value we need in the "isa" field, to make the IEEE QP float work.
(define_mode_attr VSisa [(V16QI "*")
                         (V8HI  "*")
                         (V4SI  "*")
                         (V4SF  "*")
                         (V2DI  "*")
                         (V2DF  "*")
                         (DI    "*")
                         (DF    "*")
                         (SF    "*")
                         (V1TI  "*")
                         (TI    "*")
                         (TF    "p9tf")
                         (KF    "p9kf")])

;; A mode attribute to disparage use of GPR registers, except for scalar
;; integer modes.
(define_mode_attr ??r [(V16QI "??r")
                       (V8HI  "??r")
                       (V4SI  "??r")
                       (V4SF  "??r")
                       (V2DI  "??r")
                       (V2DF  "??r")
                       (V1TI  "??r")
                       (KF    "??r")
                       (TF    "??r")
                       (TI    "r")])

;; A mode attribute used for 128-bit constant values.
(define_mode_attr nW [(V16QI "W")
                      (V8HI  "W")
                      (V4SI  "W")
                      (V4SF  "W")
                      (V2DI  "W")
                      (V2DF  "W")
                      (V1TI  "W")
                      (KF    "W")
                      (TF    "W")
                      (TI    "n")])

;; Same size integer type for floating point data
(define_mode_attr VSi [(V4SF "v4si")
                       (V2DF "v2di")
                       (DF   "di")])

(define_mode_attr VSI [(V4SF "V4SI")
                       (V2DF "V2DI")
                       (DF   "DI")])

;; Word size for same size conversion
(define_mode_attr VSc [(V4SF "w")
                       (V2DF "d")
                       (DF   "d")])

;; Map into either s or v, depending on whether this is a scalar or vector
;; operation
(define_mode_attr VSv [(V16QI "v")
                       (V8HI  "v")
                       (V4SI  "v")
                       (V4SF  "v")
                       (V2DI  "v")
                       (V2DF  "v")
                       (V1TI  "v")
                       (DF    "s")
                       (KF    "v")])

;; Appropriate type for add ops (and other simple FP ops)
(define_mode_attr VStype_simple [(V2DF "vecdouble")
                                 (V4SF "vecfloat")
                                 (DF   "fp")])

;; Appropriate type for multiply ops
(define_mode_attr VStype_mul [(V2DF "vecdouble")
                              (V4SF "vecfloat")
                              (DF   "dmul")])

;; Appropriate type for divide ops.
(define_mode_attr VStype_div [(V2DF "vecdiv")
                              (V4SF "vecfdiv")
                              (DF   "ddiv")])

;; Map the scalar mode for a vector type
(define_mode_attr VS_scalar [(V1TI  "TI")
                             (V2DF  "DF")
                             (V2DI  "DI")
                             (V4SF  "SF")
                             (V4SI  "SI")
                             (V8HI  "HI")
                             (V16QI "QI")])

;; Map to a double-sized vector mode
(define_mode_attr VS_double [(V4SI "V8SI")
                             (V4SF "V8SF")
                             (V2DI "V4DI")
                             (V2DF "V4DF")
                             (V1TI "V2TI")])

;; Iterators for loading constants with xxspltib
(define_mode_iterator VSINT_84  [V4SI V2DI DI SI])
(define_mode_iterator VSINT_842 [V8HI V4SI V2DI])

;; Vector reverse byte modes
(define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])
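
;; As an illustration, VEC_REVB together with the VSX_XXBR suffix
;; attribute above is what selects among the ISA 3.0 xxbrh/xxbrw/
;; xxbrd/xxbrq byte-reverse forms.  A minimal sketch, assuming
;; <altivec.h> and -mcpu=power9:
;;
;;   #include <altivec.h>
;;
;;   vector unsigned int
;;   revb_words (vector unsigned int v)
;;   {
;;     return vec_revb (v);	/* expected to use the "w" form, xxbrw */
;;   }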

;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
;; done on ISA 2.07 and not just ISA 3.0.
(define_mode_iterator VSX_EXTRACT_I  [V16QI V8HI V4SI])
(define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])
(define_mode_iterator VSX_EXTRACT_I4 [V16QI V8HI V4SI V2DI])

(define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")
                                     (V8HI "h")
                                     (V4SI "w")])

;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
;; insert to validate the operand number.
(define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
                                         (V8HI  "const_0_to_7_operand")
                                         (V4SI  "const_0_to_3_operand")])

;; Mode attribute to give the constraint for vector extract and insert
;; operations.
(define_mode_attr VSX_EX [(V16QI "v")
                          (V8HI  "v")
                          (V4SI  "wa")])

;; Mode iterator for binary floating types other than double, used to
;; optimize conversions to those floating point types from an extract
;; of an integer type
(define_mode_iterator VSX_EXTRACT_FL [SF
                                      (IF "FLOAT128_2REG_P (IFmode)")
                                      (KF "TARGET_FLOAT128_HW")
                                      (TF "FLOAT128_2REG_P (TFmode)
                                           || (FLOAT128_IEEE_P (TFmode)
                                               && TARGET_FLOAT128_HW)")])

;; Mode iterator for binary floating types that have a direct conversion
;; from 64-bit integer to floating point
(define_mode_iterator FL_CONV [SF
                               DF
                               (KF "TARGET_FLOAT128_HW")
                               (TF "TARGET_FLOAT128_HW
                                    && FLOAT128_IEEE_P (TFmode)")])

;; Iterator for the 2 short vector types to do a splat from an integer
(define_mode_iterator VSX_SPLAT_I [V16QI V8HI])

;; Mode attribute to give the count for the splat instruction to splat
;; the value in the 64-bit integer slot
(define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])

;; Mode attribute to give the suffix for the splat instruction
(define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])
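
;; As an illustration, the two splat attributes above combine into
;; templates of the shape "vsplt<suffix> %0,%1,<count>", i.e.
;; "vspltb ...,7" for V16QI and "vsplth ...,3" for V8HI, picking the
;; element that overlaps the 64-bit integer slot.  A minimal sketch,
;; assuming <altivec.h> and -mcpu=power9:
;;
;;   #include <altivec.h>
;;
;;   vector signed short
;;   splat_h (short x)
;;   {
;;     /* Typically a move-to-VSR followed by vsplth with count 3.  */
;;     return vec_splats (x);
;;   }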

;; Constants for creating unspecs
(define_c_enum "unspec"
  [UNSPEC_VSX_CONCAT
   UNSPEC_VSX_CVDPSXWS
   UNSPEC_VSX_CVDPUXWS
   UNSPEC_VSX_CVSPDP
   UNSPEC_VSX_CVHPSP
   UNSPEC_VSX_CVSPDPN
   UNSPEC_VSX_CVDPSPN
   UNSPEC_VSX_CVSXWDP
   UNSPEC_VSX_CVUXWDP
   UNSPEC_VSX_CVSXDSP
   UNSPEC_VSX_CVUXDSP
   UNSPEC_VSX_FLOAT2
   UNSPEC_VSX_UNS_FLOAT2
   UNSPEC_VSX_FLOATE
   UNSPEC_VSX_UNS_FLOATE
   UNSPEC_VSX_FLOATO
   UNSPEC_VSX_UNS_FLOATO
   UNSPEC_VSX_TDIV
   UNSPEC_VSX_TSQRT
   UNSPEC_VSX_SET
   UNSPEC_VSX_ROUND_I
   UNSPEC_VSX_ROUND_IC
   UNSPEC_VSX_SLDWI
   UNSPEC_VSX_XXPERM

   UNSPEC_VSX_XXSPLTW
   UNSPEC_VSX_XXSPLTD
   UNSPEC_VSX_DIVSD
   UNSPEC_VSX_DIVUD
   UNSPEC_VSX_MULSD
   UNSPEC_VSX_SIGN_EXTEND
   UNSPEC_VSX_XVCVBF16SP
   UNSPEC_VSX_XVCVSPBF16
   UNSPEC_VSX_XVCVSPSXDS
   UNSPEC_VSX_XVCVSPHP
   UNSPEC_VSX_VSLO
   UNSPEC_VSX_EXTRACT
   UNSPEC_VSX_SXEXPDP
   UNSPEC_VSX_SXSIG
   UNSPEC_VSX_SIEXPDP
   UNSPEC_VSX_SIEXPQP
   UNSPEC_VSX_SCMPEXPDP
   UNSPEC_VSX_SCMPEXPQP
   UNSPEC_VSX_STSTDC
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
   UNSPEC_VSX_VXEXP
   UNSPEC_VSX_VXSIG
   UNSPEC_VSX_VIEXP
   UNSPEC_VSX_VTSTDC
   UNSPEC_VSX_VSIGNED2

   UNSPEC_LXVL
   UNSPEC_LXVLL
   UNSPEC_LVSL_REG
   UNSPEC_LVSR_REG
   UNSPEC_STXVL
   UNSPEC_STXVLL
   UNSPEC_XL_LEN_R
   UNSPEC_XST_LEN_R

   UNSPEC_VCLZLSBB
   UNSPEC_VCTZLSBB
   UNSPEC_VEXTUBLX
   UNSPEC_VEXTUHLX
   UNSPEC_VEXTUWLX
   UNSPEC_VEXTUBRX
   UNSPEC_VEXTUHRX
   UNSPEC_VEXTUWRX
   UNSPEC_VCMPNEB
   UNSPEC_VCMPNEZB
   UNSPEC_VCMPNEH
   UNSPEC_VCMPNEZH
   UNSPEC_VCMPNEW
   UNSPEC_VCMPNEZW
   UNSPEC_XXEXTRACTUW
   UNSPEC_XXINSERTW
   UNSPEC_VSX_FIRST_MATCH_INDEX
   UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX
   UNSPEC_XXGENPCV
])

(define_int_iterator XVCVBF16 [UNSPEC_VSX_XVCVSPBF16
                               UNSPEC_VSX_XVCVBF16SP])

(define_int_attr xvcvbf16 [(UNSPEC_VSX_XVCVSPBF16 "xvcvspbf16")
                           (UNSPEC_VSX_XVCVBF16SP "xvcvbf16sp")])

;; VSX moves

;; The patterns for LE permuted loads and stores come before the general
;; VSX moves so they match first.
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 1) (const_int 0)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[0]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])
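
;; As an illustration, on little-endian POWER8 (no TARGET_P9_VECTOR) an
;; unaligned V2DF load normally matches the pattern above and splits
;; into an lxvd2x plus an xxpermdi doubleword swap; when a 128-bit
;; alignment is provable, the preparation code instead rewrites it into
;; an lvx with a masked address.  A minimal sketch, assuming
;; <altivec.h> and -mcpu=power8:
;;
;;   #include <altivec.h>
;;
;;   vector double
;;   load_v2df (const double *p)
;;   {
;;     return vec_xl (0, p);	/* typically lxvd2x + xxpermdi on LE */
;;   }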

(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
        (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[0]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 2)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[0]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 2)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[0]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 1) (const_int 0)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})
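
;; As an illustration, the matching store direction.  A minimal sketch,
;; assuming <altivec.h> and -mcpu=power8 little endian:
;;
;;   #include <altivec.h>
;;
;;   void
;;   store_v2df (double *q, vector double v)
;;   {
;;     vec_xst (v, 0, q);	/* typically xxpermdi + stxvd2x; stvx if
;;				   16-byte alignment is known */
;;   }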

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))]
  "")

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_W 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
  "")

(define_insn "*vsx_le_perm_store_v8hi"
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 2)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 1)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "")

(define_insn "*vsx_le_perm_store_v16qi"
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 2)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 1)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "")

;; Little endian word swapping for 128-bit types that are either scalars or the
;; special V1TI container class, for which it is not appropriate to use
;; vec_select.
(define_insn "*vsx_le_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=wa,wa,Z,&r,&r,Q")
        (rotate:VSX_TI
         (match_operand:VSX_TI 1 "input_operand" "wa,Z,wa,r,Q,r")
         (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "@
   xxpermdi %x0,%x1,%x1,2
   lxvd2x %x0,%y1
   stxvd2x %x1,%y0
   mr %0,%L1\;mr %L0,%1
   ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
   std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
  [(set_attr "length" "*,*,*,8,8,8")
   (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])

(define_insn_and_split "*vsx_le_undo_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=wa,wa")
        (rotate:VSX_TI
         (rotate:VSX_TI
          (match_operand:VSX_TI 1 "vsx_register_operand" "0,wa")
          (const_int 64))
         (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  "@
   #
   xxlor %x0,%x1"
  ""
  [(set (match_dup 0) (match_dup 1))]
{
  if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
    {
      emit_note (NOTE_INSN_DELETED);
      DONE;
    }
}
  [(set_attr "length" "0,4")
   (set_attr "type" "veclogical")])

(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=wa,r")
        (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "@
   #
   #"
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(const_int 0)]
{
  rtx tmp = (can_create_pseudo_p ()
             ? gen_reg_rtx_and_attrs (operands[0])
             : operands[0]);
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
  DONE;
}
  [(set_attr "type" "vecload,load")
   (set_attr "length" "8,8")
   (set_attr "isa" "<VSisa>,*")])

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
        (match_operand:VSX_LE_128 1 "vsx_register_operand" "+wa,r"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "@
   #
   #"
  [(set_attr "type" "vecstore,store")
   (set_attr "length" "12,8")
   (set_attr "isa" "<VSisa>,*")])

(define_split
  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
  [(const_int 0)]
{
  rtx tmp = (can_create_pseudo_p ()
             ? gen_reg_rtx_and_attrs (operands[0])
             : operands[0]);
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
  DONE;
})

;; Peepholes to catch loads and stores for TImode if TImode landed in
;; GPR registers on a little endian system.
(define_peephole2
  [(set (match_operand:VSX_TI 0 "int_reg_operand")
        (rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand")
                       (const_int 64)))
   (set (match_operand:VSX_TI 2 "int_reg_operand")
        (rotate:VSX_TI (match_dup 0)
                       (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
  [(set (match_dup 2) (match_dup 1))])

(define_peephole2
  [(set (match_operand:VSX_TI 0 "int_reg_operand")
        (rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand")
                       (const_int 64)))
   (set (match_operand:VSX_TI 2 "memory_operand")
        (rotate:VSX_TI (match_dup 0)
                       (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && peep2_reg_dead_p (2, operands[0])"
  [(set (match_dup 2) (match_dup 1))])

;; Peephole to catch memory to memory transfers for TImode if TImode landed in
;; VSX registers on a little endian system.  The vector types and IEEE 128-bit
;; floating point are handled by the more generic swap elimination pass.
(define_peephole2
  [(set (match_operand:TI 0 "vsx_register_operand")
        (rotate:TI (match_operand:TI 1 "vsx_register_operand")
                   (const_int 64)))
   (set (match_operand:TI 2 "vsx_register_operand")
        (rotate:TI (match_dup 0)
                   (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
  [(set (match_dup 2) (match_dup 1))])
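
;; As an illustration, when a TImode value is copied through GPRs on
;; little endian, the back-to-back 64-bit rotates produced by the
;; patterns above cancel, and the peepholes reduce the sequence to a
;; plain load or store.  A minimal sketch:
;;
;;   __int128
;;   copy_ti (__int128 *p)
;;   {
;;     return *p;	/* expected to end up as a simple ld/ld pair */
;;   }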

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
  [(const_int 0)]
{
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
  DONE;
})

;; Vector constants that can be generated with XXSPLTIB that was added in ISA
;; 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.
(define_insn "xxspltib_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
  "TARGET_P9_VECTOR"
{
  operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
  return "xxspltib %x0,%2";
}
  [(set_attr "type" "vecperm")])
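
;; As an illustration, a splat of a byte constant in the -128..127
;; range fits xxspltib directly.  A minimal sketch, assuming
;; <altivec.h> and -mcpu=power9:
;;
;;   #include <altivec.h>
;;
;;   vector signed char
;;   splat_m5 (void)
;;   {
;;     return vec_splats ((signed char) -5);	/* xxspltib with 251 */
;;   }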

(define_insn "xxspltib_<mode>_nosplit"
  [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
        (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
  "TARGET_P9_VECTOR"
{
  rtx op1 = operands[1];
  int value = 256;
  int num_insns = -1;

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
      || num_insns != 1)
    gcc_unreachable ();

  operands[2] = GEN_INT (value & 0xff);
  return "xxspltib %x0,%2";
}
  [(set_attr "type" "vecperm")])

(define_insn_and_split "*xxspltib_<mode>_split"
  [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
        (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
  "TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(const_int 0)]
{
  int value = 256;
  int num_insns = -1;
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp = ((can_create_pseudo_p ())
             ? gen_reg_rtx (V16QImode)
             : gen_lowpart (V16QImode, op0));

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
      || num_insns != 2)
    gcc_unreachable ();

  emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));

  if (<MODE>mode == V2DImode)
    emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));

  else if (<MODE>mode == V4SImode)
    emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));

  else if (<MODE>mode == V8HImode)
    emit_insn (gen_altivec_vupkhsb (op0, tmp));

  else
    gcc_unreachable ();

  DONE;
}
  [(set_attr "type" "vecperm")
   (set_attr "length" "8")])
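
;; As an illustration, the split form above handles the wider element
;; types by splatting the byte and then widening it, e.g. xxspltib
;; followed by vextsb2w for V4SI or vextsb2d for V2DI.  A minimal
;; sketch, assuming <altivec.h> and -mcpu=power9:
;;
;;   #include <altivec.h>
;;
;;   vector signed int
;;   splat_int_m5 (void)
;;   {
;;     return vec_splats (-5);	/* expected: xxspltib + vextsb2w */
;;   }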


;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTIB
;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
;; all 1's, since the machine does not have to wait for the previous
;; instruction using the register being set (such as a store waiting on a slow
;; instruction).  But generate XXLXOR/XXLORC if it will avoid a register move.

;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
;;              VSX 0/-1   VMX const  GPR const LVX (VMX)  STVX (VMX)
(define_insn "vsx_mov<mode>_64bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      wa,        wa,       r,         we,        ?wQ,
                ?&r,       ??r,       ??Y,      <??r>,     wa,        v,
                ?wa,       v,         <??r>,    wZ,        v")

        (match_operand:VSX_M 1 "input_operand"
               "wa,        ZwO,       wa,       we,        r,         r,
                wQ,        Y,         r,        r,         wE,        jwM,
                ?jwM,      W,         <nW>,     v,         wZ"))]

  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  return rs6000_output_move_128bit (operands);
}
  [(set_attr "type"
               "vecstore,  vecload,   vecsimple, mffgpr,   mftgpr,    load,
                store,     load,      store,     *,        vecsimple, vecsimple,
                vecsimple, *,         *,         vecstore, vecload")
   (set_attr "num_insns"
               "*,         *,         *,         2,        *,         2,
                2,         2,         2,         2,        *,         *,
                *,         5,         2,         *,        *")
   (set_attr "max_prefixed_insns"
               "*,         *,         *,         *,        *,         2,
                2,         2,         2,         2,        *,         *,
                *,         *,         *,         *,        *")
   (set_attr "length"
               "*,         *,         *,         8,        *,         8,
                8,         8,         8,         8,        *,         *,
                *,         20,        8,         *,        *")
   (set_attr "isa"
               "<VSisa>,   <VSisa>,   <VSisa>,   *,        *,         *,
                *,         *,         *,         *,        p9v,       *,
                <VSisa>,   *,         *,         *,        *")])

;;              VSX store  VSX load   VSX move   GPR load  GPR store GPR move
;;              XXSPLTIB   VSPLTISW   VSX 0/-1   VMX const GPR const
;;              LVX (VMX)  STVX (VMX)
(define_insn "*vsx_mov<mode>_32bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      wa,        wa,        ??r,      ??Y,      <??r>,
                wa,        v,         ?wa,       v,        <??r>,
                wZ,        v")

        (match_operand:VSX_M 1 "input_operand"
               "wa,        ZwO,       wa,        Y,        r,        r,
                wE,        jwM,       ?jwM,      W,        <nW>,
                v,         wZ"))]

  "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  return rs6000_output_move_128bit (operands);
}
  [(set_attr "type"
               "vecstore,  vecload,   vecsimple, load,     store,    *,
                vecsimple, vecsimple, vecsimple, *,        *,
                vecstore,  vecload")
   (set_attr "length"
               "*,         *,         *,         16,       16,       16,
                *,         *,         *,         20,       16,
                *,         *")
   (set_attr "isa"
               "<VSisa>,   <VSisa>,   <VSisa>,   *,        *,        *,
                p9v,       *,         <VSisa>,   *,        *,
                *,         *")])

;; Explicit load/store expanders for the builtin functions
(define_expand "vsx_load_<mode>"
  [(set (match_operand:VSX_M 0 "vsx_register_operand")
        (match_operand:VSX_M 1 "memory_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
    {
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
      DONE;
    }
})

(define_expand "vsx_store_<mode>"
  [(set (match_operand:VSX_M 0 "memory_operand")
        (match_operand:VSX_M 1 "vsx_register_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
    {
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
      DONE;
    }
})
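
;; As an illustration, these expanders back the vec_xl/vec_xst
;; built-ins; on little endian without TARGET_P9_VECTOR they emit the
;; explicit swap sequences up front so the later swap-optimization pass
;; can clean them up.  A minimal sketch, assuming <altivec.h>:
;;
;;   #include <altivec.h>
;;
;;   void
;;   copy16 (const unsigned char *src, unsigned char *dst)
;;   {
;;     vector unsigned char v = vec_xl (0, src);
;;     vec_xst (v, 0, dst);
;;   }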

;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
;; when you really want their element-reversing behavior.
(define_insn "vsx_ld_elemrev_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (vec_select:V2DI
          (match_operand:V2DI 1 "memory_operand" "Z")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
        (vec_select:V1TI
          (match_operand:V1TI 1 "memory_operand" "Z")
          (parallel [(const_int 0)])))]
  "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
{
  return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2";
}
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v2df"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (vec_select:V2DF
          (match_operand:V2DF 1 "memory_operand" "Z")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
        (vec_select:V4SI
          (match_operand:V4SI 1 "memory_operand" "Z")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
  "lxvw4x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v4sf"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (vec_select:V4SF
          (match_operand:V4SF 1 "memory_operand" "Z")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
  "lxvw4x %x0,%y1"
  [(set_attr "type" "vecload")])
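
;; As an illustration, the element-reversing loads are what vec_xl_be
;; expands to on little endian, leaving the elements in big-endian
;; order within the register.  A minimal sketch, assuming <altivec.h>:
;;
;;   #include <altivec.h>
;;
;;   vector signed int
;;   load_be_order (const signed int *p)
;;   {
;;     return vec_xl_be (0, p);	/* lxvw4x on LE */
;;   }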

(define_expand "vsx_ld_elemrev_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
          (match_operand:V8HI 1 "memory_operand" "Z")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx tmp = gen_reg_rtx (V4SImode);
      rtx subreg, subreg2, perm[16], pcv;
      /* 2 is leftmost element in register */
      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
      int i;

      subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
      emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
      subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,
                                                subreg2, pcv));
      DONE;
    }
})

(define_insn "*vsx_ld_elemrev_v8hi_internal"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
          (match_operand:V8HI 1 "memory_operand" "Z")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "lxvh8x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_expand "vsx_ld_elemrev_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_select:V16QI
          (match_operand:V16QI 1 "memory_operand" "Z")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int  9) (const_int  8)
                     (const_int  7) (const_int  6)
                     (const_int  5) (const_int  4)
                     (const_int  3) (const_int  2)
                     (const_int  1) (const_int  0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx tmp = gen_reg_rtx (V4SImode);
      rtx subreg, subreg2, perm[16], pcv;
      /* 3 is leftmost element in register */
      unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
      int i;

      subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0);
      emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
      subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0);

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2,
                                                 subreg2, pcv));
      DONE;
    }
})

(define_insn "vsx_ld_elemrev_v16qi_internal"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_select:V16QI
          (match_operand:V16QI 1 "memory_operand" "Z")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int  9) (const_int  8)
                     (const_int  7) (const_int  6)
                     (const_int  5) (const_int  4)
                     (const_int  3) (const_int  2)
                     (const_int  1) (const_int  0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "lxvb16x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_st_elemrev_v1ti"
  [(set (match_operand:V1TI 0 "memory_operand" "=Z")
        (vec_select:V1TI
          (match_operand:V1TI 1 "vsx_register_operand" "+wa")
          (parallel [(const_int 0)])))
   (clobber (match_dup 1))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
{
  return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0";
}
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v2df"
  [(set (match_operand:V2DF 0 "memory_operand" "=Z")
        (vec_select:V2DF
          (match_operand:V2DF 1 "vsx_register_operand" "wa")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v2di"
  [(set (match_operand:V2DI 0 "memory_operand" "=Z")
        (vec_select:V2DI
          (match_operand:V2DI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v4sf"
  [(set (match_operand:V4SF 0 "memory_operand" "=Z")
        (vec_select:V4SF
          (match_operand:V4SF 1 "vsx_register_operand" "wa")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
  "stxvw4x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v4si"
  [(set (match_operand:V4SI 0 "memory_operand" "=Z")
        (vec_select:V4SI
          (match_operand:V4SI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
  "stxvw4x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_expand "vsx_st_elemrev_v8hi"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
        (vec_select:V8HI
          (match_operand:V8HI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx mem_subreg, subreg, perm[16], pcv;
      rtx tmp = gen_reg_rtx (V8HImode);
      /* 2 is leftmost element in register */
      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
      int i;

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
                                                operands[1], pcv));
      subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
      mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
      emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
      DONE;
    }
})

(define_insn "*vsx_st_elemrev_v2di_internal"
  [(set (match_operand:V2DI 0 "memory_operand" "=Z")
        (vec_select:V2DI
          (match_operand:V2DI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_st_elemrev_v8hi_internal"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
        (vec_select:V8HI
          (match_operand:V8HI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "stxvh8x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_expand "vsx_st_elemrev_v16qi"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
        (vec_select:V16QI
          (match_operand:V16QI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int  9) (const_int  8)
                     (const_int  7) (const_int  6)
                     (const_int  5) (const_int  4)
                     (const_int  3) (const_int  2)
                     (const_int  1) (const_int  0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx mem_subreg, subreg, perm[16], pcv;
      rtx tmp = gen_reg_rtx (V16QImode);
      /* 3 is leftmost element in register */
      unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
      int i;

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1],
                                                 operands[1], pcv));
      subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0);
      mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0);
      emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
      DONE;
    }
})

(define_insn "*vsx_st_elemrev_v16qi_internal"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
        (vec_select:V16QI
          (match_operand:V16QI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int  9) (const_int  8)
                     (const_int  7) (const_int  6)
                     (const_int  5) (const_int  4)
                     (const_int  3) (const_int  2)
                     (const_int  1) (const_int  0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "stxvb16x %x1,%y0"
  [(set_attr "type" "vecstore")])


;; VSX vector floating point arithmetic instructions.  The VSX scalar
;; instructions are now combined with the insn for the traditional floating
;; point unit.
(define_insn "*vsx_add<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                    (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvadd<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
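
;; As an illustration, ordinary vector arithmetic maps directly onto
;; these insns.  A minimal sketch, assuming <altivec.h> and -mvsx:
;;
;;   #include <altivec.h>
;;
;;   vector double
;;   add_v2df (vector double a, vector double b)
;;   {
;;     return a + b;	/* xvadddp */
;;   }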

(define_insn "*vsx_sub<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                     (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvsub<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_mul<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                    (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmul<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

; Emulate vector with scalar for vec_mul in V2DImode
(define_insn_and_split "vsx_mul_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_MULSD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_muldi3 (op5, op3, op4));
  else
    {
      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
      emit_move_insn (op5, ret);
    }
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_muldi3 (op3, op3, op4));
  else
    {
      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
      emit_move_insn (op3, ret);
    }
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  DONE;
}
  [(set_attr "type" "mul")])
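
;; As an illustration, there is no V2DI multiply instruction here, so
;; the insn-and-split above scalarizes through DImode: two extracts per
;; operand, two 64-bit multiplies (mulld on 64-bit targets), and a
;; concat.  A minimal sketch, assuming <altivec.h>:
;;
;;   #include <altivec.h>
;;
;;   vector long long
;;   mul_v2di (vector long long a, vector long long b)
;;   {
;;     return a * b;	/* expected to expand via vsx_mul_v2di */
;;   }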

(define_insn "*vsx_div<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                   (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvdiv<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_div>")])

; Emulate vector with scalar for vec_div in V2DImode
(define_insn_and_split "vsx_div_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_DIVSD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_divdi3 (op5, op3, op4));
  else
    {
      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op5, LCT_NORMAL, DImode,
                                            op3, DImode,
                                            op4, DImode);
      emit_move_insn (op5, target);
    }
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_divdi3 (op3, op3, op4));
  else
    {
      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op3, LCT_NORMAL, DImode,
                                            op3, DImode,
                                            op4, DImode);
      emit_move_insn (op3, target);
    }
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  DONE;
}
  [(set_attr "type" "div")])

(define_insn_and_split "vsx_udiv_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_DIVUD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_udivdi3 (op5, op3, op4));
  else
    {
      rtx libfunc = optab_libfunc (udiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op5, LCT_NORMAL, DImode,
                                            op3, DImode,
                                            op4, DImode);
      emit_move_insn (op5, target);
    }
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_udivdi3 (op3, op3, op4));
  else
    {
      rtx libfunc = optab_libfunc (udiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op3, LCT_NORMAL, DImode,
                                            op3, DImode,
                                            op4, DImode);
      emit_move_insn (op3, target);
    }
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  DONE;
}
  [(set_attr "type" "div")])
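
;; As an illustration, the signed and unsigned V2DI divides above
;; scalarize the same way, using divd/divdu on 64-bit targets or
;; libgcc calls on 32-bit ones.  A minimal sketch, assuming
;; <altivec.h>:
;;
;;   #include <altivec.h>
;;
;;   vector unsigned long long
;;   div_v2di (vector unsigned long long a, vector unsigned long long b)
;;   {
;;     return vec_div (a, b);	/* expected to expand via vsx_udiv_v2di */
;;   }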

;; *tdiv* instruction returning the FG flag
(define_expand "vsx_tdiv<mode>3_fg"
  [(set (match_dup 3)
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
                      (match_operand:VSX_B 2 "vsx_register_operand")]
                     UNSPEC_VSX_TDIV))
   (set (match_operand:SI 0 "gpc_reg_operand")
        (gt:SI (match_dup 3)
               (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})

;; *tdiv* instruction returning the FE flag
(define_expand "vsx_tdiv<mode>3_fe"
  [(set (match_dup 3)
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
                      (match_operand:VSX_B 2 "vsx_register_operand")]
                     UNSPEC_VSX_TDIV))
   (set (match_operand:SI 0 "gpc_reg_operand")
        (eq:SI (match_dup 3)
               (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})

(define_insn "*vsx_tdiv<mode>3_internal"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")
                      (match_operand:VSX_B 2 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_TDIV))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>tdiv<sd>p %0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_fre<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
                      UNSPEC_FRES))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvre<sd>p %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_neg<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvneg<sd>p %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_abs<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvabs<sd>p %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_nabs<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (neg:VSX_F
         (abs:VSX_F
          (match_operand:VSX_F 1 "vsx_register_operand" "wa"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvnabs<sd>p %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_smax<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                    (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmax<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_smin<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                    (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmin<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_sqrt<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvsqrt<sd>p %x0,%x1"
  [(set_attr "type" "<sd>sqrt")])

(define_insn "*vsx_rsqrte<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
                      UNSPEC_RSQRT))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvrsqrte<sd>p %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

1808 ;; *tsqrt* returning the fg flag
1809 (define_expand "vsx_tsqrt<mode>2_fg"
1810 [(set (match_dup 2)
1811 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1812 UNSPEC_VSX_TSQRT))
1813 (set (match_operand:SI 0 "gpc_reg_operand")
1814 (gt:SI (match_dup 2)
1815 (const_int 0)))]
1816 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1817 {
1818 operands[2] = gen_reg_rtx (CCFPmode);
1819 })
1820
1821 ;; *tsqrt* returning the fe flag
1822 (define_expand "vsx_tsqrt<mode>2_fe"
1823 [(set (match_dup 2)
1824 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1825 UNSPEC_VSX_TSQRT))
1826 (set (match_operand:SI 0 "gpc_reg_operand")
1827 (eq:SI (match_dup 2)
1828 (const_int 0)))]
1829 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1830 {
1831 operands[2] = gen_reg_rtx (CCFPmode);
1832 })
1833
1834 (define_insn "*vsx_tsqrt<mode>2_internal"
1835 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
1836 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
1837 UNSPEC_VSX_TSQRT))]
1838 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1839 "x<VSv>tsqrt<sd>p %0,%x1"
1840 [(set_attr "type" "<VStype_simple>")])
1841
1842 ;; Fused vector multiply/add instructions. Support the classical Altivec
1843 ;; versions of fma, which allow the target to be a separate register from the
1844 ;; 3 inputs. Under VSX, the target must be either the addend or the first
1845 ;; multiply input.
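;;
;; A minimal source-level sketch (assumes the classic vec_madd built-in
;; from altivec.h):
;;   #include <altivec.h>
;;   vector float
;;   fma4 (vector float a, vector float b, vector float c)
;;   {
;;     return vec_madd (a, b, c);  /* xvmaddasp, xvmaddmsp, or vmaddfp */
;;   }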
1846
1847 (define_insn "*vsx_fmav4sf4"
1848 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v")
1849 (fma:V4SF
1850 (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v")
1851 (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v")
1852 (match_operand:V4SF 3 "vsx_register_operand" "0,wa,v")))]
1853 "VECTOR_UNIT_VSX_P (V4SFmode)"
1854 "@
1855 xvmaddasp %x0,%x1,%x2
1856 xvmaddmsp %x0,%x1,%x3
1857 vmaddfp %0,%1,%2,%3"
1858 [(set_attr "type" "vecfloat")])
1859
1860 (define_insn "*vsx_fmav2df4"
1861 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
1862 (fma:V2DF
1863 (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa")
1864 (match_operand:V2DF 2 "vsx_register_operand" "wa,0")
1865 (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))]
1866 "VECTOR_UNIT_VSX_P (V2DFmode)"
1867 "@
1868 xvmaddadp %x0,%x1,%x2
1869 xvmaddmdp %x0,%x1,%x3"
1870 [(set_attr "type" "vecdouble")])
1871
1872 (define_insn "*vsx_fms<mode>4"
1873 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa")
1874 (fma:VSX_F
1875 (match_operand:VSX_F 1 "vsx_register_operand" "%wa,wa")
1876 (match_operand:VSX_F 2 "vsx_register_operand" "wa,0")
1877 (neg:VSX_F
1878 (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))]
1879 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1880 "@
1881 xvmsuba<sd>p %x0,%x1,%x2
1882 xvmsubm<sd>p %x0,%x1,%x3"
1883 [(set_attr "type" "<VStype_mul>")])
1884
1885 (define_insn "*vsx_nfma<mode>4"
1886 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa")
1887 (neg:VSX_F
1888 (fma:VSX_F
1889 (match_operand:VSX_F 1 "vsx_register_operand" "wa,wa")
1890 (match_operand:VSX_F 2 "vsx_register_operand" "wa,0")
1891 (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))]
1892 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1893 "@
1894 xvnmadda<sd>p %x0,%x1,%x2
1895 xvnmaddm<sd>p %x0,%x1,%x3"
1896 [(set_attr "type" "<VStype_mul>")])
1897
1898 (define_insn "*vsx_nfmsv4sf4"
1899 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v")
1900 (neg:V4SF
1901 (fma:V4SF
1902 (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v")
1903 (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v")
1904 (neg:V4SF
1905 (match_operand:V4SF 3 "vsx_register_operand" "0,wa,v")))))]
1906 "VECTOR_UNIT_VSX_P (V4SFmode)"
1907 "@
1908 xvnmsubasp %x0,%x1,%x2
1909 xvnmsubmsp %x0,%x1,%x3
1910 vnmsubfp %0,%1,%2,%3"
1911 [(set_attr "type" "vecfloat")])
1912
1913 (define_insn "*vsx_nfmsv2df4"
1914 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
1915 (neg:V2DF
1916 (fma:V2DF
1917 (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa")
1918 (match_operand:V2DF 2 "vsx_register_operand" "wa,0")
1919 (neg:V2DF
1920 (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))))]
1921 "VECTOR_UNIT_VSX_P (V2DFmode)"
1922 "@
1923 xvnmsubadp %x0,%x1,%x2
1924 xvnmsubmdp %x0,%x1,%x3"
1925 [(set_attr "type" "vecdouble")])
1926
1927 ;; Vector conditional expressions (no scalar version for these instructions)
1928 (define_insn "vsx_eq<mode>"
1929 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1930 (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1931 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1932 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1933 "xvcmpeq<sd>p %x0,%x1,%x2"
1934 [(set_attr "type" "<VStype_simple>")])
1935
1936 (define_insn "vsx_gt<mode>"
1937 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1938 (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1939 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1940 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1941 "xvcmpgt<sd>p %x0,%x1,%x2"
1942 [(set_attr "type" "<VStype_simple>")])
1943
1944 (define_insn "*vsx_ge<mode>"
1945 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1946 (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1947 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1948 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1949 "xvcmpge<sd>p %x0,%x1,%x2"
1950 [(set_attr "type" "<VStype_simple>")])
1951
1952 ;; Compare vectors producing a vector result and a predicate, setting CR6 to
1953 ;; indicate a combined status
1954 (define_insn "*vsx_eq_<mode>_p"
1955 [(set (reg:CC CR6_REGNO)
1956 (unspec:CC
1957 [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1958 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
1959 UNSPEC_PREDICATE))
1960 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1961 (eq:VSX_F (match_dup 1)
1962 (match_dup 2)))]
1963 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1964 "xvcmpeq<sd>p. %x0,%x1,%x2"
1965 [(set_attr "type" "<VStype_simple>")])
1966
1967 (define_insn "*vsx_gt_<mode>_p"
1968 [(set (reg:CC CR6_REGNO)
1969 (unspec:CC
1970 [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1971 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
1972 UNSPEC_PREDICATE))
1973 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1974 (gt:VSX_F (match_dup 1)
1975 (match_dup 2)))]
1976 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1977 "xvcmpgt<sd>p. %x0,%x1,%x2"
1978 [(set_attr "type" "<VStype_simple>")])
1979
1980 (define_insn "*vsx_ge_<mode>_p"
1981 [(set (reg:CC CR6_REGNO)
1982 (unspec:CC
1983 [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1984 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
1985 UNSPEC_PREDICATE))
1986 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1987 (ge:VSX_F (match_dup 1)
1988 (match_dup 2)))]
1989 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1990 "xvcmpge<sd>p. %x0,%x1,%x2"
1991 [(set_attr "type" "<VStype_simple>")])
1992
1993 ;; Vector select
1994 (define_insn "*vsx_xxsel<mode>"
1995 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
1996 (if_then_else:VSX_L
1997 (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
1998 (match_operand:VSX_L 4 "zero_constant" ""))
1999 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
2000 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
2001 "VECTOR_MEM_VSX_P (<MODE>mode)"
2002 "xxsel %x0,%x3,%x2,%x1"
2003 [(set_attr "type" "vecmove")
2004 (set_attr "isa" "<VSisa>")])
2005
2006 (define_insn "*vsx_xxsel<mode>_uns"
2007 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
2008 (if_then_else:VSX_L
2009 (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
2010 (match_operand:VSX_L 4 "zero_constant" ""))
2011 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
2012 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
2013 "VECTOR_MEM_VSX_P (<MODE>mode)"
2014 "xxsel %x0,%x3,%x2,%x1"
2015 [(set_attr "type" "vecmove")
2016 (set_attr "isa" "<VSisa>")])
2017
2018 ;; Copy sign
2019 (define_insn "vsx_copysign<mode>3"
2020 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2021 (unspec:VSX_F
2022 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
2023 (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
2024 UNSPEC_COPYSIGN))]
2025 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2026 "xvcpsgn<sd>p %x0,%x2,%x1"
2027 [(set_attr "type" "<VStype_simple>")])
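
;; Source-level sketch (a hypothetical example, not from the testsuite):
;; with -O2 -ftree-vectorize on a VSX target this loop can map to xvcpsgndp;
;; the template above swaps the operands because the instruction takes the
;; sign from its first input.
;;   void
;;   cpsgn (double *r, const double *x, const double *y)
;;   {
;;     for (int i = 0; i < 1024; i++)
;;       r[i] = __builtin_copysign (x[i], y[i]);
;;   }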
2028
2029 ;; For the conversions, limit the register class for the integer value to be
2030 ;; the fprs because we don't want to add the altivec registers to movdi/movsi.
2031 ;; For the unsigned tests, there isn't a generic double -> unsigned conversion
2032 ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
2033 ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
2034 ;; in allowing virtual registers.
2035 (define_insn "vsx_float<VSi><mode>2"
2036 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
2037 (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
2038 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2039 "xvcvsx<VSc><sd>p %x0,%x1"
2040 [(set_attr "type" "<VStype_simple>")])
2041
2042 (define_insn "vsx_floatuns<VSi><mode>2"
2043 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
2044 (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
2045 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2046 "xvcvux<VSc><sd>p %x0,%x1"
2047 [(set_attr "type" "<VStype_simple>")])
2048
2049 (define_insn "vsx_fix_trunc<mode><VSi>2"
2050 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
2051 (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
2052 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2053 "x<VSv>cv<sd>psx<VSc>s %x0,%x1"
2054 [(set_attr "type" "<VStype_simple>")])
2055
2056 (define_insn "vsx_fixuns_trunc<mode><VSi>2"
2057 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
2058 (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
2059 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2060 "x<VSv>cv<sd>pux<VSc>s %x0,%x1"
2061 [(set_attr "type" "<VStype_simple>")])
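
;; Source-level sketch: a plain conversion loop can vectorize to these
;; patterns, e.g. xvcvdpsxds for double -> signed long long (a hypothetical
;; example, assuming -O2 -ftree-vectorize on a VSX target):
;;   void
;;   d2ll (long long *r, const double *x)
;;   {
;;     for (int i = 0; i < 1024; i++)
;;       r[i] = (long long) x[i];  /* expected to use the fix_trunc pattern */
;;   }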
2062
2063 ;; Math rounding functions
2064 (define_insn "vsx_x<VSv>r<sd>pi"
2065 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2066 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2067 UNSPEC_VSX_ROUND_I))]
2068 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2069 "x<VSv>r<sd>pi %x0,%x1"
2070 [(set_attr "type" "<VStype_simple>")])
2071
2072 (define_insn "vsx_x<VSv>r<sd>pic"
2073 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2074 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2075 UNSPEC_VSX_ROUND_IC))]
2076 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2077 "x<VSv>r<sd>pic %x0,%x1"
2078 [(set_attr "type" "<VStype_simple>")])
2079
2080 (define_insn "vsx_btrunc<mode>2"
2081 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2082 (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
2083 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2084 "xvr<sd>piz %x0,%x1"
2085 [(set_attr "type" "<VStype_simple>")])
2086
2087 (define_insn "*vsx_b2trunc<mode>2"
2088 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2089 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2090 UNSPEC_FRIZ))]
2091 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2092 "x<VSv>r<sd>piz %x0,%x1"
2093 [(set_attr "type" "<VStype_simple>")])
2094
2095 (define_insn "vsx_floor<mode>2"
2096 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2097 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
2098 UNSPEC_FRIM))]
2099 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2100 "xvr<sd>pim %x0,%x1"
2101 [(set_attr "type" "<VStype_simple>")])
2102
2103 (define_insn "vsx_ceil<mode>2"
2104 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2105 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
2106 UNSPEC_FRIP))]
2107 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2108 "xvr<sd>pip %x0,%x1"
2109 [(set_attr "type" "<VStype_simple>")])
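
;; Source-level sketch (assumes vec_floor/vec_ceil/vec_trunc from
;; altivec.h):
;;   #include <altivec.h>
;;   vector double
;;   round_down (vector double x)
;;   {
;;     return vec_floor (x);  /* xvrdpim */
;;   }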
2110
2111 \f
2112 ;; VSX convert to/from double vector
2113
2114 ;; Convert between single and double precision
2115 ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
2116 ;; scalar single precision instructions internally use the double format.
2117 ;; Prefer the altivec registers, since we likely will need to do a vperm
2118 (define_insn "vsx_xscvdpsp"
2119 [(set (match_operand:V4SF 0 "vsx_register_operand" "=f,?wa")
2120 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "f,wa")]
2121 UNSPEC_VSX_CVSPDP))]
2122 "VECTOR_UNIT_VSX_P (DFmode)"
2123 "xscvdpsp %x0,%x1"
2124 [(set_attr "type" "fp")])
2125
2126 (define_insn "vsx_xvcvspdp_be"
2127 [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
2128 (float_extend:V2DF
2129 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2130 (parallel [(const_int 0) (const_int 2)]))))]
2131 "VECTOR_UNIT_VSX_P (V4SFmode) && BYTES_BIG_ENDIAN"
2132 "xvcvspdp %x0,%x1"
2133 [(set_attr "type" "vecdouble")])
2134
2135 (define_insn "vsx_xvcvspdp_le"
2136 [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
2137 (float_extend:V2DF
2138 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2139 (parallel [(const_int 1) (const_int 3)]))))]
2140 "VECTOR_UNIT_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
2141 "xvcvspdp %x0,%x1"
2142 [(set_attr "type" "vecdouble")])
2143
2144 (define_expand "vsx_xvcvspdp"
2145 [(match_operand:V2DF 0 "vsx_register_operand")
2146 (match_operand:V4SF 1 "vsx_register_operand")]
2147 "VECTOR_UNIT_VSX_P (V4SFmode)"
2148 {
2149 if (BYTES_BIG_ENDIAN)
2150 emit_insn (gen_vsx_xvcvspdp_be (operands[0], operands[1]));
2151 else
2152 emit_insn (gen_vsx_xvcvspdp_le (operands[0], operands[1]));
2153 DONE;
2154 })
2155
2156 (define_insn "vsx_xvcvdpsp"
2157 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,?wa")
2158 (unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "v,wa")]
2159 UNSPEC_VSX_CVSPDP))]
2160 "VECTOR_UNIT_VSX_P (V2DFmode)"
2161 "xvcvdpsp %x0,%x1"
2162 [(set_attr "type" "vecdouble")])
2163
2164 ;; xscvspdp, represent the scalar SF type as V4SF
2165 (define_insn "vsx_xscvspdp"
2166 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2167 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2168 UNSPEC_VSX_CVSPDP))]
2169 "VECTOR_UNIT_VSX_P (V4SFmode)"
2170 "xscvspdp %x0,%x1"
2171 [(set_attr "type" "fp")])
2172
2173 ;; Same as vsx_xscvspdp, but use SF as the type
2174 (define_insn "vsx_xscvspdp_scalar2"
2175 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2176 (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2177 UNSPEC_VSX_CVSPDP))]
2178 "VECTOR_UNIT_VSX_P (V4SFmode)"
2179 "xscvspdp %x0,%x1"
2180 [(set_attr "type" "fp")])
2181
2182 ;; Generate xvcvhpsp instruction
2183 (define_insn "vsx_xvcvhpsp"
2184 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2185 (unspec:V4SF [(match_operand: V16QI 1 "vsx_register_operand" "wa")]
2186 UNSPEC_VSX_CVHPSP))]
2187 "TARGET_P9_VECTOR"
2188 "xvcvhpsp %x0,%x1"
2189 [(set_attr "type" "vecfloat")])
2190
2191 ;; Generate xvcvsphp
2192 (define_insn "vsx_xvcvsphp"
2193 [(set (match_operand:V4SI 0 "register_operand" "=wa")
2194 (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2195 UNSPEC_VSX_XVCVSPHP))]
2196 "TARGET_P9_VECTOR"
2197 "xvcvsphp %x0,%x1"
2198 [(set_attr "type" "vecfloat")])
2199
2200 ;; xscvdpsp used for splatting a scalar to V4SF, knowing that the internal SF
2201 ;; format of scalars is actually DF.
2202 (define_insn "vsx_xscvdpsp_scalar"
2203 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2204 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2205 UNSPEC_VSX_CVSPDP))]
2206 "VECTOR_UNIT_VSX_P (V4SFmode)"
2207 "xscvdpsp %x0,%x1"
2208 [(set_attr "type" "fp")])
2209
2210 ;; ISA 2.07 xscvdpspn/xscvspdpn that do not raise an error on signalling NaNs
2211 (define_insn "vsx_xscvdpspn"
2212 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2213 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wa")]
2214 UNSPEC_VSX_CVDPSPN))]
2215 "TARGET_XSCVDPSPN"
2216 "xscvdpspn %x0,%x1"
2217 [(set_attr "type" "fp")])
2218
2219 (define_insn "vsx_xscvspdpn"
2220 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2221 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2222 UNSPEC_VSX_CVSPDPN))]
2223 "TARGET_XSCVSPDPN"
2224 "xscvspdpn %x0,%x1"
2225 [(set_attr "type" "fp")])
2226
2227 (define_insn "vsx_xscvdpspn_scalar"
2228 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2229 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2230 UNSPEC_VSX_CVDPSPN))]
2231 "TARGET_XSCVDPSPN"
2232 "xscvdpspn %x0,%x1"
2233 [(set_attr "type" "fp")])
2234
2235 ;; Used by direct move to move a SFmode value from GPR to VSX register
2236 (define_insn "vsx_xscvspdpn_directmove"
2237 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2238 (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2239 UNSPEC_VSX_CVSPDPN))]
2240 "TARGET_XSCVSPDPN"
2241 "xscvspdpn %x0,%x1"
2242 [(set_attr "type" "fp")])
2243
2244 ;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
2245
2246 (define_insn "vsx_xvcv<su>xwsp"
2247 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2248 (any_float:V4SF (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
2249 "VECTOR_UNIT_VSX_P (V4SFmode)"
2250 "xvcv<su>xwsp %x0,%x1"
2251 [(set_attr "type" "vecfloat")])
2252
2253 (define_insn "vsx_xvcv<su>xddp"
2254 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2255 (any_float:V2DF (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
2256 "VECTOR_UNIT_VSX_P (V2DFmode)"
2257 "xvcv<su>xddp %x0,%x1"
2258 [(set_attr "type" "vecdouble")])
2259
2260 (define_insn "vsx_xvcvsp<su>xws"
2261 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
2262 (any_fix:V4SI (match_operand:V4SF 1 "vsx_register_operand" "wa")))]
2263 "VECTOR_UNIT_VSX_P (V4SFmode)"
2264 "xvcvsp<su>xws %x0,%x1"
2265 [(set_attr "type" "vecfloat")])
2266
2267 (define_insn "vsx_xvcvdp<su>xds"
2268 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
2269 (any_fix:V2DI (match_operand:V2DF 1 "vsx_register_operand" "wa")))]
2270 "VECTOR_UNIT_VSX_P (V2DFmode)"
2271 "xvcvdp<su>xds %x0,%x1"
2272 [(set_attr "type" "vecdouble")])
2273
2274 (define_expand "vsx_xvcvsxddp_scale"
2275 [(match_operand:V2DF 0 "vsx_register_operand")
2276 (match_operand:V2DI 1 "vsx_register_operand")
2277 (match_operand:QI 2 "immediate_operand")]
2278 "VECTOR_UNIT_VSX_P (V2DFmode)"
2279 {
2280 rtx op0 = operands[0];
2281 rtx op1 = operands[1];
2282 int scale = INTVAL (operands[2]);
2283 emit_insn (gen_vsx_xvcvsxddp (op0, op1));
2284 if (scale != 0)
2285 rs6000_scale_v2df (op0, op0, -scale);
2286 DONE;
2287 })
2288
2289 (define_expand "vsx_xvcvuxddp_scale"
2290 [(match_operand:V2DF 0 "vsx_register_operand")
2291 (match_operand:V2DI 1 "vsx_register_operand")
2292 (match_operand:QI 2 "immediate_operand")]
2293 "VECTOR_UNIT_VSX_P (V2DFmode)"
2294 {
2295 rtx op0 = operands[0];
2296 rtx op1 = operands[1];
2297 int scale = INTVAL (operands[2]);
2298 emit_insn (gen_vsx_xvcvuxddp (op0, op1));
2299 if (scale != 0)
2300 rs6000_scale_v2df (op0, op0, -scale);
2301 DONE;
2302 })
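
;; Usage sketch (assumes the vec_ctf built-in from altivec.h): the scale
;; operand divides the converted value by 2**scale, which is why the
;; expanders above pass the negated scale:
;;   #include <altivec.h>
;;   vector double
;;   ctf5 (vector signed long long a)
;;   {
;;     return vec_ctf (a, 5);  /* xvcvsxddp, then scale by 2**-5 */
;;   }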
2303
2304 (define_expand "vsx_xvcvdpsxds_scale"
2305 [(match_operand:V2DI 0 "vsx_register_operand")
2306 (match_operand:V2DF 1 "vsx_register_operand")
2307 (match_operand:QI 2 "immediate_operand")]
2308 "VECTOR_UNIT_VSX_P (V2DFmode)"
2309 {
2310 rtx op0 = operands[0];
2311 rtx op1 = operands[1];
2312 rtx tmp;
2313 int scale = INTVAL (operands[2]);
2314 if (scale == 0)
2315 tmp = op1;
2316 else
2317 {
2318 tmp = gen_reg_rtx (V2DFmode);
2319 rs6000_scale_v2df (tmp, op1, scale);
2320 }
2321 emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
2322 DONE;
2323 })
2324
2325 ;; convert vector of 64-bit floating point numbers to vector of
2326 ;; 64-bit unsigned integer
2327 (define_expand "vsx_xvcvdpuxds_scale"
2328 [(match_operand:V2DI 0 "vsx_register_operand")
2329 (match_operand:V2DF 1 "vsx_register_operand")
2330 (match_operand:QI 2 "immediate_operand")]
2331 "VECTOR_UNIT_VSX_P (V2DFmode)"
2332 {
2333 rtx op0 = operands[0];
2334 rtx op1 = operands[1];
2335 rtx tmp;
2336 int scale = INTVAL (operands[2]);
2337 if (scale == 0)
2338 tmp = op1;
2339 else
2340 {
2341 tmp = gen_reg_rtx (V2DFmode);
2342 rs6000_scale_v2df (tmp, op1, scale);
2343 }
2344 emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
2345 DONE;
2346 })
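
;; Usage sketch (assumes the vec_cts built-in for vector double): here the
;; input is scaled by 2**scale before the float -> integer conversion:
;;   #include <altivec.h>
;;   vector signed long long
;;   cts3 (vector double a)
;;   {
;;     return vec_cts (a, 3);  /* scale by 2**3, then xvcvdpsxds */
;;   }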
2347
2348 ;; Convert from 64-bit to 32-bit types
2349 ;; Note, favor the Altivec registers since the usual use of these instructions
2350 ;; is in vector converts and we need to use the Altivec vperm instruction.
2351
2352 (define_insn "vsx_xvcvdpsxws"
2353 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2354 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
2355 UNSPEC_VSX_CVDPSXWS))]
2356 "VECTOR_UNIT_VSX_P (V2DFmode)"
2357 "xvcvdpsxws %x0,%x1"
2358 [(set_attr "type" "vecdouble")])
2359
2360 (define_insn "vsx_xvcvdpuxws"
2361 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2362 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
2363 UNSPEC_VSX_CVDPUXWS))]
2364 "VECTOR_UNIT_VSX_P (V2DFmode)"
2365 "xvcvdpuxws %x0,%x1"
2366 [(set_attr "type" "vecdouble")])
2367
2368 (define_insn "vsx_xvcvsxdsp"
2369 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2370 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2371 UNSPEC_VSX_CVSXDSP))]
2372 "VECTOR_UNIT_VSX_P (V2DFmode)"
2373 "xvcvsxdsp %x0,%x1"
2374 [(set_attr "type" "vecfloat")])
2375
2376 (define_insn "vsx_xvcvuxdsp"
2377 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2378 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2379 UNSPEC_VSX_CVUXDSP))]
2380 "VECTOR_UNIT_VSX_P (V2DFmode)"
2381 "xvcvuxdsp %x0,%x1"
2382 [(set_attr "type" "vecdouble")])
2383
2384 ;; Convert vector of 32-bit signed/unsigned integers to vector of
2385 ;; 64-bit floating point numbers.
2386 (define_insn "vsx_xvcv<su>xwdp_be"
2387 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2388 (any_float:V2DF
2389 (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa")
2390 (parallel [(const_int 0) (const_int 2)]))))]
2391 "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN"
2392 "xvcv<su>xwdp %x0,%x1"
2393 [(set_attr "type" "vecdouble")])
2394
2395 (define_insn "vsx_xvcv<su>xwdp_le"
2396 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2397 (any_float:V2DF
2398 (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa")
2399 (parallel [(const_int 1) (const_int 3)]))))]
2400 "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
2401 "xvcv<su>xwdp %x0,%x1"
2402 [(set_attr "type" "vecdouble")])
2403
2404 (define_expand "vsx_xvcv<su>xwdp"
2405 [(match_operand:V2DF 0 "vsx_register_operand")
2406 (match_operand:V4SI 1 "vsx_register_operand")
2407 (any_float (pc))]
2408 "VECTOR_UNIT_VSX_P (V2DFmode)"
2409 {
2410 if (BYTES_BIG_ENDIAN)
2411 emit_insn (gen_vsx_xvcv<su>xwdp_be (operands[0], operands[1]));
2412 else
2413 emit_insn (gen_vsx_xvcv<su>xwdp_le (operands[0], operands[1]));
2414 DONE;
2415 })
2416
2417 (define_insn "vsx_xvcvsxwdp_df"
2418 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2419 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2420 UNSPEC_VSX_CVSXWDP))]
2421 "TARGET_VSX"
2422 "xvcvsxwdp %x0,%x1"
2423 [(set_attr "type" "vecdouble")])
2424
2425 (define_insn "vsx_xvcvuxwdp_df"
2426 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2427 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2428 UNSPEC_VSX_CVUXWDP))]
2429 "TARGET_VSX"
2430 "xvcvuxwdp %x0,%x1"
2431 [(set_attr "type" "vecdouble")])
2432
2433 ;; Convert vector of 32-bit floating point numbers to vector of
2434 ;; 64-bit signed/unsigned integers.
2435 (define_insn "vsx_xvcvsp<su>xds_be"
2436 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2437 (any_fix:V2DI
2438 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2439 (parallel [(const_int 0) (const_int 2)]))))]
2440 "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN"
2441 "xvcvsp<su>xds %x0,%x1"
2442 [(set_attr "type" "vecdouble")])
2443
2444 (define_insn "vsx_xvcvsp<su>xds_le"
2445 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2446 (any_fix:V2DI
2447 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2448 (parallel [(const_int 1) (const_int 3)]))))]
2449 "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
2450 "xvcvsp<su>xds %x0,%x1"
2451 [(set_attr "type" "vecdouble")])
2452
2453 (define_expand "vsx_xvcvsp<su>xds"
2454 [(match_operand:V2DI 0 "vsx_register_operand")
2455 (match_operand:V4SF 1 "vsx_register_operand")
2456 (any_fix (pc))]
2457 "VECTOR_UNIT_VSX_P (V2DFmode)"
2458 {
2459 if (BYTES_BIG_ENDIAN)
2460 emit_insn (gen_vsx_xvcvsp<su>xds_be (operands[0], operands[1]));
2461 else
2462 emit_insn (gen_vsx_xvcvsp<su>xds_le (operands[0], operands[1]));
2463 DONE;
2464 })
2465
2466 ;; Generate float2_v2df
2467 ;; convert two vectors of double to a vector of float
2468 (define_expand "float2_v2df"
2469 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2470 (use (match_operand:V2DF 1 "register_operand" "wa"))
2471 (use (match_operand:V2DF 2 "register_operand" "wa"))]
2472 "VECTOR_UNIT_VSX_P (V4SFmode)"
2473 {
2474 rtx rtx_src1, rtx_src2, rtx_dst;
2475
2476 rtx_dst = operands[0];
2477 rtx_src1 = operands[1];
2478 rtx_src2 = operands[2];
2479
2480 rs6000_generate_float2_double_code (rtx_dst, rtx_src1, rtx_src2);
2481 DONE;
2482 })
2483
2484 ;; Generate float2
2485 ;; convert two long long signed ints to float
2486 (define_expand "float2_v2di"
2487 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2488 (use (match_operand:V2DI 1 "register_operand" "wa"))
2489 (use (match_operand:V2DI 2 "register_operand" "wa"))]
2490 "VECTOR_UNIT_VSX_P (V4SFmode)"
2491 {
2492 rtx rtx_src1, rtx_src2, rtx_dst;
2493
2494 rtx_dst = operands[0];
2495 rtx_src1 = operands[1];
2496 rtx_src2 = operands[2];
2497
2498 rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
2499 DONE;
2500 })
2501
2502 ;; Generate uns_float2
2503 ;; convert two long long unsigned ints to float
2504 (define_expand "uns_float2_v2di"
2505 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2506 (use (match_operand:V2DI 1 "register_operand" "wa"))
2507 (use (match_operand:V2DI 2 "register_operand" "wa"))]
2508 "VECTOR_UNIT_VSX_P (V4SFmode)"
2509 {
2510 rtx rtx_src1, rtx_src2, rtx_dst;
2511
2512 rtx_dst = operands[0];
2513 rtx_src1 = operands[1];
2514 rtx_src2 = operands[2];
2515
2516 rs6000_generate_float2_code (false, rtx_dst, rtx_src1, rtx_src2);
2517 DONE;
2518 })
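
;; Usage sketch (assumes the overloaded vec_float2 built-in): the two
;; expanders above pack the converted results of two V2DI inputs into one
;; V4SF result:
;;   #include <altivec.h>
;;   vector float
;;   f2 (vector signed long long a, vector signed long long b)
;;   {
;;     return vec_float2 (a, b);
;;   }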
2519
2520 ;; Generate floate
2521 ;; convert double or long long signed to float
2522 ;; (Only even words are valid, BE numbering)
2523 (define_expand "floate<mode>"
2524 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2525 (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2526 "VECTOR_UNIT_VSX_P (V4SFmode)"
2527 {
2528 if (BYTES_BIG_ENDIAN)
2529 {
2530 /* Shift left one word to put the even word in the correct location.  */
2531 rtx rtx_tmp;
2532 rtx rtx_val = GEN_INT (4);
2533
2534 rtx_tmp = gen_reg_rtx (V4SFmode);
2535 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2536 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2537 rtx_tmp, rtx_tmp, rtx_val));
2538 }
2539 else
2540 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2541
2542 DONE;
2543 })
2544
2545 ;; Generate uns_floate
2546 ;; convert long long unsigned to float
2547 ;; (Only even words are valid, BE numbering)
2548 (define_expand "unsfloatev2di"
2549 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2550 (use (match_operand:V2DI 1 "register_operand" "wa"))]
2551 "VECTOR_UNIT_VSX_P (V4SFmode)"
2552 {
2553 if (BYTES_BIG_ENDIAN)
2554 {
2555 /* Shift left one word to put the even word in the correct location.  */
2556 rtx rtx_tmp;
2557 rtx rtx_val = GEN_INT (4);
2558
2559 rtx_tmp = gen_reg_rtx (V4SFmode);
2560 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2561 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2562 rtx_tmp, rtx_tmp, rtx_val));
2563 }
2564 else
2565 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2566
2567 DONE;
2568 })
2569
2570 ;; Generate floato
2571 ;; convert double or long long signed to float
2572 ;; (Only odd words are valid, BE numbering)
2573 (define_expand "floato<mode>"
2574 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2575 (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2576 "VECTOR_UNIT_VSX_P (V4SFmode)"
2577 {
2578 if (BYTES_BIG_ENDIAN)
2579 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2580 else
2581 {
2582 /* Shift left one word to put the odd word in the correct location.  */
2583 rtx rtx_tmp;
2584 rtx rtx_val = GEN_INT (4);
2585
2586 rtx_tmp = gen_reg_rtx (V4SFmode);
2587 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2588 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2589 rtx_tmp, rtx_tmp, rtx_val));
2590 }
2591 DONE;
2592 })
2593
2594 ;; Generate uns_floato
2595 ;; convert long long unsigned to float
2596 ;; (Only odd words are valid, BE numbering)
2597 (define_expand "unsfloatov2di"
2598 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2599 (use (match_operand:V2DI 1 "register_operand" "wa"))]
2600 "VECTOR_UNIT_VSX_P (V4SFmode)"
2601 {
2602 if (BYTES_BIG_ENDIAN)
2603 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2604 else
2605 {
2606 /* Shift left one word to put the odd word in the correct location.  */
2607 rtx rtx_tmp;
2608 rtx rtx_val = GEN_INT (4);
2609
2610 rtx_tmp = gen_reg_rtx (V4SFmode);
2611 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2612 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2613 rtx_tmp, rtx_tmp, rtx_val));
2614 }
2615 DONE;
2616 })
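
;; Usage sketch (assumes the vec_floate/vec_floato built-ins): only half of
;; the result words carry converted values, and the expanders above rotate
;; them into the requested even or odd positions:
;;   #include <altivec.h>
;;   vector float fe (vector double a) { return vec_floate (a); }
;;   vector float fo (vector double a) { return vec_floato (a); }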
2617
2618 ;; Generate vsigned2
2619 ;; convert two double float vectors to a vector of single precision ints
2620 (define_expand "vsigned2_v2df"
2621 [(match_operand:V4SI 0 "register_operand" "=wa")
2622 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa")
2623 (match_operand:V2DF 2 "register_operand" "wa")]
2624 UNSPEC_VSX_VSIGNED2)]
2625 "TARGET_VSX"
2626 {
2627 rtx rtx_src1, rtx_src2, rtx_dst;
2628 bool signed_convert = true;
2629
2630 rtx_dst = operands[0];
2631 rtx_src1 = operands[1];
2632 rtx_src2 = operands[2];
2633
2634 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2635 DONE;
2636 })
2637
2638 ;; Generate vsignedo_v2df
2639 ;; signed double float to int convert odd word
2640 (define_expand "vsignedo_v2df"
2641 [(set (match_operand:V4SI 0 "register_operand" "=wa")
2642 (match_operand:V2DF 1 "register_operand" "wa"))]
2643 "TARGET_VSX"
2644 {
2645 if (BYTES_BIG_ENDIAN)
2646 {
2647 rtx rtx_tmp;
2648 rtx rtx_val = GEN_INT (12);
2649 rtx_tmp = gen_reg_rtx (V4SImode);
2650
2651 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2652
2653 /* Big endian word numbering for words in operand is 0 1 2 3.
2654 Take (operand[1] operand[1]) and shift left one word
2655 0 1 2 3 0 1 2 3 => 1 2 3 0.
2656 Words 1 and 3 are now where they need to be for the result. */
2657
2658 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2659 rtx_tmp, rtx_val));
2660 }
2661 else
2662 /* Little endian word numbering for operand is 3 2 1 0.
2663 Result words 3 and 1 are where they need to be. */
2664 emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2665
2666 DONE;
2667 }
2668 [(set_attr "type" "veccomplex")])
2669
2670 ;; Generate vsignede_v2df
2671 ;; signed double float to int even word
2672 (define_expand "vsignede_v2df"
2673 [(set (match_operand:V4SI 0 "register_operand" "=v")
2674 (match_operand:V2DF 1 "register_operand" "v"))]
2675 "TARGET_VSX"
2676 {
2677 if (BYTES_BIG_ENDIAN)
2678 /* Big endian word numbering for words in the result is 0 1 2 3.
2679 Result words 0 and 2 are already where they need to be. */
2680 emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2681
2682 else
2683 {
2684 rtx rtx_tmp;
2685 rtx rtx_val = GEN_INT (12);
2686 rtx_tmp = gen_reg_rtx (V4SImode);
2687
2688 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2689
2690 /* Little endian word numbering for operand is 3 2 1 0.
2691 Take (operand[1] operand[1]) and shift left three words
2692 0 1 2 3 0 1 2 3 => 3 0 1 2.
2693 Words 0 and 2 are now where they need to be for the result. */
2694 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2695 rtx_tmp, rtx_val));
2696 }
2697 DONE;
2698 }
2699 [(set_attr "type" "veccomplex")])
2700
2701 ;; Generate vunsigned2
2702 ;; convert two double float vectors to a vector of single precision
2703 ;; unsigned ints
2704 (define_expand "vunsigned2_v2df"
2705 [(match_operand:V4SI 0 "register_operand" "=v")
2706 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v")
2707 (match_operand:V2DF 2 "register_operand" "v")]
2708 UNSPEC_VSX_VSIGNED2)]
2709 "TARGET_VSX"
2710 {
2711 rtx rtx_src1, rtx_src2, rtx_dst;
2712 bool signed_convert = false;
2713
2714 rtx_dst = operands[0];
2715 rtx_src1 = operands[1];
2716 rtx_src2 = operands[2];
2717
2718 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2719 DONE;
2720 })
2721
2722 ;; Generate vunsignedo_v2df
2723 ;; unsigned double float to int convert odd word
2724 (define_expand "vunsignedo_v2df"
2725 [(set (match_operand:V4SI 0 "register_operand" "=v")
2726 (match_operand:V2DF 1 "register_operand" "v"))]
2727 "TARGET_VSX"
2728 {
2729 if (BYTES_BIG_ENDIAN)
2730 {
2731 rtx rtx_tmp;
2732 rtx rtx_val = GEN_INT (12);
2733 rtx_tmp = gen_reg_rtx (V4SImode);
2734
2735 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2736
2737 /* Big endian word numbering for words in operand is 0 1 2 3.
2738 Take (operand[1] operand[1]) and shift left one word
2739 0 1 2 3 0 1 2 3 => 1 2 3 0.
2740 Words 1 and 3 are now where they need to be for the result. */
2741
2742 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2743 rtx_tmp, rtx_val));
2744 }
2745 else
2746 /* Little endian word numbering for operand is 3 2 1 0.
2747 Result words 3 and 1 are where they need to be. */
2748 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2749
2750 DONE;
2751 }
2752 [(set_attr "type" "veccomplex")])
2753
2754 ;; Generate vunsignede_v2df
2755 ;; unsigned double float to int even word
2756 (define_expand "vunsignede_v2df"
2757 [(set (match_operand:V4SI 0 "register_operand" "=v")
2758 (match_operand:V2DF 1 "register_operand" "v"))]
2759 "TARGET_VSX"
2760 {
2761 if (BYTES_BIG_ENDIAN)
2762 /* Big endian word numbering for words in the result is 0 1 2 3.
2763 Result words 0 and 2 are already where they need to be. */
2764 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2765
2766 else
2767 {
2768 rtx rtx_tmp;
2769 rtx rtx_val = GEN_INT (12);
2770 rtx_tmp = gen_reg_rtx (V4SImode);
2771
2772 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2773
2774 /* Little endian word numbering for operand is 3 2 1 0.
2775 Take (operand[1] operand[1]) and shift left three words
2776 0 1 2 3 0 1 2 3 => 3 0 1 2.
2777 Words 0 and 2 are now where they need to be for the result. */
2778 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2779 rtx_tmp, rtx_val));
2780 }
2781 DONE;
2782 }
2783 [(set_attr "type" "veccomplex")])
2784
2785 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
2786 ;; the xvrdpiz instruction does not truncate the value if the floating
2787 ;; point value is < LONG_MIN or > LONG_MAX.
2788 (define_insn "*vsx_float_fix_v2df2"
2789 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,?wa")
2790 (float:V2DF
2791 (fix:V2DI
2792 (match_operand:V2DF 1 "vsx_register_operand" "wa,?wa"))))]
2793 "TARGET_HARD_FLOAT
2794 && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
2795 && !flag_trapping_math && TARGET_FRIZ"
2796 "xvrdpiz %x0,%x1"
2797 [(set_attr "type" "vecdouble")])
2798
2799 \f
2800 ;; Permute operations
2801
2802 ;; Build a V2DF/V2DI vector from two scalars
2803 (define_insn "vsx_concat_<mode>"
2804 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
2805 (vec_concat:VSX_D
2806 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
2807 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
2808 "VECTOR_MEM_VSX_P (<MODE>mode)"
2809 {
2810 if (which_alternative == 0)
2811 return (BYTES_BIG_ENDIAN
2812 ? "xxpermdi %x0,%x1,%x2,0"
2813 : "xxpermdi %x0,%x2,%x1,0");
2814
2815 else if (which_alternative == 1)
2816 return (BYTES_BIG_ENDIAN
2817 ? "mtvsrdd %x0,%1,%2"
2818 : "mtvsrdd %x0,%2,%1");
2819
2820 else
2821 gcc_unreachable ();
2822 }
2823 [(set_attr "type" "vecperm")])
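
;; Source-level sketch (hypothetical example): initializing a V2DF vector
;; from two scalars goes through this pattern:
;;   #include <altivec.h>
;;   vector double
;;   mk2 (double a, double b)
;;   {
;;     return (vector double) {a, b};  /* xxpermdi or mtvsrdd */
;;   }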
2824
2825 ;; Combiner patterns to allow creating XXPERMDI's to access either double
2826 ;; word element in a vector register.
2827 (define_insn "*vsx_concat_<mode>_1"
2828 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2829 (vec_concat:VSX_D
2830 (vec_select:<VS_scalar>
2831 (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2832 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2833 (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))]
2834 "VECTOR_MEM_VSX_P (<MODE>mode)"
2835 {
2836 HOST_WIDE_INT dword = INTVAL (operands[2]);
2837 if (BYTES_BIG_ENDIAN)
2838 {
2839 operands[4] = GEN_INT (2 * dword);
2840 return "xxpermdi %x0,%x1,%x3,%4";
2841 }
2842 else
2843 {
2844 operands[4] = GEN_INT (!dword);
2845 return "xxpermdi %x0,%x3,%x1,%4";
2846 }
2847 }
2848 [(set_attr "type" "vecperm")])
2849
2850 (define_insn "*vsx_concat_<mode>_2"
2851 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2852 (vec_concat:VSX_D
2853 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa")
2854 (vec_select:<VS_scalar>
2855 (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
2856 (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
2857 "VECTOR_MEM_VSX_P (<MODE>mode)"
2858 {
2859 HOST_WIDE_INT dword = INTVAL (operands[3]);
2860 if (BYTES_BIG_ENDIAN)
2861 {
2862 operands[4] = GEN_INT (dword);
2863 return "xxpermdi %x0,%x1,%x2,%4";
2864 }
2865 else
2866 {
2867 operands[4] = GEN_INT (2 * !dword);
2868 return "xxpermdi %x0,%x2,%x1,%4";
2869 }
2870 }
2871 [(set_attr "type" "vecperm")])
2872
2873 (define_insn "*vsx_concat_<mode>_3"
2874 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2875 (vec_concat:VSX_D
2876 (vec_select:<VS_scalar>
2877 (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2878 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2879 (vec_select:<VS_scalar>
2880 (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
2881 (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
2882 "VECTOR_MEM_VSX_P (<MODE>mode)"
2883 {
2884 HOST_WIDE_INT dword1 = INTVAL (operands[2]);
2885 HOST_WIDE_INT dword2 = INTVAL (operands[4]);
2886 if (BYTES_BIG_ENDIAN)
2887 {
2888 operands[5] = GEN_INT ((2 * dword1) + dword2);
2889 return "xxpermdi %x0,%x1,%x3,%5";
2890 }
2891 else
2892 {
2893 operands[5] = GEN_INT ((2 * !dword2) + !dword1);
2894 return "xxpermdi %x0,%x3,%x1,%5";
2895 }
2896 }
2897 [(set_attr "type" "vecperm")])
2898
2899 ;; Special purpose concat using xxpermdi to glue two single precision values
2900 ;; together, relying on the fact that internally scalar floats are represented
2901 ;; as doubles. This is used to initialize a V4SF vector with 4 floats
2902 (define_insn "vsx_concat_v2sf"
2903 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2904 (unspec:V2DF
2905 [(match_operand:SF 1 "vsx_register_operand" "wa")
2906 (match_operand:SF 2 "vsx_register_operand" "wa")]
2907 UNSPEC_VSX_CONCAT))]
2908 "VECTOR_MEM_VSX_P (V2DFmode)"
2909 {
2910 if (BYTES_BIG_ENDIAN)
2911 return "xxpermdi %x0,%x1,%x2,0";
2912 else
2913 return "xxpermdi %x0,%x2,%x1,0";
2914 }
2915 [(set_attr "type" "vecperm")])
2916
2917 ;; Concatenate 4 SImode elements into a V4SImode reg.
2918 (define_expand "vsx_init_v4si"
2919 [(use (match_operand:V4SI 0 "gpc_reg_operand"))
2920 (use (match_operand:SI 1 "gpc_reg_operand"))
2921 (use (match_operand:SI 2 "gpc_reg_operand"))
2922 (use (match_operand:SI 3 "gpc_reg_operand"))
2923 (use (match_operand:SI 4 "gpc_reg_operand"))]
2924 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
2925 {
2926 rtx a = gen_reg_rtx (DImode);
2927 rtx b = gen_reg_rtx (DImode);
2928 rtx c = gen_reg_rtx (DImode);
2929 rtx d = gen_reg_rtx (DImode);
2930 emit_insn (gen_zero_extendsidi2 (a, operands[1]));
2931 emit_insn (gen_zero_extendsidi2 (b, operands[2]));
2932 emit_insn (gen_zero_extendsidi2 (c, operands[3]));
2933 emit_insn (gen_zero_extendsidi2 (d, operands[4]));
2934 if (!BYTES_BIG_ENDIAN)
2935 {
2936 std::swap (a, b);
2937 std::swap (c, d);
2938 }
2939
2940 rtx aa = gen_reg_rtx (DImode);
2941 rtx ab = gen_reg_rtx (DImode);
2942 rtx cc = gen_reg_rtx (DImode);
2943 rtx cd = gen_reg_rtx (DImode);
2944 emit_insn (gen_ashldi3 (aa, a, GEN_INT (32)));
2945 emit_insn (gen_ashldi3 (cc, c, GEN_INT (32)));
2946 emit_insn (gen_iordi3 (ab, aa, b));
2947 emit_insn (gen_iordi3 (cd, cc, d));
2948
2949 rtx abcd = gen_reg_rtx (V2DImode);
2950 emit_insn (gen_vsx_concat_v2di (abcd, ab, cd));
2951 emit_move_insn (operands[0], gen_lowpart (V4SImode, abcd));
2952 DONE;
2953 })
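
;; Source-level sketch (hypothetical example): the expander above builds a
;; V4SI value from four GPR ints by pairing them into two DImode values and
;; gluing those with vsx_concat_v2di:
;;   #include <altivec.h>
;;   vector int
;;   mk4 (int a, int b, int c, int d)
;;   {
;;     return (vector int) {a, b, c, d};
;;   }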
2954
2955 ;; xxpermdi for little endian loads and stores. We need several of
2956 ;; these since the form of the PARALLEL differs by mode.
2957 (define_insn "*vsx_xxpermdi2_le_<mode>"
2958 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2959 (vec_select:VSX_D
2960 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
2961 (parallel [(const_int 1) (const_int 0)])))]
2962 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
2963 "xxpermdi %x0,%x1,%x1,2"
2964 [(set_attr "type" "vecperm")])
2965
2966 (define_insn "xxswapd_v16qi"
2967 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
2968 (vec_select:V16QI
2969 (match_operand:V16QI 1 "vsx_register_operand" "wa")
2970 (parallel [(const_int 8) (const_int 9)
2971 (const_int 10) (const_int 11)
2972 (const_int 12) (const_int 13)
2973 (const_int 14) (const_int 15)
2974 (const_int 0) (const_int 1)
2975 (const_int 2) (const_int 3)
2976 (const_int 4) (const_int 5)
2977 (const_int 6) (const_int 7)])))]
2978 "TARGET_VSX"
2979 ;; AIX does not support the extended mnemonic xxswapd. Use the basic
2980 ;; mnemonic xxpermdi instead.
2981 "xxpermdi %x0,%x1,%x1,2"
2982 [(set_attr "type" "vecperm")])
2983
2984 (define_insn "xxswapd_v8hi"
2985 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
2986 (vec_select:V8HI
2987 (match_operand:V8HI 1 "vsx_register_operand" "wa")
2988 (parallel [(const_int 4) (const_int 5)
2989 (const_int 6) (const_int 7)
2990 (const_int 0) (const_int 1)
2991 (const_int 2) (const_int 3)])))]
2992 "TARGET_VSX"
2993 ;; AIX does not support the extended mnemonic xxswapd. Use the basic
2994 ;; mnemonic xxpermdi instead.
2995 "xxpermdi %x0,%x1,%x1,2"
2996 [(set_attr "type" "vecperm")])
2997
2998 (define_insn "xxswapd_<mode>"
2999 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
3000 (vec_select:VSX_W
3001 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
3002 (parallel [(const_int 2) (const_int 3)
3003 (const_int 0) (const_int 1)])))]
3004 "TARGET_VSX"
3005 ;; AIX does not support the extended mnemonic xxswapd. Use the basic
3006 ;; mnemonic xxpermdi instead.
3007 "xxpermdi %x0,%x1,%x1,2"
3008 [(set_attr "type" "vecperm")])
3009
3010 (define_insn "xxswapd_<mode>"
3011 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3012 (vec_select:VSX_D
3013 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3014 (parallel [(const_int 1) (const_int 0)])))]
3015 "TARGET_VSX"
3016 ;; AIX does not support the extended mnemonic xxswapd. Use the basic
3017 ;; mnemonic xxpermdi instead.
3018 "xxpermdi %x0,%x1,%x1,2"
3019 [(set_attr "type" "vecperm")])
3020
3021 (define_insn "xxgenpcvm_<mode>_internal"
3022 [(set (match_operand:VSX_EXTRACT_I4 0 "altivec_register_operand" "=wa")
3023 (unspec:VSX_EXTRACT_I4
3024 [(match_operand:VSX_EXTRACT_I4 1 "altivec_register_operand" "v")
3025 (match_operand:QI 2 "const_0_to_3_operand" "n")]
3026 UNSPEC_XXGENPCV))]
3027 "TARGET_POWER10 && TARGET_64BIT"
3028 "xxgenpcv<wd>m %x0,%1,%2"
3029 [(set_attr "type" "vecsimple")])
3030
3031 (define_expand "xxgenpcvm_<mode>"
3032 [(use (match_operand:VSX_EXTRACT_I4 0 "register_operand"))
3033 (use (match_operand:VSX_EXTRACT_I4 1 "register_operand"))
3034 (use (match_operand:QI 2 "immediate_operand"))]
3035 "TARGET_POWER10"
3036 {
3037 if (!BYTES_BIG_ENDIAN)
3038 {
3039 /* gen_xxgenpcvm assumes Big Endian order. If LE,
3040 swap the upper and lower double words. */
3041 rtx tmp = gen_reg_rtx (<MODE>mode);
3042
3043 emit_insn (gen_xxswapd_<mode> (tmp, operands[1]));
3044 operands[1] = tmp;
3045 }
3046 emit_insn (gen_xxgenpcvm_<mode>_internal (operands[0], operands[1],
3047 operands[2]));
3048 DONE;
3049 })
3050
3051 ;; lxvd2x for little endian loads. We need several of
3052 ;; these since the form of the PARALLEL differs by mode.
3053 (define_insn "*vsx_lxvd2x2_le_<mode>"
3054 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3055 (vec_select:VSX_D
3056 (match_operand:VSX_D 1 "memory_operand" "Z")
3057 (parallel [(const_int 1) (const_int 0)])))]
3058 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3059 "lxvd2x %x0,%y1"
3060 [(set_attr "type" "vecload")])
3061
3062 (define_insn "*vsx_lxvd2x4_le_<mode>"
3063 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
3064 (vec_select:VSX_W
3065 (match_operand:VSX_W 1 "memory_operand" "Z")
3066 (parallel [(const_int 2) (const_int 3)
3067 (const_int 0) (const_int 1)])))]
3068 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3069 "lxvd2x %x0,%y1"
3070 [(set_attr "type" "vecload")])
3071
3072 (define_insn "*vsx_lxvd2x8_le_V8HI"
3073 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3074 (vec_select:V8HI
3075 (match_operand:V8HI 1 "memory_operand" "Z")
3076 (parallel [(const_int 4) (const_int 5)
3077 (const_int 6) (const_int 7)
3078 (const_int 0) (const_int 1)
3079 (const_int 2) (const_int 3)])))]
3080 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3081 "lxvd2x %x0,%y1"
3082 [(set_attr "type" "vecload")])
3083
3084 (define_insn "*vsx_lxvd2x16_le_V16QI"
3085 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3086 (vec_select:V16QI
3087 (match_operand:V16QI 1 "memory_operand" "Z")
3088 (parallel [(const_int 8) (const_int 9)
3089 (const_int 10) (const_int 11)
3090 (const_int 12) (const_int 13)
3091 (const_int 14) (const_int 15)
3092 (const_int 0) (const_int 1)
3093 (const_int 2) (const_int 3)
3094 (const_int 4) (const_int 5)
3095 (const_int 6) (const_int 7)])))]
3096 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3097 "lxvd2x %x0,%y1"
3098 [(set_attr "type" "vecload")])
3099
3100 ;; stxvd2x for little endian stores. We need several of
3101 ;; these since the form of the PARALLEL differs by mode.
3102 (define_insn "*vsx_stxvd2x2_le_<mode>"
3103 [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
3104 (vec_select:VSX_D
3105 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3106 (parallel [(const_int 1) (const_int 0)])))]
3107 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3108 "stxvd2x %x1,%y0"
3109 [(set_attr "type" "vecstore")])
3110
3111 (define_insn "*vsx_stxvd2x4_le_<mode>"
3112 [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
3113 (vec_select:VSX_W
3114 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
3115 (parallel [(const_int 2) (const_int 3)
3116 (const_int 0) (const_int 1)])))]
3117 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3118 "stxvd2x %x1,%y0"
3119 [(set_attr "type" "vecstore")])
3120
3121 (define_insn "*vsx_stxvd2x8_le_V8HI"
3122 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
3123 (vec_select:V8HI
3124 (match_operand:V8HI 1 "vsx_register_operand" "wa")
3125 (parallel [(const_int 4) (const_int 5)
3126 (const_int 6) (const_int 7)
3127 (const_int 0) (const_int 1)
3128 (const_int 2) (const_int 3)])))]
3129 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3130 "stxvd2x %x1,%y0"
3131 [(set_attr "type" "vecstore")])
3132
3133 (define_insn "*vsx_stxvd2x16_le_V16QI"
3134 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
3135 (vec_select:V16QI
3136 (match_operand:V16QI 1 "vsx_register_operand" "wa")
3137 (parallel [(const_int 8) (const_int 9)
3138 (const_int 10) (const_int 11)
3139 (const_int 12) (const_int 13)
3140 (const_int 14) (const_int 15)
3141 (const_int 0) (const_int 1)
3142 (const_int 2) (const_int 3)
3143 (const_int 4) (const_int 5)
3144 (const_int 6) (const_int 7)])))]
3145 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3146 "stxvd2x %x1,%y0"
3147 [(set_attr "type" "vecstore")])
3148
3149 ;; Convert a TImode value into V1TImode
3150 (define_expand "vsx_set_v1ti"
3151 [(match_operand:V1TI 0 "nonimmediate_operand")
3152 (match_operand:V1TI 1 "nonimmediate_operand")
3153 (match_operand:TI 2 "input_operand")
3154 (match_operand:QI 3 "u5bit_cint_operand")]
3155 "VECTOR_MEM_VSX_P (V1TImode)"
3156 {
3157 if (operands[3] != const0_rtx)
3158 gcc_unreachable ();
3159
3160 emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[2]));
3161 DONE;
3162 })
3163
3164 ;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
3165 (define_expand "vsx_set_<mode>"
3166 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
3167 (use (match_operand:VSX_D 1 "vsx_register_operand"))
3168 (use (match_operand:<VS_scalar> 2 "gpc_reg_operand"))
3169 (use (match_operand:QI 3 "const_0_to_1_operand"))]
3170 "VECTOR_MEM_VSX_P (<MODE>mode)"
3171 {
3172 rtx dest = operands[0];
3173 rtx vec_reg = operands[1];
3174 rtx value = operands[2];
3175 rtx ele = operands[3];
3176 rtx tmp = gen_reg_rtx (<VS_scalar>mode);
3177
3178 if (ele == const0_rtx)
3179 {
3180 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
3181 emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
3182 DONE;
3183 }
3184 else if (ele == const1_rtx)
3185 {
3186 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
3187 emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
3188 DONE;
3189 }
3190 else
3191 gcc_unreachable ();
3192 })
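
;; Usage sketch (assumes the vec_insert built-in from altivec.h):
;;   #include <altivec.h>
;;   vector double
;;   set0 (vector double v, double x)
;;   {
;;     return vec_insert (x, v, 0);  /* extract element 1, then concat */
;;   }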
3193
3194 ;; Extract a DF/DI element from V2DF/V2DI
3195 ;; Optimize cases where we can do a simple or direct move,
3196 ;; or see if we can avoid doing the move at all.
3197
3198 ;; There are some unresolved problems with reload that show up if an Altivec
3199 ;; register was picked. Limit the scalar value to FPRs for now.
3200
3201 (define_insn "vsx_extract_<mode>"
3202 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d, d, wr, wr")
3203 (vec_select:<VS_scalar>
3204 (match_operand:VSX_D 1 "gpc_reg_operand" "wa, wa, wa, wa")
3205 (parallel
3206 [(match_operand:QI 2 "const_0_to_1_operand" "wD, n, wD, n")])))]
3207 "VECTOR_MEM_VSX_P (<MODE>mode)"
3208 {
3209 int element = INTVAL (operands[2]);
3210 int op0_regno = REGNO (operands[0]);
3211 int op1_regno = REGNO (operands[1]);
3212 int fldDM;
3213
3214 gcc_assert (IN_RANGE (element, 0, 1));
3215 gcc_assert (VSX_REGNO_P (op1_regno));
3216
3217 if (element == VECTOR_ELEMENT_SCALAR_64BIT)
3218 {
3219 if (op0_regno == op1_regno)
3220 return ASM_COMMENT_START " vec_extract to same register";
3221
3222 else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
3223 && TARGET_POWERPC64)
3224 return "mfvsrd %0,%x1";
3225
3226 else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
3227 return "fmr %0,%1";
3228
3229 else if (VSX_REGNO_P (op0_regno))
3230 return "xxlor %x0,%x1,%x1";
3231
3232 else
3233 gcc_unreachable ();
3234 }
3235
3236 else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
3237 && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
3238 return "mfvsrld %0,%x1";
3239
3240 else if (VSX_REGNO_P (op0_regno))
3241 {
3242 fldDM = element << 1;
3243 if (!BYTES_BIG_ENDIAN)
3244 fldDM = 3 - fldDM;
3245 operands[3] = GEN_INT (fldDM);
3246 return "xxpermdi %x0,%x1,%x1,%3";
3247 }
3248
3249 else
3250 gcc_unreachable ();
3251 }
3252 [(set_attr "type" "veclogical,mftgpr,mftgpr,vecperm")
3253 (set_attr "isa" "*,*,p8v,p9v")])
3254
3255 ;; Optimize extracting a single scalar element from memory.
3256 (define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
3257 [(set (match_operand:<VS_scalar> 0 "register_operand" "=wa,wr")
3258 (vec_select:<VSX_D:VS_scalar>
3259 (match_operand:VSX_D 1 "memory_operand" "m,m")
3260 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
3261 (clobber (match_scratch:P 3 "=&b,&b"))]
3262 "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
3263 "#"
3264 "&& reload_completed"
3265 [(set (match_dup 0) (match_dup 4))]
3266 {
3267 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3268 operands[3], <VSX_D:VS_scalar>mode);
3269 }
3270 [(set_attr "type" "fpload,load")
3271 (set_attr "length" "8")])
3272
3273 ;; Optimize storing a single scalar element that is already in the right
3274 ;; location in memory
3275 (define_insn "*vsx_extract_<mode>_store"
3276 [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY")
3277 (vec_select:<VS_scalar>
3278 (match_operand:VSX_D 1 "register_operand" "d,v,v")
3279 (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
3280 "VECTOR_MEM_VSX_P (<MODE>mode)"
3281 "@
3282 stfd%U0%X0 %1,%0
3283 stxsdx %x1,%y0
3284 stxsd %1,%0"
3285 [(set_attr "type" "fpstore")
3286 (set_attr "isa" "*,p7v,p9v")])
3287
3288 ;; Variable V2DI/V2DF extract shift
3289 (define_insn "vsx_vslo_<mode>"
3290 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
3291 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
3292 (match_operand:V2DI 2 "gpc_reg_operand" "v")]
3293 UNSPEC_VSX_VSLO))]
3294 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3295 "vslo %0,%1,%2"
3296 [(set_attr "type" "vecperm")])
3297
3298 ;; Variable V2DI/V2DF extract from a register
3299 (define_insn_and_split "vsx_extract_<mode>_var"
3300 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
3301 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
3302 (match_operand:DI 2 "gpc_reg_operand" "r")]
3303 UNSPEC_VSX_EXTRACT))
3304 (clobber (match_scratch:DI 3 "=r"))
3305 (clobber (match_scratch:V2DI 4 "=&v"))]
3306 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3307 "#"
3308 "&& reload_completed"
3309 [(const_int 0)]
3310 {
3311 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3312 operands[3], operands[4]);
3313 DONE;
3314 })
3315
3316 ;; Variable V2DI/V2DF extract from memory
3317 (define_insn_and_split "*vsx_extract_<mode>_var_load"
3318 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=wa,r")
3319 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "memory_operand" "Q,Q")
3320 (match_operand:DI 2 "gpc_reg_operand" "r,r")]
3321 UNSPEC_VSX_EXTRACT))
3322 (clobber (match_scratch:DI 3 "=&b,&b"))]
3323 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3324 "#"
3325 "&& reload_completed"
3326 [(set (match_dup 0) (match_dup 4))]
3327 {
3328 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3329 operands[3], <VS_scalar>mode);
3330 }
3331 [(set_attr "type" "fpload,load")])
3332
3333 ;; Extract a SF element from V4SF
3334 (define_insn_and_split "vsx_extract_v4sf"
3335 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
3336 (vec_select:SF
3337 (match_operand:V4SF 1 "vsx_register_operand" "wa")
3338 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
3339 (clobber (match_scratch:V4SF 3 "=0"))]
3340 "VECTOR_UNIT_VSX_P (V4SFmode)"
3341 "#"
3342 "&& 1"
3343 [(const_int 0)]
3344 {
3345 rtx op0 = operands[0];
3346 rtx op1 = operands[1];
3347 rtx op2 = operands[2];
3348 rtx op3 = operands[3];
3349 rtx tmp;
3350 HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
3351
3352 if (ele == 0)
3353 tmp = op1;
3354 else
3355 {
3356 if (GET_CODE (op3) == SCRATCH)
3357 op3 = gen_reg_rtx (V4SFmode);
3358 emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
3359 tmp = op3;
3360 }
3361 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
3362 DONE;
3363 }
3364 [(set_attr "length" "8")
3365 (set_attr "type" "fp")])
3366
3367 (define_insn_and_split "*vsx_extract_v4sf_<mode>_load"
3368 [(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
3369 (vec_select:SF
3370 (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
3371 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
3372 (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
3373 "VECTOR_MEM_VSX_P (V4SFmode)"
3374 "#"
3375 "&& reload_completed"
3376 [(set (match_dup 0) (match_dup 4))]
3377 {
3378 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3379 operands[3], SFmode);
3380 }
3381 [(set_attr "type" "fpload,fpload,fpload,load")
3382 (set_attr "length" "8")
3383 (set_attr "isa" "*,p7v,p9v,*")])
3384
3385 ;; Variable V4SF extract from a register
3386 (define_insn_and_split "vsx_extract_v4sf_var"
3387 [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
3388 (unspec:SF [(match_operand:V4SF 1 "gpc_reg_operand" "v")
3389 (match_operand:DI 2 "gpc_reg_operand" "r")]
3390 UNSPEC_VSX_EXTRACT))
3391 (clobber (match_scratch:DI 3 "=r"))
3392 (clobber (match_scratch:V2DI 4 "=&v"))]
3393 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
3394 "#"
3395 "&& reload_completed"
3396 [(const_int 0)]
3397 {
3398 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3399 operands[3], operands[4]);
3400 DONE;
3401 })
3402
3403 ;; Variable V4SF extract from memory
3404 (define_insn_and_split "*vsx_extract_v4sf_var_load"
3405 [(set (match_operand:SF 0 "gpc_reg_operand" "=wa,?r")
3406 (unspec:SF [(match_operand:V4SF 1 "memory_operand" "Q,Q")
3407 (match_operand:DI 2 "gpc_reg_operand" "r,r")]
3408 UNSPEC_VSX_EXTRACT))
3409 (clobber (match_scratch:DI 3 "=&b,&b"))]
3410 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
3411 "#"
3412 "&& reload_completed"
3413 [(set (match_dup 0) (match_dup 4))]
3414 {
3415 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3416 operands[3], SFmode);
3417 }
3418 [(set_attr "type" "fpload,load")])
3419
3420 ;; Expand the builtin form of xxpermdi to canonical rtl.
3421 (define_expand "vsx_xxpermdi_<mode>"
3422 [(match_operand:VSX_L 0 "vsx_register_operand")
3423 (match_operand:VSX_L 1 "vsx_register_operand")
3424 (match_operand:VSX_L 2 "vsx_register_operand")
3425 (match_operand:QI 3 "u5bit_cint_operand")]
3426 "VECTOR_MEM_VSX_P (<MODE>mode)"
3427 {
3428 rtx target = operands[0];
3429 rtx op0 = operands[1];
3430 rtx op1 = operands[2];
3431 int mask = INTVAL (operands[3]);
3432 rtx perm0 = GEN_INT ((mask >> 1) & 1);
3433 rtx perm1 = GEN_INT ((mask & 1) + 2);
3434 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3435
3436 if (<MODE>mode == V2DFmode)
3437 gen = gen_vsx_xxpermdi2_v2df_1;
3438 else
3439 {
3440 gen = gen_vsx_xxpermdi2_v2di_1;
3441 if (<MODE>mode != V2DImode)
3442 {
3443 target = gen_lowpart (V2DImode, target);
3444 op0 = gen_lowpart (V2DImode, op0);
3445 op1 = gen_lowpart (V2DImode, op1);
3446 }
3447 }
3448 emit_insn (gen (target, op0, op1, perm0, perm1));
3449 DONE;
3450 })
3451
3452 ;; Special version of xxpermdi that retains big-endian semantics.
3453 (define_expand "vsx_xxpermdi_<mode>_be"
3454 [(match_operand:VSX_L 0 "vsx_register_operand")
3455 (match_operand:VSX_L 1 "vsx_register_operand")
3456 (match_operand:VSX_L 2 "vsx_register_operand")
3457 (match_operand:QI 3 "u5bit_cint_operand")]
3458 "VECTOR_MEM_VSX_P (<MODE>mode)"
3459 {
3460 rtx target = operands[0];
3461 rtx op0 = operands[1];
3462 rtx op1 = operands[2];
3463 int mask = INTVAL (operands[3]);
3464 rtx perm0 = GEN_INT ((mask >> 1) & 1);
3465 rtx perm1 = GEN_INT ((mask & 1) + 2);
3466 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3467
3468 if (<MODE>mode == V2DFmode)
3469 gen = gen_vsx_xxpermdi2_v2df_1;
3470 else
3471 {
3472 gen = gen_vsx_xxpermdi2_v2di_1;
3473 if (<MODE>mode != V2DImode)
3474 {
3475 target = gen_lowpart (V2DImode, target);
3476 op0 = gen_lowpart (V2DImode, op0);
3477 op1 = gen_lowpart (V2DImode, op1);
3478 }
3479 }
3480 /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
3481 transformation we don't want; it is necessary for
3482 rs6000_expand_vec_perm_const_1 but not for this use. So we
3483 prepare for that by reversing the transformation here. */
3484 if (BYTES_BIG_ENDIAN)
3485 emit_insn (gen (target, op0, op1, perm0, perm1));
3486 else
3487 {
3488 rtx p0 = GEN_INT (3 - INTVAL (perm1));
3489 rtx p1 = GEN_INT (3 - INTVAL (perm0));
3490 emit_insn (gen (target, op1, op0, p0, p1));
3491 }
3492 DONE;
3493 })
3494
3495 (define_insn "vsx_xxpermdi2_<mode>_1"
3496 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3497 (vec_select:VSX_D
3498 (vec_concat:<VS_double>
3499 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3500 (match_operand:VSX_D 2 "vsx_register_operand" "wa"))
3501 (parallel [(match_operand 3 "const_0_to_1_operand" "")
3502 (match_operand 4 "const_2_to_3_operand" "")])))]
3503 "VECTOR_MEM_VSX_P (<MODE>mode)"
3504 {
3505 int op3, op4, mask;
3506
3507 /* For little endian, swap operands and invert/swap selectors
3508 to get the correct xxpermdi. The operand swap sets up the
3509 inputs as a little endian array. The selectors are swapped
3510 because they are defined to use big endian ordering. The
3511 selectors are inverted to get the correct doublewords for
3512 little endian ordering. */
3513 if (BYTES_BIG_ENDIAN)
3514 {
3515 op3 = INTVAL (operands[3]);
3516 op4 = INTVAL (operands[4]);
3517 }
3518 else
3519 {
3520 op3 = 3 - INTVAL (operands[4]);
3521 op4 = 3 - INTVAL (operands[3]);
3522 }
3523
3524 mask = (op3 << 1) | (op4 - 2);
3525 operands[3] = GEN_INT (mask);
3526
3527 if (BYTES_BIG_ENDIAN)
3528 return "xxpermdi %x0,%x1,%x2,%3";
3529 else
3530 return "xxpermdi %x0,%x2,%x1,%3";
3531 }
3532 [(set_attr "type" "vecperm")])
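
;; Worked example of the selector-to-immediate mapping above, on big endian:
;; selectors {0,2} -> (0<<1)|(2-2) = 0, {0,3} -> 1, {1,2} -> 2, {1,3} -> 3.
;; On little endian, selectors {0,3} first become {3-3, 3-0} = {0,3} and the
;; operands are swapped, so the same formula drives the swapped xxpermdi.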
3533
3534 ;; Extraction of a single element in a small integer vector. Until ISA 3.0,
3535 ;; none of the small types were allowed in a vector register, so we had to
3536 ;; extract to a DImode and either do a direct move or store.
3537 (define_expand "vsx_extract_<mode>"
3538 [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand")
3539 (vec_select:<VS_scalar>
3540 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
3541 (parallel [(match_operand:QI 2 "const_int_operand")])))
3542 (clobber (match_scratch:VSX_EXTRACT_I 3))])]
3543 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3544 {
3545 /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}. */
3546 if (TARGET_P9_VECTOR)
3547 {
3548 emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
3549 operands[2]));
3550 DONE;
3551 }
3552 })
3553
3554 (define_insn "vsx_extract_<mode>_p9"
3555 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>")
3556 (vec_select:<VS_scalar>
3557 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
3558 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
3559 (clobber (match_scratch:SI 3 "=r,X"))]
3560 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3561 {
3562 if (which_alternative == 0)
3563 return "#";
3564
3565 else
3566 {
3567 HOST_WIDE_INT elt = INTVAL (operands[2]);
3568 HOST_WIDE_INT elt_adj = (!BYTES_BIG_ENDIAN
3569 ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
3570 : elt);
3571
3572 HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
3573 HOST_WIDE_INT offset = unit_size * elt_adj;
3574
3575 operands[2] = GEN_INT (offset);
3576 if (unit_size == 4)
3577 return "xxextractuw %x0,%x1,%2";
3578 else
3579 return "vextractu<wd> %0,%1,%2";
3580 }
3581 }
3582 [(set_attr "type" "vecsimple")
3583 (set_attr "isa" "p9v,*")])
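
;; Worked example of the offset computation above: extracting element 5 of a
;; V8HI on little endian gives elt_adj = 8 - 1 - 5 = 2 and a byte offset of
;; 2 * 2 = 4, so the insn emits "vextractuh %0,%1,4"; on big endian the same
;; extract uses byte offset 5 * 2 = 10.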
3584
3585 (define_split
3586 [(set (match_operand:<VS_scalar> 0 "int_reg_operand")
3587 (vec_select:<VS_scalar>
3588 (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
3589 (parallel [(match_operand:QI 2 "const_int_operand")])))
3590 (clobber (match_operand:SI 3 "int_reg_operand"))]
3591 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
3592 [(const_int 0)]
3593 {
3594 rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
3595 rtx op1 = operands[1];
3596 rtx op2 = operands[2];
3597 rtx op3 = operands[3];
3598 HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);
3599
3600 emit_move_insn (op3, GEN_INT (offset));
3601 if (BYTES_BIG_ENDIAN)
3602 emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
3603 else
3604 emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
3605 DONE;
3606 })
3607
3608 ;; Optimize zero extracts to eliminate the AND after the extract.
3609 (define_insn_and_split "*vsx_extract_<mode>_di_p9"
3610 [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
3611 (zero_extend:DI
3612 (vec_select:<VS_scalar>
3613 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
3614 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
3615 (clobber (match_scratch:SI 3 "=r,X"))]
3616 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3617 "#"
3618 "&& reload_completed"
3619 [(parallel [(set (match_dup 4)
3620 (vec_select:<VS_scalar>
3621 (match_dup 1)
3622 (parallel [(match_dup 2)])))
3623 (clobber (match_dup 3))])]
3624 {
3625 operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0]));
3626 }
3627 [(set_attr "isa" "p9v,*")])
3628
3629 ;; Optimize stores to use the ISA 3.0 scalar store instructions
3630 (define_insn_and_split "*vsx_extract_<mode>_store_p9"
3631 [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m")
3632 (vec_select:<VS_scalar>
3633 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
3634 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
3635 (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r"))
3636 (clobber (match_scratch:SI 4 "=X,&r"))]
3637 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3638 "#"
3639 "&& reload_completed"
3640 [(parallel [(set (match_dup 3)
3641 (vec_select:<VS_scalar>
3642 (match_dup 1)
3643 (parallel [(match_dup 2)])))
3644 (clobber (match_dup 4))])
3645 (set (match_dup 0)
3646 (match_dup 3))])
3647
3648 (define_insn_and_split "*vsx_extract_si"
3649 [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z")
3650 (vec_select:SI
3651 (match_operand:V4SI 1 "gpc_reg_operand" "v,v,v")
3652 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
3653 (clobber (match_scratch:V4SI 3 "=v,v,v"))]
3654 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
3655 "#"
3656 "&& reload_completed"
3657 [(const_int 0)]
3658 {
3659 rtx dest = operands[0];
3660 rtx src = operands[1];
3661 rtx element = operands[2];
3662 rtx vec_tmp = operands[3];
3663 int value;
3664
3665 if (!BYTES_BIG_ENDIAN)
3666 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3667
3668 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3669 instruction. */
3670 value = INTVAL (element);
3671 if (value != 1)
3672 emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
3673 else
3674 vec_tmp = src;
3675
3676 if (MEM_P (operands[0]))
3677 {
3678 if (can_create_pseudo_p ())
3679 dest = rs6000_force_indexed_or_indirect_mem (dest);
3680
3681 if (TARGET_P8_VECTOR)
3682 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3683 else
3684 emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
3685 }
3686
3687 else if (TARGET_P8_VECTOR)
3688 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3689 else
3690 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3691 gen_rtx_REG (DImode, REGNO (vec_tmp)));
3692
3693 DONE;
3694 }
3695 [(set_attr "type" "mftgpr,vecperm,fpstore")
3696 (set_attr "length" "8")
3697 (set_attr "isa" "*,p8v,*")])
3698
3699 (define_insn_and_split "*vsx_extract_<mode>_p8"
3700 [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r")
3701 (vec_select:<VS_scalar>
3702 (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
3703 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3704 (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
3705 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3706 && !TARGET_P9_VECTOR"
3707 "#"
3708 "&& reload_completed"
3709 [(const_int 0)]
3710 {
3711 rtx dest = operands[0];
3712 rtx src = operands[1];
3713 rtx element = operands[2];
3714 rtx vec_tmp = operands[3];
3715 int value;
3716
3717 if (!BYTES_BIG_ENDIAN)
3718 element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));
3719
3720 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3721 instruction. */
3722 value = INTVAL (element);
3723 if (<MODE>mode == V16QImode)
3724 {
3725 if (value != 7)
3726 emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
3727 else
3728 vec_tmp = src;
3729 }
3730 else if (<MODE>mode == V8HImode)
3731 {
3732 if (value != 3)
3733 emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
3734 else
3735 vec_tmp = src;
3736 }
3737 else
3738 gcc_unreachable ();
3739
3740 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3741 gen_rtx_REG (DImode, REGNO (vec_tmp)));
3742 DONE;
3743 }
3744 [(set_attr "type" "mftgpr")])
3745
3746 ;; Optimize extracting a single scalar element from memory.
3747 (define_insn_and_split "*vsx_extract_<mode>_load"
3748 [(set (match_operand:<VS_scalar> 0 "register_operand" "=r")
3749 (vec_select:<VS_scalar>
3750 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
3751 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3752 (clobber (match_scratch:DI 3 "=&b"))]
3753 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3754 "#"
3755 "&& reload_completed"
3756 [(set (match_dup 0) (match_dup 4))]
3757 {
3758 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3759 operands[3], <VS_scalar>mode);
3760 }
3761 [(set_attr "type" "load")
3762 (set_attr "length" "8")])
3763
3764 ;; Variable V16QI/V8HI/V4SI extract from a register
3765 (define_insn_and_split "vsx_extract_<mode>_var"
3766 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r")
3767 (unspec:<VS_scalar>
3768 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,v")
3769 (match_operand:DI 2 "gpc_reg_operand" "r,r")]
3770 UNSPEC_VSX_EXTRACT))
3771 (clobber (match_scratch:DI 3 "=r,r"))
3772 (clobber (match_scratch:V2DI 4 "=X,&v"))]
3773 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3774 "#"
3775 "&& reload_completed"
3776 [(const_int 0)]
3777 {
3778 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3779 operands[3], operands[4]);
3780 DONE;
3781 }
3782 [(set_attr "isa" "p9v,*")])
3783
3784 ;; Variable V16QI/V8HI/V4SI extract from memory
3785 (define_insn_and_split "*vsx_extract_<mode>_var_load"
3786 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r")
3787 (unspec:<VS_scalar>
3788 [(match_operand:VSX_EXTRACT_I 1 "memory_operand" "Q")
3789 (match_operand:DI 2 "gpc_reg_operand" "r")]
3790 UNSPEC_VSX_EXTRACT))
3791 (clobber (match_scratch:DI 3 "=&b"))]
3792 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3793 "#"
3794 "&& reload_completed"
3795 [(set (match_dup 0) (match_dup 4))]
3796 {
3797 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3798 operands[3], <VS_scalar>mode);
3799 }
3800 [(set_attr "type" "load")])
3801
3802 ;; VSX_EXTRACT optimizations
3803 ;; Optimize double d = (double) vec_extract (vi, <n>)
3804 ;; Get the element into the top position and use XVCVSXWDP/XVCVUXWDP.
3805 (define_insn_and_split "*vsx_extract_si_<uns>float_df"
3806 [(set (match_operand:DF 0 "gpc_reg_operand" "=wa")
3807 (any_float:DF
3808 (vec_select:SI
3809 (match_operand:V4SI 1 "gpc_reg_operand" "v")
3810 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3811 (clobber (match_scratch:V4SI 3 "=v"))]
3812 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3813 "#"
3814 "&& 1"
3815 [(const_int 0)]
3816 {
3817 rtx dest = operands[0];
3818 rtx src = operands[1];
3819 rtx element = operands[2];
3820 rtx v4si_tmp = operands[3];
3821 int value;
3822
3823 if (!BYTES_BIG_ENDIAN)
3824 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3825
3826 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3827 instruction. */
3828 value = INTVAL (element);
3829 if (value != 0)
3830 {
3831 if (GET_CODE (v4si_tmp) == SCRATCH)
3832 v4si_tmp = gen_reg_rtx (V4SImode);
3833 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
3834 }
3835 else
3836 v4si_tmp = src;
3837
3838 emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
3839 DONE;
3840 })
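
;; A sketch of the sequence the split above emits when element <n> is not
;; already in the convertible slot (mnemonics follow the insns generated
;; above):
;;
;;   vspltw    tmp,vi,n'    ; n' = endian-adjusted element number
;;   xvcvsxwdp dst,tmp      ; xvcvuxwdp for the unsigned variant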
3841
3842 ;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
3843 ;; where <type> is a floating point type supported by the hardware that is
3844 ;; not double. First convert the value to double, and then to the desired
3845 ;; type.
3846 (define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
3847 [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=wa")
3848 (any_float:VSX_EXTRACT_FL
3849 (vec_select:SI
3850 (match_operand:V4SI 1 "gpc_reg_operand" "v")
3851 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3852 (clobber (match_scratch:V4SI 3 "=v"))
3853 (clobber (match_scratch:DF 4 "=wa"))]
3854 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3855 "#"
3856 "&& 1"
3857 [(const_int 0)]
3858 {
3859 rtx dest = operands[0];
3860 rtx src = operands[1];
3861 rtx element = operands[2];
3862 rtx v4si_tmp = operands[3];
3863 rtx df_tmp = operands[4];
3864 int value;
3865
3866 if (!BYTES_BIG_ENDIAN)
3867 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3868
3869 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3870 instruction. */
3871 value = INTVAL (element);
3872 if (value != 0)
3873 {
3874 if (GET_CODE (v4si_tmp) == SCRATCH)
3875 v4si_tmp = gen_reg_rtx (V4SImode);
3876 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
3877 }
3878 else
3879 v4si_tmp = src;
3880
3881 if (GET_CODE (df_tmp) == SCRATCH)
3882 df_tmp = gen_reg_rtx (DFmode);
3883
3884 emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));
3885
3886 if (<MODE>mode == SFmode)
3887 emit_insn (gen_truncdfsf2 (dest, df_tmp));
3888 else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
3889 emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
3890 else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
3891 && TARGET_FLOAT128_HW)
3892 emit_insn (gen_extenddftf2_hw (dest, df_tmp));
3893 else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
3894 emit_insn (gen_extenddfif2 (dest, df_tmp));
3895 else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
3896 emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
3897 else
3898 gcc_unreachable ();
3899
3900 DONE;
3901 })
3902
3903 ;; Optimize <ftype> f = (<ftype>) vec_extract (<vtype>, <n>)
3904 ;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE
3905 ;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
3906 ;; vector short or vector unsigned short.
3907 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>"
3908 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa")
3909 (float:FL_CONV
3910 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3911 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3912 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3913 (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
3914 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3915 && TARGET_P9_VECTOR"
3916 "#"
3917 "&& reload_completed"
3918 [(parallel [(set (match_dup 3)
3919 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3920 (match_dup 1)
3921 (parallel [(match_dup 2)])))
3922 (clobber (scratch:SI))])
3923 (set (match_dup 4)
3924 (sign_extend:DI (match_dup 3)))
3925 (set (match_dup 0)
3926 (float:<FL_CONV:MODE> (match_dup 4)))]
3927 {
3928 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
3929 }
3930 [(set_attr "isa" "<FL_CONV:VSisa>")])
3931
3932 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>"
3933 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa")
3934 (unsigned_float:FL_CONV
3935 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3936 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3937 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3938 (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
3939 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3940 && TARGET_P9_VECTOR"
3941 "#"
3942 "&& reload_completed"
3943 [(parallel [(set (match_dup 3)
3944 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3945 (match_dup 1)
3946 (parallel [(match_dup 2)])))
3947 (clobber (scratch:SI))])
3948 (set (match_dup 0)
3949 (float:<FL_CONV:MODE> (match_dup 4)))]
3950 {
3951 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
3952 }
3953 [(set_attr "isa" "<FL_CONV:VSisa>")])
3954
3955 ;; V4SI/V8HI/V16QI set operation on ISA 3.0
3956 (define_insn "vsx_set_<mode>_p9"
3957 [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
3958 (unspec:VSX_EXTRACT_I
3959 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
3960 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>")
3961 (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
3962 UNSPEC_VSX_SET))]
3963 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3964 {
3965 int ele = INTVAL (operands[3]);
3966 int nunits = GET_MODE_NUNITS (<MODE>mode);
3967
3968 if (!BYTES_BIG_ENDIAN)
3969 ele = nunits - 1 - ele;
3970
3971 operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * ele);
3972 if (<MODE>mode == V4SImode)
3973 return "xxinsertw %x0,%x2,%3";
3974 else
3975 return "vinsert<wd> %0,%2,%3";
3976 }
3977 [(set_attr "type" "vecperm")])
3978
3979 (define_insn_and_split "vsx_set_v4sf_p9"
3980 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3981 (unspec:V4SF
3982 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3983 (match_operand:SF 2 "gpc_reg_operand" "wa")
3984 (match_operand:QI 3 "const_0_to_3_operand" "n")]
3985 UNSPEC_VSX_SET))
3986 (clobber (match_scratch:SI 4 "=&wa"))]
3987 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3988 "#"
3989 "&& reload_completed"
3990 [(set (match_dup 5)
3991 (unspec:V4SF [(match_dup 2)]
3992 UNSPEC_VSX_CVDPSPN))
3993 (parallel [(set (match_dup 4)
3994 (vec_select:SI (match_dup 6)
3995 (parallel [(match_dup 7)])))
3996 (clobber (scratch:SI))])
3997 (set (match_dup 8)
3998 (unspec:V4SI [(match_dup 8)
3999 (match_dup 4)
4000 (match_dup 3)]
4001 UNSPEC_VSX_SET))]
4002 {
4003 unsigned int tmp_regno = reg_or_subregno (operands[4]);
4004
4005 operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
4006 operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
4007 operands[7] = GEN_INT (BYTES_BIG_ENDIAN ? 0 : 3);
4008 operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
4009 }
4010 [(set_attr "type" "vecperm")
4011 (set_attr "length" "12")
4012 (set_attr "isa" "p9v")])
4013
4014 ;; Special case setting 0.0f to a V4SF element
4015 (define_insn_and_split "*vsx_set_v4sf_p9_zero"
4016 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4017 (unspec:V4SF
4018 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4019 (match_operand:SF 2 "zero_fp_constant" "j")
4020 (match_operand:QI 3 "const_0_to_3_operand" "n")]
4021 UNSPEC_VSX_SET))
4022 (clobber (match_scratch:SI 4 "=&wa"))]
4023 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
4024 "#"
4025 "&& reload_completed"
4026 [(set (match_dup 4)
4027 (const_int 0))
4028 (set (match_dup 5)
4029 (unspec:V4SI [(match_dup 5)
4030 (match_dup 4)
4031 (match_dup 3)]
4032 UNSPEC_VSX_SET))]
4033 {
4034 operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
4035 }
4036 [(set_attr "type" "vecperm")
4037 (set_attr "length" "8")
4038 (set_attr "isa" "p9v")])
4039
4040 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
4041 ;; that is in the default scalar position (1 for big endian, 2 for little
4042 ;; endian). We just need to do an xxinsertw since the element is in the
4043 ;; correct location.
4044
4045 (define_insn "*vsx_insert_extract_v4sf_p9"
4046 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4047 (unspec:V4SF
4048 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4049 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4050 (parallel
4051 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4052 (match_operand:QI 4 "const_0_to_3_operand" "n")]
4053 UNSPEC_VSX_SET))]
4054 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64
4055 && (INTVAL (operands[3]) == (BYTES_BIG_ENDIAN ? 1 : 2))"
4056 {
4057 int ele = INTVAL (operands[4]);
4058
4059 if (!BYTES_BIG_ENDIAN)
4060 ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele;
4061
4062 operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
4063 return "xxinsertw %x0,%x2,%4";
4064 }
4065 [(set_attr "type" "vecperm")])
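
;; Worked example of the byte-offset computation above: inserting at element
;; m = 3 on little endian gives ele = 4 - 1 - 3 = 0 and byte offset
;; 4 * 0 = 0, so the insn emits "xxinsertw %x0,%x2,0"; on big endian m = 3
;; stays 3 and the offset is 4 * 3 = 12.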
4066
4067 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
4068 ;; that is in the default scalar position (1 for big endian, 2 for little
4069 ;; endian). Convert the insert/extract to int and avoid doing the conversion.
4070
4071 (define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
4072 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4073 (unspec:V4SF
4074 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4075 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4076 (parallel
4077 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4078 (match_operand:QI 4 "const_0_to_3_operand" "n")]
4079 UNSPEC_VSX_SET))
4080 (clobber (match_scratch:SI 5 "=&wa"))]
4081 "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
4082 && TARGET_P9_VECTOR && TARGET_POWERPC64
4083 && (INTVAL (operands[3]) != (BYTES_BIG_ENDIAN ? 1 : 2))"
4084 "#"
4085 "&& 1"
4086 [(parallel [(set (match_dup 5)
4087 (vec_select:SI (match_dup 6)
4088 (parallel [(match_dup 3)])))
4089 (clobber (scratch:SI))])
4090 (set (match_dup 7)
4091 (unspec:V4SI [(match_dup 8)
4092 (match_dup 5)
4093 (match_dup 4)]
4094 UNSPEC_VSX_SET))]
4095 {
4096 if (GET_CODE (operands[5]) == SCRATCH)
4097 operands[5] = gen_reg_rtx (SImode);
4098
4099 operands[6] = gen_lowpart (V4SImode, operands[2]);
4100 operands[7] = gen_lowpart (V4SImode, operands[0]);
4101 operands[8] = gen_lowpart (V4SImode, operands[1]);
4102 }
4103 [(set_attr "type" "vecperm")
4104 (set_attr "isa" "p9v")])
4105
4106 ;; Expanders for builtins
4107 (define_expand "vsx_mergel_<mode>"
4108 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4109 (use (match_operand:VSX_D 1 "vsx_register_operand"))
4110 (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4111 "VECTOR_MEM_VSX_P (<MODE>mode)"
4112 {
4113 rtvec v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
4114 rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4115 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4116 emit_insn (gen_rtx_SET (operands[0], x));
4117 DONE;
4118 })
4119
4120 (define_expand "vsx_mergeh_<mode>"
4121 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4122 (use (match_operand:VSX_D 1 "vsx_register_operand"))
4123 (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4124 "VECTOR_MEM_VSX_P (<MODE>mode)"
4125 {
4126 rtvec v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
4127 rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4128 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4129 emit_insn (gen_rtx_SET (operands[0], x));
4130 DONE;
4131 })
4132
4133 ;; V2DF/V2DI splat
4134 ;; We separate the register splat insn from the memory splat insn to force the
4135 ;; register allocator to generate the indexed form of the SPLAT when it is
4136 ;; given an offsettable memory reference. Otherwise, if the register and
4137 ;; memory insns were combined into a single insn, the register allocator will
4138 ;; load the value into a register, and then do a double word permute.
4139 (define_expand "vsx_splat_<mode>"
4140 [(set (match_operand:VSX_D 0 "vsx_register_operand")
4141 (vec_duplicate:VSX_D
4142 (match_operand:<VS_scalar> 1 "input_operand")))]
4143 "VECTOR_MEM_VSX_P (<MODE>mode)"
4144 {
4145 rtx op1 = operands[1];
4146 if (MEM_P (op1))
4147 operands[1] = rs6000_force_indexed_or_indirect_mem (op1);
4148 else if (!REG_P (op1))
4149 operands[1] = force_reg (<VSX_D:VS_scalar>mode, op1);
4150 })
4151
4152 (define_insn "vsx_splat_<mode>_reg"
4153 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
4154 (vec_duplicate:VSX_D
4155 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")))]
4156 "VECTOR_MEM_VSX_P (<MODE>mode)"
4157 "@
4158 xxpermdi %x0,%x1,%x1,0
4159 mtvsrdd %x0,%1,%1"
4160 [(set_attr "type" "vecperm")])
4161
4162 (define_insn "vsx_splat_<mode>_mem"
4163 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4164 (vec_duplicate:VSX_D
4165 (match_operand:<VSX_D:VS_scalar> 1 "memory_operand" "Z")))]
4166 "VECTOR_MEM_VSX_P (<MODE>mode)"
4167 "lxvdsx %x0,%y1"
4168 [(set_attr "type" "vecload")])
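
;; A minimal usage sketch in C (hypothetical function, assuming the standard
;; altivec.h vec_splats interface) of the splat patterns above; splatting
;; straight from memory should select the lxvdsx form:
;;
;;   #include <altivec.h>
;;
;;   vector double
;;   splat_mem (const double *p)
;;   {
;;     return vec_splats (*p);   /* ideally a single lxvdsx */
;;   }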
4169
4170 ;; V4SI splat support
4171 (define_insn "vsx_splat_v4si"
4172 [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")
4173 (vec_duplicate:V4SI
4174 (match_operand:SI 1 "splat_input_operand" "r,Z")))]
4175 "TARGET_P9_VECTOR"
4176 "@
4177 mtvsrws %x0,%1
4178 lxvwsx %x0,%y1"
4179 [(set_attr "type" "vecperm,vecload")])
4180
4181 ;; SImode is not currently allowed in vector registers. This pattern
4182 ;; allows us to use direct move to get the value in a vector register
4183 ;; so that we can use XXSPLTW.
4184 (define_insn "vsx_splat_v4si_di"
4185 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
4186 (vec_duplicate:V4SI
4187 (truncate:SI
4188 (match_operand:DI 1 "gpc_reg_operand" "wa,r"))))]
4189 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4190 "@
4191 xxspltw %x0,%x1,1
4192 mtvsrws %x0,%1"
4193 [(set_attr "type" "vecperm")
4194 (set_attr "isa" "p8v,*")])
4195
4196 ;; V4SF splat (ISA 3.0)
4197 (define_insn_and_split "vsx_splat_v4sf"
4198 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
4199 (vec_duplicate:V4SF
4200 (match_operand:SF 1 "splat_input_operand" "Z,wa,r")))]
4201 "TARGET_P9_VECTOR"
4202 "@
4203 lxvwsx %x0,%y1
4204 #
4205 mtvsrws %x0,%1"
4206 "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
4207 [(set (match_dup 0)
4208 (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
4209 (set (match_dup 0)
4210 (unspec:V4SF [(match_dup 0)
4211 (const_int 0)] UNSPEC_VSX_XXSPLTW))]
4212 ""
4213 [(set_attr "type" "vecload,vecperm,mftgpr")
4214 (set_attr "length" "*,8,*")
4215 (set_attr "isa" "*,p8v,*")])
4216
4217 ;; V4SF/V4SI splat from a vector element
4218 (define_insn "vsx_xxspltw_<mode>"
4219 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4220 (vec_duplicate:VSX_W
4221 (vec_select:<VS_scalar>
4222 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4223 (parallel
4224 [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
4225 "VECTOR_MEM_VSX_P (<MODE>mode)"
4226 {
4227 if (!BYTES_BIG_ENDIAN)
4228 operands[2] = GEN_INT (3 - INTVAL (operands[2]));
4229
4230 return "xxspltw %x0,%x1,%2";
4231 }
4232 [(set_attr "type" "vecperm")])
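
;; Worked example of the endian adjustment above: xxspltw numbers words from
;; the other end of the register on little endian, so splatting element 1 of
;; the array-order numbering becomes immediate 3 - 1 = 2 in the emitted
;; "xxspltw %x0,%x1,2".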
4233
4234 (define_insn "vsx_xxspltw_<mode>_direct"
4235 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4236 (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wa")
4237 (match_operand:QI 2 "u5bit_cint_operand" "i")]
4238 UNSPEC_VSX_XXSPLTW))]
4239 "VECTOR_MEM_VSX_P (<MODE>mode)"
4240 "xxspltw %x0,%x1,%2"
4241 [(set_attr "type" "vecperm")])
4242
4243 ;; V16QI/V8HI splat support on ISA 2.07
4244 (define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
4245 [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
4246 (vec_duplicate:VSX_SPLAT_I
4247 (truncate:<VS_scalar>
4248 (match_operand:DI 1 "altivec_register_operand" "v"))))]
4249 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
4250 "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
4251 [(set_attr "type" "vecperm")])
4252
4253 ;; V2DF/V2DI splat for use by vec_splat builtin
4254 (define_insn "vsx_xxspltd_<mode>"
4255 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4256 (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
4257 (match_operand:QI 2 "u5bit_cint_operand" "i")]
4258 UNSPEC_VSX_XXSPLTD))]
4259 "VECTOR_MEM_VSX_P (<MODE>mode)"
4260 {
4261 if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
4262 || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
4263 return "xxpermdi %x0,%x1,%x1,0";
4264 else
4265 return "xxpermdi %x0,%x1,%x1,3";
4266 }
4267 [(set_attr "type" "vecperm")])
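
;; Worked example: splatting doubleword 0 uses immediate 0 on big endian but
;; immediate 3 on little endian, since element 0 of the vec_splat numbering
;; is doubleword 1 of the hardware (big-endian) numbering there.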
4268
4269 ;; V4SF/V4SI interleave
4270 (define_insn "vsx_xxmrghw_<mode>"
4271 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4272 (vec_select:VSX_W
4273 (vec_concat:<VS_double>
4274 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4275 (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
4276 (parallel [(const_int 0) (const_int 4)
4277 (const_int 1) (const_int 5)])))]
4278 "VECTOR_MEM_VSX_P (<MODE>mode)"
4279 {
4280 if (BYTES_BIG_ENDIAN)
4281 return "xxmrghw %x0,%x1,%x2";
4282 else
4283 return "xxmrglw %x0,%x2,%x1";
4284 }
4285 [(set_attr "type" "vecperm")])
4286
4287 (define_insn "vsx_xxmrglw_<mode>"
4288 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4289 (vec_select:VSX_W
4290 (vec_concat:<VS_double>
4291 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4292 (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
4293 (parallel [(const_int 2) (const_int 6)
4294 (const_int 3) (const_int 7)])))]
4295 "VECTOR_MEM_VSX_P (<MODE>mode)"
4296 {
4297 if (BYTES_BIG_ENDIAN)
4298 return "xxmrglw %x0,%x1,%x2";
4299 else
4300 return "xxmrghw %x0,%x2,%x1";
4301 }
4302 [(set_attr "type" "vecperm")])
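
;; Worked example of the endian handling above: on big endian, word selectors
;; {2,6,3,7} of the concatenation are exactly xxmrglw; on little endian the
;; same RTL selection is the high merge of the swapped inputs, hence
;; "xxmrghw %x0,%x2,%x1".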
4303
4304 ;; Shift left double by word immediate
4305 (define_insn "vsx_xxsldwi_<mode>"
4306 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=wa")
4307 (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "wa")
4308 (match_operand:VSX_L 2 "vsx_register_operand" "wa")
4309 (match_operand:QI 3 "u5bit_cint_operand" "i")]
4310 UNSPEC_VSX_SLDWI))]
4311 "VECTOR_MEM_VSX_P (<MODE>mode)"
4312 "xxsldwi %x0,%x1,%x2,%3"
4313 [(set_attr "type" "vecperm")
4314 (set_attr "isa" "<VSisa>")])
4315
4316 \f
4317 ;; Vector reduction insns and splitters
4318
4319 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
4320 [(set (match_operand:V2DF 0 "vfloat_operand" "=&wa,wa")
4321 (VEC_reduc:V2DF
4322 (vec_concat:V2DF
4323 (vec_select:DF
4324 (match_operand:V2DF 1 "vfloat_operand" "wa,wa")
4325 (parallel [(const_int 1)]))
4326 (vec_select:DF
4327 (match_dup 1)
4328 (parallel [(const_int 0)])))
4329 (match_dup 1)))
4330 (clobber (match_scratch:V2DF 2 "=0,&wa"))]
4331 "VECTOR_UNIT_VSX_P (V2DFmode)"
4332 "#"
4333 ""
4334 [(const_int 0)]
4335 {
4336 rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
4337 ? gen_reg_rtx (V2DFmode)
4338 : operands[2];
4339 emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
4340 emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
4341 DONE;
4342 }
4343 [(set_attr "length" "8")
4344 (set_attr "type" "veccomplex")])
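
;; For reference, the split above turns a V2DF horizontal reduction into a
;; two-insn sequence; e.g. for an add reduction (sketch of the generated
;; assembly):
;;
;;   xxsldwi tmp,x,x,2    ; rotate by one doubleword
;;   xvadddp dst,tmp,x    ; both halves of dst now hold the horizontal sum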
4345
4346 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
4347 [(set (match_operand:V4SF 0 "vfloat_operand" "=wa")
4348 (VEC_reduc:V4SF
4349 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4350 (match_operand:V4SF 1 "vfloat_operand" "wa")))
4351 (clobber (match_scratch:V4SF 2 "=&wa"))
4352 (clobber (match_scratch:V4SF 3 "=&wa"))]
4353 "VECTOR_UNIT_VSX_P (V4SFmode)"
4354 "#"
4355 ""
4356 [(const_int 0)]
4357 {
4358 rtx op0 = operands[0];
4359 rtx op1 = operands[1];
4360 rtx tmp2, tmp3, tmp4;
4361
4362 if (can_create_pseudo_p ())
4363 {
4364 tmp2 = gen_reg_rtx (V4SFmode);
4365 tmp3 = gen_reg_rtx (V4SFmode);
4366 tmp4 = gen_reg_rtx (V4SFmode);
4367 }
4368 else
4369 {
4370 tmp2 = operands[2];
4371 tmp3 = operands[3];
4372 tmp4 = tmp2;
4373 }
4374
4375 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4376 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4377 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4378 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
4379 DONE;
4380 }
4381 [(set_attr "length" "16")
4382 (set_attr "type" "veccomplex")])
4383
4384 ;; Combiner patterns with the vector reduction patterns that know we can get
4385 ;; to the top element of the V2DF array without doing an extract.
4386
4387 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
4388 [(set (match_operand:DF 0 "vfloat_operand" "=&wa,wa")
4389 (vec_select:DF
4390 (VEC_reduc:V2DF
4391 (vec_concat:V2DF
4392 (vec_select:DF
4393 (match_operand:V2DF 1 "vfloat_operand" "wa,wa")
4394 (parallel [(const_int 1)]))
4395 (vec_select:DF
4396 (match_dup 1)
4397 (parallel [(const_int 0)])))
4398 (match_dup 1))
4399 (parallel [(const_int 1)])))
4400 (clobber (match_scratch:DF 2 "=0,&wa"))]
4401 "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V2DFmode)"
4402 "#"
4403 ""
4404 [(const_int 0)]
4405 {
4406 rtx hi = gen_highpart (DFmode, operands[1]);
4407 rtx lo = (GET_CODE (operands[2]) == SCRATCH)
4408 ? gen_reg_rtx (DFmode)
4409 : operands[2];
4410
4411 emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
4412 emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
4413 DONE;
4414 }
4415 [(set_attr "length" "8")
4416 (set_attr "type" "veccomplex")])
4417
4418 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
4419 [(set (match_operand:SF 0 "vfloat_operand" "=f")
4420 (vec_select:SF
4421 (VEC_reduc:V4SF
4422 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4423 (match_operand:V4SF 1 "vfloat_operand" "wa"))
4424 (parallel [(const_int 3)])))
4425 (clobber (match_scratch:V4SF 2 "=&wa"))
4426 (clobber (match_scratch:V4SF 3 "=&wa"))
4427 (clobber (match_scratch:V4SF 4 "=0"))]
4428 "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V4SFmode)"
4429 "#"
4430 ""
4431 [(const_int 0)]
4432 {
4433 rtx op0 = operands[0];
4434 rtx op1 = operands[1];
4435 rtx tmp2, tmp3, tmp4, tmp5;
4436
4437 if (can_create_pseudo_p ())
4438 {
4439 tmp2 = gen_reg_rtx (V4SFmode);
4440 tmp3 = gen_reg_rtx (V4SFmode);
4441 tmp4 = gen_reg_rtx (V4SFmode);
4442 tmp5 = gen_reg_rtx (V4SFmode);
4443 }
4444 else
4445 {
4446 tmp2 = operands[2];
4447 tmp3 = operands[3];
4448 tmp4 = tmp2;
4449 tmp5 = operands[4];
4450 }
4451
4452 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4453 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4454 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4455 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
4456 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
4457 DONE;
4458 }
4459 [(set_attr "length" "20")
4460 (set_attr "type" "veccomplex")])
4461
4462 \f
4463 ;; Power8 Vector fusion. The fused ops must be physically adjacent.
4464 (define_peephole
4465 [(set (match_operand:P 0 "base_reg_operand")
4466 (match_operand:P 1 "short_cint_operand"))
4467 (set (match_operand:VSX_M 2 "vsx_register_operand")
4468 (mem:VSX_M (plus:P (match_dup 0)
4469 (match_operand:P 3 "int_reg_operand"))))]
4470 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4471 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4472 [(set_attr "length" "8")
4473 (set_attr "type" "vecload")])
4474
4475 (define_peephole
4476 [(set (match_operand:P 0 "base_reg_operand")
4477 (match_operand:P 1 "short_cint_operand"))
4478 (set (match_operand:VSX_M 2 "vsx_register_operand")
4479 (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand")
4480 (match_dup 0))))]
4481 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4482 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4483 [(set_attr "length" "8")
4484 (set_attr "type" "vecload")])
4485
4486 \f
4487 ;; ISA 3.0 vector extend sign support
4488
4489 (define_insn "vsx_sign_extend_qi_<mode>"
4490 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4491 (unspec:VSINT_84
4492 [(match_operand:V16QI 1 "vsx_register_operand" "v")]
4493 UNSPEC_VSX_SIGN_EXTEND))]
4494 "TARGET_P9_VECTOR"
4495 "vextsb2<wd> %0,%1"
4496 [(set_attr "type" "vecexts")])
4497
4498 (define_insn "vsx_sign_extend_hi_<mode>"
4499 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4500 (unspec:VSINT_84
4501 [(match_operand:V8HI 1 "vsx_register_operand" "v")]
4502 UNSPEC_VSX_SIGN_EXTEND))]
4503 "TARGET_P9_VECTOR"
4504 "vextsh2<wd> %0,%1"
4505 [(set_attr "type" "vecexts")])
4506
4507 (define_insn "*vsx_sign_extend_si_v2di"
4508 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
4509 (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
4510 UNSPEC_VSX_SIGN_EXTEND))]
4511 "TARGET_P9_VECTOR"
4512 "vextsw2d %0,%1"
4513 [(set_attr "type" "vecexts")])
4514
4515 \f
4516 ;; ISA 3.0 Binary Floating-Point Support
4517
4518 ;; VSX Scalar Extract Exponent Quad-Precision
4519 (define_insn "xsxexpqp_<mode>"
4520 [(set (match_operand:DI 0 "altivec_register_operand" "=v")
4521 (unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4522 UNSPEC_VSX_SXEXPDP))]
4523 "TARGET_P9_VECTOR"
4524 "xsxexpqp %0,%1"
4525 [(set_attr "type" "vecmove")])
4526
4527 ;; VSX Scalar Extract Exponent Double-Precision
4528 (define_insn "xsxexpdp"
4529 [(set (match_operand:DI 0 "register_operand" "=r")
4530 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4531 UNSPEC_VSX_SXEXPDP))]
4532 "TARGET_P9_VECTOR && TARGET_64BIT"
4533 "xsxexpdp %0,%x1"
4534 [(set_attr "type" "integer")])
4535
4536 ;; VSX Scalar Extract Significand Quad-Precision
4537 (define_insn "xsxsigqp_<mode>"
4538 [(set (match_operand:TI 0 "altivec_register_operand" "=v")
4539 (unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4540 UNSPEC_VSX_SXSIG))]
4541 "TARGET_P9_VECTOR"
4542 "xsxsigqp %0,%1"
4543 [(set_attr "type" "vecmove")])
4544
4545 ;; VSX Scalar Extract Significand Double-Precision
4546 (define_insn "xsxsigdp"
4547 [(set (match_operand:DI 0 "register_operand" "=r")
4548 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4549 UNSPEC_VSX_SXSIG))]
4550 "TARGET_P9_VECTOR && TARGET_64BIT"
4551 "xsxsigdp %0,%x1"
4552 [(set_attr "type" "integer")])
4553
4554 ;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
4555 (define_insn "xsiexpqpf_<mode>"
4556 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4557 (unspec:IEEE128
4558 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4559 (match_operand:DI 2 "altivec_register_operand" "v")]
4560 UNSPEC_VSX_SIEXPQP))]
4561 "TARGET_P9_VECTOR"
4562 "xsiexpqp %0,%1,%2"
4563 [(set_attr "type" "vecmove")])
4564
4565 ;; VSX Scalar Insert Exponent Quad-Precision
4566 (define_insn "xsiexpqp_<mode>"
4567 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4568 (unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v")
4569 (match_operand:DI 2 "altivec_register_operand" "v")]
4570 UNSPEC_VSX_SIEXPQP))]
4571 "TARGET_P9_VECTOR"
4572 "xsiexpqp %0,%1,%2"
4573 [(set_attr "type" "vecmove")])
4574
4575 ;; VSX Scalar Insert Exponent Double-Precision
4576 (define_insn "xsiexpdp"
4577 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4578 (unspec:DF [(match_operand:DI 1 "register_operand" "r")
4579 (match_operand:DI 2 "register_operand" "r")]
4580 UNSPEC_VSX_SIEXPDP))]
4581 "TARGET_P9_VECTOR && TARGET_64BIT"
4582 "xsiexpdp %x0,%1,%2"
4583 [(set_attr "type" "fpsimple")])
4584
4585 ;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
4586 (define_insn "xsiexpdpf"
4587 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4588 (unspec:DF [(match_operand:DF 1 "register_operand" "r")
4589 (match_operand:DI 2 "register_operand" "r")]
4590 UNSPEC_VSX_SIEXPDP))]
4591 "TARGET_P9_VECTOR && TARGET_64BIT"
4592 "xsiexpdp %x0,%1,%2"
4593 [(set_attr "type" "fpsimple")])
4594
4595 ;; VSX Scalar Compare Exponents Double-Precision
4596 (define_expand "xscmpexpdp_<code>"
4597 [(set (match_dup 3)
4598 (compare:CCFP
4599 (unspec:DF
4600 [(match_operand:DF 1 "vsx_register_operand" "wa")
4601 (match_operand:DF 2 "vsx_register_operand" "wa")]
4602 UNSPEC_VSX_SCMPEXPDP)
4603 (const_int 0)))
4604 (set (match_operand:SI 0 "register_operand" "=r")
4605 (CMP_TEST:SI (match_dup 3)
4606 (const_int 0)))]
4607 "TARGET_P9_VECTOR"
4608 {
4609 if (<CODE> == UNORDERED && !HONOR_NANS (DFmode))
4610 {
4611 emit_move_insn (operands[0], const0_rtx);
4612 DONE;
4613 }
4614
4615 operands[3] = gen_reg_rtx (CCFPmode);
4616 })
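
;; A minimal usage sketch in C (hypothetical function; assumes the ISA 3.0
;; scalar_cmp_exp_* builtins exposed via altivec.h with -mcpu=power9) of the
;; expander above:
;;
;;   #include <altivec.h>
;;
;;   int
;;   exps_equal (double a, double b)
;;   {
;;     /* Compares only the biased exponents, via xscmpexpdp.  */
;;     return scalar_cmp_exp_eq (a, b);
;;   }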
4617
4618 (define_insn "*xscmpexpdp"
4619 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4620 (compare:CCFP
4621 (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
4622 (match_operand:DF 2 "vsx_register_operand" "wa")]
4623 UNSPEC_VSX_SCMPEXPDP)
4624 (match_operand:SI 3 "zero_constant" "j")))]
4625 "TARGET_P9_VECTOR"
4626 "xscmpexpdp %0,%x1,%x2"
4627 [(set_attr "type" "fpcompare")])
4628
4629 ;; VSX Scalar Compare Exponents Quad-Precision
4630 (define_expand "xscmpexpqp_<code>_<mode>"
4631 [(set (match_dup 3)
4632 (compare:CCFP
4633 (unspec:IEEE128
4634 [(match_operand:IEEE128 1 "vsx_register_operand" "v")
4635 (match_operand:IEEE128 2 "vsx_register_operand" "v")]
4636 UNSPEC_VSX_SCMPEXPQP)
4637 (const_int 0)))
4638 (set (match_operand:SI 0 "register_operand" "=r")
4639 (CMP_TEST:SI (match_dup 3)
4640 (const_int 0)))]
4641 "TARGET_P9_VECTOR"
4642 {
4643 if (<CODE> == UNORDERED && !HONOR_NANS (<MODE>mode))
4644 {
4645 emit_move_insn (operands[0], const0_rtx);
4646 DONE;
4647 }
4648
4649 operands[3] = gen_reg_rtx (CCFPmode);
4650 })
4651
4652 (define_insn "*xscmpexpqp"
4653 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4654 (compare:CCFP
4655 (unspec:IEEE128 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4656 (match_operand:IEEE128 2 "altivec_register_operand" "v")]
4657 UNSPEC_VSX_SCMPEXPQP)
4658 (match_operand:SI 3 "zero_constant" "j")))]
4659 "TARGET_P9_VECTOR"
4660 "xscmpexpqp %0,%1,%2"
4661 [(set_attr "type" "fpcompare")])
4662
4663 ;; VSX Scalar Test Data Class Quad-Precision
4664 ;; (Expansion for scalar_test_data_class (__ieee128, int))
4665 ;; (Has side effect of setting the lt bit if operand 1 is negative,
4666 ;; setting the eq bit if any of the conditions tested by operand 2
4667 ;; are satisfied, and clearing the gt and unordered bits to zero.)
4668 (define_expand "xststdcqp_<mode>"
4669 [(set (match_dup 3)
4670 (compare:CCFP
4671 (unspec:IEEE128
4672 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4673 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4674 UNSPEC_VSX_STSTDC)
4675 (const_int 0)))
4676 (set (match_operand:SI 0 "register_operand" "=r")
4677 (eq:SI (match_dup 3)
4678 (const_int 0)))]
4679 "TARGET_P9_VECTOR"
4680 {
4681 operands[3] = gen_reg_rtx (CCFPmode);
4682 })
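
;; A minimal usage sketch in C (hypothetical function, using the
;; scalar_test_data_class interface named in the comment above; the DCMX
;; mask value 0x0c is assumed here to select +/- zero and is illustrative
;; only):
;;
;;   int
;;   is_zero_qp (__ieee128 x)
;;   {
;;     return scalar_test_data_class (x, 0x0c);   /* assumed mask: +/-0 */
;;   }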
4683
4684 ;; VSX Scalar Test Data Class Double- and Single-Precision
4685 ;; (The lt bit is set if operand 1 is negative. The eq bit is set
4686 ;; if any of the conditions tested by operand 2 are satisfied.
4687 ;; The gt and unordered bits are cleared to zero.)
4688 (define_expand "xststdc<sd>p"
4689 [(set (match_dup 3)
4690 (compare:CCFP
4691 (unspec:SFDF
4692 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4693 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4694 UNSPEC_VSX_STSTDC)
4695 (match_dup 4)))
4696 (set (match_operand:SI 0 "register_operand" "=r")
4697 (eq:SI (match_dup 3)
4698 (const_int 0)))]
4699 "TARGET_P9_VECTOR"
4700 {
4701 operands[3] = gen_reg_rtx (CCFPmode);
4702 operands[4] = CONST0_RTX (SImode);
4703 })
4704
4705 ;; The VSX Scalar Test Negative Quad-Precision
4706 (define_expand "xststdcnegqp_<mode>"
4707 [(set (match_dup 2)
4708 (compare:CCFP
4709 (unspec:IEEE128
4710 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4711 (const_int 0)]
4712 UNSPEC_VSX_STSTDC)
4713 (const_int 0)))
4714 (set (match_operand:SI 0 "register_operand" "=r")
4715 (lt:SI (match_dup 2)
4716 (const_int 0)))]
4717 "TARGET_P9_VECTOR"
4718 {
4719 operands[2] = gen_reg_rtx (CCFPmode);
4720 })
4721
4722 ;; The VSX Scalar Test Negative Double- and Single-Precision
4723 (define_expand "xststdcneg<sd>p"
4724 [(set (match_dup 2)
4725 (compare:CCFP
4726 (unspec:SFDF
4727 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4728 (const_int 0)]
4729 UNSPEC_VSX_STSTDC)
4730 (match_dup 3)))
4731 (set (match_operand:SI 0 "register_operand" "=r")
4732 (lt:SI (match_dup 2)
4733 (const_int 0)))]
4734 "TARGET_P9_VECTOR"
4735 {
4736 operands[2] = gen_reg_rtx (CCFPmode);
4737 operands[3] = CONST0_RTX (SImode);
4738 })
4739
4740 (define_insn "*xststdcqp_<mode>"
4741 [(set (match_operand:CCFP 0 "" "=y")
4742 (compare:CCFP
4743 (unspec:IEEE128
4744 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4745 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4746 UNSPEC_VSX_STSTDC)
4747 (const_int 0)))]
4748 "TARGET_P9_VECTOR"
4749 "xststdcqp %0,%1,%2"
4750 [(set_attr "type" "fpcompare")])
4751
4752 (define_insn "*xststdc<sd>p"
4753 [(set (match_operand:CCFP 0 "" "=y")
4754 (compare:CCFP
4755 (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4756 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4757 UNSPEC_VSX_STSTDC)
4758 (match_operand:SI 3 "zero_constant" "j")))]
4759 "TARGET_P9_VECTOR"
4760 "xststdc<sd>p %0,%x1,%2"
4761 [(set_attr "type" "fpcompare")])
4762
4763 ;; VSX Vector Extract Exponent Double and Single Precision
4764 (define_insn "xvxexp<sd>p"
4765 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4766 (unspec:VSX_F
4767 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4768 UNSPEC_VSX_VXEXP))]
4769 "TARGET_P9_VECTOR"
4770 "xvxexp<sd>p %x0,%x1"
4771 [(set_attr "type" "vecsimple")])
4772
4773 ;; VSX Vector Extract Significand Double and Single Precision
4774 (define_insn "xvxsig<sd>p"
4775 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4776 (unspec:VSX_F
4777 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4778 UNSPEC_VSX_VXSIG))]
4779 "TARGET_P9_VECTOR"
4780 "xvxsig<sd>p %x0,%x1"
4781 [(set_attr "type" "vecsimple")])
4782
4783 ;; VSX Vector Insert Exponent Double and Single Precision
4784 (define_insn "xviexp<sd>p"
4785 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4786 (unspec:VSX_F
4787 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4788 (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
4789 UNSPEC_VSX_VIEXP))]
4790 "TARGET_P9_VECTOR"
4791 "xviexp<sd>p %x0,%x1,%x2"
4792 [(set_attr "type" "vecsimple")])
4793
4794 ;; VSX Vector Test Data Class Double and Single Precision
4795 ;; The corresponding elements of the result vector are all ones
4796 ;; if any of the conditions tested by operand 3 are satisfied.
4797 (define_insn "xvtstdc<sd>p"
4798 [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
4799 (unspec:<VSI>
4800 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4801 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4802 UNSPEC_VSX_VTSTDC))]
4803 "TARGET_P9_VECTOR"
4804 "xvtstdc<sd>p %x0,%x1,%2"
4805 [(set_attr "type" "vecsimple")])
4806
4807 ;; ISA 3.0 String Operations Support
4808
4809 ;; Compare vectors producing a vector result and a predicate, setting CR6
4810 ;; to indicate a combined status.  This pattern matches v16qi, v8hi, and
4811 ;; v4si modes.  There's no need for it to match v2df, v4sf, or v2di
4812 ;; modes, because those comparisons are expanded to use Power8
4813 ;; instructions.
4814 (define_insn "*vsx_ne_<mode>_p"
4815 [(set (reg:CC CR6_REGNO)
4816 (unspec:CC
4817 [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4818 (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))]
4819 UNSPEC_PREDICATE))
4820 (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v")
4821 (ne:VSX_EXTRACT_I (match_dup 1)
4822 (match_dup 2)))]
4823 "TARGET_P9_VECTOR"
4824 "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4825 [(set_attr "type" "vecsimple")])
4826
4827 (define_insn "*vector_nez_<mode>_p"
4828 [(set (reg:CC CR6_REGNO)
4829 (unspec:CC [(unspec:VI
4830 [(match_operand:VI 1 "gpc_reg_operand" "v")
4831 (match_operand:VI 2 "gpc_reg_operand" "v")]
4832 UNSPEC_NEZ_P)]
4833 UNSPEC_PREDICATE))
4834 (set (match_operand:VI 0 "gpc_reg_operand" "=v")
4835 (unspec:VI [(match_dup 1)
4836 (match_dup 2)]
4837 UNSPEC_NEZ_P))]
4838 "TARGET_P9_VECTOR"
4839 "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4840 [(set_attr "type" "vecsimple")])
4841
4842 ;; Return first position of match between vectors using natural element order
4843 ;; for both LE and BE execution modes.
4844 (define_expand "first_match_index_<mode>"
4845 [(match_operand:SI 0 "register_operand")
4846 (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4847 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4848 UNSPEC_VSX_FIRST_MATCH_INDEX)]
4849 "TARGET_P9_VECTOR"
4850 {
4851 int sh;
4852
4853 rtx cmp_result = gen_reg_rtx (<MODE>mode);
4854 rtx not_result = gen_reg_rtx (<MODE>mode);
4855
4856 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
4857 operands[2]));
4858 emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result));
4859
4860 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4861
4862 if (<MODE>mode == V16QImode)
4863 {
4864 if (!BYTES_BIG_ENDIAN)
4865 emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result));
4866 else
4867 emit_insn (gen_vclzlsbb_<mode> (operands[0], not_result));
4868 }
4869 else
4870 {
4871 rtx tmp = gen_reg_rtx (SImode);
4872 if (!BYTES_BIG_ENDIAN)
4873 emit_insn (gen_vctzlsbb_<mode> (tmp, not_result));
4874 else
4875 emit_insn (gen_vclzlsbb_<mode> (tmp, not_result));
4876 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4877 }
4878 DONE;
4879 })
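
;; Worked example of the index adjustment above: vctzlsbb/vclzlsbb produce a
;; *byte* index, so for V8HI sh = 2/2 = 1 and the byte index is shifted
;; right by 1 (divide by the 2-byte element size); for V4SI, sh = 4/2 = 2
;; (divide by 4).  V16QI needs no shift and takes the direct path.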
4880
4881 ;; Return first position of match between vectors or end of string (EOS) using
4882 ;; natural element order for both LE and BE execution modes.
4883 (define_expand "first_match_or_eos_index_<mode>"
4884 [(match_operand:SI 0 "register_operand")
4885 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4886 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4887 UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)]
4888 "TARGET_P9_VECTOR"
4889 {
4890 int sh;
4891 rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
4892 rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
4893 rtx cmpz_result = gen_reg_rtx (<MODE>mode);
4894 rtx and_result = gen_reg_rtx (<MODE>mode);
4895 rtx result = gen_reg_rtx (<MODE>mode);
4896 rtx vzero = gen_reg_rtx (<MODE>mode);
4897
4898 /* Vector with zeros in elements that correspond to zeros in operands. */
4899 emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
4900 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
4901 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
4902 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
4903
4904 /* Vector with ones in elements that do not match.  */
4905 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
4906 operands[2]));
4907
4908 /* Create vector with ones in elements where there was a zero in one of
4909 the source elements or where the elements match.  */
4910 emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result));
4911 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4912
4913 if (<MODE>mode == V16QImode)
4914 {
4915 if (!BYTES_BIG_ENDIAN)
4916 emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
4917 else
4918 emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
4919 }
4920 else
4921 {
4922 rtx tmp = gen_reg_rtx (SImode);
4923 if (!BYTES_BIG_ENDIAN)
4924 emit_insn (gen_vctzlsbb_<mode> (tmp, result));
4925 else
4926 emit_insn (gen_vclzlsbb_<mode> (tmp, result));
4927 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4928 }
4929 DONE;
4930 })
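;; Element-wise, the three vector operations above combine to (a hedged
;; sketch of the logic in C-style notation):
;;
;;   and_result[i]  = (a[i] != 0) && (b[i] != 0)    /* neither is EOS */
;;   cmpz_result[i] = (a[i] != b[i]) || (a[i] == 0) || (b[i] == 0)
;;   result[i]      = !(and_result[i] && cmpz_result[i])
;;                  = (a[i] == b[i]) || (a[i] == 0) || (b[i] == 0)
;;
;; so counting the leading (BE) or trailing (LE) zero least-significant bits
;; finds the first match or end-of-string, whichever comes first.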
4931
4932 ;; Return first position of mismatch between vectors using natural
4933 ;; element order for both LE and BE execution modes.
4934 (define_expand "first_mismatch_index_<mode>"
4935 [(match_operand:SI 0 "register_operand")
4936 (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4937 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4938 UNSPEC_VSX_FIRST_MISMATCH_INDEX)]
4939 "TARGET_P9_VECTOR"
4940 {
4941 int sh;
4942 rtx cmp_result = gen_reg_rtx (<MODE>mode);
4943
4944 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
4945 operands[2]));
4946 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4947
4948 if (<MODE>mode == V16QImode)
4949 {
4950 if (!BYTES_BIG_ENDIAN)
4951 emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result));
4952 else
4953 emit_insn (gen_vclzlsbb_<mode> (operands[0], cmp_result));
4954 }
4955 else
4956 {
4957 rtx tmp = gen_reg_rtx (SImode);
4958 if (!BYTES_BIG_ENDIAN)
4959 emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result));
4960 else
4961 emit_insn (gen_vclzlsbb_<mode> (tmp, cmp_result));
4962 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4963 }
4964 DONE;
4965 })
4966
4967 ;; Return first position of mismatch between vectors or end of string (EOS)
4968 ;; using natural element order for both LE and BE execution modes.
4969 (define_expand "first_mismatch_or_eos_index_<mode>"
4970 [(match_operand:SI 0 "register_operand")
4971 (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4972 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4973 UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)]
4974 "TARGET_P9_VECTOR"
4975 {
4976 int sh;
4977 rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
4978 rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
4979 rtx cmpz_result = gen_reg_rtx (<MODE>mode);
4980 rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
4981 rtx and_result = gen_reg_rtx (<MODE>mode);
4982 rtx result = gen_reg_rtx (<MODE>mode);
4983 rtx vzero = gen_reg_rtx (<MODE>mode);
4984
4985 /* Vector with zeros in elements that correspond to zeros in operands. */
4986 emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
4987
4988 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
4989 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
4990 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
4991
4992 /* Vector with ones in elements that match (and are not zero). */
4993 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
4994 operands[2]));
4995 emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));
4996
4997 /* Create vector with ones in elements where there was a zero in one of
4998 the source elements or where the elements did not match. */
4999 emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
5000 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5001
5002 if (<MODE>mode == V16QImode)
5003 {
5004 if (!BYTES_BIG_ENDIAN)
5005 emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
5006 else
5007 emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
5008 }
5009 else
5010 {
5011 rtx tmp = gen_reg_rtx (SImode);
5012 if (!BYTES_BIG_ENDIAN)
5013 emit_insn (gen_vctzlsbb_<mode> (tmp, result));
5014 else
5015 emit_insn (gen_vclzlsbb_<mode> (tmp, result));
5016 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
5017 }
5018 DONE;
5019 })
5020
5021 ;; Load VSX Vector with Length
5022 (define_expand "lxvl"
5023 [(set (match_dup 3)
5024 (ashift:DI (match_operand:DI 2 "register_operand")
5025 (const_int 56)))
5026 (set (match_operand:V16QI 0 "vsx_register_operand")
5027 (unspec:V16QI
5028 [(match_operand:DI 1 "gpc_reg_operand")
5029 (mem:V16QI (match_dup 1))
5030 (match_dup 3)]
5031 UNSPEC_LXVL))]
5032 "TARGET_P9_VECTOR && TARGET_64BIT"
5033 {
5034 operands[3] = gen_reg_rtx (DImode);
5035 })
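;; lxvl and stxvl take the number of bytes to transfer in bits 0:7 of the
;; length register, which is why the expanders shift the count left by 56.
;; A minimal usage sketch via the vec_xl_len intrinsic (assuming the
;; altivec.h interface; the function and pointer names are illustrative):
;;
;;   #include <altivec.h>
;;   vector unsigned char
;;   load_prefix (const unsigned char *p, size_t len)  /* len <= 16 */
;;   {
;;     return vec_xl_len ((unsigned char *) p, len);   /* emits lxvl */
;;   }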
5036
5037 (define_insn "*lxvl"
5038 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5039 (unspec:V16QI
5040 [(match_operand:DI 1 "gpc_reg_operand" "b")
5041 (mem:V16QI (match_dup 1))
5042 (match_operand:DI 2 "register_operand" "r")]
5043 UNSPEC_LXVL))]
5044 "TARGET_P9_VECTOR && TARGET_64BIT"
5045 "lxvl %x0,%1,%2"
5046 [(set_attr "type" "vecload")])
5047
5048 (define_insn "lxvll"
5049 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5050 (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
5051 (mem:V16QI (match_dup 1))
5052 (match_operand:DI 2 "register_operand" "r")]
5053 UNSPEC_LXVLL))]
5054 "TARGET_P9_VECTOR"
5055 "lxvll %x0,%1,%2"
5056 [(set_attr "type" "vecload")])
5057
5058 ;; Expand for builtin xl_len_r
5059 (define_expand "xl_len_r"
5060 [(match_operand:V16QI 0 "vsx_register_operand")
5061 (match_operand:DI 1 "register_operand")
5062 (match_operand:DI 2 "register_operand")]
5063 ""
5064 {
5065 rtx shift_mask = gen_reg_rtx (V16QImode);
5066 rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5067 rtx tmp = gen_reg_rtx (DImode);
5068
5069 emit_insn (gen_altivec_lvsl_reg (shift_mask, operands[2]));
5070 emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5071 emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
5072 emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
5073 shift_mask));
5074 DONE;
5075 })
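;; Note: lxvll left-justifies the loaded bytes in the vector register; the
;; vperm with the lvsl-generated control then rotates them so that xl_len_r
;; returns its bytes right-justified.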
5076
5077 (define_insn "stxvll"
5078 [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5079 (unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5080 (mem:V16QI (match_dup 1))
5081 (match_operand:DI 2 "register_operand" "r")]
5082 UNSPEC_STXVLL))]
5083 "TARGET_P9_VECTOR"
5084 "stxvll %x0,%1,%2"
5085 [(set_attr "type" "vecstore")])
5086
5087 ;; Store VSX Vector with Length
5088 (define_expand "stxvl"
5089 [(set (match_dup 3)
5090 (ashift:DI (match_operand:DI 2 "register_operand")
5091 (const_int 56)))
5092 (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
5093 (unspec:V16QI
5094 [(match_operand:V16QI 0 "vsx_register_operand")
5095 (mem:V16QI (match_dup 1))
5096 (match_dup 3)]
5097 UNSPEC_STXVL))]
5098 "TARGET_P9_VECTOR && TARGET_64BIT"
5099 {
5100 operands[3] = gen_reg_rtx (DImode);
5101 })
5102
5103 (define_insn "*stxvl"
5104 [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5105 (unspec:V16QI
5106 [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5107 (mem:V16QI (match_dup 1))
5108 (match_operand:DI 2 "register_operand" "r")]
5109 UNSPEC_STXVL))]
5110 "TARGET_P9_VECTOR && TARGET_64BIT"
5111 "stxvl %x0,%1,%2"
5112 [(set_attr "type" "vecstore")])
5113
5114 ;; Expand for builtin xst_len_r
5115 (define_expand "xst_len_r"
5116 [(match_operand:V16QI 0 "vsx_register_operand")
5117 (match_operand:DI 1 "register_operand")
5118 (match_operand:DI 2 "register_operand")]
5119 ""
5120 {
5121 rtx shift_mask = gen_reg_rtx (V16QImode);
5122 rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5123 rtx tmp = gen_reg_rtx (DImode);
5124
5125 emit_insn (gen_altivec_lvsr_reg (shift_mask, operands[2]));
5126 emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
5127 shift_mask));
5128 emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5129 emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
5130 DONE;
5131 })
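;; Note: the mirror image of xl_len_r above; the source bytes are
;; right-justified in the register, so the lvsr-generated control rotates
;; them into the left-justified position that stxvll stores from.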
5132
5133 ;; Vector Compare Not Equal Byte (expressed as not+eq:)
5134 (define_insn "vcmpneb"
5135 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5136 (not:V16QI
5137 (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v")
5138 (match_operand:V16QI 2 "altivec_register_operand" "v"))))]
5139 "TARGET_P9_VECTOR"
5140 "vcmpneb %0,%1,%2"
5141 [(set_attr "type" "vecsimple")])
5142
5143 ;; Vector Compare Not Equal or Zero Byte
5144 (define_insn "vcmpnezb"
5145 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5146 (unspec:V16QI
5147 [(match_operand:V16QI 1 "altivec_register_operand" "v")
5148 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5149 UNSPEC_VCMPNEZB))]
5150 "TARGET_P9_VECTOR"
5151 "vcmpnezb %0,%1,%2"
5152 [(set_attr "type" "vecsimple")])
5153
5154 ;; Vector Compare Not Equal or Zero Byte predicate (record form)
5155 (define_insn "vcmpnezb_p"
5156 [(set (reg:CC CR6_REGNO)
5157 (unspec:CC
5158 [(match_operand:V16QI 1 "altivec_register_operand" "v")
5159 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5160 UNSPEC_VCMPNEZB))
5161 (set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5162 (unspec:V16QI
5163 [(match_dup 1)
5164 (match_dup 2)]
5165 UNSPEC_VCMPNEZB))]
5166 "TARGET_P9_VECTOR"
5167 "vcmpnezb. %0,%1,%2"
5168 [(set_attr "type" "vecsimple")])
5169
5170 ;; Vector Compare Not Equal Half Word (expressed as not+eq:)
5171 (define_insn "vcmpneh"
5172 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5173 (not:V8HI
5174 (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v")
5175 (match_operand:V8HI 2 "altivec_register_operand" "v"))))]
5176 "TARGET_P9_VECTOR"
5177 "vcmpneh %0,%1,%2"
5178 [(set_attr "type" "vecsimple")])
5179
5180 ;; Vector Compare Not Equal or Zero Half Word
5181 (define_insn "vcmpnezh"
5182 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5183 (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
5184 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5185 UNSPEC_VCMPNEZH))]
5186 "TARGET_P9_VECTOR"
5187 "vcmpnezh %0,%1,%2"
5188 [(set_attr "type" "vecsimple")])
5189
5190 ;; Vector Compare Not Equal Word (expressed as not+eq:)
5191 (define_insn "vcmpnew"
5192 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5193 (not:V4SI
5194 (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v")
5195 (match_operand:V4SI 2 "altivec_register_operand" "v"))))]
5196 "TARGET_P9_VECTOR"
5197 "vcmpnew %0,%1,%2"
5198 [(set_attr "type" "vecsimple")])
5199
5200 ;; Vector Compare Not Equal or Zero Word
5201 (define_insn "vcmpnezw"
5202 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5203 (unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
5204 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5205 UNSPEC_VCMPNEZW))]
5206 "TARGET_P9_VECTOR"
5207 "vcmpnezw %0,%1,%2"
5208 [(set_attr "type" "vecsimple")])
5209
5210 ;; Vector Count Leading Zero Least-Significant Bits Byte
5211 (define_insn "vclzlsbb_<mode>"
5212 [(set (match_operand:SI 0 "register_operand" "=r")
5213 (unspec:SI
5214 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5215 UNSPEC_VCLZLSBB))]
5216 "TARGET_P9_VECTOR"
5217 "vclzlsbb %0,%1"
5218 [(set_attr "type" "vecsimple")])
5219
5220 ;; Vector Count Trailing Zero Least-Significant Bits Byte
5221 (define_insn "vctzlsbb_<mode>"
5222 [(set (match_operand:SI 0 "register_operand" "=r")
5223 (unspec:SI
5224 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5225 UNSPEC_VCTZLSBB))]
5226 "TARGET_P9_VECTOR"
5227 "vctzlsbb %0,%1"
5228 [(set_attr "type" "vecsimple")])
5229
5230 ;; Vector Extract Unsigned Byte Left-Indexed
5231 (define_insn "vextublx"
5232 [(set (match_operand:SI 0 "register_operand" "=r")
5233 (unspec:SI
5234 [(match_operand:SI 1 "register_operand" "r")
5235 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5236 UNSPEC_VEXTUBLX))]
5237 "TARGET_P9_VECTOR"
5238 "vextublx %0,%1,%2"
5239 [(set_attr "type" "vecsimple")])
5240
5241 ;; Vector Extract Unsigned Byte Right-Indexed
5242 (define_insn "vextubrx"
5243 [(set (match_operand:SI 0 "register_operand" "=r")
5244 (unspec:SI
5245 [(match_operand:SI 1 "register_operand" "r")
5246 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5247 UNSPEC_VEXTUBRX))]
5248 "TARGET_P9_VECTOR"
5249 "vextubrx %0,%1,%2"
5250 [(set_attr "type" "vecsimple")])
5251
5252 ;; Vector Extract Unsigned Half Word Left-Indexed
5253 (define_insn "vextuhlx"
5254 [(set (match_operand:SI 0 "register_operand" "=r")
5255 (unspec:SI
5256 [(match_operand:SI 1 "register_operand" "r")
5257 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5258 UNSPEC_VEXTUHLX))]
5259 "TARGET_P9_VECTOR"
5260 "vextuhlx %0,%1,%2"
5261 [(set_attr "type" "vecsimple")])
5262
5263 ;; Vector Extract Unsigned Half Word Right-Indexed
5264 (define_insn "vextuhrx"
5265 [(set (match_operand:SI 0 "register_operand" "=r")
5266 (unspec:SI
5267 [(match_operand:SI 1 "register_operand" "r")
5268 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5269 UNSPEC_VEXTUHRX))]
5270 "TARGET_P9_VECTOR"
5271 "vextuhrx %0,%1,%2"
5272 [(set_attr "type" "vecsimple")])
5273
5274 ;; Vector Extract Unsigned Word Left-Indexed
5275 (define_insn "vextuwlx"
5276 [(set (match_operand:SI 0 "register_operand" "=r")
5277 (unspec:SI
5278 [(match_operand:SI 1 "register_operand" "r")
5279 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5280 UNSPEC_VEXTUWLX))]
5281 "TARGET_P9_VECTOR"
5282 "vextuwlx %0,%1,%2"
5283 [(set_attr "type" "vecsimple")])
5284
5285 ;; Vector Extract Unsigned Word Right-Indexed
5286 (define_insn "vextuwrx"
5287 [(set (match_operand:SI 0 "register_operand" "=r")
5288 (unspec:SI
5289 [(match_operand:SI 1 "register_operand" "r")
5290 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5291 UNSPEC_VEXTUWRX))]
5292 "TARGET_P9_VECTOR"
5293 "vextuwrx %0,%1,%2"
5294 [(set_attr "type" "vecsimple")])
5295
5296 ;; Vector insert/extract word at arbitrary byte values. Note that the
5297 ;; little endian version needs to adjust the byte number and the V4SI
5298 ;; element order used by insert4b.
5299 (define_insn "extract4b"
5300 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
5301 (unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
5302 (match_operand:QI 2 "const_0_to_12_operand" "n")]
5303 UNSPEC_XXEXTRACTUW))]
5304 "TARGET_P9_VECTOR"
5305 {
5306 if (!BYTES_BIG_ENDIAN)
5307 operands[2] = GEN_INT (12 - INTVAL (operands[2]));
5308
5309 return "xxextractuw %x0,%x1,%2";
5310 })
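;; For example (illustrative values), extracting the word at byte offset 4
;; on little endian rewrites the immediate to 12 - 4 = 8 before emitting
;; xxextractuw, since element numbering is reversed.  insert4b below applies
;; the same 12 - n rewrite and additionally swaps the two doublewords of its
;; V4SI input with xxpermdi.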
5311
5312 (define_expand "insert4b"
5313 [(set (match_operand:V16QI 0 "vsx_register_operand")
5314 (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
5315 (match_operand:V16QI 2 "vsx_register_operand")
5316 (match_operand:QI 3 "const_0_to_12_operand")]
5317 UNSPEC_XXINSERTW))]
5318 "TARGET_P9_VECTOR"
5319 {
5320 if (!BYTES_BIG_ENDIAN)
5321 {
5322 rtx op1 = operands[1];
5323 rtx v4si_tmp = gen_reg_rtx (V4SImode);
5324 emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
5325 operands[1] = v4si_tmp;
5326 operands[3] = GEN_INT (12 - INTVAL (operands[3]));
5327 }
5328 })
5329
5330 (define_insn "*insert4b_internal"
5331 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5332 (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
5333 (match_operand:V16QI 2 "vsx_register_operand" "0")
5334 (match_operand:QI 3 "const_0_to_12_operand" "n")]
5335 UNSPEC_XXINSERTW))]
5336 "TARGET_P9_VECTOR"
5337 "xxinsertw %x0,%x1,%3"
5338 [(set_attr "type" "vecperm")])
5339
5340
5341 ;; Extract four float 32 values from the left four elements of an
5342 ;; eight-element vector of float 16 values.
5343 (define_expand "vextract_fp_from_shorth"
5344 [(set (match_operand:V4SF 0 "register_operand" "=wa")
5345 (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5346 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
5347 "TARGET_P9_VECTOR"
5348 {
5349 int i;
5350 int vals_le[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
5351 int vals_be[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
5352
5353 rtx rvals[16];
5354 rtx mask = gen_reg_rtx (V16QImode);
5355 rtx tmp = gen_reg_rtx (V16QImode);
5356 rtvec v;
5357
5358 for (i = 0; i < 16; i++)
5359 if (!BYTES_BIG_ENDIAN)
5360 rvals[i] = GEN_INT (vals_le[i]);
5361 else
5362 rvals[i] = GEN_INT (vals_be[i]);
5363
5364 /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5365 inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move
5366 src half words 0,1,2,3 (LE), src half words 4,5,6,7 (BE) for the
5367 conversion instruction. */
5368 v = gen_rtvec_v (16, rvals);
5369 emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5370 emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5371 operands[1], mask));
5372 emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5373 DONE;
5374 })
5375
5376 ;; Extract four float 32 values from the right four elements of an
5377 ;; eight-element vector of float 16 values.
5378 (define_expand "vextract_fp_from_shortl"
5379 [(set (match_operand:V4SF 0 "register_operand" "=wa")
5380 (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5381 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
5382 "TARGET_P9_VECTOR"
5383 {
5384 int vals_le[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
5385 int vals_be[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
5386
5387 int i;
5388 rtx rvals[16];
5389 rtx mask = gen_reg_rtx (V16QImode);
5390 rtx tmp = gen_reg_rtx (V16QImode);
5391 rtvec v;
5392
5393 for (i = 0; i < 16; i++)
5394 if (!BYTES_BIG_ENDIAN)
5395 rvals[i] = GEN_INT (vals_le[i]);
5396 else
5397 rvals[i] = GEN_INT (vals_be[i]);
5398
5399 /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5400 inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move
5401 src half words 4,5,6,7 (LE), src half words 0,1,2,3 (BE) for the
5402 conversion instruction. */
5403 v = gen_rtvec_v (16, rvals);
5404 emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5405 emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5406 operands[1], mask));
5407 emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5408 DONE;
5409 })
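;; In both expanders above, the mask entries written as 0 fall in the half
;; words that xvcvhpsp ignores (only half words 1,3,5,7 are converted), so
;; they are don't-cares; 0 is simply a convenient filler index.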
5410
5411 ;; Support for ISA 3.0 vector byte reverse
5412
5413 ;; Swap all bytes within a vector
5414 (define_insn "p9_xxbrq_v1ti"
5415 [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
5416 (bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
5417 "TARGET_P9_VECTOR"
5418 "xxbrq %x0,%x1"
5419 [(set_attr "type" "vecperm")])
5420
5421 (define_expand "p9_xxbrq_v16qi"
5422 [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
5423 (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]
5424 "TARGET_P9_VECTOR"
5425 {
5426 rtx op0 = gen_reg_rtx (V1TImode);
5427 rtx op1 = gen_lowpart (V1TImode, operands[1]);
5428 emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
5429 emit_move_insn (operands[0], gen_lowpart (V16QImode, op0));
5430 DONE;
5431 })
5432
5433 ;; Swap all bytes in each 64-bit element
5434 (define_insn "p9_xxbrd_v2di"
5435 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
5436 (bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
5437 "TARGET_P9_VECTOR"
5438 "xxbrd %x0,%x1"
5439 [(set_attr "type" "vecperm")])
5440
5441 (define_expand "p9_xxbrd_v2df"
5442 [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa"))
5443 (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))]
5444 "TARGET_P9_VECTOR"
5445 {
5446 rtx op0 = gen_reg_rtx (V2DImode);
5447 rtx op1 = gen_lowpart (V2DImode, operands[1]);
5448 emit_insn (gen_p9_xxbrd_v2di (op0, op1));
5449 emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0));
5450 DONE;
5451 })
5452
5453 ;; Swap all bytes in each 32-bit element
5454 (define_insn "p9_xxbrw_v4si"
5455 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
5456 (bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
5457 "TARGET_P9_VECTOR"
5458 "xxbrw %x0,%x1"
5459 [(set_attr "type" "vecperm")])
5460
5461 (define_expand "p9_xxbrw_v4sf"
5462 [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa"))
5463 (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))]
5464 "TARGET_P9_VECTOR"
5465 {
5466 rtx op0 = gen_reg_rtx (V4SImode);
5467 rtx op1 = gen_lowpart (V4SImode, operands[1]);
5468 emit_insn (gen_p9_xxbrw_v4si (op0, op1));
5469 emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0));
5470 DONE;
5471 })
5472
5473 ;; Swap all bytes in each element of vector
5474 (define_expand "revb_<mode>"
5475 [(use (match_operand:VEC_REVB 0 "vsx_register_operand"))
5476 (use (match_operand:VEC_REVB 1 "vsx_register_operand"))]
5477 ""
5478 {
5479 if (TARGET_P9_VECTOR)
5480 emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
5481 else
5482 {
5483 /* We want to have the elements in reverse order relative
5484 to the endian mode in use, i.e. in LE mode, put the elements
5485 in BE order. */
5486 rtx sel = swap_endian_selector_for_mode (<MODE>mode);
5487 emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
5488 operands[1], sel));
5489 }
5490
5491 DONE;
5492 })
5493
5494 ;; Reversing bytes in vector char is just a NOP.
5495 (define_expand "revb_v16qi"
5496 [(set (match_operand:V16QI 0 "vsx_register_operand")
5497 (bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
5498 ""
5499 {
5500 emit_move_insn (operands[0], operands[1]);
5501 DONE;
5502 })
5503
5504 ;; Swap all bytes in each 16-bit element
5505 (define_insn "p9_xxbrh_v8hi"
5506 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
5507 (bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
5508 "TARGET_P9_VECTOR"
5509 "xxbrh %x0,%x1"
5510 [(set_attr "type" "vecperm")])
5511 \f
5512
5513 ;; Operand numbers for the following peephole2
5514 (define_constants
5515 [(SFBOOL_TMP_GPR 0) ;; GPR temporary
5516 (SFBOOL_TMP_VSX 1) ;; vector temporary
5517 (SFBOOL_MFVSR_D 2) ;; move to gpr dest
5518 (SFBOOL_MFVSR_A 3) ;; move to gpr src
5519 (SFBOOL_BOOL_D 4) ;; and/ior/xor dest
5520 (SFBOOL_BOOL_A1 5) ;; and/ior/xor arg1
5521 (SFBOOL_BOOL_A2 6) ;; and/ior/xor arg2
5522 (SFBOOL_SHL_D 7) ;; shift left dest
5523 (SFBOOL_SHL_A 8) ;; shift left arg
5524 (SFBOOL_MTVSR_D 9) ;; move to vector dest
5525 (SFBOOL_MFVSR_A_V4SF 10) ;; SFBOOL_MFVSR_A as V4SFmode
5526 (SFBOOL_BOOL_A_DI 11) ;; SFBOOL_BOOL_A1/A2 as DImode
5527 (SFBOOL_TMP_VSX_DI 12) ;; SFBOOL_TMP_VSX as DImode
5528 (SFBOOL_MTVSR_D_V4SF 13)]) ;; SFBOOL_MTVSR_D as V4SFmode
5529
5530 ;; Attempt to optimize some common GLIBC operations using logical operations to
5531 ;; pick apart SFmode operations. For example, there is code from e_powf.c
5532 ;; after macro expansion that looks like:
5533 ;;
5534 ;; typedef union {
5535 ;; float value;
5536 ;; uint32_t word;
5537 ;; } ieee_float_shape_type;
5538 ;;
5539 ;; float t1;
5540 ;; int32_t is;
5541 ;;
5542 ;; do {
5543 ;; ieee_float_shape_type gf_u;
5544 ;; gf_u.value = (t1);
5545 ;; (is) = gf_u.word;
5546 ;; } while (0);
5547 ;;
5548 ;; do {
5549 ;; ieee_float_shape_type sf_u;
5550 ;; sf_u.word = (is & 0xfffff000);
5551 ;; (t1) = sf_u.value;
5552 ;; } while (0);
5553 ;;
5554 ;;
5555 ;; This would result in two direct move operations (convert to memory format,
5556 ;; direct move to GPR, do the AND operation, direct move to VSX, convert to
5557 ;; scalar format). With this peephole, we eliminate the direct move to the
5558 ;; GPR, and instead move the integer mask value to the vector register after a
5559 ;; shift and do the VSX logical operation.
5560
5561 ;; The insns for dealing with SFmode in GPR registers looks like:
5562 ;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
5563 ;;
5564 ;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
5565 ;;
5566 ;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
5567 ;;
5568 ;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
5569 ;;
5570 ;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
5571 ;;
5572 ;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
5573
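;; After the peephole, the matched insns are replaced by, roughly (a sketch;
;; register names are placeholders, and the constant-mask case first loads
;; the mask into the GPR temporary):
;;
;; (set (reg:DI tmp_gpr) (ashift:DI (reg:DI bool_arg) (const_int 32)))
;;
;; (set (reg:DI tmp_vsx) (reg:DI tmp_gpr)) ;; mtvsrd
;;
;; (set (reg:V4SF reg6) (and:V4SF (reg:V4SF reg2) (reg:V4SF tmp_vsx)))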
5574 (define_peephole2
5575 [(match_scratch:DI SFBOOL_TMP_GPR "r")
5576 (match_scratch:V4SF SFBOOL_TMP_VSX "wa")
5577
5578 ;; MFVSRWZ (aka zero_extend)
5579 (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
5580 (zero_extend:DI
5581 (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))
5582
5583 ;; AND/IOR/XOR operation on int
5584 (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
5585 (and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
5586 (match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))
5587
5588 ;; SLDI
5589 (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
5590 (ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
5591 (const_int 32)))
5592
5593 ;; MTVSRD
5594 (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
5595 (unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]
5596
5597 "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
5598 /* The REG_P (xxx) tests prevent SUBREGs, which allows us to use REGNO
5599 to compare registers when the modes are different. */
5600 && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
5601 && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
5602 && REG_P (operands[SFBOOL_SHL_A]) && REG_P (operands[SFBOOL_MTVSR_D])
5603 && (REG_P (operands[SFBOOL_BOOL_A2])
5604 || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
5605 && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
5606 || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
5607 && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
5608 || (REG_P (operands[SFBOOL_BOOL_A2])
5609 && REGNO (operands[SFBOOL_MFVSR_D])
5610 == REGNO (operands[SFBOOL_BOOL_A2])))
5611 && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
5612 && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
5613 || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
5614 && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
5615 [(set (match_dup SFBOOL_TMP_GPR)
5616 (ashift:DI (match_dup SFBOOL_BOOL_A_DI)
5617 (const_int 32)))
5618
5619 (set (match_dup SFBOOL_TMP_VSX_DI)
5620 (match_dup SFBOOL_TMP_GPR))
5621
5622 (set (match_dup SFBOOL_MTVSR_D_V4SF)
5623 (and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
5624 (match_dup SFBOOL_TMP_VSX)))]
5625 {
5626 rtx bool_a1 = operands[SFBOOL_BOOL_A1];
5627 rtx bool_a2 = operands[SFBOOL_BOOL_A2];
5628 int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
5629 int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
5630 int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
5631 int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);
5632
5633 if (CONST_INT_P (bool_a2))
5634 {
5635 rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
5636 emit_move_insn (tmp_gpr, bool_a2);
5637 operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
5638 }
5639 else
5640 {
5641 int regno_bool_a1 = REGNO (bool_a1);
5642 int regno_bool_a2 = REGNO (bool_a2);
5643 int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
5644 ? regno_bool_a2 : regno_bool_a1);
5645 operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
5646 }
5647
5648 operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
5649 operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
5650 operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
5651 })
5652
5653 ;; Support signed/unsigned long long to float conversion vectorization.
5654 ;; Note that any_float (pc) here is just for code attribute <su>.
5655 (define_expand "vec_pack<su>_float_v2di"
5656 [(match_operand:V4SF 0 "vfloat_operand")
5657 (match_operand:V2DI 1 "vint_operand")
5658 (match_operand:V2DI 2 "vint_operand")
5659 (any_float (pc))]
5660 "TARGET_VSX"
5661 {
5662 rtx r1 = gen_reg_rtx (V4SFmode);
5663 rtx r2 = gen_reg_rtx (V4SFmode);
5664 emit_insn (gen_vsx_xvcv<su>xdsp (r1, operands[1]));
5665 emit_insn (gen_vsx_xvcv<su>xdsp (r2, operands[2]));
5666 rs6000_expand_extract_even (operands[0], r1, r2);
5667 DONE;
5668 })
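;; This implements the vectorizer's pack-and-convert idiom, roughly (a
;; hedged C sketch; the names are illustrative):
;;
;;   void f (float *restrict out, long long *restrict in, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       out[i] = (float) in[i];
;;   }
;;
;; xvcv<su>xdsp leaves its two results in the even word elements of each
;; V4SF, so rs6000_expand_extract_even merges the two partial vectors.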
5669
5670 ;; Support float to signed/unsigned long long conversion vectorization.
5671 ;; Note that any_fix (pc) here is just for code attribute <su>.
5672 (define_expand "vec_unpack_<su>fix_trunc_hi_v4sf"
5673 [(match_operand:V2DI 0 "vint_operand")
5674 (match_operand:V4SF 1 "vfloat_operand")
5675 (any_fix (pc))]
5676 "TARGET_VSX"
5677 {
5678 rtx reg = gen_reg_rtx (V4SFmode);
5679 rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN);
5680 emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg));
5681 DONE;
5682 })
5683
5684 ;; Note that any_fix (pc) here is just for code attribute <su>.
5685 (define_expand "vec_unpack_<su>fix_trunc_lo_v4sf"
5686 [(match_operand:V2DI 0 "vint_operand")
5687 (match_operand:V4SF 1 "vfloat_operand")
5688 (any_fix (pc))]
5689 "TARGET_VSX"
5690 {
5691 rtx reg = gen_reg_rtx (V4SFmode);
5692 rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN);
5693 emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg));
5694 DONE;
5695 })
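;; The two unpack expanders above go the other way: rs6000_expand_interleave
;; duplicates the high (or low) pair of floats across the V4SF temporary so
;; they occupy the word elements that xvcvsp<su>xds converts, producing one
;; V2DI per half.  Roughly, for a four-element input:
;;
;;   out_hi[i] = (long long) in[i];      /* i = 0, 1 */
;;   out_lo[i] = (long long) in[i + 2];  /* i = 0, 1 */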
5696
5697 (define_insn "vsx_<xvcvbf16>"
5698 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5699 (unspec:V16QI [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
5700 XVCVBF16))]
5701 "TARGET_POWER10"
5702 "<xvcvbf16> %x0,%x1"
5703 [(set_attr "type" "vecfloat")])