1 ;; Copyright (C) 2016-2020 Free Software Foundation, Inc.
3 ;; This file is free software; you can redistribute it and/or modify it under
4 ;; the terms of the GNU General Public License as published by the Free
5 ;; Software Foundation; either version 3 of the License, or (at your option)
8 ;; This file is distributed in the hope that it will be useful, but WITHOUT
9 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 ;; You should have received a copy of the GNU General Public License
14 ;; along with GCC; see the file COPYING3. If not see
15 ;; <http://www.gnu.org/licenses/>.
17 ;; {{{ Vector iterators
19 ; Vector modes for one vector register
; NOTE(review): several iterator bodies below appear truncated in this copy
; (the mode lists for VEC_1REG_MODE, VEC_1REG_ALT, the *_INT_* iterators and
; VEC_2REG_MODE are missing) -- restore from the upstream sources before use.
20 (define_mode_iterator VEC_1REG_MODE
22 (define_mode_iterator VEC_1REG_ALT
; All single-register vector modes, including the QI/HI sub-word elements.
24 (define_mode_iterator VEC_ALL1REG_MODE
25 [V64QI V64HI V64SI V64HF V64SF])
27 (define_mode_iterator VEC_1REG_INT_MODE
29 (define_mode_iterator VEC_1REG_INT_ALT
31 (define_mode_iterator VEC_ALL1REG_INT_MODE
33 (define_mode_iterator VEC_ALL1REG_INT_ALT
36 ; Vector modes for two vector registers
37 (define_mode_iterator VEC_2REG_MODE
; Vector modes with 32-bit-or-larger elements: one or two registers.
41 (define_mode_iterator VEC_REG_MODE
42 [V64SI V64HF V64SF ; Single reg
43 V64DI V64DF]) ; Double reg
; Every vector mode handled by the move/gather/scatter patterns below.
44 (define_mode_iterator VEC_ALLREG_MODE
45 [V64QI V64HI V64SI V64HF V64SF ; Single reg
46 V64DI V64DF]) ; Double reg
; Map each vector mode to its element mode name (lower/upper case forms).
48 (define_mode_attr scalar_mode
49 [(V64QI "qi") (V64HI "hi") (V64SI "si")
50 (V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")])
52 (define_mode_attr SCALAR_MODE
53 [(V64QI "QI") (V64HI "HI") (V64SI "SI")
54 (V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")])
; Sub-dword-addressing (SDWA) selector matching each element width.
56 (define_mode_attr sdwa [(V64QI "BYTE_0") (V64HI "WORD_0") (V64SI "DWORD")])
; Subst attributes: appending <exec>/<exec_clobber>/<exec_vcc>/<exec_scatter>
; to a pattern name makes the named define_subst generate a second,
; EXEC-masked variant of that pattern.
; NOTE(review): the attribute value strings appear truncated in this copy.
61 (define_subst_attr "exec" "vec_merge"
63 (define_subst_attr "exec_clobber" "vec_merge_with_clobber"
65 (define_subst_attr "exec_vcc" "vec_merge_with_vcc"
67 (define_subst_attr "exec_scatter" "scatter_store"
; Wrap a plain set in a vec_merge: operand 3 supplies the "previous value"
; lanes ("U0" also permits an undefined input), operand 4 is the EXEC mask.
70 (define_subst "vec_merge"
71 [(set (match_operand:VEC_ALLREG_MODE 0)
72 (match_operand:VEC_ALLREG_MODE 1))]
75 (vec_merge:VEC_ALLREG_MODE
77 (match_operand:VEC_ALLREG_MODE 3
78 "gcn_register_or_unspec_operand" "U0")
79 (match_operand:DI 4 "gcn_exec_reg_operand" "e")))])
; Same, but the original pattern also carried a clobber which is preserved.
81 (define_subst "vec_merge_with_clobber"
82 [(set (match_operand:VEC_ALLREG_MODE 0)
83 (match_operand:VEC_ALLREG_MODE 1))
84 (clobber (match_operand 2))]
87 (vec_merge:VEC_ALLREG_MODE
89 (match_operand:VEC_ALLREG_MODE 3
90 "gcn_register_or_unspec_operand" "U0")
91 (match_operand:DI 4 "gcn_exec_reg_operand" "e")))
92 (clobber (match_dup 2))])
; Same, for patterns that also set a DI condition output; the secondary
; result is additionally masked with the EXEC register.
94 (define_subst "vec_merge_with_vcc"
95 [(set (match_operand:VEC_ALLREG_MODE 0)
96 (match_operand:VEC_ALLREG_MODE 1))
97 (set (match_operand:DI 2)
98 (match_operand:DI 3))]
102 (vec_merge:VEC_ALLREG_MODE
104 (match_operand:VEC_ALLREG_MODE 4
105 "gcn_register_or_unspec_operand" "U0")
106 (match_operand:DI 5 "gcn_exec_reg_operand" "e")))
108 (and:DI (match_dup 3)
109 (reg:DI EXEC_REG)))])])
; EXEC variant for scatter stores, which write mem:BLK through an unspec.
; NOTE(review): most of this subst's body is missing from this copy.
111 (define_subst "scatter_store"
112 [(set (mem:BLK (scratch))
120 [(set (mem:BLK (scratch))
126 (match_operand:DI 4 "gcn_exec_reg_operand" "e")]
132 ; This is the entry point for all vector register moves. Memory accesses can
133 ; come this way also, but will more usually use the reload_in/out,
134 ; gather/scatter, maskload/store, etc.
136 (define_expand "mov<mode>"
137 [(set (match_operand:VEC_ALLREG_MODE 0 "nonimmediate_operand")
138 (match_operand:VEC_ALLREG_MODE 1 "general_operand"))]
; Before register allocation, convert vector stores into scatter
; expressions and vector loads into gathers; once LRA/reload has started
; use the sgprbase form with an explicit scratch instead.
141 if (MEM_P (operands[0]) && !lra_in_progress && !reload_completed)
143 operands[1] = force_reg (<MODE>mode, operands[1]);
144 rtx scratch = gen_rtx_SCRATCH (V64DImode);
; The address space and volatile flag are passed as const_ints because
; the unspec form does not retain the MEM's attributes.
145 rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
146 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
147 rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
150 emit_insn (gen_scatter<mode>_expr (expr, operands[1], a, v));
153 else if (MEM_P (operands[1]) && !lra_in_progress && !reload_completed)
155 rtx scratch = gen_rtx_SCRATCH (V64DImode);
156 rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
157 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
158 rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
161 emit_insn (gen_gather<mode>_expr (operands[0], expr, a, v));
; Post-expand MEM fallback: delegate to mov<mode>_sgprbase with a fresh
; V64DI scratch register (not valid after reload, hence the assert).
164 else if ((MEM_P (operands[0]) || MEM_P (operands[1])))
166 gcc_assert (!reload_completed);
167 rtx scratch = gen_reg_rtx (V64DImode);
168 emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], scratch));
173 ; A pseudo instruction that helps LRA use the "U0" constraint.
175 (define_insn "mov<mode>_unspec"
176 [(set (match_operand:VEC_ALLREG_MODE 0 "nonimmediate_operand" "=v")
177 (match_operand:VEC_ALLREG_MODE 1 "gcn_unspec_operand" " U"))]
; Emits no code (length 0): the destination is left undefined, which is
; what the "U0" constraints in the exec-masked patterns rely on.
180 [(set_attr "type" "unknown")
181 (set_attr "length" "0")])
; Plain single-register vector move: VGPR/inline constant (4 bytes) or
; literal constant (8 bytes) source.
183 (define_insn "*mov<mode>"
184 [(set (match_operand:VEC_ALL1REG_MODE 0 "nonimmediate_operand" "=v,v")
185 (match_operand:VEC_ALL1REG_MODE 1 "general_operand" "vA,B"))]
188 [(set_attr "type" "vop1,vop1")
189 (set_attr "length" "4,8")])
; EXEC-masked single-register move.  Alternatives: v_mov_b32 under exec,
; v_cndmask_b32 with VCC or an SGPR-pair mask, plus two memory fallbacks
; that require the V64DI address scratch (operand 4).
191 (define_insn "mov<mode>_exec"
192 [(set (match_operand:VEC_ALL1REG_MODE 0 "nonimmediate_operand"
194 (vec_merge:VEC_ALL1REG_MODE
195 (match_operand:VEC_ALL1REG_MODE 1 "general_operand"
197 (match_operand:VEC_ALL1REG_MODE 3 "gcn_alu_or_unspec_operand"
199 (match_operand:DI 2 "register_operand" " e, e,cV,Sv, e, e")))
200 (clobber (match_scratch:V64DI 4 "=X, X, X, X,&v,&v"))]
201 "!MEM_P (operands[0]) || REG_P (operands[1])"
205 v_cndmask_b32\t%0, %3, %1, vcc
206 v_cndmask_b32\t%0, %3, %1, %2
209 [(set_attr "type" "vop1,vop1,vop2,vop3a,*,*")
210 (set_attr "length" "4,8,4,8,16,16")])
212 ; This variant does not accept an unspec, but does permit MEM
213 ; read/modify/write which is necessary for maskstore.
215 ;(define_insn "*mov<mode>_exec_match"
216 ; [(set (match_operand:VEC_ALL1REG_MODE 0 "nonimmediate_operand"
218 ; (vec_merge:VEC_ALL1REG_MODE
219 ; (match_operand:VEC_ALL1REG_MODE 1 "general_operand" "vA,B, m, v")
221 ; (match_operand:DI 2 "gcn_exec_reg_operand" " e,e, e, e")))
222 ; (clobber (match_scratch:V64DI 3 "=X,X,&v,&v"))]
223 ; "!MEM_P (operands[0]) || REG_P (operands[1])"
229 ; [(set_attr "type" "vop1,vop1,*,*")
230 ; (set_attr "length" "4,8,16,16")])
; Double-register vector move.  When the destination register number is
; above the source's, copy the high half first so an overlapping pair is
; not clobbered before it is read.
232 (define_insn "*mov<mode>"
233 [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "=v")
234 (match_operand:VEC_2REG_MODE 1 "general_operand" "vDB"))]
237 if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
238 return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
240 return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
242 [(set_attr "type" "vmult")
243 (set_attr "length" "16")])
; EXEC-masked double-register move; the same overlap-ordering rule is
; applied to each alternative (v_mov and both v_cndmask mask sources).
245 (define_insn "mov<mode>_exec"
246 [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand"
248 (vec_merge:VEC_2REG_MODE
249 (match_operand:VEC_2REG_MODE 1 "general_operand"
251 (match_operand:VEC_2REG_MODE 3 "gcn_alu_or_unspec_operand"
252 " U0,vDA0,vDA0,U0,U0")
253 (match_operand:DI 2 "register_operand" " e, cV, Sv, e, e")))
254 (clobber (match_scratch:V64DI 4 "= X, X, X,&v,&v"))]
255 "!MEM_P (operands[0]) || REG_P (operands[1])"
257 if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
258 switch (which_alternative)
261 return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
263 return "v_cndmask_b32\t%L0, %L3, %L1, vcc\;"
264 "v_cndmask_b32\t%H0, %H3, %H1, vcc";
266 return "v_cndmask_b32\t%L0, %L3, %L1, %2\;"
267 "v_cndmask_b32\t%H0, %H3, %H1, %2";
; Overlapping-from-above case: emit the high-half copy first.
270 switch (which_alternative)
273 return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
275 return "v_cndmask_b32\t%H0, %H3, %H1, vcc\;"
276 "v_cndmask_b32\t%L0, %L3, %L1, vcc";
278 return "v_cndmask_b32\t%H0, %H3, %H1, %2\;"
279 "v_cndmask_b32\t%L0, %L3, %L1, %2";
284 [(set_attr "type" "vmult,vmult,vmult,*,*")
285 (set_attr "length" "16,16,16,16,16")])
287 ; This variant does not accept an unspec, but does permit MEM
288 ; read/modify/write which is necessary for maskstore.
290 ;(define_insn "*mov<mode>_exec_match"
291 ; [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "=v, v, m")
292 ; (vec_merge:VEC_2REG_MODE
293 ; (match_operand:VEC_2REG_MODE 1 "general_operand" "vDB, m, v")
295 ; (match_operand:DI 2 "gcn_exec_reg_operand" " e, e, e")))
296 ; (clobber (match_scratch:V64DI 3 "=X,&v,&v"))]
297 ; "!MEM_P (operands[0]) || REG_P (operands[1])"
299 ; * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
300 ; return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
302 ; return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
305 ; [(set_attr "type" "vmult,*,*")
306 ; (set_attr "length" "16,16,16")])
308 ; A SGPR-base load looks like:
311 ; There's no hardware instruction that corresponds to this, but vector base
312 ; addresses are placed in an SGPR because it is easier to add to a vector.
313 ; We also have a temporary vT, and the vector v1 holding numbered lanes.
316 ; vT = v1 << log2(element-size)
; SGPR-base move used during/after register allocation; operand 2 is the
; mandatory V64DI scratch used to build the per-lane address vector.
320 (define_insn "mov<mode>_sgprbase"
321 [(set (match_operand:VEC_ALL1REG_MODE 0 "nonimmediate_operand"
323 (unspec:VEC_ALL1REG_MODE
324 [(match_operand:VEC_ALL1REG_MODE 1 "general_operand"
327 (clobber (match_operand:V64DI 2 "register_operand" "=&v,&v,&v,&v"))]
328 "lra_in_progress || reload_completed"
334 [(set_attr "type" "vop1,vop1,*,*")
335 (set_attr "length" "4,8,12,12")])
; Double-register variant; reg-to-reg copies order the two halves to
; survive overlap, exactly as in *mov<mode> above.
337 (define_insn "mov<mode>_sgprbase"
338 [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "= v, v, m")
339 (unspec:VEC_2REG_MODE
340 [(match_operand:VEC_2REG_MODE 1 "general_operand" "vDB, m, v")]
342 (clobber (match_operand:V64DI 2 "register_operand" "=&v,&v,&v"))]
343 "lra_in_progress || reload_completed"
345 * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
346 return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
348 return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
351 [(set_attr "type" "vmult,*,*")
352 (set_attr "length" "8,12,12")])
354 ; reload_in was once a standard name, but here it's only referenced by
355 ; gcn_secondary_reload. It allows a reload with a scratch register.
357 (define_expand "reload_in<mode>"
358 [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand" "= v")
359 (match_operand:VEC_ALLREG_MODE 1 "memory_operand" " m"))
360 (clobber (match_operand:V64DI 2 "register_operand" "=&v"))]
; Delegate to mov<mode>_sgprbase, which accepts the scratch operand.
363 emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
367 ; reload_out is similar to reload_in, above.
369 (define_expand "reload_out<mode>"
370 [(set (match_operand:VEC_ALLREG_MODE 0 "memory_operand" "= m")
371 (match_operand:VEC_ALLREG_MODE 1 "register_operand" " v"))
372 (clobber (match_operand:V64DI 2 "register_operand" "=&v"))]
375 emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
379 ; Expand scalar addresses into gather/scatter patterns
; NOTE(review): the define_split/define_expand header lines for the four
; expansions below are missing from this copy.  Each body rewrites a
; scalar-addressed vector MEM access into a gather/scatter unspec,
; computing the vector address (operand 5) and the address-space and
; volatile const_ints (operands 6 and 7) from the original MEM.
382 [(set (match_operand:VEC_ALLREG_MODE 0 "memory_operand")
383 (unspec:VEC_ALLREG_MODE
384 [(match_operand:VEC_ALLREG_MODE 1 "general_operand")]
386 (clobber (match_scratch:V64DI 2))]
388 [(set (mem:BLK (scratch))
389 (unspec:BLK [(match_dup 5) (match_dup 1) (match_dup 6) (match_dup 7)]
392 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
395 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
396 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
; EXEC-masked store: the mask (operand 3) is forwarded into the unspec.
400 [(set (match_operand:VEC_ALLREG_MODE 0 "memory_operand")
401 (vec_merge:VEC_ALLREG_MODE
402 (match_operand:VEC_ALLREG_MODE 1 "general_operand")
403 (match_operand:VEC_ALLREG_MODE 2 "")
404 (match_operand:DI 3 "gcn_exec_reg_operand")))
405 (clobber (match_scratch:V64DI 4))]
407 [(set (mem:BLK (scratch))
408 (unspec:BLK [(match_dup 5) (match_dup 1)
409 (match_dup 6) (match_dup 7) (match_dup 3)]
412 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
416 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
417 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
; Load direction: rewrite the MEM source into a gather unspec.
421 [(set (match_operand:VEC_ALLREG_MODE 0 "nonimmediate_operand")
422 (unspec:VEC_ALLREG_MODE
423 [(match_operand:VEC_ALLREG_MODE 1 "memory_operand")]
425 (clobber (match_scratch:V64DI 2))]
428 (unspec:VEC_ALLREG_MODE [(match_dup 5) (match_dup 6) (match_dup 7)
432 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
435 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
436 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
; EXEC-masked load: the gather unspec is wrapped in a vec_merge.
440 [(set (match_operand:VEC_ALLREG_MODE 0 "nonimmediate_operand")
441 (vec_merge:VEC_ALLREG_MODE
442 (match_operand:VEC_ALLREG_MODE 1 "memory_operand")
443 (match_operand:VEC_ALLREG_MODE 2 "")
444 (match_operand:DI 3 "gcn_exec_reg_operand")))
445 (clobber (match_scratch:V64DI 4))]
448 (vec_merge:VEC_ALLREG_MODE
449 (unspec:VEC_ALLREG_MODE [(match_dup 5) (match_dup 6) (match_dup 7)
455 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
459 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
460 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
463 ; TODO: Add zero/sign extending variants.
468 ; v_writelane and v_readlane work regardless of exec flags.
469 ; We allow source to be scratch.
471 ; FIXME these should take A immediates
; Write one lane of a vector with v_writelane_b32; the lane mask is
; (1 << operand 2).  Works regardless of EXEC (exec attr "none").
473 (define_insn "*vec_set<mode>"
474 [(set (match_operand:VEC_ALL1REG_MODE 0 "register_operand" "= v")
475 (vec_merge:VEC_ALL1REG_MODE
476 (vec_duplicate:VEC_ALL1REG_MODE
477 (match_operand:<SCALAR_MODE> 1 "register_operand" " Sv"))
478 (match_operand:VEC_ALL1REG_MODE 3 "gcn_register_or_unspec_operand"
480 (ashift (const_int 1)
481 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
483 "v_writelane_b32 %0, %1, %2"
484 [(set_attr "type" "vop3a")
485 (set_attr "length" "8")
486 (set_attr "exec" "none")
487 (set_attr "laneselect" "yes")])
489 ; FIXME: 64bit operations really should be splitters, but I am not sure how
490 ; to represent vertical subregs.
; Double-register lane write: one v_writelane per 32-bit half.
491 (define_insn "*vec_set<mode>"
492 [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "= v")
493 (vec_merge:VEC_2REG_MODE
494 (vec_duplicate:VEC_2REG_MODE
495 (match_operand:<SCALAR_MODE> 1 "register_operand" " Sv"))
496 (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand"
498 (ashift (const_int 1)
499 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
501 "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2"
502 [(set_attr "type" "vmult")
503 (set_attr "length" "16")
504 (set_attr "exec" "none")
505 (set_attr "laneselect" "yes")])
; Standard-name expander matched by the *vec_set<mode> patterns above.
507 (define_expand "vec_set<mode>"
508 [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand")
509 (vec_merge:VEC_ALLREG_MODE
510 (vec_duplicate:VEC_ALLREG_MODE
511 (match_operand:<SCALAR_MODE> 1 "register_operand"))
513 (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand"))))]
; Variant matching a constant power-of-two mask; exact_log2 recovers the
; lane number for v_writelane (hence the < 64 guard).
516 (define_insn "*vec_set<mode>_1"
517 [(set (match_operand:VEC_ALL1REG_MODE 0 "register_operand" "=v")
518 (vec_merge:VEC_ALL1REG_MODE
519 (vec_duplicate:VEC_ALL1REG_MODE
520 (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv"))
521 (match_operand:VEC_ALL1REG_MODE 3 "gcn_register_or_unspec_operand"
523 (match_operand:SI 2 "const_int_operand" " i")))]
524 "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)"
526 operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
527 return "v_writelane_b32 %0, %1, %2";
529 [(set_attr "type" "vop3a")
530 (set_attr "length" "8")
531 (set_attr "exec" "none")
532 (set_attr "laneselect" "yes")])
; Double-register form of the constant-mask variant above.
534 (define_insn "*vec_set<mode>_1"
535 [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "=v")
536 (vec_merge:VEC_2REG_MODE
537 (vec_duplicate:VEC_2REG_MODE
538 (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv"))
539 (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand"
541 (match_operand:SI 2 "const_int_operand" " i")))]
542 "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)"
544 operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
545 return "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2";
547 [(set_attr "type" "vmult")
548 (set_attr "length" "16")
549 (set_attr "exec" "none")
550 (set_attr "laneselect" "yes")])
; Broadcast a scalar to all lanes (EXEC-maskable via <exec>).
552 (define_insn "vec_duplicate<mode><exec>"
553 [(set (match_operand:VEC_ALL1REG_MODE 0 "register_operand" "=v")
554 (vec_duplicate:VEC_ALL1REG_MODE
555 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvB")))]
558 [(set_attr "type" "vop3a")
559 (set_attr "length" "8")])
; Double-register broadcast: two v_mov_b32, one per 32-bit half.
561 (define_insn "vec_duplicate<mode><exec>"
562 [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "= v")
563 (vec_duplicate:VEC_2REG_MODE
564 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvDB")))]
566 "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
567 [(set_attr "type" "vop3a")
568 (set_attr "length" "16")])
; Read one lane into a scalar register via v_readlane_b32 (ignores EXEC).
570 (define_insn "vec_extract<mode><scalar_mode>"
571 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg")
572 (vec_select:<SCALAR_MODE>
573 (match_operand:VEC_ALL1REG_MODE 1 "register_operand" " v")
574 (parallel [(match_operand:SI 2 "gcn_alu_operand" "SvB")])))]
576 "v_readlane_b32 %0, %1, %2"
577 [(set_attr "type" "vop3a")
578 (set_attr "length" "8")
579 (set_attr "exec" "none")
580 (set_attr "laneselect" "yes")])
; Double-register lane read: one v_readlane per 32-bit half.
582 (define_insn "vec_extract<mode><scalar_mode>"
583 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg")
584 (vec_select:<SCALAR_MODE>
585 (match_operand:VEC_2REG_MODE 1 "register_operand" " v")
586 (parallel [(match_operand:SI 2 "gcn_alu_operand" "SvB")])))]
588 "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2"
589 [(set_attr "type" "vmult")
590 (set_attr "length" "16")
591 (set_attr "exec" "none")
592 (set_attr "laneselect" "yes")])
; Extract the element at the highest set bit of the mask:
; lane = 63 - clz(mask), then a plain vec_extract.
594 (define_expand "extract_last_<mode>"
595 [(match_operand:<SCALAR_MODE> 0 "register_operand")
596 (match_operand:DI 1 "gcn_alu_operand")
597 (match_operand:VEC_ALLREG_MODE 2 "register_operand")]
598 "can_create_pseudo_p ()"
600 rtx dst = operands[0];
601 rtx mask = operands[1];
602 rtx vect = operands[2];
603 rtx tmpreg = gen_reg_rtx (SImode);
605 emit_insn (gen_clzdi2 (tmpreg, mask));
606 emit_insn (gen_subsi3 (tmpreg, GEN_INT (63), tmpreg));
607 emit_insn (gen_vec_extract<mode><scalar_mode> (dst, vect, tmpreg));
; As extract_last, but an all-zero mask yields the default value
; (operand 1) via an explicit compare-and-branch sequence.
611 (define_expand "fold_extract_last_<mode>"
612 [(match_operand:<SCALAR_MODE> 0 "register_operand")
613 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")
614 (match_operand:DI 2 "gcn_alu_operand")
615 (match_operand:VEC_ALLREG_MODE 3 "register_operand")]
616 "can_create_pseudo_p ()"
618 rtx dst = operands[0];
619 rtx default_value = operands[1];
620 rtx mask = operands[2];
621 rtx vect = operands[3];
622 rtx else_label = gen_label_rtx ();
623 rtx end_label = gen_label_rtx ();
625 rtx cond = gen_rtx_EQ (VOIDmode, mask, const0_rtx);
626 emit_jump_insn (gen_cbranchdi4 (cond, mask, const0_rtx, else_label));
627 emit_insn (gen_extract_last_<mode> (dst, mask, vect));
628 emit_jump_insn (gen_jump (end_label));
630 emit_label (else_label);
631 emit_move_insn (dst, default_value);
632 emit_label (end_label);
; Build a vector from an initializer; all the work is in the backend.
636 (define_expand "vec_init<mode><scalar_mode>"
637 [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
641 gcn_expand_vector_init (operands[0], operands[1]);
646 ;; {{{ Scatter / Gather
648 ;; GCN does not have an instruction for loading a vector from contiguous
649 ;; memory so *all* loads and stores are eventually converted to scatter
652 ;; GCC does not permit MEM to hold vectors of addresses, so we must use an
653 ;; unspec. The unspec formats are as follows:
656 ;; [(<address expression>)
659 ;; (mem:BLK (scratch))]
663 ;; [(<address expression>)
664 ;; (<source register>)
670 ;; - Loads are expected to be wrapped in a vec_merge, so do not need <exec>.
671 ;; - The mem:BLK does not contain any real information, but indicates that an
672 ;; unknown memory read is taking place. Stores are expected to use a similar
673 ;; mem:BLK outside the unspec.
674 ;; - The address space and glc (volatile) fields are there to replace the
675 ;; fields normally found in a MEM.
676 ;; - Multiple forms of address expression are supported, below.
; Standard-name gather: base (operand 1) + scaled offsets (operand 2,
; scale operand 4, element size operand 3).  If the combined address fits
; in a V64DI it uses the 1-offset insn, otherwise the 2-offsets form.
678 (define_expand "gather_load<mode>"
679 [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
680 (match_operand:DI 1 "register_operand")
681 (match_operand 2 "register_operand")
682 (match_operand 3 "immediate_operand")
683 (match_operand:SI 4 "gcn_alu_operand")]
686 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
687 operands[2], operands[4],
688 INTVAL (operands[3]), NULL);
690 if (GET_MODE (addr) == V64DImode)
691 emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx,
692 const0_rtx, const0_rtx));
694 emit_insn (gen_gather<mode>_insn_2offsets (operands[0], operands[1],
695 addr, const0_rtx, const0_rtx,
; EXEC-masked gather; inactive lanes keep an undefined value (undefmode).
700 (define_expand "gather<mode>_exec"
701 [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
702 (match_operand:DI 1 "register_operand")
703 (match_operand:V64SI 2 "register_operand")
704 (match_operand 3 "immediate_operand")
705 (match_operand:SI 4 "gcn_alu_operand")
706 (match_operand:DI 5 "gcn_exec_reg_operand")]
709 rtx undefmode = gcn_gen_undef (<MODE>mode);
711 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
712 operands[2], operands[4],
713 INTVAL (operands[3]), operands[5]);
715 if (GET_MODE (addr) == V64DImode)
716 emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
717 const0_rtx, const0_rtx,
718 const0_rtx, undefmode,
721 emit_insn (gen_gather<mode>_insn_2offsets_exec (operands[0], operands[1],
723 const0_rtx, const0_rtx,
724 undefmode, operands[5]));
728 ; Allow any address expression
729 (define_expand "gather<mode>_expr<exec>"
730 [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand")
731 (unspec:VEC_ALLREG_MODE
732 [(match_operand 1 "")
733 (match_operand 2 "immediate_operand")
734 (match_operand 3 "immediate_operand")
; Gather with a vector address plus constant offset.  Operand 3 is the
; address space, operand 4 the glc/volatile flag; the condition limits
; the offset to what flat (GCN3/GCN5) or global instructions encode.
740 (define_insn "gather<mode>_insn_1offset<exec>"
741 [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand" "=v")
742 (unspec:VEC_ALLREG_MODE
743 [(plus:V64DI (match_operand:V64DI 1 "register_operand" " v")
745 (match_operand 2 "immediate_operand" " n")))
746 (match_operand 3 "immediate_operand" " n")
747 (match_operand 4 "immediate_operand" " n")
750 "(AS_FLAT_P (INTVAL (operands[3]))
751 && ((TARGET_GCN3 && INTVAL(operands[2]) == 0)
752 || ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x1000)))
753 || (AS_GLOBAL_P (INTVAL (operands[3]))
754 && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
756 addr_space_t as = INTVAL (operands[3]);
757 const char *glc = INTVAL (operands[4]) ? " glc" : "";
759 static char buf[200];
762 if (TARGET_GCN5_PLUS)
763 sprintf (buf, "flat_load%%o0\t%%0, %%1 offset:%%2%s\;s_waitcnt\t0",
766 sprintf (buf, "flat_load%%o0\t%%0, %%1%s\;s_waitcnt\t0", glc);
768 else if (AS_GLOBAL_P (as))
769 sprintf (buf, "global_load%%o0\t%%0, %%1, off offset:%%2%s\;"
770 "s_waitcnt\tvmcnt(0)", glc);
776 [(set_attr "type" "flat")
777 (set_attr "length" "12")])
; LDS/GDS gather: ds_read with a 32-bit per-lane address plus a constant
; offset limited to 16 bits by the condition.
779 (define_insn "gather<mode>_insn_1offset_ds<exec>"
780 [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand" "=v")
781 (unspec:VEC_ALLREG_MODE
782 [(plus:V64SI (match_operand:V64SI 1 "register_operand" " v")
784 (match_operand 2 "immediate_operand" " n")))
785 (match_operand 3 "immediate_operand" " n")
786 (match_operand 4 "immediate_operand" " n")
789 "(AS_ANY_DS_P (INTVAL (operands[3]))
790 && ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x10000))"
792 addr_space_t as = INTVAL (operands[3]);
793 static char buf[200];
794 sprintf (buf, "ds_read%%b0\t%%0, %%1 offset:%%2%s\;s_waitcnt\tlgkmcnt(0)",
795 (AS_GDS_P (as) ? " gds" : ""));
798 [(set_attr "type" "ds")
799 (set_attr "length" "12")])
; Gather with scalar base (operand 1) + 32-bit vector offsets (operand 2)
; + constant offset (operand 3); global address space only.
801 (define_insn "gather<mode>_insn_2offsets<exec>"
802 [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand" "=v")
803 (unspec:VEC_ALLREG_MODE
807 (match_operand:DI 1 "register_operand" "Sv"))
809 (match_operand:V64SI 2 "register_operand" " v")))
810 (vec_duplicate:V64DI (match_operand 3 "immediate_operand" " n")))
811 (match_operand 4 "immediate_operand" " n")
812 (match_operand 5 "immediate_operand" " n")
815 "(AS_GLOBAL_P (INTVAL (operands[4]))
816 && (((unsigned HOST_WIDE_INT)INTVAL(operands[3]) + 0x1000) < 0x2000))"
818 addr_space_t as = INTVAL (operands[4]);
819 const char *glc = INTVAL (operands[5]) ? " glc" : "";
821 static char buf[200];
822 if (AS_GLOBAL_P (as))
824 /* Work around assembler bug in which a 64-bit register is expected,
825 but a 32-bit value would be correct. */
826 int reg = REGNO (operands[2]) - FIRST_VGPR_REG;
827 sprintf (buf, "global_load%%o0\t%%0, v[%d:%d], %%1 offset:%%3%s\;"
828 "s_waitcnt\tvmcnt(0)", reg, reg + 1, glc);
835 [(set_attr "type" "flat")
836 (set_attr "length" "12")])
; Standard-name scatter: mirror of gather_load<mode> above, with the
; source vector in operand 4.
838 (define_expand "scatter_store<mode>"
839 [(match_operand:DI 0 "register_operand")
840 (match_operand 1 "register_operand")
841 (match_operand 2 "immediate_operand")
842 (match_operand:SI 3 "gcn_alu_operand")
843 (match_operand:VEC_ALLREG_MODE 4 "register_operand")]
846 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
847 operands[1], operands[3],
848 INTVAL (operands[2]), NULL);
850 if (GET_MODE (addr) == V64DImode)
851 emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4],
852 const0_rtx, const0_rtx));
854 emit_insn (gen_scatter<mode>_insn_2offsets (operands[0], addr,
855 const0_rtx, operands[4],
856 const0_rtx, const0_rtx));
; EXEC-masked scatter; only lanes enabled in operand 5 are stored.
860 (define_expand "scatter<mode>_exec"
861 [(match_operand:DI 0 "register_operand")
862 (match_operand 1 "register_operand")
863 (match_operand 2 "immediate_operand")
864 (match_operand:SI 3 "gcn_alu_operand")
865 (match_operand:VEC_ALLREG_MODE 4 "register_operand")
866 (match_operand:DI 5 "gcn_exec_reg_operand")]
869 operands[5] = force_reg (DImode, operands[5]);
871 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
872 operands[1], operands[3],
873 INTVAL (operands[2]), operands[5]);
875 if (GET_MODE (addr) == V64DImode)
876 emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
877 operands[4], const0_rtx,
881 emit_insn (gen_scatter<mode>_insn_2offsets_exec (operands[0], addr,
882 const0_rtx, operands[4],
883 const0_rtx, const0_rtx,
888 ; Allow any address expression
889 (define_expand "scatter<mode>_expr<exec_scatter>"
890 [(set (mem:BLK (scratch))
892 [(match_operand:V64DI 0 "")
893 (match_operand:VEC_ALLREG_MODE 1 "register_operand")
894 (match_operand 2 "immediate_operand")
895 (match_operand 3 "immediate_operand")]
; Scatter store with vector address + constant offset; operand 3 is the
; address space, operand 4 the glc flag.  The condition limits the
; offset range exactly as in the matching gather pattern.
900 (define_insn "scatter<mode>_insn_1offset<exec_scatter>"
901 [(set (mem:BLK (scratch))
903 [(plus:V64DI (match_operand:V64DI 0 "register_operand" "v")
905 (match_operand 1 "immediate_operand" "n")))
906 (match_operand:VEC_ALLREG_MODE 2 "register_operand" "v")
907 (match_operand 3 "immediate_operand" "n")
908 (match_operand 4 "immediate_operand" "n")]
910 "(AS_FLAT_P (INTVAL (operands[3]))
911 && (INTVAL(operands[1]) == 0
913 && (unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x1000)))
914 || (AS_GLOBAL_P (INTVAL (operands[3]))
915 && (((unsigned HOST_WIDE_INT)INTVAL(operands[1]) + 0x1000) < 0x2000))"
917 addr_space_t as = INTVAL (operands[3]);
918 const char *glc = INTVAL (operands[4]) ? " glc" : "";
920 static char buf[200];
923 if (TARGET_GCN5_PLUS)
924 sprintf (buf, "flat_store%%s2\t%%0, %%2 offset:%%1%s", glc);
926 sprintf (buf, "flat_store%%s2\t%%0, %%2%s", glc);
928 else if (AS_GLOBAL_P (as))
929 sprintf (buf, "global_store%%s2\t%%0, %%2, off offset:%%1%s", glc);
935 [(set_attr "type" "flat")
936 (set_attr "length" "12")])
; LDS/GDS scatter: ds_write with 32-bit per-lane address, 16-bit offset.
938 (define_insn "scatter<mode>_insn_1offset_ds<exec_scatter>"
939 [(set (mem:BLK (scratch))
941 [(plus:V64SI (match_operand:V64SI 0 "register_operand" "v")
943 (match_operand 1 "immediate_operand" "n")))
944 (match_operand:VEC_ALLREG_MODE 2 "register_operand" "v")
945 (match_operand 3 "immediate_operand" "n")
946 (match_operand 4 "immediate_operand" "n")]
948 "(AS_ANY_DS_P (INTVAL (operands[3]))
949 && ((unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x10000))"
951 addr_space_t as = INTVAL (operands[3]);
952 static char buf[200];
953 sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s",
954 (AS_GDS_P (as) ? " gds" : ""));
957 [(set_attr "type" "ds")
958 (set_attr "length" "12")])
; Scatter with scalar base + 32-bit vector offsets + constant offset;
; global address space only, like the matching gather.
960 (define_insn "scatter<mode>_insn_2offsets<exec_scatter>"
961 [(set (mem:BLK (scratch))
966 (match_operand:DI 0 "register_operand" "Sv"))
968 (match_operand:V64SI 1 "register_operand" " v")))
969 (vec_duplicate:V64DI (match_operand 2 "immediate_operand"
971 (match_operand:VEC_ALLREG_MODE 3 "register_operand" " v")
972 (match_operand 4 "immediate_operand" " n")
973 (match_operand 5 "immediate_operand" " n")]
975 "(AS_GLOBAL_P (INTVAL (operands[4]))
976 && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
978 addr_space_t as = INTVAL (operands[4]);
979 const char *glc = INTVAL (operands[5]) ? " glc" : "";
981 static char buf[200];
982 if (AS_GLOBAL_P (as))
984 /* Work around assembler bug in which a 64-bit register is expected,
985 but a 32-bit value would be correct. */
986 int reg = REGNO (operands[1]) - FIRST_VGPR_REG;
987 sprintf (buf, "global_store%%s3\tv[%d:%d], %%3, %%0 offset:%%2%s",
995 [(set_attr "type" "flat")
996 (set_attr "length" "12")])
; Cross-lane backward permute through LDS (ds_bpermute_b32); operand 1
; holds the per-lane source addresses, operand 3 the EXEC mask.
1001 (define_insn "ds_bpermute<mode>"
1002 [(set (match_operand:VEC_ALL1REG_MODE 0 "register_operand" "=v")
1003 (unspec:VEC_ALL1REG_MODE
1004 [(match_operand:VEC_ALL1REG_MODE 2 "register_operand" " v")
1005 (match_operand:V64SI 1 "register_operand" " v")
1006 (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
1009 "ds_bpermute_b32\t%0, %1, %2\;s_waitcnt\tlgkmcnt(0)"
1010 [(set_attr "type" "vop2")
1011 (set_attr "length" "12")])
; Double-register permute: split into two single-register ds_bpermutes,
; one per 32-bit half of source and destination.
1013 (define_insn_and_split "ds_bpermute<mode>"
1014 [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "=&v")
1015 (unspec:VEC_2REG_MODE
1016 [(match_operand:VEC_2REG_MODE 2 "register_operand" " v0")
1017 (match_operand:V64SI 1 "register_operand" " v")
1018 (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
1023 [(set (match_dup 4) (unspec:V64SI [(match_dup 6) (match_dup 1) (match_dup 3)]
1025 (set (match_dup 5) (unspec:V64SI [(match_dup 7) (match_dup 1) (match_dup 3)]
1028 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
1029 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
1030 operands[6] = gcn_operand_part (<MODE>mode, operands[2], 0);
1031 operands[7] = gcn_operand_part (<MODE>mode, operands[2], 1);
1033 [(set_attr "type" "vmult")
1034 (set_attr "length" "24")])
1037 ;; {{{ ALU special case: add/sub
1039 (define_insn "add<mode>3<exec_clobber>"
1040 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "= v")
1041 (plus:VEC_ALL1REG_INT_MODE
1042 (match_operand:VEC_ALL1REG_INT_MODE 1 "register_operand" "% v")
1043 (match_operand:VEC_ALL1REG_INT_MODE 2 "gcn_alu_operand" "vSvB")))
1044 (clobber (reg:DI VCC_REG))]
1046 "v_add%^_u32\t%0, vcc, %2, %1"
1047 [(set_attr "type" "vop2")
1048 (set_attr "length" "8")])
1050 (define_insn "add<mode>3_dup<exec_clobber>"
1051 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "= v")
1052 (plus:VEC_ALL1REG_INT_MODE
1053 (vec_duplicate:VEC_ALL1REG_INT_MODE
1054 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" "SvB"))
1055 (match_operand:VEC_ALL1REG_INT_MODE 1 "register_operand" " v")))
1056 (clobber (reg:DI VCC_REG))]
1058 "v_add%^_u32\t%0, vcc, %2, %1"
1059 [(set_attr "type" "vop2")
1060 (set_attr "length" "8")])
1062 (define_insn "addv64si3_vcc<exec_vcc>"
1063 [(set (match_operand:V64SI 0 "register_operand" "= v, v")
1065 (match_operand:V64SI 1 "register_operand" "% v, v")
1066 (match_operand:V64SI 2 "gcn_alu_operand" "vSvB,vSvB")))
1067 (set (match_operand:DI 3 "register_operand" "= cV, Sg")
1068 (ltu:DI (plus:V64SI (match_dup 1) (match_dup 2))
1071 "v_add%^_u32\t%0, %3, %2, %1"
1072 [(set_attr "type" "vop2,vop3b")
1073 (set_attr "length" "8")])
1075 ; This pattern only changes the VCC bits when the corresponding lane is
1076 ; enabled, so the set must be described as an ior.
; As addv64si3_vcc, but with scalar operand 1 broadcast across the lanes.
; The carry-output (operand 3) is only updated for enabled lanes.
1078 (define_insn "addv64si3_vcc_dup<exec_vcc>"
1079 [(set (match_operand:V64SI 0 "register_operand" "= v, v")
1081 (vec_duplicate:V64SI
1082 (match_operand:SI 1 "gcn_alu_operand" "SvB,SvB"))
1083 (match_operand:V64SI 2 "register_operand" " v, v")))
1084 (set (match_operand:DI 3 "register_operand" "=cV, Sg")
1085 (ltu:DI (plus:V64SI (vec_duplicate:V64SI (match_dup 2))
1087 (vec_duplicate:V64SI (match_dup 2))))]
1089 "v_add%^_u32\t%0, %3, %2, %1"
1090 [(set_attr "type" "vop2,vop3b")
1091 (set_attr "length" "8,8")])
1093 ; This pattern does not accept SGPR because VCC read already counts as an
1094 ; SGPR use and number of SGPR operands is limited to 1.
; Add-with-carry-in: the DI lane-mask carry (operand 3) is modelled as a
; vec_merge selecting a duplicated 1 or 0 per lane, added to operands 1
; and 2.  The carry-out (operand 4) is accumulated with ior of two ltu
; tests.  Operand 3 cannot be a general SGPR when VCC is also read (see
; the comment above this pattern).
1096 (define_insn "addcv64si3<exec_vcc>"
1097 [(set (match_operand:V64SI 0 "register_operand" "=v,v")
1101 (vec_duplicate:V64SI (const_int 1))
1102 (vec_duplicate:V64SI (const_int 0))
1103 (match_operand:DI 3 "register_operand" " cV,Sv"))
1104 (match_operand:V64SI 1 "gcn_alu_operand" "%vA,vA"))
1105 (match_operand:V64SI 2 "gcn_alu_operand" " vB,vB")))
1106 (set (match_operand:DI 4 "register_operand" "=cV,Sg")
1107 (ior:DI (ltu:DI (plus:V64SI
1110 (vec_duplicate:V64SI (const_int 1))
1111 (vec_duplicate:V64SI (const_int 0))
1118 (vec_duplicate:V64SI (const_int 1))
1119 (vec_duplicate:V64SI (const_int 0))
1124 "v_addc%^_u32\t%0, %4, %1, %2, %3"
1125 [(set_attr "type" "vop2,vop3b")
1126 (set_attr "length" "4,8")])
; Add-with-carry-in where SI operand 2 is broadcast to every lane.
; Otherwise structured like addcv64si3: carry-in via vec_merge of 1/0,
; carry-out accumulated into operand 4 with ior of ltu tests.
1128 (define_insn "addcv64si3_dup<exec_vcc>"
1129 [(set (match_operand:V64SI 0 "register_operand" "=v,v")
1133 (vec_duplicate:V64SI (const_int 1))
1134 (vec_duplicate:V64SI (const_int 0))
1135 (match_operand:DI 3 "register_operand" " cV, Sv"))
1136 (match_operand:V64SI 1 "gcn_alu_operand" "%vA, vA"))
1137 (vec_duplicate:V64SI
1138 (match_operand:SI 2 "gcn_alu_operand" "SvB,SvB"))))
1139 (set (match_operand:DI 4 "register_operand" "=cV, Sg")
1140 (ior:DI (ltu:DI (plus:V64SI (plus:V64SI
1142 (vec_duplicate:V64SI (const_int 1))
1143 (vec_duplicate:V64SI (const_int 0))
1146 (vec_duplicate:V64SI
1148 (vec_duplicate:V64SI
1150 (ltu:DI (plus:V64SI (vec_merge:V64SI
1151 (vec_duplicate:V64SI (const_int 1))
1152 (vec_duplicate:V64SI (const_int 0))
1157 "v_addc%^_u32\t%0, %4, %1, %2, %3"
1158 [(set_attr "type" "vop2,vop3b")
1159 (set_attr "length" "4,8")])
; Vector integer subtract.  Two alternatives choose between v_sub_u32
; (vector - scalar/immediate) and v_subrev_u32 (reversed operand order
; when the subtrahend is the non-VGPR operand).  VCC is clobbered by the
; borrow-out.
1161 (define_insn "sub<mode>3<exec_clobber>"
1162 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "= v, v")
1163 (minus:VEC_ALL1REG_INT_MODE
1164 (match_operand:VEC_ALL1REG_INT_MODE 1 "gcn_alu_operand" "vSvB, v")
1165 (match_operand:VEC_ALL1REG_INT_MODE 2 "gcn_alu_operand" " v,vSvB")))
1166 (clobber (reg:DI VCC_REG))]
1169 v_sub%^_u32\t%0, vcc, %1, %2
1170 v_subrev%^_u32\t%0, vcc, %2, %1"
1171 [(set_attr "type" "vop2")
1172 (set_attr "length" "8,8")])
; V64SI subtract that also captures the per-lane borrow-out (gtu of the
; difference) in DI operand 3 (VCC or SGPR pair).  Four alternatives cover
; the {sub, subrev} x {vcc, sgpr} combinations.
1174 (define_insn "subv64si3_vcc<exec_vcc>"
1175 [(set (match_operand:V64SI 0 "register_operand" "= v, v, v, v")
1177 (match_operand:V64SI 1 "gcn_alu_operand" "vSvB,vSvB, v, v")
1178 (match_operand:V64SI 2 "gcn_alu_operand" " v, v,vSvB,vSvB")))
1179 (set (match_operand:DI 3 "register_operand" "= cV, Sg, cV, Sg")
1180 (gtu:DI (minus:V64SI (match_dup 1) (match_dup 2))
1184 v_sub%^_u32\t%0, %3, %1, %2
1185 v_sub%^_u32\t%0, %3, %1, %2
1186 v_subrev%^_u32\t%0, %3, %2, %1
1187 v_subrev%^_u32\t%0, %3, %2, %1"
1188 [(set_attr "type" "vop2,vop3b,vop2,vop3b")
1189 (set_attr "length" "8")])
1191 ; This pattern does not accept SGPR because VCC read already counts
1192 ; as an SGPR use and number of SGPR operands is limited to 1.
; Subtract-with-borrow-in: the DI borrow mask (operand 3) becomes a
; per-lane 0/1 vector via vec_merge and is subtracted along with operand
; 2.  The borrow-out (operand 4) is accumulated with ior of gtu/ltu tests.
; v_subb vs v_subbrev alternatives handle either operand ordering.
1194 (define_insn "subcv64si3<exec_vcc>"
1195 [(set (match_operand:V64SI 0 "register_operand" "= v, v, v, v")
1199 (vec_duplicate:V64SI (const_int 1))
1200 (vec_duplicate:V64SI (const_int 0))
1201 (match_operand:DI 3 "gcn_alu_operand" " cV,Sv,cV,Sv"))
1202 (match_operand:V64SI 1 "gcn_alu_operand" " vA,vA,vB,vB"))
1203 (match_operand:V64SI 2 "gcn_alu_operand" " vB,vB,vA,vA")))
1204 (set (match_operand:DI 4 "register_operand" "=cV,Sg,cV,Sg")
1205 (ior:DI (gtu:DI (minus:V64SI (minus:V64SI
1207 (vec_duplicate:V64SI (const_int 1))
1208 (vec_duplicate:V64SI (const_int 0))
1213 (ltu:DI (minus:V64SI (vec_merge:V64SI
1214 (vec_duplicate:V64SI (const_int 1))
1215 (vec_duplicate:V64SI (const_int 0))
1221 v_subb%^_u32\t%0, %4, %1, %2, %3
1222 v_subb%^_u32\t%0, %4, %1, %2, %3
1223 v_subbrev%^_u32\t%0, %4, %2, %1, %3
1224 v_subbrev%^_u32\t%0, %4, %2, %1, %3"
1225 [(set_attr "type" "vop2,vop3b,vop2,vop3b")
1226 (set_attr "length" "8")])
; 64-bit vector add, split after reload into a low-part add that produces
; the carry in VCC (addv64si3_vcc) followed by a high-part add-with-carry
; (addcv64si3).  The gcn_can_split_p checks gate the split on the operands
; being decomposable into SImode halves.
1228 (define_insn_and_split "addv64di3"
1229 [(set (match_operand:V64DI 0 "register_operand" "= &v")
1231 (match_operand:V64DI 1 "register_operand" "% v0")
1232 (match_operand:V64DI 2 "gcn_alu_operand" "vSvB0")))
1233 (clobber (reg:DI VCC_REG))]
1236 "gcn_can_split_p (V64DImode, operands[0])
1237 && gcn_can_split_p (V64DImode, operands[1])
1238 && gcn_can_split_p (V64DImode, operands[2])"
1241 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1242 emit_insn (gen_addv64si3_vcc
1243 (gcn_operand_part (V64DImode, operands[0], 0),
1244 gcn_operand_part (V64DImode, operands[1], 0),
1245 gcn_operand_part (V64DImode, operands[2], 0),
1247 emit_insn (gen_addcv64si3
1248 (gcn_operand_part (V64DImode, operands[0], 1),
1249 gcn_operand_part (V64DImode, operands[1], 1),
1250 gcn_operand_part (V64DImode, operands[2], 1),
1254 [(set_attr "type" "vmult")
1255 (set_attr "length" "8")])
; EXEC-masked variant of addv64di3: the vec_merge with operand 3 (previous
; value, possibly undefined "U0") and operand 4 (EXEC) is threaded through
; to the _exec forms of the 32-bit add patterns in the split.
1257 (define_insn_and_split "addv64di3_exec"
1258 [(set (match_operand:V64DI 0 "register_operand" "= &v")
1261 (match_operand:V64DI 1 "register_operand" "% v0")
1262 (match_operand:V64DI 2 "gcn_alu_operand" "vSvB0"))
1263 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1264 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1265 (clobber (reg:DI VCC_REG))]
1268 "gcn_can_split_p (V64DImode, operands[0])
1269 && gcn_can_split_p (V64DImode, operands[1])
1270 && gcn_can_split_p (V64DImode, operands[2])
1271 && gcn_can_split_p (V64DImode, operands[4])"
1274 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1275 emit_insn (gen_addv64si3_vcc_exec
1276 (gcn_operand_part (V64DImode, operands[0], 0),
1277 gcn_operand_part (V64DImode, operands[1], 0),
1278 gcn_operand_part (V64DImode, operands[2], 0),
1280 gcn_operand_part (V64DImode, operands[3], 0),
1282 emit_insn (gen_addcv64si3_exec
1283 (gcn_operand_part (V64DImode, operands[0], 1),
1284 gcn_operand_part (V64DImode, operands[1], 1),
1285 gcn_operand_part (V64DImode, operands[2], 1),
1287 gcn_operand_part (V64DImode, operands[3], 1),
1291 [(set_attr "type" "vmult")
1292 (set_attr "length" "8")])
; 64-bit vector subtract, split into a low-part subtract with borrow-out
; (subv64si3_vcc) and a high-part subtract-with-borrow (subcv64si3).
; Two alternatives allow the non-VGPR operand on either side.
1294 (define_insn_and_split "subv64di3"
1295 [(set (match_operand:V64DI 0 "register_operand" "= &v, &v")
1297 (match_operand:V64DI 1 "gcn_alu_operand" "vSvB0, v0")
1298 (match_operand:V64DI 2 "gcn_alu_operand" " v0,vSvB0")))
1299 (clobber (reg:DI VCC_REG))]
1302 "gcn_can_split_p (V64DImode, operands[0])
1303 && gcn_can_split_p (V64DImode, operands[1])
1304 && gcn_can_split_p (V64DImode, operands[2])"
1307 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1308 emit_insn (gen_subv64si3_vcc
1309 (gcn_operand_part (V64DImode, operands[0], 0),
1310 gcn_operand_part (V64DImode, operands[1], 0),
1311 gcn_operand_part (V64DImode, operands[2], 0),
1313 emit_insn (gen_subcv64si3
1314 (gcn_operand_part (V64DImode, operands[0], 1),
1315 gcn_operand_part (V64DImode, operands[1], 1),
1316 gcn_operand_part (V64DImode, operands[2], 1),
1320 [(set_attr "type" "vmult")
1321 (set_attr "length" "8,8")])
; EXEC-masked 64-bit subtract.  The insn condition requires at least one of
; operands 1/2 to be a register; the split forwards the merge value
; (operand 3) and EXEC (operand 4) to the _exec 32-bit patterns.
1323 (define_insn_and_split "subv64di3_exec"
1324 [(set (match_operand:V64DI 0 "register_operand" "= &v, &v")
1327 (match_operand:V64DI 1 "gcn_alu_operand" "vSvB0, v0")
1328 (match_operand:V64DI 2 "gcn_alu_operand" " v0,vSvB0"))
1329 (match_operand:V64DI 3 "gcn_register_or_unspec_operand"
1331 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
1332 (clobber (reg:DI VCC_REG))]
1333 "register_operand (operands[1], VOIDmode)
1334 || register_operand (operands[2], VOIDmode)"
1336 "gcn_can_split_p (V64DImode, operands[0])
1337 && gcn_can_split_p (V64DImode, operands[1])
1338 && gcn_can_split_p (V64DImode, operands[2])
1339 && gcn_can_split_p (V64DImode, operands[3])"
1342 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1343 emit_insn (gen_subv64si3_vcc_exec
1344 (gcn_operand_part (V64DImode, operands[0], 0),
1345 gcn_operand_part (V64DImode, operands[1], 0),
1346 gcn_operand_part (V64DImode, operands[2], 0),
1348 gcn_operand_part (V64DImode, operands[3], 0),
1350 emit_insn (gen_subcv64si3_exec
1351 (gcn_operand_part (V64DImode, operands[0], 1),
1352 gcn_operand_part (V64DImode, operands[1], 1),
1353 gcn_operand_part (V64DImode, operands[2], 1),
1355 gcn_operand_part (V64DImode, operands[3], 1),
1359 [(set_attr "type" "vmult")
1360 (set_attr "length" "8,8")])
; 64-bit add of a vector and a broadcast DI scalar.  Split uses the _dup
; forms of the 32-bit patterns, passing each 32-bit half of the scalar
; (gcn_operand_part on DImode operand 2).
1362 (define_insn_and_split "addv64di3_dup"
1363 [(set (match_operand:V64DI 0 "register_operand" "= &v")
1365 (match_operand:V64DI 1 "register_operand" " v0")
1366 (vec_duplicate:V64DI
1367 (match_operand:DI 2 "gcn_alu_operand" "SvDB"))))
1368 (clobber (reg:DI VCC_REG))]
1371 "gcn_can_split_p (V64DImode, operands[0])
1372 && gcn_can_split_p (V64DImode, operands[1])
1373 && gcn_can_split_p (V64DImode, operands[2])"
1376 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1377 emit_insn (gen_addv64si3_vcc_dup
1378 (gcn_operand_part (V64DImode, operands[0], 0),
1379 gcn_operand_part (DImode, operands[2], 0),
1380 gcn_operand_part (V64DImode, operands[1], 0),
1382 emit_insn (gen_addcv64si3_dup
1383 (gcn_operand_part (V64DImode, operands[0], 1),
1384 gcn_operand_part (V64DImode, operands[1], 1),
1385 gcn_operand_part (DImode, operands[2], 1),
1389 [(set_attr "type" "vmult")
1390 (set_attr "length" "8")])
; EXEC-masked variant of addv64di3_dup: merge value (operand 3) and EXEC
; (operand 4) are forwarded to the _dup_exec 32-bit patterns.
1392 (define_insn_and_split "addv64di3_dup_exec"
1393 [(set (match_operand:V64DI 0 "register_operand" "= &v")
1396 (match_operand:V64DI 1 "register_operand" " v0")
1397 (vec_duplicate:V64DI
1398 (match_operand:DI 2 "gcn_alu_operand" "SvDB")))
1399 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1400 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1401 (clobber (reg:DI VCC_REG))]
1404 "gcn_can_split_p (V64DImode, operands[0])
1405 && gcn_can_split_p (V64DImode, operands[1])
1406 && gcn_can_split_p (V64DImode, operands[2])
1407 && gcn_can_split_p (V64DImode, operands[3])"
1410 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1411 emit_insn (gen_addv64si3_vcc_dup_exec
1412 (gcn_operand_part (V64DImode, operands[0], 0),
1413 gcn_operand_part (DImode, operands[2], 0),
1414 gcn_operand_part (V64DImode, operands[1], 0),
1416 gcn_operand_part (V64DImode, operands[3], 0),
1418 emit_insn (gen_addcv64si3_dup_exec
1419 (gcn_operand_part (V64DImode, operands[0], 1),
1420 gcn_operand_part (V64DImode, operands[1], 1),
1421 gcn_operand_part (DImode, operands[2], 1),
1423 gcn_operand_part (V64DImode, operands[3], 1),
1427 [(set_attr "type" "vmult")
1428 (set_attr "length" "8")])
; 64-bit add where one addend is a zero-extended V64SI value: the high
; part of that addend is known zero, so the split uses const0_rtx for the
; high half and only propagates the carry via addcv64si3.
1430 (define_insn_and_split "addv64di3_zext"
1431 [(set (match_operand:V64DI 0 "register_operand" "=&v,&v")
1434 (match_operand:V64SI 1 "gcn_alu_operand" "0vA,0vB"))
1435 (match_operand:V64DI 2 "gcn_alu_operand" "0vB,0vA")))
1436 (clobber (reg:DI VCC_REG))]
1439 "gcn_can_split_p (V64DImode, operands[0])
1440 && gcn_can_split_p (V64DImode, operands[2])"
1443 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1444 emit_insn (gen_addv64si3_vcc
1445 (gcn_operand_part (V64DImode, operands[0], 0),
1447 gcn_operand_part (V64DImode, operands[2], 0),
1449 emit_insn (gen_addcv64si3
1450 (gcn_operand_part (V64DImode, operands[0], 1),
1451 gcn_operand_part (V64DImode, operands[2], 1),
1452 const0_rtx, vcc, vcc));
1455 [(set_attr "type" "vmult")
1456 (set_attr "length" "8,8")])
; EXEC-masked variant of addv64di3_zext; the high half still uses
; const0_rtx for the zero-extended addend, with the merge value and EXEC
; threaded through to the _exec patterns.
1458 (define_insn_and_split "addv64di3_zext_exec"
1459 [(set (match_operand:V64DI 0 "register_operand" "=&v,&v")
1463 (match_operand:V64SI 1 "gcn_alu_operand" "0vA,0vB"))
1464 (match_operand:V64DI 2 "gcn_alu_operand" "0vB,0vA"))
1465 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0, U0")
1466 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
1467 (clobber (reg:DI VCC_REG))]
1470 "gcn_can_split_p (V64DImode, operands[0])
1471 && gcn_can_split_p (V64DImode, operands[2])
1472 && gcn_can_split_p (V64DImode, operands[3])"
1475 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1476 emit_insn (gen_addv64si3_vcc_exec
1477 (gcn_operand_part (V64DImode, operands[0], 0),
1479 gcn_operand_part (V64DImode, operands[2], 0),
1481 gcn_operand_part (V64DImode, operands[3], 0),
1483 emit_insn (gen_addcv64si3_exec
1484 (gcn_operand_part (V64DImode, operands[0], 1),
1485 gcn_operand_part (V64DImode, operands[2], 1),
1486 const0_rtx, vcc, vcc,
1487 gcn_operand_part (V64DImode, operands[3], 1),
1491 [(set_attr "type" "vmult")
1492 (set_attr "length" "8,8")])
; 64-bit add of a broadcast, zero-extended SI scalar and a V64DI vector.
; The low half uses the _vcc_dup add; the high half only needs the carry
; (const0_rtx addend) since the extended scalar's high bits are zero.
1494 (define_insn_and_split "addv64di3_zext_dup"
1495 [(set (match_operand:V64DI 0 "register_operand" "=&v")
1498 (vec_duplicate:V64SI
1499 (match_operand:SI 1 "gcn_alu_operand" "BSv")))
1500 (match_operand:V64DI 2 "gcn_alu_operand" "vA0")))
1501 (clobber (reg:DI VCC_REG))]
1504 "gcn_can_split_p (V64DImode, operands[0])
1505 && gcn_can_split_p (V64DImode, operands[2])"
1508 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1509 emit_insn (gen_addv64si3_vcc_dup
1510 (gcn_operand_part (V64DImode, operands[0], 0),
1511 gcn_operand_part (DImode, operands[1], 0),
1512 gcn_operand_part (V64DImode, operands[2], 0),
1514 emit_insn (gen_addcv64si3
1515 (gcn_operand_part (V64DImode, operands[0], 1),
1516 gcn_operand_part (V64DImode, operands[2], 1),
1517 const0_rtx, vcc, vcc));
1520 [(set_attr "type" "vmult")
1521 (set_attr "length" "8")])
; EXEC-masked variant of addv64di3_zext_dup.
1523 (define_insn_and_split "addv64di3_zext_dup_exec"
1524 [(set (match_operand:V64DI 0 "register_operand" "=&v")
1528 (vec_duplicate:V64SI
1529 (match_operand:SI 1 "gcn_alu_operand" "BSv")))
1530 (match_operand:V64DI 2 "gcn_alu_operand" "vA0"))
1531 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1532 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1533 (clobber (reg:DI VCC_REG))]
1536 "gcn_can_split_p (V64DImode, operands[0])
1537 && gcn_can_split_p (V64DImode, operands[2])
1538 && gcn_can_split_p (V64DImode, operands[3])"
1541 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1542 emit_insn (gen_addv64si3_vcc_dup_exec
1543 (gcn_operand_part (V64DImode, operands[0], 0),
1544 gcn_operand_part (DImode, operands[1], 0),
1545 gcn_operand_part (V64DImode, operands[2], 0),
1547 gcn_operand_part (V64DImode, operands[3], 0),
1549 emit_insn (gen_addcv64si3_exec
1550 (gcn_operand_part (V64DImode, operands[0], 1),
1551 gcn_operand_part (V64DImode, operands[2], 1),
1552 const0_rtx, vcc, vcc,
1553 gcn_operand_part (V64DImode, operands[3], 1),
1557 [(set_attr "type" "vmult")
1558 (set_attr "length" "8")])
; 64-bit add of a zero-extended V64SI vector and a broadcast DI scalar.
; The split duplicates the scalar's high half into the destination's high
; part first, then adds only the carry into it with addcv64si3.
1560 (define_insn_and_split "addv64di3_zext_dup2"
1561 [(set (match_operand:V64DI 0 "register_operand" "= v")
1563 (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA"))
1564 (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv"))))
1565 (clobber (reg:DI VCC_REG))]
1568 "gcn_can_split_p (V64DImode, operands[0])"
1571 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1572 emit_insn (gen_addv64si3_vcc_dup
1573 (gcn_operand_part (V64DImode, operands[0], 0),
1574 gcn_operand_part (DImode, operands[2], 0),
1577 rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
1578 emit_insn (gen_vec_duplicatev64si
1579 (dsthi, gcn_operand_part (DImode, operands[2], 1)));
1580 emit_insn (gen_addcv64si3 (dsthi, dsthi, const0_rtx, vcc, vcc));
1583 [(set_attr "type" "vmult")
1584 (set_attr "length" "8")])
; EXEC-masked variant of addv64di3_zext_dup2.  Note the high-half
; duplicate uses an undefined merge value (gcn_gen_undef) under EXEC.
1586 (define_insn_and_split "addv64di3_zext_dup2_exec"
1587 [(set (match_operand:V64DI 0 "register_operand" "= v")
1590 (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
1592 (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
1593 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1594 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1595 (clobber (reg:DI VCC_REG))]
1598 "gcn_can_split_p (V64DImode, operands[0])
1599 && gcn_can_split_p (V64DImode, operands[3])"
1602 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1603 emit_insn (gen_addv64si3_vcc_dup_exec
1604 (gcn_operand_part (V64DImode, operands[0], 0),
1605 gcn_operand_part (DImode, operands[2], 0),
1608 gcn_operand_part (V64DImode, operands[3], 0),
1610 rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
1611 emit_insn (gen_vec_duplicatev64si_exec
1612 (dsthi, gcn_operand_part (DImode, operands[2], 1),
1613 gcn_gen_undef (V64SImode), operands[4]));
1614 emit_insn (gen_addcv64si3_exec
1615 (dsthi, dsthi, const0_rtx, vcc, vcc,
1616 gcn_operand_part (V64DImode, operands[3], 1),
1620 [(set_attr "type" "vmult")
1621 (set_attr "length" "8")])
; As addv64di3_zext_dup2 but sign-extending operand 1: the scratch
; (operand 3) receives the sign bits via an arithmetic shift right by 31,
; and is added into the high half together with the carry.
1623 (define_insn_and_split "addv64di3_sext_dup2"
1624 [(set (match_operand:V64DI 0 "register_operand" "= v")
1626 (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA"))
1627 (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv"))))
1628 (clobber (match_scratch:V64SI 3 "=&v"))
1629 (clobber (reg:DI VCC_REG))]
1632 "gcn_can_split_p (V64DImode, operands[0])"
1635 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1636 emit_insn (gen_ashrv64si3 (operands[3], operands[1], GEN_INT (31)));
1637 emit_insn (gen_addv64si3_vcc_dup
1638 (gcn_operand_part (V64DImode, operands[0], 0),
1639 gcn_operand_part (DImode, operands[2], 0),
1642 rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
1643 emit_insn (gen_vec_duplicatev64si
1644 (dsthi, gcn_operand_part (DImode, operands[2], 1)));
1645 emit_insn (gen_addcv64si3 (dsthi, dsthi, operands[3], vcc, vcc));
1648 [(set_attr "type" "vmult")
1649 (set_attr "length" "8")])
; EXEC-masked variant of addv64di3_sext_dup2; the sign-bit shift and the
; high-half duplicate both run under EXEC with undefined merge values.
1651 (define_insn_and_split "addv64di3_sext_dup2_exec"
1652 [(set (match_operand:V64DI 0 "register_operand" "= v")
1655 (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
1657 (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
1658 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1659 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1660 (clobber (match_scratch:V64SI 5 "=&v"))
1661 (clobber (reg:DI VCC_REG))]
1664 "gcn_can_split_p (V64DImode, operands[0])
1665 && gcn_can_split_p (V64DImode, operands[3])"
1668 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1669 emit_insn (gen_ashrv64si3_exec (operands[5], operands[1], GEN_INT (31),
1670 gcn_gen_undef (V64SImode), operands[4]));
1671 emit_insn (gen_addv64si3_vcc_dup_exec
1672 (gcn_operand_part (V64DImode, operands[0], 0),
1673 gcn_operand_part (DImode, operands[2], 0),
1676 gcn_operand_part (V64DImode, operands[3], 0),
1678 rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
1679 emit_insn (gen_vec_duplicatev64si_exec
1680 (dsthi, gcn_operand_part (DImode, operands[2], 1),
1681 gcn_gen_undef (V64SImode), operands[4]));
1682 emit_insn (gen_addcv64si3_exec
1683 (dsthi, dsthi, operands[5], vcc, vcc,
1684 gcn_operand_part (V64DImode, operands[3], 1),
1688 [(set_attr "type" "vmult")
1689 (set_attr "length" "8")])
1692 ;; {{{ DS memory ALU: add/sub
; Vector and scalar modes handled by the LDS ("DS") arithmetic patterns
; below.
1694 (define_mode_iterator DS_ARITH_MODE [V64SI V64SF V64DI])
1695 (define_mode_iterator DS_ARITH_SCALAR_MODE [SI SF DI])
1697 ;; FIXME: the vector patterns probably need RD expanded to a vector of
1698 ;; addresses. For now, the only way a vector can get into LDS is
1699 ;; if the user puts it there manually.
1701 ;; FIXME: the scalar patterns are probably fine in themselves, but need to be
1702 ;; checked to see if anything can ever use them.
; LDS read-modify-write add: destination and source memory operands must
; be the same location (enforced by the rtx_equal_p condition).
1704 (define_insn "add<mode>3_ds<exec>"
1705 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
1707 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "%RD")
1708 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
1709 "rtx_equal_p (operands[0], operands[1])"
1710 "ds_add%u0\t%A0, %2%O0"
1711 [(set_attr "type" "ds")
1712 (set_attr "length" "8")])
; Scalar-mode counterpart of add<mode>3_ds (same read-modify-write LDS
; location constraint).
1714 (define_insn "add<mode>3_ds_scalar"
1715 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
1716 (plus:DS_ARITH_SCALAR_MODE
1717 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
1719 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
1720 "rtx_equal_p (operands[0], operands[1])"
1721 "ds_add%u0\t%A0, %2%O0"
1722 [(set_attr "type" "ds")
1723 (set_attr "length" "8")])
; LDS read-modify-write subtract (memory - register), same-location
; constraint as the add form.
1725 (define_insn "sub<mode>3_ds<exec>"
1726 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
1727 (minus:DS_ARITH_MODE
1728 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")
1729 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
1730 "rtx_equal_p (operands[0], operands[1])"
1731 "ds_sub%u0\t%A0, %2%O0"
1732 [(set_attr "type" "ds")
1733 (set_attr "length" "8")])
; Scalar-mode counterpart of sub<mode>3_ds.
1735 (define_insn "sub<mode>3_ds_scalar"
1736 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
1737 (minus:DS_ARITH_SCALAR_MODE
1738 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
1740 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
1741 "rtx_equal_p (operands[0], operands[1])"
1742 "ds_sub%u0\t%A0, %2%O0"
1743 [(set_attr "type" "ds")
1744 (set_attr "length" "8")])
; Reversed LDS subtract (register - memory), mapped to ds_rsub.
1746 (define_insn "subr<mode>3_ds<exec>"
1747 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
1748 (minus:DS_ARITH_MODE
1749 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")
1750 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")))]
1751 "rtx_equal_p (operands[0], operands[1])"
1752 "ds_rsub%u0\t%A0, %2%O0"
1753 [(set_attr "type" "ds")
1754 (set_attr "length" "8")])
; Scalar-mode counterpart of subr<mode>3_ds (register - memory, ds_rsub).
1756 (define_insn "subr<mode>3_ds_scalar"
1757 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
1758 (minus:DS_ARITH_SCALAR_MODE
1759 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")
1760 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
1762 "rtx_equal_p (operands[0], operands[1])"
1763 "ds_rsub%u0\t%A0, %2%O0"
1764 [(set_attr "type" "ds")
1765 (set_attr "length" "8")])
1768 ;; {{{ ALU special case: mult
; High 32 bits of a signed or unsigned 32x32 multiply (v_mul_hi_i32/u32,
; chosen via <sgnsuffix>).  Operand 1 is commutative.
1770 (define_insn "<su>mulv64si3_highpart<exec>"
1771 [(set (match_operand:V64SI 0 "register_operand" "= v")
1776 (match_operand:V64SI 1 "gcn_alu_operand" " %v"))
1778 (match_operand:V64SI 2 "gcn_alu_operand" "vSvA")))
1781 "v_mul_hi<sgnsuffix>0\t%0, %2, %1"
1782 [(set_attr "type" "vop3a")
1783 (set_attr "length" "8")])
; Low 32 bits of a vector integer multiply; v_mul_lo_u32 works for both
; signednesses since only the low bits are kept.
1785 (define_insn "mul<mode>3<exec>"
1786 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "= v")
1787 (mult:VEC_ALL1REG_INT_MODE
1788 (match_operand:VEC_ALL1REG_INT_MODE 1 "gcn_alu_operand" "%vSvA")
1789 (match_operand:VEC_ALL1REG_INT_MODE 2 "gcn_alu_operand" " vSvA")))]
1791 "v_mul_lo_u32\t%0, %1, %2"
1792 [(set_attr "type" "vop3a")
1793 (set_attr "length" "8")])
; Low-part multiply with scalar operand 2 broadcast to all lanes.
1795 (define_insn "mul<mode>3_dup<exec>"
1796 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "= v")
1797 (mult:VEC_ALL1REG_INT_MODE
1798 (match_operand:VEC_ALL1REG_INT_MODE 1 "gcn_alu_operand" "%vSvA")
1799 (vec_duplicate:VEC_ALL1REG_INT_MODE
1800 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" " SvA"))))]
1802 "v_mul_lo_u32\t%0, %1, %2"
1803 [(set_attr "type" "vop3a")
1804 (set_attr "length" "8")])
; 64-bit vector multiply, expanded schoolbook-style into four 32-bit
; multiplies:  lo = lo1*lo2;  hi = umulhi(lo1,lo2) + hi1*lo2 + lo1*hi2
; + hi1*hi2 (low parts only, since bits above 64 are discarded).
; Early-clobber destination plus a V64SI scratch accumulator.
1806 (define_insn_and_split "mulv64di3"
1807 [(set (match_operand:V64DI 0 "register_operand" "=&v")
1809 (match_operand:V64DI 1 "gcn_alu_operand" "% v")
1810 (match_operand:V64DI 2 "gcn_alu_operand" "vDA")))
1811 (clobber (match_scratch:V64SI 3 "=&v"))]
1817 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1818 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1819 rtx left_lo = gcn_operand_part (V64DImode, operands[1], 0);
1820 rtx left_hi = gcn_operand_part (V64DImode, operands[1], 1);
1821 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1822 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1823 rtx tmp = operands[3];
1825 emit_insn (gen_mulv64si3 (out_lo, left_lo, right_lo));
1826 emit_insn (gen_umulv64si3_highpart (out_hi, left_lo, right_lo));
1827 emit_insn (gen_mulv64si3 (tmp, left_hi, right_lo));
1828 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
1829 emit_insn (gen_mulv64si3 (tmp, left_lo, right_hi));
1830 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
1831 emit_insn (gen_mulv64si3 (tmp, left_hi, right_hi));
1832 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp))
; EXEC-masked 64-bit multiply.  The merge operand (3) may be an UNSPEC
; (undefined previous value), in which case undef halves are used; all
; component 32-bit operations run under the same EXEC mask.
1836 (define_insn_and_split "mulv64di3_exec"
1837 [(set (match_operand:V64DI 0 "register_operand" "=&v")
1840 (match_operand:V64DI 1 "gcn_alu_operand" "% v")
1841 (match_operand:V64DI 2 "gcn_alu_operand" "vDA"))
1842 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1843 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1844 (clobber (match_scratch:V64SI 5 "=&v"))]
1850 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1851 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1852 rtx left_lo = gcn_operand_part (V64DImode, operands[1], 0);
1853 rtx left_hi = gcn_operand_part (V64DImode, operands[1], 1);
1854 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1855 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1856 rtx exec = operands[4];
1857 rtx tmp = operands[5];
1860 if (GET_CODE (operands[3]) == UNSPEC)
1862 old_lo = old_hi = gcn_gen_undef (V64SImode);
1866 old_lo = gcn_operand_part (V64DImode, operands[3], 0);
1867 old_hi = gcn_operand_part (V64DImode, operands[3], 1);
1870 rtx undef = gcn_gen_undef (V64SImode);
1872 emit_insn (gen_mulv64si3_exec (out_lo, left_lo, right_lo, old_lo, exec));
1873 emit_insn (gen_umulv64si3_highpart_exec (out_hi, left_lo, right_lo,
1875 emit_insn (gen_mulv64si3_exec (tmp, left_hi, right_lo, undef, exec));
1876 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
1877 emit_insn (gen_mulv64si3_exec (tmp, left_lo, right_hi, undef, exec));
1878 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
1879 emit_insn (gen_mulv64si3_exec (tmp, left_hi, right_hi, undef, exec));
1880 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec))
; 64-bit multiply where the left operand is a zero-extended V64SI value:
; its high half is zero, so only three 32-bit multiplies are needed.
1884 (define_insn_and_split "mulv64di3_zext"
1885 [(set (match_operand:V64DI 0 "register_operand" "=&v")
1888 (match_operand:V64SI 1 "gcn_alu_operand" " v"))
1889 (match_operand:V64DI 2 "gcn_alu_operand" "vDA")))
1890 (clobber (match_scratch:V64SI 3 "=&v"))]
1896 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1897 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1898 rtx left = operands[1];
1899 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1900 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1901 rtx tmp = operands[3];
1903 emit_insn (gen_mulv64si3 (out_lo, left, right_lo));
1904 emit_insn (gen_umulv64si3_highpart (out_hi, left, right_lo));
1905 emit_insn (gen_mulv64si3 (tmp, left, right_hi));
1906 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp))
; EXEC-masked variant of mulv64di3_zext; handles an UNSPEC (undefined)
; merge value for operand 3 like mulv64di3_exec.
1910 (define_insn_and_split "mulv64di3_zext_exec"
1911 [(set (match_operand:V64DI 0 "register_operand" "=&v")
1915 (match_operand:V64SI 1 "gcn_alu_operand" " v"))
1916 (match_operand:V64DI 2 "gcn_alu_operand" "vDA"))
1917 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1918 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1919 (clobber (match_scratch:V64SI 5 "=&v"))]
1925 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1926 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1927 rtx left = operands[1];
1928 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1929 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1930 rtx exec = operands[4];
1931 rtx tmp = operands[5];
1934 if (GET_CODE (operands[3]) == UNSPEC)
1936 old_lo = old_hi = gcn_gen_undef (V64SImode);
1940 old_lo = gcn_operand_part (V64DImode, operands[3], 0);
1941 old_hi = gcn_operand_part (V64DImode, operands[3], 1);
1944 rtx undef = gcn_gen_undef (V64SImode);
1946 emit_insn (gen_mulv64si3_exec (out_lo, left, right_lo, old_lo, exec));
1947 emit_insn (gen_umulv64si3_highpart_exec (out_hi, left, right_lo,
1949 emit_insn (gen_mulv64si3_exec (tmp, left, right_hi, undef, exec));
1950 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec))
; Zero-extended V64SI multiplied by a broadcast DI scalar; same
; three-multiply expansion as mulv64di3_zext, with the scalar split into
; 32-bit halves.
1954 (define_insn_and_split "mulv64di3_zext_dup2"
1955 [(set (match_operand:V64DI 0 "register_operand" "= &v")
1958 (match_operand:V64SI 1 "gcn_alu_operand" " v"))
1959 (vec_duplicate:V64DI
1960 (match_operand:DI 2 "gcn_alu_operand" "SvDA"))))
1961 (clobber (match_scratch:V64SI 3 "= &v"))]
1967 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1968 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1969 rtx left = operands[1];
1970 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1971 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1972 rtx tmp = operands[3];
1974 emit_insn (gen_mulv64si3 (out_lo, left, right_lo));
1975 emit_insn (gen_umulv64si3_highpart (out_hi, left, right_lo));
1976 emit_insn (gen_mulv64si3 (tmp, left, right_hi));
1977 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp))
; EXEC-masked variant of mulv64di3_zext_dup2.
1981 (define_insn_and_split "mulv64di3_zext_dup2_exec"
1982 [(set (match_operand:V64DI 0 "register_operand" "= &v")
1986 (match_operand:V64SI 1 "gcn_alu_operand" " v"))
1987 (vec_duplicate:V64DI
1988 (match_operand:DI 2 "gcn_alu_operand" "SvDA")))
1989 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1990 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1991 (clobber (match_scratch:V64SI 5 "= &v"))]
1997 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1998 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1999 rtx left = operands[1];
2000 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
2001 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
2002 rtx exec = operands[4];
2003 rtx tmp = operands[5];
2006 if (GET_CODE (operands[3]) == UNSPEC)
2008 old_lo = old_hi = gcn_gen_undef (V64SImode);
2012 old_lo = gcn_operand_part (V64DImode, operands[3], 0);
2013 old_hi = gcn_operand_part (V64DImode, operands[3], 1);
2016 rtx undef = gcn_gen_undef (V64SImode);
2018 emit_insn (gen_mulv64si3_exec (out_lo, left, right_lo, old_lo, exec));
2019 emit_insn (gen_umulv64si3_highpart_exec (out_hi, left, right_lo,
2021 emit_insn (gen_mulv64si3_exec (tmp, left, right_hi, undef, exec));
2022 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec))
2027 ;; {{{ ALU generic case
; Iterators for the generic ALU patterns: integer vector modes plus
; bitwise, shift and min/max RTL code classes.
2029 (define_mode_iterator VEC_INT_MODE [V64SI V64DI])
2031 (define_code_iterator bitop [and ior xor])
2032 (define_code_iterator shiftop [ashift lshiftrt ashiftrt])
2033 (define_code_iterator minmaxop [smin smax umin umax])
; Generic unary bitwise operation (bitunop) on single-register integer
; vector modes, emitted as a VOP1 instruction.
2035 (define_insn "<expander><mode>2<exec>"
2036 [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "= v")
2037 (bitunop:VEC_1REG_INT_MODE
2038 (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand" "vSvB")))]
2040 "v_<mnemonic>0\t%0, %1"
2041 [(set_attr "type" "vop1")
2042 (set_attr "length" "8")])
; Generic binary bitwise operation (and/ior/xor).  Alternative 0 is the
; VALU form, alternative 1 the LDS ("RD") memory form using the ds_
; instruction.
2044 (define_insn "<expander><mode>3<exec>"
2045 [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "= v,RD")
2046 (bitop:VEC_1REG_INT_MODE
2047 (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand"
2049 (match_operand:VEC_1REG_INT_MODE 2 "gcn_valu_src1com_operand"
2053 v_<mnemonic>0\t%0, %2, %1
2054 ds_<mnemonic>0\t%A0, %2%O0"
2055 [(set_attr "type" "vop2,ds")
2056 (set_attr "length" "8,8")])
; 64-bit bitwise operation: the LDS alternative stays a single ds_
; instruction, while the register alternative is split after reload into
; two independent 32-bit bitops on the low and high halves.
2058 (define_insn_and_split "<expander>v64di3"
2059 [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD")
2061 (match_operand:V64DI 1 "gcn_valu_src0_operand" "% v,RD")
2062 (match_operand:V64DI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
2066 ds_<mnemonic>0\t%A0, %2%O0"
2067 "(reload_completed && !gcn_ds_memory_operand (operands[0], V64DImode))"
2069 (bitop:V64SI (match_dup 5) (match_dup 7)))
2071 (bitop:V64SI (match_dup 6) (match_dup 8)))]
2073 operands[3] = gcn_operand_part (V64DImode, operands[0], 0);
2074 operands[4] = gcn_operand_part (V64DImode, operands[0], 1);
2075 operands[5] = gcn_operand_part (V64DImode, operands[1], 0);
2076 operands[6] = gcn_operand_part (V64DImode, operands[1], 1);
2077 operands[7] = gcn_operand_part (V64DImode, operands[2], 0);
2078 operands[8] = gcn_operand_part (V64DImode, operands[2], 1);
2080 [(set_attr "type" "vmult,ds")
2081 (set_attr "length" "16,8")])
; EXEC-masked 64-bit bitwise operation.  The insn condition requires
; either a non-memory destination, or dest == source-1 with a register
; second source (the LDS read-modify-write form).  The split produces two
; masked 32-bit bitops over the operand halves.
2083 (define_insn_and_split "<expander>v64di3_exec"
2084 [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD")
2087 (match_operand:V64DI 1 "gcn_valu_src0_operand" "% v,RD")
2088 (match_operand:V64DI 2 "gcn_valu_src1com_operand" "vSvB, v"))
2089 (match_operand:V64DI 3 "gcn_register_ds_or_unspec_operand"
2091 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))]
2092 "!memory_operand (operands[0], VOIDmode)
2093 || (rtx_equal_p (operands[0], operands[1])
2094 && register_operand (operands[2], VOIDmode))"
2097 ds_<mnemonic>0\t%A0, %2%O0"
2098 "(reload_completed && !gcn_ds_memory_operand (operands[0], V64DImode))"
2101 (bitop:V64SI (match_dup 7) (match_dup 9))
2106 (bitop:V64SI (match_dup 8) (match_dup 10))
2110 operands[5] = gcn_operand_part (V64DImode, operands[0], 0);
2111 operands[6] = gcn_operand_part (V64DImode, operands[0], 1);
2112 operands[7] = gcn_operand_part (V64DImode, operands[1], 0);
2113 operands[8] = gcn_operand_part (V64DImode, operands[1], 1);
2114 operands[9] = gcn_operand_part (V64DImode, operands[2], 0);
2115 operands[10] = gcn_operand_part (V64DImode, operands[2], 1);
2116 operands[11] = gcn_operand_part (V64DImode, operands[3], 0);
2117 operands[12] = gcn_operand_part (V64DImode, operands[3], 1);
2119 [(set_attr "type" "vmult,ds")
2120 (set_attr "length" "16,8")])
2122 (define_insn "<expander>v64si3<exec>"
2123 [(set (match_operand:V64SI 0 "register_operand" "= v")
2125 (match_operand:V64SI 1 "gcn_alu_operand" " v")
2126 (vec_duplicate:V64SI
2127 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
2129 "v_<revmnemonic>0\t%0, %2, %1"
2130 [(set_attr "type" "vop2")
2131 (set_attr "length" "8")])
2133 (define_insn "v<expander>v64si3<exec>"
2134 [(set (match_operand:V64SI 0 "register_operand" "=v")
2136 (match_operand:V64SI 1 "gcn_alu_operand" " v")
2137 (match_operand:V64SI 2 "gcn_alu_operand" "vB")))]
2139 "v_<revmnemonic>0\t%0, %2, %1"
2140 [(set_attr "type" "vop2")
2141 (set_attr "length" "8")])
2143 (define_insn "<expander><mode>3<exec>"
2144 [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "= v,RD")
2145 (minmaxop:VEC_1REG_INT_MODE
2146 (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand"
2148 (match_operand:VEC_1REG_INT_MODE 2 "gcn_valu_src1com_operand"
2152 v_<mnemonic>0\t%0, %2, %1
2153 ds_<mnemonic>0\t%A0, %2%O0"
2154 [(set_attr "type" "vop2,ds")
2155 (set_attr "length" "8,8")])
2158 ;; {{{ FP binops - special cases
2160 ; GCN does not directly provide a DFmode subtract instruction, so we do it by
2161 ; adding the negated second operand to the first.
2163 (define_insn "subv64df3<exec>"
2164 [(set (match_operand:V64DF 0 "register_operand" "= v, v")
2166 (match_operand:V64DF 1 "gcn_alu_operand" "vSvB, v")
2167 (match_operand:V64DF 2 "gcn_alu_operand" " v,vSvB")))]
2170 v_add_f64\t%0, %1, -%2
2171 v_add_f64\t%0, -%2, %1"
2172 [(set_attr "type" "vop3a")
2173 (set_attr "length" "8,8")])
2175 (define_insn "subdf"
2176 [(set (match_operand:DF 0 "register_operand" "= v, v")
2178 (match_operand:DF 1 "gcn_alu_operand" "vSvB, v")
2179 (match_operand:DF 2 "gcn_alu_operand" " v,vSvB")))]
2182 v_add_f64\t%0, %1, -%2
2183 v_add_f64\t%0, -%2, %1"
2184 [(set_attr "type" "vop3a")
2185 (set_attr "length" "8,8")])
2188 ;; {{{ FP binops - generic
; Floating-point vector modes.  V64DF needs two vector registers
; (see the "Double reg" comment on VEC_REG_MODE above); the *_1REG_*
; variants exclude it.
2190 (define_mode_iterator VEC_FP_MODE [V64HF V64SF V64DF])
2191 (define_mode_iterator VEC_FP_1REG_MODE [V64HF V64SF])
; Scalar counterparts of the vector FP modes.
2192 (define_mode_iterator FP_MODE [HF SF DF])
2193 (define_mode_iterator FP_1REG_MODE [HF SF])
; Commutative FP operations (their patterns mark operand 1 with the
; "%" commutative modifier), the non-commutative remainder, and the
; union of both.
2195 (define_code_iterator comm_fp [plus mult smin smax])
2196 (define_code_iterator nocomm_fp [minus])
2197 (define_code_iterator all_fp [plus mult minus smin smax])
2199 (define_insn "<expander><mode>3<exec>"
2200 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v")
2201 (comm_fp:VEC_FP_MODE
2202 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "% v")
2203 (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" "vSvB")))]
2205 "v_<mnemonic>0\t%0, %2, %1"
2206 [(set_attr "type" "vop2")
2207 (set_attr "length" "8")])
2209 (define_insn "<expander><mode>3"
2210 [(set (match_operand:FP_MODE 0 "gcn_valu_dst_operand" "= v, RL")
2212 (match_operand:FP_MODE 1 "gcn_valu_src0_operand" "% v, 0")
2213 (match_operand:FP_MODE 2 "gcn_valu_src1_operand" "vSvB,vSvB")))]
2216 v_<mnemonic>0\t%0, %2, %1
2217 v_<mnemonic>0\t%0, %1%O0"
2218 [(set_attr "type" "vop2,ds")
2219 (set_attr "length" "8")])
2221 (define_insn "<expander><mode>3<exec>"
2222 [(set (match_operand:VEC_FP_1REG_MODE 0 "register_operand" "= v, v")
2223 (nocomm_fp:VEC_FP_1REG_MODE
2224 (match_operand:VEC_FP_1REG_MODE 1 "gcn_alu_operand" "vSvB, v")
2225 (match_operand:VEC_FP_1REG_MODE 2 "gcn_alu_operand" " v,vSvB")))]
2228 v_<mnemonic>0\t%0, %1, %2
2229 v_<revmnemonic>0\t%0, %2, %1"
2230 [(set_attr "type" "vop2")
2231 (set_attr "length" "8,8")])
2233 (define_insn "<expander><mode>3"
2234 [(set (match_operand:FP_1REG_MODE 0 "register_operand" "= v, v")
2235 (nocomm_fp:FP_1REG_MODE
2236 (match_operand:FP_1REG_MODE 1 "gcn_alu_operand" "vSvB, v")
2237 (match_operand:FP_1REG_MODE 2 "gcn_alu_operand" " v,vSvB")))]
2240 v_<mnemonic>0\t%0, %1, %2
2241 v_<revmnemonic>0\t%0, %2, %1"
2242 [(set_attr "type" "vop2")
2243 (set_attr "length" "8,8")])
2248 (define_insn "abs<mode>2"
2249 [(set (match_operand:FP_MODE 0 "register_operand" "=v")
2250 (abs:FP_MODE (match_operand:FP_MODE 1 "register_operand" " v")))]
2252 "v_add%i0\t%0, 0, |%1|"
2253 [(set_attr "type" "vop3a")
2254 (set_attr "length" "8")])
2256 (define_insn "abs<mode>2<exec>"
2257 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "=v")
2259 (match_operand:VEC_FP_MODE 1 "register_operand" " v")))]
2261 "v_add%i0\t%0, 0, |%1|"
2262 [(set_attr "type" "vop3a")
2263 (set_attr "length" "8")])
2265 (define_insn "neg<mode>2<exec>"
2266 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "=v")
2268 (match_operand:VEC_FP_MODE 1 "register_operand" " v")))]
2270 "v_add%i0\t%0, 0, -%1"
2271 [(set_attr "type" "vop3a")
2272 (set_attr "length" "8")])
2274 (define_insn "sqrt<mode>2<exec>"
2275 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v")
2277 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "vSvB")))]
2278 "flag_unsafe_math_optimizations"
2280 [(set_attr "type" "vop1")
2281 (set_attr "length" "8")])
2283 (define_insn "sqrt<mode>2"
2284 [(set (match_operand:FP_MODE 0 "register_operand" "= v")
2286 (match_operand:FP_MODE 1 "gcn_alu_operand" "vSvB")))]
2287 "flag_unsafe_math_optimizations"
2289 [(set_attr "type" "vop1")
2290 (set_attr "length" "8")])
2293 ;; {{{ FP fused multiply and add
2295 (define_insn "fma<mode>4<exec>"
2296 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v, v")
2298 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "% vA, vA")
2299 (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" " vA,vSvA")
2300 (match_operand:VEC_FP_MODE 3 "gcn_alu_operand" "vSvA, vA")))]
2302 "v_fma%i0\t%0, %1, %2, %3"
2303 [(set_attr "type" "vop3a")
2304 (set_attr "length" "8")])
2306 (define_insn "fma<mode>4_negop2<exec>"
2307 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v, v, v")
2309 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" " vA, vA,vSvA")
2311 (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" " vA,vSvA, vA"))
2312 (match_operand:VEC_FP_MODE 3 "gcn_alu_operand" "vSvA, vA, vA")))]
2314 "v_fma%i0\t%0, %1, -%2, %3"
2315 [(set_attr "type" "vop3a")
2316 (set_attr "length" "8")])
2318 (define_insn "fma<mode>4"
2319 [(set (match_operand:FP_MODE 0 "register_operand" "= v, v")
2321 (match_operand:FP_MODE 1 "gcn_alu_operand" "% vA, vA")
2322 (match_operand:FP_MODE 2 "gcn_alu_operand" " vA,vSvA")
2323 (match_operand:FP_MODE 3 "gcn_alu_operand" "vSvA, vA")))]
2325 "v_fma%i0\t%0, %1, %2, %3"
2326 [(set_attr "type" "vop3a")
2327 (set_attr "length" "8")])
2329 (define_insn "fma<mode>4_negop2"
2330 [(set (match_operand:FP_MODE 0 "register_operand" "= v, v, v")
2332 (match_operand:FP_MODE 1 "gcn_alu_operand" " vA, vA,vSvA")
2334 (match_operand:FP_MODE 2 "gcn_alu_operand" " vA,vSvA, vA"))
2335 (match_operand:FP_MODE 3 "gcn_alu_operand" "vSvA, vA, vA")))]
2337 "v_fma%i0\t%0, %1, -%2, %3"
2338 [(set_attr "type" "vop3a")
2339 (set_attr "length" "8")])
2344 (define_insn "recip<mode>2<exec>"
2345 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v")
2347 (vec_duplicate:VEC_FP_MODE (float:<SCALAR_MODE> (const_int 1)))
2348 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "vSvB")))]
2351 [(set_attr "type" "vop1")
2352 (set_attr "length" "8")])
2354 (define_insn "recip<mode>2"
2355 [(set (match_operand:FP_MODE 0 "register_operand" "= v")
2357 (float:FP_MODE (const_int 1))
2358 (match_operand:FP_MODE 1 "gcn_alu_operand" "vSvB")))]
2361 [(set_attr "type" "vop1")
2362 (set_attr "length" "8")])
2364 ;; Do division via a = b * 1/c
2365 ;; The v_rcp_* instructions are not sufficiently accurate on their own,
2366 ;; so we use 2 v_fma_* instructions to do one round of Newton-Raphson
2367 ;; which the ISA manual says is enough to improve the reciprocal accuracy.
2369 ;; FIXME: This does not handle denormals, NaNs, division-by-zero etc.
2371 (define_expand "div<mode>3"
2372 [(match_operand:VEC_FP_MODE 0 "gcn_valu_dst_operand")
2373 (match_operand:VEC_FP_MODE 1 "gcn_valu_src0_operand")
2374 (match_operand:VEC_FP_MODE 2 "gcn_valu_src0_operand")]
2375 "flag_reciprocal_math"
2377 rtx two = gcn_vec_constant (<MODE>mode,
2378 const_double_from_real_value (dconst2, <SCALAR_MODE>mode));
2379 rtx initrcp = gen_reg_rtx (<MODE>mode);
2380 rtx fma = gen_reg_rtx (<MODE>mode);
2383 bool is_rcp = (GET_CODE (operands[1]) == CONST_VECTOR
2385 (CONST_DOUBLE_REAL_VALUE
2386 (CONST_VECTOR_ELT (operands[1], 0)), &dconstm1));
2391 rcp = gen_reg_rtx (<MODE>mode);
2393 emit_insn (gen_recip<mode>2 (initrcp, operands[2]));
2394 emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, operands[2], two));
2395 emit_insn (gen_mul<mode>3 (rcp, initrcp, fma));
2398 emit_insn (gen_mul<mode>3 (operands[0], operands[1], rcp));
2403 (define_expand "div<mode>3"
2404 [(match_operand:FP_MODE 0 "gcn_valu_dst_operand")
2405 (match_operand:FP_MODE 1 "gcn_valu_src0_operand")
2406 (match_operand:FP_MODE 2 "gcn_valu_src0_operand")]
2407 "flag_reciprocal_math"
2409 rtx two = const_double_from_real_value (dconst2, <MODE>mode);
2410 rtx initrcp = gen_reg_rtx (<MODE>mode);
2411 rtx fma = gen_reg_rtx (<MODE>mode);
2414 bool is_rcp = (GET_CODE (operands[1]) == CONST_DOUBLE
2415 && real_identical (CONST_DOUBLE_REAL_VALUE (operands[1]),
2421 rcp = gen_reg_rtx (<MODE>mode);
2423 emit_insn (gen_recip<mode>2 (initrcp, operands[2]));
2424 emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, operands[2], two));
2425 emit_insn (gen_mul<mode>3 (rcp, initrcp, fma));
2428 emit_insn (gen_mul<mode>3 (operands[0], operands[1], rcp));
2434 ;; {{{ Int/FP conversions
; Scalar source/destination modes for int<->FP conversions.  The two
; iterators are deliberately identical so every from/to pairing is
; generated; invalid pairs are rejected by gcn_valid_cvt_p in the
; insn conditions below.
2436 (define_mode_iterator CVT_FROM_MODE [HI SI HF SF DF])
2437 (define_mode_iterator CVT_TO_MODE [HI SI HF SF DF])
; Vector conversion modes: any source mode, FP destinations, and
; integer destinations, respectively.
2439 (define_mode_iterator VCVT_MODE [V64HI V64SI V64HF V64SF V64DF])
2440 (define_mode_iterator VCVT_FMODE [V64HF V64SF V64DF])
2441 (define_mode_iterator VCVT_IMODE [V64HI V64SI])
; RTL codes for all conversion directions handled here.
2443 (define_code_iterator cvt_op [fix unsigned_fix
2444 float unsigned_float
2445 float_extend float_truncate])
; Standard-pattern name fragment for each conversion code
; (e.g. "fix_trunc...2", "floatuns...2").
2446 (define_code_attr cvt_name [(fix "fix_trunc") (unsigned_fix "fixuns_trunc")
2447 (float "float") (unsigned_float "floatuns")
2448 (float_extend "extend") (float_truncate "trunc")])
; Operand-print directives forming the v_cvt suffix; %i/%u presumably
; expand to signed/unsigned type suffixes derived from the given
; operand's mode -- confirm against print_operand in gcn.c.
2449 (define_code_attr cvt_operands [(fix "%i0%i1") (unsigned_fix "%u0%i1")
2450 (float "%i0%i1") (unsigned_float "%i0%u1")
2451 (float_extend "%i0%i1")
2452 (float_truncate "%i0%i1")])
2454 (define_insn "<cvt_name><CVT_FROM_MODE:mode><CVT_TO_MODE:mode>2"
2455 [(set (match_operand:CVT_TO_MODE 0 "register_operand" "= v")
2457 (match_operand:CVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))]
2458 "gcn_valid_cvt_p (<CVT_FROM_MODE:MODE>mode, <CVT_TO_MODE:MODE>mode,
2460 "v_cvt<cvt_operands>\t%0, %1"
2461 [(set_attr "type" "vop1")
2462 (set_attr "length" "8")])
2464 (define_insn "<cvt_name><VCVT_MODE:mode><VCVT_FMODE:mode>2<exec>"
2465 [(set (match_operand:VCVT_FMODE 0 "register_operand" "= v")
2467 (match_operand:VCVT_MODE 1 "gcn_alu_operand" "vSvB")))]
2468 "gcn_valid_cvt_p (<VCVT_MODE:MODE>mode, <VCVT_FMODE:MODE>mode,
2470 "v_cvt<cvt_operands>\t%0, %1"
2471 [(set_attr "type" "vop1")
2472 (set_attr "length" "8")])
2474 (define_insn "<cvt_name><VCVT_FMODE:mode><VCVT_IMODE:mode>2<exec>"
2475 [(set (match_operand:VCVT_IMODE 0 "register_operand" "= v")
2477 (match_operand:VCVT_FMODE 1 "gcn_alu_operand" "vSvB")))]
2478 "gcn_valid_cvt_p (<VCVT_FMODE:MODE>mode, <VCVT_IMODE:MODE>mode,
2480 "v_cvt<cvt_operands>\t%0, %1"
2481 [(set_attr "type" "vop1")
2482 (set_attr "length" "8")])
2485 ;; {{{ Int/int conversions
; Integer conversions that can share the single v_mov_b32_sdwa
; pattern below (sign_extend needs the sext() source form and has a
; separate pattern).
2487 (define_code_iterator zero_convert [truncate zero_extend])
; Standard-pattern name fragment for each integer conversion code.
2488 (define_code_attr convop [
2489 (sign_extend "extend")
2490 (zero_extend "zero_extend")
2491 (truncate "trunc")])
2493 (define_insn "<convop><VEC_ALL1REG_INT_ALT:mode><VEC_ALL1REG_INT_MODE:mode>2<exec>"
2494 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "=v")
2495 (zero_convert:VEC_ALL1REG_INT_MODE
2496 (match_operand:VEC_ALL1REG_INT_ALT 1 "gcn_alu_operand" " v")))]
2498 "v_mov_b32_sdwa\t%0, %1 dst_sel:<VEC_ALL1REG_INT_MODE:sdwa> dst_unused:UNUSED_PAD src0_sel:<VEC_ALL1REG_INT_ALT:sdwa>"
2499 [(set_attr "type" "vop_sdwa")
2500 (set_attr "length" "8")])
2502 (define_insn "extend<VEC_ALL1REG_INT_ALT:mode><VEC_ALL1REG_INT_MODE:mode>2<exec>"
2503 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "=v")
2504 (sign_extend:VEC_ALL1REG_INT_MODE
2505 (match_operand:VEC_ALL1REG_INT_ALT 1 "gcn_alu_operand" " v")))]
2507 "v_mov_b32_sdwa\t%0, sext(%1) src0_sel:<VEC_ALL1REG_INT_ALT:sdwa>"
2508 [(set_attr "type" "vop_sdwa")
2509 (set_attr "length" "8")])
2511 ;; GCC can already do these for scalar types, but not for vector types.
2512 ;; Unfortunately you can't just do SUBREG on a vector to select the low part,
2513 ;; so there must be a few tricks here.
2515 (define_insn_and_split "truncv64di<mode>2"
2516 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "=v")
2517 (truncate:VEC_ALL1REG_INT_MODE
2518 (match_operand:V64DI 1 "gcn_alu_operand" " v")))]
2524 rtx inlo = gcn_operand_part (V64DImode, operands[1], 0);
2525 rtx out = operands[0];
2527 if (<MODE>mode != V64SImode)
2528 emit_insn (gen_truncv64si<mode>2 (out, inlo));
2530 emit_move_insn (out, inlo);
2532 [(set_attr "type" "vop2")
2533 (set_attr "length" "4")])
2535 (define_insn_and_split "truncv64di<mode>2_exec"
2536 [(set (match_operand:VEC_ALL1REG_INT_MODE 0 "register_operand" "=v")
2537 (vec_merge:VEC_ALL1REG_INT_MODE
2538 (truncate:VEC_ALL1REG_INT_MODE
2539 (match_operand:V64DI 1 "gcn_alu_operand" " v"))
2540 (match_operand:VEC_ALL1REG_INT_MODE 2 "gcn_alu_or_unspec_operand"
2542 (match_operand:DI 3 "gcn_exec_operand" " e")))]
2548 rtx out = operands[0];
2549 rtx inlo = gcn_operand_part (V64DImode, operands[1], 0);
2550 rtx merge = operands[2];
2551 rtx exec = operands[3];
2553 if (<MODE>mode != V64SImode)
2554 emit_insn (gen_truncv64si<mode>2_exec (out, inlo, merge, exec));
2556 emit_insn (gen_mov<mode>_exec (out, inlo, exec, merge));
2558 [(set_attr "type" "vop2")
2559 (set_attr "length" "4")])
2561 (define_insn_and_split "<convop><mode>v64di2"
2562 [(set (match_operand:V64DI 0 "register_operand" "=v")
2564 (match_operand:VEC_ALL1REG_INT_MODE 1 "gcn_alu_operand" " v")))]
2570 rtx outlo = gcn_operand_part (V64DImode, operands[0], 0);
2571 rtx outhi = gcn_operand_part (V64DImode, operands[0], 1);
2572 rtx in = operands[1];
2574 if (<MODE>mode != V64SImode)
2575 emit_insn (gen_<convop><mode>v64si2 (outlo, in));
2577 emit_move_insn (outlo, in);
2579 emit_insn (gen_ashrv64si3 (outhi, outlo, GEN_INT (31)));
2581 emit_insn (gen_vec_duplicatev64si (outhi, const0_rtx));
2583 [(set_attr "type" "mult")
2584 (set_attr "length" "12")])
2586 (define_insn_and_split "<convop><mode>v64di2_exec"
2587 [(set (match_operand:V64DI 0 "register_operand" "=v")
2590 (match_operand:VEC_ALL1REG_INT_MODE 1 "gcn_alu_operand" " v"))
2591 (match_operand:V64DI 2 "gcn_alu_or_unspec_operand" "U0")
2592 (match_operand:DI 3 "gcn_exec_operand" " e")))]
2598 rtx outlo = gcn_operand_part (V64DImode, operands[0], 0);
2599 rtx outhi = gcn_operand_part (V64DImode, operands[0], 1);
2600 rtx in = operands[1];
2601 rtx mergelo = gcn_operand_part (V64DImode, operands[2], 0);
2602 rtx mergehi = gcn_operand_part (V64DImode, operands[2], 1);
2603 rtx exec = operands[3];
2605 if (<MODE>mode != V64SImode)
2606 emit_insn (gen_<convop><mode>v64si2_exec (outlo, in, mergelo, exec));
2608 emit_insn (gen_mov<mode>_exec (outlo, in, exec, mergelo));
2610 emit_insn (gen_ashrv64si3_exec (outhi, outlo, GEN_INT (31), mergehi,
2613 emit_insn (gen_vec_duplicatev64si_exec (outhi, const0_rtx, mergehi,
2616 [(set_attr "type" "mult")
2617 (set_attr "length" "12")])
2620 ;; {{{ Vector comparison/merge
2622 (define_insn "vec_cmp<mode>di"
2623 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
2624 (match_operator 1 "comparison_operator"
2625 [(match_operand:VEC_1REG_MODE 2 "gcn_alu_operand"
2626 "vSv, B,vSv, B, v,vA")
2627 (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
2628 " v, v, v, v,vA, v")]))
2629 (clobber (match_scratch:DI 4 "= X, X, cV,cV, X, X"))]
2632 v_cmp%E1\tvcc, %2, %3
2633 v_cmp%E1\tvcc, %2, %3
2634 v_cmpx%E1\tvcc, %2, %3
2635 v_cmpx%E1\tvcc, %2, %3
2636 v_cmp%E1\t%0, %2, %3
2637 v_cmp%E1\t%0, %2, %3"
2638 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
2639 (set_attr "length" "4,8,4,8,8,8")])
2641 (define_expand "vec_cmpu<mode>di"
2642 [(match_operand:DI 0 "register_operand")
2643 (match_operator 1 "comparison_operator"
2644 [(match_operand:VEC_1REG_INT_MODE 2 "gcn_alu_operand")
2645 (match_operand:VEC_1REG_INT_MODE 3 "gcn_vop3_operand")])]
2648 /* Unsigned comparisons use the same patterns as signed comparisons,
2649 except that they use unsigned operators (e.g. LTU vs LT).
2650 The '%E1' directive then does the Right Thing. */
2651 emit_insn (gen_vec_cmp<mode>di (operands[0], operands[1], operands[2],
2656 (define_insn "vec_cmp<mode>di_exec"
2657 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
2659 (match_operator 1 "comparison_operator"
2660 [(match_operand:VEC_1REG_MODE 2 "gcn_alu_operand"
2661 "vSv, B,vSv, B, v,vA")
2662 (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
2663 " v, v, v, v,vA, v")])
2664 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e, e, e")))
2665 (clobber (match_scratch:DI 5 "= X, X, cV,cV, X, X"))]
2668 v_cmp%E1\tvcc, %2, %3
2669 v_cmp%E1\tvcc, %2, %3
2670 v_cmpx%E1\tvcc, %2, %3
2671 v_cmpx%E1\tvcc, %2, %3
2672 v_cmp%E1\t%0, %2, %3
2673 v_cmp%E1\t%0, %2, %3"
2674 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
2675 (set_attr "length" "4,8,4,8,8,8")])
2677 (define_insn "vec_cmp<mode>di_dup"
2678 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
2679 (match_operator 1 "comparison_operator"
2680 [(vec_duplicate:VEC_1REG_MODE
2681 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
2683 (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
2685 (clobber (match_scratch:DI 4 "= X,X,cV,cV, X"))]
2688 v_cmp%E1\tvcc, %2, %3
2689 v_cmp%E1\tvcc, %2, %3
2690 v_cmpx%E1\tvcc, %2, %3
2691 v_cmpx%E1\tvcc, %2, %3
2692 v_cmp%E1\t%0, %2, %3"
2693 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
2694 (set_attr "length" "4,8,4,8,8")])
2696 (define_insn "vec_cmp<mode>di_dup_exec"
2697 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
2699 (match_operator 1 "comparison_operator"
2700 [(vec_duplicate:VEC_1REG_MODE
2701 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
2703 (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
2705 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e,e, e")))
2706 (clobber (match_scratch:DI 5 "= X,X,cV,cV, X"))]
2709 v_cmp%E1\tvcc, %2, %3
2710 v_cmp%E1\tvcc, %2, %3
2711 v_cmpx%E1\tvcc, %2, %3
2712 v_cmpx%E1\tvcc, %2, %3
2713 v_cmp%E1\t%0, %2, %3"
2714 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
2715 (set_attr "length" "4,8,4,8,8")])
2717 (define_expand "vcond_mask_<mode>di"
2719 [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand" "")
2720 (vec_merge:VEC_ALLREG_MODE
2721 (match_operand:VEC_ALLREG_MODE 1 "gcn_vop3_operand" "")
2722 (match_operand:VEC_ALLREG_MODE 2 "gcn_alu_operand" "")
2723 (match_operand:DI 3 "register_operand" "")))
2724 (clobber (scratch:V64DI))])]
2728 (define_expand "vcond<VEC_ALL1REG_MODE:mode><VEC_1REG_ALT:mode>"
2729 [(match_operand:VEC_ALL1REG_MODE 0 "register_operand")
2730 (match_operand:VEC_ALL1REG_MODE 1 "gcn_vop3_operand")
2731 (match_operand:VEC_ALL1REG_MODE 2 "gcn_alu_operand")
2732 (match_operator 3 "comparison_operator"
2733 [(match_operand:VEC_1REG_ALT 4 "gcn_alu_operand")
2734 (match_operand:VEC_1REG_ALT 5 "gcn_vop3_operand")])]
2737 rtx tmp = gen_reg_rtx (DImode);
2738 emit_insn (gen_vec_cmp<VEC_1REG_ALT:mode>di
2739 (tmp, operands[3], operands[4], operands[5]));
2740 emit_insn (gen_vcond_mask_<VEC_ALL1REG_MODE:mode>di
2741 (operands[0], operands[1], operands[2], tmp));
2745 (define_expand "vcond<VEC_ALL1REG_MODE:mode><VEC_1REG_ALT:mode>_exec"
2746 [(match_operand:VEC_ALL1REG_MODE 0 "register_operand")
2747 (match_operand:VEC_ALL1REG_MODE 1 "gcn_vop3_operand")
2748 (match_operand:VEC_ALL1REG_MODE 2 "gcn_alu_operand")
2749 (match_operator 3 "comparison_operator"
2750 [(match_operand:VEC_1REG_ALT 4 "gcn_alu_operand")
2751 (match_operand:VEC_1REG_ALT 5 "gcn_vop3_operand")])
2752 (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
2755 rtx tmp = gen_reg_rtx (DImode);
2756 emit_insn (gen_vec_cmp<VEC_1REG_ALT:mode>di_exec
2757 (tmp, operands[3], operands[4], operands[5], operands[6]));
2758 emit_insn (gen_vcond_mask_<VEC_ALL1REG_MODE:mode>di
2759 (operands[0], operands[1], operands[2], tmp));
2763 (define_expand "vcondu<VEC_ALL1REG_MODE:mode><VEC_1REG_INT_ALT:mode>"
2764 [(match_operand:VEC_ALL1REG_MODE 0 "register_operand")
2765 (match_operand:VEC_ALL1REG_MODE 1 "gcn_vop3_operand")
2766 (match_operand:VEC_ALL1REG_MODE 2 "gcn_alu_operand")
2767 (match_operator 3 "comparison_operator"
2768 [(match_operand:VEC_1REG_INT_ALT 4 "gcn_alu_operand")
2769 (match_operand:VEC_1REG_INT_ALT 5 "gcn_vop3_operand")])]
2772 rtx tmp = gen_reg_rtx (DImode);
2773 emit_insn (gen_vec_cmp<VEC_1REG_INT_ALT:mode>di
2774 (tmp, operands[3], operands[4], operands[5]));
2775 emit_insn (gen_vcond_mask_<VEC_ALL1REG_MODE:mode>di
2776 (operands[0], operands[1], operands[2], tmp));
2780 (define_expand "vcondu<VEC_ALL1REG_MODE:mode><VEC_1REG_INT_ALT:mode>_exec"
2781 [(match_operand:VEC_ALL1REG_MODE 0 "register_operand")
2782 (match_operand:VEC_ALL1REG_MODE 1 "gcn_vop3_operand")
2783 (match_operand:VEC_ALL1REG_MODE 2 "gcn_alu_operand")
2784 (match_operator 3 "comparison_operator"
2785 [(match_operand:VEC_1REG_INT_ALT 4 "gcn_alu_operand")
2786 (match_operand:VEC_1REG_INT_ALT 5 "gcn_vop3_operand")])
2787 (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
2790 rtx tmp = gen_reg_rtx (DImode);
2791 emit_insn (gen_vec_cmp<VEC_1REG_INT_ALT:mode>di_exec
2792 (tmp, operands[3], operands[4], operands[5], operands[6]));
2793 emit_insn (gen_vcond_mask_<VEC_ALL1REG_MODE:mode>di
2794 (operands[0], operands[1], operands[2], tmp));
2799 ;; {{{ Fully masked loop support
2801 (define_expand "while_ultsidi"
2802 [(match_operand:DI 0 "register_operand")
2803 (match_operand:SI 1 "")
2804 (match_operand:SI 2 "")]
2807 if (GET_CODE (operands[1]) != CONST_INT
2808 || GET_CODE (operands[2]) != CONST_INT)
2810 rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
2812 if (GET_CODE (operands[1]) != CONST_INT
2813 || INTVAL (operands[1]) != 0)
2815 tmp = gen_reg_rtx (V64SImode);
2816 emit_insn (gen_addv64si3_dup (tmp, _0_1_2_3, operands[1]));
2818 emit_insn (gen_vec_cmpv64sidi_dup (operands[0],
2819 gen_rtx_GT (VOIDmode, 0, 0),
2824 HOST_WIDE_INT diff = INTVAL (operands[2]) - INTVAL (operands[1]);
2825 HOST_WIDE_INT mask = (diff >= 64 ? -1
2826 : ~((unsigned HOST_WIDE_INT)-1 << diff));
2827 emit_move_insn (operands[0], gen_rtx_CONST_INT (VOIDmode, mask));
2832 (define_expand "maskload<mode>di"
2833 [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
2834 (match_operand:VEC_ALLREG_MODE 1 "memory_operand")
2835 (match_operand 2 "")]
2838 rtx exec = force_reg (DImode, operands[2]);
2839 rtx addr = gcn_expand_scalar_to_vector_address
2840 (<MODE>mode, exec, operands[1], gen_rtx_SCRATCH (V64DImode));
2841 rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
2842 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
2843 rtx undef = gcn_gen_undef (<MODE>mode);
2844 emit_insn (gen_gather<mode>_expr_exec (operands[0], addr, as, v, undef,
2849 (define_expand "maskstore<mode>di"
2850 [(match_operand:VEC_ALLREG_MODE 0 "memory_operand")
2851 (match_operand:VEC_ALLREG_MODE 1 "register_operand")
2852 (match_operand 2 "")]
2855 rtx exec = force_reg (DImode, operands[2]);
2856 rtx addr = gcn_expand_scalar_to_vector_address
2857 (<MODE>mode, exec, operands[0], gen_rtx_SCRATCH (V64DImode));
2858 rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
2859 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
2860 emit_insn (gen_scatter<mode>_expr_exec (addr, operands[1], as, v, exec));
2864 (define_expand "mask_gather_load<mode>"
2865 [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
2866 (match_operand:DI 1 "register_operand")
2867 (match_operand 2 "register_operand")
2868 (match_operand 3 "immediate_operand")
2869 (match_operand:SI 4 "gcn_alu_operand")
2870 (match_operand:DI 5 "")]
2873 rtx exec = force_reg (DImode, operands[5]);
2875 /* TODO: more conversions will be needed when more types are vectorized. */
2876 if (GET_MODE (operands[2]) == V64DImode)
2878 rtx tmp = gen_reg_rtx (V64SImode);
2879 emit_insn (gen_truncv64div64si2_exec (tmp, operands[2],
2880 gcn_gen_undef (V64SImode),
2885 emit_insn (gen_gather<mode>_exec (operands[0], operands[1], operands[2],
2886 operands[3], operands[4], exec));
2890 (define_expand "mask_scatter_store<mode>"
2891 [(match_operand:DI 0 "register_operand")
2892 (match_operand 1 "register_operand")
2893 (match_operand 2 "immediate_operand")
2894 (match_operand:SI 3 "gcn_alu_operand")
2895 (match_operand:VEC_ALLREG_MODE 4 "register_operand")
2896 (match_operand:DI 5 "")]
2899 rtx exec = force_reg (DImode, operands[5]);
2901 /* TODO: more conversions will be needed when more types are vectorized. */
2902 if (GET_MODE (operands[1]) == V64DImode)
2904 rtx tmp = gen_reg_rtx (V64SImode);
2905 emit_insn (gen_truncv64div64si2_exec (tmp, operands[1],
2906 gcn_gen_undef (V64SImode),
2911 emit_insn (gen_scatter<mode>_exec (operands[0], operands[1], operands[2],
2912 operands[3], operands[4], exec));
2916 ; FIXME this should be VEC_REG_MODE, but not all dependencies are implemented.
; Modes supported by the conditional (masked) operation expanders.
2917 (define_mode_iterator COND_MODE [V64SI V64DI V64SF V64DF])
; Subset usable with bitwise conditional operations (integer only).
2918 (define_mode_iterator COND_INT_MODE [V64SI V64DI])
; Arithmetic codes handled by the cond_<op> expander below.
2920 (define_code_iterator cond_op [plus minus])
2922 (define_expand "cond_<expander><mode>"
2923 [(match_operand:COND_MODE 0 "register_operand")
2924 (match_operand:DI 1 "register_operand")
2926 (match_operand:COND_MODE 2 "gcn_alu_operand")
2927 (match_operand:COND_MODE 3 "gcn_alu_operand"))
2928 (match_operand:COND_MODE 4 "register_operand")]
2931 operands[1] = force_reg (DImode, operands[1]);
2932 operands[2] = force_reg (<MODE>mode, operands[2]);
2934 emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
2935 operands[3], operands[4],
; Bitwise codes handled by the masked cond_<op> expander below.
2940 (define_code_iterator cond_bitop [and ior xor])
2942 (define_expand "cond_<expander><mode>"
2943 [(match_operand:COND_INT_MODE 0 "register_operand")
2944 (match_operand:DI 1 "register_operand")
2945 (cond_bitop:COND_INT_MODE
2946 (match_operand:COND_INT_MODE 2 "gcn_alu_operand")
2947 (match_operand:COND_INT_MODE 3 "gcn_alu_operand"))
2948 (match_operand:COND_INT_MODE 4 "register_operand")]
2951 operands[1] = force_reg (DImode, operands[1]);
2952 operands[2] = force_reg (<MODE>mode, operands[2]);
2954 emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
2955 operands[3], operands[4],
2961 ;; {{{ Vector reductions
2963 (define_int_iterator REDUC_UNSPEC [UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR
2964 UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR
2967 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
2969 (define_int_iterator REDUC_2REG_UNSPEC [UNSPEC_PLUS_DPP_SHR
2971 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
2973 ; FIXME: Isn't there a better way of doing this?
; Identity map from each reduction unspec to its own name, so insn
; conditions can compare the iterator value directly (see the
; TARGET_GCN3/<reduc_unspec> test in *<reduc_op>_dpp_shr_<mode>).
2974 (define_int_attr reduc_unspec [(UNSPEC_SMIN_DPP_SHR "UNSPEC_SMIN_DPP_SHR")
2975 (UNSPEC_SMAX_DPP_SHR "UNSPEC_SMAX_DPP_SHR")
2976 (UNSPEC_UMIN_DPP_SHR "UNSPEC_UMIN_DPP_SHR")
2977 (UNSPEC_UMAX_DPP_SHR "UNSPEC_UMAX_DPP_SHR")
2978 (UNSPEC_PLUS_DPP_SHR "UNSPEC_PLUS_DPP_SHR")
2979 (UNSPEC_AND_DPP_SHR "UNSPEC_AND_DPP_SHR")
2980 (UNSPEC_IOR_DPP_SHR "UNSPEC_IOR_DPP_SHR")
2981 (UNSPEC_XOR_DPP_SHR "UNSPEC_XOR_DPP_SHR")])
; Name fragment used to build the reduc_<reduc_op>_scal_<mode>
; expander names.
2983 (define_int_attr reduc_op [(UNSPEC_SMIN_DPP_SHR "smin")
2984 (UNSPEC_SMAX_DPP_SHR "smax")
2985 (UNSPEC_UMIN_DPP_SHR "umin")
2986 (UNSPEC_UMAX_DPP_SHR "umax")
2987 (UNSPEC_PLUS_DPP_SHR "plus")
2988 (UNSPEC_AND_DPP_SHR "and")
2989 (UNSPEC_IOR_DPP_SHR "ior")
2990 (UNSPEC_XOR_DPP_SHR "xor")])
; Mnemonic for each reduction step, passed to
; gcn_expand_dpp_shr_insn; %i0/%u0/%b0 are operand-print directives
; that presumably append a signed/unsigned/bitwise type suffix taken
; from operand 0 -- confirm exact expansion in gcn.c print_operand.
2992 (define_int_attr reduc_insn [(UNSPEC_SMIN_DPP_SHR "v_min%i0")
2993 (UNSPEC_SMAX_DPP_SHR "v_max%i0")
2994 (UNSPEC_UMIN_DPP_SHR "v_min%u0")
2995 (UNSPEC_UMAX_DPP_SHR "v_max%u0")
2996 (UNSPEC_PLUS_DPP_SHR "v_add%u0")
2997 (UNSPEC_AND_DPP_SHR "v_and%b0")
2998 (UNSPEC_IOR_DPP_SHR "v_or%b0")
2999 (UNSPEC_XOR_DPP_SHR "v_xor%b0")])
3001 (define_expand "reduc_<reduc_op>_scal_<mode>"
3002 [(set (match_operand:<SCALAR_MODE> 0 "register_operand")
3003 (unspec:<SCALAR_MODE>
3004 [(match_operand:VEC_1REG_MODE 1 "register_operand")]
3008 rtx tmp = gcn_expand_reduc_scalar (<MODE>mode, operands[1],
3011 /* The result of the reduction is in lane 63 of tmp. */
3012 emit_insn (gen_mov_from_lane63_<mode> (operands[0], tmp));
3017 (define_expand "reduc_<reduc_op>_scal_v64di"
3018 [(set (match_operand:DI 0 "register_operand")
3020 [(match_operand:V64DI 1 "register_operand")]
3021 REDUC_2REG_UNSPEC))]
3024 rtx tmp = gcn_expand_reduc_scalar (V64DImode, operands[1],
3027 /* The result of the reduction is in lane 63 of tmp. */
3028 emit_insn (gen_mov_from_lane63_v64di (operands[0], tmp));
3033 (define_insn "*<reduc_op>_dpp_shr_<mode>"
3034 [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "=v")
3035 (unspec:VEC_1REG_MODE
3036 [(match_operand:VEC_1REG_MODE 1 "register_operand" "v")
3037 (match_operand:VEC_1REG_MODE 2 "register_operand" "v")
3038 (match_operand:SI 3 "const_int_operand" "n")]
3040 "!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode)
3041 && <reduc_unspec> == UNSPEC_PLUS_DPP_SHR)"
3043 return gcn_expand_dpp_shr_insn (<MODE>mode, "<reduc_insn>",
3044 <reduc_unspec>, INTVAL (operands[3]));
3046 [(set_attr "type" "vop_dpp")
3047 (set_attr "length" "8")])
3049 (define_insn_and_split "*<reduc_op>_dpp_shr_v64di"
3050 [(set (match_operand:V64DI 0 "register_operand" "=&v")
3052 [(match_operand:V64DI 1 "register_operand" "v0")
3053 (match_operand:V64DI 2 "register_operand" "v0")
3054 (match_operand:SI 3 "const_int_operand" "n")]
3055 REDUC_2REG_UNSPEC))]
3061 [(match_dup 6) (match_dup 8) (match_dup 3)] REDUC_2REG_UNSPEC))
3064 [(match_dup 7) (match_dup 9) (match_dup 3)] REDUC_2REG_UNSPEC))]
3066 operands[4] = gcn_operand_part (V64DImode, operands[0], 0);
3067 operands[5] = gcn_operand_part (V64DImode, operands[0], 1);
3068 operands[6] = gcn_operand_part (V64DImode, operands[1], 0);
3069 operands[7] = gcn_operand_part (V64DImode, operands[1], 1);
3070 operands[8] = gcn_operand_part (V64DImode, operands[2], 0);
3071 operands[9] = gcn_operand_part (V64DImode, operands[2], 1);
3073 [(set_attr "type" "vmult")
3074 (set_attr "length" "16")])
3076 ; Special cases for addition.
3078 (define_insn "*plus_carry_dpp_shr_v64si"
3079 [(set (match_operand:V64SI 0 "register_operand" "=v")
3081 [(match_operand:V64SI 1 "register_operand" "v")
3082 (match_operand:V64SI 2 "register_operand" "v")
3083 (match_operand:SI 3 "const_int_operand" "n")]
3084 UNSPEC_PLUS_CARRY_DPP_SHR))
3085 (clobber (reg:DI VCC_REG))]
3088 const char *insn = TARGET_GCN3 ? "v_add%u0" : "v_add_co%u0";
3089 return gcn_expand_dpp_shr_insn (V64SImode, insn,
3090 UNSPEC_PLUS_CARRY_DPP_SHR,
3091 INTVAL (operands[3]));
3093 [(set_attr "type" "vop_dpp")
3094 (set_attr "length" "8")])
;; As *plus_carry_dpp_shr_v64si, but additionally consumes a carry-in:
;; operand 4 is a DImode lane mask constrained to VCC ("cV"), and the
;; new carry-out again lands in VCC (clobbered).  Used for the high
;; half of 64-bit reduction steps.
3096 (define_insn "*plus_carry_in_dpp_shr_v64si"
3097   [(set (match_operand:V64SI 0 "register_operand" "=v")
3099 	  [(match_operand:V64SI 1 "register_operand" "v")
3100 	   (match_operand:V64SI 2 "register_operand" "v")
3101 	   (match_operand:SI 3 "const_int_operand" "n")
3102 	   (match_operand:DI 4 "register_operand" "cV")]
3103 	  UNSPEC_PLUS_CARRY_IN_DPP_SHR))
3104    (clobber (reg:DI VCC_REG))]
  /* "v_addc" is the add-with-carry-in form; non-GCN3 ISAs rename it
     "v_addc_co".  */
3107     const char *insn = TARGET_GCN3 ? "v_addc%u0" : "v_addc_co%u0";
3108     return gcn_expand_dpp_shr_insn (V64SImode, insn,
3109 				    UNSPEC_PLUS_CARRY_IN_DPP_SHR,
3110 				    INTVAL (operands[3]));
3112   [(set_attr "type" "vop_dpp")
3113    (set_attr "length" "8")])
;; 64-bit variant of the DPP-shifted add: there is no single 64-bit
;; DPP add, so split into a low-half add (carry-out left in VCC)
;; followed by a high-half add that reads that carry back in.
;; "=&v" early-clobbers the output; "v0" ties both inputs to the
;; output register pair.
3115 (define_insn_and_split "*plus_carry_dpp_shr_v64di"
3116   [(set (match_operand:V64DI 0 "register_operand" "=&v")
3118 	  [(match_operand:V64DI 1 "register_operand" "v0")
3119 	   (match_operand:V64DI 2 "register_operand" "v0")
3120 	   (match_operand:SI 3 "const_int_operand" "n")]
3121 	  UNSPEC_PLUS_CARRY_DPP_SHR))
3122    (clobber (reg:DI VCC_REG))]
;; The low-half pattern must come first: its VCC carry-out has to be
;; live when the UNSPEC_PLUS_CARRY_IN_DPP_SHR pattern reads
;; (reg:DI VCC_REG).
3126   [(parallel [(set (match_dup 4)
3128 		 [(match_dup 6) (match_dup 8) (match_dup 3)]
3129 		 UNSPEC_PLUS_CARRY_DPP_SHR))
3130 	      (clobber (reg:DI VCC_REG))])
3131    (parallel [(set (match_dup 5)
3133 		 [(match_dup 7) (match_dup 9) (match_dup 3) (reg:DI VCC_REG)]
3134 		 UNSPEC_PLUS_CARRY_IN_DPP_SHR))
3135 	      (clobber (reg:DI VCC_REG))])]
  /* Operands 4-9 are the part-0/part-1 halves of DImode vector
     operands 0-2 respectively (presumably low/high word — confirm
     gcn_operand_part's ordering).  */
3137     operands[4] = gcn_operand_part (V64DImode, operands[0], 0);
3138     operands[5] = gcn_operand_part (V64DImode, operands[0], 1);
3139     operands[6] = gcn_operand_part (V64DImode, operands[1], 0);
3140     operands[7] = gcn_operand_part (V64DImode, operands[1], 1);
3141     operands[8] = gcn_operand_part (V64DImode, operands[2], 0);
3142     operands[9] = gcn_operand_part (V64DImode, operands[2], 1);
3144   [(set_attr "type" "vmult")
3145    (set_attr "length" "16")])
3147 ; Instructions to move a scalar value from lane 63 of a vector register.
;; Extract lane 63's value from a single-register vector.
;; Alternative 0: read it into a scalar register with v_readlane_b32
;; (EXEC-independent, so the "exec" attribute is "none").
;; Alternative 1: copy within VGPRs using a DPP whole-wave rotate
;; (wave_ror:1) — presumably leaving lane 63's value in lane 0;
;; NOTE(review): confirm which lane callers read from.
3148 (define_insn "mov_from_lane63_<mode>"
3149   [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v")
3150 	(unspec:<SCALAR_MODE>
3151 	  [(match_operand:VEC_ALL1REG_MODE 1 "register_operand" "v,v")]
3152 	  UNSPEC_MOV_FROM_LANE63))]
3155    v_readlane_b32\t%0, %1, 63
3156    v_mov_b32\t%0, %1 wave_ror:1"
3157   [(set_attr "type" "vop3a,vop_dpp")
3158    (set_attr "exec" "none,*")
3159    (set_attr "length" "8")])
;; DImode (register-pair) version of mov_from_lane63: the value moves
;; as two 32-bit halves (%L = low word, %H = high word).  In the DPP
;; alternative the destination and source pairs may overlap, so the C
;; fragment orders the two moves by register number to avoid clobbering
;; a source half before it has been read.
3161 (define_insn "mov_from_lane63_v64di"
3162   [(set (match_operand:DI 0 "register_operand" "=Sg,v")
3164 	  [(match_operand:V64DI 1 "register_operand" "v,v")]
3165 	  UNSPEC_MOV_FROM_LANE63))]
3168    v_readlane_b32\t%L0, %L1, 63\;v_readlane_b32\t%H0, %H1, 63
3169    * if (REGNO (operands[0]) <= REGNO (operands[1])) \
3170        return \"v_mov_b32\t%L0, %L1 wave_ror:1\;\" \
3171 	      \"v_mov_b32\t%H0, %H1 wave_ror:1\"; \
3173       return \"v_mov_b32\t%H0, %H1 wave_ror:1\;\" \
3174 	     \"v_mov_b32\t%L0, %L1 wave_ror:1\";"
3175   [(set_attr "type" "vop3a,vop_dpp")
3176    (set_attr "exec" "none,*")
3177    (set_attr "length" "8")])
3180 ;; {{{ Miscellaneous
;; Expand vec_series for V64SI: operand 0 = operand 1 (base) +
;; lane_index * operand 2 (step).  VGPR 1 is assumed to hold each
;; lane's index — NOTE(review): confirm that the gcn backend
;; pre-initializes v1 with lane numbers.
3182 (define_expand "vec_seriesv64si"
3183   [(match_operand:V64SI 0 "register_operand")
3184    (match_operand:SI 1 "gcn_alu_operand")
3185    (match_operand:SI 2 "gcn_alu_operand")]
  /* tmp = v1 * step (scalar step broadcast by the _dup pattern), then
     result = tmp + base (scalar base broadcast likewise).  */
3188     rtx tmp = gen_reg_rtx (V64SImode);
3189     rtx v1 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
3191     emit_insn (gen_mulv64si3_dup (tmp, v1, operands[2]));
3192     emit_insn (gen_addv64si3_dup (operands[0], tmp, operands[1]));
3196 (define_expand "vec_seriesv64di"
3197 [(match_operand:V64DI 0 "register_operand")
3198 (match_operand:DI 1 "gcn_alu_operand")
3199 (match_operand:DI 2 "gcn_alu_operand")]
3202 rtx tmp = gen_reg_rtx (V64DImode);
3203 rtx v1 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
3205 emit_insn (gen_mulv64di3_zext_dup2 (tmp, v1, operands[2]));
3206 emit_insn (gen_addv64di3_dup (operands[0], tmp, operands[1]));